aes-gcm-riscv64-zvkb-zvkg-zvkned.S revision 1.1 1 #include <machine/asm.h>
2 .text
# rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt -- AES-GCM encrypt entry point.
#
# Builds the shared vector state (H^n, counter blocks, first input group,
# initial tag) and then branches to the body matching the AES round count.
#
# Registers as used by the code below:
#   a0  input pointer
#   a1  output pointer
#   a2  length; only the a2 >> 4 whole 16-byte units are processed
#       (presumably a byte count -- TODO confirm against the C prototype)
#   a3  key schedule base; the round count is read from offset 240
#   a4  NOTE(review): the "Load IV" load appears to read through a4 -- confirm
#   a5  Xi pointer; H is read from a5 + 32
#
# State handed to the aes_gcm_enc_blocks_* bodies:
#   a6  FULL_BLOCK_LEN32 (VL for e32, m4)
#   a7  total e32 length, used for the return value
#   t4  e32 length of the leading zero padding (0 when there is none)
#   t5  e32 length still to process after the first round
#
# Returns a0 = 0 when a2 >> 4 is zero or the round count is not 10/12/14.
# Otherwise the selected body returns to the caller with a0 = a7 << 2.
#
# Hand-decoded recurring encodings (verify with a disassembler):
#   .word 3439489111  vsetivli zero, 4, e32, m1, ta, ma
#   .word 220754007   vsetvli  zero, a6, e32, m4, ta, ma
3 .p2align 3
4 .globl rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt
5 .type rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt,@function
6 rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt:
# t0 = a2 >> 4; return 0 immediately when there is no whole 16-byte unit.
7 srli t0, a2, 4
8 beqz t0, .Lenc_end
# t5 = t0 * 4 = length in 32-bit elements (LEN32).
9 slli t5, t0, 2
10
# Keep LEN32 in a7; the bodies return a7 << 2.
11 mv a7, t5
12
13 # Compute the AES-GCM full-block e32 length for `LMUL=4`. We will handle
14 # the multiple AES-GCM blocks at the same time within `LMUL=4` register.
15 # The AES-GCM's SEW is e32 and EGW is 128 bits.
16 # FULL_BLOCK_LEN32 = (VLEN*LMUL)/(EGW) * (EGW/SEW) = (VLEN*4)/(32*4) * 4
17 # = (VLEN*4)/32
18 # We could get the block_num using the VL value of `vsetvli with e32, m4`.
# Hand-decoded: vsetvli a6, zero, e32, m4, ta, ma  (a6 = FULL_BLOCK_LEN32).
19 .word 220231767
20 # If `LEN32 % FULL_BLOCK_LEN32` is not equal to zero, we could fill the
21 # zero padding data to make sure we could always handle FULL_BLOCK_LEN32
22 # blocks for all iterations.
23
24 ## Prepare the H^n multiplier in v16 for GCM multiplier. The `n` is the gcm
25 ## block number in a LMUL=4 register group.
26 ## n = ((VLEN*LMUL)/(32*4)) = ((VLEN*4)/(32*4))
27 ## = (VLEN/32)
28 ## We could use vsetvli with `e32, m1` to compute the `n` number.
# Hand-decoded: vsetvli t0, zero, e32, m1, ta, ma  (t0 = n).
29 .word 218133207
30
31 # The H is at `gcm128_context.Htable[0]`(addr(Xi)+16*2).
32 addi t1, a5, 32
33 .word 3439489111
34 .word 33779591
35
36 # Compute the H^n
# t1 doubles each pass until it reaches t0, so the encoded instruction runs
# once per doubling (presumably a GHASH squaring -- confirm by disassembly).
37 li t1, 1
38 1:
39 .word 2750984183
40 slli t1, t1, 1
41 bltu t1, t0, 1b
42
43 .word 220754007
44 .word 1577072727
45 .word 2817763447
46
47 #### Load plaintext into v24 and handle padding. We also load the init tag
48 #### data into v20 and prepare the AES ctr input data into v12 and v28.
49 .word 1577073239
50
51 ## Prepare the AES ctr input data into v12.
52 # Setup ctr input mask.
53 # ctr mask : [000100010001....]
54 # Note: The actual vl should be `FULL_BLOCK_LEN32/4 * 2`, but we just use
55 # `FULL_BLOCK_LEN32` here.
56 .word 201879639
57 li t0, 0b10001000
58 .word 1577238615
59 # Load IV.
60 .word 3439489111
61 .word 34041735
62 # Convert the big-endian counter into little-endian.
63 .word 3305271383
64 .word 1240772567
65 # Splat the `single block of IV` to v12
66 .word 220754007
67 .word 1577072215
68 .word 2817762935
69 # Prepare the ctr counter into v8
70 # v8: [x, x, x, 0, x, x, x, 1, x, x, x, 2, ...]
71 .word 1342710871
72 # Merge IV and ctr counter into v12.
73 # v12:[x, x, x, count+0, x, x, x, count+1, ...]
74 .word 86536279
75 .word 12846679
76
# t4 = 0 marks "no padding" for the bodies; overwritten below when padding
# is needed.
77 li t4, 0
78 # Get the SEW32 size in the first round.
79 # If we have the non-zero value for `LEN32&(FULL_BLOCK_LEN32-1)`, then
80 # we will have the leading padding zero.
# The AND with a6 - 1 is a remainder only if a6 is a power of two
# (assumed here -- TODO confirm).
81 addi t0, a6, -1
82 and t0, t0, t5
83 beqz t0, 1f
84
85 ## with padding
# t5 drops the partial first group; t4 = a6 - t0 = padding length in e32.
86 sub t5, t5, t0
87 sub t4, a6, t0
88 # padding block size
89 srli t1, t4, 2
90 # padding byte size
91 slli t2, t4, 2
92
93 # Adjust the ctr counter to make the counter start from `counter+0` for the
94 # first non-padding block.
95 .word 86536279
96 .word 147015255
97 # Prepare the AES ctr input into v28.
98 # The ctr data uses big-endian form.
99 .word 1577455191
100 .word 1237626455
101
102 # Prepare the mask for input loading in the first round. We use
103 # `VL=FULL_BLOCK_LEN32` with the mask in the first round.
104 # Adjust input ptr.
# Both pointers are moved back by the padding byte size, so the first
# full-width group starts before the caller's buffers; per the comments
# below, the load is masked.
105 sub a0, a0, t2
106 # Adjust output ptr.
107 sub a1, a1, t2
108 .word 211316823
109 .word 1376297303
110 # We don't use the pseudo instruction `vmsgeu` here. Use `vmsgtu` instead.
111 # The original code is:
112 # vmsgeu.vx v0, v2, t4
113 addi t0, t4, -1
114 .word 2049097815
115 .word 220754007
116 .word 1577073751
117 # Load the input for length FULL_BLOCK_LEN32 with mask.
118 .word 86536279
119 .word 355335
120
121 # Load the init `Xi` data to v20 with preceding zero padding.
122 # Adjust Xi ptr.
123 sub t0, a5, t2
124 # Load for length `zero-padding-e32-length + 4`.
125 addi t1, t4, 4
126 .word 19099735
127 .word 190983
128 j 2f
129
130 1:
131 ## without padding
# The first round consumes one full group of a6 elements.
132 sub t5, t5, a6
133
134 .word 220754007
135 .word 33909767
136
137 # Load the init Xi data to v20.
138 .word 3372380247
139 .word 34073095
140
141 # Prepare the AES ctr input into v28.
142 # The ctr data uses big-endian form.
143 .word 86536279
144 .word 1577455191
145 .word 1237626455
146 2:
147
148
149 # Load number of rounds
150 lwu t0, 240(a3)
151 li t1, 14
152 li t2, 12
153 li t3, 10
154
# Branch (not call) to the body for this round count; the body returns
# straight to our caller. Any other round count falls through and returns 0.
155 beq t0, t1, aes_gcm_enc_blocks_256
156 beq t0, t2, aes_gcm_enc_blocks_192
157 beq t0, t3, aes_gcm_enc_blocks_128
158
159 .Lenc_end:
160 li a0, 0
161 ret
162
163 .size rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt,.-rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt
# aes_gcm_enc_blocks_128 -- encrypt body for a 10-round (AES-128) key.
#
# Reached by a branch from rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt, which has
# already prepared the vector state; its `ret` returns to that function's
# caller.
#
# Scalar registers on entry:
#   a0/a1  input/output pointers
#   a3     key schedule base (advanced by 160 below and not restored)
#   a5     Xi pointer (H is read from a5 + 32, the tag is saved at the end)
#   a6     FULL_BLOCK_LEN32
#   a7     total e32 length
#   t4     padding e32 length, 0 when the first round has no padding
#   t5     e32 length remaining after the first round
# Locals: t0 = a6 << 2 (pointer step per round), t6 = a6 >> 2.
#
# Returns a0 = a7 << 2.
#
# Hand-decoded recurring encodings (verify with a disassembler):
#   .word 3439489111  vsetivli zero, 4, e32, m1, ta, ma
#   .word 3441586263  vsetivli zero, 4, e32, m4, ta, ma
#   .word 220754007   vsetvli  zero, a6, e32, m4, ta, ma
164 .p2align 3
165 aes_gcm_enc_blocks_128:
166 srli t6, a6, 2
167 slli t0, a6, 2
168
169 # Load all 11 aes round keys to v1-v11 registers.
# a3 steps through the schedule 16 bytes at a time.
170 .word 3439489111
171 .word 34005127
172 addi a3, a3, 16
173 .word 34005255
174 addi a3, a3, 16
175 .word 34005383
176 addi a3, a3, 16
177 .word 34005511
178 addi a3, a3, 16
179 .word 34005639
180 addi a3, a3, 16
181 .word 34005767
182 addi a3, a3, 16
183 .word 34005895
184 addi a3, a3, 16
185 .word 34006023
186 addi a3, a3, 16
187 .word 34006151
188 addi a3, a3, 16
189 .word 34006279
190 addi a3, a3, 16
191 .word 34006407
192
193 # We already have the ciphertext/plaintext and ctr data for the first round.
# Eleven encoded AES steps follow, one per round key loaded above.
194 .word 220754007
195 .word 2786307703
196 .word 2787192439
197 .word 2788241015
198 .word 2789289591
199 .word 2790338167
200 .word 2791386743
201 .word 2792435319
202 .word 2793483895
203 .word 2794532471
204 .word 2795581047
205 .word 2796662391
206
207
208 # Compute AES ctr result.
209 .word 801902167
210
# t4 != 0 means the first group carries leading zero padding.
211 bnez t4, 1f
212
213 ## without padding
214 # Store ciphertext/plaintext
215 .word 33943079
216 j 2f
217
218 ## with padding
219 1:
220 # Store ciphertext/plaintext using mask
221 .word 388647
222
223 # Fill zero for the padding blocks
224 .word 154071127
225 .word 1577074263
226
227 # We have used mask register for `INPUT_PADDING_MASK` before. We need to
228 # setup the ctr mask back.
229 # ctr mask : [000100010001....]
230 .word 201879639
231 li t1, 0b10001000
232 .word 1577271383
233 2:
234
235
236
# Step both pointers past the first group (t0 = a6 << 2).
237 add a0, a0, t0
238 add a1, a1, t0
239
240
241 .word 220754007
242
243 .Lenc_blocks_128:
244 # Compute the partial tags.
245 # The partial tags will multiply with [H^n, H^n, ..., H^n]
246 # [tag0, tag1, ...] =
247 # ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...] * [H^n, H^n, ..., H^n]
248 # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round.
# Loop until t5 (remaining e32 length) reaches zero; each pass consumes a6.
249 beqz t5, .Lenc_blocks_128_end
250 .word 3004050039
251
252 .word 86536279
253 # Increase ctr in v12.
254 .word 13616727
255 sub t5, t5, a6
256 # Load plaintext into v24
257 .word 220229719
258 .word 33909767
259 # Prepare the AES ctr input into v28.
260 # The ctr data uses big-endian form.
261 .word 1577455191
262 add a0, a0, t0
263 .word 86011991
264 .word 1237626455
265
266
267 .word 220754007
268 .word 2786307703
269 .word 2787192439
270 .word 2788241015
271 .word 2789289591
272 .word 2790338167
273 .word 2791386743
274 .word 2792435319
275 .word 2793483895
276 .word 2794532471
277 .word 2795581047
278 .word 2796662391
279
280
281 # Compute AES ctr ciphertext result.
282 .word 801902167
283
284 # Store ciphertext
285 .word 33943079
286 add a1, a1, t0
287
288 j .Lenc_blocks_128
289 .Lenc_blocks_128_end:
290
291 # Add ciphertext into partial tag
292 .word 793643607
293
294 .word 3441586263
295 # Update current ctr value to v12
296 .word 13616727
297 # Convert ctr to big-endian counter.
298 .word 1220847191
299 .word 484903
300
301
302 # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2).
303 # Load H to v1
304 addi t1, a5, 32
305 .word 3439489111
306 .word 33775751
307 # Multiply H for each partial tag and XOR them together.
308 # Handle 1st partial tag
309 .word 1577713751
310 .word 2719522935
311 # Handle 2nd to N-th partial tags
# t1 is the e32 index of the next partial tag; it advances 4 elements
# (one 128-bit block) per pass until it reaches a6.
312 li t1, 4
313 1:
314 .word 3441586263
315 .word 1061372503
316 .word 3439489111
317 .word 2987532407
318 addi t1, t1, 4
319 blt t1, a6, 1b
320
321
322 # Save the final tag
323 .word 34070567
324
325 # return the processed size.
326 slli a0, a7, 2
327 ret
328 .size aes_gcm_enc_blocks_128,.-aes_gcm_enc_blocks_128
# aes_gcm_enc_blocks_192 -- encrypt body for a 12-round (AES-192) key.
#
# Reached by a branch from rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt, which has
# already prepared the vector state; its `ret` returns to that function's
# caller.
#
# Scalar registers on entry:
#   a0/a1  input/output pointers
#   a3     key schedule base (left unchanged; key addresses go through t1)
#   a5     Xi pointer (H is read from a5 + 32, the tag is saved at the end)
#   a6     FULL_BLOCK_LEN32
#   a7     total e32 length
#   t4     padding e32 length, 0 when the first round has no padding
#   t5     e32 length remaining after the first round
# Locals: t0 = a6 << 2 (pointer step per round), t6 = a6 >> 2,
#         t1 = scratch address / mask / index.
#
# Keys 4, 8 and 13 (at a3 + 48, + 112, + 192) are reloaded inside every AES
# pass because there are not enough vector registers to keep them resident.
#
# Returns a0 = a7 << 2.
#
# Hand-decoded recurring encodings (verify with a disassembler):
#   .word 3439489111  vsetivli zero, 4, e32, m1, ta, ma
#   .word 3441586263  vsetivli zero, 4, e32, m4, ta, ma
#   .word 220754007   vsetvli  zero, a6, e32, m4, ta, ma
329 .p2align 3
330 aes_gcm_enc_blocks_192:
331 srli t6, a6, 2
332 slli t0, a6, 2
333
334 # We run out of 32 vector registers, so we just preserve some round keys
335 # and load the remaining round keys inside the aes body.
336 # We keep the round keys for:
337 # 1, 2, 3, 5, 6, 7, 9, 10, 11 and 12th keys.
338 # The following keys will be loaded in the aes body:
339 # 4, 8 and 13th keys.
340 .word 3439489111
341 # key 1
342 .word 34005127
343 # key 2
344 addi t1, a3, 16
345 .word 33775879
346 # key 3
347 addi t1, a3, 32
348 .word 33776007
349 # key 5
350 addi t1, a3, 64
351 .word 33776135
352 # key 6
353 addi t1, a3, 80
354 .word 33776263
355 # key 7
356 addi t1, a3, 96
357 .word 33776391
358 # key 9
359 addi t1, a3, 128
360 .word 33776519
361 # key 10
362 addi t1, a3, 144
363 .word 33776647
364 # key 11
365 addi t1, a3, 160
366 .word 33776775
367 # key 12
368 addi t1, a3, 176
369 .word 33776903
370
371 # We already have the ciphertext/plaintext and ctr data for the first round.
372 # Load key 4
373 .word 3439489111
374 addi t1, a3, 48
375 .word 33777031
376 .word 220754007
377 .word 2786307703
378 .word 2787192439
379 .word 2788241015
380 .word 2796629623
381 # Load key 8
382 .word 3439489111
383 addi t1, a3, 112
384 .word 33777031
385 .word 220754007
386 .word 2789289591
387 .word 2790338167
388 .word 2791386743
389 .word 2796629623
390 # Load key 13
391 .word 3439489111
392 addi t1, a3, 192
393 .word 33777031
394 .word 220754007
395 .word 2792435319
396 .word 2793483895
397 .word 2794532471
398 .word 2795581047
399 .word 2796662391
400
401
402 # Compute AES ctr result.
403 .word 801902167
404
# t4 != 0 means the first group carries leading zero padding.
405 bnez t4, 1f
406
407 ## without padding
408 # Store ciphertext/plaintext
409 .word 33943079
410 j 2f
411
412 ## with padding
413 1:
414 # Store ciphertext/plaintext using mask
415 .word 388647
416
417 # Fill zero for the padding blocks
418 .word 154071127
419 .word 1577074263
420
421 # We have used mask register for `INPUT_PADDING_MASK` before. We need to
422 # setup the ctr mask back.
423 # ctr mask : [000100010001....]
424 .word 201879639
425 li t1, 0b10001000
426 .word 1577271383
427 2:
428
429
430
# Step both pointers past the first group (t0 = a6 << 2).
431 add a0, a0, t0
432 add a1, a1, t0
433
434
435 .word 220754007
436
437 .Lenc_blocks_192:
438 # Compute the partial tags.
439 # The partial tags will multiply with [H^n, H^n, ..., H^n]
440 # [tag0, tag1, ...] =
441 # ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...] * [H^n, H^n, ..., H^n]
442 # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round.
# Loop until t5 (remaining e32 length) reaches zero; each pass consumes a6.
443 beqz t5, .Lenc_blocks_192_end
444 .word 3004050039
445
446 .word 86536279
447 # Increase ctr in v12.
448 .word 13616727
449 sub t5, t5, a6
450 # Load plaintext into v24
451 .word 220229719
452 .word 33909767
453 # Prepare the AES ctr input into v28.
454 # The ctr data uses big-endian form.
455 .word 1577455191
456 add a0, a0, t0
457 .word 86011991
458 .word 1237626455
459
460
461 # Load key 4
462 .word 3439489111
463 addi t1, a3, 48
464 .word 33777031
465 .word 220754007
466 .word 2786307703
467 .word 2787192439
468 .word 2788241015
469 .word 2796629623
470 # Load key 8
471 .word 3439489111
472 addi t1, a3, 112
473 .word 33777031
474 .word 220754007
475 .word 2789289591
476 .word 2790338167
477 .word 2791386743
478 .word 2796629623
479 # Load key 13
480 .word 3439489111
481 addi t1, a3, 192
482 .word 33777031
483 .word 220754007
484 .word 2792435319
485 .word 2793483895
486 .word 2794532471
487 .word 2795581047
488 .word 2796662391
489
490
491 # Compute AES ctr ciphertext result.
492 .word 801902167
493
494 # Store ciphertext
495 .word 33943079
496 add a1, a1, t0
497
498 j .Lenc_blocks_192
499 .Lenc_blocks_192_end:
500
501 # Add ciphertext into partial tag
502 .word 793643607
503
504 .word 3441586263
505 # Update current ctr value to v12
506 .word 13616727
507 # Convert ctr to big-endian counter.
508 .word 1220847191
509 .word 484903
510
511
512 # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2).
513 # Load H to v1
514 addi t1, a5, 32
515 .word 3439489111
516 .word 33775751
517 # Multiply H for each partial tag and XOR them together.
518 # Handle 1st partial tag
519 .word 1577713751
520 .word 2719522935
521 # Handle 2nd to N-th partial tags
# t1 is the e32 index of the next partial tag; it advances 4 elements
# (one 128-bit block) per pass until it reaches a6.
522 li t1, 4
523 1:
524 .word 3441586263
525 .word 1061372503
526 .word 3439489111
527 .word 2987532407
528 addi t1, t1, 4
529 blt t1, a6, 1b
530
531
532 # Save the final tag
533 .word 34070567
534
535 # return the processed size.
536 slli a0, a7, 2
537 ret
538 .size aes_gcm_enc_blocks_192,.-aes_gcm_enc_blocks_192
# aes_gcm_enc_blocks_256 -- encrypt body for a 14-round (AES-256) key.
#
# Reached by a branch from rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt, which has
# already prepared the vector state; its `ret` returns to that function's
# caller.
#
# Scalar registers on entry:
#   a0/a1  input/output pointers
#   a3     key schedule base (left unchanged; key addresses go through t1)
#   a5     Xi pointer (H is read from a5 + 32, the tag is saved at the end)
#   a6     FULL_BLOCK_LEN32
#   a7     total e32 length
#   t4     padding e32 length, 0 when the first round has no padding
#   t5     e32 length remaining after the first round
# Locals: t0 = a6 << 2 (pointer step per round), t6 = a6 >> 2,
#         t1 = scratch address / mask / index.
#
# Keys 3, 6, 9, 12 and 15 (at a3 + 32, + 80, + 128, + 176, + 224) are
# reloaded inside every AES pass because there are not enough vector
# registers to keep them resident.
#
# Returns a0 = a7 << 2.
#
# Hand-decoded recurring encodings (verify with a disassembler):
#   .word 3439489111  vsetivli zero, 4, e32, m1, ta, ma
#   .word 3441586263  vsetivli zero, 4, e32, m4, ta, ma
#   .word 220754007   vsetvli  zero, a6, e32, m4, ta, ma
539 .p2align 3
540 aes_gcm_enc_blocks_256:
541 srli t6, a6, 2
542 slli t0, a6, 2
543
544 # We run out of 32 vector registers, so we just preserve some round keys
545 # and load the remaining round keys inside the aes body.
546 # We keep the round keys for:
547 # 1, 2, 4, 5, 7, 8, 10, 11, 13 and 14th keys.
548 # The following keys will be loaded in the aes body:
549 # 3, 6, 9, 12 and 15th keys.
550 .word 3439489111
551 # key 1
552 .word 34005127
553 # key 2
554 addi t1, a3, 16
555 .word 33775879
556 # key 4
557 addi t1, a3, 48
558 .word 33776007
559 # key 5
560 addi t1, a3, 64
561 .word 33776135
562 # key 7
563 addi t1, a3, 96
564 .word 33776263
565 # key 8
566 addi t1, a3, 112
567 .word 33776391
568 # key 10
569 addi t1, a3, 144
570 .word 33776519
571 # key 11
572 addi t1, a3, 160
573 .word 33776647
574 # key 13
575 addi t1, a3, 192
576 .word 33776775
577 # key 14
578 addi t1, a3, 208
579 .word 33776903
580
581 # We already have the ciphertext/plaintext and ctr data for the first round.
582 # Load key 3
583 .word 3439489111
584 addi t1, a3, 32
585 .word 33777031
586 .word 220754007
587 .word 2786307703
588 .word 2787192439
589 .word 2796629623
590 # Load key 6
591 .word 3439489111
592 addi t1, a3, 80
593 .word 33777031
594 .word 220754007
595 .word 2788241015
596 .word 2789289591
597 .word 2796629623
598 # Load key 9
599 .word 3439489111
600 addi t1, a3, 128
601 .word 33777031
602 .word 220754007
603 .word 2790338167
604 .word 2791386743
605 .word 2796629623
606 # Load key 12
607 .word 3439489111
608 addi t1, a3, 176
609 .word 33777031
610 .word 220754007
611 .word 2792435319
612 .word 2793483895
613 .word 2796629623
614 # Load key 15
615 .word 3439489111
616 addi t1, a3, 224
617 .word 33777031
618 .word 220754007
619 .word 2794532471
620 .word 2795581047
621 .word 2796662391
622
623
624 # Compute AES ctr result.
625 .word 801902167
626
# t4 != 0 means the first group carries leading zero padding.
627 bnez t4, 1f
628
629 ## without padding
630 # Store ciphertext/plaintext
631 .word 33943079
632 j 2f
633
634 ## with padding
635 1:
636 # Store ciphertext/plaintext using mask
637 .word 388647
638
639 # Fill zero for the padding blocks
640 .word 154071127
641 .word 1577074263
642
643 # We have used mask register for `INPUT_PADDING_MASK` before. We need to
644 # setup the ctr mask back.
645 # ctr mask : [000100010001....]
646 .word 201879639
647 li t1, 0b10001000
648 .word 1577271383
649 2:
650
651
652
# Step both pointers past the first group (t0 = a6 << 2).
653 add a0, a0, t0
654 add a1, a1, t0
655
656
657 .word 220754007
658
659 .Lenc_blocks_256:
660 # Compute the partial tags.
661 # The partial tags will multiply with [H^n, H^n, ..., H^n]
662 # [tag0, tag1, ...] =
663 # ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...] * [H^n, H^n, ..., H^n]
664 # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round.
# Loop until t5 (remaining e32 length) reaches zero; each pass consumes a6.
665 beqz t5, .Lenc_blocks_256_end
666 .word 3004050039
667
668 .word 86536279
669 # Increase ctr in v12.
670 .word 13616727
671 sub t5, t5, a6
672 # Load plaintext into v24
673 .word 220229719
674 .word 33909767
675 # Prepare the AES ctr input into v28.
676 # The ctr data uses big-endian form.
677 .word 1577455191
678 add a0, a0, t0
679 .word 86011991
680 .word 1237626455
681
682
683 # Load key 3
684 .word 3439489111
685 addi t1, a3, 32
686 .word 33777031
687 .word 220754007
688 .word 2786307703
689 .word 2787192439
690 .word 2796629623
691 # Load key 6
692 .word 3439489111
693 addi t1, a3, 80
694 .word 33777031
695 .word 220754007
696 .word 2788241015
697 .word 2789289591
698 .word 2796629623
699 # Load key 9
700 .word 3439489111
701 addi t1, a3, 128
702 .word 33777031
703 .word 220754007
704 .word 2790338167
705 .word 2791386743
706 .word 2796629623
707 # Load key 12
708 .word 3439489111
709 addi t1, a3, 176
710 .word 33777031
711 .word 220754007
712 .word 2792435319
713 .word 2793483895
714 .word 2796629623
715 # Load key 15
716 .word 3439489111
717 addi t1, a3, 224
718 .word 33777031
719 .word 220754007
720 .word 2794532471
721 .word 2795581047
722 .word 2796662391
723
724
725 # Compute AES ctr ciphertext result.
726 .word 801902167
727
728 # Store ciphertext
729 .word 33943079
730 add a1, a1, t0
731
732 j .Lenc_blocks_256
733 .Lenc_blocks_256_end:
734
735 # Add ciphertext into partial tag
736 .word 793643607
737
738 .word 3441586263
739 # Update current ctr value to v12
740 .word 13616727
741 # Convert ctr to big-endian counter.
742 .word 1220847191
743 .word 484903
744
745
746 # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2).
747 # Load H to v1
748 addi t1, a5, 32
749 .word 3439489111
750 .word 33775751
751 # Multiply H for each partial tag and XOR them together.
752 # Handle 1st partial tag
753 .word 1577713751
754 .word 2719522935
755 # Handle 2nd to N-th partial tags
# t1 is the e32 index of the next partial tag; it advances 4 elements
# (one 128-bit block) per pass until it reaches a6.
756 li t1, 4
757 1:
758 .word 3441586263
759 .word 1061372503
760 .word 3439489111
761 .word 2987532407
762 addi t1, t1, 4
763 blt t1, a6, 1b
764
765
766 # Save the final tag
767 .word 34070567
768
769 # return the processed size.
770 slli a0, a7, 2
771 ret
772 .size aes_gcm_enc_blocks_256,.-aes_gcm_enc_blocks_256
# rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt -- AES-GCM decrypt entry point.
#
# Builds the shared vector state (H^n, counter blocks, first input group,
# initial tag) and then branches to the body matching the AES round count.
# The setup sequence is the same as in the encrypt entry point.
#
# Registers as used by the code below:
#   a0  input pointer
#   a1  output pointer
#   a2  length; only the a2 >> 4 whole 16-byte units are processed
#       (presumably a byte count -- TODO confirm against the C prototype)
#   a3  key schedule base; the round count is read from offset 240
#   a4  NOTE(review): the "Load IV" load appears to read through a4 -- confirm
#   a5  Xi pointer; H is read from a5 + 32
#
# State handed to the aes_gcm_dec_blocks_* bodies:
#   a6  FULL_BLOCK_LEN32 (VL for e32, m4)
#   a7  total e32 length, used for the return value
#   t4  e32 length of the leading zero padding (0 when there is none)
#   t5  e32 length still to process after the first round
#
# Returns a0 = 0 when a2 >> 4 is zero or the round count is not 10/12/14.
# Otherwise the selected body returns to the caller with a0 = a7 << 2.
#
# Hand-decoded recurring encodings (verify with a disassembler):
#   .word 3439489111  vsetivli zero, 4, e32, m1, ta, ma
#   .word 220754007   vsetvli  zero, a6, e32, m4, ta, ma
773 .p2align 3
774 .globl rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt
775 .type rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt,@function
776 rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt:
# t0 = a2 >> 4; return 0 immediately when there is no whole 16-byte unit.
777 srli t0, a2, 4
778 beqz t0, .Ldec_end
# t5 = t0 * 4 = length in 32-bit elements (LEN32).
779 slli t5, t0, 2
780
# Keep LEN32 in a7; the bodies return a7 << 2.
781 mv a7, t5
782
783 # Compute the AES-GCM full-block e32 length for `LMUL=4`. We will handle
784 # the multiple AES-GCM blocks at the same time within `LMUL=4` register.
785 # The AES-GCM's SEW is e32 and EGW is 128 bits.
786 # FULL_BLOCK_LEN32 = (VLEN*LMUL)/(EGW) * (EGW/SEW) = (VLEN*4)/(32*4) * 4
787 # = (VLEN*4)/32
788 # We could get the block_num using the VL value of `vsetvli with e32, m4`.
# Hand-decoded: vsetvli a6, zero, e32, m4, ta, ma  (a6 = FULL_BLOCK_LEN32).
789 .word 220231767
790 # If `LEN32 % FULL_BLOCK_LEN32` is not equal to zero, we could fill the
791 # zero padding data to make sure we could always handle FULL_BLOCK_LEN32
792 # blocks for all iterations.
793
794 ## Prepare the H^n multiplier in v16 for GCM multiplier. The `n` is the gcm
795 ## block number in a LMUL=4 register group.
796 ## n = ((VLEN*LMUL)/(32*4)) = ((VLEN*4)/(32*4))
797 ## = (VLEN/32)
798 ## We could use vsetvli with `e32, m1` to compute the `n` number.
# Hand-decoded: vsetvli t0, zero, e32, m1, ta, ma  (t0 = n).
799 .word 218133207
800
801 # The H is at `gcm128_context.Htable[0]`(addr(Xi)+16*2).
802 addi t1, a5, 32
803 .word 3439489111
804 .word 33779591
805
806 # Compute the H^n
# t1 doubles each pass until it reaches t0, so the encoded instruction runs
# once per doubling (presumably a GHASH squaring -- confirm by disassembly).
807 li t1, 1
808 1:
809 .word 2750984183
810 slli t1, t1, 1
811 bltu t1, t0, 1b
812
813 .word 220754007
814 .word 1577072727
815 .word 2817763447
816
817 #### Load plaintext into v24 and handle padding. We also load the init tag
818 #### data into v20 and prepare the AES ctr input data into v12 and v28.
819 .word 1577073239
820
821 ## Prepare the AES ctr input data into v12.
822 # Setup ctr input mask.
823 # ctr mask : [000100010001....]
824 # Note: The actual vl should be `FULL_BLOCK_LEN32/4 * 2`, but we just use
825 # `FULL_BLOCK_LEN32` here.
826 .word 201879639
827 li t0, 0b10001000
828 .word 1577238615
829 # Load IV.
830 .word 3439489111
831 .word 34041735
832 # Convert the big-endian counter into little-endian.
833 .word 3305271383
834 .word 1240772567
835 # Splat the `single block of IV` to v12
836 .word 220754007
837 .word 1577072215
838 .word 2817762935
839 # Prepare the ctr counter into v8
840 # v8: [x, x, x, 0, x, x, x, 1, x, x, x, 2, ...]
841 .word 1342710871
842 # Merge IV and ctr counter into v12.
843 # v12:[x, x, x, count+0, x, x, x, count+1, ...]
844 .word 86536279
845 .word 12846679
846
# t4 = 0 marks "no padding" for the bodies; overwritten below when padding
# is needed.
847 li t4, 0
848 # Get the SEW32 size in the first round.
849 # If we have the non-zero value for `LEN32&(FULL_BLOCK_LEN32-1)`, then
850 # we will have the leading padding zero.
# The AND with a6 - 1 is a remainder only if a6 is a power of two
# (assumed here -- TODO confirm).
851 addi t0, a6, -1
852 and t0, t0, t5
853 beqz t0, 1f
854
855 ## with padding
# t5 drops the partial first group; t4 = a6 - t0 = padding length in e32.
856 sub t5, t5, t0
857 sub t4, a6, t0
858 # padding block size
859 srli t1, t4, 2
860 # padding byte size
861 slli t2, t4, 2
862
863 # Adjust the ctr counter to make the counter start from `counter+0` for the
864 # first non-padding block.
865 .word 86536279
866 .word 147015255
867 # Prepare the AES ctr input into v28.
868 # The ctr data uses big-endian form.
869 .word 1577455191
870 .word 1237626455
871
872 # Prepare the mask for input loading in the first round. We use
873 # `VL=FULL_BLOCK_LEN32` with the mask in the first round.
874 # Adjust input ptr.
# Both pointers are moved back by the padding byte size, so the first
# full-width group starts before the caller's buffers; per the comments
# below, the load is masked.
875 sub a0, a0, t2
876 # Adjust output ptr.
877 sub a1, a1, t2
878 .word 211316823
879 .word 1376297303
880 # We don't use the pseudo instruction `vmsgeu` here. Use `vmsgtu` instead.
881 # The original code is:
882 # vmsgeu.vx v0, v2, t4
883 addi t0, t4, -1
884 .word 2049097815
885 .word 220754007
886 .word 1577073751
887 # Load the input for length FULL_BLOCK_LEN32 with mask.
888 .word 86536279
889 .word 355335
890
891 # Load the init `Xi` data to v20 with preceding zero padding.
892 # Adjust Xi ptr.
893 sub t0, a5, t2
894 # Load for length `zero-padding-e32-length + 4`.
895 addi t1, t4, 4
896 .word 19099735
897 .word 190983
898 j 2f
899
900 1:
901 ## without padding
# The first round consumes one full group of a6 elements.
902 sub t5, t5, a6
903
904 .word 220754007
905 .word 33909767
906
907 # Load the init Xi data to v20.
908 .word 3372380247
909 .word 34073095
910
911 # Prepare the AES ctr input into v28.
912 # The ctr data uses big-endian form.
913 .word 86536279
914 .word 1577455191
915 .word 1237626455
916 2:
917
918
919 # Load number of rounds
920 lwu t0, 240(a3)
921 li t1, 14
922 li t2, 12
923 li t3, 10
924
# Branch (not call) to the body for this round count; the body returns
# straight to our caller. Any other round count falls through and returns 0.
925 beq t0, t1, aes_gcm_dec_blocks_256
926 beq t0, t2, aes_gcm_dec_blocks_192
927 beq t0, t3, aes_gcm_dec_blocks_128
928
929 .Ldec_end:
930 li a0, 0
931 ret
932 .size rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt,.-rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt
# aes_gcm_dec_blocks_128 -- decrypt body for a 10-round (AES-128) key.
#
# Reached by a branch from rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt, which has
# already prepared the vector state; its `ret` returns to that function's
# caller.
#
# Scalar registers on entry:
#   a0/a1  input/output pointers
#   a3     key schedule base (advanced by 160 below and not restored)
#   a5     Xi pointer (H is read from a5 + 32, the tag is saved at the end)
#   a6     FULL_BLOCK_LEN32
#   a7     total e32 length
#   t4     padding e32 length, 0 when the first round has no padding
#   t5     e32 length remaining after the first round
# Locals: t0 = a6 << 2 (pointer step per round), t6 = a6 >> 2.
#
# Returns a0 = a7 << 2.
#
# Hand-decoded recurring encodings (verify with a disassembler):
#   .word 3439489111  vsetivli zero, 4, e32, m1, ta, ma
#   .word 3441586263  vsetivli zero, 4, e32, m4, ta, ma
#   .word 220754007   vsetvli  zero, a6, e32, m4, ta, ma
933 .p2align 3
934 aes_gcm_dec_blocks_128:
935 srli t6, a6, 2
936 slli t0, a6, 2
937
938 # Load all 11 aes round keys to v1-v11 registers.
# a3 steps through the schedule 16 bytes at a time.
939 .word 3439489111
940 .word 34005127
941 addi a3, a3, 16
942 .word 34005255
943 addi a3, a3, 16
944 .word 34005383
945 addi a3, a3, 16
946 .word 34005511
947 addi a3, a3, 16
948 .word 34005639
949 addi a3, a3, 16
950 .word 34005767
951 addi a3, a3, 16
952 .word 34005895
953 addi a3, a3, 16
954 .word 34006023
955 addi a3, a3, 16
956 .word 34006151
957 addi a3, a3, 16
958 .word 34006279
959 addi a3, a3, 16
960 .word 34006407
961
962 # We already have the ciphertext/plaintext and ctr data for the first round.
# Eleven encoded AES steps follow, one per round key loaded above.
963 .word 220754007
964 .word 2786307703
965 .word 2787192439
966 .word 2788241015
967 .word 2789289591
968 .word 2790338167
969 .word 2791386743
970 .word 2792435319
971 .word 2793483895
972 .word 2794532471
973 .word 2795581047
974 .word 2796662391
975
976
977 # Compute AES ctr result.
978 .word 801902167
979
# t4 != 0 means the first group carries leading zero padding.
980 bnez t4, 1f
981
982 ## without padding
983 # Store ciphertext/plaintext
984 .word 33943079
985 j 2f
986
987 ## with padding
988 1:
989 # Store ciphertext/plaintext using mask
990 .word 388647
991
992 # Fill zero for the padding blocks
993 .word 154071127
994 .word 1577074263
995
996 # We have used mask register for `INPUT_PADDING_MASK` before. We need to
997 # setup the ctr mask back.
998 # ctr mask : [000100010001....]
999 .word 201879639
1000 li t1, 0b10001000
1001 .word 1577271383
1002 2:
1003
1004
1005
# Step both pointers past the first group (t0 = a6 << 2).
1006 add a0, a0, t0
1007 add a1, a1, t0
1008
1009
1010 .word 220754007
1011
1012 .Ldec_blocks_128:
1013 # Compute the partial tags.
1014 # The partial tags will multiply with [H^n, H^n, ..., H^n]
1015 # [tag0, tag1, ...] =
1016 # ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...] * [H^n, H^n, ..., H^n]
1017 # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round.
# Loop until t5 (remaining e32 length) reaches zero; each pass consumes a6.
# Exit to this function's own epilogue label, never to the local label of
# another key-size body.
1018 beqz t5, .Ldec_blocks_128_end
1019 .word 3003918967
1020
1021 .word 86536279
1022 # Increase ctr in v12.
1023 .word 13616727
1024 sub t5, t5, a6
1025 # Load plaintext into v24
1026 .word 220229719
1027 .word 33909767
1028 # Prepare the AES ctr input into v28.
1029 # The ctr data uses big-endian form.
1030 .word 1577455191
1031 add a0, a0, t0
1032 .word 86011991
1033 .word 1237626455
1034
1035
1036 .word 220754007
1037 .word 2786307703
1038 .word 2787192439
1039 .word 2788241015
1040 .word 2789289591
1041 .word 2790338167
1042 .word 2791386743
1043 .word 2792435319
1044 .word 2793483895
1045 .word 2794532471
1046 .word 2795581047
1047 .word 2796662391
1048
1049
1050 # Compute AES ctr plaintext result.
1051 .word 801902167
1052
1053 # Store plaintext
1054 .word 33943079
1055 add a1, a1, t0
1056
1057 j .Ldec_blocks_128
1058 .Ldec_blocks_128_end:
1059
1060 # Add ciphertext into partial tag
1061 .word 793512535
1062
1063 .word 3441586263
1064 # Update current ctr value to v12
1065 .word 13616727
1066 # Convert ctr to big-endian counter.
1067 .word 1220847191
1068 .word 484903
1069
1070
1071 # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2).
1072 # Load H to v1
1073 addi t1, a5, 32
1074 .word 3439489111
1075 .word 33775751
1076 # Multiply H for each partial tag and XOR them together.
1077 # Handle 1st partial tag
1078 .word 1577713751
1079 .word 2719522935
1080 # Handle 2nd to N-th partial tags
# t1 is the e32 index of the next partial tag; it advances 4 elements
# (one 128-bit block) per pass until it reaches a6.
1081 li t1, 4
1082 1:
1083 .word 3441586263
1084 .word 1061372503
1085 .word 3439489111
1086 .word 2987532407
1087 addi t1, t1, 4
1088 blt t1, a6, 1b
1089
1090
1091 # Save the final tag
1092 .word 34070567
1093
1094 # return the processed size.
1095 slli a0, a7, 2
1096 ret
1097 .size aes_gcm_dec_blocks_128,.-aes_gcm_dec_blocks_128
# aes_gcm_dec_blocks_192 -- decrypt body for a 12-round (AES-192) key.
#
# Reached by a branch from rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt, which has
# already prepared the vector state; its `ret` returns to that function's
# caller.
#
# Scalar registers on entry:
#   a0/a1  input/output pointers
#   a3     key schedule base (left unchanged; key addresses go through t1)
#   a5     Xi pointer (H is read from a5 + 32, the tag is saved at the end)
#   a6     FULL_BLOCK_LEN32
#   a7     total e32 length
#   t4     padding e32 length, 0 when the first round has no padding
#   t5     e32 length remaining after the first round
# Locals: t0 = a6 << 2 (pointer step per round), t6 = a6 >> 2,
#         t1 = scratch address / mask / index.
#
# Keys 4, 8 and 13 (at a3 + 48, + 112, + 192) are reloaded inside every AES
# pass because there are not enough vector registers to keep them resident.
#
# Returns a0 = a7 << 2.
#
# Hand-decoded recurring encodings (verify with a disassembler):
#   .word 3439489111  vsetivli zero, 4, e32, m1, ta, ma
#   .word 3441586263  vsetivli zero, 4, e32, m4, ta, ma
#   .word 220754007   vsetvli  zero, a6, e32, m4, ta, ma
1098 .p2align 3
1099 aes_gcm_dec_blocks_192:
1100 srli t6, a6, 2
1101 slli t0, a6, 2
1102
1103 # We run out of 32 vector registers, so we just preserve some round keys
1104 # and load the remaining round keys inside the aes body.
1105 # We keep the round keys for:
1106 # 1, 2, 3, 5, 6, 7, 9, 10, 11 and 12th keys.
1107 # The following keys will be loaded in the aes body:
1108 # 4, 8 and 13th keys.
1109 .word 3439489111
1110 # key 1
1111 .word 34005127
1112 # key 2
1113 addi t1, a3, 16
1114 .word 33775879
1115 # key 3
1116 addi t1, a3, 32
1117 .word 33776007
1118 # key 5
1119 addi t1, a3, 64
1120 .word 33776135
1121 # key 6
1122 addi t1, a3, 80
1123 .word 33776263
1124 # key 7
1125 addi t1, a3, 96
1126 .word 33776391
1127 # key 9
1128 addi t1, a3, 128
1129 .word 33776519
1130 # key 10
1131 addi t1, a3, 144
1132 .word 33776647
1133 # key 11
1134 addi t1, a3, 160
1135 .word 33776775
1136 # key 12
1137 addi t1, a3, 176
1138 .word 33776903
1139
1140 # We already have the ciphertext/plaintext and ctr data for the first round.
1141 # Load key 4
1142 .word 3439489111
1143 addi t1, a3, 48
1144 .word 33777031
1145 .word 220754007
1146 .word 2786307703
1147 .word 2787192439
1148 .word 2788241015
1149 .word 2796629623
1150 # Load key 8
1151 .word 3439489111
1152 addi t1, a3, 112
1153 .word 33777031
1154 .word 220754007
1155 .word 2789289591
1156 .word 2790338167
1157 .word 2791386743
1158 .word 2796629623
1159 # Load key 13
1160 .word 3439489111
1161 addi t1, a3, 192
1162 .word 33777031
1163 .word 220754007
1164 .word 2792435319
1165 .word 2793483895
1166 .word 2794532471
1167 .word 2795581047
1168 .word 2796662391
1169
1170
1171 # Compute AES ctr result.
1172 .word 801902167
1173
# t4 != 0 means the first group carries leading zero padding.
1174 bnez t4, 1f
1175
1176 ## without padding
1177 # Store ciphertext/plaintext
1178 .word 33943079
1179 j 2f
1180
1181 ## with padding
1182 1:
1183 # Store ciphertext/plaintext using mask
1184 .word 388647
1185
1186 # Fill zero for the padding blocks
1187 .word 154071127
1188 .word 1577074263
1189
1190 # We have used mask register for `INPUT_PADDING_MASK` before. We need to
1191 # setup the ctr mask back.
1192 # ctr mask : [000100010001....]
1193 .word 201879639
1194 li t1, 0b10001000
1195 .word 1577271383
1196 2:
1197
1198
1199
# Step both pointers past the first group (t0 = a6 << 2).
1200 add a0, a0, t0
1201 add a1, a1, t0
1202
1203
1204 .word 220754007
1205
1206 .Ldec_blocks_192:
1207 # Compute the partial tags.
1208 # The partial tags will multiply with [H^n, H^n, ..., H^n]
1209 # [tag0, tag1, ...] =
1210 # ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...] * [H^n, H^n, ..., H^n]
1211 # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round.
# Loop until t5 (remaining e32 length) reaches zero; each pass consumes a6.
1212 beqz t5, .Ldec_blocks_192_end
1213 .word 3003918967
1214
1215 .word 86536279
1216 # Increase ctr in v12.
1217 .word 13616727
1218 sub t5, t5, a6
1219 # Load plaintext into v24
1220 .word 220229719
1221 .word 33909767
1222 # Prepare the AES ctr input into v28.
1223 # The ctr data uses big-endian form.
1224 .word 1577455191
1225 add a0, a0, t0
1226 .word 86011991
1227 .word 1237626455
1228
1229
1230 # Load key 4
1231 .word 3439489111
1232 addi t1, a3, 48
1233 .word 33777031
1234 .word 220754007
1235 .word 2786307703
1236 .word 2787192439
1237 .word 2788241015
1238 .word 2796629623
1239 # Load key 8
1240 .word 3439489111
1241 addi t1, a3, 112
1242 .word 33777031
1243 .word 220754007
1244 .word 2789289591
1245 .word 2790338167
1246 .word 2791386743
1247 .word 2796629623
1248 # Load key 13
1249 .word 3439489111
1250 addi t1, a3, 192
1251 .word 33777031
1252 .word 220754007
1253 .word 2792435319
1254 .word 2793483895
1255 .word 2794532471
1256 .word 2795581047
1257 .word 2796662391
1258
1259
1260 # Compute AES ctr plaintext result.
1261 .word 801902167
1262
1263 # Store plaintext
1264 .word 33943079
1265 add a1, a1, t0
1266
1267 j .Ldec_blocks_192
1268 .Ldec_blocks_192_end:
1269
1270 # Add ciphertext into partial tag
1271 .word 793512535
1272
1273 .word 3441586263
1274 # Update current ctr value to v12
1275 .word 13616727
1276 # Convert ctr to big-endian counter.
1277 .word 1220847191
1278 .word 484903
1279
1280
1281 # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2).
1282 # Load H to v1
1283 addi t1, a5, 32
1284 .word 3439489111
1285 .word 33775751
1286 # Multiply H for each partial tag and XOR them together.
1287 # Handle 1st partial tag
1288 .word 1577713751
1289 .word 2719522935
1290 # Handle 2nd to N-th partial tags
# t1 is the e32 index of the next partial tag; it advances 4 elements
# (one 128-bit block) per pass until it reaches a6.
1291 li t1, 4
1292 1:
1293 .word 3441586263
1294 .word 1061372503
1295 .word 3439489111
1296 .word 2987532407
1297 addi t1, t1, 4
1298 blt t1, a6, 1b
1299
1300
1301 # Save the final tag
1302 .word 34070567
1303
1304 # return the processed size.
1305 slli a0, a7, 2
1306 ret
1307 .size aes_gcm_dec_blocks_192,.-aes_gcm_dec_blocks_192
# aes_gcm_dec_blocks_256 -- decrypt body for a 14-round (AES-256) key.
#
# Reached by a branch from rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt, which has
# already prepared the vector state; its `ret` returns to that function's
# caller.
#
# Scalar registers on entry:
#   a0/a1  input/output pointers
#   a3     key schedule base (left unchanged; key addresses go through t1)
#   a5     Xi pointer (H is read from a5 + 32, the tag is saved at the end)
#   a6     FULL_BLOCK_LEN32
#   a7     total e32 length
#   t4     padding e32 length, 0 when the first round has no padding
#   t5     e32 length remaining after the first round
# Locals: t0 = a6 << 2 (pointer step per round), t6 = a6 >> 2,
#         t1 = scratch address / mask / index.
#
# Keys 3, 6, 9, 12 and 15 (at a3 + 32, + 80, + 128, + 176, + 224) are
# reloaded inside every AES pass because there are not enough vector
# registers to keep them resident.
#
# NOTE(review): the `.Ldec_blocks_256_end` label in this function is also a
# branch target from aes_gcm_dec_blocks_128 in the original file, so this
# epilogue must not start depending on a3 or on the 256-bit key registers
# while that cross-reference exists.
#
# Returns a0 = a7 << 2.
#
# Hand-decoded recurring encodings (verify with a disassembler):
#   .word 3439489111  vsetivli zero, 4, e32, m1, ta, ma
#   .word 3441586263  vsetivli zero, 4, e32, m4, ta, ma
#   .word 220754007   vsetvli  zero, a6, e32, m4, ta, ma
1308 .p2align 3
1309 aes_gcm_dec_blocks_256:
1310 srli t6, a6, 2
1311 slli t0, a6, 2
1312
1313 # We run out of 32 vector registers, so we just preserve some round keys
1314 # and load the remaining round keys inside the aes body.
1315 # We keep the round keys for:
1316 # 1, 2, 4, 5, 7, 8, 10, 11, 13 and 14th keys.
1317 # The following keys will be loaded in the aes body:
1318 # 3, 6, 9, 12 and 15th keys.
1319 .word 3439489111
1320 # key 1
1321 .word 34005127
1322 # key 2
1323 addi t1, a3, 16
1324 .word 33775879
1325 # key 4
1326 addi t1, a3, 48
1327 .word 33776007
1328 # key 5
1329 addi t1, a3, 64
1330 .word 33776135
1331 # key 7
1332 addi t1, a3, 96
1333 .word 33776263
1334 # key 8
1335 addi t1, a3, 112
1336 .word 33776391
1337 # key 10
1338 addi t1, a3, 144
1339 .word 33776519
1340 # key 11
1341 addi t1, a3, 160
1342 .word 33776647
1343 # key 13
1344 addi t1, a3, 192
1345 .word 33776775
1346 # key 14
1347 addi t1, a3, 208
1348 .word 33776903
1349
1350 # We already have the ciphertext/plaintext and ctr data for the first round.
1351 # Load key 3
1352 .word 3439489111
1353 addi t1, a3, 32
1354 .word 33777031
1355 .word 220754007
1356 .word 2786307703
1357 .word 2787192439
1358 .word 2796629623
1359 # Load key 6
1360 .word 3439489111
1361 addi t1, a3, 80
1362 .word 33777031
1363 .word 220754007
1364 .word 2788241015
1365 .word 2789289591
1366 .word 2796629623
1367 # Load key 9
1368 .word 3439489111
1369 addi t1, a3, 128
1370 .word 33777031
1371 .word 220754007
1372 .word 2790338167
1373 .word 2791386743
1374 .word 2796629623
1375 # Load key 12
1376 .word 3439489111
1377 addi t1, a3, 176
1378 .word 33777031
1379 .word 220754007
1380 .word 2792435319
1381 .word 2793483895
1382 .word 2796629623
1383 # Load key 15
1384 .word 3439489111
1385 addi t1, a3, 224
1386 .word 33777031
1387 .word 220754007
1388 .word 2794532471
1389 .word 2795581047
1390 .word 2796662391
1391
1392
1393 # Compute AES ctr result.
1394 .word 801902167
1395
# t4 != 0 means the first group carries leading zero padding.
1396 bnez t4, 1f
1397
1398 ## without padding
1399 # Store ciphertext/plaintext
1400 .word 33943079
1401 j 2f
1402
1403 ## with padding
1404 1:
1405 # Store ciphertext/plaintext using mask
1406 .word 388647
1407
1408 # Fill zero for the padding blocks
1409 .word 154071127
1410 .word 1577074263
1411
1412 # We have used mask register for `INPUT_PADDING_MASK` before. We need to
1413 # setup the ctr mask back.
1414 # ctr mask : [000100010001....]
1415 .word 201879639
1416 li t1, 0b10001000
1417 .word 1577271383
1418 2:
1419
1420
1421
# Step both pointers past the first group (t0 = a6 << 2).
1422 add a0, a0, t0
1423 add a1, a1, t0
1424
1425
1426 .word 220754007
1427
1428 .Ldec_blocks_256:
1429 # Compute the partial tags.
1430 # The partial tags will multiply with [H^n, H^n, ..., H^n]
1431 # [tag0, tag1, ...] =
1432 # ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...] * [H^n, H^n, ..., H^n]
1433 # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round.
# Loop until t5 (remaining e32 length) reaches zero; each pass consumes a6.
1434 beqz t5, .Ldec_blocks_256_end
1435 .word 3003918967
1436
1437 .word 86536279
1438 # Increase ctr in v12.
1439 .word 13616727
1440 sub t5, t5, a6
1441 # Load plaintext into v24
1442 .word 220229719
1443 .word 33909767
1444 # Prepare the AES ctr input into v28.
1445 # The ctr data uses big-endian form.
1446 .word 1577455191
1447 add a0, a0, t0
1448 .word 86011991
1449 .word 1237626455
1450
1451
1452 # Load key 3
1453 .word 3439489111
1454 addi t1, a3, 32
1455 .word 33777031
1456 .word 220754007
1457 .word 2786307703
1458 .word 2787192439
1459 .word 2796629623
1460 # Load key 6
1461 .word 3439489111
1462 addi t1, a3, 80
1463 .word 33777031
1464 .word 220754007
1465 .word 2788241015
1466 .word 2789289591
1467 .word 2796629623
1468 # Load key 9
1469 .word 3439489111
1470 addi t1, a3, 128
1471 .word 33777031
1472 .word 220754007
1473 .word 2790338167
1474 .word 2791386743
1475 .word 2796629623
1476 # Load key 12
1477 .word 3439489111
1478 addi t1, a3, 176
1479 .word 33777031
1480 .word 220754007
1481 .word 2792435319
1482 .word 2793483895
1483 .word 2796629623
1484 # Load key 15
1485 .word 3439489111
1486 addi t1, a3, 224
1487 .word 33777031
1488 .word 220754007
1489 .word 2794532471
1490 .word 2795581047
1491 .word 2796662391
1492
1493
1494 # Compute AES ctr plaintext result.
1495 .word 801902167
1496
1497 # Store plaintext
1498 .word 33943079
1499 add a1, a1, t0
1500
1501 j .Ldec_blocks_256
1502 .Ldec_blocks_256_end:
1503
1504 # Add ciphertext into partial tag
1505 .word 793512535
1506
1507 .word 3441586263
1508 # Update current ctr value to v12
1509 .word 13616727
1510 # Convert ctr to big-endian counter.
1511 .word 1220847191
1512 .word 484903
1513
1514
1515 # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2).
1516 # Load H to v1
1517 addi t1, a5, 32
1518 .word 3439489111
1519 .word 33775751
1520 # Multiply H for each partial tag and XOR them together.
1521 # Handle 1st partial tag
1522 .word 1577713751
1523 .word 2719522935
1524 # Handle 2nd to N-th partial tags
# t1 is the e32 index of the next partial tag; it advances 4 elements
# (one 128-bit block) per pass until it reaches a6.
1525 li t1, 4
1526 1:
1527 .word 3441586263
1528 .word 1061372503
1529 .word 3439489111
1530 .word 2987532407
1531 addi t1, t1, 4
1532 blt t1, a6, 1b
1533
1534
1535 # Save the final tag
1536 .word 34070567
1537
1538 # return the processed size.
1539 slli a0, a7, 2
1540 ret
1541 .size aes_gcm_dec_blocks_256,.-aes_gcm_dec_blocks_256
1542