Home | History | Annotate | Line # | Download | only in riscv64
      1  1.1  christos #include <machine/asm.h>
      2  1.1  christos .text
      3  1.1  christos .p2align 3
      4  1.1  christos .globl rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt
      5  1.1  christos .type rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt,@function
                       # rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt
                       #
                       # Set-up stage of the AES-GCM encryption of whole 16-byte blocks. It
                       # prepares the vector state for the first round and then branches to
                       # aes_gcm_enc_blocks_{128,192,256} according to the AES round count.
                       # The vector instructions are emitted as raw `.word` encodings; the
                       # hand-decoded mnemonic is given in the trailing comment of each one.
                       #
                       # Inputs, as used by the code below:
                       #   a0  input pointer
                       #   a1  output pointer
                       #   a2  length in bytes (only whole 16-byte blocks are processed)
                       #   a3  AES round keys; the round count is read from 240(a3)
                       #   a4  16-byte IV/counter block
                       #   a5  Xi (current tag); H is read from a5 + 32
                       # State handed to aes_gcm_enc_blocks_*:
                       #   a6  FULL_BLOCK_LEN32 (VL for e32, m4)
                       #   a7  LEN32 of the whole request (blocks * 4)
                       #   t4  number of leading zero-padding e32 elements (0 = no padding)
                       #   t5  e32 elements left after the first round
                       #   v0  ctr mask (no padding) or input padding mask (with padding)
                       #   v12 counter blocks, v16 H^n splat, v20 tag, v24 input, v28 ctr input
                       # Returns a0 = 0 when a2 < 16 or when the round count is not 10/12/14.
      6  1.1  christos rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt:
      7  1.1  christos     srli t0, a2, 4  # t0 = number of whole 16-byte blocks
      8  1.1  christos     beqz t0, .Lenc_end  # less than one block: return 0
      9  1.1  christos     slli t5, t0, 2  # t5 = LEN32 = blocks * 4 (32-bit elements)
     10  1.1  christos 
     11  1.1  christos     mv a7, t5  # a7 = original LEN32; aes_gcm_enc_blocks_* return a7 * 4
     12  1.1  christos 
     13  1.1  christos         # Compute the AES-GCM full-block e32 length for `LMUL=4`. We will handle
     14  1.1  christos     # the multiple AES-GCM blocks at the same time within `LMUL=4` register.
     15  1.1  christos     # The AES-GCM's SEW is e32 and EGW is 128 bits.
     16  1.1  christos     #   FULL_BLOCK_LEN32 = (VLEN*LMUL)/(EGW) * (EGW/SEW) = (VLEN*4)/(32*4) * 4
     17  1.1  christos     #                    = (VLEN*4)/32
     18  1.1  christos     # We could get the block_num using the VL value of `vsetvli with e32, m4`.
     19  1.1  christos     .word 220231767  # vsetvli a6, zero, e32, m4, ta, ma  (a6 = FULL_BLOCK_LEN32)
     20  1.1  christos     # If `LEN32 % FULL_BLOCK_LEN32` is not equal to zero, we could fill the
     21  1.1  christos     # zero padding data to make sure we could always handle FULL_BLOCK_LEN32
     22  1.1  christos     # blocks for all iterations.
     23  1.1  christos 
     24  1.1  christos     ## Prepare the H^n multiplier in v16 for GCM multiplier. The `n` is the gcm
     25  1.1  christos     ## block number in a LMUL=4 register group.
     26  1.1  christos     ##   n = ((VLEN*LMUL)/(32*4)) = ((VLEN*4)/(32*4))
     27  1.1  christos     ##     = (VLEN/32)
     28  1.1  christos     ## We could use vsetvli with `e32, m1` to compute the `n` number.
     29  1.1  christos     .word 218133207  # vsetvli t0, zero, e32, m1, ta, ma  (t0 = n)
     30  1.1  christos 
     31  1.1  christos     # The H is at `gcm128_context.Htable[0]`(addr(Xi)+16*2).
     32  1.1  christos     addi t1, a5, 32
     33  1.1  christos     .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
     34  1.1  christos     .word 33779591  # vle32.v v31, (t1)
     35  1.1  christos 
     36  1.1  christos     # Compute the H^n
     37  1.1  christos     li t1, 1  # t1 = exponent currently held in v31
     38  1.1  christos 1:
     39  1.1  christos     .word 2750984183  # vgmul.vv v31, v31  (square v31)
     40  1.1  christos     slli t1, t1, 1  # exponent doubles with every squaring
     41  1.1  christos     bltu t1, t0, 1b  # repeat while exponent < n
     42  1.1  christos 
     43  1.1  christos     .word 220754007  # vsetvli zero, a6, e32, m4, ta, ma
     44  1.1  christos     .word 1577072727  # vmv.v.i v16, 0
     45  1.1  christos     .word 2817763447  # vaesz.vs v16, v31  (0 XOR v31: splat H^n into every 128-bit group)
     46  1.1  christos 
     47  1.1  christos     #### Load plaintext into v24 and handle padding. We also load the init tag
     48  1.1  christos     #### data into v20 and prepare the AES ctr input data into v12 and v28.
     49  1.1  christos     .word 1577073239  # vmv.v.i v20, 0
     50  1.1  christos 
     51  1.1  christos     ## Prepare the AES ctr input data into v12.
     52  1.1  christos     # Setup ctr input mask.
     53  1.1  christos     # ctr mask : [000100010001....]
     54  1.1  christos     # Note: The actual vl should be `FULL_BLOCK_LEN32/4 * 2`, but we just use
     55  1.1  christos     #   `FULL_BLOCK_LEN32` here.
     56  1.1  christos     .word 201879639  # vsetvli zero, a6, e8, m1, ta, ma
     57  1.1  christos     li t0, 0b10001000
     58  1.1  christos     .word 1577238615  # vmv.v.x v0, t0  (v0 = ctr mask)
     59  1.1  christos     # Load IV.
     60  1.1  christos     .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
     61  1.1  christos     .word 34041735  # vle32.v v31, (a4)
     62  1.1  christos     # Convert the big-endian counter into little-endian.
     63  1.1  christos     .word 3305271383  # vsetivli zero, 4, e32, m1, ta, mu
     64  1.1  christos     .word 1240772567  # vrev8.v v31, v31, v0.t  (only the masked counter word is swapped)
     65  1.1  christos     # Splat the `single block of IV` to v12
     66  1.1  christos     .word 220754007  # vsetvli zero, a6, e32, m4, ta, ma
     67  1.1  christos     .word 1577072215  # vmv.v.i v12, 0
     68  1.1  christos     .word 2817762935  # vaesz.vs v12, v31
     69  1.1  christos     # Prepare the ctr counter into v8
     70  1.1  christos     # v8: [x, x, x, 0, x, x, x, 1, x, x, x, 2, ...]
     71  1.1  christos     .word 1342710871  # viota.m v8, v0, v0.t
     72  1.1  christos     # Merge IV and ctr counter into v12.
     73  1.1  christos     # v12:[x, x, x, count+0, x, x, x, count+1, ...]
     74  1.1  christos     .word 86536279  # vsetvli zero, a6, e32, m4, ta, mu
     75  1.1  christos     .word 12846679  # vadd.vv v12, v12, v8, v0.t
     76  1.1  christos 
     77  1.1  christos     li t4, 0  # t4 = padding length in e32; stays 0 on the no-padding path
     78  1.1  christos     # Get the SEW32 size in the first round.
     79  1.1  christos     # If we have the non-zero value for `LEN32&(FULL_BLOCK_LEN32-1)`, then
     80  1.1  christos     # we will have the leading padding zero.
     81  1.1  christos     addi t0, a6, -1
     82  1.1  christos     and t0, t0, t5  # t0 = LEN32 & (FULL_BLOCK_LEN32 - 1)
     83  1.1  christos     beqz t0, 1f  # LEN32 is a multiple of FULL_BLOCK_LEN32: no padding
     84  1.1  christos 
     85  1.1  christos     ## with padding
     86  1.1  christos     sub t5, t5, t0  # e32 elements left after the (partial) first round
     87  1.1  christos     sub t4, a6, t0  # t4 = leading padding length in e32
     88  1.1  christos     # padding block size
     89  1.1  christos     srli t1, t4, 2
     90  1.1  christos     # padding byte size
     91  1.1  christos     slli t2, t4, 2
     92  1.1  christos 
     93  1.1  christos     # Adjust the ctr counter to make the counter start from `counter+0` for the
     94  1.1  christos     # first non-padding block.
     95  1.1  christos     .word 86536279  # vsetvli zero, a6, e32, m4, ta, mu
     96  1.1  christos     .word 147015255  # vsub.vx v12, v12, t1, v0.t
     97  1.1  christos     # Prepare the AES ctr input into v28.
     98  1.1  christos     # The ctr data uses big-endian form.
     99  1.1  christos     .word 1577455191  # vmv.v.v v28, v12
    100  1.1  christos     .word 1237626455  # vrev8.v v28, v28, v0.t
    101  1.1  christos 
    102  1.1  christos     # Prepare the mask for input loading in the first round. We use
    103  1.1  christos     # `VL=FULL_BLOCK_LEN32` with the mask in the first round.
    104  1.1  christos     # Adjust input ptr.
    105  1.1  christos     sub a0, a0, t2
    106  1.1  christos     # Adjust output ptr.
    107  1.1  christos     sub a1, a1, t2
    108  1.1  christos     .word 211316823  # vsetvli zero, a6, e16, m2, ta, ma
    109  1.1  christos     .word 1376297303  # vid.v v2  (v2[i] = i)
    110  1.1  christos     # We don't use the pseudo instruction `vmsgeu` here. Use `vmsgtu` instead.
    111  1.1  christos     # The original code is:
    112  1.1  christos     #   vmsgeu.vx v0, v2, t4
    113  1.1  christos     addi t0, t4, -1
    114  1.1  christos     .word 2049097815  # vmsgtu.vx v0, v2, t0  (v0 = input padding mask, replaces the ctr mask)
    115  1.1  christos     .word 220754007  # vsetvli zero, a6, e32, m4, ta, ma
    116  1.1  christos     .word 1577073751  # vmv.v.i v24, 0
    117  1.1  christos     # Load the input for length FULL_BLOCK_LEN32 with mask.
    118  1.1  christos     .word 86536279  # vsetvli zero, a6, e32, m4, ta, mu
    119  1.1  christos     .word 355335  # vle32.v v24, (a0), v0.t
    120  1.1  christos 
    121  1.1  christos     # Load the init `Xi` data to v20 with preceding zero padding.
    122  1.1  christos     # Adjust Xi ptr.
    123  1.1  christos     sub t0, a5, t2
    124  1.1  christos     # Load for length `zero-padding-e32-length + 4`.
    125  1.1  christos     addi t1, t4, 4
    126  1.1  christos     .word 19099735  # vsetvli zero, t1, e32, m4, tu, mu
    127  1.1  christos     .word 190983  # vle32.v v20, (t0), v0.t
    128  1.1  christos     j 2f
    129  1.1  christos 
    130  1.1  christos 1:
    131  1.1  christos     ## without padding
    132  1.1  christos     sub t5, t5, a6  # e32 elements left after the (full) first round
    133  1.1  christos 
    134  1.1  christos     .word 220754007  # vsetvli zero, a6, e32, m4, ta, ma
    135  1.1  christos     .word 33909767  # vle32.v v24, (a0)
    136  1.1  christos 
    137  1.1  christos     # Load the init Xi data to v20.
    138  1.1  christos     .word 3372380247  # vsetivli zero, 4, e32, m1, tu, ma
    139  1.1  christos     .word 34073095  # vle32.v v20, (a5)
    140  1.1  christos 
    141  1.1  christos     # Prepare the AES ctr input into v28.
    142  1.1  christos     # The ctr data uses big-endian form.
    143  1.1  christos     .word 86536279  # vsetvli zero, a6, e32, m4, ta, mu
    144  1.1  christos     .word 1577455191  # vmv.v.v v28, v12
    145  1.1  christos     .word 1237626455  # vrev8.v v28, v28, v0.t
    146  1.1  christos 2:
    147  1.1  christos 
    148  1.1  christos 
    149  1.1  christos     # Load number of rounds
    150  1.1  christos     lwu t0, 240(a3)
    151  1.1  christos     li t1, 14
    152  1.1  christos     li t2, 12
    153  1.1  christos     li t3, 10
    154  1.1  christos 
    155  1.1  christos     beq t0, t1, aes_gcm_enc_blocks_256  # 14 rounds
    156  1.1  christos     beq t0, t2, aes_gcm_enc_blocks_192  # 12 rounds
    157  1.1  christos     beq t0, t3, aes_gcm_enc_blocks_128  # 10 rounds
    158  1.1  christos 
    159  1.1  christos .Lenc_end:
    160  1.1  christos     li a0, 0  # nothing processed (also reached for an unexpected round count)
    161  1.1  christos     ret
    162  1.1  christos 
    163  1.1  christos .size rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt,.-rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt
    164  1.1  christos .p2align 3
                       # aes_gcm_enc_blocks_128
                       #
                       # AES-GCM encryption body for a 10-round key. It is reached by `beq`
                       # from rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt, not by a call, so its
                       # `ret` returns to that routine's caller.
                       #
                       # State expected on entry (set up by the entry routine):
                       #   a0/a1 input/output pointers for the first round
                       #   a3    round keys (advanced by 160 bytes while loading them here)
                       #   a4    IV/counter block, a5 Xi (H at a5 + 32)
                       #   a6    FULL_BLOCK_LEN32, a7 total LEN32
                       #   t4    leading padding length in e32 (0 = none)
                       #   t5    e32 elements left after the first round
                       #   v0    ctr mask, or the input padding mask when t4 != 0
                       #   v12   counter blocks, v16 H^n splat, v20 tag, v24 input, v28 ctr input
                       # Vector registers v1-v11 hold the 11 round keys.
                       # On exit: the counter word is written back to (a4), the final tag is
                       # stored to (a5), and a0 = a7 * 4 (processed size in bytes).
    165  1.1  christos aes_gcm_enc_blocks_128:
    166  1.1  christos     srli t6, a6, 2  # t6 = 128-bit blocks per round (counter increment)
    167  1.1  christos     slli t0, a6, 2  # t0 = bytes per round (pointer increment)
    168  1.1  christos 
    169  1.1  christos         # Load all 11 aes round keys to v1-v11 registers.
    170  1.1  christos     .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    171  1.1  christos     .word 34005127  # vle32.v v1, (a3)
    172  1.1  christos     addi a3, a3, 16
    173  1.1  christos     .word 34005255  # vle32.v v2, (a3)
    174  1.1  christos     addi a3, a3, 16
    175  1.1  christos     .word 34005383  # vle32.v v3, (a3)
    176  1.1  christos     addi a3, a3, 16
    177  1.1  christos     .word 34005511  # vle32.v v4, (a3)
    178  1.1  christos     addi a3, a3, 16
    179  1.1  christos     .word 34005639  # vle32.v v5, (a3)
    180  1.1  christos     addi a3, a3, 16
    181  1.1  christos     .word 34005767  # vle32.v v6, (a3)
    182  1.1  christos     addi a3, a3, 16
    183  1.1  christos     .word 34005895  # vle32.v v7, (a3)
    184  1.1  christos     addi a3, a3, 16
    185  1.1  christos     .word 34006023  # vle32.v v8, (a3)
    186  1.1  christos     addi a3, a3, 16
    187  1.1  christos     .word 34006151  # vle32.v v9, (a3)
    188  1.1  christos     addi a3, a3, 16
    189  1.1  christos     .word 34006279  # vle32.v v10, (a3)
    190  1.1  christos     addi a3, a3, 16
    191  1.1  christos     .word 34006407  # vle32.v v11, (a3)
    192  1.1  christos 
    193  1.1  christos     # We already have the ciphertext/plaintext and ctr data for the first round.
    194  1.1  christos         .word 220754007  # vsetvli zero, a6, e32, m4, ta, ma
    195  1.1  christos     .word 2786307703  # vaesz.vs v28, v1
    196  1.1  christos     .word 2787192439  # vaesem.vs v28, v2
    197  1.1  christos     .word 2788241015  # vaesem.vs v28, v3
    198  1.1  christos     .word 2789289591  # vaesem.vs v28, v4
    199  1.1  christos     .word 2790338167  # vaesem.vs v28, v5
    200  1.1  christos     .word 2791386743  # vaesem.vs v28, v6
    201  1.1  christos     .word 2792435319  # vaesem.vs v28, v7
    202  1.1  christos     .word 2793483895  # vaesem.vs v28, v8
    203  1.1  christos     .word 2794532471  # vaesem.vs v28, v9
    204  1.1  christos     .word 2795581047  # vaesem.vs v28, v10
    205  1.1  christos     .word 2796662391  # vaesef.vs v28, v11
    206  1.1  christos 
    207  1.1  christos 
    208  1.1  christos     # Compute AES ctr result.
    209  1.1  christos     .word 801902167  # vxor.vv v28, v28, v24
    210  1.1  christos 
    211  1.1  christos         bnez t4, 1f  # t4 != 0: the first round has leading padding
    212  1.1  christos 
    213  1.1  christos     ## without padding
    214  1.1  christos     # Store ciphertext/plaintext
    215  1.1  christos     .word 33943079  # vse32.v v28, (a1)
    216  1.1  christos     j 2f
    217  1.1  christos 
    218  1.1  christos     ## with padding
    219  1.1  christos 1:
    220  1.1  christos     # Store ciphertext/plaintext using mask
    221  1.1  christos     .word 388647  # vse32.v v28, (a1), v0.t
    222  1.1  christos 
    223  1.1  christos     # Fill zero for the padding blocks
    224  1.1  christos     .word 154071127  # vsetvli zero, t4, e32, m4, tu, ma
    225  1.1  christos     .word 1577074263  # vmv.v.i v28, 0
    226  1.1  christos 
    227  1.1  christos     # We have used mask register for `INPUT_PADDING_MASK` before. We need to
    228  1.1  christos     # setup the ctr mask back.
    229  1.1  christos     # ctr mask : [000100010001....]
    230  1.1  christos     .word 201879639  # vsetvli zero, a6, e8, m1, ta, ma
    231  1.1  christos     li t1, 0b10001000
    232  1.1  christos     .word 1577271383  # vmv.v.x v0, t1
    233  1.1  christos 2:
    234  1.1  christos 
    235  1.1  christos 
    236  1.1  christos 
    237  1.1  christos     add a0, a0, t0  # advance input pointer past the first round
    238  1.1  christos     add a1, a1, t0  # advance output pointer past the first round
    239  1.1  christos 
    240  1.1  christos 
    241  1.1  christos     .word 220754007  # vsetvli zero, a6, e32, m4, ta, ma
    242  1.1  christos 
    243  1.1  christos .Lenc_blocks_128:
    244  1.1  christos     # Compute the partial tags.
    245  1.1  christos     # The partial tags will multiply with [H^n, H^n, ..., H^n]
    246  1.1  christos     #   [tag0, tag1, ...] =
    247  1.1  christos     #     ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...]) * [H^n, H^n, ..., H^n]
    248  1.1  christos     # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round.
    249  1.1  christos     beqz t5, .Lenc_blocks_128_end  # no elements left: finish the tag
    250  1.1  christos     .word 3004050039  # vghsh.vv v20, v16, v28
    251  1.1  christos 
    252  1.1  christos         .word 86536279  # vsetvli zero, a6, e32, m4, ta, mu
    253  1.1  christos     # Increase ctr in v12.
    254  1.1  christos     .word 13616727  # vadd.vx v12, v12, t6, v0.t
    255  1.1  christos     sub t5, t5, a6  # one full round of elements consumed
    256  1.1  christos     # Load plaintext into v24
    257  1.1  christos     .word 220229719  # vsetvli zero, zero, e32, m4, ta, ma
    258  1.1  christos     .word 33909767  # vle32.v v24, (a0)
    259  1.1  christos     # Prepare the AES ctr input into v28.
    260  1.1  christos     # The ctr data uses big-endian form.
    261  1.1  christos     .word 1577455191  # vmv.v.v v28, v12
    262  1.1  christos     add a0, a0, t0
    263  1.1  christos     .word 86011991  # vsetvli zero, zero, e32, m4, ta, mu
    264  1.1  christos     .word 1237626455  # vrev8.v v28, v28, v0.t
    265  1.1  christos 
    266  1.1  christos 
    267  1.1  christos         .word 220754007  # vsetvli zero, a6, e32, m4, ta, ma
    268  1.1  christos     .word 2786307703  # vaesz.vs v28, v1
    269  1.1  christos     .word 2787192439  # vaesem.vs v28, v2
    270  1.1  christos     .word 2788241015  # vaesem.vs v28, v3
    271  1.1  christos     .word 2789289591  # vaesem.vs v28, v4
    272  1.1  christos     .word 2790338167  # vaesem.vs v28, v5
    273  1.1  christos     .word 2791386743  # vaesem.vs v28, v6
    274  1.1  christos     .word 2792435319  # vaesem.vs v28, v7
    275  1.1  christos     .word 2793483895  # vaesem.vs v28, v8
    276  1.1  christos     .word 2794532471  # vaesem.vs v28, v9
    277  1.1  christos     .word 2795581047  # vaesem.vs v28, v10
    278  1.1  christos     .word 2796662391  # vaesef.vs v28, v11
    279  1.1  christos 
    280  1.1  christos 
    281  1.1  christos     # Compute AES ctr ciphertext result.
    282  1.1  christos     .word 801902167  # vxor.vv v28, v28, v24
    283  1.1  christos 
    284  1.1  christos     # Store ciphertext
    285  1.1  christos     .word 33943079  # vse32.v v28, (a1)
    286  1.1  christos     add a1, a1, t0
    287  1.1  christos 
    288  1.1  christos     j .Lenc_blocks_128
    289  1.1  christos .Lenc_blocks_128_end:
    290  1.1  christos 
    291  1.1  christos     # Add ciphertext into partial tag
    292  1.1  christos     .word 793643607  # vxor.vv v20, v20, v28
    293  1.1  christos 
    294  1.1  christos         .word 3441586263  # vsetivli zero, 4, e32, m4, ta, ma
    295  1.1  christos     # Update current ctr value to v12
    296  1.1  christos     .word 13616727  # vadd.vx v12, v12, t6, v0.t
    297  1.1  christos     # Convert ctr to big-endian counter.
    298  1.1  christos     .word 1220847191  # vrev8.v v12, v12, v0.t
    299  1.1  christos     .word 484903  # vse32.v v12, (a4), v0.t  (write the counter word back to the IV block)
    300  1.1  christos 
    301  1.1  christos 
    302  1.1  christos         # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2).
    303  1.1  christos     # Load H to v1
    304  1.1  christos     addi t1, a5, 32
    305  1.1  christos     .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    306  1.1  christos     .word 33775751  # vle32.v v1, (t1)  (round key 1 in v1 is no longer needed)
    307  1.1  christos     # Multiply H for each partial tag and XOR them together.
    308  1.1  christos     # Handle 1st partial tag
    309  1.1  christos     .word 1577713751  # vmv.v.v v0, v20  (v0 becomes the tag accumulator; the ctr mask is dropped)
    310  1.1  christos     .word 2719522935  # vgmul.vv v0, v1
    311  1.1  christos     # Handle 2nd to N-th partial tags
    312  1.1  christos     li t1, 4  # t1 = e32 index of the next partial tag inside v20
    313  1.1  christos 1:
    314  1.1  christos     .word 3441586263  # vsetivli zero, 4, e32, m4, ta, ma
    315  1.1  christos     .word 1061372503  # vslidedown.vx v4, v20, t1  (v4[0..3] = v20[t1..t1+3])
    316  1.1  christos     .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    317  1.1  christos     .word 2987532407  # vghsh.vv v0, v1, v4
    318  1.1  christos     addi t1, t1, 4
    319  1.1  christos     blt t1, a6, 1b  # loop while partial tags remain (t1 < FULL_BLOCK_LEN32)
    320  1.1  christos 
    321  1.1  christos 
    322  1.1  christos     # Save the final tag
    323  1.1  christos     .word 34070567  # vse32.v v0, (a5)
    324  1.1  christos 
    325  1.1  christos     # return the processed size.
    326  1.1  christos     slli a0, a7, 2  # a0 = LEN32 * 4 bytes
    327  1.1  christos     ret
    328  1.1  christos .size aes_gcm_enc_blocks_128,.-aes_gcm_enc_blocks_128
    329  1.1  christos .p2align 3
                       # aes_gcm_enc_blocks_192
                       #
                       # AES-GCM encryption body for a 12-round key. It is reached by `beq`
                       # from rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt, not by a call, so its
                       # `ret` returns to that routine's caller.
                       #
                       # State expected on entry (set up by the entry routine):
                       #   a0/a1 input/output pointers for the first round
                       #   a3    round keys (left unchanged; keys are addressed through t1)
                       #   a4    IV/counter block, a5 Xi (H at a5 + 32)
                       #   a6    FULL_BLOCK_LEN32, a7 total LEN32
                       #   t4    leading padding length in e32 (0 = none)
                       #   t5    e32 elements left after the first round
                       #   v0    ctr mask, or the input padding mask when t4 != 0
                       #   v12   counter blocks, v16 H^n splat, v20 tag, v24 input, v28 ctr input
                       # Round-key placement: keys 1,2,3 -> v1-v3; keys 5,6,7 -> v4-v6;
                       # keys 9,10,11,12 -> v7-v10; keys 4, 8 and 13 are reloaded into v11
                       # every time they are needed.
                       # On exit: the counter word is written back to (a4), the final tag is
                       # stored to (a5), and a0 = a7 * 4 (processed size in bytes).
    330  1.1  christos aes_gcm_enc_blocks_192:
    331  1.1  christos     srli t6, a6, 2  # t6 = 128-bit blocks per round (counter increment)
    332  1.1  christos     slli t0, a6, 2  # t0 = bytes per round (pointer increment)
    333  1.1  christos 
    334  1.1  christos         # We run out of 32 vector registers, so we just preserve some round keys
    335  1.1  christos     # and load the remaining round keys inside the aes body.
    336  1.1  christos     # We keep the round keys for:
    337  1.1  christos     #   1, 2, 3, 5, 6, 7, 9, 10, 11 and 12th keys.
    338  1.1  christos     # The following keys will be loaded in the aes body:
    339  1.1  christos     #   4, 8 and 13th keys.
    340  1.1  christos     .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    341  1.1  christos     # key 1
    342  1.1  christos     .word 34005127  # vle32.v v1, (a3)
    343  1.1  christos     # key 2
    344  1.1  christos     addi t1, a3, 16
    345  1.1  christos     .word 33775879  # vle32.v v2, (t1)
    346  1.1  christos     # key 3
    347  1.1  christos     addi t1, a3, 32
    348  1.1  christos     .word 33776007  # vle32.v v3, (t1)
    349  1.1  christos     # key 5
    350  1.1  christos     addi t1, a3, 64
    351  1.1  christos     .word 33776135  # vle32.v v4, (t1)
    352  1.1  christos     # key 6
    353  1.1  christos     addi t1, a3, 80
    354  1.1  christos     .word 33776263  # vle32.v v5, (t1)
    355  1.1  christos     # key 7
    356  1.1  christos     addi t1, a3, 96
    357  1.1  christos     .word 33776391  # vle32.v v6, (t1)
    358  1.1  christos     # key 9
    359  1.1  christos     addi t1, a3, 128
    360  1.1  christos     .word 33776519  # vle32.v v7, (t1)
    361  1.1  christos     # key 10
    362  1.1  christos     addi t1, a3, 144
    363  1.1  christos     .word 33776647  # vle32.v v8, (t1)
    364  1.1  christos     # key 11
    365  1.1  christos     addi t1, a3, 160
    366  1.1  christos     .word 33776775  # vle32.v v9, (t1)
    367  1.1  christos     # key 12
    368  1.1  christos     addi t1, a3, 176
    369  1.1  christos     .word 33776903  # vle32.v v10, (t1)
    370  1.1  christos 
    371  1.1  christos     # We already have the ciphertext/plaintext and ctr data for the first round.
    372  1.1  christos         # Load key 4
    373  1.1  christos     .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    374  1.1  christos     addi t1, a3, 48
    375  1.1  christos     .word 33777031  # vle32.v v11, (t1)
    376  1.1  christos     .word 220754007  # vsetvli zero, a6, e32, m4, ta, ma
    377  1.1  christos     .word 2786307703  # vaesz.vs v28, v1
    378  1.1  christos     .word 2787192439  # vaesem.vs v28, v2
    379  1.1  christos     .word 2788241015  # vaesem.vs v28, v3
    380  1.1  christos     .word 2796629623  # vaesem.vs v28, v11  (key 4)
    381  1.1  christos     # Load key 8
    382  1.1  christos     .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    383  1.1  christos     addi t1, a3, 112
    384  1.1  christos     .word 33777031  # vle32.v v11, (t1)
    385  1.1  christos     .word 220754007  # vsetvli zero, a6, e32, m4, ta, ma
    386  1.1  christos     .word 2789289591  # vaesem.vs v28, v4
    387  1.1  christos     .word 2790338167  # vaesem.vs v28, v5
    388  1.1  christos     .word 2791386743  # vaesem.vs v28, v6
    389  1.1  christos     .word 2796629623  # vaesem.vs v28, v11  (key 8)
    390  1.1  christos     # Load key 13
    391  1.1  christos     .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    392  1.1  christos     addi t1, a3, 192
    393  1.1  christos     .word 33777031  # vle32.v v11, (t1)
    394  1.1  christos     .word 220754007  # vsetvli zero, a6, e32, m4, ta, ma
    395  1.1  christos     .word 2792435319  # vaesem.vs v28, v7
    396  1.1  christos     .word 2793483895  # vaesem.vs v28, v8
    397  1.1  christos     .word 2794532471  # vaesem.vs v28, v9
    398  1.1  christos     .word 2795581047  # vaesem.vs v28, v10
    399  1.1  christos     .word 2796662391  # vaesef.vs v28, v11  (key 13, final round)
    400  1.1  christos 
    401  1.1  christos 
    402  1.1  christos     # Compute AES ctr result.
    403  1.1  christos     .word 801902167  # vxor.vv v28, v28, v24
    404  1.1  christos 
    405  1.1  christos         bnez t4, 1f  # t4 != 0: the first round has leading padding
    406  1.1  christos 
    407  1.1  christos     ## without padding
    408  1.1  christos     # Store ciphertext/plaintext
    409  1.1  christos     .word 33943079  # vse32.v v28, (a1)
    410  1.1  christos     j 2f
    411  1.1  christos 
    412  1.1  christos     ## with padding
    413  1.1  christos 1:
    414  1.1  christos     # Store ciphertext/plaintext using mask
    415  1.1  christos     .word 388647  # vse32.v v28, (a1), v0.t
    416  1.1  christos 
    417  1.1  christos     # Fill zero for the padding blocks
    418  1.1  christos     .word 154071127  # vsetvli zero, t4, e32, m4, tu, ma
    419  1.1  christos     .word 1577074263  # vmv.v.i v28, 0
    420  1.1  christos 
    421  1.1  christos     # We have used mask register for `INPUT_PADDING_MASK` before. We need to
    422  1.1  christos     # setup the ctr mask back.
    423  1.1  christos     # ctr mask : [000100010001....]
    424  1.1  christos     .word 201879639  # vsetvli zero, a6, e8, m1, ta, ma
    425  1.1  christos     li t1, 0b10001000
    426  1.1  christos     .word 1577271383  # vmv.v.x v0, t1
    427  1.1  christos 2:
    428  1.1  christos 
    429  1.1  christos 
    430  1.1  christos 
    431  1.1  christos     add a0, a0, t0  # advance input pointer past the first round
    432  1.1  christos     add a1, a1, t0  # advance output pointer past the first round
    433  1.1  christos 
    434  1.1  christos 
    435  1.1  christos     .word 220754007  # vsetvli zero, a6, e32, m4, ta, ma
    436  1.1  christos 
    437  1.1  christos .Lenc_blocks_192:
    438  1.1  christos     # Compute the partial tags.
    439  1.1  christos     # The partial tags will multiply with [H^n, H^n, ..., H^n]
    440  1.1  christos     #   [tag0, tag1, ...] =
    441  1.1  christos     #     ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...]) * [H^n, H^n, ..., H^n]
    442  1.1  christos     # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round.
    443  1.1  christos     beqz t5, .Lenc_blocks_192_end  # no elements left: finish the tag
    444  1.1  christos     .word 3004050039  # vghsh.vv v20, v16, v28
    445  1.1  christos 
    446  1.1  christos         .word 86536279  # vsetvli zero, a6, e32, m4, ta, mu
    447  1.1  christos     # Increase ctr in v12.
    448  1.1  christos     .word 13616727  # vadd.vx v12, v12, t6, v0.t
    449  1.1  christos     sub t5, t5, a6  # one full round of elements consumed
    450  1.1  christos     # Load plaintext into v24
    451  1.1  christos     .word 220229719  # vsetvli zero, zero, e32, m4, ta, ma
    452  1.1  christos     .word 33909767  # vle32.v v24, (a0)
    453  1.1  christos     # Prepare the AES ctr input into v28.
    454  1.1  christos     # The ctr data uses big-endian form.
    455  1.1  christos     .word 1577455191  # vmv.v.v v28, v12
    456  1.1  christos     add a0, a0, t0
    457  1.1  christos     .word 86011991  # vsetvli zero, zero, e32, m4, ta, mu
    458  1.1  christos     .word 1237626455  # vrev8.v v28, v28, v0.t
    459  1.1  christos 
    460  1.1  christos 
    461  1.1  christos         # Load key 4
    462  1.1  christos     .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    463  1.1  christos     addi t1, a3, 48
    464  1.1  christos     .word 33777031  # vle32.v v11, (t1)
    465  1.1  christos     .word 220754007  # vsetvli zero, a6, e32, m4, ta, ma
    466  1.1  christos     .word 2786307703  # vaesz.vs v28, v1
    467  1.1  christos     .word 2787192439  # vaesem.vs v28, v2
    468  1.1  christos     .word 2788241015  # vaesem.vs v28, v3
    469  1.1  christos     .word 2796629623  # vaesem.vs v28, v11  (key 4)
    470  1.1  christos     # Load key 8
    471  1.1  christos     .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    472  1.1  christos     addi t1, a3, 112
    473  1.1  christos     .word 33777031  # vle32.v v11, (t1)
    474  1.1  christos     .word 220754007  # vsetvli zero, a6, e32, m4, ta, ma
    475  1.1  christos     .word 2789289591  # vaesem.vs v28, v4
    476  1.1  christos     .word 2790338167  # vaesem.vs v28, v5
    477  1.1  christos     .word 2791386743  # vaesem.vs v28, v6
    478  1.1  christos     .word 2796629623  # vaesem.vs v28, v11  (key 8)
    479  1.1  christos     # Load key 13
    480  1.1  christos     .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    481  1.1  christos     addi t1, a3, 192
    482  1.1  christos     .word 33777031  # vle32.v v11, (t1)
    483  1.1  christos     .word 220754007  # vsetvli zero, a6, e32, m4, ta, ma
    484  1.1  christos     .word 2792435319  # vaesem.vs v28, v7
    485  1.1  christos     .word 2793483895  # vaesem.vs v28, v8
    486  1.1  christos     .word 2794532471  # vaesem.vs v28, v9
    487  1.1  christos     .word 2795581047  # vaesem.vs v28, v10
    488  1.1  christos     .word 2796662391  # vaesef.vs v28, v11  (key 13, final round)
    489  1.1  christos 
    490  1.1  christos 
    491  1.1  christos     # Compute AES ctr ciphertext result.
    492  1.1  christos     .word 801902167  # vxor.vv v28, v28, v24
    493  1.1  christos 
    494  1.1  christos     # Store ciphertext
    495  1.1  christos     .word 33943079  # vse32.v v28, (a1)
    496  1.1  christos     add a1, a1, t0
    497  1.1  christos 
    498  1.1  christos     j .Lenc_blocks_192
    499  1.1  christos .Lenc_blocks_192_end:
    500  1.1  christos 
    501  1.1  christos     # Add ciphertext into partial tag
    502  1.1  christos     .word 793643607  # vxor.vv v20, v20, v28
    503  1.1  christos 
    504  1.1  christos         .word 3441586263  # vsetivli zero, 4, e32, m4, ta, ma
    505  1.1  christos     # Update current ctr value to v12
    506  1.1  christos     .word 13616727  # vadd.vx v12, v12, t6, v0.t
    507  1.1  christos     # Convert ctr to big-endian counter.
    508  1.1  christos     .word 1220847191  # vrev8.v v12, v12, v0.t
    509  1.1  christos     .word 484903  # vse32.v v12, (a4), v0.t  (write the counter word back to the IV block)
    510  1.1  christos 
    511  1.1  christos 
    512  1.1  christos         # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2).
    513  1.1  christos     # Load H to v1
    514  1.1  christos     addi t1, a5, 32
    515  1.1  christos     .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    516  1.1  christos     .word 33775751  # vle32.v v1, (t1)  (round key 1 in v1 is no longer needed)
    517  1.1  christos     # Multiply H for each partial tag and XOR them together.
    518  1.1  christos     # Handle 1st partial tag
    519  1.1  christos     .word 1577713751  # vmv.v.v v0, v20  (v0 becomes the tag accumulator; the ctr mask is dropped)
    520  1.1  christos     .word 2719522935  # vgmul.vv v0, v1
    521  1.1  christos     # Handle 2nd to N-th partial tags
    522  1.1  christos     li t1, 4  # t1 = e32 index of the next partial tag inside v20
    523  1.1  christos 1:
    524  1.1  christos     .word 3441586263  # vsetivli zero, 4, e32, m4, ta, ma
    525  1.1  christos     .word 1061372503  # vslidedown.vx v4, v20, t1  (v4[0..3] = v20[t1..t1+3])
    526  1.1  christos     .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    527  1.1  christos     .word 2987532407  # vghsh.vv v0, v1, v4
    528  1.1  christos     addi t1, t1, 4
    529  1.1  christos     blt t1, a6, 1b  # loop while partial tags remain (t1 < FULL_BLOCK_LEN32)
    530  1.1  christos 
    531  1.1  christos 
    532  1.1  christos     # Save the final tag
    533  1.1  christos     .word 34070567  # vse32.v v0, (a5)
    534  1.1  christos 
    535  1.1  christos     # return the processed size.
    536  1.1  christos     slli a0, a7, 2  # a0 = LEN32 * 4 bytes
    537  1.1  christos     ret
    538  1.1  christos .size aes_gcm_enc_blocks_192,.-aes_gcm_enc_blocks_192
    539  1.1  christos .p2align 3
    540  1.1  christos aes_gcm_enc_blocks_256:
    541  1.1  christos     srli t6, a6, 2
    542  1.1  christos     slli t0, a6, 2
    543  1.1  christos 
    544  1.1  christos         # We run out of 32 vector registers, so we just preserve some round keys
    545  1.1  christos     # and load the remaining round keys inside the aes body.
    546  1.1  christos     # We keep the round keys for:
    547  1.1  christos     #   1, 2, 4, 5, 7, 8, 10, 11, 13 and 14th keys.
    548  1.1  christos     # The following keys will be loaded in the aes body:
    549  1.1  christos     #   3, 6, 9, 12 and 15th keys.
    550  1.1  christos     .word 3439489111
    551  1.1  christos     # key 1
    552  1.1  christos     .word 34005127
    553  1.1  christos     # key 2
    554  1.1  christos     addi t1, a3, 16
    555  1.1  christos     .word 33775879
    556  1.1  christos     # key 4
    557  1.1  christos     addi t1, a3, 48
    558  1.1  christos     .word 33776007
    559  1.1  christos     # key 5
    560  1.1  christos     addi t1, a3, 64
    561  1.1  christos     .word 33776135
    562  1.1  christos     # key 7
    563  1.1  christos     addi t1, a3, 96
    564  1.1  christos     .word 33776263
    565  1.1  christos     # key 8
    566  1.1  christos     addi t1, a3, 112
    567  1.1  christos     .word 33776391
    568  1.1  christos     # key 10
    569  1.1  christos     addi t1, a3, 144
    570  1.1  christos     .word 33776519
    571  1.1  christos     # key 11
    572  1.1  christos     addi t1, a3, 160
    573  1.1  christos     .word 33776647
    574  1.1  christos     # key 13
    575  1.1  christos     addi t1, a3, 192
    576  1.1  christos     .word 33776775
    577  1.1  christos     # key 14
    578  1.1  christos     addi t1, a3, 208
    579  1.1  christos     .word 33776903
    580  1.1  christos 
    581  1.1  christos     # We already have the ciphertext/plaintext and ctr data for the first round.
    582  1.1  christos         # Load key 3
    583  1.1  christos     .word 3439489111
    584  1.1  christos     addi t1, a3, 32
    585  1.1  christos     .word 33777031
    586  1.1  christos     .word 220754007
    587  1.1  christos     .word 2786307703
    588  1.1  christos     .word 2787192439
    589  1.1  christos     .word 2796629623
    590  1.1  christos     # Load key 6
    591  1.1  christos     .word 3439489111
    592  1.1  christos     addi t1, a3, 80
    593  1.1  christos     .word 33777031
    594  1.1  christos     .word 220754007
    595  1.1  christos     .word 2788241015
    596  1.1  christos     .word 2789289591
    597  1.1  christos     .word 2796629623
    598  1.1  christos     # Load key 9
    599  1.1  christos     .word 3439489111
    600  1.1  christos     addi t1, a3, 128
    601  1.1  christos     .word 33777031
    602  1.1  christos     .word 220754007
    603  1.1  christos     .word 2790338167
    604  1.1  christos     .word 2791386743
    605  1.1  christos     .word 2796629623
    606  1.1  christos     # Load key 12
    607  1.1  christos     .word 3439489111
    608  1.1  christos     addi t1, a3, 176
    609  1.1  christos     .word 33777031
    610  1.1  christos     .word 220754007
    611  1.1  christos     .word 2792435319
    612  1.1  christos     .word 2793483895
    613  1.1  christos     .word 2796629623
    614  1.1  christos     # Load key 15
    615  1.1  christos     .word 3439489111
    616  1.1  christos     addi t1, a3, 224
    617  1.1  christos     .word 33777031
    618  1.1  christos     .word 220754007
    619  1.1  christos     .word 2794532471
    620  1.1  christos     .word 2795581047
    621  1.1  christos     .word 2796662391
    622  1.1  christos 
    623  1.1  christos 
    624  1.1  christos     # Compute AES ctr result.
    625  1.1  christos     .word 801902167
    626  1.1  christos 
    627  1.1  christos         bnez t4, 1f
    628  1.1  christos 
    629  1.1  christos     ## without padding
    630  1.1  christos     # Store ciphertext/plaintext
    631  1.1  christos     .word 33943079
    632  1.1  christos     j 2f
    633  1.1  christos 
    634  1.1  christos     ## with padding
    635  1.1  christos 1:
    636  1.1  christos     # Store ciphertext/plaintext using mask
    637  1.1  christos     .word 388647
    638  1.1  christos 
    639  1.1  christos     # Fill zero for the padding blocks
    640  1.1  christos     .word 154071127
    641  1.1  christos     .word 1577074263
    642  1.1  christos 
    643  1.1  christos     # We have used mask register for `INPUT_PADDING_MASK` before. We need to
    644  1.1  christos     # setup the ctr mask back.
    645  1.1  christos     # ctr mask : [000100010001....]
    646  1.1  christos     .word 201879639
    647  1.1  christos     li t1, 0b10001000
    648  1.1  christos     .word 1577271383
    649  1.1  christos 2:
    650  1.1  christos 
    651  1.1  christos 
    652  1.1  christos 
    653  1.1  christos     add a0, a0, t0
    654  1.1  christos     add a1, a1, t0
    655  1.1  christos 
    656  1.1  christos 
    657  1.1  christos     .word 220754007
    658  1.1  christos 
    659  1.1  christos .Lenc_blocks_256:
    660  1.1  christos     # Compute the partial tags.
    661  1.1  christos     # The partial tags will multiply with [H^n, H^n, ..., H^n]
    662  1.1  christos     #   [tag0, tag1, ...] =
    663  1.1  christos     #     ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...] * [H^n, H^n, ..., H^n]
    664  1.1  christos     # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round.
    665  1.1  christos     beqz t5, .Lenc_blocks_256_end
    666  1.1  christos     .word 3004050039
    667  1.1  christos 
    668  1.1  christos         .word 86536279
    669  1.1  christos     # Increase ctr in v12.
    670  1.1  christos     .word 13616727
    671  1.1  christos     sub t5, t5, a6
    672  1.1  christos     # Load plaintext into v24
    673  1.1  christos     .word 220229719
    674  1.1  christos     .word 33909767
    675  1.1  christos     # Prepare the AES ctr input into v28.
    676  1.1  christos     # The ctr data uses big-endian form.
    677  1.1  christos     .word 1577455191
    678  1.1  christos     add a0, a0, t0
    679  1.1  christos     .word 86011991
    680  1.1  christos     .word 1237626455
    681  1.1  christos 
    682  1.1  christos 
    683  1.1  christos         # Load key 3
    684  1.1  christos     .word 3439489111
    685  1.1  christos     addi t1, a3, 32
    686  1.1  christos     .word 33777031
    687  1.1  christos     .word 220754007
    688  1.1  christos     .word 2786307703
    689  1.1  christos     .word 2787192439
    690  1.1  christos     .word 2796629623
    691  1.1  christos     # Load key 6
    692  1.1  christos     .word 3439489111
    693  1.1  christos     addi t1, a3, 80
    694  1.1  christos     .word 33777031
    695  1.1  christos     .word 220754007
    696  1.1  christos     .word 2788241015
    697  1.1  christos     .word 2789289591
    698  1.1  christos     .word 2796629623
    699  1.1  christos     # Load key 9
    700  1.1  christos     .word 3439489111
    701  1.1  christos     addi t1, a3, 128
    702  1.1  christos     .word 33777031
    703  1.1  christos     .word 220754007
    704  1.1  christos     .word 2790338167
    705  1.1  christos     .word 2791386743
    706  1.1  christos     .word 2796629623
    707  1.1  christos     # Load key 12
    708  1.1  christos     .word 3439489111
    709  1.1  christos     addi t1, a3, 176
    710  1.1  christos     .word 33777031
    711  1.1  christos     .word 220754007
    712  1.1  christos     .word 2792435319
    713  1.1  christos     .word 2793483895
    714  1.1  christos     .word 2796629623
    715  1.1  christos     # Load key 15
    716  1.1  christos     .word 3439489111
    717  1.1  christos     addi t1, a3, 224
    718  1.1  christos     .word 33777031
    719  1.1  christos     .word 220754007
    720  1.1  christos     .word 2794532471
    721  1.1  christos     .word 2795581047
    722  1.1  christos     .word 2796662391
    723  1.1  christos 
    724  1.1  christos 
    725  1.1  christos     # Compute AES ctr ciphertext result.
    726  1.1  christos     .word 801902167
    727  1.1  christos 
    728  1.1  christos     # Store ciphertext
    729  1.1  christos     .word 33943079
    730  1.1  christos     add a1, a1, t0
    731  1.1  christos 
    732  1.1  christos     j .Lenc_blocks_256
    733  1.1  christos .Lenc_blocks_256_end:
    734  1.1  christos 
    735  1.1  christos     # Add ciphertext into partial tag
    736  1.1  christos     .word 793643607
    737  1.1  christos 
    738  1.1  christos         .word 3441586263
    739  1.1  christos     # Update current ctr value to v12
    740  1.1  christos     .word 13616727
    741  1.1  christos     # Convert ctr to big-endian counter.
    742  1.1  christos     .word 1220847191
    743  1.1  christos     .word 484903
    744  1.1  christos 
    745  1.1  christos 
    746  1.1  christos         # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2).
    747  1.1  christos     # Load H to v1
    748  1.1  christos     addi t1, a5, 32
    749  1.1  christos     .word 3439489111
    750  1.1  christos     .word 33775751
    751  1.1  christos     # Multiply H for each partial tag and XOR them together.
    752  1.1  christos     # Handle 1st partial tag
    753  1.1  christos     .word 1577713751
    754  1.1  christos     .word 2719522935
    755  1.1  christos     # Handle 2nd to N-th partial tags
    756  1.1  christos     li t1, 4
    757  1.1  christos 1:
    758  1.1  christos     .word 3441586263
    759  1.1  christos     .word 1061372503
    760  1.1  christos     .word 3439489111
    761  1.1  christos     .word 2987532407
    762  1.1  christos     addi t1, t1, 4
    763  1.1  christos     blt t1, a6, 1b
    764  1.1  christos 
    765  1.1  christos 
    766  1.1  christos     # Save the final tag
    767  1.1  christos     .word 34070567
    768  1.1  christos 
    769  1.1  christos     # return the processed size.
    770  1.1  christos     slli a0, a7, 2
    771  1.1  christos     ret
    772  1.1  christos .size aes_gcm_enc_blocks_256,.-aes_gcm_enc_blocks_256
    773  1.1  christos .p2align 3    # --- AES-GCM decrypt entry point ---
    774  1.1  christos .globl rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt    # Registers as used below: a0 = input ptr, a1 = output ptr, a2 = length (whole 16-byte blocks = a2 >> 4),
    775  1.1  christos .type rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt,@function    #   a3 = AES round keys (round count read from 240(a3)), a4 = IV/counter block, a5 = Xi (H at Xi + 32).
    776  1.1  christos rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt:    # Returns a0 = 0 if a2 < 16 or the round count is not 10/12/14; otherwise branches to aes_gcm_dec_blocks_{128,192,256}.
    777  1.1  christos     srli t0, a2, 4    # t0 = a2 / 16: number of whole 16-byte blocks
    778  1.1  christos     beqz t0, .Ldec_end    # less than one block: return 0
    779  1.1  christos     slli t5, t0, 2    # t5 = LEN32: length to process, counted in 32-bit elements
    780  1.1  christos 
    781  1.1  christos     mv a7, t5    # a7 keeps the total LEN32; the per-key-size routines return a7 * 4
    782  1.1  christos 
    783  1.1  christos         # Compute the AES-GCM full-block e32 length for `LMUL=4`. We will handle
    784  1.1  christos     # the multiple AES-GCM blocks at the same time within `LMUL=4` register.
    785  1.1  christos     # The AES-GCM's SEW is e32 and EGW is 128 bits.
    786  1.1  christos     #   FULL_BLOCK_LEN32 = (VLEN*LMUL)/(EGW) * (EGW/SEW) = (VLEN*4)/(32*4) * 4
    787  1.1  christos     #                    = (VLEN*4)/32
    788  1.1  christos     # We could get the block_num using the VL value of `vsetvli with e32, m4`.
    789  1.1  christos     .word 220231767    # vsetvli a6, zero, e32, m4, ta, ma   (a6 = FULL_BLOCK_LEN32)
    790  1.1  christos     # If `LEN32 % FULL_BLOCK_LEN32` is not equal to zero, we could fill the
    791  1.1  christos     # zero padding data to make sure we could always handle FULL_BLOCK_LEN32
    792  1.1  christos     # blocks for all iterations.
    793  1.1  christos 
    794  1.1  christos     ## Prepare the H^n multiplier in v16 for GCM multiplier. The `n` is the gcm
    795  1.1  christos     ## block number in a LMUL=4 register group.
    796  1.1  christos     ##   n = ((VLEN*LMUL)/(32*4)) = ((VLEN*4)/(32*4))
    797  1.1  christos     ##     = (VLEN/32)
    798  1.1  christos     ## We could use vsetvli with `e32, m1` to compute the `n` number.
    799  1.1  christos     .word 218133207    # vsetvli t0, zero, e32, m1, ta, ma   (t0 = n; the block count in t0 is no longer needed)
    800  1.1  christos 
    801  1.1  christos     # The H is at `gcm128_context.Htable[0]`(addr(Xi)+16*2).
    802  1.1  christos     addi t1, a5, 32
    803  1.1  christos     .word 3439489111    # vsetivli zero, 4, e32, m1, ta, ma
    804  1.1  christos     .word 33779591    # vle32.v v31, (t1)   (v31 = H)
    805  1.1  christos 
    806  1.1  christos     # Compute the H^n
    807  1.1  christos     li t1, 1
    808  1.1  christos 1:
    809  1.1  christos     .word 2750984183    # vgmul.vv v31, v31   (square v31 in GF(2^128): H^t1 -> H^(2*t1))
    810  1.1  christos     slli t1, t1, 1
    811  1.1  christos     bltu t1, t0, 1b    # keep squaring until t1 >= n
    812  1.1  christos 
    813  1.1  christos     .word 220754007    # vsetvli zero, a6, e32, m4, ta, ma
    814  1.1  christos     .word 1577072727    # vmv.v.i v16, 0
    815  1.1  christos     .word 2817763447    # vaesz.vs v16, v31   (XOR into zero: copies H^n into every 128-bit group of v16)
    816  1.1  christos 
    817  1.1  christos     #### Load the input data (ciphertext on this decrypt path) into v24 and handle padding. We also load the init tag
    818  1.1  christos     #### data into v20 and prepare the AES ctr input data into v12 and v28.
    819  1.1  christos     .word 1577073239    # vmv.v.i v20, 0
    820  1.1  christos 
    821  1.1  christos     ## Prepare the AES ctr input data into v12.
    822  1.1  christos     # Setup ctr input mask.
    823  1.1  christos     # ctr mask : [000100010001....]
    824  1.1  christos     # Note: The actual vl should be `FULL_BLOCK_LEN32/4 * 2`, but we just use
    825  1.1  christos     #   `FULL_BLOCK_LEN32` here.
    826  1.1  christos     .word 201879639    # vsetvli zero, a6, e8, m1, ta, ma
    827  1.1  christos     li t0, 0b10001000
    828  1.1  christos     .word 1577238615    # vmv.v.x v0, t0   (every mask byte = 0b10001000: selects e32 element 3 of each block)
    829  1.1  christos     # Load IV.
    830  1.1  christos     .word 3439489111    # vsetivli zero, 4, e32, m1, ta, ma
    831  1.1  christos     .word 34041735    # vle32.v v31, (a4)
    832  1.1  christos     # Convert the big-endian counter into little-endian.
    833  1.1  christos     .word 3305271383    # vsetivli zero, 4, e32, m1, ta, mu
    834  1.1  christos     .word 1240772567    # vrev8.v v31, v31, v0.t   (byte-swap only the masked counter word)
    835  1.1  christos     # Splat the `single block of IV` to v12
    836  1.1  christos     .word 220754007    # vsetvli zero, a6, e32, m4, ta, ma
    837  1.1  christos     .word 1577072215    # vmv.v.i v12, 0
    838  1.1  christos     .word 2817762935    # vaesz.vs v12, v31   (XOR into zero: copies the IV block into every 128-bit group of v12)
    839  1.1  christos     # Prepare the ctr counter into v8
    840  1.1  christos     # v8: [x, x, x, 0, x, x, x, 1, x, x, x, 2, ...]
    841  1.1  christos     .word 1342710871    # viota.m v8, v0, v0.t
    842  1.1  christos     # Merge IV and ctr counter into v12.
    843  1.1  christos     # v12:[x, x, x, count+0, x, x, x, count+1, ...]
    844  1.1  christos     .word 86536279    # vsetvli zero, a6, e32, m4, ta, mu
    845  1.1  christos     .word 12846679    # vadd.vv v12, v12, v8, v0.t
    846  1.1  christos 
    847  1.1  christos     li t4, 0    # t4 = number of leading zero-padding e32 elements (0 = first round is full)
    848  1.1  christos     # Get the SEW32 size in the first round.
    849  1.1  christos     # If we have the non-zero value for `LEN32&(FULL_BLOCK_LEN32-1)`, then
    850  1.1  christos     # we will have the leading padding zero.
    851  1.1  christos     addi t0, a6, -1
    852  1.1  christos     and t0, t0, t5    # t0 = LEN32 & (FULL_BLOCK_LEN32 - 1): real elements in a partial first round
    853  1.1  christos     beqz t0, 1f
    854  1.1  christos 
    855  1.1  christos     ## with padding
    856  1.1  christos     sub t5, t5, t0    # LEN32 remaining after the partial first round
    857  1.1  christos     sub t4, a6, t0    # t4 = padding length in e32 elements
    858  1.1  christos     # padding block size
    859  1.1  christos     srli t1, t4, 2
    860  1.1  christos     # padding byte size
    861  1.1  christos     slli t2, t4, 2
    862  1.1  christos 
    863  1.1  christos     # Adjust the ctr counter to make the counter start from `counter+0` for the
    864  1.1  christos     # first non-padding block.
    865  1.1  christos     .word 86536279    # vsetvli zero, a6, e32, m4, ta, mu
    866  1.1  christos     .word 147015255    # vsub.vx v12, v12, t1, v0.t
    867  1.1  christos     # Prepare the AES ctr input into v28.
    868  1.1  christos     # The ctr data uses big-endian form.
    869  1.1  christos     .word 1577455191    # vmv.v.v v28, v12
    870  1.1  christos     .word 1237626455    # vrev8.v v28, v28, v0.t
    871  1.1  christos 
    872  1.1  christos     # Prepare the mask for input loading in the first round. We use
    873  1.1  christos     # `VL=FULL_BLOCK_LEN32` with the mask in the first round.
    874  1.1  christos     # Adjust input ptr.
    875  1.1  christos     sub a0, a0, t2
    876  1.1  christos     # Adjust output ptr.
    877  1.1  christos     sub a1, a1, t2
    878  1.1  christos     .word 211316823    # vsetvli zero, a6, e16, m2, ta, ma
    879  1.1  christos     .word 1376297303    # vid.v v2
    880  1.1  christos     # We don't use the pseudo instruction `vmsgeu` here. Use `vmsgtu` instead.
    881  1.1  christos     # The original code is:
    882  1.1  christos     #   vmsgeu.vx v0, v2, t4
    883  1.1  christos     addi t0, t4, -1
    884  1.1  christos     .word 2049097815    # vmsgtu.vx v0, v2, t0   (mask bit i set when i > t4 - 1, i.e. for the non-padding elements)
    885  1.1  christos     .word 220754007    # vsetvli zero, a6, e32, m4, ta, ma
    886  1.1  christos     .word 1577073751    # vmv.v.i v24, 0   (padding elements of the input stay zero)
    887  1.1  christos     # Load the input for length FULL_BLOCK_LEN32 with mask.
    888  1.1  christos     .word 86536279    # vsetvli zero, a6, e32, m4, ta, mu
    889  1.1  christos     .word 355335    # vle32.v v24, (a0), v0.t
    890  1.1  christos 
    891  1.1  christos     # Load the init `Xi` data to v20 with preceding zero padding.
    892  1.1  christos     # Adjust Xi ptr.
    893  1.1  christos     sub t0, a5, t2
    894  1.1  christos     # Load for length `zero-padding-e32-length + 4`.
    895  1.1  christos     addi t1, t4, 4
    896  1.1  christos     .word 19099735    # vsetvli zero, t1, e32, m4, tu, mu
    897  1.1  christos     .word 190983    # vle32.v v20, (t0), v0.t
    898  1.1  christos     j 2f
    899  1.1  christos 
    900  1.1  christos 1:
    901  1.1  christos     ## without padding
    902  1.1  christos     sub t5, t5, a6    # LEN32 remaining after the full first round
    903  1.1  christos 
    904  1.1  christos     .word 220754007    # vsetvli zero, a6, e32, m4, ta, ma
    905  1.1  christos     .word 33909767    # vle32.v v24, (a0)
    906  1.1  christos 
    907  1.1  christos     # Load the init Xi data to v20.
    908  1.1  christos     .word 3372380247    # vsetivli zero, 4, e32, m1, tu, ma
    909  1.1  christos     .word 34073095    # vle32.v v20, (a5)
    910  1.1  christos 
    911  1.1  christos     # Prepare the AES ctr input into v28.
    912  1.1  christos     # The ctr data uses big-endian form.
    913  1.1  christos     .word 86536279    # vsetvli zero, a6, e32, m4, ta, mu
    914  1.1  christos     .word 1577455191    # vmv.v.v v28, v12
    915  1.1  christos     .word 1237626455    # vrev8.v v28, v28, v0.t
    916  1.1  christos 2:
    917  1.1  christos 
    918  1.1  christos 
    919  1.1  christos     # Load number of rounds
    920  1.1  christos     lwu t0, 240(a3)
    921  1.1  christos     li t1, 14
    922  1.1  christos     li t2, 12
    923  1.1  christos     li t3, 10
    924  1.1  christos 
    925  1.1  christos     beq t0, t1, aes_gcm_dec_blocks_256    # 14 rounds
    926  1.1  christos     beq t0, t2, aes_gcm_dec_blocks_192    # 12 rounds
    927  1.1  christos     beq t0, t3, aes_gcm_dec_blocks_128    # 10 rounds
    928  1.1  christos 
    929  1.1  christos .Ldec_end:    # reached when there is no whole block or the round count is not 10/12/14
    930  1.1  christos     li a0, 0
    931  1.1  christos     ret
    932  1.1  christos .size rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt,.-rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt
    933  1.1  christos .p2align 3
    934  1.1  christos aes_gcm_dec_blocks_128:
    935  1.1  christos     srli t6, a6, 2
    936  1.1  christos     slli t0, a6, 2
    937  1.1  christos 
    938  1.1  christos         # Load all 11 aes round keys to v1-v11 registers.
    939  1.1  christos     .word 3439489111
    940  1.1  christos     .word 34005127
    941  1.1  christos     addi a3, a3, 16
    942  1.1  christos     .word 34005255
    943  1.1  christos     addi a3, a3, 16
    944  1.1  christos     .word 34005383
    945  1.1  christos     addi a3, a3, 16
    946  1.1  christos     .word 34005511
    947  1.1  christos     addi a3, a3, 16
    948  1.1  christos     .word 34005639
    949  1.1  christos     addi a3, a3, 16
    950  1.1  christos     .word 34005767
    951  1.1  christos     addi a3, a3, 16
    952  1.1  christos     .word 34005895
    953  1.1  christos     addi a3, a3, 16
    954  1.1  christos     .word 34006023
    955  1.1  christos     addi a3, a3, 16
    956  1.1  christos     .word 34006151
    957  1.1  christos     addi a3, a3, 16
    958  1.1  christos     .word 34006279
    959  1.1  christos     addi a3, a3, 16
    960  1.1  christos     .word 34006407
    961  1.1  christos 
    962  1.1  christos     # We already have the ciphertext/plaintext and ctr data for the first round.
    963  1.1  christos         .word 220754007
    964  1.1  christos     .word 2786307703
    965  1.1  christos     .word 2787192439
    966  1.1  christos     .word 2788241015
    967  1.1  christos     .word 2789289591
    968  1.1  christos     .word 2790338167
    969  1.1  christos     .word 2791386743
    970  1.1  christos     .word 2792435319
    971  1.1  christos     .word 2793483895
    972  1.1  christos     .word 2794532471
    973  1.1  christos     .word 2795581047
    974  1.1  christos     .word 2796662391
    975  1.1  christos 
    976  1.1  christos 
    977  1.1  christos     # Compute AES ctr result.
    978  1.1  christos     .word 801902167
    979  1.1  christos 
    980  1.1  christos         bnez t4, 1f
    981  1.1  christos 
    982  1.1  christos     ## without padding
    983  1.1  christos     # Store ciphertext/plaintext
    984  1.1  christos     .word 33943079
    985  1.1  christos     j 2f
    986  1.1  christos 
    987  1.1  christos     ## with padding
    988  1.1  christos 1:
    989  1.1  christos     # Store ciphertext/plaintext using mask
    990  1.1  christos     .word 388647
    991  1.1  christos 
    992  1.1  christos     # Fill zero for the padding blocks
    993  1.1  christos     .word 154071127
    994  1.1  christos     .word 1577074263
    995  1.1  christos 
    996  1.1  christos     # We have used mask register for `INPUT_PADDING_MASK` before. We need to
    997  1.1  christos     # setup the ctr mask back.
    998  1.1  christos     # ctr mask : [000100010001....]
    999  1.1  christos     .word 201879639
   1000  1.1  christos     li t1, 0b10001000
   1001  1.1  christos     .word 1577271383
   1002  1.1  christos 2:
   1003  1.1  christos 
   1004  1.1  christos 
   1005  1.1  christos 
   1006  1.1  christos     add a0, a0, t0
   1007  1.1  christos     add a1, a1, t0
   1008  1.1  christos 
   1009  1.1  christos 
   1010  1.1  christos     .word 220754007
   1011  1.1  christos 
   1012  1.1  christos .Ldec_blocks_128:
   1013  1.1  christos     # Compute the partial tags.
   1014  1.1  christos     # The partial tags will multiply with [H^n, H^n, ..., H^n]
   1015  1.1  christos     #   [tag0, tag1, ...] =
   1016  1.1  christos     #     ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...] * [H^n, H^n, ..., H^n]
   1017  1.1  christos     # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round.
   1018  1.1  christos     beqz t5, .Ldec_blocks_256_end
   1019  1.1  christos     .word 3003918967
   1020  1.1  christos 
   1021  1.1  christos         .word 86536279
   1022  1.1  christos     # Increase ctr in v12.
   1023  1.1  christos     .word 13616727
   1024  1.1  christos     sub t5, t5, a6
   1025  1.1  christos     # Load plaintext into v24
   1026  1.1  christos     .word 220229719
   1027  1.1  christos     .word 33909767
   1028  1.1  christos     # Prepare the AES ctr input into v28.
   1029  1.1  christos     # The ctr data uses big-endian form.
   1030  1.1  christos     .word 1577455191
   1031  1.1  christos     add a0, a0, t0
   1032  1.1  christos     .word 86011991
   1033  1.1  christos     .word 1237626455
   1034  1.1  christos 
   1035  1.1  christos 
   1036  1.1  christos         .word 220754007
   1037  1.1  christos     .word 2786307703
   1038  1.1  christos     .word 2787192439
   1039  1.1  christos     .word 2788241015
   1040  1.1  christos     .word 2789289591
   1041  1.1  christos     .word 2790338167
   1042  1.1  christos     .word 2791386743
   1043  1.1  christos     .word 2792435319
   1044  1.1  christos     .word 2793483895
   1045  1.1  christos     .word 2794532471
   1046  1.1  christos     .word 2795581047
   1047  1.1  christos     .word 2796662391
   1048  1.1  christos 
   1049  1.1  christos 
   1050  1.1  christos     # Compute AES ctr plaintext result.
   1051  1.1  christos     .word 801902167
   1052  1.1  christos 
   1053  1.1  christos     # Store plaintext
   1054  1.1  christos     .word 33943079
   1055  1.1  christos     add a1, a1, t0
   1056  1.1  christos 
   1057  1.1  christos     j .Ldec_blocks_128
   1058  1.1  christos .Ldec_blocks_128_end:
   1059  1.1  christos 
   1060  1.1  christos     # Add ciphertext into partial tag
   1061  1.1  christos     .word 793512535
   1062  1.1  christos 
   1063  1.1  christos         .word 3441586263
   1064  1.1  christos     # Update current ctr value to v12
   1065  1.1  christos     .word 13616727
   1066  1.1  christos     # Convert ctr to big-endian counter.
   1067  1.1  christos     .word 1220847191
   1068  1.1  christos     .word 484903
   1069  1.1  christos 
   1070  1.1  christos 
   1071  1.1  christos         # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2).
   1072  1.1  christos     # Load H to v1
   1073  1.1  christos     addi t1, a5, 32
   1074  1.1  christos     .word 3439489111
   1075  1.1  christos     .word 33775751
   1076  1.1  christos     # Multiply H for each partial tag and XOR them together.
   1077  1.1  christos     # Handle 1st partial tag
   1078  1.1  christos     .word 1577713751
   1079  1.1  christos     .word 2719522935
   1080  1.1  christos     # Handle 2nd to N-th partial tags
   1081  1.1  christos     li t1, 4
   1082  1.1  christos 1:
   1083  1.1  christos     .word 3441586263
   1084  1.1  christos     .word 1061372503
   1085  1.1  christos     .word 3439489111
   1086  1.1  christos     .word 2987532407
   1087  1.1  christos     addi t1, t1, 4
   1088  1.1  christos     blt t1, a6, 1b
   1089  1.1  christos 
   1090  1.1  christos 
   1091  1.1  christos     # Save the final tag
   1092  1.1  christos     .word 34070567
   1093  1.1  christos 
   1094  1.1  christos     # return the processed size.
   1095  1.1  christos     slli a0, a7, 2
   1096  1.1  christos     ret
   1097  1.1  christos .size aes_gcm_dec_blocks_128,.-aes_gcm_dec_blocks_128
   1098  1.1  christos .p2align 3
   1099  1.1  christos aes_gcm_dec_blocks_192:    # 12-round decrypt body, entered from rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt with: v12 = counters, v28 = big-endian counter input, v16 = H^n, v20 = partial tags, v24 = first input, t4 = padding e32 count, t5 = remaining LEN32, a6 = FULL_BLOCK_LEN32, a7 = total LEN32. Returns a0 = a7 * 4.
   1100  1.1  christos     srli t6, a6, 2    # t6 = FULL_BLOCK_LEN32 / 4: blocks per iteration, used as the counter increment
   1101  1.1  christos     slli t0, a6, 2    # t0 = FULL_BLOCK_LEN32 * 4: bytes per iteration, used as the pointer stride
   1102  1.1  christos 
   1103  1.1  christos         # We run out of 32 vector registers, so we just preserve some round keys
   1104  1.1  christos     # and load the remaining round keys inside the aes body.
   1105  1.1  christos     # We keep the round keys for:
   1106  1.1  christos     #   1, 2, 3, 5, 6, 7, 9, 10, 11 and 12th keys.
   1107  1.1  christos     # The following keys will be loaded in the aes body:
   1108  1.1  christos     #   4, 8 and 13th keys.
   1109  1.1  christos     .word 3439489111    # vsetivli zero, 4, e32, m1, ta, ma
   1110  1.1  christos     # key 1
   1111  1.1  christos     .word 34005127    # vle32.v v1, (a3)
   1112  1.1  christos     # key 2
   1113  1.1  christos     addi t1, a3, 16
   1114  1.1  christos     .word 33775879    # vle32.v v2, (t1)
   1115  1.1  christos     # key 3
   1116  1.1  christos     addi t1, a3, 32
   1117  1.1  christos     .word 33776007    # vle32.v v3, (t1)
   1118  1.1  christos     # key 5
   1119  1.1  christos     addi t1, a3, 64
   1120  1.1  christos     .word 33776135    # vle32.v v4, (t1)
   1121  1.1  christos     # key 6
   1122  1.1  christos     addi t1, a3, 80
   1123  1.1  christos     .word 33776263    # vle32.v v5, (t1)
   1124  1.1  christos     # key 7
   1125  1.1  christos     addi t1, a3, 96
   1126  1.1  christos     .word 33776391    # vle32.v v6, (t1)
   1127  1.1  christos     # key 9
   1128  1.1  christos     addi t1, a3, 128
   1129  1.1  christos     .word 33776519    # vle32.v v7, (t1)
   1130  1.1  christos     # key 10
   1131  1.1  christos     addi t1, a3, 144
   1132  1.1  christos     .word 33776647    # vle32.v v8, (t1)
   1133  1.1  christos     # key 11
   1134  1.1  christos     addi t1, a3, 160
   1135  1.1  christos     .word 33776775    # vle32.v v9, (t1)
   1136  1.1  christos     # key 12
   1137  1.1  christos     addi t1, a3, 176
   1138  1.1  christos     .word 33776903    # vle32.v v10, (t1)
   1139  1.1  christos 
   1140  1.1  christos     # We already have the ciphertext/plaintext and ctr data for the first round.
   1141  1.1  christos         # Load key 4
   1142  1.1  christos     .word 3439489111    # vsetivli zero, 4, e32, m1, ta, ma
   1143  1.1  christos     addi t1, a3, 48
   1144  1.1  christos     .word 33777031    # vle32.v v11, (t1)   (v11 is the scratch register for keys 4, 8 and 13)
   1145  1.1  christos     .word 220754007    # vsetvli zero, a6, e32, m4, ta, ma
   1146  1.1  christos     .word 2786307703    # vaesz.vs v28, v1    (key 1)
   1147  1.1  christos     .word 2787192439    # vaesem.vs v28, v2   (key 2)
   1148  1.1  christos     .word 2788241015    # vaesem.vs v28, v3   (key 3)
   1149  1.1  christos     .word 2796629623    # vaesem.vs v28, v11  (key 4)
   1150  1.1  christos     # Load key 8
   1151  1.1  christos     .word 3439489111    # vsetivli zero, 4, e32, m1, ta, ma
   1152  1.1  christos     addi t1, a3, 112
   1153  1.1  christos     .word 33777031    # vle32.v v11, (t1)
   1154  1.1  christos     .word 220754007    # vsetvli zero, a6, e32, m4, ta, ma
   1155  1.1  christos     .word 2789289591    # vaesem.vs v28, v4   (key 5)
   1156  1.1  christos     .word 2790338167    # vaesem.vs v28, v5   (key 6)
   1157  1.1  christos     .word 2791386743    # vaesem.vs v28, v6   (key 7)
   1158  1.1  christos     .word 2796629623    # vaesem.vs v28, v11  (key 8)
   1159  1.1  christos     # Load key 13
   1160  1.1  christos     .word 3439489111    # vsetivli zero, 4, e32, m1, ta, ma
   1161  1.1  christos     addi t1, a3, 192
   1162  1.1  christos     .word 33777031    # vle32.v v11, (t1)
   1163  1.1  christos     .word 220754007    # vsetvli zero, a6, e32, m4, ta, ma
   1164  1.1  christos     .word 2792435319    # vaesem.vs v28, v7   (key 9)
   1165  1.1  christos     .word 2793483895    # vaesem.vs v28, v8   (key 10)
   1166  1.1  christos     .word 2794532471    # vaesem.vs v28, v9   (key 11)
   1167  1.1  christos     .word 2795581047    # vaesem.vs v28, v10  (key 12)
   1168  1.1  christos     .word 2796662391    # vaesef.vs v28, v11  (key 13, final round)
   1169  1.1  christos 
   1170  1.1  christos 
   1171  1.1  christos     # Compute AES ctr result.
   1172  1.1  christos     .word 801902167    # vxor.vv v28, v28, v24   (keystream XOR input)
   1173  1.1  christos 
   1174  1.1  christos         bnez t4, 1f    # t4 != 0: the first round carries leading zero padding
   1175  1.1  christos 
   1176  1.1  christos     ## without padding
   1177  1.1  christos     # Store ciphertext/plaintext
   1178  1.1  christos     .word 33943079    # vse32.v v28, (a1)
   1179  1.1  christos     j 2f
   1180  1.1  christos 
   1181  1.1  christos     ## with padding
   1182  1.1  christos 1:
   1183  1.1  christos     # Store ciphertext/plaintext using mask
   1184  1.1  christos     .word 388647    # vse32.v v28, (a1), v0.t
   1185  1.1  christos 
   1186  1.1  christos     # Fill zero for the padding blocks
   1187  1.1  christos     .word 154071127    # vsetvli zero, t4, e32, m4, tu, ma
   1188  1.1  christos     .word 1577074263    # vmv.v.i v28, 0
   1189  1.1  christos 
   1190  1.1  christos     # We have used mask register for `INPUT_PADDING_MASK` before. We need to
   1191  1.1  christos     # setup the ctr mask back.
   1192  1.1  christos     # ctr mask : [000100010001....]
   1193  1.1  christos     .word 201879639    # vsetvli zero, a6, e8, m1, ta, ma
   1194  1.1  christos     li t1, 0b10001000
   1195  1.1  christos     .word 1577271383    # vmv.v.x v0, t1
   1196  1.1  christos 2:
   1197  1.1  christos 
   1198  1.1  christos 
   1199  1.1  christos 
   1200  1.1  christos     add a0, a0, t0    # advance the input pointer by one full iteration
   1201  1.1  christos     add a1, a1, t0    # advance the output pointer by one full iteration
   1202  1.1  christos 
   1203  1.1  christos 
   1204  1.1  christos     .word 220754007    # vsetvli zero, a6, e32, m4, ta, ma
   1205  1.1  christos 
   1206  1.1  christos .Ldec_blocks_192:
   1207  1.1  christos     # Compute the partial tags.
   1208  1.1  christos     # The partial tags will multiply with [H^n, H^n, ..., H^n]
   1209  1.1  christos     #   [tag0, tag1, ...] =
   1210  1.1  christos     #     ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...]) * [H^n, H^n, ..., H^n]
   1211  1.1  christos     # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round.
   1212  1.1  christos     beqz t5, .Ldec_blocks_192_end    # no input left: go fold the partial tags
   1213  1.1  christos     .word 3003918967    # vghsh.vv v20, v16, v24   (v20 = (v20 ^ v24) * v16)
   1214  1.1  christos 
   1215  1.1  christos         .word 86536279    # vsetvli zero, a6, e32, m4, ta, mu
   1216  1.1  christos     # Increase ctr in v12.
   1217  1.1  christos     .word 13616727    # vadd.vx v12, v12, t6, v0.t
   1218  1.1  christos     sub t5, t5, a6
   1219  1.1  christos     # Load ciphertext into v24
   1220  1.1  christos     .word 220229719    # vsetvli zero, zero, e32, m4, ta, ma
   1221  1.1  christos     .word 33909767    # vle32.v v24, (a0)
   1222  1.1  christos     # Prepare the AES ctr input into v28.
   1223  1.1  christos     # The ctr data uses big-endian form.
   1224  1.1  christos     .word 1577455191    # vmv.v.v v28, v12
   1225  1.1  christos     add a0, a0, t0
   1226  1.1  christos     .word 86011991    # vsetvli zero, zero, e32, m4, ta, mu
   1227  1.1  christos     .word 1237626455    # vrev8.v v28, v28, v0.t
   1228  1.1  christos 
   1229  1.1  christos 
   1230  1.1  christos         # Load key 4
   1231  1.1  christos     .word 3439489111    # vsetivli zero, 4, e32, m1, ta, ma
   1232  1.1  christos     addi t1, a3, 48
   1233  1.1  christos     .word 33777031    # vle32.v v11, (t1)
   1234  1.1  christos     .word 220754007    # vsetvli zero, a6, e32, m4, ta, ma
   1235  1.1  christos     .word 2786307703    # vaesz.vs v28, v1    (key 1)
   1236  1.1  christos     .word 2787192439    # vaesem.vs v28, v2   (key 2)
   1237  1.1  christos     .word 2788241015    # vaesem.vs v28, v3   (key 3)
   1238  1.1  christos     .word 2796629623    # vaesem.vs v28, v11  (key 4)
   1239  1.1  christos     # Load key 8
   1240  1.1  christos     .word 3439489111    # vsetivli zero, 4, e32, m1, ta, ma
   1241  1.1  christos     addi t1, a3, 112
   1242  1.1  christos     .word 33777031    # vle32.v v11, (t1)
   1243  1.1  christos     .word 220754007    # vsetvli zero, a6, e32, m4, ta, ma
   1244  1.1  christos     .word 2789289591    # vaesem.vs v28, v4   (key 5)
   1245  1.1  christos     .word 2790338167    # vaesem.vs v28, v5   (key 6)
   1246  1.1  christos     .word 2791386743    # vaesem.vs v28, v6   (key 7)
   1247  1.1  christos     .word 2796629623    # vaesem.vs v28, v11  (key 8)
   1248  1.1  christos     # Load key 13
   1249  1.1  christos     .word 3439489111    # vsetivli zero, 4, e32, m1, ta, ma
   1250  1.1  christos     addi t1, a3, 192
   1251  1.1  christos     .word 33777031    # vle32.v v11, (t1)
   1252  1.1  christos     .word 220754007    # vsetvli zero, a6, e32, m4, ta, ma
   1253  1.1  christos     .word 2792435319    # vaesem.vs v28, v7   (key 9)
   1254  1.1  christos     .word 2793483895    # vaesem.vs v28, v8   (key 10)
   1255  1.1  christos     .word 2794532471    # vaesem.vs v28, v9   (key 11)
   1256  1.1  christos     .word 2795581047    # vaesem.vs v28, v10  (key 12)
   1257  1.1  christos     .word 2796662391    # vaesef.vs v28, v11  (key 13, final round)
   1258  1.1  christos 
   1259  1.1  christos 
   1260  1.1  christos     # Compute AES ctr plaintext result.
   1261  1.1  christos     .word 801902167    # vxor.vv v28, v28, v24
   1262  1.1  christos 
   1263  1.1  christos     # Store plaintext
   1264  1.1  christos     .word 33943079    # vse32.v v28, (a1)
   1265  1.1  christos     add a1, a1, t0
   1266  1.1  christos 
   1267  1.1  christos     j .Ldec_blocks_192
   1268  1.1  christos .Ldec_blocks_192_end:
   1269  1.1  christos 
   1270  1.1  christos     # Add ciphertext into partial tag
   1271  1.1  christos     .word 793512535    # vxor.vv v20, v20, v24
   1272  1.1  christos 
   1273  1.1  christos         .word 3441586263    # vsetivli zero, 4, e32, m4, ta, ma
   1274  1.1  christos     # Update current ctr value to v12
   1275  1.1  christos     .word 13616727    # vadd.vx v12, v12, t6, v0.t
   1276  1.1  christos     # Convert ctr to big-endian counter.
   1277  1.1  christos     .word 1220847191    # vrev8.v v12, v12, v0.t
   1278  1.1  christos     .word 484903    # vse32.v v12, (a4), v0.t   (write the updated counter word back to the IV block)
   1279  1.1  christos 
   1280  1.1  christos 
   1281  1.1  christos         # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2).
   1282  1.1  christos     # Load H to v1
   1283  1.1  christos     addi t1, a5, 32
   1284  1.1  christos     .word 3439489111    # vsetivli zero, 4, e32, m1, ta, ma
   1285  1.1  christos     .word 33775751    # vle32.v v1, (t1)
   1286  1.1  christos     # Multiply H for each partial tag and XOR them together.
   1287  1.1  christos     # Handle 1st partial tag
   1288  1.1  christos     .word 1577713751    # vmv.v.v v0, v20
   1289  1.1  christos     .word 2719522935    # vgmul.vv v0, v1   (v0 = tag0 * H)
   1290  1.1  christos     # Handle 2nd to N-th partial tags
   1291  1.1  christos     li t1, 4    # t1 = e32 offset of the next partial tag inside v20
   1292  1.1  christos 1:
   1293  1.1  christos     .word 3441586263    # vsetivli zero, 4, e32, m4, ta, ma
   1294  1.1  christos     .word 1061372503    # vslidedown.vx v4, v20, t1   (bring the next partial tag down to v4)
   1295  1.1  christos     .word 3439489111    # vsetivli zero, 4, e32, m1, ta, ma
   1296  1.1  christos     .word 2987532407    # vghsh.vv v0, v1, v4   (v0 = (v0 ^ v4) * H)
   1297  1.1  christos     addi t1, t1, 4
   1298  1.1  christos     blt t1, a6, 1b    # until every partial tag in the register group is folded in
   1299  1.1  christos 
   1300  1.1  christos 
   1301  1.1  christos     # Save the final tag
   1302  1.1  christos     .word 34070567    # vse32.v v0, (a5)
   1303  1.1  christos 
   1304  1.1  christos     # return the processed size.
   1305  1.1  christos     slli a0, a7, 2    # a0 = total LEN32 * 4 bytes
   1306  1.1  christos     ret
   1307  1.1  christos .size aes_gcm_dec_blocks_192,.-aes_gcm_dec_blocks_192
   1308  1.1  christos .p2align 3
   1309  1.1  christos aes_gcm_dec_blocks_256:
   1310  1.1  christos     srli t6, a6, 2
   1311  1.1  christos     slli t0, a6, 2
   1312  1.1  christos 
   1313  1.1  christos         # We run out of 32 vector registers, so we just preserve some round keys
   1314  1.1  christos     # and load the remaining round keys inside the aes body.
   1315  1.1  christos     # We keep the round keys for:
   1316  1.1  christos     #   1, 2, 4, 5, 7, 8, 10, 11, 13 and 14th keys.
   1317  1.1  christos     # The following keys will be loaded in the aes body:
   1318  1.1  christos     #   3, 6, 9, 12 and 15th keys.
   1319  1.1  christos     .word 3439489111
   1320  1.1  christos     # key 1
   1321  1.1  christos     .word 34005127
   1322  1.1  christos     # key 2
   1323  1.1  christos     addi t1, a3, 16
   1324  1.1  christos     .word 33775879
   1325  1.1  christos     # key 4
   1326  1.1  christos     addi t1, a3, 48
   1327  1.1  christos     .word 33776007
   1328  1.1  christos     # key 5
   1329  1.1  christos     addi t1, a3, 64
   1330  1.1  christos     .word 33776135
   1331  1.1  christos     # key 7
   1332  1.1  christos     addi t1, a3, 96
   1333  1.1  christos     .word 33776263
   1334  1.1  christos     # key 8
   1335  1.1  christos     addi t1, a3, 112
   1336  1.1  christos     .word 33776391
   1337  1.1  christos     # key 10
   1338  1.1  christos     addi t1, a3, 144
   1339  1.1  christos     .word 33776519
   1340  1.1  christos     # key 11
   1341  1.1  christos     addi t1, a3, 160
   1342  1.1  christos     .word 33776647
   1343  1.1  christos     # key 13
   1344  1.1  christos     addi t1, a3, 192
   1345  1.1  christos     .word 33776775
   1346  1.1  christos     # key 14
   1347  1.1  christos     addi t1, a3, 208
   1348  1.1  christos     .word 33776903
   1349  1.1  christos 
   1350  1.1  christos     # We already have the ciphertext/plaintext and ctr data for the first round.
   1351  1.1  christos         # Load key 3
   1352  1.1  christos     .word 3439489111
   1353  1.1  christos     addi t1, a3, 32
   1354  1.1  christos     .word 33777031
   1355  1.1  christos     .word 220754007
   1356  1.1  christos     .word 2786307703
   1357  1.1  christos     .word 2787192439
   1358  1.1  christos     .word 2796629623
   1359  1.1  christos     # Load key 6
   1360  1.1  christos     .word 3439489111
   1361  1.1  christos     addi t1, a3, 80
   1362  1.1  christos     .word 33777031
   1363  1.1  christos     .word 220754007
   1364  1.1  christos     .word 2788241015
   1365  1.1  christos     .word 2789289591
   1366  1.1  christos     .word 2796629623
   1367  1.1  christos     # Load key 9
   1368  1.1  christos     .word 3439489111
   1369  1.1  christos     addi t1, a3, 128
   1370  1.1  christos     .word 33777031
   1371  1.1  christos     .word 220754007
   1372  1.1  christos     .word 2790338167
   1373  1.1  christos     .word 2791386743
   1374  1.1  christos     .word 2796629623
   1375  1.1  christos     # Load key 12
   1376  1.1  christos     .word 3439489111
   1377  1.1  christos     addi t1, a3, 176
   1378  1.1  christos     .word 33777031
   1379  1.1  christos     .word 220754007
   1380  1.1  christos     .word 2792435319
   1381  1.1  christos     .word 2793483895
   1382  1.1  christos     .word 2796629623
   1383  1.1  christos     # Load key 15
   1384  1.1  christos     .word 3439489111
   1385  1.1  christos     addi t1, a3, 224
   1386  1.1  christos     .word 33777031
   1387  1.1  christos     .word 220754007
   1388  1.1  christos     .word 2794532471
   1389  1.1  christos     .word 2795581047
   1390  1.1  christos     .word 2796662391
   1391  1.1  christos 
   1392  1.1  christos 
   1393  1.1  christos     # Compute AES ctr result.
   1394  1.1  christos     .word 801902167
   1395  1.1  christos 
   1396  1.1  christos         bnez t4, 1f
   1397  1.1  christos 
   1398  1.1  christos     ## without padding
   1399  1.1  christos     # Store ciphertext/plaintext
   1400  1.1  christos     .word 33943079
   1401  1.1  christos     j 2f
   1402  1.1  christos 
   1403  1.1  christos     ## with padding
   1404  1.1  christos 1:
   1405  1.1  christos     # Store ciphertext/plaintext using mask
   1406  1.1  christos     .word 388647
   1407  1.1  christos 
   1408  1.1  christos     # Zero-fill the padding blocks
   1409  1.1  christos     .word 154071127
   1410  1.1  christos     .word 1577074263
   1411  1.1  christos 
   1412  1.1  christos     # The mask register was used for `INPUT_PADDING_MASK` above, so we need
   1413  1.1  christos     # to restore the ctr mask.
   1414  1.1  christos     # ctr mask : [000100010001....]
   1415  1.1  christos     .word 201879639
   1416  1.1  christos     li t1, 0b10001000
   1417  1.1  christos     .word 1577271383
   1418  1.1  christos 2:
   1419  1.1  christos 
   1420  1.1  christos 
   1421  1.1  christos 
   1422  1.1  christos     add a0, a0, t0
   1423  1.1  christos     add a1, a1, t0
   1424  1.1  christos 
   1425  1.1  christos 
   1426  1.1  christos     .word 220754007
   1427  1.1  christos 
   1428  1.1  christos .Ldec_blocks_256:
   1429  1.1  christos     # Compute the partial tags.
   1430  1.1  christos     # The partial tags will multiply with [H^n, H^n, ..., H^n]
   1431  1.1  christos     #   [tag0, tag1, ...] =
   1432  1.1  christos     #     ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...]) * [H^n, H^n, ..., H^n]
   1433  1.1  christos     # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round.
   1434  1.1  christos     beqz t5, .Ldec_blocks_256_end
   1435  1.1  christos     .word 3003918967
   1436  1.1  christos 
   1437  1.1  christos         .word 86536279
   1438  1.1  christos     # Increase ctr in v12.
   1439  1.1  christos     .word 13616727
   1440  1.1  christos     sub t5, t5, a6
   1441  1.1  christos     # Load ciphertext into v24
   1442  1.1  christos     .word 220229719
   1443  1.1  christos     .word 33909767
   1444  1.1  christos     # Prepare the AES ctr input into v28.
   1445  1.1  christos     # The ctr data uses big-endian form.
   1446  1.1  christos     .word 1577455191
   1447  1.1  christos     add a0, a0, t0
   1448  1.1  christos     .word 86011991
   1449  1.1  christos     .word 1237626455
   1450  1.1  christos 
   1451  1.1  christos 
   1452  1.1  christos         # Load key 3
   1453  1.1  christos     .word 3439489111
   1454  1.1  christos     addi t1, a3, 32
   1455  1.1  christos     .word 33777031
   1456  1.1  christos     .word 220754007
   1457  1.1  christos     .word 2786307703
   1458  1.1  christos     .word 2787192439
   1459  1.1  christos     .word 2796629623
   1460  1.1  christos     # Load key 6
   1461  1.1  christos     .word 3439489111
   1462  1.1  christos     addi t1, a3, 80
   1463  1.1  christos     .word 33777031
   1464  1.1  christos     .word 220754007
   1465  1.1  christos     .word 2788241015
   1466  1.1  christos     .word 2789289591
   1467  1.1  christos     .word 2796629623
   1468  1.1  christos     # Load key 9
   1469  1.1  christos     .word 3439489111
   1470  1.1  christos     addi t1, a3, 128
   1471  1.1  christos     .word 33777031
   1472  1.1  christos     .word 220754007
   1473  1.1  christos     .word 2790338167
   1474  1.1  christos     .word 2791386743
   1475  1.1  christos     .word 2796629623
   1476  1.1  christos     # Load key 12
   1477  1.1  christos     .word 3439489111
   1478  1.1  christos     addi t1, a3, 176
   1479  1.1  christos     .word 33777031
   1480  1.1  christos     .word 220754007
   1481  1.1  christos     .word 2792435319
   1482  1.1  christos     .word 2793483895
   1483  1.1  christos     .word 2796629623
   1484  1.1  christos     # Load key 15
   1485  1.1  christos     .word 3439489111
   1486  1.1  christos     addi t1, a3, 224
   1487  1.1  christos     .word 33777031
   1488  1.1  christos     .word 220754007
   1489  1.1  christos     .word 2794532471
   1490  1.1  christos     .word 2795581047
   1491  1.1  christos     .word 2796662391
   1492  1.1  christos 
   1493  1.1  christos 
   1494  1.1  christos     # Compute AES ctr plaintext result.
   1495  1.1  christos     .word 801902167
   1496  1.1  christos 
   1497  1.1  christos     # Store plaintext
   1498  1.1  christos     .word 33943079
   1499  1.1  christos     add a1, a1, t0
   1500  1.1  christos 
   1501  1.1  christos     j .Ldec_blocks_256
   1502  1.1  christos .Ldec_blocks_256_end:
   1503  1.1  christos 
   1504  1.1  christos     # Add ciphertext into partial tag
   1505  1.1  christos     .word 793512535
   1506  1.1  christos 
   1507  1.1  christos         .word 3441586263
   1508  1.1  christos     # Update current ctr value to v12
   1509  1.1  christos     .word 13616727
   1510  1.1  christos     # Convert ctr to big-endian counter.
   1511  1.1  christos     .word 1220847191
   1512  1.1  christos     .word 484903
   1513  1.1  christos 
   1514  1.1  christos 
   1515  1.1  christos         # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2).
   1516  1.1  christos     # Load H to v1
   1517  1.1  christos     addi t1, a5, 32
   1518  1.1  christos     .word 3439489111
   1519  1.1  christos     .word 33775751
   1520  1.1  christos     # Multiply H for each partial tag and XOR them together.
   1521  1.1  christos     # Handle 1st partial tag
   1522  1.1  christos     .word 1577713751
   1523  1.1  christos     .word 2719522935
   1524  1.1  christos     # Handle 2nd to N-th partial tags
   1525  1.1  christos     li t1, 4
   1526  1.1  christos 1:
   1527  1.1  christos     .word 3441586263
   1528  1.1  christos     .word 1061372503
   1529  1.1  christos     .word 3439489111
   1530  1.1  christos     .word 2987532407
   1531  1.1  christos     addi t1, t1, 4
   1532  1.1  christos     blt t1, a6, 1b
   1533  1.1  christos 
   1534  1.1  christos 
   1535  1.1  christos     # Save the final tag
   1536  1.1  christos     .word 34070567
   1537  1.1  christos 
   1538  1.1  christos     # return the processed size.
   1539  1.1  christos     slli a0, a7, 2
   1540  1.1  christos     ret
   1541  1.1  christos .size aes_gcm_dec_blocks_256,.-aes_gcm_dec_blocks_256
   1542