#include <machine/asm.h>
.text

# AES-GCM bulk encryption entry point for RISC-V with the vector crypto
# extensions. Vector instructions are emitted as pre-encoded .word values;
# the decoded mnemonic is written next to each one.
#
# Register usage, as established by the code below:
#   a0  input pointer (loaded into v24)
#   a1  output pointer (adjusted here, stored to by the per-key-size bodies)
#   a2  length in bytes; only whole 16-byte blocks (a2 >> 4) are processed
#   a3  AES key schedule; the round count is read from offset 240
#   a4  16-byte counter block; its last 32-bit word is byte-reversed before
#       counter arithmetic
#   a5  Xi (running tag); H is read from a5 + 32
# NOTE(review): presumably called from C as f(in, out, len, key, ivec, Xi);
# confirm against the caller.
#
# Returns a0 = 0 when there is less than one whole block or when the round
# count is not 10, 12 or 14. Otherwise control is transferred to
# aes_gcm_enc_blocks_128/192/256, which finish the work and return.
#
# State handed to those bodies:
#   a6   FULL_BLOCK_LEN32 (VLMAX for e32/m4)
#   a7   total number of e32 words to process (used for the return value)
#   t4   number of leading zero-padding e32 words in the first round (0 = none)
#   t5   e32 words left after the first round
#   v0   counter mask (no padding) or first-round input mask (padding)
#   v12  counter blocks with the counter word in little-endian form
#   v16  H^n in every 128-bit element group
#   v20  initial partial tags (Xi in the first live block, zero elsewhere)
#   v24  first round of input, zero in the padding positions
#   v28  first round of AES input (counter blocks, counter word big-endian)
.p2align 3
.globl rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt
.type rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt,@function
rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt:
    srli t0, a2, 4                  # t0 = number of whole 16-byte blocks
    beqz t0, .Lenc_end              # less than one block: return 0
    slli t5, t0, 2                  # t5 = LEN32, the length in e32 words

    mv a7, t5                       # keep LEN32 for the final return value

    # Compute the AES-GCM full-block e32 length for `LMUL=4`. We will handle
    # the multiple AES-GCM blocks at the same time within `LMUL=4` register.
    # The AES-GCM's SEW is e32 and EGW is 128 bits.
    #   FULL_BLOCK_LEN32 = (VLEN*LMUL)/(EGW) * (EGW/SEW) = (VLEN*4)/(32*4) * 4
    #                    = (VLEN*4)/32
    # We could get the block_num using the VL value of `vsetvli with e32, m4`.
    .word 220231767   # vsetvli a6, zero, e32, m4, ta, ma
    # If `LEN32 % FULL_BLOCK_LEN32` is not equal to zero, we could fill the
    # zero padding data to make sure we could always handle FULL_BLOCK_LEN32
    # blocks for all iterations.

    ## Prepare the H^n multiplier in v16 for GCM multiplier. The `n` is the gcm
    ## block number in a LMUL=4 register group.
    ##   n = ((VLEN*LMUL)/(32*4)) = ((VLEN*4)/(32*4))
    ##     = (VLEN/32)
    ## We could use vsetvli with `e32, m1` to compute the `n` number.
    .word 218133207   # vsetvli t0, zero, e32, m1, ta, ma

    # The H is at `gcm128_context.Htable[0]`(addr(Xi)+16*2).
    addi t1, a5, 32
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    .word 33779591    # vle32.v v31, (t1)

    # Compute the H^n: v31 is squared once per pass while t1 doubles from 1
    # until it reaches n (t0).
    li t1, 1
1:
    .word 2750984183  # vgmul.vv v31, v31
    slli t1, t1, 1
    bltu t1, t0, 1b

    # Splat H^n into every 128-bit group of v16 (zero, then XOR-broadcast).
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 1577072727  # vmv.v.i v16, 0
    .word 2817763447  # vaesz.vs v16, v31

    #### Load plaintext into v24 and handle padding. We also load the init tag
    #### data into v20 and prepare the AES ctr input data into v12 and v28.
    .word 1577073239  # vmv.v.i v20, 0

    ## Prepare the AES ctr input data into v12.
    # Setup ctr input mask.
    # ctr mask : [000100010001....]
    # Note: The actual vl should be `FULL_BLOCK_LEN32/4 * 2`, but we just use
    #   `FULL_BLOCK_LEN32` here.
    .word 201879639   # vsetvli zero, a6, e8, m1, ta, ma
    li t0, 0b10001000
    .word 1577238615  # vmv.v.x v0, t0
    # Load IV.
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    .word 34041735    # vle32.v v31, (a4)
    # Convert the big-endian counter into little-endian.
    .word 3305271383  # vsetivli zero, 4, e32, m1, ta, mu
    .word 1240772567  # vrev8.v v31, v31, v0.t
    # Splat the `single block of IV` to v12
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 1577072215  # vmv.v.i v12, 0
    .word 2817762935  # vaesz.vs v12, v31
    # Prepare the ctr counter into v8
    # v8: [x, x, x, 0, x, x, x, 1, x, x, x, 2, ...]
    .word 1342710871  # viota.m v8, v0, v0.t
    # Merge IV and ctr counter into v12.
    # v12:[x, x, x, count+0, x, x, x, count+1, ...]
    .word 86536279    # vsetvli zero, a6, e32, m4, ta, mu
    .word 12846679    # vadd.vv v12, v12, v8, v0.t

    li t4, 0                        # t4 = 0 means "no leading padding"
    # Get the SEW32 size in the first round.
    # If we have the non-zero value for `LEN32&(FULL_BLOCK_LEN32-1)`, then
    # we will have the leading padding zero.
    addi t0, a6, -1
    and t0, t0, t5                  # t0 = LEN32 & (FULL_BLOCK_LEN32 - 1)
    beqz t0, 1f

    ## with padding
    sub t5, t5, t0                  # words left once the partial round is done
    sub t4, a6, t0                  # t4 = leading padding length in e32 words
    # padding block size
    srli t1, t4, 2
    # padding byte size
    slli t2, t4, 2

    # Adjust the ctr counter to make the counter start from `counter+0` for the
    # first non-padding block.
    .word 86536279    # vsetvli zero, a6, e32, m4, ta, mu
    .word 147015255   # vsub.vx v12, v12, t1, v0.t
    # Prepare the AES ctr input into v28.
    # The ctr data uses big-endian form.
    .word 1577455191  # vmv.v.v v28, v12
    .word 1237626455  # vrev8.v v28, v28, v0.t

    # Prepare the mask for input loading in the first round. We use
    # `VL=FULL_BLOCK_LEN32` with the mask in the first round.
    # Adjust input ptr.
    sub a0, a0, t2
    # Adjust output ptr.
    sub a1, a1, t2
    .word 211316823   # vsetvli zero, a6, e16, m2, ta, ma
    .word 1376297303  # vid.v v2
    # We don't use the pseudo instruction `vmsgeu` here. Use `vmsgtu` instead.
    # The original code is:
    #   vmsgeu.vx v0, v2, t4
    addi t0, t4, -1
    .word 2049097815  # vmsgtu.vx v0, v2, t0
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 1577073751  # vmv.v.i v24, 0
    # Load the input for length FULL_BLOCK_LEN32 with mask.
    .word 86536279    # vsetvli zero, a6, e32, m4, ta, mu
    .word 355335      # vle32.v v24, (a0), v0.t

    # Load the init `Xi` data to v20 with preceding zero padding.
    # Adjust Xi ptr.
    sub t0, a5, t2
    # Load for length `zero-padding-e32-length + 4`.
    addi t1, t4, 4
    .word 19099735    # vsetvli zero, t1, e32, m4, tu, mu
    .word 190983      # vle32.v v20, (t0), v0.t
    j 2f

1:
    ## without padding
    sub t5, t5, a6

    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 33909767    # vle32.v v24, (a0)

    # Load the init Xi data to v20.
    .word 3372380247  # vsetivli zero, 4, e32, m1, tu, ma
    .word 34073095    # vle32.v v20, (a5)

    # Prepare the AES ctr input into v28.
    # The ctr data uses big-endian form.
    .word 86536279    # vsetvli zero, a6, e32, m4, ta, mu
    .word 1577455191  # vmv.v.v v28, v12
    .word 1237626455  # vrev8.v v28, v28, v0.t
2:


    # Load number of rounds
    lwu t0, 240(a3)
    li t1, 14
    li t2, 12
    li t3, 10

    # Dispatch on the round count; an unexpected count falls through to
    # .Lenc_end and reports zero bytes processed.
    beq t0, t1, aes_gcm_enc_blocks_256
    beq t0, t2, aes_gcm_enc_blocks_192
    beq t0, t3, aes_gcm_enc_blocks_128

.Lenc_end:
    li a0, 0
    ret

.size rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt,.-rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt
.p2align 3
aes_gcm_enc_blocks_128:
    srli t6, a6, 2                  # t6 = AES blocks per iteration
    slli t0, a6, 2                  # t0 = bytes per iteration

    # Load all 11 aes round keys to v1-v11 registers.
# Body of aes_gcm_enc_blocks_128 (its label and the t6/t0 setup come just
# before this point): CTR encryption with 11 round keys plus GHASH, working
# on FULL_BLOCK_LEN32 (a6) e32 words per iteration.
#   t6  a6 >> 2, AES blocks per iteration (added to the counter words)
#   t0  a6 << 2, bytes per iteration (added to a0 and a1)
#   t4  non-zero when the first round has leading zero padding
#   t5  e32 words still to process after the current round
#   a7  total e32 words; a7 << 2 is returned in a0
# The vector instructions are pre-encoded; the decoded mnemonic follows each
# .word. a3 is advanced in place while the keys are loaded and is not read
# again in this body.
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    .word 34005127    # vle32.v v1, (a3)
    addi a3, a3, 16
    .word 34005255    # vle32.v v2, (a3)
    addi a3, a3, 16
    .word 34005383    # vle32.v v3, (a3)
    addi a3, a3, 16
    .word 34005511    # vle32.v v4, (a3)
    addi a3, a3, 16
    .word 34005639    # vle32.v v5, (a3)
    addi a3, a3, 16
    .word 34005767    # vle32.v v6, (a3)
    addi a3, a3, 16
    .word 34005895    # vle32.v v7, (a3)
    addi a3, a3, 16
    .word 34006023    # vle32.v v8, (a3)
    addi a3, a3, 16
    .word 34006151    # vle32.v v9, (a3)
    addi a3, a3, 16
    .word 34006279    # vle32.v v10, (a3)
    addi a3, a3, 16
    .word 34006407    # vle32.v v11, (a3)

    # We already have the ciphertext/plaintext and ctr data for the first round.
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 2786307703  # vaesz.vs v28, v1
    .word 2787192439  # vaesem.vs v28, v2
    .word 2788241015  # vaesem.vs v28, v3
    .word 2789289591  # vaesem.vs v28, v4
    .word 2790338167  # vaesem.vs v28, v5
    .word 2791386743  # vaesem.vs v28, v6
    .word 2792435319  # vaesem.vs v28, v7
    .word 2793483895  # vaesem.vs v28, v8
    .word 2794532471  # vaesem.vs v28, v9
    .word 2795581047  # vaesem.vs v28, v10
    .word 2796662391  # vaesef.vs v28, v11


    # Compute AES ctr result.
    .word 801902167   # vxor.vv v28, v28, v24

    bnez t4, 1f

    ## without padding
    # Store ciphertext/plaintext
    .word 33943079    # vse32.v v28, (a1)
    j 2f

    ## with padding
1:
    # Store ciphertext/plaintext using mask
    .word 388647      # vse32.v v28, (a1), v0.t

    # Fill zero for the padding blocks
    .word 154071127   # vsetvli zero, t4, e32, m4, tu, ma
    .word 1577074263  # vmv.v.i v28, 0

    # We have used mask register for `INPUT_PADDING_MASK` before. We need to
    # setup the ctr mask back.
    # ctr mask : [000100010001....]
    .word 201879639   # vsetvli zero, a6, e8, m1, ta, ma
    li t1, 0b10001000
    .word 1577271383  # vmv.v.x v0, t1
2:



    add a0, a0, t0
    add a1, a1, t0


    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma

.Lenc_blocks_128:
    # Compute the partial tags.
    # The partial tags will multiply with [H^n, H^n, ..., H^n]
    #   [tag0, tag1, ...] =
    #     ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...] * [H^n, H^n, ..., H^n]
    # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round.
    beqz t5, .Lenc_blocks_128_end
    .word 3004050039  # vghsh.vv v20, v16, v28

    .word 86536279    # vsetvli zero, a6, e32, m4, ta, mu
    # Increase ctr in v12.
    .word 13616727    # vadd.vx v12, v12, t6, v0.t
    sub t5, t5, a6
    # Load plaintext into v24
    .word 220229719   # vsetvli zero, zero, e32, m4, ta, ma
    .word 33909767    # vle32.v v24, (a0)
    # Prepare the AES ctr input into v28.
    # The ctr data uses big-endian form.
    .word 1577455191  # vmv.v.v v28, v12
    add a0, a0, t0
    .word 86011991    # vsetvli zero, zero, e32, m4, ta, mu
    .word 1237626455  # vrev8.v v28, v28, v0.t


    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 2786307703  # vaesz.vs v28, v1
    .word 2787192439  # vaesem.vs v28, v2
    .word 2788241015  # vaesem.vs v28, v3
    .word 2789289591  # vaesem.vs v28, v4
    .word 2790338167  # vaesem.vs v28, v5
    .word 2791386743  # vaesem.vs v28, v6
    .word 2792435319  # vaesem.vs v28, v7
    .word 2793483895  # vaesem.vs v28, v8
    .word 2794532471  # vaesem.vs v28, v9
    .word 2795581047  # vaesem.vs v28, v10
    .word 2796662391  # vaesef.vs v28, v11


    # Compute AES ctr ciphertext result.
    .word 801902167   # vxor.vv v28, v28, v24

    # Store ciphertext
    .word 33943079    # vse32.v v28, (a1)
    add a1, a1, t0

    j .Lenc_blocks_128
.Lenc_blocks_128_end:

    # Add ciphertext into partial tag
    .word 793643607   # vxor.vv v20, v20, v28

    .word 3441586263  # vsetivli zero, 4, e32, m4, ta, ma
    # Update current ctr value to v12
    .word 13616727    # vadd.vx v12, v12, t6, v0.t
    # Convert ctr to big-endian counter.
    .word 1220847191  # vrev8.v v12, v12, v0.t
    # Only the masked counter word of the first block is written back.
    .word 484903      # vse32.v v12, (a4), v0.t


    # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2).
    # Load H to v1
    addi t1, a5, 32
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    .word 33775751    # vle32.v v1, (t1)
    # Multiply H for each partial tag and XOR them together.
    # Handle 1st partial tag
    .word 1577713751  # vmv.v.v v0, v20
    .word 2719522935  # vgmul.vv v0, v1
    # Handle 2nd to N-th partial tags
    # t1 is the e32 offset of the next partial tag inside v20.
    li t1, 4
1:
    .word 3441586263  # vsetivli zero, 4, e32, m4, ta, ma
    .word 1061372503  # vslidedown.vx v4, v20, t1
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    .word 2987532407  # vghsh.vv v0, v1, v4
    addi t1, t1, 4
    blt t1, a6, 1b


    # Save the final tag
    .word 34070567    # vse32.v v0, (a5)

    # return the processed size.
    slli a0, a7, 2
    ret
.size aes_gcm_enc_blocks_128,.-aes_gcm_enc_blocks_128
.p2align 3
aes_gcm_enc_blocks_192:
    srli t6, a6, 2                  # t6 = AES blocks per iteration
    slli t0, a6, 2                  # t0 = bytes per iteration

    # We run out of 32 vector registers, so we just preserve some round keys
    # and load the remaining round keys inside the aes body.
# Body of aes_gcm_enc_blocks_192 (its label and the t6/t0 setup come just
# before this point): CTR encryption with 13 round keys plus GHASH, working
# on FULL_BLOCK_LEN32 (a6) e32 words per iteration.
#   t6  a6 >> 2, AES blocks per iteration (added to the counter words)
#   t0  a6 << 2, bytes per iteration (added to a0 and a1)
#   t1  scratch address for round-key loads
#   t4  non-zero when the first round has leading zero padding
#   t5  e32 words still to process after the current round
# Resident keys: v1..v10 hold keys 1, 2, 3, 5, 6, 7, 9, 10, 11, 12.
# v11 is reloaded with keys 4, 8 and 13 inside every AES pass.
# The vector instructions are pre-encoded; the decoded mnemonic follows each
# .word.
    # We keep the round keys for:
    #   1, 2, 3, 5, 6, 7, 9, 10, 11 and 12th keys.
    # The following keys will be loaded in the aes body:
    #   4, 8 and 13th keys.
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    # key 1
    .word 34005127    # vle32.v v1, (a3)
    # key 2
    addi t1, a3, 16
    .word 33775879    # vle32.v v2, (t1)
    # key 3
    addi t1, a3, 32
    .word 33776007    # vle32.v v3, (t1)
    # key 5
    addi t1, a3, 64
    .word 33776135    # vle32.v v4, (t1)
    # key 6
    addi t1, a3, 80
    .word 33776263    # vle32.v v5, (t1)
    # key 7
    addi t1, a3, 96
    .word 33776391    # vle32.v v6, (t1)
    # key 9
    addi t1, a3, 128
    .word 33776519    # vle32.v v7, (t1)
    # key 10
    addi t1, a3, 144
    .word 33776647    # vle32.v v8, (t1)
    # key 11
    addi t1, a3, 160
    .word 33776775    # vle32.v v9, (t1)
    # key 12
    addi t1, a3, 176
    .word 33776903    # vle32.v v10, (t1)

    # We already have the ciphertext/plaintext and ctr data for the first round.
    # Load key 4
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    addi t1, a3, 48
    .word 33777031    # vle32.v v11, (t1)
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 2786307703  # vaesz.vs v28, v1
    .word 2787192439  # vaesem.vs v28, v2
    .word 2788241015  # vaesem.vs v28, v3
    .word 2796629623  # vaesem.vs v28, v11
    # Load key 8
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    addi t1, a3, 112
    .word 33777031    # vle32.v v11, (t1)
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 2789289591  # vaesem.vs v28, v4
    .word 2790338167  # vaesem.vs v28, v5
    .word 2791386743  # vaesem.vs v28, v6
    .word 2796629623  # vaesem.vs v28, v11
    # Load key 13
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    addi t1, a3, 192
    .word 33777031    # vle32.v v11, (t1)
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 2792435319  # vaesem.vs v28, v7
    .word 2793483895  # vaesem.vs v28, v8
    .word 2794532471  # vaesem.vs v28, v9
    .word 2795581047  # vaesem.vs v28, v10
    .word 2796662391  # vaesef.vs v28, v11


    # Compute AES ctr result.
    .word 801902167   # vxor.vv v28, v28, v24

    bnez t4, 1f

    ## without padding
    # Store ciphertext/plaintext
    .word 33943079    # vse32.v v28, (a1)
    j 2f

    ## with padding
1:
    # Store ciphertext/plaintext using mask
    .word 388647      # vse32.v v28, (a1), v0.t

    # Fill zero for the padding blocks
    .word 154071127   # vsetvli zero, t4, e32, m4, tu, ma
    .word 1577074263  # vmv.v.i v28, 0

    # We have used mask register for `INPUT_PADDING_MASK` before. We need to
    # setup the ctr mask back.
    # ctr mask : [000100010001....]
    .word 201879639   # vsetvli zero, a6, e8, m1, ta, ma
    li t1, 0b10001000
    .word 1577271383  # vmv.v.x v0, t1
2:



    add a0, a0, t0
    add a1, a1, t0


    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma

.Lenc_blocks_192:
    # Compute the partial tags.
    # The partial tags will multiply with [H^n, H^n, ..., H^n]
    #   [tag0, tag1, ...] =
    #     ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...] * [H^n, H^n, ..., H^n]
    # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round.
    beqz t5, .Lenc_blocks_192_end
    .word 3004050039  # vghsh.vv v20, v16, v28

    .word 86536279    # vsetvli zero, a6, e32, m4, ta, mu
    # Increase ctr in v12.
    .word 13616727    # vadd.vx v12, v12, t6, v0.t
    sub t5, t5, a6
    # Load plaintext into v24
    .word 220229719   # vsetvli zero, zero, e32, m4, ta, ma
    .word 33909767    # vle32.v v24, (a0)
    # Prepare the AES ctr input into v28.
    # The ctr data uses big-endian form.
    .word 1577455191  # vmv.v.v v28, v12
    add a0, a0, t0
    .word 86011991    # vsetvli zero, zero, e32, m4, ta, mu
    .word 1237626455  # vrev8.v v28, v28, v0.t


    # Load key 4
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    addi t1, a3, 48
    .word 33777031    # vle32.v v11, (t1)
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 2786307703  # vaesz.vs v28, v1
    .word 2787192439  # vaesem.vs v28, v2
    .word 2788241015  # vaesem.vs v28, v3
    .word 2796629623  # vaesem.vs v28, v11
    # Load key 8
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    addi t1, a3, 112
    .word 33777031    # vle32.v v11, (t1)
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 2789289591  # vaesem.vs v28, v4
    .word 2790338167  # vaesem.vs v28, v5
    .word 2791386743  # vaesem.vs v28, v6
    .word 2796629623  # vaesem.vs v28, v11
    # Load key 13
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    addi t1, a3, 192
    .word 33777031    # vle32.v v11, (t1)
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 2792435319  # vaesem.vs v28, v7
    .word 2793483895  # vaesem.vs v28, v8
    .word 2794532471  # vaesem.vs v28, v9
    .word 2795581047  # vaesem.vs v28, v10
    .word 2796662391  # vaesef.vs v28, v11


    # Compute AES ctr ciphertext result.
    .word 801902167   # vxor.vv v28, v28, v24

    # Store ciphertext
    .word 33943079    # vse32.v v28, (a1)
    add a1, a1, t0

    j .Lenc_blocks_192
.Lenc_blocks_192_end:

    # Add ciphertext into partial tag
    .word 793643607   # vxor.vv v20, v20, v28

    .word 3441586263  # vsetivli zero, 4, e32, m4, ta, ma
    # Update current ctr value to v12
    .word 13616727    # vadd.vx v12, v12, t6, v0.t
    # Convert ctr to big-endian counter.
    .word 1220847191  # vrev8.v v12, v12, v0.t
    # Only the masked counter word of the first block is written back.
    .word 484903      # vse32.v v12, (a4), v0.t


    # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2).
    # Load H to v1
    addi t1, a5, 32
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    .word 33775751    # vle32.v v1, (t1)
    # Multiply H for each partial tag and XOR them together.
    # Handle 1st partial tag
    .word 1577713751  # vmv.v.v v0, v20
    .word 2719522935  # vgmul.vv v0, v1
    # Handle 2nd to N-th partial tags
    # t1 is the e32 offset of the next partial tag inside v20.
    li t1, 4
1:
    .word 3441586263  # vsetivli zero, 4, e32, m4, ta, ma
    .word 1061372503  # vslidedown.vx v4, v20, t1
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    .word 2987532407  # vghsh.vv v0, v1, v4
    addi t1, t1, 4
    blt t1, a6, 1b


    # Save the final tag
    .word 34070567    # vse32.v v0, (a5)

    # return the processed size.
    slli a0, a7, 2
    ret
.size aes_gcm_enc_blocks_192,.-aes_gcm_enc_blocks_192
.p2align 3
aes_gcm_enc_blocks_256:
    srli t6, a6, 2                  # t6 = AES blocks per iteration
    slli t0, a6, 2                  # t0 = bytes per iteration

    # We run out of 32 vector registers, so we just preserve some round keys
    # and load the remaining round keys inside the aes body.
    # We keep the round keys for:
    #   1, 2, 4, 5, 7, 8, 10, 11, 13 and 14th keys.
    # The following keys will be loaded in the aes body:
    #   3, 6, 9, 12 and 15th keys.
# Body of aes_gcm_enc_blocks_256 (its label and the t6/t0 setup come just
# before this point): CTR encryption with 15 round keys plus GHASH, working
# on FULL_BLOCK_LEN32 (a6) e32 words per iteration.
#   t6  a6 >> 2, AES blocks per iteration (added to the counter words)
#   t0  a6 << 2, bytes per iteration (added to a0 and a1)
#   t1  scratch address for round-key loads
#   t4  non-zero when the first round has leading zero padding
#   t5  e32 words still to process after the current round
# Resident keys: v1..v10 hold keys 1, 2, 4, 5, 7, 8, 10, 11, 13, 14.
# v11 is reloaded with keys 3, 6, 9, 12 and 15 inside every AES pass.
# The vector instructions are pre-encoded; the decoded mnemonic follows each
# .word.
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    # key 1
    .word 34005127    # vle32.v v1, (a3)
    # key 2
    addi t1, a3, 16
    .word 33775879    # vle32.v v2, (t1)
    # key 4
    addi t1, a3, 48
    .word 33776007    # vle32.v v3, (t1)
    # key 5
    addi t1, a3, 64
    .word 33776135    # vle32.v v4, (t1)
    # key 7
    addi t1, a3, 96
    .word 33776263    # vle32.v v5, (t1)
    # key 8
    addi t1, a3, 112
    .word 33776391    # vle32.v v6, (t1)
    # key 10
    addi t1, a3, 144
    .word 33776519    # vle32.v v7, (t1)
    # key 11
    addi t1, a3, 160
    .word 33776647    # vle32.v v8, (t1)
    # key 13
    addi t1, a3, 192
    .word 33776775    # vle32.v v9, (t1)
    # key 14
    addi t1, a3, 208
    .word 33776903    # vle32.v v10, (t1)

    # We already have the ciphertext/plaintext and ctr data for the first round.
    # Load key 3
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    addi t1, a3, 32
    .word 33777031    # vle32.v v11, (t1)
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 2786307703  # vaesz.vs v28, v1
    .word 2787192439  # vaesem.vs v28, v2
    .word 2796629623  # vaesem.vs v28, v11
    # Load key 6
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    addi t1, a3, 80
    .word 33777031    # vle32.v v11, (t1)
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 2788241015  # vaesem.vs v28, v3
    .word 2789289591  # vaesem.vs v28, v4
    .word 2796629623  # vaesem.vs v28, v11
    # Load key 9
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    addi t1, a3, 128
    .word 33777031    # vle32.v v11, (t1)
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 2790338167  # vaesem.vs v28, v5
    .word 2791386743  # vaesem.vs v28, v6
    .word 2796629623  # vaesem.vs v28, v11
    # Load key 12
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    addi t1, a3, 176
    .word 33777031    # vle32.v v11, (t1)
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 2792435319  # vaesem.vs v28, v7
    .word 2793483895  # vaesem.vs v28, v8
    .word 2796629623  # vaesem.vs v28, v11
    # Load key 15
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    addi t1, a3, 224
    .word 33777031    # vle32.v v11, (t1)
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 2794532471  # vaesem.vs v28, v9
    .word 2795581047  # vaesem.vs v28, v10
    .word 2796662391  # vaesef.vs v28, v11


    # Compute AES ctr result.
    .word 801902167   # vxor.vv v28, v28, v24

    bnez t4, 1f

    ## without padding
    # Store ciphertext/plaintext
    .word 33943079    # vse32.v v28, (a1)
    j 2f

    ## with padding
1:
    # Store ciphertext/plaintext using mask
    .word 388647      # vse32.v v28, (a1), v0.t

    # Fill zero for the padding blocks
    .word 154071127   # vsetvli zero, t4, e32, m4, tu, ma
    .word 1577074263  # vmv.v.i v28, 0

    # We have used mask register for `INPUT_PADDING_MASK` before. We need to
    # setup the ctr mask back.
    # ctr mask : [000100010001....]
    .word 201879639   # vsetvli zero, a6, e8, m1, ta, ma
    li t1, 0b10001000
    .word 1577271383  # vmv.v.x v0, t1
2:



    add a0, a0, t0
    add a1, a1, t0


    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma

.Lenc_blocks_256:
    # Compute the partial tags.
    # The partial tags will multiply with [H^n, H^n, ..., H^n]
    #   [tag0, tag1, ...] =
    #     ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...] * [H^n, H^n, ..., H^n]
    # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round.
    beqz t5, .Lenc_blocks_256_end
    .word 3004050039  # vghsh.vv v20, v16, v28

    .word 86536279    # vsetvli zero, a6, e32, m4, ta, mu
    # Increase ctr in v12.
    .word 13616727    # vadd.vx v12, v12, t6, v0.t
    sub t5, t5, a6
    # Load plaintext into v24
    .word 220229719   # vsetvli zero, zero, e32, m4, ta, ma
    .word 33909767    # vle32.v v24, (a0)
    # Prepare the AES ctr input into v28.
    # The ctr data uses big-endian form.
    .word 1577455191  # vmv.v.v v28, v12
    add a0, a0, t0
    .word 86011991    # vsetvli zero, zero, e32, m4, ta, mu
    .word 1237626455  # vrev8.v v28, v28, v0.t


    # Load key 3
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    addi t1, a3, 32
    .word 33777031    # vle32.v v11, (t1)
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 2786307703  # vaesz.vs v28, v1
    .word 2787192439  # vaesem.vs v28, v2
    .word 2796629623  # vaesem.vs v28, v11
    # Load key 6
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    addi t1, a3, 80
    .word 33777031    # vle32.v v11, (t1)
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 2788241015  # vaesem.vs v28, v3
    .word 2789289591  # vaesem.vs v28, v4
    .word 2796629623  # vaesem.vs v28, v11
    # Load key 9
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    addi t1, a3, 128
    .word 33777031    # vle32.v v11, (t1)
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 2790338167  # vaesem.vs v28, v5
    .word 2791386743  # vaesem.vs v28, v6
    .word 2796629623  # vaesem.vs v28, v11
    # Load key 12
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    addi t1, a3, 176
    .word 33777031    # vle32.v v11, (t1)
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 2792435319  # vaesem.vs v28, v7
    .word 2793483895  # vaesem.vs v28, v8
    .word 2796629623  # vaesem.vs v28, v11
    # Load key 15
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    addi t1, a3, 224
    .word 33777031    # vle32.v v11, (t1)
    .word 220754007   # vsetvli zero, a6, e32, m4, ta, ma
    .word 2794532471  # vaesem.vs v28, v9
    .word 2795581047  # vaesem.vs v28, v10
    .word 2796662391  # vaesef.vs v28, v11


    # Compute AES ctr ciphertext result.
    .word 801902167   # vxor.vv v28, v28, v24

    # Store ciphertext
    .word 33943079    # vse32.v v28, (a1)
    add a1, a1, t0

    j .Lenc_blocks_256
.Lenc_blocks_256_end:

    # Add ciphertext into partial tag
    .word 793643607   # vxor.vv v20, v20, v28

    .word 3441586263  # vsetivli zero, 4, e32, m4, ta, ma
    # Update current ctr value to v12
    .word 13616727    # vadd.vx v12, v12, t6, v0.t
    # Convert ctr to big-endian counter.
    .word 1220847191  # vrev8.v v12, v12, v0.t
    # Only the masked counter word of the first block is written back.
    .word 484903      # vse32.v v12, (a4), v0.t


    # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2).
    # Load H to v1
    addi t1, a5, 32
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    .word 33775751    # vle32.v v1, (t1)
    # Multiply H for each partial tag and XOR them together.
    # Handle 1st partial tag
    .word 1577713751  # vmv.v.v v0, v20
    .word 2719522935  # vgmul.vv v0, v1
    # Handle 2nd to N-th partial tags
    # t1 is the e32 offset of the next partial tag inside v20.
    li t1, 4
1:
    .word 3441586263  # vsetivli zero, 4, e32, m4, ta, ma
    .word 1061372503  # vslidedown.vx v4, v20, t1
    .word 3439489111  # vsetivli zero, 4, e32, m1, ta, ma
    .word 2987532407  # vghsh.vv v0, v1, v4
    addi t1, t1, 4
    blt t1, a6, 1b


    # Save the final tag
    .word 34070567    # vse32.v v0, (a5)

    # return the processed size.
    slli a0, a7, 2
    ret
.size aes_gcm_enc_blocks_256,.-aes_gcm_enc_blocks_256

# AES-GCM bulk decryption entry point. The setup below uses the same
# instruction sequence as the encryption entry point: a0 = input, a1 = output,
# a2 = byte length, a3 = key schedule (round count at offset 240),
# a4 = counter block, a5 = Xi with H at a5 + 32.
.p2align 3
.globl rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt
.type rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt,@function
rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt:
    srli t0, a2, 4                  # t0 = number of whole 16-byte blocks
    beqz t0, .Ldec_end              # less than one block: return 0
    slli t5, t0, 2                  # t5 = LEN32, the length in e32 words

    mv a7, t5                       # keep LEN32 for the final return value

    # Compute the AES-GCM full-block e32 length for `LMUL=4`. We will handle
    # the multiple AES-GCM blocks at the same time within `LMUL=4` register.
    # The AES-GCM's SEW is e32 and EGW is 128 bits.
    #   FULL_BLOCK_LEN32 = (VLEN*LMUL)/(EGW) * (EGW/SEW) = (VLEN*4)/(32*4) * 4
    #                    = (VLEN*4)/32
    # We could get the block_num using the VL value of `vsetvli with e32, m4`.
    .word 220231767   # vsetvli a6, zero, e32, m4, ta, ma
    # If `LEN32 % FULL_BLOCK_LEN32` is not equal to zero, we could fill the
    # zero padding data to make sure we could always handle FULL_BLOCK_LEN32
    # blocks for all iterations.

    ## Prepare the H^n multiplier in v16 for GCM multiplier. The `n` is the gcm
    ## block number in a LMUL=4 register group.
    ##   n = ((VLEN*LMUL)/(32*4)) = ((VLEN*4)/(32*4))
    ##     = (VLEN/32)
    ## We could use vsetvli with `e32, m1` to compute the `n` number.
    .word 218133207   # vsetvli t0, zero, e32, m1, ta, ma

    # The H is at `gcm128_context.Htable[0]`(addr(Xi)+16*2).
802 1.1 christos addi t1, a5, 32 803 1.1 christos .word 3439489111 804 1.1 christos .word 33779591 805 1.1 christos 806 1.1 christos # Compute the H^n 807 1.1 christos li t1, 1 808 1.1 christos 1: 809 1.1 christos .word 2750984183 810 1.1 christos slli t1, t1, 1 811 1.1 christos bltu t1, t0, 1b 812 1.1 christos 813 1.1 christos .word 220754007 814 1.1 christos .word 1577072727 815 1.1 christos .word 2817763447 816 1.1 christos 817 1.1 christos #### Load plaintext into v24 and handle padding. We also load the init tag 818 1.1 christos #### data into v20 and prepare the AES ctr input data into v12 and v28. 819 1.1 christos .word 1577073239 820 1.1 christos 821 1.1 christos ## Prepare the AES ctr input data into v12. 822 1.1 christos # Setup ctr input mask. 823 1.1 christos # ctr mask : [000100010001....] 824 1.1 christos # Note: The actual vl should be `FULL_BLOCK_LEN32/4 * 2`, but we just use 825 1.1 christos # `FULL_BLOCK_LEN32` here. 826 1.1 christos .word 201879639 827 1.1 christos li t0, 0b10001000 828 1.1 christos .word 1577238615 829 1.1 christos # Load IV. 830 1.1 christos .word 3439489111 831 1.1 christos .word 34041735 832 1.1 christos # Convert the big-endian counter into little-endian. 833 1.1 christos .word 3305271383 834 1.1 christos .word 1240772567 835 1.1 christos # Splat the `single block of IV` to v12 836 1.1 christos .word 220754007 837 1.1 christos .word 1577072215 838 1.1 christos .word 2817762935 839 1.1 christos # Prepare the ctr counter into v8 840 1.1 christos # v8: [x, x, x, 0, x, x, x, 1, x, x, x, 2, ...] 841 1.1 christos .word 1342710871 842 1.1 christos # Merge IV and ctr counter into v12. 843 1.1 christos # v12:[x, x, x, count+0, x, x, x, count+1, ...] 844 1.1 christos .word 86536279 845 1.1 christos .word 12846679 846 1.1 christos 847 1.1 christos li t4, 0 848 1.1 christos # Get the SEW32 size in the first round. 
849 1.1 christos # If we have the non-zero value for `LEN32&(FULL_BLOCK_LEN32-1)`, then 850 1.1 christos # we will have the leading padding zero. 851 1.1 christos addi t0, a6, -1 852 1.1 christos and t0, t0, t5 853 1.1 christos beqz t0, 1f 854 1.1 christos 855 1.1 christos ## with padding 856 1.1 christos sub t5, t5, t0 857 1.1 christos sub t4, a6, t0 858 1.1 christos # padding block size 859 1.1 christos srli t1, t4, 2 860 1.1 christos # padding byte size 861 1.1 christos slli t2, t4, 2 862 1.1 christos 863 1.1 christos # Adjust the ctr counter to make the counter start from `counter+0` for the 864 1.1 christos # first non-padding block. 865 1.1 christos .word 86536279 866 1.1 christos .word 147015255 867 1.1 christos # Prepare the AES ctr input into v28. 868 1.1 christos # The ctr data uses big-endian form. 869 1.1 christos .word 1577455191 870 1.1 christos .word 1237626455 871 1.1 christos 872 1.1 christos # Prepare the mask for input loading in the first round. We use 873 1.1 christos # `VL=FULL_BLOCK_LEN32` with the mask in the first round. 874 1.1 christos # Adjust input ptr. 875 1.1 christos sub a0, a0, t2 876 1.1 christos # Adjust output ptr. 877 1.1 christos sub a1, a1, t2 878 1.1 christos .word 211316823 879 1.1 christos .word 1376297303 880 1.1 christos # We don't use the pseudo instruction `vmsgeu` here. Use `vmsgtu` instead. 881 1.1 christos # The original code is: 882 1.1 christos # vmsgeu.vx v0, v2, t4 883 1.1 christos addi t0, t4, -1 884 1.1 christos .word 2049097815 885 1.1 christos .word 220754007 886 1.1 christos .word 1577073751 887 1.1 christos # Load the input for length FULL_BLOCK_LEN32 with mask. 888 1.1 christos .word 86536279 889 1.1 christos .word 355335 890 1.1 christos 891 1.1 christos # Load the init `Xi` data to v20 with preceding zero padding. 892 1.1 christos # Adjust Xi ptr. 893 1.1 christos sub t0, a5, t2 894 1.1 christos # Load for length `zero-padding-e32-length + 4`. 
# t1 = zero-padding e32 length (t4) + 4, the element count for the Xi load.
    addi t1, t4, 4
    .word 19099735
    .word 190983
    j 2f

1:
    ## without padding
    sub t5, t5, a6

    .word 220754007
    .word 33909767

    # Load the init Xi data to v20.
    .word 3372380247
    .word 34073095

    # Prepare the AES ctr input into v28.
    # The ctr data uses big-endian form.
    .word 86536279
    .word 1577455191
    .word 1237626455
2:


    # Load number of rounds
    lwu t0, 240(a3)
    li t1, 14
    li t2, 12
    li t3, 10

    beq t0, t1, aes_gcm_dec_blocks_256
    beq t0, t2, aes_gcm_dec_blocks_192
    beq t0, t3, aes_gcm_dec_blocks_128

.Ldec_end:
    li a0, 0
    ret
.size rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt,.-rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt

# ----------------------------------------------------------------------------
# aes_gcm_dec_blocks_128
#
# AES-128 (10 rounds, 11 round keys) body of the GCM decrypt routine. It is
# entered by a branch from rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt, not by a
# call, so it consumes the register state prepared there and returns straight
# to the original caller.
#
# Scalar registers read here:
#   a0, a1 : input / output pointers (advanced by t0 each iteration)
#   a3     : AES round keys (advanced by 16 per key while loading)
#   a5     : Xi; H is read from a5 + 32 and the final tag is stored via a5
#   a6     : FULL_BLOCK_LEN32, the e32 element count handled per iteration
#   a7     : e32 length used for the return value
#            (NOTE(review): set before the visible part of the caller)
#   t4     : zero-padding e32 length, 0 when the first iteration is unpadded
#   t5     : e32 elements still to process after the first iteration
# Scratch: t0 (bytes per iteration), t1, t6 (a6 / 4).
# Returns: a0 = a7 * 4, the processed size in bytes.
#
# The vector instructions are emitted as pre-encoded `.word` values, e.g.
# 3439489111 is `vsetivli zero, 4, e32, m1, ta, ma` and 220754007 is
# `vsetvli zero, a6, e32, m4, ta, ma`.
# ----------------------------------------------------------------------------
.p2align 3
aes_gcm_dec_blocks_128:
    srli t6, a6, 2
    slli t0, a6, 2

    # Load all 11 aes round keys to v1-v11 registers.
    .word 3439489111
    .word 34005127
    addi a3, a3, 16
    .word 34005255
    addi a3, a3, 16
    .word 34005383
    addi a3, a3, 16
    .word 34005511
    addi a3, a3, 16
    .word 34005639
    addi a3, a3, 16
    .word 34005767
    addi a3, a3, 16
    .word 34005895
    addi a3, a3, 16
    .word 34006023
    addi a3, a3, 16
    .word 34006151
    addi a3, a3, 16
    .word 34006279
    addi a3, a3, 16
    .word 34006407

    # We already have the ciphertext/plaintext and ctr data for the first round.
    # Round sequence on v28: vaesz.vs with v1, then one step per key v2..v11
    # (the last step is the final-round form).
    .word 220754007
    .word 2786307703
    .word 2787192439
    .word 2788241015
    .word 2789289591
    .word 2790338167
    .word 2791386743
    .word 2792435319
    .word 2793483895
    .word 2794532471
    .word 2795581047
    .word 2796662391


    # Compute AES ctr result.
    .word 801902167

    bnez t4, 1f

    ## without padding
    # Store ciphertext/plaintext
    .word 33943079
    j 2f

    ## with padding
1:
    # Store ciphertext/plaintext using mask
    .word 388647

    # Fill zero for the padding blocks
    .word 154071127
    .word 1577074263

    # We have used mask register for `INPUT_PADDING_MASK` before. We need to
    # setup the ctr mask back.
    # ctr mask : [000100010001....]
    .word 201879639
    li t1, 0b10001000
    .word 1577271383
2:



    add a0, a0, t0
    add a1, a1, t0


    .word 220754007

.Ldec_blocks_128:
    # Compute the partial tags.
    # The partial tags will multiply with [H^n, H^n, ..., H^n]
    # [tag0, tag1, ...] =
    #   ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...]) * [H^n, H^n, ..., H^n]
    # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round.
    #
    # Exit through the epilogue of this function. Branching to the AES-256
    # epilogue would only work for as long as both epilogues stay identical.
    beqz t5, .Ldec_blocks_128_end
    .word 3003918967

    .word 86536279
    # Increase ctr in v12.
    .word 13616727
    sub t5, t5, a6
    # Load the next input blocks into v24
    .word 220229719
    .word 33909767
    # Prepare the AES ctr input into v28.
    # The ctr data uses big-endian form.
    .word 1577455191
    add a0, a0, t0
    .word 86011991
    .word 1237626455


    .word 220754007
    .word 2786307703
    .word 2787192439
    .word 2788241015
    .word 2789289591
    .word 2790338167
    .word 2791386743
    .word 2792435319
    .word 2793483895
    .word 2794532471
    .word 2795581047
    .word 2796662391


    # Compute AES ctr plaintext result.
    .word 801902167

    # Store plaintext
    .word 33943079
    add a1, a1, t0

    j .Ldec_blocks_128
.Ldec_blocks_128_end:

    # Add ciphertext into partial tag
    .word 793512535

    .word 3441586263
    # Update current ctr value to v12
    .word 13616727
    # Convert ctr to big-endian counter.
    .word 1220847191
    .word 484903


    # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2).
    # Load H to v1
    addi t1, a5, 32
    .word 3439489111
    .word 33775751
    # Multiply H for each partial tag and XOR them together.
    # Handle 1st partial tag
    .word 1577713751
    .word 2719522935
    # Handle 2nd to N-th partial tags
    # t1 counts e32 elements in steps of 4 (one 128-bit tag) up to a6.
    li t1, 4
1:
    .word 3441586263
    .word 1061372503
    .word 3439489111
    .word 2987532407
    addi t1, t1, 4
    blt t1, a6, 1b


    # Save the final tag
    .word 34070567

    # return the processed size.
    slli a0, a7, 2
    ret
.size aes_gcm_dec_blocks_128,.-aes_gcm_dec_blocks_128
.p2align 3
aes_gcm_dec_blocks_192:
    srli t6, a6, 2
    slli t0, a6, 2

    # We run out of 32 vector registers, so we just preserve some round keys
    # and load the remaining round keys inside the aes body.
# Resident round keys, loaded once into v1-v10:
    #   keys 1, 2, 3, 5, 6, 7, 9, 10, 11 and 12.
    # Round keys fetched into v11 inside the aes body, just before use:
    #   keys 4, 8 and 13.
    # t1 is the scratch address register; a3 itself is left untouched.
    .word 3439489111
    # key 1, straight from (a3)
    .word 34005127
    # key 2
    addi t1, a3, 16
    .word 33775879
    # key 3
    addi t1, a3, 32
    .word 33776007
    # key 5
    addi t1, a3, 64
    .word 33776135
    # key 6
    addi t1, a3, 80
    .word 33776263
    # key 7
    addi t1, a3, 96
    .word 33776391
    # key 9
    addi t1, a3, 128
    .word 33776519
    # key 10
    addi t1, a3, 144
    .word 33776647
    # key 11
    addi t1, a3, 160
    .word 33776775
    # key 12
    addi t1, a3, 176
    .word 33776903

    # First iteration: the input data and the ctr blocks are already in place.
    # Rounds using keys 1-3, then key 4 through v11.
    .word 3439489111
    addi t1, a3, 48
    .word 33777031
    .word 220754007
    .word 2786307703
    .word 2787192439
    .word 2788241015
    .word 2796629623
    # Rounds using keys 5-7, then key 8 through v11.
    .word 3439489111
    addi t1, a3, 112
    .word 33777031
    .word 220754007
    .word 2789289591
    .word 2790338167
    .word 2791386743
    .word 2796629623
    # Rounds using keys 9-12, then the final round with key 13 through v11.
    .word 3439489111
    addi t1, a3, 192
    .word 33777031
    .word 220754007
    .word 2792435319
    .word 2793483895
    .word 2794532471
    .word 2795581047
    .word 2796662391


    # Combine the AES ctr output with the input data.
    .word 801902167

    # t4 != 0 means the first iteration carries leading zero padding.
    bnez t4, .Ldec_192_first_masked

    ## without padding
    # Plain store of the result.
    .word 33943079
    j .Ldec_192_first_done

    ## with padding
.Ldec_192_first_masked:
    # Masked store of the result.
    .word 388647

    # Zero the padding blocks.
    .word 154071127
    .word 1577074263

    # The mask register was holding `INPUT_PADDING_MASK`; put the ctr mask
    # back in place.
    # ctr mask : [000100010001....]
    .word 201879639
    li t1, 0x88
    .word 1577271383
.Ldec_192_first_done:

    # Step both pointers past the first iteration (t0 = a6 * 4 bytes).
    add a1, a1, t0
    add a0, a0, t0

    .word 220754007

.Ldec_blocks_192:
    # Partial tag update:
    #   [tag0, tag1, ...] =
    #     ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...]) * [H^n, ..., H^n]
    # The multiplication by [H^n, ..., H^n] is skipped for the last round,
    # which is finished at .Ldec_blocks_192_end.
    beqz t5, .Ldec_blocks_192_end
    .word 3003918967

    .word 86536279
    # Advance the ctr in v12.
    .word 13616727
    sub t5, t5, a6
    # Fetch the next input blocks into v24.
    .word 220229719
    .word 33909767
    # Build the AES ctr input in v28 (big-endian form).
    .word 1577455191
    add a0, a0, t0
    .word 86011991
    .word 1237626455


    # Rounds using keys 1-3, then key 4 through v11.
    .word 3439489111
    addi t1, a3, 48
    .word 33777031
    .word 220754007
    .word 2786307703
    .word 2787192439
    .word 2788241015
    .word 2796629623
    # Rounds using keys 5-7, then key 8 through v11.
    .word 3439489111
    addi t1, a3, 112
    .word 33777031
    .word 220754007
    .word 2789289591
    .word 2790338167
    .word 2791386743
    .word 2796629623
    # Rounds using keys 9-12, then the final round with key 13 through v11.
    .word 3439489111
    addi t1, a3, 192
    .word 33777031
    .word 220754007
    .word 2792435319
    .word 2793483895
    .word 2794532471
    .word 2795581047
    .word 2796662391


    # Combine the AES ctr output with the input data.
    .word 801902167

    # Store the result and advance the output pointer.
    .word 33943079
    add a1, a1, t0

    j .Ldec_blocks_192
.Ldec_blocks_192_end:

    # Fold the last ciphertext into the partial tags.
    .word 793512535

    .word 3441586263
    # Write the current ctr value to v12.
    .word 13616727
    # Back to a big-endian counter.
    .word 1220847191
    .word 484903


    # H lives at `gcm128_context.Htable[0]` (addr(Xi)+16*2); load it to v1.
    addi t1, a5, 32
    .word 3439489111
    .word 33775751
    # Multiply each partial tag by H and XOR the results together.
    # 1st partial tag:
    .word 1577713751
    .word 2719522935
    # 2nd to N-th partial tags; t1 walks e32 elements in steps of 4 up to a6.
    li t1, 4
.Ldec_192_fold_tags:
    .word 3441586263
    .word 1061372503
    .word 3439489111
    .word 2987532407
    addi t1, t1, 4
    blt t1, a6, .Ldec_192_fold_tags


    # Write back the final tag.
    .word 34070567

    # Return value: processed size in bytes (a7 * 4).
    slli a0, a7, 2
    ret
.size aes_gcm_dec_blocks_192,.-aes_gcm_dec_blocks_192
.p2align 3
aes_gcm_dec_blocks_256:
    slli t0, a6, 2
    srli t6, a6, 2

    # All 32 vector registers are in use, so only some round keys stay
    # resident and the rest are loaded inside the aes body.
    # Resident keys:
    #   keys 1, 2, 4, 5, 7, 8, 10, 11, 13 and 14.
    # Keys loaded inside the aes body:
    #   keys 3, 6, 9, 12 and 15.
# Resident round keys go to v1-v10; t1 is the scratch address register.
    .word 3439489111
    # key 1, straight from (a3)
    .word 34005127
    # key 2
    addi t1, a3, 16
    .word 33775879
    # key 4
    addi t1, a3, 48
    .word 33776007
    # key 5
    addi t1, a3, 64
    .word 33776135
    # key 7
    addi t1, a3, 96
    .word 33776263
    # key 8
    addi t1, a3, 112
    .word 33776391
    # key 10
    addi t1, a3, 144
    .word 33776519
    # key 11
    addi t1, a3, 160
    .word 33776647
    # key 13
    addi t1, a3, 192
    .word 33776775
    # key 14
    addi t1, a3, 208
    .word 33776903

    # First iteration: the input data and the ctr blocks are already in place.
    # Rounds using keys 1-2, then key 3 through v11.
    .word 3439489111
    addi t1, a3, 32
    .word 33777031
    .word 220754007
    .word 2786307703
    .word 2787192439
    .word 2796629623
    # Rounds using keys 4-5, then key 6 through v11.
    .word 3439489111
    addi t1, a3, 80
    .word 33777031
    .word 220754007
    .word 2788241015
    .word 2789289591
    .word 2796629623
    # Rounds using keys 7-8, then key 9 through v11.
    .word 3439489111
    addi t1, a3, 128
    .word 33777031
    .word 220754007
    .word 2790338167
    .word 2791386743
    .word 2796629623
    # Rounds using keys 10-11, then key 12 through v11.
    .word 3439489111
    addi t1, a3, 176
    .word 33777031
    .word 220754007
    .word 2792435319
    .word 2793483895
    .word 2796629623
    # Rounds using keys 13-14, then the final round with key 15 through v11.
    .word 3439489111
    addi t1, a3, 224
    .word 33777031
    .word 220754007
    .word 2794532471
    .word 2795581047
    .word 2796662391


    # Combine the AES ctr output with the input data.
    .word 801902167

    # t4 != 0 means the first iteration carries leading zero padding.
    bnez t4, .Ldec_256_first_masked

    ## without padding
    # Plain store of the result.
    .word 33943079
    j .Ldec_256_first_done

    ## with padding
.Ldec_256_first_masked:
    # Masked store of the result.
    .word 388647

    # Zero the padding blocks.
    .word 154071127
    .word 1577074263

    # The mask register was holding `INPUT_PADDING_MASK`; put the ctr mask
    # back in place.
    # ctr mask : [000100010001....]
    .word 201879639
    li t1, 0x88
    .word 1577271383
.Ldec_256_first_done:

    # Step both pointers past the first iteration by t0 bytes.
    add a1, a1, t0
    add a0, a0, t0

    .word 220754007

.Ldec_blocks_256:
    # Partial tag update:
    #   [tag0, tag1, ...] =
    #     ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...]) * [H^n, ..., H^n]
    # The multiplication by [H^n, ..., H^n] is skipped for the last round,
    # which is finished at .Ldec_blocks_256_end.
    beqz t5, .Ldec_blocks_256_end
    .word 3003918967

    .word 86536279
    # Advance the ctr in v12.
    .word 13616727
    sub t5, t5, a6
    # Fetch the next input blocks into v24.
    .word 220229719
    .word 33909767
    # Build the AES ctr input in v28 (big-endian form).
    .word 1577455191
    add a0, a0, t0
    .word 86011991
    .word 1237626455


    # Rounds using keys 1-2, then key 3 through v11.
    .word 3439489111
    addi t1, a3, 32
    .word 33777031
    .word 220754007
    .word 2786307703
    .word 2787192439
    .word 2796629623
    # Rounds using keys 4-5, then key 6 through v11.
    .word 3439489111
    addi t1, a3, 80
    .word 33777031
    .word 220754007
    .word 2788241015
    .word 2789289591
    .word 2796629623
    # Rounds using keys 7-8, then key 9 through v11.
    .word 3439489111
    addi t1, a3, 128
    .word 33777031
    .word 220754007
    .word 2790338167
    .word 2791386743
    .word 2796629623
    # Rounds using keys 10-11, then key 12 through v11.
    .word 3439489111
    addi t1, a3, 176
    .word 33777031
    .word 220754007
    .word 2792435319
    .word 2793483895
    .word 2796629623
    # Rounds using keys 13-14, then the final round with key 15 through v11.
    .word 3439489111
    addi t1, a3, 224
    .word 33777031
    .word 220754007
    .word 2794532471
    .word 2795581047
    .word 2796662391


    # Combine the AES ctr output with the input data.
    .word 801902167

    # Store the result and advance the output pointer.
    .word 33943079
    add a1, a1, t0

    j .Ldec_blocks_256
.Ldec_blocks_256_end:

    # Fold the last ciphertext into the partial tags.
    .word 793512535

    .word 3441586263
    # Write the current ctr value to v12.
    .word 13616727
    # Back to a big-endian counter.
    .word 1220847191
    .word 484903


    # H lives at `gcm128_context.Htable[0]` (addr(Xi)+16*2); load it to v1.
    addi t1, a5, 32
    .word 3439489111
    .word 33775751
    # Multiply each partial tag by H and XOR the results together.
    # 1st partial tag:
    .word 1577713751
    .word 2719522935
    # 2nd to N-th partial tags; t1 walks e32 elements in steps of 4 up to a6.
    li t1, 4
.Ldec_256_fold_tags:
    .word 3441586263
    .word 1061372503
    .word 3439489111
    .word 2987532407
    addi t1, t1, 4
    blt t1, a6, .Ldec_256_fold_tags


    # Write back the final tag.
    .word 34070567

    # Return value: processed size in bytes (a7 * 4).
    slli a0, a7, 2
    ret
.size aes_gcm_dec_blocks_256,.-aes_gcm_dec_blocks_256