1 #include "arm_arch.h" 2 3 #if __ARM_MAX_ARCH__>=8 4 .arch armv8-a+crypto 5 .text 6 .globl unroll8_eor3_aes_gcm_enc_128_kernel 7 .type unroll8_eor3_aes_gcm_enc_128_kernel,%function 8 .align 4 9 unroll8_eor3_aes_gcm_enc_128_kernel: 10 AARCH64_VALID_CALL_TARGET 11 cbz x1, .L128_enc_ret 12 stp d8, d9, [sp, #-80]! 13 lsr x9, x1, #3 14 mov x16, x4 15 mov x8, x5 16 stp d10, d11, [sp, #16] 17 stp d12, d13, [sp, #32] 18 stp d14, d15, [sp, #48] 19 mov x5, #0xc200000000000000 20 stp x5, xzr, [sp, #64] 21 add x10, sp, #64 22 23 mov x15, #0x100000000 //set up counter increment 24 movi v31.16b, #0x0 25 mov v31.d[1], x15 26 mov x5, x9 27 ld1 { v0.16b}, [x16] //CTR block 0 28 29 sub x5, x5, #1 //byte_len - 1 30 31 and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 32 33 rev32 v30.16b, v0.16b //set up reversed counter 34 35 add v30.4s, v30.4s, v31.4s //CTR block 0 36 37 rev32 v1.16b, v30.16b //CTR block 1 38 add v30.4s, v30.4s, v31.4s //CTR block 1 39 40 rev32 v2.16b, v30.16b //CTR block 2 41 add v30.4s, v30.4s, v31.4s //CTR block 2 42 43 rev32 v3.16b, v30.16b //CTR block 3 44 add v30.4s, v30.4s, v31.4s //CTR block 3 45 46 rev32 v4.16b, v30.16b //CTR block 4 47 add v30.4s, v30.4s, v31.4s //CTR block 4 48 49 rev32 v5.16b, v30.16b //CTR block 5 50 add v30.4s, v30.4s, v31.4s //CTR block 5 51 ldp q26, q27, [x8, #0] //load rk0, rk1 52 53 rev32 v6.16b, v30.16b //CTR block 6 54 add v30.4s, v30.4s, v31.4s //CTR block 6 55 56 rev32 v7.16b, v30.16b //CTR block 7 57 add v30.4s, v30.4s, v31.4s //CTR block 7 58 59 aese v4.16b, v26.16b 60 aesmc v4.16b, v4.16b //AES block 4 - round 0 61 aese v6.16b, v26.16b 62 aesmc v6.16b, v6.16b //AES block 6 - round 0 63 aese v3.16b, v26.16b 64 aesmc v3.16b, v3.16b //AES block 3 - round 0 65 66 aese v0.16b, v26.16b 67 aesmc v0.16b, v0.16b //AES block 0 - round 0 68 aese v1.16b, v26.16b 69 aesmc v1.16b, v1.16b //AES block 1 - round 0 70 aese v2.16b, v26.16b 71 aesmc v2.16b, v2.16b //AES block 2 - round 0 72 73 aese v7.16b, v26.16b 74 aesmc v7.16b, v7.16b //AES block 7 - round 0 75 aese v5.16b, v26.16b 76 aesmc v5.16b, v5.16b //AES block 5 - round 0 77 ldp q28, q26, [x8, #32] //load rk2, rk3 78 79 aese v3.16b, v27.16b 80 aesmc v3.16b, v3.16b //AES block 3 - round 1 81 82 aese v7.16b, v27.16b 83 aesmc v7.16b, v7.16b //AES block 7 - round 1 84 aese v5.16b, v27.16b 85 aesmc v5.16b, v5.16b //AES block 5 - round 1 86 aese v4.16b, v27.16b 87 aesmc v4.16b, v4.16b //AES block 4 - round 1 88 89 aese v2.16b, v27.16b 90 aesmc v2.16b, v2.16b //AES block 2 - round 1 91 aese v6.16b, v27.16b 92 aesmc v6.16b, v6.16b //AES block 6 - round 1 93 aese v0.16b, v27.16b 94 aesmc v0.16b, v0.16b //AES block 0 - round 1 95 96 aese v5.16b, v28.16b 97 aesmc v5.16b, v5.16b //AES block 5 - round 2 98 aese v1.16b, v27.16b 99 aesmc v1.16b, v1.16b //AES block 1 - round 1 100 aese v0.16b, v28.16b 101 aesmc v0.16b, v0.16b //AES block 0 - round 2 102 103 aese v2.16b, v28.16b 104 aesmc v2.16b, v2.16b //AES block 2 - round 2 105 aese v3.16b, v28.16b 106 aesmc v3.16b, v3.16b //AES block 3 - round 2 107 aese v7.16b, v28.16b 108 aesmc v7.16b, v7.16b //AES block 7 - round 2 109 110 aese v1.16b, v28.16b 111 aesmc v1.16b, v1.16b //AES block 1 - round 2 112 aese v6.16b, v28.16b 113 aesmc v6.16b, v6.16b //AES block 6 - round 2 114 aese v4.16b, v28.16b 115 aesmc v4.16b, v4.16b //AES block 4 - round 2 116 117 aese v2.16b, v26.16b 118 aesmc v2.16b, v2.16b //AES block 2 - round 3 119 120 ldp q27, q28, [x8, #64] //load rk4, rk5 121 aese v5.16b, v26.16b 122 aesmc v5.16b, v5.16b //AES block 5 - round 3 123 aese v0.16b, v26.16b 124 aesmc v0.16b, v0.16b //AES block 0 - round 3 125 126 aese v4.16b, v26.16b 127 aesmc v4.16b, v4.16b //AES block 4 - round 3 128 aese v3.16b, v26.16b 129 aesmc v3.16b, v3.16b //AES block 3 - round 3 130 aese v6.16b, v26.16b 131 aesmc v6.16b, v6.16b //AES block 6 - round 3 132 133 aese v7.16b, v26.16b 134 aesmc v7.16b, v7.16b //AES block 7 - round 3 135 136 aese v6.16b, v27.16b 137 aesmc v6.16b, v6.16b //AES block 6 - round 4 138 aese v1.16b, v26.16b 139 aesmc v1.16b, v1.16b //AES block 1 - round 3 140 aese v5.16b, v27.16b 141 aesmc v5.16b, v5.16b //AES block 5 - round 4 142 143 aese v7.16b, v27.16b 144 aesmc v7.16b, v7.16b //AES block 7 - round 4 145 aese v4.16b, v27.16b 146 aesmc v4.16b, v4.16b //AES block 4 - round 4 147 aese v0.16b, v27.16b 148 aesmc v0.16b, v0.16b //AES block 0 - round 4 149 150 aese v1.16b, v27.16b 151 aesmc v1.16b, v1.16b //AES block 1 - round 4 152 aese v2.16b, v27.16b 153 aesmc v2.16b, v2.16b //AES block 2 - round 4 154 aese v3.16b, v27.16b 155 aesmc v3.16b, v3.16b //AES block 3 - round 4 156 157 aese v7.16b, v28.16b 158 aesmc v7.16b, v7.16b //AES block 7 - round 5 159 aese v0.16b, v28.16b 160 aesmc v0.16b, v0.16b //AES block 0 - round 5 161 ldp q26, q27, [x8, #96] //load rk6, rk7 162 163 aese v1.16b, v28.16b 164 aesmc v1.16b, v1.16b //AES block 1 - round 5 165 aese v3.16b, v28.16b 166 aesmc v3.16b, v3.16b //AES block 3 - round 5 167 aese v2.16b, v28.16b 168 aesmc v2.16b, v2.16b //AES block 2 - round 5 169 170 aese v4.16b, v28.16b 171 aesmc v4.16b, v4.16b //AES block 4 - round 5 172 aese v5.16b, v28.16b 173 aesmc v5.16b, v5.16b //AES block 5 - round 5 174 aese v6.16b, v28.16b 175 aesmc v6.16b, v6.16b //AES block 6 - round 5 176 177 aese v4.16b, v26.16b 178 aesmc v4.16b, v4.16b //AES block 4 - round 6 179 aese v3.16b, v26.16b 180 aesmc v3.16b, v3.16b //AES block 3 - round 6 181 aese v2.16b, v26.16b 182 aesmc v2.16b, v2.16b //AES block 2 - round 6 183 184 aese v7.16b, v26.16b 185 aesmc v7.16b, v7.16b //AES block 7 - round 6 186 aese v6.16b, v26.16b 187 aesmc v6.16b, v6.16b //AES block 6 - round 6 188 aese v5.16b, v26.16b 189 aesmc v5.16b, v5.16b //AES block 5 - round 6 190 191 aese v0.16b, v26.16b 192 aesmc v0.16b, v0.16b //AES block 0 - round 6 193 aese v1.16b, v26.16b 194 aesmc v1.16b, v1.16b //AES block 1 - round 6 195 ldp q28, q26, [x8, #128] //load rk8, rk9 196 197 aese v5.16b, v27.16b 198 aesmc v5.16b, v5.16b //AES block 5 - round 7 199 200 ld1 { v19.16b}, [x3] 201 ext v19.16b, v19.16b, v19.16b, #8 202 rev64 v19.16b, v19.16b 203 204 aese v7.16b, v27.16b 205 aesmc v7.16b, v7.16b //AES block 7 - round 7 206 207 aese v4.16b, v27.16b 208 aesmc v4.16b, v4.16b //AES block 4 - round 7 209 aese v3.16b, v27.16b 210 aesmc v3.16b, v3.16b //AES block 3 - round 7 211 aese v6.16b, v27.16b 212 aesmc v6.16b, v6.16b //AES block 6 - round 7 213 214 aese v1.16b, v27.16b 215 aesmc v1.16b, v1.16b //AES block 1 - round 7 216 aese v2.16b, v27.16b 217 aesmc v2.16b, v2.16b //AES block 2 - round 7 218 aese v0.16b, v27.16b 219 aesmc v0.16b, v0.16b //AES block 0 - round 7 220 221 aese v3.16b, v28.16b 222 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 223 aese v6.16b, v28.16b 224 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 225 aese v2.16b, v28.16b 226 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 227 228 aese v7.16b, v28.16b 229 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 230 aese v0.16b, v28.16b 231 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 232 ldr q27, [x8, #160] //load rk10 233 234 aese v3.16b, v26.16b //AES block 8k+11 - round 9 235 aese v4.16b, v28.16b 236 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 237 aese v2.16b, v26.16b //AES block 8k+10 - round 9 238 239 aese v5.16b, v28.16b 240 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 241 aese v1.16b, v28.16b 242 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 243 aese v6.16b, v26.16b //AES block 8k+14 - round 9 244 245 aese v4.16b, v26.16b //AES block 8k+12 - round 9 246 add x5, x5, x0 247 aese v0.16b, v26.16b //AES block 8k+8 - round 9 248 249 aese v7.16b, v26.16b //AES block 8k+15 - round 9 250 aese v5.16b, v26.16b //AES block 8k+13 - round 9 251 aese v1.16b, v26.16b //AES block 8k+9 - round 9 252 253 add x4, x0, x1, lsr #3 //end_input_ptr 254 cmp x0, x5 //check if we have <= 8 blocks 255 b.ge .L128_enc_tail //handle tail 256 257 ldp q8, q9, [x0], #32 //AES block 0, 1 - load plaintext 258 259 ldp q10, q11, [x0], #32 //AES block 2, 3 - load plaintext 260 261 ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext 262 263 ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext 264 cmp x0, x5 //check if we have <= 8 blocks 265 266 .inst 0xce006d08 //eor3 v8.16b, v8.16b, v0.16b, v27.16b //AES block 0 - result 267 rev32 v0.16b, v30.16b //CTR block 8 268 add v30.4s, v30.4s, v31.4s //CTR block 8 269 270 .inst 0xce016d29 //eor3 v9.16b, v9.16b, v1.16b, v27.16b //AES block 1 - result 271 stp q8, q9, [x2], #32 //AES block 0, 1 - store result 272 273 rev32 v1.16b, v30.16b //CTR block 9 274 .inst 0xce056dad //eor3 v13.16b, v13.16b, v5.16b, v27.16b //AES block 5 - result 275 add v30.4s, v30.4s, v31.4s //CTR block 9 276 277 .inst 0xce026d4a //eor3 v10.16b, v10.16b, v2.16b, v27.16b //AES block 2 - result 278 .inst 0xce066dce //eor3 v14.16b, v14.16b, v6.16b, v27.16b //AES block 6 - result 279 .inst 0xce046d8c //eor3 v12.16b, v12.16b, v4.16b, v27.16b //AES block 4 - result 280 281 rev32 v2.16b, v30.16b //CTR block 10 282 add v30.4s, v30.4s, v31.4s //CTR block 10 283 284 .inst 0xce036d6b //eor3 v11.16b, v11.16b, v3.16b, v27.16b //AES block 3 - result 285 .inst 0xce076def //eor3 v15.16b, v15.16b, v7.16b,v27.16b //AES block 7 - result 286 stp q10, q11, [x2], #32 //AES block 2, 3 - store result 287 288 rev32 v3.16b, v30.16b //CTR block 11 289 add v30.4s, v30.4s, v31.4s //CTR block 11 290 stp q12, q13, [x2], #32 //AES block 4, 5 - store result 291 292 stp q14, q15, [x2], #32 //AES block 6, 7 - store result 293 294 rev32 v4.16b, v30.16b //CTR block 12 295 add v30.4s, v30.4s, v31.4s //CTR block 12 296 b.ge .L128_enc_prepretail //do prepretail 297 298 .L128_enc_main_loop: //main loop start 299 rev32 v5.16b, v30.16b //CTR block 8k+13 300 ldr q20, [x3, #128] //load h5l | h5h 301 ext v20.16b, v20.16b, v20.16b, #8 302 ldr q22, [x3, #160] //load h6l | h6h 303 ext v22.16b, v22.16b, v22.16b, #8 304 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 305 306 rev64 v9.16b, v9.16b //GHASH block 8k+1 307 rev64 v8.16b, v8.16b //GHASH block 8k 308 ldr q23, [x3, #176] //load h7l | h7h 309 ext v23.16b, v23.16b, v23.16b, #8 310 ldr q25, [x3, #208] //load h8l | h8h 311 ext v25.16b, v25.16b, v25.16b, #8 312 313 rev32 v6.16b, v30.16b //CTR block 8k+14 314 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 315 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 316 317 ldr q21, [x3, #144] //load h6k | h5k 318 ldr q24, [x3, #192] //load h8k | h7k 319 rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free) 320 rev64 v11.16b, v11.16b //GHASH block 8k+3 321 322 ldp q26, q27, [x8, #0] //load rk0, rk1 323 eor v8.16b, v8.16b, v19.16b //PRE 1 324 rev32 v7.16b, v30.16b //CTR block 8k+15 325 326 rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free) 327 328 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 329 rev64 v10.16b, v10.16b //GHASH block 8k+2 330 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 331 332 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 333 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 334 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 335 336 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 337 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 338 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 339 340 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 341 ldr q23, [x3, #80] //load h3l | h3h 342 ext v23.16b, v23.16b, v23.16b, #8 343 ldr q25, [x3, #112] //load h3l | h3h 344 ext v25.16b, v25.16b, v25.16b, #8 345 aese v5.16b, v26.16b 346 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 347 348 aese v1.16b, v26.16b 349 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 350 aese v4.16b, v26.16b 351 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 352 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 353 354 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 355 aese v2.16b, v26.16b 356 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 357 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 358 359 aese v6.16b, v26.16b 360 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 361 aese v1.16b, v27.16b 362 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 363 aese v0.16b, v26.16b 364 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 365 366 aese v2.16b, v27.16b 367 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 368 aese v3.16b, v26.16b 369 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 370 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 371 372 aese v5.16b, v27.16b 373 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 374 aese v7.16b, v26.16b 375 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 376 aese v0.16b, v27.16b 377 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 378 379 .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b,v9.16b //GHASH block 8k+2, 8k+3 - high 380 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 381 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 382 383 ldp q28, q26, [x8, #32] //load rk2, rk3 384 aese v4.16b, v27.16b 385 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 386 aese v3.16b, v27.16b 387 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 388 389 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 390 aese v7.16b, v27.16b 391 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 392 aese v6.16b, v27.16b 393 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 394 395 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 396 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 397 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 398 399 rev64 v14.16b, v14.16b //GHASH block 8k+6 (t0, t1, and t2 free) 400 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 401 402 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 403 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 404 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 405 406 aese v5.16b, v28.16b 407 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 408 aese v4.16b, v28.16b 409 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 410 aese v2.16b, v28.16b 411 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 412 413 aese v1.16b, v28.16b 414 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 415 .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 416 aese v6.16b, v28.16b 417 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 418 419 aese v0.16b, v28.16b 420 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 421 aese v3.16b, v28.16b 422 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 423 aese v7.16b, v28.16b 424 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 425 426 aese v6.16b, v26.16b 427 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 428 ldr q21, [x3, #48] //load h2k | h1k 429 ldr q24, [x3, #96] //load h4k | h3k 430 rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free) 431 432 ldp q27, q28, [x8, #64] //load rk4, rk5 433 aese v2.16b, v26.16b 434 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 435 aese v1.16b, v26.16b 436 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 437 438 ldr q20, [x3, #32] //load h1l | h1h 439 ext v20.16b, v20.16b, v20.16b, #8 440 ldr q22, [x3, #64] //load h1l | h1h 441 ext v22.16b, v22.16b, v22.16b, #8 442 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 443 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 444 445 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 446 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 447 448 aese v0.16b, v26.16b 449 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 450 aese v3.16b, v26.16b 451 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 452 453 aese v7.16b, v26.16b 454 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 455 aese v4.16b, v26.16b 456 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 457 458 aese v5.16b, v26.16b 459 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 460 aese v0.16b, v27.16b 461 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 462 463 aese v7.16b, v27.16b 464 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 465 aese v3.16b, v27.16b 466 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 467 aese v4.16b, v27.16b 468 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 469 470 aese v5.16b, v27.16b 471 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 472 aese v6.16b, v27.16b 473 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 474 aese v1.16b, v27.16b 475 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 476 477 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 478 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 479 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 480 481 aese v2.16b, v27.16b 482 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 483 ldp q26, q27, [x8, #96] //load rk6, rk7 484 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 485 486 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 487 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 488 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 489 490 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 491 aese v2.16b, v28.16b 492 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 493 aese v5.16b, v28.16b 494 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 495 496 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 497 .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 498 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 499 500 .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 501 aese v6.16b, v28.16b 502 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 503 504 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 505 aese v7.16b, v28.16b 506 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 507 aese v1.16b, v28.16b 508 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 509 510 aese v3.16b, v28.16b 511 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 512 aese v4.16b, v28.16b 513 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 514 aese v0.16b, v28.16b 515 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 516 517 .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 518 ldr d16, [x10] //MODULO - load modulo constant 519 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 520 521 aese v7.16b, v26.16b 522 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 523 aese v5.16b, v26.16b 524 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 525 526 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 527 aese v1.16b, v26.16b 528 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 529 aese v2.16b, v26.16b 530 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 531 532 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 533 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 534 ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load plaintext 535 536 aese v3.16b, v26.16b 537 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 538 rev32 v20.16b, v30.16b //CTR block 8k+16 539 add v30.4s, v30.4s, v31.4s //CTR block 8k+16 540 541 aese v4.16b, v26.16b 542 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 543 aese v0.16b, v26.16b 544 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 545 aese v6.16b, v26.16b 546 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 547 548 .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 549 ldp q28, q26, [x8, #128] //load rk8, rk9 550 .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 551 552 aese v2.16b, v27.16b 553 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 554 aese v7.16b, v27.16b 555 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 556 ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load plaintext 557 558 aese v5.16b, v27.16b 559 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 560 aese v6.16b, v27.16b 561 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 562 aese v1.16b, v27.16b 563 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 564 565 pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 566 aese v0.16b, v27.16b 567 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 568 aese v4.16b, v27.16b 569 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 570 571 rev32 v22.16b, v30.16b //CTR block 8k+17 572 aese v3.16b, v27.16b 573 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 574 575 aese v5.16b, v28.16b 576 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 577 ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load plaintext 578 add v30.4s, v30.4s, v31.4s //CTR block 8k+17 579 580 aese v2.16b, v28.16b 581 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 582 aese v1.16b, v28.16b 583 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 584 aese v7.16b, v28.16b 585 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 586 587 aese v4.16b, v28.16b 588 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 589 .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 590 ldr q27, [x8, #160] //load rk10 591 592 ext v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 593 rev32 v23.16b, v30.16b //CTR block 8k+18 594 add v30.4s, v30.4s, v31.4s //CTR block 8k+18 595 aese v3.16b, v28.16b 596 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 597 598 aese v0.16b, v28.16b 599 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 600 .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 601 aese v6.16b, v28.16b 602 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 603 604 aese v2.16b, v26.16b //AES block 8k+10 - round 9 605 aese v4.16b, v26.16b //AES block 8k+12 - round 9 606 aese v1.16b, v26.16b //AES block 8k+9 - round 9 607 608 ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load plaintext 609 rev32 v25.16b, v30.16b //CTR block 8k+19 610 add v30.4s, v30.4s, v31.4s //CTR block 8k+19 611 612 cmp x0, x5 //.LOOP CONTROL 613 .inst 0xce046d8c //eor3 v12.16b, v12.16b, v4.16b, v27.16b //AES block 4 - result 614 aese v7.16b, v26.16b //AES block 8k+15 - round 9 615 616 aese v6.16b, v26.16b //AES block 8k+14 - round 9 617 aese v3.16b, v26.16b //AES block 8k+11 - round 9 618 619 .inst 0xce026d4a //eor3 v10.16b, v10.16b, v2.16b, v27.16b //AES block 8k+10 - result 620 621 mov v2.16b, v23.16b //CTR block 8k+18 622 aese v0.16b, v26.16b //AES block 8k+8 - round 9 623 624 rev32 v4.16b, v30.16b //CTR block 8k+20 625 add v30.4s, v30.4s, v31.4s //CTR block 8k+20 626 627 .inst 0xce076def //eor3 v15.16b, v15.16b, v7.16b, v27.16b //AES block 7 - result 628 aese v5.16b, v26.16b //AES block 8k+13 - round 9 629 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 630 631 .inst 0xce016d29 //eor3 v9.16b, v9.16b, v1.16b, v27.16b //AES block 8k+9 - result 632 .inst 0xce036d6b //eor3 v11.16b, v11.16b, v3.16b, v27.16b //AES block 8k+11 - result 633 mov v3.16b, v25.16b //CTR block 8k+19 634 635 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 636 .inst 0xce056dad //eor3 v13.16b, v13.16b, v5.16b, v27.16b //AES block 5 - result 637 mov v1.16b, v22.16b //CTR block 8k+17 638 639 .inst 0xce006d08 //eor3 v8.16b, v8.16b, v0.16b, v27.16b //AES block 8k+8 - result 640 mov v0.16b, v20.16b //CTR block 8k+16 641 stp q8, q9, [x2], #32 //AES block 8k+8, 8k+9 - store result 642 643 stp q10, q11, [x2], #32 //AES block 8k+10, 8k+11 - store result 644 .inst 0xce066dce //eor3 v14.16b, v14.16b, v6.16b, v27.16b //AES block 6 - result 645 646 stp q12, q13, [x2], #32 //AES block 8k+12, 8k+13 - store result 647 .inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 648 649 stp q14, q15, [x2], #32 //AES block 8k+14, 8k+15 - store result 650 b.lt .L128_enc_main_loop 651 652 .L128_enc_prepretail: //PREPRETAIL 653 rev32 v5.16b, v30.16b //CTR block 8k+13 654 ldr q23, [x3, #176] //load h7l | h7h 655 ext v23.16b, v23.16b, v23.16b, #8 656 ldr q25, [x3, #208] //load h8l | h8h 657 ext v25.16b, v25.16b, v25.16b, #8 658 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 659 660 ldr q20, [x3, #128] //load h5l | h5h 661 ext v20.16b, v20.16b, v20.16b, #8 662 ldr q22, [x3, #160] //load h6l | h6h 663 ext v22.16b, v22.16b, v22.16b, #8 664 rev64 v8.16b, v8.16b //GHASH block 8k 665 rev64 v9.16b, v9.16b //GHASH block 8k+1 666 667 ldr q21, [x3, #144] //load h6k | h5k 668 ldr q24, [x3, #192] //load h6k | h5k 669 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 670 rev64 v11.16b, v11.16b //GHASH block 8k+3 671 672 rev64 v10.16b, v10.16b //GHASH block 8k+2 673 eor v8.16b, v8.16b, v19.16b //PRE 1 674 675 rev32 v6.16b, v30.16b //CTR block 8k+14 676 677 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 678 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 679 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 680 681 rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free) 682 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 683 684 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 685 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 686 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 687 688 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 689 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 690 691 ldp q26, q27, [x8, #0] //load rk0, rk1 692 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 693 694 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 695 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 696 697 rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free) 698 rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free) 699 700 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 701 702 rev32 v7.16b, v30.16b //CTR block 8k+15 703 704 rev64 v14.16b, v14.16b //GHASH block 8k+6 (t0, t1, and t2 free) 705 706 aese v2.16b, v26.16b 707 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 708 709 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 710 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 711 712 aese v6.16b, v26.16b 713 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 714 aese v3.16b, v26.16b 715 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 716 717 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 718 aese v1.16b, v26.16b 719 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 720 721 .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 722 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 723 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 724 725 aese v5.16b, v26.16b 726 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 727 aese v7.16b, v26.16b 728 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 729 730 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 731 aese v4.16b, v26.16b 732 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 733 aese v0.16b, v26.16b 734 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 735 736 aese v3.16b, v27.16b 737 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 738 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 739 740 ldr q23, [x3, #80] //load h3l | h3h 741 ext v23.16b, v23.16b, v23.16b, #8 742 ldr q25, [x3, #112] //load h4l | h4h 743 ext v25.16b, v25.16b, v25.16b, #8 744 745 ldp q28, q26, [x8, #32] //load rk2, rk3 746 aese v5.16b, v27.16b 747 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 748 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 749 750 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 751 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 752 753 aese v1.16b, v27.16b 754 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 755 aese v0.16b, v27.16b 756 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 757 758 .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 759 ldr q21, [x3, #48] //load h2k | h1k 760 ldr q24, [x3, #96] //load h4k | h3k 761 aese v2.16b, v27.16b 762 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 763 764 aese v4.16b, v27.16b 765 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 766 aese v7.16b, v27.16b 767 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 768 769 aese v5.16b, v28.16b 770 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 771 aese v2.16b, v28.16b 772 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 773 aese v3.16b, v28.16b 774 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 775 776 aese v1.16b, v28.16b 777 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 778 aese v6.16b, v27.16b 779 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 780 aese v4.16b, v28.16b 781 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 782 783 aese v5.16b, v26.16b 784 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 785 aese v0.16b, v28.16b 786 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 787 788 aese v6.16b, v28.16b 789 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 790 aese v7.16b, v28.16b 791 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 792 ldp q27, q28, [x8, #64] //load rk4, rk5 793 794 ldr q20, [x3, #32] //load h1l | h1h 795 ext v20.16b, v20.16b, v20.16b, #8 796 ldr q22, [x3, #64] //load h1l | h1h 797 ext v22.16b, v22.16b, v22.16b, #8 798 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 799 aese v0.16b, v26.16b 800 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 801 802 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 803 aese v6.16b, v26.16b 804 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 805 aese v3.16b, v26.16b 806 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 807 808 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 809 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 810 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 811 812 aese v2.16b, v26.16b 813 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 814 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 815 816 aese v7.16b, v26.16b 817 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 818 aese v1.16b, v26.16b 819 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 820 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 821 822 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 823 aese v4.16b, v26.16b 824 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 825 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 826 827 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 828 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 829 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 830 831 aese v1.16b, v27.16b 832 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 833 aese v3.16b, v27.16b 834 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 835 .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 836 837 .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 838 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 839 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 840 841 aese v1.16b, v28.16b 842 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 843 aese v6.16b, v27.16b 844 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 845 aese v0.16b, v27.16b 846 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 847 848 aese v7.16b, v27.16b 849 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 850 aese v2.16b, v27.16b 851 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 852 853 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 854 aese v4.16b, v27.16b 855 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 856 aese v5.16b, v27.16b 857 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 858 859 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 860 ldp q26, q27, [x8, #96] //load rk6, rk7 861 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 862 863 .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 864 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 865 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 866 867 aese v0.16b, v28.16b 868 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 869 aese v7.16b, v28.16b 870 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 871 ldr d16, [x10] //MODULO - load modulo constant 872 873 aese v2.16b, v28.16b 874 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 875 aese v4.16b, v28.16b 876 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 877 878 .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 879 aese v5.16b, v28.16b 880 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 881 aese v6.16b, v28.16b 882 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 883 884 aese v3.16b, v28.16b 885 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 886 aese v4.16b, v26.16b 887 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 888 889 aese v5.16b, v26.16b 890 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 891 aese v2.16b, v26.16b 892 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 893 aese v0.16b, v26.16b 894 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 895 896 aese v3.16b, v26.16b 897 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 898 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 899 .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 900 901 aese v6.16b, v26.16b 902 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 903 aese v1.16b, v26.16b 904 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 905 aese v7.16b, v26.16b 906 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 907 908 pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 909 .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 910 ldp q28, q26, [x8, #128] //load rk8, rk9 911 912 aese v3.16b, v27.16b 913 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 914 aese v6.16b, v27.16b 915 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 916 aese v1.16b, v27.16b 917 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 918 ext v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 919 920 aese v5.16b, v27.16b 921 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 922 aese v0.16b, v27.16b 923 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 924 .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 925 926 aese v2.16b, v27.16b 927 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 928 aese v7.16b, v27.16b 929 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 930 931 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 932 aese v4.16b, v27.16b 933 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 934 935 aese v7.16b, v28.16b 936 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 937 aese v2.16b, v28.16b 938 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 939 aese v1.16b, v28.16b 940 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 941 ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 942 943 aese v6.16b, v28.16b 944 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 945 .inst 0xce114a73 //eor3 v19.16b, v19.16b, v17.16b, v18.16b //MODULO - fold into low 946 aese v4.16b, v28.16b 947 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 948 949 aese v3.16b, v28.16b 950 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 951 aese v0.16b, v28.16b 952 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 953 aese v5.16b, v28.16b 954 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 955 956 ldr q27, [x8, #160] //load rk10 957 aese v6.16b, v26.16b //AES block 8k+14 - round 9 958 aese v2.16b, v26.16b //AES block 8k+10 - round 9 959 960 aese v0.16b, v26.16b //AES block 8k+8 - round 9 961 aese v1.16b, v26.16b //AES block 8k+9 - round 9 962 963 aese v3.16b, v26.16b //AES block 8k+11 - round 9 964 aese v5.16b, v26.16b //AES block 8k+13 - round 9 965 966 aese v4.16b, v26.16b //AES block 8k+12 - round 9 967 aese v7.16b, v26.16b //AES block 8k+15 - round 9 968 .L128_enc_tail: //TAIL 969 970 sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process 971 ldr q8, [x0], #16 //AES block 8k+8 - load plaintext 972 973 mov v29.16b, v27.16b 974 ldp q20, q21, [x3, #128] //load h5l | h5h 975 ext v20.16b, v20.16b, v20.16b, #8 976 977 .inst 0xce007509 //eor3 v9.16b, v8.16b, v0.16b, v29.16b //AES block 8k+8 - result 978 ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag 979 ldp q22, q23, [x3, #160] //load h6l | h6h 980 ext v22.16b, v22.16b, v22.16b, #8 981 ext v23.16b, v23.16b, v23.16b, #8 982 983 ldp q24, q25, [x3, #192] //load h8k | h7k 984 ext v25.16b, v25.16b, v25.16b, #8 985 cmp x5, #112 986 b.gt .L128_enc_blocks_more_than_7 987 988 mov v7.16b, v6.16b 989 mov v6.16b, v5.16b 990 movi v17.8b, #0 991 992 cmp x5, #96 993 sub v30.4s, v30.4s, v31.4s 994 mov v5.16b, v4.16b 995 996 mov v4.16b, v3.16b 997 mov v3.16b, v2.16b 998 mov v2.16b, v1.16b 999 1000 movi v19.8b, #0 1001 movi v18.8b, #0 1002 b.gt .L128_enc_blocks_more_than_6 1003 1004 mov v7.16b, v6.16b 1005 cmp x5, #80 1006 1007 sub v30.4s, v30.4s, v31.4s 1008 mov v6.16b, v5.16b 1009 mov v5.16b, v4.16b 1010 1011 mov v4.16b, v3.16b 1012 mov v3.16b, v1.16b 1013 b.gt .L128_enc_blocks_more_than_5 1014 1015 cmp x5, #64 1016 sub v30.4s, v30.4s, v31.4s 1017 1018 mov v7.16b, v6.16b 1019 mov v6.16b, v5.16b 1020 1021 mov v5.16b, v4.16b 1022 mov v4.16b, v1.16b 1023 b.gt .L128_enc_blocks_more_than_4 1024 1025 mov v7.16b, v6.16b 1026 sub v30.4s, v30.4s, v31.4s 1027 mov v6.16b, v5.16b 1028 1029 mov v5.16b, v1.16b 1030 cmp x5, #48 1031 b.gt .L128_enc_blocks_more_than_3 1032 1033 sub v30.4s, v30.4s, v31.4s 1034 mov v7.16b, v6.16b 1035 mov v6.16b, v1.16b 1036 1037 cmp x5, #32 1038 ldr q24, [x3, #96] //load h4k | h3k 1039 b.gt .L128_enc_blocks_more_than_2 1040 1041 cmp x5, #16 1042 1043 sub v30.4s, v30.4s, v31.4s 1044 mov v7.16b, v1.16b 1045 b.gt .L128_enc_blocks_more_than_1 1046 1047 ldr q21, [x3, #48] //load h2k | h1k 1048 sub v30.4s, v30.4s, v31.4s 1049 b .L128_enc_blocks_less_than_1 1050 .L128_enc_blocks_more_than_7: //blocks left > 7 1051 st1 { v9.16b}, [x2], #16 //AES final-7 block - store result 1052 1053 rev64 v8.16b, v9.16b //GHASH final-7 block 1054 ldr q9, [x0], #16 //AES final-6 block - load plaintext 1055 1056 eor v8.16b, v8.16b, v16.16b //feed in partial tag 1057 1058 ins v27.d[0], v8.d[1] //GHASH final-7 block - mid 1059 1060 pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high 1061 1062 ins v18.d[0], v24.d[1] //GHASH final-7 block - mid 1063 1064 eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid 1065 movi v16.8b, #0 //suppress further partial tag feed in 1066 1067 .inst 0xce017529 //eor3 v9.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result 1068 1069 pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid 1070 pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low 1071 .L128_enc_blocks_more_than_6: //blocks left > 6 1072 1073 st1 { v9.16b}, [x2], #16 //AES final-6 block - store result 1074 1075 rev64 v8.16b, v9.16b //GHASH final-6 block 1076 ldr q9, [x0], #16 //AES final-5 block - load plaintext 1077 1078 eor v8.16b, v8.16b, v16.16b //feed in partial tag 1079 1080 ins v27.d[0], v8.d[1] //GHASH final-6 block - mid 1081 1082 .inst 0xce027529 //eor3 v9.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result 1083 pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low 1084 1085 eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid 1086 movi v16.8b, #0 //suppress further partial tag feed in 1087 1088 pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid 1089 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high 1090 1091 eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low 1092 1093 eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid 1094 eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high 1095 .L128_enc_blocks_more_than_5: //blocks left > 5 1096 1097 st1 { v9.16b}, [x2], #16 //AES final-5 block - store result 1098 1099 rev64 v8.16b, v9.16b //GHASH final-5 block 1100 1101 eor v8.16b, v8.16b, v16.16b //feed in partial tag 1102 1103 ins v27.d[0], v8.d[1] //GHASH final-5 block - mid 1104 ldr q9, [x0], #16 //AES final-4 block - load plaintext 1105 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high 1106 1107 eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high 1108 1109 eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid 1110 1111 ins v27.d[1], v27.d[0] //GHASH final-5 block - mid 1112 1113 .inst 0xce037529 //eor3 v9.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result 1114 pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low 1115 movi v16.8b, #0 //suppress further partial tag feed in 1116 1117 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid 1118 eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low 1119 1120 eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid 1121 .L128_enc_blocks_more_than_4: //blocks left > 4 1122 1123 st1 { v9.16b}, [x2], #16 //AES final-4 block - store result 1124 1125 rev64 v8.16b, v9.16b //GHASH final-4 block 1126 1127 ldr q9, [x0], #16 //AES final-3 block - load plaintext 1128 1129 eor v8.16b, v8.16b, v16.16b //feed in partial tag 1130 1131 ins v27.d[0], v8.d[1] //GHASH final-4 block - mid 1132 movi v16.8b, #0 //suppress further partial tag feed in 1133 pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high 1134 1135 eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid 1136 1137 pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low 1138 1139 eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high 1140 pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid 1141 1142 eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low 1143 1144 .inst 0xce047529 //eor3 v9.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result 1145 eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid 1146 .L128_enc_blocks_more_than_3: //blocks left > 3 1147 1148 st1 { v9.16b}, [x2], #16 //AES final-3 block - store result 1149 1150 ldr q25, [x3, #112] //load h4l | h4h 1151 ext v25.16b, v25.16b, v25.16b, #8 1152 1153 rev64 v8.16b, v9.16b //GHASH final-3 block 1154 1155 eor v8.16b, v8.16b, v16.16b //feed in partial tag 1156 movi v16.8b, #0 //suppress further partial tag feed in 1157 1158 ins v27.d[0], v8.d[1] //GHASH final-3 block - mid 1159 ldr q24, [x3, #96] //load h4k | h3k 1160 pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low 1161 1162 ldr q9, [x0], #16 //AES final-2 block - load plaintext 1163 1164 eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid 1165 1166 ins v27.d[1], v27.d[0] //GHASH final-3 block - mid 1167 eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low 1168 1169 .inst 0xce057529 //eor3 v9.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result 1170 1171 pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid 1172 pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high 1173 1174 eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid 1175 eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high 1176 .L128_enc_blocks_more_than_2: //blocks left > 2 1177 1178 st1 { v9.16b}, [x2], #16 //AES final-2 block - store result 1179 1180 rev64 v8.16b, v9.16b //GHASH final-2 block 1181 1182 eor v8.16b, v8.16b, v16.16b //feed in partial tag 1183 1184 ldr q9, [x0], #16 //AES final-1 block - load plaintext 1185 1186 ins v27.d[0], v8.d[1] //GHASH final-2 block - mid 1187 ldr q23, [x3, #80] //load h3l | h3h 1188 ext v23.16b, v23.16b, v23.16b, #8 1189 movi v16.8b, #0 //suppress further partial tag feed in 1190 1191 eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid 1192 .inst 0xce067529 //eor3 v9.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result 1193 1194 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high 1195 1196 pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low 1197 pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid 1198 1199 eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high 1200 1201 eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid 1202 eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low 1203 .L128_enc_blocks_more_than_1: //blocks left > 1 1204 1205 st1 { v9.16b}, [x2], #16 //AES final-1 block - store result 1206 1207 ldr q22, [x3, #64] //load h2l | h2h 1208 ext v22.16b, v22.16b, v22.16b, #8 1209 rev64 v8.16b, v9.16b //GHASH final-1 block 1210 ldr q9, [x0], #16 //AES final block - load plaintext 1211 1212 eor v8.16b, v8.16b, v16.16b //feed in partial tag 1213 1214 movi v16.8b, #0 //suppress further partial tag feed in 1215 ins v27.d[0], v8.d[1] //GHASH final-1 block - mid 1216 .inst 0xce077529 //eor3 v9.16b, v9.16b, v7.16b, v29.16b //AES final block - result 1217 1218 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high 1219 1220 eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid 1221 1222 ldr q21, [x3, #48] //load h2k | h1k 1223 1224 ins v27.d[1], v27.d[0] //GHASH final-1 block - mid 1225 1226 pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low 1227 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid 1228 1229 eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high 1230 1231 eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid 1232 eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low 1233 .L128_enc_blocks_less_than_1: //blocks left <= 1 1234 1235 rev32 v30.16b, v30.16b 1236 str q30, [x16] //store the updated counter 1237 and x1, x1, #127 //bit_length %= 128 1238 1239 sub x1, x1, #128 //bit_length -= 128 1240 1241 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) 1242 1243 mvn x6, xzr //temp0_x = 0xffffffffffffffff 1244 ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored 1245 and x1, x1, #127 //bit_length %= 128 1246 1247 lsr x6, x6, x1 //temp0_x is mask for top 64b of last block 1248 mvn x7, xzr //temp1_x = 0xffffffffffffffff 1249 cmp x1, #64 1250 1251 csel x13, x7, x6, lt 1252 csel x14, x6, xzr, lt 1253 1254 mov v0.d[1], x14 1255 mov v0.d[0], x13 //ctr0b is mask for last block 1256 1257 and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits 1258 1259 rev64 v8.16b, v9.16b //GHASH final block 1260 1261 bif v9.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing 1262 st1 { v9.16b}, [x2] //store all 16B 1263 1264 eor v8.16b, v8.16b, v16.16b //feed in partial tag 1265 1266 ins v16.d[0], v8.d[1] //GHASH final block - mid 1267 1268 eor v16.8b, v16.8b, v8.8b //GHASH final block - mid 1269 ldr q20, [x3, #32] //load h1l | h1h 1270 ext v20.16b, v20.16b, v20.16b, #8 1271 1272 pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid 1273 1274 pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high 1275 eor v18.16b, v18.16b, v16.16b //GHASH final block - mid 1276 ldr d16, [x10] //MODULO - load modulo constant 1277 1278 pmull v26.1q, v8.1d, v20.1d //GHASH final block - low 1279 1280 eor v17.16b, v17.16b, v28.16b //GHASH final block - high 1281 1282 eor v19.16b, v19.16b, v26.16b //GHASH final block - low 1283 1284 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 1285 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 1286 1287 .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 1288 1289 .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 1290 1291 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 1292 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 1293 1294 .inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 1295 ext v19.16b, v19.16b, v19.16b, #8 1296 rev64 v19.16b, v19.16b 1297 st1 { v19.16b }, [x3] 1298 mov x0, x9 1299 1300 ldp d10, d11, [sp, #16] 1301 ldp d12, d13, [sp, #32] 1302 ldp d14, d15, [sp, #48] 1303 ldp d8, d9, [sp], #80 1304 ret 1305 1306 .L128_enc_ret: 1307 mov w0, #0x0 1308 ret 1309 .size unroll8_eor3_aes_gcm_enc_128_kernel,.-unroll8_eor3_aes_gcm_enc_128_kernel 1310 .globl unroll8_eor3_aes_gcm_dec_128_kernel 1311 .type unroll8_eor3_aes_gcm_dec_128_kernel,%function 1312 .align 4 1313 unroll8_eor3_aes_gcm_dec_128_kernel: 1314 AARCH64_VALID_CALL_TARGET 1315 cbz x1, .L128_dec_ret 1316 stp d8, d9, [sp, #-80]! 1317 lsr x9, x1, #3 1318 mov x16, x4 1319 mov x8, x5 1320 stp d10, d11, [sp, #16] 1321 stp d12, d13, [sp, #32] 1322 stp d14, d15, [sp, #48] 1323 mov x5, #0xc200000000000000 1324 stp x5, xzr, [sp, #64] 1325 add x10, sp, #64 1326 1327 mov x5, x9 1328 ld1 { v0.16b}, [x16] //CTR block 0 1329 1330 ldp q26, q27, [x8, #0] //load rk0, rk1 1331 sub x5, x5, #1 //byte_len - 1 1332 1333 mov x15, #0x100000000 //set up counter increment 1334 movi v31.16b, #0x0 1335 mov v31.d[1], x15 1336 ld1 { v19.16b}, [x3] 1337 ext v19.16b, v19.16b, v19.16b, #8 1338 rev64 v19.16b, v19.16b 1339 1340 rev32 v30.16b, v0.16b //set up reversed counter 1341 1342 aese v0.16b, v26.16b 1343 aesmc v0.16b, v0.16b //AES block 0 - round 0 1344 1345 add v30.4s, v30.4s, v31.4s //CTR block 0 1346 1347 rev32 v1.16b, v30.16b //CTR block 1 1348 add v30.4s, v30.4s, v31.4s //CTR block 1 1349 1350 and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 1351 1352 rev32 v2.16b, v30.16b //CTR block 2 1353 add v30.4s, v30.4s, v31.4s //CTR block 2 1354 aese v1.16b, v26.16b 1355 aesmc v1.16b, v1.16b //AES block 1 - round 0 1356 1357 rev32 v3.16b, v30.16b //CTR block 3 1358 add v30.4s, v30.4s, v31.4s //CTR block 3 1359 1360 aese v0.16b, v27.16b 1361 aesmc v0.16b, v0.16b //AES block 0 - round 1 1362 aese v1.16b, v27.16b 1363 aesmc v1.16b, v1.16b //AES block 1 - round 1 1364 1365 rev32 v4.16b, v30.16b //CTR block 4 1366 add v30.4s, v30.4s, v31.4s //CTR block 4 1367 1368 rev32 v5.16b, v30.16b //CTR block 5 1369 add v30.4s, v30.4s, v31.4s //CTR block 5 1370 1371 aese v2.16b, v26.16b 1372 aesmc v2.16b, v2.16b //AES block 2 - round 0 1373 1374 rev32 v6.16b, v30.16b //CTR block 6 1375 add v30.4s, v30.4s, v31.4s //CTR block 6 1376 aese v5.16b, v26.16b 1377 aesmc v5.16b, v5.16b //AES block 5 - round 0 1378 1379 aese v3.16b, v26.16b 1380 aesmc v3.16b, v3.16b //AES block 3 - round 0 1381 aese v4.16b, v26.16b 1382 aesmc v4.16b, v4.16b //AES block 4 - round 0 1383 1384 rev32 v7.16b, v30.16b //CTR block 7 1385 1386 aese v6.16b, v26.16b 1387 aesmc v6.16b, v6.16b //AES block 6 - round 0 1388 aese v2.16b, v27.16b 1389 aesmc v2.16b, v2.16b //AES block 2 - round 1 1390 1391 aese v7.16b, v26.16b 1392 aesmc v7.16b, v7.16b //AES block 7 - round 0 1393 1394 ldp q28, q26, [x8, #32] //load rk2, rk3 1395 1396 aese v6.16b, v27.16b 1397 aesmc v6.16b, v6.16b //AES block 6 - round 1 1398 aese v5.16b, v27.16b 1399 aesmc v5.16b, v5.16b //AES block 5 - round 1 1400 1401 aese v4.16b, v27.16b 1402 aesmc v4.16b, v4.16b //AES block 4 - round 1 1403 aese v7.16b, v27.16b 1404 aesmc v7.16b, v7.16b //AES block 7 - round 1 1405 1406 aese v7.16b, v28.16b 1407 aesmc v7.16b, v7.16b //AES block 7 - round 2 1408 aese v0.16b, v28.16b 1409 aesmc v0.16b, v0.16b //AES block 0 - round 2 1410 aese v3.16b, v27.16b 1411 aesmc v3.16b, v3.16b //AES block 3 - round 1 1412 1413 aese v6.16b, v28.16b 1414 aesmc v6.16b, v6.16b //AES block 6 - round 2 1415 aese v2.16b, v28.16b 1416 aesmc v2.16b, v2.16b //AES block 2 - round 2 1417 aese v5.16b, v28.16b 1418 aesmc v5.16b, v5.16b //AES block 5 - round 2 1419 1420 aese v4.16b, v28.16b 1421 aesmc v4.16b, v4.16b //AES block 4 - round 2 1422 aese v3.16b, v28.16b 1423 aesmc v3.16b, v3.16b //AES block 3 - round 2 1424 aese v1.16b, v28.16b 1425 aesmc v1.16b, v1.16b //AES block 1 - round 2 1426 1427 aese v6.16b, v26.16b 1428 aesmc v6.16b, v6.16b //AES block 6 - round 3 1429 aese v2.16b, v26.16b 1430 aesmc v2.16b, v2.16b //AES block 2 - round 3 1431 1432 ldp q27, q28, [x8, #64] //load rk4, rk5 1433 aese v5.16b, v26.16b 1434 aesmc v5.16b, v5.16b //AES block 5 - round 3 1435 1436 aese v0.16b, v26.16b 1437 aesmc v0.16b, v0.16b //AES block 0 - round 3 1438 aese v7.16b, v26.16b 1439 aesmc v7.16b, v7.16b //AES block 7 - round 3 1440 1441 aese v3.16b, v26.16b 1442 aesmc v3.16b, v3.16b //AES block 3 - round 3 1443 aese v1.16b, v26.16b 1444 aesmc v1.16b, v1.16b //AES block 1 - round 3 1445 1446 aese v0.16b, v27.16b 1447 aesmc v0.16b, v0.16b //AES block 0 - round 4 1448 aese v7.16b, v27.16b 1449 aesmc v7.16b, v7.16b //AES block 7 - round 4 1450 aese v4.16b, v26.16b 1451 aesmc v4.16b, v4.16b //AES block 4 - round 3 1452 1453 aese v6.16b, v27.16b 1454 aesmc v6.16b, v6.16b //AES block 6 - round 4 1455 aese v1.16b, v27.16b 1456 aesmc v1.16b, v1.16b //AES block 1 - round 4 1457 aese v3.16b, v27.16b 1458 aesmc v3.16b, v3.16b //AES block 3 - round 4 1459 1460 aese v5.16b, v27.16b 1461 aesmc v5.16b, v5.16b //AES block 5 - round 4 1462 aese v4.16b, v27.16b 1463 aesmc v4.16b, v4.16b //AES block 4 - round 4 1464 aese v2.16b, v27.16b 1465 aesmc v2.16b, v2.16b //AES block 2 - round 4 1466 1467 ldp q26, q27, [x8, #96] //load rk6, rk7 1468 aese v2.16b, v28.16b 1469 aesmc v2.16b, v2.16b //AES block 2 - round 5 1470 aese v3.16b, v28.16b 1471 aesmc v3.16b, v3.16b //AES block 3 - round 5 1472 1473 aese v6.16b, v28.16b 1474 aesmc v6.16b, v6.16b //AES block 6 - round 5 1475 aese v1.16b, v28.16b 1476 aesmc v1.16b, v1.16b //AES block 1 - round 5 1477 1478 aese v7.16b, v28.16b 1479 aesmc v7.16b, v7.16b //AES block 7 - round 5 1480 aese v5.16b, v28.16b 1481 aesmc v5.16b, v5.16b //AES block 5 - round 5 1482 1483 aese v4.16b, v28.16b 1484 aesmc v4.16b, v4.16b //AES block 4 - round 5 1485 1486 aese v3.16b, v26.16b 1487 aesmc v3.16b, v3.16b //AES block 3 - round 6 1488 aese v2.16b, v26.16b 1489 aesmc v2.16b, v2.16b //AES block 2 - round 6 1490 aese v0.16b, v28.16b 1491 aesmc v0.16b, v0.16b //AES block 0 - round 5 1492 1493 aese v5.16b, v26.16b 1494 aesmc v5.16b, v5.16b //AES block 5 - round 6 1495 aese v4.16b, v26.16b 1496 aesmc v4.16b, v4.16b //AES block 4 - round 6 1497 aese v1.16b, v26.16b 1498 aesmc v1.16b, v1.16b //AES block 1 - round 6 1499 1500 aese v0.16b, v26.16b 1501 aesmc v0.16b, v0.16b //AES block 0 - round 6 1502 aese v7.16b, v26.16b 1503 aesmc v7.16b, v7.16b //AES block 7 - round 6 1504 aese v6.16b, v26.16b 1505 aesmc v6.16b, v6.16b //AES block 6 - round 6 1506 1507 aese v3.16b, v27.16b 1508 aesmc v3.16b, v3.16b //AES block 3 - round 7 1509 aese v4.16b, v27.16b 1510 aesmc v4.16b, v4.16b //AES block 4 - round 7 1511 aese v1.16b, v27.16b 1512 aesmc v1.16b, v1.16b //AES block 1 - round 7 1513 1514 aese v7.16b, v27.16b 1515 aesmc v7.16b, v7.16b //AES block 7 - round 7 1516 aese v5.16b, v27.16b 1517 aesmc v5.16b, v5.16b //AES block 5 - round 7 1518 ldp q28, q26, [x8, #128] //load rk8, rk9 1519 1520 aese v6.16b, v27.16b 1521 aesmc v6.16b, v6.16b //AES block 6 - round 7 1522 aese v2.16b, v27.16b 1523 aesmc v2.16b, v2.16b //AES block 2 - round 7 1524 aese v0.16b, v27.16b 1525 aesmc v0.16b, v0.16b //AES block 0 - round 7 1526 1527 add x5, x5, x0 1528 add v30.4s, v30.4s, v31.4s //CTR block 7 1529 1530 aese v6.16b, v28.16b 1531 aesmc v6.16b, v6.16b //AES block 6 - round 8 1532 aese v0.16b, v28.16b 1533 aesmc v0.16b, v0.16b //AES block 0 - round 8 1534 1535 aese v1.16b, v28.16b 1536 aesmc v1.16b, v1.16b //AES block 1 - round 8 1537 aese v7.16b, v28.16b 1538 aesmc v7.16b, v7.16b //AES block 7 - round 8 1539 aese v3.16b, v28.16b 1540 aesmc v3.16b, v3.16b //AES block 3 - round 8 1541 1542 aese v5.16b, v28.16b 1543 aesmc v5.16b, v5.16b //AES block 5 - round 8 1544 aese v2.16b, v28.16b 1545 aesmc v2.16b, v2.16b //AES block 2 - round 8 1546 aese v4.16b, v28.16b 1547 aesmc v4.16b, v4.16b //AES block 4 - round 8 1548 1549 aese v0.16b, v26.16b //AES block 0 - round 9 1550 aese v1.16b, v26.16b //AES block 1 - round 9 1551 aese v6.16b, v26.16b //AES block 6 - round 9 1552 1553 ldr q27, [x8, #160] //load rk10 1554 aese v4.16b, v26.16b //AES block 4 - round 9 1555 aese v3.16b, v26.16b //AES block 3 - round 9 1556 1557 aese v2.16b, v26.16b //AES block 2 - round 9 1558 aese v5.16b, v26.16b //AES block 5 - round 9 1559 aese v7.16b, v26.16b //AES block 7 - round 9 1560 1561 add x4, x0, x1, lsr #3 //end_input_ptr 1562 cmp x0, x5 //check if we have <= 8 blocks 1563 b.ge .L128_dec_tail //handle tail 1564 1565 ldp q8, q9, [x0], #32 //AES block 0, 1 - load ciphertext 1566 1567 .inst 0xce006d00 //eor3 v0.16b, v8.16b, v0.16b, v27.16b //AES block 0 - result 1568 .inst 0xce016d21 //eor3 v1.16b, v9.16b, v1.16b, v27.16b //AES block 1 - result 1569 stp q0, q1, [x2], #32 //AES block 0, 1 - store result 1570 1571 rev32 v0.16b, v30.16b //CTR block 8 1572 add v30.4s, v30.4s, v31.4s //CTR block 8 1573 ldp q10, q11, [x0], #32 //AES block 2, 3 - load ciphertext 1574 1575 ldp q12, q13, [x0], #32 //AES block 4, 5 - load ciphertext 1576 1577 rev32 v1.16b, v30.16b //CTR block 9 1578 add v30.4s, v30.4s, v31.4s //CTR block 9 1579 ldp q14, q15, [x0], #32 //AES block 6, 7 - load ciphertext 1580 1581 .inst 0xce036d63 //eor3 v3.16b, v11.16b, v3.16b, v27.16b //AES block 3 - result 1582 .inst 0xce026d42 //eor3 v2.16b, v10.16b, v2.16b, v27.16b //AES block 2 - result 1583 stp q2, q3, [x2], #32 //AES block 2, 3 - store result 1584 1585 rev32 v2.16b, v30.16b //CTR block 10 1586 add v30.4s, v30.4s, v31.4s //CTR block 10 1587 1588 .inst 0xce066dc6 //eor3 v6.16b, v14.16b, v6.16b, v27.16b //AES block 6 - result 1589 1590 rev32 v3.16b, v30.16b //CTR block 11 1591 add v30.4s, v30.4s, v31.4s //CTR block 11 1592 1593 .inst 0xce046d84 //eor3 v4.16b, v12.16b, v4.16b, v27.16b //AES block 4 - result 1594 .inst 0xce056da5 //eor3 v5.16b, v13.16b, v5.16b, v27.16b //AES block 5 - result 1595 stp q4, q5, [x2], #32 //AES block 4, 5 - store result 1596 1597 .inst 0xce076de7 //eor3 v7.16b, v15.16b, v7.16b, v27.16b //AES block 7 - result 1598 stp q6, q7, [x2], #32 //AES block 6, 7 - store result 1599 rev32 v4.16b, v30.16b //CTR block 12 1600 1601 cmp x0, x5 //check if we have <= 8 blocks 1602 add v30.4s, v30.4s, v31.4s //CTR block 12 1603 b.ge .L128_dec_prepretail //do prepretail 1604 1605 .L128_dec_main_loop: //main loop start 1606 ldr q23, [x3, #176] //load h7l | h7h 1607 ext v23.16b, v23.16b, v23.16b, #8 1608 ldr q25, [x3, #208] //load h8l | h8h 1609 ext v25.16b, v25.16b, v25.16b, #8 1610 1611 rev64 v9.16b, v9.16b //GHASH block 8k+1 1612 rev64 v8.16b, v8.16b //GHASH block 8k 1613 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 1614 1615 rev64 v14.16b, v14.16b //GHASH block 8k+6 1616 ldr q20, [x3, #128] //load h5l | h5h 1617 ext v20.16b, v20.16b, v20.16b, #8 1618 ldr q22, [x3, #160] //load h6l | h6h 1619 ext v22.16b, v22.16b, v22.16b, #8 1620 1621 eor v8.16b, v8.16b, v19.16b //PRE 1 1622 rev32 v5.16b, v30.16b //CTR block 8k+13 1623 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 1624 1625 rev64 v10.16b, v10.16b //GHASH block 8k+2 1626 rev64 v12.16b, v12.16b //GHASH block 8k+4 1627 ldp q26, q27, [x8, #0] //load rk0, rk1 1628 1629 rev32 v6.16b, v30.16b //CTR block 8k+14 1630 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 1631 ldr q21, [x3, #144] //load h6k | h5k 1632 ldr q24, [x3, #192] //load h8k | h7k 1633 1634 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 1635 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 1636 rev64 v11.16b, v11.16b //GHASH block 8k+3 1637 1638 rev32 v7.16b, v30.16b //CTR block 8k+15 1639 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 1640 rev64 v13.16b, v13.16b //GHASH block 8k+5 1641 1642 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 1643 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 1644 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 1645 1646 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 1647 aese v4.16b, v26.16b 1648 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 1649 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 1650 1651 aese v6.16b, v26.16b 1652 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 1653 aese v5.16b, v26.16b 1654 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 1655 aese v7.16b, v26.16b 1656 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 1657 1658 aese v3.16b, v26.16b 1659 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 1660 aese v2.16b, v26.16b 1661 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 1662 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 1663 1664 aese v1.16b, v26.16b 1665 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 1666 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 1667 aese v0.16b, v26.16b 1668 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 1669 1670 aese v2.16b, v27.16b 1671 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 1672 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 1673 .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 1674 1675 ldp q28, q26, [x8, #32] //load rk2, rk3 1676 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 1677 aese v7.16b, v27.16b 1678 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 1679 1680 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 1681 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 1682 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 1683 1684 ldr q23, [x3, #80] //load h3l | h3h 1685 ext v23.16b, v23.16b, v23.16b, #8 1686 ldr q25, [x3, #112] //load h4l | h4h 1687 ext v25.16b, v25.16b, v25.16b, #8 1688 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 1689 aese v6.16b, v27.16b 1690 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 1691 1692 aese v4.16b, v27.16b 1693 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 1694 aese v5.16b, v27.16b 1695 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 1696 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 1697 1698 aese v3.16b, v27.16b 1699 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 1700 aese v0.16b, v27.16b 1701 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 1702 aese v1.16b, v27.16b 1703 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 1704 1705 aese v7.16b, v28.16b 1706 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 1707 aese v2.16b, v28.16b 1708 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 1709 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 1710 1711 aese v4.16b, v28.16b 1712 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 1713 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 1714 ldr q20, [x3, #32] //load h1l | h1h 1715 ext v20.16b, v20.16b, v20.16b, #8 1716 ldr q22, [x3, #64] //load h2l | h2h 1717 ext v22.16b, v22.16b, v22.16b, #8 1718 1719 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 1720 aese v1.16b, v28.16b 1721 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 1722 aese v3.16b, v28.16b 1723 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 1724 1725 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 1726 aese v5.16b, v28.16b 1727 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 1728 aese v0.16b, v28.16b 1729 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 1730 1731 aese v6.16b, v28.16b 1732 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 1733 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 1734 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 1735 1736 aese v7.16b, v26.16b 1737 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 1738 rev64 v15.16b, v15.16b //GHASH block 8k+7 1739 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 1740 1741 ldp q27, q28, [x8, #64] //load rk4, rk5 1742 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 1743 .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 1744 1745 ldr q21, [x3, #48] //load h2k | h1k 1746 ldr q24, [x3, #96] //load h4k | h3k 1747 aese v2.16b, v26.16b 1748 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 1749 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 1750 1751 aese v4.16b, v26.16b 1752 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 1753 aese v3.16b, v26.16b 1754 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 1755 aese v1.16b, v26.16b 1756 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 1757 1758 aese v0.16b, v26.16b 1759 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 1760 aese v6.16b, v26.16b 1761 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 1762 aese v5.16b, v26.16b 1763 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 1764 1765 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 1766 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 1767 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 1768 1769 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 1770 aese v0.16b, v27.16b 1771 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 1772 aese v7.16b, v27.16b 1773 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 1774 1775 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 1776 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 1777 aese v3.16b, v27.16b 1778 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 1779 1780 aese v1.16b, v27.16b 1781 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 1782 aese v5.16b, v27.16b 1783 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 1784 aese v6.16b, v27.16b 1785 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 1786 1787 aese v2.16b, v27.16b 1788 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 1789 aese v4.16b, v27.16b 1790 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 1791 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 1792 1793 ldp q26, q27, [x8, #96] //load rk6, rk7 1794 aese v0.16b, v28.16b 1795 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 1796 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 1797 1798 aese v2.16b, v28.16b 1799 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 1800 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 1801 aese v1.16b, v28.16b 1802 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 1803 1804 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 1805 aese v6.16b, v28.16b 1806 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 1807 aese v7.16b, v28.16b 1808 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 1809 1810 aese v3.16b, v28.16b 1811 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 1812 aese v5.16b, v28.16b 1813 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 1814 aese v4.16b, v28.16b 1815 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 1816 1817 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 1818 .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 1819 .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 1820 1821 aese v3.16b, v26.16b 1822 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 1823 .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 1824 aese v7.16b, v26.16b 1825 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 1826 1827 aese v1.16b, v26.16b 1828 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 1829 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 1830 aese v6.16b, v26.16b 1831 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 1832 1833 aese v2.16b, v26.16b 1834 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 1835 aese v5.16b, v26.16b 1836 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 1837 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 1838 1839 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 1840 aese v0.16b, v26.16b 1841 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 1842 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 1843 1844 .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 1845 aese v4.16b, v26.16b 1846 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 1847 ldp q28, q26, [x8, #128] //load rk8, rk9 1848 1849 ldr d16, [x10] //MODULO - load modulo constant 1850 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 1851 aese v5.16b, v27.16b 1852 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 1853 1854 rev32 v20.16b, v30.16b //CTR block 8k+16 1855 .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 1856 add v30.4s, v30.4s, v31.4s //CTR block 8k+16 1857 1858 aese v6.16b, v27.16b 1859 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 1860 aese v3.16b, v27.16b 1861 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 1862 aese v7.16b, v27.16b 1863 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 1864 1865 aese v2.16b, v27.16b 1866 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 1867 aese v1.16b, v27.16b 1868 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 1869 rev32 v22.16b, v30.16b //CTR block 8k+17 1870 1871 aese v4.16b, v27.16b 1872 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 1873 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 1874 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 1875 1876 .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 1877 aese v0.16b, v27.16b 1878 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 1879 add v30.4s, v30.4s, v31.4s //CTR block 8k+17 1880 1881 aese v5.16b, v28.16b 1882 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 1883 aese v1.16b, v28.16b 1884 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 1885 ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load ciphertext 1886 1887 ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load ciphertext 1888 aese v0.16b, v28.16b 1889 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 1890 rev32 v23.16b, v30.16b //CTR block 8k+18 1891 1892 ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load ciphertext 1893 aese v4.16b, v28.16b 1894 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 1895 .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 1896 1897 ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load ciphertext 1898 aese v3.16b, v28.16b 1899 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 1900 add v30.4s, v30.4s, v31.4s //CTR block 8k+18 1901 1902 aese v7.16b, v28.16b 1903 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 1904 aese v2.16b, v28.16b 1905 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 1906 aese v6.16b, v28.16b 1907 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 1908 1909 aese v0.16b, v26.16b //AES block 8k+8 - round 9 1910 aese v1.16b, v26.16b //AES block 8k+9 - round 9 1911 ldr q27, [x8, #160] //load rk10 1912 1913 aese v6.16b, v26.16b //AES block 8k+14 - round 9 1914 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 1915 aese v2.16b, v26.16b //AES block 8k+10 - round 9 1916 1917 aese v7.16b, v26.16b //AES block 8k+15 - round 9 1918 aese v4.16b, v26.16b //AES block 8k+12 - round 9 1919 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 1920 1921 rev32 v25.16b, v30.16b //CTR block 8k+19 1922 add v30.4s, v30.4s, v31.4s //CTR block 8k+19 1923 1924 aese v3.16b, v26.16b //AES block 8k+11 - round 9 1925 aese v5.16b, v26.16b //AES block 8k+13 - round 9 1926 .inst 0xce016d21 //eor3 v1.16b, v9.16b, v1.16b, v27.16b //AES block 8k+9 - result 1927 1928 .inst 0xce006d00 //eor3 v0.16b, v8.16b, v0.16b, v27.16b //AES block 8k+8 - result 1929 .inst 0xce076de7 //eor3 v7.16b, v15.16b, v7.16b, v27.16b //AES block 8k+15 - result 1930 .inst 0xce066dc6 //eor3 v6.16b, v14.16b, v6.16b, v27.16b //AES block 8k+14 - result 1931 1932 .inst 0xce026d42 //eor3 v2.16b, v10.16b, v2.16b, v27.16b //AES block 8k+10 - result 1933 stp q0, q1, [x2], #32 //AES block 8k+8, 8k+9 - store result 1934 mov v1.16b, v22.16b //CTR block 8k+17 1935 1936 .inst 0xce046d84 //eor3 v4.16b, v12.16b, v4.16b, v27.16b //AES block 8k+12 - result 1937 .inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 1938 mov v0.16b, v20.16b //CTR block 8k+16 1939 1940 .inst 0xce036d63 //eor3 v3.16b, v11.16b, v3.16b, v27.16b //AES block 8k+11 - result 1941 cmp x0, x5 //.LOOP CONTROL 1942 stp q2, q3, [x2], #32 //AES block 8k+10, 8k+11 - store result 1943 1944 .inst 0xce056da5 //eor3 v5.16b, v13.16b, v5.16b, v27.16b //AES block 8k+13 - result 1945 mov v2.16b, v23.16b //CTR block 8k+18 1946 1947 stp q4, q5, [x2], #32 //AES block 8k+12, 8k+13 - store result 1948 rev32 v4.16b, v30.16b //CTR block 8k+20 1949 add v30.4s, v30.4s, v31.4s //CTR block 8k+20 1950 1951 stp q6, q7, [x2], #32 //AES block 8k+14, 8k+15 - store result 1952 mov v3.16b, v25.16b //CTR block 8k+19 1953 b.lt .L128_dec_main_loop 1954 1955 .L128_dec_prepretail: //PREPRETAIL 1956 rev64 v11.16b, v11.16b //GHASH block 8k+3 1957 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 1958 rev64 v8.16b, v8.16b //GHASH block 8k 1959 1960 rev64 v10.16b, v10.16b //GHASH block 8k+2 1961 rev32 v5.16b, v30.16b //CTR block 8k+13 1962 ldp q26, q27, [x8, #0] //load rk0, rk1 1963 1964 ldr q23, [x3, #176] //load h7l | h7h 1965 ext v23.16b, v23.16b, v23.16b, #8 1966 ldr q25, [x3, #208] //load h8l | h8h 1967 ext v25.16b, v25.16b, v25.16b, #8 1968 eor v8.16b, v8.16b, v19.16b //PRE 1 1969 rev64 v9.16b, v9.16b //GHASH block 8k+1 1970 1971 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 1972 ldr q20, [x3, #128] //load h5l | h5h 1973 ext v20.16b, v20.16b, v20.16b, #8 1974 ldr q22, [x3, #160] //load h6l | h6h 1975 ext v22.16b, v22.16b, v22.16b, #8 1976 rev64 v13.16b, v13.16b //GHASH block 8k+5 1977 1978 rev64 v12.16b, v12.16b //GHASH block 8k+4 1979 1980 rev64 v14.16b, v14.16b //GHASH block 8k+6 1981 1982 ldr q21, [x3, #144] //load h6k | h5k 1983 ldr q24, [x3, #192] //load h8k | h7k 1984 rev32 v6.16b, v30.16b //CTR block 8k+14 1985 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 1986 1987 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 1988 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 1989 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 1990 1991 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 1992 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 1993 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 1994 1995 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 1996 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 1997 aese v0.16b, v26.16b 1998 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 1999 2000 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 2001 aese v4.16b, v26.16b 2002 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 2003 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 2004 2005 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 2006 rev32 v7.16b, v30.16b //CTR block 8k+15 2007 aese v3.16b, v26.16b 2008 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 2009 2010 .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 2011 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 2012 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 2013 2014 aese v2.16b, v26.16b 2015 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 2016 aese v1.16b, v26.16b 2017 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 2018 aese v5.16b, v26.16b 2019 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 2020 2021 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 2022 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 2023 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 2024 2025 aese v2.16b, v27.16b 2026 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 2027 aese v7.16b, v26.16b 2028 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 2029 aese v6.16b, v26.16b 2030 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 2031 2032 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 2033 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 2034 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 2035 2036 aese v6.16b, v27.16b 2037 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 2038 aese v4.16b, v27.16b 2039 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 2040 aese v5.16b, v27.16b 2041 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 2042 2043 ldp q28, q26, [x8, #32] //load rk2, rk3 2044 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 2045 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 2046 2047 ldr q23, [x3, #80] //load h3l | h3h 2048 ext v23.16b, v23.16b, v23.16b, #8 2049 ldr q25, [x3, #112] //load h4l | h4h 2050 ext v25.16b, v25.16b, v25.16b, #8 2051 aese v1.16b, v27.16b 2052 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 2053 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 2054 2055 aese v3.16b, v27.16b 2056 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 2057 aese v7.16b, v27.16b 2058 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 2059 aese v0.16b, v27.16b 2060 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 2061 2062 ldr q20, [x3, #32] //load h1l | h1h 2063 ext v20.16b, v20.16b, v20.16b, #8 2064 ldr q22, [x3, #64] //load h2l | h2h 2065 ext v22.16b, v22.16b, v22.16b, #8 2066 .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 2067 2068 aese v0.16b, v28.16b 2069 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 2070 aese v6.16b, v28.16b 2071 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 2072 aese v2.16b, v28.16b 2073 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 2074 2075 aese v4.16b, v28.16b 2076 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 2077 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 2078 aese v7.16b, v28.16b 2079 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 2080 2081 aese v1.16b, v28.16b 2082 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 2083 aese v5.16b, v28.16b 2084 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 2085 aese v3.16b, v28.16b 2086 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 2087 2088 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 2089 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 2090 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 2091 2092 ldp q27, q28, [x8, #64] //load rk4, rk5 2093 rev64 v15.16b, v15.16b //GHASH block 8k+7 2094 aese v6.16b, v26.16b 2095 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 2096 2097 ldr q21, [x3, #48] //load h2k | h1k 2098 ldr q24, [x3, #96] //load h4k | h3k 2099 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 2100 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 2101 2102 aese v2.16b, v26.16b 2103 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 2104 aese v0.16b, v26.16b 2105 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 2106 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 2107 2108 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 2109 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 2110 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 2111 2112 aese v4.16b, v26.16b 2113 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 2114 aese v3.16b, v26.16b 2115 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 2116 aese v7.16b, v26.16b 2117 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 2118 2119 aese v1.16b, v26.16b 2120 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 2121 aese v5.16b, v26.16b 2122 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 2123 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 2124 2125 .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 2126 aese v0.16b, v27.16b 2127 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 2128 aese v2.16b, v27.16b 2129 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 2130 2131 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 2132 aese v5.16b, v27.16b 2133 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 2134 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 2135 2136 aese v1.16b, v27.16b 2137 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 2138 aese v6.16b, v27.16b 2139 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 2140 aese v4.16b, v27.16b 2141 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 2142 2143 aese v7.16b, v27.16b 2144 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 2145 aese v3.16b, v27.16b 2146 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 2147 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 2148 2149 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 2150 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 2151 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 2152 2153 ldp q26, q27, [x8, #96] //load rk6, rk7 2154 .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 2155 aese v6.16b, v28.16b 2156 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 2157 2158 ldr d16, [x10] //MODULO - load modulo constant 2159 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 2160 .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 2161 2162 aese v0.16b, v28.16b 2163 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 2164 aese v2.16b, v28.16b 2165 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 2166 aese v4.16b, v28.16b 2167 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 2168 2169 aese v3.16b, v28.16b 2170 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 2171 aese v1.16b, v28.16b 2172 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 2173 aese v5.16b, v28.16b 2174 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 2175 2176 aese v7.16b, v28.16b 2177 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 2178 .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 2179 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 2180 2181 aese v4.16b, v26.16b 2182 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 2183 aese v1.16b, v26.16b 2184 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 2185 aese v2.16b, v26.16b 2186 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 2187 2188 .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 2189 aese v5.16b, v26.16b 2190 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 2191 aese v0.16b, v26.16b 2192 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 2193 2194 aese v3.16b, v26.16b 2195 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 2196 aese v6.16b, v26.16b 2197 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 2198 aese v7.16b, v26.16b 2199 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 2200 2201 aese v4.16b, v27.16b 2202 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 2203 .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 2204 ldp q28, q26, [x8, #128] //load rk8, rk9 2205 2206 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 2207 aese v3.16b, v27.16b 2208 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 2209 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 2210 2211 aese v5.16b, v27.16b 2212 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 2213 aese v6.16b, v27.16b 2214 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 2215 aese v0.16b, v27.16b 2216 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 2217 2218 aese v7.16b, v27.16b 2219 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 2220 aese v1.16b, v27.16b 2221 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 2222 aese v2.16b, v27.16b 2223 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 2224 2225 .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 2226 ldr q27, [x8, #160] //load rk10 2227 2228 aese v3.16b, v28.16b 2229 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 2230 aese v0.16b, v28.16b 2231 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 2232 2233 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 2234 aese v6.16b, v28.16b 2235 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 2236 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 2237 2238 aese v2.16b, v28.16b 2239 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 2240 aese v1.16b, v28.16b 2241 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 2242 aese v7.16b, v28.16b 2243 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 2244 2245 aese v6.16b, v26.16b //AES block 8k+14 - round 9 2246 aese v5.16b, v28.16b 2247 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 2248 aese v4.16b, v28.16b 2249 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 2250 2251 .inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 2252 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 2253 aese v2.16b, v26.16b //AES block 8k+10 - round 9 2254 2255 aese v3.16b, v26.16b //AES block 8k+11 - round 9 2256 aese v5.16b, v26.16b //AES block 8k+13 - round 9 2257 aese v0.16b, v26.16b //AES block 8k+8 - round 9 2258 2259 aese v4.16b, v26.16b //AES block 8k+12 - round 9 2260 aese v1.16b, v26.16b //AES block 8k+9 - round 9 2261 aese v7.16b, v26.16b //AES block 8k+15 - round 9 2262 2263 .L128_dec_tail: //TAIL 2264 2265 mov v29.16b, v27.16b 2266 sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process 2267 2268 cmp x5, #112 2269 2270 ldp q24, q25, [x3, #192] //load h8k | h7k 2271 ext v25.16b, v25.16b, v25.16b, #8 2272 ldr q9, [x0], #16 //AES block 8k+8 - load ciphertext 2273 2274 ldp q20, q21, [x3, #128] //load h5l | h5h 2275 ext v20.16b, v20.16b, v20.16b, #8 2276 ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag 2277 2278 ldp q22, q23, [x3, #160] //load h6l | h6h 2279 ext v22.16b, v22.16b, v22.16b, #8 2280 ext v23.16b, v23.16b, v23.16b, #8 2281 2282 .inst 0xce00752c //eor3 v12.16b, v9.16b, v0.16b, v29.16b //AES block 8k+8 - result 2283 b.gt .L128_dec_blocks_more_than_7 2284 2285 cmp x5, #96 2286 mov v7.16b, v6.16b 2287 movi v19.8b, #0 2288 2289 movi v17.8b, #0 2290 mov v6.16b, v5.16b 2291 mov v5.16b, v4.16b 2292 2293 mov v4.16b, v3.16b 2294 mov v3.16b, v2.16b 2295 mov v2.16b, v1.16b 2296 2297 movi v18.8b, #0 2298 sub v30.4s, v30.4s, v31.4s 2299 b.gt .L128_dec_blocks_more_than_6 2300 2301 cmp x5, #80 2302 sub v30.4s, v30.4s, v31.4s 2303 2304 mov v7.16b, v6.16b 2305 mov v6.16b, v5.16b 2306 mov v5.16b, v4.16b 2307 2308 mov v4.16b, v3.16b 2309 mov v3.16b, v1.16b 2310 b.gt .L128_dec_blocks_more_than_5 2311 2312 cmp x5, #64 2313 2314 mov v7.16b, v6.16b 2315 mov v6.16b, v5.16b 2316 mov v5.16b, v4.16b 2317 2318 mov v4.16b, v1.16b 2319 sub v30.4s, v30.4s, v31.4s 2320 b.gt .L128_dec_blocks_more_than_4 2321 2322 sub v30.4s, v30.4s, v31.4s 2323 mov v7.16b, v6.16b 2324 mov v6.16b, v5.16b 2325 2326 mov v5.16b, v1.16b 2327 cmp x5, #48 2328 b.gt .L128_dec_blocks_more_than_3 2329 2330 sub v30.4s, v30.4s, v31.4s 2331 mov v7.16b, v6.16b 2332 cmp x5, #32 2333 2334 ldr q24, [x3, #96] //load h4k | h3k 2335 mov v6.16b, v1.16b 2336 b.gt .L128_dec_blocks_more_than_2 2337 2338 cmp x5, #16 2339 2340 mov v7.16b, v1.16b 2341 sub v30.4s, v30.4s, v31.4s 2342 b.gt .L128_dec_blocks_more_than_1 2343 2344 sub v30.4s, v30.4s, v31.4s 2345 ldr q21, [x3, #48] //load h2k | h1k 2346 b .L128_dec_blocks_less_than_1 2347 .L128_dec_blocks_more_than_7: //blocks left > 7 2348 rev64 v8.16b, v9.16b //GHASH final-7 block 2349 2350 eor v8.16b, v8.16b, v16.16b //feed in partial tag 2351 2352 ins v18.d[0], v24.d[1] //GHASH final-7 block - mid 2353 2354 pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low 2355 ins v27.d[0], v8.d[1] //GHASH final-7 block - mid 2356 2357 movi v16.8b, #0 //suppress further partial tag feed in 2358 ldr q9, [x0], #16 //AES final-6 block - load ciphertext 2359 2360 eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid 2361 2362 pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high 2363 st1 { v12.16b}, [x2], #16 //AES final-7 block - store result 2364 .inst 0xce01752c //eor3 v12.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result 2365 2366 pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid 2367 .L128_dec_blocks_more_than_6: //blocks left > 6 2368 2369 rev64 v8.16b, v9.16b //GHASH final-6 block 2370 2371 eor v8.16b, v8.16b, v16.16b //feed in partial tag 2372 2373 ins v27.d[0], v8.d[1] //GHASH final-6 block - mid 2374 2375 eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid 2376 2377 pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low 2378 ldr q9, [x0], #16 //AES final-5 block - load ciphertext 2379 movi v16.8b, #0 //suppress further partial tag feed in 2380 2381 pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid 2382 st1 { v12.16b}, [x2], #16 //AES final-6 block - store result 2383 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high 2384 2385 eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low 2386 eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high 2387 2388 eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid 2389 .inst 0xce02752c //eor3 v12.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result 2390 .L128_dec_blocks_more_than_5: //blocks left > 5 2391 2392 rev64 v8.16b, v9.16b //GHASH final-5 block 2393 2394 ldr q9, [x0], #16 //AES final-4 block - load ciphertext 2395 st1 { v12.16b}, [x2], #16 //AES final-5 block - store result 2396 2397 eor v8.16b, v8.16b, v16.16b //feed in partial tag 2398 2399 ins v27.d[0], v8.d[1] //GHASH final-5 block - mid 2400 2401 .inst 0xce03752c //eor3 v12.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result 2402 2403 eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid 2404 2405 ins v27.d[1], v27.d[0] //GHASH final-5 block - mid 2406 pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low 2407 movi v16.8b, #0 //suppress further partial tag feed in 2408 2409 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid 2410 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high 2411 eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low 2412 2413 eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid 2414 eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high 2415 .L128_dec_blocks_more_than_4: //blocks left > 4 2416 2417 rev64 v8.16b, v9.16b //GHASH final-4 block 2418 2419 eor v8.16b, v8.16b, v16.16b //feed in partial tag 2420 ldr q9, [x0], #16 //AES final-3 block - load ciphertext 2421 2422 ins v27.d[0], v8.d[1] //GHASH final-4 block - mid 2423 movi v16.8b, #0 //suppress further partial tag feed in 2424 pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high 2425 2426 pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low 2427 2428 eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high 2429 2430 st1 { v12.16b}, [x2], #16 //AES final-4 block - store result 2431 eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid 2432 2433 .inst 0xce04752c //eor3 v12.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result 2434 eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low 2435 2436 pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid 2437 2438 eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid 2439 .L128_dec_blocks_more_than_3: //blocks left > 3 2440 2441 st1 { v12.16b}, [x2], #16 //AES final-3 block - store result 2442 rev64 v8.16b, v9.16b //GHASH final-3 block 2443 2444 eor v8.16b, v8.16b, v16.16b //feed in partial tag 2445 2446 ins v27.d[0], v8.d[1] //GHASH final-3 block - mid 2447 2448 ldr q25, [x3, #112] //load h4l | h4h 2449 ext v25.16b, v25.16b, v25.16b, #8 2450 ldr q24, [x3, #96] //load h4k | h3k 2451 2452 eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid 2453 2454 ldr q9, [x0], #16 //AES final-2 block - load ciphertext 2455 2456 ins v27.d[1], v27.d[0] //GHASH final-3 block - mid 2457 pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low 2458 pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high 2459 2460 movi v16.8b, #0 //suppress further partial tag feed in 2461 .inst 0xce05752c //eor3 v12.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result 2462 eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low 2463 2464 pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid 2465 2466 eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high 2467 eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid 2468 .L128_dec_blocks_more_than_2: //blocks left > 2 2469 2470 rev64 v8.16b, v9.16b //GHASH final-2 block 2471 2472 st1 { v12.16b}, [x2], #16 //AES final-2 block - store result 2473 2474 eor v8.16b, v8.16b, v16.16b //feed in partial tag 2475 ldr q23, [x3, #80] //load h3l | h3h 2476 ext v23.16b, v23.16b, v23.16b, #8 2477 movi v16.8b, #0 //suppress further partial tag feed in 2478 2479 ins v27.d[0], v8.d[1] //GHASH final-2 block - mid 2480 2481 eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid 2482 2483 pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low 2484 2485 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high 2486 pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid 2487 ldr q9, [x0], #16 //AES final-1 block - load ciphertext 2488 2489 eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid 2490 2491 eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low 2492 2493 .inst 0xce06752c //eor3 v12.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result 2494 eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high 2495 .L128_dec_blocks_more_than_1: //blocks left > 1 2496 2497 st1 { v12.16b}, [x2], #16 //AES final-1 block - store result 2498 rev64 v8.16b, v9.16b //GHASH final-1 block 2499 2500 ldr q22, [x3, #64] //load h2l | h2h 2501 ext v22.16b, v22.16b, v22.16b, #8 2502 2503 eor v8.16b, v8.16b, v16.16b //feed in partial tag 2504 2505 movi v16.8b, #0 //suppress further partial tag feed in 2506 2507 ins v27.d[0], v8.d[1] //GHASH final-1 block - mid 2508 2509 ldr q9, [x0], #16 //AES final block - load ciphertext 2510 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high 2511 2512 eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid 2513 eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high 2514 ldr q21, [x3, #48] //load h2k | h1k 2515 2516 ins v27.d[1], v27.d[0] //GHASH final-1 block - mid 2517 .inst 0xce07752c //eor3 v12.16b, v9.16b, v7.16b, v29.16b //AES final block - result 2518 2519 pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low 2520 2521 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid 2522 2523 eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low 2524 2525 eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid 2526 .L128_dec_blocks_less_than_1: //blocks left <= 1 2527 2528 and x1, x1, #127 //bit_length %= 128 2529 2530 sub x1, x1, #128 //bit_length -= 128 2531 2532 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) 2533 2534 mvn x6, xzr //temp0_x = 0xffffffffffffffff 2535 and x1, x1, #127 //bit_length %= 128 2536 2537 lsr x6, x6, x1 //temp0_x is mask for top 64b of last block 2538 cmp x1, #64 2539 mvn x7, xzr //temp1_x = 0xffffffffffffffff 2540 2541 csel x13, x7, x6, lt 2542 csel x14, x6, xzr, lt 2543 2544 mov v0.d[1], x14 2545 mov v0.d[0], x13 //ctr0b is mask for last block 2546 2547 ldr q20, [x3, #32] //load h1l | h1h 2548 ext v20.16b, v20.16b, v20.16b, #8 2549 ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored 2550 2551 and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits 2552 2553 rev64 v8.16b, v9.16b //GHASH final block 2554 2555 eor v8.16b, v8.16b, v16.16b //feed in partial tag 2556 2557 pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high 2558 ins v16.d[0], v8.d[1] //GHASH final block - mid 2559 2560 eor v17.16b, v17.16b, v28.16b //GHASH final block - high 2561 eor v16.8b, v16.8b, v8.8b //GHASH final block - mid 2562 2563 bif v12.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing 2564 2565 pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid 2566 st1 { v12.16b}, [x2] //store all 16B 2567 2568 pmull v26.1q, v8.1d, v20.1d //GHASH final block - low 2569 2570 eor v18.16b, v18.16b, v16.16b //GHASH final block - mid 2571 ldr d16, [x10] //MODULO - load modulo constant 2572 2573 eor v19.16b, v19.16b, v26.16b //GHASH final block - low 2574 2575 eor v14.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 2576 2577 pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 2578 ext v17.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 2579 2580 eor v18.16b, v18.16b, v14.16b //MODULO - karatsuba tidy up 2581 2582 .inst 0xce115652 //eor3 v18.16b, v18.16b, v17.16b, v21.16b //MODULO - fold into mid 2583 2584 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 2585 ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 2586 2587 .inst 0xce124673 //eor3 v19.16b, v19.16b, v18.16b, v17.16b //MODULO - fold into low 2588 ext v19.16b, v19.16b, v19.16b, #8 2589 rev64 v19.16b, v19.16b 2590 st1 { v19.16b }, [x3] 2591 rev32 v30.16b, v30.16b 2592 2593 str q30, [x16] //store the updated counter 2594 2595 mov x0, x9 2596 2597 ldp d10, d11, [sp, #16] 2598 ldp d12, d13, [sp, #32] 2599 ldp d14, d15, [sp, #48] 2600 ldp d8, d9, [sp], #80 2601 ret 2602 .L128_dec_ret: 2603 mov w0, #0x0 2604 ret 2605 .size unroll8_eor3_aes_gcm_dec_128_kernel,.-unroll8_eor3_aes_gcm_dec_128_kernel 2606 .globl unroll8_eor3_aes_gcm_enc_192_kernel 2607 .type unroll8_eor3_aes_gcm_enc_192_kernel,%function 2608 .align 4 2609 unroll8_eor3_aes_gcm_enc_192_kernel: 2610 AARCH64_VALID_CALL_TARGET 2611 cbz x1, .L192_enc_ret 2612 stp d8, d9, [sp, #-80]! 2613 lsr x9, x1, #3 2614 mov x16, x4 2615 mov x8, x5 2616 stp d10, d11, [sp, #16] 2617 stp d12, d13, [sp, #32] 2618 stp d14, d15, [sp, #48] 2619 mov x5, #0xc200000000000000 2620 stp x5, xzr, [sp, #64] 2621 add x10, sp, #64 2622 2623 mov x5, x9 2624 ld1 { v0.16b}, [x16] //CTR block 0 2625 2626 mov x15, #0x100000000 //set up counter increment 2627 movi v31.16b, #0x0 2628 mov v31.d[1], x15 2629 2630 rev32 v30.16b, v0.16b //set up reversed counter 2631 2632 add v30.4s, v30.4s, v31.4s //CTR block 0 2633 2634 rev32 v1.16b, v30.16b //CTR block 1 2635 add v30.4s, v30.4s, v31.4s //CTR block 1 2636 2637 rev32 v2.16b, v30.16b //CTR block 2 2638 add v30.4s, v30.4s, v31.4s //CTR block 2 2639 2640 rev32 v3.16b, v30.16b //CTR block 3 2641 add v30.4s, v30.4s, v31.4s //CTR block 3 2642 2643 rev32 v4.16b, v30.16b //CTR block 4 2644 add v30.4s, v30.4s, v31.4s //CTR block 4 2645 sub x5, x5, #1 //byte_len - 1 2646 2647 and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 2648 2649 rev32 v5.16b, v30.16b //CTR block 5 2650 add v30.4s, v30.4s, v31.4s //CTR block 5 2651 ldp q26, q27, [x8, #0] //load rk0, rk1 2652 2653 add x5, x5, x0 2654 2655 rev32 v6.16b, v30.16b //CTR block 6 2656 add v30.4s, v30.4s, v31.4s //CTR block 6 2657 2658 rev32 v7.16b, v30.16b //CTR block 7 2659 2660 aese v5.16b, v26.16b 2661 aesmc v5.16b, v5.16b //AES block 5 - round 0 2662 aese v4.16b, v26.16b 2663 aesmc v4.16b, v4.16b //AES block 4 - round 0 2664 aese v3.16b, v26.16b 2665 aesmc v3.16b, v3.16b //AES block 3 - round 0 2666 2667 aese v0.16b, v26.16b 2668 aesmc v0.16b, v0.16b //AES block 0 - round 0 2669 aese v1.16b, v26.16b 2670 aesmc v1.16b, v1.16b //AES block 1 - round 0 2671 aese v7.16b, v26.16b 2672 aesmc v7.16b, v7.16b //AES block 7 - round 0 2673 2674 aese v6.16b, v26.16b 2675 aesmc v6.16b, v6.16b //AES block 6 - round 0 2676 aese v2.16b, v26.16b 2677 aesmc v2.16b, v2.16b //AES block 2 - round 0 2678 ldp q28, q26, [x8, #32] //load rk2, rk3 2679 2680 aese v5.16b, v27.16b 2681 aesmc v5.16b, v5.16b //AES block 5 - round 1 2682 aese v7.16b, v27.16b 2683 aesmc v7.16b, v7.16b //AES block 7 - round 1 2684 2685 aese v2.16b, v27.16b 2686 aesmc v2.16b, v2.16b //AES block 2 - round 1 2687 aese v3.16b, v27.16b 2688 aesmc v3.16b, v3.16b //AES block 3 - round 1 2689 aese v6.16b, v27.16b 2690 aesmc v6.16b, v6.16b //AES block 6 - round 1 2691 2692 aese v5.16b, v28.16b 2693 aesmc v5.16b, v5.16b //AES block 5 - round 2 2694 aese v4.16b, v27.16b 2695 aesmc v4.16b, v4.16b //AES block 4 - round 1 2696 aese v0.16b, v27.16b 2697 aesmc v0.16b, v0.16b //AES block 0 - round 1 2698 2699 aese v1.16b, v27.16b 2700 aesmc v1.16b, v1.16b //AES block 1 - round 1 2701 aese v7.16b, v28.16b 2702 aesmc v7.16b, v7.16b //AES block 7 - round 2 2703 aese v3.16b, v28.16b 2704 aesmc v3.16b, v3.16b //AES block 3 - round 2 2705 2706 aese v2.16b, v28.16b 2707 aesmc v2.16b, v2.16b //AES block 2 - round 2 2708 aese v0.16b, v28.16b 2709 aesmc v0.16b, v0.16b //AES block 0 - round 2 2710 2711 aese v1.16b, v28.16b 2712 aesmc v1.16b, v1.16b //AES block 1 - round 2 2713 aese v4.16b, v28.16b 2714 aesmc v4.16b, v4.16b //AES block 4 - round 2 2715 aese v6.16b, v28.16b 2716 aesmc v6.16b, v6.16b //AES block 6 - round 2 2717 2718 ldp q27, q28, [x8, #64] //load rk4, rk5 2719 aese v4.16b, v26.16b 2720 aesmc v4.16b, v4.16b //AES block 4 - round 3 2721 2722 aese v7.16b, v26.16b 2723 aesmc v7.16b, v7.16b //AES block 7 - round 3 2724 aese v3.16b, v26.16b 2725 aesmc v3.16b, v3.16b //AES block 3 - round 3 2726 aese v2.16b, v26.16b 2727 aesmc v2.16b, v2.16b //AES block 2 - round 3 2728 2729 aese v1.16b, v26.16b 2730 aesmc v1.16b, v1.16b //AES block 1 - round 3 2731 2732 aese v0.16b, v26.16b 2733 aesmc v0.16b, v0.16b //AES block 0 - round 3 2734 2735 aese v6.16b, v26.16b 2736 aesmc v6.16b, v6.16b //AES block 6 - round 3 2737 2738 aese v0.16b, v27.16b 2739 aesmc v0.16b, v0.16b //AES block 0 - round 4 2740 aese v1.16b, v27.16b 2741 aesmc v1.16b, v1.16b //AES block 1 - round 4 2742 aese v5.16b, v26.16b 2743 aesmc v5.16b, v5.16b //AES block 5 - round 3 2744 2745 aese v3.16b, v27.16b 2746 aesmc v3.16b, v3.16b //AES block 3 - round 4 2747 aese v2.16b, v27.16b 2748 aesmc v2.16b, v2.16b //AES block 2 - round 4 2749 aese v4.16b, v27.16b 2750 aesmc v4.16b, v4.16b //AES block 4 - round 4 2751 2752 aese v6.16b, v27.16b 2753 aesmc v6.16b, v6.16b //AES block 6 - round 4 2754 aese v7.16b, v27.16b 2755 aesmc v7.16b, v7.16b //AES block 7 - round 4 2756 aese v5.16b, v27.16b 2757 aesmc v5.16b, v5.16b //AES block 5 - round 4 2758 2759 aese v1.16b, v28.16b 2760 aesmc v1.16b, v1.16b //AES block 1 - round 5 2761 ldp q26, q27, [x8, #96] //load rk6, rk7 2762 aese v2.16b, v28.16b 2763 aesmc v2.16b, v2.16b //AES block 2 - round 5 2764 2765 aese v4.16b, v28.16b 2766 aesmc v4.16b, v4.16b //AES block 4 - round 5 2767 aese v7.16b, v28.16b 2768 aesmc v7.16b, v7.16b //AES block 7 - round 5 2769 aese v0.16b, v28.16b 2770 aesmc v0.16b, v0.16b //AES block 0 - round 5 2771 2772 aese v5.16b, v28.16b 2773 aesmc v5.16b, v5.16b //AES block 5 - round 5 2774 aese v6.16b, v28.16b 2775 aesmc v6.16b, v6.16b //AES block 6 - round 5 2776 aese v3.16b, v28.16b 2777 aesmc v3.16b, v3.16b //AES block 3 - round 5 2778 2779 add v30.4s, v30.4s, v31.4s //CTR block 7 2780 2781 aese v5.16b, v26.16b 2782 aesmc v5.16b, v5.16b //AES block 5 - round 6 2783 aese v4.16b, v26.16b 2784 aesmc v4.16b, v4.16b //AES block 4 - round 6 2785 aese v3.16b, v26.16b 2786 aesmc v3.16b, v3.16b //AES block 3 - round 6 2787 2788 aese v2.16b, v26.16b 2789 aesmc v2.16b, v2.16b //AES block 2 - round 6 2790 aese v6.16b, v26.16b 2791 aesmc v6.16b, v6.16b //AES block 6 - round 6 2792 aese v1.16b, v26.16b 2793 aesmc v1.16b, v1.16b //AES block 1 - round 6 2794 2795 aese v0.16b, v26.16b 2796 aesmc v0.16b, v0.16b //AES block 0 - round 6 2797 aese v7.16b, v26.16b 2798 aesmc v7.16b, v7.16b //AES block 7 - round 6 2799 ldp q28, q26, [x8, #128] //load rk8, rk9 2800 2801 aese v6.16b, v27.16b 2802 aesmc v6.16b, v6.16b //AES block 6 - round 7 2803 aese v3.16b, v27.16b 2804 aesmc v3.16b, v3.16b //AES block 3 - round 7 2805 2806 aese v4.16b, v27.16b 2807 aesmc v4.16b, v4.16b //AES block 4 - round 7 2808 aese v0.16b, v27.16b 2809 aesmc v0.16b, v0.16b //AES block 0 - round 7 2810 2811 aese v7.16b, v27.16b 2812 aesmc v7.16b, v7.16b //AES block 7 - round 7 2813 aese v1.16b, v27.16b 2814 aesmc v1.16b, v1.16b //AES block 1 - round 7 2815 2816 aese v2.16b, v27.16b 2817 aesmc v2.16b, v2.16b //AES block 2 - round 7 2818 aese v5.16b, v27.16b 2819 aesmc v5.16b, v5.16b //AES block 5 - round 7 2820 2821 aese v7.16b, v28.16b 2822 aesmc v7.16b, v7.16b //AES block 7 - round 8 2823 aese v0.16b, v28.16b 2824 aesmc v0.16b, v0.16b //AES block 0 - round 8 2825 2826 aese v4.16b, v28.16b 2827 aesmc v4.16b, v4.16b //AES block 4 - round 8 2828 aese v3.16b, v28.16b 2829 aesmc v3.16b, v3.16b //AES block 3 - round 8 2830 aese v5.16b, v28.16b 2831 aesmc v5.16b, v5.16b //AES block 5 - round 8 2832 2833 aese v2.16b, v28.16b 2834 aesmc v2.16b, v2.16b //AES block 2 - round 8 2835 aese v1.16b, v28.16b 2836 aesmc v1.16b, v1.16b //AES block 1 - round 8 2837 aese v6.16b, v28.16b 2838 aesmc v6.16b, v6.16b //AES block 6 - round 8 2839 2840 add x4, x0, x1, lsr #3 //end_input_ptr 2841 cmp x0, x5 //check if we have <= 8 blocks 2842 aese v3.16b, v26.16b 2843 aesmc v3.16b, v3.16b //AES block 3 - round 9 2844 2845 ld1 { v19.16b}, [x3] 2846 ext v19.16b, v19.16b, v19.16b, #8 2847 rev64 v19.16b, v19.16b 2848 ldp q27, q28, [x8, #160] //load rk10, rk11 2849 2850 aese v6.16b, v26.16b 2851 aesmc v6.16b, v6.16b //AES block 6 - round 9 2852 aese v1.16b, v26.16b 2853 aesmc v1.16b, v1.16b //AES block 1 - round 9 2854 2855 aese v5.16b, v26.16b 2856 aesmc v5.16b, v5.16b //AES block 5 - round 9 2857 aese v2.16b, v26.16b 2858 aesmc v2.16b, v2.16b //AES block 2 - round 9 2859 2860 aese v0.16b, v26.16b 2861 aesmc v0.16b, v0.16b //AES block 0 - round 9 2862 aese v4.16b, v26.16b 2863 aesmc v4.16b, v4.16b //AES block 4 - round 9 2864 2865 aese v6.16b, v27.16b 2866 aesmc v6.16b, v6.16b //AES block 14 - round 10 2867 aese v7.16b, v26.16b 2868 aesmc v7.16b, v7.16b //AES block 7 - round 9 2869 aese v3.16b, v27.16b 2870 aesmc v3.16b, v3.16b //AES block 11 - round 10 2871 2872 aese v1.16b, v27.16b 2873 aesmc v1.16b, v1.16b //AES block 9 - round 10 2874 aese v5.16b, v27.16b 2875 aesmc v5.16b, v5.16b //AES block 13 - round 10 2876 aese v4.16b, v27.16b 2877 aesmc v4.16b, v4.16b //AES block 12 - round 10 2878 2879 aese v0.16b, v27.16b 2880 aesmc v0.16b, v0.16b //AES block 8 - round 10 2881 aese v2.16b, v27.16b 2882 aesmc v2.16b, v2.16b //AES block 10 - round 10 2883 aese v7.16b, v27.16b 2884 aesmc v7.16b, v7.16b //AES block 15 - round 10 2885 2886 aese v6.16b, v28.16b //AES block 14 - round 11 2887 aese v3.16b, v28.16b //AES block 11 - round 11 2888 2889 aese v4.16b, v28.16b //AES block 12 - round 11 2890 aese v7.16b, v28.16b //AES block 15 - round 11 2891 ldr q26, [x8, #192] //load rk12 2892 2893 aese v1.16b, v28.16b //AES block 9 - round 11 2894 aese v5.16b, v28.16b //AES block 13 - round 11 2895 2896 aese v2.16b, v28.16b //AES block 10 - round 11 2897 aese v0.16b, v28.16b //AES block 8 - round 11 2898 b.ge .L192_enc_tail //handle tail 2899 2900 ldp q8, q9, [x0], #32 //AES block 0, 1 - load plaintext 2901 2902 ldp q10, q11, [x0], #32 //AES block 2, 3 - load plaintext 2903 2904 ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext 2905 2906 ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext 2907 2908 .inst 0xce006908 //eor3 v8.16b, v8.16b, v0.16b, v26.16b //AES block 0 - result 2909 rev32 v0.16b, v30.16b //CTR block 8 2910 add v30.4s, v30.4s, v31.4s //CTR block 8 2911 2912 .inst 0xce03696b //eor3 v11.16b, v11.16b, v3.16b, v26.16b //AES block 3 - result 2913 .inst 0xce016929 //eor3 v9.16b, v9.16b, v1.16b, v26.16b //AES block 1 - result 2914 2915 rev32 v1.16b, v30.16b //CTR block 9 2916 add v30.4s, v30.4s, v31.4s //CTR block 9 2917 .inst 0xce04698c //eor3 v12.16b, v12.16b, v4.16b, v26.16b //AES block 4 - result 2918 2919 .inst 0xce0569ad //eor3 v13.16b, v13.16b, v5.16b, v26.16b //AES block 5 - result 2920 .inst 0xce0769ef //eor3 v15.16b, v15.16b, v7.16b, v26.16b //AES block 7 - result 2921 stp q8, q9, [x2], #32 //AES block 0, 1 - store result 2922 2923 .inst 0xce02694a //eor3 v10.16b, v10.16b, v2.16b, v26.16b //AES block 2 - result 2924 rev32 v2.16b, v30.16b //CTR block 10 2925 add v30.4s, v30.4s, v31.4s //CTR block 10 2926 2927 stp q10, q11, [x2], #32 //AES block 2, 3 - store result 2928 cmp x0, x5 //check if we have <= 8 blocks 2929 2930 rev32 v3.16b, v30.16b //CTR block 11 2931 add v30.4s, v30.4s, v31.4s //CTR block 11 2932 .inst 0xce0669ce //eor3 v14.16b, v14.16b, v6.16b, v26.16b //AES block 6 - result 2933 2934 stp q12, q13, [x2], #32 //AES block 4, 5 - store result 2935 2936 rev32 v4.16b, v30.16b //CTR block 12 2937 stp q14, q15, [x2], #32 //AES block 6, 7 - store result 2938 add v30.4s, v30.4s, v31.4s //CTR block 12 2939 2940 b.ge .L192_enc_prepretail //do prepretail 2941 2942 .L192_enc_main_loop: //main loop start 2943 rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free) 2944 ldp q26, q27, [x8, #0] //load rk0, rk1 2945 rev64 v10.16b, v10.16b //GHASH block 8k+2 2946 2947 rev32 v5.16b, v30.16b //CTR block 8k+13 2948 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 2949 ldr q23, [x3, #176] //load h7l | h7h 2950 ext v23.16b, v23.16b, v23.16b, #8 2951 ldr q25, [x3, #208] //load h8l | h8h 2952 ext v25.16b, v25.16b, v25.16b, #8 2953 2954 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 2955 rev64 v8.16b, v8.16b //GHASH block 8k 2956 ldr q20, [x3, #128] //load h5l | h5h 2957 ext v20.16b, v20.16b, v20.16b, #8 2958 ldr q22, [x3, #160] //load h6l | h6h 2959 ext v22.16b, v22.16b, v22.16b, #8 2960 2961 rev64 v9.16b, v9.16b //GHASH block 8k+1 2962 rev32 v6.16b, v30.16b //CTR block 8k+14 2963 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 2964 2965 eor v8.16b, v8.16b, v19.16b //PRE 1 2966 rev64 v11.16b, v11.16b //GHASH block 8k+3 2967 rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free) 2968 2969 aese v0.16b, v26.16b 2970 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 2971 rev32 v7.16b, v30.16b //CTR block 8k+15 2972 aese v1.16b, v26.16b 2973 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 2974 2975 aese v3.16b, v26.16b 2976 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 2977 aese v5.16b, v26.16b 2978 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 2979 aese v2.16b, v26.16b 2980 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 2981 2982 aese v7.16b, v26.16b 2983 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 2984 aese v4.16b, v26.16b 2985 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 2986 aese v6.16b, v26.16b 2987 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 2988 2989 ldp q28, q26, [x8, #32] //load rk2, rk3 2990 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 2991 aese v0.16b, v27.16b 2992 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 2993 2994 aese v4.16b, v27.16b 2995 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 2996 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 2997 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 2998 2999 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 3000 aese v3.16b, v27.16b 3001 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 3002 ldr q21, [x3, #144] //load h6k | h5k 3003 ldr q24, [x3, #192] //load h8k | h7k 3004 3005 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 3006 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 3007 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 3008 3009 aese v1.16b, v27.16b 3010 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 3011 aese v2.16b, v27.16b 3012 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 3013 aese v5.16b, v27.16b 3014 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 3015 3016 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 3017 aese v6.16b, v27.16b 3018 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 3019 aese v7.16b, v27.16b 3020 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 3021 3022 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 3023 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 3024 aese v1.16b, v28.16b 3025 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 3026 3027 aese v3.16b, v28.16b 3028 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 3029 aese v4.16b, v28.16b 3030 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 3031 aese v6.16b, v28.16b 3032 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 3033 3034 aese v5.16b, v28.16b 3035 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 3036 aese v1.16b, v26.16b 3037 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 3038 .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 3039 3040 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 3041 aese v7.16b, v28.16b 3042 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 3043 aese v4.16b, v26.16b 3044 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 3045 3046 aese v2.16b, v28.16b 3047 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 3048 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 3049 aese v0.16b, v28.16b 3050 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 3051 3052 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 3053 aese v3.16b, v26.16b 3054 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 3055 ldp q27, q28, [x8, #64] //load rk4, rk5 3056 3057 aese v0.16b, v26.16b 3058 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 3059 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 3060 ldr q23, [x3, #80] //load h3l | h3h 3061 ext v23.16b, v23.16b, v23.16b, #8 3062 ldr q25, [x3, #112] //load h4l | h4h 3063 ext v25.16b, v25.16b, v25.16b, #8 3064 3065 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 3066 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 3067 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 3068 3069 aese v5.16b, v26.16b 3070 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 3071 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 3072 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 3073 3074 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 3075 aese v6.16b, v26.16b 3076 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 3077 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 3078 3079 aese v1.16b, v27.16b 3080 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 3081 aese v3.16b, v27.16b 3082 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 3083 aese v7.16b, v26.16b 3084 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 3085 3086 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 3087 aese v6.16b, v27.16b 3088 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 3089 aese v2.16b, v26.16b 3090 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 3091 3092 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 3093 aese v0.16b, v27.16b 3094 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 3095 aese v4.16b, v27.16b 3096 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 3097 3098 aese v2.16b, v27.16b 3099 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 3100 aese v5.16b, v27.16b 3101 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 3102 aese v7.16b, v27.16b 3103 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 3104 3105 .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 3106 aese v4.16b, v28.16b 3107 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 3108 ldr q20, [x3, #32] //load h1l | h1h 3109 ext v20.16b, v20.16b, v20.16b, #8 3110 ldr q22, [x3, #64] //load h2l | h2h 3111 ext v22.16b, v22.16b, v22.16b, #8 3112 3113 ldp q26, q27, [x8, #96] //load rk6, rk7 3114 aese v2.16b, v28.16b 3115 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 3116 rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free) 3117 3118 rev64 v14.16b, v14.16b //GHASH block 8k+6 (t0, t1, and t2 free) 3119 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 3120 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 3121 3122 aese v5.16b, v28.16b 3123 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 3124 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 3125 3126 aese v6.16b, v28.16b 3127 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 3128 ldr q21, [x3, #48] //load h2k | h1k 3129 ldr q24, [x3, #96] //load h4k | h3k 3130 3131 aese v1.16b, v28.16b 3132 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 3133 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 3134 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 3135 3136 aese v3.16b, v28.16b 3137 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 3138 aese v7.16b, v28.16b 3139 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 3140 aese v0.16b, v28.16b 3141 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 3142 3143 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 3144 aese v4.16b, v26.16b 3145 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 3146 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 3147 3148 aese v0.16b, v26.16b 3149 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 3150 aese v3.16b, v26.16b 3151 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 3152 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 3153 3154 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 3155 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 3156 aese v2.16b, v26.16b 3157 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 3158 3159 aese v6.16b, v26.16b 3160 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 3161 aese v5.16b, v26.16b 3162 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 3163 3164 aese v7.16b, v26.16b 3165 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 3166 aese v2.16b, v27.16b 3167 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 3168 aese v1.16b, v26.16b 3169 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 3170 3171 aese v6.16b, v27.16b 3172 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 3173 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 3174 3175 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 3176 ldp q28, q26, [x8, #128] //load rk8, rk9 3177 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 3178 3179 aese v4.16b, v27.16b 3180 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 3181 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 3182 aese v5.16b, v27.16b 3183 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 3184 3185 .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 3186 aese v7.16b, v27.16b 3187 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 3188 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 3189 3190 ldr d16, [x10] //MODULO - load modulo constant 3191 .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 3192 aese v0.16b, v27.16b 3193 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 3194 3195 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 3196 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 3197 aese v3.16b, v27.16b 3198 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 3199 3200 aese v5.16b, v28.16b 3201 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 3202 aese v4.16b, v28.16b 3203 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 3204 aese v0.16b, v28.16b 3205 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 3206 3207 aese v6.16b, v28.16b 3208 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 3209 .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 3210 aese v1.16b, v27.16b 3211 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 3212 3213 aese v7.16b, v28.16b 3214 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 3215 aese v2.16b, v28.16b 3216 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 3217 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 3218 3219 aese v1.16b, v28.16b 3220 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 3221 aese v3.16b, v28.16b 3222 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 3223 ldp q27, q28, [x8, #160] //load rk10, rk11 3224 3225 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 3226 rev32 v20.16b, v30.16b //CTR block 8k+16 3227 add v30.4s, v30.4s, v31.4s //CTR block 8k+16 3228 3229 aese v2.16b, v26.16b 3230 aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 3231 .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 3232 .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 3233 3234 aese v6.16b, v26.16b 3235 aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 3236 aese v3.16b, v26.16b 3237 aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 3238 ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load plaintext 3239 3240 pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 3241 rev32 v22.16b, v30.16b //CTR block 8k+17 3242 aese v0.16b, v26.16b 3243 aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 3244 3245 aese v4.16b, v26.16b 3246 aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 3247 aese v1.16b, v26.16b 3248 aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 3249 aese v7.16b, v26.16b 3250 aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 3251 3252 .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 3253 aese v5.16b, v26.16b 3254 aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 3255 add v30.4s, v30.4s, v31.4s //CTR block 8k+17 3256 3257 aese v2.16b, v27.16b 3258 aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 3259 aese v4.16b, v27.16b 3260 aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 3261 ldr q26, [x8, #192] //load rk12 3262 ext v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 3263 3264 aese v0.16b, v27.16b 3265 aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 3266 aese v7.16b, v27.16b 3267 aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 3268 ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load plaintext 3269 3270 aese v4.16b, v28.16b //AES block 8k+12 - round 11 3271 .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 3272 ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load plaintext 3273 3274 ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load plaintext 3275 aese v2.16b, v28.16b //AES block 8k+10 - round 11 3276 aese v1.16b, v27.16b 3277 aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 3278 3279 rev32 v23.16b, v30.16b //CTR block 8k+18 3280 aese v5.16b, v27.16b 3281 aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 3282 3283 aese v3.16b, v27.16b 3284 aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 3285 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 3286 3287 aese v6.16b, v27.16b 3288 aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 3289 aese v5.16b, v28.16b //AES block 8k+13 - round 11 3290 add v30.4s, v30.4s, v31.4s //CTR block 8k+18 3291 3292 aese v7.16b, v28.16b //AES block 8k+15 - round 11 3293 aese v0.16b, v28.16b //AES block 8k+8 - round 11 3294 .inst 0xce04698c //eor3 v12.16b, v12.16b, v4.16b, v26.16b //AES block 4 - result 3295 3296 aese v6.16b, v28.16b //AES block 8k+14 - round 11 3297 aese v3.16b, v28.16b //AES block 8k+11 - round 11 3298 aese v1.16b, v28.16b //AES block 8k+9 - round 11 3299 3300 rev32 v25.16b, v30.16b //CTR block 8k+19 3301 add v30.4s, v30.4s, v31.4s //CTR block 8k+19 3302 .inst 0xce0769ef //eor3 v15.16b, v15.16b, v7.16b, v26.16b //AES block 7 - result 3303 3304 .inst 0xce02694a //eor3 v10.16b, v10.16b, v2.16b, v26.16b //AES block 8k+10 - result 3305 .inst 0xce006908 //eor3 v8.16b, v8.16b, v0.16b, v26.16b //AES block 8k+8 - result 3306 mov v2.16b, v23.16b //CTR block 8k+18 3307 3308 .inst 0xce016929 //eor3 v9.16b, v9.16b, v1.16b, v26.16b //AES block 8k+9 - result 3309 mov v1.16b, v22.16b //CTR block 8k+17 3310 stp q8, q9, [x2], #32 //AES block 8k+8, 8k+9 - store result 3311 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 3312 3313 .inst 0xce0669ce //eor3 v14.16b, v14.16b, v6.16b, v26.16b //AES block 6 - result 3314 mov v0.16b, v20.16b //CTR block 8k+16 3315 rev32 v4.16b, v30.16b //CTR block 8k+20 3316 3317 add v30.4s, v30.4s, v31.4s //CTR block 8k+20 3318 .inst 0xce0569ad //eor3 v13.16b, v13.16b, v5.16b, v26.16b //AES block 5 - result 3319 .inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 3320 3321 .inst 0xce03696b //eor3 v11.16b, v11.16b, v3.16b, v26.16b //AES block 8k+11 - result 3322 mov v3.16b, v25.16b //CTR block 8k+19 3323 3324 stp q10, q11, [x2], #32 //AES block 8k+10, 8k+11 - store result 3325 3326 stp q12, q13, [x2], #32 //AES block 8k+12, 8k+13 - store result 3327 3328 cmp x0, x5 //.LOOP CONTROL 3329 stp q14, q15, [x2], #32 //AES block 8k+14, 8k+15 - store result 3330 b.lt .L192_enc_main_loop 3331 3332 .L192_enc_prepretail: //PREPRETAIL 3333 rev32 v5.16b, v30.16b //CTR block 8k+13 3334 ldp q26, q27, [x8, #0] //load rk0, rk1 3335 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 3336 3337 ldr q23, [x3, #176] //load h7l | h7h 3338 ext v23.16b, v23.16b, v23.16b, #8 3339 ldr q25, [x3, #208] //load h8l | h8h 3340 ext v25.16b, v25.16b, v25.16b, #8 3341 rev64 v8.16b, v8.16b //GHASH block 8k 3342 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 3343 3344 rev32 v6.16b, v30.16b //CTR block 8k+14 3345 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 3346 ldr q21, [x3, #144] //load h6k | h5k 3347 ldr q24, [x3, #192] //load h8k | h7k 3348 3349 rev64 v11.16b, v11.16b //GHASH block 8k+3 3350 rev64 v10.16b, v10.16b //GHASH block 8k+2 3351 ldr q20, [x3, #128] //load h5l | h5h 3352 ext v20.16b, v20.16b, v20.16b, #8 3353 ldr q22, [x3, #160] //load h6l | h6h 3354 ext v22.16b, v22.16b, v22.16b, #8 3355 3356 eor v8.16b, v8.16b, v19.16b //PRE 1 3357 rev32 v7.16b, v30.16b //CTR block 8k+15 3358 rev64 v9.16b, v9.16b //GHASH block 8k+1 3359 3360 aese v5.16b, v26.16b 3361 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 3362 aese v2.16b, v26.16b 3363 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 3364 aese v3.16b, v26.16b 3365 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 3366 3367 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 3368 aese v0.16b, v26.16b 3369 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 3370 aese v6.16b, v26.16b 3371 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 3372 3373 aese v1.16b, v26.16b 3374 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 3375 aese v4.16b, v26.16b 3376 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 3377 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 3378 3379 aese v6.16b, v27.16b 3380 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 3381 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 3382 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 3383 3384 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 3385 aese v7.16b, v26.16b 3386 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 3387 ldp q28, q26, [x8, #32] //load rk2, rk3 3388 3389 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 3390 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 3391 aese v2.16b, v27.16b 3392 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 3393 3394 aese v5.16b, v27.16b 3395 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 3396 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 3397 aese v1.16b, v27.16b 3398 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 3399 3400 aese v7.16b, v27.16b 3401 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 3402 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 3403 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 3404 3405 aese v3.16b, v27.16b 3406 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 3407 aese v0.16b, v27.16b 3408 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 3409 aese v4.16b, v27.16b 3410 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 3411 3412 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 3413 aese v5.16b, v28.16b 3414 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 3415 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 3416 3417 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 3418 aese v7.16b, v28.16b 3419 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 3420 .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 3421 3422 aese v5.16b, v26.16b 3423 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 3424 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 3425 aese v6.16b, v28.16b 3426 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 3427 3428 aese v0.16b, v28.16b 3429 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 3430 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 3431 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 3432 3433 aese v3.16b, v28.16b 3434 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 3435 rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free) 3436 rev64 v14.16b, v14.16b //GHASH block 8k+6 (t0, t1, and t2 free) 3437 3438 aese v2.16b, v28.16b 3439 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 3440 aese v1.16b, v28.16b 3441 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 3442 aese v4.16b, v28.16b 3443 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 3444 3445 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 3446 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 3447 ldp q27, q28, [x8, #64] //load rk4, rk5 3448 3449 aese v1.16b, v26.16b 3450 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 3451 aese v6.16b, v26.16b 3452 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 3453 aese v2.16b, v26.16b 3454 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 3455 3456 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 3457 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 3458 aese v7.16b, v26.16b 3459 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 3460 3461 ldr q23, [x3, #80] //load h3l | h3h 3462 ext v23.16b, v23.16b, v23.16b, #8 3463 ldr q25, [x3, #112] //load h4l | h4h 3464 ext v25.16b, v25.16b, v25.16b, #8 3465 aese v3.16b, v26.16b 3466 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 3467 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 3468 3469 ldr q20, [x3, #32] //load h1l | h1h 3470 ext v20.16b, v20.16b, v20.16b, #8 3471 ldr q22, [x3, #64] //load h2l | h2h 3472 ext v22.16b, v22.16b, v22.16b, #8 3473 aese v4.16b, v26.16b 3474 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 3475 rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free) 3476 3477 aese v0.16b, v26.16b 3478 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 3479 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 3480 aese v6.16b, v27.16b 3481 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 3482 3483 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 3484 aese v7.16b, v27.16b 3485 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 3486 aese v5.16b, v27.16b 3487 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 3488 3489 .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 3490 aese v3.16b, v27.16b 3491 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 3492 aese v0.16b, v27.16b 3493 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 3494 3495 aese v1.16b, v27.16b 3496 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 3497 aese v4.16b, v27.16b 3498 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 3499 aese v2.16b, v27.16b 3500 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 3501 3502 aese v0.16b, v28.16b 3503 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 3504 rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free) 3505 ldr q21, [x3, #48] //load h2k | h1k 3506 ldr q24, [x3, #96] //load h4k | h3k 3507 3508 aese v1.16b, v28.16b 3509 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 3510 aese v2.16b, v28.16b 3511 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 3512 ldp q26, q27, [x8, #96] //load rk6, rk7 3513 3514 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 3515 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 3516 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 3517 3518 aese v4.16b, v28.16b 3519 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 3520 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 3521 3522 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 3523 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 3524 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 3525 3526 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 3527 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 3528 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 3529 3530 aese v5.16b, v28.16b 3531 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 3532 aese v1.16b, v26.16b 3533 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 3534 aese v7.16b, v28.16b 3535 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 3536 3537 aese v6.16b, v28.16b 3538 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 3539 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 3540 aese v3.16b, v28.16b 3541 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 3542 3543 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 3544 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 3545 3546 aese v4.16b, v26.16b 3547 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 3548 aese v5.16b, v26.16b 3549 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 3550 aese v1.16b, v27.16b 3551 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 3552 3553 aese v0.16b, v26.16b 3554 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 3555 aese v7.16b, v26.16b 3556 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 3557 .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 3558 3559 aese v2.16b, v26.16b 3560 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 3561 .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 3562 aese v5.16b, v27.16b 3563 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 3564 3565 aese v6.16b, v26.16b 3566 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 3567 ldr d16, [x10] //MODULO - load modulo constant 3568 aese v3.16b, v26.16b 3569 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 3570 3571 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 3572 aese v0.16b, v27.16b 3573 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 3574 .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 3575 3576 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 3577 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 3578 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 3579 3580 aese v4.16b, v27.16b 3581 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 3582 aese v2.16b, v27.16b 3583 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 3584 ldp q28, q26, [x8, #128] //load rk8, rk9 3585 3586 aese v3.16b, v27.16b 3587 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 3588 .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 3589 3590 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 3591 .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 3592 3593 .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 3594 ext v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 3595 aese v7.16b, v27.16b 3596 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 3597 pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 3598 3599 aese v5.16b, v28.16b 3600 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 3601 aese v1.16b, v28.16b 3602 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 3603 3604 aese v6.16b, v27.16b 3605 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 3606 aese v2.16b, v28.16b 3607 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 3608 .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 3609 3610 aese v3.16b, v28.16b 3611 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 3612 aese v5.16b, v26.16b 3613 aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 3614 aese v4.16b, v28.16b 3615 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 3616 3617 aese v0.16b, v28.16b 3618 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 3619 aese v7.16b, v28.16b 3620 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 3621 aese v6.16b, v28.16b 3622 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 3623 3624 aese v3.16b, v26.16b 3625 aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 3626 ldp q27, q28, [x8, #160] //load rk10, rk11 3627 aese v4.16b, v26.16b 3628 aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 3629 3630 aese v2.16b, v26.16b 3631 aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 3632 aese v7.16b, v26.16b 3633 aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 3634 3635 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 3636 aese v6.16b, v26.16b 3637 aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 3638 aese v0.16b, v26.16b 3639 aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 3640 aese v1.16b, v26.16b 3641 aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 3642 3643 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 3644 ldr q26, [x8, #192] //load rk12 3645 3646 aese v7.16b, v27.16b 3647 aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 3648 aese v1.16b, v27.16b 3649 aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 3650 aese v2.16b, v27.16b 3651 aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 3652 3653 .inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 3654 aese v0.16b, v27.16b 3655 aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 3656 aese v3.16b, v27.16b 3657 aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 3658 3659 aese v1.16b, v28.16b //AES block 8k+9 - round 11 3660 aese v7.16b, v28.16b //AES block 8k+15 - round 11 3661 3662 aese v4.16b, v27.16b 3663 aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 3664 aese v3.16b, v28.16b //AES block 8k+11 - round 11 3665 3666 aese v5.16b, v27.16b 3667 aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 3668 aese v6.16b, v27.16b 3669 aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 3670 3671 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 3672 aese v2.16b, v28.16b //AES block 8k+10 - round 11 3673 aese v0.16b, v28.16b //AES block 8k+8 - round 11 3674 3675 aese v6.16b, v28.16b //AES block 8k+14 - round 11 3676 aese v4.16b, v28.16b //AES block 8k+12 - round 11 3677 aese v5.16b, v28.16b //AES block 8k+13 - round 11 3678 3679 .L192_enc_tail: //TAIL 3680 3681 ldp q20, q21, [x3, #128] //load h5l | h5h 3682 ext v20.16b, v20.16b, v20.16b, #8 3683 sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process 3684 3685 ldr q8, [x0], #16 //AES block 8k+8 - l3ad plaintext 3686 3687 ldp q24, q25, [x3, #192] //load h8k | h7k 3688 ext v25.16b, v25.16b, v25.16b, #8 3689 3690 mov v29.16b, v26.16b 3691 3692 ldp q22, q23, [x3, #160] //load h6l | h6h 3693 ext v22.16b, v22.16b, v22.16b, #8 3694 ext v23.16b, v23.16b, v23.16b, #8 3695 cmp x5, #112 3696 3697 .inst 0xce007509 //eor3 v9.16b, v8.16b, v0.16b, v29.16b //AES block 8k+8 - result 3698 ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag 3699 b.gt .L192_enc_blocks_more_than_7 3700 3701 cmp x5, #96 3702 mov v7.16b, v6.16b 3703 movi v17.8b, #0 3704 3705 mov v6.16b, v5.16b 3706 movi v19.8b, #0 3707 sub v30.4s, v30.4s, v31.4s 3708 3709 mov v5.16b, v4.16b 3710 mov v4.16b, v3.16b 3711 mov v3.16b, v2.16b 3712 3713 mov v2.16b, v1.16b 3714 movi v18.8b, #0 3715 b.gt .L192_enc_blocks_more_than_6 3716 3717 mov v7.16b, v6.16b 3718 cmp x5, #80 3719 3720 mov v6.16b, v5.16b 3721 mov v5.16b, v4.16b 3722 mov v4.16b, v3.16b 3723 3724 mov v3.16b, v1.16b 3725 sub v30.4s, v30.4s, v31.4s 3726 b.gt .L192_enc_blocks_more_than_5 3727 3728 cmp x5, #64 3729 sub v30.4s, v30.4s, v31.4s 3730 3731 mov v7.16b, v6.16b 3732 mov v6.16b, v5.16b 3733 mov v5.16b, v4.16b 3734 3735 mov v4.16b, v1.16b 3736 b.gt .L192_enc_blocks_more_than_4 3737 3738 mov v7.16b, v6.16b 3739 mov v6.16b, v5.16b 3740 mov v5.16b, v1.16b 3741 3742 sub v30.4s, v30.4s, v31.4s 3743 cmp x5, #48 3744 b.gt .L192_enc_blocks_more_than_3 3745 3746 mov v7.16b, v6.16b 3747 mov v6.16b, v1.16b 3748 sub v30.4s, v30.4s, v31.4s 3749 3750 ldr q24, [x3, #96] //load h4k | h3k 3751 cmp x5, #32 3752 b.gt .L192_enc_blocks_more_than_2 3753 3754 sub v30.4s, v30.4s, v31.4s 3755 3756 cmp x5, #16 3757 mov v7.16b, v1.16b 3758 b.gt .L192_enc_blocks_more_than_1 3759 3760 sub v30.4s, v30.4s, v31.4s 3761 ldr q21, [x3, #48] //load h2k | h1k 3762 b .L192_enc_blocks_less_than_1 3763 .L192_enc_blocks_more_than_7: //blocks left > 7 3764 st1 { v9.16b}, [x2], #16 //AES final-7 block - store result 3765 3766 rev64 v8.16b, v9.16b //GHASH final-7 block 3767 ins v18.d[0], v24.d[1] //GHASH final-7 block - mid 3768 3769 eor v8.16b, v8.16b, v16.16b //feed in partial tag 3770 3771 ins v27.d[0], v8.d[1] //GHASH final-7 block - mid 3772 3773 ldr q9, [x0], #16 //AES final-6 block - load plaintext 3774 3775 eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid 3776 movi v16.8b, #0 //suppress further partial tag feed in 3777 pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low 3778 3779 pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high 3780 3781 pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid 3782 .inst 0xce017529 //eor3 v9.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result 3783 .L192_enc_blocks_more_than_6: //blocks left > 6 3784 3785 st1 { v9.16b}, [x2], #16 //AES final-6 block - store result 3786 3787 rev64 v8.16b, v9.16b //GHASH final-6 block 3788 3789 ldr q9, [x0], #16 //AES final-5 block - load plaintext 3790 3791 eor v8.16b, v8.16b, v16.16b //feed in partial tag 3792 3793 ins v27.d[0], v8.d[1] //GHASH final-6 block - mid 3794 3795 pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low 3796 .inst 0xce027529 //eor3 v9.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result 3797 3798 movi v16.8b, #0 //suppress further partial tag feed in 3799 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high 3800 eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid 3801 3802 pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid 3803 3804 eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high 3805 eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low 3806 3807 eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid 3808 .L192_enc_blocks_more_than_5: //blocks left > 5 3809 3810 st1 { v9.16b}, [x2], #16 //AES final-5 block - store result 3811 3812 rev64 v8.16b, v9.16b //GHASH final-5 block 3813 3814 eor v8.16b, v8.16b, v16.16b //feed in partial tag 3815 3816 ins v27.d[0], v8.d[1] //GHASH final-5 block - mid 3817 3818 ldr q9, [x0], #16 //AES final-4 block - load plaintext 3819 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high 3820 3821 eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid 3822 eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high 3823 3824 ins v27.d[1], v27.d[0] //GHASH final-5 block - mid 3825 pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low 3826 3827 eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low 3828 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid 3829 3830 .inst 0xce037529 //eor3 v9.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result 3831 movi v16.8b, #0 //suppress further partial tag feed in 3832 3833 eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid 3834 .L192_enc_blocks_more_than_4: //blocks left > 4 3835 3836 st1 { v9.16b}, [x2], #16 //AES final-4 block - store result 3837 3838 rev64 v8.16b, v9.16b //GHASH final-4 block 3839 3840 eor v8.16b, v8.16b, v16.16b //feed in partial tag 3841 3842 ldr q9, [x0], #16 //AES final-3 block - load plaintext 3843 pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high 3844 ins v27.d[0], v8.d[1] //GHASH final-4 block - mid 3845 3846 pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low 3847 eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high 3848 3849 eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid 3850 3851 movi v16.8b, #0 //suppress further partial tag feed in 3852 eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low 3853 3854 pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid 3855 3856 eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid 3857 .inst 0xce047529 //eor3 v9.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result 3858 .L192_enc_blocks_more_than_3: //blocks left > 3 3859 3860 ldr q24, [x3, #96] //load h4k | h3k 3861 st1 { v9.16b}, [x2], #16 //AES final-3 block - store result 3862 3863 rev64 v8.16b, v9.16b //GHASH final-3 block 3864 3865 eor v8.16b, v8.16b, v16.16b //feed in partial tag 3866 movi v16.8b, #0 //suppress further partial tag feed in 3867 3868 ldr q9, [x0], #16 //AES final-2 block - load plaintext 3869 ldr q25, [x3, #112] //load h4l | h4h 3870 ext v25.16b, v25.16b, v25.16b, #8 3871 3872 ins v27.d[0], v8.d[1] //GHASH final-3 block - mid 3873 3874 .inst 0xce057529 //eor3 v9.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result 3875 eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid 3876 3877 ins v27.d[1], v27.d[0] //GHASH final-3 block - mid 3878 pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low 3879 3880 pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high 3881 pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid 3882 3883 eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low 3884 3885 eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid 3886 eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high 3887 .L192_enc_blocks_more_than_2: //blocks left > 2 3888 3889 st1 { v9.16b}, [x2], #16 //AES final-2 block - store result 3890 3891 rev64 v8.16b, v9.16b //GHASH final-2 block 3892 ldr q23, [x3, #80] //load h3l | h3h 3893 ext v23.16b, v23.16b, v23.16b, #8 3894 3895 eor v8.16b, v8.16b, v16.16b //feed in partial tag 3896 3897 ldr q9, [x0], #16 //AES final-1 block - load plaintext 3898 ins v27.d[0], v8.d[1] //GHASH final-2 block - mid 3899 3900 eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid 3901 3902 pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low 3903 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high 3904 movi v16.8b, #0 //suppress further partial tag feed in 3905 3906 pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid 3907 3908 eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low 3909 eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high 3910 3911 eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid 3912 .inst 0xce067529 //eor3 v9.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result 3913 .L192_enc_blocks_more_than_1: //blocks left > 1 3914 3915 ldr q22, [x3, #64] //load h1l | h1h 3916 ext v22.16b, v22.16b, v22.16b, #8 3917 st1 { v9.16b}, [x2], #16 //AES final-1 block - store result 3918 3919 rev64 v8.16b, v9.16b //GHASH final-1 block 3920 3921 eor v8.16b, v8.16b, v16.16b //feed in partial tag 3922 3923 ins v27.d[0], v8.d[1] //GHASH final-1 block - mid 3924 pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low 3925 3926 eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low 3927 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high 3928 eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid 3929 3930 ldr q9, [x0], #16 //AES final block - load plaintext 3931 ldr q21, [x3, #48] //load h2k | h1k 3932 3933 ins v27.d[1], v27.d[0] //GHASH final-1 block - mid 3934 3935 .inst 0xce077529 //eor3 v9.16b, v9.16b, v7.16b, v29.16b //AES final block - result 3936 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid 3937 3938 movi v16.8b, #0 //suppress further partial tag feed in 3939 3940 eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid 3941 eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high 3942 .L192_enc_blocks_less_than_1: //blocks left <= 1 3943 3944 mvn x6, xzr //temp0_x = 0xffffffffffffffff 3945 and x1, x1, #127 //bit_length %= 128 3946 3947 sub x1, x1, #128 //bit_length -= 128 3948 3949 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) 3950 3951 and x1, x1, #127 //bit_length %= 128 3952 3953 lsr x6, x6, x1 //temp0_x is mask for top 64b of last block 3954 cmp x1, #64 3955 mvn x7, xzr //temp1_x = 0xffffffffffffffff 3956 3957 csel x13, x7, x6, lt 3958 csel x14, x6, xzr, lt 3959 3960 mov v0.d[1], x14 3961 ldr q20, [x3, #32] //load h1l | h1h 3962 ext v20.16b, v20.16b, v20.16b, #8 3963 3964 ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored 3965 mov v0.d[0], x13 //ctr0b is mask for last block 3966 3967 and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits 3968 3969 rev64 v8.16b, v9.16b //GHASH final block 3970 bif v9.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing 3971 3972 st1 { v9.16b}, [x2] //store all 16B 3973 3974 eor v8.16b, v8.16b, v16.16b //feed in partial tag 3975 3976 ins v16.d[0], v8.d[1] //GHASH final block - mid 3977 pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high 3978 3979 eor v17.16b, v17.16b, v28.16b //GHASH final block - high 3980 pmull v26.1q, v8.1d, v20.1d //GHASH final block - low 3981 3982 eor v16.8b, v16.8b, v8.8b //GHASH final block - mid 3983 3984 pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid 3985 3986 eor v18.16b, v18.16b, v16.16b //GHASH final block - mid 3987 ldr d16, [x10] //MODULO - load modulo constant 3988 3989 eor v19.16b, v19.16b, v26.16b //GHASH final block - low 3990 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 3991 3992 rev32 v30.16b, v30.16b 3993 3994 str q30, [x16] //store the updated counter 3995 .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 3996 3997 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 3998 3999 .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 4000 4001 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 4002 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 4003 4004 .inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 4005 ext v19.16b, v19.16b, v19.16b, #8 4006 rev64 v19.16b, v19.16b 4007 st1 { v19.16b }, [x3] 4008 4009 mov x0, x9 //return sizes 4010 4011 ldp d10, d11, [sp, #16] 4012 ldp d12, d13, [sp, #32] 4013 ldp d14, d15, [sp, #48] 4014 ldp d8, d9, [sp], #80 4015 ret 4016 4017 .L192_enc_ret: 4018 mov w0, #0x0 4019 ret 4020 .size unroll8_eor3_aes_gcm_enc_192_kernel,.-unroll8_eor3_aes_gcm_enc_192_kernel 4021 .globl unroll8_eor3_aes_gcm_dec_192_kernel 4022 .type unroll8_eor3_aes_gcm_dec_192_kernel,%function 4023 .align 4 4024 unroll8_eor3_aes_gcm_dec_192_kernel: 4025 AARCH64_VALID_CALL_TARGET 4026 cbz x1, .L192_dec_ret 4027 stp d8, d9, [sp, #-80]! 4028 lsr x9, x1, #3 4029 mov x16, x4 4030 mov x8, x5 4031 stp d10, d11, [sp, #16] 4032 stp d12, d13, [sp, #32] 4033 stp d14, d15, [sp, #48] 4034 mov x5, #0xc200000000000000 4035 stp x5, xzr, [sp, #64] 4036 add x10, sp, #64 4037 4038 mov x5, x9 4039 ld1 { v0.16b}, [x16] //CTR block 0 4040 ld1 { v19.16b}, [x3] 4041 4042 mov x15, #0x100000000 //set up counter increment 4043 movi v31.16b, #0x0 4044 mov v31.d[1], x15 4045 4046 rev32 v30.16b, v0.16b //set up reversed counter 4047 4048 add v30.4s, v30.4s, v31.4s //CTR block 0 4049 4050 rev32 v1.16b, v30.16b //CTR block 1 4051 add v30.4s, v30.4s, v31.4s //CTR block 1 4052 4053 rev32 v2.16b, v30.16b //CTR block 2 4054 add v30.4s, v30.4s, v31.4s //CTR block 2 4055 4056 rev32 v3.16b, v30.16b //CTR block 3 4057 add v30.4s, v30.4s, v31.4s //CTR block 3 4058 4059 rev32 v4.16b, v30.16b //CTR block 4 4060 add v30.4s, v30.4s, v31.4s //CTR block 4 4061 4062 rev32 v5.16b, v30.16b //CTR block 5 4063 add v30.4s, v30.4s, v31.4s //CTR block 5 4064 ldp q26, q27, [x8, #0] //load rk0, rk1 4065 4066 rev32 v6.16b, v30.16b //CTR block 6 4067 add v30.4s, v30.4s, v31.4s //CTR block 6 4068 4069 rev32 v7.16b, v30.16b //CTR block 7 4070 4071 aese v3.16b, v26.16b 4072 aesmc v3.16b, v3.16b //AES block 3 - round 0 4073 aese v6.16b, v26.16b 4074 aesmc v6.16b, v6.16b //AES block 6 - round 0 4075 aese v5.16b, v26.16b 4076 aesmc v5.16b, v5.16b //AES block 5 - round 0 4077 4078 aese v0.16b, v26.16b 4079 aesmc v0.16b, v0.16b //AES block 0 - round 0 4080 aese v1.16b, v26.16b 4081 aesmc v1.16b, v1.16b //AES block 1 - round 0 4082 aese v7.16b, v26.16b 4083 aesmc v7.16b, v7.16b //AES block 7 - round 0 4084 4085 aese v2.16b, v26.16b 4086 aesmc v2.16b, v2.16b //AES block 2 - round 0 4087 aese v4.16b, v26.16b 4088 aesmc v4.16b, v4.16b //AES block 4 - round 0 4089 ldp q28, q26, [x8, #32] //load rk2, rk3 4090 4091 aese v1.16b, v27.16b 4092 aesmc v1.16b, v1.16b //AES block 1 - round 1 4093 4094 aese v2.16b, v27.16b 4095 aesmc v2.16b, v2.16b //AES block 2 - round 1 4096 4097 aese v0.16b, v27.16b 4098 aesmc v0.16b, v0.16b //AES block 0 - round 1 4099 aese v3.16b, v27.16b 4100 aesmc v3.16b, v3.16b //AES block 3 - round 1 4101 aese v7.16b, v27.16b 4102 aesmc v7.16b, v7.16b //AES block 7 - round 1 4103 4104 aese v5.16b, v27.16b 4105 aesmc v5.16b, v5.16b //AES block 5 - round 1 4106 aese v6.16b, v27.16b 4107 aesmc v6.16b, v6.16b //AES block 6 - round 1 4108 4109 aese v7.16b, v28.16b 4110 aesmc v7.16b, v7.16b //AES block 7 - round 2 4111 aese v0.16b, v28.16b 4112 aesmc v0.16b, v0.16b //AES block 0 - round 2 4113 aese v4.16b, v27.16b 4114 aesmc v4.16b, v4.16b //AES block 4 - round 1 4115 4116 aese v5.16b, v28.16b 4117 aesmc v5.16b, v5.16b //AES block 5 - round 2 4118 aese v1.16b, v28.16b 4119 aesmc v1.16b, v1.16b //AES block 1 - round 2 4120 aese v2.16b, v28.16b 4121 aesmc v2.16b, v2.16b //AES block 2 - round 2 4122 4123 aese v3.16b, v28.16b 4124 aesmc v3.16b, v3.16b //AES block 3 - round 2 4125 aese v4.16b, v28.16b 4126 aesmc v4.16b, v4.16b //AES block 4 - round 2 4127 aese v6.16b, v28.16b 4128 aesmc v6.16b, v6.16b //AES block 6 - round 2 4129 4130 aese v7.16b, v26.16b 4131 aesmc v7.16b, v7.16b //AES block 7 - round 3 4132 4133 ldp q27, q28, [x8, #64] //load rk4, rk5 4134 aese v2.16b, v26.16b 4135 aesmc v2.16b, v2.16b //AES block 2 - round 3 4136 aese v5.16b, v26.16b 4137 aesmc v5.16b, v5.16b //AES block 5 - round 3 4138 4139 aese v0.16b, v26.16b 4140 aesmc v0.16b, v0.16b //AES block 0 - round 3 4141 aese v3.16b, v26.16b 4142 aesmc v3.16b, v3.16b //AES block 3 - round 3 4143 4144 aese v4.16b, v26.16b 4145 aesmc v4.16b, v4.16b //AES block 4 - round 3 4146 aese v1.16b, v26.16b 4147 aesmc v1.16b, v1.16b //AES block 1 - round 3 4148 aese v6.16b, v26.16b 4149 aesmc v6.16b, v6.16b //AES block 6 - round 3 4150 4151 aese v3.16b, v27.16b 4152 aesmc v3.16b, v3.16b //AES block 3 - round 4 4153 aese v2.16b, v27.16b 4154 aesmc v2.16b, v2.16b //AES block 2 - round 4 4155 aese v5.16b, v27.16b 4156 aesmc v5.16b, v5.16b //AES block 5 - round 4 4157 4158 aese v1.16b, v27.16b 4159 aesmc v1.16b, v1.16b //AES block 1 - round 4 4160 aese v7.16b, v27.16b 4161 aesmc v7.16b, v7.16b //AES block 7 - round 4 4162 aese v6.16b, v27.16b 4163 aesmc v6.16b, v6.16b //AES block 6 - round 4 4164 4165 aese v0.16b, v27.16b 4166 aesmc v0.16b, v0.16b //AES block 0 - round 4 4167 aese v5.16b, v28.16b 4168 aesmc v5.16b, v5.16b //AES block 5 - round 5 4169 aese v4.16b, v27.16b 4170 aesmc v4.16b, v4.16b //AES block 4 - round 4 4171 4172 aese v6.16b, v28.16b 4173 aesmc v6.16b, v6.16b //AES block 6 - round 5 4174 ldp q26, q27, [x8, #96] //load rk6, rk7 4175 4176 aese v0.16b, v28.16b 4177 aesmc v0.16b, v0.16b //AES block 0 - round 5 4178 aese v4.16b, v28.16b 4179 aesmc v4.16b, v4.16b //AES block 4 - round 5 4180 aese v1.16b, v28.16b 4181 aesmc v1.16b, v1.16b //AES block 1 - round 5 4182 4183 aese v3.16b, v28.16b 4184 aesmc v3.16b, v3.16b //AES block 3 - round 5 4185 aese v2.16b, v28.16b 4186 aesmc v2.16b, v2.16b //AES block 2 - round 5 4187 aese v7.16b, v28.16b 4188 aesmc v7.16b, v7.16b //AES block 7 - round 5 4189 4190 sub x5, x5, #1 //byte_len - 1 4191 4192 aese v4.16b, v26.16b 4193 aesmc v4.16b, v4.16b //AES block 4 - round 6 4194 aese v5.16b, v26.16b 4195 aesmc v5.16b, v5.16b //AES block 5 - round 6 4196 aese v1.16b, v26.16b 4197 aesmc v1.16b, v1.16b //AES block 1 - round 6 4198 4199 aese v0.16b, v26.16b 4200 aesmc v0.16b, v0.16b //AES block 0 - round 6 4201 aese v3.16b, v26.16b 4202 aesmc v3.16b, v3.16b //AES block 3 - round 6 4203 aese v6.16b, v26.16b 4204 aesmc v6.16b, v6.16b //AES block 6 - round 6 4205 4206 aese v7.16b, v26.16b 4207 aesmc v7.16b, v7.16b //AES block 7 - round 6 4208 aese v2.16b, v26.16b 4209 aesmc v2.16b, v2.16b //AES block 2 - round 6 4210 ldp q28, q26, [x8, #128] //load rk8, rk9 4211 4212 add v30.4s, v30.4s, v31.4s //CTR block 7 4213 4214 aese v3.16b, v27.16b 4215 aesmc v3.16b, v3.16b //AES block 3 - round 7 4216 aese v7.16b, v27.16b 4217 aesmc v7.16b, v7.16b //AES block 7 - round 7 4218 4219 aese v2.16b, v27.16b 4220 aesmc v2.16b, v2.16b //AES block 2 - round 7 4221 aese v1.16b, v27.16b 4222 aesmc v1.16b, v1.16b //AES block 1 - round 7 4223 aese v4.16b, v27.16b 4224 aesmc v4.16b, v4.16b //AES block 4 - round 7 4225 4226 aese v6.16b, v27.16b 4227 aesmc v6.16b, v6.16b //AES block 6 - round 7 4228 aese v0.16b, v27.16b 4229 aesmc v0.16b, v0.16b //AES block 0 - round 7 4230 aese v5.16b, v27.16b 4231 aesmc v5.16b, v5.16b //AES block 5 - round 7 4232 4233 aese v1.16b, v28.16b 4234 aesmc v1.16b, v1.16b //AES block 1 - round 8 4235 aese v2.16b, v28.16b 4236 aesmc v2.16b, v2.16b //AES block 2 - round 8 4237 and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 4238 4239 aese v7.16b, v28.16b 4240 aesmc v7.16b, v7.16b //AES block 7 - round 8 4241 aese v6.16b, v28.16b 4242 aesmc v6.16b, v6.16b //AES block 6 - round 8 4243 aese v5.16b, v28.16b 4244 aesmc v5.16b, v5.16b //AES block 5 - round 8 4245 4246 aese v4.16b, v28.16b 4247 aesmc v4.16b, v4.16b //AES block 4 - round 8 4248 aese v3.16b, v28.16b 4249 aesmc v3.16b, v3.16b //AES block 3 - round 8 4250 aese v0.16b, v28.16b 4251 aesmc v0.16b, v0.16b //AES block 0 - round 8 4252 4253 add x4, x0, x1, lsr #3 //end_input_ptr 4254 aese v6.16b, v26.16b 4255 aesmc v6.16b, v6.16b //AES block 6 - round 9 4256 4257 ld1 { v19.16b}, [x3] 4258 ext v19.16b, v19.16b, v19.16b, #8 4259 rev64 v19.16b, v19.16b 4260 4261 ldp q27, q28, [x8, #160] //load rk10, rk11 4262 4263 aese v0.16b, v26.16b 4264 aesmc v0.16b, v0.16b //AES block 0 - round 9 4265 add x5, x5, x0 4266 4267 aese v1.16b, v26.16b 4268 aesmc v1.16b, v1.16b //AES block 1 - round 9 4269 aese v7.16b, v26.16b 4270 aesmc v7.16b, v7.16b //AES block 7 - round 9 4271 aese v4.16b, v26.16b 4272 aesmc v4.16b, v4.16b //AES block 4 - round 9 4273 4274 cmp x0, x5 //check if we have <= 8 blocks 4275 aese v3.16b, v26.16b 4276 aesmc v3.16b, v3.16b //AES block 3 - round 9 4277 4278 aese v5.16b, v26.16b 4279 aesmc v5.16b, v5.16b //AES block 5 - round 9 4280 aese v2.16b, v26.16b 4281 aesmc v2.16b, v2.16b //AES block 2 - round 9 4282 4283 aese v3.16b, v27.16b 4284 aesmc v3.16b, v3.16b //AES block 3 - round 10 4285 aese v1.16b, v27.16b 4286 aesmc v1.16b, v1.16b //AES block 1 - round 10 4287 aese v7.16b, v27.16b 4288 aesmc v7.16b, v7.16b //AES block 7 - round 10 4289 4290 aese v4.16b, v27.16b 4291 aesmc v4.16b, v4.16b //AES block 4 - round 10 4292 aese v0.16b, v27.16b 4293 aesmc v0.16b, v0.16b //AES block 0 - round 10 4294 aese v2.16b, v27.16b 4295 aesmc v2.16b, v2.16b //AES block 2 - round 10 4296 4297 aese v6.16b, v27.16b 4298 aesmc v6.16b, v6.16b //AES block 6 - round 10 4299 aese v5.16b, v27.16b 4300 aesmc v5.16b, v5.16b //AES block 5 - round 10 4301 ldr q26, [x8, #192] //load rk12 4302 4303 aese v0.16b, v28.16b //AES block 0 - round 11 4304 aese v1.16b, v28.16b //AES block 1 - round 11 4305 aese v4.16b, v28.16b //AES block 4 - round 11 4306 4307 aese v6.16b, v28.16b //AES block 6 - round 11 4308 aese v5.16b, v28.16b //AES block 5 - round 11 4309 aese v7.16b, v28.16b //AES block 7 - round 11 4310 4311 aese v2.16b, v28.16b //AES block 2 - round 11 4312 aese v3.16b, v28.16b //AES block 3 - round 11 4313 b.ge .L192_dec_tail //handle tail 4314 4315 ldp q8, q9, [x0], #32 //AES block 0, 1 - load ciphertext 4316 4317 ldp q10, q11, [x0], #32 //AES block 2, 3 - load ciphertext 4318 4319 ldp q12, q13, [x0], #32 //AES block 4, 5 - load ciphertext 4320 4321 .inst 0xce016921 //eor3 v1.16b, v9.16b, v1.16b, v26.16b //AES block 1 - result 4322 .inst 0xce006900 //eor3 v0.16b, v8.16b, v0.16b, v26.16b //AES block 0 - result 4323 stp q0, q1, [x2], #32 //AES block 0, 1 - store result 4324 4325 rev32 v0.16b, v30.16b //CTR block 8 4326 add v30.4s, v30.4s, v31.4s //CTR block 8 4327 4328 rev32 v1.16b, v30.16b //CTR block 9 4329 add v30.4s, v30.4s, v31.4s //CTR block 9 4330 .inst 0xce036963 //eor3 v3.16b, v11.16b, v3.16b, v26.16b //AES block 3 - result 4331 4332 .inst 0xce026942 //eor3 v2.16b, v10.16b, v2.16b, v26.16b //AES block 2 - result 4333 stp q2, q3, [x2], #32 //AES block 2, 3 - store result 4334 ldp q14, q15, [x0], #32 //AES block 6, 7 - load ciphertext 4335 4336 rev32 v2.16b, v30.16b //CTR block 10 4337 add v30.4s, v30.4s, v31.4s //CTR block 10 4338 4339 .inst 0xce046984 //eor3 v4.16b, v12.16b, v4.16b, v26.16b //AES block 4 - result 4340 4341 rev32 v3.16b, v30.16b //CTR block 11 4342 add v30.4s, v30.4s, v31.4s //CTR block 11 4343 4344 .inst 0xce0569a5 //eor3 v5.16b, v13.16b, v5.16b, v26.16b //AES block 5 - result 4345 stp q4, q5, [x2], #32 //AES block 4, 5 - store result 4346 cmp x0, x5 //check if we have <= 8 blocks 4347 4348 .inst 0xce0669c6 //eor3 v6.16b, v14.16b, v6.16b, v26.16b //AES block 6 - result 4349 .inst 0xce0769e7 //eor3 v7.16b, v15.16b, v7.16b, v26.16b //AES block 7 - result 4350 rev32 v4.16b, v30.16b //CTR block 12 4351 4352 add v30.4s, v30.4s, v31.4s //CTR block 12 4353 stp q6, q7, [x2], #32 //AES block 6, 7 - store result 4354 b.ge .L192_dec_prepretail //do prepretail 4355 4356 .L192_dec_main_loop: //main loop start 4357 rev64 v9.16b, v9.16b //GHASH block 8k+1 4358 ldp q26, q27, [x8, #0] //load rk0, rk1 4359 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 4360 4361 rev64 v8.16b, v8.16b //GHASH block 8k 4362 rev32 v5.16b, v30.16b //CTR block 8k+13 4363 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 4364 4365 ldr q23, [x3, #176] //load h7l | h7h 4366 ext v23.16b, v23.16b, v23.16b, #8 4367 ldr q25, [x3, #208] //load h8l | h8h 4368 ext v25.16b, v25.16b, v25.16b, #8 4369 rev64 v12.16b, v12.16b //GHASH block 8k+4 4370 rev64 v11.16b, v11.16b //GHASH block 8k+3 4371 4372 eor v8.16b, v8.16b, v19.16b //PRE 1 4373 rev32 v6.16b, v30.16b //CTR block 8k+14 4374 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 4375 4376 rev64 v13.16b, v13.16b //GHASH block 8k+5 4377 4378 rev32 v7.16b, v30.16b //CTR block 8k+15 4379 aese v1.16b, v26.16b 4380 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 4381 aese v6.16b, v26.16b 4382 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 4383 4384 aese v5.16b, v26.16b 4385 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 4386 aese v4.16b, v26.16b 4387 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 4388 aese v0.16b, v26.16b 4389 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 4390 4391 aese v7.16b, v26.16b 4392 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 4393 aese v2.16b, v26.16b 4394 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 4395 aese v3.16b, v26.16b 4396 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 4397 4398 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 4399 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 4400 ldp q28, q26, [x8, #32] //load rk2, rk3 4401 4402 aese v6.16b, v27.16b 4403 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 4404 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 4405 ldr q20, [x3, #128] //load h5l | h5h 4406 ext v20.16b, v20.16b, v20.16b, #8 4407 ldr q22, [x3, #160] //load h6l | h6h 4408 ext v22.16b, v22.16b, v22.16b, #8 4409 4410 aese v0.16b, v27.16b 4411 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 4412 aese v3.16b, v27.16b 4413 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 4414 aese v7.16b, v27.16b 4415 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 4416 4417 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 4418 aese v2.16b, v27.16b 4419 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 4420 aese v4.16b, v27.16b 4421 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 4422 4423 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 4424 rev64 v10.16b, v10.16b //GHASH block 8k+2 4425 aese v1.16b, v27.16b 4426 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 4427 4428 aese v5.16b, v27.16b 4429 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 4430 ldr q21, [x3, #144] //load h6k | h5k 4431 ldr q24, [x3, #192] //load h8k | h7k 4432 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 4433 4434 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 4435 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 4436 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 4437 4438 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 4439 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 4440 aese v6.16b, v28.16b 4441 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 4442 4443 aese v2.16b, v28.16b 4444 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 4445 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 4446 .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 4447 4448 aese v1.16b, v28.16b 4449 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 4450 aese v6.16b, v26.16b 4451 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 4452 aese v4.16b, v28.16b 4453 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 4454 4455 aese v0.16b, v28.16b 4456 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 4457 aese v7.16b, v28.16b 4458 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 4459 aese v3.16b, v28.16b 4460 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 4461 4462 ldr q23, [x3, #80] //load h3l | h3h 4463 ext v23.16b, v23.16b, v23.16b, #8 4464 ldr q25, [x3, #112] //load h4l | h4h 4465 ext v25.16b, v25.16b, v25.16b, #8 4466 aese v5.16b, v28.16b 4467 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 4468 aese v2.16b, v26.16b 4469 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 4470 4471 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 4472 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 4473 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 4474 4475 aese v3.16b, v26.16b 4476 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 4477 aese v4.16b, v26.16b 4478 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 4479 4480 aese v0.16b, v26.16b 4481 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 4482 aese v7.16b, v26.16b 4483 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 4484 ldp q27, q28, [x8, #64] //load rk4, rk5 4485 4486 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 4487 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 4488 aese v1.16b, v26.16b 4489 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 4490 4491 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 4492 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 4493 4494 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 4495 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 4496 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 4497 4498 aese v5.16b, v26.16b 4499 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 4500 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 4501 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 4502 4503 aese v4.16b, v27.16b 4504 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 4505 aese v6.16b, v27.16b 4506 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 4507 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 4508 4509 aese v5.16b, v27.16b 4510 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 4511 aese v1.16b, v27.16b 4512 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 4513 aese v3.16b, v27.16b 4514 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 4515 4516 aese v2.16b, v27.16b 4517 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 4518 aese v0.16b, v27.16b 4519 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 4520 aese v7.16b, v27.16b 4521 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 4522 4523 ldr q20, [x3, #32] //load h1l | h1h 4524 ext v20.16b, v20.16b, v20.16b, #8 4525 ldr q22, [x3, #64] //load h2l | h2h 4526 ext v22.16b, v22.16b, v22.16b, #8 4527 aese v3.16b, v28.16b 4528 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 4529 aese v5.16b, v28.16b 4530 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 4531 4532 ldp q26, q27, [x8, #96] //load rk6, rk7 4533 aese v7.16b, v28.16b 4534 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 4535 rev64 v15.16b, v15.16b //GHASH block 8k+7 4536 4537 aese v4.16b, v28.16b 4538 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 4539 .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 4540 aese v1.16b, v28.16b 4541 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 4542 4543 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 4544 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 4545 aese v2.16b, v28.16b 4546 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 4547 4548 aese v6.16b, v28.16b 4549 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 4550 aese v0.16b, v28.16b 4551 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 4552 rev64 v14.16b, v14.16b //GHASH block 8k+6 4553 4554 ldr q21, [x3, #48] //load h2k | h1k 4555 ldr q24, [x3, #96] //load h4k | h3k 4556 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 4557 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 4558 4559 aese v0.16b, v26.16b 4560 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 4561 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 4562 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 4563 4564 aese v7.16b, v26.16b 4565 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 4566 aese v2.16b, v26.16b 4567 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 4568 aese v6.16b, v26.16b 4569 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 4570 4571 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 4572 aese v3.16b, v26.16b 4573 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 4574 aese v1.16b, v26.16b 4575 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 4576 4577 aese v2.16b, v27.16b 4578 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 4579 aese v6.16b, v27.16b 4580 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 4581 aese v5.16b, v26.16b 4582 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 4583 4584 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 4585 .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 4586 .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 4587 4588 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 4589 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 4590 aese v4.16b, v26.16b 4591 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 4592 4593 aese v5.16b, v27.16b 4594 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 4595 ldp q28, q26, [x8, #128] //load rk8, rk9 4596 aese v3.16b, v27.16b 4597 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 4598 4599 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 4600 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 4601 aese v1.16b, v27.16b 4602 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 4603 4604 aese v4.16b, v27.16b 4605 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 4606 aese v0.16b, v27.16b 4607 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 4608 aese v7.16b, v27.16b 4609 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 4610 4611 .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 4612 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 4613 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 4614 4615 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 4616 ldr d16, [x10] //MODULO - load modulo constant 4617 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 4618 4619 aese v2.16b, v28.16b 4620 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 4621 aese v5.16b, v28.16b 4622 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 4623 aese v7.16b, v28.16b 4624 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 4625 4626 aese v0.16b, v28.16b 4627 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 4628 aese v3.16b, v28.16b 4629 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 4630 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 4631 4632 aese v4.16b, v28.16b 4633 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 4634 aese v1.16b, v28.16b 4635 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 4636 aese v6.16b, v28.16b 4637 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 4638 4639 .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 4640 rev32 v20.16b, v30.16b //CTR block 8k+16 4641 add v30.4s, v30.4s, v31.4s //CTR block 8k+16 4642 4643 aese v5.16b, v26.16b 4644 aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 4645 .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 4646 aese v1.16b, v26.16b 4647 aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 4648 4649 aese v3.16b, v26.16b 4650 aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 4651 aese v7.16b, v26.16b 4652 aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 4653 ldp q27, q28, [x8, #160] //load rk10, rk11 4654 4655 .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 4656 ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load ciphertext 4657 4658 aese v2.16b, v26.16b 4659 aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 4660 aese v0.16b, v26.16b 4661 aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 4662 ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load ciphertext 4663 4664 rev32 v22.16b, v30.16b //CTR block 8k+17 4665 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 4666 add v30.4s, v30.4s, v31.4s //CTR block 8k+17 4667 4668 aese v6.16b, v26.16b 4669 aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 4670 aese v4.16b, v26.16b 4671 aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 4672 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 4673 4674 aese v3.16b, v27.16b 4675 aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 4676 aese v7.16b, v27.16b 4677 aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 4678 ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load ciphertext 4679 4680 rev32 v23.16b, v30.16b //CTR block 8k+18 4681 add v30.4s, v30.4s, v31.4s //CTR block 8k+18 4682 .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 4683 4684 aese v0.16b, v27.16b 4685 aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 4686 aese v1.16b, v27.16b 4687 aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 4688 ldr q26, [x8, #192] //load rk12 4689 4690 ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load ciphertext 4691 aese v4.16b, v27.16b 4692 aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 4693 aese v6.16b, v27.16b 4694 aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 4695 4696 aese v0.16b, v28.16b //AES block 8k+8 - round 11 4697 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 4698 aese v1.16b, v28.16b //AES block 8k+9 - round 11 4699 4700 aese v2.16b, v27.16b 4701 aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 4702 aese v6.16b, v28.16b //AES block 8k+14 - round 11 4703 aese v3.16b, v28.16b //AES block 8k+11 - round 11 4704 4705 .inst 0xce006900 //eor3 v0.16b, v8.16b, v0.16b, v26.16b //AES block 8k+8 - result 4706 rev32 v25.16b, v30.16b //CTR block 8k+19 4707 aese v5.16b, v27.16b 4708 aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 4709 4710 aese v4.16b, v28.16b //AES block 8k+12 - round 11 4711 aese v2.16b, v28.16b //AES block 8k+10 - round 11 4712 add v30.4s, v30.4s, v31.4s //CTR block 8k+19 4713 4714 aese v7.16b, v28.16b //AES block 8k+15 - round 11 4715 aese v5.16b, v28.16b //AES block 8k+13 - round 11 4716 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 4717 4718 .inst 0xce016921 //eor3 v1.16b, v9.16b, v1.16b, v26.16b //AES block 8k+9 - result 4719 stp q0, q1, [x2], #32 //AES block 8k+8, 8k+9 - store result 4720 .inst 0xce036963 //eor3 v3.16b, v11.16b, v3.16b, v26.16b //AES block 8k+11 - result 4721 4722 .inst 0xce026942 //eor3 v2.16b, v10.16b, v2.16b, v26.16b //AES block 8k+10 - result 4723 .inst 0xce0769e7 //eor3 v7.16b, v15.16b, v7.16b, v26.16b //AES block 8k+15 - result 4724 stp q2, q3, [x2], #32 //AES block 8k+10, 8k+11 - store result 4725 4726 .inst 0xce0569a5 //eor3 v5.16b, v13.16b, v5.16b, v26.16b //AES block 8k+13 - result 4727 .inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 4728 mov v3.16b, v25.16b //CTR block 8k+19 4729 4730 .inst 0xce046984 //eor3 v4.16b, v12.16b, v4.16b, v26.16b //AES block 8k+12 - result 4731 stp q4, q5, [x2], #32 //AES block 8k+12, 8k+13 - store result 4732 cmp x0, x5 //.LOOP CONTROL 4733 4734 .inst 0xce0669c6 //eor3 v6.16b, v14.16b, v6.16b, v26.16b //AES block 8k+14 - result 4735 stp q6, q7, [x2], #32 //AES block 8k+14, 8k+15 - store result 4736 mov v0.16b, v20.16b //CTR block 8k+16 4737 4738 mov v1.16b, v22.16b //CTR block 8k+17 4739 mov v2.16b, v23.16b //CTR block 8k+18 4740 4741 rev32 v4.16b, v30.16b //CTR block 8k+20 4742 add v30.4s, v30.4s, v31.4s //CTR block 8k+20 4743 b.lt .L192_dec_main_loop 4744 4745 .L192_dec_prepretail: //PREPRETAIL 4746 ldp q26, q27, [x8, #0] //load rk0, rk1 4747 rev32 v5.16b, v30.16b //CTR block 8k+13 4748 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 4749 4750 ldr q23, [x3, #176] //load h7l | h7h 4751 ext v23.16b, v23.16b, v23.16b, #8 4752 ldr q25, [x3, #208] //load h8l | h8h 4753 ext v25.16b, v25.16b, v25.16b, #8 4754 rev64 v8.16b, v8.16b //GHASH block 8k 4755 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 4756 4757 rev64 v11.16b, v11.16b //GHASH block 8k+3 4758 rev32 v6.16b, v30.16b //CTR block 8k+14 4759 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 4760 4761 eor v8.16b, v8.16b, v19.16b //PRE 1 4762 rev64 v10.16b, v10.16b //GHASH block 8k+2 4763 rev64 v9.16b, v9.16b //GHASH block 8k+1 4764 4765 ldr q20, [x3, #128] //load h5l | h5h 4766 ext v20.16b, v20.16b, v20.16b, #8 4767 ldr q22, [x3, #160] //load h6l | h6h 4768 ext v22.16b, v22.16b, v22.16b, #8 4769 rev32 v7.16b, v30.16b //CTR block 8k+15 4770 4771 aese v0.16b, v26.16b 4772 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 4773 aese v6.16b, v26.16b 4774 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 4775 aese v5.16b, v26.16b 4776 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 4777 4778 aese v3.16b, v26.16b 4779 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 4780 aese v2.16b, v26.16b 4781 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 4782 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 4783 4784 aese v4.16b, v26.16b 4785 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 4786 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 4787 aese v1.16b, v26.16b 4788 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 4789 4790 aese v6.16b, v27.16b 4791 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 4792 aese v7.16b, v26.16b 4793 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 4794 ldp q28, q26, [x8, #32] //load rk2, rk3 4795 4796 aese v4.16b, v27.16b 4797 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 4798 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 4799 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 4800 4801 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 4802 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 4803 aese v3.16b, v27.16b 4804 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 4805 4806 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 4807 aese v7.16b, v27.16b 4808 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 4809 aese v0.16b, v27.16b 4810 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 4811 4812 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 4813 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 4814 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 4815 4816 aese v2.16b, v27.16b 4817 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 4818 aese v1.16b, v27.16b 4819 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 4820 aese v5.16b, v27.16b 4821 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 4822 4823 ldr q21, [x3, #144] //load h6k | h5k 4824 ldr q24, [x3, #192] //load h8k | h7k 4825 aese v3.16b, v28.16b 4826 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 4827 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 4828 4829 aese v6.16b, v28.16b 4830 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 4831 rev64 v13.16b, v13.16b //GHASH block 8k+5 4832 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 4833 4834 .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 4835 aese v4.16b, v28.16b 4836 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 4837 aese v5.16b, v28.16b 4838 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 4839 4840 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 4841 aese v3.16b, v26.16b 4842 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 4843 aese v7.16b, v28.16b 4844 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 4845 4846 aese v0.16b, v28.16b 4847 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 4848 aese v2.16b, v28.16b 4849 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 4850 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 4851 4852 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 4853 aese v1.16b, v28.16b 4854 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 4855 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 4856 4857 aese v5.16b, v26.16b 4858 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 4859 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 4860 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 4861 4862 aese v7.16b, v26.16b 4863 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 4864 aese v6.16b, v26.16b 4865 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 4866 aese v4.16b, v26.16b 4867 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 4868 4869 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 4870 ldp q27, q28, [x8, #64] //load rk4, rk5 4871 aese v0.16b, v26.16b 4872 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 4873 4874 ldr q23, [x3, #80] //load h3l | h3h 4875 ext v23.16b, v23.16b, v23.16b, #8 4876 ldr q25, [x3, #112] //load h4l | h4h 4877 ext v25.16b, v25.16b, v25.16b, #8 4878 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 4879 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 4880 4881 ldr q20, [x3, #32] //load h1l | h1h 4882 ext v20.16b, v20.16b, v20.16b, #8 4883 ldr q22, [x3, #64] //load h2l | h2h 4884 ext v22.16b, v22.16b, v22.16b, #8 4885 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 4886 aese v2.16b, v26.16b 4887 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 4888 4889 rev64 v15.16b, v15.16b //GHASH block 8k+7 4890 4891 .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 4892 rev64 v12.16b, v12.16b //GHASH block 8k+4 4893 4894 aese v5.16b, v27.16b 4895 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 4896 aese v4.16b, v27.16b 4897 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 4898 aese v1.16b, v26.16b 4899 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 4900 4901 aese v2.16b, v27.16b 4902 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 4903 aese v0.16b, v27.16b 4904 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 4905 aese v3.16b, v27.16b 4906 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 4907 4908 aese v1.16b, v27.16b 4909 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 4910 aese v6.16b, v27.16b 4911 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 4912 aese v7.16b, v27.16b 4913 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 4914 4915 rev64 v14.16b, v14.16b //GHASH block 8k+6 4916 ldr q21, [x3, #48] //load h2k | h1k 4917 ldr q24, [x3, #96] //load h4k | h3k 4918 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 4919 4920 aese v7.16b, v28.16b 4921 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 4922 aese v1.16b, v28.16b 4923 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 4924 aese v2.16b, v28.16b 4925 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 4926 4927 ldp q26, q27, [x8, #96] //load rk6, rk7 4928 aese v6.16b, v28.16b 4929 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 4930 aese v5.16b, v28.16b 4931 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 4932 4933 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 4934 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 4935 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 4936 4937 aese v4.16b, v28.16b 4938 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 4939 4940 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 4941 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 4942 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 4943 4944 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 4945 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 4946 aese v0.16b, v28.16b 4947 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 4948 4949 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 4950 aese v3.16b, v28.16b 4951 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 4952 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 4953 4954 aese v4.16b, v26.16b 4955 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 4956 aese v2.16b, v26.16b 4957 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 4958 4959 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 4960 aese v1.16b, v26.16b 4961 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 4962 aese v7.16b, v26.16b 4963 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 4964 4965 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 4966 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 4967 aese v0.16b, v26.16b 4968 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 4969 4970 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 4971 aese v5.16b, v26.16b 4972 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 4973 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 4974 4975 .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 4976 aese v4.16b, v27.16b 4977 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 4978 .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 4979 4980 aese v3.16b, v26.16b 4981 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 4982 aese v6.16b, v26.16b 4983 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 4984 aese v5.16b, v27.16b 4985 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 4986 4987 ldp q28, q26, [x8, #128] //load rk8, rk9 4988 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 4989 aese v2.16b, v27.16b 4990 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 4991 4992 ldr d16, [x10] //MODULO - load modulo constant 4993 .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 4994 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 4995 4996 aese v1.16b, v27.16b 4997 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 4998 aese v7.16b, v27.16b 4999 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 5000 aese v6.16b, v27.16b 5001 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 5002 5003 .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 5004 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 5005 .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 5006 5007 aese v0.16b, v27.16b 5008 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 5009 aese v3.16b, v27.16b 5010 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 5011 5012 .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 5013 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 5014 aese v2.16b, v28.16b 5015 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 5016 5017 aese v6.16b, v28.16b 5018 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 5019 aese v7.16b, v28.16b 5020 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 5021 aese v1.16b, v28.16b 5022 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 5023 5024 aese v3.16b, v28.16b 5025 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 5026 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 5027 aese v0.16b, v28.16b 5028 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 5029 5030 aese v5.16b, v28.16b 5031 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 5032 aese v4.16b, v28.16b 5033 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 5034 ldp q27, q28, [x8, #160] //load rk10, rk11 5035 5036 .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 5037 aese v7.16b, v26.16b 5038 aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 5039 aese v6.16b, v26.16b 5040 aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 5041 5042 aese v5.16b, v26.16b 5043 aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 5044 aese v2.16b, v26.16b 5045 aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 5046 aese v3.16b, v26.16b 5047 aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 5048 5049 aese v0.16b, v26.16b 5050 aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 5051 aese v1.16b, v26.16b 5052 aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 5053 aese v4.16b, v26.16b 5054 aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 5055 5056 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 5057 ldr q26, [x8, #192] //load rk12 5058 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 5059 5060 aese v2.16b, v27.16b 5061 aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 5062 aese v5.16b, v27.16b 5063 aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 5064 aese v0.16b, v27.16b 5065 aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 5066 5067 aese v4.16b, v27.16b 5068 aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 5069 aese v6.16b, v27.16b 5070 aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 5071 aese v7.16b, v27.16b 5072 aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 5073 5074 aese v0.16b, v28.16b //AES block 8k+8 - round 11 5075 .inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 5076 aese v5.16b, v28.16b //AES block 8k+13 - round 11 5077 5078 aese v2.16b, v28.16b //AES block 8k+10 - round 11 5079 aese v3.16b, v27.16b 5080 aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 5081 aese v1.16b, v27.16b 5082 aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 5083 5084 aese v6.16b, v28.16b //AES block 8k+14 - round 11 5085 aese v4.16b, v28.16b //AES block 8k+12 - round 11 5086 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 5087 5088 aese v3.16b, v28.16b //AES block 8k+11 - round 11 5089 aese v1.16b, v28.16b //AES block 8k+9 - round 11 5090 aese v7.16b, v28.16b //AES block 8k+15 - round 11 5091 5092 .L192_dec_tail: //TAIL 5093 5094 sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process 5095 5096 ldp q20, q21, [x3, #128] //load h5l | h5h 5097 ext v20.16b, v20.16b, v20.16b, #8 5098 ldr q9, [x0], #16 //AES block 8k+8 - load ciphertext 5099 5100 ldp q24, q25, [x3, #192] //load h8k | h7k 5101 ext v25.16b, v25.16b, v25.16b, #8 5102 5103 mov v29.16b, v26.16b 5104 5105 ldp q22, q23, [x3, #160] //load h6l | h6h 5106 ext v22.16b, v22.16b, v22.16b, #8 5107 ext v23.16b, v23.16b, v23.16b, #8 5108 ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag 5109 5110 .inst 0xce00752c //eor3 v12.16b, v9.16b, v0.16b, v29.16b //AES block 8k+8 - result 5111 cmp x5, #112 5112 b.gt .L192_dec_blocks_more_than_7 5113 5114 mov v7.16b, v6.16b 5115 movi v17.8b, #0 5116 sub v30.4s, v30.4s, v31.4s 5117 5118 mov v6.16b, v5.16b 5119 mov v5.16b, v4.16b 5120 mov v4.16b, v3.16b 5121 5122 cmp x5, #96 5123 movi v19.8b, #0 5124 mov v3.16b, v2.16b 5125 5126 mov v2.16b, v1.16b 5127 movi v18.8b, #0 5128 b.gt .L192_dec_blocks_more_than_6 5129 5130 mov v7.16b, v6.16b 5131 mov v6.16b, v5.16b 5132 mov v5.16b, v4.16b 5133 5134 mov v4.16b, v3.16b 5135 mov v3.16b, v1.16b 5136 5137 sub v30.4s, v30.4s, v31.4s 5138 cmp x5, #80 5139 b.gt .L192_dec_blocks_more_than_5 5140 5141 mov v7.16b, v6.16b 5142 mov v6.16b, v5.16b 5143 5144 mov v5.16b, v4.16b 5145 mov v4.16b, v1.16b 5146 cmp x5, #64 5147 5148 sub v30.4s, v30.4s, v31.4s 5149 b.gt .L192_dec_blocks_more_than_4 5150 5151 sub v30.4s, v30.4s, v31.4s 5152 mov v7.16b, v6.16b 5153 mov v6.16b, v5.16b 5154 5155 mov v5.16b, v1.16b 5156 cmp x5, #48 5157 b.gt .L192_dec_blocks_more_than_3 5158 5159 sub v30.4s, v30.4s, v31.4s 5160 mov v7.16b, v6.16b 5161 cmp x5, #32 5162 5163 mov v6.16b, v1.16b 5164 ldr q24, [x3, #96] //load h4k | h3k 5165 b.gt .L192_dec_blocks_more_than_2 5166 5167 sub v30.4s, v30.4s, v31.4s 5168 5169 mov v7.16b, v1.16b 5170 cmp x5, #16 5171 b.gt .L192_dec_blocks_more_than_1 5172 5173 sub v30.4s, v30.4s, v31.4s 5174 ldr q21, [x3, #48] //load h2k | h1k 5175 b .L192_dec_blocks_less_than_1 5176 .L192_dec_blocks_more_than_7: //blocks left > 7 5177 rev64 v8.16b, v9.16b //GHASH final-7 block 5178 5179 ins v18.d[0], v24.d[1] //GHASH final-7 block - mid 5180 eor v8.16b, v8.16b, v16.16b //feed in partial tag 5181 5182 pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high 5183 ins v27.d[0], v8.d[1] //GHASH final-7 block - mid 5184 ldr q9, [x0], #16 //AES final-6 block - load ciphertext 5185 5186 pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low 5187 5188 eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid 5189 st1 { v12.16b}, [x2], #16 //AES final-7 block - store result 5190 5191 .inst 0xce01752c //eor3 v12.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result 5192 5193 pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid 5194 movi v16.8b, #0 //suppress further partial tag feed in 5195 .L192_dec_blocks_more_than_6: //blocks left > 6 5196 5197 rev64 v8.16b, v9.16b //GHASH final-6 block 5198 5199 eor v8.16b, v8.16b, v16.16b //feed in partial tag 5200 5201 ldr q9, [x0], #16 //AES final-5 block - load ciphertext 5202 ins v27.d[0], v8.d[1] //GHASH final-6 block - mid 5203 5204 eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid 5205 movi v16.8b, #0 //suppress further partial tag feed in 5206 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high 5207 5208 st1 { v12.16b}, [x2], #16 //AES final-6 block - store result 5209 .inst 0xce02752c //eor3 v12.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result 5210 5211 eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high 5212 pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid 5213 pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low 5214 5215 eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid 5216 eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low 5217 .L192_dec_blocks_more_than_5: //blocks left > 5 5218 5219 rev64 v8.16b, v9.16b //GHASH final-5 block 5220 5221 eor v8.16b, v8.16b, v16.16b //feed in partial tag 5222 5223 ins v27.d[0], v8.d[1] //GHASH final-5 block - mid 5224 5225 eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid 5226 5227 ins v27.d[1], v27.d[0] //GHASH final-5 block - mid 5228 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high 5229 5230 ldr q9, [x0], #16 //AES final-4 block - load ciphertext 5231 5232 eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high 5233 pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low 5234 5235 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid 5236 5237 eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low 5238 movi v16.8b, #0 //suppress further partial tag feed in 5239 st1 { v12.16b}, [x2], #16 //AES final-5 block - store result 5240 5241 eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid 5242 .inst 0xce03752c //eor3 v12.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result 5243 .L192_dec_blocks_more_than_4: //blocks left > 4 5244 5245 rev64 v8.16b, v9.16b //GHASH final-4 block 5246 5247 eor v8.16b, v8.16b, v16.16b //feed in partial tag 5248 movi v16.8b, #0 //suppress further partial tag feed in 5249 5250 ldr q9, [x0], #16 //AES final-3 block - load ciphertext 5251 ins v27.d[0], v8.d[1] //GHASH final-4 block - mid 5252 pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low 5253 5254 eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid 5255 5256 eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low 5257 5258 pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid 5259 st1 { v12.16b}, [x2], #16 //AES final-4 block - store result 5260 pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high 5261 5262 .inst 0xce04752c //eor3 v12.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result 5263 5264 eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid 5265 eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high 5266 .L192_dec_blocks_more_than_3: //blocks left > 3 5267 5268 ldr q25, [x3, #112] //load h4l | h4h 5269 ext v25.16b, v25.16b, v25.16b, #8 5270 rev64 v8.16b, v9.16b //GHASH final-3 block 5271 ldr q9, [x0], #16 //AES final-2 block - load ciphertext 5272 5273 eor v8.16b, v8.16b, v16.16b //feed in partial tag 5274 5275 ins v27.d[0], v8.d[1] //GHASH final-3 block - mid 5276 pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high 5277 5278 eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high 5279 movi v16.8b, #0 //suppress further partial tag feed in 5280 pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low 5281 5282 st1 { v12.16b}, [x2], #16 //AES final-3 block - store result 5283 eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid 5284 .inst 0xce05752c //eor3 v12.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result 5285 5286 eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low 5287 ldr q24, [x3, #96] //load h4k | h3k 5288 5289 ins v27.d[1], v27.d[0] //GHASH final-3 block - mid 5290 5291 pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid 5292 5293 eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid 5294 .L192_dec_blocks_more_than_2: //blocks left > 2 5295 5296 rev64 v8.16b, v9.16b //GHASH final-2 block 5297 ldr q23, [x3, #80] //load h3l | h3h 5298 ext v23.16b, v23.16b, v23.16b, #8 5299 5300 eor v8.16b, v8.16b, v16.16b //feed in partial tag 5301 5302 ins v27.d[0], v8.d[1] //GHASH final-2 block - mid 5303 ldr q9, [x0], #16 //AES final-1 block - load ciphertext 5304 5305 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high 5306 5307 eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid 5308 5309 eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high 5310 pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low 5311 5312 pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid 5313 movi v16.8b, #0 //suppress further partial tag feed in 5314 5315 eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low 5316 st1 { v12.16b}, [x2], #16 //AES final-2 block - store result 5317 5318 eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid 5319 .inst 0xce06752c //eor3 v12.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result 5320 .L192_dec_blocks_more_than_1: //blocks left > 1 5321 5322 rev64 v8.16b, v9.16b //GHASH final-1 block 5323 ldr q9, [x0], #16 //AES final block - load ciphertext 5324 ldr q22, [x3, #64] //load h1l | h1h 5325 ext v22.16b, v22.16b, v22.16b, #8 5326 5327 eor v8.16b, v8.16b, v16.16b //feed in partial tag 5328 movi v16.8b, #0 //suppress further partial tag feed in 5329 ldr q21, [x3, #48] //load h2k | h1k 5330 5331 pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low 5332 ins v27.d[0], v8.d[1] //GHASH final-1 block - mid 5333 st1 { v12.16b}, [x2], #16 //AES final-1 block - store result 5334 5335 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high 5336 5337 .inst 0xce07752c //eor3 v12.16b, v9.16b, v7.16b, v29.16b //AES final block - result 5338 5339 eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid 5340 5341 ins v27.d[1], v27.d[0] //GHASH final-1 block - mid 5342 5343 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid 5344 5345 eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low 5346 5347 eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid 5348 eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high 5349 .L192_dec_blocks_less_than_1: //blocks left <= 1 5350 5351 rev32 v30.16b, v30.16b 5352 and x1, x1, #127 //bit_length %= 128 5353 5354 sub x1, x1, #128 //bit_length -= 128 5355 str q30, [x16] //store the updated counter 5356 5357 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) 5358 mvn x6, xzr //temp0_x = 0xffffffffffffffff 5359 5360 and x1, x1, #127 //bit_length %= 128 5361 5362 mvn x7, xzr //temp1_x = 0xffffffffffffffff 5363 lsr x6, x6, x1 //temp0_x is mask for top 64b of last block 5364 cmp x1, #64 5365 5366 csel x13, x7, x6, lt 5367 csel x14, x6, xzr, lt 5368 ldr q20, [x3, #32] //load h1l | h1h 5369 ext v20.16b, v20.16b, v20.16b, #8 5370 5371 mov v0.d[1], x14 5372 ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored 5373 5374 mov v0.d[0], x13 //ctr0b is mask for last block 5375 5376 and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits 5377 bif v12.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing 5378 5379 rev64 v8.16b, v9.16b //GHASH final block 5380 5381 st1 { v12.16b}, [x2] //store all 16B 5382 5383 eor v8.16b, v8.16b, v16.16b //feed in partial tag 5384 5385 ins v16.d[0], v8.d[1] //GHASH final block - mid 5386 pmull v26.1q, v8.1d, v20.1d //GHASH final block - low 5387 5388 eor v16.8b, v16.8b, v8.8b //GHASH final block - mid 5389 pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high 5390 eor v19.16b, v19.16b, v26.16b //GHASH final block - low 5391 5392 pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid 5393 eor v17.16b, v17.16b, v28.16b //GHASH final block - high 5394 5395 eor v14.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 5396 eor v18.16b, v18.16b, v16.16b //GHASH final block - mid 5397 ldr d16, [x10] //MODULO - load modulo constant 5398 5399 pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 5400 ext v17.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 5401 5402 eor v18.16b, v18.16b, v14.16b //MODULO - karatsuba tidy up 5403 5404 .inst 0xce115652 //eor3 v18.16b, v18.16b, v17.16b, v21.16b //MODULO - fold into mid 5405 5406 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 5407 ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 5408 5409 .inst 0xce124673 //eor3 v19.16b, v19.16b, v18.16b, v17.16b //MODULO - fold into low 5410 ext v19.16b, v19.16b, v19.16b, #8 5411 rev64 v19.16b, v19.16b 5412 st1 { v19.16b }, [x3] 5413 5414 mov x0, x9 5415 5416 ldp d10, d11, [sp, #16] 5417 ldp d12, d13, [sp, #32] 5418 ldp d14, d15, [sp, #48] 5419 ldp d8, d9, [sp], #80 5420 ret 5421 5422 .L192_dec_ret: 5423 mov w0, #0x0 5424 ret 5425 .size unroll8_eor3_aes_gcm_dec_192_kernel,.-unroll8_eor3_aes_gcm_dec_192_kernel 5426 .globl unroll8_eor3_aes_gcm_enc_256_kernel 5427 .type unroll8_eor3_aes_gcm_enc_256_kernel,%function 5428 .align 4 5429 unroll8_eor3_aes_gcm_enc_256_kernel: 5430 AARCH64_VALID_CALL_TARGET 5431 cbz x1, .L256_enc_ret 5432 stp d8, d9, [sp, #-80]! 5433 lsr x9, x1, #3 5434 mov x16, x4 5435 mov x8, x5 5436 stp d10, d11, [sp, #16] 5437 stp d12, d13, [sp, #32] 5438 stp d14, d15, [sp, #48] 5439 mov x5, #0xc200000000000000 5440 stp x5, xzr, [sp, #64] 5441 add x10, sp, #64 5442 5443 ld1 { v0.16b}, [x16] //CTR block 0 5444 5445 mov x5, x9 5446 5447 mov x15, #0x100000000 //set up counter increment 5448 movi v31.16b, #0x0 5449 mov v31.d[1], x15 5450 sub x5, x5, #1 //byte_len - 1 5451 5452 and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 5453 5454 add x5, x5, x0 5455 5456 rev32 v30.16b, v0.16b //set up reversed counter 5457 5458 add v30.4s, v30.4s, v31.4s //CTR block 0 5459 5460 rev32 v1.16b, v30.16b //CTR block 1 5461 add v30.4s, v30.4s, v31.4s //CTR block 1 5462 5463 rev32 v2.16b, v30.16b //CTR block 2 5464 add v30.4s, v30.4s, v31.4s //CTR block 2 5465 5466 rev32 v3.16b, v30.16b //CTR block 3 5467 add v30.4s, v30.4s, v31.4s //CTR block 3 5468 5469 rev32 v4.16b, v30.16b //CTR block 4 5470 add v30.4s, v30.4s, v31.4s //CTR block 4 5471 5472 rev32 v5.16b, v30.16b //CTR block 5 5473 add v30.4s, v30.4s, v31.4s //CTR block 5 5474 ldp q26, q27, [x8, #0] //load rk0, rk1 5475 5476 rev32 v6.16b, v30.16b //CTR block 6 5477 add v30.4s, v30.4s, v31.4s //CTR block 6 5478 5479 rev32 v7.16b, v30.16b //CTR block 7 5480 5481 aese v3.16b, v26.16b 5482 aesmc v3.16b, v3.16b //AES block 3 - round 0 5483 aese v4.16b, v26.16b 5484 aesmc v4.16b, v4.16b //AES block 4 - round 0 5485 aese v2.16b, v26.16b 5486 aesmc v2.16b, v2.16b //AES block 2 - round 0 5487 5488 aese v0.16b, v26.16b 5489 aesmc v0.16b, v0.16b //AES block 0 - round 0 5490 aese v1.16b, v26.16b 5491 aesmc v1.16b, v1.16b //AES block 1 - round 0 5492 aese v6.16b, v26.16b 5493 aesmc v6.16b, v6.16b //AES block 6 - round 0 5494 5495 aese v5.16b, v26.16b 5496 aesmc v5.16b, v5.16b //AES block 5 - round 0 5497 aese v7.16b, v26.16b 5498 aesmc v7.16b, v7.16b //AES block 7 - round 0 5499 ldp q28, q26, [x8, #32] //load rk2, rk3 5500 5501 aese v4.16b, v27.16b 5502 aesmc v4.16b, v4.16b //AES block 4 - round 1 5503 aese v1.16b, v27.16b 5504 aesmc v1.16b, v1.16b //AES block 1 - round 1 5505 aese v3.16b, v27.16b 5506 aesmc v3.16b, v3.16b //AES block 3 - round 1 5507 5508 aese v6.16b, v27.16b 5509 aesmc v6.16b, v6.16b //AES block 6 - round 1 5510 aese v5.16b, v27.16b 5511 aesmc v5.16b, v5.16b //AES block 5 - round 1 5512 5513 aese v2.16b, v27.16b 5514 aesmc v2.16b, v2.16b //AES block 2 - round 1 5515 5516 aese v7.16b, v27.16b 5517 aesmc v7.16b, v7.16b //AES block 7 - round 1 5518 5519 aese v2.16b, v28.16b 5520 aesmc v2.16b, v2.16b //AES block 2 - round 2 5521 aese v3.16b, v28.16b 5522 aesmc v3.16b, v3.16b //AES block 3 - round 2 5523 aese v0.16b, v27.16b 5524 aesmc v0.16b, v0.16b //AES block 0 - round 1 5525 5526 aese v7.16b, v28.16b 5527 aesmc v7.16b, v7.16b //AES block 7 - round 2 5528 aese v6.16b, v28.16b 5529 aesmc v6.16b, v6.16b //AES block 6 - round 2 5530 aese v5.16b, v28.16b 5531 aesmc v5.16b, v5.16b //AES block 5 - round 2 5532 5533 aese v4.16b, v28.16b 5534 aesmc v4.16b, v4.16b //AES block 4 - round 2 5535 aese v0.16b, v28.16b 5536 aesmc v0.16b, v0.16b //AES block 0 - round 2 5537 aese v1.16b, v28.16b 5538 aesmc v1.16b, v1.16b //AES block 1 - round 2 5539 5540 aese v5.16b, v26.16b 5541 aesmc v5.16b, v5.16b //AES block 5 - round 3 5542 aese v3.16b, v26.16b 5543 aesmc v3.16b, v3.16b //AES block 3 - round 3 5544 ldp q27, q28, [x8, #64] //load rk4, rk5 5545 5546 aese v4.16b, v26.16b 5547 aesmc v4.16b, v4.16b //AES block 4 - round 3 5548 5549 aese v1.16b, v26.16b 5550 aesmc v1.16b, v1.16b //AES block 1 - round 3 5551 aese v6.16b, v26.16b 5552 aesmc v6.16b, v6.16b //AES block 6 - round 3 5553 aese v7.16b, v26.16b 5554 aesmc v7.16b, v7.16b //AES block 7 - round 3 5555 5556 aese v2.16b, v26.16b 5557 aesmc v2.16b, v2.16b //AES block 2 - round 3 5558 aese v0.16b, v26.16b 5559 aesmc v0.16b, v0.16b //AES block 0 - round 3 5560 5561 aese v4.16b, v27.16b 5562 aesmc v4.16b, v4.16b //AES block 4 - round 4 5563 aese v6.16b, v27.16b 5564 aesmc v6.16b, v6.16b //AES block 6 - round 4 5565 aese v1.16b, v27.16b 5566 aesmc v1.16b, v1.16b //AES block 1 - round 4 5567 5568 aese v2.16b, v27.16b 5569 aesmc v2.16b, v2.16b //AES block 2 - round 4 5570 aese v0.16b, v27.16b 5571 aesmc v0.16b, v0.16b //AES block 0 - round 4 5572 5573 aese v3.16b, v27.16b 5574 aesmc v3.16b, v3.16b //AES block 3 - round 4 5575 aese v7.16b, v27.16b 5576 aesmc v7.16b, v7.16b //AES block 7 - round 4 5577 aese v5.16b, v27.16b 5578 aesmc v5.16b, v5.16b //AES block 5 - round 4 5579 5580 aese v0.16b, v28.16b 5581 aesmc v0.16b, v0.16b //AES block 0 - round 5 5582 aese v2.16b, v28.16b 5583 aesmc v2.16b, v2.16b //AES block 2 - round 5 5584 ldp q26, q27, [x8, #96] //load rk6, rk7 5585 5586 aese v1.16b, v28.16b 5587 aesmc v1.16b, v1.16b //AES block 1 - round 5 5588 aese v4.16b, v28.16b 5589 aesmc v4.16b, v4.16b //AES block 4 - round 5 5590 aese v5.16b, v28.16b 5591 aesmc v5.16b, v5.16b //AES block 5 - round 5 5592 5593 aese v3.16b, v28.16b 5594 aesmc v3.16b, v3.16b //AES block 3 - round 5 5595 aese v6.16b, v28.16b 5596 aesmc v6.16b, v6.16b //AES block 6 - round 5 5597 aese v7.16b, v28.16b 5598 aesmc v7.16b, v7.16b //AES block 7 - round 5 5599 5600 aese v1.16b, v26.16b 5601 aesmc v1.16b, v1.16b //AES block 1 - round 6 5602 aese v5.16b, v26.16b 5603 aesmc v5.16b, v5.16b //AES block 5 - round 6 5604 aese v4.16b, v26.16b 5605 aesmc v4.16b, v4.16b //AES block 4 - round 6 5606 5607 aese v2.16b, v26.16b 5608 aesmc v2.16b, v2.16b //AES block 2 - round 6 5609 aese v6.16b, v26.16b 5610 aesmc v6.16b, v6.16b //AES block 6 - round 6 5611 aese v0.16b, v26.16b 5612 aesmc v0.16b, v0.16b //AES block 0 - round 6 5613 5614 aese v7.16b, v26.16b 5615 aesmc v7.16b, v7.16b //AES block 7 - round 6 5616 aese v3.16b, v26.16b 5617 aesmc v3.16b, v3.16b //AES block 3 - round 6 5618 ldp q28, q26, [x8, #128] //load rk8, rk9 5619 5620 aese v2.16b, v27.16b 5621 aesmc v2.16b, v2.16b //AES block 2 - round 7 5622 aese v0.16b, v27.16b 5623 aesmc v0.16b, v0.16b //AES block 0 - round 7 5624 5625 aese v7.16b, v27.16b 5626 aesmc v7.16b, v7.16b //AES block 7 - round 7 5627 aese v6.16b, v27.16b 5628 aesmc v6.16b, v6.16b //AES block 6 - round 7 5629 aese v1.16b, v27.16b 5630 aesmc v1.16b, v1.16b //AES block 1 - round 7 5631 5632 aese v5.16b, v27.16b 5633 aesmc v5.16b, v5.16b //AES block 5 - round 7 5634 aese v3.16b, v27.16b 5635 aesmc v3.16b, v3.16b //AES block 3 - round 7 5636 5637 aese v4.16b, v27.16b 5638 aesmc v4.16b, v4.16b //AES block 4 - round 7 5639 5640 aese v6.16b, v28.16b 5641 aesmc v6.16b, v6.16b //AES block 6 - round 8 5642 aese v1.16b, v28.16b 5643 aesmc v1.16b, v1.16b //AES block 1 - round 8 5644 5645 aese v3.16b, v28.16b 5646 aesmc v3.16b, v3.16b //AES block 3 - round 8 5647 aese v0.16b, v28.16b 5648 aesmc v0.16b, v0.16b //AES block 0 - round 8 5649 aese v7.16b, v28.16b 5650 aesmc v7.16b, v7.16b //AES block 7 - round 8 5651 5652 aese v5.16b, v28.16b 5653 aesmc v5.16b, v5.16b //AES block 5 - round 8 5654 aese v4.16b, v28.16b 5655 aesmc v4.16b, v4.16b //AES block 4 - round 8 5656 aese v2.16b, v28.16b 5657 aesmc v2.16b, v2.16b //AES block 2 - round 8 5658 5659 ld1 { v19.16b}, [x3] 5660 ext v19.16b, v19.16b, v19.16b, #8 5661 rev64 v19.16b, v19.16b 5662 ldp q27, q28, [x8, #160] //load rk10, rk11 5663 5664 aese v6.16b, v26.16b 5665 aesmc v6.16b, v6.16b //AES block 6 - round 9 5666 aese v7.16b, v26.16b 5667 aesmc v7.16b, v7.16b //AES block 7 - round 9 5668 aese v3.16b, v26.16b 5669 aesmc v3.16b, v3.16b //AES block 3 - round 9 5670 5671 aese v4.16b, v26.16b 5672 aesmc v4.16b, v4.16b //AES block 4 - round 9 5673 aese v5.16b, v26.16b 5674 aesmc v5.16b, v5.16b //AES block 5 - round 9 5675 aese v2.16b, v26.16b 5676 aesmc v2.16b, v2.16b //AES block 2 - round 9 5677 5678 aese v1.16b, v26.16b 5679 aesmc v1.16b, v1.16b //AES block 1 - round 9 5680 5681 aese v7.16b, v27.16b 5682 aesmc v7.16b, v7.16b //AES block 7 - round 10 5683 aese v4.16b, v27.16b 5684 aesmc v4.16b, v4.16b //AES block 4 - round 10 5685 aese v0.16b, v26.16b 5686 aesmc v0.16b, v0.16b //AES block 0 - round 9 5687 5688 aese v1.16b, v27.16b 5689 aesmc v1.16b, v1.16b //AES block 1 - round 10 5690 aese v5.16b, v27.16b 5691 aesmc v5.16b, v5.16b //AES block 5 - round 10 5692 aese v3.16b, v27.16b 5693 aesmc v3.16b, v3.16b //AES block 3 - round 10 5694 5695 aese v2.16b, v27.16b 5696 aesmc v2.16b, v2.16b //AES block 2 - round 10 5697 aese v0.16b, v27.16b 5698 aesmc v0.16b, v0.16b //AES block 0 - round 10 5699 aese v6.16b, v27.16b 5700 aesmc v6.16b, v6.16b //AES block 6 - round 10 5701 5702 aese v4.16b, v28.16b 5703 aesmc v4.16b, v4.16b //AES block 4 - round 11 5704 ldp q26, q27, [x8, #192] //load rk12, rk13 5705 aese v5.16b, v28.16b 5706 aesmc v5.16b, v5.16b //AES block 5 - round 11 5707 5708 aese v2.16b, v28.16b 5709 aesmc v2.16b, v2.16b //AES block 2 - round 11 5710 aese v6.16b, v28.16b 5711 aesmc v6.16b, v6.16b //AES block 6 - round 11 5712 aese v1.16b, v28.16b 5713 aesmc v1.16b, v1.16b //AES block 1 - round 11 5714 5715 aese v0.16b, v28.16b 5716 aesmc v0.16b, v0.16b //AES block 0 - round 11 5717 aese v3.16b, v28.16b 5718 aesmc v3.16b, v3.16b //AES block 3 - round 11 5719 aese v7.16b, v28.16b 5720 aesmc v7.16b, v7.16b //AES block 7 - round 11 5721 5722 add v30.4s, v30.4s, v31.4s //CTR block 7 5723 ldr q28, [x8, #224] //load rk14 5724 5725 aese v4.16b, v26.16b 5726 aesmc v4.16b, v4.16b //AES block 4 - round 12 5727 aese v2.16b, v26.16b 5728 aesmc v2.16b, v2.16b //AES block 2 - round 12 5729 aese v1.16b, v26.16b 5730 aesmc v1.16b, v1.16b //AES block 1 - round 12 5731 5732 aese v0.16b, v26.16b 5733 aesmc v0.16b, v0.16b //AES block 0 - round 12 5734 aese v5.16b, v26.16b 5735 aesmc v5.16b, v5.16b //AES block 5 - round 12 5736 aese v3.16b, v26.16b 5737 aesmc v3.16b, v3.16b //AES block 3 - round 12 5738 5739 aese v2.16b, v27.16b //AES block 2 - round 13 5740 aese v1.16b, v27.16b //AES block 1 - round 13 5741 aese v4.16b, v27.16b //AES block 4 - round 13 5742 5743 aese v6.16b, v26.16b 5744 aesmc v6.16b, v6.16b //AES block 6 - round 12 5745 aese v7.16b, v26.16b 5746 aesmc v7.16b, v7.16b //AES block 7 - round 12 5747 5748 aese v0.16b, v27.16b //AES block 0 - round 13 5749 aese v5.16b, v27.16b //AES block 5 - round 13 5750 5751 aese v6.16b, v27.16b //AES block 6 - round 13 5752 aese v7.16b, v27.16b //AES block 7 - round 13 5753 aese v3.16b, v27.16b //AES block 3 - round 13 5754 5755 add x4, x0, x1, lsr #3 //end_input_ptr 5756 cmp x0, x5 //check if we have <= 8 blocks 5757 b.ge .L256_enc_tail //handle tail 5758 5759 ldp q8, q9, [x0], #32 //AES block 0, 1 - load plaintext 5760 5761 ldp q10, q11, [x0], #32 //AES block 2, 3 - load plaintext 5762 5763 .inst 0xce007108 //eor3 v8.16b, v8.16b, v0.16b, v28.16b //AES block 0 - result 5764 rev32 v0.16b, v30.16b //CTR block 8 5765 add v30.4s, v30.4s, v31.4s //CTR block 8 5766 5767 .inst 0xce017129 //eor3 v9.16b, v9.16b, v1.16b, v28.16b //AES block 1 - result 5768 .inst 0xce03716b //eor3 v11.16b, v11.16b, v3.16b, v28.16b //AES block 3 - result 5769 5770 rev32 v1.16b, v30.16b //CTR block 9 5771 add v30.4s, v30.4s, v31.4s //CTR block 9 5772 ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext 5773 5774 ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext 5775 .inst 0xce02714a //eor3 v10.16b, v10.16b, v2.16b, v28.16b //AES block 2 - result 5776 cmp x0, x5 //check if we have <= 8 blocks 5777 5778 rev32 v2.16b, v30.16b //CTR block 10 5779 add v30.4s, v30.4s, v31.4s //CTR block 10 5780 stp q8, q9, [x2], #32 //AES block 0, 1 - store result 5781 5782 stp q10, q11, [x2], #32 //AES block 2, 3 - store result 5783 5784 rev32 v3.16b, v30.16b //CTR block 11 5785 add v30.4s, v30.4s, v31.4s //CTR block 11 5786 5787 .inst 0xce04718c //eor3 v12.16b, v12.16b, v4.16b, v28.16b //AES block 4 - result 5788 5789 .inst 0xce0771ef //eor3 v15.16b, v15.16b, v7.16b, v28.16b //AES block 7 - result 5790 .inst 0xce0671ce //eor3 v14.16b, v14.16b, v6.16b, v28.16b //AES block 6 - result 5791 .inst 0xce0571ad //eor3 v13.16b, v13.16b, v5.16b, v28.16b //AES block 5 - result 5792 5793 stp q12, q13, [x2], #32 //AES block 4, 5 - store result 5794 rev32 v4.16b, v30.16b //CTR block 12 5795 5796 stp q14, q15, [x2], #32 //AES block 6, 7 - store result 5797 add v30.4s, v30.4s, v31.4s //CTR block 12 5798 b.ge .L256_enc_prepretail //do prepretail 5799 5800 .L256_enc_main_loop: //main loop start 5801 ldp q26, q27, [x8, #0] //load rk0, rk1 5802 5803 rev32 v5.16b, v30.16b //CTR block 8k+13 5804 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 5805 ldr q21, [x3, #144] //load h6k | h5k 5806 ldr q24, [x3, #192] //load h8k | h7k 5807 5808 rev64 v11.16b, v11.16b //GHASH block 8k+3 5809 ldr q20, [x3, #128] //load h5l | h5h 5810 ext v20.16b, v20.16b, v20.16b, #8 5811 ldr q22, [x3, #160] //load h6l | h6h 5812 ext v22.16b, v22.16b, v22.16b, #8 5813 rev64 v9.16b, v9.16b //GHASH block 8k+1 5814 5815 rev32 v6.16b, v30.16b //CTR block 8k+14 5816 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 5817 rev64 v8.16b, v8.16b //GHASH block 8k 5818 5819 rev64 v12.16b, v12.16b //GHASH block 8k+4 5820 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 5821 ldr q23, [x3, #176] //load h7l | h7h 5822 ext v23.16b, v23.16b, v23.16b, #8 5823 ldr q25, [x3, #208] //load h8l | h8h 5824 ext v25.16b, v25.16b, v25.16b, #8 5825 5826 aese v3.16b, v26.16b 5827 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 5828 aese v5.16b, v26.16b 5829 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 5830 rev32 v7.16b, v30.16b //CTR block 8k+15 5831 5832 aese v0.16b, v26.16b 5833 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 5834 aese v1.16b, v26.16b 5835 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 5836 aese v6.16b, v26.16b 5837 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 5838 5839 aese v7.16b, v26.16b 5840 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 5841 aese v2.16b, v26.16b 5842 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 5843 aese v4.16b, v26.16b 5844 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 5845 5846 ldp q28, q26, [x8, #32] //load rk2, rk3 5847 eor v8.16b, v8.16b, v19.16b //PRE 1 5848 aese v6.16b, v27.16b 5849 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 5850 5851 aese v2.16b, v27.16b 5852 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 5853 aese v1.16b, v27.16b 5854 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 5855 aese v0.16b, v27.16b 5856 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 5857 5858 aese v4.16b, v27.16b 5859 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 5860 aese v3.16b, v27.16b 5861 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 5862 aese v5.16b, v27.16b 5863 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 5864 5865 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 5866 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 5867 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 5868 5869 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 5870 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 5871 aese v7.16b, v27.16b 5872 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 5873 5874 aese v1.16b, v28.16b 5875 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 5876 aese v5.16b, v28.16b 5877 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 5878 aese v6.16b, v28.16b 5879 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 5880 5881 aese v2.16b, v28.16b 5882 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 5883 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 5884 aese v4.16b, v28.16b 5885 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 5886 5887 aese v5.16b, v26.16b 5888 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 5889 aese v6.16b, v26.16b 5890 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 5891 aese v0.16b, v28.16b 5892 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 5893 5894 aese v1.16b, v26.16b 5895 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 5896 aese v7.16b, v28.16b 5897 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 5898 aese v3.16b, v28.16b 5899 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 5900 5901 aese v4.16b, v26.16b 5902 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 5903 rev64 v14.16b, v14.16b //GHASH block 8k+6 5904 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 5905 5906 aese v3.16b, v26.16b 5907 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 5908 ldp q27, q28, [x8, #64] //load rk4, rk5 5909 rev64 v10.16b, v10.16b //GHASH block 8k+2 5910 5911 aese v2.16b, v26.16b 5912 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 5913 aese v7.16b, v26.16b 5914 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 5915 aese v0.16b, v26.16b 5916 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 5917 5918 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 5919 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 5920 rev64 v13.16b, v13.16b //GHASH block 8k+5 5921 5922 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 5923 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 5924 ldr q23, [x3, #80] //load h3l | h3h 5925 ext v23.16b, v23.16b, v23.16b, #8 5926 ldr q25, [x3, #112] //load h4l | h4h 5927 ext v25.16b, v25.16b, v25.16b, #8 5928 5929 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 5930 .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 5931 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 5932 5933 aese v4.16b, v27.16b 5934 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 5935 aese v1.16b, v27.16b 5936 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 5937 aese v5.16b, v27.16b 5938 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 5939 5940 aese v7.16b, v27.16b 5941 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 5942 aese v3.16b, v27.16b 5943 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 5944 aese v2.16b, v27.16b 5945 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 5946 5947 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 5948 aese v6.16b, v27.16b 5949 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 5950 aese v0.16b, v27.16b 5951 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 5952 5953 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 5954 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 5955 ldp q26, q27, [x8, #96] //load rk6, rk7 5956 5957 aese v5.16b, v28.16b 5958 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 5959 aese v7.16b, v28.16b 5960 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 5961 aese v4.16b, v28.16b 5962 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 5963 5964 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 5965 aese v2.16b, v28.16b 5966 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 5967 rev64 v15.16b, v15.16b //GHASH block 8k+7 5968 5969 aese v3.16b, v28.16b 5970 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 5971 aese v6.16b, v28.16b 5972 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 5973 aese v1.16b, v28.16b 5974 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 5975 5976 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 5977 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 5978 aese v0.16b, v28.16b 5979 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 5980 5981 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 5982 aese v4.16b, v26.16b 5983 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 5984 aese v2.16b, v26.16b 5985 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 5986 5987 aese v6.16b, v26.16b 5988 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 5989 aese v1.16b, v26.16b 5990 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 5991 aese v7.16b, v26.16b 5992 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 5993 5994 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 5995 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 5996 aese v5.16b, v26.16b 5997 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 5998 5999 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 6000 aese v3.16b, v26.16b 6001 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 6002 aese v0.16b, v26.16b 6003 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 6004 6005 ldp q28, q26, [x8, #128] //load rk8, rk9 6006 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 6007 aese v5.16b, v27.16b 6008 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 6009 6010 ldr q20, [x3, #32] //load h1l | h1h 6011 ext v20.16b, v20.16b, v20.16b, #8 6012 ldr q22, [x3, #64] //load h2l | h2h 6013 ext v22.16b, v22.16b, v22.16b, #8 6014 aese v2.16b, v27.16b 6015 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 6016 .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 6017 6018 ldr q21, [x3, #48] //load h2k | h1k 6019 ldr q24, [x3, #96] //load h4k | h3k 6020 aese v6.16b, v27.16b 6021 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 6022 aese v3.16b, v27.16b 6023 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 6024 6025 aese v0.16b, v27.16b 6026 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 6027 aese v7.16b, v27.16b 6028 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 6029 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 6030 6031 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 6032 aese v4.16b, v27.16b 6033 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 6034 aese v1.16b, v27.16b 6035 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 6036 6037 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 6038 aese v7.16b, v28.16b 6039 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 6040 aese v0.16b, v28.16b 6041 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 6042 6043 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 6044 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 6045 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 6046 6047 aese v3.16b, v28.16b 6048 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 6049 aese v0.16b, v26.16b 6050 aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 6051 aese v1.16b, v28.16b 6052 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 6053 6054 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 6055 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 6056 aese v2.16b, v28.16b 6057 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 6058 6059 aese v5.16b, v28.16b 6060 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 6061 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 6062 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 6063 6064 aese v6.16b, v28.16b 6065 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 6066 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 6067 aese v4.16b, v28.16b 6068 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 6069 6070 .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 6071 aese v7.16b, v26.16b 6072 aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 6073 aese v5.16b, v26.16b 6074 aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 6075 6076 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 6077 aese v6.16b, v26.16b 6078 aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 6079 aese v4.16b, v26.16b 6080 aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 6081 6082 ldp q27, q28, [x8, #160] //load rk10, rk11 6083 aese v2.16b, v26.16b 6084 aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 6085 aese v3.16b, v26.16b 6086 aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 6087 6088 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 6089 .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 6090 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 6091 6092 ldr d16, [x10] //MODULO - load modulo constant 6093 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 6094 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 6095 6096 aese v1.16b, v26.16b 6097 aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 6098 6099 .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 6100 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 6101 .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 6102 6103 aese v4.16b, v27.16b 6104 aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 6105 aese v3.16b, v27.16b 6106 aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 6107 aese v5.16b, v27.16b 6108 aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 6109 6110 aese v0.16b, v27.16b 6111 aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 6112 aese v2.16b, v27.16b 6113 aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 6114 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 6115 6116 aese v1.16b, v27.16b 6117 aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 6118 aese v7.16b, v27.16b 6119 aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 6120 aese v6.16b, v27.16b 6121 aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 6122 6123 .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 6124 6125 ldp q26, q27, [x8, #192] //load rk12, rk13 6126 rev32 v20.16b, v30.16b //CTR block 8k+16 6127 6128 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 6129 ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load plaintext 6130 aese v2.16b, v28.16b 6131 aesmc v2.16b, v2.16b //AES block 8k+10 - round 11 6132 6133 aese v6.16b, v28.16b 6134 aesmc v6.16b, v6.16b //AES block 8k+14 - round 11 6135 add v30.4s, v30.4s, v31.4s //CTR block 8k+16 6136 aese v3.16b, v28.16b 6137 aesmc v3.16b, v3.16b //AES block 8k+11 - round 11 6138 6139 aese v0.16b, v28.16b 6140 aesmc v0.16b, v0.16b //AES block 8k+8 - round 11 6141 aese v7.16b, v28.16b 6142 aesmc v7.16b, v7.16b //AES block 8k+15 - round 11 6143 6144 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 6145 aese v1.16b, v28.16b 6146 aesmc v1.16b, v1.16b //AES block 8k+9 - round 11 6147 6148 aese v7.16b, v26.16b 6149 aesmc v7.16b, v7.16b //AES block 8k+15 - round 12 6150 aese v5.16b, v28.16b 6151 aesmc v5.16b, v5.16b //AES block 8k+13 - round 11 6152 6153 aese v3.16b, v26.16b 6154 aesmc v3.16b, v3.16b //AES block 8k+11 - round 12 6155 aese v6.16b, v26.16b 6156 aesmc v6.16b, v6.16b //AES block 8k+14 - round 12 6157 rev32 v22.16b, v30.16b //CTR block 8k+17 6158 6159 add v30.4s, v30.4s, v31.4s //CTR block 8k+17 6160 aese v4.16b, v28.16b 6161 aesmc v4.16b, v4.16b //AES block 8k+12 - round 11 6162 .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 6163 6164 aese v5.16b, v26.16b 6165 aesmc v5.16b, v5.16b //AES block 8k+13 - round 12 6166 ldr q28, [x8, #224] //load rk14 6167 aese v7.16b, v27.16b //AES block 8k+15 - round 13 6168 6169 ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load plaintext 6170 aese v2.16b, v26.16b 6171 aesmc v2.16b, v2.16b //AES block 8k+10 - round 12 6172 aese v4.16b, v26.16b 6173 aesmc v4.16b, v4.16b //AES block 8k+12 - round 12 6174 6175 .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 6176 aese v1.16b, v26.16b 6177 aesmc v1.16b, v1.16b //AES block 8k+9 - round 12 6178 ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext 6179 6180 ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext 6181 aese v2.16b, v27.16b //AES block 8k+10 - round 13 6182 aese v4.16b, v27.16b //AES block 8k+12 - round 13 6183 6184 rev32 v23.16b, v30.16b //CTR block 8k+18 6185 add v30.4s, v30.4s, v31.4s //CTR block 8k+18 6186 aese v5.16b, v27.16b //AES block 8k+13 - round 13 6187 6188 aese v0.16b, v26.16b 6189 aesmc v0.16b, v0.16b //AES block 8k+8 - round 12 6190 aese v3.16b, v27.16b //AES block 8k+11 - round 13 6191 cmp x0, x5 //.LOOP CONTROL 6192 6193 .inst 0xce02714a //eor3 v10.16b, v10.16b, v2.16b, v28.16b //AES block 8k+10 - result 6194 rev32 v25.16b, v30.16b //CTR block 8k+19 6195 add v30.4s, v30.4s, v31.4s //CTR block 8k+19 6196 6197 aese v0.16b, v27.16b //AES block 8k+8 - round 13 6198 aese v6.16b, v27.16b //AES block 8k+14 - round 13 6199 .inst 0xce0571ad //eor3 v13.16b, v13.16b, v5.16b, v28.16b //AES block 5 - result 6200 6201 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 6202 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 6203 aese v1.16b, v27.16b //AES block 8k+9 - round 13 6204 6205 .inst 0xce04718c //eor3 v12.16b, v12.16b, v4.16b, v28.16b //AES block 4 - result 6206 rev32 v4.16b, v30.16b //CTR block 8k+20 6207 .inst 0xce03716b //eor3 v11.16b, v11.16b, v3.16b, v28.16b //AES block 8k+11 - result 6208 6209 mov v3.16b, v25.16b //CTR block 8k+19 6210 .inst 0xce017129 //eor3 v9.16b, v9.16b, v1.16b, v28.16b //AES block 8k+9 - result 6211 .inst 0xce007108 //eor3 v8.16b, v8.16b, v0.16b, v28.16b //AES block 8k+8 - result 6212 6213 add v30.4s, v30.4s, v31.4s //CTR block 8k+20 6214 stp q8, q9, [x2], #32 //AES block 8k+8, 8k+9 - store result 6215 mov v2.16b, v23.16b //CTR block 8k+18 6216 6217 .inst 0xce0771ef //eor3 v15.16b, v15.16b, v7.16b, v28.16b //AES block 7 - result 6218 .inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low 6219 stp q10, q11, [x2], #32 //AES block 8k+10, 8k+11 - store result 6220 6221 .inst 0xce0671ce //eor3 v14.16b, v14.16b, v6.16b, v28.16b //AES block 6 - result 6222 mov v1.16b, v22.16b //CTR block 8k+17 6223 stp q12, q13, [x2], #32 //AES block 4, 5 - store result 6224 6225 stp q14, q15, [x2], #32 //AES block 6, 7 - store result 6226 mov v0.16b, v20.16b //CTR block 8k+16 6227 b.lt .L256_enc_main_loop 6228 6229 .L256_enc_prepretail: //PREPRETAIL 6230 rev32 v5.16b, v30.16b //CTR block 8k+13 6231 ldp q26, q27, [x8, #0] //load rk0, rk1 6232 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 6233 6234 rev64 v10.16b, v10.16b //GHASH block 8k+2 6235 6236 rev32 v6.16b, v30.16b //CTR block 8k+14 6237 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 6238 6239 rev64 v13.16b, v13.16b //GHASH block 8k+5 6240 ldr q21, [x3, #144] //load h6k | h5k 6241 ldr q24, [x3, #192] //load h8k | h7k 6242 6243 rev32 v7.16b, v30.16b //CTR block 8k+15 6244 6245 aese v6.16b, v26.16b 6246 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 6247 aese v4.16b, v26.16b 6248 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 6249 aese v1.16b, v26.16b 6250 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 6251 6252 aese v5.16b, v26.16b 6253 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 6254 aese v0.16b, v26.16b 6255 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 6256 6257 aese v2.16b, v26.16b 6258 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 6259 aese v7.16b, v26.16b 6260 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 6261 aese v3.16b, v26.16b 6262 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 6263 6264 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 6265 rev64 v8.16b, v8.16b //GHASH block 8k 6266 aese v1.16b, v27.16b 6267 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 6268 6269 rev64 v9.16b, v9.16b //GHASH block 8k+1 6270 ldp q28, q26, [x8, #32] //load rk2, rk3 6271 aese v3.16b, v27.16b 6272 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 6273 6274 ldr q23, [x3, #176] //load h7l | h7h 6275 ext v23.16b, v23.16b, v23.16b, #8 6276 ldr q25, [x3, #208] //load h8l | h8h 6277 ext v25.16b, v25.16b, v25.16b, #8 6278 aese v2.16b, v27.16b 6279 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 6280 6281 ldr q20, [x3, #128] //load h5l | h5h 6282 ext v20.16b, v20.16b, v20.16b, #8 6283 ldr q22, [x3, #160] //load h6l | h6h 6284 ext v22.16b, v22.16b, v22.16b, #8 6285 aese v0.16b, v27.16b 6286 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 6287 aese v5.16b, v27.16b 6288 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 6289 6290 aese v4.16b, v27.16b 6291 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 6292 eor v8.16b, v8.16b, v19.16b //PRE 1 6293 6294 rev64 v11.16b, v11.16b //GHASH block 8k+3 6295 aese v6.16b, v27.16b 6296 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 6297 6298 aese v1.16b, v28.16b 6299 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 6300 aese v2.16b, v28.16b 6301 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 6302 aese v7.16b, v27.16b 6303 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 6304 6305 aese v4.16b, v28.16b 6306 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 6307 aese v0.16b, v28.16b 6308 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 6309 aese v6.16b, v28.16b 6310 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 6311 6312 aese v5.16b, v28.16b 6313 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 6314 aese v7.16b, v28.16b 6315 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 6316 aese v3.16b, v28.16b 6317 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 6318 6319 ldp q27, q28, [x8, #64] //load rk4, rk5 6320 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 6321 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 6322 6323 rev64 v14.16b, v14.16b //GHASH block 8k+6 6324 aese v4.16b, v26.16b 6325 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 6326 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 6327 6328 aese v7.16b, v26.16b 6329 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 6330 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 6331 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 6332 6333 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 6334 aese v6.16b, v26.16b 6335 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 6336 6337 aese v2.16b, v26.16b 6338 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 6339 aese v3.16b, v26.16b 6340 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 6341 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 6342 6343 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 6344 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 6345 aese v1.16b, v26.16b 6346 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 6347 6348 aese v0.16b, v26.16b 6349 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 6350 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 6351 aese v5.16b, v26.16b 6352 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 6353 6354 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 6355 aese v1.16b, v27.16b 6356 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 6357 aese v6.16b, v27.16b 6358 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 6359 6360 aese v0.16b, v27.16b 6361 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 6362 aese v2.16b, v27.16b 6363 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 6364 aese v4.16b, v27.16b 6365 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 6366 6367 aese v6.16b, v28.16b 6368 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 6369 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 6370 .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 6371 6372 aese v7.16b, v27.16b 6373 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 6374 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 6375 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 6376 6377 aese v5.16b, v27.16b 6378 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 6379 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 6380 aese v3.16b, v27.16b 6381 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 6382 6383 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 6384 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 6385 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 6386 6387 rev64 v12.16b, v12.16b //GHASH block 8k+4 6388 aese v1.16b, v28.16b 6389 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 6390 aese v0.16b, v28.16b 6391 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 6392 6393 aese v7.16b, v28.16b 6394 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 6395 aese v4.16b, v28.16b 6396 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 6397 ldp q26, q27, [x8, #96] //load rk6, rk7 6398 6399 ldr q23, [x3, #80] //load h3l | h3h 6400 ext v23.16b, v23.16b, v23.16b, #8 6401 ldr q25, [x3, #112] //load h4l | h4h 6402 ext v25.16b, v25.16b, v25.16b, #8 6403 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 6404 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 6405 6406 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 6407 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 6408 6409 aese v5.16b, v28.16b 6410 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 6411 rev64 v15.16b, v15.16b //GHASH block 8k+7 6412 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 6413 6414 aese v3.16b, v28.16b 6415 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 6416 aese v2.16b, v28.16b 6417 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 6418 .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 6419 6420 aese v7.16b, v26.16b 6421 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 6422 aese v4.16b, v26.16b 6423 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 6424 aese v6.16b, v26.16b 6425 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 6426 6427 ldr q21, [x3, #48] //load h2k | h1k 6428 ldr q24, [x3, #96] //load h4k | h3k 6429 aese v5.16b, v26.16b 6430 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 6431 aese v3.16b, v26.16b 6432 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 6433 6434 aese v0.16b, v26.16b 6435 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 6436 aese v1.16b, v26.16b 6437 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 6438 aese v2.16b, v26.16b 6439 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 6440 6441 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 6442 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 6443 ldr q20, [x3, #32] //load h1l | h1h 6444 ext v20.16b, v20.16b, v20.16b, #8 6445 ldr q22, [x3, #64] //load h2l | h2h 6446 ext v22.16b, v22.16b, v22.16b, #8 6447 6448 ldp q28, q26, [x8, #128] //load rk8, rk9 6449 aese v1.16b, v27.16b 6450 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 6451 aese v4.16b, v27.16b 6452 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 6453 6454 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 6455 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 6456 6457 aese v5.16b, v27.16b 6458 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 6459 aese v6.16b, v27.16b 6460 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 6461 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 6462 6463 aese v7.16b, v27.16b 6464 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 6465 aese v3.16b, v27.16b 6466 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 6467 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 6468 6469 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 6470 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 6471 aese v2.16b, v27.16b 6472 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 6473 6474 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 6475 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 6476 aese v0.16b, v27.16b 6477 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 6478 6479 aese v7.16b, v28.16b 6480 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 6481 .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 6482 aese v2.16b, v28.16b 6483 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 6484 6485 aese v6.16b, v28.16b 6486 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 6487 aese v4.16b, v28.16b 6488 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 6489 aese v3.16b, v28.16b 6490 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 6491 6492 aese v5.16b, v28.16b 6493 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 6494 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 6495 aese v0.16b, v28.16b 6496 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 6497 6498 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 6499 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 6500 aese v1.16b, v28.16b 6501 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 6502 6503 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 6504 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 6505 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 6506 6507 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 6508 .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 6509 .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 6510 6511 ldp q27, q28, [x8, #160] //load rk10, rk11 6512 aese v1.16b, v26.16b 6513 aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 6514 aese v0.16b, v26.16b 6515 aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 6516 6517 .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 6518 .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 6519 ldr d16, [x10] //MODULO - load modulo constant 6520 6521 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 6522 6523 aese v3.16b, v26.16b 6524 aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 6525 aese v7.16b, v26.16b 6526 aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 6527 aese v5.16b, v26.16b 6528 aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 6529 6530 aese v2.16b, v26.16b 6531 aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 6532 aese v6.16b, v26.16b 6533 aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 6534 6535 aese v5.16b, v27.16b 6536 aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 6537 aese v1.16b, v27.16b 6538 aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 6539 aese v4.16b, v26.16b 6540 aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 6541 6542 aese v7.16b, v27.16b 6543 aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 6544 aese v6.16b, v27.16b 6545 aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 6546 aese v3.16b, v27.16b 6547 aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 6548 6549 aese v4.16b, v27.16b 6550 aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 6551 aese v0.16b, v27.16b 6552 aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 6553 aese v2.16b, v27.16b 6554 aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 6555 6556 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 6557 .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 6558 aese v7.16b, v28.16b 6559 aesmc v7.16b, v7.16b //AES block 8k+15 - round 11 6560 6561 ldp q26, q27, [x8, #192] //load rk12, rk13 6562 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 6563 aese v2.16b, v28.16b 6564 aesmc v2.16b, v2.16b //AES block 8k+10 - round 11 6565 6566 .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 6567 aese v1.16b, v28.16b 6568 aesmc v1.16b, v1.16b //AES block 8k+9 - round 11 6569 aese v6.16b, v28.16b 6570 aesmc v6.16b, v6.16b //AES block 8k+14 - round 11 6571 6572 aese v0.16b, v28.16b 6573 aesmc v0.16b, v0.16b //AES block 8k+8 - round 11 6574 aese v4.16b, v28.16b 6575 aesmc v4.16b, v4.16b //AES block 8k+12 - round 11 6576 aese v5.16b, v28.16b 6577 aesmc v5.16b, v5.16b //AES block 8k+13 - round 11 6578 6579 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 6580 aese v3.16b, v28.16b 6581 aesmc v3.16b, v3.16b //AES block 8k+11 - round 11 6582 ldr q28, [x8, #224] //load rk14 6583 6584 aese v1.16b, v26.16b 6585 aesmc v1.16b, v1.16b //AES block 8k+9 - round 12 6586 aese v2.16b, v26.16b 6587 aesmc v2.16b, v2.16b //AES block 8k+10 - round 12 6588 aese v0.16b, v26.16b 6589 aesmc v0.16b, v0.16b //AES block 8k+8 - round 12 6590 6591 aese v6.16b, v26.16b 6592 aesmc v6.16b, v6.16b //AES block 8k+14 - round 12 6593 aese v5.16b, v26.16b 6594 aesmc v5.16b, v5.16b //AES block 8k+13 - round 12 6595 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 6596 6597 aese v4.16b, v26.16b 6598 aesmc v4.16b, v4.16b //AES block 8k+12 - round 12 6599 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 6600 6601 aese v3.16b, v26.16b 6602 aesmc v3.16b, v3.16b //AES block 8k+11 - round 12 6603 aese v7.16b, v26.16b 6604 aesmc v7.16b, v7.16b //AES block 8k+15 - round 12 6605 aese v0.16b, v27.16b //AES block 8k+8 - round 13 6606 6607 .inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low 6608 aese v5.16b, v27.16b //AES block 8k+13 - round 13 6609 aese v1.16b, v27.16b //AES block 8k+9 - round 13 6610 6611 aese v3.16b, v27.16b //AES block 8k+11 - round 13 6612 aese v4.16b, v27.16b //AES block 8k+12 - round 13 6613 aese v7.16b, v27.16b //AES block 8k+15 - round 13 6614 6615 aese v2.16b, v27.16b //AES block 8k+10 - round 13 6616 aese v6.16b, v27.16b //AES block 8k+14 - round 13 6617 .L256_enc_tail: //TAIL 6618 6619 ldp q24, q25, [x3, #192] //load h8l | h8h 6620 ext v25.16b, v25.16b, v25.16b, #8 6621 sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process 6622 6623 ldr q8, [x0], #16 //AES block 8k+8 - load plaintext 6624 6625 ldp q20, q21, [x3, #128] //load h5l | h5h 6626 ext v20.16b, v20.16b, v20.16b, #8 6627 6628 ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag 6629 ldp q22, q23, [x3, #160] //load h6l | h6h 6630 ext v22.16b, v22.16b, v22.16b, #8 6631 ext v23.16b, v23.16b, v23.16b, #8 6632 mov v29.16b, v28.16b 6633 6634 cmp x5, #112 6635 .inst 0xce007509 //eor3 v9.16b, v8.16b, v0.16b, v29.16b //AES block 8k+8 - result 6636 b.gt .L256_enc_blocks_more_than_7 6637 6638 movi v19.8b, #0 6639 mov v7.16b, v6.16b 6640 movi v17.8b, #0 6641 6642 mov v6.16b, v5.16b 6643 mov v5.16b, v4.16b 6644 mov v4.16b, v3.16b 6645 6646 mov v3.16b, v2.16b 6647 sub v30.4s, v30.4s, v31.4s 6648 mov v2.16b, v1.16b 6649 6650 movi v18.8b, #0 6651 cmp x5, #96 6652 b.gt .L256_enc_blocks_more_than_6 6653 6654 mov v7.16b, v6.16b 6655 mov v6.16b, v5.16b 6656 cmp x5, #80 6657 6658 mov v5.16b, v4.16b 6659 mov v4.16b, v3.16b 6660 mov v3.16b, v1.16b 6661 6662 sub v30.4s, v30.4s, v31.4s 6663 b.gt .L256_enc_blocks_more_than_5 6664 6665 mov v7.16b, v6.16b 6666 sub v30.4s, v30.4s, v31.4s 6667 6668 mov v6.16b, v5.16b 6669 mov v5.16b, v4.16b 6670 6671 cmp x5, #64 6672 mov v4.16b, v1.16b 6673 b.gt .L256_enc_blocks_more_than_4 6674 6675 cmp x5, #48 6676 mov v7.16b, v6.16b 6677 mov v6.16b, v5.16b 6678 6679 mov v5.16b, v1.16b 6680 sub v30.4s, v30.4s, v31.4s 6681 b.gt .L256_enc_blocks_more_than_3 6682 6683 cmp x5, #32 6684 mov v7.16b, v6.16b 6685 ldr q24, [x3, #96] //load h4k | h3k 6686 6687 mov v6.16b, v1.16b 6688 sub v30.4s, v30.4s, v31.4s 6689 b.gt .L256_enc_blocks_more_than_2 6690 6691 mov v7.16b, v1.16b 6692 6693 sub v30.4s, v30.4s, v31.4s 6694 cmp x5, #16 6695 b.gt .L256_enc_blocks_more_than_1 6696 6697 sub v30.4s, v30.4s, v31.4s 6698 ldr q21, [x3, #48] //load h2k | h1k 6699 b .L256_enc_blocks_less_than_1 6700 .L256_enc_blocks_more_than_7: //blocks left > 7 6701 st1 { v9.16b}, [x2], #16 //AES final-7 block - store result 6702 6703 rev64 v8.16b, v9.16b //GHASH final-7 block 6704 6705 eor v8.16b, v8.16b, v16.16b //feed in partial tag 6706 6707 ldr q9, [x0], #16 //AES final-6 block - load plaintext 6708 6709 pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high 6710 ins v27.d[0], v8.d[1] //GHASH final-7 block - mid 6711 ins v18.d[0], v24.d[1] //GHASH final-7 block - mid 6712 6713 movi v16.8b, #0 //suppress further partial tag feed in 6714 6715 eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid 6716 .inst 0xce017529 //eor3 v9.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result 6717 6718 pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid 6719 pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low 6720 .L256_enc_blocks_more_than_6: //blocks left > 6 6721 6722 st1 { v9.16b}, [x2], #16 //AES final-6 block - store result 6723 6724 rev64 v8.16b, v9.16b //GHASH final-6 block 6725 6726 eor v8.16b, v8.16b, v16.16b //feed in partial tag 6727 6728 pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low 6729 ins v27.d[0], v8.d[1] //GHASH final-6 block - mid 6730 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high 6731 6732 ldr q9, [x0], #16 //AES final-5 block - load plaintext 6733 6734 eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low 6735 6736 eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid 6737 6738 pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid 6739 .inst 0xce027529 //eor3 v9.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result 6740 6741 movi v16.8b, #0 //suppress further partial tag feed in 6742 6743 eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid 6744 eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high 6745 .L256_enc_blocks_more_than_5: //blocks left > 5 6746 6747 st1 { v9.16b}, [x2], #16 //AES final-5 block - store result 6748 6749 rev64 v8.16b, v9.16b //GHASH final-5 block 6750 6751 eor v8.16b, v8.16b, v16.16b //feed in partial tag 6752 6753 ins v27.d[0], v8.d[1] //GHASH final-5 block - mid 6754 6755 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high 6756 6757 eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high 6758 eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid 6759 6760 ins v27.d[1], v27.d[0] //GHASH final-5 block - mid 6761 6762 ldr q9, [x0], #16 //AES final-4 block - load plaintext 6763 pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low 6764 6765 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid 6766 movi v16.8b, #0 //suppress further partial tag feed in 6767 eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low 6768 6769 eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid 6770 .inst 0xce037529 //eor3 v9.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result 6771 .L256_enc_blocks_more_than_4: //blocks left > 4 6772 6773 st1 { v9.16b}, [x2], #16 //AES final-4 block - store result 6774 6775 rev64 v8.16b, v9.16b //GHASH final-4 block 6776 6777 ldr q9, [x0], #16 //AES final-3 block - load plaintext 6778 6779 eor v8.16b, v8.16b, v16.16b //feed in partial tag 6780 6781 ins v27.d[0], v8.d[1] //GHASH final-4 block - mid 6782 pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high 6783 6784 .inst 0xce047529 //eor3 v9.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result 6785 pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low 6786 6787 eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid 6788 eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low 6789 6790 pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid 6791 6792 movi v16.8b, #0 //suppress further partial tag feed in 6793 6794 eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid 6795 eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high 6796 .L256_enc_blocks_more_than_3: //blocks left > 3 6797 6798 st1 { v9.16b}, [x2], #16 //AES final-3 block - store result 6799 6800 ldr q25, [x3, #112] //load h4l | h4h 6801 ext v25.16b, v25.16b, v25.16b, #8 6802 rev64 v8.16b, v9.16b //GHASH final-3 block 6803 6804 eor v8.16b, v8.16b, v16.16b //feed in partial tag 6805 6806 ins v27.d[0], v8.d[1] //GHASH final-3 block - mid 6807 pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high 6808 6809 eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high 6810 eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid 6811 ldr q24, [x3, #96] //load h4k | h3k 6812 6813 ins v27.d[1], v27.d[0] //GHASH final-3 block - mid 6814 ldr q9, [x0], #16 //AES final-2 block - load plaintext 6815 6816 pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid 6817 pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low 6818 6819 .inst 0xce057529 //eor3 v9.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result 6820 movi v16.8b, #0 //suppress further partial tag feed in 6821 6822 eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid 6823 eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low 6824 .L256_enc_blocks_more_than_2: //blocks left > 2 6825 6826 ldr q23, [x3, #80] //load h3l | h3h 6827 ext v23.16b, v23.16b, v23.16b, #8 6828 6829 st1 { v9.16b}, [x2], #16 //AES final-2 block - store result 6830 6831 rev64 v8.16b, v9.16b //GHASH final-2 block 6832 ldr q9, [x0], #16 //AES final-1 block - load plaintext 6833 6834 eor v8.16b, v8.16b, v16.16b //feed in partial tag 6835 6836 ins v27.d[0], v8.d[1] //GHASH final-2 block - mid 6837 6838 movi v16.8b, #0 //suppress further partial tag feed in 6839 6840 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high 6841 .inst 0xce067529 //eor3 v9.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result 6842 6843 eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid 6844 6845 eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high 6846 6847 pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid 6848 pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low 6849 6850 eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid 6851 eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low 6852 .L256_enc_blocks_more_than_1: //blocks left > 1 6853 6854 st1 { v9.16b}, [x2], #16 //AES final-1 block - store result 6855 6856 ldr q22, [x3, #64] //load h2l | h2h 6857 ext v22.16b, v22.16b, v22.16b, #8 6858 rev64 v8.16b, v9.16b //GHASH final-1 block 6859 ldr q9, [x0], #16 //AES final block - load plaintext 6860 6861 eor v8.16b, v8.16b, v16.16b //feed in partial tag 6862 movi v16.8b, #0 //suppress further partial tag feed in 6863 6864 ins v27.d[0], v8.d[1] //GHASH final-1 block - mid 6865 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high 6866 6867 .inst 0xce077529 //eor3 v9.16b, v9.16b, v7.16b, v29.16b //AES final block - result 6868 eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high 6869 6870 pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low 6871 eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid 6872 6873 ldr q21, [x3, #48] //load h2k | h1k 6874 6875 eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low 6876 ins v27.d[1], v27.d[0] //GHASH final-1 block - mid 6877 6878 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid 6879 6880 eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid 6881 .L256_enc_blocks_less_than_1: //blocks left <= 1 6882 6883 and x1, x1, #127 //bit_length %= 128 6884 6885 sub x1, x1, #128 //bit_length -= 128 6886 6887 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) 6888 6889 mvn x6, xzr //temp0_x = 0xffffffffffffffff 6890 and x1, x1, #127 //bit_length %= 128 6891 6892 lsr x6, x6, x1 //temp0_x is mask for top 64b of last block 6893 cmp x1, #64 6894 mvn x7, xzr //temp1_x = 0xffffffffffffffff 6895 6896 csel x14, x6, xzr, lt 6897 csel x13, x7, x6, lt 6898 6899 mov v0.d[0], x13 //ctr0b is mask for last block 6900 ldr q20, [x3, #32] //load h1l | h1h 6901 ext v20.16b, v20.16b, v20.16b, #8 6902 6903 ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored 6904 mov v0.d[1], x14 6905 6906 and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits 6907 6908 rev64 v8.16b, v9.16b //GHASH final block 6909 6910 rev32 v30.16b, v30.16b 6911 bif v9.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing 6912 str q30, [x16] //store the updated counter 6913 6914 eor v8.16b, v8.16b, v16.16b //feed in partial tag 6915 st1 { v9.16b}, [x2] //store all 16B 6916 6917 ins v16.d[0], v8.d[1] //GHASH final block - mid 6918 pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high 6919 pmull v26.1q, v8.1d, v20.1d //GHASH final block - low 6920 6921 eor v17.16b, v17.16b, v28.16b //GHASH final block - high 6922 eor v19.16b, v19.16b, v26.16b //GHASH final block - low 6923 6924 eor v16.8b, v16.8b, v8.8b //GHASH final block - mid 6925 6926 pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid 6927 6928 eor v18.16b, v18.16b, v16.16b //GHASH final block - mid 6929 ldr d16, [x10] //MODULO - load modulo constant 6930 6931 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 6932 6933 .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 6934 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 6935 6936 .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 6937 6938 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 6939 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 6940 6941 .inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 6942 ext v19.16b, v19.16b, v19.16b, #8 6943 rev64 v19.16b, v19.16b 6944 st1 { v19.16b }, [x3] 6945 mov x0, x9 //return sizes 6946 6947 ldp d10, d11, [sp, #16] 6948 ldp d12, d13, [sp, #32] 6949 ldp d14, d15, [sp, #48] 6950 ldp d8, d9, [sp], #80 6951 ret 6952 6953 .L256_enc_ret: 6954 mov w0, #0x0 6955 ret 6956 .size unroll8_eor3_aes_gcm_enc_256_kernel,.-unroll8_eor3_aes_gcm_enc_256_kernel 6957 .globl unroll8_eor3_aes_gcm_dec_256_kernel 6958 .type unroll8_eor3_aes_gcm_dec_256_kernel,%function 6959 .align 4 6960 unroll8_eor3_aes_gcm_dec_256_kernel: 6961 AARCH64_VALID_CALL_TARGET 6962 cbz x1, .L256_dec_ret 6963 stp d8, d9, [sp, #-80]! 6964 lsr x9, x1, #3 6965 mov x16, x4 6966 mov x8, x5 6967 stp d10, d11, [sp, #16] 6968 stp d12, d13, [sp, #32] 6969 stp d14, d15, [sp, #48] 6970 mov x5, #0xc200000000000000 6971 stp x5, xzr, [sp, #64] 6972 add x10, sp, #64 6973 6974 ld1 { v0.16b}, [x16] //CTR block 0 6975 6976 mov x15, #0x100000000 //set up counter increment 6977 movi v31.16b, #0x0 6978 mov v31.d[1], x15 6979 mov x5, x9 6980 6981 sub x5, x5, #1 //byte_len - 1 6982 6983 rev32 v30.16b, v0.16b //set up reversed counter 6984 6985 add v30.4s, v30.4s, v31.4s //CTR block 0 6986 6987 rev32 v1.16b, v30.16b //CTR block 1 6988 add v30.4s, v30.4s, v31.4s //CTR block 1 6989 6990 rev32 v2.16b, v30.16b //CTR block 2 6991 add v30.4s, v30.4s, v31.4s //CTR block 2 6992 ldp q26, q27, [x8, #0] //load rk0, rk1 6993 6994 rev32 v3.16b, v30.16b //CTR block 3 6995 add v30.4s, v30.4s, v31.4s //CTR block 3 6996 6997 rev32 v4.16b, v30.16b //CTR block 4 6998 add v30.4s, v30.4s, v31.4s //CTR block 4 6999 7000 aese v0.16b, v26.16b 7001 aesmc v0.16b, v0.16b //AES block 0 - round 0 7002 7003 rev32 v5.16b, v30.16b //CTR block 5 7004 add v30.4s, v30.4s, v31.4s //CTR block 5 7005 7006 aese v1.16b, v26.16b 7007 aesmc v1.16b, v1.16b //AES block 1 - round 0 7008 aese v2.16b, v26.16b 7009 aesmc v2.16b, v2.16b //AES block 2 - round 0 7010 7011 rev32 v6.16b, v30.16b //CTR block 6 7012 add v30.4s, v30.4s, v31.4s //CTR block 6 7013 7014 rev32 v7.16b, v30.16b //CTR block 7 7015 aese v4.16b, v26.16b 7016 aesmc v4.16b, v4.16b //AES block 4 - round 0 7017 7018 aese v6.16b, v26.16b 7019 aesmc v6.16b, v6.16b //AES block 6 - round 0 7020 aese v5.16b, v26.16b 7021 aesmc v5.16b, v5.16b //AES block 5 - round 0 7022 7023 aese v3.16b, v26.16b 7024 aesmc v3.16b, v3.16b //AES block 3 - round 0 7025 aese v7.16b, v26.16b 7026 aesmc v7.16b, v7.16b //AES block 7 - round 0 7027 ldp q28, q26, [x8, #32] //load rk2, rk3 7028 7029 aese v6.16b, v27.16b 7030 aesmc v6.16b, v6.16b //AES block 6 - round 1 7031 aese v4.16b, v27.16b 7032 aesmc v4.16b, v4.16b //AES block 4 - round 1 7033 aese v0.16b, v27.16b 7034 aesmc v0.16b, v0.16b //AES block 0 - round 1 7035 7036 aese v5.16b, v27.16b 7037 aesmc v5.16b, v5.16b //AES block 5 - round 1 7038 aese v7.16b, v27.16b 7039 aesmc v7.16b, v7.16b //AES block 7 - round 1 7040 aese v1.16b, v27.16b 7041 aesmc v1.16b, v1.16b //AES block 1 - round 1 7042 7043 aese v2.16b, v27.16b 7044 aesmc v2.16b, v2.16b //AES block 2 - round 1 7045 aese v3.16b, v27.16b 7046 aesmc v3.16b, v3.16b //AES block 3 - round 1 7047 7048 aese v3.16b, v28.16b 7049 aesmc v3.16b, v3.16b //AES block 3 - round 2 7050 aese v2.16b, v28.16b 7051 aesmc v2.16b, v2.16b //AES block 2 - round 2 7052 aese v6.16b, v28.16b 7053 aesmc v6.16b, v6.16b //AES block 6 - round 2 7054 7055 aese v1.16b, v28.16b 7056 aesmc v1.16b, v1.16b //AES block 1 - round 2 7057 aese v7.16b, v28.16b 7058 aesmc v7.16b, v7.16b //AES block 7 - round 2 7059 aese v5.16b, v28.16b 7060 aesmc v5.16b, v5.16b //AES block 5 - round 2 7061 7062 aese v0.16b, v28.16b 7063 aesmc v0.16b, v0.16b //AES block 0 - round 2 7064 aese v4.16b, v28.16b 7065 aesmc v4.16b, v4.16b //AES block 4 - round 2 7066 ldp q27, q28, [x8, #64] //load rk4, rk5 7067 7068 aese v1.16b, v26.16b 7069 aesmc v1.16b, v1.16b //AES block 1 - round 3 7070 aese v2.16b, v26.16b 7071 aesmc v2.16b, v2.16b //AES block 2 - round 3 7072 7073 aese v3.16b, v26.16b 7074 aesmc v3.16b, v3.16b //AES block 3 - round 3 7075 aese v4.16b, v26.16b 7076 aesmc v4.16b, v4.16b //AES block 4 - round 3 7077 7078 aese v5.16b, v26.16b 7079 aesmc v5.16b, v5.16b //AES block 5 - round 3 7080 aese v7.16b, v26.16b 7081 aesmc v7.16b, v7.16b //AES block 7 - round 3 7082 aese v0.16b, v26.16b 7083 aesmc v0.16b, v0.16b //AES block 0 - round 3 7084 7085 aese v6.16b, v26.16b 7086 aesmc v6.16b, v6.16b //AES block 6 - round 3 7087 7088 aese v7.16b, v27.16b 7089 aesmc v7.16b, v7.16b //AES block 7 - round 4 7090 aese v3.16b, v27.16b 7091 aesmc v3.16b, v3.16b //AES block 3 - round 4 7092 7093 aese v6.16b, v27.16b 7094 aesmc v6.16b, v6.16b //AES block 6 - round 4 7095 aese v2.16b, v27.16b 7096 aesmc v2.16b, v2.16b //AES block 2 - round 4 7097 aese v0.16b, v27.16b 7098 aesmc v0.16b, v0.16b //AES block 0 - round 4 7099 7100 aese v4.16b, v27.16b 7101 aesmc v4.16b, v4.16b //AES block 4 - round 4 7102 aese v1.16b, v27.16b 7103 aesmc v1.16b, v1.16b //AES block 1 - round 4 7104 aese v5.16b, v27.16b 7105 aesmc v5.16b, v5.16b //AES block 5 - round 4 7106 7107 aese v0.16b, v28.16b 7108 aesmc v0.16b, v0.16b //AES block 0 - round 5 7109 aese v6.16b, v28.16b 7110 aesmc v6.16b, v6.16b //AES block 6 - round 5 7111 7112 ldp q26, q27, [x8, #96] //load rk6, rk7 7113 aese v4.16b, v28.16b 7114 aesmc v4.16b, v4.16b //AES block 4 - round 5 7115 aese v7.16b, v28.16b 7116 aesmc v7.16b, v7.16b //AES block 7 - round 5 7117 7118 aese v5.16b, v28.16b 7119 aesmc v5.16b, v5.16b //AES block 5 - round 5 7120 7121 aese v2.16b, v28.16b 7122 aesmc v2.16b, v2.16b //AES block 2 - round 5 7123 aese v3.16b, v28.16b 7124 aesmc v3.16b, v3.16b //AES block 3 - round 5 7125 7126 aese v1.16b, v28.16b 7127 aesmc v1.16b, v1.16b //AES block 1 - round 5 7128 7129 aese v4.16b, v26.16b 7130 aesmc v4.16b, v4.16b //AES block 4 - round 6 7131 aese v3.16b, v26.16b 7132 aesmc v3.16b, v3.16b //AES block 3 - round 6 7133 aese v7.16b, v26.16b 7134 aesmc v7.16b, v7.16b //AES block 7 - round 6 7135 7136 aese v6.16b, v26.16b 7137 aesmc v6.16b, v6.16b //AES block 6 - round 6 7138 aese v0.16b, v26.16b 7139 aesmc v0.16b, v0.16b //AES block 0 - round 6 7140 aese v5.16b, v26.16b 7141 aesmc v5.16b, v5.16b //AES block 5 - round 6 7142 7143 aese v2.16b, v26.16b 7144 aesmc v2.16b, v2.16b //AES block 2 - round 6 7145 aese v1.16b, v26.16b 7146 aesmc v1.16b, v1.16b //AES block 1 - round 6 7147 ldp q28, q26, [x8, #128] //load rk8, rk9 7148 7149 aese v5.16b, v27.16b 7150 aesmc v5.16b, v5.16b //AES block 5 - round 7 7151 aese v0.16b, v27.16b 7152 aesmc v0.16b, v0.16b //AES block 0 - round 7 7153 7154 aese v3.16b, v27.16b 7155 aesmc v3.16b, v3.16b //AES block 3 - round 7 7156 aese v2.16b, v27.16b 7157 aesmc v2.16b, v2.16b //AES block 2 - round 7 7158 aese v7.16b, v27.16b 7159 aesmc v7.16b, v7.16b //AES block 7 - round 7 7160 7161 aese v4.16b, v27.16b 7162 aesmc v4.16b, v4.16b //AES block 4 - round 7 7163 aese v1.16b, v27.16b 7164 aesmc v1.16b, v1.16b //AES block 1 - round 7 7165 aese v6.16b, v27.16b 7166 aesmc v6.16b, v6.16b //AES block 6 - round 7 7167 7168 and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 7169 aese v7.16b, v28.16b 7170 aesmc v7.16b, v7.16b //AES block 7 - round 8 7171 aese v5.16b, v28.16b 7172 aesmc v5.16b, v5.16b //AES block 5 - round 8 7173 7174 aese v0.16b, v28.16b 7175 aesmc v0.16b, v0.16b //AES block 0 - round 8 7176 aese v1.16b, v28.16b 7177 aesmc v1.16b, v1.16b //AES block 1 - round 8 7178 aese v2.16b, v28.16b 7179 aesmc v2.16b, v2.16b //AES block 2 - round 8 7180 7181 aese v4.16b, v28.16b 7182 aesmc v4.16b, v4.16b //AES block 4 - round 8 7183 aese v3.16b, v28.16b 7184 aesmc v3.16b, v3.16b //AES block 3 - round 8 7185 aese v6.16b, v28.16b 7186 aesmc v6.16b, v6.16b //AES block 6 - round 8 7187 7188 aese v2.16b, v26.16b 7189 aesmc v2.16b, v2.16b //AES block 2 - round 9 7190 7191 ld1 { v19.16b}, [x3] 7192 ext v19.16b, v19.16b, v19.16b, #8 7193 rev64 v19.16b, v19.16b 7194 ldp q27, q28, [x8, #160] //load rk10, rk11 7195 add x4, x0, x1, lsr #3 //end_input_ptr 7196 add x5, x5, x0 7197 7198 aese v3.16b, v26.16b 7199 aesmc v3.16b, v3.16b //AES block 3 - round 9 7200 aese v6.16b, v26.16b 7201 aesmc v6.16b, v6.16b //AES block 6 - round 9 7202 7203 aese v4.16b, v26.16b 7204 aesmc v4.16b, v4.16b //AES block 4 - round 9 7205 aese v5.16b, v26.16b 7206 aesmc v5.16b, v5.16b //AES block 5 - round 9 7207 7208 aese v7.16b, v26.16b 7209 aesmc v7.16b, v7.16b //AES block 7 - round 9 7210 7211 aese v0.16b, v26.16b 7212 aesmc v0.16b, v0.16b //AES block 0 - round 9 7213 aese v1.16b, v26.16b 7214 aesmc v1.16b, v1.16b //AES block 1 - round 9 7215 7216 aese v4.16b, v27.16b 7217 aesmc v4.16b, v4.16b //AES block 4 - round 10 7218 aese v7.16b, v27.16b 7219 aesmc v7.16b, v7.16b //AES block 7 - round 10 7220 aese v5.16b, v27.16b 7221 aesmc v5.16b, v5.16b //AES block 5 - round 10 7222 7223 aese v1.16b, v27.16b 7224 aesmc v1.16b, v1.16b //AES block 1 - round 10 7225 aese v2.16b, v27.16b 7226 aesmc v2.16b, v2.16b //AES block 2 - round 10 7227 aese v0.16b, v27.16b 7228 aesmc v0.16b, v0.16b //AES block 0 - round 10 7229 7230 aese v6.16b, v27.16b 7231 aesmc v6.16b, v6.16b //AES block 6 - round 10 7232 aese v3.16b, v27.16b 7233 aesmc v3.16b, v3.16b //AES block 3 - round 10 7234 ldp q26, q27, [x8, #192] //load rk12, rk13 7235 7236 aese v0.16b, v28.16b 7237 aesmc v0.16b, v0.16b //AES block 0 - round 11 7238 add v30.4s, v30.4s, v31.4s //CTR block 7 7239 7240 aese v7.16b, v28.16b 7241 aesmc v7.16b, v7.16b //AES block 7 - round 11 7242 aese v3.16b, v28.16b 7243 aesmc v3.16b, v3.16b //AES block 3 - round 11 7244 aese v1.16b, v28.16b 7245 aesmc v1.16b, v1.16b //AES block 1 - round 11 7246 7247 aese v5.16b, v28.16b 7248 aesmc v5.16b, v5.16b //AES block 5 - round 11 7249 aese v4.16b, v28.16b 7250 aesmc v4.16b, v4.16b //AES block 4 - round 11 7251 aese v2.16b, v28.16b 7252 aesmc v2.16b, v2.16b //AES block 2 - round 11 7253 7254 aese v6.16b, v28.16b 7255 aesmc v6.16b, v6.16b //AES block 6 - round 11 7256 ldr q28, [x8, #224] //load rk14 7257 7258 aese v1.16b, v26.16b 7259 aesmc v1.16b, v1.16b //AES block 1 - round 12 7260 aese v4.16b, v26.16b 7261 aesmc v4.16b, v4.16b //AES block 4 - round 12 7262 aese v5.16b, v26.16b 7263 aesmc v5.16b, v5.16b //AES block 5 - round 12 7264 7265 cmp x0, x5 //check if we have <= 8 blocks 7266 aese v3.16b, v26.16b 7267 aesmc v3.16b, v3.16b //AES block 3 - round 12 7268 aese v2.16b, v26.16b 7269 aesmc v2.16b, v2.16b //AES block 2 - round 12 7270 7271 aese v6.16b, v26.16b 7272 aesmc v6.16b, v6.16b //AES block 6 - round 12 7273 aese v0.16b, v26.16b 7274 aesmc v0.16b, v0.16b //AES block 0 - round 12 7275 aese v7.16b, v26.16b 7276 aesmc v7.16b, v7.16b //AES block 7 - round 12 7277 7278 aese v5.16b, v27.16b //AES block 5 - round 13 7279 aese v1.16b, v27.16b //AES block 1 - round 13 7280 aese v2.16b, v27.16b //AES block 2 - round 13 7281 7282 aese v0.16b, v27.16b //AES block 0 - round 13 7283 aese v4.16b, v27.16b //AES block 4 - round 13 7284 aese v6.16b, v27.16b //AES block 6 - round 13 7285 7286 aese v3.16b, v27.16b //AES block 3 - round 13 7287 aese v7.16b, v27.16b //AES block 7 - round 13 7288 b.ge .L256_dec_tail //handle tail 7289 7290 ldp q8, q9, [x0], #32 //AES block 0, 1 - load ciphertext 7291 7292 ldp q10, q11, [x0], #32 //AES block 2, 3 - load ciphertext 7293 7294 ldp q12, q13, [x0], #32 //AES block 4, 5 - load ciphertext 7295 7296 ldp q14, q15, [x0], #32 //AES block 6, 7 - load ciphertext 7297 cmp x0, x5 //check if we have <= 8 blocks 7298 7299 .inst 0xce017121 //eor3 v1.16b, v9.16b, v1.16b, v28.16b //AES block 1 - result 7300 .inst 0xce007100 //eor3 v0.16b, v8.16b, v0.16b, v28.16b //AES block 0 - result 7301 stp q0, q1, [x2], #32 //AES block 0, 1 - store result 7302 7303 rev32 v0.16b, v30.16b //CTR block 8 7304 add v30.4s, v30.4s, v31.4s //CTR block 8 7305 .inst 0xce037163 //eor3 v3.16b, v11.16b, v3.16b, v28.16b //AES block 3 - result 7306 7307 .inst 0xce0571a5 //eor3 v5.16b, v13.16b, v5.16b, v28.16b //AES block 5 - result 7308 7309 .inst 0xce047184 //eor3 v4.16b, v12.16b, v4.16b, v28.16b //AES block 4 - result 7310 rev32 v1.16b, v30.16b //CTR block 9 7311 add v30.4s, v30.4s, v31.4s //CTR block 9 7312 7313 .inst 0xce027142 //eor3 v2.16b, v10.16b, v2.16b, v28.16b //AES block 2 - result 7314 stp q2, q3, [x2], #32 //AES block 2, 3 - store result 7315 7316 rev32 v2.16b, v30.16b //CTR block 10 7317 add v30.4s, v30.4s, v31.4s //CTR block 10 7318 7319 .inst 0xce0671c6 //eor3 v6.16b, v14.16b, v6.16b, v28.16b //AES block 6 - result 7320 7321 rev32 v3.16b, v30.16b //CTR block 11 7322 add v30.4s, v30.4s, v31.4s //CTR block 11 7323 stp q4, q5, [x2], #32 //AES block 4, 5 - store result 7324 7325 .inst 0xce0771e7 //eor3 v7.16b, v15.16b, v7.16b, v28.16b //AES block 7 - result 7326 stp q6, q7, [x2], #32 //AES block 6, 7 - store result 7327 7328 rev32 v4.16b, v30.16b //CTR block 12 7329 add v30.4s, v30.4s, v31.4s //CTR block 12 7330 b.ge .L256_dec_prepretail //do prepretail 7331 7332 .L256_dec_main_loop: //main loop start 7333 rev32 v5.16b, v30.16b //CTR block 8k+13 7334 ldp q26, q27, [x8, #0] //load rk0, rk1 7335 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 7336 7337 rev64 v9.16b, v9.16b //GHASH block 8k+1 7338 ldr q23, [x3, #176] //load h7l | h7h 7339 ext v23.16b, v23.16b, v23.16b, #8 7340 ldr q25, [x3, #208] //load h8l | h8h 7341 ext v25.16b, v25.16b, v25.16b, #8 7342 7343 rev32 v6.16b, v30.16b //CTR block 8k+14 7344 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 7345 rev64 v8.16b, v8.16b //GHASH block 8k 7346 7347 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 7348 rev64 v12.16b, v12.16b //GHASH block 8k+4 7349 rev64 v11.16b, v11.16b //GHASH block 8k+3 7350 7351 rev32 v7.16b, v30.16b //CTR block 8k+15 7352 rev64 v15.16b, v15.16b //GHASH block 8k+7 7353 7354 aese v3.16b, v26.16b 7355 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 7356 aese v6.16b, v26.16b 7357 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 7358 aese v2.16b, v26.16b 7359 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 7360 7361 aese v7.16b, v26.16b 7362 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 7363 aese v0.16b, v26.16b 7364 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 7365 aese v5.16b, v26.16b 7366 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 7367 7368 aese v4.16b, v26.16b 7369 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 7370 aese v1.16b, v26.16b 7371 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 7372 ldp q28, q26, [x8, #32] //load rk2, rk3 7373 7374 eor v8.16b, v8.16b, v19.16b //PRE 1 7375 ldr q20, [x3, #128] //load h5l | h5h 7376 ext v20.16b, v20.16b, v20.16b, #8 7377 ldr q22, [x3, #160] //load h6l | h6h 7378 ext v22.16b, v22.16b, v22.16b, #8 7379 aese v6.16b, v27.16b 7380 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 7381 7382 aese v4.16b, v27.16b 7383 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 7384 rev64 v10.16b, v10.16b //GHASH block 8k+2 7385 aese v3.16b, v27.16b 7386 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 7387 7388 aese v0.16b, v27.16b 7389 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 7390 aese v5.16b, v27.16b 7391 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 7392 aese v2.16b, v27.16b 7393 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 7394 7395 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 7396 aese v7.16b, v27.16b 7397 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 7398 aese v1.16b, v27.16b 7399 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 7400 7401 aese v4.16b, v28.16b 7402 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 7403 aese v0.16b, v28.16b 7404 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 7405 aese v3.16b, v28.16b 7406 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 7407 7408 aese v6.16b, v28.16b 7409 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 7410 aese v7.16b, v28.16b 7411 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 7412 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 7413 7414 aese v5.16b, v28.16b 7415 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 7416 aese v2.16b, v28.16b 7417 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 7418 aese v1.16b, v28.16b 7419 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 7420 7421 ldp q27, q28, [x8, #64] //load rk4, rk5 7422 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 7423 aese v3.16b, v26.16b 7424 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 7425 7426 aese v0.16b, v26.16b 7427 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 7428 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 7429 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 7430 7431 aese v5.16b, v26.16b 7432 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 7433 aese v6.16b, v26.16b 7434 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 7435 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 7436 7437 aese v4.16b, v26.16b 7438 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 7439 aese v1.16b, v26.16b 7440 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 7441 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 7442 7443 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 7444 aese v2.16b, v26.16b 7445 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 7446 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 7447 7448 aese v5.16b, v27.16b 7449 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 7450 aese v7.16b, v26.16b 7451 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 7452 aese v3.16b, v27.16b 7453 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 7454 7455 aese v2.16b, v27.16b 7456 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 7457 aese v0.16b, v27.16b 7458 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 7459 aese v1.16b, v27.16b 7460 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 7461 7462 aese v6.16b, v27.16b 7463 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 7464 aese v7.16b, v27.16b 7465 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 7466 aese v4.16b, v27.16b 7467 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 7468 7469 ldr q21, [x3, #144] //load h6k | h5k 7470 ldr q24, [x3, #192] //load h8k | h7k 7471 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 7472 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 7473 7474 ldp q26, q27, [x8, #96] //load rk6, rk7 7475 aese v5.16b, v28.16b 7476 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 7477 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 7478 7479 aese v0.16b, v28.16b 7480 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 7481 aese v3.16b, v28.16b 7482 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 7483 aese v7.16b, v28.16b 7484 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 7485 7486 aese v1.16b, v28.16b 7487 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 7488 aese v2.16b, v28.16b 7489 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 7490 aese v6.16b, v28.16b 7491 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 7492 7493 .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 7494 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 7495 rev64 v13.16b, v13.16b //GHASH block 8k+5 7496 7497 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 7498 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 7499 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 7500 7501 aese v3.16b, v26.16b 7502 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 7503 aese v0.16b, v26.16b 7504 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 7505 aese v4.16b, v28.16b 7506 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 7507 7508 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 7509 aese v1.16b, v26.16b 7510 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 7511 aese v6.16b, v26.16b 7512 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 7513 7514 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 7515 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 7516 aese v4.16b, v26.16b 7517 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 7518 7519 aese v2.16b, v26.16b 7520 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 7521 aese v5.16b, v26.16b 7522 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 7523 aese v7.16b, v26.16b 7524 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 7525 7526 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 7527 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 7528 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 7529 7530 ldr q23, [x3, #80] //load h3l | h3h 7531 ext v23.16b, v23.16b, v23.16b, #8 7532 ldr q25, [x3, #112] //load h4l | h4h 7533 ext v25.16b, v25.16b, v25.16b, #8 7534 rev64 v14.16b, v14.16b //GHASH block 8k+6 7535 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 7536 7537 aese v2.16b, v27.16b 7538 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 7539 aese v5.16b, v27.16b 7540 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 7541 ldp q28, q26, [x8, #128] //load rk8, rk9 7542 7543 ldr q20, [x3, #32] //load h1l | h1h 7544 ext v20.16b, v20.16b, v20.16b, #8 7545 ldr q22, [x3, #64] //load h2l | h2h 7546 ext v22.16b, v22.16b, v22.16b, #8 7547 .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 7548 aese v7.16b, v27.16b 7549 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 7550 7551 aese v1.16b, v27.16b 7552 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 7553 aese v3.16b, v27.16b 7554 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 7555 aese v6.16b, v27.16b 7556 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 7557 7558 ldr q21, [x3, #48] //load h2k | h1k 7559 ldr q24, [x3, #96] //load h4k | h3k 7560 aese v0.16b, v27.16b 7561 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 7562 aese v4.16b, v27.16b 7563 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 7564 7565 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 7566 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 7567 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 7568 7569 aese v5.16b, v28.16b 7570 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 7571 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 7572 aese v2.16b, v28.16b 7573 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 7574 7575 aese v6.16b, v28.16b 7576 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 7577 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 7578 aese v1.16b, v28.16b 7579 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 7580 7581 aese v4.16b, v28.16b 7582 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 7583 aese v0.16b, v28.16b 7584 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 7585 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 7586 7587 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 7588 aese v3.16b, v28.16b 7589 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 7590 aese v7.16b, v28.16b 7591 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 7592 7593 ldp q27, q28, [x8, #160] //load rk10, rk11 7594 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 7595 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 7596 7597 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 7598 .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 7599 aese v3.16b, v26.16b 7600 aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 7601 7602 aese v6.16b, v26.16b 7603 aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 7604 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 7605 aese v5.16b, v26.16b 7606 aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 7607 7608 ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load ciphertext 7609 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 7610 aese v7.16b, v26.16b 7611 aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 7612 7613 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 7614 aese v2.16b, v26.16b 7615 aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 7616 aese v1.16b, v26.16b 7617 aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 7618 7619 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 7620 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 7621 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 7622 7623 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 7624 aese v3.16b, v27.16b 7625 aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 7626 aese v6.16b, v27.16b 7627 aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 7628 7629 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 7630 aese v0.16b, v26.16b 7631 aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 7632 .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 7633 7634 aese v4.16b, v26.16b 7635 aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 7636 .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 7637 .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 7638 7639 aese v2.16b, v27.16b 7640 aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 7641 aese v5.16b, v27.16b 7642 aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 7643 aese v7.16b, v27.16b 7644 aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 7645 7646 aese v1.16b, v27.16b 7647 aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 7648 aese v0.16b, v27.16b 7649 aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 7650 aese v4.16b, v27.16b 7651 aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 7652 7653 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 7654 rev32 v20.16b, v30.16b //CTR block 8k+16 7655 ldr d16, [x10] //MODULO - load modulo constant 7656 7657 add v30.4s, v30.4s, v31.4s //CTR block 8k+16 7658 aese v1.16b, v28.16b 7659 aesmc v1.16b, v1.16b //AES block 8k+9 - round 11 7660 ldp q26, q27, [x8, #192] //load rk12, rk13 7661 7662 aese v0.16b, v28.16b 7663 aesmc v0.16b, v0.16b //AES block 8k+8 - round 11 7664 aese v6.16b, v28.16b 7665 aesmc v6.16b, v6.16b //AES block 8k+14 - round 11 7666 7667 .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 7668 rev32 v22.16b, v30.16b //CTR block 8k+17 7669 aese v2.16b, v28.16b 7670 aesmc v2.16b, v2.16b //AES block 8k+10 - round 11 7671 7672 ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load ciphertext 7673 aese v7.16b, v28.16b 7674 aesmc v7.16b, v7.16b //AES block 8k+15 - round 11 7675 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 7676 7677 aese v5.16b, v28.16b 7678 aesmc v5.16b, v5.16b //AES block 8k+13 - round 11 7679 add v30.4s, v30.4s, v31.4s //CTR block 8k+17 7680 aese v3.16b, v28.16b 7681 aesmc v3.16b, v3.16b //AES block 8k+11 - round 11 7682 7683 aese v2.16b, v26.16b 7684 aesmc v2.16b, v2.16b //AES block 8k+10 - round 12 7685 aese v7.16b, v26.16b 7686 aesmc v7.16b, v7.16b //AES block 8k+15 - round 12 7687 aese v6.16b, v26.16b 7688 aesmc v6.16b, v6.16b //AES block 8k+14 - round 12 7689 7690 rev32 v23.16b, v30.16b //CTR block 8k+18 7691 add v30.4s, v30.4s, v31.4s //CTR block 8k+18 7692 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 7693 7694 .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 7695 aese v1.16b, v26.16b 7696 aesmc v1.16b, v1.16b //AES block 8k+9 - round 12 7697 aese v4.16b, v28.16b 7698 aesmc v4.16b, v4.16b //AES block 8k+12 - round 11 7699 7700 ldr q28, [x8, #224] //load rk14 7701 aese v5.16b, v26.16b 7702 aesmc v5.16b, v5.16b //AES block 8k+13 - round 12 7703 aese v3.16b, v26.16b 7704 aesmc v3.16b, v3.16b //AES block 8k+11 - round 12 7705 7706 .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 7707 aese v0.16b, v26.16b 7708 aesmc v0.16b, v0.16b //AES block 8k+8 - round 12 7709 aese v4.16b, v26.16b 7710 aesmc v4.16b, v4.16b //AES block 8k+12 - round 12 7711 7712 ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load ciphertext 7713 aese v1.16b, v27.16b //AES block 8k+9 - round 13 7714 aese v2.16b, v27.16b //AES block 8k+10 - round 13 7715 7716 ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load ciphertext 7717 aese v0.16b, v27.16b //AES block 8k+8 - round 13 7718 aese v5.16b, v27.16b //AES block 8k+13 - round 13 7719 7720 rev32 v25.16b, v30.16b //CTR block 8k+19 7721 .inst 0xce027142 //eor3 v2.16b, v10.16b, v2.16b, v28.16b //AES block 8k+10 - result 7722 .inst 0xce017121 //eor3 v1.16b, v9.16b, v1.16b, v28.16b //AES block 8k+9 - result 7723 7724 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 7725 aese v7.16b, v27.16b //AES block 8k+15 - round 13 7726 7727 add v30.4s, v30.4s, v31.4s //CTR block 8k+19 7728 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 7729 aese v4.16b, v27.16b //AES block 8k+12 - round 13 7730 7731 .inst 0xce0571a5 //eor3 v5.16b, v13.16b, v5.16b, v28.16b //AES block 8k+13 - result 7732 .inst 0xce007100 //eor3 v0.16b, v8.16b, v0.16b, v28.16b //AES block 8k+8 - result 7733 aese v3.16b, v27.16b //AES block 8k+11 - round 13 7734 7735 stp q0, q1, [x2], #32 //AES block 8k+8, 8k+9 - store result 7736 mov v0.16b, v20.16b //CTR block 8k+16 7737 .inst 0xce047184 //eor3 v4.16b, v12.16b, v4.16b, v28.16b //AES block 8k+12 - result 7738 7739 .inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low 7740 .inst 0xce037163 //eor3 v3.16b, v11.16b, v3.16b, v28.16b //AES block 8k+11 - result 7741 stp q2, q3, [x2], #32 //AES block 8k+10, 8k+11 - store result 7742 7743 mov v3.16b, v25.16b //CTR block 8k+19 7744 mov v2.16b, v23.16b //CTR block 8k+18 7745 aese v6.16b, v27.16b //AES block 8k+14 - round 13 7746 7747 mov v1.16b, v22.16b //CTR block 8k+17 7748 stp q4, q5, [x2], #32 //AES block 8k+12, 8k+13 - store result 7749 .inst 0xce0771e7 //eor3 v7.16b, v15.16b, v7.16b, v28.16b //AES block 8k+15 - result 7750 7751 .inst 0xce0671c6 //eor3 v6.16b, v14.16b, v6.16b, v28.16b //AES block 8k+14 - result 7752 rev32 v4.16b, v30.16b //CTR block 8k+20 7753 add v30.4s, v30.4s, v31.4s //CTR block 8k+20 7754 7755 cmp x0, x5 //.LOOP CONTROL 7756 stp q6, q7, [x2], #32 //AES block 8k+14, 8k+15 - store result 7757 b.lt .L256_dec_main_loop 7758 7759 .L256_dec_prepretail: //PREPRETAIL 7760 ldp q26, q27, [x8, #0] //load rk0, rk1 7761 rev32 v5.16b, v30.16b //CTR block 8k+13 7762 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 7763 7764 rev64 v12.16b, v12.16b //GHASH block 8k+4 7765 ldr q21, [x3, #144] //load h6k | h5k 7766 ldr q24, [x3, #192] //load h8k | h7k 7767 7768 rev32 v6.16b, v30.16b //CTR block 8k+14 7769 rev64 v8.16b, v8.16b //GHASH block 8k 7770 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 7771 7772 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 7773 ldr q23, [x3, #176] //load h7l | h7h 7774 ext v23.16b, v23.16b, v23.16b, #8 7775 ldr q25, [x3, #208] //load h8l | h8h 7776 ext v25.16b, v25.16b, v25.16b, #8 7777 rev64 v9.16b, v9.16b //GHASH block 8k+1 7778 7779 rev32 v7.16b, v30.16b //CTR block 8k+15 7780 rev64 v10.16b, v10.16b //GHASH block 8k+2 7781 ldr q20, [x3, #128] //load h5l | h5h 7782 ext v20.16b, v20.16b, v20.16b, #8 7783 ldr q22, [x3, #160] //load h6l | h6h 7784 ext v22.16b, v22.16b, v22.16b, #8 7785 7786 aese v0.16b, v26.16b 7787 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 7788 aese v1.16b, v26.16b 7789 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 7790 aese v4.16b, v26.16b 7791 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 7792 7793 aese v3.16b, v26.16b 7794 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 7795 aese v5.16b, v26.16b 7796 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 7797 aese v6.16b, v26.16b 7798 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 7799 7800 aese v4.16b, v27.16b 7801 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 7802 aese v7.16b, v26.16b 7803 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 7804 aese v2.16b, v26.16b 7805 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 7806 7807 ldp q28, q26, [x8, #32] //load rk2, rk3 7808 aese v0.16b, v27.16b 7809 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 7810 eor v8.16b, v8.16b, v19.16b //PRE 1 7811 7812 aese v7.16b, v27.16b 7813 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 7814 aese v6.16b, v27.16b 7815 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 7816 aese v2.16b, v27.16b 7817 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 7818 7819 aese v3.16b, v27.16b 7820 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 7821 aese v1.16b, v27.16b 7822 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 7823 aese v5.16b, v27.16b 7824 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 7825 7826 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 7827 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 7828 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 7829 7830 rev64 v11.16b, v11.16b //GHASH block 8k+3 7831 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 7832 7833 aese v5.16b, v28.16b 7834 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 7835 aese v7.16b, v28.16b 7836 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 7837 aese v1.16b, v28.16b 7838 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 7839 7840 aese v3.16b, v28.16b 7841 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 7842 aese v6.16b, v28.16b 7843 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 7844 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 7845 7846 aese v0.16b, v28.16b 7847 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 7848 aese v7.16b, v26.16b 7849 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 7850 7851 aese v5.16b, v26.16b 7852 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 7853 rev64 v14.16b, v14.16b //GHASH block 8k+6 7854 7855 aese v0.16b, v26.16b 7856 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 7857 aese v2.16b, v28.16b 7858 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 7859 aese v6.16b, v26.16b 7860 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 7861 7862 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 7863 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 7864 aese v4.16b, v28.16b 7865 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 7866 7867 ldp q27, q28, [x8, #64] //load rk4, rk5 7868 aese v1.16b, v26.16b 7869 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 7870 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 7871 7872 aese v2.16b, v26.16b 7873 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 7874 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 7875 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 7876 7877 aese v4.16b, v26.16b 7878 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 7879 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 7880 aese v3.16b, v26.16b 7881 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 7882 7883 .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 7884 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 7885 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 7886 7887 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 7888 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 7889 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 7890 7891 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 7892 aese v5.16b, v27.16b 7893 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 7894 aese v0.16b, v27.16b 7895 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 7896 7897 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 7898 ldr q20, [x3, #32] //load h1l | h1h 7899 ext v20.16b, v20.16b, v20.16b, #8 7900 ldr q22, [x3, #64] //load h2l | h2h 7901 ext v22.16b, v22.16b, v22.16b, #8 7902 aese v7.16b, v27.16b 7903 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 7904 7905 aese v2.16b, v27.16b 7906 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 7907 aese v6.16b, v27.16b 7908 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 7909 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 7910 7911 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 7912 aese v7.16b, v28.16b 7913 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 7914 aese v1.16b, v27.16b 7915 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 7916 7917 aese v2.16b, v28.16b 7918 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 7919 aese v3.16b, v27.16b 7920 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 7921 aese v4.16b, v27.16b 7922 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 7923 7924 aese v1.16b, v28.16b 7925 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 7926 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 7927 aese v6.16b, v28.16b 7928 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 7929 7930 aese v4.16b, v28.16b 7931 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 7932 aese v3.16b, v28.16b 7933 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 7934 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 7935 7936 aese v0.16b, v28.16b 7937 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 7938 aese v5.16b, v28.16b 7939 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 7940 ldp q26, q27, [x8, #96] //load rk6, rk7 7941 7942 ldr q23, [x3, #80] //load h3l | h3h 7943 ext v23.16b, v23.16b, v23.16b, #8 7944 ldr q25, [x3, #112] //load h4l | h4h 7945 ext v25.16b, v25.16b, v25.16b, #8 7946 rev64 v15.16b, v15.16b //GHASH block 8k+7 7947 rev64 v13.16b, v13.16b //GHASH block 8k+5 7948 7949 .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 7950 7951 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 7952 7953 aese v0.16b, v26.16b 7954 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 7955 ldr q21, [x3, #48] //load h2k | h1k 7956 ldr q24, [x3, #96] //load h4k | h3k 7957 aese v6.16b, v26.16b 7958 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 7959 7960 aese v5.16b, v26.16b 7961 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 7962 aese v7.16b, v26.16b 7963 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 7964 7965 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 7966 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 7967 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 7968 7969 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 7970 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 7971 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 7972 7973 aese v7.16b, v27.16b 7974 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 7975 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 7976 aese v1.16b, v26.16b 7977 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 7978 7979 aese v2.16b, v26.16b 7980 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 7981 aese v3.16b, v26.16b 7982 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 7983 aese v4.16b, v26.16b 7984 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 7985 7986 ldp q28, q26, [x8, #128] //load rk8, rk9 7987 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 7988 aese v5.16b, v27.16b 7989 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 7990 7991 aese v1.16b, v27.16b 7992 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 7993 aese v4.16b, v27.16b 7994 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 7995 7996 aese v6.16b, v27.16b 7997 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 7998 aese v2.16b, v27.16b 7999 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 8000 .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 8001 8002 aese v0.16b, v27.16b 8003 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 8004 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 8005 aese v3.16b, v27.16b 8006 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 8007 8008 aese v0.16b, v28.16b 8009 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 8010 aese v7.16b, v28.16b 8011 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 8012 aese v4.16b, v28.16b 8013 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 8014 8015 aese v1.16b, v28.16b 8016 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 8017 aese v5.16b, v28.16b 8018 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 8019 aese v6.16b, v28.16b 8020 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 8021 8022 aese v3.16b, v28.16b 8023 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 8024 aese v4.16b, v26.16b 8025 aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 8026 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 8027 8028 aese v0.16b, v26.16b 8029 aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 8030 aese v1.16b, v26.16b 8031 aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 8032 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 8033 8034 aese v6.16b, v26.16b 8035 aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 8036 aese v7.16b, v26.16b 8037 aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 8038 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 8039 8040 aese v2.16b, v28.16b 8041 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 8042 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 8043 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 8044 8045 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 8046 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 8047 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 8048 8049 ldp q27, q28, [x8, #160] //load rk10, rk11 8050 .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 8051 .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 8052 8053 aese v2.16b, v26.16b 8054 aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 8055 aese v3.16b, v26.16b 8056 aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 8057 aese v5.16b, v26.16b 8058 aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 8059 8060 .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 8061 .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 8062 ldr d16, [x10] //MODULO - load modulo constant 8063 8064 .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 8065 8066 aese v4.16b, v27.16b 8067 aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 8068 aese v6.16b, v27.16b 8069 aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 8070 aese v5.16b, v27.16b 8071 aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 8072 8073 aese v0.16b, v27.16b 8074 aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 8075 aese v2.16b, v27.16b 8076 aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 8077 aese v3.16b, v27.16b 8078 aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 8079 8080 .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 8081 8082 aese v7.16b, v27.16b 8083 aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 8084 aese v1.16b, v27.16b 8085 aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 8086 ldp q26, q27, [x8, #192] //load rk12, rk13 8087 8088 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 8089 8090 aese v2.16b, v28.16b 8091 aesmc v2.16b, v2.16b //AES block 8k+10 - round 11 8092 aese v1.16b, v28.16b 8093 aesmc v1.16b, v1.16b //AES block 8k+9 - round 11 8094 aese v0.16b, v28.16b 8095 aesmc v0.16b, v0.16b //AES block 8k+8 - round 11 8096 8097 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 8098 aese v3.16b, v28.16b 8099 aesmc v3.16b, v3.16b //AES block 8k+11 - round 11 8100 8101 aese v7.16b, v28.16b 8102 aesmc v7.16b, v7.16b //AES block 8k+15 - round 11 8103 aese v6.16b, v28.16b 8104 aesmc v6.16b, v6.16b //AES block 8k+14 - round 11 8105 aese v4.16b, v28.16b 8106 aesmc v4.16b, v4.16b //AES block 8k+12 - round 11 8107 8108 aese v5.16b, v28.16b 8109 aesmc v5.16b, v5.16b //AES block 8k+13 - round 11 8110 aese v3.16b, v26.16b 8111 aesmc v3.16b, v3.16b //AES block 8k+11 - round 12 8112 8113 .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 8114 8115 aese v3.16b, v27.16b //AES block 8k+11 - round 13 8116 aese v2.16b, v26.16b 8117 aesmc v2.16b, v2.16b //AES block 8k+10 - round 12 8118 aese v6.16b, v26.16b 8119 aesmc v6.16b, v6.16b //AES block 8k+14 - round 12 8120 8121 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 8122 aese v4.16b, v26.16b 8123 aesmc v4.16b, v4.16b //AES block 8k+12 - round 12 8124 aese v7.16b, v26.16b 8125 aesmc v7.16b, v7.16b //AES block 8k+15 - round 12 8126 8127 aese v0.16b, v26.16b 8128 aesmc v0.16b, v0.16b //AES block 8k+8 - round 12 8129 ldr q28, [x8, #224] //load rk14 8130 aese v1.16b, v26.16b 8131 aesmc v1.16b, v1.16b //AES block 8k+9 - round 12 8132 8133 aese v4.16b, v27.16b //AES block 8k+12 - round 13 8134 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 8135 aese v5.16b, v26.16b 8136 aesmc v5.16b, v5.16b //AES block 8k+13 - round 12 8137 8138 aese v6.16b, v27.16b //AES block 8k+14 - round 13 8139 aese v2.16b, v27.16b //AES block 8k+10 - round 13 8140 aese v1.16b, v27.16b //AES block 8k+9 - round 13 8141 8142 aese v5.16b, v27.16b //AES block 8k+13 - round 13 8143 .inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low 8144 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 8145 8146 aese v7.16b, v27.16b //AES block 8k+15 - round 13 8147 aese v0.16b, v27.16b //AES block 8k+8 - round 13 8148 .L256_dec_tail: //TAIL 8149 8150 ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag 8151 sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process 8152 cmp x5, #112 8153 8154 ldr q9, [x0], #16 //AES block 8k+8 - load ciphertext 8155 8156 ldp q24, q25, [x3, #192] //load h8k | h7k 8157 ext v25.16b, v25.16b, v25.16b, #8 8158 mov v29.16b, v28.16b 8159 8160 ldp q20, q21, [x3, #128] //load h5l | h5h 8161 ext v20.16b, v20.16b, v20.16b, #8 8162 8163 .inst 0xce00752c //eor3 v12.16b, v9.16b, v0.16b, v29.16b //AES block 8k+8 - result 8164 ldp q22, q23, [x3, #160] //load h6l | h6h 8165 ext v22.16b, v22.16b, v22.16b, #8 8166 ext v23.16b, v23.16b, v23.16b, #8 8167 b.gt .L256_dec_blocks_more_than_7 8168 8169 mov v7.16b, v6.16b 8170 sub v30.4s, v30.4s, v31.4s 8171 mov v6.16b, v5.16b 8172 8173 mov v5.16b, v4.16b 8174 mov v4.16b, v3.16b 8175 movi v19.8b, #0 8176 8177 movi v17.8b, #0 8178 movi v18.8b, #0 8179 mov v3.16b, v2.16b 8180 8181 cmp x5, #96 8182 mov v2.16b, v1.16b 8183 b.gt .L256_dec_blocks_more_than_6 8184 8185 mov v7.16b, v6.16b 8186 mov v6.16b, v5.16b 8187 8188 mov v5.16b, v4.16b 8189 cmp x5, #80 8190 sub v30.4s, v30.4s, v31.4s 8191 8192 mov v4.16b, v3.16b 8193 mov v3.16b, v1.16b 8194 b.gt .L256_dec_blocks_more_than_5 8195 8196 cmp x5, #64 8197 mov v7.16b, v6.16b 8198 sub v30.4s, v30.4s, v31.4s 8199 8200 mov v6.16b, v5.16b 8201 8202 mov v5.16b, v4.16b 8203 mov v4.16b, v1.16b 8204 b.gt .L256_dec_blocks_more_than_4 8205 8206 sub v30.4s, v30.4s, v31.4s 8207 mov v7.16b, v6.16b 8208 cmp x5, #48 8209 8210 mov v6.16b, v5.16b 8211 mov v5.16b, v1.16b 8212 b.gt .L256_dec_blocks_more_than_3 8213 8214 ldr q24, [x3, #96] //load h4k | h3k 8215 sub v30.4s, v30.4s, v31.4s 8216 mov v7.16b, v6.16b 8217 8218 cmp x5, #32 8219 mov v6.16b, v1.16b 8220 b.gt .L256_dec_blocks_more_than_2 8221 8222 sub v30.4s, v30.4s, v31.4s 8223 8224 mov v7.16b, v1.16b 8225 cmp x5, #16 8226 b.gt .L256_dec_blocks_more_than_1 8227 8228 sub v30.4s, v30.4s, v31.4s 8229 ldr q21, [x3, #48] //load h2k | h1k 8230 b .L256_dec_blocks_less_than_1 8231 .L256_dec_blocks_more_than_7: //blocks left > 7 8232 rev64 v8.16b, v9.16b //GHASH final-7 block 8233 ldr q9, [x0], #16 //AES final-6 block - load ciphertext 8234 st1 { v12.16b}, [x2], #16 //AES final-7 block - store result 8235 8236 ins v18.d[0], v24.d[1] //GHASH final-7 block - mid 8237 8238 eor v8.16b, v8.16b, v16.16b //feed in partial tag 8239 8240 ins v27.d[0], v8.d[1] //GHASH final-7 block - mid 8241 .inst 0xce01752c //eor3 v12.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result 8242 8243 pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high 8244 8245 eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid 8246 movi v16.8b, #0 //suppress further partial tag feed in 8247 8248 pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low 8249 pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid 8250 .L256_dec_blocks_more_than_6: //blocks left > 6 8251 8252 rev64 v8.16b, v9.16b //GHASH final-6 block 8253 8254 eor v8.16b, v8.16b, v16.16b //feed in partial tag 8255 ldr q9, [x0], #16 //AES final-5 block - load ciphertext 8256 movi v16.8b, #0 //suppress further partial tag feed in 8257 8258 ins v27.d[0], v8.d[1] //GHASH final-6 block - mid 8259 st1 { v12.16b}, [x2], #16 //AES final-6 block - store result 8260 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high 8261 8262 pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low 8263 8264 .inst 0xce02752c //eor3 v12.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result 8265 eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low 8266 eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid 8267 8268 pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid 8269 8270 eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid 8271 eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high 8272 .L256_dec_blocks_more_than_5: //blocks left > 5 8273 8274 rev64 v8.16b, v9.16b //GHASH final-5 block 8275 8276 eor v8.16b, v8.16b, v16.16b //feed in partial tag 8277 8278 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high 8279 ins v27.d[0], v8.d[1] //GHASH final-5 block - mid 8280 8281 ldr q9, [x0], #16 //AES final-4 block - load ciphertext 8282 8283 eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid 8284 st1 { v12.16b}, [x2], #16 //AES final-5 block - store result 8285 8286 pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low 8287 ins v27.d[1], v27.d[0] //GHASH final-5 block - mid 8288 8289 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid 8290 8291 eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high 8292 .inst 0xce03752c //eor3 v12.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result 8293 eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low 8294 8295 eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid 8296 movi v16.8b, #0 //suppress further partial tag feed in 8297 .L256_dec_blocks_more_than_4: //blocks left > 4 8298 8299 rev64 v8.16b, v9.16b //GHASH final-4 block 8300 8301 eor v8.16b, v8.16b, v16.16b //feed in partial tag 8302 8303 ins v27.d[0], v8.d[1] //GHASH final-4 block - mid 8304 ldr q9, [x0], #16 //AES final-3 block - load ciphertext 8305 8306 movi v16.8b, #0 //suppress further partial tag feed in 8307 8308 pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low 8309 pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high 8310 8311 eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid 8312 8313 eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high 8314 8315 pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid 8316 8317 eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low 8318 st1 { v12.16b}, [x2], #16 //AES final-4 block - store result 8319 8320 eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid 8321 .inst 0xce04752c //eor3 v12.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result 8322 .L256_dec_blocks_more_than_3: //blocks left > 3 8323 8324 ldr q25, [x3, #112] //load h4l | h4h 8325 ext v25.16b, v25.16b, v25.16b, #8 8326 rev64 v8.16b, v9.16b //GHASH final-3 block 8327 8328 eor v8.16b, v8.16b, v16.16b //feed in partial tag 8329 ldr q9, [x0], #16 //AES final-2 block - load ciphertext 8330 ldr q24, [x3, #96] //load h4k | h3k 8331 8332 ins v27.d[0], v8.d[1] //GHASH final-3 block - mid 8333 st1 { v12.16b}, [x2], #16 //AES final-3 block - store result 8334 8335 .inst 0xce05752c //eor3 v12.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result 8336 8337 eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid 8338 8339 ins v27.d[1], v27.d[0] //GHASH final-3 block - mid 8340 pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low 8341 pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high 8342 8343 movi v16.8b, #0 //suppress further partial tag feed in 8344 pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid 8345 eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low 8346 8347 eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high 8348 8349 eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid 8350 .L256_dec_blocks_more_than_2: //blocks left > 2 8351 8352 rev64 v8.16b, v9.16b //GHASH final-2 block 8353 8354 ldr q23, [x3, #80] //load h3l | h3h 8355 ext v23.16b, v23.16b, v23.16b, #8 8356 ldr q9, [x0], #16 //AES final-1 block - load ciphertext 8357 8358 eor v8.16b, v8.16b, v16.16b //feed in partial tag 8359 8360 ins v27.d[0], v8.d[1] //GHASH final-2 block - mid 8361 8362 pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low 8363 st1 { v12.16b}, [x2], #16 //AES final-2 block - store result 8364 .inst 0xce06752c //eor3 v12.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result 8365 8366 eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid 8367 eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low 8368 movi v16.8b, #0 //suppress further partial tag feed in 8369 8370 pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid 8371 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high 8372 8373 eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid 8374 eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high 8375 .L256_dec_blocks_more_than_1: //blocks left > 1 8376 8377 rev64 v8.16b, v9.16b //GHASH final-1 block 8378 8379 eor v8.16b, v8.16b, v16.16b //feed in partial tag 8380 8381 ins v27.d[0], v8.d[1] //GHASH final-1 block - mid 8382 ldr q22, [x3, #64] //load h2l | h2h 8383 ext v22.16b, v22.16b, v22.16b, #8 8384 8385 eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid 8386 ldr q9, [x0], #16 //AES final block - load ciphertext 8387 st1 { v12.16b}, [x2], #16 //AES final-1 block - store result 8388 8389 ldr q21, [x3, #48] //load h2k | h1k 8390 pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low 8391 8392 ins v27.d[1], v27.d[0] //GHASH final-1 block - mid 8393 8394 eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low 8395 8396 .inst 0xce07752c //eor3 v12.16b, v9.16b, v7.16b, v29.16b //AES final block - result 8397 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high 8398 8399 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid 8400 8401 movi v16.8b, #0 //suppress further partial tag feed in 8402 eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high 8403 8404 eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid 8405 .L256_dec_blocks_less_than_1: //blocks left <= 1 8406 8407 ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored 8408 mvn x6, xzr //temp0_x = 0xffffffffffffffff 8409 and x1, x1, #127 //bit_length %= 128 8410 8411 sub x1, x1, #128 //bit_length -= 128 8412 rev32 v30.16b, v30.16b 8413 str q30, [x16] //store the updated counter 8414 8415 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) 8416 8417 and x1, x1, #127 //bit_length %= 128 8418 8419 lsr x6, x6, x1 //temp0_x is mask for top 64b of last block 8420 cmp x1, #64 8421 mvn x7, xzr //temp1_x = 0xffffffffffffffff 8422 8423 csel x14, x6, xzr, lt 8424 csel x13, x7, x6, lt 8425 8426 mov v0.d[0], x13 //ctr0b is mask for last block 8427 mov v0.d[1], x14 8428 8429 and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits 8430 ldr q20, [x3, #32] //load h1l | h1h 8431 ext v20.16b, v20.16b, v20.16b, #8 8432 bif v12.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing 8433 8434 rev64 v8.16b, v9.16b //GHASH final block 8435 8436 eor v8.16b, v8.16b, v16.16b //feed in partial tag 8437 8438 ins v16.d[0], v8.d[1] //GHASH final block - mid 8439 pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high 8440 8441 eor v16.8b, v16.8b, v8.8b //GHASH final block - mid 8442 8443 pmull v26.1q, v8.1d, v20.1d //GHASH final block - low 8444 eor v17.16b, v17.16b, v28.16b //GHASH final block - high 8445 8446 pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid 8447 8448 eor v18.16b, v18.16b, v16.16b //GHASH final block - mid 8449 ldr d16, [x10] //MODULO - load modulo constant 8450 eor v19.16b, v19.16b, v26.16b //GHASH final block - low 8451 8452 pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 8453 eor v14.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 8454 8455 ext v17.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 8456 st1 { v12.16b}, [x2] //store all 16B 8457 8458 eor v18.16b, v18.16b, v14.16b //MODULO - karatsuba tidy up 8459 8460 eor v21.16b, v17.16b, v21.16b //MODULO - fold into mid 8461 eor v18.16b, v18.16b, v21.16b //MODULO - fold into mid 8462 8463 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 8464 8465 ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 8466 eor v19.16b, v19.16b, v17.16b //MODULO - fold into low 8467 8468 eor v19.16b, v19.16b, v18.16b //MODULO - fold into low 8469 ext v19.16b, v19.16b, v19.16b, #8 8470 rev64 v19.16b, v19.16b 8471 st1 { v19.16b }, [x3] 8472 mov x0, x9 8473 8474 ldp d10, d11, [sp, #16] 8475 ldp d12, d13, [sp, #32] 8476 ldp d14, d15, [sp, #48] 8477 ldp d8, d9, [sp], #80 8478 ret 8479 8480 .L256_dec_ret: 8481 mov w0, #0x0 8482 ret 8483 .size unroll8_eor3_aes_gcm_dec_256_kernel,.-unroll8_eor3_aes_gcm_dec_256_kernel 8484 .byte 65,69,83,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,65,82,77,118,56,44,32,83,80,68,88,32,66,83,68,45,51,45,67,108,97,117,115,101,32,98,121,32,60,120,105,97,111,107,97,110,103,46,113,105,97,110,64,97,114,109,46,99,111,109,62,0 8485 .align 2 8486 .align 2 8487 #endif 8488