1 1.1 christos #include "arm_arch.h" 2 1.1 christos 3 1.1 christos #if __ARM_MAX_ARCH__>=8 4 1.1 christos .arch armv8-a+crypto 5 1.1 christos .text 6 1.1 christos .globl unroll8_eor3_aes_gcm_enc_128_kernel 7 1.1 christos .type unroll8_eor3_aes_gcm_enc_128_kernel,%function 8 1.1 christos .align 4 9 1.1 christos unroll8_eor3_aes_gcm_enc_128_kernel: 10 1.1 christos AARCH64_VALID_CALL_TARGET 11 1.1 christos cbz x1, .L128_enc_ret 12 1.1 christos stp d8, d9, [sp, #-80]! 13 1.1 christos lsr x9, x1, #3 14 1.1 christos mov x16, x4 15 1.1 christos mov x8, x5 16 1.1 christos stp d10, d11, [sp, #16] 17 1.1 christos stp d12, d13, [sp, #32] 18 1.1 christos stp d14, d15, [sp, #48] 19 1.1 christos mov x5, #0xc200000000000000 20 1.1 christos stp x5, xzr, [sp, #64] 21 1.1 christos add x10, sp, #64 22 1.1 christos 23 1.1 christos mov x15, #0x100000000 //set up counter increment 24 1.1 christos movi v31.16b, #0x0 25 1.1 christos mov v31.d[1], x15 26 1.1 christos mov x5, x9 27 1.1 christos ld1 { v0.16b}, [x16] //CTR block 0 28 1.1 christos 29 1.1 christos sub x5, x5, #1 //byte_len - 1 30 1.1 christos 31 1.1 christos and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 32 1.1 christos 33 1.1 christos rev32 v30.16b, v0.16b //set up reversed counter 34 1.1 christos 35 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 0 36 1.1 christos 37 1.1 christos rev32 v1.16b, v30.16b //CTR block 1 38 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 1 39 1.1 christos 40 1.1 christos rev32 v2.16b, v30.16b //CTR block 2 41 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 2 42 1.1 christos 43 1.1 christos rev32 v3.16b, v30.16b //CTR block 3 44 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 3 45 1.1 christos 46 1.1 christos rev32 v4.16b, v30.16b //CTR block 4 47 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 4 48 1.1 christos 49 1.1 christos rev32 v5.16b, v30.16b //CTR block 5 50 1.1 christos add v30.4s, v30.4s, v31.4s 
//CTR block 5 51 1.1 christos ldp q26, q27, [x8, #0] //load rk0, rk1 52 1.1 christos 53 1.1 christos rev32 v6.16b, v30.16b //CTR block 6 54 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 6 55 1.1 christos 56 1.1 christos rev32 v7.16b, v30.16b //CTR block 7 57 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 7 58 1.1 christos 59 1.1 christos aese v4.16b, v26.16b 60 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 0 61 1.1 christos aese v6.16b, v26.16b 62 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 0 63 1.1 christos aese v3.16b, v26.16b 64 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 0 65 1.1 christos 66 1.1 christos aese v0.16b, v26.16b 67 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 0 68 1.1 christos aese v1.16b, v26.16b 69 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 0 70 1.1 christos aese v2.16b, v26.16b 71 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 0 72 1.1 christos 73 1.1 christos aese v7.16b, v26.16b 74 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 0 75 1.1 christos aese v5.16b, v26.16b 76 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 0 77 1.1 christos ldp q28, q26, [x8, #32] //load rk2, rk3 78 1.1 christos 79 1.1 christos aese v3.16b, v27.16b 80 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 1 81 1.1 christos 82 1.1 christos aese v7.16b, v27.16b 83 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 1 84 1.1 christos aese v5.16b, v27.16b 85 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 1 86 1.1 christos aese v4.16b, v27.16b 87 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 1 88 1.1 christos 89 1.1 christos aese v2.16b, v27.16b 90 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 1 91 1.1 christos aese v6.16b, v27.16b 92 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 1 93 1.1 christos aese v0.16b, v27.16b 94 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 1 95 1.1 christos 96 1.1 christos aese v5.16b, v28.16b 97 
1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 2 98 1.1 christos aese v1.16b, v27.16b 99 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 1 100 1.1 christos aese v0.16b, v28.16b 101 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 2 102 1.1 christos 103 1.1 christos aese v2.16b, v28.16b 104 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 2 105 1.1 christos aese v3.16b, v28.16b 106 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 2 107 1.1 christos aese v7.16b, v28.16b 108 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 2 109 1.1 christos 110 1.1 christos aese v1.16b, v28.16b 111 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 2 112 1.1 christos aese v6.16b, v28.16b 113 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 2 114 1.1 christos aese v4.16b, v28.16b 115 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 2 116 1.1 christos 117 1.1 christos aese v2.16b, v26.16b 118 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 3 119 1.1 christos 120 1.1 christos ldp q27, q28, [x8, #64] //load rk4, rk5 121 1.1 christos aese v5.16b, v26.16b 122 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 3 123 1.1 christos aese v0.16b, v26.16b 124 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 3 125 1.1 christos 126 1.1 christos aese v4.16b, v26.16b 127 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 3 128 1.1 christos aese v3.16b, v26.16b 129 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 3 130 1.1 christos aese v6.16b, v26.16b 131 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 3 132 1.1 christos 133 1.1 christos aese v7.16b, v26.16b 134 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 3 135 1.1 christos 136 1.1 christos aese v6.16b, v27.16b 137 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 4 138 1.1 christos aese v1.16b, v26.16b 139 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 3 140 1.1 christos aese v5.16b, v27.16b 141 1.1 christos aesmc 
v5.16b, v5.16b //AES block 5 - round 4 142 1.1 christos 143 1.1 christos aese v7.16b, v27.16b 144 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 4 145 1.1 christos aese v4.16b, v27.16b 146 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 4 147 1.1 christos aese v0.16b, v27.16b 148 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 4 149 1.1 christos 150 1.1 christos aese v1.16b, v27.16b 151 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 4 152 1.1 christos aese v2.16b, v27.16b 153 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 4 154 1.1 christos aese v3.16b, v27.16b 155 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 4 156 1.1 christos 157 1.1 christos aese v7.16b, v28.16b 158 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 5 159 1.1 christos aese v0.16b, v28.16b 160 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 5 161 1.1 christos ldp q26, q27, [x8, #96] //load rk6, rk7 162 1.1 christos 163 1.1 christos aese v1.16b, v28.16b 164 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 5 165 1.1 christos aese v3.16b, v28.16b 166 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 5 167 1.1 christos aese v2.16b, v28.16b 168 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 5 169 1.1 christos 170 1.1 christos aese v4.16b, v28.16b 171 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 5 172 1.1 christos aese v5.16b, v28.16b 173 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 5 174 1.1 christos aese v6.16b, v28.16b 175 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 5 176 1.1 christos 177 1.1 christos aese v4.16b, v26.16b 178 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 6 179 1.1 christos aese v3.16b, v26.16b 180 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 6 181 1.1 christos aese v2.16b, v26.16b 182 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 6 183 1.1 christos 184 1.1 christos aese v7.16b, v26.16b 185 1.1 christos aesmc v7.16b, v7.16b //AES 
block 7 - round 6 186 1.1 christos aese v6.16b, v26.16b 187 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 6 188 1.1 christos aese v5.16b, v26.16b 189 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 6 190 1.1 christos 191 1.1 christos aese v0.16b, v26.16b 192 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 6 193 1.1 christos aese v1.16b, v26.16b 194 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 6 195 1.1 christos ldp q28, q26, [x8, #128] //load rk8, rk9 196 1.1 christos 197 1.1 christos aese v5.16b, v27.16b 198 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 7 199 1.1 christos 200 1.1 christos ld1 { v19.16b}, [x3] 201 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 202 1.1 christos rev64 v19.16b, v19.16b 203 1.1 christos 204 1.1 christos aese v7.16b, v27.16b 205 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 7 206 1.1 christos 207 1.1 christos aese v4.16b, v27.16b 208 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 7 209 1.1 christos aese v3.16b, v27.16b 210 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 7 211 1.1 christos aese v6.16b, v27.16b 212 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 7 213 1.1 christos 214 1.1 christos aese v1.16b, v27.16b 215 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 7 216 1.1 christos aese v2.16b, v27.16b 217 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 7 218 1.1 christos aese v0.16b, v27.16b 219 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 7 220 1.1 christos 221 1.1 christos aese v3.16b, v28.16b 222 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 223 1.1 christos aese v6.16b, v28.16b 224 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 225 1.1 christos aese v2.16b, v28.16b 226 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 227 1.1 christos 228 1.1 christos aese v7.16b, v28.16b 229 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 230 1.1 christos aese v0.16b, v28.16b 231 
1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 232 1.1 christos ldr q27, [x8, #160] //load rk10 233 1.1 christos 234 1.1 christos aese v3.16b, v26.16b //AES block 8k+11 - round 9 235 1.1 christos aese v4.16b, v28.16b 236 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 237 1.1 christos aese v2.16b, v26.16b //AES block 8k+10 - round 9 238 1.1 christos 239 1.1 christos aese v5.16b, v28.16b 240 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 241 1.1 christos aese v1.16b, v28.16b 242 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 243 1.1 christos aese v6.16b, v26.16b //AES block 8k+14 - round 9 244 1.1 christos 245 1.1 christos aese v4.16b, v26.16b //AES block 8k+12 - round 9 246 1.1 christos add x5, x5, x0 247 1.1 christos aese v0.16b, v26.16b //AES block 8k+8 - round 9 248 1.1 christos 249 1.1 christos aese v7.16b, v26.16b //AES block 8k+15 - round 9 250 1.1 christos aese v5.16b, v26.16b //AES block 8k+13 - round 9 251 1.1 christos aese v1.16b, v26.16b //AES block 8k+9 - round 9 252 1.1 christos 253 1.1 christos add x4, x0, x1, lsr #3 //end_input_ptr 254 1.1 christos cmp x0, x5 //check if we have <= 8 blocks 255 1.1 christos b.ge .L128_enc_tail //handle tail 256 1.1 christos 257 1.1 christos ldp q8, q9, [x0], #32 //AES block 0, 1 - load plaintext 258 1.1 christos 259 1.1 christos ldp q10, q11, [x0], #32 //AES block 2, 3 - load plaintext 260 1.1 christos 261 1.1 christos ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext 262 1.1 christos 263 1.1 christos ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext 264 1.1 christos cmp x0, x5 //check if we have <= 8 blocks 265 1.1 christos 266 1.1 christos .inst 0xce006d08 //eor3 v8.16b, v8.16b, v0.16b, v27.16b //AES block 0 - result 267 1.1 christos rev32 v0.16b, v30.16b //CTR block 8 268 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8 269 1.1 christos 270 1.1 christos .inst 0xce016d29 //eor3 v9.16b, v9.16b, v1.16b, v27.16b //AES block 1 - result 271 1.1 
christos stp q8, q9, [x2], #32 //AES block 0, 1 - store result 272 1.1 christos 273 1.1 christos rev32 v1.16b, v30.16b //CTR block 9 274 1.1 christos .inst 0xce056dad //eor3 v13.16b, v13.16b, v5.16b, v27.16b //AES block 5 - result 275 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 9 276 1.1 christos 277 1.1 christos .inst 0xce026d4a //eor3 v10.16b, v10.16b, v2.16b, v27.16b //AES block 2 - result 278 1.1 christos .inst 0xce066dce //eor3 v14.16b, v14.16b, v6.16b, v27.16b //AES block 6 - result 279 1.1 christos .inst 0xce046d8c //eor3 v12.16b, v12.16b, v4.16b, v27.16b //AES block 4 - result 280 1.1 christos 281 1.1 christos rev32 v2.16b, v30.16b //CTR block 10 282 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 10 283 1.1 christos 284 1.1 christos .inst 0xce036d6b //eor3 v11.16b, v11.16b, v3.16b, v27.16b //AES block 3 - result 285 1.1 christos .inst 0xce076def //eor3 v15.16b, v15.16b, v7.16b,v27.16b //AES block 7 - result 286 1.1 christos stp q10, q11, [x2], #32 //AES block 2, 3 - store result 287 1.1 christos 288 1.1 christos rev32 v3.16b, v30.16b //CTR block 11 289 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 11 290 1.1 christos stp q12, q13, [x2], #32 //AES block 4, 5 - store result 291 1.1 christos 292 1.1 christos stp q14, q15, [x2], #32 //AES block 6, 7 - store result 293 1.1 christos 294 1.1 christos rev32 v4.16b, v30.16b //CTR block 12 295 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 12 296 1.1 christos b.ge .L128_enc_prepretail //do prepretail 297 1.1 christos 298 1.1 christos .L128_enc_main_loop: //main loop start 299 1.1 christos rev32 v5.16b, v30.16b //CTR block 8k+13 300 1.1 christos ldr q20, [x3, #128] //load h5l | h5h 301 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 302 1.1 christos ldr q22, [x3, #160] //load h6l | h6h 303 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 304 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+13 305 1.1 christos 306 1.1 christos rev64 v9.16b, v9.16b //GHASH block 8k+1 307 1.1 christos rev64 
v8.16b, v8.16b //GHASH block 8k 308 1.1 christos ldr q23, [x3, #176] //load h7l | h7h 309 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 310 1.1 christos ldr q25, [x3, #208] //load h8l | h8h 311 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 312 1.1 christos 313 1.1 christos rev32 v6.16b, v30.16b //CTR block 8k+14 314 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+14 315 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 316 1.1 christos 317 1.1 christos ldr q21, [x3, #144] //load h6k | h5k 318 1.1 christos ldr q24, [x3, #192] //load h8k | h7k 319 1.1 christos rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free) 320 1.1 christos rev64 v11.16b, v11.16b //GHASH block 8k+3 321 1.1 christos 322 1.1 christos ldp q26, q27, [x8, #0] //load rk0, rk1 323 1.1 christos eor v8.16b, v8.16b, v19.16b //PRE 1 324 1.1 christos rev32 v7.16b, v30.16b //CTR block 8k+15 325 1.1 christos 326 1.1 christos rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free) 327 1.1 christos 328 1.1 christos pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 329 1.1 christos rev64 v10.16b, v10.16b //GHASH block 8k+2 330 1.1 christos pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 331 1.1 christos 332 1.1 christos pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 333 1.1 christos trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 334 1.1 christos pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 335 1.1 christos 336 1.1 christos trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 337 1.1 christos pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 338 1.1 christos pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 339 1.1 christos 340 1.1 christos eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 341 1.1 christos ldr q23, [x3, #80] //load h3l | h3h 342 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 343 1.1 christos ldr q25, [x3, #112] //load h3l | h3h 344 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 345 1.1 christos aese v5.16b, 
v26.16b 346 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 347 1.1 christos 348 1.1 christos aese v1.16b, v26.16b 349 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 350 1.1 christos aese v4.16b, v26.16b 351 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 352 1.1 christos eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 353 1.1 christos 354 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+15 355 1.1 christos aese v2.16b, v26.16b 356 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 357 1.1 christos eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 358 1.1 christos 359 1.1 christos aese v6.16b, v26.16b 360 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 361 1.1 christos aese v1.16b, v27.16b 362 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 363 1.1 christos aese v0.16b, v26.16b 364 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 365 1.1 christos 366 1.1 christos aese v2.16b, v27.16b 367 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 368 1.1 christos aese v3.16b, v26.16b 369 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 370 1.1 christos pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 371 1.1 christos 372 1.1 christos aese v5.16b, v27.16b 373 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 374 1.1 christos aese v7.16b, v26.16b 375 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 376 1.1 christos aese v0.16b, v27.16b 377 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 378 1.1 christos 379 1.1 christos .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b,v9.16b //GHASH block 8k+2, 8k+3 - high 380 1.1 christos trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 381 1.1 christos trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 382 1.1 christos 383 1.1 christos ldp q28, q26, [x8, #32] //load rk2, rk3 384 1.1 christos aese v4.16b, v27.16b 385 1.1 christos aesmc v4.16b, 
v4.16b //AES block 8k+12 - round 1 386 1.1 christos aese v3.16b, v27.16b 387 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 388 1.1 christos 389 1.1 christos pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 390 1.1 christos aese v7.16b, v27.16b 391 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 392 1.1 christos aese v6.16b, v27.16b 393 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 394 1.1 christos 395 1.1 christos pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 396 1.1 christos eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 397 1.1 christos pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 398 1.1 christos 399 1.1 christos rev64 v14.16b, v14.16b //GHASH block 8k+6 (t0, t1, and t2 free) 400 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 401 1.1 christos 402 1.1 christos pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 403 1.1 christos eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 404 1.1 christos pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 405 1.1 christos 406 1.1 christos aese v5.16b, v28.16b 407 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 408 1.1 christos aese v4.16b, v28.16b 409 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 410 1.1 christos aese v2.16b, v28.16b 411 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 412 1.1 christos 413 1.1 christos aese v1.16b, v28.16b 414 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 415 1.1 christos .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 416 1.1 christos aese v6.16b, v28.16b 417 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 418 1.1 christos 419 1.1 christos aese v0.16b, v28.16b 420 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 421 1.1 christos aese v3.16b, v28.16b 422 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 423 1.1 
christos aese v7.16b, v28.16b 424 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 425 1.1 christos 426 1.1 christos aese v6.16b, v26.16b 427 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 428 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 429 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 430 1.1 christos rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free) 431 1.1 christos 432 1.1 christos ldp q27, q28, [x8, #64] //load rk4, rk5 433 1.1 christos aese v2.16b, v26.16b 434 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 435 1.1 christos aese v1.16b, v26.16b 436 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 437 1.1 christos 438 1.1 christos ldr q20, [x3, #32] //load h1l | h1h 439 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 440 1.1 christos ldr q22, [x3, #64] //load h1l | h1h 441 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 442 1.1 christos pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 443 1.1 christos pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 444 1.1 christos 445 1.1 christos trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 446 1.1 christos trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 447 1.1 christos 448 1.1 christos aese v0.16b, v26.16b 449 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 450 1.1 christos aese v3.16b, v26.16b 451 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 452 1.1 christos 453 1.1 christos aese v7.16b, v26.16b 454 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 455 1.1 christos aese v4.16b, v26.16b 456 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 457 1.1 christos 458 1.1 christos aese v5.16b, v26.16b 459 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 460 1.1 christos aese v0.16b, v27.16b 461 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 462 1.1 christos 463 1.1 christos aese v7.16b, v27.16b 464 1.1 christos aesmc v7.16b, v7.16b //AES block 
8k+15 - round 4 465 1.1 christos aese v3.16b, v27.16b 466 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 467 1.1 christos aese v4.16b, v27.16b 468 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 469 1.1 christos 470 1.1 christos aese v5.16b, v27.16b 471 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 472 1.1 christos aese v6.16b, v27.16b 473 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 474 1.1 christos aese v1.16b, v27.16b 475 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 476 1.1 christos 477 1.1 christos pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 478 1.1 christos eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 479 1.1 christos pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 480 1.1 christos 481 1.1 christos aese v2.16b, v27.16b 482 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 483 1.1 christos ldp q26, q27, [x8, #96] //load rk6, rk7 484 1.1 christos trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 485 1.1 christos 486 1.1 christos pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 487 1.1 christos pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 488 1.1 christos pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 489 1.1 christos 490 1.1 christos pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 491 1.1 christos aese v2.16b, v28.16b 492 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 493 1.1 christos aese v5.16b, v28.16b 494 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 495 1.1 christos 496 1.1 christos pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 497 1.1 christos .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 498 1.1 christos trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 499 1.1 christos 500 1.1 christos .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 501 1.1 christos aese 
v6.16b, v28.16b 502 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 503 1.1 christos 504 1.1 christos eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 505 1.1 christos aese v7.16b, v28.16b 506 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 507 1.1 christos aese v1.16b, v28.16b 508 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 509 1.1 christos 510 1.1 christos aese v3.16b, v28.16b 511 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 512 1.1 christos aese v4.16b, v28.16b 513 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 514 1.1 christos aese v0.16b, v28.16b 515 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 516 1.1 christos 517 1.1 christos .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 518 1.1 christos ldr d16, [x10] //MODULO - load modulo constant 519 1.1 christos pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 520 1.1 christos 521 1.1 christos aese v7.16b, v26.16b 522 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 523 1.1 christos aese v5.16b, v26.16b 524 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 525 1.1 christos 526 1.1 christos pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 527 1.1 christos aese v1.16b, v26.16b 528 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 529 1.1 christos aese v2.16b, v26.16b 530 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 531 1.1 christos 532 1.1 christos pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 533 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 534 1.1 christos ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load plaintext 535 1.1 christos 536 1.1 christos aese v3.16b, v26.16b 537 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 538 1.1 christos rev32 v20.16b, v30.16b //CTR block 8k+16 539 1.1 christos add v30.4s, v30.4s, v31.4s //CTR 
block 8k+16 540 1.1 christos 541 1.1 christos aese v4.16b, v26.16b 542 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 543 1.1 christos aese v0.16b, v26.16b 544 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 545 1.1 christos aese v6.16b, v26.16b 546 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 547 1.1 christos 548 1.1 christos .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 549 1.1 christos ldp q28, q26, [x8, #128] //load rk8, rk9 550 1.1 christos .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 551 1.1 christos 552 1.1 christos aese v2.16b, v27.16b 553 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 554 1.1 christos aese v7.16b, v27.16b 555 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 556 1.1 christos ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load plaintext 557 1.1 christos 558 1.1 christos aese v5.16b, v27.16b 559 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 560 1.1 christos aese v6.16b, v27.16b 561 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 562 1.1 christos aese v1.16b, v27.16b 563 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 564 1.1 christos 565 1.1 christos pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 566 1.1 christos aese v0.16b, v27.16b 567 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 568 1.1 christos aese v4.16b, v27.16b 569 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 570 1.1 christos 571 1.1 christos rev32 v22.16b, v30.16b //CTR block 8k+17 572 1.1 christos aese v3.16b, v27.16b 573 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 574 1.1 christos 575 1.1 christos aese v5.16b, v28.16b 576 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 577 1.1 christos ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load plaintext 578 1.1 christos add v30.4s, v30.4s, v31.4s 
//CTR block 8k+17 579 1.1 christos 580 1.1 christos aese v2.16b, v28.16b 581 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 582 1.1 christos aese v1.16b, v28.16b 583 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 584 1.1 christos aese v7.16b, v28.16b 585 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 586 1.1 christos 587 1.1 christos aese v4.16b, v28.16b 588 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 589 1.1 christos .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 590 1.1 christos ldr q27, [x8, #160] //load rk10 591 1.1 christos 592 1.1 christos ext v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 593 1.1 christos rev32 v23.16b, v30.16b //CTR block 8k+18 594 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+18 595 1.1 christos aese v3.16b, v28.16b 596 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 597 1.1 christos 598 1.1 christos aese v0.16b, v28.16b 599 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 600 1.1 christos .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 601 1.1 christos aese v6.16b, v28.16b 602 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 603 1.1 christos 604 1.1 christos aese v2.16b, v26.16b //AES block 8k+10 - round 9 605 1.1 christos aese v4.16b, v26.16b //AES block 8k+12 - round 9 606 1.1 christos aese v1.16b, v26.16b //AES block 8k+9 - round 9 607 1.1 christos 608 1.1 christos ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load plaintext 609 1.1 christos rev32 v25.16b, v30.16b //CTR block 8k+19 610 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+19 611 1.1 christos 612 1.1 christos cmp x0, x5 //.LOOP CONTROL 613 1.1 christos .inst 0xce046d8c //eor3 v12.16b, v12.16b, v4.16b, v27.16b //AES block 4 - result 614 1.1 christos aese v7.16b, v26.16b //AES block 8k+15 - round 9 615 1.1 christos 616 1.1 christos aese v6.16b, v26.16b //AES block 
8k+14 - round 9 617 1.1 christos aese v3.16b, v26.16b //AES block 8k+11 - round 9 618 1.1 christos 619 1.1 christos .inst 0xce026d4a //eor3 v10.16b, v10.16b, v2.16b, v27.16b //AES block 8k+10 - result 620 1.1 christos 621 1.1 christos mov v2.16b, v23.16b //CTR block 8k+18 622 1.1 christos aese v0.16b, v26.16b //AES block 8k+8 - round 9 623 1.1 christos 624 1.1 christos rev32 v4.16b, v30.16b //CTR block 8k+20 625 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+20 626 1.1 christos 627 1.1 christos .inst 0xce076def //eor3 v15.16b, v15.16b, v7.16b, v27.16b //AES block 7 - result 628 1.1 christos aese v5.16b, v26.16b //AES block 8k+13 - round 9 629 1.1 christos pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 630 1.1 christos 631 1.1 christos .inst 0xce016d29 //eor3 v9.16b, v9.16b, v1.16b, v27.16b //AES block 8k+9 - result 632 1.1 christos .inst 0xce036d6b //eor3 v11.16b, v11.16b, v3.16b, v27.16b //AES block 8k+11 - result 633 1.1 christos mov v3.16b, v25.16b //CTR block 8k+19 634 1.1 christos 635 1.1 christos ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 636 1.1 christos .inst 0xce056dad //eor3 v13.16b, v13.16b, v5.16b, v27.16b //AES block 5 - result 637 1.1 christos mov v1.16b, v22.16b //CTR block 8k+17 638 1.1 christos 639 1.1 christos .inst 0xce006d08 //eor3 v8.16b, v8.16b, v0.16b, v27.16b //AES block 8k+8 - result 640 1.1 christos mov v0.16b, v20.16b //CTR block 8k+16 641 1.1 christos stp q8, q9, [x2], #32 //AES block 8k+8, 8k+9 - store result 642 1.1 christos 643 1.1 christos stp q10, q11, [x2], #32 //AES block 8k+10, 8k+11 - store result 644 1.1 christos .inst 0xce066dce //eor3 v14.16b, v14.16b, v6.16b, v27.16b //AES block 6 - result 645 1.1 christos 646 1.1 christos stp q12, q13, [x2], #32 //AES block 8k+12, 8k+13 - store result 647 1.1 christos .inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 648 1.1 christos 649 1.1 christos stp q14, q15, [x2], #32 //AES block 8k+14, 8k+15 - store 
result 650 1.1 christos b.lt .L128_enc_main_loop 651 1.1 christos 652 1.1 christos .L128_enc_prepretail: //PREPRETAIL 653 1.1 christos rev32 v5.16b, v30.16b //CTR block 8k+13 654 1.1 christos ldr q23, [x3, #176] //load h7l | h7h 655 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 656 1.1 christos ldr q25, [x3, #208] //load h8l | h8h 657 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 658 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 659 1.1 christos 660 1.1 christos ldr q20, [x3, #128] //load h5l | h5h 661 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 662 1.1 christos ldr q22, [x3, #160] //load h6l | h6h 663 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 664 1.1 christos rev64 v8.16b, v8.16b //GHASH block 8k 665 1.1 christos rev64 v9.16b, v9.16b //GHASH block 8k+1 666 1.1 christos 667 1.1 christos ldr q21, [x3, #144] //load h6k | h5k 668 1.1 christos ldr q24, [x3, #192] //load h6k | h5k 669 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+13 670 1.1 christos rev64 v11.16b, v11.16b //GHASH block 8k+3 671 1.1 christos 672 1.1 christos rev64 v10.16b, v10.16b //GHASH block 8k+2 673 1.1 christos eor v8.16b, v8.16b, v19.16b //PRE 1 674 1.1 christos 675 1.1 christos rev32 v6.16b, v30.16b //CTR block 8k+14 676 1.1 christos 677 1.1 christos pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 678 1.1 christos pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 679 1.1 christos pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 680 1.1 christos 681 1.1 christos rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free) 682 1.1 christos trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 683 1.1 christos 684 1.1 christos pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 685 1.1 christos eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 686 1.1 christos trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 687 1.1 christos 688 1.1 christos eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 689 1.1 christos eor v8.16b, v8.16b, v18.16b 
//GHASH block 8k, 8k+1 - mid 690 1.1 christos 691 1.1 christos ldp q26, q27, [x8, #0] //load rk0, rk1 692 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+14 693 1.1 christos 694 1.1 christos pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 695 1.1 christos pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 696 1.1 christos 697 1.1 christos rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free) 698 1.1 christos rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free) 699 1.1 christos 700 1.1 christos eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 701 1.1 christos 702 1.1 christos rev32 v7.16b, v30.16b //CTR block 8k+15 703 1.1 christos 704 1.1 christos rev64 v14.16b, v14.16b //GHASH block 8k+6 (t0, t1, and t2 free) 705 1.1 christos 706 1.1 christos aese v2.16b, v26.16b 707 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 708 1.1 christos 709 1.1 christos pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 710 1.1 christos pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 711 1.1 christos 712 1.1 christos aese v6.16b, v26.16b 713 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 714 1.1 christos aese v3.16b, v26.16b 715 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 716 1.1 christos 717 1.1 christos pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 718 1.1 christos aese v1.16b, v26.16b 719 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 720 1.1 christos 721 1.1 christos .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 722 1.1 christos trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 723 1.1 christos trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 724 1.1 christos 725 1.1 christos aese v5.16b, v26.16b 726 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 727 1.1 christos aese v7.16b, v26.16b 728 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 729 1.1 christos 730 1.1 christos 
eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 731 1.1 christos aese v4.16b, v26.16b 732 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 733 1.1 christos aese v0.16b, v26.16b 734 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 735 1.1 christos 736 1.1 christos aese v3.16b, v27.16b 737 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 738 1.1 christos pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 739 1.1 christos 740 1.1 christos ldr q23, [x3, #80] //load h3l | h3h 741 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 742 1.1 christos ldr q25, [x3, #112] //load h4l | h4h 743 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 744 1.1 christos 745 1.1 christos ldp q28, q26, [x8, #32] //load rk2, rk3 746 1.1 christos aese v5.16b, v27.16b 747 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 748 1.1 christos pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 749 1.1 christos 750 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 751 1.1 christos pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 752 1.1 christos 753 1.1 christos aese v1.16b, v27.16b 754 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 755 1.1 christos aese v0.16b, v27.16b 756 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 757 1.1 christos 758 1.1 christos .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 759 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 760 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 761 1.1 christos aese v2.16b, v27.16b 762 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 763 1.1 christos 764 1.1 christos aese v4.16b, v27.16b 765 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 766 1.1 christos aese v7.16b, v27.16b 767 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 768 1.1 christos 769 1.1 christos aese v5.16b, v28.16b 770 1.1 christos aesmc v5.16b, 
v5.16b //AES block 8k+13 - round 2 771 1.1 christos aese v2.16b, v28.16b 772 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 773 1.1 christos aese v3.16b, v28.16b 774 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 775 1.1 christos 776 1.1 christos aese v1.16b, v28.16b 777 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 778 1.1 christos aese v6.16b, v27.16b 779 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 780 1.1 christos aese v4.16b, v28.16b 781 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 782 1.1 christos 783 1.1 christos aese v5.16b, v26.16b 784 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 785 1.1 christos aese v0.16b, v28.16b 786 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 787 1.1 christos 788 1.1 christos aese v6.16b, v28.16b 789 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 790 1.1 christos aese v7.16b, v28.16b 791 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 792 1.1 christos ldp q27, q28, [x8, #64] //load rk4, rk5 793 1.1 christos 794 1.1 christos ldr q20, [x3, #32] //load h1l | h1h 795 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 796 1.1 christos ldr q22, [x3, #64] //load h2l | h2h 797 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 798 1.1 christos trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 799 1.1 christos aese v0.16b, v26.16b 800 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 801 1.1 christos 802 1.1 christos pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 803 1.1 christos aese v6.16b, v26.16b 804 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 805 1.1 christos aese v3.16b, v26.16b 806 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 807 1.1 christos 808 1.1 christos pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 809 1.1 christos trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 810 1.1 christos pmull2 v10.1q, v13.2d, v23.2d //GHASH
block 8k+5 - high 811 1.1 christos 812 1.1 christos aese v2.16b, v26.16b 813 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 814 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+15 815 1.1 christos 816 1.1 christos aese v7.16b, v26.16b 817 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 818 1.1 christos aese v1.16b, v26.16b 819 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 820 1.1 christos eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 821 1.1 christos 822 1.1 christos pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 823 1.1 christos aese v4.16b, v26.16b 824 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 825 1.1 christos pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 826 1.1 christos 827 1.1 christos trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 828 1.1 christos pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 829 1.1 christos trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 830 1.1 christos 831 1.1 christos aese v1.16b, v27.16b 832 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 833 1.1 christos aese v3.16b, v27.16b 834 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 835 1.1 christos .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 836 1.1 christos 837 1.1 christos .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 838 1.1 christos eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 839 1.1 christos pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 840 1.1 christos 841 1.1 christos aese v1.16b, v28.16b 842 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 843 1.1 christos aese v6.16b, v27.16b 844 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 845 1.1 christos aese v0.16b, v27.16b 846 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 847 1.1 christos 848 1.1 christos aese v7.16b, v27.16b 849 
1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 850 1.1 christos aese v2.16b, v27.16b 851 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 852 1.1 christos 853 1.1 christos pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 854 1.1 christos aese v4.16b, v27.16b 855 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 856 1.1 christos aese v5.16b, v27.16b 857 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 858 1.1 christos 859 1.1 christos pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 860 1.1 christos ldp q26, q27, [x8, #96] //load rk6, rk7 861 1.1 christos pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 862 1.1 christos 863 1.1 christos .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 864 1.1 christos pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 865 1.1 christos pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 866 1.1 christos 867 1.1 christos aese v0.16b, v28.16b 868 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 869 1.1 christos aese v7.16b, v28.16b 870 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 871 1.1 christos ldr d16, [x10] //MODULO - load modulo constant 872 1.1 christos 873 1.1 christos aese v2.16b, v28.16b 874 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 875 1.1 christos aese v4.16b, v28.16b 876 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 877 1.1 christos 878 1.1 christos .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 879 1.1 christos aese v5.16b, v28.16b 880 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 881 1.1 christos aese v6.16b, v28.16b 882 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 883 1.1 christos 884 1.1 christos aese v3.16b, v28.16b 885 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 886 1.1 christos aese v4.16b, v26.16b 887 1.1 christos aesmc v4.16b, v4.16b //AES 
block 8k+12 - round 6 888 1.1 christos 889 1.1 christos aese v5.16b, v26.16b 890 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 891 1.1 christos aese v2.16b, v26.16b 892 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 893 1.1 christos aese v0.16b, v26.16b 894 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 895 1.1 christos 896 1.1 christos aese v3.16b, v26.16b 897 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 898 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 899 1.1 christos .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 900 1.1 christos 901 1.1 christos aese v6.16b, v26.16b 902 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 903 1.1 christos aese v1.16b, v26.16b 904 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 905 1.1 christos aese v7.16b, v26.16b 906 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 907 1.1 christos 908 1.1 christos pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 909 1.1 christos .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 910 1.1 christos ldp q28, q26, [x8, #128] //load rk8, rk9 911 1.1 christos 912 1.1 christos aese v3.16b, v27.16b 913 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 914 1.1 christos aese v6.16b, v27.16b 915 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 916 1.1 christos aese v1.16b, v27.16b 917 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 918 1.1 christos ext v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 919 1.1 christos 920 1.1 christos aese v5.16b, v27.16b 921 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 922 1.1 christos aese v0.16b, v27.16b 923 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 924 1.1 christos .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold 
into mid 925 1.1 christos 926 1.1 christos aese v2.16b, v27.16b 927 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 928 1.1 christos aese v7.16b, v27.16b 929 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 930 1.1 christos 931 1.1 christos pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 932 1.1 christos aese v4.16b, v27.16b 933 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 934 1.1 christos 935 1.1 christos aese v7.16b, v28.16b 936 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 937 1.1 christos aese v2.16b, v28.16b 938 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 939 1.1 christos aese v1.16b, v28.16b 940 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 941 1.1 christos ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 942 1.1 christos 943 1.1 christos aese v6.16b, v28.16b 944 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 945 1.1 christos .inst 0xce114a73 //eor3 v19.16b, v19.16b, v17.16b, v18.16b //MODULO - fold into low 946 1.1 christos aese v4.16b, v28.16b 947 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 948 1.1 christos 949 1.1 christos aese v3.16b, v28.16b 950 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 951 1.1 christos aese v0.16b, v28.16b 952 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 953 1.1 christos aese v5.16b, v28.16b 954 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 955 1.1 christos 956 1.1 christos ldr q27, [x8, #160] //load rk10 957 1.1 christos aese v6.16b, v26.16b //AES block 8k+14 - round 9 958 1.1 christos aese v2.16b, v26.16b //AES block 8k+10 - round 9 959 1.1 christos 960 1.1 christos aese v0.16b, v26.16b //AES block 8k+8 - round 9 961 1.1 christos aese v1.16b, v26.16b //AES block 8k+9 - round 9 962 1.1 christos 963 1.1 christos aese v3.16b, v26.16b //AES block 8k+11 - round 9 964 1.1 christos aese v5.16b, v26.16b //AES block 8k+13 - round 9 965 
1.1 christos 966 1.1 christos aese v4.16b, v26.16b //AES block 8k+12 - round 9 967 1.1 christos aese v7.16b, v26.16b //AES block 8k+15 - round 9 968 1.1 christos .L128_enc_tail: //TAIL 969 1.1 christos 970 1.1 christos sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process 971 1.1 christos ldr q8, [x0], #16 //AES block 8k+8 - load plaintext 972 1.1 christos 973 1.1 christos mov v29.16b, v27.16b 974 1.1 christos ldp q20, q21, [x3, #128] //load h5l | h5h and h6k | h5k 975 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 976 1.1 christos 977 1.1 christos .inst 0xce007509 //eor3 v9.16b, v8.16b, v0.16b, v29.16b //AES block 8k+8 - result 978 1.1 christos ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag 979 1.1 christos ldp q22, q23, [x3, #160] //load h6l | h6h and h7l | h7h 980 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 981 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 982 1.1 christos 983 1.1 christos ldp q24, q25, [x3, #192] //load h8k | h7k and h8l | h8h 984 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 985 1.1 christos cmp x5, #112 986 1.1 christos b.gt .L128_enc_blocks_more_than_7 987 1.1 christos 988 1.1 christos mov v7.16b, v6.16b 989 1.1 christos mov v6.16b, v5.16b 990 1.1 christos movi v17.8b, #0 991 1.1 christos 992 1.1 christos cmp x5, #96 993 1.1 christos sub v30.4s, v30.4s, v31.4s 994 1.1 christos mov v5.16b, v4.16b 995 1.1 christos 996 1.1 christos mov v4.16b, v3.16b 997 1.1 christos mov v3.16b, v2.16b 998 1.1 christos mov v2.16b, v1.16b 999 1.1 christos 1000 1.1 christos movi v19.8b, #0 1001 1.1 christos movi v18.8b, #0 1002 1.1 christos b.gt .L128_enc_blocks_more_than_6 1003 1.1 christos 1004 1.1 christos mov v7.16b, v6.16b 1005 1.1 christos cmp x5, #80 1006 1.1 christos 1007 1.1 christos sub v30.4s, v30.4s, v31.4s 1008 1.1 christos mov v6.16b, v5.16b 1009 1.1 christos mov v5.16b, v4.16b 1010 1.1 christos 1011 1.1 christos mov v4.16b, v3.16b 1012 1.1 christos mov v3.16b, v1.16b 1013 1.1 christos b.gt .L128_enc_blocks_more_than_5 1014 1.1 christos 1015 1.1 christos
cmp x5, #64 1016 1.1 christos sub v30.4s, v30.4s, v31.4s 1017 1.1 christos 1018 1.1 christos mov v7.16b, v6.16b 1019 1.1 christos mov v6.16b, v5.16b 1020 1.1 christos 1021 1.1 christos mov v5.16b, v4.16b 1022 1.1 christos mov v4.16b, v1.16b 1023 1.1 christos b.gt .L128_enc_blocks_more_than_4 1024 1.1 christos 1025 1.1 christos mov v7.16b, v6.16b 1026 1.1 christos sub v30.4s, v30.4s, v31.4s 1027 1.1 christos mov v6.16b, v5.16b 1028 1.1 christos 1029 1.1 christos mov v5.16b, v1.16b 1030 1.1 christos cmp x5, #48 1031 1.1 christos b.gt .L128_enc_blocks_more_than_3 1032 1.1 christos 1033 1.1 christos sub v30.4s, v30.4s, v31.4s 1034 1.1 christos mov v7.16b, v6.16b 1035 1.1 christos mov v6.16b, v1.16b 1036 1.1 christos 1037 1.1 christos cmp x5, #32 1038 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 1039 1.1 christos b.gt .L128_enc_blocks_more_than_2 1040 1.1 christos 1041 1.1 christos cmp x5, #16 1042 1.1 christos 1043 1.1 christos sub v30.4s, v30.4s, v31.4s 1044 1.1 christos mov v7.16b, v1.16b 1045 1.1 christos b.gt .L128_enc_blocks_more_than_1 1046 1.1 christos 1047 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 1048 1.1 christos sub v30.4s, v30.4s, v31.4s 1049 1.1 christos b .L128_enc_blocks_less_than_1 1050 1.1 christos .L128_enc_blocks_more_than_7: //blocks left > 7 1051 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-7 block - store result 1052 1.1 christos 1053 1.1 christos rev64 v8.16b, v9.16b //GHASH final-7 block 1054 1.1 christos ldr q9, [x0], #16 //AES final-6 block - load plaintext 1055 1.1 christos 1056 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 1057 1.1 christos 1058 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-7 block - mid 1059 1.1 christos 1060 1.1 christos pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high 1061 1.1 christos 1062 1.1 christos ins v18.d[0], v24.d[1] //GHASH final-7 block - mid 1063 1.1 christos 1064 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid 1065 1.1 christos movi v16.8b, #0 
//suppress further partial tag feed in 1066 1.1 christos 1067 1.1 christos .inst 0xce017529 //eor3 v9.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result 1068 1.1 christos 1069 1.1 christos pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid 1070 1.1 christos pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low 1071 1.1 christos .L128_enc_blocks_more_than_6: //blocks left > 6 1072 1.1 christos 1073 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-6 block - store result 1074 1.1 christos 1075 1.1 christos rev64 v8.16b, v9.16b //GHASH final-6 block 1076 1.1 christos ldr q9, [x0], #16 //AES final-5 block - load plaintext 1077 1.1 christos 1078 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 1079 1.1 christos 1080 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-6 block - mid 1081 1.1 christos 1082 1.1 christos .inst 0xce027529 //eor3 v9.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result 1083 1.1 christos pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low 1084 1.1 christos 1085 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid 1086 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 1087 1.1 christos 1088 1.1 christos pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid 1089 1.1 christos pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high 1090 1.1 christos 1091 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low 1092 1.1 christos 1093 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid 1094 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high 1095 1.1 christos .L128_enc_blocks_more_than_5: //blocks left > 5 1096 1.1 christos 1097 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-5 block - store result 1098 1.1 christos 1099 1.1 christos rev64 v8.16b, v9.16b //GHASH final-5 block 1100 1.1 christos 1101 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 1102 1.1 christos 1103 1.1 christos ins v27.d[0], v8.d[1] //GHASH 
final-5 block - mid 1104 1.1 christos ldr q9, [x0], #16 //AES final-4 block - load plaintext 1105 1.1 christos pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high 1106 1.1 christos 1107 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high 1108 1.1 christos 1109 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid 1110 1.1 christos 1111 1.1 christos ins v27.d[1], v27.d[0] //GHASH final-5 block - mid 1112 1.1 christos 1113 1.1 christos .inst 0xce037529 //eor3 v9.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result 1114 1.1 christos pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low 1115 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 1116 1.1 christos 1117 1.1 christos pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid 1118 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low 1119 1.1 christos 1120 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid 1121 1.1 christos .L128_enc_blocks_more_than_4: //blocks left > 4 1122 1.1 christos 1123 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-4 block - store result 1124 1.1 christos 1125 1.1 christos rev64 v8.16b, v9.16b //GHASH final-4 block 1126 1.1 christos 1127 1.1 christos ldr q9, [x0], #16 //AES final-3 block - load plaintext 1128 1.1 christos 1129 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 1130 1.1 christos 1131 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-4 block - mid 1132 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 1133 1.1 christos pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high 1134 1.1 christos 1135 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid 1136 1.1 christos 1137 1.1 christos pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low 1138 1.1 christos 1139 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high 1140 1.1 christos pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid 1141 1.1 christos 
1142 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low 1143 1.1 christos 1144 1.1 christos .inst 0xce047529 //eor3 v9.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result 1145 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid 1146 1.1 christos .L128_enc_blocks_more_than_3: //blocks left > 3 1147 1.1 christos 1148 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-3 block - store result 1149 1.1 christos 1150 1.1 christos ldr q25, [x3, #112] //load h4l | h4h 1151 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 1152 1.1 christos 1153 1.1 christos rev64 v8.16b, v9.16b //GHASH final-3 block 1154 1.1 christos 1155 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 1156 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 1157 1.1 christos 1158 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-3 block - mid 1159 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 1160 1.1 christos pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low 1161 1.1 christos 1162 1.1 christos ldr q9, [x0], #16 //AES final-2 block - load plaintext 1163 1.1 christos 1164 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid 1165 1.1 christos 1166 1.1 christos ins v27.d[1], v27.d[0] //GHASH final-3 block - mid 1167 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low 1168 1.1 christos 1169 1.1 christos .inst 0xce057529 //eor3 v9.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result 1170 1.1 christos 1171 1.1 christos pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid 1172 1.1 christos pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high 1173 1.1 christos 1174 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid 1175 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high 1176 1.1 christos .L128_enc_blocks_more_than_2: //blocks left > 2 1177 1.1 christos 1178 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-2 block - store result 1179 1.1 christos 
1180 1.1 christos rev64 v8.16b, v9.16b //GHASH final-2 block 1181 1.1 christos 1182 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 1183 1.1 christos 1184 1.1 christos ldr q9, [x0], #16 //AES final-1 block - load plaintext 1185 1.1 christos 1186 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-2 block - mid 1187 1.1 christos ldr q23, [x3, #80] //load h3l | h3h 1188 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 1189 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 1190 1.1 christos 1191 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid 1192 1.1 christos .inst 0xce067529 //eor3 v9.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result 1193 1.1 christos 1194 1.1 christos pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high 1195 1.1 christos 1196 1.1 christos pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low 1197 1.1 christos pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid 1198 1.1 christos 1199 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high 1200 1.1 christos 1201 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid 1202 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low 1203 1.1 christos .L128_enc_blocks_more_than_1: //blocks left > 1 1204 1.1 christos 1205 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-1 block - store result 1206 1.1 christos 1207 1.1 christos ldr q22, [x3, #64] //load h2l | h2h 1208 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 1209 1.1 christos rev64 v8.16b, v9.16b //GHASH final-1 block 1210 1.1 christos ldr q9, [x0], #16 //AES final block - load plaintext 1211 1.1 christos 1212 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 1213 1.1 christos 1214 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 1215 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-1 block - mid 1216 1.1 christos .inst 0xce077529 //eor3 v9.16b, v9.16b, v7.16b, v29.16b //AES final block - result 1217 1.1 
christos 1218 1.1 christos pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high 1219 1.1 christos 1220 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid 1221 1.1 christos 1222 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 1223 1.1 christos 1224 1.1 christos ins v27.d[1], v27.d[0] //GHASH final-1 block - mid 1225 1.1 christos 1226 1.1 christos pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low 1227 1.1 christos pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid 1228 1.1 christos 1229 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high 1230 1.1 christos 1231 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid 1232 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low 1233 1.1 christos .L128_enc_blocks_less_than_1: //blocks left <= 1 1234 1.1 christos 1235 1.1 christos rev32 v30.16b, v30.16b 1236 1.1 christos str q30, [x16] //store the updated counter 1237 1.1 christos and x1, x1, #127 //bit_length %= 128 1238 1.1 christos 1239 1.1 christos sub x1, x1, #128 //bit_length -= 128 1240 1.1 christos 1241 1.1 christos neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) 1242 1.1 christos 1243 1.1 christos mvn x6, xzr //temp0_x = 0xffffffffffffffff 1244 1.1 christos ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored 1245 1.1 christos and x1, x1, #127 //bit_length %= 128 1246 1.1 christos 1247 1.1 christos lsr x6, x6, x1 //temp0_x is mask for top 64b of last block 1248 1.1 christos mvn x7, xzr //temp1_x = 0xffffffffffffffff 1249 1.1 christos cmp x1, #64 1250 1.1 christos 1251 1.1 christos csel x13, x7, x6, lt 1252 1.1 christos csel x14, x6, xzr, lt 1253 1.1 christos 1254 1.1 christos mov v0.d[1], x14 1255 1.1 christos mov v0.d[0], x13 //ctr0b is mask for last block 1256 1.1 christos 1257 1.1 christos and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits 1258 1.1 christos 1259 1.1 christos rev64 
v8.16b, v9.16b //GHASH final block 1260 1.1 christos 1261 1.1 christos bif v9.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing 1262 1.1 christos st1 { v9.16b}, [x2] //store all 16B 1263 1.1 christos 1264 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 1265 1.1 christos 1266 1.1 christos ins v16.d[0], v8.d[1] //GHASH final block - mid 1267 1.1 christos 1268 1.1 christos eor v16.8b, v16.8b, v8.8b //GHASH final block - mid 1269 1.1 christos ldr q20, [x3, #32] //load h1l | h1h 1270 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 1271 1.1 christos 1272 1.1 christos pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid 1273 1.1 christos 1274 1.1 christos pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high 1275 1.1 christos eor v18.16b, v18.16b, v16.16b //GHASH final block - mid 1276 1.1 christos ldr d16, [x10] //MODULO - load modulo constant 1277 1.1 christos 1278 1.1 christos pmull v26.1q, v8.1d, v20.1d //GHASH final block - low 1279 1.1 christos 1280 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final block - high 1281 1.1 christos 1282 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final block - low 1283 1.1 christos 1284 1.1 christos ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 1285 1.1 christos pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 1286 1.1 christos 1287 1.1 christos .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 1288 1.1 christos 1289 1.1 christos .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 1290 1.1 christos 1291 1.1 christos pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 1292 1.1 christos ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 1293 1.1 christos 1294 1.1 christos .inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 1295 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 1296 1.1 christos rev64 v19.16b, v19.16b 1297 
1.1 christos st1 { v19.16b }, [x3] 1298 1.1 christos mov x0, x9 1299 1.1 christos 1300 1.1 christos ldp d10, d11, [sp, #16] 1301 1.1 christos ldp d12, d13, [sp, #32] 1302 1.1 christos ldp d14, d15, [sp, #48] 1303 1.1 christos ldp d8, d9, [sp], #80 1304 1.1 christos ret 1305 1.1 christos 1306 1.1 christos .L128_enc_ret: 1307 1.1 christos mov w0, #0x0 1308 1.1 christos ret 1309 1.1 christos .size unroll8_eor3_aes_gcm_enc_128_kernel,.-unroll8_eor3_aes_gcm_enc_128_kernel 1310 1.1 christos .globl unroll8_eor3_aes_gcm_dec_128_kernel 1311 1.1 christos .type unroll8_eor3_aes_gcm_dec_128_kernel,%function 1312 1.1 christos .align 4 1313 1.1 christos unroll8_eor3_aes_gcm_dec_128_kernel: 1314 1.1 christos AARCH64_VALID_CALL_TARGET 1315 1.1 christos cbz x1, .L128_dec_ret 1316 1.1 christos stp d8, d9, [sp, #-80]! 1317 1.1 christos lsr x9, x1, #3 1318 1.1 christos mov x16, x4 1319 1.1 christos mov x8, x5 1320 1.1 christos stp d10, d11, [sp, #16] 1321 1.1 christos stp d12, d13, [sp, #32] 1322 1.1 christos stp d14, d15, [sp, #48] 1323 1.1 christos mov x5, #0xc200000000000000 1324 1.1 christos stp x5, xzr, [sp, #64] 1325 1.1 christos add x10, sp, #64 1326 1.1 christos 1327 1.1 christos mov x5, x9 1328 1.1 christos ld1 { v0.16b}, [x16] //CTR block 0 1329 1.1 christos 1330 1.1 christos ldp q26, q27, [x8, #0] //load rk0, rk1 1331 1.1 christos sub x5, x5, #1 //byte_len - 1 1332 1.1 christos 1333 1.1 christos mov x15, #0x100000000 //set up counter increment 1334 1.1 christos movi v31.16b, #0x0 1335 1.1 christos mov v31.d[1], x15 1336 1.1 christos ld1 { v19.16b}, [x3] 1337 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 1338 1.1 christos rev64 v19.16b, v19.16b 1339 1.1 christos 1340 1.1 christos rev32 v30.16b, v0.16b //set up reversed counter 1341 1.1 christos 1342 1.1 christos aese v0.16b, v26.16b 1343 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 0 1344 1.1 christos 1345 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 0 1346 1.1 christos 1347 1.1 christos rev32 v1.16b, 
v30.16b //CTR block 1 1348 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 1 1349 1.1 christos 1350 1.1 christos and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 1351 1.1 christos 1352 1.1 christos rev32 v2.16b, v30.16b //CTR block 2 1353 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 2 1354 1.1 christos aese v1.16b, v26.16b 1355 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 0 1356 1.1 christos 1357 1.1 christos rev32 v3.16b, v30.16b //CTR block 3 1358 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 3 1359 1.1 christos 1360 1.1 christos aese v0.16b, v27.16b 1361 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 1 1362 1.1 christos aese v1.16b, v27.16b 1363 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 1 1364 1.1 christos 1365 1.1 christos rev32 v4.16b, v30.16b //CTR block 4 1366 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 4 1367 1.1 christos 1368 1.1 christos rev32 v5.16b, v30.16b //CTR block 5 1369 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 5 1370 1.1 christos 1371 1.1 christos aese v2.16b, v26.16b 1372 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 0 1373 1.1 christos 1374 1.1 christos rev32 v6.16b, v30.16b //CTR block 6 1375 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 6 1376 1.1 christos aese v5.16b, v26.16b 1377 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 0 1378 1.1 christos 1379 1.1 christos aese v3.16b, v26.16b 1380 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 0 1381 1.1 christos aese v4.16b, v26.16b 1382 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 0 1383 1.1 christos 1384 1.1 christos rev32 v7.16b, v30.16b //CTR block 7 1385 1.1 christos 1386 1.1 christos aese v6.16b, v26.16b 1387 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 0 1388 1.1 christos aese v2.16b, v27.16b 1389 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 1 1390 1.1 christos 1391 1.1 christos aese 
v7.16b, v26.16b 1392 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 0 1393 1.1 christos 1394 1.1 christos ldp q28, q26, [x8, #32] //load rk2, rk3 1395 1.1 christos 1396 1.1 christos aese v6.16b, v27.16b 1397 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 1 1398 1.1 christos aese v5.16b, v27.16b 1399 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 1 1400 1.1 christos 1401 1.1 christos aese v4.16b, v27.16b 1402 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 1 1403 1.1 christos aese v7.16b, v27.16b 1404 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 1 1405 1.1 christos 1406 1.1 christos aese v7.16b, v28.16b 1407 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 2 1408 1.1 christos aese v0.16b, v28.16b 1409 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 2 1410 1.1 christos aese v3.16b, v27.16b 1411 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 1 1412 1.1 christos 1413 1.1 christos aese v6.16b, v28.16b 1414 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 2 1415 1.1 christos aese v2.16b, v28.16b 1416 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 2 1417 1.1 christos aese v5.16b, v28.16b 1418 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 2 1419 1.1 christos 1420 1.1 christos aese v4.16b, v28.16b 1421 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 2 1422 1.1 christos aese v3.16b, v28.16b 1423 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 2 1424 1.1 christos aese v1.16b, v28.16b 1425 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 2 1426 1.1 christos 1427 1.1 christos aese v6.16b, v26.16b 1428 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 3 1429 1.1 christos aese v2.16b, v26.16b 1430 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 3 1431 1.1 christos 1432 1.1 christos ldp q27, q28, [x8, #64] //load rk4, rk5 1433 1.1 christos aese v5.16b, v26.16b 1434 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 3 1435 1.1 christos 1436 1.1 
christos aese v0.16b, v26.16b 1437 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 3 1438 1.1 christos aese v7.16b, v26.16b 1439 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 3 1440 1.1 christos 1441 1.1 christos aese v3.16b, v26.16b 1442 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 3 1443 1.1 christos aese v1.16b, v26.16b 1444 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 3 1445 1.1 christos 1446 1.1 christos aese v0.16b, v27.16b 1447 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 4 1448 1.1 christos aese v7.16b, v27.16b 1449 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 4 1450 1.1 christos aese v4.16b, v26.16b 1451 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 3 1452 1.1 christos 1453 1.1 christos aese v6.16b, v27.16b 1454 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 4 1455 1.1 christos aese v1.16b, v27.16b 1456 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 4 1457 1.1 christos aese v3.16b, v27.16b 1458 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 4 1459 1.1 christos 1460 1.1 christos aese v5.16b, v27.16b 1461 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 4 1462 1.1 christos aese v4.16b, v27.16b 1463 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 4 1464 1.1 christos aese v2.16b, v27.16b 1465 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 4 1466 1.1 christos 1467 1.1 christos ldp q26, q27, [x8, #96] //load rk6, rk7 1468 1.1 christos aese v2.16b, v28.16b 1469 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 5 1470 1.1 christos aese v3.16b, v28.16b 1471 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 5 1472 1.1 christos 1473 1.1 christos aese v6.16b, v28.16b 1474 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 5 1475 1.1 christos aese v1.16b, v28.16b 1476 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 5 1477 1.1 christos 1478 1.1 christos aese v7.16b, v28.16b 1479 1.1 christos aesmc v7.16b, v7.16b //AES block 
7 - round 5 1480 1.1 christos aese v5.16b, v28.16b 1481 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 5 1482 1.1 christos 1483 1.1 christos aese v4.16b, v28.16b 1484 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 5 1485 1.1 christos 1486 1.1 christos aese v3.16b, v26.16b 1487 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 6 1488 1.1 christos aese v2.16b, v26.16b 1489 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 6 1490 1.1 christos aese v0.16b, v28.16b 1491 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 5 1492 1.1 christos 1493 1.1 christos aese v5.16b, v26.16b 1494 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 6 1495 1.1 christos aese v4.16b, v26.16b 1496 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 6 1497 1.1 christos aese v1.16b, v26.16b 1498 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 6 1499 1.1 christos 1500 1.1 christos aese v0.16b, v26.16b 1501 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 6 1502 1.1 christos aese v7.16b, v26.16b 1503 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 6 1504 1.1 christos aese v6.16b, v26.16b 1505 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 6 1506 1.1 christos 1507 1.1 christos aese v3.16b, v27.16b 1508 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 7 1509 1.1 christos aese v4.16b, v27.16b 1510 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 7 1511 1.1 christos aese v1.16b, v27.16b 1512 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 7 1513 1.1 christos 1514 1.1 christos aese v7.16b, v27.16b 1515 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 7 1516 1.1 christos aese v5.16b, v27.16b 1517 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 7 1518 1.1 christos ldp q28, q26, [x8, #128] //load rk8, rk9 1519 1.1 christos 1520 1.1 christos aese v6.16b, v27.16b 1521 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 7 1522 1.1 christos aese v2.16b, v27.16b 1523 1.1 christos aesmc 
v2.16b, v2.16b //AES block 2 - round 7 1524 1.1 christos aese v0.16b, v27.16b 1525 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 7 1526 1.1 christos 1527 1.1 christos add x5, x5, x0 1528 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 7 1529 1.1 christos 1530 1.1 christos aese v6.16b, v28.16b 1531 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 8 1532 1.1 christos aese v0.16b, v28.16b 1533 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 8 1534 1.1 christos 1535 1.1 christos aese v1.16b, v28.16b 1536 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 8 1537 1.1 christos aese v7.16b, v28.16b 1538 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 8 1539 1.1 christos aese v3.16b, v28.16b 1540 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 8 1541 1.1 christos 1542 1.1 christos aese v5.16b, v28.16b 1543 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 8 1544 1.1 christos aese v2.16b, v28.16b 1545 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 8 1546 1.1 christos aese v4.16b, v28.16b 1547 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 8 1548 1.1 christos 1549 1.1 christos aese v0.16b, v26.16b //AES block 0 - round 9 1550 1.1 christos aese v1.16b, v26.16b //AES block 1 - round 9 1551 1.1 christos aese v6.16b, v26.16b //AES block 6 - round 9 1552 1.1 christos 1553 1.1 christos ldr q27, [x8, #160] //load rk10 1554 1.1 christos aese v4.16b, v26.16b //AES block 4 - round 9 1555 1.1 christos aese v3.16b, v26.16b //AES block 3 - round 9 1556 1.1 christos 1557 1.1 christos aese v2.16b, v26.16b //AES block 2 - round 9 1558 1.1 christos aese v5.16b, v26.16b //AES block 5 - round 9 1559 1.1 christos aese v7.16b, v26.16b //AES block 7 - round 9 1560 1.1 christos 1561 1.1 christos add x4, x0, x1, lsr #3 //end_input_ptr 1562 1.1 christos cmp x0, x5 //check if we have <= 8 blocks 1563 1.1 christos b.ge .L128_dec_tail //handle tail 1564 1.1 christos 1565 1.1 christos ldp q8, q9, [x0], #32 //AES block 0, 1 - load 
ciphertext 1566 1.1 christos 1567 1.1 christos .inst 0xce006d00 //eor3 v0.16b, v8.16b, v0.16b, v27.16b //AES block 0 - result 1568 1.1 christos .inst 0xce016d21 //eor3 v1.16b, v9.16b, v1.16b, v27.16b //AES block 1 - result 1569 1.1 christos stp q0, q1, [x2], #32 //AES block 0, 1 - store result 1570 1.1 christos 1571 1.1 christos rev32 v0.16b, v30.16b //CTR block 8 1572 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8 1573 1.1 christos ldp q10, q11, [x0], #32 //AES block 2, 3 - load ciphertext 1574 1.1 christos 1575 1.1 christos ldp q12, q13, [x0], #32 //AES block 4, 5 - load ciphertext 1576 1.1 christos 1577 1.1 christos rev32 v1.16b, v30.16b //CTR block 9 1578 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 9 1579 1.1 christos ldp q14, q15, [x0], #32 //AES block 6, 7 - load ciphertext 1580 1.1 christos 1581 1.1 christos .inst 0xce036d63 //eor3 v3.16b, v11.16b, v3.16b, v27.16b //AES block 3 - result 1582 1.1 christos .inst 0xce026d42 //eor3 v2.16b, v10.16b, v2.16b, v27.16b //AES block 2 - result 1583 1.1 christos stp q2, q3, [x2], #32 //AES block 2, 3 - store result 1584 1.1 christos 1585 1.1 christos rev32 v2.16b, v30.16b //CTR block 10 1586 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 10 1587 1.1 christos 1588 1.1 christos .inst 0xce066dc6 //eor3 v6.16b, v14.16b, v6.16b, v27.16b //AES block 6 - result 1589 1.1 christos 1590 1.1 christos rev32 v3.16b, v30.16b //CTR block 11 1591 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 11 1592 1.1 christos 1593 1.1 christos .inst 0xce046d84 //eor3 v4.16b, v12.16b, v4.16b, v27.16b //AES block 4 - result 1594 1.1 christos .inst 0xce056da5 //eor3 v5.16b, v13.16b, v5.16b, v27.16b //AES block 5 - result 1595 1.1 christos stp q4, q5, [x2], #32 //AES block 4, 5 - store result 1596 1.1 christos 1597 1.1 christos .inst 0xce076de7 //eor3 v7.16b, v15.16b, v7.16b, v27.16b //AES block 7 - result 1598 1.1 christos stp q6, q7, [x2], #32 //AES block 6, 7 - store result 1599 1.1 christos rev32 v4.16b, v30.16b //CTR block 
12 1600 1.1 christos 1601 1.1 christos cmp x0, x5 //check if we have <= 8 blocks 1602 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 12 1603 1.1 christos b.ge .L128_dec_prepretail //do prepretail 1604 1.1 christos 1605 1.1 christos .L128_dec_main_loop: //main loop start 1606 1.1 christos ldr q23, [x3, #176] //load h7l | h7h 1607 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 1608 1.1 christos ldr q25, [x3, #208] //load h8l | h8h 1609 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 1610 1.1 christos 1611 1.1 christos rev64 v9.16b, v9.16b //GHASH block 8k+1 1612 1.1 christos rev64 v8.16b, v8.16b //GHASH block 8k 1613 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 1614 1.1 christos 1615 1.1 christos rev64 v14.16b, v14.16b //GHASH block 8k+6 1616 1.1 christos ldr q20, [x3, #128] //load h5l | h5h 1617 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 1618 1.1 christos ldr q22, [x3, #160] //load h6l | h6h 1619 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 1620 1.1 christos 1621 1.1 christos eor v8.16b, v8.16b, v19.16b //PRE 1 1622 1.1 christos rev32 v5.16b, v30.16b //CTR block 8k+13 1623 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+13 1624 1.1 christos 1625 1.1 christos rev64 v10.16b, v10.16b //GHASH block 8k+2 1626 1.1 christos rev64 v12.16b, v12.16b //GHASH block 8k+4 1627 1.1 christos ldp q26, q27, [x8, #0] //load rk0, rk1 1628 1.1 christos 1629 1.1 christos rev32 v6.16b, v30.16b //CTR block 8k+14 1630 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+14 1631 1.1 christos ldr q21, [x3, #144] //load h6k | h5k 1632 1.1 christos ldr q24, [x3, #192] //load h8k | h7k 1633 1.1 christos 1634 1.1 christos pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 1635 1.1 christos pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 1636 1.1 christos rev64 v11.16b, v11.16b //GHASH block 8k+3 1637 1.1 christos 1638 1.1 christos rev32 v7.16b, v30.16b //CTR block 8k+15 1639 1.1 christos trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 1640 1.1 christos 
rev64 v13.16b, v13.16b //GHASH block 8k+5 1641 1.1 christos 1642 1.1 christos pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 1643 1.1 christos pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 1644 1.1 christos trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 1645 1.1 christos 1646 1.1 christos pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 1647 1.1 christos aese v4.16b, v26.16b 1648 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 1649 1.1 christos pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 1650 1.1 christos 1651 1.1 christos aese v6.16b, v26.16b 1652 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 1653 1.1 christos aese v5.16b, v26.16b 1654 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 1655 1.1 christos aese v7.16b, v26.16b 1656 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 1657 1.1 christos 1658 1.1 christos aese v3.16b, v26.16b 1659 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 1660 1.1 christos aese v2.16b, v26.16b 1661 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 1662 1.1 christos eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 1663 1.1 christos 1664 1.1 christos aese v1.16b, v26.16b 1665 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 1666 1.1 christos eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 1667 1.1 christos aese v0.16b, v26.16b 1668 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 1669 1.1 christos 1670 1.1 christos aese v2.16b, v27.16b 1671 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 1672 1.1 christos eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 1673 1.1 christos .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 1674 1.1 christos 1675 1.1 christos ldp q28, q26, [x8, #32] //load rk2, rk3 1676 1.1 christos trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 1677 1.1 christos aese v7.16b, v27.16b 1678 1.1 
christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 1679 1.1 christos 1680 1.1 christos pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 1681 1.1 christos trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 1682 1.1 christos pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 1683 1.1 christos 1684 1.1 christos ldr q23, [x3, #80] //load h3l | h3h 1685 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 1686 1.1 christos ldr q25, [x3, #112] //load h4l | h4h 1687 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 1688 1.1 christos pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 1689 1.1 christos aese v6.16b, v27.16b 1690 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 1691 1.1 christos 1692 1.1 christos aese v4.16b, v27.16b 1693 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 1694 1.1 christos aese v5.16b, v27.16b 1695 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 1696 1.1 christos pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 1697 1.1 christos 1698 1.1 christos aese v3.16b, v27.16b 1699 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 1700 1.1 christos aese v0.16b, v27.16b 1701 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 1702 1.1 christos aese v1.16b, v27.16b 1703 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 1704 1.1 christos 1705 1.1 christos aese v7.16b, v28.16b 1706 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 1707 1.1 christos aese v2.16b, v28.16b 1708 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 1709 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 1710 1.1 christos 1711 1.1 christos aese v4.16b, v28.16b 1712 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 1713 1.1 christos eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 1714 1.1 christos ldr q20, [x3, #32] //load h1l | h1h 1715 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 1716 1.1 
christos ldr q22, [x3, #64] //load h2l | h2h 1717 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 1718 1.1 christos 1719 1.1 christos eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 1720 1.1 christos aese v1.16b, v28.16b 1721 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 1722 1.1 christos aese v3.16b, v28.16b 1723 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 1724 1.1 christos 1725 1.1 christos trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 1726 1.1 christos aese v5.16b, v28.16b 1727 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 1728 1.1 christos aese v0.16b, v28.16b 1729 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 1730 1.1 christos 1731 1.1 christos aese v6.16b, v28.16b 1732 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 1733 1.1 christos pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 1734 1.1 christos pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 1735 1.1 christos 1736 1.1 christos aese v7.16b, v26.16b 1737 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 1738 1.1 christos rev64 v15.16b, v15.16b //GHASH block 8k+7 1739 1.1 christos pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 1740 1.1 christos 1741 1.1 christos ldp q27, q28, [x8, #64] //load rk4, rk5 1742 1.1 christos pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 1743 1.1 christos .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 1744 1.1 christos 1745 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 1746 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 1747 1.1 christos aese v2.16b, v26.16b 1748 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 1749 1.1 christos trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 1750 1.1 christos 1751 1.1 christos aese v4.16b, v26.16b 1752 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 1753 1.1 christos aese v3.16b, v26.16b 1754 1.1 christos aesmc v3.16b, v3.16b 
//AES block 8k+11 - round 3 1755 1.1 christos aese v1.16b, v26.16b 1756 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 1757 1.1 christos 1758 1.1 christos aese v0.16b, v26.16b 1759 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 1760 1.1 christos aese v6.16b, v26.16b 1761 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 1762 1.1 christos aese v5.16b, v26.16b 1763 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 1764 1.1 christos 1765 1.1 christos pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 1766 1.1 christos pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 1767 1.1 christos pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 1768 1.1 christos 1769 1.1 christos pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 1770 1.1 christos aese v0.16b, v27.16b 1771 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 1772 1.1 christos aese v7.16b, v27.16b 1773 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 1774 1.1 christos 1775 1.1 christos eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 1776 1.1 christos trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 1777 1.1 christos aese v3.16b, v27.16b 1778 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 1779 1.1 christos 1780 1.1 christos aese v1.16b, v27.16b 1781 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 1782 1.1 christos aese v5.16b, v27.16b 1783 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 1784 1.1 christos aese v6.16b, v27.16b 1785 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 1786 1.1 christos 1787 1.1 christos aese v2.16b, v27.16b 1788 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 1789 1.1 christos aese v4.16b, v27.16b 1790 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 1791 1.1 christos trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 1792 1.1 christos 1793 1.1 christos ldp q26, q27, [x8, #96] //load 
rk6, rk7 1794 1.1 christos aese v0.16b, v28.16b 1795 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 1796 1.1 christos pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 1797 1.1 christos 1798 1.1 christos aese v2.16b, v28.16b 1799 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 1800 1.1 christos eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 1801 1.1 christos aese v1.16b, v28.16b 1802 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 1803 1.1 christos 1804 1.1 christos pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 1805 1.1 christos aese v6.16b, v28.16b 1806 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 1807 1.1 christos aese v7.16b, v28.16b 1808 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 1809 1.1 christos 1810 1.1 christos aese v3.16b, v28.16b 1811 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 1812 1.1 christos aese v5.16b, v28.16b 1813 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 1814 1.1 christos aese v4.16b, v28.16b 1815 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 1816 1.1 christos 1817 1.1 christos pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 1818 1.1 christos .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 1819 1.1 christos .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 1820 1.1 christos 1821 1.1 christos aese v3.16b, v26.16b 1822 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 1823 1.1 christos .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 1824 1.1 christos aese v7.16b, v26.16b 1825 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 1826 1.1 christos 1827 1.1 christos aese v1.16b, v26.16b 1828 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 1829 1.1 christos pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 1830 1.1 christos aese 
v6.16b, v26.16b 1831 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 1832 1.1 christos 1833 1.1 christos aese v2.16b, v26.16b 1834 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 1835 1.1 christos aese v5.16b, v26.16b 1836 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 1837 1.1 christos pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 1838 1.1 christos 1839 1.1 christos pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 1840 1.1 christos aese v0.16b, v26.16b 1841 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 1842 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+15 1843 1.1 christos 1844 1.1 christos .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 1845 1.1 christos aese v4.16b, v26.16b 1846 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 1847 1.1 christos ldp q28, q26, [x8, #128] //load rk8, rk9 1848 1.1 christos 1849 1.1 christos ldr d16, [x10] //MODULO - load modulo constant 1850 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 1851 1.1 christos aese v5.16b, v27.16b 1852 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 1853 1.1 christos 1854 1.1 christos rev32 v20.16b, v30.16b //CTR block 8k+16 1855 1.1 christos .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 1856 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+16 1857 1.1 christos 1858 1.1 christos aese v6.16b, v27.16b 1859 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 1860 1.1 christos aese v3.16b, v27.16b 1861 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 1862 1.1 christos aese v7.16b, v27.16b 1863 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 1864 1.1 christos 1865 1.1 christos aese v2.16b, v27.16b 1866 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 1867 1.1 christos aese v1.16b, v27.16b 1868 1.1 christos 
aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 1869 1.1 christos rev32 v22.16b, v30.16b //CTR block 8k+17 1870 1.1 christos 1871 1.1 christos aese v4.16b, v27.16b 1872 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 1873 1.1 christos ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 1874 1.1 christos pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 1875 1.1 christos 1876 1.1 christos .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 1877 1.1 christos aese v0.16b, v27.16b 1878 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 1879 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+17 1880 1.1 christos 1881 1.1 christos aese v5.16b, v28.16b 1882 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 1883 1.1 christos aese v1.16b, v28.16b 1884 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 1885 1.1 christos ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load ciphertext 1886 1.1 christos 1887 1.1 christos ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load ciphertext 1888 1.1 christos aese v0.16b, v28.16b 1889 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 1890 1.1 christos rev32 v23.16b, v30.16b //CTR block 8k+18 1891 1.1 christos 1892 1.1 christos ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load ciphertext 1893 1.1 christos aese v4.16b, v28.16b 1894 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 1895 1.1 christos .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 1896 1.1 christos 1897 1.1 christos ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load ciphertext 1898 1.1 christos aese v3.16b, v28.16b 1899 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 1900 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+18 1901 1.1 christos 1902 1.1 christos aese v7.16b, v28.16b 1903 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 1904 1.1 christos aese 
v2.16b, v28.16b 1905 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 1906 1.1 christos aese v6.16b, v28.16b 1907 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 1908 1.1 christos 1909 1.1 christos aese v0.16b, v26.16b //AES block 8k+8 - round 9 1910 1.1 christos aese v1.16b, v26.16b //AES block 8k+9 - round 9 1911 1.1 christos ldr q27, [x8, #160] //load rk10 1912 1.1 christos 1913 1.1 christos aese v6.16b, v26.16b //AES block 8k+14 - round 9 1914 1.1 christos pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 1915 1.1 christos aese v2.16b, v26.16b //AES block 8k+10 - round 9 1916 1.1 christos 1917 1.1 christos aese v7.16b, v26.16b //AES block 8k+15 - round 9 1918 1.1 christos aese v4.16b, v26.16b //AES block 8k+12 - round 9 1919 1.1 christos ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 1920 1.1 christos 1921 1.1 christos rev32 v25.16b, v30.16b //CTR block 8k+19 1922 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+19 1923 1.1 christos 1924 1.1 christos aese v3.16b, v26.16b //AES block 8k+11 - round 9 1925 1.1 christos aese v5.16b, v26.16b //AES block 8k+13 - round 9 1926 1.1 christos .inst 0xce016d21 //eor3 v1.16b, v9.16b, v1.16b, v27.16b //AES block 8k+9 - result 1927 1.1 christos 1928 1.1 christos .inst 0xce006d00 //eor3 v0.16b, v8.16b, v0.16b, v27.16b //AES block 8k+8 - result 1929 1.1 christos .inst 0xce076de7 //eor3 v7.16b, v15.16b, v7.16b, v27.16b //AES block 8k+15 - result 1930 1.1 christos .inst 0xce066dc6 //eor3 v6.16b, v14.16b, v6.16b, v27.16b //AES block 8k+14 - result 1931 1.1 christos 1932 1.1 christos .inst 0xce026d42 //eor3 v2.16b, v10.16b, v2.16b, v27.16b //AES block 8k+10 - result 1933 1.1 christos stp q0, q1, [x2], #32 //AES block 8k+8, 8k+9 - store result 1934 1.1 christos mov v1.16b, v22.16b //CTR block 8k+17 1935 1.1 christos 1936 1.1 christos .inst 0xce046d84 //eor3 v4.16b, v12.16b, v4.16b, v27.16b //AES block 8k+12 - result 1937 1.1 christos .inst 0xce115673 //eor3 v19.16b, 
v19.16b, v17.16b, v21.16b //MODULO - fold into low 1938 1.1 christos mov v0.16b, v20.16b //CTR block 8k+16 1939 1.1 christos 1940 1.1 christos .inst 0xce036d63 //eor3 v3.16b, v11.16b, v3.16b, v27.16b //AES block 8k+11 - result 1941 1.1 christos cmp x0, x5 //.LOOP CONTROL 1942 1.1 christos stp q2, q3, [x2], #32 //AES block 8k+10, 8k+11 - store result 1943 1.1 christos 1944 1.1 christos .inst 0xce056da5 //eor3 v5.16b, v13.16b, v5.16b, v27.16b //AES block 8k+13 - result 1945 1.1 christos mov v2.16b, v23.16b //CTR block 8k+18 1946 1.1 christos 1947 1.1 christos stp q4, q5, [x2], #32 //AES block 8k+12, 8k+13 - store result 1948 1.1 christos rev32 v4.16b, v30.16b //CTR block 8k+20 1949 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+20 1950 1.1 christos 1951 1.1 christos stp q6, q7, [x2], #32 //AES block 8k+14, 8k+15 - store result 1952 1.1 christos mov v3.16b, v25.16b //CTR block 8k+19 1953 1.1 christos b.lt .L128_dec_main_loop 1954 1.1 christos 1955 1.1 christos .L128_dec_prepretail: //PREPRETAIL 1956 1.1 christos rev64 v11.16b, v11.16b //GHASH block 8k+3 1957 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 1958 1.1 christos rev64 v8.16b, v8.16b //GHASH block 8k 1959 1.1 christos 1960 1.1 christos rev64 v10.16b, v10.16b //GHASH block 8k+2 1961 1.1 christos rev32 v5.16b, v30.16b //CTR block 8k+13 1962 1.1 christos ldp q26, q27, [x8, #0] //load rk0, rk1 1963 1.1 christos 1964 1.1 christos ldr q23, [x3, #176] //load h7l | h7h 1965 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 1966 1.1 christos ldr q25, [x3, #208] //load h8l | h8h 1967 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 1968 1.1 christos eor v8.16b, v8.16b, v19.16b //PRE 1 1969 1.1 christos rev64 v9.16b, v9.16b //GHASH block 8k+1 1970 1.1 christos 1971 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+13 1972 1.1 christos ldr q20, [x3, #128] //load h5l | h5h 1973 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 1974 1.1 christos ldr q22, [x3, #160] //load h6l | h6h 1975 1.1 christos ext 
v22.16b, v22.16b, v22.16b, #8 1976 1.1 christos rev64 v13.16b, v13.16b //GHASH block 8k+5 1977 1.1 christos 1978 1.1 christos rev64 v12.16b, v12.16b //GHASH block 8k+4 1979 1.1 christos 1980 1.1 christos rev64 v14.16b, v14.16b //GHASH block 8k+6 1981 1.1 christos 1982 1.1 christos ldr q21, [x3, #144] //load h6k | h5k 1983 1.1 christos ldr q24, [x3, #192] //load h8k | h7k 1984 1.1 christos rev32 v6.16b, v30.16b //CTR block 8k+14 1985 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+14 1986 1.1 christos 1987 1.1 christos pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 1988 1.1 christos pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 1989 1.1 christos pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 1990 1.1 christos 1991 1.1 christos trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 1992 1.1 christos trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 1993 1.1 christos pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 1994 1.1 christos 1995 1.1 christos pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 1996 1.1 christos pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 1997 1.1 christos aese v0.16b, v26.16b 1998 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 1999 1.1 christos 2000 1.1 christos eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 2001 1.1 christos aese v4.16b, v26.16b 2002 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 2003 1.1 christos eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 2004 1.1 christos 2005 1.1 christos pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 2006 1.1 christos rev32 v7.16b, v30.16b //CTR block 8k+15 2007 1.1 christos aese v3.16b, v26.16b 2008 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 2009 1.1 christos 2010 1.1 christos .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 2011 1.1 christos trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 2012 1.1 christos trn2 v10.2d, v11.2d, 
v10.2d //GHASH block 8k+2, 8k+3 - mid 2013 1.1 christos 2014 1.1 christos aese v2.16b, v26.16b 2015 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 2016 1.1 christos aese v1.16b, v26.16b 2017 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 2018 1.1 christos aese v5.16b, v26.16b 2019 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 2020 1.1 christos 2021 1.1 christos pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 2022 1.1 christos pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 2023 1.1 christos pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 2024 1.1 christos 2025 1.1 christos aese v2.16b, v27.16b 2026 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 2027 1.1 christos aese v7.16b, v26.16b 2028 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 2029 1.1 christos aese v6.16b, v26.16b 2030 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 2031 1.1 christos 2032 1.1 christos eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 2033 1.1 christos eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 2034 1.1 christos eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 2035 1.1 christos 2036 1.1 christos aese v6.16b, v27.16b 2037 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 2038 1.1 christos aese v4.16b, v27.16b 2039 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 2040 1.1 christos aese v5.16b, v27.16b 2041 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 2042 1.1 christos 2043 1.1 christos ldp q28, q26, [x8, #32] //load rk2, rk3 2044 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 2045 1.1 christos pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 2046 1.1 christos 2047 1.1 christos ldr q23, [x3, #80] //load h3l | h3h 2048 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 2049 1.1 christos ldr q25, [x3, #112] //load h4l | h4h 2050 1.1 christos ext v25.16b, v25.16b, 
v25.16b, #8 2051 1.1 christos aese v1.16b, v27.16b 2052 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 2053 1.1 christos pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 2054 1.1 christos 2055 1.1 christos aese v3.16b, v27.16b 2056 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 2057 1.1 christos aese v7.16b, v27.16b 2058 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 2059 1.1 christos aese v0.16b, v27.16b 2060 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 2061 1.1 christos 2062 1.1 christos ldr q20, [x3, #32] //load h1l | h1h 2063 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 2064 1.1 christos ldr q22, [x3, #64] //load h2l | h2h 2065 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 2066 1.1 christos .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 2067 1.1 christos 2068 1.1 christos aese v0.16b, v28.16b 2069 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 2070 1.1 christos aese v6.16b, v28.16b 2071 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 2072 1.1 christos aese v2.16b, v28.16b 2073 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 2074 1.1 christos 2075 1.1 christos aese v4.16b, v28.16b 2076 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 2077 1.1 christos trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 2078 1.1 christos aese v7.16b, v28.16b 2079 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 2080 1.1 christos 2081 1.1 christos aese v1.16b, v28.16b 2082 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 2083 1.1 christos aese v5.16b, v28.16b 2084 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 2085 1.1 christos aese v3.16b, v28.16b 2086 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 2087 1.1 christos 2088 1.1 christos pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 2089 1.1 christos pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - 
low 2090 1.1 christos trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 2091 1.1 christos 2092 1.1 christos ldp q27, q28, [x8, #64] //load rk4, rk5 2093 1.1 christos rev64 v15.16b, v15.16b //GHASH block 8k+7 2094 1.1 christos aese v6.16b, v26.16b 2095 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 2096 1.1 christos 2097 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 2098 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 2099 1.1 christos pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 2100 1.1 christos pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 2101 1.1 christos 2102 1.1 christos aese v2.16b, v26.16b 2103 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 2104 1.1 christos aese v0.16b, v26.16b 2105 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 2106 1.1 christos trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 2107 1.1 christos 2108 1.1 christos pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 2109 1.1 christos pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 2110 1.1 christos trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 2111 1.1 christos 2112 1.1 christos aese v4.16b, v26.16b 2113 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 2114 1.1 christos aese v3.16b, v26.16b 2115 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 2116 1.1 christos aese v7.16b, v26.16b 2117 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 2118 1.1 christos 2119 1.1 christos aese v1.16b, v26.16b 2120 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 2121 1.1 christos aese v5.16b, v26.16b 2122 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 2123 1.1 christos eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 2124 1.1 christos 2125 1.1 christos .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 2126 1.1 christos aese v0.16b, v27.16b 2127 1.1 christos aesmc v0.16b, v0.16b //AES 
block 8k+8 - round 4 2128 1.1 christos aese v2.16b, v27.16b 2129 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 2130 1.1 christos 2131 1.1 christos eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 2132 1.1 christos aese v5.16b, v27.16b 2133 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 2134 1.1 christos pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 2135 1.1 christos 2136 1.1 christos aese v1.16b, v27.16b 2137 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 2138 1.1 christos aese v6.16b, v27.16b 2139 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 2140 1.1 christos aese v4.16b, v27.16b 2141 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 2142 1.1 christos 2143 1.1 christos aese v7.16b, v27.16b 2144 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 2145 1.1 christos aese v3.16b, v27.16b 2146 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 2147 1.1 christos pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 2148 1.1 christos 2149 1.1 christos pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 2150 1.1 christos pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 2151 1.1 christos pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 2152 1.1 christos 2153 1.1 christos ldp q26, q27, [x8, #96] //load rk6, rk7 2154 1.1 christos .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 2155 1.1 christos aese v6.16b, v28.16b 2156 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 2157 1.1 christos 2158 1.1 christos ldr d16, [x10] //MODULO - load modulo constant 2159 1.1 christos pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 2160 1.1 christos .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 2161 1.1 christos 2162 1.1 christos aese v0.16b, v28.16b 2163 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 2164 1.1 christos aese v2.16b, v28.16b 2165 
1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 2166 1.1 christos aese v4.16b, v28.16b 2167 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 2168 1.1 christos 2169 1.1 christos aese v3.16b, v28.16b 2170 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 2171 1.1 christos aese v1.16b, v28.16b 2172 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 2173 1.1 christos aese v5.16b, v28.16b 2174 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 2175 1.1 christos 2176 1.1 christos aese v7.16b, v28.16b 2177 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 2178 1.1 christos .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 2179 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 2180 1.1 christos 2181 1.1 christos aese v4.16b, v26.16b 2182 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 2183 1.1 christos aese v1.16b, v26.16b 2184 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 2185 1.1 christos aese v2.16b, v26.16b 2186 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 2187 1.1 christos 2188 1.1 christos .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 2189 1.1 christos aese v5.16b, v26.16b 2190 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 2191 1.1 christos aese v0.16b, v26.16b 2192 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 2193 1.1 christos 2194 1.1 christos aese v3.16b, v26.16b 2195 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 2196 1.1 christos aese v6.16b, v26.16b 2197 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 2198 1.1 christos aese v7.16b, v26.16b 2199 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 2200 1.1 christos 2201 1.1 christos aese v4.16b, v27.16b 2202 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 2203 1.1 christos .inst 
0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 2204 1.1 christos ldp q28, q26, [x8, #128] //load rk8, rk9 2205 1.1 christos 2206 1.1 christos pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 2207 1.1 christos aese v3.16b, v27.16b 2208 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 2209 1.1 christos ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 2210 1.1 christos 2211 1.1 christos aese v5.16b, v27.16b 2212 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 2213 1.1 christos aese v6.16b, v27.16b 2214 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 2215 1.1 christos aese v0.16b, v27.16b 2216 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 2217 1.1 christos 2218 1.1 christos aese v7.16b, v27.16b 2219 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 2220 1.1 christos aese v1.16b, v27.16b 2221 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 2222 1.1 christos aese v2.16b, v27.16b 2223 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 2224 1.1 christos 2225 1.1 christos .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 2226 1.1 christos ldr q27, [x8, #160] //load rk10 2227 1.1 christos 2228 1.1 christos aese v3.16b, v28.16b 2229 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 2230 1.1 christos aese v0.16b, v28.16b 2231 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 2232 1.1 christos 2233 1.1 christos pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 2234 1.1 christos aese v6.16b, v28.16b 2235 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 2236 1.1 christos ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 2237 1.1 christos 2238 1.1 christos aese v2.16b, v28.16b 2239 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 2240 1.1 christos aese v1.16b, v28.16b 2241 1.1 christos aesmc v1.16b, v1.16b //AES block 
8k+9 - round 8 2242 1.1 christos aese v7.16b, v28.16b 2243 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 2244 1.1 christos 2245 1.1 christos aese v6.16b, v26.16b //AES block 8k+14 - round 9 2246 1.1 christos aese v5.16b, v28.16b 2247 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 2248 1.1 christos aese v4.16b, v28.16b 2249 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 2250 1.1 christos 2251 1.1 christos .inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 2252 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+15 2253 1.1 christos aese v2.16b, v26.16b //AES block 8k+10 - round 9 2254 1.1 christos 2255 1.1 christos aese v3.16b, v26.16b //AES block 8k+11 - round 9 2256 1.1 christos aese v5.16b, v26.16b //AES block 8k+13 - round 9 2257 1.1 christos aese v0.16b, v26.16b //AES block 8k+8 - round 9 2258 1.1 christos 2259 1.1 christos aese v4.16b, v26.16b //AES block 8k+12 - round 9 2260 1.1 christos aese v1.16b, v26.16b //AES block 8k+9 - round 9 2261 1.1 christos aese v7.16b, v26.16b //AES block 8k+15 - round 9 2262 1.1 christos 2263 1.1 christos .L128_dec_tail: //TAIL 2264 1.1 christos 2265 1.1 christos mov v29.16b, v27.16b 2266 1.1 christos sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process 2267 1.1 christos 2268 1.1 christos cmp x5, #112 2269 1.1 christos 2270 1.1 christos ldp q24, q25, [x3, #192] //load h8k | h7k 2271 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 2272 1.1 christos ldr q9, [x0], #16 //AES block 8k+8 - load ciphertext 2273 1.1 christos 2274 1.1 christos ldp q20, q21, [x3, #128] //load h5l | h5h 2275 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 2276 1.1 christos ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag 2277 1.1 christos 2278 1.1 christos ldp q22, q23, [x3, #160] //load h6l | h6h 2279 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 2280 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 2281 1.1 christos 2282 1.1 christos .inst 
0xce00752c //eor3 v12.16b, v9.16b, v0.16b, v29.16b //AES block 8k+8 - result 2283 1.1 christos b.gt .L128_dec_blocks_more_than_7 2284 1.1 christos 2285 1.1 christos cmp x5, #96 2286 1.1 christos mov v7.16b, v6.16b 2287 1.1 christos movi v19.8b, #0 2288 1.1 christos 2289 1.1 christos movi v17.8b, #0 2290 1.1 christos mov v6.16b, v5.16b 2291 1.1 christos mov v5.16b, v4.16b 2292 1.1 christos 2293 1.1 christos mov v4.16b, v3.16b 2294 1.1 christos mov v3.16b, v2.16b 2295 1.1 christos mov v2.16b, v1.16b 2296 1.1 christos 2297 1.1 christos movi v18.8b, #0 2298 1.1 christos sub v30.4s, v30.4s, v31.4s 2299 1.1 christos b.gt .L128_dec_blocks_more_than_6 2300 1.1 christos 2301 1.1 christos cmp x5, #80 2302 1.1 christos sub v30.4s, v30.4s, v31.4s 2303 1.1 christos 2304 1.1 christos mov v7.16b, v6.16b 2305 1.1 christos mov v6.16b, v5.16b 2306 1.1 christos mov v5.16b, v4.16b 2307 1.1 christos 2308 1.1 christos mov v4.16b, v3.16b 2309 1.1 christos mov v3.16b, v1.16b 2310 1.1 christos b.gt .L128_dec_blocks_more_than_5 2311 1.1 christos 2312 1.1 christos cmp x5, #64 2313 1.1 christos 2314 1.1 christos mov v7.16b, v6.16b 2315 1.1 christos mov v6.16b, v5.16b 2316 1.1 christos mov v5.16b, v4.16b 2317 1.1 christos 2318 1.1 christos mov v4.16b, v1.16b 2319 1.1 christos sub v30.4s, v30.4s, v31.4s 2320 1.1 christos b.gt .L128_dec_blocks_more_than_4 2321 1.1 christos 2322 1.1 christos sub v30.4s, v30.4s, v31.4s 2323 1.1 christos mov v7.16b, v6.16b 2324 1.1 christos mov v6.16b, v5.16b 2325 1.1 christos 2326 1.1 christos mov v5.16b, v1.16b 2327 1.1 christos cmp x5, #48 2328 1.1 christos b.gt .L128_dec_blocks_more_than_3 2329 1.1 christos 2330 1.1 christos sub v30.4s, v30.4s, v31.4s 2331 1.1 christos mov v7.16b, v6.16b 2332 1.1 christos cmp x5, #32 2333 1.1 christos 2334 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 2335 1.1 christos mov v6.16b, v1.16b 2336 1.1 christos b.gt .L128_dec_blocks_more_than_2 2337 1.1 christos 2338 1.1 christos cmp x5, #16 2339 1.1 christos 2340 1.1 christos mov 
v7.16b, v1.16b 2341 1.1 christos sub v30.4s, v30.4s, v31.4s 2342 1.1 christos b.gt .L128_dec_blocks_more_than_1 2343 1.1 christos 2344 1.1 christos sub v30.4s, v30.4s, v31.4s 2345 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 2346 1.1 christos b .L128_dec_blocks_less_than_1 2347 1.1 christos .L128_dec_blocks_more_than_7: //blocks left > 7 2348 1.1 christos rev64 v8.16b, v9.16b //GHASH final-7 block 2349 1.1 christos 2350 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 2351 1.1 christos 2352 1.1 christos ins v18.d[0], v24.d[1] //GHASH final-7 block - mid 2353 1.1 christos 2354 1.1 christos pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low 2355 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-7 block - mid 2356 1.1 christos 2357 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 2358 1.1 christos ldr q9, [x0], #16 //AES final-6 block - load ciphertext 2359 1.1 christos 2360 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid 2361 1.1 christos 2362 1.1 christos pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high 2363 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-7 block - store result 2364 1.1 christos .inst 0xce01752c //eor3 v12.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result 2365 1.1 christos 2366 1.1 christos pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid 2367 1.1 christos .L128_dec_blocks_more_than_6: //blocks left > 6 2368 1.1 christos 2369 1.1 christos rev64 v8.16b, v9.16b //GHASH final-6 block 2370 1.1 christos 2371 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 2372 1.1 christos 2373 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-6 block - mid 2374 1.1 christos 2375 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid 2376 1.1 christos 2377 1.1 christos pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low 2378 1.1 christos ldr q9, [x0], #16 //AES final-5 block - load ciphertext 2379 1.1 christos movi v16.8b, #0 //suppress further partial 
tag feed in 2380 1.1 christos 2381 1.1 christos pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid 2382 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-6 block - store result 2383 1.1 christos pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high 2384 1.1 christos 2385 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low 2386 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high 2387 1.1 christos 2388 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid 2389 1.1 christos .inst 0xce02752c //eor3 v12.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result 2390 1.1 christos .L128_dec_blocks_more_than_5: //blocks left > 5 2391 1.1 christos 2392 1.1 christos rev64 v8.16b, v9.16b //GHASH final-5 block 2393 1.1 christos 2394 1.1 christos ldr q9, [x0], #16 //AES final-4 block - load ciphertext 2395 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-5 block - store result 2396 1.1 christos 2397 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 2398 1.1 christos 2399 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-5 block - mid 2400 1.1 christos 2401 1.1 christos .inst 0xce03752c //eor3 v12.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result 2402 1.1 christos 2403 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid 2404 1.1 christos 2405 1.1 christos ins v27.d[1], v27.d[0] //GHASH final-5 block - mid 2406 1.1 christos pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low 2407 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 2408 1.1 christos 2409 1.1 christos pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid 2410 1.1 christos pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high 2411 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low 2412 1.1 christos 2413 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid 2414 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high 2415 1.1 christos 
.L128_dec_blocks_more_than_4: //blocks left > 4 2416 1.1 christos 2417 1.1 christos rev64 v8.16b, v9.16b //GHASH final-4 block 2418 1.1 christos 2419 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 2420 1.1 christos ldr q9, [x0], #16 //AES final-3 block - load ciphertext 2421 1.1 christos 2422 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-4 block - mid 2423 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 2424 1.1 christos pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high 2425 1.1 christos 2426 1.1 christos pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low 2427 1.1 christos 2428 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high 2429 1.1 christos 2430 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-4 block - store result 2431 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid 2432 1.1 christos 2433 1.1 christos .inst 0xce04752c //eor3 v12.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result 2434 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low 2435 1.1 christos 2436 1.1 christos pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid 2437 1.1 christos 2438 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid 2439 1.1 christos .L128_dec_blocks_more_than_3: //blocks left > 3 2440 1.1 christos 2441 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-3 block - store result 2442 1.1 christos rev64 v8.16b, v9.16b //GHASH final-3 block 2443 1.1 christos 2444 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 2445 1.1 christos 2446 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-3 block - mid 2447 1.1 christos 2448 1.1 christos ldr q25, [x3, #112] //load h4l | h4h 2449 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 2450 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 2451 1.1 christos 2452 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid 2453 1.1 christos 2454 1.1 christos ldr q9, [x0], #16 //AES final-2 
block - load ciphertext 2455 1.1 christos 2456 1.1 christos ins v27.d[1], v27.d[0] //GHASH final-3 block - mid 2457 1.1 christos pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low 2458 1.1 christos pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high 2459 1.1 christos 2460 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 2461 1.1 christos .inst 0xce05752c //eor3 v12.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result 2462 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low 2463 1.1 christos 2464 1.1 christos pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid 2465 1.1 christos 2466 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high 2467 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid 2468 1.1 christos .L128_dec_blocks_more_than_2: //blocks left > 2 2469 1.1 christos 2470 1.1 christos rev64 v8.16b, v9.16b //GHASH final-2 block 2471 1.1 christos 2472 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-2 block - store result 2473 1.1 christos 2474 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 2475 1.1 christos ldr q23, [x3, #80] //load h3l | h3h 2476 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 2477 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 2478 1.1 christos 2479 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-2 block - mid 2480 1.1 christos 2481 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid 2482 1.1 christos 2483 1.1 christos pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low 2484 1.1 christos 2485 1.1 christos pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high 2486 1.1 christos pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid 2487 1.1 christos ldr q9, [x0], #16 //AES final-1 block - load ciphertext 2488 1.1 christos 2489 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid 2490 1.1 christos 2491 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - 
low 2492 1.1 christos 2493 1.1 christos .inst 0xce06752c //eor3 v12.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result 2494 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high 2495 1.1 christos .L128_dec_blocks_more_than_1: //blocks left > 1 2496 1.1 christos 2497 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-1 block - store result 2498 1.1 christos rev64 v8.16b, v9.16b //GHASH final-1 block 2499 1.1 christos 2500 1.1 christos ldr q22, [x3, #64] //load h2l | h2h 2501 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 2502 1.1 christos 2503 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 2504 1.1 christos 2505 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 2506 1.1 christos 2507 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-1 block - mid 2508 1.1 christos 2509 1.1 christos ldr q9, [x0], #16 //AES final block - load ciphertext 2510 1.1 christos pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high 2511 1.1 christos 2512 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid 2513 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high 2514 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 2515 1.1 christos 2516 1.1 christos ins v27.d[1], v27.d[0] //GHASH final-1 block - mid 2517 1.1 christos .inst 0xce07752c //eor3 v12.16b, v9.16b, v7.16b, v29.16b //AES final block - result 2518 1.1 christos 2519 1.1 christos pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low 2520 1.1 christos 2521 1.1 christos pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid 2522 1.1 christos 2523 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low 2524 1.1 christos 2525 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid 2526 1.1 christos .L128_dec_blocks_less_than_1: //blocks left <= 1 2527 1.1 christos 2528 1.1 christos and x1, x1, #127 //bit_length %= 128 2529 1.1 christos 2530 1.1 christos sub x1, x1, #128 //bit_length -= 128 2531 1.1 christos 2532 
1.1 christos neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) 2533 1.1 christos 2534 1.1 christos mvn x6, xzr //temp0_x = 0xffffffffffffffff 2535 1.1 christos and x1, x1, #127 //bit_length %= 128 2536 1.1 christos 2537 1.1 christos lsr x6, x6, x1 //temp0_x is mask for top 64b of last block 2538 1.1 christos cmp x1, #64 2539 1.1 christos mvn x7, xzr //temp1_x = 0xffffffffffffffff 2540 1.1 christos 2541 1.1 christos csel x13, x7, x6, lt 2542 1.1 christos csel x14, x6, xzr, lt 2543 1.1 christos 2544 1.1 christos mov v0.d[1], x14 2545 1.1 christos mov v0.d[0], x13 //ctr0b is mask for last block 2546 1.1 christos 2547 1.1 christos ldr q20, [x3, #32] //load h1l | h1h 2548 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 2549 1.1 christos ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored 2550 1.1 christos 2551 1.1 christos and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits 2552 1.1 christos 2553 1.1 christos rev64 v8.16b, v9.16b //GHASH final block 2554 1.1 christos 2555 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 2556 1.1 christos 2557 1.1 christos pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high 2558 1.1 christos ins v16.d[0], v8.d[1] //GHASH final block - mid 2559 1.1 christos 2560 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final block - high 2561 1.1 christos eor v16.8b, v16.8b, v8.8b //GHASH final block - mid 2562 1.1 christos 2563 1.1 christos bif v12.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing 2564 1.1 christos 2565 1.1 christos pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid 2566 1.1 christos st1 { v12.16b}, [x2] //store all 16B 2567 1.1 christos 2568 1.1 christos pmull v26.1q, v8.1d, v20.1d //GHASH final block - low 2569 1.1 christos 2570 1.1 christos eor v18.16b, v18.16b, v16.16b //GHASH final block - mid 2571 1.1 christos ldr d16, [x10] //MODULO - load modulo constant 2572 1.1 christos 2573 
1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final block - low 2574 1.1 christos 2575 1.1 christos eor v14.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 2576 1.1 christos 2577 1.1 christos pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 2578 1.1 christos ext v17.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 2579 1.1 christos 2580 1.1 christos eor v18.16b, v18.16b, v14.16b //MODULO - karatsuba tidy up 2581 1.1 christos 2582 1.1 christos .inst 0xce115652 //eor3 v18.16b, v18.16b, v17.16b, v21.16b //MODULO - fold into mid 2583 1.1 christos 2584 1.1 christos pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 2585 1.1 christos ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 2586 1.1 christos 2587 1.1 christos .inst 0xce124673 //eor3 v19.16b, v19.16b, v18.16b, v17.16b //MODULO - fold into low 2588 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 2589 1.1 christos rev64 v19.16b, v19.16b 2590 1.1 christos st1 { v19.16b }, [x3] 2591 1.1 christos rev32 v30.16b, v30.16b 2592 1.1 christos 2593 1.1 christos str q30, [x16] //store the updated counter 2594 1.1 christos 2595 1.1 christos mov x0, x9 2596 1.1 christos 2597 1.1 christos ldp d10, d11, [sp, #16] 2598 1.1 christos ldp d12, d13, [sp, #32] 2599 1.1 christos ldp d14, d15, [sp, #48] 2600 1.1 christos ldp d8, d9, [sp], #80 2601 1.1 christos ret 2602 1.1 christos .L128_dec_ret: 2603 1.1 christos mov w0, #0x0 2604 1.1 christos ret 2605 1.1 christos .size unroll8_eor3_aes_gcm_dec_128_kernel,.-unroll8_eor3_aes_gcm_dec_128_kernel 2606 1.1 christos .globl unroll8_eor3_aes_gcm_enc_192_kernel 2607 1.1 christos .type unroll8_eor3_aes_gcm_enc_192_kernel,%function 2608 1.1 christos .align 4 2609 1.1 christos unroll8_eor3_aes_gcm_enc_192_kernel: 2610 1.1 christos AARCH64_VALID_CALL_TARGET 2611 1.1 christos cbz x1, .L192_enc_ret 2612 1.1 christos stp d8, d9, [sp, #-80]! 
2613 1.1 christos lsr x9, x1, #3 2614 1.1 christos mov x16, x4 2615 1.1 christos mov x8, x5 2616 1.1 christos stp d10, d11, [sp, #16] 2617 1.1 christos stp d12, d13, [sp, #32] 2618 1.1 christos stp d14, d15, [sp, #48] 2619 1.1 christos mov x5, #0xc200000000000000 2620 1.1 christos stp x5, xzr, [sp, #64] 2621 1.1 christos add x10, sp, #64 2622 1.1 christos 2623 1.1 christos mov x5, x9 2624 1.1 christos ld1 { v0.16b}, [x16] //CTR block 0 2625 1.1 christos 2626 1.1 christos mov x15, #0x100000000 //set up counter increment 2627 1.1 christos movi v31.16b, #0x0 2628 1.1 christos mov v31.d[1], x15 2629 1.1 christos 2630 1.1 christos rev32 v30.16b, v0.16b //set up reversed counter 2631 1.1 christos 2632 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 0 2633 1.1 christos 2634 1.1 christos rev32 v1.16b, v30.16b //CTR block 1 2635 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 1 2636 1.1 christos 2637 1.1 christos rev32 v2.16b, v30.16b //CTR block 2 2638 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 2 2639 1.1 christos 2640 1.1 christos rev32 v3.16b, v30.16b //CTR block 3 2641 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 3 2642 1.1 christos 2643 1.1 christos rev32 v4.16b, v30.16b //CTR block 4 2644 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 4 2645 1.1 christos sub x5, x5, #1 //byte_len - 1 2646 1.1 christos 2647 1.1 christos and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 2648 1.1 christos 2649 1.1 christos rev32 v5.16b, v30.16b //CTR block 5 2650 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 5 2651 1.1 christos ldp q26, q27, [x8, #0] //load rk0, rk1 2652 1.1 christos 2653 1.1 christos add x5, x5, x0 2654 1.1 christos 2655 1.1 christos rev32 v6.16b, v30.16b //CTR block 6 2656 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 6 2657 1.1 christos 2658 1.1 christos rev32 v7.16b, v30.16b //CTR block 7 2659 1.1 christos 2660 1.1 christos aese v5.16b, v26.16b 2661 1.1 
christos aesmc v5.16b, v5.16b //AES block 5 - round 0 2662 1.1 christos aese v4.16b, v26.16b 2663 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 0 2664 1.1 christos aese v3.16b, v26.16b 2665 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 0 2666 1.1 christos 2667 1.1 christos aese v0.16b, v26.16b 2668 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 0 2669 1.1 christos aese v1.16b, v26.16b 2670 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 0 2671 1.1 christos aese v7.16b, v26.16b 2672 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 0 2673 1.1 christos 2674 1.1 christos aese v6.16b, v26.16b 2675 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 0 2676 1.1 christos aese v2.16b, v26.16b 2677 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 0 2678 1.1 christos ldp q28, q26, [x8, #32] //load rk2, rk3 2679 1.1 christos 2680 1.1 christos aese v5.16b, v27.16b 2681 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 1 2682 1.1 christos aese v7.16b, v27.16b 2683 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 1 2684 1.1 christos 2685 1.1 christos aese v2.16b, v27.16b 2686 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 1 2687 1.1 christos aese v3.16b, v27.16b 2688 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 1 2689 1.1 christos aese v6.16b, v27.16b 2690 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 1 2691 1.1 christos 2692 1.1 christos aese v5.16b, v28.16b 2693 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 2 2694 1.1 christos aese v4.16b, v27.16b 2695 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 1 2696 1.1 christos aese v0.16b, v27.16b 2697 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 1 2698 1.1 christos 2699 1.1 christos aese v1.16b, v27.16b 2700 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 1 2701 1.1 christos aese v7.16b, v28.16b 2702 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 2 2703 1.1 christos aese v3.16b, v28.16b 2704 
1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 2 2705 1.1 christos 2706 1.1 christos aese v2.16b, v28.16b 2707 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 2 2708 1.1 christos aese v0.16b, v28.16b 2709 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 2 2710 1.1 christos 2711 1.1 christos aese v1.16b, v28.16b 2712 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 2 2713 1.1 christos aese v4.16b, v28.16b 2714 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 2 2715 1.1 christos aese v6.16b, v28.16b 2716 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 2 2717 1.1 christos 2718 1.1 christos ldp q27, q28, [x8, #64] //load rk4, rk5 2719 1.1 christos aese v4.16b, v26.16b 2720 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 3 2721 1.1 christos 2722 1.1 christos aese v7.16b, v26.16b 2723 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 3 2724 1.1 christos aese v3.16b, v26.16b 2725 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 3 2726 1.1 christos aese v2.16b, v26.16b 2727 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 3 2728 1.1 christos 2729 1.1 christos aese v1.16b, v26.16b 2730 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 3 2731 1.1 christos 2732 1.1 christos aese v0.16b, v26.16b 2733 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 3 2734 1.1 christos 2735 1.1 christos aese v6.16b, v26.16b 2736 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 3 2737 1.1 christos 2738 1.1 christos aese v0.16b, v27.16b 2739 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 4 2740 1.1 christos aese v1.16b, v27.16b 2741 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 4 2742 1.1 christos aese v5.16b, v26.16b 2743 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 3 2744 1.1 christos 2745 1.1 christos aese v3.16b, v27.16b 2746 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 4 2747 1.1 christos aese v2.16b, v27.16b 2748 1.1 christos aesmc v2.16b, v2.16b //AES block 
2 - round 4 2749 1.1 christos aese v4.16b, v27.16b 2750 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 4 2751 1.1 christos 2752 1.1 christos aese v6.16b, v27.16b 2753 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 4 2754 1.1 christos aese v7.16b, v27.16b 2755 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 4 2756 1.1 christos aese v5.16b, v27.16b 2757 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 4 2758 1.1 christos 2759 1.1 christos aese v1.16b, v28.16b 2760 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 5 2761 1.1 christos ldp q26, q27, [x8, #96] //load rk6, rk7 2762 1.1 christos aese v2.16b, v28.16b 2763 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 5 2764 1.1 christos 2765 1.1 christos aese v4.16b, v28.16b 2766 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 5 2767 1.1 christos aese v7.16b, v28.16b 2768 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 5 2769 1.1 christos aese v0.16b, v28.16b 2770 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 5 2771 1.1 christos 2772 1.1 christos aese v5.16b, v28.16b 2773 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 5 2774 1.1 christos aese v6.16b, v28.16b 2775 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 5 2776 1.1 christos aese v3.16b, v28.16b 2777 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 5 2778 1.1 christos 2779 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 7 2780 1.1 christos 2781 1.1 christos aese v5.16b, v26.16b 2782 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 6 2783 1.1 christos aese v4.16b, v26.16b 2784 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 6 2785 1.1 christos aese v3.16b, v26.16b 2786 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 6 2787 1.1 christos 2788 1.1 christos aese v2.16b, v26.16b 2789 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 6 2790 1.1 christos aese v6.16b, v26.16b 2791 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 6 2792 1.1 
christos aese v1.16b, v26.16b 2793 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 6 2794 1.1 christos 2795 1.1 christos aese v0.16b, v26.16b 2796 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 6 2797 1.1 christos aese v7.16b, v26.16b 2798 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 6 2799 1.1 christos ldp q28, q26, [x8, #128] //load rk8, rk9 2800 1.1 christos 2801 1.1 christos aese v6.16b, v27.16b 2802 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 7 2803 1.1 christos aese v3.16b, v27.16b 2804 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 7 2805 1.1 christos 2806 1.1 christos aese v4.16b, v27.16b 2807 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 7 2808 1.1 christos aese v0.16b, v27.16b 2809 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 7 2810 1.1 christos 2811 1.1 christos aese v7.16b, v27.16b 2812 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 7 2813 1.1 christos aese v1.16b, v27.16b 2814 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 7 2815 1.1 christos 2816 1.1 christos aese v2.16b, v27.16b 2817 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 7 2818 1.1 christos aese v5.16b, v27.16b 2819 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 7 2820 1.1 christos 2821 1.1 christos aese v7.16b, v28.16b 2822 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 8 2823 1.1 christos aese v0.16b, v28.16b 2824 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 8 2825 1.1 christos 2826 1.1 christos aese v4.16b, v28.16b 2827 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 8 2828 1.1 christos aese v3.16b, v28.16b 2829 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 8 2830 1.1 christos aese v5.16b, v28.16b 2831 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 8 2832 1.1 christos 2833 1.1 christos aese v2.16b, v28.16b 2834 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 8 2835 1.1 christos aese v1.16b, v28.16b 2836 1.1 christos aesmc v1.16b, 
v1.16b //AES block 1 - round 8 2837 1.1 christos aese v6.16b, v28.16b 2838 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 8 2839 1.1 christos 2840 1.1 christos add x4, x0, x1, lsr #3 //end_input_ptr 2841 1.1 christos cmp x0, x5 //check if we have <= 8 blocks 2842 1.1 christos aese v3.16b, v26.16b 2843 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 9 2844 1.1 christos 2845 1.1 christos ld1 { v19.16b}, [x3] 2846 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 2847 1.1 christos rev64 v19.16b, v19.16b 2848 1.1 christos ldp q27, q28, [x8, #160] //load rk10, rk11 2849 1.1 christos 2850 1.1 christos aese v6.16b, v26.16b 2851 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 9 2852 1.1 christos aese v1.16b, v26.16b 2853 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 9 2854 1.1 christos 2855 1.1 christos aese v5.16b, v26.16b 2856 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 9 2857 1.1 christos aese v2.16b, v26.16b 2858 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 9 2859 1.1 christos 2860 1.1 christos aese v0.16b, v26.16b 2861 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 9 2862 1.1 christos aese v4.16b, v26.16b 2863 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 9 2864 1.1 christos 2865 1.1 christos aese v6.16b, v27.16b 2866 1.1 christos aesmc v6.16b, v6.16b //AES block 14 - round 10 2867 1.1 christos aese v7.16b, v26.16b 2868 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 9 2869 1.1 christos aese v3.16b, v27.16b 2870 1.1 christos aesmc v3.16b, v3.16b //AES block 11 - round 10 2871 1.1 christos 2872 1.1 christos aese v1.16b, v27.16b 2873 1.1 christos aesmc v1.16b, v1.16b //AES block 9 - round 10 2874 1.1 christos aese v5.16b, v27.16b 2875 1.1 christos aesmc v5.16b, v5.16b //AES block 13 - round 10 2876 1.1 christos aese v4.16b, v27.16b 2877 1.1 christos aesmc v4.16b, v4.16b //AES block 12 - round 10 2878 1.1 christos 2879 1.1 christos aese v0.16b, v27.16b 2880 1.1 christos aesmc v0.16b, v0.16b 
//AES block 8 - round 10 2881 1.1 christos aese v2.16b, v27.16b 2882 1.1 christos aesmc v2.16b, v2.16b //AES block 10 - round 10 2883 1.1 christos aese v7.16b, v27.16b 2884 1.1 christos aesmc v7.16b, v7.16b //AES block 15 - round 10 2885 1.1 christos 2886 1.1 christos aese v6.16b, v28.16b //AES block 14 - round 11 2887 1.1 christos aese v3.16b, v28.16b //AES block 11 - round 11 2888 1.1 christos 2889 1.1 christos aese v4.16b, v28.16b //AES block 12 - round 11 2890 1.1 christos aese v7.16b, v28.16b //AES block 15 - round 11 2891 1.1 christos ldr q26, [x8, #192] //load rk12 2892 1.1 christos 2893 1.1 christos aese v1.16b, v28.16b //AES block 9 - round 11 2894 1.1 christos aese v5.16b, v28.16b //AES block 13 - round 11 2895 1.1 christos 2896 1.1 christos aese v2.16b, v28.16b //AES block 10 - round 11 2897 1.1 christos aese v0.16b, v28.16b //AES block 8 - round 11 2898 1.1 christos b.ge .L192_enc_tail //handle tail 2899 1.1 christos 2900 1.1 christos ldp q8, q9, [x0], #32 //AES block 0, 1 - load plaintext 2901 1.1 christos 2902 1.1 christos ldp q10, q11, [x0], #32 //AES block 2, 3 - load plaintext 2903 1.1 christos 2904 1.1 christos ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext 2905 1.1 christos 2906 1.1 christos ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext 2907 1.1 christos 2908 1.1 christos .inst 0xce006908 //eor3 v8.16b, v8.16b, v0.16b, v26.16b //AES block 0 - result 2909 1.1 christos rev32 v0.16b, v30.16b //CTR block 8 2910 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8 2911 1.1 christos 2912 1.1 christos .inst 0xce03696b //eor3 v11.16b, v11.16b, v3.16b, v26.16b //AES block 3 - result 2913 1.1 christos .inst 0xce016929 //eor3 v9.16b, v9.16b, v1.16b, v26.16b //AES block 1 - result 2914 1.1 christos 2915 1.1 christos rev32 v1.16b, v30.16b //CTR block 9 2916 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 9 2917 1.1 christos .inst 0xce04698c //eor3 v12.16b, v12.16b, v4.16b, v26.16b //AES block 4 - result 2918 1.1 christos 2919 1.1 
christos .inst 0xce0569ad //eor3 v13.16b, v13.16b, v5.16b, v26.16b //AES block 5 - result 2920 1.1 christos .inst 0xce0769ef //eor3 v15.16b, v15.16b, v7.16b, v26.16b //AES block 7 - result 2921 1.1 christos stp q8, q9, [x2], #32 //AES block 0, 1 - store result 2922 1.1 christos 2923 1.1 christos .inst 0xce02694a //eor3 v10.16b, v10.16b, v2.16b, v26.16b //AES block 2 - result 2924 1.1 christos rev32 v2.16b, v30.16b //CTR block 10 2925 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 10 2926 1.1 christos 2927 1.1 christos stp q10, q11, [x2], #32 //AES block 2, 3 - store result 2928 1.1 christos cmp x0, x5 //check if we have <= 8 blocks 2929 1.1 christos 2930 1.1 christos rev32 v3.16b, v30.16b //CTR block 11 2931 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 11 2932 1.1 christos .inst 0xce0669ce //eor3 v14.16b, v14.16b, v6.16b, v26.16b //AES block 6 - result 2933 1.1 christos 2934 1.1 christos stp q12, q13, [x2], #32 //AES block 4, 5 - store result 2935 1.1 christos 2936 1.1 christos rev32 v4.16b, v30.16b //CTR block 12 2937 1.1 christos stp q14, q15, [x2], #32 //AES block 6, 7 - store result 2938 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 12 2939 1.1 christos 2940 1.1 christos b.ge .L192_enc_prepretail //do prepretail 2941 1.1 christos 2942 1.1 christos .L192_enc_main_loop: //main loop start 2943 1.1 christos rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free) 2944 1.1 christos ldp q26, q27, [x8, #0] //load rk0, rk1 2945 1.1 christos rev64 v10.16b, v10.16b //GHASH block 8k+2 2946 1.1 christos 2947 1.1 christos rev32 v5.16b, v30.16b //CTR block 8k+13 2948 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+13 2949 1.1 christos ldr q23, [x3, #176] //load h7l | h7h 2950 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 2951 1.1 christos ldr q25, [x3, #208] //load h8l | h8h 2952 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 2953 1.1 christos 2954 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 2955 1.1 christos rev64 v8.16b, v8.16b 
//GHASH block 8k 2956 1.1 christos ldr q20, [x3, #128] //load h5l | h5h 2957 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 2958 1.1 christos ldr q22, [x3, #160] //load h6l | h6h 2959 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 2960 1.1 christos 2961 1.1 christos rev64 v9.16b, v9.16b //GHASH block 8k+1 2962 1.1 christos rev32 v6.16b, v30.16b //CTR block 8k+14 2963 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+14 2964 1.1 christos 2965 1.1 christos eor v8.16b, v8.16b, v19.16b //PRE 1 2966 1.1 christos rev64 v11.16b, v11.16b //GHASH block 8k+3 2967 1.1 christos rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free) 2968 1.1 christos 2969 1.1 christos aese v0.16b, v26.16b 2970 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 2971 1.1 christos rev32 v7.16b, v30.16b //CTR block 8k+15 2972 1.1 christos aese v1.16b, v26.16b 2973 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 2974 1.1 christos 2975 1.1 christos aese v3.16b, v26.16b 2976 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 2977 1.1 christos aese v5.16b, v26.16b 2978 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 2979 1.1 christos aese v2.16b, v26.16b 2980 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 2981 1.1 christos 2982 1.1 christos aese v7.16b, v26.16b 2983 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 2984 1.1 christos aese v4.16b, v26.16b 2985 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 2986 1.1 christos aese v6.16b, v26.16b 2987 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 2988 1.1 christos 2989 1.1 christos ldp q28, q26, [x8, #32] //load rk2, rk3 2990 1.1 christos pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 2991 1.1 christos aese v0.16b, v27.16b 2992 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 2993 1.1 christos 2994 1.1 christos aese v4.16b, v27.16b 2995 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 2996 1.1 christos 
pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 2997 1.1 christos pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 2998 1.1 christos 2999 1.1 christos trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 3000 1.1 christos aese v3.16b, v27.16b 3001 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 3002 1.1 christos ldr q21, [x3, #144] //load h6k | h5k 3003 1.1 christos ldr q24, [x3, #192] //load h8k | h7k 3004 1.1 christos 3005 1.1 christos pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 3006 1.1 christos pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 3007 1.1 christos trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 3008 1.1 christos 3009 1.1 christos aese v1.16b, v27.16b 3010 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 3011 1.1 christos aese v2.16b, v27.16b 3012 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 3013 1.1 christos aese v5.16b, v27.16b 3014 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 3015 1.1 christos 3016 1.1 christos eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 3017 1.1 christos aese v6.16b, v27.16b 3018 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 3019 1.1 christos aese v7.16b, v27.16b 3020 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 3021 1.1 christos 3022 1.1 christos pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 3023 1.1 christos eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 3024 1.1 christos aese v1.16b, v28.16b 3025 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 3026 1.1 christos 3027 1.1 christos aese v3.16b, v28.16b 3028 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 3029 1.1 christos aese v4.16b, v28.16b 3030 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 3031 1.1 christos aese v6.16b, v28.16b 3032 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 3033 1.1 christos 3034 1.1 christos aese v5.16b, v28.16b 3035 1.1 christos aesmc 
v5.16b, v5.16b //AES block 8k+13 - round 2 3036 1.1 christos aese v1.16b, v26.16b 3037 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 3038 1.1 christos .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 3039 1.1 christos 3040 1.1 christos pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 3041 1.1 christos aese v7.16b, v28.16b 3042 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 3043 1.1 christos aese v4.16b, v26.16b 3044 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 3045 1.1 christos 3046 1.1 christos aese v2.16b, v28.16b 3047 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 3048 1.1 christos trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 3049 1.1 christos aese v0.16b, v28.16b 3050 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 3051 1.1 christos 3052 1.1 christos trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 3053 1.1 christos aese v3.16b, v26.16b 3054 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 3055 1.1 christos ldp q27, q28, [x8, #64] //load rk4, rk5 3056 1.1 christos 3057 1.1 christos aese v0.16b, v26.16b 3058 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 3059 1.1 christos eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 3060 1.1 christos ldr q23, [x3, #80] //load h3l | h3h 3061 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 3062 1.1 christos ldr q25, [x3, #112] //load h4l | h4h 3063 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 3064 1.1 christos 3065 1.1 christos pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 3066 1.1 christos pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 3067 1.1 christos pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 3068 1.1 christos 3069 1.1 christos aese v5.16b, v26.16b 3070 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 3071 1.1 christos eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 3072 1.1 christos trn1 v16.2d, 
v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 3073 1.1 christos 3074 1.1 christos eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 3075 1.1 christos aese v6.16b, v26.16b 3076 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 3077 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 3078 1.1 christos 3079 1.1 christos aese v1.16b, v27.16b 3080 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 3081 1.1 christos aese v3.16b, v27.16b 3082 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 3083 1.1 christos aese v7.16b, v26.16b 3084 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 3085 1.1 christos 3086 1.1 christos pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 3087 1.1 christos aese v6.16b, v27.16b 3088 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 3089 1.1 christos aese v2.16b, v26.16b 3090 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 3091 1.1 christos 3092 1.1 christos pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 3093 1.1 christos aese v0.16b, v27.16b 3094 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 3095 1.1 christos aese v4.16b, v27.16b 3096 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 3097 1.1 christos 3098 1.1 christos aese v2.16b, v27.16b 3099 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 3100 1.1 christos aese v5.16b, v27.16b 3101 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 3102 1.1 christos aese v7.16b, v27.16b 3103 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 3104 1.1 christos 3105 1.1 christos .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 3106 1.1 christos aese v4.16b, v28.16b 3107 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 3108 1.1 christos ldr q20, [x3, #32] //load h1l | h1h 3109 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 3110 1.1 christos ldr q22, [x3, 
#64] //load h2l | h2h 3111 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 3112 1.1 christos 3113 1.1 christos ldp q26, q27, [x8, #96] //load rk6, rk7 3114 1.1 christos aese v2.16b, v28.16b 3115 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 3116 1.1 christos rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free) 3117 1.1 christos 3118 1.1 christos rev64 v14.16b, v14.16b //GHASH block 8k+6 (t0, t1, and t2 free) 3119 1.1 christos pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 3120 1.1 christos pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 3121 1.1 christos 3122 1.1 christos aese v5.16b, v28.16b 3123 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 3124 1.1 christos trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 3125 1.1 christos 3126 1.1 christos aese v6.16b, v28.16b 3127 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 3128 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 3129 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 3130 1.1 christos 3131 1.1 christos aese v1.16b, v28.16b 3132 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 3133 1.1 christos pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 3134 1.1 christos eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 3135 1.1 christos 3136 1.1 christos aese v3.16b, v28.16b 3137 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 3138 1.1 christos aese v7.16b, v28.16b 3139 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 3140 1.1 christos aese v0.16b, v28.16b 3141 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 3142 1.1 christos 3143 1.1 christos pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 3144 1.1 christos aese v4.16b, v26.16b 3145 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 3146 1.1 christos trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 3147 1.1 christos 3148 1.1 christos aese v0.16b, v26.16b 3149 1.1 christos aesmc v0.16b, v0.16b 
//AES block 8k+8 - round 6 3150 1.1 christos aese v3.16b, v26.16b 3151 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 3152 1.1 christos pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 3153 1.1 christos 3154 1.1 christos pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 3155 1.1 christos trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 3156 1.1 christos aese v2.16b, v26.16b 3157 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 3158 1.1 christos 3159 1.1 christos aese v6.16b, v26.16b 3160 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 3161 1.1 christos aese v5.16b, v26.16b 3162 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 3163 1.1 christos 3164 1.1 christos aese v7.16b, v26.16b 3165 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 3166 1.1 christos aese v2.16b, v27.16b 3167 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 3168 1.1 christos aese v1.16b, v26.16b 3169 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 3170 1.1 christos 3171 1.1 christos aese v6.16b, v27.16b 3172 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 3173 1.1 christos eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 3174 1.1 christos 3175 1.1 christos pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 3176 1.1 christos ldp q28, q26, [x8, #128] //load rk8, rk9 3177 1.1 christos pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 3178 1.1 christos 3179 1.1 christos aese v4.16b, v27.16b 3180 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 3181 1.1 christos pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 3182 1.1 christos aese v5.16b, v27.16b 3183 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 3184 1.1 christos 3185 1.1 christos .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 3186 1.1 christos aese v7.16b, v27.16b 3187 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - 
round 7 3188 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+15 3189 1.1 christos 3190 1.1 christos ldr d16, [x10] //MODULO - load modulo constant 3191 1.1 christos .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 3192 1.1 christos aese v0.16b, v27.16b 3193 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 3194 1.1 christos 3195 1.1 christos pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 3196 1.1 christos pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 3197 1.1 christos aese v3.16b, v27.16b 3198 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 3199 1.1 christos 3200 1.1 christos aese v5.16b, v28.16b 3201 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 3202 1.1 christos aese v4.16b, v28.16b 3203 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 3204 1.1 christos aese v0.16b, v28.16b 3205 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 3206 1.1 christos 3207 1.1 christos aese v6.16b, v28.16b 3208 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 3209 1.1 christos .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 3210 1.1 christos aese v1.16b, v27.16b 3211 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 3212 1.1 christos 3213 1.1 christos aese v7.16b, v28.16b 3214 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 3215 1.1 christos aese v2.16b, v28.16b 3216 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 3217 1.1 christos pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 3218 1.1 christos 3219 1.1 christos aese v1.16b, v28.16b 3220 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 3221 1.1 christos aese v3.16b, v28.16b 3222 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 3223 1.1 christos ldp q27, q28, [x8, #160] //load rk10, rk11 3224 1.1 christos 3225 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b 
//GHASH block 8k+6, 8k+7 - low 3226 1.1 christos rev32 v20.16b, v30.16b //CTR block 8k+16 3227 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+16 3228 1.1 christos 3229 1.1 christos aese v2.16b, v26.16b 3230 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 3231 1.1 christos .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 3232 1.1 christos .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 3233 1.1 christos 3234 1.1 christos aese v6.16b, v26.16b 3235 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 3236 1.1 christos aese v3.16b, v26.16b 3237 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 3238 1.1 christos ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load plaintext 3239 1.1 christos 3240 1.1 christos pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 3241 1.1 christos rev32 v22.16b, v30.16b //CTR block 8k+17 3242 1.1 christos aese v0.16b, v26.16b 3243 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 3244 1.1 christos 3245 1.1 christos aese v4.16b, v26.16b 3246 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 3247 1.1 christos aese v1.16b, v26.16b 3248 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 3249 1.1 christos aese v7.16b, v26.16b 3250 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 3251 1.1 christos 3252 1.1 christos .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 3253 1.1 christos aese v5.16b, v26.16b 3254 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 3255 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+17 3256 1.1 christos 3257 1.1 christos aese v2.16b, v27.16b 3258 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 3259 1.1 christos aese v4.16b, v27.16b 3260 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 3261 1.1 christos ldr q26, [x8, #192] //load rk12 3262 1.1 christos ext 
v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 3263 1.1 christos 3264 1.1 christos aese v0.16b, v27.16b 3265 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 3266 1.1 christos aese v7.16b, v27.16b 3267 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 3268 1.1 christos ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load plaintext 3269 1.1 christos 3270 1.1 christos aese v4.16b, v28.16b //AES block 8k+12 - round 11 3271 1.1 christos .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 3272 1.1 christos ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load plaintext 3273 1.1 christos 3274 1.1 christos ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load plaintext 3275 1.1 christos aese v2.16b, v28.16b //AES block 8k+10 - round 11 3276 1.1 christos aese v1.16b, v27.16b 3277 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 3278 1.1 christos 3279 1.1 christos rev32 v23.16b, v30.16b //CTR block 8k+18 3280 1.1 christos aese v5.16b, v27.16b 3281 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 3282 1.1 christos 3283 1.1 christos aese v3.16b, v27.16b 3284 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 3285 1.1 christos pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 3286 1.1 christos 3287 1.1 christos aese v6.16b, v27.16b 3288 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 3289 1.1 christos aese v5.16b, v28.16b //AES block 8k+13 - round 11 3290 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+18 3291 1.1 christos 3292 1.1 christos aese v7.16b, v28.16b //AES block 8k+15 - round 11 3293 1.1 christos aese v0.16b, v28.16b //AES block 8k+8 - round 11 3294 1.1 christos .inst 0xce04698c //eor3 v12.16b, v12.16b, v4.16b, v26.16b //AES block 4 - result 3295 1.1 christos 3296 1.1 christos aese v6.16b, v28.16b //AES block 8k+14 - round 11 3297 1.1 christos aese v3.16b, v28.16b //AES block 8k+11 - round 11 3298 1.1 christos aese 
v1.16b, v28.16b //AES block 8k+9 - round 11 3299 1.1 christos 3300 1.1 christos rev32 v25.16b, v30.16b //CTR block 8k+19 3301 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+19 3302 1.1 christos .inst 0xce0769ef //eor3 v15.16b, v15.16b, v7.16b, v26.16b //AES block 7 - result 3303 1.1 christos 3304 1.1 christos .inst 0xce02694a //eor3 v10.16b, v10.16b, v2.16b, v26.16b //AES block 8k+10 - result 3305 1.1 christos .inst 0xce006908 //eor3 v8.16b, v8.16b, v0.16b, v26.16b //AES block 8k+8 - result 3306 1.1 christos mov v2.16b, v23.16b //CTR block 8k+18 3307 1.1 christos 3308 1.1 christos .inst 0xce016929 //eor3 v9.16b, v9.16b, v1.16b, v26.16b //AES block 8k+9 - result 3309 1.1 christos mov v1.16b, v22.16b //CTR block 8k+17 3310 1.1 christos stp q8, q9, [x2], #32 //AES block 8k+8, 8k+9 - store result 3311 1.1 christos ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 3312 1.1 christos 3313 1.1 christos .inst 0xce0669ce //eor3 v14.16b, v14.16b, v6.16b, v26.16b //AES block 6 - result 3314 1.1 christos mov v0.16b, v20.16b //CTR block 8k+16 3315 1.1 christos rev32 v4.16b, v30.16b //CTR block 8k+20 3316 1.1 christos 3317 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+20 3318 1.1 christos .inst 0xce0569ad //eor3 v13.16b, v13.16b, v5.16b, v26.16b //AES block 5 - result 3319 1.1 christos .inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 3320 1.1 christos 3321 1.1 christos .inst 0xce03696b //eor3 v11.16b, v11.16b, v3.16b, v26.16b //AES block 8k+11 - result 3322 1.1 christos mov v3.16b, v25.16b //CTR block 8k+19 3323 1.1 christos 3324 1.1 christos stp q10, q11, [x2], #32 //AES block 8k+10, 8k+11 - store result 3325 1.1 christos 3326 1.1 christos stp q12, q13, [x2], #32 //AES block 8k+12, 8k+13 - store result 3327 1.1 christos 3328 1.1 christos cmp x0, x5 //.LOOP CONTROL 3329 1.1 christos stp q14, q15, [x2], #32 //AES block 8k+14, 8k+15 - store result 3330 1.1 christos b.lt .L192_enc_main_loop 3331 1.1 christos 3332 1.1 
christos .L192_enc_prepretail: //PREPRETAIL 3333 1.1 christos rev32 v5.16b, v30.16b //CTR block 8k+13 3334 1.1 christos ldp q26, q27, [x8, #0] //load rk0, rk1 3335 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+13 3336 1.1 christos 3337 1.1 christos ldr q23, [x3, #176] //load h7l | h7h 3338 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 3339 1.1 christos ldr q25, [x3, #208] //load h8l | h8h 3340 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 3341 1.1 christos rev64 v8.16b, v8.16b //GHASH block 8k 3342 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 3343 1.1 christos 3344 1.1 christos rev32 v6.16b, v30.16b //CTR block 8k+14 3345 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+14 3346 1.1 christos ldr q21, [x3, #144] //load h6k | h5k 3347 1.1 christos ldr q24, [x3, #192] //load h8k | h7k 3348 1.1 christos 3349 1.1 christos rev64 v11.16b, v11.16b //GHASH block 8k+3 3350 1.1 christos rev64 v10.16b, v10.16b //GHASH block 8k+2 3351 1.1 christos ldr q20, [x3, #128] //load h5l | h5h 3352 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 3353 1.1 christos ldr q22, [x3, #160] //load h6l | h6h 3354 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 3355 1.1 christos 3356 1.1 christos eor v8.16b, v8.16b, v19.16b //PRE 1 3357 1.1 christos rev32 v7.16b, v30.16b //CTR block 8k+15 3358 1.1 christos rev64 v9.16b, v9.16b //GHASH block 8k+1 3359 1.1 christos 3360 1.1 christos aese v5.16b, v26.16b 3361 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 3362 1.1 christos aese v2.16b, v26.16b 3363 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 3364 1.1 christos aese v3.16b, v26.16b 3365 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 3366 1.1 christos 3367 1.1 christos pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 3368 1.1 christos aese v0.16b, v26.16b 3369 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 3370 1.1 christos aese v6.16b, v26.16b 3371 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 
3372 1.1 christos 3373 1.1 christos aese v1.16b, v26.16b 3374 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 3375 1.1 christos aese v4.16b, v26.16b 3376 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 3377 1.1 christos pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 3378 1.1 christos 3379 1.1 christos aese v6.16b, v27.16b 3380 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 3381 1.1 christos pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 3382 1.1 christos trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 3383 1.1 christos 3384 1.1 christos trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 3385 1.1 christos aese v7.16b, v26.16b 3386 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 3387 1.1 christos ldp q28, q26, [x8, #32] //load rk2, rk3 3388 1.1 christos 3389 1.1 christos pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 3390 1.1 christos eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 3391 1.1 christos aese v2.16b, v27.16b 3392 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 3393 1.1 christos 3394 1.1 christos aese v5.16b, v27.16b 3395 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 3396 1.1 christos eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 3397 1.1 christos aese v1.16b, v27.16b 3398 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 3399 1.1 christos 3400 1.1 christos aese v7.16b, v27.16b 3401 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 3402 1.1 christos pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 3403 1.1 christos pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 3404 1.1 christos 3405 1.1 christos aese v3.16b, v27.16b 3406 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 3407 1.1 christos aese v0.16b, v27.16b 3408 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 3409 1.1 christos aese v4.16b, v27.16b 3410 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 
3411 1.1 christos 3412 1.1 christos pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 3413 1.1 christos aese v5.16b, v28.16b 3414 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 3415 1.1 christos eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 3416 1.1 christos 3417 1.1 christos pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 3418 1.1 christos aese v7.16b, v28.16b 3419 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 3420 1.1 christos .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 3421 1.1 christos 3422 1.1 christos aese v5.16b, v26.16b 3423 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 3424 1.1 christos trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 3425 1.1 christos aese v6.16b, v28.16b 3426 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 3427 1.1 christos 3428 1.1 christos aese v0.16b, v28.16b 3429 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 3430 1.1 christos pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 3431 1.1 christos trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 3432 1.1 christos 3433 1.1 christos aese v3.16b, v28.16b 3434 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 3435 1.1 christos rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free) 3436 1.1 christos rev64 v14.16b, v14.16b //GHASH block 8k+6 (t0, t1, and t2 free) 3437 1.1 christos 3438 1.1 christos aese v2.16b, v28.16b 3439 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 3440 1.1 christos aese v1.16b, v28.16b 3441 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 3442 1.1 christos aese v4.16b, v28.16b 3443 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 3444 1.1 christos 3445 1.1 christos eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 3446 1.1 christos pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 3447 1.1 christos ldp q27, q28, [x8, #64] //load rk4, 
rk5 3448 1.1 christos 3449 1.1 christos aese v1.16b, v26.16b 3450 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 3451 1.1 christos aese v6.16b, v26.16b 3452 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 3453 1.1 christos aese v2.16b, v26.16b 3454 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 3455 1.1 christos 3456 1.1 christos eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 3457 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 3458 1.1 christos aese v7.16b, v26.16b 3459 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 3460 1.1 christos 3461 1.1 christos ldr q23, [x3, #80] //load h3l | h3h 3462 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 3463 1.1 christos ldr q25, [x3, #112] //load h4l | h4h 3464 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 3465 1.1 christos aese v3.16b, v26.16b 3466 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 3467 1.1 christos pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 3468 1.1 christos 3469 1.1 christos ldr q20, [x3, #32] //load h1l | h1h 3470 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 3471 1.1 christos ldr q22, [x3, #64] //load h2l | h2h 3472 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 3473 1.1 christos aese v4.16b, v26.16b 3474 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 3475 1.1 christos rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free) 3476 1.1 christos 3477 1.1 christos aese v0.16b, v26.16b 3478 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 3479 1.1 christos pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 3480 1.1 christos aese v6.16b, v27.16b 3481 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 3482 1.1 christos 3483 1.1 christos trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 3484 1.1 christos aese v7.16b, v27.16b 3485 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 3486 1.1 christos aese 
v5.16b, v27.16b 3487 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 3488 1.1 christos 3489 1.1 christos .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 3490 1.1 christos aese v3.16b, v27.16b 3491 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 3492 1.1 christos aese v0.16b, v27.16b 3493 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 3494 1.1 christos 3495 1.1 christos aese v1.16b, v27.16b 3496 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 3497 1.1 christos aese v4.16b, v27.16b 3498 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 3499 1.1 christos aese v2.16b, v27.16b 3500 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 3501 1.1 christos 3502 1.1 christos aese v0.16b, v28.16b 3503 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 3504 1.1 christos rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free) 3505 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 3506 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 3507 1.1 christos 3508 1.1 christos aese v1.16b, v28.16b 3509 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 3510 1.1 christos aese v2.16b, v28.16b 3511 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 3512 1.1 christos ldp q26, q27, [x8, #96] //load rk6, rk7 3513 1.1 christos 3514 1.1 christos pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 3515 1.1 christos pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 3516 1.1 christos pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 3517 1.1 christos 3518 1.1 christos aese v4.16b, v28.16b 3519 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 3520 1.1 christos trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 3521 1.1 christos 3522 1.1 christos pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 3523 1.1 christos pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 3524 1.1 christos pmull v22.1q, v14.1d, 
v22.1d //GHASH block 8k+6 - low 3525 1.1 christos 3526 1.1 christos trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 3527 1.1 christos eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 3528 1.1 christos trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 3529 1.1 christos 3530 1.1 christos aese v5.16b, v28.16b 3531 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 3532 1.1 christos aese v1.16b, v26.16b 3533 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 3534 1.1 christos aese v7.16b, v28.16b 3535 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 3536 1.1 christos 3537 1.1 christos aese v6.16b, v28.16b 3538 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 3539 1.1 christos eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 3540 1.1 christos aese v3.16b, v28.16b 3541 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 3542 1.1 christos 3543 1.1 christos pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 3544 1.1 christos pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 3545 1.1 christos 3546 1.1 christos aese v4.16b, v26.16b 3547 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 3548 1.1 christos aese v5.16b, v26.16b 3549 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 3550 1.1 christos aese v1.16b, v27.16b 3551 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 3552 1.1 christos 3553 1.1 christos aese v0.16b, v26.16b 3554 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 3555 1.1 christos aese v7.16b, v26.16b 3556 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 3557 1.1 christos .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 3558 1.1 christos 3559 1.1 christos aese v2.16b, v26.16b 3560 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 3561 1.1 christos .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 3562 
1.1 christos aese v5.16b, v27.16b 3563 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 3564 1.1 christos 3565 1.1 christos aese v6.16b, v26.16b 3566 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 3567 1.1 christos ldr d16, [x10] //MODULO - load modulo constant 3568 1.1 christos aese v3.16b, v26.16b 3569 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 3570 1.1 christos 3571 1.1 christos pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 3572 1.1 christos aese v0.16b, v27.16b 3573 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 3574 1.1 christos .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 3575 1.1 christos 3576 1.1 christos pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 3577 1.1 christos pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 3578 1.1 christos pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 3579 1.1 christos 3580 1.1 christos aese v4.16b, v27.16b 3581 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 3582 1.1 christos aese v2.16b, v27.16b 3583 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 3584 1.1 christos ldp q28, q26, [x8, #128] //load rk8, rk9 3585 1.1 christos 3586 1.1 christos aese v3.16b, v27.16b 3587 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 3588 1.1 christos .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 3589 1.1 christos 3590 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 3591 1.1 christos .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 3592 1.1 christos 3593 1.1 christos .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 3594 1.1 christos ext v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 3595 1.1 christos aese v7.16b, v27.16b 3596 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - 
round 7 3597 1.1 christos pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 3598 1.1 christos 3599 1.1 christos aese v5.16b, v28.16b 3600 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 3601 1.1 christos aese v1.16b, v28.16b 3602 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 3603 1.1 christos 3604 1.1 christos aese v6.16b, v27.16b 3605 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 3606 1.1 christos aese v2.16b, v28.16b 3607 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 3608 1.1 christos .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 3609 1.1 christos 3610 1.1 christos aese v3.16b, v28.16b 3611 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 3612 1.1 christos aese v5.16b, v26.16b 3613 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 3614 1.1 christos aese v4.16b, v28.16b 3615 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 3616 1.1 christos 3617 1.1 christos aese v0.16b, v28.16b 3618 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 3619 1.1 christos aese v7.16b, v28.16b 3620 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 3621 1.1 christos aese v6.16b, v28.16b 3622 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 3623 1.1 christos 3624 1.1 christos aese v3.16b, v26.16b 3625 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 3626 1.1 christos ldp q27, q28, [x8, #160] //load rk10, rk11 3627 1.1 christos aese v4.16b, v26.16b 3628 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 3629 1.1 christos 3630 1.1 christos aese v2.16b, v26.16b 3631 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 3632 1.1 christos aese v7.16b, v26.16b 3633 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 3634 1.1 christos 3635 1.1 christos ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 3636 1.1 christos aese v6.16b, v26.16b 3637 1.1 christos 
aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 3638 1.1 christos aese v0.16b, v26.16b 3639 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 3640 1.1 christos aese v1.16b, v26.16b 3641 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 3642 1.1 christos 3643 1.1 christos pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 3644 1.1 christos ldr q26, [x8, #192] //load rk12 3645 1.1 christos 3646 1.1 christos aese v7.16b, v27.16b 3647 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 3648 1.1 christos aese v1.16b, v27.16b 3649 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 3650 1.1 christos aese v2.16b, v27.16b 3651 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 3652 1.1 christos 3653 1.1 christos .inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 3654 1.1 christos aese v0.16b, v27.16b 3655 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 3656 1.1 christos aese v3.16b, v27.16b 3657 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 3658 1.1 christos 3659 1.1 christos aese v1.16b, v28.16b //AES block 8k+9 - round 11 3660 1.1 christos aese v7.16b, v28.16b //AES block 8k+15 - round 11 3661 1.1 christos 3662 1.1 christos aese v4.16b, v27.16b 3663 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 3664 1.1 christos aese v3.16b, v28.16b //AES block 8k+11 - round 11 3665 1.1 christos 3666 1.1 christos aese v5.16b, v27.16b 3667 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 3668 1.1 christos aese v6.16b, v27.16b 3669 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 3670 1.1 christos 3671 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+15 3672 1.1 christos aese v2.16b, v28.16b //AES block 8k+10 - round 11 3673 1.1 christos aese v0.16b, v28.16b //AES block 8k+8 - round 11 3674 1.1 christos 3675 1.1 christos aese v6.16b, v28.16b //AES block 8k+14 - round 11 3676 1.1 christos aese v4.16b, v28.16b 
//AES block 8k+12 - round 11 3677 1.1 christos aese v5.16b, v28.16b //AES block 8k+13 - round 11 3678 1.1 christos 3679 1.1 christos .L192_enc_tail: //TAIL 3680 1.1 christos 3681 1.1 christos ldp q20, q21, [x3, #128] //load h5l | h5h into q20; q21 = next 16 bytes at [x3, #144] 3682 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 3683 1.1 christos sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process 3684 1.1 christos 3685 1.1 christos ldr q8, [x0], #16 //AES block 8k+8 - load plaintext 3686 1.1 christos 3687 1.1 christos ldp q24, q25, [x3, #192] //load h8k | h7k into q24; q25 = next 16 bytes at [x3, #208] 3688 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 3689 1.1 christos 3690 1.1 christos mov v29.16b, v26.16b 3691 1.1 christos 3692 1.1 christos ldp q22, q23, [x3, #160] //load h6l | h6h into q22; q23 = next 16 bytes at [x3, #176] 3693 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 3694 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 3695 1.1 christos cmp x5, #112 3696 1.1 christos 3697 1.1 christos .inst 0xce007509 //eor3 v9.16b, v8.16b, v0.16b, v29.16b //AES block 8k+8 - result 3698 1.1 christos ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag 3699 1.1 christos b.gt .L192_enc_blocks_more_than_7 3700 1.1 christos 3701 1.1 christos cmp x5, #96 3702 1.1 christos mov v7.16b, v6.16b 3703 1.1 christos movi v17.8b, #0 3704 1.1 christos 3705 1.1 christos mov v6.16b, v5.16b 3706 1.1 christos movi v19.8b, #0 3707 1.1 christos sub v30.4s, v30.4s, v31.4s 3708 1.1 christos 3709 1.1 christos mov v5.16b, v4.16b 3710 1.1 christos mov v4.16b, v3.16b 3711 1.1 christos mov v3.16b, v2.16b 3712 1.1 christos 3713 1.1 christos mov v2.16b, v1.16b 3714 1.1 christos movi v18.8b, #0 3715 1.1 christos b.gt .L192_enc_blocks_more_than_6 3716 1.1 christos 3717 1.1 christos mov v7.16b, v6.16b 3718 1.1 christos cmp x5, #80 3719 1.1 christos 3720 1.1 christos mov v6.16b, v5.16b 3721 1.1 christos mov v5.16b, v4.16b 3722 1.1 christos mov v4.16b, v3.16b 3723 1.1 christos 3724 1.1 christos mov v3.16b, v1.16b 3725 1.1 christos sub v30.4s, v30.4s, v31.4s 3726 1.1 christos b.gt 
.L192_enc_blocks_more_than_5 3727 1.1 christos 3728 1.1 christos cmp x5, #64 3729 1.1 christos sub v30.4s, v30.4s, v31.4s 3730 1.1 christos 3731 1.1 christos mov v7.16b, v6.16b 3732 1.1 christos mov v6.16b, v5.16b 3733 1.1 christos mov v5.16b, v4.16b 3734 1.1 christos 3735 1.1 christos mov v4.16b, v1.16b 3736 1.1 christos b.gt .L192_enc_blocks_more_than_4 3737 1.1 christos 3738 1.1 christos mov v7.16b, v6.16b 3739 1.1 christos mov v6.16b, v5.16b 3740 1.1 christos mov v5.16b, v1.16b 3741 1.1 christos 3742 1.1 christos sub v30.4s, v30.4s, v31.4s 3743 1.1 christos cmp x5, #48 3744 1.1 christos b.gt .L192_enc_blocks_more_than_3 3745 1.1 christos 3746 1.1 christos mov v7.16b, v6.16b 3747 1.1 christos mov v6.16b, v1.16b 3748 1.1 christos sub v30.4s, v30.4s, v31.4s 3749 1.1 christos 3750 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 3751 1.1 christos cmp x5, #32 3752 1.1 christos b.gt .L192_enc_blocks_more_than_2 3753 1.1 christos 3754 1.1 christos sub v30.4s, v30.4s, v31.4s 3755 1.1 christos 3756 1.1 christos cmp x5, #16 3757 1.1 christos mov v7.16b, v1.16b 3758 1.1 christos b.gt .L192_enc_blocks_more_than_1 3759 1.1 christos 3760 1.1 christos sub v30.4s, v30.4s, v31.4s 3761 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 3762 1.1 christos b .L192_enc_blocks_less_than_1 3763 1.1 christos .L192_enc_blocks_more_than_7: //blocks left > 7 3764 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-7 block - store result 3765 1.1 christos 3766 1.1 christos rev64 v8.16b, v9.16b //GHASH final-7 block 3767 1.1 christos ins v18.d[0], v24.d[1] //GHASH final-7 block - mid 3768 1.1 christos 3769 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 3770 1.1 christos 3771 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-7 block - mid 3772 1.1 christos 3773 1.1 christos ldr q9, [x0], #16 //AES final-6 block - load plaintext 3774 1.1 christos 3775 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid 3776 1.1 christos movi v16.8b, #0 //suppress further partial tag 
feed in 3777 1.1 christos pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low 3778 1.1 christos 3779 1.1 christos pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high 3780 1.1 christos 3781 1.1 christos pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid 3782 1.1 christos .inst 0xce017529 //eor3 v9.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result 3783 1.1 christos .L192_enc_blocks_more_than_6: //blocks left > 6 3784 1.1 christos 3785 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-6 block - store result 3786 1.1 christos 3787 1.1 christos rev64 v8.16b, v9.16b //GHASH final-6 block 3788 1.1 christos 3789 1.1 christos ldr q9, [x0], #16 //AES final-5 block - load plaintext 3790 1.1 christos 3791 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 3792 1.1 christos 3793 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-6 block - mid 3794 1.1 christos 3795 1.1 christos pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low 3796 1.1 christos .inst 0xce027529 //eor3 v9.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result 3797 1.1 christos 3798 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 3799 1.1 christos pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high 3800 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid 3801 1.1 christos 3802 1.1 christos pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid 3803 1.1 christos 3804 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high 3805 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low 3806 1.1 christos 3807 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid 3808 1.1 christos .L192_enc_blocks_more_than_5: //blocks left > 5 3809 1.1 christos 3810 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-5 block - store result 3811 1.1 christos 3812 1.1 christos rev64 v8.16b, v9.16b //GHASH final-5 block 3813 1.1 christos 3814 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 3815 
1.1 christos 3816 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-5 block - mid 3817 1.1 christos 3818 1.1 christos ldr q9, [x0], #16 //AES final-4 block - load plaintext 3819 1.1 christos pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high 3820 1.1 christos 3821 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid 3822 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high 3823 1.1 christos 3824 1.1 christos ins v27.d[1], v27.d[0] //GHASH final-5 block - mid 3825 1.1 christos pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low 3826 1.1 christos 3827 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low 3828 1.1 christos pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid 3829 1.1 christos 3830 1.1 christos .inst 0xce037529 //eor3 v9.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result 3831 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 3832 1.1 christos 3833 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid 3834 1.1 christos .L192_enc_blocks_more_than_4: //blocks left > 4 3835 1.1 christos 3836 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-4 block - store result 3837 1.1 christos 3838 1.1 christos rev64 v8.16b, v9.16b //GHASH final-4 block 3839 1.1 christos 3840 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 3841 1.1 christos 3842 1.1 christos ldr q9, [x0], #16 //AES final-3 block - load plaintext 3843 1.1 christos pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high 3844 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-4 block - mid 3845 1.1 christos 3846 1.1 christos pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low 3847 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high 3848 1.1 christos 3849 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid 3850 1.1 christos 3851 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 3852 1.1 christos eor v19.16b, v19.16b, v26.16b 
//GHASH final-4 block - low 3853 1.1 christos 3854 1.1 christos pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid 3855 1.1 christos 3856 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid 3857 1.1 christos .inst 0xce047529 //eor3 v9.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result 3858 1.1 christos .L192_enc_blocks_more_than_3: //blocks left > 3 3859 1.1 christos 3860 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 3861 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-3 block - store result 3862 1.1 christos 3863 1.1 christos rev64 v8.16b, v9.16b //GHASH final-3 block 3864 1.1 christos 3865 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 3866 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 3867 1.1 christos 3868 1.1 christos ldr q9, [x0], #16 //AES final-2 block - load plaintext 3869 1.1 christos ldr q25, [x3, #112] //load h4l | h4h 3870 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 3871 1.1 christos 3872 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-3 block - mid 3873 1.1 christos 3874 1.1 christos .inst 0xce057529 //eor3 v9.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result 3875 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid 3876 1.1 christos 3877 1.1 christos ins v27.d[1], v27.d[0] //GHASH final-3 block - mid 3878 1.1 christos pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low 3879 1.1 christos 3880 1.1 christos pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high 3881 1.1 christos pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid 3882 1.1 christos 3883 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low 3884 1.1 christos 3885 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid 3886 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high 3887 1.1 christos .L192_enc_blocks_more_than_2: //blocks left > 2 3888 1.1 christos 3889 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-2 block - store 
result 3890 1.1 christos 3891 1.1 christos rev64 v8.16b, v9.16b //GHASH final-2 block 3892 1.1 christos ldr q23, [x3, #80] //load h3l | h3h 3893 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 3894 1.1 christos 3895 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 3896 1.1 christos 3897 1.1 christos ldr q9, [x0], #16 //AES final-1 block - load plaintext 3898 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-2 block - mid 3899 1.1 christos 3900 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid 3901 1.1 christos 3902 1.1 christos pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low 3903 1.1 christos pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high 3904 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 3905 1.1 christos 3906 1.1 christos pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid 3907 1.1 christos 3908 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low 3909 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high 3910 1.1 christos 3911 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid 3912 1.1 christos .inst 0xce067529 //eor3 v9.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result 3913 1.1 christos .L192_enc_blocks_more_than_1: //blocks left > 1 3914 1.1 christos 3915 1.1 christos ldr q22, [x3, #64] //load h2l | h2h 3916 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 3917 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-1 block - store result 3918 1.1 christos 3919 1.1 christos rev64 v8.16b, v9.16b //GHASH final-1 block 3920 1.1 christos 3921 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 3922 1.1 christos 3923 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-1 block - mid 3924 1.1 christos pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low 3925 1.1 christos 3926 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low 3927 1.1 christos pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high 3928 
1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid 3929 1.1 christos 3930 1.1 christos ldr q9, [x0], #16 //AES final block - load plaintext 3931 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 3932 1.1 christos 3933 1.1 christos ins v27.d[1], v27.d[0] //GHASH final-1 block - mid 3934 1.1 christos 3935 1.1 christos .inst 0xce077529 //eor3 v9.16b, v9.16b, v7.16b, v29.16b //AES final block - result 3936 1.1 christos pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid 3937 1.1 christos 3938 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 3939 1.1 christos 3940 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid 3941 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high 3942 1.1 christos .L192_enc_blocks_less_than_1: //blocks left <= 1 3943 1.1 christos 3944 1.1 christos mvn x6, xzr //temp0_x = 0xffffffffffffffff 3945 1.1 christos and x1, x1, #127 //bit_length %= 128 3946 1.1 christos 3947 1.1 christos sub x1, x1, #128 //bit_length -= 128 3948 1.1 christos 3949 1.1 christos neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) 3950 1.1 christos 3951 1.1 christos and x1, x1, #127 //bit_length %= 128 3952 1.1 christos 3953 1.1 christos lsr x6, x6, x1 //temp0_x is mask for top 64b of last block 3954 1.1 christos cmp x1, #64 3955 1.1 christos mvn x7, xzr //temp1_x = 0xffffffffffffffff 3956 1.1 christos 3957 1.1 christos csel x13, x7, x6, lt 3958 1.1 christos csel x14, x6, xzr, lt 3959 1.1 christos 3960 1.1 christos mov v0.d[1], x14 3961 1.1 christos ldr q20, [x3, #32] //load h1l | h1h 3962 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 3963 1.1 christos 3964 1.1 christos ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored 3965 1.1 christos mov v0.d[0], x13 //ctr0b is mask for last block 3966 1.1 christos 3967 1.1 christos and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits 3968 1.1 christos 3969 1.1 christos 
rev64 v8.16b, v9.16b //GHASH final block 3970 1.1 christos bif v9.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing 3971 1.1 christos 3972 1.1 christos st1 { v9.16b}, [x2] //store all 16B 3973 1.1 christos 3974 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 3975 1.1 christos 3976 1.1 christos ins v16.d[0], v8.d[1] //GHASH final block - mid 3977 1.1 christos pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high 3978 1.1 christos 3979 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final block - high 3980 1.1 christos pmull v26.1q, v8.1d, v20.1d //GHASH final block - low 3981 1.1 christos 3982 1.1 christos eor v16.8b, v16.8b, v8.8b //GHASH final block - mid 3983 1.1 christos 3984 1.1 christos pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid 3985 1.1 christos 3986 1.1 christos eor v18.16b, v18.16b, v16.16b //GHASH final block - mid 3987 1.1 christos ldr d16, [x10] //MODULO - load modulo constant 3988 1.1 christos 3989 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final block - low 3990 1.1 christos ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 3991 1.1 christos 3992 1.1 christos rev32 v30.16b, v30.16b 3993 1.1 christos 3994 1.1 christos str q30, [x16] //store the updated counter 3995 1.1 christos .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 3996 1.1 christos 3997 1.1 christos pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 3998 1.1 christos 3999 1.1 christos .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 4000 1.1 christos 4001 1.1 christos pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 4002 1.1 christos ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 4003 1.1 christos 4004 1.1 christos .inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 4005 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 4006 1.1 christos rev64 v19.16b, v19.16b 
4007 1.1 christos st1 { v19.16b }, [x3] 4008 1.1 christos 4009 1.1 christos mov x0, x9 //return sizes 4010 1.1 christos 4011 1.1 christos ldp d10, d11, [sp, #16] 4012 1.1 christos ldp d12, d13, [sp, #32] 4013 1.1 christos ldp d14, d15, [sp, #48] 4014 1.1 christos ldp d8, d9, [sp], #80 4015 1.1 christos ret 4016 1.1 christos 4017 1.1 christos .L192_enc_ret: 4018 1.1 christos mov w0, #0x0 4019 1.1 christos ret 4020 1.1 christos .size unroll8_eor3_aes_gcm_enc_192_kernel,.-unroll8_eor3_aes_gcm_enc_192_kernel 4021 1.1 christos .globl unroll8_eor3_aes_gcm_dec_192_kernel 4022 1.1 christos .type unroll8_eor3_aes_gcm_dec_192_kernel,%function 4023 1.1 christos .align 4 4024 1.1 christos unroll8_eor3_aes_gcm_dec_192_kernel: 4025 1.1 christos AARCH64_VALID_CALL_TARGET 4026 1.1 christos cbz x1, .L192_dec_ret 4027 1.1 christos stp d8, d9, [sp, #-80]! 4028 1.1 christos lsr x9, x1, #3 4029 1.1 christos mov x16, x4 4030 1.1 christos mov x8, x5 4031 1.1 christos stp d10, d11, [sp, #16] 4032 1.1 christos stp d12, d13, [sp, #32] 4033 1.1 christos stp d14, d15, [sp, #48] 4034 1.1 christos mov x5, #0xc200000000000000 4035 1.1 christos stp x5, xzr, [sp, #64] 4036 1.1 christos add x10, sp, #64 4037 1.1 christos 4038 1.1 christos mov x5, x9 4039 1.1 christos ld1 { v0.16b}, [x16] //CTR block 0 4040 1.1 christos ld1 { v19.16b}, [x3] 4041 1.1 christos 4042 1.1 christos mov x15, #0x100000000 //set up counter increment 4043 1.1 christos movi v31.16b, #0x0 4044 1.1 christos mov v31.d[1], x15 4045 1.1 christos 4046 1.1 christos rev32 v30.16b, v0.16b //set up reversed counter 4047 1.1 christos 4048 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 0 4049 1.1 christos 4050 1.1 christos rev32 v1.16b, v30.16b //CTR block 1 4051 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 1 4052 1.1 christos 4053 1.1 christos rev32 v2.16b, v30.16b //CTR block 2 4054 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 2 4055 1.1 christos 4056 1.1 christos rev32 v3.16b, v30.16b //CTR block 3 4057 1.1 christos 
add v30.4s, v30.4s, v31.4s //CTR block 3 4058 1.1 christos 4059 1.1 christos rev32 v4.16b, v30.16b //CTR block 4 4060 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 4 4061 1.1 christos 4062 1.1 christos rev32 v5.16b, v30.16b //CTR block 5 4063 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 5 4064 1.1 christos ldp q26, q27, [x8, #0] //load rk0, rk1 4065 1.1 christos 4066 1.1 christos rev32 v6.16b, v30.16b //CTR block 6 4067 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 6 4068 1.1 christos 4069 1.1 christos rev32 v7.16b, v30.16b //CTR block 7 4070 1.1 christos 4071 1.1 christos aese v3.16b, v26.16b 4072 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 0 4073 1.1 christos aese v6.16b, v26.16b 4074 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 0 4075 1.1 christos aese v5.16b, v26.16b 4076 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 0 4077 1.1 christos 4078 1.1 christos aese v0.16b, v26.16b 4079 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 0 4080 1.1 christos aese v1.16b, v26.16b 4081 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 0 4082 1.1 christos aese v7.16b, v26.16b 4083 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 0 4084 1.1 christos 4085 1.1 christos aese v2.16b, v26.16b 4086 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 0 4087 1.1 christos aese v4.16b, v26.16b 4088 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 0 4089 1.1 christos ldp q28, q26, [x8, #32] //load rk2, rk3 4090 1.1 christos 4091 1.1 christos aese v1.16b, v27.16b 4092 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 1 4093 1.1 christos 4094 1.1 christos aese v2.16b, v27.16b 4095 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 1 4096 1.1 christos 4097 1.1 christos aese v0.16b, v27.16b 4098 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 1 4099 1.1 christos aese v3.16b, v27.16b 4100 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 1 4101 1.1 christos aese v7.16b, v27.16b 4102 
1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 1 4103 1.1 christos 4104 1.1 christos aese v5.16b, v27.16b 4105 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 1 4106 1.1 christos aese v6.16b, v27.16b 4107 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 1 4108 1.1 christos 4109 1.1 christos aese v7.16b, v28.16b 4110 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 2 4111 1.1 christos aese v0.16b, v28.16b 4112 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 2 4113 1.1 christos aese v4.16b, v27.16b 4114 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 1 4115 1.1 christos 4116 1.1 christos aese v5.16b, v28.16b 4117 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 2 4118 1.1 christos aese v1.16b, v28.16b 4119 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 2 4120 1.1 christos aese v2.16b, v28.16b 4121 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 2 4122 1.1 christos 4123 1.1 christos aese v3.16b, v28.16b 4124 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 2 4125 1.1 christos aese v4.16b, v28.16b 4126 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 2 4127 1.1 christos aese v6.16b, v28.16b 4128 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 2 4129 1.1 christos 4130 1.1 christos aese v7.16b, v26.16b 4131 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 3 4132 1.1 christos 4133 1.1 christos ldp q27, q28, [x8, #64] //load rk4, rk5 4134 1.1 christos aese v2.16b, v26.16b 4135 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 3 4136 1.1 christos aese v5.16b, v26.16b 4137 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 3 4138 1.1 christos 4139 1.1 christos aese v0.16b, v26.16b 4140 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 3 4141 1.1 christos aese v3.16b, v26.16b 4142 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 3 4143 1.1 christos 4144 1.1 christos aese v4.16b, v26.16b 4145 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 3 4146 
1.1 christos aese v1.16b, v26.16b 4147 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 3 4148 1.1 christos aese v6.16b, v26.16b 4149 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 3 4150 1.1 christos 4151 1.1 christos aese v3.16b, v27.16b 4152 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 4 4153 1.1 christos aese v2.16b, v27.16b 4154 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 4 4155 1.1 christos aese v5.16b, v27.16b 4156 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 4 4157 1.1 christos 4158 1.1 christos aese v1.16b, v27.16b 4159 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 4 4160 1.1 christos aese v7.16b, v27.16b 4161 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 4 4162 1.1 christos aese v6.16b, v27.16b 4163 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 4 4164 1.1 christos 4165 1.1 christos aese v0.16b, v27.16b 4166 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 4 4167 1.1 christos aese v5.16b, v28.16b 4168 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 5 4169 1.1 christos aese v4.16b, v27.16b 4170 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 4 4171 1.1 christos 4172 1.1 christos aese v6.16b, v28.16b 4173 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 5 4174 1.1 christos ldp q26, q27, [x8, #96] //load rk6, rk7 4175 1.1 christos 4176 1.1 christos aese v0.16b, v28.16b 4177 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 5 4178 1.1 christos aese v4.16b, v28.16b 4179 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 5 4180 1.1 christos aese v1.16b, v28.16b 4181 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 5 4182 1.1 christos 4183 1.1 christos aese v3.16b, v28.16b 4184 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 5 4185 1.1 christos aese v2.16b, v28.16b 4186 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 5 4187 1.1 christos aese v7.16b, v28.16b 4188 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 5 
4189 1.1 christos 4190 1.1 christos sub x5, x5, #1 //byte_len - 1 4191 1.1 christos 4192 1.1 christos aese v4.16b, v26.16b 4193 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 6 4194 1.1 christos aese v5.16b, v26.16b 4195 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 6 4196 1.1 christos aese v1.16b, v26.16b 4197 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 6 4198 1.1 christos 4199 1.1 christos aese v0.16b, v26.16b 4200 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 6 4201 1.1 christos aese v3.16b, v26.16b 4202 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 6 4203 1.1 christos aese v6.16b, v26.16b 4204 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 6 4205 1.1 christos 4206 1.1 christos aese v7.16b, v26.16b 4207 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 6 4208 1.1 christos aese v2.16b, v26.16b 4209 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 6 4210 1.1 christos ldp q28, q26, [x8, #128] //load rk8, rk9 4211 1.1 christos 4212 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 7 4213 1.1 christos 4214 1.1 christos aese v3.16b, v27.16b 4215 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 7 4216 1.1 christos aese v7.16b, v27.16b 4217 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 7 4218 1.1 christos 4219 1.1 christos aese v2.16b, v27.16b 4220 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 7 4221 1.1 christos aese v1.16b, v27.16b 4222 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 7 4223 1.1 christos aese v4.16b, v27.16b 4224 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 7 4225 1.1 christos 4226 1.1 christos aese v6.16b, v27.16b 4227 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 7 4228 1.1 christos aese v0.16b, v27.16b 4229 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 7 4230 1.1 christos aese v5.16b, v27.16b 4231 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 7 4232 1.1 christos 4233 1.1 christos aese v1.16b, v28.16b 
4234 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 8 4235 1.1 christos aese v2.16b, v28.16b 4236 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 8 4237 1.1 christos and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 4238 1.1 christos 4239 1.1 christos aese v7.16b, v28.16b 4240 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 8 4241 1.1 christos aese v6.16b, v28.16b 4242 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 8 4243 1.1 christos aese v5.16b, v28.16b 4244 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 8 4245 1.1 christos 4246 1.1 christos aese v4.16b, v28.16b 4247 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 8 4248 1.1 christos aese v3.16b, v28.16b 4249 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 8 4250 1.1 christos aese v0.16b, v28.16b 4251 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 8 4252 1.1 christos 4253 1.1 christos add x4, x0, x1, lsr #3 //end_input_ptr 4254 1.1 christos aese v6.16b, v26.16b 4255 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 9 4256 1.1 christos 4257 1.1 christos ld1 { v19.16b}, [x3] 4258 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 4259 1.1 christos rev64 v19.16b, v19.16b 4260 1.1 christos 4261 1.1 christos ldp q27, q28, [x8, #160] //load rk10, rk11 4262 1.1 christos 4263 1.1 christos aese v0.16b, v26.16b 4264 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 9 4265 1.1 christos add x5, x5, x0 4266 1.1 christos 4267 1.1 christos aese v1.16b, v26.16b 4268 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 9 4269 1.1 christos aese v7.16b, v26.16b 4270 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 9 4271 1.1 christos aese v4.16b, v26.16b 4272 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 9 4273 1.1 christos 4274 1.1 christos cmp x0, x5 //check if we have <= 8 blocks 4275 1.1 christos aese v3.16b, v26.16b 4276 1.1 christos aesmc v3.16b, v3.16b //AES 
block 3 - round 9 4277 1.1 christos 4278 1.1 christos aese v5.16b, v26.16b 4279 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 9 4280 1.1 christos aese v2.16b, v26.16b 4281 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 9 4282 1.1 christos 4283 1.1 christos aese v3.16b, v27.16b 4284 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 10 4285 1.1 christos aese v1.16b, v27.16b 4286 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 10 4287 1.1 christos aese v7.16b, v27.16b 4288 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 10 4289 1.1 christos 4290 1.1 christos aese v4.16b, v27.16b 4291 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 10 4292 1.1 christos aese v0.16b, v27.16b 4293 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 10 4294 1.1 christos aese v2.16b, v27.16b 4295 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 10 4296 1.1 christos 4297 1.1 christos aese v6.16b, v27.16b 4298 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 10 4299 1.1 christos aese v5.16b, v27.16b 4300 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 10 4301 1.1 christos ldr q26, [x8, #192] //load rk12 4302 1.1 christos 4303 1.1 christos aese v0.16b, v28.16b //AES block 0 - round 11 4304 1.1 christos aese v1.16b, v28.16b //AES block 1 - round 11 4305 1.1 christos aese v4.16b, v28.16b //AES block 4 - round 11 4306 1.1 christos 4307 1.1 christos aese v6.16b, v28.16b //AES block 6 - round 11 4308 1.1 christos aese v5.16b, v28.16b //AES block 5 - round 11 4309 1.1 christos aese v7.16b, v28.16b //AES block 7 - round 11 4310 1.1 christos 4311 1.1 christos aese v2.16b, v28.16b //AES block 2 - round 11 4312 1.1 christos aese v3.16b, v28.16b //AES block 3 - round 11 4313 1.1 christos b.ge .L192_dec_tail //handle tail 4314 1.1 christos 4315 1.1 christos ldp q8, q9, [x0], #32 //AES block 0, 1 - load ciphertext 4316 1.1 christos 4317 1.1 christos ldp q10, q11, [x0], #32 //AES block 2, 3 - load ciphertext 4318 1.1 christos 4319 
1.1 christos ldp q12, q13, [x0], #32 //AES block 4, 5 - load ciphertext 4320 1.1 christos 4321 1.1 christos .inst 0xce016921 //eor3 v1.16b, v9.16b, v1.16b, v26.16b //AES block 1 - result 4322 1.1 christos .inst 0xce006900 //eor3 v0.16b, v8.16b, v0.16b, v26.16b //AES block 0 - result 4323 1.1 christos stp q0, q1, [x2], #32 //AES block 0, 1 - store result 4324 1.1 christos 4325 1.1 christos rev32 v0.16b, v30.16b //CTR block 8 4326 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8 4327 1.1 christos 4328 1.1 christos rev32 v1.16b, v30.16b //CTR block 9 4329 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 9 4330 1.1 christos .inst 0xce036963 //eor3 v3.16b, v11.16b, v3.16b, v26.16b //AES block 3 - result 4331 1.1 christos 4332 1.1 christos .inst 0xce026942 //eor3 v2.16b, v10.16b, v2.16b, v26.16b //AES block 2 - result 4333 1.1 christos stp q2, q3, [x2], #32 //AES block 2, 3 - store result 4334 1.1 christos ldp q14, q15, [x0], #32 //AES block 6, 7 - load ciphertext 4335 1.1 christos 4336 1.1 christos rev32 v2.16b, v30.16b //CTR block 10 4337 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 10 4338 1.1 christos 4339 1.1 christos .inst 0xce046984 //eor3 v4.16b, v12.16b, v4.16b, v26.16b //AES block 4 - result 4340 1.1 christos 4341 1.1 christos rev32 v3.16b, v30.16b //CTR block 11 4342 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 11 4343 1.1 christos 4344 1.1 christos .inst 0xce0569a5 //eor3 v5.16b, v13.16b, v5.16b, v26.16b //AES block 5 - result 4345 1.1 christos stp q4, q5, [x2], #32 //AES block 4, 5 - store result 4346 1.1 christos cmp x0, x5 //check if we have <= 8 blocks 4347 1.1 christos 4348 1.1 christos .inst 0xce0669c6 //eor3 v6.16b, v14.16b, v6.16b, v26.16b //AES block 6 - result 4349 1.1 christos .inst 0xce0769e7 //eor3 v7.16b, v15.16b, v7.16b, v26.16b //AES block 7 - result 4350 1.1 christos rev32 v4.16b, v30.16b //CTR block 12 4351 1.1 christos 4352 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 12 4353 1.1 christos stp q6, q7, [x2], #32 
//AES block 6, 7 - store result 4354 1.1 christos b.ge .L192_dec_prepretail //do prepretail 4355 1.1 christos 4356 1.1 christos .L192_dec_main_loop: //main loop start 4357 1.1 christos rev64 v9.16b, v9.16b //GHASH block 8k+1 4358 1.1 christos ldp q26, q27, [x8, #0] //load rk0, rk1 4359 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 4360 1.1 christos 4361 1.1 christos rev64 v8.16b, v8.16b //GHASH block 8k 4362 1.1 christos rev32 v5.16b, v30.16b //CTR block 8k+13 4363 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+13 4364 1.1 christos 4365 1.1 christos ldr q23, [x3, #176] //load h7l | h7h 4366 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 4367 1.1 christos ldr q25, [x3, #208] //load h8l | h8h 4368 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 4369 1.1 christos rev64 v12.16b, v12.16b //GHASH block 8k+4 4370 1.1 christos rev64 v11.16b, v11.16b //GHASH block 8k+3 4371 1.1 christos 4372 1.1 christos eor v8.16b, v8.16b, v19.16b //PRE 1 4373 1.1 christos rev32 v6.16b, v30.16b //CTR block 8k+14 4374 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+14 4375 1.1 christos 4376 1.1 christos rev64 v13.16b, v13.16b //GHASH block 8k+5 4377 1.1 christos 4378 1.1 christos rev32 v7.16b, v30.16b //CTR block 8k+15 4379 1.1 christos aese v1.16b, v26.16b 4380 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 4381 1.1 christos aese v6.16b, v26.16b 4382 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 4383 1.1 christos 4384 1.1 christos aese v5.16b, v26.16b 4385 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 4386 1.1 christos aese v4.16b, v26.16b 4387 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 4388 1.1 christos aese v0.16b, v26.16b 4389 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 4390 1.1 christos 4391 1.1 christos aese v7.16b, v26.16b 4392 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 4393 1.1 christos aese v2.16b, v26.16b 4394 1.1 christos aesmc v2.16b, v2.16b //AES block 
8k+10 - round 0 4395 1.1 christos aese v3.16b, v26.16b 4396 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 4397 1.1 christos 4398 1.1 christos pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 4399 1.1 christos pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 4400 1.1 christos ldp q28, q26, [x8, #32] //load rk2, rk3 4401 1.1 christos 4402 1.1 christos aese v6.16b, v27.16b 4403 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 4404 1.1 christos pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 4405 1.1 christos ldr q20, [x3, #128] //load h5l | h5h 4406 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 4407 1.1 christos ldr q22, [x3, #160] //load h6l | h6h 4408 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 4409 1.1 christos 4410 1.1 christos aese v0.16b, v27.16b 4411 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 4412 1.1 christos aese v3.16b, v27.16b 4413 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 4414 1.1 christos aese v7.16b, v27.16b 4415 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 4416 1.1 christos 4417 1.1 christos pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 4418 1.1 christos aese v2.16b, v27.16b 4419 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 4420 1.1 christos aese v4.16b, v27.16b 4421 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 4422 1.1 christos 4423 1.1 christos trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 4424 1.1 christos rev64 v10.16b, v10.16b //GHASH block 8k+2 4425 1.1 christos aese v1.16b, v27.16b 4426 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 4427 1.1 christos 4428 1.1 christos aese v5.16b, v27.16b 4429 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 4430 1.1 christos ldr q21, [x3, #144] //load h6k | h5k 4431 1.1 christos ldr q24, [x3, #192] //load h8k | h7k 4432 1.1 christos trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 4433 1.1 christos 4434 1.1 christos eor v17.16b, 
v17.16b, v16.16b //GHASH block 8k+1 - high 4435 1.1 christos pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 4436 1.1 christos pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 4437 1.1 christos 4438 1.1 christos eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 4439 1.1 christos eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 4440 1.1 christos aese v6.16b, v28.16b 4441 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 4442 1.1 christos 4443 1.1 christos aese v2.16b, v28.16b 4444 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 4445 1.1 christos pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 4446 1.1 christos .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 4447 1.1 christos 4448 1.1 christos aese v1.16b, v28.16b 4449 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 4450 1.1 christos aese v6.16b, v26.16b 4451 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 4452 1.1 christos aese v4.16b, v28.16b 4453 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 4454 1.1 christos 4455 1.1 christos aese v0.16b, v28.16b 4456 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 4457 1.1 christos aese v7.16b, v28.16b 4458 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 4459 1.1 christos aese v3.16b, v28.16b 4460 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 4461 1.1 christos 4462 1.1 christos ldr q23, [x3, #80] //load h3l | h3h 4463 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 4464 1.1 christos ldr q25, [x3, #112] //load h4l | h4h 4465 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 4466 1.1 christos aese v5.16b, v28.16b 4467 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 4468 1.1 christos aese v2.16b, v26.16b 4469 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 4470 1.1 christos 4471 1.1 christos pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 4472 1.1 christos trn1 v29.2d, 
v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 4473 1.1 christos trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 4474 1.1 christos 4475 1.1 christos aese v3.16b, v26.16b 4476 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 4477 1.1 christos aese v4.16b, v26.16b 4478 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 4479 1.1 christos 4480 1.1 christos aese v0.16b, v26.16b 4481 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 4482 1.1 christos aese v7.16b, v26.16b 4483 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 4484 1.1 christos ldp q27, q28, [x8, #64] //load rk4, rk5 4485 1.1 christos 4486 1.1 christos eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 4487 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 4488 1.1 christos aese v1.16b, v26.16b 4489 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 4490 1.1 christos 4491 1.1 christos trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 4492 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+15 4493 1.1 christos 4494 1.1 christos pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 4495 1.1 christos pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 4496 1.1 christos pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 4497 1.1 christos 4498 1.1 christos aese v5.16b, v26.16b 4499 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 4500 1.1 christos pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 4501 1.1 christos pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 4502 1.1 christos 4503 1.1 christos aese v4.16b, v27.16b 4504 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 4505 1.1 christos aese v6.16b, v27.16b 4506 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 4507 1.1 christos eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 4508 1.1 christos 4509 1.1 christos aese v5.16b, v27.16b 4510 1.1 christos aesmc 
v5.16b, v5.16b //AES block 8k+13 - round 4 4511 1.1 christos aese v1.16b, v27.16b 4512 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 4513 1.1 christos aese v3.16b, v27.16b 4514 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 4515 1.1 christos 4516 1.1 christos aese v2.16b, v27.16b 4517 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 4518 1.1 christos aese v0.16b, v27.16b 4519 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 4520 1.1 christos aese v7.16b, v27.16b 4521 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 4522 1.1 christos 4523 1.1 christos ldr q20, [x3, #32] //load h1l | h1h 4524 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 4525 1.1 christos ldr q22, [x3, #64] //load h2l | h2h 4526 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 4527 1.1 christos aese v3.16b, v28.16b 4528 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 4529 1.1 christos aese v5.16b, v28.16b 4530 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 4531 1.1 christos 4532 1.1 christos ldp q26, q27, [x8, #96] //load rk6, rk7 4533 1.1 christos aese v7.16b, v28.16b 4534 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 4535 1.1 christos rev64 v15.16b, v15.16b //GHASH block 8k+7 4536 1.1 christos 4537 1.1 christos aese v4.16b, v28.16b 4538 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 4539 1.1 christos .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 4540 1.1 christos aese v1.16b, v28.16b 4541 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 4542 1.1 christos 4543 1.1 christos pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 4544 1.1 christos trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 4545 1.1 christos aese v2.16b, v28.16b 4546 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 4547 1.1 christos 4548 1.1 christos aese v6.16b, v28.16b 4549 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - 
round 5 4550 1.1 christos aese v0.16b, v28.16b 4551 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 4552 1.1 christos rev64 v14.16b, v14.16b //GHASH block 8k+6 4553 1.1 christos 4554 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 4555 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 4556 1.1 christos pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 4557 1.1 christos pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 4558 1.1 christos 4559 1.1 christos aese v0.16b, v26.16b 4560 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 4561 1.1 christos eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 4562 1.1 christos trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 4563 1.1 christos 4564 1.1 christos aese v7.16b, v26.16b 4565 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 4566 1.1 christos aese v2.16b, v26.16b 4567 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 4568 1.1 christos aese v6.16b, v26.16b 4569 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 4570 1.1 christos 4571 1.1 christos pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 4572 1.1 christos aese v3.16b, v26.16b 4573 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 4574 1.1 christos aese v1.16b, v26.16b 4575 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 4576 1.1 christos 4577 1.1 christos aese v2.16b, v27.16b 4578 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 4579 1.1 christos aese v6.16b, v27.16b 4580 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 4581 1.1 christos aese v5.16b, v26.16b 4582 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 4583 1.1 christos 4584 1.1 christos pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 4585 1.1 christos .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 4586 1.1 christos .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 
- low 4587 1.1 christos 4588 1.1 christos pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 4589 1.1 christos trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 4590 1.1 christos aese v4.16b, v26.16b 4591 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 4592 1.1 christos 4593 1.1 christos aese v5.16b, v27.16b 4594 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 4595 1.1 christos ldp q28, q26, [x8, #128] //load rk8, rk9 4596 1.1 christos aese v3.16b, v27.16b 4597 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 4598 1.1 christos 4599 1.1 christos eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 4600 1.1 christos pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 4601 1.1 christos aese v1.16b, v27.16b 4602 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 4603 1.1 christos 4604 1.1 christos aese v4.16b, v27.16b 4605 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 4606 1.1 christos aese v0.16b, v27.16b 4607 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 4608 1.1 christos aese v7.16b, v27.16b 4609 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 4610 1.1 christos 4611 1.1 christos .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 4612 1.1 christos pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 4613 1.1 christos pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 4614 1.1 christos 4615 1.1 christos pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 4616 1.1 christos ldr d16, [x10] //MODULO - load modulo constant 4617 1.1 christos pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 4618 1.1 christos 4619 1.1 christos aese v2.16b, v28.16b 4620 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 4621 1.1 christos aese v5.16b, v28.16b 4622 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 4623 1.1 christos aese v7.16b, v28.16b 4624 1.1 christos aesmc v7.16b, v7.16b //AES block 
8k+15 - round 8 4625 1.1 christos 4626 1.1 christos aese v0.16b, v28.16b 4627 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 4628 1.1 christos aese v3.16b, v28.16b 4629 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 4630 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 4631 1.1 christos 4632 1.1 christos aese v4.16b, v28.16b 4633 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 4634 1.1 christos aese v1.16b, v28.16b 4635 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 4636 1.1 christos aese v6.16b, v28.16b 4637 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 4638 1.1 christos 4639 1.1 christos .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 4640 1.1 christos rev32 v20.16b, v30.16b //CTR block 8k+16 4641 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+16 4642 1.1 christos 4643 1.1 christos aese v5.16b, v26.16b 4644 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 4645 1.1 christos .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 4646 1.1 christos aese v1.16b, v26.16b 4647 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 4648 1.1 christos 4649 1.1 christos aese v3.16b, v26.16b 4650 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 4651 1.1 christos aese v7.16b, v26.16b 4652 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 4653 1.1 christos ldp q27, q28, [x8, #160] //load rk10, rk11 4654 1.1 christos 4655 1.1 christos .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 4656 1.1 christos ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load ciphertext 4657 1.1 christos 4658 1.1 christos aese v2.16b, v26.16b 4659 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 4660 1.1 christos aese v0.16b, v26.16b 4661 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 
4662 1.1 christos ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load ciphertext 4663 1.1 christos 4664 1.1 christos rev32 v22.16b, v30.16b //CTR block 8k+17 4665 1.1 christos pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 4666 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+17 4667 1.1 christos 4668 1.1 christos aese v6.16b, v26.16b 4669 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 4670 1.1 christos aese v4.16b, v26.16b 4671 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 4672 1.1 christos ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 4673 1.1 christos 4674 1.1 christos aese v3.16b, v27.16b 4675 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 4676 1.1 christos aese v7.16b, v27.16b 4677 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 4678 1.1 christos ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load ciphertext 4679 1.1 christos 4680 1.1 christos rev32 v23.16b, v30.16b //CTR block 8k+18 4681 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+18 4682 1.1 christos .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 4683 1.1 christos 4684 1.1 christos aese v0.16b, v27.16b 4685 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 4686 1.1 christos aese v1.16b, v27.16b 4687 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 4688 1.1 christos ldr q26, [x8, #192] //load rk12 4689 1.1 christos 4690 1.1 christos ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load ciphertext 4691 1.1 christos aese v4.16b, v27.16b 4692 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 4693 1.1 christos aese v6.16b, v27.16b 4694 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 4695 1.1 christos 4696 1.1 christos aese v0.16b, v28.16b //AES block 8k+8 - round 11 4697 1.1 christos ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 4698 1.1 christos aese v1.16b, v28.16b //AES block 8k+9 
- round 11 4699 1.1 christos 4700 1.1 christos aese v2.16b, v27.16b 4701 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 4702 1.1 christos aese v6.16b, v28.16b //AES block 8k+14 - round 11 4703 1.1 christos aese v3.16b, v28.16b //AES block 8k+11 - round 11 4704 1.1 christos 4705 1.1 christos .inst 0xce006900 //eor3 v0.16b, v8.16b, v0.16b, v26.16b //AES block 8k+8 - result 4706 1.1 christos rev32 v25.16b, v30.16b //CTR block 8k+19 4707 1.1 christos aese v5.16b, v27.16b 4708 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 4709 1.1 christos 4710 1.1 christos aese v4.16b, v28.16b //AES block 8k+12 - round 11 4711 1.1 christos aese v2.16b, v28.16b //AES block 8k+10 - round 11 4712 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+19 4713 1.1 christos 4714 1.1 christos aese v7.16b, v28.16b //AES block 8k+15 - round 11 4715 1.1 christos aese v5.16b, v28.16b //AES block 8k+13 - round 11 4716 1.1 christos pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 4717 1.1 christos 4718 1.1 christos .inst 0xce016921 //eor3 v1.16b, v9.16b, v1.16b, v26.16b //AES block 8k+9 - result 4719 1.1 christos stp q0, q1, [x2], #32 //AES block 8k+8, 8k+9 - store result 4720 1.1 christos .inst 0xce036963 //eor3 v3.16b, v11.16b, v3.16b, v26.16b //AES block 8k+11 - result 4721 1.1 christos 4722 1.1 christos .inst 0xce026942 //eor3 v2.16b, v10.16b, v2.16b, v26.16b //AES block 8k+10 - result 4723 1.1 christos .inst 0xce0769e7 //eor3 v7.16b, v15.16b, v7.16b, v26.16b //AES block 8k+15 - result 4724 1.1 christos stp q2, q3, [x2], #32 //AES block 8k+10, 8k+11 - store result 4725 1.1 christos 4726 1.1 christos .inst 0xce0569a5 //eor3 v5.16b, v13.16b, v5.16b, v26.16b //AES block 8k+13 - result 4727 1.1 christos .inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 4728 1.1 christos mov v3.16b, v25.16b //CTR block 8k+19 4729 1.1 christos 4730 1.1 christos .inst 0xce046984 //eor3 v4.16b, v12.16b, v4.16b, v26.16b //AES block 8k+12 - 
result 4731 1.1 christos stp q4, q5, [x2], #32 //AES block 8k+12, 8k+13 - store result 4732 1.1 christos cmp x0, x5 //.LOOP CONTROL 4733 1.1 christos 4734 1.1 christos .inst 0xce0669c6 //eor3 v6.16b, v14.16b, v6.16b, v26.16b //AES block 8k+14 - result 4735 1.1 christos stp q6, q7, [x2], #32 //AES block 8k+14, 8k+15 - store result 4736 1.1 christos mov v0.16b, v20.16b //CTR block 8k+16 4737 1.1 christos 4738 1.1 christos mov v1.16b, v22.16b //CTR block 8k+17 4739 1.1 christos mov v2.16b, v23.16b //CTR block 8k+18 4740 1.1 christos 4741 1.1 christos rev32 v4.16b, v30.16b //CTR block 8k+20 4742 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+20 4743 1.1 christos b.lt .L192_dec_main_loop 4744 1.1 christos 4745 1.1 christos .L192_dec_prepretail: //PREPRETAIL 4746 1.1 christos ldp q26, q27, [x8, #0] //load rk0, rk1 4747 1.1 christos rev32 v5.16b, v30.16b //CTR block 8k+13 4748 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+13 4749 1.1 christos 4750 1.1 christos ldr q23, [x3, #176] //load h7l | h7h 4751 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 4752 1.1 christos ldr q25, [x3, #208] //load h8l | h8h 4753 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 4754 1.1 christos rev64 v8.16b, v8.16b //GHASH block 8k 4755 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 4756 1.1 christos 4757 1.1 christos rev64 v11.16b, v11.16b //GHASH block 8k+3 4758 1.1 christos rev32 v6.16b, v30.16b //CTR block 8k+14 4759 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+14 4760 1.1 christos 4761 1.1 christos eor v8.16b, v8.16b, v19.16b //PRE 1 4762 1.1 christos rev64 v10.16b, v10.16b //GHASH block 8k+2 4763 1.1 christos rev64 v9.16b, v9.16b //GHASH block 8k+1 4764 1.1 christos 4765 1.1 christos ldr q20, [x3, #128] //load h5l | h5h 4766 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 4767 1.1 christos ldr q22, [x3, #160] //load h6l | h6h 4768 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 4769 1.1 christos rev32 v7.16b, v30.16b //CTR block 8k+15 4770 1.1 christos 4771 
1.1 christos aese v0.16b, v26.16b 4772 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 4773 1.1 christos aese v6.16b, v26.16b 4774 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 4775 1.1 christos aese v5.16b, v26.16b 4776 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 4777 1.1 christos 4778 1.1 christos aese v3.16b, v26.16b 4779 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 4780 1.1 christos aese v2.16b, v26.16b 4781 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 4782 1.1 christos pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 4783 1.1 christos 4784 1.1 christos aese v4.16b, v26.16b 4785 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 4786 1.1 christos pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 4787 1.1 christos aese v1.16b, v26.16b 4788 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 4789 1.1 christos 4790 1.1 christos aese v6.16b, v27.16b 4791 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 4792 1.1 christos aese v7.16b, v26.16b 4793 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 4794 1.1 christos ldp q28, q26, [x8, #32] //load rk2, rk3 4795 1.1 christos 4796 1.1 christos aese v4.16b, v27.16b 4797 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 4798 1.1 christos pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 4799 1.1 christos pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 4800 1.1 christos 4801 1.1 christos pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 4802 1.1 christos eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 4803 1.1 christos aese v3.16b, v27.16b 4804 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 4805 1.1 christos 4806 1.1 christos pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 4807 1.1 christos aese v7.16b, v27.16b 4808 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 4809 1.1 christos aese v0.16b, v27.16b 4810 1.1 christos aesmc 
v0.16b, v0.16b //AES block 8k+8 - round 1 4811 1.1 christos 4812 1.1 christos trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 4813 1.1 christos trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 4814 1.1 christos pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 4815 1.1 christos 4816 1.1 christos aese v2.16b, v27.16b 4817 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 4818 1.1 christos aese v1.16b, v27.16b 4819 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 4820 1.1 christos aese v5.16b, v27.16b 4821 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 4822 1.1 christos 4823 1.1 christos ldr q21, [x3, #144] //load h6k | h5k 4824 1.1 christos ldr q24, [x3, #192] //load h8k | h7k 4825 1.1 christos aese v3.16b, v28.16b 4826 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 4827 1.1 christos eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 4828 1.1 christos 4829 1.1 christos aese v6.16b, v28.16b 4830 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 4831 1.1 christos rev64 v13.16b, v13.16b //GHASH block 8k+5 4832 1.1 christos pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 4833 1.1 christos 4834 1.1 christos .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 4835 1.1 christos aese v4.16b, v28.16b 4836 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 4837 1.1 christos aese v5.16b, v28.16b 4838 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 4839 1.1 christos 4840 1.1 christos trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 4841 1.1 christos aese v3.16b, v26.16b 4842 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 4843 1.1 christos aese v7.16b, v28.16b 4844 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 4845 1.1 christos 4846 1.1 christos aese v0.16b, v28.16b 4847 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 4848 1.1 christos aese v2.16b, v28.16b 4849 1.1 
christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 4850 1.1 christos trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 4851 1.1 christos 4852 1.1 christos pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 4853 1.1 christos aese v1.16b, v28.16b 4854 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 4855 1.1 christos pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 4856 1.1 christos 4857 1.1 christos aese v5.16b, v26.16b 4858 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 4859 1.1 christos eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 4860 1.1 christos eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 4861 1.1 christos 4862 1.1 christos aese v7.16b, v26.16b 4863 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 4864 1.1 christos aese v6.16b, v26.16b 4865 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 4866 1.1 christos aese v4.16b, v26.16b 4867 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 4868 1.1 christos 4869 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 4870 1.1 christos ldp q27, q28, [x8, #64] //load rk4, rk5 4871 1.1 christos aese v0.16b, v26.16b 4872 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 4873 1.1 christos 4874 1.1 christos ldr q23, [x3, #80] //load h3l | h3h 4875 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 4876 1.1 christos ldr q25, [x3, #112] //load h4l | h4h 4877 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 4878 1.1 christos pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 4879 1.1 christos pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 4880 1.1 christos 4881 1.1 christos ldr q20, [x3, #32] //load h1l | h1h 4882 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 4883 1.1 christos ldr q22, [x3, #64] //load h2l | h2h 4884 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 4885 1.1 christos eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 4886 1.1 christos 
aese v2.16b, v26.16b 4887 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 4888 1.1 christos 4889 1.1 christos rev64 v15.16b, v15.16b //GHASH block 8k+7 4890 1.1 christos 4891 1.1 christos .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 4892 1.1 christos rev64 v12.16b, v12.16b //GHASH block 8k+4 4893 1.1 christos 4894 1.1 christos aese v5.16b, v27.16b 4895 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 4896 1.1 christos aese v4.16b, v27.16b 4897 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 4898 1.1 christos aese v1.16b, v26.16b 4899 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 4900 1.1 christos 4901 1.1 christos aese v2.16b, v27.16b 4902 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 4903 1.1 christos aese v0.16b, v27.16b 4904 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 4905 1.1 christos aese v3.16b, v27.16b 4906 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 4907 1.1 christos 4908 1.1 christos aese v1.16b, v27.16b 4909 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 4910 1.1 christos aese v6.16b, v27.16b 4911 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 4912 1.1 christos aese v7.16b, v27.16b 4913 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 4914 1.1 christos 4915 1.1 christos rev64 v14.16b, v14.16b //GHASH block 8k+6 4916 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 4917 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 4918 1.1 christos trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 4919 1.1 christos 4920 1.1 christos aese v7.16b, v28.16b 4921 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 4922 1.1 christos aese v1.16b, v28.16b 4923 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 4924 1.1 christos aese v2.16b, v28.16b 4925 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 4926 1.1 christos 4927 1.1 christos ldp q26, 
q27, [x8, #96] //load rk6, rk7 4928 1.1 christos aese v6.16b, v28.16b 4929 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 4930 1.1 christos aese v5.16b, v28.16b 4931 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 4932 1.1 christos 4933 1.1 christos pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 4934 1.1 christos pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 4935 1.1 christos pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 4936 1.1 christos 4937 1.1 christos aese v4.16b, v28.16b 4938 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 4939 1.1 christos 4940 1.1 christos pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 4941 1.1 christos trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 4942 1.1 christos pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 4943 1.1 christos 4944 1.1 christos pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 4945 1.1 christos trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 4946 1.1 christos aese v0.16b, v28.16b 4947 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 4948 1.1 christos 4949 1.1 christos trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 4950 1.1 christos aese v3.16b, v28.16b 4951 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 4952 1.1 christos eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 4953 1.1 christos 4954 1.1 christos aese v4.16b, v26.16b 4955 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 4956 1.1 christos aese v2.16b, v26.16b 4957 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 4958 1.1 christos 4959 1.1 christos eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 4960 1.1 christos aese v1.16b, v26.16b 4961 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 4962 1.1 christos aese v7.16b, v26.16b 4963 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 4964 1.1 christos 4965 1.1 christos pmull2 v16.1q, v12.2d, 
v24.2d //GHASH block 8k+4 - mid 4966 1.1 christos pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 4967 1.1 christos aese v0.16b, v26.16b 4968 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 4969 1.1 christos 4970 1.1 christos pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 4971 1.1 christos aese v5.16b, v26.16b 4972 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 4973 1.1 christos pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 4974 1.1 christos 4975 1.1 christos .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 4976 1.1 christos aese v4.16b, v27.16b 4977 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 4978 1.1 christos .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 4979 1.1 christos 4980 1.1 christos aese v3.16b, v26.16b 4981 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 4982 1.1 christos aese v6.16b, v26.16b 4983 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 4984 1.1 christos aese v5.16b, v27.16b 4985 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 4986 1.1 christos 4987 1.1 christos ldp q28, q26, [x8, #128] //load rk8, rk9 4988 1.1 christos pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 4989 1.1 christos aese v2.16b, v27.16b 4990 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 4991 1.1 christos 4992 1.1 christos ldr d16, [x10] //MODULO - load modulo constant 4993 1.1 christos .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 4994 1.1 christos pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 4995 1.1 christos 4996 1.1 christos aese v1.16b, v27.16b 4997 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 4998 1.1 christos aese v7.16b, v27.16b 4999 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 5000 1.1 christos aese v6.16b, v27.16b 5001 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 
- round 7 5002 1.1 christos 5003 1.1 christos .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 5004 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 5005 1.1 christos .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 5006 1.1 christos 5007 1.1 christos aese v0.16b, v27.16b 5008 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 5009 1.1 christos aese v3.16b, v27.16b 5010 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 5011 1.1 christos 5012 1.1 christos .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 5013 1.1 christos ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 5014 1.1 christos aese v2.16b, v28.16b 5015 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 5016 1.1 christos 5017 1.1 christos aese v6.16b, v28.16b 5018 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 5019 1.1 christos aese v7.16b, v28.16b 5020 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 5021 1.1 christos aese v1.16b, v28.16b 5022 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 5023 1.1 christos 5024 1.1 christos aese v3.16b, v28.16b 5025 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 5026 1.1 christos pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 5027 1.1 christos aese v0.16b, v28.16b 5028 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 5029 1.1 christos 5030 1.1 christos aese v5.16b, v28.16b 5031 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 5032 1.1 christos aese v4.16b, v28.16b 5033 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 5034 1.1 christos ldp q27, q28, [x8, #160] //load rk10, rk11 5035 1.1 christos 5036 1.1 christos .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 5037 1.1 christos aese v7.16b, v26.16b 5038 1.1 
christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 5039 1.1 christos aese v6.16b, v26.16b 5040 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 5041 1.1 christos 5042 1.1 christos aese v5.16b, v26.16b 5043 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 5044 1.1 christos aese v2.16b, v26.16b 5045 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 5046 1.1 christos aese v3.16b, v26.16b 5047 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 5048 1.1 christos 5049 1.1 christos aese v0.16b, v26.16b 5050 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 5051 1.1 christos aese v1.16b, v26.16b 5052 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 5053 1.1 christos aese v4.16b, v26.16b 5054 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 5055 1.1 christos 5056 1.1 christos pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 5057 1.1 christos ldr q26, [x8, #192] //load rk12 5058 1.1 christos ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 5059 1.1 christos 5060 1.1 christos aese v2.16b, v27.16b 5061 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 5062 1.1 christos aese v5.16b, v27.16b 5063 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 5064 1.1 christos aese v0.16b, v27.16b 5065 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 5066 1.1 christos 5067 1.1 christos aese v4.16b, v27.16b 5068 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 5069 1.1 christos aese v6.16b, v27.16b 5070 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 5071 1.1 christos aese v7.16b, v27.16b 5072 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 5073 1.1 christos 5074 1.1 christos aese v0.16b, v28.16b //AES block 8k+8 - round 11 5075 1.1 christos .inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 5076 1.1 christos aese v5.16b, v28.16b //AES block 8k+13 - round 11 
5077 1.1 christos 5078 1.1 christos aese v2.16b, v28.16b //AES block 8k+10 - round 11 5079 1.1 christos aese v3.16b, v27.16b 5080 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 5081 1.1 christos aese v1.16b, v27.16b 5082 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 5083 1.1 christos 5084 1.1 christos aese v6.16b, v28.16b //AES block 8k+14 - round 11 5085 1.1 christos aese v4.16b, v28.16b //AES block 8k+12 - round 11 5086 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+15 5087 1.1 christos 5088 1.1 christos aese v3.16b, v28.16b //AES block 8k+11 - round 11 5089 1.1 christos aese v1.16b, v28.16b //AES block 8k+9 - round 11 5090 1.1 christos aese v7.16b, v28.16b //AES block 8k+15 - round 11 5091 1.1 christos 5092 1.1 christos .L192_dec_tail: //TAIL 5093 1.1 christos 5094 1.1 christos sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process 5095 1.1 christos 5096 1.1 christos ldp q20, q21, [x3, #128] //load h5l | h5h 5097 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 5098 1.1 christos ldr q9, [x0], #16 //AES block 8k+8 - load ciphertext 5099 1.1 christos 5100 1.1 christos ldp q24, q25, [x3, #192] //load h8k | h7k 5101 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 5102 1.1 christos 5103 1.1 christos mov v29.16b, v26.16b 5104 1.1 christos 5105 1.1 christos ldp q22, q23, [x3, #160] //load h6l | h6h 5106 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 5107 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 5108 1.1 christos ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag 5109 1.1 christos 5110 1.1 christos .inst 0xce00752c //eor3 v12.16b, v9.16b, v0.16b, v29.16b //AES block 8k+8 - result 5111 1.1 christos cmp x5, #112 5112 1.1 christos b.gt .L192_dec_blocks_more_than_7 5113 1.1 christos 5114 1.1 christos mov v7.16b, v6.16b 5115 1.1 christos movi v17.8b, #0 5116 1.1 christos sub v30.4s, v30.4s, v31.4s 5117 1.1 christos 5118 1.1 christos mov v6.16b, v5.16b 5119 1.1 christos mov v5.16b, v4.16b 5120 1.1 christos 
mov v4.16b, v3.16b 5121 1.1 christos 5122 1.1 christos cmp x5, #96 5123 1.1 christos movi v19.8b, #0 5124 1.1 christos mov v3.16b, v2.16b 5125 1.1 christos 5126 1.1 christos mov v2.16b, v1.16b 5127 1.1 christos movi v18.8b, #0 5128 1.1 christos b.gt .L192_dec_blocks_more_than_6 5129 1.1 christos 5130 1.1 christos mov v7.16b, v6.16b 5131 1.1 christos mov v6.16b, v5.16b 5132 1.1 christos mov v5.16b, v4.16b 5133 1.1 christos 5134 1.1 christos mov v4.16b, v3.16b 5135 1.1 christos mov v3.16b, v1.16b 5136 1.1 christos 5137 1.1 christos sub v30.4s, v30.4s, v31.4s 5138 1.1 christos cmp x5, #80 5139 1.1 christos b.gt .L192_dec_blocks_more_than_5 5140 1.1 christos 5141 1.1 christos mov v7.16b, v6.16b 5142 1.1 christos mov v6.16b, v5.16b 5143 1.1 christos 5144 1.1 christos mov v5.16b, v4.16b 5145 1.1 christos mov v4.16b, v1.16b 5146 1.1 christos cmp x5, #64 5147 1.1 christos 5148 1.1 christos sub v30.4s, v30.4s, v31.4s 5149 1.1 christos b.gt .L192_dec_blocks_more_than_4 5150 1.1 christos 5151 1.1 christos sub v30.4s, v30.4s, v31.4s 5152 1.1 christos mov v7.16b, v6.16b 5153 1.1 christos mov v6.16b, v5.16b 5154 1.1 christos 5155 1.1 christos mov v5.16b, v1.16b 5156 1.1 christos cmp x5, #48 5157 1.1 christos b.gt .L192_dec_blocks_more_than_3 5158 1.1 christos 5159 1.1 christos sub v30.4s, v30.4s, v31.4s 5160 1.1 christos mov v7.16b, v6.16b 5161 1.1 christos cmp x5, #32 5162 1.1 christos 5163 1.1 christos mov v6.16b, v1.16b 5164 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 5165 1.1 christos b.gt .L192_dec_blocks_more_than_2 5166 1.1 christos 5167 1.1 christos sub v30.4s, v30.4s, v31.4s 5168 1.1 christos 5169 1.1 christos mov v7.16b, v1.16b 5170 1.1 christos cmp x5, #16 5171 1.1 christos b.gt .L192_dec_blocks_more_than_1 5172 1.1 christos 5173 1.1 christos sub v30.4s, v30.4s, v31.4s 5174 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 5175 1.1 christos b .L192_dec_blocks_less_than_1 5176 1.1 christos .L192_dec_blocks_more_than_7: //blocks left > 7 5177 1.1 christos rev64 
v8.16b, v9.16b //GHASH final-7 block 5178 1.1 christos 5179 1.1 christos ins v18.d[0], v24.d[1] //GHASH final-7 block - mid 5180 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 5181 1.1 christos 5182 1.1 christos pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high 5183 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-7 block - mid 5184 1.1 christos ldr q9, [x0], #16 //AES final-6 block - load ciphertext 5185 1.1 christos 5186 1.1 christos pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low 5187 1.1 christos 5188 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid 5189 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-7 block - store result 5190 1.1 christos 5191 1.1 christos .inst 0xce01752c //eor3 v12.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result 5192 1.1 christos 5193 1.1 christos pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid 5194 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 5195 1.1 christos .L192_dec_blocks_more_than_6: //blocks left > 6 5196 1.1 christos 5197 1.1 christos rev64 v8.16b, v9.16b //GHASH final-6 block 5198 1.1 christos 5199 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 5200 1.1 christos 5201 1.1 christos ldr q9, [x0], #16 //AES final-5 block - load ciphertext 5202 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-6 block - mid 5203 1.1 christos 5204 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid 5205 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 5206 1.1 christos pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high 5207 1.1 christos 5208 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-6 block - store result 5209 1.1 christos .inst 0xce02752c //eor3 v12.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result 5210 1.1 christos 5211 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high 5212 1.1 christos pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid 5213 1.1 christos 
pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low 5214 1.1 christos 5215 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid 5216 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low 5217 1.1 christos .L192_dec_blocks_more_than_5: //blocks left > 5 5218 1.1 christos 5219 1.1 christos rev64 v8.16b, v9.16b //GHASH final-5 block 5220 1.1 christos 5221 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 5222 1.1 christos 5223 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-5 block - mid 5224 1.1 christos 5225 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid 5226 1.1 christos 5227 1.1 christos ins v27.d[1], v27.d[0] //GHASH final-5 block - mid 5228 1.1 christos pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high 5229 1.1 christos 5230 1.1 christos ldr q9, [x0], #16 //AES final-4 block - load ciphertext 5231 1.1 christos 5232 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high 5233 1.1 christos pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low 5234 1.1 christos 5235 1.1 christos pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid 5236 1.1 christos 5237 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low 5238 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 5239 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-5 block - store result 5240 1.1 christos 5241 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid 5242 1.1 christos .inst 0xce03752c //eor3 v12.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result 5243 1.1 christos .L192_dec_blocks_more_than_4: //blocks left > 4 5244 1.1 christos 5245 1.1 christos rev64 v8.16b, v9.16b //GHASH final-4 block 5246 1.1 christos 5247 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 5248 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 5249 1.1 christos 5250 1.1 christos ldr q9, [x0], #16 //AES final-3 block - load ciphertext 5251 1.1 
christos ins v27.d[0], v8.d[1] //GHASH final-4 block - mid 5252 1.1 christos pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low 5253 1.1 christos 5254 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid 5255 1.1 christos 5256 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low 5257 1.1 christos 5258 1.1 christos pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid 5259 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-4 block - store result 5260 1.1 christos pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high 5261 1.1 christos 5262 1.1 christos .inst 0xce04752c //eor3 v12.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result 5263 1.1 christos 5264 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid 5265 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high 5266 1.1 christos .L192_dec_blocks_more_than_3: //blocks left > 3 5267 1.1 christos 5268 1.1 christos ldr q25, [x3, #112] //load h4l | h4h 5269 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 5270 1.1 christos rev64 v8.16b, v9.16b //GHASH final-3 block 5271 1.1 christos ldr q9, [x0], #16 //AES final-2 block - load ciphertext 5272 1.1 christos 5273 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 5274 1.1 christos 5275 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-3 block - mid 5276 1.1 christos pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high 5277 1.1 christos 5278 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high 5279 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 5280 1.1 christos pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low 5281 1.1 christos 5282 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-3 block - store result 5283 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid 5284 1.1 christos .inst 0xce05752c //eor3 v12.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result 5285 1.1 christos 5286 1.1 christos eor v19.16b, 
v19.16b, v26.16b //GHASH final-3 block - low 5287 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 5288 1.1 christos 5289 1.1 christos ins v27.d[1], v27.d[0] //GHASH final-3 block - mid 5290 1.1 christos 5291 1.1 christos pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid 5292 1.1 christos 5293 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid 5294 1.1 christos .L192_dec_blocks_more_than_2: //blocks left > 2 5295 1.1 christos 5296 1.1 christos rev64 v8.16b, v9.16b //GHASH final-2 block 5297 1.1 christos ldr q23, [x3, #80] //load h3l | h3h 5298 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 5299 1.1 christos 5300 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 5301 1.1 christos 5302 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-2 block - mid 5303 1.1 christos ldr q9, [x0], #16 //AES final-1 block - load ciphertext 5304 1.1 christos 5305 1.1 christos pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high 5306 1.1 christos 5307 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid 5308 1.1 christos 5309 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high 5310 1.1 christos pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low 5311 1.1 christos 5312 1.1 christos pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid 5313 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 5314 1.1 christos 5315 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low 5316 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-2 block - store result 5317 1.1 christos 5318 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid 5319 1.1 christos .inst 0xce06752c //eor3 v12.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result 5320 1.1 christos .L192_dec_blocks_more_than_1: //blocks left > 1 5321 1.1 christos 5322 1.1 christos rev64 v8.16b, v9.16b //GHASH final-1 block 5323 1.1 christos ldr q9, [x0], #16 //AES final block - load ciphertext 5324 1.1 christos ldr 
q22, [x3, #64] //load h2l | h2h 5325 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 5326 1.1 christos 5327 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 5328 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 5329 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 5330 1.1 christos 5331 1.1 christos pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low 5332 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-1 block - mid 5333 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-1 block - store result 5334 1.1 christos 5335 1.1 christos pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high 5336 1.1 christos 5337 1.1 christos .inst 0xce07752c //eor3 v12.16b, v9.16b, v7.16b, v29.16b //AES final block - result 5338 1.1 christos 5339 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid 5340 1.1 christos 5341 1.1 christos ins v27.d[1], v27.d[0] //GHASH final-1 block - mid 5342 1.1 christos 5343 1.1 christos pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid 5344 1.1 christos 5345 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low 5346 1.1 christos 5347 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid 5348 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high 5349 1.1 christos .L192_dec_blocks_less_than_1: //blocks left <= 1 5350 1.1 christos 5351 1.1 christos rev32 v30.16b, v30.16b 5352 1.1 christos and x1, x1, #127 //bit_length %= 128 5353 1.1 christos 5354 1.1 christos sub x1, x1, #128 //bit_length -= 128 5355 1.1 christos str q30, [x16] //store the updated counter 5356 1.1 christos 5357 1.1 christos neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) 5358 1.1 christos mvn x6, xzr //temp0_x = 0xffffffffffffffff 5359 1.1 christos 5360 1.1 christos and x1, x1, #127 //bit_length %= 128 5361 1.1 christos 5362 1.1 christos mvn x7, xzr //temp1_x = 0xffffffffffffffff 5363 1.1 christos lsr x6, x6, x1 //temp0_x is mask for top 64b of last block 5364 
1.1 christos cmp x1, #64 5365 1.1 christos 5366 1.1 christos csel x13, x7, x6, lt 5367 1.1 christos csel x14, x6, xzr, lt 5368 1.1 christos ldr q20, [x3, #32] //load h1l | h1h 5369 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 5370 1.1 christos 5371 1.1 christos mov v0.d[1], x14 5372 1.1 christos ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored 5373 1.1 christos 5374 1.1 christos mov v0.d[0], x13 //ctr0b is mask for last block 5375 1.1 christos 5376 1.1 christos and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits 5377 1.1 christos bif v12.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing 5378 1.1 christos 5379 1.1 christos rev64 v8.16b, v9.16b //GHASH final block 5380 1.1 christos 5381 1.1 christos st1 { v12.16b}, [x2] //store all 16B 5382 1.1 christos 5383 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 5384 1.1 christos 5385 1.1 christos ins v16.d[0], v8.d[1] //GHASH final block - mid 5386 1.1 christos pmull v26.1q, v8.1d, v20.1d //GHASH final block - low 5387 1.1 christos 5388 1.1 christos eor v16.8b, v16.8b, v8.8b //GHASH final block - mid 5389 1.1 christos pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high 5390 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final block - low 5391 1.1 christos 5392 1.1 christos pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid 5393 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final block - high 5394 1.1 christos 5395 1.1 christos eor v14.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 5396 1.1 christos eor v18.16b, v18.16b, v16.16b //GHASH final block - mid 5397 1.1 christos ldr d16, [x10] //MODULO - load modulo constant 5398 1.1 christos 5399 1.1 christos pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 5400 1.1 christos ext v17.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 5401 1.1 christos 5402 1.1 christos eor v18.16b, v18.16b, v14.16b //MODULO - 
karatsuba tidy up 5403 1.1 christos 5404 1.1 christos .inst 0xce115652 //eor3 v18.16b, v18.16b, v17.16b, v21.16b //MODULO - fold into mid 5405 1.1 christos 5406 1.1 christos pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 5407 1.1 christos ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 5408 1.1 christos 5409 1.1 christos .inst 0xce124673 //eor3 v19.16b, v19.16b, v18.16b, v17.16b //MODULO - fold into low 5410 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 5411 1.1 christos rev64 v19.16b, v19.16b 5412 1.1 christos st1 { v19.16b }, [x3] 5413 1.1 christos 5414 1.1 christos mov x0, x9 5415 1.1 christos 5416 1.1 christos ldp d10, d11, [sp, #16] 5417 1.1 christos ldp d12, d13, [sp, #32] 5418 1.1 christos ldp d14, d15, [sp, #48] 5419 1.1 christos ldp d8, d9, [sp], #80 5420 1.1 christos ret 5421 1.1 christos 5422 1.1 christos .L192_dec_ret: 5423 1.1 christos mov w0, #0x0 5424 1.1 christos ret 5425 1.1 christos .size unroll8_eor3_aes_gcm_dec_192_kernel,.-unroll8_eor3_aes_gcm_dec_192_kernel 5426 1.1 christos .globl unroll8_eor3_aes_gcm_enc_256_kernel 5427 1.1 christos .type unroll8_eor3_aes_gcm_enc_256_kernel,%function 5428 1.1 christos .align 4 5429 1.1 christos unroll8_eor3_aes_gcm_enc_256_kernel: 5430 1.1 christos AARCH64_VALID_CALL_TARGET 5431 1.1 christos cbz x1, .L256_enc_ret 5432 1.1 christos stp d8, d9, [sp, #-80]! 
5433 1.1 christos lsr x9, x1, #3 5434 1.1 christos mov x16, x4 5435 1.1 christos mov x8, x5 5436 1.1 christos stp d10, d11, [sp, #16] 5437 1.1 christos stp d12, d13, [sp, #32] 5438 1.1 christos stp d14, d15, [sp, #48] 5439 1.1 christos mov x5, #0xc200000000000000 5440 1.1 christos stp x5, xzr, [sp, #64] 5441 1.1 christos add x10, sp, #64 5442 1.1 christos 5443 1.1 christos ld1 { v0.16b}, [x16] //CTR block 0 5444 1.1 christos 5445 1.1 christos mov x5, x9 5446 1.1 christos 5447 1.1 christos mov x15, #0x100000000 //set up counter increment 5448 1.1 christos movi v31.16b, #0x0 5449 1.1 christos mov v31.d[1], x15 5450 1.1 christos sub x5, x5, #1 //byte_len - 1 5451 1.1 christos 5452 1.1 christos and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 5453 1.1 christos 5454 1.1 christos add x5, x5, x0 5455 1.1 christos 5456 1.1 christos rev32 v30.16b, v0.16b //set up reversed counter 5457 1.1 christos 5458 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 0 5459 1.1 christos 5460 1.1 christos rev32 v1.16b, v30.16b //CTR block 1 5461 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 1 5462 1.1 christos 5463 1.1 christos rev32 v2.16b, v30.16b //CTR block 2 5464 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 2 5465 1.1 christos 5466 1.1 christos rev32 v3.16b, v30.16b //CTR block 3 5467 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 3 5468 1.1 christos 5469 1.1 christos rev32 v4.16b, v30.16b //CTR block 4 5470 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 4 5471 1.1 christos 5472 1.1 christos rev32 v5.16b, v30.16b //CTR block 5 5473 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 5 5474 1.1 christos ldp q26, q27, [x8, #0] //load rk0, rk1 5475 1.1 christos 5476 1.1 christos rev32 v6.16b, v30.16b //CTR block 6 5477 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 6 5478 1.1 christos 5479 1.1 christos rev32 v7.16b, v30.16b //CTR block 7 5480 1.1 christos 5481 1.1 christos aese v3.16b, 
v26.16b 5482 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 0 5483 1.1 christos aese v4.16b, v26.16b 5484 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 0 5485 1.1 christos aese v2.16b, v26.16b 5486 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 0 5487 1.1 christos 5488 1.1 christos aese v0.16b, v26.16b 5489 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 0 5490 1.1 christos aese v1.16b, v26.16b 5491 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 0 5492 1.1 christos aese v6.16b, v26.16b 5493 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 0 5494 1.1 christos 5495 1.1 christos aese v5.16b, v26.16b 5496 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 0 5497 1.1 christos aese v7.16b, v26.16b 5498 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 0 5499 1.1 christos ldp q28, q26, [x8, #32] //load rk2, rk3 5500 1.1 christos 5501 1.1 christos aese v4.16b, v27.16b 5502 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 1 5503 1.1 christos aese v1.16b, v27.16b 5504 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 1 5505 1.1 christos aese v3.16b, v27.16b 5506 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 1 5507 1.1 christos 5508 1.1 christos aese v6.16b, v27.16b 5509 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 1 5510 1.1 christos aese v5.16b, v27.16b 5511 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 1 5512 1.1 christos 5513 1.1 christos aese v2.16b, v27.16b 5514 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 1 5515 1.1 christos 5516 1.1 christos aese v7.16b, v27.16b 5517 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 1 5518 1.1 christos 5519 1.1 christos aese v2.16b, v28.16b 5520 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 2 5521 1.1 christos aese v3.16b, v28.16b 5522 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 2 5523 1.1 christos aese v0.16b, v27.16b 5524 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 1 5525 1.1 
christos 5526 1.1 christos aese v7.16b, v28.16b 5527 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 2 5528 1.1 christos aese v6.16b, v28.16b 5529 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 2 5530 1.1 christos aese v5.16b, v28.16b 5531 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 2 5532 1.1 christos 5533 1.1 christos aese v4.16b, v28.16b 5534 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 2 5535 1.1 christos aese v0.16b, v28.16b 5536 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 2 5537 1.1 christos aese v1.16b, v28.16b 5538 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 2 5539 1.1 christos 5540 1.1 christos aese v5.16b, v26.16b 5541 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 3 5542 1.1 christos aese v3.16b, v26.16b 5543 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 3 5544 1.1 christos ldp q27, q28, [x8, #64] //load rk4, rk5 5545 1.1 christos 5546 1.1 christos aese v4.16b, v26.16b 5547 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 3 5548 1.1 christos 5549 1.1 christos aese v1.16b, v26.16b 5550 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 3 5551 1.1 christos aese v6.16b, v26.16b 5552 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 3 5553 1.1 christos aese v7.16b, v26.16b 5554 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 3 5555 1.1 christos 5556 1.1 christos aese v2.16b, v26.16b 5557 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 3 5558 1.1 christos aese v0.16b, v26.16b 5559 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 3 5560 1.1 christos 5561 1.1 christos aese v4.16b, v27.16b 5562 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 4 5563 1.1 christos aese v6.16b, v27.16b 5564 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 4 5565 1.1 christos aese v1.16b, v27.16b 5566 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 4 5567 1.1 christos 5568 1.1 christos aese v2.16b, v27.16b 5569 1.1 christos aesmc v2.16b, 
v2.16b //AES block 2 - round 4 5570 1.1 christos aese v0.16b, v27.16b 5571 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 4 5572 1.1 christos 5573 1.1 christos aese v3.16b, v27.16b 5574 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 4 5575 1.1 christos aese v7.16b, v27.16b 5576 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 4 5577 1.1 christos aese v5.16b, v27.16b 5578 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 4 5579 1.1 christos 5580 1.1 christos aese v0.16b, v28.16b 5581 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 5 5582 1.1 christos aese v2.16b, v28.16b 5583 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 5 5584 1.1 christos ldp q26, q27, [x8, #96] //load rk6, rk7 5585 1.1 christos 5586 1.1 christos aese v1.16b, v28.16b 5587 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 5 5588 1.1 christos aese v4.16b, v28.16b 5589 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 5 5590 1.1 christos aese v5.16b, v28.16b 5591 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 5 5592 1.1 christos 5593 1.1 christos aese v3.16b, v28.16b 5594 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 5 5595 1.1 christos aese v6.16b, v28.16b 5596 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 5 5597 1.1 christos aese v7.16b, v28.16b 5598 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 5 5599 1.1 christos 5600 1.1 christos aese v1.16b, v26.16b 5601 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 6 5602 1.1 christos aese v5.16b, v26.16b 5603 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 6 5604 1.1 christos aese v4.16b, v26.16b 5605 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 6 5606 1.1 christos 5607 1.1 christos aese v2.16b, v26.16b 5608 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 6 5609 1.1 christos aese v6.16b, v26.16b 5610 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 6 5611 1.1 christos aese v0.16b, v26.16b 5612 1.1 christos aesmc 
v0.16b, v0.16b //AES block 0 - round 6 5613 1.1 christos 5614 1.1 christos aese v7.16b, v26.16b 5615 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 6 5616 1.1 christos aese v3.16b, v26.16b 5617 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 6 5618 1.1 christos ldp q28, q26, [x8, #128] //load rk8, rk9 5619 1.1 christos 5620 1.1 christos aese v2.16b, v27.16b 5621 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 7 5622 1.1 christos aese v0.16b, v27.16b 5623 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 7 5624 1.1 christos 5625 1.1 christos aese v7.16b, v27.16b 5626 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 7 5627 1.1 christos aese v6.16b, v27.16b 5628 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 7 5629 1.1 christos aese v1.16b, v27.16b 5630 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 7 5631 1.1 christos 5632 1.1 christos aese v5.16b, v27.16b 5633 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 7 5634 1.1 christos aese v3.16b, v27.16b 5635 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 7 5636 1.1 christos 5637 1.1 christos aese v4.16b, v27.16b 5638 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 7 5639 1.1 christos 5640 1.1 christos aese v6.16b, v28.16b 5641 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 8 5642 1.1 christos aese v1.16b, v28.16b 5643 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 8 5644 1.1 christos 5645 1.1 christos aese v3.16b, v28.16b 5646 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 8 5647 1.1 christos aese v0.16b, v28.16b 5648 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 8 5649 1.1 christos aese v7.16b, v28.16b 5650 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 8 5651 1.1 christos 5652 1.1 christos aese v5.16b, v28.16b 5653 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 8 5654 1.1 christos aese v4.16b, v28.16b 5655 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 8 5656 1.1 christos aese 
v2.16b, v28.16b 5657 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 8 5658 1.1 christos 5659 1.1 christos ld1 { v19.16b}, [x3] 5660 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 5661 1.1 christos rev64 v19.16b, v19.16b 5662 1.1 christos ldp q27, q28, [x8, #160] //load rk10, rk11 5663 1.1 christos 5664 1.1 christos aese v6.16b, v26.16b 5665 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 9 5666 1.1 christos aese v7.16b, v26.16b 5667 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 9 5668 1.1 christos aese v3.16b, v26.16b 5669 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 9 5670 1.1 christos 5671 1.1 christos aese v4.16b, v26.16b 5672 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 9 5673 1.1 christos aese v5.16b, v26.16b 5674 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 9 5675 1.1 christos aese v2.16b, v26.16b 5676 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 9 5677 1.1 christos 5678 1.1 christos aese v1.16b, v26.16b 5679 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 9 5680 1.1 christos 5681 1.1 christos aese v7.16b, v27.16b 5682 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 10 5683 1.1 christos aese v4.16b, v27.16b 5684 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 10 5685 1.1 christos aese v0.16b, v26.16b 5686 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 9 5687 1.1 christos 5688 1.1 christos aese v1.16b, v27.16b 5689 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 10 5690 1.1 christos aese v5.16b, v27.16b 5691 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 10 5692 1.1 christos aese v3.16b, v27.16b 5693 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 10 5694 1.1 christos 5695 1.1 christos aese v2.16b, v27.16b 5696 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 10 5697 1.1 christos aese v0.16b, v27.16b 5698 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 10 5699 1.1 christos aese v6.16b, v27.16b 5700 1.1 christos aesmc 
v6.16b, v6.16b //AES block 6 - round 10 5701 1.1 christos 5702 1.1 christos aese v4.16b, v28.16b 5703 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 11 5704 1.1 christos ldp q26, q27, [x8, #192] //load rk12, rk13 5705 1.1 christos aese v5.16b, v28.16b 5706 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 11 5707 1.1 christos 5708 1.1 christos aese v2.16b, v28.16b 5709 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 11 5710 1.1 christos aese v6.16b, v28.16b 5711 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 11 5712 1.1 christos aese v1.16b, v28.16b 5713 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 11 5714 1.1 christos 5715 1.1 christos aese v0.16b, v28.16b 5716 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 11 5717 1.1 christos aese v3.16b, v28.16b 5718 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 11 5719 1.1 christos aese v7.16b, v28.16b 5720 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 11 5721 1.1 christos 5722 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 7 5723 1.1 christos ldr q28, [x8, #224] //load rk14 5724 1.1 christos 5725 1.1 christos aese v4.16b, v26.16b 5726 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 12 5727 1.1 christos aese v2.16b, v26.16b 5728 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 12 5729 1.1 christos aese v1.16b, v26.16b 5730 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 12 5731 1.1 christos 5732 1.1 christos aese v0.16b, v26.16b 5733 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 12 5734 1.1 christos aese v5.16b, v26.16b 5735 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 12 5736 1.1 christos aese v3.16b, v26.16b 5737 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 12 5738 1.1 christos 5739 1.1 christos aese v2.16b, v27.16b //AES block 2 - round 13 5740 1.1 christos aese v1.16b, v27.16b //AES block 1 - round 13 5741 1.1 christos aese v4.16b, v27.16b //AES block 4 - round 13 5742 1.1 christos 5743 1.1 
christos aese v6.16b, v26.16b 5744 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 12 5745 1.1 christos aese v7.16b, v26.16b 5746 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 12 5747 1.1 christos 5748 1.1 christos aese v0.16b, v27.16b //AES block 0 - round 13 5749 1.1 christos aese v5.16b, v27.16b //AES block 5 - round 13 5750 1.1 christos 5751 1.1 christos aese v6.16b, v27.16b //AES block 6 - round 13 5752 1.1 christos aese v7.16b, v27.16b //AES block 7 - round 13 5753 1.1 christos aese v3.16b, v27.16b //AES block 3 - round 13 5754 1.1 christos 5755 1.1 christos add x4, x0, x1, lsr #3 //end_input_ptr 5756 1.1 christos cmp x0, x5 //check if we have <= 8 blocks 5757 1.1 christos b.ge .L256_enc_tail //handle tail 5758 1.1 christos 5759 1.1 christos ldp q8, q9, [x0], #32 //AES block 0, 1 - load plaintext 5760 1.1 christos 5761 1.1 christos ldp q10, q11, [x0], #32 //AES block 2, 3 - load plaintext 5762 1.1 christos 5763 1.1 christos .inst 0xce007108 //eor3 v8.16b, v8.16b, v0.16b, v28.16b //AES block 0 - result 5764 1.1 christos rev32 v0.16b, v30.16b //CTR block 8 5765 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8 5766 1.1 christos 5767 1.1 christos .inst 0xce017129 //eor3 v9.16b, v9.16b, v1.16b, v28.16b //AES block 1 - result 5768 1.1 christos .inst 0xce03716b //eor3 v11.16b, v11.16b, v3.16b, v28.16b //AES block 3 - result 5769 1.1 christos 5770 1.1 christos rev32 v1.16b, v30.16b //CTR block 9 5771 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 9 5772 1.1 christos ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext 5773 1.1 christos 5774 1.1 christos ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext 5775 1.1 christos .inst 0xce02714a //eor3 v10.16b, v10.16b, v2.16b, v28.16b //AES block 2 - result 5776 1.1 christos cmp x0, x5 //check if we have <= 8 blocks 5777 1.1 christos 5778 1.1 christos rev32 v2.16b, v30.16b //CTR block 10 5779 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 10 5780 1.1 christos stp q8, q9, [x2], 
#32 //AES block 0, 1 - store result 5781 1.1 christos 5782 1.1 christos stp q10, q11, [x2], #32 //AES block 2, 3 - store result 5783 1.1 christos 5784 1.1 christos rev32 v3.16b, v30.16b //CTR block 11 5785 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 11 5786 1.1 christos 5787 1.1 christos .inst 0xce04718c //eor3 v12.16b, v12.16b, v4.16b, v28.16b //AES block 4 - result 5788 1.1 christos 5789 1.1 christos .inst 0xce0771ef //eor3 v15.16b, v15.16b, v7.16b, v28.16b //AES block 7 - result 5790 1.1 christos .inst 0xce0671ce //eor3 v14.16b, v14.16b, v6.16b, v28.16b //AES block 6 - result 5791 1.1 christos .inst 0xce0571ad //eor3 v13.16b, v13.16b, v5.16b, v28.16b //AES block 5 - result 5792 1.1 christos 5793 1.1 christos stp q12, q13, [x2], #32 //AES block 4, 5 - store result 5794 1.1 christos rev32 v4.16b, v30.16b //CTR block 12 5795 1.1 christos 5796 1.1 christos stp q14, q15, [x2], #32 //AES block 6, 7 - store result 5797 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 12 5798 1.1 christos b.ge .L256_enc_prepretail //do prepretail 5799 1.1 christos 5800 1.1 christos .L256_enc_main_loop: //main loop start 5801 1.1 christos ldp q26, q27, [x8, #0] //load rk0, rk1 5802 1.1 christos 5803 1.1 christos rev32 v5.16b, v30.16b //CTR block 8k+13 5804 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+13 5805 1.1 christos ldr q21, [x3, #144] //load h6k | h5k 5806 1.1 christos ldr q24, [x3, #192] //load h8k | h7k 5807 1.1 christos 5808 1.1 christos rev64 v11.16b, v11.16b //GHASH block 8k+3 5809 1.1 christos ldr q20, [x3, #128] //load h5l | h5h 5810 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 5811 1.1 christos ldr q22, [x3, #160] //load h6l | h6h 5812 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 5813 1.1 christos rev64 v9.16b, v9.16b //GHASH block 8k+1 5814 1.1 christos 5815 1.1 christos rev32 v6.16b, v30.16b //CTR block 8k+14 5816 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+14 5817 1.1 christos rev64 v8.16b, v8.16b //GHASH block 8k 5818 1.1 christos 
5819 1.1 christos rev64 v12.16b, v12.16b //GHASH block 8k+4 5820 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 5821 1.1 christos ldr q23, [x3, #176] //load h7l | h7h 5822 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 5823 1.1 christos ldr q25, [x3, #208] //load h8l | h8h 5824 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 5825 1.1 christos 5826 1.1 christos aese v3.16b, v26.16b 5827 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 5828 1.1 christos aese v5.16b, v26.16b 5829 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 5830 1.1 christos rev32 v7.16b, v30.16b //CTR block 8k+15 5831 1.1 christos 5832 1.1 christos aese v0.16b, v26.16b 5833 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 5834 1.1 christos aese v1.16b, v26.16b 5835 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 5836 1.1 christos aese v6.16b, v26.16b 5837 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 5838 1.1 christos 5839 1.1 christos aese v7.16b, v26.16b 5840 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 5841 1.1 christos aese v2.16b, v26.16b 5842 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 5843 1.1 christos aese v4.16b, v26.16b 5844 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 5845 1.1 christos 5846 1.1 christos ldp q28, q26, [x8, #32] //load rk2, rk3 5847 1.1 christos eor v8.16b, v8.16b, v19.16b //PRE 1 5848 1.1 christos aese v6.16b, v27.16b 5849 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 5850 1.1 christos 5851 1.1 christos aese v2.16b, v27.16b 5852 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 5853 1.1 christos aese v1.16b, v27.16b 5854 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 5855 1.1 christos aese v0.16b, v27.16b 5856 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 5857 1.1 christos 5858 1.1 christos aese v4.16b, v27.16b 5859 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 5860 1.1 
christos aese v3.16b, v27.16b 5861 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 5862 1.1 christos aese v5.16b, v27.16b 5863 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 5864 1.1 christos 5865 1.1 christos pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 5866 1.1 christos pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 5867 1.1 christos pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 5868 1.1 christos 5869 1.1 christos trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 5870 1.1 christos trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 5871 1.1 christos aese v7.16b, v27.16b 5872 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 5873 1.1 christos 5874 1.1 christos aese v1.16b, v28.16b 5875 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 5876 1.1 christos aese v5.16b, v28.16b 5877 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 5878 1.1 christos aese v6.16b, v28.16b 5879 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 5880 1.1 christos 5881 1.1 christos aese v2.16b, v28.16b 5882 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 5883 1.1 christos pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 5884 1.1 christos aese v4.16b, v28.16b 5885 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 5886 1.1 christos 5887 1.1 christos aese v5.16b, v26.16b 5888 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 5889 1.1 christos aese v6.16b, v26.16b 5890 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 5891 1.1 christos aese v0.16b, v28.16b 5892 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 5893 1.1 christos 5894 1.1 christos aese v1.16b, v26.16b 5895 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 5896 1.1 christos aese v7.16b, v28.16b 5897 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 5898 1.1 christos aese v3.16b, v28.16b 5899 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 
- round 2 5900 1.1 christos 5901 1.1 christos aese v4.16b, v26.16b 5902 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 5903 1.1 christos rev64 v14.16b, v14.16b //GHASH block 8k+6 5904 1.1 christos pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 5905 1.1 christos 5906 1.1 christos aese v3.16b, v26.16b 5907 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 5908 1.1 christos ldp q27, q28, [x8, #64] //load rk4, rk5 5909 1.1 christos rev64 v10.16b, v10.16b //GHASH block 8k+2 5910 1.1 christos 5911 1.1 christos aese v2.16b, v26.16b 5912 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 5913 1.1 christos aese v7.16b, v26.16b 5914 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 5915 1.1 christos aese v0.16b, v26.16b 5916 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 5917 1.1 christos 5918 1.1 christos eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 5919 1.1 christos pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 5920 1.1 christos rev64 v13.16b, v13.16b //GHASH block 8k+5 5921 1.1 christos 5922 1.1 christos pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 5923 1.1 christos eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 5924 1.1 christos ldr q23, [x3, #80] //load h3l | h3h 5925 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 5926 1.1 christos ldr q25, [x3, #112] //load h4l | h4h 5927 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 5928 1.1 christos 5929 1.1 christos trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 5930 1.1 christos .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 5931 1.1 christos pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 5932 1.1 christos 5933 1.1 christos aese v4.16b, v27.16b 5934 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 5935 1.1 christos aese v1.16b, v27.16b 5936 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 5937 1.1 christos aese v5.16b, v27.16b 5938 1.1 
christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 5939 1.1 christos 5940 1.1 christos aese v7.16b, v27.16b 5941 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 5942 1.1 christos aese v3.16b, v27.16b 5943 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 5944 1.1 christos aese v2.16b, v27.16b 5945 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 5946 1.1 christos 5947 1.1 christos trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 5948 1.1 christos aese v6.16b, v27.16b 5949 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 5950 1.1 christos aese v0.16b, v27.16b 5951 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 5952 1.1 christos 5953 1.1 christos trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 5954 1.1 christos eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 5955 1.1 christos ldp q26, q27, [x8, #96] //load rk6, rk7 5956 1.1 christos 5957 1.1 christos aese v5.16b, v28.16b 5958 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 5959 1.1 christos aese v7.16b, v28.16b 5960 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 5961 1.1 christos aese v4.16b, v28.16b 5962 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 5963 1.1 christos 5964 1.1 christos eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 5965 1.1 christos aese v2.16b, v28.16b 5966 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 5967 1.1 christos rev64 v15.16b, v15.16b //GHASH block 8k+7 5968 1.1 christos 5969 1.1 christos aese v3.16b, v28.16b 5970 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 5971 1.1 christos aese v6.16b, v28.16b 5972 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 5973 1.1 christos aese v1.16b, v28.16b 5974 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 5975 1.1 christos 5976 1.1 christos pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 5977 1.1 christos pmull2 v18.1q, v8.2d, v24.2d 
//GHASH block 8k - mid 5978 1.1 christos aese v0.16b, v28.16b 5979 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 5980 1.1 christos 5981 1.1 christos pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 5982 1.1 christos aese v4.16b, v26.16b 5983 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 5984 1.1 christos aese v2.16b, v26.16b 5985 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 5986 1.1 christos 5987 1.1 christos aese v6.16b, v26.16b 5988 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 5989 1.1 christos aese v1.16b, v26.16b 5990 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 5991 1.1 christos aese v7.16b, v26.16b 5992 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 5993 1.1 christos 5994 1.1 christos eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 5995 1.1 christos pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 5996 1.1 christos aese v5.16b, v26.16b 5997 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 5998 1.1 christos 5999 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 6000 1.1 christos aese v3.16b, v26.16b 6001 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 6002 1.1 christos aese v0.16b, v26.16b 6003 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 6004 1.1 christos 6005 1.1 christos ldp q28, q26, [x8, #128] //load rk8, rk9 6006 1.1 christos pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 6007 1.1 christos aese v5.16b, v27.16b 6008 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 6009 1.1 christos 6010 1.1 christos ldr q20, [x3, #32] //load h1l | h1h 6011 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 6012 1.1 christos ldr q22, [x3, #64] //load h2l | h2h 6013 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 6014 1.1 christos aese v2.16b, v27.16b 6015 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 6016 1.1 christos .inst 0xce157652 
//eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 6017 1.1 christos 6018 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 6019 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 6020 1.1 christos aese v6.16b, v27.16b 6021 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 6022 1.1 christos aese v3.16b, v27.16b 6023 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 6024 1.1 christos 6025 1.1 christos aese v0.16b, v27.16b 6026 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 6027 1.1 christos aese v7.16b, v27.16b 6028 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 6029 1.1 christos pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 6030 1.1 christos 6031 1.1 christos trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 6032 1.1 christos aese v4.16b, v27.16b 6033 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 6034 1.1 christos aese v1.16b, v27.16b 6035 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 6036 1.1 christos 6037 1.1 christos pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 6038 1.1 christos aese v7.16b, v28.16b 6039 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 6040 1.1 christos aese v0.16b, v28.16b 6041 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 6042 1.1 christos 6043 1.1 christos pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 6044 1.1 christos trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 6045 1.1 christos eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 6046 1.1 christos 6047 1.1 christos aese v3.16b, v28.16b 6048 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 6049 1.1 christos aese v0.16b, v26.16b 6050 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 6051 1.1 christos aese v1.16b, v28.16b 6052 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 6053 1.1 christos 6054 1.1 christos pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 6055 
1.1 christos pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 6056 1.1 christos aese v2.16b, v28.16b 6057 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 6058 1.1 christos 6059 1.1 christos aese v5.16b, v28.16b 6060 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 6061 1.1 christos pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 6062 1.1 christos pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 6063 1.1 christos 6064 1.1 christos aese v6.16b, v28.16b 6065 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 6066 1.1 christos trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 6067 1.1 christos aese v4.16b, v28.16b 6068 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 6069 1.1 christos 6070 1.1 christos .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 6071 1.1 christos aese v7.16b, v26.16b 6072 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 6073 1.1 christos aese v5.16b, v26.16b 6074 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 6075 1.1 christos 6076 1.1 christos eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 6077 1.1 christos aese v6.16b, v26.16b 6078 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 6079 1.1 christos aese v4.16b, v26.16b 6080 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 6081 1.1 christos 6082 1.1 christos ldp q27, q28, [x8, #160] //load rk10, rk11 6083 1.1 christos aese v2.16b, v26.16b 6084 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 6085 1.1 christos aese v3.16b, v26.16b 6086 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 6087 1.1 christos 6088 1.1 christos pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 6089 1.1 christos .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 6090 1.1 christos pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 6091 1.1 christos 6092 1.1 christos ldr 
d16, [x10] //MODULO - load modulo constant 6093 1.1 christos pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 6094 1.1 christos pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 6095 1.1 christos 6096 1.1 christos aese v1.16b, v26.16b 6097 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 6098 1.1 christos 6099 1.1 christos .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 6100 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 6101 1.1 christos .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 6102 1.1 christos 6103 1.1 christos aese v4.16b, v27.16b 6104 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 6105 1.1 christos aese v3.16b, v27.16b 6106 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 6107 1.1 christos aese v5.16b, v27.16b 6108 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 6109 1.1 christos 6110 1.1 christos aese v0.16b, v27.16b 6111 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 6112 1.1 christos aese v2.16b, v27.16b 6113 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 6114 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+15 6115 1.1 christos 6116 1.1 christos aese v1.16b, v27.16b 6117 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 6118 1.1 christos aese v7.16b, v27.16b 6119 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 6120 1.1 christos aese v6.16b, v27.16b 6121 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 6122 1.1 christos 6123 1.1 christos .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 6124 1.1 christos 6125 1.1 christos ldp q26, q27, [x8, #192] //load rk12, rk13 6126 1.1 christos rev32 v20.16b, v30.16b //CTR block 8k+16 6127 1.1 christos 6128 1.1 christos ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 6129 1.1 
christos ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load plaintext 6130 1.1 christos aese v2.16b, v28.16b 6131 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 11 6132 1.1 christos 6133 1.1 christos aese v6.16b, v28.16b 6134 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 11 6135 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+16 6136 1.1 christos aese v3.16b, v28.16b 6137 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 11 6138 1.1 christos 6139 1.1 christos aese v0.16b, v28.16b 6140 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 11 6141 1.1 christos aese v7.16b, v28.16b 6142 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 11 6143 1.1 christos 6144 1.1 christos pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 6145 1.1 christos aese v1.16b, v28.16b 6146 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 11 6147 1.1 christos 6148 1.1 christos aese v7.16b, v26.16b 6149 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 12 6150 1.1 christos aese v5.16b, v28.16b 6151 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 11 6152 1.1 christos 6153 1.1 christos aese v3.16b, v26.16b 6154 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 12 6155 1.1 christos aese v6.16b, v26.16b 6156 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 12 6157 1.1 christos rev32 v22.16b, v30.16b //CTR block 8k+17 6158 1.1 christos 6159 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+17 6160 1.1 christos aese v4.16b, v28.16b 6161 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 11 6162 1.1 christos .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 6163 1.1 christos 6164 1.1 christos aese v5.16b, v26.16b 6165 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 12 6166 1.1 christos ldr q28, [x8, #224] //load rk14 6167 1.1 christos aese v7.16b, v27.16b //AES block 8k+15 - round 13 6168 1.1 christos 6169 1.1 
christos ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load plaintext 6170 1.1 christos aese v2.16b, v26.16b 6171 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 12 6172 1.1 christos aese v4.16b, v26.16b 6173 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 12 6174 1.1 christos 6175 1.1 christos .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 6176 1.1 christos aese v1.16b, v26.16b 6177 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 12 6178 1.1 christos ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext 6179 1.1 christos 6180 1.1 christos ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext 6181 1.1 christos aese v2.16b, v27.16b //AES block 8k+10 - round 13 6182 1.1 christos aese v4.16b, v27.16b //AES block 8k+12 - round 13 6183 1.1 christos 6184 1.1 christos rev32 v23.16b, v30.16b //CTR block 8k+18 6185 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+18 6186 1.1 christos aese v5.16b, v27.16b //AES block 8k+13 - round 13 6187 1.1 christos 6188 1.1 christos aese v0.16b, v26.16b 6189 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 12 6190 1.1 christos aese v3.16b, v27.16b //AES block 8k+11 - round 13 6191 1.1 christos cmp x0, x5 //.LOOP CONTROL 6192 1.1 christos 6193 1.1 christos .inst 0xce02714a //eor3 v10.16b, v10.16b, v2.16b, v28.16b //AES block 8k+10 - result 6194 1.1 christos rev32 v25.16b, v30.16b //CTR block 8k+19 6195 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+19 6196 1.1 christos 6197 1.1 christos aese v0.16b, v27.16b //AES block 8k+8 - round 13 6198 1.1 christos aese v6.16b, v27.16b //AES block 8k+14 - round 13 6199 1.1 christos .inst 0xce0571ad //eor3 v13.16b, v13.16b, v5.16b, v28.16b //AES block 5 - result 6200 1.1 christos 6201 1.1 christos ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 6202 1.1 christos pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 6203 1.1 christos aese v1.16b, v27.16b //AES block 8k+9 
- round 13 6204 1.1 christos 6205 1.1 christos .inst 0xce04718c //eor3 v12.16b, v12.16b, v4.16b, v28.16b //AES block 4 - result 6206 1.1 christos rev32 v4.16b, v30.16b //CTR block 8k+20 6207 1.1 christos .inst 0xce03716b //eor3 v11.16b, v11.16b, v3.16b, v28.16b //AES block 8k+11 - result 6208 1.1 christos 6209 1.1 christos mov v3.16b, v25.16b //CTR block 8k+19 6210 1.1 christos .inst 0xce017129 //eor3 v9.16b, v9.16b, v1.16b, v28.16b //AES block 8k+9 - result 6211 1.1 christos .inst 0xce007108 //eor3 v8.16b, v8.16b, v0.16b, v28.16b //AES block 8k+8 - result 6212 1.1 christos 6213 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+20 6214 1.1 christos stp q8, q9, [x2], #32 //AES block 8k+8, 8k+9 - store result 6215 1.1 christos mov v2.16b, v23.16b //CTR block 8k+18 6216 1.1 christos 6217 1.1 christos .inst 0xce0771ef //eor3 v15.16b, v15.16b, v7.16b, v28.16b //AES block 7 - result 6218 1.1 christos .inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low 6219 1.1 christos stp q10, q11, [x2], #32 //AES block 8k+10, 8k+11 - store result 6220 1.1 christos 6221 1.1 christos .inst 0xce0671ce //eor3 v14.16b, v14.16b, v6.16b, v28.16b //AES block 6 - result 6222 1.1 christos mov v1.16b, v22.16b //CTR block 8k+17 6223 1.1 christos stp q12, q13, [x2], #32 //AES block 4, 5 - store result 6224 1.1 christos 6225 1.1 christos stp q14, q15, [x2], #32 //AES block 6, 7 - store result 6226 1.1 christos mov v0.16b, v20.16b //CTR block 8k+16 6227 1.1 christos b.lt .L256_enc_main_loop 6228 1.1 christos 6229 1.1 christos .L256_enc_prepretail: //PREPRETAIL 6230 1.1 christos rev32 v5.16b, v30.16b //CTR block 8k+13 6231 1.1 christos ldp q26, q27, [x8, #0] //load rk0, rk1 6232 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+13 6233 1.1 christos 6234 1.1 christos rev64 v10.16b, v10.16b //GHASH block 8k+2 6235 1.1 christos 6236 1.1 christos rev32 v6.16b, v30.16b //CTR block 8k+14 6237 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+14 6238 1.1 
christos 6239 1.1 christos rev64 v13.16b, v13.16b //GHASH block 8k+5 6240 1.1 christos ldr q21, [x3, #144] //load h6k | h5k 6241 1.1 christos ldr q24, [x3, #192] //load h8k | h7k 6242 1.1 christos 6243 1.1 christos rev32 v7.16b, v30.16b //CTR block 8k+15 6244 1.1 christos 6245 1.1 christos aese v6.16b, v26.16b 6246 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 6247 1.1 christos aese v4.16b, v26.16b 6248 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 6249 1.1 christos aese v1.16b, v26.16b 6250 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 6251 1.1 christos 6252 1.1 christos aese v5.16b, v26.16b 6253 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 6254 1.1 christos aese v0.16b, v26.16b 6255 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 6256 1.1 christos 6257 1.1 christos aese v2.16b, v26.16b 6258 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 6259 1.1 christos aese v7.16b, v26.16b 6260 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 6261 1.1 christos aese v3.16b, v26.16b 6262 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 6263 1.1 christos 6264 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 6265 1.1 christos rev64 v8.16b, v8.16b //GHASH block 8k 6266 1.1 christos aese v1.16b, v27.16b 6267 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 6268 1.1 christos 6269 1.1 christos rev64 v9.16b, v9.16b //GHASH block 8k+1 6270 1.1 christos ldp q28, q26, [x8, #32] //load rk2, rk3 6271 1.1 christos aese v3.16b, v27.16b 6272 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 6273 1.1 christos 6274 1.1 christos ldr q23, [x3, #176] //load h7l | h7h 6275 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 6276 1.1 christos ldr q25, [x3, #208] //load h8l | h8h 6277 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 6278 1.1 christos aese v2.16b, v27.16b 6279 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 6280 1.1 christos 6281 1.1 
christos ldr q20, [x3, #128] //load h5l | h5h 6282 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 6283 1.1 christos ldr q22, [x3, #160] //load h6l | h6h 6284 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 6285 1.1 christos aese v0.16b, v27.16b 6286 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 6287 1.1 christos aese v5.16b, v27.16b 6288 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 6289 1.1 christos 6290 1.1 christos aese v4.16b, v27.16b 6291 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 6292 1.1 christos eor v8.16b, v8.16b, v19.16b //PRE 1 6293 1.1 christos 6294 1.1 christos rev64 v11.16b, v11.16b //GHASH block 8k+3 6295 1.1 christos aese v6.16b, v27.16b 6296 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 6297 1.1 christos 6298 1.1 christos aese v1.16b, v28.16b 6299 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 6300 1.1 christos aese v2.16b, v28.16b 6301 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 6302 1.1 christos aese v7.16b, v27.16b 6303 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 6304 1.1 christos 6305 1.1 christos aese v4.16b, v28.16b 6306 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 6307 1.1 christos aese v0.16b, v28.16b 6308 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 6309 1.1 christos aese v6.16b, v28.16b 6310 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 6311 1.1 christos 6312 1.1 christos aese v5.16b, v28.16b 6313 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 6314 1.1 christos aese v7.16b, v28.16b 6315 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 6316 1.1 christos aese v3.16b, v28.16b 6317 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 6318 1.1 christos 6319 1.1 christos ldp q27, q28, [x8, #64] //load rk4, rk5 6320 1.1 christos trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 6321 1.1 christos pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - 
high 6322 1.1 christos 6323 1.1 christos rev64 v14.16b, v14.16b //GHASH block 8k+6 6324 1.1 christos aese v4.16b, v26.16b 6325 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 6326 1.1 christos pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 6327 1.1 christos 6328 1.1 christos aese v7.16b, v26.16b 6329 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 6330 1.1 christos pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 6331 1.1 christos trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 6332 1.1 christos 6333 1.1 christos pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 6334 1.1 christos aese v6.16b, v26.16b 6335 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 6336 1.1 christos 6337 1.1 christos aese v2.16b, v26.16b 6338 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 6339 1.1 christos aese v3.16b, v26.16b 6340 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 6341 1.1 christos eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 6342 1.1 christos 6343 1.1 christos pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 6344 1.1 christos pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 6345 1.1 christos aese v1.16b, v26.16b 6346 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 6347 1.1 christos 6348 1.1 christos aese v0.16b, v26.16b 6349 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 6350 1.1 christos eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 6351 1.1 christos aese v5.16b, v26.16b 6352 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 6353 1.1 christos 6354 1.1 christos pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 6355 1.1 christos aese v1.16b, v27.16b 6356 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 6357 1.1 christos aese v6.16b, v27.16b 6358 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 6359 1.1 christos 6360 1.1 christos aese v0.16b, v27.16b 6361 1.1 christos aesmc v0.16b, v0.16b 
//AES block 8k+8 - round 4 6362 1.1 christos aese v2.16b, v27.16b 6363 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 6364 1.1 christos aese v4.16b, v27.16b 6365 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 6366 1.1 christos 6367 1.1 christos aese v6.16b, v28.16b 6368 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 6369 1.1 christos pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 6370 1.1 christos .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 6371 1.1 christos 6372 1.1 christos aese v7.16b, v27.16b 6373 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 6374 1.1 christos trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 6375 1.1 christos trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 6376 1.1 christos 6377 1.1 christos aese v5.16b, v27.16b 6378 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 6379 1.1 christos eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 6380 1.1 christos aese v3.16b, v27.16b 6381 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 6382 1.1 christos 6383 1.1 christos pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 6384 1.1 christos pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 6385 1.1 christos eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 6386 1.1 christos 6387 1.1 christos rev64 v12.16b, v12.16b //GHASH block 8k+4 6388 1.1 christos aese v1.16b, v28.16b 6389 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 6390 1.1 christos aese v0.16b, v28.16b 6391 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 6392 1.1 christos 6393 1.1 christos aese v7.16b, v28.16b 6394 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 6395 1.1 christos aese v4.16b, v28.16b 6396 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 6397 1.1 christos ldp q26, q27, [x8, #96] //load rk6, rk7 6398 1.1 christos 6399 1.1 christos ldr q23, [x3, #80] //load 
h3l | h3h 6400 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 6401 1.1 christos ldr q25, [x3, #112] //load h4l | h4h 6402 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 6403 1.1 christos pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 6404 1.1 christos pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 6405 1.1 christos 6406 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 6407 1.1 christos eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 6408 1.1 christos 6409 1.1 christos aese v5.16b, v28.16b 6410 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 6411 1.1 christos rev64 v15.16b, v15.16b //GHASH block 8k+7 6412 1.1 christos trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 6413 1.1 christos 6414 1.1 christos aese v3.16b, v28.16b 6415 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 6416 1.1 christos aese v2.16b, v28.16b 6417 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 6418 1.1 christos .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 6419 1.1 christos 6420 1.1 christos aese v7.16b, v26.16b 6421 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 6422 1.1 christos aese v4.16b, v26.16b 6423 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 6424 1.1 christos aese v6.16b, v26.16b 6425 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 6426 1.1 christos 6427 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 6428 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 6429 1.1 christos aese v5.16b, v26.16b 6430 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 6431 1.1 christos aese v3.16b, v26.16b 6432 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 6433 1.1 christos 6434 1.1 christos aese v0.16b, v26.16b 6435 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 6436 1.1 christos aese v1.16b, v26.16b 6437 1.1 christos aesmc v1.16b, v1.16b //AES block 
8k+9 - round 6 6438 1.1 christos aese v2.16b, v26.16b 6439 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 6440 1.1 christos 6441 1.1 christos pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 6442 1.1 christos pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 6443 1.1 christos ldr q20, [x3, #32] //load h1l | h1h 6444 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 6445 1.1 christos ldr q22, [x3, #64] //load h2l | h2h 6446 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 6447 1.1 christos 6448 1.1 christos ldp q28, q26, [x8, #128] //load rk8, rk9 6449 1.1 christos aese v1.16b, v27.16b 6450 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 6451 1.1 christos aese v4.16b, v27.16b 6452 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 6453 1.1 christos 6454 1.1 christos pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 6455 1.1 christos trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 6456 1.1 christos 6457 1.1 christos aese v5.16b, v27.16b 6458 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 6459 1.1 christos aese v6.16b, v27.16b 6460 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 6461 1.1 christos pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 6462 1.1 christos 6463 1.1 christos aese v7.16b, v27.16b 6464 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 6465 1.1 christos aese v3.16b, v27.16b 6466 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 6467 1.1 christos eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 6468 1.1 christos 6469 1.1 christos pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 6470 1.1 christos pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 6471 1.1 christos aese v2.16b, v27.16b 6472 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 6473 1.1 christos 6474 1.1 christos trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 6475 1.1 christos trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 
- mid 6476 1.1 christos aese v0.16b, v27.16b 6477 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 6478 1.1 christos 6479 1.1 christos aese v7.16b, v28.16b 6480 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 6481 1.1 christos .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 6482 1.1 christos aese v2.16b, v28.16b 6483 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 6484 1.1 christos 6485 1.1 christos aese v6.16b, v28.16b 6486 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 6487 1.1 christos aese v4.16b, v28.16b 6488 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 6489 1.1 christos aese v3.16b, v28.16b 6490 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 6491 1.1 christos 6492 1.1 christos aese v5.16b, v28.16b 6493 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 6494 1.1 christos eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 6495 1.1 christos aese v0.16b, v28.16b 6496 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 6497 1.1 christos 6498 1.1 christos pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 6499 1.1 christos pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 6500 1.1 christos aese v1.16b, v28.16b 6501 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 6502 1.1 christos 6503 1.1 christos pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 6504 1.1 christos pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 6505 1.1 christos pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 6506 1.1 christos 6507 1.1 christos pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 6508 1.1 christos .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 6509 1.1 christos .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 6510 1.1 christos 6511 1.1 christos ldp q27, q28, [x8, #160] //load rk10, rk11 6512 1.1 
christos aese v1.16b, v26.16b 6513 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 6514 1.1 christos aese v0.16b, v26.16b 6515 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 6516 1.1 christos 6517 1.1 christos .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 6518 1.1 christos .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 6519 1.1 christos ldr d16, [x10] //MODULO - load modulo constant 6520 1.1 christos 6521 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 6522 1.1 christos 6523 1.1 christos aese v3.16b, v26.16b 6524 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 6525 1.1 christos aese v7.16b, v26.16b 6526 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 6527 1.1 christos aese v5.16b, v26.16b 6528 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 6529 1.1 christos 6530 1.1 christos aese v2.16b, v26.16b 6531 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 6532 1.1 christos aese v6.16b, v26.16b 6533 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 6534 1.1 christos 6535 1.1 christos aese v5.16b, v27.16b 6536 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 6537 1.1 christos aese v1.16b, v27.16b 6538 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 6539 1.1 christos aese v4.16b, v26.16b 6540 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 6541 1.1 christos 6542 1.1 christos aese v7.16b, v27.16b 6543 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 6544 1.1 christos aese v6.16b, v27.16b 6545 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 6546 1.1 christos aese v3.16b, v27.16b 6547 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 6548 1.1 christos 6549 1.1 christos aese v4.16b, v27.16b 6550 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 6551 
1.1 christos aese v0.16b, v27.16b 6552 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 6553 1.1 christos aese v2.16b, v27.16b 6554 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 6555 1.1 christos 6556 1.1 christos pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 6557 1.1 christos .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 6558 1.1 christos aese v7.16b, v28.16b 6559 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 11 6560 1.1 christos 6561 1.1 christos ldp q26, q27, [x8, #192] //load rk12, rk13 6562 1.1 christos ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 6563 1.1 christos aese v2.16b, v28.16b 6564 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 11 6565 1.1 christos 6566 1.1 christos .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 6567 1.1 christos aese v1.16b, v28.16b 6568 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 11 6569 1.1 christos aese v6.16b, v28.16b 6570 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 11 6571 1.1 christos 6572 1.1 christos aese v0.16b, v28.16b 6573 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 11 6574 1.1 christos aese v4.16b, v28.16b 6575 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 11 6576 1.1 christos aese v5.16b, v28.16b 6577 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 11 6578 1.1 christos 6579 1.1 christos pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 6580 1.1 christos aese v3.16b, v28.16b 6581 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 11 6582 1.1 christos ldr q28, [x8, #224] //load rk14 6583 1.1 christos 6584 1.1 christos aese v1.16b, v26.16b 6585 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 12 6586 1.1 christos aese v2.16b, v26.16b 6587 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 12 6588 1.1 christos aese v0.16b, v26.16b 6589 1.1 
christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 12 6590 1.1 christos 6591 1.1 christos aese v6.16b, v26.16b 6592 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 12 6593 1.1 christos aese v5.16b, v26.16b 6594 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 12 6595 1.1 christos ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 6596 1.1 christos 6597 1.1 christos aese v4.16b, v26.16b 6598 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 12 6599 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+15 6600 1.1 christos 6601 1.1 christos aese v3.16b, v26.16b 6602 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 12 6603 1.1 christos aese v7.16b, v26.16b 6604 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 12 6605 1.1 christos aese v0.16b, v27.16b //AES block 8k+8 - round 13 6606 1.1 christos 6607 1.1 christos .inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low 6608 1.1 christos aese v5.16b, v27.16b //AES block 8k+13 - round 13 6609 1.1 christos aese v1.16b, v27.16b //AES block 8k+9 - round 13 6610 1.1 christos 6611 1.1 christos aese v3.16b, v27.16b //AES block 8k+11 - round 13 6612 1.1 christos aese v4.16b, v27.16b //AES block 8k+12 - round 13 6613 1.1 christos aese v7.16b, v27.16b //AES block 8k+15 - round 13 6614 1.1 christos 6615 1.1 christos aese v2.16b, v27.16b //AES block 8k+10 - round 13 6616 1.1 christos aese v6.16b, v27.16b //AES block 8k+14 - round 13 6617 1.1 christos .L256_enc_tail: //TAIL 6618 1.1 christos 6619 1.1 christos ldp q24, q25, [x3, #192] //load h8l | h8h 6620 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 6621 1.1 christos sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process 6622 1.1 christos 6623 1.1 christos ldr q8, [x0], #16 //AES block 8k+8 - load plaintext 6624 1.1 christos 6625 1.1 christos ldp q20, q21, [x3, #128] //load h5l | h5h 6626 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 6627 1.1 christos 6628 1.1 
christos ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag 6629 1.1 christos ldp q22, q23, [x3, #160] //load h6l | h6h 6630 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 6631 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 6632 1.1 christos mov v29.16b, v28.16b 6633 1.1 christos 6634 1.1 christos cmp x5, #112 6635 1.1 christos .inst 0xce007509 //eor3 v9.16b, v8.16b, v0.16b, v29.16b //AES block 8k+8 - result 6636 1.1 christos b.gt .L256_enc_blocks_more_than_7 6637 1.1 christos 6638 1.1 christos movi v19.8b, #0 6639 1.1 christos mov v7.16b, v6.16b 6640 1.1 christos movi v17.8b, #0 6641 1.1 christos 6642 1.1 christos mov v6.16b, v5.16b 6643 1.1 christos mov v5.16b, v4.16b 6644 1.1 christos mov v4.16b, v3.16b 6645 1.1 christos 6646 1.1 christos mov v3.16b, v2.16b 6647 1.1 christos sub v30.4s, v30.4s, v31.4s 6648 1.1 christos mov v2.16b, v1.16b 6649 1.1 christos 6650 1.1 christos movi v18.8b, #0 6651 1.1 christos cmp x5, #96 6652 1.1 christos b.gt .L256_enc_blocks_more_than_6 6653 1.1 christos 6654 1.1 christos mov v7.16b, v6.16b 6655 1.1 christos mov v6.16b, v5.16b 6656 1.1 christos cmp x5, #80 6657 1.1 christos 6658 1.1 christos mov v5.16b, v4.16b 6659 1.1 christos mov v4.16b, v3.16b 6660 1.1 christos mov v3.16b, v1.16b 6661 1.1 christos 6662 1.1 christos sub v30.4s, v30.4s, v31.4s 6663 1.1 christos b.gt .L256_enc_blocks_more_than_5 6664 1.1 christos 6665 1.1 christos mov v7.16b, v6.16b 6666 1.1 christos sub v30.4s, v30.4s, v31.4s 6667 1.1 christos 6668 1.1 christos mov v6.16b, v5.16b 6669 1.1 christos mov v5.16b, v4.16b 6670 1.1 christos 6671 1.1 christos cmp x5, #64 6672 1.1 christos mov v4.16b, v1.16b 6673 1.1 christos b.gt .L256_enc_blocks_more_than_4 6674 1.1 christos 6675 1.1 christos cmp x5, #48 6676 1.1 christos mov v7.16b, v6.16b 6677 1.1 christos mov v6.16b, v5.16b 6678 1.1 christos 6679 1.1 christos mov v5.16b, v1.16b 6680 1.1 christos sub v30.4s, v30.4s, v31.4s 6681 1.1 christos b.gt .L256_enc_blocks_more_than_3 6682 1.1 christos 6683 1.1 
christos cmp x5, #32 6684 1.1 christos mov v7.16b, v6.16b 6685 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 6686 1.1 christos 6687 1.1 christos mov v6.16b, v1.16b 6688 1.1 christos sub v30.4s, v30.4s, v31.4s 6689 1.1 christos b.gt .L256_enc_blocks_more_than_2 6690 1.1 christos 6691 1.1 christos mov v7.16b, v1.16b 6692 1.1 christos 6693 1.1 christos sub v30.4s, v30.4s, v31.4s 6694 1.1 christos cmp x5, #16 6695 1.1 christos b.gt .L256_enc_blocks_more_than_1 6696 1.1 christos 6697 1.1 christos sub v30.4s, v30.4s, v31.4s 6698 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 6699 1.1 christos b .L256_enc_blocks_less_than_1 6700 1.1 christos .L256_enc_blocks_more_than_7: //blocks left > 7 6701 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-7 block - store result 6702 1.1 christos 6703 1.1 christos rev64 v8.16b, v9.16b //GHASH final-7 block 6704 1.1 christos 6705 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 6706 1.1 christos 6707 1.1 christos ldr q9, [x0], #16 //AES final-6 block - load plaintext 6708 1.1 christos 6709 1.1 christos pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high 6710 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-7 block - mid 6711 1.1 christos ins v18.d[0], v24.d[1] //GHASH final-7 block - mid 6712 1.1 christos 6713 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 6714 1.1 christos 6715 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid 6716 1.1 christos .inst 0xce017529 //eor3 v9.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result 6717 1.1 christos 6718 1.1 christos pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid 6719 1.1 christos pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low 6720 1.1 christos .L256_enc_blocks_more_than_6: //blocks left > 6 6721 1.1 christos 6722 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-6 block - store result 6723 1.1 christos 6724 1.1 christos rev64 v8.16b, v9.16b //GHASH final-6 block 6725 1.1 christos 6726 1.1 christos eor 
v8.16b, v8.16b, v16.16b //feed in partial tag 6727 1.1 christos 6728 1.1 christos pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low 6729 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-6 block - mid 6730 1.1 christos pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high 6731 1.1 christos 6732 1.1 christos ldr q9, [x0], #16 //AES final-5 block - load plaintext 6733 1.1 christos 6734 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low 6735 1.1 christos 6736 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid 6737 1.1 christos 6738 1.1 christos pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid 6739 1.1 christos .inst 0xce027529 //eor3 v9.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result 6740 1.1 christos 6741 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 6742 1.1 christos 6743 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid 6744 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high 6745 1.1 christos .L256_enc_blocks_more_than_5: //blocks left > 5 6746 1.1 christos 6747 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-5 block - store result 6748 1.1 christos 6749 1.1 christos rev64 v8.16b, v9.16b //GHASH final-5 block 6750 1.1 christos 6751 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 6752 1.1 christos 6753 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-5 block - mid 6754 1.1 christos 6755 1.1 christos pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high 6756 1.1 christos 6757 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high 6758 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid 6759 1.1 christos 6760 1.1 christos ins v27.d[1], v27.d[0] //GHASH final-5 block - mid 6761 1.1 christos 6762 1.1 christos ldr q9, [x0], #16 //AES final-4 block - load plaintext 6763 1.1 christos pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low 6764 1.1 christos 6765 1.1 christos pmull2 v27.1q, v27.2d, 
v21.2d //GHASH final-5 block - mid 6766 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 6767 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low 6768 1.1 christos 6769 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid 6770 1.1 christos .inst 0xce037529 //eor3 v9.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result 6771 1.1 christos .L256_enc_blocks_more_than_4: //blocks left > 4 6772 1.1 christos 6773 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-4 block - store result 6774 1.1 christos 6775 1.1 christos rev64 v8.16b, v9.16b //GHASH final-4 block 6776 1.1 christos 6777 1.1 christos ldr q9, [x0], #16 //AES final-3 block - load plaintext 6778 1.1 christos 6779 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 6780 1.1 christos 6781 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-4 block - mid 6782 1.1 christos pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high 6783 1.1 christos 6784 1.1 christos .inst 0xce047529 //eor3 v9.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result 6785 1.1 christos pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low 6786 1.1 christos 6787 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid 6788 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low 6789 1.1 christos 6790 1.1 christos pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid 6791 1.1 christos 6792 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 6793 1.1 christos 6794 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid 6795 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high 6796 1.1 christos .L256_enc_blocks_more_than_3: //blocks left > 3 6797 1.1 christos 6798 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-3 block - store result 6799 1.1 christos 6800 1.1 christos ldr q25, [x3, #112] //load h4l | h4h 6801 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 6802 1.1 christos rev64 v8.16b, 
v9.16b //GHASH final-3 block 6803 1.1 christos 6804 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 6805 1.1 christos 6806 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-3 block - mid 6807 1.1 christos pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high 6808 1.1 christos 6809 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high 6810 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid 6811 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 6812 1.1 christos 6813 1.1 christos ins v27.d[1], v27.d[0] //GHASH final-3 block - mid 6814 1.1 christos ldr q9, [x0], #16 //AES final-2 block - load plaintext 6815 1.1 christos 6816 1.1 christos pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid 6817 1.1 christos pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low 6818 1.1 christos 6819 1.1 christos .inst 0xce057529 //eor3 v9.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result 6820 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 6821 1.1 christos 6822 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid 6823 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low 6824 1.1 christos .L256_enc_blocks_more_than_2: //blocks left > 2 6825 1.1 christos 6826 1.1 christos ldr q23, [x3, #80] //load h3l | h3h 6827 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 6828 1.1 christos 6829 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-2 block - store result 6830 1.1 christos 6831 1.1 christos rev64 v8.16b, v9.16b //GHASH final-2 block 6832 1.1 christos ldr q9, [x0], #16 //AES final-1 block - load plaintext 6833 1.1 christos 6834 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 6835 1.1 christos 6836 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-2 block - mid 6837 1.1 christos 6838 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 6839 1.1 christos 6840 1.1 christos pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high 6841 1.1 
christos .inst 0xce067529 //eor3 v9.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result 6842 1.1 christos 6843 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid 6844 1.1 christos 6845 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high 6846 1.1 christos 6847 1.1 christos pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid 6848 1.1 christos pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low 6849 1.1 christos 6850 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid 6851 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low 6852 1.1 christos .L256_enc_blocks_more_than_1: //blocks left > 1 6853 1.1 christos 6854 1.1 christos st1 { v9.16b}, [x2], #16 //AES final-1 block - store result 6855 1.1 christos 6856 1.1 christos ldr q22, [x3, #64] //load h2l | h2h 6857 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 6858 1.1 christos rev64 v8.16b, v9.16b //GHASH final-1 block 6859 1.1 christos ldr q9, [x0], #16 //AES final block - load plaintext 6860 1.1 christos 6861 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 6862 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 6863 1.1 christos 6864 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-1 block - mid 6865 1.1 christos pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high 6866 1.1 christos 6867 1.1 christos .inst 0xce077529 //eor3 v9.16b, v9.16b, v7.16b, v29.16b //AES final block - result 6868 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high 6869 1.1 christos 6870 1.1 christos pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low 6871 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid 6872 1.1 christos 6873 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 6874 1.1 christos 6875 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low 6876 1.1 christos ins v27.d[1], v27.d[0] //GHASH final-1 block - mid 6877 1.1 christos 6878 1.1 christos pmull2 
v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid 6879 1.1 christos 6880 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid 6881 1.1 christos .L256_enc_blocks_less_than_1: //blocks left <= 1 6882 1.1 christos 6883 1.1 christos and x1, x1, #127 //bit_length %= 128 6884 1.1 christos 6885 1.1 christos sub x1, x1, #128 //bit_length -= 128 6886 1.1 christos 6887 1.1 christos neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) 6888 1.1 christos 6889 1.1 christos mvn x6, xzr //temp0_x = 0xffffffffffffffff 6890 1.1 christos and x1, x1, #127 //bit_length %= 128 6891 1.1 christos 6892 1.1 christos lsr x6, x6, x1 //temp0_x is mask for top 64b of last block 6893 1.1 christos cmp x1, #64 6894 1.1 christos mvn x7, xzr //temp1_x = 0xffffffffffffffff 6895 1.1 christos 6896 1.1 christos csel x14, x6, xzr, lt 6897 1.1 christos csel x13, x7, x6, lt 6898 1.1 christos 6899 1.1 christos mov v0.d[0], x13 //ctr0b is mask for last block 6900 1.1 christos ldr q20, [x3, #32] //load h1l | h1h 6901 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 6902 1.1 christos 6903 1.1 christos ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored 6904 1.1 christos mov v0.d[1], x14 6905 1.1 christos 6906 1.1 christos and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits 6907 1.1 christos 6908 1.1 christos rev64 v8.16b, v9.16b //GHASH final block 6909 1.1 christos 6910 1.1 christos rev32 v30.16b, v30.16b 6911 1.1 christos bif v9.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing 6912 1.1 christos str q30, [x16] //store the updated counter 6913 1.1 christos 6914 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 6915 1.1 christos st1 { v9.16b}, [x2] //store all 16B 6916 1.1 christos 6917 1.1 christos ins v16.d[0], v8.d[1] //GHASH final block - mid 6918 1.1 christos pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high 6919 1.1 christos pmull v26.1q, v8.1d, 
v20.1d //GHASH final block - low 6920 1.1 christos 6921 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final block - high 6922 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final block - low 6923 1.1 christos 6924 1.1 christos eor v16.8b, v16.8b, v8.8b //GHASH final block - mid 6925 1.1 christos 6926 1.1 christos pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid 6927 1.1 christos 6928 1.1 christos eor v18.16b, v18.16b, v16.16b //GHASH final block - mid 6929 1.1 christos ldr d16, [x10] //MODULO - load modulo constant 6930 1.1 christos 6931 1.1 christos ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 6932 1.1 christos 6933 1.1 christos .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 6934 1.1 christos pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 6935 1.1 christos 6936 1.1 christos .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 6937 1.1 christos 6938 1.1 christos pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 6939 1.1 christos ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 6940 1.1 christos 6941 1.1 christos .inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 6942 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 6943 1.1 christos rev64 v19.16b, v19.16b 6944 1.1 christos st1 { v19.16b }, [x3] 6945 1.1 christos mov x0, x9 //return sizes 6946 1.1 christos 6947 1.1 christos ldp d10, d11, [sp, #16] 6948 1.1 christos ldp d12, d13, [sp, #32] 6949 1.1 christos ldp d14, d15, [sp, #48] 6950 1.1 christos ldp d8, d9, [sp], #80 6951 1.1 christos ret 6952 1.1 christos 6953 1.1 christos .L256_enc_ret: 6954 1.1 christos mov w0, #0x0 6955 1.1 christos ret 6956 1.1 christos .size unroll8_eor3_aes_gcm_enc_256_kernel,.-unroll8_eor3_aes_gcm_enc_256_kernel 6957 1.1 christos .globl unroll8_eor3_aes_gcm_dec_256_kernel 6958 1.1 christos .type unroll8_eor3_aes_gcm_dec_256_kernel,%function 6959 1.1 christos 
.align 4 6960 1.1 christos unroll8_eor3_aes_gcm_dec_256_kernel: 6961 1.1 christos AARCH64_VALID_CALL_TARGET 6962 1.1 christos cbz x1, .L256_dec_ret 6963 1.1 christos stp d8, d9, [sp, #-80]! 6964 1.1 christos lsr x9, x1, #3 6965 1.1 christos mov x16, x4 6966 1.1 christos mov x8, x5 6967 1.1 christos stp d10, d11, [sp, #16] 6968 1.1 christos stp d12, d13, [sp, #32] 6969 1.1 christos stp d14, d15, [sp, #48] 6970 1.1 christos mov x5, #0xc200000000000000 6971 1.1 christos stp x5, xzr, [sp, #64] 6972 1.1 christos add x10, sp, #64 6973 1.1 christos 6974 1.1 christos ld1 { v0.16b}, [x16] //CTR block 0 6975 1.1 christos 6976 1.1 christos mov x15, #0x100000000 //set up counter increment 6977 1.1 christos movi v31.16b, #0x0 6978 1.1 christos mov v31.d[1], x15 6979 1.1 christos mov x5, x9 6980 1.1 christos 6981 1.1 christos sub x5, x5, #1 //byte_len - 1 6982 1.1 christos 6983 1.1 christos rev32 v30.16b, v0.16b //set up reversed counter 6984 1.1 christos 6985 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 0 6986 1.1 christos 6987 1.1 christos rev32 v1.16b, v30.16b //CTR block 1 6988 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 1 6989 1.1 christos 6990 1.1 christos rev32 v2.16b, v30.16b //CTR block 2 6991 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 2 6992 1.1 christos ldp q26, q27, [x8, #0] //load rk0, rk1 6993 1.1 christos 6994 1.1 christos rev32 v3.16b, v30.16b //CTR block 3 6995 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 3 6996 1.1 christos 6997 1.1 christos rev32 v4.16b, v30.16b //CTR block 4 6998 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 4 6999 1.1 christos 7000 1.1 christos aese v0.16b, v26.16b 7001 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 0 7002 1.1 christos 7003 1.1 christos rev32 v5.16b, v30.16b //CTR block 5 7004 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 5 7005 1.1 christos 7006 1.1 christos aese v1.16b, v26.16b 7007 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 0 7008 1.1 christos aese v2.16b, 
v26.16b 7009 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 0 7010 1.1 christos 7011 1.1 christos rev32 v6.16b, v30.16b //CTR block 6 7012 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 6 7013 1.1 christos 7014 1.1 christos rev32 v7.16b, v30.16b //CTR block 7 7015 1.1 christos aese v4.16b, v26.16b 7016 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 0 7017 1.1 christos 7018 1.1 christos aese v6.16b, v26.16b 7019 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 0 7020 1.1 christos aese v5.16b, v26.16b 7021 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 0 7022 1.1 christos 7023 1.1 christos aese v3.16b, v26.16b 7024 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 0 7025 1.1 christos aese v7.16b, v26.16b 7026 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 0 7027 1.1 christos ldp q28, q26, [x8, #32] //load rk2, rk3 7028 1.1 christos 7029 1.1 christos aese v6.16b, v27.16b 7030 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 1 7031 1.1 christos aese v4.16b, v27.16b 7032 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 1 7033 1.1 christos aese v0.16b, v27.16b 7034 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 1 7035 1.1 christos 7036 1.1 christos aese v5.16b, v27.16b 7037 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 1 7038 1.1 christos aese v7.16b, v27.16b 7039 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 1 7040 1.1 christos aese v1.16b, v27.16b 7041 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 1 7042 1.1 christos 7043 1.1 christos aese v2.16b, v27.16b 7044 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 1 7045 1.1 christos aese v3.16b, v27.16b 7046 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 1 7047 1.1 christos 7048 1.1 christos aese v3.16b, v28.16b 7049 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 2 7050 1.1 christos aese v2.16b, v28.16b 7051 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 2 7052 1.1 christos aese 
v6.16b, v28.16b 7053 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 2 7054 1.1 christos 7055 1.1 christos aese v1.16b, v28.16b 7056 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 2 7057 1.1 christos aese v7.16b, v28.16b 7058 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 2 7059 1.1 christos aese v5.16b, v28.16b 7060 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 2 7061 1.1 christos 7062 1.1 christos aese v0.16b, v28.16b 7063 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 2 7064 1.1 christos aese v4.16b, v28.16b 7065 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 2 7066 1.1 christos ldp q27, q28, [x8, #64] //load rk4, rk5 7067 1.1 christos 7068 1.1 christos aese v1.16b, v26.16b 7069 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 3 7070 1.1 christos aese v2.16b, v26.16b 7071 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 3 7072 1.1 christos 7073 1.1 christos aese v3.16b, v26.16b 7074 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 3 7075 1.1 christos aese v4.16b, v26.16b 7076 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 3 7077 1.1 christos 7078 1.1 christos aese v5.16b, v26.16b 7079 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 3 7080 1.1 christos aese v7.16b, v26.16b 7081 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 3 7082 1.1 christos aese v0.16b, v26.16b 7083 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 3 7084 1.1 christos 7085 1.1 christos aese v6.16b, v26.16b 7086 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 3 7087 1.1 christos 7088 1.1 christos aese v7.16b, v27.16b 7089 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 4 7090 1.1 christos aese v3.16b, v27.16b 7091 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 4 7092 1.1 christos 7093 1.1 christos aese v6.16b, v27.16b 7094 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 4 7095 1.1 christos aese v2.16b, v27.16b 7096 1.1 christos aesmc v2.16b, v2.16b //AES 
block 2 - round 4 7097 1.1 christos aese v0.16b, v27.16b 7098 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 4 7099 1.1 christos 7100 1.1 christos aese v4.16b, v27.16b 7101 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 4 7102 1.1 christos aese v1.16b, v27.16b 7103 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 4 7104 1.1 christos aese v5.16b, v27.16b 7105 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 4 7106 1.1 christos 7107 1.1 christos aese v0.16b, v28.16b 7108 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 5 7109 1.1 christos aese v6.16b, v28.16b 7110 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 5 7111 1.1 christos 7112 1.1 christos ldp q26, q27, [x8, #96] //load rk6, rk7 7113 1.1 christos aese v4.16b, v28.16b 7114 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 5 7115 1.1 christos aese v7.16b, v28.16b 7116 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 5 7117 1.1 christos 7118 1.1 christos aese v5.16b, v28.16b 7119 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 5 7120 1.1 christos 7121 1.1 christos aese v2.16b, v28.16b 7122 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 5 7123 1.1 christos aese v3.16b, v28.16b 7124 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 5 7125 1.1 christos 7126 1.1 christos aese v1.16b, v28.16b 7127 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 5 7128 1.1 christos 7129 1.1 christos aese v4.16b, v26.16b 7130 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 6 7131 1.1 christos aese v3.16b, v26.16b 7132 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 6 7133 1.1 christos aese v7.16b, v26.16b 7134 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 6 7135 1.1 christos 7136 1.1 christos aese v6.16b, v26.16b 7137 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 6 7138 1.1 christos aese v0.16b, v26.16b 7139 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 6 7140 1.1 christos aese v5.16b, v26.16b 7141 
1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 6 7142 1.1 christos 7143 1.1 christos aese v2.16b, v26.16b 7144 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 6 7145 1.1 christos aese v1.16b, v26.16b 7146 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 6 7147 1.1 christos ldp q28, q26, [x8, #128] //load rk8, rk9 7148 1.1 christos 7149 1.1 christos aese v5.16b, v27.16b 7150 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 7 7151 1.1 christos aese v0.16b, v27.16b 7152 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 7 7153 1.1 christos 7154 1.1 christos aese v3.16b, v27.16b 7155 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 7 7156 1.1 christos aese v2.16b, v27.16b 7157 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 7 7158 1.1 christos aese v7.16b, v27.16b 7159 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 7 7160 1.1 christos 7161 1.1 christos aese v4.16b, v27.16b 7162 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 7 7163 1.1 christos aese v1.16b, v27.16b 7164 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 7 7165 1.1 christos aese v6.16b, v27.16b 7166 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 7 7167 1.1 christos 7168 1.1 christos and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 7169 1.1 christos aese v7.16b, v28.16b 7170 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 8 7171 1.1 christos aese v5.16b, v28.16b 7172 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 8 7173 1.1 christos 7174 1.1 christos aese v0.16b, v28.16b 7175 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 8 7176 1.1 christos aese v1.16b, v28.16b 7177 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 8 7178 1.1 christos aese v2.16b, v28.16b 7179 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 8 7180 1.1 christos 7181 1.1 christos aese v4.16b, v28.16b 7182 1.1 christos aesmc v4.16b, v4.16b //AES 
block 4 - round 8 7183 1.1 christos aese v3.16b, v28.16b 7184 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 8 7185 1.1 christos aese v6.16b, v28.16b 7186 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 8 7187 1.1 christos 7188 1.1 christos aese v2.16b, v26.16b 7189 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 9 7190 1.1 christos 7191 1.1 christos ld1 { v19.16b}, [x3] 7192 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 7193 1.1 christos rev64 v19.16b, v19.16b 7194 1.1 christos ldp q27, q28, [x8, #160] //load rk10, rk11 7195 1.1 christos add x4, x0, x1, lsr #3 //end_input_ptr 7196 1.1 christos add x5, x5, x0 7197 1.1 christos 7198 1.1 christos aese v3.16b, v26.16b 7199 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 9 7200 1.1 christos aese v6.16b, v26.16b 7201 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 9 7202 1.1 christos 7203 1.1 christos aese v4.16b, v26.16b 7204 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 9 7205 1.1 christos aese v5.16b, v26.16b 7206 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 9 7207 1.1 christos 7208 1.1 christos aese v7.16b, v26.16b 7209 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 9 7210 1.1 christos 7211 1.1 christos aese v0.16b, v26.16b 7212 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 9 7213 1.1 christos aese v1.16b, v26.16b 7214 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 9 7215 1.1 christos 7216 1.1 christos aese v4.16b, v27.16b 7217 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 10 7218 1.1 christos aese v7.16b, v27.16b 7219 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 10 7220 1.1 christos aese v5.16b, v27.16b 7221 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 10 7222 1.1 christos 7223 1.1 christos aese v1.16b, v27.16b 7224 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 10 7225 1.1 christos aese v2.16b, v27.16b 7226 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 10 7227 1.1 christos 
aese v0.16b, v27.16b 7228 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 10 7229 1.1 christos 7230 1.1 christos aese v6.16b, v27.16b 7231 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 10 7232 1.1 christos aese v3.16b, v27.16b 7233 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 10 7234 1.1 christos ldp q26, q27, [x8, #192] //load rk12, rk13 7235 1.1 christos 7236 1.1 christos aese v0.16b, v28.16b 7237 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 11 7238 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 7 7239 1.1 christos 7240 1.1 christos aese v7.16b, v28.16b 7241 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 11 7242 1.1 christos aese v3.16b, v28.16b 7243 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 11 7244 1.1 christos aese v1.16b, v28.16b 7245 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 11 7246 1.1 christos 7247 1.1 christos aese v5.16b, v28.16b 7248 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 11 7249 1.1 christos aese v4.16b, v28.16b 7250 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 11 7251 1.1 christos aese v2.16b, v28.16b 7252 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 11 7253 1.1 christos 7254 1.1 christos aese v6.16b, v28.16b 7255 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 11 7256 1.1 christos ldr q28, [x8, #224] //load rk14 7257 1.1 christos 7258 1.1 christos aese v1.16b, v26.16b 7259 1.1 christos aesmc v1.16b, v1.16b //AES block 1 - round 12 7260 1.1 christos aese v4.16b, v26.16b 7261 1.1 christos aesmc v4.16b, v4.16b //AES block 4 - round 12 7262 1.1 christos aese v5.16b, v26.16b 7263 1.1 christos aesmc v5.16b, v5.16b //AES block 5 - round 12 7264 1.1 christos 7265 1.1 christos cmp x0, x5 //check if we have <= 8 blocks 7266 1.1 christos aese v3.16b, v26.16b 7267 1.1 christos aesmc v3.16b, v3.16b //AES block 3 - round 12 7268 1.1 christos aese v2.16b, v26.16b 7269 1.1 christos aesmc v2.16b, v2.16b //AES block 2 - round 12 7270 1.1 
christos 7271 1.1 christos aese v6.16b, v26.16b 7272 1.1 christos aesmc v6.16b, v6.16b //AES block 6 - round 12 7273 1.1 christos aese v0.16b, v26.16b 7274 1.1 christos aesmc v0.16b, v0.16b //AES block 0 - round 12 7275 1.1 christos aese v7.16b, v26.16b 7276 1.1 christos aesmc v7.16b, v7.16b //AES block 7 - round 12 7277 1.1 christos 7278 1.1 christos aese v5.16b, v27.16b //AES block 5 - round 13 7279 1.1 christos aese v1.16b, v27.16b //AES block 1 - round 13 7280 1.1 christos aese v2.16b, v27.16b //AES block 2 - round 13 7281 1.1 christos 7282 1.1 christos aese v0.16b, v27.16b //AES block 0 - round 13 7283 1.1 christos aese v4.16b, v27.16b //AES block 4 - round 13 7284 1.1 christos aese v6.16b, v27.16b //AES block 6 - round 13 7285 1.1 christos 7286 1.1 christos aese v3.16b, v27.16b //AES block 3 - round 13 7287 1.1 christos aese v7.16b, v27.16b //AES block 7 - round 13 7288 1.1 christos b.ge .L256_dec_tail //handle tail 7289 1.1 christos 7290 1.1 christos ldp q8, q9, [x0], #32 //AES block 0, 1 - load ciphertext 7291 1.1 christos 7292 1.1 christos ldp q10, q11, [x0], #32 //AES block 2, 3 - load ciphertext 7293 1.1 christos 7294 1.1 christos ldp q12, q13, [x0], #32 //AES block 4, 5 - load ciphertext 7295 1.1 christos 7296 1.1 christos ldp q14, q15, [x0], #32 //AES block 6, 7 - load ciphertext 7297 1.1 christos cmp x0, x5 //check if we have <= 8 blocks 7298 1.1 christos 7299 1.1 christos .inst 0xce017121 //eor3 v1.16b, v9.16b, v1.16b, v28.16b //AES block 1 - result 7300 1.1 christos .inst 0xce007100 //eor3 v0.16b, v8.16b, v0.16b, v28.16b //AES block 0 - result 7301 1.1 christos stp q0, q1, [x2], #32 //AES block 0, 1 - store result 7302 1.1 christos 7303 1.1 christos rev32 v0.16b, v30.16b //CTR block 8 7304 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8 7305 1.1 christos .inst 0xce037163 //eor3 v3.16b, v11.16b, v3.16b, v28.16b //AES block 3 - result 7306 1.1 christos 7307 1.1 christos .inst 0xce0571a5 //eor3 v5.16b, v13.16b, v5.16b, v28.16b //AES block 5 - 
result 7308 1.1 christos 7309 1.1 christos .inst 0xce047184 //eor3 v4.16b, v12.16b, v4.16b, v28.16b //AES block 4 - result 7310 1.1 christos rev32 v1.16b, v30.16b //CTR block 9 7311 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 9 7312 1.1 christos 7313 1.1 christos .inst 0xce027142 //eor3 v2.16b, v10.16b, v2.16b, v28.16b //AES block 2 - result 7314 1.1 christos stp q2, q3, [x2], #32 //AES block 2, 3 - store result 7315 1.1 christos 7316 1.1 christos rev32 v2.16b, v30.16b //CTR block 10 7317 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 10 7318 1.1 christos 7319 1.1 christos .inst 0xce0671c6 //eor3 v6.16b, v14.16b, v6.16b, v28.16b //AES block 6 - result 7320 1.1 christos 7321 1.1 christos rev32 v3.16b, v30.16b //CTR block 11 7322 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 11 7323 1.1 christos stp q4, q5, [x2], #32 //AES block 4, 5 - store result 7324 1.1 christos 7325 1.1 christos .inst 0xce0771e7 //eor3 v7.16b, v15.16b, v7.16b, v28.16b //AES block 7 - result 7326 1.1 christos stp q6, q7, [x2], #32 //AES block 6, 7 - store result 7327 1.1 christos 7328 1.1 christos rev32 v4.16b, v30.16b //CTR block 12 7329 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 12 7330 1.1 christos b.ge .L256_dec_prepretail //do prepretail 7331 1.1 christos 7332 1.1 christos .L256_dec_main_loop: //main loop start 7333 1.1 christos rev32 v5.16b, v30.16b //CTR block 8k+13 7334 1.1 christos ldp q26, q27, [x8, #0] //load rk0, rk1 7335 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+13 7336 1.1 christos 7337 1.1 christos rev64 v9.16b, v9.16b //GHASH block 8k+1 7338 1.1 christos ldr q23, [x3, #176] //load h7l | h7h 7339 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 7340 1.1 christos ldr q25, [x3, #208] //load h8l | h8h 7341 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 7342 1.1 christos 7343 1.1 christos rev32 v6.16b, v30.16b //CTR block 8k+14 7344 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+14 7345 1.1 christos rev64 v8.16b, v8.16b //GHASH block 8k 
7346 1.1 christos 7347 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 7348 1.1 christos rev64 v12.16b, v12.16b //GHASH block 8k+4 7349 1.1 christos rev64 v11.16b, v11.16b //GHASH block 8k+3 7350 1.1 christos 7351 1.1 christos rev32 v7.16b, v30.16b //CTR block 8k+15 7352 1.1 christos rev64 v15.16b, v15.16b //GHASH block 8k+7 7353 1.1 christos 7354 1.1 christos aese v3.16b, v26.16b 7355 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 7356 1.1 christos aese v6.16b, v26.16b 7357 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 7358 1.1 christos aese v2.16b, v26.16b 7359 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 7360 1.1 christos 7361 1.1 christos aese v7.16b, v26.16b 7362 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 7363 1.1 christos aese v0.16b, v26.16b 7364 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 7365 1.1 christos aese v5.16b, v26.16b 7366 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 7367 1.1 christos 7368 1.1 christos aese v4.16b, v26.16b 7369 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 7370 1.1 christos aese v1.16b, v26.16b 7371 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 7372 1.1 christos ldp q28, q26, [x8, #32] //load rk2, rk3 7373 1.1 christos 7374 1.1 christos eor v8.16b, v8.16b, v19.16b //PRE 1 7375 1.1 christos ldr q20, [x3, #128] //load h5l | h5h 7376 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 7377 1.1 christos ldr q22, [x3, #160] //load h6l | h6h 7378 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 7379 1.1 christos aese v6.16b, v27.16b 7380 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 7381 1.1 christos 7382 1.1 christos aese v4.16b, v27.16b 7383 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 7384 1.1 christos rev64 v10.16b, v10.16b //GHASH block 8k+2 7385 1.1 christos aese v3.16b, v27.16b 7386 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 7387 1.1 christos 7388 
1.1 christos aese v0.16b, v27.16b 7389 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 7390 1.1 christos aese v5.16b, v27.16b 7391 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 7392 1.1 christos aese v2.16b, v27.16b 7393 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 7394 1.1 christos 7395 1.1 christos trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 7396 1.1 christos aese v7.16b, v27.16b 7397 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 7398 1.1 christos aese v1.16b, v27.16b 7399 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 7400 1.1 christos 7401 1.1 christos aese v4.16b, v28.16b 7402 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 7403 1.1 christos aese v0.16b, v28.16b 7404 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 7405 1.1 christos aese v3.16b, v28.16b 7406 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 7407 1.1 christos 7408 1.1 christos aese v6.16b, v28.16b 7409 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 7410 1.1 christos aese v7.16b, v28.16b 7411 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 7412 1.1 christos pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 7413 1.1 christos 7414 1.1 christos aese v5.16b, v28.16b 7415 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 7416 1.1 christos aese v2.16b, v28.16b 7417 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 7418 1.1 christos aese v1.16b, v28.16b 7419 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 7420 1.1 christos 7421 1.1 christos ldp q27, q28, [x8, #64] //load rk4, rk5 7422 1.1 christos pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 7423 1.1 christos aese v3.16b, v26.16b 7424 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 7425 1.1 christos 7426 1.1 christos aese v0.16b, v26.16b 7427 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 7428 1.1 christos pmull2 v16.1q, 
v9.2d, v23.2d //GHASH block 8k+1 - high 7429 1.1 christos pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 7430 1.1 christos 7431 1.1 christos aese v5.16b, v26.16b 7432 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 7433 1.1 christos aese v6.16b, v26.16b 7434 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 7435 1.1 christos pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 7436 1.1 christos 7437 1.1 christos aese v4.16b, v26.16b 7438 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 7439 1.1 christos aese v1.16b, v26.16b 7440 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 7441 1.1 christos trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 7442 1.1 christos 7443 1.1 christos pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 7444 1.1 christos aese v2.16b, v26.16b 7445 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 7446 1.1 christos eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 7447 1.1 christos 7448 1.1 christos aese v5.16b, v27.16b 7449 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 7450 1.1 christos aese v7.16b, v26.16b 7451 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 7452 1.1 christos aese v3.16b, v27.16b 7453 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 7454 1.1 christos 7455 1.1 christos aese v2.16b, v27.16b 7456 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 7457 1.1 christos aese v0.16b, v27.16b 7458 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 7459 1.1 christos aese v1.16b, v27.16b 7460 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 7461 1.1 christos 7462 1.1 christos aese v6.16b, v27.16b 7463 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 7464 1.1 christos aese v7.16b, v27.16b 7465 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 7466 1.1 christos aese v4.16b, v27.16b 7467 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 7468 1.1 
christos 7469 1.1 christos ldr q21, [x3, #144] //load h6k | h5k 7470 1.1 christos ldr q24, [x3, #192] //load h8k | h7k 7471 1.1 christos eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 7472 1.1 christos pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 7473 1.1 christos 7474 1.1 christos ldp q26, q27, [x8, #96] //load rk6, rk7 7475 1.1 christos aese v5.16b, v28.16b 7476 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 7477 1.1 christos eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 7478 1.1 christos 7479 1.1 christos aese v0.16b, v28.16b 7480 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 7481 1.1 christos aese v3.16b, v28.16b 7482 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 7483 1.1 christos aese v7.16b, v28.16b 7484 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 7485 1.1 christos 7486 1.1 christos aese v1.16b, v28.16b 7487 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 7488 1.1 christos aese v2.16b, v28.16b 7489 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 7490 1.1 christos aese v6.16b, v28.16b 7491 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 7492 1.1 christos 7493 1.1 christos .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 7494 1.1 christos trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 7495 1.1 christos rev64 v13.16b, v13.16b //GHASH block 8k+5 7496 1.1 christos 7497 1.1 christos pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 7498 1.1 christos pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 7499 1.1 christos trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 7500 1.1 christos 7501 1.1 christos aese v3.16b, v26.16b 7502 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 7503 1.1 christos aese v0.16b, v26.16b 7504 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 7505 1.1 christos aese v4.16b, v28.16b 7506 1.1 christos aesmc v4.16b, v4.16b 
//AES block 8k+12 - round 5 7507 1.1 christos 7508 1.1 christos trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 7509 1.1 christos aese v1.16b, v26.16b 7510 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 7511 1.1 christos aese v6.16b, v26.16b 7512 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 7513 1.1 christos 7514 1.1 christos eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 7515 1.1 christos pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 7516 1.1 christos aese v4.16b, v26.16b 7517 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 7518 1.1 christos 7519 1.1 christos aese v2.16b, v26.16b 7520 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 7521 1.1 christos aese v5.16b, v26.16b 7522 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 7523 1.1 christos aese v7.16b, v26.16b 7524 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 7525 1.1 christos 7526 1.1 christos pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 7527 1.1 christos pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 7528 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 7529 1.1 christos 7530 1.1 christos ldr q23, [x3, #80] //load h3l | h3h 7531 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 7532 1.1 christos ldr q25, [x3, #112] //load h4l | h4h 7533 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 7534 1.1 christos rev64 v14.16b, v14.16b //GHASH block 8k+6 7535 1.1 christos eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 7536 1.1 christos 7537 1.1 christos aese v2.16b, v27.16b 7538 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 7539 1.1 christos aese v5.16b, v27.16b 7540 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 7541 1.1 christos ldp q28, q26, [x8, #128] //load rk8, rk9 7542 1.1 christos 7543 1.1 christos ldr q20, [x3, #32] //load h1l | h1h 7544 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 7545 
1.1 christos ldr q22, [x3, #64] //load h2l | h2h 7546 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 7547 1.1 christos .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 7548 1.1 christos aese v7.16b, v27.16b 7549 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 7550 1.1 christos 7551 1.1 christos aese v1.16b, v27.16b 7552 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 7553 1.1 christos aese v3.16b, v27.16b 7554 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 7555 1.1 christos aese v6.16b, v27.16b 7556 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 7557 1.1 christos 7558 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 7559 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 7560 1.1 christos aese v0.16b, v27.16b 7561 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 7562 1.1 christos aese v4.16b, v27.16b 7563 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 7564 1.1 christos 7565 1.1 christos pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 7566 1.1 christos pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 7567 1.1 christos trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 7568 1.1 christos 7569 1.1 christos aese v5.16b, v28.16b 7570 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 7571 1.1 christos pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 7572 1.1 christos aese v2.16b, v28.16b 7573 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 7574 1.1 christos 7575 1.1 christos aese v6.16b, v28.16b 7576 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 7577 1.1 christos pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 7578 1.1 christos aese v1.16b, v28.16b 7579 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 7580 1.1 christos 7581 1.1 christos aese v4.16b, v28.16b 7582 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 7583 1.1 christos aese v0.16b, v28.16b 7584 
1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 7585 1.1 christos pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 7586 1.1 christos 7587 1.1 christos trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 7588 1.1 christos aese v3.16b, v28.16b 7589 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 7590 1.1 christos aese v7.16b, v28.16b 7591 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 7592 1.1 christos 7593 1.1 christos ldp q27, q28, [x8, #160] //load rk10, rk11 7594 1.1 christos pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 7595 1.1 christos trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 7596 1.1 christos 7597 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+15 7598 1.1 christos .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 7599 1.1 christos aese v3.16b, v26.16b 7600 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 7601 1.1 christos 7602 1.1 christos aese v6.16b, v26.16b 7603 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 7604 1.1 christos eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 7605 1.1 christos aese v5.16b, v26.16b 7606 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 7607 1.1 christos 7608 1.1 christos ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load ciphertext 7609 1.1 christos eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 7610 1.1 christos aese v7.16b, v26.16b 7611 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 7612 1.1 christos 7613 1.1 christos pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 7614 1.1 christos aese v2.16b, v26.16b 7615 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 7616 1.1 christos aese v1.16b, v26.16b 7617 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 7618 1.1 christos 7619 1.1 christos pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 7620 1.1 christos pmull v24.1q, v12.1d, v24.1d 
//GHASH block 8k+5 - mid 7621 1.1 christos pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 7622 1.1 christos 7623 1.1 christos pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 7624 1.1 christos aese v3.16b, v27.16b 7625 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 7626 1.1 christos aese v6.16b, v27.16b 7627 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 7628 1.1 christos 7629 1.1 christos pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 7630 1.1 christos aese v0.16b, v26.16b 7631 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 7632 1.1 christos .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 7633 1.1 christos 7634 1.1 christos aese v4.16b, v26.16b 7635 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 7636 1.1 christos .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 7637 1.1 christos .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 7638 1.1 christos 7639 1.1 christos aese v2.16b, v27.16b 7640 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 7641 1.1 christos aese v5.16b, v27.16b 7642 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 7643 1.1 christos aese v7.16b, v27.16b 7644 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 7645 1.1 christos 7646 1.1 christos aese v1.16b, v27.16b 7647 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 7648 1.1 christos aese v0.16b, v27.16b 7649 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 7650 1.1 christos aese v4.16b, v27.16b 7651 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 7652 1.1 christos 7653 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 7654 1.1 christos rev32 v20.16b, v30.16b //CTR block 8k+16 7655 1.1 christos ldr d16, [x10] //MODULO - load modulo constant 7656 1.1 christos 7657 1.1 
christos add v30.4s, v30.4s, v31.4s //CTR block 8k+16 7658 1.1 christos aese v1.16b, v28.16b 7659 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 11 7660 1.1 christos ldp q26, q27, [x8, #192] //load rk12, rk13 7661 1.1 christos 7662 1.1 christos aese v0.16b, v28.16b 7663 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 11 7664 1.1 christos aese v6.16b, v28.16b 7665 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 11 7666 1.1 christos 7667 1.1 christos .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 7668 1.1 christos rev32 v22.16b, v30.16b //CTR block 8k+17 7669 1.1 christos aese v2.16b, v28.16b 7670 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 11 7671 1.1 christos 7672 1.1 christos ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load ciphertext 7673 1.1 christos aese v7.16b, v28.16b 7674 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 11 7675 1.1 christos ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 7676 1.1 christos 7677 1.1 christos aese v5.16b, v28.16b 7678 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 11 7679 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+17 7680 1.1 christos aese v3.16b, v28.16b 7681 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 11 7682 1.1 christos 7683 1.1 christos aese v2.16b, v26.16b 7684 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 12 7685 1.1 christos aese v7.16b, v26.16b 7686 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 12 7687 1.1 christos aese v6.16b, v26.16b 7688 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 12 7689 1.1 christos 7690 1.1 christos rev32 v23.16b, v30.16b //CTR block 8k+18 7691 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+18 7692 1.1 christos pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 7693 1.1 christos 7694 1.1 christos .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - 
karatsuba tidy up 7695 1.1 christos aese v1.16b, v26.16b 7696 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 12 7697 1.1 christos aese v4.16b, v28.16b 7698 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 11 7699 1.1 christos 7700 1.1 christos ldr q28, [x8, #224] //load rk14 7701 1.1 christos aese v5.16b, v26.16b 7702 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 12 7703 1.1 christos aese v3.16b, v26.16b 7704 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 12 7705 1.1 christos 7706 1.1 christos .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 7707 1.1 christos aese v0.16b, v26.16b 7708 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 12 7709 1.1 christos aese v4.16b, v26.16b 7710 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 12 7711 1.1 christos 7712 1.1 christos ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load ciphertext 7713 1.1 christos aese v1.16b, v27.16b //AES block 8k+9 - round 13 7714 1.1 christos aese v2.16b, v27.16b //AES block 8k+10 - round 13 7715 1.1 christos 7716 1.1 christos ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load ciphertext 7717 1.1 christos aese v0.16b, v27.16b //AES block 8k+8 - round 13 7718 1.1 christos aese v5.16b, v27.16b //AES block 8k+13 - round 13 7719 1.1 christos 7720 1.1 christos rev32 v25.16b, v30.16b //CTR block 8k+19 7721 1.1 christos .inst 0xce027142 //eor3 v2.16b, v10.16b, v2.16b, v28.16b //AES block 8k+10 - result 7722 1.1 christos .inst 0xce017121 //eor3 v1.16b, v9.16b, v1.16b, v28.16b //AES block 8k+9 - result 7723 1.1 christos 7724 1.1 christos ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 7725 1.1 christos aese v7.16b, v27.16b //AES block 8k+15 - round 13 7726 1.1 christos 7727 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+19 7728 1.1 christos pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 7729 1.1 christos aese v4.16b, v27.16b //AES block 8k+12 - round 13 
7730 1.1 christos 7731 1.1 christos .inst 0xce0571a5 //eor3 v5.16b, v13.16b, v5.16b, v28.16b //AES block 8k+13 - result 7732 1.1 christos .inst 0xce007100 //eor3 v0.16b, v8.16b, v0.16b, v28.16b //AES block 8k+8 - result 7733 1.1 christos aese v3.16b, v27.16b //AES block 8k+11 - round 13 7734 1.1 christos 7735 1.1 christos stp q0, q1, [x2], #32 //AES block 8k+8, 8k+9 - store result 7736 1.1 christos mov v0.16b, v20.16b //CTR block 8k+16 7737 1.1 christos .inst 0xce047184 //eor3 v4.16b, v12.16b, v4.16b, v28.16b //AES block 8k+12 - result 7738 1.1 christos 7739 1.1 christos .inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low 7740 1.1 christos .inst 0xce037163 //eor3 v3.16b, v11.16b, v3.16b, v28.16b //AES block 8k+11 - result 7741 1.1 christos stp q2, q3, [x2], #32 //AES block 8k+10, 8k+11 - store result 7742 1.1 christos 7743 1.1 christos mov v3.16b, v25.16b //CTR block 8k+19 7744 1.1 christos mov v2.16b, v23.16b //CTR block 8k+18 7745 1.1 christos aese v6.16b, v27.16b //AES block 8k+14 - round 13 7746 1.1 christos 7747 1.1 christos mov v1.16b, v22.16b //CTR block 8k+17 7748 1.1 christos stp q4, q5, [x2], #32 //AES block 8k+12, 8k+13 - store result 7749 1.1 christos .inst 0xce0771e7 //eor3 v7.16b, v15.16b, v7.16b, v28.16b //AES block 8k+15 - result 7750 1.1 christos 7751 1.1 christos .inst 0xce0671c6 //eor3 v6.16b, v14.16b, v6.16b, v28.16b //AES block 8k+14 - result 7752 1.1 christos rev32 v4.16b, v30.16b //CTR block 8k+20 7753 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+20 7754 1.1 christos 7755 1.1 christos cmp x0, x5 //.LOOP CONTROL 7756 1.1 christos stp q6, q7, [x2], #32 //AES block 8k+14, 8k+15 - store result 7757 1.1 christos b.lt .L256_dec_main_loop 7758 1.1 christos 7759 1.1 christos .L256_dec_prepretail: //PREPRETAIL 7760 1.1 christos ldp q26, q27, [x8, #0] //load rk0, rk1 7761 1.1 christos rev32 v5.16b, v30.16b //CTR block 8k+13 7762 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+13 7763 1.1 christos 7764 
1.1 christos rev64 v12.16b, v12.16b //GHASH block 8k+4 7765 1.1 christos ldr q21, [x3, #144] //load h6k | h5k 7766 1.1 christos ldr q24, [x3, #192] //load h8k | h7k 7767 1.1 christos 7768 1.1 christos rev32 v6.16b, v30.16b //CTR block 8k+14 7769 1.1 christos rev64 v8.16b, v8.16b //GHASH block 8k 7770 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+14 7771 1.1 christos 7772 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 7773 1.1 christos ldr q23, [x3, #176] //load h7l | h7h 7774 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 7775 1.1 christos ldr q25, [x3, #208] //load h8l | h8h 7776 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 7777 1.1 christos rev64 v9.16b, v9.16b //GHASH block 8k+1 7778 1.1 christos 7779 1.1 christos rev32 v7.16b, v30.16b //CTR block 8k+15 7780 1.1 christos rev64 v10.16b, v10.16b //GHASH block 8k+2 7781 1.1 christos ldr q20, [x3, #128] //load h5l | h5h 7782 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 7783 1.1 christos ldr q22, [x3, #160] //load h6l | h6h 7784 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 7785 1.1 christos 7786 1.1 christos aese v0.16b, v26.16b 7787 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 7788 1.1 christos aese v1.16b, v26.16b 7789 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 7790 1.1 christos aese v4.16b, v26.16b 7791 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 7792 1.1 christos 7793 1.1 christos aese v3.16b, v26.16b 7794 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 7795 1.1 christos aese v5.16b, v26.16b 7796 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 7797 1.1 christos aese v6.16b, v26.16b 7798 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 7799 1.1 christos 7800 1.1 christos aese v4.16b, v27.16b 7801 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 7802 1.1 christos aese v7.16b, v26.16b 7803 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 7804 1.1 christos aese v2.16b, v26.16b 
7805 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 7806 1.1 christos 7807 1.1 christos ldp q28, q26, [x8, #32] //load rk2, rk3 7808 1.1 christos aese v0.16b, v27.16b 7809 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 7810 1.1 christos eor v8.16b, v8.16b, v19.16b //PRE 1 7811 1.1 christos 7812 1.1 christos aese v7.16b, v27.16b 7813 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 7814 1.1 christos aese v6.16b, v27.16b 7815 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 7816 1.1 christos aese v2.16b, v27.16b 7817 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 7818 1.1 christos 7819 1.1 christos aese v3.16b, v27.16b 7820 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 7821 1.1 christos aese v1.16b, v27.16b 7822 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 7823 1.1 christos aese v5.16b, v27.16b 7824 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 7825 1.1 christos 7826 1.1 christos pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 7827 1.1 christos trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 7828 1.1 christos pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 7829 1.1 christos 7830 1.1 christos rev64 v11.16b, v11.16b //GHASH block 8k+3 7831 1.1 christos pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 7832 1.1 christos 7833 1.1 christos aese v5.16b, v28.16b 7834 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 7835 1.1 christos aese v7.16b, v28.16b 7836 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 7837 1.1 christos aese v1.16b, v28.16b 7838 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 7839 1.1 christos 7840 1.1 christos aese v3.16b, v28.16b 7841 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 7842 1.1 christos aese v6.16b, v28.16b 7843 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 7844 1.1 christos pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 7845 1.1 
christos 7846 1.1 christos aese v0.16b, v28.16b 7847 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 7848 1.1 christos aese v7.16b, v26.16b 7849 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 7850 1.1 christos 7851 1.1 christos aese v5.16b, v26.16b 7852 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 7853 1.1 christos rev64 v14.16b, v14.16b //GHASH block 8k+6 7854 1.1 christos 7855 1.1 christos aese v0.16b, v26.16b 7856 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 7857 1.1 christos aese v2.16b, v28.16b 7858 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 7859 1.1 christos aese v6.16b, v26.16b 7860 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 7861 1.1 christos 7862 1.1 christos pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 7863 1.1 christos trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 7864 1.1 christos aese v4.16b, v28.16b 7865 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 7866 1.1 christos 7867 1.1 christos ldp q27, q28, [x8, #64] //load rk4, rk5 7868 1.1 christos aese v1.16b, v26.16b 7869 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 7870 1.1 christos pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 7871 1.1 christos 7872 1.1 christos aese v2.16b, v26.16b 7873 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 7874 1.1 christos eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 7875 1.1 christos eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 7876 1.1 christos 7877 1.1 christos aese v4.16b, v26.16b 7878 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 7879 1.1 christos pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 7880 1.1 christos aese v3.16b, v26.16b 7881 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 7882 1.1 christos 7883 1.1 christos .inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 7884 1.1 christos trn1 v29.2d, 
v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 7885 1.1 christos trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 7886 1.1 christos 7887 1.1 christos pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 7888 1.1 christos pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 7889 1.1 christos eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 7890 1.1 christos 7891 1.1 christos pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 7892 1.1 christos aese v5.16b, v27.16b 7893 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 7894 1.1 christos aese v0.16b, v27.16b 7895 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 7896 1.1 christos 7897 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 7898 1.1 christos ldr q20, [x3, #32] //load h1l | h1h 7899 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 7900 1.1 christos ldr q22, [x3, #64] //load h2l | h2h 7901 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 7902 1.1 christos aese v7.16b, v27.16b 7903 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 7904 1.1 christos 7905 1.1 christos aese v2.16b, v27.16b 7906 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 7907 1.1 christos aese v6.16b, v27.16b 7908 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 7909 1.1 christos eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 7910 1.1 christos 7911 1.1 christos eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 7912 1.1 christos aese v7.16b, v28.16b 7913 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 7914 1.1 christos aese v1.16b, v27.16b 7915 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 7916 1.1 christos 7917 1.1 christos aese v2.16b, v28.16b 7918 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 7919 1.1 christos aese v3.16b, v27.16b 7920 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 7921 1.1 christos aese v4.16b, v27.16b 7922 1.1 christos 
aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 7923 1.1 christos 7924 1.1 christos aese v1.16b, v28.16b 7925 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 7926 1.1 christos pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 7927 1.1 christos aese v6.16b, v28.16b 7928 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 7929 1.1 christos 7930 1.1 christos aese v4.16b, v28.16b 7931 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 7932 1.1 christos aese v3.16b, v28.16b 7933 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 7934 1.1 christos pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 7935 1.1 christos 7936 1.1 christos aese v0.16b, v28.16b 7937 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 7938 1.1 christos aese v5.16b, v28.16b 7939 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 7940 1.1 christos ldp q26, q27, [x8, #96] //load rk6, rk7 7941 1.1 christos 7942 1.1 christos ldr q23, [x3, #80] //load h3l | h3h 7943 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 7944 1.1 christos ldr q25, [x3, #112] //load h4l | h4h 7945 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 7946 1.1 christos rev64 v15.16b, v15.16b //GHASH block 8k+7 7947 1.1 christos rev64 v13.16b, v13.16b //GHASH block 8k+5 7948 1.1 christos 7949 1.1 christos .inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 7950 1.1 christos 7951 1.1 christos trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 7952 1.1 christos 7953 1.1 christos aese v0.16b, v26.16b 7954 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 7955 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 7956 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 7957 1.1 christos aese v6.16b, v26.16b 7958 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 7959 1.1 christos 7960 1.1 christos aese v5.16b, v26.16b 7961 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 7962 1.1 christos aese 
v7.16b, v26.16b 7963 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 7964 1.1 christos 7965 1.1 christos pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 7966 1.1 christos pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 7967 1.1 christos pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 7968 1.1 christos 7969 1.1 christos trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 7970 1.1 christos pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 7971 1.1 christos trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 7972 1.1 christos 7973 1.1 christos aese v7.16b, v27.16b 7974 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 7975 1.1 christos pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 7976 1.1 christos aese v1.16b, v26.16b 7977 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 7978 1.1 christos 7979 1.1 christos aese v2.16b, v26.16b 7980 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 7981 1.1 christos aese v3.16b, v26.16b 7982 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 7983 1.1 christos aese v4.16b, v26.16b 7984 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 7985 1.1 christos 7986 1.1 christos ldp q28, q26, [x8, #128] //load rk8, rk9 7987 1.1 christos pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 7988 1.1 christos aese v5.16b, v27.16b 7989 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 7990 1.1 christos 7991 1.1 christos aese v1.16b, v27.16b 7992 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 7993 1.1 christos aese v4.16b, v27.16b 7994 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 7995 1.1 christos 7996 1.1 christos aese v6.16b, v27.16b 7997 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 7998 1.1 christos aese v2.16b, v27.16b 7999 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 8000 1.1 christos .inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b 
//GHASH block 8k+4, 8k+5 - high 8001 1.1 christos 8002 1.1 christos aese v0.16b, v27.16b 8003 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 8004 1.1 christos trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 8005 1.1 christos aese v3.16b, v27.16b 8006 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 8007 1.1 christos 8008 1.1 christos aese v0.16b, v28.16b 8009 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 8010 1.1 christos aese v7.16b, v28.16b 8011 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 8012 1.1 christos aese v4.16b, v28.16b 8013 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 8014 1.1 christos 8015 1.1 christos aese v1.16b, v28.16b 8016 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 8017 1.1 christos aese v5.16b, v28.16b 8018 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 8019 1.1 christos aese v6.16b, v28.16b 8020 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 8021 1.1 christos 8022 1.1 christos aese v3.16b, v28.16b 8023 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 8024 1.1 christos aese v4.16b, v26.16b 8025 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 8026 1.1 christos eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 8027 1.1 christos 8028 1.1 christos aese v0.16b, v26.16b 8029 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 8030 1.1 christos aese v1.16b, v26.16b 8031 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 8032 1.1 christos eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 8033 1.1 christos 8034 1.1 christos aese v6.16b, v26.16b 8035 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 8036 1.1 christos aese v7.16b, v26.16b 8037 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 8038 1.1 christos pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 8039 1.1 christos 8040 1.1 christos aese v2.16b, v28.16b 8041 1.1 
christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 8042 1.1 christos pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 8043 1.1 christos pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 8044 1.1 christos 8045 1.1 christos pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 8046 1.1 christos pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 8047 1.1 christos pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 8048 1.1 christos 8049 1.1 christos ldp q27, q28, [x8, #160] //load rk10, rk11 8050 1.1 christos .inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 8051 1.1 christos .inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 8052 1.1 christos 8053 1.1 christos aese v2.16b, v26.16b 8054 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 8055 1.1 christos aese v3.16b, v26.16b 8056 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 8057 1.1 christos aese v5.16b, v26.16b 8058 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 8059 1.1 christos 8060 1.1 christos .inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 8061 1.1 christos .inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 8062 1.1 christos ldr d16, [x10] //MODULO - load modulo constant 8063 1.1 christos 8064 1.1 christos .inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 8065 1.1 christos 8066 1.1 christos aese v4.16b, v27.16b 8067 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 8068 1.1 christos aese v6.16b, v27.16b 8069 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 8070 1.1 christos aese v5.16b, v27.16b 8071 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 8072 1.1 christos 8073 1.1 christos aese v0.16b, v27.16b 8074 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 8075 1.1 christos aese v2.16b, v27.16b 
8076 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 8077 1.1 christos aese v3.16b, v27.16b 8078 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 8079 1.1 christos 8080 1.1 christos .inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 8081 1.1 christos 8082 1.1 christos aese v7.16b, v27.16b 8083 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 8084 1.1 christos aese v1.16b, v27.16b 8085 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 8086 1.1 christos ldp q26, q27, [x8, #192] //load rk12, rk13 8087 1.1 christos 8088 1.1 christos ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 8089 1.1 christos 8090 1.1 christos aese v2.16b, v28.16b 8091 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 11 8092 1.1 christos aese v1.16b, v28.16b 8093 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 11 8094 1.1 christos aese v0.16b, v28.16b 8095 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 11 8096 1.1 christos 8097 1.1 christos pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 8098 1.1 christos aese v3.16b, v28.16b 8099 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 11 8100 1.1 christos 8101 1.1 christos aese v7.16b, v28.16b 8102 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 11 8103 1.1 christos aese v6.16b, v28.16b 8104 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 11 8105 1.1 christos aese v4.16b, v28.16b 8106 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 11 8107 1.1 christos 8108 1.1 christos aese v5.16b, v28.16b 8109 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 11 8110 1.1 christos aese v3.16b, v26.16b 8111 1.1 christos aesmc v3.16b, v3.16b //AES block 8k+11 - round 12 8112 1.1 christos 8113 1.1 christos .inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 8114 1.1 christos 8115 1.1 christos aese v3.16b, v27.16b //AES block 
8k+11 - round 13 8116 1.1 christos aese v2.16b, v26.16b 8117 1.1 christos aesmc v2.16b, v2.16b //AES block 8k+10 - round 12 8118 1.1 christos aese v6.16b, v26.16b 8119 1.1 christos aesmc v6.16b, v6.16b //AES block 8k+14 - round 12 8120 1.1 christos 8121 1.1 christos pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 8122 1.1 christos aese v4.16b, v26.16b 8123 1.1 christos aesmc v4.16b, v4.16b //AES block 8k+12 - round 12 8124 1.1 christos aese v7.16b, v26.16b 8125 1.1 christos aesmc v7.16b, v7.16b //AES block 8k+15 - round 12 8126 1.1 christos 8127 1.1 christos aese v0.16b, v26.16b 8128 1.1 christos aesmc v0.16b, v0.16b //AES block 8k+8 - round 12 8129 1.1 christos ldr q28, [x8, #224] //load rk14 8130 1.1 christos aese v1.16b, v26.16b 8131 1.1 christos aesmc v1.16b, v1.16b //AES block 8k+9 - round 12 8132 1.1 christos 8133 1.1 christos aese v4.16b, v27.16b //AES block 8k+12 - round 13 8134 1.1 christos ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 8135 1.1 christos aese v5.16b, v26.16b 8136 1.1 christos aesmc v5.16b, v5.16b //AES block 8k+13 - round 12 8137 1.1 christos 8138 1.1 christos aese v6.16b, v27.16b //AES block 8k+14 - round 13 8139 1.1 christos aese v2.16b, v27.16b //AES block 8k+10 - round 13 8140 1.1 christos aese v1.16b, v27.16b //AES block 8k+9 - round 13 8141 1.1 christos 8142 1.1 christos aese v5.16b, v27.16b //AES block 8k+13 - round 13 8143 1.1 christos .inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low 8144 1.1 christos add v30.4s, v30.4s, v31.4s //CTR block 8k+15 8145 1.1 christos 8146 1.1 christos aese v7.16b, v27.16b //AES block 8k+15 - round 13 8147 1.1 christos aese v0.16b, v27.16b //AES block 8k+8 - round 13 8148 1.1 christos .L256_dec_tail: //TAIL 8149 1.1 christos 8150 1.1 christos ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag 8151 1.1 christos sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process 8152 1.1 christos cmp x5, #112 8153 1.1 
christos 8154 1.1 christos ldr q9, [x0], #16 //AES block 8k+8 - load ciphertext 8155 1.1 christos 8156 1.1 christos ldp q24, q25, [x3, #192] //load h8k | h7k 8157 1.1 christos ext v25.16b, v25.16b, v25.16b, #8 8158 1.1 christos mov v29.16b, v28.16b 8159 1.1 christos 8160 1.1 christos ldp q20, q21, [x3, #128] //load h5l | h5h 8161 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 8162 1.1 christos 8163 1.1 christos .inst 0xce00752c //eor3 v12.16b, v9.16b, v0.16b, v29.16b //AES block 8k+8 - result 8164 1.1 christos ldp q22, q23, [x3, #160] //load h6l | h6h 8165 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 8166 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 8167 1.1 christos b.gt .L256_dec_blocks_more_than_7 8168 1.1 christos 8169 1.1 christos mov v7.16b, v6.16b 8170 1.1 christos sub v30.4s, v30.4s, v31.4s 8171 1.1 christos mov v6.16b, v5.16b 8172 1.1 christos 8173 1.1 christos mov v5.16b, v4.16b 8174 1.1 christos mov v4.16b, v3.16b 8175 1.1 christos movi v19.8b, #0 8176 1.1 christos 8177 1.1 christos movi v17.8b, #0 8178 1.1 christos movi v18.8b, #0 8179 1.1 christos mov v3.16b, v2.16b 8180 1.1 christos 8181 1.1 christos cmp x5, #96 8182 1.1 christos mov v2.16b, v1.16b 8183 1.1 christos b.gt .L256_dec_blocks_more_than_6 8184 1.1 christos 8185 1.1 christos mov v7.16b, v6.16b 8186 1.1 christos mov v6.16b, v5.16b 8187 1.1 christos 8188 1.1 christos mov v5.16b, v4.16b 8189 1.1 christos cmp x5, #80 8190 1.1 christos sub v30.4s, v30.4s, v31.4s 8191 1.1 christos 8192 1.1 christos mov v4.16b, v3.16b 8193 1.1 christos mov v3.16b, v1.16b 8194 1.1 christos b.gt .L256_dec_blocks_more_than_5 8195 1.1 christos 8196 1.1 christos cmp x5, #64 8197 1.1 christos mov v7.16b, v6.16b 8198 1.1 christos sub v30.4s, v30.4s, v31.4s 8199 1.1 christos 8200 1.1 christos mov v6.16b, v5.16b 8201 1.1 christos 8202 1.1 christos mov v5.16b, v4.16b 8203 1.1 christos mov v4.16b, v1.16b 8204 1.1 christos b.gt .L256_dec_blocks_more_than_4 8205 1.1 christos 8206 1.1 christos sub v30.4s, v30.4s, v31.4s 8207 
1.1 christos mov v7.16b, v6.16b 8208 1.1 christos cmp x5, #48 8209 1.1 christos 8210 1.1 christos mov v6.16b, v5.16b 8211 1.1 christos mov v5.16b, v1.16b 8212 1.1 christos b.gt .L256_dec_blocks_more_than_3 8213 1.1 christos 8214 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 8215 1.1 christos sub v30.4s, v30.4s, v31.4s 8216 1.1 christos mov v7.16b, v6.16b 8217 1.1 christos 8218 1.1 christos cmp x5, #32 8219 1.1 christos mov v6.16b, v1.16b 8220 1.1 christos b.gt .L256_dec_blocks_more_than_2 8221 1.1 christos 8222 1.1 christos sub v30.4s, v30.4s, v31.4s 8223 1.1 christos 8224 1.1 christos mov v7.16b, v1.16b 8225 1.1 christos cmp x5, #16 8226 1.1 christos b.gt .L256_dec_blocks_more_than_1 8227 1.1 christos 8228 1.1 christos sub v30.4s, v30.4s, v31.4s 8229 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 8230 1.1 christos b .L256_dec_blocks_less_than_1 8231 1.1 christos .L256_dec_blocks_more_than_7: //blocks left > 7 8232 1.1 christos rev64 v8.16b, v9.16b //GHASH final-7 block 8233 1.1 christos ldr q9, [x0], #16 //AES final-6 block - load ciphertext 8234 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-7 block - store result 8235 1.1 christos 8236 1.1 christos ins v18.d[0], v24.d[1] //GHASH final-7 block - mid 8237 1.1 christos 8238 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 8239 1.1 christos 8240 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-7 block - mid 8241 1.1 christos .inst 0xce01752c //eor3 v12.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result 8242 1.1 christos 8243 1.1 christos pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high 8244 1.1 christos 8245 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid 8246 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 8247 1.1 christos 8248 1.1 christos pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low 8249 1.1 christos pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid 8250 1.1 christos .L256_dec_blocks_more_than_6: //blocks left > 6 
8251 1.1 christos 8252 1.1 christos rev64 v8.16b, v9.16b //GHASH final-6 block 8253 1.1 christos 8254 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 8255 1.1 christos ldr q9, [x0], #16 //AES final-5 block - load ciphertext 8256 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 8257 1.1 christos 8258 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-6 block - mid 8259 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-6 block - store result 8260 1.1 christos pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high 8261 1.1 christos 8262 1.1 christos pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low 8263 1.1 christos 8264 1.1 christos .inst 0xce02752c //eor3 v12.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result 8265 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low 8266 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid 8267 1.1 christos 8268 1.1 christos pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid 8269 1.1 christos 8270 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid 8271 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high 8272 1.1 christos .L256_dec_blocks_more_than_5: //blocks left > 5 8273 1.1 christos 8274 1.1 christos rev64 v8.16b, v9.16b //GHASH final-5 block 8275 1.1 christos 8276 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 8277 1.1 christos 8278 1.1 christos pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high 8279 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-5 block - mid 8280 1.1 christos 8281 1.1 christos ldr q9, [x0], #16 //AES final-4 block - load ciphertext 8282 1.1 christos 8283 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid 8284 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-5 block - store result 8285 1.1 christos 8286 1.1 christos pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low 8287 1.1 christos ins v27.d[1], v27.d[0] //GHASH final-5 block - mid 8288 
1.1 christos 8289 1.1 christos pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid 8290 1.1 christos 8291 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high 8292 1.1 christos .inst 0xce03752c //eor3 v12.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result 8293 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low 8294 1.1 christos 8295 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid 8296 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 8297 1.1 christos .L256_dec_blocks_more_than_4: //blocks left > 4 8298 1.1 christos 8299 1.1 christos rev64 v8.16b, v9.16b //GHASH final-4 block 8300 1.1 christos 8301 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 8302 1.1 christos 8303 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-4 block - mid 8304 1.1 christos ldr q9, [x0], #16 //AES final-3 block - load ciphertext 8305 1.1 christos 8306 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 8307 1.1 christos 8308 1.1 christos pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low 8309 1.1 christos pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high 8310 1.1 christos 8311 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid 8312 1.1 christos 8313 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high 8314 1.1 christos 8315 1.1 christos pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid 8316 1.1 christos 8317 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low 8318 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-4 block - store result 8319 1.1 christos 8320 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid 8321 1.1 christos .inst 0xce04752c //eor3 v12.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result 8322 1.1 christos .L256_dec_blocks_more_than_3: //blocks left > 3 8323 1.1 christos 8324 1.1 christos ldr q25, [x3, #112] //load h4l | h4h 8325 1.1 christos ext v25.16b, 
v25.16b, v25.16b, #8 8326 1.1 christos rev64 v8.16b, v9.16b //GHASH final-3 block 8327 1.1 christos 8328 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 8329 1.1 christos ldr q9, [x0], #16 //AES final-2 block - load ciphertext 8330 1.1 christos ldr q24, [x3, #96] //load h4k | h3k 8331 1.1 christos 8332 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-3 block - mid 8333 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-3 block - store result 8334 1.1 christos 8335 1.1 christos .inst 0xce05752c //eor3 v12.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result 8336 1.1 christos 8337 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid 8338 1.1 christos 8339 1.1 christos ins v27.d[1], v27.d[0] //GHASH final-3 block - mid 8340 1.1 christos pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low 8341 1.1 christos pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high 8342 1.1 christos 8343 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 8344 1.1 christos pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid 8345 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low 8346 1.1 christos 8347 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high 8348 1.1 christos 8349 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid 8350 1.1 christos .L256_dec_blocks_more_than_2: //blocks left > 2 8351 1.1 christos 8352 1.1 christos rev64 v8.16b, v9.16b //GHASH final-2 block 8353 1.1 christos 8354 1.1 christos ldr q23, [x3, #80] //load h3l | h3h 8355 1.1 christos ext v23.16b, v23.16b, v23.16b, #8 8356 1.1 christos ldr q9, [x0], #16 //AES final-1 block - load ciphertext 8357 1.1 christos 8358 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 8359 1.1 christos 8360 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-2 block - mid 8361 1.1 christos 8362 1.1 christos pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low 8363 1.1 christos st1 { v12.16b}, [x2], #16 //AES 
final-2 block - store result 8364 1.1 christos .inst 0xce06752c //eor3 v12.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result 8365 1.1 christos 8366 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid 8367 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low 8368 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 8369 1.1 christos 8370 1.1 christos pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid 8371 1.1 christos pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high 8372 1.1 christos 8373 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid 8374 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high 8375 1.1 christos .L256_dec_blocks_more_than_1: //blocks left > 1 8376 1.1 christos 8377 1.1 christos rev64 v8.16b, v9.16b //GHASH final-1 block 8378 1.1 christos 8379 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 8380 1.1 christos 8381 1.1 christos ins v27.d[0], v8.d[1] //GHASH final-1 block - mid 8382 1.1 christos ldr q22, [x3, #64] //load h2l | h2h 8383 1.1 christos ext v22.16b, v22.16b, v22.16b, #8 8384 1.1 christos 8385 1.1 christos eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid 8386 1.1 christos ldr q9, [x0], #16 //AES final block - load ciphertext 8387 1.1 christos st1 { v12.16b}, [x2], #16 //AES final-1 block - store result 8388 1.1 christos 8389 1.1 christos ldr q21, [x3, #48] //load h2k | h1k 8390 1.1 christos pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low 8391 1.1 christos 8392 1.1 christos ins v27.d[1], v27.d[0] //GHASH final-1 block - mid 8393 1.1 christos 8394 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low 8395 1.1 christos 8396 1.1 christos .inst 0xce07752c //eor3 v12.16b, v9.16b, v7.16b, v29.16b //AES final block - result 8397 1.1 christos pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high 8398 1.1 christos 8399 1.1 christos pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid 8400 
1.1 christos 8401 1.1 christos movi v16.8b, #0 //suppress further partial tag feed in 8402 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high 8403 1.1 christos 8404 1.1 christos eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid 8405 1.1 christos .L256_dec_blocks_less_than_1: //blocks left <= 1 8406 1.1 christos 8407 1.1 christos ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored 8408 1.1 christos mvn x6, xzr //temp0_x = 0xffffffffffffffff 8409 1.1 christos and x1, x1, #127 //bit_length %= 128 8410 1.1 christos 8411 1.1 christos sub x1, x1, #128 //bit_length -= 128 8412 1.1 christos rev32 v30.16b, v30.16b 8413 1.1 christos str q30, [x16] //store the updated counter 8414 1.1 christos 8415 1.1 christos neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) 8416 1.1 christos 8417 1.1 christos and x1, x1, #127 //bit_length %= 128 8418 1.1 christos 8419 1.1 christos lsr x6, x6, x1 //temp0_x is mask for top 64b of last block 8420 1.1 christos cmp x1, #64 8421 1.1 christos mvn x7, xzr //temp1_x = 0xffffffffffffffff 8422 1.1 christos 8423 1.1 christos csel x14, x6, xzr, lt 8424 1.1 christos csel x13, x7, x6, lt 8425 1.1 christos 8426 1.1 christos mov v0.d[0], x13 //ctr0b is mask for last block 8427 1.1 christos mov v0.d[1], x14 8428 1.1 christos 8429 1.1 christos and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits 8430 1.1 christos ldr q20, [x3, #32] //load h1l | h1h 8431 1.1 christos ext v20.16b, v20.16b, v20.16b, #8 8432 1.1 christos bif v12.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing 8433 1.1 christos 8434 1.1 christos rev64 v8.16b, v9.16b //GHASH final block 8435 1.1 christos 8436 1.1 christos eor v8.16b, v8.16b, v16.16b //feed in partial tag 8437 1.1 christos 8438 1.1 christos ins v16.d[0], v8.d[1] //GHASH final block - mid 8439 1.1 christos pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high 8440 1.1 christos 
8441 1.1 christos eor v16.8b, v16.8b, v8.8b //GHASH final block - mid 8442 1.1 christos 8443 1.1 christos pmull v26.1q, v8.1d, v20.1d //GHASH final block - low 8444 1.1 christos eor v17.16b, v17.16b, v28.16b //GHASH final block - high 8445 1.1 christos 8446 1.1 christos pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid 8447 1.1 christos 8448 1.1 christos eor v18.16b, v18.16b, v16.16b //GHASH final block - mid 8449 1.1 christos ldr d16, [x10] //MODULO - load modulo constant 8450 1.1 christos eor v19.16b, v19.16b, v26.16b //GHASH final block - low 8451 1.1 christos 8452 1.1 christos pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 8453 1.1 christos eor v14.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 8454 1.1 christos 8455 1.1 christos ext v17.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 8456 1.1 christos st1 { v12.16b}, [x2] //store all 16B 8457 1.1 christos 8458 1.1 christos eor v18.16b, v18.16b, v14.16b //MODULO - karatsuba tidy up 8459 1.1 christos 8460 1.1 christos eor v21.16b, v17.16b, v21.16b //MODULO - fold into mid 8461 1.1 christos eor v18.16b, v18.16b, v21.16b //MODULO - fold into mid 8462 1.1 christos 8463 1.1 christos pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 8464 1.1 christos 8465 1.1 christos ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 8466 1.1 christos eor v19.16b, v19.16b, v17.16b //MODULO - fold into low 8467 1.1 christos 8468 1.1 christos eor v19.16b, v19.16b, v18.16b //MODULO - fold into low 8469 1.1 christos ext v19.16b, v19.16b, v19.16b, #8 8470 1.1 christos rev64 v19.16b, v19.16b 8471 1.1 christos st1 { v19.16b }, [x3] 8472 1.1 christos mov x0, x9 8473 1.1 christos 8474 1.1 christos ldp d10, d11, [sp, #16] 8475 1.1 christos ldp d12, d13, [sp, #32] 8476 1.1 christos ldp d14, d15, [sp, #48] 8477 1.1 christos ldp d8, d9, [sp], #80 8478 1.1 christos ret 8479 1.1 christos 8480 1.1 christos .L256_dec_ret: 8481 1.1 christos mov w0, #0x0 8482 1.1 christos ret 8483 1.1 
// File trailer: closes the last kernel symbol and embeds the module ident string.
//  - .size records the size of unroll8_eor3_aes_gcm_dec_256_kernel as
//    "." (the current location) minus the symbol's start address.
//  - The .byte sequence is the ASCII text
//      "AES GCM module for ARMv8, SPDX BSD-3-Clause by <xiaokang.qian@arm.com>"
//    followed by a terminating 0 byte.
//  - The two ".align 2" directives are identical, so the second adds nothing.
//    NOTE(review): on ARM/AArch64 GNU as the operand is a power of two
//    (i.e. 4-byte alignment) -- confirm for the assembler in use.
//  - #endif presumably closes the "#if __ARM_MAX_ARCH__>=8" guard opened at
//    the top of the file -- verify no other conditional intervenes.
christos .size unroll8_eor3_aes_gcm_dec_256_kernel,.-unroll8_eor3_aes_gcm_dec_256_kernel 8484 1.1 christos .byte 65,69,83,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,65,82,77,118,56,44,32,83,80,68,88,32,66,83,68,45,51,45,67,108,97,117,115,101,32,98,121,32,60,120,105,97,111,107,97,110,103,46,113,105,97,110,64,97,114,109,46,99,111,109,62,0 8485 1.1 christos .align 2 8486 1.1 christos .align 2 8487 1.1 christos #endif 8488