#include "arm_arch.h"

// AES primitives built on the ARMv8 Crypto Extension (aese/aesd/aesmc/aesimc).
// Syntax: GNU as, AArch64, run through the C preprocessor.
//
// Key schedule layout shared by every routine in this file:
//   bytes   0..239  round keys, 16 bytes each
//   bytes 240..243  number of rounds (10, 12 or 14) as a 32-bit word
//
// NOTE(review): the matching #endif for the guard below and the .size
// directive of aes_v8_ecb_encrypt are not part of this excerpt.

#if __ARM_MAX_ARCH__>=7
.arch	armv8-a+crypto
.text
.align	5
// Constants for the key expansion, loaded into v1 (round constant) and
// v2 (byte-permutation mask); the third row replaces v1 for the last two
// AES-128 rounds.
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

//----------------------------------------------------------------------
// aes_v8_set_encrypt_key
// In:    x0 = pointer to the user key bytes
//        w1 = key length in bits (128, 192 or 256)
//        x2 = pointer to the key schedule to fill (244 bytes written)
// Out:   x0 =  0 on success
//             -1 if x0 or x2 is NULL
//             -2 if w1 is below 128, above 256 or not a multiple of 64
// Side:  on success x2 points at the rounds field (schedule + 240) and
//        w12 holds the round count; aes_v8_set_decrypt_key depends on
//        both when it enters through .Lenc_key.
// Clobb: x1, x3, x12, v0-v6, flags
//----------------------------------------------------------------------
.globl	aes_v8_set_encrypt_key
.type	aes_v8_set_encrypt_key,%function
.align	5
aes_v8_set_encrypt_key:
.Lenc_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1
	cmp	x0,#0
	b.eq	.Lenc_key_abort
	cmp	x2,#0
	b.eq	.Lenc_key_abort
	mov	x3,#-2
	cmp	w1,#128
	b.lt	.Lenc_key_abort
	cmp	w1,#256
	b.gt	.Lenc_key_abort
	tst	w1,#0x3f
	b.ne	.Lenc_key_abort

	adr	x3,.Lrcon
	cmp	w1,#192

	eor	v0.16b,v0.16b,v0.16b
	ld1	{v3.16b},[x0],#16
	mov	w1,#8		// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32

	// Flags still come from the cmp against 192 above; none of the
	// instructions in between modifies them.
	b.lt	.Loop128
	b.eq	.L192
	b	.L256

	// 128-bit key: eight looped rounds, then two unrolled rounds that
	// use the 0x1b row of .Lrcon.
.align	4
.Loop128:
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	b.ne	.Loop128

	ld1	{v1.4s},[x3]

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]
	add	x2,x2,#0x50	// 160 bytes stored so far + 0x50 = offset 240

	mov	w12,#10
	b	.Ldone

	// 192-bit key: each of the eight iterations emits 24 bytes.
.align	4
.L192:
	ld1	{v4.8b},[x0],#8
	movi	v6.16b,#8			// borrow v6.16b
	st1	{v3.4s},[x2],#16
	sub	v2.16b,v2.16b,v6.16b	// adjust the mask

.Loop192:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
#ifdef __AARCH64EB__
	st1	{v4.4s},[x2],#16
	sub	x2,x2,#8
#else
	st1	{v4.8b},[x2],#8
#endif
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b

	dup	v5.4s,v3.s[3]
	eor	v5.16b,v5.16b,v4.16b
	eor	v6.16b,v6.16b,v1.16b
	ext	v4.16b,v0.16b,v4.16b,#12
	shl	v1.16b,v1.16b,#1
	eor	v4.16b,v4.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	eor	v4.16b,v4.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.ne	.Loop192

	mov	w12,#12
	add	x2,x2,#0x20	// 208 bytes stored so far + 0x20 = offset 240
	b	.Ldone

	// 256-bit key: seven iterations of 32 bytes; the loop exits from
	// its middle once the last round key has been stored.
.align	4
.L256:
	ld1	{v4.16b},[x0]
	mov	w1,#7
	mov	w12,#14
	st1	{v3.4s},[x2],#16

.Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	.Ldone

	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	.Loop256

.Ldone:
	str	w12,[x2]		// rounds field at schedule + 240
	mov	x3,#0

.Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16		// x30 was never overwritten, only x29 is reloaded
	ret
.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key

//----------------------------------------------------------------------
// aes_v8_set_decrypt_key
// In:    x0, w1, x2 as for aes_v8_set_encrypt_key
// Out:   x0 = 0 on success, otherwise the error code produced by the
//        encryption key setup
// Builds the encryption schedule through .Lenc_key, then reverses the
// order of the round keys in place. The first and last keys are only
// swapped; every other key is also passed through aesimc.
// Clobb: x1-x4, x12, v0-v6, flags
//----------------------------------------------------------------------
.globl	aes_v8_set_decrypt_key
.type	aes_v8_set_decrypt_key,%function
.align	5
aes_v8_set_decrypt_key:
.inst	0xd503233f		// paciasp
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	bl	.Lenc_key

	cmp	x0,#0
	b.ne	.Ldec_key_abort

	sub	x2,x2,#240		// restore original x2
	mov	x4,#-16
	add	x0,x2,x12,lsl#4	// end of key schedule

	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16

	// x2 walks up from the front, x0 walks down from the back.
.Loop_imc:
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16
	cmp	x0,x2
	b.hi	.Loop_imc

	// The two pointers have met on the middle round key.
	ld1	{v0.4s},[x2]
	aesimc	v0.16b,v0.16b
	st1	{v0.4s},[x0]

	eor	x0,x0,x0		// return value
.Ldec_key_abort:
	ldp	x29,x30,[sp],#16
.inst	0xd50323bf		// autiasp
	ret
.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
//----------------------------------------------------------------------
// aes_v8_encrypt
// In:    x0 = pointer to one 16-byte input block
//        x1 = pointer to the 16-byte output block
//        x2 = pointer to the key schedule (rounds read from x2 + 240)
// Out:   encrypted block stored at x1
// Clobb: x2, w3, v0-v2, flags
//----------------------------------------------------------------------
.globl	aes_v8_encrypt
.type	aes_v8_encrypt,%function
.align	5
aes_v8_encrypt:
	ldr	w3,[x2,#240]
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

	// Two rounds per iteration, with the next two keys loaded early.
.Loop_enc:
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]
	aese	v2.16b,v1.16b
	eor	v2.16b,v2.16b,v0.16b	// last round key is applied with a plain xor

	st1	{v2.16b},[x1]
	ret
.size	aes_v8_encrypt,.-aes_v8_encrypt
//----------------------------------------------------------------------
// aes_v8_decrypt
// In:    x0 = pointer to one 16-byte input block
//        x1 = pointer to the 16-byte output block
//        x2 = pointer to the key schedule (rounds read from x2 + 240)
// Out:   decrypted block stored at x1
// Clobb: x2, w3, v0-v2, flags
//----------------------------------------------------------------------
.globl	aes_v8_decrypt
.type	aes_v8_decrypt,%function
.align	5
aes_v8_decrypt:
	ldr	w3,[x2,#240]
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

	// Two rounds per iteration, with the next two keys loaded early.
.Loop_dec:
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_dec

	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]
	aesd	v2.16b,v1.16b
	eor	v2.16b,v2.16b,v0.16b	// last round key is applied with a plain xor

	st1	{v2.16b},[x1]
	ret
.size	aes_v8_decrypt,.-aes_v8_decrypt
//----------------------------------------------------------------------
// aes_v8_ecb_encrypt
// In:    x0 = input pointer
//        x1 = output pointer
//        x2 = length in bytes
//        x3 = pointer to the key schedule (rounds read from x3 + 240)
//        w4 = direction: non-zero encrypts, zero decrypts
// Behaviour by length:
//   below 16    nothing is read or written
//   exactly 16  single-block path, no stack frame
//   above 16    bulk path, 5 blocks per iteration, then 3, then a 1 or
//               2 block tail; the length is masked down to a multiple
//               of 16 first
// Registers:  w5 = round count bias, w6/x6 = loop counter and scratch,
//             x7 = running round-key pointer, x8 = first-load stride,
//             v16/v17 = rotating round keys, v18-v23 and v7 = last
//             seven round keys
// Clobb: x0-x3, x5-x8, v0-v7, v16-v31, flags
//----------------------------------------------------------------------
.globl	aes_v8_ecb_encrypt
.type	aes_v8_ecb_encrypt,%function
.align	5
aes_v8_ecb_encrypt:
	subs	x2,x2,#16
	// Original input data size bigger than 16, jump to big size processing.
	b.ne	.Lecb_big_size
	ld1	{v0.16b},[x0]
	cmp	w4,#0					// en- or decrypting?
	ldr	w5,[x3,#240]
	ld1	{v5.4s,v6.4s},[x3],#32			// load key schedule...

	b.eq	.Lecb_small_dec
	aese	v0.16b,v5.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s,v17.4s},[x3],#32			// load key schedule...
	aese	v0.16b,v6.16b
	aesmc	v0.16b,v0.16b
	subs	w5,w5,#10			// if rounds==10, jump to aes-128-ecb processing
	b.eq	.Lecb_128_enc
.Lecb_round_loop:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x3],#16				// load key schedule...
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x3],#16				// load key schedule...
	subs	w5,w5,#2			// bias
	b.gt	.Lecb_round_loop
.Lecb_128_enc:
	ld1	{v18.4s,v19.4s},[x3],#32		// load key schedule...
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v20.4s,v21.4s},[x3],#32		// load key schedule...
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v22.4s,v23.4s},[x3],#32		// load key schedule...
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	ld1	{v7.4s},[x3]
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v23.16b
	eor	v0.16b,v0.16b,v7.16b
	st1	{v0.16b},[x1]
	b	.Lecb_Final_abort
.Lecb_small_dec:
	aesd	v0.16b,v5.16b
	aesimc	v0.16b,v0.16b
	ld1	{v16.4s,v17.4s},[x3],#32			// load key schedule...
	aesd	v0.16b,v6.16b
	aesimc	v0.16b,v0.16b
	subs	w5,w5,#10			// bias
	b.eq	.Lecb_128_dec
.Lecb_dec_round_loop:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	ld1	{v16.4s},[x3],#16				// load key schedule...
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	ld1	{v17.4s},[x3],#16				// load key schedule...
	subs	w5,w5,#2			// bias
	b.gt	.Lecb_dec_round_loop
.Lecb_128_dec:
	ld1	{v18.4s,v19.4s},[x3],#32		// load key schedule...
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	ld1	{v20.4s,v21.4s},[x3],#32		// load key schedule...
	aesd	v0.16b,v18.16b
	aesimc	v0.16b,v0.16b
	aesd	v0.16b,v19.16b
	aesimc	v0.16b,v0.16b
	ld1	{v22.4s,v23.4s},[x3],#32		// load key schedule...
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	ld1	{v7.4s},[x3]
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v0.16b,v23.16b
	eor	v0.16b,v0.16b,v7.16b
	st1	{v0.16b},[x1]
	b	.Lecb_Final_abort
.Lecb_big_size:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x8,#16
	// Flags are still those of the subs at entry: lo means the
	// length was below 16, so there is nothing to process.
	b.lo	.Lecb_done
	// This label is only reached through b.ne, so eq never holds here
	// and x8 stays 16.
	csel	x8,xzr,x8,eq

	cmp	w4,#0					// en- or decrypting?
	ldr	w5,[x3,#240]
	and	x2,x2,#-16
	ld1	{v0.16b},[x0],x8

	ld1	{v16.4s,v17.4s},[x3]				// load key schedule...
	sub	w5,w5,#6
	add	x7,x3,x5,lsl#4				// pointer to last 7 round keys
	sub	w5,w5,#2
	ld1	{v18.4s,v19.4s},[x7],#32
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]

	add	x7,x3,#32
	mov	w6,w5
	b.eq	.Lecb_dec

	ld1	{v1.16b},[x0],#16
	subs	x2,x2,#32				// bias
	add	w6,w5,#2
	orr	v3.16b,v1.16b,v1.16b
	orr	v24.16b,v1.16b,v1.16b
	orr	v1.16b,v0.16b,v0.16b
	b.lo	.Lecb_enc_tail

	orr	v1.16b,v3.16b,v3.16b
	ld1	{v24.16b},[x0],#16
	cmp	x2,#32
	b.lo	.Loop3x_ecb_enc

	ld1	{v25.16b},[x0],#16
	ld1	{v26.16b},[x0],#16
	sub	x2,x2,#32				// bias
	mov	w6,w5

.Loop5x_ecb_enc:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v16.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v16.16b
	aesmc	v26.16b,v26.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v17.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v17.16b
	aesmc	v26.16b,v26.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop5x_ecb_enc

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v16.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v16.16b
	aesmc	v26.16b,v26.16b
	cmp	x2,#0x40		// because .Lecb_enc_tail4x
	sub	x2,x2,#0x50

	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v17.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v17.16b
	aesmc	v26.16b,v26.16b
	csel	x6,xzr,x2,gt		// borrow x6, w6, "gt" is not typo
	mov	x7,x3

	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v18.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v18.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v18.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v18.16b
	aesmc	v26.16b,v26.16b
	add	x0,x0,x6		// x0 is adjusted in such way that
					// at exit from the loop v1.16b-v26.16b
					// are loaded with last "words"
	add	x6,x2,#0x60		// because .Lecb_enc_tail4x

	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v19.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v19.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v19.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v19.16b
	aesmc	v26.16b,v26.16b

	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v20.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v20.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v20.16b
	aesmc	v26.16b,v26.16b

	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v21.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v21.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v21.16b
	aesmc	v26.16b,v26.16b

	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v22.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v22.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v22.16b
	aesmc	v26.16b,v26.16b

	aese	v0.16b,v23.16b
	ld1	{v2.16b},[x0],#16
	aese	v1.16b,v23.16b
	ld1	{v3.16b},[x0],#16
	aese	v24.16b,v23.16b
	ld1	{v27.16b},[x0],#16
	aese	v25.16b,v23.16b
	ld1	{v28.16b},[x0],#16
	aese	v26.16b,v23.16b
	ld1	{v29.16b},[x0],#16
	cbz	x6,.Lecb_enc_tail4x
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	eor	v4.16b,v7.16b,v0.16b
	orr	v0.16b,v2.16b,v2.16b
	eor	v5.16b,v7.16b,v1.16b
	orr	v1.16b,v3.16b,v3.16b
	eor	v17.16b,v7.16b,v24.16b
	orr	v24.16b,v27.16b,v27.16b
	eor	v30.16b,v7.16b,v25.16b
	orr	v25.16b,v28.16b,v28.16b
	eor	v31.16b,v7.16b,v26.16b
	st1	{v4.16b},[x1],#16
	orr	v26.16b,v29.16b,v29.16b
	st1	{v5.16b},[x1],#16
	mov	w6,w5
	st1	{v17.16b},[x1],#16
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v30.16b},[x1],#16
	st1	{v31.16b},[x1],#16
	// Flags are still those of the cmp against 0x40 above.
	b.hs	.Loop5x_ecb_enc

	add	x2,x2,#0x50
	cbz	x2,.Lecb_done

	add	w6,w5,#2
	subs	x2,x2,#0x30
	orr	v0.16b,v27.16b,v27.16b
	orr	v1.16b,v28.16b,v28.16b
	orr	v24.16b,v29.16b,v29.16b
	b.lo	.Lecb_enc_tail

	b	.Loop3x_ecb_enc

.align	4
.Lecb_enc_tail4x:
	eor	v5.16b,v7.16b,v1.16b
	eor	v17.16b,v7.16b,v24.16b
	eor	v30.16b,v7.16b,v25.16b
	eor	v31.16b,v7.16b,v26.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	st1	{v30.16b},[x1],#16
	st1	{v31.16b},[x1],#16

	b	.Lecb_done
.align	4
.Loop3x_ecb_enc:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_ecb_enc

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	subs	x2,x2,#0x30
	csel	x6,x2,x6,lo				// x6, w6, is zero at this point
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	add	x0,x0,x6			// x0 is adjusted in such way that
						// at exit from the loop v1.16b-v24.16b
						// are loaded with last "words"
	mov	x7,x3
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v20.16b
	aesmc	v24.16b,v24.16b
	ld1	{v2.16b},[x0],#16
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v21.16b
	aesmc	v24.16b,v24.16b
	ld1	{v3.16b},[x0],#16
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v22.16b
	aesmc	v24.16b,v24.16b
	ld1	{v27.16b},[x0],#16
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b
	aese	v24.16b,v23.16b
	ld1	{v16.4s},[x7],#16		// re-pre-load rndkey[0]
	add	w6,w5,#2
	eor	v4.16b,v7.16b,v0.16b
	eor	v5.16b,v7.16b,v1.16b
	eor	v24.16b,v24.16b,v7.16b
	ld1	{v17.4s},[x7],#16		// re-pre-load rndkey[1]
	st1	{v4.16b},[x1],#16
	orr	v0.16b,v2.16b,v2.16b
	st1	{v5.16b},[x1],#16
	orr	v1.16b,v3.16b,v3.16b
	st1	{v24.16b},[x1],#16
	orr	v24.16b,v27.16b,v27.16b
	// Flags are still those of the subs by 0x30 above.
	b.hs	.Loop3x_ecb_enc

	cmn	x2,#0x30
	b.eq	.Lecb_done
	nop

	// One or two blocks left: v24 always holds the last block, v1 the
	// one before it when two remain.
.Lecb_enc_tail:
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lecb_enc_tail

	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v20.16b
	aesmc	v24.16b,v24.16b
	cmn	x2,#0x20
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v21.16b
	aesmc	v24.16b,v24.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v22.16b
	aesmc	v24.16b,v24.16b
	aese	v1.16b,v23.16b
	aese	v24.16b,v23.16b
	b.eq	.Lecb_enc_one
	eor	v5.16b,v7.16b,v1.16b
	eor	v17.16b,v7.16b,v24.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	b	.Lecb_done

.Lecb_enc_one:
	eor	v5.16b,v7.16b,v24.16b
	st1	{v5.16b},[x1],#16
	b	.Lecb_done
.align	5
	// Decryption mirrors the encryption path with aesd/aesimc.
.Lecb_dec:
	ld1	{v1.16b},[x0],#16
	subs	x2,x2,#32			// bias
	add	w6,w5,#2
	orr	v3.16b,v1.16b,v1.16b
	orr	v24.16b,v1.16b,v1.16b
	orr	v1.16b,v0.16b,v0.16b
	b.lo	.Lecb_dec_tail

	orr	v1.16b,v3.16b,v3.16b
	ld1	{v24.16b},[x0],#16
	cmp	x2,#32
	b.lo	.Loop3x_ecb_dec

	ld1	{v25.16b},[x0],#16
	ld1	{v26.16b},[x0],#16
	sub	x2,x2,#32				// bias
	mov	w6,w5

.Loop5x_ecb_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v16.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v16.16b
	aesimc	v26.16b,v26.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v17.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v17.16b
	aesimc	v26.16b,v26.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop5x_ecb_dec

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v16.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v16.16b
	aesimc	v26.16b,v26.16b
	cmp	x2,#0x40		// because .Lecb_tail4x
	sub	x2,x2,#0x50

	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v17.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v17.16b
	aesimc	v26.16b,v26.16b
	csel	x6,xzr,x2,gt		// borrow x6, w6, "gt" is not typo
	mov	x7,x3

	aesd	v0.16b,v18.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v18.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v18.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v18.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v18.16b
	aesimc	v26.16b,v26.16b
	add	x0,x0,x6		// x0 is adjusted in such way that
					// at exit from the loop v1.16b-v26.16b
					// are loaded with last "words"
	add	x6,x2,#0x60		// because .Lecb_tail4x

	aesd	v0.16b,v19.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v19.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v19.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v19.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v19.16b
	aesimc	v26.16b,v26.16b

	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v20.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v20.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v20.16b
	aesimc	v26.16b,v26.16b

	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v21.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v21.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v21.16b
	aesimc	v26.16b,v26.16b

	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v22.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v22.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v22.16b
	aesimc	v26.16b,v26.16b

	aesd	v0.16b,v23.16b
	ld1	{v2.16b},[x0],#16
	aesd	v1.16b,v23.16b
	ld1	{v3.16b},[x0],#16
	aesd	v24.16b,v23.16b
	ld1	{v27.16b},[x0],#16
	aesd	v25.16b,v23.16b
	ld1	{v28.16b},[x0],#16
	aesd	v26.16b,v23.16b
	ld1	{v29.16b},[x0],#16
	cbz	x6,.Lecb_tail4x
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	eor	v4.16b,v7.16b,v0.16b
	orr	v0.16b,v2.16b,v2.16b
	eor	v5.16b,v7.16b,v1.16b
	orr	v1.16b,v3.16b,v3.16b
	eor	v17.16b,v7.16b,v24.16b
	orr	v24.16b,v27.16b,v27.16b
	eor	v30.16b,v7.16b,v25.16b
	orr	v25.16b,v28.16b,v28.16b
	eor	v31.16b,v7.16b,v26.16b
	st1	{v4.16b},[x1],#16
	orr	v26.16b,v29.16b,v29.16b
	st1	{v5.16b},[x1],#16
	mov	w6,w5
	st1	{v17.16b},[x1],#16
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v30.16b},[x1],#16
	st1	{v31.16b},[x1],#16
	// Flags are still those of the cmp against 0x40 above.
	b.hs	.Loop5x_ecb_dec

	add	x2,x2,#0x50
	cbz	x2,.Lecb_done

	add	w6,w5,#2
	subs	x2,x2,#0x30
	orr	v0.16b,v27.16b,v27.16b
	orr	v1.16b,v28.16b,v28.16b
	orr	v24.16b,v29.16b,v29.16b
	b.lo	.Lecb_dec_tail

	b	.Loop3x_ecb_dec

.align	4
.Lecb_tail4x:
	eor	v5.16b,v7.16b,v1.16b
	eor	v17.16b,v7.16b,v24.16b
	eor	v30.16b,v7.16b,v25.16b
	eor	v31.16b,v7.16b,v26.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	st1	{v30.16b},[x1],#16
	st1	{v31.16b},[x1],#16

	b	.Lecb_done
.align	4
.Loop3x_ecb_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_ecb_dec

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	subs	x2,x2,#0x30
	csel	x6,x2,x6,lo				// x6, w6, is zero at this point
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	add	x0,x0,x6			// x0 is adjusted in such way that
						// at exit from the loop v1.16b-v24.16b
						// are loaded with last "words"
	mov	x7,x3
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v20.16b
	aesimc	v24.16b,v24.16b
	ld1	{v2.16b},[x0],#16
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v21.16b
	aesimc	v24.16b,v24.16b
	ld1	{v3.16b},[x0],#16
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v22.16b
	aesimc	v24.16b,v24.16b
	ld1	{v27.16b},[x0],#16
	aesd	v0.16b,v23.16b
	aesd	v1.16b,v23.16b
	aesd	v24.16b,v23.16b
	ld1	{v16.4s},[x7],#16		// re-pre-load rndkey[0]
	add	w6,w5,#2
	eor	v4.16b,v7.16b,v0.16b
	eor	v5.16b,v7.16b,v1.16b
	eor	v24.16b,v24.16b,v7.16b
	ld1	{v17.4s},[x7],#16		// re-pre-load rndkey[1]
	st1	{v4.16b},[x1],#16
	orr	v0.16b,v2.16b,v2.16b
	st1	{v5.16b},[x1],#16
	orr	v1.16b,v3.16b,v3.16b
	st1	{v24.16b},[x1],#16
	orr	v24.16b,v27.16b,v27.16b
	// Flags are still those of the subs by 0x30 above.
	b.hs	.Loop3x_ecb_dec

	cmn	x2,#0x30
	b.eq	.Lecb_done
	nop

	// One or two blocks left: v24 always holds the last block, v1 the
	// one before it when two remain.
.Lecb_dec_tail:
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lecb_dec_tail

	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v20.16b
	aesimc	v24.16b,v24.16b
	cmn	x2,#0x20
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v21.16b
	aesimc	v24.16b,v24.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v22.16b
	aesimc	v24.16b,v24.16b
	aesd	v1.16b,v23.16b
	aesd	v24.16b,v23.16b
	b.eq	.Lecb_dec_one
	eor	v5.16b,v7.16b,v1.16b
	eor	v17.16b,v7.16b,v24.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	b	.Lecb_done

.Lecb_dec_one:
	eor	v5.16b,v7.16b,v24.16b
	st1	{v5.16b},[x1],#16

	// Bulk-path exit pops the frame pushed at .Lecb_big_size; the
	// single-block path never pushed one and joins at the bare ret.
.Lecb_done:
	ldr	x29,[sp],#16
.Lecb_Final_abort:
	ret
1.4 christos .size aes_v8_ecb_encrypt,.-aes_v8_ecb_encrypt 1029 1.1 joerg .globl aes_v8_cbc_encrypt 1030 1.1 joerg .type aes_v8_cbc_encrypt,%function 1031 1.1 joerg .align 5 1032 1.1 joerg aes_v8_cbc_encrypt: 1033 1.5 christos stp x29,x30,[sp,#-16]! 1034 1.5 christos add x29,sp,#0 1035 1.5 christos subs x2,x2,#16 1036 1.5 christos mov x8,#16 1037 1.5 christos b.lo .Lcbc_abort 1038 1.5 christos csel x8,xzr,x8,eq 1039 1.5 christos 1040 1.5 christos cmp w5,#0 // en- or decrypting? 1041 1.5 christos ldr w5,[x3,#240] 1042 1.5 christos and x2,x2,#-16 1043 1.5 christos ld1 {v6.16b},[x4] 1044 1.5 christos ld1 {v0.16b},[x0],x8 1045 1.5 christos 1046 1.5 christos ld1 {v16.4s,v17.4s},[x3] // load key schedule... 1047 1.5 christos sub w5,w5,#6 1048 1.5 christos add x7,x3,x5,lsl#4 // pointer to last 7 round keys 1049 1.5 christos sub w5,w5,#2 1050 1.5 christos ld1 {v18.4s,v19.4s},[x7],#32 1051 1.5 christos ld1 {v20.4s,v21.4s},[x7],#32 1052 1.5 christos ld1 {v22.4s,v23.4s},[x7],#32 1053 1.5 christos ld1 {v7.4s},[x7] 1054 1.5 christos 1055 1.5 christos add x7,x3,#32 1056 1.5 christos mov w6,w5 1057 1.5 christos b.eq .Lcbc_dec 1058 1.5 christos 1059 1.5 christos cmp w5,#2 1060 1.5 christos eor v0.16b,v0.16b,v6.16b 1061 1.5 christos eor v5.16b,v16.16b,v7.16b 1062 1.5 christos b.eq .Lcbc_enc128 1063 1.5 christos 1064 1.5 christos ld1 {v2.4s,v3.4s},[x7] 1065 1.5 christos add x7,x3,#16 1066 1.5 christos add x6,x3,#16*4 1067 1.5 christos add x12,x3,#16*5 1068 1.5 christos aese v0.16b,v16.16b 1069 1.5 christos aesmc v0.16b,v0.16b 1070 1.5 christos add x14,x3,#16*6 1071 1.5 christos add x3,x3,#16*7 1072 1.1 joerg b .Lenter_cbc_enc 1073 1.1 joerg 1074 1.1 joerg .align 4 1075 1.1 joerg .Loop_cbc_enc: 1076 1.5 christos aese v0.16b,v16.16b 1077 1.5 christos aesmc v0.16b,v0.16b 1078 1.5 christos st1 {v6.16b},[x1],#16 1079 1.1 joerg .Lenter_cbc_enc: 1080 1.5 christos aese v0.16b,v17.16b 1081 1.5 christos aesmc v0.16b,v0.16b 1082 1.5 christos aese v0.16b,v2.16b 1083 1.5 christos aesmc 
v0.16b,v0.16b 1084 1.5 christos ld1 {v16.4s},[x6] 1085 1.5 christos cmp w5,#4 1086 1.5 christos aese v0.16b,v3.16b 1087 1.5 christos aesmc v0.16b,v0.16b 1088 1.5 christos ld1 {v17.4s},[x12] 1089 1.5 christos b.eq .Lcbc_enc192 1090 1.5 christos 1091 1.5 christos aese v0.16b,v16.16b 1092 1.5 christos aesmc v0.16b,v0.16b 1093 1.5 christos ld1 {v16.4s},[x14] 1094 1.5 christos aese v0.16b,v17.16b 1095 1.5 christos aesmc v0.16b,v0.16b 1096 1.5 christos ld1 {v17.4s},[x3] 1097 1.1 joerg nop 1098 1.1 joerg 1099 1.1 joerg .Lcbc_enc192: 1100 1.5 christos aese v0.16b,v16.16b 1101 1.5 christos aesmc v0.16b,v0.16b 1102 1.5 christos subs x2,x2,#16 1103 1.5 christos aese v0.16b,v17.16b 1104 1.5 christos aesmc v0.16b,v0.16b 1105 1.5 christos csel x8,xzr,x8,eq 1106 1.5 christos aese v0.16b,v18.16b 1107 1.5 christos aesmc v0.16b,v0.16b 1108 1.5 christos aese v0.16b,v19.16b 1109 1.5 christos aesmc v0.16b,v0.16b 1110 1.5 christos ld1 {v16.16b},[x0],x8 1111 1.5 christos aese v0.16b,v20.16b 1112 1.5 christos aesmc v0.16b,v0.16b 1113 1.5 christos eor v16.16b,v16.16b,v5.16b 1114 1.5 christos aese v0.16b,v21.16b 1115 1.5 christos aesmc v0.16b,v0.16b 1116 1.5 christos ld1 {v17.4s},[x7] // re-pre-load rndkey[1] 1117 1.5 christos aese v0.16b,v22.16b 1118 1.5 christos aesmc v0.16b,v0.16b 1119 1.5 christos aese v0.16b,v23.16b 1120 1.5 christos eor v6.16b,v0.16b,v7.16b 1121 1.5 christos b.hs .Loop_cbc_enc 1122 1.1 joerg 1123 1.5 christos st1 {v6.16b},[x1],#16 1124 1.1 joerg b .Lcbc_done 1125 1.1 joerg 1126 1.1 joerg .align 5 1127 1.1 joerg .Lcbc_enc128: 1128 1.5 christos ld1 {v2.4s,v3.4s},[x7] 1129 1.5 christos aese v0.16b,v16.16b 1130 1.5 christos aesmc v0.16b,v0.16b 1131 1.1 joerg b .Lenter_cbc_enc128 1132 1.1 joerg .Loop_cbc_enc128: 1133 1.5 christos aese v0.16b,v16.16b 1134 1.5 christos aesmc v0.16b,v0.16b 1135 1.5 christos st1 {v6.16b},[x1],#16 1136 1.1 joerg .Lenter_cbc_enc128: 1137 1.5 christos aese v0.16b,v17.16b 1138 1.5 christos aesmc v0.16b,v0.16b 1139 1.5 christos subs x2,x2,#16 1140 
1.5 christos aese v0.16b,v2.16b 1141 1.5 christos aesmc v0.16b,v0.16b 1142 1.5 christos csel x8,xzr,x8,eq 1143 1.5 christos aese v0.16b,v3.16b 1144 1.5 christos aesmc v0.16b,v0.16b 1145 1.5 christos aese v0.16b,v18.16b 1146 1.5 christos aesmc v0.16b,v0.16b 1147 1.5 christos aese v0.16b,v19.16b 1148 1.5 christos aesmc v0.16b,v0.16b 1149 1.5 christos ld1 {v16.16b},[x0],x8 1150 1.5 christos aese v0.16b,v20.16b 1151 1.5 christos aesmc v0.16b,v0.16b 1152 1.5 christos aese v0.16b,v21.16b 1153 1.5 christos aesmc v0.16b,v0.16b 1154 1.5 christos aese v0.16b,v22.16b 1155 1.5 christos aesmc v0.16b,v0.16b 1156 1.5 christos eor v16.16b,v16.16b,v5.16b 1157 1.5 christos aese v0.16b,v23.16b 1158 1.5 christos eor v6.16b,v0.16b,v7.16b 1159 1.5 christos b.hs .Loop_cbc_enc128 1160 1.1 joerg 1161 1.5 christos st1 {v6.16b},[x1],#16 1162 1.1 joerg b .Lcbc_done 1163 1.1 joerg .align 5 1164 1.1 joerg .Lcbc_dec: 1165 1.5 christos ld1 {v24.16b},[x0],#16 1166 1.5 christos subs x2,x2,#32 // bias 1167 1.5 christos add w6,w5,#2 1168 1.5 christos orr v3.16b,v0.16b,v0.16b 1169 1.5 christos orr v1.16b,v0.16b,v0.16b 1170 1.5 christos orr v27.16b,v24.16b,v24.16b 1171 1.5 christos b.lo .Lcbc_dec_tail 1172 1.5 christos 1173 1.5 christos orr v1.16b,v24.16b,v24.16b 1174 1.5 christos ld1 {v24.16b},[x0],#16 1175 1.5 christos orr v2.16b,v0.16b,v0.16b 1176 1.5 christos orr v3.16b,v1.16b,v1.16b 1177 1.5 christos orr v27.16b,v24.16b,v24.16b 1178 1.5 christos cmp x2,#32 1179 1.5 christos b.lo .Loop3x_cbc_dec 1180 1.5 christos 1181 1.5 christos ld1 {v25.16b},[x0],#16 1182 1.5 christos ld1 {v26.16b},[x0],#16 1183 1.5 christos sub x2,x2,#32 // bias 1184 1.5 christos mov w6,w5 1185 1.5 christos orr v28.16b,v25.16b,v25.16b 1186 1.5 christos orr v29.16b,v26.16b,v26.16b 1187 1.5 christos 1188 1.5 christos .Loop5x_cbc_dec: 1189 1.5 christos aesd v0.16b,v16.16b 1190 1.5 christos aesimc v0.16b,v0.16b 1191 1.5 christos aesd v1.16b,v16.16b 1192 1.5 christos aesimc v1.16b,v1.16b 1193 1.5 christos aesd v24.16b,v16.16b 1194 
1.5 christos aesimc v24.16b,v24.16b 1195 1.5 christos aesd v25.16b,v16.16b 1196 1.5 christos aesimc v25.16b,v25.16b 1197 1.5 christos aesd v26.16b,v16.16b 1198 1.5 christos aesimc v26.16b,v26.16b 1199 1.5 christos ld1 {v16.4s},[x7],#16 1200 1.5 christos subs w6,w6,#2 1201 1.5 christos aesd v0.16b,v17.16b 1202 1.5 christos aesimc v0.16b,v0.16b 1203 1.5 christos aesd v1.16b,v17.16b 1204 1.5 christos aesimc v1.16b,v1.16b 1205 1.5 christos aesd v24.16b,v17.16b 1206 1.5 christos aesimc v24.16b,v24.16b 1207 1.5 christos aesd v25.16b,v17.16b 1208 1.5 christos aesimc v25.16b,v25.16b 1209 1.5 christos aesd v26.16b,v17.16b 1210 1.5 christos aesimc v26.16b,v26.16b 1211 1.5 christos ld1 {v17.4s},[x7],#16 1212 1.5 christos b.gt .Loop5x_cbc_dec 1213 1.5 christos 1214 1.5 christos aesd v0.16b,v16.16b 1215 1.5 christos aesimc v0.16b,v0.16b 1216 1.5 christos aesd v1.16b,v16.16b 1217 1.5 christos aesimc v1.16b,v1.16b 1218 1.5 christos aesd v24.16b,v16.16b 1219 1.5 christos aesimc v24.16b,v24.16b 1220 1.5 christos aesd v25.16b,v16.16b 1221 1.5 christos aesimc v25.16b,v25.16b 1222 1.5 christos aesd v26.16b,v16.16b 1223 1.5 christos aesimc v26.16b,v26.16b 1224 1.5 christos cmp x2,#0x40 // because .Lcbc_tail4x 1225 1.5 christos sub x2,x2,#0x50 1226 1.5 christos 1227 1.5 christos aesd v0.16b,v17.16b 1228 1.5 christos aesimc v0.16b,v0.16b 1229 1.5 christos aesd v1.16b,v17.16b 1230 1.5 christos aesimc v1.16b,v1.16b 1231 1.5 christos aesd v24.16b,v17.16b 1232 1.5 christos aesimc v24.16b,v24.16b 1233 1.5 christos aesd v25.16b,v17.16b 1234 1.5 christos aesimc v25.16b,v25.16b 1235 1.5 christos aesd v26.16b,v17.16b 1236 1.5 christos aesimc v26.16b,v26.16b 1237 1.5 christos csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo 1238 1.5 christos mov x7,x3 1239 1.5 christos 1240 1.5 christos aesd v0.16b,v18.16b 1241 1.5 christos aesimc v0.16b,v0.16b 1242 1.5 christos aesd v1.16b,v18.16b 1243 1.5 christos aesimc v1.16b,v1.16b 1244 1.5 christos aesd v24.16b,v18.16b 1245 1.5 christos aesimc 
v24.16b,v24.16b 1246 1.5 christos aesd v25.16b,v18.16b 1247 1.5 christos aesimc v25.16b,v25.16b 1248 1.5 christos aesd v26.16b,v18.16b 1249 1.5 christos aesimc v26.16b,v26.16b 1250 1.5 christos add x0,x0,x6 // x0 is adjusted in such way that 1251 1.5 christos // at exit from the loop v1.16b-v26.16b 1252 1.5 christos // are loaded with last "words" 1253 1.5 christos add x6,x2,#0x60 // because .Lcbc_tail4x 1254 1.5 christos 1255 1.5 christos aesd v0.16b,v19.16b 1256 1.5 christos aesimc v0.16b,v0.16b 1257 1.5 christos aesd v1.16b,v19.16b 1258 1.5 christos aesimc v1.16b,v1.16b 1259 1.5 christos aesd v24.16b,v19.16b 1260 1.5 christos aesimc v24.16b,v24.16b 1261 1.5 christos aesd v25.16b,v19.16b 1262 1.5 christos aesimc v25.16b,v25.16b 1263 1.5 christos aesd v26.16b,v19.16b 1264 1.5 christos aesimc v26.16b,v26.16b 1265 1.5 christos 1266 1.5 christos aesd v0.16b,v20.16b 1267 1.5 christos aesimc v0.16b,v0.16b 1268 1.5 christos aesd v1.16b,v20.16b 1269 1.5 christos aesimc v1.16b,v1.16b 1270 1.5 christos aesd v24.16b,v20.16b 1271 1.5 christos aesimc v24.16b,v24.16b 1272 1.5 christos aesd v25.16b,v20.16b 1273 1.5 christos aesimc v25.16b,v25.16b 1274 1.5 christos aesd v26.16b,v20.16b 1275 1.5 christos aesimc v26.16b,v26.16b 1276 1.5 christos 1277 1.5 christos aesd v0.16b,v21.16b 1278 1.5 christos aesimc v0.16b,v0.16b 1279 1.5 christos aesd v1.16b,v21.16b 1280 1.5 christos aesimc v1.16b,v1.16b 1281 1.5 christos aesd v24.16b,v21.16b 1282 1.5 christos aesimc v24.16b,v24.16b 1283 1.5 christos aesd v25.16b,v21.16b 1284 1.5 christos aesimc v25.16b,v25.16b 1285 1.5 christos aesd v26.16b,v21.16b 1286 1.5 christos aesimc v26.16b,v26.16b 1287 1.5 christos 1288 1.5 christos aesd v0.16b,v22.16b 1289 1.5 christos aesimc v0.16b,v0.16b 1290 1.5 christos aesd v1.16b,v22.16b 1291 1.5 christos aesimc v1.16b,v1.16b 1292 1.5 christos aesd v24.16b,v22.16b 1293 1.5 christos aesimc v24.16b,v24.16b 1294 1.5 christos aesd v25.16b,v22.16b 1295 1.5 christos aesimc v25.16b,v25.16b 1296 1.5 christos aesd 
v26.16b,v22.16b 1297 1.5 christos aesimc v26.16b,v26.16b 1298 1.5 christos 1299 1.5 christos eor v4.16b,v6.16b,v7.16b 1300 1.5 christos aesd v0.16b,v23.16b 1301 1.5 christos eor v5.16b,v2.16b,v7.16b 1302 1.5 christos ld1 {v2.16b},[x0],#16 1303 1.5 christos aesd v1.16b,v23.16b 1304 1.5 christos eor v17.16b,v3.16b,v7.16b 1305 1.5 christos ld1 {v3.16b},[x0],#16 1306 1.5 christos aesd v24.16b,v23.16b 1307 1.5 christos eor v30.16b,v27.16b,v7.16b 1308 1.5 christos ld1 {v27.16b},[x0],#16 1309 1.5 christos aesd v25.16b,v23.16b 1310 1.5 christos eor v31.16b,v28.16b,v7.16b 1311 1.5 christos ld1 {v28.16b},[x0],#16 1312 1.5 christos aesd v26.16b,v23.16b 1313 1.5 christos orr v6.16b,v29.16b,v29.16b 1314 1.5 christos ld1 {v29.16b},[x0],#16 1315 1.5 christos cbz x6,.Lcbc_tail4x 1316 1.5 christos ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 1317 1.5 christos eor v4.16b,v4.16b,v0.16b 1318 1.5 christos orr v0.16b,v2.16b,v2.16b 1319 1.5 christos eor v5.16b,v5.16b,v1.16b 1320 1.5 christos orr v1.16b,v3.16b,v3.16b 1321 1.5 christos eor v17.16b,v17.16b,v24.16b 1322 1.5 christos orr v24.16b,v27.16b,v27.16b 1323 1.5 christos eor v30.16b,v30.16b,v25.16b 1324 1.5 christos orr v25.16b,v28.16b,v28.16b 1325 1.5 christos eor v31.16b,v31.16b,v26.16b 1326 1.5 christos st1 {v4.16b},[x1],#16 1327 1.5 christos orr v26.16b,v29.16b,v29.16b 1328 1.5 christos st1 {v5.16b},[x1],#16 1329 1.5 christos mov w6,w5 1330 1.5 christos st1 {v17.16b},[x1],#16 1331 1.5 christos ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 1332 1.5 christos st1 {v30.16b},[x1],#16 1333 1.5 christos st1 {v31.16b},[x1],#16 1334 1.5 christos b.hs .Loop5x_cbc_dec 1335 1.5 christos 1336 1.5 christos add x2,x2,#0x50 1337 1.5 christos cbz x2,.Lcbc_done 1338 1.5 christos 1339 1.5 christos add w6,w5,#2 1340 1.5 christos subs x2,x2,#0x30 1341 1.5 christos orr v0.16b,v27.16b,v27.16b 1342 1.5 christos orr v2.16b,v27.16b,v27.16b 1343 1.5 christos orr v1.16b,v28.16b,v28.16b 1344 1.5 christos orr v3.16b,v28.16b,v28.16b 1345 1.5 christos orr 
v24.16b,v29.16b,v29.16b 1346 1.5 christos orr v27.16b,v29.16b,v29.16b 1347 1.5 christos b.lo .Lcbc_dec_tail 1348 1.5 christos 1349 1.5 christos b .Loop3x_cbc_dec 1350 1.5 christos 1351 1.5 christos .align 4 1352 1.5 christos .Lcbc_tail4x: 1353 1.5 christos eor v5.16b,v4.16b,v1.16b 1354 1.5 christos eor v17.16b,v17.16b,v24.16b 1355 1.5 christos eor v30.16b,v30.16b,v25.16b 1356 1.5 christos eor v31.16b,v31.16b,v26.16b 1357 1.5 christos st1 {v5.16b},[x1],#16 1358 1.5 christos st1 {v17.16b},[x1],#16 1359 1.5 christos st1 {v30.16b},[x1],#16 1360 1.5 christos st1 {v31.16b},[x1],#16 1361 1.5 christos 1362 1.5 christos b .Lcbc_done 1363 1.5 christos .align 4 1364 1.1 joerg .Loop3x_cbc_dec: 1365 1.5 christos aesd v0.16b,v16.16b 1366 1.5 christos aesimc v0.16b,v0.16b 1367 1.5 christos aesd v1.16b,v16.16b 1368 1.5 christos aesimc v1.16b,v1.16b 1369 1.5 christos aesd v24.16b,v16.16b 1370 1.5 christos aesimc v24.16b,v24.16b 1371 1.5 christos ld1 {v16.4s},[x7],#16 1372 1.5 christos subs w6,w6,#2 1373 1.5 christos aesd v0.16b,v17.16b 1374 1.5 christos aesimc v0.16b,v0.16b 1375 1.5 christos aesd v1.16b,v17.16b 1376 1.5 christos aesimc v1.16b,v1.16b 1377 1.5 christos aesd v24.16b,v17.16b 1378 1.5 christos aesimc v24.16b,v24.16b 1379 1.5 christos ld1 {v17.4s},[x7],#16 1380 1.5 christos b.gt .Loop3x_cbc_dec 1381 1.5 christos 1382 1.5 christos aesd v0.16b,v16.16b 1383 1.5 christos aesimc v0.16b,v0.16b 1384 1.5 christos aesd v1.16b,v16.16b 1385 1.5 christos aesimc v1.16b,v1.16b 1386 1.5 christos aesd v24.16b,v16.16b 1387 1.5 christos aesimc v24.16b,v24.16b 1388 1.5 christos eor v4.16b,v6.16b,v7.16b 1389 1.5 christos subs x2,x2,#0x30 1390 1.5 christos eor v5.16b,v2.16b,v7.16b 1391 1.5 christos csel x6,x2,x6,lo // x6, w6, is zero at this point 1392 1.5 christos aesd v0.16b,v17.16b 1393 1.5 christos aesimc v0.16b,v0.16b 1394 1.5 christos aesd v1.16b,v17.16b 1395 1.5 christos aesimc v1.16b,v1.16b 1396 1.5 christos aesd v24.16b,v17.16b 1397 1.5 christos aesimc v24.16b,v24.16b 1398 1.5 
christos eor v17.16b,v3.16b,v7.16b 1399 1.5 christos add x0,x0,x6 // x0 is adjusted in such way that 1400 1.5 christos // at exit from the loop v1.16b-v24.16b 1401 1.5 christos // are loaded with last "words" 1402 1.5 christos orr v6.16b,v27.16b,v27.16b 1403 1.5 christos mov x7,x3 1404 1.5 christos aesd v0.16b,v20.16b 1405 1.5 christos aesimc v0.16b,v0.16b 1406 1.5 christos aesd v1.16b,v20.16b 1407 1.5 christos aesimc v1.16b,v1.16b 1408 1.5 christos aesd v24.16b,v20.16b 1409 1.5 christos aesimc v24.16b,v24.16b 1410 1.5 christos ld1 {v2.16b},[x0],#16 1411 1.5 christos aesd v0.16b,v21.16b 1412 1.5 christos aesimc v0.16b,v0.16b 1413 1.5 christos aesd v1.16b,v21.16b 1414 1.5 christos aesimc v1.16b,v1.16b 1415 1.5 christos aesd v24.16b,v21.16b 1416 1.5 christos aesimc v24.16b,v24.16b 1417 1.5 christos ld1 {v3.16b},[x0],#16 1418 1.5 christos aesd v0.16b,v22.16b 1419 1.5 christos aesimc v0.16b,v0.16b 1420 1.5 christos aesd v1.16b,v22.16b 1421 1.5 christos aesimc v1.16b,v1.16b 1422 1.5 christos aesd v24.16b,v22.16b 1423 1.5 christos aesimc v24.16b,v24.16b 1424 1.5 christos ld1 {v27.16b},[x0],#16 1425 1.5 christos aesd v0.16b,v23.16b 1426 1.5 christos aesd v1.16b,v23.16b 1427 1.5 christos aesd v24.16b,v23.16b 1428 1.5 christos ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 1429 1.5 christos add w6,w5,#2 1430 1.5 christos eor v4.16b,v4.16b,v0.16b 1431 1.5 christos eor v5.16b,v5.16b,v1.16b 1432 1.5 christos eor v24.16b,v24.16b,v17.16b 1433 1.5 christos ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 1434 1.5 christos st1 {v4.16b},[x1],#16 1435 1.5 christos orr v0.16b,v2.16b,v2.16b 1436 1.5 christos st1 {v5.16b},[x1],#16 1437 1.5 christos orr v1.16b,v3.16b,v3.16b 1438 1.5 christos st1 {v24.16b},[x1],#16 1439 1.5 christos orr v24.16b,v27.16b,v27.16b 1440 1.5 christos b.hs .Loop3x_cbc_dec 1441 1.1 joerg 1442 1.5 christos cmn x2,#0x30 1443 1.5 christos b.eq .Lcbc_done 1444 1.1 joerg nop 1445 1.1 joerg 1446 1.1 joerg .Lcbc_dec_tail: 1447 1.5 christos aesd v1.16b,v16.16b 1448 1.5 
christos aesimc v1.16b,v1.16b 1449 1.5 christos aesd v24.16b,v16.16b 1450 1.5 christos aesimc v24.16b,v24.16b 1451 1.5 christos ld1 {v16.4s},[x7],#16 1452 1.5 christos subs w6,w6,#2 1453 1.5 christos aesd v1.16b,v17.16b 1454 1.5 christos aesimc v1.16b,v1.16b 1455 1.5 christos aesd v24.16b,v17.16b 1456 1.5 christos aesimc v24.16b,v24.16b 1457 1.5 christos ld1 {v17.4s},[x7],#16 1458 1.5 christos b.gt .Lcbc_dec_tail 1459 1.5 christos 1460 1.5 christos aesd v1.16b,v16.16b 1461 1.5 christos aesimc v1.16b,v1.16b 1462 1.5 christos aesd v24.16b,v16.16b 1463 1.5 christos aesimc v24.16b,v24.16b 1464 1.5 christos aesd v1.16b,v17.16b 1465 1.5 christos aesimc v1.16b,v1.16b 1466 1.5 christos aesd v24.16b,v17.16b 1467 1.5 christos aesimc v24.16b,v24.16b 1468 1.5 christos aesd v1.16b,v20.16b 1469 1.5 christos aesimc v1.16b,v1.16b 1470 1.5 christos aesd v24.16b,v20.16b 1471 1.5 christos aesimc v24.16b,v24.16b 1472 1.5 christos cmn x2,#0x20 1473 1.5 christos aesd v1.16b,v21.16b 1474 1.5 christos aesimc v1.16b,v1.16b 1475 1.5 christos aesd v24.16b,v21.16b 1476 1.5 christos aesimc v24.16b,v24.16b 1477 1.5 christos eor v5.16b,v6.16b,v7.16b 1478 1.5 christos aesd v1.16b,v22.16b 1479 1.5 christos aesimc v1.16b,v1.16b 1480 1.5 christos aesd v24.16b,v22.16b 1481 1.5 christos aesimc v24.16b,v24.16b 1482 1.5 christos eor v17.16b,v3.16b,v7.16b 1483 1.5 christos aesd v1.16b,v23.16b 1484 1.5 christos aesd v24.16b,v23.16b 1485 1.5 christos b.eq .Lcbc_dec_one 1486 1.5 christos eor v5.16b,v5.16b,v1.16b 1487 1.5 christos eor v17.16b,v17.16b,v24.16b 1488 1.5 christos orr v6.16b,v27.16b,v27.16b 1489 1.5 christos st1 {v5.16b},[x1],#16 1490 1.5 christos st1 {v17.16b},[x1],#16 1491 1.1 joerg b .Lcbc_done 1492 1.1 joerg 1493 1.1 joerg .Lcbc_dec_one: 1494 1.5 christos eor v5.16b,v5.16b,v24.16b 1495 1.5 christos orr v6.16b,v27.16b,v27.16b 1496 1.5 christos st1 {v5.16b},[x1],#16 1497 1.1 joerg 1498 1.1 joerg .Lcbc_done: 1499 1.5 christos st1 {v6.16b},[x4] 1500 1.1 joerg .Lcbc_abort: 1501 1.5 christos ldr 
x29,[sp],#16 1502 1.5 christos ret 1503 1.1 joerg .size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt 1504 1.1 joerg .globl aes_v8_ctr32_encrypt_blocks 1505 1.1 joerg .type aes_v8_ctr32_encrypt_blocks,%function 1506 1.1 joerg .align 5 1507 1.1 joerg aes_v8_ctr32_encrypt_blocks: 1508 1.5 christos stp x29,x30,[sp,#-16]! 1509 1.5 christos add x29,sp,#0 1510 1.5 christos ldr w5,[x3,#240] 1511 1.2 christos 1512 1.5 christos ldr w8, [x4, #12] 1513 1.7 christos #ifdef __AARCH64EB__ 1514 1.5 christos ld1 {v0.16b},[x4] 1515 1.3 christos #else 1516 1.5 christos ld1 {v0.4s},[x4] 1517 1.3 christos #endif 1518 1.5 christos ld1 {v16.4s,v17.4s},[x3] // load key schedule... 1519 1.5 christos sub w5,w5,#4 1520 1.5 christos mov x12,#16 1521 1.5 christos cmp x2,#2 1522 1.5 christos add x7,x3,x5,lsl#4 // pointer to last 5 round keys 1523 1.5 christos sub w5,w5,#2 1524 1.5 christos ld1 {v20.4s,v21.4s},[x7],#32 1525 1.5 christos ld1 {v22.4s,v23.4s},[x7],#32 1526 1.5 christos ld1 {v7.4s},[x7] 1527 1.5 christos add x7,x3,#32 1528 1.5 christos mov w6,w5 1529 1.5 christos csel x12,xzr,x12,lo 1530 1.7 christos #ifndef __AARCH64EB__ 1531 1.5 christos rev w8, w8 1532 1.1 joerg #endif 1533 1.5 christos orr v1.16b,v0.16b,v0.16b 1534 1.5 christos add w10, w8, #1 1535 1.5 christos orr v18.16b,v0.16b,v0.16b 1536 1.5 christos add w8, w8, #2 1537 1.5 christos orr v6.16b,v0.16b,v0.16b 1538 1.5 christos rev w10, w10 1539 1.5 christos mov v1.s[3],w10 1540 1.5 christos b.ls .Lctr32_tail 1541 1.5 christos rev w12, w8 1542 1.5 christos sub x2,x2,#3 // bias 1543 1.5 christos mov v18.s[3],w12 1544 1.5 christos cmp x2,#32 1545 1.5 christos b.lo .Loop3x_ctr32 1546 1.5 christos 1547 1.5 christos add w13,w8,#1 1548 1.5 christos add w14,w8,#2 1549 1.5 christos orr v24.16b,v0.16b,v0.16b 1550 1.5 christos rev w13,w13 1551 1.5 christos orr v25.16b,v0.16b,v0.16b 1552 1.5 christos rev w14,w14 1553 1.5 christos mov v24.s[3],w13 1554 1.5 christos sub x2,x2,#2 // bias 1555 1.5 christos mov v25.s[3],w14 1556 1.5 christos add 
w8,w8,#2 1557 1.5 christos b .Loop5x_ctr32 1558 1.5 christos 1559 1.5 christos .align 4 1560 1.5 christos .Loop5x_ctr32: 1561 1.5 christos aese v0.16b,v16.16b 1562 1.5 christos aesmc v0.16b,v0.16b 1563 1.5 christos aese v1.16b,v16.16b 1564 1.5 christos aesmc v1.16b,v1.16b 1565 1.5 christos aese v18.16b,v16.16b 1566 1.5 christos aesmc v18.16b,v18.16b 1567 1.5 christos aese v24.16b,v16.16b 1568 1.5 christos aesmc v24.16b,v24.16b 1569 1.5 christos aese v25.16b,v16.16b 1570 1.5 christos aesmc v25.16b,v25.16b 1571 1.5 christos ld1 {v16.4s},[x7],#16 1572 1.5 christos subs w6,w6,#2 1573 1.5 christos aese v0.16b,v17.16b 1574 1.5 christos aesmc v0.16b,v0.16b 1575 1.5 christos aese v1.16b,v17.16b 1576 1.5 christos aesmc v1.16b,v1.16b 1577 1.5 christos aese v18.16b,v17.16b 1578 1.5 christos aesmc v18.16b,v18.16b 1579 1.5 christos aese v24.16b,v17.16b 1580 1.5 christos aesmc v24.16b,v24.16b 1581 1.5 christos aese v25.16b,v17.16b 1582 1.5 christos aesmc v25.16b,v25.16b 1583 1.5 christos ld1 {v17.4s},[x7],#16 1584 1.5 christos b.gt .Loop5x_ctr32 1585 1.5 christos 1586 1.5 christos mov x7,x3 1587 1.5 christos aese v0.16b,v16.16b 1588 1.5 christos aesmc v0.16b,v0.16b 1589 1.5 christos aese v1.16b,v16.16b 1590 1.5 christos aesmc v1.16b,v1.16b 1591 1.5 christos aese v18.16b,v16.16b 1592 1.5 christos aesmc v18.16b,v18.16b 1593 1.5 christos aese v24.16b,v16.16b 1594 1.5 christos aesmc v24.16b,v24.16b 1595 1.5 christos aese v25.16b,v16.16b 1596 1.5 christos aesmc v25.16b,v25.16b 1597 1.5 christos ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 1598 1.5 christos 1599 1.5 christos aese v0.16b,v17.16b 1600 1.5 christos aesmc v0.16b,v0.16b 1601 1.5 christos aese v1.16b,v17.16b 1602 1.5 christos aesmc v1.16b,v1.16b 1603 1.5 christos aese v18.16b,v17.16b 1604 1.5 christos aesmc v18.16b,v18.16b 1605 1.5 christos aese v24.16b,v17.16b 1606 1.5 christos aesmc v24.16b,v24.16b 1607 1.5 christos aese v25.16b,v17.16b 1608 1.5 christos aesmc v25.16b,v25.16b 1609 1.5 christos ld1 {v17.4s},[x7],#16 // 
re-pre-load rndkey[1] 1610 1.5 christos 1611 1.5 christos aese v0.16b,v20.16b 1612 1.5 christos aesmc v0.16b,v0.16b 1613 1.5 christos add w9,w8,#1 1614 1.5 christos add w10,w8,#2 1615 1.5 christos aese v1.16b,v20.16b 1616 1.5 christos aesmc v1.16b,v1.16b 1617 1.5 christos add w12,w8,#3 1618 1.5 christos add w13,w8,#4 1619 1.5 christos aese v18.16b,v20.16b 1620 1.5 christos aesmc v18.16b,v18.16b 1621 1.5 christos add w14,w8,#5 1622 1.5 christos rev w9,w9 1623 1.5 christos aese v24.16b,v20.16b 1624 1.5 christos aesmc v24.16b,v24.16b 1625 1.5 christos rev w10,w10 1626 1.5 christos rev w12,w12 1627 1.5 christos aese v25.16b,v20.16b 1628 1.5 christos aesmc v25.16b,v25.16b 1629 1.5 christos rev w13,w13 1630 1.5 christos rev w14,w14 1631 1.5 christos 1632 1.5 christos aese v0.16b,v21.16b 1633 1.5 christos aesmc v0.16b,v0.16b 1634 1.5 christos aese v1.16b,v21.16b 1635 1.5 christos aesmc v1.16b,v1.16b 1636 1.5 christos aese v18.16b,v21.16b 1637 1.5 christos aesmc v18.16b,v18.16b 1638 1.5 christos aese v24.16b,v21.16b 1639 1.5 christos aesmc v24.16b,v24.16b 1640 1.5 christos aese v25.16b,v21.16b 1641 1.5 christos aesmc v25.16b,v25.16b 1642 1.5 christos 1643 1.5 christos aese v0.16b,v22.16b 1644 1.5 christos aesmc v0.16b,v0.16b 1645 1.5 christos ld1 {v2.16b},[x0],#16 1646 1.5 christos aese v1.16b,v22.16b 1647 1.5 christos aesmc v1.16b,v1.16b 1648 1.5 christos ld1 {v3.16b},[x0],#16 1649 1.5 christos aese v18.16b,v22.16b 1650 1.5 christos aesmc v18.16b,v18.16b 1651 1.5 christos ld1 {v19.16b},[x0],#16 1652 1.5 christos aese v24.16b,v22.16b 1653 1.5 christos aesmc v24.16b,v24.16b 1654 1.5 christos ld1 {v26.16b},[x0],#16 1655 1.5 christos aese v25.16b,v22.16b 1656 1.5 christos aesmc v25.16b,v25.16b 1657 1.5 christos ld1 {v27.16b},[x0],#16 1658 1.5 christos 1659 1.5 christos aese v0.16b,v23.16b 1660 1.5 christos eor v2.16b,v2.16b,v7.16b 1661 1.5 christos aese v1.16b,v23.16b 1662 1.5 christos eor v3.16b,v3.16b,v7.16b 1663 1.5 christos aese v18.16b,v23.16b 1664 1.5 christos eor 
v19.16b,v19.16b,v7.16b 1665 1.5 christos aese v24.16b,v23.16b 1666 1.5 christos eor v26.16b,v26.16b,v7.16b 1667 1.5 christos aese v25.16b,v23.16b 1668 1.5 christos eor v27.16b,v27.16b,v7.16b 1669 1.5 christos 1670 1.5 christos eor v2.16b,v2.16b,v0.16b 1671 1.5 christos orr v0.16b,v6.16b,v6.16b 1672 1.5 christos eor v3.16b,v3.16b,v1.16b 1673 1.5 christos orr v1.16b,v6.16b,v6.16b 1674 1.5 christos eor v19.16b,v19.16b,v18.16b 1675 1.5 christos orr v18.16b,v6.16b,v6.16b 1676 1.5 christos eor v26.16b,v26.16b,v24.16b 1677 1.5 christos orr v24.16b,v6.16b,v6.16b 1678 1.5 christos eor v27.16b,v27.16b,v25.16b 1679 1.5 christos orr v25.16b,v6.16b,v6.16b 1680 1.5 christos 1681 1.5 christos st1 {v2.16b},[x1],#16 1682 1.5 christos mov v0.s[3],w9 1683 1.5 christos st1 {v3.16b},[x1],#16 1684 1.5 christos mov v1.s[3],w10 1685 1.5 christos st1 {v19.16b},[x1],#16 1686 1.5 christos mov v18.s[3],w12 1687 1.5 christos st1 {v26.16b},[x1],#16 1688 1.5 christos mov v24.s[3],w13 1689 1.5 christos st1 {v27.16b},[x1],#16 1690 1.5 christos mov v25.s[3],w14 1691 1.5 christos 1692 1.5 christos mov w6,w5 1693 1.5 christos cbz x2,.Lctr32_done 1694 1.5 christos 1695 1.5 christos add w8,w8,#5 1696 1.5 christos subs x2,x2,#5 1697 1.5 christos b.hs .Loop5x_ctr32 1698 1.5 christos 1699 1.5 christos add x2,x2,#5 1700 1.5 christos sub w8,w8,#5 1701 1.5 christos 1702 1.5 christos cmp x2,#2 1703 1.5 christos mov x12,#16 1704 1.5 christos csel x12,xzr,x12,lo 1705 1.5 christos b.ls .Lctr32_tail 1706 1.5 christos 1707 1.5 christos sub x2,x2,#3 // bias 1708 1.5 christos add w8,w8,#3 1709 1.2 christos b .Loop3x_ctr32 1710 1.1 joerg 1711 1.1 joerg .align 4 1712 1.1 joerg .Loop3x_ctr32: 1713 1.5 christos aese v0.16b,v16.16b 1714 1.5 christos aesmc v0.16b,v0.16b 1715 1.5 christos aese v1.16b,v16.16b 1716 1.5 christos aesmc v1.16b,v1.16b 1717 1.5 christos aese v18.16b,v16.16b 1718 1.5 christos aesmc v18.16b,v18.16b 1719 1.5 christos ld1 {v16.4s},[x7],#16 1720 1.5 christos subs w6,w6,#2 1721 1.5 christos aese 
v0.16b,v17.16b 1722 1.5 christos aesmc v0.16b,v0.16b 1723 1.5 christos aese v1.16b,v17.16b 1724 1.5 christos aesmc v1.16b,v1.16b 1725 1.5 christos aese v18.16b,v17.16b 1726 1.5 christos aesmc v18.16b,v18.16b 1727 1.5 christos ld1 {v17.4s},[x7],#16 1728 1.5 christos b.gt .Loop3x_ctr32 1729 1.5 christos 1730 1.5 christos aese v0.16b,v16.16b 1731 1.5 christos aesmc v4.16b,v0.16b 1732 1.5 christos aese v1.16b,v16.16b 1733 1.5 christos aesmc v5.16b,v1.16b 1734 1.5 christos ld1 {v2.16b},[x0],#16 1735 1.5 christos orr v0.16b,v6.16b,v6.16b 1736 1.5 christos aese v18.16b,v16.16b 1737 1.5 christos aesmc v18.16b,v18.16b 1738 1.5 christos ld1 {v3.16b},[x0],#16 1739 1.5 christos orr v1.16b,v6.16b,v6.16b 1740 1.5 christos aese v4.16b,v17.16b 1741 1.5 christos aesmc v4.16b,v4.16b 1742 1.5 christos aese v5.16b,v17.16b 1743 1.5 christos aesmc v5.16b,v5.16b 1744 1.5 christos ld1 {v19.16b},[x0],#16 1745 1.5 christos mov x7,x3 1746 1.5 christos aese v18.16b,v17.16b 1747 1.5 christos aesmc v17.16b,v18.16b 1748 1.5 christos orr v18.16b,v6.16b,v6.16b 1749 1.5 christos add w9,w8,#1 1750 1.5 christos aese v4.16b,v20.16b 1751 1.5 christos aesmc v4.16b,v4.16b 1752 1.5 christos aese v5.16b,v20.16b 1753 1.5 christos aesmc v5.16b,v5.16b 1754 1.5 christos eor v2.16b,v2.16b,v7.16b 1755 1.5 christos add w10,w8,#2 1756 1.5 christos aese v17.16b,v20.16b 1757 1.5 christos aesmc v17.16b,v17.16b 1758 1.5 christos eor v3.16b,v3.16b,v7.16b 1759 1.5 christos add w8,w8,#3 1760 1.5 christos aese v4.16b,v21.16b 1761 1.5 christos aesmc v4.16b,v4.16b 1762 1.5 christos aese v5.16b,v21.16b 1763 1.5 christos aesmc v5.16b,v5.16b 1764 1.5 christos eor v19.16b,v19.16b,v7.16b 1765 1.5 christos rev w9,w9 1766 1.5 christos aese v17.16b,v21.16b 1767 1.5 christos aesmc v17.16b,v17.16b 1768 1.5 christos mov v0.s[3], w9 1769 1.5 christos rev w10,w10 1770 1.5 christos aese v4.16b,v22.16b 1771 1.5 christos aesmc v4.16b,v4.16b 1772 1.5 christos aese v5.16b,v22.16b 1773 1.5 christos aesmc v5.16b,v5.16b 1774 1.5 christos mov 
v1.s[3], w10 1775 1.5 christos rev w12,w8 1776 1.5 christos aese v17.16b,v22.16b 1777 1.5 christos aesmc v17.16b,v17.16b 1778 1.5 christos mov v18.s[3], w12 1779 1.5 christos subs x2,x2,#3 1780 1.5 christos aese v4.16b,v23.16b 1781 1.5 christos aese v5.16b,v23.16b 1782 1.5 christos aese v17.16b,v23.16b 1783 1.5 christos 1784 1.5 christos eor v2.16b,v2.16b,v4.16b 1785 1.5 christos ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 1786 1.5 christos st1 {v2.16b},[x1],#16 1787 1.5 christos eor v3.16b,v3.16b,v5.16b 1788 1.5 christos mov w6,w5 1789 1.5 christos st1 {v3.16b},[x1],#16 1790 1.5 christos eor v19.16b,v19.16b,v17.16b 1791 1.5 christos ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 1792 1.5 christos st1 {v19.16b},[x1],#16 1793 1.5 christos b.hs .Loop3x_ctr32 1794 1.5 christos 1795 1.5 christos adds x2,x2,#3 1796 1.5 christos b.eq .Lctr32_done 1797 1.5 christos cmp x2,#1 1798 1.5 christos mov x12,#16 1799 1.5 christos csel x12,xzr,x12,eq 1800 1.1 joerg 1801 1.1 joerg .Lctr32_tail: 1802 1.5 christos aese v0.16b,v16.16b 1803 1.5 christos aesmc v0.16b,v0.16b 1804 1.5 christos aese v1.16b,v16.16b 1805 1.5 christos aesmc v1.16b,v1.16b 1806 1.5 christos ld1 {v16.4s},[x7],#16 1807 1.5 christos subs w6,w6,#2 1808 1.5 christos aese v0.16b,v17.16b 1809 1.5 christos aesmc v0.16b,v0.16b 1810 1.5 christos aese v1.16b,v17.16b 1811 1.5 christos aesmc v1.16b,v1.16b 1812 1.5 christos ld1 {v17.4s},[x7],#16 1813 1.5 christos b.gt .Lctr32_tail 1814 1.5 christos 1815 1.5 christos aese v0.16b,v16.16b 1816 1.5 christos aesmc v0.16b,v0.16b 1817 1.5 christos aese v1.16b,v16.16b 1818 1.5 christos aesmc v1.16b,v1.16b 1819 1.5 christos aese v0.16b,v17.16b 1820 1.5 christos aesmc v0.16b,v0.16b 1821 1.5 christos aese v1.16b,v17.16b 1822 1.5 christos aesmc v1.16b,v1.16b 1823 1.5 christos ld1 {v2.16b},[x0],x12 1824 1.5 christos aese v0.16b,v20.16b 1825 1.5 christos aesmc v0.16b,v0.16b 1826 1.5 christos aese v1.16b,v20.16b 1827 1.5 christos aesmc v1.16b,v1.16b 1828 1.5 christos ld1 {v3.16b},[x0] 
// NOTE: the text below first completes aes_v8_ctr32_encrypt_blocks (its last
// rounds, stores and epilogue) and then opens aes_v8_xts_encrypt.
//
// aes_v8_xts_encrypt -- register roles as used by the code of this function:
//   x0  input pointer (blocks are loaded from [x0])
//   x1  output pointer (blocks are stored to [x1])
//   x2  length in bytes (compared with 16, then split into whole blocks + tail)
//   x3  key schedule applied to the data blocks; round count at [x3,#240]
//   x4  key schedule used to encrypt the IV ("key2"); round count at [x4,#240]
//   x5  pointer to the 16-byte IV
// A length of exactly 16 falls through into a single-block path; every other
// length branches to .Lxts_enc_big_size.
1829 1.5 christos aese v0.16b,v21.16b 1830 1.5 christos aesmc v0.16b,v0.16b 1831 1.5 christos aese v1.16b,v21.16b 1832 1.5 christos aesmc v1.16b,v1.16b 1833 1.5 christos eor v2.16b,v2.16b,v7.16b 1834 1.5 christos aese v0.16b,v22.16b 1835 1.5 christos aesmc v0.16b,v0.16b 1836 1.5 christos aese v1.16b,v22.16b 1837 1.5 christos aesmc v1.16b,v1.16b 1838 1.5 christos eor v3.16b,v3.16b,v7.16b 1839 1.5 christos aese v0.16b,v23.16b 1840 1.5 christos aese v1.16b,v23.16b 1841 1.5 christos 1842 1.5 christos cmp x2,#1 1843 1.5 christos eor v2.16b,v2.16b,v0.16b 1844 1.5 christos eor v3.16b,v3.16b,v1.16b 1845 1.5 christos st1 {v2.16b},[x1],#16 1846 1.5 christos b.eq .Lctr32_done 1847 1.5 christos st1 {v3.16b},[x1] 1848 1.1 joerg 1849 1.1 joerg .Lctr32_done: 1850 1.5 christos ldr x29,[sp],#16 1851 1.5 christos ret 1852 1.1 joerg .size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks 1853 1.5 christos .globl aes_v8_xts_encrypt 1854 1.5 christos .type aes_v8_xts_encrypt,%function 1855 1.5 christos .align 5 1856 1.5 christos aes_v8_xts_encrypt: 1857 1.5 christos cmp x2,#16 1858 1.5 christos // Original input data size bigger than 16, jump to big size processing. 1859 1.5 christos b.ne .Lxts_enc_big_size 1860 1.5 christos // Encrypt the iv with key2, as the first XEX iv. 
// Single-block path. v6 = IV encrypted with the x4 schedule: w6 starts at
// rounds-2 and .Loop_enc_iv_enc applies two round keys per pass; the final
// aese has no aesmc and the last key is XORed in with eor.
// The input block is then XORed with v6 and run through the first two rounds
// of the x3 schedule (v28, v29). w6 = rounds-10: zero skips straight to
// .Lxts_128_enc, otherwise .Lxts_enc_round_loop applies two more round keys
// per pass until w6 is no longer positive.
1861 1.5 christos ldr w6,[x4,#240] 1862 1.6 christos ld1 {v0.4s},[x4],#16 1863 1.5 christos ld1 {v6.16b},[x5] 1864 1.5 christos sub w6,w6,#2 1865 1.6 christos ld1 {v1.4s},[x4],#16 1866 1.5 christos 1867 1.5 christos .Loop_enc_iv_enc: 1868 1.5 christos aese v6.16b,v0.16b 1869 1.5 christos aesmc v6.16b,v6.16b 1870 1.5 christos ld1 {v0.4s},[x4],#16 1871 1.5 christos subs w6,w6,#2 1872 1.5 christos aese v6.16b,v1.16b 1873 1.5 christos aesmc v6.16b,v6.16b 1874 1.5 christos ld1 {v1.4s},[x4],#16 1875 1.5 christos b.gt .Loop_enc_iv_enc 1876 1.5 christos 1877 1.5 christos aese v6.16b,v0.16b 1878 1.5 christos aesmc v6.16b,v6.16b 1879 1.5 christos ld1 {v0.4s},[x4] 1880 1.5 christos aese v6.16b,v1.16b 1881 1.5 christos eor v6.16b,v6.16b,v0.16b 1882 1.5 christos 1883 1.5 christos ld1 {v0.16b},[x0] 1884 1.5 christos eor v0.16b,v6.16b,v0.16b 1885 1.5 christos 1886 1.5 christos ldr w6,[x3,#240] 1887 1.5 christos ld1 {v28.4s,v29.4s},[x3],#32 // load key schedule... 1888 1.5 christos 1889 1.5 christos aese v0.16b,v28.16b 1890 1.5 christos aesmc v0.16b,v0.16b 1891 1.5 christos ld1 {v16.4s,v17.4s},[x3],#32 // load key schedule... 1892 1.5 christos aese v0.16b,v29.16b 1893 1.5 christos aesmc v0.16b,v0.16b 1894 1.5 christos subs w6,w6,#10 // if rounds==10, jump to aes-128-xts processing 1895 1.5 christos b.eq .Lxts_128_enc 1896 1.5 christos .Lxts_enc_round_loop: 1897 1.5 christos aese v0.16b,v16.16b 1898 1.5 christos aesmc v0.16b,v0.16b 1899 1.5 christos ld1 {v16.4s},[x3],#16 // load key schedule... 1900 1.5 christos aese v0.16b,v17.16b 1901 1.5 christos aesmc v0.16b,v0.16b 1902 1.5 christos ld1 {v17.4s},[x3],#16 // load key schedule... 1903 1.5 christos subs w6,w6,#2 // bias 1904 1.5 christos b.gt .Lxts_enc_round_loop 1905 1.5 christos .Lxts_128_enc: 1906 1.5 christos ld1 {v18.4s,v19.4s},[x3],#32 // load key schedule... 
// Remaining eight rounds of the single block (v16..v23), then XOR with the
// last round key v7 and with the tweak v6, store to [x1], and leave through
// .Lxts_enc_final_abort (no stack frame was created on this path).
//
// .Lxts_enc_big_size: push a 64-byte frame holding x19-x22 and d8-d11.
// x21 = length & 0xf (tail bytes); x2 = whole-block bytes minus 16, and a
// borrow (length below 16) exits via .Lxts_abort. x8 = 16, or 0 when x2
// became zero; x8 is the post-increment of the first block load.
1907 1.5 christos aese v0.16b,v16.16b 1908 1.5 christos aesmc v0.16b,v0.16b 1909 1.5 christos aese v0.16b,v17.16b 1910 1.5 christos aesmc v0.16b,v0.16b 1911 1.5 christos ld1 {v20.4s,v21.4s},[x3],#32 // load key schedule... 1912 1.5 christos aese v0.16b,v18.16b 1913 1.5 christos aesmc v0.16b,v0.16b 1914 1.5 christos aese v0.16b,v19.16b 1915 1.5 christos aesmc v0.16b,v0.16b 1916 1.5 christos ld1 {v22.4s,v23.4s},[x3],#32 // load key schedule... 1917 1.5 christos aese v0.16b,v20.16b 1918 1.5 christos aesmc v0.16b,v0.16b 1919 1.5 christos aese v0.16b,v21.16b 1920 1.5 christos aesmc v0.16b,v0.16b 1921 1.5 christos ld1 {v7.4s},[x3] 1922 1.5 christos aese v0.16b,v22.16b 1923 1.5 christos aesmc v0.16b,v0.16b 1924 1.5 christos aese v0.16b,v23.16b 1925 1.5 christos eor v0.16b,v0.16b,v7.16b 1926 1.5 christos eor v0.16b,v0.16b,v6.16b 1927 1.5 christos st1 {v0.16b},[x1] 1928 1.5 christos b .Lxts_enc_final_abort 1929 1.5 christos 1930 1.5 christos .align 4 1931 1.5 christos .Lxts_enc_big_size: 1932 1.5 christos stp x19,x20,[sp,#-64]! 1933 1.5 christos stp x21,x22,[sp,#48] 1934 1.5 christos stp d8,d9,[sp,#32] 1935 1.5 christos stp d10,d11,[sp,#16] 1936 1.5 christos 1937 1.5 christos // tailcnt store the tail value of length%16. 1938 1.5 christos and x21,x2,#0xf 1939 1.5 christos and x2,x2,#-16 1940 1.5 christos subs x2,x2,#16 1941 1.5 christos mov x8,#16 1942 1.5 christos b.lo .Lxts_abort 1943 1.5 christos csel x8,xzr,x8,eq 1944 1.5 christos 1945 1.5 christos // Firstly, encrypt the iv with key2, as the first iv of XEX. 
// v6 = IV encrypted with the x4 schedule (same sequence as the single-block
// path). Next tweak: x9/x10 receive the low/high halves of v6, w19 = 0x87,
// and the extr/and/eor group shifts the 128-bit value x10:x9 left by one
// bit, XORing 0x87 into the low half when the bit shifted out of the top was
// set; the result is written to v8.
// Then w5 = round count of the x3 schedule, v0 = first input block (x0 is
// advanced by x8), and v16/v17 = first two round keys of the x3 schedule.
1946 1.5 christos ldr w6,[x4,#240] 1947 1.5 christos ld1 {v0.4s},[x4],#16 1948 1.5 christos ld1 {v6.16b},[x5] 1949 1.5 christos sub w6,w6,#2 1950 1.5 christos ld1 {v1.4s},[x4],#16 1951 1.5 christos 1952 1.5 christos .Loop_iv_enc: 1953 1.5 christos aese v6.16b,v0.16b 1954 1.5 christos aesmc v6.16b,v6.16b 1955 1.5 christos ld1 {v0.4s},[x4],#16 1956 1.5 christos subs w6,w6,#2 1957 1.5 christos aese v6.16b,v1.16b 1958 1.5 christos aesmc v6.16b,v6.16b 1959 1.5 christos ld1 {v1.4s},[x4],#16 1960 1.5 christos b.gt .Loop_iv_enc 1961 1.5 christos 1962 1.5 christos aese v6.16b,v0.16b 1963 1.5 christos aesmc v6.16b,v6.16b 1964 1.5 christos ld1 {v0.4s},[x4] 1965 1.5 christos aese v6.16b,v1.16b 1966 1.5 christos eor v6.16b,v6.16b,v0.16b 1967 1.5 christos 1968 1.5 christos // The iv for second block 1969 1.5 christos // x9- iv(low), x10 - iv(high) 1970 1.5 christos // the five ivs stored into, v6.16b,v8.16b,v9.16b,v10.16b,v11.16b 1971 1.5 christos fmov x9,d6 1972 1.5 christos fmov x10,v6.d[1] 1973 1.5 christos mov w19,#0x87 1974 1.5 christos extr x22,x10,x10,#32 1975 1.5 christos extr x10,x10,x9,#63 1976 1.5 christos and w11,w19,w22,asr#31 1977 1.5 christos eor x9,x11,x9,lsl#1 1978 1.5 christos fmov d8,x9 1979 1.5 christos fmov v8.d[1],x10 1980 1.5 christos 1981 1.5 christos ldr w5,[x3,#240] // next starting point 1982 1.5 christos ld1 {v0.16b},[x0],x8 1983 1.5 christos 1984 1.5 christos ld1 {v16.4s,v17.4s},[x3] // load key schedule... 
// x7 = x3 + (rounds-6)*16; v18..v23 and v7 are loaded from there (v7 is the
// last round key). w5 ends up as rounds-8 and x7 is rewound to x3+32.
//
// .Lxts_enc: load a second block into v24 and subtract 32 from x2; a borrow
// branches to .Lxts_inner_enc_tail with un-XORed copies in v1/v3/v28 and
// v27/v29. Otherwise blocks one and two are XORed with v6/v8, the third
// tweak is computed into v9, a third block is loaded and XORed with v9, and
// x2 < 32 branches to .Lxts_outer_enc_tail. After that the fourth tweak is
// computed into v10 and a fourth block is loaded into v25.
1985 1.5 christos sub w5,w5,#6 1986 1.5 christos add x7,x3,x5,lsl#4 // pointer to last 7 round keys 1987 1.5 christos sub w5,w5,#2 1988 1.5 christos ld1 {v18.4s,v19.4s},[x7],#32 1989 1.5 christos ld1 {v20.4s,v21.4s},[x7],#32 1990 1.5 christos ld1 {v22.4s,v23.4s},[x7],#32 1991 1.5 christos ld1 {v7.4s},[x7] 1992 1.5 christos 1993 1.5 christos add x7,x3,#32 1994 1.5 christos mov w6,w5 1995 1.5 christos 1996 1.5 christos // Encryption 1997 1.5 christos .Lxts_enc: 1998 1.5 christos ld1 {v24.16b},[x0],#16 1999 1.5 christos subs x2,x2,#32 // bias 2000 1.5 christos add w6,w5,#2 2001 1.5 christos orr v3.16b,v0.16b,v0.16b 2002 1.5 christos orr v1.16b,v0.16b,v0.16b 2003 1.5 christos orr v28.16b,v0.16b,v0.16b 2004 1.5 christos orr v27.16b,v24.16b,v24.16b 2005 1.5 christos orr v29.16b,v24.16b,v24.16b 2006 1.5 christos b.lo .Lxts_inner_enc_tail 2007 1.5 christos eor v0.16b,v0.16b,v6.16b // before encryption, xor with iv 2008 1.5 christos eor v24.16b,v24.16b,v8.16b 2009 1.5 christos 2010 1.5 christos // The iv for third block 2011 1.5 christos extr x22,x10,x10,#32 2012 1.5 christos extr x10,x10,x9,#63 2013 1.5 christos and w11,w19,w22,asr#31 2014 1.5 christos eor x9,x11,x9,lsl#1 2015 1.5 christos fmov d9,x9 2016 1.5 christos fmov v9.d[1],x10 2017 1.5 christos 2018 1.5 christos 2019 1.5 christos orr v1.16b,v24.16b,v24.16b 2020 1.5 christos ld1 {v24.16b},[x0],#16 2021 1.5 christos orr v2.16b,v0.16b,v0.16b 2022 1.5 christos orr v3.16b,v1.16b,v1.16b 2023 1.5 christos eor v27.16b,v24.16b,v9.16b // the third block 2024 1.5 christos eor v24.16b,v24.16b,v9.16b 2025 1.5 christos cmp x2,#32 2026 1.5 christos b.lo .Lxts_outer_enc_tail 2027 1.5 christos 2028 1.5 christos // The iv for fourth block 2029 1.5 christos extr x22,x10,x10,#32 2030 1.5 christos extr x10,x10,x9,#63 2031 1.5 christos and w11,w19,w22,asr#31 2032 1.5 christos eor x9,x11,x9,lsl#1 2033 1.5 christos fmov d10,x9 2034 1.5 christos fmov v10.d[1],x10 2035 1.5 christos 2036 1.5 christos ld1 {v25.16b},[x0],#16 2037 1.5 christos 
// Fifth tweak into v11 and fifth block into v26; blocks four and five are
// XORed with v10/v11, x2 drops by another 32 and w6 = w5.
//
// .Loop5x_xts_enc: each pass applies v16 and then v17 to the five lanes
// v0, v1, v24, v25, v26 and reloads both keys from x7. After the loop one
// more round uses v16, and x2 is reduced by 0x50 (five blocks); the flags of
// that subs are consumed further down by csel and by b.hs.
// The iv for fifth block 2038 1.5 christos extr x22,x10,x10,#32 2039 1.5 christos extr x10,x10,x9,#63 2040 1.5 christos and w11,w19,w22,asr#31 2041 1.5 christos eor x9,x11,x9,lsl#1 2042 1.5 christos fmov d11,x9 2043 1.5 christos fmov v11.d[1],x10 2044 1.5 christos 2045 1.5 christos ld1 {v26.16b},[x0],#16 2046 1.5 christos eor v25.16b,v25.16b,v10.16b // the fourth block 2047 1.5 christos eor v26.16b,v26.16b,v11.16b 2048 1.5 christos sub x2,x2,#32 // bias 2049 1.5 christos mov w6,w5 2050 1.5 christos b .Loop5x_xts_enc 2051 1.5 christos 2052 1.5 christos .align 4 2053 1.5 christos .Loop5x_xts_enc: 2054 1.5 christos aese v0.16b,v16.16b 2055 1.5 christos aesmc v0.16b,v0.16b 2056 1.5 christos aese v1.16b,v16.16b 2057 1.5 christos aesmc v1.16b,v1.16b 2058 1.5 christos aese v24.16b,v16.16b 2059 1.5 christos aesmc v24.16b,v24.16b 2060 1.5 christos aese v25.16b,v16.16b 2061 1.5 christos aesmc v25.16b,v25.16b 2062 1.5 christos aese v26.16b,v16.16b 2063 1.5 christos aesmc v26.16b,v26.16b 2064 1.5 christos ld1 {v16.4s},[x7],#16 2065 1.5 christos subs w6,w6,#2 2066 1.5 christos aese v0.16b,v17.16b 2067 1.5 christos aesmc v0.16b,v0.16b 2068 1.5 christos aese v1.16b,v17.16b 2069 1.5 christos aesmc v1.16b,v1.16b 2070 1.5 christos aese v24.16b,v17.16b 2071 1.5 christos aesmc v24.16b,v24.16b 2072 1.5 christos aese v25.16b,v17.16b 2073 1.5 christos aesmc v25.16b,v25.16b 2074 1.5 christos aese v26.16b,v17.16b 2075 1.5 christos aesmc v26.16b,v26.16b 2076 1.5 christos ld1 {v17.4s},[x7],#16 2077 1.5 christos b.gt .Loop5x_xts_enc 2078 1.5 christos 2079 1.5 christos aese v0.16b,v16.16b 2080 1.5 christos aesmc v0.16b,v0.16b 2081 1.5 christos aese v1.16b,v16.16b 2082 1.5 christos aesmc v1.16b,v1.16b 2083 1.5 christos aese v24.16b,v16.16b 2084 1.5 christos aesmc v24.16b,v24.16b 2085 1.5 christos aese v25.16b,v16.16b 2086 1.5 christos aesmc v25.16b,v25.16b 2087 1.5 christos aese v26.16b,v16.16b 2088 1.5 christos aesmc v26.16b,v26.16b 2089 1.5 christos subs x2,x2,#0x50 // because 
// Rounds with v17, v18, v19 and v20 on all five lanes. Interleaved scalar
// work: x6 = 0 if x2 is still positive after the subs, else x2 (zero or
// negative), and x0 += x6; x7 is rewound to x3; then x6 = x2 + 0x60.
.Lxts_enc_tail4x 2090 1.5 christos 2091 1.5 christos aese v0.16b,v17.16b 2092 1.5 christos aesmc v0.16b,v0.16b 2093 1.5 christos aese v1.16b,v17.16b 2094 1.5 christos aesmc v1.16b,v1.16b 2095 1.5 christos aese v24.16b,v17.16b 2096 1.5 christos aesmc v24.16b,v24.16b 2097 1.5 christos aese v25.16b,v17.16b 2098 1.5 christos aesmc v25.16b,v25.16b 2099 1.5 christos aese v26.16b,v17.16b 2100 1.5 christos aesmc v26.16b,v26.16b 2101 1.5 christos csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo 2102 1.5 christos mov x7,x3 2103 1.5 christos 2104 1.5 christos aese v0.16b,v18.16b 2105 1.5 christos aesmc v0.16b,v0.16b 2106 1.5 christos aese v1.16b,v18.16b 2107 1.5 christos aesmc v1.16b,v1.16b 2108 1.5 christos aese v24.16b,v18.16b 2109 1.5 christos aesmc v24.16b,v24.16b 2110 1.5 christos aese v25.16b,v18.16b 2111 1.5 christos aesmc v25.16b,v25.16b 2112 1.5 christos aese v26.16b,v18.16b 2113 1.5 christos aesmc v26.16b,v26.16b 2114 1.5 christos add x0,x0,x6 // x0 is adjusted in such way that 2115 1.5 christos // at exit from the loop v1.16b-v26.16b 2116 1.5 christos // are loaded with last "words" 2117 1.5 christos add x6,x2,#0x60 // because .Lxts_enc_tail4x 2118 1.5 christos 2119 1.5 christos aese v0.16b,v19.16b 2120 1.5 christos aesmc v0.16b,v0.16b 2121 1.5 christos aese v1.16b,v19.16b 2122 1.5 christos aesmc v1.16b,v1.16b 2123 1.5 christos aese v24.16b,v19.16b 2124 1.5 christos aesmc v24.16b,v24.16b 2125 1.5 christos aese v25.16b,v19.16b 2126 1.5 christos aesmc v25.16b,v25.16b 2127 1.5 christos aese v26.16b,v19.16b 2128 1.5 christos aesmc v26.16b,v26.16b 2129 1.5 christos 2130 1.5 christos aese v0.16b,v20.16b 2131 1.5 christos aesmc v0.16b,v0.16b 2132 1.5 christos aese v1.16b,v20.16b 2133 1.5 christos aesmc v1.16b,v1.16b 2134 1.5 christos aese v24.16b,v20.16b 2135 1.5 christos aesmc v24.16b,v24.16b 2136 1.5 christos aese v25.16b,v20.16b 2137 1.5 christos aesmc v25.16b,v25.16b 2138 1.5 christos aese v26.16b,v20.16b 2139 1.5 christos aesmc v26.16b,v26.16b 2140 1.5 christos 
// Rounds with v21 and v22. The last round (aese with v23, no aesmc) is
// issued lane by lane, interleaved with: v4, v5, v17, v30 = last round key
// v7 XOR the tweak of that lane (v6, v8, v9, v10); replacement tweaks for
// the next pass written to v6, v8, v9; next input blocks loaded into v2, v3.
2141 1.5 christos aese v0.16b,v21.16b 2142 1.5 christos aesmc v0.16b,v0.16b 2143 1.5 christos aese v1.16b,v21.16b 2144 1.5 christos aesmc v1.16b,v1.16b 2145 1.5 christos aese v24.16b,v21.16b 2146 1.5 christos aesmc v24.16b,v24.16b 2147 1.5 christos aese v25.16b,v21.16b 2148 1.5 christos aesmc v25.16b,v25.16b 2149 1.5 christos aese v26.16b,v21.16b 2150 1.5 christos aesmc v26.16b,v26.16b 2151 1.5 christos 2152 1.5 christos aese v0.16b,v22.16b 2153 1.5 christos aesmc v0.16b,v0.16b 2154 1.5 christos aese v1.16b,v22.16b 2155 1.5 christos aesmc v1.16b,v1.16b 2156 1.5 christos aese v24.16b,v22.16b 2157 1.5 christos aesmc v24.16b,v24.16b 2158 1.5 christos aese v25.16b,v22.16b 2159 1.5 christos aesmc v25.16b,v25.16b 2160 1.5 christos aese v26.16b,v22.16b 2161 1.5 christos aesmc v26.16b,v26.16b 2162 1.5 christos 2163 1.5 christos eor v4.16b,v7.16b,v6.16b 2164 1.5 christos aese v0.16b,v23.16b 2165 1.5 christos // The iv for first block of one iteration 2166 1.5 christos extr x22,x10,x10,#32 2167 1.5 christos extr x10,x10,x9,#63 2168 1.5 christos and w11,w19,w22,asr#31 2169 1.5 christos eor x9,x11,x9,lsl#1 2170 1.5 christos fmov d6,x9 2171 1.5 christos fmov v6.d[1],x10 2172 1.5 christos eor v5.16b,v7.16b,v8.16b 2173 1.5 christos ld1 {v2.16b},[x0],#16 2174 1.5 christos aese v1.16b,v23.16b 2175 1.5 christos // The iv for second block 2176 1.5 christos extr x22,x10,x10,#32 2177 1.5 christos extr x10,x10,x9,#63 2178 1.5 christos and w11,w19,w22,asr#31 2179 1.5 christos eor x9,x11,x9,lsl#1 2180 1.5 christos fmov d8,x9 2181 1.5 christos fmov v8.d[1],x10 2182 1.5 christos eor v17.16b,v7.16b,v9.16b 2183 1.5 christos ld1 {v3.16b},[x0],#16 2184 1.5 christos aese v24.16b,v23.16b 2185 1.5 christos // The iv for third block 2186 1.5 christos extr x22,x10,x10,#32 2187 1.5 christos extr x10,x10,x9,#63 2188 1.5 christos and w11,w19,w22,asr#31 2189 1.5 christos eor x9,x11,x9,lsl#1 2190 1.5 christos fmov d9,x9 2191 1.5 christos fmov v9.d[1],x10 2192 1.5 christos eor v30.16b,v7.16b,v10.16b 2193 
// Same pattern for lanes four and five: v27, v28, v29 receive the next input
// blocks, v31 = v7 ^ v11, and replacement tweaks go to v10 and v11.
// x6 == 0 branches to .Lxts_enc_tail4x. Otherwise the five outputs are
// completed (v4, v5, v17, v30, v31 XOR the lane state) and stored while the
// next inputs are formed as loaded block XOR new tweak; v16/v17 are reloaded
// and w6 = w5. The closing b.hs tests the flags left by subs x2,x2,#0x50;
// none of the instructions in between writes the flags.
1.5 christos ld1 {v27.16b},[x0],#16 2194 1.5 christos aese v25.16b,v23.16b 2195 1.5 christos // The iv for fourth block 2196 1.5 christos extr x22,x10,x10,#32 2197 1.5 christos extr x10,x10,x9,#63 2198 1.5 christos and w11,w19,w22,asr#31 2199 1.5 christos eor x9,x11,x9,lsl#1 2200 1.5 christos fmov d10,x9 2201 1.5 christos fmov v10.d[1],x10 2202 1.5 christos eor v31.16b,v7.16b,v11.16b 2203 1.5 christos ld1 {v28.16b},[x0],#16 2204 1.5 christos aese v26.16b,v23.16b 2205 1.5 christos 2206 1.5 christos // The iv for fifth block 2207 1.5 christos extr x22,x10,x10,#32 2208 1.5 christos extr x10,x10,x9,#63 2209 1.5 christos and w11,w19,w22,asr #31 2210 1.5 christos eor x9,x11,x9,lsl #1 2211 1.5 christos fmov d11,x9 2212 1.5 christos fmov v11.d[1],x10 2213 1.5 christos 2214 1.5 christos ld1 {v29.16b},[x0],#16 2215 1.5 christos cbz x6,.Lxts_enc_tail4x 2216 1.5 christos ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 2217 1.5 christos eor v4.16b,v4.16b,v0.16b 2218 1.5 christos eor v0.16b,v2.16b,v6.16b 2219 1.5 christos eor v5.16b,v5.16b,v1.16b 2220 1.5 christos eor v1.16b,v3.16b,v8.16b 2221 1.5 christos eor v17.16b,v17.16b,v24.16b 2222 1.5 christos eor v24.16b,v27.16b,v9.16b 2223 1.5 christos eor v30.16b,v30.16b,v25.16b 2224 1.5 christos eor v25.16b,v28.16b,v10.16b 2225 1.5 christos eor v31.16b,v31.16b,v26.16b 2226 1.5 christos st1 {v4.16b},[x1],#16 2227 1.5 christos eor v26.16b,v29.16b,v11.16b 2228 1.5 christos st1 {v5.16b},[x1],#16 2229 1.5 christos mov w6,w5 2230 1.5 christos st1 {v17.16b},[x1],#16 2231 1.5 christos ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 2232 1.5 christos st1 {v30.16b},[x1],#16 2233 1.5 christos st1 {v31.16b},[x1],#16 2234 1.5 christos b.hs .Loop5x_xts_enc 2235 1.5 christos 2236 1.5 christos 2237 1.5 christos // If left 4 blocks, borrow the five block's processing. 
// cmn x2,#0x10: x2 == -16 is the four-blocks-left case. The tweaks are moved
// up one register (v11<-v10, v10<-v9, v9<-v8, v8<-v6), x9/x10 are reloaded
// from v11, the five lanes are re-formed, and b.eq re-enters the 5x loop
// (Z is still set from the cmn, nothing in between writes the flags).
// .Lxts_enc_tail4x later stores only four of the five lane results.
//
// .Loop5x_enc_after: x2 += 0x50; zero means no whole blocks remain.
// Otherwise w6 = w5+2 and x2 -= 0x30: a borrow goes to .Lxts_inner_enc_tail,
// else v27/v28/v29 are XORed with v6/v8/v9 for .Lxts_outer_enc_tail.
//
// .Lxts_enc_tail4x: x0 += 16, the four outputs v5, v17, v30, v31 are
// completed and stored, then control goes to .Lxts_enc_done.
//
// .Lxts_outer_enc_tail: three-lane (v0, v1, v24) round loop, two round keys
// per pass.
2238 1.5 christos cmn x2,#0x10 2239 1.5 christos b.ne .Loop5x_enc_after 2240 1.5 christos orr v11.16b,v10.16b,v10.16b 2241 1.5 christos orr v10.16b,v9.16b,v9.16b 2242 1.5 christos orr v9.16b,v8.16b,v8.16b 2243 1.5 christos orr v8.16b,v6.16b,v6.16b 2244 1.5 christos fmov x9,d11 2245 1.5 christos fmov x10,v11.d[1] 2246 1.5 christos eor v0.16b,v6.16b,v2.16b 2247 1.5 christos eor v1.16b,v8.16b,v3.16b 2248 1.5 christos eor v24.16b,v27.16b,v9.16b 2249 1.5 christos eor v25.16b,v28.16b,v10.16b 2250 1.5 christos eor v26.16b,v29.16b,v11.16b 2251 1.5 christos b.eq .Loop5x_xts_enc 2252 1.5 christos 2253 1.5 christos .Loop5x_enc_after: 2254 1.5 christos add x2,x2,#0x50 2255 1.5 christos cbz x2,.Lxts_enc_done 2256 1.5 christos 2257 1.5 christos add w6,w5,#2 2258 1.5 christos subs x2,x2,#0x30 2259 1.5 christos b.lo .Lxts_inner_enc_tail 2260 1.5 christos 2261 1.5 christos eor v0.16b,v6.16b,v27.16b 2262 1.5 christos eor v1.16b,v8.16b,v28.16b 2263 1.5 christos eor v24.16b,v29.16b,v9.16b 2264 1.5 christos b .Lxts_outer_enc_tail 2265 1.5 christos 2266 1.5 christos .align 4 2267 1.5 christos .Lxts_enc_tail4x: 2268 1.5 christos add x0,x0,#16 2269 1.5 christos eor v5.16b,v1.16b,v5.16b 2270 1.5 christos st1 {v5.16b},[x1],#16 2271 1.5 christos eor v17.16b,v24.16b,v17.16b 2272 1.5 christos st1 {v17.16b},[x1],#16 2273 1.5 christos eor v30.16b,v25.16b,v30.16b 2274 1.5 christos eor v31.16b,v26.16b,v31.16b 2275 1.5 christos st1 {v30.16b,v31.16b},[x1],#32 2276 1.5 christos 2277 1.5 christos b .Lxts_enc_done 2278 1.5 christos .align 4 2279 1.5 christos .Lxts_outer_enc_tail: 2280 1.5 christos aese v0.16b,v16.16b 2281 1.5 christos aesmc v0.16b,v0.16b 2282 1.5 christos aese v1.16b,v16.16b 2283 1.5 christos aesmc v1.16b,v1.16b 2284 1.5 christos aese v24.16b,v16.16b 2285 1.5 christos aesmc v24.16b,v24.16b 2286 1.5 christos ld1 {v16.4s},[x7],#16 2287 1.5 christos subs w6,w6,#2 2288 1.5 christos aese v0.16b,v17.16b 2289 1.5 christos aesmc v0.16b,v0.16b 2290 1.5 christos aese v1.16b,v17.16b 2291 1.5 
// Rest of the three-lane loop and the round after it (v16). v4 = v6 ^ v7 and
// v5 = v8 ^ v7 capture the output masks of lanes one and two; x2 -= 0x30;
// the tweak following v9 is computed into v6; x6 = x2 on borrow, otherwise
// unchanged; round with v17, after which v17 is reused for v9 ^ v7.
// Then x6 += 0x20, x0 += x6, x7 = x3, followed by rounds with v20, v21, v22.
christos aesmc v1.16b,v1.16b 2292 1.5 christos aese v24.16b,v17.16b 2293 1.5 christos aesmc v24.16b,v24.16b 2294 1.5 christos ld1 {v17.4s},[x7],#16 2295 1.5 christos b.gt .Lxts_outer_enc_tail 2296 1.5 christos 2297 1.5 christos aese v0.16b,v16.16b 2298 1.5 christos aesmc v0.16b,v0.16b 2299 1.5 christos aese v1.16b,v16.16b 2300 1.5 christos aesmc v1.16b,v1.16b 2301 1.5 christos aese v24.16b,v16.16b 2302 1.5 christos aesmc v24.16b,v24.16b 2303 1.5 christos eor v4.16b,v6.16b,v7.16b 2304 1.5 christos subs x2,x2,#0x30 2305 1.5 christos // The iv for first block 2306 1.5 christos fmov x9,d9 2307 1.5 christos fmov x10,v9.d[1] 2308 1.5 christos //mov w19,#0x87 2309 1.5 christos extr x22,x10,x10,#32 2310 1.5 christos extr x10,x10,x9,#63 2311 1.5 christos and w11,w19,w22,asr#31 2312 1.5 christos eor x9,x11,x9,lsl#1 2313 1.5 christos fmov d6,x9 2314 1.5 christos fmov v6.d[1],x10 2315 1.5 christos eor v5.16b,v8.16b,v7.16b 2316 1.5 christos csel x6,x2,x6,lo // x6, w6, is zero at this point 2317 1.5 christos aese v0.16b,v17.16b 2318 1.5 christos aesmc v0.16b,v0.16b 2319 1.5 christos aese v1.16b,v17.16b 2320 1.5 christos aesmc v1.16b,v1.16b 2321 1.5 christos aese v24.16b,v17.16b 2322 1.5 christos aesmc v24.16b,v24.16b 2323 1.5 christos eor v17.16b,v9.16b,v7.16b 2324 1.5 christos 2325 1.5 christos add x6,x6,#0x20 2326 1.5 christos add x0,x0,x6 2327 1.5 christos mov x7,x3 2328 1.5 christos 2329 1.5 christos aese v0.16b,v20.16b 2330 1.5 christos aesmc v0.16b,v0.16b 2331 1.5 christos aese v1.16b,v20.16b 2332 1.5 christos aesmc v1.16b,v1.16b 2333 1.5 christos aese v24.16b,v20.16b 2334 1.5 christos aesmc v24.16b,v24.16b 2335 1.5 christos aese v0.16b,v21.16b 2336 1.5 christos aesmc v0.16b,v0.16b 2337 1.5 christos aese v1.16b,v21.16b 2338 1.5 christos aesmc v1.16b,v1.16b 2339 1.5 christos aese v24.16b,v21.16b 2340 1.5 christos aesmc v24.16b,v24.16b 2341 1.5 christos aese v0.16b,v22.16b 2342 1.5 christos aesmc v0.16b,v0.16b 2343 1.5 christos aese v1.16b,v22.16b 2344 1.5 christos aesmc 
// Last round with v23; v27 is loaded from [x0]; w6 = w5+2; v16/v17 are
// reloaded from x7. The outputs v4 ^= v0, v5 ^= v1, v24 ^= v17 are stored.
// x2 == -0x30 branches to .Lxts_enc_done.
//
// .Lxts_encxor_one: v28 = v3, v29 = v27.
//
// .Lxts_inner_enc_tail: lanes v1 = v28 ^ v6 and v24 = v29 ^ v8; unless
// x2 == -0x10, v24 is replaced by v29 ^ v6. .Lxts_enc_tail_loop then runs
// the two-lane round loop, two round keys per pass.
v1.16b,v1.16b 2345 1.5 christos aese v24.16b,v22.16b 2346 1.5 christos aesmc v24.16b,v24.16b 2347 1.5 christos aese v0.16b,v23.16b 2348 1.5 christos aese v1.16b,v23.16b 2349 1.5 christos aese v24.16b,v23.16b 2350 1.5 christos ld1 {v27.16b},[x0],#16 2351 1.5 christos add w6,w5,#2 2352 1.5 christos ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 2353 1.5 christos eor v4.16b,v4.16b,v0.16b 2354 1.5 christos eor v5.16b,v5.16b,v1.16b 2355 1.5 christos eor v24.16b,v24.16b,v17.16b 2356 1.5 christos ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 2357 1.5 christos st1 {v4.16b},[x1],#16 2358 1.5 christos st1 {v5.16b},[x1],#16 2359 1.5 christos st1 {v24.16b},[x1],#16 2360 1.5 christos cmn x2,#0x30 2361 1.5 christos b.eq .Lxts_enc_done 2362 1.5 christos .Lxts_encxor_one: 2363 1.5 christos orr v28.16b,v3.16b,v3.16b 2364 1.5 christos orr v29.16b,v27.16b,v27.16b 2365 1.5 christos nop 2366 1.5 christos 2367 1.5 christos .Lxts_inner_enc_tail: 2368 1.5 christos cmn x2,#0x10 2369 1.5 christos eor v1.16b,v28.16b,v6.16b 2370 1.5 christos eor v24.16b,v29.16b,v8.16b 2371 1.5 christos b.eq .Lxts_enc_tail_loop 2372 1.5 christos eor v24.16b,v29.16b,v6.16b 2373 1.5 christos .Lxts_enc_tail_loop: 2374 1.5 christos aese v1.16b,v16.16b 2375 1.5 christos aesmc v1.16b,v1.16b 2376 1.5 christos aese v24.16b,v16.16b 2377 1.5 christos aesmc v24.16b,v24.16b 2378 1.5 christos ld1 {v16.4s},[x7],#16 2379 1.5 christos subs w6,w6,#2 2380 1.5 christos aese v1.16b,v17.16b 2381 1.5 christos aesmc v1.16b,v1.16b 2382 1.5 christos aese v24.16b,v17.16b 2383 1.5 christos aesmc v24.16b,v24.16b 2384 1.5 christos ld1 {v17.4s},[x7],#16 2385 1.5 christos b.gt .Lxts_enc_tail_loop 2386 1.5 christos 2387 1.5 christos aese v1.16b,v16.16b 2388 1.5 christos aesmc v1.16b,v1.16b 2389 1.5 christos aese v24.16b,v16.16b 2390 1.5 christos aesmc v24.16b,v24.16b 2391 1.5 christos aese v1.16b,v17.16b 2392 1.5 christos aesmc v1.16b,v1.16b 2393 1.5 christos aese v24.16b,v17.16b 2394 1.5 christos aesmc v24.16b,v24.16b 2395 1.5 christos 
// Rounds with v20..v23 on both lanes; v5 = v6 ^ v7 and v17 = v8 ^ v7 are the
// output masks. cmn x2,#0x20 selects: equal -> .Lxts_enc_one. Otherwise both
// results are stored and the tweak following v8 is computed into v6.
//
// .Lxts_enc_one: a single result (v5 ^ v24) is stored and the tweak
// following v6 is computed into v6 (orr v6,v6,v6 leaves v6 unchanged).
// Both paths continue at .Lxts_enc_done with v6 holding the next tweak.
aese v1.16b,v20.16b 2396 1.5 christos aesmc v1.16b,v1.16b 2397 1.5 christos aese v24.16b,v20.16b 2398 1.5 christos aesmc v24.16b,v24.16b 2399 1.5 christos cmn x2,#0x20 2400 1.5 christos aese v1.16b,v21.16b 2401 1.5 christos aesmc v1.16b,v1.16b 2402 1.5 christos aese v24.16b,v21.16b 2403 1.5 christos aesmc v24.16b,v24.16b 2404 1.5 christos eor v5.16b,v6.16b,v7.16b 2405 1.5 christos aese v1.16b,v22.16b 2406 1.5 christos aesmc v1.16b,v1.16b 2407 1.5 christos aese v24.16b,v22.16b 2408 1.5 christos aesmc v24.16b,v24.16b 2409 1.5 christos eor v17.16b,v8.16b,v7.16b 2410 1.5 christos aese v1.16b,v23.16b 2411 1.5 christos aese v24.16b,v23.16b 2412 1.5 christos b.eq .Lxts_enc_one 2413 1.5 christos eor v5.16b,v5.16b,v1.16b 2414 1.5 christos st1 {v5.16b},[x1],#16 2415 1.5 christos eor v17.16b,v17.16b,v24.16b 2416 1.5 christos orr v6.16b,v8.16b,v8.16b 2417 1.5 christos st1 {v17.16b},[x1],#16 2418 1.5 christos fmov x9,d8 2419 1.5 christos fmov x10,v8.d[1] 2420 1.5 christos mov w19,#0x87 2421 1.5 christos extr x22,x10,x10,#32 2422 1.5 christos extr x10,x10,x9,#63 2423 1.5 christos and w11,w19,w22,asr #31 2424 1.5 christos eor x9,x11,x9,lsl #1 2425 1.5 christos fmov d6,x9 2426 1.5 christos fmov v6.d[1],x10 2427 1.5 christos b .Lxts_enc_done 2428 1.5 christos 2429 1.5 christos .Lxts_enc_one: 2430 1.5 christos eor v5.16b,v5.16b,v24.16b 2431 1.5 christos orr v6.16b,v6.16b,v6.16b 2432 1.5 christos st1 {v5.16b},[x1],#16 2433 1.5 christos fmov x9,d6 2434 1.5 christos fmov x10,v6.d[1] 2435 1.5 christos mov w19,#0x87 2436 1.5 christos extr x22,x10,x10,#32 2437 1.5 christos extr x10,x10,x9,#63 2438 1.5 christos and w11,w19,w22,asr #31 2439 1.5 christos eor x9,x11,x9,lsl #1 2440 1.5 christos fmov d6,x9 2441 1.5 christos fmov v6.d[1],x10 2442 1.5 christos b .Lxts_enc_done 2443 1.5 christos .align 5 2444 1.5 christos .Lxts_enc_done: 2445 1.5 christos // Process the tail block with cipher stealing. 
// .Lxts_enc_done: x21 == 0 (no tail bytes) goes to .Lxts_abort. Otherwise
// x20 = input pointer, x13 = output pointer, x1 steps back 16 bytes, and
// .composite_enc_loop walks x21 from tail-1 down to 0: the byte at [x1+x21]
// is copied to [x13+x21] and then overwritten with the input byte from
// [x20+x21]. The resulting 16 bytes at [x1] are XORed with v6, encrypted
// with the x3 schedule, XORed with v6 again and stored back to [x1].
// NOTE(review): .composite_enc_loop is the only branch target in this
// function without the .L prefix -- confirm that this is intended.
//
// .Lxts_abort restores x19-x22 and d8-d11 and pops the 64-byte frame;
// .Lxts_enc_final_abort is the bare ret shared with the single-block path.
// The text after .size opens aes_v8_xts_decrypt.
2446 1.5 christos tst x21,#0xf 2447 1.5 christos b.eq .Lxts_abort 2448 1.5 christos 2449 1.5 christos mov x20,x0 2450 1.5 christos mov x13,x1 2451 1.5 christos sub x1,x1,#16 2452 1.5 christos .composite_enc_loop: 2453 1.5 christos subs x21,x21,#1 2454 1.5 christos ldrb w15,[x1,x21] 2455 1.5 christos ldrb w14,[x20,x21] 2456 1.5 christos strb w15,[x13,x21] 2457 1.5 christos strb w14,[x1,x21] 2458 1.5 christos b.gt .composite_enc_loop 2459 1.5 christos .Lxts_enc_load_done: 2460 1.5 christos ld1 {v26.16b},[x1] 2461 1.5 christos eor v26.16b,v26.16b,v6.16b 2462 1.5 christos 2463 1.5 christos // Encrypt the composite block to get the last second encrypted text block 2464 1.5 christos ldr w6,[x3,#240] // load key schedule... 2465 1.6 christos ld1 {v0.4s},[x3],#16 2466 1.5 christos sub w6,w6,#2 2467 1.6 christos ld1 {v1.4s},[x3],#16 // load key schedule... 2468 1.5 christos .Loop_final_enc: 2469 1.5 christos aese v26.16b,v0.16b 2470 1.5 christos aesmc v26.16b,v26.16b 2471 1.5 christos ld1 {v0.4s},[x3],#16 2472 1.5 christos subs w6,w6,#2 2473 1.5 christos aese v26.16b,v1.16b 2474 1.5 christos aesmc v26.16b,v26.16b 2475 1.5 christos ld1 {v1.4s},[x3],#16 2476 1.5 christos b.gt .Loop_final_enc 2477 1.5 christos 2478 1.5 christos aese v26.16b,v0.16b 2479 1.5 christos aesmc v26.16b,v26.16b 2480 1.5 christos ld1 {v0.4s},[x3] 2481 1.5 christos aese v26.16b,v1.16b 2482 1.5 christos eor v26.16b,v26.16b,v0.16b 2483 1.5 christos eor v26.16b,v26.16b,v6.16b 2484 1.5 christos st1 {v26.16b},[x1] 2485 1.5 christos 2486 1.5 christos .Lxts_abort: 2487 1.5 christos ldp x21,x22,[sp,#48] 2488 1.5 christos ldp d8,d9,[sp,#32] 2489 1.5 christos ldp d10,d11,[sp,#16] 2490 1.5 christos ldp x19,x20,[sp],#64 2491 1.5 christos .Lxts_enc_final_abort: 2492 1.5 christos ret 2493 1.5 christos .size aes_v8_xts_encrypt,.-aes_v8_xts_encrypt 2494 1.5 christos .globl aes_v8_xts_decrypt 2495 1.5 christos .type aes_v8_xts_decrypt,%function 2496 1.5 christos .align 5 2497 1.5 christos aes_v8_xts_decrypt: 2498 1.5 
christos cmp x2,#16 2499 1.5 christos // Original input data size bigger than 16, jump to big size processing. 2500 1.5 christos b.ne .Lxts_dec_big_size 2501 1.5 christos // Encrypt the iv with key2, as the first XEX iv. 2502 1.5 christos ldr w6,[x4,#240] 2503 1.6 christos ld1 {v0.4s},[x4],#16 2504 1.5 christos ld1 {v6.16b},[x5] 2505 1.5 christos sub w6,w6,#2 2506 1.6 christos ld1 {v1.4s},[x4],#16 2507 1.5 christos 2508 1.5 christos .Loop_dec_small_iv_enc: 2509 1.5 christos aese v6.16b,v0.16b 2510 1.5 christos aesmc v6.16b,v6.16b 2511 1.5 christos ld1 {v0.4s},[x4],#16 2512 1.5 christos subs w6,w6,#2 2513 1.5 christos aese v6.16b,v1.16b 2514 1.5 christos aesmc v6.16b,v6.16b 2515 1.5 christos ld1 {v1.4s},[x4],#16 2516 1.5 christos b.gt .Loop_dec_small_iv_enc 2517 1.5 christos 2518 1.5 christos aese v6.16b,v0.16b 2519 1.5 christos aesmc v6.16b,v6.16b 2520 1.5 christos ld1 {v0.4s},[x4] 2521 1.5 christos aese v6.16b,v1.16b 2522 1.5 christos eor v6.16b,v6.16b,v0.16b 2523 1.5 christos 2524 1.5 christos ld1 {v0.16b},[x0] 2525 1.5 christos eor v0.16b,v6.16b,v0.16b 2526 1.5 christos 2527 1.5 christos ldr w6,[x3,#240] 2528 1.5 christos ld1 {v28.4s,v29.4s},[x3],#32 // load key schedule... 2529 1.5 christos 2530 1.5 christos aesd v0.16b,v28.16b 2531 1.5 christos aesimc v0.16b,v0.16b 2532 1.5 christos ld1 {v16.4s,v17.4s},[x3],#32 // load key schedule... 2533 1.5 christos aesd v0.16b,v29.16b 2534 1.5 christos aesimc v0.16b,v0.16b 2535 1.5 christos subs w6,w6,#10 // bias 2536 1.5 christos b.eq .Lxts_128_dec 2537 1.5 christos .Lxts_dec_round_loop: 2538 1.5 christos aesd v0.16b,v16.16b 2539 1.5 christos aesimc v0.16b,v0.16b 2540 1.5 christos ld1 {v16.4s},[x3],#16 // load key schedule... 2541 1.5 christos aesd v0.16b,v17.16b 2542 1.5 christos aesimc v0.16b,v0.16b 2543 1.5 christos ld1 {v17.4s},[x3],#16 // load key schedule... 
2544 1.5 christos subs w6,w6,#2 // bias 2545 1.5 christos b.gt .Lxts_dec_round_loop 2546 1.5 christos .Lxts_128_dec: 2547 1.5 christos ld1 {v18.4s,v19.4s},[x3],#32 // load key schedule... 2548 1.5 christos aesd v0.16b,v16.16b 2549 1.5 christos aesimc v0.16b,v0.16b 2550 1.5 christos aesd v0.16b,v17.16b 2551 1.5 christos aesimc v0.16b,v0.16b 2552 1.5 christos ld1 {v20.4s,v21.4s},[x3],#32 // load key schedule... 2553 1.5 christos aesd v0.16b,v18.16b 2554 1.5 christos aesimc v0.16b,v0.16b 2555 1.5 christos aesd v0.16b,v19.16b 2556 1.5 christos aesimc v0.16b,v0.16b 2557 1.5 christos ld1 {v22.4s,v23.4s},[x3],#32 // load key schedule... 2558 1.5 christos aesd v0.16b,v20.16b 2559 1.5 christos aesimc v0.16b,v0.16b 2560 1.5 christos aesd v0.16b,v21.16b 2561 1.5 christos aesimc v0.16b,v0.16b 2562 1.5 christos ld1 {v7.4s},[x3] 2563 1.5 christos aesd v0.16b,v22.16b 2564 1.5 christos aesimc v0.16b,v0.16b 2565 1.5 christos aesd v0.16b,v23.16b 2566 1.5 christos eor v0.16b,v0.16b,v7.16b 2567 1.5 christos eor v0.16b,v6.16b,v0.16b 2568 1.5 christos st1 {v0.16b},[x1] 2569 1.5 christos b .Lxts_dec_final_abort 2570 1.5 christos .Lxts_dec_big_size: 2571 1.5 christos stp x19,x20,[sp,#-64]! 
2572 1.5 christos stp x21,x22,[sp,#48] 2573 1.5 christos stp d8,d9,[sp,#32] 2574 1.5 christos stp d10,d11,[sp,#16] 2575 1.5 christos 2576 1.5 christos and x21,x2,#0xf 2577 1.5 christos and x2,x2,#-16 2578 1.5 christos subs x2,x2,#16 2579 1.5 christos mov x8,#16 2580 1.5 christos b.lo .Lxts_dec_abort 2581 1.5 christos 2582 1.5 christos // Encrypt the iv with key2, as the first XEX iv 2583 1.5 christos ldr w6,[x4,#240] 2584 1.6 christos ld1 {v0.4s},[x4],#16 2585 1.5 christos ld1 {v6.16b},[x5] 2586 1.5 christos sub w6,w6,#2 2587 1.6 christos ld1 {v1.4s},[x4],#16 2588 1.5 christos 2589 1.5 christos .Loop_dec_iv_enc: 2590 1.5 christos aese v6.16b,v0.16b 2591 1.5 christos aesmc v6.16b,v6.16b 2592 1.5 christos ld1 {v0.4s},[x4],#16 2593 1.5 christos subs w6,w6,#2 2594 1.5 christos aese v6.16b,v1.16b 2595 1.5 christos aesmc v6.16b,v6.16b 2596 1.5 christos ld1 {v1.4s},[x4],#16 2597 1.5 christos b.gt .Loop_dec_iv_enc 2598 1.5 christos 2599 1.5 christos aese v6.16b,v0.16b 2600 1.5 christos aesmc v6.16b,v6.16b 2601 1.5 christos ld1 {v0.4s},[x4] 2602 1.5 christos aese v6.16b,v1.16b 2603 1.5 christos eor v6.16b,v6.16b,v0.16b 2604 1.5 christos 2605 1.5 christos // The iv for second block 2606 1.5 christos // x9- iv(low), x10 - iv(high) 2607 1.5 christos // the five ivs stored into, v6.16b,v8.16b,v9.16b,v10.16b,v11.16b 2608 1.5 christos fmov x9,d6 2609 1.5 christos fmov x10,v6.d[1] 2610 1.5 christos mov w19,#0x87 2611 1.5 christos extr x22,x10,x10,#32 2612 1.5 christos extr x10,x10,x9,#63 2613 1.5 christos and w11,w19,w22,asr #31 2614 1.5 christos eor x9,x11,x9,lsl #1 2615 1.5 christos fmov d8,x9 2616 1.5 christos fmov v8.d[1],x10 2617 1.5 christos 2618 1.5 christos ldr w5,[x3,#240] // load rounds number 2619 1.5 christos 2620 1.5 christos // The iv for third block 2621 1.5 christos extr x22,x10,x10,#32 2622 1.5 christos extr x10,x10,x9,#63 2623 1.5 christos and w11,w19,w22,asr #31 2624 1.5 christos eor x9,x11,x9,lsl #1 2625 1.5 christos fmov d9,x9 2626 1.5 christos fmov 
v9.d[1],x10 2627 1.5 christos 2628 1.5 christos ld1 {v16.4s,v17.4s},[x3] // load key schedule... 2629 1.5 christos sub w5,w5,#6 2630 1.5 christos add x7,x3,x5,lsl#4 // pointer to last 7 round keys 2631 1.5 christos sub w5,w5,#2 2632 1.5 christos ld1 {v18.4s,v19.4s},[x7],#32 // load key schedule... 2633 1.5 christos ld1 {v20.4s,v21.4s},[x7],#32 2634 1.5 christos ld1 {v22.4s,v23.4s},[x7],#32 2635 1.5 christos ld1 {v7.4s},[x7] 2636 1.5 christos 2637 1.5 christos // The iv for fourth block 2638 1.5 christos extr x22,x10,x10,#32 2639 1.5 christos extr x10,x10,x9,#63 2640 1.5 christos and w11,w19,w22,asr #31 2641 1.5 christos eor x9,x11,x9,lsl #1 2642 1.5 christos fmov d10,x9 2643 1.5 christos fmov v10.d[1],x10 2644 1.5 christos 2645 1.5 christos add x7,x3,#32 2646 1.5 christos mov w6,w5 2647 1.5 christos b .Lxts_dec 2648 1.5 christos 2649 1.5 christos // Decryption 2650 1.5 christos .align 5 2651 1.5 christos .Lxts_dec: 2652 1.5 christos tst x21,#0xf 2653 1.5 christos b.eq .Lxts_dec_begin 2654 1.5 christos subs x2,x2,#16 2655 1.5 christos csel x8,xzr,x8,eq 2656 1.5 christos ld1 {v0.16b},[x0],#16 2657 1.5 christos b.lo .Lxts_done 2658 1.5 christos sub x0,x0,#16 2659 1.5 christos .Lxts_dec_begin: 2660 1.5 christos ld1 {v0.16b},[x0],x8 2661 1.5 christos subs x2,x2,#32 // bias 2662 1.5 christos add w6,w5,#2 2663 1.5 christos orr v3.16b,v0.16b,v0.16b 2664 1.5 christos orr v1.16b,v0.16b,v0.16b 2665 1.5 christos orr v28.16b,v0.16b,v0.16b 2666 1.5 christos ld1 {v24.16b},[x0],#16 2667 1.5 christos orr v27.16b,v24.16b,v24.16b 2668 1.5 christos orr v29.16b,v24.16b,v24.16b 2669 1.5 christos b.lo .Lxts_inner_dec_tail 2670 1.5 christos eor v0.16b,v0.16b,v6.16b // before decryt, xor with iv 2671 1.5 christos eor v24.16b,v24.16b,v8.16b 2672 1.5 christos 2673 1.5 christos orr v1.16b,v24.16b,v24.16b 2674 1.5 christos ld1 {v24.16b},[x0],#16 2675 1.5 christos orr v2.16b,v0.16b,v0.16b 2676 1.5 christos orr v3.16b,v1.16b,v1.16b 2677 1.5 christos eor v27.16b,v24.16b,v9.16b // third block xox 
with third iv 2678 1.5 christos eor v24.16b,v24.16b,v9.16b 2679 1.5 christos cmp x2,#32 2680 1.5 christos b.lo .Lxts_outer_dec_tail 2681 1.5 christos 2682 1.5 christos ld1 {v25.16b},[x0],#16 2683 1.5 christos 2684 1.5 christos // The iv for fifth block 2685 1.5 christos extr x22,x10,x10,#32 2686 1.5 christos extr x10,x10,x9,#63 2687 1.5 christos and w11,w19,w22,asr #31 2688 1.5 christos eor x9,x11,x9,lsl #1 2689 1.5 christos fmov d11,x9 2690 1.5 christos fmov v11.d[1],x10 2691 1.5 christos 2692 1.5 christos ld1 {v26.16b},[x0],#16 2693 1.5 christos eor v25.16b,v25.16b,v10.16b // the fourth block 2694 1.5 christos eor v26.16b,v26.16b,v11.16b 2695 1.5 christos sub x2,x2,#32 // bias 2696 1.5 christos mov w6,w5 2697 1.5 christos b .Loop5x_xts_dec 2698 1.5 christos 2699 1.5 christos .align 4 2700 1.5 christos .Loop5x_xts_dec: 2701 1.5 christos aesd v0.16b,v16.16b 2702 1.5 christos aesimc v0.16b,v0.16b 2703 1.5 christos aesd v1.16b,v16.16b 2704 1.5 christos aesimc v1.16b,v1.16b 2705 1.5 christos aesd v24.16b,v16.16b 2706 1.5 christos aesimc v24.16b,v24.16b 2707 1.5 christos aesd v25.16b,v16.16b 2708 1.5 christos aesimc v25.16b,v25.16b 2709 1.5 christos aesd v26.16b,v16.16b 2710 1.5 christos aesimc v26.16b,v26.16b 2711 1.5 christos ld1 {v16.4s},[x7],#16 // load key schedule... 2712 1.5 christos subs w6,w6,#2 2713 1.5 christos aesd v0.16b,v17.16b 2714 1.5 christos aesimc v0.16b,v0.16b 2715 1.5 christos aesd v1.16b,v17.16b 2716 1.5 christos aesimc v1.16b,v1.16b 2717 1.5 christos aesd v24.16b,v17.16b 2718 1.5 christos aesimc v24.16b,v24.16b 2719 1.5 christos aesd v25.16b,v17.16b 2720 1.5 christos aesimc v25.16b,v25.16b 2721 1.5 christos aesd v26.16b,v17.16b 2722 1.5 christos aesimc v26.16b,v26.16b 2723 1.5 christos ld1 {v17.4s},[x7],#16 // load key schedule... 
2724 1.5 christos b.gt .Loop5x_xts_dec 2725 1.5 christos 2726 1.5 christos aesd v0.16b,v16.16b 2727 1.5 christos aesimc v0.16b,v0.16b 2728 1.5 christos aesd v1.16b,v16.16b 2729 1.5 christos aesimc v1.16b,v1.16b 2730 1.5 christos aesd v24.16b,v16.16b 2731 1.5 christos aesimc v24.16b,v24.16b 2732 1.5 christos aesd v25.16b,v16.16b 2733 1.5 christos aesimc v25.16b,v25.16b 2734 1.5 christos aesd v26.16b,v16.16b 2735 1.5 christos aesimc v26.16b,v26.16b 2736 1.5 christos subs x2,x2,#0x50 // because .Lxts_dec_tail4x 2737 1.5 christos 2738 1.5 christos aesd v0.16b,v17.16b 2739 1.5 christos aesimc v0.16b,v0.16b 2740 1.5 christos aesd v1.16b,v17.16b 2741 1.5 christos aesimc v1.16b,v1.16b 2742 1.5 christos aesd v24.16b,v17.16b 2743 1.5 christos aesimc v24.16b,v24.16b 2744 1.5 christos aesd v25.16b,v17.16b 2745 1.5 christos aesimc v25.16b,v25.16b 2746 1.5 christos aesd v26.16b,v17.16b 2747 1.5 christos aesimc v26.16b,v26.16b 2748 1.5 christos csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo 2749 1.5 christos mov x7,x3 2750 1.5 christos 2751 1.5 christos aesd v0.16b,v18.16b 2752 1.5 christos aesimc v0.16b,v0.16b 2753 1.5 christos aesd v1.16b,v18.16b 2754 1.5 christos aesimc v1.16b,v1.16b 2755 1.5 christos aesd v24.16b,v18.16b 2756 1.5 christos aesimc v24.16b,v24.16b 2757 1.5 christos aesd v25.16b,v18.16b 2758 1.5 christos aesimc v25.16b,v25.16b 2759 1.5 christos aesd v26.16b,v18.16b 2760 1.5 christos aesimc v26.16b,v26.16b 2761 1.5 christos add x0,x0,x6 // x0 is adjusted in such way that 2762 1.5 christos // at exit from the loop v1.16b-v26.16b 2763 1.5 christos // are loaded with last "words" 2764 1.5 christos add x6,x2,#0x60 // because .Lxts_dec_tail4x 2765 1.5 christos 2766 1.5 christos aesd v0.16b,v19.16b 2767 1.5 christos aesimc v0.16b,v0.16b 2768 1.5 christos aesd v1.16b,v19.16b 2769 1.5 christos aesimc v1.16b,v1.16b 2770 1.5 christos aesd v24.16b,v19.16b 2771 1.5 christos aesimc v24.16b,v24.16b 2772 1.5 christos aesd v25.16b,v19.16b 2773 1.5 christos aesimc 
v25.16b,v25.16b 2774 1.5 christos aesd v26.16b,v19.16b 2775 1.5 christos aesimc v26.16b,v26.16b 2776 1.5 christos 2777 1.5 christos aesd v0.16b,v20.16b 2778 1.5 christos aesimc v0.16b,v0.16b 2779 1.5 christos aesd v1.16b,v20.16b 2780 1.5 christos aesimc v1.16b,v1.16b 2781 1.5 christos aesd v24.16b,v20.16b 2782 1.5 christos aesimc v24.16b,v24.16b 2783 1.5 christos aesd v25.16b,v20.16b 2784 1.5 christos aesimc v25.16b,v25.16b 2785 1.5 christos aesd v26.16b,v20.16b 2786 1.5 christos aesimc v26.16b,v26.16b 2787 1.5 christos 2788 1.5 christos aesd v0.16b,v21.16b 2789 1.5 christos aesimc v0.16b,v0.16b 2790 1.5 christos aesd v1.16b,v21.16b 2791 1.5 christos aesimc v1.16b,v1.16b 2792 1.5 christos aesd v24.16b,v21.16b 2793 1.5 christos aesimc v24.16b,v24.16b 2794 1.5 christos aesd v25.16b,v21.16b 2795 1.5 christos aesimc v25.16b,v25.16b 2796 1.5 christos aesd v26.16b,v21.16b 2797 1.5 christos aesimc v26.16b,v26.16b 2798 1.5 christos 2799 1.5 christos aesd v0.16b,v22.16b 2800 1.5 christos aesimc v0.16b,v0.16b 2801 1.5 christos aesd v1.16b,v22.16b 2802 1.5 christos aesimc v1.16b,v1.16b 2803 1.5 christos aesd v24.16b,v22.16b 2804 1.5 christos aesimc v24.16b,v24.16b 2805 1.5 christos aesd v25.16b,v22.16b 2806 1.5 christos aesimc v25.16b,v25.16b 2807 1.5 christos aesd v26.16b,v22.16b 2808 1.5 christos aesimc v26.16b,v26.16b 2809 1.5 christos 2810 1.5 christos eor v4.16b,v7.16b,v6.16b 2811 1.5 christos aesd v0.16b,v23.16b 2812 1.5 christos // The iv for first block of next iteration. 
2813 1.5 christos extr x22,x10,x10,#32 2814 1.5 christos extr x10,x10,x9,#63 2815 1.5 christos and w11,w19,w22,asr #31 2816 1.5 christos eor x9,x11,x9,lsl #1 2817 1.5 christos fmov d6,x9 2818 1.5 christos fmov v6.d[1],x10 2819 1.5 christos eor v5.16b,v7.16b,v8.16b 2820 1.5 christos ld1 {v2.16b},[x0],#16 2821 1.5 christos aesd v1.16b,v23.16b 2822 1.5 christos // The iv for second block 2823 1.5 christos extr x22,x10,x10,#32 2824 1.5 christos extr x10,x10,x9,#63 2825 1.5 christos and w11,w19,w22,asr #31 2826 1.5 christos eor x9,x11,x9,lsl #1 2827 1.5 christos fmov d8,x9 2828 1.5 christos fmov v8.d[1],x10 2829 1.5 christos eor v17.16b,v7.16b,v9.16b 2830 1.5 christos ld1 {v3.16b},[x0],#16 2831 1.5 christos aesd v24.16b,v23.16b 2832 1.5 christos // The iv for third block 2833 1.5 christos extr x22,x10,x10,#32 2834 1.5 christos extr x10,x10,x9,#63 2835 1.5 christos and w11,w19,w22,asr #31 2836 1.5 christos eor x9,x11,x9,lsl #1 2837 1.5 christos fmov d9,x9 2838 1.5 christos fmov v9.d[1],x10 2839 1.5 christos eor v30.16b,v7.16b,v10.16b 2840 1.5 christos ld1 {v27.16b},[x0],#16 2841 1.5 christos aesd v25.16b,v23.16b 2842 1.5 christos // The iv for fourth block 2843 1.5 christos extr x22,x10,x10,#32 2844 1.5 christos extr x10,x10,x9,#63 2845 1.5 christos and w11,w19,w22,asr #31 2846 1.5 christos eor x9,x11,x9,lsl #1 2847 1.5 christos fmov d10,x9 2848 1.5 christos fmov v10.d[1],x10 2849 1.5 christos eor v31.16b,v7.16b,v11.16b 2850 1.5 christos ld1 {v28.16b},[x0],#16 2851 1.5 christos aesd v26.16b,v23.16b 2852 1.5 christos 2853 1.5 christos // The iv for fifth block 2854 1.5 christos extr x22,x10,x10,#32 2855 1.5 christos extr x10,x10,x9,#63 2856 1.5 christos and w11,w19,w22,asr #31 2857 1.5 christos eor x9,x11,x9,lsl #1 2858 1.5 christos fmov d11,x9 2859 1.5 christos fmov v11.d[1],x10 2860 1.5 christos 2861 1.5 christos ld1 {v29.16b},[x0],#16 2862 1.5 christos cbz x6,.Lxts_dec_tail4x 2863 1.5 christos ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 2864 1.5 christos eor 
v4.16b,v4.16b,v0.16b 2865 1.5 christos eor v0.16b,v2.16b,v6.16b 2866 1.5 christos eor v5.16b,v5.16b,v1.16b 2867 1.5 christos eor v1.16b,v3.16b,v8.16b 2868 1.5 christos eor v17.16b,v17.16b,v24.16b 2869 1.5 christos eor v24.16b,v27.16b,v9.16b 2870 1.5 christos eor v30.16b,v30.16b,v25.16b 2871 1.5 christos eor v25.16b,v28.16b,v10.16b 2872 1.5 christos eor v31.16b,v31.16b,v26.16b 2873 1.5 christos st1 {v4.16b},[x1],#16 2874 1.5 christos eor v26.16b,v29.16b,v11.16b 2875 1.5 christos st1 {v5.16b},[x1],#16 2876 1.5 christos mov w6,w5 2877 1.5 christos st1 {v17.16b},[x1],#16 2878 1.5 christos ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 2879 1.5 christos st1 {v30.16b},[x1],#16 2880 1.5 christos st1 {v31.16b},[x1],#16 2881 1.5 christos b.hs .Loop5x_xts_dec 2882 1.5 christos 2883 1.5 christos cmn x2,#0x10 2884 1.5 christos b.ne .Loop5x_dec_after 2885 1.5 christos // If x2(x2) equal to -0x10, the left blocks is 4. 2886 1.5 christos // After specially processing, utilize the five blocks processing again. 2887 1.5 christos // It will use the following IVs: v6.16b,v6.16b,v8.16b,v9.16b,v10.16b. 
2888 1.5 christos orr v11.16b,v10.16b,v10.16b 2889 1.5 christos orr v10.16b,v9.16b,v9.16b 2890 1.5 christos orr v9.16b,v8.16b,v8.16b 2891 1.5 christos orr v8.16b,v6.16b,v6.16b 2892 1.5 christos fmov x9,d11 2893 1.5 christos fmov x10,v11.d[1] 2894 1.5 christos eor v0.16b,v6.16b,v2.16b 2895 1.5 christos eor v1.16b,v8.16b,v3.16b 2896 1.5 christos eor v24.16b,v27.16b,v9.16b 2897 1.5 christos eor v25.16b,v28.16b,v10.16b 2898 1.5 christos eor v26.16b,v29.16b,v11.16b 2899 1.5 christos b.eq .Loop5x_xts_dec 2900 1.5 christos 2901 1.5 christos .Loop5x_dec_after: 2902 1.5 christos add x2,x2,#0x50 2903 1.5 christos cbz x2,.Lxts_done 2904 1.5 christos 2905 1.5 christos add w6,w5,#2 2906 1.5 christos subs x2,x2,#0x30 2907 1.5 christos b.lo .Lxts_inner_dec_tail 2908 1.5 christos 2909 1.5 christos eor v0.16b,v6.16b,v27.16b 2910 1.5 christos eor v1.16b,v8.16b,v28.16b 2911 1.5 christos eor v24.16b,v29.16b,v9.16b 2912 1.5 christos b .Lxts_outer_dec_tail 2913 1.5 christos 2914 1.5 christos .align 4 2915 1.5 christos .Lxts_dec_tail4x: 2916 1.5 christos add x0,x0,#16 2917 1.6 christos tst x21,#0xf 2918 1.5 christos eor v5.16b,v1.16b,v4.16b 2919 1.5 christos st1 {v5.16b},[x1],#16 2920 1.5 christos eor v17.16b,v24.16b,v17.16b 2921 1.5 christos st1 {v17.16b},[x1],#16 2922 1.5 christos eor v30.16b,v25.16b,v30.16b 2923 1.5 christos eor v31.16b,v26.16b,v31.16b 2924 1.5 christos st1 {v30.16b,v31.16b},[x1],#32 2925 1.5 christos 2926 1.6 christos b.eq .Lxts_dec_abort 2927 1.6 christos ld1 {v0.16b},[x0],#16 2928 1.5 christos b .Lxts_done 2929 1.5 christos .align 4 2930 1.5 christos .Lxts_outer_dec_tail: 2931 1.5 christos aesd v0.16b,v16.16b 2932 1.5 christos aesimc v0.16b,v0.16b 2933 1.5 christos aesd v1.16b,v16.16b 2934 1.5 christos aesimc v1.16b,v1.16b 2935 1.5 christos aesd v24.16b,v16.16b 2936 1.5 christos aesimc v24.16b,v24.16b 2937 1.5 christos ld1 {v16.4s},[x7],#16 2938 1.5 christos subs w6,w6,#2 2939 1.5 christos aesd v0.16b,v17.16b 2940 1.5 christos aesimc v0.16b,v0.16b 2941 1.5 christos 
aesd v1.16b,v17.16b 2942 1.5 christos aesimc v1.16b,v1.16b 2943 1.5 christos aesd v24.16b,v17.16b 2944 1.5 christos aesimc v24.16b,v24.16b 2945 1.5 christos ld1 {v17.4s},[x7],#16 2946 1.5 christos b.gt .Lxts_outer_dec_tail 2947 1.5 christos 2948 1.5 christos aesd v0.16b,v16.16b 2949 1.5 christos aesimc v0.16b,v0.16b 2950 1.5 christos aesd v1.16b,v16.16b 2951 1.5 christos aesimc v1.16b,v1.16b 2952 1.5 christos aesd v24.16b,v16.16b 2953 1.5 christos aesimc v24.16b,v24.16b 2954 1.5 christos eor v4.16b,v6.16b,v7.16b 2955 1.5 christos subs x2,x2,#0x30 2956 1.5 christos // The iv for first block 2957 1.5 christos fmov x9,d9 2958 1.5 christos fmov x10,v9.d[1] 2959 1.5 christos mov w19,#0x87 2960 1.5 christos extr x22,x10,x10,#32 2961 1.5 christos extr x10,x10,x9,#63 2962 1.5 christos and w11,w19,w22,asr #31 2963 1.5 christos eor x9,x11,x9,lsl #1 2964 1.5 christos fmov d6,x9 2965 1.5 christos fmov v6.d[1],x10 2966 1.5 christos eor v5.16b,v8.16b,v7.16b 2967 1.5 christos csel x6,x2,x6,lo // x6, w6, is zero at this point 2968 1.5 christos aesd v0.16b,v17.16b 2969 1.5 christos aesimc v0.16b,v0.16b 2970 1.5 christos aesd v1.16b,v17.16b 2971 1.5 christos aesimc v1.16b,v1.16b 2972 1.5 christos aesd v24.16b,v17.16b 2973 1.5 christos aesimc v24.16b,v24.16b 2974 1.5 christos eor v17.16b,v9.16b,v7.16b 2975 1.5 christos // The iv for second block 2976 1.5 christos extr x22,x10,x10,#32 2977 1.5 christos extr x10,x10,x9,#63 2978 1.5 christos and w11,w19,w22,asr #31 2979 1.5 christos eor x9,x11,x9,lsl #1 2980 1.5 christos fmov d8,x9 2981 1.5 christos fmov v8.d[1],x10 2982 1.5 christos 2983 1.5 christos add x6,x6,#0x20 2984 1.5 christos add x0,x0,x6 // x0 is adjusted to the last data 2985 1.5 christos 2986 1.5 christos mov x7,x3 2987 1.5 christos 2988 1.5 christos // The iv for third block 2989 1.5 christos extr x22,x10,x10,#32 2990 1.5 christos extr x10,x10,x9,#63 2991 1.5 christos and w11,w19,w22,asr #31 2992 1.5 christos eor x9,x11,x9,lsl #1 2993 1.5 christos fmov d9,x9 2994 1.5 
christos fmov v9.d[1],x10 2995 1.5 christos 2996 1.5 christos aesd v0.16b,v20.16b 2997 1.5 christos aesimc v0.16b,v0.16b 2998 1.5 christos aesd v1.16b,v20.16b 2999 1.5 christos aesimc v1.16b,v1.16b 3000 1.5 christos aesd v24.16b,v20.16b 3001 1.5 christos aesimc v24.16b,v24.16b 3002 1.5 christos aesd v0.16b,v21.16b 3003 1.5 christos aesimc v0.16b,v0.16b 3004 1.5 christos aesd v1.16b,v21.16b 3005 1.5 christos aesimc v1.16b,v1.16b 3006 1.5 christos aesd v24.16b,v21.16b 3007 1.5 christos aesimc v24.16b,v24.16b 3008 1.5 christos aesd v0.16b,v22.16b 3009 1.5 christos aesimc v0.16b,v0.16b 3010 1.5 christos aesd v1.16b,v22.16b 3011 1.5 christos aesimc v1.16b,v1.16b 3012 1.5 christos aesd v24.16b,v22.16b 3013 1.5 christos aesimc v24.16b,v24.16b 3014 1.5 christos ld1 {v27.16b},[x0],#16 3015 1.5 christos aesd v0.16b,v23.16b 3016 1.5 christos aesd v1.16b,v23.16b 3017 1.5 christos aesd v24.16b,v23.16b 3018 1.5 christos ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] 3019 1.5 christos add w6,w5,#2 3020 1.5 christos eor v4.16b,v4.16b,v0.16b 3021 1.5 christos eor v5.16b,v5.16b,v1.16b 3022 1.5 christos eor v24.16b,v24.16b,v17.16b 3023 1.5 christos ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] 3024 1.5 christos st1 {v4.16b},[x1],#16 3025 1.5 christos st1 {v5.16b},[x1],#16 3026 1.5 christos st1 {v24.16b},[x1],#16 3027 1.5 christos 3028 1.5 christos cmn x2,#0x30 3029 1.5 christos add x2,x2,#0x30 3030 1.5 christos b.eq .Lxts_done 3031 1.5 christos sub x2,x2,#0x30 3032 1.5 christos orr v28.16b,v3.16b,v3.16b 3033 1.5 christos orr v29.16b,v27.16b,v27.16b 3034 1.5 christos nop 3035 1.5 christos 3036 1.5 christos .Lxts_inner_dec_tail: 3037 1.5 christos // x2 == -0x10 means two blocks left. 
3038 1.5 christos cmn x2,#0x10 3039 1.5 christos eor v1.16b,v28.16b,v6.16b 3040 1.5 christos eor v24.16b,v29.16b,v8.16b 3041 1.5 christos b.eq .Lxts_dec_tail_loop 3042 1.5 christos eor v24.16b,v29.16b,v6.16b 3043 1.5 christos .Lxts_dec_tail_loop: 3044 1.5 christos aesd v1.16b,v16.16b 3045 1.5 christos aesimc v1.16b,v1.16b 3046 1.5 christos aesd v24.16b,v16.16b 3047 1.5 christos aesimc v24.16b,v24.16b 3048 1.5 christos ld1 {v16.4s},[x7],#16 3049 1.5 christos subs w6,w6,#2 3050 1.5 christos aesd v1.16b,v17.16b 3051 1.5 christos aesimc v1.16b,v1.16b 3052 1.5 christos aesd v24.16b,v17.16b 3053 1.5 christos aesimc v24.16b,v24.16b 3054 1.5 christos ld1 {v17.4s},[x7],#16 3055 1.5 christos b.gt .Lxts_dec_tail_loop 3056 1.5 christos 3057 1.5 christos aesd v1.16b,v16.16b 3058 1.5 christos aesimc v1.16b,v1.16b 3059 1.5 christos aesd v24.16b,v16.16b 3060 1.5 christos aesimc v24.16b,v24.16b 3061 1.5 christos aesd v1.16b,v17.16b 3062 1.5 christos aesimc v1.16b,v1.16b 3063 1.5 christos aesd v24.16b,v17.16b 3064 1.5 christos aesimc v24.16b,v24.16b 3065 1.5 christos aesd v1.16b,v20.16b 3066 1.5 christos aesimc v1.16b,v1.16b 3067 1.5 christos aesd v24.16b,v20.16b 3068 1.5 christos aesimc v24.16b,v24.16b 3069 1.5 christos cmn x2,#0x20 3070 1.5 christos aesd v1.16b,v21.16b 3071 1.5 christos aesimc v1.16b,v1.16b 3072 1.5 christos aesd v24.16b,v21.16b 3073 1.5 christos aesimc v24.16b,v24.16b 3074 1.5 christos eor v5.16b,v6.16b,v7.16b 3075 1.5 christos aesd v1.16b,v22.16b 3076 1.5 christos aesimc v1.16b,v1.16b 3077 1.5 christos aesd v24.16b,v22.16b 3078 1.5 christos aesimc v24.16b,v24.16b 3079 1.5 christos eor v17.16b,v8.16b,v7.16b 3080 1.5 christos aesd v1.16b,v23.16b 3081 1.5 christos aesd v24.16b,v23.16b 3082 1.5 christos b.eq .Lxts_dec_one 3083 1.5 christos eor v5.16b,v5.16b,v1.16b 3084 1.5 christos eor v17.16b,v17.16b,v24.16b 3085 1.5 christos orr v6.16b,v9.16b,v9.16b 3086 1.5 christos orr v8.16b,v10.16b,v10.16b 3087 1.5 christos st1 {v5.16b},[x1],#16 3088 1.5 christos st1 
{v17.16b},[x1],#16 3089 1.5 christos add x2,x2,#16 3090 1.5 christos b .Lxts_done 3091 1.5 christos 3092 1.5 christos .Lxts_dec_one: 3093 1.5 christos eor v5.16b,v5.16b,v24.16b 3094 1.5 christos orr v6.16b,v8.16b,v8.16b 3095 1.5 christos orr v8.16b,v9.16b,v9.16b 3096 1.5 christos st1 {v5.16b},[x1],#16 3097 1.5 christos add x2,x2,#32 3098 1.5 christos 3099 1.5 christos .Lxts_done: 3100 1.5 christos tst x21,#0xf 3101 1.5 christos b.eq .Lxts_dec_abort 3102 1.5 christos // Processing the last two blocks with cipher stealing. 3103 1.5 christos mov x7,x3 3104 1.5 christos cbnz x2,.Lxts_dec_1st_done 3105 1.6 christos ld1 {v0.16b},[x0],#16 3106 1.5 christos 3107 1.5 christos // Decrypt the last secod block to get the last plain text block 3108 1.5 christos .Lxts_dec_1st_done: 3109 1.5 christos eor v26.16b,v0.16b,v8.16b 3110 1.5 christos ldr w6,[x3,#240] 3111 1.5 christos ld1 {v0.4s},[x3],#16 3112 1.5 christos sub w6,w6,#2 3113 1.5 christos ld1 {v1.4s},[x3],#16 3114 1.5 christos .Loop_final_2nd_dec: 3115 1.5 christos aesd v26.16b,v0.16b 3116 1.5 christos aesimc v26.16b,v26.16b 3117 1.5 christos ld1 {v0.4s},[x3],#16 // load key schedule... 3118 1.5 christos subs w6,w6,#2 3119 1.5 christos aesd v26.16b,v1.16b 3120 1.5 christos aesimc v26.16b,v26.16b 3121 1.5 christos ld1 {v1.4s},[x3],#16 // load key schedule... 3122 1.5 christos b.gt .Loop_final_2nd_dec 3123 1.5 christos 3124 1.5 christos aesd v26.16b,v0.16b 3125 1.5 christos aesimc v26.16b,v26.16b 3126 1.5 christos ld1 {v0.4s},[x3] 3127 1.5 christos aesd v26.16b,v1.16b 3128 1.5 christos eor v26.16b,v26.16b,v0.16b 3129 1.5 christos eor v26.16b,v26.16b,v8.16b 3130 1.5 christos st1 {v26.16b},[x1] 3131 1.5 christos 3132 1.5 christos mov x20,x0 3133 1.5 christos add x13,x1,#16 3134 1.5 christos 3135 1.5 christos // Composite the tailcnt "16 byte not aligned block" into the last second plain blocks 3136 1.5 christos // to get the last encrypted block. 
3137 1.5 christos .composite_dec_loop: 3138 1.5 christos subs x21,x21,#1 3139 1.5 christos ldrb w15,[x1,x21] 3140 1.5 christos ldrb w14,[x20,x21] 3141 1.5 christos strb w15,[x13,x21] 3142 1.5 christos strb w14,[x1,x21] 3143 1.5 christos b.gt .composite_dec_loop 3144 1.5 christos .Lxts_dec_load_done: 3145 1.5 christos ld1 {v26.16b},[x1] 3146 1.5 christos eor v26.16b,v26.16b,v6.16b 3147 1.5 christos 3148 1.5 christos // Decrypt the composite block to get the last second plain text block 3149 1.5 christos ldr w6,[x7,#240] 3150 1.6 christos ld1 {v0.4s},[x7],#16 3151 1.5 christos sub w6,w6,#2 3152 1.6 christos ld1 {v1.4s},[x7],#16 3153 1.5 christos .Loop_final_dec: 3154 1.5 christos aesd v26.16b,v0.16b 3155 1.5 christos aesimc v26.16b,v26.16b 3156 1.5 christos ld1 {v0.4s},[x7],#16 // load key schedule... 3157 1.5 christos subs w6,w6,#2 3158 1.5 christos aesd v26.16b,v1.16b 3159 1.5 christos aesimc v26.16b,v26.16b 3160 1.5 christos ld1 {v1.4s},[x7],#16 // load key schedule... 3161 1.5 christos b.gt .Loop_final_dec 3162 1.5 christos 3163 1.5 christos aesd v26.16b,v0.16b 3164 1.5 christos aesimc v26.16b,v26.16b 3165 1.5 christos ld1 {v0.4s},[x7] 3166 1.5 christos aesd v26.16b,v1.16b 3167 1.5 christos eor v26.16b,v26.16b,v0.16b 3168 1.5 christos eor v26.16b,v26.16b,v6.16b 3169 1.5 christos st1 {v26.16b},[x1] 3170 1.5 christos 3171 1.5 christos .Lxts_dec_abort: 3172 1.5 christos ldp x21,x22,[sp,#48] 3173 1.5 christos ldp d8,d9,[sp,#32] 3174 1.5 christos ldp d10,d11,[sp,#16] 3175 1.5 christos ldp x19,x20,[sp],#64 3176 1.5 christos 3177 1.5 christos .Lxts_dec_final_abort: 3178 1.5 christos ret 3179 1.5 christos .size aes_v8_xts_decrypt,.-aes_v8_xts_decrypt 3180 1.1 joerg #endif 3181