// AArch64, GNU as syntax ('//' starts a comment).
// Revision-annotation residue removed; one statement per line restored.
.text

.align	8	// strategic alignment and padding that allows to use
		// address value as loop termination condition...
	.quad	0,0,0,0,0,0,0,0
// iotas: the 24 64-bit round constants consumed one per round by the
// "Iota" step below.  The table is 24*8 = 192 bytes and is preceded by
// 64 bytes of zero padding after a ".align 8" (2^8 = 256-byte alignment
// on this target), so the table ends exactly on the next 256-byte
// boundary.  KeccakF1600_int relies on that: its post-incremented table
// pointer has all-zero low 8 bits only after the last entry was read.
.type	iotas,%object
iotas:
	.quad	0x0000000000000001
	.quad	0x0000000000008082
	.quad	0x800000000000808a
	.quad	0x8000000080008000
	.quad	0x000000000000808b
	.quad	0x0000000080000001
	.quad	0x8000000080008081
	.quad	0x8000000000008009
	.quad	0x000000000000008a
	.quad	0x0000000000000088
	.quad	0x0000000080008009
	.quad	0x000000008000000a
	.quad	0x000000008000808b
	.quad	0x800000000000008b
	.quad	0x8000000000008089
	.quad	0x8000000000008003
	.quad	0x8000000000008002
	.quad	0x8000000000000080
	.quad	0x000000000000800a
	.quad	0x800000008000000a
	.quad	0x8000000080008081
	.quad	0x8000000000008080
	.quad	0x0000000080000001
	.quad	0x8000000080008008
.size	iotas,.-iotas

//----------------------------------------------------------------------
// KeccakF1600_int: permutation loop on a register-resident state.
//
// In/out: the 25 state lanes live in x0-x17, x25, x19-x24 (in that
//         order; this is how the callers in this file load A[0..24]).
// Stack:  works in the CALLER's frame, which must provide 32 bytes at
//         the current sp:
//           [sp,#0 ..#15]  spill slot for x4/x9 during Theta
//           [sp,#16]       running pointer into iotas
//           [sp,#24]       saved x30 (x30 is used as scratch below)
// Clobb:  x26, x27, x28, x30 (reloaded before return), condition flags.
//----------------------------------------------------------------------
.type	KeccakF1600_int,%function
.align	5
KeccakF1600_int:
	adr	x28,iotas
	.inst	0xd503233f			// paciasp
	stp	x28,x30,[sp,#16]		// 32 bytes on top are mine
	b	.Loop
.align	4
.Loop:
	////////////////////////////////////////// Theta
	eor	x26,x0,x5
	stp	x4,x9,[sp,#0]			// offload pair...
	eor	x27,x1,x6
	eor	x28,x2,x7
	eor	x30,x3,x8
	eor	x4,x4,x9
	eor	x26,x26,x10
	eor	x27,x27,x11
	eor	x28,x28,x12
	eor	x30,x30,x13
	eor	x4,x4,x14
	eor	x26,x26,x15
	eor	x27,x27,x16
	eor	x28,x28,x17
	eor	x30,x30,x25
	eor	x4,x4,x19
	eor	x26,x26,x20
	eor	x28,x28,x22
	eor	x27,x27,x21
	eor	x30,x30,x23
	eor	x4,x4,x24

	eor	x9,x26,x28,ror#63

	eor	x1,x1,x9
	eor	x6,x6,x9
	eor	x11,x11,x9
	eor	x16,x16,x9
	eor	x21,x21,x9

	eor	x9,x27,x30,ror#63
	eor	x28,x28,x4,ror#63
	eor	x30,x30,x26,ror#63
	eor	x4,x4,x27,ror#63

	eor	x27,x2,x9			// mov	x27,x2
	eor	x7,x7,x9
	eor	x12,x12,x9
	eor	x17,x17,x9
	eor	x22,x22,x9

	eor	x0,x0,x4
	eor	x5,x5,x4
	eor	x10,x10,x4
	eor	x15,x15,x4
	eor	x20,x20,x4
	ldp	x4,x9,[sp,#0]			// re-load offloaded data
	eor	x26,x3,x28			// mov	x26,x3
	eor	x8,x8,x28
	eor	x13,x13,x28
	eor	x25,x25,x28
	eor	x23,x23,x28

	eor	x28,x4,x30			// mov	x28,x4
	eor	x9,x9,x30
	eor	x14,x14,x30
	eor	x19,x19,x30
	eor	x24,x24,x30

	////////////////////////////////////////// Rho+Pi
	mov	x30,x1
	ror	x1,x6,#64-44
	//mov	x27,x2
	ror	x2,x12,#64-43
	//mov	x26,x3
	ror	x3,x25,#64-21
	//mov	x28,x4
	ror	x4,x24,#64-14

	ror	x6,x9,#64-20
	ror	x12,x13,#64-25
	ror	x25,x17,#64-15
	ror	x24,x21,#64-2

	ror	x9,x22,#64-61
	ror	x13,x19,#64-8
	ror	x17,x11,#64-10
	ror	x21,x8,#64-55

	ror	x22,x14,#64-39
	ror	x19,x23,#64-56
	ror	x11,x7,#64-6
	ror	x8,x16,#64-45

	ror	x14,x20,#64-18
	ror	x23,x15,#64-41
	ror	x7,x10,#64-3
	ror	x16,x5,#64-36

	ror	x5,x26,#64-28
	ror	x10,x30,#64-1
	ror	x15,x28,#64-27
	ror	x20,x27,#64-62

	////////////////////////////////////////// Chi+Iota
	bic	x26,x2,x1
	bic	x27,x3,x2
	bic	x28,x0,x4
	bic	x30,x1,x0
	eor	x0,x0,x26
	bic	x26,x4,x3
	eor	x1,x1,x27
	ldr	x27,[sp,#16]
	eor	x3,x3,x28
	eor	x4,x4,x30
	eor	x2,x2,x26
	ldr	x30,[x27],#8			// Iota[i++]

	bic	x26,x7,x6
	// The flags set here are consumed by the "bne .Loop" at the bottom;
	// nothing in between (bic/eor/str) modifies the condition flags.
	tst	x27,#255			// are we done?
	str	x27,[sp,#16]
	bic	x27,x8,x7
	bic	x28,x5,x9
	eor	x0,x0,x30			// A[0][0] ^= Iota
	bic	x30,x6,x5
	eor	x5,x5,x26
	bic	x26,x9,x8
	eor	x6,x6,x27
	eor	x8,x8,x28
	eor	x9,x9,x30
	eor	x7,x7,x26

	bic	x26,x12,x11
	bic	x27,x13,x12
	bic	x28,x10,x14
	bic	x30,x11,x10
	eor	x10,x10,x26
	bic	x26,x14,x13
	eor	x11,x11,x27
	eor	x13,x13,x28
	eor	x14,x14,x30
	eor	x12,x12,x26

	bic	x26,x17,x16
	bic	x27,x25,x17
	bic	x28,x15,x19
	bic	x30,x16,x15
	eor	x15,x15,x26
	bic	x26,x19,x25
	eor	x16,x16,x27
	eor	x25,x25,x28
	eor	x19,x19,x30
	eor	x17,x17,x26

	bic	x26,x22,x21
	bic	x27,x23,x22
	bic	x28,x20,x24
	bic	x30,x21,x20
	eor	x20,x20,x26
	bic	x26,x24,x23
	eor	x21,x21,x27
	eor	x23,x23,x28
	eor	x24,x24,x30
	eor	x22,x22,x26

	bne	.Loop

	ldr	x30,[sp,#24]			// x30 was scratch; restore return address
	.inst	0xd50323bf			// autiasp
	ret
.size	KeccakF1600_int,.-KeccakF1600_int

//----------------------------------------------------------------------
// KeccakF1600: x0 = pointer to 25 consecutive 64-bit state words.
// Loads the state into registers, runs KeccakF1600_int and stores the
// state back through the same pointer.  Saves/restores x19-x28.
//----------------------------------------------------------------------
.type	KeccakF1600,%function
.align	5
KeccakF1600:
	.inst	0xd503233f			// paciasp
	stp	x29,x30,[sp,#-128]!
// KeccakF1600, continued: the entry label, paciasp and the frame push
// ("stp x29,x30,[sp,#-128]!") immediately precede this point.
// Revision-annotation residue removed; one statement per line restored.
	add	x29,sp,#0
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#48			// [sp,#0..31] is KeccakF1600_int's scratch

	str	x0,[sp,#32]			// offload argument
	mov	x26,x0
	ldp	x0,x1,[x0,#16*0]
	ldp	x2,x3,[x26,#16*1]
	ldp	x4,x5,[x26,#16*2]
	ldp	x6,x7,[x26,#16*3]
	ldp	x8,x9,[x26,#16*4]
	ldp	x10,x11,[x26,#16*5]
	ldp	x12,x13,[x26,#16*6]
	ldp	x14,x15,[x26,#16*7]
	ldp	x16,x17,[x26,#16*8]
	ldp	x25,x19,[x26,#16*9]
	ldp	x20,x21,[x26,#16*10]
	ldp	x22,x23,[x26,#16*11]
	ldr	x24,[x26,#16*12]

	bl	KeccakF1600_int

	ldr	x26,[sp,#32]
	stp	x0,x1,[x26,#16*0]
	stp	x2,x3,[x26,#16*1]
	stp	x4,x5,[x26,#16*2]
	stp	x6,x7,[x26,#16*3]
	stp	x8,x9,[x26,#16*4]
	stp	x10,x11,[x26,#16*5]
	stp	x12,x13,[x26,#16*6]
	stp	x14,x15,[x26,#16*7]
	stp	x16,x17,[x26,#16*8]
	stp	x25,x19,[x26,#16*9]
	stp	x20,x21,[x26,#16*10]
	stp	x22,x23,[x26,#16*11]
	str	x24,[x26,#16*12]

	ldp	x19,x20,[x29,#16]
	add	sp,sp,#48
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
	.inst	0xd50323bf			// autiasp
	ret
.size	KeccakF1600,.-KeccakF1600

//----------------------------------------------------------------------
// SHA3_absorb
// In:   x0 = uint64_t A[5][5]   (state)
//       x1 = const void *inp
//       x2 = size_t len
//       x3 = size_t bsz         (block size in bytes)
// Out:  x0 = number of input bytes left unprocessed (len after the
//            last whole block was consumed)
//
// While len >= bsz: XOR bsz bytes of input, 8 at a time, into the
// leading state lanes and run KeccakF1600_int.
//
// Frame below x29 (64 bytes):
//   [sp,#0..31]  scratch owned by KeccakF1600_int
//   [sp,#32]     A        [sp,#40]  inp
//   [sp,#48]     len      [sp,#56]  bsz
//----------------------------------------------------------------------
.globl	SHA3_absorb
.type	SHA3_absorb,%function
.align	5
SHA3_absorb:
	.inst	0xd503233f			// paciasp
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#64

	stp	x0,x1,[sp,#32]			// offload arguments
	stp	x2,x3,[sp,#48]

	mov	x26,x0				// uint64_t A[5][5]
	mov	x27,x1				// const void *inp
	mov	x28,x2				// size_t len
	mov	x30,x3				// size_t bsz
	ldp	x0,x1,[x26,#16*0]
	ldp	x2,x3,[x26,#16*1]
	ldp	x4,x5,[x26,#16*2]
	ldp	x6,x7,[x26,#16*3]
	ldp	x8,x9,[x26,#16*4]
	ldp	x10,x11,[x26,#16*5]
	ldp	x12,x13,[x26,#16*6]
	ldp	x14,x15,[x26,#16*7]
	ldp	x16,x17,[x26,#16*8]
	ldp	x25,x19,[x26,#16*9]
	ldp	x20,x21,[x26,#16*10]
	ldp	x22,x23,[x26,#16*11]
	ldr	x24,[x26,#16*12]
	b	.Loop_absorb

.align	4
.Loop_absorb:
	subs	x26,x28,x30			// len - bsz
	blo	.Labsorbed

	str	x26,[sp,#48]			// save len - bsz
	// Each "cmp x30,#8*(i+2)" below serves two exits: the "blo" right
	// after it, and the "beq" after the following lane.  The ldr/rev/eor
	// in between do not modify the condition flags.
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x0,x0,x26
	cmp	x30,#8*(0+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x1,x1,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x2,x2,x26
	cmp	x30,#8*(2+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x3,x3,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x4,x4,x26
	cmp	x30,#8*(4+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x5,x5,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x6,x6,x26
	cmp	x30,#8*(6+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x7,x7,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x8,x8,x26
	cmp	x30,#8*(8+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x9,x9,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x10,x10,x26
	cmp	x30,#8*(10+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x11,x11,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x12,x12,x26
	cmp	x30,#8*(12+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x13,x13,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x14,x14,x26
	cmp	x30,#8*(14+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x15,x15,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x16,x16,x26
	cmp	x30,#8*(16+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x17,x17,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x25,x25,x26
	cmp	x30,#8*(18+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x19,x19,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x20,x20,x26
	cmp	x30,#8*(20+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x21,x21,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x22,x22,x26
	cmp	x30,#8*(22+2)
	blo	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x23,x23,x26
	beq	.Lprocess_block
	ldr	x26,[x27],#8			// *inp++
#ifdef	__AARCH64EB__
	rev	x26,x26
#endif
	eor	x24,x24,x26

.Lprocess_block:
	str	x27,[sp,#40]			// save inp

	bl	KeccakF1600_int

	ldr	x27,[sp,#40]			// restore arguments
	ldp	x28,x30,[sp,#48]		// x28 = remaining len, x30 = bsz
	b	.Loop_absorb

.align	4
.Labsorbed:
	ldr	x27,[sp,#32]			// x27 = A
	stp	x0,x1,[x27,#16*0]
	stp	x2,x3,[x27,#16*1]
	stp	x4,x5,[x27,#16*2]
	stp	x6,x7,[x27,#16*3]
	stp	x8,x9,[x27,#16*4]
	stp	x10,x11,[x27,#16*5]
	stp	x12,x13,[x27,#16*6]
	stp	x14,x15,[x27,#16*7]
	stp	x16,x17,[x27,#16*8]
	stp	x25,x19,[x27,#16*9]
	stp	x20,x21,[x27,#16*10]
	stp	x22,x23,[x27,#16*11]
	str	x24,[x27,#16*12]

	mov	x0,x28				// return value
	ldp	x19,x20,[x29,#16]
	add	sp,sp,#64
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
	.inst	0xd50323bf			// autiasp
	ret
.size	SHA3_absorb,.-SHA3_absorb

//----------------------------------------------------------------------
// SHA3_squeeze
// In:   x0 = pointer to the state words (passed on to KeccakF1600)
//       x1 = output pointer
//       x2 = number of bytes to emit
//       x3 = byte count after which the state is re-permuted
//            (NOTE(review): presumably the same "bsz" as SHA3_absorb;
//            confirm against the C prototype)
//
// Copies state words to the output 8 bytes at a time; every x3 bytes it
// calls KeccakF1600 and restarts from the first state word.  A final
// run of fewer than 8 bytes is emitted byte by byte, low byte first.
//----------------------------------------------------------------------
.globl	SHA3_squeeze
.type	SHA3_squeeze,%function
.align	5
SHA3_squeeze:
	.inst	0xd503233f			// paciasp
	stp	x29,x30,[sp,#-48]!
	add	x29,sp,#0
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]

	// Zero-length request: without this guard the loop falls into
	// .Lsqueeze_tail, stores a byte, and "subs x21,x21,#1" wraps to a
	// non-zero value, so 7 bytes would be written to a 0-byte buffer.
	cbz	x2,.Lsqueeze_done

	mov	x19,x0				// put aside arguments
	mov	x20,x1
	mov	x21,x2
	mov	x22,x3

.Loop_squeeze:
	ldr	x4,[x0],#8
	cmp	x21,#8
	blo	.Lsqueeze_tail
#ifdef	__AARCH64EB__
	rev	x4,x4
#endif
	str	x4,[x20],#8
	subs	x21,x21,#8
	beq	.Lsqueeze_done

	subs	x3,x3,#8
	bhi	.Loop_squeeze

	mov	x0,x19
	bl	KeccakF1600
	mov	x0,x19				// rewind to the first state word
	mov	x3,x22				// reload the per-block byte budget
	b	.Loop_squeeze

.align	4
.Lsqueeze_tail:
	// 1..7 bytes remain in x21; emit x4 low byte first.
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1
	lsr	x4,x4,#8
	subs	x21,x21,#1
	beq	.Lsqueeze_done
	strb	w4,[x20],#1

.Lsqueeze_done:
	ldp	x19,x20,[sp,#16]
	ldp	x21,x22,[sp,#32]
	ldp	x29,x30,[sp],#48
	.inst	0xd50323bf			// autiasp
	ret
.size	SHA3_squeeze,.-SHA3_squeeze

//----------------------------------------------------------------------
// KeccakF1600_ce: permutation on a state held in v0-v24, using
// hand-encoded instructions (.inst words; the intended mnemonic is in
// the trailing comment of each line).
//
// In/out: v0-v24 (the callers in this file load/store them as d0-d24).
// Temps:  v25-v31.
// Clobb:  x9 (loop counter), x10 (pointer into iotas), x11 (current
//         round constant), condition flags.
// The loop body contains two rounds (two "ldr x11,[x10],#8" constant
// fetches), so x9 = 12 iterations cover all 24 entries of iotas.
//----------------------------------------------------------------------
.type	KeccakF1600_ce,%function
.align	5
KeccakF1600_ce:
	mov	x9,#12
	adr	x10,iotas
	b	.Loop_ce
.align	4
.Loop_ce:
	////////////////////////////////////////////////// Theta
	.inst	0xce052819	//eor3 v25.16b,v0.16b,v5.16b,v10.16b
	.inst	0xce062c3a	//eor3 v26.16b,v1.16b,v6.16b,v11.16b
	.inst	0xce07305b	//eor3 v27.16b,v2.16b,v7.16b,v12.16b
	.inst	0xce08347c	//eor3 v28.16b,v3.16b,v8.16b,v13.16b
	.inst	0xce09389d	//eor3 v29.16b,v4.16b,v9.16b,v14.16b
	.inst	0xce0f5339	//eor3 v25.16b,v25.16b, v15.16b,v20.16b
	.inst	0xce10575a	//eor3 v26.16b,v26.16b, v16.16b,v21.16b
	.inst	0xce115b7b	//eor3 v27.16b,v27.16b, v17.16b,v22.16b
	.inst	0xce125f9c	//eor3 v28.16b,v28.16b, v18.16b,v23.16b
	.inst	0xce1363bd	//eor3 v29.16b,v29.16b, v19.16b,v24.16b

	.inst	0xce7b8f3e	//rax1 v30.16b,v25.16b,v27.16b // D[1]
	.inst	0xce7c8f5f	//rax1 v31.16b,v26.16b,v28.16b // D[2]
	.inst	0xce7d8f7b	//rax1 v27.16b,v27.16b,v29.16b // D[3]
	.inst	0xce798f9c	//rax1 v28.16b,v28.16b,v25.16b // D[4]
	.inst	0xce7a8fbd	//rax1 v29.16b,v29.16b,v26.16b // D[0]

	////////////////////////////////////////////////// Theta+Rho+Pi
	.inst	0xce9e50d9	//xar v25.16b, v6.16b,v30.16b,#64-44 // C[0]=A[0][1]
	.inst	0xce9cb126	//xar v6.16b,v9.16b,v28.16b,#64-20
	.inst	0xce9f0ec9	//xar v9.16b,v22.16b,v31.16b,#64-61
	.inst	0xce9c65d6	//xar v22.16b,v14.16b,v28.16b,#64-39
	.inst	0xce9dba8e	//xar v14.16b,v20.16b,v29.16b,#64-18

	.inst	0xce9f0854	//xar v20.16b,v2.16b,v31.16b,#64-62

	.inst	0xce9f5582	//xar v2.16b,v12.16b,v31.16b,#64-43
	.inst	0xce9b9dac	//xar v12.16b,v13.16b,v27.16b,#64-25
	.inst	0xce9ce26d	//xar v13.16b,v19.16b,v28.16b,#64-8
	.inst	0xce9b22f3	//xar v19.16b,v23.16b,v27.16b,#64-56
	.inst	0xce9d5df7	//xar v23.16b,v15.16b,v29.16b,#64-41

	.inst	0xce9c948f	//xar v15.16b,v4.16b,v28.16b,#64-27

	eor	v0.16b,v0.16b,v29.16b
	ldr	x11,[x10],#8			// next round constant

	.inst	0xce9bae5a	//xar v26.16b, v18.16b,v27.16b,#64-21 // C[1]=A[0][3]
	.inst	0xce9fc632	//xar v18.16b,v17.16b,v31.16b,#64-15
	.inst	0xce9ed971	//xar v17.16b,v11.16b,v30.16b,#64-10
	.inst	0xce9fe8eb	//xar v11.16b,v7.16b,v31.16b,#64-6
	.inst	0xce9df547	//xar v7.16b,v10.16b,v29.16b,#64-3

	.inst	0xce9efc2a	//xar v10.16b,v1.16b,v30.16b,#64-1 // *

	.inst	0xce9ccb04	//xar v4.16b,v24.16b,v28.16b,#64-14
	.inst	0xce9efab8	//xar v24.16b,v21.16b,v30.16b,#64-2
	.inst	0xce9b2515	//xar v21.16b,v8.16b,v27.16b,#64-55
	.inst	0xce9e4e08	//xar v8.16b,v16.16b,v30.16b,#64-45
	.inst	0xce9d70b0	//xar v16.16b,v5.16b,v29.16b,#64-36

	.inst	0xce9b907b	//xar v27.16b, v3.16b,v27.16b,#64-28 // C[2]=A[1][0]

	////////////////////////////////////////////////// Chi+Iota
	dup	v31.2d,x11			// borrow C[6]
	.inst	0xce22641c	//bcax v28.16b, v0.16b,v2.16b,v25.16b // *
	.inst	0xce3a0b21	//bcax v1.16b,v25.16b, v26.16b, v2.16b // *
	.inst	0xce246842	//bcax v2.16b,v2.16b,v4.16b,v26.16b
	.inst	0xce201343	//bcax v3.16b,v26.16b, v0.16b,v4.16b
	.inst	0xce390084	//bcax v4.16b,v4.16b,v25.16b, v0.16b

	.inst	0xce271b65	//bcax v5.16b,v27.16b, v7.16b,v6.16b // *
	.inst	0xce281cd9	//bcax v25.16b, v6.16b,v8.16b,v7.16b // *
	.inst	0xce2920e7	//bcax v7.16b,v7.16b,v9.16b,v8.16b
	.inst	0xce3b2508	//bcax v8.16b,v8.16b,v27.16b, v9.16b
	.inst	0xce266d29	//bcax v9.16b,v9.16b,v6.16b,v27.16b

	eor	v0.16b,v28.16b,v31.16b		// Iota

	.inst	0xce2c2d5a	//bcax v26.16b, v10.16b,v12.16b,v11.16b // *
	.inst	0xce2d317b	//bcax v27.16b, v11.16b,v13.16b,v12.16b // *
	.inst	0xce2e358c	//bcax v12.16b,v12.16b,v14.16b,v13.16b
	.inst	0xce2a39ad	//bcax v13.16b,v13.16b,v10.16b,v14.16b
	.inst	0xce2b29ce	//bcax v14.16b,v14.16b,v11.16b,v10.16b

	.inst	0xce3141fc	//bcax v28.16b, v15.16b,v17.16b,v16.16b // *
	.inst	0xce32461d	//bcax v29.16b, v16.16b,v18.16b,v17.16b // *
	.inst	0xce334a31	//bcax v17.16b,v17.16b,v19.16b,v18.16b
	.inst	0xce2f4e52	//bcax v18.16b,v18.16b,v15.16b,v19.16b
	.inst	0xce303e73	//bcax v19.16b,v19.16b,v16.16b,v15.16b

	.inst	0xce36569e	//bcax v30.16b, v20.16b,v22.16b,v21.16b // *
	.inst	0xce375abf	//bcax v31.16b, v21.16b,v23.16b,v22.16b // *
	.inst	0xce385ed6	//bcax v22.16b,v22.16b,v24.16b,v23.16b
	.inst	0xce3462f7	//bcax v23.16b,v23.16b,v20.16b,v24.16b
	.inst	0xce355318	//bcax v24.16b,v24.16b,v21.16b,v20.16b
	////////////////////////////////////////////////// Theta
	.inst	0xce056806	//eor3 v6.16b,v0.16b,v5.16b,v26.16b
	.inst	0xce196c2a	//eor3 v10.16b,v1.16b,v25.16b,v27.16b
	.inst	0xce07304b	//eor3 v11.16b,v2.16b,v7.16b,v12.16b
	.inst	0xce08346f	//eor3 v15.16b,v3.16b,v8.16b,v13.16b
	.inst	0xce093890	//eor3 v16.16b,v4.16b,v9.16b,v14.16b
	.inst	0xce1c78c6	//eor3 v6.16b,v6.16b, v28.16b,v30.16b
	.inst	0xce1d7d4a	//eor3 v10.16b,v10.16b, v29.16b,v31.16b
	.inst	0xce11596b	//eor3 v11.16b,v11.16b, v17.16b,v22.16b
	.inst	0xce125def	//eor3 v15.16b,v15.16b, v18.16b,v23.16b
	.inst	0xce136210	//eor3 v16.16b,v16.16b, v19.16b,v24.16b

	.inst	0xce6b8cd4	//rax1 v20.16b,v6.16b,v11.16b // D[1]
	.inst	0xce6f8d55	//rax1 v21.16b,v10.16b,v15.16b // D[2]
	.inst	0xce708d6b	//rax1 v11.16b,v11.16b,v16.16b // D[3]
	.inst	0xce668def	//rax1 v15.16b,v15.16b,v6.16b // D[4]
	.inst	0xce6a8e10	//rax1 v16.16b,v16.16b,v10.16b // D[0]

	////////////////////////////////////////////////// Theta+Rho+Pi
	.inst	0xce945326	//xar v6.16b, v25.16b,v20.16b,#64-44 // C[0]=A[0][1]
	.inst	0xce8fb139	//xar v25.16b,v9.16b,v15.16b,#64-20
	.inst	0xce950ec9	//xar v9.16b,v22.16b,v21.16b,#64-61
	.inst	0xce8f65d6	//xar v22.16b,v14.16b,v15.16b,#64-39
	.inst	0xce90bbce	//xar v14.16b,v30.16b,v16.16b,#64-18

	.inst	0xce95085e	//xar v30.16b,v2.16b,v21.16b,#64-62

	.inst	0xce955582	//xar v2.16b,v12.16b,v21.16b,#64-43
	.inst	0xce8b9dac	//xar v12.16b,v13.16b,v11.16b,#64-25
	.inst	0xce8fe26d	//xar v13.16b,v19.16b,v15.16b,#64-8
	.inst	0xce8b22f3	//xar v19.16b,v23.16b,v11.16b,#64-56
	.inst	0xce905f97	//xar v23.16b,v28.16b,v16.16b,#64-41

	.inst	0xce8f949c	//xar v28.16b,v4.16b,v15.16b,#64-27

	eor	v0.16b,v0.16b,v16.16b
	ldr	x11,[x10],#8			// next round constant

	.inst	0xce8bae4a	//xar v10.16b, v18.16b,v11.16b,#64-21 // C[1]=A[0][3]
	.inst	0xce95c632	//xar v18.16b,v17.16b,v21.16b,#64-15
	.inst	0xce94db71	//xar v17.16b,v27.16b,v20.16b,#64-10
	.inst	0xce95e8fb	//xar v27.16b,v7.16b,v21.16b,#64-6
	.inst	0xce90f747	//xar v7.16b,v26.16b,v16.16b,#64-3

	.inst	0xce94fc3a	//xar v26.16b,v1.16b,v20.16b,#64-1 // *

	.inst	0xce8fcb04	//xar v4.16b,v24.16b,v15.16b,#64-14
	.inst	0xce94fbf8	//xar v24.16b,v31.16b,v20.16b,#64-2
	.inst	0xce8b251f	//xar v31.16b,v8.16b,v11.16b,#64-55
	.inst	0xce944fa8	//xar v8.16b,v29.16b,v20.16b,#64-45
	.inst	0xce9070bd	//xar v29.16b,v5.16b,v16.16b,#64-36

	.inst	0xce8b906b	//xar v11.16b, v3.16b,v11.16b,#64-28 // C[2]=A[1][0]

	////////////////////////////////////////////////// Chi+Iota
	dup	v21.2d,x11			// borrow C[6]
	.inst	0xce22180f	//bcax v15.16b, v0.16b,v2.16b,v6.16b // *
	.inst	0xce2a08c1	//bcax v1.16b,v6.16b, v10.16b, v2.16b // *
	.inst	0xce242842	//bcax v2.16b,v2.16b,v4.16b,v10.16b
	.inst	0xce201143	//bcax v3.16b,v10.16b, v0.16b,v4.16b
	.inst	0xce260084	//bcax v4.16b,v4.16b,v6.16b, v0.16b

	.inst	0xce276565	//bcax v5.16b,v11.16b, v7.16b,v25.16b // *
	.inst	0xce281f26	//bcax v6.16b, v25.16b,v8.16b,v7.16b // *
	.inst	0xce2920e7	//bcax v7.16b,v7.16b,v9.16b,v8.16b
	.inst	0xce2b2508	//bcax v8.16b,v8.16b,v11.16b, v9.16b
	.inst	0xce392d29	//bcax v9.16b,v9.16b,v25.16b,v11.16b

	eor	v0.16b,v15.16b,v21.16b		// Iota

	.inst	0xce2c6f4a	//bcax v10.16b, v26.16b,v12.16b,v27.16b // *
	.inst	0xce2d336b	//bcax v11.16b, v27.16b,v13.16b,v12.16b // *
	.inst	0xce2e358c	//bcax v12.16b,v12.16b,v14.16b,v13.16b
	.inst	0xce3a39ad	//bcax v13.16b,v13.16b,v26.16b,v14.16b
	.inst	0xce3b69ce	//bcax v14.16b,v14.16b,v27.16b,v26.16b

	.inst	0xce31778f	//bcax v15.16b, v28.16b,v17.16b,v29.16b // *
	.inst	0xce3247b0	//bcax v16.16b, v29.16b,v18.16b,v17.16b // *
	.inst	0xce334a31	//bcax v17.16b,v17.16b,v19.16b,v18.16b
	.inst	0xce3c4e52	//bcax v18.16b,v18.16b,v28.16b,v19.16b
	.inst	0xce3d7273	//bcax v19.16b,v19.16b,v29.16b,v28.16b

	.inst	0xce367fd4	//bcax v20.16b, v30.16b,v22.16b,v31.16b // *
	.inst	0xce375bf5	//bcax v21.16b, v31.16b,v23.16b,v22.16b // *
	.inst	0xce385ed6	//bcax v22.16b,v22.16b,v24.16b,v23.16b
	.inst	0xce3e62f7	//bcax v23.16b,v23.16b,v30.16b,v24.16b
	.inst	0xce3f7b18	//bcax v24.16b,v24.16b,v31.16b,v30.16b
	subs	x9,x9,#1
	bne	.Loop_ce

	ret
.size	KeccakF1600_ce,.-KeccakF1600_ce

//----------------------------------------------------------------------
// KeccakF1600_cext: x0 = pointer to 25 consecutive 64-bit state words.
// Wrapper around KeccakF1600_ce that loads the state into d0-d24,
// runs the permutation and stores it back; saves/restores d8-d15.
//----------------------------------------------------------------------
.type	KeccakF1600_cext,%function
.align	5
KeccakF1600_cext:
	.inst	0xd503233f			// paciasp
	stp	x29,x30,[sp,#-80]!
// KeccakF1600_cext, continued: the entry label, paciasp and the frame
// push ("stp x29,x30,[sp,#-80]!") immediately precede this point.
// Revision-annotation residue removed; one statement per line restored.
	add	x29,sp,#0
	stp	d8,d9,[sp,#16]			// per ABI requirement
	stp	d10,d11,[sp,#32]
	stp	d12,d13,[sp,#48]
	stp	d14,d15,[sp,#64]
	ldp	d0,d1,[x0,#8*0]			// state words 0..24 -> d0..d24
	ldp	d2,d3,[x0,#8*2]
	ldp	d4,d5,[x0,#8*4]
	ldp	d6,d7,[x0,#8*6]
	ldp	d8,d9,[x0,#8*8]
	ldp	d10,d11,[x0,#8*10]
	ldp	d12,d13,[x0,#8*12]
	ldp	d14,d15,[x0,#8*14]
	ldp	d16,d17,[x0,#8*16]
	ldp	d18,d19,[x0,#8*18]
	ldp	d20,d21,[x0,#8*20]
	ldp	d22,d23,[x0,#8*22]
	ldr	d24,[x0,#8*24]
	bl	KeccakF1600_ce			// leaves x0 untouched (uses x9-x11 only)
	ldr	x30,[sp,#8]			// "bl" overwrote x30; reload return address
	stp	d0,d1,[x0,#8*0]
	stp	d2,d3,[x0,#8*2]
	stp	d4,d5,[x0,#8*4]
	stp	d6,d7,[x0,#8*6]
	stp	d8,d9,[x0,#8*8]
	stp	d10,d11,[x0,#8*10]
	stp	d12,d13,[x0,#8*12]
	stp	d14,d15,[x0,#8*14]
	stp	d16,d17,[x0,#8*16]
	stp	d18,d19,[x0,#8*18]
	stp	d20,d21,[x0,#8*20]
	stp	d22,d23,[x0,#8*22]
	str	d24,[x0,#8*24]

	ldp	d8,d9,[sp,#16]
	ldp	d10,d11,[sp,#32]
	ldp	d12,d13,[sp,#48]
	ldp	d14,d15,[sp,#64]
	ldr	x29,[sp],#80			// x30 already reloaded above; pop the frame
	.inst	0xd50323bf			// autiasp
	ret
.size	KeccakF1600_cext,.-KeccakF1600_cext

// SHA3_absorb_cext: exported entry point; its prologue starts here and
// the body follows.
.globl	SHA3_absorb_cext
.type	SHA3_absorb_cext,%function
.align	5
SHA3_absorb_cext:
	.inst	0xd503233f			// paciasp
	stp	x29,x30,[sp,#-80]!
814 1.1 christos add x29,sp,#0 815 1.1 christos stp d8,d9,[sp,#16] // per ABI requirement 816 1.1 christos stp d10,d11,[sp,#32] 817 1.1 christos stp d12,d13,[sp,#48] 818 1.1 christos stp d14,d15,[sp,#64] 819 1.1 christos ldp d0,d1,[x0,#8*0] 820 1.1 christos ldp d2,d3,[x0,#8*2] 821 1.1 christos ldp d4,d5,[x0,#8*4] 822 1.1 christos ldp d6,d7,[x0,#8*6] 823 1.1 christos ldp d8,d9,[x0,#8*8] 824 1.1 christos ldp d10,d11,[x0,#8*10] 825 1.1 christos ldp d12,d13,[x0,#8*12] 826 1.1 christos ldp d14,d15,[x0,#8*14] 827 1.1 christos ldp d16,d17,[x0,#8*16] 828 1.1 christos ldp d18,d19,[x0,#8*18] 829 1.1 christos ldp d20,d21,[x0,#8*20] 830 1.1 christos ldp d22,d23,[x0,#8*22] 831 1.1 christos ldr d24,[x0,#8*24] 832 1.1 christos b .Loop_absorb_ce 833 1.1 christos 834 1.1 christos .align 4 835 1.1 christos .Loop_absorb_ce: 836 1.1 christos subs x2,x2,x3 // len - bsz 837 1.1 christos blo .Labsorbed_ce 838 1.1 christos ldr d31,[x1],#8 // *inp++ 839 1.1 christos #ifdef __AARCH64EB__ 840 1.1 christos rev64 v31.16b,v31.16b 841 1.1 christos #endif 842 1.1 christos eor v0.16b,v0.16b,v31.16b 843 1.1 christos cmp x3,#8*(0+2) 844 1.1 christos blo .Lprocess_block_ce 845 1.1 christos ldr d31,[x1],#8 // *inp++ 846 1.1 christos #ifdef __AARCH64EB__ 847 1.1 christos rev64 v31.16b,v31.16b 848 1.1 christos #endif 849 1.1 christos eor v1.16b,v1.16b,v31.16b 850 1.1 christos beq .Lprocess_block_ce 851 1.1 christos ldr d31,[x1],#8 // *inp++ 852 1.1 christos #ifdef __AARCH64EB__ 853 1.1 christos rev64 v31.16b,v31.16b 854 1.1 christos #endif 855 1.1 christos eor v2.16b,v2.16b,v31.16b 856 1.1 christos cmp x3,#8*(2+2) 857 1.1 christos blo .Lprocess_block_ce 858 1.1 christos ldr d31,[x1],#8 // *inp++ 859 1.1 christos #ifdef __AARCH64EB__ 860 1.1 christos rev64 v31.16b,v31.16b 861 1.1 christos #endif 862 1.1 christos eor v3.16b,v3.16b,v31.16b 863 1.1 christos beq .Lprocess_block_ce 864 1.1 christos ldr d31,[x1],#8 // *inp++ 865 1.1 christos #ifdef __AARCH64EB__ 866 1.1 christos rev64 v31.16b,v31.16b 867 1.1 
christos #endif 868 1.1 christos eor v4.16b,v4.16b,v31.16b 869 1.1 christos cmp x3,#8*(4+2) 870 1.1 christos blo .Lprocess_block_ce 871 1.1 christos ldr d31,[x1],#8 // *inp++ 872 1.1 christos #ifdef __AARCH64EB__ 873 1.1 christos rev64 v31.16b,v31.16b 874 1.1 christos #endif 875 1.1 christos eor v5.16b,v5.16b,v31.16b 876 1.1 christos beq .Lprocess_block_ce 877 1.1 christos ldr d31,[x1],#8 // *inp++ 878 1.1 christos #ifdef __AARCH64EB__ 879 1.1 christos rev64 v31.16b,v31.16b 880 1.1 christos #endif 881 1.1 christos eor v6.16b,v6.16b,v31.16b 882 1.1 christos cmp x3,#8*(6+2) 883 1.1 christos blo .Lprocess_block_ce 884 1.1 christos ldr d31,[x1],#8 // *inp++ 885 1.1 christos #ifdef __AARCH64EB__ 886 1.1 christos rev64 v31.16b,v31.16b 887 1.1 christos #endif 888 1.1 christos eor v7.16b,v7.16b,v31.16b 889 1.1 christos beq .Lprocess_block_ce 890 1.1 christos ldr d31,[x1],#8 // *inp++ 891 1.1 christos #ifdef __AARCH64EB__ 892 1.1 christos rev64 v31.16b,v31.16b 893 1.1 christos #endif 894 1.1 christos eor v8.16b,v8.16b,v31.16b 895 1.1 christos cmp x3,#8*(8+2) 896 1.1 christos blo .Lprocess_block_ce 897 1.1 christos ldr d31,[x1],#8 // *inp++ 898 1.1 christos #ifdef __AARCH64EB__ 899 1.1 christos rev64 v31.16b,v31.16b 900 1.1 christos #endif 901 1.1 christos eor v9.16b,v9.16b,v31.16b 902 1.1 christos beq .Lprocess_block_ce 903 1.1 christos ldr d31,[x1],#8 // *inp++ 904 1.1 christos #ifdef __AARCH64EB__ 905 1.1 christos rev64 v31.16b,v31.16b 906 1.1 christos #endif 907 1.1 christos eor v10.16b,v10.16b,v31.16b 908 1.1 christos cmp x3,#8*(10+2) 909 1.1 christos blo .Lprocess_block_ce 910 1.1 christos ldr d31,[x1],#8 // *inp++ 911 1.1 christos #ifdef __AARCH64EB__ 912 1.1 christos rev64 v31.16b,v31.16b 913 1.1 christos #endif 914 1.1 christos eor v11.16b,v11.16b,v31.16b 915 1.1 christos beq .Lprocess_block_ce 916 1.1 christos ldr d31,[x1],#8 // *inp++ 917 1.1 christos #ifdef __AARCH64EB__ 918 1.1 christos rev64 v31.16b,v31.16b 919 1.1 christos #endif 920 1.1 christos eor 
v12.16b,v12.16b,v31.16b 921 1.1 christos cmp x3,#8*(12+2) 922 1.1 christos blo .Lprocess_block_ce 923 1.1 christos ldr d31,[x1],#8 // *inp++ 924 1.1 christos #ifdef __AARCH64EB__ 925 1.1 christos rev64 v31.16b,v31.16b 926 1.1 christos #endif 927 1.1 christos eor v13.16b,v13.16b,v31.16b 928 1.1 christos beq .Lprocess_block_ce 929 1.1 christos ldr d31,[x1],#8 // *inp++ 930 1.1 christos #ifdef __AARCH64EB__ 931 1.1 christos rev64 v31.16b,v31.16b 932 1.1 christos #endif 933 1.1 christos eor v14.16b,v14.16b,v31.16b 934 1.1 christos cmp x3,#8*(14+2) 935 1.1 christos blo .Lprocess_block_ce 936 1.1 christos ldr d31,[x1],#8 // *inp++ 937 1.1 christos #ifdef __AARCH64EB__ 938 1.1 christos rev64 v31.16b,v31.16b 939 1.1 christos #endif 940 1.1 christos eor v15.16b,v15.16b,v31.16b 941 1.1 christos beq .Lprocess_block_ce 942 1.1 christos ldr d31,[x1],#8 // *inp++ 943 1.1 christos #ifdef __AARCH64EB__ 944 1.1 christos rev64 v31.16b,v31.16b 945 1.1 christos #endif 946 1.1 christos eor v16.16b,v16.16b,v31.16b 947 1.1 christos cmp x3,#8*(16+2) 948 1.1 christos blo .Lprocess_block_ce 949 1.1 christos ldr d31,[x1],#8 // *inp++ 950 1.1 christos #ifdef __AARCH64EB__ 951 1.1 christos rev64 v31.16b,v31.16b 952 1.1 christos #endif 953 1.1 christos eor v17.16b,v17.16b,v31.16b 954 1.1 christos beq .Lprocess_block_ce 955 1.1 christos ldr d31,[x1],#8 // *inp++ 956 1.1 christos #ifdef __AARCH64EB__ 957 1.1 christos rev64 v31.16b,v31.16b 958 1.1 christos #endif 959 1.1 christos eor v18.16b,v18.16b,v31.16b 960 1.1 christos cmp x3,#8*(18+2) 961 1.1 christos blo .Lprocess_block_ce 962 1.1 christos ldr d31,[x1],#8 // *inp++ 963 1.1 christos #ifdef __AARCH64EB__ 964 1.1 christos rev64 v31.16b,v31.16b 965 1.1 christos #endif 966 1.1 christos eor v19.16b,v19.16b,v31.16b 967 1.1 christos beq .Lprocess_block_ce 968 1.1 christos ldr d31,[x1],#8 // *inp++ 969 1.1 christos #ifdef __AARCH64EB__ 970 1.1 christos rev64 v31.16b,v31.16b 971 1.1 christos #endif 972 1.1 christos eor v20.16b,v20.16b,v31.16b 973 1.1 
christos cmp x3,#8*(20+2) 974 1.1 christos blo .Lprocess_block_ce 975 1.1 christos ldr d31,[x1],#8 // *inp++ 976 1.1 christos #ifdef __AARCH64EB__ 977 1.1 christos rev64 v31.16b,v31.16b 978 1.1 christos #endif 979 1.1 christos eor v21.16b,v21.16b,v31.16b 980 1.1 christos beq .Lprocess_block_ce 981 1.1 christos ldr d31,[x1],#8 // *inp++ 982 1.1 christos #ifdef __AARCH64EB__ 983 1.1 christos rev64 v31.16b,v31.16b 984 1.1 christos #endif 985 1.1 christos eor v22.16b,v22.16b,v31.16b 986 1.1 christos cmp x3,#8*(22+2) 987 1.1 christos blo .Lprocess_block_ce 988 1.1 christos ldr d31,[x1],#8 // *inp++ 989 1.1 christos #ifdef __AARCH64EB__ 990 1.1 christos rev64 v31.16b,v31.16b 991 1.1 christos #endif 992 1.1 christos eor v23.16b,v23.16b,v31.16b 993 1.1 christos beq .Lprocess_block_ce 994 1.1 christos ldr d31,[x1],#8 // *inp++ 995 1.1 christos #ifdef __AARCH64EB__ 996 1.1 christos rev64 v31.16b,v31.16b 997 1.1 christos #endif 998 1.1 christos eor v24.16b,v24.16b,v31.16b 999 1.1 christos 1000 1.1 christos .Lprocess_block_ce: 1001 1.1 christos 1002 1.1 christos bl KeccakF1600_ce 1003 1.1 christos 1004 1.1 christos b .Loop_absorb_ce 1005 1.1 christos 1006 1.1 christos .align 4 1007 1.1 christos .Labsorbed_ce: 1008 1.1 christos stp d0,d1,[x0,#8*0] 1009 1.1 christos stp d2,d3,[x0,#8*2] 1010 1.1 christos stp d4,d5,[x0,#8*4] 1011 1.1 christos stp d6,d7,[x0,#8*6] 1012 1.1 christos stp d8,d9,[x0,#8*8] 1013 1.1 christos stp d10,d11,[x0,#8*10] 1014 1.1 christos stp d12,d13,[x0,#8*12] 1015 1.1 christos stp d14,d15,[x0,#8*14] 1016 1.1 christos stp d16,d17,[x0,#8*16] 1017 1.1 christos stp d18,d19,[x0,#8*18] 1018 1.1 christos stp d20,d21,[x0,#8*20] 1019 1.1 christos stp d22,d23,[x0,#8*22] 1020 1.1 christos str d24,[x0,#8*24] 1021 1.1 christos add x0,x2,x3 // return value 1022 1.1 christos 1023 1.1 christos ldp d8,d9,[sp,#16] 1024 1.1 christos ldp d10,d11,[sp,#32] 1025 1.1 christos ldp d12,d13,[sp,#48] 1026 1.1 christos ldp d14,d15,[sp,#64] 1027 1.1 christos ldp x29,x30,[sp],#80 1028 1.1 
christos .inst 0xd50323bf // autiasp 1029 1.1 christos ret 1030 1.1 christos .size SHA3_absorb_cext,.-SHA3_absorb_cext 1031 1.1 christos .globl SHA3_squeeze_cext 1032 1.1 christos .type SHA3_squeeze_cext,%function 1033 1.1 christos .align 5 1034 1.1 christos SHA3_squeeze_cext: 1035 1.1 christos .inst 0xd503233f // paciasp 1036 1.1 christos stp x29,x30,[sp,#-16]! 1037 1.1 christos add x29,sp,#0 1038 1.1 christos mov x9,x0 1039 1.1 christos mov x10,x3 1040 1.1 christos 1041 1.1 christos .Loop_squeeze_ce: 1042 1.1 christos ldr x4,[x9],#8 1043 1.1 christos cmp x2,#8 1044 1.1 christos blo .Lsqueeze_tail_ce 1045 1.1 christos #ifdef __AARCH64EB__ 1046 1.1 christos rev x4,x4 1047 1.1 christos #endif 1048 1.1 christos str x4,[x1],#8 1049 1.1 christos beq .Lsqueeze_done_ce 1050 1.1 christos 1051 1.1 christos sub x2,x2,#8 1052 1.1 christos subs x10,x10,#8 1053 1.1 christos bhi .Loop_squeeze_ce 1054 1.1 christos 1055 1.1 christos bl KeccakF1600_cext 1056 1.1 christos ldr x30,[sp,#8] 1057 1.1 christos mov x9,x0 1058 1.1 christos mov x10,x3 1059 1.1 christos b .Loop_squeeze_ce 1060 1.1 christos 1061 1.1 christos .align 4 1062 1.1 christos .Lsqueeze_tail_ce: 1063 1.1 christos strb w4,[x1],#1 1064 1.1 christos lsr x4,x4,#8 1065 1.1 christos subs x2,x2,#1 1066 1.1 christos beq .Lsqueeze_done_ce 1067 1.1 christos strb w4,[x1],#1 1068 1.1 christos lsr x4,x4,#8 1069 1.1 christos subs x2,x2,#1 1070 1.1 christos beq .Lsqueeze_done_ce 1071 1.1 christos strb w4,[x1],#1 1072 1.1 christos lsr x4,x4,#8 1073 1.1 christos subs x2,x2,#1 1074 1.1 christos beq .Lsqueeze_done_ce 1075 1.1 christos strb w4,[x1],#1 1076 1.1 christos lsr x4,x4,#8 1077 1.1 christos subs x2,x2,#1 1078 1.1 christos beq .Lsqueeze_done_ce 1079 1.1 christos strb w4,[x1],#1 1080 1.1 christos lsr x4,x4,#8 1081 1.1 christos subs x2,x2,#1 1082 1.1 christos beq .Lsqueeze_done_ce 1083 1.1 christos strb w4,[x1],#1 1084 1.1 christos lsr x4,x4,#8 1085 1.1 christos subs x2,x2,#1 1086 1.1 christos beq .Lsqueeze_done_ce 1087 1.1 
christos strb w4,[x1],#1 1088 1.1 christos 1089 1.1 christos .Lsqueeze_done_ce: 1090 1.1 christos ldr x29,[sp],#16 1091 1.1 christos .inst 0xd50323bf // autiasp 1092 1.1 christos ret 1093 1.1 christos .size SHA3_squeeze_cext,.-SHA3_squeeze_cext 1094 1.1 christos .byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 1095 1.1 christos .align 2 1096