#include "arm_asm.h"
// Copyright 2021-2025 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// ====================================================================
// Written by Ben Avison <bavison (at) riscosopen.org> for the OpenSSL
// project. Rights for redistribution and usage in source and binary
// forms are granted according to the OpenSSL license.
// ====================================================================
//
// This implementation is a translation of bsaes-armv7 for AArch64.
// No attempt has been made to carry across the build switches for
// kernel targets, since the Linux kernel crypto support has moved on
// from when it was based on OpenSSL.

// A lot of hand-scheduling has been performed. Consequently, this code
// doesn't factor out neatly into macros in the same way that the
// AArch32 version did, and there is little to be gained by wrapping it
// up in Perl, and it is presented as pure assembly.


#include "crypto/arm_arch.h"

.text





.type   _bsaes_decrypt8,%function
.align  4
// Processes the eight 128-bit vectors v0-v7 in place using the expanded
// key at x9 (the decryption core, going by the symbol name).
//
// On entry:
//   x9 -> key (previously expanded using _bsaes_key_convert)
//   x10 = number of rounds
//   v0-v7 input data
// On exit:
//   x9-x11 corrupted
//   other general-purpose registers preserved
//   v0-v7 output data
//   v11-v15 preserved
//   other SIMD registers corrupted
//
// Register roles inside the routine:
//   x9       walks the key schedule: 16 bytes for the round-0 key, then
//            128 bytes per pass through .Ldec_loop; the final 16-byte key
//            is read at .Ldec_done without advancing x9.
//   x10      round counter.  It is decremented once at entry and once per
//            pass through .Ldec_sbox, so for a non-zero round count
//            .Ldec_sbox runs exactly x10 times; the last pass leaves via
//            `bcc .Ldec_done`.
//   x11      points into the contiguous .LM0ISR/.LISR/.LISRM0 table.
//   v28      byte-permutation vector for `tbl` in .Ldec_loop.  It is also
//            used as a scratch register inside .Ldec_sbox, which is why
//            it is reloaded on every round.
//   v8-v10, v16-v31  scratch.
//
// NOTE(review): v8-v10 are clobbered although AAPCS64 makes the low 64 bits
// of v8-v15 callee-saved; this is a private calling convention, so callers
// presumably save them - confirm at the call sites.
_bsaes_decrypt8:
        // v8 = round-0 key; x11 -> .LM0ISR.  The post-indexed load of v10
        // leaves x11 pointing at .LISR (the table entries are contiguous).
        ldr     q8, [x9], #16
        adrp    x11, .LM0ISR
        add     x11, x11, #:lo12:.LM0ISR
        movi    v9.16b, #0x55               // mask for the shift-by-1 swap stage
        ldr     q10, [x11], #16             // v10 = .LM0ISR permutation
        movi    v16.16b, #0x33              // mask for the shift-by-2 swap stage
        movi    v17.16b, #0x0f              // mask for the shift-by-4 swap stage
        sub     x10, x10, #1
        // XOR the round-0 key into every input vector, then permute the
        // bytes of each vector through v10.
        eor     v0.16b, v0.16b, v8.16b
        eor     v1.16b, v1.16b, v8.16b
        eor     v2.16b, v2.16b, v8.16b
        eor     v4.16b, v4.16b, v8.16b
        eor     v3.16b, v3.16b, v8.16b
        eor     v5.16b, v5.16b, v8.16b
        tbl     v0.16b, {v0.16b}, v10.16b
        tbl     v1.16b, {v1.16b}, v10.16b
        tbl     v2.16b, {v2.16b}, v10.16b
        tbl     v4.16b, {v4.16b}, v10.16b
        eor     v6.16b, v6.16b, v8.16b
        eor     v7.16b, v7.16b, v8.16b
        tbl     v3.16b, {v3.16b}, v10.16b
        tbl     v5.16b, {v5.16b}, v10.16b
        tbl     v6.16b, {v6.16b}, v10.16b
        // Three swap stages follow, interleaved by hand.  Each one computes,
        // per 64-bit lane and for a register pair (a, b):
        //     t = ((a >> n) ^ b) & mask;  b ^= t;  a ^= t << n
        // Stage n=1, mask 0x55: pairs (v0,v1) (v4,v5) (v2,v3) (v6,v7).
        // NOTE(review): presumably the conversion to bit-sliced form -
        // confirm against bsaes-armv7.
        ushr    v8.2d, v0.2d, #1
        tbl     v7.16b, {v7.16b}, v10.16b
        ushr    v10.2d, v4.2d, #1
        ushr    v18.2d, v2.2d, #1
        eor     v8.16b, v8.16b, v1.16b
        ushr    v19.2d, v6.2d, #1
        eor     v10.16b, v10.16b, v5.16b
        eor     v18.16b, v18.16b, v3.16b
        and     v8.16b, v8.16b, v9.16b
        eor     v19.16b, v19.16b, v7.16b
        and     v10.16b, v10.16b, v9.16b
        and     v18.16b, v18.16b, v9.16b
        eor     v1.16b, v1.16b, v8.16b
        shl     v8.2d, v8.2d, #1
        and     v9.16b, v19.16b, v9.16b
        eor     v5.16b, v5.16b, v10.16b
        shl     v10.2d, v10.2d, #1
        eor     v3.16b, v3.16b, v18.16b
        shl     v18.2d, v18.2d, #1
        eor     v0.16b, v0.16b, v8.16b
        shl     v8.2d, v9.2d, #1
        eor     v7.16b, v7.16b, v9.16b
        eor     v4.16b, v4.16b, v10.16b
        eor     v2.16b, v2.16b, v18.16b
        // Stage n=2, mask 0x33: pairs (v1,v3) (v0,v2) (v5,v7) (v4,v6).
        ushr    v9.2d, v1.2d, #2
        eor     v6.16b, v6.16b, v8.16b      // completes stage 1 for v6
        ushr    v8.2d, v0.2d, #2
        ushr    v10.2d, v5.2d, #2
        ushr    v18.2d, v4.2d, #2
        eor     v9.16b, v9.16b, v3.16b
        eor     v8.16b, v8.16b, v2.16b
        eor     v10.16b, v10.16b, v7.16b
        eor     v18.16b, v18.16b, v6.16b
        and     v9.16b, v9.16b, v16.16b
        and     v8.16b, v8.16b, v16.16b
        and     v10.16b, v10.16b, v16.16b
        and     v16.16b, v18.16b, v16.16b
        eor     v3.16b, v3.16b, v9.16b
        shl     v9.2d, v9.2d, #2
        eor     v2.16b, v2.16b, v8.16b
        shl     v8.2d, v8.2d, #2
        eor     v7.16b, v7.16b, v10.16b
        shl     v10.2d, v10.2d, #2
        eor     v6.16b, v6.16b, v16.16b
        shl     v16.2d, v16.2d, #2
        eor     v1.16b, v1.16b, v9.16b
        eor     v0.16b, v0.16b, v8.16b
        eor     v5.16b, v5.16b, v10.16b
        eor     v4.16b, v4.16b, v16.16b
        // Stage n=4, mask 0x0f: pairs (v3,v7) (v2,v6) (v1,v5) (v0,v4).
        ushr    v8.2d, v3.2d, #4
        ushr    v9.2d, v2.2d, #4
        ushr    v10.2d, v1.2d, #4
        ushr    v16.2d, v0.2d, #4
        eor     v8.16b, v8.16b, v7.16b
        eor     v9.16b, v9.16b, v6.16b
        eor     v10.16b, v10.16b, v5.16b
        eor     v16.16b, v16.16b, v4.16b
        and     v8.16b, v8.16b, v17.16b
        and     v9.16b, v9.16b, v17.16b
        and     v10.16b, v10.16b, v17.16b
        and     v16.16b, v16.16b, v17.16b
        eor     v7.16b, v7.16b, v8.16b
        shl     v8.2d, v8.2d, #4
        eor     v6.16b, v6.16b, v9.16b
        shl     v9.2d, v9.2d, #4
        eor     v5.16b, v5.16b, v10.16b
        shl     v10.2d, v10.2d, #4
        eor     v4.16b, v4.16b, v16.16b
        shl     v16.2d, v16.2d, #4
        eor     v3.16b, v3.16b, v8.16b
        eor     v2.16b, v2.16b, v9.16b
        eor     v1.16b, v1.16b, v10.16b
        eor     v0.16b, v0.16b, v16.16b
        b       .Ldec_sbox                  // first round skips the key load below
.align  4
.Ldec_loop:
        // Load eight 16-byte vectors of round-key material (128 bytes,
        // x9 post-incremented) and XOR one into each of v0-v7.
        ld1     {v16.16b, v17.16b, v18.16b, v19.16b}, [x9], #64
        ldp     q8, q9, [x9], #32
        eor     v0.16b, v16.16b, v0.16b
        ldr     q10, [x9], #16
        eor     v1.16b, v17.16b, v1.16b
        ldr     q16, [x9], #16
        eor     v2.16b, v18.16b, v2.16b
        eor     v3.16b, v19.16b, v3.16b
        eor     v4.16b, v8.16b, v4.16b
        eor     v5.16b, v9.16b, v5.16b
        eor     v6.16b, v10.16b, v6.16b
        eor     v7.16b, v16.16b, v7.16b
        // Byte-permute every vector through v28 (.LISR, or .LISRM0 on the
        // final pass).
        tbl     v0.16b, {v0.16b}, v28.16b
        tbl     v1.16b, {v1.16b}, v28.16b
        tbl     v2.16b, {v2.16b}, v28.16b
        tbl     v3.16b, {v3.16b}, v28.16b
        tbl     v4.16b, {v4.16b}, v28.16b
        tbl     v5.16b, {v5.16b}, v28.16b
        tbl     v6.16b, {v6.16b}, v28.16b
        tbl     v7.16b, {v7.16b}, v28.16b
.Ldec_sbox:
        // Pure and/orr/eor/bsl network over v0-v7 using v8-v10 and v16-v31
        // as temporaries (v28 included).
        // NOTE(review): presumably the bit-sliced inverse S-box, per the
        // label name - confirm against bsaes-armv7.
        eor     v1.16b, v1.16b, v4.16b
        eor     v3.16b, v3.16b, v4.16b
        // Sets the flags consumed by `bcc` and `bne` further down; no
        // instruction in between writes NZCV.
        subs    x10, x10, #1
        eor     v4.16b, v4.16b, v7.16b
        eor     v2.16b, v2.16b, v7.16b
        eor     v1.16b, v1.16b, v6.16b
        eor     v6.16b, v6.16b, v4.16b
        eor     v2.16b, v2.16b, v5.16b
        eor     v0.16b, v0.16b, v1.16b
        eor     v7.16b, v7.16b, v6.16b
        eor     v8.16b, v6.16b, v2.16b
        and     v9.16b, v4.16b, v6.16b
        eor     v10.16b, v2.16b, v6.16b
        eor     v3.16b, v3.16b, v0.16b
        eor     v5.16b, v5.16b, v0.16b
        eor     v16.16b, v7.16b, v4.16b
        eor     v17.16b, v4.16b, v0.16b
        and     v18.16b, v0.16b, v2.16b
        eor     v19.16b, v7.16b, v4.16b
        eor     v1.16b, v1.16b, v3.16b
        eor     v20.16b, v3.16b, v0.16b
        eor     v21.16b, v5.16b, v2.16b
        eor     v22.16b, v3.16b, v7.16b
        and     v8.16b, v17.16b, v8.16b
        orr     v17.16b, v3.16b, v5.16b
        eor     v23.16b, v1.16b, v6.16b
        eor     v24.16b, v20.16b, v16.16b
        eor     v25.16b, v1.16b, v5.16b
        orr     v26.16b, v20.16b, v21.16b
        and     v20.16b, v20.16b, v21.16b
        and     v27.16b, v7.16b, v1.16b
        eor     v21.16b, v21.16b, v23.16b
        orr     v28.16b, v16.16b, v23.16b
        orr     v29.16b, v22.16b, v25.16b
        eor     v26.16b, v26.16b, v8.16b
        and     v16.16b, v16.16b, v23.16b
        and     v22.16b, v22.16b, v25.16b
        and     v21.16b, v24.16b, v21.16b
        eor     v8.16b, v28.16b, v8.16b
        eor     v23.16b, v5.16b, v2.16b
        eor     v24.16b, v1.16b, v6.16b
        eor     v16.16b, v16.16b, v22.16b
        eor     v22.16b, v3.16b, v0.16b
        eor     v25.16b, v29.16b, v21.16b
        eor     v21.16b, v26.16b, v21.16b
        eor     v8.16b, v8.16b, v20.16b
        eor     v26.16b, v23.16b, v24.16b
        eor     v16.16b, v16.16b, v20.16b
        eor     v28.16b, v22.16b, v19.16b
        eor     v20.16b, v25.16b, v20.16b
        eor     v9.16b, v21.16b, v9.16b
        eor     v8.16b, v8.16b, v18.16b
        eor     v18.16b, v5.16b, v1.16b
        eor     v21.16b, v16.16b, v17.16b
        eor     v16.16b, v16.16b, v17.16b
        eor     v17.16b, v20.16b, v27.16b
        eor     v20.16b, v3.16b, v7.16b
        eor     v25.16b, v9.16b, v8.16b
        eor     v27.16b, v0.16b, v4.16b
        and     v29.16b, v9.16b, v17.16b
        eor     v30.16b, v8.16b, v29.16b
        eor     v31.16b, v21.16b, v29.16b
        eor     v29.16b, v21.16b, v29.16b
        bsl     v30.16b, v17.16b, v21.16b
        bsl     v31.16b, v9.16b, v8.16b
        bsl     v16.16b, v30.16b, v29.16b
        bsl     v21.16b, v29.16b, v30.16b
        eor     v8.16b, v31.16b, v30.16b
        and     v1.16b, v1.16b, v31.16b
        and     v9.16b, v16.16b, v31.16b
        and     v6.16b, v6.16b, v30.16b
        eor     v16.16b, v17.16b, v21.16b
        and     v4.16b, v4.16b, v30.16b
        eor     v17.16b, v8.16b, v30.16b
        and     v21.16b, v24.16b, v8.16b
        eor     v9.16b, v9.16b, v25.16b
        and     v19.16b, v19.16b, v8.16b
        eor     v24.16b, v30.16b, v16.16b
        eor     v25.16b, v30.16b, v16.16b
        and     v7.16b, v7.16b, v17.16b
        and     v10.16b, v10.16b, v16.16b
        eor     v29.16b, v9.16b, v16.16b
        eor     v30.16b, v31.16b, v9.16b
        and     v0.16b, v24.16b, v0.16b
        and     v9.16b, v18.16b, v9.16b
        and     v2.16b, v25.16b, v2.16b
        eor     v10.16b, v10.16b, v6.16b
        eor     v18.16b, v29.16b, v16.16b
        and     v5.16b, v30.16b, v5.16b
        eor     v24.16b, v8.16b, v29.16b
        and     v25.16b, v26.16b, v29.16b
        and     v26.16b, v28.16b, v29.16b
        eor     v8.16b, v8.16b, v29.16b
        eor     v17.16b, v17.16b, v18.16b
        eor     v5.16b, v1.16b, v5.16b
        and     v23.16b, v24.16b, v23.16b
        eor     v21.16b, v21.16b, v25.16b
        eor     v19.16b, v19.16b, v26.16b
        eor     v0.16b, v4.16b, v0.16b
        and     v3.16b, v17.16b, v3.16b
        eor     v1.16b, v9.16b, v1.16b
        eor     v9.16b, v25.16b, v23.16b
        eor     v5.16b, v5.16b, v21.16b
        eor     v2.16b, v6.16b, v2.16b
        and     v6.16b, v8.16b, v22.16b
        eor     v3.16b, v7.16b, v3.16b
        and     v8.16b, v20.16b, v18.16b
        eor     v10.16b, v10.16b, v9.16b
        eor     v0.16b, v0.16b, v19.16b
        eor     v9.16b, v1.16b, v9.16b
        eor     v1.16b, v2.16b, v21.16b
        eor     v3.16b, v3.16b, v19.16b
        and     v16.16b, v27.16b, v16.16b
        eor     v17.16b, v26.16b, v6.16b
        eor     v6.16b, v8.16b, v7.16b
        eor     v7.16b, v1.16b, v9.16b
        eor     v1.16b, v5.16b, v3.16b
        eor     v2.16b, v10.16b, v3.16b
        eor     v4.16b, v16.16b, v4.16b
        eor     v8.16b, v6.16b, v17.16b
        eor     v5.16b, v9.16b, v3.16b
        eor     v9.16b, v0.16b, v1.16b
        eor     v6.16b, v7.16b, v1.16b
        eor     v0.16b, v4.16b, v17.16b
        eor     v4.16b, v8.16b, v7.16b
        eor     v7.16b, v9.16b, v2.16b
        eor     v8.16b, v3.16b, v0.16b
        eor     v7.16b, v7.16b, v5.16b
        eor     v3.16b, v4.16b, v7.16b
        eor     v4.16b, v7.16b, v0.16b
        eor     v7.16b, v8.16b, v3.16b
        // Carry clear means the `subs` above borrowed, i.e. x10 was already
        // zero: this was the final pass, so skip the mixing layer.
        bcc     .Ldec_done
        // Linear layer built from `ext` rotations (by 8 and by 12 bytes)
        // and XORs.
        // NOTE(review): presumably InvMixColumns in bit-sliced form -
        // confirm against bsaes-armv7.
        ext     v8.16b, v0.16b, v0.16b, #8
        ext     v9.16b, v1.16b, v1.16b, #8
        ldr     q28, [x11]                  // load from .LISR in common case (x10 > 0)
        ext     v10.16b, v6.16b, v6.16b, #8
        ext     v16.16b, v3.16b, v3.16b, #8
        ext     v17.16b, v5.16b, v5.16b, #8
        ext     v18.16b, v4.16b, v4.16b, #8
        eor     v8.16b, v8.16b, v0.16b
        eor     v9.16b, v9.16b, v1.16b
        eor     v10.16b, v10.16b, v6.16b
        eor     v16.16b, v16.16b, v3.16b
        eor     v17.16b, v17.16b, v5.16b
        ext     v19.16b, v2.16b, v2.16b, #8
        ext     v20.16b, v7.16b, v7.16b, #8
        eor     v18.16b, v18.16b, v4.16b
        eor     v6.16b, v6.16b, v8.16b
        eor     v8.16b, v2.16b, v10.16b
        eor     v4.16b, v4.16b, v9.16b
        eor     v2.16b, v19.16b, v2.16b
        eor     v9.16b, v20.16b, v7.16b
        eor     v0.16b, v0.16b, v16.16b
        eor     v1.16b, v1.16b, v16.16b
        eor     v6.16b, v6.16b, v17.16b
        eor     v8.16b, v8.16b, v16.16b
        eor     v7.16b, v7.16b, v18.16b
        eor     v4.16b, v4.16b, v16.16b
        eor     v2.16b, v3.16b, v2.16b
        eor     v1.16b, v1.16b, v17.16b
        eor     v3.16b, v5.16b, v9.16b
        eor     v5.16b, v8.16b, v17.16b
        eor     v7.16b, v7.16b, v17.16b
        ext     v8.16b, v0.16b, v0.16b, #12
        ext     v9.16b, v6.16b, v6.16b, #12
        ext     v10.16b, v4.16b, v4.16b, #12
        ext     v16.16b, v1.16b, v1.16b, #12
        ext     v17.16b, v5.16b, v5.16b, #12
        ext     v18.16b, v7.16b, v7.16b, #12
        eor     v0.16b, v0.16b, v8.16b
        eor     v6.16b, v6.16b, v9.16b
        eor     v4.16b, v4.16b, v10.16b
        ext     v19.16b, v2.16b, v2.16b, #12
        ext     v20.16b, v3.16b, v3.16b, #12
        eor     v1.16b, v1.16b, v16.16b
        eor     v5.16b, v5.16b, v17.16b
        eor     v7.16b, v7.16b, v18.16b
        eor     v2.16b, v2.16b, v19.16b
        eor     v16.16b, v16.16b, v0.16b
        eor     v3.16b, v3.16b, v20.16b
        eor     v17.16b, v17.16b, v4.16b
        eor     v10.16b, v10.16b, v6.16b
        ext     v0.16b, v0.16b, v0.16b, #8
        eor     v9.16b, v9.16b, v1.16b
        ext     v1.16b, v1.16b, v1.16b, #8
        eor     v8.16b, v8.16b, v3.16b
        eor     v16.16b, v16.16b, v3.16b
        eor     v18.16b, v18.16b, v5.16b
        eor     v19.16b, v19.16b, v7.16b
        ext     v21.16b, v5.16b, v5.16b, #8
        ext     v5.16b, v7.16b, v7.16b, #8
        eor     v7.16b, v20.16b, v2.16b
        ext     v4.16b, v4.16b, v4.16b, #8
        ext     v20.16b, v3.16b, v3.16b, #8
        eor     v17.16b, v17.16b, v3.16b
        ext     v2.16b, v2.16b, v2.16b, #8
        eor     v3.16b, v10.16b, v3.16b
        ext     v10.16b, v6.16b, v6.16b, #8
        eor     v0.16b, v0.16b, v8.16b
        eor     v1.16b, v1.16b, v16.16b
        eor     v5.16b, v5.16b, v18.16b
        eor     v3.16b, v3.16b, v4.16b
        eor     v7.16b, v20.16b, v7.16b
        eor     v6.16b, v2.16b, v19.16b
        eor     v4.16b, v21.16b, v17.16b
        eor     v2.16b, v10.16b, v9.16b
        // Flags still come from the `subs` in .Ldec_sbox.  Non-zero x10:
        // another middle round with v28 = .LISR.  Zero: one pass remains,
        // so switch v28 to .LISRM0 (the pre-index also advances x11).
        bne     .Ldec_loop
        ldr     q28, [x11, #16]!            // load from .LISRM0 on last round (x10 == 0)
        b       .Ldec_loop
.align  4
.Ldec_done:
        // Same three swap stages as at entry (shift 1/2/4 with masks
        // 0x55/0x33/0x0f), applied to a different register pairing, with
        // the load of the final key (v10 = [x9], no post-increment) and the
        // mask set-up interleaved.
        // NOTE(review): presumably the conversion back out of bit-sliced
        // form - confirm against bsaes-armv7.
        ushr    v8.2d, v0.2d, #1
        movi    v9.16b, #0x55
        ldr     q10, [x9]
        ushr    v16.2d, v2.2d, #1
        movi    v17.16b, #0x33
        ushr    v18.2d, v6.2d, #1
        movi    v19.16b, #0x0f
        eor     v8.16b, v8.16b, v1.16b
        ushr    v20.2d, v3.2d, #1
        eor     v16.16b, v16.16b, v7.16b
        eor     v18.16b, v18.16b, v4.16b
        and     v8.16b, v8.16b, v9.16b
        eor     v20.16b, v20.16b, v5.16b
        and     v16.16b, v16.16b, v9.16b
        and     v18.16b, v18.16b, v9.16b
        shl     v21.2d, v8.2d, #1
        eor     v1.16b, v1.16b, v8.16b
        and     v8.16b, v20.16b, v9.16b
        eor     v7.16b, v7.16b, v16.16b
        shl     v9.2d, v16.2d, #1
        eor     v4.16b, v4.16b, v18.16b
        shl     v16.2d, v18.2d, #1
        eor     v0.16b, v0.16b, v21.16b
        shl     v18.2d, v8.2d, #1
        eor     v5.16b, v5.16b, v8.16b
        eor     v2.16b, v2.16b, v9.16b
        eor     v6.16b, v6.16b, v16.16b
        ushr    v8.2d, v1.2d, #2
        eor     v3.16b, v3.16b, v18.16b
        ushr    v9.2d, v0.2d, #2
        ushr    v16.2d, v7.2d, #2
        ushr    v18.2d, v2.2d, #2
        eor     v8.16b, v8.16b, v4.16b
        eor     v9.16b, v9.16b, v6.16b
        eor     v16.16b, v16.16b, v5.16b
        eor     v18.16b, v18.16b, v3.16b
        and     v8.16b, v8.16b, v17.16b
        and     v9.16b, v9.16b, v17.16b
        and     v16.16b, v16.16b, v17.16b
        and     v17.16b, v18.16b, v17.16b
        eor     v4.16b, v4.16b, v8.16b
        shl     v8.2d, v8.2d, #2
        eor     v6.16b, v6.16b, v9.16b
        shl     v9.2d, v9.2d, #2
        eor     v5.16b, v5.16b, v16.16b
        shl     v16.2d, v16.2d, #2
        eor     v3.16b, v3.16b, v17.16b
        shl     v17.2d, v17.2d, #2
        eor     v1.16b, v1.16b, v8.16b
        eor     v0.16b, v0.16b, v9.16b
        eor     v7.16b, v7.16b, v16.16b
        eor     v2.16b, v2.16b, v17.16b
        ushr    v8.2d, v4.2d, #4
        ushr    v9.2d, v6.2d, #4
        ushr    v16.2d, v1.2d, #4
        ushr    v17.2d, v0.2d, #4
        eor     v8.16b, v8.16b, v5.16b
        eor     v9.16b, v9.16b, v3.16b
        eor     v16.16b, v16.16b, v7.16b
        eor     v17.16b, v17.16b, v2.16b
        and     v8.16b, v8.16b, v19.16b
        and     v9.16b, v9.16b, v19.16b
        and     v16.16b, v16.16b, v19.16b
        and     v17.16b, v17.16b, v19.16b
        eor     v5.16b, v5.16b, v8.16b
        shl     v8.2d, v8.2d, #4
        eor     v3.16b, v3.16b, v9.16b
        shl     v9.2d, v9.2d, #4
        eor     v7.16b, v7.16b, v16.16b
        shl     v16.2d, v16.2d, #4
        eor     v2.16b, v2.16b, v17.16b
        shl     v17.2d, v17.2d, #4
        eor     v4.16b, v4.16b, v8.16b
        eor     v6.16b, v6.16b, v9.16b
        // XOR the final key (v10) into all eight outputs, interleaved with
        // the tail of the last swap stage.
        eor     v7.16b, v7.16b, v10.16b
        eor     v1.16b, v1.16b, v16.16b
        eor     v2.16b, v2.16b, v10.16b
        eor     v0.16b, v0.16b, v17.16b
        eor     v4.16b, v4.16b, v10.16b
        eor     v6.16b, v6.16b, v10.16b
        eor     v3.16b, v3.16b, v10.16b
        eor     v5.16b, v5.16b, v10.16b
        eor     v1.16b, v1.16b, v10.16b
        eor     v0.16b, v0.16b, v10.16b
        ret
.size   _bsaes_decrypt8,.-_bsaes_decrypt8

.section .rodata
.type   _bsaes_consts,%object
.align  6
_bsaes_consts:
// Byte-index vectors consumed by `tbl`.  The decrypt and encrypt cores
// step through their three entries with 16-byte pointer increments, so
// the order and adjacency of the labels below must not change.

// InvShiftRows constants
// Used in _bsaes_decrypt8, which assumes contiguity
//   .LM0ISR used with round 0 key
//   .LISR   used with middle round keys
//   .LISRM0 used with final round key
.LM0ISR:
.quad   0x0a0e0206070b0f03, 0x0004080c0d010509
.LISR:
.quad   0x0504070602010003, 0x0f0e0d0c080b0a09
.LISRM0:
.quad   0x01040b0e0205080f, 0x0306090c00070a0d

// ShiftRows constants
// Used in _bsaes_encrypt8, which assumes contiguity
//   .LM0SR used with round 0 key
//   .LSR   used with middle round keys
//   .LSRM0 used with final round key
.LM0SR:
.quad   0x0a0e02060f03070b, 0x0004080c05090d01
.LSR:
.quad   0x0504070600030201, 0x0f0e0d0c0a09080b
.LSRM0:
.quad   0x0304090e00050a0f, 0x01060b0c0207080d

.LM0_bigendian:
.quad   0x02060a0e03070b0f, 0x0004080c0105090d
.LM0_littleendian:
.quad   0x0105090d0004080c, 0x03070b0f02060a0e

// Used in ossl_bsaes_ctr32_encrypt_blocks, prior to dropping into
// _bsaes_encrypt8_alt, for round 0 key in place of .LM0SR
.LREVM0SR:
.quad   0x090d01050c000408, 0x03070b0f060a0e02

.align  6
.size   _bsaes_consts,.-_bsaes_consts

.previous

.type   _bsaes_encrypt8,%function
.align  4
// On entry:
//   x9 -> key (previously expanded using _bsaes_key_convert)
//   x10 = number of rounds
//   v0-v7 input data
// On exit:
//
x9-x11 corrupted 516 1.1 christos // other general-purpose registers preserved 517 1.1 christos // v0-v7 output data 518 1.1 christos // v11-v15 preserved 519 1.1 christos // other SIMD registers corrupted 520 1.1 christos _bsaes_encrypt8: 521 1.1 christos ldr q8, [x9], #16 522 1.1 christos adrp x11, .LM0SR 523 1.1 christos add x11, x11, #:lo12:.LM0SR 524 1.1 christos ldr q9, [x11], #16 525 1.1 christos _bsaes_encrypt8_alt: 526 1.1 christos eor v0.16b, v0.16b, v8.16b 527 1.1 christos eor v1.16b, v1.16b, v8.16b 528 1.1 christos sub x10, x10, #1 529 1.1 christos eor v2.16b, v2.16b, v8.16b 530 1.1 christos eor v4.16b, v4.16b, v8.16b 531 1.1 christos eor v3.16b, v3.16b, v8.16b 532 1.1 christos eor v5.16b, v5.16b, v8.16b 533 1.1 christos tbl v0.16b, {v0.16b}, v9.16b 534 1.1 christos tbl v1.16b, {v1.16b}, v9.16b 535 1.1 christos tbl v2.16b, {v2.16b}, v9.16b 536 1.1 christos tbl v4.16b, {v4.16b}, v9.16b 537 1.1 christos eor v6.16b, v6.16b, v8.16b 538 1.1 christos eor v7.16b, v7.16b, v8.16b 539 1.1 christos tbl v3.16b, {v3.16b}, v9.16b 540 1.1 christos tbl v5.16b, {v5.16b}, v9.16b 541 1.1 christos tbl v6.16b, {v6.16b}, v9.16b 542 1.1 christos ushr v8.2d, v0.2d, #1 543 1.1 christos movi v10.16b, #0x55 544 1.1 christos tbl v7.16b, {v7.16b}, v9.16b 545 1.1 christos ushr v9.2d, v4.2d, #1 546 1.1 christos movi v16.16b, #0x33 547 1.1 christos ushr v17.2d, v2.2d, #1 548 1.1 christos eor v8.16b, v8.16b, v1.16b 549 1.1 christos movi v18.16b, #0x0f 550 1.1 christos ushr v19.2d, v6.2d, #1 551 1.1 christos eor v9.16b, v9.16b, v5.16b 552 1.1 christos eor v17.16b, v17.16b, v3.16b 553 1.1 christos and v8.16b, v8.16b, v10.16b 554 1.1 christos eor v19.16b, v19.16b, v7.16b 555 1.1 christos and v9.16b, v9.16b, v10.16b 556 1.1 christos and v17.16b, v17.16b, v10.16b 557 1.1 christos eor v1.16b, v1.16b, v8.16b 558 1.1 christos shl v8.2d, v8.2d, #1 559 1.1 christos and v10.16b, v19.16b, v10.16b 560 1.1 christos eor v5.16b, v5.16b, v9.16b 561 1.1 christos shl v9.2d, v9.2d, #1 562 1.1 christos eor 
v3.16b, v3.16b, v17.16b 563 1.1 christos shl v17.2d, v17.2d, #1 564 1.1 christos eor v0.16b, v0.16b, v8.16b 565 1.1 christos shl v8.2d, v10.2d, #1 566 1.1 christos eor v7.16b, v7.16b, v10.16b 567 1.1 christos eor v4.16b, v4.16b, v9.16b 568 1.1 christos eor v2.16b, v2.16b, v17.16b 569 1.1 christos ushr v9.2d, v1.2d, #2 570 1.1 christos eor v6.16b, v6.16b, v8.16b 571 1.1 christos ushr v8.2d, v0.2d, #2 572 1.1 christos ushr v10.2d, v5.2d, #2 573 1.1 christos ushr v17.2d, v4.2d, #2 574 1.1 christos eor v9.16b, v9.16b, v3.16b 575 1.1 christos eor v8.16b, v8.16b, v2.16b 576 1.1 christos eor v10.16b, v10.16b, v7.16b 577 1.1 christos eor v17.16b, v17.16b, v6.16b 578 1.1 christos and v9.16b, v9.16b, v16.16b 579 1.1 christos and v8.16b, v8.16b, v16.16b 580 1.1 christos and v10.16b, v10.16b, v16.16b 581 1.1 christos and v16.16b, v17.16b, v16.16b 582 1.1 christos eor v3.16b, v3.16b, v9.16b 583 1.1 christos shl v9.2d, v9.2d, #2 584 1.1 christos eor v2.16b, v2.16b, v8.16b 585 1.1 christos shl v8.2d, v8.2d, #2 586 1.1 christos eor v7.16b, v7.16b, v10.16b 587 1.1 christos shl v10.2d, v10.2d, #2 588 1.1 christos eor v6.16b, v6.16b, v16.16b 589 1.1 christos shl v16.2d, v16.2d, #2 590 1.1 christos eor v1.16b, v1.16b, v9.16b 591 1.1 christos eor v0.16b, v0.16b, v8.16b 592 1.1 christos eor v5.16b, v5.16b, v10.16b 593 1.1 christos eor v4.16b, v4.16b, v16.16b 594 1.1 christos ushr v8.2d, v3.2d, #4 595 1.1 christos ushr v9.2d, v2.2d, #4 596 1.1 christos ushr v10.2d, v1.2d, #4 597 1.1 christos ushr v16.2d, v0.2d, #4 598 1.1 christos eor v8.16b, v8.16b, v7.16b 599 1.1 christos eor v9.16b, v9.16b, v6.16b 600 1.1 christos eor v10.16b, v10.16b, v5.16b 601 1.1 christos eor v16.16b, v16.16b, v4.16b 602 1.1 christos and v8.16b, v8.16b, v18.16b 603 1.1 christos and v9.16b, v9.16b, v18.16b 604 1.1 christos and v10.16b, v10.16b, v18.16b 605 1.1 christos and v16.16b, v16.16b, v18.16b 606 1.1 christos eor v7.16b, v7.16b, v8.16b 607 1.1 christos shl v8.2d, v8.2d, #4 608 1.1 christos eor v6.16b, v6.16b, 
v9.16b 609 1.1 christos shl v9.2d, v9.2d, #4 610 1.1 christos eor v5.16b, v5.16b, v10.16b 611 1.1 christos shl v10.2d, v10.2d, #4 612 1.1 christos eor v4.16b, v4.16b, v16.16b 613 1.1 christos shl v16.2d, v16.2d, #4 614 1.1 christos eor v3.16b, v3.16b, v8.16b 615 1.1 christos eor v2.16b, v2.16b, v9.16b 616 1.1 christos eor v1.16b, v1.16b, v10.16b 617 1.1 christos eor v0.16b, v0.16b, v16.16b 618 1.1 christos b .Lenc_sbox 619 1.1 christos .align 4 620 1.1 christos .Lenc_loop: 621 1.1 christos ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [x9], #64 622 1.1 christos ldp q8, q9, [x9], #32 623 1.1 christos eor v0.16b, v16.16b, v0.16b 624 1.1 christos ldr q10, [x9], #16 625 1.1 christos eor v1.16b, v17.16b, v1.16b 626 1.1 christos ldr q16, [x9], #16 627 1.1 christos eor v2.16b, v18.16b, v2.16b 628 1.1 christos eor v3.16b, v19.16b, v3.16b 629 1.1 christos eor v4.16b, v8.16b, v4.16b 630 1.1 christos eor v5.16b, v9.16b, v5.16b 631 1.1 christos eor v6.16b, v10.16b, v6.16b 632 1.1 christos eor v7.16b, v16.16b, v7.16b 633 1.1 christos tbl v0.16b, {v0.16b}, v28.16b 634 1.1 christos tbl v1.16b, {v1.16b}, v28.16b 635 1.1 christos tbl v2.16b, {v2.16b}, v28.16b 636 1.1 christos tbl v3.16b, {v3.16b}, v28.16b 637 1.1 christos tbl v4.16b, {v4.16b}, v28.16b 638 1.1 christos tbl v5.16b, {v5.16b}, v28.16b 639 1.1 christos tbl v6.16b, {v6.16b}, v28.16b 640 1.1 christos tbl v7.16b, {v7.16b}, v28.16b 641 1.1 christos .Lenc_sbox: 642 1.1 christos eor v5.16b, v5.16b, v6.16b 643 1.1 christos eor v3.16b, v3.16b, v0.16b 644 1.1 christos subs x10, x10, #1 645 1.1 christos eor v2.16b, v2.16b, v1.16b 646 1.1 christos eor v5.16b, v5.16b, v0.16b 647 1.1 christos eor v8.16b, v3.16b, v7.16b 648 1.1 christos eor v6.16b, v6.16b, v2.16b 649 1.1 christos eor v7.16b, v7.16b, v5.16b 650 1.1 christos eor v8.16b, v8.16b, v4.16b 651 1.1 christos eor v3.16b, v6.16b, v3.16b 652 1.1 christos eor v4.16b, v4.16b, v5.16b 653 1.1 christos eor v6.16b, v1.16b, v5.16b 654 1.1 christos eor v2.16b, v2.16b, v7.16b 655 1.1 
christos eor v1.16b, v8.16b, v1.16b 656 1.1 christos eor v8.16b, v7.16b, v4.16b 657 1.1 christos eor v9.16b, v3.16b, v0.16b 658 1.1 christos eor v10.16b, v7.16b, v6.16b 659 1.1 christos eor v16.16b, v5.16b, v3.16b 660 1.1 christos eor v17.16b, v6.16b, v2.16b 661 1.1 christos eor v18.16b, v5.16b, v1.16b 662 1.1 christos eor v19.16b, v2.16b, v4.16b 663 1.1 christos eor v20.16b, v1.16b, v0.16b 664 1.1 christos orr v21.16b, v8.16b, v9.16b 665 1.1 christos orr v22.16b, v10.16b, v16.16b 666 1.1 christos eor v23.16b, v8.16b, v17.16b 667 1.1 christos eor v24.16b, v9.16b, v18.16b 668 1.1 christos and v19.16b, v19.16b, v20.16b 669 1.1 christos orr v20.16b, v17.16b, v18.16b 670 1.1 christos and v8.16b, v8.16b, v9.16b 671 1.1 christos and v9.16b, v17.16b, v18.16b 672 1.1 christos and v17.16b, v23.16b, v24.16b 673 1.1 christos and v10.16b, v10.16b, v16.16b 674 1.1 christos eor v16.16b, v21.16b, v19.16b 675 1.1 christos eor v18.16b, v20.16b, v19.16b 676 1.1 christos and v19.16b, v2.16b, v1.16b 677 1.1 christos and v20.16b, v6.16b, v5.16b 678 1.1 christos eor v21.16b, v22.16b, v17.16b 679 1.1 christos eor v9.16b, v9.16b, v10.16b 680 1.1 christos eor v10.16b, v16.16b, v17.16b 681 1.1 christos eor v16.16b, v18.16b, v8.16b 682 1.1 christos and v17.16b, v4.16b, v0.16b 683 1.1 christos orr v18.16b, v7.16b, v3.16b 684 1.1 christos eor v21.16b, v21.16b, v8.16b 685 1.1 christos eor v8.16b, v9.16b, v8.16b 686 1.1 christos eor v9.16b, v10.16b, v19.16b 687 1.1 christos eor v10.16b, v3.16b, v0.16b 688 1.1 christos eor v16.16b, v16.16b, v17.16b 689 1.1 christos eor v17.16b, v5.16b, v1.16b 690 1.1 christos eor v19.16b, v21.16b, v20.16b 691 1.1 christos eor v20.16b, v8.16b, v18.16b 692 1.1 christos eor v8.16b, v8.16b, v18.16b 693 1.1 christos eor v18.16b, v7.16b, v4.16b 694 1.1 christos eor v21.16b, v9.16b, v16.16b 695 1.1 christos eor v22.16b, v6.16b, v2.16b 696 1.1 christos and v23.16b, v9.16b, v19.16b 697 1.1 christos eor v24.16b, v10.16b, v17.16b 698 1.1 christos eor v25.16b, v0.16b, v1.16b 
699 1.1 christos eor v26.16b, v7.16b, v6.16b 700 1.1 christos eor v27.16b, v18.16b, v22.16b 701 1.1 christos eor v28.16b, v3.16b, v5.16b 702 1.1 christos eor v29.16b, v16.16b, v23.16b 703 1.1 christos eor v30.16b, v20.16b, v23.16b 704 1.1 christos eor v23.16b, v20.16b, v23.16b 705 1.1 christos eor v31.16b, v4.16b, v2.16b 706 1.1 christos bsl v29.16b, v19.16b, v20.16b 707 1.1 christos bsl v30.16b, v9.16b, v16.16b 708 1.1 christos bsl v8.16b, v29.16b, v23.16b 709 1.1 christos bsl v20.16b, v23.16b, v29.16b 710 1.1 christos eor v9.16b, v30.16b, v29.16b 711 1.1 christos and v5.16b, v5.16b, v30.16b 712 1.1 christos and v8.16b, v8.16b, v30.16b 713 1.1 christos and v1.16b, v1.16b, v29.16b 714 1.1 christos eor v16.16b, v19.16b, v20.16b 715 1.1 christos and v2.16b, v2.16b, v29.16b 716 1.1 christos eor v19.16b, v9.16b, v29.16b 717 1.1 christos and v17.16b, v17.16b, v9.16b 718 1.1 christos eor v8.16b, v8.16b, v21.16b 719 1.1 christos and v20.16b, v22.16b, v9.16b 720 1.1 christos eor v21.16b, v29.16b, v16.16b 721 1.1 christos eor v22.16b, v29.16b, v16.16b 722 1.1 christos and v23.16b, v25.16b, v16.16b 723 1.1 christos and v6.16b, v6.16b, v19.16b 724 1.1 christos eor v25.16b, v8.16b, v16.16b 725 1.1 christos eor v29.16b, v30.16b, v8.16b 726 1.1 christos and v4.16b, v21.16b, v4.16b 727 1.1 christos and v8.16b, v28.16b, v8.16b 728 1.1 christos and v0.16b, v22.16b, v0.16b 729 1.1 christos eor v21.16b, v23.16b, v1.16b 730 1.1 christos eor v22.16b, v9.16b, v25.16b 731 1.1 christos eor v9.16b, v9.16b, v25.16b 732 1.1 christos eor v23.16b, v25.16b, v16.16b 733 1.1 christos and v3.16b, v29.16b, v3.16b 734 1.1 christos and v24.16b, v24.16b, v25.16b 735 1.1 christos and v25.16b, v27.16b, v25.16b 736 1.1 christos and v10.16b, v22.16b, v10.16b 737 1.1 christos and v9.16b, v9.16b, v18.16b 738 1.1 christos eor v18.16b, v19.16b, v23.16b 739 1.1 christos and v19.16b, v26.16b, v23.16b 740 1.1 christos eor v3.16b, v5.16b, v3.16b 741 1.1 christos eor v17.16b, v17.16b, v24.16b 742 1.1 christos eor 
v10.16b, v24.16b, v10.16b 743 1.1 christos and v16.16b, v31.16b, v16.16b 744 1.1 christos eor v20.16b, v20.16b, v25.16b 745 1.1 christos eor v9.16b, v25.16b, v9.16b 746 1.1 christos eor v4.16b, v2.16b, v4.16b 747 1.1 christos and v7.16b, v18.16b, v7.16b 748 1.1 christos eor v18.16b, v19.16b, v6.16b 749 1.1 christos eor v5.16b, v8.16b, v5.16b 750 1.1 christos eor v0.16b, v1.16b, v0.16b 751 1.1 christos eor v1.16b, v21.16b, v10.16b 752 1.1 christos eor v8.16b, v3.16b, v17.16b 753 1.1 christos eor v2.16b, v16.16b, v2.16b 754 1.1 christos eor v3.16b, v6.16b, v7.16b 755 1.1 christos eor v6.16b, v18.16b, v9.16b 756 1.1 christos eor v4.16b, v4.16b, v20.16b 757 1.1 christos eor v10.16b, v5.16b, v10.16b 758 1.1 christos eor v0.16b, v0.16b, v17.16b 759 1.1 christos eor v9.16b, v2.16b, v9.16b 760 1.1 christos eor v3.16b, v3.16b, v20.16b 761 1.1 christos eor v7.16b, v6.16b, v1.16b 762 1.1 christos eor v5.16b, v8.16b, v4.16b 763 1.1 christos eor v6.16b, v10.16b, v1.16b 764 1.1 christos eor v2.16b, v4.16b, v0.16b 765 1.1 christos eor v4.16b, v3.16b, v10.16b 766 1.1 christos eor v9.16b, v9.16b, v7.16b 767 1.1 christos eor v3.16b, v0.16b, v5.16b 768 1.1 christos eor v0.16b, v1.16b, v4.16b 769 1.1 christos eor v1.16b, v4.16b, v8.16b 770 1.1 christos eor v4.16b, v9.16b, v5.16b 771 1.1 christos eor v6.16b, v6.16b, v3.16b 772 1.1 christos bcc .Lenc_done 773 1.1 christos ext v8.16b, v0.16b, v0.16b, #12 774 1.1 christos ext v9.16b, v4.16b, v4.16b, #12 775 1.1 christos ldr q28, [x11] 776 1.1 christos ext v10.16b, v6.16b, v6.16b, #12 777 1.1 christos ext v16.16b, v1.16b, v1.16b, #12 778 1.1 christos ext v17.16b, v3.16b, v3.16b, #12 779 1.1 christos ext v18.16b, v7.16b, v7.16b, #12 780 1.1 christos eor v0.16b, v0.16b, v8.16b 781 1.1 christos eor v4.16b, v4.16b, v9.16b 782 1.1 christos eor v6.16b, v6.16b, v10.16b 783 1.1 christos ext v19.16b, v2.16b, v2.16b, #12 784 1.1 christos ext v20.16b, v5.16b, v5.16b, #12 785 1.1 christos eor v1.16b, v1.16b, v16.16b 786 1.1 christos eor v3.16b, 
v3.16b, v17.16b 787 1.1 christos eor v7.16b, v7.16b, v18.16b 788 1.1 christos eor v2.16b, v2.16b, v19.16b 789 1.1 christos eor v16.16b, v16.16b, v0.16b 790 1.1 christos eor v5.16b, v5.16b, v20.16b 791 1.1 christos eor v17.16b, v17.16b, v6.16b 792 1.1 christos eor v10.16b, v10.16b, v4.16b 793 1.1 christos ext v0.16b, v0.16b, v0.16b, #8 794 1.1 christos eor v9.16b, v9.16b, v1.16b 795 1.1 christos ext v1.16b, v1.16b, v1.16b, #8 796 1.1 christos eor v8.16b, v8.16b, v5.16b 797 1.1 christos eor v16.16b, v16.16b, v5.16b 798 1.1 christos eor v18.16b, v18.16b, v3.16b 799 1.1 christos eor v19.16b, v19.16b, v7.16b 800 1.1 christos ext v3.16b, v3.16b, v3.16b, #8 801 1.1 christos ext v7.16b, v7.16b, v7.16b, #8 802 1.1 christos eor v20.16b, v20.16b, v2.16b 803 1.1 christos ext v6.16b, v6.16b, v6.16b, #8 804 1.1 christos ext v21.16b, v5.16b, v5.16b, #8 805 1.1 christos eor v17.16b, v17.16b, v5.16b 806 1.1 christos ext v2.16b, v2.16b, v2.16b, #8 807 1.1 christos eor v10.16b, v10.16b, v5.16b 808 1.1 christos ext v22.16b, v4.16b, v4.16b, #8 809 1.1 christos eor v0.16b, v0.16b, v8.16b 810 1.1 christos eor v1.16b, v1.16b, v16.16b 811 1.1 christos eor v5.16b, v7.16b, v18.16b 812 1.1 christos eor v4.16b, v3.16b, v17.16b 813 1.1 christos eor v3.16b, v6.16b, v10.16b 814 1.1 christos eor v7.16b, v21.16b, v20.16b 815 1.1 christos eor v6.16b, v2.16b, v19.16b 816 1.1 christos eor v2.16b, v22.16b, v9.16b 817 1.1 christos bne .Lenc_loop 818 1.1 christos ldr q28, [x11, #16]! 
// load from .LSRM0 on last round (x10 == 0) 819 1.1 christos b .Lenc_loop 820 1.1 christos .align 4 821 1.1 christos .Lenc_done: 822 1.1 christos ushr v8.2d, v0.2d, #1 823 1.1 christos movi v9.16b, #0x55 824 1.1 christos ldr q10, [x9] 825 1.1 christos ushr v16.2d, v3.2d, #1 826 1.1 christos movi v17.16b, #0x33 827 1.1 christos ushr v18.2d, v4.2d, #1 828 1.1 christos movi v19.16b, #0x0f 829 1.1 christos eor v8.16b, v8.16b, v1.16b 830 1.1 christos ushr v20.2d, v2.2d, #1 831 1.1 christos eor v16.16b, v16.16b, v7.16b 832 1.1 christos eor v18.16b, v18.16b, v6.16b 833 1.1 christos and v8.16b, v8.16b, v9.16b 834 1.1 christos eor v20.16b, v20.16b, v5.16b 835 1.1 christos and v16.16b, v16.16b, v9.16b 836 1.1 christos and v18.16b, v18.16b, v9.16b 837 1.1 christos shl v21.2d, v8.2d, #1 838 1.1 christos eor v1.16b, v1.16b, v8.16b 839 1.1 christos and v8.16b, v20.16b, v9.16b 840 1.1 christos eor v7.16b, v7.16b, v16.16b 841 1.1 christos shl v9.2d, v16.2d, #1 842 1.1 christos eor v6.16b, v6.16b, v18.16b 843 1.1 christos shl v16.2d, v18.2d, #1 844 1.1 christos eor v0.16b, v0.16b, v21.16b 845 1.1 christos shl v18.2d, v8.2d, #1 846 1.1 christos eor v5.16b, v5.16b, v8.16b 847 1.1 christos eor v3.16b, v3.16b, v9.16b 848 1.1 christos eor v4.16b, v4.16b, v16.16b 849 1.1 christos ushr v8.2d, v1.2d, #2 850 1.1 christos eor v2.16b, v2.16b, v18.16b 851 1.1 christos ushr v9.2d, v0.2d, #2 852 1.1 christos ushr v16.2d, v7.2d, #2 853 1.1 christos ushr v18.2d, v3.2d, #2 854 1.1 christos eor v8.16b, v8.16b, v6.16b 855 1.1 christos eor v9.16b, v9.16b, v4.16b 856 1.1 christos eor v16.16b, v16.16b, v5.16b 857 1.1 christos eor v18.16b, v18.16b, v2.16b 858 1.1 christos and v8.16b, v8.16b, v17.16b 859 1.1 christos and v9.16b, v9.16b, v17.16b 860 1.1 christos and v16.16b, v16.16b, v17.16b 861 1.1 christos and v17.16b, v18.16b, v17.16b 862 1.1 christos eor v6.16b, v6.16b, v8.16b 863 1.1 christos shl v8.2d, v8.2d, #2 864 1.1 christos eor v4.16b, v4.16b, v9.16b 865 1.1 christos shl v9.2d, v9.2d, #2 866 
1.1 christos eor v5.16b, v5.16b, v16.16b 867 1.1 christos shl v16.2d, v16.2d, #2 868 1.1 christos eor v2.16b, v2.16b, v17.16b 869 1.1 christos shl v17.2d, v17.2d, #2 870 1.1 christos eor v1.16b, v1.16b, v8.16b 871 1.1 christos eor v0.16b, v0.16b, v9.16b 872 1.1 christos eor v7.16b, v7.16b, v16.16b 873 1.1 christos eor v3.16b, v3.16b, v17.16b 874 1.1 christos ushr v8.2d, v6.2d, #4 875 1.1 christos ushr v9.2d, v4.2d, #4 876 1.1 christos ushr v16.2d, v1.2d, #4 877 1.1 christos ushr v17.2d, v0.2d, #4 878 1.1 christos eor v8.16b, v8.16b, v5.16b 879 1.1 christos eor v9.16b, v9.16b, v2.16b 880 1.1 christos eor v16.16b, v16.16b, v7.16b 881 1.1 christos eor v17.16b, v17.16b, v3.16b 882 1.1 christos and v8.16b, v8.16b, v19.16b 883 1.1 christos and v9.16b, v9.16b, v19.16b 884 1.1 christos and v16.16b, v16.16b, v19.16b 885 1.1 christos and v17.16b, v17.16b, v19.16b 886 1.1 christos eor v5.16b, v5.16b, v8.16b 887 1.1 christos shl v8.2d, v8.2d, #4 888 1.1 christos eor v2.16b, v2.16b, v9.16b 889 1.1 christos shl v9.2d, v9.2d, #4 890 1.1 christos eor v7.16b, v7.16b, v16.16b 891 1.1 christos shl v16.2d, v16.2d, #4 892 1.1 christos eor v3.16b, v3.16b, v17.16b 893 1.1 christos shl v17.2d, v17.2d, #4 894 1.1 christos eor v6.16b, v6.16b, v8.16b 895 1.1 christos eor v4.16b, v4.16b, v9.16b 896 1.1 christos eor v7.16b, v7.16b, v10.16b 897 1.1 christos eor v1.16b, v1.16b, v16.16b 898 1.1 christos eor v3.16b, v3.16b, v10.16b 899 1.1 christos eor v0.16b, v0.16b, v17.16b 900 1.1 christos eor v6.16b, v6.16b, v10.16b 901 1.1 christos eor v4.16b, v4.16b, v10.16b 902 1.1 christos eor v2.16b, v2.16b, v10.16b 903 1.1 christos eor v5.16b, v5.16b, v10.16b 904 1.1 christos eor v1.16b, v1.16b, v10.16b 905 1.1 christos eor v0.16b, v0.16b, v10.16b 906 1.1 christos ret 907 1.1 christos .size _bsaes_encrypt8,.-_bsaes_encrypt8 908 1.1 christos 909 1.1 christos .type _bsaes_key_convert,%function 910 1.1 christos .align 4 911 1.1 christos // On entry: 912 1.1 christos // x9 -> input key (big-endian) 913 1.1 
//   x10 = number of rounds
//   x17 -> output key (native endianness)
// On exit:
//   x9, x10 corrupted
//   x11 -> .LM0_bigendian
//   x17 -> last quadword of output key
//   other general-purpose registers preserved
//   v2-v6 preserved
//   v7.16b[] = 0x63
//   v8-v14 preserved
//   v15 = last round key (converted to native endianness)
//   other SIMD registers corrupted
//
// Layout written through x17:
//   16 bytes        round 0 key (byte-swapped per 32-bit word on
//                   little-endian, otherwise copied unchanged)
//   128 bytes each  one bit-sliced key per loop iteration; the loop
//                   runs (rounds - 1) times
// The last round key is NOT stored; it is returned in v15 and x17 is
// left pointing at the slot where the caller is expected to store it.
_bsaes_key_convert:
#ifdef __AARCH64EL__
        adrp    x11, .LM0_littleendian
        add     x11, x11, #:lo12:.LM0_littleendian
#else
        adrp    x11, .LM0_bigendian
        add     x11, x11, #:lo12:.LM0_bigendian
#endif
        ldr     q0, [x9], #16               // load round 0 key
        ldr     q1, [x11]                   // .LM0 (byte permutation for tbl)
        ldr     q15, [x9], #16              // load round 1 key

        movi    v7.16b, #0x63               // compose .L63
        movi    v16.16b, #0x01              // bit masks, one per bit position
        movi    v17.16b, #0x02
        movi    v18.16b, #0x04
        movi    v19.16b, #0x08
        movi    v20.16b, #0x10
        movi    v21.16b, #0x20
        movi    v22.16b, #0x40
        movi    v23.16b, #0x80

#ifdef __AARCH64EL__
        rev32   v0.16b, v0.16b
#endif
        sub     x10, x10, #1                // x10 = number of loop iterations
        str     q0, [x17], #16              // save round 0 key

        .align  4
.Lkey_loop:
        tbl     v0.16b, {v15.16b}, v1.16b   // permute pending round key via .LM0
        ldr     q15, [x9], #16              // load next round key

        eor     v0.16b, v0.16b, v7.16b      // ^= 0x63 in every byte
        // Expand each bit position into a whole-byte mask: a result byte
        // is 0xff where the tested bit is set in v0 and 0x00 otherwise.
        cmtst   v24.16b, v0.16b, v16.16b
        cmtst   v25.16b, v0.16b, v17.16b
        cmtst   v26.16b, v0.16b, v18.16b
        cmtst   v27.16b, v0.16b, v19.16b
        cmtst   v28.16b, v0.16b, v20.16b
        cmtst   v29.16b, v0.16b, v21.16b
        cmtst   v30.16b, v0.16b, v22.16b
        cmtst   v31.16b, v0.16b, v23.16b
        sub     x10, x10, #1
        st1     {v24.16b,v25.16b,v26.16b,v27.16b}, [x17], #64   // write bit-sliced round key
        st1     {v28.16b,v29.16b,v30.16b,v31.16b}, [x17], #64
        cbnz    x10, .Lkey_loop

        // don't save last round key
#ifdef __AARCH64EL__
        rev32   v15.16b, v15.16b
        adrp    x11, .LM0_bigendian         // honour the documented exit value of x11
        add     x11, x11, #:lo12:.LM0_bigendian
#endif
        ret
.size   _bsaes_key_convert,.-_bsaes_key_convert

.globl  ossl_bsaes_cbc_encrypt
.type   ossl_bsaes_cbc_encrypt,%function
.align  4
// On entry:
//   x0 -> input ciphertext
//   x1 -> output plaintext
//   x2 = size of ciphertext and plaintext in bytes (assumed a multiple of 16)
//   x3 -> key
//   x4 -> 128-bit initialisation vector (or preceding 128-bit block of ciphertext if continuing after an earlier call)
//   w5 must be == 0
// On exit:
//   Output plaintext filled in
//   Initialisation vector overwritten with last quadword of ciphertext
//   No output registers, usual AAPCS64 register preservation
ossl_bsaes_cbc_encrypt:
        AARCH64_VALID_CALL_TARGET
        cmp     x2, #128
        bhs     .Lcbc_do_bsaes
        b       AES_cbc_encrypt             // under 128 bytes: tail-call with arguments untouched
.Lcbc_do_bsaes:

        // it is up to the caller to make sure we are called with enc == 0

        stp     x29, x30, [sp, #-48]!       // 48-byte frame: frame record first
// ossl_bsaes_cbc_encrypt, continued. The frame record has just been pushed;
// the rest of the 48-byte frame holds d8, d9, d10 and d15.
//
// Register roles from here on:
//   x2  = number of 16-byte blocks still to process
//   x14 -> base of the 48-byte frame (upper limit for the key-schedule wipe)
//   x15 = number of rounds
//   sp  -> bit-sliced key schedule (rounds*128 - 96 bytes, below the frame)
//   v15 = IV for the next block
// The XOR/store sequences below consume the results of _bsaes_decrypt8 in
// the register order v0, v1, v6, v4, v2, v7, v3, v5.
        stp     d8, d9, [sp, #16]
        stp     d10, d15, [sp, #32]
        lsr     x2, x2, #4                  // len in 16 byte blocks

        ldr     w15, [x3, #240]             // get # of rounds
        mov     x14, sp

        // allocate the key schedule on the stack
        add     x17, sp, #96
        sub     x17, x17, x15, lsl #7       // 128 bytes per inner round key, less 96 bytes

        // populate the key schedule
        mov     x9, x3                      // pass key
        mov     x10, x15                    // pass # of rounds
        mov     sp, x17                     // sp -> key schedule
        bl      _bsaes_key_convert
        ldr     q6, [sp]
        str     q15, [x17]                  // save last round key
        eor     v6.16b, v6.16b, v7.16b      // fix up round 0 key (by XORing with 0x63)
        str     q6, [sp]

        ldr     q15, [x4]                   // load IV
        b       .Lcbc_dec_loop

        .align  4
.Lcbc_dec_loop:
        subs    x2, x2, #0x8
        bmi     .Lcbc_dec_loop_finish       // fewer than 8 blocks left

        ldr     q0, [x0], #16               // load input
        mov     x9, sp                      // pass the key
        ldr     q1, [x0], #16
        mov     x10, x15
        ldr     q2, [x0], #16
        ldr     q3, [x0], #16
        ldr     q4, [x0], #16
        ldr     q5, [x0], #16
        ldr     q6, [x0], #16
        ldr     q7, [x0], #-7*16            // rewind x0 to the first of the 8 blocks

        bl      _bsaes_decrypt8

        ldr     q16, [x0], #16              // reload input
        eor     v0.16b, v0.16b, v15.16b     // ^= IV
        eor     v1.16b, v1.16b, v16.16b
        str     q0, [x1], #16               // write output
        ldr     q0, [x0], #16
        str     q1, [x1], #16
        ldr     q1, [x0], #16
        eor     v1.16b, v4.16b, v1.16b
        ldr     q4, [x0], #16
        eor     v2.16b, v2.16b, v4.16b
        eor     v0.16b, v6.16b, v0.16b
        ldr     q4, [x0], #16
        str     q0, [x1], #16
        str     q1, [x1], #16
        eor     v0.16b, v7.16b, v4.16b
        ldr     q1, [x0], #16
        str     q2, [x1], #16
        ldr     q2, [x0], #16
        ldr     q15, [x0], #16              // last ciphertext block becomes the next IV
        str     q0, [x1], #16
        eor     v0.16b, v5.16b, v2.16b
        eor     v1.16b, v3.16b, v1.16b
        str     q1, [x1], #16
        str     q0, [x1], #16

        b       .Lcbc_dec_loop

.Lcbc_dec_loop_finish:
        adds    x2, x2, #8                  // x2 = leftover block count
        beq     .Lcbc_dec_done

        // Each cmp below feeds two branches (blo, then beq); the loads and
        // moves in between do not alter the flags.
        ldr     q0, [x0], #16               // load input
        cmp     x2, #2
        blo     .Lcbc_dec_one
        ldr     q1, [x0], #16
        mov     x9, sp                      // pass the key
        mov     x10, x15
        beq     .Lcbc_dec_two
        ldr     q2, [x0], #16
        cmp     x2, #4
        blo     .Lcbc_dec_three
        ldr     q3, [x0], #16
        beq     .Lcbc_dec_four
        ldr     q4, [x0], #16
        cmp     x2, #6
        blo     .Lcbc_dec_five
        ldr     q5, [x0], #16
        beq     .Lcbc_dec_six
        ldr     q6, [x0], #-6*16            // seven blocks: rewind x0 to the first

        bl      _bsaes_decrypt8

        ldr     q5, [x0], #16               // reload input
        eor     v0.16b, v0.16b, v15.16b     // ^= IV
        ldr     q8, [x0], #16
        ldr     q9, [x0], #16
        ldr     q10, [x0], #16
        str     q0, [x1], #16               // write output
        ldr     q0, [x0], #16
        eor     v1.16b, v1.16b, v5.16b
        ldr     q5, [x0], #16
        eor     v6.16b, v6.16b, v8.16b
        ldr     q15, [x0]                   // next IV
        eor     v4.16b, v4.16b, v9.16b
        eor     v2.16b, v2.16b, v10.16b
        str     q1, [x1], #16
        eor     v0.16b, v7.16b, v0.16b
        str     q6, [x1], #16
        eor     v1.16b, v3.16b, v5.16b
        str     q4, [x1], #16
        str     q2, [x1], #16
        str     q0, [x1], #16
        str     q1, [x1]
        b       .Lcbc_dec_done
        .align  4
.Lcbc_dec_six:
        sub     x0, x0, #0x60               // rewind x0 to the first block
        bl      _bsaes_decrypt8
        ldr     q3, [x0], #16               // reload input
        eor     v0.16b, v0.16b, v15.16b     // ^= IV
        ldr     q5, [x0], #16
        ldr     q8, [x0], #16
        ldr     q9, [x0], #16
        str     q0, [x1], #16               // write output
        ldr     q0, [x0], #16
        eor     v1.16b, v1.16b, v3.16b
        ldr     q15, [x0]                   // next IV
        eor     v3.16b, v6.16b, v5.16b
        eor     v4.16b, v4.16b, v8.16b
        eor     v2.16b, v2.16b, v9.16b
        str     q1, [x1], #16
        eor     v0.16b, v7.16b, v0.16b
        str     q3, [x1], #16
        str     q4, [x1], #16
        str     q2, [x1], #16
        str     q0, [x1]
        b       .Lcbc_dec_done
        .align  4
.Lcbc_dec_five:
        sub     x0, x0, #0x50               // rewind x0 to the first block
        bl      _bsaes_decrypt8
        ldr     q3, [x0], #16               // reload input
        eor     v0.16b, v0.16b, v15.16b     // ^= IV
        ldr     q5, [x0], #16
        ldr     q7, [x0], #16
        ldr     q8, [x0], #16
        str     q0, [x1], #16               // write output
        ldr     q15, [x0]                   // next IV
        eor     v0.16b, v1.16b, v3.16b
        eor     v1.16b, v6.16b, v5.16b
        eor     v3.16b, v4.16b, v7.16b
        str     q0, [x1], #16
        eor     v0.16b, v2.16b, v8.16b
        str     q1, [x1], #16
        str     q3, [x1], #16
        str     q0, [x1]
        b       .Lcbc_dec_done
        .align  4
.Lcbc_dec_four:
        sub     x0, x0, #0x40               // rewind x0 to the first block
        bl      _bsaes_decrypt8
        ldr     q2, [x0], #16               // reload input
        eor     v0.16b, v0.16b, v15.16b     // ^= IV
        ldr     q3, [x0], #16
        ldr     q5, [x0], #16
        str     q0, [x1], #16               // write output
        ldr     q15, [x0]                   // next IV
        eor     v0.16b, v1.16b, v2.16b
        eor     v1.16b, v6.16b, v3.16b
        eor     v2.16b, v4.16b, v5.16b
        str     q0, [x1], #16
        str     q1, [x1], #16
        str     q2, [x1]
        b       .Lcbc_dec_done
        .align  4
.Lcbc_dec_three:
        sub     x0, x0, #0x30               // rewind x0 to the first block
        bl      _bsaes_decrypt8
        ldr     q2, [x0], #16               // reload input
        eor     v0.16b, v0.16b, v15.16b     // ^= IV
        ldr     q3, [x0], #16
        ldr     q15, [x0]                   // next IV
        str     q0, [x1], #16               // write output
        eor     v0.16b, v1.16b, v2.16b
        eor     v1.16b, v6.16b, v3.16b
        str     q0, [x1], #16
        str     q1, [x1]
        b       .Lcbc_dec_done
        .align  4
.Lcbc_dec_two:
        sub     x0, x0, #0x20               // rewind x0 to the first block
        bl      _bsaes_decrypt8
        ldr     q2, [x0], #16               // reload input
        eor     v0.16b, v0.16b, v15.16b     // ^= IV
        ldr     q15, [x0]                   // next IV
        str     q0, [x1], #16               // write output
        eor     v0.16b, v1.16b, v2.16b
        str     q0, [x1]
        b       .Lcbc_dec_done
        .align  4
.Lcbc_dec_one:
        // Single leftover block: decrypt it with AES_decrypt(in, out, key).
        // AAPCS64 only obliges a callee to preserve the low 64 bits of
        // v8-v15, so the two live 128-bit values (current IV and the
        // ciphertext block that becomes the next IV) are spilled to the
        // stack around the call instead of being parked in v8/v15.
        // Neither value is secret, so the spill area is not wiped.
        sub     x0, x0, #0x10               // rewind x0 to the block just loaded
        stp     x1, x4, [sp, #-64]!
        str     x14, [sp, #16]
        str     q15, [sp, #32]              // spill current IV
        str     q0, [sp, #48]               // spill ciphertext block (next IV)
        mov     x2, x3
        bl      AES_decrypt
        ldr     q1, [sp, #32]               // reload current IV
        ldr     q15, [sp, #48]              // reload next IV
        ldr     x14, [sp, #16]
        ldp     x1, x4, [sp], #64
        ldr     q0, [x1]                    // load result
        eor     v0.16b, v0.16b, v1.16b      // ^= IV
        str     q0, [x1]                    // write output

        .align  4
.Lcbc_dec_done:
        movi    v0.16b, #0
        movi    v1.16b, #0
.Lcbc_dec_bzero:                            // wipe key schedule [if any]
        stp     q0, q1, [sp], #32
        cmp     sp, x14
        bne     .Lcbc_dec_bzero
        str     q15, [x4]                   // return IV
        ldp     d8, d9, [sp, #16]
        ldp     d10, d15, [sp, #32]
        ldp     x29, x30, [sp], #48
        ret
.size   ossl_bsaes_cbc_encrypt,.-ossl_bsaes_cbc_encrypt

.globl  ossl_bsaes_ctr32_encrypt_blocks
.type   ossl_bsaes_ctr32_encrypt_blocks,%function
.align  4
// On entry:
//   x0 -> input text (whole 16-byte blocks)
//   x1 -> output text (whole 16-byte blocks)
//   x2 = number of 16-byte blocks to encrypt/decrypt (> 0)
//   x3 -> key
//   x4 -> initial value of 128-bit counter (stored big-endian) which increments, modulo 2^32, for each block
// On exit:
//   Output text filled in
//   No output registers, usual AAPCS64 register preservation
ossl_bsaes_ctr32_encrypt_blocks:
        AARCH64_VALID_CALL_TARGET
        cmp     x2, #8                      // use plain AES for
        blo     .Lctr_enc_short             // small sizes

        stp     x29, x30, [sp, #-80]!       // 80-byte frame: frame record first
// ossl_bsaes_ctr32_encrypt_blocks, continued: bit-sliced path for 8 or more
// blocks. The frame record has just been pushed; the rest of the 80-byte
// frame holds d8-d15.
//
// Register roles from here on:
//   x2  = number of blocks still to process
//   x13 -> .LREVM0SR
//   x14 -> base of the 80-byte frame (upper limit for the key-schedule wipe)
//   x15 = number of rounds
//   sp  -> bit-sliced key schedule (rounds*128 - 96 bytes, below the frame)
//   v11-v14 = 1, 2, 3, 4 in the top 32-bit lane (other lanes zero)
//   v15 = counter for the first block of the next batch
// The XOR/store sequences below consume the results of _bsaes_encrypt8_alt
// in the register order v0, v1, v4, v6, v3, v7, v2, v5.
        stp     d8, d9, [sp, #16]
        stp     d10, d11, [sp, #32]
        stp     d12, d13, [sp, #48]
        stp     d14, d15, [sp, #64]

        ldr     w15, [x3, #240]             // get # of rounds
        mov     x14, sp

        // allocate the key schedule on the stack
        add     x17, sp, #96
        sub     x17, x17, x15, lsl #7       // 128 bytes per inner round key, less 96 bytes

        // populate the key schedule
        mov     x9, x3                      // pass key
        mov     x10, x15                    // pass # of rounds
        mov     sp, x17                     // sp -> key schedule
        bl      _bsaes_key_convert
        eor     v7.16b, v7.16b, v15.16b     // fix up last round key
        str     q7, [x17]                   // save last round key

        ldr     q0, [x4]                    // load counter
        add     x13, x11, #.LREVM0SR-.LM0_bigendian    // x11 -> .LM0_bigendian after key convert
        ldr     q4, [sp]                    // load round0 key

        movi    v8.4s, #1                   // compose 1<<96
        movi    v9.16b, #0
        rev32   v15.16b, v0.16b
        rev32   v0.16b, v0.16b
        ext     v11.16b, v9.16b, v8.16b, #4
        rev32   v4.16b, v4.16b
        add     v12.4s, v11.4s, v11.4s      // compose 2<<96
        str     q4, [sp]                    // save adjusted round0 key
        add     v13.4s, v11.4s, v12.4s      // compose 3<<96
        add     v14.4s, v12.4s, v12.4s      // compose 4<<96
        b       .Lctr_enc_loop

        .align  4
.Lctr_enc_loop:
        // Intermix prologue from _bsaes_encrypt8 to use the opportunity
        // to flip byte order in 32-bit counter

        add     v1.4s, v15.4s, v11.4s       // +1
        add     x9, sp, #0x10               // pass next round key
        add     v2.4s, v15.4s, v12.4s       // +2
        ldr     q9, [x13]                   // .LREVM0SR
        ldr     q8, [sp]                    // load round0 key
        add     v3.4s, v15.4s, v13.4s       // +3
        mov     x10, x15                    // pass rounds
        sub     x11, x13, #.LREVM0SR-.LSR   // pass constants
        add     v6.4s, v2.4s, v14.4s        // +6
        add     v4.4s, v15.4s, v14.4s       // +4
        add     v7.4s, v3.4s, v14.4s        // +7
        add     v15.4s, v4.4s, v14.4s       // next counter
        add     v5.4s, v1.4s, v14.4s        // +5

        bl      _bsaes_encrypt8_alt

        subs    x2, x2, #8
        blo     .Lctr_enc_loop_done         // fewer than 8 blocks were left

        ldr     q16, [x0], #16
        ldr     q17, [x0], #16
        eor     v1.16b, v1.16b, v17.16b
        ldr     q17, [x0], #16
        eor     v0.16b, v0.16b, v16.16b
        eor     v4.16b, v4.16b, v17.16b
        str     q0, [x1], #16
        ldr     q16, [x0], #16
        str     q1, [x1], #16
        mov     v0.16b, v15.16b             // v0 = counter for the next batch
        str     q4, [x1], #16
        ldr     q1, [x0], #16
        eor     v4.16b, v6.16b, v16.16b
        eor     v1.16b, v3.16b, v1.16b
        ldr     q3, [x0], #16
        eor     v3.16b, v7.16b, v3.16b
        ldr     q6, [x0], #16
        eor     v2.16b, v2.16b, v6.16b
        ldr     q6, [x0], #16
        eor     v5.16b, v5.16b, v6.16b
        str     q4, [x1], #16
        str     q1, [x1], #16
        str     q3, [x1], #16
        str     q2, [x1], #16
        str     q5, [x1], #16

        bne     .Lctr_enc_loop              // flags still those of the subs above
        b       .Lctr_enc_done

        .align  4
.Lctr_enc_loop_done:
        // Each cmp below feeds two branches (blo, then beq); the loads, XORs
        // and stores in between do not alter the flags.
        add     x2, x2, #8                  // x2 = leftover block count
        ldr     q16, [x0], #16              // load input
        eor     v0.16b, v0.16b, v16.16b
        str     q0, [x1], #16               // write output
        cmp     x2, #2
        blo     .Lctr_enc_done
        ldr     q17, [x0], #16
        eor     v1.16b, v1.16b, v17.16b
        str     q1, [x1], #16
        beq     .Lctr_enc_done
        ldr     q18, [x0], #16
        eor     v4.16b, v4.16b, v18.16b
        str     q4, [x1], #16
        cmp     x2, #4
        blo     .Lctr_enc_done
        ldr     q19, [x0], #16
        eor     v6.16b, v6.16b, v19.16b
        str     q6, [x1], #16
        beq     .Lctr_enc_done
        ldr     q20, [x0], #16
        eor     v3.16b, v3.16b, v20.16b
        str     q3, [x1], #16
        cmp     x2, #6
        blo     .Lctr_enc_done
        ldr     q21, [x0], #16
        eor     v7.16b, v7.16b, v21.16b
        str     q7, [x1], #16
        beq     .Lctr_enc_done
        ldr     q22, [x0]
        eor     v2.16b, v2.16b, v22.16b
        str     q2, [x1], #16

.Lctr_enc_done:
        movi    v0.16b, #0
        movi    v1.16b, #0
.Lctr_enc_bzero:                            // wipe key schedule [if any]
        stp     q0, q1, [sp], #32
        cmp     sp, x14
        bne     .Lctr_enc_bzero

        ldp     d8, d9, [sp, #16]
        ldp     d10, d11, [sp, #32]
        ldp     d12, d13, [sp, #48]
        ldp     d14, d15, [sp, #64]
        ldp     x29, x30, [sp], #80
        ret

        // Short path (fewer than 8 blocks): one AES_encrypt call per block.
        // 96-byte frame:
        //   sp+0   frame record
        //   sp+16  x19-x23
        //   sp+64  encrypted counter (AES_encrypt output)
        //   sp+80  counter block (AES_encrypt input)
.Lctr_enc_short:
        stp     x29, x30, [sp, #-96]!
        stp     x19, x20, [sp, #16]
        stp     x21, x22, [sp, #32]
        str     x23, [sp, #48]

        mov     x19, x0                     // copy arguments
        mov     x20, x1
        mov     x21, x2
        mov     x22, x3
        ldr     w23, [x4, #12]              // load least-significant word of the counter
        ldr     q1, [x4]                    // load whole counter value
#ifdef __AARCH64EL__
        rev     w23, w23
#endif
        str     q1, [sp, #80]               // copy counter value

.Lctr_enc_short_loop:
        add     x0, sp, #80                 // input counter value
        add     x1, sp, #64                 // output on the stack
        mov     x2, x22                     // key

        bl      AES_encrypt

        ldr     q0, [x19], #16              // load input
        ldr     q1, [sp, #64]               // load encrypted counter
        add     x23, x23, #1                // only the low 32 bits are stored back
#ifdef __AARCH64EL__
        rev     w0, w23
        str     w0, [sp, #80+12]            // next counter value
#else
        str     w23, [sp, #80+12]           // next counter value
#endif
        eor     v0.16b, v0.16b, v1.16b
        str     q0, [x20], #16              // store output
        subs    x21, x21, #1
        bne     .Lctr_enc_short_loop

        movi    v0.16b, #0
        movi    v1.16b, #0
        stp     q0, q1, [sp, #64]           // wipe encrypted counter and counter block

        ldr     x23, [sp, #48]
        ldp     x21, x22, [sp, #32]
        ldp     x19, x20, [sp, #16]
        ldp     x29, x30, [sp], #96
        ret
.size   ossl_bsaes_ctr32_encrypt_blocks,.-ossl_bsaes_ctr32_encrypt_blocks

.globl  ossl_bsaes_xts_encrypt
.type   ossl_bsaes_xts_encrypt,%function
.align  4
// On entry:
//   x0 -> input plaintext
//   x1 -> output ciphertext
//   x2 = length of text in bytes (must be at least 16)
//   x3 -> key1 (used to encrypt the XORed plaintext blocks)
//   x4 -> key2 (used to encrypt the initial vector to yield the initial tweak)
//   x5 -> 16-byte initial vector (typically, sector number)
// On exit:
//   Output ciphertext filled in
//   No output registers, usual AAPCS64 register preservation
ossl_bsaes_xts_encrypt:
        AARCH64_VALID_CALL_TARGET
        // Stack layout:
        // sp ->
        //        nrounds*128-96 bytes: key schedule
        // x19 ->
        //        16 bytes: frame record
        //        4*16 bytes: tweak storage across _bsaes_encrypt8
        //        6*8 bytes: storage for 5 callee-saved general-purpose registers
        //                   (x19-x23; the slot at offset 120 is unused)
        //        8*8 bytes: storage for 8 callee-saved SIMD registers
        stp     x29, x30, [sp, #-192]!
        stp     x19, x20, [sp, #80]
        stp     x21, x22, [sp, #96]
        str     x23, [sp, #112]
        stp     d8, d9, [sp, #128]
        stp     d10, d11, [sp, #144]
        stp     d12, d13, [sp, #160]
        stp     d14, d15, [sp, #176]

        // Register roles for the rest of the function:
        //   x19 = base of the 192-byte frame (also the end of the key schedule)
        //   x20 = input pointer, x21 = output pointer
        //   x22 = running byte count, x23 = key1
        mov     x19, sp
        mov     x20, x0
        mov     x21, x1
        mov     x22, x2
        mov     x23, x3

        // generate initial tweak
        // A temporary 16-byte stack slot is passed in x1; whatever AES_encrypt
        // leaves there is popped into v11 as the first tweak.
        sub     sp, sp, #16
        mov     x0, x5                      // iv[]
        mov     x1, sp
        mov     x2, x4                      // key2
        bl      AES_encrypt
        ldr     q11, [sp], #16

        ldr     w1, [x23, #240]             // get # of rounds
        // allocate the key schedule on the stack
        add     x17, sp, #96
        sub     x17, x17, x1, lsl #7        // 128 bytes per inner round key, less 96 bytes

        // populate the key schedule
        // NOTE(review): _bsaes_key_convert is defined elsewhere in this file.
        // The code below assumes it returns with x17 addressing the last
        // round key slot and with v7/v15 holding the values used in the
        // fix-up - confirm against that routine's header.
        mov     x9, x23                     // pass key
        mov     x10, x1                     // pass # of rounds
        mov     sp, x17
        bl      _bsaes_key_convert
        eor     v15.16b, v15.16b, v7.16b    // fix up last round key
        str     q15, [x17]                  // save last round key

        // x22 is kept biased by -0x80 while the 8-block loop runs, so its
        // sign says whether another full group of 8 blocks is available.
        subs    x22, x22, #0x80
        blo     .Lxts_enc_short
        b       .Lxts_enc_loop

        // Main loop: 8 blocks (0x80 bytes) per iteration.
        //   v11         tweak for block 0 on entry to each iteration
        //   v12-v15     tweaks for blocks 1-4; the code relies on v11-v15
        //               surviving the call to _bsaes_encrypt8
        //   [x19+16...] tweaks for blocks 5-7 followed by the next iteration's
        //               first tweak (written through x2, read back through x0)
        // Each tweak is the previous one doubled per 64-bit lane (add .2d) and
        // XORed with a carry term. The carry term is a mask of the bit leaving
        // each lane (sshr #63 on the current tweak, or cmtst against bit 62 of
        // the tweak before it), ANDed with {1, 0x87} from .Lxts_magic and
        // lane-swapped with ext #8.
        // x1 still holds the round count loaded above.
        // NOTE(review): this assumes _bsaes_encrypt8 preserves x1 - confirm.
.align  4
.Lxts_enc_loop:
        ldr     q8, .Lxts_magic
        mov     x10, x1                     // pass rounds
        add     x2, x19, #16
        ldr     q0, [x20], #16
        sshr    v1.2d, v11.2d, #63
        mov     x9, sp                      // pass key schedule
        ldr     q6, .Lxts_magic+16
        add     v2.2d, v11.2d, v11.2d
        cmtst   v3.2d, v11.2d, v6.2d
        and     v1.16b, v1.16b, v8.16b
        ext     v1.16b, v1.16b, v1.16b, #8
        and     v3.16b, v3.16b, v8.16b
        ldr     q4, [x20], #16
        eor     v12.16b, v2.16b, v1.16b
        eor     v1.16b, v4.16b, v12.16b
        eor     v0.16b, v0.16b, v11.16b
        cmtst   v2.2d, v12.2d, v6.2d
        add     v4.2d, v12.2d, v12.2d
        add     x0, x19, #16
        ext     v3.16b, v3.16b, v3.16b, #8
        and     v2.16b, v2.16b, v8.16b
        eor     v13.16b, v4.16b, v3.16b
        ldr     q3, [x20], #16
        ext     v4.16b, v2.16b, v2.16b, #8
        eor     v2.16b, v3.16b, v13.16b
        ldr     q3, [x20], #16
        add     v5.2d, v13.2d, v13.2d
        cmtst   v7.2d, v13.2d, v6.2d
        and     v7.16b, v7.16b, v8.16b
        ldr     q9, [x20], #16
        ext     v7.16b, v7.16b, v7.16b, #8
        ldr     q10, [x20], #16
        eor     v14.16b, v5.16b, v4.16b
        ldr     q16, [x20], #16
        add     v4.2d, v14.2d, v14.2d
        eor     v3.16b, v3.16b, v14.16b
        eor     v15.16b, v4.16b, v7.16b
        add     v5.2d, v15.2d, v15.2d
        ldr     q7, [x20], #16
        cmtst   v4.2d, v14.2d, v6.2d
        and     v17.16b, v4.16b, v8.16b
        cmtst   v18.2d, v15.2d, v6.2d
        eor     v4.16b, v9.16b, v15.16b
        ext     v9.16b, v17.16b, v17.16b, #8
        eor     v9.16b, v5.16b, v9.16b
        add     v17.2d, v9.2d, v9.2d
        and     v18.16b, v18.16b, v8.16b
        eor     v5.16b, v10.16b, v9.16b
        str     q9, [x2], #16
        ext     v10.16b, v18.16b, v18.16b, #8
        cmtst   v9.2d, v9.2d, v6.2d
        and     v9.16b, v9.16b, v8.16b
        eor     v10.16b, v17.16b, v10.16b
        cmtst   v17.2d, v10.2d, v6.2d
        eor     v6.16b, v16.16b, v10.16b
        str     q10, [x2], #16
        ext     v9.16b, v9.16b, v9.16b, #8
        add     v10.2d, v10.2d, v10.2d
        eor     v9.16b, v10.16b, v9.16b
        str     q9, [x2], #16
        eor     v7.16b, v7.16b, v9.16b
        add     v9.2d, v9.2d, v9.2d
        and     v8.16b, v17.16b, v8.16b
        ext     v8.16b, v8.16b, v8.16b, #8
        eor     v8.16b, v9.16b, v8.16b
        str     q8, [x2]                    // next round tweak

        bl      _bsaes_encrypt8

        // The eight results are consumed in register order
        // v0, v1, v4, v6, v3, v7, v2, v5 (blocks 0..7), each XORed with its
        // tweak before being stored.
        ldr     q8, [x0], #16
        eor     v0.16b, v0.16b, v11.16b
        eor     v1.16b, v1.16b, v12.16b
        ldr     q9, [x0], #16
        eor     v4.16b, v4.16b, v13.16b
        eor     v6.16b, v6.16b, v14.16b
        ldr     q10, [x0], #16
        eor     v3.16b, v3.16b, v15.16b
        subs    x22, x22, #0x80
        str     q0, [x21], #16
        ldr     q11, [x0]                   // next round tweak
        str     q1, [x21], #16
        eor     v0.16b, v7.16b, v8.16b
        eor     v1.16b, v2.16b, v9.16b
        str     q4, [x21], #16
        eor     v2.16b, v5.16b, v10.16b
        str     q6, [x21], #16
        str     q3, [x21], #16
        str     q0, [x21], #16
        str     q1, [x21], #16
        str     q2, [x21], #16
        bpl     .Lxts_enc_loop

        // Tail: fewer than 8 whole blocks remain. After the bias is changed
        // from -0x80 to -0x10, x22 is negative when no whole block is left.
        // Otherwise blocks are loaded and tweaks derived one at a time, and
        // each "subs ... #0x10 / bmi" pair leaves for the handler matching the
        // number of blocks loaded so far (the SIMD instructions in between do
        // not touch the flags).
.Lxts_enc_short:
        adds    x22, x22, #0x70
        bmi     .Lxts_enc_done

        ldr     q8, .Lxts_magic
        sshr    v1.2d, v11.2d, #63
        add     v2.2d, v11.2d, v11.2d
        ldr     q9, .Lxts_magic+16
        subs    x22, x22, #0x10
        ldr     q0, [x20], #16
        and     v1.16b, v1.16b, v8.16b
        cmtst   v3.2d, v11.2d, v9.2d
        ext     v1.16b, v1.16b, v1.16b, #8
        and     v3.16b, v3.16b, v8.16b
        eor     v12.16b, v2.16b, v1.16b
        ext     v1.16b, v3.16b, v3.16b, #8
        add     v2.2d, v12.2d, v12.2d
        cmtst   v3.2d, v12.2d, v9.2d
        eor     v13.16b, v2.16b, v1.16b
        and     v22.16b, v3.16b, v8.16b
        bmi     .Lxts_enc_1

        ext     v2.16b, v22.16b, v22.16b, #8
        add     v3.2d, v13.2d, v13.2d
        ldr     q1, [x20], #16
        cmtst   v4.2d, v13.2d, v9.2d
        subs    x22, x22, #0x10
        eor     v14.16b, v3.16b, v2.16b
        and     v23.16b, v4.16b, v8.16b
        bmi     .Lxts_enc_2

        ext     v3.16b, v23.16b, v23.16b, #8
        add     v4.2d, v14.2d, v14.2d
        ldr     q2, [x20], #16
        cmtst   v5.2d, v14.2d, v9.2d
        eor     v0.16b, v0.16b, v11.16b
        subs    x22, x22, #0x10
        eor     v15.16b, v4.16b, v3.16b
        and     v24.16b, v5.16b, v8.16b
        bmi     .Lxts_enc_3

        ext     v4.16b, v24.16b, v24.16b, #8
        add     v5.2d, v15.2d, v15.2d
        ldr     q3, [x20], #16
        cmtst   v6.2d, v15.2d, v9.2d
        eor     v1.16b, v1.16b, v12.16b
        subs    x22, x22, #0x10
        eor     v16.16b, v5.16b, v4.16b
        and     v25.16b, v6.16b, v8.16b
        bmi     .Lxts_enc_4

        // From the fifth block on, tweaks no longer fit in v11-v15 and are
        // spilled to the frame's tweak storage through x0.
        ext     v5.16b, v25.16b, v25.16b, #8
        add     v6.2d, v16.2d, v16.2d
        add     x0, x19, #16
        cmtst   v7.2d, v16.2d, v9.2d
        ldr     q4, [x20], #16
        eor     v2.16b, v2.16b, v13.16b
        str     q16, [x0], #16
        subs    x22, x22, #0x10
        eor     v17.16b, v6.16b, v5.16b
        and     v26.16b, v7.16b, v8.16b
        bmi     .Lxts_enc_5

        ext     v7.16b, v26.16b, v26.16b, #8
        add     v18.2d, v17.2d, v17.2d
        ldr     q5, [x20], #16
        eor     v3.16b, v3.16b, v14.16b
        str     q17, [x0], #16
        subs    x22, x22, #0x10
        eor     v18.16b, v18.16b, v7.16b
        bmi     .Lxts_enc_6

        // Seven whole blocks. The plain "sub" applies the same -0x10 bias the
        // other paths get from their final "subs".
        ldr     q6, [x20], #16
        eor     v4.16b, v4.16b, v15.16b
        eor     v5.16b, v5.16b, v16.16b
        str     q18, [x0]                   // next round tweak
        mov     x9, sp                      // pass key schedule
        mov     x10, x1
        add     x0, x19, #16
        sub     x22, x22, #0x10
        eor     v6.16b, v6.16b, v17.16b

        bl      _bsaes_encrypt8

        ldr     q16, [x0], #16
        eor     v0.16b, v0.16b, v11.16b
        eor     v1.16b, v1.16b, v12.16b
        ldr     q17, [x0], #16
        eor     v4.16b, v4.16b, v13.16b
        eor     v6.16b, v6.16b, v14.16b
        eor     v3.16b, v3.16b, v15.16b
        ldr     q11, [x0]                   // next round tweak
        str     q0, [x21], #16
        str     q1, [x21], #16
        eor     v0.16b, v7.16b, v16.16b
        eor     v1.16b, v2.16b, v17.16b
        str     q4, [x21], #16
        str     q6, [x21], #16
        str     q3, [x21], #16
        str     q0, [x21], #16
        str     q1, [x21], #16
        b       .Lxts_enc_done

        // Six whole blocks: finish XORing the inputs that were still pending
        // when the branch was taken, then run the 8-way core and keep six
        // results.
.align  4
.Lxts_enc_6:
        eor     v4.16b, v4.16b, v15.16b
        eor     v5.16b, v5.16b, v16.16b
        mov     x9, sp                      // pass key schedule
        mov     x10, x1                     // pass rounds
        add     x0, x19, #16

        bl      _bsaes_encrypt8

        ldr     q16, [x0], #16
        eor     v0.16b, v0.16b, v11.16b
        eor     v1.16b, v1.16b, v12.16b
        eor     v4.16b, v4.16b, v13.16b
        eor     v6.16b, v6.16b, v14.16b
        ldr     q11, [x0]                   // next round tweak
        eor     v3.16b, v3.16b, v15.16b
        str     q0, [x21], #16
        str     q1, [x21], #16
        eor     v0.16b, v7.16b, v16.16b
        str     q4, [x21], #16
        str     q6, [x21], #16
        str     q3, [x21], #16
        str     q0, [x21], #16
        b       .Lxts_enc_done

        // Five whole blocks.
.align  4
.Lxts_enc_5:
        eor     v3.16b, v3.16b, v14.16b
        eor     v4.16b, v4.16b, v15.16b
        mov     x9, sp                      // pass key schedule
        mov     x10, x1                     // pass rounds
        add     x0, x19, #16

        bl      _bsaes_encrypt8

        eor     v0.16b, v0.16b, v11.16b
        eor     v1.16b, v1.16b, v12.16b
        ldr     q11, [x0]                   // next round tweak
        eor     v4.16b, v4.16b, v13.16b
        eor     v6.16b, v6.16b, v14.16b
        eor     v3.16b, v3.16b, v15.16b
        str     q0, [x21], #16
        str     q1, [x21], #16
        str     q4, [x21], #16
        str     q6, [x21], #16
        str     q3, [x21], #16
        b       .Lxts_enc_done

        // Four whole blocks: the next tweak is still in a register (v15).
.align  4
.Lxts_enc_4:
        eor     v2.16b, v2.16b, v13.16b
        eor     v3.16b, v3.16b, v14.16b
        mov     x9, sp                      // pass key schedule
        mov     x10, x1                     // pass rounds
        add     x0, x19, #16

        bl      _bsaes_encrypt8

        eor     v0.16b, v0.16b, v11.16b
        eor     v1.16b, v1.16b, v12.16b
        eor     v4.16b, v4.16b, v13.16b
        eor     v6.16b, v6.16b, v14.16b
        mov     v11.16b, v15.16b            // next round tweak
        str     q0, [x21], #16
        str     q1, [x21], #16
        str     q4, [x21], #16
        str     q6, [x21], #16
        b       .Lxts_enc_done

        // Three whole blocks.
.align  4
.Lxts_enc_3:
        eor     v1.16b, v1.16b, v12.16b
        eor     v2.16b, v2.16b, v13.16b
        mov     x9, sp                      // pass key schedule
        mov     x10, x1                     // pass rounds
        add     x0, x19, #16

        bl      _bsaes_encrypt8

        eor     v0.16b, v0.16b, v11.16b
        eor     v1.16b, v1.16b, v12.16b
        eor     v4.16b, v4.16b, v13.16b
        mov     v11.16b, v14.16b            // next round tweak
        str     q0, [x21], #16
        str     q1, [x21], #16
        str     q4, [x21], #16
        b       .Lxts_enc_done

        // Two whole blocks.
.align  4
.Lxts_enc_2:
        eor     v0.16b, v0.16b, v11.16b
        eor     v1.16b, v1.16b, v12.16b
        mov     x9, sp                      // pass key schedule
        mov     x10, x1                     // pass rounds
        add     x0, x19, #16

        bl      _bsaes_encrypt8

        eor     v0.16b, v0.16b, v11.16b
        eor     v1.16b, v1.16b, v12.16b
        mov     v11.16b, v13.16b            // next round tweak
        str     q0, [x21], #16
        str     q1, [x21], #16
        b       .Lxts_enc_done

        // One whole block: handed to AES_encrypt in place in a temporary
        // stack slot instead of going through the 8-way core. The upper
        // halves of v11/v12 are parked in the low halves of v13/v14 across
        // the call and re-joined with trn1 afterwards.
.align  4
.Lxts_enc_1:
        eor     v0.16b, v0.16b, v11.16b
        sub     x0, sp, #16
        sub     x1, sp, #16
        mov     x2, x23
        mov     v13.d[0], v11.d[1]          // just in case AES_encrypt corrupts top half of callee-saved SIMD registers
        mov     v14.d[0], v12.d[1]
        str     q0, [sp, #-16]!

        bl      AES_encrypt

        ldr     q0, [sp], #16
        trn1    v13.2d, v11.2d, v13.2d
        trn1    v11.2d, v12.2d, v14.2d      // next round tweak
        eor     v0.16b, v0.16b, v13.16b
        str     q0, [x21], #16

        // Removing the -0x10 bias leaves the number of trailing bytes that do
        // not form a whole block; zero means there is nothing to steal.
.Lxts_enc_done:
        adds    x22, x22, #0x10
        beq     .Lxts_enc_ret

        sub     x6, x21, #0x10
        // Penultimate plaintext block produces final ciphertext part-block
        // plus remaining part of final plaintext block. Move ciphertext part
        // to final position and reuse penultimate ciphertext block buffer to
        // construct final plaintext block
.Lxts_enc_steal:
        ldrb    w0, [x20], #1
        ldrb    w1, [x21, #-0x10]
        strb    w0, [x21, #-0x10]
        strb    w1, [x21], #1

        subs    x22, x22, #1
        bhi     .Lxts_enc_steal

        // Finally encrypt the penultimate ciphertext block using the
        // last tweak
        ldr     q0, [x6]
        eor     v0.16b, v0.16b, v11.16b
        str     q0, [sp, #-16]!
        mov     x0, sp
        mov     x1, sp
        mov     x2, x23
        mov     x21, x6
        mov     v13.d[0], v11.d[1]          // just in case AES_encrypt corrupts top half of callee-saved SIMD registers

        bl      AES_encrypt

        trn1    v11.2d, v11.2d, v13.2d
        ldr     q0, [sp], #16
        eor     v0.16b, v0.16b, v11.16b
        str     q0, [x21]

.Lxts_enc_ret:

        // Zero everything between sp and x19 (the key schedule), 32 bytes per
        // step; this also pops it, leaving sp at the frame base.
        movi    v0.16b, #0
        movi    v1.16b, #0
.Lxts_enc_bzero: // wipe key schedule
        stp     q0, q1, [sp], #32
        cmp     sp, x19
        bne     .Lxts_enc_bzero

        ldp     x19, x20, [sp, #80]
        ldp     x21, x22, [sp, #96]
        ldr     x23, [sp, #112]
        ldp     d8, d9, [sp, #128]
        ldp     d10, d11, [sp, #144]
        ldp     d12, d13, [sp, #160]
        ldp     d14, d15, [sp, #176]
        ldp     x29, x30, [sp], #192
        ret
.size   ossl_bsaes_xts_encrypt,.-ossl_bsaes_xts_encrypt

// The assembler doesn't seem capable of de-duplicating these when expressed
// using `ldr qd,=` syntax, so assign a symbolic address
//
// Two 16-byte constants used by the tweak update in both XTS routines:
//   .Lxts_magic    = {1, 0x87}  carry terms ANDed with the per-lane carry mask
//   .Lxts_magic+16 = bit 62 set in each 64-bit lane, the cmtst operand
.align  5
.Lxts_magic:
.quad   1, 0x87, 0x4000000000000000, 0x4000000000000000

.globl  ossl_bsaes_xts_decrypt
.type   ossl_bsaes_xts_decrypt,%function
.align  4
// On entry:
//   x0 -> input ciphertext
//   x1 -> output plaintext
//   x2 = length of text in bytes (must be at least 16)
//   x3 -> key1 (used to decrypt the XORed ciphertext blocks)
//   x4 -> key2 (used to encrypt the initial vector to yield the initial tweak)
//   x5 -> 16-byte initial vector (typically, sector number)
// On exit:
//   Output plaintext filled in
//   No output registers, usual AAPCS64 register preservation
ossl_bsaes_xts_decrypt:
        AARCH64_VALID_CALL_TARGET
        // Stack layout:
        // sp ->
        //        nrounds*128-96 bytes: key schedule
        // x19 ->
        //        16 bytes: frame record
        //        4*16 bytes: tweak storage across _bsaes_decrypt8
        //        6*8 bytes: storage for 5 callee-saved general-purpose registers
        //                   (x19-x23; the slot at offset 120 is unused)
        //        8*8 bytes: storage for 8 callee-saved SIMD registers
        stp     x29, x30, [sp, #-192]!
        stp     x19, x20, [sp, #80]
        stp     x21, x22, [sp, #96]
        str     x23, [sp, #112]
        stp     d8, d9, [sp, #128]
        stp     d10, d11, [sp, #144]
        stp     d12, d13, [sp, #160]
        stp     d14, d15, [sp, #176]

        // Register roles for the rest of the function:
        //   x19 = base of the 192-byte frame (also the end of the key schedule)
        //   x20 = input pointer, x21 = output pointer
        //   x22 = running byte count, x23 = key1
        mov     x19, sp
        mov     x20, x0
        mov     x21, x1
        mov     x22, x2
        mov     x23, x3

        // generate initial tweak
        // The tweak is produced with AES_encrypt even when decrypting; the
        // result is popped from a temporary stack slot into v11.
        sub     sp, sp, #16
        mov     x0, x5                      // iv[]
        mov     x1, sp
        mov     x2, x4                      // key2
        bl      AES_encrypt
        ldr     q11, [sp], #16

        ldr     w1, [x23, #240]             // get # of rounds
        // allocate the key schedule on the stack
        add     x17, sp, #96
        sub     x17, x17, x1, lsl #7        // 128 bytes per inner round key, less 96 bytes

        // populate the key schedule
        // NOTE(review): _bsaes_key_convert is defined elsewhere in this file.
        // The code below assumes it returns with x17 addressing the last
        // round key slot (distinct from [sp], the round 0 key) and with
        // v7/v15 holding the values used here - confirm against that
        // routine's header.
        mov     x9, x23                     // pass key
        mov     x10, x1                     // pass # of rounds
        mov     sp, x17
        bl      _bsaes_key_convert
        ldr     q6, [sp]
        str     q15, [x17]                  // save last round key
        eor     v6.16b, v6.16b, v7.16b      // fix up round 0 key (by XORing with 0x63)
        str     q6, [sp]

        // When the length is not a multiple of 16, one extra whole block is
        // held back from the bulk processing; it is handled together with the
        // trailing bytes after .Lxts_dec_done. x30 is free as a scratch
        // register here because the return address is already in the frame.
        sub     x30, x22, #0x10
        tst     x22, #0xf                   // if not multiple of 16
        csel    x22, x30, x22, ne           // subtract another 16 bytes
        subs    x22, x22, #0x80

        blo     .Lxts_dec_short
        b       .Lxts_dec_loop

        // Main loop: 8 blocks (0x80 bytes) per iteration.
        //   v11         tweak for block 0 on entry to each iteration
        //   v12-v15     tweaks for blocks 1-4 (_bsaes_decrypt8 preserves
        //               v11-v15)
        //   [x19+16...] tweaks for blocks 5-7 followed by the next iteration's
        //               first tweak (written through x2, read back through x0)
        // Tweaks are derived exactly as in the encrypt loop: double per 64-bit
        // lane, then XOR in the lane-swapped carry term taken from .Lxts_magic.
.align  4
.Lxts_dec_loop:
        ldr     q8, .Lxts_magic
        mov     x10, x1                     // pass rounds
        add     x2, x19, #16
        ldr     q0, [x20], #16
        sshr    v1.2d, v11.2d, #63
        mov     x9, sp                      // pass key schedule
        ldr     q6, .Lxts_magic+16
        add     v2.2d, v11.2d, v11.2d
        cmtst   v3.2d, v11.2d, v6.2d
        and     v1.16b, v1.16b, v8.16b
        ext     v1.16b, v1.16b, v1.16b, #8
        and     v3.16b, v3.16b, v8.16b
        ldr     q4, [x20], #16
        eor     v12.16b, v2.16b, v1.16b
        eor     v1.16b, v4.16b, v12.16b
        eor     v0.16b, v0.16b, v11.16b
        cmtst   v2.2d, v12.2d, v6.2d
        add     v4.2d, v12.2d, v12.2d
        add     x0, x19, #16
        ext     v3.16b, v3.16b, v3.16b, #8
        and     v2.16b, v2.16b, v8.16b
        eor     v13.16b, v4.16b, v3.16b
        ldr     q3, [x20], #16
        ext     v4.16b, v2.16b, v2.16b, #8
        eor     v2.16b, v3.16b, v13.16b
        ldr     q3, [x20], #16
        add     v5.2d, v13.2d, v13.2d
        cmtst   v7.2d, v13.2d, v6.2d
        and     v7.16b, v7.16b, v8.16b
        ldr     q9, [x20], #16
        ext     v7.16b, v7.16b, v7.16b, #8
        ldr     q10, [x20], #16
        eor     v14.16b, v5.16b, v4.16b
        ldr     q16, [x20], #16
        add     v4.2d, v14.2d, v14.2d
        eor     v3.16b, v3.16b, v14.16b
        eor     v15.16b, v4.16b, v7.16b
        add     v5.2d, v15.2d, v15.2d
        ldr     q7, [x20], #16
        cmtst   v4.2d, v14.2d, v6.2d
        and     v17.16b, v4.16b, v8.16b
        cmtst   v18.2d, v15.2d, v6.2d
        eor     v4.16b, v9.16b, v15.16b
        ext     v9.16b, v17.16b, v17.16b, #8
        eor     v9.16b, v5.16b, v9.16b
        add     v17.2d, v9.2d, v9.2d
        and     v18.16b, v18.16b, v8.16b
        eor     v5.16b, v10.16b, v9.16b
        str     q9, [x2], #16
        ext     v10.16b, v18.16b, v18.16b, #8
        cmtst   v9.2d, v9.2d, v6.2d
        and     v9.16b, v9.16b, v8.16b
        eor     v10.16b, v17.16b, v10.16b
        cmtst   v17.2d, v10.2d, v6.2d
        eor     v6.16b, v16.16b, v10.16b
        str     q10, [x2], #16
        ext     v9.16b, v9.16b, v9.16b, #8
        add     v10.2d, v10.2d, v10.2d
        eor     v9.16b, v10.16b, v9.16b
        str     q9, [x2], #16
        eor     v7.16b, v7.16b, v9.16b
        add     v9.2d, v9.2d, v9.2d
        and     v8.16b, v17.16b, v8.16b
        ext     v8.16b, v8.16b, v8.16b, #8
        eor     v8.16b, v9.16b, v8.16b
        str     q8, [x2]                    // next round tweak

        bl      _bsaes_decrypt8

        // The eight results are consumed in register order
        // v0, v1, v6, v4, v2, v7, v3, v5 (blocks 0..7); note this differs
        // from the order used after _bsaes_encrypt8.
        eor     v6.16b, v6.16b, v13.16b
        eor     v0.16b, v0.16b, v11.16b
        ldr     q8, [x0], #16
        eor     v7.16b, v7.16b, v8.16b
        str     q0, [x21], #16
        eor     v0.16b, v1.16b, v12.16b
        ldr     q1, [x0], #16
        eor     v1.16b, v3.16b, v1.16b
        subs    x22, x22, #0x80
        eor     v2.16b, v2.16b, v15.16b
        eor     v3.16b, v4.16b, v14.16b
        ldr     q4, [x0], #16
        str     q0, [x21], #16
        ldr     q11, [x0]                   // next round tweak
        eor     v0.16b, v5.16b, v4.16b
        str     q6, [x21], #16
        str     q3, [x21], #16
        str     q2, [x21], #16
        str     q7, [x21], #16
        str     q1, [x21], #16
        str     q0, [x21], #16
        bpl     .Lxts_dec_loop

        // Tail: fewer than 8 whole blocks remain (not counting a block held
        // back for ciphertext stealing). Same dispatch scheme as the encrypt
        // tail: each "subs ... #0x10 / bmi" pair leaves for the handler
        // matching the number of blocks loaded so far.
.Lxts_dec_short:
        adds    x22, x22, #0x70
        bmi     .Lxts_dec_done

        ldr     q8, .Lxts_magic
        sshr    v1.2d, v11.2d, #63
        add     v2.2d, v11.2d, v11.2d
        ldr     q9, .Lxts_magic+16
        subs    x22, x22, #0x10
        ldr     q0, [x20], #16
        and     v1.16b, v1.16b, v8.16b
        cmtst   v3.2d, v11.2d, v9.2d
        ext     v1.16b, v1.16b, v1.16b, #8
        and     v3.16b, v3.16b, v8.16b
        eor     v12.16b, v2.16b, v1.16b
        ext     v1.16b, v3.16b, v3.16b, #8
        add     v2.2d, v12.2d, v12.2d
        cmtst   v3.2d, v12.2d, v9.2d
        eor     v13.16b, v2.16b, v1.16b
        and     v22.16b, v3.16b, v8.16b
        bmi     .Lxts_dec_1

        ext     v2.16b, v22.16b, v22.16b, #8
        add     v3.2d, v13.2d, v13.2d
        ldr     q1, [x20], #16
        cmtst   v4.2d, v13.2d, v9.2d
        subs    x22, x22, #0x10
        eor     v14.16b, v3.16b, v2.16b
        and     v23.16b, v4.16b, v8.16b
        bmi     .Lxts_dec_2

        ext     v3.16b, v23.16b, v23.16b, #8
        add     v4.2d, v14.2d, v14.2d
        ldr     q2, [x20], #16
        cmtst   v5.2d, v14.2d, v9.2d
        eor     v0.16b, v0.16b, v11.16b
        subs    x22, x22, #0x10
        eor     v15.16b, v4.16b, v3.16b
        and     v24.16b, v5.16b, v8.16b
        bmi     .Lxts_dec_3

        ext     v4.16b, v24.16b, v24.16b, #8
        add     v5.2d, v15.2d, v15.2d
        ldr     q3, [x20], #16
        cmtst   v6.2d, v15.2d, v9.2d
        eor     v1.16b, v1.16b, v12.16b
        subs    x22, x22, #0x10
        eor     v16.16b, v5.16b, v4.16b
        and     v25.16b, v6.16b, v8.16b
        bmi     .Lxts_dec_4

        // From the fifth block on, tweaks are spilled to the frame's tweak
        // storage through x0.
        ext     v5.16b, v25.16b, v25.16b, #8
        add     v6.2d, v16.2d, v16.2d
        add     x0, x19, #16
        cmtst   v7.2d, v16.2d, v9.2d
        ldr     q4, [x20], #16
        eor     v2.16b, v2.16b, v13.16b
        str     q16, [x0], #16
        subs    x22, x22, #0x10
        eor     v17.16b, v6.16b, v5.16b
        and     v26.16b, v7.16b, v8.16b
        bmi     .Lxts_dec_5

        ext     v7.16b, v26.16b, v26.16b, #8
        add     v18.2d, v17.2d, v17.2d
        ldr     q5, [x20], #16
        eor     v3.16b, v3.16b, v14.16b
        str     q17, [x0], #16
        subs    x22, x22, #0x10
        eor     v18.16b, v18.16b, v7.16b
        bmi     .Lxts_dec_6

        // Seven whole blocks. The plain "sub" applies the same -0x10 bias the
        // other paths get from their final "subs".
        ldr     q6, [x20], #16
        eor     v4.16b, v4.16b, v15.16b
        eor     v5.16b, v5.16b, v16.16b
        str     q18, [x0]                   // next round tweak
        mov     x9, sp                      // pass key schedule
        mov     x10, x1
        add     x0, x19, #16
        sub     x22, x22, #0x10
        eor     v6.16b, v6.16b, v17.16b

        bl      _bsaes_decrypt8

        ldr     q16, [x0], #16
        eor     v0.16b, v0.16b, v11.16b
        eor     v1.16b, v1.16b, v12.16b
        ldr     q17, [x0], #16
        eor     v6.16b, v6.16b, v13.16b
        eor     v4.16b, v4.16b, v14.16b
        eor     v2.16b, v2.16b, v15.16b
        ldr     q11, [x0]                   // next round tweak
        str     q0, [x21], #16
        str     q1, [x21], #16
        eor     v0.16b, v7.16b, v16.16b
        eor     v1.16b, v3.16b, v17.16b
        str     q6, [x21], #16
        str     q4, [x21], #16
        str     q2, [x21], #16
        str     q0, [x21], #16
        str     q1, [x21], #16
        b       .Lxts_dec_done

        // Six whole blocks.
.align  4
.Lxts_dec_6:
        eor     v4.16b, v4.16b, v15.16b
        eor     v5.16b, v5.16b, v16.16b
        mov     x9, sp                      // pass key schedule
        mov     x10, x1                     // pass rounds
        add     x0, x19, #16

        bl      _bsaes_decrypt8

        ldr     q16, [x0], #16
        eor     v0.16b, v0.16b, v11.16b
        eor     v1.16b, v1.16b, v12.16b
        eor     v6.16b, v6.16b, v13.16b
        eor     v4.16b, v4.16b, v14.16b
        ldr     q11, [x0]                   // next round tweak
        eor     v2.16b, v2.16b, v15.16b
        str     q0, [x21], #16
        str     q1, [x21], #16
        eor     v0.16b, v7.16b, v16.16b
        str     q6, [x21], #16
        str     q4, [x21], #16
        str     q2, [x21], #16
        str     q0, [x21], #16
        b       .Lxts_dec_done

        // Five whole blocks.
.align  4
.Lxts_dec_5:
        eor     v3.16b, v3.16b, v14.16b
        eor     v4.16b, v4.16b, v15.16b
        mov     x9, sp                      // pass key schedule
        mov     x10, x1                     // pass rounds
        add     x0, x19, #16

        bl      _bsaes_decrypt8

        eor     v0.16b, v0.16b, v11.16b
        eor     v1.16b, v1.16b, v12.16b
        ldr     q11, [x0]                   // next round tweak
        eor     v6.16b, v6.16b, v13.16b
        eor     v4.16b, v4.16b, v14.16b
        eor     v2.16b, v2.16b, v15.16b
        str     q0, [x21], #16
        str     q1, [x21], #16
        str     q6, [x21], #16
        str     q4, [x21], #16
        str     q2, [x21], #16
        b       .Lxts_dec_done

        // Four whole blocks: the next tweak is still in a register (v15).
.align  4
.Lxts_dec_4:
        eor     v2.16b, v2.16b, v13.16b
        eor     v3.16b, v3.16b, v14.16b
        mov     x9, sp                      // pass key schedule
        mov     x10, x1                     // pass rounds
        add     x0, x19, #16

        bl      _bsaes_decrypt8

        eor     v0.16b, v0.16b, v11.16b
        eor     v1.16b, v1.16b, v12.16b
        eor     v6.16b, v6.16b, v13.16b
        eor     v4.16b, v4.16b, v14.16b
        mov     v11.16b, v15.16b            // next round tweak
        str     q0, [x21], #16
        str     q1, [x21], #16
        str     q6, [x21], #16
        str     q4, [x21], #16
        b       .Lxts_dec_done

        // Three whole blocks.
.align  4
.Lxts_dec_3:
        eor     v1.16b, v1.16b, v12.16b
        eor     v2.16b, v2.16b, v13.16b
        mov     x9, sp                      // pass key schedule
        mov     x10, x1                     // pass rounds
        add     x0, x19, #16

        bl      _bsaes_decrypt8

        eor     v0.16b, v0.16b, v11.16b
        eor     v1.16b, v1.16b, v12.16b
        eor     v6.16b, v6.16b, v13.16b
        mov     v11.16b, v14.16b            // next round tweak
        str     q0, [x21], #16
        str     q1, [x21], #16
        str     q6, [x21], #16
        b       .Lxts_dec_done

        // Two whole blocks.
.align  4
.Lxts_dec_2:
        eor     v0.16b, v0.16b, v11.16b
        eor     v1.16b, v1.16b, v12.16b
        mov     x9, sp                      // pass key schedule
        mov     x10, x1                     // pass rounds
        add     x0, x19, #16

        bl      _bsaes_decrypt8

        eor     v0.16b, v0.16b, v11.16b
        eor     v1.16b, v1.16b, v12.16b
        mov     v11.16b, v13.16b            // next round tweak
        str     q0, [x21], #16
        str     q1, [x21], #16
        b       .Lxts_dec_done

        // One whole block: handed to AES_decrypt in place in a temporary
        // stack slot. The upper halves of v11/v12 are parked in the low
        // halves of v13/v14 across the call and re-joined with trn1.
.align  4
.Lxts_dec_1:
        eor     v0.16b, v0.16b, v11.16b
        sub     x0, sp, #16
        sub     x1, sp, #16
        mov     x2, x23
        mov     v13.d[0], v11.d[1]          // just in case AES_decrypt corrupts top half of callee-saved SIMD registers
        mov     v14.d[0], v12.d[1]
        str     q0, [sp, #-16]!

        bl      AES_decrypt

        ldr     q0, [sp], #16
        trn1    v13.2d, v11.2d, v13.2d
        trn1    v11.2d, v12.2d, v14.2d      // next round tweak
        eor     v0.16b, v0.16b, v13.16b
        str     q0, [x21], #16

        // Removing the -0x10 bias leaves the number of trailing bytes beyond
        // the held-back block; zero means the length was a multiple of 16.
.Lxts_dec_done:
        adds    x22, x22, #0x10
        beq     .Lxts_dec_ret

        // calculate one round of extra tweak for the stolen ciphertext
        // v11 keeps the current tweak; v12 becomes the one after it.
        ldr     q8, .Lxts_magic
        sshr    v6.2d, v11.2d, #63
        and     v6.16b, v6.16b, v8.16b
        add     v12.2d, v11.2d, v11.2d
        ext     v6.16b, v6.16b, v6.16b, #8
        eor     v12.16b, v12.16b, v6.16b

        // perform the final decryption with the last tweak value
        ldr     q0, [x20], #16
        eor     v0.16b, v0.16b, v12.16b
        str     q0, [sp, #-16]!
        mov     x0, sp
        mov     x1, sp
        mov     x2, x23
        mov     v13.d[0], v11.d[1]          // just in case AES_decrypt corrupts top half of callee-saved SIMD registers
        mov     v14.d[0], v12.d[1]

        bl      AES_decrypt

        trn1    v12.2d, v12.2d, v14.2d
        trn1    v11.2d, v11.2d, v13.2d
        ldr     q0, [sp], #16
        eor     v0.16b, v0.16b, v12.16b
        str     q0, [x21]

        mov     x6, x21
        // Penultimate ciphertext block produces final plaintext part-block
        // plus remaining part of final ciphertext block. Move plaintext part
        // to final position and reuse penultimate plaintext block buffer to
        // construct final ciphertext block
.Lxts_dec_steal:
        ldrb    w1, [x21]
        ldrb    w0, [x20], #1
        strb    w1, [x21, #0x10]
        strb    w0, [x21], #1

        subs    x22, x22, #1
        bhi     .Lxts_dec_steal

        // Finally decrypt the penultimate plaintext block using the
        // penultimate tweak
        // The low half of v13 still holds the upper half of v11 saved before
        // the previous AES_decrypt call, so it is not saved again here.
        ldr     q0, [x6]
        eor     v0.16b, v0.16b, v11.16b
        str     q0, [sp, #-16]!
        mov     x0, sp
        mov     x1, sp
        mov     x2, x23
        mov     x21, x6

        bl      AES_decrypt

        trn1    v11.2d, v11.2d, v13.2d
        ldr     q0, [sp], #16
        eor     v0.16b, v0.16b, v11.16b
        str     q0, [x21]

.Lxts_dec_ret:

        // Zero everything between sp and x19 (the key schedule), 32 bytes per
        // step; this also pops it, leaving sp at the frame base.
        movi    v0.16b, #0
        movi    v1.16b, #0
.Lxts_dec_bzero: // wipe key schedule
        stp     q0, q1, [sp], #32
        cmp     sp, x19
        bne     .Lxts_dec_bzero

        ldp     x19, x20, [sp, #80]
        ldp     x21, x22, [sp, #96]
        ldr     x23, [sp, #112]
        ldp     d8, d9, [sp, #128]
        ldp     d10, d11, [sp, #144]
        ldp     d12, d13, [sp, #160]
        ldp     d14, d15, [sp, #176]
        ldp     x29, x30, [sp], #192
        ret
.size   ossl_bsaes_xts_decrypt,.-ossl_bsaes_xts_decrypt