Home | History | Annotate | Line # | Download | only in arm
      1  1.1  christos #include "arm_asm.h"
      2  1.1  christos // Copyright 2021-2025 The OpenSSL Project Authors. All Rights Reserved.
      3  1.1  christos //
      4  1.1  christos // Licensed under the OpenSSL license (the "License").  You may not use
      5  1.1  christos // this file except in compliance with the License.  You can obtain a copy
      6  1.1  christos // in the file LICENSE in the source distribution or at
      7  1.1  christos // https://www.openssl.org/source/license.html
      8  1.1  christos //
      9  1.1  christos // ====================================================================
     10  1.1  christos // Written by Ben Avison <bavison (at) riscosopen.org> for the OpenSSL
     11  1.1  christos // project. Rights for redistribution and usage in source and binary
     12  1.1  christos // forms are granted according to the OpenSSL license.
     13  1.1  christos // ====================================================================
     14  1.1  christos //
     15  1.1  christos // This implementation is a translation of bsaes-armv7 for AArch64.
     16  1.1  christos // No attempt has been made to carry across the build switches for
     17  1.1  christos // kernel targets, since the Linux kernel crypto support has moved on
     18  1.1  christos // from when it was based on OpenSSL.
     19  1.1  christos 
     20  1.1  christos // A lot of hand-scheduling has been performed. Consequently, this code
     21  1.1  christos // doesn't factor out neatly into macros in the same way that the
     22  1.1  christos // AArch32 version did, and there is little to be gained by wrapping it
     23  1.1  christos // up in Perl, so it is presented as pure assembly.
     24  1.1  christos 
     25  1.1  christos 
     26  1.1  christos #include "crypto/arm_arch.h"
     27  1.1  christos 
     28  1.1  christos .text
     29  1.1  christos 
     30  1.1  christos 
     31  1.1  christos 
     32  1.1  christos 
     33  1.1  christos 
     34  1.1  christos .type	_bsaes_decrypt8,%function	// internal helper: bit-sliced AES decryption of the eight blocks held in v0-v7
     35  1.1  christos .align	4
     36  1.1  christos // On entry:
     37  1.1  christos //   x9 -> key (previously expanded using _bsaes_key_convert)
     38  1.1  christos //   x10 = number of rounds
     39  1.1  christos //   v0-v7 input data
     40  1.1  christos // On exit:
     41  1.1  christos //   x9-x11 corrupted (x9 walks the key schedule, x10 is the round counter, x11 walks the InvShiftRows tables)
     42  1.1  christos //   other general-purpose registers preserved
     43  1.1  christos //   v0-v7 output data
     44  1.1  christos //   v11-v15 preserved
     45  1.1  christos //   other SIMD registers corrupted (v8-v10 and v16-v31 are all used as scratch); NZCV flags are modified
     46  1.1  christos _bsaes_decrypt8:
     47  1.1  christos 	ldr	q8, [x9], #16	// v8 = first 16 bytes of key (round 0 key); x9 += 16
     48  1.1  christos 	adrp	x11, .LM0ISR	// x11 -> .LM0ISR, first of three contiguous 16-byte permutation tables
     49  1.1  christos 	add	x11, x11, #:lo12:.LM0ISR
     50  1.1  christos 	movi	v9.16b, #0x55	// mask for the shift-by-1 swap pass
     51  1.1  christos 	ldr	q10, [x11], #16	// v10 = .LM0ISR permutation; x11 now -> .LISR
     52  1.1  christos 	movi	v16.16b, #0x33	// mask for the shift-by-2 swap pass
     53  1.1  christos 	movi	v17.16b, #0x0f	// mask for the shift-by-4 swap pass
     54  1.1  christos 	sub	x10, x10, #1	// x10 = rounds - 1; decremented again (with flags) at .Ldec_sbox
     55  1.1  christos 	eor	v0.16b, v0.16b, v8.16b	// XOR the round 0 key into every block (interleaved with the permutes below)
     56  1.1  christos 	eor	v1.16b, v1.16b, v8.16b
     57  1.1  christos 	eor	v2.16b, v2.16b, v8.16b
     58  1.1  christos 	eor	v4.16b, v4.16b, v8.16b
     59  1.1  christos 	eor	v3.16b, v3.16b, v8.16b
     60  1.1  christos 	eor	v5.16b, v5.16b, v8.16b
     61  1.1  christos 	tbl	v0.16b, {v0.16b}, v10.16b	// reorder the bytes of each block using the .LM0ISR indices
     62  1.1  christos 	tbl	v1.16b, {v1.16b}, v10.16b
     63  1.1  christos 	tbl	v2.16b, {v2.16b}, v10.16b
     64  1.1  christos 	tbl	v4.16b, {v4.16b}, v10.16b
     65  1.1  christos 	eor	v6.16b, v6.16b, v8.16b
     66  1.1  christos 	eor	v7.16b, v7.16b, v8.16b
     67  1.1  christos 	tbl	v3.16b, {v3.16b}, v10.16b
     68  1.1  christos 	tbl	v5.16b, {v5.16b}, v10.16b
     69  1.1  christos 	tbl	v6.16b, {v6.16b}, v10.16b
     70  1.1  christos 	ushr	v8.2d, v0.2d, #1	// swap pass 1: t = ((a >> 1) ^ b) & 0x55; b ^= t; a ^= t << 1 on pairs (v0,v1) (v4,v5) (v2,v3) (v6,v7)
     71  1.1  christos 	tbl	v7.16b, {v7.16b}, v10.16b
     72  1.1  christos 	ushr	v10.2d, v4.2d, #1	// v10 is free again: the last tbl using it as an index has been issued
     73  1.1  christos 	ushr	v18.2d, v2.2d, #1
     74  1.1  christos 	eor	v8.16b, v8.16b, v1.16b
     75  1.1  christos 	ushr	v19.2d, v6.2d, #1
     76  1.1  christos 	eor	v10.16b, v10.16b, v5.16b
     77  1.1  christos 	eor	v18.16b, v18.16b, v3.16b
     78  1.1  christos 	and	v8.16b, v8.16b, v9.16b
     79  1.1  christos 	eor	v19.16b, v19.16b, v7.16b
     80  1.1  christos 	and	v10.16b, v10.16b, v9.16b
     81  1.1  christos 	and	v18.16b, v18.16b, v9.16b
     82  1.1  christos 	eor	v1.16b, v1.16b, v8.16b
     83  1.1  christos 	shl	v8.2d, v8.2d, #1
     84  1.1  christos 	and	v9.16b, v19.16b, v9.16b	// last use of the 0x55 mask: v9 is overwritten with the masked difference
     85  1.1  christos 	eor	v5.16b, v5.16b, v10.16b
     86  1.1  christos 	shl	v10.2d, v10.2d, #1
     87  1.1  christos 	eor	v3.16b, v3.16b, v18.16b
     88  1.1  christos 	shl	v18.2d, v18.2d, #1
     89  1.1  christos 	eor	v0.16b, v0.16b, v8.16b
     90  1.1  christos 	shl	v8.2d, v9.2d, #1
     91  1.1  christos 	eor	v7.16b, v7.16b, v9.16b
     92  1.1  christos 	eor	v4.16b, v4.16b, v10.16b
     93  1.1  christos 	eor	v2.16b, v2.16b, v18.16b
     94  1.1  christos 	ushr	v9.2d, v1.2d, #2	// swap pass 2 (shift 2, mask 0x33 in v16) on pairs (v1,v3) (v0,v2) (v5,v7) (v4,v6); overlaps the tail of pass 1
     95  1.1  christos 	eor	v6.16b, v6.16b, v8.16b
     96  1.1  christos 	ushr	v8.2d, v0.2d, #2
     97  1.1  christos 	ushr	v10.2d, v5.2d, #2
     98  1.1  christos 	ushr	v18.2d, v4.2d, #2
     99  1.1  christos 	eor	v9.16b, v9.16b, v3.16b
    100  1.1  christos 	eor	v8.16b, v8.16b, v2.16b
    101  1.1  christos 	eor	v10.16b, v10.16b, v7.16b
    102  1.1  christos 	eor	v18.16b, v18.16b, v6.16b
    103  1.1  christos 	and	v9.16b, v9.16b, v16.16b
    104  1.1  christos 	and	v8.16b, v8.16b, v16.16b
    105  1.1  christos 	and	v10.16b, v10.16b, v16.16b
    106  1.1  christos 	and	v16.16b, v18.16b, v16.16b	// last use of the 0x33 mask: v16 is overwritten
    107  1.1  christos 	eor	v3.16b, v3.16b, v9.16b
    108  1.1  christos 	shl	v9.2d, v9.2d, #2
    109  1.1  christos 	eor	v2.16b, v2.16b, v8.16b
    110  1.1  christos 	shl	v8.2d, v8.2d, #2
    111  1.1  christos 	eor	v7.16b, v7.16b, v10.16b
    112  1.1  christos 	shl	v10.2d, v10.2d, #2
    113  1.1  christos 	eor	v6.16b, v6.16b, v16.16b
    114  1.1  christos 	shl	v16.2d, v16.2d, #2
    115  1.1  christos 	eor	v1.16b, v1.16b, v9.16b
    116  1.1  christos 	eor	v0.16b, v0.16b, v8.16b
    117  1.1  christos 	eor	v5.16b, v5.16b, v10.16b
    118  1.1  christos 	eor	v4.16b, v4.16b, v16.16b
    119  1.1  christos 	ushr	v8.2d, v3.2d, #4	// swap pass 3 (shift 4, mask 0x0f in v17) on pairs (v3,v7) (v2,v6) (v1,v5) (v0,v4)
    120  1.1  christos 	ushr	v9.2d, v2.2d, #4
    121  1.1  christos 	ushr	v10.2d, v1.2d, #4
    122  1.1  christos 	ushr	v16.2d, v0.2d, #4
    123  1.1  christos 	eor	v8.16b, v8.16b, v7.16b
    124  1.1  christos 	eor	v9.16b, v9.16b, v6.16b
    125  1.1  christos 	eor	v10.16b, v10.16b, v5.16b
    126  1.1  christos 	eor	v16.16b, v16.16b, v4.16b
    127  1.1  christos 	and	v8.16b, v8.16b, v17.16b
    128  1.1  christos 	and	v9.16b, v9.16b, v17.16b
    129  1.1  christos 	and	v10.16b, v10.16b, v17.16b
    130  1.1  christos 	and	v16.16b, v16.16b, v17.16b
    131  1.1  christos 	eor	v7.16b, v7.16b, v8.16b
    132  1.1  christos 	shl	v8.2d, v8.2d, #4
    133  1.1  christos 	eor	v6.16b, v6.16b, v9.16b
    134  1.1  christos 	shl	v9.2d, v9.2d, #4
    135  1.1  christos 	eor	v5.16b, v5.16b, v10.16b
    136  1.1  christos 	shl	v10.2d, v10.2d, #4
    137  1.1  christos 	eor	v4.16b, v4.16b, v16.16b
    138  1.1  christos 	shl	v16.2d, v16.2d, #4
    139  1.1  christos 	eor	v3.16b, v3.16b, v8.16b
    140  1.1  christos 	eor	v2.16b, v2.16b, v9.16b
    141  1.1  christos 	eor	v1.16b, v1.16b, v10.16b
    142  1.1  christos 	eor	v0.16b, v0.16b, v16.16b
    143  1.1  christos 	b	.Ldec_sbox	// round 0 key and permute already applied above: enter the loop at the S-box stage
    144  1.1  christos .align	4
    145  1.1  christos .Ldec_loop:	// per round: XOR 128 bytes of key material into v0-v7, then permute bytes through v28
    146  1.1  christos 	ld1	{v16.16b, v17.16b, v18.16b, v19.16b}, [x9], #64	// key words for v0-v3; x9 += 64
    147  1.1  christos 	ldp	q8, q9, [x9], #32	// key words for v4, v5; x9 += 32
    148  1.1  christos 	eor	v0.16b, v16.16b, v0.16b
    149  1.1  christos 	ldr	q10, [x9], #16	// key word for v6; x9 += 16
    150  1.1  christos 	eor	v1.16b, v17.16b, v1.16b
    151  1.1  christos 	ldr	q16, [x9], #16	// key word for v7 (v16 is reused: its first value was consumed by the eor into v0)
    152  1.1  christos 	eor	v2.16b, v18.16b, v2.16b
    153  1.1  christos 	eor	v3.16b, v19.16b, v3.16b
    154  1.1  christos 	eor	v4.16b, v8.16b, v4.16b
    155  1.1  christos 	eor	v5.16b, v9.16b, v5.16b
    156  1.1  christos 	eor	v6.16b, v10.16b, v6.16b
    157  1.1  christos 	eor	v7.16b, v16.16b, v7.16b
    158  1.1  christos 	tbl	v0.16b, {v0.16b}, v28.16b	// v28 was loaded at the end of the previous pass (.LISR, or .LISRM0 for the last round)
    159  1.1  christos 	tbl	v1.16b, {v1.16b}, v28.16b
    160  1.1  christos 	tbl	v2.16b, {v2.16b}, v28.16b
    161  1.1  christos 	tbl	v3.16b, {v3.16b}, v28.16b
    162  1.1  christos 	tbl	v4.16b, {v4.16b}, v28.16b
    163  1.1  christos 	tbl	v5.16b, {v5.16b}, v28.16b
    164  1.1  christos 	tbl	v6.16b, {v6.16b}, v28.16b
    165  1.1  christos 	tbl	v7.16b, {v7.16b}, v28.16b
    166  1.1  christos .Ldec_sbox:	// straight-line boolean network over v0-v7; presumably the bit-sliced inverse S-box, going by the label
    167  1.1  christos 	eor	v1.16b, v1.16b, v4.16b
    168  1.1  christos 	eor	v3.16b, v3.16b, v4.16b
    169  1.1  christos 	subs	x10, x10, #1	// round counter; the flags set here are consumed by the bcc and bne further down (only SIMD ops in between)
    170  1.1  christos 	eor	v4.16b, v4.16b, v7.16b
    171  1.1  christos 	eor	v2.16b, v2.16b, v7.16b
    172  1.1  christos 	eor	v1.16b, v1.16b, v6.16b
    173  1.1  christos 	eor	v6.16b, v6.16b, v4.16b
    174  1.1  christos 	eor	v2.16b, v2.16b, v5.16b
    175  1.1  christos 	eor	v0.16b, v0.16b, v1.16b
    176  1.1  christos 	eor	v7.16b, v7.16b, v6.16b
    177  1.1  christos 	eor	v8.16b, v6.16b, v2.16b
    178  1.1  christos 	and	v9.16b, v4.16b, v6.16b
    179  1.1  christos 	eor	v10.16b, v2.16b, v6.16b
    180  1.1  christos 	eor	v3.16b, v3.16b, v0.16b
    181  1.1  christos 	eor	v5.16b, v5.16b, v0.16b
    182  1.1  christos 	eor	v16.16b, v7.16b, v4.16b
    183  1.1  christos 	eor	v17.16b, v4.16b, v0.16b
    184  1.1  christos 	and	v18.16b, v0.16b, v2.16b
    185  1.1  christos 	eor	v19.16b, v7.16b, v4.16b
    186  1.1  christos 	eor	v1.16b, v1.16b, v3.16b
    187  1.1  christos 	eor	v20.16b, v3.16b, v0.16b
    188  1.1  christos 	eor	v21.16b, v5.16b, v2.16b
    189  1.1  christos 	eor	v22.16b, v3.16b, v7.16b
    190  1.1  christos 	and	v8.16b, v17.16b, v8.16b
    191  1.1  christos 	orr	v17.16b, v3.16b, v5.16b
    192  1.1  christos 	eor	v23.16b, v1.16b, v6.16b
    193  1.1  christos 	eor	v24.16b, v20.16b, v16.16b
    194  1.1  christos 	eor	v25.16b, v1.16b, v5.16b
    195  1.1  christos 	orr	v26.16b, v20.16b, v21.16b
    196  1.1  christos 	and	v20.16b, v20.16b, v21.16b
    197  1.1  christos 	and	v27.16b, v7.16b, v1.16b
    198  1.1  christos 	eor	v21.16b, v21.16b, v23.16b
    199  1.1  christos 	orr	v28.16b, v16.16b, v23.16b	// v28 is scratch here; the permutation it held is reloaded before the next .Ldec_loop pass
    200  1.1  christos 	orr	v29.16b, v22.16b, v25.16b
    201  1.1  christos 	eor	v26.16b, v26.16b, v8.16b
    202  1.1  christos 	and	v16.16b, v16.16b, v23.16b
    203  1.1  christos 	and	v22.16b, v22.16b, v25.16b
    204  1.1  christos 	and	v21.16b, v24.16b, v21.16b
    205  1.1  christos 	eor	v8.16b, v28.16b, v8.16b
    206  1.1  christos 	eor	v23.16b, v5.16b, v2.16b
    207  1.1  christos 	eor	v24.16b, v1.16b, v6.16b
    208  1.1  christos 	eor	v16.16b, v16.16b, v22.16b
    209  1.1  christos 	eor	v22.16b, v3.16b, v0.16b
    210  1.1  christos 	eor	v25.16b, v29.16b, v21.16b
    211  1.1  christos 	eor	v21.16b, v26.16b, v21.16b
    212  1.1  christos 	eor	v8.16b, v8.16b, v20.16b
    213  1.1  christos 	eor	v26.16b, v23.16b, v24.16b
    214  1.1  christos 	eor	v16.16b, v16.16b, v20.16b
    215  1.1  christos 	eor	v28.16b, v22.16b, v19.16b
    216  1.1  christos 	eor	v20.16b, v25.16b, v20.16b
    217  1.1  christos 	eor	v9.16b, v21.16b, v9.16b
    218  1.1  christos 	eor	v8.16b, v8.16b, v18.16b
    219  1.1  christos 	eor	v18.16b, v5.16b, v1.16b
    220  1.1  christos 	eor	v21.16b, v16.16b, v17.16b
    221  1.1  christos 	eor	v16.16b, v16.16b, v17.16b
    222  1.1  christos 	eor	v17.16b, v20.16b, v27.16b
    223  1.1  christos 	eor	v20.16b, v3.16b, v7.16b
    224  1.1  christos 	eor	v25.16b, v9.16b, v8.16b
    225  1.1  christos 	eor	v27.16b, v0.16b, v4.16b
    226  1.1  christos 	and	v29.16b, v9.16b, v17.16b
    227  1.1  christos 	eor	v30.16b, v8.16b, v29.16b
    228  1.1  christos 	eor	v31.16b, v21.16b, v29.16b
    229  1.1  christos 	eor	v29.16b, v21.16b, v29.16b
    230  1.1  christos 	bsl	v30.16b, v17.16b, v21.16b	// bsl: destination acts as the bit-select mask between the two sources
    231  1.1  christos 	bsl	v31.16b, v9.16b, v8.16b
    232  1.1  christos 	bsl	v16.16b, v30.16b, v29.16b
    233  1.1  christos 	bsl	v21.16b, v29.16b, v30.16b
    234  1.1  christos 	eor	v8.16b, v31.16b, v30.16b
    235  1.1  christos 	and	v1.16b, v1.16b, v31.16b
    236  1.1  christos 	and	v9.16b, v16.16b, v31.16b
    237  1.1  christos 	and	v6.16b, v6.16b, v30.16b
    238  1.1  christos 	eor	v16.16b, v17.16b, v21.16b
    239  1.1  christos 	and	v4.16b, v4.16b, v30.16b
    240  1.1  christos 	eor	v17.16b, v8.16b, v30.16b
    241  1.1  christos 	and	v21.16b, v24.16b, v8.16b
    242  1.1  christos 	eor	v9.16b, v9.16b, v25.16b
    243  1.1  christos 	and	v19.16b, v19.16b, v8.16b
    244  1.1  christos 	eor	v24.16b, v30.16b, v16.16b
    245  1.1  christos 	eor	v25.16b, v30.16b, v16.16b
    246  1.1  christos 	and	v7.16b, v7.16b, v17.16b
    247  1.1  christos 	and	v10.16b, v10.16b, v16.16b
    248  1.1  christos 	eor	v29.16b, v9.16b, v16.16b
    249  1.1  christos 	eor	v30.16b, v31.16b, v9.16b
    250  1.1  christos 	and	v0.16b, v24.16b, v0.16b
    251  1.1  christos 	and	v9.16b, v18.16b, v9.16b
    252  1.1  christos 	and	v2.16b, v25.16b, v2.16b
    253  1.1  christos 	eor	v10.16b, v10.16b, v6.16b
    254  1.1  christos 	eor	v18.16b, v29.16b, v16.16b
    255  1.1  christos 	and	v5.16b, v30.16b, v5.16b
    256  1.1  christos 	eor	v24.16b, v8.16b, v29.16b
    257  1.1  christos 	and	v25.16b, v26.16b, v29.16b
    258  1.1  christos 	and	v26.16b, v28.16b, v29.16b
    259  1.1  christos 	eor	v8.16b, v8.16b, v29.16b
    260  1.1  christos 	eor	v17.16b, v17.16b, v18.16b
    261  1.1  christos 	eor	v5.16b, v1.16b, v5.16b
    262  1.1  christos 	and	v23.16b, v24.16b, v23.16b
    263  1.1  christos 	eor	v21.16b, v21.16b, v25.16b
    264  1.1  christos 	eor	v19.16b, v19.16b, v26.16b
    265  1.1  christos 	eor	v0.16b, v4.16b, v0.16b
    266  1.1  christos 	and	v3.16b, v17.16b, v3.16b
    267  1.1  christos 	eor	v1.16b, v9.16b, v1.16b
    268  1.1  christos 	eor	v9.16b, v25.16b, v23.16b
    269  1.1  christos 	eor	v5.16b, v5.16b, v21.16b
    270  1.1  christos 	eor	v2.16b, v6.16b, v2.16b
    271  1.1  christos 	and	v6.16b, v8.16b, v22.16b
    272  1.1  christos 	eor	v3.16b, v7.16b, v3.16b
    273  1.1  christos 	and	v8.16b, v20.16b, v18.16b
    274  1.1  christos 	eor	v10.16b, v10.16b, v9.16b
    275  1.1  christos 	eor	v0.16b, v0.16b, v19.16b
    276  1.1  christos 	eor	v9.16b, v1.16b, v9.16b
    277  1.1  christos 	eor	v1.16b, v2.16b, v21.16b
    278  1.1  christos 	eor	v3.16b, v3.16b, v19.16b
    279  1.1  christos 	and	v16.16b, v27.16b, v16.16b
    280  1.1  christos 	eor	v17.16b, v26.16b, v6.16b
    281  1.1  christos 	eor	v6.16b, v8.16b, v7.16b
    282  1.1  christos 	eor	v7.16b, v1.16b, v9.16b
    283  1.1  christos 	eor	v1.16b, v5.16b, v3.16b
    284  1.1  christos 	eor	v2.16b, v10.16b, v3.16b
    285  1.1  christos 	eor	v4.16b, v16.16b, v4.16b
    286  1.1  christos 	eor	v8.16b, v6.16b, v17.16b
    287  1.1  christos 	eor	v5.16b, v9.16b, v3.16b
    288  1.1  christos 	eor	v9.16b, v0.16b, v1.16b
    289  1.1  christos 	eor	v6.16b, v7.16b, v1.16b
    290  1.1  christos 	eor	v0.16b, v4.16b, v17.16b
    291  1.1  christos 	eor	v4.16b, v8.16b, v7.16b
    292  1.1  christos 	eor	v7.16b, v9.16b, v2.16b
    293  1.1  christos 	eor	v8.16b, v3.16b, v0.16b
    294  1.1  christos 	eor	v7.16b, v7.16b, v5.16b
    295  1.1  christos 	eor	v3.16b, v4.16b, v7.16b
    296  1.1  christos 	eor	v4.16b, v7.16b, v0.16b
    297  1.1  christos 	eor	v7.16b, v8.16b, v3.16b
    298  1.1  christos 	bcc	.Ldec_done	// carry clear = the subs above borrowed (x10 was already 0): all rounds done
    299  1.1  christos 	ext	v8.16b, v0.16b, v0.16b, #8	// NOTE(review): the ext/eor sequence down to the bne looks like the inverse MixColumns step - not labelled in source, confirm
    300  1.1  christos 	ext	v9.16b, v1.16b, v1.16b, #8
    301  1.1  christos 	ldr	q28, [x11]                  // load from .LISR in common case (x10 > 0)
    302  1.1  christos 	ext	v10.16b, v6.16b, v6.16b, #8
    303  1.1  christos 	ext	v16.16b, v3.16b, v3.16b, #8
    304  1.1  christos 	ext	v17.16b, v5.16b, v5.16b, #8
    305  1.1  christos 	ext	v18.16b, v4.16b, v4.16b, #8
    306  1.1  christos 	eor	v8.16b, v8.16b, v0.16b
    307  1.1  christos 	eor	v9.16b, v9.16b, v1.16b
    308  1.1  christos 	eor	v10.16b, v10.16b, v6.16b
    309  1.1  christos 	eor	v16.16b, v16.16b, v3.16b
    310  1.1  christos 	eor	v17.16b, v17.16b, v5.16b
    311  1.1  christos 	ext	v19.16b, v2.16b, v2.16b, #8
    312  1.1  christos 	ext	v20.16b, v7.16b, v7.16b, #8
    313  1.1  christos 	eor	v18.16b, v18.16b, v4.16b
    314  1.1  christos 	eor	v6.16b, v6.16b, v8.16b
    315  1.1  christos 	eor	v8.16b, v2.16b, v10.16b
    316  1.1  christos 	eor	v4.16b, v4.16b, v9.16b
    317  1.1  christos 	eor	v2.16b, v19.16b, v2.16b
    318  1.1  christos 	eor	v9.16b, v20.16b, v7.16b
    319  1.1  christos 	eor	v0.16b, v0.16b, v16.16b
    320  1.1  christos 	eor	v1.16b, v1.16b, v16.16b
    321  1.1  christos 	eor	v6.16b, v6.16b, v17.16b
    322  1.1  christos 	eor	v8.16b, v8.16b, v16.16b
    323  1.1  christos 	eor	v7.16b, v7.16b, v18.16b
    324  1.1  christos 	eor	v4.16b, v4.16b, v16.16b
    325  1.1  christos 	eor	v2.16b, v3.16b, v2.16b
    326  1.1  christos 	eor	v1.16b, v1.16b, v17.16b
    327  1.1  christos 	eor	v3.16b, v5.16b, v9.16b
    328  1.1  christos 	eor	v5.16b, v8.16b, v17.16b
    329  1.1  christos 	eor	v7.16b, v7.16b, v17.16b
    330  1.1  christos 	ext	v8.16b, v0.16b, v0.16b, #12
    331  1.1  christos 	ext	v9.16b, v6.16b, v6.16b, #12
    332  1.1  christos 	ext	v10.16b, v4.16b, v4.16b, #12
    333  1.1  christos 	ext	v16.16b, v1.16b, v1.16b, #12
    334  1.1  christos 	ext	v17.16b, v5.16b, v5.16b, #12
    335  1.1  christos 	ext	v18.16b, v7.16b, v7.16b, #12
    336  1.1  christos 	eor	v0.16b, v0.16b, v8.16b
    337  1.1  christos 	eor	v6.16b, v6.16b, v9.16b
    338  1.1  christos 	eor	v4.16b, v4.16b, v10.16b
    339  1.1  christos 	ext	v19.16b, v2.16b, v2.16b, #12
    340  1.1  christos 	ext	v20.16b, v3.16b, v3.16b, #12
    341  1.1  christos 	eor	v1.16b, v1.16b, v16.16b
    342  1.1  christos 	eor	v5.16b, v5.16b, v17.16b
    343  1.1  christos 	eor	v7.16b, v7.16b, v18.16b
    344  1.1  christos 	eor	v2.16b, v2.16b, v19.16b
    345  1.1  christos 	eor	v16.16b, v16.16b, v0.16b
    346  1.1  christos 	eor	v3.16b, v3.16b, v20.16b
    347  1.1  christos 	eor	v17.16b, v17.16b, v4.16b
    348  1.1  christos 	eor	v10.16b, v10.16b, v6.16b
    349  1.1  christos 	ext	v0.16b, v0.16b, v0.16b, #8
    350  1.1  christos 	eor	v9.16b, v9.16b, v1.16b
    351  1.1  christos 	ext	v1.16b, v1.16b, v1.16b, #8
    352  1.1  christos 	eor	v8.16b, v8.16b, v3.16b
    353  1.1  christos 	eor	v16.16b, v16.16b, v3.16b
    354  1.1  christos 	eor	v18.16b, v18.16b, v5.16b
    355  1.1  christos 	eor	v19.16b, v19.16b, v7.16b
    356  1.1  christos 	ext	v21.16b, v5.16b, v5.16b, #8
    357  1.1  christos 	ext	v5.16b, v7.16b, v7.16b, #8
    358  1.1  christos 	eor	v7.16b, v20.16b, v2.16b
    359  1.1  christos 	ext	v4.16b, v4.16b, v4.16b, #8
    360  1.1  christos 	ext	v20.16b, v3.16b, v3.16b, #8
    361  1.1  christos 	eor	v17.16b, v17.16b, v3.16b
    362  1.1  christos 	ext	v2.16b, v2.16b, v2.16b, #8
    363  1.1  christos 	eor	v3.16b, v10.16b, v3.16b
    364  1.1  christos 	ext	v10.16b, v6.16b, v6.16b, #8
    365  1.1  christos 	eor	v0.16b, v0.16b, v8.16b
    366  1.1  christos 	eor	v1.16b, v1.16b, v16.16b
    367  1.1  christos 	eor	v5.16b, v5.16b, v18.16b
    368  1.1  christos 	eor	v3.16b, v3.16b, v4.16b
    369  1.1  christos 	eor	v7.16b, v20.16b, v7.16b
    370  1.1  christos 	eor	v6.16b, v2.16b, v19.16b
    371  1.1  christos 	eor	v4.16b, v21.16b, v17.16b
    372  1.1  christos 	eor	v2.16b, v10.16b, v9.16b
    373  1.1  christos 	bne	.Ldec_loop	// x10 != 0 after the subs: another middle round follows, v28 = .LISR stays in effect
    374  1.1  christos 	ldr	q28, [x11, #16]!            // load from .LISRM0 on last round (x10 == 0)
    375  1.1  christos 	b	.Ldec_loop
    376  1.1  christos .align	4
    377  1.1  christos .Ldec_done:	// three swap passes (shifts 1, 2, 4) over v0-v7, then XOR 16 more key bytes into every output
    378  1.1  christos 	ushr	v8.2d, v0.2d, #1	// pass 1 (shift 1, mask 0x55 in v9) on pairs (v0,v1) (v2,v7) (v6,v4) (v3,v5)
    379  1.1  christos 	movi	v9.16b, #0x55
    380  1.1  christos 	ldr	q10, [x9]	// v10 = 16 bytes at the current key pointer (no writeback); applied to v0-v7 at the end
    381  1.1  christos 	ushr	v16.2d, v2.2d, #1
    382  1.1  christos 	movi	v17.16b, #0x33
    383  1.1  christos 	ushr	v18.2d, v6.2d, #1
    384  1.1  christos 	movi	v19.16b, #0x0f
    385  1.1  christos 	eor	v8.16b, v8.16b, v1.16b
    386  1.1  christos 	ushr	v20.2d, v3.2d, #1
    387  1.1  christos 	eor	v16.16b, v16.16b, v7.16b
    388  1.1  christos 	eor	v18.16b, v18.16b, v4.16b
    389  1.1  christos 	and	v8.16b, v8.16b, v9.16b
    390  1.1  christos 	eor	v20.16b, v20.16b, v5.16b
    391  1.1  christos 	and	v16.16b, v16.16b, v9.16b
    392  1.1  christos 	and	v18.16b, v18.16b, v9.16b
    393  1.1  christos 	shl	v21.2d, v8.2d, #1
    394  1.1  christos 	eor	v1.16b, v1.16b, v8.16b
    395  1.1  christos 	and	v8.16b, v20.16b, v9.16b
    396  1.1  christos 	eor	v7.16b, v7.16b, v16.16b
    397  1.1  christos 	shl	v9.2d, v16.2d, #1
    398  1.1  christos 	eor	v4.16b, v4.16b, v18.16b
    399  1.1  christos 	shl	v16.2d, v18.2d, #1
    400  1.1  christos 	eor	v0.16b, v0.16b, v21.16b
    401  1.1  christos 	shl	v18.2d, v8.2d, #1
    402  1.1  christos 	eor	v5.16b, v5.16b, v8.16b
    403  1.1  christos 	eor	v2.16b, v2.16b, v9.16b
    404  1.1  christos 	eor	v6.16b, v6.16b, v16.16b
    405  1.1  christos 	ushr	v8.2d, v1.2d, #2	// pass 2 (shift 2, mask 0x33 in v17) on pairs (v1,v4) (v0,v6) (v7,v5) (v2,v3)
    406  1.1  christos 	eor	v3.16b, v3.16b, v18.16b
    407  1.1  christos 	ushr	v9.2d, v0.2d, #2
    408  1.1  christos 	ushr	v16.2d, v7.2d, #2
    409  1.1  christos 	ushr	v18.2d, v2.2d, #2
    410  1.1  christos 	eor	v8.16b, v8.16b, v4.16b
    411  1.1  christos 	eor	v9.16b, v9.16b, v6.16b
    412  1.1  christos 	eor	v16.16b, v16.16b, v5.16b
    413  1.1  christos 	eor	v18.16b, v18.16b, v3.16b
    414  1.1  christos 	and	v8.16b, v8.16b, v17.16b
    415  1.1  christos 	and	v9.16b, v9.16b, v17.16b
    416  1.1  christos 	and	v16.16b, v16.16b, v17.16b
    417  1.1  christos 	and	v17.16b, v18.16b, v17.16b
    418  1.1  christos 	eor	v4.16b, v4.16b, v8.16b
    419  1.1  christos 	shl	v8.2d, v8.2d, #2
    420  1.1  christos 	eor	v6.16b, v6.16b, v9.16b
    421  1.1  christos 	shl	v9.2d, v9.2d, #2
    422  1.1  christos 	eor	v5.16b, v5.16b, v16.16b
    423  1.1  christos 	shl	v16.2d, v16.2d, #2
    424  1.1  christos 	eor	v3.16b, v3.16b, v17.16b
    425  1.1  christos 	shl	v17.2d, v17.2d, #2
    426  1.1  christos 	eor	v1.16b, v1.16b, v8.16b
    427  1.1  christos 	eor	v0.16b, v0.16b, v9.16b
    428  1.1  christos 	eor	v7.16b, v7.16b, v16.16b
    429  1.1  christos 	eor	v2.16b, v2.16b, v17.16b
    430  1.1  christos 	ushr	v8.2d, v4.2d, #4	// pass 3 (shift 4, mask 0x0f in v19) on pairs (v4,v5) (v6,v3) (v1,v7) (v0,v2)
    431  1.1  christos 	ushr	v9.2d, v6.2d, #4
    432  1.1  christos 	ushr	v16.2d, v1.2d, #4
    433  1.1  christos 	ushr	v17.2d, v0.2d, #4
    434  1.1  christos 	eor	v8.16b, v8.16b, v5.16b
    435  1.1  christos 	eor	v9.16b, v9.16b, v3.16b
    436  1.1  christos 	eor	v16.16b, v16.16b, v7.16b
    437  1.1  christos 	eor	v17.16b, v17.16b, v2.16b
    438  1.1  christos 	and	v8.16b, v8.16b, v19.16b
    439  1.1  christos 	and	v9.16b, v9.16b, v19.16b
    440  1.1  christos 	and	v16.16b, v16.16b, v19.16b
    441  1.1  christos 	and	v17.16b, v17.16b, v19.16b
    442  1.1  christos 	eor	v5.16b, v5.16b, v8.16b
    443  1.1  christos 	shl	v8.2d, v8.2d, #4
    444  1.1  christos 	eor	v3.16b, v3.16b, v9.16b
    445  1.1  christos 	shl	v9.2d, v9.2d, #4
    446  1.1  christos 	eor	v7.16b, v7.16b, v16.16b
    447  1.1  christos 	shl	v16.2d, v16.2d, #4
    448  1.1  christos 	eor	v2.16b, v2.16b, v17.16b
    449  1.1  christos 	shl	v17.2d, v17.2d, #4
    450  1.1  christos 	eor	v4.16b, v4.16b, v8.16b
    451  1.1  christos 	eor	v6.16b, v6.16b, v9.16b
    452  1.1  christos 	eor	v7.16b, v7.16b, v10.16b	// from here on: XOR v10 into each of v0-v7 (interleaved with the tail of pass 3)
    453  1.1  christos 	eor	v1.16b, v1.16b, v16.16b
    454  1.1  christos 	eor	v2.16b, v2.16b, v10.16b
    455  1.1  christos 	eor	v0.16b, v0.16b, v17.16b
    456  1.1  christos 	eor	v4.16b, v4.16b, v10.16b
    457  1.1  christos 	eor	v6.16b, v6.16b, v10.16b
    458  1.1  christos 	eor	v3.16b, v3.16b, v10.16b
    459  1.1  christos 	eor	v5.16b, v5.16b, v10.16b
    460  1.1  christos 	eor	v1.16b, v1.16b, v10.16b
    461  1.1  christos 	eor	v0.16b, v0.16b, v10.16b
    462  1.1  christos 	ret
    463  1.1  christos .size	_bsaes_decrypt8,.-_bsaes_decrypt8
    464  1.1  christos 
    465  1.1  christos .section	.rodata	// read-only tables of 16-byte tbl index vectors used by the bsaes routines
    466  1.1  christos .type	_bsaes_consts,%object
    467  1.1  christos .align	6	// 2^6 = 64-byte alignment for the start of the table block
    468  1.1  christos _bsaes_consts:
    469  1.1  christos // InvShiftRows constants
    470  1.1  christos // Used in _bsaes_decrypt8, which assumes contiguity: it takes the address of
    471  1.1  christos // .LM0ISR used with round 0 key   (.LM0ISR once, then reaches the next two tables by adding 16 to that pointer,
    472  1.1  christos // .LISR   used with middle round keys   so these three entries must stay adjacent, 16 bytes each, in this order)
    473  1.1  christos // .LISRM0 used with final round key
    474  1.1  christos .LM0ISR:
    475  1.1  christos .quad	0x0a0e0206070b0f03, 0x0004080c0d010509
    476  1.1  christos .LISR:
    477  1.1  christos .quad	0x0504070602010003, 0x0f0e0d0c080b0a09
    478  1.1  christos .LISRM0:
    479  1.1  christos .quad	0x01040b0e0205080f, 0x0306090c00070a0d
    480  1.1  christos 
    481  1.1  christos // ShiftRows constants
    482  1.1  christos // Used in _bsaes_encrypt8, which assumes contiguity (same layout rule as the InvShiftRows group: keep order and 16-byte spacing)
    483  1.1  christos // .LM0SR used with round 0 key
    484  1.1  christos // .LSR   used with middle round keys
    485  1.1  christos // .LSRM0 used with final round key
    486  1.1  christos .LM0SR:
    487  1.1  christos .quad	0x0a0e02060f03070b, 0x0004080c05090d01
    488  1.1  christos .LSR:
    489  1.1  christos .quad	0x0504070600030201, 0x0f0e0d0c0a09080b
    490  1.1  christos .LSRM0:
    491  1.1  christos .quad	0x0304090e00050a0f, 0x01060b0c0207080d
    492  1.1  christos 
    493  1.1  christos .LM0_bigendian:	// NOTE(review): not referenced in the visible part of this file; presumably a byte-order-specific permutation for key conversion - confirm
    494  1.1  christos .quad	0x02060a0e03070b0f, 0x0004080c0105090d
    495  1.1  christos .LM0_littleendian:	// NOTE(review): little-endian counterpart of the entry above; user not visible here - confirm
    496  1.1  christos .quad	0x0105090d0004080c, 0x03070b0f02060a0e
    497  1.1  christos 
    498  1.1  christos // Used in ossl_bsaes_ctr32_encrypt_blocks, prior to dropping into
    499  1.1  christos // _bsaes_encrypt8_alt, for round 0 key in place of .LM0SR
    500  1.1  christos .LREVM0SR:
    501  1.1  christos .quad	0x090d01050c000408, 0x03070b0f060a0e02
    502  1.1  christos 
    503  1.1  christos .align	6	// pad to a 64-byte boundary so the .size below covers whole 64-byte units
    504  1.1  christos .size	_bsaes_consts,.-_bsaes_consts
    505  1.1  christos 
    506  1.1  christos .previous	// switch back to the section that was active before .rodata (.text)
    507  1.1  christos 
    508  1.1  christos .type	_bsaes_encrypt8,%function
    509  1.1  christos .align	4
    510  1.1  christos // On entry:
    511  1.1  christos //   x9 -> key (previously expanded using _bsaes_key_convert)
    512  1.1  christos //   x10 = number of rounds
    513  1.1  christos //   v0-v7 input data
    514  1.1  christos // On exit:
    515  1.1  christos //   x9-x11 corrupted
    516  1.1  christos //   other general-purpose registers preserved
    517  1.1  christos //   v0-v7 output data
    518  1.1  christos //   v11-v15 preserved
    519  1.1  christos //   other SIMD registers corrupted
    520  1.1  christos _bsaes_encrypt8:
    521  1.1  christos 	ldr	q8, [x9], #16
    522  1.1  christos 	adrp	x11, .LM0SR
    523  1.1  christos 	add	x11, x11, #:lo12:.LM0SR
    524  1.1  christos 	ldr	q9, [x11], #16
    525  1.1  christos _bsaes_encrypt8_alt:
    526  1.1  christos 	eor	v0.16b, v0.16b, v8.16b
    527  1.1  christos 	eor	v1.16b, v1.16b, v8.16b
    528  1.1  christos 	sub	x10, x10, #1
    529  1.1  christos 	eor	v2.16b, v2.16b, v8.16b
    530  1.1  christos 	eor	v4.16b, v4.16b, v8.16b
    531  1.1  christos 	eor	v3.16b, v3.16b, v8.16b
    532  1.1  christos 	eor	v5.16b, v5.16b, v8.16b
    533  1.1  christos 	tbl	v0.16b, {v0.16b}, v9.16b
    534  1.1  christos 	tbl	v1.16b, {v1.16b}, v9.16b
    535  1.1  christos 	tbl	v2.16b, {v2.16b}, v9.16b
    536  1.1  christos 	tbl	v4.16b, {v4.16b}, v9.16b
    537  1.1  christos 	eor	v6.16b, v6.16b, v8.16b
    538  1.1  christos 	eor	v7.16b, v7.16b, v8.16b
    539  1.1  christos 	tbl	v3.16b, {v3.16b}, v9.16b
    540  1.1  christos 	tbl	v5.16b, {v5.16b}, v9.16b
    541  1.1  christos 	tbl	v6.16b, {v6.16b}, v9.16b
    542  1.1  christos 	ushr	v8.2d, v0.2d, #1
    543  1.1  christos 	movi	v10.16b, #0x55
    544  1.1  christos 	tbl	v7.16b, {v7.16b}, v9.16b
    545  1.1  christos 	ushr	v9.2d, v4.2d, #1
    546  1.1  christos 	movi	v16.16b, #0x33
    547  1.1  christos 	ushr	v17.2d, v2.2d, #1
    548  1.1  christos 	eor	v8.16b, v8.16b, v1.16b
    549  1.1  christos 	movi	v18.16b, #0x0f
    550  1.1  christos 	ushr	v19.2d, v6.2d, #1
    551  1.1  christos 	eor	v9.16b, v9.16b, v5.16b
    552  1.1  christos 	eor	v17.16b, v17.16b, v3.16b
    553  1.1  christos 	and	v8.16b, v8.16b, v10.16b
    554  1.1  christos 	eor	v19.16b, v19.16b, v7.16b
    555  1.1  christos 	and	v9.16b, v9.16b, v10.16b
    556  1.1  christos 	and	v17.16b, v17.16b, v10.16b
    557  1.1  christos 	eor	v1.16b, v1.16b, v8.16b
    558  1.1  christos 	shl	v8.2d, v8.2d, #1
    559  1.1  christos 	and	v10.16b, v19.16b, v10.16b
    560  1.1  christos 	eor	v5.16b, v5.16b, v9.16b
    561  1.1  christos 	shl	v9.2d, v9.2d, #1
    562  1.1  christos 	eor	v3.16b, v3.16b, v17.16b
    563  1.1  christos 	shl	v17.2d, v17.2d, #1
    564  1.1  christos 	eor	v0.16b, v0.16b, v8.16b
    565  1.1  christos 	shl	v8.2d, v10.2d, #1
    566  1.1  christos 	eor	v7.16b, v7.16b, v10.16b
    567  1.1  christos 	eor	v4.16b, v4.16b, v9.16b
    568  1.1  christos 	eor	v2.16b, v2.16b, v17.16b
    569  1.1  christos 	ushr	v9.2d, v1.2d, #2
    570  1.1  christos 	eor	v6.16b, v6.16b, v8.16b
    571  1.1  christos 	ushr	v8.2d, v0.2d, #2
    572  1.1  christos 	ushr	v10.2d, v5.2d, #2
    573  1.1  christos 	ushr	v17.2d, v4.2d, #2
    574  1.1  christos 	eor	v9.16b, v9.16b, v3.16b
    575  1.1  christos 	eor	v8.16b, v8.16b, v2.16b
    576  1.1  christos 	eor	v10.16b, v10.16b, v7.16b
    577  1.1  christos 	eor	v17.16b, v17.16b, v6.16b
    578  1.1  christos 	and	v9.16b, v9.16b, v16.16b
    579  1.1  christos 	and	v8.16b, v8.16b, v16.16b
    580  1.1  christos 	and	v10.16b, v10.16b, v16.16b
    581  1.1  christos 	and	v16.16b, v17.16b, v16.16b
    582  1.1  christos 	eor	v3.16b, v3.16b, v9.16b
    583  1.1  christos 	shl	v9.2d, v9.2d, #2
    584  1.1  christos 	eor	v2.16b, v2.16b, v8.16b
    585  1.1  christos 	shl	v8.2d, v8.2d, #2
    586  1.1  christos 	eor	v7.16b, v7.16b, v10.16b
    587  1.1  christos 	shl	v10.2d, v10.2d, #2
    588  1.1  christos 	eor	v6.16b, v6.16b, v16.16b
    589  1.1  christos 	shl	v16.2d, v16.2d, #2
    590  1.1  christos 	eor	v1.16b, v1.16b, v9.16b
    591  1.1  christos 	eor	v0.16b, v0.16b, v8.16b
    592  1.1  christos 	eor	v5.16b, v5.16b, v10.16b
    593  1.1  christos 	eor	v4.16b, v4.16b, v16.16b
    594  1.1  christos 	ushr	v8.2d, v3.2d, #4
    595  1.1  christos 	ushr	v9.2d, v2.2d, #4
    596  1.1  christos 	ushr	v10.2d, v1.2d, #4
    597  1.1  christos 	ushr	v16.2d, v0.2d, #4
    598  1.1  christos 	eor	v8.16b, v8.16b, v7.16b
    599  1.1  christos 	eor	v9.16b, v9.16b, v6.16b
    600  1.1  christos 	eor	v10.16b, v10.16b, v5.16b
    601  1.1  christos 	eor	v16.16b, v16.16b, v4.16b
    602  1.1  christos 	and	v8.16b, v8.16b, v18.16b
    603  1.1  christos 	and	v9.16b, v9.16b, v18.16b
    604  1.1  christos 	and	v10.16b, v10.16b, v18.16b
    605  1.1  christos 	and	v16.16b, v16.16b, v18.16b
    606  1.1  christos 	eor	v7.16b, v7.16b, v8.16b
    607  1.1  christos 	shl	v8.2d, v8.2d, #4
    608  1.1  christos 	eor	v6.16b, v6.16b, v9.16b
    609  1.1  christos 	shl	v9.2d, v9.2d, #4
    610  1.1  christos 	eor	v5.16b, v5.16b, v10.16b
    611  1.1  christos 	shl	v10.2d, v10.2d, #4
    612  1.1  christos 	eor	v4.16b, v4.16b, v16.16b
    613  1.1  christos 	shl	v16.2d, v16.2d, #4
    614  1.1  christos 	eor	v3.16b, v3.16b, v8.16b
    615  1.1  christos 	eor	v2.16b, v2.16b, v9.16b
    616  1.1  christos 	eor	v1.16b, v1.16b, v10.16b
    617  1.1  christos 	eor	v0.16b, v0.16b, v16.16b
    618  1.1  christos 	b	.Lenc_sbox
    619  1.1  christos .align	4
    620  1.1  christos .Lenc_loop:
    621  1.1  christos 	ld1	{v16.16b, v17.16b, v18.16b, v19.16b}, [x9], #64
    622  1.1  christos 	ldp	q8, q9, [x9], #32
    623  1.1  christos 	eor	v0.16b, v16.16b, v0.16b
    624  1.1  christos 	ldr	q10, [x9], #16
    625  1.1  christos 	eor	v1.16b, v17.16b, v1.16b
    626  1.1  christos 	ldr	q16, [x9], #16
    627  1.1  christos 	eor	v2.16b, v18.16b, v2.16b
    628  1.1  christos 	eor	v3.16b, v19.16b, v3.16b
    629  1.1  christos 	eor	v4.16b, v8.16b, v4.16b
    630  1.1  christos 	eor	v5.16b, v9.16b, v5.16b
    631  1.1  christos 	eor	v6.16b, v10.16b, v6.16b
    632  1.1  christos 	eor	v7.16b, v16.16b, v7.16b
    633  1.1  christos 	tbl	v0.16b, {v0.16b}, v28.16b
    634  1.1  christos 	tbl	v1.16b, {v1.16b}, v28.16b
    635  1.1  christos 	tbl	v2.16b, {v2.16b}, v28.16b
    636  1.1  christos 	tbl	v3.16b, {v3.16b}, v28.16b
    637  1.1  christos 	tbl	v4.16b, {v4.16b}, v28.16b
    638  1.1  christos 	tbl	v5.16b, {v5.16b}, v28.16b
    639  1.1  christos 	tbl	v6.16b, {v6.16b}, v28.16b
    640  1.1  christos 	tbl	v7.16b, {v7.16b}, v28.16b
    641  1.1  christos .Lenc_sbox:
    642  1.1  christos 	eor	v5.16b, v5.16b, v6.16b
    643  1.1  christos 	eor	v3.16b, v3.16b, v0.16b
    644  1.1  christos 	subs	x10, x10, #1
    645  1.1  christos 	eor	v2.16b, v2.16b, v1.16b
    646  1.1  christos 	eor	v5.16b, v5.16b, v0.16b
    647  1.1  christos 	eor	v8.16b, v3.16b, v7.16b
    648  1.1  christos 	eor	v6.16b, v6.16b, v2.16b
    649  1.1  christos 	eor	v7.16b, v7.16b, v5.16b
    650  1.1  christos 	eor	v8.16b, v8.16b, v4.16b
    651  1.1  christos 	eor	v3.16b, v6.16b, v3.16b
    652  1.1  christos 	eor	v4.16b, v4.16b, v5.16b
    653  1.1  christos 	eor	v6.16b, v1.16b, v5.16b
    654  1.1  christos 	eor	v2.16b, v2.16b, v7.16b
    655  1.1  christos 	eor	v1.16b, v8.16b, v1.16b
    656  1.1  christos 	eor	v8.16b, v7.16b, v4.16b
    657  1.1  christos 	eor	v9.16b, v3.16b, v0.16b
    658  1.1  christos 	eor	v10.16b, v7.16b, v6.16b
    659  1.1  christos 	eor	v16.16b, v5.16b, v3.16b
    660  1.1  christos 	eor	v17.16b, v6.16b, v2.16b
    661  1.1  christos 	eor	v18.16b, v5.16b, v1.16b
    662  1.1  christos 	eor	v19.16b, v2.16b, v4.16b
    663  1.1  christos 	eor	v20.16b, v1.16b, v0.16b
    664  1.1  christos 	orr	v21.16b, v8.16b, v9.16b
    665  1.1  christos 	orr	v22.16b, v10.16b, v16.16b
    666  1.1  christos 	eor	v23.16b, v8.16b, v17.16b
    667  1.1  christos 	eor	v24.16b, v9.16b, v18.16b
    668  1.1  christos 	and	v19.16b, v19.16b, v20.16b
    669  1.1  christos 	orr	v20.16b, v17.16b, v18.16b
    670  1.1  christos 	and	v8.16b, v8.16b, v9.16b
    671  1.1  christos 	and	v9.16b, v17.16b, v18.16b
    672  1.1  christos 	and	v17.16b, v23.16b, v24.16b
    673  1.1  christos 	and	v10.16b, v10.16b, v16.16b
    674  1.1  christos 	eor	v16.16b, v21.16b, v19.16b
    675  1.1  christos 	eor	v18.16b, v20.16b, v19.16b
    676  1.1  christos 	and	v19.16b, v2.16b, v1.16b
    677  1.1  christos 	and	v20.16b, v6.16b, v5.16b
    678  1.1  christos 	eor	v21.16b, v22.16b, v17.16b
    679  1.1  christos 	eor	v9.16b, v9.16b, v10.16b
    680  1.1  christos 	eor	v10.16b, v16.16b, v17.16b
    681  1.1  christos 	eor	v16.16b, v18.16b, v8.16b
    682  1.1  christos 	and	v17.16b, v4.16b, v0.16b
    683  1.1  christos 	orr	v18.16b, v7.16b, v3.16b
    684  1.1  christos 	eor	v21.16b, v21.16b, v8.16b
    685  1.1  christos 	eor	v8.16b, v9.16b, v8.16b
    686  1.1  christos 	eor	v9.16b, v10.16b, v19.16b
    687  1.1  christos 	eor	v10.16b, v3.16b, v0.16b
    688  1.1  christos 	eor	v16.16b, v16.16b, v17.16b
    689  1.1  christos 	eor	v17.16b, v5.16b, v1.16b
    690  1.1  christos 	eor	v19.16b, v21.16b, v20.16b
    691  1.1  christos 	eor	v20.16b, v8.16b, v18.16b
    692  1.1  christos 	eor	v8.16b, v8.16b, v18.16b
    693  1.1  christos 	eor	v18.16b, v7.16b, v4.16b
    694  1.1  christos 	eor	v21.16b, v9.16b, v16.16b
    695  1.1  christos 	eor	v22.16b, v6.16b, v2.16b
    696  1.1  christos 	and	v23.16b, v9.16b, v19.16b
    697  1.1  christos 	eor	v24.16b, v10.16b, v17.16b
    698  1.1  christos 	eor	v25.16b, v0.16b, v1.16b
    699  1.1  christos 	eor	v26.16b, v7.16b, v6.16b
    700  1.1  christos 	eor	v27.16b, v18.16b, v22.16b
    701  1.1  christos 	eor	v28.16b, v3.16b, v5.16b
    702  1.1  christos 	eor	v29.16b, v16.16b, v23.16b
    703  1.1  christos 	eor	v30.16b, v20.16b, v23.16b
    704  1.1  christos 	eor	v23.16b, v20.16b, v23.16b
    705  1.1  christos 	eor	v31.16b, v4.16b, v2.16b
    706  1.1  christos 	bsl	v29.16b, v19.16b, v20.16b
    707  1.1  christos 	bsl	v30.16b, v9.16b, v16.16b
    708  1.1  christos 	bsl	v8.16b, v29.16b, v23.16b
    709  1.1  christos 	bsl	v20.16b, v23.16b, v29.16b
    710  1.1  christos 	eor	v9.16b, v30.16b, v29.16b
    711  1.1  christos 	and	v5.16b, v5.16b, v30.16b
    712  1.1  christos 	and	v8.16b, v8.16b, v30.16b
    713  1.1  christos 	and	v1.16b, v1.16b, v29.16b
    714  1.1  christos 	eor	v16.16b, v19.16b, v20.16b
    715  1.1  christos 	and	v2.16b, v2.16b, v29.16b
    716  1.1  christos 	eor	v19.16b, v9.16b, v29.16b
    717  1.1  christos 	and	v17.16b, v17.16b, v9.16b
    718  1.1  christos 	eor	v8.16b, v8.16b, v21.16b
    719  1.1  christos 	and	v20.16b, v22.16b, v9.16b
    720  1.1  christos 	eor	v21.16b, v29.16b, v16.16b
    721  1.1  christos 	eor	v22.16b, v29.16b, v16.16b
    722  1.1  christos 	and	v23.16b, v25.16b, v16.16b
    723  1.1  christos 	and	v6.16b, v6.16b, v19.16b
    724  1.1  christos 	eor	v25.16b, v8.16b, v16.16b
    725  1.1  christos 	eor	v29.16b, v30.16b, v8.16b
    726  1.1  christos 	and	v4.16b, v21.16b, v4.16b
    727  1.1  christos 	and	v8.16b, v28.16b, v8.16b
    728  1.1  christos 	and	v0.16b, v22.16b, v0.16b
    729  1.1  christos 	eor	v21.16b, v23.16b, v1.16b
    730  1.1  christos 	eor	v22.16b, v9.16b, v25.16b
    731  1.1  christos 	eor	v9.16b, v9.16b, v25.16b
    732  1.1  christos 	eor	v23.16b, v25.16b, v16.16b
    733  1.1  christos 	and	v3.16b, v29.16b, v3.16b
    734  1.1  christos 	and	v24.16b, v24.16b, v25.16b
    735  1.1  christos 	and	v25.16b, v27.16b, v25.16b
    736  1.1  christos 	and	v10.16b, v22.16b, v10.16b
    737  1.1  christos 	and	v9.16b, v9.16b, v18.16b
    738  1.1  christos 	eor	v18.16b, v19.16b, v23.16b
    739  1.1  christos 	and	v19.16b, v26.16b, v23.16b
    740  1.1  christos 	eor	v3.16b, v5.16b, v3.16b
    741  1.1  christos 	eor	v17.16b, v17.16b, v24.16b
    742  1.1  christos 	eor	v10.16b, v24.16b, v10.16b
    743  1.1  christos 	and	v16.16b, v31.16b, v16.16b
    744  1.1  christos 	eor	v20.16b, v20.16b, v25.16b
    745  1.1  christos 	eor	v9.16b, v25.16b, v9.16b
    746  1.1  christos 	eor	v4.16b, v2.16b, v4.16b
    747  1.1  christos 	and	v7.16b, v18.16b, v7.16b
    748  1.1  christos 	eor	v18.16b, v19.16b, v6.16b
    749  1.1  christos 	eor	v5.16b, v8.16b, v5.16b
    750  1.1  christos 	eor	v0.16b, v1.16b, v0.16b
    751  1.1  christos 	eor	v1.16b, v21.16b, v10.16b
    752  1.1  christos 	eor	v8.16b, v3.16b, v17.16b
    753  1.1  christos 	eor	v2.16b, v16.16b, v2.16b
    754  1.1  christos 	eor	v3.16b, v6.16b, v7.16b
    755  1.1  christos 	eor	v6.16b, v18.16b, v9.16b
    756  1.1  christos 	eor	v4.16b, v4.16b, v20.16b
    757  1.1  christos 	eor	v10.16b, v5.16b, v10.16b
    758  1.1  christos 	eor	v0.16b, v0.16b, v17.16b
    759  1.1  christos 	eor	v9.16b, v2.16b, v9.16b
    760  1.1  christos 	eor	v3.16b, v3.16b, v20.16b
    761  1.1  christos 	eor	v7.16b, v6.16b, v1.16b
    762  1.1  christos 	eor	v5.16b, v8.16b, v4.16b
    763  1.1  christos 	eor	v6.16b, v10.16b, v1.16b
    764  1.1  christos 	eor	v2.16b, v4.16b, v0.16b
    765  1.1  christos 	eor	v4.16b, v3.16b, v10.16b
    766  1.1  christos 	eor	v9.16b, v9.16b, v7.16b
    767  1.1  christos 	eor	v3.16b, v0.16b, v5.16b
    768  1.1  christos 	eor	v0.16b, v1.16b, v4.16b
    769  1.1  christos 	eor	v1.16b, v4.16b, v8.16b
    770  1.1  christos 	eor	v4.16b, v9.16b, v5.16b
    771  1.1  christos 	eor	v6.16b, v6.16b, v3.16b
    772  1.1  christos 	bcc	.Lenc_done
    773  1.1  christos 	ext	v8.16b, v0.16b, v0.16b, #12
    774  1.1  christos 	ext	v9.16b, v4.16b, v4.16b, #12
    775  1.1  christos 	ldr	q28, [x11]
    776  1.1  christos 	ext	v10.16b, v6.16b, v6.16b, #12
    777  1.1  christos 	ext	v16.16b, v1.16b, v1.16b, #12
    778  1.1  christos 	ext	v17.16b, v3.16b, v3.16b, #12
    779  1.1  christos 	ext	v18.16b, v7.16b, v7.16b, #12
    780  1.1  christos 	eor	v0.16b, v0.16b, v8.16b
    781  1.1  christos 	eor	v4.16b, v4.16b, v9.16b
    782  1.1  christos 	eor	v6.16b, v6.16b, v10.16b
    783  1.1  christos 	ext	v19.16b, v2.16b, v2.16b, #12
    784  1.1  christos 	ext	v20.16b, v5.16b, v5.16b, #12
    785  1.1  christos 	eor	v1.16b, v1.16b, v16.16b
    786  1.1  christos 	eor	v3.16b, v3.16b, v17.16b
    787  1.1  christos 	eor	v7.16b, v7.16b, v18.16b
    788  1.1  christos 	eor	v2.16b, v2.16b, v19.16b
    789  1.1  christos 	eor	v16.16b, v16.16b, v0.16b
    790  1.1  christos 	eor	v5.16b, v5.16b, v20.16b
    791  1.1  christos 	eor	v17.16b, v17.16b, v6.16b
    792  1.1  christos 	eor	v10.16b, v10.16b, v4.16b
    793  1.1  christos 	ext	v0.16b, v0.16b, v0.16b, #8
    794  1.1  christos 	eor	v9.16b, v9.16b, v1.16b
    795  1.1  christos 	ext	v1.16b, v1.16b, v1.16b, #8
    796  1.1  christos 	eor	v8.16b, v8.16b, v5.16b
    797  1.1  christos 	eor	v16.16b, v16.16b, v5.16b
    798  1.1  christos 	eor	v18.16b, v18.16b, v3.16b
    799  1.1  christos 	eor	v19.16b, v19.16b, v7.16b
    800  1.1  christos 	ext	v3.16b, v3.16b, v3.16b, #8
    801  1.1  christos 	ext	v7.16b, v7.16b, v7.16b, #8
    802  1.1  christos 	eor	v20.16b, v20.16b, v2.16b
    803  1.1  christos 	ext	v6.16b, v6.16b, v6.16b, #8
    804  1.1  christos 	ext	v21.16b, v5.16b, v5.16b, #8
    805  1.1  christos 	eor	v17.16b, v17.16b, v5.16b
    806  1.1  christos 	ext	v2.16b, v2.16b, v2.16b, #8
    807  1.1  christos 	eor	v10.16b, v10.16b, v5.16b
    808  1.1  christos 	ext	v22.16b, v4.16b, v4.16b, #8
    809  1.1  christos 	eor	v0.16b, v0.16b, v8.16b
    810  1.1  christos 	eor	v1.16b, v1.16b, v16.16b
    811  1.1  christos 	eor	v5.16b, v7.16b, v18.16b
    812  1.1  christos 	eor	v4.16b, v3.16b, v17.16b
    813  1.1  christos 	eor	v3.16b, v6.16b, v10.16b
    814  1.1  christos 	eor	v7.16b, v21.16b, v20.16b
    815  1.1  christos 	eor	v6.16b, v2.16b, v19.16b
    816  1.1  christos 	eor	v2.16b, v22.16b, v9.16b
    817  1.1  christos 	bne	.Lenc_loop
    818  1.1  christos 	ldr	q28, [x11, #16]!            // load from .LSRM0 on last round (x10 == 0)
    819  1.1  christos 	b	.Lenc_loop
    820  1.1  christos .align	4
    821  1.1  christos .Lenc_done:
    822  1.1  christos 	ushr	v8.2d, v0.2d, #1
    823  1.1  christos 	movi	v9.16b, #0x55
    824  1.1  christos 	ldr	q10, [x9]
    825  1.1  christos 	ushr	v16.2d, v3.2d, #1
    826  1.1  christos 	movi	v17.16b, #0x33
    827  1.1  christos 	ushr	v18.2d, v4.2d, #1
    828  1.1  christos 	movi	v19.16b, #0x0f
    829  1.1  christos 	eor	v8.16b, v8.16b, v1.16b
    830  1.1  christos 	ushr	v20.2d, v2.2d, #1
    831  1.1  christos 	eor	v16.16b, v16.16b, v7.16b
    832  1.1  christos 	eor	v18.16b, v18.16b, v6.16b
    833  1.1  christos 	and	v8.16b, v8.16b, v9.16b
    834  1.1  christos 	eor	v20.16b, v20.16b, v5.16b
    835  1.1  christos 	and	v16.16b, v16.16b, v9.16b
    836  1.1  christos 	and	v18.16b, v18.16b, v9.16b
    837  1.1  christos 	shl	v21.2d, v8.2d, #1
    838  1.1  christos 	eor	v1.16b, v1.16b, v8.16b
    839  1.1  christos 	and	v8.16b, v20.16b, v9.16b
    840  1.1  christos 	eor	v7.16b, v7.16b, v16.16b
    841  1.1  christos 	shl	v9.2d, v16.2d, #1
    842  1.1  christos 	eor	v6.16b, v6.16b, v18.16b
    843  1.1  christos 	shl	v16.2d, v18.2d, #1
    844  1.1  christos 	eor	v0.16b, v0.16b, v21.16b
    845  1.1  christos 	shl	v18.2d, v8.2d, #1
    846  1.1  christos 	eor	v5.16b, v5.16b, v8.16b
    847  1.1  christos 	eor	v3.16b, v3.16b, v9.16b
    848  1.1  christos 	eor	v4.16b, v4.16b, v16.16b
    849  1.1  christos 	ushr	v8.2d, v1.2d, #2
    850  1.1  christos 	eor	v2.16b, v2.16b, v18.16b
    851  1.1  christos 	ushr	v9.2d, v0.2d, #2
    852  1.1  christos 	ushr	v16.2d, v7.2d, #2
    853  1.1  christos 	ushr	v18.2d, v3.2d, #2
    854  1.1  christos 	eor	v8.16b, v8.16b, v6.16b
    855  1.1  christos 	eor	v9.16b, v9.16b, v4.16b
    856  1.1  christos 	eor	v16.16b, v16.16b, v5.16b
    857  1.1  christos 	eor	v18.16b, v18.16b, v2.16b
    858  1.1  christos 	and	v8.16b, v8.16b, v17.16b
    859  1.1  christos 	and	v9.16b, v9.16b, v17.16b
    860  1.1  christos 	and	v16.16b, v16.16b, v17.16b
    861  1.1  christos 	and	v17.16b, v18.16b, v17.16b
    862  1.1  christos 	eor	v6.16b, v6.16b, v8.16b
    863  1.1  christos 	shl	v8.2d, v8.2d, #2
    864  1.1  christos 	eor	v4.16b, v4.16b, v9.16b
    865  1.1  christos 	shl	v9.2d, v9.2d, #2
    866  1.1  christos 	eor	v5.16b, v5.16b, v16.16b
    867  1.1  christos 	shl	v16.2d, v16.2d, #2
    868  1.1  christos 	eor	v2.16b, v2.16b, v17.16b
    869  1.1  christos 	shl	v17.2d, v17.2d, #2
    870  1.1  christos 	eor	v1.16b, v1.16b, v8.16b
    871  1.1  christos 	eor	v0.16b, v0.16b, v9.16b
    872  1.1  christos 	eor	v7.16b, v7.16b, v16.16b
    873  1.1  christos 	eor	v3.16b, v3.16b, v17.16b
    874  1.1  christos 	ushr	v8.2d, v6.2d, #4
    875  1.1  christos 	ushr	v9.2d, v4.2d, #4
    876  1.1  christos 	ushr	v16.2d, v1.2d, #4
    877  1.1  christos 	ushr	v17.2d, v0.2d, #4
    878  1.1  christos 	eor	v8.16b, v8.16b, v5.16b
    879  1.1  christos 	eor	v9.16b, v9.16b, v2.16b
    880  1.1  christos 	eor	v16.16b, v16.16b, v7.16b
    881  1.1  christos 	eor	v17.16b, v17.16b, v3.16b
    882  1.1  christos 	and	v8.16b, v8.16b, v19.16b
    883  1.1  christos 	and	v9.16b, v9.16b, v19.16b
    884  1.1  christos 	and	v16.16b, v16.16b, v19.16b
    885  1.1  christos 	and	v17.16b, v17.16b, v19.16b
    886  1.1  christos 	eor	v5.16b, v5.16b, v8.16b
    887  1.1  christos 	shl	v8.2d, v8.2d, #4
    888  1.1  christos 	eor	v2.16b, v2.16b, v9.16b
    889  1.1  christos 	shl	v9.2d, v9.2d, #4
    890  1.1  christos 	eor	v7.16b, v7.16b, v16.16b
    891  1.1  christos 	shl	v16.2d, v16.2d, #4
    892  1.1  christos 	eor	v3.16b, v3.16b, v17.16b
    893  1.1  christos 	shl	v17.2d, v17.2d, #4
    894  1.1  christos 	eor	v6.16b, v6.16b, v8.16b
    895  1.1  christos 	eor	v4.16b, v4.16b, v9.16b
    896  1.1  christos 	eor	v7.16b, v7.16b, v10.16b
    897  1.1  christos 	eor	v1.16b, v1.16b, v16.16b
    898  1.1  christos 	eor	v3.16b, v3.16b, v10.16b
    899  1.1  christos 	eor	v0.16b, v0.16b, v17.16b
    900  1.1  christos 	eor	v6.16b, v6.16b, v10.16b
    901  1.1  christos 	eor	v4.16b, v4.16b, v10.16b
    902  1.1  christos 	eor	v2.16b, v2.16b, v10.16b
    903  1.1  christos 	eor	v5.16b, v5.16b, v10.16b
    904  1.1  christos 	eor	v1.16b, v1.16b, v10.16b
    905  1.1  christos 	eor	v0.16b, v0.16b, v10.16b
    906  1.1  christos 	ret
    907  1.1  christos .size	_bsaes_encrypt8,.-_bsaes_encrypt8
    908  1.1  christos 
     909  1.1  christos .type	_bsaes_key_convert,%function
     910  1.1  christos .align	4
                        // Convert an expanded AES key schedule into the layout used by the
                        // bit-sliced routines in this file:
                        //   - the round 0 key is written out as a single 16-byte quadword;
                        //   - each of the (rounds - 1) inner round keys is byte-permuted through
                        //     the .LM0 table, XORed with 0x63 and expanded into eight 16-byte
                        //     byte masks (one per bit position), i.e. 128 bytes per round key;
                        //   - the last round key is NOT stored; it is returned in v15 so that
                        //     the caller can apply its own fix-up before saving it at [x17].
     911  1.1  christos // On entry:
     912  1.1  christos //   x9 -> input key (big-endian)
     913  1.1  christos //   x10 = number of rounds
     914  1.1  christos //   x17 -> output key (native endianness)
     915  1.1  christos // On exit:
     916  1.1  christos //   x9, x10 corrupted
     917  1.1  christos //   x11 -> .LM0_bigendian
     918  1.1  christos //   x17 -> last quadword of output key
     919  1.1  christos //   other general-purpose registers preserved
     920  1.1  christos //   v2-v6 preserved
     921  1.1  christos //   v7.16b[] = 0x63
     922  1.1  christos //   v8-v14 preserved
     923  1.1  christos //   v15 = last round key (converted to native endianness)
     924  1.1  christos //   other SIMD registers corrupted
     925  1.1  christos _bsaes_key_convert:
                                // select the byte-permutation table that matches the build's endianness
     926  1.1  christos #ifdef __AARCH64EL__
     927  1.1  christos 	adrp	x11, .LM0_littleendian
     928  1.1  christos 	add	x11, x11, #:lo12:.LM0_littleendian
     929  1.1  christos #else
     930  1.1  christos 	adrp	x11, .LM0_bigendian
     931  1.1  christos 	add	x11, x11, #:lo12:.LM0_bigendian
     932  1.1  christos #endif
     933  1.1  christos 	ldr	q0, [x9], #16               // load round 0 key
     934  1.1  christos 	ldr	q1, [x11]                   // .LM0
     935  1.1  christos 	ldr	q15, [x9], #16              // load round 1 key
     936  1.1  christos 
     937  1.1  christos 	movi	v7.16b, #0x63               // compose .L63
     938  1.1  christos 	movi	v16.16b, #0x01              // bit masks
     939  1.1  christos 	movi	v17.16b, #0x02
     940  1.1  christos 	movi	v18.16b, #0x04
     941  1.1  christos 	movi	v19.16b, #0x08
     942  1.1  christos 	movi	v20.16b, #0x10
     943  1.1  christos 	movi	v21.16b, #0x20
     944  1.1  christos 	movi	v22.16b, #0x40
     945  1.1  christos 	movi	v23.16b, #0x80
     946  1.1  christos 
     947  1.1  christos #ifdef __AARCH64EL__
     948  1.1  christos 	rev32	v0.16b, v0.16b              // byte-swap each 32-bit word of the round 0 key
     949  1.1  christos #endif
     950  1.1  christos 	sub	x10, x10, #1                // x10 = number of inner round keys to bit-slice
     951  1.1  christos 	str	q0, [x17], #16              // save round 0 key
     952  1.1  christos 
                                // Each iteration bit-slices the round key currently held in v15 and
                                // prefetches the following one, so on loop exit v15 holds the last
                                // round key, which has been loaded but not converted.
     953  1.1  christos .align	4
     954  1.1  christos .Lkey_loop:
     955  1.1  christos 	tbl	v0.16b, {v15.16b}, v1.16b   // permute the key bytes through the .LM0 table
     956  1.1  christos 	ldr	q15, [x9], #16              // load next round key
     957  1.1  christos 
     958  1.1  christos 	eor	v0.16b, v0.16b, v7.16b      // XOR every byte with 0x63
                                // cmtst: each result byte is 0xff if the tested bit is set, else 0x00
     959  1.1  christos 	cmtst	v24.16b, v0.16b, v16.16b
     960  1.1  christos 	cmtst	v25.16b, v0.16b, v17.16b
     961  1.1  christos 	cmtst	v26.16b, v0.16b, v18.16b
     962  1.1  christos 	cmtst	v27.16b, v0.16b, v19.16b
     963  1.1  christos 	cmtst	v28.16b, v0.16b, v20.16b
     964  1.1  christos 	cmtst	v29.16b, v0.16b, v21.16b
     965  1.1  christos 	cmtst	v30.16b, v0.16b, v22.16b
     966  1.1  christos 	cmtst	v31.16b, v0.16b, v23.16b
     967  1.1  christos 	sub	x10, x10, #1
     968  1.1  christos 	st1	{v24.16b,v25.16b,v26.16b,v27.16b}, [x17], #64 // write bit-sliced round key
     969  1.1  christos 	st1	{v28.16b,v29.16b,v30.16b,v31.16b}, [x17], #64
     970  1.1  christos 	cbnz	x10, .Lkey_loop
     971  1.1  christos 
     972  1.1  christos         // don't save last round key
     973  1.1  christos #ifdef __AARCH64EL__
     974  1.1  christos 	rev32	v15.16b, v15.16b            // byte-swap each 32-bit word of the last round key
     975  1.1  christos 	adrp	x11, .LM0_bigendian         // exit contract: x11 -> .LM0_bigendian on either endianness
     976  1.1  christos 	add	x11, x11, #:lo12:.LM0_bigendian
     977  1.1  christos #endif
     978  1.1  christos 	ret
     979  1.1  christos .size	_bsaes_key_convert,.-_bsaes_key_convert
    980  1.1  christos 
     981  1.1  christos .globl	ossl_bsaes_cbc_encrypt
     982  1.1  christos .type	ossl_bsaes_cbc_encrypt,%function
     983  1.1  christos .align	4
                        // AES-CBC *decryption* using the bit-sliced core (despite the "encrypt"
                        // in the name, only enc == 0 is handled on the bit-sliced path).
                        // Inputs shorter than 128 bytes are handed straight to AES_cbc_encrypt.
                        // Otherwise a bit-sliced key schedule is built on the stack, the data is
                        // decrypted eight blocks at a time with _bsaes_decrypt8, a tail of 1-7
                        // blocks is handled separately, and the key schedule is wiped before
                        // returning.
                        // Register roles on the bit-sliced path:
                        //   x14 = sp just after the register-save frame (end marker for the wipe)
                        //   x15 = number of rounds
                        //   sp  -> base of the on-stack key schedule
                        //   v15 = current chaining value (IV / previous ciphertext block)
     984  1.1  christos // On entry:
     985  1.1  christos //   x0 -> input ciphertext
     986  1.1  christos //   x1 -> output plaintext
     987  1.1  christos //   x2 = size of ciphertext and plaintext in bytes (assumed a multiple of 16)
     988  1.1  christos //   x3 -> key
     989  1.1  christos //   x4 -> 128-bit initialisation vector (or preceding 128-bit block of ciphertext if continuing after an earlier call)
     990  1.1  christos //   w5 must be == 0
     991  1.1  christos // On exit:
     992  1.1  christos //   Output plaintext filled in
     993  1.1  christos //   Initialisation vector overwritten with last quadword of ciphertext
     994  1.1  christos //   No output registers, usual AAPCS64 register preservation
     995  1.1  christos ossl_bsaes_cbc_encrypt:
     996  1.1  christos 	AARCH64_VALID_CALL_TARGET
     997  1.1  christos 	cmp	x2, #128                    // at least 8 blocks (128 bytes)?
     998  1.1  christos 	bhs	.Lcbc_do_bsaes
     999  1.1  christos 	b	AES_cbc_encrypt             // no: tail-branch with all arguments untouched
    1000  1.1  christos .Lcbc_do_bsaes:
    1001  1.1  christos 
    1002  1.1  christos         // it is up to the caller to make sure we are called with enc == 0
    1003  1.1  christos 
    1004  1.1  christos 	stp	x29, x30, [sp, #-48]!       // 48-byte frame: x29, x30, d8, d9, d10, d15
    1005  1.1  christos 	stp	d8, d9, [sp, #16]
    1006  1.1  christos 	stp	d10, d15, [sp, #32]
    1007  1.1  christos 	lsr	x2, x2, #4                  // len in 16 byte blocks
    1008  1.1  christos 
    1009  1.1  christos 	ldr	w15, [x3, #240]             // get # of rounds
    1010  1.1  christos 	mov	x14, sp                     // remember frame base; the wipe loop stops here
    1011  1.1  christos 
    1012  1.1  christos         // allocate the key schedule on the stack
    1013  1.1  christos 	add	x17, sp, #96
    1014  1.1  christos 	sub	x17, x17, x15, lsl #7       // 128 bytes per inner round key, less 96 bytes
    1015  1.1  christos 
    1016  1.1  christos         // populate the key schedule
    1017  1.1  christos 	mov	x9, x3                      // pass key
    1018  1.1  christos 	mov	x10, x15                    // pass # of rounds
    1019  1.1  christos 	mov	sp, x17                     // sp -> base of the on-stack key schedule
    1020  1.1  christos 	bl	_bsaes_key_convert
                                // on return: x17 -> last quadword of the schedule, v15 = last round key,
                                // v7 = 0x63 in every byte (see _bsaes_key_convert's exit conditions)
    1021  1.1  christos 	ldr	q6,  [sp]
    1022  1.1  christos 	str	q15, [x17]                  // save last round key
    1023  1.1  christos 	eor	v6.16b, v6.16b, v7.16b      // fix up round 0 key (by XORing with 0x63)
    1024  1.1  christos 	str	q6, [sp]
    1025  1.1  christos 
    1026  1.1  christos 	ldr	q15, [x4]                   // load IV
    1027  1.1  christos 	b	.Lcbc_dec_loop              // jump over any padding emitted by the .align below
    1028  1.1  christos 
                                // Main loop: one iteration per group of 8 blocks.
    1029  1.1  christos .align	4
    1030  1.1  christos .Lcbc_dec_loop:
    1031  1.1  christos 	subs	x2, x2, #0x8
    1032  1.1  christos 	bmi	.Lcbc_dec_loop_finish       // fewer than 8 blocks left
    1033  1.1  christos 
    1034  1.1  christos 	ldr	q0, [x0], #16               // load input
    1035  1.1  christos 	mov	x9, sp                      // pass the key
    1036  1.1  christos 	ldr	q1, [x0], #16
    1037  1.1  christos 	mov	x10, x15                    // pass # of rounds
    1038  1.1  christos 	ldr	q2, [x0], #16
    1039  1.1  christos 	ldr	q3, [x0], #16
    1040  1.1  christos 	ldr	q4, [x0], #16
    1041  1.1  christos 	ldr	q5, [x0], #16
    1042  1.1  christos 	ldr	q6, [x0], #16
    1043  1.1  christos 	ldr	q7, [x0], #-7*16            // 8th block; rewind x0 to the first block of the group
    1044  1.1  christos 
    1045  1.1  christos 	bl	_bsaes_decrypt8
    1046  1.1  christos 
                                // The decrypted blocks are consumed below in the register order
                                // v0, v1, v6, v4, v2, v7, v3, v5. Block 0 is XORed with the chaining
                                // value in v15; block i (i > 0) is XORed with ciphertext block i-1,
                                // which is re-read from the input buffer.
    1047  1.1  christos 	ldr	q16, [x0], #16              // reload input
    1048  1.1  christos 	eor	v0.16b, v0.16b, v15.16b     // ^= IV
    1049  1.1  christos 	eor	v1.16b, v1.16b, v16.16b
    1050  1.1  christos 	str	q0, [x1], #16               // write output
    1051  1.1  christos 	ldr	q0, [x0], #16
    1052  1.1  christos 	str	q1, [x1], #16
    1053  1.1  christos 	ldr	q1, [x0], #16
    1054  1.1  christos 	eor	v1.16b, v4.16b, v1.16b
    1055  1.1  christos 	ldr	q4, [x0], #16
    1056  1.1  christos 	eor	v2.16b, v2.16b, v4.16b
    1057  1.1  christos 	eor	v0.16b, v6.16b, v0.16b
    1058  1.1  christos 	ldr	q4, [x0], #16
    1059  1.1  christos 	str	q0, [x1], #16
    1060  1.1  christos 	str	q1, [x1], #16
    1061  1.1  christos 	eor	v0.16b, v7.16b, v4.16b
    1062  1.1  christos 	ldr	q1, [x0], #16
    1063  1.1  christos 	str	q2, [x1], #16
    1064  1.1  christos 	ldr	q2, [x0], #16
    1065  1.1  christos 	ldr	q15, [x0], #16              // 8th ciphertext block becomes the next chaining value
    1066  1.1  christos 	str	q0, [x1], #16
    1067  1.1  christos 	eor	v0.16b, v5.16b, v2.16b
    1068  1.1  christos 	eor	v1.16b, v3.16b, v1.16b
    1069  1.1  christos 	str	q1, [x1], #16
    1070  1.1  christos 	str	q0, [x1], #16
    1071  1.1  christos 
    1072  1.1  christos 	b	.Lcbc_dec_loop
    1073  1.1  christos 
                                // Tail: 0-7 blocks remain. Blocks are loaded one at a time while the
                                // count is tested; each "beq" below reuses the flags of the preceding
                                // "cmp" (the ldr/mov instructions in between do not alter flags).
    1074  1.1  christos .Lcbc_dec_loop_finish:
    1075  1.1  christos 	adds	x2, x2, #8                  // undo the subtraction: x2 = blocks remaining (0-7)
    1076  1.1  christos 	beq	.Lcbc_dec_done
    1077  1.1  christos 
    1078  1.1  christos 	ldr	q0, [x0], #16               // load input
    1079  1.1  christos 	cmp	x2, #2
    1080  1.1  christos 	blo	.Lcbc_dec_one
    1081  1.1  christos 	ldr	q1, [x0], #16
    1082  1.1  christos 	mov	x9, sp                      // pass the key
    1083  1.1  christos 	mov	x10, x15                    // pass # of rounds
    1084  1.1  christos 	beq	.Lcbc_dec_two
    1085  1.1  christos 	ldr	q2, [x0], #16
    1086  1.1  christos 	cmp	x2, #4
    1087  1.1  christos 	blo	.Lcbc_dec_three
    1088  1.1  christos 	ldr	q3, [x0], #16
    1089  1.1  christos 	beq	.Lcbc_dec_four
    1090  1.1  christos 	ldr	q4, [x0], #16
    1091  1.1  christos 	cmp	x2, #6
    1092  1.1  christos 	blo	.Lcbc_dec_five
    1093  1.1  christos 	ldr	q5, [x0], #16
    1094  1.1  christos 	beq	.Lcbc_dec_six
    1095  1.1  christos 	ldr	q6, [x0], #-6*16            // 7th block; rewind x0 to the first tail block
    1096  1.1  christos 
                                // seven blocks remaining
    1097  1.1  christos 	bl	_bsaes_decrypt8
    1098  1.1  christos 
    1099  1.1  christos 	ldr	q5, [x0], #16               // reload input
    1100  1.1  christos 	eor	v0.16b, v0.16b, v15.16b     // ^= IV
    1101  1.1  christos 	ldr	q8, [x0], #16
    1102  1.1  christos 	ldr	q9, [x0], #16
    1103  1.1  christos 	ldr	q10, [x0], #16
    1104  1.1  christos 	str	q0, [x1], #16               // write output
    1105  1.1  christos 	ldr	q0, [x0], #16
    1106  1.1  christos 	eor	v1.16b, v1.16b, v5.16b
    1107  1.1  christos 	ldr	q5, [x0], #16
    1108  1.1  christos 	eor	v6.16b, v6.16b, v8.16b
    1109  1.1  christos 	ldr	q15, [x0]                   // last ciphertext block = IV to return
    1110  1.1  christos 	eor	v4.16b, v4.16b, v9.16b
    1111  1.1  christos 	eor	v2.16b, v2.16b, v10.16b
    1112  1.1  christos 	str	q1, [x1], #16
    1113  1.1  christos 	eor	v0.16b, v7.16b, v0.16b
    1114  1.1  christos 	str	q6, [x1], #16
    1115  1.1  christos 	eor	v1.16b, v3.16b, v5.16b
    1116  1.1  christos 	str	q4, [x1], #16
    1117  1.1  christos 	str	q2, [x1], #16
    1118  1.1  christos 	str	q0, [x1], #16
    1119  1.1  christos 	str	q1, [x1]
    1120  1.1  christos 	b	.Lcbc_dec_done
    1121  1.1  christos .align	4
    1122  1.1  christos .Lcbc_dec_six:
    1123  1.1  christos 	sub	x0, x0, #0x60               // rewind x0 over the 6 blocks just loaded
    1124  1.1  christos 	bl	_bsaes_decrypt8
    1125  1.1  christos 	ldr	q3, [x0], #16               // reload input
    1126  1.1  christos 	eor	v0.16b, v0.16b, v15.16b     // ^= IV
    1127  1.1  christos 	ldr	q5, [x0], #16
    1128  1.1  christos 	ldr	q8, [x0], #16
    1129  1.1  christos 	ldr	q9, [x0], #16
    1130  1.1  christos 	str	q0, [x1], #16               // write output
    1131  1.1  christos 	ldr	q0, [x0], #16
    1132  1.1  christos 	eor	v1.16b, v1.16b, v3.16b
    1133  1.1  christos 	ldr	q15, [x0]                   // last ciphertext block = IV to return
    1134  1.1  christos 	eor	v3.16b, v6.16b, v5.16b
    1135  1.1  christos 	eor	v4.16b, v4.16b, v8.16b
    1136  1.1  christos 	eor	v2.16b, v2.16b, v9.16b
    1137  1.1  christos 	str	q1, [x1], #16
    1138  1.1  christos 	eor	v0.16b, v7.16b, v0.16b
    1139  1.1  christos 	str	q3, [x1], #16
    1140  1.1  christos 	str	q4, [x1], #16
    1141  1.1  christos 	str	q2, [x1], #16
    1142  1.1  christos 	str	q0, [x1]
    1143  1.1  christos 	b	.Lcbc_dec_done
    1144  1.1  christos .align	4
    1145  1.1  christos .Lcbc_dec_five:
    1146  1.1  christos 	sub	x0, x0, #0x50               // rewind x0 over the 5 blocks just loaded
    1147  1.1  christos 	bl	_bsaes_decrypt8
    1148  1.1  christos 	ldr	q3, [x0], #16               // reload input
    1149  1.1  christos 	eor	v0.16b, v0.16b, v15.16b     // ^= IV
    1150  1.1  christos 	ldr	q5, [x0], #16
    1151  1.1  christos 	ldr	q7, [x0], #16
    1152  1.1  christos 	ldr	q8, [x0], #16
    1153  1.1  christos 	str	q0, [x1], #16               // write output
    1154  1.1  christos 	ldr	q15, [x0]                   // last ciphertext block = IV to return
    1155  1.1  christos 	eor	v0.16b, v1.16b, v3.16b
    1156  1.1  christos 	eor	v1.16b, v6.16b, v5.16b
    1157  1.1  christos 	eor	v3.16b, v4.16b, v7.16b
    1158  1.1  christos 	str	q0, [x1], #16
    1159  1.1  christos 	eor	v0.16b, v2.16b, v8.16b
    1160  1.1  christos 	str	q1, [x1], #16
    1161  1.1  christos 	str	q3, [x1], #16
    1162  1.1  christos 	str	q0, [x1]
    1163  1.1  christos 	b	.Lcbc_dec_done
    1164  1.1  christos .align	4
    1165  1.1  christos .Lcbc_dec_four:
    1166  1.1  christos 	sub	x0, x0, #0x40               // rewind x0 over the 4 blocks just loaded
    1167  1.1  christos 	bl	_bsaes_decrypt8
    1168  1.1  christos 	ldr	q2, [x0], #16               // reload input
    1169  1.1  christos 	eor	v0.16b, v0.16b, v15.16b     // ^= IV
    1170  1.1  christos 	ldr	q3, [x0], #16
    1171  1.1  christos 	ldr	q5, [x0], #16
    1172  1.1  christos 	str	q0, [x1], #16               // write output
    1173  1.1  christos 	ldr	q15, [x0]                   // last ciphertext block = IV to return
    1174  1.1  christos 	eor	v0.16b, v1.16b, v2.16b
    1175  1.1  christos 	eor	v1.16b, v6.16b, v3.16b
    1176  1.1  christos 	eor	v2.16b, v4.16b, v5.16b
    1177  1.1  christos 	str	q0, [x1], #16
    1178  1.1  christos 	str	q1, [x1], #16
    1179  1.1  christos 	str	q2, [x1]
    1180  1.1  christos 	b	.Lcbc_dec_done
    1181  1.1  christos .align	4
    1182  1.1  christos .Lcbc_dec_three:
    1183  1.1  christos 	sub	x0, x0, #0x30               // rewind x0 over the 3 blocks just loaded
    1184  1.1  christos 	bl	_bsaes_decrypt8
    1185  1.1  christos 	ldr	q2, [x0], #16               // reload input
    1186  1.1  christos 	eor	v0.16b, v0.16b, v15.16b     // ^= IV
    1187  1.1  christos 	ldr	q3, [x0], #16
    1188  1.1  christos 	ldr	q15, [x0]                   // last ciphertext block = IV to return
    1189  1.1  christos 	str	q0, [x1], #16               // write output
    1190  1.1  christos 	eor	v0.16b, v1.16b, v2.16b
    1191  1.1  christos 	eor	v1.16b, v6.16b, v3.16b
    1192  1.1  christos 	str	q0, [x1], #16
    1193  1.1  christos 	str	q1, [x1]
    1194  1.1  christos 	b	.Lcbc_dec_done
    1195  1.1  christos .align	4
    1196  1.1  christos .Lcbc_dec_two:
    1197  1.1  christos 	sub	x0, x0, #0x20               // rewind x0 over the 2 blocks just loaded
    1198  1.1  christos 	bl	_bsaes_decrypt8
    1199  1.1  christos 	ldr	q2, [x0], #16               // reload input
    1200  1.1  christos 	eor	v0.16b, v0.16b, v15.16b     // ^= IV
    1201  1.1  christos 	ldr	q15, [x0]                   // last ciphertext block = IV to return
    1202  1.1  christos 	str	q0, [x1], #16               // write output
    1203  1.1  christos 	eor	v0.16b, v1.16b, v2.16b
    1204  1.1  christos 	str	q0, [x1]
    1205  1.1  christos 	b	.Lcbc_dec_done
                                // A single remaining block is decrypted by calling AES_decrypt with
                                // x0 = input block, x1 = output block and x2 = the caller's original
                                // key pointer (x3), instead of using the bit-sliced core.
                                // NOTE(review): v8 (old IV) and v15 (ciphertext block, the IV to
                                // return) are kept live as full 128-bit values across the call, but
                                // AAPCS64 only requires a callee to preserve the low 64 bits of
                                // v8-v15 - confirm that AES_decrypt leaves the upper halves intact.
    1206  1.1  christos .align	4
    1207  1.1  christos .Lcbc_dec_one:
    1208  1.1  christos 	sub	x0, x0, #0x10               // rewind x0 to the block just loaded
    1209  1.1  christos 	stp	x1, x4, [sp, #-32]!         // x1, x4 and x14 are needed after the call
    1210  1.1  christos 	str	x14, [sp, #16]
    1211  1.1  christos 	mov	v8.16b, v15.16b             // v8 = chaining value for this block
    1212  1.1  christos 	mov	v15.16b, v0.16b             // v15 = ciphertext block = IV to return
    1213  1.1  christos 	mov	x2, x3                      // pass the original (not bit-sliced) key
    1214  1.1  christos 	bl	AES_decrypt
    1215  1.1  christos 	ldr	x14, [sp, #16]
    1216  1.1  christos 	ldp	x1, x4, [sp], #32
    1217  1.1  christos 	ldr	q0, [x1]                    // load result
    1218  1.1  christos 	eor	v0.16b, v0.16b, v8.16b      // ^= IV
    1219  1.1  christos 	str	q0, [x1]                    // write output
    1220  1.1  christos 
                                // Common exit: zero the stack from sp (base of the key schedule) up to
                                // x14 (frame base) 32 bytes at a time, store the final chaining value
                                // through x4, then restore the saved registers and return.
    1221  1.1  christos .align	4
    1222  1.1  christos .Lcbc_dec_done:
    1223  1.1  christos 	movi	v0.16b, #0
    1224  1.1  christos 	movi	v1.16b, #0
    1225  1.1  christos .Lcbc_dec_bzero:	//	wipe key schedule [if any]
    1226  1.1  christos 	stp	q0, q1, [sp], #32
    1227  1.1  christos 	cmp	sp, x14
    1228  1.1  christos 	bne	.Lcbc_dec_bzero
    1229  1.1  christos 	str	q15, [x4]                   // return IV
    1230  1.1  christos 	ldp	d8, d9, [sp, #16]
    1231  1.1  christos 	ldp	d10, d15, [sp, #32]
    1232  1.1  christos 	ldp	x29, x30, [sp], #48
    1233  1.1  christos 	ret
    1234  1.1  christos .size	ossl_bsaes_cbc_encrypt,.-ossl_bsaes_cbc_encrypt
   1235  1.1  christos 
   1236  1.1  christos .globl	ossl_bsaes_ctr32_encrypt_blocks
   1237  1.1  christos .type	ossl_bsaes_ctr32_encrypt_blocks,%function
   1238  1.1  christos .align	4
   1239  1.1  christos // AES in CTR mode with a 32-bit block counter: 8-way bit-sliced path for >= 8 blocks, one AES_encrypt call per block otherwise. On entry:
   1240  1.1  christos //   x0 -> input text (whole 16-byte blocks)
   1241  1.1  christos //   x1 -> output text (whole 16-byte blocks)
   1242  1.1  christos //   x2 = number of 16-byte blocks to encrypt/decrypt (> 0)
   1243  1.1  christos //   x3 -> key
   1244  1.1  christos //   x4 -> initial value of 128-bit counter (stored big-endian) which increments, modulo 2^32, for each block
   1245  1.1  christos // On exit:
   1246  1.1  christos //   Output text filled in
   1247  1.1  christos //   No output registers, usual AAPCS64 register preservation
   1248  1.1  christos ossl_bsaes_ctr32_encrypt_blocks:
   1249  1.1  christos 	AARCH64_VALID_CALL_TARGET
   1250  1.1  christos 	cmp	x2, #8                      // use plain AES for
   1251  1.1  christos 	blo	.Lctr_enc_short             // small sizes
   1252  1.1  christos 
   1253  1.1  christos 	stp	x29, x30, [sp, #-80]!       // 80-byte frame: frame record + d8-d15
   1254  1.1  christos 	stp	d8, d9, [sp, #16]
   1255  1.1  christos 	stp	d10, d11, [sp, #32]
   1256  1.1  christos 	stp	d12, d13, [sp, #48]
   1257  1.1  christos 	stp	d14, d15, [sp, #64]
   1258  1.1  christos 
   1259  1.1  christos 	ldr	w15, [x3, #240]             // get # of rounds
   1260  1.1  christos 	mov	x14, sp                     // x14 = frame base; the wipe loop at .Lctr_enc_bzero stops here
   1261  1.1  christos 
   1262  1.1  christos         // allocate the key schedule on the stack
   1263  1.1  christos 	add	x17, sp, #96
   1264  1.1  christos 	sub	x17, x17, x15, lsl #7       // 128 bytes per inner round key, less 96 bytes
   1265  1.1  christos 
   1266  1.1  christos         // populate the key schedule
   1267  1.1  christos 	mov	x9, x3                      // pass key
   1268  1.1  christos 	mov	x10, x15                    // pass # of rounds
   1269  1.1  christos 	mov	sp, x17                     // sp -> base of the key schedule area
   1270  1.1  christos 	bl	_bsaes_key_convert          // NOTE(review): presumably advances x17 and leaves x11 at .LM0_bigendian - confirm against helper
   1271  1.1  christos 	eor	v7.16b, v7.16b, v15.16b     // fix up last round key
   1272  1.1  christos 	str	q7, [x17]                   // save last round key
   1273  1.1  christos 
   1274  1.1  christos 	ldr	q0, [x4]                    // load counter
   1275  1.1  christos 	add	x13, x11, #.LREVM0SR-.LM0_bigendian    // x13 -> .LREVM0SR, assuming x11 -> .LM0_bigendian here
   1276  1.1  christos 	ldr	q4, [sp]                    // load round0 key
   1277  1.1  christos 
   1278  1.1  christos 	movi	v8.4s, #1                   // compose 1<<96
   1279  1.1  christos 	movi	v9.16b, #0                  // zero filler for the ext below
   1280  1.1  christos 	rev32	v15.16b, v0.16b             // v15 = running counter, bytes swapped within each 32-bit word
   1281  1.1  christos 	rev32	v0.16b, v0.16b              // v0 = same value, used as block 0 of the first pass
   1282  1.1  christos 	ext	v11.16b, v9.16b, v8.16b, #4 // v11 = 1 in the top 32-bit lane, zero elsewhere
   1283  1.1  christos 	rev32	v4.16b, v4.16b              // byte-swap round0 key the same way as the counter
   1284  1.1  christos 	add	v12.4s, v11.4s, v11.4s      // compose 2<<96
   1285  1.1  christos 	str	q4, [sp]                    // save adjusted round0 key
   1286  1.1  christos 	add	v13.4s, v11.4s, v12.4s      // compose 3<<96
   1287  1.1  christos 	add	v14.4s, v12.4s, v12.4s      // compose 4<<96
   1288  1.1  christos 	b	.Lctr_enc_loop
   1289  1.1  christos 
   1290  1.1  christos .align	4
   1291  1.1  christos .Lctr_enc_loop:
   1292  1.1  christos         // Intermix prologue from _bsaes_encrypt8 to use the opportunity
   1293  1.1  christos         // to flip byte order in 32-bit counter
   1294  1.1  christos 
   1295  1.1  christos 	add	v1.4s, v15.4s, v11.4s       // +1
   1296  1.1  christos 	add	x9, sp, #0x10               // pass next round key
   1297  1.1  christos 	add	v2.4s, v15.4s, v12.4s       // +2
   1298  1.1  christos 	ldr	q9, [x13]                   // .LREVM0SR
   1299  1.1  christos 	ldr	q8, [sp]                    // load round0 key
   1300  1.1  christos 	add	v3.4s, v15.4s, v13.4s       // +3
   1301  1.1  christos 	mov	x10, x15                    // pass rounds
   1302  1.1  christos 	sub	x11, x13, #.LREVM0SR-.LSR   // pass constants
   1303  1.1  christos 	add	v6.4s, v2.4s, v14.4s        // +6
   1304  1.1  christos 	add	v4.4s, v15.4s, v14.4s       // +4
   1305  1.1  christos 	add	v7.4s, v3.4s, v14.4s        // +7
   1306  1.1  christos 	add	v15.4s, v4.4s, v14.4s       // next counter
   1307  1.1  christos 	add	v5.4s, v1.4s, v14.4s        // +5
   1308  1.1  christos 
   1309  1.1  christos 	bl	_bsaes_encrypt8_alt
   1310  1.1  christos 
   1311  1.1  christos 	subs	x2, x2, #8                  // account for the 8 keystream blocks just produced
   1312  1.1  christos 	blo	.Lctr_enc_loop_done         // fewer than 8 blocks of text remain
   1313  1.1  christos 
   1314  1.1  christos 	ldr	q16, [x0], #16              // full pass: results are consumed in the order v0,v1,v4,v6,v3,v7,v2,v5
   1315  1.1  christos 	ldr	q17, [x0], #16
   1316  1.1  christos 	eor	v1.16b, v1.16b, v17.16b
   1317  1.1  christos 	ldr	q17, [x0], #16
   1318  1.1  christos 	eor	v0.16b, v0.16b, v16.16b
   1319  1.1  christos 	eor	v4.16b, v4.16b, v17.16b
   1320  1.1  christos 	str	q0, [x1], #16
   1321  1.1  christos 	ldr	q16, [x0], #16
   1322  1.1  christos 	str	q1, [x1], #16
   1323  1.1  christos 	mov	v0.16b, v15.16b             // block 0 of the next pass = next counter
   1324  1.1  christos 	str	q4, [x1], #16
   1325  1.1  christos 	ldr	q1, [x0], #16
   1326  1.1  christos 	eor	v4.16b, v6.16b, v16.16b
   1327  1.1  christos 	eor	v1.16b, v3.16b, v1.16b
   1328  1.1  christos 	ldr	q3, [x0], #16
   1329  1.1  christos 	eor	v3.16b, v7.16b, v3.16b
   1330  1.1  christos 	ldr	q6, [x0], #16
   1331  1.1  christos 	eor	v2.16b, v2.16b, v6.16b
   1332  1.1  christos 	ldr	q6, [x0], #16
   1333  1.1  christos 	eor	v5.16b, v5.16b, v6.16b
   1334  1.1  christos 	str	q4, [x1], #16
   1335  1.1  christos 	str	q1, [x1], #16
   1336  1.1  christos 	str	q3, [x1], #16
   1337  1.1  christos 	str	q2, [x1], #16
   1338  1.1  christos 	str	q5, [x1], #16
   1339  1.1  christos 
   1340  1.1  christos 	bne	.Lctr_enc_loop              // flags still from the subs above: blocks remain
   1341  1.1  christos 	b	.Lctr_enc_done
   1342  1.1  christos 
   1343  1.1  christos .align	4
   1344  1.1  christos .Lctr_enc_loop_done:
   1345  1.1  christos 	add	x2, x2, #8                  // undo the subs: x2 = blocks remaining (1..7)
   1346  1.1  christos 	ldr	q16, [x0], #16              // load input
   1347  1.1  christos 	eor	v0.16b, v0.16b, v16.16b
   1348  1.1  christos 	str	q0, [x1], #16               // write output
   1349  1.1  christos 	cmp	x2, #2
   1350  1.1  christos 	blo	.Lctr_enc_done              // only 1 block
   1351  1.1  christos 	ldr	q17, [x0], #16
   1352  1.1  christos 	eor	v1.16b, v1.16b, v17.16b
   1353  1.1  christos 	str	q1, [x1], #16
   1354  1.1  christos 	beq	.Lctr_enc_done              // exactly 2 (flags from cmp x2, #2)
   1355  1.1  christos 	ldr	q18, [x0], #16
   1356  1.1  christos 	eor	v4.16b, v4.16b, v18.16b
   1357  1.1  christos 	str	q4, [x1], #16
   1358  1.1  christos 	cmp	x2, #4
   1359  1.1  christos 	blo	.Lctr_enc_done              // exactly 3
   1360  1.1  christos 	ldr	q19, [x0], #16
   1361  1.1  christos 	eor	v6.16b, v6.16b, v19.16b
   1362  1.1  christos 	str	q6, [x1], #16
   1363  1.1  christos 	beq	.Lctr_enc_done              // exactly 4 (flags from cmp x2, #4)
   1364  1.1  christos 	ldr	q20, [x0], #16
   1365  1.1  christos 	eor	v3.16b, v3.16b, v20.16b
   1366  1.1  christos 	str	q3, [x1], #16
   1367  1.1  christos 	cmp	x2, #6
   1368  1.1  christos 	blo	.Lctr_enc_done              // exactly 5
   1369  1.1  christos 	ldr	q21, [x0], #16
   1370  1.1  christos 	eor	v7.16b, v7.16b, v21.16b
   1371  1.1  christos 	str	q7, [x1], #16
   1372  1.1  christos 	beq	.Lctr_enc_done              // exactly 6 (flags from cmp x2, #6)
   1373  1.1  christos 	ldr	q22, [x0]                   // 7th and final block
   1374  1.1  christos 	eor	v2.16b, v2.16b, v22.16b
   1375  1.1  christos 	str	q2, [x1], #16
   1376  1.1  christos 
   1377  1.1  christos .Lctr_enc_done:
   1378  1.1  christos 	movi	v0.16b, #0
   1379  1.1  christos 	movi	v1.16b, #0
   1380  1.1  christos .Lctr_enc_bzero:	//	wipe key schedule [if any]
   1381  1.1  christos 	stp	q0, q1, [sp], #32           // zero 32 bytes and pop them
   1382  1.1  christos 	cmp	sp, x14                     // back at the frame base saved on entry?
   1383  1.1  christos 	bne	.Lctr_enc_bzero
   1384  1.1  christos 
   1385  1.1  christos 	ldp	d8, d9, [sp, #16]
   1386  1.1  christos 	ldp	d10, d11, [sp, #32]
   1387  1.1  christos 	ldp	d12, d13, [sp, #48]
   1388  1.1  christos 	ldp	d14, d15, [sp, #64]
   1389  1.1  christos 	ldp	x29, x30, [sp], #80
   1390  1.1  christos 	ret
   1391  1.1  christos 
   1392  1.1  christos .Lctr_enc_short:                    // fewer than 8 blocks: one AES_encrypt call per block
   1393  1.1  christos 	stp	x29, x30, [sp, #-96]!       // frame: record, x19-x23, keystream block at +64, counter copy at +80
   1394  1.1  christos 	stp	x19, x20, [sp, #16]
   1395  1.1  christos 	stp	x21, x22, [sp, #32]
   1396  1.1  christos 	str	x23, [sp, #48]
   1397  1.1  christos 
   1398  1.1  christos 	mov	x19, x0                     // copy arguments
   1399  1.1  christos 	mov	x20, x1
   1400  1.1  christos 	mov	x21, x2
   1401  1.1  christos 	mov	x22, x3
   1402  1.1  christos 	ldr	w23, [x4, #12]              // load counter LSW (least-significant 32-bit word)
   1403  1.1  christos 	ldr	q1, [x4]                    // load whole counter value
   1404  1.1  christos #ifdef __AARCH64EL__
   1405  1.1  christos 	rev	w23, w23                    // big-endian counter word -> native order
   1406  1.1  christos #endif
   1407  1.1  christos 	str	q1, [sp, #80]               // copy counter value
   1408  1.1  christos 
   1409  1.1  christos .Lctr_enc_short_loop:
   1410  1.1  christos 	add	x0, sp, #80                 // input counter value
   1411  1.1  christos 	add	x1, sp, #64                 // output on the stack
   1412  1.1  christos 	mov	x2, x22                     // key
   1413  1.1  christos 
   1414  1.1  christos 	bl	AES_encrypt
   1415  1.1  christos 
   1416  1.1  christos 	ldr	q0, [x19], #16              // load input
   1417  1.1  christos 	ldr	q1, [sp, #64]               // load encrypted counter
   1418  1.1  christos 	add	x23, x23, #1                // bump counter; only the low 32 bits (w23) are stored back
   1419  1.1  christos #ifdef __AARCH64EL__
   1420  1.1  christos 	rev	w0, w23
   1421  1.1  christos 	str	w0, [sp, #80+12]            // next counter value
   1422  1.1  christos #else
   1423  1.1  christos 	str	w23, [sp, #80+12]           // next counter value
   1424  1.1  christos #endif
   1425  1.1  christos 	eor	v0.16b, v0.16b, v1.16b
   1426  1.1  christos 	str	q0, [x20], #16              // store output
   1427  1.1  christos 	subs	x21, x21, #1
   1428  1.1  christos 	bne	.Lctr_enc_short_loop
   1429  1.1  christos 
   1430  1.1  christos 	movi	v0.16b, #0
   1431  1.1  christos 	movi	v1.16b, #0
   1432  1.1  christos 	stp	q0, q1, [sp, #64]           // wipe the keystream block and the counter copy
   1433  1.1  christos 
   1434  1.1  christos 	ldr	x23, [sp, #48]
   1435  1.1  christos 	ldp	x21, x22, [sp, #32]
   1436  1.1  christos 	ldp	x19, x20, [sp, #16]
   1437  1.1  christos 	ldp	x29, x30, [sp], #96
   1438  1.1  christos 	ret
   1439  1.1  christos .size	ossl_bsaes_ctr32_encrypt_blocks,.-ossl_bsaes_ctr32_encrypt_blocks
   1440  1.1  christos 
   1441  1.1  christos .globl	ossl_bsaes_xts_encrypt
   1442  1.1  christos .type	ossl_bsaes_xts_encrypt,%function
   1443  1.1  christos .align	4
   1444  1.1  christos // AES-XTS encryption: 8 blocks per bit-sliced pass, then a 1-7 block tail and optional ciphertext stealing. On entry:
   1445  1.1  christos //   x0 -> input plaintext
   1446  1.1  christos //   x1 -> output ciphertext
   1447  1.1  christos //   x2 = length of text in bytes (must be at least 16)
   1448  1.1  christos //   x3 -> key1 (used to encrypt the XORed plaintext blocks)
   1449  1.1  christos //   x4 -> key2 (used to encrypt the initial vector to yield the initial tweak)
   1450  1.1  christos //   x5 -> 16-byte initial vector (typically, sector number)
   1451  1.1  christos // On exit:
   1452  1.1  christos //   Output ciphertext filled in
   1453  1.1  christos //   No output registers, usual AAPCS64 register preservation
   1454  1.1  christos ossl_bsaes_xts_encrypt:
   1455  1.1  christos 	AARCH64_VALID_CALL_TARGET
   1456  1.1  christos         // Stack layout:
   1457  1.1  christos         // sp ->
   1458  1.1  christos         //        nrounds*128-96 bytes: key schedule
   1459  1.1  christos         // x19 ->
   1460  1.1  christos         //        16 bytes: frame record
   1461  1.1  christos         //        4*16 bytes: tweak storage across _bsaes_encrypt8
   1462  1.1  christos         //        6*8 bytes: storage for 5 callee-saved general-purpose registers (x19-x23; the sixth slot is unused)
   1463  1.1  christos         //        8*8 bytes: storage for 8 callee-saved SIMD registers
   1464  1.1  christos 	stp	x29, x30, [sp, #-192]!
   1465  1.1  christos 	stp	x19, x20, [sp, #80]
   1466  1.1  christos 	stp	x21, x22, [sp, #96]
   1467  1.1  christos 	str	x23, [sp, #112]
   1468  1.1  christos 	stp	d8, d9, [sp, #128]
   1469  1.1  christos 	stp	d10, d11, [sp, #144]
   1470  1.1  christos 	stp	d12, d13, [sp, #160]
   1471  1.1  christos 	stp	d14, d15, [sp, #176]
   1472  1.1  christos 
   1473  1.1  christos 	mov	x19, sp                     // x19 = frame base (see stack layout above)
   1474  1.1  christos 	mov	x20, x0                     // x20 -> input
   1475  1.1  christos 	mov	x21, x1                     // x21 -> output
   1476  1.1  christos 	mov	x22, x2                     // x22 = byte count
   1477  1.1  christos 	mov	x23, x3                     // x23 -> key1
   1478  1.1  christos 
   1479  1.1  christos         // generate initial tweak
   1480  1.1  christos 	sub	sp, sp, #16                 // 16-byte scratch to receive the encrypted IV
   1481  1.1  christos 	mov	x0, x5                      // iv[]
   1482  1.1  christos 	mov	x1, sp
   1483  1.1  christos 	mov	x2, x4                      // key2
   1484  1.1  christos 	bl	AES_encrypt
   1485  1.1  christos 	ldr	q11, [sp], #16              // v11 = initial tweak; scratch released
   1486  1.1  christos 
   1487  1.1  christos 	ldr	w1, [x23, #240]             // get # of rounds
   1488  1.1  christos         // allocate the key schedule on the stack
   1489  1.1  christos 	add	x17, sp, #96
   1490  1.1  christos 	sub	x17, x17, x1, lsl #7        // 128 bytes per inner round key, less 96 bytes
   1491  1.1  christos 
   1492  1.1  christos         // populate the key schedule
   1493  1.1  christos 	mov	x9, x23                     // pass key
   1494  1.1  christos 	mov	x10, x1                     // pass # of rounds
   1495  1.1  christos 	mov	sp, x17                     // sp -> base of the key schedule area
   1496  1.1  christos 	bl	_bsaes_key_convert          // NOTE(review): presumably advances x17 to the last round key slot - confirm against helper
   1497  1.1  christos 	eor	v15.16b, v15.16b, v7.16b    // fix up last round key
   1498  1.1  christos 	str	q15, [x17]                  // save last round key
   1499  1.1  christos 
   1500  1.1  christos 	subs	x22, x22, #0x80             // at least 8 whole blocks (128 bytes)?
   1501  1.1  christos 	blo	.Lxts_enc_short
   1502  1.1  christos 	b	.Lxts_enc_loop
   1503  1.1  christos 
   1504  1.1  christos .align	4
   1505  1.1  christos .Lxts_enc_loop:
   1506  1.1  christos 	ldr	q8, .Lxts_magic             // v8 = {1, 0x87}
   1507  1.1  christos 	mov	x10, x1                     // pass rounds
   1508  1.1  christos 	add	x2, x19, #16                // x2 -> tweak storage in the frame
   1509  1.1  christos 	ldr	q0, [x20], #16
   1510  1.1  christos 	sshr	v1.2d, v11.2d, #63          // per-lane all-ones mask where the top bit of tweak 0 is set
   1511  1.1  christos 	mov	x9, sp                      // pass key schedule
   1512  1.1  christos 	ldr	q6, .Lxts_magic+16          // v6 = bit 62 set in both 64-bit lanes
   1513  1.1  christos 	add	v2.2d, v11.2d, v11.2d       // double each 64-bit lane
   1514  1.1  christos 	cmtst	v3.2d, v11.2d, v6.2d        // bit 62 now = top bit after doubling (carry for the following tweak)
   1515  1.1  christos 	and	v1.16b, v1.16b, v8.16b
   1516  1.1  christos 	ext	v1.16b, v1.16b, v1.16b, #8  // swap halves so each lane's carry term lands in the other lane
   1517  1.1  christos 	and	v3.16b, v3.16b, v8.16b
   1518  1.1  christos 	ldr	q4, [x20], #16
   1519  1.1  christos 	eor	v12.16b, v2.16b, v1.16b     // v12 = tweak 1
   1520  1.1  christos 	eor	v1.16b, v4.16b, v12.16b
   1521  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1522  1.1  christos 	cmtst	v2.2d, v12.2d, v6.2d
   1523  1.1  christos 	add	v4.2d, v12.2d, v12.2d
   1524  1.1  christos 	add	x0, x19, #16                // x0 -> tweak storage, read back after the encrypt call
   1525  1.1  christos 	ext	v3.16b, v3.16b, v3.16b, #8
   1526  1.1  christos 	and	v2.16b, v2.16b, v8.16b
   1527  1.1  christos 	eor	v13.16b, v4.16b, v3.16b     // v13 = tweak 2
   1528  1.1  christos 	ldr	q3, [x20], #16
   1529  1.1  christos 	ext	v4.16b, v2.16b, v2.16b, #8
   1530  1.1  christos 	eor	v2.16b, v3.16b, v13.16b
   1531  1.1  christos 	ldr	q3, [x20], #16
   1532  1.1  christos 	add	v5.2d, v13.2d, v13.2d
   1533  1.1  christos 	cmtst	v7.2d, v13.2d, v6.2d
   1534  1.1  christos 	and	v7.16b, v7.16b, v8.16b
   1535  1.1  christos 	ldr	q9, [x20], #16
   1536  1.1  christos 	ext	v7.16b, v7.16b, v7.16b, #8
   1537  1.1  christos 	ldr	q10, [x20], #16
   1538  1.1  christos 	eor	v14.16b, v5.16b, v4.16b     // v14 = tweak 3
   1539  1.1  christos 	ldr	q16, [x20], #16
   1540  1.1  christos 	add	v4.2d, v14.2d, v14.2d
   1541  1.1  christos 	eor	v3.16b, v3.16b, v14.16b
   1542  1.1  christos 	eor	v15.16b, v4.16b, v7.16b     // v15 = tweak 4
   1543  1.1  christos 	add	v5.2d, v15.2d, v15.2d
   1544  1.1  christos 	ldr	q7, [x20], #16
   1545  1.1  christos 	cmtst	v4.2d, v14.2d, v6.2d
   1546  1.1  christos 	and	v17.16b, v4.16b, v8.16b
   1547  1.1  christos 	cmtst	v18.2d, v15.2d, v6.2d
   1548  1.1  christos 	eor	v4.16b, v9.16b, v15.16b
   1549  1.1  christos 	ext	v9.16b, v17.16b, v17.16b, #8
   1550  1.1  christos 	eor	v9.16b, v5.16b, v9.16b      // v9 = tweak 5
   1551  1.1  christos 	add	v17.2d, v9.2d, v9.2d
   1552  1.1  christos 	and	v18.16b, v18.16b, v8.16b
   1553  1.1  christos 	eor	v5.16b, v10.16b, v9.16b
   1554  1.1  christos 	str	q9, [x2], #16               // tweak 5 -> frame
   1555  1.1  christos 	ext	v10.16b, v18.16b, v18.16b, #8
   1556  1.1  christos 	cmtst	v9.2d, v9.2d, v6.2d
   1557  1.1  christos 	and	v9.16b, v9.16b, v8.16b
   1558  1.1  christos 	eor	v10.16b, v17.16b, v10.16b   // v10 = tweak 6
   1559  1.1  christos 	cmtst	v17.2d, v10.2d, v6.2d
   1560  1.1  christos 	eor	v6.16b, v16.16b, v10.16b
   1561  1.1  christos 	str	q10, [x2], #16              // tweak 6 -> frame
   1562  1.1  christos 	ext	v9.16b, v9.16b, v9.16b, #8
   1563  1.1  christos 	add	v10.2d, v10.2d, v10.2d
   1564  1.1  christos 	eor	v9.16b, v10.16b, v9.16b     // v9 = tweak 7
   1565  1.1  christos 	str	q9, [x2], #16               // tweak 7 -> frame
   1566  1.1  christos 	eor	v7.16b, v7.16b, v9.16b
   1567  1.1  christos 	add	v9.2d, v9.2d, v9.2d
   1568  1.1  christos 	and	v8.16b, v17.16b, v8.16b
   1569  1.1  christos 	ext	v8.16b, v8.16b, v8.16b, #8
   1570  1.1  christos 	eor	v8.16b, v9.16b, v8.16b
   1571  1.1  christos 	str	q8, [x2]                    // next round tweak
   1572  1.1  christos 
   1573  1.1  christos 	bl	_bsaes_encrypt8
   1574  1.1  christos 
   1575  1.1  christos 	ldr	q8, [x0], #16               // reload tweak 5; results are consumed in the order v0,v1,v4,v6,v3,v7,v2,v5
   1576  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1577  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   1578  1.1  christos 	ldr	q9, [x0], #16               // reload tweak 6
   1579  1.1  christos 	eor	v4.16b, v4.16b, v13.16b
   1580  1.1  christos 	eor	v6.16b, v6.16b, v14.16b
   1581  1.1  christos 	ldr	q10, [x0], #16              // reload tweak 7
   1582  1.1  christos 	eor	v3.16b, v3.16b, v15.16b
   1583  1.1  christos 	subs	x22, x22, #0x80             // flags consumed by the bpl at the end of the loop
   1584  1.1  christos 	str	q0, [x21], #16
   1585  1.1  christos 	ldr	q11, [x0]                   // next round tweak
   1586  1.1  christos 	str	q1, [x21], #16
   1587  1.1  christos 	eor	v0.16b, v7.16b, v8.16b
   1588  1.1  christos 	eor	v1.16b, v2.16b, v9.16b
   1589  1.1  christos 	str	q4, [x21], #16
   1590  1.1  christos 	eor	v2.16b, v5.16b, v10.16b
   1591  1.1  christos 	str	q6, [x21], #16
   1592  1.1  christos 	str	q3, [x21], #16
   1593  1.1  christos 	str	q0, [x21], #16
   1594  1.1  christos 	str	q1, [x21], #16
   1595  1.1  christos 	str	q2, [x21], #16
   1596  1.1  christos 	bpl	.Lxts_enc_loop              // another 8 whole blocks remain
   1597  1.1  christos 
   1598  1.1  christos .Lxts_enc_short:
   1599  1.1  christos 	adds	x22, x22, #0x70             // x22 = remaining bytes - 16
   1600  1.1  christos 	bmi	.Lxts_enc_done              // no whole block left
   1601  1.1  christos 
   1602  1.1  christos 	ldr	q8, .Lxts_magic             // v8 = {1, 0x87}
   1603  1.1  christos 	sshr	v1.2d, v11.2d, #63
   1604  1.1  christos 	add	v2.2d, v11.2d, v11.2d
   1605  1.1  christos 	ldr	q9, .Lxts_magic+16          // v9 = bit 62 set in both 64-bit lanes
   1606  1.1  christos 	subs	x22, x22, #0x10
   1607  1.1  christos 	ldr	q0, [x20], #16
   1608  1.1  christos 	and	v1.16b, v1.16b, v8.16b
   1609  1.1  christos 	cmtst	v3.2d, v11.2d, v9.2d
   1610  1.1  christos 	ext	v1.16b, v1.16b, v1.16b, #8
   1611  1.1  christos 	and	v3.16b, v3.16b, v8.16b
   1612  1.1  christos 	eor	v12.16b, v2.16b, v1.16b     // v12 = tweak 1
   1613  1.1  christos 	ext	v1.16b, v3.16b, v3.16b, #8
   1614  1.1  christos 	add	v2.2d, v12.2d, v12.2d
   1615  1.1  christos 	cmtst	v3.2d, v12.2d, v9.2d
   1616  1.1  christos 	eor	v13.16b, v2.16b, v1.16b     // v13 = tweak 2
   1617  1.1  christos 	and	v22.16b, v3.16b, v8.16b
   1618  1.1  christos 	bmi	.Lxts_enc_1                 // exactly 1 whole block (flags from the last subs)
   1619  1.1  christos 
   1620  1.1  christos 	ext	v2.16b, v22.16b, v22.16b, #8
   1621  1.1  christos 	add	v3.2d, v13.2d, v13.2d
   1622  1.1  christos 	ldr	q1, [x20], #16
   1623  1.1  christos 	cmtst	v4.2d, v13.2d, v9.2d
   1624  1.1  christos 	subs	x22, x22, #0x10
   1625  1.1  christos 	eor	v14.16b, v3.16b, v2.16b     // v14 = tweak 3
   1626  1.1  christos 	and	v23.16b, v4.16b, v8.16b
   1627  1.1  christos 	bmi	.Lxts_enc_2                 // exactly 2 whole blocks
   1628  1.1  christos 
   1629  1.1  christos 	ext	v3.16b, v23.16b, v23.16b, #8
   1630  1.1  christos 	add	v4.2d, v14.2d, v14.2d
   1631  1.1  christos 	ldr	q2, [x20], #16
   1632  1.1  christos 	cmtst	v5.2d, v14.2d, v9.2d
   1633  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1634  1.1  christos 	subs	x22, x22, #0x10
   1635  1.1  christos 	eor	v15.16b, v4.16b, v3.16b     // v15 = tweak 4
   1636  1.1  christos 	and	v24.16b, v5.16b, v8.16b
   1637  1.1  christos 	bmi	.Lxts_enc_3                 // exactly 3 whole blocks
   1638  1.1  christos 
   1639  1.1  christos 	ext	v4.16b, v24.16b, v24.16b, #8
   1640  1.1  christos 	add	v5.2d, v15.2d, v15.2d
   1641  1.1  christos 	ldr	q3, [x20], #16
   1642  1.1  christos 	cmtst	v6.2d, v15.2d, v9.2d
   1643  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   1644  1.1  christos 	subs	x22, x22, #0x10
   1645  1.1  christos 	eor	v16.16b, v5.16b, v4.16b     // v16 = tweak 5
   1646  1.1  christos 	and	v25.16b, v6.16b, v8.16b
   1647  1.1  christos 	bmi	.Lxts_enc_4                 // exactly 4 whole blocks
   1648  1.1  christos 
   1649  1.1  christos 	ext	v5.16b, v25.16b, v25.16b, #8
   1650  1.1  christos 	add	v6.2d, v16.2d, v16.2d
   1651  1.1  christos 	add	x0, x19, #16                // x0 -> tweak storage in the frame
   1652  1.1  christos 	cmtst	v7.2d, v16.2d, v9.2d
   1653  1.1  christos 	ldr	q4, [x20], #16
   1654  1.1  christos 	eor	v2.16b, v2.16b, v13.16b
   1655  1.1  christos 	str	q16, [x0], #16              // tweak 5 -> frame (v16 is not relied on across the encrypt call)
   1656  1.1  christos 	subs	x22, x22, #0x10
   1657  1.1  christos 	eor	v17.16b, v6.16b, v5.16b     // v17 = tweak 6
   1658  1.1  christos 	and	v26.16b, v7.16b, v8.16b
   1659  1.1  christos 	bmi	.Lxts_enc_5                 // exactly 5 whole blocks
   1660  1.1  christos 
   1661  1.1  christos 	ext	v7.16b, v26.16b, v26.16b, #8
   1662  1.1  christos 	add	v18.2d, v17.2d, v17.2d
   1663  1.1  christos 	ldr	q5, [x20], #16
   1664  1.1  christos 	eor	v3.16b, v3.16b, v14.16b
   1665  1.1  christos 	str	q17, [x0], #16              // tweak 6 -> frame
   1666  1.1  christos 	subs	x22, x22, #0x10
   1667  1.1  christos 	eor	v18.16b, v18.16b, v7.16b    // v18 = tweak 7
   1668  1.1  christos 	bmi	.Lxts_enc_6                 // exactly 6 whole blocks
   1669  1.1  christos 
   1670  1.1  christos 	ldr	q6, [x20], #16              // 7 whole blocks
   1671  1.1  christos 	eor	v4.16b, v4.16b, v15.16b
   1672  1.1  christos 	eor	v5.16b, v5.16b, v16.16b
   1673  1.1  christos 	str	q18, [x0]                   // next round tweak
   1674  1.1  christos 	mov	x9, sp                      // pass key schedule
   1675  1.1  christos 	mov	x10, x1                     // pass rounds
   1676  1.1  christos 	add	x0, x19, #16
   1677  1.1  christos 	sub	x22, x22, #0x10             // keeps x22 = leftover bytes - 16 for .Lxts_enc_done
   1678  1.1  christos 	eor	v6.16b, v6.16b, v17.16b
   1679  1.1  christos 
   1680  1.1  christos 	bl	_bsaes_encrypt8
   1681  1.1  christos 
   1682  1.1  christos 	ldr	q16, [x0], #16              // reload tweak 5
   1683  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1684  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   1685  1.1  christos 	ldr	q17, [x0], #16              // reload tweak 6
   1686  1.1  christos 	eor	v4.16b, v4.16b, v13.16b
   1687  1.1  christos 	eor	v6.16b, v6.16b, v14.16b
   1688  1.1  christos 	eor	v3.16b, v3.16b, v15.16b
   1689  1.1  christos 	ldr	q11, [x0]                   // next round tweak
   1690  1.1  christos 	str	q0, [x21], #16
   1691  1.1  christos 	str	q1, [x21], #16
   1692  1.1  christos 	eor	v0.16b, v7.16b, v16.16b
   1693  1.1  christos 	eor	v1.16b, v2.16b, v17.16b
   1694  1.1  christos 	str	q4, [x21], #16
   1695  1.1  christos 	str	q6, [x21], #16
   1696  1.1  christos 	str	q3, [x21], #16
   1697  1.1  christos 	str	q0, [x21], #16
   1698  1.1  christos 	str	q1, [x21], #16
   1699  1.1  christos 	b	.Lxts_enc_done
   1700  1.1  christos 
   1701  1.1  christos .align	4
   1702  1.1  christos .Lxts_enc_6:
   1703  1.1  christos 	eor	v4.16b, v4.16b, v15.16b
   1704  1.1  christos 	eor	v5.16b, v5.16b, v16.16b
   1705  1.1  christos 	mov	x9, sp                      // pass key schedule
   1706  1.1  christos 	mov	x10, x1                     // pass rounds
   1707  1.1  christos 	add	x0, x19, #16
   1708  1.1  christos 
   1709  1.1  christos 	bl	_bsaes_encrypt8
   1710  1.1  christos 
   1711  1.1  christos 	ldr	q16, [x0], #16              // reload tweak 5
   1712  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1713  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   1714  1.1  christos 	eor	v4.16b, v4.16b, v13.16b
   1715  1.1  christos 	eor	v6.16b, v6.16b, v14.16b
   1716  1.1  christos 	ldr	q11, [x0]                   // next round tweak
   1717  1.1  christos 	eor	v3.16b, v3.16b, v15.16b
   1718  1.1  christos 	str	q0, [x21], #16
   1719  1.1  christos 	str	q1, [x21], #16
   1720  1.1  christos 	eor	v0.16b, v7.16b, v16.16b
   1721  1.1  christos 	str	q4, [x21], #16
   1722  1.1  christos 	str	q6, [x21], #16
   1723  1.1  christos 	str	q3, [x21], #16
   1724  1.1  christos 	str	q0, [x21], #16
   1725  1.1  christos 	b	.Lxts_enc_done
   1726  1.1  christos 
   1727  1.1  christos .align	4
   1728  1.1  christos .Lxts_enc_5:
   1729  1.1  christos 	eor	v3.16b, v3.16b, v14.16b
   1730  1.1  christos 	eor	v4.16b, v4.16b, v15.16b
   1731  1.1  christos 	mov	x9, sp                      // pass key schedule
   1732  1.1  christos 	mov	x10, x1                     // pass rounds
   1733  1.1  christos 	add	x0, x19, #16
   1734  1.1  christos 
   1735  1.1  christos 	bl	_bsaes_encrypt8
   1736  1.1  christos 
   1737  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1738  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   1739  1.1  christos 	ldr	q11, [x0]                   // next round tweak
   1740  1.1  christos 	eor	v4.16b, v4.16b, v13.16b
   1741  1.1  christos 	eor	v6.16b, v6.16b, v14.16b
   1742  1.1  christos 	eor	v3.16b, v3.16b, v15.16b
   1743  1.1  christos 	str	q0, [x21], #16
   1744  1.1  christos 	str	q1, [x21], #16
   1745  1.1  christos 	str	q4, [x21], #16
   1746  1.1  christos 	str	q6, [x21], #16
   1747  1.1  christos 	str	q3, [x21], #16
   1748  1.1  christos 	b	.Lxts_enc_done
   1749  1.1  christos 
   1750  1.1  christos .align	4
   1751  1.1  christos .Lxts_enc_4:
   1752  1.1  christos 	eor	v2.16b, v2.16b, v13.16b
   1753  1.1  christos 	eor	v3.16b, v3.16b, v14.16b
   1754  1.1  christos 	mov	x9, sp                      // pass key schedule
   1755  1.1  christos 	mov	x10, x1                     // pass rounds
   1756  1.1  christos 	add	x0, x19, #16
   1757  1.1  christos 
   1758  1.1  christos 	bl	_bsaes_encrypt8
   1759  1.1  christos 
   1760  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1761  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   1762  1.1  christos 	eor	v4.16b, v4.16b, v13.16b
   1763  1.1  christos 	eor	v6.16b, v6.16b, v14.16b
   1764  1.1  christos 	mov	v11.16b, v15.16b            // next round tweak
   1765  1.1  christos 	str	q0, [x21], #16
   1766  1.1  christos 	str	q1, [x21], #16
   1767  1.1  christos 	str	q4, [x21], #16
   1768  1.1  christos 	str	q6, [x21], #16
   1769  1.1  christos 	b	.Lxts_enc_done
   1770  1.1  christos 
   1771  1.1  christos .align	4
   1772  1.1  christos .Lxts_enc_3:
   1773  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   1774  1.1  christos 	eor	v2.16b, v2.16b, v13.16b
   1775  1.1  christos 	mov	x9, sp                      // pass key schedule
   1776  1.1  christos 	mov	x10, x1                     // pass rounds
   1777  1.1  christos 	add	x0, x19, #16
   1778  1.1  christos 
   1779  1.1  christos 	bl	_bsaes_encrypt8
   1780  1.1  christos 
   1781  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1782  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   1783  1.1  christos 	eor	v4.16b, v4.16b, v13.16b
   1784  1.1  christos 	mov	v11.16b, v14.16b            // next round tweak
   1785  1.1  christos 	str	q0, [x21], #16
   1786  1.1  christos 	str	q1, [x21], #16
   1787  1.1  christos 	str	q4, [x21], #16
   1788  1.1  christos 	b	.Lxts_enc_done
   1789  1.1  christos 
   1790  1.1  christos .align	4
   1791  1.1  christos .Lxts_enc_2:
   1792  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1793  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   1794  1.1  christos 	mov	x9, sp                      // pass key schedule
   1795  1.1  christos 	mov	x10, x1                     // pass rounds
   1796  1.1  christos 	add	x0, x19, #16
   1797  1.1  christos 
   1798  1.1  christos 	bl	_bsaes_encrypt8
   1799  1.1  christos 
   1800  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1801  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   1802  1.1  christos 	mov	v11.16b, v13.16b            // next round tweak
   1803  1.1  christos 	str	q0, [x21], #16
   1804  1.1  christos 	str	q1, [x21], #16
   1805  1.1  christos 	b	.Lxts_enc_done
   1806  1.1  christos 
   1807  1.1  christos .align	4
   1808  1.1  christos .Lxts_enc_1:
   1809  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1810  1.1  christos 	sub	x0, sp, #16                 // in = out = the scratch slot pushed just below
   1811  1.1  christos 	sub	x1, sp, #16
   1812  1.1  christos 	mov	x2, x23                     // key1
   1813  1.1  christos 	mov	v13.d[0], v11.d[1]          // just in case AES_encrypt corrupts top half of callee-saved SIMD registers
   1814  1.1  christos 	mov	v14.d[0], v12.d[1]
   1815  1.1  christos 	str	q0, [sp, #-16]!
   1816  1.1  christos 
   1817  1.1  christos 	bl	AES_encrypt
   1818  1.1  christos 
   1819  1.1  christos 	ldr	q0, [sp], #16
   1820  1.1  christos 	trn1	v13.2d, v11.2d, v13.2d      // rebuild tweak 0 from the two preserved low halves
   1821  1.1  christos 	trn1	v11.2d, v12.2d, v14.2d      // next round tweak
   1822  1.1  christos 	eor	v0.16b, v0.16b, v13.16b
   1823  1.1  christos 	str	q0, [x21], #16
   1824  1.1  christos 
   1825  1.1  christos .Lxts_enc_done:
   1826  1.1  christos 	adds	x22, x22, #0x10             // x22 = leftover bytes (0..15)
   1827  1.1  christos 	beq	.Lxts_enc_ret               // none: no ciphertext stealing needed
   1828  1.1  christos 
   1829  1.1  christos 	sub	x6, x21, #0x10              // x6 -> last whole ciphertext block written
   1830  1.1  christos         // Penultimate plaintext block produces final ciphertext part-block
   1831  1.1  christos         // plus remaining part of final plaintext block. Move ciphertext part
   1832  1.1  christos         // to final position and reuse penultimate ciphertext block buffer to
   1833  1.1  christos         // construct final plaintext block
   1834  1.1  christos .Lxts_enc_steal:
   1835  1.1  christos 	ldrb	w0, [x20], #1               // next byte of the trailing partial plaintext
   1836  1.1  christos 	ldrb	w1, [x21, #-0x10]           // matching byte of the last whole ciphertext block
   1837  1.1  christos 	strb	w0, [x21, #-0x10]           // plaintext byte takes its place
   1838  1.1  christos 	strb	w1, [x21], #1               // ciphertext byte moves to the final part-block
   1839  1.1  christos 
   1840  1.1  christos 	subs	x22, x22, #1
   1841  1.1  christos 	bhi	.Lxts_enc_steal             // until all leftover bytes are moved
   1842  1.1  christos 
   1843  1.1  christos         // Finally encrypt the penultimate ciphertext block using the
   1844  1.1  christos         // last tweak
   1845  1.1  christos 	ldr	q0, [x6]
   1846  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1847  1.1  christos 	str	q0, [sp, #-16]!             // push block; encrypted in place on the stack
   1848  1.1  christos 	mov	x0, sp
   1849  1.1  christos 	mov	x1, sp
   1850  1.1  christos 	mov	x2, x23                     // key1
   1851  1.1  christos 	mov	x21, x6                     // x21 survives the call; x6 is caller-saved
   1852  1.1  christos 	mov	v13.d[0], v11.d[1]          // just in case AES_encrypt corrupts top half of callee-saved SIMD registers
   1853  1.1  christos 
   1854  1.1  christos 	bl	AES_encrypt
   1855  1.1  christos 
   1856  1.1  christos 	trn1	v11.2d, v11.2d, v13.2d      // rebuild the tweak from the two preserved low halves
   1857  1.1  christos 	ldr	q0, [sp], #16
   1858  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1859  1.1  christos 	str	q0, [x21]
   1860  1.1  christos 
   1861  1.1  christos .Lxts_enc_ret:
   1862  1.1  christos 
   1863  1.1  christos 	movi	v0.16b, #0
   1864  1.1  christos 	movi	v1.16b, #0
   1865  1.1  christos .Lxts_enc_bzero:	//	wipe key schedule
   1866  1.1  christos 	stp	q0, q1, [sp], #32           // zero 32 bytes and pop them
   1867  1.1  christos 	cmp	sp, x19                     // back at the frame base?
   1868  1.1  christos 	bne	.Lxts_enc_bzero
   1869  1.1  christos 
   1870  1.1  christos 	ldp	x19, x20, [sp, #80]
   1871  1.1  christos 	ldp	x21, x22, [sp, #96]
   1872  1.1  christos 	ldr	x23, [sp, #112]
   1873  1.1  christos 	ldp	d8, d9, [sp, #128]
   1874  1.1  christos 	ldp	d10, d11, [sp, #144]
   1875  1.1  christos 	ldp	d12, d13, [sp, #160]
   1876  1.1  christos 	ldp	d14, d15, [sp, #176]
   1877  1.1  christos 	ldp	x29, x30, [sp], #192
   1878  1.1  christos 	ret
   1879  1.1  christos .size	ossl_bsaes_xts_encrypt,.-ossl_bsaes_xts_encrypt
   1880  1.1  christos 
   1881  1.1  christos // The assembler doesn't seem capable of de-duplicating these when expressed
   1882  1.1  christos // using `ldr qd,=` syntax, so assign a symbolic address
   1883  1.1  christos .align	5
   1884  1.1  christos .Lxts_magic:                        // loaded as two q-registers: [.Lxts_magic] and [.Lxts_magic+16]
   1885  1.1  christos .quad	1, 0x87, 0x4000000000000000, 0x4000000000000000    // {1, 0x87}: carry terms ANDed in when doubling a tweak; {1<<62, 1<<62}: cmtst mask for the bit that becomes the top bit after doubling
   1886  1.1  christos 
   1887  1.1  christos .globl	ossl_bsaes_xts_decrypt
   1888  1.1  christos .type	ossl_bsaes_xts_decrypt,%function
   1889  1.1  christos .align	4
   1890  1.1  christos // XTS decryption: whole blocks go through _bsaes_decrypt8 (up to 8 per call), a lone block and the ciphertext-stealing tail go through AES_decrypt. On entry:
   1891  1.1  christos //   x0 -> input ciphertext
   1892  1.1  christos //   x1 -> output plaintext
   1893  1.1  christos //   x2 = length of text in bytes (must be at least 16)
   1894  1.1  christos //   x3 -> key1 (used to decrypt the XORed ciphertext blocks)
   1895  1.1  christos //   x4 -> key2 (used to encrypt the initial vector to yield the initial tweak)
   1896  1.1  christos //   x5 -> 16-byte initial vector (typically, sector number)
   1897  1.1  christos // On exit:
   1898  1.1  christos //   Output plaintext filled in
   1899  1.1  christos //   No output registers, usual AAPCS64 register preservation
   1900  1.1  christos ossl_bsaes_xts_decrypt:
   1901  1.1  christos 	AARCH64_VALID_CALL_TARGET
   1902  1.1  christos         // Stack layout (offsets below are relative to x19):
   1903  1.1  christos         // sp ->
   1904  1.1  christos         //        nrounds*128-96 bytes: key schedule
   1905  1.1  christos         // x19 ->
   1906  1.1  christos         //        16 bytes: frame record (offset 0)
   1907  1.1  christos         //        4*16 bytes: tweak storage across _bsaes_decrypt8 (offset 16)
   1908  1.1  christos         //        6*8 bytes: storage for 5 callee-saved general-purpose registers (offset 80; last 8 bytes unused)
   1909  1.1  christos         //        8*8 bytes: storage for 8 callee-saved SIMD registers (offset 128)
   1910  1.1  christos 	stp	x29, x30, [sp, #-192]!      // allocate the 192-byte frame and store the frame record
   1911  1.1  christos 	stp	x19, x20, [sp, #80]
   1912  1.1  christos 	stp	x21, x22, [sp, #96]
   1913  1.1  christos 	str	x23, [sp, #112]
   1914  1.1  christos 	stp	d8, d9, [sp, #128]          // only the low 64 bits of v8-v15 are saved
   1915  1.1  christos 	stp	d10, d11, [sp, #144]
   1916  1.1  christos 	stp	d12, d13, [sp, #160]
   1917  1.1  christos 	stp	d14, d15, [sp, #176]
   1918  1.1  christos         // move the arguments into callee-saved registers so they survive the calls below
   1919  1.1  christos 	mov	x19, sp                     // x19 = frame base (also the upper limit of the key schedule wipe)
   1920  1.1  christos 	mov	x20, x0                     // x20 -> next input byte
   1921  1.1  christos 	mov	x21, x1                     // x21 -> next output byte
   1922  1.1  christos 	mov	x22, x2                     // x22 = byte count (biased below)
   1923  1.1  christos 	mov	x23, x3                     // x23 -> key1
   1924  1.1  christos 
   1925  1.1  christos         // generate initial tweak
   1926  1.1  christos 	sub	sp, sp, #16                 // 16-byte scratch slot to receive the encrypted IV
   1927  1.1  christos 	mov	x0, x5                      // iv[]
   1928  1.1  christos 	mov	x1, sp
   1929  1.1  christos 	mov	x2, x4                      // key2
   1930  1.1  christos 	bl	AES_encrypt
   1931  1.1  christos 	ldr	q11, [sp], #16              // v11 = initial tweak; release the scratch slot (sp == x19 again)
   1932  1.1  christos 
   1933  1.1  christos 	ldr	w1, [x23, #240]             // get # of rounds
   1934  1.1  christos         // allocate the key schedule on the stack
   1935  1.1  christos 	add	x17, sp, #96
   1936  1.1  christos 	sub	x17, x17, x1, lsl #7        // 128 bytes per inner round key, less 96 bytes
   1937  1.1  christos 
   1938  1.1  christos         // populate the key schedule
   1939  1.1  christos 	mov	x9, x23                     // pass key
   1940  1.1  christos 	mov	x10, x1                     // pass # of rounds
   1941  1.1  christos 	mov	sp, x17                     // sp -> base of the key schedule
   1942  1.1  christos 	bl	_bsaes_key_convert
   1943  1.1  christos 	ldr	q6,  [sp]
   1944  1.1  christos 	str	q15, [x17]                  // save last round key (x17 as left by _bsaes_key_convert - presumably advanced past the converted keys; confirm)
   1945  1.1  christos 	eor	v6.16b, v6.16b, v7.16b      // fix up round 0 key (by XORing with 0x63)
   1946  1.1  christos 	str	q6, [sp]
   1947  1.1  christos         // x1 still holds the round count from here on; this relies on the callees leaving x1 intact (not visible here)
   1948  1.1  christos 	sub	x30, x22, #0x10             // x30 is free as scratch: it is reloaded from the frame before ret
   1949  1.1  christos 	tst	x22, #0xf                   // if not multiple of 16
   1950  1.1  christos 	csel	x22, x30, x22, ne           // subtract another 16 bytes
   1951  1.1  christos 	subs	x22, x22, #0x80             // bias by 8 blocks; a borrow means fewer than 128 bytes remain
   1952  1.1  christos 
   1953  1.1  christos 	blo	.Lxts_dec_short
   1954  1.1  christos 	b	.Lxts_dec_loop
   1955  1.1  christos 
   1956  1.1  christos .align	4
   1957  1.1  christos .Lxts_dec_loop:                     // 8 blocks per pass; tweak for block 0 is in v11
   1958  1.1  christos 	ldr	q8, .Lxts_magic             // constant defined outside this function (presumably the XTS reduction constant - confirm)
   1959  1.1  christos 	mov	x10, x1                     // pass rounds
   1960  1.1  christos 	add	x2, x19, #16                // x2 -> tweak storage (write cursor)
   1961  1.1  christos 	ldr	q0, [x20], #16              // input block 0
   1962  1.1  christos 	sshr	v1.2d, v11.2d, #63          // all-ones in each 64-bit lane whose top bit is set
   1963  1.1  christos 	mov	x9, sp                      // pass key schedule
   1964  1.1  christos 	ldr	q6, .Lxts_magic+16          // second constant; NOTE(review): used to precompute the carry for the doubling after next - confirm value
   1965  1.1  christos 	add	v2.2d, v11.2d, v11.2d       // shift each 64-bit lane left by one
   1966  1.1  christos 	cmtst	v3.2d, v11.2d, v6.2d
   1967  1.1  christos 	and	v1.16b, v1.16b, v8.16b
   1968  1.1  christos 	ext	v1.16b, v1.16b, v1.16b, #8  // swap the two 64-bit halves
   1969  1.1  christos 	and	v3.16b, v3.16b, v8.16b
   1970  1.1  christos 	ldr	q4, [x20], #16              // input block 1
   1971  1.1  christos 	eor	v12.16b, v2.16b, v1.16b     // v12 = tweak 1
   1972  1.1  christos 	eor	v1.16b, v4.16b, v12.16b     // v1 = block 1 ^ tweak 1
   1973  1.1  christos 	eor	v0.16b, v0.16b, v11.16b     // v0 = block 0 ^ tweak 0
   1974  1.1  christos 	cmtst	v2.2d, v12.2d, v6.2d
   1975  1.1  christos 	add	v4.2d, v12.2d, v12.2d
   1976  1.1  christos 	add	x0, x19, #16                // x0 -> tweak storage (read cursor used after the call)
   1977  1.1  christos 	ext	v3.16b, v3.16b, v3.16b, #8
   1978  1.1  christos 	and	v2.16b, v2.16b, v8.16b
   1979  1.1  christos 	eor	v13.16b, v4.16b, v3.16b     // v13 = tweak 2
   1980  1.1  christos 	ldr	q3, [x20], #16              // input block 2
   1981  1.1  christos 	ext	v4.16b, v2.16b, v2.16b, #8
   1982  1.1  christos 	eor	v2.16b, v3.16b, v13.16b     // v2 = block 2 ^ tweak 2
   1983  1.1  christos 	ldr	q3, [x20], #16              // input block 3
   1984  1.1  christos 	add	v5.2d, v13.2d, v13.2d
   1985  1.1  christos 	cmtst	v7.2d, v13.2d, v6.2d
   1986  1.1  christos 	and	v7.16b, v7.16b, v8.16b
   1987  1.1  christos 	ldr	q9, [x20], #16              // input block 4
   1988  1.1  christos 	ext	v7.16b, v7.16b, v7.16b, #8
   1989  1.1  christos 	ldr	q10, [x20], #16             // input block 5
   1990  1.1  christos 	eor	v14.16b, v5.16b, v4.16b     // v14 = tweak 3
   1991  1.1  christos 	ldr	q16, [x20], #16             // input block 6
   1992  1.1  christos 	add	v4.2d, v14.2d, v14.2d
   1993  1.1  christos 	eor	v3.16b, v3.16b, v14.16b     // v3 = block 3 ^ tweak 3
   1994  1.1  christos 	eor	v15.16b, v4.16b, v7.16b     // v15 = tweak 4
   1995  1.1  christos 	add	v5.2d, v15.2d, v15.2d
   1996  1.1  christos 	ldr	q7, [x20], #16              // input block 7
   1997  1.1  christos 	cmtst	v4.2d, v14.2d, v6.2d
   1998  1.1  christos 	and	v17.16b, v4.16b, v8.16b
   1999  1.1  christos 	cmtst	v18.2d, v15.2d, v6.2d
   2000  1.1  christos 	eor	v4.16b, v9.16b, v15.16b     // v4 = block 4 ^ tweak 4
   2001  1.1  christos 	ext	v9.16b, v17.16b, v17.16b, #8
   2002  1.1  christos 	eor	v9.16b, v5.16b, v9.16b      // v9 = tweak 5
   2003  1.1  christos 	add	v17.2d, v9.2d, v9.2d
   2004  1.1  christos 	and	v18.16b, v18.16b, v8.16b
   2005  1.1  christos 	eor	v5.16b, v10.16b, v9.16b     // v5 = block 5 ^ tweak 5
   2006  1.1  christos 	str	q9, [x2], #16               // tweak 5 -> storage slot 0
   2007  1.1  christos 	ext	v10.16b, v18.16b, v18.16b, #8
   2008  1.1  christos 	cmtst	v9.2d, v9.2d, v6.2d
   2009  1.1  christos 	and	v9.16b, v9.16b, v8.16b
   2010  1.1  christos 	eor	v10.16b, v17.16b, v10.16b   // v10 = tweak 6
   2011  1.1  christos 	cmtst	v17.2d, v10.2d, v6.2d
   2012  1.1  christos 	eor	v6.16b, v16.16b, v10.16b    // v6 = block 6 ^ tweak 6 (the constant in v6 is no longer needed)
   2013  1.1  christos 	str	q10, [x2], #16              // tweak 6 -> storage slot 1
   2014  1.1  christos 	ext	v9.16b, v9.16b, v9.16b, #8
   2015  1.1  christos 	add	v10.2d, v10.2d, v10.2d
   2016  1.1  christos 	eor	v9.16b, v10.16b, v9.16b     // v9 = tweak 7
   2017  1.1  christos 	str	q9, [x2], #16               // tweak 7 -> storage slot 2
   2018  1.1  christos 	eor	v7.16b, v7.16b, v9.16b      // v7 = block 7 ^ tweak 7
   2019  1.1  christos 	add	v9.2d, v9.2d, v9.2d
   2020  1.1  christos 	and	v8.16b, v17.16b, v8.16b
   2021  1.1  christos 	ext	v8.16b, v8.16b, v8.16b, #8
   2022  1.1  christos 	eor	v8.16b, v9.16b, v8.16b
   2023  1.1  christos 	str	q8, [x2]                    // next round tweak
   2024  1.1  christos 
   2025  1.1  christos 	bl	_bsaes_decrypt8
   2026  1.1  christos         // results are consumed in the order v0, v1, v6, v4, v2, v7, v3, v5 (blocks 0..7)
   2027  1.1  christos 	eor	v6.16b, v6.16b, v13.16b     // block 2 ^ tweak 2
   2028  1.1  christos 	eor	v0.16b, v0.16b, v11.16b     // block 0 ^ tweak 0
   2029  1.1  christos 	ldr	q8, [x0], #16               // tweak 5 from storage
   2030  1.1  christos 	eor	v7.16b, v7.16b, v8.16b      // block 5 ^ tweak 5
   2031  1.1  christos 	str	q0, [x21], #16              // output block 0
   2032  1.1  christos 	eor	v0.16b, v1.16b, v12.16b     // v0 = block 1 ^ tweak 1
   2033  1.1  christos 	ldr	q1, [x0], #16               // tweak 6 from storage
   2034  1.1  christos 	eor	v1.16b, v3.16b, v1.16b      // v1 = block 6 ^ tweak 6
   2035  1.1  christos 	subs	x22, x22, #0x80             // sets the flags tested by the bpl at the end of the loop
   2036  1.1  christos 	eor	v2.16b, v2.16b, v15.16b     // block 4 ^ tweak 4
   2037  1.1  christos 	eor	v3.16b, v4.16b, v14.16b     // v3 = block 3 ^ tweak 3
   2038  1.1  christos 	ldr	q4, [x0], #16               // tweak 7 from storage
   2039  1.1  christos 	str	q0, [x21], #16              // output block 1
   2040  1.1  christos 	ldr	q11, [x0]                   // next round tweak
   2041  1.1  christos 	eor	v0.16b, v5.16b, v4.16b      // v0 = block 7 ^ tweak 7
   2042  1.1  christos 	str	q6, [x21], #16              // output block 2
   2043  1.1  christos 	str	q3, [x21], #16              // output block 3
   2044  1.1  christos 	str	q2, [x21], #16              // output block 4
   2045  1.1  christos 	str	q7, [x21], #16              // output block 5
   2046  1.1  christos 	str	q1, [x21], #16              // output block 6
   2047  1.1  christos 	str	q0, [x21], #16              // output block 7
   2048  1.1  christos 	bpl	.Lxts_dec_loop              // at least 128 more bytes to process
   2049  1.1  christos 
   2050  1.1  christos .Lxts_dec_short:                    // 0..7 whole blocks left; tweak for the next block is in v11
   2051  1.1  christos 	adds	x22, x22, #0x70             // x22 = bytes still to process in this phase, minus 16
   2052  1.1  christos 	bmi	.Lxts_dec_done              // fewer than 16 bytes: no whole block to process here
   2053  1.1  christos 
   2054  1.1  christos 	ldr	q8, .Lxts_magic             // same constants as in the main loop, here in v8 and v9
   2055  1.1  christos 	sshr	v1.2d, v11.2d, #63
   2056  1.1  christos 	add	v2.2d, v11.2d, v11.2d
   2057  1.1  christos 	ldr	q9, .Lxts_magic+16
   2058  1.1  christos 	subs	x22, x22, #0x10             // negative: exactly one block (tested by bmi .Lxts_dec_1 below)
   2059  1.1  christos 	ldr	q0, [x20], #16              // input block 0
   2060  1.1  christos 	and	v1.16b, v1.16b, v8.16b
   2061  1.1  christos 	cmtst	v3.2d, v11.2d, v9.2d
   2062  1.1  christos 	ext	v1.16b, v1.16b, v1.16b, #8
   2063  1.1  christos 	and	v3.16b, v3.16b, v8.16b
   2064  1.1  christos 	eor	v12.16b, v2.16b, v1.16b     // v12 = tweak 1
   2065  1.1  christos 	ext	v1.16b, v3.16b, v3.16b, #8
   2066  1.1  christos 	add	v2.2d, v12.2d, v12.2d
   2067  1.1  christos 	cmtst	v3.2d, v12.2d, v9.2d
   2068  1.1  christos 	eor	v13.16b, v2.16b, v1.16b     // v13 = tweak 2
   2069  1.1  christos 	and	v22.16b, v3.16b, v8.16b
   2070  1.1  christos 	bmi	.Lxts_dec_1
   2071  1.1  christos 
   2072  1.1  christos 	ext	v2.16b, v22.16b, v22.16b, #8
   2073  1.1  christos 	add	v3.2d, v13.2d, v13.2d
   2074  1.1  christos 	ldr	q1, [x20], #16              // input block 1
   2075  1.1  christos 	cmtst	v4.2d, v13.2d, v9.2d
   2076  1.1  christos 	subs	x22, x22, #0x10             // negative: exactly two blocks
   2077  1.1  christos 	eor	v14.16b, v3.16b, v2.16b     // v14 = tweak 3
   2078  1.1  christos 	and	v23.16b, v4.16b, v8.16b
   2079  1.1  christos 	bmi	.Lxts_dec_2
   2080  1.1  christos 
   2081  1.1  christos 	ext	v3.16b, v23.16b, v23.16b, #8
   2082  1.1  christos 	add	v4.2d, v14.2d, v14.2d
   2083  1.1  christos 	ldr	q2, [x20], #16              // input block 2
   2084  1.1  christos 	cmtst	v5.2d, v14.2d, v9.2d
   2085  1.1  christos 	eor	v0.16b, v0.16b, v11.16b     // v0 = block 0 ^ tweak 0
   2086  1.1  christos 	subs	x22, x22, #0x10             // negative: exactly three blocks
   2087  1.1  christos 	eor	v15.16b, v4.16b, v3.16b     // v15 = tweak 4
   2088  1.1  christos 	and	v24.16b, v5.16b, v8.16b
   2089  1.1  christos 	bmi	.Lxts_dec_3
   2090  1.1  christos 
   2091  1.1  christos 	ext	v4.16b, v24.16b, v24.16b, #8
   2092  1.1  christos 	add	v5.2d, v15.2d, v15.2d
   2093  1.1  christos 	ldr	q3, [x20], #16              // input block 3
   2094  1.1  christos 	cmtst	v6.2d, v15.2d, v9.2d
   2095  1.1  christos 	eor	v1.16b, v1.16b, v12.16b     // v1 = block 1 ^ tweak 1
   2096  1.1  christos 	subs	x22, x22, #0x10             // negative: exactly four blocks
   2097  1.1  christos 	eor	v16.16b, v5.16b, v4.16b     // v16 = tweak 5
   2098  1.1  christos 	and	v25.16b, v6.16b, v8.16b
   2099  1.1  christos 	bmi	.Lxts_dec_4
   2100  1.1  christos 
   2101  1.1  christos 	ext	v5.16b, v25.16b, v25.16b, #8
   2102  1.1  christos 	add	v6.2d, v16.2d, v16.2d
   2103  1.1  christos 	add	x0, x19, #16                // x0 -> tweak storage
   2104  1.1  christos 	cmtst	v7.2d, v16.2d, v9.2d
   2105  1.1  christos 	ldr	q4, [x20], #16              // input block 4
   2106  1.1  christos 	eor	v2.16b, v2.16b, v13.16b     // v2 = block 2 ^ tweak 2
   2107  1.1  christos 	str	q16, [x0], #16              // tweak 5 -> storage slot 0
   2108  1.1  christos 	subs	x22, x22, #0x10             // negative: exactly five blocks
   2109  1.1  christos 	eor	v17.16b, v6.16b, v5.16b     // v17 = tweak 6
   2110  1.1  christos 	and	v26.16b, v7.16b, v8.16b
   2111  1.1  christos 	bmi	.Lxts_dec_5
   2112  1.1  christos 
   2113  1.1  christos 	ext	v7.16b, v26.16b, v26.16b, #8
   2114  1.1  christos 	add	v18.2d, v17.2d, v17.2d
   2115  1.1  christos 	ldr	q5, [x20], #16              // input block 5
   2116  1.1  christos 	eor	v3.16b, v3.16b, v14.16b     // v3 = block 3 ^ tweak 3
   2117  1.1  christos 	str	q17, [x0], #16              // tweak 6 -> storage slot 1
   2118  1.1  christos 	subs	x22, x22, #0x10             // negative: exactly six blocks
   2119  1.1  christos 	eor	v18.16b, v18.16b, v7.16b    // v18 = tweak 7
   2120  1.1  christos 	bmi	.Lxts_dec_6
   2121  1.1  christos         // seven blocks
   2122  1.1  christos 	ldr	q6, [x20], #16              // input block 6
   2123  1.1  christos 	eor	v4.16b, v4.16b, v15.16b     // v4 = block 4 ^ tweak 4
   2124  1.1  christos 	eor	v5.16b, v5.16b, v16.16b     // v5 = block 5 ^ tweak 5
   2125  1.1  christos 	str	q18, [x0]                   // next round tweak
   2126  1.1  christos 	mov	x9, sp                      // pass key schedule
   2127  1.1  christos 	mov	x10, x1                     // pass rounds
   2128  1.1  christos 	add	x0, x19, #16                // rewind x0 to the start of tweak storage
   2129  1.1  christos 	sub	x22, x22, #0x10             // account for the seventh block
   2130  1.1  christos 	eor	v6.16b, v6.16b, v17.16b     // v6 = block 6 ^ tweak 6
   2131  1.1  christos 
   2132  1.1  christos 	bl	_bsaes_decrypt8
   2133  1.1  christos 
   2134  1.1  christos 	ldr	q16, [x0], #16              // reload tweak 5
   2135  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2136  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   2137  1.1  christos 	ldr	q17, [x0], #16              // reload tweak 6
   2138  1.1  christos 	eor	v6.16b, v6.16b, v13.16b
   2139  1.1  christos 	eor	v4.16b, v4.16b, v14.16b
   2140  1.1  christos 	eor	v2.16b, v2.16b, v15.16b
   2141  1.1  christos 	ldr	q11, [x0]                   // next round tweak
   2142  1.1  christos 	str	q0, [x21], #16
   2143  1.1  christos 	str	q1, [x21], #16
   2144  1.1  christos 	eor	v0.16b, v7.16b, v16.16b     // v0 = block 5 ^ tweak 5
   2145  1.1  christos 	eor	v1.16b, v3.16b, v17.16b     // v1 = block 6 ^ tweak 6
   2146  1.1  christos 	str	q6, [x21], #16
   2147  1.1  christos 	str	q4, [x21], #16
   2148  1.1  christos 	str	q2, [x21], #16
   2149  1.1  christos 	str	q0, [x21], #16
   2150  1.1  christos 	str	q1, [x21], #16
   2151  1.1  christos 	b	.Lxts_dec_done
   2152  1.1  christos 
   2153  1.1  christos .align	4
   2154  1.1  christos .Lxts_dec_6:                        // six blocks; v0-v3 already carry their tweaks
   2155  1.1  christos 	eor	v4.16b, v4.16b, v15.16b
   2156  1.1  christos 	eor	v5.16b, v5.16b, v16.16b
   2157  1.1  christos 	mov	x9, sp                      // pass key schedule
   2158  1.1  christos 	mov	x10, x1                     // pass rounds
   2159  1.1  christos 	add	x0, x19, #16
   2160  1.1  christos 
   2161  1.1  christos 	bl	_bsaes_decrypt8
   2162  1.1  christos 
   2163  1.1  christos 	ldr	q16, [x0], #16              // reload tweak 5
   2164  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2165  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   2166  1.1  christos 	eor	v6.16b, v6.16b, v13.16b
   2167  1.1  christos 	eor	v4.16b, v4.16b, v14.16b
   2168  1.1  christos 	ldr	q11, [x0]                   // next round tweak
   2169  1.1  christos 	eor	v2.16b, v2.16b, v15.16b
   2170  1.1  christos 	str	q0, [x21], #16
   2171  1.1  christos 	str	q1, [x21], #16
   2172  1.1  christos 	eor	v0.16b, v7.16b, v16.16b     // v0 = block 5 ^ tweak 5
   2173  1.1  christos 	str	q6, [x21], #16
   2174  1.1  christos 	str	q4, [x21], #16
   2175  1.1  christos 	str	q2, [x21], #16
   2176  1.1  christos 	str	q0, [x21], #16
   2177  1.1  christos 	b	.Lxts_dec_done
   2178  1.1  christos 
   2179  1.1  christos .align	4
   2180  1.1  christos .Lxts_dec_5:                        // five blocks; v0-v2 already carry their tweaks
   2181  1.1  christos 	eor	v3.16b, v3.16b, v14.16b
   2182  1.1  christos 	eor	v4.16b, v4.16b, v15.16b
   2183  1.1  christos 	mov	x9, sp                      // pass key schedule
   2184  1.1  christos 	mov	x10, x1                     // pass rounds
   2185  1.1  christos 	add	x0, x19, #16
   2186  1.1  christos 
   2187  1.1  christos 	bl	_bsaes_decrypt8
   2188  1.1  christos 
   2189  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2190  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   2191  1.1  christos 	ldr	q11, [x0]                   // next round tweak
   2192  1.1  christos 	eor	v6.16b, v6.16b, v13.16b
   2193  1.1  christos 	eor	v4.16b, v4.16b, v14.16b
   2194  1.1  christos 	eor	v2.16b, v2.16b, v15.16b
   2195  1.1  christos 	str	q0, [x21], #16
   2196  1.1  christos 	str	q1, [x21], #16
   2197  1.1  christos 	str	q6, [x21], #16
   2198  1.1  christos 	str	q4, [x21], #16
   2199  1.1  christos 	str	q2, [x21], #16
   2200  1.1  christos 	b	.Lxts_dec_done
   2201  1.1  christos 
   2202  1.1  christos .align	4
   2203  1.1  christos .Lxts_dec_4:                        // four blocks; v0 and v1 already carry their tweaks
   2204  1.1  christos 	eor	v2.16b, v2.16b, v13.16b
   2205  1.1  christos 	eor	v3.16b, v3.16b, v14.16b
   2206  1.1  christos 	mov	x9, sp                      // pass key schedule
   2207  1.1  christos 	mov	x10, x1                     // pass rounds
   2208  1.1  christos 	add	x0, x19, #16
   2209  1.1  christos 
   2210  1.1  christos 	bl	_bsaes_decrypt8
   2211  1.1  christos 
   2212  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2213  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   2214  1.1  christos 	eor	v6.16b, v6.16b, v13.16b
   2215  1.1  christos 	eor	v4.16b, v4.16b, v14.16b
   2216  1.1  christos 	mov	v11.16b, v15.16b            // next round tweak
   2217  1.1  christos 	str	q0, [x21], #16
   2218  1.1  christos 	str	q1, [x21], #16
   2219  1.1  christos 	str	q6, [x21], #16
   2220  1.1  christos 	str	q4, [x21], #16
   2221  1.1  christos 	b	.Lxts_dec_done
   2222  1.1  christos 
   2223  1.1  christos .align	4
   2224  1.1  christos .Lxts_dec_3:                        // three blocks; v0 already carries its tweak
   2225  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   2226  1.1  christos 	eor	v2.16b, v2.16b, v13.16b
   2227  1.1  christos 	mov	x9, sp                      // pass key schedule
   2228  1.1  christos 	mov	x10, x1                     // pass rounds
   2229  1.1  christos 	add	x0, x19, #16
   2230  1.1  christos 
   2231  1.1  christos 	bl	_bsaes_decrypt8
   2232  1.1  christos 
   2233  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2234  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   2235  1.1  christos 	eor	v6.16b, v6.16b, v13.16b
   2236  1.1  christos 	mov	v11.16b, v14.16b            // next round tweak
   2237  1.1  christos 	str	q0, [x21], #16
   2238  1.1  christos 	str	q1, [x21], #16
   2239  1.1  christos 	str	q6, [x21], #16
   2240  1.1  christos 	b	.Lxts_dec_done
   2241  1.1  christos 
   2242  1.1  christos .align	4
   2243  1.1  christos .Lxts_dec_2:                        // two blocks; neither input has been XORed with its tweak yet
   2244  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2245  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   2246  1.1  christos 	mov	x9, sp                      // pass key schedule
   2247  1.1  christos 	mov	x10, x1                     // pass rounds
   2248  1.1  christos 	add	x0, x19, #16
   2249  1.1  christos 
   2250  1.1  christos 	bl	_bsaes_decrypt8
   2251  1.1  christos 
   2252  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2253  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   2254  1.1  christos 	mov	v11.16b, v13.16b            // next round tweak
   2255  1.1  christos 	str	q0, [x21], #16
   2256  1.1  christos 	str	q1, [x21], #16
   2257  1.1  christos 	b	.Lxts_dec_done
   2258  1.1  christos 
   2259  1.1  christos .align	4
   2260  1.1  christos .Lxts_dec_1:                        // a single block goes through AES_decrypt with key1, in place in a stack slot
   2261  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2262  1.1  christos 	sub	x0, sp, #16                 // input and output both point at the slot pushed by the str below
   2263  1.1  christos 	sub	x1, sp, #16
   2264  1.1  christos 	mov	x2, x23
   2265  1.1  christos 	mov	v13.d[0], v11.d[1]          // just in case AES_decrypt corrupts top half of callee-saved SIMD registers
   2266  1.1  christos 	mov	v14.d[0], v12.d[1]
   2267  1.1  christos 	str	q0, [sp, #-16]!
   2268  1.1  christos 
   2269  1.1  christos 	bl	AES_decrypt
   2270  1.1  christos 
   2271  1.1  christos 	ldr	q0, [sp], #16
   2272  1.1  christos 	trn1	v13.2d, v11.2d, v13.2d      // v13 = this block's tweak, rebuilt from the two saved low halves
   2273  1.1  christos 	trn1	v11.2d, v12.2d, v14.2d      // next round tweak
   2274  1.1  christos 	eor	v0.16b, v0.16b, v13.16b
   2275  1.1  christos 	str	q0, [x21], #16
   2276  1.1  christos 
   2277  1.1  christos .Lxts_dec_done:
   2278  1.1  christos 	adds	x22, x22, #0x10             // x22 = number of bytes in the partial final block (0 if none)
   2279  1.1  christos 	beq	.Lxts_dec_ret
   2280  1.1  christos 
   2281  1.1  christos         // calculate one round of extra tweak for the stolen ciphertext
   2282  1.1  christos 	ldr	q8, .Lxts_magic
   2283  1.1  christos 	sshr	v6.2d, v11.2d, #63
   2284  1.1  christos 	and	v6.16b, v6.16b, v8.16b
   2285  1.1  christos 	add	v12.2d, v11.2d, v11.2d
   2286  1.1  christos 	ext	v6.16b, v6.16b, v6.16b, #8
   2287  1.1  christos 	eor	v12.16b, v12.16b, v6.16b    // v12 = the tweak after v11; v11 is kept for the second decryption below
   2288  1.1  christos 
   2289  1.1  christos         // perform the final decryption with the last tweak value
   2290  1.1  christos 	ldr	q0, [x20], #16              // last complete ciphertext block
   2291  1.1  christos 	eor	v0.16b, v0.16b, v12.16b
   2292  1.1  christos 	str	q0, [sp, #-16]!
   2293  1.1  christos 	mov	x0, sp
   2294  1.1  christos 	mov	x1, sp
   2295  1.1  christos 	mov	x2, x23
   2296  1.1  christos 	mov	v13.d[0], v11.d[1]          // just in case AES_decrypt corrupts top half of callee-saved SIMD registers
   2297  1.1  christos 	mov	v14.d[0], v12.d[1]
   2298  1.1  christos 
   2299  1.1  christos 	bl	AES_decrypt
   2300  1.1  christos 
   2301  1.1  christos 	trn1	v12.2d, v12.2d, v14.2d      // rebuild v12 and v11 from the saved halves
   2302  1.1  christos 	trn1	v11.2d, v11.2d, v13.2d
   2303  1.1  christos 	ldr	q0, [sp], #16
   2304  1.1  christos 	eor	v0.16b, v0.16b, v12.16b
   2305  1.1  christos 	str	q0, [x21]                   // no post-increment: this block is reworked in place below
   2306  1.1  christos 
   2307  1.1  christos 	mov	x6, x21                     // x6 -> start of the block just written
   2308  1.1  christos         // Penultimate ciphertext block produces final plaintext part-block
   2309  1.1  christos         // plus remaining part of final ciphertext block. Move plaintext part
   2310  1.1  christos         // to final position and reuse penultimate plaintext block buffer to
   2311  1.1  christos         // construct final ciphertext block
   2312  1.1  christos .Lxts_dec_steal:                    // runs once per byte of the partial final block (x22 iterations)
   2313  1.1  christos 	ldrb	w1, [x21]                   // byte of the block just decrypted
   2314  1.1  christos 	ldrb	w0, [x20], #1               // byte of the partial final ciphertext block
   2315  1.1  christos 	strb	w1, [x21, #0x10]            // plaintext byte moves 16 bytes on, into the final partial block
   2316  1.1  christos 	strb	w0, [x21], #1               // ciphertext byte takes its place
   2317  1.1  christos 
   2318  1.1  christos 	subs	x22, x22, #1
   2319  1.1  christos 	bhi	.Lxts_dec_steal
   2320  1.1  christos 
   2321  1.1  christos         // Finally decrypt the penultimate plaintext block using the
   2322  1.1  christos         // penultimate tweak
   2323  1.1  christos 	ldr	q0, [x6]                    // the block assembled in place by the loop above
   2324  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2325  1.1  christos 	str	q0, [sp, #-16]!
   2326  1.1  christos 	mov	x0, sp
   2327  1.1  christos 	mov	x1, sp
   2328  1.1  christos 	mov	x2, x23
   2329  1.1  christos 	mov	x21, x6                     // x6 is not callee-saved, so move the block address to x21 before the call
   2330  1.1  christos 
   2331  1.1  christos 	bl	AES_decrypt
   2332  1.1  christos 
   2333  1.1  christos 	trn1	v11.2d, v11.2d, v13.2d      // v13.d[0] still holds the high half of v11 saved before the previous call
   2334  1.1  christos 	ldr	q0, [sp], #16
   2335  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2336  1.1  christos 	str	q0, [x21]
   2337  1.1  christos 
   2338  1.1  christos .Lxts_dec_ret:
   2339  1.1  christos 
   2340  1.1  christos 	movi	v0.16b, #0
   2341  1.1  christos 	movi	v1.16b, #0
   2342  1.1  christos .Lxts_dec_bzero:	//	wipe key schedule
   2343  1.1  christos 	stp	q0, q1, [sp], #32           // zero 32 bytes per iteration, releasing the stack as it goes
   2344  1.1  christos 	cmp	sp, x19                     // stop once sp is back at the frame base
   2345  1.1  christos 	bne	.Lxts_dec_bzero
   2346  1.1  christos 
   2347  1.1  christos 	ldp	x19, x20, [sp, #80]         // restore callee-saved registers from the frame
   2348  1.1  christos 	ldp	x21, x22, [sp, #96]
   2349  1.1  christos 	ldr	x23, [sp, #112]
   2350  1.1  christos 	ldp	d8, d9, [sp, #128]
   2351  1.1  christos 	ldp	d10, d11, [sp, #144]
   2352  1.1  christos 	ldp	d12, d13, [sp, #160]
   2353  1.1  christos 	ldp	d14, d15, [sp, #176]
   2354  1.1  christos 	ldp	x29, x30, [sp], #192        // restore the frame record and release the frame
   2355  1.1  christos 	ret
   2356  1.1  christos .size	ossl_bsaes_xts_decrypt,.-ossl_bsaes_xts_decrypt
   2357