Home | History | Annotate | Line # | Download | only in aarch64
      1  1.1  christos // Copyright 2021-2025 The OpenSSL Project Authors. All Rights Reserved.
      2  1.1  christos //
      3  1.1  christos // Licensed under the OpenSSL license (the "License").  You may not use
      4  1.1  christos // this file except in compliance with the License.  You can obtain a copy
      5  1.1  christos // in the file LICENSE in the source distribution or at
      6  1.1  christos // https://www.openssl.org/source/license.html
      7  1.1  christos //
      8  1.1  christos // ====================================================================
      9  1.1  christos // Written by Ben Avison <bavison (at) riscosopen.org> for the OpenSSL
     10  1.1  christos // project. Rights for redistribution and usage in source and binary
     11  1.1  christos // forms are granted according to the OpenSSL license.
     12  1.1  christos // ====================================================================
     13  1.1  christos //
     14  1.1  christos // This implementation is a translation of bsaes-armv7 for AArch64.
     15  1.1  christos // No attempt has been made to carry across the build switches for
     16  1.1  christos // kernel targets, since the Linux kernel crypto support has moved on
     17  1.1  christos // from when it was based on OpenSSL.
     18  1.1  christos 
     19  1.1  christos // A lot of hand-scheduling has been performed. Consequently, this code
     20  1.1  christos // doesn't factor out neatly into macros in the same way that the
     21  1.1  christos // AArch32 version did, so there is little to be gained by wrapping it
     22  1.1  christos // up in Perl; it is therefore presented as pure assembly.
     23  1.1  christos 
     24  1.1  christos 
     25  1.1  christos #include "crypto/arm_arch.h"
     26  1.1  christos 
     27  1.1  christos .text
     28  1.1  christos 
     29  1.1  christos 
     30  1.1  christos 
     31  1.1  christos 
     32  1.1  christos 
     33  1.1  christos .type	_bsaes_decrypt8,%function
     34  1.1  christos .align	4
                       // _bsaes_decrypt8: bit-sliced decryption of the eight 128-bit vectors
                       // held in v0-v7, using key material read sequentially through x9.
                       //
                       // Overall shape (all grounded in the code below):
                       //   1. XOR the first 16 bytes of key into every input, permute bytes
                       //      with .LM0ISR, then run three masked swap stages (0x55/0x33/0x0f).
                       //   2. Repeat: S-box network (.Ldec_sbox), a linear mixing layer, then
                       //      XOR 8 x 16 bytes of key and permute with v28 (.Ldec_loop).
                       //   3. After the last S-box pass, run three masked swap stages again
                       //      and XOR a final 16-byte key into every output (.Ldec_done).
     35  1.1  christos // On entry:
     36  1.1  christos //   x9 -> key (previously expanded using _bsaes_key_convert)
     37  1.1  christos //   x10 = number of rounds
     38  1.1  christos //   v0-v7 input data
     39  1.1  christos // On exit:
     40  1.1  christos //   x9-x11 corrupted
     41  1.1  christos //   other general-purpose registers preserved
     42  1.1  christos //   v0-v7 output data
     43  1.1  christos //   v11-v15 preserved
     44  1.1  christos //   other SIMD registers corrupted
                       // NOTE(review): v8-v10 are overwritten here although AAPCS64 treats the
                       // low halves of v8-v15 as callee-saved, so this is a private calling
                       // convention; callers are presumably responsible for saving d8-d10.
                       // The condition flags are also modified (subs in .Ldec_sbox).
     45  1.1  christos _bsaes_decrypt8:
     46  1.1  christos 	ldr	q8, [x9], #16	// v8 = first 16 bytes of key; x9 advances past them
     47  1.1  christos 	adrp	x11, .LM0ISR
     48  1.1  christos 	add	x11, x11, #:lo12:.LM0ISR	// x11 -> .LM0ISR (PC-relative page + low 12 bits)
     49  1.1  christos 	movi	v9.16b, #0x55	// mask for the shift-by-1 swap stage
     50  1.1  christos 	ldr	q10, [x11], #16	// v10 = .LM0ISR byte permutation; x11 now -> .LISR
     51  1.1  christos 	movi	v16.16b, #0x33	// mask for the shift-by-2 swap stage
     52  1.1  christos 	movi	v17.16b, #0x0f	// mask for the shift-by-4 swap stage
     53  1.1  christos 	sub	x10, x10, #1	// pre-decrement; .Ldec_sbox decrements again on every pass
     54  1.1  christos 	eor	v0.16b, v0.16b, v8.16b
     55  1.1  christos 	eor	v1.16b, v1.16b, v8.16b
     56  1.1  christos 	eor	v2.16b, v2.16b, v8.16b
     57  1.1  christos 	eor	v4.16b, v4.16b, v8.16b
     58  1.1  christos 	eor	v3.16b, v3.16b, v8.16b
     59  1.1  christos 	eor	v5.16b, v5.16b, v8.16b
     60  1.1  christos 	tbl	v0.16b, {v0.16b}, v10.16b
     61  1.1  christos 	tbl	v1.16b, {v1.16b}, v10.16b
     62  1.1  christos 	tbl	v2.16b, {v2.16b}, v10.16b
     63  1.1  christos 	tbl	v4.16b, {v4.16b}, v10.16b
     64  1.1  christos 	eor	v6.16b, v6.16b, v8.16b
     65  1.1  christos 	eor	v7.16b, v7.16b, v8.16b
     66  1.1  christos 	tbl	v3.16b, {v3.16b}, v10.16b
     67  1.1  christos 	tbl	v5.16b, {v5.16b}, v10.16b
     68  1.1  christos 	tbl	v6.16b, {v6.16b}, v10.16b
                       // Swap stage 1 (shift 1, mask 0x55). For each pair (a,b):
                       //   t = ((a >> 1) ^ b) & mask;  b ^= t;  a ^= t << 1
                       // Pairs: (v0,v1) (v4,v5) (v2,v3) (v6,v7). v8-v10 are free for reuse as
                       // temporaries once the key XOR / TBL above have consumed them.
                       // Instructions of neighbouring stages are interleaved for scheduling.
     69  1.1  christos 	ushr	v8.2d, v0.2d, #1
     70  1.1  christos 	tbl	v7.16b, {v7.16b}, v10.16b
     71  1.1  christos 	ushr	v10.2d, v4.2d, #1
     72  1.1  christos 	ushr	v18.2d, v2.2d, #1
     73  1.1  christos 	eor	v8.16b, v8.16b, v1.16b
     74  1.1  christos 	ushr	v19.2d, v6.2d, #1
     75  1.1  christos 	eor	v10.16b, v10.16b, v5.16b
     76  1.1  christos 	eor	v18.16b, v18.16b, v3.16b
     77  1.1  christos 	and	v8.16b, v8.16b, v9.16b
     78  1.1  christos 	eor	v19.16b, v19.16b, v7.16b
     79  1.1  christos 	and	v10.16b, v10.16b, v9.16b
     80  1.1  christos 	and	v18.16b, v18.16b, v9.16b
     81  1.1  christos 	eor	v1.16b, v1.16b, v8.16b
     82  1.1  christos 	shl	v8.2d, v8.2d, #1
     83  1.1  christos 	and	v9.16b, v19.16b, v9.16b	// last use of the 0x55 mask; v9 becomes a temporary
     84  1.1  christos 	eor	v5.16b, v5.16b, v10.16b
     85  1.1  christos 	shl	v10.2d, v10.2d, #1
     86  1.1  christos 	eor	v3.16b, v3.16b, v18.16b
     87  1.1  christos 	shl	v18.2d, v18.2d, #1
     88  1.1  christos 	eor	v0.16b, v0.16b, v8.16b
     89  1.1  christos 	shl	v8.2d, v9.2d, #1
     90  1.1  christos 	eor	v7.16b, v7.16b, v9.16b
     91  1.1  christos 	eor	v4.16b, v4.16b, v10.16b
     92  1.1  christos 	eor	v2.16b, v2.16b, v18.16b
                       // Swap stage 2 (shift 2, mask 0x33 in v16), same formula.
                       // Pairs: (v1,v3) (v0,v2) (v5,v7) (v4,v6).
     93  1.1  christos 	ushr	v9.2d, v1.2d, #2
     94  1.1  christos 	eor	v6.16b, v6.16b, v8.16b	// completes stage 1 for the (v6,v7) pair
     95  1.1  christos 	ushr	v8.2d, v0.2d, #2
     96  1.1  christos 	ushr	v10.2d, v5.2d, #2
     97  1.1  christos 	ushr	v18.2d, v4.2d, #2
     98  1.1  christos 	eor	v9.16b, v9.16b, v3.16b
     99  1.1  christos 	eor	v8.16b, v8.16b, v2.16b
    100  1.1  christos 	eor	v10.16b, v10.16b, v7.16b
    101  1.1  christos 	eor	v18.16b, v18.16b, v6.16b
    102  1.1  christos 	and	v9.16b, v9.16b, v16.16b
    103  1.1  christos 	and	v8.16b, v8.16b, v16.16b
    104  1.1  christos 	and	v10.16b, v10.16b, v16.16b
    105  1.1  christos 	and	v16.16b, v18.16b, v16.16b	// last use of the 0x33 mask; v16 becomes a temporary
    106  1.1  christos 	eor	v3.16b, v3.16b, v9.16b
    107  1.1  christos 	shl	v9.2d, v9.2d, #2
    108  1.1  christos 	eor	v2.16b, v2.16b, v8.16b
    109  1.1  christos 	shl	v8.2d, v8.2d, #2
    110  1.1  christos 	eor	v7.16b, v7.16b, v10.16b
    111  1.1  christos 	shl	v10.2d, v10.2d, #2
    112  1.1  christos 	eor	v6.16b, v6.16b, v16.16b
    113  1.1  christos 	shl	v16.2d, v16.2d, #2
    114  1.1  christos 	eor	v1.16b, v1.16b, v9.16b
    115  1.1  christos 	eor	v0.16b, v0.16b, v8.16b
    116  1.1  christos 	eor	v5.16b, v5.16b, v10.16b
    117  1.1  christos 	eor	v4.16b, v4.16b, v16.16b
                       // Swap stage 3 (shift 4, mask 0x0f in v17), same formula.
                       // Pairs: (v3,v7) (v2,v6) (v1,v5) (v0,v4).
    118  1.1  christos 	ushr	v8.2d, v3.2d, #4
    119  1.1  christos 	ushr	v9.2d, v2.2d, #4
    120  1.1  christos 	ushr	v10.2d, v1.2d, #4
    121  1.1  christos 	ushr	v16.2d, v0.2d, #4
    122  1.1  christos 	eor	v8.16b, v8.16b, v7.16b
    123  1.1  christos 	eor	v9.16b, v9.16b, v6.16b
    124  1.1  christos 	eor	v10.16b, v10.16b, v5.16b
    125  1.1  christos 	eor	v16.16b, v16.16b, v4.16b
    126  1.1  christos 	and	v8.16b, v8.16b, v17.16b
    127  1.1  christos 	and	v9.16b, v9.16b, v17.16b
    128  1.1  christos 	and	v10.16b, v10.16b, v17.16b
    129  1.1  christos 	and	v16.16b, v16.16b, v17.16b
    130  1.1  christos 	eor	v7.16b, v7.16b, v8.16b
    131  1.1  christos 	shl	v8.2d, v8.2d, #4
    132  1.1  christos 	eor	v6.16b, v6.16b, v9.16b
    133  1.1  christos 	shl	v9.2d, v9.2d, #4
    134  1.1  christos 	eor	v5.16b, v5.16b, v10.16b
    135  1.1  christos 	shl	v10.2d, v10.2d, #4
    136  1.1  christos 	eor	v4.16b, v4.16b, v16.16b
    137  1.1  christos 	shl	v16.2d, v16.2d, #4
    138  1.1  christos 	eor	v3.16b, v3.16b, v8.16b
    139  1.1  christos 	eor	v2.16b, v2.16b, v9.16b
    140  1.1  christos 	eor	v1.16b, v1.16b, v10.16b
    141  1.1  christos 	eor	v0.16b, v0.16b, v16.16b
    142  1.1  christos 	b	.Ldec_sbox	// first pass skips the per-round key XOR / permute below
    143  1.1  christos .align	4
                       // Per-round key addition: XOR 8 x 16 bytes read from [x9] into v0-v7
                       // (x9 advances by 128 in total), then permute the bytes of each register
                       // with the index vector in v28 (loaded at the bottom of the previous
                       // pass from .LISR, or from .LISRM0 for the final iteration).
    144  1.1  christos .Ldec_loop:
    145  1.1  christos 	ld1	{v16.16b, v17.16b, v18.16b, v19.16b}, [x9], #64
    146  1.1  christos 	ldp	q8, q9, [x9], #32
    147  1.1  christos 	eor	v0.16b, v16.16b, v0.16b
    148  1.1  christos 	ldr	q10, [x9], #16
    149  1.1  christos 	eor	v1.16b, v17.16b, v1.16b
    150  1.1  christos 	ldr	q16, [x9], #16	// v16 reused for the eighth key vector; its first value was consumed above
    151  1.1  christos 	eor	v2.16b, v18.16b, v2.16b
    152  1.1  christos 	eor	v3.16b, v19.16b, v3.16b
    153  1.1  christos 	eor	v4.16b, v8.16b, v4.16b
    154  1.1  christos 	eor	v5.16b, v9.16b, v5.16b
    155  1.1  christos 	eor	v6.16b, v10.16b, v6.16b
    156  1.1  christos 	eor	v7.16b, v16.16b, v7.16b
    157  1.1  christos 	tbl	v0.16b, {v0.16b}, v28.16b
    158  1.1  christos 	tbl	v1.16b, {v1.16b}, v28.16b
    159  1.1  christos 	tbl	v2.16b, {v2.16b}, v28.16b
    160  1.1  christos 	tbl	v3.16b, {v3.16b}, v28.16b
    161  1.1  christos 	tbl	v4.16b, {v4.16b}, v28.16b
    162  1.1  christos 	tbl	v5.16b, {v5.16b}, v28.16b
    163  1.1  christos 	tbl	v6.16b, {v6.16b}, v28.16b
    164  1.1  christos 	tbl	v7.16b, {v7.16b}, v28.16b
                       // S-box stage: a fixed network of eor/and/orr/bsl over v0-v7, using
                       // v8-v10 and v16-v31 as temporaries (v28 is overwritten here, which is
                       // why it is reloaded before every branch back to .Ldec_loop).
                       // Presumably this is the bit-sliced inverse AES S-box - confirm against
                       // bsaes-armv7.
    165  1.1  christos .Ldec_sbox:
    166  1.1  christos 	eor	v1.16b, v1.16b, v4.16b
    167  1.1  christos 	eor	v3.16b, v3.16b, v4.16b
    168  1.1  christos 	subs	x10, x10, #1	// count this pass; flags are consumed by bcc/bne far below
    169  1.1  christos 	eor	v4.16b, v4.16b, v7.16b
    170  1.1  christos 	eor	v2.16b, v2.16b, v7.16b
    171  1.1  christos 	eor	v1.16b, v1.16b, v6.16b
    172  1.1  christos 	eor	v6.16b, v6.16b, v4.16b
    173  1.1  christos 	eor	v2.16b, v2.16b, v5.16b
    174  1.1  christos 	eor	v0.16b, v0.16b, v1.16b
    175  1.1  christos 	eor	v7.16b, v7.16b, v6.16b
    176  1.1  christos 	eor	v8.16b, v6.16b, v2.16b
    177  1.1  christos 	and	v9.16b, v4.16b, v6.16b
    178  1.1  christos 	eor	v10.16b, v2.16b, v6.16b
    179  1.1  christos 	eor	v3.16b, v3.16b, v0.16b
    180  1.1  christos 	eor	v5.16b, v5.16b, v0.16b
    181  1.1  christos 	eor	v16.16b, v7.16b, v4.16b
    182  1.1  christos 	eor	v17.16b, v4.16b, v0.16b
    183  1.1  christos 	and	v18.16b, v0.16b, v2.16b
    184  1.1  christos 	eor	v19.16b, v7.16b, v4.16b
    185  1.1  christos 	eor	v1.16b, v1.16b, v3.16b
    186  1.1  christos 	eor	v20.16b, v3.16b, v0.16b
    187  1.1  christos 	eor	v21.16b, v5.16b, v2.16b
    188  1.1  christos 	eor	v22.16b, v3.16b, v7.16b
    189  1.1  christos 	and	v8.16b, v17.16b, v8.16b
    190  1.1  christos 	orr	v17.16b, v3.16b, v5.16b
    191  1.1  christos 	eor	v23.16b, v1.16b, v6.16b
    192  1.1  christos 	eor	v24.16b, v20.16b, v16.16b
    193  1.1  christos 	eor	v25.16b, v1.16b, v5.16b
    194  1.1  christos 	orr	v26.16b, v20.16b, v21.16b
    195  1.1  christos 	and	v20.16b, v20.16b, v21.16b
    196  1.1  christos 	and	v27.16b, v7.16b, v1.16b
    197  1.1  christos 	eor	v21.16b, v21.16b, v23.16b
    198  1.1  christos 	orr	v28.16b, v16.16b, v23.16b
    199  1.1  christos 	orr	v29.16b, v22.16b, v25.16b
    200  1.1  christos 	eor	v26.16b, v26.16b, v8.16b
    201  1.1  christos 	and	v16.16b, v16.16b, v23.16b
    202  1.1  christos 	and	v22.16b, v22.16b, v25.16b
    203  1.1  christos 	and	v21.16b, v24.16b, v21.16b
    204  1.1  christos 	eor	v8.16b, v28.16b, v8.16b
    205  1.1  christos 	eor	v23.16b, v5.16b, v2.16b
    206  1.1  christos 	eor	v24.16b, v1.16b, v6.16b
    207  1.1  christos 	eor	v16.16b, v16.16b, v22.16b
    208  1.1  christos 	eor	v22.16b, v3.16b, v0.16b
    209  1.1  christos 	eor	v25.16b, v29.16b, v21.16b
    210  1.1  christos 	eor	v21.16b, v26.16b, v21.16b
    211  1.1  christos 	eor	v8.16b, v8.16b, v20.16b
    212  1.1  christos 	eor	v26.16b, v23.16b, v24.16b
    213  1.1  christos 	eor	v16.16b, v16.16b, v20.16b
    214  1.1  christos 	eor	v28.16b, v22.16b, v19.16b
    215  1.1  christos 	eor	v20.16b, v25.16b, v20.16b
    216  1.1  christos 	eor	v9.16b, v21.16b, v9.16b
    217  1.1  christos 	eor	v8.16b, v8.16b, v18.16b
    218  1.1  christos 	eor	v18.16b, v5.16b, v1.16b
    219  1.1  christos 	eor	v21.16b, v16.16b, v17.16b
    220  1.1  christos 	eor	v16.16b, v16.16b, v17.16b
    221  1.1  christos 	eor	v17.16b, v20.16b, v27.16b
    222  1.1  christos 	eor	v20.16b, v3.16b, v7.16b
    223  1.1  christos 	eor	v25.16b, v9.16b, v8.16b
    224  1.1  christos 	eor	v27.16b, v0.16b, v4.16b
    225  1.1  christos 	and	v29.16b, v9.16b, v17.16b
    226  1.1  christos 	eor	v30.16b, v8.16b, v29.16b
    227  1.1  christos 	eor	v31.16b, v21.16b, v29.16b
    228  1.1  christos 	eor	v29.16b, v21.16b, v29.16b
    229  1.1  christos 	bsl	v30.16b, v17.16b, v21.16b	// bsl: destination holds the select mask on entry (1 -> first source, 0 -> second)
    230  1.1  christos 	bsl	v31.16b, v9.16b, v8.16b
    231  1.1  christos 	bsl	v16.16b, v30.16b, v29.16b
    232  1.1  christos 	bsl	v21.16b, v29.16b, v30.16b
    233  1.1  christos 	eor	v8.16b, v31.16b, v30.16b
    234  1.1  christos 	and	v1.16b, v1.16b, v31.16b
    235  1.1  christos 	and	v9.16b, v16.16b, v31.16b
    236  1.1  christos 	and	v6.16b, v6.16b, v30.16b
    237  1.1  christos 	eor	v16.16b, v17.16b, v21.16b
    238  1.1  christos 	and	v4.16b, v4.16b, v30.16b
    239  1.1  christos 	eor	v17.16b, v8.16b, v30.16b
    240  1.1  christos 	and	v21.16b, v24.16b, v8.16b
    241  1.1  christos 	eor	v9.16b, v9.16b, v25.16b
    242  1.1  christos 	and	v19.16b, v19.16b, v8.16b
    243  1.1  christos 	eor	v24.16b, v30.16b, v16.16b
    244  1.1  christos 	eor	v25.16b, v30.16b, v16.16b
    245  1.1  christos 	and	v7.16b, v7.16b, v17.16b
    246  1.1  christos 	and	v10.16b, v10.16b, v16.16b
    247  1.1  christos 	eor	v29.16b, v9.16b, v16.16b
    248  1.1  christos 	eor	v30.16b, v31.16b, v9.16b
    249  1.1  christos 	and	v0.16b, v24.16b, v0.16b
    250  1.1  christos 	and	v9.16b, v18.16b, v9.16b
    251  1.1  christos 	and	v2.16b, v25.16b, v2.16b
    252  1.1  christos 	eor	v10.16b, v10.16b, v6.16b
    253  1.1  christos 	eor	v18.16b, v29.16b, v16.16b
    254  1.1  christos 	and	v5.16b, v30.16b, v5.16b
    255  1.1  christos 	eor	v24.16b, v8.16b, v29.16b
    256  1.1  christos 	and	v25.16b, v26.16b, v29.16b
    257  1.1  christos 	and	v26.16b, v28.16b, v29.16b
    258  1.1  christos 	eor	v8.16b, v8.16b, v29.16b
    259  1.1  christos 	eor	v17.16b, v17.16b, v18.16b
    260  1.1  christos 	eor	v5.16b, v1.16b, v5.16b
    261  1.1  christos 	and	v23.16b, v24.16b, v23.16b
    262  1.1  christos 	eor	v21.16b, v21.16b, v25.16b
    263  1.1  christos 	eor	v19.16b, v19.16b, v26.16b
    264  1.1  christos 	eor	v0.16b, v4.16b, v0.16b
    265  1.1  christos 	and	v3.16b, v17.16b, v3.16b
    266  1.1  christos 	eor	v1.16b, v9.16b, v1.16b
    267  1.1  christos 	eor	v9.16b, v25.16b, v23.16b
    268  1.1  christos 	eor	v5.16b, v5.16b, v21.16b
    269  1.1  christos 	eor	v2.16b, v6.16b, v2.16b
    270  1.1  christos 	and	v6.16b, v8.16b, v22.16b
    271  1.1  christos 	eor	v3.16b, v7.16b, v3.16b
    272  1.1  christos 	and	v8.16b, v20.16b, v18.16b
    273  1.1  christos 	eor	v10.16b, v10.16b, v9.16b
    274  1.1  christos 	eor	v0.16b, v0.16b, v19.16b
    275  1.1  christos 	eor	v9.16b, v1.16b, v9.16b
    276  1.1  christos 	eor	v1.16b, v2.16b, v21.16b
    277  1.1  christos 	eor	v3.16b, v3.16b, v19.16b
    278  1.1  christos 	and	v16.16b, v27.16b, v16.16b
    279  1.1  christos 	eor	v17.16b, v26.16b, v6.16b
    280  1.1  christos 	eor	v6.16b, v8.16b, v7.16b
    281  1.1  christos 	eor	v7.16b, v1.16b, v9.16b
    282  1.1  christos 	eor	v1.16b, v5.16b, v3.16b
    283  1.1  christos 	eor	v2.16b, v10.16b, v3.16b
    284  1.1  christos 	eor	v4.16b, v16.16b, v4.16b
    285  1.1  christos 	eor	v8.16b, v6.16b, v17.16b
    286  1.1  christos 	eor	v5.16b, v9.16b, v3.16b
    287  1.1  christos 	eor	v9.16b, v0.16b, v1.16b
    288  1.1  christos 	eor	v6.16b, v7.16b, v1.16b
    289  1.1  christos 	eor	v0.16b, v4.16b, v17.16b
    290  1.1  christos 	eor	v4.16b, v8.16b, v7.16b
    291  1.1  christos 	eor	v7.16b, v9.16b, v2.16b
    292  1.1  christos 	eor	v8.16b, v3.16b, v0.16b
    293  1.1  christos 	eor	v7.16b, v7.16b, v5.16b
    294  1.1  christos 	eor	v3.16b, v4.16b, v7.16b
    295  1.1  christos 	eor	v4.16b, v7.16b, v0.16b
    296  1.1  christos 	eor	v7.16b, v8.16b, v3.16b
    297  1.1  christos 	bcc	.Ldec_done	// carry clear = the subs above borrowed (x10 was 0 before it): all passes done
                       // Linear mixing layer: each register is combined with copies of itself
                       // and of other registers rotated by 8 and by 12 bytes (ext with both
                       // sources equal is a byte rotation), using XOR only.
                       // Presumably this is the bit-sliced InvMixColumns - confirm against
                       // bsaes-armv7.
    298  1.1  christos 	ext	v8.16b, v0.16b, v0.16b, #8
    299  1.1  christos 	ext	v9.16b, v1.16b, v1.16b, #8
    300  1.1  christos 	ldr	q28, [x11]                  // load from .LISR in common case (x10 > 0)
    301  1.1  christos 	ext	v10.16b, v6.16b, v6.16b, #8
    302  1.1  christos 	ext	v16.16b, v3.16b, v3.16b, #8
    303  1.1  christos 	ext	v17.16b, v5.16b, v5.16b, #8
    304  1.1  christos 	ext	v18.16b, v4.16b, v4.16b, #8
    305  1.1  christos 	eor	v8.16b, v8.16b, v0.16b
    306  1.1  christos 	eor	v9.16b, v9.16b, v1.16b
    307  1.1  christos 	eor	v10.16b, v10.16b, v6.16b
    308  1.1  christos 	eor	v16.16b, v16.16b, v3.16b
    309  1.1  christos 	eor	v17.16b, v17.16b, v5.16b
    310  1.1  christos 	ext	v19.16b, v2.16b, v2.16b, #8
    311  1.1  christos 	ext	v20.16b, v7.16b, v7.16b, #8
    312  1.1  christos 	eor	v18.16b, v18.16b, v4.16b
    313  1.1  christos 	eor	v6.16b, v6.16b, v8.16b
    314  1.1  christos 	eor	v8.16b, v2.16b, v10.16b
    315  1.1  christos 	eor	v4.16b, v4.16b, v9.16b
    316  1.1  christos 	eor	v2.16b, v19.16b, v2.16b
    317  1.1  christos 	eor	v9.16b, v20.16b, v7.16b
    318  1.1  christos 	eor	v0.16b, v0.16b, v16.16b
    319  1.1  christos 	eor	v1.16b, v1.16b, v16.16b
    320  1.1  christos 	eor	v6.16b, v6.16b, v17.16b
    321  1.1  christos 	eor	v8.16b, v8.16b, v16.16b
    322  1.1  christos 	eor	v7.16b, v7.16b, v18.16b
    323  1.1  christos 	eor	v4.16b, v4.16b, v16.16b
    324  1.1  christos 	eor	v2.16b, v3.16b, v2.16b
    325  1.1  christos 	eor	v1.16b, v1.16b, v17.16b
    326  1.1  christos 	eor	v3.16b, v5.16b, v9.16b
    327  1.1  christos 	eor	v5.16b, v8.16b, v17.16b
    328  1.1  christos 	eor	v7.16b, v7.16b, v17.16b
    329  1.1  christos 	ext	v8.16b, v0.16b, v0.16b, #12
    330  1.1  christos 	ext	v9.16b, v6.16b, v6.16b, #12
    331  1.1  christos 	ext	v10.16b, v4.16b, v4.16b, #12
    332  1.1  christos 	ext	v16.16b, v1.16b, v1.16b, #12
    333  1.1  christos 	ext	v17.16b, v5.16b, v5.16b, #12
    334  1.1  christos 	ext	v18.16b, v7.16b, v7.16b, #12
    335  1.1  christos 	eor	v0.16b, v0.16b, v8.16b
    336  1.1  christos 	eor	v6.16b, v6.16b, v9.16b
    337  1.1  christos 	eor	v4.16b, v4.16b, v10.16b
    338  1.1  christos 	ext	v19.16b, v2.16b, v2.16b, #12
    339  1.1  christos 	ext	v20.16b, v3.16b, v3.16b, #12
    340  1.1  christos 	eor	v1.16b, v1.16b, v16.16b
    341  1.1  christos 	eor	v5.16b, v5.16b, v17.16b
    342  1.1  christos 	eor	v7.16b, v7.16b, v18.16b
    343  1.1  christos 	eor	v2.16b, v2.16b, v19.16b
    344  1.1  christos 	eor	v16.16b, v16.16b, v0.16b
    345  1.1  christos 	eor	v3.16b, v3.16b, v20.16b
    346  1.1  christos 	eor	v17.16b, v17.16b, v4.16b
    347  1.1  christos 	eor	v10.16b, v10.16b, v6.16b
    348  1.1  christos 	ext	v0.16b, v0.16b, v0.16b, #8
    349  1.1  christos 	eor	v9.16b, v9.16b, v1.16b
    350  1.1  christos 	ext	v1.16b, v1.16b, v1.16b, #8
    351  1.1  christos 	eor	v8.16b, v8.16b, v3.16b
    352  1.1  christos 	eor	v16.16b, v16.16b, v3.16b
    353  1.1  christos 	eor	v18.16b, v18.16b, v5.16b
    354  1.1  christos 	eor	v19.16b, v19.16b, v7.16b
    355  1.1  christos 	ext	v21.16b, v5.16b, v5.16b, #8
    356  1.1  christos 	ext	v5.16b, v7.16b, v7.16b, #8
    357  1.1  christos 	eor	v7.16b, v20.16b, v2.16b
    358  1.1  christos 	ext	v4.16b, v4.16b, v4.16b, #8
    359  1.1  christos 	ext	v20.16b, v3.16b, v3.16b, #8
    360  1.1  christos 	eor	v17.16b, v17.16b, v3.16b
    361  1.1  christos 	ext	v2.16b, v2.16b, v2.16b, #8
    362  1.1  christos 	eor	v3.16b, v10.16b, v3.16b
    363  1.1  christos 	ext	v10.16b, v6.16b, v6.16b, #8
    364  1.1  christos 	eor	v0.16b, v0.16b, v8.16b
    365  1.1  christos 	eor	v1.16b, v1.16b, v16.16b
    366  1.1  christos 	eor	v5.16b, v5.16b, v18.16b
    367  1.1  christos 	eor	v3.16b, v3.16b, v4.16b
    368  1.1  christos 	eor	v7.16b, v20.16b, v7.16b
    369  1.1  christos 	eor	v6.16b, v2.16b, v19.16b
    370  1.1  christos 	eor	v4.16b, v21.16b, v17.16b
    371  1.1  christos 	eor	v2.16b, v10.16b, v9.16b
    372  1.1  christos 	bne	.Ldec_loop	// flags still from the subs in .Ldec_sbox: x10 != 0, keep the .LISR permutation
    373  1.1  christos 	ldr	q28, [x11, #16]!            // load from .LISRM0 on last round (x10 == 0)
    374  1.1  christos 	b	.Ldec_loop
    375  1.1  christos .align	4
                       // Epilogue: three masked swap stages (same formula as at entry, masks
                       // 0x55/0x33/0x0f rebuilt in v9/v17/v19), then XOR the final 16-byte key
                       // (v10, loaded from [x9] without advancing x9) into all of v0-v7.
                       //   stage 1 pairs (shift 1): (v0,v1) (v2,v7) (v6,v4) (v3,v5)
                       //   stage 2 pairs (shift 2): (v1,v4) (v0,v6) (v7,v5) (v2,v3)
                       //   stage 3 pairs (shift 4): (v4,v5) (v6,v3) (v1,v7) (v0,v2)
    376  1.1  christos .Ldec_done:
    377  1.1  christos 	ushr	v8.2d, v0.2d, #1
    378  1.1  christos 	movi	v9.16b, #0x55
    379  1.1  christos 	ldr	q10, [x9]	// v10 = final key vector
    380  1.1  christos 	ushr	v16.2d, v2.2d, #1
    381  1.1  christos 	movi	v17.16b, #0x33
    382  1.1  christos 	ushr	v18.2d, v6.2d, #1
    383  1.1  christos 	movi	v19.16b, #0x0f
    384  1.1  christos 	eor	v8.16b, v8.16b, v1.16b
    385  1.1  christos 	ushr	v20.2d, v3.2d, #1
    386  1.1  christos 	eor	v16.16b, v16.16b, v7.16b
    387  1.1  christos 	eor	v18.16b, v18.16b, v4.16b
    388  1.1  christos 	and	v8.16b, v8.16b, v9.16b
    389  1.1  christos 	eor	v20.16b, v20.16b, v5.16b
    390  1.1  christos 	and	v16.16b, v16.16b, v9.16b
    391  1.1  christos 	and	v18.16b, v18.16b, v9.16b
    392  1.1  christos 	shl	v21.2d, v8.2d, #1
    393  1.1  christos 	eor	v1.16b, v1.16b, v8.16b
    394  1.1  christos 	and	v8.16b, v20.16b, v9.16b
    395  1.1  christos 	eor	v7.16b, v7.16b, v16.16b
    396  1.1  christos 	shl	v9.2d, v16.2d, #1
    397  1.1  christos 	eor	v4.16b, v4.16b, v18.16b
    398  1.1  christos 	shl	v16.2d, v18.2d, #1
    399  1.1  christos 	eor	v0.16b, v0.16b, v21.16b
    400  1.1  christos 	shl	v18.2d, v8.2d, #1
    401  1.1  christos 	eor	v5.16b, v5.16b, v8.16b
    402  1.1  christos 	eor	v2.16b, v2.16b, v9.16b
    403  1.1  christos 	eor	v6.16b, v6.16b, v16.16b
    404  1.1  christos 	ushr	v8.2d, v1.2d, #2
    405  1.1  christos 	eor	v3.16b, v3.16b, v18.16b
    406  1.1  christos 	ushr	v9.2d, v0.2d, #2
    407  1.1  christos 	ushr	v16.2d, v7.2d, #2
    408  1.1  christos 	ushr	v18.2d, v2.2d, #2
    409  1.1  christos 	eor	v8.16b, v8.16b, v4.16b
    410  1.1  christos 	eor	v9.16b, v9.16b, v6.16b
    411  1.1  christos 	eor	v16.16b, v16.16b, v5.16b
    412  1.1  christos 	eor	v18.16b, v18.16b, v3.16b
    413  1.1  christos 	and	v8.16b, v8.16b, v17.16b
    414  1.1  christos 	and	v9.16b, v9.16b, v17.16b
    415  1.1  christos 	and	v16.16b, v16.16b, v17.16b
    416  1.1  christos 	and	v17.16b, v18.16b, v17.16b
    417  1.1  christos 	eor	v4.16b, v4.16b, v8.16b
    418  1.1  christos 	shl	v8.2d, v8.2d, #2
    419  1.1  christos 	eor	v6.16b, v6.16b, v9.16b
    420  1.1  christos 	shl	v9.2d, v9.2d, #2
    421  1.1  christos 	eor	v5.16b, v5.16b, v16.16b
    422  1.1  christos 	shl	v16.2d, v16.2d, #2
    423  1.1  christos 	eor	v3.16b, v3.16b, v17.16b
    424  1.1  christos 	shl	v17.2d, v17.2d, #2
    425  1.1  christos 	eor	v1.16b, v1.16b, v8.16b
    426  1.1  christos 	eor	v0.16b, v0.16b, v9.16b
    427  1.1  christos 	eor	v7.16b, v7.16b, v16.16b
    428  1.1  christos 	eor	v2.16b, v2.16b, v17.16b
    429  1.1  christos 	ushr	v8.2d, v4.2d, #4
    430  1.1  christos 	ushr	v9.2d, v6.2d, #4
    431  1.1  christos 	ushr	v16.2d, v1.2d, #4
    432  1.1  christos 	ushr	v17.2d, v0.2d, #4
    433  1.1  christos 	eor	v8.16b, v8.16b, v5.16b
    434  1.1  christos 	eor	v9.16b, v9.16b, v3.16b
    435  1.1  christos 	eor	v16.16b, v16.16b, v7.16b
    436  1.1  christos 	eor	v17.16b, v17.16b, v2.16b
    437  1.1  christos 	and	v8.16b, v8.16b, v19.16b
    438  1.1  christos 	and	v9.16b, v9.16b, v19.16b
    439  1.1  christos 	and	v16.16b, v16.16b, v19.16b
    440  1.1  christos 	and	v17.16b, v17.16b, v19.16b
    441  1.1  christos 	eor	v5.16b, v5.16b, v8.16b
    442  1.1  christos 	shl	v8.2d, v8.2d, #4
    443  1.1  christos 	eor	v3.16b, v3.16b, v9.16b
    444  1.1  christos 	shl	v9.2d, v9.2d, #4
    445  1.1  christos 	eor	v7.16b, v7.16b, v16.16b
    446  1.1  christos 	shl	v16.2d, v16.2d, #4
    447  1.1  christos 	eor	v2.16b, v2.16b, v17.16b
    448  1.1  christos 	shl	v17.2d, v17.2d, #4
    449  1.1  christos 	eor	v4.16b, v4.16b, v8.16b
    450  1.1  christos 	eor	v6.16b, v6.16b, v9.16b
    451  1.1  christos 	eor	v7.16b, v7.16b, v10.16b	// from here on: add the final key (v10) to each output as it completes
    452  1.1  christos 	eor	v1.16b, v1.16b, v16.16b
    453  1.1  christos 	eor	v2.16b, v2.16b, v10.16b
    454  1.1  christos 	eor	v0.16b, v0.16b, v17.16b
    455  1.1  christos 	eor	v4.16b, v4.16b, v10.16b
    456  1.1  christos 	eor	v6.16b, v6.16b, v10.16b
    457  1.1  christos 	eor	v3.16b, v3.16b, v10.16b
    458  1.1  christos 	eor	v5.16b, v5.16b, v10.16b
    459  1.1  christos 	eor	v1.16b, v1.16b, v10.16b
    460  1.1  christos 	eor	v0.16b, v0.16b, v10.16b
    461  1.1  christos 	ret
    462  1.1  christos .size	_bsaes_decrypt8,.-_bsaes_decrypt8
    463  1.1  christos 
                       // Read-only constant table. Every entry is two .quad values, i.e. one
                       // 16-byte vector that the code loads with "ldr qN" and uses as the
                       // index operand of TBL (a byte permutation).
    464  1.1  christos .section	.rodata
    465  1.1  christos .type	_bsaes_consts,%object
    466  1.1  christos .align	6	// on AArch64 GAS the operand is a power of two: 64-byte alignment
    467  1.1  christos _bsaes_consts:
    468  1.1  christos // InvShiftRows constants
    469  1.1  christos // Used in _bsaes_decrypt8, which assumes contiguity
    470  1.1  christos // .LM0ISR used with round 0 key
    471  1.1  christos // .LISR   used with middle round keys
    472  1.1  christos // .LISRM0 used with final round key
                       // Contiguity matters because _bsaes_decrypt8 takes the address of
                       // .LM0ISR only, then reaches .LISR via a 16-byte post-increment and
                       // .LISRM0 via a further 16-byte pre-increment of the same pointer.
    473  1.1  christos .LM0ISR:
    474  1.1  christos .quad	0x0a0e0206070b0f03, 0x0004080c0d010509
    475  1.1  christos .LISR:
    476  1.1  christos .quad	0x0504070602010003, 0x0f0e0d0c080b0a09
    477  1.1  christos .LISRM0:
    478  1.1  christos .quad	0x01040b0e0205080f, 0x0306090c00070a0d
    479  1.1  christos 
    480  1.1  christos // ShiftRows constants
    481  1.1  christos // Used in _bsaes_encrypt8, which assumes contiguity
    482  1.1  christos // .LM0SR used with round 0 key
    483  1.1  christos // .LSR   used with middle round keys
    484  1.1  christos // .LSRM0 used with final round key
                       // _bsaes_encrypt8 likewise takes the address of .LM0SR and
                       // post-increments it by 16 after the first load.
    485  1.1  christos .LM0SR:
    486  1.1  christos .quad	0x0a0e02060f03070b, 0x0004080c05090d01
    487  1.1  christos .LSR:
    488  1.1  christos .quad	0x0504070600030201, 0x0f0e0d0c0a09080b
    489  1.1  christos .LSRM0:
    490  1.1  christos .quad	0x0304090e00050a0f, 0x01060b0c0207080d
    491  1.1  christos 
                       // NOTE(review): the two .LM0_* vectors are not referenced in the part
                       // of the file visible here; presumably selected by the key-conversion
                       // code according to target endianness - confirm against their users.
    492  1.1  christos .LM0_bigendian:
    493  1.1  christos .quad	0x02060a0e03070b0f, 0x0004080c0105090d
    494  1.1  christos .LM0_littleendian:
    495  1.1  christos .quad	0x0105090d0004080c, 0x03070b0f02060a0e
    496  1.1  christos 
    497  1.1  christos // Used in ossl_bsaes_ctr32_encrypt_blocks, prior to dropping into
    498  1.1  christos // _bsaes_encrypt8_alt, for round 0 key in place of .LM0SR
    499  1.1  christos .LREVM0SR:
    500  1.1  christos .quad	0x090d01050c000408, 0x03070b0f060a0e02
    501  1.1  christos 
    502  1.1  christos .align	6	// pad to the next 64-byte boundary so the padding is counted in .size below
    503  1.1  christos .size	_bsaes_consts,.-_bsaes_consts
    504  1.1  christos 
    505  1.1  christos .previous	// switch back to the section that was active before ".section .rodata"
    506  1.1  christos 
    507  1.1  christos .type	_bsaes_encrypt8,%function
    508  1.1  christos .align	4
    509  1.1  christos // On entry:
    510  1.1  christos //   x9 -> key (previously expanded using _bsaes_key_convert)
    511  1.1  christos //   x10 = number of rounds
    512  1.1  christos //   v0-v7 input data
    513  1.1  christos // On exit:
    514  1.1  christos //   x9-x11 corrupted
    515  1.1  christos //   other general-purpose registers preserved
    516  1.1  christos //   v0-v7 output data
    517  1.1  christos //   v11-v15 preserved
    518  1.1  christos //   other SIMD registers corrupted
    519  1.1  christos _bsaes_encrypt8:
    520  1.1  christos 	ldr	q8, [x9], #16
    521  1.1  christos 	adrp	x11, .LM0SR
    522  1.1  christos 	add	x11, x11, #:lo12:.LM0SR
    523  1.1  christos 	ldr	q9, [x11], #16
    524  1.1  christos _bsaes_encrypt8_alt:
    525  1.1  christos 	eor	v0.16b, v0.16b, v8.16b
    526  1.1  christos 	eor	v1.16b, v1.16b, v8.16b
    527  1.1  christos 	sub	x10, x10, #1
    528  1.1  christos 	eor	v2.16b, v2.16b, v8.16b
    529  1.1  christos 	eor	v4.16b, v4.16b, v8.16b
    530  1.1  christos 	eor	v3.16b, v3.16b, v8.16b
    531  1.1  christos 	eor	v5.16b, v5.16b, v8.16b
    532  1.1  christos 	tbl	v0.16b, {v0.16b}, v9.16b
    533  1.1  christos 	tbl	v1.16b, {v1.16b}, v9.16b
    534  1.1  christos 	tbl	v2.16b, {v2.16b}, v9.16b
    535  1.1  christos 	tbl	v4.16b, {v4.16b}, v9.16b
    536  1.1  christos 	eor	v6.16b, v6.16b, v8.16b
    537  1.1  christos 	eor	v7.16b, v7.16b, v8.16b
    538  1.1  christos 	tbl	v3.16b, {v3.16b}, v9.16b
    539  1.1  christos 	tbl	v5.16b, {v5.16b}, v9.16b
    540  1.1  christos 	tbl	v6.16b, {v6.16b}, v9.16b
    541  1.1  christos 	ushr	v8.2d, v0.2d, #1
    542  1.1  christos 	movi	v10.16b, #0x55
    543  1.1  christos 	tbl	v7.16b, {v7.16b}, v9.16b
    544  1.1  christos 	ushr	v9.2d, v4.2d, #1
    545  1.1  christos 	movi	v16.16b, #0x33
    546  1.1  christos 	ushr	v17.2d, v2.2d, #1
    547  1.1  christos 	eor	v8.16b, v8.16b, v1.16b
    548  1.1  christos 	movi	v18.16b, #0x0f
    549  1.1  christos 	ushr	v19.2d, v6.2d, #1
    550  1.1  christos 	eor	v9.16b, v9.16b, v5.16b
    551  1.1  christos 	eor	v17.16b, v17.16b, v3.16b
    552  1.1  christos 	and	v8.16b, v8.16b, v10.16b
    553  1.1  christos 	eor	v19.16b, v19.16b, v7.16b
    554  1.1  christos 	and	v9.16b, v9.16b, v10.16b
    555  1.1  christos 	and	v17.16b, v17.16b, v10.16b
    556  1.1  christos 	eor	v1.16b, v1.16b, v8.16b
    557  1.1  christos 	shl	v8.2d, v8.2d, #1
    558  1.1  christos 	and	v10.16b, v19.16b, v10.16b
    559  1.1  christos 	eor	v5.16b, v5.16b, v9.16b
    560  1.1  christos 	shl	v9.2d, v9.2d, #1
    561  1.1  christos 	eor	v3.16b, v3.16b, v17.16b
    562  1.1  christos 	shl	v17.2d, v17.2d, #1
    563  1.1  christos 	eor	v0.16b, v0.16b, v8.16b
    564  1.1  christos 	shl	v8.2d, v10.2d, #1
    565  1.1  christos 	eor	v7.16b, v7.16b, v10.16b
    566  1.1  christos 	eor	v4.16b, v4.16b, v9.16b
    567  1.1  christos 	eor	v2.16b, v2.16b, v17.16b
    568  1.1  christos 	ushr	v9.2d, v1.2d, #2
    569  1.1  christos 	eor	v6.16b, v6.16b, v8.16b
    570  1.1  christos 	ushr	v8.2d, v0.2d, #2
    571  1.1  christos 	ushr	v10.2d, v5.2d, #2
    572  1.1  christos 	ushr	v17.2d, v4.2d, #2
    573  1.1  christos 	eor	v9.16b, v9.16b, v3.16b
    574  1.1  christos 	eor	v8.16b, v8.16b, v2.16b
    575  1.1  christos 	eor	v10.16b, v10.16b, v7.16b
    576  1.1  christos 	eor	v17.16b, v17.16b, v6.16b
    577  1.1  christos 	and	v9.16b, v9.16b, v16.16b
    578  1.1  christos 	and	v8.16b, v8.16b, v16.16b
    579  1.1  christos 	and	v10.16b, v10.16b, v16.16b
    580  1.1  christos 	and	v16.16b, v17.16b, v16.16b
    581  1.1  christos 	eor	v3.16b, v3.16b, v9.16b
    582  1.1  christos 	shl	v9.2d, v9.2d, #2
    583  1.1  christos 	eor	v2.16b, v2.16b, v8.16b
    584  1.1  christos 	shl	v8.2d, v8.2d, #2
    585  1.1  christos 	eor	v7.16b, v7.16b, v10.16b
    586  1.1  christos 	shl	v10.2d, v10.2d, #2
    587  1.1  christos 	eor	v6.16b, v6.16b, v16.16b
    588  1.1  christos 	shl	v16.2d, v16.2d, #2
    589  1.1  christos 	eor	v1.16b, v1.16b, v9.16b
    590  1.1  christos 	eor	v0.16b, v0.16b, v8.16b
    591  1.1  christos 	eor	v5.16b, v5.16b, v10.16b
    592  1.1  christos 	eor	v4.16b, v4.16b, v16.16b
    593  1.1  christos 	ushr	v8.2d, v3.2d, #4
    594  1.1  christos 	ushr	v9.2d, v2.2d, #4
    595  1.1  christos 	ushr	v10.2d, v1.2d, #4
    596  1.1  christos 	ushr	v16.2d, v0.2d, #4
    597  1.1  christos 	eor	v8.16b, v8.16b, v7.16b
    598  1.1  christos 	eor	v9.16b, v9.16b, v6.16b
    599  1.1  christos 	eor	v10.16b, v10.16b, v5.16b
    600  1.1  christos 	eor	v16.16b, v16.16b, v4.16b
    601  1.1  christos 	and	v8.16b, v8.16b, v18.16b
    602  1.1  christos 	and	v9.16b, v9.16b, v18.16b
    603  1.1  christos 	and	v10.16b, v10.16b, v18.16b
    604  1.1  christos 	and	v16.16b, v16.16b, v18.16b
    605  1.1  christos 	eor	v7.16b, v7.16b, v8.16b
    606  1.1  christos 	shl	v8.2d, v8.2d, #4
    607  1.1  christos 	eor	v6.16b, v6.16b, v9.16b
    608  1.1  christos 	shl	v9.2d, v9.2d, #4
    609  1.1  christos 	eor	v5.16b, v5.16b, v10.16b
    610  1.1  christos 	shl	v10.2d, v10.2d, #4
    611  1.1  christos 	eor	v4.16b, v4.16b, v16.16b
    612  1.1  christos 	shl	v16.2d, v16.2d, #4
    613  1.1  christos 	eor	v3.16b, v3.16b, v8.16b
    614  1.1  christos 	eor	v2.16b, v2.16b, v9.16b
    615  1.1  christos 	eor	v1.16b, v1.16b, v10.16b
    616  1.1  christos 	eor	v0.16b, v0.16b, v16.16b
    617  1.1  christos 	b	.Lenc_sbox
    618  1.1  christos .align	4
    619  1.1  christos .Lenc_loop:
    620  1.1  christos 	ld1	{v16.16b, v17.16b, v18.16b, v19.16b}, [x9], #64
    621  1.1  christos 	ldp	q8, q9, [x9], #32
    622  1.1  christos 	eor	v0.16b, v16.16b, v0.16b
    623  1.1  christos 	ldr	q10, [x9], #16
    624  1.1  christos 	eor	v1.16b, v17.16b, v1.16b
    625  1.1  christos 	ldr	q16, [x9], #16
    626  1.1  christos 	eor	v2.16b, v18.16b, v2.16b
    627  1.1  christos 	eor	v3.16b, v19.16b, v3.16b
    628  1.1  christos 	eor	v4.16b, v8.16b, v4.16b
    629  1.1  christos 	eor	v5.16b, v9.16b, v5.16b
    630  1.1  christos 	eor	v6.16b, v10.16b, v6.16b
    631  1.1  christos 	eor	v7.16b, v16.16b, v7.16b
    632  1.1  christos 	tbl	v0.16b, {v0.16b}, v28.16b
    633  1.1  christos 	tbl	v1.16b, {v1.16b}, v28.16b
    634  1.1  christos 	tbl	v2.16b, {v2.16b}, v28.16b
    635  1.1  christos 	tbl	v3.16b, {v3.16b}, v28.16b
    636  1.1  christos 	tbl	v4.16b, {v4.16b}, v28.16b
    637  1.1  christos 	tbl	v5.16b, {v5.16b}, v28.16b
    638  1.1  christos 	tbl	v6.16b, {v6.16b}, v28.16b
    639  1.1  christos 	tbl	v7.16b, {v7.16b}, v28.16b
    640  1.1  christos .Lenc_sbox:
    641  1.1  christos 	eor	v5.16b, v5.16b, v6.16b
    642  1.1  christos 	eor	v3.16b, v3.16b, v0.16b
    643  1.1  christos 	subs	x10, x10, #1
    644  1.1  christos 	eor	v2.16b, v2.16b, v1.16b
    645  1.1  christos 	eor	v5.16b, v5.16b, v0.16b
    646  1.1  christos 	eor	v8.16b, v3.16b, v7.16b
    647  1.1  christos 	eor	v6.16b, v6.16b, v2.16b
    648  1.1  christos 	eor	v7.16b, v7.16b, v5.16b
    649  1.1  christos 	eor	v8.16b, v8.16b, v4.16b
    650  1.1  christos 	eor	v3.16b, v6.16b, v3.16b
    651  1.1  christos 	eor	v4.16b, v4.16b, v5.16b
    652  1.1  christos 	eor	v6.16b, v1.16b, v5.16b
    653  1.1  christos 	eor	v2.16b, v2.16b, v7.16b
    654  1.1  christos 	eor	v1.16b, v8.16b, v1.16b
    655  1.1  christos 	eor	v8.16b, v7.16b, v4.16b
    656  1.1  christos 	eor	v9.16b, v3.16b, v0.16b
    657  1.1  christos 	eor	v10.16b, v7.16b, v6.16b
    658  1.1  christos 	eor	v16.16b, v5.16b, v3.16b
    659  1.1  christos 	eor	v17.16b, v6.16b, v2.16b
    660  1.1  christos 	eor	v18.16b, v5.16b, v1.16b
    661  1.1  christos 	eor	v19.16b, v2.16b, v4.16b
    662  1.1  christos 	eor	v20.16b, v1.16b, v0.16b
    663  1.1  christos 	orr	v21.16b, v8.16b, v9.16b
    664  1.1  christos 	orr	v22.16b, v10.16b, v16.16b
    665  1.1  christos 	eor	v23.16b, v8.16b, v17.16b
    666  1.1  christos 	eor	v24.16b, v9.16b, v18.16b
    667  1.1  christos 	and	v19.16b, v19.16b, v20.16b
    668  1.1  christos 	orr	v20.16b, v17.16b, v18.16b
    669  1.1  christos 	and	v8.16b, v8.16b, v9.16b
    670  1.1  christos 	and	v9.16b, v17.16b, v18.16b
    671  1.1  christos 	and	v17.16b, v23.16b, v24.16b
    672  1.1  christos 	and	v10.16b, v10.16b, v16.16b
    673  1.1  christos 	eor	v16.16b, v21.16b, v19.16b
    674  1.1  christos 	eor	v18.16b, v20.16b, v19.16b
    675  1.1  christos 	and	v19.16b, v2.16b, v1.16b
    676  1.1  christos 	and	v20.16b, v6.16b, v5.16b
    677  1.1  christos 	eor	v21.16b, v22.16b, v17.16b
    678  1.1  christos 	eor	v9.16b, v9.16b, v10.16b
    679  1.1  christos 	eor	v10.16b, v16.16b, v17.16b
    680  1.1  christos 	eor	v16.16b, v18.16b, v8.16b
    681  1.1  christos 	and	v17.16b, v4.16b, v0.16b
    682  1.1  christos 	orr	v18.16b, v7.16b, v3.16b
    683  1.1  christos 	eor	v21.16b, v21.16b, v8.16b
    684  1.1  christos 	eor	v8.16b, v9.16b, v8.16b
    685  1.1  christos 	eor	v9.16b, v10.16b, v19.16b
    686  1.1  christos 	eor	v10.16b, v3.16b, v0.16b
    687  1.1  christos 	eor	v16.16b, v16.16b, v17.16b
    688  1.1  christos 	eor	v17.16b, v5.16b, v1.16b
    689  1.1  christos 	eor	v19.16b, v21.16b, v20.16b
    690  1.1  christos 	eor	v20.16b, v8.16b, v18.16b
    691  1.1  christos 	eor	v8.16b, v8.16b, v18.16b
    692  1.1  christos 	eor	v18.16b, v7.16b, v4.16b
    693  1.1  christos 	eor	v21.16b, v9.16b, v16.16b
    694  1.1  christos 	eor	v22.16b, v6.16b, v2.16b
    695  1.1  christos 	and	v23.16b, v9.16b, v19.16b
    696  1.1  christos 	eor	v24.16b, v10.16b, v17.16b
    697  1.1  christos 	eor	v25.16b, v0.16b, v1.16b
    698  1.1  christos 	eor	v26.16b, v7.16b, v6.16b
    699  1.1  christos 	eor	v27.16b, v18.16b, v22.16b
    700  1.1  christos 	eor	v28.16b, v3.16b, v5.16b
    701  1.1  christos 	eor	v29.16b, v16.16b, v23.16b
    702  1.1  christos 	eor	v30.16b, v20.16b, v23.16b
    703  1.1  christos 	eor	v23.16b, v20.16b, v23.16b
    704  1.1  christos 	eor	v31.16b, v4.16b, v2.16b
    705  1.1  christos 	bsl	v29.16b, v19.16b, v20.16b
    706  1.1  christos 	bsl	v30.16b, v9.16b, v16.16b
    707  1.1  christos 	bsl	v8.16b, v29.16b, v23.16b
    708  1.1  christos 	bsl	v20.16b, v23.16b, v29.16b
    709  1.1  christos 	eor	v9.16b, v30.16b, v29.16b
    710  1.1  christos 	and	v5.16b, v5.16b, v30.16b
    711  1.1  christos 	and	v8.16b, v8.16b, v30.16b
    712  1.1  christos 	and	v1.16b, v1.16b, v29.16b
    713  1.1  christos 	eor	v16.16b, v19.16b, v20.16b
    714  1.1  christos 	and	v2.16b, v2.16b, v29.16b
    715  1.1  christos 	eor	v19.16b, v9.16b, v29.16b
    716  1.1  christos 	and	v17.16b, v17.16b, v9.16b
    717  1.1  christos 	eor	v8.16b, v8.16b, v21.16b
    718  1.1  christos 	and	v20.16b, v22.16b, v9.16b
    719  1.1  christos 	eor	v21.16b, v29.16b, v16.16b
    720  1.1  christos 	eor	v22.16b, v29.16b, v16.16b
    721  1.1  christos 	and	v23.16b, v25.16b, v16.16b
    722  1.1  christos 	and	v6.16b, v6.16b, v19.16b
    723  1.1  christos 	eor	v25.16b, v8.16b, v16.16b
    724  1.1  christos 	eor	v29.16b, v30.16b, v8.16b
    725  1.1  christos 	and	v4.16b, v21.16b, v4.16b
    726  1.1  christos 	and	v8.16b, v28.16b, v8.16b
    727  1.1  christos 	and	v0.16b, v22.16b, v0.16b
    728  1.1  christos 	eor	v21.16b, v23.16b, v1.16b
    729  1.1  christos 	eor	v22.16b, v9.16b, v25.16b
    730  1.1  christos 	eor	v9.16b, v9.16b, v25.16b
    731  1.1  christos 	eor	v23.16b, v25.16b, v16.16b
    732  1.1  christos 	and	v3.16b, v29.16b, v3.16b
    733  1.1  christos 	and	v24.16b, v24.16b, v25.16b
    734  1.1  christos 	and	v25.16b, v27.16b, v25.16b
    735  1.1  christos 	and	v10.16b, v22.16b, v10.16b
    736  1.1  christos 	and	v9.16b, v9.16b, v18.16b
    737  1.1  christos 	eor	v18.16b, v19.16b, v23.16b
    738  1.1  christos 	and	v19.16b, v26.16b, v23.16b
    739  1.1  christos 	eor	v3.16b, v5.16b, v3.16b
    740  1.1  christos 	eor	v17.16b, v17.16b, v24.16b
    741  1.1  christos 	eor	v10.16b, v24.16b, v10.16b
    742  1.1  christos 	and	v16.16b, v31.16b, v16.16b
    743  1.1  christos 	eor	v20.16b, v20.16b, v25.16b
    744  1.1  christos 	eor	v9.16b, v25.16b, v9.16b
    745  1.1  christos 	eor	v4.16b, v2.16b, v4.16b
    746  1.1  christos 	and	v7.16b, v18.16b, v7.16b
    747  1.1  christos 	eor	v18.16b, v19.16b, v6.16b
    748  1.1  christos 	eor	v5.16b, v8.16b, v5.16b
    749  1.1  christos 	eor	v0.16b, v1.16b, v0.16b
    750  1.1  christos 	eor	v1.16b, v21.16b, v10.16b
    751  1.1  christos 	eor	v8.16b, v3.16b, v17.16b
    752  1.1  christos 	eor	v2.16b, v16.16b, v2.16b
    753  1.1  christos 	eor	v3.16b, v6.16b, v7.16b
    754  1.1  christos 	eor	v6.16b, v18.16b, v9.16b
    755  1.1  christos 	eor	v4.16b, v4.16b, v20.16b
    756  1.1  christos 	eor	v10.16b, v5.16b, v10.16b
    757  1.1  christos 	eor	v0.16b, v0.16b, v17.16b
    758  1.1  christos 	eor	v9.16b, v2.16b, v9.16b
    759  1.1  christos 	eor	v3.16b, v3.16b, v20.16b
    760  1.1  christos 	eor	v7.16b, v6.16b, v1.16b
    761  1.1  christos 	eor	v5.16b, v8.16b, v4.16b
    762  1.1  christos 	eor	v6.16b, v10.16b, v1.16b
    763  1.1  christos 	eor	v2.16b, v4.16b, v0.16b
    764  1.1  christos 	eor	v4.16b, v3.16b, v10.16b
    765  1.1  christos 	eor	v9.16b, v9.16b, v7.16b
    766  1.1  christos 	eor	v3.16b, v0.16b, v5.16b
    767  1.1  christos 	eor	v0.16b, v1.16b, v4.16b
    768  1.1  christos 	eor	v1.16b, v4.16b, v8.16b
    769  1.1  christos 	eor	v4.16b, v9.16b, v5.16b
    770  1.1  christos 	eor	v6.16b, v6.16b, v3.16b
    771  1.1  christos 	bcc	.Lenc_done
    772  1.1  christos 	ext	v8.16b, v0.16b, v0.16b, #12
    773  1.1  christos 	ext	v9.16b, v4.16b, v4.16b, #12
    774  1.1  christos 	ldr	q28, [x11]
    775  1.1  christos 	ext	v10.16b, v6.16b, v6.16b, #12
    776  1.1  christos 	ext	v16.16b, v1.16b, v1.16b, #12
    777  1.1  christos 	ext	v17.16b, v3.16b, v3.16b, #12
    778  1.1  christos 	ext	v18.16b, v7.16b, v7.16b, #12
    779  1.1  christos 	eor	v0.16b, v0.16b, v8.16b
    780  1.1  christos 	eor	v4.16b, v4.16b, v9.16b
    781  1.1  christos 	eor	v6.16b, v6.16b, v10.16b
    782  1.1  christos 	ext	v19.16b, v2.16b, v2.16b, #12
    783  1.1  christos 	ext	v20.16b, v5.16b, v5.16b, #12
    784  1.1  christos 	eor	v1.16b, v1.16b, v16.16b
    785  1.1  christos 	eor	v3.16b, v3.16b, v17.16b
    786  1.1  christos 	eor	v7.16b, v7.16b, v18.16b
    787  1.1  christos 	eor	v2.16b, v2.16b, v19.16b
    788  1.1  christos 	eor	v16.16b, v16.16b, v0.16b
    789  1.1  christos 	eor	v5.16b, v5.16b, v20.16b
    790  1.1  christos 	eor	v17.16b, v17.16b, v6.16b
    791  1.1  christos 	eor	v10.16b, v10.16b, v4.16b
    792  1.1  christos 	ext	v0.16b, v0.16b, v0.16b, #8
    793  1.1  christos 	eor	v9.16b, v9.16b, v1.16b
    794  1.1  christos 	ext	v1.16b, v1.16b, v1.16b, #8
    795  1.1  christos 	eor	v8.16b, v8.16b, v5.16b
    796  1.1  christos 	eor	v16.16b, v16.16b, v5.16b
    797  1.1  christos 	eor	v18.16b, v18.16b, v3.16b
    798  1.1  christos 	eor	v19.16b, v19.16b, v7.16b
    799  1.1  christos 	ext	v3.16b, v3.16b, v3.16b, #8
    800  1.1  christos 	ext	v7.16b, v7.16b, v7.16b, #8
    801  1.1  christos 	eor	v20.16b, v20.16b, v2.16b
    802  1.1  christos 	ext	v6.16b, v6.16b, v6.16b, #8
    803  1.1  christos 	ext	v21.16b, v5.16b, v5.16b, #8
    804  1.1  christos 	eor	v17.16b, v17.16b, v5.16b
    805  1.1  christos 	ext	v2.16b, v2.16b, v2.16b, #8
    806  1.1  christos 	eor	v10.16b, v10.16b, v5.16b
    807  1.1  christos 	ext	v22.16b, v4.16b, v4.16b, #8
    808  1.1  christos 	eor	v0.16b, v0.16b, v8.16b
    809  1.1  christos 	eor	v1.16b, v1.16b, v16.16b
    810  1.1  christos 	eor	v5.16b, v7.16b, v18.16b
    811  1.1  christos 	eor	v4.16b, v3.16b, v17.16b
    812  1.1  christos 	eor	v3.16b, v6.16b, v10.16b
    813  1.1  christos 	eor	v7.16b, v21.16b, v20.16b
    814  1.1  christos 	eor	v6.16b, v2.16b, v19.16b
    815  1.1  christos 	eor	v2.16b, v22.16b, v9.16b
    816  1.1  christos 	bne	.Lenc_loop
    817  1.1  christos 	ldr	q28, [x11, #16]!            // load from .LSRM0 on last round (x10 == 0)
    818  1.1  christos 	b	.Lenc_loop
    819  1.1  christos .align	4
    820  1.1  christos .Lenc_done:
    821  1.1  christos 	ushr	v8.2d, v0.2d, #1
    822  1.1  christos 	movi	v9.16b, #0x55
    823  1.1  christos 	ldr	q10, [x9]
    824  1.1  christos 	ushr	v16.2d, v3.2d, #1
    825  1.1  christos 	movi	v17.16b, #0x33
    826  1.1  christos 	ushr	v18.2d, v4.2d, #1
    827  1.1  christos 	movi	v19.16b, #0x0f
    828  1.1  christos 	eor	v8.16b, v8.16b, v1.16b
    829  1.1  christos 	ushr	v20.2d, v2.2d, #1
    830  1.1  christos 	eor	v16.16b, v16.16b, v7.16b
    831  1.1  christos 	eor	v18.16b, v18.16b, v6.16b
    832  1.1  christos 	and	v8.16b, v8.16b, v9.16b
    833  1.1  christos 	eor	v20.16b, v20.16b, v5.16b
    834  1.1  christos 	and	v16.16b, v16.16b, v9.16b
    835  1.1  christos 	and	v18.16b, v18.16b, v9.16b
    836  1.1  christos 	shl	v21.2d, v8.2d, #1
    837  1.1  christos 	eor	v1.16b, v1.16b, v8.16b
    838  1.1  christos 	and	v8.16b, v20.16b, v9.16b
    839  1.1  christos 	eor	v7.16b, v7.16b, v16.16b
    840  1.1  christos 	shl	v9.2d, v16.2d, #1
    841  1.1  christos 	eor	v6.16b, v6.16b, v18.16b
    842  1.1  christos 	shl	v16.2d, v18.2d, #1
    843  1.1  christos 	eor	v0.16b, v0.16b, v21.16b
    844  1.1  christos 	shl	v18.2d, v8.2d, #1
    845  1.1  christos 	eor	v5.16b, v5.16b, v8.16b
    846  1.1  christos 	eor	v3.16b, v3.16b, v9.16b
    847  1.1  christos 	eor	v4.16b, v4.16b, v16.16b
    848  1.1  christos 	ushr	v8.2d, v1.2d, #2
    849  1.1  christos 	eor	v2.16b, v2.16b, v18.16b
    850  1.1  christos 	ushr	v9.2d, v0.2d, #2
    851  1.1  christos 	ushr	v16.2d, v7.2d, #2
    852  1.1  christos 	ushr	v18.2d, v3.2d, #2
    853  1.1  christos 	eor	v8.16b, v8.16b, v6.16b
    854  1.1  christos 	eor	v9.16b, v9.16b, v4.16b
    855  1.1  christos 	eor	v16.16b, v16.16b, v5.16b
    856  1.1  christos 	eor	v18.16b, v18.16b, v2.16b
    857  1.1  christos 	and	v8.16b, v8.16b, v17.16b
    858  1.1  christos 	and	v9.16b, v9.16b, v17.16b
    859  1.1  christos 	and	v16.16b, v16.16b, v17.16b
    860  1.1  christos 	and	v17.16b, v18.16b, v17.16b
    861  1.1  christos 	eor	v6.16b, v6.16b, v8.16b
    862  1.1  christos 	shl	v8.2d, v8.2d, #2
    863  1.1  christos 	eor	v4.16b, v4.16b, v9.16b
    864  1.1  christos 	shl	v9.2d, v9.2d, #2
    865  1.1  christos 	eor	v5.16b, v5.16b, v16.16b
    866  1.1  christos 	shl	v16.2d, v16.2d, #2
    867  1.1  christos 	eor	v2.16b, v2.16b, v17.16b
    868  1.1  christos 	shl	v17.2d, v17.2d, #2
    869  1.1  christos 	eor	v1.16b, v1.16b, v8.16b
    870  1.1  christos 	eor	v0.16b, v0.16b, v9.16b
    871  1.1  christos 	eor	v7.16b, v7.16b, v16.16b
    872  1.1  christos 	eor	v3.16b, v3.16b, v17.16b
    873  1.1  christos 	ushr	v8.2d, v6.2d, #4
    874  1.1  christos 	ushr	v9.2d, v4.2d, #4
    875  1.1  christos 	ushr	v16.2d, v1.2d, #4
    876  1.1  christos 	ushr	v17.2d, v0.2d, #4
    877  1.1  christos 	eor	v8.16b, v8.16b, v5.16b
    878  1.1  christos 	eor	v9.16b, v9.16b, v2.16b
    879  1.1  christos 	eor	v16.16b, v16.16b, v7.16b
    880  1.1  christos 	eor	v17.16b, v17.16b, v3.16b
    881  1.1  christos 	and	v8.16b, v8.16b, v19.16b
    882  1.1  christos 	and	v9.16b, v9.16b, v19.16b
    883  1.1  christos 	and	v16.16b, v16.16b, v19.16b
    884  1.1  christos 	and	v17.16b, v17.16b, v19.16b
    885  1.1  christos 	eor	v5.16b, v5.16b, v8.16b
    886  1.1  christos 	shl	v8.2d, v8.2d, #4
    887  1.1  christos 	eor	v2.16b, v2.16b, v9.16b
    888  1.1  christos 	shl	v9.2d, v9.2d, #4
    889  1.1  christos 	eor	v7.16b, v7.16b, v16.16b
    890  1.1  christos 	shl	v16.2d, v16.2d, #4
    891  1.1  christos 	eor	v3.16b, v3.16b, v17.16b
    892  1.1  christos 	shl	v17.2d, v17.2d, #4
    893  1.1  christos 	eor	v6.16b, v6.16b, v8.16b
    894  1.1  christos 	eor	v4.16b, v4.16b, v9.16b
    895  1.1  christos 	eor	v7.16b, v7.16b, v10.16b
    896  1.1  christos 	eor	v1.16b, v1.16b, v16.16b
    897  1.1  christos 	eor	v3.16b, v3.16b, v10.16b
    898  1.1  christos 	eor	v0.16b, v0.16b, v17.16b
    899  1.1  christos 	eor	v6.16b, v6.16b, v10.16b
    900  1.1  christos 	eor	v4.16b, v4.16b, v10.16b
    901  1.1  christos 	eor	v2.16b, v2.16b, v10.16b
    902  1.1  christos 	eor	v5.16b, v5.16b, v10.16b
    903  1.1  christos 	eor	v1.16b, v1.16b, v10.16b
    904  1.1  christos 	eor	v0.16b, v0.16b, v10.16b
    905  1.1  christos 	ret
    906  1.1  christos .size	_bsaes_encrypt8,.-_bsaes_encrypt8
    907  1.1  christos 
    908  1.1  christos .type	_bsaes_key_convert,%function
    909  1.1  christos .align	4
    910  1.1  christos // Converts the round keys at x9 into the bit-sliced layout at x17.  On entry:
    911  1.1  christos //   x9 -> input key (big-endian)
    912  1.1  christos //   x10 = number of rounds
    913  1.1  christos //   x17 -> output key (native endianness)
    914  1.1  christos // On exit:
    915  1.1  christos //   x9, x10 corrupted
    916  1.1  christos //   x11 -> .LM0_bigendian
    917  1.1  christos //   x17 -> last quadword of output key (not written here; the caller stores it)
    918  1.1  christos //   other general-purpose registers preserved
    919  1.1  christos //   v2-v6 preserved
    920  1.1  christos //   v7.16b[] = 0x63
    921  1.1  christos //   v8-v14 preserved
    922  1.1  christos //   v15 = last round key (converted to native endianness)
    923  1.1  christos //   other SIMD registers corrupted
    924  1.1  christos _bsaes_key_convert:
    925  1.1  christos #ifdef __AARCH64EL__
    926  1.1  christos 	adrp	x11, .LM0_littleendian      // x11 -> tbl index vector matching the build endianness
    927  1.1  christos 	add	x11, x11, #:lo12:.LM0_littleendian
    928  1.1  christos #else
    929  1.1  christos 	adrp	x11, .LM0_bigendian
    930  1.1  christos 	add	x11, x11, #:lo12:.LM0_bigendian
    931  1.1  christos #endif
    932  1.1  christos 	ldr	q0, [x9], #16               // load round 0 key
    933  1.1  christos 	ldr	q1, [x11]                   // .LM0 (used as the tbl index vector in the loop)
    934  1.1  christos 	ldr	q15, [x9], #16              // load round 1 key
    935  1.1  christos 
    936  1.1  christos 	movi	v7.16b, #0x63               // compose .L63
    937  1.1  christos 	movi	v16.16b, #0x01              // bit masks: v16..v23 select bit 0..bit 7 of every byte
    938  1.1  christos 	movi	v17.16b, #0x02
    939  1.1  christos 	movi	v18.16b, #0x04
    940  1.1  christos 	movi	v19.16b, #0x08
    941  1.1  christos 	movi	v20.16b, #0x10
    942  1.1  christos 	movi	v21.16b, #0x20
    943  1.1  christos 	movi	v22.16b, #0x40
    944  1.1  christos 	movi	v23.16b, #0x80
    945  1.1  christos 
    946  1.1  christos #ifdef __AARCH64EL__
    947  1.1  christos 	rev32	v0.16b, v0.16b              // byte-swap each 32-bit word of the round 0 key
    948  1.1  christos #endif
    949  1.1  christos 	sub	x10, x10, #1                // with the decrement in the loop, the loop body runs (rounds - 1) times
    950  1.1  christos 	str	q0, [x17], #16              // save round 0 key
    951  1.1  christos 
    952  1.1  christos .align	4
    953  1.1  christos .Lkey_loop:
    954  1.1  christos 	tbl	v0.16b, {v15.16b}, v1.16b   // permute the bytes of the current round key with .LM0
    955  1.1  christos 	ldr	q15, [x9], #16              // load next round key
    956  1.1  christos 
    957  1.1  christos 	eor	v0.16b, v0.16b, v7.16b      // XOR every key byte with 0x63
    958  1.1  christos 	cmtst	v24.16b, v0.16b, v16.16b    // v24 byte = 0xff where bit 0 of the key byte is set, else 0
    959  1.1  christos 	cmtst	v25.16b, v0.16b, v17.16b    // likewise for bit 1
    960  1.1  christos 	cmtst	v26.16b, v0.16b, v18.16b    // bit 2
    961  1.1  christos 	cmtst	v27.16b, v0.16b, v19.16b    // bit 3
    962  1.1  christos 	cmtst	v28.16b, v0.16b, v20.16b    // bit 4
    963  1.1  christos 	cmtst	v29.16b, v0.16b, v21.16b    // bit 5
    964  1.1  christos 	cmtst	v30.16b, v0.16b, v22.16b    // bit 6
    965  1.1  christos 	cmtst	v31.16b, v0.16b, v23.16b    // bit 7
    966  1.1  christos 	sub	x10, x10, #1
    967  1.1  christos 	st1	{v24.16b,v25.16b,v26.16b,v27.16b}, [x17], #64 // write bit-sliced round key
    968  1.1  christos 	st1	{v28.16b,v29.16b,v30.16b,v31.16b}, [x17], #64 // 8 mask vectors = 128 bytes per round key
    969  1.1  christos 	cbnz	x10, .Lkey_loop
    970  1.1  christos 
    971  1.1  christos         // don't save last round key: it is handed back in v15 for the caller to adjust and store
    972  1.1  christos #ifdef __AARCH64EL__
    973  1.1  christos 	rev32	v15.16b, v15.16b            // byte-swap each 32-bit word of the last round key
    974  1.1  christos 	adrp	x11, .LM0_bigendian         // exit contract: x11 -> .LM0_bigendian on either endianness
    975  1.1  christos 	add	x11, x11, #:lo12:.LM0_bigendian
    976  1.1  christos #endif
    977  1.1  christos 	ret
    978  1.1  christos .size	_bsaes_key_convert,.-_bsaes_key_convert
    979  1.1  christos 
    980  1.1  christos .globl	ossl_bsaes_cbc_encrypt
    981  1.1  christos .type	ossl_bsaes_cbc_encrypt,%function
    982  1.1  christos .align	4
    983  1.1  christos // CBC decryption, eight blocks at a time (despite the name, only enc == 0 is handled).  On entry:
    984  1.1  christos //   x0 -> input ciphertext
    985  1.1  christos //   x1 -> output plaintext
    986  1.1  christos //   x2 = size of ciphertext and plaintext in bytes (assumed a multiple of 16)
    987  1.1  christos //   x3 -> key
    988  1.1  christos //   x4 -> 128-bit initialisation vector (or preceding 128-bit block of ciphertext if continuing after an earlier call)
    989  1.1  christos //   w5 must be == 0
    990  1.1  christos // On exit:
    991  1.1  christos //   Output plaintext filled in
    992  1.1  christos //   Initialisation vector overwritten with last quadword of ciphertext
    993  1.1  christos //   No output registers, usual AAPCS64 register preservation
    994  1.1  christos ossl_bsaes_cbc_encrypt:
    995  1.1  christos 	AARCH64_VALID_CALL_TARGET
    996  1.1  christos 	cmp	x2, #128                    // at least 128 bytes (8 blocks)?
    997  1.1  christos 	bhs	.Lcbc_do_bsaes
    998  1.1  christos 	b	AES_cbc_encrypt             // no: tail-call AES_cbc_encrypt with the arguments untouched
    999  1.1  christos .Lcbc_do_bsaes:
   1000  1.1  christos 
   1001  1.1  christos         // it is up to the caller to make sure we are called with enc == 0
   1002  1.1  christos 
   1003  1.1  christos 	stp	x29, x30, [sp, #-48]!       // 48-byte frame: x29/x30, d8, d9, d10, d15
   1004  1.1  christos 	stp	d8, d9, [sp, #16]
   1005  1.1  christos 	stp	d10, d15, [sp, #32]
   1006  1.1  christos 	lsr	x2, x2, #4                  // len in 16 byte blocks
   1007  1.1  christos 
   1008  1.1  christos 	ldr	w15, [x3, #240]             // get # of rounds
   1009  1.1  christos 	mov	x14, sp                     // x14 = frame address; also the upper bound for the key wipe at exit
   1010  1.1  christos 
   1011  1.1  christos         // allocate the key schedule on the stack
   1012  1.1  christos 	add	x17, sp, #96
   1013  1.1  christos 	sub	x17, x17, x15, lsl #7       // 128 bytes per inner round key, less 96 bytes
   1014  1.1  christos 
   1015  1.1  christos         // populate the key schedule
   1016  1.1  christos 	mov	x9, x3                      // pass key
   1017  1.1  christos 	mov	x10, x15                    // pass # of rounds
   1018  1.1  christos 	mov	sp, x17                     // drop sp to the base of the key schedule
   1019  1.1  christos 	bl	_bsaes_key_convert
   1020  1.1  christos 	ldr	q6,  [sp]
   1021  1.1  christos 	str	q15, [x17]                  // save last round key
   1022  1.1  christos 	eor	v6.16b, v6.16b, v7.16b      // fix up round 0 key (by XORing with 0x63)
   1023  1.1  christos 	str	q6, [sp]
   1024  1.1  christos 
   1025  1.1  christos 	ldr	q15, [x4]                   // load IV
   1026  1.1  christos 	b	.Lcbc_dec_loop
   1027  1.1  christos 
   1028  1.1  christos .align	4
   1029  1.1  christos .Lcbc_dec_loop:
   1030  1.1  christos 	subs	x2, x2, #0x8                // are at least eight blocks left?
   1031  1.1  christos 	bmi	.Lcbc_dec_loop_finish       // no: go and handle the 0-7 block tail
   1032  1.1  christos 
   1033  1.1  christos 	ldr	q0, [x0], #16               // load input
   1034  1.1  christos 	mov	x9, sp                      // pass the key
   1035  1.1  christos 	ldr	q1, [x0], #16
   1036  1.1  christos 	mov	x10, x15                    // pass # of rounds
   1037  1.1  christos 	ldr	q2, [x0], #16
   1038  1.1  christos 	ldr	q3, [x0], #16
   1039  1.1  christos 	ldr	q4, [x0], #16
   1040  1.1  christos 	ldr	q5, [x0], #16
   1041  1.1  christos 	ldr	q6, [x0], #16
   1042  1.1  christos 	ldr	q7, [x0], #-7*16            // load the 8th block, then rewind x0 to the 1st for the reload below
   1043  1.1  christos 
   1044  1.1  christos 	bl	_bsaes_decrypt8
   1045  1.1  christos         // results for blocks 0..7 are taken from v0, v1, v6, v4, v2, v7, v3, v5 (in that order) below
   1046  1.1  christos 	ldr	q16, [x0], #16              // reload input
   1047  1.1  christos 	eor	v0.16b, v0.16b, v15.16b     // ^= IV
   1048  1.1  christos 	eor	v1.16b, v1.16b, v16.16b     // every later block is XORed with the preceding ciphertext block
   1049  1.1  christos 	str	q0, [x1], #16               // write output
   1050  1.1  christos 	ldr	q0, [x0], #16
   1051  1.1  christos 	str	q1, [x1], #16
   1052  1.1  christos 	ldr	q1, [x0], #16
   1053  1.1  christos 	eor	v1.16b, v4.16b, v1.16b
   1054  1.1  christos 	ldr	q4, [x0], #16
   1055  1.1  christos 	eor	v2.16b, v2.16b, v4.16b
   1056  1.1  christos 	eor	v0.16b, v6.16b, v0.16b
   1057  1.1  christos 	ldr	q4, [x0], #16
   1058  1.1  christos 	str	q0, [x1], #16
   1059  1.1  christos 	str	q1, [x1], #16
   1060  1.1  christos 	eor	v0.16b, v7.16b, v4.16b
   1061  1.1  christos 	ldr	q1, [x0], #16
   1062  1.1  christos 	str	q2, [x1], #16
   1063  1.1  christos 	ldr	q2, [x0], #16
   1064  1.1  christos 	ldr	q15, [x0], #16              // 8th ciphertext block becomes the IV for the next batch
   1065  1.1  christos 	str	q0, [x1], #16
   1066  1.1  christos 	eor	v0.16b, v5.16b, v2.16b
   1067  1.1  christos 	eor	v1.16b, v3.16b, v1.16b
   1068  1.1  christos 	str	q1, [x1], #16
   1069  1.1  christos 	str	q0, [x1], #16
   1070  1.1  christos 
   1071  1.1  christos 	b	.Lcbc_dec_loop
   1072  1.1  christos 
   1073  1.1  christos .Lcbc_dec_loop_finish:
   1074  1.1  christos 	adds	x2, x2, #8                  // undo the subtraction: x2 = blocks remaining (0-7)
   1075  1.1  christos 	beq	.Lcbc_dec_done
   1076  1.1  christos 
   1077  1.1  christos 	ldr	q0, [x0], #16               // load input
   1078  1.1  christos 	cmp	x2, #2
   1079  1.1  christos 	blo	.Lcbc_dec_one
   1080  1.1  christos 	ldr	q1, [x0], #16
   1081  1.1  christos 	mov	x9, sp                      // pass the key
   1082  1.1  christos 	mov	x10, x15                    // pass # of rounds
   1083  1.1  christos 	beq	.Lcbc_dec_two               // flags are still those of cmp x2, #2
   1084  1.1  christos 	ldr	q2, [x0], #16
   1085  1.1  christos 	cmp	x2, #4
   1086  1.1  christos 	blo	.Lcbc_dec_three
   1087  1.1  christos 	ldr	q3, [x0], #16
   1088  1.1  christos 	beq	.Lcbc_dec_four              // flags are still those of cmp x2, #4
   1089  1.1  christos 	ldr	q4, [x0], #16
   1090  1.1  christos 	cmp	x2, #6
   1091  1.1  christos 	blo	.Lcbc_dec_five
   1092  1.1  christos 	ldr	q5, [x0], #16
   1093  1.1  christos 	beq	.Lcbc_dec_six               // flags are still those of cmp x2, #6
   1094  1.1  christos 	ldr	q6, [x0], #-6*16            // seven blocks: load the 7th, then rewind x0 to the 1st
   1095  1.1  christos 
   1096  1.1  christos 	bl	_bsaes_decrypt8
   1097  1.1  christos         // seven results are taken from v0, v1, v6, v4, v2, v7, v3 (in that order) below
   1098  1.1  christos 	ldr	q5, [x0], #16               // reload input
   1099  1.1  christos 	eor	v0.16b, v0.16b, v15.16b     // ^= IV
   1100  1.1  christos 	ldr	q8, [x0], #16
   1101  1.1  christos 	ldr	q9, [x0], #16
   1102  1.1  christos 	ldr	q10, [x0], #16
   1103  1.1  christos 	str	q0, [x1], #16               // write output
   1104  1.1  christos 	ldr	q0, [x0], #16
   1105  1.1  christos 	eor	v1.16b, v1.16b, v5.16b
   1106  1.1  christos 	ldr	q5, [x0], #16
   1107  1.1  christos 	eor	v6.16b, v6.16b, v8.16b
   1108  1.1  christos 	ldr	q15, [x0]                   // last ciphertext block = IV to hand back
   1109  1.1  christos 	eor	v4.16b, v4.16b, v9.16b
   1110  1.1  christos 	eor	v2.16b, v2.16b, v10.16b
   1111  1.1  christos 	str	q1, [x1], #16
   1112  1.1  christos 	eor	v0.16b, v7.16b, v0.16b
   1113  1.1  christos 	str	q6, [x1], #16
   1114  1.1  christos 	eor	v1.16b, v3.16b, v5.16b
   1115  1.1  christos 	str	q4, [x1], #16
   1116  1.1  christos 	str	q2, [x1], #16
   1117  1.1  christos 	str	q0, [x1], #16
   1118  1.1  christos 	str	q1, [x1]
   1119  1.1  christos 	b	.Lcbc_dec_done
   1120  1.1  christos .align	4
   1121  1.1  christos .Lcbc_dec_six:
   1122  1.1  christos 	sub	x0, x0, #0x60               // rewind x0 over the six blocks just loaded
   1123  1.1  christos 	bl	_bsaes_decrypt8
   1124  1.1  christos 	ldr	q3, [x0], #16               // reload input
   1125  1.1  christos 	eor	v0.16b, v0.16b, v15.16b     // ^= IV
   1126  1.1  christos 	ldr	q5, [x0], #16
   1127  1.1  christos 	ldr	q8, [x0], #16
   1128  1.1  christos 	ldr	q9, [x0], #16
   1129  1.1  christos 	str	q0, [x1], #16               // write output
   1130  1.1  christos 	ldr	q0, [x0], #16
   1131  1.1  christos 	eor	v1.16b, v1.16b, v3.16b
   1132  1.1  christos 	ldr	q15, [x0]                   // last ciphertext block = IV to hand back
   1133  1.1  christos 	eor	v3.16b, v6.16b, v5.16b
   1134  1.1  christos 	eor	v4.16b, v4.16b, v8.16b
   1135  1.1  christos 	eor	v2.16b, v2.16b, v9.16b
   1136  1.1  christos 	str	q1, [x1], #16
   1137  1.1  christos 	eor	v0.16b, v7.16b, v0.16b
   1138  1.1  christos 	str	q3, [x1], #16
   1139  1.1  christos 	str	q4, [x1], #16
   1140  1.1  christos 	str	q2, [x1], #16
   1141  1.1  christos 	str	q0, [x1]
   1142  1.1  christos 	b	.Lcbc_dec_done
   1143  1.1  christos .align	4
   1144  1.1  christos .Lcbc_dec_five:
   1145  1.1  christos 	sub	x0, x0, #0x50               // rewind x0 over the five blocks just loaded
   1146  1.1  christos 	bl	_bsaes_decrypt8
   1147  1.1  christos 	ldr	q3, [x0], #16               // reload input
   1148  1.1  christos 	eor	v0.16b, v0.16b, v15.16b     // ^= IV
   1149  1.1  christos 	ldr	q5, [x0], #16
   1150  1.1  christos 	ldr	q7, [x0], #16
   1151  1.1  christos 	ldr	q8, [x0], #16
   1152  1.1  christos 	str	q0, [x1], #16               // write output
   1153  1.1  christos 	ldr	q15, [x0]                   // last ciphertext block = IV to hand back
   1154  1.1  christos 	eor	v0.16b, v1.16b, v3.16b
   1155  1.1  christos 	eor	v1.16b, v6.16b, v5.16b
   1156  1.1  christos 	eor	v3.16b, v4.16b, v7.16b
   1157  1.1  christos 	str	q0, [x1], #16
   1158  1.1  christos 	eor	v0.16b, v2.16b, v8.16b
   1159  1.1  christos 	str	q1, [x1], #16
   1160  1.1  christos 	str	q3, [x1], #16
   1161  1.1  christos 	str	q0, [x1]
   1162  1.1  christos 	b	.Lcbc_dec_done
   1163  1.1  christos .align	4
   1164  1.1  christos .Lcbc_dec_four:
   1165  1.1  christos 	sub	x0, x0, #0x40               // rewind x0 over the four blocks just loaded
   1166  1.1  christos 	bl	_bsaes_decrypt8
   1167  1.1  christos 	ldr	q2, [x0], #16               // reload input
   1168  1.1  christos 	eor	v0.16b, v0.16b, v15.16b     // ^= IV
   1169  1.1  christos 	ldr	q3, [x0], #16
   1170  1.1  christos 	ldr	q5, [x0], #16
   1171  1.1  christos 	str	q0, [x1], #16               // write output
   1172  1.1  christos 	ldr	q15, [x0]                   // last ciphertext block = IV to hand back
   1173  1.1  christos 	eor	v0.16b, v1.16b, v2.16b
   1174  1.1  christos 	eor	v1.16b, v6.16b, v3.16b
   1175  1.1  christos 	eor	v2.16b, v4.16b, v5.16b
   1176  1.1  christos 	str	q0, [x1], #16
   1177  1.1  christos 	str	q1, [x1], #16
   1178  1.1  christos 	str	q2, [x1]
   1179  1.1  christos 	b	.Lcbc_dec_done
   1180  1.1  christos .align	4
   1181  1.1  christos .Lcbc_dec_three:
   1182  1.1  christos 	sub	x0, x0, #0x30               // rewind x0 over the three blocks just loaded
   1183  1.1  christos 	bl	_bsaes_decrypt8
   1184  1.1  christos 	ldr	q2, [x0], #16               // reload input
   1185  1.1  christos 	eor	v0.16b, v0.16b, v15.16b     // ^= IV
   1186  1.1  christos 	ldr	q3, [x0], #16
   1187  1.1  christos 	ldr	q15, [x0]                   // last ciphertext block = IV to hand back
   1188  1.1  christos 	str	q0, [x1], #16               // write output
   1189  1.1  christos 	eor	v0.16b, v1.16b, v2.16b
   1190  1.1  christos 	eor	v1.16b, v6.16b, v3.16b
   1191  1.1  christos 	str	q0, [x1], #16
   1192  1.1  christos 	str	q1, [x1]
   1193  1.1  christos 	b	.Lcbc_dec_done
   1194  1.1  christos .align	4
   1195  1.1  christos .Lcbc_dec_two:
   1196  1.1  christos 	sub	x0, x0, #0x20               // rewind x0 over the two blocks just loaded
   1197  1.1  christos 	bl	_bsaes_decrypt8
   1198  1.1  christos 	ldr	q2, [x0], #16               // reload input
   1199  1.1  christos 	eor	v0.16b, v0.16b, v15.16b     // ^= IV
   1200  1.1  christos 	ldr	q15, [x0]                   // last ciphertext block = IV to hand back
   1201  1.1  christos 	str	q0, [x1], #16               // write output
   1202  1.1  christos 	eor	v0.16b, v1.16b, v2.16b
   1203  1.1  christos 	str	q0, [x1]
   1204  1.1  christos 	b	.Lcbc_dec_done
   1205  1.1  christos .align	4
   1206  1.1  christos .Lcbc_dec_one:
   1207  1.1  christos 	sub	x0, x0, #0x10               // rewind x0 to the single remaining block
   1208  1.1  christos 	stp	x1, x4, [sp, #-32]!         // keep output pointer, IV pointer and x14 across the call
   1209  1.1  christos 	str	x14, [sp, #16]
   1210  1.1  christos 	mov	v8.16b, v15.16b             // v8 = current IV; NOTE(review): AAPCS64 preserves only the low 64 bits of v8-v15 across calls - confirm AES_decrypt leaves all of q8/q15 intact
   1211  1.1  christos 	mov	v15.16b, v0.16b             // v15 = this ciphertext block = IV to hand back
   1212  1.1  christos 	mov	x2, x3                      // pass the key (x0 and x1 still hold the input and output pointers)
   1213  1.1  christos 	bl	AES_decrypt
   1214  1.1  christos 	ldr	x14, [sp, #16]
   1215  1.1  christos 	ldp	x1, x4, [sp], #32
   1216  1.1  christos 	ldr	q0, [x1]                    // load result
   1217  1.1  christos 	eor	v0.16b, v0.16b, v8.16b      // ^= IV
   1218  1.1  christos 	str	q0, [x1]                    // write output
   1219  1.1  christos 
   1220  1.1  christos .align	4
   1221  1.1  christos .Lcbc_dec_done:
   1222  1.1  christos 	movi	v0.16b, #0
   1223  1.1  christos 	movi	v1.16b, #0
   1224  1.1  christos .Lcbc_dec_bzero:	//	wipe key schedule [if any]
   1225  1.1  christos 	stp	q0, q1, [sp], #32           // zero 32 bytes and release them from the stack
   1226  1.1  christos 	cmp	sp, x14                     // stop once sp is back at the frame address saved on entry
   1227  1.1  christos 	bne	.Lcbc_dec_bzero
   1228  1.1  christos 	str	q15, [x4]                   // return IV
   1229  1.1  christos 	ldp	d8, d9, [sp, #16]
   1230  1.1  christos 	ldp	d10, d15, [sp, #32]
   1231  1.1  christos 	ldp	x29, x30, [sp], #48
   1232  1.1  christos 	ret
   1233  1.1  christos .size	ossl_bsaes_cbc_encrypt,.-ossl_bsaes_cbc_encrypt
   1234  1.1  christos 
   1235  1.1  christos .globl	ossl_bsaes_ctr32_encrypt_blocks	// CTR-mode XOR of whole 16-byte blocks with an encrypted counter stream
   1236  1.1  christos .type	ossl_bsaes_ctr32_encrypt_blocks,%function
   1237  1.1  christos .align	4
   1238  1.1  christos // On entry:
   1239  1.1  christos //   x0 -> input text (whole 16-byte blocks)
   1240  1.1  christos //   x1 -> output text (whole 16-byte blocks)
   1241  1.1  christos //   x2 = number of 16-byte blocks to encrypt/decrypt (> 0); fewer than 8 selects the one-block-at-a-time AES_encrypt path
   1242  1.1  christos //   x3 -> key (the number of rounds is loaded from byte offset 240)
   1243  1.1  christos //   x4 -> initial value of 128-bit counter (stored big-endian) which increments, modulo 2^32, for each block; this routine only loads from it
   1244  1.1  christos // On exit:
   1245  1.1  christos //   Output text filled in
   1246  1.1  christos //   No output registers, usual AAPCS64 register preservation
   1247  1.1  christos ossl_bsaes_ctr32_encrypt_blocks:
   1248  1.1  christos 	AARCH64_VALID_CALL_TARGET
   1249  1.1  christos 	cmp	x2, #8                      // use plain AES for
   1250  1.1  christos 	blo	.Lctr_enc_short             // small sizes
   1251  1.1  christos 
   1252  1.1  christos 	stp	x29, x30, [sp, #-80]!       // 80-byte frame: frame record + d8-d15
   1253  1.1  christos 	stp	d8, d9, [sp, #16]           // v8-v15 are used below, so save their
   1254  1.1  christos 	stp	d10, d11, [sp, #32]         // callee-saved low halves
   1255  1.1  christos 	stp	d12, d13, [sp, #48]
   1256  1.1  christos 	stp	d14, d15, [sp, #64]
   1257  1.1  christos 
   1258  1.1  christos 	ldr	w15, [x3, #240]             // get # of rounds
   1259  1.1  christos 	mov	x14, sp                     // x14 = frame base; also the end address for the key schedule wipe loop
   1260  1.1  christos 
   1261  1.1  christos         // allocate the key schedule on the stack
   1262  1.1  christos 	add	x17, sp, #96                // x17 = sp + 96 - rounds*128
   1263  1.1  christos 	sub	x17, x17, x15, lsl #7       // 128 bytes per inner round key, less 96 bytes
   1264  1.1  christos 
   1265  1.1  christos         // populate the key schedule
   1266  1.1  christos 	mov	x9, x3                      // pass key
   1267  1.1  christos 	mov	x10, x15                    // pass # of rounds
   1268  1.1  christos 	mov	sp, x17                     // sp is sp
   1269  1.1  christos 	bl	_bsaes_key_convert
   1270  1.1  christos 	eor	v7.16b, v7.16b, v15.16b     // fix up last round key
   1271  1.1  christos 	str	q7, [x17]                   // save last round key
   1272  1.1  christos 
   1273  1.1  christos 	ldr	q0, [x4]                    // load counter
   1274  1.1  christos 	add	x13, x11, #.LREVM0SR-.LM0_bigendian	// x13 -> .LREVM0SR; presumably x11 -> .LM0_bigendian on return from _bsaes_key_convert - TODO confirm
   1275  1.1  christos 	ldr	q4, [sp]                    // load round0 key
   1276  1.1  christos 
   1277  1.1  christos 	movi	v8.4s, #1                   // compose 1<<96
   1278  1.1  christos 	movi	v9.16b, #0                  // zero vector, shifted in by the ext below
   1279  1.1  christos 	rev32	v15.16b, v0.16b             // v15 = counter with the bytes of each 32-bit word reversed
   1280  1.1  christos 	rev32	v0.16b, v0.16b              // v0 = same value: counter +0 for the first batch
   1281  1.1  christos 	ext	v11.16b, v9.16b, v8.16b, #4 // v11 = 1 in the highest 32-bit lane, 0 elsewhere
   1282  1.1  christos 	rev32	v4.16b, v4.16b              // byte-reverse each word of the round0 key to match the reversed counter
   1283  1.1  christos 	add	v12.4s, v11.4s, v11.4s      // compose 2<<96
   1284  1.1  christos 	str	q4, [sp]                    // save adjusted round0 key
   1285  1.1  christos 	add	v13.4s, v11.4s, v12.4s      // compose 3<<96
   1286  1.1  christos 	add	v14.4s, v12.4s, v12.4s      // compose 4<<96
   1287  1.1  christos 	b	.Lctr_enc_loop
   1288  1.1  christos 
   1289  1.1  christos .align	4
   1290  1.1  christos .Lctr_enc_loop:
   1291  1.1  christos         // Intermix prologue from _bsaes_encrypt8 to use the opportunity
   1292  1.1  christos         // to flip byte order in 32-bit counter
   1293  1.1  christos 
   1294  1.1  christos 	add	v1.4s, v15.4s, v11.4s       // +1
   1295  1.1  christos 	add	x9, sp, #0x10               // pass next round key
   1296  1.1  christos 	add	v2.4s, v15.4s, v12.4s       // +2
   1297  1.1  christos 	ldr	q9, [x13]                   // .LREVM0SR
   1298  1.1  christos 	ldr	q8, [sp]                    // load round0 key
   1299  1.1  christos 	add	v3.4s, v15.4s, v13.4s       // +3
   1300  1.1  christos 	mov	x10, x15                    // pass rounds
   1301  1.1  christos 	sub	x11, x13, #.LREVM0SR-.LSR   // pass constants
   1302  1.1  christos 	add	v6.4s, v2.4s, v14.4s        // +6
   1303  1.1  christos 	add	v4.4s, v15.4s, v14.4s       // +4
   1304  1.1  christos 	add	v7.4s, v3.4s, v14.4s        // +7
   1305  1.1  christos 	add	v15.4s, v4.4s, v14.4s       // next counter
   1306  1.1  christos 	add	v5.4s, v1.4s, v14.4s        // +5
   1307  1.1  christos 
   1308  1.1  christos 	bl	_bsaes_encrypt8_alt         // the code below consumes the results in the order v0, v1, v4, v6, v3, v7, v2, v5
   1309  1.1  christos 
   1310  1.1  christos 	subs	x2, x2, #8                  // x2 = blocks left after this batch
   1311  1.1  christos 	blo	.Lctr_enc_loop_done         // fewer than 8 were left: handle a partial batch
   1312  1.1  christos 
   1313  1.1  christos 	ldr	q16, [x0], #16              // input block 0
   1314  1.1  christos 	ldr	q17, [x0], #16              // input block 1
   1315  1.1  christos 	eor	v1.16b, v1.16b, v17.16b     // output block 1
   1316  1.1  christos 	ldr	q17, [x0], #16              // input block 2
   1317  1.1  christos 	eor	v0.16b, v0.16b, v16.16b     // output block 0
   1318  1.1  christos 	eor	v4.16b, v4.16b, v17.16b     // output block 2
   1319  1.1  christos 	str	q0, [x1], #16
   1320  1.1  christos 	ldr	q16, [x0], #16              // input block 3
   1321  1.1  christos 	str	q1, [x1], #16
   1322  1.1  christos 	mov	v0.16b, v15.16b             // v0 = counter +0 for the next batch
   1323  1.1  christos 	str	q4, [x1], #16
   1324  1.1  christos 	ldr	q1, [x0], #16               // input block 4
   1325  1.1  christos 	eor	v4.16b, v6.16b, v16.16b     // output block 3
   1326  1.1  christos 	eor	v1.16b, v3.16b, v1.16b      // output block 4
   1327  1.1  christos 	ldr	q3, [x0], #16               // input block 5
   1328  1.1  christos 	eor	v3.16b, v7.16b, v3.16b      // output block 5
   1329  1.1  christos 	ldr	q6, [x0], #16               // input block 6
   1330  1.1  christos 	eor	v2.16b, v2.16b, v6.16b      // output block 6
   1331  1.1  christos 	ldr	q6, [x0], #16               // input block 7
   1332  1.1  christos 	eor	v5.16b, v5.16b, v6.16b      // output block 7
   1333  1.1  christos 	str	q4, [x1], #16
   1334  1.1  christos 	str	q1, [x1], #16
   1335  1.1  christos 	str	q3, [x1], #16
   1336  1.1  christos 	str	q2, [x1], #16
   1337  1.1  christos 	str	q5, [x1], #16
   1338  1.1  christos 
   1339  1.1  christos 	bne	.Lctr_enc_loop              // flags are still those of the subs above: loop while blocks remain
   1340  1.1  christos 	b	.Lctr_enc_done
   1341  1.1  christos 
   1342  1.1  christos .align	4
   1343  1.1  christos .Lctr_enc_loop_done:
   1344  1.1  christos 	add	x2, x2, #8                  // undo the subs: x2 = remaining blocks (1 to 7)
   1345  1.1  christos 	ldr	q16, [x0], #16              // load input
   1346  1.1  christos 	eor	v0.16b, v0.16b, v16.16b
   1347  1.1  christos 	str	q0, [x1], #16               // write output
   1348  1.1  christos 	cmp	x2, #2
   1349  1.1  christos 	blo	.Lctr_enc_done              // exactly 1 block
   1350  1.1  christos 	ldr	q17, [x0], #16
   1351  1.1  christos 	eor	v1.16b, v1.16b, v17.16b
   1352  1.1  christos 	str	q1, [x1], #16
   1353  1.1  christos 	beq	.Lctr_enc_done              // exactly 2 blocks (flags from cmp x2, #2)
   1354  1.1  christos 	ldr	q18, [x0], #16
   1355  1.1  christos 	eor	v4.16b, v4.16b, v18.16b
   1356  1.1  christos 	str	q4, [x1], #16
   1357  1.1  christos 	cmp	x2, #4
   1358  1.1  christos 	blo	.Lctr_enc_done              // exactly 3 blocks
   1359  1.1  christos 	ldr	q19, [x0], #16
   1360  1.1  christos 	eor	v6.16b, v6.16b, v19.16b
   1361  1.1  christos 	str	q6, [x1], #16
   1362  1.1  christos 	beq	.Lctr_enc_done              // exactly 4 blocks (flags from cmp x2, #4)
   1363  1.1  christos 	ldr	q20, [x0], #16
   1364  1.1  christos 	eor	v3.16b, v3.16b, v20.16b
   1365  1.1  christos 	str	q3, [x1], #16
   1366  1.1  christos 	cmp	x2, #6
   1367  1.1  christos 	blo	.Lctr_enc_done              // exactly 5 blocks
   1368  1.1  christos 	ldr	q21, [x0], #16
   1369  1.1  christos 	eor	v7.16b, v7.16b, v21.16b
   1370  1.1  christos 	str	q7, [x1], #16
   1371  1.1  christos 	beq	.Lctr_enc_done              // exactly 6 blocks (flags from cmp x2, #6)
   1372  1.1  christos 	ldr	q22, [x0]                   // 7th and last block, so x0 is not advanced
   1373  1.1  christos 	eor	v2.16b, v2.16b, v22.16b
   1374  1.1  christos 	str	q2, [x1], #16
   1375  1.1  christos 
   1376  1.1  christos .Lctr_enc_done:
   1377  1.1  christos 	movi	v0.16b, #0
   1378  1.1  christos 	movi	v1.16b, #0
   1379  1.1  christos .Lctr_enc_bzero:	//	wipe key schedule [if any]
   1380  1.1  christos 	stp	q0, q1, [sp], #32           // zero 32 bytes and release them; rounds*128-96 is a multiple of 32
   1381  1.1  christos 	cmp	sp, x14                     // stop once sp is back at the frame base saved in x14
   1382  1.1  christos 	bne	.Lctr_enc_bzero
   1383  1.1  christos 
   1384  1.1  christos 	ldp	d8, d9, [sp, #16]           // restore callee-saved SIMD registers
   1385  1.1  christos 	ldp	d10, d11, [sp, #32]
   1386  1.1  christos 	ldp	d12, d13, [sp, #48]
   1387  1.1  christos 	ldp	d14, d15, [sp, #64]
   1388  1.1  christos 	ldp	x29, x30, [sp], #80
   1389  1.1  christos 	ret
   1390  1.1  christos 
   1391  1.1  christos .Lctr_enc_short:
   1392  1.1  christos 	stp	x29, x30, [sp, #-96]!       // 96-byte frame: frame record, x19-x23, output buffer at +64, counter copy at +80
   1393  1.1  christos 	stp	x19, x20, [sp, #16]
   1394  1.1  christos 	stp	x21, x22, [sp, #32]
   1395  1.1  christos 	str	x23, [sp, #48]
   1396  1.1  christos 
   1397  1.1  christos 	mov	x19, x0                     // copy arguments
   1398  1.1  christos 	mov	x20, x1                     // (callee-saved registers survive the AES_encrypt calls)
   1399  1.1  christos 	mov	x21, x2
   1400  1.1  christos 	mov	x22, x3
   1401  1.1  christos 	ldr	w23, [x4, #12]              // load counter .LSW
   1402  1.1  christos 	ldr	q1, [x4]                    // load whole counter value
   1403  1.1  christos #ifdef __AARCH64EL__
   1404  1.1  christos 	rev	w23, w23                    // big-endian word in memory -> native integer
   1405  1.1  christos #endif
   1406  1.1  christos 	str	q1, [sp, #80]               // copy counter value
   1407  1.1  christos 
   1408  1.1  christos .Lctr_enc_short_loop:
   1409  1.1  christos 	add	x0, sp, #80                 // input counter value
   1410  1.1  christos 	add	x1, sp, #64                 // output on the stack
   1411  1.1  christos 	mov	x2, x22                     // key
   1412  1.1  christos 
   1413  1.1  christos 	bl	AES_encrypt
   1414  1.1  christos 
   1415  1.1  christos 	ldr	q0, [x19], #16              // load input
   1416  1.1  christos 	ldr	q1, [sp, #64]               // load encrypted counter
   1417  1.1  christos 	add	x23, x23, #1                // only the low 32 bits are stored below, so the increment wraps modulo 2^32
   1418  1.1  christos #ifdef __AARCH64EL__
   1419  1.1  christos 	rev	w0, w23
   1420  1.1  christos 	str	w0, [sp, #80+12]            // next counter value
   1421  1.1  christos #else
   1422  1.1  christos 	str	w23, [sp, #80+12]           // next counter value
   1423  1.1  christos #endif
   1424  1.1  christos 	eor	v0.16b, v0.16b, v1.16b
   1425  1.1  christos 	str	q0, [x20], #16              // store output
   1426  1.1  christos 	subs	x21, x21, #1                // one block done
   1427  1.1  christos 	bne	.Lctr_enc_short_loop
   1428  1.1  christos 
   1429  1.1  christos 	movi	v0.16b, #0
   1430  1.1  christos 	movi	v1.16b, #0
   1431  1.1  christos 	stp	q0, q1, [sp, #64]           // zero the encrypted-counter buffer and the counter copy
   1432  1.1  christos 
   1433  1.1  christos 	ldr	x23, [sp, #48]
   1434  1.1  christos 	ldp	x21, x22, [sp, #32]
   1435  1.1  christos 	ldp	x19, x20, [sp, #16]
   1436  1.1  christos 	ldp	x29, x30, [sp], #96
   1437  1.1  christos 	ret
   1438  1.1  christos .size	ossl_bsaes_ctr32_encrypt_blocks,.-ossl_bsaes_ctr32_encrypt_blocks
   1439  1.1  christos 
   1440  1.1  christos .globl	ossl_bsaes_xts_encrypt	// XTS-mode encryption: 8 blocks at a time via _bsaes_encrypt8, with ciphertext stealing for a trailing partial block
   1441  1.1  christos .type	ossl_bsaes_xts_encrypt,%function
   1442  1.1  christos .align	4
   1443  1.1  christos // On entry:
   1444  1.1  christos //   x0 -> input plaintext
   1445  1.1  christos //   x1 -> output ciphertext
   1446  1.1  christos //   x2 = length of text in bytes (must be at least 16)
   1447  1.1  christos //   x3 -> key1 (used to encrypt the XORed plaintext blocks)
   1448  1.1  christos //   x4 -> key2 (used to encrypt the initial vector to yield the initial tweak)
   1449  1.1  christos //   x5 -> 16-byte initial vector (typically, sector number)
   1450  1.1  christos // On exit:
   1451  1.1  christos //   Output ciphertext filled in
   1452  1.1  christos //   No output registers, usual AAPCS64 register preservation
   1453  1.1  christos // In the comments below, T0 is the tweak for the next block to process
   1454  1.1  christos // (held in v11) and Tn is T0 doubled n times.
   1455  1.1  christos ossl_bsaes_xts_encrypt:
   1456  1.1  christos 	AARCH64_VALID_CALL_TARGET
   1457  1.1  christos         // Stack layout:
   1458  1.1  christos         // sp ->
   1459  1.1  christos         //        nrounds*128-96 bytes: key schedule
   1460  1.1  christos         // x19 ->
   1461  1.1  christos         //        16 bytes: frame record
   1462  1.1  christos         //        4*16 bytes: tweak storage across _bsaes_encrypt8
   1463  1.1  christos         //        6*8 bytes: storage for 5 callee-saved general-purpose registers
   1464  1.1  christos         //        8*8 bytes: storage for 8 callee-saved SIMD registers
   1465  1.1  christos 	stp	x29, x30, [sp, #-192]!
   1466  1.1  christos 	stp	x19, x20, [sp, #80]
   1467  1.1  christos 	stp	x21, x22, [sp, #96]
   1468  1.1  christos 	str	x23, [sp, #112]
   1469  1.1  christos 	stp	d8, d9, [sp, #128]
   1470  1.1  christos 	stp	d10, d11, [sp, #144]
   1471  1.1  christos 	stp	d12, d13, [sp, #160]
   1472  1.1  christos 	stp	d14, d15, [sp, #176]
   1473  1.1  christos 
   1474  1.1  christos 	mov	x19, sp                     // x19 = frame base; also the end address for the key schedule wipe loop
   1475  1.1  christos 	mov	x20, x0                     // x20 = input pointer
   1476  1.1  christos 	mov	x21, x1                     // x21 = output pointer
   1477  1.1  christos 	mov	x22, x2                     // x22 = byte count
   1478  1.1  christos 	mov	x23, x3                     // x23 = key1
   1479  1.1  christos 
   1480  1.1  christos         // generate initial tweak
   1481  1.1  christos 	sub	sp, sp, #16                 // 16-byte scratch slot for the encrypted IV
   1482  1.1  christos 	mov	x0, x5                      // iv[]
   1483  1.1  christos 	mov	x1, sp
   1484  1.1  christos 	mov	x2, x4                      // key2
   1485  1.1  christos 	bl	AES_encrypt
   1486  1.1  christos 	ldr	q11, [sp], #16              // v11 = initial tweak; release the scratch slot
   1487  1.1  christos 
   1488  1.1  christos 	ldr	w1, [x23, #240]             // get # of rounds
   1489  1.1  christos         // allocate the key schedule on the stack
   1490  1.1  christos 	add	x17, sp, #96
   1491  1.1  christos 	sub	x17, x17, x1, lsl #7        // 128 bytes per inner round key, less 96 bytes
   1492  1.1  christos 
   1493  1.1  christos         // populate the key schedule
   1494  1.1  christos 	mov	x9, x23                     // pass key
   1495  1.1  christos 	mov	x10, x1                     // pass # of rounds
   1496  1.1  christos 	mov	sp, x17                     // sp -> base of the key schedule
   1497  1.1  christos 	bl	_bsaes_key_convert
   1498  1.1  christos 	eor	v15.16b, v15.16b, v7.16b    // fix up last round key
   1499  1.1  christos 	str	q15, [x17]                  // save last round key
   1500  1.1  christos 
   1501  1.1  christos 	subs	x22, x22, #0x80             // x22 = bytes left after a full batch of 8 blocks
   1502  1.1  christos 	blo	.Lxts_enc_short             // fewer than 128 bytes in total
   1503  1.1  christos 	b	.Lxts_enc_loop
   1504  1.1  christos 
   1505  1.1  christos .align	4
   1506  1.1  christos .Lxts_enc_loop:
   1507  1.1  christos 	ldr	q8, .Lxts_magic             // v8 = {1, 0x87}
   1508  1.1  christos 	mov	x10, x1                     // pass rounds
   1509  1.1  christos 	add	x2, x19, #16                // x2 -> tweak storage in the frame
   1510  1.1  christos 	ldr	q0, [x20], #16              // input block 0
   1511  1.1  christos 	sshr	v1.2d, v11.2d, #63          // all-ones in each 64-bit lane of T0 whose top bit is set
   1512  1.1  christos 	mov	x9, sp                      // pass key schedule
   1513  1.1  christos 	ldr	q6, .Lxts_magic+16          // v6 = bit 62 set in each 64-bit lane
   1514  1.1  christos 	add	v2.2d, v11.2d, v11.2d       // shift each 64-bit lane of T0 left by one bit
   1515  1.1  christos 	cmtst	v3.2d, v11.2d, v6.2d        // bit 62 of T0 becomes the top bit of T1: carry mask for T2
   1516  1.1  christos 	and	v1.16b, v1.16b, v8.16b
   1517  1.1  christos 	ext	v1.16b, v1.16b, v1.16b, #8  // swap halves so each lane's carry is applied to the other lane
   1518  1.1  christos 	and	v3.16b, v3.16b, v8.16b
   1519  1.1  christos 	ldr	q4, [x20], #16              // input block 1
   1520  1.1  christos 	eor	v12.16b, v2.16b, v1.16b     // v12 = T1
   1521  1.1  christos 	eor	v1.16b, v4.16b, v12.16b     // block 1 ^ T1
   1522  1.1  christos 	eor	v0.16b, v0.16b, v11.16b     // block 0 ^ T0
   1523  1.1  christos 	cmtst	v2.2d, v12.2d, v6.2d        // carry mask for T3
   1524  1.1  christos 	add	v4.2d, v12.2d, v12.2d
   1525  1.1  christos 	add	x0, x19, #16                // x0 -> tweak storage, read back after _bsaes_encrypt8
   1526  1.1  christos 	ext	v3.16b, v3.16b, v3.16b, #8
   1527  1.1  christos 	and	v2.16b, v2.16b, v8.16b
   1528  1.1  christos 	eor	v13.16b, v4.16b, v3.16b     // v13 = T2
   1529  1.1  christos 	ldr	q3, [x20], #16              // input block 2
   1530  1.1  christos 	ext	v4.16b, v2.16b, v2.16b, #8
   1531  1.1  christos 	eor	v2.16b, v3.16b, v13.16b     // block 2 ^ T2
   1532  1.1  christos 	ldr	q3, [x20], #16              // input block 3
   1533  1.1  christos 	add	v5.2d, v13.2d, v13.2d
   1534  1.1  christos 	cmtst	v7.2d, v13.2d, v6.2d        // carry mask for T4
   1535  1.1  christos 	and	v7.16b, v7.16b, v8.16b
   1536  1.1  christos 	ldr	q9, [x20], #16              // input block 4
   1537  1.1  christos 	ext	v7.16b, v7.16b, v7.16b, #8
   1538  1.1  christos 	ldr	q10, [x20], #16             // input block 5
   1539  1.1  christos 	eor	v14.16b, v5.16b, v4.16b     // v14 = T3
   1540  1.1  christos 	ldr	q16, [x20], #16             // input block 6
   1541  1.1  christos 	add	v4.2d, v14.2d, v14.2d
   1542  1.1  christos 	eor	v3.16b, v3.16b, v14.16b     // block 3 ^ T3
   1543  1.1  christos 	eor	v15.16b, v4.16b, v7.16b     // v15 = T4
   1544  1.1  christos 	add	v5.2d, v15.2d, v15.2d
   1545  1.1  christos 	ldr	q7, [x20], #16              // input block 7
   1546  1.1  christos 	cmtst	v4.2d, v14.2d, v6.2d        // carry mask for T5
   1547  1.1  christos 	and	v17.16b, v4.16b, v8.16b
   1548  1.1  christos 	cmtst	v18.2d, v15.2d, v6.2d       // carry mask for T6
   1549  1.1  christos 	eor	v4.16b, v9.16b, v15.16b     // block 4 ^ T4
   1550  1.1  christos 	ext	v9.16b, v17.16b, v17.16b, #8
   1551  1.1  christos 	eor	v9.16b, v5.16b, v9.16b      // v9 = T5
   1552  1.1  christos 	add	v17.2d, v9.2d, v9.2d
   1553  1.1  christos 	and	v18.16b, v18.16b, v8.16b
   1554  1.1  christos 	eor	v5.16b, v10.16b, v9.16b     // block 5 ^ T5
   1555  1.1  christos 	str	q9, [x2], #16               // save T5 in the frame
   1556  1.1  christos 	ext	v10.16b, v18.16b, v18.16b, #8
   1557  1.1  christos 	cmtst	v9.2d, v9.2d, v6.2d         // carry mask for T7 (from T5)
   1558  1.1  christos 	and	v9.16b, v9.16b, v8.16b
   1559  1.1  christos 	eor	v10.16b, v17.16b, v10.16b   // v10 = T6
   1560  1.1  christos 	cmtst	v17.2d, v10.2d, v6.2d       // carry mask for T8 (from T6); last use of the bit 62 mask in v6
   1561  1.1  christos 	eor	v6.16b, v16.16b, v10.16b    // block 6 ^ T6
   1562  1.1  christos 	str	q10, [x2], #16              // save T6 in the frame
   1563  1.1  christos 	ext	v9.16b, v9.16b, v9.16b, #8
   1564  1.1  christos 	add	v10.2d, v10.2d, v10.2d
   1565  1.1  christos 	eor	v9.16b, v10.16b, v9.16b     // v9 = T7
   1566  1.1  christos 	str	q9, [x2], #16               // save T7 in the frame
   1567  1.1  christos 	eor	v7.16b, v7.16b, v9.16b      // block 7 ^ T7
   1568  1.1  christos 	add	v9.2d, v9.2d, v9.2d
   1569  1.1  christos 	and	v8.16b, v17.16b, v8.16b
   1570  1.1  christos 	ext	v8.16b, v8.16b, v8.16b, #8
   1571  1.1  christos 	eor	v8.16b, v9.16b, v8.16b      // v8 = T8
   1572  1.1  christos 	str	q8, [x2]                    // next round tweak
   1573  1.1  christos 
   1574  1.1  christos 	bl	_bsaes_encrypt8             // the code below consumes the results in the order v0, v1, v4, v6, v3, v7, v2, v5
   1575  1.1  christos 
   1576  1.1  christos 	ldr	q8, [x0], #16               // reload T5
   1577  1.1  christos 	eor	v0.16b, v0.16b, v11.16b     // output block 0
   1578  1.1  christos 	eor	v1.16b, v1.16b, v12.16b     // output block 1
   1579  1.1  christos 	ldr	q9, [x0], #16               // reload T6
   1580  1.1  christos 	eor	v4.16b, v4.16b, v13.16b     // output block 2
   1581  1.1  christos 	eor	v6.16b, v6.16b, v14.16b     // output block 3
   1582  1.1  christos 	ldr	q10, [x0], #16              // reload T7
   1583  1.1  christos 	eor	v3.16b, v3.16b, v15.16b     // output block 4
   1584  1.1  christos 	subs	x22, x22, #0x80             // another full batch of 8 blocks available?
   1585  1.1  christos 	str	q0, [x21], #16
   1586  1.1  christos 	ldr	q11, [x0]                   // next round tweak
   1587  1.1  christos 	str	q1, [x21], #16
   1588  1.1  christos 	eor	v0.16b, v7.16b, v8.16b      // output block 5
   1589  1.1  christos 	eor	v1.16b, v2.16b, v9.16b      // output block 6
   1590  1.1  christos 	str	q4, [x21], #16
   1591  1.1  christos 	eor	v2.16b, v5.16b, v10.16b     // output block 7
   1592  1.1  christos 	str	q6, [x21], #16
   1593  1.1  christos 	str	q3, [x21], #16
   1594  1.1  christos 	str	q0, [x21], #16
   1595  1.1  christos 	str	q1, [x21], #16
   1596  1.1  christos 	str	q2, [x21], #16
   1597  1.1  christos 	bpl	.Lxts_enc_loop              // flags are still those of the subs above
   1598  1.1  christos 
   1599  1.1  christos .Lxts_enc_short:
   1600  1.1  christos 	adds	x22, x22, #0x70             // x22 = remaining bytes - 16
   1601  1.1  christos 	bmi	.Lxts_enc_done              // no whole block left
   1602  1.1  christos 
   1603  1.1  christos 	ldr	q8, .Lxts_magic             // v8 = {1, 0x87}
   1604  1.1  christos 	sshr	v1.2d, v11.2d, #63
   1605  1.1  christos 	add	v2.2d, v11.2d, v11.2d
   1606  1.1  christos 	ldr	q9, .Lxts_magic+16          // v9 = bit 62 set in each 64-bit lane
   1607  1.1  christos 	subs	x22, x22, #0x10             // flags are tested by the bmi to .Lxts_enc_1 below
   1608  1.1  christos 	ldr	q0, [x20], #16              // input block 0
   1609  1.1  christos 	and	v1.16b, v1.16b, v8.16b
   1610  1.1  christos 	cmtst	v3.2d, v11.2d, v9.2d
   1611  1.1  christos 	ext	v1.16b, v1.16b, v1.16b, #8
   1612  1.1  christos 	and	v3.16b, v3.16b, v8.16b
   1613  1.1  christos 	eor	v12.16b, v2.16b, v1.16b     // v12 = T1
   1614  1.1  christos 	ext	v1.16b, v3.16b, v3.16b, #8
   1615  1.1  christos 	add	v2.2d, v12.2d, v12.2d
   1616  1.1  christos 	cmtst	v3.2d, v12.2d, v9.2d
   1617  1.1  christos 	eor	v13.16b, v2.16b, v1.16b     // v13 = T2
   1618  1.1  christos 	and	v22.16b, v3.16b, v8.16b     // v22 = carry for T3, not yet lane-swapped
   1619  1.1  christos 	bmi	.Lxts_enc_1                 // exactly 1 whole block
   1620  1.1  christos 
   1621  1.1  christos 	ext	v2.16b, v22.16b, v22.16b, #8
   1622  1.1  christos 	add	v3.2d, v13.2d, v13.2d
   1623  1.1  christos 	ldr	q1, [x20], #16              // input block 1
   1624  1.1  christos 	cmtst	v4.2d, v13.2d, v9.2d
   1625  1.1  christos 	subs	x22, x22, #0x10
   1626  1.1  christos 	eor	v14.16b, v3.16b, v2.16b     // v14 = T3
   1627  1.1  christos 	and	v23.16b, v4.16b, v8.16b
   1628  1.1  christos 	bmi	.Lxts_enc_2                 // exactly 2 whole blocks
   1629  1.1  christos 
   1630  1.1  christos 	ext	v3.16b, v23.16b, v23.16b, #8
   1631  1.1  christos 	add	v4.2d, v14.2d, v14.2d
   1632  1.1  christos 	ldr	q2, [x20], #16              // input block 2
   1633  1.1  christos 	cmtst	v5.2d, v14.2d, v9.2d
   1634  1.1  christos 	eor	v0.16b, v0.16b, v11.16b     // block 0 ^ T0
   1635  1.1  christos 	subs	x22, x22, #0x10
   1636  1.1  christos 	eor	v15.16b, v4.16b, v3.16b     // v15 = T4
   1637  1.1  christos 	and	v24.16b, v5.16b, v8.16b
   1638  1.1  christos 	bmi	.Lxts_enc_3                 // exactly 3 whole blocks
   1639  1.1  christos 
   1640  1.1  christos 	ext	v4.16b, v24.16b, v24.16b, #8
   1641  1.1  christos 	add	v5.2d, v15.2d, v15.2d
   1642  1.1  christos 	ldr	q3, [x20], #16              // input block 3
   1643  1.1  christos 	cmtst	v6.2d, v15.2d, v9.2d
   1644  1.1  christos 	eor	v1.16b, v1.16b, v12.16b     // block 1 ^ T1
   1645  1.1  christos 	subs	x22, x22, #0x10
   1646  1.1  christos 	eor	v16.16b, v5.16b, v4.16b     // v16 = T5
   1647  1.1  christos 	and	v25.16b, v6.16b, v8.16b
   1648  1.1  christos 	bmi	.Lxts_enc_4                 // exactly 4 whole blocks
   1649  1.1  christos 
   1650  1.1  christos 	ext	v5.16b, v25.16b, v25.16b, #8
   1651  1.1  christos 	add	v6.2d, v16.2d, v16.2d
   1652  1.1  christos 	add	x0, x19, #16                // x0 -> tweak storage in the frame
   1653  1.1  christos 	cmtst	v7.2d, v16.2d, v9.2d
   1654  1.1  christos 	ldr	q4, [x20], #16              // input block 4
   1655  1.1  christos 	eor	v2.16b, v2.16b, v13.16b     // block 2 ^ T2
   1656  1.1  christos 	str	q16, [x0], #16              // save T5: v16 and up are not preserved across calls
   1657  1.1  christos 	subs	x22, x22, #0x10
   1658  1.1  christos 	eor	v17.16b, v6.16b, v5.16b     // v17 = T6
   1659  1.1  christos 	and	v26.16b, v7.16b, v8.16b
   1660  1.1  christos 	bmi	.Lxts_enc_5                 // exactly 5 whole blocks
   1661  1.1  christos 
   1662  1.1  christos 	ext	v7.16b, v26.16b, v26.16b, #8
   1663  1.1  christos 	add	v18.2d, v17.2d, v17.2d
   1664  1.1  christos 	ldr	q5, [x20], #16              // input block 5
   1665  1.1  christos 	eor	v3.16b, v3.16b, v14.16b     // block 3 ^ T3
   1666  1.1  christos 	str	q17, [x0], #16              // save T6
   1667  1.1  christos 	subs	x22, x22, #0x10
   1668  1.1  christos 	eor	v18.16b, v18.16b, v7.16b    // v18 = T7
   1669  1.1  christos 	bmi	.Lxts_enc_6                 // exactly 6 whole blocks
   1670  1.1  christos 
   1671  1.1  christos 	ldr	q6, [x20], #16              // input block 6 (7 whole blocks in this batch)
   1672  1.1  christos 	eor	v4.16b, v4.16b, v15.16b     // block 4 ^ T4
   1673  1.1  christos 	eor	v5.16b, v5.16b, v16.16b     // block 5 ^ T5
   1674  1.1  christos 	str	q18, [x0]                   // next round tweak
   1675  1.1  christos 	mov	x9, sp                      // pass key schedule
   1676  1.1  christos 	mov	x10, x1                     // pass rounds
   1677  1.1  christos 	add	x0, x19, #16                // x0 -> saved tweaks, read back after the call
   1678  1.1  christos 	sub	x22, x22, #0x10             // x22 = trailing byte count - 16, as on the other paths into .Lxts_enc_done
   1679  1.1  christos 	eor	v6.16b, v6.16b, v17.16b     // block 6 ^ T6
   1680  1.1  christos 
   1681  1.1  christos 	bl	_bsaes_encrypt8
   1682  1.1  christos 
   1683  1.1  christos 	ldr	q16, [x0], #16              // reload T5
   1684  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1685  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   1686  1.1  christos 	ldr	q17, [x0], #16              // reload T6
   1687  1.1  christos 	eor	v4.16b, v4.16b, v13.16b
   1688  1.1  christos 	eor	v6.16b, v6.16b, v14.16b
   1689  1.1  christos 	eor	v3.16b, v3.16b, v15.16b
   1690  1.1  christos 	ldr	q11, [x0]                   // next round tweak
   1691  1.1  christos 	str	q0, [x21], #16
   1692  1.1  christos 	str	q1, [x21], #16
   1693  1.1  christos 	eor	v0.16b, v7.16b, v16.16b     // output block 5
   1694  1.1  christos 	eor	v1.16b, v2.16b, v17.16b     // output block 6
   1695  1.1  christos 	str	q4, [x21], #16
   1696  1.1  christos 	str	q6, [x21], #16
   1697  1.1  christos 	str	q3, [x21], #16
   1698  1.1  christos 	str	q0, [x21], #16
   1699  1.1  christos 	str	q1, [x21], #16
   1700  1.1  christos 	b	.Lxts_enc_done
   1701  1.1  christos 
   1702  1.1  christos .align	4
   1703  1.1  christos .Lxts_enc_6:
   1704  1.1  christos 	eor	v4.16b, v4.16b, v15.16b     // block 4 ^ T4
   1705  1.1  christos 	eor	v5.16b, v5.16b, v16.16b     // block 5 ^ T5
   1706  1.1  christos 	mov	x9, sp                      // pass key schedule
   1707  1.1  christos 	mov	x10, x1                     // pass rounds
   1708  1.1  christos 	add	x0, x19, #16                // x0 -> saved tweaks (T5, then T6)
   1709  1.1  christos 
   1710  1.1  christos 	bl	_bsaes_encrypt8
   1711  1.1  christos 
   1712  1.1  christos 	ldr	q16, [x0], #16              // reload T5
   1713  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1714  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   1715  1.1  christos 	eor	v4.16b, v4.16b, v13.16b
   1716  1.1  christos 	eor	v6.16b, v6.16b, v14.16b
   1717  1.1  christos 	ldr	q11, [x0]                   // next round tweak
   1718  1.1  christos 	eor	v3.16b, v3.16b, v15.16b
   1719  1.1  christos 	str	q0, [x21], #16
   1720  1.1  christos 	str	q1, [x21], #16
   1721  1.1  christos 	eor	v0.16b, v7.16b, v16.16b     // output block 5
   1722  1.1  christos 	str	q4, [x21], #16
   1723  1.1  christos 	str	q6, [x21], #16
   1724  1.1  christos 	str	q3, [x21], #16
   1725  1.1  christos 	str	q0, [x21], #16
   1726  1.1  christos 	b	.Lxts_enc_done
   1727  1.1  christos 
   1728  1.1  christos .align	4
   1729  1.1  christos .Lxts_enc_5:
   1730  1.1  christos 	eor	v3.16b, v3.16b, v14.16b     // block 3 ^ T3
   1731  1.1  christos 	eor	v4.16b, v4.16b, v15.16b     // block 4 ^ T4
   1732  1.1  christos 	mov	x9, sp                      // pass key schedule
   1733  1.1  christos 	mov	x10, x1                     // pass rounds
   1734  1.1  christos 	add	x0, x19, #16                // x0 -> saved T5
   1735  1.1  christos 
   1736  1.1  christos 	bl	_bsaes_encrypt8
   1737  1.1  christos 
   1738  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1739  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   1740  1.1  christos 	ldr	q11, [x0]                   // next round tweak
   1741  1.1  christos 	eor	v4.16b, v4.16b, v13.16b
   1742  1.1  christos 	eor	v6.16b, v6.16b, v14.16b
   1743  1.1  christos 	eor	v3.16b, v3.16b, v15.16b
   1744  1.1  christos 	str	q0, [x21], #16
   1745  1.1  christos 	str	q1, [x21], #16
   1746  1.1  christos 	str	q4, [x21], #16
   1747  1.1  christos 	str	q6, [x21], #16
   1748  1.1  christos 	str	q3, [x21], #16
   1749  1.1  christos 	b	.Lxts_enc_done
   1750  1.1  christos 
   1751  1.1  christos .align	4
   1752  1.1  christos .Lxts_enc_4:
   1753  1.1  christos 	eor	v2.16b, v2.16b, v13.16b     // block 2 ^ T2
   1754  1.1  christos 	eor	v3.16b, v3.16b, v14.16b     // block 3 ^ T3
   1755  1.1  christos 	mov	x9, sp                      // pass key schedule
   1756  1.1  christos 	mov	x10, x1                     // pass rounds
   1757  1.1  christos 	add	x0, x19, #16
   1758  1.1  christos 
   1759  1.1  christos 	bl	_bsaes_encrypt8
   1760  1.1  christos 
   1761  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1762  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   1763  1.1  christos 	eor	v4.16b, v4.16b, v13.16b
   1764  1.1  christos 	eor	v6.16b, v6.16b, v14.16b
   1765  1.1  christos 	mov	v11.16b, v15.16b            // next round tweak
   1766  1.1  christos 	str	q0, [x21], #16
   1767  1.1  christos 	str	q1, [x21], #16
   1768  1.1  christos 	str	q4, [x21], #16
   1769  1.1  christos 	str	q6, [x21], #16
   1770  1.1  christos 	b	.Lxts_enc_done
   1771  1.1  christos 
   1772  1.1  christos .align	4
   1773  1.1  christos .Lxts_enc_3:
   1774  1.1  christos 	eor	v1.16b, v1.16b, v12.16b     // block 1 ^ T1
   1775  1.1  christos 	eor	v2.16b, v2.16b, v13.16b     // block 2 ^ T2
   1776  1.1  christos 	mov	x9, sp                      // pass key schedule
   1777  1.1  christos 	mov	x10, x1                     // pass rounds
   1778  1.1  christos 	add	x0, x19, #16
   1779  1.1  christos 
   1780  1.1  christos 	bl	_bsaes_encrypt8
   1781  1.1  christos 
   1782  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1783  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   1784  1.1  christos 	eor	v4.16b, v4.16b, v13.16b
   1785  1.1  christos 	mov	v11.16b, v14.16b            // next round tweak
   1786  1.1  christos 	str	q0, [x21], #16
   1787  1.1  christos 	str	q1, [x21], #16
   1788  1.1  christos 	str	q4, [x21], #16
   1789  1.1  christos 	b	.Lxts_enc_done
   1790  1.1  christos 
   1791  1.1  christos .align	4
   1792  1.1  christos .Lxts_enc_2:
   1793  1.1  christos 	eor	v0.16b, v0.16b, v11.16b     // block 0 ^ T0
   1794  1.1  christos 	eor	v1.16b, v1.16b, v12.16b     // block 1 ^ T1
   1795  1.1  christos 	mov	x9, sp                      // pass key schedule
   1796  1.1  christos 	mov	x10, x1                     // pass rounds
   1797  1.1  christos 	add	x0, x19, #16
   1798  1.1  christos 
   1799  1.1  christos 	bl	_bsaes_encrypt8
   1800  1.1  christos 
   1801  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1802  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   1803  1.1  christos 	mov	v11.16b, v13.16b            // next round tweak
   1804  1.1  christos 	str	q0, [x21], #16
   1805  1.1  christos 	str	q1, [x21], #16
   1806  1.1  christos 	b	.Lxts_enc_done
   1807  1.1  christos 
   1808  1.1  christos .align	4
   1809  1.1  christos .Lxts_enc_1:
   1810  1.1  christos 	eor	v0.16b, v0.16b, v11.16b     // block 0 ^ T0
   1811  1.1  christos 	sub	x0, sp, #16                 // input and output both point at the slot
   1812  1.1  christos 	sub	x1, sp, #16                 // pushed by the str below
   1813  1.1  christos 	mov	x2, x23                     // key1
   1814  1.1  christos 	mov	v13.d[0], v11.d[1]          // just in case AES_encrypt corrupts top half of callee-saved SIMD registers
   1815  1.1  christos 	mov	v14.d[0], v12.d[1]          // likewise keep the top half of T1 in a preserved low half
   1816  1.1  christos 	str	q0, [sp, #-16]!
   1817  1.1  christos 
   1818  1.1  christos 	bl	AES_encrypt
   1819  1.1  christos 
   1820  1.1  christos 	ldr	q0, [sp], #16               // encrypted block; release the slot
   1821  1.1  christos 	trn1	v13.2d, v11.2d, v13.2d      // v13 = T0 rebuilt from the two preserved low halves
   1822  1.1  christos 	trn1	v11.2d, v12.2d, v14.2d      // next round tweak
   1823  1.1  christos 	eor	v0.16b, v0.16b, v13.16b     // output block 0
   1824  1.1  christos 	str	q0, [x21], #16
   1825  1.1  christos 
   1826  1.1  christos .Lxts_enc_done:
   1827  1.1  christos 	adds	x22, x22, #0x10             // x22 = number of trailing bytes (0 to 15)
   1828  1.1  christos 	beq	.Lxts_enc_ret               // length was a whole number of blocks
   1829  1.1  christos 
   1830  1.1  christos 	sub	x6, x21, #0x10              // x6 -> last whole ciphertext block written
   1831  1.1  christos         // Penultimate plaintext block produces final ciphertext part-block
   1832  1.1  christos         // plus remaining part of final plaintext block. Move ciphertext part
   1833  1.1  christos         // to final position and reuse penultimate ciphertext block buffer to
   1834  1.1  christos         // construct final plaintext block
   1835  1.1  christos .Lxts_enc_steal:
   1836  1.1  christos 	ldrb	w0, [x20], #1               // next byte of the trailing plaintext
   1837  1.1  christos 	ldrb	w1, [x21, #-0x10]           // matching byte of the last whole ciphertext block
   1838  1.1  christos 	strb	w0, [x21, #-0x10]           // plaintext byte takes its place
   1839  1.1  christos 	strb	w1, [x21], #1               // ciphertext byte moves to the final part-block
   1840  1.1  christos 
   1841  1.1  christos 	subs	x22, x22, #1
   1842  1.1  christos 	bhi	.Lxts_enc_steal             // repeat for every trailing byte
   1843  1.1  christos 
   1844  1.1  christos         // Finally encrypt the penultimate ciphertext block using the
   1845  1.1  christos         // last tweak
   1846  1.1  christos 	ldr	q0, [x6]
   1847  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1848  1.1  christos 	str	q0, [sp, #-16]!             // push the block; AES_encrypt works in place on the stack
   1849  1.1  christos 	mov	x0, sp
   1850  1.1  christos 	mov	x1, sp
   1851  1.1  christos 	mov	x2, x23                     // key1
   1852  1.1  christos 	mov	x21, x6                     // keep the destination in a callee-saved register across the call
   1853  1.1  christos 	mov	v13.d[0], v11.d[1]          // just in case AES_encrypt corrupts top half of callee-saved SIMD registers
   1854  1.1  christos 
   1855  1.1  christos 	bl	AES_encrypt
   1856  1.1  christos 
   1857  1.1  christos 	trn1	v11.2d, v11.2d, v13.2d      // rebuild the tweak from the two preserved low halves
   1858  1.1  christos 	ldr	q0, [sp], #16
   1859  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   1860  1.1  christos 	str	q0, [x21]
   1861  1.1  christos 
   1862  1.1  christos .Lxts_enc_ret:
   1863  1.1  christos 
   1864  1.1  christos 	movi	v0.16b, #0
   1865  1.1  christos 	movi	v1.16b, #0
   1866  1.1  christos .Lxts_enc_bzero:	//	wipe key schedule
   1867  1.1  christos 	stp	q0, q1, [sp], #32           // zero 32 bytes and release them; nrounds*128-96 is a multiple of 32
   1868  1.1  christos 	cmp	sp, x19                     // stop once sp is back at the frame base
   1869  1.1  christos 	bne	.Lxts_enc_bzero
   1870  1.1  christos 
   1871  1.1  christos 	ldp	x19, x20, [sp, #80]
   1872  1.1  christos 	ldp	x21, x22, [sp, #96]
   1873  1.1  christos 	ldr	x23, [sp, #112]
   1874  1.1  christos 	ldp	d8, d9, [sp, #128]
   1875  1.1  christos 	ldp	d10, d11, [sp, #144]
   1876  1.1  christos 	ldp	d12, d13, [sp, #160]
   1877  1.1  christos 	ldp	d14, d15, [sp, #176]
   1878  1.1  christos 	ldp	x29, x30, [sp], #192
   1879  1.1  christos 	ret
   1880  1.1  christos .size	ossl_bsaes_xts_encrypt,.-ossl_bsaes_xts_encrypt
   1879  1.1  christos 
   1880  1.1  christos // The assembler doesn't seem capable of de-duplicating these when expressed
   1881  1.1  christos // using `ldr qd,=` syntax, so assign a symbolic address
   1882  1.1  christos .align	5
   1883  1.1  christos .Lxts_magic:
   1884  1.1  christos .quad	1, 0x87, 0x4000000000000000, 0x4000000000000000
   1885  1.1  christos 
   1886  1.1  christos .globl	ossl_bsaes_xts_decrypt
   1887  1.1  christos .type	ossl_bsaes_xts_decrypt,%function
   1888  1.1  christos .align	4
                       // AES-XTS decryption. Whole blocks are decrypted up to eight at a time through
                       // _bsaes_decrypt8 using a key schedule built on the stack by _bsaes_key_convert;
                       // AES_encrypt produces the initial tweak, and AES_decrypt handles a lone block
                       // and the two blocks involved in ciphertext stealing.
   1889  1.1  christos // On entry:
   1890  1.1  christos //   x0 -> input ciphertext
   1891  1.1  christos //   x1 -> output plaintext
   1892  1.1  christos //   x2 -> length of text in bytes (must be at least 16)
   1893  1.1  christos //   x3 -> key1 (used to decrypt the XORed ciphertext blocks)
   1894  1.1  christos //   x4 -> key2 (used to encrypt the initial vector to yield the initial tweak)
   1895  1.1  christos //   x5 -> 16-byte initial vector (typically, sector number)
   1896  1.1  christos // On exit:
   1897  1.1  christos //   Output plaintext filled in
   1898  1.1  christos //   No output registers, usual AAPCS64 register preservation
                       // Register roles inside the function:
                       //   x19 = frame base (sp right after the prologue; also the end of the key schedule)
                       //   x20 = input cursor, x21 = output cursor, x23 = key1
                       //   x22 = byte count, kept biased so that sign/zero flags drive the dispatch
                       //   x1  = number of rounds; it is read again after calls to the _bsaes_*
                       //         helpers, so those helpers are presumed to preserve it
                       //   v11 = tweak for the next block to be processed
                       //   v12-v15 = following tweaks; they are read after _bsaes_decrypt8 returns,
                       //         so that helper is presumed to leave v11-v15 intact
   1899  1.1  christos ossl_bsaes_xts_decrypt:
   1900  1.1  christos 	AARCH64_VALID_CALL_TARGET
   1901  1.1  christos         // Stack layout:
   1902  1.1  christos         // sp ->
   1903  1.1  christos         //        nrounds*128-96 bytes: key schedule
   1904  1.1  christos         // x19 ->
   1905  1.1  christos         //        16 bytes: frame record
   1906  1.1  christos         //        4*16 bytes: tweak storage across _bsaes_decrypt8
   1907  1.1  christos         //        6*8 bytes: storage for 5 callee-saved general-purpose registers
   1908  1.1  christos         //        8*8 bytes: storage for 8 callee-saved SIMD registers
   1909  1.1  christos 	stp	x29, x30, [sp, #-192]!      // allocate the 192-byte frame; frame record at offset 0
   1910  1.1  christos 	stp	x19, x20, [sp, #80]         // offsets 16..79 are the tweak storage
   1911  1.1  christos 	stp	x21, x22, [sp, #96]
   1912  1.1  christos 	str	x23, [sp, #112]             // bytes 120..127 are left unused
   1913  1.1  christos 	stp	d8, d9, [sp, #128]          // only the low 64 bits of v8-v15 are saved
   1914  1.1  christos 	stp	d10, d11, [sp, #144]
   1915  1.1  christos 	stp	d12, d13, [sp, #160]
   1916  1.1  christos 	stp	d14, d15, [sp, #176]
   1917  1.1  christos 
   1918  1.1  christos 	mov	x19, sp                     // x19 = frame base
   1919  1.1  christos 	mov	x20, x0                     // x20 = input cursor
   1920  1.1  christos 	mov	x21, x1                     // x21 = output cursor
   1921  1.1  christos 	mov	x22, x2                     // x22 = length in bytes
   1922  1.1  christos 	mov	x23, x3                     // x23 = key1
   1923  1.1  christos 
   1924  1.1  christos         // generate initial tweak
   1925  1.1  christos 	sub	sp, sp, #16                 // 16-byte temporary to receive the encrypted IV
   1926  1.1  christos 	mov	x0, x5                      // iv[]
   1927  1.1  christos 	mov	x1, sp                      // output buffer = the temporary
   1928  1.1  christos 	mov	x2, x4                      // key2
   1929  1.1  christos 	bl	AES_encrypt
   1930  1.1  christos 	ldr	q11, [sp], #16              // v11 = initial tweak; release the temporary
   1931  1.1  christos 
   1932  1.1  christos 	ldr	w1, [x23, #240]             // get # of rounds
   1933  1.1  christos         // allocate the key schedule on the stack
   1934  1.1  christos 	add	x17, sp, #96
   1935  1.1  christos 	sub	x17, x17, x1, lsl #7        // 128 bytes per inner round key, less 96 bytes
   1936  1.1  christos 
   1937  1.1  christos         // populate the key schedule
   1938  1.1  christos 	mov	x9, x23                     // pass key
   1939  1.1  christos 	mov	x10, x1                     // pass # of rounds
   1940  1.1  christos 	mov	sp, x17                     // sp = base of the key schedule; x17 is also passed to the helper
   1941  1.1  christos 	bl	_bsaes_key_convert
   1942  1.1  christos 	ldr	q6,  [sp]                   // reload the round 0 key from the start of the schedule
   1943  1.1  christos 	str	q15, [x17]                  // save last round key
   1944  1.1  christos 	eor	v6.16b, v6.16b, v7.16b      // fix up round 0 key (by XORing with 0x63)
   1945  1.1  christos 	str	q6, [sp]
   1946  1.1  christos 
   1947  1.1  christos 	sub	x30, x22, #0x10             // x30 is only a scratch register here (it is reloaded from the frame before ret)
   1948  1.1  christos 	tst	x22, #0xf                   // if not multiple of 16
   1949  1.1  christos 	csel	x22, x30, x22, ne           // subtract another 16 bytes
   1950  1.1  christos 	subs	x22, x22, #0x80             // bias the count by 128 bytes (eight blocks)
   1951  1.1  christos 
   1952  1.1  christos 	blo	.Lxts_dec_short             // fewer than 128 bytes to process in bulk
   1953  1.1  christos 	b	.Lxts_dec_loop
   1954  1.1  christos 
   1955  1.1  christos .align	4
   1956  1.1  christos .Lxts_dec_loop:                     // eight blocks per iteration; T0 = v11 on entry, T1..T8 are derived below
   1957  1.1  christos 	ldr	q8, .Lxts_magic             // tweak-update constant (defined outside this section)
   1958  1.1  christos 	mov	x10, x1                     // pass rounds
   1959  1.1  christos 	add	x2, x19, #16                // x2 = write cursor into the frame's tweak storage
   1960  1.1  christos 	ldr	q0, [x20], #16              // block 0
   1961  1.1  christos 	sshr	v1.2d, v11.2d, #63          // all-ones in each 64-bit lane of T0 whose top bit is set
   1962  1.1  christos 	mov	x9, sp                      // pass key schedule
   1963  1.1  christos 	ldr	q6, .Lxts_magic+16          // bit mask used with cmtst (value defined outside this section)
   1964  1.1  christos 	add	v2.2d, v11.2d, v11.2d       // each 64-bit lane of T0 doubled
   1965  1.1  christos 	cmtst	v3.2d, v11.2d, v6.2d        // start of the correction term for T2, taken directly from T0
   1966  1.1  christos 	and	v1.16b, v1.16b, v8.16b
   1967  1.1  christos 	ext	v1.16b, v1.16b, v1.16b, #8  // swap the two 64-bit halves
   1968  1.1  christos 	and	v3.16b, v3.16b, v8.16b
   1969  1.1  christos 	ldr	q4, [x20], #16              // block 1
   1970  1.1  christos 	eor	v12.16b, v2.16b, v1.16b     // v12 = T1
   1971  1.1  christos 	eor	v1.16b, v4.16b, v12.16b     // v1 = block 1 ^ T1
   1972  1.1  christos 	eor	v0.16b, v0.16b, v11.16b     // v0 = block 0 ^ T0
   1973  1.1  christos 	cmtst	v2.2d, v12.2d, v6.2d        // start of the correction term for T3, from T1
   1974  1.1  christos 	add	v4.2d, v12.2d, v12.2d
   1975  1.1  christos 	add	x0, x19, #16                // x0 = read cursor into the tweak storage, used after the call
   1976  1.1  christos 	ext	v3.16b, v3.16b, v3.16b, #8
   1977  1.1  christos 	and	v2.16b, v2.16b, v8.16b
   1978  1.1  christos 	eor	v13.16b, v4.16b, v3.16b     // v13 = T2
   1979  1.1  christos 	ldr	q3, [x20], #16              // block 2
   1980  1.1  christos 	ext	v4.16b, v2.16b, v2.16b, #8
   1981  1.1  christos 	eor	v2.16b, v3.16b, v13.16b     // v2 = block 2 ^ T2
   1982  1.1  christos 	ldr	q3, [x20], #16              // block 3
   1983  1.1  christos 	add	v5.2d, v13.2d, v13.2d
   1984  1.1  christos 	cmtst	v7.2d, v13.2d, v6.2d        // start of the correction term for T4, from T2
   1985  1.1  christos 	and	v7.16b, v7.16b, v8.16b
   1986  1.1  christos 	ldr	q9, [x20], #16              // block 4
   1987  1.1  christos 	ext	v7.16b, v7.16b, v7.16b, #8
   1988  1.1  christos 	ldr	q10, [x20], #16             // block 5
   1989  1.1  christos 	eor	v14.16b, v5.16b, v4.16b     // v14 = T3
   1990  1.1  christos 	ldr	q16, [x20], #16             // block 6
   1991  1.1  christos 	add	v4.2d, v14.2d, v14.2d
   1992  1.1  christos 	eor	v3.16b, v3.16b, v14.16b     // v3 = block 3 ^ T3
   1993  1.1  christos 	eor	v15.16b, v4.16b, v7.16b     // v15 = T4
   1994  1.1  christos 	add	v5.2d, v15.2d, v15.2d
   1995  1.1  christos 	ldr	q7, [x20], #16              // block 7
   1996  1.1  christos 	cmtst	v4.2d, v14.2d, v6.2d        // start of the correction term for T5, from T3
   1997  1.1  christos 	and	v17.16b, v4.16b, v8.16b
   1998  1.1  christos 	cmtst	v18.2d, v15.2d, v6.2d       // start of the correction term for T6, from T4
   1999  1.1  christos 	eor	v4.16b, v9.16b, v15.16b     // v4 = block 4 ^ T4
   2000  1.1  christos 	ext	v9.16b, v17.16b, v17.16b, #8
   2001  1.1  christos 	eor	v9.16b, v5.16b, v9.16b      // v9 = T5
   2002  1.1  christos 	add	v17.2d, v9.2d, v9.2d
   2003  1.1  christos 	and	v18.16b, v18.16b, v8.16b
   2004  1.1  christos 	eor	v5.16b, v10.16b, v9.16b     // v5 = block 5 ^ T5
   2005  1.1  christos 	str	q9, [x2], #16               // T5 goes to the frame
   2006  1.1  christos 	ext	v10.16b, v18.16b, v18.16b, #8
   2007  1.1  christos 	cmtst	v9.2d, v9.2d, v6.2d         // T5 is no longer needed in v9; reuse it for the T7 correction term
   2008  1.1  christos 	and	v9.16b, v9.16b, v8.16b
   2009  1.1  christos 	eor	v10.16b, v17.16b, v10.16b   // v10 = T6
   2010  1.1  christos 	cmtst	v17.2d, v10.2d, v6.2d       // last use of the mask in v6 before v6 is overwritten
   2011  1.1  christos 	eor	v6.16b, v16.16b, v10.16b    // v6 = block 6 ^ T6
   2012  1.1  christos 	str	q10, [x2], #16              // T6 goes to the frame
   2013  1.1  christos 	ext	v9.16b, v9.16b, v9.16b, #8
   2014  1.1  christos 	add	v10.2d, v10.2d, v10.2d
   2015  1.1  christos 	eor	v9.16b, v10.16b, v9.16b     // v9 = T7
   2016  1.1  christos 	str	q9, [x2], #16               // T7 goes to the frame
   2017  1.1  christos 	eor	v7.16b, v7.16b, v9.16b      // v7 = block 7 ^ T7
   2018  1.1  christos 	add	v9.2d, v9.2d, v9.2d
   2019  1.1  christos 	and	v8.16b, v17.16b, v8.16b
   2020  1.1  christos 	ext	v8.16b, v8.16b, v8.16b, #8
   2021  1.1  christos 	eor	v8.16b, v9.16b, v8.16b      // v8 = T8
   2022  1.1  christos 	str	q8, [x2]                    // next round tweak
   2023  1.1  christos 
   2024  1.1  christos 	bl	_bsaes_decrypt8
   2025  1.1  christos 
                       	// The results are consumed in the order v0, v1, v6, v4, v2, v7, v3, v5,
                       	// each XORed with the tweak of the corresponding input block.
   2026  1.1  christos 	eor	v6.16b, v6.16b, v13.16b     // third result ^ T2
   2027  1.1  christos 	eor	v0.16b, v0.16b, v11.16b     // first result ^ T0
   2028  1.1  christos 	ldr	q8, [x0], #16               // reload T5
   2029  1.1  christos 	eor	v7.16b, v7.16b, v8.16b      // sixth result ^ T5
   2030  1.1  christos 	str	q0, [x21], #16
   2031  1.1  christos 	eor	v0.16b, v1.16b, v12.16b     // second result ^ T1
   2032  1.1  christos 	ldr	q1, [x0], #16               // reload T6
   2033  1.1  christos 	eor	v1.16b, v3.16b, v1.16b      // seventh result ^ T6
   2034  1.1  christos 	subs	x22, x22, #0x80             // flags are consumed by the bpl at the end of the loop
   2035  1.1  christos 	eor	v2.16b, v2.16b, v15.16b     // fifth result ^ T4
   2036  1.1  christos 	eor	v3.16b, v4.16b, v14.16b     // fourth result ^ T3
   2037  1.1  christos 	ldr	q4, [x0], #16               // reload T7
   2038  1.1  christos 	str	q0, [x21], #16
   2039  1.1  christos 	ldr	q11, [x0]                   // next round tweak
   2040  1.1  christos 	eor	v0.16b, v5.16b, v4.16b      // eighth result ^ T7
   2041  1.1  christos 	str	q6, [x21], #16
   2042  1.1  christos 	str	q3, [x21], #16
   2043  1.1  christos 	str	q2, [x21], #16
   2044  1.1  christos 	str	q7, [x21], #16
   2045  1.1  christos 	str	q1, [x21], #16
   2046  1.1  christos 	str	q0, [x21], #16
   2047  1.1  christos 	bpl	.Lxts_dec_loop              // at least another 128 bytes remain
   2048  1.1  christos 
   2049  1.1  christos .Lxts_dec_short:                    // x22 is negative here: (bytes left) - 128
   2050  1.1  christos 	adds	x22, x22, #0x70             // x22 = (bytes left) - 16
   2051  1.1  christos 	bmi	.Lxts_dec_done              // no whole block left to decrypt here
   2052  1.1  christos 
                       	// Blocks are loaded and tweaks derived one at a time; each subs/bmi pair
                       	// leaves for the handler matching the number of blocks loaded so far.
   2053  1.1  christos 	ldr	q8, .Lxts_magic             // tweak-update constant (defined outside this section)
   2054  1.1  christos 	sshr	v1.2d, v11.2d, #63
   2055  1.1  christos 	add	v2.2d, v11.2d, v11.2d
   2056  1.1  christos 	ldr	q9, .Lxts_magic+16          // bit mask used with cmtst (value defined outside this section)
   2057  1.1  christos 	subs	x22, x22, #0x10             // flags are consumed by the bmi to .Lxts_dec_1 below
   2058  1.1  christos 	ldr	q0, [x20], #16              // block 0
   2059  1.1  christos 	and	v1.16b, v1.16b, v8.16b
   2060  1.1  christos 	cmtst	v3.2d, v11.2d, v9.2d
   2061  1.1  christos 	ext	v1.16b, v1.16b, v1.16b, #8
   2062  1.1  christos 	and	v3.16b, v3.16b, v8.16b
   2063  1.1  christos 	eor	v12.16b, v2.16b, v1.16b     // v12 = T1
   2064  1.1  christos 	ext	v1.16b, v3.16b, v3.16b, #8
   2065  1.1  christos 	add	v2.2d, v12.2d, v12.2d
   2066  1.1  christos 	cmtst	v3.2d, v12.2d, v9.2d
   2067  1.1  christos 	eor	v13.16b, v2.16b, v1.16b     // v13 = T2
   2068  1.1  christos 	and	v22.16b, v3.16b, v8.16b     // correction term for T3, parked in v22
   2069  1.1  christos 	bmi	.Lxts_dec_1                 // exactly one block
   2070  1.1  christos 
   2071  1.1  christos 	ext	v2.16b, v22.16b, v22.16b, #8
   2072  1.1  christos 	add	v3.2d, v13.2d, v13.2d
   2073  1.1  christos 	ldr	q1, [x20], #16              // block 1
   2074  1.1  christos 	cmtst	v4.2d, v13.2d, v9.2d
   2075  1.1  christos 	subs	x22, x22, #0x10
   2076  1.1  christos 	eor	v14.16b, v3.16b, v2.16b     // v14 = T3
   2077  1.1  christos 	and	v23.16b, v4.16b, v8.16b     // correction term for T4
   2078  1.1  christos 	bmi	.Lxts_dec_2                 // exactly two blocks
   2079  1.1  christos 
   2080  1.1  christos 	ext	v3.16b, v23.16b, v23.16b, #8
   2081  1.1  christos 	add	v4.2d, v14.2d, v14.2d
   2082  1.1  christos 	ldr	q2, [x20], #16              // block 2
   2083  1.1  christos 	cmtst	v5.2d, v14.2d, v9.2d
   2084  1.1  christos 	eor	v0.16b, v0.16b, v11.16b     // v0 = block 0 ^ T0
   2085  1.1  christos 	subs	x22, x22, #0x10
   2086  1.1  christos 	eor	v15.16b, v4.16b, v3.16b     // v15 = T4
   2087  1.1  christos 	and	v24.16b, v5.16b, v8.16b     // correction term for T5
   2088  1.1  christos 	bmi	.Lxts_dec_3                 // exactly three blocks
   2089  1.1  christos 
   2090  1.1  christos 	ext	v4.16b, v24.16b, v24.16b, #8
   2091  1.1  christos 	add	v5.2d, v15.2d, v15.2d
   2092  1.1  christos 	ldr	q3, [x20], #16              // block 3
   2093  1.1  christos 	cmtst	v6.2d, v15.2d, v9.2d
   2094  1.1  christos 	eor	v1.16b, v1.16b, v12.16b     // v1 = block 1 ^ T1
   2095  1.1  christos 	subs	x22, x22, #0x10
   2096  1.1  christos 	eor	v16.16b, v5.16b, v4.16b     // v16 = T5
   2097  1.1  christos 	and	v25.16b, v6.16b, v8.16b     // correction term for T6
   2098  1.1  christos 	bmi	.Lxts_dec_4                 // exactly four blocks
   2099  1.1  christos 
   2100  1.1  christos 	ext	v5.16b, v25.16b, v25.16b, #8
   2101  1.1  christos 	add	v6.2d, v16.2d, v16.2d
   2102  1.1  christos 	add	x0, x19, #16                // x0 = write cursor into the frame's tweak storage
   2103  1.1  christos 	cmtst	v7.2d, v16.2d, v9.2d
   2104  1.1  christos 	ldr	q4, [x20], #16              // block 4
   2105  1.1  christos 	eor	v2.16b, v2.16b, v13.16b     // v2 = block 2 ^ T2
   2106  1.1  christos 	str	q16, [x0], #16              // T5 goes to the frame
   2107  1.1  christos 	subs	x22, x22, #0x10
   2108  1.1  christos 	eor	v17.16b, v6.16b, v5.16b     // v17 = T6
   2109  1.1  christos 	and	v26.16b, v7.16b, v8.16b     // correction term for T7
   2110  1.1  christos 	bmi	.Lxts_dec_5                 // exactly five blocks
   2111  1.1  christos 
   2112  1.1  christos 	ext	v7.16b, v26.16b, v26.16b, #8
   2113  1.1  christos 	add	v18.2d, v17.2d, v17.2d
   2114  1.1  christos 	ldr	q5, [x20], #16              // block 5
   2115  1.1  christos 	eor	v3.16b, v3.16b, v14.16b     // v3 = block 3 ^ T3
   2116  1.1  christos 	str	q17, [x0], #16              // T6 goes to the frame
   2117  1.1  christos 	subs	x22, x22, #0x10
   2118  1.1  christos 	eor	v18.16b, v18.16b, v7.16b    // v18 = T7
   2119  1.1  christos 	bmi	.Lxts_dec_6                 // exactly six blocks
   2120  1.1  christos 
                       	// Seven blocks
   2121  1.1  christos 	ldr	q6, [x20], #16              // block 6
   2122  1.1  christos 	eor	v4.16b, v4.16b, v15.16b     // v4 = block 4 ^ T4
   2123  1.1  christos 	eor	v5.16b, v5.16b, v16.16b     // v5 = block 5 ^ T5
   2124  1.1  christos 	str	q18, [x0]                   // next round tweak
   2125  1.1  christos 	mov	x9, sp                      // pass key schedule
   2126  1.1  christos 	mov	x10, x1                     // pass rounds
   2127  1.1  christos 	add	x0, x19, #16                // rewind x0 to the start of the tweak storage
   2128  1.1  christos 	sub	x22, x22, #0x10             // same bias as the other exits: x22 = (trailing bytes) - 16
   2129  1.1  christos 	eor	v6.16b, v6.16b, v17.16b     // v6 = block 6 ^ T6
   2130  1.1  christos 
   2131  1.1  christos 	bl	_bsaes_decrypt8
   2132  1.1  christos 
   2133  1.1  christos 	ldr	q16, [x0], #16              // reload T5
   2134  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2135  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   2136  1.1  christos 	ldr	q17, [x0], #16              // reload T6
   2137  1.1  christos 	eor	v6.16b, v6.16b, v13.16b
   2138  1.1  christos 	eor	v4.16b, v4.16b, v14.16b
   2139  1.1  christos 	eor	v2.16b, v2.16b, v15.16b
   2140  1.1  christos 	ldr	q11, [x0]                   // next round tweak
   2141  1.1  christos 	str	q0, [x21], #16
   2142  1.1  christos 	str	q1, [x21], #16
   2143  1.1  christos 	eor	v0.16b, v7.16b, v16.16b     // sixth result ^ T5
   2144  1.1  christos 	eor	v1.16b, v3.16b, v17.16b     // seventh result ^ T6
   2145  1.1  christos 	str	q6, [x21], #16
   2146  1.1  christos 	str	q4, [x21], #16
   2147  1.1  christos 	str	q2, [x21], #16
   2148  1.1  christos 	str	q0, [x21], #16
   2149  1.1  christos 	str	q1, [x21], #16
   2150  1.1  christos 	b	.Lxts_dec_done
   2151  1.1  christos 
   2152  1.1  christos .align	4
   2153  1.1  christos .Lxts_dec_6:                        // six blocks; v0-v3 already carry their tweak
   2154  1.1  christos 	eor	v4.16b, v4.16b, v15.16b
   2155  1.1  christos 	eor	v5.16b, v5.16b, v16.16b
   2156  1.1  christos 	mov	x9, sp                      // pass key schedule
   2157  1.1  christos 	mov	x10, x1                     // pass rounds
   2158  1.1  christos 	add	x0, x19, #16                // rewind x0 to the start of the tweak storage
   2159  1.1  christos 
   2160  1.1  christos 	bl	_bsaes_decrypt8
   2161  1.1  christos 
   2162  1.1  christos 	ldr	q16, [x0], #16              // reload T5
   2163  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2164  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   2165  1.1  christos 	eor	v6.16b, v6.16b, v13.16b
   2166  1.1  christos 	eor	v4.16b, v4.16b, v14.16b
   2167  1.1  christos 	ldr	q11, [x0]                   // next round tweak
   2168  1.1  christos 	eor	v2.16b, v2.16b, v15.16b
   2169  1.1  christos 	str	q0, [x21], #16
   2170  1.1  christos 	str	q1, [x21], #16
   2171  1.1  christos 	eor	v0.16b, v7.16b, v16.16b     // sixth result ^ T5
   2172  1.1  christos 	str	q6, [x21], #16
   2173  1.1  christos 	str	q4, [x21], #16
   2174  1.1  christos 	str	q2, [x21], #16
   2175  1.1  christos 	str	q0, [x21], #16
   2176  1.1  christos 	b	.Lxts_dec_done
   2177  1.1  christos 
   2178  1.1  christos .align	4
   2179  1.1  christos .Lxts_dec_5:                        // five blocks; v0-v2 already carry their tweak
   2180  1.1  christos 	eor	v3.16b, v3.16b, v14.16b
   2181  1.1  christos 	eor	v4.16b, v4.16b, v15.16b
   2182  1.1  christos 	mov	x9, sp                      // pass key schedule
   2183  1.1  christos 	mov	x10, x1                     // pass rounds
   2184  1.1  christos 	add	x0, x19, #16                // x0 -> T5, stored in the frame before the branch here
   2185  1.1  christos 
   2186  1.1  christos 	bl	_bsaes_decrypt8
   2187  1.1  christos 
   2188  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2189  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   2190  1.1  christos 	ldr	q11, [x0]                   // next round tweak
   2191  1.1  christos 	eor	v6.16b, v6.16b, v13.16b
   2192  1.1  christos 	eor	v4.16b, v4.16b, v14.16b
   2193  1.1  christos 	eor	v2.16b, v2.16b, v15.16b
   2194  1.1  christos 	str	q0, [x21], #16
   2195  1.1  christos 	str	q1, [x21], #16
   2196  1.1  christos 	str	q6, [x21], #16
   2197  1.1  christos 	str	q4, [x21], #16
   2198  1.1  christos 	str	q2, [x21], #16
   2199  1.1  christos 	b	.Lxts_dec_done
   2200  1.1  christos 
   2201  1.1  christos .align	4
   2202  1.1  christos .Lxts_dec_4:                        // four blocks; v0 and v1 already carry their tweak
   2203  1.1  christos 	eor	v2.16b, v2.16b, v13.16b
   2204  1.1  christos 	eor	v3.16b, v3.16b, v14.16b
   2205  1.1  christos 	mov	x9, sp                      // pass key schedule
   2206  1.1  christos 	mov	x10, x1                     // pass rounds
   2207  1.1  christos 	add	x0, x19, #16
   2208  1.1  christos 
   2209  1.1  christos 	bl	_bsaes_decrypt8
   2210  1.1  christos 
   2211  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2212  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   2213  1.1  christos 	eor	v6.16b, v6.16b, v13.16b
   2214  1.1  christos 	eor	v4.16b, v4.16b, v14.16b
   2215  1.1  christos 	mov	v11.16b, v15.16b            // next round tweak
   2216  1.1  christos 	str	q0, [x21], #16
   2217  1.1  christos 	str	q1, [x21], #16
   2218  1.1  christos 	str	q6, [x21], #16
   2219  1.1  christos 	str	q4, [x21], #16
   2220  1.1  christos 	b	.Lxts_dec_done
   2221  1.1  christos 
   2222  1.1  christos .align	4
   2223  1.1  christos .Lxts_dec_3:                        // three blocks; v0 already carries its tweak
   2224  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   2225  1.1  christos 	eor	v2.16b, v2.16b, v13.16b
   2226  1.1  christos 	mov	x9, sp                      // pass key schedule
   2227  1.1  christos 	mov	x10, x1                     // pass rounds
   2228  1.1  christos 	add	x0, x19, #16
   2229  1.1  christos 
   2230  1.1  christos 	bl	_bsaes_decrypt8
   2231  1.1  christos 
   2232  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2233  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   2234  1.1  christos 	eor	v6.16b, v6.16b, v13.16b
   2235  1.1  christos 	mov	v11.16b, v14.16b            // next round tweak
   2236  1.1  christos 	str	q0, [x21], #16
   2237  1.1  christos 	str	q1, [x21], #16
   2238  1.1  christos 	str	q6, [x21], #16
   2239  1.1  christos 	b	.Lxts_dec_done
   2240  1.1  christos 
   2241  1.1  christos .align	4
   2242  1.1  christos .Lxts_dec_2:                        // two blocks; neither has been XORed with its tweak yet
   2243  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2244  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   2245  1.1  christos 	mov	x9, sp                      // pass key schedule
   2246  1.1  christos 	mov	x10, x1                     // pass rounds
   2247  1.1  christos 	add	x0, x19, #16
   2248  1.1  christos 
   2249  1.1  christos 	bl	_bsaes_decrypt8
   2250  1.1  christos 
   2251  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2252  1.1  christos 	eor	v1.16b, v1.16b, v12.16b
   2253  1.1  christos 	mov	v11.16b, v13.16b            // next round tweak
   2254  1.1  christos 	str	q0, [x21], #16
   2255  1.1  christos 	str	q1, [x21], #16
   2256  1.1  christos 	b	.Lxts_dec_done
   2257  1.1  christos 
   2258  1.1  christos .align	4
   2259  1.1  christos .Lxts_dec_1:                        // one block: decrypted in place in a stack temporary via AES_decrypt
   2260  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2261  1.1  christos 	sub	x0, sp, #16                 // input = the temporary pushed just below
   2262  1.1  christos 	sub	x1, sp, #16                 // output = the same temporary (x1 no longer holds the round count)
   2263  1.1  christos 	mov	x2, x23                     // key1
   2264  1.1  christos 	mov	v13.d[0], v11.d[1]          // just in case AES_decrypt corrupts top half of callee-saved SIMD registers
   2265  1.1  christos 	mov	v14.d[0], v12.d[1]
   2266  1.1  christos 	str	q0, [sp, #-16]!             // push block ^ tweak
   2267  1.1  christos 
   2268  1.1  christos 	bl	AES_decrypt
   2269  1.1  christos 
   2270  1.1  christos 	ldr	q0, [sp], #16               // pop the result
   2271  1.1  christos 	trn1	v13.2d, v11.2d, v13.2d      // v13 = this block's tweak, rebuilt from the two saved low halves
   2272  1.1  christos 	trn1	v11.2d, v12.2d, v14.2d      // next round tweak
   2273  1.1  christos 	eor	v0.16b, v0.16b, v13.16b
   2274  1.1  christos 	str	q0, [x21], #16
   2275  1.1  christos 
   2276  1.1  christos .Lxts_dec_done:                     // every path arrives with x22 = (trailing bytes) - 16
   2277  1.1  christos 	adds	x22, x22, #0x10             // x22 = number of trailing bytes
   2278  1.1  christos 	beq	.Lxts_dec_ret               // length was a multiple of 16: nothing to steal
   2279  1.1  christos 
   2280  1.1  christos         // calculate one round of extra tweak for the stolen ciphertext
   2281  1.1  christos 	ldr	q8, .Lxts_magic
   2282  1.1  christos 	sshr	v6.2d, v11.2d, #63
   2283  1.1  christos 	and	v6.16b, v6.16b, v8.16b
   2284  1.1  christos 	add	v12.2d, v11.2d, v11.2d
   2285  1.1  christos 	ext	v6.16b, v6.16b, v6.16b, #8
   2286  1.1  christos 	eor	v12.16b, v12.16b, v6.16b    // v12 = tweak following v11
   2287  1.1  christos 
   2288  1.1  christos         // perform the final decryption with the last tweak value
   2289  1.1  christos 	ldr	q0, [x20], #16              // last full ciphertext block; x20 now points at the trailing bytes
   2290  1.1  christos 	eor	v0.16b, v0.16b, v12.16b
   2291  1.1  christos 	str	q0, [sp, #-16]!             // push; decrypted in place below
   2292  1.1  christos 	mov	x0, sp
   2293  1.1  christos 	mov	x1, sp
   2294  1.1  christos 	mov	x2, x23                     // key1
   2295  1.1  christos 	mov	v13.d[0], v11.d[1]          // just in case AES_decrypt corrupts top half of callee-saved SIMD registers
   2296  1.1  christos 	mov	v14.d[0], v12.d[1]
   2297  1.1  christos 
   2298  1.1  christos 	bl	AES_decrypt
   2299  1.1  christos 
   2300  1.1  christos 	trn1	v12.2d, v12.2d, v14.2d      // rebuild v12 from its two saved halves
   2301  1.1  christos 	trn1	v11.2d, v11.2d, v13.2d      // rebuild v11 likewise
   2302  1.1  christos 	ldr	q0, [sp], #16
   2303  1.1  christos 	eor	v0.16b, v0.16b, v12.16b
   2304  1.1  christos 	str	q0, [x21]                   // no post-increment: this output block is rewritten below
   2305  1.1  christos 
   2306  1.1  christos 	mov	x6, x21                     // x6 = start of that output block
   2307  1.1  christos         // Penultimate ciphertext block produces final plaintext part-block
   2308  1.1  christos         // plus remaining part of final ciphertext block. Move plaintext part
   2309  1.1  christos         // to final position and reuse penultimate plaintext block buffer to
   2310  1.1  christos         // construct final ciphertext block
   2311  1.1  christos .Lxts_dec_steal:
   2312  1.1  christos 	ldrb	w1, [x21]                   // plaintext byte just produced
   2313  1.1  christos 	ldrb	w0, [x20], #1               // trailing ciphertext byte
   2314  1.1  christos 	strb	w1, [x21, #0x10]            // plaintext byte moves 16 bytes further on
   2315  1.1  christos 	strb	w0, [x21], #1               // ciphertext byte takes its place
   2316  1.1  christos 
   2317  1.1  christos 	subs	x22, x22, #1
   2318  1.1  christos 	bhi	.Lxts_dec_steal             // repeat until all trailing bytes are moved
   2319  1.1  christos 
   2320  1.1  christos         // Finally decrypt the penultimate plaintext block using the
   2321  1.1  christos         // penultimate tweak
   2322  1.1  christos 	ldr	q0, [x6]
   2323  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2324  1.1  christos 	str	q0, [sp, #-16]!             // push; decrypted in place below
   2325  1.1  christos 	mov	x0, sp
   2326  1.1  christos 	mov	x1, sp
   2327  1.1  christos 	mov	x2, x23                     // key1
   2328  1.1  christos 	mov	x21, x6                     // x21 back to the start of the block (x6 is not read after the call)
   2329  1.1  christos 
   2330  1.1  christos 	bl	AES_decrypt
   2331  1.1  christos 
   2332  1.1  christos 	trn1	v11.2d, v11.2d, v13.2d      // rebuild v11 again; v13.d[0] still holds its upper half
   2333  1.1  christos 	ldr	q0, [sp], #16
   2334  1.1  christos 	eor	v0.16b, v0.16b, v11.16b
   2335  1.1  christos 	str	q0, [x21]
   2336  1.1  christos 
   2337  1.1  christos .Lxts_dec_ret:
   2338  1.1  christos 
   2339  1.1  christos 	movi	v0.16b, #0
   2340  1.1  christos 	movi	v1.16b, #0
   2341  1.1  christos .Lxts_dec_bzero:	//	wipe key schedule
   2342  1.1  christos 	stp	q0, q1, [sp], #32           // 32 bytes per step; nrounds*128-96 is always a multiple of 32
   2343  1.1  christos 	cmp	sp, x19                     // stop once sp is back at the frame base
   2344  1.1  christos 	bne	.Lxts_dec_bzero
   2345  1.1  christos 
   2346  1.1  christos 	ldp	x19, x20, [sp, #80]         // restore in the same layout as the prologue
   2347  1.1  christos 	ldp	x21, x22, [sp, #96]
   2348  1.1  christos 	ldr	x23, [sp, #112]
   2349  1.1  christos 	ldp	d8, d9, [sp, #128]
   2350  1.1  christos 	ldp	d10, d11, [sp, #144]
   2351  1.1  christos 	ldp	d12, d13, [sp, #160]
   2352  1.1  christos 	ldp	d14, d15, [sp, #176]
   2353  1.1  christos 	ldp	x29, x30, [sp], #192        // release the frame
   2354  1.1  christos 	ret
   2355  1.1  christos .size	ossl_bsaes_xts_decrypt,.-ossl_bsaes_xts_decrypt
   2356