Home | History | Annotate | Line # | Download | only in aarch64
      1  1.1  christos #include "arm_arch.h"
      2  1.1  christos 
      3  1.1  christos #if __ARM_MAX_ARCH__>=8
      4  1.1  christos .arch	armv8-a+crypto
      5  1.1  christos .text
      6  1.1  christos .globl	unroll8_eor3_aes_gcm_enc_128_kernel
      7  1.1  christos .type	unroll8_eor3_aes_gcm_enc_128_kernel,%function
      8  1.1  christos .align	4
      9  1.1  christos unroll8_eor3_aes_gcm_enc_128_kernel:
     10  1.1  christos 	AARCH64_VALID_CALL_TARGET
     11  1.1  christos 	cbz	x1, .L128_enc_ret
     12  1.1  christos 	stp	d8, d9, [sp, #-80]!
     13  1.1  christos 	lsr	x9, x1, #3
     14  1.1  christos 	mov	x16, x4
     15  1.1  christos 	mov	x8, x5
     16  1.1  christos 	stp	d10, d11, [sp, #16]
     17  1.1  christos 	stp	d12, d13, [sp, #32]
     18  1.1  christos 	stp	d14, d15, [sp, #48]
     19  1.1  christos 	mov	x5, #0xc200000000000000
     20  1.1  christos 	stp	x5, xzr, [sp, #64]
     21  1.1  christos 	add	x10, sp, #64
     22  1.1  christos 
     23  1.1  christos 	mov	x15, #0x100000000				//set up counter increment
     24  1.1  christos 	movi	v31.16b, #0x0
     25  1.1  christos 	mov	v31.d[1], x15
     26  1.1  christos 	mov	x5, x9
     27  1.1  christos 	ld1	{ v0.16b}, [x16]					//CTR block 0
     28  1.1  christos 
     29  1.1  christos 	sub	x5, x5, #1	 	//byte_len - 1
     30  1.1  christos 
     31  1.1  christos 	and	x5, x5, #0xffffffffffffff80		//number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
     32  1.1  christos 
     33  1.1  christos 	rev32	v30.16b, v0.16b				//set up reversed counter
     34  1.1  christos 
     35  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 0
     36  1.1  christos 
     37  1.1  christos 	rev32	v1.16b, v30.16b				//CTR block 1
     38  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 1
     39  1.1  christos 
     40  1.1  christos 	rev32	v2.16b, v30.16b				//CTR block 2
     41  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 2
     42  1.1  christos 
     43  1.1  christos 	rev32	v3.16b, v30.16b				//CTR block 3
     44  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 3
     45  1.1  christos 
     46  1.1  christos 	rev32	v4.16b, v30.16b				//CTR block 4
     47  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 4
     48  1.1  christos 
     49  1.1  christos 	rev32	v5.16b, v30.16b				//CTR block 5
     50  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 5
     51  1.1  christos 	ldp	q26, q27, [x8, #0]				  	//load rk0, rk1
     52  1.1  christos 
     53  1.1  christos 	rev32	v6.16b, v30.16b				//CTR block 6
     54  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 6
     55  1.1  christos 
     56  1.1  christos 	rev32	v7.16b, v30.16b				//CTR block 7
     57  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 7
     58  1.1  christos 
     59  1.1  christos 	aese	v4.16b, v26.16b
     60  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 0
     61  1.1  christos 	aese	v6.16b, v26.16b
     62  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 0
     63  1.1  christos 	aese	v3.16b, v26.16b
     64  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 0
     65  1.1  christos 
     66  1.1  christos 	aese	v0.16b, v26.16b
     67  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 0
     68  1.1  christos 	aese	v1.16b, v26.16b
     69  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 0
     70  1.1  christos 	aese	v2.16b, v26.16b
     71  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 0
     72  1.1  christos 
     73  1.1  christos 	aese	v7.16b, v26.16b
     74  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 0
     75  1.1  christos 	aese	v5.16b, v26.16b
     76  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 0
     77  1.1  christos 	ldp	q28, q26, [x8, #32]				//load rk2, rk3
     78  1.1  christos 
     79  1.1  christos 	aese	v3.16b, v27.16b
     80  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 1
     81  1.1  christos 
     82  1.1  christos 	aese	v7.16b, v27.16b
     83  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 1
     84  1.1  christos 	aese	v5.16b, v27.16b
     85  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 1
     86  1.1  christos 	aese	v4.16b, v27.16b
     87  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 1
     88  1.1  christos 
     89  1.1  christos 	aese	v2.16b, v27.16b
     90  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 1
     91  1.1  christos 	aese	v6.16b, v27.16b
     92  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 1
     93  1.1  christos 	aese	v0.16b, v27.16b
     94  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 1
     95  1.1  christos 
     96  1.1  christos 	aese	v5.16b, v28.16b
     97  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 2
     98  1.1  christos 	aese	v1.16b, v27.16b
     99  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 1
    100  1.1  christos 	aese	v0.16b, v28.16b
    101  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 2
    102  1.1  christos 
    103  1.1  christos 	aese	v2.16b, v28.16b
    104  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 2
    105  1.1  christos 	aese	v3.16b, v28.16b
    106  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 2
    107  1.1  christos 	aese	v7.16b, v28.16b
    108  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 2
    109  1.1  christos 
    110  1.1  christos 	aese	v1.16b, v28.16b
    111  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 2
    112  1.1  christos 	aese	v6.16b, v28.16b
    113  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 2
    114  1.1  christos 	aese	v4.16b, v28.16b
    115  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 2
    116  1.1  christos 
    117  1.1  christos 	aese	v2.16b, v26.16b
    118  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 3
    119  1.1  christos 
    120  1.1  christos 	ldp	q27, q28, [x8, #64]				//load rk4, rk5
    121  1.1  christos 	aese	v5.16b, v26.16b
    122  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 3
    123  1.1  christos 	aese	v0.16b, v26.16b
    124  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 3
    125  1.1  christos 
    126  1.1  christos 	aese	v4.16b, v26.16b
    127  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 3
    128  1.1  christos 	aese	v3.16b, v26.16b
    129  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 3
    130  1.1  christos 	aese	v6.16b, v26.16b
    131  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 3
    132  1.1  christos 
    133  1.1  christos 	aese	v7.16b, v26.16b
    134  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 3
    135  1.1  christos 
    136  1.1  christos 	aese	v6.16b, v27.16b
    137  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 4
    138  1.1  christos 	aese	v1.16b, v26.16b
    139  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 3
    140  1.1  christos 	aese	v5.16b, v27.16b
    141  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 4
    142  1.1  christos 
    143  1.1  christos 	aese	v7.16b, v27.16b
    144  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 4
    145  1.1  christos 	aese	v4.16b, v27.16b
    146  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 4
    147  1.1  christos 	aese	v0.16b, v27.16b
    148  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 4
    149  1.1  christos 
    150  1.1  christos 	aese	v1.16b, v27.16b
    151  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 4
    152  1.1  christos 	aese	v2.16b, v27.16b
    153  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 4
    154  1.1  christos 	aese	v3.16b, v27.16b
    155  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 4
    156  1.1  christos 
    157  1.1  christos 	aese	v7.16b, v28.16b
    158  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 5
    159  1.1  christos 	aese	v0.16b, v28.16b
    160  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 5
    161  1.1  christos 	ldp	q26, q27, [x8, #96]				//load rk6, rk7
    162  1.1  christos 
    163  1.1  christos 	aese	v1.16b, v28.16b
    164  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 5
    165  1.1  christos 	aese	v3.16b, v28.16b
    166  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 5
    167  1.1  christos 	aese	v2.16b, v28.16b
    168  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 5
    169  1.1  christos 
    170  1.1  christos 	aese	v4.16b, v28.16b
    171  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 5
    172  1.1  christos 	aese	v5.16b, v28.16b
    173  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 5
    174  1.1  christos 	aese	v6.16b, v28.16b
    175  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 5
    176  1.1  christos 
    177  1.1  christos 	aese	v4.16b, v26.16b
    178  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 6
    179  1.1  christos 	aese	v3.16b, v26.16b
    180  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 6
    181  1.1  christos 	aese	v2.16b, v26.16b
    182  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 6
    183  1.1  christos 
    184  1.1  christos 	aese	v7.16b, v26.16b
    185  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 6
    186  1.1  christos 	aese	v6.16b, v26.16b
    187  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 6
    188  1.1  christos 	aese	v5.16b, v26.16b
    189  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 6
    190  1.1  christos 
    191  1.1  christos 	aese	v0.16b, v26.16b
    192  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 6
    193  1.1  christos 	aese	v1.16b, v26.16b
    194  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 6
    195  1.1  christos 	ldp	q28, q26, [x8, #128]				//load rk8, rk9
    196  1.1  christos 
    197  1.1  christos 	aese	v5.16b, v27.16b
    198  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 7
    199  1.1  christos 
    200  1.1  christos 	ld1	{ v19.16b}, [x3]
    201  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8
    202  1.1  christos 	rev64	v19.16b, v19.16b
    203  1.1  christos 
    204  1.1  christos 	aese	v7.16b, v27.16b
    205  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 7
    206  1.1  christos 
    207  1.1  christos 	aese	v4.16b, v27.16b
    208  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 7
    209  1.1  christos 	aese	v3.16b, v27.16b
    210  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 7
    211  1.1  christos 	aese	v6.16b, v27.16b
    212  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 7
    213  1.1  christos 
    214  1.1  christos 	aese	v1.16b, v27.16b
    215  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 7
    216  1.1  christos 	aese	v2.16b, v27.16b
    217  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 7
    218  1.1  christos 	aese	v0.16b, v27.16b
    219  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 7
    220  1.1  christos 
    221  1.1  christos 	aese	v3.16b, v28.16b
    222  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 8
    223  1.1  christos 	aese	v6.16b, v28.16b
    224  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 8
    225  1.1  christos 	aese	v2.16b, v28.16b
    226  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 8
    227  1.1  christos 
    228  1.1  christos 	aese	v7.16b, v28.16b
    229  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 8
    230  1.1  christos 	aese	v0.16b, v28.16b
    231  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 8
    232  1.1  christos 	ldr	q27, [x8, #160]					//load rk10
    233  1.1  christos 
    234  1.1  christos 	aese	v3.16b, v26.16b						//AES block 8k+11 - round 9
    235  1.1  christos 	aese	v4.16b, v28.16b
    236  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 8
    237  1.1  christos 	aese	v2.16b, v26.16b						//AES block 8k+10 - round 9
    238  1.1  christos 
    239  1.1  christos 	aese	v5.16b, v28.16b
    240  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 8
    241  1.1  christos 	aese	v1.16b, v28.16b
    242  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 8
    243  1.1  christos 	aese	v6.16b, v26.16b						//AES block 8k+14 - round 9
    244  1.1  christos 
    245  1.1  christos 	aese	v4.16b, v26.16b						//AES block 8k+12 - round 9
    246  1.1  christos 	add	x5, x5, x0
    247  1.1  christos 	aese	v0.16b, v26.16b						//AES block 8k+8 - round 9
    248  1.1  christos 
    249  1.1  christos 	aese	v7.16b, v26.16b						//AES block 8k+15 - round 9
    250  1.1  christos 	aese	v5.16b, v26.16b						//AES block 8k+13 - round 9
    251  1.1  christos 	aese	v1.16b, v26.16b						//AES block 8k+9 - round 9
    252  1.1  christos 
    253  1.1  christos 	add	x4, x0, x1, lsr #3		//end_input_ptr
    254  1.1  christos 	cmp	x0, x5				//check if we have <= 8 blocks
    255  1.1  christos 	b.ge	.L128_enc_tail						//handle tail
    256  1.1  christos 
    257  1.1  christos 	ldp	q8, q9, [x0], #32			//AES block 0, 1 - load plaintext
    258  1.1  christos 
    259  1.1  christos 	ldp	q10, q11, [x0], #32			//AES block 2, 3 - load plaintext
    260  1.1  christos 
    261  1.1  christos 	ldp	q12, q13, [x0], #32			//AES block 4, 5 - load plaintext
    262  1.1  christos 
    263  1.1  christos 	ldp	q14, q15, [x0], #32			//AES block 6, 7 - load plaintext
    264  1.1  christos 	cmp	x0, x5				//check if we have <= 8 blocks
    265  1.1  christos 
    266  1.1  christos .inst	0xce006d08	//eor3 v8.16b, v8.16b, v0.16b, v27.16b				//AES block 0 - result
    267  1.1  christos 	rev32	v0.16b, v30.16b				//CTR block 8
    268  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8
    269  1.1  christos 
    270  1.1  christos .inst	0xce016d29	//eor3 v9.16b, v9.16b, v1.16b, v27.16b				//AES block 1 - result
    271  1.1  christos 	stp	q8, q9, [x2], #32			//AES block 0, 1 - store result
    272  1.1  christos 
    273  1.1  christos 	rev32	v1.16b, v30.16b				//CTR block 9
    274  1.1  christos .inst	0xce056dad	//eor3 v13.16b, v13.16b, v5.16b, v27.16b				//AES block 5 - result
    275  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 9
    276  1.1  christos 
    277  1.1  christos .inst	0xce026d4a	//eor3 v10.16b, v10.16b, v2.16b, v27.16b				//AES block 2 - result
    278  1.1  christos .inst	0xce066dce	//eor3 v14.16b, v14.16b, v6.16b, v27.16b				//AES block 6 - result
    279  1.1  christos .inst	0xce046d8c	//eor3 v12.16b, v12.16b, v4.16b, v27.16b				//AES block 4 - result
    280  1.1  christos 
    281  1.1  christos 	rev32	v2.16b, v30.16b				//CTR block 10
    282  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 10
    283  1.1  christos 
    284  1.1  christos .inst	0xce036d6b	//eor3 v11.16b, v11.16b, v3.16b, v27.16b				//AES block 3 - result
    285  1.1  christos .inst	0xce076def	//eor3 v15.16b, v15.16b, v7.16b,v27.16b				//AES block 7 - result
    286  1.1  christos 	stp	q10, q11, [x2], #32			//AES block 2, 3 - store result
    287  1.1  christos 
    288  1.1  christos 	rev32	v3.16b, v30.16b				//CTR block 11
    289  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 11
    290  1.1  christos 	stp	q12, q13, [x2], #32			//AES block 4, 5 - store result
    291  1.1  christos 
    292  1.1  christos 	stp	q14, q15, [x2], #32			//AES block 6, 7 - store result
    293  1.1  christos 
    294  1.1  christos 	rev32	v4.16b, v30.16b				//CTR block 12
    295  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 12
    296  1.1  christos 	b.ge	.L128_enc_prepretail					//do prepretail
    297  1.1  christos 
    298  1.1  christos .L128_enc_main_loop:	//main	loop start
    299  1.1  christos 	rev32	v5.16b, v30.16b				//CTR block 8k+13
    300  1.1  christos 	ldr	q20, [x3, #128]				//load h5l | h5h
    301  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
    302  1.1  christos 	ldr	q22, [x3, #160]				//load h6l | h6h
    303  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
    304  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+13
    305  1.1  christos 
    306  1.1  christos 	rev64	v9.16b, v9.16b						//GHASH block 8k+1
    307  1.1  christos 	rev64	v8.16b, v8.16b						//GHASH block 8k
    308  1.1  christos 	ldr	q23, [x3, #176]				//load h7l | h7h
    309  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
    310  1.1  christos 	ldr	q25, [x3, #208]				//load h8l | h8h
    311  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
    312  1.1  christos 
    313  1.1  christos 	rev32	v6.16b, v30.16b				//CTR block 8k+14
    314  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+14
    315  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8				//PRE 0
    316  1.1  christos 
    317  1.1  christos 	ldr	q21, [x3, #144]				//load h6k | h5k
    318  1.1  christos 	ldr	q24, [x3, #192]				//load h8k | h7k
    319  1.1  christos 	rev64	v13.16b, v13.16b						//GHASH block 8k+5 (t0, t1, t2 and t3 free)
    320  1.1  christos 	rev64	v11.16b, v11.16b						//GHASH block 8k+3
    321  1.1  christos 
    322  1.1  christos 	ldp	q26, q27, [x8, #0]				 	//load rk0, rk1
    323  1.1  christos 	eor	v8.16b, v8.16b, v19.16b				 	//PRE 1
    324  1.1  christos 	rev32	v7.16b, v30.16b				//CTR block 8k+15
    325  1.1  christos 
    326  1.1  christos 	rev64	v15.16b, v15.16b						//GHASH block 8k+7 (t0, t1, t2 and t3 free)
    327  1.1  christos 
    328  1.1  christos 	pmull2	v16.1q, v9.2d, v23.2d				//GHASH block 8k+1 - high
    329  1.1  christos 	rev64	v10.16b, v10.16b						//GHASH block 8k+2
    330  1.1  christos 	pmull2	v17.1q, v8.2d, v25.2d				//GHASH block 8k - high
    331  1.1  christos 
    332  1.1  christos 	pmull	v23.1q, v9.1d, v23.1d				//GHASH block 8k+1 - low
    333  1.1  christos 	trn1	v18.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
    334  1.1  christos 	pmull	v19.1q, v8.1d, v25.1d				//GHASH block 8k - low
    335  1.1  christos 
    336  1.1  christos 	trn2	v8.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
    337  1.1  christos 	pmull2	v29.1q, v10.2d, v22.2d				//GHASH block 8k+2 - high
    338  1.1  christos 	pmull2	v9.1q, v11.2d, v20.2d				//GHASH block 8k+3 - high
    339  1.1  christos 
    340  1.1  christos 	eor	v19.16b, v19.16b, v23.16b				//GHASH block 8k+1 - low
    341  1.1  christos 	ldr	q23, [x3, #80]				//load h3l | h3h
    342  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
    343  1.1  christos 	ldr	q25, [x3, #112]				//load h4l | h4h
    344  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
    345  1.1  christos 	aese	v5.16b, v26.16b
    346  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 0
    347  1.1  christos 
    348  1.1  christos 	aese	v1.16b, v26.16b
    349  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 0
    350  1.1  christos 	aese	v4.16b, v26.16b
    351  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 0
    352  1.1  christos 	eor	v17.16b, v17.16b, v16.16b				//GHASH block 8k+1 - high
    353  1.1  christos 
    354  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+15
    355  1.1  christos 	aese	v2.16b, v26.16b
    356  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 0
    357  1.1  christos 	eor	v8.16b, v8.16b, v18.16b			//GHASH block 8k, 8k+1 - mid
    358  1.1  christos 
    359  1.1  christos 	aese	v6.16b, v26.16b
    360  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 0
    361  1.1  christos 	aese	v1.16b, v27.16b
    362  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 1
    363  1.1  christos 	aese	v0.16b, v26.16b
    364  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 0
    365  1.1  christos 
    366  1.1  christos 	aese	v2.16b, v27.16b
    367  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 1
    368  1.1  christos 	aese	v3.16b, v26.16b
    369  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 0
    370  1.1  christos 	pmull	v22.1q, v10.1d, v22.1d				//GHASH block 8k+2 - low
    371  1.1  christos 
    372  1.1  christos 	aese	v5.16b, v27.16b
    373  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 1
    374  1.1  christos 	aese	v7.16b, v26.16b
    375  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 0
    376  1.1  christos 	aese	v0.16b, v27.16b
    377  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 1
    378  1.1  christos 
    379  1.1  christos .inst	0xce1d2631	//eor3 v17.16b, v17.16b, v29.16b,v9.16b			//GHASH block 8k+2, 8k+3 - high
    380  1.1  christos 	trn1	v29.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
    381  1.1  christos 	trn2	v10.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
    382  1.1  christos 
    383  1.1  christos 	ldp	q28, q26, [x8, #32]				//load rk2, rk3
    384  1.1  christos 	aese	v4.16b, v27.16b
    385  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 1
    386  1.1  christos 	aese	v3.16b, v27.16b
    387  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 1
    388  1.1  christos 
    389  1.1  christos 	pmull	v20.1q, v11.1d, v20.1d				//GHASH block 8k+3 - low
    390  1.1  christos 	aese	v7.16b, v27.16b
    391  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 1
    392  1.1  christos 	aese	v6.16b, v27.16b
    393  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 1
    394  1.1  christos 
    395  1.1  christos 	pmull2	v18.1q, v8.2d, v24.2d				//GHASH block 8k	- mid
    396  1.1  christos 	eor	v10.16b, v10.16b, v29.16b				//GHASH block 8k+2, 8k+3 - mid
    397  1.1  christos 	pmull	v24.1q, v8.1d, v24.1d				//GHASH block 8k+1 - mid
    398  1.1  christos 
    399  1.1  christos 	rev64	v14.16b, v14.16b						//GHASH block 8k+6 (t0, t1, and t2 free)
    400  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+2, 8k+3 - low
    401  1.1  christos 
    402  1.1  christos 	pmull2	v29.1q, v10.2d, v21.2d				//GHASH block 8k+2 - mid
    403  1.1  christos 	eor	v18.16b, v18.16b, v24.16b				//GHASH block 8k+1 - mid
    404  1.1  christos 	pmull	v21.1q, v10.1d, v21.1d				//GHASH block 8k+3 - mid
    405  1.1  christos 
    406  1.1  christos 	aese	v5.16b, v28.16b
    407  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 2
    408  1.1  christos 	aese	v4.16b, v28.16b
    409  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 2
    410  1.1  christos 	aese	v2.16b, v28.16b
    411  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 2
    412  1.1  christos 
    413  1.1  christos 	aese	v1.16b, v28.16b
    414  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 2
    415  1.1  christos .inst	0xce157652	//eor3 v18.16b, v18.16b, v21.16b, v29.16b			//GHASH block 8k+2, 8k+3 - mid
    416  1.1  christos 	aese	v6.16b, v28.16b
    417  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 2
    418  1.1  christos 
    419  1.1  christos 	aese	v0.16b, v28.16b
    420  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 2
    421  1.1  christos 	aese	v3.16b, v28.16b
    422  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 2
    423  1.1  christos 	aese	v7.16b, v28.16b
    424  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 2
    425  1.1  christos 
    426  1.1  christos 	aese	v6.16b, v26.16b
    427  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 3
    428  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
    429  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
    430  1.1  christos 	rev64	v12.16b, v12.16b						//GHASH block 8k+4 (t0, t1, and t2 free)
    431  1.1  christos 
    432  1.1  christos 	ldp	q27, q28, [x8, #64]				//load rk4, rk5
    433  1.1  christos 	aese	v2.16b, v26.16b
    434  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 3
    435  1.1  christos 	aese	v1.16b, v26.16b
    436  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 3
    437  1.1  christos 
    438  1.1  christos 	ldr	q20, [x3, #32]				//load h1l | h1h
    439  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
    440  1.1  christos 	ldr	q22, [x3, #64]				//load h2l | h2h
    441  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
    442  1.1  christos 	pmull2	v8.1q, v12.2d, v25.2d				//GHASH block 8k+4 - high
    443  1.1  christos 	pmull	v25.1q, v12.1d, v25.1d				//GHASH block 8k+4 - low
    444  1.1  christos 
    445  1.1  christos 	trn1	v16.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
    446  1.1  christos 	trn2	v12.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
    447  1.1  christos 
    448  1.1  christos 	aese	v0.16b, v26.16b
    449  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 3
    450  1.1  christos 	aese	v3.16b, v26.16b
    451  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 3
    452  1.1  christos 
    453  1.1  christos 	aese	v7.16b, v26.16b
    454  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 3
    455  1.1  christos 	aese	v4.16b, v26.16b
    456  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 3
    457  1.1  christos 
    458  1.1  christos 	aese	v5.16b, v26.16b
    459  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 3
    460  1.1  christos 	aese	v0.16b, v27.16b
    461  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 4
    462  1.1  christos 
    463  1.1  christos 	aese	v7.16b, v27.16b
    464  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 4
    465  1.1  christos 	aese	v3.16b, v27.16b
    466  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 4
    467  1.1  christos 	aese	v4.16b, v27.16b
    468  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 4
    469  1.1  christos 
    470  1.1  christos 	aese	v5.16b, v27.16b
    471  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 4
    472  1.1  christos 	aese	v6.16b, v27.16b
    473  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 4
    474  1.1  christos 	aese	v1.16b, v27.16b
    475  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 4
    476  1.1  christos 
    477  1.1  christos 	pmull2	v10.1q, v13.2d, v23.2d				//GHASH block 8k+5 - high
    478  1.1  christos 	eor	v12.16b, v12.16b, v16.16b				//GHASH block 8k+4, 8k+5 - mid
    479  1.1  christos 	pmull	v23.1q, v13.1d, v23.1d				//GHASH block 8k+5 - low
    480  1.1  christos 
    481  1.1  christos 	aese	v2.16b, v27.16b
    482  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 4
    483  1.1  christos 	ldp	q26, q27, [x8, #96]				//load rk6, rk7
    484  1.1  christos 	trn1	v13.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
    485  1.1  christos 
    486  1.1  christos 	pmull2	v16.1q, v12.2d, v24.2d				//GHASH block 8k+4 - mid
    487  1.1  christos 	pmull	v24.1q, v12.1d, v24.1d				//GHASH block 8k+5 - mid
    488  1.1  christos 	pmull2	v11.1q, v14.2d, v22.2d				//GHASH block 8k+6 - high
    489  1.1  christos 
    490  1.1  christos 	pmull2	v12.1q, v15.2d, v20.2d				//GHASH block 8k+7 - high
    491  1.1  christos 	aese	v2.16b, v28.16b
    492  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 5
    493  1.1  christos 	aese	v5.16b, v28.16b
    494  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 5
    495  1.1  christos 
    496  1.1  christos 	pmull	v22.1q, v14.1d, v22.1d				//GHASH block 8k+6 - low
    497  1.1  christos .inst	0xce082a31	//eor3 v17.16b, v17.16b, v8.16b, v10.16b			//GHASH block 8k+4, 8k+5 - high
    498  1.1  christos 	trn2	v14.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
    499  1.1  christos 
    500  1.1  christos .inst	0xce195e73	//eor3 v19.16b, v19.16b, v25.16b, v23.16b			//GHASH block 8k+4, 8k+5 - low
    501  1.1  christos 	aese	v6.16b, v28.16b
    502  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 5
    503  1.1  christos 
    504  1.1  christos 	eor	v14.16b, v14.16b, v13.16b				//GHASH block 8k+6, 8k+7 - mid
    505  1.1  christos 	aese	v7.16b, v28.16b
    506  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 5
    507  1.1  christos 	aese	v1.16b, v28.16b
    508  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 5
    509  1.1  christos 
    510  1.1  christos 	aese	v3.16b, v28.16b
    511  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 5
    512  1.1  christos 	aese	v4.16b, v28.16b
    513  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 5
    514  1.1  christos 	aese	v0.16b, v28.16b
    515  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 5
    516  1.1  christos 
    517  1.1  christos .inst	0xce184252	//eor3 v18.16b, v18.16b, v24.16b, v16.16b			//GHASH block 8k+4, 8k+5 - mid
    518  1.1  christos 	ldr	d16, [x10]			//MODULO - load modulo constant
    519  1.1  christos 	pmull	v20.1q, v15.1d, v20.1d				//GHASH block 8k+7 - low
    520  1.1  christos 
    521  1.1  christos 	aese	v7.16b, v26.16b
    522  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 6
    523  1.1  christos 	aese	v5.16b, v26.16b
    524  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 6
    525  1.1  christos 
    526  1.1  christos 	pmull2	v13.1q, v14.2d, v21.2d				//GHASH block 8k+6 - mid
    527  1.1  christos 	aese	v1.16b, v26.16b
    528  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 6
    529  1.1  christos 	aese	v2.16b, v26.16b
    530  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 6
    531  1.1  christos 
    532  1.1  christos 	pmull	v21.1q, v14.1d, v21.1d				//GHASH block 8k+7 - mid
    533  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+6, 8k+7 - low
    534  1.1  christos 	ldp	q8, q9, [x0], #32			//AES block 8k+8, 8k+9 - load plaintext
    535  1.1  christos 
    536  1.1  christos 	aese	v3.16b, v26.16b
    537  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 6
    538  1.1  christos 	rev32	v20.16b, v30.16b					//CTR block 8k+16
    539  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+16
    540  1.1  christos 
    541  1.1  christos 	aese	v4.16b, v26.16b
    542  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 6
    543  1.1  christos 	aese	v0.16b, v26.16b
    544  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 6
    545  1.1  christos 	aese	v6.16b, v26.16b
    546  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 6
    547  1.1  christos 
    548  1.1  christos .inst	0xce153652	//eor3 v18.16b, v18.16b, v21.16b, v13.16b			//GHASH block 8k+6, 8k+7 - mid
    549  1.1  christos 	ldp	q28, q26, [x8, #128]				//load rk8, rk9
    550  1.1  christos .inst	0xce0b3231	//eor3 v17.16b, v17.16b, v11.16b, v12.16b			//GHASH block 8k+6, 8k+7 - high
    551  1.1  christos 
    552  1.1  christos 	aese	v2.16b, v27.16b
    553  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 7
    554  1.1  christos 	aese	v7.16b, v27.16b
    555  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 7
    556  1.1  christos 	ldp	q10, q11, [x0], #32			//AES block 8k+10, 8k+11 - load plaintext
    557  1.1  christos 
    558  1.1  christos 	aese	v5.16b, v27.16b
    559  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 7
    560  1.1  christos 	aese	v6.16b, v27.16b
    561  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 7
    562  1.1  christos 	aese	v1.16b, v27.16b
    563  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 7
    564  1.1  christos 
    565  1.1  christos 	pmull	v21.1q, v17.1d, v16.1d		 	//MODULO - top 64b align with mid
    566  1.1  christos 	aese	v0.16b, v27.16b
    567  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 7
    568  1.1  christos 	aese	v4.16b, v27.16b
    569  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 7
    570  1.1  christos 
    571  1.1  christos 	rev32	v22.16b, v30.16b					//CTR block 8k+17
    572  1.1  christos 	aese	v3.16b, v27.16b
    573  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 7
    574  1.1  christos 
    575  1.1  christos 	aese	v5.16b, v28.16b
    576  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 8
    577  1.1  christos 	ldp	q12, q13, [x0], #32			//AES block 8k+12, 8k+13 - load plaintext
    578  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+17
    579  1.1  christos 
    580  1.1  christos 	aese	v2.16b, v28.16b
    581  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 8
    582  1.1  christos 	aese	v1.16b, v28.16b
    583  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 8
    584  1.1  christos 	aese	v7.16b, v28.16b
    585  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 8
    586  1.1  christos 
    587  1.1  christos 	aese	v4.16b, v28.16b
    588  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 8
    589  1.1  christos .inst	0xce114e52	//eor3 v18.16b, v18.16b, v17.16b, v19.16b		 	//MODULO - karatsuba tidy up
    590  1.1  christos 	ldr	q27, [x8, #160]					//load rk10
    591  1.1  christos 
    592  1.1  christos 	ext	v29.16b, v17.16b, v17.16b, #8				//MODULO - other top alignment
    593  1.1  christos 	rev32	v23.16b, v30.16b					//CTR block 8k+18
    594  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+18
    595  1.1  christos 	aese	v3.16b, v28.16b
    596  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 8
    597  1.1  christos 
    598  1.1  christos 	aese	v0.16b, v28.16b
    599  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 8
    600  1.1  christos .inst	0xce1d5652	//eor3 v18.16b, v18.16b, v29.16b, v21.16b			//MODULO - fold into mid
    601  1.1  christos 	aese	v6.16b, v28.16b
    602  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 8
    603  1.1  christos 
    604  1.1  christos 	aese	v2.16b, v26.16b						//AES block 8k+10 - round 9
    605  1.1  christos 	aese	v4.16b, v26.16b						//AES block 8k+12 - round 9
    606  1.1  christos 	aese	v1.16b, v26.16b						//AES block 8k+9 - round 9
    607  1.1  christos 
    608  1.1  christos 	ldp	q14, q15, [x0], #32			//AES block 8k+14, 8k+15 - load plaintext
    609  1.1  christos 	rev32	v25.16b, v30.16b					//CTR block 8k+19
    610  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+19
    611  1.1  christos 
    612  1.1  christos 	cmp	x0, x5				//.LOOP CONTROL
    613  1.1  christos .inst	0xce046d8c	//eor3 v12.16b, v12.16b, v4.16b, v27.16b				//AES block 4 - result
    614  1.1  christos 	aese	v7.16b, v26.16b						//AES block 8k+15 - round 9
    615  1.1  christos 
    616  1.1  christos 	aese	v6.16b, v26.16b						//AES block 8k+14 - round 9
    617  1.1  christos 	aese	v3.16b, v26.16b						//AES block 8k+11 - round 9
    618  1.1  christos 
    619  1.1  christos .inst	0xce026d4a	//eor3 v10.16b, v10.16b, v2.16b, v27.16b				//AES block 8k+10 - result
    620  1.1  christos 
    621  1.1  christos 	mov	v2.16b, v23.16b					//CTR block 8k+18
    622  1.1  christos 	aese	v0.16b, v26.16b						//AES block 8k+8 - round 9
    623  1.1  christos 
    624  1.1  christos 	rev32	v4.16b, v30.16b				//CTR block 8k+20
    625  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+20
    626  1.1  christos 
    627  1.1  christos .inst	0xce076def	//eor3 v15.16b, v15.16b, v7.16b, v27.16b				//AES block 7 - result
    628  1.1  christos 	aese	v5.16b, v26.16b						//AES block 8k+13 - round 9
    629  1.1  christos 	pmull	v17.1q, v18.1d, v16.1d			//MODULO - mid 64b align with low
    630  1.1  christos 
    631  1.1  christos .inst	0xce016d29	//eor3 v9.16b, v9.16b, v1.16b, v27.16b				//AES block 8k+9 - result
    632  1.1  christos .inst	0xce036d6b	//eor3 v11.16b, v11.16b, v3.16b, v27.16b				//AES block 8k+11 - result
    633  1.1  christos 	mov	v3.16b, v25.16b					//CTR block 8k+19
    634  1.1  christos 
    635  1.1  christos 	ext	v21.16b, v18.16b, v18.16b, #8				//MODULO - other mid alignment
    636  1.1  christos .inst	0xce056dad	//eor3 v13.16b, v13.16b, v5.16b, v27.16b				//AES block 5 - result
    637  1.1  christos 	mov	v1.16b, v22.16b					//CTR block 8k+17
    638  1.1  christos 
    639  1.1  christos .inst	0xce006d08	//eor3 v8.16b, v8.16b, v0.16b, v27.16b				//AES block 8k+8 - result
    640  1.1  christos 	mov	v0.16b, v20.16b					//CTR block 8k+16
    641  1.1  christos 	stp	q8, q9, [x2], #32			//AES block 8k+8, 8k+9 - store result
    642  1.1  christos 
    643  1.1  christos 	stp	q10, q11, [x2], #32			//AES block 8k+10, 8k+11 - store result
    644  1.1  christos .inst	0xce066dce	//eor3 v14.16b, v14.16b, v6.16b, v27.16b				//AES block 6 - result
    645  1.1  christos 
    646  1.1  christos 	stp	q12, q13, [x2], #32			//AES block 8k+12, 8k+13 - store result
    647  1.1  christos .inst	0xce115673	//eor3 v19.16b, v19.16b, v17.16b, v21.16b		 	//MODULO - fold into low
    648  1.1  christos 
    649  1.1  christos 	stp	q14, q15, [x2], #32			//AES block 8k+14, 8k+15 - store result
    650  1.1  christos 	b.lt	.L128_enc_main_loop
    651  1.1  christos 
    652  1.1  christos .L128_enc_prepretail:	//PREPRETAIL - GHASH the eight blocks held in v8-v15 (nothing is loaded from x0 or stored to x2 here) while running AES rounds 0-9 on counter blocks v0-v7
    653  1.1  christos 	rev32	v5.16b, v30.16b				//CTR block 8k+13
    654  1.1  christos 	ldr	q23, [x3, #176]				//load h7l | h7h
    655  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
    656  1.1  christos 	ldr	q25, [x3, #208]				//load h8l | h8h
    657  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
    658  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8				//PRE 0 - swap the 64-bit halves of the running hash in v19
    659  1.1  christos 
    660  1.1  christos 	ldr	q20, [x3, #128]				//load h5l | h5h
    661  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
    662  1.1  christos 	ldr	q22, [x3, #160]				//load h6l | h6h
    663  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
    664  1.1  christos 	rev64	v8.16b, v8.16b						//GHASH block 8k
    665  1.1  christos 	rev64	v9.16b, v9.16b						//GHASH block 8k+1
    666  1.1  christos 
    667  1.1  christos 	ldr	q21, [x3, #144]				//load h6k | h5k
    668  1.1  christos 	ldr	q24, [x3, #192]				//load h8k | h7k
    669  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+13
    670  1.1  christos 	rev64	v11.16b, v11.16b						//GHASH block 8k+3
    671  1.1  christos 
    672  1.1  christos 	rev64	v10.16b, v10.16b						//GHASH block 8k+2
    673  1.1  christos 	eor	v8.16b, v8.16b, v19.16b				 	//PRE 1 - fold the running hash into block 8k
    674  1.1  christos 
    675  1.1  christos 	rev32	v6.16b, v30.16b				//CTR block 8k+14
    676  1.1  christos 
    677  1.1  christos 	pmull2	v16.1q, v9.2d, v23.2d				//GHASH block 8k+1 - high
    678  1.1  christos 	pmull	v19.1q, v8.1d, v25.1d				//GHASH block 8k - low
    679  1.1  christos 	pmull2	v17.1q, v8.2d, v25.2d				//GHASH block 8k - high
    680  1.1  christos 
    681  1.1  christos 	rev64	v13.16b, v13.16b						//GHASH block 8k+5 (t0, t1, t2 and t3 free)
    682  1.1  christos 	trn1	v18.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
    683  1.1  christos 
    684  1.1  christos 	pmull	v23.1q, v9.1d, v23.1d				//GHASH block 8k+1 - low
    685  1.1  christos 	eor	v17.16b, v17.16b, v16.16b				//GHASH block 8k+1 - high
    686  1.1  christos 	trn2	v8.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
    687  1.1  christos 
    688  1.1  christos 	eor	v19.16b, v19.16b, v23.16b				//GHASH block 8k+1 - low
    689  1.1  christos 	eor	v8.16b, v8.16b, v18.16b			//GHASH block 8k, 8k+1 - mid
    690  1.1  christos 
    691  1.1  christos 	ldp	q26, q27, [x8, #0]				 	//load rk0, rk1
    692  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+14
    693  1.1  christos 
    694  1.1  christos 	pmull2	v18.1q, v8.2d, v24.2d				//GHASH block 8k	- mid
    695  1.1  christos 	pmull	v24.1q, v8.1d, v24.1d				//GHASH block 8k+1 - mid
    696  1.1  christos 
    697  1.1  christos 	rev64	v12.16b, v12.16b						//GHASH block 8k+4 (t0, t1, and t2 free)
    698  1.1  christos 	rev64	v15.16b, v15.16b						//GHASH block 8k+7 (t0, t1, t2 and t3 free)
    699  1.1  christos 
    700  1.1  christos 	eor	v18.16b, v18.16b, v24.16b				//GHASH block 8k+1 - mid
    701  1.1  christos 
    702  1.1  christos 	rev32	v7.16b, v30.16b				//CTR block 8k+15
    703  1.1  christos 
    704  1.1  christos 	rev64	v14.16b, v14.16b						//GHASH block 8k+6 (t0, t1, and t2 free)
    705  1.1  christos 
    706  1.1  christos 	aese	v2.16b, v26.16b
    707  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 0
    708  1.1  christos 
    709  1.1  christos 	pmull2	v9.1q, v11.2d, v20.2d				//GHASH block 8k+3 - high
    710  1.1  christos 	pmull2	v29.1q, v10.2d, v22.2d				//GHASH block 8k+2 - high
    711  1.1  christos 
    712  1.1  christos 	aese	v6.16b, v26.16b
    713  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 0
    714  1.1  christos 	aese	v3.16b, v26.16b
    715  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 0
    716  1.1  christos 
    717  1.1  christos 	pmull	v22.1q, v10.1d, v22.1d				//GHASH block 8k+2 - low
    718  1.1  christos 	aese	v1.16b, v26.16b
    719  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 0
    720  1.1  christos 
    721  1.1  christos .inst	0xce1d2631	//eor3 v17.16b, v17.16b, v29.16b, v9.16b			//GHASH block 8k+2, 8k+3 - high
    722  1.1  christos 	trn1	v29.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
    723  1.1  christos 	trn2	v10.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
    724  1.1  christos 
    725  1.1  christos 	aese	v5.16b, v26.16b
    726  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 0
    727  1.1  christos 	aese	v7.16b, v26.16b
    728  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 0
    729  1.1  christos 
    730  1.1  christos 	eor	v10.16b, v10.16b, v29.16b				//GHASH block 8k+2, 8k+3 - mid
    731  1.1  christos 	aese	v4.16b, v26.16b
    732  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 0
    733  1.1  christos 	aese	v0.16b, v26.16b
    734  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 0
    735  1.1  christos 
    736  1.1  christos 	aese	v3.16b, v27.16b
    737  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 1
    738  1.1  christos 	pmull	v20.1q, v11.1d, v20.1d				//GHASH block 8k+3 - low
    739  1.1  christos 
    740  1.1  christos 	ldr	q23, [x3, #80]				//load h3l | h3h
    741  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
    742  1.1  christos 	ldr	q25, [x3, #112]				//load h4l | h4h
    743  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
    744  1.1  christos 
    745  1.1  christos 	ldp	q28, q26, [x8, #32]				//load rk2, rk3
    746  1.1  christos 	aese	v5.16b, v27.16b
    747  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 1
    748  1.1  christos 	pmull2	v29.1q, v10.2d, v21.2d				//GHASH block 8k+2 - mid
    749  1.1  christos 
    750  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+2, 8k+3 - low
    751  1.1  christos 	pmull	v21.1q, v10.1d, v21.1d				//GHASH block 8k+3 - mid
    752  1.1  christos 
    753  1.1  christos 	aese	v1.16b, v27.16b
    754  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 1
    755  1.1  christos 	aese	v0.16b, v27.16b
    756  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 1
    757  1.1  christos 
    758  1.1  christos .inst	0xce157652	//eor3 v18.16b, v18.16b, v21.16b, v29.16b			//GHASH block 8k+2, 8k+3 - mid
    759  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
    760  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
    761  1.1  christos 	aese	v2.16b, v27.16b
    762  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 1
    763  1.1  christos 
    764  1.1  christos 	aese	v4.16b, v27.16b
    765  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 1
    766  1.1  christos 	aese	v7.16b, v27.16b
    767  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 1
    768  1.1  christos 
    769  1.1  christos 	aese	v5.16b, v28.16b
    770  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 2
    771  1.1  christos 	aese	v2.16b, v28.16b
    772  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 2
    773  1.1  christos 	aese	v3.16b, v28.16b
    774  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 2
    775  1.1  christos 
    776  1.1  christos 	aese	v1.16b, v28.16b
    777  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 2
    778  1.1  christos 	aese	v6.16b, v27.16b
    779  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 1
    780  1.1  christos 	aese	v4.16b, v28.16b
    781  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 2
    782  1.1  christos 
    783  1.1  christos 	aese	v5.16b, v26.16b
    784  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 3
    785  1.1  christos 	aese	v0.16b, v28.16b
    786  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 2
    787  1.1  christos 
    788  1.1  christos 	aese	v6.16b, v28.16b
    789  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 2
    790  1.1  christos 	aese	v7.16b, v28.16b
    791  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 2
    792  1.1  christos 	ldp	q27, q28, [x8, #64]				//load rk4, rk5
    793  1.1  christos 
    794  1.1  christos 	ldr	q20, [x3, #32]				//load h1l | h1h
    795  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
    796  1.1  christos 	ldr	q22, [x3, #64]				//load h2l | h2h
    797  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
    798  1.1  christos 	trn1	v16.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
    799  1.1  christos 	aese	v0.16b, v26.16b
    800  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 3
    801  1.1  christos 
    802  1.1  christos 	pmull2	v8.1q, v12.2d, v25.2d				//GHASH block 8k+4 - high
    803  1.1  christos 	aese	v6.16b, v26.16b
    804  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 3
    805  1.1  christos 	aese	v3.16b, v26.16b
    806  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 3
    807  1.1  christos 
    808  1.1  christos 	pmull	v25.1q, v12.1d, v25.1d				//GHASH block 8k+4 - low
    809  1.1  christos 	trn2	v12.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
    810  1.1  christos 	pmull2	v10.1q, v13.2d, v23.2d				//GHASH block 8k+5 - high
    811  1.1  christos 
    812  1.1  christos 	aese	v2.16b, v26.16b
    813  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 3
    814  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+15
    815  1.1  christos 
    816  1.1  christos 	aese	v7.16b, v26.16b
    817  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 3
    818  1.1  christos 	aese	v1.16b, v26.16b
    819  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 3
    820  1.1  christos 	eor	v12.16b, v12.16b, v16.16b				//GHASH block 8k+4, 8k+5 - mid
    821  1.1  christos 
    822  1.1  christos 	pmull	v23.1q, v13.1d, v23.1d				//GHASH block 8k+5 - low
    823  1.1  christos 	aese	v4.16b, v26.16b
    824  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 3
    825  1.1  christos 	pmull2	v11.1q, v14.2d, v22.2d				//GHASH block 8k+6 - high
    826  1.1  christos 
    827  1.1  christos 	trn1	v13.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
    828  1.1  christos 	pmull	v22.1q, v14.1d, v22.1d				//GHASH block 8k+6 - low
    829  1.1  christos 	trn2	v14.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
    830  1.1  christos 
    831  1.1  christos 	aese	v1.16b, v27.16b
    832  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 4
    833  1.1  christos 	aese	v3.16b, v27.16b
    834  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 4
    835  1.1  christos .inst	0xce082a31	//eor3 v17.16b, v17.16b, v8.16b, v10.16b			//GHASH block 8k+4, 8k+5 - high
    836  1.1  christos 
    837  1.1  christos .inst	0xce195e73	//eor3 v19.16b, v19.16b, v25.16b, v23.16b			//GHASH block 8k+4, 8k+5 - low
    838  1.1  christos 	eor	v14.16b, v14.16b, v13.16b				//GHASH block 8k+6, 8k+7 - mid
    839  1.1  christos 	pmull2	v16.1q, v12.2d, v24.2d				//GHASH block 8k+4 - mid
    840  1.1  christos 
    841  1.1  christos 	aese	v1.16b, v28.16b
    842  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 5
    843  1.1  christos 	aese	v6.16b, v27.16b
    844  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 4
    845  1.1  christos 	aese	v0.16b, v27.16b
    846  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 4
    847  1.1  christos 
    848  1.1  christos 	aese	v7.16b, v27.16b
    849  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 4
    850  1.1  christos 	aese	v2.16b, v27.16b
    851  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 4
    852  1.1  christos 
    853  1.1  christos 	pmull	v24.1q, v12.1d, v24.1d				//GHASH block 8k+5 - mid
    854  1.1  christos 	aese	v4.16b, v27.16b
    855  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 4
    856  1.1  christos 	aese	v5.16b, v27.16b
    857  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 4
    858  1.1  christos 
    859  1.1  christos 	pmull2	v12.1q, v15.2d, v20.2d				//GHASH block 8k+7 - high
    860  1.1  christos 	ldp	q26, q27, [x8, #96]				//load rk6, rk7
    861  1.1  christos 	pmull	v20.1q, v15.1d, v20.1d				//GHASH block 8k+7 - low
    862  1.1  christos 
    863  1.1  christos .inst	0xce184252	//eor3 v18.16b, v18.16b, v24.16b, v16.16b			//GHASH block 8k+4, 8k+5 - mid
    864  1.1  christos 	pmull2	v13.1q, v14.2d, v21.2d				//GHASH block 8k+6 - mid
    865  1.1  christos 	pmull	v21.1q, v14.1d, v21.1d				//GHASH block 8k+7 - mid
    866  1.1  christos 
    867  1.1  christos 	aese	v0.16b, v28.16b
    868  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 5
    869  1.1  christos 	aese	v7.16b, v28.16b
    870  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 5
    871  1.1  christos 	ldr	d16, [x10]			//MODULO - load modulo constant
    872  1.1  christos 
    873  1.1  christos 	aese	v2.16b, v28.16b
    874  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 5
    875  1.1  christos 	aese	v4.16b, v28.16b
    876  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 5
    877  1.1  christos 
    878  1.1  christos .inst	0xce0b3231	//eor3 v17.16b, v17.16b, v11.16b, v12.16b			//GHASH block 8k+6, 8k+7 - high
    879  1.1  christos 	aese	v5.16b, v28.16b
    880  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 5
    881  1.1  christos 	aese	v6.16b, v28.16b
    882  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 5
    883  1.1  christos 
    884  1.1  christos 	aese	v3.16b, v28.16b
    885  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 5
    886  1.1  christos 	aese	v4.16b, v26.16b
    887  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 6
    888  1.1  christos 
    889  1.1  christos 	aese	v5.16b, v26.16b
    890  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 6
    891  1.1  christos 	aese	v2.16b, v26.16b
    892  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 6
    893  1.1  christos 	aese	v0.16b, v26.16b
    894  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 6
    895  1.1  christos 
    896  1.1  christos 	aese	v3.16b, v26.16b
    897  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 6
    898  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+6, 8k+7 - low
    899  1.1  christos .inst	0xce153652	//eor3 v18.16b, v18.16b, v21.16b, v13.16b			//GHASH block 8k+6, 8k+7 - mid
    900  1.1  christos 
    901  1.1  christos 	aese	v6.16b, v26.16b
    902  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 6
    903  1.1  christos 	aese	v1.16b, v26.16b
    904  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 6
    905  1.1  christos 	aese	v7.16b, v26.16b
    906  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 6
    907  1.1  christos 
    908  1.1  christos 	pmull	v21.1q, v17.1d, v16.1d		 	//MODULO - top 64b align with mid
    909  1.1  christos .inst	0xce114e52	//eor3 v18.16b, v18.16b, v17.16b, v19.16b		 	//MODULO - karatsuba tidy up
    910  1.1  christos 	ldp	q28, q26, [x8, #128]				//load rk8, rk9
    911  1.1  christos 
    912  1.1  christos 	aese	v3.16b, v27.16b
    913  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 7
    914  1.1  christos 	aese	v6.16b, v27.16b
    915  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 7
    916  1.1  christos 	aese	v1.16b, v27.16b
    917  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 7
    918  1.1  christos 	ext	v29.16b, v17.16b, v17.16b, #8				//MODULO - other top alignment
    919  1.1  christos 
    920  1.1  christos 	aese	v5.16b, v27.16b
    921  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 7
    922  1.1  christos 	aese	v0.16b, v27.16b
    923  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 7
    924  1.1  christos .inst	0xce1d5652	//eor3 v18.16b, v18.16b, v29.16b, v21.16b			//MODULO - fold into mid
    925  1.1  christos 
    926  1.1  christos 	aese	v2.16b, v27.16b
    927  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 7
    928  1.1  christos 	aese	v7.16b, v27.16b
    929  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 7
    930  1.1  christos 
    931  1.1  christos 	pmull	v17.1q, v18.1d, v16.1d			//MODULO - mid 64b align with low
    932  1.1  christos 	aese	v4.16b, v27.16b
    933  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 7
    934  1.1  christos 
    935  1.1  christos 	aese	v7.16b, v28.16b
    936  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 8
    937  1.1  christos 	aese	v2.16b, v28.16b
    938  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 8
    939  1.1  christos 	aese	v1.16b, v28.16b
    940  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 8
    941  1.1  christos 	ext	v18.16b, v18.16b, v18.16b, #8				//MODULO - other mid alignment
    942  1.1  christos 
    943  1.1  christos 	aese	v6.16b, v28.16b
    944  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 8
    945  1.1  christos .inst	0xce114a73	//eor3 v19.16b, v19.16b, v17.16b, v18.16b		 	//MODULO - fold into low
    946  1.1  christos 	aese	v4.16b, v28.16b
    947  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 8
    948  1.1  christos 
    949  1.1  christos 	aese	v3.16b, v28.16b
    950  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 8
    951  1.1  christos 	aese	v0.16b, v28.16b
    952  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 8
    953  1.1  christos 	aese	v5.16b, v28.16b
    954  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 8
    955  1.1  christos 
    956  1.1  christos 	ldr	q27, [x8, #160]					//load rk10 (XORed into the result by the tail code, not applied here)
    957  1.1  christos 	aese	v6.16b, v26.16b						//AES block 8k+14 - round 9
    958  1.1  christos 	aese	v2.16b, v26.16b						//AES block 8k+10 - round 9
    959  1.1  christos 
    960  1.1  christos 	aese	v0.16b, v26.16b						//AES block 8k+8 - round 9
    961  1.1  christos 	aese	v1.16b, v26.16b						//AES block 8k+9 - round 9
    962  1.1  christos 
    963  1.1  christos 	aese	v3.16b, v26.16b						//AES block 8k+11 - round 9
    964  1.1  christos 	aese	v5.16b, v26.16b						//AES block 8k+13 - round 9
    965  1.1  christos 
    966  1.1  christos 	aese	v4.16b, v26.16b						//AES block 8k+12 - round 9
    967  1.1  christos 	aese	v7.16b, v26.16b						//AES block 8k+15 - round 9
    968  1.1  christos .L128_enc_tail:	//TAIL - encrypt the first remaining block, load the upper H powers, then dispatch on how many bytes are left
    969  1.1  christos 
    970  1.1  christos 	sub	x5, x4, x0 	//x5 = x4 - x0, used below as the number of bytes left to process (x5 no longer holds main_end_input_ptr)
    971  1.1  christos 	ldr	q8, [x0], #16				//AES block 8k+8 - load plaintext
    972  1.1  christos 
    973  1.1  christos 	mov	v29.16b, v27.16b				//keep rk10 in v29; v27 is reused as GHASH scratch by the blocks below
    974  1.1  christos 	ldp	q20, q21, [x3, #128]			//load h5l | h5h (v20) and h6k | h5k (v21)
    975  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
    976  1.1  christos 
    977  1.1  christos .inst	0xce007509	//eor3 v9.16b, v8.16b, v0.16b, v29.16b			//AES block 8k+8 - result
    978  1.1  christos 	ext	v16.16b, v19.16b, v19.16b, #8				//prepare final partial tag
    979  1.1  christos 	ldp	q22, q23, [x3, #160]			//load h6l | h6h (v22) and h7l | h7h (v23)
    980  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
    981  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
    982  1.1  christos 
    983  1.1  christos 	ldp	q24, q25, [x3, #192]			//load h8k | h7k (v24) and h8l | h8h (v25)
    984  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
    985  1.1  christos 	cmp	x5, #112				//112 = 7 blocks * 16 bytes
    986  1.1  christos 	b.gt	.L128_enc_blocks_more_than_7
    987  1.1  christos 
    988  1.1  christos 	mov	v7.16b, v6.16b				//at most 7 blocks left: shift keystream v1..v6 up into v2..v7
    989  1.1  christos 	mov	v6.16b, v5.16b
    990  1.1  christos 	movi	v17.8b, #0				//zero GHASH high accumulator
    991  1.1  christos 
    992  1.1  christos 	cmp	x5, #96				//96 = 6 blocks * 16 bytes
    993  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//undo one CTR increment (v31 is the counter increment)
    994  1.1  christos 	mov	v5.16b, v4.16b
    995  1.1  christos 
    996  1.1  christos 	mov	v4.16b, v3.16b
    997  1.1  christos 	mov	v3.16b, v2.16b
    998  1.1  christos 	mov	v2.16b, v1.16b				//v2 is the keystream block consumed first at .L128_enc_blocks_more_than_6
    999  1.1  christos 
   1000  1.1  christos 	movi	v19.8b, #0				//zero GHASH low accumulator (running hash was copied to v16 above)
   1001  1.1  christos 	movi	v18.8b, #0				//zero GHASH mid accumulator
   1002  1.1  christos 	b.gt	.L128_enc_blocks_more_than_6
   1003  1.1  christos 
   1004  1.1  christos 	mov	v7.16b, v6.16b				//at most 6 blocks left: shift v3..v6 up into v4..v7
   1005  1.1  christos 	cmp	x5, #80				//80 = 5 blocks * 16 bytes
   1006  1.1  christos 
   1007  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//undo one CTR increment
   1008  1.1  christos 	mov	v6.16b, v5.16b
   1009  1.1  christos 	mov	v5.16b, v4.16b
   1010  1.1  christos 
   1011  1.1  christos 	mov	v4.16b, v3.16b
   1012  1.1  christos 	mov	v3.16b, v1.16b				//v3 is the keystream block consumed first at .L128_enc_blocks_more_than_5
   1013  1.1  christos 	b.gt	.L128_enc_blocks_more_than_5
   1014  1.1  christos 
   1015  1.1  christos 	cmp	x5, #64				//64 = 4 blocks * 16 bytes
   1016  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//undo one CTR increment
   1017  1.1  christos 
   1018  1.1  christos 	mov	v7.16b, v6.16b				//at most 5 blocks left: shift v4..v6 up into v5..v7
   1019  1.1  christos 	mov	v6.16b, v5.16b
   1020  1.1  christos 
   1021  1.1  christos 	mov	v5.16b, v4.16b
   1022  1.1  christos 	mov	v4.16b, v1.16b				//v4 is the keystream block consumed first at .L128_enc_blocks_more_than_4
   1023  1.1  christos 	b.gt	.L128_enc_blocks_more_than_4
   1024  1.1  christos 
   1025  1.1  christos 	mov	v7.16b, v6.16b				//at most 4 blocks left: shift v5..v6 up into v6..v7
   1026  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//undo one CTR increment
   1027  1.1  christos 	mov	v6.16b, v5.16b
   1028  1.1  christos 
   1029  1.1  christos 	mov	v5.16b, v1.16b
   1030  1.1  christos 	cmp	x5, #48				//48 = 3 blocks * 16 bytes
   1031  1.1  christos 	b.gt	.L128_enc_blocks_more_than_3
   1032  1.1  christos 
   1033  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//undo one CTR increment
   1034  1.1  christos 	mov	v7.16b, v6.16b				//at most 3 blocks left: shift v6 up into v7
   1035  1.1  christos 	mov	v6.16b, v1.16b
   1036  1.1  christos 
   1037  1.1  christos 	cmp	x5, #32				//32 = 2 blocks * 16 bytes
   1038  1.1  christos 	ldr	q24, [x3, #96]					//load h4k | h3k
   1039  1.1  christos 	b.gt	.L128_enc_blocks_more_than_2
   1040  1.1  christos 
   1041  1.1  christos 	cmp	x5, #16				//16 = 1 block
   1042  1.1  christos 
   1043  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//undo one CTR increment
   1044  1.1  christos 	mov	v7.16b, v1.16b
   1045  1.1  christos 	b.gt	.L128_enc_blocks_more_than_1
   1046  1.1  christos 
   1047  1.1  christos 	ldr	q21, [x3, #48]					//load h2k | h1k
   1048  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//undo one CTR increment
   1049  1.1  christos 	b	.L128_enc_blocks_less_than_1
   1050  1.1  christos .L128_enc_blocks_more_than_7:	//blocks	left >  7 - store final-7 result, start the GHASH accumulators (v17 high, v18 mid, v19 low) from it, encrypt final-6
   1051  1.1  christos 	st1	{ v9.16b}, [x2], #16				//AES final-7 block  - store result
   1052  1.1  christos 
   1053  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-7 block
   1054  1.1  christos 	ldr	q9, [x0], #16				//AES final-6 block - load plaintext
   1055  1.1  christos 
   1056  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   1057  1.1  christos 
   1058  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-7 block - mid
   1059  1.1  christos 
   1060  1.1  christos 	pmull2	v17.1q, v8.2d, v25.2d				//GHASH final-7 block - high
   1061  1.1  christos 
   1062  1.1  christos 	ins	v18.d[0], v24.d[1]					//GHASH final-7 block - mid (upper 64 bits of v24, loaded as h8k | h7k, become the mid multiplier)
   1063  1.1  christos 
   1064  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-7 block - mid
   1065  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   1066  1.1  christos 
   1067  1.1  christos .inst	0xce017529	//eor3 v9.16b, v9.16b, v1.16b, v29.16b			//AES final-6 block - result
   1068  1.1  christos 
   1069  1.1  christos 	pmull	v18.1q, v27.1d, v18.1d				//GHASH final-7 block - mid
   1070  1.1  christos 	pmull	v19.1q, v8.1d, v25.1d				//GHASH final-7 block - low
   1071  1.1  christos .L128_enc_blocks_more_than_6:	//blocks	left >  6 - store final-6 result, accumulate its GHASH terms using v23 and v24, encrypt final-5
   1072  1.1  christos 
   1073  1.1  christos 	st1	{ v9.16b}, [x2], #16				//AES final-6 block - store result
   1074  1.1  christos 
   1075  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-6 block
   1076  1.1  christos 	ldr	q9, [x0], #16				//AES final-5 block - load plaintext
   1077  1.1  christos 
   1078  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   1079  1.1  christos 
   1080  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-6 block - mid
   1081  1.1  christos 
   1082  1.1  christos .inst	0xce027529	//eor3 v9.16b, v9.16b, v2.16b, v29.16b			//AES final-5 block - result
   1083  1.1  christos 	pmull	v26.1q, v8.1d, v23.1d				//GHASH final-6 block - low
   1084  1.1  christos 
   1085  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-6 block - mid
   1086  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   1087  1.1  christos 
   1088  1.1  christos 	pmull	v27.1q, v27.1d, v24.1d				//GHASH final-6 block - mid (lower 64 bits of v24 are the multiplier)
   1089  1.1  christos 	pmull2	v28.1q, v8.2d, v23.2d				//GHASH final-6 block - high
   1090  1.1  christos 
   1091  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-6 block - low
   1092  1.1  christos 
   1093  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-6 block - mid
   1094  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-6 block - high
   1095  1.1  christos .L128_enc_blocks_more_than_5:	//blocks	left >  5 - store final-5 result, accumulate its GHASH terms using v22 and v21, encrypt final-4
   1096  1.1  christos 
   1097  1.1  christos 	st1	{ v9.16b}, [x2], #16				//AES final-5 block - store result
   1098  1.1  christos 
   1099  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-5 block
   1100  1.1  christos 
   1101  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   1102  1.1  christos 
   1103  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-5 block - mid
   1104  1.1  christos 	ldr	q9, [x0], #16				//AES final-4 block - load plaintext
   1105  1.1  christos 	pmull2	v28.1q, v8.2d, v22.2d				//GHASH final-5 block - high
   1106  1.1  christos 
   1107  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-5 block - high
   1108  1.1  christos 
   1109  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-5 block - mid
   1110  1.1  christos 
   1111  1.1  christos 	ins	v27.d[1], v27.d[0]					//GHASH final-5 block - mid (copy into the upper half so pmull2 below pairs it with v21.d[1])
   1112  1.1  christos 
   1113  1.1  christos .inst	0xce037529	//eor3 v9.16b, v9.16b, v3.16b, v29.16b			//AES final-4 block - result
   1114  1.1  christos 	pmull	v26.1q, v8.1d, v22.1d				//GHASH final-5 block - low
   1115  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   1116  1.1  christos 
   1117  1.1  christos 	pmull2	v27.1q, v27.2d, v21.2d				//GHASH final-5 block - mid
   1118  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-5 block - low
   1119  1.1  christos 
   1120  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-5 block - mid
   1121  1.1  christos .L128_enc_blocks_more_than_4:	//blocks	left >  4
   1122  1.1  christos 	//Tail stage: v9 holds the ciphertext of the final-4 block. Store it, fold it into the GHASH sums (v17 high, v18 mid, v19 low) and produce the final-3 block in v9.
   1123  1.1  christos 	st1	{ v9.16b}, [x2], #16			  	//AES final-4 block - store result, output pointer += 16
   1124  1.1  christos 
   1125  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-4 block - byte-reverse each 64-bit half
   1126  1.1  christos 
   1127  1.1  christos 	ldr	q9, [x0], #16				//AES final-3 block - load plaintext, input pointer += 16
   1128  1.1  christos 
   1129  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag (v16 is zeroed below, so later stages add nothing)
   1130  1.1  christos 
   1131  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-4 block - mid: copy the upper 64 bits of the block
   1132  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in (a 64-bit vector write also clears the upper half)
   1133  1.1  christos 	pmull2	v28.1q, v8.2d, v20.2d				//GHASH final-4 block - high (v20 is set up before this stage)
   1134  1.1  christos 
   1135  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-4 block - mid: v27.d[0] = upper ^ lower 64 bits
   1136  1.1  christos 
   1137  1.1  christos 	pmull	v26.1q, v8.1d, v20.1d				//GHASH final-4 block - low
   1138  1.1  christos 
   1139  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-4 block - high: accumulate
   1140  1.1  christos 	pmull	v27.1q, v27.1d, v21.1d				//GHASH final-4 block - mid: uses the lower lane of v21, so no lane duplication is needed
   1141  1.1  christos 
   1142  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-4 block - low: accumulate
   1143  1.1  christos 
   1144  1.1  christos .inst	0xce047529	//eor3 v9.16b, v9.16b, v4.16b, v29.16b			//AES final-3 block - result (v4 and v29 are prepared before this stage)
   1145  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-4 block - mid: accumulate
   1146  1.1  christos .L128_enc_blocks_more_than_3:	//blocks	left >  3
   1147  1.1  christos 	//Tail stage: v9 holds the ciphertext of the final-3 block. Store it, fold it into the GHASH sums (v17 high, v18 mid, v19 low) and produce the final-2 block in v9.
   1148  1.1  christos 	st1	{ v9.16b}, [x2], #16			  	//AES final-3 block - store result, output pointer += 16
   1149  1.1  christos 
   1150  1.1  christos 	ldr	q25, [x3, #112]				//load h4l | h4h
   1151  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8		//swap the two 64-bit halves of v25
   1152  1.1  christos 
   1153  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-3 block - byte-reverse each 64-bit half
   1154  1.1  christos 
   1155  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag (v16 is zeroed on the next line, so later stages add nothing)
   1156  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in (a 64-bit vector write also clears the upper half)
   1157  1.1  christos 
   1158  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-3 block - mid: copy the upper 64 bits of the block
   1159  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   1160  1.1  christos 	pmull	v26.1q, v8.1d, v25.1d				//GHASH final-3 block - low
   1161  1.1  christos 
   1162  1.1  christos 	ldr	q9, [x0], #16				//AES final-2 block - load plaintext, input pointer += 16
   1163  1.1  christos 
   1164  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-3 block - mid: v27.d[0] = upper ^ lower 64 bits
   1165  1.1  christos 
   1166  1.1  christos 	ins	v27.d[1], v27.d[0]					//GHASH final-3 block - mid: duplicate into the upper lane read by pmull2 below
   1167  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-3 block - low: accumulate
   1168  1.1  christos 
   1169  1.1  christos .inst	0xce057529	//eor3 v9.16b, v9.16b, v5.16b, v29.16b			//AES final-2 block - result (v5 and v29 are prepared before this stage)
   1170  1.1  christos 
   1171  1.1  christos 	pmull2	v27.1q, v27.2d, v24.2d				//GHASH final-3 block - mid: uses the upper lane of v24
   1172  1.1  christos 	pmull2	v28.1q, v8.2d, v25.2d				//GHASH final-3 block - high
   1173  1.1  christos 
   1174  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-3 block - mid: accumulate
   1175  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-3 block - high: accumulate
   1176  1.1  christos .L128_enc_blocks_more_than_2:	//blocks	left >  2
   1177  1.1  christos 	//Tail stage: v9 holds the ciphertext of the final-2 block. Store it, fold it into the GHASH sums (v17 high, v18 mid, v19 low) and produce the final-1 block in v9.
   1178  1.1  christos 	st1	{ v9.16b}, [x2], #16			  	//AES final-2 block - store result, output pointer += 16
   1179  1.1  christos 
   1180  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-2 block - byte-reverse each 64-bit half
   1181  1.1  christos 
   1182  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag (v16 is zeroed below, so later stages add nothing)
   1183  1.1  christos 
   1184  1.1  christos 	ldr	q9, [x0], #16				//AES final-1 block - load plaintext, input pointer += 16
   1185  1.1  christos 
   1186  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-2 block - mid: copy the upper 64 bits of the block
   1187  1.1  christos 	ldr	q23, [x3, #80]				//load h3l | h3h
   1188  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8		//swap the two 64-bit halves of v23
   1189  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in (a 64-bit vector write also clears the upper half)
   1190  1.1  christos 
   1191  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-2 block - mid: v27.d[0] = upper ^ lower 64 bits
   1192  1.1  christos .inst	0xce067529	//eor3 v9.16b, v9.16b, v6.16b, v29.16b			//AES final-1 block - result (v6 and v29 are prepared before this stage)
   1193  1.1  christos 
   1194  1.1  christos 	pmull2	v28.1q, v8.2d, v23.2d				//GHASH final-2 block - high
   1195  1.1  christos 
   1196  1.1  christos 	pmull	v26.1q, v8.1d, v23.1d				//GHASH final-2 block - low
   1197  1.1  christos 	pmull	v27.1q, v27.1d, v24.1d				//GHASH final-2 block - mid: uses the lower lane of v24, which is not loaded in this stage
   1198  1.1  christos 
   1199  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-2 block - high: accumulate
   1200  1.1  christos 
   1201  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-2 block - mid: accumulate
   1202  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-2 block - low: accumulate
   1203  1.1  christos .L128_enc_blocks_more_than_1:	//blocks	left >  1
   1204  1.1  christos 	//Tail stage: v9 holds the ciphertext of the final-1 block. Store it, fold it into the GHASH sums (v17 high, v18 mid, v19 low) and produce the final block in v9.
   1205  1.1  christos 	st1	{ v9.16b}, [x2], #16			  	//AES final-1 block - store result, output pointer += 16
   1206  1.1  christos 
   1207  1.1  christos 	ldr	q22, [x3, #64]				//load h2l | h2h
   1208  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8		//swap the two 64-bit halves of v22
   1209  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-1 block - byte-reverse each 64-bit half
   1210  1.1  christos 	ldr	q9, [x0], #16				//AES final block - load plaintext; NOTE(review): always reads 16 bytes, the partial-block mask is only applied later
   1211  1.1  christos 
   1212  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag (v16 is zeroed below)
   1213  1.1  christos 
   1214  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in (a 64-bit vector write also clears the upper half)
   1215  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-1 block - mid: copy the upper 64 bits of the block
   1216  1.1  christos .inst	0xce077529	//eor3 v9.16b, v9.16b, v7.16b, v29.16b			//AES final block - result (v7 and v29 are prepared before this stage)
   1217  1.1  christos 
   1218  1.1  christos 	pmull2	v28.1q, v8.2d, v22.2d				//GHASH final-1 block - high
   1219  1.1  christos 
   1220  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-1 block - mid: v27.d[0] = upper ^ lower 64 bits
   1221  1.1  christos 
   1222  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   1223  1.1  christos 
   1224  1.1  christos 	ins	v27.d[1], v27.d[0]					//GHASH final-1 block - mid: duplicate into the upper lane read by pmull2 below
   1225  1.1  christos 
   1226  1.1  christos 	pmull	v26.1q, v8.1d, v22.1d				//GHASH final-1 block - low
   1227  1.1  christos 	pmull2	v27.1q, v27.2d, v21.2d				//GHASH final-1 block - mid: uses the upper lane of v21
   1228  1.1  christos 
   1229  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-1 block - high: accumulate
   1230  1.1  christos 
   1231  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-1 block - mid: accumulate
   1232  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-1 block - low: accumulate
   1233  1.1  christos .L128_enc_blocks_less_than_1:	//blocks	left <= 1
   1234  1.1  christos 	//Last stage: write back the counter, mask and store the possibly partial final block, fold it into GHASH, reduce, store the hash state at [x3], restore d8-d15 and return x9.
   1235  1.1  christos 	rev32	v30.16b, v30.16b				//undo the rev32 applied to the counter at setup
   1236  1.1  christos 	str	q30, [x16]					//store the updated counter
   1237  1.1  christos 	and	x1, x1, #127			 	//bit_length %= 128
   1238  1.1  christos 
   1239  1.1  christos 	sub	x1, x1, #128			 	//bit_length -= 128
   1240  1.1  christos 
   1241  1.1  christos 	neg	x1, x1				//bit_length = 128 - #bits in input (in range [1,128])
   1242  1.1  christos 
   1243  1.1  christos 	mvn	x6, xzr						//temp0_x = 0xffffffffffffffff
   1244  1.1  christos 	ld1	{ v26.16b}, [x2]					//load existing bytes where the possibly partial last block is to be stored (always a 16-byte read)
   1245  1.1  christos 	and	x1, x1, #127			 	//bit_length %= 128: maps 128 (full last block) to 0, so x1 = number of unused bits in the last block
   1246  1.1  christos 
   1247  1.1  christos 	lsr	x6, x6, x1				//temp0_x is mask for top 64b of last block (register shift amount is taken mod 64)
   1248  1.1  christos 	mvn	x7, xzr						//temp1_x = 0xffffffffffffffff
   1249  1.1  christos 	cmp	x1, #64					//lt: fewer than 64 unused bits
   1250  1.1  christos 
   1251  1.1  christos 	csel	x13, x7, x6, lt				//mask for v0.d[0]: all ones if x1 < 64, else the shifted mask
   1252  1.1  christos 	csel	x14, x6, xzr, lt			//mask for v0.d[1]: the shifted mask if x1 < 64, else zero
   1253  1.1  christos 
   1254  1.1  christos 	mov	v0.d[1], x14
   1255  1.1  christos 	mov	v0.d[0], x13					//ctr0b is mask for last block
   1256  1.1  christos 
   1257  1.1  christos 	and	v9.16b, v9.16b, v0.16b					//possibly partial last block has zeroes in highest bits
   1258  1.1  christos 
   1259  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final block - taken from the masked block, before existing bytes are merged in
   1260  1.1  christos 
   1261  1.1  christos 	bif	v9.16b, v26.16b, v0.16b					//insert existing bytes in top end of result before storing (bits where the mask is 0 come from v26)
   1262  1.1  christos 	st1	{ v9.16b}, [x2]				//store all 16B (always a 16-byte write, no pointer update)
   1263  1.1  christos 
   1264  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   1265  1.1  christos 
   1266  1.1  christos 	ins	v16.d[0], v8.d[1]					//GHASH final block - mid: v16 is reused as scratch from here on
   1267  1.1  christos 
   1268  1.1  christos 	eor	v16.8b, v16.8b, v8.8b				//GHASH final block - mid: v16.d[0] = upper ^ lower 64 bits
   1269  1.1  christos 	ldr	q20, [x3, #32]				//load h1l | h1h
   1270  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8		//swap the two 64-bit halves of v20
   1271  1.1  christos 
   1272  1.1  christos 	pmull	v16.1q, v16.1d, v21.1d				//GHASH final block - mid: uses the lower lane of v21, which is not loaded in this stage
   1273  1.1  christos 
   1274  1.1  christos 	pmull2	v28.1q, v8.2d, v20.2d				//GHASH final block - high
   1275  1.1  christos 	eor	v18.16b, v18.16b, v16.16b				//GHASH final block - mid: accumulate
   1276  1.1  christos 	ldr	d16, [x10]			//MODULO - load modulo constant (x10 = sp + 64, where entry stored 0xc200000000000000)
   1277  1.1  christos 
   1278  1.1  christos 	pmull	v26.1q, v8.1d, v20.1d				//GHASH final block - low
   1279  1.1  christos 
   1280  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final block - high: accumulate
   1281  1.1  christos 
   1282  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final block - low: accumulate
   1283  1.1  christos 
   1284  1.1  christos 	ext	v21.16b, v17.16b, v17.16b, #8			 	//MODULO - other top alignment
   1285  1.1  christos 	pmull	v29.1q, v17.1d, v16.1d		  	//MODULO - top 64b align with mid
   1286  1.1  christos 
   1287  1.1  christos .inst	0xce114e52	//eor3 v18.16b, v18.16b, v17.16b, v19.16b		  	//MODULO - karatsuba tidy up
   1288  1.1  christos 
   1289  1.1  christos .inst	0xce1d5652	//eor3 v18.16b, v18.16b, v29.16b, v21.16b		 	//MODULO - fold into mid
   1290  1.1  christos 
   1291  1.1  christos 	pmull	v17.1q, v18.1d, v16.1d			//MODULO - mid 64b align with low
   1292  1.1  christos 	ext	v21.16b, v18.16b, v18.16b, #8			  	//MODULO - other mid alignment
   1293  1.1  christos 
   1294  1.1  christos .inst	0xce115673	//eor3 v19.16b, v19.16b, v17.16b, v21.16b		  	//MODULO - fold into low
   1295  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8		//swap the two 64-bit halves of the reduced hash
   1296  1.1  christos 	rev64	v19.16b, v19.16b			//byte-reverse each 64-bit half
   1297  1.1  christos 	st1	{ v19.16b }, [x3]			//store the updated hash state at [x3]
   1298  1.1  christos 	mov	x0, x9					//return value: x9, the byte length computed at entry (bit length >> 3)
   1299  1.1  christos 
   1300  1.1  christos 	ldp	d10, d11, [sp, #16]			//restore the registers saved at entry
   1301  1.1  christos 	ldp	d12, d13, [sp, #32]
   1302  1.1  christos 	ldp	d14, d15, [sp, #48]
   1303  1.1  christos 	ldp	d8, d9, [sp], #80			//restore d8/d9 and release the 80-byte frame
   1304  1.1  christos 	ret
   1305  1.1  christos 
   1306  1.1  christos .L128_enc_ret:	//early exit taken by the entry cbz when x1 is zero; no stack frame has been created at that point
   1307  1.1  christos 	mov	w0, #0x0	//return 0 (writing w0 also clears the upper 32 bits of x0)
   1308  1.1  christos 	ret
   1309  1.1  christos .size	unroll8_eor3_aes_gcm_enc_128_kernel,.-unroll8_eor3_aes_gcm_enc_128_kernel
   1310  1.1  christos .globl	unroll8_eor3_aes_gcm_dec_128_kernel
   1311  1.1  christos .type	unroll8_eor3_aes_gcm_dec_128_kernel,%function
   1312  1.1  christos .align	4
   1313  1.1  christos unroll8_eor3_aes_gcm_dec_128_kernel:	//decrypt kernel entry; as used below: x0 = ciphertext in, x1 = length in bits, x2 = output, x3 = hash state followed by the hash key table, x4 = counter block, x5 = round keys rk0..rk10
   1314  1.1  christos 	AARCH64_VALID_CALL_TARGET
   1315  1.1  christos 	cbz	x1, .L128_dec_ret			//zero length: leave before creating a stack frame
   1316  1.1  christos 	stp	d8, d9, [sp, #-80]!			//allocate an 80-byte frame and save d8/d9
   1317  1.1  christos 	lsr	x9, x1, #3				//x9 = byte_len (x1 is a bit count)
   1318  1.1  christos 	mov	x16, x4					//x16 = counter block pointer (x4 is reused as end_input_ptr below)
   1319  1.1  christos 	mov	x8, x5					//x8 = round key pointer (x5 is reused below)
   1320  1.1  christos 	stp	d10, d11, [sp, #16]
   1321  1.1  christos 	stp	d12, d13, [sp, #32]
   1322  1.1  christos 	stp	d14, d15, [sp, #48]
   1323  1.1  christos 	mov	x5, #0xc200000000000000		//same value the enc kernel above stores and later loads as its "modulo constant"
   1324  1.1  christos 	stp	x5, xzr, [sp, #64]			//keep the constant (and a zero upper half) in the frame
   1325  1.1  christos 	add	x10, sp, #64				//x10 = address of the constant
   1326  1.1  christos 
   1327  1.1  christos 	mov	x5, x9
   1328  1.1  christos 	ld1	{ v0.16b}, [x16]					//CTR block 0
   1329  1.1  christos 
   1330  1.1  christos 	ldp	q26, q27, [x8, #0]				 	//load rk0, rk1
   1331  1.1  christos 	sub	x5, x5, #1		//byte_len - 1
   1332  1.1  christos 
   1333  1.1  christos 	mov	x15, #0x100000000				//set up counter increment
   1334  1.1  christos 	movi	v31.16b, #0x0
   1335  1.1  christos 	mov	v31.d[1], x15				//v31 = increment vector: 1 in the top 32-bit lane, 0 elsewhere
   1336  1.1  christos 	ld1	{ v19.16b}, [x3]			//load the current hash state from [x3]
   1337  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8		//swap its two 64-bit halves
   1338  1.1  christos 	rev64	v19.16b, v19.16b			//byte-reverse each 64-bit half
   1339  1.1  christos 
   1340  1.1  christos 	rev32	v30.16b, v0.16b				//set up reversed counter
   1341  1.1  christos 
   1342  1.1  christos 	aese	v0.16b, v26.16b
   1343  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 0
   1344  1.1  christos 
   1345  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 0
   1346  1.1  christos 
   1347  1.1  christos 	rev32	v1.16b, v30.16b				//CTR block 1
   1348  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 1
   1349  1.1  christos 
   1350  1.1  christos 	and	x5, x5, #0xffffffffffffff80	//number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
   1351  1.1  christos 
   1352  1.1  christos 	rev32	v2.16b, v30.16b				//CTR block 2
   1353  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 2
   1354  1.1  christos 	aese	v1.16b, v26.16b
   1355  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 0
   1356  1.1  christos 
   1357  1.1  christos 	rev32	v3.16b, v30.16b				//CTR block 3
   1358  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 3
   1359  1.1  christos 
   1360  1.1  christos 	aese	v0.16b, v27.16b
   1361  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 1
   1362  1.1  christos 	aese	v1.16b, v27.16b
   1363  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 1
   1364  1.1  christos 
   1365  1.1  christos 	rev32	v4.16b, v30.16b				//CTR block 4
   1366  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 4
   1367  1.1  christos 
   1368  1.1  christos 	rev32	v5.16b, v30.16b				//CTR block 5
   1369  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 5
   1370  1.1  christos 
   1371  1.1  christos 	aese	v2.16b, v26.16b
   1372  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 0
   1373  1.1  christos 
   1374  1.1  christos 	rev32	v6.16b, v30.16b				//CTR block 6
   1375  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 6
   1376  1.1  christos 	aese	v5.16b, v26.16b
   1377  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 0
   1378  1.1  christos 
   1379  1.1  christos 	aese	v3.16b, v26.16b
   1380  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 0
   1381  1.1  christos 	aese	v4.16b, v26.16b
   1382  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 0
   1383  1.1  christos 
   1384  1.1  christos 	rev32	v7.16b, v30.16b				//CTR block 7
   1385  1.1  christos 
   1386  1.1  christos 	aese	v6.16b, v26.16b
   1387  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 0
   1388  1.1  christos 	aese	v2.16b, v27.16b
   1389  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 1
   1390  1.1  christos 
   1391  1.1  christos 	aese	v7.16b, v26.16b
   1392  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 0
   1393  1.1  christos 
   1394  1.1  christos 	ldp	q28, q26, [x8, #32]				//load rk2, rk3
   1395  1.1  christos 
   1396  1.1  christos 	aese	v6.16b, v27.16b
   1397  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 1
   1398  1.1  christos 	aese	v5.16b, v27.16b
   1399  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 1
   1400  1.1  christos 
   1401  1.1  christos 	aese	v4.16b, v27.16b
   1402  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 1
   1403  1.1  christos 	aese	v7.16b, v27.16b
   1404  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 1
   1405  1.1  christos 
   1406  1.1  christos 	aese	v7.16b, v28.16b
   1407  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 2
   1408  1.1  christos 	aese	v0.16b, v28.16b
   1409  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 2
   1410  1.1  christos 	aese	v3.16b, v27.16b
   1411  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 1
   1412  1.1  christos 
   1413  1.1  christos 	aese	v6.16b, v28.16b
   1414  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 2
   1415  1.1  christos 	aese	v2.16b, v28.16b
   1416  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 2
   1417  1.1  christos 	aese	v5.16b, v28.16b
   1418  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 2
   1419  1.1  christos 
   1420  1.1  christos 	aese	v4.16b, v28.16b
   1421  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 2
   1422  1.1  christos 	aese	v3.16b, v28.16b
   1423  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 2
   1424  1.1  christos 	aese	v1.16b, v28.16b
   1425  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 2
   1426  1.1  christos 
   1427  1.1  christos 	aese	v6.16b, v26.16b
   1428  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 3
   1429  1.1  christos 	aese	v2.16b, v26.16b
   1430  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 3
   1431  1.1  christos 
   1432  1.1  christos 	ldp	q27, q28, [x8, #64]				//load rk4, rk5
   1433  1.1  christos 	aese	v5.16b, v26.16b
   1434  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 3
   1435  1.1  christos 
   1436  1.1  christos 	aese	v0.16b, v26.16b
   1437  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 3
   1438  1.1  christos 	aese	v7.16b, v26.16b
   1439  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 3
   1440  1.1  christos 
   1441  1.1  christos 	aese	v3.16b, v26.16b
   1442  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 3
   1443  1.1  christos 	aese	v1.16b, v26.16b
   1444  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 3
   1445  1.1  christos 
   1446  1.1  christos 	aese	v0.16b, v27.16b
   1447  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 4
   1448  1.1  christos 	aese	v7.16b, v27.16b
   1449  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 4
   1450  1.1  christos 	aese	v4.16b, v26.16b
   1451  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 3
   1452  1.1  christos 
   1453  1.1  christos 	aese	v6.16b, v27.16b
   1454  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 4
   1455  1.1  christos 	aese	v1.16b, v27.16b
   1456  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 4
   1457  1.1  christos 	aese	v3.16b, v27.16b
   1458  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 4
   1459  1.1  christos 
   1460  1.1  christos 	aese	v5.16b, v27.16b
   1461  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 4
   1462  1.1  christos 	aese	v4.16b, v27.16b
   1463  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 4
   1464  1.1  christos 	aese	v2.16b, v27.16b
   1465  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 4
   1466  1.1  christos 
   1467  1.1  christos 	ldp	q26, q27, [x8, #96]				//load rk6, rk7
   1468  1.1  christos 	aese	v2.16b, v28.16b
   1469  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 5
   1470  1.1  christos 	aese	v3.16b, v28.16b
   1471  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 5
   1472  1.1  christos 
   1473  1.1  christos 	aese	v6.16b, v28.16b
   1474  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 5
   1475  1.1  christos 	aese	v1.16b, v28.16b
   1476  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 5
   1477  1.1  christos 
   1478  1.1  christos 	aese	v7.16b, v28.16b
   1479  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 5
   1480  1.1  christos 	aese	v5.16b, v28.16b
   1481  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 5
   1482  1.1  christos 
   1483  1.1  christos 	aese	v4.16b, v28.16b
   1484  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 5
   1485  1.1  christos 
   1486  1.1  christos 	aese	v3.16b, v26.16b
   1487  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 6
   1488  1.1  christos 	aese	v2.16b, v26.16b
   1489  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 6
   1490  1.1  christos 	aese	v0.16b, v28.16b
   1491  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 5
   1492  1.1  christos 
   1493  1.1  christos 	aese	v5.16b, v26.16b
   1494  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 6
   1495  1.1  christos 	aese	v4.16b, v26.16b
   1496  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 6
   1497  1.1  christos 	aese	v1.16b, v26.16b
   1498  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 6
   1499  1.1  christos 
   1500  1.1  christos 	aese	v0.16b, v26.16b
   1501  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 6
   1502  1.1  christos 	aese	v7.16b, v26.16b
   1503  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 6
   1504  1.1  christos 	aese	v6.16b, v26.16b
   1505  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 6
   1506  1.1  christos 
   1507  1.1  christos 	aese	v3.16b, v27.16b
   1508  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 7
   1509  1.1  christos 	aese	v4.16b, v27.16b
   1510  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 7
   1511  1.1  christos 	aese	v1.16b, v27.16b
   1512  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 7
   1513  1.1  christos 
   1514  1.1  christos 	aese	v7.16b, v27.16b
   1515  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 7
   1516  1.1  christos 	aese	v5.16b, v27.16b
   1517  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 7
   1518  1.1  christos 	ldp	q28, q26, [x8, #128]				//load rk8, rk9
   1519  1.1  christos 
   1520  1.1  christos 	aese	v6.16b, v27.16b
   1521  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 7
   1522  1.1  christos 	aese	v2.16b, v27.16b
   1523  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 7
   1524  1.1  christos 	aese	v0.16b, v27.16b
   1525  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 7
   1526  1.1  christos 
   1527  1.1  christos 	add	x5, x5, x0				//x5 = x0 + ((byte_len - 1) & ~127): input address compared against x0 below
   1528  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 7
   1529  1.1  christos 
   1530  1.1  christos 	aese	v6.16b, v28.16b
   1531  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 8
   1532  1.1  christos 	aese	v0.16b, v28.16b
   1533  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 8
   1534  1.1  christos 
   1535  1.1  christos 	aese	v1.16b, v28.16b
   1536  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 8
   1537  1.1  christos 	aese	v7.16b, v28.16b
   1538  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 8
   1539  1.1  christos 	aese	v3.16b, v28.16b
   1540  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 8
   1541  1.1  christos 
   1542  1.1  christos 	aese	v5.16b, v28.16b
   1543  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 8
   1544  1.1  christos 	aese	v2.16b, v28.16b
   1545  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 8
   1546  1.1  christos 	aese	v4.16b, v28.16b
   1547  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 8
   1548  1.1  christos 
   1549  1.1  christos 	aese	v0.16b, v26.16b						//AES block 0 - round 9
   1550  1.1  christos 	aese	v1.16b, v26.16b						//AES block 1 - round 9
   1551  1.1  christos 	aese	v6.16b, v26.16b						//AES block 6 - round 9
   1552  1.1  christos 
   1553  1.1  christos 	ldr	q27, [x8, #160]					//load rk10
   1554  1.1  christos 	aese	v4.16b, v26.16b						//AES block 4 - round 9
   1555  1.1  christos 	aese	v3.16b, v26.16b						//AES block 3 - round 9
   1556  1.1  christos 
   1557  1.1  christos 	aese	v2.16b, v26.16b						//AES block 2 - round 9
   1558  1.1  christos 	aese	v5.16b, v26.16b						//AES block 5 - round 9
   1559  1.1  christos 	aese	v7.16b, v26.16b						//AES block 7 - round 9
   1560  1.1  christos 
   1561  1.1  christos 	add	x4, x0, x1, lsr #3		//end_input_ptr
   1562  1.1  christos 	cmp	x0, x5				//check if we have <= 8 blocks (x5 == x0 exactly when byte_len <= 128)
   1563  1.1  christos 	b.ge	.L128_dec_tail						//handle tail
   1564  1.1  christos 
   1565  1.1  christos 	ldp	q8, q9, [x0], #32			//AES block 0, 1 - load ciphertext
   1566  1.1  christos 
   1567  1.1  christos .inst	0xce006d00	//eor3 v0.16b, v8.16b, v0.16b, v27.16b				//AES block 0 - result (ciphertext ^ round-9 state ^ rk10)
   1568  1.1  christos .inst	0xce016d21	//eor3 v1.16b, v9.16b, v1.16b, v27.16b				//AES block 1 - result
   1569  1.1  christos 	stp	q0, q1, [x2], #32			//AES block 0, 1 - store result
   1570  1.1  christos 
   1571  1.1  christos 	rev32	v0.16b, v30.16b				//CTR block 8
   1572  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8
   1573  1.1  christos 	ldp	q10, q11, [x0], #32			//AES block 2, 3 - load ciphertext
   1574  1.1  christos 
   1575  1.1  christos 	ldp	q12, q13, [x0], #32			//AES block 4, 5 - load ciphertext
   1576  1.1  christos 
   1577  1.1  christos 	rev32	v1.16b, v30.16b				//CTR block 9
   1578  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 9
   1579  1.1  christos 	ldp	q14, q15, [x0], #32			//AES block 6, 7 - load ciphertext
   1580  1.1  christos 
   1581  1.1  christos .inst	0xce036d63	//eor3 v3.16b, v11.16b, v3.16b, v27.16b				//AES block 3 - result
   1582  1.1  christos .inst	0xce026d42	//eor3 v2.16b, v10.16b, v2.16b, v27.16b				//AES block 2 - result
   1583  1.1  christos 	stp	q2, q3, [x2], #32			//AES block 2, 3 - store result
   1584  1.1  christos 
   1585  1.1  christos 	rev32	v2.16b, v30.16b				//CTR block 10
   1586  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 10
   1587  1.1  christos 
   1588  1.1  christos .inst	0xce066dc6	//eor3 v6.16b, v14.16b, v6.16b, v27.16b				//AES block 6 - result
   1589  1.1  christos 
   1590  1.1  christos 	rev32	v3.16b, v30.16b				//CTR block 11
   1591  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 11
   1592  1.1  christos 
   1593  1.1  christos .inst	0xce046d84	//eor3 v4.16b, v12.16b, v4.16b, v27.16b				//AES block 4 - result
   1594  1.1  christos .inst	0xce056da5	//eor3 v5.16b, v13.16b, v5.16b, v27.16b				//AES block 5 - result
   1595  1.1  christos 	stp	q4, q5, [x2], #32			//AES block 4, 5 - store result
   1596  1.1  christos 
   1597  1.1  christos .inst	0xce076de7	//eor3 v7.16b, v15.16b, v7.16b, v27.16b				//AES block 7 - result
   1598  1.1  christos 	stp	q6, q7, [x2], #32			//AES block 6, 7 - store result
   1599  1.1  christos 	rev32	v4.16b, v30.16b				//CTR block 12
   1600  1.1  christos 
   1601  1.1  christos 	cmp	x0, x5				//check if we have <= 8 blocks
   1602  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 12
   1603  1.1  christos 	b.ge	.L128_dec_prepretail					//do prepretail
   1604  1.1  christos 
   1605  1.1  christos .L128_dec_main_loop:	//main	loop start
   1606  1.1  christos 	ldr	q23, [x3, #176]				//load h7l | h7h
   1607  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   1608  1.1  christos 	ldr	q25, [x3, #208]				//load h8l | h8h
   1609  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   1610  1.1  christos 
   1611  1.1  christos 	rev64	v9.16b, v9.16b						//GHASH block 8k+1
   1612  1.1  christos 	rev64	v8.16b, v8.16b						//GHASH block 8k
   1613  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8				//PRE 0
   1614  1.1  christos 
   1615  1.1  christos 	rev64	v14.16b, v14.16b						//GHASH block 8k+6
   1616  1.1  christos 	ldr	q20, [x3, #128]				//load h5l | h5h
   1617  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   1618  1.1  christos 	ldr	q22, [x3, #160]				//load h6l | h6h
   1619  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   1620  1.1  christos 
   1621  1.1  christos 	eor	v8.16b, v8.16b, v19.16b				 	//PRE 1
   1622  1.1  christos 	rev32	v5.16b, v30.16b				//CTR block 8k+13
   1623  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+13
   1624  1.1  christos 
   1625  1.1  christos 	rev64	v10.16b, v10.16b						//GHASH block 8k+2
   1626  1.1  christos 	rev64	v12.16b, v12.16b						//GHASH block 8k+4
   1627  1.1  christos 	ldp	q26, q27, [x8, #0]				 	//load rk0, rk1
   1628  1.1  christos 
   1629  1.1  christos 	rev32	v6.16b, v30.16b				//CTR block 8k+14
   1630  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+14
   1631  1.1  christos 	ldr	q21, [x3, #144]				//load h6k | h5k
   1632  1.1  christos 	ldr	q24, [x3, #192]				//load h8k | h7k
   1633  1.1  christos 
   1634  1.1  christos 	pmull2	v16.1q, v9.2d, v23.2d				//GHASH block 8k+1 - high
   1635  1.1  christos 	pmull2	v17.1q, v8.2d, v25.2d				//GHASH block 8k - high
   1636  1.1  christos 	rev64	v11.16b, v11.16b						//GHASH block 8k+3
   1637  1.1  christos 
   1638  1.1  christos 	rev32	v7.16b, v30.16b				//CTR block 8k+15
   1639  1.1  christos 	trn1	v18.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   1640  1.1  christos 	rev64	v13.16b, v13.16b						//GHASH block 8k+5
   1641  1.1  christos 
   1642  1.1  christos 	pmull	v23.1q, v9.1d, v23.1d				//GHASH block 8k+1 - low
   1643  1.1  christos 	pmull	v19.1q, v8.1d, v25.1d				//GHASH block 8k - low
   1644  1.1  christos 	trn2	v8.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   1645  1.1  christos 
   1646  1.1  christos 	pmull2	v29.1q, v10.2d, v22.2d				//GHASH block 8k+2 - high
   1647  1.1  christos 	aese	v4.16b, v26.16b
   1648  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 0
   1649  1.1  christos 	pmull2	v9.1q, v11.2d, v20.2d				//GHASH block 8k+3 - high
   1650  1.1  christos 
   1651  1.1  christos 	aese	v6.16b, v26.16b
   1652  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 0
   1653  1.1  christos 	aese	v5.16b, v26.16b
   1654  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 0
   1655  1.1  christos 	aese	v7.16b, v26.16b
   1656  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 0
   1657  1.1  christos 
   1658  1.1  christos 	aese	v3.16b, v26.16b
   1659  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 0
   1660  1.1  christos 	aese	v2.16b, v26.16b
   1661  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 0
   1662  1.1  christos 	eor	v17.16b, v17.16b, v16.16b				//GHASH block 8k+1 - high
   1663  1.1  christos 
   1664  1.1  christos 	aese	v1.16b, v26.16b
   1665  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 0
   1666  1.1  christos 	eor	v8.16b, v8.16b, v18.16b			//GHASH block 8k, 8k+1 - mid
   1667  1.1  christos 	aese	v0.16b, v26.16b
   1668  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 0
   1669  1.1  christos 
   1670  1.1  christos 	aese	v2.16b, v27.16b
   1671  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 1
   1672  1.1  christos 	eor	v19.16b, v19.16b, v23.16b				//GHASH block 8k+1 - low
   1673  1.1  christos .inst	0xce1d2631	//eor3 v17.16b, v17.16b, v29.16b, v9.16b			//GHASH block 8k+2, 8k+3 - high
   1674  1.1  christos 
   1675  1.1  christos 	ldp	q28, q26, [x8, #32]				//load rk2, rk3
   1676  1.1  christos 	trn1	v29.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   1677  1.1  christos 	aese	v7.16b, v27.16b
   1678  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 1
   1679  1.1  christos 
   1680  1.1  christos 	pmull	v22.1q, v10.1d, v22.1d				//GHASH block 8k+2 - low
   1681  1.1  christos 	trn2	v10.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   1682  1.1  christos 	pmull2	v18.1q, v8.2d, v24.2d				//GHASH block 8k	- mid
   1683  1.1  christos 
   1684  1.1  christos 	ldr	q23, [x3, #80]				//load h3l | h3h
   1685  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   1686  1.1  christos 	ldr	q25, [x3, #112]				//load h4l | h4h
   1687  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   1688  1.1  christos 	pmull	v24.1q, v8.1d, v24.1d				//GHASH block 8k+1 - mid
   1689  1.1  christos 	aese	v6.16b, v27.16b
   1690  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 1
   1691  1.1  christos 
   1692  1.1  christos 	aese	v4.16b, v27.16b
   1693  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 1
   1694  1.1  christos 	aese	v5.16b, v27.16b
   1695  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 1
   1696  1.1  christos 	pmull	v20.1q, v11.1d, v20.1d				//GHASH block 8k+3 - low
   1697  1.1  christos 
   1698  1.1  christos 	aese	v3.16b, v27.16b
   1699  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 1
   1700  1.1  christos 	aese	v0.16b, v27.16b
   1701  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 1
   1702  1.1  christos 	aese	v1.16b, v27.16b
   1703  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 1
   1704  1.1  christos 
   1705  1.1  christos 	aese	v7.16b, v28.16b
   1706  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 2
   1707  1.1  christos 	aese	v2.16b, v28.16b
   1708  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 2
   1709  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+2, 8k+3 - low
   1710  1.1  christos 
   1711  1.1  christos 	aese	v4.16b, v28.16b
   1712  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 2
   1713  1.1  christos 	eor	v10.16b, v10.16b, v29.16b				//GHASH block 8k+2, 8k+3 - mid
   1714  1.1  christos 	ldr	q20, [x3, #32]				//load h1l | h1h
   1715  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   1716  1.1  christos 	ldr	q22, [x3, #64]				//load h2l | h2h
   1717  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   1718  1.1  christos 
   1719  1.1  christos 	eor	v18.16b, v18.16b, v24.16b				//GHASH block 8k+1 - mid
   1720  1.1  christos 	aese	v1.16b, v28.16b
   1721  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 2
   1722  1.1  christos 	aese	v3.16b, v28.16b
   1723  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 2
   1724  1.1  christos 
   1725  1.1  christos 	trn1	v16.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   1726  1.1  christos 	aese	v5.16b, v28.16b
   1727  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 2
   1728  1.1  christos 	aese	v0.16b, v28.16b
   1729  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 2
   1730  1.1  christos 
   1731  1.1  christos 	aese	v6.16b, v28.16b
   1732  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 2
   1733  1.1  christos 	pmull2	v29.1q, v10.2d, v21.2d				//GHASH block 8k+2 - mid
   1734  1.1  christos 	pmull	v21.1q, v10.1d, v21.1d				//GHASH block 8k+3 - mid
   1735  1.1  christos 
   1736  1.1  christos 	aese	v7.16b, v26.16b
   1737  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 3
   1738  1.1  christos 	rev64	v15.16b, v15.16b						//GHASH block 8k+7
   1739  1.1  christos 	pmull2	v8.1q, v12.2d, v25.2d				//GHASH block 8k+4 - high
   1740  1.1  christos 
   1741  1.1  christos 	ldp	q27, q28, [x8, #64]				//load rk4, rk5
   1742  1.1  christos 	pmull	v25.1q, v12.1d, v25.1d				//GHASH block 8k+4 - low
   1743  1.1  christos .inst	0xce157652	//eor3 v18.16b, v18.16b, v21.16b, v29.16b			//GHASH block 8k+2, 8k+3 - mid
   1744  1.1  christos 
   1745  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   1746  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   1747  1.1  christos 	aese	v2.16b, v26.16b
   1748  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 3
   1749  1.1  christos 	trn2	v12.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   1750  1.1  christos 
   1751  1.1  christos 	aese	v4.16b, v26.16b
   1752  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 3
   1753  1.1  christos 	aese	v3.16b, v26.16b
   1754  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 3
   1755  1.1  christos 	aese	v1.16b, v26.16b
   1756  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 3
   1757  1.1  christos 
   1758  1.1  christos 	aese	v0.16b, v26.16b
   1759  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 3
   1760  1.1  christos 	aese	v6.16b, v26.16b
   1761  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 3
   1762  1.1  christos 	aese	v5.16b, v26.16b
   1763  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 3
   1764  1.1  christos 
   1765  1.1  christos 	pmull2	v10.1q, v13.2d, v23.2d				//GHASH block 8k+5 - high
   1766  1.1  christos 	pmull	v23.1q, v13.1d, v23.1d				//GHASH block 8k+5 - low
   1767  1.1  christos 	pmull2	v11.1q, v14.2d, v22.2d				//GHASH block 8k+6 - high
   1768  1.1  christos 
   1769  1.1  christos 	pmull	v22.1q, v14.1d, v22.1d				//GHASH block 8k+6 - low
   1770  1.1  christos 	aese	v0.16b, v27.16b
   1771  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 4
   1772  1.1  christos 	aese	v7.16b, v27.16b
   1773  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 4
   1774  1.1  christos 
   1775  1.1  christos 	eor	v12.16b, v12.16b, v16.16b				//GHASH block 8k+4, 8k+5 - mid
   1776  1.1  christos 	trn1	v13.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   1777  1.1  christos 	aese	v3.16b, v27.16b
   1778  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 4
   1779  1.1  christos 
   1780  1.1  christos 	aese	v1.16b, v27.16b
   1781  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 4
   1782  1.1  christos 	aese	v5.16b, v27.16b
   1783  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 4
   1784  1.1  christos 	aese	v6.16b, v27.16b
   1785  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 4
   1786  1.1  christos 
   1787  1.1  christos 	aese	v2.16b, v27.16b
   1788  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 4
   1789  1.1  christos 	aese	v4.16b, v27.16b
   1790  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 4
   1791  1.1  christos 	trn2	v14.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   1792  1.1  christos 
   1793  1.1  christos 	ldp	q26, q27, [x8, #96]				//load rk6, rk7
   1794  1.1  christos 	aese	v0.16b, v28.16b
   1795  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 5
   1796  1.1  christos 	pmull2	v16.1q, v12.2d, v24.2d				//GHASH block 8k+4 - mid
   1797  1.1  christos 
   1798  1.1  christos 	aese	v2.16b, v28.16b
   1799  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 5
   1800  1.1  christos 	eor	v14.16b, v14.16b, v13.16b				//GHASH block 8k+6, 8k+7 - mid
   1801  1.1  christos 	aese	v1.16b, v28.16b
   1802  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 5
   1803  1.1  christos 
   1804  1.1  christos 	pmull	v24.1q, v12.1d, v24.1d				//GHASH block 8k+5 - mid
   1805  1.1  christos 	aese	v6.16b, v28.16b
   1806  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 5
   1807  1.1  christos 	aese	v7.16b, v28.16b
   1808  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 5
   1809  1.1  christos 
   1810  1.1  christos 	aese	v3.16b, v28.16b
   1811  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 5
   1812  1.1  christos 	aese	v5.16b, v28.16b
   1813  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 5
   1814  1.1  christos 	aese	v4.16b, v28.16b
   1815  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 5
   1816  1.1  christos 
   1817  1.1  christos 	pmull2	v12.1q, v15.2d, v20.2d				//GHASH block 8k+7 - high
   1818  1.1  christos .inst	0xce184252	//eor3 v18.16b, v18.16b, v24.16b, v16.16b 			//GHASH block 8k+4, 8k+5 - mid
   1819  1.1  christos .inst	0xce195e73	//eor3 v19.16b, v19.16b, v25.16b, v23.16b			//GHASH block 8k+4, 8k+5 - low
   1820  1.1  christos 
   1821  1.1  christos 	aese	v3.16b, v26.16b
   1822  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 6
   1823  1.1  christos .inst	0xce082a31	//eor3 v17.16b, v17.16b, v8.16b, v10.16b			//GHASH block 8k+4, 8k+5 - high
   1824  1.1  christos 	aese	v7.16b, v26.16b
   1825  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 6
   1826  1.1  christos 
   1827  1.1  christos 	aese	v1.16b, v26.16b
   1828  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 6
   1829  1.1  christos 	pmull2	v13.1q, v14.2d, v21.2d				//GHASH block 8k+6 - mid
   1830  1.1  christos 	aese	v6.16b, v26.16b
   1831  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 6
   1832  1.1  christos 
   1833  1.1  christos 	aese	v2.16b, v26.16b
   1834  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 6
   1835  1.1  christos 	aese	v5.16b, v26.16b
   1836  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 6
   1837  1.1  christos 	pmull	v20.1q, v15.1d, v20.1d				//GHASH block 8k+7 - low
   1838  1.1  christos 
   1839  1.1  christos 	pmull	v21.1q, v14.1d, v21.1d				//GHASH block 8k+7 - mid
   1840  1.1  christos 	aese	v0.16b, v26.16b
   1841  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 6
   1842  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+15
   1843  1.1  christos 
   1844  1.1  christos .inst	0xce0b3231	//eor3 v17.16b, v17.16b, v11.16b, v12.16b			//GHASH block 8k+6, 8k+7 - high
   1845  1.1  christos 	aese	v4.16b, v26.16b
   1846  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 6
   1847  1.1  christos 	ldp	q28, q26, [x8, #128]				//load rk8, rk9
   1848  1.1  christos 
   1849  1.1  christos 	ldr	d16, [x10]			//MODULO - load modulo constant
   1850  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+6, 8k+7 - low
   1851  1.1  christos 	aese	v5.16b, v27.16b
   1852  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 7
   1853  1.1  christos 
   1854  1.1  christos 	rev32	v20.16b, v30.16b					//CTR block 8k+16
   1855  1.1  christos .inst	0xce153652	//eor3 v18.16b, v18.16b, v21.16b, v13.16b			//GHASH block 8k+6, 8k+7 - mid
   1856  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+16
   1857  1.1  christos 
   1858  1.1  christos 	aese	v6.16b, v27.16b
   1859  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 7
   1860  1.1  christos 	aese	v3.16b, v27.16b
   1861  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 7
   1862  1.1  christos 	aese	v7.16b, v27.16b
   1863  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 7
   1864  1.1  christos 
   1865  1.1  christos 	aese	v2.16b, v27.16b
   1866  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 7
   1867  1.1  christos 	aese	v1.16b, v27.16b
   1868  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 7
   1869  1.1  christos 	rev32	v22.16b, v30.16b					//CTR block 8k+17
   1870  1.1  christos 
   1871  1.1  christos 	aese	v4.16b, v27.16b
   1872  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 7
   1873  1.1  christos 	ext	v21.16b, v17.16b, v17.16b, #8			 	//MODULO - other top alignment
   1874  1.1  christos 	pmull	v29.1q, v17.1d, v16.1d		 	//MODULO - top 64b align with mid
   1875  1.1  christos 
   1876  1.1  christos .inst	0xce114e52	//eor3 v18.16b, v18.16b, v17.16b, v19.16b		 	//MODULO - karatsuba tidy up
   1877  1.1  christos 	aese	v0.16b, v27.16b
   1878  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 7
   1879  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+17
   1880  1.1  christos 
   1881  1.1  christos 	aese	v5.16b, v28.16b
   1882  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 8
   1883  1.1  christos 	aese	v1.16b, v28.16b
   1884  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 8
   1885  1.1  christos 	ldp	q8, q9, [x0], #32			//AES block 8k+8, 8k+9 - load ciphertext
   1886  1.1  christos 
   1887  1.1  christos 	ldp	q10, q11, [x0], #32			//AES block 8k+10, 8k+11 - load ciphertext
   1888  1.1  christos 	aese	v0.16b, v28.16b
   1889  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 8
   1890  1.1  christos 	rev32	v23.16b, v30.16b					//CTR block 8k+18
   1891  1.1  christos 
   1892  1.1  christos 	ldp	q12, q13, [x0], #32			//AES block 8k+12, 8k+13 - load ciphertext
   1893  1.1  christos 	aese	v4.16b, v28.16b
   1894  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 8
   1895  1.1  christos .inst	0xce1d5652	//eor3 v18.16b, v18.16b, v29.16b, v21.16b			//MODULO - fold into mid
   1896  1.1  christos 
   1897  1.1  christos 	ldp	q14, q15, [x0], #32			//AES block 8k+14, 8k+15 - load ciphertext
   1898  1.1  christos 	aese	v3.16b, v28.16b
   1899  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 8
   1900  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+18
   1901  1.1  christos 
   1902  1.1  christos 	aese	v7.16b, v28.16b
   1903  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 8
   1904  1.1  christos 	aese	v2.16b, v28.16b
   1905  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 8
   1906  1.1  christos 	aese	v6.16b, v28.16b
   1907  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 8
   1908  1.1  christos 
   1909  1.1  christos 	aese	v0.16b, v26.16b						//AES block 8k+8 - round 9
   1910  1.1  christos 	aese	v1.16b, v26.16b						//AES block 8k+9 - round 9
   1911  1.1  christos 	ldr	q27, [x8, #160]					//load rk10
   1912  1.1  christos 
   1913  1.1  christos 	aese	v6.16b, v26.16b						//AES block 8k+14 - round 9
   1914  1.1  christos 	pmull	v17.1q, v18.1d, v16.1d			//MODULO - mid 64b align with low
   1915  1.1  christos 	aese	v2.16b, v26.16b						//AES block 8k+10 - round 9
   1916  1.1  christos 
   1917  1.1  christos 	aese	v7.16b, v26.16b						//AES block 8k+15 - round 9
   1918  1.1  christos 	aese	v4.16b, v26.16b						//AES block 8k+12 - round 9
   1919  1.1  christos 	ext	v21.16b, v18.16b, v18.16b, #8			 	//MODULO - other mid alignment
   1920  1.1  christos 
   1921  1.1  christos 	rev32	v25.16b, v30.16b					//CTR block 8k+19
   1922  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+19
   1923  1.1  christos 
   1924  1.1  christos 	aese	v3.16b, v26.16b						//AES block 8k+11 - round 9
   1925  1.1  christos 	aese	v5.16b, v26.16b						//AES block 8k+13 - round 9
   1926  1.1  christos .inst	0xce016d21	//eor3 v1.16b, v9.16b, v1.16b, v27.16b				//AES block 8k+9 - result
   1927  1.1  christos 
   1928  1.1  christos .inst	0xce006d00	//eor3 v0.16b, v8.16b, v0.16b, v27.16b				//AES block 8k+8 - result
   1929  1.1  christos .inst	0xce076de7	//eor3 v7.16b, v15.16b, v7.16b, v27.16b				//AES block 8k+15 - result
   1930  1.1  christos .inst	0xce066dc6	//eor3 v6.16b, v14.16b, v6.16b, v27.16b				//AES block 8k+14 - result
   1931  1.1  christos 
   1932  1.1  christos .inst	0xce026d42	//eor3 v2.16b, v10.16b, v2.16b, v27.16b				//AES block 8k+10 - result
   1933  1.1  christos 	stp	q0, q1, [x2], #32			//AES block 8k+8, 8k+9 - store result
   1934  1.1  christos 	mov	v1.16b, v22.16b					//CTR block 8k+17
   1935  1.1  christos 
   1936  1.1  christos .inst	0xce046d84	//eor3 v4.16b, v12.16b, v4.16b, v27.16b				//AES block 8k+12 - result
   1937  1.1  christos .inst	0xce115673	//eor3 v19.16b, v19.16b, v17.16b, v21.16b		 	//MODULO - fold into low
   1938  1.1  christos 	mov	v0.16b, v20.16b					//CTR block 8k+16
   1939  1.1  christos 
   1940  1.1  christos .inst	0xce036d63	//eor3 v3.16b, v11.16b, v3.16b, v27.16b				//AES block 8k+11 - result
   1941  1.1  christos 	cmp	x0, x5				//.LOOP CONTROL
   1942  1.1  christos 	stp	q2, q3, [x2], #32			//AES block 8k+10, 8k+11 - store result
   1943  1.1  christos 
   1944  1.1  christos .inst	0xce056da5	//eor3 v5.16b, v13.16b, v5.16b, v27.16b				//AES block 8k+13 - result
   1945  1.1  christos 	mov	v2.16b, v23.16b					//CTR block 8k+18
   1946  1.1  christos 
   1947  1.1  christos 	stp	q4, q5, [x2], #32			//AES block 8k+12, 8k+13 - store result
   1948  1.1  christos 	rev32	v4.16b, v30.16b				//CTR block 8k+20
   1949  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+20
   1950  1.1  christos 
   1951  1.1  christos 	stp	q6, q7, [x2], #32			//AES block 8k+14, 8k+15 - store result
   1952  1.1  christos 	mov	v3.16b, v25.16b					//CTR block 8k+19
   1953  1.1  christos 	b.lt	.L128_dec_main_loop
   1954  1.1  christos 
   1955  1.1  christos .L128_dec_prepretail:	//PREPRETAIL
   1956  1.1  christos 	rev64	v11.16b, v11.16b						//GHASH block 8k+3
   1957  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8				//PRE 0
   1958  1.1  christos 	rev64	v8.16b, v8.16b						//GHASH block 8k
   1959  1.1  christos 
   1960  1.1  christos 	rev64	v10.16b, v10.16b						//GHASH block 8k+2
   1961  1.1  christos 	rev32	v5.16b, v30.16b				//CTR block 8k+13
   1962  1.1  christos 	ldp	q26, q27, [x8, #0]				 	//load rk0, rk1
   1963  1.1  christos 
   1964  1.1  christos 	ldr	q23, [x3, #176]				//load h7l | h7h
   1965  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   1966  1.1  christos 	ldr	q25, [x3, #208]				//load h8l | h8h
   1967  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   1968  1.1  christos 	eor	v8.16b, v8.16b, v19.16b				 	//PRE 1
   1969  1.1  christos 	rev64	v9.16b, v9.16b						//GHASH block 8k+1
   1970  1.1  christos 
   1971  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+13
   1972  1.1  christos 	ldr	q20, [x3, #128]				//load h5l | h5h
   1973  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   1974  1.1  christos 	ldr	q22, [x3, #160]				//load h6l | h6h
   1975  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   1976  1.1  christos 	rev64	v13.16b, v13.16b						//GHASH block 8k+5
   1977  1.1  christos 
   1978  1.1  christos 	rev64	v12.16b, v12.16b						//GHASH block 8k+4
   1979  1.1  christos 
   1980  1.1  christos 	rev64	v14.16b, v14.16b						//GHASH block 8k+6
   1981  1.1  christos 
   1982  1.1  christos 	ldr	q21, [x3, #144]				//load h6k | h5k
   1983  1.1  christos 	ldr	q24, [x3, #192]				//load h8k | h7k
   1984  1.1  christos 	rev32	v6.16b, v30.16b				//CTR block 8k+14
   1985  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+14
   1986  1.1  christos 
   1987  1.1  christos 	pmull2	v16.1q, v9.2d, v23.2d				//GHASH block 8k+1 - high
   1988  1.1  christos 	pmull	v19.1q, v8.1d, v25.1d				//GHASH block 8k - low
   1989  1.1  christos 	pmull2	v17.1q, v8.2d, v25.2d				//GHASH block 8k - high
   1990  1.1  christos 
   1991  1.1  christos 	trn1	v18.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   1992  1.1  christos 	trn2	v8.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   1993  1.1  christos 	pmull2	v29.1q, v10.2d, v22.2d				//GHASH block 8k+2 - high
   1994  1.1  christos 
   1995  1.1  christos 	pmull	v23.1q, v9.1d, v23.1d				//GHASH block 8k+1 - low
   1996  1.1  christos 	pmull2	v9.1q, v11.2d, v20.2d				//GHASH block 8k+3 - high
   1997  1.1  christos 	aese	v0.16b, v26.16b
   1998  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 0
   1999  1.1  christos 
   2000  1.1  christos 	eor	v17.16b, v17.16b, v16.16b				//GHASH block 8k+1 - high
   2001  1.1  christos 	aese	v4.16b, v26.16b
   2002  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 0
   2003  1.1  christos 	eor	v8.16b, v8.16b, v18.16b			//GHASH block 8k, 8k+1 - mid
   2004  1.1  christos 
   2005  1.1  christos 	pmull	v22.1q, v10.1d, v22.1d				//GHASH block 8k+2 - low
   2006  1.1  christos 	rev32	v7.16b, v30.16b				//CTR block 8k+15
   2007  1.1  christos 	aese	v3.16b, v26.16b
   2008  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 0
   2009  1.1  christos 
   2010  1.1  christos .inst	0xce1d2631	//eor3 v17.16b, v17.16b, v29.16b, v9.16b			//GHASH block 8k+2, 8k+3 - high
   2011  1.1  christos 	trn1	v29.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   2012  1.1  christos 	trn2	v10.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   2013  1.1  christos 
   2014  1.1  christos 	aese	v2.16b, v26.16b
   2015  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 0
   2016  1.1  christos 	aese	v1.16b, v26.16b
   2017  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 0
   2018  1.1  christos 	aese	v5.16b, v26.16b
   2019  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 0
   2020  1.1  christos 
   2021  1.1  christos 	pmull2	v18.1q, v8.2d, v24.2d				//GHASH block 8k - mid
   2022  1.1  christos 	pmull	v24.1q, v8.1d, v24.1d				//GHASH block 8k+1 - mid
   2023  1.1  christos 	pmull	v20.1q, v11.1d, v20.1d				//GHASH block 8k+3 - low
   2024  1.1  christos 
   2025  1.1  christos 	aese	v2.16b, v27.16b
   2026  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 1
   2027  1.1  christos 	aese	v7.16b, v26.16b
   2028  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 0
   2029  1.1  christos 	aese	v6.16b, v26.16b
   2030  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 0
   2031  1.1  christos 
   2032  1.1  christos 	eor	v19.16b, v19.16b, v23.16b				//GHASH block 8k+1 - low
   2033  1.1  christos 	eor	v10.16b, v10.16b, v29.16b				//GHASH block 8k+2, 8k+3 - mid
   2034  1.1  christos 	eor	v18.16b, v18.16b, v24.16b				//GHASH block 8k+1 - mid
   2035  1.1  christos 
   2036  1.1  christos 	aese	v6.16b, v27.16b
   2037  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 1
   2038  1.1  christos 	aese	v4.16b, v27.16b
   2039  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 1
   2040  1.1  christos 	aese	v5.16b, v27.16b
   2041  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 1
   2042  1.1  christos 
   2043  1.1  christos 	ldp	q28, q26, [x8, #32]				//load rk2, rk3
   2044  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+2, 8k+3 - low
   2045  1.1  christos 	pmull2	v29.1q, v10.2d, v21.2d				//GHASH block 8k+2 - mid
   2046  1.1  christos 
   2047  1.1  christos 	ldr	q23, [x3, #80]				//load h3l | h3h
   2048  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   2049  1.1  christos 	ldr	q25, [x3, #112]				//load h4l | h4h
   2050  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   2051  1.1  christos 	aese	v1.16b, v27.16b
   2052  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 1
   2053  1.1  christos 	pmull	v21.1q, v10.1d, v21.1d				//GHASH block 8k+3 - mid
   2054  1.1  christos 
   2055  1.1  christos 	aese	v3.16b, v27.16b
   2056  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 1
   2057  1.1  christos 	aese	v7.16b, v27.16b
   2058  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 1
   2059  1.1  christos 	aese	v0.16b, v27.16b
   2060  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 1
   2061  1.1  christos 
   2062  1.1  christos 	ldr	q20, [x3, #32]				//load h1l | h1h
   2063  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   2064  1.1  christos 	ldr	q22, [x3, #64]				//load h2l | h2h
   2065  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   2066  1.1  christos .inst	0xce157652	//eor3 v18.16b, v18.16b, v21.16b, v29.16b			//GHASH block 8k+2, 8k+3 - mid
   2067  1.1  christos 
   2068  1.1  christos 	aese	v0.16b, v28.16b
   2069  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 2
   2070  1.1  christos 	aese	v6.16b, v28.16b
   2071  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 2
   2072  1.1  christos 	aese	v2.16b, v28.16b
   2073  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 2
   2074  1.1  christos 
   2075  1.1  christos 	aese	v4.16b, v28.16b
   2076  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 2
   2077  1.1  christos 	trn1	v16.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   2078  1.1  christos 	aese	v7.16b, v28.16b
   2079  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 2
   2080  1.1  christos 
   2081  1.1  christos 	aese	v1.16b, v28.16b
   2082  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 2
   2083  1.1  christos 	aese	v5.16b, v28.16b
   2084  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 2
   2085  1.1  christos 	aese	v3.16b, v28.16b
   2086  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 2
   2087  1.1  christos 
   2088  1.1  christos 	pmull2	v8.1q, v12.2d, v25.2d				//GHASH block 8k+4 - high
   2089  1.1  christos 	pmull	v25.1q, v12.1d, v25.1d				//GHASH block 8k+4 - low
   2090  1.1  christos 	trn2	v12.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   2091  1.1  christos 
   2092  1.1  christos 	ldp	q27, q28, [x8, #64]				//load rk4, rk5
   2093  1.1  christos 	rev64	v15.16b, v15.16b						//GHASH block 8k+7
   2094  1.1  christos 	aese	v6.16b, v26.16b
   2095  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 3
   2096  1.1  christos 
   2097  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   2098  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   2099  1.1  christos 	pmull2	v10.1q, v13.2d, v23.2d				//GHASH block 8k+5 - high
   2100  1.1  christos 	pmull	v23.1q, v13.1d, v23.1d				//GHASH block 8k+5 - low
   2101  1.1  christos 
   2102  1.1  christos 	aese	v2.16b, v26.16b
   2103  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 3
   2104  1.1  christos 	aese	v0.16b, v26.16b
   2105  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 3
   2106  1.1  christos 	trn1	v13.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   2107  1.1  christos 
   2108  1.1  christos 	pmull2	v11.1q, v14.2d, v22.2d				//GHASH block 8k+6 - high
   2109  1.1  christos 	pmull	v22.1q, v14.1d, v22.1d				//GHASH block 8k+6 - low
   2110  1.1  christos 	trn2	v14.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   2111  1.1  christos 
   2112  1.1  christos 	aese	v4.16b, v26.16b
   2113  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 3
   2114  1.1  christos 	aese	v3.16b, v26.16b
   2115  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 3
   2116  1.1  christos 	aese	v7.16b, v26.16b
   2117  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 3
   2118  1.1  christos 
   2119  1.1  christos 	aese	v1.16b, v26.16b
   2120  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 3
   2121  1.1  christos 	aese	v5.16b, v26.16b
   2122  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 3
   2123  1.1  christos 	eor	v12.16b, v12.16b, v16.16b				//GHASH block 8k+4, 8k+5 - mid
   2124  1.1  christos 
   2125  1.1  christos .inst	0xce082a31	//eor3 v17.16b, v17.16b, v8.16b, v10.16b			//GHASH block 8k+4, 8k+5 - high
   2126  1.1  christos 	aese	v0.16b, v27.16b
   2127  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 4
   2128  1.1  christos 	aese	v2.16b, v27.16b
   2129  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 4
   2130  1.1  christos 
   2131  1.1  christos 	eor	v14.16b, v14.16b, v13.16b				//GHASH block 8k+6, 8k+7 - mid
   2132  1.1  christos 	aese	v5.16b, v27.16b
   2133  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 4
   2134  1.1  christos 	pmull2	v16.1q, v12.2d, v24.2d				//GHASH block 8k+4 - mid
   2135  1.1  christos 
   2136  1.1  christos 	aese	v1.16b, v27.16b
   2137  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 4
   2138  1.1  christos 	aese	v6.16b, v27.16b
   2139  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 4
   2140  1.1  christos 	aese	v4.16b, v27.16b
   2141  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 4
   2142  1.1  christos 
   2143  1.1  christos 	aese	v7.16b, v27.16b
   2144  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 4
   2145  1.1  christos 	aese	v3.16b, v27.16b
   2146  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 4
   2147  1.1  christos 	pmull	v24.1q, v12.1d, v24.1d				//GHASH block 8k+5 - mid
   2148  1.1  christos 
   2149  1.1  christos 	pmull2	v12.1q, v15.2d, v20.2d				//GHASH block 8k+7 - high
   2150  1.1  christos 	pmull2	v13.1q, v14.2d, v21.2d				//GHASH block 8k+6 - mid
   2151  1.1  christos 	pmull	v21.1q, v14.1d, v21.1d				//GHASH block 8k+7 - mid
   2152  1.1  christos 
   2153  1.1  christos 	ldp	q26, q27, [x8, #96]				//load rk6, rk7
   2154  1.1  christos .inst	0xce184252	//eor3 v18.16b, v18.16b, v24.16b, v16.16b			//GHASH block 8k+4, 8k+5 - mid
   2155  1.1  christos 	aese	v6.16b, v28.16b
   2156  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 5
   2157  1.1  christos 
   2158  1.1  christos 	ldr	d16, [x10]			//MODULO - load modulo constant
   2159  1.1  christos 	pmull	v20.1q, v15.1d, v20.1d				//GHASH block 8k+7 - low
   2160  1.1  christos .inst	0xce195e73	//eor3 v19.16b, v19.16b, v25.16b, v23.16b			//GHASH block 8k+4, 8k+5 - low
   2161  1.1  christos 
   2162  1.1  christos 	aese	v0.16b, v28.16b
   2163  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 5
   2164  1.1  christos 	aese	v2.16b, v28.16b
   2165  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 5
   2166  1.1  christos 	aese	v4.16b, v28.16b
   2167  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 5
   2168  1.1  christos 
   2169  1.1  christos 	aese	v3.16b, v28.16b
   2170  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 5
   2171  1.1  christos 	aese	v1.16b, v28.16b
   2172  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 5
   2173  1.1  christos 	aese	v5.16b, v28.16b
   2174  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 5
   2175  1.1  christos 
   2176  1.1  christos 	aese	v7.16b, v28.16b
   2177  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 5
   2178  1.1  christos .inst	0xce153652	//eor3 v18.16b, v18.16b, v21.16b, v13.16b			//GHASH block 8k+6, 8k+7 - mid
   2179  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+6, 8k+7 - low
   2180  1.1  christos 
   2181  1.1  christos 	aese	v4.16b, v26.16b
   2182  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 6
   2183  1.1  christos 	aese	v1.16b, v26.16b
   2184  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 6
   2185  1.1  christos 	aese	v2.16b, v26.16b
   2186  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 6
   2187  1.1  christos 
   2188  1.1  christos .inst	0xce0b3231	//eor3 v17.16b, v17.16b, v11.16b, v12.16b			//GHASH block 8k+6, 8k+7 - high
   2189  1.1  christos 	aese	v5.16b, v26.16b
   2190  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 6
   2191  1.1  christos 	aese	v0.16b, v26.16b
   2192  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 6
   2193  1.1  christos 
   2194  1.1  christos 	aese	v3.16b, v26.16b
   2195  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 6
   2196  1.1  christos 	aese	v6.16b, v26.16b
   2197  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 6
   2198  1.1  christos 	aese	v7.16b, v26.16b
   2199  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 6
   2200  1.1  christos 
   2201  1.1  christos 	aese	v4.16b, v27.16b
   2202  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 7
   2203  1.1  christos .inst	0xce114e52	//eor3 v18.16b, v18.16b, v17.16b, v19.16b		 	//MODULO - karatsuba tidy up
   2204  1.1  christos 	ldp	q28, q26, [x8, #128]				//load rk8, rk9
   2205  1.1  christos 
   2206  1.1  christos 	pmull	v29.1q, v17.1d, v16.1d		 	//MODULO - top 64b align with mid
   2207  1.1  christos 	aese	v3.16b, v27.16b
   2208  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 7
   2209  1.1  christos 	ext	v21.16b, v17.16b, v17.16b, #8			 	//MODULO - other top alignment
   2210  1.1  christos 
   2211  1.1  christos 	aese	v5.16b, v27.16b
   2212  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 7
   2213  1.1  christos 	aese	v6.16b, v27.16b
   2214  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 7
   2215  1.1  christos 	aese	v0.16b, v27.16b
   2216  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 7
   2217  1.1  christos 
   2218  1.1  christos 	aese	v7.16b, v27.16b
   2219  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 7
   2220  1.1  christos 	aese	v1.16b, v27.16b
   2221  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 7
   2222  1.1  christos 	aese	v2.16b, v27.16b
   2223  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 7
   2224  1.1  christos 
   2225  1.1  christos .inst	0xce1d5652	//eor3 v18.16b, v18.16b, v29.16b, v21.16b			//MODULO - fold into mid
   2226  1.1  christos 	ldr	q27, [x8, #160]					//load rk10
   2227  1.1  christos 
   2228  1.1  christos 	aese	v3.16b, v28.16b
   2229  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 8
   2230  1.1  christos 	aese	v0.16b, v28.16b
   2231  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 8
   2232  1.1  christos 
   2233  1.1  christos 	pmull	v17.1q, v18.1d, v16.1d			//MODULO - mid 64b align with low
   2234  1.1  christos 	aese	v6.16b, v28.16b
   2235  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 8
   2236  1.1  christos 	ext	v21.16b, v18.16b, v18.16b, #8			 	//MODULO - other mid alignment
   2237  1.1  christos 
   2238  1.1  christos 	aese	v2.16b, v28.16b
   2239  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 8
   2240  1.1  christos 	aese	v1.16b, v28.16b
   2241  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 8
   2242  1.1  christos 	aese	v7.16b, v28.16b
   2243  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 8
   2244  1.1  christos 
   2245  1.1  christos 	aese	v6.16b, v26.16b						//AES block 8k+14 - round 9
   2246  1.1  christos 	aese	v5.16b, v28.16b
   2247  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 8
   2248  1.1  christos 	aese	v4.16b, v28.16b
   2249  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 8
   2250  1.1  christos 
   2251  1.1  christos .inst	0xce115673	//eor3 v19.16b, v19.16b, v17.16b, v21.16b		 	//MODULO - fold into low
   2252  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+15
   2253  1.1  christos 	aese	v2.16b, v26.16b						//AES block 8k+10 - round 9
   2254  1.1  christos 
   2255  1.1  christos 	aese	v3.16b, v26.16b						//AES block 8k+11 - round 9
   2256  1.1  christos 	aese	v5.16b, v26.16b						//AES block 8k+13 - round 9
   2257  1.1  christos 	aese	v0.16b, v26.16b						//AES block 8k+8 - round 9
   2258  1.1  christos 
   2259  1.1  christos 	aese	v4.16b, v26.16b						//AES block 8k+12 - round 9
   2260  1.1  christos 	aese	v1.16b, v26.16b						//AES block 8k+9 - round 9
   2261  1.1  christos 	aese	v7.16b, v26.16b						//AES block 8k+15 - round 9
   2262  1.1  christos 
   2263  1.1  christos .L128_dec_tail:	//TAIL
   2264  1.1  christos 
   2265  1.1  christos 	mov	v29.16b, v27.16b
   2266  1.1  christos 	sub	x5, x4, x0 	//main_end_input_ptr is number of bytes left to process
   2267  1.1  christos 
   2268  1.1  christos 	cmp	x5, #112
   2269  1.1  christos 
   2270  1.1  christos 	ldp	q24, q25, [x3, #192]			//load h8k | h7k
   2271  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   2272  1.1  christos 	ldr	q9, [x0], #16				//AES block 8k+8 - load ciphertext
   2273  1.1  christos 
   2274  1.1  christos 	ldp	q20, q21, [x3, #128]			//load h5l | h5h
   2275  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   2276  1.1  christos 	ext	v16.16b, v19.16b, v19.16b, #8				//prepare final partial tag
   2277  1.1  christos 
   2278  1.1  christos 	ldp	q22, q23, [x3, #160]			//load h6l | h6h
   2279  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   2280  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   2281  1.1  christos 
   2282  1.1  christos .inst	0xce00752c	//eor3 v12.16b, v9.16b, v0.16b, v29.16b				//AES block 8k+8 - result
   2283  1.1  christos 	b.gt	.L128_dec_blocks_more_than_7
   2284  1.1  christos 
   2285  1.1  christos 	cmp	x5, #96
   2286  1.1  christos 	mov	v7.16b, v6.16b
   2287  1.1  christos 	movi	v19.8b, #0
   2288  1.1  christos 
   2289  1.1  christos 	movi	v17.8b, #0
   2290  1.1  christos 	mov	v6.16b, v5.16b
   2291  1.1  christos 	mov	v5.16b, v4.16b
   2292  1.1  christos 
   2293  1.1  christos 	mov	v4.16b, v3.16b
   2294  1.1  christos 	mov	v3.16b, v2.16b
   2295  1.1  christos 	mov	v2.16b, v1.16b
   2296  1.1  christos 
   2297  1.1  christos 	movi	v18.8b, #0
   2298  1.1  christos 	sub	v30.4s, v30.4s, v31.4s
   2299  1.1  christos 	b.gt	.L128_dec_blocks_more_than_6
   2300  1.1  christos 
   2301  1.1  christos 	cmp	x5, #80
   2302  1.1  christos 	sub	v30.4s, v30.4s, v31.4s
   2303  1.1  christos 
   2304  1.1  christos 	mov	v7.16b, v6.16b
   2305  1.1  christos 	mov	v6.16b, v5.16b
   2306  1.1  christos 	mov	v5.16b, v4.16b
   2307  1.1  christos 
   2308  1.1  christos 	mov	v4.16b, v3.16b
   2309  1.1  christos 	mov	v3.16b, v1.16b
   2310  1.1  christos 	b.gt	.L128_dec_blocks_more_than_5
   2311  1.1  christos 
   2312  1.1  christos 	cmp	x5, #64
   2313  1.1  christos 
   2314  1.1  christos 	mov	v7.16b, v6.16b
   2315  1.1  christos 	mov	v6.16b, v5.16b
   2316  1.1  christos 	mov	v5.16b, v4.16b
   2317  1.1  christos 
   2318  1.1  christos 	mov	v4.16b, v1.16b
   2319  1.1  christos 	sub	v30.4s, v30.4s, v31.4s
   2320  1.1  christos 	b.gt	.L128_dec_blocks_more_than_4
   2321  1.1  christos 
   2322  1.1  christos 	sub	v30.4s, v30.4s, v31.4s
   2323  1.1  christos 	mov	v7.16b, v6.16b
   2324  1.1  christos 	mov	v6.16b, v5.16b
   2325  1.1  christos 
   2326  1.1  christos 	mov	v5.16b, v1.16b
   2327  1.1  christos 	cmp	x5, #48
   2328  1.1  christos 	b.gt	.L128_dec_blocks_more_than_3
   2329  1.1  christos 
   2330  1.1  christos 	sub	v30.4s, v30.4s, v31.4s
   2331  1.1  christos 	mov	v7.16b, v6.16b
   2332  1.1  christos 	cmp	x5, #32
   2333  1.1  christos 
   2334  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   2335  1.1  christos 	mov	v6.16b, v1.16b
   2336  1.1  christos 	b.gt	.L128_dec_blocks_more_than_2
   2337  1.1  christos 
   2338  1.1  christos 	cmp	x5, #16
   2339  1.1  christos 
   2340  1.1  christos 	mov	v7.16b, v1.16b
   2341  1.1  christos 	sub	v30.4s, v30.4s, v31.4s
   2342  1.1  christos 	b.gt	.L128_dec_blocks_more_than_1
   2343  1.1  christos 
   2344  1.1  christos 	sub	v30.4s, v30.4s, v31.4s
   2345  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   2346  1.1  christos 	b	.L128_dec_blocks_less_than_1
   2347  1.1  christos .L128_dec_blocks_more_than_7:	//blocks	left >  7
   2348  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-7 block
   2349  1.1  christos 
   2350  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   2351  1.1  christos 
   2352  1.1  christos 	ins	v18.d[0], v24.d[1]					//GHASH final-7 block - mid
   2353  1.1  christos 
   2354  1.1  christos 	pmull	v19.1q, v8.1d, v25.1d				//GHASH final-7 block - low
   2355  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-7 block - mid
   2356  1.1  christos 
   2357  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   2358  1.1  christos 	ldr	q9, [x0], #16				//AES final-6 block - load ciphertext
   2359  1.1  christos 
   2360  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-7 block - mid
   2361  1.1  christos 
   2362  1.1  christos 	pmull2	v17.1q, v8.2d, v25.2d				//GHASH final-7 block - high
   2363  1.1  christos 	st1	{ v12.16b}, [x2], #16			 	//AES final-7 block  - store result
   2364  1.1  christos .inst	0xce01752c	//eor3 v12.16b, v9.16b, v1.16b, v29.16b				//AES final-6 block - result
   2365  1.1  christos 
   2366  1.1  christos 	pmull	v18.1q, v27.1d, v18.1d			 	//GHASH final-7 block - mid
   2367  1.1  christos .L128_dec_blocks_more_than_6:	//blocks	left >  6
   2368  1.1  christos 
   2369  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-6 block
   2370  1.1  christos 
   2371  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   2372  1.1  christos 
   2373  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-6 block - mid
   2374  1.1  christos 
   2375  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-6 block - mid
   2376  1.1  christos 
   2377  1.1  christos 	pmull	v26.1q, v8.1d, v23.1d				//GHASH final-6 block - low
   2378  1.1  christos 	ldr	q9, [x0], #16				//AES final-5 block - load ciphertext
   2379  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   2380  1.1  christos 
   2381  1.1  christos 	pmull	v27.1q, v27.1d, v24.1d				//GHASH final-6 block - mid
   2382  1.1  christos 	st1	{ v12.16b}, [x2], #16			 	//AES final-6 block - store result
   2383  1.1  christos 	pmull2	v28.1q, v8.2d, v23.2d				//GHASH final-6 block - high
   2384  1.1  christos 
   2385  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-6 block - low
   2386  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-6 block - high
   2387  1.1  christos 
   2388  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-6 block - mid
   2389  1.1  christos .inst	0xce02752c	//eor3 v12.16b, v9.16b, v2.16b, v29.16b				//AES final-5 block - result
   2390  1.1  christos .L128_dec_blocks_more_than_5:	//blocks	left >  5
   2391  1.1  christos 
   2392  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-5 block
   2393  1.1  christos 
   2394  1.1  christos 	ldr	q9, [x0], #16				//AES final-4 block - load ciphertext
   2395  1.1  christos 	st1	{ v12.16b}, [x2], #16			 	//AES final-5 block - store result
   2396  1.1  christos 
   2397  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   2398  1.1  christos 
   2399  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-5 block - mid
   2400  1.1  christos 
   2401  1.1  christos .inst	0xce03752c	//eor3 v12.16b, v9.16b, v3.16b, v29.16b				//AES final-4 block - result
   2402  1.1  christos 
   2403  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-5 block - mid
   2404  1.1  christos 
   2405  1.1  christos 	ins	v27.d[1], v27.d[0]					//GHASH final-5 block - mid
   2406  1.1  christos 	pmull	v26.1q, v8.1d, v22.1d				//GHASH final-5 block - low
   2407  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   2408  1.1  christos 
   2409  1.1  christos 	pmull2	v27.1q, v27.2d, v21.2d				//GHASH final-5 block - mid
   2410  1.1  christos 	pmull2	v28.1q, v8.2d, v22.2d				//GHASH final-5 block - high
   2411  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-5 block - low
   2412  1.1  christos 
   2413  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-5 block - mid
   2414  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-5 block - high
   2415  1.1  christos .L128_dec_blocks_more_than_4:	//blocks	left >  4
   2416  1.1  christos 
   2417  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-4 block
   2418  1.1  christos 
   2419  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   2420  1.1  christos 	ldr	q9, [x0], #16				//AES final-3 block - load ciphertext
   2421  1.1  christos 
   2422  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-4 block - mid
   2423  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   2424  1.1  christos 	pmull2	v28.1q, v8.2d, v20.2d				//GHASH final-4 block - high
   2425  1.1  christos 
   2426  1.1  christos 	pmull	v26.1q, v8.1d, v20.1d				//GHASH final-4 block - low
   2427  1.1  christos 
   2428  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-4 block - high
   2429  1.1  christos 
   2430  1.1  christos 	st1	{ v12.16b}, [x2], #16			 	//AES final-4 block - store result
   2431  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-4 block - mid
   2432  1.1  christos 
   2433  1.1  christos .inst	0xce04752c	//eor3 v12.16b, v9.16b, v4.16b, v29.16b				//AES final-3 block - result
   2434  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-4 block - low
   2435  1.1  christos 
   2436  1.1  christos 	pmull	v27.1q, v27.1d, v21.1d				//GHASH final-4 block - mid
   2437  1.1  christos 
   2438  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-4 block - mid
   2439  1.1  christos .L128_dec_blocks_more_than_3:	//blocks	left >  3
   2440  1.1  christos 
   2441  1.1  christos 	st1	{ v12.16b}, [x2], #16			 	//AES final-3 block - store result
   2442  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-3 block
   2443  1.1  christos 
   2444  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   2445  1.1  christos 
   2446  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-3 block - mid
   2447  1.1  christos 
   2448  1.1  christos 	ldr	q25, [x3, #112]				//load h4l | h4h
   2449  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   2450  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   2451  1.1  christos 
   2452  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-3 block - mid
   2453  1.1  christos 
   2454  1.1  christos 	ldr	q9, [x0], #16				//AES final-2 block - load ciphertext
   2455  1.1  christos 
   2456  1.1  christos 	ins	v27.d[1], v27.d[0]					//GHASH final-3 block - mid
   2457  1.1  christos 	pmull	v26.1q, v8.1d, v25.1d				//GHASH final-3 block - low
   2458  1.1  christos 	pmull2	v28.1q, v8.2d, v25.2d				//GHASH final-3 block - high
   2459  1.1  christos 
   2460  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   2461  1.1  christos .inst	0xce05752c	//eor3 v12.16b, v9.16b, v5.16b, v29.16b				//AES final-2 block - result
   2462  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-3 block - low
   2463  1.1  christos 
   2464  1.1  christos 	pmull2	v27.1q, v27.2d, v24.2d				//GHASH final-3 block - mid
   2465  1.1  christos 
   2466  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-3 block - high
   2467  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-3 block - mid
   2468  1.1  christos .L128_dec_blocks_more_than_2:	//blocks	left >  2
   2469  1.1  christos 
   2470  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-2 block
   2471  1.1  christos 
   2472  1.1  christos 	st1	{ v12.16b}, [x2], #16			 	//AES final-2 block - store result
   2473  1.1  christos 
   2474  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   2475  1.1  christos 	ldr	q23, [x3, #80]				//load h3l | h3h
   2476  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   2477  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   2478  1.1  christos 
   2479  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-2 block - mid
   2480  1.1  christos 
   2481  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-2 block - mid
   2482  1.1  christos 
   2483  1.1  christos 	pmull	v26.1q, v8.1d, v23.1d				//GHASH final-2 block - low
   2484  1.1  christos 
   2485  1.1  christos 	pmull2	v28.1q, v8.2d, v23.2d				//GHASH final-2 block - high
   2486  1.1  christos 	pmull	v27.1q, v27.1d, v24.1d				//GHASH final-2 block - mid
   2487  1.1  christos 	ldr	q9, [x0], #16				//AES final-1 block - load ciphertext
   2488  1.1  christos 
   2489  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-2 block - mid
   2490  1.1  christos 
   2491  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-2 block - low
   2492  1.1  christos 
   2493  1.1  christos .inst	0xce06752c	//eor3 v12.16b, v9.16b, v6.16b, v29.16b				//AES final-1 block - result
   2494  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-2 block - high
   2495  1.1  christos .L128_dec_blocks_more_than_1:	//blocks	left >  1
   2496  1.1  christos 
   2497  1.1  christos 	st1	{ v12.16b}, [x2], #16			 	//AES final-1 block - store result
   2498  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-1 block
   2499  1.1  christos 
   2500  1.1  christos 	ldr	q22, [x3, #64]				//load h2l | h2h
   2501  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   2502  1.1  christos 
   2503  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   2504  1.1  christos 
   2505  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   2506  1.1  christos 
   2507  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-1 block - mid
   2508  1.1  christos 
   2509  1.1  christos 	ldr	q9, [x0], #16				//AES final block - load ciphertext
   2510  1.1  christos 	pmull2	v28.1q, v8.2d, v22.2d				//GHASH final-1 block - high
   2511  1.1  christos 
   2512  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-1 block - mid
   2513  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-1 block - high
   2514  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   2515  1.1  christos 
   2516  1.1  christos 	ins	v27.d[1], v27.d[0]					//GHASH final-1 block - mid
   2517  1.1  christos .inst	0xce07752c	//eor3 v12.16b, v9.16b, v7.16b, v29.16b				//AES final block - result
   2518  1.1  christos 
   2519  1.1  christos 	pmull	v26.1q, v8.1d, v22.1d				//GHASH final-1 block - low
   2520  1.1  christos 
   2521  1.1  christos 	pmull2	v27.1q, v27.2d, v21.2d				//GHASH final-1 block - mid
   2522  1.1  christos 
   2523  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-1 block - low
   2524  1.1  christos 
   2525  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-1 block - mid
   2526  1.1  christos .L128_dec_blocks_less_than_1:	//blocks	left <= 1
   2527  1.1  christos 
   2528  1.1  christos 	and	x1, x1, #127				//bit_length %= 128
   2529  1.1  christos 
   2530  1.1  christos 	sub	x1, x1, #128				//bit_length -= 128
   2531  1.1  christos 
   2532  1.1  christos 	neg	x1, x1				//bit_length = 128 - #bits in input (in range [1,128])
   2533  1.1  christos 
   2534  1.1  christos 	mvn	x6, xzr						//temp0_x = 0xffffffffffffffff
   2535  1.1  christos 	and	x1, x1, #127				//bit_length %= 128
   2536  1.1  christos 
   2537  1.1  christos 	lsr	x6, x6, x1				//temp0_x is mask for top 64b of last block
   2538  1.1  christos 	cmp	x1, #64
   2539  1.1  christos 	mvn	x7, xzr						//temp1_x = 0xffffffffffffffff
   2540  1.1  christos 
   2541  1.1  christos 	csel	x13, x7, x6, lt
   2542  1.1  christos 	csel	x14, x6, xzr, lt
   2543  1.1  christos 
   2544  1.1  christos 	mov	v0.d[1], x14
   2545  1.1  christos 	mov	v0.d[0], x13					//ctr0b is mask for last block
   2546  1.1  christos 
   2547  1.1  christos 	ldr	q20, [x3, #32]				//load h1l | h1h
   2548  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   2549  1.1  christos 	ld1	{ v26.16b}, [x2]					//load existing bytes where the possibly partial last block is to be stored
   2550  1.1  christos 
   2551  1.1  christos 	and	v9.16b, v9.16b, v0.16b					//possibly partial last block has zeroes in highest bits
   2552  1.1  christos 
   2553  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final block
   2554  1.1  christos 
   2555  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   2556  1.1  christos 
   2557  1.1  christos 	pmull2	v28.1q, v8.2d, v20.2d				//GHASH final block - high
   2558  1.1  christos 	ins	v16.d[0], v8.d[1]					//GHASH final block - mid
   2559  1.1  christos 
   2560  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final block - high
   2561  1.1  christos 	eor	v16.8b, v16.8b, v8.8b				//GHASH final block - mid
   2562  1.1  christos 
   2563  1.1  christos 	bif	v12.16b, v26.16b, v0.16b					//insert existing bytes in top end of result before storing
   2564  1.1  christos 
   2565  1.1  christos 	pmull	v16.1q, v16.1d, v21.1d				//GHASH final block - mid
   2566  1.1  christos 	st1	{ v12.16b}, [x2]				//store all 16B
   2567  1.1  christos 
   2568  1.1  christos 	pmull	v26.1q, v8.1d, v20.1d				//GHASH final block - low
   2569  1.1  christos 
   2570  1.1  christos 	eor	v18.16b, v18.16b, v16.16b				//GHASH final block - mid
   2571  1.1  christos 	ldr	d16, [x10]			//MODULO - load modulo constant
   2572  1.1  christos 
   2573  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final block - low
   2574  1.1  christos 
   2575  1.1  christos 	eor	v14.16b, v17.16b, v19.16b				//MODULO - karatsuba tidy up
   2576  1.1  christos 
   2577  1.1  christos 	pmull	v21.1q, v17.1d, v16.1d		 	//MODULO - top 64b align with mid
   2578  1.1  christos 	ext	v17.16b, v17.16b, v17.16b, #8				//MODULO - other top alignment
   2579  1.1  christos 
   2580  1.1  christos 	eor	v18.16b, v18.16b, v14.16b				//MODULO - karatsuba tidy up
   2581  1.1  christos 
   2582  1.1  christos .inst	0xce115652	//eor3 v18.16b, v18.16b, v17.16b, v21.16b			//MODULO - fold into mid
   2583  1.1  christos 
   2584  1.1  christos 	pmull	v17.1q, v18.1d, v16.1d			//MODULO - mid 64b align with low
   2585  1.1  christos 	ext	v18.16b, v18.16b, v18.16b, #8				//MODULO - other mid alignment
   2586  1.1  christos 
   2587  1.1  christos .inst	0xce124673	//eor3 v19.16b, v19.16b, v18.16b, v17.16b			//MODULO - fold into low
   2588  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8
   2589  1.1  christos 	rev64	v19.16b, v19.16b
   2590  1.1  christos 	st1	{ v19.16b }, [x3]
   2591  1.1  christos 	rev32	v30.16b, v30.16b
   2592  1.1  christos 
   2593  1.1  christos 	str	q30, [x16]					//store the updated counter
   2594  1.1  christos 
   2595  1.1  christos 	mov	x0, x9
   2596  1.1  christos 
   2597  1.1  christos 	ldp	d10, d11, [sp, #16]
   2598  1.1  christos 	ldp	d12, d13, [sp, #32]
   2599  1.1  christos 	ldp	d14, d15, [sp, #48]
   2600  1.1  christos 	ldp	d8, d9, [sp], #80
   2601  1.1  christos 	ret
   2602  1.1  christos .L128_dec_ret:
   2603  1.1  christos 	mov	w0, #0x0
   2604  1.1  christos 	ret
   2605  1.1  christos .size	unroll8_eor3_aes_gcm_dec_128_kernel,.-unroll8_eor3_aes_gcm_dec_128_kernel
   2606  1.1  christos .globl	unroll8_eor3_aes_gcm_enc_192_kernel
   2607  1.1  christos .type	unroll8_eor3_aes_gcm_enc_192_kernel,%function
   2608  1.1  christos .align	4
   2609  1.1  christos unroll8_eor3_aes_gcm_enc_192_kernel:
   2610  1.1  christos 	AARCH64_VALID_CALL_TARGET
   2611  1.1  christos 	cbz	x1, .L192_enc_ret
   2612  1.1  christos 	stp	d8, d9, [sp, #-80]!
   2613  1.1  christos 	lsr	x9, x1, #3
   2614  1.1  christos 	mov	x16, x4
   2615  1.1  christos 	mov	x8, x5
   2616  1.1  christos 	stp	d10, d11, [sp, #16]
   2617  1.1  christos 	stp	d12, d13, [sp, #32]
   2618  1.1  christos 	stp	d14, d15, [sp, #48]
   2619  1.1  christos 	mov	x5, #0xc200000000000000
   2620  1.1  christos 	stp	x5, xzr, [sp, #64]
   2621  1.1  christos 	add	x10, sp, #64
   2622  1.1  christos 
   2623  1.1  christos 	mov	x5, x9
   2624  1.1  christos 	ld1	{ v0.16b}, [x16]					//CTR block 0
   2625  1.1  christos 
   2626  1.1  christos 	mov	x15, #0x100000000				//set up counter increment
   2627  1.1  christos 	movi	v31.16b, #0x0
   2628  1.1  christos 	mov	v31.d[1], x15
   2629  1.1  christos 
   2630  1.1  christos 	rev32	v30.16b, v0.16b				//set up reversed counter
   2631  1.1  christos 
   2632  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 0
   2633  1.1  christos 
   2634  1.1  christos 	rev32	v1.16b, v30.16b				//CTR block 1
   2635  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 1
   2636  1.1  christos 
   2637  1.1  christos 	rev32	v2.16b, v30.16b				//CTR block 2
   2638  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 2
   2639  1.1  christos 
   2640  1.1  christos 	rev32	v3.16b, v30.16b				//CTR block 3
   2641  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 3
   2642  1.1  christos 
   2643  1.1  christos 	rev32	v4.16b, v30.16b				//CTR block 4
   2644  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 4
   2645  1.1  christos 	sub	x5, x5, #1		//byte_len - 1
   2646  1.1  christos 
   2647  1.1  christos 	and	x5, x5, #0xffffffffffffff80	//number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
   2648  1.1  christos 
   2649  1.1  christos 	rev32	v5.16b, v30.16b				//CTR block 5
   2650  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 5
   2651  1.1  christos 	ldp	q26, q27, [x8, #0]				 	//load rk0, rk1
   2652  1.1  christos 
   2653  1.1  christos 	add	x5, x5, x0
   2654  1.1  christos 
   2655  1.1  christos 	rev32	v6.16b, v30.16b				//CTR block 6
   2656  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 6
   2657  1.1  christos 
   2658  1.1  christos 	rev32	v7.16b, v30.16b				//CTR block 7
   2659  1.1  christos 
   2660  1.1  christos 	aese	v5.16b, v26.16b
   2661  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 0
   2662  1.1  christos 	aese	v4.16b, v26.16b
   2663  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 0
   2664  1.1  christos 	aese	v3.16b, v26.16b
   2665  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 0
   2666  1.1  christos 
   2667  1.1  christos 	aese	v0.16b, v26.16b
   2668  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 0
   2669  1.1  christos 	aese	v1.16b, v26.16b
   2670  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 0
   2671  1.1  christos 	aese	v7.16b, v26.16b
   2672  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 0
   2673  1.1  christos 
   2674  1.1  christos 	aese	v6.16b, v26.16b
   2675  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 0
   2676  1.1  christos 	aese	v2.16b, v26.16b
   2677  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 0
   2678  1.1  christos 	ldp	q28, q26, [x8, #32]				//load rk2, rk3
   2679  1.1  christos 
   2680  1.1  christos 	aese	v5.16b, v27.16b
   2681  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 1
   2682  1.1  christos 	aese	v7.16b, v27.16b
   2683  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 1
   2684  1.1  christos 
   2685  1.1  christos 	aese	v2.16b, v27.16b
   2686  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 1
   2687  1.1  christos 	aese	v3.16b, v27.16b
   2688  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 1
   2689  1.1  christos 	aese	v6.16b, v27.16b
   2690  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 1
   2691  1.1  christos 
   2692  1.1  christos 	aese	v5.16b, v28.16b
   2693  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 2
   2694  1.1  christos 	aese	v4.16b, v27.16b
   2695  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 1
   2696  1.1  christos 	aese	v0.16b, v27.16b
   2697  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 1
   2698  1.1  christos 
   2699  1.1  christos 	aese	v1.16b, v27.16b
   2700  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 1
   2701  1.1  christos 	aese	v7.16b, v28.16b
   2702  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 2
   2703  1.1  christos 	aese	v3.16b, v28.16b
   2704  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 2
   2705  1.1  christos 
   2706  1.1  christos 	aese	v2.16b, v28.16b
   2707  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 2
   2708  1.1  christos 	aese	v0.16b, v28.16b
   2709  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 2
   2710  1.1  christos 
   2711  1.1  christos 	aese	v1.16b, v28.16b
   2712  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 2
   2713  1.1  christos 	aese	v4.16b, v28.16b
   2714  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 2
   2715  1.1  christos 	aese	v6.16b, v28.16b
   2716  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 2
   2717  1.1  christos 
   2718  1.1  christos 	ldp	q27, q28, [x8, #64]				//load rk4, rk5
   2719  1.1  christos 	aese	v4.16b, v26.16b
   2720  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 3
   2721  1.1  christos 
   2722  1.1  christos 	aese	v7.16b, v26.16b
   2723  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 3
   2724  1.1  christos 	aese	v3.16b, v26.16b
   2725  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 3
   2726  1.1  christos 	aese	v2.16b, v26.16b
   2727  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 3
   2728  1.1  christos 
   2729  1.1  christos 	aese	v1.16b, v26.16b
   2730  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 3
   2731  1.1  christos 
   2732  1.1  christos 	aese	v0.16b, v26.16b
   2733  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 3
   2734  1.1  christos 
   2735  1.1  christos 	aese	v6.16b, v26.16b
   2736  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 3
   2737  1.1  christos 
   2738  1.1  christos 	aese	v0.16b, v27.16b
   2739  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 4
   2740  1.1  christos 	aese	v1.16b, v27.16b
   2741  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 4
   2742  1.1  christos 	aese	v5.16b, v26.16b
   2743  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 3
   2744  1.1  christos 
   2745  1.1  christos 	aese	v3.16b, v27.16b
   2746  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 4
   2747  1.1  christos 	aese	v2.16b, v27.16b
   2748  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 4
   2749  1.1  christos 	aese	v4.16b, v27.16b
   2750  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 4
   2751  1.1  christos 
   2752  1.1  christos 	aese	v6.16b, v27.16b
   2753  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 4
   2754  1.1  christos 	aese	v7.16b, v27.16b
   2755  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 4
   2756  1.1  christos 	aese	v5.16b, v27.16b
   2757  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 4
   2758  1.1  christos 
   2759  1.1  christos 	aese	v1.16b, v28.16b
   2760  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 5
   2761  1.1  christos 	ldp	q26, q27, [x8, #96]				//load rk6, rk7
   2762  1.1  christos 	aese	v2.16b, v28.16b
   2763  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 5
   2764  1.1  christos 
   2765  1.1  christos 	aese	v4.16b, v28.16b
   2766  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 5
   2767  1.1  christos 	aese	v7.16b, v28.16b
   2768  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 5
   2769  1.1  christos 	aese	v0.16b, v28.16b
   2770  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 5
   2771  1.1  christos 
   2772  1.1  christos 	aese	v5.16b, v28.16b
   2773  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 5
   2774  1.1  christos 	aese	v6.16b, v28.16b
   2775  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 5
   2776  1.1  christos 	aese	v3.16b, v28.16b
   2777  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 5
   2778  1.1  christos 
   2779  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 7
   2780  1.1  christos 
   2781  1.1  christos 	aese	v5.16b, v26.16b
   2782  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 6
   2783  1.1  christos 	aese	v4.16b, v26.16b
   2784  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 6
   2785  1.1  christos 	aese	v3.16b, v26.16b
   2786  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 6
   2787  1.1  christos 
   2788  1.1  christos 	aese	v2.16b, v26.16b
   2789  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 6
   2790  1.1  christos 	aese	v6.16b, v26.16b
   2791  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 6
   2792  1.1  christos 	aese	v1.16b, v26.16b
   2793  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 6
   2794  1.1  christos 
   2795  1.1  christos 	aese	v0.16b, v26.16b
   2796  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 6
   2797  1.1  christos 	aese	v7.16b, v26.16b
   2798  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 6
   2799  1.1  christos 	ldp	q28, q26, [x8, #128]				//load rk8, rk9
   2800  1.1  christos 
   2801  1.1  christos 	aese	v6.16b, v27.16b
   2802  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 7
   2803  1.1  christos 	aese	v3.16b, v27.16b
   2804  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 7
   2805  1.1  christos 
   2806  1.1  christos 	aese	v4.16b, v27.16b
   2807  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 7
   2808  1.1  christos 	aese	v0.16b, v27.16b
   2809  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 7
   2810  1.1  christos 
   2811  1.1  christos 	aese	v7.16b, v27.16b
   2812  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 7
   2813  1.1  christos 	aese	v1.16b, v27.16b
   2814  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 7
   2815  1.1  christos 
   2816  1.1  christos 	aese	v2.16b, v27.16b
   2817  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 7
   2818  1.1  christos 	aese	v5.16b, v27.16b
   2819  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 7
   2820  1.1  christos 
   2821  1.1  christos 	aese	v7.16b, v28.16b
   2822  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 8
   2823  1.1  christos 	aese	v0.16b, v28.16b
   2824  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 8
   2825  1.1  christos 
   2826  1.1  christos 	aese	v4.16b, v28.16b
   2827  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 8
   2828  1.1  christos 	aese	v3.16b, v28.16b
   2829  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 8
   2830  1.1  christos 	aese	v5.16b, v28.16b
   2831  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 8
   2832  1.1  christos 
   2833  1.1  christos 	aese	v2.16b, v28.16b
   2834  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 8
   2835  1.1  christos 	aese	v1.16b, v28.16b
   2836  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 8
   2837  1.1  christos 	aese	v6.16b, v28.16b
   2838  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 8
   2839  1.1  christos 
   2840  1.1  christos 	add	x4, x0, x1, lsr #3		//end_input_ptr
   2841  1.1  christos 	cmp	x0, x5				//check if we have <= 8 blocks
   2842  1.1  christos 	aese	v3.16b, v26.16b
   2843  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 9
   2844  1.1  christos 
   2845  1.1  christos 	ld1	{ v19.16b}, [x3]
   2846  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8
   2847  1.1  christos 	rev64	v19.16b, v19.16b
   2848  1.1  christos 	ldp	q27, q28, [x8, #160]				//load rk10, rk11
   2849  1.1  christos 
   2850  1.1  christos 	aese	v6.16b, v26.16b
   2851  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 9
   2852  1.1  christos 	aese	v1.16b, v26.16b
   2853  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 9
   2854  1.1  christos 
   2855  1.1  christos 	aese	v5.16b, v26.16b
   2856  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 9
   2857  1.1  christos 	aese	v2.16b, v26.16b
   2858  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 9
   2859  1.1  christos 
   2860  1.1  christos 	aese	v0.16b, v26.16b
   2861  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 9
   2862  1.1  christos 	aese	v4.16b, v26.16b
   2863  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 9
   2864  1.1  christos 
   2865  1.1  christos 	aese	v6.16b, v27.16b
   2866  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 14 - round 10
   2867  1.1  christos 	aese	v7.16b, v26.16b
   2868  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 9
   2869  1.1  christos 	aese	v3.16b, v27.16b
   2870  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 11 - round 10
   2871  1.1  christos 
   2872  1.1  christos 	aese	v1.16b, v27.16b
   2873  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 9 - round 10
   2874  1.1  christos 	aese	v5.16b, v27.16b
   2875  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 13 - round 10
   2876  1.1  christos 	aese	v4.16b, v27.16b
   2877  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 12 - round 10
   2878  1.1  christos 
   2879  1.1  christos 	aese	v0.16b, v27.16b
   2880  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8 - round 10
   2881  1.1  christos 	aese	v2.16b, v27.16b
   2882  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 10 - round 10
   2883  1.1  christos 	aese	v7.16b, v27.16b
   2884  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 15 - round 10
   2885  1.1  christos 
   2886  1.1  christos 	aese	v6.16b, v28.16b						//AES block 14 - round 11
   2887  1.1  christos 	aese	v3.16b, v28.16b						//AES block 11 - round 11
   2888  1.1  christos 
   2889  1.1  christos 	aese	v4.16b, v28.16b						//AES block 12 - round 11
   2890  1.1  christos 	aese	v7.16b, v28.16b						//AES block 15 - round 11
   2891  1.1  christos 	ldr	q26, [x8, #192]					//load rk12
   2892  1.1  christos 
   2893  1.1  christos 	aese	v1.16b, v28.16b						//AES block 9 - round 11
   2894  1.1  christos 	aese	v5.16b, v28.16b						//AES block 13 - round 11
   2895  1.1  christos 
   2896  1.1  christos 	aese	v2.16b, v28.16b						//AES block 10 - round 11
   2897  1.1  christos 	aese	v0.16b, v28.16b						//AES block 8 - round 11
   2898  1.1  christos 	b.ge	.L192_enc_tail						//handle tail
   2899  1.1  christos 
   2900  1.1  christos 	ldp	q8, q9, [x0], #32			//AES block 0, 1 - load plaintext
   2901  1.1  christos 
   2902  1.1  christos 	ldp	q10, q11, [x0], #32			//AES block 2, 3 - load plaintext
   2903  1.1  christos 
   2904  1.1  christos 	ldp	q12, q13, [x0], #32			//AES block 4, 5 - load plaintext
   2905  1.1  christos 
   2906  1.1  christos 	ldp	q14, q15, [x0], #32			//AES block 6, 7 - load plaintext
   2907  1.1  christos 
   2908  1.1  christos .inst	0xce006908	//eor3 v8.16b, v8.16b, v0.16b, v26.16b				//AES block 0 - result
   2909  1.1  christos 	rev32	v0.16b, v30.16b				//CTR block 8
   2910  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8
   2911  1.1  christos 
   2912  1.1  christos .inst	0xce03696b	//eor3 v11.16b, v11.16b, v3.16b, v26.16b				//AES block 3 - result
   2913  1.1  christos .inst	0xce016929	//eor3 v9.16b, v9.16b, v1.16b, v26.16b				//AES block 1 - result
   2914  1.1  christos 
   2915  1.1  christos 	rev32	v1.16b, v30.16b				//CTR block 9
   2916  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 9
   2917  1.1  christos .inst	0xce04698c	//eor3 v12.16b, v12.16b, v4.16b, v26.16b				//AES block 4 - result
   2918  1.1  christos 
   2919  1.1  christos .inst	0xce0569ad	//eor3 v13.16b, v13.16b, v5.16b, v26.16b				//AES block 5 - result
   2920  1.1  christos .inst	0xce0769ef	//eor3 v15.16b, v15.16b, v7.16b, v26.16b				//AES block 7 - result
   2921  1.1  christos 	stp	q8, q9, [x2], #32			//AES block 0, 1 - store result
   2922  1.1  christos 
   2923  1.1  christos .inst	0xce02694a	//eor3 v10.16b, v10.16b, v2.16b, v26.16b				//AES block 2 - result
   2924  1.1  christos 	rev32	v2.16b, v30.16b				//CTR block 10
   2925  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 10
   2926  1.1  christos 
   2927  1.1  christos 	stp	q10, q11, [x2], #32			//AES block 2, 3 - store result
   2928  1.1  christos 	cmp	x0, x5				//check if we have <= 8 blocks
   2929  1.1  christos 
   2930  1.1  christos 	rev32	v3.16b, v30.16b				//CTR block 11
   2931  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 11
   2932  1.1  christos .inst	0xce0669ce	//eor3 v14.16b, v14.16b, v6.16b, v26.16b				//AES block 6 - result
   2933  1.1  christos 
   2934  1.1  christos 	stp	q12, q13, [x2], #32			//AES block 4, 5 - store result
   2935  1.1  christos 
   2936  1.1  christos 	rev32	v4.16b, v30.16b				//CTR block 12
   2937  1.1  christos 	stp	q14, q15, [x2], #32			//AES block 6, 7 - store result
   2938  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 12
   2939  1.1  christos 
   2940  1.1  christos 	b.ge	.L192_enc_prepretail					//do prepretail
   2941  1.1  christos 
   2942  1.1  christos .L192_enc_main_loop:	//main loop start - each pass runs AES rounds 0-11 on counter blocks 8k+8..8k+15 (v0-v7), GHASHes blocks 8k..8k+7 held in v8-v15, then loads, encrypts and stores the next 128 bytes
   2943  1.1  christos 	rev64	v12.16b, v12.16b						//GHASH block 8k+4 (t0, t1, and t2 free)
   2944  1.1  christos 	ldp	q26, q27, [x8, #0]				 	//load rk0, rk1
   2945  1.1  christos 	rev64	v10.16b, v10.16b						//GHASH block 8k+2
   2946  1.1  christos 
   2947  1.1  christos 	rev32	v5.16b, v30.16b				//CTR block 8k+13
   2948  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+13
   2949  1.1  christos 	ldr	q23, [x3, #176]				//load h7l | h7h
   2950  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   2951  1.1  christos 	ldr	q25, [x3, #208]				//load h8l | h8h
   2952  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   2953  1.1  christos 
   2954  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8				//PRE 0 - swap the 64-bit halves of the running hash in v19
   2955  1.1  christos 	rev64	v8.16b, v8.16b						//GHASH block 8k
   2956  1.1  christos 	ldr	q20, [x3, #128]				//load h5l | h5h
   2957  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   2958  1.1  christos 	ldr	q22, [x3, #160]				//load h6l | h6h
   2959  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   2960  1.1  christos 
   2961  1.1  christos 	rev64	v9.16b, v9.16b						//GHASH block 8k+1
   2962  1.1  christos 	rev32	v6.16b, v30.16b				//CTR block 8k+14
   2963  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+14
   2964  1.1  christos 
   2965  1.1  christos 	eor	v8.16b, v8.16b, v19.16b				 	//PRE 1 - fold the running hash (v19) into block 8k
   2966  1.1  christos 	rev64	v11.16b, v11.16b						//GHASH block 8k+3
   2967  1.1  christos 	rev64	v13.16b, v13.16b						//GHASH block 8k+5 (t0, t1, t2 and t3 free)
   2968  1.1  christos 
   2969  1.1  christos 	aese	v0.16b, v26.16b
   2970  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 0
   2971  1.1  christos 	rev32	v7.16b, v30.16b				//CTR block 8k+15
   2972  1.1  christos 	aese	v1.16b, v26.16b
   2973  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 0
   2974  1.1  christos 
   2975  1.1  christos 	aese	v3.16b, v26.16b
   2976  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 0
   2977  1.1  christos 	aese	v5.16b, v26.16b
   2978  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 0
   2979  1.1  christos 	aese	v2.16b, v26.16b
   2980  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 0
   2981  1.1  christos 
   2982  1.1  christos 	aese	v7.16b, v26.16b
   2983  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 0
   2984  1.1  christos 	aese	v4.16b, v26.16b
   2985  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 0
   2986  1.1  christos 	aese	v6.16b, v26.16b
   2987  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 0
   2988  1.1  christos 
   2989  1.1  christos 	ldp	q28, q26, [x8, #32]				//load rk2, rk3
   2990  1.1  christos 	pmull2	v17.1q, v8.2d, v25.2d				//GHASH block 8k - high
   2991  1.1  christos 	aese	v0.16b, v27.16b
   2992  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 1
   2993  1.1  christos 
   2994  1.1  christos 	aese	v4.16b, v27.16b
   2995  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 1
   2996  1.1  christos 	pmull2	v16.1q, v9.2d, v23.2d				//GHASH block 8k+1 - high
   2997  1.1  christos 	pmull	v23.1q, v9.1d, v23.1d				//GHASH block 8k+1 - low
   2998  1.1  christos 
   2999  1.1  christos 	trn1	v18.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   3000  1.1  christos 	aese	v3.16b, v27.16b
   3001  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 1
   3002  1.1  christos 	ldr	q21, [x3, #144]				//load h6k | h5k
   3003  1.1  christos 	ldr	q24, [x3, #192]				//load h8k | h7k
   3004  1.1  christos 
   3005  1.1  christos 	pmull2	v29.1q, v10.2d, v22.2d				//GHASH block 8k+2 - high
   3006  1.1  christos 	pmull	v19.1q, v8.1d, v25.1d				//GHASH block 8k - low
   3007  1.1  christos 	trn2	v8.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   3008  1.1  christos 
   3009  1.1  christos 	aese	v1.16b, v27.16b
   3010  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 1
   3011  1.1  christos 	aese	v2.16b, v27.16b
   3012  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 1
   3013  1.1  christos 	aese	v5.16b, v27.16b
   3014  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 1
   3015  1.1  christos 
   3016  1.1  christos 	eor	v17.16b, v17.16b, v16.16b				//GHASH block 8k+1 - high
   3017  1.1  christos 	aese	v6.16b, v27.16b
   3018  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 1
   3019  1.1  christos 	aese	v7.16b, v27.16b
   3020  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 1
   3021  1.1  christos 
   3022  1.1  christos 	pmull2	v9.1q, v11.2d, v20.2d				//GHASH block 8k+3 - high
   3023  1.1  christos 	eor	v8.16b, v8.16b, v18.16b			//GHASH block 8k, 8k+1 - mid
   3024  1.1  christos 	aese	v1.16b, v28.16b
   3025  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 2
   3026  1.1  christos 
   3027  1.1  christos 	aese	v3.16b, v28.16b
   3028  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 2
   3029  1.1  christos 	aese	v4.16b, v28.16b
   3030  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 2
   3031  1.1  christos 	aese	v6.16b, v28.16b
   3032  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 2
   3033  1.1  christos 
   3034  1.1  christos 	aese	v5.16b, v28.16b
   3035  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 2
   3036  1.1  christos 	aese	v1.16b, v26.16b
   3037  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 3
   3038  1.1  christos .inst	0xce1d2631	//eor3 v17.16b, v17.16b, v29.16b, v9.16b			//GHASH block 8k+2, 8k+3 - high
   3039  1.1  christos 
   3040  1.1  christos 	pmull	v22.1q, v10.1d, v22.1d				//GHASH block 8k+2 - low
   3041  1.1  christos 	aese	v7.16b, v28.16b
   3042  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 2
   3043  1.1  christos 	aese	v4.16b, v26.16b
   3044  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 3
   3045  1.1  christos 
   3046  1.1  christos 	aese	v2.16b, v28.16b
   3047  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 2
   3048  1.1  christos 	trn1	v29.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   3049  1.1  christos 	aese	v0.16b, v28.16b
   3050  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 2
   3051  1.1  christos 
   3052  1.1  christos 	trn2	v10.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   3053  1.1  christos 	aese	v3.16b, v26.16b
   3054  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 3
   3055  1.1  christos 	ldp	q27, q28, [x8, #64]				//load rk4, rk5
   3056  1.1  christos 
   3057  1.1  christos 	aese	v0.16b, v26.16b
   3058  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 3
   3059  1.1  christos 	eor	v19.16b, v19.16b, v23.16b				//GHASH block 8k+1 - low
   3060  1.1  christos 	ldr	q23, [x3, #80]				//load h3l | h3h
   3061  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   3062  1.1  christos 	ldr	q25, [x3, #112]				//load h4l | h4h
   3063  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   3064  1.1  christos 
   3065  1.1  christos 	pmull2	v18.1q, v8.2d, v24.2d				//GHASH block 8k - mid
   3066  1.1  christos 	pmull	v24.1q, v8.1d, v24.1d				//GHASH block 8k+1 - mid
   3067  1.1  christos 	pmull	v20.1q, v11.1d, v20.1d				//GHASH block 8k+3 - low
   3068  1.1  christos 
   3069  1.1  christos 	aese	v5.16b, v26.16b
   3070  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 3
   3071  1.1  christos 	eor	v10.16b, v10.16b, v29.16b				//GHASH block 8k+2, 8k+3 - mid
   3072  1.1  christos 	trn1	v16.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   3073  1.1  christos 
   3074  1.1  christos 	eor	v18.16b, v18.16b, v24.16b				//GHASH block 8k+1 - mid
   3075  1.1  christos 	aese	v6.16b, v26.16b
   3076  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 3
   3077  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+2, 8k+3 - low
   3078  1.1  christos 
   3079  1.1  christos 	aese	v1.16b, v27.16b
   3080  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 4
   3081  1.1  christos 	aese	v3.16b, v27.16b
   3082  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 4
   3083  1.1  christos 	aese	v7.16b, v26.16b
   3084  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 3
   3085  1.1  christos 
   3086  1.1  christos 	pmull2	v29.1q, v10.2d, v21.2d				//GHASH block 8k+2 - mid
   3087  1.1  christos 	aese	v6.16b, v27.16b
   3088  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 4
   3089  1.1  christos 	aese	v2.16b, v26.16b
   3090  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 3
   3091  1.1  christos 
   3092  1.1  christos 	pmull	v21.1q, v10.1d, v21.1d				//GHASH block 8k+3 - mid
   3093  1.1  christos 	aese	v0.16b, v27.16b
   3094  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 4
   3095  1.1  christos 	aese	v4.16b, v27.16b
   3096  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 4
   3097  1.1  christos 
   3098  1.1  christos 	aese	v2.16b, v27.16b
   3099  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 4
   3100  1.1  christos 	aese	v5.16b, v27.16b
   3101  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 4
   3102  1.1  christos 	aese	v7.16b, v27.16b
   3103  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 4
   3104  1.1  christos 
   3105  1.1  christos .inst	0xce157652	//eor3 v18.16b, v18.16b, v21.16b, v29.16b			//GHASH block 8k+2, 8k+3 - mid
   3106  1.1  christos 	aese	v4.16b, v28.16b
   3107  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 5
   3108  1.1  christos 	ldr	q20, [x3, #32]				//load h1l | h1h
   3109  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   3110  1.1  christos 	ldr	q22, [x3, #64]				//load h2l | h2h
   3111  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   3112  1.1  christos 
   3113  1.1  christos 	ldp	q26, q27, [x8, #96]				//load rk6, rk7
   3114  1.1  christos 	aese	v2.16b, v28.16b
   3115  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 5
   3116  1.1  christos 	rev64	v15.16b, v15.16b						//GHASH block 8k+7 (t0, t1, t2 and t3 free)
   3117  1.1  christos 
   3118  1.1  christos 	rev64	v14.16b, v14.16b						//GHASH block 8k+6 (t0, t1, and t2 free)
   3119  1.1  christos 	pmull2	v8.1q, v12.2d, v25.2d				//GHASH block 8k+4 - high
   3120  1.1  christos 	pmull	v25.1q, v12.1d, v25.1d				//GHASH block 8k+4 - low
   3121  1.1  christos 
   3122  1.1  christos 	aese	v5.16b, v28.16b
   3123  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 5
   3124  1.1  christos 	trn2	v12.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   3125  1.1  christos 
   3126  1.1  christos 	aese	v6.16b, v28.16b
   3127  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 5
   3128  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   3129  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   3130  1.1  christos 
   3131  1.1  christos 	aese	v1.16b, v28.16b
   3132  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 5
   3133  1.1  christos 	pmull2	v10.1q, v13.2d, v23.2d				//GHASH block 8k+5 - high
   3134  1.1  christos 	eor	v12.16b, v12.16b, v16.16b				//GHASH block 8k+4, 8k+5 - mid
   3135  1.1  christos 
   3136  1.1  christos 	aese	v3.16b, v28.16b
   3137  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 5
   3138  1.1  christos 	aese	v7.16b, v28.16b
   3139  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 5
   3140  1.1  christos 	aese	v0.16b, v28.16b
   3141  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 5
   3142  1.1  christos 
   3143  1.1  christos 	pmull	v23.1q, v13.1d, v23.1d				//GHASH block 8k+5 - low
   3144  1.1  christos 	aese	v4.16b, v26.16b
   3145  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 6
   3146  1.1  christos 	trn1	v13.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   3147  1.1  christos 
   3148  1.1  christos 	aese	v0.16b, v26.16b
   3149  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 6
   3150  1.1  christos 	aese	v3.16b, v26.16b
   3151  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 6
   3152  1.1  christos 	pmull2	v11.1q, v14.2d, v22.2d				//GHASH block 8k+6 - high
   3153  1.1  christos 
   3154  1.1  christos 	pmull	v22.1q, v14.1d, v22.1d				//GHASH block 8k+6 - low
   3155  1.1  christos 	trn2	v14.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   3156  1.1  christos 	aese	v2.16b, v26.16b
   3157  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 6
   3158  1.1  christos 
   3159  1.1  christos 	aese	v6.16b, v26.16b
   3160  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 6
   3161  1.1  christos 	aese	v5.16b, v26.16b
   3162  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 6
   3163  1.1  christos 
   3164  1.1  christos 	aese	v7.16b, v26.16b
   3165  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 6
   3166  1.1  christos 	aese	v2.16b, v27.16b
   3167  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 7
   3168  1.1  christos 	aese	v1.16b, v26.16b
   3169  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 6
   3170  1.1  christos 
   3171  1.1  christos 	aese	v6.16b, v27.16b
   3172  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 7
   3173  1.1  christos 	eor	v14.16b, v14.16b, v13.16b				//GHASH block 8k+6, 8k+7 - mid
   3174  1.1  christos 
   3175  1.1  christos 	pmull2	v16.1q, v12.2d, v24.2d				//GHASH block 8k+4 - mid
   3176  1.1  christos 	ldp	q28, q26, [x8, #128]				//load rk8, rk9
   3177  1.1  christos 	pmull	v24.1q, v12.1d, v24.1d				//GHASH block 8k+5 - mid
   3178  1.1  christos 
   3179  1.1  christos 	aese	v4.16b, v27.16b
   3180  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 7
   3181  1.1  christos 	pmull2	v12.1q, v15.2d, v20.2d				//GHASH block 8k+7 - high
   3182  1.1  christos 	aese	v5.16b, v27.16b
   3183  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 7
   3184  1.1  christos 
   3185  1.1  christos .inst	0xce184252	//eor3 v18.16b, v18.16b, v24.16b, v16.16b			//GHASH block 8k+4, 8k+5 - mid
   3186  1.1  christos 	aese	v7.16b, v27.16b
   3187  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 7
   3188  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+15
   3189  1.1  christos 
   3190  1.1  christos 	ldr	d16, [x10]			//MODULO - load modulo constant
   3191  1.1  christos .inst	0xce082a31	//eor3 v17.16b, v17.16b, v8.16b, v10.16b			//GHASH block 8k+4, 8k+5 - high
   3192  1.1  christos 	aese	v0.16b, v27.16b
   3193  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 7
   3194  1.1  christos 
   3195  1.1  christos 	pmull2	v13.1q, v14.2d, v21.2d				//GHASH block 8k+6 - mid
   3196  1.1  christos 	pmull	v20.1q, v15.1d, v20.1d				//GHASH block 8k+7 - low
   3197  1.1  christos 	aese	v3.16b, v27.16b
   3198  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 7
   3199  1.1  christos 
   3200  1.1  christos 	aese	v5.16b, v28.16b
   3201  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 8
   3202  1.1  christos 	aese	v4.16b, v28.16b
   3203  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 8
   3204  1.1  christos 	aese	v0.16b, v28.16b
   3205  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 8
   3206  1.1  christos 
   3207  1.1  christos 	aese	v6.16b, v28.16b
   3208  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 8
   3209  1.1  christos .inst	0xce195e73	//eor3 v19.16b, v19.16b, v25.16b, v23.16b			//GHASH block 8k+4, 8k+5 - low
   3210  1.1  christos 	aese	v1.16b, v27.16b
   3211  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 7
   3212  1.1  christos 
   3213  1.1  christos 	aese	v7.16b, v28.16b
   3214  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 8
   3215  1.1  christos 	aese	v2.16b, v28.16b
   3216  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 8
   3217  1.1  christos 	pmull	v21.1q, v14.1d, v21.1d				//GHASH block 8k+7 - mid
   3218  1.1  christos 
   3219  1.1  christos 	aese	v1.16b, v28.16b
   3220  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 8
   3221  1.1  christos 	aese	v3.16b, v28.16b
   3222  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 8
   3223  1.1  christos 	ldp	q27, q28, [x8, #160]				//load rk10, rk11
   3224  1.1  christos 
   3225  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+6, 8k+7 - low
   3226  1.1  christos 	rev32	v20.16b, v30.16b					//CTR block 8k+16 - staged in v20, copied to v0 once v0 has been consumed
   3227  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+16
   3228  1.1  christos 
   3229  1.1  christos 	aese	v2.16b, v26.16b
   3230  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 9
   3231  1.1  christos .inst	0xce153652	//eor3 v18.16b, v18.16b, v21.16b, v13.16b			//GHASH block 8k+6, 8k+7 - mid
   3232  1.1  christos .inst	0xce0b3231	//eor3 v17.16b, v17.16b, v11.16b, v12.16b			//GHASH block 8k+6, 8k+7 - high
   3233  1.1  christos 
   3234  1.1  christos 	aese	v6.16b, v26.16b
   3235  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 9
   3236  1.1  christos 	aese	v3.16b, v26.16b
   3237  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 9
   3238  1.1  christos 	ldp	q8, q9, [x0], #32			//AES block 8k+8, 8k+9 - load plaintext
   3239  1.1  christos 
   3240  1.1  christos 	pmull	v21.1q, v17.1d, v16.1d		 	//MODULO - top 64b align with mid
   3241  1.1  christos 	rev32	v22.16b, v30.16b					//CTR block 8k+17 - staged in v22, copied to v1 once v1 has been consumed
   3242  1.1  christos 	aese	v0.16b, v26.16b
   3243  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 9
   3244  1.1  christos 
   3245  1.1  christos 	aese	v4.16b, v26.16b
   3246  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 9
   3247  1.1  christos 	aese	v1.16b, v26.16b
   3248  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 9
   3249  1.1  christos 	aese	v7.16b, v26.16b
   3250  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 9
   3251  1.1  christos 
   3252  1.1  christos .inst	0xce114e52	//eor3 v18.16b, v18.16b, v17.16b, v19.16b		 	//MODULO - karatsuba tidy up
   3253  1.1  christos 	aese	v5.16b, v26.16b
   3254  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 9
   3255  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+17
   3256  1.1  christos 
   3257  1.1  christos 	aese	v2.16b, v27.16b
   3258  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 10
   3259  1.1  christos 	aese	v4.16b, v27.16b
   3260  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 10
   3261  1.1  christos 	ldr	q26, [x8, #192]					//load rk12
   3262  1.1  christos 	ext	v29.16b, v17.16b, v17.16b, #8				//MODULO - other top alignment
   3263  1.1  christos 
   3264  1.1  christos 	aese	v0.16b, v27.16b
   3265  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 10
   3266  1.1  christos 	aese	v7.16b, v27.16b
   3267  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 10
   3268  1.1  christos 	ldp	q10, q11, [x0], #32			//AES block 8k+10, 8k+11 - load plaintext
   3269  1.1  christos 
   3270  1.1  christos 	aese	v4.16b, v28.16b						//AES block 8k+12 - round 11
   3271  1.1  christos .inst	0xce1d5652	//eor3 v18.16b, v18.16b, v29.16b, v21.16b			//MODULO - fold into mid
   3272  1.1  christos 	ldp	q12, q13, [x0], #32			//AES block 8k+12, 8k+13 - load plaintext
   3273  1.1  christos 
   3274  1.1  christos 	ldp	q14, q15, [x0], #32			//AES block 8k+14, 8k+15 - load plaintext
   3275  1.1  christos 	aese	v2.16b, v28.16b						//AES block 8k+10 - round 11
   3276  1.1  christos 	aese	v1.16b, v27.16b
   3277  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 10
   3278  1.1  christos 
   3279  1.1  christos 	rev32	v23.16b, v30.16b					//CTR block 8k+18 - staged in v23, copied to v2 once v2 has been consumed
   3280  1.1  christos 	aese	v5.16b, v27.16b
   3281  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 10
   3282  1.1  christos 
   3283  1.1  christos 	aese	v3.16b, v27.16b
   3284  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 10
   3285  1.1  christos 	pmull	v17.1q, v18.1d, v16.1d			//MODULO - mid 64b align with low
   3286  1.1  christos 
   3287  1.1  christos 	aese	v6.16b, v27.16b
   3288  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 10
   3289  1.1  christos 	aese	v5.16b, v28.16b						//AES block 8k+13 - round 11
   3290  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+18
   3291  1.1  christos 
   3292  1.1  christos 	aese	v7.16b, v28.16b						//AES block 8k+15 - round 11
   3293  1.1  christos 	aese	v0.16b, v28.16b						//AES block 8k+8 - round 11
   3294  1.1  christos .inst	0xce04698c	//eor3 v12.16b, v12.16b, v4.16b, v26.16b				//AES block 8k+12 - result
   3295  1.1  christos 
   3296  1.1  christos 	aese	v6.16b, v28.16b						//AES block 8k+14 - round 11
   3297  1.1  christos 	aese	v3.16b, v28.16b						//AES block 8k+11 - round 11
   3298  1.1  christos 	aese	v1.16b, v28.16b						//AES block 8k+9 - round 11
   3299  1.1  christos 
   3300  1.1  christos 	rev32	v25.16b, v30.16b					//CTR block 8k+19 - staged in v25, copied to v3 once v3 has been consumed
   3301  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+19
   3302  1.1  christos .inst	0xce0769ef	//eor3 v15.16b, v15.16b, v7.16b, v26.16b				//AES block 8k+15 - result
   3303  1.1  christos 
   3304  1.1  christos .inst	0xce02694a	//eor3 v10.16b, v10.16b, v2.16b, v26.16b				//AES block 8k+10 - result
   3305  1.1  christos .inst	0xce006908	//eor3 v8.16b, v8.16b, v0.16b, v26.16b				//AES block 8k+8 - result
   3306  1.1  christos 	mov	v2.16b, v23.16b					//CTR block 8k+18
   3307  1.1  christos 
   3308  1.1  christos .inst	0xce016929	//eor3 v9.16b, v9.16b, v1.16b, v26.16b				//AES block 8k+9 - result
   3309  1.1  christos 	mov	v1.16b, v22.16b					//CTR block 8k+17
   3310  1.1  christos 	stp	q8, q9, [x2], #32			//AES block 8k+8, 8k+9 - store result
   3311  1.1  christos 	ext	v21.16b, v18.16b, v18.16b, #8				//MODULO - other mid alignment
   3312  1.1  christos 
   3313  1.1  christos .inst	0xce0669ce	//eor3 v14.16b, v14.16b, v6.16b, v26.16b				//AES block 8k+14 - result
   3314  1.1  christos 	mov	v0.16b, v20.16b					//CTR block 8k+16
   3315  1.1  christos 	rev32	v4.16b, v30.16b				//CTR block 8k+20
   3316  1.1  christos 
   3317  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+20
   3318  1.1  christos .inst	0xce0569ad	//eor3 v13.16b, v13.16b, v5.16b, v26.16b				//AES block 8k+13 - result
   3319  1.1  christos .inst	0xce115673	//eor3 v19.16b, v19.16b, v17.16b, v21.16b		 	//MODULO - fold into low
   3320  1.1  christos 
   3321  1.1  christos .inst	0xce03696b	//eor3 v11.16b, v11.16b, v3.16b, v26.16b				//AES block 8k+11 - result
   3322  1.1  christos 	mov	v3.16b, v25.16b					//CTR block 8k+19
   3323  1.1  christos 
   3324  1.1  christos 	stp	q10, q11, [x2], #32			//AES block 8k+10, 8k+11 - store result
   3325  1.1  christos 
   3326  1.1  christos 	stp	q12, q13, [x2], #32			//AES block 8k+12, 8k+13 - store result
   3327  1.1  christos 
   3328  1.1  christos 	cmp	x0, x5				//LOOP CONTROL - compare input pointer x0 against the bound in x5
   3329  1.1  christos 	stp	q14, q15, [x2], #32			//AES block 8k+14, 8k+15 - store result
   3330  1.1  christos 	b.lt	.L192_enc_main_loop
   3331  1.1  christos 
   3332  1.1  christos .L192_enc_prepretail:	//PREPRETAIL - GHASH the eight blocks in v8-v15 interleaved with AES rounds 0-11 on the counter blocks in v0-v7
   3333  1.1  christos 	rev32	v5.16b, v30.16b				//CTR block 8k+13
   3334  1.1  christos 	ldp	q26, q27, [x8, #0]				 	//load rk0, rk1
   3335  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+13
   3336  1.1  christos 
   3337  1.1  christos 	ldr	q23, [x3, #176]				//load h7l | h7h
   3338  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8				//swap the two 64-bit halves of h7
   3339  1.1  christos 	ldr	q25, [x3, #208]				//load h8l | h8h
   3340  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8				//swap the two 64-bit halves of h8
   3341  1.1  christos 	rev64	v8.16b, v8.16b						//GHASH block 8k
   3342  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8				//PRE 0 - swap the two 64-bit halves of the running GHASH value (v19)
   3343  1.1  christos 
   3344  1.1  christos 	rev32	v6.16b, v30.16b				//CTR block 8k+14
   3345  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+14
   3346  1.1  christos 	ldr	q21, [x3, #144]				//load h6k | h5k
   3347  1.1  christos 	ldr	q24, [x3, #192]				//load h8k | h7k
   3348  1.1  christos 
   3349  1.1  christos 	rev64	v11.16b, v11.16b						//GHASH block 8k+3
   3350  1.1  christos 	rev64	v10.16b, v10.16b						//GHASH block 8k+2
   3351  1.1  christos 	ldr	q20, [x3, #128]				//load h5l | h5h
   3352  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8				//swap the two 64-bit halves of h5
   3353  1.1  christos 	ldr	q22, [x3, #160]				//load h6l | h6h
   3354  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8				//swap the two 64-bit halves of h6
   3355  1.1  christos 
   3356  1.1  christos 	eor	v8.16b, v8.16b, v19.16b				 	//PRE 1 - XOR the running GHASH value into block 8k
   3357  1.1  christos 	rev32	v7.16b, v30.16b				//CTR block 8k+15
   3358  1.1  christos 	rev64	v9.16b, v9.16b						//GHASH block 8k+1
   3359  1.1  christos 
   3360  1.1  christos 	aese	v5.16b, v26.16b
   3361  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 0
   3362  1.1  christos 	aese	v2.16b, v26.16b
   3363  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 0
   3364  1.1  christos 	aese	v3.16b, v26.16b
   3365  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 0
   3366  1.1  christos 
   3367  1.1  christos 	pmull2	v16.1q, v9.2d, v23.2d				//GHASH block 8k+1 - high
   3368  1.1  christos 	aese	v0.16b, v26.16b
   3369  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 0
   3370  1.1  christos 	aese	v6.16b, v26.16b
   3371  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 0
   3372  1.1  christos 
   3373  1.1  christos 	aese	v1.16b, v26.16b
   3374  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 0
   3375  1.1  christos 	aese	v4.16b, v26.16b
   3376  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 0
   3377  1.1  christos 	pmull2	v17.1q, v8.2d, v25.2d				//GHASH block 8k - high
   3378  1.1  christos 
   3379  1.1  christos 	aese	v6.16b, v27.16b
   3380  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 1
   3381  1.1  christos 	pmull	v19.1q, v8.1d, v25.1d				//GHASH block 8k - low
   3382  1.1  christos 	trn1	v18.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   3383  1.1  christos 
   3384  1.1  christos 	trn2	v8.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   3385  1.1  christos 	aese	v7.16b, v26.16b
   3386  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 0
   3387  1.1  christos 	ldp	q28, q26, [x8, #32]				//load rk2, rk3
   3388  1.1  christos 
   3389  1.1  christos 	pmull	v23.1q, v9.1d, v23.1d				//GHASH block 8k+1 - low
   3390  1.1  christos 	eor	v17.16b, v17.16b, v16.16b				//GHASH block 8k+1 - high
   3391  1.1  christos 	aese	v2.16b, v27.16b
   3392  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 1
   3393  1.1  christos 
   3394  1.1  christos 	aese	v5.16b, v27.16b
   3395  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 1
   3396  1.1  christos 	eor	v8.16b, v8.16b, v18.16b			//GHASH block 8k, 8k+1 - mid
   3397  1.1  christos 	aese	v1.16b, v27.16b
   3398  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 1
   3399  1.1  christos 
   3400  1.1  christos 	aese	v7.16b, v27.16b
   3401  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 1
   3402  1.1  christos 	pmull2	v9.1q, v11.2d, v20.2d				//GHASH block 8k+3 - high
   3403  1.1  christos 	pmull2	v29.1q, v10.2d, v22.2d				//GHASH block 8k+2 - high
   3404  1.1  christos 
   3405  1.1  christos 	aese	v3.16b, v27.16b
   3406  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 1
   3407  1.1  christos 	aese	v0.16b, v27.16b
   3408  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 1
   3409  1.1  christos 	aese	v4.16b, v27.16b
   3410  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 1
   3411  1.1  christos 
   3412  1.1  christos 	pmull	v22.1q, v10.1d, v22.1d				//GHASH block 8k+2 - low
   3413  1.1  christos 	aese	v5.16b, v28.16b
   3414  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 2
   3415  1.1  christos 	eor	v19.16b, v19.16b, v23.16b				//GHASH block 8k+1 - low
   3416  1.1  christos 
   3417  1.1  christos 	pmull	v20.1q, v11.1d, v20.1d				//GHASH block 8k+3 - low
   3418  1.1  christos 	aese	v7.16b, v28.16b
   3419  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 2
   3420  1.1  christos .inst	0xce1d2631	//eor3 v17.16b, v17.16b, v29.16b, v9.16b			//GHASH block 8k+2, 8k+3 - high
   3421  1.1  christos 
   3422  1.1  christos 	aese	v5.16b, v26.16b
   3423  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 3
   3424  1.1  christos 	trn1	v29.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   3425  1.1  christos 	aese	v6.16b, v28.16b
   3426  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 2
   3427  1.1  christos 
   3428  1.1  christos 	aese	v0.16b, v28.16b
   3429  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 2
   3430  1.1  christos 	pmull2	v18.1q, v8.2d, v24.2d				//GHASH block 8k	- mid
   3431  1.1  christos 	trn2	v10.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   3432  1.1  christos 
   3433  1.1  christos 	aese	v3.16b, v28.16b
   3434  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 2
   3435  1.1  christos 	rev64	v13.16b, v13.16b						//GHASH block 8k+5 (t0, t1, t2 and t3 free)
   3436  1.1  christos 	rev64	v14.16b, v14.16b						//GHASH block 8k+6 (t0, t1, and t2 free)
   3437  1.1  christos 
   3438  1.1  christos 	aese	v2.16b, v28.16b
   3439  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 2
   3440  1.1  christos 	aese	v1.16b, v28.16b
   3441  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 2
   3442  1.1  christos 	aese	v4.16b, v28.16b
   3443  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 2
   3444  1.1  christos 
   3445  1.1  christos 	eor	v10.16b, v10.16b, v29.16b				//GHASH block 8k+2, 8k+3 - mid
   3446  1.1  christos 	pmull	v24.1q, v8.1d, v24.1d				//GHASH block 8k+1 - mid
   3447  1.1  christos 	ldp	q27, q28, [x8, #64]				//load rk4, rk5
   3448  1.1  christos 
   3449  1.1  christos 	aese	v1.16b, v26.16b
   3450  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 3
   3451  1.1  christos 	aese	v6.16b, v26.16b
   3452  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 3
   3453  1.1  christos 	aese	v2.16b, v26.16b
   3454  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 3
   3455  1.1  christos 
   3456  1.1  christos 	eor	v18.16b, v18.16b, v24.16b				//GHASH block 8k+1 - mid
   3457  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+2, 8k+3 - low
   3458  1.1  christos 	aese	v7.16b, v26.16b
   3459  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 3
   3460  1.1  christos 
   3461  1.1  christos 	ldr	q23, [x3, #80]				//load h3l | h3h
   3462  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8				//swap the two 64-bit halves of h3
   3463  1.1  christos 	ldr	q25, [x3, #112]				//load h4l | h4h
   3464  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8				//swap the two 64-bit halves of h4
   3465  1.1  christos 	aese	v3.16b, v26.16b
   3466  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 3
   3467  1.1  christos 	pmull2	v29.1q, v10.2d, v21.2d				//GHASH block 8k+2 - mid
   3468  1.1  christos 
   3469  1.1  christos 	ldr	q20, [x3, #32]				//load h1l | h1h
   3470  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8				//swap the two 64-bit halves of h1
   3471  1.1  christos 	ldr	q22, [x3, #64]				//load h2l | h2h
   3472  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8				//swap the two 64-bit halves of h2
   3473  1.1  christos 	aese	v4.16b, v26.16b
   3474  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 3
   3475  1.1  christos 	rev64	v12.16b, v12.16b						//GHASH block 8k+4 (t0, t1, and t2 free)
   3476  1.1  christos 
   3477  1.1  christos 	aese	v0.16b, v26.16b
   3478  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 3
   3479  1.1  christos 	pmull	v21.1q, v10.1d, v21.1d				//GHASH block 8k+3 - mid
   3480  1.1  christos 	aese	v6.16b, v27.16b
   3481  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 4
   3482  1.1  christos 
   3483  1.1  christos 	trn1	v16.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   3484  1.1  christos 	aese	v7.16b, v27.16b
   3485  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 4
   3486  1.1  christos 	aese	v5.16b, v27.16b
   3487  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 4
   3488  1.1  christos 
   3489  1.1  christos .inst	0xce157652	//eor3 v18.16b, v18.16b, v21.16b, v29.16b			//GHASH block 8k+2, 8k+3 - mid
   3490  1.1  christos 	aese	v3.16b, v27.16b
   3491  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 4
   3492  1.1  christos 	aese	v0.16b, v27.16b
   3493  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 4
   3494  1.1  christos 
   3495  1.1  christos 	aese	v1.16b, v27.16b
   3496  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 4
   3497  1.1  christos 	aese	v4.16b, v27.16b
   3498  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 4
   3499  1.1  christos 	aese	v2.16b, v27.16b
   3500  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 4
   3501  1.1  christos 
   3502  1.1  christos 	aese	v0.16b, v28.16b
   3503  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 5
   3504  1.1  christos 	rev64	v15.16b, v15.16b						//GHASH block 8k+7 (t0, t1, t2 and t3 free)
   3505  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   3506  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   3507  1.1  christos 
   3508  1.1  christos 	aese	v1.16b, v28.16b
   3509  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 5
   3510  1.1  christos 	aese	v2.16b, v28.16b
   3511  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 5
   3512  1.1  christos 	ldp	q26, q27, [x8, #96]				//load rk6, rk7
   3513  1.1  christos 
   3514  1.1  christos 	pmull2	v11.1q, v14.2d, v22.2d				//GHASH block 8k+6 - high
   3515  1.1  christos 	pmull2	v8.1q, v12.2d, v25.2d				//GHASH block 8k+4 - high
   3516  1.1  christos 	pmull	v25.1q, v12.1d, v25.1d				//GHASH block 8k+4 - low
   3517  1.1  christos 
   3518  1.1  christos 	aese	v4.16b, v28.16b
   3519  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 5
   3520  1.1  christos 	trn2	v12.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   3521  1.1  christos 
   3522  1.1  christos 	pmull2	v10.1q, v13.2d, v23.2d				//GHASH block 8k+5 - high
   3523  1.1  christos 	pmull	v23.1q, v13.1d, v23.1d				//GHASH block 8k+5 - low
   3524  1.1  christos 	pmull	v22.1q, v14.1d, v22.1d				//GHASH block 8k+6 - low
   3525  1.1  christos 
   3526  1.1  christos 	trn1	v13.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   3527  1.1  christos 	eor	v12.16b, v12.16b, v16.16b				//GHASH block 8k+4, 8k+5 - mid
   3528  1.1  christos 	trn2	v14.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   3529  1.1  christos 
   3530  1.1  christos 	aese	v5.16b, v28.16b
   3531  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 5
   3532  1.1  christos 	aese	v1.16b, v26.16b
   3533  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 6
   3534  1.1  christos 	aese	v7.16b, v28.16b
   3535  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 5
   3536  1.1  christos 
   3537  1.1  christos 	aese	v6.16b, v28.16b
   3538  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 5
   3539  1.1  christos 	eor	v14.16b, v14.16b, v13.16b				//GHASH block 8k+6, 8k+7 - mid
   3540  1.1  christos 	aese	v3.16b, v28.16b
   3541  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 5
   3542  1.1  christos 
   3543  1.1  christos 	pmull2	v16.1q, v12.2d, v24.2d				//GHASH block 8k+4 - mid
   3544  1.1  christos 	pmull	v24.1q, v12.1d, v24.1d				//GHASH block 8k+5 - mid
   3545  1.1  christos 
   3546  1.1  christos 	aese	v4.16b, v26.16b
   3547  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 6
   3548  1.1  christos 	aese	v5.16b, v26.16b
   3549  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 6
   3550  1.1  christos 	aese	v1.16b, v27.16b
   3551  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 7
   3552  1.1  christos 
   3553  1.1  christos 	aese	v0.16b, v26.16b
   3554  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 6
   3555  1.1  christos 	aese	v7.16b, v26.16b
   3556  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 6
   3557  1.1  christos .inst	0xce184252	//eor3 v18.16b, v18.16b, v24.16b, v16.16b			//GHASH block 8k+4, 8k+5 - mid
   3558  1.1  christos 
   3559  1.1  christos 	aese	v2.16b, v26.16b
   3560  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 6
   3561  1.1  christos .inst	0xce082a31	//eor3 v17.16b, v17.16b, v8.16b, v10.16b			//GHASH block 8k+4, 8k+5 - high
   3562  1.1  christos 	aese	v5.16b, v27.16b
   3563  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 7
   3564  1.1  christos 
   3565  1.1  christos 	aese	v6.16b, v26.16b
   3566  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 6
   3567  1.1  christos 	ldr	d16, [x10]			//MODULO - load modulo constant
   3568  1.1  christos 	aese	v3.16b, v26.16b
   3569  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 6
   3570  1.1  christos 
   3571  1.1  christos 	pmull2	v13.1q, v14.2d, v21.2d				//GHASH block 8k+6 - mid
   3572  1.1  christos 	aese	v0.16b, v27.16b
   3573  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 7
   3574  1.1  christos .inst	0xce195e73	//eor3 v19.16b, v19.16b, v25.16b, v23.16b			//GHASH block 8k+4, 8k+5 - low
   3575  1.1  christos 
   3576  1.1  christos 	pmull2	v12.1q, v15.2d, v20.2d				//GHASH block 8k+7 - high
   3577  1.1  christos 	pmull	v21.1q, v14.1d, v21.1d				//GHASH block 8k+7 - mid
   3578  1.1  christos 	pmull	v20.1q, v15.1d, v20.1d				//GHASH block 8k+7 - low
   3579  1.1  christos 
   3580  1.1  christos 	aese	v4.16b, v27.16b
   3581  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 7
   3582  1.1  christos 	aese	v2.16b, v27.16b
   3583  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 7
   3584  1.1  christos 	ldp	q28, q26, [x8, #128]				//load rk8, rk9
   3585  1.1  christos 
   3586  1.1  christos 	aese	v3.16b, v27.16b
   3587  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 7
   3588  1.1  christos .inst	0xce153652	//eor3 v18.16b, v18.16b, v21.16b, v13.16b			//GHASH block 8k+6, 8k+7 - mid
   3589  1.1  christos 
   3590  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+6, 8k+7 - low
   3591  1.1  christos .inst	0xce0b3231	//eor3 v17.16b, v17.16b, v11.16b, v12.16b			//GHASH block 8k+6, 8k+7 - high
   3592  1.1  christos 
   3593  1.1  christos .inst	0xce114e52	//eor3 v18.16b, v18.16b, v17.16b, v19.16b		 	//MODULO - karatsuba tidy up
   3594  1.1  christos 	ext	v29.16b, v17.16b, v17.16b, #8				//MODULO - other top alignment
   3595  1.1  christos 	aese	v7.16b, v27.16b
   3596  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 7
   3597  1.1  christos 	pmull	v21.1q, v17.1d, v16.1d		 	//MODULO - top 64b align with mid
   3598  1.1  christos 
   3599  1.1  christos 	aese	v5.16b, v28.16b
   3600  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 8
   3601  1.1  christos 	aese	v1.16b, v28.16b
   3602  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 8
   3603  1.1  christos 
   3604  1.1  christos 	aese	v6.16b, v27.16b
   3605  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 7
   3606  1.1  christos 	aese	v2.16b, v28.16b
   3607  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 8
   3608  1.1  christos .inst	0xce1d5652	//eor3 v18.16b, v18.16b, v29.16b, v21.16b			//MODULO - fold into mid
   3609  1.1  christos 
   3610  1.1  christos 	aese	v3.16b, v28.16b
   3611  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 8
   3612  1.1  christos 	aese	v5.16b, v26.16b
   3613  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 9
   3614  1.1  christos 	aese	v4.16b, v28.16b
   3615  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 8
   3616  1.1  christos 
   3617  1.1  christos 	aese	v0.16b, v28.16b
   3618  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 8
   3619  1.1  christos 	aese	v7.16b, v28.16b
   3620  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 8
   3621  1.1  christos 	aese	v6.16b, v28.16b
   3622  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 8
   3623  1.1  christos 
   3624  1.1  christos 	aese	v3.16b, v26.16b
   3625  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 9
   3626  1.1  christos 	ldp	q27, q28, [x8, #160]				//load rk10, rk11
   3627  1.1  christos 	aese	v4.16b, v26.16b
   3628  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 9
   3629  1.1  christos 
   3630  1.1  christos 	aese	v2.16b, v26.16b
   3631  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 9
   3632  1.1  christos 	aese	v7.16b, v26.16b
   3633  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 9
   3634  1.1  christos 
   3635  1.1  christos 	ext	v21.16b, v18.16b, v18.16b, #8				//MODULO - other mid alignment
   3636  1.1  christos 	aese	v6.16b, v26.16b
   3637  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 9
   3638  1.1  christos 	aese	v0.16b, v26.16b
   3639  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 9
   3640  1.1  christos 	aese	v1.16b, v26.16b
   3641  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 9
   3642  1.1  christos 
   3643  1.1  christos 	pmull	v17.1q, v18.1d, v16.1d			//MODULO - mid 64b align with low
   3644  1.1  christos 	ldr	q26, [x8, #192]					//load rk12 (not used in this section; the tail copies v26 to v29 and XORs it in with eor3)
   3645  1.1  christos 
   3646  1.1  christos 	aese	v7.16b, v27.16b
   3647  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 10
   3648  1.1  christos 	aese	v1.16b, v27.16b
   3649  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 10
   3650  1.1  christos 	aese	v2.16b, v27.16b
   3651  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 10
   3652  1.1  christos 
   3653  1.1  christos .inst	0xce115673	//eor3 v19.16b, v19.16b, v17.16b, v21.16b		 	//MODULO - fold into low
   3654  1.1  christos 	aese	v0.16b, v27.16b
   3655  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 10
   3656  1.1  christos 	aese	v3.16b, v27.16b
   3657  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 10
   3658  1.1  christos 
   3659  1.1  christos 	aese	v1.16b, v28.16b						//AES block 8k+9 - round 11 (round 11 is a bare aese with no aesmc, for every block)
   3660  1.1  christos 	aese	v7.16b, v28.16b						//AES block 8k+15 - round 11
   3661  1.1  christos 
   3662  1.1  christos 	aese	v4.16b, v27.16b
   3663  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 10
   3664  1.1  christos 	aese	v3.16b, v28.16b						//AES block 8k+11 - round 11
   3665  1.1  christos 
   3666  1.1  christos 	aese	v5.16b, v27.16b
   3667  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 10
   3668  1.1  christos 	aese	v6.16b, v27.16b
   3669  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 10
   3670  1.1  christos 
   3671  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+15
   3672  1.1  christos 	aese	v2.16b, v28.16b						//AES block 8k+10 - round 11
   3673  1.1  christos 	aese	v0.16b, v28.16b						//AES block 8k+8 - round 11
   3674  1.1  christos 
   3675  1.1  christos 	aese	v6.16b, v28.16b						//AES block 8k+14 - round 11
   3676  1.1  christos 	aese	v4.16b, v28.16b						//AES block 8k+12 - round 11
   3677  1.1  christos 	aese	v5.16b, v28.16b						//AES block 8k+13 - round 11
   3678  1.1  christos 
   3679  1.1  christos .L192_enc_tail:	//TAIL - encrypt the first remaining block, then dispatch on the number of bytes left (x5)
   3680  1.1  christos 
   3681  1.1  christos 	ldp	q20, q21, [x3, #128]			//load h5l | h5h (q20) and h6k | h5k (q21)
   3682  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8				//swap the two 64-bit halves of h5
   3683  1.1  christos 	sub	x5, x4, x0 	//x5 = x4 - x0: main_end_input_ptr now holds the number of bytes left to process
   3684  1.1  christos 
   3685  1.1  christos 	ldr	q8, [x0], #16				//AES block 8k+8 - load plaintext
   3686  1.1  christos 
   3687  1.1  christos 	ldp	q24, q25, [x3, #192]			//load h8k | h7k (q24) and h8l | h8h (q25)
   3688  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8				//swap the two 64-bit halves of h8
   3689  1.1  christos 
   3690  1.1  christos 	mov	v29.16b, v26.16b					//v29 = v26 (rk12 when falling through from the prepretail); v26 is reused as a GHASH temporary below
   3691  1.1  christos 
   3692  1.1  christos 	ldp	q22, q23, [x3, #160]			//load h6l | h6h (q22) and h7l | h7h (q23)
   3693  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8				//swap the two 64-bit halves of h6
   3694  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8				//swap the two 64-bit halves of h7
   3695  1.1  christos 	cmp	x5, #112						//more than 7 blocks (7 * 16 bytes) left?
   3696  1.1  christos 
   3697  1.1  christos .inst	0xce007509	//eor3 v9.16b, v8.16b, v0.16b, v29.16b			//AES block 8k+8 - result
   3698  1.1  christos 	ext	v16.16b, v19.16b, v19.16b, #8				//prepare final partial tag
   3699  1.1  christos 	b.gt	.L192_enc_blocks_more_than_7
   3700  1.1  christos 
   3701  1.1  christos 	cmp	x5, #96						//more than 6 blocks left?
   3702  1.1  christos 	mov	v7.16b, v6.16b						//at most 7 blocks left: shift v1-v6 up by one register into v2-v7
   3703  1.1  christos 	movi	v17.8b, #0						//zero the GHASH high accumulator (only the >7 path initialises it with pmull)
   3704  1.1  christos 
   3705  1.1  christos 	mov	v6.16b, v5.16b
   3706  1.1  christos 	movi	v19.8b, #0						//zero the GHASH low accumulator (previous value already saved in v16)
   3707  1.1  christos 	sub	v30.4s, v30.4s, v31.4s				//step the counter back by one increment (v31)
   3708  1.1  christos 
   3709  1.1  christos 	mov	v5.16b, v4.16b
   3710  1.1  christos 	mov	v4.16b, v3.16b
   3711  1.1  christos 	mov	v3.16b, v2.16b
   3712  1.1  christos 
   3713  1.1  christos 	mov	v2.16b, v1.16b
   3714  1.1  christos 	movi	v18.8b, #0						//zero the GHASH mid accumulator
   3715  1.1  christos 	b.gt	.L192_enc_blocks_more_than_6
   3716  1.1  christos 
   3717  1.1  christos 	mov	v7.16b, v6.16b						//at most 6 blocks left: shift up again, v1 goes to v3
   3718  1.1  christos 	cmp	x5, #80						//more than 5 blocks left?
   3719  1.1  christos 
   3720  1.1  christos 	mov	v6.16b, v5.16b
   3721  1.1  christos 	mov	v5.16b, v4.16b
   3722  1.1  christos 	mov	v4.16b, v3.16b
   3723  1.1  christos 
   3724  1.1  christos 	mov	v3.16b, v1.16b
   3725  1.1  christos 	sub	v30.4s, v30.4s, v31.4s				//step the counter back by one increment
   3726  1.1  christos 	b.gt	.L192_enc_blocks_more_than_5
   3727  1.1  christos 
   3728  1.1  christos 	cmp	x5, #64						//more than 4 blocks left?
   3729  1.1  christos 	sub	v30.4s, v30.4s, v31.4s				//step the counter back by one increment
   3730  1.1  christos 
   3731  1.1  christos 	mov	v7.16b, v6.16b						//at most 5 blocks left: shift up again, v1 goes to v4
   3732  1.1  christos 	mov	v6.16b, v5.16b
   3733  1.1  christos 	mov	v5.16b, v4.16b
   3734  1.1  christos 
   3735  1.1  christos 	mov	v4.16b, v1.16b
   3736  1.1  christos 	b.gt	.L192_enc_blocks_more_than_4
   3737  1.1  christos 
   3738  1.1  christos 	mov	v7.16b, v6.16b						//at most 4 blocks left: shift up again, v1 goes to v5
   3739  1.1  christos 	mov	v6.16b, v5.16b
   3740  1.1  christos 	mov	v5.16b, v1.16b
   3741  1.1  christos 
   3742  1.1  christos 	sub	v30.4s, v30.4s, v31.4s				//step the counter back by one increment
   3743  1.1  christos 	cmp	x5, #48						//more than 3 blocks left?
   3744  1.1  christos 	b.gt	.L192_enc_blocks_more_than_3
   3745  1.1  christos 
   3746  1.1  christos 	mov	v7.16b, v6.16b						//at most 3 blocks left: shift up again, v1 goes to v6
   3747  1.1  christos 	mov	v6.16b, v1.16b
   3748  1.1  christos 	sub	v30.4s, v30.4s, v31.4s				//step the counter back by one increment
   3749  1.1  christos 
   3750  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   3751  1.1  christos 	cmp	x5, #32						//more than 2 blocks left?
   3752  1.1  christos 	b.gt	.L192_enc_blocks_more_than_2
   3753  1.1  christos 
   3754  1.1  christos 	sub	v30.4s, v30.4s, v31.4s				//step the counter back by one increment
   3755  1.1  christos 
   3756  1.1  christos 	cmp	x5, #16						//more than 1 block left?
   3757  1.1  christos 	mov	v7.16b, v1.16b						//at most 2 blocks left: v1 goes to v7
   3758  1.1  christos 	b.gt	.L192_enc_blocks_more_than_1
   3759  1.1  christos 
   3760  1.1  christos 	sub	v30.4s, v30.4s, v31.4s				//step the counter back by one increment
   3761  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   3762  1.1  christos 	b	.L192_enc_blocks_less_than_1
   3763  1.1  christos .L192_enc_blocks_more_than_7:	//blocks left > 7: store the final-7 block, GHASH it with h8 (v25) and encrypt the final-6 block
   3764  1.1  christos 	st1	{ v9.16b}, [x2], #16			 	//AES final-7 block  - store result
   3765  1.1  christos 
   3766  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-7 block
   3767  1.1  christos 	ins	v18.d[0], v24.d[1]					//GHASH final-7 block - mid (copy the upper half of v24 into v18.d[0])
   3768  1.1  christos 
   3769  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   3770  1.1  christos 
   3771  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-7 block - mid
   3772  1.1  christos 
   3773  1.1  christos 	ldr	q9, [x0], #16				//AES final-6 block - load plaintext
   3774  1.1  christos 
   3775  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-7 block - mid
   3776  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   3777  1.1  christos 	pmull	v19.1q, v8.1d, v25.1d				//GHASH final-7 block - low (initialises the low accumulator)
   3778  1.1  christos 
   3779  1.1  christos 	pmull2	v17.1q, v8.2d, v25.2d				//GHASH final-7 block - high (initialises the high accumulator)
   3780  1.1  christos 
   3781  1.1  christos 	pmull	v18.1q, v27.1d, v18.1d			 	//GHASH final-7 block - mid (initialises the mid accumulator)
   3782  1.1  christos .inst	0xce017529	//eor3 v9.16b, v9.16b, v1.16b, v29.16b			//AES final-6 block - result
   3783  1.1  christos .L192_enc_blocks_more_than_6:	//blocks left > 6: store the final-6 block, GHASH it with h7 (v23) and encrypt the final-5 block
   3784  1.1  christos 
   3785  1.1  christos 	st1	{ v9.16b}, [x2], #16			 	//AES final-6 block - store result
   3786  1.1  christos 
   3787  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-6 block
   3788  1.1  christos 
   3789  1.1  christos 	ldr	q9, [x0], #16				//AES final-5 block - load plaintext
   3790  1.1  christos 
   3791  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   3792  1.1  christos 
   3793  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-6 block - mid
   3794  1.1  christos 
   3795  1.1  christos 	pmull	v26.1q, v8.1d, v23.1d				//GHASH final-6 block - low
   3796  1.1  christos .inst	0xce027529	//eor3 v9.16b, v9.16b, v2.16b, v29.16b			//AES final-5 block - result
   3797  1.1  christos 
   3798  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   3799  1.1  christos 	pmull2	v28.1q, v8.2d, v23.2d				//GHASH final-6 block - high
   3800  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-6 block - mid
   3801  1.1  christos 
   3802  1.1  christos 	pmull	v27.1q, v27.1d, v24.1d				//GHASH final-6 block - mid (uses the lower half of v24)
   3803  1.1  christos 
   3804  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-6 block - high
   3805  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-6 block - low
   3806  1.1  christos 
   3807  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-6 block - mid
   3808  1.1  christos .L192_enc_blocks_more_than_5:	//blocks left > 5: store the final-5 block, GHASH it with h6 (v22) and encrypt the final-4 block
   3809  1.1  christos 
   3810  1.1  christos 	st1	{ v9.16b}, [x2], #16			 	//AES final-5 block - store result
   3811  1.1  christos 
   3812  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-5 block
   3813  1.1  christos 
   3814  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   3815  1.1  christos 
   3816  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-5 block - mid
   3817  1.1  christos 
   3818  1.1  christos 	ldr	q9, [x0], #16				//AES final-4 block - load plaintext
   3819  1.1  christos 	pmull2	v28.1q, v8.2d, v22.2d				//GHASH final-5 block - high
   3820  1.1  christos 
   3821  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-5 block - mid
   3822  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-5 block - high
   3823  1.1  christos 
   3824  1.1  christos 	ins	v27.d[1], v27.d[0]					//GHASH final-5 block - mid (duplicate into the upper half for pmull2)
   3825  1.1  christos 	pmull	v26.1q, v8.1d, v22.1d				//GHASH final-5 block - low
   3826  1.1  christos 
   3827  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-5 block - low
   3828  1.1  christos 	pmull2	v27.1q, v27.2d, v21.2d				//GHASH final-5 block - mid (uses the upper half of v21)
   3829  1.1  christos 
   3830  1.1  christos .inst	0xce037529	//eor3 v9.16b, v9.16b, v3.16b, v29.16b			//AES final-4 block - result
   3831  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   3832  1.1  christos 
   3833  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-5 block - mid
   3834  1.1  christos .L192_enc_blocks_more_than_4:	//blocks left > 4: store the final-4 block, GHASH it with h5 (v20) and encrypt the final-3 block
   3835  1.1  christos 
   3836  1.1  christos 	st1	{ v9.16b}, [x2], #16				//AES final-4 block - store result
   3837  1.1  christos 
   3838  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-4 block
   3839  1.1  christos 
   3840  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   3841  1.1  christos 
   3842  1.1  christos 	ldr	q9, [x0], #16				//AES final-3 block - load plaintext
   3843  1.1  christos 	pmull2	v28.1q, v8.2d, v20.2d				//GHASH final-4 block - high
   3844  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-4 block - mid
   3845  1.1  christos 
   3846  1.1  christos 	pmull	v26.1q, v8.1d, v20.1d				//GHASH final-4 block - low
   3847  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-4 block - high
   3848  1.1  christos 
   3849  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-4 block - mid
   3850  1.1  christos 
   3851  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   3852  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-4 block - low
   3853  1.1  christos 
   3854  1.1  christos 	pmull	v27.1q, v27.1d, v21.1d				//GHASH final-4 block - mid (uses the lower half of v21)
   3855  1.1  christos 
   3856  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-4 block - mid
   3857  1.1  christos .inst	0xce047529	//eor3 v9.16b, v9.16b, v4.16b, v29.16b			//AES final-3 block - result
   3858  1.1  christos .L192_enc_blocks_more_than_3:	//blocks left > 3: store the final-3 block, GHASH it with h4 (reloaded into v25) and encrypt the final-2 block
   3859  1.1  christos 
   3860  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   3861  1.1  christos 	st1	{ v9.16b}, [x2], #16			 	//AES final-3 block - store result
   3862  1.1  christos 
   3863  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-3 block
   3864  1.1  christos 
   3865  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   3866  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   3867  1.1  christos 
   3868  1.1  christos 	ldr	q9, [x0], #16				//AES final-2 block - load plaintext
   3869  1.1  christos 	ldr	q25, [x3, #112]				//load h4l | h4h
   3870  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8				//swap the two 64-bit halves of h4
   3871  1.1  christos 
   3872  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-3 block - mid
   3873  1.1  christos 
   3874  1.1  christos .inst	0xce057529	//eor3 v9.16b, v9.16b, v5.16b, v29.16b			//AES final-2 block - result
   3875  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-3 block - mid
   3876  1.1  christos 
   3877  1.1  christos 	ins	v27.d[1], v27.d[0]					//GHASH final-3 block - mid (duplicate into the upper half for pmull2)
   3878  1.1  christos 	pmull	v26.1q, v8.1d, v25.1d				//GHASH final-3 block - low
   3879  1.1  christos 
   3880  1.1  christos 	pmull2	v28.1q, v8.2d, v25.2d				//GHASH final-3 block - high
   3881  1.1  christos 	pmull2	v27.1q, v27.2d, v24.2d				//GHASH final-3 block - mid (uses the upper half of v24)
   3882  1.1  christos 
   3883  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-3 block - low
   3884  1.1  christos 
   3885  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-3 block - mid
   3886  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-3 block - high
   3887  1.1  christos .L192_enc_blocks_more_than_2:	//blocks left > 2: store the final-2 block, GHASH it with h3 (reloaded into v23) and encrypt the final-1 block
   3888  1.1  christos 
   3889  1.1  christos 	st1	{ v9.16b}, [x2], #16			 	//AES final-2 block - store result
   3890  1.1  christos 
   3891  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-2 block
   3892  1.1  christos 	ldr	q23, [x3, #80]				//load h3l | h3h
   3893  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8				//swap the two 64-bit halves of h3
   3894  1.1  christos 
   3895  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   3896  1.1  christos 
   3897  1.1  christos 	ldr	q9, [x0], #16				//AES final-1 block - load plaintext
   3898  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-2 block - mid
   3899  1.1  christos 
   3900  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-2 block - mid
   3901  1.1  christos 
   3902  1.1  christos 	pmull	v26.1q, v8.1d, v23.1d				//GHASH final-2 block - low
   3903  1.1  christos 	pmull2	v28.1q, v8.2d, v23.2d				//GHASH final-2 block - high
   3904  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   3905  1.1  christos 
   3906  1.1  christos 	pmull	v27.1q, v27.1d, v24.1d				//GHASH final-2 block - mid (uses the lower half of v24)
   3907  1.1  christos 
   3908  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-2 block - low
   3909  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-2 block - high
   3910  1.1  christos 
   3911  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-2 block - mid
   3912  1.1  christos .inst	0xce067529	//eor3 v9.16b, v9.16b, v6.16b, v29.16b			//AES final-1 block - result
   3913  1.1  christos .L192_enc_blocks_more_than_1:	//blocks	left >  1 - store the final-1 block, fold it into GHASH, then produce the final block
   3914  1.1  christos 
   3915  1.1  christos 	ldr	q22, [x3, #64]				//load h2l | h2h
   3916  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8				//swap the two 64-bit halves of the loaded key
   3917  1.1  christos 	st1	{ v9.16b}, [x2], #16			 	//AES final-1 block - store result
   3918  1.1  christos 
   3919  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-1 block
   3920  1.1  christos 
   3921  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   3922  1.1  christos 
   3923  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-1 block - mid
   3924  1.1  christos 	pmull	v26.1q, v8.1d, v22.1d				//GHASH final-1 block - low
   3925  1.1  christos 
   3926  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-1 block - low
   3927  1.1  christos 	pmull2	v28.1q, v8.2d, v22.2d				//GHASH final-1 block - high
   3928  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-1 block - mid
   3929  1.1  christos 
   3930  1.1  christos 	ldr	q9, [x0], #16				//AES final block - load plaintext
   3931  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   3932  1.1  christos 
   3933  1.1  christos 	ins	v27.d[1], v27.d[0]					//GHASH final-1 block - mid (copy into the high lane read by pmull2 below)
   3934  1.1  christos 
   3935  1.1  christos .inst	0xce077529	//eor3 v9.16b, v9.16b, v7.16b, v29.16b			//AES final block - result
   3936  1.1  christos 	pmull2	v27.1q, v27.2d, v21.2d				//GHASH final-1 block - mid
   3937  1.1  christos 
   3938  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   3939  1.1  christos 
   3940  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-1 block - mid
   3941  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-1 block - high
   3942  1.1  christos .L192_enc_blocks_less_than_1:	//blocks	left <= 1 - mask the possibly partial last block, finish GHASH and its reduction, write back GHASH value and counter, return
   3943  1.1  christos 
   3944  1.1  christos 	mvn	x6, xzr						//temp0_x = 0xffffffffffffffff
   3945  1.1  christos 	and	x1, x1, #127				//bit_length %= 128
   3946  1.1  christos 
   3947  1.1  christos 	sub	x1, x1, #128				//bit_length -= 128
   3948  1.1  christos 
   3949  1.1  christos 	neg	x1, x1				//bit_length = 128 - #bits in input (in range [1,128])
   3950  1.1  christos 
   3951  1.1  christos 	and	x1, x1, #127				//bit_length %= 128 (now the count of unused bits in the last block, in range [0,127])
   3952  1.1  christos 
   3953  1.1  christos 	lsr	x6, x6, x1				//temp0_x is mask for top 64b of last block
   3954  1.1  christos 	cmp	x1, #64				//are fewer than 64 bits unused?
   3955  1.1  christos 	mvn	x7, xzr						//temp1_x = 0xffffffffffffffff
   3956  1.1  christos 
   3957  1.1  christos 	csel	x13, x7, x6, lt				//low-64b mask: all ones when fewer than 64 bits are unused, else the shifted mask
   3958  1.1  christos 	csel	x14, x6, xzr, lt				//high-64b mask: the shifted mask when fewer than 64 bits are unused, else zero
   3959  1.1  christos 
   3960  1.1  christos 	mov	v0.d[1], x14					//high half of the last-block mask
   3961  1.1  christos 	ldr	q20, [x3, #32]				//load h1l | h1h
   3962  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8				//swap the two 64-bit halves of the loaded key
   3963  1.1  christos 
   3964  1.1  christos 	ld1	{ v26.16b}, [x2]					//load existing bytes where the possibly partial last block is to be stored
   3965  1.1  christos 	mov	v0.d[0], x13					//ctr0b is mask for last block
   3966  1.1  christos 
   3967  1.1  christos 	and	v9.16b, v9.16b, v0.16b					//possibly partial last block has zeroes in highest bits
   3968  1.1  christos 
   3969  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final block
   3970  1.1  christos 	bif	v9.16b, v26.16b, v0.16b					//insert existing bytes in top end of result before storing
   3971  1.1  christos 
   3972  1.1  christos 	st1	{ v9.16b}, [x2]				//store all 16B
   3973  1.1  christos 
   3974  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   3975  1.1  christos 
   3976  1.1  christos 	ins	v16.d[0], v8.d[1]					//GHASH final block - mid
   3977  1.1  christos 	pmull2	v28.1q, v8.2d, v20.2d				//GHASH final block - high
   3978  1.1  christos 
   3979  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final block - high
   3980  1.1  christos 	pmull	v26.1q, v8.1d, v20.1d				//GHASH final block - low
   3981  1.1  christos 
   3982  1.1  christos 	eor	v16.8b, v16.8b, v8.8b				//GHASH final block - mid
   3983  1.1  christos 
   3984  1.1  christos 	pmull	v16.1q, v16.1d, v21.1d				//GHASH final block - mid
   3985  1.1  christos 
   3986  1.1  christos 	eor	v18.16b, v18.16b, v16.16b				//GHASH final block - mid
   3987  1.1  christos 	ldr	d16, [x10]			//MODULO - load modulo constant
   3988  1.1  christos 
   3989  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final block - low
   3990  1.1  christos 	ext	v21.16b, v17.16b, v17.16b, #8			 	//MODULO - other top alignment
   3991  1.1  christos 
   3992  1.1  christos 	rev32	v30.16b, v30.16b				//byte-reverse each 32-bit word of the counter before storing it
   3993  1.1  christos 
   3994  1.1  christos 	str	q30, [x16]					//store the updated counter
   3995  1.1  christos .inst	0xce114e52	//eor3 v18.16b, v18.16b, v17.16b, v19.16b		 	//MODULO - karatsuba tidy up
   3996  1.1  christos 
   3997  1.1  christos 	pmull	v29.1q, v17.1d, v16.1d		 	//MODULO - top 64b align with mid
   3998  1.1  christos 
   3999  1.1  christos .inst	0xce1d5652	//eor3 v18.16b, v18.16b, v29.16b, v21.16b			//MODULO - fold into mid
   4000  1.1  christos 
   4001  1.1  christos 	pmull	v17.1q, v18.1d, v16.1d			//MODULO - mid 64b align with low
   4002  1.1  christos 	ext	v21.16b, v18.16b, v18.16b, #8			 	//MODULO - other mid alignment
   4003  1.1  christos 
   4004  1.1  christos .inst	0xce115673	//eor3 v19.16b, v19.16b, v17.16b, v21.16b		 	//MODULO - fold into low
   4005  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8				//swap the two 64-bit halves of the reduced result
   4006  1.1  christos 	rev64	v19.16b, v19.16b				//byte-reverse each 64-bit half
   4007  1.1  christos 	st1	{ v19.16b }, [x3]				//write the reduced GHASH value back to [x3]
   4008  1.1  christos 
   4009  1.1  christos 	mov	x0, x9					//return sizes
   4010  1.1  christos 
   4011  1.1  christos 	ldp	d10, d11, [sp, #16]				//restore saved SIMD registers d10-d15
   4012  1.1  christos 	ldp	d12, d13, [sp, #32]
   4013  1.1  christos 	ldp	d14, d15, [sp, #48]
   4014  1.1  christos 	ldp	d8, d9, [sp], #80				//restore d8/d9 and release the 80-byte stack frame
   4015  1.1  christos 	ret
   4016  1.1  christos 
   4017  1.1  christos .L192_enc_ret:	//early exit that touches no stack - NOTE(review): presumably taken when the bit length is zero; confirm at the function entry
   4018  1.1  christos 	mov	w0, #0x0	//return value = 0
   4019  1.1  christos 	ret
   4020  1.1  christos .size	unroll8_eor3_aes_gcm_enc_192_kernel,.-unroll8_eor3_aes_gcm_enc_192_kernel
   4021  1.1  christos .globl	unroll8_eor3_aes_gcm_dec_192_kernel
   4022  1.1  christos .type	unroll8_eor3_aes_gcm_dec_192_kernel,%function
   4023  1.1  christos .align	4
   4024  1.1  christos unroll8_eor3_aes_gcm_dec_192_kernel:
   4025  1.1  christos 	AARCH64_VALID_CALL_TARGET
   4026  1.1  christos 	cbz	x1, .L192_dec_ret
   4027  1.1  christos 	stp	d8, d9, [sp, #-80]!
   4028  1.1  christos 	lsr	x9, x1, #3
   4029  1.1  christos 	mov	x16, x4
   4030  1.1  christos 	mov	x8, x5
   4031  1.1  christos 	stp	d10, d11, [sp, #16]
   4032  1.1  christos 	stp	d12, d13, [sp, #32]
   4033  1.1  christos 	stp	d14, d15, [sp, #48]
   4034  1.1  christos 	mov	x5, #0xc200000000000000
   4035  1.1  christos 	stp	x5, xzr, [sp, #64]
   4036  1.1  christos 	add	x10, sp, #64
   4037  1.1  christos 
   4038  1.1  christos 	mov	x5, x9
   4039  1.1  christos 	ld1	{ v0.16b}, [x16]					//CTR block 0
   4040  1.1  christos 	ld1	{ v19.16b}, [x3]
   4041  1.1  christos 
   4042  1.1  christos 	mov	x15, #0x100000000			//set up counter increment
   4043  1.1  christos 	movi	v31.16b, #0x0
   4044  1.1  christos 	mov	v31.d[1], x15
   4045  1.1  christos 
   4046  1.1  christos 	rev32	v30.16b, v0.16b				//set up reversed counter
   4047  1.1  christos 
   4048  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 0
   4049  1.1  christos 
   4050  1.1  christos 	rev32	v1.16b, v30.16b				//CTR block 1
   4051  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 1
   4052  1.1  christos 
   4053  1.1  christos 	rev32	v2.16b, v30.16b				//CTR block 2
   4054  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 2
   4055  1.1  christos 
   4056  1.1  christos 	rev32	v3.16b, v30.16b				//CTR block 3
   4057  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 3
   4058  1.1  christos 
   4059  1.1  christos 	rev32	v4.16b, v30.16b				//CTR block 4
   4060  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 4
   4061  1.1  christos 
   4062  1.1  christos 	rev32	v5.16b, v30.16b				//CTR block 5
   4063  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 5
   4064  1.1  christos 	ldp	q26, q27, [x8, #0]				 	//load rk0, rk1
   4065  1.1  christos 
   4066  1.1  christos 	rev32	v6.16b, v30.16b				//CTR block 6
   4067  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 6
   4068  1.1  christos 
   4069  1.1  christos 	rev32	v7.16b, v30.16b				//CTR block 7
   4070  1.1  christos 
   4071  1.1  christos 	aese	v3.16b, v26.16b
   4072  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 0
   4073  1.1  christos 	aese	v6.16b, v26.16b
   4074  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 0
   4075  1.1  christos 	aese	v5.16b, v26.16b
   4076  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 0
   4077  1.1  christos 
   4078  1.1  christos 	aese	v0.16b, v26.16b
   4079  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 0
   4080  1.1  christos 	aese	v1.16b, v26.16b
   4081  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 0
   4082  1.1  christos 	aese	v7.16b, v26.16b
   4083  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 0
   4084  1.1  christos 
   4085  1.1  christos 	aese	v2.16b, v26.16b
   4086  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 0
   4087  1.1  christos 	aese	v4.16b, v26.16b
   4088  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 0
   4089  1.1  christos 	ldp	q28, q26, [x8, #32]				//load rk2, rk3
   4090  1.1  christos 
   4091  1.1  christos 	aese	v1.16b, v27.16b
   4092  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 1
   4093  1.1  christos 
   4094  1.1  christos 	aese	v2.16b, v27.16b
   4095  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 1
   4096  1.1  christos 
   4097  1.1  christos 	aese	v0.16b, v27.16b
   4098  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 1
   4099  1.1  christos 	aese	v3.16b, v27.16b
   4100  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 1
   4101  1.1  christos 	aese	v7.16b, v27.16b
   4102  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 1
   4103  1.1  christos 
   4104  1.1  christos 	aese	v5.16b, v27.16b
   4105  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 1
   4106  1.1  christos 	aese	v6.16b, v27.16b
   4107  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 1
   4108  1.1  christos 
   4109  1.1  christos 	aese	v7.16b, v28.16b
   4110  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 2
   4111  1.1  christos 	aese	v0.16b, v28.16b
   4112  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 2
   4113  1.1  christos 	aese	v4.16b, v27.16b
   4114  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 1
   4115  1.1  christos 
   4116  1.1  christos 	aese	v5.16b, v28.16b
   4117  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 2
   4118  1.1  christos 	aese	v1.16b, v28.16b
   4119  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 2
   4120  1.1  christos 	aese	v2.16b, v28.16b
   4121  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 2
   4122  1.1  christos 
   4123  1.1  christos 	aese	v3.16b, v28.16b
   4124  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 2
   4125  1.1  christos 	aese	v4.16b, v28.16b
   4126  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 2
   4127  1.1  christos 	aese	v6.16b, v28.16b
   4128  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 2
   4129  1.1  christos 
   4130  1.1  christos 	aese	v7.16b, v26.16b
   4131  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 3
   4132  1.1  christos 
   4133  1.1  christos 	ldp	q27, q28, [x8, #64]				//load rk4, rk5
   4134  1.1  christos 	aese	v2.16b, v26.16b
   4135  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 3
   4136  1.1  christos 	aese	v5.16b, v26.16b
   4137  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 3
   4138  1.1  christos 
   4139  1.1  christos 	aese	v0.16b, v26.16b
   4140  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 3
   4141  1.1  christos 	aese	v3.16b, v26.16b
   4142  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 3
   4143  1.1  christos 
   4144  1.1  christos 	aese	v4.16b, v26.16b
   4145  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 3
   4146  1.1  christos 	aese	v1.16b, v26.16b
   4147  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 3
   4148  1.1  christos 	aese	v6.16b, v26.16b
   4149  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 3
   4150  1.1  christos 
   4151  1.1  christos 	aese	v3.16b, v27.16b
   4152  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 4
   4153  1.1  christos 	aese	v2.16b, v27.16b
   4154  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 4
   4155  1.1  christos 	aese	v5.16b, v27.16b
   4156  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 4
   4157  1.1  christos 
   4158  1.1  christos 	aese	v1.16b, v27.16b
   4159  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 4
   4160  1.1  christos 	aese	v7.16b, v27.16b
   4161  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 4
   4162  1.1  christos 	aese	v6.16b, v27.16b
   4163  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 4
   4164  1.1  christos 
   4165  1.1  christos 	aese	v0.16b, v27.16b
   4166  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 4
   4167  1.1  christos 	aese	v5.16b, v28.16b
   4168  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 5
   4169  1.1  christos 	aese	v4.16b, v27.16b
   4170  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 4
   4171  1.1  christos 
   4172  1.1  christos 	aese	v6.16b, v28.16b
   4173  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 5
   4174  1.1  christos 	ldp	q26, q27, [x8, #96]				//load rk6, rk7
   4175  1.1  christos 
   4176  1.1  christos 	aese	v0.16b, v28.16b
   4177  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 5
   4178  1.1  christos 	aese	v4.16b, v28.16b
   4179  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 5
   4180  1.1  christos 	aese	v1.16b, v28.16b
   4181  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 5
   4182  1.1  christos 
   4183  1.1  christos 	aese	v3.16b, v28.16b
   4184  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 5
   4185  1.1  christos 	aese	v2.16b, v28.16b
   4186  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 5
   4187  1.1  christos 	aese	v7.16b, v28.16b
   4188  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 5
   4189  1.1  christos 
   4190  1.1  christos 	sub	x5, x5, #1		//byte_len - 1
   4191  1.1  christos 
   4192  1.1  christos 	aese	v4.16b, v26.16b
   4193  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 6
   4194  1.1  christos 	aese	v5.16b, v26.16b
   4195  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 6
   4196  1.1  christos 	aese	v1.16b, v26.16b
   4197  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 6
   4198  1.1  christos 
   4199  1.1  christos 	aese	v0.16b, v26.16b
   4200  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 6
   4201  1.1  christos 	aese	v3.16b, v26.16b
   4202  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 6
   4203  1.1  christos 	aese	v6.16b, v26.16b
   4204  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 6
   4205  1.1  christos 
   4206  1.1  christos 	aese	v7.16b, v26.16b
   4207  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 6
   4208  1.1  christos 	aese	v2.16b, v26.16b
   4209  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 6
   4210  1.1  christos 	ldp	q28, q26, [x8, #128]				//load rk8, rk9
   4211  1.1  christos 
   4212  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 7
   4213  1.1  christos 
   4214  1.1  christos 	aese	v3.16b, v27.16b
   4215  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 7
   4216  1.1  christos 	aese	v7.16b, v27.16b
   4217  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 7
   4218  1.1  christos 
   4219  1.1  christos 	aese	v2.16b, v27.16b
   4220  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 7
   4221  1.1  christos 	aese	v1.16b, v27.16b
   4222  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 7
   4223  1.1  christos 	aese	v4.16b, v27.16b
   4224  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 7
   4225  1.1  christos 
   4226  1.1  christos 	aese	v6.16b, v27.16b
   4227  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 7
   4228  1.1  christos 	aese	v0.16b, v27.16b
   4229  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 7
   4230  1.1  christos 	aese	v5.16b, v27.16b
   4231  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 7
   4232  1.1  christos 
   4233  1.1  christos 	aese	v1.16b, v28.16b
   4234  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 8
   4235  1.1  christos 	aese	v2.16b, v28.16b
   4236  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 8
   4237  1.1  christos 	and	x5, x5, #0xffffffffffffff80	//number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
   4238  1.1  christos 
   4239  1.1  christos 	aese	v7.16b, v28.16b
   4240  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 8
   4241  1.1  christos 	aese	v6.16b, v28.16b
   4242  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 8
   4243  1.1  christos 	aese	v5.16b, v28.16b
   4244  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 8
   4245  1.1  christos 
   4246  1.1  christos 	aese	v4.16b, v28.16b
   4247  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 8
   4248  1.1  christos 	aese	v3.16b, v28.16b
   4249  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 8
   4250  1.1  christos 	aese	v0.16b, v28.16b
   4251  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 8
   4252  1.1  christos 
   4253  1.1  christos 	add	x4, x0, x1, lsr #3		//end_input_ptr
   4254  1.1  christos 	aese	v6.16b, v26.16b
   4255  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 9
   4256  1.1  christos 
   4257  1.1  christos 	ld1	{ v19.16b}, [x3]
   4258  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8
   4259  1.1  christos 	rev64	v19.16b, v19.16b
   4260  1.1  christos 
   4261  1.1  christos 	ldp	q27, q28, [x8, #160]				//load rk10, rk11
   4262  1.1  christos 
   4263  1.1  christos 	aese	v0.16b, v26.16b
   4264  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 9
   4265  1.1  christos 	add	x5, x5, x0
   4266  1.1  christos 
   4267  1.1  christos 	aese	v1.16b, v26.16b
   4268  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 9
   4269  1.1  christos 	aese	v7.16b, v26.16b
   4270  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 9
   4271  1.1  christos 	aese	v4.16b, v26.16b
   4272  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 9
   4273  1.1  christos 
   4274  1.1  christos 	cmp	x0, x5				//check if we have <= 8 blocks
   4275  1.1  christos 	aese	v3.16b, v26.16b
   4276  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 9
   4277  1.1  christos 
   4278  1.1  christos 	aese	v5.16b, v26.16b
   4279  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 9
   4280  1.1  christos 	aese	v2.16b, v26.16b
   4281  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 9
   4282  1.1  christos 
   4283  1.1  christos 	aese	v3.16b, v27.16b
   4284  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 10
   4285  1.1  christos 	aese	v1.16b, v27.16b
   4286  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 10
   4287  1.1  christos 	aese	v7.16b, v27.16b
   4288  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 10
   4289  1.1  christos 
   4290  1.1  christos 	aese	v4.16b, v27.16b
   4291  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 10
   4292  1.1  christos 	aese	v0.16b, v27.16b
   4293  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 10
   4294  1.1  christos 	aese	v2.16b, v27.16b
   4295  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 10
   4296  1.1  christos 
   4297  1.1  christos 	aese	v6.16b, v27.16b
   4298  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 10
   4299  1.1  christos 	aese	v5.16b, v27.16b
   4300  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 10
   4301  1.1  christos 	ldr	q26, [x8, #192]					//load rk12
   4302  1.1  christos 
   4303  1.1  christos 	aese	v0.16b, v28.16b						//AES block 0 - round 11
   4304  1.1  christos 	aese	v1.16b, v28.16b						//AES block 1 - round 11
   4305  1.1  christos 	aese	v4.16b, v28.16b						//AES block 4 - round 11
   4306  1.1  christos 
   4307  1.1  christos 	aese	v6.16b, v28.16b						//AES block 6 - round 11
   4308  1.1  christos 	aese	v5.16b, v28.16b						//AES block 5 - round 11
   4309  1.1  christos 	aese	v7.16b, v28.16b						//AES block 7 - round 11
   4310  1.1  christos 
   4311  1.1  christos 	aese	v2.16b, v28.16b						//AES block 2 - round 11
   4312  1.1  christos 	aese	v3.16b, v28.16b						//AES block 3 - round 11
   4313  1.1  christos 	b.ge	.L192_dec_tail						//handle tail
   4314  1.1  christos 
   4315  1.1  christos 	ldp	q8, q9, [x0], #32			//AES block 0, 1 - load ciphertext
   4316  1.1  christos 
   4317  1.1  christos 	ldp	q10, q11, [x0], #32			//AES block 2, 3 - load ciphertext
   4318  1.1  christos 
   4319  1.1  christos 	ldp	q12, q13, [x0], #32			//AES block 4, 5 - load ciphertext
   4320  1.1  christos 
   4321  1.1  christos .inst	0xce016921	//eor3 v1.16b, v9.16b, v1.16b, v26.16b				//AES block 1 - result
   4322  1.1  christos .inst	0xce006900	//eor3 v0.16b, v8.16b, v0.16b, v26.16b				//AES block 0 - result
   4323  1.1  christos 	stp	q0, q1, [x2], #32			//AES block 0, 1 - store result
   4324  1.1  christos 
   4325  1.1  christos 	rev32	v0.16b, v30.16b				//CTR block 8
   4326  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8
   4327  1.1  christos 
   4328  1.1  christos 	rev32	v1.16b, v30.16b				//CTR block 9
   4329  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 9
   4330  1.1  christos .inst	0xce036963	//eor3 v3.16b, v11.16b, v3.16b, v26.16b				//AES block 3 - result
   4331  1.1  christos 
   4332  1.1  christos .inst	0xce026942	//eor3 v2.16b, v10.16b, v2.16b, v26.16b				//AES block 2 - result
   4333  1.1  christos 	stp	q2, q3, [x2], #32			//AES block 2, 3 - store result
   4334  1.1  christos 	ldp	q14, q15, [x0], #32			//AES block 6, 7 - load ciphertext
   4335  1.1  christos 
   4336  1.1  christos 	rev32	v2.16b, v30.16b				//CTR block 10
   4337  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 10
   4338  1.1  christos 
   4339  1.1  christos .inst	0xce046984	//eor3 v4.16b, v12.16b, v4.16b, v26.16b				//AES block 4 - result
   4340  1.1  christos 
   4341  1.1  christos 	rev32	v3.16b, v30.16b				//CTR block 11
   4342  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 11
   4343  1.1  christos 
   4344  1.1  christos .inst	0xce0569a5	//eor3 v5.16b, v13.16b, v5.16b, v26.16b				//AES block 5 - result
   4345  1.1  christos 	stp	q4, q5, [x2], #32			//AES block 4, 5 - store result
   4346  1.1  christos 	cmp	x0, x5				//check if we have <= 8 blocks
   4347  1.1  christos 
   4348  1.1  christos .inst	0xce0669c6	//eor3 v6.16b, v14.16b, v6.16b, v26.16b				//AES block 6 - result
   4349  1.1  christos .inst	0xce0769e7	//eor3 v7.16b, v15.16b, v7.16b, v26.16b				//AES block 7 - result
   4350  1.1  christos 	rev32	v4.16b, v30.16b				//CTR block 12
   4351  1.1  christos 
   4352  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 12
   4353  1.1  christos 	stp	q6, q7, [x2], #32			//AES block 6, 7 - store result
   4354  1.1  christos 	b.ge	.L192_dec_prepretail					//do prepretail
   4355  1.1  christos 
   4356  1.1  christos .L192_dec_main_loop:	//main	loop start
   4357  1.1  christos 	rev64	v9.16b, v9.16b						//GHASH block 8k+1
   4358  1.1  christos 	ldp	q26, q27, [x8, #0]				 	//load rk0, rk1
   4359  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8				//PRE 0
   4360  1.1  christos 
   4361  1.1  christos 	rev64	v8.16b, v8.16b						//GHASH block 8k
   4362  1.1  christos 	rev32	v5.16b, v30.16b				//CTR block 8k+13
   4363  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+13
   4364  1.1  christos 
   4365  1.1  christos 	ldr	q23, [x3, #176]				//load h7l | h7h
   4366  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   4367  1.1  christos 	ldr	q25, [x3, #208]				//load h8l | h8h
   4368  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   4369  1.1  christos 	rev64	v12.16b, v12.16b						//GHASH block 8k+4
   4370  1.1  christos 	rev64	v11.16b, v11.16b						//GHASH block 8k+3
   4371  1.1  christos 
   4372  1.1  christos 	eor	v8.16b, v8.16b, v19.16b				 	//PRE 1
   4373  1.1  christos 	rev32	v6.16b, v30.16b				//CTR block 8k+14
   4374  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+14
   4375  1.1  christos 
   4376  1.1  christos 	rev64	v13.16b, v13.16b						//GHASH block 8k+5
   4377  1.1  christos 
   4378  1.1  christos 	rev32	v7.16b, v30.16b				//CTR block 8k+15
   4379  1.1  christos 	aese	v1.16b, v26.16b
   4380  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 0
   4381  1.1  christos 	aese	v6.16b, v26.16b
   4382  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 0
   4383  1.1  christos 
   4384  1.1  christos 	aese	v5.16b, v26.16b
   4385  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 0
   4386  1.1  christos 	aese	v4.16b, v26.16b
   4387  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 0
   4388  1.1  christos 	aese	v0.16b, v26.16b
   4389  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 0
   4390  1.1  christos 
   4391  1.1  christos 	aese	v7.16b, v26.16b
   4392  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 0
   4393  1.1  christos 	aese	v2.16b, v26.16b
   4394  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 0
   4395  1.1  christos 	aese	v3.16b, v26.16b
   4396  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 0
   4397  1.1  christos 
   4398  1.1  christos 	pmull	v19.1q, v8.1d, v25.1d				//GHASH block 8k - low
   4399  1.1  christos 	pmull2	v16.1q, v9.2d, v23.2d				//GHASH block 8k+1 - high
   4400  1.1  christos 	ldp	q28, q26, [x8, #32]				//load rk2, rk3
   4401  1.1  christos 
   4402  1.1  christos 	aese	v6.16b, v27.16b
   4403  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 1
   4404  1.1  christos 	pmull	v23.1q, v9.1d, v23.1d				//GHASH block 8k+1 - low
   4405  1.1  christos 	ldr	q20, [x3, #128]				//load h5l | h5h
   4406  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   4407  1.1  christos 	ldr	q22, [x3, #160]				//load h6l | h6h
   4408  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   4409  1.1  christos 
   4410  1.1  christos 	aese	v0.16b, v27.16b
   4411  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 1
   4412  1.1  christos 	aese	v3.16b, v27.16b
   4413  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 1
   4414  1.1  christos 	aese	v7.16b, v27.16b
   4415  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 1
   4416  1.1  christos 
   4417  1.1  christos 	pmull2	v17.1q, v8.2d, v25.2d				//GHASH block 8k - high
   4418  1.1  christos 	aese	v2.16b, v27.16b
   4419  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 1
   4420  1.1  christos 	aese	v4.16b, v27.16b
   4421  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 1
   4422  1.1  christos 
   4423  1.1  christos 	trn1	v18.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   4424  1.1  christos 	rev64	v10.16b, v10.16b						//GHASH block 8k+2
   4425  1.1  christos 	aese	v1.16b, v27.16b
   4426  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 1
   4427  1.1  christos 
   4428  1.1  christos 	aese	v5.16b, v27.16b
   4429  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 1
   4430  1.1  christos 	ldr	q21, [x3, #144]				//load h6k | h5k
   4431  1.1  christos 	ldr	q24, [x3, #192]				//load h8k | h7k
   4432  1.1  christos 	trn2	v8.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   4433  1.1  christos 
   4434  1.1  christos 	eor	v17.16b, v17.16b, v16.16b				//GHASH block 8k+1 - high
   4435  1.1  christos 	pmull2	v9.1q, v11.2d, v20.2d				//GHASH block 8k+3 - high
   4436  1.1  christos 	pmull2	v29.1q, v10.2d, v22.2d				//GHASH block 8k+2 - high
   4437  1.1  christos 
   4438  1.1  christos 	eor	v8.16b, v8.16b, v18.16b			//GHASH block 8k, 8k+1 - mid
   4439  1.1  christos 	eor	v19.16b, v19.16b, v23.16b				//GHASH block 8k+1 - low
   4440  1.1  christos 	aese	v6.16b, v28.16b
   4441  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 2
   4442  1.1  christos 
   4443  1.1  christos 	aese	v2.16b, v28.16b
   4444  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 2
   4445  1.1  christos 	pmull	v20.1q, v11.1d, v20.1d				//GHASH block 8k+3 - low
   4446  1.1  christos .inst	0xce1d2631	//eor3 v17.16b, v17.16b, v29.16b, v9.16b			//GHASH block 8k+2, 8k+3 - high
   4447  1.1  christos 
   4448  1.1  christos 	aese	v1.16b, v28.16b
   4449  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 2
   4450  1.1  christos 	aese	v6.16b, v26.16b
   4451  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 3
   4452  1.1  christos 	aese	v4.16b, v28.16b
   4453  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 2
   4454  1.1  christos 
   4455  1.1  christos 	aese	v0.16b, v28.16b
   4456  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 2
   4457  1.1  christos 	aese	v7.16b, v28.16b
   4458  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 2
   4459  1.1  christos 	aese	v3.16b, v28.16b
   4460  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 2
   4461  1.1  christos 
   4462  1.1  christos 	ldr	q23, [x3, #80]				//load h3l | h3h
   4463  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   4464  1.1  christos 	ldr	q25, [x3, #112]				//load h4l | h4h
   4465  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   4466  1.1  christos 	aese	v5.16b, v28.16b
   4467  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 2
   4468  1.1  christos 	aese	v2.16b, v26.16b
   4469  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 3
   4470  1.1  christos 
   4471  1.1  christos 	pmull	v22.1q, v10.1d, v22.1d				//GHASH block 8k+2 - low
   4472  1.1  christos 	trn1	v29.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   4473  1.1  christos 	trn2	v10.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   4474  1.1  christos 
   4475  1.1  christos 	aese	v3.16b, v26.16b
   4476  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 3
   4477  1.1  christos 	aese	v4.16b, v26.16b
   4478  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 3
   4479  1.1  christos 
   4480  1.1  christos 	aese	v0.16b, v26.16b
   4481  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 3
   4482  1.1  christos 	aese	v7.16b, v26.16b
   4483  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 3
   4484  1.1  christos 	ldp	q27, q28, [x8, #64]				//load rk4, rk5
   4485  1.1  christos 
   4486  1.1  christos 	eor	v10.16b, v10.16b, v29.16b				//GHASH block 8k+2, 8k+3 - mid
   4487  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+2, 8k+3 - low
   4488  1.1  christos 	aese	v1.16b, v26.16b
   4489  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 3
   4490  1.1  christos 
   4491  1.1  christos 	trn1	v16.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   4492  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+15
   4493  1.1  christos 
   4494  1.1  christos 	pmull2	v29.1q, v10.2d, v21.2d				//GHASH block 8k+2 - mid
   4495  1.1  christos 	pmull2	v18.1q, v8.2d, v24.2d				//GHASH block 8k	- mid
   4496  1.1  christos 	pmull	v24.1q, v8.1d, v24.1d				//GHASH block 8k+1 - mid
   4497  1.1  christos 
   4498  1.1  christos 	aese	v5.16b, v26.16b
   4499  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 3
   4500  1.1  christos 	pmull	v21.1q, v10.1d, v21.1d				//GHASH block 8k+3 - mid
   4501  1.1  christos 	pmull2	v8.1q, v12.2d, v25.2d				//GHASH block 8k+4 - high
   4502  1.1  christos 
   4503  1.1  christos 	aese	v4.16b, v27.16b
   4504  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 4
   4505  1.1  christos 	aese	v6.16b, v27.16b
   4506  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 4
   4507  1.1  christos 	eor	v18.16b, v18.16b, v24.16b				//GHASH block 8k+1 - mid
   4508  1.1  christos 
   4509  1.1  christos 	aese	v5.16b, v27.16b
   4510  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 4
   4511  1.1  christos 	aese	v1.16b, v27.16b
   4512  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 4
   4513  1.1  christos 	aese	v3.16b, v27.16b
   4514  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 4
   4515  1.1  christos 
   4516  1.1  christos 	aese	v2.16b, v27.16b
   4517  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 4
   4518  1.1  christos 	aese	v0.16b, v27.16b
   4519  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 4
   4520  1.1  christos 	aese	v7.16b, v27.16b
   4521  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 4
   4522  1.1  christos 
   4523  1.1  christos 	ldr	q20, [x3, #32]				//load h1l | h1h
   4524  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   4525  1.1  christos 	ldr	q22, [x3, #64]				//load h2l | h2h
   4526  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   4527  1.1  christos 	aese	v3.16b, v28.16b
   4528  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 5
   4529  1.1  christos 	aese	v5.16b, v28.16b
   4530  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 5
   4531  1.1  christos 
   4532  1.1  christos 	ldp	q26, q27, [x8, #96]				//load rk6, rk7
   4533  1.1  christos 	aese	v7.16b, v28.16b
   4534  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 5
   4535  1.1  christos 	rev64	v15.16b, v15.16b						//GHASH block 8k+7
   4536  1.1  christos 
   4537  1.1  christos 	aese	v4.16b, v28.16b
   4538  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 5
   4539  1.1  christos .inst	0xce157652	//eor3 v18.16b, v18.16b, v21.16b, v29.16b			//GHASH block 8k+2, 8k+3 - mid
   4540  1.1  christos 	aese	v1.16b, v28.16b
   4541  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 5
   4542  1.1  christos 
   4543  1.1  christos 	pmull	v25.1q, v12.1d, v25.1d				//GHASH block 8k+4 - low
   4544  1.1  christos 	trn2	v12.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   4545  1.1  christos 	aese	v2.16b, v28.16b
   4546  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 5
   4547  1.1  christos 
   4548  1.1  christos 	aese	v6.16b, v28.16b
   4549  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 5
   4550  1.1  christos 	aese	v0.16b, v28.16b
   4551  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 5
   4552  1.1  christos 	rev64	v14.16b, v14.16b						//GHASH block 8k+6
   4553  1.1  christos 
   4554  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   4555  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   4556  1.1  christos 	pmull2	v10.1q, v13.2d, v23.2d				//GHASH block 8k+5 - high
   4557  1.1  christos 	pmull	v23.1q, v13.1d, v23.1d				//GHASH block 8k+5 - low
   4558  1.1  christos 
   4559  1.1  christos 	aese	v0.16b, v26.16b
   4560  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 6
   4561  1.1  christos 	eor	v12.16b, v12.16b, v16.16b				//GHASH block 8k+4, 8k+5 - mid
   4562  1.1  christos 	trn1	v13.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   4563  1.1  christos 
   4564  1.1  christos 	aese	v7.16b, v26.16b
   4565  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 6
   4566  1.1  christos 	aese	v2.16b, v26.16b
   4567  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 6
   4568  1.1  christos 	aese	v6.16b, v26.16b
   4569  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 6
   4570  1.1  christos 
   4571  1.1  christos 	pmull2	v11.1q, v14.2d, v22.2d				//GHASH block 8k+6 - high
   4572  1.1  christos 	aese	v3.16b, v26.16b
   4573  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 6
   4574  1.1  christos 	aese	v1.16b, v26.16b
   4575  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 6
   4576  1.1  christos 
   4577  1.1  christos 	aese	v2.16b, v27.16b
   4578  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 7
   4579  1.1  christos 	aese	v6.16b, v27.16b
   4580  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 7
   4581  1.1  christos 	aese	v5.16b, v26.16b
   4582  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 6
   4583  1.1  christos 
   4584  1.1  christos 	pmull2	v16.1q, v12.2d, v24.2d				//GHASH block 8k+4 - mid
   4585  1.1  christos .inst	0xce082a31	//eor3 v17.16b, v17.16b, v8.16b, v10.16b			//GHASH block 8k+4, 8k+5 - high
   4586  1.1  christos .inst	0xce195e73	//eor3 v19.16b, v19.16b, v25.16b, v23.16b			//GHASH block 8k+4, 8k+5 - low
   4587  1.1  christos 
   4588  1.1  christos 	pmull	v22.1q, v14.1d, v22.1d				//GHASH block 8k+6 - low
   4589  1.1  christos 	trn2	v14.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   4590  1.1  christos 	aese	v4.16b, v26.16b
   4591  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 6
   4592  1.1  christos 
   4593  1.1  christos 	aese	v5.16b, v27.16b
   4594  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 7
   4595  1.1  christos 	ldp	q28, q26, [x8, #128]				//load rk8, rk9
   4596  1.1  christos 	aese	v3.16b, v27.16b
   4597  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 7
   4598  1.1  christos 
   4599  1.1  christos 	eor	v14.16b, v14.16b, v13.16b				//GHASH block 8k+6, 8k+7 - mid
   4600  1.1  christos 	pmull	v24.1q, v12.1d, v24.1d				//GHASH block 8k+5 - mid
   4601  1.1  christos 	aese	v1.16b, v27.16b
   4602  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 7
   4603  1.1  christos 
   4604  1.1  christos 	aese	v4.16b, v27.16b
   4605  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 7
   4606  1.1  christos 	aese	v0.16b, v27.16b
   4607  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 7
   4608  1.1  christos 	aese	v7.16b, v27.16b
   4609  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 7
   4610  1.1  christos 
   4611  1.1  christos .inst	0xce184252	//eor3 v18.16b, v18.16b, v24.16b, v16.16b			//GHASH block 8k+4, 8k+5 - mid
   4612  1.1  christos 	pmull2	v13.1q, v14.2d, v21.2d				//GHASH block 8k+6 - mid
   4613  1.1  christos 	pmull2	v12.1q, v15.2d, v20.2d				//GHASH block 8k+7 - high
   4614  1.1  christos 
   4615  1.1  christos 	pmull	v21.1q, v14.1d, v21.1d				//GHASH block 8k+7 - mid
   4616  1.1  christos 	ldr	d16, [x10]			//MODULO - load modulo constant
   4617  1.1  christos 	pmull	v20.1q, v15.1d, v20.1d				//GHASH block 8k+7 - low
   4618  1.1  christos 
   4619  1.1  christos 	aese	v2.16b, v28.16b
   4620  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 8
   4621  1.1  christos 	aese	v5.16b, v28.16b
   4622  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 8
   4623  1.1  christos 	aese	v7.16b, v28.16b
   4624  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 8
   4625  1.1  christos 
   4626  1.1  christos 	aese	v0.16b, v28.16b
   4627  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 8
   4628  1.1  christos 	aese	v3.16b, v28.16b
   4629  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 8
   4630  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+6, 8k+7 - low
   4631  1.1  christos 
   4632  1.1  christos 	aese	v4.16b, v28.16b
   4633  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 8
   4634  1.1  christos 	aese	v1.16b, v28.16b
   4635  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 8
   4636  1.1  christos 	aese	v6.16b, v28.16b
   4637  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 8
   4638  1.1  christos 
   4639  1.1  christos .inst	0xce0b3231	//eor3 v17.16b, v17.16b, v11.16b, v12.16b			//GHASH block 8k+6, 8k+7 - high
   4640  1.1  christos 	rev32	v20.16b, v30.16b					//CTR block 8k+16
   4641  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+16
   4642  1.1  christos 
   4643  1.1  christos 	aese	v5.16b, v26.16b
   4644  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 9
   4645  1.1  christos .inst	0xce153652	//eor3 v18.16b, v18.16b, v21.16b, v13.16b			//GHASH block 8k+6, 8k+7 - mid
   4646  1.1  christos 	aese	v1.16b, v26.16b
   4647  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 9
   4648  1.1  christos 
   4649  1.1  christos 	aese	v3.16b, v26.16b
   4650  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 9
   4651  1.1  christos 	aese	v7.16b, v26.16b
   4652  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 9
   4653  1.1  christos 	ldp	q27, q28, [x8, #160]				//load rk10, rk11
   4654  1.1  christos 
   4655  1.1  christos .inst	0xce114e52	//eor3 v18.16b, v18.16b, v17.16b, v19.16b		 	//MODULO - karatsuba tidy up
   4656  1.1  christos 	ldp	q8, q9, [x0], #32			//AES block 8k+8, 8k+9 - load ciphertext
   4657  1.1  christos 
   4658  1.1  christos 	aese	v2.16b, v26.16b
   4659  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 9
   4660  1.1  christos 	aese	v0.16b, v26.16b
   4661  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 9
   4662  1.1  christos 	ldp	q10, q11, [x0], #32			//AES block 8k+10, 8k+11 - load ciphertext
   4663  1.1  christos 
   4664  1.1  christos 	rev32	v22.16b, v30.16b					//CTR block 8k+17
   4665  1.1  christos 	pmull	v29.1q, v17.1d, v16.1d		 	//MODULO - top 64b align with mid
   4666  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+17
   4667  1.1  christos 
   4668  1.1  christos 	aese	v6.16b, v26.16b
   4669  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 9
   4670  1.1  christos 	aese	v4.16b, v26.16b
   4671  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 9
   4672  1.1  christos 	ext	v21.16b, v17.16b, v17.16b, #8			 	//MODULO - other top alignment
   4673  1.1  christos 
   4674  1.1  christos 	aese	v3.16b, v27.16b
   4675  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 10
   4676  1.1  christos 	aese	v7.16b, v27.16b
   4677  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 10
   4678  1.1  christos 	ldp	q12, q13, [x0], #32			//AES block 8k+12, 8k+13 - load ciphertext
   4679  1.1  christos 
   4680  1.1  christos 	rev32	v23.16b, v30.16b					//CTR block 8k+18
   4681  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+18
   4682  1.1  christos .inst	0xce1d5652	//eor3 v18.16b, v18.16b, v29.16b, v21.16b			//MODULO - fold into mid
   4683  1.1  christos 
   4684  1.1  christos 	aese	v0.16b, v27.16b
   4685  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 10
   4686  1.1  christos 	aese	v1.16b, v27.16b
   4687  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 10
   4688  1.1  christos 	ldr	q26, [x8, #192]					//load rk12
   4689  1.1  christos 
   4690  1.1  christos 	ldp	q14, q15, [x0], #32			//AES block 8k+14, 8k+15 - load ciphertext
   4691  1.1  christos 	aese	v4.16b, v27.16b
   4692  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 10
   4693  1.1  christos 	aese	v6.16b, v27.16b
   4694  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 10
   4695  1.1  christos 
   4696  1.1  christos 	aese	v0.16b, v28.16b						//AES block 8k+8 - round 11
   4697  1.1  christos 	ext	v21.16b, v18.16b, v18.16b, #8			 	//MODULO - other mid alignment
   4698  1.1  christos 	aese	v1.16b, v28.16b						//AES block 8k+9 - round 11
   4699  1.1  christos 
   4700  1.1  christos 	aese	v2.16b, v27.16b
   4701  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 10
   4702  1.1  christos 	aese	v6.16b, v28.16b						//AES block 8k+14 - round 11
   4703  1.1  christos 	aese	v3.16b, v28.16b						//AES block 8k+11 - round 11
   4704  1.1  christos 
   4705  1.1  christos .inst	0xce006900	//eor3 v0.16b, v8.16b, v0.16b, v26.16b				//AES block 8k+8 - result
   4706  1.1  christos 	rev32	v25.16b, v30.16b					//CTR block 8k+19
   4707  1.1  christos 	aese	v5.16b, v27.16b
   4708  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 10
   4709  1.1  christos 
   4710  1.1  christos 	aese	v4.16b, v28.16b						//AES block 8k+12 - round 11
   4711  1.1  christos 	aese	v2.16b, v28.16b						//AES block 8k+10 - round 11
   4712  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+19
   4713  1.1  christos 
   4714  1.1  christos 	aese	v7.16b, v28.16b						//AES block 8k+15 - round 11
   4715  1.1  christos 	aese	v5.16b, v28.16b						//AES block 8k+13 - round 11
   4716  1.1  christos 	pmull	v17.1q, v18.1d, v16.1d			//MODULO - mid 64b align with low
   4717  1.1  christos 
   4718  1.1  christos .inst	0xce016921	//eor3 v1.16b, v9.16b, v1.16b, v26.16b				//AES block 8k+9 - result
   4719  1.1  christos 	stp	q0, q1, [x2], #32			//AES block 8k+8, 8k+9 - store result
   4720  1.1  christos .inst	0xce036963	//eor3 v3.16b, v11.16b, v3.16b, v26.16b				//AES block 8k+11 - result
   4721  1.1  christos 
   4722  1.1  christos .inst	0xce026942	//eor3 v2.16b, v10.16b, v2.16b, v26.16b				//AES block 8k+10 - result
   4723  1.1  christos .inst	0xce0769e7	//eor3 v7.16b, v15.16b, v7.16b, v26.16b				//AES block 8k+15 - result
   4724  1.1  christos 	stp	q2, q3, [x2], #32			//AES block 8k+10, 8k+11 - store result
   4725  1.1  christos 
   4726  1.1  christos .inst	0xce0569a5	//eor3 v5.16b, v13.16b, v5.16b, v26.16b				//AES block 8k+13 - result
   4727  1.1  christos .inst	0xce115673	//eor3 v19.16b, v19.16b, v17.16b, v21.16b		 	//MODULO - fold into low
   4728  1.1  christos 	mov	v3.16b, v25.16b					//CTR block 8k+19
   4729  1.1  christos 
   4730  1.1  christos .inst	0xce046984	//eor3 v4.16b, v12.16b, v4.16b, v26.16b				//AES block 8k+12 - result
   4731  1.1  christos 	stp	q4, q5, [x2], #32			//AES block 8k+12, 8k+13 - store result
   4732  1.1  christos 	cmp	x0, x5				//.LOOP CONTROL
   4733  1.1  christos 
   4734  1.1  christos .inst	0xce0669c6	//eor3 v6.16b, v14.16b, v6.16b, v26.16b				//AES block 8k+14 - result
   4735  1.1  christos 	stp	q6, q7, [x2], #32			//AES block 8k+14, 8k+15 - store result
   4736  1.1  christos 	mov	v0.16b, v20.16b					//CTR block 8k+16
   4737  1.1  christos 
   4738  1.1  christos 	mov	v1.16b, v22.16b					//CTR block 8k+17
   4739  1.1  christos 	mov	v2.16b, v23.16b					//CTR block 8k+18
   4740  1.1  christos 
   4741  1.1  christos 	rev32	v4.16b, v30.16b				//CTR block 8k+20
   4742  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+20
   4743  1.1  christos 	b.lt	.L192_dec_main_loop
   4744  1.1  christos 
   4745  1.1  christos .L192_dec_prepretail:	//PREPRETAIL
   4746  1.1  christos 	ldp	q26, q27, [x8, #0]				 	//load rk0, rk1
   4747  1.1  christos 	rev32	v5.16b, v30.16b				//CTR block 8k+13
   4748  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+13
   4749  1.1  christos 
   4750  1.1  christos 	ldr	q23, [x3, #176]				//load h7l | h7h
   4751  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   4752  1.1  christos 	ldr	q25, [x3, #208]				//load h8l | h8h
   4753  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   4754  1.1  christos 	rev64	v8.16b, v8.16b						//GHASH block 8k
   4755  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8				//PRE 0
   4756  1.1  christos 
   4757  1.1  christos 	rev64	v11.16b, v11.16b						//GHASH block 8k+3
   4758  1.1  christos 	rev32	v6.16b, v30.16b				//CTR block 8k+14
   4759  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+14
   4760  1.1  christos 
   4761  1.1  christos 	eor	v8.16b, v8.16b, v19.16b				 	//PRE 1
   4762  1.1  christos 	rev64	v10.16b, v10.16b						//GHASH block 8k+2
   4763  1.1  christos 	rev64	v9.16b, v9.16b						//GHASH block 8k+1
   4764  1.1  christos 
   4765  1.1  christos 	ldr	q20, [x3, #128]				//load h5l | h5h
   4766  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   4767  1.1  christos 	ldr	q22, [x3, #160]				//load h6l | h6h
   4768  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   4769  1.1  christos 	rev32	v7.16b, v30.16b				//CTR block 8k+15
   4770  1.1  christos 
   4771  1.1  christos 	aese	v0.16b, v26.16b
   4772  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 0
   4773  1.1  christos 	aese	v6.16b, v26.16b
   4774  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 0
   4775  1.1  christos 	aese	v5.16b, v26.16b
   4776  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 0
   4777  1.1  christos 
   4778  1.1  christos 	aese	v3.16b, v26.16b
   4779  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 0
   4780  1.1  christos 	aese	v2.16b, v26.16b
   4781  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 0
   4782  1.1  christos 	pmull2	v16.1q, v9.2d, v23.2d				//GHASH block 8k+1 - high
   4783  1.1  christos 
   4784  1.1  christos 	aese	v4.16b, v26.16b
   4785  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 0
   4786  1.1  christos 	pmull2	v17.1q, v8.2d, v25.2d				//GHASH block 8k - high
   4787  1.1  christos 	aese	v1.16b, v26.16b
   4788  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 0
   4789  1.1  christos 
   4790  1.1  christos 	aese	v6.16b, v27.16b
   4791  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 1
   4792  1.1  christos 	aese	v7.16b, v26.16b
   4793  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 0
   4794  1.1  christos 	ldp	q28, q26, [x8, #32]				//load rk2, rk3
   4795  1.1  christos 
   4796  1.1  christos 	aese	v4.16b, v27.16b
   4797  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 1
   4798  1.1  christos 	pmull2	v29.1q, v10.2d, v22.2d				//GHASH block 8k+2 - high
   4799  1.1  christos 	pmull	v22.1q, v10.1d, v22.1d				//GHASH block 8k+2 - low
   4800  1.1  christos 
   4801  1.1  christos 	pmull	v23.1q, v9.1d, v23.1d				//GHASH block 8k+1 - low
   4802  1.1  christos 	eor	v17.16b, v17.16b, v16.16b				//GHASH block 8k+1 - high
   4803  1.1  christos 	aese	v3.16b, v27.16b
   4804  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 1
   4805  1.1  christos 
   4806  1.1  christos 	pmull	v19.1q, v8.1d, v25.1d				//GHASH block 8k - low
   4807  1.1  christos 	aese	v7.16b, v27.16b
   4808  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 1
   4809  1.1  christos 	aese	v0.16b, v27.16b
   4810  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 1
   4811  1.1  christos 
   4812  1.1  christos 	trn1	v18.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   4813  1.1  christos 	trn2	v8.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   4814  1.1  christos 	pmull2	v9.1q, v11.2d, v20.2d				//GHASH block 8k+3 - high
   4815  1.1  christos 
   4816  1.1  christos 	aese	v2.16b, v27.16b
   4817  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 1
   4818  1.1  christos 	aese	v1.16b, v27.16b
   4819  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 1
   4820  1.1  christos 	aese	v5.16b, v27.16b
   4821  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 1
   4822  1.1  christos 
   4823  1.1  christos 	ldr	q21, [x3, #144]				//load h6k | h5k
   4824  1.1  christos 	ldr	q24, [x3, #192]				//load h8k | h7k
   4825  1.1  christos 	aese	v3.16b, v28.16b
   4826  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 2
   4827  1.1  christos 	eor	v8.16b, v8.16b, v18.16b			//GHASH block 8k, 8k+1 - mid
   4828  1.1  christos 
   4829  1.1  christos 	aese	v6.16b, v28.16b
   4830  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 2
   4831  1.1  christos 	rev64	v13.16b, v13.16b						//GHASH block 8k+5
   4832  1.1  christos 	pmull	v20.1q, v11.1d, v20.1d				//GHASH block 8k+3 - low
   4833  1.1  christos 
   4834  1.1  christos .inst	0xce1d2631	//eor3 v17.16b, v17.16b, v29.16b, v9.16b			//GHASH block 8k+2, 8k+3 - high
   4835  1.1  christos 	aese	v4.16b, v28.16b
   4836  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 2
   4837  1.1  christos 	aese	v5.16b, v28.16b
   4838  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 2
   4839  1.1  christos 
   4840  1.1  christos 	trn1	v29.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   4841  1.1  christos 	aese	v3.16b, v26.16b
   4842  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 3
   4843  1.1  christos 	aese	v7.16b, v28.16b
   4844  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 2
   4845  1.1  christos 
   4846  1.1  christos 	aese	v0.16b, v28.16b
   4847  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 2
   4848  1.1  christos 	aese	v2.16b, v28.16b
   4849  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 2
   4850  1.1  christos 	trn2	v10.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   4851  1.1  christos 
   4852  1.1  christos 	pmull2	v18.1q, v8.2d, v24.2d				//GHASH block 8k	- mid
   4853  1.1  christos 	aese	v1.16b, v28.16b
   4854  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 2
   4855  1.1  christos 	pmull	v24.1q, v8.1d, v24.1d				//GHASH block 8k+1 - mid
   4856  1.1  christos 
   4857  1.1  christos 	aese	v5.16b, v26.16b
   4858  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 3
   4859  1.1  christos 	eor	v10.16b, v10.16b, v29.16b				//GHASH block 8k+2, 8k+3 - mid
   4860  1.1  christos 	eor	v19.16b, v19.16b, v23.16b				//GHASH block 8k+1 - low
   4861  1.1  christos 
   4862  1.1  christos 	aese	v7.16b, v26.16b
   4863  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 3
   4864  1.1  christos 	aese	v6.16b, v26.16b
   4865  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 3
   4866  1.1  christos 	aese	v4.16b, v26.16b
   4867  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 3
   4868  1.1  christos 
   4869  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+2, 8k+3 - low
   4870  1.1  christos 	ldp	q27, q28, [x8, #64]				//load rk4, rk5
   4871  1.1  christos 	aese	v0.16b, v26.16b
   4872  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 3
   4873  1.1  christos 
   4874  1.1  christos 	ldr	q23, [x3, #80]				//load h3l | h3h
   4875  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   4876  1.1  christos 	ldr	q25, [x3, #112]				//load h4l | h4h
   4877  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   4878  1.1  christos 	pmull2	v29.1q, v10.2d, v21.2d				//GHASH block 8k+2 - mid
   4879  1.1  christos 	pmull	v21.1q, v10.1d, v21.1d				//GHASH block 8k+3 - mid
   4880  1.1  christos 
   4881  1.1  christos 	ldr	q20, [x3, #32]				//load h1l | h1h
   4882  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   4883  1.1  christos 	ldr	q22, [x3, #64]				//load h2l | h2h
   4884  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   4885  1.1  christos 	eor	v18.16b, v18.16b, v24.16b				//GHASH block 8k+1 - mid
   4886  1.1  christos 	aese	v2.16b, v26.16b
   4887  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 3
   4888  1.1  christos 
   4889  1.1  christos 	rev64	v15.16b, v15.16b						//GHASH block 8k+7
   4890  1.1  christos 
   4891  1.1  christos .inst	0xce157652	//eor3 v18.16b, v18.16b, v21.16b, v29.16b			//GHASH block 8k+2, 8k+3 - mid
   4892  1.1  christos 	rev64	v12.16b, v12.16b						//GHASH block 8k+4
   4893  1.1  christos 
   4894  1.1  christos 	aese	v5.16b, v27.16b
   4895  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 4
   4896  1.1  christos 	aese	v4.16b, v27.16b
   4897  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 4
   4898  1.1  christos 	aese	v1.16b, v26.16b
   4899  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 3
   4900  1.1  christos 
   4901  1.1  christos 	aese	v2.16b, v27.16b
   4902  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 4
   4903  1.1  christos 	aese	v0.16b, v27.16b
   4904  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 4
   4905  1.1  christos 	aese	v3.16b, v27.16b
   4906  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 4
   4907  1.1  christos 
   4908  1.1  christos 	aese	v1.16b, v27.16b
   4909  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 4
   4910  1.1  christos 	aese	v6.16b, v27.16b
   4911  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 4
   4912  1.1  christos 	aese	v7.16b, v27.16b
   4913  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 4
   4914  1.1  christos 
   4915  1.1  christos 	rev64	v14.16b, v14.16b						//GHASH block 8k+6
   4916  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   4917  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   4918  1.1  christos 	trn1	v16.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   4919  1.1  christos 
   4920  1.1  christos 	aese	v7.16b, v28.16b
   4921  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 5
   4922  1.1  christos 	aese	v1.16b, v28.16b
   4923  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 5
   4924  1.1  christos 	aese	v2.16b, v28.16b
   4925  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 5
   4926  1.1  christos 
   4927  1.1  christos 	ldp	q26, q27, [x8, #96]				//load rk6, rk7
   4928  1.1  christos 	aese	v6.16b, v28.16b
   4929  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 5
   4930  1.1  christos 	aese	v5.16b, v28.16b
   4931  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 5
   4932  1.1  christos 
   4933  1.1  christos 	pmull2	v11.1q, v14.2d, v22.2d				//GHASH block 8k+6 - high
   4934  1.1  christos 	pmull2	v8.1q, v12.2d, v25.2d				//GHASH block 8k+4 - high
   4935  1.1  christos 	pmull	v22.1q, v14.1d, v22.1d				//GHASH block 8k+6 - low
   4936  1.1  christos 
   4937  1.1  christos 	aese	v4.16b, v28.16b
   4938  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 5
   4939  1.1  christos 
   4940  1.1  christos 	pmull	v25.1q, v12.1d, v25.1d				//GHASH block 8k+4 - low
   4941  1.1  christos 	trn2	v12.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   4942  1.1  christos 	pmull2	v10.1q, v13.2d, v23.2d				//GHASH block 8k+5 - high
   4943  1.1  christos 
   4944  1.1  christos 	pmull	v23.1q, v13.1d, v23.1d				//GHASH block 8k+5 - low
   4945  1.1  christos 	trn1	v13.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   4946  1.1  christos 	aese	v0.16b, v28.16b
   4947  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 5
   4948  1.1  christos 
   4949  1.1  christos 	trn2	v14.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   4950  1.1  christos 	aese	v3.16b, v28.16b
   4951  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 5
   4952  1.1  christos 	eor	v12.16b, v12.16b, v16.16b				//GHASH block 8k+4, 8k+5 - mid
   4953  1.1  christos 
   4954  1.1  christos 	aese	v4.16b, v26.16b
   4955  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 6
   4956  1.1  christos 	aese	v2.16b, v26.16b
   4957  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 6
   4958  1.1  christos 
   4959  1.1  christos 	eor	v14.16b, v14.16b, v13.16b				//GHASH block 8k+6, 8k+7 - mid
   4960  1.1  christos 	aese	v1.16b, v26.16b
   4961  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 6
   4962  1.1  christos 	aese	v7.16b, v26.16b
   4963  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 6
   4964  1.1  christos 
   4965  1.1  christos 	pmull2	v16.1q, v12.2d, v24.2d				//GHASH block 8k+4 - mid
   4966  1.1  christos 	pmull	v24.1q, v12.1d, v24.1d				//GHASH block 8k+5 - mid
   4967  1.1  christos 	aese	v0.16b, v26.16b
   4968  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 6
   4969  1.1  christos 
   4970  1.1  christos 	pmull2	v13.1q, v14.2d, v21.2d				//GHASH block 8k+6 - mid
   4971  1.1  christos 	aese	v5.16b, v26.16b
   4972  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 6
   4973  1.1  christos 	pmull2	v12.1q, v15.2d, v20.2d				//GHASH block 8k+7 - high
   4974  1.1  christos 
   4975  1.1  christos .inst	0xce184252	//eor3 v18.16b, v18.16b, v24.16b, v16.16b			//GHASH block 8k+4, 8k+5 - mid
   4976  1.1  christos 	aese	v4.16b, v27.16b
   4977  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 7
   4978  1.1  christos .inst	0xce195e73	//eor3 v19.16b, v19.16b, v25.16b, v23.16b			//GHASH block 8k+4, 8k+5 - low
   4979  1.1  christos 
   4980  1.1  christos 	aese	v3.16b, v26.16b
   4981  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 6
   4982  1.1  christos 	aese	v6.16b, v26.16b
   4983  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 6
   4984  1.1  christos 	aese	v5.16b, v27.16b
   4985  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 7
   4986  1.1  christos 
   4987  1.1  christos 	ldp	q28, q26, [x8, #128]				//load rk8, rk9
   4988  1.1  christos 	pmull	v21.1q, v14.1d, v21.1d				//GHASH block 8k+7 - mid
   4989  1.1  christos 	aese	v2.16b, v27.16b
   4990  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 7
   4991  1.1  christos 
   4992  1.1  christos 	ldr	d16, [x10]			//MODULO - load modulo constant
   4993  1.1  christos .inst	0xce082a31	//eor3 v17.16b, v17.16b, v8.16b, v10.16b			//GHASH block 8k+4, 8k+5 - high
   4994  1.1  christos 	pmull	v20.1q, v15.1d, v20.1d				//GHASH block 8k+7 - low
   4995  1.1  christos 
   4996  1.1  christos 	aese	v1.16b, v27.16b
   4997  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 7
   4998  1.1  christos 	aese	v7.16b, v27.16b
   4999  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 7
   5000  1.1  christos 	aese	v6.16b, v27.16b
   5001  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 7
   5002  1.1  christos 
   5003  1.1  christos .inst	0xce0b3231	//eor3 v17.16b, v17.16b, v11.16b, v12.16b			//GHASH block 8k+6, 8k+7 - high
   5004  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+6, 8k+7 - low
   5005  1.1  christos .inst	0xce153652	//eor3 v18.16b, v18.16b, v21.16b, v13.16b			//GHASH block 8k+6, 8k+7 - mid
   5006  1.1  christos 
   5007  1.1  christos 	aese	v0.16b, v27.16b
   5008  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 7
   5009  1.1  christos 	aese	v3.16b, v27.16b
   5010  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 7
   5011  1.1  christos 
   5012  1.1  christos .inst	0xce114e52	//eor3 v18.16b, v18.16b, v17.16b, v19.16b		 	//MODULO - karatsuba tidy up
   5013  1.1  christos 	ext	v21.16b, v17.16b, v17.16b, #8			 	//MODULO - other top alignment
   5014  1.1  christos 	aese	v2.16b, v28.16b
   5015  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 8
   5016  1.1  christos 
   5017  1.1  christos 	aese	v6.16b, v28.16b
   5018  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 8
   5019  1.1  christos 	aese	v7.16b, v28.16b
   5020  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 8
   5021  1.1  christos 	aese	v1.16b, v28.16b
   5022  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 8
   5023  1.1  christos 
   5024  1.1  christos 	aese	v3.16b, v28.16b
   5025  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 8
   5026  1.1  christos 	pmull	v29.1q, v17.1d, v16.1d		 	//MODULO - top 64b align with mid
   5027  1.1  christos 	aese	v0.16b, v28.16b
   5028  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 8
   5029  1.1  christos 
   5030  1.1  christos 	aese	v5.16b, v28.16b
   5031  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 8
   5032  1.1  christos 	aese	v4.16b, v28.16b
   5033  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 8
   5034  1.1  christos 	ldp	q27, q28, [x8, #160]				//load rk10, rk11
   5035  1.1  christos 
   5036  1.1  christos .inst	0xce1d5652	//eor3 v18.16b, v18.16b, v29.16b, v21.16b			//MODULO - fold into mid
   5037  1.1  christos 	aese	v7.16b, v26.16b
   5038  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 9
   5039  1.1  christos 	aese	v6.16b, v26.16b
   5040  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 9
   5041  1.1  christos 
   5042  1.1  christos 	aese	v5.16b, v26.16b
   5043  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 9
   5044  1.1  christos 	aese	v2.16b, v26.16b
   5045  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 9
   5046  1.1  christos 	aese	v3.16b, v26.16b
   5047  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 9
   5048  1.1  christos 
   5049  1.1  christos 	aese	v0.16b, v26.16b
   5050  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 9
   5051  1.1  christos 	aese	v1.16b, v26.16b
   5052  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 9
   5053  1.1  christos 	aese	v4.16b, v26.16b
   5054  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 9
   5055  1.1  christos 
   5056  1.1  christos 	pmull	v17.1q, v18.1d, v16.1d			//MODULO - mid 64b align with low
   5057  1.1  christos 	ldr	q26, [x8, #192]					//load rk12
   5058  1.1  christos 	ext	v21.16b, v18.16b, v18.16b, #8			 	//MODULO - other mid alignment
   5059  1.1  christos 
   5060  1.1  christos 	aese	v2.16b, v27.16b
   5061  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 10
   5062  1.1  christos 	aese	v5.16b, v27.16b
   5063  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 10
   5064  1.1  christos 	aese	v0.16b, v27.16b
   5065  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 10
   5066  1.1  christos 
   5067  1.1  christos 	aese	v4.16b, v27.16b
   5068  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 10
   5069  1.1  christos 	aese	v6.16b, v27.16b
   5070  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 10
   5071  1.1  christos 	aese	v7.16b, v27.16b
   5072  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 10
   5073  1.1  christos 
   5074  1.1  christos 	aese	v0.16b, v28.16b						//AES block 8k+8 - round 11
   5075  1.1  christos .inst	0xce115673	//eor3 v19.16b, v19.16b, v17.16b, v21.16b		 	//MODULO - fold into low
   5076  1.1  christos 	aese	v5.16b, v28.16b						//AES block 8k+13 - round 11
   5077  1.1  christos 
   5078  1.1  christos 	aese	v2.16b, v28.16b						//AES block 8k+10 - round 11
   5079  1.1  christos 	aese	v3.16b, v27.16b
   5080  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 10
   5081  1.1  christos 	aese	v1.16b, v27.16b
   5082  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 10
   5083  1.1  christos 
   5084  1.1  christos 	aese	v6.16b, v28.16b						//AES block 8k+14 - round 11
   5085  1.1  christos 	aese	v4.16b, v28.16b						//AES block 8k+12 - round 11
   5086  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+15
   5087  1.1  christos 
   5088  1.1  christos 	aese	v3.16b, v28.16b						//AES block 8k+11 - round 11
   5089  1.1  christos 	aese	v1.16b, v28.16b						//AES block 8k+9 - round 11
   5090  1.1  christos 	aese	v7.16b, v28.16b						//AES block 8k+15 - round 11
   5091  1.1  christos 
   5092  1.1  christos .L192_dec_tail:	//TAIL
   5093  1.1  christos 
   5094  1.1  christos 	sub	x5, x4, x0 	//main_end_input_ptr is number of bytes left to process
   5095  1.1  christos 
   5096  1.1  christos 	ldp	q20, q21, [x3, #128]			//load h5l | h5h
   5097  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   5098  1.1  christos 	ldr	q9, [x0], #16				//AES block 8k+8 - load ciphertext
   5099  1.1  christos 
   5100  1.1  christos 	ldp	q24, q25, [x3, #192]			//load h8k | h7k
   5101  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   5102  1.1  christos 
   5103  1.1  christos 	mov	v29.16b, v26.16b
   5104  1.1  christos 
   5105  1.1  christos 	ldp	q22, q23, [x3, #160]			//load h6l | h6h
   5106  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   5107  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   5108  1.1  christos 	ext	v16.16b, v19.16b, v19.16b, #8				//prepare final partial tag
   5109  1.1  christos 
   5110  1.1  christos .inst	0xce00752c	//eor3 v12.16b, v9.16b, v0.16b, v29.16b				//AES block 8k+8 - result
   5111  1.1  christos 	cmp	x5, #112
   5112  1.1  christos 	b.gt	.L192_dec_blocks_more_than_7
   5113  1.1  christos 
   5114  1.1  christos 	mov	v7.16b, v6.16b
   5115  1.1  christos 	movi	v17.8b, #0
   5116  1.1  christos 	sub	v30.4s, v30.4s, v31.4s
   5117  1.1  christos 
   5118  1.1  christos 	mov	v6.16b, v5.16b
   5119  1.1  christos 	mov	v5.16b, v4.16b
   5120  1.1  christos 	mov	v4.16b, v3.16b
   5121  1.1  christos 
   5122  1.1  christos 	cmp	x5, #96
   5123  1.1  christos 	movi	v19.8b, #0
   5124  1.1  christos 	mov	v3.16b, v2.16b
   5125  1.1  christos 
   5126  1.1  christos 	mov	v2.16b, v1.16b
   5127  1.1  christos 	movi	v18.8b, #0
   5128  1.1  christos 	b.gt	.L192_dec_blocks_more_than_6
   5129  1.1  christos 
   5130  1.1  christos 	mov	v7.16b, v6.16b
   5131  1.1  christos 	mov	v6.16b, v5.16b
   5132  1.1  christos 	mov	v5.16b, v4.16b
   5133  1.1  christos 
   5134  1.1  christos 	mov	v4.16b, v3.16b
   5135  1.1  christos 	mov	v3.16b, v1.16b
   5136  1.1  christos 
   5137  1.1  christos 	sub	v30.4s, v30.4s, v31.4s
   5138  1.1  christos 	cmp	x5, #80
   5139  1.1  christos 	b.gt	.L192_dec_blocks_more_than_5
   5140  1.1  christos 
   5141  1.1  christos 	mov	v7.16b, v6.16b
   5142  1.1  christos 	mov	v6.16b, v5.16b
   5143  1.1  christos 
   5144  1.1  christos 	mov	v5.16b, v4.16b
   5145  1.1  christos 	mov	v4.16b, v1.16b
   5146  1.1  christos 	cmp	x5, #64
   5147  1.1  christos 
   5148  1.1  christos 	sub	v30.4s, v30.4s, v31.4s
   5149  1.1  christos 	b.gt	.L192_dec_blocks_more_than_4
   5150  1.1  christos 
   5151  1.1  christos 	sub	v30.4s, v30.4s, v31.4s
   5152  1.1  christos 	mov	v7.16b, v6.16b
   5153  1.1  christos 	mov	v6.16b, v5.16b
   5154  1.1  christos 
   5155  1.1  christos 	mov	v5.16b, v1.16b
   5156  1.1  christos 	cmp	x5, #48
   5157  1.1  christos 	b.gt	.L192_dec_blocks_more_than_3
   5158  1.1  christos 
   5159  1.1  christos 	sub	v30.4s, v30.4s, v31.4s
   5160  1.1  christos 	mov	v7.16b, v6.16b
   5161  1.1  christos 	cmp	x5, #32
   5162  1.1  christos 
   5163  1.1  christos 	mov	v6.16b, v1.16b
   5164  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   5165  1.1  christos 	b.gt	.L192_dec_blocks_more_than_2
   5166  1.1  christos 
   5167  1.1  christos 	sub	v30.4s, v30.4s, v31.4s
   5168  1.1  christos 
   5169  1.1  christos 	mov	v7.16b, v1.16b
   5170  1.1  christos 	cmp	x5, #16
   5171  1.1  christos 	b.gt	.L192_dec_blocks_more_than_1
   5172  1.1  christos 
   5173  1.1  christos 	sub	v30.4s, v30.4s, v31.4s
   5174  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   5175  1.1  christos 	b	.L192_dec_blocks_less_than_1
   5176  1.1  christos .L192_dec_blocks_more_than_7:	//blocks	left >  7
   5177  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-7 block
   5178  1.1  christos 
   5179  1.1  christos 	ins	v18.d[0], v24.d[1]					//GHASH final-7 block - mid
   5180  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   5181  1.1  christos 
   5182  1.1  christos 	pmull2	v17.1q, v8.2d, v25.2d				//GHASH final-7 block - high
   5183  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-7 block - mid
   5184  1.1  christos 	ldr	q9, [x0], #16				//AES final-6 block - load ciphertext
   5185  1.1  christos 
   5186  1.1  christos 	pmull	v19.1q, v8.1d, v25.1d				//GHASH final-7 block - low
   5187  1.1  christos 
   5188  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-7 block - mid
   5189  1.1  christos 	st1	{ v12.16b}, [x2], #16			 	//AES final-7 block  - store result
   5190  1.1  christos 
   5191  1.1  christos .inst	0xce01752c	//eor3 v12.16b, v9.16b, v1.16b, v29.16b				//AES final-6 block - result
   5192  1.1  christos 
   5193  1.1  christos 	pmull	v18.1q, v27.1d, v18.1d			 	//GHASH final-7 block - mid
   5194  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   5195  1.1  christos .L192_dec_blocks_more_than_6:	//blocks	left >  6
   5196  1.1  christos 
   5197  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-6 block
   5198  1.1  christos 
   5199  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   5200  1.1  christos 
   5201  1.1  christos 	ldr	q9, [x0], #16				//AES final-5 block - load ciphertext
   5202  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-6 block - mid
   5203  1.1  christos 
   5204  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-6 block - mid
   5205  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   5206  1.1  christos 	pmull2	v28.1q, v8.2d, v23.2d				//GHASH final-6 block - high
   5207  1.1  christos 
   5208  1.1  christos 	st1	{ v12.16b}, [x2], #16			 	//AES final-6 block - store result
   5209  1.1  christos .inst	0xce02752c	//eor3 v12.16b, v9.16b, v2.16b, v29.16b				//AES final-5 block - result
   5210  1.1  christos 
   5211  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-6 block - high
   5212  1.1  christos 	pmull	v27.1q, v27.1d, v24.1d				//GHASH final-6 block - mid
   5213  1.1  christos 	pmull	v26.1q, v8.1d, v23.1d				//GHASH final-6 block - low
   5214  1.1  christos 
   5215  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-6 block - mid
   5216  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-6 block - low
   5217  1.1  christos .L192_dec_blocks_more_than_5:	//blocks	left >  5
   5218  1.1  christos 
   5219  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-5 block
   5220  1.1  christos 
   5221  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   5222  1.1  christos 
   5223  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-5 block - mid
   5224  1.1  christos 
   5225  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-5 block - mid
   5226  1.1  christos 
   5227  1.1  christos 	ins	v27.d[1], v27.d[0]					//GHASH final-5 block - mid
   5228  1.1  christos 	pmull2	v28.1q, v8.2d, v22.2d				//GHASH final-5 block - high
   5229  1.1  christos 
   5230  1.1  christos 	ldr	q9, [x0], #16				//AES final-4 block - load ciphertext
   5231  1.1  christos 
   5232  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-5 block - high
   5233  1.1  christos 	pmull	v26.1q, v8.1d, v22.1d				//GHASH final-5 block - low
   5234  1.1  christos 
   5235  1.1  christos 	pmull2	v27.1q, v27.2d, v21.2d				//GHASH final-5 block - mid
   5236  1.1  christos 
   5237  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-5 block - low
   5238  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   5239  1.1  christos 	st1	{ v12.16b}, [x2], #16			 	//AES final-5 block - store result
   5240  1.1  christos 
   5241  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-5 block - mid
   5242  1.1  christos .inst	0xce03752c	//eor3 v12.16b, v9.16b, v3.16b, v29.16b				//AES final-4 block - result
   5243  1.1  christos .L192_dec_blocks_more_than_4:	//blocks	left >  4
   5244  1.1  christos 
   5245  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-4 block
   5246  1.1  christos 
   5247  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   5248  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   5249  1.1  christos 
   5250  1.1  christos 	ldr	q9, [x0], #16				//AES final-3 block - load ciphertext
   5251  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-4 block - mid
   5252  1.1  christos 	pmull	v26.1q, v8.1d, v20.1d				//GHASH final-4 block - low
   5253  1.1  christos 
   5254  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-4 block - mid
   5255  1.1  christos 
   5256  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-4 block - low
   5257  1.1  christos 
   5258  1.1  christos 	pmull	v27.1q, v27.1d, v21.1d				//GHASH final-4 block - mid
   5259  1.1  christos 	st1	{ v12.16b}, [x2], #16			 	//AES final-4 block - store result
   5260  1.1  christos 	pmull2	v28.1q, v8.2d, v20.2d				//GHASH final-4 block - high
   5261  1.1  christos 
   5262  1.1  christos .inst	0xce04752c	//eor3 v12.16b, v9.16b, v4.16b, v29.16b				//AES final-3 block - result
   5263  1.1  christos 
   5264  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-4 block - mid
   5265  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-4 block - high
   5266  1.1  christos .L192_dec_blocks_more_than_3:	//blocks	left >  3
   5267  1.1  christos 
   5268  1.1  christos 	ldr	q25, [x3, #112]				//load h4l | h4h
   5269  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   5270  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-3 block
   5271  1.1  christos 	ldr	q9, [x0], #16				//AES final-2 block - load ciphertext
   5272  1.1  christos 
   5273  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   5274  1.1  christos 
   5275  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-3 block - mid
   5276  1.1  christos 	pmull2	v28.1q, v8.2d, v25.2d				//GHASH final-3 block - high
   5277  1.1  christos 
   5278  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-3 block - high
   5279  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   5280  1.1  christos 	pmull	v26.1q, v8.1d, v25.1d				//GHASH final-3 block - low
   5281  1.1  christos 
   5282  1.1  christos 	st1	{ v12.16b}, [x2], #16			 	//AES final-3 block - store result
   5283  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-3 block - mid
   5284  1.1  christos .inst	0xce05752c	//eor3 v12.16b, v9.16b, v5.16b, v29.16b				//AES final-2 block - result
   5285  1.1  christos 
   5286  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-3 block - low
   5287  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   5288  1.1  christos 
   5289  1.1  christos 	ins	v27.d[1], v27.d[0]					//GHASH final-3 block - mid
   5290  1.1  christos 
   5291  1.1  christos 	pmull2	v27.1q, v27.2d, v24.2d				//GHASH final-3 block - mid
   5292  1.1  christos 
   5293  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-3 block - mid
   5294  1.1  christos .L192_dec_blocks_more_than_2:	//blocks	left >  2
   5295  1.1  christos 
   5296  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-2 block
   5297  1.1  christos 	ldr	q23, [x3, #80]				//load h3l | h3h
   5298  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   5299  1.1  christos 
   5300  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   5301  1.1  christos 
   5302  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-2 block - mid
   5303  1.1  christos 	ldr	q9, [x0], #16				//AES final-1 block - load ciphertext
   5304  1.1  christos 
   5305  1.1  christos 	pmull2	v28.1q, v8.2d, v23.2d				//GHASH final-2 block - high
   5306  1.1  christos 
   5307  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-2 block - mid
   5308  1.1  christos 
   5309  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-2 block - high
   5310  1.1  christos 	pmull	v26.1q, v8.1d, v23.1d				//GHASH final-2 block - low
   5311  1.1  christos 
   5312  1.1  christos 	pmull	v27.1q, v27.1d, v24.1d				//GHASH final-2 block - mid
   5313  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   5314  1.1  christos 
   5315  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-2 block - low
   5316  1.1  christos 	st1	{ v12.16b}, [x2], #16			 	//AES final-2 block - store result
   5317  1.1  christos 
   5318  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-2 block - mid
   5319  1.1  christos .inst	0xce06752c	//eor3 v12.16b, v9.16b, v6.16b, v29.16b				//AES final-1 block - result
   5320  1.1  christos .L192_dec_blocks_more_than_1:	//blocks	left >  1
   5321  1.1  christos 
   5322  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-1 block
   5323  1.1  christos 	ldr	q9, [x0], #16				//AES final block - load ciphertext
   5324  1.1  christos 	ldr	q22, [x3, #64]				//load h2l | h2h
   5325  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   5326  1.1  christos 
   5327  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   5328  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   5329  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   5330  1.1  christos 
   5331  1.1  christos 	pmull	v26.1q, v8.1d, v22.1d				//GHASH final-1 block - low
   5332  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-1 block - mid
   5333  1.1  christos 	st1	{ v12.16b}, [x2], #16			 	//AES final-1 block - store result
   5334  1.1  christos 
   5335  1.1  christos 	pmull2	v28.1q, v8.2d, v22.2d				//GHASH final-1 block - high
   5336  1.1  christos 
   5337  1.1  christos .inst	0xce07752c	//eor3 v12.16b, v9.16b, v7.16b, v29.16b				//AES final block - result
   5338  1.1  christos 
   5339  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-1 block - mid
   5340  1.1  christos 
   5341  1.1  christos 	ins	v27.d[1], v27.d[0]					//GHASH final-1 block - mid
   5342  1.1  christos 
   5343  1.1  christos 	pmull2	v27.1q, v27.2d, v21.2d				//GHASH final-1 block - mid
   5344  1.1  christos 
   5345  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-1 block - low
   5346  1.1  christos 
   5347  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-1 block - mid
   5348  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-1 block - high
   5349  1.1  christos .L192_dec_blocks_less_than_1:	//blocks	left <= 1
   5350  1.1  christos 
   5351  1.1  christos 	rev32	v30.16b, v30.16b
   5352  1.1  christos 	and	x1, x1, #127				//bit_length %= 128
   5353  1.1  christos 
   5354  1.1  christos 	sub	x1, x1, #128				//bit_length -= 128
   5355  1.1  christos 	str	q30, [x16]					//store the updated counter
   5356  1.1  christos 
   5357  1.1  christos 	neg	x1, x1				//bit_length = 128 - #bits in input (in range [1,128])
   5358  1.1  christos 	mvn	x6, xzr						//temp0_x = 0xffffffffffffffff
   5359  1.1  christos 
   5360  1.1  christos 	and	x1, x1, #127				//bit_length %= 128
   5361  1.1  christos 
   5362  1.1  christos 	mvn	x7, xzr						//temp1_x = 0xffffffffffffffff
   5363  1.1  christos 	lsr	x6, x6, x1				//temp0_x is mask for top 64b of last block
   5364  1.1  christos 	cmp	x1, #64
   5365  1.1  christos 
   5366  1.1  christos 	csel	x13, x7, x6, lt
   5367  1.1  christos 	csel	x14, x6, xzr, lt
   5368  1.1  christos 	ldr	q20, [x3, #32]				//load h1l | h1h
   5369  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   5370  1.1  christos 
   5371  1.1  christos 	mov	v0.d[1], x14
   5372  1.1  christos 	ld1	{ v26.16b}, [x2]					//load existing bytes where the possibly partial last block is to be stored
   5373  1.1  christos 
   5374  1.1  christos 	mov	v0.d[0], x13					//ctr0b is mask for last block
   5375  1.1  christos 
   5376  1.1  christos 	and	v9.16b, v9.16b, v0.16b					//possibly partial last block has zeroes in highest bits
   5377  1.1  christos 	bif	v12.16b, v26.16b, v0.16b					//insert existing bytes in top end of result before storing
   5378  1.1  christos 
   5379  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final block
   5380  1.1  christos 
   5381  1.1  christos 	st1	{ v12.16b}, [x2]				//store all 16B
   5382  1.1  christos 
   5383  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   5384  1.1  christos 
   5385  1.1  christos 	ins	v16.d[0], v8.d[1]					//GHASH final block - mid
   5386  1.1  christos 	pmull	v26.1q, v8.1d, v20.1d				//GHASH final block - low
   5387  1.1  christos 
   5388  1.1  christos 	eor	v16.8b, v16.8b, v8.8b				//GHASH final block - mid
   5389  1.1  christos 	pmull2	v28.1q, v8.2d, v20.2d				//GHASH final block - high
   5390  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final block - low
   5391  1.1  christos 
   5392  1.1  christos 	pmull	v16.1q, v16.1d, v21.1d				//GHASH final block - mid
   5393  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final block - high
   5394  1.1  christos 
   5395  1.1  christos 	eor	v14.16b, v17.16b, v19.16b				//MODULO - karatsuba tidy up
   5396  1.1  christos 	eor	v18.16b, v18.16b, v16.16b				//GHASH final block - mid
   5397  1.1  christos 	ldr	d16, [x10]			//MODULO - load modulo constant
   5398  1.1  christos 
   5399  1.1  christos 	pmull	v21.1q, v17.1d, v16.1d			//MODULO - top 64b align with mid
   5400  1.1  christos 	ext	v17.16b, v17.16b, v17.16b, #8				//MODULO - other top alignment
   5401  1.1  christos 
   5402  1.1  christos 	eor	v18.16b, v18.16b, v14.16b				//MODULO - karatsuba tidy up
   5403  1.1  christos 
   5404  1.1  christos .inst	0xce115652	//eor3 v18.16b, v18.16b, v17.16b, v21.16b			//MODULO - fold into mid
   5405  1.1  christos 
   5406  1.1  christos 	pmull	v17.1q, v18.1d, v16.1d			//MODULO - mid 64b align with low
   5407  1.1  christos 	ext	v18.16b, v18.16b, v18.16b, #8				//MODULO - other mid alignment
   5408  1.1  christos 
   5409  1.1  christos .inst	0xce124673	//eor3 v19.16b, v19.16b, v18.16b, v17.16b			//MODULO - fold into low
   5410  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8
   5411  1.1  christos 	rev64	v19.16b, v19.16b
   5412  1.1  christos 	st1	{ v19.16b }, [x3]
   5413  1.1  christos 
   5414  1.1  christos 	mov	x0, x9
   5415  1.1  christos 
   5416  1.1  christos 	ldp	d10, d11, [sp, #16]
   5417  1.1  christos 	ldp	d12, d13, [sp, #32]
   5418  1.1  christos 	ldp	d14, d15, [sp, #48]
   5419  1.1  christos 	ldp	d8, d9, [sp], #80
   5420  1.1  christos 	ret
   5421  1.1  christos 
   5422  1.1  christos .L192_dec_ret:
   5423  1.1  christos 	mov	w0, #0x0
   5424  1.1  christos 	ret
   5425  1.1  christos .size	unroll8_eor3_aes_gcm_dec_192_kernel,.-unroll8_eor3_aes_gcm_dec_192_kernel
   5426  1.1  christos .globl	unroll8_eor3_aes_gcm_enc_256_kernel
   5427  1.1  christos .type	unroll8_eor3_aes_gcm_enc_256_kernel,%function
   5428  1.1  christos .align	4
   5429  1.1  christos unroll8_eor3_aes_gcm_enc_256_kernel:
   5430  1.1  christos 	AARCH64_VALID_CALL_TARGET
   5431  1.1  christos 	cbz	x1, .L256_enc_ret
   5432  1.1  christos 	stp	d8, d9, [sp, #-80]!
   5433  1.1  christos 	lsr	x9, x1, #3
   5434  1.1  christos 	mov	x16, x4
   5435  1.1  christos 	mov	x8, x5
   5436  1.1  christos 	stp	d10, d11, [sp, #16]
   5437  1.1  christos 	stp	d12, d13, [sp, #32]
   5438  1.1  christos 	stp	d14, d15, [sp, #48]
   5439  1.1  christos 	mov	x5, #0xc200000000000000
   5440  1.1  christos 	stp	x5, xzr, [sp, #64]
   5441  1.1  christos 	add	x10, sp, #64
   5442  1.1  christos 
   5443  1.1  christos 	ld1	{ v0.16b}, [x16]					//CTR block 0
   5444  1.1  christos 
   5445  1.1  christos 	mov	x5, x9
   5446  1.1  christos 
   5447  1.1  christos 	mov	x15, #0x100000000			//set up counter increment
   5448  1.1  christos 	movi	v31.16b, #0x0
   5449  1.1  christos 	mov	v31.d[1], x15
   5450  1.1  christos 	sub	x5, x5, #1		//byte_len - 1
   5451  1.1  christos 
   5452  1.1  christos 	and	x5, x5, #0xffffffffffffff80	//number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
   5453  1.1  christos 
   5454  1.1  christos 	add	x5, x5, x0
   5455  1.1  christos 
   5456  1.1  christos 	rev32	v30.16b, v0.16b				//set up reversed counter
   5457  1.1  christos 
   5458  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 0
   5459  1.1  christos 
   5460  1.1  christos 	rev32	v1.16b, v30.16b				//CTR block 1
   5461  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 1
   5462  1.1  christos 
   5463  1.1  christos 	rev32	v2.16b, v30.16b				//CTR block 2
   5464  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 2
   5465  1.1  christos 
   5466  1.1  christos 	rev32	v3.16b, v30.16b				//CTR block 3
   5467  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 3
   5468  1.1  christos 
   5469  1.1  christos 	rev32	v4.16b, v30.16b				//CTR block 4
   5470  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 4
   5471  1.1  christos 
   5472  1.1  christos 	rev32	v5.16b, v30.16b				//CTR block 5
   5473  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 5
   5474  1.1  christos 	ldp	q26, q27, [x8, #0]				 	//load rk0, rk1
   5475  1.1  christos 
   5476  1.1  christos 	rev32	v6.16b, v30.16b				//CTR block 6
   5477  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 6
   5478  1.1  christos 
   5479  1.1  christos 	rev32	v7.16b, v30.16b				//CTR block 7
   5480  1.1  christos 
   5481  1.1  christos 	aese	v3.16b, v26.16b
   5482  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 0
   5483  1.1  christos 	aese	v4.16b, v26.16b
   5484  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 0
   5485  1.1  christos 	aese	v2.16b, v26.16b
   5486  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 0
   5487  1.1  christos 
   5488  1.1  christos 	aese	v0.16b, v26.16b
   5489  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 0
   5490  1.1  christos 	aese	v1.16b, v26.16b
   5491  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 0
   5492  1.1  christos 	aese	v6.16b, v26.16b
   5493  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 0
   5494  1.1  christos 
   5495  1.1  christos 	aese	v5.16b, v26.16b
   5496  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 0
   5497  1.1  christos 	aese	v7.16b, v26.16b
   5498  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 0
   5499  1.1  christos 	ldp	q28, q26, [x8, #32]				//load rk2, rk3
   5500  1.1  christos 
   5501  1.1  christos 	aese	v4.16b, v27.16b
   5502  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 1
   5503  1.1  christos 	aese	v1.16b, v27.16b
   5504  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 1
   5505  1.1  christos 	aese	v3.16b, v27.16b
   5506  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 1
   5507  1.1  christos 
   5508  1.1  christos 	aese	v6.16b, v27.16b
   5509  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 1
   5510  1.1  christos 	aese	v5.16b, v27.16b
   5511  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 1
   5512  1.1  christos 
   5513  1.1  christos 	aese	v2.16b, v27.16b
   5514  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 1
   5515  1.1  christos 
   5516  1.1  christos 	aese	v7.16b, v27.16b
   5517  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 1
   5518  1.1  christos 
   5519  1.1  christos 	aese	v2.16b, v28.16b
   5520  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 2
   5521  1.1  christos 	aese	v3.16b, v28.16b
   5522  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 2
   5523  1.1  christos 	aese	v0.16b, v27.16b
   5524  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 1
   5525  1.1  christos 
   5526  1.1  christos 	aese	v7.16b, v28.16b
   5527  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 2
   5528  1.1  christos 	aese	v6.16b, v28.16b
   5529  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 2
   5530  1.1  christos 	aese	v5.16b, v28.16b
   5531  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 2
   5532  1.1  christos 
   5533  1.1  christos 	aese	v4.16b, v28.16b
   5534  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 2
   5535  1.1  christos 	aese	v0.16b, v28.16b
   5536  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 2
   5537  1.1  christos 	aese	v1.16b, v28.16b
   5538  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 2
   5539  1.1  christos 
   5540  1.1  christos 	aese	v5.16b, v26.16b
   5541  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 3
   5542  1.1  christos 	aese	v3.16b, v26.16b
   5543  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 3
   5544  1.1  christos 	ldp	q27, q28, [x8, #64]				//load rk4, rk5
   5545  1.1  christos 
   5546  1.1  christos 	aese	v4.16b, v26.16b
   5547  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 3
   5548  1.1  christos 
   5549  1.1  christos 	aese	v1.16b, v26.16b
   5550  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 3
   5551  1.1  christos 	aese	v6.16b, v26.16b
   5552  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 3
   5553  1.1  christos 	aese	v7.16b, v26.16b
   5554  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 3
   5555  1.1  christos 
   5556  1.1  christos 	aese	v2.16b, v26.16b
   5557  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 3
   5558  1.1  christos 	aese	v0.16b, v26.16b
   5559  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 3
   5560  1.1  christos 
   5561  1.1  christos 	aese	v4.16b, v27.16b
   5562  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 4
   5563  1.1  christos 	aese	v6.16b, v27.16b
   5564  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 4
   5565  1.1  christos 	aese	v1.16b, v27.16b
   5566  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 4
   5567  1.1  christos 
   5568  1.1  christos 	aese	v2.16b, v27.16b
   5569  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 4
   5570  1.1  christos 	aese	v0.16b, v27.16b
   5571  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 4
   5572  1.1  christos 
   5573  1.1  christos 	aese	v3.16b, v27.16b
   5574  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 4
   5575  1.1  christos 	aese	v7.16b, v27.16b
   5576  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 4
   5577  1.1  christos 	aese	v5.16b, v27.16b
   5578  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 4
   5579  1.1  christos 
   5580  1.1  christos 	aese	v0.16b, v28.16b
   5581  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 5
   5582  1.1  christos 	aese	v2.16b, v28.16b
   5583  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 5
   5584  1.1  christos 	ldp	q26, q27, [x8, #96]				//load rk6, rk7
   5585  1.1  christos 
   5586  1.1  christos 	aese	v1.16b, v28.16b
   5587  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 5
   5588  1.1  christos 	aese	v4.16b, v28.16b
   5589  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 5
   5590  1.1  christos 	aese	v5.16b, v28.16b
   5591  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 5
   5592  1.1  christos 
   5593  1.1  christos 	aese	v3.16b, v28.16b
   5594  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 5
   5595  1.1  christos 	aese	v6.16b, v28.16b
   5596  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 5
   5597  1.1  christos 	aese	v7.16b, v28.16b
   5598  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 5
   5599  1.1  christos 
   5600  1.1  christos 	aese	v1.16b, v26.16b
   5601  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 6
   5602  1.1  christos 	aese	v5.16b, v26.16b
   5603  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 6
   5604  1.1  christos 	aese	v4.16b, v26.16b
   5605  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 6
   5606  1.1  christos 
   5607  1.1  christos 	aese	v2.16b, v26.16b
   5608  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 6
   5609  1.1  christos 	aese	v6.16b, v26.16b
   5610  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 6
   5611  1.1  christos 	aese	v0.16b, v26.16b
   5612  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 6
   5613  1.1  christos 
   5614  1.1  christos 	aese	v7.16b, v26.16b
   5615  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 6
   5616  1.1  christos 	aese	v3.16b, v26.16b
   5617  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 6
   5618  1.1  christos 	ldp	q28, q26, [x8, #128]				//load rk8, rk9
   5619  1.1  christos 
   5620  1.1  christos 	aese	v2.16b, v27.16b
   5621  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 7
   5622  1.1  christos 	aese	v0.16b, v27.16b
   5623  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 7
   5624  1.1  christos 
   5625  1.1  christos 	aese	v7.16b, v27.16b
   5626  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 7
   5627  1.1  christos 	aese	v6.16b, v27.16b
   5628  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 7
   5629  1.1  christos 	aese	v1.16b, v27.16b
   5630  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 7
   5631  1.1  christos 
   5632  1.1  christos 	aese	v5.16b, v27.16b
   5633  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 7
   5634  1.1  christos 	aese	v3.16b, v27.16b
   5635  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 7
   5636  1.1  christos 
   5637  1.1  christos 	aese	v4.16b, v27.16b
   5638  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 7
   5639  1.1  christos 
   5640  1.1  christos 	aese	v6.16b, v28.16b
   5641  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 8
   5642  1.1  christos 	aese	v1.16b, v28.16b
   5643  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 8
   5644  1.1  christos 
   5645  1.1  christos 	aese	v3.16b, v28.16b
   5646  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 8
   5647  1.1  christos 	aese	v0.16b, v28.16b
   5648  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 8
   5649  1.1  christos 	aese	v7.16b, v28.16b
   5650  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 8
   5651  1.1  christos 
   5652  1.1  christos 	aese	v5.16b, v28.16b
   5653  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 8
   5654  1.1  christos 	aese	v4.16b, v28.16b
   5655  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 8
   5656  1.1  christos 	aese	v2.16b, v28.16b
   5657  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 8
   5658  1.1  christos 
   5659  1.1  christos 	ld1	{ v19.16b}, [x3]
   5660  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8
   5661  1.1  christos 	rev64	v19.16b, v19.16b
   5662  1.1  christos 	ldp	q27, q28, [x8, #160]				//load rk10, rk11
   5663  1.1  christos 
   5664  1.1  christos 	aese	v6.16b, v26.16b
   5665  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 9
   5666  1.1  christos 	aese	v7.16b, v26.16b
   5667  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 9
   5668  1.1  christos 	aese	v3.16b, v26.16b
   5669  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 9
   5670  1.1  christos 
   5671  1.1  christos 	aese	v4.16b, v26.16b
   5672  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 9
   5673  1.1  christos 	aese	v5.16b, v26.16b
   5674  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 9
   5675  1.1  christos 	aese	v2.16b, v26.16b
   5676  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 9
   5677  1.1  christos 
   5678  1.1  christos 	aese	v1.16b, v26.16b
   5679  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 9
   5680  1.1  christos 
   5681  1.1  christos 	aese	v7.16b, v27.16b
   5682  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 10
   5683  1.1  christos 	aese	v4.16b, v27.16b
   5684  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 10
   5685  1.1  christos 	aese	v0.16b, v26.16b
   5686  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 9
   5687  1.1  christos 
   5688  1.1  christos 	aese	v1.16b, v27.16b
   5689  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 10
   5690  1.1  christos 	aese	v5.16b, v27.16b
   5691  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 10
   5692  1.1  christos 	aese	v3.16b, v27.16b
   5693  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 10
   5694  1.1  christos 
   5695  1.1  christos 	aese	v2.16b, v27.16b
   5696  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 10
   5697  1.1  christos 	aese	v0.16b, v27.16b
   5698  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 10
   5699  1.1  christos 	aese	v6.16b, v27.16b
   5700  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 10
   5701  1.1  christos 
   5702  1.1  christos 	aese	v4.16b, v28.16b
   5703  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 11
   5704  1.1  christos 	ldp	q26, q27, [x8, #192]				//load rk12, rk13
   5705  1.1  christos 	aese	v5.16b, v28.16b
   5706  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 11
   5707  1.1  christos 
   5708  1.1  christos 	aese	v2.16b, v28.16b
   5709  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 11
   5710  1.1  christos 	aese	v6.16b, v28.16b
   5711  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 11
   5712  1.1  christos 	aese	v1.16b, v28.16b
   5713  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 11
   5714  1.1  christos 
   5715  1.1  christos 	aese	v0.16b, v28.16b
   5716  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 11
   5717  1.1  christos 	aese	v3.16b, v28.16b
   5718  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 11
   5719  1.1  christos 	aese	v7.16b, v28.16b
   5720  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 11
   5721  1.1  christos 
   5722  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 7
   5723  1.1  christos 	ldr	q28, [x8, #224]					//load rk14
   5724  1.1  christos 
   5725  1.1  christos 	aese	v4.16b, v26.16b
   5726  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 12
   5727  1.1  christos 	aese	v2.16b, v26.16b
   5728  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 12
   5729  1.1  christos 	aese	v1.16b, v26.16b
   5730  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 12
   5731  1.1  christos 
   5732  1.1  christos 	aese	v0.16b, v26.16b
   5733  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 12
   5734  1.1  christos 	aese	v5.16b, v26.16b
   5735  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 12
   5736  1.1  christos 	aese	v3.16b, v26.16b
   5737  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 12
   5738  1.1  christos 
   5739  1.1  christos 	aese	v2.16b, v27.16b						//AES block 2 - round 13
   5740  1.1  christos 	aese	v1.16b, v27.16b						//AES block 1 - round 13
   5741  1.1  christos 	aese	v4.16b, v27.16b						//AES block 4 - round 13
   5742  1.1  christos 
   5743  1.1  christos 	aese	v6.16b, v26.16b
   5744  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 12
   5745  1.1  christos 	aese	v7.16b, v26.16b
   5746  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 12
   5747  1.1  christos 
   5748  1.1  christos 	aese	v0.16b, v27.16b						//AES block 0 - round 13
   5749  1.1  christos 	aese	v5.16b, v27.16b						//AES block 5 - round 13
   5750  1.1  christos 
   5751  1.1  christos 	aese	v6.16b, v27.16b						//AES block 6 - round 13
   5752  1.1  christos 	aese	v7.16b, v27.16b						//AES block 7 - round 13
   5753  1.1  christos 	aese	v3.16b, v27.16b						//AES block 3 - round 13
   5754  1.1  christos 
   5755  1.1  christos 	add	x4, x0, x1, lsr #3		//end_input_ptr
   5756  1.1  christos 	cmp	x0, x5				//check if we have <= 8 blocks
   5757  1.1  christos 	b.ge	.L256_enc_tail						//handle tail
   5758  1.1  christos 
   5759  1.1  christos 	ldp	q8, q9, [x0], #32			//AES block 0, 1 - load plaintext
   5760  1.1  christos 
   5761  1.1  christos 	ldp	q10, q11, [x0], #32			//AES block 2, 3 - load plaintext
   5762  1.1  christos 
   5763  1.1  christos .inst	0xce007108	//eor3 v8.16b, v8.16b, v0.16b, v28.16b				//AES block 0 - result
   5764  1.1  christos 	rev32	v0.16b, v30.16b				//CTR block 8
   5765  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8
   5766  1.1  christos 
   5767  1.1  christos .inst	0xce017129	//eor3 v9.16b, v9.16b, v1.16b, v28.16b				//AES block 1 - result
   5768  1.1  christos .inst	0xce03716b	//eor3 v11.16b, v11.16b, v3.16b, v28.16b				//AES block 3 - result
   5769  1.1  christos 
   5770  1.1  christos 	rev32	v1.16b, v30.16b				//CTR block 9
   5771  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 9
   5772  1.1  christos 	ldp	q12, q13, [x0], #32			//AES block 4, 5 - load plaintext
   5773  1.1  christos 
   5774  1.1  christos 	ldp	q14, q15, [x0], #32			//AES block 6, 7 - load plaintext
   5775  1.1  christos .inst	0xce02714a	//eor3 v10.16b, v10.16b, v2.16b, v28.16b				//AES block 2 - result
   5776  1.1  christos 	cmp	x0, x5				//check if we have <= 8 blocks
   5777  1.1  christos 
   5778  1.1  christos 	rev32	v2.16b, v30.16b				//CTR block 10
   5779  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 10
   5780  1.1  christos 	stp	q8, q9, [x2], #32			//AES block 0, 1 - store result
   5781  1.1  christos 
   5782  1.1  christos 	stp	q10, q11, [x2], #32			//AES block 2, 3 - store result
   5783  1.1  christos 
   5784  1.1  christos 	rev32	v3.16b, v30.16b				//CTR block 11
   5785  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 11
   5786  1.1  christos 
   5787  1.1  christos .inst	0xce04718c	//eor3 v12.16b, v12.16b, v4.16b, v28.16b				//AES block 4 - result
   5788  1.1  christos 
   5789  1.1  christos .inst	0xce0771ef	//eor3 v15.16b, v15.16b, v7.16b, v28.16b				//AES block 7 - result
   5790  1.1  christos .inst	0xce0671ce	//eor3 v14.16b, v14.16b, v6.16b, v28.16b				//AES block 6 - result
   5791  1.1  christos .inst	0xce0571ad	//eor3 v13.16b, v13.16b, v5.16b, v28.16b				//AES block 5 - result
   5792  1.1  christos 
   5793  1.1  christos 	stp	q12, q13, [x2], #32			//AES block 4, 5 - store result
   5794  1.1  christos 	rev32	v4.16b, v30.16b				//CTR block 12
   5795  1.1  christos 
   5796  1.1  christos 	stp	q14, q15, [x2], #32			//AES block 6, 7 - store result
   5797  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 12
   5798  1.1  christos 	b.ge	.L256_enc_prepretail					//do prepretail
   5799  1.1  christos 
   5800  1.1  christos .L256_enc_main_loop:	//main	loop start
   5801  1.1  christos 	ldp	q26, q27, [x8, #0]					//load rk0, rk1
   5802  1.1  christos 
   5803  1.1  christos 	rev32	v5.16b, v30.16b				//CTR block 8k+13
   5804  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+13
   5805  1.1  christos 	ldr	q21, [x3, #144]				//load h6k | h5k
   5806  1.1  christos 	ldr	q24, [x3, #192]				//load h8k | h7k
   5807  1.1  christos 
   5808  1.1  christos 	rev64	v11.16b, v11.16b						//GHASH block 8k+3
   5809  1.1  christos 	ldr	q20, [x3, #128]				//load h5l | h5h
   5810  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   5811  1.1  christos 	ldr	q22, [x3, #160]				//load h6l | h6h
   5812  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   5813  1.1  christos 	rev64	v9.16b, v9.16b						//GHASH block 8k+1
   5814  1.1  christos 
   5815  1.1  christos 	rev32	v6.16b, v30.16b				//CTR block 8k+14
   5816  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+14
   5817  1.1  christos 	rev64	v8.16b, v8.16b						//GHASH block 8k
   5818  1.1  christos 
   5819  1.1  christos 	rev64	v12.16b, v12.16b						//GHASH block 8k+4
   5820  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8				//PRE 0
   5821  1.1  christos 	ldr	q23, [x3, #176]				//load h7l | h7h
   5822  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   5823  1.1  christos 	ldr	q25, [x3, #208]				//load h8l | h8h
   5824  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   5825  1.1  christos 
   5826  1.1  christos 	aese	v3.16b, v26.16b
   5827  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 0
   5828  1.1  christos 	aese	v5.16b, v26.16b
   5829  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 0
   5830  1.1  christos 	rev32	v7.16b, v30.16b				//CTR block 8k+15
   5831  1.1  christos 
   5832  1.1  christos 	aese	v0.16b, v26.16b
   5833  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 0
   5834  1.1  christos 	aese	v1.16b, v26.16b
   5835  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 0
   5836  1.1  christos 	aese	v6.16b, v26.16b
   5837  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 0
   5838  1.1  christos 
   5839  1.1  christos 	aese	v7.16b, v26.16b
   5840  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 0
   5841  1.1  christos 	aese	v2.16b, v26.16b
   5842  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 0
   5843  1.1  christos 	aese	v4.16b, v26.16b
   5844  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 0
   5845  1.1  christos 
   5846  1.1  christos 	ldp	q28, q26, [x8, #32]				//load rk2, rk3
   5847  1.1  christos 	eor	v8.16b, v8.16b, v19.16b				 	//PRE 1
   5848  1.1  christos 	aese	v6.16b, v27.16b
   5849  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 1
   5850  1.1  christos 
   5851  1.1  christos 	aese	v2.16b, v27.16b
   5852  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 1
   5853  1.1  christos 	aese	v1.16b, v27.16b
   5854  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 1
   5855  1.1  christos 	aese	v0.16b, v27.16b
   5856  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 1
   5857  1.1  christos 
   5858  1.1  christos 	aese	v4.16b, v27.16b
   5859  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 1
   5860  1.1  christos 	aese	v3.16b, v27.16b
   5861  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 1
   5862  1.1  christos 	aese	v5.16b, v27.16b
   5863  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 1
   5864  1.1  christos 
   5865  1.1  christos 	pmull2	v17.1q, v8.2d, v25.2d				//GHASH block 8k - high
   5866  1.1  christos 	pmull	v19.1q, v8.1d, v25.1d				//GHASH block 8k - low
   5867  1.1  christos 	pmull2	v16.1q, v9.2d, v23.2d				//GHASH block 8k+1 - high
   5868  1.1  christos 
   5869  1.1  christos 	trn1	v18.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   5870  1.1  christos 	trn2	v8.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   5871  1.1  christos 	aese	v7.16b, v27.16b
   5872  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 1
   5873  1.1  christos 
   5874  1.1  christos 	aese	v1.16b, v28.16b
   5875  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 2
   5876  1.1  christos 	aese	v5.16b, v28.16b
   5877  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 2
   5878  1.1  christos 	aese	v6.16b, v28.16b
   5879  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 2
   5880  1.1  christos 
   5881  1.1  christos 	aese	v2.16b, v28.16b
   5882  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 2
   5883  1.1  christos 	pmull	v23.1q, v9.1d, v23.1d				//GHASH block 8k+1 - low
   5884  1.1  christos 	aese	v4.16b, v28.16b
   5885  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 2
   5886  1.1  christos 
   5887  1.1  christos 	aese	v5.16b, v26.16b
   5888  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 3
   5889  1.1  christos 	aese	v6.16b, v26.16b
   5890  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 3
   5891  1.1  christos 	aese	v0.16b, v28.16b
   5892  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 2
   5893  1.1  christos 
   5894  1.1  christos 	aese	v1.16b, v26.16b
   5895  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 3
   5896  1.1  christos 	aese	v7.16b, v28.16b
   5897  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 2
   5898  1.1  christos 	aese	v3.16b, v28.16b
   5899  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 2
   5900  1.1  christos 
   5901  1.1  christos 	aese	v4.16b, v26.16b
   5902  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 3
   5903  1.1  christos 	rev64	v14.16b, v14.16b						//GHASH block 8k+6
   5904  1.1  christos 	pmull2	v9.1q, v11.2d, v20.2d				//GHASH block 8k+3 - high
   5905  1.1  christos 
   5906  1.1  christos 	aese	v3.16b, v26.16b
   5907  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 3
   5908  1.1  christos 	ldp	q27, q28, [x8, #64]				//load rk4, rk5
   5909  1.1  christos 	rev64	v10.16b, v10.16b						//GHASH block 8k+2
   5910  1.1  christos 
   5911  1.1  christos 	aese	v2.16b, v26.16b
   5912  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 3
   5913  1.1  christos 	aese	v7.16b, v26.16b
   5914  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 3
   5915  1.1  christos 	aese	v0.16b, v26.16b
   5916  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 3
   5917  1.1  christos 
   5918  1.1  christos 	eor	v17.16b, v17.16b, v16.16b				//GHASH block 8k+1 - high
   5919  1.1  christos 	pmull2	v29.1q, v10.2d, v22.2d				//GHASH block 8k+2 - high
   5920  1.1  christos 	rev64	v13.16b, v13.16b						//GHASH block 8k+5
   5921  1.1  christos 
   5922  1.1  christos 	pmull	v20.1q, v11.1d, v20.1d				//GHASH block 8k+3 - low
   5923  1.1  christos 	eor	v19.16b, v19.16b, v23.16b				//GHASH block 8k+1 - low
   5924  1.1  christos 	ldr	q23, [x3, #80]				//load h3l | h3h
   5925  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   5926  1.1  christos 	ldr	q25, [x3, #112]				//load h4l | h4h
   5927  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   5928  1.1  christos 
   5929  1.1  christos 	trn1	v16.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   5930  1.1  christos .inst	0xce1d2631	//eor3 v17.16b, v17.16b, v29.16b, v9.16b			//GHASH block 8k+2, 8k+3 - high
   5931  1.1  christos 	pmull	v22.1q, v10.1d, v22.1d				//GHASH block 8k+2 - low
   5932  1.1  christos 
   5933  1.1  christos 	aese	v4.16b, v27.16b
   5934  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 4
   5935  1.1  christos 	aese	v1.16b, v27.16b
   5936  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 4
   5937  1.1  christos 	aese	v5.16b, v27.16b
   5938  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 4
   5939  1.1  christos 
   5940  1.1  christos 	aese	v7.16b, v27.16b
   5941  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 4
   5942  1.1  christos 	aese	v3.16b, v27.16b
   5943  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 4
   5944  1.1  christos 	aese	v2.16b, v27.16b
   5945  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 4
   5946  1.1  christos 
   5947  1.1  christos 	trn1	v29.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   5948  1.1  christos 	aese	v6.16b, v27.16b
   5949  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 4
   5950  1.1  christos 	aese	v0.16b, v27.16b
   5951  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 4
   5952  1.1  christos 
   5953  1.1  christos 	trn2	v10.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   5954  1.1  christos 	eor	v8.16b, v8.16b, v18.16b			//GHASH block 8k, 8k+1 - mid
   5955  1.1  christos 	ldp	q26, q27, [x8, #96]				//load rk6, rk7
   5956  1.1  christos 
   5957  1.1  christos 	aese	v5.16b, v28.16b
   5958  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 5
   5959  1.1  christos 	aese	v7.16b, v28.16b
   5960  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 5
   5961  1.1  christos 	aese	v4.16b, v28.16b
   5962  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 5
   5963  1.1  christos 
   5964  1.1  christos 	eor	v10.16b, v10.16b, v29.16b				//GHASH block 8k+2, 8k+3 - mid
   5965  1.1  christos 	aese	v2.16b, v28.16b
   5966  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 5
   5967  1.1  christos 	rev64	v15.16b, v15.16b						//GHASH block 8k+7
   5968  1.1  christos 
   5969  1.1  christos 	aese	v3.16b, v28.16b
   5970  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 5
   5971  1.1  christos 	aese	v6.16b, v28.16b
   5972  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 5
   5973  1.1  christos 	aese	v1.16b, v28.16b
   5974  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 5
   5975  1.1  christos 
   5976  1.1  christos 	pmull2	v29.1q, v10.2d, v21.2d				//GHASH block 8k+2 - mid
   5977  1.1  christos 	pmull2	v18.1q, v8.2d, v24.2d				//GHASH block 8k	- mid
   5978  1.1  christos 	aese	v0.16b, v28.16b
   5979  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 5
   5980  1.1  christos 
   5981  1.1  christos 	pmull	v24.1q, v8.1d, v24.1d				//GHASH block 8k+1 - mid
   5982  1.1  christos 	aese	v4.16b, v26.16b
   5983  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 6
   5984  1.1  christos 	aese	v2.16b, v26.16b
   5985  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 6
   5986  1.1  christos 
   5987  1.1  christos 	aese	v6.16b, v26.16b
   5988  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 6
   5989  1.1  christos 	aese	v1.16b, v26.16b
   5990  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 6
   5991  1.1  christos 	aese	v7.16b, v26.16b
   5992  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 6
   5993  1.1  christos 
   5994  1.1  christos 	eor	v18.16b, v18.16b, v24.16b				//GHASH block 8k+1 - mid
   5995  1.1  christos 	pmull	v21.1q, v10.1d, v21.1d				//GHASH block 8k+3 - mid
   5996  1.1  christos 	aese	v5.16b, v26.16b
   5997  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 6
   5998  1.1  christos 
   5999  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+2, 8k+3 - low
   6000  1.1  christos 	aese	v3.16b, v26.16b
   6001  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 6
   6002  1.1  christos 	aese	v0.16b, v26.16b
   6003  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 6
   6004  1.1  christos 
   6005  1.1  christos 	ldp	q28, q26, [x8, #128]				//load rk8, rk9
   6006  1.1  christos 	pmull2	v8.1q, v12.2d, v25.2d				//GHASH block 8k+4 - high
   6007  1.1  christos 	aese	v5.16b, v27.16b
   6008  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 7
   6009  1.1  christos 
   6010  1.1  christos 	ldr	q20, [x3, #32]				//load h1l | h1h
   6011  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   6012  1.1  christos 	ldr	q22, [x3, #64]				//load h2l | h2h
   6013  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   6014  1.1  christos 	aese	v2.16b, v27.16b
   6015  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 7
   6016  1.1  christos .inst	0xce157652	//eor3 v18.16b, v18.16b, v21.16b, v29.16b			//GHASH block 8k+2, 8k+3 - mid
   6017  1.1  christos 
   6018  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   6019  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   6020  1.1  christos 	aese	v6.16b, v27.16b
   6021  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 7
   6022  1.1  christos 	aese	v3.16b, v27.16b
   6023  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 7
   6024  1.1  christos 
   6025  1.1  christos 	aese	v0.16b, v27.16b
   6026  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 7
   6027  1.1  christos 	aese	v7.16b, v27.16b
   6028  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 7
   6029  1.1  christos 	pmull	v25.1q, v12.1d, v25.1d				//GHASH block 8k+4 - low
   6030  1.1  christos 
   6031  1.1  christos 	trn2	v12.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   6032  1.1  christos 	aese	v4.16b, v27.16b
   6033  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 7
   6034  1.1  christos 	aese	v1.16b, v27.16b
   6035  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 7
   6036  1.1  christos 
   6037  1.1  christos 	pmull2	v10.1q, v13.2d, v23.2d				//GHASH block 8k+5 - high
   6038  1.1  christos 	aese	v7.16b, v28.16b
   6039  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 8
   6040  1.1  christos 	aese	v0.16b, v28.16b
   6041  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 8
   6042  1.1  christos 
   6043  1.1  christos 	pmull	v23.1q, v13.1d, v23.1d				//GHASH block 8k+5 - low
   6044  1.1  christos 	trn1	v13.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   6045  1.1  christos 	eor	v12.16b, v12.16b, v16.16b				//GHASH block 8k+4, 8k+5 - mid
   6046  1.1  christos 
   6047  1.1  christos 	aese	v3.16b, v28.16b
   6048  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 8
   6049  1.1  christos 	aese	v0.16b, v26.16b
   6050  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 9
   6051  1.1  christos 	aese	v1.16b, v28.16b
   6052  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 8
   6053  1.1  christos 
   6054  1.1  christos 	pmull2	v16.1q, v12.2d, v24.2d				//GHASH block 8k+4 - mid
   6055  1.1  christos 	pmull	v24.1q, v12.1d, v24.1d				//GHASH block 8k+5 - mid
   6056  1.1  christos 	aese	v2.16b, v28.16b
   6057  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 8
   6058  1.1  christos 
   6059  1.1  christos 	aese	v5.16b, v28.16b
   6060  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 8
   6061  1.1  christos 	pmull2	v11.1q, v14.2d, v22.2d				//GHASH block 8k+6 - high
   6062  1.1  christos 	pmull	v22.1q, v14.1d, v22.1d				//GHASH block 8k+6 - low
   6063  1.1  christos 
   6064  1.1  christos 	aese	v6.16b, v28.16b
   6065  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 8
   6066  1.1  christos 	trn2	v14.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   6067  1.1  christos 	aese	v4.16b, v28.16b
   6068  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 8
   6069  1.1  christos 
   6070  1.1  christos .inst	0xce184252	//eor3 v18.16b, v18.16b, v24.16b, v16.16b			//GHASH block 8k+4, 8k+5 - mid
   6071  1.1  christos 	aese	v7.16b, v26.16b
   6072  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 9
   6073  1.1  christos 	aese	v5.16b, v26.16b
   6074  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 9
   6075  1.1  christos 
   6076  1.1  christos 	eor	v14.16b, v14.16b, v13.16b				//GHASH block 8k+6, 8k+7 - mid
   6077  1.1  christos 	aese	v6.16b, v26.16b
   6078  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 9
   6079  1.1  christos 	aese	v4.16b, v26.16b
   6080  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 9
   6081  1.1  christos 
   6082  1.1  christos 	ldp	q27, q28, [x8, #160]				//load rk10, rk11
   6083  1.1  christos 	aese	v2.16b, v26.16b
   6084  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 9
   6085  1.1  christos 	aese	v3.16b, v26.16b
   6086  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 9
   6087  1.1  christos 
   6088  1.1  christos 	pmull2	v12.1q, v15.2d, v20.2d				//GHASH block 8k+7 - high
   6089  1.1  christos .inst	0xce195e73	//eor3 v19.16b, v19.16b, v25.16b, v23.16b			//GHASH block 8k+4, 8k+5 - low
   6090  1.1  christos 	pmull	v20.1q, v15.1d, v20.1d				//GHASH block 8k+7 - low
   6091  1.1  christos 
   6092  1.1  christos 	ldr	d16, [x10]			//MODULO - load modulo constant
   6093  1.1  christos 	pmull2	v13.1q, v14.2d, v21.2d				//GHASH block 8k+6 - mid
   6094  1.1  christos 	pmull	v21.1q, v14.1d, v21.1d				//GHASH block 8k+7 - mid
   6095  1.1  christos 
   6096  1.1  christos 	aese	v1.16b, v26.16b
   6097  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 9
   6098  1.1  christos 
   6099  1.1  christos .inst	0xce153652	//eor3 v18.16b, v18.16b, v21.16b, v13.16b			//GHASH block 8k+6, 8k+7 - mid
   6100  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+6, 8k+7 - low
   6101  1.1  christos .inst	0xce082a31	//eor3 v17.16b, v17.16b, v8.16b, v10.16b			//GHASH block 8k+4, 8k+5 - high
   6102  1.1  christos 
   6103  1.1  christos 	aese	v4.16b, v27.16b
   6104  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 10
   6105  1.1  christos 	aese	v3.16b, v27.16b
   6106  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 10
   6107  1.1  christos 	aese	v5.16b, v27.16b
   6108  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 10
   6109  1.1  christos 
   6110  1.1  christos 	aese	v0.16b, v27.16b
   6111  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 10
   6112  1.1  christos 	aese	v2.16b, v27.16b
   6113  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 10
   6114  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+15
   6115  1.1  christos 
   6116  1.1  christos 	aese	v1.16b, v27.16b
   6117  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 10
   6118  1.1  christos 	aese	v7.16b, v27.16b
   6119  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 10
   6120  1.1  christos 	aese	v6.16b, v27.16b
   6121  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 10
   6122  1.1  christos 
   6123  1.1  christos .inst	0xce0b3231	//eor3 v17.16b, v17.16b, v11.16b, v12.16b			//GHASH block 8k+6, 8k+7 - high
   6124  1.1  christos 
   6125  1.1  christos 	ldp	q26, q27, [x8, #192]				//load rk12, rk13
   6126  1.1  christos 	rev32	v20.16b, v30.16b					//CTR block 8k+16
   6127  1.1  christos 
   6128  1.1  christos 	ext	v21.16b, v17.16b, v17.16b, #8			 	//MODULO - other top alignment
   6129  1.1  christos 	ldp	q8, q9, [x0], #32			//AES block 8k+8, 8k+9 - load plaintext
   6130  1.1  christos 	aese	v2.16b, v28.16b
   6131  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 11
   6132  1.1  christos 
   6133  1.1  christos 	aese	v6.16b, v28.16b
   6134  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 11
   6135  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+16
   6136  1.1  christos 	aese	v3.16b, v28.16b
   6137  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 11
   6138  1.1  christos 
   6139  1.1  christos 	aese	v0.16b, v28.16b
   6140  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 11
   6141  1.1  christos 	aese	v7.16b, v28.16b
   6142  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 11
   6143  1.1  christos 
   6144  1.1  christos 	pmull	v29.1q, v17.1d, v16.1d		 	//MODULO - top 64b align with mid
   6145  1.1  christos 	aese	v1.16b, v28.16b
   6146  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 11
   6147  1.1  christos 
   6148  1.1  christos 	aese	v7.16b, v26.16b
   6149  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 12
   6150  1.1  christos 	aese	v5.16b, v28.16b
   6151  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 11
   6152  1.1  christos 
   6153  1.1  christos 	aese	v3.16b, v26.16b
   6154  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 12
   6155  1.1  christos 	aese	v6.16b, v26.16b
   6156  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 12
   6157  1.1  christos 	rev32	v22.16b, v30.16b					//CTR block 8k+17
   6158  1.1  christos 
   6159  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+17
   6160  1.1  christos 	aese	v4.16b, v28.16b
   6161  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 11
   6162  1.1  christos .inst	0xce114e52	//eor3 v18.16b, v18.16b, v17.16b, v19.16b		 	//MODULO - karatsuba tidy up
   6163  1.1  christos 
   6164  1.1  christos 	aese	v5.16b, v26.16b
   6165  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 12
   6166  1.1  christos 	ldr	q28, [x8, #224]					//load rk14
   6167  1.1  christos 	aese	v7.16b, v27.16b						//AES block 8k+15 - round 13
   6168  1.1  christos 
   6169  1.1  christos 	ldp	q10, q11, [x0], #32			//AES block 8k+10, 8k+11 - load plaintext
   6170  1.1  christos 	aese	v2.16b, v26.16b
   6171  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 12
   6172  1.1  christos 	aese	v4.16b, v26.16b
   6173  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 12
   6174  1.1  christos 
   6175  1.1  christos .inst	0xce1d5652	//eor3 v18.16b, v18.16b, v29.16b, v21.16b			//MODULO - fold into mid
   6176  1.1  christos 	aese	v1.16b, v26.16b
   6177  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 12
   6178  1.1  christos 	ldp	q12, q13, [x0], #32			//AES block 4, 5 - load plaintext
   6179  1.1  christos 
   6180  1.1  christos 	ldp	q14, q15, [x0], #32			//AES block 6, 7 - load plaintext
   6181  1.1  christos 	aese	v2.16b, v27.16b						//AES block 8k+10 - round 13
   6182  1.1  christos 	aese	v4.16b, v27.16b						//AES block 8k+12 - round 13
   6183  1.1  christos 
   6184  1.1  christos 	rev32	v23.16b, v30.16b					//CTR block 8k+18
   6185  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+18
   6186  1.1  christos 	aese	v5.16b, v27.16b						//AES block 8k+13 - round 13
   6187  1.1  christos 
   6188  1.1  christos 	aese	v0.16b, v26.16b
   6189  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 12
   6190  1.1  christos 	aese	v3.16b, v27.16b						//AES block 8k+11 - round 13
   6191  1.1  christos 	cmp	x0, x5				//.LOOP CONTROL
   6192  1.1  christos 
   6193  1.1  christos .inst	0xce02714a	//eor3 v10.16b, v10.16b, v2.16b, v28.16b				//AES block 8k+10 - result
   6194  1.1  christos 	rev32	v25.16b, v30.16b					//CTR block 8k+19
   6195  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+19
   6196  1.1  christos 
   6197  1.1  christos 	aese	v0.16b, v27.16b						//AES block 8k+8 - round 13
   6198  1.1  christos 	aese	v6.16b, v27.16b						//AES block 8k+14 - round 13
   6199  1.1  christos .inst	0xce0571ad	//eor3 v13.16b, v13.16b, v5.16b, v28.16b				//AES block 5 - result
   6200  1.1  christos 
   6201  1.1  christos 	ext	v21.16b, v18.16b, v18.16b, #8				//MODULO - other mid alignment
   6202  1.1  christos 	pmull	v17.1q, v18.1d, v16.1d			//MODULO - mid 64b align with low
   6203  1.1  christos 	aese	v1.16b, v27.16b						//AES block 8k+9 - round 13
   6204  1.1  christos 
   6205  1.1  christos .inst	0xce04718c	//eor3 v12.16b, v12.16b, v4.16b, v28.16b				//AES block 4 - result
   6206  1.1  christos 	rev32	v4.16b, v30.16b				//CTR block 8k+20
   6207  1.1  christos .inst	0xce03716b	//eor3 v11.16b, v11.16b, v3.16b, v28.16b				//AES block 8k+11 - result
   6208  1.1  christos 
   6209  1.1  christos 	mov	v3.16b, v25.16b					//CTR block 8k+19
   6210  1.1  christos .inst	0xce017129	//eor3 v9.16b, v9.16b, v1.16b, v28.16b				//AES block 8k+9 - result
   6211  1.1  christos .inst	0xce007108	//eor3 v8.16b, v8.16b, v0.16b, v28.16b				//AES block 8k+8 - result
   6212  1.1  christos 
   6213  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+20
   6214  1.1  christos 	stp	q8, q9, [x2], #32			//AES block 8k+8, 8k+9 - store result
   6215  1.1  christos 	mov	v2.16b, v23.16b					//CTR block 8k+18
   6216  1.1  christos 
   6217  1.1  christos .inst	0xce0771ef	//eor3 v15.16b, v15.16b, v7.16b, v28.16b				//AES block 7 - result
   6218  1.1  christos .inst	0xce154673	//eor3 v19.16b, v19.16b, v21.16b, v17.16b		 	//MODULO - fold into low
   6219  1.1  christos 	stp	q10, q11, [x2], #32			//AES block 8k+10, 8k+11 - store result
   6220  1.1  christos 
   6221  1.1  christos .inst	0xce0671ce	//eor3 v14.16b, v14.16b, v6.16b, v28.16b				//AES block 6 - result
   6222  1.1  christos 	mov	v1.16b, v22.16b					//CTR block 8k+17
   6223  1.1  christos 	stp	q12, q13, [x2], #32			//AES block 4, 5 - store result
   6224  1.1  christos 
   6225  1.1  christos 	stp	q14, q15, [x2], #32			//AES block 6, 7 - store result
   6226  1.1  christos 	mov	v0.16b, v20.16b					//CTR block 8k+16
   6227  1.1  christos 	b.lt	.L256_enc_main_loop
   6228  1.1  christos 
   6229  1.1  christos .L256_enc_prepretail:	//PREPRETAIL - AES rounds 0-13 for counter blocks 8k+8..8k+15, interleaved with GHASH of v8-v15 and its MODULO reduction; no plaintext is loaded and nothing is stored here
   6230  1.1  christos 	rev32	v5.16b, v30.16b				//CTR block 8k+13
   6231  1.1  christos 	ldp	q26, q27, [x8, #0]					//load rk0, rk1
   6232  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+13
   6233  1.1  christos 
   6234  1.1  christos 	rev64	v10.16b, v10.16b						//GHASH block 8k+2
   6235  1.1  christos 
   6236  1.1  christos 	rev32	v6.16b, v30.16b				//CTR block 8k+14
   6237  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+14
   6238  1.1  christos 
   6239  1.1  christos 	rev64	v13.16b, v13.16b						//GHASH block 8k+5
   6240  1.1  christos 	ldr	q21, [x3, #144]				//load h6k | h5k
   6241  1.1  christos 	ldr	q24, [x3, #192]				//load h8k | h7k
   6242  1.1  christos 
   6243  1.1  christos 	rev32	v7.16b, v30.16b				//CTR block 8k+15
   6244  1.1  christos 
   6245  1.1  christos 	aese	v6.16b, v26.16b
   6246  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 0
   6247  1.1  christos 	aese	v4.16b, v26.16b
   6248  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 0
   6249  1.1  christos 	aese	v1.16b, v26.16b
   6250  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 0
   6251  1.1  christos 
   6252  1.1  christos 	aese	v5.16b, v26.16b
   6253  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 0
   6254  1.1  christos 	aese	v0.16b, v26.16b
   6255  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 0
   6256  1.1  christos 
   6257  1.1  christos 	aese	v2.16b, v26.16b
   6258  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 0
   6259  1.1  christos 	aese	v7.16b, v26.16b
   6260  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 0
   6261  1.1  christos 	aese	v3.16b, v26.16b
   6262  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 0
   6263  1.1  christos 
   6264  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8				//PRE 0 - swap the 64-bit halves of v19 (the running GHASH value left by the last MODULO fold into low)
   6265  1.1  christos 	rev64	v8.16b, v8.16b						//GHASH block 8k
   6266  1.1  christos 	aese	v1.16b, v27.16b
   6267  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 1
   6268  1.1  christos 
   6269  1.1  christos 	rev64	v9.16b, v9.16b						//GHASH block 8k+1
   6270  1.1  christos 	ldp	q28, q26, [x8, #32]				//load rk2, rk3
   6271  1.1  christos 	aese	v3.16b, v27.16b
   6272  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 1
   6273  1.1  christos 
   6274  1.1  christos 	ldr	q23, [x3, #176]				//load h7l | h7h
   6275  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   6276  1.1  christos 	ldr	q25, [x3, #208]				//load h8l | h8h
   6277  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   6278  1.1  christos 	aese	v2.16b, v27.16b
   6279  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 1
   6280  1.1  christos 
   6281  1.1  christos 	ldr	q20, [x3, #128]				//load h5l | h5h
   6282  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   6283  1.1  christos 	ldr	q22, [x3, #160]				//load h6l | h6h
   6284  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   6285  1.1  christos 	aese	v0.16b, v27.16b
   6286  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 1
   6287  1.1  christos 	aese	v5.16b, v27.16b
   6288  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 1
   6289  1.1  christos 
   6290  1.1  christos 	aese	v4.16b, v27.16b
   6291  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 1
   6292  1.1  christos 	eor	v8.16b, v8.16b, v19.16b					//PRE 1 - xor the running GHASH value into GHASH block 8k
   6293  1.1  christos 
   6294  1.1  christos 	rev64	v11.16b, v11.16b						//GHASH block 8k+3
   6295  1.1  christos 	aese	v6.16b, v27.16b
   6296  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 1
   6297  1.1  christos 
   6298  1.1  christos 	aese	v1.16b, v28.16b
   6299  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 2
   6300  1.1  christos 	aese	v2.16b, v28.16b
   6301  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 2
   6302  1.1  christos 	aese	v7.16b, v27.16b
   6303  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 1
   6304  1.1  christos 
   6305  1.1  christos 	aese	v4.16b, v28.16b
   6306  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 2
   6307  1.1  christos 	aese	v0.16b, v28.16b
   6308  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 2
   6309  1.1  christos 	aese	v6.16b, v28.16b
   6310  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 2
   6311  1.1  christos 
   6312  1.1  christos 	aese	v5.16b, v28.16b
   6313  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 2
   6314  1.1  christos 	aese	v7.16b, v28.16b
   6315  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 2
   6316  1.1  christos 	aese	v3.16b, v28.16b
   6317  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 2
   6318  1.1  christos 
   6319  1.1  christos 	ldp	q27, q28, [x8, #64]				//load rk4, rk5
   6320  1.1  christos 	trn1	v18.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   6321  1.1  christos 	pmull2	v17.1q, v8.2d, v25.2d				//GHASH block 8k - high
   6322  1.1  christos 
   6323  1.1  christos 	rev64	v14.16b, v14.16b						//GHASH block 8k+6
   6324  1.1  christos 	aese	v4.16b, v26.16b
   6325  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 3
   6326  1.1  christos 	pmull2	v16.1q, v9.2d, v23.2d				//GHASH block 8k+1 - high
   6327  1.1  christos 
   6328  1.1  christos 	aese	v7.16b, v26.16b
   6329  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 3
   6330  1.1  christos 	pmull	v19.1q, v8.1d, v25.1d				//GHASH block 8k - low
   6331  1.1  christos 	trn2	v8.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   6332  1.1  christos 
   6333  1.1  christos 	pmull2	v29.1q, v10.2d, v22.2d				//GHASH block 8k+2 - high
   6334  1.1  christos 	aese	v6.16b, v26.16b
   6335  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 3
   6336  1.1  christos 
   6337  1.1  christos 	aese	v2.16b, v26.16b
   6338  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 3
   6339  1.1  christos 	aese	v3.16b, v26.16b
   6340  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 3
   6341  1.1  christos 	eor	v17.16b, v17.16b, v16.16b				//GHASH block 8k+1 - high
   6342  1.1  christos 
   6343  1.1  christos 	pmull	v23.1q, v9.1d, v23.1d				//GHASH block 8k+1 - low
   6344  1.1  christos 	pmull2	v9.1q, v11.2d, v20.2d				//GHASH block 8k+3 - high
   6345  1.1  christos 	aese	v1.16b, v26.16b
   6346  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 3
   6347  1.1  christos 
   6348  1.1  christos 	aese	v0.16b, v26.16b
   6349  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 3
   6350  1.1  christos 	eor	v8.16b, v8.16b, v18.16b			//GHASH block 8k, 8k+1 - mid
   6351  1.1  christos 	aese	v5.16b, v26.16b
   6352  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 3
   6353  1.1  christos 
   6354  1.1  christos 	pmull	v22.1q, v10.1d, v22.1d				//GHASH block 8k+2 - low
   6355  1.1  christos 	aese	v1.16b, v27.16b
   6356  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 4
   6357  1.1  christos 	aese	v6.16b, v27.16b
   6358  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 4
   6359  1.1  christos 
   6360  1.1  christos 	aese	v0.16b, v27.16b
   6361  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 4
   6362  1.1  christos 	aese	v2.16b, v27.16b
   6363  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 4
   6364  1.1  christos 	aese	v4.16b, v27.16b
   6365  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 4
   6366  1.1  christos 
   6367  1.1  christos 	aese	v6.16b, v28.16b
   6368  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 5
   6369  1.1  christos 	pmull2	v18.1q, v8.2d, v24.2d				//GHASH block 8k	- mid
   6370  1.1  christos .inst	0xce1d2631	//eor3 v17.16b, v17.16b, v29.16b, v9.16b			//GHASH block 8k+2, 8k+3 - high
   6371  1.1  christos 
   6372  1.1  christos 	aese	v7.16b, v27.16b
   6373  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 4
   6374  1.1  christos 	trn1	v29.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   6375  1.1  christos 	trn2	v10.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   6376  1.1  christos 
   6377  1.1  christos 	aese	v5.16b, v27.16b
   6378  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 4
   6379  1.1  christos 	eor	v19.16b, v19.16b, v23.16b				//GHASH block 8k+1 - low
   6380  1.1  christos 	aese	v3.16b, v27.16b
   6381  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 4
   6382  1.1  christos 
   6383  1.1  christos 	pmull	v20.1q, v11.1d, v20.1d				//GHASH block 8k+3 - low
   6384  1.1  christos 	pmull	v24.1q, v8.1d, v24.1d				//GHASH block 8k+1 - mid
   6385  1.1  christos 	eor	v10.16b, v10.16b, v29.16b				//GHASH block 8k+2, 8k+3 - mid
   6386  1.1  christos 
   6387  1.1  christos 	rev64	v12.16b, v12.16b						//GHASH block 8k+4
   6388  1.1  christos 	aese	v1.16b, v28.16b
   6389  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 5
   6390  1.1  christos 	aese	v0.16b, v28.16b
   6391  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 5
   6392  1.1  christos 
   6393  1.1  christos 	aese	v7.16b, v28.16b
   6394  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 5
   6395  1.1  christos 	aese	v4.16b, v28.16b
   6396  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 5
   6397  1.1  christos 	ldp	q26, q27, [x8, #96]				//load rk6, rk7
   6398  1.1  christos 
   6399  1.1  christos 	ldr	q23, [x3, #80]				//load h3l | h3h
   6400  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   6401  1.1  christos 	ldr	q25, [x3, #112]				//load h4l | h4h
   6402  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   6403  1.1  christos 	pmull2	v29.1q, v10.2d, v21.2d				//GHASH block 8k+2 - mid
   6404  1.1  christos 	pmull	v21.1q, v10.1d, v21.1d				//GHASH block 8k+3 - mid
   6405  1.1  christos 
   6406  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+2, 8k+3 - low
   6407  1.1  christos 	eor	v18.16b, v18.16b, v24.16b				//GHASH block 8k+1 - mid
   6408  1.1  christos 
   6409  1.1  christos 	aese	v5.16b, v28.16b
   6410  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 5
   6411  1.1  christos 	rev64	v15.16b, v15.16b						//GHASH block 8k+7
   6412  1.1  christos 	trn1	v16.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   6413  1.1  christos 
   6414  1.1  christos 	aese	v3.16b, v28.16b
   6415  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 5
   6416  1.1  christos 	aese	v2.16b, v28.16b
   6417  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 5
   6418  1.1  christos .inst	0xce157652	//eor3 v18.16b, v18.16b, v21.16b, v29.16b			//GHASH block 8k+2, 8k+3 - mid
   6419  1.1  christos 
   6420  1.1  christos 	aese	v7.16b, v26.16b
   6421  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 6
   6422  1.1  christos 	aese	v4.16b, v26.16b
   6423  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 6
   6424  1.1  christos 	aese	v6.16b, v26.16b
   6425  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 6
   6426  1.1  christos 
   6427  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   6428  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   6429  1.1  christos 	aese	v5.16b, v26.16b
   6430  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 6
   6431  1.1  christos 	aese	v3.16b, v26.16b
   6432  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 6
   6433  1.1  christos 
   6434  1.1  christos 	aese	v0.16b, v26.16b
   6435  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 6
   6436  1.1  christos 	aese	v1.16b, v26.16b
   6437  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 6
   6438  1.1  christos 	aese	v2.16b, v26.16b
   6439  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 6
   6440  1.1  christos 
   6441  1.1  christos 	pmull2	v8.1q, v12.2d, v25.2d				//GHASH block 8k+4 - high
   6442  1.1  christos 	pmull	v25.1q, v12.1d, v25.1d				//GHASH block 8k+4 - low
   6443  1.1  christos 	ldr	q20, [x3, #32]				//load h1l | h1h
   6444  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   6445  1.1  christos 	ldr	q22, [x3, #64]				//load h2l | h2h
   6446  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   6447  1.1  christos 
   6448  1.1  christos 	ldp	q28, q26, [x8, #128]				//load rk8, rk9
   6449  1.1  christos 	aese	v1.16b, v27.16b
   6450  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 7
   6451  1.1  christos 	aese	v4.16b, v27.16b
   6452  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 7
   6453  1.1  christos 
   6454  1.1  christos 	pmull2	v10.1q, v13.2d, v23.2d				//GHASH block 8k+5 - high
   6455  1.1  christos 	trn2	v12.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   6456  1.1  christos 
   6457  1.1  christos 	aese	v5.16b, v27.16b
   6458  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 7
   6459  1.1  christos 	aese	v6.16b, v27.16b
   6460  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 7
   6461  1.1  christos 	pmull	v23.1q, v13.1d, v23.1d				//GHASH block 8k+5 - low
   6462  1.1  christos 
   6463  1.1  christos 	aese	v7.16b, v27.16b
   6464  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 7
   6465  1.1  christos 	aese	v3.16b, v27.16b
   6466  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 7
   6467  1.1  christos 	eor	v12.16b, v12.16b, v16.16b				//GHASH block 8k+4, 8k+5 - mid
   6468  1.1  christos 
   6469  1.1  christos 	pmull2	v11.1q, v14.2d, v22.2d				//GHASH block 8k+6 - high
   6470  1.1  christos 	pmull	v22.1q, v14.1d, v22.1d				//GHASH block 8k+6 - low
   6471  1.1  christos 	aese	v2.16b, v27.16b
   6472  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 7
   6473  1.1  christos 
   6474  1.1  christos 	trn1	v13.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   6475  1.1  christos 	trn2	v14.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   6476  1.1  christos 	aese	v0.16b, v27.16b
   6477  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 7
   6478  1.1  christos 
   6479  1.1  christos 	aese	v7.16b, v28.16b
   6480  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 8
   6481  1.1  christos .inst	0xce195e73	//eor3 v19.16b, v19.16b, v25.16b, v23.16b			//GHASH block 8k+4, 8k+5 - low
   6482  1.1  christos 	aese	v2.16b, v28.16b
   6483  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 8
   6484  1.1  christos 
   6485  1.1  christos 	aese	v6.16b, v28.16b
   6486  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 8
   6487  1.1  christos 	aese	v4.16b, v28.16b
   6488  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 8
   6489  1.1  christos 	aese	v3.16b, v28.16b
   6490  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 8
   6491  1.1  christos 
   6492  1.1  christos 	aese	v5.16b, v28.16b
   6493  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 8
   6494  1.1  christos 	eor	v14.16b, v14.16b, v13.16b				//GHASH block 8k+6, 8k+7 - mid
   6495  1.1  christos 	aese	v0.16b, v28.16b
   6496  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 8
   6497  1.1  christos 
   6498  1.1  christos 	pmull2	v16.1q, v12.2d, v24.2d				//GHASH block 8k+4 - mid
   6499  1.1  christos 	pmull	v24.1q, v12.1d, v24.1d				//GHASH block 8k+5 - mid
   6500  1.1  christos 	aese	v1.16b, v28.16b
   6501  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 8
   6502  1.1  christos 
   6503  1.1  christos 	pmull2	v12.1q, v15.2d, v20.2d				//GHASH block 8k+7 - high
   6504  1.1  christos 	pmull2	v13.1q, v14.2d, v21.2d				//GHASH block 8k+6 - mid
   6505  1.1  christos 	pmull	v21.1q, v14.1d, v21.1d				//GHASH block 8k+7 - mid
   6506  1.1  christos 
   6507  1.1  christos 	pmull	v20.1q, v15.1d, v20.1d				//GHASH block 8k+7 - low
   6508  1.1  christos .inst	0xce184252	//eor3 v18.16b, v18.16b, v24.16b, v16.16b			//GHASH block 8k+4, 8k+5 - mid
   6509  1.1  christos .inst	0xce082a31	//eor3 v17.16b, v17.16b, v8.16b, v10.16b			//GHASH block 8k+4, 8k+5 - high
   6510  1.1  christos 
   6511  1.1  christos 	ldp	q27, q28, [x8, #160]				//load rk10, rk11
   6512  1.1  christos 	aese	v1.16b, v26.16b
   6513  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 9
   6514  1.1  christos 	aese	v0.16b, v26.16b
   6515  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 9
   6516  1.1  christos 
   6517  1.1  christos .inst	0xce0b3231	//eor3 v17.16b, v17.16b, v11.16b, v12.16b			//GHASH block 8k+6, 8k+7 - high
   6518  1.1  christos .inst	0xce153652	//eor3 v18.16b, v18.16b, v21.16b, v13.16b			//GHASH block 8k+6, 8k+7 - mid
   6519  1.1  christos 	ldr	d16, [x10]			//MODULO - load modulo constant
   6520  1.1  christos 
   6521  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+6, 8k+7 - low
   6522  1.1  christos 
   6523  1.1  christos 	aese	v3.16b, v26.16b
   6524  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 9
   6525  1.1  christos 	aese	v7.16b, v26.16b
   6526  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 9
   6527  1.1  christos 	aese	v5.16b, v26.16b
   6528  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 9
   6529  1.1  christos 
   6530  1.1  christos 	aese	v2.16b, v26.16b
   6531  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 9
   6532  1.1  christos 	aese	v6.16b, v26.16b
   6533  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 9
   6534  1.1  christos 
   6535  1.1  christos 	aese	v5.16b, v27.16b
   6536  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 10
   6537  1.1  christos 	aese	v1.16b, v27.16b
   6538  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 10
   6539  1.1  christos 	aese	v4.16b, v26.16b
   6540  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 9
   6541  1.1  christos 
   6542  1.1  christos 	aese	v7.16b, v27.16b
   6543  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 10
   6544  1.1  christos 	aese	v6.16b, v27.16b
   6545  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 10
   6546  1.1  christos 	aese	v3.16b, v27.16b
   6547  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 10
   6548  1.1  christos 
   6549  1.1  christos 	aese	v4.16b, v27.16b
   6550  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 10
   6551  1.1  christos 	aese	v0.16b, v27.16b
   6552  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 10
   6553  1.1  christos 	aese	v2.16b, v27.16b
   6554  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 10
   6555  1.1  christos 
   6556  1.1  christos 	pmull	v29.1q, v17.1d, v16.1d		 	//MODULO - top 64b align with mid
   6557  1.1  christos .inst	0xce114e52	//eor3 v18.16b, v18.16b, v17.16b, v19.16b		 	//MODULO - karatsuba tidy up
   6558  1.1  christos 	aese	v7.16b, v28.16b
   6559  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 11
   6560  1.1  christos 
   6561  1.1  christos 	ldp	q26, q27, [x8, #192]				//load rk12, rk13
   6562  1.1  christos 	ext	v21.16b, v17.16b, v17.16b, #8			 	//MODULO - other top alignment
   6563  1.1  christos 	aese	v2.16b, v28.16b
   6564  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 11
   6565  1.1  christos 
   6566  1.1  christos .inst	0xce1d5652	//eor3 v18.16b, v18.16b, v29.16b, v21.16b			//MODULO - fold into mid
   6567  1.1  christos 	aese	v1.16b, v28.16b
   6568  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 11
   6569  1.1  christos 	aese	v6.16b, v28.16b
   6570  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 11
   6571  1.1  christos 
   6572  1.1  christos 	aese	v0.16b, v28.16b
   6573  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 11
   6574  1.1  christos 	aese	v4.16b, v28.16b
   6575  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 11
   6576  1.1  christos 	aese	v5.16b, v28.16b
   6577  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 11
   6578  1.1  christos 
   6579  1.1  christos 	pmull	v17.1q, v18.1d, v16.1d			//MODULO - mid 64b align with low
   6580  1.1  christos 	aese	v3.16b, v28.16b
   6581  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 11
   6582  1.1  christos 	ldr	q28, [x8, #224]					//load rk14 - not used by any aese here; TAIL copies it to v29 for the eor3 result steps
   6583  1.1  christos 
   6584  1.1  christos 	aese	v1.16b, v26.16b
   6585  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 12
   6586  1.1  christos 	aese	v2.16b, v26.16b
   6587  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 12
   6588  1.1  christos 	aese	v0.16b, v26.16b
   6589  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 12
   6590  1.1  christos 
   6591  1.1  christos 	aese	v6.16b, v26.16b
   6592  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 12
   6593  1.1  christos 	aese	v5.16b, v26.16b
   6594  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 12
   6595  1.1  christos 	ext	v21.16b, v18.16b, v18.16b, #8			 	//MODULO - other mid alignment
   6596  1.1  christos 
   6597  1.1  christos 	aese	v4.16b, v26.16b
   6598  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 12
   6599  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+15
   6600  1.1  christos 
   6601  1.1  christos 	aese	v3.16b, v26.16b
   6602  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 12
   6603  1.1  christos 	aese	v7.16b, v26.16b
   6604  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 12
   6605  1.1  christos 	aese	v0.16b, v27.16b						//AES block 8k+8 - round 13 (aese only, no aesmc; the rk14 xor is left to the eor3 result steps in TAIL)
   6606  1.1  christos 
   6607  1.1  christos .inst	0xce154673	//eor3 v19.16b, v19.16b, v21.16b, v17.16b		 	//MODULO - fold into low
   6608  1.1  christos 	aese	v5.16b, v27.16b						//AES block 8k+13 - round 13
   6609  1.1  christos 	aese	v1.16b, v27.16b						//AES block 8k+9 - round 13
   6610  1.1  christos 
   6611  1.1  christos 	aese	v3.16b, v27.16b						//AES block 8k+11 - round 13
   6612  1.1  christos 	aese	v4.16b, v27.16b						//AES block 8k+12 - round 13
   6613  1.1  christos 	aese	v7.16b, v27.16b						//AES block 8k+15 - round 13
   6614  1.1  christos 
   6615  1.1  christos 	aese	v2.16b, v27.16b						//AES block 8k+10 - round 13
   6616  1.1  christos 	aese	v6.16b, v27.16b						//AES block 8k+14 - round 13
   6617  1.1  christos .L256_enc_tail:	//TAIL - compute the number of bytes left (x5) and branch to the matching blocks_more_than_N entry
   6618  1.1  christos 
   6619  1.1  christos 	ldp	q24, q25, [x3, #192]			//load h8k | h7k (q24) and h8l | h8h (q25)
   6620  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8		//swap the 64-bit halves of h8
   6621  1.1  christos 	sub	x5, x4, x0		//x5 (main_end_input_ptr) = x4 - x0, the number of bytes left to process; assumes x4 is the end-of-input pointer, which is set outside this view - TODO confirm
   6622  1.1  christos 
   6623  1.1  christos 	ldr	q8, [x0], #16				//AES block 8k+8 - load plaintext
   6624  1.1  christos 
   6625  1.1  christos 	ldp	q20, q21, [x3, #128]			//load h5l | h5h (q20) and h6k | h5k (q21)
   6626  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8		//swap the 64-bit halves of h5
   6627  1.1  christos 
   6628  1.1  christos 	ext	v16.16b, v19.16b, v19.16b, #8				//prepare final partial tag
   6629  1.1  christos 	ldp	q22, q23, [x3, #160]			//load h6l | h6h (q22) and h7l | h7h (q23)
   6630  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8		//swap the 64-bit halves of h6
   6631  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8		//swap the 64-bit halves of h7
   6632  1.1  christos 	mov	v29.16b, v28.16b		//keep a copy of v28 (rk14 when arriving from PREPRETAIL) in v29 for the eor3 result steps; v28 itself is reused as a GHASH temporary in the final-6 step
   6633  1.1  christos 
   6634  1.1  christos 	cmp	x5, #112		//more than 7 blocks (7 * 16 bytes) left?
   6635  1.1  christos .inst	0xce007509	//eor3 v9.16b, v8.16b, v0.16b, v29.16b				//AES block 8k+8 - result
   6636  1.1  christos 	b.gt	.L256_enc_blocks_more_than_7
   6637  1.1  christos 
   6638  1.1  christos 	movi	v19.8b, #0		//final-7 step skipped: clear v19, which the final-6 step xors its low product into
   6639  1.1  christos 	mov	v7.16b, v6.16b		//shift the pending AES blocks v1-v6 up one register (into v2-v7)
   6640  1.1  christos 	movi	v17.8b, #0		//clear v17, which the final-6 step xors its high product into
   6641  1.1  christos 
   6642  1.1  christos 	mov	v6.16b, v5.16b
   6643  1.1  christos 	mov	v5.16b, v4.16b
   6644  1.1  christos 	mov	v4.16b, v3.16b
   6645  1.1  christos 
   6646  1.1  christos 	mov	v3.16b, v2.16b
   6647  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//step the counter back by one increment
   6648  1.1  christos 	mov	v2.16b, v1.16b		//the final-5 result step reads v2
   6649  1.1  christos 
   6650  1.1  christos 	movi	v18.8b, #0		//clear v18, which the final-6 step xors its mid product into
   6651  1.1  christos 	cmp	x5, #96		//more than 6 blocks left?
   6652  1.1  christos 	b.gt	.L256_enc_blocks_more_than_6
   6653  1.1  christos 
   6654  1.1  christos 	mov	v7.16b, v6.16b		//shift the pending AES blocks up one more register
   6655  1.1  christos 	mov	v6.16b, v5.16b
   6656  1.1  christos 	cmp	x5, #80		//more than 5 blocks left? (the vector moves below leave the flags untouched)
   6657  1.1  christos 
   6658  1.1  christos 	mov	v5.16b, v4.16b
   6659  1.1  christos 	mov	v4.16b, v3.16b
   6660  1.1  christos 	mov	v3.16b, v1.16b		//v1 still equals v2 here, so this continues the shift
   6661  1.1  christos 
   6662  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//step the counter back by one increment
   6663  1.1  christos 	b.gt	.L256_enc_blocks_more_than_5
   6664  1.1  christos 
   6665  1.1  christos 	mov	v7.16b, v6.16b		//shift the pending AES blocks up one more register
   6666  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//step the counter back by one increment
   6667  1.1  christos 
   6668  1.1  christos 	mov	v6.16b, v5.16b
   6669  1.1  christos 	mov	v5.16b, v4.16b
   6670  1.1  christos 
   6671  1.1  christos 	cmp	x5, #64		//more than 4 blocks left?
   6672  1.1  christos 	mov	v4.16b, v1.16b
   6673  1.1  christos 	b.gt	.L256_enc_blocks_more_than_4
   6674  1.1  christos 
   6675  1.1  christos 	cmp	x5, #48		//more than 3 blocks left?
   6676  1.1  christos 	mov	v7.16b, v6.16b		//shift the pending AES blocks up one more register
   6677  1.1  christos 	mov	v6.16b, v5.16b
   6678  1.1  christos 
   6679  1.1  christos 	mov	v5.16b, v1.16b
   6680  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//step the counter back by one increment
   6681  1.1  christos 	b.gt	.L256_enc_blocks_more_than_3
   6682  1.1  christos 
   6683  1.1  christos 	cmp	x5, #32		//more than 2 blocks left?
   6684  1.1  christos 	mov	v7.16b, v6.16b		//shift the pending AES blocks up one more register
   6685  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   6686  1.1  christos 
   6687  1.1  christos 	mov	v6.16b, v1.16b
   6688  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//step the counter back by one increment
   6689  1.1  christos 	b.gt	.L256_enc_blocks_more_than_2
   6690  1.1  christos 
   6691  1.1  christos 	mov	v7.16b, v1.16b
   6692  1.1  christos 
   6693  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//step the counter back by one increment
   6694  1.1  christos 	cmp	x5, #16		//more than 1 block left?
   6695  1.1  christos 	b.gt	.L256_enc_blocks_more_than_1
   6696  1.1  christos 
   6697  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//step the counter back by one increment
   6698  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   6699  1.1  christos 	b	.L256_enc_blocks_less_than_1
   6700  1.1  christos .L256_enc_blocks_more_than_7:	//blocks	left >  7 - store the final-7 result, start the GHASH high/mid/low accumulators (v17/v18/v19) from it, and form the final-6 result
   6701  1.1  christos 	st1	{ v9.16b}, [x2], #16				//AES final-7 block  - store result
   6702  1.1  christos 
   6703  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-7 block
   6704  1.1  christos 
   6705  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   6706  1.1  christos 
   6707  1.1  christos 	ldr	q9, [x0], #16				//AES final-6 block - load plaintext
   6708  1.1  christos 
   6709  1.1  christos 	pmull2	v17.1q, v8.2d, v25.2d				//GHASH final-7 block - high
   6710  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-7 block - mid (v27.d[0] = upper half of the block)
   6711  1.1  christos 	ins	v18.d[0], v24.d[1]					//GHASH final-7 block - mid (v18.d[0] = upper half of v24, the half PREPRETAIL multiplies with block 8k - presumably h8k)
   6712  1.1  christos 
   6713  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   6714  1.1  christos 
   6715  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-7 block - mid (v27.d[0] = upper half xor lower half of the block)
   6716  1.1  christos .inst	0xce017529	//eor3 v9.16b, v9.16b, v1.16b, v29.16b			//AES final-6 block - result
   6717  1.1  christos 
   6718  1.1  christos 	pmull	v18.1q, v27.1d, v18.1d				//GHASH final-7 block - mid
   6719  1.1  christos 	pmull	v19.1q, v8.1d, v25.1d				//GHASH final-7 block - low; execution then falls through into .L256_enc_blocks_more_than_6
   6720  1.1  christos .L256_enc_blocks_more_than_6:	//blocks	left >  6
   6721  1.1  christos 
   6722  1.1  christos 	st1	{ v9.16b}, [x2], #16				//AES final-6 block - store result
   6723  1.1  christos 
   6724  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-6 block
   6725  1.1  christos 
   6726  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   6727  1.1  christos 
   6728  1.1  christos 	pmull	v26.1q, v8.1d, v23.1d				//GHASH final-6 block - low
   6729  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-6 block - mid
   6730  1.1  christos 	pmull2	v28.1q, v8.2d, v23.2d				//GHASH final-6 block - high
   6731  1.1  christos 
   6732  1.1  christos 	ldr	q9, [x0], #16				//AES final-5 block - load plaintext
   6733  1.1  christos 
   6734  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-6 block - low
   6735  1.1  christos 
   6736  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-6 block - mid
   6737  1.1  christos 
   6738  1.1  christos 	pmull	v27.1q, v27.1d, v24.1d				//GHASH final-6 block - mid
   6739  1.1  christos .inst	0xce027529	//eor3 v9.16b, v9.16b, v2.16b, v29.16b			//AES final-5 block - result
   6740  1.1  christos 
   6741  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   6742  1.1  christos 
   6743  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-6 block - mid
   6744  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-6 block - high
   6745  1.1  christos .L256_enc_blocks_more_than_5:	//blocks	left >  5
   6746  1.1  christos 
   6747  1.1  christos 	st1	{ v9.16b}, [x2], #16				//AES final-5 block - store result
   6748  1.1  christos 
   6749  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-5 block
   6750  1.1  christos 
   6751  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   6752  1.1  christos 
   6753  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-5 block - mid
   6754  1.1  christos 
   6755  1.1  christos 	pmull2	v28.1q, v8.2d, v22.2d				//GHASH final-5 block - high
   6756  1.1  christos 
   6757  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-5 block - high
   6758  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-5 block - mid
   6759  1.1  christos 
   6760  1.1  christos 	ins	v27.d[1], v27.d[0]					//GHASH final-5 block - mid
   6761  1.1  christos 
   6762  1.1  christos 	ldr	q9, [x0], #16				//AES final-4 block - load plaintext
   6763  1.1  christos 	pmull	v26.1q, v8.1d, v22.1d				//GHASH final-5 block - low
   6764  1.1  christos 
   6765  1.1  christos 	pmull2	v27.1q, v27.2d, v21.2d				//GHASH final-5 block - mid
   6766  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   6767  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-5 block - low
   6768  1.1  christos 
   6769  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-5 block - mid
   6770  1.1  christos .inst	0xce037529	//eor3 v9.16b, v9.16b, v3.16b, v29.16b			//AES final-4 block - result
   6771  1.1  christos .L256_enc_blocks_more_than_4:	//blocks	left >  4
   6772  1.1  christos 
   6773  1.1  christos 	st1	{ v9.16b}, [x2], #16				//AES final-4 block - store result
   6774  1.1  christos 
   6775  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-4 block
   6776  1.1  christos 
   6777  1.1  christos 	ldr	q9, [x0], #16				//AES final-3 block - load plaintext
   6778  1.1  christos 
   6779  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   6780  1.1  christos 
   6781  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-4 block - mid
   6782  1.1  christos 	pmull2	v28.1q, v8.2d, v20.2d				//GHASH final-4 block - high
   6783  1.1  christos 
   6784  1.1  christos .inst	0xce047529	//eor3 v9.16b, v9.16b, v4.16b, v29.16b			//AES final-3 block - result
   6785  1.1  christos 	pmull	v26.1q, v8.1d, v20.1d				//GHASH final-4 block - low
   6786  1.1  christos 
   6787  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-4 block - mid
   6788  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-4 block - low
   6789  1.1  christos 
   6790  1.1  christos 	pmull	v27.1q, v27.1d, v21.1d				//GHASH final-4 block - mid
   6791  1.1  christos 
   6792  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   6793  1.1  christos 
   6794  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-4 block - mid
   6795  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-4 block - high
   6796  1.1  christos .L256_enc_blocks_more_than_3:	//blocks	left >  3
   6797  1.1  christos 
   6798  1.1  christos 	st1	{ v9.16b}, [x2], #16				//AES final-3 block - store result
   6799  1.1  christos 
   6800  1.1  christos 	ldr	q25, [x3, #112]				//load h4l | h4h
   6801  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   6802  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-3 block
   6803  1.1  christos 
   6804  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   6805  1.1  christos 
   6806  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-3 block - mid
   6807  1.1  christos 	pmull2	v28.1q, v8.2d, v25.2d				//GHASH final-3 block - high
   6808  1.1  christos 
   6809  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-3 block - high
   6810  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-3 block - mid
   6811  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   6812  1.1  christos 
   6813  1.1  christos 	ins	v27.d[1], v27.d[0]					//GHASH final-3 block - mid
   6814  1.1  christos 	ldr	q9, [x0], #16				//AES final-2 block - load plaintext
   6815  1.1  christos 
   6816  1.1  christos 	pmull2	v27.1q, v27.2d, v24.2d				//GHASH final-3 block - mid
   6817  1.1  christos 	pmull	v26.1q, v8.1d, v25.1d				//GHASH final-3 block - low
   6818  1.1  christos 
   6819  1.1  christos .inst	0xce057529	//eor3 v9.16b, v9.16b, v5.16b, v29.16b			//AES final-2 block - result
   6820  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   6821  1.1  christos 
   6822  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-3 block - mid
   6823  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-3 block - low
   6824  1.1  christos .L256_enc_blocks_more_than_2:	//blocks	left >  2
   6825  1.1  christos 
   6826  1.1  christos 	ldr	q23, [x3, #80]				//load h3l | h3h
   6827  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   6828  1.1  christos 
   6829  1.1  christos 	st1	{ v9.16b}, [x2], #16			 	//AES final-2 block - store result
   6830  1.1  christos 
   6831  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-2 block
   6832  1.1  christos 	ldr	q9, [x0], #16				//AES final-1 block - load plaintext
   6833  1.1  christos 
   6834  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   6835  1.1  christos 
   6836  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-2 block - mid
   6837  1.1  christos 
   6838  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   6839  1.1  christos 
   6840  1.1  christos 	pmull2	v28.1q, v8.2d, v23.2d				//GHASH final-2 block - high
   6841  1.1  christos .inst	0xce067529	//eor3 v9.16b, v9.16b, v6.16b, v29.16b			//AES final-1 block - result
   6842  1.1  christos 
   6843  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-2 block - mid
   6844  1.1  christos 
   6845  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-2 block - high
   6846  1.1  christos 
   6847  1.1  christos 	pmull	v27.1q, v27.1d, v24.1d				//GHASH final-2 block - mid
   6848  1.1  christos 	pmull	v26.1q, v8.1d, v23.1d				//GHASH final-2 block - low
   6849  1.1  christos 
   6850  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-2 block - mid
   6851  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-2 block - low
   6852  1.1  christos .L256_enc_blocks_more_than_1:	//blocks	left >  1
   6853  1.1  christos 
   6854  1.1  christos 	st1	{ v9.16b}, [x2], #16				//AES final-1 block - store result
   6855  1.1  christos 
   6856  1.1  christos 	ldr	q22, [x3, #64]				//load h2l | h2h
   6857  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   6858  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-1 block
   6859  1.1  christos 	ldr	q9, [x0], #16				//AES final block - load plaintext
   6860  1.1  christos 
   6861  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   6862  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   6863  1.1  christos 
   6864  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-1 block - mid
   6865  1.1  christos 	pmull2	v28.1q, v8.2d, v22.2d				//GHASH final-1 block - high
   6866  1.1  christos 
   6867  1.1  christos .inst	0xce077529	//eor3 v9.16b, v9.16b, v7.16b, v29.16b			//AES final block - result
   6868  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-1 block - high
   6869  1.1  christos 
   6870  1.1  christos 	pmull	v26.1q, v8.1d, v22.1d				//GHASH final-1 block - low
   6871  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-1 block - mid
   6872  1.1  christos 
   6873  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   6874  1.1  christos 
   6875  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-1 block - low
   6876  1.1  christos 	ins	v27.d[1], v27.d[0]					//GHASH final-1 block - mid
   6877  1.1  christos 
   6878  1.1  christos 	pmull2	v27.1q, v27.2d, v21.2d				//GHASH final-1 block - mid
   6879  1.1  christos 
   6880  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-1 block - mid
   6881  1.1  christos .L256_enc_blocks_less_than_1:	//blocks	left <= 1
   6882  1.1  christos 
   6883  1.1  christos 	and	x1, x1, #127				//bit_length %= 128
   6884  1.1  christos 
   6885  1.1  christos 	sub	x1, x1, #128				//bit_length -= 128
   6886  1.1  christos 
   6887  1.1  christos 	neg	x1, x1				//bit_length = 128 - #bits in input (in range [1,128])
   6888  1.1  christos 
   6889  1.1  christos 	mvn	x6, xzr						//temp0_x = 0xffffffffffffffff
   6890  1.1  christos 	and	x1, x1, #127				//bit_length %= 128
   6891  1.1  christos 
   6892  1.1  christos 	lsr	x6, x6, x1				//temp0_x is mask for top 64b of last block
   6893  1.1  christos 	cmp	x1, #64
   6894  1.1  christos 	mvn	x7, xzr						//temp1_x = 0xffffffffffffffff
   6895  1.1  christos 
   6896  1.1  christos 	csel	x14, x6, xzr, lt
   6897  1.1  christos 	csel	x13, x7, x6, lt
   6898  1.1  christos 
   6899  1.1  christos 	mov	v0.d[0], x13					//ctr0b is mask for last block
   6900  1.1  christos 	ldr	q20, [x3, #32]				//load h1l | h1h
   6901  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   6902  1.1  christos 
   6903  1.1  christos 	ld1	{ v26.16b}, [x2]					//load existing bytes where the possibly partial last block is to be stored
   6904  1.1  christos 	mov	v0.d[1], x14
   6905  1.1  christos 
   6906  1.1  christos 	and	v9.16b, v9.16b, v0.16b					//possibly partial last block has zeroes in highest bits
   6907  1.1  christos 
   6908  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final block
   6909  1.1  christos 
   6910  1.1  christos 	rev32	v30.16b, v30.16b
   6911  1.1  christos 	bif	v9.16b, v26.16b, v0.16b					//insert existing bytes in top end of result before storing
   6912  1.1  christos 	str	q30, [x16]					//store the updated counter
   6913  1.1  christos 
   6914  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   6915  1.1  christos 	st1	{ v9.16b}, [x2]				//store all 16B
   6916  1.1  christos 
   6917  1.1  christos 	ins	v16.d[0], v8.d[1]					//GHASH final block - mid
   6918  1.1  christos 	pmull2	v28.1q, v8.2d, v20.2d				//GHASH final block - high
   6919  1.1  christos 	pmull	v26.1q, v8.1d, v20.1d				//GHASH final block - low
   6920  1.1  christos 
   6921  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final block - high
   6922  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final block - low
   6923  1.1  christos 
   6924  1.1  christos 	eor	v16.8b, v16.8b, v8.8b				//GHASH final block - mid
   6925  1.1  christos 
   6926  1.1  christos 	pmull	v16.1q, v16.1d, v21.1d				//GHASH final block - mid
   6927  1.1  christos 
   6928  1.1  christos 	eor	v18.16b, v18.16b, v16.16b				//GHASH final block - mid
   6929  1.1  christos 	ldr	d16, [x10]			//MODULO - load modulo constant
   6930  1.1  christos 
   6931  1.1  christos 	ext	v21.16b, v17.16b, v17.16b, #8				//MODULO - other top alignment
   6932  1.1  christos 
   6933  1.1  christos .inst	0xce114e52	//eor3 v18.16b, v18.16b, v17.16b, v19.16b		 	//MODULO - karatsuba tidy up
   6934  1.1  christos 	pmull	v29.1q, v17.1d, v16.1d			//MODULO - top 64b align with mid
   6935  1.1  christos 
   6936  1.1  christos .inst	0xce1d5652	//eor3 v18.16b, v18.16b, v29.16b, v21.16b			//MODULO - fold into mid
   6937  1.1  christos 
   6938  1.1  christos 	pmull	v17.1q, v18.1d, v16.1d			//MODULO - mid 64b align with low
   6939  1.1  christos 	ext	v21.16b, v18.16b, v18.16b, #8				//MODULO - other mid alignment
   6940  1.1  christos 
   6941  1.1  christos .inst	0xce115673	//eor3 v19.16b, v19.16b, v17.16b, v21.16b		 	//MODULO - fold into low
   6942  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8
   6943  1.1  christos 	rev64	v19.16b, v19.16b
   6944  1.1  christos 	st1	{ v19.16b }, [x3]
   6945  1.1  christos 	mov	x0, x9					//return sizes
   6946  1.1  christos 
   6947  1.1  christos 	ldp	d10, d11, [sp, #16]
   6948  1.1  christos 	ldp	d12, d13, [sp, #32]
   6949  1.1  christos 	ldp	d14, d15, [sp, #48]
   6950  1.1  christos 	ldp	d8, d9, [sp], #80
   6951  1.1  christos 	ret
   6952  1.1  christos 
   6953  1.1  christos .L256_enc_ret:
   6954  1.1  christos 	mov	w0, #0x0
   6955  1.1  christos 	ret
   6956  1.1  christos .size	unroll8_eor3_aes_gcm_enc_256_kernel,.-unroll8_eor3_aes_gcm_enc_256_kernel
   6957  1.1  christos .globl	unroll8_eor3_aes_gcm_dec_256_kernel
   6958  1.1  christos .type	unroll8_eor3_aes_gcm_dec_256_kernel,%function
   6959  1.1  christos .align	4
   6960  1.1  christos unroll8_eor3_aes_gcm_dec_256_kernel:
   6961  1.1  christos 	AARCH64_VALID_CALL_TARGET
   6962  1.1  christos 	cbz	x1, .L256_dec_ret
   6963  1.1  christos 	stp	d8, d9, [sp, #-80]!
   6964  1.1  christos 	lsr	x9, x1, #3
   6965  1.1  christos 	mov	x16, x4
   6966  1.1  christos 	mov	x8, x5
   6967  1.1  christos 	stp	d10, d11, [sp, #16]
   6968  1.1  christos 	stp	d12, d13, [sp, #32]
   6969  1.1  christos 	stp	d14, d15, [sp, #48]
   6970  1.1  christos 	mov	x5, #0xc200000000000000
   6971  1.1  christos 	stp	x5, xzr, [sp, #64]
   6972  1.1  christos 	add	x10, sp, #64
   6973  1.1  christos 
   6974  1.1  christos 	ld1	{ v0.16b}, [x16]					//CTR block 0
   6975  1.1  christos 
   6976  1.1  christos 	mov	x15, #0x100000000			//set up counter increment
   6977  1.1  christos 	movi	v31.16b, #0x0
   6978  1.1  christos 	mov	v31.d[1], x15
   6979  1.1  christos 	mov	x5, x9
   6980  1.1  christos 
   6981  1.1  christos 	sub	x5, x5, #1		//byte_len - 1
   6982  1.1  christos 
   6983  1.1  christos 	rev32	v30.16b, v0.16b				//set up reversed counter
   6984  1.1  christos 
   6985  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 0
   6986  1.1  christos 
   6987  1.1  christos 	rev32	v1.16b, v30.16b				//CTR block 1
   6988  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 1
   6989  1.1  christos 
   6990  1.1  christos 	rev32	v2.16b, v30.16b				//CTR block 2
   6991  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 2
   6992  1.1  christos 	ldp	q26, q27, [x8, #0]				  	//load rk0, rk1
   6993  1.1  christos 
   6994  1.1  christos 	rev32	v3.16b, v30.16b				//CTR block 3
   6995  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 3
   6996  1.1  christos 
   6997  1.1  christos 	rev32	v4.16b, v30.16b				//CTR block 4
   6998  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 4
   6999  1.1  christos 
   7000  1.1  christos 	aese	v0.16b, v26.16b
   7001  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 0
   7002  1.1  christos 
   7003  1.1  christos 	rev32	v5.16b, v30.16b				//CTR block 5
   7004  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 5
   7005  1.1  christos 
   7006  1.1  christos 	aese	v1.16b, v26.16b
   7007  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 0
   7008  1.1  christos 	aese	v2.16b, v26.16b
   7009  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 0
   7010  1.1  christos 
   7011  1.1  christos 	rev32	v6.16b, v30.16b				//CTR block 6
   7012  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 6
   7013  1.1  christos 
   7014  1.1  christos 	rev32	v7.16b, v30.16b				//CTR block 7
   7015  1.1  christos 	aese	v4.16b, v26.16b
   7016  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 0
   7017  1.1  christos 
   7018  1.1  christos 	aese	v6.16b, v26.16b
   7019  1.1  christos 	aesmc	v6.16b, v6.16b		        //AES block 6 - round 0
   7020  1.1  christos 	aese	v5.16b, v26.16b
   7021  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 0
   7022  1.1  christos 
   7023  1.1  christos 	aese	v3.16b, v26.16b
   7024  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 0
   7025  1.1  christos 	aese	v7.16b, v26.16b
   7026  1.1  christos 	aesmc	v7.16b, v7.16b		        //AES block 7 - round 0
   7027  1.1  christos 	ldp	q28, q26, [x8, #32]				//load rk2, rk3
   7028  1.1  christos 
   7029  1.1  christos 	aese	v6.16b, v27.16b
   7030  1.1  christos 	aesmc	v6.16b, v6.16b		        //AES block 6 - round 1
   7031  1.1  christos 	aese	v4.16b, v27.16b
   7032  1.1  christos 	aesmc	v4.16b, v4.16b		        //AES block 4 - round 1
   7033  1.1  christos 	aese	v0.16b, v27.16b
   7034  1.1  christos 	aesmc	v0.16b, v0.16b		        //AES block 0 - round 1
   7035  1.1  christos 
   7036  1.1  christos 	aese	v5.16b, v27.16b
   7037  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 1
   7038  1.1  christos 	aese	v7.16b, v27.16b
   7039  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 1
   7040  1.1  christos 	aese	v1.16b, v27.16b
   7041  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 1
   7042  1.1  christos 
   7043  1.1  christos 	aese	v2.16b, v27.16b
   7044  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 1
   7045  1.1  christos 	aese	v3.16b, v27.16b
   7046  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 1
   7047  1.1  christos 
   7048  1.1  christos 	aese	v3.16b, v28.16b
   7049  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 2
   7050  1.1  christos 	aese	v2.16b, v28.16b
   7051  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 2
   7052  1.1  christos 	aese	v6.16b, v28.16b
   7053  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 2
   7054  1.1  christos 
   7055  1.1  christos 	aese	v1.16b, v28.16b
   7056  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 2
   7057  1.1  christos 	aese	v7.16b, v28.16b
   7058  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 2
   7059  1.1  christos 	aese	v5.16b, v28.16b
   7060  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 2
   7061  1.1  christos 
   7062  1.1  christos 	aese	v0.16b, v28.16b
   7063  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 2
   7064  1.1  christos 	aese	v4.16b, v28.16b
   7065  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 2
   7066  1.1  christos 	ldp	q27, q28, [x8, #64]				//load rk4, rk5
   7067  1.1  christos 
   7068  1.1  christos 	aese	v1.16b, v26.16b
   7069  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 3
   7070  1.1  christos 	aese	v2.16b, v26.16b
   7071  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 3
   7072  1.1  christos 
   7073  1.1  christos 	aese	v3.16b, v26.16b
   7074  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 3
   7075  1.1  christos 	aese	v4.16b, v26.16b
   7076  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 3
   7077  1.1  christos 
   7078  1.1  christos 	aese	v5.16b, v26.16b
   7079  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 3
   7080  1.1  christos 	aese	v7.16b, v26.16b
   7081  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 3
   7082  1.1  christos 	aese	v0.16b, v26.16b
   7083  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 3
   7084  1.1  christos 
   7085  1.1  christos 	aese	v6.16b, v26.16b
   7086  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 3
   7087  1.1  christos 
   7088  1.1  christos 	aese	v7.16b, v27.16b
   7089  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 4
   7090  1.1  christos 	aese	v3.16b, v27.16b
   7091  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 4
   7092  1.1  christos 
   7093  1.1  christos 	aese	v6.16b, v27.16b
   7094  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 4
   7095  1.1  christos 	aese	v2.16b, v27.16b
   7096  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 4
   7097  1.1  christos 	aese	v0.16b, v27.16b
   7098  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 4
   7099  1.1  christos 
   7100  1.1  christos 	aese	v4.16b, v27.16b
   7101  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 4
   7102  1.1  christos 	aese	v1.16b, v27.16b
   7103  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 4
   7104  1.1  christos 	aese	v5.16b, v27.16b
   7105  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 4
   7106  1.1  christos 
   7107  1.1  christos 	aese	v0.16b, v28.16b
   7108  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 5
   7109  1.1  christos 	aese	v6.16b, v28.16b
   7110  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 5
   7111  1.1  christos 
   7112  1.1  christos 	ldp	q26, q27, [x8, #96]				//load rk6, rk7
   7113  1.1  christos 	aese	v4.16b, v28.16b
   7114  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 5
   7115  1.1  christos 	aese	v7.16b, v28.16b
   7116  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 5
   7117  1.1  christos 
   7118  1.1  christos 	aese	v5.16b, v28.16b
   7119  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 5
   7120  1.1  christos 
   7121  1.1  christos 	aese	v2.16b, v28.16b
   7122  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 5
   7123  1.1  christos 	aese	v3.16b, v28.16b
   7124  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 5
   7125  1.1  christos 
   7126  1.1  christos 	aese	v1.16b, v28.16b
   7127  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 5
   7128  1.1  christos 
   7129  1.1  christos 	aese	v4.16b, v26.16b
   7130  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 6
   7131  1.1  christos 	aese	v3.16b, v26.16b
   7132  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 6
   7133  1.1  christos 	aese	v7.16b, v26.16b
   7134  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 6
   7135  1.1  christos 
   7136  1.1  christos 	aese	v6.16b, v26.16b
   7137  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 6
   7138  1.1  christos 	aese	v0.16b, v26.16b
   7139  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 6
   7140  1.1  christos 	aese	v5.16b, v26.16b
   7141  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 6
   7142  1.1  christos 
   7143  1.1  christos 	aese	v2.16b, v26.16b
   7144  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 6
   7145  1.1  christos 	aese	v1.16b, v26.16b
   7146  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 6
   7147  1.1  christos 	ldp	q28, q26, [x8, #128]				//load rk8, rk9
   7148  1.1  christos 
   7149  1.1  christos 	aese	v5.16b, v27.16b
   7150  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 7
   7151  1.1  christos 	aese	v0.16b, v27.16b
   7152  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 7
   7153  1.1  christos 
   7154  1.1  christos 	aese	v3.16b, v27.16b
   7155  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 7
   7156  1.1  christos 	aese	v2.16b, v27.16b
   7157  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 7
   7158  1.1  christos 	aese	v7.16b, v27.16b
   7159  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 7
   7160  1.1  christos 
   7161  1.1  christos 	aese	v4.16b, v27.16b
   7162  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 7
   7163  1.1  christos 	aese	v1.16b, v27.16b
   7164  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 7
   7165  1.1  christos 	aese	v6.16b, v27.16b
   7166  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 7
   7167  1.1  christos 
   7168  1.1  christos 	and	x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail)
   7169  1.1  christos 	aese	v7.16b, v28.16b
   7170  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 8
   7171  1.1  christos 	aese	v5.16b, v28.16b
   7172  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 8
   7173  1.1  christos 
   7174  1.1  christos 	aese	v0.16b, v28.16b
   7175  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 8
   7176  1.1  christos 	aese	v1.16b, v28.16b
   7177  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 8
   7178  1.1  christos 	aese	v2.16b, v28.16b
   7179  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 8
   7180  1.1  christos 
   7181  1.1  christos 	aese	v4.16b, v28.16b
   7182  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 8
   7183  1.1  christos 	aese	v3.16b, v28.16b
   7184  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 8
   7185  1.1  christos 	aese	v6.16b, v28.16b
   7186  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 8
   7187  1.1  christos 
   7188  1.1  christos 	aese	v2.16b, v26.16b
   7189  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 9
   7190  1.1  christos 
   7191  1.1  christos 	ld1	{ v19.16b}, [x3]
   7192  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8
   7193  1.1  christos 	rev64	v19.16b, v19.16b
   7194  1.1  christos 	ldp	q27, q28, [x8, #160]				//load rk10, rk11
   7195  1.1  christos 	add	x4, x0, x1, lsr #3 //end_input_ptr
   7196  1.1  christos 	add	x5, x5, x0
   7197  1.1  christos 
   7198  1.1  christos 	aese	v3.16b, v26.16b
   7199  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 9
   7200  1.1  christos 	aese	v6.16b, v26.16b
   7201  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 9
   7202  1.1  christos 
   7203  1.1  christos 	aese	v4.16b, v26.16b
   7204  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 9
   7205  1.1  christos 	aese	v5.16b, v26.16b
   7206  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 9
   7207  1.1  christos 
   7208  1.1  christos 	aese	v7.16b, v26.16b
   7209  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 9
   7210  1.1  christos 
   7211  1.1  christos 	aese	v0.16b, v26.16b
   7212  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 9
   7213  1.1  christos 	aese	v1.16b, v26.16b
   7214  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 9
   7215  1.1  christos 
   7216  1.1  christos 	aese	v4.16b, v27.16b
   7217  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 10
   7218  1.1  christos 	aese	v7.16b, v27.16b
   7219  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 10
   7220  1.1  christos 	aese	v5.16b, v27.16b
   7221  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 10
   7222  1.1  christos 
   7223  1.1  christos 	aese	v1.16b, v27.16b
   7224  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 10
   7225  1.1  christos 	aese	v2.16b, v27.16b
   7226  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 10
   7227  1.1  christos 	aese	v0.16b, v27.16b
   7228  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 10
   7229  1.1  christos 
   7230  1.1  christos 	aese	v6.16b, v27.16b
   7231  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 10
   7232  1.1  christos 	aese	v3.16b, v27.16b
   7233  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 10
   7234  1.1  christos 	ldp	q26, q27, [x8, #192]				//load rk12, rk13
   7235  1.1  christos 
   7236  1.1  christos 	aese	v0.16b, v28.16b
   7237  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 11
   7238  1.1  christos 	add	v30.4s, v30.4s, v31.4s //CTR block 7
   7239  1.1  christos 
   7240  1.1  christos 	aese	v7.16b, v28.16b
   7241  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 11
   7242  1.1  christos 	aese	v3.16b, v28.16b
   7243  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 11
   7244  1.1  christos 	aese	v1.16b, v28.16b
   7245  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 11
   7246  1.1  christos 
   7247  1.1  christos 	aese	v5.16b, v28.16b
   7248  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 11
   7249  1.1  christos 	aese	v4.16b, v28.16b
   7250  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 11
   7251  1.1  christos 	aese	v2.16b, v28.16b
   7252  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 11
   7253  1.1  christos 
   7254  1.1  christos 	aese	v6.16b, v28.16b
   7255  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 11
   7256  1.1  christos 	ldr	q28, [x8, #224]					//load rk14
   7257  1.1  christos 
   7258  1.1  christos 	aese	v1.16b, v26.16b
   7259  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 1 - round 12
   7260  1.1  christos 	aese	v4.16b, v26.16b
   7261  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 4 - round 12
   7262  1.1  christos 	aese	v5.16b, v26.16b
   7263  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 5 - round 12
   7264  1.1  christos 
   7265  1.1  christos 	cmp	x0, x5				//check if we have <= 8 blocks
   7266  1.1  christos 	aese	v3.16b, v26.16b
   7267  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 3 - round 12
   7268  1.1  christos 	aese	v2.16b, v26.16b
   7269  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 2 - round 12
   7270  1.1  christos 
   7271  1.1  christos 	aese	v6.16b, v26.16b
   7272  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 6 - round 12
   7273  1.1  christos 	aese	v0.16b, v26.16b
   7274  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 0 - round 12
   7275  1.1  christos 	aese	v7.16b, v26.16b
   7276  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 7 - round 12
   7277  1.1  christos 
   7278  1.1  christos 	aese	v5.16b, v27.16b						//AES block 5 - round 13
   7279  1.1  christos 	aese	v1.16b, v27.16b						//AES block 1 - round 13
   7280  1.1  christos 	aese	v2.16b, v27.16b						//AES block 2 - round 13
   7281  1.1  christos 
   7282  1.1  christos 	aese	v0.16b, v27.16b						//AES block 0 - round 13
   7283  1.1  christos 	aese	v4.16b, v27.16b						//AES block 4 - round 13
   7284  1.1  christos 	aese	v6.16b, v27.16b						//AES block 6 - round 13
   7285  1.1  christos 
   7286  1.1  christos 	aese	v3.16b, v27.16b						//AES block 3 - round 13
   7287  1.1  christos 	aese	v7.16b, v27.16b						//AES block 7 - round 13
   7288  1.1  christos 	b.ge	.L256_dec_tail						//handle tail
   7289  1.1  christos 
   7290  1.1  christos 	ldp	q8, q9, [x0], #32			//AES block 0, 1 - load ciphertext
   7291  1.1  christos 
   7292  1.1  christos 	ldp	q10, q11, [x0], #32			//AES block 2, 3 - load ciphertext
   7293  1.1  christos 
   7294  1.1  christos 	ldp	q12, q13, [x0], #32			//AES block 4, 5 - load ciphertext
   7295  1.1  christos 
   7296  1.1  christos 	ldp	q14, q15, [x0], #32			//AES block 6, 7 - load ciphertext
   7297  1.1  christos 	cmp	x0, x5				//check if we have <= 8 blocks
   7298  1.1  christos 
   7299  1.1  christos .inst	0xce017121	//eor3 v1.16b, v9.16b, v1.16b, v28.16b				//AES block 1 - result
   7300  1.1  christos .inst	0xce007100	//eor3 v0.16b, v8.16b, v0.16b, v28.16b				//AES block 0 - result
   7301  1.1  christos 	stp	q0, q1, [x2], #32			//AES block 0, 1 - store result
   7302  1.1  christos 
   7303  1.1  christos 	rev32	v0.16b, v30.16b				//CTR block 8
   7304  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8
   7305  1.1  christos .inst	0xce037163	//eor3 v3.16b, v11.16b, v3.16b, v28.16b				//AES block 3 - result
   7306  1.1  christos 
   7307  1.1  christos .inst	0xce0571a5	//eor3 v5.16b, v13.16b, v5.16b, v28.16b				//AES block 5 - result
   7308  1.1  christos 
   7309  1.1  christos .inst	0xce047184	//eor3 v4.16b, v12.16b, v4.16b, v28.16b				//AES block 4 - result
   7310  1.1  christos 	rev32	v1.16b, v30.16b				//CTR block 9
   7311  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 9
   7312  1.1  christos 
   7313  1.1  christos .inst	0xce027142	//eor3 v2.16b, v10.16b, v2.16b, v28.16b				//AES block 2 - result
   7314  1.1  christos 	stp	q2, q3, [x2], #32			//AES block 2, 3 - store result
   7315  1.1  christos 
   7316  1.1  christos 	rev32	v2.16b, v30.16b				//CTR block 10
   7317  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 10
   7318  1.1  christos 
   7319  1.1  christos .inst	0xce0671c6	//eor3 v6.16b, v14.16b, v6.16b, v28.16b				//AES block 6 - result
   7320  1.1  christos 
   7321  1.1  christos 	rev32	v3.16b, v30.16b				//CTR block 11
   7322  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 11
   7323  1.1  christos 	stp	q4, q5, [x2], #32			//AES block 4, 5 - store result
   7324  1.1  christos 
   7325  1.1  christos .inst	0xce0771e7	//eor3 v7.16b, v15.16b, v7.16b, v28.16b				//AES block 7 - result
   7326  1.1  christos 	stp	q6, q7, [x2], #32			//AES block 6, 7 - store result
   7327  1.1  christos 
   7328  1.1  christos 	rev32	v4.16b, v30.16b				//CTR block 12
   7329  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 12
   7330  1.1  christos 	b.ge	.L256_dec_prepretail					//do prepretail
   7331  1.1  christos 
   7332  1.1  christos .L256_dec_main_loop:	//main loop start: each pass runs the 14 AES rounds (rk0..rk13, final xor with rk14) on counter blocks 8k+8..8k+15 in v0-v7, interleaved with GHASH of ciphertext blocks 8k..8k+7 already held in v8-v15
   7333  1.1  christos 	rev32	v5.16b, v30.16b				//CTR block 8k+13
   7334  1.1  christos 	ldp	q26, q27, [x8, #0]					//load rk0, rk1
   7335  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+13
   7336  1.1  christos 
   7337  1.1  christos 	rev64	v9.16b, v9.16b						//GHASH block 8k+1
   7338  1.1  christos 	ldr	q23, [x3, #176]				//load h7l | h7h
   7339  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8				//h7 - swap the two 64-bit halves
   7340  1.1  christos 	ldr	q25, [x3, #208]				//load h8l | h8h
   7341  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8				//h8 - swap the two 64-bit halves
   7342  1.1  christos 
   7343  1.1  christos 	rev32	v6.16b, v30.16b				//CTR block 8k+14
   7344  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+14
   7345  1.1  christos 	rev64	v8.16b, v8.16b						//GHASH block 8k
   7346  1.1  christos 
   7347  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8				//PRE 0 - swap the 64-bit halves of the running GHASH value in v19
   7348  1.1  christos 	rev64	v12.16b, v12.16b						//GHASH block 8k+4
   7349  1.1  christos 	rev64	v11.16b, v11.16b						//GHASH block 8k+3
   7350  1.1  christos 
   7351  1.1  christos 	rev32	v7.16b, v30.16b				//CTR block 8k+15
   7352  1.1  christos 	rev64	v15.16b, v15.16b						//GHASH block 8k+7
   7353  1.1  christos 
   7354  1.1  christos 	aese	v3.16b, v26.16b
   7355  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 0
   7356  1.1  christos 	aese	v6.16b, v26.16b
   7357  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 0
   7358  1.1  christos 	aese	v2.16b, v26.16b
   7359  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 0
   7360  1.1  christos 
   7361  1.1  christos 	aese	v7.16b, v26.16b
   7362  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 0
   7363  1.1  christos 	aese	v0.16b, v26.16b
   7364  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 0
   7365  1.1  christos 	aese	v5.16b, v26.16b
   7366  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 0
   7367  1.1  christos 
   7368  1.1  christos 	aese	v4.16b, v26.16b
   7369  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 0
   7370  1.1  christos 	aese	v1.16b, v26.16b
   7371  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 0
   7372  1.1  christos 	ldp	q28, q26, [x8, #32]				//load rk2, rk3
   7373  1.1  christos 
   7374  1.1  christos 	eor	v8.16b, v8.16b, v19.16b					//PRE 1 - fold the running GHASH value into block 8k before it is multiplied
   7375  1.1  christos 	ldr	q20, [x3, #128]				//load h5l | h5h
   7376  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8				//h5 - swap the two 64-bit halves
   7377  1.1  christos 	ldr	q22, [x3, #160]				//load h6l | h6h
   7378  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8				//h6 - swap the two 64-bit halves
   7379  1.1  christos 	aese	v6.16b, v27.16b
   7380  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 1
   7381  1.1  christos 
   7382  1.1  christos 	aese	v4.16b, v27.16b
   7383  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 1
   7384  1.1  christos 	rev64	v10.16b, v10.16b						//GHASH block 8k+2
   7385  1.1  christos 	aese	v3.16b, v27.16b
   7386  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 1
   7387  1.1  christos 
   7388  1.1  christos 	aese	v0.16b, v27.16b
   7389  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 1
   7390  1.1  christos 	aese	v5.16b, v27.16b
   7391  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 1
   7392  1.1  christos 	aese	v2.16b, v27.16b
   7393  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 1
   7394  1.1  christos 
   7395  1.1  christos 	trn1	v18.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   7396  1.1  christos 	aese	v7.16b, v27.16b
   7397  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 1
   7398  1.1  christos 	aese	v1.16b, v27.16b
   7399  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 1
   7400  1.1  christos 
   7401  1.1  christos 	aese	v4.16b, v28.16b
   7402  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 2
   7403  1.1  christos 	aese	v0.16b, v28.16b
   7404  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 2
   7405  1.1  christos 	aese	v3.16b, v28.16b
   7406  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 2
   7407  1.1  christos 
   7408  1.1  christos 	aese	v6.16b, v28.16b
   7409  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 2
   7410  1.1  christos 	aese	v7.16b, v28.16b
   7411  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 2
   7412  1.1  christos 	pmull	v19.1q, v8.1d, v25.1d				//GHASH block 8k - low
   7413  1.1  christos 
   7414  1.1  christos 	aese	v5.16b, v28.16b
   7415  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 2
   7416  1.1  christos 	aese	v2.16b, v28.16b
   7417  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 2
   7418  1.1  christos 	aese	v1.16b, v28.16b
   7419  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 2
   7420  1.1  christos 
   7421  1.1  christos 	ldp	q27, q28, [x8, #64]				//load rk4, rk5
   7422  1.1  christos 	pmull2	v29.1q, v10.2d, v22.2d				//GHASH block 8k+2 - high
   7423  1.1  christos 	aese	v3.16b, v26.16b
   7424  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 3
   7425  1.1  christos 
   7426  1.1  christos 	aese	v0.16b, v26.16b
   7427  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 3
   7428  1.1  christos 	pmull2	v16.1q, v9.2d, v23.2d				//GHASH block 8k+1 - high
   7429  1.1  christos 	pmull	v23.1q, v9.1d, v23.1d				//GHASH block 8k+1 - low
   7430  1.1  christos 
   7431  1.1  christos 	aese	v5.16b, v26.16b
   7432  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 3
   7433  1.1  christos 	aese	v6.16b, v26.16b
   7434  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 3
   7435  1.1  christos 	pmull2	v17.1q, v8.2d, v25.2d				//GHASH block 8k - high
   7436  1.1  christos 
   7437  1.1  christos 	aese	v4.16b, v26.16b
   7438  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 3
   7439  1.1  christos 	aese	v1.16b, v26.16b
   7440  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 3
   7441  1.1  christos 	trn2	v8.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   7442  1.1  christos 
   7443  1.1  christos 	pmull2	v9.1q, v11.2d, v20.2d				//GHASH block 8k+3 - high
   7444  1.1  christos 	aese	v2.16b, v26.16b
   7445  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 3
   7446  1.1  christos 	eor	v17.16b, v17.16b, v16.16b				//GHASH block 8k+1 - high
   7447  1.1  christos 
   7448  1.1  christos 	aese	v5.16b, v27.16b
   7449  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 4
   7450  1.1  christos 	aese	v7.16b, v26.16b
   7451  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 3
   7452  1.1  christos 	aese	v3.16b, v27.16b
   7453  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 4
   7454  1.1  christos 
   7455  1.1  christos 	aese	v2.16b, v27.16b
   7456  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 4
   7457  1.1  christos 	aese	v0.16b, v27.16b
   7458  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 4
   7459  1.1  christos 	aese	v1.16b, v27.16b
   7460  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 4
   7461  1.1  christos 
   7462  1.1  christos 	aese	v6.16b, v27.16b
   7463  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 4
   7464  1.1  christos 	aese	v7.16b, v27.16b
   7465  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 4
   7466  1.1  christos 	aese	v4.16b, v27.16b
   7467  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 4
   7468  1.1  christos 
   7469  1.1  christos 	ldr	q21, [x3, #144]				//load h6k | h5k
   7470  1.1  christos 	ldr	q24, [x3, #192]				//load h8k | h7k
   7471  1.1  christos 	eor	v8.16b, v8.16b, v18.16b			//GHASH block 8k, 8k+1 - mid
   7472  1.1  christos 	pmull	v22.1q, v10.1d, v22.1d				//GHASH block 8k+2 - low
   7473  1.1  christos 
   7474  1.1  christos 	ldp	q26, q27, [x8, #96]				//load rk6, rk7
   7475  1.1  christos 	aese	v5.16b, v28.16b
   7476  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 5
   7477  1.1  christos 	eor	v19.16b, v19.16b, v23.16b				//GHASH block 8k+1 - low
   7478  1.1  christos 
   7479  1.1  christos 	aese	v0.16b, v28.16b
   7480  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 5
   7481  1.1  christos 	aese	v3.16b, v28.16b
   7482  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 5
   7483  1.1  christos 	aese	v7.16b, v28.16b
   7484  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 5
   7485  1.1  christos 
   7486  1.1  christos 	aese	v1.16b, v28.16b
   7487  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 5
   7488  1.1  christos 	aese	v2.16b, v28.16b
   7489  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 5
   7490  1.1  christos 	aese	v6.16b, v28.16b
   7491  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 5
   7492  1.1  christos 
   7493  1.1  christos .inst	0xce1d2631	//eor3 v17.16b, v17.16b, v29.16b, v9.16b			//GHASH block 8k+2, 8k+3 - high
   7494  1.1  christos 	trn1	v29.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   7495  1.1  christos 	rev64	v13.16b, v13.16b						//GHASH block 8k+5
   7496  1.1  christos 
   7497  1.1  christos 	pmull2	v18.1q, v8.2d, v24.2d				//GHASH block 8k - mid
   7498  1.1  christos 	pmull	v24.1q, v8.1d, v24.1d				//GHASH block 8k+1 - mid
   7499  1.1  christos 	trn2	v10.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   7500  1.1  christos 
   7501  1.1  christos 	aese	v3.16b, v26.16b
   7502  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 6
   7503  1.1  christos 	aese	v0.16b, v26.16b
   7504  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 6
   7505  1.1  christos 	aese	v4.16b, v28.16b
   7506  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 5
   7507  1.1  christos 
   7508  1.1  christos 	trn1	v16.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   7509  1.1  christos 	aese	v1.16b, v26.16b
   7510  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 6
   7511  1.1  christos 	aese	v6.16b, v26.16b
   7512  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 6
   7513  1.1  christos 
   7514  1.1  christos 	eor	v10.16b, v10.16b, v29.16b				//GHASH block 8k+2, 8k+3 - mid
   7515  1.1  christos 	pmull	v20.1q, v11.1d, v20.1d				//GHASH block 8k+3 - low
   7516  1.1  christos 	aese	v4.16b, v26.16b
   7517  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 6
   7518  1.1  christos 
   7519  1.1  christos 	aese	v2.16b, v26.16b
   7520  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 6
   7521  1.1  christos 	aese	v5.16b, v26.16b
   7522  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 6
   7523  1.1  christos 	aese	v7.16b, v26.16b
   7524  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 6
   7525  1.1  christos 
   7526  1.1  christos 	pmull2	v29.1q, v10.2d, v21.2d				//GHASH block 8k+2 - mid
   7527  1.1  christos 	pmull	v21.1q, v10.1d, v21.1d				//GHASH block 8k+3 - mid
   7528  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+2, 8k+3 - low
   7529  1.1  christos 
   7530  1.1  christos 	ldr	q23, [x3, #80]				//load h3l | h3h
   7531  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8				//h3 - swap the two 64-bit halves
   7532  1.1  christos 	ldr	q25, [x3, #112]				//load h4l | h4h
   7533  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8				//h4 - swap the two 64-bit halves
   7534  1.1  christos 	rev64	v14.16b, v14.16b						//GHASH block 8k+6
   7535  1.1  christos 	eor	v18.16b, v18.16b, v24.16b				//GHASH block 8k+1 - mid
   7536  1.1  christos 
   7537  1.1  christos 	aese	v2.16b, v27.16b
   7538  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 7
   7539  1.1  christos 	aese	v5.16b, v27.16b
   7540  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 7
   7541  1.1  christos 	ldp	q28, q26, [x8, #128]				//load rk8, rk9
   7542  1.1  christos 
   7543  1.1  christos 	ldr	q20, [x3, #32]				//load h1l | h1h
   7544  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8				//h1 - swap the two 64-bit halves
   7545  1.1  christos 	ldr	q22, [x3, #64]				//load h2l | h2h
   7546  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8				//h2 - swap the two 64-bit halves
   7547  1.1  christos .inst	0xce157652	//eor3 v18.16b, v18.16b, v21.16b, v29.16b			//GHASH block 8k+2, 8k+3 - mid
   7548  1.1  christos 	aese	v7.16b, v27.16b
   7549  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 7
   7550  1.1  christos 
   7551  1.1  christos 	aese	v1.16b, v27.16b
   7552  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 7
   7553  1.1  christos 	aese	v3.16b, v27.16b
   7554  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 7
   7555  1.1  christos 	aese	v6.16b, v27.16b
   7556  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 7
   7557  1.1  christos 
   7558  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   7559  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   7560  1.1  christos 	aese	v0.16b, v27.16b
   7561  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 7
   7562  1.1  christos 	aese	v4.16b, v27.16b
   7563  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 7
   7564  1.1  christos 
   7565  1.1  christos 	pmull2	v8.1q, v12.2d, v25.2d				//GHASH block 8k+4 - high
   7566  1.1  christos 	pmull	v25.1q, v12.1d, v25.1d				//GHASH block 8k+4 - low
   7567  1.1  christos 	trn2	v12.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   7568  1.1  christos 
   7569  1.1  christos 	aese	v5.16b, v28.16b
   7570  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 8
   7571  1.1  christos 	pmull2	v10.1q, v13.2d, v23.2d				//GHASH block 8k+5 - high
   7572  1.1  christos 	aese	v2.16b, v28.16b
   7573  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 8
   7574  1.1  christos 
   7575  1.1  christos 	aese	v6.16b, v28.16b
   7576  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 8
   7577  1.1  christos 	pmull	v23.1q, v13.1d, v23.1d				//GHASH block 8k+5 - low
   7578  1.1  christos 	aese	v1.16b, v28.16b
   7579  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 8
   7580  1.1  christos 
   7581  1.1  christos 	aese	v4.16b, v28.16b
   7582  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 8
   7583  1.1  christos 	aese	v0.16b, v28.16b
   7584  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 8
   7585  1.1  christos 	pmull2	v11.1q, v14.2d, v22.2d				//GHASH block 8k+6 - high
   7586  1.1  christos 
   7587  1.1  christos 	trn1	v13.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   7588  1.1  christos 	aese	v3.16b, v28.16b
   7589  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 8
   7590  1.1  christos 	aese	v7.16b, v28.16b
   7591  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 8
   7592  1.1  christos 
   7593  1.1  christos 	ldp	q27, q28, [x8, #160]				//load rk10, rk11
   7594  1.1  christos 	pmull	v22.1q, v14.1d, v22.1d				//GHASH block 8k+6 - low
   7595  1.1  christos 	trn2	v14.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   7596  1.1  christos 
   7597  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+15
   7598  1.1  christos .inst	0xce082a31	//eor3 v17.16b, v17.16b, v8.16b, v10.16b			//GHASH block 8k+4, 8k+5 - high
   7599  1.1  christos 	aese	v3.16b, v26.16b
   7600  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 9
   7601  1.1  christos 
   7602  1.1  christos 	aese	v6.16b, v26.16b
   7603  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 9
   7604  1.1  christos 	eor	v14.16b, v14.16b, v13.16b				//GHASH block 8k+6, 8k+7 - mid
   7605  1.1  christos 	aese	v5.16b, v26.16b
   7606  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 9
   7607  1.1  christos 
   7608  1.1  christos 	ldp	q8, q9, [x0], #32			//AES block 8k+8, 8k+9 - load ciphertext
   7609  1.1  christos 	eor	v12.16b, v12.16b, v16.16b				//GHASH block 8k+4, 8k+5 - mid
   7610  1.1  christos 	aese	v7.16b, v26.16b
   7611  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 9
   7612  1.1  christos 
   7613  1.1  christos 	pmull2	v13.1q, v14.2d, v21.2d				//GHASH block 8k+6 - mid
   7614  1.1  christos 	aese	v2.16b, v26.16b
   7615  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 9
   7616  1.1  christos 	aese	v1.16b, v26.16b
   7617  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 9
   7618  1.1  christos 
   7619  1.1  christos 	pmull2	v16.1q, v12.2d, v24.2d				//GHASH block 8k+4 - mid
   7620  1.1  christos 	pmull	v24.1q, v12.1d, v24.1d				//GHASH block 8k+5 - mid
   7621  1.1  christos 	pmull2	v12.1q, v15.2d, v20.2d				//GHASH block 8k+7 - high
   7622  1.1  christos 
   7623  1.1  christos 	pmull	v20.1q, v15.1d, v20.1d				//GHASH block 8k+7 - low
   7624  1.1  christos 	aese	v3.16b, v27.16b
   7625  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 10
   7626  1.1  christos 	aese	v6.16b, v27.16b
   7627  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 10
   7628  1.1  christos 
   7629  1.1  christos 	pmull	v21.1q, v14.1d, v21.1d				//GHASH block 8k+7 - mid
   7630  1.1  christos 	aese	v0.16b, v26.16b
   7631  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 9
   7632  1.1  christos .inst	0xce195e73	//eor3 v19.16b, v19.16b, v25.16b, v23.16b			//GHASH block 8k+4, 8k+5 - low
   7633  1.1  christos 
   7634  1.1  christos 	aese	v4.16b, v26.16b
   7635  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 9
   7636  1.1  christos .inst	0xce184252	//eor3 v18.16b, v18.16b, v24.16b, v16.16b			//GHASH block 8k+4, 8k+5 - mid
   7637  1.1  christos .inst	0xce0b3231	//eor3 v17.16b, v17.16b, v11.16b, v12.16b			//GHASH block 8k+6, 8k+7 - high
   7638  1.1  christos 
   7639  1.1  christos 	aese	v2.16b, v27.16b
   7640  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 10
   7641  1.1  christos 	aese	v5.16b, v27.16b
   7642  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 10
   7643  1.1  christos 	aese	v7.16b, v27.16b
   7644  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 10
   7645  1.1  christos 
   7646  1.1  christos 	aese	v1.16b, v27.16b
   7647  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 10
   7648  1.1  christos 	aese	v0.16b, v27.16b
   7649  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 10
   7650  1.1  christos 	aese	v4.16b, v27.16b
   7651  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 10
   7652  1.1  christos 
   7653  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+6, 8k+7 - low
   7654  1.1  christos 	rev32	v20.16b, v30.16b					//CTR block 8k+16 - staged in v20 because v0 is still mid-encryption; copied into v0 after the store below
   7655  1.1  christos 	ldr	d16, [x10]			//MODULO - load modulo constant
   7656  1.1  christos 
   7657  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+16
   7658  1.1  christos 	aese	v1.16b, v28.16b
   7659  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 11
   7660  1.1  christos 	ldp	q26, q27, [x8, #192]				//load rk12, rk13
   7661  1.1  christos 
   7662  1.1  christos 	aese	v0.16b, v28.16b
   7663  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 11
   7664  1.1  christos 	aese	v6.16b, v28.16b
   7665  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 11
   7666  1.1  christos 
   7667  1.1  christos .inst	0xce153652	//eor3 v18.16b, v18.16b, v21.16b, v13.16b			//GHASH block 8k+6, 8k+7 - mid
   7668  1.1  christos 	rev32	v22.16b, v30.16b					//CTR block 8k+17
   7669  1.1  christos 	aese	v2.16b, v28.16b
   7670  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 11
   7671  1.1  christos 
   7672  1.1  christos 	ldp	q10, q11, [x0], #32			//AES block 8k+10, 8k+11 - load ciphertext
   7673  1.1  christos 	aese	v7.16b, v28.16b
   7674  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 11
   7675  1.1  christos 	ext	v21.16b, v17.16b, v17.16b, #8				 //MODULO - other top alignment
   7676  1.1  christos 
   7677  1.1  christos 	aese	v5.16b, v28.16b
   7678  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 11
   7679  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+17
   7680  1.1  christos 	aese	v3.16b, v28.16b
   7681  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 11
   7682  1.1  christos 
   7683  1.1  christos 	aese	v2.16b, v26.16b
   7684  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 12
   7685  1.1  christos 	aese	v7.16b, v26.16b
   7686  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 12
   7687  1.1  christos 	aese	v6.16b, v26.16b
   7688  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 12
   7689  1.1  christos 
   7690  1.1  christos 	rev32	v23.16b, v30.16b					//CTR block 8k+18
   7691  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+18
   7692  1.1  christos 	pmull	v29.1q, v17.1d, v16.1d			//MODULO - top 64b align with mid
   7693  1.1  christos 
   7694  1.1  christos .inst	0xce114e52	//eor3 v18.16b, v18.16b, v17.16b, v19.16b		 	//MODULO - karatsuba tidy up
   7695  1.1  christos 	aese	v1.16b, v26.16b
   7696  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 12
   7697  1.1  christos 	aese	v4.16b, v28.16b
   7698  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 11
   7699  1.1  christos 
   7700  1.1  christos 	ldr	q28, [x8, #224]					//load rk14
   7701  1.1  christos 	aese	v5.16b, v26.16b
   7702  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 12
   7703  1.1  christos 	aese	v3.16b, v26.16b
   7704  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 12
   7705  1.1  christos 
   7706  1.1  christos .inst	0xce1d5652	//eor3 v18.16b, v18.16b, v29.16b, v21.16b			//MODULO - fold into mid
   7707  1.1  christos 	aese	v0.16b, v26.16b
   7708  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 12
   7709  1.1  christos 	aese	v4.16b, v26.16b
   7710  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 12
   7711  1.1  christos 
   7712  1.1  christos 	ldp	q12, q13, [x0], #32			//AES block 8k+12, 8k+13 - load ciphertext
   7713  1.1  christos 	aese	v1.16b, v27.16b						//AES block 8k+9 - round 13
   7714  1.1  christos 	aese	v2.16b, v27.16b						//AES block 8k+10 - round 13
   7715  1.1  christos 
   7716  1.1  christos 	ldp	q14, q15, [x0], #32			//AES block 8k+14, 8k+15 - load ciphertext
   7717  1.1  christos 	aese	v0.16b, v27.16b						//AES block 8k+8 - round 13
   7718  1.1  christos 	aese	v5.16b, v27.16b						//AES block 8k+13 - round 13
   7719  1.1  christos 
   7720  1.1  christos 	rev32	v25.16b, v30.16b					//CTR block 8k+19
   7721  1.1  christos .inst	0xce027142	//eor3 v2.16b, v10.16b, v2.16b, v28.16b				//AES block 8k+10 - result
   7722  1.1  christos .inst	0xce017121	//eor3 v1.16b, v9.16b, v1.16b, v28.16b				//AES block 8k+9 - result
   7723  1.1  christos 
   7724  1.1  christos 	ext	v21.16b, v18.16b, v18.16b, #8				//MODULO - other mid alignment
   7725  1.1  christos 	aese	v7.16b, v27.16b						//AES block 8k+15 - round 13
   7726  1.1  christos 
   7727  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+19
   7728  1.1  christos 	pmull	v17.1q, v18.1d, v16.1d			//MODULO - mid 64b align with low
   7729  1.1  christos 	aese	v4.16b, v27.16b						//AES block 8k+12 - round 13
   7730  1.1  christos 
   7731  1.1  christos .inst	0xce0571a5	//eor3 v5.16b, v13.16b, v5.16b, v28.16b				//AES block 8k+13 - result
   7732  1.1  christos .inst	0xce007100	//eor3 v0.16b, v8.16b, v0.16b, v28.16b				//AES block 8k+8 - result
   7733  1.1  christos 	aese	v3.16b, v27.16b						//AES block 8k+11 - round 13
   7734  1.1  christos 
   7735  1.1  christos 	stp	q0, q1, [x2], #32			//AES block 8k+8, 8k+9 - store result
   7736  1.1  christos 	mov	v0.16b, v20.16b					//CTR block 8k+16
   7737  1.1  christos .inst	0xce047184	//eor3 v4.16b, v12.16b, v4.16b, v28.16b				//AES block 8k+12 - result
   7738  1.1  christos 
   7739  1.1  christos .inst	0xce154673	//eor3 v19.16b, v19.16b, v21.16b, v17.16b		 	//MODULO - fold into low
   7740  1.1  christos .inst	0xce037163	//eor3 v3.16b, v11.16b, v3.16b, v28.16b				//AES block 8k+11 - result
   7741  1.1  christos 	stp	q2, q3, [x2], #32			//AES block 8k+10, 8k+11 - store result
   7742  1.1  christos 
   7743  1.1  christos 	mov	v3.16b, v25.16b					//CTR block 8k+19
   7744  1.1  christos 	mov	v2.16b, v23.16b					//CTR block 8k+18
   7745  1.1  christos 	aese	v6.16b, v27.16b						//AES block 8k+14 - round 13
   7746  1.1  christos 
   7747  1.1  christos 	mov	v1.16b, v22.16b					//CTR block 8k+17
   7748  1.1  christos 	stp	q4, q5, [x2], #32			//AES block 8k+12, 8k+13 - store result
   7749  1.1  christos .inst	0xce0771e7	//eor3 v7.16b, v15.16b, v7.16b, v28.16b				//AES block 8k+15 - result
   7750  1.1  christos 
   7751  1.1  christos .inst	0xce0671c6	//eor3 v6.16b, v14.16b, v6.16b, v28.16b				//AES block 8k+14 - result
   7752  1.1  christos 	rev32	v4.16b, v30.16b				//CTR block 8k+20
   7753  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+20
   7754  1.1  christos 
   7755  1.1  christos 	cmp	x0, x5				//LOOP CONTROL - compare the post-incremented input pointer x0 with the main-loop bound in x5
   7756  1.1  christos 	stp	q6, q7, [x2], #32			//AES block 8k+14, 8k+15 - store result
   7757  1.1  christos 	b.lt	.L256_dec_main_loop				//repeat while x0 < x5 (b.lt is a signed compare); otherwise fall through to .L256_dec_prepretail
   7758  1.1  christos 
   7759  1.1  christos .L256_dec_prepretail:	//PREPRETAIL
   7760  1.1  christos 	ldp	q26, q27, [x8, #0]					//load rk0, rk1
   7761  1.1  christos 	rev32	v5.16b, v30.16b				//CTR block 8k+13
   7762  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+13
   7763  1.1  christos 
   7764  1.1  christos 	rev64	v12.16b, v12.16b						//GHASH block 8k+4
   7765  1.1  christos 	ldr	q21, [x3, #144]				//load h6k | h5k
   7766  1.1  christos 	ldr	q24, [x3, #192]				//load h8k | h7k
   7767  1.1  christos 
   7768  1.1  christos 	rev32	v6.16b, v30.16b				//CTR block 8k+14
   7769  1.1  christos 	rev64	v8.16b, v8.16b						//GHASH block 8k
   7770  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+14
   7771  1.1  christos 
   7772  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8				//PRE 0
   7773  1.1  christos 	ldr	q23, [x3, #176]				//load h7l | h7h
   7774  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   7775  1.1  christos 	ldr	q25, [x3, #208]				//load h8l | h8h
   7776  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   7777  1.1  christos 	rev64	v9.16b, v9.16b						//GHASH block 8k+1
   7778  1.1  christos 
   7779  1.1  christos 	rev32	v7.16b, v30.16b				//CTR block 8k+15
   7780  1.1  christos 	rev64	v10.16b, v10.16b						//GHASH block 8k+2
   7781  1.1  christos 	ldr	q20, [x3, #128]				//load h5l | h5h
   7782  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   7783  1.1  christos 	ldr	q22, [x3, #160]				//load h6l | h6h
   7784  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   7785  1.1  christos 
   7786  1.1  christos 	aese	v0.16b, v26.16b
   7787  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 0
   7788  1.1  christos 	aese	v1.16b, v26.16b
   7789  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 0
   7790  1.1  christos 	aese	v4.16b, v26.16b
   7791  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 0
   7792  1.1  christos 
   7793  1.1  christos 	aese	v3.16b, v26.16b
   7794  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 0
   7795  1.1  christos 	aese	v5.16b, v26.16b
   7796  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 0
   7797  1.1  christos 	aese	v6.16b, v26.16b
   7798  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 0
   7799  1.1  christos 
   7800  1.1  christos 	aese	v4.16b, v27.16b
   7801  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 1
   7802  1.1  christos 	aese	v7.16b, v26.16b
   7803  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 0
   7804  1.1  christos 	aese	v2.16b, v26.16b
   7805  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 0
   7806  1.1  christos 
   7807  1.1  christos 	ldp	q28, q26, [x8, #32]				//load rk2, rk3
   7808  1.1  christos 	aese	v0.16b, v27.16b
   7809  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 1
   7810  1.1  christos 	eor	v8.16b, v8.16b, v19.16b					//PRE 1
   7811  1.1  christos 
   7812  1.1  christos 	aese	v7.16b, v27.16b
   7813  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 1
   7814  1.1  christos 	aese	v6.16b, v27.16b
   7815  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 1
   7816  1.1  christos 	aese	v2.16b, v27.16b
   7817  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 1
   7818  1.1  christos 
   7819  1.1  christos 	aese	v3.16b, v27.16b
   7820  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 1
   7821  1.1  christos 	aese	v1.16b, v27.16b
   7822  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 1
   7823  1.1  christos 	aese	v5.16b, v27.16b
   7824  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 1
   7825  1.1  christos 
   7826  1.1  christos 	pmull2	v16.1q, v9.2d, v23.2d				//GHASH block 8k+1 - high
   7827  1.1  christos 	trn1	v18.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   7828  1.1  christos 	pmull	v19.1q, v8.1d, v25.1d				//GHASH block 8k - low
   7829  1.1  christos 
   7830  1.1  christos 	rev64	v11.16b, v11.16b						//GHASH block 8k+3
   7831  1.1  christos 	pmull	v23.1q, v9.1d, v23.1d				//GHASH block 8k+1 - low
   7832  1.1  christos 
   7833  1.1  christos 	aese	v5.16b, v28.16b
   7834  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 2
   7835  1.1  christos 	aese	v7.16b, v28.16b
   7836  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 2
   7837  1.1  christos 	aese	v1.16b, v28.16b
   7838  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 2
   7839  1.1  christos 
   7840  1.1  christos 	aese	v3.16b, v28.16b
   7841  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 2
   7842  1.1  christos 	aese	v6.16b, v28.16b
   7843  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 2
   7844  1.1  christos 	pmull2	v17.1q, v8.2d, v25.2d				//GHASH block 8k - high
   7845  1.1  christos 
   7846  1.1  christos 	aese	v0.16b, v28.16b
   7847  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 2
   7848  1.1  christos 	aese	v7.16b, v26.16b
   7849  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 3
   7850  1.1  christos 
   7851  1.1  christos 	aese	v5.16b, v26.16b
   7852  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 3
   7853  1.1  christos 	rev64	v14.16b, v14.16b						//GHASH block 8k+6
   7854  1.1  christos 
   7855  1.1  christos 	aese	v0.16b, v26.16b
   7856  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 3
   7857  1.1  christos 	aese	v2.16b, v28.16b
   7858  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 2
   7859  1.1  christos 	aese	v6.16b, v26.16b
   7860  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 3
   7861  1.1  christos 
   7862  1.1  christos 	pmull2	v29.1q, v10.2d, v22.2d				//GHASH block 8k+2 - high
   7863  1.1  christos 	trn2	v8.2d, v9.2d, v8.2d				//GHASH block 8k, 8k+1 - mid
   7864  1.1  christos 	aese	v4.16b, v28.16b
   7865  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 2
   7866  1.1  christos 
   7867  1.1  christos 	ldp	q27, q28, [x8, #64]				//load rk4, rk5
   7868  1.1  christos 	aese	v1.16b, v26.16b
   7869  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 3
   7870  1.1  christos 	pmull2	v9.1q, v11.2d, v20.2d				//GHASH block 8k+3 - high
   7871  1.1  christos 
   7872  1.1  christos 	aese	v2.16b, v26.16b
   7873  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 3
   7874  1.1  christos 	eor	v17.16b, v17.16b, v16.16b				//GHASH block 8k+1 - high
   7875  1.1  christos 	eor	v8.16b, v8.16b, v18.16b			//GHASH block 8k, 8k+1 - mid
   7876  1.1  christos 
   7877  1.1  christos 	aese	v4.16b, v26.16b
   7878  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 3
   7879  1.1  christos 	pmull	v22.1q, v10.1d, v22.1d				//GHASH block 8k+2 - low
   7880  1.1  christos 	aese	v3.16b, v26.16b
   7881  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 3
   7882  1.1  christos 
   7883  1.1  christos .inst	0xce1d2631	//eor3 v17.16b, v17.16b, v29.16b, v9.16b			//GHASH block 8k+2, 8k+3 - high
   7884  1.1  christos 	trn1	v29.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   7885  1.1  christos 	trn2	v10.2d, v11.2d, v10.2d				//GHASH block 8k+2, 8k+3 - mid
   7886  1.1  christos 
   7887  1.1  christos 	pmull2	v18.1q, v8.2d, v24.2d				//GHASH block 8k	- mid
   7888  1.1  christos 	pmull	v20.1q, v11.1d, v20.1d				//GHASH block 8k+3 - low
   7889  1.1  christos 	eor	v19.16b, v19.16b, v23.16b				//GHASH block 8k+1 - low
   7890  1.1  christos 
   7891  1.1  christos 	pmull	v24.1q, v8.1d, v24.1d				//GHASH block 8k+1 - mid
   7892  1.1  christos 	aese	v5.16b, v27.16b
   7893  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 4
   7894  1.1  christos 	aese	v0.16b, v27.16b
   7895  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 4
   7896  1.1  christos 
   7897  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+2, 8k+3 - low
   7898  1.1  christos 	ldr	q20, [x3, #32]				//load h1l | h1h
   7899  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   7900  1.1  christos 	ldr	q22, [x3, #64]				//load h2l | h2h
   7901  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8
   7902  1.1  christos 	aese	v7.16b, v27.16b
   7903  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 4
   7904  1.1  christos 
   7905  1.1  christos 	aese	v2.16b, v27.16b
   7906  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 4
   7907  1.1  christos 	aese	v6.16b, v27.16b
   7908  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 4
   7909  1.1  christos 	eor	v18.16b, v18.16b, v24.16b				//GHASH block 8k+1 - mid
   7910  1.1  christos 
   7911  1.1  christos 	eor	v10.16b, v10.16b, v29.16b				//GHASH block 8k+2, 8k+3 - mid
   7912  1.1  christos 	aese	v7.16b, v28.16b
   7913  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 5
   7914  1.1  christos 	aese	v1.16b, v27.16b
   7915  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 4
   7916  1.1  christos 
   7917  1.1  christos 	aese	v2.16b, v28.16b
   7918  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 5
   7919  1.1  christos 	aese	v3.16b, v27.16b
   7920  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 4
   7921  1.1  christos 	aese	v4.16b, v27.16b
   7922  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 4
   7923  1.1  christos 
   7924  1.1  christos 	aese	v1.16b, v28.16b
   7925  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 5
   7926  1.1  christos 	pmull2	v29.1q, v10.2d, v21.2d				//GHASH block 8k+2 - mid
   7927  1.1  christos 	aese	v6.16b, v28.16b
   7928  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 5
   7929  1.1  christos 
   7930  1.1  christos 	aese	v4.16b, v28.16b
   7931  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 5
   7932  1.1  christos 	aese	v3.16b, v28.16b
   7933  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 5
   7934  1.1  christos 	pmull	v21.1q, v10.1d, v21.1d				//GHASH block 8k+3 - mid
   7935  1.1  christos 
   7936  1.1  christos 	aese	v0.16b, v28.16b
   7937  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 5
   7938  1.1  christos 	aese	v5.16b, v28.16b
   7939  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 5
   7940  1.1  christos 	ldp	q26, q27, [x8, #96]				//load rk6, rk7
   7941  1.1  christos 
   7942  1.1  christos 	ldr	q23, [x3, #80]				//load h3l | h3h
   7943  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8
   7944  1.1  christos 	ldr	q25, [x3, #112]				//load h4l | h4h
   7945  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8
   7946  1.1  christos 	rev64	v15.16b, v15.16b						//GHASH block 8k+7
   7947  1.1  christos 	rev64	v13.16b, v13.16b						//GHASH block 8k+5
   7948  1.1  christos 
   7949  1.1  christos .inst	0xce157652	//eor3 v18.16b, v18.16b, v21.16b, v29.16b			//GHASH block 8k+2, 8k+3 - mid
   7950  1.1  christos 
   7951  1.1  christos 	trn1	v16.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   7952  1.1  christos 
   7953  1.1  christos 	aese	v0.16b, v26.16b
   7954  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 6
   7955  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   7956  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   7957  1.1  christos 	aese	v6.16b, v26.16b
   7958  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 6
   7959  1.1  christos 
   7960  1.1  christos 	aese	v5.16b, v26.16b
   7961  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 6
   7962  1.1  christos 	aese	v7.16b, v26.16b
   7963  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 6
   7964  1.1  christos 
   7965  1.1  christos 	pmull2	v8.1q, v12.2d, v25.2d				//GHASH block 8k+4 - high
   7966  1.1  christos 	pmull2	v10.1q, v13.2d, v23.2d				//GHASH block 8k+5 - high
   7967  1.1  christos 	pmull	v25.1q, v12.1d, v25.1d				//GHASH block 8k+4 - low
   7968  1.1  christos 
   7969  1.1  christos 	trn2	v12.2d, v13.2d, v12.2d				//GHASH block 8k+4, 8k+5 - mid
   7970  1.1  christos 	pmull	v23.1q, v13.1d, v23.1d				//GHASH block 8k+5 - low
   7971  1.1  christos 	trn1	v13.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   7972  1.1  christos 
   7973  1.1  christos 	aese	v7.16b, v27.16b
   7974  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 7
   7975  1.1  christos 	pmull2	v11.1q, v14.2d, v22.2d				//GHASH block 8k+6 - high
   7976  1.1  christos 	aese	v1.16b, v26.16b
   7977  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 6
   7978  1.1  christos 
   7979  1.1  christos 	aese	v2.16b, v26.16b
   7980  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 6
   7981  1.1  christos 	aese	v3.16b, v26.16b
   7982  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 6
   7983  1.1  christos 	aese	v4.16b, v26.16b
   7984  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 6
   7985  1.1  christos 
   7986  1.1  christos 	ldp	q28, q26, [x8, #128]				//load rk8, rk9
   7987  1.1  christos 	pmull	v22.1q, v14.1d, v22.1d				//GHASH block 8k+6 - low
   7988  1.1  christos 	aese	v5.16b, v27.16b
   7989  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 7
   7990  1.1  christos 
   7991  1.1  christos 	aese	v1.16b, v27.16b
   7992  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 7
   7993  1.1  christos 	aese	v4.16b, v27.16b
   7994  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 7
   7995  1.1  christos 
   7996  1.1  christos 	aese	v6.16b, v27.16b
   7997  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 7
   7998  1.1  christos 	aese	v2.16b, v27.16b
   7999  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 7
   8000  1.1  christos .inst	0xce082a31	//eor3 v17.16b, v17.16b, v8.16b, v10.16b			//GHASH block 8k+4, 8k+5 - high
   8001  1.1  christos 
   8002  1.1  christos 	aese	v0.16b, v27.16b
   8003  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 7
   8004  1.1  christos 	trn2	v14.2d, v15.2d, v14.2d				//GHASH block 8k+6, 8k+7 - mid
   8005  1.1  christos 	aese	v3.16b, v27.16b
   8006  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 7
   8007  1.1  christos 
   8008  1.1  christos 	aese	v0.16b, v28.16b
   8009  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 8
   8010  1.1  christos 	aese	v7.16b, v28.16b
   8011  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 8
   8012  1.1  christos 	aese	v4.16b, v28.16b
   8013  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 8
   8014  1.1  christos 
   8015  1.1  christos 	aese	v1.16b, v28.16b
   8016  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 8
   8017  1.1  christos 	aese	v5.16b, v28.16b
   8018  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 8
   8019  1.1  christos 	aese	v6.16b, v28.16b
   8020  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 8
   8021  1.1  christos 
   8022  1.1  christos 	aese	v3.16b, v28.16b
   8023  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 8
   8024  1.1  christos 	aese	v4.16b, v26.16b
   8025  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 9
   8026  1.1  christos 	eor	v12.16b, v12.16b, v16.16b				//GHASH block 8k+4, 8k+5 - mid
   8027  1.1  christos 
   8028  1.1  christos 	aese	v0.16b, v26.16b
   8029  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 9
   8030  1.1  christos 	aese	v1.16b, v26.16b
   8031  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 9
   8032  1.1  christos 	eor	v14.16b, v14.16b, v13.16b				//GHASH block 8k+6, 8k+7 - mid
   8033  1.1  christos 
   8034  1.1  christos 	aese	v6.16b, v26.16b
   8035  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 9
   8036  1.1  christos 	aese	v7.16b, v26.16b
   8037  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 9
   8038  1.1  christos 	pmull2	v16.1q, v12.2d, v24.2d				//GHASH block 8k+4 - mid
   8039  1.1  christos 
   8040  1.1  christos 	aese	v2.16b, v28.16b
   8041  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 8
   8042  1.1  christos 	pmull	v24.1q, v12.1d, v24.1d				//GHASH block 8k+5 - mid
   8043  1.1  christos 	pmull2	v12.1q, v15.2d, v20.2d				//GHASH block 8k+7 - high
   8044  1.1  christos 
   8045  1.1  christos 	pmull2	v13.1q, v14.2d, v21.2d				//GHASH block 8k+6 - mid
   8046  1.1  christos 	pmull	v21.1q, v14.1d, v21.1d				//GHASH block 8k+7 - mid
   8047  1.1  christos 	pmull	v20.1q, v15.1d, v20.1d				//GHASH block 8k+7 - low
   8048  1.1  christos 
   8049  1.1  christos 	ldp	q27, q28, [x8, #160]				//load rk10, rk11
   8050  1.1  christos .inst	0xce195e73	//eor3 v19.16b, v19.16b, v25.16b, v23.16b			//GHASH block 8k+4, 8k+5 - low
   8051  1.1  christos .inst	0xce184252	//eor3 v18.16b, v18.16b, v24.16b, v16.16b			//GHASH block 8k+4, 8k+5 - mid
   8052  1.1  christos 
   8053  1.1  christos 	aese	v2.16b, v26.16b
   8054  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 9
   8055  1.1  christos 	aese	v3.16b, v26.16b
   8056  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 9
   8057  1.1  christos 	aese	v5.16b, v26.16b
   8058  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 9
   8059  1.1  christos 
   8060  1.1  christos .inst	0xce0b3231	//eor3 v17.16b, v17.16b, v11.16b, v12.16b			//GHASH block 8k+6, 8k+7 - high
   8061  1.1  christos .inst	0xce165273	//eor3 v19.16b, v19.16b, v22.16b, v20.16b			//GHASH block 8k+6, 8k+7 - low
   8062  1.1  christos 	ldr	d16, [x10]			//MODULO - load modulo constant
   8063  1.1  christos 
   8064  1.1  christos .inst	0xce153652	//eor3 v18.16b, v18.16b, v21.16b, v13.16b			//GHASH block 8k+6, 8k+7 - mid
   8065  1.1  christos 
   8066  1.1  christos 	aese	v4.16b, v27.16b
   8067  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 10
   8068  1.1  christos 	aese	v6.16b, v27.16b
   8069  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 10
   8070  1.1  christos 	aese	v5.16b, v27.16b
   8071  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 10
   8072  1.1  christos 
   8073  1.1  christos 	aese	v0.16b, v27.16b
   8074  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 10
   8075  1.1  christos 	aese	v2.16b, v27.16b
   8076  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 10
   8077  1.1  christos 	aese	v3.16b, v27.16b
   8078  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 10
   8079  1.1  christos 
   8080  1.1  christos .inst	0xce114e52	//eor3 v18.16b, v18.16b, v17.16b, v19.16b		 	//MODULO - karatsuba tidy up
   8081  1.1  christos 
   8082  1.1  christos 	aese	v7.16b, v27.16b
   8083  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 10
   8084  1.1  christos 	aese	v1.16b, v27.16b
   8085  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 10
   8086  1.1  christos 	ldp	q26, q27, [x8, #192]				//load rk12, rk13
   8087  1.1  christos 
   8088  1.1  christos 	ext	v21.16b, v17.16b, v17.16b, #8				//MODULO - other top alignment
   8089  1.1  christos 
   8090  1.1  christos 	aese	v2.16b, v28.16b
   8091  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 11
   8092  1.1  christos 	aese	v1.16b, v28.16b
   8093  1.1  christos 	aesmc	v1.16b, v1.16b			//AES block 8k+9 - round 11
   8094  1.1  christos 	aese	v0.16b, v28.16b
   8095  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 11
   8096  1.1  christos 
   8097  1.1  christos 	pmull	v29.1q, v17.1d, v16.1d			//MODULO - top 64b align with mid
   8098  1.1  christos 	aese	v3.16b, v28.16b
   8099  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 11
   8100  1.1  christos 
   8101  1.1  christos 	aese	v7.16b, v28.16b
   8102  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 11
   8103  1.1  christos 	aese	v6.16b, v28.16b
   8104  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 11
   8105  1.1  christos 	aese	v4.16b, v28.16b
   8106  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 11
   8107  1.1  christos 
   8108  1.1  christos 	aese	v5.16b, v28.16b
   8109  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 11
   8110  1.1  christos 	aese	v3.16b, v26.16b
   8111  1.1  christos 	aesmc	v3.16b, v3.16b			//AES block 8k+11 - round 12
   8112  1.1  christos 
   8113  1.1  christos .inst	0xce1d5652	//eor3 v18.16b, v18.16b, v29.16b, v21.16b			//MODULO - fold into mid
   8114  1.1  christos 
   8115  1.1  christos 	aese	v3.16b, v27.16b						//AES block 8k+11 - round 13
   8116  1.1  christos 	aese	v2.16b, v26.16b
   8117  1.1  christos 	aesmc	v2.16b, v2.16b			//AES block 8k+10 - round 12
   8118  1.1  christos 	aese	v6.16b, v26.16b
   8119  1.1  christos 	aesmc	v6.16b, v6.16b			//AES block 8k+14 - round 12
   8120  1.1  christos 
   8121  1.1  christos 	pmull	v17.1q, v18.1d, v16.1d			//MODULO - mid 64b align with low
   8122  1.1  christos 	aese	v4.16b, v26.16b
   8123  1.1  christos 	aesmc	v4.16b, v4.16b			//AES block 8k+12 - round 12
   8124  1.1  christos 	aese	v7.16b, v26.16b
   8125  1.1  christos 	aesmc	v7.16b, v7.16b			//AES block 8k+15 - round 12
   8126  1.1  christos 
   8127  1.1  christos 	aese	v0.16b, v26.16b
   8128  1.1  christos 	aesmc	v0.16b, v0.16b			//AES block 8k+8 - round 12
   8129  1.1  christos 	ldr	q28, [x8, #224]					//load rk14
   8130  1.1  christos 	aese	v1.16b, v26.16b
   8131  1.1  christos 	aesmc	v1.16b, v1.16b	        	//AES block 8k+9 - round 12
   8132  1.1  christos 
   8133  1.1  christos 	aese	v4.16b, v27.16b						//AES block 8k+12 - round 13
   8134  1.1  christos 	ext	v21.16b, v18.16b, v18.16b, #8			 	//MODULO - other mid alignment
   8135  1.1  christos 	aese	v5.16b, v26.16b
   8136  1.1  christos 	aesmc	v5.16b, v5.16b			//AES block 8k+13 - round 12
   8137  1.1  christos 
   8138  1.1  christos 	aese	v6.16b, v27.16b						//AES block 8k+14 - round 13
   8139  1.1  christos 	aese	v2.16b, v27.16b						//AES block 8k+10 - round 13
   8140  1.1  christos 	aese	v1.16b, v27.16b						//AES block 8k+9 - round 13
   8141  1.1  christos 
   8142  1.1  christos 	aese	v5.16b, v27.16b						//AES block 8k+13 - round 13
   8143  1.1  christos .inst	0xce154673	//eor3 v19.16b, v19.16b, v21.16b, v17.16b		 	//MODULO - fold into low
   8144  1.1  christos 	add	v30.4s, v30.4s, v31.4s		//CTR block 8k+15
   8145  1.1  christos 
   8146  1.1  christos 	aese	v7.16b, v27.16b						//AES block 8k+15 - round 13
   8147  1.1  christos 	aese	v0.16b, v27.16b						//AES block 8k+8 - round 13
   8148  1.1  christos .L256_dec_tail:	//TAIL
                       // Tail entry: handles the last (up to 8) blocks of input.
                       //  - x5 = x4 - x0 is the number of input bytes still to be processed.
                       //  - The first remaining ciphertext block is loaded into v9 and its
                       //    result (v9 ^ v0 ^ v29) is formed in v12 before dispatching.
                       //  - Hash-key material is loaded from the table at x3 into v20-v25.
                       //  - The compare/branch ladder then enters the chain of
                       //    .L256_dec_blocks_more_than_N labels at the point matching x5.
                       // Each compare that does NOT branch means one block fewer remains, so
                       // the fall-through code moves the keystream registers up by one (the
                       // original v1 always ends up in the register that the chosen entry
                       // label XORs with the next ciphertext block) and subtracts v31 from
                       // v30 once.
                       // NOTE(review): v30/v31 appear to be the byte-reversed counter and its
                       // increment, so each subtraction presumably un-counts a keystream block
                       // that will not be consumed -- confirm against the counter setup, which
                       // is outside this excerpt.
   8149  1.1  christos 
   8150  1.1  christos 	ext	v16.16b, v19.16b, v19.16b, #8				//prepare final partial tag (v19 with its 64-bit halves swapped)
   8151  1.1  christos 	sub	x5, x4, x0		//x5 = x4 - x0 = number of bytes left to process (x4 is presumably the end-of-input pointer)
   8152  1.1  christos 	cmp	x5, #112				//more than 7 blocks left? flags are consumed by the b.gt further down
   8153  1.1  christos 
   8154  1.1  christos 	ldr	q9, [x0], #16				//AES block 8k+8 - load ciphertext
   8155  1.1  christos 
   8156  1.1  christos 	ldp	q24, q25, [x3, #192]			//q24 = h8k | h7k, q25 = the next 16 bytes of the table (presumably h8l | h8h)
   8157  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8		//swap the 64-bit halves of v25
   8158  1.1  christos 	mov	v29.16b, v28.16b				//keep v28 (rk14 on the fall-through path) in v29; v28 is reused as a GHASH temporary below
   8159  1.1  christos 
   8160  1.1  christos 	ldp	q20, q21, [x3, #128]			//q20 = h5l | h5h, q21 = the next 16 bytes of the table (presumably h6k | h5k)
   8161  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8		//swap the 64-bit halves of v20
   8162  1.1  christos 
   8163  1.1  christos .inst	0xce00752c	//eor3 v12.16b, v9.16b, v0.16b, v29.16b				//AES block 8k+8 - result
   8164  1.1  christos 	ldp	q22, q23, [x3, #160]			//q22 = h6l | h6h, q23 = the next 16 bytes of the table (presumably h7l | h7h)
   8165  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8		//swap the 64-bit halves of v22
   8166  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8		//swap the 64-bit halves of v23
   8167  1.1  christos 	b.gt	.L256_dec_blocks_more_than_7		//x5 > 112; nothing since the cmp above writes the flags
   8168  1.1  christos 
                       // At most 7 blocks left. The final-7 step, which is what initialises the
                       // GHASH accumulators v17/v18/v19, is skipped on this path, so they are
                       // cleared here instead. v16 already holds the tag taken from v19 above.
                       // (movi with a 64-bit arrangement clears the whole vector register.)
   8169  1.1  christos 	mov	v7.16b, v6.16b
   8170  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//step v30 back by one increment
   8171  1.1  christos 	mov	v6.16b, v5.16b
   8172  1.1  christos 
   8173  1.1  christos 	mov	v5.16b, v4.16b
   8174  1.1  christos 	mov	v4.16b, v3.16b
   8175  1.1  christos 	movi	v19.8b, #0				//GHASH low accumulator = 0
   8176  1.1  christos 
   8177  1.1  christos 	movi	v17.8b, #0				//GHASH high accumulator = 0
   8178  1.1  christos 	movi	v18.8b, #0				//GHASH mid accumulator = 0
   8179  1.1  christos 	mov	v3.16b, v2.16b
   8180  1.1  christos 
   8181  1.1  christos 	cmp	x5, #96					//more than 6 blocks left?
   8182  1.1  christos 	mov	v2.16b, v1.16b				//more_than_6 XORs the next ciphertext block with v2
   8183  1.1  christos 	b.gt	.L256_dec_blocks_more_than_6
   8184  1.1  christos 
                       // At most 6 blocks left: shift again, v1 lands in v3 for more_than_5.
   8185  1.1  christos 	mov	v7.16b, v6.16b
   8186  1.1  christos 	mov	v6.16b, v5.16b
   8187  1.1  christos 
   8188  1.1  christos 	mov	v5.16b, v4.16b
   8189  1.1  christos 	cmp	x5, #80					//more than 5 blocks left?
   8190  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//step v30 back by one increment
   8191  1.1  christos 
   8192  1.1  christos 	mov	v4.16b, v3.16b
   8193  1.1  christos 	mov	v3.16b, v1.16b
   8194  1.1  christos 	b.gt	.L256_dec_blocks_more_than_5
   8195  1.1  christos 
                       // At most 5 blocks left: v1 lands in v4 for more_than_4.
   8196  1.1  christos 	cmp	x5, #64					//more than 4 blocks left?
   8197  1.1  christos 	mov	v7.16b, v6.16b
   8198  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//step v30 back by one increment
   8199  1.1  christos 
   8200  1.1  christos 	mov	v6.16b, v5.16b
   8201  1.1  christos 
   8202  1.1  christos 	mov	v5.16b, v4.16b
   8203  1.1  christos 	mov	v4.16b, v1.16b
   8204  1.1  christos 	b.gt	.L256_dec_blocks_more_than_4
   8205  1.1  christos 
                       // At most 4 blocks left: v1 lands in v5 for more_than_3.
   8206  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//step v30 back by one increment
   8207  1.1  christos 	mov	v7.16b, v6.16b
   8208  1.1  christos 	cmp	x5, #48					//more than 3 blocks left?
   8209  1.1  christos 
   8210  1.1  christos 	mov	v6.16b, v5.16b
   8211  1.1  christos 	mov	v5.16b, v1.16b
   8212  1.1  christos 	b.gt	.L256_dec_blocks_more_than_3
   8213  1.1  christos 
                       // At most 3 blocks left: v1 lands in v6 for more_than_2. more_than_3,
                       // which normally reloads q24, is skipped, so q24 is reloaded here.
   8214  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   8215  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//step v30 back by one increment
   8216  1.1  christos 	mov	v7.16b, v6.16b
   8217  1.1  christos 
   8218  1.1  christos 	cmp	x5, #32					//more than 2 blocks left?
   8219  1.1  christos 	mov	v6.16b, v1.16b
   8220  1.1  christos 	b.gt	.L256_dec_blocks_more_than_2
   8221  1.1  christos 
                       // At most 2 blocks left: v1 lands in v7 for more_than_1.
   8222  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//step v30 back by one increment
   8223  1.1  christos 
   8224  1.1  christos 	mov	v7.16b, v1.16b
   8225  1.1  christos 	cmp	x5, #16					//more than 1 block left?
   8226  1.1  christos 	b.gt	.L256_dec_blocks_more_than_1
   8227  1.1  christos 
                       // At most 1 block left: its result is already in v12. more_than_1,
                       // which normally loads q21, is skipped, so q21 is loaded here.
   8228  1.1  christos 	sub	v30.4s, v30.4s, v31.4s		//step v30 back by one increment
   8229  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   8230  1.1  christos 	b	.L256_dec_blocks_less_than_1
   8231  1.1  christos .L256_dec_blocks_more_than_7:	//blocks	left >  7
                       // Final-7 block: store the result already in v12, hash the ciphertext
                       // block in v9 and form the result for the next block (v9 ^ v1 ^ v29).
                       // GHASH is done Karatsuba-style on v8 = byte-reversed ciphertext ^ v16:
                       //   high = v8.hi * v25.hi, low = v8.lo * v25.lo,
                       //   mid  = (v8.hi ^ v8.lo) * v24.d[1]
                       // The three products are written straight into v17/v19/v18 (no XOR), so
                       // this step initialises the accumulators used by the following labels.
                       // Falls through into .L256_dec_blocks_more_than_6.
   8232  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-7 block
   8233  1.1  christos 	ldr	q9, [x0], #16				//AES final-6 block - load ciphertext
   8234  1.1  christos 	st1	{ v12.16b}, [x2], #16				//AES final-7 block  - store result
   8235  1.1  christos 
   8236  1.1  christos 	ins	v18.d[0], v24.d[1]					//GHASH final-7 block - mid (upper half of v24 becomes the pmull operand)
   8237  1.1  christos 
   8238  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   8239  1.1  christos 
   8240  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-7 block - mid (v27.lo = v8.hi)
   8241  1.1  christos .inst	0xce01752c	//eor3 v12.16b, v9.16b, v1.16b, v29.16b				//AES final-6 block - result
   8242  1.1  christos 
   8243  1.1  christos 	pmull2	v17.1q, v8.2d, v25.2d				//GHASH final-7 block - high
   8244  1.1  christos 
   8245  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-7 block - mid (v27.lo = v8.hi ^ v8.lo)
   8246  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   8247  1.1  christos 
   8248  1.1  christos 	pmull	v19.1q, v8.1d, v25.1d				//GHASH final-7 block - low
   8249  1.1  christos 	pmull	v18.1q, v27.1d, v18.1d			 	//GHASH final-7 block - mid
   8250  1.1  christos .L256_dec_blocks_more_than_6:	//blocks	left >  6
                       // Final-6 block: store the result in v12, hash the ciphertext block in
                       // v9 and form the result for the next block (v9 ^ v2 ^ v29).
                       // Karatsuba GHASH on v8 = byte-reversed ciphertext ^ v16:
                       //   high = v8.hi * v23.hi, low = v8.lo * v23.lo,
                       //   mid  = (v8.hi ^ v8.lo) * v24.d[0]
                       // The products are XORed into v17 (high), v19 (low) and v18 (mid).
                       // v16 is non-zero only if this is the first block hashed in the tail.
                       // Falls through into .L256_dec_blocks_more_than_5.
   8251  1.1  christos 
   8252  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-6 block
   8253  1.1  christos 
   8254  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   8255  1.1  christos 	ldr	q9, [x0], #16				//AES final-5 block - load ciphertext
   8256  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   8257  1.1  christos 
   8258  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-6 block - mid (v27.lo = v8.hi)
   8259  1.1  christos 	st1	{ v12.16b}, [x2], #16				//AES final-6 block - store result
   8260  1.1  christos 	pmull2	v28.1q, v8.2d, v23.2d				//GHASH final-6 block - high
   8261  1.1  christos 
   8262  1.1  christos 	pmull	v26.1q, v8.1d, v23.1d				//GHASH final-6 block - low
   8263  1.1  christos 
   8264  1.1  christos .inst	0xce02752c	//eor3 v12.16b, v9.16b, v2.16b, v29.16b				//AES final-5 block - result
   8265  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-6 block - low
   8266  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-6 block - mid (v27.lo = v8.hi ^ v8.lo)
   8267  1.1  christos 
   8268  1.1  christos 	pmull	v27.1q, v27.1d, v24.1d				//GHASH final-6 block - mid
   8269  1.1  christos 
   8270  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-6 block - mid
   8271  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-6 block - high
   8272  1.1  christos .L256_dec_blocks_more_than_5:	//blocks	left >  5
                       // Final-5 block: store the result in v12, hash the ciphertext block in
                       // v9 and form the result for the next block (v9 ^ v3 ^ v29).
                       // Karatsuba GHASH on v8 = byte-reversed ciphertext ^ v16:
                       //   high = v8.hi * v22.hi, low = v8.lo * v22.lo,
                       //   mid  = (v8.hi ^ v8.lo) * v21.d[1]
                       // The mid multiplier sits in the upper half of v21, so the XORed value
                       // is copied into v27.d[1] and pmull2 is used.
                       // The products are XORed into v17 (high), v19 (low) and v18 (mid).
                       // Falls through into .L256_dec_blocks_more_than_4.
   8273  1.1  christos 
   8274  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-5 block
   8275  1.1  christos 
   8276  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   8277  1.1  christos 
   8278  1.1  christos 	pmull2	v28.1q, v8.2d, v22.2d				//GHASH final-5 block - high
   8279  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-5 block - mid (v27.lo = v8.hi)
   8280  1.1  christos 
   8281  1.1  christos 	ldr	q9, [x0], #16				//AES final-4 block - load ciphertext
   8282  1.1  christos 
   8283  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-5 block - mid (v27.lo = v8.hi ^ v8.lo)
   8284  1.1  christos 	st1	{ v12.16b}, [x2], #16			  	//AES final-5 block - store result
   8285  1.1  christos 
   8286  1.1  christos 	pmull	v26.1q, v8.1d, v22.1d				//GHASH final-5 block - low
   8287  1.1  christos 	ins	v27.d[1], v27.d[0]					//GHASH final-5 block - mid (duplicate into the upper half for pmull2)
   8288  1.1  christos 
   8289  1.1  christos 	pmull2	v27.1q, v27.2d, v21.2d				//GHASH final-5 block - mid
   8290  1.1  christos 
   8291  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-5 block - high
   8292  1.1  christos .inst	0xce03752c	//eor3 v12.16b, v9.16b, v3.16b, v29.16b				//AES final-4 block - result
   8293  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-5 block - low
   8294  1.1  christos 
   8295  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-5 block - mid
   8296  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   8297  1.1  christos .L256_dec_blocks_more_than_4:	//blocks	left >  4
                       // Final-4 block: store the result in v12, hash the ciphertext block in
                       // v9 and form the result for the next block (v9 ^ v4 ^ v29).
                       // Karatsuba GHASH on v8 = byte-reversed ciphertext ^ v16:
                       //   high = v8.hi * v20.hi, low = v8.lo * v20.lo,
                       //   mid  = (v8.hi ^ v8.lo) * v21.d[0]
                       // The products are XORed into v17 (high), v19 (low) and v18 (mid).
                       // Falls through into .L256_dec_blocks_more_than_3.
   8298  1.1  christos 
   8299  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-4 block
   8300  1.1  christos 
   8301  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   8302  1.1  christos 
   8303  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-4 block - mid (v27.lo = v8.hi)
   8304  1.1  christos 	ldr	q9, [x0], #16				//AES final-3 block - load ciphertext
   8305  1.1  christos 
   8306  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   8307  1.1  christos 
   8308  1.1  christos 	pmull	v26.1q, v8.1d, v20.1d				//GHASH final-4 block - low
   8309  1.1  christos 	pmull2	v28.1q, v8.2d, v20.2d				//GHASH final-4 block - high
   8310  1.1  christos 
   8311  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-4 block - mid (v27.lo = v8.hi ^ v8.lo)
   8312  1.1  christos 
   8313  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-4 block - high
   8314  1.1  christos 
   8315  1.1  christos 	pmull	v27.1q, v27.1d, v21.1d				//GHASH final-4 block - mid
   8316  1.1  christos 
   8317  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-4 block - low
   8318  1.1  christos 	st1	{ v12.16b}, [x2], #16			 	//AES final-4 block - store result
   8319  1.1  christos 
   8320  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-4 block - mid
   8321  1.1  christos .inst	0xce04752c	//eor3 v12.16b, v9.16b, v4.16b, v29.16b				//AES final-3 block - result
   8322  1.1  christos .L256_dec_blocks_more_than_3:	//blocks	left >  3
                       // Final-3 block: store the result in v12, hash the ciphertext block in
                       // v9 and form the result for the next block (v9 ^ v5 ^ v29).
                       // v25 and v24 are reloaded here from [x3, #112] and [x3, #96], replacing
                       // the values used for the earlier blocks.
                       // Karatsuba GHASH on v8 = byte-reversed ciphertext ^ v16:
                       //   high = v8.hi * v25.hi, low = v8.lo * v25.lo,
                       //   mid  = (v8.hi ^ v8.lo) * v24.d[1]
                       // The mid multiplier sits in the upper half of v24, so the XORed value
                       // is copied into v27.d[1] and pmull2 is used.
                       // The products are XORed into v17 (high), v19 (low) and v18 (mid).
                       // Falls through into .L256_dec_blocks_more_than_2.
   8323  1.1  christos 
   8324  1.1  christos 	ldr	q25, [x3, #112]				//load h4l | h4h
   8325  1.1  christos 	ext	v25.16b, v25.16b, v25.16b, #8		//swap the 64-bit halves of v25
   8326  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-3 block
   8327  1.1  christos 
   8328  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   8329  1.1  christos 	ldr	q9, [x0], #16				//AES final-2 block - load ciphertext
   8330  1.1  christos 	ldr	q24, [x3, #96]				//load h4k | h3k
   8331  1.1  christos 
   8332  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-3 block - mid (v27.lo = v8.hi)
   8333  1.1  christos 	st1	{ v12.16b}, [x2], #16			 	//AES final-3 block - store result
   8334  1.1  christos 
   8335  1.1  christos .inst	0xce05752c	//eor3 v12.16b, v9.16b, v5.16b, v29.16b				//AES final-2 block - result
   8336  1.1  christos 
   8337  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-3 block - mid (v27.lo = v8.hi ^ v8.lo)
   8338  1.1  christos 
   8339  1.1  christos 	ins	v27.d[1], v27.d[0]					//GHASH final-3 block - mid (duplicate into the upper half for pmull2)
   8340  1.1  christos 	pmull	v26.1q, v8.1d, v25.1d				//GHASH final-3 block - low
   8341  1.1  christos 	pmull2	v28.1q, v8.2d, v25.2d				//GHASH final-3 block - high
   8342  1.1  christos 
   8343  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   8344  1.1  christos 	pmull2	v27.1q, v27.2d, v24.2d				//GHASH final-3 block - mid
   8345  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-3 block - low
   8346  1.1  christos 
   8347  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-3 block - high
   8348  1.1  christos 
   8349  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-3 block - mid
   8350  1.1  christos .L256_dec_blocks_more_than_2:	//blocks	left >  2
                       // Final-2 block: store the result in v12, hash the ciphertext block in
                       // v9 and form the result for the next block (v9 ^ v6 ^ v29).
                       // v23 is reloaded from [x3, #80]. v24 must already hold the 16 bytes at
                       // [x3, #96]: it is loaded either by .L256_dec_blocks_more_than_3 or by
                       // the dispatch code that branches directly to this label.
                       // Karatsuba GHASH on v8 = byte-reversed ciphertext ^ v16:
                       //   high = v8.hi * v23.hi, low = v8.lo * v23.lo,
                       //   mid  = (v8.hi ^ v8.lo) * v24.d[0]
                       // The products are XORed into v17 (high), v19 (low) and v18 (mid).
                       // Falls through into .L256_dec_blocks_more_than_1.
   8351  1.1  christos 
   8352  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-2 block
   8353  1.1  christos 
   8354  1.1  christos 	ldr	q23, [x3, #80]				//load h3l | h3h
   8355  1.1  christos 	ext	v23.16b, v23.16b, v23.16b, #8		//swap the 64-bit halves of v23
   8356  1.1  christos 	ldr	q9, [x0], #16				//AES final-1 block - load ciphertext
   8357  1.1  christos 
   8358  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   8359  1.1  christos 
   8360  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-2 block - mid (v27.lo = v8.hi)
   8361  1.1  christos 
   8362  1.1  christos 	pmull	v26.1q, v8.1d, v23.1d				//GHASH final-2 block - low
   8363  1.1  christos 	st1	{ v12.16b}, [x2], #16			  	//AES final-2 block - store result
   8364  1.1  christos .inst	0xce06752c	//eor3 v12.16b, v9.16b, v6.16b, v29.16b				//AES final-1 block - result
   8365  1.1  christos 
   8366  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-2 block - mid (v27.lo = v8.hi ^ v8.lo)
   8367  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-2 block - low
   8368  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   8369  1.1  christos 
   8370  1.1  christos 	pmull	v27.1q, v27.1d, v24.1d				//GHASH final-2 block - mid
   8371  1.1  christos 	pmull2	v28.1q, v8.2d, v23.2d				//GHASH final-2 block - high
   8372  1.1  christos 
   8373  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-2 block - mid
   8374  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-2 block - high
   8375  1.1  christos .L256_dec_blocks_more_than_1:	//blocks	left >  1
                       // Final-1 block: store the result in v12, hash the ciphertext block in
                       // v9 and form the result for the final block (v9 ^ v7 ^ v29), which is
                       // left in v12 for .L256_dec_blocks_less_than_1.
                       // v22 and v21 are reloaded here from [x3, #64] and [x3, #48].
                       // Karatsuba GHASH on v8 = byte-reversed ciphertext ^ v16:
                       //   high = v8.hi * v22.hi, low = v8.lo * v22.lo,
                       //   mid  = (v8.hi ^ v8.lo) * v21.d[1]
                       // The mid multiplier sits in the upper half of v21, so the XORed value
                       // is copied into v27.d[1] and pmull2 is used.
                       // The products are XORed into v17 (high), v19 (low) and v18 (mid).
                       // Falls through into .L256_dec_blocks_less_than_1.
   8376  1.1  christos 
   8377  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final-1 block
   8378  1.1  christos 
   8379  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   8380  1.1  christos 
   8381  1.1  christos 	ins	v27.d[0], v8.d[1]					//GHASH final-1 block - mid (v27.lo = v8.hi)
   8382  1.1  christos 	ldr	q22, [x3, #64]				//load h2l | h2h
   8383  1.1  christos 	ext	v22.16b, v22.16b, v22.16b, #8		//swap the 64-bit halves of v22
   8384  1.1  christos 
   8385  1.1  christos 	eor	v27.8b, v27.8b, v8.8b				//GHASH final-1 block - mid (v27.lo = v8.hi ^ v8.lo)
   8386  1.1  christos 	ldr	q9, [x0], #16				//AES final block - load ciphertext
   8387  1.1  christos 	st1	{ v12.16b}, [x2], #16			 	//AES final-1 block - store result
   8388  1.1  christos 
   8389  1.1  christos 	ldr	q21, [x3, #48]				//load h2k | h1k
   8390  1.1  christos 	pmull	v26.1q, v8.1d, v22.1d				//GHASH final-1 block - low
   8391  1.1  christos 
   8392  1.1  christos 	ins	v27.d[1], v27.d[0]					//GHASH final-1 block - mid (duplicate into the upper half for pmull2)
   8393  1.1  christos 
   8394  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final-1 block - low
   8395  1.1  christos 
   8396  1.1  christos .inst	0xce07752c	//eor3 v12.16b, v9.16b, v7.16b, v29.16b				//AES final block - result
   8397  1.1  christos 	pmull2	v28.1q, v8.2d, v22.2d				//GHASH final-1 block - high
   8398  1.1  christos 
   8399  1.1  christos 	pmull2	v27.1q, v27.2d, v21.2d				//GHASH final-1 block - mid
   8400  1.1  christos 
   8401  1.1  christos 	movi	v16.8b, #0						//suppress further partial tag feed in
   8402  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final-1 block - high
   8403  1.1  christos 
   8404  1.1  christos 	eor	v18.16b, v18.16b, v27.16b				//GHASH final-1 block - mid
   8405  1.1  christos .L256_dec_blocks_less_than_1:	//blocks	left <= 1
   8406  1.1  christos 
   8407  1.1  christos 	ld1	{ v26.16b}, [x2]					//load existing bytes where the possibly partial last block is to be stored
   8408  1.1  christos 	mvn	x6, xzr						//temp0_x = 0xffffffffffffffff
   8409  1.1  christos 	and	x1, x1, #127				//bit_length %= 128
   8410  1.1  christos 
   8411  1.1  christos 	sub	x1, x1, #128				//bit_length -= 128
   8412  1.1  christos 	rev32	v30.16b, v30.16b
   8413  1.1  christos 	str	q30, [x16]					//store the updated counter
   8414  1.1  christos 
   8415  1.1  christos 	neg	x1, x1				//bit_length = 128 - #bits in input (in range [1,128])
   8416  1.1  christos 
   8417  1.1  christos 	and	x1, x1, #127			 	//bit_length %= 128
   8418  1.1  christos 
   8419  1.1  christos 	lsr	x6, x6, x1				//temp0_x is mask for top 64b of last block
   8420  1.1  christos 	cmp	x1, #64
   8421  1.1  christos 	mvn	x7, xzr						//temp1_x = 0xffffffffffffffff
   8422  1.1  christos 
   8423  1.1  christos 	csel	x14, x6, xzr, lt
   8424  1.1  christos 	csel	x13, x7, x6, lt
   8425  1.1  christos 
   8426  1.1  christos 	mov	v0.d[0], x13					//ctr0b is mask for last block
   8427  1.1  christos 	mov	v0.d[1], x14
   8428  1.1  christos 
   8429  1.1  christos 	and	v9.16b, v9.16b, v0.16b					//possibly partial last block has zeroes in highest bits
   8430  1.1  christos 	ldr	q20, [x3, #32]				//load h1l | h1h
   8431  1.1  christos 	ext	v20.16b, v20.16b, v20.16b, #8
   8432  1.1  christos 	bif	v12.16b, v26.16b, v0.16b					//insert existing bytes in top end of result before storing
   8433  1.1  christos 
   8434  1.1  christos 	rev64	v8.16b, v9.16b						//GHASH final block
   8435  1.1  christos 
   8436  1.1  christos 	eor	v8.16b, v8.16b, v16.16b					//feed in partial tag
   8437  1.1  christos 
   8438  1.1  christos 	ins	v16.d[0], v8.d[1]					//GHASH final block - mid
   8439  1.1  christos 	pmull2	v28.1q, v8.2d, v20.2d				//GHASH final block - high
   8440  1.1  christos 
   8441  1.1  christos 	eor	v16.8b, v16.8b, v8.8b				//GHASH final block - mid
   8442  1.1  christos 
   8443  1.1  christos 	pmull	v26.1q, v8.1d, v20.1d				//GHASH final block - low
   8444  1.1  christos 	eor	v17.16b, v17.16b, v28.16b					//GHASH final block - high
   8445  1.1  christos 
   8446  1.1  christos 	pmull	v16.1q, v16.1d, v21.1d				//GHASH final block - mid
   8447  1.1  christos 
   8448  1.1  christos 	eor	v18.16b, v18.16b, v16.16b				//GHASH final block - mid
   8449  1.1  christos 	ldr	d16, [x10]			//MODULO - load modulo constant
   8450  1.1  christos 	eor	v19.16b, v19.16b, v26.16b					//GHASH final block - low
   8451  1.1  christos 
   8452  1.1  christos 	pmull	v21.1q, v17.1d, v16.1d		 	//MODULO - top 64b align with mid
   8453  1.1  christos 	eor	v14.16b, v17.16b, v19.16b				//MODULO - karatsuba tidy up
   8454  1.1  christos 
   8455  1.1  christos 	ext	v17.16b, v17.16b, v17.16b, #8				//MODULO - other top alignment
   8456  1.1  christos 	st1	{ v12.16b}, [x2]				//store all 16B
   8457  1.1  christos 
   8458  1.1  christos 	eor	v18.16b, v18.16b, v14.16b				//MODULO - karatsuba tidy up
   8459  1.1  christos 
   8460  1.1  christos 	eor	v21.16b, v17.16b, v21.16b				//MODULO - fold into mid
   8461  1.1  christos 	eor	v18.16b, v18.16b, v21.16b				//MODULO - fold into mid
   8462  1.1  christos 
   8463  1.1  christos 	pmull	v17.1q, v18.1d, v16.1d			//MODULO - mid 64b align with low
   8464  1.1  christos 
   8465  1.1  christos 	ext	v18.16b, v18.16b, v18.16b, #8				//MODULO - other mid alignment
   8466  1.1  christos 	eor	v19.16b, v19.16b, v17.16b				//MODULO - fold into low
   8467  1.1  christos 
   8468  1.1  christos 	eor	v19.16b, v19.16b, v18.16b				//MODULO - fold into low
   8469  1.1  christos 	ext	v19.16b, v19.16b, v19.16b, #8
   8470  1.1  christos 	rev64	v19.16b, v19.16b
   8471  1.1  christos 	st1	{ v19.16b }, [x3]
   8472  1.1  christos 	mov	x0, x9
   8473  1.1  christos 
   8474  1.1  christos 	ldp	d10, d11, [sp, #16]
   8475  1.1  christos 	ldp	d12, d13, [sp, #32]
   8476  1.1  christos 	ldp	d14, d15, [sp, #48]
   8477  1.1  christos 	ldp	d8, d9, [sp], #80
   8478  1.1  christos 	ret
   8479  1.1  christos 
   8480  1.1  christos .L256_dec_ret:
   8481  1.1  christos 	mov	w0, #0x0
   8482  1.1  christos 	ret
   8483  1.1  christos .size	unroll8_eor3_aes_gcm_dec_256_kernel,.-unroll8_eor3_aes_gcm_dec_256_kernel
   8484  1.1  christos .byte	65,69,83,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,65,82,77,118,56,44,32,83,80,68,88,32,66,83,68,45,51,45,67,108,97,117,115,101,32,98,121,32,60,120,105,97,111,107,97,110,103,46,113,105,97,110,64,97,114,109,46,99,111,109,62,0	// NUL-terminated ASCII identification string: AES GCM module for ARMv8, SPDX BSD-3-Clause by <xiaokang.qian@arm.com>
   8485  1.1  christos .align	2	// re-align the location counter after the byte string above (presumably 2^2 = 4 bytes on this target; confirm against the assembler documentation)
   8486  1.1  christos .align	2	// repeats the previous directive; the location counter is already aligned, so no further padding is added
   8487  1.1  christos #endif
   8488