Home | History | Annotate | Line # | Download | only in arm64
      1  1.1  mrg dnl  ARM64 mpn_rsh1add_n and mpn_rsh1sub_n.
      2  1.1  mrg 
      3  1.1  mrg dnl  Contributed to the GNU project by Torbjörn Granlund.
      4  1.1  mrg 
      5  1.1  mrg dnl  Copyright 2017 Free Software Foundation, Inc.
      6  1.1  mrg 
      7  1.1  mrg dnl  This file is part of the GNU MP Library.
      8  1.1  mrg dnl
      9  1.1  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     10  1.1  mrg dnl  it under the terms of either:
     11  1.1  mrg dnl
     12  1.1  mrg dnl    * the GNU Lesser General Public License as published by the Free
     13  1.1  mrg dnl      Software Foundation; either version 3 of the License, or (at your
     14  1.1  mrg dnl      option) any later version.
     15  1.1  mrg dnl
     16  1.1  mrg dnl  or
     17  1.1  mrg dnl
     18  1.1  mrg dnl    * the GNU General Public License as published by the Free Software
     19  1.1  mrg dnl      Foundation; either version 2 of the License, or (at your option) any
     20  1.1  mrg dnl      later version.
     21  1.1  mrg dnl
     22  1.1  mrg dnl  or both in parallel, as here.
     23  1.1  mrg dnl
     24  1.1  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     25  1.1  mrg dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     26  1.1  mrg dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     27  1.1  mrg dnl  for more details.
     28  1.1  mrg dnl
     29  1.1  mrg dnl  You should have received copies of the GNU General Public License and the
     30  1.1  mrg dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     31  1.1  mrg dnl  see https://www.gnu.org/licenses/.
     32  1.1  mrg 
     33  1.1  mrg include(`../config.m4')
     34  1.1  mrg 
     35  1.1  mrg C	     cycles/limb   assumed optimal c/l
     36  1.1  mrg C Cortex-A53	3.25-3.75	 3.0 steady
     37  1.1  mrg C Cortex-A57	 2.15		 1.75
     38  1.1  mrg C X-Gene	 2.75		 2.5
     39  1.1  mrg 
     40  1.1  mrg changecom(blah)
     41  1.1  mrg 
     42  1.1  mrg define(`rp', `x0')
     43  1.1  mrg define(`up', `x1')
     44  1.1  mrg define(`vp', `x2')
     45  1.1  mrg define(`n',  `x3')
     46  1.1  mrg 
     47  1.1  mrg ifdef(`OPERATION_rsh1add_n', `
     48  1.1  mrg   define(`ADDSUB',	adds)
     49  1.1  mrg   define(`ADDSUBC',	adcs)
     50  1.1  mrg   define(`COND',	`cs')
     51  1.1  mrg   define(`func_n',	mpn_rsh1add_n)')
     52  1.1  mrg ifdef(`OPERATION_rsh1sub_n', `
     53  1.1  mrg   define(`ADDSUB',	subs)
     54  1.1  mrg   define(`ADDSUBC',	sbcs)
     55  1.1  mrg   define(`COND',	`cc')
     56  1.1  mrg   define(`func_n',	mpn_rsh1sub_n)')
     57  1.1  mrg 
     58  1.1  mrg MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)
     59  1.1  mrg 
     60  1.1  mrg ASM_START()
     61  1.1  mrg PROLOGUE(func_n)
     62  1.1  mrg 	lsr	x18, n, #2
     63  1.1  mrg 
     64  1.1  mrg 	tbz	n, #0, L(bx0)
     65  1.1  mrg 
     66  1.1  mrg L(bx1):	ldr	x5, [up],#8
     67  1.1  mrg 	ldr	x9, [vp],#8
     68  1.1  mrg 	tbnz	n, #1, L(b11)
     69  1.1  mrg 
     70  1.1  mrg L(b01):	ADDSUB	x13, x5, x9
     71  1.1  mrg 	and	x10, x13, #1
     72  1.1  mrg 	cbz	x18, L(1)
     73  1.1  mrg 	ldp	x4, x5, [up],#48
     74  1.1  mrg 	ldp	x8, x9, [vp],#48
     75  1.1  mrg 	ADDSUBC	x14, x4, x8
     76  1.1  mrg 	ADDSUBC	x15, x5, x9
     77  1.1  mrg 	ldp	x4, x5, [up,#-32]
     78  1.1  mrg 	ldp	x8, x9, [vp,#-32]
     79  1.1  mrg 	extr	x17, x14, x13, #1
     80  1.1  mrg 	ADDSUBC	x12, x4, x8
     81  1.1  mrg 	ADDSUBC	x13, x5, x9
     82  1.1  mrg 	str	x17, [rp], #24
     83  1.1  mrg 	sub	x18, x18, #1
     84  1.1  mrg 	cbz	x18, L(end)
     85  1.1  mrg 	b	L(top)
     86  1.1  mrg 
     87  1.1  mrg L(1):	cset	x14, COND
     88  1.1  mrg 	extr	x17, x14, x13, #1
     89  1.1  mrg 	str	x17, [rp]
     90  1.1  mrg 	mov	x0, x10
     91  1.1  mrg 	ret
     92  1.1  mrg 
     93  1.1  mrg L(b11):	ADDSUB	x15, x5, x9
     94  1.1  mrg 	and	x10, x15, #1
     95  1.1  mrg 
     96  1.1  mrg 	ldp	x4, x5, [up],#32
     97  1.1  mrg 	ldp	x8, x9, [vp],#32
     98  1.1  mrg 	ADDSUBC	x12, x4, x8
     99  1.1  mrg 	ADDSUBC	x13, x5, x9
    100  1.1  mrg 	cbz	x18, L(3)
    101  1.1  mrg 	ldp	x4, x5, [up,#-16]
    102  1.1  mrg 	ldp	x8, x9, [vp,#-16]
    103  1.1  mrg 	extr	x17, x12, x15, #1
    104  1.1  mrg 	ADDSUBC	x14, x4, x8
    105  1.1  mrg 	ADDSUBC	x15, x5, x9
    106  1.1  mrg 	str	x17, [rp], #8
    107  1.1  mrg 	b	L(mid)
    108  1.1  mrg 
    109  1.1  mrg L(3):	extr	x17, x12, x15, #1
    110  1.1  mrg 	str	x17, [rp], #8
    111  1.1  mrg 	b	L(2)
    112  1.1  mrg 
    113  1.1  mrg L(bx0):	tbz	n, #1, L(b00)
    114  1.1  mrg 
    115  1.1  mrg L(b10):	ldp	x4, x5, [up],#32
    116  1.1  mrg 	ldp	x8, x9, [vp],#32
    117  1.1  mrg 	ADDSUB	x12, x4, x8
    118  1.1  mrg 	ADDSUBC	x13, x5, x9
    119  1.1  mrg 	and	x10, x12, #1
    120  1.1  mrg 	cbz	x18, L(2)
    121  1.1  mrg 	ldp	x4, x5, [up,#-16]
    122  1.1  mrg 	ldp	x8, x9, [vp,#-16]
    123  1.1  mrg 	ADDSUBC	x14, x4, x8
    124  1.1  mrg 	ADDSUBC	x15, x5, x9
    125  1.1  mrg 	b	L(mid)
    126  1.1  mrg 
    127  1.1  mrg L(b00):	ldp	x4, x5, [up],#48
    128  1.1  mrg 	ldp	x8, x9, [vp],#48
    129  1.1  mrg 	ADDSUB	x14, x4, x8
    130  1.1  mrg 	ADDSUBC	x15, x5, x9
    131  1.1  mrg 	and	x10, x14, #1
    132  1.1  mrg 	ldp	x4, x5, [up,#-32]
    133  1.1  mrg 	ldp	x8, x9, [vp,#-32]
    134  1.1  mrg 	ADDSUBC	x12, x4, x8
    135  1.1  mrg 	ADDSUBC	x13, x5, x9
    136  1.1  mrg 	add	rp, rp, #16
    137  1.1  mrg 	sub	x18, x18, #1
    138  1.1  mrg 	cbz	x18, L(end)
    139  1.1  mrg 
    140  1.1  mrg 	ALIGN(16)
    141  1.1  mrg L(top):	ldp	x4, x5, [up,#-16]
    142  1.1  mrg 	ldp	x8, x9, [vp,#-16]
    143  1.1  mrg 	extr	x16, x15, x14, #1
    144  1.1  mrg 	extr	x17, x12, x15, #1
    145  1.1  mrg 	ADDSUBC	x14, x4, x8
    146  1.1  mrg 	ADDSUBC	x15, x5, x9
    147  1.1  mrg 	stp	x16, x17, [rp,#-16]
    148  1.1  mrg L(mid):	ldp	x4, x5, [up],#32
    149  1.1  mrg 	ldp	x8, x9, [vp],#32
    150  1.1  mrg 	extr	x16, x13, x12, #1
    151  1.1  mrg 	extr	x17, x14, x13, #1
    152  1.1  mrg 	ADDSUBC	x12, x4, x8
    153  1.1  mrg 	ADDSUBC	x13, x5, x9
    154  1.1  mrg 	stp	x16, x17, [rp],#32
    155  1.1  mrg 	sub	x18, x18, #1
    156  1.1  mrg 	cbnz	x18, L(top)
    157  1.1  mrg 
    158  1.1  mrg L(end):	extr	x16, x15, x14, #1
    159  1.1  mrg 	extr	x17, x12, x15, #1
    160  1.1  mrg 	stp	x16, x17, [rp,#-16]
    161  1.1  mrg L(2):	cset	x14, COND
    162  1.1  mrg 	extr	x16, x13, x12, #1
    163  1.1  mrg 	extr	x17, x14, x13, #1
    164  1.1  mrg 	stp	x16, x17, [rp]
    165  1.1  mrg 
    166  1.1  mrg L(ret):	mov	x0, x10
    167  1.1  mrg 	ret
    168  1.1  mrg EPILOGUE()
    169