Home | History | Annotate | Line # | Download | only in arm
      1 dnl  ARM mpn_sec_tabselect
      2 
      3 dnl  Contributed to the GNU project by Torbjörn Granlund.
      4 
      5 dnl  Copyright 2013 Free Software Foundation, Inc.
      6 
      7 dnl  This file is part of the GNU MP Library.
      8 dnl
      9 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     10 dnl  it under the terms of either:
     11 dnl
     12 dnl    * the GNU Lesser General Public License as published by the Free
     13 dnl      Software Foundation; either version 3 of the License, or (at your
     14 dnl      option) any later version.
     15 dnl
     16 dnl  or
     17 dnl
     18 dnl    * the GNU General Public License as published by the Free Software
     19 dnl      Foundation; either version 2 of the License, or (at your option) any
     20 dnl      later version.
     21 dnl
     22 dnl  or both in parallel, as here.
     23 dnl
     24 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     25 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     26 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     27 dnl  for more details.
     28 dnl
     29 dnl  You should have received copies of the GNU General Public License and the
     30 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     31 dnl  see https://www.gnu.org/licenses/.
     32 
     33 include(`../config.m4')
     34 
     35 C	     cycles/limb
     36 C StrongARM	 ?
     37 C XScale	 ?
     38 C Cortex-A7	 ?
     39 C Cortex-A8	 ?
     40 C Cortex-A9	 2.33
     41 C Cortex-A15	 2.2
     42 
     43 C TODO
     44 C  * Consider using special code for small nents, either swapping the inner and
     45 C    outer loops, or providing a few variants with the inner loop completely
     45 C    unrolled.
     46 
C mpn_sec_tabselect (rp, tab, n, nents, which)
C
C Copy the n-limb table entry number which (counted from 0) to rp.  The
C table starts at tab and holds nents entries of n limbs each, stored back
C to back.  Every entry is loaded and combined under an all-ones/all-zeros
C mask, so no branch and no load address depends on the value of which.
C
C In:     r0 = rp, r1 = tab, r2 = n, r3 = nents, [sp] = which
C Out:    n limbs stored at rp; no return value is set up
C Saved:  r4-r11 and r14 are pushed on entry and restored before return
C Clobb:  r0, r1, r12, flags
C
C The result is built three limbs (one "column" of the table) at a time;
C a tail handles the final two or one limbs.  If which is not below nents
C no entry matches and the limbs stored are zero.

define(`rp',    `r0')
define(`tp',    `r1')
define(`n',     `r2')
define(`nents', `r3')
C      which  on stack

define(`i',     `r11')	C table entries left to visit in the current scan
define(`j',     `r12')	C limbs still to produce, minus 3
define(`c',     `r14')	C counts down from which; borrows at the wanted entry
define(`mask',  `r7')	C all ones for the wanted entry, zero for the others

ASM_START()
PROLOGUE(mpn_sec_tabselect)
	push	{r4-r11, r14}		C 9 words pushed: which is now at [sp, #36]

	subs	j, n, #3
	bmi	L(outer_end)		C fewer than 3 limbs: tail code only

C Main part: each pass scans the whole table and yields 3 result limbs.
L(outer_top):
	ldr	c, [sp, #36]		C reload which (read before the push below)
	mov	i, nents
	push	{tp}			C remember where this column starts

	mov	r8, #0			C r8-r10 accumulate the selected limbs
	mov	r9, #0
	mov	r10, #0

L(top):	subs	c, c, #1		C carry is cleared only if c was 0
	ldm	tp, {r4,r5,r6}
	sbc	mask, mask, mask	C mask = carry - 1; old mask value cancels out
	subs	i, i, #1
	add	tp, tp, n, lsl #2	C same limbs of the next entry (4-byte limbs)
	and	r4, r4, mask
	and	r5, r5, mask
	and	r6, r6, mask
	orr	r8, r8, r4
	orr	r9, r9, r5
	orr	r10, r10, r6
	bgt	L(top)			C nents passes; bge made nents+1 and read
					C one entry beyond the table

	stmia	rp!, {r8,r9,r10}
	pop	{tp}
	add	tp, tp, #12		C advance to the next 3-limb column
	subs	j, j, #3
	bpl	L(outer_top)
L(outer_end):

C Here j is -1, -2 or -3, meaning 2, 1 or 0 limbs remain.
	cmp	j, #-1
	bne	L(n2)

C Two limbs remain.
	ldr	c, [sp, #36]
	mov	i, nents
	mov	r8, #0
	mov	r9, #0
L(tp2):	subs	c, c, #1
	sbc	mask, mask, mask
	ldm	tp, {r4,r5}
	subs	i, i, #1
	add	tp, tp, n, lsl #2
	and	r4, r4, mask
	and	r5, r5, mask
	orr	r8, r8, r4
	orr	r9, r9, r5
	bgt	L(tp2)			C nents passes, as in the main loop
	stmia	rp, {r8,r9}
	pop	{r4-r11, r14}
	return	lr

L(n2):	cmp	j, #-2
	bne	L(n1)

C One limb remains.
	ldr	c, [sp, #36]
	mov	i, nents
	mov	r8, #0
L(tp1):	subs	c, c, #1
	sbc	mask, mask, mask
	ldr	r4, [tp]
	subs	i, i, #1
	add	tp, tp, n, lsl #2
	and	r4, r4, mask
	orr	r8, r8, r4
	bgt	L(tp1)			C nents passes, as in the main loop
	str	r8, [rp]
L(n1):	pop	{r4-r11, r14}
	return	lr
EPILOGUE()
    132