Home | History | Annotate | Line # | Download | only in arm
      1 dnl  ARM mpn_addlsh1_n and mpn_sublsh1_n
      2 
      3 dnl  Contributed to the GNU project by Torbjörn Granlund.
      4 
      5 dnl  Copyright 2012 Free Software Foundation, Inc.
      6 
      7 dnl  This file is part of the GNU MP Library.
      8 dnl
      9 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     10 dnl  it under the terms of either:
     11 dnl
     12 dnl    * the GNU Lesser General Public License as published by the Free
     13 dnl      Software Foundation; either version 3 of the License, or (at your
     14 dnl      option) any later version.
     15 dnl
     16 dnl  or
     17 dnl
     18 dnl    * the GNU General Public License as published by the Free Software
     19 dnl      Foundation; either version 2 of the License, or (at your option) any
     20 dnl      later version.
     21 dnl
     22 dnl  or both in parallel, as here.
     23 dnl
     24 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     25 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     26 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     27 dnl  for more details.
     28 dnl
     29 dnl  You should have received copies of the GNU General Public License and the
     30 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     31 dnl  see https://www.gnu.org/licenses/.
     32 
     33 include(`../config.m4')
     34 
     35 C	      addlsh1_n       sublsh1_n
     36 C	     cycles/limb     cycles/limb
     37 C StrongARM	 ?		 ?
     38 C XScale	 ?		 ?
     39 C Cortex-A7	 ?		 ?
     40 C Cortex-A8	 ?		 ?
     41 C Cortex-A9	 3.12		 3.7
     42 C Cortex-A15	 ?		 ?
     43 
     44 C TODO
     45 C  * The addlsh1_n code runs well, but is only barely faster than mpn_addmul_1.
     46 C    The sublsh1_n code could surely be tweaked, its REVCY slows down things
     47 C    very much.  If two insns are really needed, it might help to separate them
     48 C    for better micro-parallelism.
     49 
     50 define(`rp', `r0')	C pointer written by the stmia/stm/str stores
     51 define(`up', `r1')	C pointer to the limbs used as first ADDSUBC operand
     52 define(`vp', `r2')	C pointer to the limbs that get doubled
     53 define(`n',  `r3')	C limb count, stepped down by 3 per block
     54 
        C Build-time selection of the add or the subtract flavour.  Which of the
        C OPERATION_ symbols is defined is decided outside this file.
        C
        C Add flavour (OPERATION_addlsh1_n):
        C   ADDSUBC      adcs.
        C   SAVECY(d,s)  sbc d, s, #0, i.e. d = s - (1 - C).  The code passes
        C                s = r11, which the add build sets to all ones, so d is
        C                0xffffffff when C is set and 0xfffffffe when C is clear.
        C   RESTCY(r)    cmn r, #1: C ends up set only when r is 0xffffffff.
        C   REVCY(r)     empty, the carry is used as it is.
        C   INICYR(r)    r = 0, which RESTCY turns into C clear.
        C   RETVAL(r)    r0 = r + 2 + C; for an r written by SAVECY this is the
        C                saved carry plus the current carry.
        C   r10r11       r11, the top of the pushed/popped register range.
        C   ADDSUB, SETCY and func_nc are defined but not referenced by the code
        C   visible in this file.
     55 ifdef(`OPERATION_addlsh1_n', `
     56   define(`ADDSUB',	adds)
     57   define(`ADDSUBC',	adcs)
     58   define(`SETCY',	`cmp	$1, #1')
     59   define(`RETVAL',	`adc	r0, $1, #2')
     60   define(`SAVECY',	`sbc	$1, $2, #0')
     61   define(`RESTCY',	`cmn	$1, #1')
     62   define(`REVCY',	`')
     63   define(`INICYR',	`mov	$1, #0')
     64   define(`r10r11',	`r11')
     65   define(`func',	mpn_addlsh1_n)
     66   define(`func_nc',	mpn_addlsh1_nc)')
        C Subtract flavour (OPERATION_sublsh1_n):
        C   ADDSUBC      sbcs (C set means no borrow).
        C   SAVECY(d,s)  sbc d, d, d, i.e. d = -(1 - C): 0 when C is set and
        C                0xffffffff when C is clear.  The second argument is
        C                ignored.
        C   RESTCY(r)    cmn r, #1 as in the add flavour, so SAVECY followed by
        C                RESTCY yields the complement of the carry that was saved.
        C   REVCY(r)     the SAVECY and RESTCY instructions back to back on r:
        C                complements C and overwrites r.
        C   INICYR(r)    r = 0xffffffff, which RESTCY turns into C set.
        C   RETVAL(r)    r0 = r + 1 + C.
        C   r10r11       r10; r11 does not appear in this flavour.
        C   ADDSUB, SETCY and func_nc are again not referenced by the code
        C   visible in this file.
     67 ifdef(`OPERATION_sublsh1_n', `
     68   define(`ADDSUB',	subs)
     69   define(`ADDSUBC',	sbcs)
     70   define(`SETCY',	`rsbs	$1, $1, #0')
     71   define(`RETVAL',	`adc	r0, $1, #1')
     72   define(`SAVECY',	`sbc	$1, $1, $1')
     73   define(`RESTCY',	`cmn	$1, #1')
     74   define(`REVCY',	`sbc	$1, $1, $1
     75 			cmn	$1, #1')
     76   define(`INICYR',	`mvn	$1, #0')
     77   define(`r10r11',	`r10')
     78   define(`func',	mpn_sublsh1_n)
     79   define(`func_nc',	mpn_sublsh1_nc)')
     80 
     81 MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
     82 
     83 ASM_START()
        C func: walks n limbs in blocks of three.  Each vp limb is doubled with
        C adcs, combined with the matching up limb through ADDSUBC, and the
        C result is stored through rp.  r0 is written by RETVAL just before the
        C return.
        C
        C Two carry chains are interleaved.  The doubling of a block runs one
        C step ahead of its ADDSUBC.  The carry out of the doubling is parked by
        C SAVECY in r12 or r14 (the two alternate from block to block) and is
        C brought back by RESTCY as the carry in of the ADDSUBC of the following
        C block.  The carry out of ADDSUBC stays in the flags, goes through
        C REVCY, and becomes the carry in of the next doubling.  Both carries
        C have the same weight, so the sum over a block is unaffected by which
        C chain absorbs which carry.
        C
        C The loop is unrolled twice so that r12 and r14 can swap roles without
        C a register move.
        C
        C NOTE(review): no special handling of n = 0 is visible here; presumably
        C callers always pass n >= 1 -- confirm against the interface contract.
     84 PROLOGUE(func)
     85 	push	{r4-r10r11, r14}	C range ends at r10 or r11, see r10r11
     86 
        C Add build only: r11 = all ones, the source operand of its SAVECY.
     87 ifdef(`OPERATION_addlsh1_n', `
     88 	mvn	r11, #0
     89 ')
     90 	INICYR(	r14)			C r14 = neutral carry in for the first RESTCY
     91 	subs	n, n, #3
     92 	blt	L(le2)			C carry clear on branch path
     93 
     94 	cmn	r0, #0			C clear carry
     95 	ldmia	vp!, {r8, r9, r10}	C first vp block
     96 	b	L(mid)			C enter the loop at the doubling step
     97 
     98 L(top):	RESTCY(	r14)			C carry in from the value parked in r14
     99 	ADDSUBC	r4, r4, r8
    100 	ADDSUBC	r5, r5, r9
    101 	ADDSUBC	r6, r6, r10
    102 	ldmia	vp!, {r8, r9, r10}	C next vp block, flags stay live
    103 	stmia	rp!, {r4, r5, r6}
    104 	REVCY(r14)			C r14 is free here, reused as scratch
    105 	adcs	r8, r8, r8		C double, carry in is the ADDSUBC carry out
    106 	adcs	r9, r9, r9
    107 	adcs	r10, r10, r10
    108 	ldmia	up!, {r4, r5, r6}
    109 	SAVECY(	r14, r11)		C park the doubling carry out in r14
    110 	subs	n, n, #3
    111 	blt	L(exi)			C r12 = pending carry in, r14 = newest
    112 	RESTCY(	r12)			C second half: same steps, r12 and r14 swapped
    113 	ADDSUBC	r4, r4, r8
    114 	ADDSUBC	r5, r5, r9
    115 	ADDSUBC	r6, r6, r10
    116 	ldmia	vp!, {r8, r9, r10}
    117 	stmia	rp!, {r4, r5, r6}
    118 	REVCY(r12)
    119 L(mid):	adcs	r8, r8, r8
    120 	adcs	r9, r9, r9
    121 	adcs	r10, r10, r10
    122 	ldmia	up!, {r4, r5, r6}
    123 	SAVECY(	r12, r11)		C park the doubling carry out in r12
    124 	subs	n, n, #3
    125 	bge	L(top)
    126 
        C Leaving from this half the roles are the other way round compared to
        C the blt exit above, so exchange r12 and r14 before the shared tail.
    127 	mov	r7, r12			C swap alternating...
    128 	mov	r12, r14		C ...carry-save...
    129 	mov	r14, r7			C ...registers
    130 
        C Finish the block whose doubled limbs are already in r8-r10.
    131 L(exi):	RESTCY(	r12)
    132 	ADDSUBC	r4, r4, r8
    133 	ADDSUBC	r5, r5, r9
    134 	ADDSUBC	r6, r6, r10
    135 	stmia	rp!, {r4, r5, r6}
    136 
    137 	REVCY(r12)
        C Tail dispatch on the low two bits of n.  The C flag is still live here:
        C the adcs and adc further down consume it.
    138 L(le2):	tst	n, #1			C n = {-1,-2,-3} map to [2], [1], [0]
    139 	beq	L(e1)
    140 
    141 L(e02):	tst	n, #2
    142 	beq	L(rt0)			C nothing left, r14 and C already final
    143 	ldm	vp, {r8, r9}		C two limbs left
    144 	adcs	r8, r8, r8
    145 	adcs	r9, r9, r9
    146 	ldm	up, {r4, r5}
    147 	SAVECY(	r12, r11)		C last doubling carry out goes to r12
    148 	RESTCY(	r14)
    149 	ADDSUBC	r4, r4, r8
    150 	ADDSUBC	r5, r5, r9
    151 	stm	rp, {r4, r5}
    152 	b	L(rt1)
    153 
    154 L(e1):	ldr	r8, [vp]		C one limb left
    155 	adcs	r8, r8, r8
    156 	ldr	r4, [up]
    157 	SAVECY(	r12, r11)		C last doubling carry out goes to r12
    158 	RESTCY(	r14)
    159 	ADDSUBC	r4, r4, r8
    160 	str	r4, [rp]
    161 
    162 L(rt1):	mov	r14, r12		C RETVAL below reads r14
    163 	REVCY(r12)
    164 L(rt0):	RETVAL(	r14)			C r0 = parked doubling carry + carry in flags
    165 	pop	{r4-r10r11, r14}
    166 	return	r14			C return macro is defined outside this file
    167 EPILOGUE()
    168