Home | History | Annotate | Line # | Download | only in v6
      1 dnl  ARM mpn_mul_2.
      2 
      3 dnl  Contributed to the GNU project by Torbjörn Granlund.
      4 
      5 dnl  Copyright 2012 Free Software Foundation, Inc.
      6 
      7 dnl  This file is part of the GNU MP Library.
      8 dnl
      9 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     10 dnl  it under the terms of either:
     11 dnl
     12 dnl    * the GNU Lesser General Public License as published by the Free
     13 dnl      Software Foundation; either version 3 of the License, or (at your
     14 dnl      option) any later version.
     15 dnl
     16 dnl  or
     17 dnl
     18 dnl    * the GNU General Public License as published by the Free Software
     19 dnl      Foundation; either version 2 of the License, or (at your option) any
     20 dnl      later version.
     21 dnl
     22 dnl  or both in parallel, as here.
     23 dnl
     24 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     25 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     26 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     27 dnl  for more details.
     28 dnl
     29 dnl  You should have received copies of the GNU General Public License and the
     30 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     31 dnl  see https://www.gnu.org/licenses/.
     32 
     33 include(`../config.m4')
     34 
     35 C	     cycles/limb
     36 C StrongARM:	 -
     37 C XScale	 -
     38 C ARM11		 5.25
     39 C Cortex-A5	 3.63
     40 C Cortex-A7	 3.15
     41 C Cortex-A8	 5.0
     42 C Cortex-A9	 2.25
     43 C Cortex-A15	 2.5
     44 C Cortex-A17	 2.13
     45 C Cortex-A53	 3.5
     46 
     47 C TODO
     48 C  * This is a trivial edit of the addmul_2 code.  Check for simplifications,
     49 C    and possible speedups to 2.0 c/l.
     50 
     51 define(`rp',`r0')	C base register of every result store
     52 define(`up',`r1')	C base register of every source-limb load
     53 define(`n', `r2')	C source limb count; doubles as the loop counter
     54 define(`vp',`r3')	C address of the two multiplier limbs, read once by ldm
     55 
     56 define(`v0',`r6')	C multiplier limb loaded from the lower address at vp
     57 define(`v1',`r7')	C multiplier limb loaded from the higher address at vp
     58 define(`u0',`r3')	C source limb; reuses r3, so vp is gone after the first u0 load
     59 define(`u1',`r9')	C source limb; u0 and u1 alternate from limb to limb
     60 
     61 define(`cya',`r8')	C running carry of the products by v0
     62 define(`cyb',`r12')	C running carry of the products by v1
     63 
     64 
     65 ASM_START()
        C mpn_mul_2 -- multiply the n limbs at up by the two limbs at vp.
        C
        C In:    rp = where the product is stored, up = source limbs,
        C        n = number of source limbs, vp = the two multiplier limbs.
        C Out:   n+1 limbs are stored at rp; the limb above them is returned
        C        in r0.
        C Uses:  r4-r9 are pushed on entry and popped on exit; r0-r3, r12 and
        C        the flags are overwritten without being saved.
        C
        C Method: each source limb u adds u*v0 into one result column and u*v1
        C into the next column up.  r4 and r5 alternate as the accumulator of
        C the current and the following column; cya carries the high halves of
        C the v0 products and cyb those of the v1 products.  The instruction
        C umaal lo, hi, a, b replaces hi:lo with a*b + lo + hi, so a single
        C instruction adds a product, an accumulator and a carry.
        C
        C The loop handles four source limbs per pass.  n mod 4 selects the
        C entry point (top, lo1, lo0 or lo3), and the feed-in code biases up
        C and rp so that the first load at that entry reads up[1] and the first
        C store writes rp[0].  The code at end always finishes the last two
        C source limbs.
        C
        C NOTE(review): for n = 1 the fi1 path loads and stores past the
        C operands before n is tested, so n >= 2 appears to be required --
        C confirm against the callers.
     66 PROLOGUE(mpn_mul_2)
     67 	push	{ r4, r5, r6, r7, r8, r9 }	C save the registers used as r4, r5, v0, v1, cya, u1
     68 
     69 	ldm	vp, { v0, v1 }		C fetch both multiplier limbs before r3 is reused as u0
     70 	mov	cya, #0			C both carry chains start empty
     71 	mov	cyb, #0
     72 
     73 	tst	n, #1
     74 	beq	L(evn)			C bit 0 of n clear: even limb count
     75 L(odd):	mov	r5, #0
     76 	ldr	u0, [up, #0]		C odd n: the first source limb goes in u0
     77 	mov	r4, #0
     78 	tst	n, #2
     79 	beq	L(fi1)			C bit 1 of n clear: n mod 4 = 1
     80 L(fi3):	sub	up, up, #12		C n mod 4 = 3: bias pointers for entry at lo3
     81 	sub	rp, rp, #16
     82 	b	L(lo3)
     83 L(fi1):	sub	n, n, #1		C n mod 4 = 1: count becomes a multiple of 4
     84 	sub	up, up, #4		C bias pointers for entry at lo1
     85 	sub	rp, rp, #8
     86 	b	L(lo1)
     87 L(evn):	mov	r4, #0
     88 	ldr	u1, [up, #0]		C even n: the first source limb goes in u1
     89 	mov	r5, #0
     90 	tst	n, #2
     91 	bne	L(fi2)			C bit 1 of n set: n mod 4 = 2
     92 L(fi0):	sub	up, up, #8		C n mod 4 = 0: bias pointers for entry at lo0
     93 	sub	rp, rp, #12
     94 	b	L(lo0)
     95 L(fi2):	subs	n, n, #2		C n mod 4 = 2: these flags feed the bls below
     96 	sub	rp, rp, #4		C first store is at [rp, #4]; sub leaves the flags alone
     97 	bls	L(end)			C nothing left beyond two limbs: skip the loop
     98 
     99 	ALIGN(16)
    100 L(top):	ldr	u0, [up, #4]		C fetch the next source limb
    101 	umaal	r4, cya, u1, v0		C cya:r4 = u1*v0 + r4 + cya
    102 	str	r4, [rp, #4]		C this column is complete
    103 	mov	r4, #0			C r4 restarts as an empty accumulator
    104 	umaal	r5, cyb, u1, v1		C cyb:r5 = u1*v1 + r5 + cyb
    105 L(lo1):	ldr	u1, [up, #8]		C same stage with u0/u1 and r4/r5 swapped
    106 	umaal	r5, cya, u0, v0		C cya:r5 = u0*v0 + r5 + cya
    107 	str	r5, [rp, #8]
    108 	mov	r5, #0
    109 	umaal	r4, cyb, u0, v1		C cyb:r4 = u0*v1 + r4 + cyb
    110 L(lo0):	ldr	u0, [up, #12]
    111 	umaal	r4, cya, u1, v0
    112 	str	r4, [rp, #12]
    113 	mov	r4, #0
    114 	umaal	r5, cyb, u1, v1
    115 L(lo3):	ldr	u1, [up, #16]!		C load, then writeback moves up forward 16 bytes
    116 	umaal	r5, cya, u0, v0
    117 	str	r5, [rp, #16]!		C store, then writeback moves rp forward 16 bytes
    118 	mov	r5, #0
    119 	umaal	r4, cyb, u0, v1
    120 	subs	n, n, #4		C four source limbs consumed in this pass
    121 	bhi	L(top)			C loop while the count stays above zero
    122 
    123 L(end):	umaal	r4, cya, u1, v0		C wind-down: u1 is the second-to-last source limb
    124 	ldr	u0, [up, #4]		C last source limb
    125 	umaal	r5, cyb, u1, v1
    126 	str	r4, [rp, #4]
    127 	umaal	r5, cya, u0, v0
    128 	umaal	cya, cyb, u0, v1	C cyb:cya = u0*v1 + cya + cyb, the top two limbs
    129 	str	r5, [rp, #8]
    130 	str	cya, [rp, #12]		C last limb written to memory
    131 	mov	r0, cyb			C the limb above it is the return value
    132 
    133 	pop	{ r4, r5, r6, r7, r8, r9 }	C restore the registers saved on entry
    134 	bx	r14			C return through the link register
    135 EPILOGUE()
    136