Home | History | Annotate | Line # | Download | only in v8
addmul_1.asm revision 1.1.1.1
      1 dnl  SPARC v8 mpn_addmul_1 -- Multiply a limb vector with a limb and
      2 dnl  add the result to a second limb vector.
      3 
      4 dnl  Copyright 1992, 1993, 1994, 1995, 2000 Free Software Foundation, Inc.
      5 
      6 dnl  This file is part of the GNU MP Library.
      7 
      8 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      9 dnl  it under the terms of the GNU Lesser General Public License as published
     10 dnl  by the Free Software Foundation; either version 3 of the License, or (at
     11 dnl  your option) any later version.
     12 
     13 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     14 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     15 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     16 dnl  License for more details.
     17 
     18 dnl  You should have received a copy of the GNU Lesser General Public License
     19 dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
     20 
     21 
     22 include(`../config.m4')
     23 
     24 C INPUT PARAMETERS
     25 C res_ptr	o0
     26 C s1_ptr	o1
     27 C size		o2
     28 C s2_limb	o3
     29 
      30 ASM_START()
C  mp_limb_t mpn_addmul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
C                          mp_size_t size, mp_limb_t s2_limb)
C
C  For each of the `size' limbs: res_ptr[i] += s1_ptr[i] * s2_limb,
C  propagating carries; the final carry-out limb is the return value.
C  Register roles (arguments are listed in the table above):
C    %o4  current s1 limb          %g1  scratch / current res limb
C    %g3  product low half         %g2  carry limb into next position
C    %y   product high half (written by umul, fetched with rd %y)
C  The loop is 4-way unrolled; the trailing "C n" comments tag each
C  instruction with the limb (1..4) of the unrolled group it serves.
      31 PROLOGUE(mpn_addmul_1)
      32 	orcc	%g0,%g0,%g2	C clear carry limb %g2 and the carry flag
      33 	ld	[%o1+0],%o4	C 1  prefetch first s1 limb
      34 
C  Compute the dispatch offset: each jump-table entry at L(1) below is
C  4 instructions = 16 bytes, so the offset is (size mod 4) << 4.
      35 	sll	%o2,4,%g1
      36 	and	%g1,(4-1)<<4,%g1
C  Materialize the address of L(1): PC-relatively under PIC (the call
C  deposits its own address in %o7), otherwise with sethi/or absolute.
      37 ifdef(`PIC',
      38 `	mov	%o7,%g4		C Save return address register
      39 0:	call	1f
      40 	add	%o7,L(1)-0b,%g3
      41 1:	mov	%g4,%o7		C Restore return address register
      42 ',
      43 `	sethi	%hi(L(1)),%g3
      44 	or	%g3,%lo(L(1)),%g3
      45 ')
      46 	jmp	%g3+%g1		C dispatch on size mod 4
      47 	nop			C (branch delay slot)
      48 L(1):
C  Jump table: each entry pre-biases res_ptr/s1_ptr so its loop entry
C  point can use the fixed displacements hard-coded in the loop body.
      49 L(L00):	add	%o0,-4,%o0
      50 	b	L(loop00)	C 4, 8, 12, ...
      51 	add	%o1,-4,%o1
      52 	nop
      53 L(L01):	b	L(loop01)	C 1, 5, 9, ...
      54 	nop
      55 	nop
      56 	nop
      57 L(L10):	add	%o0,-12,%o0	C 2, 6, 10, ...
      58 	b	L(loop10)
      59 	add	%o1,4,%o1
      60 	nop
      61 L(L11):	add	%o0,-8,%o0	C 3, 7, 11, ...
      62 	b	L(loop11)
      63 	add	%o1,-8,%o1
      64 	nop
      65 
C  Main 4-way unrolled, software-pipelined loop.  For each limb: umul
C  leaves the 32x32->64 product low half in %g3 and the high half in
C  %y; the low half plus the incoming carry limb is added into the res
C  word, while the high half (with carry-flag outs folded in via
C  addx/addxcc) becomes the carry limb %g2 for the next position.
C  NOTE(review): correctness relies on the intervening ld/st/plain-add
C  instructions not touching the integer condition codes.
      66 L(loop):
      67 	addcc	%g3,%g2,%g3	C 1
      68 	ld	[%o1+4],%o4	C 2
      69 	rd	%y,%g2		C 1
      70 	addx	%g0,%g2,%g2	C fold carry flag into carry limb
      71 	ld	[%o0+0],%g1	C 2
      72 	addcc	%g1,%g3,%g3
      73 	st	%g3,[%o0+0]	C 1
      74 L(loop00):
      75 	umul	%o4,%o3,%g3	C 2
      76 	ld	[%o0+4],%g1	C 2
      77 	addxcc	%g3,%g2,%g3	C 2
      78 	ld	[%o1+8],%o4	C 3
      79 	rd	%y,%g2		C 2
      80 	addx	%g0,%g2,%g2
      81 	nop
      82 	addcc	%g1,%g3,%g3
      83 	st	%g3,[%o0+4]	C 2
      84 L(loop11):
      85 	umul	%o4,%o3,%g3	C 3
      86 	addxcc	%g3,%g2,%g3	C 3
      87 	ld	[%o1+12],%o4	C 4
      88 	rd	%y,%g2		C 3
      89 	add	%o1,16,%o1	C advance s1_ptr by 4 limbs
      90 	addx	%g0,%g2,%g2
      91 	ld	[%o0+8],%g1	C 2
      92 	addcc	%g1,%g3,%g3
      93 	st	%g3,[%o0+8]	C 3
      94 L(loop10):
      95 	umul	%o4,%o3,%g3	C 4
      96 	addxcc	%g3,%g2,%g3	C 4
      97 	ld	[%o1+0],%o4	C 1  prefetch next group's first limb
      98 	rd	%y,%g2		C 4
      99 	addx	%g0,%g2,%g2
     100 	ld	[%o0+12],%g1	C 2
     101 	addcc	%g1,%g3,%g3
     102 	st	%g3,[%o0+12]	C 4
     103 	add	%o0,16,%o0	C advance res_ptr by 4 limbs
     104 	addx	%g0,%g2,%g2
     105 L(loop01):
     106 	addcc	%o2,-4,%o2	C size -= 4; sets condition codes for bg
     107 	bg	L(loop)
     108 	umul	%o4,%o3,%g3	C 1  (delay slot: start next product)
     109 
C  Wind-down: complete the last limb left in flight by the pipeline
C  and return the accumulated carry-out limb in %o0.
     110 	addcc	%g3,%g2,%g3	C 4
     111 	rd	%y,%g2		C 4
     112 	addx	%g0,%g2,%g2
     113 	ld	[%o0+0],%g1	C 2
     114 	addcc	%g1,%g3,%g3
     115 	st	%g3,[%o0+0]	C 4
     116 	addx	%g0,%g2,%o0	C return value: final carry limb
     117 
     118 	retl
     119 	 nop			C (delay slot of retl)
     120 EPILOGUE(mpn_addmul_1)
    121