dnl  addmul_1.asm (revision 1.1.1.2)
dnl  SPARC v8 mpn_addmul_1 -- Multiply a limb vector with a limb and
dnl  add the result to a second limb vector.

dnl  Copyright 1992-1995, 2000 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.


     33 include(`../config.m4')
     34 
     35 C INPUT PARAMETERS
     36 C res_ptr	o0
     37 C s1_ptr	o1
     38 C size		o2
     39 C s2_limb	o3
     40 
     41 ASM_START()
     42 PROLOGUE(mpn_addmul_1)
     43 	orcc	%g0,%g0,%g2
     44 	ld	[%o1+0],%o4	C 1
     45 
     46 	sll	%o2,4,%g1
     47 	and	%g1,(4-1)<<4,%g1
     48 ifdef(`PIC',
     49 `	mov	%o7,%g4		C Save return address register
     50 0:	call	1f
     51 	add	%o7,L(1)-0b,%g3
     52 1:	mov	%g4,%o7		C Restore return address register
     53 ',
     54 `	sethi	%hi(L(1)),%g3
     55 	or	%g3,%lo(L(1)),%g3
     56 ')
     57 	jmp	%g3+%g1
     58 	nop
     59 L(1):
     60 L(L00):	add	%o0,-4,%o0
     61 	b	L(loop00)	C 4, 8, 12, ...
     62 	add	%o1,-4,%o1
     63 	nop
     64 L(L01):	b	L(loop01)	C 1, 5, 9, ...
     65 	nop
     66 	nop
     67 	nop
     68 L(L10):	add	%o0,-12,%o0	C 2, 6, 10, ...
     69 	b	L(loop10)
     70 	add	%o1,4,%o1
     71 	nop
     72 L(L11):	add	%o0,-8,%o0	C 3, 7, 11, ...
     73 	b	L(loop11)
     74 	add	%o1,-8,%o1
     75 	nop
     76 
     77 L(loop):
     78 	addcc	%g3,%g2,%g3	C 1
     79 	ld	[%o1+4],%o4	C 2
     80 	rd	%y,%g2		C 1
     81 	addx	%g0,%g2,%g2
     82 	ld	[%o0+0],%g1	C 2
     83 	addcc	%g1,%g3,%g3
     84 	st	%g3,[%o0+0]	C 1
     85 L(loop00):
     86 	umul	%o4,%o3,%g3	C 2
     87 	ld	[%o0+4],%g1	C 2
     88 	addxcc	%g3,%g2,%g3	C 2
     89 	ld	[%o1+8],%o4	C 3
     90 	rd	%y,%g2		C 2
     91 	addx	%g0,%g2,%g2
     92 	nop
     93 	addcc	%g1,%g3,%g3
     94 	st	%g3,[%o0+4]	C 2
     95 L(loop11):
     96 	umul	%o4,%o3,%g3	C 3
     97 	addxcc	%g3,%g2,%g3	C 3
     98 	ld	[%o1+12],%o4	C 4
     99 	rd	%y,%g2		C 3
    100 	add	%o1,16,%o1
    101 	addx	%g0,%g2,%g2
    102 	ld	[%o0+8],%g1	C 2
    103 	addcc	%g1,%g3,%g3
    104 	st	%g3,[%o0+8]	C 3
    105 L(loop10):
    106 	umul	%o4,%o3,%g3	C 4
    107 	addxcc	%g3,%g2,%g3	C 4
    108 	ld	[%o1+0],%o4	C 1
    109 	rd	%y,%g2		C 4
    110 	addx	%g0,%g2,%g2
    111 	ld	[%o0+12],%g1	C 2
    112 	addcc	%g1,%g3,%g3
    113 	st	%g3,[%o0+12]	C 4
    114 	add	%o0,16,%o0
    115 	addx	%g0,%g2,%g2
    116 L(loop01):
    117 	addcc	%o2,-4,%o2
    118 	bg	L(loop)
    119 	umul	%o4,%o3,%g3	C 1
    120 
    121 	addcc	%g3,%g2,%g3	C 4
    122 	rd	%y,%g2		C 4
    123 	addx	%g0,%g2,%g2
    124 	ld	[%o0+0],%g1	C 2
    125 	addcc	%g1,%g3,%g3
    126 	st	%g3,[%o0+0]	C 4
    127 	addx	%g0,%g2,%o0
    128 
    129 	retl
    130 	 nop
    131 EPILOGUE(mpn_addmul_1)
    132