Home | History | Annotate | Line # | Download | only in alpha
      1 dnl  Alpha mpn_lshift -- Shift a number left.
      2 
      3 dnl  Copyright 1994, 1995, 2000, 2003, 2009 Free Software Foundation, Inc.
      4 
      5 dnl  This file is part of the GNU MP Library.
      6 dnl
      7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8 dnl  it under the terms of either:
      9 dnl
     10 dnl    * the GNU Lesser General Public License as published by the Free
     11 dnl      Software Foundation; either version 3 of the License, or (at your
     12 dnl      option) any later version.
     13 dnl
     14 dnl  or
     15 dnl
     16 dnl    * the GNU General Public License as published by the Free Software
     17 dnl      Foundation; either version 2 of the License, or (at your option) any
     18 dnl      later version.
     19 dnl
     20 dnl  or both in parallel, as here.
     21 dnl
     22 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     23 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25 dnl  for more details.
     26 dnl
     27 dnl  You should have received copies of the GNU General Public License and the
     28 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     29 dnl  see https://www.gnu.org/licenses/.
     30 
     31 include(`../config.m4')
     32 
     33 C      cycles/limb
     34 C EV4:     ?
     35 C EV5:     3.25
     36 C EV6:     1.75
     37 
     38 C  INPUT PARAMETERS
     39 C  rp	r16
     40 C  up	r17
     41 C  n	r18
     42 C  cnt	r19
     43 
     44 
     45 ASM_START()
        C mpn_lshift -- shift the n-limb number at up left by cnt bits, store the
        C n result limbs at rp, and return in r0 the bits shifted out of the top
        C limb.  Limbs are 8 bytes (ldq/stq, s8addq scaling).
        C
        C The limbs are walked from the most significant end downwards.  Every
        C result limb is (up[i] << cnt) | (up[i-1] >> (64-cnt)); the lowest result
        C limb is just up[0] << cnt.
        C
        C Structure:
        C   1. L(top0): simple loop, one limb per iteration, run (n-1) mod 4 times
        C      so that the number of limbs left for step 2 is a multiple of 4.
        C   2. Warm-up code, the four-limbs-per-iteration loop L(top), and
        C      cool-down code.  Loads, shifts and stores of neighbouring groups of
        C      four limbs are interleaved (software pipelined).
        C   3. Three exits: L(end) (nothing left for step 2), L(end1) (exactly four
        C      limbs for step 2), L(end2)/fall-through (eight or more).
        C
        C Register roles as used below:
        C   r16      result pointer; -8(r16) is the slot of the limb in progress
        C   r17      source pointer; -8(r17) is the limb in progress
        C   r18      limbs still to be loaded by the 4-way code (multiple of 4)
        C   r19      cnt
        C   r20      -cnt, used as right-shift count; Alpha shifts use only the low
        C            6 bits of the count, so this acts as 64-cnt
        C   r28      iteration counter of L(top0)
        C   r0       return value
        C   r1-r4    source limbs
        C   r5-r8    right-shifted parts, then the combined result limbs
        C   r21-r24  left-shifted parts (r24 is the part carried between groups)
        C
        C NOTE(review): nothing here checks n or cnt.  The top limb is loaded
        C unconditionally and r0 is formed by a right shift by -cnt, so the code
        C presumably relies on callers passing n >= 1 and 0 < cnt < 64 -- confirm
        C against the documented mpn_lshift contract.
     46 PROLOGUE(mpn_lshift)
     47 	s8addq	r18,r17,r17	C r17 = up + 8*n, i.e. just past the top source limb
     48 	ldq	r4,-8(r17)	C r4 = most significant source limb
     49 	subq	r31,r19,r20	C r20 = 0 - cnt (r31 reads as zero): right-shift count
     50 	s8addq	r18,r16,r16	C r16 = rp + 8*n, i.e. just past the top result limb
     51 	subq	r18,1,r18	C r18 = n-1 limbs still to be loaded
     52 	and	r18,4-1,r28	C r28 = (n-1) mod 4 = iterations of the first loop
     53 	srl	r4,r20,r0	C return value: bits shifted out of the top limb
     54 
     55 	beq	r28,L(L0)	C n-1 already a multiple of 4: skip the first loop
     56 	subq	r18,r28,r18	C r18 = limbs left for the 4-way code (multiple of 4)
     57 
     58 	ALIGN(8)
        C first loop: one result limb per iteration, r28 iterations
     59 L(top0):
     60 	ldq	r3,-16(r17)	C r3 = next lower source limb
     61 	subq	r16,8,r16	C step result pointer down one limb
     62 	sll	r4,r19,r5	C high part: current limb << cnt
     63 	subq	r17,8,r17	C step source pointer down one limb
     64 	subq	r28,1,r28	C count this iteration
     65 	srl	r3,r20,r6	C low part: next limb >> (64-cnt)
     66 	bis	r3,r3,r4	C r4 = r3: next limb becomes the current limb
     67 	bis	r5,r6,r8	C combine the two parts
     68 	stq	r8,0(r16)	C store result limb (r16 was already stepped down)
     69 	bne	r28,L(top0)
     70 
     71 L(L0):	sll	r4,r19,r24	C r24 = current limb << cnt, waiting for its low part
     72 	beq	r18,L(end)	C nothing left to load: current limb is the lowest one
     73 C warm up phase 1: load the first group of four limbs below the current one
     74 	ldq	r1,-16(r17)
     75 	subq	r18,4,r18	C four limbs consumed from the count
     76 	ldq	r2,-24(r17)
     77 	ldq	r3,-32(r17)
     78 	ldq	r4,-40(r17)
     79 C warm up phase 2: start shifting group 1 while loading group 2
     80 	srl	r1,r20,r7	C low part for the current limb's result
     81 	sll	r1,r19,r21	C high part of the r1 limb's result
     82 	srl	r2,r20,r8	C low part for the r1 limb's result
     83 	beq	r18,L(end1)	C only this one group of four: drain it at L(end1)
     84 	ldq	r1,-48(r17)	C reload r1 (old value already consumed above)
     85 	sll	r2,r19,r22	C still the old r2: high part of its result
     86 	ldq	r2,-56(r17)
     87 	srl	r3,r20,r5
     88 	bis	r7,r24,r7	C r7 = finished result for the current limb
     89 	sll	r3,r19,r23
     90 	bis	r8,r21,r8	C r8 = finished result for the old r1 limb
     91 	srl	r4,r20,r6
     92 	ldq	r3,-64(r17)
     93 	sll	r4,r19,r24	C still the old r4: part carried into the next group
     94 	ldq	r4,-72(r17)
     95 	subq	r18,4,r18	C second group of four consumed from the count
     96 	beq	r18,L(end2)	C exactly two groups: skip the loop, drain at L(end2)
     97 	ALIGN(16)
     98 C main loop: each iteration stores four result limbs and loads four source
        C limbs.  r16 and r17 are only stepped by 32 near the end of the iteration,
        C so the load offsets (-80..-104) reach past the group loaded last time.
        C NOTE(review): the two unop lines carry a commented-out ldq r31,-96(r17),
        C presumably a disabled prefetch kept as a no-op for scheduling -- confirm.
     99 L(top):	stq	r7,-8(r16)	C store result 1 of the group in flight
    100 	bis	r5,r22,r5	C finish result 3
    101 	stq	r8,-16(r16)	C store result 2
    102 	bis	r6,r23,r6	C finish result 4
    103 
    104 	srl	r1,r20,r7	C begin next group: low part for its result 1
    105 	subq	r18,4,r18	C four more limbs consumed from the count
    106 	sll	r1,r19,r21
    107 	unop	C ldq	r31,-96(r17)
    108 
    109 	srl	r2,r20,r8
    110 	ldq	r1,-80(r17)	C reload r1 now that its old value has been shifted
    111 	sll	r2,r19,r22
    112 	ldq	r2,-88(r17)
    113 
    114 	stq	r5,-24(r16)	C store result 3
    115 	bis	r7,r24,r7	C next group's result 1 = low part | carried high part
    116 	stq	r6,-32(r16)	C store result 4
    117 	bis	r8,r21,r8	C next group's result 2
    118 
    119 	srl	r3,r20,r5
    120 	unop	C ldq	r31,-96(r17)
    121 	sll	r3,r19,r23
    122 	subq	r16,32,r16	C step result pointer down four limbs
    123 
    124 	srl	r4,r20,r6
    125 	ldq	r3,-96(r17)
    126 	sll	r4,r19,r24	C old r4: high part carried into the following group
    127 	ldq	r4,-104(r17)
    128 
    129 	subq	r17,32,r17	C step source pointer down four limbs
    130 	bne	r18,L(top)	C loop while limbs remain to be loaded
    131 C cool down phase 2/1: no more loads; store the group in flight and shift
        C the last loaded group (r1-r4)
    132 L(end2):
    133 	stq	r7,-8(r16)
    134 	bis	r5,r22,r5
    135 	stq	r8,-16(r16)
    136 	bis	r6,r23,r6
    137 	srl	r1,r20,r7
    138 	sll	r1,r19,r21
    139 	srl	r2,r20,r8
    140 	sll	r2,r19,r22
    141 	stq	r5,-24(r16)
    142 	bis	r7,r24,r7
    143 	stq	r6,-32(r16)
    144 	bis	r8,r21,r8
    145 	srl	r3,r20,r5
    146 	sll	r3,r19,r23
    147 	srl	r4,r20,r6
    148 	sll	r4,r19,r24	C r24 = lowest source limb << cnt
    149 C cool down phase 2/2: store the last four combined limbs (r16 not stepped,
        C so offsets continue at -40)
    150 	stq	r7,-40(r16)
    151 	bis	r5,r22,r5
    152 	stq	r8,-48(r16)
    153 	bis	r6,r23,r6
    154 	stq	r5,-56(r16)
    155 	stq	r6,-64(r16)
    156 C cool down phase 2/3: lowest result limb, nothing ORed into its low bits
    157 	stq	r24,-72(r16)
    158 	ret	r31,(r26),1	C return to the address in r26; result is in r0
    159 
    160 C cool down phase 1/1: reached when the 4-way code has exactly four limbs;
        C finish the shifts that the warm-up had not yet done
    161 L(end1):
    162 	sll	r2,r19,r22
    163 	srl	r3,r20,r5
    164 	bis	r7,r24,r7	C result for the limb that was current at L(L0)
    165 	sll	r3,r19,r23
    166 	bis	r8,r21,r8
    167 	srl	r4,r20,r6
    168 	sll	r4,r19,r24	C r24 = lowest source limb << cnt
    169 C cool down phase 1/2: store the five remaining result limbs
    170 	stq	r7,-8(r16)
    171 	bis	r5,r22,r5
    172 	stq	r8,-16(r16)
    173 	bis	r6,r23,r6
    174 	stq	r5,-24(r16)
    175 	stq	r6,-32(r16)
    176 	stq	r24,-40(r16)	C lowest result limb, nothing ORed into its low bits
    177 	ret	r31,(r26),1	C return to the address in r26; result is in r0
    178 
    179 L(end):	stq	r24,-8(r16)	C only the lowest limb is left: store it << cnt
    180 	ret	r31,(r26),1	C return to the address in r26; result is in r0
    181 EPILOGUE(mpn_lshift)
    182 ASM_END()
    182 ASM_END()
    183