Home | History | Annotate | Line # | Download | only in ia64
copyi.asm revision 1.1.1.1
      1 dnl  IA-64 mpn_copyi -- copy limb vector, incrementing.
      2 
      3 dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
      4 
      5 dnl  This file is part of the GNU MP Library.
      6 
      7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8 dnl  it under the terms of the GNU Lesser General Public License as published
      9 dnl  by the Free Software Foundation; either version 3 of the License, or (at
     10 dnl  your option) any later version.
     11 
     12 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     13 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     14 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     15 dnl  License for more details.
     16 
     17 dnl  You should have received a copy of the GNU Lesser General Public License
     18 dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
     19 
     20 include(`../config.m4')
     21 
     22 C         cycles/limb
     23 C Itanium:    1
     24 C Itanium 2:  0.5
     25 
     26 C INPUT PARAMETERS
     27 C rp = r32
     28 C sp = r33
     29 C n = r34
     30 
     31 ASM_START()
     32 PROLOGUE(mpn_copyi)
        C  mpn_copyi: copy the n 8-byte limbs at sp (r33) to rp (r32), walking
        C  both vectors from the lowest address upwards.
        C
        C  Register use in this routine:
        C    r33, r21   load pointers, each post-incremented by 16 per ld8
        C    r32, r20   store pointers, each post-incremented by 16 per st8
        C               (the two pointer pairs are 8 bytes apart, so together
        C               they cover consecutive limbs)
        C    r16-r19    limbs that have been loaded but not yet stored
        C    r14        n mod 4, selects one of the four entry paths
        C    r15        (n - 4) >> 2, the value written to ar.lc
        C    r2         ar.lc as it was on entry, written back at .Ls00
        C    p14        set when n <= 3; those sizes skip the loop entirely
        C
        C  The loop at .Loop moves four limbs per trip.  Each entry path loads
        C  the first (n mod 4) + 4 limbs (or all n when n <= 3) and then either
        C  jumps into the loop at the slot that matches n mod 4, or jumps into
        C  the store-only tail at .Lend/.Ls11/.Ls10/.Ls01/.Ls00.
     33 	.prologue
     34 	.save ar.lc, r2	C unwind annotation: ar.lc is kept in r2
     35 	.body
     36 ifdef(`HAVE_ABI_32',
     37 `	addp4		r32 = 0, r32	C 32-bit ABI: form a 64-bit address from rp
     38 	addp4		r33 = 0, r33	C 32-bit ABI: form a 64-bit address from sp
     39 	sxt4		r34 = r34	C 32-bit ABI: sign-extend the 32-bit count n
     40 	;;
     41 ')
     42 {.mmi
     43 	nop		0
     44 	nop		0
     45 	mov.i		r2 = ar.lc	C save ar.lc, restored at .Ls00
     46 }
     47 {.mmi
     48 	and		r14 = 3, r34	C r14 = n mod 4
     49 	cmp.ge		p14, p15 = 3, r34	C p14 = (n <= 3), p15 = (n > 3)
     50 	add		r34 = -4, r34	C r34 = n - 4 (and/cmp above are in the same group and read the old n)
     51 	;;
     52 }
     53 {.mmi
     54 	cmp.eq		p8, p0 = 1, r14	C p8:  n mod 4 = 1
     55 	cmp.eq		p10, p0 = 2, r14	C p10: n mod 4 = 2
     56 	cmp.eq		p12, p0 = 3, r14	C p12: n mod 4 = 3
     57 }
        C  Dispatch on n mod 4; when none of p8/p10/p12 is set execution falls
        C  through to .Lb00.
     58 {.bbb
     59   (p8)	br.dptk		.Lb01
     60   (p10)	br.dptk		.Lb10
     61   (p12)	br.dptk		.Lb11
     62 }
     63 
     64 .Lb00:	C  n = 0, 4, 8, 12, ...
     65   (p14)	br.dptk		.Ls00	C n <= 3 on this path: nothing to copy
     66 	;;
     67 	add		r21 = 8, r33	C r21 = sp + 8, second load pointer
     68 	ld8		r16 = [r33], 16	C r16 = limb 0
     69 	shr		r15 = r34, 2	C r15 = (n - 4) >> 2
     70 	;;
     71 	ld8		r17 = [r21], 16	C r17 = limb 1
     72 	mov.i		ar.lc = r15	C loop counter
     73 	ld8		r18 = [r33], 16	C r18 = limb 2
     74 	add		r20 = 8, r32	C r20 = rp + 8, second store pointer
     75 	;;
     76 	ld8		r19 = [r21], 16	C r19 = limb 3
     77 	br.cloop.dptk	.Loop	C taken (and ar.lc decremented) when ar.lc != 0
     78 	;;
     79 	br.sptk		.Lend	C ar.lc was 0: only the four preloaded limbs remain
     80 	;;
     81 
     82 .Lb01:	C  n = 1, 5, 9, 13, ...
     83 	add		r21 = 0, r33	C r21 = sp: this pair now handles limbs 0, 2, 4, ...
     84 	add		r20 = 0, r32	C r20 = rp
     85 	add		r33 = 8, r33	C r33 = sp + 8: this pair handles limbs 1, 3, 5, ...
     86 	add		r32 = 8, r32	C r32 = rp + 8
     87 	;;
     88 	ld8		r19 = [r21], 16	C r19 = limb 0
     89 	shr		r15 = r34, 2	C r15 = (n - 4) >> 2
     90   (p14)	br.dptk		.Ls01	C n <= 3 on this path: just store r19
     91 	;;
     92 	ld8		r16 = [r33], 16	C r16 = limb 1
     93 	mov.i		ar.lc = r15	C loop counter
     94 	;;
     95 	ld8		r17 = [r21], 16	C r17 = limb 2
     96 	ld8		r18 = [r33], 16	C r18 = limb 3
     97 	br.sptk		.Li01	C enter the loop at its last slot (stores r19)
     98 	;;
     99 
    100 .Lb10:	C  n = 2, 6, 10, 14, ...
    101 	add		r21 = 8, r33	C r21 = sp + 8, second load pointer
    102 	add		r20 = 8, r32	C r20 = rp + 8, second store pointer
    103 	ld8		r18 = [r33], 16	C r18 = limb 0
    104 	shr		r15 = r34, 2	C r15 = (n - 4) >> 2
    105 	;;
    106 	ld8		r19 = [r21], 16	C r19 = limb 1
    107 	mov.i		ar.lc = r15	C loop counter
    108   (p14)	br.dptk		.Ls10	C n <= 3 on this path: just store r18, r19
    109 	;;
    110 	ld8		r16 = [r33], 16	C r16 = limb 2
    111 	ld8		r17 = [r21], 16	C r17 = limb 3
    112 	br.sptk		.Li10	C enter the loop at its third slot (stores r18)
    113 	;;
    114 
    115 .Lb11:	C  n = 3, 7, 11, 15, ...
    116 	add		r21 = 0, r33	C r21 = sp: this pair now handles limbs 0, 2, 4, ...
    117 	add		r20 = 0, r32	C r20 = rp
    118 	add		r33 = 8, r33	C r33 = sp + 8: this pair handles limbs 1, 3, 5, ...
    119 	add		r32 = 8, r32	C r32 = rp + 8
    120 	;;
    121 	ld8		r17 = [r21], 16	C r17 = limb 0
    122 	shr		r15 = r34, 2	C r15 = (n - 4) >> 2
    123 	;;
    124 	ld8		r18 = [r33], 16	C r18 = limb 1
    125 	mov.i		ar.lc = r15	C loop counter
    126 	ld8		r19 = [r21], 16	C r19 = limb 2
    127   (p14)	br.dptk		.Ls11	C n <= 3 on this path: just store r17, r18, r19
    128 	;;
    129 	ld8		r16 = [r33], 16	C r16 = limb 3
    130 	br.sptk		.Li11	C enter the loop at its second slot (stores r17)
    131 	;;
    132 
        C  Main loop, four limbs per trip.  Each slot stores the limb its
        C  register already holds and reloads that register with the limb four
        C  positions further on.  Stores alternate between r32 and r20, loads
        C  between r33 and r21.  .Li11/.Li10/.Li01 are the mid-loop entry
        C  points used by the paths above.
    133 	ALIGN(32)
    134 .Loop:
    135 .Li00:
    136 {.mmb
    137 	st8		[r32] = r16, 16
    138 	ld8		r16 = [r33], 16
    139 	nop.b		0
    140 }
    141 .Li11:
    142 {.mmb
    143 	st8		[r20] = r17, 16
    144 	ld8		r17 = [r21], 16
    145 	nop.b		0
    146 	;;
    147 }
    148 .Li10:
    149 {.mmb
    150 	st8		[r32] = r18, 16
    151 	ld8		r18 = [r33], 16
    152 	nop.b		0
    153 }
    154 .Li01:
    155 {.mmb
    156 	st8		[r20] = r19, 16
    157 	ld8		r19 = [r21], 16
    158 	br.cloop.dptk	.Loop	C repeat while ar.lc != 0, decrementing it
    159 	;;
    160 }
        C  Tail: store the limbs still held in r16-r19 without loading any
        C  more.  The .Ls* labels are the targets of the n <= 3 branches above;
        C  each one skips the stores for registers that path never loaded.
    161 .Lend:	st8		[r32] = r16, 16
    162 .Ls11:	st8		[r20] = r17, 16
    163 	;;
    164 .Ls10:	st8		[r32] = r18, 16
    165 .Ls01:	st8		[r20] = r19, 16
    166 .Ls00:	mov.i		ar.lc = r2	C restore ar.lc saved on entry
    167 	br.ret.sptk.many b0
    168 EPILOGUE()
    169 ASM_END()
    170