Home | History | Annotate | Line # | Download | only in ia64
      1      1.1  mrg dnl  IA-64 mpn_copyi -- copy limb vector, incrementing.
      2      1.1  mrg 
      3  1.1.1.2  mrg dnl  Contributed to the GNU project by Torbjorn Granlund.
      4  1.1.1.2  mrg 
      5      1.1  mrg dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
      6      1.1  mrg 
      7      1.1  mrg dnl  This file is part of the GNU MP Library.
      8  1.1.1.3  mrg dnl
      9      1.1  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     10  1.1.1.3  mrg dnl  it under the terms of either:
     11  1.1.1.3  mrg dnl
     12  1.1.1.3  mrg dnl    * the GNU Lesser General Public License as published by the Free
     13  1.1.1.3  mrg dnl      Software Foundation; either version 3 of the License, or (at your
     14  1.1.1.3  mrg dnl      option) any later version.
     15  1.1.1.3  mrg dnl
     16  1.1.1.3  mrg dnl  or
     17  1.1.1.3  mrg dnl
     18  1.1.1.3  mrg dnl    * the GNU General Public License as published by the Free Software
     19  1.1.1.3  mrg dnl      Foundation; either version 2 of the License, or (at your option) any
     20  1.1.1.3  mrg dnl      later version.
     21  1.1.1.3  mrg dnl
     22  1.1.1.3  mrg dnl  or both in parallel, as here.
     23  1.1.1.3  mrg dnl
     24      1.1  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     25      1.1  mrg dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     26  1.1.1.3  mrg dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     27  1.1.1.3  mrg dnl  for more details.
     28  1.1.1.3  mrg dnl
     29  1.1.1.3  mrg dnl  You should have received copies of the GNU General Public License and the
     30  1.1.1.3  mrg dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     31  1.1.1.3  mrg dnl  see https://www.gnu.org/licenses/.
     32      1.1  mrg 
     33      1.1  mrg include(`../config.m4')
     34      1.1  mrg 
     35      1.1  mrg C         cycles/limb
     36      1.1  mrg C Itanium:    1
     37      1.1  mrg C Itanium 2:  0.5
     38      1.1  mrg 
     39      1.1  mrg C INPUT PARAMETERS
     40      1.1  mrg C rp = r32
     41      1.1  mrg C sp = r33
     42      1.1  mrg C n = r34
     43      1.1  mrg 
     44      1.1  mrg ASM_START()
                      C mpn_copyi: copy n limbs from sp to rp, walking upward in memory.
                      C
                      C Register roles as used below:
                      C   r32, r20   store pointers, each post-incremented by 16 per store
                      C   r33, r21   load pointers, each post-incremented by 16 per load
                      C   r16-r19    limbs in flight between their load and their store
                      C   r14        n mod 4, selects one of the four setup paths
                      C   r15        loop count that is written to ar.lc
                      C   r2         incoming ar.lc value, put back before returning
                      C   p14        true when 3 >= n, meaning only tail stores are needed
                      C
                      C The two load pointers and the two store pointers are kept 8 bytes
                      C apart, so together they touch consecutive 8-byte limbs.
     45      1.1  mrg PROLOGUE(mpn_copyi)
     46      1.1  mrg 	.prologue
                      C Unwind annotation: ar.lc is kept in r2 for the life of the function.
     47      1.1  mrg 	.save ar.lc, r2
     48      1.1  mrg 	.body
                      C With the 32-bit ABI the two pointers are widened with addp4 and the
                      C count is sign-extended before any of them is used.
     49      1.1  mrg ifdef(`HAVE_ABI_32',
     50      1.1  mrg `	addp4		r32 = 0, r32
     51      1.1  mrg 	addp4		r33 = 0, r33
     52      1.1  mrg 	sxt4		r34 = r34
     53      1.1  mrg 	;;
     54      1.1  mrg ')
                      C Save ar.lc; it is overwritten below and restored at .Ls00.
     55      1.1  mrg {.mmi
     56      1.1  mrg 	nop		0
     57      1.1  mrg 	nop		0
     58      1.1  mrg 	mov.i		r2 = ar.lc
     59      1.1  mrg }
                      C Classify n: r14 = n mod 4, p14 = 3 >= n, then r34 = n - 4.
                      C All three sit in one instruction group, so the and/cmp are expected
                      C to see n as passed in rather than the decremented value.
     60      1.1  mrg {.mmi
     61      1.1  mrg 	and		r14 = 3, r34
     62      1.1  mrg 	cmp.ge		p14, p15 = 3, r34
     63      1.1  mrg 	add		r34 = -4, r34
     64      1.1  mrg 	;;
     65      1.1  mrg }
                      C Dispatch on n mod 4.  A remainder of 0 falls through to .Lb00.
     66      1.1  mrg {.mmi
     67      1.1  mrg 	cmp.eq		p8, p0 = 1, r14
     68      1.1  mrg 	cmp.eq		p10, p0 = 2, r14
     69      1.1  mrg 	cmp.eq		p12, p0 = 3, r14
     70      1.1  mrg }
     71      1.1  mrg {.bbb
     72      1.1  mrg   (p8)	br.dptk		.Lb01
     73      1.1  mrg   (p10)	br.dptk		.Lb10
     74      1.1  mrg   (p12)	br.dptk		.Lb11
     75      1.1  mrg }
     76      1.1  mrg 
                      C Setup for n mod 4 = 0.  With p14 set there is nothing to copy
                      C - assuming n is never negative - so go straight to the exit code.
                      C Otherwise preload four limbs into r16-r19, set ar.lc to the value
                      C of r34 shifted right by 2, and enter the loop at its top.  When
                      C ar.lc is zero the br.cloop is not taken and the four preloaded
                      C limbs are stored by the tail at .Lend.
     77      1.1  mrg .Lb00:	C  n = 0, 4, 8, 12, ...
     78      1.1  mrg   (p14)	br.dptk		.Ls00
     79      1.1  mrg 	;;
     80      1.1  mrg 	add		r21 = 8, r33
     81      1.1  mrg 	ld8		r16 = [r33], 16
     82      1.1  mrg 	shr		r15 = r34, 2
     83      1.1  mrg 	;;
     84      1.1  mrg 	ld8		r17 = [r21], 16
     85      1.1  mrg 	mov.i		ar.lc = r15
     86      1.1  mrg 	ld8		r18 = [r33], 16
     87      1.1  mrg 	add		r20 = 8, r32
     88      1.1  mrg 	;;
     89      1.1  mrg 	ld8		r19 = [r21], 16
     90      1.1  mrg 	br.cloop.dptk	.Loop
     91      1.1  mrg 	;;
     92      1.1  mrg 	br.sptk		.Lend
     93      1.1  mrg 	;;
     94      1.1  mrg 
                      C Setup for n mod 4 = 1.  Here r21/r20 start at the first limb and
                      C r33/r32 start one limb later.  The first limb is loaded into r19;
                      C with p14 set it is the only limb and .Ls01 stores it.  Otherwise
                      C three more limbs are preloaded and the loop is entered at .Li01,
                      C which stores r19 first.
     95      1.1  mrg .Lb01:	C  n = 1, 5, 9, 13, ...
     96      1.1  mrg 	add		r21 = 0, r33
     97      1.1  mrg 	add		r20 = 0, r32
     98      1.1  mrg 	add		r33 = 8, r33
     99      1.1  mrg 	add		r32 = 8, r32
    100      1.1  mrg 	;;
    101      1.1  mrg 	ld8		r19 = [r21], 16
    102      1.1  mrg 	shr		r15 = r34, 2
    103      1.1  mrg   (p14)	br.dptk		.Ls01
    104      1.1  mrg 	;;
    105      1.1  mrg 	ld8		r16 = [r33], 16
    106      1.1  mrg 	mov.i		ar.lc = r15
    107      1.1  mrg 	;;
    108      1.1  mrg 	ld8		r17 = [r21], 16
    109      1.1  mrg 	ld8		r18 = [r33], 16
    110      1.1  mrg 	br.sptk		.Li01
    111      1.1  mrg 	;;
    112      1.1  mrg 
                      C Setup for n mod 4 = 2.  r33/r32 start at the first limb and r21/r20
                      C one limb later.  The first two limbs go to r18 and r19; with p14 set
                      C they are all there is and .Ls10 stores them.  Otherwise two more
                      C limbs are preloaded and the loop is entered at .Li10.
                      C Note that ar.lc is written before the p14 branch is resolved; the
                      C exit code at .Ls00 restores it from r2 on every path.
    113      1.1  mrg .Lb10:	C  n = 2, 6, 10, 14, ...
    114      1.1  mrg 	add		r21 = 8, r33
    115      1.1  mrg 	add		r20 = 8, r32
    116      1.1  mrg 	ld8		r18 = [r33], 16
    117      1.1  mrg 	shr		r15 = r34, 2
    118      1.1  mrg 	;;
    119      1.1  mrg 	ld8		r19 = [r21], 16
    120      1.1  mrg 	mov.i		ar.lc = r15
    121      1.1  mrg   (p14)	br.dptk		.Ls10
    122      1.1  mrg 	;;
    123      1.1  mrg 	ld8		r16 = [r33], 16
    124      1.1  mrg 	ld8		r17 = [r21], 16
    125      1.1  mrg 	br.sptk		.Li10
    126      1.1  mrg 	;;
    127      1.1  mrg 
                      C Setup for n mod 4 = 3.  Pointer layout is the same as for .Lb01.
                      C The first three limbs go to r17, r18 and r19; with p14 set they are
                      C all there is and .Ls11 stores them.  Otherwise one more limb is
                      C preloaded into r16 and the loop is entered at .Li11.
    128      1.1  mrg .Lb11:	C  n = 3, 7, 11, 15, ...
    129      1.1  mrg 	add		r21 = 0, r33
    130      1.1  mrg 	add		r20 = 0, r32
    131      1.1  mrg 	add		r33 = 8, r33
    132      1.1  mrg 	add		r32 = 8, r32
    133      1.1  mrg 	;;
    134      1.1  mrg 	ld8		r17 = [r21], 16
    135      1.1  mrg 	shr		r15 = r34, 2
    136      1.1  mrg 	;;
    137      1.1  mrg 	ld8		r18 = [r33], 16
    138      1.1  mrg 	mov.i		ar.lc = r15
    139      1.1  mrg 	ld8		r19 = [r21], 16
    140      1.1  mrg   (p14)	br.dptk		.Ls11
    141      1.1  mrg 	;;
    142      1.1  mrg 	ld8		r16 = [r33], 16
    143      1.1  mrg 	br.sptk		.Li11
    144      1.1  mrg 	;;
    145      1.1  mrg 
                      C Main loop.  One full pass stores the four limbs held in r16-r19
                      C and reloads each register with a limb for the next pass, alternating
                      C between the r32/r33 and r20/r21 pointer pairs.  The .Li11, .Li10 and
                      C .Li01 labels are mid-loop entry points used by the setup paths above;
                      C .Li00 marks the same address as .Loop.  The closing br.cloop repeats
                      C the loop, counting ar.lc down, until ar.lc reaches zero.
    146      1.1  mrg 	ALIGN(32)
    147      1.1  mrg .Loop:
    148      1.1  mrg .Li00:
    149      1.1  mrg {.mmb
    150      1.1  mrg 	st8		[r32] = r16, 16
    151      1.1  mrg 	ld8		r16 = [r33], 16
    152      1.1  mrg 	nop.b		0
    153      1.1  mrg }
    154      1.1  mrg .Li11:
    155      1.1  mrg {.mmb
    156      1.1  mrg 	st8		[r20] = r17, 16
    157      1.1  mrg 	ld8		r17 = [r21], 16
    158      1.1  mrg 	nop.b		0
    159      1.1  mrg 	;;
    160      1.1  mrg }
    161      1.1  mrg .Li10:
    162      1.1  mrg {.mmb
    163      1.1  mrg 	st8		[r32] = r18, 16
    164      1.1  mrg 	ld8		r18 = [r33], 16
    165      1.1  mrg 	nop.b		0
    166      1.1  mrg }
    167      1.1  mrg .Li01:
    168      1.1  mrg {.mmb
    169      1.1  mrg 	st8		[r20] = r19, 16
    170      1.1  mrg 	ld8		r19 = [r21], 16
    171      1.1  mrg 	br.cloop.dptk	.Loop
    172      1.1  mrg 	;;
    173      1.1  mrg }
                      C Tail.  Store the limbs that are loaded but not yet written, with no
                      C further loads.  Falling out of the loop or arriving from .Lb00 stores
                      C all four; the short-input branches enter at .Ls11, .Ls10 or .Ls01 to
                      C store three, two or one.  .Ls00 restores ar.lc from r2 and returns.
    174      1.1  mrg .Lend:	st8		[r32] = r16, 16
    175      1.1  mrg .Ls11:	st8		[r20] = r17, 16
    176      1.1  mrg 	;;
    177      1.1  mrg .Ls10:	st8		[r32] = r18, 16
    178      1.1  mrg .Ls01:	st8		[r20] = r19, 16
    179      1.1  mrg .Ls00:	mov.i		ar.lc = r2
    180      1.1  mrg 	br.ret.sptk.many b0
    181      1.1  mrg EPILOGUE()
    182      1.1  mrg ASM_END()
    183