copyi.asm revision 1.1.1.1 1 dnl IA-64 mpn_copyi -- copy limb vector, incrementing.
2
3 dnl Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
4
5 dnl This file is part of the GNU MP Library.
6
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of the GNU Lesser General Public License as published
9 dnl by the Free Software Foundation; either version 3 of the License, or (at
10 dnl your option) any later version.
11
12 dnl The GNU MP Library is distributed in the hope that it will be useful, but
13 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 dnl License for more details.
16
17 dnl You should have received a copy of the GNU Lesser General Public License
18 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
19
20 include(`../config.m4')
21
22 C cycles/limb
23 C Itanium: 1
24 C Itanium 2: 0.5
25
26 C INPUT PARAMETERS
27 C rp = r32 (destination limb pointer)
28 C sp = r33 (source limb pointer)
29 C n = r34 (number of limbs to copy)
30
31 ASM_START()
C mpn_copyi: copy n limbs (8 bytes each) from the vector at sp to the vector
C at rp, working from the lowest address upwards.
C
C Register use in this routine:
C   r32/r33  destination/source pointers of one stream, stepping 16 bytes
C   r20/r21  destination/source pointers of the other stream, 8 bytes away
C   r16-r19  limbs that have been loaded but not yet stored
C   r15      value written to ar.lc, (n - 4) >> 2
C   r14      n mod 4, selects one of the four setup paths below
C   r2       copy of the ar.lc value on entry, written back at .Ls00
C   p14      set when n <= 3, i.e. the 4-limb loop is not needed
C
C Each setup path preloads up to four limbs and then joins either the loop
C (at .Loop or one of .Li01/.Li10/.Li11) or the store-only tail (.Ls*).
32 PROLOGUE(mpn_copyi)
33 .prologue
C NOTE(review): the instruction that performs this save (mov.i r2 = ar.lc)
C is emitted after .body, not inside the prologue region -- confirm that
C the unwind annotation is meant to look like this.
34 .save ar.lc, r2
35 .body
C With HAVE_ABI_32 defined, both pointers go through addp4 and n through
C sxt4 before anything else reads them.
36 ifdef(`HAVE_ABI_32',
37 ` addp4 r32 = 0, r32
38 addp4 r33 = 0, r33
39 sxt4 r34 = r34
40 ;;
41 ')
42 {.mmi
43 nop 0
44 nop 0
45 mov.i r2 = ar.lc C keep the incoming ar.lc for the restore at .Ls00
46 }
47 {.mmi
48 and r14 = 3, r34 C r14 = n mod 4
49 cmp.ge p14, p15 = 3, r34 C p14 = (3 >= n): the 4-limb loop is skipped
50 add r34 = -4, r34 C r34 = n - 4, shifted right by 2 below to form ar.lc
51 ;;
52 }
C Dispatch on n mod 4; residue 0 falls through to .Lb00.
53 {.mmi
54 cmp.eq p8, p0 = 1, r14
55 cmp.eq p10, p0 = 2, r14
56 cmp.eq p12, p0 = 3, r14
57 }
58 {.bbb
59 (p8) br.dptk .Lb01
60 (p10) br.dptk .Lb10
61 (p12) br.dptk .Lb11
62 }
63
64 .Lb00: C n = 0, 4, 8, 12, ...
65 (p14) br.dptk .Ls00 C n <= 3 with residue 0: nothing to copy
66 ;;
67 add r21 = 8, r33 C second source stream starts one limb above r33
68 ld8 r16 = [r33], 16
69 shr r15 = r34, 2 C r15 = (n - 4) >> 2
70 ;;
71 ld8 r17 = [r21], 16
72 mov.i ar.lc = r15
73 ld8 r18 = [r33], 16
74 add r20 = 8, r32 C second destination stream starts one limb above r32
75 ;;
76 ld8 r19 = [r21], 16
77 br.cloop.dptk .Loop C r16-r19 now hold limbs 0-3
78 ;;
79 br.sptk .Lend C loop not entered: only the four loaded limbs remain
80 ;;
81
C One leading limb is handled first, so the streams swap roles compared
C with .Lb00: r21/r20 start at the vector base, r33/r32 one limb above it.
82 .Lb01: C n = 1, 5, 9, 13, ...
83 add r21 = 0, r33
84 add r20 = 0, r32
85 add r33 = 8, r33
86 add r32 = 8, r32
87 ;;
88 ld8 r19 = [r21], 16 C limb 0
89 shr r15 = r34, 2
90 (p14) br.dptk .Ls01 C n <= 3: only r19 is pending
91 ;;
92 ld8 r16 = [r33], 16
93 mov.i ar.lc = r15
94 ;;
95 ld8 r17 = [r21], 16
96 ld8 r18 = [r33], 16
97 br.sptk .Li01 C join the loop at its last slot, which stores r19
98 ;;
99
C Two leading limbs, one per stream; pointer layout is the same as .Lb00.
100 .Lb10: C n = 2, 6, 10, 14, ...
101 add r21 = 8, r33
102 add r20 = 8, r32
103 ld8 r18 = [r33], 16 C limb 0
104 shr r15 = r34, 2
105 ;;
106 ld8 r19 = [r21], 16 C limb 1
107 mov.i ar.lc = r15 C written even when p14 is set; .Ls00 restores ar.lc
108 (p14) br.dptk .Ls10 C n <= 3: only r18 and r19 are pending
109 ;;
110 ld8 r16 = [r33], 16
111 ld8 r17 = [r21], 16
112 br.sptk .Li10 C join the loop at the slot that stores r18
113 ;;
114
C Three leading limbs; pointer layout is the same as .Lb01.
115 .Lb11: C n = 3, 7, 11, 15, ...
116 add r21 = 0, r33
117 add r20 = 0, r32
118 add r33 = 8, r33
119 add r32 = 8, r32
120 ;;
121 ld8 r17 = [r21], 16 C limb 0
122 shr r15 = r34, 2
123 ;;
124 ld8 r18 = [r33], 16 C limb 1
125 mov.i ar.lc = r15 C written even when p14 is set; .Ls00 restores ar.lc
126 ld8 r19 = [r21], 16 C limb 2
127 (p14) br.dptk .Ls11 C n <= 3: r17, r18 and r19 are pending
128 ;;
129 ld8 r16 = [r33], 16
130 br.sptk .Li11 C join the loop at the slot that stores r17
131 ;;
132
C Main loop: one full pass stores the four limbs held in r16-r19 and reloads
C the same registers with the next four, alternating between the r32/r33
C stream and the r20/r21 stream. .Li01, .Li10 and .Li11 are the mid-loop
C entry slots used by the setup paths above.
133 ALIGN(32)
134 .Loop:
135 .Li00:
136 {.mmb
137 st8 [r32] = r16, 16
138 ld8 r16 = [r33], 16
139 nop.b 0
140 }
141 .Li11:
142 {.mmb
143 st8 [r20] = r17, 16
144 ld8 r17 = [r21], 16
145 nop.b 0
146 ;;
147 }
148 .Li10:
149 {.mmb
150 st8 [r32] = r18, 16
151 ld8 r18 = [r33], 16
152 nop.b 0
153 }
154 .Li01:
155 {.mmb
156 st8 [r20] = r19, 16
157 ld8 r19 = [r21], 16
158 br.cloop.dptk .Loop
159 ;;
160 }
C Tail: store the limbs still held in r16-r19 without loading any more.
C .Lend is reached by falling out of the loop or from .Lb00; the .Ls* labels
C are reached directly from the setup paths when p14 is set.
161 .Lend: st8 [r32] = r16, 16
162 .Ls11: st8 [r20] = r17, 16
163 ;;
164 .Ls10: st8 [r32] = r18, 16
165 .Ls01: st8 [r20] = r19, 16
166 .Ls00: mov.i ar.lc = r2 C put back the ar.lc value saved on entry
167 br.ret.sptk.many b0
168 EPILOGUE()
169 ASM_END()
170