dnl  IA-64 mpn_copyi -- copy limb vector, incrementing.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C         cycles/limb
C Itanium:    1
C Itanium 2:  0.5

C INPUT PARAMETERS
C rp = r32
C sp = r33
C n = r34

C Register roles, as used by the code below:
C   r2        value of ar.lc read on entry, written back before returning
C   r14       n mod 4, selects the setup path .Lb00/.Lb01/.Lb10/.Lb11
C   r15       (n - 4) >> 2, loaded into ar.lc as the loop counter
C   r16-r19   limbs in flight between their load and their store
C   r32, r33  first store pointer / load pointer, stepping 16 bytes
C   r20, r21  second store pointer / load pointer, stepping 16 bytes
C   p14       set when 3 >= n, the p14 branches skip the loop entirely
C
C Each setup path preloads up to four limbs and then either jumps into the
C middle of the 4-way unrolled loop (.Li01/.Li10/.Li11), or, when p14 is
C set, jumps straight to the matching tail store (.Ls01/.Ls10/.Ls11/.Ls00).

ASM_START()
PROLOGUE(mpn_copyi)
	.prologue
	.save ar.lc, r2
	.body
ifdef(`HAVE_ABI_32',
`	addp4		r32 = 0, r32
	addp4		r33 = 0, r33
	sxt4		r34 = r34
	;;
')
{.mmi
	nop		0
	nop		0
	mov.i		r2 = ar.lc		C remember ar.lc, restored at .Ls00
}
{.mmi
	and		r14 = 3, r34		C r14 = n mod 4
	cmp.ge		p14, p15 = 3, r34	C p14 = (3 >= n)
	add		r34 = -4, r34		C r34 = n - 4, feeds the loop count
	;;
}
{.mmi
	cmp.eq		p8, p0 = 1, r14
	cmp.eq		p10, p0 = 2, r14
	cmp.eq		p12, p0 = 3, r14
}
{.bbb
  (p8)	br.dptk		.Lb01
  (p10)	br.dptk		.Lb10
  (p12)	br.dptk		.Lb11
}

.Lb00:	C  n = 0, 4, 8, 12, ...
  (p14)	br.dptk		.Ls00			C 3 >= n on this path: no stores at all
	;;
	add		r21 = 8, r33		C second load pointer = sp + 8
	ld8		r16 = [r33], 16
	shr		r15 = r34, 2		C r15 = (n - 4) >> 2
	;;
	ld8		r17 = [r21], 16
	mov.i		ar.lc = r15
	ld8		r18 = [r33], 16
	add		r20 = 8, r32		C second store pointer = rp + 8
	;;
	ld8		r19 = [r21], 16
	br.cloop.dptk	.Loop
	;;
	br.sptk		.Lend			C loop count exhausted: store the 4 preloaded limbs
	;;

.Lb01:	C  n = 1, 5, 9, 13, ...
	add		r21 = 0, r33		C r21/r20 start at the first limb
	add		r20 = 0, r32
	add		r33 = 8, r33		C r33/r32 start one limb further on
	add		r32 = 8, r32
	;;
	ld8		r19 = [r21], 16
	shr		r15 = r34, 2
  (p14)	br.dptk		.Ls01			C 3 >= n: only r19 needs storing
	;;
	ld8		r16 = [r33], 16
	mov.i		ar.lc = r15
	;;
	ld8		r17 = [r21], 16
	ld8		r18 = [r33], 16
	br.sptk		.Li01
	;;

.Lb10:	C  n = 2, 6, 10, 14, ...
	add		r21 = 8, r33
	add		r20 = 8, r32
	ld8		r18 = [r33], 16
	shr		r15 = r34, 2
	;;
	ld8		r19 = [r21], 16
	mov.i		ar.lc = r15
  (p14)	br.dptk		.Ls10			C 3 >= n: only r18 and r19 need storing
	;;
	ld8		r16 = [r33], 16
	ld8		r17 = [r21], 16
	br.sptk		.Li10
	;;

.Lb11:	C  n = 3, 7, 11, 15, ...
	add		r21 = 0, r33		C r21/r20 start at the first limb
	add		r20 = 0, r32
	add		r33 = 8, r33		C r33/r32 start one limb further on
	add		r32 = 8, r32
	;;
	ld8		r17 = [r21], 16
	shr		r15 = r34, 2
	;;
	ld8		r18 = [r33], 16
	mov.i		ar.lc = r15
	ld8		r19 = [r21], 16
  (p14)	br.dptk		.Ls11			C 3 >= n: only r17, r18, r19 need storing
	;;
	ld8		r16 = [r33], 16
	br.sptk		.Li11
	;;

C Main loop: each of the four bundles stores one previously loaded limb and
C reloads the same register, alternating between the r32/r33 and r20/r21
C pointer pairs.  .Li11, .Li10 and .Li01 are the mid-loop entry points used
C by the setup paths above.
	ALIGN(32)
.Loop:
.Li00:
{.mmb
	st8		[r32] = r16, 16
	ld8		r16 = [r33], 16
	nop.b		0
}
.Li11:
{.mmb
	st8		[r20] = r17, 16
	ld8		r17 = [r21], 16
	nop.b		0
	;;
}
.Li10:
{.mmb
	st8		[r32] = r18, 16
	ld8		r18 = [r33], 16
	nop.b		0
}
.Li01:
{.mmb
	st8		[r20] = r19, 16
	ld8		r19 = [r21], 16
	br.cloop.dptk	.Loop
	;;
}

C Tail: store the limbs still held in r16-r19.  The .Ls labels are also the
C targets of the p14 branches, so a short copy enters at the store matching
C the number of limbs it preloaded.
.Lend:	st8		[r32] = r16, 16
.Ls11:	st8		[r20] = r17, 16
	;;
.Ls10:	st8		[r32] = r18, 16
.Ls01:	st8		[r20] = r19, 16
.Ls00:	mov.i		ar.lc = r2		C put back the ar.lc value read on entry
	br.ret.sptk.many b0
EPILOGUE()
ASM_END()