dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2003-2005 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C           cycles/limb
C Itanium:      2
C Itanium 2:    1

C TODO
C  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in
C    wind-down code).

C OVERVIEW
C  This one source file provides eight entry points.  The build is expected
C  to define one OPERATION_xxx symbol; that symbol selects func (the entry
C  point name) and two instruction templates:
C    logop(d, a, b)    the binary operation applied to one pair of limbs
C    notormov(d, s)    either a plain copy of s, or sub d = -1, s, which is
C                      -1 - s, i.e. the bitwise complement of s
C  Every result limb is notormov(logop(u, v)).  mpn_iorn_n is formed as the
C  complement of andcm with the operands swapped:
C    NOT (v AND NOT u)  =  u OR NOT v.
C
C  Control flow:
C   * The first limb pair is always loaded into r10/r11 before dispatch.
C   * n mod 4 selects one of four feed-in paths (.Lb00, .Lb01, .Lb10, .Lb11);
C     n / 4, adjusted per path, becomes the loop count placed in ar.lc.
C   * p15 is set when n > 4.  Counts of 4 or less never enter the loop and
C     branch straight into the wind-down chain at .Lcj4, .Lcj1, .Lcj2 or .Lcj3.
C   * The main loop handles four limbs per iteration.  logop results alternate
C     between r14 and r15, and values to be stored alternate between r8 and r9.
C   * The wind-down chain .Lcj6 ... .Lcj1 stores the limbs still in flight.
C   * The incoming ar.lc is kept in r2 and restored just before returning.
C
C  NOTE: n = 0 is not handled.  It would take the .Lb00 path and process four
C  limbs, so callers are expected to pass n >= 1.

C INPUT PARAMETERS
C  rp  destination pointer (written with st8, post-incremented by 8)
C  up  first source pointer (read with ld8, post-incremented by 8)
C  vp  second source pointer (read with ld8, post-incremented by 8)
C  n   number of 8-byte limbs to process
define(`rp', `r32')
define(`up', `r33')
define(`vp', `r34')
define(`n', `r35')

C Per-operation selection of func, logop and notormov.
ifdef(`OPERATION_and_n',
`	define(`func',`mpn_and_n')
	define(`logop',		`and	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_andn_n',
`	define(`func',`mpn_andn_n')
	define(`logop',		`andcm	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_nand_n',
`	define(`func',`mpn_nand_n')
	define(`logop',		`and	$1 = $2, $3')
	define(`notormov',	`sub	$1 = -1, $2')')
ifdef(`OPERATION_ior_n',
`	define(`func',`mpn_ior_n')
	define(`logop',		`or	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_iorn_n',
`	define(`func',`mpn_iorn_n')
	define(`logop',		`andcm	$1 = $3, $2')
	define(`notormov',	`sub	$1 = -1, $2')')
ifdef(`OPERATION_nior_n',
`	define(`func',`mpn_nior_n')
	define(`logop',		`or	$1 = $2, $3')
	define(`notormov',	`sub	$1 = -1, $2')')
ifdef(`OPERATION_xor_n',
`	define(`func',`mpn_xor_n')
	define(`logop',		`xor	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_xnor_n',
`	define(`func',`mpn_xnor_n')
	define(`logop',		`xor	$1 = $2, $3')
	define(`notormov',	`sub	$1 = -1, $2')')

MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)

ASM_START()
PROLOGUE(func)
	.prologue
	.save	ar.lc, r2
	.body
C With the 32-bit ABI the three pointers are converted with addp4 and the
C count is zero-extended from 32 bits before use.
ifdef(`HAVE_ABI_32',
`	addp4		rp = 0, rp		C			M I
	addp4		up = 0, up		C			M I
	addp4		vp = 0, vp		C			M I
	nop.m		0
	nop.m		0
	zxt4		n = n			C			I
	;;
')
C Load the first limb pair and save the incoming loop counter in r2.
{.mmi
	ld8		r10 = [up], 8		C			M
	ld8		r11 = [vp], 8		C			M
	mov.i		r2 = ar.lc		C			I0
}
C r14 = n mod 4, p15 = (n > 4), n = n / 4.
{.mmi
	and		r14 = 3, n		C			M I
	cmp.lt		p15, p14 = 4, n		C			M I
	shr.u		n = n, 2		C			I0
	;;
}
C Dispatch on n mod 4; a residue of 0 falls through to .Lb00.
{.mmi
	cmp.eq		p6, p0 = 1, r14		C			M I
	cmp.eq		p7, p0 = 2, r14		C			M I
	cmp.eq		p8, p0 = 3, r14		C			M I
}
{.bbb
  (p6)	br.dptk		.Lb01			C			B
  (p7)	br.dptk		.Lb10			C			B
  (p8)	br.dptk		.Lb11			C			B
}

C n mod 4 = 0.  With n = 4 the code goes straight to the wind-down at .Lcj4,
C otherwise the loop is entered at .LL00.
.Lb00:	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	add		n = -2, n		C			M I
	;;
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	;;
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
  (p15)	br.cond.dpnt	.grt4			C			B

	logop(		r14, r10, r11)		C			M I
	;;
	logop(		r15, r17, r21)		C			M I
	notormov(	r8, r14)		C			M I
	br		.Lcj4			C			B

.grt4:	logop(		r14, r10, r11)		C			M I
	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	;;
	logop(		r15, r17, r21)		C			M I
	ld8		r17 = [up], 8		C			M
	mov.i		ar.lc = n		C			I0
	notormov(	r8, r14)		C			M I
	ld8		r21 = [vp], 8		C			M
	br		.LL00			C			B

C n mod 4 = 1.  With n = 1 only the single store at .Lcj1 remains.
.Lb01:	add		n = -1, n		C			M I
	logop(		r15, r10, r11)		C			M I
  (p15)	br.cond.dpnt	.grt1			C			B
	;;

	notormov(	r9, r15)		C			M I
	br		.Lcj1			C			B

.grt1:	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	;;
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	mov.i		ar.lc = n		C			I0
	;;
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	;;
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
	br.cloop.dptk	.grt5			C			B
	;;

C Loop count exhausted (n = 5): finish through the wind-down at .Lcj5.
	logop(		r14, r16, r20)		C			M I
	notormov(	r9, r15)		C			M I
	br		.Lcj5			C			B

.grt5:	logop(		r14, r16, r20)		C			M I
	ld8		r16 = [up], 8		C			M
	notormov(	r9, r15)		C			M I
	ld8		r20 = [vp], 8		C			M
	br		.LL01			C			B

C n mod 4 = 2.  With n = 2 the code goes straight to the wind-down at .Lcj2.
.Lb10:	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
  (p15)	br.cond.dpnt	.grt2			C			B

	logop(		r14, r10, r11)		C			M I
	;;
	logop(		r15, r19, r23)		C			M I
	notormov(	r8, r14)		C			M I
	br		.Lcj2			C			B

.grt2:	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	add		n = -1, n		C			M I
	;;
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	logop(		r14, r10, r11)		C			M I
	;;
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	mov.i		ar.lc = n		C			I0
	;;
	logop(		r15, r19, r23)		C			M I
	ld8		r19 = [up], 8		C			M
	notormov(	r8, r14)		C			M I
	ld8		r23 = [vp], 8		C			M
	br.cloop.dptk	.Loop			C			B
	br		.Lcj6			C			B

C n mod 4 = 3.  With n = 3 the code goes straight to the wind-down at .Lcj3.
.Lb11:	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	add		n = -1, n		C			M I
	;;
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
	logop(		r15, r10, r11)		C			M I
  (p15)	br.cond.dpnt	.grt3			C			B
	;;

	logop(		r14, r18, r22)		C			M I
	notormov(	r9, r15)		C			M I
	br		.Lcj3			C			B

.grt3:	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	;;
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	mov.i		ar.lc = n		C			I0
	;;
	logop(		r14, r18, r22)		C			M I
	ld8		r18 = [up], 8		C			M
	notormov(	r9, r15)		C			M I
	ld8		r22 = [vp], 8		C			M
	br		.LL11			C			B

C Each of the four loop stages stores one finished limb, applies logop to a
C previously loaded pair, finishes the preceding logop result with notormov,
C and reloads the register pair it has just consumed.
C *** MAIN LOOP START ***
	ALIGN(32)
.Loop:	st8		[rp] = r8, 8		C			M
	logop(		r14, r16, r20)		C			M I
	notormov(	r9, r15)		C			M I
	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	nop.b		0
	;;
.LL01:	st8		[rp] = r9, 8		C			M
	logop(		r15, r17, r21)		C			M I
	notormov(	r8, r14)		C			M I
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	nop.b		0
	;;
.LL00:	st8		[rp] = r8, 8		C			M
	logop(		r14, r18, r22)		C			M I
	notormov(	r9, r15)		C			M I
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	nop.b		0
	;;
.LL11:	st8		[rp] = r9, 8		C			M
	logop(		r15, r19, r23)		C			M I
	notormov(	r8, r14)		C			M I
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
	br.cloop.dptk	.Loop ;;		C			B
C *** MAIN LOOP END ***

C Wind-down: no further loads.  Entering at .LcjK stores exactly K limbs.
.Lcj6:	st8		[rp] = r8, 8		C			M
	logop(		r14, r16, r20)		C			M I
	notormov(	r9, r15)		C			M I
	;;
.Lcj5:	st8		[rp] = r9, 8		C			M
	logop(		r15, r17, r21)		C			M I
	notormov(	r8, r14)		C			M I
	;;
.Lcj4:	st8		[rp] = r8, 8		C			M
	logop(		r14, r18, r22)		C			M I
	notormov(	r9, r15)		C			M I
	;;
.Lcj3:	st8		[rp] = r9, 8		C			M
	logop(		r15, r19, r23)		C			M I
	notormov(	r8, r14)		C			M I
	;;
.Lcj2:	st8		[rp] = r8, 8		C			M
	notormov(	r9, r15)		C			M I
	;;
C Final store, then restore the loop counter saved in r2 and return.
.Lcj1:	st8		[rp] = r9, 8		C			M
	mov.i		ar.lc = r2		C			I0
	br.ret.sptk.many b0			C			B
EPILOGUE()
ASM_END()