dnl  IA-64 mpn_bdiv_dbm1c.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2008, 2009 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C         cycles/limb
C Itanium:    4
C Itanium 2:  2

C TODO
C  * Optimize feed-in and wind-down code, both for speed and code size.
C INPUT PARAMETERS
C   rp  r32  destination pointer, one 64-bit limb is stored per source limb
C   up  r33  source pointer, limbs are read in increasing address order
C   n   r34  number of limbs
C   bd  r35  multiplier applied to every source limb
C   r36      initial value of the running accumulator, read directly below
C            (no m4 alias is defined for it)
C
C WHAT THE CODE COMPUTES
C   acc = r36
C   for each limb u loaded from up:
C       lo, hi = low and high 64-bit halves of u * bd   (xma.l, xma.hu)
C       store (acc - lo) to rp, advance rp by 8
C       borrow = 1 if acc < lo as unsigned, else 0      (cmp.ltu)
C       acc = (acc - lo) - hi - borrow
C   return acc in r8
C
C   The first limb is loaded unconditionally and the loop count is formed from
C   n - 1, so the code presumably requires n >= 1 -- confirm against callers.
C
C REGISTER USE
C   f6        bd moved to the floating-point file for xma
C   f9-f13    source limbs
C   f32-f39   products, even register = low half, odd register = high half
C   r20-r27   the same products moved back to integer registers
C             (f32/f33 -> r20/r21, f34/f35 -> r22/r23,
C              f36/f37 -> r24/r25, f38/f39 -> r26/r27)
C   r16-r19   result limbs waiting to be stored
C   r15       running accumulator
C   p6, p7    borrow / no borrow from the most recent cmp.ltu
C   r14       n mod 4, selects the feed-in path
C   r31       (n - 1) / 4, loaded into ar.lc
C   r2        caller value of ar.lc, restored before returning
define(`rp', `r32')
define(`up', `r33')
define(`n',  `r34')
define(`bd', `r35')

ASM_START()
PROLOGUE(mpn_bdiv_dbm1c)
	.prologue
	.save		ar.lc, r2
	.body

C With a 32-bit ABI the two pointers are widened with addp4 and n is
C zero-extended from 32 bits before use.
ifdef(`HAVE_ABI_32',
`	addp4		rp = 0, rp		C M I
	addp4		up = 0, up		C M I
	zxt4		n = n			C I
	;;
')
C Seed the accumulator from r36 and start loading the first limb.
{.mmb
	mov		r15 = r36		C M I
	ldf8		f9 = [up], 8		C M
	nop.b		0			C B
}
.Lcommon:
C r16 = n - 1, r14 = n mod 4, and the caller ar.lc is saved in r2.
{.mii
	adds		r16 = -1, n		C M I
	mov		r2 = ar.lc		C I0
	and		r14 = 3, n		C M I
	;;
}
{.mii
	setf.sig	f6 = bd			C M2 M3
	shr.u		r31 = r16, 2		C I0
	cmp.eq		p10, p0 = 0, r14	C M I
}
{.mii
	nop.m		0			C M
	cmp.eq		p11, p0 = 2, r14	C M I
	cmp.eq		p12, p0 = 3, r14	C M I
	;;
}
C r0 is never different from itself, so this clears p6 and sets p7, meaning
C no borrow is pending.  p8 and p9 are set the same way but are not read by
C any other instruction in this function.
{.mii
	cmp.ne		p6, p7 = r0, r0		C M I
	mov.i		ar.lc = r31		C I0
	cmp.ne		p8, p9 = r0, r0		C M I
}
C Dispatch on n mod 4: 0 -> .Lb00, 2 -> .Lb10, 3 -> .Lb11, and the remaining
C case n mod 4 = 1 falls through to .Lb01.
{.bbb
  (p10)	br.dptk		.Lb00			C B
  (p11)	br.dptk		.Lb10			C B
  (p12)	br.dptk		.Lb11			C B
	;;
}

C Feed-in for n mod 4 = 1.  The br.cloop is taken when more limbs remain
C beyond this first group; otherwise only one product is formed and control
C joins the wind-down code at .Lcj1.
.Lb01:	br.cloop.dptk	.grt1
	;;
	xma.l		f38 = f9, f6, f0
	xma.hu		f39 = f9, f6, f0
	;;
	getf.sig	r26 = f38
	getf.sig	r27 = f39
	br		.Lcj1

.grt1:	ldf8		f10 = [r33], 8
	;;
	ldf8		f11 = [r33], 8
	;;
	ldf8		f12 = [r33], 8
	;;
	xma.l		f38 = f9, f6, f0
	xma.hu		f39 = f9, f6, f0
	;;
	ldf8		f13 = [r33], 8
	;;
	xma.l		f32 = f10, f6, f0
	xma.hu		f33 = f10, f6, f0
	br.cloop.dptk	.grt5

	;;
	getf.sig	r26 = f38
	xma.l		f34 = f11, f6, f0
	xma.hu		f35 = f11, f6, f0
	;;
	getf.sig	r27 = f39
	;;
	getf.sig	r20 = f32
	xma.l		f36 = f12, f6, f0
	xma.hu		f37 = f12, f6, f0
	;;
	getf.sig	r21 = f33
	;;
	getf.sig	r22 = f34
	xma.l		f38 = f13, f6, f0
	xma.hu		f39 = f13, f6, f0
	br		.Lcj5

.grt5:	ldf8		f10 = [r33], 8
	;;
	getf.sig	r26 = f38
	xma.l		f34 = f11, f6, f0
	xma.hu		f35 = f11, f6, f0
	;;
	getf.sig	r27 = f39
	ldf8		f11 = [r33], 8
	;;
	getf.sig	r20 = f32
	xma.l		f36 = f12, f6, f0
	xma.hu		f37 = f12, f6, f0
	;;
	getf.sig	r21 = f33
	ldf8		f12 = [r33], 8
	;;
	getf.sig	r22 = f34
	xma.l		f38 = f13, f6, f0
	xma.hu		f39 = f13, f6, f0
	br		.LL01

C Feed-in for n mod 4 = 2.
.Lb10:	ldf8		f13 = [r33], 8
	br.cloop.dptk	.grt2
	;;

	xma.l		f36 = f9, f6, f0
	xma.hu		f37 = f9, f6, f0
	;;
	xma.l		f38 = f13, f6, f0
	xma.hu		f39 = f13, f6, f0
	;;
	getf.sig	r24 = f36
	;;
	getf.sig	r25 = f37
	;;
	getf.sig	r26 = f38
	;;
	getf.sig	r27 = f39
	br		.Lcj2

.grt2:	ldf8		f10 = [r33], 8
	;;
	ldf8		f11 = [r33], 8
	;;
	xma.l		f36 = f9, f6, f0
	xma.hu		f37 = f9, f6, f0
	;;
	ldf8		f12 = [r33], 8
	;;
	xma.l		f38 = f13, f6, f0
	xma.hu		f39 = f13, f6, f0
	;;
	ldf8		f13 = [r33], 8
	;;
	getf.sig	r24 = f36
	xma.l		f32 = f10, f6, f0
	xma.hu		f33 = f10, f6, f0
	br.cloop.dptk	.grt6

	getf.sig	r25 = f37
	;;
	getf.sig	r26 = f38
	xma.l		f34 = f11, f6, f0
	xma.hu		f35 = f11, f6, f0
	;;
	getf.sig	r27 = f39
	;;
	getf.sig	r20 = f32
	xma.l		f36 = f12, f6, f0
	xma.hu		f37 = f12, f6, f0
	br		.Lcj6

.grt6:	getf.sig	r25 = f37
	ldf8		f10 = [r33], 8
	;;
	getf.sig	r26 = f38
	xma.l		f34 = f11, f6, f0
	xma.hu		f35 = f11, f6, f0
	;;
	getf.sig	r27 = f39
	ldf8		f11 = [r33], 8
	;;
	getf.sig	r20 = f32
	xma.l		f36 = f12, f6, f0
	xma.hu		f37 = f12, f6, f0
	br		.LL10


C Feed-in for n mod 4 = 3.
.Lb11:	ldf8		f12 = [r33], 8
	;;
	ldf8		f13 = [r33], 8
	br.cloop.dptk	.grt3
	;;

	xma.l		f34 = f9, f6, f0
	xma.hu		f35 = f9, f6, f0
	;;
	xma.l		f36 = f12, f6, f0
	xma.hu		f37 = f12, f6, f0
	;;
	getf.sig	r22 = f34
	xma.l		f38 = f13, f6, f0
	xma.hu		f39 = f13, f6, f0
	;;
	getf.sig	r23 = f35
	;;
	getf.sig	r24 = f36
	;;
	getf.sig	r25 = f37
	;;
	getf.sig	r26 = f38
	br		.Lcj3

.grt3:	ldf8		f10 = [r33], 8
	;;
	xma.l		f34 = f9, f6, f0
	xma.hu		f35 = f9, f6, f0
	;;
	ldf8		f11 = [r33], 8
	;;
	xma.l		f36 = f12, f6, f0
	xma.hu		f37 = f12, f6, f0
	;;
	ldf8		f12 = [r33], 8
	;;
	getf.sig	r22 = f34
	xma.l		f38 = f13, f6, f0
	xma.hu		f39 = f13, f6, f0
	;;
	getf.sig	r23 = f35
	ldf8		f13 = [r33], 8
	;;
	getf.sig	r24 = f36
	xma.l		f32 = f10, f6, f0
	xma.hu		f33 = f10, f6, f0
	br.cloop.dptk	.grt7

	getf.sig	r25 = f37
	;;
	getf.sig	r26 = f38
	xma.l		f34 = f11, f6, f0
	xma.hu		f35 = f11, f6, f0
	br		.Lcj7

.grt7:	getf.sig	r25 = f37
	ldf8		f10 = [r33], 8
	;;
	getf.sig	r26 = f38
	xma.l		f34 = f11, f6, f0
	xma.hu		f35 = f11, f6, f0
	br		.LL11


C Feed-in for n mod 4 = 0.
.Lb00:	ldf8		f11 = [r33], 8
	;;
	ldf8		f12 = [r33], 8
	;;
	ldf8		f13 = [r33], 8
	br.cloop.dptk	.grt4
	;;

	xma.l		f32 = f9, f6, f0
	xma.hu		f33 = f9, f6, f0
	;;
	xma.l		f34 = f11, f6, f0
	xma.hu		f35 = f11, f6, f0
	;;
	getf.sig	r20 = f32
	xma.l		f36 = f12, f6, f0
	xma.hu		f37 = f12, f6, f0
	;;
	getf.sig	r21 = f33
	;;
	getf.sig	r22 = f34
	xma.l		f38 = f13, f6, f0
	xma.hu		f39 = f13, f6, f0
	;;
	getf.sig	r23 = f35
	;;
	getf.sig	r24 = f36
	br		.Lcj4

.grt4:	xma.l		f32 = f9, f6, f0
	xma.hu		f33 = f9, f6, f0
	;;
	ldf8		f10 = [r33], 8
	;;
	xma.l		f34 = f11, f6, f0
	xma.hu		f35 = f11, f6, f0
	;;
	ldf8		f11 = [r33], 8
	;;
	getf.sig	r20 = f32
	xma.l		f36 = f12, f6, f0
	xma.hu		f37 = f12, f6, f0
	;;
	getf.sig	r21 = f33
	ldf8		f12 = [r33], 8
	;;
	getf.sig	r22 = f34
	xma.l		f38 = f13, f6, f0
	xma.hu		f39 = f13, f6, f0
	;;
	getf.sig	r23 = f35
	ldf8		f13 = [r33], 8
	;;
	getf.sig	r24 = f36
	xma.l		f32 = f10, f6, f0
	xma.hu		f33 = f10, f6, f0
	br.cloop.dptk	.LL00
	br		.Lcj8

C *** MAIN LOOP START ***
C One trip from .Ltop back to .Ltop stores four result limbs (r19, r16, r17,
C r18 in that order).  The feed-in code above enters at .LL00, .LL11, .LL10
C or .LL01.  p6 and p7 come from the same cmp.ltu and are declared mutually
C exclusive so that the paired predicated writes of r15 are accepted.
	ALIGN(32)
.Ltop:
	.pred.rel "mutex",p6,p7
C	.mfi
	getf.sig	r24 = f36
	xma.l		f32 = f10, f6, f0
  (p6)	sub		r15 = r19, r27, 1
C	.mfi
	st8		[r32] = r19, 8
	xma.hu		f33 = f10, f6, f0
  (p7)	sub		r15 = r19, r27
	;;
.LL00:
C	.mfi
	getf.sig	r25 = f37
	nop.f		0
	cmp.ltu		p6, p7 = r15, r20
C	.mib
	ldf8		f10 = [r33], 8
	sub		r16 = r15, r20
	nop.b		0
	;;

C	.mfi
	getf.sig	r26 = f38
	xma.l		f34 = f11, f6, f0
  (p6)	sub		r15 = r16, r21, 1
C	.mfi
	st8		[r32] = r16, 8
	xma.hu		f35 = f11, f6, f0
  (p7)	sub		r15 = r16, r21
	;;
.LL11:
C	.mfi
	getf.sig	r27 = f39
	nop.f		0
	cmp.ltu		p6, p7 = r15, r22
C	.mib
	ldf8		f11 = [r33], 8
	sub		r17 = r15, r22
	nop.b		0
	;;

C	.mfi
	getf.sig	r20 = f32
	xma.l		f36 = f12, f6, f0
  (p6)	sub		r15 = r17, r23, 1
C	.mfi
	st8		[r32] = r17, 8
	xma.hu		f37 = f12, f6, f0
  (p7)	sub		r15 = r17, r23
	;;
.LL10:
C	.mfi
	getf.sig	r21 = f33
	nop.f		0
	cmp.ltu		p6, p7 = r15, r24
C	.mib
	ldf8		f12 = [r33], 8
	sub		r18 = r15, r24
	nop.b		0
	;;

C	.mfi
	getf.sig	r22 = f34
	xma.l		f38 = f13, f6, f0
  (p6)	sub		r15 = r18, r25, 1
C	.mfi
	st8		[r32] = r18, 8
	xma.hu		f39 = f13, f6, f0
  (p7)	sub		r15 = r18, r25
	;;
.LL01:
C	.mfi
	getf.sig	r23 = f35
	nop.f		0
	cmp.ltu		p6, p7 = r15, r26
C	.mib
	ldf8		f13 = [r33], 8
	sub		r19 = r15, r26
	br.cloop.sptk.few .Ltop
C *** MAIN LOOP END ***
	;;

C Wind-down.  Same subtract, store and borrow steps as the loop, but with no
C further loads.  Entering at .LcjK leaves K limbs still to be stored, which
C is why the short feed-in paths above branch straight to .Lcj1 ... .Lcj8.
	getf.sig	r24 = f36
	xma.l		f32 = f10, f6, f0
  (p6)	sub		r15 = r19, r27, 1
	st8		[r32] = r19, 8
	xma.hu		f33 = f10, f6, f0
  (p7)	sub		r15 = r19, r27
	;;
.Lcj8:	getf.sig	r25 = f37
	cmp.ltu		p6, p7 = r15, r20
	sub		r16 = r15, r20
	;;
	getf.sig	r26 = f38
	xma.l		f34 = f11, f6, f0
  (p6)	sub		r15 = r16, r21, 1
	st8		[r32] = r16, 8
	xma.hu		f35 = f11, f6, f0
  (p7)	sub		r15 = r16, r21
	;;
.Lcj7:	getf.sig	r27 = f39
	cmp.ltu		p6, p7 = r15, r22
	sub		r17 = r15, r22
	;;
	getf.sig	r20 = f32
	xma.l		f36 = f12, f6, f0
  (p6)	sub		r15 = r17, r23, 1
	st8		[r32] = r17, 8
	xma.hu		f37 = f12, f6, f0
  (p7)	sub		r15 = r17, r23
	;;
.Lcj6:	getf.sig	r21 = f33
	cmp.ltu		p6, p7 = r15, r24
	sub		r18 = r15, r24
	;;
	getf.sig	r22 = f34
	xma.l		f38 = f13, f6, f0
  (p6)	sub		r15 = r18, r25, 1
	st8		[r32] = r18, 8
	xma.hu		f39 = f13, f6, f0
  (p7)	sub		r15 = r18, r25
	;;
.Lcj5:	getf.sig	r23 = f35
	cmp.ltu		p6, p7 = r15, r26
	sub		r19 = r15, r26
	;;
	getf.sig	r24 = f36
  (p6)	sub		r15 = r19, r27, 1
	st8		[r32] = r19, 8
  (p7)	sub		r15 = r19, r27
	;;
.Lcj4:	getf.sig	r25 = f37
	cmp.ltu		p6, p7 = r15, r20
	sub		r16 = r15, r20
	;;
	getf.sig	r26 = f38
  (p6)	sub		r15 = r16, r21, 1
	st8		[r32] = r16, 8
  (p7)	sub		r15 = r16, r21
	;;
.Lcj3:	getf.sig	r27 = f39
	cmp.ltu		p6, p7 = r15, r22
	sub		r17 = r15, r22
	;;
  (p6)	sub		r15 = r17, r23, 1
	st8		[r32] = r17, 8
  (p7)	sub		r15 = r17, r23
	;;
.Lcj2:	cmp.ltu		p6, p7 = r15, r24
	sub		r18 = r15, r24
	;;
  (p6)	sub		r15 = r18, r25, 1
	st8		[r32] = r18, 8
  (p7)	sub		r15 = r18, r25
	;;
C Last limb: the final accumulator goes to r8 instead of r15, the last store
C does not advance rp, and ar.lc is restored from r2 before returning.
.Lcj1:	cmp.ltu		p6, p7 = r15, r26
	sub		r19 = r15, r26
	;;
  (p6)	sub		r8 = r19, r27, 1
	st8		[r32] = r19
  (p7)	sub		r8 = r19, r27
	mov		ar.lc = r2
	br.ret.sptk.many b0
EPILOGUE()
ASM_END()