dnl  HP-PA 2.0 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and
dnl  subtract the result from a second limb vector.

dnl  Copyright 1998-2000, 2002, 2003 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C 8000,8200:            7
C 8500,8600,8700:       6.5

C  The feed-in and wind-down code has not yet been scheduled.  Many cycles
C  could be saved there per call.

C  DESCRIPTION:
C  The main loop "BIG" is 4-way unrolled, mainly to allow
C  effective use of ADD,DC.  Delays in moving data via the cache from the FP
C  registers to the IU registers, have demanded a deep software pipeline, and
C  a lot of stack slots for partial products in flight.
C
C  CODE STRUCTURE:
C    save-some-registers
C    do 0, 1, 2, or 3 limbs
C    if done, restore-some-regs and return
C    save-many-regs
C    do 4, 8, ... limbs
C    restore-all-regs

C  STACK LAYOUT:
C    HP-PA stack grows upwards.  We could allocate 8 fewer slots by using the
C    slots marked FREE, as well as some slots in the caller's "frame marker".
C
C   -00 <- r30
C   -08  FREE
C   -10  tmp
C   -18  tmp
C   -20  tmp
C   -28  tmp
C   -30  tmp
C   -38  tmp
C   -40  tmp
C   -48  tmp
C   -50  tmp
C   -58  tmp
C   -60  tmp
C   -68  tmp
C   -70  tmp
C   -78  tmp
C   -80  tmp
C   -88  tmp
C   -90  FREE
C   -98  FREE
C   -a0  FREE
C   -a8  FREE
C   -b0  r13
C   -b8  r12
C   -c0  r11
C   -c8  r10
C   -d0  r9
C   -d8  r8
C   -e0  r7
C   -e8  r6
C   -f0  r5
C   -f8  r4
C  -100  r3
C  Previous frame:
C   [unused area]
C   -38/-138 vlimb home slot.  For 2.0N, the vlimb arg will arrive here.
include(`../config.m4')

C NOTE(review): config.m4 is also included near the top of this file, so this
C second include looks redundant.  It is kept to leave the build unchanged.

C mpn_submul_1 -- multiply the limb vector at up by vlimb and subtract the
C product from the limb vector at rp, storing the result back through rp.
C The final value of climb is the return value: copied to %r28 for the 2.0w
C ABI, otherwise split into %r28 (upper 32 bits) and %r29 (lower 32 bits).
C
C Register roles established by the defines below:
C   rp    = %r26   vector that is loaded, updated and stored back
C   up    = %r25   vector whose limbs are multiplied by vlimb
C   n     = %r24   limb count
C   vlimb = %r23   multiplier limb
C   climb = %r23   running carry limb.  It shares %r23 with vlimb; vlimb is
C                  read into %fr8 from its stack home slot, so the integer
C                  copy is not needed once climb has been cleared.

C INPUT PARAMETERS:
define(`rp',`%r26')                     C
define(`up',`%r25')                     C
define(`n',`%r24')                      C
define(`vlimb',`%r23')                  C

define(`climb',`%r23')                  C

ifdef(`HAVE_ABI_2_0w',
`       .level  2.0w
',`     .level  2.0
')
PROLOGUE(mpn_submul_1)

C For 2.0w, vlimb arrives in a register and is written to its home slot so
C that the fldd below can pick it up.  This store must precede the clearing
C of climb, which lives in the same register.
ifdef(`HAVE_ABI_2_0w',
`       std         vlimb, -0x38(%r30)      C store vlimb into "home" slot
')
C Allocate a 0x100 byte frame (std,ma stores %r3 and then advances %r30) and
C save %r4 and %r5.  %r3-%r5 are used as scratch by the 0..3 limb code.
C After the frame is allocated the vlimb home slot is at -0x138(%r30).
        std,ma      %r3, 0x100(%r30)
        std         %r4, -0xf8(%r30)
        std         %r5, -0xf0(%r30)
        ldo         0(%r0), climb           C clear climb
        fldd        -0x138(%r30), %fr8      C put vlimb in fp register

C Register assignment for the 0..3 limb code.  Note that m032 and ma064
C both name %r20.
define(`p032a1',`%r1')                  C
define(`p032a2',`%r19')                 C

define(`m032',`%r20')                   C
define(`m096',`%r21')                   C

define(`p000a',`%r22')                  C
define(`p064a',`%r29')                  C

define(`s000',`%r31')                   C

define(`ma000',`%r4')                   C
define(`ma064',`%r20')                  C

define(`r000',`%r3')                    C

C %r5 = n mod 4 (the low two bits of n).  When it is zero there are no
C leftover limbs and control goes straight to the 4-way unrolled code.
        extrd,u     n, 63, 2, %r5
        cmpb,=      %r5, %r0, L(BIG)
        nop

C First leftover limb: form the four 32x32 partial products of vlimb (%fr8)
C and the limb in %fr4, and spill them to tmp slots so they can be reloaded
C into integer registers.
        fldd        0(up), %fr4
        ldo         8(up), up
        xmpyu       %fr8R, %fr4L, %fr22
        xmpyu       %fr8L, %fr4R, %fr23
        fstd        %fr22, -0x78(%r30)      C mid product to -0x78..-0x71
        xmpyu       %fr8R, %fr4R, %fr24
        xmpyu       %fr8L, %fr4L, %fr25
        fstd        %fr23, -0x70(%r30)      C mid product to -0x70..-0x69
        fstd        %fr24, -0x80(%r30)      C low product to -0x80..-0x79
        addib,<>    -1, %r5, L(two_or_more)
C The fstd below sits in the delay slot of the addib above.
        fstd        %fr25, -0x68(%r30)      C high product to -0x68..-0x61
LDEF(one)
C Exactly one leftover limb: reload its partial products and finish.
        ldd         -0x78(%r30), p032a1
        ldd         -0x70(%r30), p032a2
        ldd         -0x80(%r30), p000a
        b           L(0_one_out)
        ldd         -0x68(%r30), p064a

LDEF(two_or_more)
C Multiply the second limb while reloading the partial products of the
C first one.  Each ldd reads a tmp slot just before the fstd that reuses it.
        fldd        0(up), %fr4
        ldo         8(up), up
        xmpyu       %fr8R, %fr4L, %fr22
        xmpyu       %fr8L, %fr4R, %fr23
        ldd         -0x78(%r30), p032a1
        fstd        %fr22, -0x78(%r30)      C mid product to -0x78..-0x71
        xmpyu       %fr8R, %fr4R, %fr24
        xmpyu       %fr8L, %fr4L, %fr25
        ldd         -0x70(%r30), p032a2
        fstd        %fr23, -0x70(%r30)      C mid product to -0x70..-0x69
        ldd         -0x80(%r30), p000a
        fstd        %fr24, -0x80(%r30)      C low product to -0x80..-0x79
        ldd         -0x68(%r30), p064a
        addib,<>    -1, %r5, L(three_or_more)
        fstd        %fr25, -0x68(%r30)      C high product to -0x68..-0x61
LDEF(two)
C m096:m032 = sum of the two mid products (m096 receives the carry out).
C ma000 = m032 << 32 and ma064 = (m096 << 32) | (m032 >> 32), i.e. the mid
C sum aligned with the low and high product limbs.
        add         p032a1, p032a2, m032
        add,dc      %r0, %r0, m096
        depd,z      m032, 31, 32, ma000
        extrd,u     m032, 31, 32, ma064
        ldd         0(rp), r000
        b           L(0_two_out)
        depd        m096, 31, 32, ma064

LDEF(three_or_more)
C Three leftover limbs.  The loop0 code below is commented out, so control
C falls through to L(0_out) and handles exactly one more limb here.
        fldd        0(up), %fr4
        add         p032a1, p032a2, m032
        add,dc      %r0, %r0, m096
        depd,z      m032, 31, 32, ma000
        extrd,u     m032, 31, 32, ma064
        ldd         0(rp), r000
C       addib,=     -1, %r5, L(0_out)
        depd        m096, 31, 32, ma064
LDEF(loop0)
C       xmpyu       %fr8R, %fr4L, %fr22
C       xmpyu       %fr8L, %fr4R, %fr23
C       ldd         -0x78(%r30), p032a1
C       fstd        %fr22, -0x78(%r30)      C mid product to -0x78..-0x71
C
C       xmpyu       %fr8R, %fr4R, %fr24
C       xmpyu       %fr8L, %fr4L, %fr25
C       ldd         -0x70(%r30), p032a2
C       fstd        %fr23, -0x70(%r30)      C mid product to -0x70..-0x69
C
C       ldo         8(rp), rp
C       add         climb, p000a, s000
C       ldd         -0x80(%r30), p000a
C       fstd        %fr24, -0x80(%r30)      C low product to -0x80..-0x79
C
C       add,dc      p064a, %r0, climb
C       ldo         8(up), up
C       ldd         -0x68(%r30), p064a
C       fstd        %fr25, -0x68(%r30)      C high product to -0x68..-0x61
C
C       add         ma000, s000, s000
C       add,dc      ma064, climb, climb
C       fldd        0(up), %fr4
C
C       sub         r000, s000, s000
C       sub,db      %r0, climb, climb
C       sub         %r0, climb, climb
C       std         s000, -8(rp)
C
C       add         p032a1, p032a2, m032
C       add,dc      %r0, %r0, m096
C
C       depd,z      m032, 31, 32, ma000
C       extrd,u     m032, 31, 32, ma064
C       ldd         0(rp), r000
C       addib,<>    -1, %r5, L(loop0)
C       depd        m096, 31, 32, ma064
LDEF(0_out)
C Wind-down: multiply the limb already in %fr4 while completing the limb
C whose partial products are in the integer registers.
        ldo         8(up), up
        xmpyu       %fr8R, %fr4L, %fr22
        xmpyu       %fr8L, %fr4R, %fr23
        ldd         -0x78(%r30), p032a1
        fstd        %fr22, -0x78(%r30)      C mid product to -0x78..-0x71
        xmpyu       %fr8R, %fr4R, %fr24
        xmpyu       %fr8L, %fr4L, %fr25
        ldd         -0x70(%r30), p032a2
        fstd        %fr23, -0x70(%r30)      C mid product to -0x70..-0x69
        ldo         8(rp), rp
        add         climb, p000a, s000
        ldd         -0x80(%r30), p000a
        fstd        %fr24, -0x80(%r30)      C low product to -0x80..-0x79
        add,dc      p064a, %r0, climb
        ldd         -0x68(%r30), p064a
        fstd        %fr25, -0x68(%r30)      C high product to -0x68..-0x61
        add         ma000, s000, s000
        add,dc      ma064, climb, climb
C s000 = rp limb minus product limb.  The sub,db/sub pair then computes
C 0 - (0 - climb - borrow), which adds the borrow into climb.
        sub         r000, s000, s000
        sub,db      %r0, climb, climb
        sub         %r0, climb, climb
        std         s000, -8(rp)
        add         p032a1, p032a2, m032
        add,dc      %r0, %r0, m096
        depd,z      m032, 31, 32, ma000
        extrd,u     m032, 31, 32, ma064
        ldd         0(rp), r000
        depd        m096, 31, 32, ma064
LDEF(0_two_out)
C Two limbs still in flight: complete the older one and reload the partial
C products of the last one from the tmp slots.
        ldd         -0x78(%r30), p032a1
        ldd         -0x70(%r30), p032a2
        ldo         8(rp), rp
        add         climb, p000a, s000
        ldd         -0x80(%r30), p000a
        add,dc      p064a, %r0, climb
        ldd         -0x68(%r30), p064a
        add         ma000, s000, s000
        add,dc      ma064, climb, climb
        sub         r000, s000, s000
        sub,db      %r0, climb, climb
        sub         %r0, climb, climb
        std         s000, -8(rp)
LDEF(0_one_out)
C Last leftover limb: combine the mid products, accumulate, subtract from
C the rp limb and store.
        add         p032a1, p032a2, m032
        add,dc      %r0, %r0, m096
        depd,z      m032, 31, 32, ma000
        extrd,u     m032, 31, 32, ma064
        ldd         0(rp), r000
        depd        m096, 31, 32, ma064

        add         climb, p000a, s000
        add,dc      p064a, %r0, climb
        add         ma000, s000, s000
        add,dc      ma064, climb, climb
        sub         r000, s000, s000
        sub,db      %r0, climb, climb
        sub         %r0, climb, climb
        std         s000, 0(rp)

C If 4 >= n there are no groups of four limbs left to do, so return.
C The ldo in the delay slot advances rp past the limb just stored.
        cmpib,>=    4, n, L(done)
        ldo         8(rp), rp

C 4-way unrolled code.

LDEF(BIG)

C Register assignment for the unrolled code.  Several names share a
C register; their live ranges are kept apart by the instruction schedule.
define(`p032a1',`%r1')                  C
define(`p032a2',`%r19')                 C
define(`p096b1',`%r20')                 C
define(`p096b2',`%r21')                 C
define(`p160c1',`%r22')                 C
define(`p160c2',`%r29')                 C
define(`p224d1',`%r31')                 C
define(`p224d2',`%r3')                  C
C
define(`m032',`%r4')                    C
define(`m096',`%r5')                    C
define(`m160',`%r6')                    C
define(`m224',`%r7')                    C
define(`m288',`%r8')                    C
C
define(`p000a',`%r1')                   C
define(`p064a',`%r19')                  C
define(`p064b',`%r20')                  C
define(`p128b',`%r21')                  C
define(`p128c',`%r22')                  C
define(`p192c',`%r29')                  C
define(`p192d',`%r31')                  C
define(`p256d',`%r3')                   C
C
define(`s000',`%r10')                   C
define(`s064',`%r11')                   C
define(`s128',`%r12')                   C
define(`s192',`%r13')                   C
C
define(`ma000',`%r9')                   C
define(`ma064',`%r4')                   C
define(`ma128',`%r5')                   C
define(`ma192',`%r6')                   C
define(`ma256',`%r7')                   C
C
define(`r000',`%r1')                    C
define(`r064',`%r19')                   C
define(`r128',`%r20')                   C
define(`r192',`%r21')                   C

C Save %r6-%r13, which only the unrolled code uses.
        std         %r6, -0xe8(%r30)
        std         %r7, -0xe0(%r30)
        std         %r8, -0xd8(%r30)
        std         %r9, -0xd0(%r30)
        std         %r10, -0xc8(%r30)
        std         %r11, -0xc0(%r30)
        std         %r12, -0xb8(%r30)
        std         %r13, -0xb0(%r30)

C From here on n counts groups of four limbs.
ifdef(`HAVE_ABI_2_0w',
`       extrd,u     n, 61, 62, n            C right shift 2
',`     extrd,u     n, 61, 30, n            C right shift 2, zero extend
')

LDEF(4_or_more)
C Feed-in: load four limbs and start their sixteen partial products.
        fldd        0(up), %fr4
        fldd        8(up), %fr5
        fldd        16(up), %fr6
        fldd        24(up), %fr7
        xmpyu       %fr8R, %fr4L, %fr22
        xmpyu       %fr8L, %fr4R, %fr23
        xmpyu       %fr8R, %fr5L, %fr24
        xmpyu       %fr8L, %fr5R, %fr25
        xmpyu       %fr8R, %fr6L, %fr26
        xmpyu       %fr8L, %fr6R, %fr27
        fstd        %fr22, -0x78(%r30)      C mid product to -0x78..-0x71
        xmpyu       %fr8R, %fr7L, %fr28
        xmpyu       %fr8L, %fr7R, %fr29
        fstd        %fr23, -0x70(%r30)      C mid product to -0x70..-0x69
        xmpyu       %fr8R, %fr4R, %fr30
        xmpyu       %fr8L, %fr4L, %fr31
        fstd        %fr24, -0x38(%r30)      C mid product to -0x38..-0x31
        xmpyu       %fr8R, %fr5R, %fr22
        xmpyu       %fr8L, %fr5L, %fr23
        fstd        %fr25, -0x30(%r30)      C mid product to -0x30..-0x29
        xmpyu       %fr8R, %fr6R, %fr24
        xmpyu       %fr8L, %fr6L, %fr25
        fstd        %fr26, -0x58(%r30)      C mid product to -0x58..-0x51
        xmpyu       %fr8R, %fr7R, %fr26
        fstd        %fr27, -0x50(%r30)      C mid product to -0x50..-0x49
        addib,<>    -1, n, L(8_or_more)
        xmpyu       %fr8L, %fr7L, %fr27
C Only one group of four: spill the remaining products, reload the mid
C products and go to the final wind-down at L(end1).
        fstd        %fr28, -0x18(%r30)      C mid product to -0x18..-0x11
        fstd        %fr29, -0x10(%r30)      C mid product to -0x10..-0x09
        fstd        %fr30, -0x80(%r30)      C low product to -0x80..-0x79
        fstd        %fr31, -0x68(%r30)      C high product to -0x68..-0x61
        fstd        %fr22, -0x40(%r30)      C low product to -0x40..-0x39
        fstd        %fr23, -0x28(%r30)      C high product to -0x28..-0x21
        fstd        %fr24, -0x60(%r30)      C low product to -0x60..-0x59
        fstd        %fr25, -0x48(%r30)      C high product to -0x48..-0x41
        fstd        %fr26, -0x20(%r30)      C low product to -0x20..-0x19
        fstd        %fr27, -0x88(%r30)      C high product to -0x88..-0x81
        ldd         -0x78(%r30), p032a1
        ldd         -0x70(%r30), p032a2
        ldd         -0x38(%r30), p096b1
        ldd         -0x30(%r30), p096b2
        ldd         -0x58(%r30), p160c1
        ldd         -0x50(%r30), p160c2
        ldd         -0x18(%r30), p224d1
        ldd         -0x10(%r30), p224d2
        b           L(end1)
        nop

LDEF(8_or_more)
C At least two groups: spill the first group, then start multiplying the
C second group while reloading the mid products of the first.
        fstd        %fr28, -0x18(%r30)      C mid product to -0x18..-0x11
        fstd        %fr29, -0x10(%r30)      C mid product to -0x10..-0x09
        ldo         32(up), up
        fstd        %fr30, -0x80(%r30)      C low product to -0x80..-0x79
        fstd        %fr31, -0x68(%r30)      C high product to -0x68..-0x61
        fstd        %fr22, -0x40(%r30)      C low product to -0x40..-0x39
        fstd        %fr23, -0x28(%r30)      C high product to -0x28..-0x21
        fstd        %fr24, -0x60(%r30)      C low product to -0x60..-0x59
        fstd        %fr25, -0x48(%r30)      C high product to -0x48..-0x41
        fstd        %fr26, -0x20(%r30)      C low product to -0x20..-0x19
        fstd        %fr27, -0x88(%r30)      C high product to -0x88..-0x81
        fldd        0(up), %fr4
        fldd        8(up), %fr5
        fldd        16(up), %fr6
        fldd        24(up), %fr7
        xmpyu       %fr8R, %fr4L, %fr22
        ldd         -0x78(%r30), p032a1
        xmpyu       %fr8L, %fr4R, %fr23
        xmpyu       %fr8R, %fr5L, %fr24
        ldd         -0x70(%r30), p032a2
        xmpyu       %fr8L, %fr5R, %fr25
        xmpyu       %fr8R, %fr6L, %fr26
        ldd         -0x38(%r30), p096b1
        xmpyu       %fr8L, %fr6R, %fr27
        fstd        %fr22, -0x78(%r30)      C mid product to -0x78..-0x71
        xmpyu       %fr8R, %fr7L, %fr28
        ldd         -0x30(%r30), p096b2
        xmpyu       %fr8L, %fr7R, %fr29
        fstd        %fr23, -0x70(%r30)      C mid product to -0x70..-0x69
        xmpyu       %fr8R, %fr4R, %fr30
        ldd         -0x58(%r30), p160c1
        xmpyu       %fr8L, %fr4L, %fr31
        fstd        %fr24, -0x38(%r30)      C mid product to -0x38..-0x31
        xmpyu       %fr8R, %fr5R, %fr22
        ldd         -0x50(%r30), p160c2
        xmpyu       %fr8L, %fr5L, %fr23
        fstd        %fr25, -0x30(%r30)      C mid product to -0x30..-0x29
        xmpyu       %fr8R, %fr6R, %fr24
        ldd         -0x18(%r30), p224d1
        xmpyu       %fr8L, %fr6L, %fr25
        fstd        %fr26, -0x58(%r30)      C mid product to -0x58..-0x51
        xmpyu       %fr8R, %fr7R, %fr26
        ldd         -0x10(%r30), p224d2
        fstd        %fr27, -0x50(%r30)      C mid product to -0x50..-0x49
        addib,=     -1, n, L(end2)
        xmpyu       %fr8L, %fr7L, %fr27
LDEF(loop)
C Steady state, one group of four limbs per iteration.  The integer unit
C finishes the previous group (mid sums, accumulate, subtract from rp,
C store) while the products of the current group are spilled and the next
C group is loaded and multiplied.
        add         p032a1, p032a2, m032
        ldd         -0x80(%r30), p000a
        add,dc      p096b1, p096b2, m096
        fstd        %fr28, -0x18(%r30)      C mid product to -0x18..-0x11

        add,dc      p160c1, p160c2, m160
        ldd         -0x68(%r30), p064a
        add,dc      p224d1, p224d2, m224
        fstd        %fr29, -0x10(%r30)      C mid product to -0x10..-0x09

        add,dc      %r0, %r0, m288
        ldd         -0x40(%r30), p064b
        ldo         32(up), up
        fstd        %fr30, -0x80(%r30)      C low product to -0x80..-0x79

        depd,z      m032, 31, 32, ma000
        ldd         -0x28(%r30), p128b
        extrd,u     m032, 31, 32, ma064
        fstd        %fr31, -0x68(%r30)      C high product to -0x68..-0x61

        depd        m096, 31, 32, ma064
        ldd         -0x60(%r30), p128c
        extrd,u     m096, 31, 32, ma128
        fstd        %fr22, -0x40(%r30)      C low product to -0x40..-0x39

        depd        m160, 31, 32, ma128
        ldd         -0x48(%r30), p192c
        extrd,u     m160, 31, 32, ma192
        fstd        %fr23, -0x28(%r30)      C high product to -0x28..-0x21

        depd        m224, 31, 32, ma192
        ldd         -0x20(%r30), p192d
        extrd,u     m224, 31, 32, ma256
        fstd        %fr24, -0x60(%r30)      C low product to -0x60..-0x59

        depd        m288, 31, 32, ma256
        ldd         -0x88(%r30), p256d
        add         climb, p000a, s000
        fstd        %fr25, -0x48(%r30)      C high product to -0x48..-0x41

        add,dc      p064a, p064b, s064
        ldd         0(rp), r000
        add,dc      p128b, p128c, s128
        fstd        %fr26, -0x20(%r30)      C low product to -0x20..-0x19

        add,dc      p192c, p192d, s192
        ldd         8(rp), r064
        add,dc      p256d, %r0, climb
        fstd        %fr27, -0x88(%r30)      C high product to -0x88..-0x81

        ldd         16(rp), r128
        add         ma000, s000, s000       C accum mid 0
        ldd         24(rp), r192
        add,dc      ma064, s064, s064       C accum mid 1

        add,dc      ma128, s128, s128       C accum mid 2
        fldd        0(up), %fr4
        add,dc      ma192, s192, s192       C accum mid 3
        fldd        8(up), %fr5

        add,dc      ma256, climb, climb
        fldd        16(up), %fr6
        sub         r000, s000, s000        C accum rlimb 0
        fldd        24(up), %fr7

        sub,db      r064, s064, s064        C accum rlimb 1
        sub,db      r128, s128, s128        C accum rlimb 2
        std         s000, 0(rp)

        sub,db      r192, s192, s192        C accum rlimb 3
        sub,db      %r0, climb, climb
        sub         %r0, climb, climb
        std         s064, 8(rp)

        xmpyu       %fr8R, %fr4L, %fr22
        ldd         -0x78(%r30), p032a1
        xmpyu       %fr8L, %fr4R, %fr23
        std         s128, 16(rp)

        xmpyu       %fr8R, %fr5L, %fr24
        ldd         -0x70(%r30), p032a2
        xmpyu       %fr8L, %fr5R, %fr25
        std         s192, 24(rp)

        xmpyu       %fr8R, %fr6L, %fr26
        ldd         -0x38(%r30), p096b1
        xmpyu       %fr8L, %fr6R, %fr27
        fstd        %fr22, -0x78(%r30)      C mid product to -0x78..-0x71

        xmpyu       %fr8R, %fr7L, %fr28
        ldd         -0x30(%r30), p096b2
        xmpyu       %fr8L, %fr7R, %fr29
        fstd        %fr23, -0x70(%r30)      C mid product to -0x70..-0x69

        xmpyu       %fr8R, %fr4R, %fr30
        ldd         -0x58(%r30), p160c1
        xmpyu       %fr8L, %fr4L, %fr31
        fstd        %fr24, -0x38(%r30)      C mid product to -0x38..-0x31

        xmpyu       %fr8R, %fr5R, %fr22
        ldd         -0x50(%r30), p160c2
        xmpyu       %fr8L, %fr5L, %fr23
        fstd        %fr25, -0x30(%r30)      C mid product to -0x30..-0x29

        xmpyu       %fr8R, %fr6R, %fr24
        ldd         -0x18(%r30), p224d1
        xmpyu       %fr8L, %fr6L, %fr25
        fstd        %fr26, -0x58(%r30)      C mid product to -0x58..-0x51

        xmpyu       %fr8R, %fr7R, %fr26
        ldd         -0x10(%r30), p224d2
        fstd        %fr27, -0x50(%r30)      C mid product to -0x50..-0x49
        xmpyu       %fr8L, %fr7L, %fr27

        addib,<>    -1, n, L(loop)
        ldo         32(rp), rp

LDEF(end2)
C Wind-down, part 1: finish the second-to-last group and spill the products
C of the last group.  No further limbs are loaded or multiplied.
        add         p032a1, p032a2, m032
        ldd         -0x80(%r30), p000a
        add,dc      p096b1, p096b2, m096
        fstd        %fr28, -0x18(%r30)      C mid product to -0x18..-0x11
        add,dc      p160c1, p160c2, m160
        ldd         -0x68(%r30), p064a
        add,dc      p224d1, p224d2, m224
        fstd        %fr29, -0x10(%r30)      C mid product to -0x10..-0x09
        add,dc      %r0, %r0, m288
        ldd         -0x40(%r30), p064b
        fstd        %fr30, -0x80(%r30)      C low product to -0x80..-0x79
        depd,z      m032, 31, 32, ma000
        ldd         -0x28(%r30), p128b
        extrd,u     m032, 31, 32, ma064
        fstd        %fr31, -0x68(%r30)      C high product to -0x68..-0x61
        depd        m096, 31, 32, ma064
        ldd         -0x60(%r30), p128c
        extrd,u     m096, 31, 32, ma128
        fstd        %fr22, -0x40(%r30)      C low product to -0x40..-0x39
        depd        m160, 31, 32, ma128
        ldd         -0x48(%r30), p192c
        extrd,u     m160, 31, 32, ma192
        fstd        %fr23, -0x28(%r30)      C high product to -0x28..-0x21
        depd        m224, 31, 32, ma192
        ldd         -0x20(%r30), p192d
        extrd,u     m224, 31, 32, ma256
        fstd        %fr24, -0x60(%r30)      C low product to -0x60..-0x59
        depd        m288, 31, 32, ma256
        ldd         -0x88(%r30), p256d
        add         climb, p000a, s000
        fstd        %fr25, -0x48(%r30)      C high product to -0x48..-0x41
        add,dc      p064a, p064b, s064
        ldd         0(rp), r000
        add,dc      p128b, p128c, s128
        fstd        %fr26, -0x20(%r30)      C low product to -0x20..-0x19
        add,dc      p192c, p192d, s192
        ldd         8(rp), r064
        add,dc      p256d, %r0, climb
        fstd        %fr27, -0x88(%r30)      C high product to -0x88..-0x81
        ldd         16(rp), r128
        add         ma000, s000, s000       C accum mid 0
        ldd         24(rp), r192
        add,dc      ma064, s064, s064       C accum mid 1
        add,dc      ma128, s128, s128       C accum mid 2
        add,dc      ma192, s192, s192       C accum mid 3
        add,dc      ma256, climb, climb
        sub         r000, s000, s000        C accum rlimb 0
        sub,db      r064, s064, s064        C accum rlimb 1
        sub,db      r128, s128, s128        C accum rlimb 2
        std         s000, 0(rp)
        sub,db      r192, s192, s192        C accum rlimb 3
        sub,db      %r0, climb, climb
        sub         %r0, climb, climb
        std         s064, 8(rp)
        ldd         -0x78(%r30), p032a1
        std         s128, 16(rp)
        ldd         -0x70(%r30), p032a2
        std         s192, 24(rp)
        ldd         -0x38(%r30), p096b1
        ldd         -0x30(%r30), p096b2
        ldd         -0x58(%r30), p160c1
        ldd         -0x50(%r30), p160c2
        ldd         -0x18(%r30), p224d1
        ldd         -0x10(%r30), p224d2
        ldo         32(rp), rp

LDEF(end1)
C Wind-down, part 2: finish the last group from the spilled products.
        add         p032a1, p032a2, m032
        ldd         -0x80(%r30), p000a
        add,dc      p096b1, p096b2, m096
        add,dc      p160c1, p160c2, m160
        ldd         -0x68(%r30), p064a
        add,dc      p224d1, p224d2, m224
        add,dc      %r0, %r0, m288
        ldd         -0x40(%r30), p064b
        depd,z      m032, 31, 32, ma000
        ldd         -0x28(%r30), p128b
        extrd,u     m032, 31, 32, ma064
        depd        m096, 31, 32, ma064
        ldd         -0x60(%r30), p128c
        extrd,u     m096, 31, 32, ma128
        depd        m160, 31, 32, ma128
        ldd         -0x48(%r30), p192c
        extrd,u     m160, 31, 32, ma192
        depd        m224, 31, 32, ma192
        ldd         -0x20(%r30), p192d
        extrd,u     m224, 31, 32, ma256
        depd        m288, 31, 32, ma256
        ldd         -0x88(%r30), p256d
        add         climb, p000a, s000
        add,dc      p064a, p064b, s064
        ldd         0(rp), r000
        add,dc      p128b, p128c, s128
        add,dc      p192c, p192d, s192
        ldd         8(rp), r064
        add,dc      p256d, %r0, climb
        ldd         16(rp), r128
        add         ma000, s000, s000       C accum mid 0
        ldd         24(rp), r192
        add,dc      ma064, s064, s064       C accum mid 1
        add,dc      ma128, s128, s128       C accum mid 2
        add,dc      ma192, s192, s192       C accum mid 3
        add,dc      ma256, climb, climb
        sub         r000, s000, s000        C accum rlimb 0
        sub,db      r064, s064, s064        C accum rlimb 1
        sub,db      r128, s128, s128        C accum rlimb 2
        std         s000, 0(rp)
        sub,db      r192, s192, s192        C accum rlimb 3
        sub,db      %r0, climb, climb
        sub         %r0, climb, climb
        std         s064, 8(rp)
        std         s128, 16(rp)
        std         s192, 24(rp)

C Restore the registers saved on entry to the unrolled code.
        ldd         -0xb0(%r30), %r13
        ldd         -0xb8(%r30), %r12
        ldd         -0xc0(%r30), %r11
        ldd         -0xc8(%r30), %r10
        ldd         -0xd0(%r30), %r9
        ldd         -0xd8(%r30), %r8
        ldd         -0xe0(%r30), %r7
        ldd         -0xe8(%r30), %r6
LDEF(done)
C Common exit.  The 0..3 limb code branches here directly, skipping the
C restores above because it never saved %r6-%r13.
ifdef(`HAVE_ABI_2_0w',
`       copy        climb, %r28
',`     extrd,u     climb, 63, 32, %r29
        extrd,u     climb, 31, 32, %r28
')
        ldd         -0xf0(%r30), %r5
        ldd         -0xf8(%r30), %r4
        bve         (%r2)
C The ldd,mb in the delay slot restores %r3 and releases the frame.
        ldd,mb      -0x100(%r30), %r3
EPILOGUE(mpn_submul_1)