dnl  HP-PA 2.0 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and
dnl  add the result to a second limb vector.

dnl  Copyright 1998-2000, 2002, 2003 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C 8000,8200:            7
C 8500,8600,8700:       6.375

C  The feed-in and wind-down code has not yet been scheduled.  Many cycles
C  could be saved there per call.

C  DESCRIPTION:
C  The main loop "BIG" is 4-way unrolled, mainly to allow
C  effective use of ADD,DC.  Delays in moving data via the cache from the FP
C  registers to the IU registers have demanded a deep software pipeline, and
C  a lot of stack slots for partial products in flight.
C
C  CODE STRUCTURE:
C    save-some-registers
C    do 0, 1, 2, or 3 limbs
C    if done, restore-some-regs and return
C    save-many-regs
C    do 4, 8, ... limbs
C    restore-all-regs

C  STACK LAYOUT:
C  HP-PA stack grows upwards.  We could allocate 8 fewer slots by using the
C  slots marked FREE, as well as some slots in the caller's "frame marker".
C
C -00 <- r30
C -08  FREE
C -10  tmp
C -18  tmp
C -20  tmp
C -28  tmp
C -30  tmp
C -38  tmp
C -40  tmp
C -48  tmp
C -50  tmp
C -58  tmp
C -60  tmp
C -68  tmp
C -70  tmp
C -78  tmp
C -80  tmp
C -88  tmp
C -90  FREE
C -98  FREE
C -a0  FREE
C -a8  FREE
C -b0  r13
C -b8  r12
C -c0  r11
C -c8  r10
C -d0  r9
C -d8  r8
C -e0  r7
C -e8  r6
C -f0  r5
C -f8  r4
C -100 r3
C  Previous frame:
C  [unused area]
C -38/-138 vlimb home slot.  For 2.0N, the vlimb arg will arrive here.


include(`../config.m4')

C ---------------------------------------------------------------------------
C mpn_addmul_1
C
C Multiplies the n-limb vector at up by the single limb vlimb and adds the
C products into the n-limb vector at rp (rp is both read and written, up is
C only read).  The limb carried out of the top is returned.
C
C In:    rp = %r26, up = %r25, n = %r24
C        vlimb = %r23 for the 2.0w ABI; for 2.0N it is read from its stack
C        home slot instead.
C Out:   carry limb in %r28 (2.0w), or split high:low over %r28:%r29 (2.0N).
C Saved: %r3-%r5 always; %r6-%r13 only when the 4-way path is entered.
C Stack: one 0x100-byte frame, used for register saves and for passing
C        partial products from the FP unit to the integer unit.
C
C Each 64x64 product is built from four 32x32 xmpyu products: one "low",
C one "high" and two "mid" products.  The two mids are summed and the sum is
C split at bit 32 so that it can be added into the low and high halves.
C ---------------------------------------------------------------------------

C INPUT PARAMETERS:
define(`rp',`%r26')             C vector that is read, added to and stored
define(`up',`%r25')             C vector that is multiplied (read only)
define(`n',`%r24')              C limb count
define(`vlimb',`%r23')          C multiplier limb (register form, 2.0w only)

define(`climb',`%r23')          C carry limb; reuses the vlimb register

ifdef(`HAVE_ABI_2_0w',
`       .level  2.0w
',`     .level  2.0
')
PROLOGUE(mpn_addmul_1)

ifdef(`HAVE_ABI_2_0w',
`       std             vlimb, -0x38(%r30)      C store vlimb into "home" slot
')
C Allocate the frame (std,ma bumps %r30 by 0x100 after storing %r3) and save
C the registers that the 1..3 limb code uses.
        std,ma          %r3, 0x100(%r30)
        std             %r4, -0xf8(%r30)
        std             %r5, -0xf0(%r30)
        ldo             0(%r0), climb           C clear climb
        fldd            -0x138(%r30), %fr8      C put vlimb in fp register

C Register names for the 1..3 limb code.  Several names share a register
C because their live ranges do not overlap.
define(`p032a1',`%r1')          C first mid product
define(`p032a2',`%r19')         C second mid product

define(`m032',`%r20')           C sum of the two mid products
define(`m096',`%r21')           C carry out of that sum

define(`p000a',`%r22')          C low product
define(`p064a',`%r29')          C high product

define(`s000',`%r31')           C result limb being accumulated

define(`ma000',`%r4')           C mid sum, part added into the result limb
define(`ma064',`%r20')          C mid sum, part added into the carry limb

define(`r000',`%r3')            C limb loaded from rp

C %r5 = n mod 4.  When that is zero there are no odd limbs; go directly to
C the 4-way code.
        extrd,u         n, 63, 2, %r5
        cmpb,=          %r5, %r0, L(BIG)
        nop

C First odd limb: form its four partial products and park them on the stack.
        fldd            0(up), %fr4
        ldo             8(up), up
        xmpyu           %fr8R, %fr4L, %fr22
        xmpyu           %fr8L, %fr4R, %fr23
        fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
        xmpyu           %fr8R, %fr4R, %fr24
        xmpyu           %fr8L, %fr4L, %fr25
        fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
        fstd            %fr24, -0x80(%r30)      C low product to  -0x80..-0x79
        addib,<>        -1, %r5, L(two_or_more)
        fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
LDEF(one)
C Exactly one odd limb: fetch its products and finish in the common tail.
        ldd             -0x78(%r30), p032a1
        ldd             -0x70(%r30), p032a2
        ldd             -0x80(%r30), p000a
        b               L(0_one_out)
        ldd             -0x68(%r30), p064a

LDEF(two_or_more)
C Second odd limb: start its products while fetching those of the first one
C from the same stack slots (each ldd precedes the fstd that reuses the slot).
        fldd            0(up), %fr4
        ldo             8(up), up
        xmpyu           %fr8R, %fr4L, %fr22
        xmpyu           %fr8L, %fr4R, %fr23
        ldd             -0x78(%r30), p032a1
        fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
        xmpyu           %fr8R, %fr4R, %fr24
        xmpyu           %fr8L, %fr4L, %fr25
        ldd             -0x70(%r30), p032a2
        fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
        ldd             -0x80(%r30), p000a
        fstd            %fr24, -0x80(%r30)      C low product to  -0x80..-0x79
        ldd             -0x68(%r30), p064a
        addib,<>        -1, %r5, L(three_or_more)
        fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
LDEF(two)
C Exactly two odd limbs: sum and split the mids of the first limb, then join
C the wind-down code at the point that still has one limb in flight.
        add             p032a1, p032a2, m032
        add,dc          %r0, %r0, m096
        depd,z          m032, 31, 32, ma000
        extrd,u         m032, 31, 32, ma064
        ldd             0(rp), r000
        b               L(0_two_out)
        depd            m096, 31, 32, ma064

LDEF(three_or_more)
C Three odd limbs.  n mod 4 is at most 3, so the general loop below is not
C needed and has been left commented out; execution falls through it into
C 0_out.
        fldd            0(up), %fr4
        add             p032a1, p032a2, m032
        add,dc          %r0, %r0, m096
        depd,z          m032, 31, 32, ma000
        extrd,u         m032, 31, 32, ma064
        ldd             0(rp), r000
C       addib,=         -1, %r5, L(0_out)
        depd            m096, 31, 32, ma064
LDEF(loop0)
C       xmpyu           %fr8R, %fr4L, %fr22
C       xmpyu           %fr8L, %fr4R, %fr23
C       ldd             -0x78(%r30), p032a1
C       fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
C
C       xmpyu           %fr8R, %fr4R, %fr24
C       xmpyu           %fr8L, %fr4L, %fr25
C       ldd             -0x70(%r30), p032a2
C       fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
C
C       ldo             8(rp), rp
C       add             climb, p000a, s000
C       ldd             -0x80(%r30), p000a
C       fstd            %fr24, -0x80(%r30)      C low product to  -0x80..-0x79
C
C       add,dc          p064a, %r0, climb
C       ldo             8(up), up
C       ldd             -0x68(%r30), p064a
C       fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
C
C       add             ma000, s000, s000
C       add,dc          ma064, climb, climb
C       fldd            0(up), %fr4
C
C       add             r000, s000, s000
C       add,dc          %r0, climb, climb
C       std             s000, -8(rp)
C
C       add             p032a1, p032a2, m032
C       add,dc          %r0, %r0, m096
C
C       depd,z          m032, 31, 32, ma000
C       extrd,u         m032, 31, 32, ma064
C       ldd             0(rp), r000
C       addib,<>        -1, %r5, L(loop0)
C       depd            m096, 31, 32, ma064
LDEF(0_out)
C Wind-down, stage 1: multiply the last odd limb while finishing and storing
C the limb whose products are already in registers.
        ldo             8(up), up
        xmpyu           %fr8R, %fr4L, %fr22
        xmpyu           %fr8L, %fr4R, %fr23
        ldd             -0x78(%r30), p032a1
        fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
        xmpyu           %fr8R, %fr4R, %fr24
        xmpyu           %fr8L, %fr4L, %fr25
        ldd             -0x70(%r30), p032a2
        fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
        ldo             8(rp), rp
        add             climb, p000a, s000
        ldd             -0x80(%r30), p000a
        fstd            %fr24, -0x80(%r30)      C low product to  -0x80..-0x79
        add,dc          p064a, %r0, climb
        ldd             -0x68(%r30), p064a
        fstd            %fr25, -0x68(%r30)      C high product to -0x68..-0x61
        add             ma000, s000, s000
        add,dc          ma064, climb, climb
        add             r000, s000, s000
        add,dc          %r0, climb, climb
        std             s000, -8(rp)
        add             p032a1, p032a2, m032
        add,dc          %r0, %r0, m096
        depd,z          m032, 31, 32, ma000
        extrd,u         m032, 31, 32, ma064
        ldd             0(rp), r000
        depd            m096, 31, 32, ma064
LDEF(0_two_out)
C Wind-down, stage 2: fetch the products of the final limb from the stack
C while finishing and storing the previous limb.
        ldd             -0x78(%r30), p032a1
        ldd             -0x70(%r30), p032a2
        ldo             8(rp), rp
        add             climb, p000a, s000
        ldd             -0x80(%r30), p000a
        add,dc          p064a, %r0, climb
        ldd             -0x68(%r30), p064a
        add             ma000, s000, s000
        add,dc          ma064, climb, climb
        add             r000, s000, s000
        add,dc          %r0, climb, climb
        std             s000, -8(rp)
LDEF(0_one_out)
C Wind-down, stage 3: sum and split the mids of the final limb, then add
C carry, low/high products, mid parts and the rp limb, and store.
        add             p032a1, p032a2, m032
        add,dc          %r0, %r0, m096
        depd,z          m032, 31, 32, ma000
        extrd,u         m032, 31, 32, ma064
        ldd             0(rp), r000
        depd            m096, 31, 32, ma064

        add             climb, p000a, s000
        add,dc          p064a, %r0, climb
        add             ma000, s000, s000
        add,dc          ma064, climb, climb
        add             r000, s000, s000
        add,dc          %r0, climb, climb
        std             s000, 0(rp)

C Branch to done when 4 >= n, i.e. no group of four limbs remains.  The rp
C advance sits in the delay slot.
        cmpib,>=        4, n, L(done)
        ldo             8(rp), rp

C 4-way unrolled code.

LDEF(BIG)

C Register names for the 4-way code.  The names are redefined here; again
C several of them share one register.
C
C Mid products of limbs a, b, c and d (two per limb).
define(`p032a1',`%r1')          C
define(`p032a2',`%r19')         C
define(`p096b1',`%r20')         C
define(`p096b2',`%r21')         C
define(`p160c1',`%r22')         C
define(`p160c2',`%r29')         C
define(`p224d1',`%r31')         C
define(`p224d2',`%r3')          C
C
C Mid sums per limb; m288 receives the final carry of the chain.
define(`m032',`%r4')            C
define(`m096',`%r5')            C
define(`m160',`%r6')            C
define(`m224',`%r7')            C
define(`m288',`%r8')            C
C
C Low and high products of limbs a, b, c and d.
define(`p000a',`%r1')           C
define(`p064a',`%r19')          C
define(`p064b',`%r20')          C
define(`p128b',`%r21')          C
define(`p128c',`%r22')          C
define(`p192c',`%r29')          C
define(`p192d',`%r31')          C
define(`p256d',`%r3')           C
C
C The four result limbs being accumulated.
define(`s000',`%r10')           C
define(`s064',`%r11')           C
define(`s128',`%r12')           C
define(`s192',`%r13')           C
C
C Mid sums after being split and recombined across limb boundaries.
define(`ma000',`%r9')           C
define(`ma064',`%r4')           C
define(`ma128',`%r5')           C
define(`ma192',`%r6')           C
define(`ma256',`%r7')           C
C
C Limbs loaded from rp.
define(`r000',`%r1')            C
define(`r064',`%r19')           C
define(`r128',`%r20')           C
define(`r192',`%r21')           C

C Save the additional registers used by the 4-way code.
        std             %r6, -0xe8(%r30)
        std             %r7, -0xe0(%r30)
        std             %r8, -0xd8(%r30)
        std             %r9, -0xd0(%r30)
        std             %r10, -0xc8(%r30)
        std             %r11, -0xc0(%r30)
        std             %r12, -0xb8(%r30)
        std             %r13, -0xb0(%r30)

C Turn n into the number of 4-limb groups.
ifdef(`HAVE_ABI_2_0w',
`       extrd,u         n, 61, 62, n            C right shift 2
',`     extrd,u         n, 61, 30, n            C right shift 2, zero extend
')

LDEF(4_or_more)
C Feed-in: load the first four limbs and start all sixteen partial products.
C %fr22-%fr27 are reused for the low/high products once the mids held in
C them have been stored.
        fldd            0(up), %fr4
        fldd            8(up), %fr5
        fldd            16(up), %fr6
        fldd            24(up), %fr7
        xmpyu           %fr8R, %fr4L, %fr22
        xmpyu           %fr8L, %fr4R, %fr23
        xmpyu           %fr8R, %fr5L, %fr24
        xmpyu           %fr8L, %fr5R, %fr25
        xmpyu           %fr8R, %fr6L, %fr26
        xmpyu           %fr8L, %fr6R, %fr27
        fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
        xmpyu           %fr8R, %fr7L, %fr28
        xmpyu           %fr8L, %fr7R, %fr29
        fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
        xmpyu           %fr8R, %fr4R, %fr30
        xmpyu           %fr8L, %fr4L, %fr31
        fstd            %fr24, -0x38(%r30)      C mid product to  -0x38..-0x31
        xmpyu           %fr8R, %fr5R, %fr22
        xmpyu           %fr8L, %fr5L, %fr23
        fstd            %fr25, -0x30(%r30)      C mid product to  -0x30..-0x29
        xmpyu           %fr8R, %fr6R, %fr24
        xmpyu           %fr8L, %fr6L, %fr25
        fstd            %fr26, -0x58(%r30)      C mid product to  -0x58..-0x51
        xmpyu           %fr8R, %fr7R, %fr26
        fstd            %fr27, -0x50(%r30)      C mid product to  -0x50..-0x49
        addib,<>        -1, n, L(8_or_more)
        xmpyu           %fr8L, %fr7L, %fr27
C Only one group of four: store the remaining products, fetch the mids and
C go straight to the final wind-down.
        fstd            %fr28, -0x18(%r30)      C mid product to  -0x18..-0x11
        fstd            %fr29, -0x10(%r30)      C mid product to  -0x10..-0x09
        fstd            %fr30, -0x80(%r30)      C low product to  -0x80..-0x79
        fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61
        fstd            %fr22, -0x40(%r30)      C low product to  -0x40..-0x39
        fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21
        fstd            %fr24, -0x60(%r30)      C low product to  -0x60..-0x59
        fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41
        fstd            %fr26, -0x20(%r30)      C low product to  -0x20..-0x19
        fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81
        ldd             -0x78(%r30), p032a1
        ldd             -0x70(%r30), p032a2
        ldd             -0x38(%r30), p096b1
        ldd             -0x30(%r30), p096b2
        ldd             -0x58(%r30), p160c1
        ldd             -0x50(%r30), p160c2
        ldd             -0x18(%r30), p224d1
        ldd             -0x10(%r30), p224d2
        b               L(end1)
        nop

LDEF(8_or_more)
C At least two groups: store the rest of group one, then start the products
C of group two while fetching the mids of group one.
        fstd            %fr28, -0x18(%r30)      C mid product to  -0x18..-0x11
        fstd            %fr29, -0x10(%r30)      C mid product to  -0x10..-0x09
        ldo             32(up), up
        fstd            %fr30, -0x80(%r30)      C low product to  -0x80..-0x79
        fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61
        fstd            %fr22, -0x40(%r30)      C low product to  -0x40..-0x39
        fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21
        fstd            %fr24, -0x60(%r30)      C low product to  -0x60..-0x59
        fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41
        fstd            %fr26, -0x20(%r30)      C low product to  -0x20..-0x19
        fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81
        fldd            0(up), %fr4
        fldd            8(up), %fr5
        fldd            16(up), %fr6
        fldd            24(up), %fr7
        xmpyu           %fr8R, %fr4L, %fr22
        ldd             -0x78(%r30), p032a1
        xmpyu           %fr8L, %fr4R, %fr23
        xmpyu           %fr8R, %fr5L, %fr24
        ldd             -0x70(%r30), p032a2
        xmpyu           %fr8L, %fr5R, %fr25
        xmpyu           %fr8R, %fr6L, %fr26
        ldd             -0x38(%r30), p096b1
        xmpyu           %fr8L, %fr6R, %fr27
        fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71
        xmpyu           %fr8R, %fr7L, %fr28
        ldd             -0x30(%r30), p096b2
        xmpyu           %fr8L, %fr7R, %fr29
        fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69
        xmpyu           %fr8R, %fr4R, %fr30
        ldd             -0x58(%r30), p160c1
        xmpyu           %fr8L, %fr4L, %fr31
        fstd            %fr24, -0x38(%r30)      C mid product to  -0x38..-0x31
        xmpyu           %fr8R, %fr5R, %fr22
        ldd             -0x50(%r30), p160c2
        xmpyu           %fr8L, %fr5L, %fr23
        fstd            %fr25, -0x30(%r30)      C mid product to  -0x30..-0x29
        xmpyu           %fr8R, %fr6R, %fr24
        ldd             -0x18(%r30), p224d1
        xmpyu           %fr8L, %fr6L, %fr25
        fstd            %fr26, -0x58(%r30)      C mid product to  -0x58..-0x51
        xmpyu           %fr8R, %fr7R, %fr26
        ldd             -0x10(%r30), p224d2
        fstd            %fr27, -0x50(%r30)      C mid product to  -0x50..-0x49
        addib,=         -1, n, L(end2)
        xmpyu           %fr8L, %fr7L, %fr27
LDEF(loop)
C Steady state.  Each pass finishes the group whose products are on the
C stack, stores the products of the next group into the slots just read,
C and starts multiplying the group after that.  Instructions are kept in
C the original groups of four; do not reorder them, because a slot must be
C read before the fstd that overwrites it.
        add             p032a1, p032a2, m032
        ldd             -0x80(%r30), p000a
        add,dc          p096b1, p096b2, m096
        fstd            %fr28, -0x18(%r30)      C mid product to  -0x18..-0x11

        add,dc          p160c1, p160c2, m160
        ldd             -0x68(%r30), p064a
        add,dc          p224d1, p224d2, m224
        fstd            %fr29, -0x10(%r30)      C mid product to  -0x10..-0x09

        add,dc          %r0, %r0, m288
        ldd             -0x40(%r30), p064b
        ldo             32(up), up
        fstd            %fr30, -0x80(%r30)      C low product to  -0x80..-0x79

        depd,z          m032, 31, 32, ma000
        ldd             -0x28(%r30), p128b
        extrd,u         m032, 31, 32, ma064
        fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61

        depd            m096, 31, 32, ma064
        ldd             -0x60(%r30), p128c
        extrd,u         m096, 31, 32, ma128
        fstd            %fr22, -0x40(%r30)      C low product to  -0x40..-0x39

        depd            m160, 31, 32, ma128
        ldd             -0x48(%r30), p192c
        extrd,u         m160, 31, 32, ma192
        fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21

        depd            m224, 31, 32, ma192
        ldd             -0x20(%r30), p192d
        extrd,u         m224, 31, 32, ma256
        fstd            %fr24, -0x60(%r30)      C low product to  -0x60..-0x59

        depd            m288, 31, 32, ma256
        ldd             -0x88(%r30), p256d
        add             climb, p000a, s000
        fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41

        add,dc          p064a, p064b, s064
        ldd             0(rp), r000
        add,dc          p128b, p128c, s128
        fstd            %fr26, -0x20(%r30)      C low product to  -0x20..-0x19

        add,dc          p192c, p192d, s192
        ldd             8(rp), r064
        add,dc          p256d, %r0, climb
        fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81

        ldd             16(rp), r128
        add             ma000, s000, s000       C accum mid 0
        ldd             24(rp), r192
        add,dc          ma064, s064, s064       C accum mid 1

        add,dc          ma128, s128, s128       C accum mid 2
        fldd            0(up), %fr4
        add,dc          ma192, s192, s192       C accum mid 3
        fldd            8(up), %fr5

        add,dc          ma256, climb, climb
        fldd            16(up), %fr6
        add             r000, s000, s000        C accum rlimb 0
        fldd            24(up), %fr7

        add,dc          r064, s064, s064        C accum rlimb 1
        add,dc          r128, s128, s128        C accum rlimb 2
        std             s000, 0(rp)

        add,dc          r192, s192, s192        C accum rlimb 3
        add,dc          %r0, climb, climb
        std             s064, 8(rp)

        xmpyu           %fr8R, %fr4L, %fr22
        ldd             -0x78(%r30), p032a1
        xmpyu           %fr8L, %fr4R, %fr23
        std             s128, 16(rp)

        xmpyu           %fr8R, %fr5L, %fr24
        ldd             -0x70(%r30), p032a2
        xmpyu           %fr8L, %fr5R, %fr25
        std             s192, 24(rp)

        xmpyu           %fr8R, %fr6L, %fr26
        ldd             -0x38(%r30), p096b1
        xmpyu           %fr8L, %fr6R, %fr27
        fstd            %fr22, -0x78(%r30)      C mid product to  -0x78..-0x71

        xmpyu           %fr8R, %fr7L, %fr28
        ldd             -0x30(%r30), p096b2
        xmpyu           %fr8L, %fr7R, %fr29
        fstd            %fr23, -0x70(%r30)      C mid product to  -0x70..-0x69

        xmpyu           %fr8R, %fr4R, %fr30
        ldd             -0x58(%r30), p160c1
        xmpyu           %fr8L, %fr4L, %fr31
        fstd            %fr24, -0x38(%r30)      C mid product to  -0x38..-0x31

        xmpyu           %fr8R, %fr5R, %fr22
        ldd             -0x50(%r30), p160c2
        xmpyu           %fr8L, %fr5L, %fr23
        fstd            %fr25, -0x30(%r30)      C mid product to  -0x30..-0x29

        xmpyu           %fr8R, %fr6R, %fr24
        ldd             -0x18(%r30), p224d1
        xmpyu           %fr8L, %fr6L, %fr25
        fstd            %fr26, -0x58(%r30)      C mid product to  -0x58..-0x51

        xmpyu           %fr8R, %fr7R, %fr26
        ldd             -0x10(%r30), p224d2
        fstd            %fr27, -0x50(%r30)      C mid product to  -0x50..-0x49
        xmpyu           %fr8L, %fr7L, %fr27

        addib,<>        -1, n, L(loop)
        ldo             32(rp), rp

LDEF(end2)
C Wind-down with two groups in flight: finish and store the older group
C while moving the products of the last group from FP registers to the
C stack.  No further limbs are loaded from up.
        add             p032a1, p032a2, m032
        ldd             -0x80(%r30), p000a
        add,dc          p096b1, p096b2, m096
        fstd            %fr28, -0x18(%r30)      C mid product to  -0x18..-0x11
        add,dc          p160c1, p160c2, m160
        ldd             -0x68(%r30), p064a
        add,dc          p224d1, p224d2, m224
        fstd            %fr29, -0x10(%r30)      C mid product to  -0x10..-0x09
        add,dc          %r0, %r0, m288
        ldd             -0x40(%r30), p064b
        fstd            %fr30, -0x80(%r30)      C low product to  -0x80..-0x79
        depd,z          m032, 31, 32, ma000
        ldd             -0x28(%r30), p128b
        extrd,u         m032, 31, 32, ma064
        fstd            %fr31, -0x68(%r30)      C high product to -0x68..-0x61
        depd            m096, 31, 32, ma064
        ldd             -0x60(%r30), p128c
        extrd,u         m096, 31, 32, ma128
        fstd            %fr22, -0x40(%r30)      C low product to  -0x40..-0x39
        depd            m160, 31, 32, ma128
        ldd             -0x48(%r30), p192c
        extrd,u         m160, 31, 32, ma192
        fstd            %fr23, -0x28(%r30)      C high product to -0x28..-0x21
        depd            m224, 31, 32, ma192
        ldd             -0x20(%r30), p192d
        extrd,u         m224, 31, 32, ma256
        fstd            %fr24, -0x60(%r30)      C low product to  -0x60..-0x59
        depd            m288, 31, 32, ma256
        ldd             -0x88(%r30), p256d
        add             climb, p000a, s000
        fstd            %fr25, -0x48(%r30)      C high product to -0x48..-0x41
        add,dc          p064a, p064b, s064
        ldd             0(rp), r000
        add,dc          p128b, p128c, s128
        fstd            %fr26, -0x20(%r30)      C low product to  -0x20..-0x19
        add,dc          p192c, p192d, s192
        ldd             8(rp), r064
        add,dc          p256d, %r0, climb
        fstd            %fr27, -0x88(%r30)      C high product to -0x88..-0x81
        ldd             16(rp), r128
        add             ma000, s000, s000       C accum mid 0
        ldd             24(rp), r192
        add,dc          ma064, s064, s064       C accum mid 1
        add,dc          ma128, s128, s128       C accum mid 2
        add,dc          ma192, s192, s192       C accum mid 3
        add,dc          ma256, climb, climb
        add             r000, s000, s000        C accum rlimb 0
        add,dc          r064, s064, s064        C accum rlimb 1
        add,dc          r128, s128, s128        C accum rlimb 2
        std             s000, 0(rp)
        add,dc          r192, s192, s192        C accum rlimb 3
        add,dc          %r0, climb, climb
        std             s064, 8(rp)
        ldd             -0x78(%r30), p032a1
        std             s128, 16(rp)
        ldd             -0x70(%r30), p032a2
        std             s192, 24(rp)
        ldd             -0x38(%r30), p096b1
        ldd             -0x30(%r30), p096b2
        ldd             -0x58(%r30), p160c1
        ldd             -0x50(%r30), p160c2
        ldd             -0x18(%r30), p224d1
        ldd             -0x10(%r30), p224d2
        ldo             32(rp), rp

LDEF(end1)
C Final wind-down: the last group has all products on the stack and its
C mids in registers.  Sum, split, accumulate and store the last four limbs.
        add             p032a1, p032a2, m032
        ldd             -0x80(%r30), p000a
        add,dc          p096b1, p096b2, m096
        add,dc          p160c1, p160c2, m160
        ldd             -0x68(%r30), p064a
        add,dc          p224d1, p224d2, m224
        add,dc          %r0, %r0, m288
        ldd             -0x40(%r30), p064b
        depd,z          m032, 31, 32, ma000
        ldd             -0x28(%r30), p128b
        extrd,u         m032, 31, 32, ma064
        depd            m096, 31, 32, ma064
        ldd             -0x60(%r30), p128c
        extrd,u         m096, 31, 32, ma128
        depd            m160, 31, 32, ma128
        ldd             -0x48(%r30), p192c
        extrd,u         m160, 31, 32, ma192
        depd            m224, 31, 32, ma192
        ldd             -0x20(%r30), p192d
        extrd,u         m224, 31, 32, ma256
        depd            m288, 31, 32, ma256
        ldd             -0x88(%r30), p256d
        add             climb, p000a, s000
        add,dc          p064a, p064b, s064
        ldd             0(rp), r000
        add,dc          p128b, p128c, s128
        add,dc          p192c, p192d, s192
        ldd             8(rp), r064
        add,dc          p256d, %r0, climb
        ldd             16(rp), r128
        add             ma000, s000, s000       C accum mid 0
        ldd             24(rp), r192
        add,dc          ma064, s064, s064       C accum mid 1
        add,dc          ma128, s128, s128       C accum mid 2
        add,dc          ma192, s192, s192       C accum mid 3
        add,dc          ma256, climb, climb
        add             r000, s000, s000        C accum rlimb 0
        add,dc          r064, s064, s064        C accum rlimb 1
        add,dc          r128, s128, s128        C accum rlimb 2
        std             s000, 0(rp)
        add,dc          r192, s192, s192        C accum rlimb 3
        add,dc          %r0, climb, climb
        std             s064, 8(rp)
        std             s128, 16(rp)
        std             s192, 24(rp)

C Restore the registers saved on entry to the 4-way code.
        ldd             -0xb0(%r30), %r13
        ldd             -0xb8(%r30), %r12
        ldd             -0xc0(%r30), %r11
        ldd             -0xc8(%r30), %r10
        ldd             -0xd0(%r30), %r9
        ldd             -0xd8(%r30), %r8
        ldd             -0xe0(%r30), %r7
        ldd             -0xe8(%r30), %r6
LDEF(done)
C Return climb: whole in %r28 for 2.0w; for 2.0N the low 32 bits go to %r29
C and the high 32 bits to %r28.
ifdef(`HAVE_ABI_2_0w',
`       copy            climb, %r28
',`     extrd,u         climb, 63, 32, %r29
        extrd,u         climb, 31, 32, %r28
')
C Restore %r5 and %r4, then return; the delay-slot ldd,mb restores %r3 and
C pops the 0x100-byte frame.
        ldd             -0xf0(%r30), %r5
        ldd             -0xf8(%r30), %r4
        bve             (%r2)
        ldd,mb          -0x100(%r30), %r3
EPILOGUE(mpn_addmul_1)