! SPARC T4 montmul/montsqr front ends for fixed operand sizes.
! Syntax: GNU as for SPARC V9, run through the C preprocessor.
!
! Layout restored to one statement per line; the revision-annotation
! columns that were fused into each statement have been dropped.
! Instructions, operands, .word encodings and labels are unchanged.
!
! Every routine in this file follows the same shape:
!   1. open a frame and copy the five incoming arguments
!      %i0..%i4 to %g1..%g4 and %f60,
!   2. open six further register windows, filling them (and some
!      double-precision FP registers) with 64-bit limbs read through
!      %g2, %g4 and %g3,
!   3. issue montmul, or montsqr when %g2 == %g3,
!   4. unwind the windows, move the result to FP registers and store
!      it through %g1,
!   5. return 1 in %i0 on success, 0 on failure.
!
! Argument roles are presumably (rp, ap, bp, np, n0) as in the OpenSSL
! bn_mul_mont_t4 family - TODO confirm against the C prototype.
!
! NOTE(review): on 32-bit builds %g5 carries the bits 63..32 mask and
! is ORed into %fp after every save; finding those bits cleared again
! later is treated as failure. Presumably this detects a register
! window that was spilled and refilled with only 32 bits kept -
! confirm.

#ifndef __ASSEMBLER__
# define __ASSEMBLER__ 1
#endif
#include "crypto/sparc_arch.h"

#ifdef __arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif

.section	".text",#alloc,#execinstr

#ifdef __PIC__
SPARC_PIC_THUNK(%g1)
#endif

! ----------------------------------------------------------------------
! bn_mul_mont_t4_8 - 8 limbs of 64 bits per operand.
! In:   %i0 -> %g1  destination of the 8 result limbs
!       %i1 -> %g2  first source operand
!       %i2 -> %g3  second source operand (same pointer as %g2
!                   selects the montsqr path)
!       %i3 -> %g4  third source operand
!       %i4         pointer to two 32-bit words placed in %f60
! Out:  %i0 = 1 on success, 0 on failure
! Fail: %fcc3 unordered after the opcode, or (32-bit builds only)
!       the %fp tag bits found cleared.
! ----------------------------------------------------------------------
.globl	bn_mul_mont_t4_8
.align	32
bn_mul_mont_t4_8:
	! Pick the %fp tag (%g5) and the size of the first frame (%g4).
#ifdef __arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9cap_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4	! capability bit clear: drop the -2047
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5	! keep only bits 63..32 of the tag
	save	%sp,%g4,%sp
#ifndef __arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4	! low bit of %sp joins the tag
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	mov	%i2,%g3
	mov	%i3,%g4
	ld	[%i4+0],%f1	! load *n0
	ld	[%i4+4],%f0
	.word	0xbbb00f00 !fsrc2 %f0,%f0,%f60

	! Limbs 0..7 read through %g2. Each limb is built from two
	! 32-bit loads: the word at +4 becomes bits 63..32, the word at
	! +0 becomes bits 31..0. The next register up is used as scratch.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g2+0*8+0],%l1
	ld	[%g2+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g2+1*8+0],%l2
	ld	[%g2+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g2+2*8+0],%l3
	ld	[%g2+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g2+3*8+0],%l4
	ld	[%g2+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g2+4*8+0],%l5
	ld	[%g2+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g2+5*8+0],%l6
	ld	[%g2+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g2+6*8+0],%l7
	ld	[%g2+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g2+7*8+0],%o0
	ld	[%g2+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7

	! Limbs 0..7 read through %g4, same packing, next window.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+0*8+0],%l1
	ld	[%g4+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+1*8+0],%l2
	ld	[%g4+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+2*8+0],%l3
	ld	[%g4+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+3*8+0],%l4
	ld	[%g4+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+4*8+0],%l5
	ld	[%g4+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+5*8+0],%l6
	ld	[%g4+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+6*8+0],%l7
	ld	[%g4+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+7*8+0],%o0
	ld	[%g4+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7

	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	cmp	%g2,%g3
	be	SIZE_T_CC,.Lmsquare_8	! identical operand pointers
	nop

	! Limbs 0..7 read through %g3 (multiply path only).
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+0*8+0],%i1
	ld	[%g3+0*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+1*8+0],%i2
	ld	[%g3+1*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+2*8+0],%i3
	ld	[%g3+2*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+3*8+0],%i4
	ld	[%g3+3*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+4*8+0],%i5
	ld	[%g3+4*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+5*8+0],%l0
	ld	[%g3+5*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+6*8+0],%l1
	ld	[%g3+6*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+7*8+0],%l2
	ld	[%g3+7*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	.word	0x81b02920+8-1	! montmul	8-1
.Lmresume_8:
	fbu,pn	%fcc3,.Lmabort_8
#ifndef __arch64__
	and	%fp,%g5,%g5	! executes in the fbu delay slot
	brz,pn	%g5,.Lmabort_8
#endif
	nop
	! Step back five windows; 32-bit builds re-check the tag each time.
#ifdef __arch64__
	restore
	restore
	restore
	restore
	restore
#else
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Lmabort1_8
	restore			! delay slot: fifth restore on both paths
#endif
	! Result limbs 0..7 move from integer to FP registers.
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
#ifdef __arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Lmdone_8
	mov	0,%i0		! return failure
#endif
	! Store the result through %g1: odd FP register to +0, even FP
	! register to +4, matching the word order used by the loads.
	st	%f1,[%g1+0*8+0]
	st	%f0,[%g1+0*8+4]
	st	%f3,[%g1+1*8+0]
	st	%f2,[%g1+1*8+4]
	st	%f5,[%g1+2*8+0]
	st	%f4,[%g1+2*8+4]
	st	%f7,[%g1+3*8+0]
	st	%f6,[%g1+3*8+4]
	st	%f9,[%g1+4*8+0]
	st	%f8,[%g1+4*8+4]
	st	%f11,[%g1+5*8+0]
	st	%f10,[%g1+5*8+4]
	st	%f13,[%g1+6*8+0]
	st	%f12,[%g1+6*8+4]
	st	%f15,[%g1+7*8+0]
	st	%f14,[%g1+7*8+4]
	mov	1,%i0		! return success
.Lmdone_8:
	ret
	restore

	! Failure exits: unwind the six operand windows, then return 0.
.Lmabort_8:
	restore
	restore
	restore
	restore
	restore
.Lmabort1_8:
	restore

	mov	0,%i0		! return failure
	ret
	restore

.align	32
.Lmsquare_8:
	! Square path: open the two windows the multiply path would have
	! opened, without loading anything through %g3.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	.word   0x81b02940+8-1	! montsqr	8-1
	ba	.Lmresume_8
	nop
.type	bn_mul_mont_t4_8, #function
.size	bn_mul_mont_t4_8, .-bn_mul_mont_t4_8

! ----------------------------------------------------------------------
! bn_mul_mont_t4_16 - 16 limbs of 64 bits per operand.
! Same arguments, return value and failure conditions as
! bn_mul_mont_t4_8. Limbs that do not fit the integer registers of a
! window spill into the next window or, for the %g2 operand, into
! %f24 and %f26.
! ----------------------------------------------------------------------
.globl	bn_mul_mont_t4_16
.align	32
bn_mul_mont_t4_16:
	! Pick the %fp tag (%g5) and the size of the first frame (%g4).
#ifdef __arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9cap_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4	! capability bit clear: drop the -2047
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5	! keep only bits 63..32 of the tag
	save	%sp,%g4,%sp
#ifndef __arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4	! low bit of %sp joins the tag
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	mov	%i2,%g3
	mov	%i3,%g4
	ld	[%i4+0],%f1	! load *n0
	ld	[%i4+4],%f0
	.word	0xbbb00f00 !fsrc2 %f0,%f0,%f60

	! Limbs 0..13 read through %g2 into %l0-%l7, %o0-%o5;
	! limbs 14..15 go to %f24 and %f26.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g2+0*8+0],%l1
	ld	[%g2+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g2+1*8+0],%l2
	ld	[%g2+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g2+2*8+0],%l3
	ld	[%g2+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g2+3*8+0],%l4
	ld	[%g2+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g2+4*8+0],%l5
	ld	[%g2+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g2+5*8+0],%l6
	ld	[%g2+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g2+6*8+0],%l7
	ld	[%g2+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g2+7*8+0],%o0
	ld	[%g2+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g2+8*8+0],%o1
	ld	[%g2+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g2+9*8+0],%o2
	ld	[%g2+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g2+10*8+0],%o3
	ld	[%g2+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g2+11*8+0],%o4
	ld	[%g2+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g2+12*8+0],%o5
	ld	[%g2+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g2+13*8+0],%o7
	ld	[%g2+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	ld	[%g2+14*8+0],%f5
	ld	[%g2+14*8+4],%f4
	.word	0xb1b00f04 !fsrc2 %f0,%f4,%f24
	ld	[%g2+15*8+0],%f7
	ld	[%g2+15*8+4],%f6
	.word	0xb5b00f06 !fsrc2 %f0,%f6,%f26

	! Limbs 0..13 read through %g4; limbs 14..15 follow in the
	! next window.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+0*8+0],%l1
	ld	[%g4+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+1*8+0],%l2
	ld	[%g4+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+2*8+0],%l3
	ld	[%g4+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+3*8+0],%l4
	ld	[%g4+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+4*8+0],%l5
	ld	[%g4+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+5*8+0],%l6
	ld	[%g4+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+6*8+0],%l7
	ld	[%g4+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+7*8+0],%o0
	ld	[%g4+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g4+8*8+0],%o1
	ld	[%g4+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g4+9*8+0],%o2
	ld	[%g4+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g4+10*8+0],%o3
	ld	[%g4+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g4+11*8+0],%o4
	ld	[%g4+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g4+12*8+0],%o5
	ld	[%g4+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g4+13*8+0],%o7
	ld	[%g4+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+14*8+0],%l1
	ld	[%g4+14*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+15*8+0],%l2
	ld	[%g4+15*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1

	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	cmp	%g2,%g3
	be	SIZE_T_CC,.Lmsquare_16	! identical operand pointers
	nop

	! Limbs 0..13 read through %g3 into %i0-%i5, %l0-%l7;
	! limbs 14..15 follow in the next window (multiply path only).
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+0*8+0],%i1
	ld	[%g3+0*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+1*8+0],%i2
	ld	[%g3+1*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+2*8+0],%i3
	ld	[%g3+2*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+3*8+0],%i4
	ld	[%g3+3*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+4*8+0],%i5
	ld	[%g3+4*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+5*8+0],%l0
	ld	[%g3+5*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+6*8+0],%l1
	ld	[%g3+6*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+7*8+0],%l2
	ld	[%g3+7*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g3+8*8+0],%l3
	ld	[%g3+8*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g3+9*8+0],%l4
	ld	[%g3+9*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g3+10*8+0],%l5
	ld	[%g3+10*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g3+11*8+0],%l6
	ld	[%g3+11*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g3+12*8+0],%l7
	ld	[%g3+12*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g3+13*8+0],%o7
	ld	[%g3+13*8+4],%l7
	sllx	%l7,32,%l7
	or	%o7,%l7,%l7
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+14*8+0],%i1
	ld	[%g3+14*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+15*8+0],%o7
	ld	[%g3+15*8+4],%i1
	sllx	%i1,32,%i1
	or	%o7,%i1,%i1
	.word	0x81b02920+16-1	! montmul	16-1
.Lmresume_16:
	fbu,pn	%fcc3,.Lmabort_16
#ifndef __arch64__
	and	%fp,%g5,%g5	! executes in the fbu delay slot
	brz,pn	%g5,.Lmabort_16
#endif
	nop
	! Step back five windows; 32-bit builds re-check the tag each time.
#ifdef __arch64__
	restore
	restore
	restore
	restore
	restore
#else
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Lmabort1_16
	restore			! delay slot: fifth restore on both paths
#endif
	! Result limbs 0..13 move from integer to FP registers
	! (limbs 12 and 13 are parked in %f60 and %f62).
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
	.word	0xa1b02308 !movxtod	%o0,%f16
	.word	0xa5b02309 !movxtod	%o1,%f18
	.word	0xa9b0230a !movxtod	%o2,%f20
	.word	0xadb0230b !movxtod	%o3,%f22
	.word	0xbbb0230c !movxtod	%o4,%f60
	.word	0xbfb0230d !movxtod	%o5,%f62
#ifdef __arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Lmdone_16
	mov	0,%i0		! return failure
#endif
	! Store the result through %g1: odd FP register to +0, even FP
	! register to +4. Limbs held above %f31 are first copied down
	! with fsrc2 so that single-word stores can reach both halves.
	st	%f1,[%g1+0*8+0]
	st	%f0,[%g1+0*8+4]
	st	%f3,[%g1+1*8+0]
	st	%f2,[%g1+1*8+4]
	st	%f5,[%g1+2*8+0]
	st	%f4,[%g1+2*8+4]
	st	%f7,[%g1+3*8+0]
	st	%f6,[%g1+3*8+4]
	st	%f9,[%g1+4*8+0]
	st	%f8,[%g1+4*8+4]
	st	%f11,[%g1+5*8+0]
	st	%f10,[%g1+5*8+4]
	st	%f13,[%g1+6*8+0]
	st	%f12,[%g1+6*8+4]
	st	%f15,[%g1+7*8+0]
	st	%f14,[%g1+7*8+4]
	st	%f17,[%g1+8*8+0]
	st	%f16,[%g1+8*8+4]
	st	%f19,[%g1+9*8+0]
	st	%f18,[%g1+9*8+4]
	st	%f21,[%g1+10*8+0]
	st	%f20,[%g1+10*8+4]
	st	%f23,[%g1+11*8+0]
	st	%f22,[%g1+11*8+4]
	.word	0x81b00f1d !fsrc2 %f0,%f60,%f0
	st	%f1,[%g1+12*8+0]
	st	%f0,[%g1+12*8+4]
	.word	0x85b00f1f !fsrc2 %f0,%f62,%f2
	st	%f3,[%g1+13*8+0]
	st	%f2,[%g1+13*8+4]
	.word	0x89b00f18 !fsrc2 %f0,%f24,%f4
	st	%f5,[%g1+14*8+0]
	st	%f4,[%g1+14*8+4]
	.word	0x8db00f1a !fsrc2 %f0,%f26,%f6
	st	%f7,[%g1+15*8+0]
	st	%f6,[%g1+15*8+4]
	mov	1,%i0		! return success
.Lmdone_16:
	ret
	restore

	! Failure exits: unwind the six operand windows, then return 0.
.Lmabort_16:
	restore
	restore
	restore
	restore
	restore
.Lmabort1_16:
	restore

	mov	0,%i0		! return failure
	ret
	restore

.align	32
.Lmsquare_16:
	! Square path: open the two windows the multiply path would have
	! opened, without loading anything through %g3.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	.word   0x81b02940+16-1	! montsqr	16-1
	ba	.Lmresume_16
	nop
.type	bn_mul_mont_t4_16, #function
.size	bn_mul_mont_t4_16, .-bn_mul_mont_t4_16

! ----------------------------------------------------------------------
! bn_mul_mont_t4_24 - 24 limbs of 64 bits per operand.
! Same arguments, return value and failure conditions as
! bn_mul_mont_t4_8. Limbs 14..23 of the %g2 operand are held in
! %f24-%f42 (even registers).
! ----------------------------------------------------------------------
.globl	bn_mul_mont_t4_24
.align	32
bn_mul_mont_t4_24:
	! Pick the %fp tag (%g5) and the size of the first frame (%g4).
#ifdef __arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9cap_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4	! capability bit clear: drop the -2047
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5	! keep only bits 63..32 of the tag
	save	%sp,%g4,%sp
#ifndef __arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4	! low bit of %sp joins the tag
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	mov	%i2,%g3
	mov	%i3,%g4
	ld	[%i4+0],%f1	! load *n0
	ld	[%i4+4],%f0
	.word	0xbbb00f00 !fsrc2 %f0,%f0,%f60

	! Limbs 0..13 read through %g2 into %l0-%l7, %o0-%o5;
	! limbs 14..23 go to %f24..%f42, staged through %f0-%f7.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g2+0*8+0],%l1
	ld	[%g2+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g2+1*8+0],%l2
	ld	[%g2+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g2+2*8+0],%l3
	ld	[%g2+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g2+3*8+0],%l4
	ld	[%g2+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g2+4*8+0],%l5
	ld	[%g2+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g2+5*8+0],%l6
	ld	[%g2+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g2+6*8+0],%l7
	ld	[%g2+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g2+7*8+0],%o0
	ld	[%g2+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g2+8*8+0],%o1
	ld	[%g2+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g2+9*8+0],%o2
	ld	[%g2+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g2+10*8+0],%o3
	ld	[%g2+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g2+11*8+0],%o4
	ld	[%g2+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g2+12*8+0],%o5
	ld	[%g2+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g2+13*8+0],%o7
	ld	[%g2+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	ld	[%g2+14*8+0],%f5
	ld	[%g2+14*8+4],%f4
	.word	0xb1b00f04 !fsrc2 %f0,%f4,%f24
	ld	[%g2+15*8+0],%f7
	ld	[%g2+15*8+4],%f6
	.word	0xb5b00f06 !fsrc2 %f0,%f6,%f26
	ld	[%g2+16*8+0],%f1
	ld	[%g2+16*8+4],%f0
	.word	0xb9b00f00 !fsrc2 %f0,%f0,%f28
	ld	[%g2+17*8+0],%f3
	ld	[%g2+17*8+4],%f2
	.word	0xbdb00f02 !fsrc2 %f0,%f2,%f30
	ld	[%g2+18*8+0],%f5
	ld	[%g2+18*8+4],%f4
	.word	0x83b00f04 !fsrc2 %f0,%f4,%f32
	ld	[%g2+19*8+0],%f7
	ld	[%g2+19*8+4],%f6
	.word	0x87b00f06 !fsrc2 %f0,%f6,%f34
	ld	[%g2+20*8+0],%f1
	ld	[%g2+20*8+4],%f0
	.word	0x8bb00f00 !fsrc2 %f0,%f0,%f36
	ld	[%g2+21*8+0],%f3
	ld	[%g2+21*8+4],%f2
	.word	0x8fb00f02 !fsrc2 %f0,%f2,%f38
	ld	[%g2+22*8+0],%f5
	ld	[%g2+22*8+4],%f4
	.word	0x93b00f04 !fsrc2 %f0,%f4,%f40
	ld	[%g2+23*8+0],%f7
	ld	[%g2+23*8+4],%f6
	.word	0x97b00f06 !fsrc2 %f0,%f6,%f42

	! Limbs 0..13 read through %g4; limbs 14..23 follow in the
	! next window (%l0-%l7, %o0-%o1).
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+0*8+0],%l1
	ld	[%g4+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+1*8+0],%l2
	ld	[%g4+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+2*8+0],%l3
	ld	[%g4+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+3*8+0],%l4
	ld	[%g4+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+4*8+0],%l5
	ld	[%g4+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+5*8+0],%l6
	ld	[%g4+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+6*8+0],%l7
	ld	[%g4+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+7*8+0],%o0
	ld	[%g4+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g4+8*8+0],%o1
	ld	[%g4+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g4+9*8+0],%o2
	ld	[%g4+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g4+10*8+0],%o3
	ld	[%g4+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g4+11*8+0],%o4
	ld	[%g4+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g4+12*8+0],%o5
	ld	[%g4+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g4+13*8+0],%o7
	ld	[%g4+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+14*8+0],%l1
	ld	[%g4+14*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+15*8+0],%l2
	ld	[%g4+15*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+16*8+0],%l3
	ld	[%g4+16*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+17*8+0],%l4
	ld	[%g4+17*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+18*8+0],%l5
	ld	[%g4+18*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+19*8+0],%l6
	ld	[%g4+19*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+20*8+0],%l7
	ld	[%g4+20*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+21*8+0],%o0
	ld	[%g4+21*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g4+22*8+0],%o1
	ld	[%g4+22*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g4+23*8+0],%o2
	ld	[%g4+23*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1

	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	cmp	%g2,%g3
	be	SIZE_T_CC,.Lmsquare_24	! identical operand pointers
	nop

	! Limbs 0..13 read through %g3 into %i0-%i5, %l0-%l7;
	! limbs 14..23 follow in the next window (multiply path only).
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+0*8+0],%i1
	ld	[%g3+0*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+1*8+0],%i2
	ld	[%g3+1*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+2*8+0],%i3
	ld	[%g3+2*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+3*8+0],%i4
	ld	[%g3+3*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+4*8+0],%i5
	ld	[%g3+4*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+5*8+0],%l0
	ld	[%g3+5*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+6*8+0],%l1
	ld	[%g3+6*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+7*8+0],%l2
	ld	[%g3+7*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g3+8*8+0],%l3
	ld	[%g3+8*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g3+9*8+0],%l4
	ld	[%g3+9*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g3+10*8+0],%l5
	ld	[%g3+10*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g3+11*8+0],%l6
	ld	[%g3+11*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g3+12*8+0],%l7
	ld	[%g3+12*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g3+13*8+0],%o7
	ld	[%g3+13*8+4],%l7
	sllx	%l7,32,%l7
	or	%o7,%l7,%l7
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+14*8+0],%i1
	ld	[%g3+14*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+15*8+0],%i2
	ld	[%g3+15*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+16*8+0],%i3
	ld	[%g3+16*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+17*8+0],%i4
	ld	[%g3+17*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+18*8+0],%i5
	ld	[%g3+18*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+19*8+0],%l0
	ld	[%g3+19*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+20*8+0],%l1
	ld	[%g3+20*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+21*8+0],%l2
	ld	[%g3+21*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g3+22*8+0],%l3
	ld	[%g3+22*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g3+23*8+0],%o7
	ld	[%g3+23*8+4],%l3
	sllx	%l3,32,%l3
	or	%o7,%l3,%l3
	.word	0x81b02920+24-1	! montmul	24-1
.Lmresume_24:
	fbu,pn	%fcc3,.Lmabort_24
#ifndef __arch64__
	and	%fp,%g5,%g5	! executes in the fbu delay slot
	brz,pn	%g5,.Lmabort_24
#endif
	nop
	! Step back five windows; 32-bit builds re-check the tag each time.
#ifdef __arch64__
	restore
	restore
	restore
	restore
	restore
#else
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Lmabort1_24
	restore			! delay slot: fifth restore on both paths
#endif
	! Result limbs 0..13 move from integer to FP registers
	! (limbs 12 and 13 are parked in %f60 and %f62).
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
	.word	0xa1b02308 !movxtod	%o0,%f16
	.word	0xa5b02309 !movxtod	%o1,%f18
	.word	0xa9b0230a !movxtod	%o2,%f20
	.word	0xadb0230b !movxtod	%o3,%f22
	.word	0xbbb0230c !movxtod	%o4,%f60
	.word	0xbfb0230d !movxtod	%o5,%f62
#ifdef __arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Lmdone_24
	mov	0,%i0		! return failure
#endif
	! Store the result through %g1: odd FP register to +0, even FP
	! register to +4. Limbs held in %f24 and above are first copied
	! down into %f0-%f6 with fsrc2 before the single-word stores.
	st	%f1,[%g1+0*8+0]
	st	%f0,[%g1+0*8+4]
	st	%f3,[%g1+1*8+0]
	st	%f2,[%g1+1*8+4]
	st	%f5,[%g1+2*8+0]
	st	%f4,[%g1+2*8+4]
	st	%f7,[%g1+3*8+0]
	st	%f6,[%g1+3*8+4]
	st	%f9,[%g1+4*8+0]
	st	%f8,[%g1+4*8+4]
	st	%f11,[%g1+5*8+0]
	st	%f10,[%g1+5*8+4]
	st	%f13,[%g1+6*8+0]
	st	%f12,[%g1+6*8+4]
	st	%f15,[%g1+7*8+0]
	st	%f14,[%g1+7*8+4]
	st	%f17,[%g1+8*8+0]
	st	%f16,[%g1+8*8+4]
	st	%f19,[%g1+9*8+0]
	st	%f18,[%g1+9*8+4]
	st	%f21,[%g1+10*8+0]
	st	%f20,[%g1+10*8+4]
	st	%f23,[%g1+11*8+0]
	st	%f22,[%g1+11*8+4]
	.word	0x81b00f1d !fsrc2 %f0,%f60,%f0
	st	%f1,[%g1+12*8+0]
	st	%f0,[%g1+12*8+4]
	.word	0x85b00f1f !fsrc2 %f0,%f62,%f2
	st	%f3,[%g1+13*8+0]
	st	%f2,[%g1+13*8+4]
	.word	0x89b00f18 !fsrc2 %f0,%f24,%f4
	st	%f5,[%g1+14*8+0]
	st	%f4,[%g1+14*8+4]
	.word	0x8db00f1a !fsrc2 %f0,%f26,%f6
	st	%f7,[%g1+15*8+0]
	st	%f6,[%g1+15*8+4]
	.word	0x81b00f1c !fsrc2 %f0,%f28,%f0
	st	%f1,[%g1+16*8+0]
	st	%f0,[%g1+16*8+4]
	.word	0x85b00f1e !fsrc2 %f0,%f30,%f2
	st	%f3,[%g1+17*8+0]
	st	%f2,[%g1+17*8+4]
	.word	0x89b00f01 !fsrc2 %f0,%f32,%f4
	st	%f5,[%g1+18*8+0]
	st	%f4,[%g1+18*8+4]
	.word	0x8db00f03 !fsrc2 %f0,%f34,%f6
	st	%f7,[%g1+19*8+0]
	st	%f6,[%g1+19*8+4]
	.word	0x81b00f05 !fsrc2 %f0,%f36,%f0
	st	%f1,[%g1+20*8+0]
	st	%f0,[%g1+20*8+4]
	.word	0x85b00f07 !fsrc2 %f0,%f38,%f2
	st	%f3,[%g1+21*8+0]
	st	%f2,[%g1+21*8+4]
	.word	0x89b00f09 !fsrc2 %f0,%f40,%f4
	st	%f5,[%g1+22*8+0]
	st	%f4,[%g1+22*8+4]
	.word	0x8db00f0b !fsrc2 %f0,%f42,%f6
	st	%f7,[%g1+23*8+0]
	st	%f6,[%g1+23*8+4]
	mov	1,%i0		! return success
.Lmdone_24:
	ret
	restore

	! Failure exits: unwind the six operand windows, then return 0.
.Lmabort_24:
	restore
	restore
	restore
	restore
	restore
.Lmabort1_24:
	restore

	mov	0,%i0		! return failure
	ret
	restore

.align	32
.Lmsquare_24:
	! Square path: open the two windows the multiply path would have
	! opened, without loading anything through %g3.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	.word   0x81b02940+24-1	! montsqr	24-1
	ba	.Lmresume_24
	nop
.type	bn_mul_mont_t4_24, #function
.size	bn_mul_mont_t4_24, .-bn_mul_mont_t4_24

! NOTE(review): bn_mul_mont_t4_32 continues beyond the reviewed range.
! Its head is kept below exactly as found, annotation columns included,
! and still needs the same one-statement-per-line restoration.
1076 1.1 spz .globl bn_mul_mont_t4_32 1077 1.1 spz .align 32 1078 1.1 spz bn_mul_mont_t4_32: 1079 1.1 spz #ifdef __arch64__ 1080 1.1 spz mov 0,%g5 1081 1.1 spz mov -128,%g4 1082 1.1 spz #elif defined(SPARCV9_64BIT_STACK) 1083 1.1 spz SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) 1084 1.1 spz ld [%g1+0],%g1 ! 
OPENSSL_sparcv9_P[0] 1085 1.1 spz mov -2047,%g4 1086 1.1 spz and %g1,SPARCV9_64BIT_STACK,%g1 1087 1.1 spz movrz %g1,0,%g4 1088 1.1 spz mov -1,%g5 1089 1.1 spz add %g4,-128,%g4 1090 1.1 spz #else 1091 1.1 spz mov -1,%g5 1092 1.1 spz mov -128,%g4 1093 1.1 spz #endif 1094 1.1 spz sllx %g5,32,%g5 1095 1.1 spz save %sp,%g4,%sp 1096 1.1 spz #ifndef __arch64__ 1097 1.1 spz save %sp,-128,%sp ! warm it up 1098 1.1 spz save %sp,-128,%sp 1099 1.1 spz save %sp,-128,%sp 1100 1.1 spz save %sp,-128,%sp 1101 1.1 spz save %sp,-128,%sp 1102 1.1 spz save %sp,-128,%sp 1103 1.1 spz restore 1104 1.1 spz restore 1105 1.1 spz restore 1106 1.1 spz restore 1107 1.1 spz restore 1108 1.1 spz restore 1109 1.1 spz #endif 1110 1.1 spz and %sp,1,%g4 1111 1.1 spz or %g5,%fp,%fp 1112 1.1 spz or %g4,%g5,%g5 1113 1.1 spz 1114 1.1 spz ! copy arguments to global registers 1115 1.1 spz mov %i0,%g1 1116 1.1 spz mov %i1,%g2 1117 1.1 spz mov %i2,%g3 1118 1.1 spz mov %i3,%g4 1119 1.1 spz ld [%i4+0],%f1 ! load *n0 1120 1.1 spz ld [%i4+4],%f0 1121 1.1 spz .word 0xbbb00f00 !fsrc2 %f0,%f0,%f60 1122 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 1123 1.1 spz ld [%g2+0*8+0],%l1 1124 1.1 spz ld [%g2+0*8+4],%l0 1125 1.1 spz sllx %l0,32,%l0 1126 1.1 spz or %l1,%l0,%l0 1127 1.1 spz ld [%g2+1*8+0],%l2 1128 1.1 spz ld [%g2+1*8+4],%l1 1129 1.1 spz sllx %l1,32,%l1 1130 1.1 spz or %l2,%l1,%l1 1131 1.1 spz ld [%g2+2*8+0],%l3 1132 1.1 spz ld [%g2+2*8+4],%l2 1133 1.1 spz sllx %l2,32,%l2 1134 1.1 spz or %l3,%l2,%l2 1135 1.1 spz ld [%g2+3*8+0],%l4 1136 1.1 spz ld [%g2+3*8+4],%l3 1137 1.1 spz sllx %l3,32,%l3 1138 1.1 spz or %l4,%l3,%l3 1139 1.1 spz ld [%g2+4*8+0],%l5 1140 1.1 spz ld [%g2+4*8+4],%l4 1141 1.1 spz sllx %l4,32,%l4 1142 1.1 spz or %l5,%l4,%l4 1143 1.1 spz ld [%g2+5*8+0],%l6 1144 1.1 spz ld [%g2+5*8+4],%l5 1145 1.1 spz sllx %l5,32,%l5 1146 1.1 spz or %l6,%l5,%l5 1147 1.1 spz ld [%g2+6*8+0],%l7 1148 1.1 spz ld [%g2+6*8+4],%l6 1149 1.1 spz sllx %l6,32,%l6 1150 1.1 spz or %l7,%l6,%l6 1151 1.1 spz ld [%g2+7*8+0],%o0 1152 1.1 
spz ld [%g2+7*8+4],%l7 1153 1.1 spz sllx %l7,32,%l7 1154 1.1 spz or %o0,%l7,%l7 1155 1.1 spz ld [%g2+8*8+0],%o1 1156 1.1 spz ld [%g2+8*8+4],%o0 1157 1.1 spz sllx %o0,32,%o0 1158 1.1 spz or %o1,%o0,%o0 1159 1.1 spz ld [%g2+9*8+0],%o2 1160 1.1 spz ld [%g2+9*8+4],%o1 1161 1.1 spz sllx %o1,32,%o1 1162 1.1 spz or %o2,%o1,%o1 1163 1.1 spz ld [%g2+10*8+0],%o3 1164 1.1 spz ld [%g2+10*8+4],%o2 1165 1.1 spz sllx %o2,32,%o2 1166 1.1 spz or %o3,%o2,%o2 1167 1.1 spz ld [%g2+11*8+0],%o4 1168 1.1 spz ld [%g2+11*8+4],%o3 1169 1.1 spz sllx %o3,32,%o3 1170 1.1 spz or %o4,%o3,%o3 1171 1.1 spz ld [%g2+12*8+0],%o5 1172 1.1 spz ld [%g2+12*8+4],%o4 1173 1.1 spz sllx %o4,32,%o4 1174 1.1 spz or %o5,%o4,%o4 1175 1.1 spz ld [%g2+13*8+0],%o7 1176 1.1 spz ld [%g2+13*8+4],%o5 1177 1.1 spz sllx %o5,32,%o5 1178 1.1 spz or %o7,%o5,%o5 1179 1.1 spz ld [%g2+14*8+0],%f5 1180 1.1 spz ld [%g2+14*8+4],%f4 1181 1.1 spz .word 0xb1b00f04 !fsrc2 %f0,%f4,%f24 1182 1.1 spz ld [%g2+15*8+0],%f7 1183 1.1 spz ld [%g2+15*8+4],%f6 1184 1.1 spz .word 0xb5b00f06 !fsrc2 %f0,%f6,%f26 1185 1.1 spz ld [%g2+16*8+0],%f1 1186 1.1 spz ld [%g2+16*8+4],%f0 1187 1.1 spz .word 0xb9b00f00 !fsrc2 %f0,%f0,%f28 1188 1.1 spz ld [%g2+17*8+0],%f3 1189 1.1 spz ld [%g2+17*8+4],%f2 1190 1.1 spz .word 0xbdb00f02 !fsrc2 %f0,%f2,%f30 1191 1.1 spz ld [%g2+18*8+0],%f5 1192 1.1 spz ld [%g2+18*8+4],%f4 1193 1.1 spz .word 0x83b00f04 !fsrc2 %f0,%f4,%f32 1194 1.1 spz ld [%g2+19*8+0],%f7 1195 1.1 spz ld [%g2+19*8+4],%f6 1196 1.1 spz .word 0x87b00f06 !fsrc2 %f0,%f6,%f34 1197 1.1 spz ld [%g2+20*8+0],%f1 1198 1.1 spz ld [%g2+20*8+4],%f0 1199 1.1 spz .word 0x8bb00f00 !fsrc2 %f0,%f0,%f36 1200 1.1 spz ld [%g2+21*8+0],%f3 1201 1.1 spz ld [%g2+21*8+4],%f2 1202 1.1 spz .word 0x8fb00f02 !fsrc2 %f0,%f2,%f38 1203 1.1 spz ld [%g2+22*8+0],%f5 1204 1.1 spz ld [%g2+22*8+4],%f4 1205 1.1 spz .word 0x93b00f04 !fsrc2 %f0,%f4,%f40 1206 1.1 spz ld [%g2+23*8+0],%f7 1207 1.1 spz ld [%g2+23*8+4],%f6 1208 1.1 spz .word 0x97b00f06 !fsrc2 %f0,%f6,%f42 1209 1.1 spz ld 
[%g2+24*8+0],%f1 1210 1.1 spz ld [%g2+24*8+4],%f0 1211 1.1 spz .word 0x9bb00f00 !fsrc2 %f0,%f0,%f44 1212 1.1 spz ld [%g2+25*8+0],%f3 1213 1.1 spz ld [%g2+25*8+4],%f2 1214 1.1 spz .word 0x9fb00f02 !fsrc2 %f0,%f2,%f46 1215 1.1 spz ld [%g2+26*8+0],%f5 1216 1.1 spz ld [%g2+26*8+4],%f4 1217 1.1 spz .word 0xa3b00f04 !fsrc2 %f0,%f4,%f48 1218 1.1 spz ld [%g2+27*8+0],%f7 1219 1.1 spz ld [%g2+27*8+4],%f6 1220 1.1 spz .word 0xa7b00f06 !fsrc2 %f0,%f6,%f50 1221 1.1 spz ld [%g2+28*8+0],%f1 1222 1.1 spz ld [%g2+28*8+4],%f0 1223 1.1 spz .word 0xabb00f00 !fsrc2 %f0,%f0,%f52 1224 1.1 spz ld [%g2+29*8+0],%f3 1225 1.1 spz ld [%g2+29*8+4],%f2 1226 1.1 spz .word 0xafb00f02 !fsrc2 %f0,%f2,%f54 1227 1.1 spz ld [%g2+30*8+0],%f5 1228 1.1 spz ld [%g2+30*8+4],%f4 1229 1.1 spz .word 0xb3b00f04 !fsrc2 %f0,%f4,%f56 1230 1.1 spz ld [%g2+31*8+0],%f7 1231 1.1 spz ld [%g2+31*8+4],%f6 1232 1.1 spz .word 0xb7b00f06 !fsrc2 %f0,%f6,%f58 1233 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 1234 1.1 spz ld [%g4+0*8+0],%l1 1235 1.1 spz ld [%g4+0*8+4],%l0 1236 1.1 spz sllx %l0,32,%l0 1237 1.1 spz or %l1,%l0,%l0 1238 1.1 spz ld [%g4+1*8+0],%l2 1239 1.1 spz ld [%g4+1*8+4],%l1 1240 1.1 spz sllx %l1,32,%l1 1241 1.1 spz or %l2,%l1,%l1 1242 1.1 spz ld [%g4+2*8+0],%l3 1243 1.1 spz ld [%g4+2*8+4],%l2 1244 1.1 spz sllx %l2,32,%l2 1245 1.1 spz or %l3,%l2,%l2 1246 1.1 spz ld [%g4+3*8+0],%l4 1247 1.1 spz ld [%g4+3*8+4],%l3 1248 1.1 spz sllx %l3,32,%l3 1249 1.1 spz or %l4,%l3,%l3 1250 1.1 spz ld [%g4+4*8+0],%l5 1251 1.1 spz ld [%g4+4*8+4],%l4 1252 1.1 spz sllx %l4,32,%l4 1253 1.1 spz or %l5,%l4,%l4 1254 1.1 spz ld [%g4+5*8+0],%l6 1255 1.1 spz ld [%g4+5*8+4],%l5 1256 1.1 spz sllx %l5,32,%l5 1257 1.1 spz or %l6,%l5,%l5 1258 1.1 spz ld [%g4+6*8+0],%l7 1259 1.1 spz ld [%g4+6*8+4],%l6 1260 1.1 spz sllx %l6,32,%l6 1261 1.1 spz or %l7,%l6,%l6 1262 1.1 spz ld [%g4+7*8+0],%o0 1263 1.1 spz ld [%g4+7*8+4],%l7 1264 1.1 spz sllx %l7,32,%l7 1265 1.1 spz or %o0,%l7,%l7 1266 1.1 spz ld [%g4+8*8+0],%o1 1267 1.1 spz ld [%g4+8*8+4],%o0 1268 1.1 
spz sllx %o0,32,%o0 1269 1.1 spz or %o1,%o0,%o0 1270 1.1 spz ld [%g4+9*8+0],%o2 1271 1.1 spz ld [%g4+9*8+4],%o1 1272 1.1 spz sllx %o1,32,%o1 1273 1.1 spz or %o2,%o1,%o1 1274 1.1 spz ld [%g4+10*8+0],%o3 1275 1.1 spz ld [%g4+10*8+4],%o2 1276 1.1 spz sllx %o2,32,%o2 1277 1.1 spz or %o3,%o2,%o2 1278 1.1 spz ld [%g4+11*8+0],%o4 1279 1.1 spz ld [%g4+11*8+4],%o3 1280 1.1 spz sllx %o3,32,%o3 1281 1.1 spz or %o4,%o3,%o3 1282 1.1 spz ld [%g4+12*8+0],%o5 1283 1.1 spz ld [%g4+12*8+4],%o4 1284 1.1 spz sllx %o4,32,%o4 1285 1.1 spz or %o5,%o4,%o4 1286 1.1 spz ld [%g4+13*8+0],%o7 1287 1.1 spz ld [%g4+13*8+4],%o5 1288 1.1 spz sllx %o5,32,%o5 1289 1.1 spz or %o7,%o5,%o5 1290 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 1291 1.1 spz ld [%g4+14*8+0],%l1 1292 1.1 spz ld [%g4+14*8+4],%l0 1293 1.1 spz sllx %l0,32,%l0 1294 1.1 spz or %l1,%l0,%l0 1295 1.1 spz ld [%g4+15*8+0],%l2 1296 1.1 spz ld [%g4+15*8+4],%l1 1297 1.1 spz sllx %l1,32,%l1 1298 1.1 spz or %l2,%l1,%l1 1299 1.1 spz ld [%g4+16*8+0],%l3 1300 1.1 spz ld [%g4+16*8+4],%l2 1301 1.1 spz sllx %l2,32,%l2 1302 1.1 spz or %l3,%l2,%l2 1303 1.1 spz ld [%g4+17*8+0],%l4 1304 1.1 spz ld [%g4+17*8+4],%l3 1305 1.1 spz sllx %l3,32,%l3 1306 1.1 spz or %l4,%l3,%l3 1307 1.1 spz ld [%g4+18*8+0],%l5 1308 1.1 spz ld [%g4+18*8+4],%l4 1309 1.1 spz sllx %l4,32,%l4 1310 1.1 spz or %l5,%l4,%l4 1311 1.1 spz ld [%g4+19*8+0],%l6 1312 1.1 spz ld [%g4+19*8+4],%l5 1313 1.1 spz sllx %l5,32,%l5 1314 1.1 spz or %l6,%l5,%l5 1315 1.1 spz ld [%g4+20*8+0],%l7 1316 1.1 spz ld [%g4+20*8+4],%l6 1317 1.1 spz sllx %l6,32,%l6 1318 1.1 spz or %l7,%l6,%l6 1319 1.1 spz ld [%g4+21*8+0],%o0 1320 1.1 spz ld [%g4+21*8+4],%l7 1321 1.1 spz sllx %l7,32,%l7 1322 1.1 spz or %o0,%l7,%l7 1323 1.1 spz ld [%g4+22*8+0],%o1 1324 1.1 spz ld [%g4+22*8+4],%o0 1325 1.1 spz sllx %o0,32,%o0 1326 1.1 spz or %o1,%o0,%o0 1327 1.1 spz ld [%g4+23*8+0],%o2 1328 1.1 spz ld [%g4+23*8+4],%o1 1329 1.1 spz sllx %o1,32,%o1 1330 1.1 spz or %o2,%o1,%o1 1331 1.1 spz ld [%g4+24*8+0],%o3 1332 1.1 spz ld 
[%g4+24*8+4],%o2 1333 1.1 spz sllx %o2,32,%o2 1334 1.1 spz or %o3,%o2,%o2 1335 1.1 spz ld [%g4+25*8+0],%o4 1336 1.1 spz ld [%g4+25*8+4],%o3 1337 1.1 spz sllx %o3,32,%o3 1338 1.1 spz or %o4,%o3,%o3 1339 1.1 spz ld [%g4+26*8+0],%o5 1340 1.1 spz ld [%g4+26*8+4],%o4 1341 1.1 spz sllx %o4,32,%o4 1342 1.1 spz or %o5,%o4,%o4 1343 1.1 spz ld [%g4+27*8+0],%o7 1344 1.1 spz ld [%g4+27*8+4],%o5 1345 1.1 spz sllx %o5,32,%o5 1346 1.1 spz or %o7,%o5,%o5 1347 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 1348 1.1 spz ld [%g4+28*8+0],%l1 1349 1.1 spz ld [%g4+28*8+4],%l0 1350 1.1 spz sllx %l0,32,%l0 1351 1.1 spz or %l1,%l0,%l0 1352 1.1 spz ld [%g4+29*8+0],%l2 1353 1.1 spz ld [%g4+29*8+4],%l1 1354 1.1 spz sllx %l1,32,%l1 1355 1.1 spz or %l2,%l1,%l1 1356 1.1 spz ld [%g4+30*8+0],%l3 1357 1.1 spz ld [%g4+30*8+4],%l2 1358 1.1 spz sllx %l2,32,%l2 1359 1.1 spz or %l3,%l2,%l2 1360 1.1 spz ld [%g4+31*8+0],%o7 1361 1.1 spz ld [%g4+31*8+4],%l3 1362 1.1 spz sllx %l3,32,%l3 1363 1.1 spz or %o7,%l3,%l3 1364 1.1 spz cmp %g2,%g3 1365 1.1 spz be SIZE_T_CC,.Lmsquare_32 1366 1.1 spz nop 1367 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 1368 1.1 spz ld [%g3+0*8+0],%i1 1369 1.1 spz ld [%g3+0*8+4],%i0 1370 1.1 spz sllx %i0,32,%i0 1371 1.1 spz or %i1,%i0,%i0 1372 1.1 spz ld [%g3+1*8+0],%i2 1373 1.1 spz ld [%g3+1*8+4],%i1 1374 1.1 spz sllx %i1,32,%i1 1375 1.1 spz or %i2,%i1,%i1 1376 1.1 spz ld [%g3+2*8+0],%i3 1377 1.1 spz ld [%g3+2*8+4],%i2 1378 1.1 spz sllx %i2,32,%i2 1379 1.1 spz or %i3,%i2,%i2 1380 1.1 spz ld [%g3+3*8+0],%i4 1381 1.1 spz ld [%g3+3*8+4],%i3 1382 1.1 spz sllx %i3,32,%i3 1383 1.1 spz or %i4,%i3,%i3 1384 1.1 spz ld [%g3+4*8+0],%i5 1385 1.1 spz ld [%g3+4*8+4],%i4 1386 1.1 spz sllx %i4,32,%i4 1387 1.1 spz or %i5,%i4,%i4 1388 1.1 spz ld [%g3+5*8+0],%l0 1389 1.1 spz ld [%g3+5*8+4],%i5 1390 1.1 spz sllx %i5,32,%i5 1391 1.1 spz or %l0,%i5,%i5 1392 1.1 spz ld [%g3+6*8+0],%l1 1393 1.1 spz ld [%g3+6*8+4],%l0 1394 1.1 spz sllx %l0,32,%l0 1395 1.1 spz or %l1,%l0,%l0 1396 1.1 spz ld [%g3+7*8+0],%l2 1397 1.1 
spz ld [%g3+7*8+4],%l1 1398 1.1 spz sllx %l1,32,%l1 1399 1.1 spz or %l2,%l1,%l1 1400 1.1 spz ld [%g3+8*8+0],%l3 1401 1.1 spz ld [%g3+8*8+4],%l2 1402 1.1 spz sllx %l2,32,%l2 1403 1.1 spz or %l3,%l2,%l2 1404 1.1 spz ld [%g3+9*8+0],%l4 1405 1.1 spz ld [%g3+9*8+4],%l3 1406 1.1 spz sllx %l3,32,%l3 1407 1.1 spz or %l4,%l3,%l3 1408 1.1 spz ld [%g3+10*8+0],%l5 1409 1.1 spz ld [%g3+10*8+4],%l4 1410 1.1 spz sllx %l4,32,%l4 1411 1.1 spz or %l5,%l4,%l4 1412 1.1 spz ld [%g3+11*8+0],%l6 1413 1.1 spz ld [%g3+11*8+4],%l5 1414 1.1 spz sllx %l5,32,%l5 1415 1.1 spz or %l6,%l5,%l5 1416 1.1 spz ld [%g3+12*8+0],%l7 1417 1.1 spz ld [%g3+12*8+4],%l6 1418 1.1 spz sllx %l6,32,%l6 1419 1.1 spz or %l7,%l6,%l6 1420 1.1 spz ld [%g3+13*8+0],%o7 1421 1.1 spz ld [%g3+13*8+4],%l7 1422 1.1 spz sllx %l7,32,%l7 1423 1.1 spz or %o7,%l7,%l7 1424 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 1425 1.1 spz ld [%g3+14*8+0],%i1 1426 1.1 spz ld [%g3+14*8+4],%i0 1427 1.1 spz sllx %i0,32,%i0 1428 1.1 spz or %i1,%i0,%i0 1429 1.1 spz ld [%g3+15*8+0],%i2 1430 1.1 spz ld [%g3+15*8+4],%i1 1431 1.1 spz sllx %i1,32,%i1 1432 1.1 spz or %i2,%i1,%i1 1433 1.1 spz ld [%g3+16*8+0],%i3 1434 1.1 spz ld [%g3+16*8+4],%i2 1435 1.1 spz sllx %i2,32,%i2 1436 1.1 spz or %i3,%i2,%i2 1437 1.1 spz ld [%g3+17*8+0],%i4 1438 1.1 spz ld [%g3+17*8+4],%i3 1439 1.1 spz sllx %i3,32,%i3 1440 1.1 spz or %i4,%i3,%i3 1441 1.1 spz ld [%g3+18*8+0],%i5 1442 1.1 spz ld [%g3+18*8+4],%i4 1443 1.1 spz sllx %i4,32,%i4 1444 1.1 spz or %i5,%i4,%i4 1445 1.1 spz ld [%g3+19*8+0],%l0 1446 1.1 spz ld [%g3+19*8+4],%i5 1447 1.1 spz sllx %i5,32,%i5 1448 1.1 spz or %l0,%i5,%i5 1449 1.1 spz ld [%g3+20*8+0],%l1 1450 1.1 spz ld [%g3+20*8+4],%l0 1451 1.1 spz sllx %l0,32,%l0 1452 1.1 spz or %l1,%l0,%l0 1453 1.1 spz ld [%g3+21*8+0],%l2 1454 1.1 spz ld [%g3+21*8+4],%l1 1455 1.1 spz sllx %l1,32,%l1 1456 1.1 spz or %l2,%l1,%l1 1457 1.1 spz ld [%g3+22*8+0],%l3 1458 1.1 spz ld [%g3+22*8+4],%l2 1459 1.1 spz sllx %l2,32,%l2 1460 1.1 spz or %l3,%l2,%l2 1461 1.1 spz ld 
[%g3+23*8+0],%l4 1462 1.1 spz ld [%g3+23*8+4],%l3 1463 1.1 spz sllx %l3,32,%l3 1464 1.1 spz or %l4,%l3,%l3 1465 1.1 spz ld [%g3+24*8+0],%l5 1466 1.1 spz ld [%g3+24*8+4],%l4 1467 1.1 spz sllx %l4,32,%l4 1468 1.1 spz or %l5,%l4,%l4 1469 1.1 spz ld [%g3+25*8+0],%l6 1470 1.1 spz ld [%g3+25*8+4],%l5 1471 1.1 spz sllx %l5,32,%l5 1472 1.1 spz or %l6,%l5,%l5 1473 1.1 spz ld [%g3+26*8+0],%l7 1474 1.1 spz ld [%g3+26*8+4],%l6 1475 1.1 spz sllx %l6,32,%l6 1476 1.1 spz or %l7,%l6,%l6 1477 1.1 spz ld [%g3+27*8+0],%o0 1478 1.1 spz ld [%g3+27*8+4],%l7 1479 1.1 spz sllx %l7,32,%l7 1480 1.1 spz or %o0,%l7,%l7 1481 1.1 spz ld [%g3+28*8+0],%o1 1482 1.1 spz ld [%g3+28*8+4],%o0 1483 1.1 spz sllx %o0,32,%o0 1484 1.1 spz or %o1,%o0,%o0 1485 1.1 spz ld [%g3+29*8+0],%o2 1486 1.1 spz ld [%g3+29*8+4],%o1 1487 1.1 spz sllx %o1,32,%o1 1488 1.1 spz or %o2,%o1,%o1 1489 1.1 spz ld [%g3+30*8+0],%o3 1490 1.1 spz ld [%g3+30*8+4],%o2 1491 1.1 spz sllx %o2,32,%o2 1492 1.1 spz or %o3,%o2,%o2 1493 1.1 spz ld [%g3+31*8+0],%o7 1494 1.1 spz ld [%g3+31*8+4],%o3 1495 1.1 spz sllx %o3,32,%o3 1496 1.1 spz or %o7,%o3,%o3 1497 1.1 spz .word 0x81b02920+32-1 ! 
montmul 32-1 1498 1.1 spz .Lmresume_32: 1499 1.1 spz fbu,pn %fcc3,.Lmabort_32 1500 1.1 spz #ifndef __arch64__ 1501 1.1 spz and %fp,%g5,%g5 1502 1.1 spz brz,pn %g5,.Lmabort_32 1503 1.1 spz #endif 1504 1.1 spz nop 1505 1.1 spz #ifdef __arch64__ 1506 1.1 spz restore 1507 1.1 spz restore 1508 1.1 spz restore 1509 1.1 spz restore 1510 1.1 spz restore 1511 1.1 spz #else 1512 1.1 spz restore; and %fp,%g5,%g5 1513 1.1 spz restore; and %fp,%g5,%g5 1514 1.1 spz restore; and %fp,%g5,%g5 1515 1.1 spz restore; and %fp,%g5,%g5 1516 1.1 spz brz,pn %g5,.Lmabort1_32 1517 1.1 spz restore 1518 1.1 spz #endif 1519 1.1 spz .word 0x81b02310 !movxtod %l0,%f0 1520 1.1 spz .word 0x85b02311 !movxtod %l1,%f2 1521 1.1 spz .word 0x89b02312 !movxtod %l2,%f4 1522 1.1 spz .word 0x8db02313 !movxtod %l3,%f6 1523 1.1 spz .word 0x91b02314 !movxtod %l4,%f8 1524 1.1 spz .word 0x95b02315 !movxtod %l5,%f10 1525 1.1 spz .word 0x99b02316 !movxtod %l6,%f12 1526 1.1 spz .word 0x9db02317 !movxtod %l7,%f14 1527 1.1 spz .word 0xa1b02308 !movxtod %o0,%f16 1528 1.1 spz .word 0xa5b02309 !movxtod %o1,%f18 1529 1.1 spz .word 0xa9b0230a !movxtod %o2,%f20 1530 1.1 spz .word 0xadb0230b !movxtod %o3,%f22 1531 1.1 spz .word 0xbbb0230c !movxtod %o4,%f60 1532 1.1 spz .word 0xbfb0230d !movxtod %o5,%f62 1533 1.1 spz #ifdef __arch64__ 1534 1.1 spz restore 1535 1.1 spz #else 1536 1.1 spz and %fp,%g5,%g5 1537 1.1 spz restore 1538 1.1 spz and %g5,1,%o7 1539 1.1 spz and %fp,%g5,%g5 1540 1.1 spz srl %fp,0,%fp ! just in case? 1541 1.1 spz or %o7,%g5,%g5 1542 1.1 spz brz,a,pn %g5,.Lmdone_32 1543 1.1 spz mov 0,%i0 ! 
return failure 1544 1.1 spz #endif 1545 1.1 spz st %f1,[%g1+0*8+0] 1546 1.1 spz st %f0,[%g1+0*8+4] 1547 1.1 spz st %f3,[%g1+1*8+0] 1548 1.1 spz st %f2,[%g1+1*8+4] 1549 1.1 spz st %f5,[%g1+2*8+0] 1550 1.1 spz st %f4,[%g1+2*8+4] 1551 1.1 spz st %f7,[%g1+3*8+0] 1552 1.1 spz st %f6,[%g1+3*8+4] 1553 1.1 spz st %f9,[%g1+4*8+0] 1554 1.1 spz st %f8,[%g1+4*8+4] 1555 1.1 spz st %f11,[%g1+5*8+0] 1556 1.1 spz st %f10,[%g1+5*8+4] 1557 1.1 spz st %f13,[%g1+6*8+0] 1558 1.1 spz st %f12,[%g1+6*8+4] 1559 1.1 spz st %f15,[%g1+7*8+0] 1560 1.1 spz st %f14,[%g1+7*8+4] 1561 1.1 spz st %f17,[%g1+8*8+0] 1562 1.1 spz st %f16,[%g1+8*8+4] 1563 1.1 spz st %f19,[%g1+9*8+0] 1564 1.1 spz st %f18,[%g1+9*8+4] 1565 1.1 spz st %f21,[%g1+10*8+0] 1566 1.1 spz st %f20,[%g1+10*8+4] 1567 1.1 spz st %f23,[%g1+11*8+0] 1568 1.1 spz st %f22,[%g1+11*8+4] 1569 1.1 spz .word 0x81b00f1d !fsrc2 %f0,%f60,%f0 1570 1.1 spz st %f1,[%g1+12*8+0] 1571 1.1 spz st %f0,[%g1+12*8+4] 1572 1.1 spz .word 0x85b00f1f !fsrc2 %f0,%f62,%f2 1573 1.1 spz st %f3,[%g1+13*8+0] 1574 1.1 spz st %f2,[%g1+13*8+4] 1575 1.1 spz .word 0x89b00f18 !fsrc2 %f0,%f24,%f4 1576 1.1 spz st %f5,[%g1+14*8+0] 1577 1.1 spz st %f4,[%g1+14*8+4] 1578 1.1 spz .word 0x8db00f1a !fsrc2 %f0,%f26,%f6 1579 1.1 spz st %f7,[%g1+15*8+0] 1580 1.1 spz st %f6,[%g1+15*8+4] 1581 1.1 spz .word 0x81b00f1c !fsrc2 %f0,%f28,%f0 1582 1.1 spz st %f1,[%g1+16*8+0] 1583 1.1 spz st %f0,[%g1+16*8+4] 1584 1.1 spz .word 0x85b00f1e !fsrc2 %f0,%f30,%f2 1585 1.1 spz st %f3,[%g1+17*8+0] 1586 1.1 spz st %f2,[%g1+17*8+4] 1587 1.1 spz .word 0x89b00f01 !fsrc2 %f0,%f32,%f4 1588 1.1 spz st %f5,[%g1+18*8+0] 1589 1.1 spz st %f4,[%g1+18*8+4] 1590 1.1 spz .word 0x8db00f03 !fsrc2 %f0,%f34,%f6 1591 1.1 spz st %f7,[%g1+19*8+0] 1592 1.1 spz st %f6,[%g1+19*8+4] 1593 1.1 spz .word 0x81b00f05 !fsrc2 %f0,%f36,%f0 1594 1.1 spz st %f1,[%g1+20*8+0] 1595 1.1 spz st %f0,[%g1+20*8+4] 1596 1.1 spz .word 0x85b00f07 !fsrc2 %f0,%f38,%f2 1597 1.1 spz st %f3,[%g1+21*8+0] 1598 1.1 spz st %f2,[%g1+21*8+4] 1599 1.1 spz .word 
0x89b00f09 !fsrc2 %f0,%f40,%f4 1600 1.1 spz st %f5,[%g1+22*8+0] 1601 1.1 spz st %f4,[%g1+22*8+4] 1602 1.1 spz .word 0x8db00f0b !fsrc2 %f0,%f42,%f6 1603 1.1 spz st %f7,[%g1+23*8+0] 1604 1.1 spz st %f6,[%g1+23*8+4] 1605 1.1 spz .word 0x81b00f0d !fsrc2 %f0,%f44,%f0 1606 1.1 spz st %f1,[%g1+24*8+0] 1607 1.1 spz st %f0,[%g1+24*8+4] 1608 1.1 spz .word 0x85b00f0f !fsrc2 %f0,%f46,%f2 1609 1.1 spz st %f3,[%g1+25*8+0] 1610 1.1 spz st %f2,[%g1+25*8+4] 1611 1.1 spz .word 0x89b00f11 !fsrc2 %f0,%f48,%f4 1612 1.1 spz st %f5,[%g1+26*8+0] 1613 1.1 spz st %f4,[%g1+26*8+4] 1614 1.1 spz .word 0x8db00f13 !fsrc2 %f0,%f50,%f6 1615 1.1 spz st %f7,[%g1+27*8+0] 1616 1.1 spz st %f6,[%g1+27*8+4] 1617 1.1 spz .word 0x81b00f15 !fsrc2 %f0,%f52,%f0 1618 1.1 spz st %f1,[%g1+28*8+0] 1619 1.1 spz st %f0,[%g1+28*8+4] 1620 1.1 spz .word 0x85b00f17 !fsrc2 %f0,%f54,%f2 1621 1.1 spz st %f3,[%g1+29*8+0] 1622 1.1 spz st %f2,[%g1+29*8+4] 1623 1.1 spz .word 0x89b00f19 !fsrc2 %f0,%f56,%f4 1624 1.1 spz st %f5,[%g1+30*8+0] 1625 1.1 spz st %f4,[%g1+30*8+4] 1626 1.1 spz .word 0x8db00f1b !fsrc2 %f0,%f58,%f6 1627 1.1 spz st %f7,[%g1+31*8+0] 1628 1.1 spz st %f6,[%g1+31*8+4] 1629 1.1 spz mov 1,%i0 ! return success 1630 1.1 spz .Lmdone_32: 1631 1.1 spz ret 1632 1.1 spz restore 1633 1.1 spz 1634 1.1 spz .Lmabort_32: 1635 1.1 spz restore 1636 1.1 spz restore 1637 1.1 spz restore 1638 1.1 spz restore 1639 1.1 spz restore 1640 1.1 spz .Lmabort1_32: 1641 1.1 spz restore 1642 1.1 spz 1643 1.1 spz mov 0,%i0 ! return failure 1644 1.1 spz ret 1645 1.1 spz restore 1646 1.1 spz 1647 1.1 spz .align 32 1648 1.1 spz .Lmsquare_32: 1649 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 1650 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 1651 1.1 spz .word 0x81b02940+32-1 ! 
montsqr 32-1 1652 1.1 spz ba .Lmresume_32 1653 1.1 spz nop 1654 1.1 spz .type bn_mul_mont_t4_32, #function 1655 1.1 spz .size bn_mul_mont_t4_32, .-bn_mul_mont_t4_32 1656 1.1 spz .globl bn_pwr5_mont_t4_8 1657 1.1 spz .align 32 1658 1.1 spz bn_pwr5_mont_t4_8: ! each pass runs five montsqr and one montmul by an 8-limb entry gathered from the table at %i3, consuming 5 bits of %i4 while the counter taken from %i5 stays non-negative; the vector at %i0 is input and output; %i0 returns 1 on success, 0 on failure 1659 1.1 spz #ifdef __arch64__ 1660 1.1 spz mov 0,%g5 1661 1.1 spz mov -128,%g4 1662 1.1 spz #elif defined(SPARCV9_64BIT_STACK) 1663 1.1 spz SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) 1664 1.1 spz ld [%g1+0],%g1 ! OPENSSL_sparcv9cap_P[0] 1665 1.1 spz mov -2047,%g4 1666 1.1 spz and %g1,SPARCV9_64BIT_STACK,%g1 1667 1.1 spz movrz %g1,0,%g4 1668 1.1 spz mov -1,%g5 1669 1.1 spz add %g4,-128,%g4 1670 1.1 spz #else 1671 1.1 spz mov -1,%g5 1672 1.1 spz mov -128,%g4 1673 1.1 spz #endif 1674 1.1 spz sllx %g5,32,%g5 ! mask = all ones in the upper 32 bits (stays 0 on arch64) 1675 1.1 spz save %sp,%g4,%sp 1676 1.1 spz #ifndef __arch64__ 1677 1.1 spz save %sp,-128,%sp ! warm it up 1678 1.1 spz save %sp,-128,%sp 1679 1.1 spz save %sp,-128,%sp 1680 1.1 spz save %sp,-128,%sp 1681 1.1 spz save %sp,-128,%sp 1682 1.1 spz save %sp,-128,%sp 1683 1.1 spz restore 1684 1.1 spz restore 1685 1.1 spz restore 1686 1.1 spz restore 1687 1.1 spz restore 1688 1.1 spz restore 1689 1.1 spz #endif 1690 1.1 spz and %sp,1,%g4 1691 1.1 spz or %g5,%fp,%fp ! stamp the mask into the upper half of %fp 1692 1.1 spz or %g4,%g5,%g5 ! %g5 = mask | (%sp & 1) 1693 1.1 spz 1694 1.1 spz ! copy arguments to global registers 1695 1.1 spz mov %i0,%g1 ! %g1 = 8-limb vector, read below and overwritten with the result 1696 1.1 spz mov %i1,%g2 ! %g2 = second 8-limb input 1697 1.1 spz ld [%i2+0],%f1 ! load *n0 1698 1.1 spz ld [%i2+4],%f0 1699 1.1 spz mov %i3,%g3 ! %g3 = table base 1700 1.1 spz srl %i4,%g0,%i4 !
pack last arguments 1701 1.1 spz sllx %i5,32,%g4 1702 1.1 spz or %i4,%g4,%g4 ! %g4 = %i5 * 2^32 | low word of %i4 1703 1.1 spz .word 0xbbb00f00 !fsrc2 %f0,%f0,%f60 1704 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp ! window for the 8 limbs of [%g1] 1705 1.1 spz ldx [%g1+0*8],%l0 1706 1.1 spz ldx [%g1+1*8],%l1 1707 1.1 spz ldx [%g1+2*8],%l2 1708 1.1 spz ldx [%g1+3*8],%l3 1709 1.1 spz ldx [%g1+4*8],%l4 1710 1.1 spz ldx [%g1+5*8],%l5 1711 1.1 spz ldx [%g1+6*8],%l6 1712 1.1 spz ldx [%g1+7*8],%l7 1713 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp ! window for the 8 limbs of [%g2] 1714 1.1 spz ldx [%g2+0*8],%l0 1715 1.1 spz ldx [%g2+1*8],%l1 1716 1.1 spz ldx [%g2+2*8],%l2 1717 1.1 spz ldx [%g2+3*8],%l3 1718 1.1 spz ldx [%g2+4*8],%l4 1719 1.1 spz ldx [%g2+5*8],%l5 1720 1.1 spz ldx [%g2+6*8],%l6 1721 1.1 spz ldx [%g2+7*8],%l7 1722 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 1723 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 1724 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 1725 1.1 spz 1726 1.1 spz srlx %g4, 32, %o4 ! unpack %g4 1727 1.1 spz srl %g4, %g0, %o5 1728 1.1 spz sub %o4, 5, %o4 ! counter -= 5 1729 1.1 spz mov %g3, %o7 1730 1.1 spz sllx %o4, 32, %g4 ! re-pack %g4 1731 1.1 spz or %o5, %g4, %g4 1732 1.1 spz srl %o5, %o4, %o5 ! low word shifted right by the counter 1733 1.1 spz srl %o5, 2, %o4 1734 1.1 spz and %o5, 3, %o5 ! bits 0-1: which 8-byte slot 1735 1.1 spz and %o4, 7, %o4 ! bits 2-4: which condition-code bit 1736 1.1 spz sll %o5, 3, %o5 ! offset within first cache line 1737 1.1 spz add %o5, %o7, %o7 !
of the pwrtbl 1738 1.1 spz or %g0, 1, %o5 1739 1.1 spz sll %o5, %o4, %o4 1740 1.1 spz wr %o4, %g0, %ccr ! one-hot selector into %ccr for the conditional moves 1741 1.1 spz b .Lstride_8 1742 1.1 spz nop 1743 1.1 spz .align 16 1744 1.1 spz .Lstride_8: ! gather: every limb loads all 8 candidates 32 bytes apart, the %ccr bit decides which one stays 1745 1.1 spz ldx [%o7+0*32], %i0 1746 1.1 spz ldx [%o7+8*32], %i1 1747 1.1 spz ldx [%o7+1*32], %o4 1748 1.1 spz ldx [%o7+9*32], %o5 1749 1.1 spz movvs %icc, %o4, %i0 1750 1.1 spz ldx [%o7+2*32], %o4 1751 1.1 spz movvs %icc, %o5, %i1 1752 1.1 spz ldx [%o7+10*32],%o5 1753 1.1 spz move %icc, %o4, %i0 1754 1.1 spz ldx [%o7+3*32], %o4 1755 1.1 spz move %icc, %o5, %i1 1756 1.1 spz ldx [%o7+11*32],%o5 1757 1.1 spz movneg %icc, %o4, %i0 1758 1.1 spz ldx [%o7+4*32], %o4 1759 1.1 spz movneg %icc, %o5, %i1 1760 1.1 spz ldx [%o7+12*32],%o5 1761 1.1 spz movcs %xcc, %o4, %i0 1762 1.1 spz ldx [%o7+5*32],%o4 1763 1.1 spz movcs %xcc, %o5, %i1 1764 1.1 spz ldx [%o7+13*32],%o5 1765 1.1 spz movvs %xcc, %o4, %i0 1766 1.1 spz ldx [%o7+6*32], %o4 1767 1.1 spz movvs %xcc, %o5, %i1 1768 1.1 spz ldx [%o7+14*32],%o5 1769 1.1 spz move %xcc, %o4, %i0 1770 1.1 spz ldx [%o7+7*32], %o4 1771 1.1 spz move %xcc, %o5, %i1 1772 1.1 spz ldx [%o7+15*32],%o5 1773 1.1 spz movneg %xcc, %o4, %i0 1774 1.1 spz add %o7,16*32, %o7 ! advance to the next two limbs 1775 1.1 spz movneg %xcc, %o5, %i1 1776 1.1 spz ldx [%o7+0*32], %i2 1777 1.1 spz ldx [%o7+8*32], %i3 1778 1.1 spz ldx [%o7+1*32], %o4 1779 1.1 spz ldx [%o7+9*32], %o5 1780 1.1 spz movvs %icc, %o4, %i2 1781 1.1 spz ldx [%o7+2*32], %o4 1782 1.1 spz movvs %icc, %o5, %i3 1783 1.1 spz ldx [%o7+10*32],%o5 1784 1.1 spz move %icc, %o4, %i2 1785 1.1 spz ldx [%o7+3*32], %o4 1786 1.1 spz move %icc, %o5, %i3 1787 1.1 spz ldx [%o7+11*32],%o5 1788 1.1 spz movneg %icc, %o4, %i2 1789 1.1 spz ldx [%o7+4*32], %o4 1790 1.1 spz movneg %icc, %o5, %i3 1791 1.1 spz ldx [%o7+12*32],%o5 1792 1.1 spz movcs %xcc, %o4, %i2 1793 1.1 spz ldx [%o7+5*32],%o4 1794 1.1 spz movcs %xcc, %o5, %i3 1795 1.1 spz ldx [%o7+13*32],%o5 1796 1.1 spz movvs %xcc, %o4, %i2 1797 1.1 spz ldx [%o7+6*32], %o4 1798 1.1 spz movvs %xcc, %o5, %i3
1799 1.1 spz ldx [%o7+14*32],%o5 1800 1.1 spz move %xcc, %o4, %i2 1801 1.1 spz ldx [%o7+7*32], %o4 1802 1.1 spz move %xcc, %o5, %i3 1803 1.1 spz ldx [%o7+15*32],%o5 1804 1.1 spz movneg %xcc, %o4, %i2 1805 1.1 spz add %o7,16*32, %o7 1806 1.1 spz movneg %xcc, %o5, %i3 1807 1.1 spz ldx [%o7+0*32], %i4 1808 1.1 spz ldx [%o7+8*32], %i5 1809 1.1 spz ldx [%o7+1*32], %o4 1810 1.1 spz ldx [%o7+9*32], %o5 1811 1.1 spz movvs %icc, %o4, %i4 1812 1.1 spz ldx [%o7+2*32], %o4 1813 1.1 spz movvs %icc, %o5, %i5 1814 1.1 spz ldx [%o7+10*32],%o5 1815 1.1 spz move %icc, %o4, %i4 1816 1.1 spz ldx [%o7+3*32], %o4 1817 1.1 spz move %icc, %o5, %i5 1818 1.1 spz ldx [%o7+11*32],%o5 1819 1.1 spz movneg %icc, %o4, %i4 1820 1.1 spz ldx [%o7+4*32], %o4 1821 1.1 spz movneg %icc, %o5, %i5 1822 1.1 spz ldx [%o7+12*32],%o5 1823 1.1 spz movcs %xcc, %o4, %i4 1824 1.1 spz ldx [%o7+5*32],%o4 1825 1.1 spz movcs %xcc, %o5, %i5 1826 1.1 spz ldx [%o7+13*32],%o5 1827 1.1 spz movvs %xcc, %o4, %i4 1828 1.1 spz ldx [%o7+6*32], %o4 1829 1.1 spz movvs %xcc, %o5, %i5 1830 1.1 spz ldx [%o7+14*32],%o5 1831 1.1 spz move %xcc, %o4, %i4 1832 1.1 spz ldx [%o7+7*32], %o4 1833 1.1 spz move %xcc, %o5, %i5 1834 1.1 spz ldx [%o7+15*32],%o5 1835 1.1 spz movneg %xcc, %o4, %i4 1836 1.1 spz add %o7,16*32, %o7 1837 1.1 spz movneg %xcc, %o5, %i5 1838 1.1 spz ldx [%o7+0*32], %l0 1839 1.1 spz ldx [%o7+8*32], %l1 1840 1.1 spz ldx [%o7+1*32], %o4 1841 1.1 spz ldx [%o7+9*32], %o5 1842 1.1 spz movvs %icc, %o4, %l0 1843 1.1 spz ldx [%o7+2*32], %o4 1844 1.1 spz movvs %icc, %o5, %l1 1845 1.1 spz ldx [%o7+10*32],%o5 1846 1.1 spz move %icc, %o4, %l0 1847 1.1 spz ldx [%o7+3*32], %o4 1848 1.1 spz move %icc, %o5, %l1 1849 1.1 spz ldx [%o7+11*32],%o5 1850 1.1 spz movneg %icc, %o4, %l0 1851 1.1 spz ldx [%o7+4*32], %o4 1852 1.1 spz movneg %icc, %o5, %l1 1853 1.1 spz ldx [%o7+12*32],%o5 1854 1.1 spz movcs %xcc, %o4, %l0 1855 1.1 spz ldx [%o7+5*32],%o4 1856 1.1 spz movcs %xcc, %o5, %l1 1857 1.1 spz ldx [%o7+13*32],%o5 1858 1.1 spz movvs %xcc, %o4,
%l0 1859 1.1 spz ldx [%o7+6*32], %o4 1860 1.1 spz movvs %xcc, %o5, %l1 1861 1.1 spz ldx [%o7+14*32],%o5 1862 1.1 spz move %xcc, %o4, %l0 1863 1.1 spz ldx [%o7+7*32], %o4 1864 1.1 spz move %xcc, %o5, %l1 1865 1.1 spz ldx [%o7+15*32],%o5 1866 1.1 spz movneg %xcc, %o4, %l0 1867 1.1 spz add %o7,16*32, %o7 1868 1.1 spz movneg %xcc, %o5, %l1 1869 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp ! window for the opcodes; the statements up to the first montsqr prepare the next pass 1870 1.1 spz srax %g4, 32, %o4 ! unpack %g4 1871 1.1 spz srl %g4, %g0, %o5 1872 1.1 spz sub %o4, 5, %o4 1873 1.1 spz mov %g3, %i7 ! %i7 here is %o7 of the gathering window 1874 1.1 spz sllx %o4, 32, %g4 ! re-pack %g4 1875 1.1 spz or %o5, %g4, %g4 1876 1.1 spz srl %o5, %o4, %o5 1877 1.1 spz srl %o5, 2, %o4 1878 1.1 spz and %o5, 3, %o5 1879 1.1 spz and %o4, 7, %o4 1880 1.1 spz sll %o5, 3, %o5 ! offset within first cache line 1881 1.1 spz add %o5, %i7, %i7 ! of the pwrtbl 1882 1.1 spz or %g0, 1, %o5 1883 1.1 spz sll %o5, %o4, %o4 1884 1.1 spz .word 0x81b02940+8-1 ! montsqr 8-1 1885 1.1 spz fbu,pn %fcc3,.Labort_8 1886 1.1 spz #ifndef __arch64__ 1887 1.1 spz and %fp,%g5,%g5 1888 1.1 spz brz,pn %g5,.Labort_8 1889 1.1 spz #endif 1890 1.1 spz nop 1891 1.1 spz .word 0x81b02940+8-1 ! montsqr 8-1 1892 1.1 spz fbu,pn %fcc3,.Labort_8 1893 1.1 spz #ifndef __arch64__ 1894 1.1 spz and %fp,%g5,%g5 1895 1.1 spz brz,pn %g5,.Labort_8 1896 1.1 spz #endif 1897 1.1 spz nop 1898 1.1 spz .word 0x81b02940+8-1 ! montsqr 8-1 1899 1.1 spz fbu,pn %fcc3,.Labort_8 1900 1.1 spz #ifndef __arch64__ 1901 1.1 spz and %fp,%g5,%g5 1902 1.1 spz brz,pn %g5,.Labort_8 1903 1.1 spz #endif 1904 1.1 spz nop 1905 1.1 spz .word 0x81b02940+8-1 ! montsqr 8-1 1906 1.1 spz fbu,pn %fcc3,.Labort_8 1907 1.1 spz #ifndef __arch64__ 1908 1.1 spz and %fp,%g5,%g5 1909 1.1 spz brz,pn %g5,.Labort_8 1910 1.1 spz #endif 1911 1.1 spz nop 1912 1.1 spz .word 0x81b02940+8-1 !
montsqr 8-1 1913 1.1 spz fbu,pn %fcc3,.Labort_8 1914 1.1 spz #ifndef __arch64__ 1915 1.1 spz and %fp,%g5,%g5 1916 1.1 spz brz,pn %g5,.Labort_8 1917 1.1 spz #endif 1918 1.1 spz nop 1919 1.1 spz wr %o4, %g0, %ccr ! selector for the next gather 1920 1.1 spz .word 0x81b02920+8-1 ! montmul 8-1 1921 1.1 spz fbu,pn %fcc3,.Labort_8 1922 1.1 spz #ifndef __arch64__ 1923 1.1 spz and %fp,%g5,%g5 1924 1.1 spz brz,pn %g5,.Labort_8 1925 1.1 spz #endif 1926 1.1 spz 1927 1.1 spz srax %g4, 32, %o4 ! counter, sign-extended; sits in the delay slot of the preceding branch 1928 1.1 spz #ifdef __arch64__ 1929 1.1 spz brgez %o4,.Lstride_8 ! counter still non-negative: next pass 1930 1.1 spz restore 1931 1.1 spz restore 1932 1.1 spz restore 1933 1.1 spz restore 1934 1.1 spz restore 1935 1.1 spz #else 1936 1.1 spz brgez %o4,.Lstride_8 ! counter still non-negative: next pass 1937 1.1 spz restore; and %fp,%g5,%g5 1938 1.1 spz restore; and %fp,%g5,%g5 1939 1.1 spz restore; and %fp,%g5,%g5 1940 1.1 spz restore; and %fp,%g5,%g5 1941 1.1 spz brz,pn %g5,.Labort1_8 1942 1.1 spz restore 1943 1.1 spz #endif 1944 1.1 spz .word 0x81b02310 !movxtod %l0,%f0 1945 1.1 spz .word 0x85b02311 !movxtod %l1,%f2 1946 1.1 spz .word 0x89b02312 !movxtod %l2,%f4 1947 1.1 spz .word 0x8db02313 !movxtod %l3,%f6 1948 1.1 spz .word 0x91b02314 !movxtod %l4,%f8 1949 1.1 spz .word 0x95b02315 !movxtod %l5,%f10 1950 1.1 spz .word 0x99b02316 !movxtod %l6,%f12 1951 1.1 spz .word 0x9db02317 !movxtod %l7,%f14 1952 1.1 spz #ifdef __arch64__ 1953 1.1 spz restore 1954 1.1 spz #else 1955 1.1 spz and %fp,%g5,%g5 1956 1.1 spz restore 1957 1.1 spz and %g5,1,%o7 1958 1.1 spz and %fp,%g5,%g5 1959 1.1 spz srl %fp,0,%fp ! just in case? 1960 1.1 spz or %o7,%g5,%g5 1961 1.1 spz brz,a,pn %g5,.Ldone_8 1962 1.1 spz mov 0,%i0 ! return failure 1963 1.1 spz #endif 1964 1.1 spz std %f0,[%g1+0*8] ! write the 8 result limbs to [%g1] 1965 1.1 spz std %f2,[%g1+1*8] 1966 1.1 spz std %f4,[%g1+2*8] 1967 1.1 spz std %f6,[%g1+3*8] 1968 1.1 spz std %f8,[%g1+4*8] 1969 1.1 spz std %f10,[%g1+5*8] 1970 1.1 spz std %f12,[%g1+6*8] 1971 1.1 spz std %f14,[%g1+7*8] 1972 1.1 spz mov 1,%i0 !
return success 1973 1.1 spz .Ldone_8: 1974 1.1 spz ret 1975 1.1 spz restore 1976 1.1 spz 1977 1.1 spz .Labort_8: ! failure in the deepest window: unwind and return 0 1978 1.1 spz restore 1979 1.1 spz restore 1980 1.1 spz restore 1981 1.1 spz restore 1982 1.1 spz restore 1983 1.1 spz .Labort1_8: 1984 1.1 spz restore 1985 1.1 spz 1986 1.1 spz mov 0,%i0 ! return failure 1987 1.1 spz ret 1988 1.1 spz restore 1989 1.1 spz .type bn_pwr5_mont_t4_8, #function 1990 1.1 spz .size bn_pwr5_mont_t4_8, .-bn_pwr5_mont_t4_8 1991 1.1 spz .globl bn_pwr5_mont_t4_16 1992 1.1 spz .align 32 1993 1.1 spz bn_pwr5_mont_t4_16: 1994 1.1 spz #ifdef __arch64__ 1995 1.1 spz mov 0,%g5 1996 1.1 spz mov -128,%g4 1997 1.1 spz #elif defined(SPARCV9_64BIT_STACK) 1998 1.1 spz SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) 1999 1.1 spz ld [%g1+0],%g1 ! OPENSSL_sparcv9cap_P[0] 2000 1.1 spz mov -2047,%g4 2001 1.1 spz and %g1,SPARCV9_64BIT_STACK,%g1 2002 1.1 spz movrz %g1,0,%g4 2003 1.1 spz mov -1,%g5 2004 1.1 spz add %g4,-128,%g4 2005 1.1 spz #else 2006 1.1 spz mov -1,%g5 2007 1.1 spz mov -128,%g4 2008 1.1 spz #endif 2009 1.1 spz sllx %g5,32,%g5 2010 1.1 spz save %sp,%g4,%sp 2011 1.1 spz #ifndef __arch64__ 2012 1.1 spz save %sp,-128,%sp ! warm it up 2013 1.1 spz save %sp,-128,%sp 2014 1.1 spz save %sp,-128,%sp 2015 1.1 spz save %sp,-128,%sp 2016 1.1 spz save %sp,-128,%sp 2017 1.1 spz save %sp,-128,%sp 2018 1.1 spz restore 2019 1.1 spz restore 2020 1.1 spz restore 2021 1.1 spz restore 2022 1.1 spz restore 2023 1.1 spz restore 2024 1.1 spz #endif 2025 1.1 spz and %sp,1,%g4 2026 1.1 spz or %g5,%fp,%fp 2027 1.1 spz or %g4,%g5,%g5 2028 1.1 spz 2029 1.1 spz ! copy arguments to global registers 2030 1.1 spz mov %i0,%g1 2031 1.1 spz mov %i1,%g2 2032 1.1 spz ld [%i2+0],%f1 ! load *n0 2033 1.1 spz ld [%i2+4],%f0 2034 1.1 spz mov %i3,%g3 2035 1.1 spz srl %i4,%g0,%i4 !
pack last arguments 2036 1.1 spz sllx %i5,32,%g4 2037 1.1 spz or %i4,%g4,%g4 2038 1.1 spz .word 0xbbb00f00 !fsrc2 %f0,%f0,%f60 2039 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 2040 1.1 spz ldx [%g1+0*8],%l0 2041 1.1 spz ldx [%g1+1*8],%l1 2042 1.1 spz ldx [%g1+2*8],%l2 2043 1.1 spz ldx [%g1+3*8],%l3 2044 1.1 spz ldx [%g1+4*8],%l4 2045 1.1 spz ldx [%g1+5*8],%l5 2046 1.1 spz ldx [%g1+6*8],%l6 2047 1.1 spz ldx [%g1+7*8],%l7 2048 1.1 spz ldx [%g1+8*8],%o0 2049 1.1 spz ldx [%g1+9*8],%o1 2050 1.1 spz ldx [%g1+10*8],%o2 2051 1.1 spz ldx [%g1+11*8],%o3 2052 1.1 spz ldx [%g1+12*8],%o4 2053 1.1 spz ldx [%g1+13*8],%o5 2054 1.1 spz ldd [%g1+14*8],%f24 2055 1.1 spz ldd [%g1+15*8],%f26 2056 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 2057 1.1 spz ldx [%g2+0*8],%l0 2058 1.1 spz ldx [%g2+1*8],%l1 2059 1.1 spz ldx [%g2+2*8],%l2 2060 1.1 spz ldx [%g2+3*8],%l3 2061 1.1 spz ldx [%g2+4*8],%l4 2062 1.1 spz ldx [%g2+5*8],%l5 2063 1.1 spz ldx [%g2+6*8],%l6 2064 1.1 spz ldx [%g2+7*8],%l7 2065 1.1 spz ldx [%g2+8*8],%o0 2066 1.1 spz ldx [%g2+9*8],%o1 2067 1.1 spz ldx [%g2+10*8],%o2 2068 1.1 spz ldx [%g2+11*8],%o3 2069 1.1 spz ldx [%g2+12*8],%o4 2070 1.1 spz ldx [%g2+13*8],%o5 2071 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 2072 1.1 spz ldx [%g2+14*8],%l0 2073 1.1 spz ldx [%g2+15*8],%l1 2074 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 2075 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 2076 1.1 spz 2077 1.1 spz srlx %g4, 32, %o4 ! unpack %g4 2078 1.1 spz srl %g4, %g0, %o5 2079 1.1 spz sub %o4, 5, %o4 2080 1.1 spz mov %g3, %o7 2081 1.1 spz sllx %o4, 32, %g4 ! re-pack %g4 2082 1.1 spz or %o5, %g4, %g4 2083 1.1 spz srl %o5, %o4, %o5 2084 1.1 spz srl %o5, 2, %o4 2085 1.1 spz and %o5, 3, %o5 2086 1.1 spz and %o4, 7, %o4 2087 1.1 spz sll %o5, 3, %o5 ! offset within first cache line 2088 1.1 spz add %o5, %o7, %o7 ! 
of the pwrtbl 2089 1.1 spz or %g0, 1, %o5 2090 1.1 spz sll %o5, %o4, %o4 2091 1.1 spz wr %o4, %g0, %ccr 2092 1.1 spz b .Lstride_16 2093 1.1 spz nop 2094 1.1 spz .align 16 2095 1.1 spz .Lstride_16: 2096 1.1 spz ldx [%o7+0*32], %i0 2097 1.1 spz ldx [%o7+8*32], %i1 2098 1.1 spz ldx [%o7+1*32], %o4 2099 1.1 spz ldx [%o7+9*32], %o5 2100 1.1 spz movvs %icc, %o4, %i0 2101 1.1 spz ldx [%o7+2*32], %o4 2102 1.1 spz movvs %icc, %o5, %i1 2103 1.1 spz ldx [%o7+10*32],%o5 2104 1.1 spz move %icc, %o4, %i0 2105 1.1 spz ldx [%o7+3*32], %o4 2106 1.1 spz move %icc, %o5, %i1 2107 1.1 spz ldx [%o7+11*32],%o5 2108 1.1 spz movneg %icc, %o4, %i0 2109 1.1 spz ldx [%o7+4*32], %o4 2110 1.1 spz movneg %icc, %o5, %i1 2111 1.1 spz ldx [%o7+12*32],%o5 2112 1.1 spz movcs %xcc, %o4, %i0 2113 1.1 spz ldx [%o7+5*32],%o4 2114 1.1 spz movcs %xcc, %o5, %i1 2115 1.1 spz ldx [%o7+13*32],%o5 2116 1.1 spz movvs %xcc, %o4, %i0 2117 1.1 spz ldx [%o7+6*32], %o4 2118 1.1 spz movvs %xcc, %o5, %i1 2119 1.1 spz ldx [%o7+14*32],%o5 2120 1.1 spz move %xcc, %o4, %i0 2121 1.1 spz ldx [%o7+7*32], %o4 2122 1.1 spz move %xcc, %o5, %i1 2123 1.1 spz ldx [%o7+15*32],%o5 2124 1.1 spz movneg %xcc, %o4, %i0 2125 1.1 spz add %o7,16*32, %o7 2126 1.1 spz movneg %xcc, %o5, %i1 2127 1.1 spz ldx [%o7+0*32], %i2 2128 1.1 spz ldx [%o7+8*32], %i3 2129 1.1 spz ldx [%o7+1*32], %o4 2130 1.1 spz ldx [%o7+9*32], %o5 2131 1.1 spz movvs %icc, %o4, %i2 2132 1.1 spz ldx [%o7+2*32], %o4 2133 1.1 spz movvs %icc, %o5, %i3 2134 1.1 spz ldx [%o7+10*32],%o5 2135 1.1 spz move %icc, %o4, %i2 2136 1.1 spz ldx [%o7+3*32], %o4 2137 1.1 spz move %icc, %o5, %i3 2138 1.1 spz ldx [%o7+11*32],%o5 2139 1.1 spz movneg %icc, %o4, %i2 2140 1.1 spz ldx [%o7+4*32], %o4 2141 1.1 spz movneg %icc, %o5, %i3 2142 1.1 spz ldx [%o7+12*32],%o5 2143 1.1 spz movcs %xcc, %o4, %i2 2144 1.1 spz ldx [%o7+5*32],%o4 2145 1.1 spz movcs %xcc, %o5, %i3 2146 1.1 spz ldx [%o7+13*32],%o5 2147 1.1 spz movvs %xcc, %o4, %i2 2148 1.1 spz ldx [%o7+6*32], %o4 2149 1.1 spz movvs %xcc, %o5, %i3 
2150 1.1 spz ldx [%o7+14*32],%o5 2151 1.1 spz move %xcc, %o4, %i2 2152 1.1 spz ldx [%o7+7*32], %o4 2153 1.1 spz move %xcc, %o5, %i3 2154 1.1 spz ldx [%o7+15*32],%o5 2155 1.1 spz movneg %xcc, %o4, %i2 2156 1.1 spz add %o7,16*32, %o7 2157 1.1 spz movneg %xcc, %o5, %i3 2158 1.1 spz ldx [%o7+0*32], %i4 2159 1.1 spz ldx [%o7+8*32], %i5 2160 1.1 spz ldx [%o7+1*32], %o4 2161 1.1 spz ldx [%o7+9*32], %o5 2162 1.1 spz movvs %icc, %o4, %i4 2163 1.1 spz ldx [%o7+2*32], %o4 2164 1.1 spz movvs %icc, %o5, %i5 2165 1.1 spz ldx [%o7+10*32],%o5 2166 1.1 spz move %icc, %o4, %i4 2167 1.1 spz ldx [%o7+3*32], %o4 2168 1.1 spz move %icc, %o5, %i5 2169 1.1 spz ldx [%o7+11*32],%o5 2170 1.1 spz movneg %icc, %o4, %i4 2171 1.1 spz ldx [%o7+4*32], %o4 2172 1.1 spz movneg %icc, %o5, %i5 2173 1.1 spz ldx [%o7+12*32],%o5 2174 1.1 spz movcs %xcc, %o4, %i4 2175 1.1 spz ldx [%o7+5*32],%o4 2176 1.1 spz movcs %xcc, %o5, %i5 2177 1.1 spz ldx [%o7+13*32],%o5 2178 1.1 spz movvs %xcc, %o4, %i4 2179 1.1 spz ldx [%o7+6*32], %o4 2180 1.1 spz movvs %xcc, %o5, %i5 2181 1.1 spz ldx [%o7+14*32],%o5 2182 1.1 spz move %xcc, %o4, %i4 2183 1.1 spz ldx [%o7+7*32], %o4 2184 1.1 spz move %xcc, %o5, %i5 2185 1.1 spz ldx [%o7+15*32],%o5 2186 1.1 spz movneg %xcc, %o4, %i4 2187 1.1 spz add %o7,16*32, %o7 2188 1.1 spz movneg %xcc, %o5, %i5 2189 1.1 spz ldx [%o7+0*32], %l0 2190 1.1 spz ldx [%o7+8*32], %l1 2191 1.1 spz ldx [%o7+1*32], %o4 2192 1.1 spz ldx [%o7+9*32], %o5 2193 1.1 spz movvs %icc, %o4, %l0 2194 1.1 spz ldx [%o7+2*32], %o4 2195 1.1 spz movvs %icc, %o5, %l1 2196 1.1 spz ldx [%o7+10*32],%o5 2197 1.1 spz move %icc, %o4, %l0 2198 1.1 spz ldx [%o7+3*32], %o4 2199 1.1 spz move %icc, %o5, %l1 2200 1.1 spz ldx [%o7+11*32],%o5 2201 1.1 spz movneg %icc, %o4, %l0 2202 1.1 spz ldx [%o7+4*32], %o4 2203 1.1 spz movneg %icc, %o5, %l1 2204 1.1 spz ldx [%o7+12*32],%o5 2205 1.1 spz movcs %xcc, %o4, %l0 2206 1.1 spz ldx [%o7+5*32],%o4 2207 1.1 spz movcs %xcc, %o5, %l1 2208 1.1 spz ldx [%o7+13*32],%o5 2209 1.1 spz movvs %xcc, %o4, 
%l0 2210 1.1 spz ldx [%o7+6*32], %o4 2211 1.1 spz movvs %xcc, %o5, %l1 2212 1.1 spz ldx [%o7+14*32],%o5 2213 1.1 spz move %xcc, %o4, %l0 2214 1.1 spz ldx [%o7+7*32], %o4 2215 1.1 spz move %xcc, %o5, %l1 2216 1.1 spz ldx [%o7+15*32],%o5 2217 1.1 spz movneg %xcc, %o4, %l0 2218 1.1 spz add %o7,16*32, %o7 2219 1.1 spz movneg %xcc, %o5, %l1 2220 1.1 spz ldx [%o7+0*32], %l2 2221 1.1 spz ldx [%o7+8*32], %l3 2222 1.1 spz ldx [%o7+1*32], %o4 2223 1.1 spz ldx [%o7+9*32], %o5 2224 1.1 spz movvs %icc, %o4, %l2 2225 1.1 spz ldx [%o7+2*32], %o4 2226 1.1 spz movvs %icc, %o5, %l3 2227 1.1 spz ldx [%o7+10*32],%o5 2228 1.1 spz move %icc, %o4, %l2 2229 1.1 spz ldx [%o7+3*32], %o4 2230 1.1 spz move %icc, %o5, %l3 2231 1.1 spz ldx [%o7+11*32],%o5 2232 1.1 spz movneg %icc, %o4, %l2 2233 1.1 spz ldx [%o7+4*32], %o4 2234 1.1 spz movneg %icc, %o5, %l3 2235 1.1 spz ldx [%o7+12*32],%o5 2236 1.1 spz movcs %xcc, %o4, %l2 2237 1.1 spz ldx [%o7+5*32],%o4 2238 1.1 spz movcs %xcc, %o5, %l3 2239 1.1 spz ldx [%o7+13*32],%o5 2240 1.1 spz movvs %xcc, %o4, %l2 2241 1.1 spz ldx [%o7+6*32], %o4 2242 1.1 spz movvs %xcc, %o5, %l3 2243 1.1 spz ldx [%o7+14*32],%o5 2244 1.1 spz move %xcc, %o4, %l2 2245 1.1 spz ldx [%o7+7*32], %o4 2246 1.1 spz move %xcc, %o5, %l3 2247 1.1 spz ldx [%o7+15*32],%o5 2248 1.1 spz movneg %xcc, %o4, %l2 2249 1.1 spz add %o7,16*32, %o7 2250 1.1 spz movneg %xcc, %o5, %l3 2251 1.1 spz ldx [%o7+0*32], %l4 2252 1.1 spz ldx [%o7+8*32], %l5 2253 1.1 spz ldx [%o7+1*32], %o4 2254 1.1 spz ldx [%o7+9*32], %o5 2255 1.1 spz movvs %icc, %o4, %l4 2256 1.1 spz ldx [%o7+2*32], %o4 2257 1.1 spz movvs %icc, %o5, %l5 2258 1.1 spz ldx [%o7+10*32],%o5 2259 1.1 spz move %icc, %o4, %l4 2260 1.1 spz ldx [%o7+3*32], %o4 2261 1.1 spz move %icc, %o5, %l5 2262 1.1 spz ldx [%o7+11*32],%o5 2263 1.1 spz movneg %icc, %o4, %l4 2264 1.1 spz ldx [%o7+4*32], %o4 2265 1.1 spz movneg %icc, %o5, %l5 2266 1.1 spz ldx [%o7+12*32],%o5 2267 1.1 spz movcs %xcc, %o4, %l4 2268 1.1 spz ldx [%o7+5*32],%o4 2269 1.1 spz movcs %xcc, 
%o5, %l5 2270 1.1 spz ldx [%o7+13*32],%o5 2271 1.1 spz movvs %xcc, %o4, %l4 2272 1.1 spz ldx [%o7+6*32], %o4 2273 1.1 spz movvs %xcc, %o5, %l5 2274 1.1 spz ldx [%o7+14*32],%o5 2275 1.1 spz move %xcc, %o4, %l4 2276 1.1 spz ldx [%o7+7*32], %o4 2277 1.1 spz move %xcc, %o5, %l5 2278 1.1 spz ldx [%o7+15*32],%o5 2279 1.1 spz movneg %xcc, %o4, %l4 2280 1.1 spz add %o7,16*32, %o7 2281 1.1 spz movneg %xcc, %o5, %l5 2282 1.1 spz ldx [%o7+0*32], %l6 2283 1.1 spz ldx [%o7+8*32], %l7 2284 1.1 spz ldx [%o7+1*32], %o4 2285 1.1 spz ldx [%o7+9*32], %o5 2286 1.1 spz movvs %icc, %o4, %l6 2287 1.1 spz ldx [%o7+2*32], %o4 2288 1.1 spz movvs %icc, %o5, %l7 2289 1.1 spz ldx [%o7+10*32],%o5 2290 1.1 spz move %icc, %o4, %l6 2291 1.1 spz ldx [%o7+3*32], %o4 2292 1.1 spz move %icc, %o5, %l7 2293 1.1 spz ldx [%o7+11*32],%o5 2294 1.1 spz movneg %icc, %o4, %l6 2295 1.1 spz ldx [%o7+4*32], %o4 2296 1.1 spz movneg %icc, %o5, %l7 2297 1.1 spz ldx [%o7+12*32],%o5 2298 1.1 spz movcs %xcc, %o4, %l6 2299 1.1 spz ldx [%o7+5*32],%o4 2300 1.1 spz movcs %xcc, %o5, %l7 2301 1.1 spz ldx [%o7+13*32],%o5 2302 1.1 spz movvs %xcc, %o4, %l6 2303 1.1 spz ldx [%o7+6*32], %o4 2304 1.1 spz movvs %xcc, %o5, %l7 2305 1.1 spz ldx [%o7+14*32],%o5 2306 1.1 spz move %xcc, %o4, %l6 2307 1.1 spz ldx [%o7+7*32], %o4 2308 1.1 spz move %xcc, %o5, %l7 2309 1.1 spz ldx [%o7+15*32],%o5 2310 1.1 spz movneg %xcc, %o4, %l6 2311 1.1 spz add %o7,16*32, %o7 2312 1.1 spz movneg %xcc, %o5, %l7 2313 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 2314 1.1 spz ldx [%i7+0*32], %i0 2315 1.1 spz ldx [%i7+8*32], %i1 2316 1.1 spz ldx [%i7+1*32], %o4 2317 1.1 spz ldx [%i7+9*32], %o5 2318 1.1 spz movvs %icc, %o4, %i0 2319 1.1 spz ldx [%i7+2*32], %o4 2320 1.1 spz movvs %icc, %o5, %i1 2321 1.1 spz ldx [%i7+10*32],%o5 2322 1.1 spz move %icc, %o4, %i0 2323 1.1 spz ldx [%i7+3*32], %o4 2324 1.1 spz move %icc, %o5, %i1 2325 1.1 spz ldx [%i7+11*32],%o5 2326 1.1 spz movneg %icc, %o4, %i0 2327 1.1 spz ldx [%i7+4*32], %o4 2328 1.1 spz movneg %icc, %o5, %i1 2329 
1.1 spz ldx [%i7+12*32],%o5 2330 1.1 spz movcs %xcc, %o4, %i0 2331 1.1 spz ldx [%i7+5*32],%o4 2332 1.1 spz movcs %xcc, %o5, %i1 2333 1.1 spz ldx [%i7+13*32],%o5 2334 1.1 spz movvs %xcc, %o4, %i0 2335 1.1 spz ldx [%i7+6*32], %o4 2336 1.1 spz movvs %xcc, %o5, %i1 2337 1.1 spz ldx [%i7+14*32],%o5 2338 1.1 spz move %xcc, %o4, %i0 2339 1.1 spz ldx [%i7+7*32], %o4 2340 1.1 spz move %xcc, %o5, %i1 2341 1.1 spz ldx [%i7+15*32],%o5 2342 1.1 spz movneg %xcc, %o4, %i0 2343 1.1 spz add %i7,16*32, %i7 2344 1.1 spz movneg %xcc, %o5, %i1 2345 1.1 spz srax %g4, 32, %o4 ! unpack %g4 2346 1.1 spz srl %g4, %g0, %o5 2347 1.1 spz sub %o4, 5, %o4 2348 1.1 spz mov %g3, %i7 2349 1.1 spz sllx %o4, 32, %g4 ! re-pack %g4 2350 1.1 spz or %o5, %g4, %g4 2351 1.1 spz srl %o5, %o4, %o5 2352 1.1 spz srl %o5, 2, %o4 2353 1.1 spz and %o5, 3, %o5 2354 1.1 spz and %o4, 7, %o4 2355 1.1 spz sll %o5, 3, %o5 ! offset within first cache line 2356 1.1 spz add %o5, %i7, %i7 ! of the pwrtbl 2357 1.1 spz or %g0, 1, %o5 2358 1.1 spz sll %o5, %o4, %o4 2359 1.1 spz .word 0x81b02940+16-1 ! montsqr 16-1 2360 1.1 spz fbu,pn %fcc3,.Labort_16 2361 1.1 spz #ifndef __arch64__ 2362 1.1 spz and %fp,%g5,%g5 2363 1.1 spz brz,pn %g5,.Labort_16 2364 1.1 spz #endif 2365 1.1 spz nop 2366 1.1 spz .word 0x81b02940+16-1 ! montsqr 16-1 2367 1.1 spz fbu,pn %fcc3,.Labort_16 2368 1.1 spz #ifndef __arch64__ 2369 1.1 spz and %fp,%g5,%g5 2370 1.1 spz brz,pn %g5,.Labort_16 2371 1.1 spz #endif 2372 1.1 spz nop 2373 1.1 spz .word 0x81b02940+16-1 ! montsqr 16-1 2374 1.1 spz fbu,pn %fcc3,.Labort_16 2375 1.1 spz #ifndef __arch64__ 2376 1.1 spz and %fp,%g5,%g5 2377 1.1 spz brz,pn %g5,.Labort_16 2378 1.1 spz #endif 2379 1.1 spz nop 2380 1.1 spz .word 0x81b02940+16-1 ! montsqr 16-1 2381 1.1 spz fbu,pn %fcc3,.Labort_16 2382 1.1 spz #ifndef __arch64__ 2383 1.1 spz and %fp,%g5,%g5 2384 1.1 spz brz,pn %g5,.Labort_16 2385 1.1 spz #endif 2386 1.1 spz nop 2387 1.1 spz .word 0x81b02940+16-1 ! 
montsqr 16-1 2388 1.1 spz fbu,pn %fcc3,.Labort_16 2389 1.1 spz #ifndef __arch64__ 2390 1.1 spz and %fp,%g5,%g5 2391 1.1 spz brz,pn %g5,.Labort_16 2392 1.1 spz #endif 2393 1.1 spz nop 2394 1.1 spz wr %o4, %g0, %ccr 2395 1.1 spz .word 0x81b02920+16-1 ! montmul 16-1 2396 1.1 spz fbu,pn %fcc3,.Labort_16 2397 1.1 spz #ifndef __arch64__ 2398 1.1 spz and %fp,%g5,%g5 2399 1.1 spz brz,pn %g5,.Labort_16 2400 1.1 spz #endif 2401 1.1 spz 2402 1.1 spz srax %g4, 32, %o4 2403 1.1 spz #ifdef __arch64__ 2404 1.1 spz brgez %o4,.Lstride_16 2405 1.1 spz restore 2406 1.1 spz restore 2407 1.1 spz restore 2408 1.1 spz restore 2409 1.1 spz restore 2410 1.1 spz #else 2411 1.1 spz brgez %o4,.Lstride_16 2412 1.1 spz restore; and %fp,%g5,%g5 2413 1.1 spz restore; and %fp,%g5,%g5 2414 1.1 spz restore; and %fp,%g5,%g5 2415 1.1 spz restore; and %fp,%g5,%g5 2416 1.1 spz brz,pn %g5,.Labort1_16 2417 1.1 spz restore 2418 1.1 spz #endif 2419 1.1 spz .word 0x81b02310 !movxtod %l0,%f0 2420 1.1 spz .word 0x85b02311 !movxtod %l1,%f2 2421 1.1 spz .word 0x89b02312 !movxtod %l2,%f4 2422 1.1 spz .word 0x8db02313 !movxtod %l3,%f6 2423 1.1 spz .word 0x91b02314 !movxtod %l4,%f8 2424 1.1 spz .word 0x95b02315 !movxtod %l5,%f10 2425 1.1 spz .word 0x99b02316 !movxtod %l6,%f12 2426 1.1 spz .word 0x9db02317 !movxtod %l7,%f14 2427 1.1 spz .word 0xa1b02308 !movxtod %o0,%f16 2428 1.1 spz .word 0xa5b02309 !movxtod %o1,%f18 2429 1.1 spz .word 0xa9b0230a !movxtod %o2,%f20 2430 1.1 spz .word 0xadb0230b !movxtod %o3,%f22 2431 1.1 spz .word 0xbbb0230c !movxtod %o4,%f60 2432 1.1 spz .word 0xbfb0230d !movxtod %o5,%f62 2433 1.1 spz #ifdef __arch64__ 2434 1.1 spz restore 2435 1.1 spz #else 2436 1.1 spz and %fp,%g5,%g5 2437 1.1 spz restore 2438 1.1 spz and %g5,1,%o7 2439 1.1 spz and %fp,%g5,%g5 2440 1.1 spz srl %fp,0,%fp ! just in case? 2441 1.1 spz or %o7,%g5,%g5 2442 1.1 spz brz,a,pn %g5,.Ldone_16 2443 1.1 spz mov 0,%i0 ! 
return failure 2444 1.1 spz #endif 2445 1.1 spz std %f0,[%g1+0*8] 2446 1.1 spz std %f2,[%g1+1*8] 2447 1.1 spz std %f4,[%g1+2*8] 2448 1.1 spz std %f6,[%g1+3*8] 2449 1.1 spz std %f8,[%g1+4*8] 2450 1.1 spz std %f10,[%g1+5*8] 2451 1.1 spz std %f12,[%g1+6*8] 2452 1.1 spz std %f14,[%g1+7*8] 2453 1.1 spz std %f16,[%g1+8*8] 2454 1.1 spz std %f18,[%g1+9*8] 2455 1.1 spz std %f20,[%g1+10*8] 2456 1.1 spz std %f22,[%g1+11*8] 2457 1.1 spz std %f60,[%g1+12*8] 2458 1.1 spz std %f62,[%g1+13*8] 2459 1.1 spz std %f24,[%g1+14*8] 2460 1.1 spz std %f26,[%g1+15*8] 2461 1.1 spz mov 1,%i0 ! return success 2462 1.1 spz .Ldone_16: 2463 1.1 spz ret 2464 1.1 spz restore 2465 1.1 spz 2466 1.1 spz .Labort_16: 2467 1.1 spz restore 2468 1.1 spz restore 2469 1.1 spz restore 2470 1.1 spz restore 2471 1.1 spz restore 2472 1.1 spz .Labort1_16: 2473 1.1 spz restore 2474 1.1 spz 2475 1.1 spz mov 0,%i0 ! return failure 2476 1.1 spz ret 2477 1.1 spz restore 2478 1.1 spz .type bn_pwr5_mont_t4_16, #function 2479 1.1 spz .size bn_pwr5_mont_t4_16, .-bn_pwr5_mont_t4_16 2480 1.1 spz .globl bn_pwr5_mont_t4_24 2481 1.1 spz .align 32 2482 1.1 spz bn_pwr5_mont_t4_24: 2483 1.1 spz #ifdef __arch64__ 2484 1.1 spz mov 0,%g5 2485 1.1 spz mov -128,%g4 2486 1.1 spz #elif defined(SPARCV9_64BIT_STACK) 2487 1.1 spz SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) 2488 1.1 spz ld [%g1+0],%g1 ! OPENSSL_sparcv9_P[0] 2489 1.1 spz mov -2047,%g4 2490 1.1 spz and %g1,SPARCV9_64BIT_STACK,%g1 2491 1.1 spz movrz %g1,0,%g4 2492 1.1 spz mov -1,%g5 2493 1.1 spz add %g4,-128,%g4 2494 1.1 spz #else 2495 1.1 spz mov -1,%g5 2496 1.1 spz mov -128,%g4 2497 1.1 spz #endif 2498 1.1 spz sllx %g5,32,%g5 2499 1.1 spz save %sp,%g4,%sp 2500 1.1 spz #ifndef __arch64__ 2501 1.1 spz save %sp,-128,%sp ! 
warm it up 2502 1.1 spz save %sp,-128,%sp 2503 1.1 spz save %sp,-128,%sp 2504 1.1 spz save %sp,-128,%sp 2505 1.1 spz save %sp,-128,%sp 2506 1.1 spz save %sp,-128,%sp 2507 1.1 spz restore 2508 1.1 spz restore 2509 1.1 spz restore 2510 1.1 spz restore 2511 1.1 spz restore 2512 1.1 spz restore 2513 1.1 spz #endif 2514 1.1 spz and %sp,1,%g4 2515 1.1 spz or %g5,%fp,%fp 2516 1.1 spz or %g4,%g5,%g5 2517 1.1 spz 2518 1.1 spz ! copy arguments to global registers 2519 1.1 spz mov %i0,%g1 2520 1.1 spz mov %i1,%g2 2521 1.1 spz ld [%i2+0],%f1 ! load *n0 2522 1.1 spz ld [%i2+4],%f0 2523 1.1 spz mov %i3,%g3 2524 1.1 spz srl %i4,%g0,%i4 ! pack last arguments 2525 1.1 spz sllx %i5,32,%g4 2526 1.1 spz or %i4,%g4,%g4 2527 1.1 spz .word 0xbbb00f00 !fsrc2 %f0,%f0,%f60 2528 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 2529 1.1 spz ldx [%g1+0*8],%l0 2530 1.1 spz ldx [%g1+1*8],%l1 2531 1.1 spz ldx [%g1+2*8],%l2 2532 1.1 spz ldx [%g1+3*8],%l3 2533 1.1 spz ldx [%g1+4*8],%l4 2534 1.1 spz ldx [%g1+5*8],%l5 2535 1.1 spz ldx [%g1+6*8],%l6 2536 1.1 spz ldx [%g1+7*8],%l7 2537 1.1 spz ldx [%g1+8*8],%o0 2538 1.1 spz ldx [%g1+9*8],%o1 2539 1.1 spz ldx [%g1+10*8],%o2 2540 1.1 spz ldx [%g1+11*8],%o3 2541 1.1 spz ldx [%g1+12*8],%o4 2542 1.1 spz ldx [%g1+13*8],%o5 2543 1.1 spz ldd [%g1+14*8],%f24 2544 1.1 spz ldd [%g1+15*8],%f26 2545 1.1 spz ldd [%g1+16*8],%f28 2546 1.1 spz ldd [%g1+17*8],%f30 2547 1.1 spz ldd [%g1+18*8],%f32 2548 1.1 spz ldd [%g1+19*8],%f34 2549 1.1 spz ldd [%g1+20*8],%f36 2550 1.1 spz ldd [%g1+21*8],%f38 2551 1.1 spz ldd [%g1+22*8],%f40 2552 1.1 spz ldd [%g1+23*8],%f42 2553 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 2554 1.1 spz ldx [%g2+0*8],%l0 2555 1.1 spz ldx [%g2+1*8],%l1 2556 1.1 spz ldx [%g2+2*8],%l2 2557 1.1 spz ldx [%g2+3*8],%l3 2558 1.1 spz ldx [%g2+4*8],%l4 2559 1.1 spz ldx [%g2+5*8],%l5 2560 1.1 spz ldx [%g2+6*8],%l6 2561 1.1 spz ldx [%g2+7*8],%l7 2562 1.1 spz ldx [%g2+8*8],%o0 2563 1.1 spz ldx [%g2+9*8],%o1 2564 1.1 spz ldx [%g2+10*8],%o2 2565 1.1 spz ldx [%g2+11*8],%o3 2566 
1.1 spz ldx [%g2+12*8],%o4 2567 1.1 spz ldx [%g2+13*8],%o5 2568 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 2569 1.1 spz ldx [%g2+14*8],%l0 2570 1.1 spz ldx [%g2+15*8],%l1 2571 1.1 spz ldx [%g2+16*8],%l2 2572 1.1 spz ldx [%g2+17*8],%l3 2573 1.1 spz ldx [%g2+18*8],%l4 2574 1.1 spz ldx [%g2+19*8],%l5 2575 1.1 spz ldx [%g2+20*8],%l6 2576 1.1 spz ldx [%g2+21*8],%l7 2577 1.1 spz ldx [%g2+22*8],%o0 2578 1.1 spz ldx [%g2+23*8],%o1 2579 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 2580 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 2581 1.1 spz 2582 1.1 spz srlx %g4, 32, %o4 ! unpack %g4 2583 1.1 spz srl %g4, %g0, %o5 2584 1.1 spz sub %o4, 5, %o4 2585 1.1 spz mov %g3, %o7 2586 1.1 spz sllx %o4, 32, %g4 ! re-pack %g4 2587 1.1 spz or %o5, %g4, %g4 2588 1.1 spz srl %o5, %o4, %o5 2589 1.1 spz srl %o5, 2, %o4 2590 1.1 spz and %o5, 3, %o5 2591 1.1 spz and %o4, 7, %o4 2592 1.1 spz sll %o5, 3, %o5 ! offset within first cache line 2593 1.1 spz add %o5, %o7, %o7 ! of the pwrtbl 2594 1.1 spz or %g0, 1, %o5 2595 1.1 spz sll %o5, %o4, %o4 2596 1.1 spz wr %o4, %g0, %ccr 2597 1.1 spz b .Lstride_24 2598 1.1 spz nop 2599 1.1 spz .align 16 2600 1.1 spz .Lstride_24: 2601 1.1 spz ldx [%o7+0*32], %i0 2602 1.1 spz ldx [%o7+8*32], %i1 2603 1.1 spz ldx [%o7+1*32], %o4 2604 1.1 spz ldx [%o7+9*32], %o5 2605 1.1 spz movvs %icc, %o4, %i0 2606 1.1 spz ldx [%o7+2*32], %o4 2607 1.1 spz movvs %icc, %o5, %i1 2608 1.1 spz ldx [%o7+10*32],%o5 2609 1.1 spz move %icc, %o4, %i0 2610 1.1 spz ldx [%o7+3*32], %o4 2611 1.1 spz move %icc, %o5, %i1 2612 1.1 spz ldx [%o7+11*32],%o5 2613 1.1 spz movneg %icc, %o4, %i0 2614 1.1 spz ldx [%o7+4*32], %o4 2615 1.1 spz movneg %icc, %o5, %i1 2616 1.1 spz ldx [%o7+12*32],%o5 2617 1.1 spz movcs %xcc, %o4, %i0 2618 1.1 spz ldx [%o7+5*32],%o4 2619 1.1 spz movcs %xcc, %o5, %i1 2620 1.1 spz ldx [%o7+13*32],%o5 2621 1.1 spz movvs %xcc, %o4, %i0 2622 1.1 spz ldx [%o7+6*32], %o4 2623 1.1 spz movvs %xcc, %o5, %i1 2624 1.1 spz ldx [%o7+14*32],%o5 2625 1.1 spz move %xcc, %o4, %i0 2626 1.1 spz ldx 
[%o7+7*32], %o4 2627 1.1 spz move %xcc, %o5, %i1 2628 1.1 spz ldx [%o7+15*32],%o5 2629 1.1 spz movneg %xcc, %o4, %i0 2630 1.1 spz add %o7,16*32, %o7 2631 1.1 spz movneg %xcc, %o5, %i1 2632 1.1 spz ldx [%o7+0*32], %i2 2633 1.1 spz ldx [%o7+8*32], %i3 2634 1.1 spz ldx [%o7+1*32], %o4 2635 1.1 spz ldx [%o7+9*32], %o5 2636 1.1 spz movvs %icc, %o4, %i2 2637 1.1 spz ldx [%o7+2*32], %o4 2638 1.1 spz movvs %icc, %o5, %i3 2639 1.1 spz ldx [%o7+10*32],%o5 2640 1.1 spz move %icc, %o4, %i2 2641 1.1 spz ldx [%o7+3*32], %o4 2642 1.1 spz move %icc, %o5, %i3 2643 1.1 spz ldx [%o7+11*32],%o5 2644 1.1 spz movneg %icc, %o4, %i2 2645 1.1 spz ldx [%o7+4*32], %o4 2646 1.1 spz movneg %icc, %o5, %i3 2647 1.1 spz ldx [%o7+12*32],%o5 2648 1.1 spz movcs %xcc, %o4, %i2 2649 1.1 spz ldx [%o7+5*32],%o4 2650 1.1 spz movcs %xcc, %o5, %i3 2651 1.1 spz ldx [%o7+13*32],%o5 2652 1.1 spz movvs %xcc, %o4, %i2 2653 1.1 spz ldx [%o7+6*32], %o4 2654 1.1 spz movvs %xcc, %o5, %i3 2655 1.1 spz ldx [%o7+14*32],%o5 2656 1.1 spz move %xcc, %o4, %i2 2657 1.1 spz ldx [%o7+7*32], %o4 2658 1.1 spz move %xcc, %o5, %i3 2659 1.1 spz ldx [%o7+15*32],%o5 2660 1.1 spz movneg %xcc, %o4, %i2 2661 1.1 spz add %o7,16*32, %o7 2662 1.1 spz movneg %xcc, %o5, %i3 2663 1.1 spz ldx [%o7+0*32], %i4 2664 1.1 spz ldx [%o7+8*32], %i5 2665 1.1 spz ldx [%o7+1*32], %o4 2666 1.1 spz ldx [%o7+9*32], %o5 2667 1.1 spz movvs %icc, %o4, %i4 2668 1.1 spz ldx [%o7+2*32], %o4 2669 1.1 spz movvs %icc, %o5, %i5 2670 1.1 spz ldx [%o7+10*32],%o5 2671 1.1 spz move %icc, %o4, %i4 2672 1.1 spz ldx [%o7+3*32], %o4 2673 1.1 spz move %icc, %o5, %i5 2674 1.1 spz ldx [%o7+11*32],%o5 2675 1.1 spz movneg %icc, %o4, %i4 2676 1.1 spz ldx [%o7+4*32], %o4 2677 1.1 spz movneg %icc, %o5, %i5 2678 1.1 spz ldx [%o7+12*32],%o5 2679 1.1 spz movcs %xcc, %o4, %i4 2680 1.1 spz ldx [%o7+5*32],%o4 2681 1.1 spz movcs %xcc, %o5, %i5 2682 1.1 spz ldx [%o7+13*32],%o5 2683 1.1 spz movvs %xcc, %o4, %i4 2684 1.1 spz ldx [%o7+6*32], %o4 2685 1.1 spz movvs %xcc, %o5, %i5 2686 1.1 spz 
ldx [%o7+14*32],%o5 2687 1.1 spz move %xcc, %o4, %i4 2688 1.1 spz ldx [%o7+7*32], %o4 2689 1.1 spz move %xcc, %o5, %i5 2690 1.1 spz ldx [%o7+15*32],%o5 2691 1.1 spz movneg %xcc, %o4, %i4 2692 1.1 spz add %o7,16*32, %o7 2693 1.1 spz movneg %xcc, %o5, %i5 2694 1.1 spz ldx [%o7+0*32], %l0 2695 1.1 spz ldx [%o7+8*32], %l1 2696 1.1 spz ldx [%o7+1*32], %o4 2697 1.1 spz ldx [%o7+9*32], %o5 2698 1.1 spz movvs %icc, %o4, %l0 2699 1.1 spz ldx [%o7+2*32], %o4 2700 1.1 spz movvs %icc, %o5, %l1 2701 1.1 spz ldx [%o7+10*32],%o5 2702 1.1 spz move %icc, %o4, %l0 2703 1.1 spz ldx [%o7+3*32], %o4 2704 1.1 spz move %icc, %o5, %l1 2705 1.1 spz ldx [%o7+11*32],%o5 2706 1.1 spz movneg %icc, %o4, %l0 2707 1.1 spz ldx [%o7+4*32], %o4 2708 1.1 spz movneg %icc, %o5, %l1 2709 1.1 spz ldx [%o7+12*32],%o5 2710 1.1 spz movcs %xcc, %o4, %l0 2711 1.1 spz ldx [%o7+5*32],%o4 2712 1.1 spz movcs %xcc, %o5, %l1 2713 1.1 spz ldx [%o7+13*32],%o5 2714 1.1 spz movvs %xcc, %o4, %l0 2715 1.1 spz ldx [%o7+6*32], %o4 2716 1.1 spz movvs %xcc, %o5, %l1 2717 1.1 spz ldx [%o7+14*32],%o5 2718 1.1 spz move %xcc, %o4, %l0 2719 1.1 spz ldx [%o7+7*32], %o4 2720 1.1 spz move %xcc, %o5, %l1 2721 1.1 spz ldx [%o7+15*32],%o5 2722 1.1 spz movneg %xcc, %o4, %l0 2723 1.1 spz add %o7,16*32, %o7 2724 1.1 spz movneg %xcc, %o5, %l1 2725 1.1 spz ldx [%o7+0*32], %l2 2726 1.1 spz ldx [%o7+8*32], %l3 2727 1.1 spz ldx [%o7+1*32], %o4 2728 1.1 spz ldx [%o7+9*32], %o5 2729 1.1 spz movvs %icc, %o4, %l2 2730 1.1 spz ldx [%o7+2*32], %o4 2731 1.1 spz movvs %icc, %o5, %l3 2732 1.1 spz ldx [%o7+10*32],%o5 2733 1.1 spz move %icc, %o4, %l2 2734 1.1 spz ldx [%o7+3*32], %o4 2735 1.1 spz move %icc, %o5, %l3 2736 1.1 spz ldx [%o7+11*32],%o5 2737 1.1 spz movneg %icc, %o4, %l2 2738 1.1 spz ldx [%o7+4*32], %o4 2739 1.1 spz movneg %icc, %o5, %l3 2740 1.1 spz ldx [%o7+12*32],%o5 2741 1.1 spz movcs %xcc, %o4, %l2 2742 1.1 spz ldx [%o7+5*32],%o4 2743 1.1 spz movcs %xcc, %o5, %l3 2744 1.1 spz ldx [%o7+13*32],%o5 2745 1.1 spz movvs %xcc, %o4, %l2 2746 1.1 
spz ldx [%o7+6*32], %o4 2747 1.1 spz movvs %xcc, %o5, %l3 2748 1.1 spz ldx [%o7+14*32],%o5 2749 1.1 spz move %xcc, %o4, %l2 2750 1.1 spz ldx [%o7+7*32], %o4 2751 1.1 spz move %xcc, %o5, %l3 2752 1.1 spz ldx [%o7+15*32],%o5 2753 1.1 spz movneg %xcc, %o4, %l2 2754 1.1 spz add %o7,16*32, %o7 2755 1.1 spz movneg %xcc, %o5, %l3 2756 1.1 spz ldx [%o7+0*32], %l4 2757 1.1 spz ldx [%o7+8*32], %l5 2758 1.1 spz ldx [%o7+1*32], %o4 2759 1.1 spz ldx [%o7+9*32], %o5 2760 1.1 spz movvs %icc, %o4, %l4 2761 1.1 spz ldx [%o7+2*32], %o4 2762 1.1 spz movvs %icc, %o5, %l5 2763 1.1 spz ldx [%o7+10*32],%o5 2764 1.1 spz move %icc, %o4, %l4 2765 1.1 spz ldx [%o7+3*32], %o4 2766 1.1 spz move %icc, %o5, %l5 2767 1.1 spz ldx [%o7+11*32],%o5 2768 1.1 spz movneg %icc, %o4, %l4 2769 1.1 spz ldx [%o7+4*32], %o4 2770 1.1 spz movneg %icc, %o5, %l5 2771 1.1 spz ldx [%o7+12*32],%o5 2772 1.1 spz movcs %xcc, %o4, %l4 2773 1.1 spz ldx [%o7+5*32],%o4 2774 1.1 spz movcs %xcc, %o5, %l5 2775 1.1 spz ldx [%o7+13*32],%o5 2776 1.1 spz movvs %xcc, %o4, %l4 2777 1.1 spz ldx [%o7+6*32], %o4 2778 1.1 spz movvs %xcc, %o5, %l5 2779 1.1 spz ldx [%o7+14*32],%o5 2780 1.1 spz move %xcc, %o4, %l4 2781 1.1 spz ldx [%o7+7*32], %o4 2782 1.1 spz move %xcc, %o5, %l5 2783 1.1 spz ldx [%o7+15*32],%o5 2784 1.1 spz movneg %xcc, %o4, %l4 2785 1.1 spz add %o7,16*32, %o7 2786 1.1 spz movneg %xcc, %o5, %l5 2787 1.1 spz ldx [%o7+0*32], %l6 2788 1.1 spz ldx [%o7+8*32], %l7 2789 1.1 spz ldx [%o7+1*32], %o4 2790 1.1 spz ldx [%o7+9*32], %o5 2791 1.1 spz movvs %icc, %o4, %l6 2792 1.1 spz ldx [%o7+2*32], %o4 2793 1.1 spz movvs %icc, %o5, %l7 2794 1.1 spz ldx [%o7+10*32],%o5 2795 1.1 spz move %icc, %o4, %l6 2796 1.1 spz ldx [%o7+3*32], %o4 2797 1.1 spz move %icc, %o5, %l7 2798 1.1 spz ldx [%o7+11*32],%o5 2799 1.1 spz movneg %icc, %o4, %l6 2800 1.1 spz ldx [%o7+4*32], %o4 2801 1.1 spz movneg %icc, %o5, %l7 2802 1.1 spz ldx [%o7+12*32],%o5 2803 1.1 spz movcs %xcc, %o4, %l6 2804 1.1 spz ldx [%o7+5*32],%o4 2805 1.1 spz movcs %xcc, %o5, %l7 2806 
1.1 spz ldx [%o7+13*32],%o5 2807 1.1 spz movvs %xcc, %o4, %l6 2808 1.1 spz ldx [%o7+6*32], %o4 2809 1.1 spz movvs %xcc, %o5, %l7 2810 1.1 spz ldx [%o7+14*32],%o5 2811 1.1 spz move %xcc, %o4, %l6 2812 1.1 spz ldx [%o7+7*32], %o4 2813 1.1 spz move %xcc, %o5, %l7 2814 1.1 spz ldx [%o7+15*32],%o5 2815 1.1 spz movneg %xcc, %o4, %l6 2816 1.1 spz add %o7,16*32, %o7 2817 1.1 spz movneg %xcc, %o5, %l7 2818 1.1 spz save %sp,-128,%sp; or %g5,%fp,%fp 2819 1.1 spz ldx [%i7+0*32], %i0 2820 1.1 spz ldx [%i7+8*32], %i1 2821 1.1 spz ldx [%i7+1*32], %o4 2822 1.1 spz ldx [%i7+9*32], %o5 2823 1.1 spz movvs %icc, %o4, %i0 2824 1.1 spz ldx [%i7+2*32], %o4 2825 1.1 spz movvs %icc, %o5, %i1 2826 1.1 spz ldx [%i7+10*32],%o5 2827 1.1 spz move %icc, %o4, %i0 2828 1.1 spz ldx [%i7+3*32], %o4 2829 1.1 spz move %icc, %o5, %i1 2830 1.1 spz ldx [%i7+11*32],%o5 2831 1.1 spz movneg %icc, %o4, %i0 2832 1.1 spz ldx [%i7+4*32], %o4 2833 1.1 spz movneg %icc, %o5, %i1 2834 1.1 spz ldx [%i7+12*32],%o5 2835 1.1 spz movcs %xcc, %o4, %i0 2836 1.1 spz ldx [%i7+5*32],%o4 2837 1.1 spz movcs %xcc, %o5, %i1 2838 1.1 spz ldx [%i7+13*32],%o5 2839 1.1 spz movvs %xcc, %o4, %i0 2840 1.1 spz ldx [%i7+6*32], %o4 2841 1.1 spz movvs %xcc, %o5, %i1 2842 1.1 spz ldx [%i7+14*32],%o5 2843 1.1 spz move %xcc, %o4, %i0 2844 1.1 spz ldx [%i7+7*32], %o4 2845 1.1 spz move %xcc, %o5, %i1 2846 1.1 spz ldx [%i7+15*32],%o5 2847 1.1 spz movneg %xcc, %o4, %i0 2848 1.1 spz add %i7,16*32, %i7 2849 1.1 spz movneg %xcc, %o5, %i1 2850 1.1 spz ldx [%i7+0*32], %i2 2851 1.1 spz ldx [%i7+8*32], %i3 2852 1.1 spz ldx [%i7+1*32], %o4 2853 1.1 spz ldx [%i7+9*32], %o5 2854 1.1 spz movvs %icc, %o4, %i2 2855 1.1 spz ldx [%i7+2*32], %o4 2856 1.1 spz movvs %icc, %o5, %i3 2857 1.1 spz ldx [%i7+10*32],%o5 2858 1.1 spz move %icc, %o4, %i2 2859 1.1 spz ldx [%i7+3*32], %o4 2860 1.1 spz move %icc, %o5, %i3 2861 1.1 spz ldx [%i7+11*32],%o5 2862 1.1 spz movneg %icc, %o4, %i2 2863 1.1 spz ldx [%i7+4*32], %o4 2864 1.1 spz movneg %icc, %o5, %i3 2865 1.1 spz ldx 
[%i7+12*32],%o5 2866 1.1 spz movcs %xcc, %o4, %i2 2867 1.1 spz ldx [%i7+5*32],%o4 2868 1.1 spz movcs %xcc, %o5, %i3 2869 1.1 spz ldx [%i7+13*32],%o5 2870 1.1 spz movvs %xcc, %o4, %i2 2871 1.1 spz ldx [%i7+6*32], %o4 2872 1.1 spz movvs %xcc, %o5, %i3 2873 1.1 spz ldx [%i7+14*32],%o5 2874 1.1 spz move %xcc, %o4, %i2 2875 1.1 spz ldx [%i7+7*32], %o4 2876 1.1 spz move %xcc, %o5, %i3 2877 1.1 spz ldx [%i7+15*32],%o5 2878 1.1 spz movneg %xcc, %o4, %i2 2879 1.1 spz add %i7,16*32, %i7 2880 1.1 spz movneg %xcc, %o5, %i3 2881 1.1 spz ldx [%i7+0*32], %i4 2882 1.1 spz ldx [%i7+8*32], %i5 2883 1.1 spz ldx [%i7+1*32], %o4 2884 1.1 spz ldx [%i7+9*32], %o5 2885 1.1 spz movvs %icc, %o4, %i4 2886 1.1 spz ldx [%i7+2*32], %o4 2887 1.1 spz movvs %icc, %o5, %i5 2888 1.1 spz ldx [%i7+10*32],%o5 2889 1.1 spz move %icc, %o4, %i4 2890 1.1 spz ldx [%i7+3*32], %o4 2891 1.1 spz move %icc, %o5, %i5 2892 1.1 spz ldx [%i7+11*32],%o5 2893 1.1 spz movneg %icc, %o4, %i4 2894 1.1 spz ldx [%i7+4*32], %o4 2895 1.1 spz movneg %icc, %o5, %i5 2896 1.1 spz ldx [%i7+12*32],%o5 2897 1.1 spz movcs %xcc, %o4, %i4 2898 1.1 spz ldx [%i7+5*32],%o4 2899 1.1 spz movcs %xcc, %o5, %i5 2900 1.1 spz ldx [%i7+13*32],%o5 2901 1.1 spz movvs %xcc, %o4, %i4 2902 1.1 spz ldx [%i7+6*32], %o4 2903 1.1 spz movvs %xcc, %o5, %i5 2904 1.1 spz ldx [%i7+14*32],%o5 2905 1.1 spz move %xcc, %o4, %i4 2906 1.1 spz ldx [%i7+7*32], %o4 2907 1.1 spz move %xcc, %o5, %i5 2908 1.1 spz ldx [%i7+15*32],%o5 2909 1.1 spz movneg %xcc, %o4, %i4 2910 1.1 spz add %i7,16*32, %i7 2911 1.1 spz movneg %xcc, %o5, %i5 2912 1.1 spz ldx [%i7+0*32], %l0 2913 1.1 spz ldx [%i7+8*32], %l1 2914 1.1 spz ldx [%i7+1*32], %o4 2915 1.1 spz ldx [%i7+9*32], %o5 2916 1.1 spz movvs %icc, %o4, %l0 2917 1.1 spz ldx [%i7+2*32], %o4 2918 1.1 spz movvs %icc, %o5, %l1 2919 1.1 spz ldx [%i7+10*32],%o5 2920 1.1 spz move %icc, %o4, %l0 2921 1.1 spz ldx [%i7+3*32], %o4 2922 1.1 spz move %icc, %o5, %l1 2923 1.1 spz ldx [%i7+11*32],%o5 2924 1.1 spz movneg %icc, %o4, %l0 2925 1.1 spz 
ldx [%i7+4*32], %o4 2926 1.1 spz movneg %icc, %o5, %l1 2927 1.1 spz ldx [%i7+12*32],%o5 2928 1.1 spz movcs %xcc, %o4, %l0 2929 1.1 spz ldx [%i7+5*32],%o4 2930 1.1 spz movcs %xcc, %o5, %l1 2931 1.1 spz ldx [%i7+13*32],%o5 2932 1.1 spz movvs %xcc, %o4, %l0 2933 1.1 spz ldx [%i7+6*32], %o4 2934 1.1 spz movvs %xcc, %o5, %l1 2935 1.1 spz ldx [%i7+14*32],%o5 2936 1.1 spz move %xcc, %o4, %l0 2937 1.1 spz ldx [%i7+7*32], %o4 2938 1.1 spz move %xcc, %o5, %l1 2939 1.1 spz ldx [%i7+15*32],%o5 2940 1.1 spz movneg %xcc, %o4, %l0 2941 1.1 spz add %i7,16*32, %i7 2942 1.1 spz movneg %xcc, %o5, %l1 2943 1.1 spz ldx [%i7+0*32], %l2 2944 1.1 spz ldx [%i7+8*32], %l3 2945 1.1 spz ldx [%i7+1*32], %o4 2946 1.1 spz ldx [%i7+9*32], %o5 2947 1.1 spz movvs %icc, %o4, %l2 2948 1.1 spz ldx [%i7+2*32], %o4 2949 1.1 spz movvs %icc, %o5, %l3 2950 1.1 spz ldx [%i7+10*32],%o5 2951 1.1 spz move %icc, %o4, %l2 2952 1.1 spz ldx [%i7+3*32], %o4 2953 1.1 spz move %icc, %o5, %l3 2954 1.1 spz ldx [%i7+11*32],%o5 2955 1.1 spz movneg %icc, %o4, %l2 2956 1.1 spz ldx [%i7+4*32], %o4 2957 1.1 spz movneg %icc, %o5, %l3 2958 1.1 spz ldx [%i7+12*32],%o5 2959 1.1 spz movcs %xcc, %o4, %l2 2960 1.1 spz ldx [%i7+5*32],%o4 2961 1.1 spz movcs %xcc, %o5, %l3 2962 1.1 spz ldx [%i7+13*32],%o5 2963 1.1 spz movvs %xcc, %o4, %l2 2964 1.1 spz ldx [%i7+6*32], %o4 2965 1.1 spz movvs %xcc, %o5, %l3 2966 1.1 spz ldx [%i7+14*32],%o5 2967 1.1 spz move %xcc, %o4, %l2 2968 1.1 spz ldx [%i7+7*32], %o4 2969 1.1 spz move %xcc, %o5, %l3 2970 1.1 spz ldx [%i7+15*32],%o5 2971 1.1 spz movneg %xcc, %o4, %l2 2972 1.1 spz add %i7,16*32, %i7 2973 1.1 spz movneg %xcc, %o5, %l3 2974 1.1 spz srax %g4, 32, %o4 ! unpack %g4 2975 1.1 spz srl %g4, %g0, %o5 2976 1.1 spz sub %o4, 5, %o4 2977 1.1 spz mov %g3, %i7 2978 1.1 spz sllx %o4, 32, %g4 ! re-pack %g4 2979 1.1 spz or %o5, %g4, %g4 2980 1.1 spz srl %o5, %o4, %o5 2981 1.1 spz srl %o5, 2, %o4 2982 1.1 spz and %o5, 3, %o5 2983 1.1 spz and %o4, 7, %o4 2984 1.1 spz sll %o5, 3, %o5 ! 
offset within first cache line 2985 1.1 spz add %o5, %i7, %i7 ! of the pwrtbl 2986 1.1 spz or %g0, 1, %o5 2987 1.1 spz sll %o5, %o4, %o4 2988 1.1 spz .word 0x81b02940+24-1 ! montsqr 24-1 2989 1.1 spz fbu,pn %fcc3,.Labort_24 2990 1.1 spz #ifndef __arch64__ 2991 1.1 spz and %fp,%g5,%g5 2992 1.1 spz brz,pn %g5,.Labort_24 2993 1.1 spz #endif 2994 1.1 spz nop 2995 1.1 spz .word 0x81b02940+24-1 ! montsqr 24-1 2996 1.1 spz fbu,pn %fcc3,.Labort_24 2997 1.1 spz #ifndef __arch64__ 2998 1.1 spz and %fp,%g5,%g5 2999 1.1 spz brz,pn %g5,.Labort_24 3000 1.1 spz #endif 3001 1.1 spz nop 3002 1.1 spz .word 0x81b02940+24-1 ! montsqr 24-1 3003 1.1 spz fbu,pn %fcc3,.Labort_24 3004 1.1 spz #ifndef __arch64__ 3005 1.1 spz and %fp,%g5,%g5 3006 1.1 spz brz,pn %g5,.Labort_24 3007 1.1 spz #endif 3008 1.1 spz nop 3009 1.1 spz .word 0x81b02940+24-1 ! montsqr 24-1 3010 1.1 spz fbu,pn %fcc3,.Labort_24 3011 1.1 spz #ifndef __arch64__ 3012 1.1 spz and %fp,%g5,%g5 3013 1.1 spz brz,pn %g5,.Labort_24 3014 1.1 spz #endif 3015 1.1 spz nop 3016 1.1 spz .word 0x81b02940+24-1 ! montsqr 24-1 3017 1.1 spz fbu,pn %fcc3,.Labort_24 3018 1.1 spz #ifndef __arch64__ 3019 1.1 spz and %fp,%g5,%g5 3020 1.1 spz brz,pn %g5,.Labort_24 3021 1.1 spz #endif 3022 1.1 spz nop 3023 1.1 spz wr %o4, %g0, %ccr 3024 1.1 spz .word 0x81b02920+24-1 ! 
montmul 24-1 3025 1.1 spz fbu,pn %fcc3,.Labort_24 3026 1.1 spz #ifndef __arch64__ 3027 1.1 spz and %fp,%g5,%g5 3028 1.1 spz brz,pn %g5,.Labort_24 3029 1.1 spz #endif 3030 1.1 spz 3031 1.1 spz srax %g4, 32, %o4 3032 1.1 spz #ifdef __arch64__ 3033 1.1 spz brgez %o4,.Lstride_24 3034 1.1 spz restore 3035 1.1 spz restore 3036 1.1 spz restore 3037 1.1 spz restore 3038 1.1 spz restore 3039 1.1 spz #else 3040 1.1 spz brgez %o4,.Lstride_24 3041 1.1 spz restore; and %fp,%g5,%g5 3042 1.1 spz restore; and %fp,%g5,%g5 3043 1.1 spz restore; and %fp,%g5,%g5 3044 1.1 spz restore; and %fp,%g5,%g5 3045 1.1 spz brz,pn %g5,.Labort1_24 3046 1.1 spz restore 3047 1.1 spz #endif 3048 1.1 spz .word 0x81b02310 !movxtod %l0,%f0 3049 1.1 spz .word 0x85b02311 !movxtod %l1,%f2 3050 1.1 spz .word 0x89b02312 !movxtod %l2,%f4 3051 1.1 spz .word 0x8db02313 !movxtod %l3,%f6 3052 1.1 spz .word 0x91b02314 !movxtod %l4,%f8 3053 1.1 spz .word 0x95b02315 !movxtod %l5,%f10 3054 1.1 spz .word 0x99b02316 !movxtod %l6,%f12 3055 1.1 spz .word 0x9db02317 !movxtod %l7,%f14 3056 1.1 spz .word 0xa1b02308 !movxtod %o0,%f16 3057 1.1 spz .word 0xa5b02309 !movxtod %o1,%f18 3058 1.1 spz .word 0xa9b0230a !movxtod %o2,%f20 3059 1.1 spz .word 0xadb0230b !movxtod %o3,%f22 3060 1.1 spz .word 0xbbb0230c !movxtod %o4,%f60 3061 1.1 spz .word 0xbfb0230d !movxtod %o5,%f62 3062 1.1 spz #ifdef __arch64__ 3063 1.1 spz restore 3064 1.1 spz #else 3065 1.1 spz and %fp,%g5,%g5 3066 1.1 spz restore 3067 1.1 spz and %g5,1,%o7 3068 1.1 spz and %fp,%g5,%g5 3069 1.1 spz srl %fp,0,%fp ! just in case? 3070 1.1 spz or %o7,%g5,%g5 3071 1.1 spz brz,a,pn %g5,.Ldone_24 3072 1.1 spz mov 0,%i0 ! 
! return failure
#endif
	std	%f0,[%g1+0*8]
	std	%f2,[%g1+1*8]
	std	%f4,[%g1+2*8]
	std	%f6,[%g1+3*8]
	std	%f8,[%g1+4*8]
	std	%f10,[%g1+5*8]
	std	%f12,[%g1+6*8]
	std	%f14,[%g1+7*8]
	std	%f16,[%g1+8*8]
	std	%f18,[%g1+9*8]
	std	%f20,[%g1+10*8]
	std	%f22,[%g1+11*8]
	std	%f60,[%g1+12*8]
	std	%f62,[%g1+13*8]
	std	%f24,[%g1+14*8]
	std	%f26,[%g1+15*8]
	std	%f28,[%g1+16*8]
	std	%f30,[%g1+17*8]
	std	%f32,[%g1+18*8]
	std	%f34,[%g1+19*8]
	std	%f36,[%g1+20*8]
	std	%f38,[%g1+21*8]
	std	%f40,[%g1+22*8]
	std	%f42,[%g1+23*8]
	mov	1,%i0			! return success
.Ldone_24:
	ret
	restore

.Labort_24:
	restore
	restore
	restore
	restore
	restore
.Labort1_24:
	restore

	mov	0,%i0			! return failure
	ret
	restore
.type	bn_pwr5_mont_t4_24, #function
.size	bn_pwr5_mont_t4_24, .-bn_pwr5_mont_t4_24

!----------------------------------------------------------------------
! bn_pwr5_mont_t4_32
!
! Each pass of the .Lstride_32 loop issues five montsqr and one montmul
! on a 32-word (64-bit words) operand, the multiplier being gathered
! from a power table.
!
! Incoming registers (as used below):
!   %i0  32-word value, loaded at entry and stored back on success
!        (NOTE(review): presumably "tp" of the C prototype - confirm)
!   %i1  32-word vector loaded into three register windows
!        (NOTE(review): presumably the modulus "np" - confirm)
!   %i2  pointer to n0, read as two 32-bit words
!   %i3  base address of the power table ("pwrtbl")
!   %i4  low 32 bits are kept as the low half of %g4
!   %i5  kept as the high half of %g4; reduced by 5 on every pass and
!        used as the shift count applied to the low half, the loop
!        continues while it stays non-negative
!
! Returns 1 in %i0 on success, 0 when the fbu check after a montsqr or
! montmul (or, in 32-bit builds, the %fp/%g5 tag check) sends control
! to the abort path.
!----------------------------------------------------------------------
.globl	bn_pwr5_mont_t4_32
.align	32
bn_pwr5_mont_t4_32:
#ifdef	__arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1		! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5		! tag lives in the upper 32 bits
	save	%sp,%g4,%sp
#ifndef	__arch64__
	! warm it up: six windows down, six windows back
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	or	%g5,%fp,%fp		! tag %fp of this window
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	ld	[%i2+0],%f1		! load *n0
	ld	[%i2+4],%f0
	mov	%i3,%g3
	srl	%i4,%g0,%i4		! pack last arguments
	sllx	%i5,32,%g4
	or	%i4,%g4,%g4
	.word	0xbbb00f00		!fsrc2 %f0,%f0,%f60

	! window 1: words 0..13 of [%g1] in integer registers,
	! words 14..31 in %f24..%f58
	save	%sp,-128,%sp
	or	%g5,%fp,%fp
	ldx	[%g1+0*8],%l0
	ldx	[%g1+1*8],%l1
	ldx	[%g1+2*8],%l2
	ldx	[%g1+3*8],%l3
	ldx	[%g1+4*8],%l4
	ldx	[%g1+5*8],%l5
	ldx	[%g1+6*8],%l6
	ldx	[%g1+7*8],%l7
	ldx	[%g1+8*8],%o0
	ldx	[%g1+9*8],%o1
	ldx	[%g1+10*8],%o2
	ldx	[%g1+11*8],%o3
	ldx	[%g1+12*8],%o4
	ldx	[%g1+13*8],%o5
	ldd	[%g1+14*8],%f24
	ldd	[%g1+15*8],%f26
	ldd	[%g1+16*8],%f28
	ldd	[%g1+17*8],%f30
	ldd	[%g1+18*8],%f32
	ldd	[%g1+19*8],%f34
	ldd	[%g1+20*8],%f36
	ldd	[%g1+21*8],%f38
	ldd	[%g1+22*8],%f40
	ldd	[%g1+23*8],%f42
	ldd	[%g1+24*8],%f44
	ldd	[%g1+25*8],%f46
	ldd	[%g1+26*8],%f48
	ldd	[%g1+27*8],%f50
	ldd	[%g1+28*8],%f52
	ldd	[%g1+29*8],%f54
	ldd	[%g1+30*8],%f56
	ldd	[%g1+31*8],%f58

	! window 2: words 0..13 of [%g2]
	save	%sp,-128,%sp
	or	%g5,%fp,%fp
	ldx	[%g2+0*8],%l0
	ldx	[%g2+1*8],%l1
	ldx	[%g2+2*8],%l2
	ldx	[%g2+3*8],%l3
	ldx	[%g2+4*8],%l4
	ldx	[%g2+5*8],%l5
	ldx	[%g2+6*8],%l6
	ldx	[%g2+7*8],%l7
	ldx	[%g2+8*8],%o0
	ldx	[%g2+9*8],%o1
	ldx	[%g2+10*8],%o2
	ldx	[%g2+11*8],%o3
	ldx	[%g2+12*8],%o4
	ldx	[%g2+13*8],%o5

	! window 3: words 14..27 of [%g2]
	save	%sp,-128,%sp
	or	%g5,%fp,%fp
	ldx	[%g2+14*8],%l0
	ldx	[%g2+15*8],%l1
	ldx	[%g2+16*8],%l2
	ldx	[%g2+17*8],%l3
	ldx	[%g2+18*8],%l4
	ldx	[%g2+19*8],%l5
	ldx	[%g2+20*8],%l6
	ldx	[%g2+21*8],%l7
	ldx	[%g2+22*8],%o0
	ldx	[%g2+23*8],%o1
	ldx	[%g2+24*8],%o2
	ldx	[%g2+25*8],%o3
	ldx	[%g2+26*8],%o4
	ldx	[%g2+27*8],%o5

	! window 4: words 28..31 of [%g2]
	save	%sp,-128,%sp
	or	%g5,%fp,%fp
	ldx	[%g2+28*8],%l0
	ldx	[%g2+29*8],%l1
	ldx	[%g2+30*8],%l2
	ldx	[%g2+31*8],%l3

	! window 5: first gather window
	save	%sp,-128,%sp
	or	%g5,%fp,%fp

	srlx	%g4, 32, %o4		! unpack %g4
	srl	%g4, %g0, %o5
	sub	%o4, 5, %o4
	mov	%g3, %o7
	sllx	%o4, 32, %g4		! re-pack %g4
	or	%o5, %g4, %g4
	srl	%o5, %o4, %o5
	srl	%o5, 2, %o4
	and	%o5, 3, %o5
	and	%o4, 7, %o4
	sll	%o5, 3, %o5		! offset within first cache line
	add	%o5, %o7, %o7		! of the pwrtbl
	mov	1, %o5
	sll	%o5, %o4, %o4		! one-hot selector, bit 0..7
	wr	%o4, %g0, %ccr
	b	.Lstride_32
	nop

/*
 * PWR5_32_GATHER_PAIR(BASE, LO, HI)
 *
 * Gathers two consecutive 64-bit words of the selected table entry:
 *   LO <- one of [BASE + k*32],     k = 0..7
 *   HI <- one of [BASE + (8+k)*32], k = 0..7
 * All sixteen candidate words are loaded unconditionally; the seven
 * conditional moves per destination keep the one picked by the single
 * bit that was written to %ccr (icc.V, icc.Z, icc.N, xcc.C, xcc.V,
 * xcc.Z, xcc.N).  When only icc.C is set no move fires and the word
 * loaded from slot 0 is kept.  BASE is advanced by 16*32 bytes.
 * Scratch: %o4, %o5.  The instruction order matches the open-coded
 * sequence this macro stands for; keep it free of "!" comments because
 * the expansion is a single line.
 */
#define PWR5_32_GATHER_PAIR(BASE,LO,HI)		\
	ldx	[BASE+0*32], LO;		\
	ldx	[BASE+8*32], HI;		\
	ldx	[BASE+1*32], %o4;		\
	ldx	[BASE+9*32], %o5;		\
	movvs	%icc, %o4, LO;			\
	ldx	[BASE+2*32], %o4;		\
	movvs	%icc, %o5, HI;			\
	ldx	[BASE+10*32], %o5;		\
	move	%icc, %o4, LO;			\
	ldx	[BASE+3*32], %o4;		\
	move	%icc, %o5, HI;			\
	ldx	[BASE+11*32], %o5;		\
	movneg	%icc, %o4, LO;			\
	ldx	[BASE+4*32], %o4;		\
	movneg	%icc, %o5, HI;			\
	ldx	[BASE+12*32], %o5;		\
	movcs	%xcc, %o4, LO;			\
	ldx	[BASE+5*32], %o4;		\
	movcs	%xcc, %o5, HI;			\
	ldx	[BASE+13*32], %o5;		\
	movvs	%xcc, %o4, LO;			\
	ldx	[BASE+6*32], %o4;		\
	movvs	%xcc, %o5, HI;			\
	ldx	[BASE+14*32], %o5;		\
	move	%xcc, %o4, LO;			\
	ldx	[BASE+7*32], %o4;		\
	move	%xcc, %o5, HI;			\
	ldx	[BASE+15*32], %o5;		\
	movneg	%xcc, %o4, LO;			\
	add	BASE, 16*32, BASE;		\
	movneg	%xcc, %o5, HI

.align	16
.Lstride_32:
	! words 0..13 of the table entry, table cursor in %o7
	PWR5_32_GATHER_PAIR(%o7,%i0,%i1)
	PWR5_32_GATHER_PAIR(%o7,%i2,%i3)
	PWR5_32_GATHER_PAIR(%o7,%i4,%i5)
	PWR5_32_GATHER_PAIR(%o7,%l0,%l1)
	PWR5_32_GATHER_PAIR(%o7,%l2,%l3)
	PWR5_32_GATHER_PAIR(%o7,%l4,%l5)
	PWR5_32_GATHER_PAIR(%o7,%l6,%l7)

	! next window: the cursor is now visible as %i7
	save	%sp,-128,%sp
	or	%g5,%fp,%fp

	! words 14..31 of the table entry
	PWR5_32_GATHER_PAIR(%i7,%i0,%i1)
	PWR5_32_GATHER_PAIR(%i7,%i2,%i3)
	PWR5_32_GATHER_PAIR(%i7,%i4,%i5)
	PWR5_32_GATHER_PAIR(%i7,%l0,%l1)
	PWR5_32_GATHER_PAIR(%i7,%l2,%l3)
	PWR5_32_GATHER_PAIR(%i7,%l4,%l5)
	PWR5_32_GATHER_PAIR(%i7,%l6,%l7)
	PWR5_32_GATHER_PAIR(%i7,%o0,%o1)
	PWR5_32_GATHER_PAIR(%i7,%o2,%o3)
#undef PWR5_32_GATHER_PAIR

	! prepare cursor and selector for the following pass
	srax	%g4, 32, %o4		! unpack %g4
	srl	%g4, %g0, %o5
	sub	%o4, 5, %o4
	mov	%g3, %i7
	sllx	%o4, 32, %g4		! re-pack %g4
	or	%o5, %g4, %g4
	srl	%o5, %o4, %o5
	srl	%o5, 2, %o4
	and	%o5, 3, %o5
	and	%o4, 7, %o4
	sll	%o5, 3, %o5		! offset within first cache line
	add	%o5, %i7, %i7		! of the pwrtbl
	mov	1, %o5
	sll	%o5, %o4, %o4

	! five squarings, each followed by the abort checks
	.word	0x81b02940+32-1		! montsqr 32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop
	.word	0x81b02940+32-1		! montsqr 32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop
	.word	0x81b02940+32-1		! montsqr 32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop
	.word	0x81b02940+32-1		! montsqr 32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop
	.word	0x81b02940+32-1		! montsqr 32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop

	! install the selector for the next gather, then multiply
	wr	%o4, %g0, %ccr
	.word	0x81b02920+32-1		! montmul 32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif

	srax	%g4, 32, %o4		! loop while the high half is >= 0
#ifdef	__arch64__
	brgez	%o4,.Lstride_32
	restore
	restore
	restore
	restore
	restore
#else
	brgez	%o4,.Lstride_32
	restore
	and	%fp,%g5,%g5
	restore
	and	%fp,%g5,%g5
	restore
	and	%fp,%g5,%g5
	restore
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort1_32
	restore
#endif
	! back in window 1: move the 14 integer-held words to FP registers
	.word	0x81b02310		!movxtod %l0,%f0
	.word	0x85b02311		!movxtod %l1,%f2
	.word	0x89b02312		!movxtod %l2,%f4
	.word	0x8db02313		!movxtod %l3,%f6
	.word	0x91b02314		!movxtod %l4,%f8
	.word	0x95b02315		!movxtod %l5,%f10
	.word	0x99b02316		!movxtod %l6,%f12
	.word	0x9db02317		!movxtod %l7,%f14
	.word	0xa1b02308		!movxtod %o0,%f16
	.word	0xa5b02309		!movxtod %o1,%f18
	.word	0xa9b0230a		!movxtod %o2,%f20
	.word	0xadb0230b		!movxtod %o3,%f22
	.word	0xbbb0230c		!movxtod %o4,%f60
	.word	0xbfb0230d		!movxtod %o5,%f62
#ifdef	__arch64__
	restore
#else
	and	%fp,%g5,%g5
	restore
	and	%g5,1,%o7
	and	%fp,%g5,%g5
	srl	%fp,0,%fp		! just in case?
	or	%o7,%g5,%g5
	brz,a,pn %g5,.Ldone_32
	mov	0,%i0			!
return failure 3850 1.1 spz #endif 3851 1.1 spz std %f0,[%g1+0*8] 3852 1.1 spz std %f2,[%g1+1*8] 3853 1.1 spz std %f4,[%g1+2*8] 3854 1.1 spz std %f6,[%g1+3*8] 3855 1.1 spz std %f8,[%g1+4*8] 3856 1.1 spz std %f10,[%g1+5*8] 3857 1.1 spz std %f12,[%g1+6*8] 3858 1.1 spz std %f14,[%g1+7*8] 3859 1.1 spz std %f16,[%g1+8*8] 3860 1.1 spz std %f18,[%g1+9*8] 3861 1.1 spz std %f20,[%g1+10*8] 3862 1.1 spz std %f22,[%g1+11*8] 3863 1.1 spz std %f60,[%g1+12*8] 3864 1.1 spz std %f62,[%g1+13*8] 3865 1.1 spz std %f24,[%g1+14*8] 3866 1.1 spz std %f26,[%g1+15*8] 3867 1.1 spz std %f28,[%g1+16*8] 3868 1.1 spz std %f30,[%g1+17*8] 3869 1.1 spz std %f32,[%g1+18*8] 3870 1.1 spz std %f34,[%g1+19*8] 3871 1.1 spz std %f36,[%g1+20*8] 3872 1.1 spz std %f38,[%g1+21*8] 3873 1.1 spz std %f40,[%g1+22*8] 3874 1.1 spz std %f42,[%g1+23*8] 3875 1.1 spz std %f44,[%g1+24*8] 3876 1.1 spz std %f46,[%g1+25*8] 3877 1.1 spz std %f48,[%g1+26*8] 3878 1.1 spz std %f50,[%g1+27*8] 3879 1.1 spz std %f52,[%g1+28*8] 3880 1.1 spz std %f54,[%g1+29*8] 3881 1.1 spz std %f56,[%g1+30*8] 3882 1.1 spz std %f58,[%g1+31*8] 3883 1.1 spz mov 1,%i0 ! return success 3884 1.1 spz .Ldone_32: 3885 1.1 spz ret 3886 1.1 spz restore 3887 1.1 spz 3888 1.1 spz .Labort_32: 3889 1.1 spz restore 3890 1.1 spz restore 3891 1.1 spz restore 3892 1.1 spz restore 3893 1.1 spz restore 3894 1.1 spz .Labort1_32: 3895 1.1 spz restore 3896 1.1 spz 3897 1.1 spz mov 0,%i0 ! return failure 3898 1.1 spz ret 3899 1.1 spz restore 3900 1.1 spz .type bn_pwr5_mont_t4_32, #function 3901 1.1 spz .size bn_pwr5_mont_t4_32, .-bn_pwr5_mont_t4_32 3902 1.1 spz .globl bn_mul_mont_t4 3903 1.1 spz .align 32 3904 1.1 spz bn_mul_mont_t4: 3905 1.1 spz add %sp, STACK_BIAS, %g4 ! real top of stack 3906 1.1 spz sll %o5, 3, %o5 ! size in bytes 3907 1.1 spz add %o5, 63, %g1 3908 1.1 spz andn %g1, 63, %g1 ! buffer size rounded up to 64 bytes 3909 1.1 spz sub %g4, %g1, %g1 3910 1.1 spz andn %g1, 63, %g1 ! align at 64 byte 3911 1.1 spz sub %g1, STACK_FRAME, %g1 ! 
new top of stack 3912 1.1 spz sub %g1, %g4, %g1 3913 1.1 spz 3914 1.1 spz save %sp, %g1, %sp 3915 1.1 spz ld [%i4+0], %l0 ! pull n0[0..1] value 3916 1.1 spz ld [%i4+4], %l1 3917 1.1 spz add %sp, STACK_BIAS+STACK_FRAME, %l5 3918 1.1 spz ldx [%i2+0], %g2 ! m0=bp[0] 3919 1.1 spz sllx %l1, 32, %g1 3920 1.1 spz add %i2, 8, %i2 3921 1.1 spz or %l0, %g1, %g1 3922 1.1 spz 3923 1.1 spz ldx [%i1+0], %o2 ! ap[0] 3925 1.1 spz 3926 1.1 spz mulx %o2, %g2, %g4 ! ap[0]*bp[0] 3927 1.1 spz .word 0x8bb282c2 !umulxhi %o2,%g2,%g5 3928 1.1 spz 3929 1.1 spz ldx [%i1+8], %o2 ! ap[1] 3930 1.1 spz add %i1, 16, %i1 3931 1.1 spz ldx [%i3+0], %o4 ! np[0] 3932 1.1 spz 3933 1.1 spz mulx %g4, %g1, %g3 ! "tp[0]"*n0 3934 1.1 spz 3935 1.1 spz mulx %o2, %g2, %o3 ! ap[1]*bp[0] 3936 1.1 spz .word 0x95b282c2 !umulxhi %o2,%g2,%o2 ! ahi=aj 3937 1.1 spz 3938 1.1 spz mulx %o4, %g3, %o0 ! np[0]*m1 3939 1.1 spz .word 0x93b302c3 !umulxhi %o4,%g3,%o1 3940 1.1 spz 3941 1.1 spz ldx [%i3+8], %o4 ! np[1] 3942 1.1 spz 3943 1.1 spz addcc %g4, %o0, %o0 3944 1.1 spz add %i3, 16, %i3 3945 1.1 spz .word 0x93b00229 !addxc %g0,%o1,%o1 3946 1.1 spz 3947 1.1 spz mulx %o4, %g3, %o5 ! np[1]*m1 3948 1.1 spz .word 0x99b302c3 !umulxhi %o4,%g3,%o4 ! nhi=nj 3949 1.1 spz 3950 1.1 spz ba .L1st 3952 1.1 spz sub %i5, 24, %l4 ! cnt=num-3 3953 1.1 spz 3954 1.1 spz .align 16 3955 1.1 spz .L1st: 3956 1.1 spz addcc %o3, %g5, %g4 3957 1.1 spz .word 0x8bb28220 !addxc %o2,%g0,%g5 3958 1.1 spz 3959 1.1 spz ldx [%i1+0], %o2 ! ap[j] 3960 1.1 spz addcc %o5, %o1, %o0 3961 1.1 spz add %i1, 8, %i1 3962 1.1 spz .word 0x93b30220 !addxc %o4,%g0,%o1 ! nhi=nj 3963 1.1 spz 3964 1.1 spz ldx [%i3+0], %o4 ! np[j] 3965 1.1 spz mulx %o2, %g2, %o3 ! ap[j]*bp[0] 3966 1.1 spz add %i3, 8, %i3 3967 1.1 spz .word 0x95b282c2 !umulxhi %o2,%g2,%o2 ! ahi=aj 3968 1.1 spz 3969 1.1 spz mulx %o4, %g3, %o5 ! np[j]*m1 3970 1.1 spz addcc %g4, %o0, %o0 ! np[j]*m1+ap[j]*bp[0] 3971 1.1 spz .word 0x99b302c3 !umulxhi %o4,%g3,%o4 ! 
nhi=nj 3972 1.1 spz .word 0x93b00229 !addxc %g0,%o1,%o1 3973 1.1 spz stxa %o0, [%l5]0xe2 ! tp[j-1] 3974 1.1 spz add %l5, 8, %l5 ! tp++ 3975 1.1 spz 3976 1.1 spz brnz,pt %l4, .L1st 3977 1.1 spz sub %l4, 8, %l4 ! j-- 3978 1.1 spz !.L1st 3979 1.1 spz addcc %o3, %g5, %g4 3980 1.1 spz .word 0x8bb28220 !addxc %o2,%g0,%g5 ! ahi=aj 3981 1.1 spz 3982 1.1 spz addcc %o5, %o1, %o0 3983 1.1 spz .word 0x93b30220 !addxc %o4,%g0,%o1 3984 1.1 spz addcc %g4, %o0, %o0 ! np[j]*m1+ap[j]*bp[0] 3985 1.1 spz .word 0x93b00229 !addxc %g0,%o1,%o1 3986 1.1 spz stxa %o0, [%l5]0xe2 ! tp[j-1] 3987 1.1 spz add %l5, 8, %l5 3988 1.1 spz 3989 1.1 spz addcc %g5, %o1, %o1 3990 1.1 spz .word 0xa1b00220 !addxc %g0,%g0,%l0 ! upmost overflow bit 3991 1.1 spz stxa %o1, [%l5]0xe2 3992 1.1 spz add %l5, 8, %l5 3993 1.1 spz 3994 1.1 spz ba .Louter 3996 1.1 spz sub %i5, 16, %l1 ! i=num-2 3997 1.1 spz 3998 1.1 spz .align 16 3999 1.1 spz .Louter: 4000 1.1 spz ldx [%i2+0], %g2 ! m0=bp[i] 4001 1.1 spz add %i2, 8, %i2 4002 1.1 spz 4003 1.1 spz sub %i1, %i5, %i1 ! rewind 4004 1.1 spz sub %i3, %i5, %i3 4005 1.1 spz sub %l5, %i5, %l5 4006 1.1 spz 4007 1.1 spz ldx [%i1+0], %o2 ! ap[0] 4008 1.1 spz ldx [%i3+0], %o4 ! np[0] 4009 1.1 spz 4010 1.1 spz mulx %o2, %g2, %g4 ! ap[0]*bp[i] 4011 1.1 spz ldx [%l5], %o7 ! tp[0] 4012 1.1 spz .word 0x8bb282c2 !umulxhi %o2,%g2,%g5 4013 1.1 spz ldx [%i1+8], %o2 ! ap[1] 4014 1.1 spz addcc %g4, %o7, %g4 ! ap[0]*bp[i]+tp[0] 4015 1.1 spz mulx %o2, %g2, %o3 ! ap[1]*bp[i] 4016 1.1 spz .word 0x8bb00225 !addxc %g0,%g5,%g5 4017 1.1 spz mulx %g4, %g1, %g3 ! tp[0]*n0 4018 1.1 spz .word 0x95b282c2 !umulxhi %o2,%g2,%o2 ! ahi=aj 4019 1.1 spz mulx %o4, %g3, %o0 ! np[0]*m1 4020 1.1 spz add %i1, 16, %i1 4021 1.1 spz .word 0x93b302c3 !umulxhi %o4,%g3,%o1 4022 1.1 spz ldx [%i3+8], %o4 ! np[1] 4023 1.1 spz add %i3, 16, %i3 4024 1.1 spz addcc %o0, %g4, %o0 4025 1.1 spz mulx %o4, %g3, %o5 ! np[1]*m1 4026 1.1 spz .word 0x93b00229 !addxc %g0,%o1,%o1 4027 1.1 spz .word 0x99b302c3 !umulxhi %o4,%g3,%o4 ! 
nhi=nj 4028 1.1 spz 4029 1.1 spz ba .Linner 4031 1.1 spz sub %i5, 24, %l4 ! cnt=num-3 4032 1.1 spz .align 16 4033 1.1 spz .Linner: 4034 1.1 spz addcc %o3, %g5, %g4 4035 1.1 spz ldx [%l5+8], %o7 ! tp[j] 4036 1.1 spz .word 0x8bb28220 !addxc %o2,%g0,%g5 ! ahi=aj 4037 1.1 spz ldx [%i1+0], %o2 ! ap[j] 4038 1.1 spz add %i1, 8, %i1 4039 1.1 spz addcc %o5, %o1, %o0 4040 1.1 spz mulx %o2, %g2, %o3 ! ap[j]*bp[i] 4041 1.1 spz .word 0x93b30220 !addxc %o4,%g0,%o1 ! nhi=nj 4042 1.1 spz ldx [%i3+0], %o4 ! np[j] 4043 1.1 spz add %i3, 8, %i3 4044 1.1 spz .word 0x95b282c2 !umulxhi %o2,%g2,%o2 ! ahi=aj 4045 1.1 spz addcc %g4, %o7, %g4 ! ap[j]*bp[i]+tp[j] 4046 1.1 spz mulx %o4, %g3, %o5 ! np[j]*m1 4047 1.1 spz .word 0x8bb00225 !addxc %g0,%g5,%g5 4048 1.1 spz .word 0x99b302c3 !umulxhi %o4,%g3,%o4 ! nhi=nj 4049 1.1 spz addcc %o0, %g4, %o0 ! np[j]*m1+ap[j]*bp[i]+tp[j] 4050 1.1 spz .word 0x93b00229 !addxc %g0,%o1,%o1 4051 1.1 spz stx %o0, [%l5] ! tp[j-1] 4052 1.1 spz add %l5, 8, %l5 4053 1.1 spz brnz,pt %l4, .Linner 4054 1.1 spz sub %l4, 8, %l4 4055 1.1 spz !.Linner 4056 1.1 spz ldx [%l5+8], %o7 ! tp[j] 4057 1.1 spz addcc %o3, %g5, %g4 4058 1.1 spz .word 0x8bb28220 !addxc %o2,%g0,%g5 ! ahi=aj 4059 1.1 spz addcc %g4, %o7, %g4 ! ap[j]*bp[i]+tp[j] 4060 1.1 spz .word 0x8bb00225 !addxc %g0,%g5,%g5 4061 1.1 spz 4062 1.1 spz addcc %o5, %o1, %o0 4063 1.1 spz .word 0x93b30220 !addxc %o4,%g0,%o1 ! nhi=nj 4064 1.1 spz addcc %o0, %g4, %o0 ! np[j]*m1+ap[j]*bp[i]+tp[j] 4065 1.1 spz .word 0x93b00229 !addxc %g0,%o1,%o1 4066 1.1 spz stx %o0, [%l5] ! tp[j-1] 4067 1.1 spz 4068 1.1 spz subcc %g0, %l0, %g0 ! move upmost overflow to CCR.xcc 4069 1.1 spz .word 0x93b24265 !addxccc %o1,%g5,%o1 4070 1.1 spz .word 0xa1b00220 !addxc %g0,%g0,%l0 4071 1.1 spz stx %o1, [%l5+8] 4072 1.1 spz add %l5, 16, %l5 4073 1.1 spz 4074 1.1 spz brnz,pt %l1, .Louter 4075 1.1 spz sub %l1, 8, %l1 4076 1.1 spz 4077 1.1 spz sub %i1, %i5, %i1 ! 
! rewind   (completes the comment of the sub %i1, %i5, %i1 on the previous line)

!-----------------------------------------------------------------------
! Tail of bn_mul_mont_t4 (the function starts above this point).
!
! Registers as used by the code below:
!   %i0  result buffer          %i3  np
!   %i5  num in bytes           %l5  tp (temporary vector)
!   %l0  upmost overflow bit    %l4  loop counter, in bytes
!
! .Lsub  stores tp - np into the result buffer.
! .Lcopy overwrites the result with tp when that subtraction borrowed,
!        and zeroes tp as it goes.
!-----------------------------------------------------------------------
	sub	%i3, %i5, %i3
	sub	%l5, %i5, %l5
	ba	.Lsub
	subcc	%i5, 8, %l4		! cnt=num-1 and clear CCR.xcc

.align	16
.Lsub:
	ldx	[%l5], %o7
	add	%l5, 8, %l5
	ldx	[%i3+0], %o4
	add	%i3, 8, %i3
	subccc	%o7, %o4, %l2		! tp[j]-np[j], low 32 bits (borrow kept in icc.C)
	srlx	%o7, 32, %o7
	srlx	%o4, 32, %o4
	subccc	%o7, %o4, %l3		! high 32 bits, consuming the borrow
	add	%i0, 8, %i0
	st	%l2, [%i0-4]		! reverse order
	st	%l3, [%i0-8]
	brnz,pt	%l4, .Lsub
	sub	%l4, 8, %l4		! delay slot: cnt -= 8

	sub	%i3, %i5, %i3		! rewind
	sub	%l5, %i5, %l5
	sub	%i0, %i5, %i0

	subccc	%l0, %g0, %l0		! handle upmost overflow bit
	ba	.Lcopy
	sub	%i5, 8, %l4

.align	16
.Lcopy:					! conditional copy
	ldx	[%l5], %o7
	ldx	[%i0+0], %l2
	stx	%g0, [%l5]		! zap
	add	%l5, 8, %l5
	movcs	%icc, %o7, %l2		! borrow left over: keep tp[j] instead of tp[j]-np[j]
	stx	%l2, [%i0+0]
	add	%i0, 8, %i0
	brnz	%l4, .Lcopy
	sub	%l4, 8, %l4

	! Return 1.  The value has to be placed in %i0: restore makes the
	! %i registers of this window the %o registers of the caller, so a
	! write to %o0 here would never reach the caller.
	mov	1, %i0
	ret
	restore
.type	bn_mul_mont_t4, #function
.size	bn_mul_mont_t4, .-bn_mul_mont_t4

!-----------------------------------------------------------------------
! bn_mul_mont_gather5_t4
!
! Montgomery multiplication in which every 64-bit word of the second
! operand is gathered from a power table instead of being read from a
! plain vector.
!
! Arguments (register after the save in parentheses):
!   %o0 (%i0)  result buffer, num 64-bit words
!   %o1 (%i1)  ap
!   %o2 (%i2)  power table (pwrtbl)
!   %o3 (%i3)  np
!   %o4 (%i4)  pointer to n0[0..1], two 32-bit words, low word first
!   %o5 (%i5)  num, in 64-bit words on entry, converted to bytes here
!   7th        power, loaded from the stack frame of the caller
! Returns 1.
!
! Table addressing: word i of entry "power" is read from
!   pwrtbl + i*8*32 + (power & 3)*8 + k*32,  k = (power >> 2) & 7.
! The (power & 3)*8 part is added to the table pointer once.  For k, all
! eight candidates at +0*32 .. +7*32 are loaded every time and one is
! kept with conditional moves, driven by a one-hot value 1 << k written
! to %ccr:
!   k = 0 icc.C (no move, the +0*32 load stays)   k = 4 xcc.C
!   k = 1 icc.V                                     k = 5 xcc.V
!   k = 2 icc.Z                                     k = 6 xcc.Z
!   k = 3 icc.N                                     k = 7 xcc.N
! NOTE(review): presumably done so that the addresses touched do not
! depend on bits 2..4 of power - confirm against the caller.
!
! Working registers:
!   %g1 n0 (64 bit)          %g2 m0 = gathered bp word    %g3 m1
!   %l5 tp, on the stack     %l7 one-hot CCR image        %l0 overflow bit
!   %l1 outer counter        %l4 inner counter (both in bytes)
!
! The .word lines are hand-encoded instructions. The mnemonic each one
! stands for is given in the comment right after it.
! NOTE(review): the first pass stores tp with stxa and ASI 0xe2, which is
! presumably a block-initializing store - confirm in the T4 documentation.
!-----------------------------------------------------------------------
.globl	bn_mul_mont_gather5_t4
.align	32
bn_mul_mont_gather5_t4:
	add	%sp, STACK_BIAS, %g4	! real top of stack
	sll	%o5, 3, %o5		! size in bytes
	add	%o5, 63, %g1
	andn	%g1, 63, %g1		! buffer size rounded up to 64 bytes
	sub	%g4, %g1, %g1
	andn	%g1, 63, %g1		! align at 64 byte
	sub	%g1, STACK_FRAME, %g1	! new top of stack
	sub	%g1, %g4, %g1		! %g1 = (negative) frame size for the save
	LDPTR	[%sp+STACK_7thARG], %g4	! load power, 7th argument

	save	%sp, %g1, %sp
	srl	%g4, 2, %o4
	and	%g4, 3, %o5
	and	%o4, 7, %o4		! k = (power >> 2) & 7
	sll	%o5, 3, %o5		! offset within first cache line
	add	%o5, %i2, %i2		! of the pwrtbl
	or	%g0, 1, %o5
	sll	%o5, %o4, %l7		! %l7 = 1 << k, one-hot CCR image
	wr	%l7, %g0, %ccr
	ldx	[%i2+0*32], %g2		! default candidate (k = 0)
	ldx	[%i2+1*32], %o4
	ldx	[%i2+2*32], %o5
	movvs	%icc, %o4, %g2
	ldx	[%i2+3*32], %o4
	move	%icc, %o5, %g2
	ldx	[%i2+4*32], %o5
	movneg	%icc, %o4, %g2
	ldx	[%i2+5*32], %o4
	movcs	%xcc, %o5, %g2
	ldx	[%i2+6*32], %o5
	movvs	%xcc, %o4, %g2
	ldx	[%i2+7*32], %o4
	move	%xcc, %o5, %g2
	add	%i2,8*32, %i2		! advance to the next table row
	movneg	%xcc, %o4, %g2		! %g2 = m0 = bp[0]
	ld	[%i4+0], %l0		! pull n0[0..1] value
	ld	[%i4+4], %l1
	add	%sp, STACK_BIAS+STACK_FRAME, %l5	! tp
	sllx	%l1, 32, %g1
	or	%l0, %g1, %g1		! n0 = n0[1] << 32 | n0[0]

	ldx	[%i1+0], %o2		! ap[0]

	mulx	%o2, %g2, %g4		! ap[0]*bp[0]
	.word	0x8bb282c2 !umulxhi	%o2,%g2,%g5

	ldx	[%i1+8], %o2		! ap[1]
	add	%i1, 16, %i1
	ldx	[%i3+0], %o4		! np[0]

	mulx	%g4, %g1, %g3		! "tp[0]"*n0

	mulx	%o2, %g2, %o3		! ap[1]*bp[0]
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj

	mulx	%o4, %g3, %o0		! np[0]*m1
	.word	0x93b302c3 !umulxhi	%o4,%g3,%o1

	ldx	[%i3+8], %o4		! np[1]

	addcc	%g4, %o0, %o0
	add	%i3, 16, %i3
	.word	0x93b00229 !addxc	%g0,%o1,%o1

	mulx	%o4, %g3, %o5		! np[1]*m1
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj

	ba	.L1st_g5
	sub	%i5, 24, %l4		! cnt=num-3

.align	16
.L1st_g5:				! first pass: tp is written, not yet read
	addcc	%o3, %g5, %g4
	.word	0x8bb28220 !addxc	%o2,%g0,%g5

	ldx	[%i1+0], %o2		! ap[j]
	addcc	%o5, %o1, %o0
	add	%i1, 8, %i1
	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj

	ldx	[%i3+0], %o4		! np[j]
	mulx	%o2, %g2, %o3		! ap[j]*bp[0]
	add	%i3, 8, %i3
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj

	mulx	%o4, %g3, %o5		! np[j]*m1
	addcc	%g4, %o0, %o0		! np[j]*m1+ap[j]*bp[0]
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	stxa	%o0, [%l5]0xe2		! tp[j-1]
	add	%l5, 8, %l5		! tp++

	brnz,pt	%l4, .L1st_g5
	sub	%l4, 8, %l4		! j--
!.L1st_g5  (loop done: drain the last products, no further loads)
	addcc	%o3, %g5, %g4
	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj

	addcc	%o5, %o1, %o0
	.word	0x93b30220 !addxc	%o4,%g0,%o1
	addcc	%g4, %o0, %o0		! np[j]*m1+ap[j]*bp[0]
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	stxa	%o0, [%l5]0xe2		! tp[j-1]
	add	%l5, 8, %l5

	addcc	%g5, %o1, %o1
	.word	0xa1b00220 !addxc	%g0,%g0,%l0	! upmost overflow bit
	stxa	%o1, [%l5]0xe2
	add	%l5, 8, %l5

	ba	.Louter_g5
	sub	%i5, 16, %l1		! i=num-2

.align	16
.Louter_g5:
	wr	%l7, %g0, %ccr		! flags were clobbered by the arithmetic, reload the one-hot image
	ldx	[%i2+0*32], %g2
	ldx	[%i2+1*32], %o4
	ldx	[%i2+2*32], %o5
	movvs	%icc, %o4, %g2
	ldx	[%i2+3*32], %o4
	move	%icc, %o5, %g2
	ldx	[%i2+4*32], %o5
	movneg	%icc, %o4, %g2
	ldx	[%i2+5*32], %o4
	movcs	%xcc, %o5, %g2
	ldx	[%i2+6*32], %o5
	movvs	%xcc, %o4, %g2
	ldx	[%i2+7*32], %o4
	move	%xcc, %o5, %g2
	add	%i2,8*32, %i2		! advance to the next table row
	movneg	%xcc, %o4, %g2		! %g2 = m0 = bp[i]
	sub	%i1, %i5, %i1		! rewind
	sub	%i3, %i5, %i3
	sub	%l5, %i5, %l5

	ldx	[%i1+0], %o2		! ap[0]
	ldx	[%i3+0], %o4		! np[0]

	mulx	%o2, %g2, %g4		! ap[0]*bp[i]
	ldx	[%l5], %o7		! tp[0]
	.word	0x8bb282c2 !umulxhi	%o2,%g2,%g5
	ldx	[%i1+8], %o2		! ap[1]
	addcc	%g4, %o7, %g4		! ap[0]*bp[i]+tp[0]
	mulx	%o2, %g2, %o3		! ap[1]*bp[i]
	.word	0x8bb00225 !addxc	%g0,%g5,%g5
	mulx	%g4, %g1, %g3		! tp[0]*n0
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj
	mulx	%o4, %g3, %o0		! np[0]*m1
	add	%i1, 16, %i1
	.word	0x93b302c3 !umulxhi	%o4,%g3,%o1
	ldx	[%i3+8], %o4		! np[1]
	add	%i3, 16, %i3
	addcc	%o0, %g4, %o0
	mulx	%o4, %g3, %o5		! np[1]*m1
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj

	ba	.Linner_g5
	sub	%i5, 24, %l4		! cnt=num-3
.align	16
.Linner_g5:
	addcc	%o3, %g5, %g4
	ldx	[%l5+8], %o7		! tp[j]
	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj
	ldx	[%i1+0], %o2		! ap[j]
	add	%i1, 8, %i1
	addcc	%o5, %o1, %o0
	mulx	%o2, %g2, %o3		! ap[j]*bp[i]
	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj
	ldx	[%i3+0], %o4		! np[j]
	add	%i3, 8, %i3
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj
	addcc	%g4, %o7, %g4		! ap[j]*bp[i]+tp[j]
	mulx	%o4, %g3, %o5		! np[j]*m1
	.word	0x8bb00225 !addxc	%g0,%g5,%g5
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj
	addcc	%o0, %g4, %o0		! np[j]*m1+ap[j]*bp[i]+tp[j]
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	stx	%o0, [%l5]		! tp[j-1]
	add	%l5, 8, %l5
	brnz,pt	%l4, .Linner_g5
	sub	%l4, 8, %l4
!.Linner_g5  (loop done: drain the last products, no further ap/np loads)
	ldx	[%l5+8], %o7		! tp[j]
	addcc	%o3, %g5, %g4
	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj
	addcc	%g4, %o7, %g4		! ap[j]*bp[i]+tp[j]
	.word	0x8bb00225 !addxc	%g0,%g5,%g5

	addcc	%o5, %o1, %o0
	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj
	addcc	%o0, %g4, %o0		! np[j]*m1+ap[j]*bp[i]+tp[j]
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	stx	%o0, [%l5]		! tp[j-1]

	subcc	%g0, %l0, %g0		! move upmost overflow to CCR.xcc
	.word	0x93b24265 !addxccc	%o1,%g5,%o1
	.word	0xa1b00220 !addxc	%g0,%g0,%l0
	stx	%o1, [%l5+8]
	add	%l5, 16, %l5

	brnz,pt	%l1, .Louter_g5
	sub	%l1, 8, %l1

	sub	%i1, %i5, %i1		! rewind
	sub	%i3, %i5, %i3
	sub	%l5, %i5, %l5
	ba	.Lsub_g5
	subcc	%i5, 8, %l4		! cnt=num-1 and clear CCR.xcc

.align	16
.Lsub_g5:				! result = tp - np, borrow carried in icc.C
	ldx	[%l5], %o7
	add	%l5, 8, %l5
	ldx	[%i3+0], %o4
	add	%i3, 8, %i3
	subccc	%o7, %o4, %l2		! tp[j]-np[j], low 32 bits
	srlx	%o7, 32, %o7
	srlx	%o4, 32, %o4
	subccc	%o7, %o4, %l3		! high 32 bits, consuming the borrow
	add	%i0, 8, %i0
	st	%l2, [%i0-4]		! reverse order
	st	%l3, [%i0-8]
	brnz,pt	%l4, .Lsub_g5
	sub	%l4, 8, %l4

	sub	%i3, %i5, %i3		! rewind
	sub	%l5, %i5, %l5
	sub	%i0, %i5, %i0

	subccc	%l0, %g0, %l0		! handle upmost overflow bit
	ba	.Lcopy_g5
	sub	%i5, 8, %l4

.align	16
.Lcopy_g5:				! conditional copy
	ldx	[%l5], %o7
	ldx	[%i0+0], %l2
	stx	%g0, [%l5]		! zap
	add	%l5, 8, %l5
	movcs	%icc, %o7, %l2		! borrow left over: keep tp[j] instead of tp[j]-np[j]
	stx	%l2, [%i0+0]
	add	%i0, 8, %i0
	brnz	%l4, .Lcopy_g5
	sub	%l4, 8, %l4

	! Return 1.  The value has to be placed in %i0: restore makes the
	! %i registers of this window the %o registers of the caller, so a
	! write to %o0 here would never reach the caller.
	mov	1, %i0
	ret
	restore
.type	bn_mul_mont_gather5_t4, #function
.size	bn_mul_mont_gather5_t4, .-bn_mul_mont_gather5_t4

!-----------------------------------------------------------------------
! bn_flip_t4 - copy a vector, swapping the two 32-bit words of every
! 8-byte unit.  Leaf routine, no register window.
!   %o0  destination
!   %o1  source
!   %o2  number of 8-byte units.  It is decremented before it is tested,
!        so the loop body always runs at least once.
! Clobbers %o4, %o5.
!-----------------------------------------------------------------------
.globl	bn_flip_t4
.align	32
bn_flip_t4:
.Loop_flip:
	ld	[%o1+0], %o4
	sub	%o2, 1, %o2
	ld	[%o1+4], %o5
	add	%o1, 8, %o1
	st	%o5, [%o0+0]
	st	%o4, [%o0+4]
	brnz	%o2, .Loop_flip
	add	%o0, 8, %o0		! delay slot: advance destination
	retl
	nop
.type	bn_flip_t4, #function
.size	bn_flip_t4, .-bn_flip_t4

!-----------------------------------------------------------------------
! bn_flip_n_scatter5_t4 - pack pairs of 32-bit input words into 64-bit
! words and scatter them into the power table.  Leaf routine.
!   %o0  inp, 32-bit words, low word of each pair first
!   %o1  number of 32-bit words in inp (halved below)
!   %o2  pwrtbl
!   %o3  pwr, index of the table entry to fill
! 64-bit word i is stored at pwrtbl + pwr*8 + i*32*8.
! Clobbers %o4, %o5.
!-----------------------------------------------------------------------
.globl	bn_flip_n_scatter5_t4
.align	32
bn_flip_n_scatter5_t4:
	sll	%o3, 3, %o3
	srl	%o1, 1, %o1		! number of 64-bit words
	add	%o3, %o2, %o2		! &pwrtbl[pwr]
	sub	%o1, 1, %o1
.Loop_flip_n_scatter5:
	ld	[%o0+0], %o4		! inp[i]
	ld	[%o0+4], %o5
	add	%o0, 8, %o0
	sllx	%o5, 32, %o5
	or	%o4, %o5, %o5		! second word becomes the high half
	stx	%o5, [%o2]
	add	%o2, 32*8, %o2		! next table row
	brnz	%o1, .Loop_flip_n_scatter5
	sub	%o1, 1, %o1
	retl
	nop
.type	bn_flip_n_scatter5_t4, #function
.size	bn_flip_n_scatter5_t4, .-bn_flip_n_scatter5_t4

!-----------------------------------------------------------------------
! bn_gather5_t4 - read one entry back out of the power table.  Leaf
! routine.
!   %o0  destination, receives one 64-bit word per iteration
!   %o1  number of 64-bit words to gather
!   %o2  pwrtbl
!   %o3  power, index of the entry to read
! (power & 3)*8 is added to the table pointer.  (power >> 2) & 7 is
! turned into a one-hot value written to %ccr once before the loop.
! Nothing inside the loop modifies the condition codes, so the same
! flag selects among the eight loaded candidates in every iteration.
! Clobbers %g1, %o4, %o5.
!-----------------------------------------------------------------------
.globl	bn_gather5_t4
.align	32
bn_gather5_t4:
	srl	%o3, 2, %o4
	and	%o3, 3, %o5
	and	%o4, 7, %o4
	sll	%o5, 3, %o5		! offset within first cache line
	add	%o5, %o2, %o2		! of the pwrtbl
	or	%g0, 1, %o5
	sll	%o5, %o4, %g1		! one-hot CCR image
	wr	%g1, %g0, %ccr
	sub	%o1, 1, %o1
.Loop_gather5:
	ldx	[%o2+0*32], %g1		! default candidate, kept when icc.C is the flag set
	ldx	[%o2+1*32], %o4
	ldx	[%o2+2*32], %o5
	movvs	%icc, %o4, %g1
	ldx	[%o2+3*32], %o4
	move	%icc, %o5, %g1
	ldx	[%o2+4*32], %o5
	movneg	%icc, %o4, %g1
	ldx	[%o2+5*32], %o4
	movcs	%xcc, %o5, %g1
	ldx	[%o2+6*32], %o5
	movvs	%xcc, %o4, %g1
	ldx	[%o2+7*32], %o4
	move	%xcc, %o5, %g1
	add	%o2,8*32, %o2		! next table row
	movneg	%xcc, %o4, %g1
	stx	%g1, [%o0]
	add	%o0, 8, %o0
	brnz	%o1, .Loop_gather5
	sub	%o1, 1, %o1

	retl
	nop
.type	bn_gather5_t4, #function
.size	bn_gather5_t4, .-bn_gather5_t4

.asciz	"Montgomery Multiplication for SPARC T4, David S. Miller, Andy Polyakov"
.align	4