Home | History | Annotate | Line # | Download | only in mips
      1 #include "mips_arch.h"
      2 
      3 #if defined(_MIPS_ARCH_MIPS64R6)
        /*
         * Division helper macros, selected by target architecture revision.
         * R6 branches: the ddivu()/divu() macro expands to nothing, and
         * mfqt()/mfrm() expand to a three-operand divide/modulo instruction
         * that writes rd directly.
         * Fallback branch: ddivu() issues the divide with $0 as the named
         * destination, and mfqt()/mfrm() fetch the results with mflo/mfhi.
         */
      4 # define ddivu(rs,rt)
      5 # define mfqt(rd,rs,rt)	ddivu	rd,rs,rt
      6 # define mfrm(rd,rs,rt)	dmodu	rd,rs,rt
      7 #elif defined(_MIPS_ARCH_MIPS32R6)
      8 # define divu(rs,rt)
      9 # define mfqt(rd,rs,rt)	divu	rd,rs,rt
     10 # define mfrm(rd,rs,rt)	modu	rd,rs,rt
     11 #else
     12 # define ddivu(rs,rt)	ddivu	$0,rs,rt
     13 # define mfqt(rd,rs,rt)	mflo	rd
     14 # define mfrm(rd,rs,rt)	mfhi	rd
     15 #endif
     16 
     17 .rdata
        /* Identification strings placed in the read-only data section. */
     18 .asciiz	"mips3.s, Version 1.2"
     19 .asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro (at) fy.chalmers.se>"
     20 
     21 .text
        /*
         * noat: the routines below name $1 explicitly as a scratch register.
         */
     22 .set	noat
     23 
     24 .align	5
        /*
         * bn_mul_add_words: exported entry point.
         *
         * Branches to bn_mul_add_words_internal when the count in $6 is
         * greater than zero.  Otherwise it returns 0 in $2 and copies that
         * value to $4 as well.  $2 is cleared in the branch delay slot, so
         * the internal routine always starts with $2 == 0.
         *
         * NOTE(review): presumably the BN primitive with C arguments
         * (rp=$4, ap=$5, num=$6, w=$7) -- confirm against the C prototype.
         */
     25 .globl	bn_mul_add_words
     26 .ent	bn_mul_add_words
     27 bn_mul_add_words:
     28 	.set	noreorder
     29 	bgtz	$6,bn_mul_add_words_internal	/* count > 0: do the work */
     30 	move	$2,$0	/* delay slot, always executed: $2 = 0 */
     31 	jr	$31
     32 	move	$4,$2	/* delay slot: mirror the result into $4 */
     33 .end	bn_mul_add_words
     34 
     35 .align	5
        /*
         * bn_mul_add_words_internal: multiply-accumulate over an array of
         * 64-bit words.
         *
         * Register use, as read from the code below:
         *   $4   destination pointer (words are loaded from it and stored back)
         *   $5   source pointer
         *   $6   remaining word count
         *   $7   64-bit multiplier
         *   $2   running carry on entry, final carry (return value) on exit
         *   $3   constant -4, masks the count down to a multiple of four
         *   $1, $8-$15   scratch
         *
         * Per word:  sum = dst[i] + carry + low64(src[i] * $7), stored to
         * dst[i]; the new carry is high64(src[i] * $7) plus the overflow of
         * each of the two additions.
         *
         * The main loop handles four words per pass; the tail handles the
         * remaining one to three words.
         *
         * NOTE(review): dmultu(), mflo() and mfhi() with parenthesised
         * arguments are macros that are not defined in this file; presumably
         * they come from mips_arch.h -- confirm there.
         */
     36 .ent	bn_mul_add_words_internal
     37 bn_mul_add_words_internal:
     38 	.set	reorder
     39 	li	$3,-4
     40 	and	$8,$6,$3	/* $8 = count with the low two bits cleared */
     41 	beqz	$8,.L_bn_mul_add_words_tail	/* fewer than four words: tail only */
     42 
     43 .L_bn_mul_add_words_loop:
        /* word 0: the multiply is issued first, loads for later words follow */
     44 	ld	$12,0($5)
     45 	dmultu	($12,$7)
     46 	ld	$13,0($4)
     47 	ld	$14,8($5)
     48 	ld	$15,8($4)
     49 	ld	$8,2*8($5)
     50 	ld	$9,2*8($4)
     51 	daddu	$13,$2	/* dst word += incoming carry */
     52 	sltu	$2,$13,$2	# All manuals say it "compares 32-bit
     53 				# values", but it seems to work fine
     54 				# even on 64-bit registers.
     55 	mflo	($1,$12,$7)
     56 	mfhi	($12,$12,$7)
     57 	daddu	$13,$1	/* dst word += low half of the product */
     58 	daddu	$2,$12	/* carry += high half of the product */
        /* the multiply for word 1 is issued before word 0 is stored */
     59 	 dmultu	($14,$7)
     60 	sltu	$1,$13,$1	/* overflow of the low-half addition */
     61 	sd	$13,0($4)
     62 	daddu	$2,$1
     63 
        /* word 1: same carry chain, using $15 (dst) and $14 (src) */
     64 	ld	$10,3*8($5)
     65 	ld	$11,3*8($4)
     66 	daddu	$15,$2
     67 	sltu	$2,$15,$2
     68 	mflo	($1,$14,$7)
     69 	mfhi	($14,$14,$7)
     70 	daddu	$15,$1
     71 	daddu	$2,$14
     72 	 dmultu	($8,$7)
     73 	sltu	$1,$15,$1
     74 	sd	$15,8($4)
     75 	daddu	$2,$1
     76 
        /*
         * word 2: the count and both pointers are advanced here, so the
         * remaining stores of this pass use negative offsets from $4.
         */
     77 	subu	$6,4
     78 	daddu $4,4*8
     79 	daddu $5,4*8
     80 	daddu	$9,$2
     81 	sltu	$2,$9,$2
     82 	mflo	($1,$8,$7)
     83 	mfhi	($8,$8,$7)
     84 	daddu	$9,$1
     85 	daddu	$2,$8
     86 	 dmultu	($10,$7)
     87 	sltu	$1,$9,$1
     88 	sd	$9,-2*8($4)
     89 	daddu	$2,$1
     90 
     91 
        /* word 3; $8 is recomputed as the loop condition for the next pass */
     92 	and	$8,$6,$3
     93 	daddu	$11,$2
     94 	sltu	$2,$11,$2
     95 	mflo	($1,$10,$7)
     96 	mfhi	($10,$10,$7)
     97 	daddu	$11,$1
     98 	daddu	$2,$10
     99 	sltu	$1,$11,$1
    100 	sd	$11,-8($4)
    101 	.set	noreorder
    102 	bgtz	$8,.L_bn_mul_add_words_loop	/* at least four words left */
    103 	daddu	$2,$1	/* delay slot: finish the carry of word 3 */
    104 
    105 	beqz	$6,.L_bn_mul_add_words_return	/* nothing left over */
    106 	nop
    107 
    108 .L_bn_mul_add_words_tail:
        /* first leftover word */
    109 	.set	reorder
    110 	ld	$12,0($5)
    111 	dmultu	($12,$7)
    112 	ld	$13,0($4)
    113 	subu	$6,1
    114 	daddu	$13,$2
    115 	sltu	$2,$13,$2
    116 	mflo	($1,$12,$7)
    117 	mfhi	($12,$12,$7)
    118 	daddu	$13,$1
    119 	daddu	$2,$12
    120 	sltu	$1,$13,$1
    121 	sd	$13,0($4)
    122 	daddu	$2,$1
    123 	beqz	$6,.L_bn_mul_add_words_return
    124 
        /* second leftover word */
    125 	ld	$12,8($5)
    126 	dmultu	($12,$7)
    127 	ld	$13,8($4)
    128 	subu	$6,1
    129 	daddu	$13,$2
    130 	sltu	$2,$13,$2
    131 	mflo	($1,$12,$7)
    132 	mfhi	($12,$12,$7)
    133 	daddu	$13,$1
    134 	daddu	$2,$12
    135 	sltu	$1,$13,$1
    136 	sd	$13,8($4)
    137 	daddu	$2,$1
    138 	beqz	$6,.L_bn_mul_add_words_return
    139 
        /* third leftover word; the count is not decremented or tested again */
    140 	ld	$12,2*8($5)
    141 	dmultu	($12,$7)
    142 	ld	$13,2*8($4)
    143 	daddu	$13,$2
    144 	sltu	$2,$13,$2
    145 	mflo	($1,$12,$7)
    146 	mfhi	($12,$12,$7)
    147 	daddu	$13,$1
    148 	daddu	$2,$12
    149 	sltu	$1,$13,$1
    150 	sd	$13,2*8($4)
    151 	daddu	$2,$1
    152 
    153 .L_bn_mul_add_words_return:
    154 	.set	noreorder
    155 	jr	$31
    156 	move	$4,$2	/* delay slot: mirror the final carry into $4 */
    157 .end	bn_mul_add_words_internal
    158 
    159 .align	5
        /*
         * bn_mul_words: exported entry point.
         *
         * Branches to bn_mul_words_internal when the count in $6 is greater
         * than zero.  Otherwise it returns 0 in $2 and copies that value to
         * $4.  $2 is cleared in the branch delay slot, so the internal routine
         * always starts with $2 == 0.
         *
         * NOTE(review): presumably called with (rp=$4, ap=$5, num=$6, w=$7)
         * -- confirm against the C prototype.
         */
    160 .globl	bn_mul_words
    161 .ent	bn_mul_words
    162 bn_mul_words:
    163 	.set	noreorder
    164 	bgtz	$6,bn_mul_words_internal	/* count > 0: do the work */
    165 	move	$2,$0	/* delay slot, always executed: $2 = 0 */
    166 	jr	$31
    167 	move	$4,$2	/* delay slot: mirror the result into $4 */
    168 .end	bn_mul_words
    169 
    170 .align	5
        /*
         * bn_mul_words_internal: multiply an array of 64-bit words by a
         * single 64-bit word.
         *
         * Register use, as read from the code below:
         *   $4   destination pointer (written only)
         *   $5   source pointer
         *   $6   remaining word count
         *   $7   64-bit multiplier
         *   $2   running carry on entry, final carry (return value) on exit
         *   $3   constant -4, masks the count down to a multiple of four
         *   $1, $8-$15   scratch
         *
         * Per word:  dst[i] = carry + low64(src[i] * $7); the new carry is
         * high64(src[i] * $7) plus the overflow of that addition.
         *
         * The main loop handles four words per pass; the tail handles the
         * remaining one to three words.
         */
    171 .ent	bn_mul_words_internal
    172 bn_mul_words_internal:
    173 	.set	reorder
    174 	li	$3,-4
    175 	and	$8,$6,$3	/* $8 = count with the low two bits cleared */
    176 	beqz	$8,.L_bn_mul_words_tail	/* fewer than four words: tail only */
    177 
    178 .L_bn_mul_words_loop:
        /* word 0 */
    179 	ld	$12,0($5)
    180 	dmultu	($12,$7)
    181 	ld	$14,8($5)
    182 	ld	$8,2*8($5)
    183 	ld	$10,3*8($5)
    184 	mflo	($1,$12,$7)
    185 	mfhi	($12,$12,$7)
    186 	daddu	$2,$1	/* carry + low half of the product */
    187 	sltu	$13,$2,$1	/* overflow of that addition */
    188 	 dmultu	($14,$7)
    189 	sd	$2,0($4)
    190 	daddu	$2,$13,$12	/* new carry = overflow + high half */
    191 
        /*
         * word 1: the count and both pointers are advanced here, so the
         * remaining stores of this pass use negative offsets from $4.
         */
    192 	subu	$6,4
    193 	daddu $4,4*8
    194 	daddu $5,4*8
    195 	mflo	($1,$14,$7)
    196 	mfhi	($14,$14,$7)
    197 	daddu	$2,$1
    198 	sltu	$15,$2,$1
    199 	 dmultu	($8,$7)
    200 	sd	$2,-3*8($4)
    201 	daddu	$2,$15,$14
    202 
        /* word 2 */
    203 	mflo	($1,$8,$7)
    204 	mfhi	($8,$8,$7)
    205 	daddu	$2,$1
    206 	sltu	$9,$2,$1
    207 	 dmultu	($10,$7)
    208 	sd	$2,-2*8($4)
    209 	daddu	$2,$9,$8
    210 
        /* word 3; $8 is recomputed as the loop condition for the next pass */
    211 	and	$8,$6,$3
    212 	mflo	($1,$10,$7)
    213 	mfhi	($10,$10,$7)
    214 	daddu	$2,$1
    215 	sltu	$11,$2,$1
    216 	sd	$2,-8($4)
    217 	.set	noreorder
    218 	bgtz	$8,.L_bn_mul_words_loop	/* at least four words left */
    219 	daddu	$2,$11,$10	/* delay slot: carry out of word 3 */
    220 
    221 	beqz	$6,.L_bn_mul_words_return	/* nothing left over */
    222 	nop
    223 
    224 .L_bn_mul_words_tail:
        /* first leftover word */
    225 	.set	reorder
    226 	ld	$12,0($5)
    227 	dmultu	($12,$7)
    228 	subu	$6,1
    229 	mflo	($1,$12,$7)
    230 	mfhi	($12,$12,$7)
    231 	daddu	$2,$1
    232 	sltu	$13,$2,$1
    233 	sd	$2,0($4)
    234 	daddu	$2,$13,$12
    235 	beqz	$6,.L_bn_mul_words_return
    236 
        /* second leftover word */
    237 	ld	$12,8($5)
    238 	dmultu	($12,$7)
    239 	subu	$6,1
    240 	mflo	($1,$12,$7)
    241 	mfhi	($12,$12,$7)
    242 	daddu	$2,$1
    243 	sltu	$13,$2,$1
    244 	sd	$2,8($4)
    245 	daddu	$2,$13,$12
    246 	beqz	$6,.L_bn_mul_words_return
    247 
        /* third leftover word; the count is not decremented or tested again */
    248 	ld	$12,2*8($5)
    249 	dmultu	($12,$7)
    250 	mflo	($1,$12,$7)
    251 	mfhi	($12,$12,$7)
    252 	daddu	$2,$1
    253 	sltu	$13,$2,$1
    254 	sd	$2,2*8($4)
    255 	daddu	$2,$13,$12
    256 
    257 .L_bn_mul_words_return:
    258 	.set	noreorder
    259 	jr	$31
    260 	move	$4,$2	/* delay slot: mirror the final carry into $4 */
    261 .end	bn_mul_words_internal
    262 
    263 .align	5
        /*
         * bn_sqr_words: exported entry point.
         *
         * Branches to bn_sqr_words_internal when the count in $6 is greater
         * than zero; otherwise returns immediately.  $2 is cleared in the
         * branch delay slot on both paths and copied to $4 on the early return.
         *
         * NOTE(review): presumably called with (rp=$4, ap=$5, num=$6)
         * -- confirm against the C prototype.
         */
    264 .globl	bn_sqr_words
    265 .ent	bn_sqr_words
    266 bn_sqr_words:
    267 	.set	noreorder
    268 	bgtz	$6,bn_sqr_words_internal	/* count > 0: do the work */
    269 	move	$2,$0	/* delay slot, always executed: $2 = 0 */
    270 	jr	$31
    271 	move	$4,$2	/* delay slot: $4 = 0 */
    272 .end	bn_sqr_words
    273 
    274 .align	5
        /*
         * bn_sqr_words_internal: square each 64-bit source word into a pair
         * of destination words.
         *
         * Register use, as read from the code below:
         *   $4   destination pointer, two words written per source word
         *   $5   source pointer
         *   $6   remaining source word count
         *   $3   constant -4, masks the count down to a multiple of four
         *   $8-$15   scratch
         *
         * Per source word x = src[i]:  dst[2i] = low64(x * x) and
         * dst[2i+1] = high64(x * x).  Nothing is carried between words.
         *
         * $2 is never written in this routine; it is only copied to $4 on
         * return, so it still holds whatever the caller left there (0 when
         * entered through bn_sqr_words).
         */
    275 .ent	bn_sqr_words_internal
    276 bn_sqr_words_internal:
    277 	.set	reorder
    278 	li	$3,-4
    279 	and	$8,$6,$3	/* $8 = count with the low two bits cleared */
    280 	beqz	$8,.L_bn_sqr_words_tail	/* fewer than four words: tail only */
    281 
    282 .L_bn_sqr_words_loop:
        /* source word 0 */
    283 	ld	$12,0($5)
    284 	dmultu	($12,$12)
    285 	ld	$14,8($5)
    286 	ld	$8,2*8($5)
    287 	ld	$10,3*8($5)
    288 	mflo	($13,$12,$12)
    289 	mfhi	($12,$12,$12)
    290 	sd	$13,0($4)
    291 	sd	$12,8($4)
    292 
        /*
         * source word 1: the destination advances by eight words and the
         * source by four, so later stores use negative offsets from $4.
         */
    293 	dmultu	($14,$14)
    294 	subu	$6,4
    295 	daddu $4,8*8
    296 	daddu $5,4*8
    297 	mflo	($15,$14,$14)
    298 	mfhi	($14,$14,$14)
    299 	sd	$15,-6*8($4)
    300 	sd	$14,-5*8($4)
    301 
        /* source word 2 */
    302 	dmultu	($8,$8)
    303 	mflo	($9,$8,$8)
    304 	mfhi	($8,$8,$8)
    305 	sd	$9,-4*8($4)
    306 	sd	$8,-3*8($4)
    307 
    308 
        /* source word 3; $8 is recomputed as the loop condition */
    309 	dmultu	($10,$10)
    310 	and	$8,$6,$3
    311 	mflo	($11,$10,$10)
    312 	mfhi	($10,$10,$10)
    313 	sd	$11,-2*8($4)
    314 
    315 	.set	noreorder
    316 	bgtz	$8,.L_bn_sqr_words_loop	/* at least four words left */
    317 	sd	$10,-8($4)	/* delay slot: store the high half of word 3 */
    318 
    319 	beqz	$6,.L_bn_sqr_words_return	/* nothing left over */
    320 	nop
    321 
    322 .L_bn_sqr_words_tail:
        /* first leftover word */
    323 	.set	reorder
    324 	ld	$12,0($5)
    325 	dmultu	($12,$12)
    326 	subu	$6,1
    327 	mflo	($13,$12,$12)
    328 	mfhi	($12,$12,$12)
    329 	sd	$13,0($4)
    330 	sd	$12,8($4)
    331 	beqz	$6,.L_bn_sqr_words_return
    332 
        /* second leftover word */
    333 	ld	$12,8($5)
    334 	dmultu	($12,$12)
    335 	subu	$6,1
    336 	mflo	($13,$12,$12)
    337 	mfhi	($12,$12,$12)
    338 	sd	$13,2*8($4)
    339 	sd	$12,3*8($4)
    340 	beqz	$6,.L_bn_sqr_words_return
    341 
        /* third leftover word; the count is not decremented or tested again */
    342 	ld	$12,2*8($5)
    343 	dmultu	($12,$12)
    344 	mflo	($13,$12,$12)
    345 	mfhi	($12,$12,$12)
    346 	sd	$13,4*8($4)
    347 	sd	$12,5*8($4)
    348 
    349 .L_bn_sqr_words_return:
    350 	.set	noreorder
    351 	jr	$31
    352 	move	$4,$2	/* delay slot: copy $2 into $4 */
    353 
    354 .end	bn_sqr_words_internal
    355 
    356 .align	5
        /*
         * bn_add_words: exported entry point.
         *
         * Branches to bn_add_words_internal when the count in $7 is greater
         * than zero.  Otherwise it returns 0 in $2 and copies that value to
         * $4.  $2 is cleared in the branch delay slot, so the internal routine
         * always starts with a zero carry.
         *
         * NOTE(review): presumably called with (rp=$4, ap=$5, bp=$6, num=$7)
         * -- confirm against the C prototype.
         */
    357 .globl	bn_add_words
    358 .ent	bn_add_words
    359 bn_add_words:
    360 	.set	noreorder
    361 	bgtz	$7,bn_add_words_internal	/* count > 0: do the work */
    362 	move	$2,$0	/* delay slot, always executed: $2 = 0 */
    363 	jr	$31
    364 	move	$4,$2	/* delay slot: mirror the result into $4 */
    365 .end	bn_add_words
    366 
    367 .align	5
        /*
         * bn_add_words_internal: add two arrays of 64-bit words with carry
         * propagation.
         *
         * Register use, as read from the code below:
         *   $4   destination pointer
         *   $5   first source pointer
         *   $6   second source pointer
         *   $7   remaining word count
         *   $2   running carry on entry, final carry (return value) on exit
         *   $3   constant -4, masks the count down to a multiple of four
         *   $1   loop condition (count with the low two bits cleared)
         *   $8-$15, $24, $25   scratch
         *
         * Per word:  t = src2[i] + src1[i]; dst[i] = t + carry; the new
         * carry is the sum of the overflow bits of those two additions.
         *
         * The main loop handles four words per pass; the tail handles the
         * remaining one to three words.
         */
    368 .ent	bn_add_words_internal
    369 bn_add_words_internal:
    370 	.set	reorder
    371 	li	$3,-4
    372 	and	$1,$7,$3	/* $1 = count with the low two bits cleared */
    373 	beqz	$1,.L_bn_add_words_tail	/* fewer than four words: tail only */
    374 
    375 .L_bn_add_words_loop:
        /*
         * Loads for all four words are interleaved with the count and
         * pointer updates; loads issued after a pointer has been advanced
         * use negative offsets.
         */
    376 	ld	$12,0($5)
    377 	ld	$8,0($6)
    378 	subu	$7,4
    379 	ld	$13,8($5)
    380 	and	$1,$7,$3	/* loop condition for the next pass */
    381 	ld	$14,2*8($5)
    382 	daddu $6,4*8
    383 	ld	$15,3*8($5)
    384 	daddu $4,4*8
    385 	ld	$9,-3*8($6)
    386 	daddu $5,4*8
    387 	ld	$10,-2*8($6)
    388 	ld	$11,-8($6)
        /* word 0 */
    389 	daddu	$8,$12	/* t = src2 + src1 */
    390 	sltu	$24,$8,$12	/* overflow of that addition */
    391 	daddu	$12,$8,$2	/* result = t + incoming carry */
    392 	sltu	$2,$12,$8	/* overflow of the carry addition */
    393 	sd	$12,-4*8($4)
    394 	daddu	$2,$24	/* carry out = sum of both overflow bits */
    395 
        /* word 1 */
    396 	daddu	$9,$13
    397 	sltu	$25,$9,$13
    398 	daddu	$13,$9,$2
    399 	sltu	$2,$13,$9
    400 	sd	$13,-3*8($4)
    401 	daddu	$2,$25
    402 
        /* word 2 */
    403 	daddu	$10,$14
    404 	sltu	$24,$10,$14
    405 	daddu	$14,$10,$2
    406 	sltu	$2,$14,$10
    407 	sd	$14,-2*8($4)
    408 	daddu	$2,$24
    409 
        /* word 3 */
    410 	daddu	$11,$15
    411 	sltu	$25,$11,$15
    412 	daddu	$15,$11,$2
    413 	sltu	$2,$15,$11
    414 	sd	$15,-8($4)
    415 
    416 	.set	noreorder
    417 	bgtz	$1,.L_bn_add_words_loop	/* at least four words left */
    418 	daddu	$2,$25	/* delay slot: finish the carry of word 3 */
    419 
    420 	beqz	$7,.L_bn_add_words_return	/* nothing left over */
    421 	nop
    422 
    423 .L_bn_add_words_tail:
        /* first leftover word */
    424 	.set	reorder
    425 	ld	$12,0($5)
    426 	ld	$8,0($6)
    427 	daddu	$8,$12
    428 	subu	$7,1
    429 	sltu	$24,$8,$12
    430 	daddu	$12,$8,$2
    431 	sltu	$2,$12,$8
    432 	sd	$12,0($4)
    433 	daddu	$2,$24
    434 	beqz	$7,.L_bn_add_words_return
    435 
        /* second leftover word */
    436 	ld	$13,8($5)
    437 	ld	$9,8($6)
    438 	daddu	$9,$13
    439 	subu	$7,1
    440 	sltu	$25,$9,$13
    441 	daddu	$13,$9,$2
    442 	sltu	$2,$13,$9
    443 	sd	$13,8($4)
    444 	daddu	$2,$25
    445 	beqz	$7,.L_bn_add_words_return
    446 
        /* third leftover word; the count is not decremented or tested again */
    447 	ld	$14,2*8($5)
    448 	ld	$10,2*8($6)
    449 	daddu	$10,$14
    450 	sltu	$24,$10,$14
    451 	daddu	$14,$10,$2
    452 	sltu	$2,$14,$10
    453 	sd	$14,2*8($4)
    454 	daddu	$2,$24
    455 
    456 .L_bn_add_words_return:
    457 	.set	noreorder
    458 	jr	$31
    459 	move	$4,$2	/* delay slot: mirror the final carry into $4 */
    460 
    461 .end	bn_add_words_internal
    462 
    463 .align	5
        /*
         * bn_sub_words: exported entry point.
         *
         * Branches to bn_sub_words_internal when the count in $7 is greater
         * than zero.  Otherwise it returns 0 in $2 and clears $4.  $2 is
         * cleared in the branch delay slot, so the internal routine always
         * starts with a zero borrow.
         *
         * NOTE(review): presumably called with (rp=$4, ap=$5, bp=$6, num=$7)
         * -- confirm against the C prototype.
         */
    464 .globl	bn_sub_words
    465 .ent	bn_sub_words
    466 bn_sub_words:
    467 	.set	noreorder
    468 	bgtz	$7,bn_sub_words_internal	/* count > 0: do the work */
    469 	move	$2,$0	/* delay slot, always executed: $2 = 0 */
    470 	jr	$31
    471 	move	$4,$0	/* delay slot: $4 = 0, the same value $2 holds here */
    472 .end	bn_sub_words
    473 
    474 .align	5
        /*
         * bn_sub_words_internal: subtract one array of 64-bit words from
         * another with borrow propagation.
         *
         * Register use, as read from the code below:
         *   $4   destination pointer
         *   $5   minuend pointer
         *   $6   subtrahend pointer
         *   $7   remaining word count
         *   $2   running borrow on entry, final borrow (return value) on exit
         *   $3   constant -4, masks the count down to a multiple of four
         *   $1   loop condition (count with the low two bits cleared)
         *   $8-$15, $24, $25   scratch
         *
         * Per word:  t = a[i] - b[i]; dst[i] = t - borrow; the new borrow is
         * (a[i] < b[i]) plus (dst[i] > t).
         *
         * The main loop handles four words per pass; the tail handles the
         * remaining one to three words.
         */
    475 .ent	bn_sub_words_internal
    476 bn_sub_words_internal:
    477 	.set	reorder
    478 	li	$3,-4
    479 	and	$1,$7,$3	/* $1 = count with the low two bits cleared */
    480 	beqz	$1,.L_bn_sub_words_tail	/* fewer than four words: tail only */
    481 
    482 .L_bn_sub_words_loop:
        /*
         * Loads for all four words are interleaved with the count and
         * pointer updates; loads issued after a pointer has been advanced
         * use negative offsets.
         */
    483 	ld	$12,0($5)
    484 	ld	$8,0($6)
    485 	subu	$7,4
    486 	ld	$13,8($5)
    487 	and	$1,$7,$3	/* loop condition for the next pass */
    488 	ld	$14,2*8($5)
    489 	daddu $6,4*8
    490 	ld	$15,3*8($5)
    491 	daddu $4,4*8
    492 	ld	$9,-3*8($6)
    493 	daddu $5,4*8
    494 	ld	$10,-2*8($6)
    495 	ld	$11,-8($6)
        /* word 0 */
    496 	sltu	$24,$12,$8	/* borrow out of a - b */
    497 	dsubu	$8,$12,$8	/* t = a - b */
    498 	dsubu	$12,$8,$2	/* result = t - incoming borrow */
    499 	sgtu	$2,$12,$8	/* borrow out of the second subtraction */
    500 	sd	$12,-4*8($4)
    501 	daddu	$2,$24	/* borrow out = sum of both borrow bits */
    502 
        /* word 1 */
    503 	sltu	$25,$13,$9
    504 	dsubu	$9,$13,$9
    505 	dsubu	$13,$9,$2
    506 	sgtu	$2,$13,$9
    507 	sd	$13,-3*8($4)
    508 	daddu	$2,$25
    509 
    510 
        /* word 2 */
    511 	sltu	$24,$14,$10
    512 	dsubu	$10,$14,$10
    513 	dsubu	$14,$10,$2
    514 	sgtu	$2,$14,$10
    515 	sd	$14,-2*8($4)
    516 	daddu	$2,$24
    517 
        /* word 3 */
    518 	sltu	$25,$15,$11
    519 	dsubu	$11,$15,$11
    520 	dsubu	$15,$11,$2
    521 	sgtu	$2,$15,$11
    522 	sd	$15,-8($4)
    523 
    524 	.set	noreorder
    525 	bgtz	$1,.L_bn_sub_words_loop	/* at least four words left */
    526 	daddu	$2,$25	/* delay slot: finish the borrow of word 3 */
    527 
    528 	beqz	$7,.L_bn_sub_words_return	/* nothing left over */
    529 	nop
    530 
    531 .L_bn_sub_words_tail:
        /* first leftover word */
    532 	.set	reorder
    533 	ld	$12,0($5)
    534 	ld	$8,0($6)
    535 	subu	$7,1
    536 	sltu	$24,$12,$8
    537 	dsubu	$8,$12,$8
    538 	dsubu	$12,$8,$2
    539 	sgtu	$2,$12,$8
    540 	sd	$12,0($4)
    541 	daddu	$2,$24
    542 	beqz	$7,.L_bn_sub_words_return
    543 
        /* second leftover word */
    544 	ld	$13,8($5)
    545 	subu	$7,1
    546 	ld	$9,8($6)
    547 	sltu	$25,$13,$9
    548 	dsubu	$9,$13,$9
    549 	dsubu	$13,$9,$2
    550 	sgtu	$2,$13,$9
    551 	sd	$13,8($4)
    552 	daddu	$2,$25
    553 	beqz	$7,.L_bn_sub_words_return
    554 
        /* third leftover word; the count is not decremented or tested again */
    555 	ld	$14,2*8($5)
    556 	ld	$10,2*8($6)
    557 	sltu	$24,$14,$10
    558 	dsubu	$10,$14,$10
    559 	dsubu	$14,$10,$2
    560 	sgtu	$2,$14,$10
    561 	sd	$14,2*8($4)
    562 	daddu	$2,$24
    563 
    564 .L_bn_sub_words_return:
    565 	.set	noreorder
    566 	jr	$31
    567 	move	$4,$2	/* delay slot: mirror the final borrow into $4 */
    568 .end	bn_sub_words_internal
    569 
    570 #if 0
    571 /*
    572  * The bn_div_3_words entry point is re-used for constant-time interface.
    573  * Implementation is retained as historical reference.
    574  */
        /*
         * Everything up to the matching #endif is excluded by the
         * preprocessor and is not assembled.
         *
         * bn_div_3_words, as written: loads the word at ($4) and the word
         * just below it, keeps the original $4 in $7 and $5 in $10, and
         * returns -1 (all ones) in $2 when the loaded word equals $6.
         * Otherwise it continues in bn_div_3_words_internal with
         * $4 = word at ($7), $5 = word at -8($7), $6 unchanged.
         */
    575 .align 5
    576 .globl	bn_div_3_words
    577 .ent	bn_div_3_words
    578 bn_div_3_words:
    579 	.set	noreorder
    580 	move	$7,$4		# we know that bn_div_words does not
    581 				# touch $7, $10, $11 and preserves $6
    582 				# so that we can save two arguments
    583 				# and return address in registers
    584 				# instead of stack:-)
    585 
    586 	ld	$4,($7)
    587 	move	$10,$5
    588 	bne	$4,$6,bn_div_3_words_internal
    589 	 ld	$5,-8($7)
    590 	li	$2,-1
    591 	jr	$31
    592 	move	$4,$2
    593 .end	bn_div_3_words
    594 
    595 .align	5
        /*
         * bn_div_3_words_internal, as written: saves the return address in
         * $11, calls bn_div_words_internal to get an initial quotient in $2,
         * then multiplies that quotient by the saved $10 and compares the
         * product with the remainder in $5 and the word at -2*8($7).  The
         * inner loop decrements $2 while adjusting the product and $5.
         * NOTE(review): the exact exit conditions of the inner loop were
         * not re-derived for this disabled code -- verify before reviving it.
         */
    596 .ent	bn_div_3_words_internal
    597 bn_div_3_words_internal:
    598 	.set	reorder
    599 	move	$11,$31
    600 	bal	bn_div_words_internal
    601 	move	$31,$11
    602 	dmultu	($10,$2)
    603 	ld	$14,-2*8($7)
    604 	move	$8,$0
    605 	mfhi	($13,$10,$2)
    606 	mflo	($12,$10,$2)
    607 	sltu	$24,$13,$5
    608 .L_bn_div_3_words_inner_loop:
    609 	bnez	$24,.L_bn_div_3_words_inner_loop_done
    610 	sgeu	$1,$14,$12
    611 	seq	$25,$13,$5
    612 	and	$1,$25
    613 	sltu	$15,$12,$10
    614 	daddu	$5,$6
    615 	dsubu	$13,$15
    616 	dsubu	$12,$10
    617 	sltu	$24,$13,$5
    618 	sltu	$8,$5,$6
    619 	or	$24,$8
    620 	.set	noreorder
    621 	beqz	$1,.L_bn_div_3_words_inner_loop
    622 	dsubu	$2,1
    623 	daddu	$2,1
    624 	.set	reorder
    625 .L_bn_div_3_words_inner_loop_done:
    626 	.set	noreorder
    627 	jr	$31
    628 	move	$4,$2
    629 .end	bn_div_3_words_internal
    630 #endif
    631 
    632 .align	5
        /*
         * bn_div_words: exported entry point.
         *
         * Branches to bn_div_words_internal when the divisor in $6 is
         * non-zero.  For a zero divisor it returns -1 (all bits set) in $2
         * and copies that value to $4.  The li in the branch delay slot runs
         * on both paths.
         *
         * NOTE(review): presumably called with (h=$4, l=$5, d=$6)
         * -- confirm against the C prototype.
         */
    633 .globl	bn_div_words
    634 .ent	bn_div_words
    635 bn_div_words:
    636 	.set	noreorder
    637 	bnez	$6,bn_div_words_internal
    638 	li	$2,-1		# I would rather signal div-by-zero
    639 				# which can be done with 'break 7'
    640 	jr	$31
    641 	move	$4,$2	/* delay slot: mirror the result into $4 */
    642 .end	bn_div_words
    643 
    644 .align	5
        /*
         * bn_div_words_internal: divide the 128-bit value $4:$5 (high:low)
         * by the non-zero 64-bit divisor in $6.  The quotient is returned in
         * $2 (mirrored to $4) and the remainder in $3 (mirrored to $5).
         *
         * Steps, as implemented below:
         *   1. Shift the divisor left until its top bit is set, counting the
         *      shifts in $25, and shift $4:$5 left by the same amount.  If
         *      that would push non-zero bits out of $4, break 6 is executed.
         *   2. If $4 >= divisor, subtract the divisor from $4 once.
         *   3. Produce the quotient as two 32-bit halves.  Each half starts
         *      from an estimate (0xffffffff, or $4 divided by the top 32 bits
         *      of the divisor) and is decremented while estimate * divisor
         *      is greater than the current partial remainder.
         *   4. Shift the remainder and the divisor back right by $25.
         *
         * The routine is entered in noreorder mode, left in effect by the
         * .set noreorder in bn_div_words above, so delay slots are written
         * out explicitly until the first .set reorder.
         */
    645 .ent	bn_div_words_internal
    646 bn_div_words_internal:
    647 	move	$3,$0
    648 	bltz	$6,.L_bn_div_words_body	/* top bit already set: no shifting */
    649 	move	$25,$3	/* delay slot: shift count = 0 */
    650 	dsll	$6,1
    651 	bgtz	$6,.-4	/* top bit still clear: back to the dsll */
    652 	addu	$25,1	/* delay slot: one more shift counted */
    653 
    654 	.set	reorder
    655 	negu	$13,$25
    656 	li	$14,-1
        /*
         * NOTE(review): the two shifts by $13 below rely on a register shift
         * using only the low six bits of the amount, i.e. 64 - $25 --
         * confirm against the ISA manual.
         */
    657 	dsll	$14,$13	/* mask of the top $25 bits */
    658 	and	$14,$4	/* bits of the high word that would be lost */
    659 	dsrl	$1,$5,$13	/* bits moving from the low word into the high word */
    660 	.set	noreorder
    661 	beqz	$14,.+12	/* nothing lost: skip the nop and the break */
    662 	nop
    663 	break	6		# signal overflow
    664 	.set	reorder
    665 	dsll	$4,$25
    666 	dsll	$5,$25
    667 	or	$4,$1
    668 .L_bn_div_words_body:
    669 	dsrl	$3,$6,4*8	# bits
    670 	sgeu	$1,$4,$6
    671 	.set	noreorder
    672 	beqz	$1,.+12	/* high word < divisor: skip the subtraction */
    673 	nop
    674 	dsubu	$4,$6
    675 	.set	reorder
    676 
        /* upper quotient half: initial estimate in $8 */
    677 	li	$8,-1
    678 	dsrl	$9,$4,4*8	# bits
    679 	dsrl	$8,4*8	# q=0xffffffff
    680 	beq	$3,$9,.L_bn_div_words_skip_div1	/* top halves equal: keep 0xffffffff */
    681 	ddivu	($4,$3)
    682 	mfqt	($8,$4,$3)
    683 .L_bn_div_words_skip_div1:
    684 	dmultu	($6,$8)	/* estimate * divisor, 128-bit result in $13:$12 */
    685 	dsll	$15,$4,4*8	# bits
    686 	dsrl	$1,$5,4*8	# bits
    687 	or	$15,$1	/* $15 = low half of $4 joined with high half of $5 */
    688 	mflo	($12,$6,$8)
    689 	mfhi	($13,$6,$8)
    690 .L_bn_div_words_inner_loop1:
        /* $1 = 1 while the product $13:$12 is greater than $9:$15 */
    691 	sltu	$14,$15,$12
    692 	seq	$24,$9,$13
    693 	sltu	$1,$9,$13
    694 	and	$14,$24
    695 	sltu	$2,$12,$6	/* borrow of the product subtraction below */
    696 	or	$1,$14
    697 	.set	noreorder
    698 	beqz	$1,.L_bn_div_words_inner_loop1_done
    699 	dsubu	$13,$2	/* delay slot: runs on exit too; $13 is reloaded later */
    700 	dsubu	$12,$6	/* product -= divisor */
    701 	b	.L_bn_div_words_inner_loop1
    702 	dsubu	$8,1	/* delay slot: estimate-- */
    703 	.set	reorder
    704 .L_bn_div_words_inner_loop1_done:
    705 
    706 	dsll	$5,4*8	# bits
    707 	dsubu	$4,$15,$12	/* new partial remainder */
    708 	dsll	$2,$8,4*8	# bits
    709 
        /* lower quotient half: same procedure, using $3 for the borrow */
    710 	li	$8,-1
    711 	dsrl	$9,$4,4*8	# bits
    712 	dsrl	$8,4*8	# q=0xffffffff
    713 	beq	$3,$9,.L_bn_div_words_skip_div2
    714 	ddivu	($4,$3)
    715 	mfqt	($8,$4,$3)
    716 .L_bn_div_words_skip_div2:
    717 	dmultu	($6,$8)
    718 	dsll	$15,$4,4*8	# bits
    719 	dsrl	$1,$5,4*8	# bits
    720 	or	$15,$1
    721 	mflo	($12,$6,$8)
    722 	mfhi	($13,$6,$8)
    723 .L_bn_div_words_inner_loop2:
    724 	sltu	$14,$15,$12
    725 	seq	$24,$9,$13
    726 	sltu	$1,$9,$13
    727 	and	$14,$24
    728 	sltu	$3,$12,$6	/* $3 is reused here; $2 holds the upper half */
    729 	or	$1,$14
    730 	.set	noreorder
    731 	beqz	$1,.L_bn_div_words_inner_loop2_done
    732 	dsubu	$13,$3
    733 	dsubu	$12,$6
    734 	b	.L_bn_div_words_inner_loop2
    735 	dsubu	$8,1
    736 	.set	reorder
    737 .L_bn_div_words_inner_loop2_done:
    738 
    739 	dsubu	$4,$15,$12	/* final remainder, still shifted left by $25 */
    740 	or	$2,$8	/* join the two quotient halves */
    741 	dsrl	$3,$4,$25	# $3 contains remainder if anybody wants it
    742 	dsrl	$6,$25		# restore $6
    743 
    744 	.set	noreorder
    745 	move	$5,$3	/* mirror the remainder into $5 */
    746 	jr	$31
    747 	move	$4,$2	/* delay slot: mirror the quotient into $4 */
    748 .end	bn_div_words_internal
    749 
    750 .align	5
    751 .globl	bn_mul_comba8
    752 .ent	bn_mul_comba8
    753 bn_mul_comba8:
    754 	.set	noreorder
    755 	.frame	$29,6*8,$31
    756 	.mask	0x003f0000,-8
    757 	dsubu $29,6*8
    758 	sd	$21,5*8($29)
    759 	sd	$20,4*8($29)
    760 	sd	$19,3*8($29)
    761 	sd	$18,2*8($29)
    762 	sd	$17,1*8($29)
    763 	sd	$16,0*8($29)
    764 
    765 	.set	reorder
    766 	ld	$12,0($5)	# If compiled with -mips3 option on
    767 				# R5000 box assembler barks on this
    768 				# 1ine with "should not have mult/div
    769 				# as last instruction in bb (R10K
    770 				# bug)" warning. If anybody out there
    771 				# has a clue about how to circumvent
    772 				# this do send me a note.
    773 				#		<appro@fy.chalmers.se>
    774 
    775 	ld	$8,0($6)
    776 	ld	$13,8($5)
    777 	ld	$14,2*8($5)
    778 	dmultu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
    779 	ld	$15,3*8($5)
    780 	ld	$9,8($6)
    781 	ld	$10,2*8($6)
    782 	ld	$11,3*8($6)
    783 	mflo	($2,$12,$8)
    784 	mfhi	($3,$12,$8)
    785 
    786 	ld	$16,4*8($5)
    787 	ld	$18,5*8($5)
    788 	dmultu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
    789 	ld	$20,6*8($5)
    790 	ld	$5,7*8($5)
    791 	ld	$17,4*8($6)
    792 	ld	$19,5*8($6)
    793 	mflo	($24,$12,$9)
    794 	mfhi	($25,$12,$9)
    795 	daddu	$3,$24
    796 	sltu	$1,$3,$24
    797 	dmultu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
    798 	daddu	$7,$25,$1
    799 	ld	$21,6*8($6)
    800 	ld	$6,7*8($6)
    801 	sd	$2,0($4)	# r[0]=c1;
    802 	mflo	($24,$13,$8)
    803 	mfhi	($25,$13,$8)
    804 	daddu	$3,$24
    805 	sltu	$1,$3,$24
    806 	 dmultu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
    807 	daddu	$25,$1
    808 	daddu	$7,$25
    809 	sltu	$2,$7,$25
    810 	sd	$3,8($4)	# r[1]=c2;
    811 
    812 	mflo	($24,$14,$8)
    813 	mfhi	($25,$14,$8)
    814 	daddu	$7,$24
    815 	sltu	$1,$7,$24
    816 	dmultu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
    817 	daddu	$25,$1
    818 	daddu	$2,$25
    819 	mflo	($24,$13,$9)
    820 	mfhi	($25,$13,$9)
    821 	daddu	$7,$24
    822 	sltu	$1,$7,$24
    823 	dmultu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
    824 	daddu	$25,$1
    825 	daddu	$2,$25
    826 	sltu	$3,$2,$25
    827 	mflo	($24,$12,$10)
    828 	mfhi	($25,$12,$10)
    829 	daddu	$7,$24
    830 	sltu	$1,$7,$24
    831 	 dmultu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
    832 	daddu	$25,$1
    833 	daddu	$2,$25
    834 	sltu	$1,$2,$25
    835 	daddu	$3,$1
    836 	sd	$7,2*8($4)	# r[2]=c3;
    837 
    838 	mflo	($24,$12,$11)
    839 	mfhi	($25,$12,$11)
    840 	daddu	$2,$24
    841 	sltu	$1,$2,$24
    842 	dmultu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
    843 	daddu	$25,$1
    844 	daddu	$3,$25
    845 	sltu	$7,$3,$25
    846 	mflo	($24,$13,$10)
    847 	mfhi	($25,$13,$10)
    848 	daddu	$2,$24
    849 	sltu	$1,$2,$24
    850 	dmultu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
    851 	daddu	$25,$1
    852 	daddu	$3,$25
    853 	sltu	$1,$3,$25
    854 	daddu	$7,$1
    855 	mflo	($24,$14,$9)
    856 	mfhi	($25,$14,$9)
    857 	daddu	$2,$24
    858 	sltu	$1,$2,$24
    859 	dmultu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
    860 	daddu	$25,$1
    861 	daddu	$3,$25
    862 	sltu	$1,$3,$25
    863 	daddu	$7,$1
    864 	mflo	($24,$15,$8)
    865 	mfhi	($25,$15,$8)
    866 	daddu	$2,$24
    867 	sltu	$1,$2,$24
    868 	 dmultu	($16,$8)		# mul_add_c(a[4],b[0],c2,c3,c1);
    869 	daddu	$25,$1
    870 	daddu	$3,$25
    871 	sltu	$1,$3,$25
    872 	daddu	$7,$1
    873 	sd	$2,3*8($4)	# r[3]=c1;
    874 
    875 	mflo	($24,$16,$8)
    876 	mfhi	($25,$16,$8)
    877 	daddu	$3,$24
    878 	sltu	$1,$3,$24
    879 	dmultu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
    880 	daddu	$25,$1
    881 	daddu	$7,$25
    882 	sltu	$2,$7,$25
    883 	mflo	($24,$15,$9)
    884 	mfhi	($25,$15,$9)
    885 	daddu	$3,$24
    886 	sltu	$1,$3,$24
    887 	dmultu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
    888 	daddu	$25,$1
    889 	daddu	$7,$25
    890 	sltu	$1,$7,$25
    891 	daddu	$2,$1
    892 	mflo	($24,$14,$10)
    893 	mfhi	($25,$14,$10)
    894 	daddu	$3,$24
    895 	sltu	$1,$3,$24
    896 	dmultu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
    897 	daddu	$25,$1
    898 	daddu	$7,$25
    899 	sltu	$1,$7,$25
    900 	daddu	$2,$1
    901 	mflo	($24,$13,$11)
    902 	mfhi	($25,$13,$11)
    903 	daddu	$3,$24
    904 	sltu	$1,$3,$24
    905 	dmultu	($12,$17)		# mul_add_c(a[0],b[4],c2,c3,c1);
    906 	daddu	$25,$1
    907 	daddu	$7,$25
    908 	sltu	$1,$7,$25
    909 	daddu	$2,$1
    910 	mflo	($24,$12,$17)
    911 	mfhi	($25,$12,$17)
    912 	daddu	$3,$24
    913 	sltu	$1,$3,$24
    914 	 dmultu	($12,$19)		# mul_add_c(a[0],b[5],c3,c1,c2);
    915 	daddu	$25,$1
    916 	daddu	$7,$25
    917 	sltu	$1,$7,$25
    918 	daddu	$2,$1
    919 	sd	$3,4*8($4)	# r[4]=c2;
    920 
    921 	mflo	($24,$12,$19)
    922 	mfhi	($25,$12,$19)
    923 	daddu	$7,$24
    924 	sltu	$1,$7,$24
    925 	dmultu	($13,$17)		# mul_add_c(a[1],b[4],c3,c1,c2);
    926 	daddu	$25,$1
    927 	daddu	$2,$25
    928 	sltu	$3,$2,$25
    929 	mflo	($24,$13,$17)
    930 	mfhi	($25,$13,$17)
    931 	daddu	$7,$24
    932 	sltu	$1,$7,$24
    933 	dmultu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
    934 	daddu	$25,$1
    935 	daddu	$2,$25
    936 	sltu	$1,$2,$25
    937 	daddu	$3,$1
    938 	mflo	($24,$14,$11)
    939 	mfhi	($25,$14,$11)
    940 	daddu	$7,$24
    941 	sltu	$1,$7,$24
    942 	dmultu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
    943 	daddu	$25,$1
    944 	daddu	$2,$25
    945 	sltu	$1,$2,$25
    946 	daddu	$3,$1
    947 	mflo	($24,$15,$10)
    948 	mfhi	($25,$15,$10)
    949 	daddu	$7,$24
    950 	sltu	$1,$7,$24
    951 	dmultu	($16,$9)		# mul_add_c(a[4],b[1],c3,c1,c2);
    952 	daddu	$25,$1
    953 	daddu	$2,$25
    954 	sltu	$1,$2,$25
    955 	daddu	$3,$1
    956 	mflo	($24,$16,$9)
    957 	mfhi	($25,$16,$9)
    958 	daddu	$7,$24
    959 	sltu	$1,$7,$24
    960 	dmultu	($18,$8)		# mul_add_c(a[5],b[0],c3,c1,c2);
    961 	daddu	$25,$1
    962 	daddu	$2,$25
    963 	sltu	$1,$2,$25
    964 	daddu	$3,$1
    965 	mflo	($24,$18,$8)
    966 	mfhi	($25,$18,$8)
    967 	daddu	$7,$24
    968 	sltu	$1,$7,$24
    969 	 dmultu	($20,$8)		# mul_add_c(a[6],b[0],c1,c2,c3);
    970 	daddu	$25,$1
    971 	daddu	$2,$25
    972 	sltu	$1,$2,$25
    973 	daddu	$3,$1
    974 	sd	$7,5*8($4)	# r[5]=c3;
    975 
    976 	mflo	($24,$20,$8)
    977 	mfhi	($25,$20,$8)
    978 	daddu	$2,$24
    979 	sltu	$1,$2,$24
    980 	dmultu	($18,$9)		# mul_add_c(a[5],b[1],c1,c2,c3);
    981 	daddu	$25,$1
    982 	daddu	$3,$25
    983 	sltu	$7,$3,$25
    984 	mflo	($24,$18,$9)
    985 	mfhi	($25,$18,$9)
    986 	daddu	$2,$24
    987 	sltu	$1,$2,$24
    988 	dmultu	($16,$10)		# mul_add_c(a[4],b[2],c1,c2,c3);
    989 	daddu	$25,$1
    990 	daddu	$3,$25
    991 	sltu	$1,$3,$25
    992 	daddu	$7,$1
    993 	mflo	($24,$16,$10)
    994 	mfhi	($25,$16,$10)
    995 	daddu	$2,$24
    996 	sltu	$1,$2,$24
    997 	dmultu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
    998 	daddu	$25,$1
    999 	daddu	$3,$25
   1000 	sltu	$1,$3,$25
   1001 	daddu	$7,$1
   1002 	mflo	($24,$15,$11)
   1003 	mfhi	($25,$15,$11)
   1004 	daddu	$2,$24
   1005 	sltu	$1,$2,$24
   1006 	dmultu	($14,$17)		# mul_add_c(a[2],b[4],c1,c2,c3);
   1007 	daddu	$25,$1
   1008 	daddu	$3,$25
   1009 	sltu	$1,$3,$25
   1010 	daddu	$7,$1
   1011 	mflo	($24,$14,$17)
   1012 	mfhi	($25,$14,$17)
   1013 	daddu	$2,$24
   1014 	sltu	$1,$2,$24
   1015 	dmultu	($13,$19)		# mul_add_c(a[1],b[5],c1,c2,c3);
   1016 	daddu	$25,$1
   1017 	daddu	$3,$25
   1018 	sltu	$1,$3,$25
   1019 	daddu	$7,$1
   1020 	mflo	($24,$13,$19)
   1021 	mfhi	($25,$13,$19)
   1022 	daddu	$2,$24
   1023 	sltu	$1,$2,$24
   1024 	dmultu	($12,$21)		# mul_add_c(a[0],b[6],c1,c2,c3);
   1025 	daddu	$25,$1
   1026 	daddu	$3,$25
   1027 	sltu	$1,$3,$25
   1028 	daddu	$7,$1
   1029 	mflo	($24,$12,$21)
   1030 	mfhi	($25,$12,$21)
   1031 	daddu	$2,$24
   1032 	sltu	$1,$2,$24
   1033 	 dmultu	($12,$6)		# mul_add_c(a[0],b[7],c2,c3,c1);
   1034 	daddu	$25,$1
   1035 	daddu	$3,$25
   1036 	sltu	$1,$3,$25
   1037 	daddu	$7,$1
   1038 	sd	$2,6*8($4)	# r[6]=c1;
   1039 
   1040 	mflo	($24,$12,$6)
   1041 	mfhi	($25,$12,$6)
   1042 	daddu	$3,$24
   1043 	sltu	$1,$3,$24
   1044 	dmultu	($13,$21)		# mul_add_c(a[1],b[6],c2,c3,c1);
   1045 	daddu	$25,$1
   1046 	daddu	$7,$25
   1047 	sltu	$2,$7,$25
   1048 	mflo	($24,$13,$21)
   1049 	mfhi	($25,$13,$21)
   1050 	daddu	$3,$24
   1051 	sltu	$1,$3,$24
   1052 	dmultu	($14,$19)		# mul_add_c(a[2],b[5],c2,c3,c1);
   1053 	daddu	$25,$1
   1054 	daddu	$7,$25
   1055 	sltu	$1,$7,$25
   1056 	daddu	$2,$1
   1057 	mflo	($24,$14,$19)
   1058 	mfhi	($25,$14,$19)
   1059 	daddu	$3,$24
   1060 	sltu	$1,$3,$24
   1061 	dmultu	($15,$17)		# mul_add_c(a[3],b[4],c2,c3,c1);
   1062 	daddu	$25,$1
   1063 	daddu	$7,$25
   1064 	sltu	$1,$7,$25
   1065 	daddu	$2,$1
   1066 	mflo	($24,$15,$17)
   1067 	mfhi	($25,$15,$17)
   1068 	daddu	$3,$24
   1069 	sltu	$1,$3,$24
   1070 	dmultu	($16,$11)		# mul_add_c(a[4],b[3],c2,c3,c1);
   1071 	daddu	$25,$1
   1072 	daddu	$7,$25
   1073 	sltu	$1,$7,$25
   1074 	daddu	$2,$1
   1075 	mflo	($24,$16,$11)
   1076 	mfhi	($25,$16,$11)
   1077 	daddu	$3,$24
   1078 	sltu	$1,$3,$24
   1079 	dmultu	($18,$10)		# mul_add_c(a[5],b[2],c2,c3,c1);
   1080 	daddu	$25,$1
   1081 	daddu	$7,$25
   1082 	sltu	$1,$7,$25
   1083 	daddu	$2,$1
   1084 	mflo	($24,$18,$10)
   1085 	mfhi	($25,$18,$10)
   1086 	daddu	$3,$24
   1087 	sltu	$1,$3,$24
   1088 	dmultu	($20,$9)		# mul_add_c(a[6],b[1],c2,c3,c1);
   1089 	daddu	$25,$1
   1090 	daddu	$7,$25
   1091 	sltu	$1,$7,$25
   1092 	daddu	$2,$1
   1093 	mflo	($24,$20,$9)
   1094 	mfhi	($25,$20,$9)
   1095 	daddu	$3,$24
   1096 	sltu	$1,$3,$24
   1097 	dmultu	($5,$8)		# mul_add_c(a[7],b[0],c2,c3,c1);
   1098 	daddu	$25,$1
   1099 	daddu	$7,$25
   1100 	sltu	$1,$7,$25
   1101 	daddu	$2,$1
   1102 	mflo	($24,$5,$8)
   1103 	mfhi	($25,$5,$8)
   1104 	daddu	$3,$24
   1105 	sltu	$1,$3,$24
   1106 	 dmultu	($5,$9)		# mul_add_c(a[7],b[1],c3,c1,c2);
   1107 	daddu	$25,$1
   1108 	daddu	$7,$25
   1109 	sltu	$1,$7,$25
   1110 	daddu	$2,$1
   1111 	sd	$3,7*8($4)	# r[7]=c2;
   1112 
   1113 	mflo	($24,$5,$9)
   1114 	mfhi	($25,$5,$9)
   1115 	daddu	$7,$24
   1116 	sltu	$1,$7,$24
   1117 	dmultu	($20,$10)		# mul_add_c(a[6],b[2],c3,c1,c2);
   1118 	daddu	$25,$1
   1119 	daddu	$2,$25
   1120 	sltu	$3,$2,$25
   1121 	mflo	($24,$20,$10)
   1122 	mfhi	($25,$20,$10)
   1123 	daddu	$7,$24
   1124 	sltu	$1,$7,$24
   1125 	dmultu	($18,$11)		# mul_add_c(a[5],b[3],c3,c1,c2);
   1126 	daddu	$25,$1
   1127 	daddu	$2,$25
   1128 	sltu	$1,$2,$25
   1129 	daddu	$3,$1
   1130 	mflo	($24,$18,$11)
   1131 	mfhi	($25,$18,$11)
   1132 	daddu	$7,$24
   1133 	sltu	$1,$7,$24
   1134 	dmultu	($16,$17)		# mul_add_c(a[4],b[4],c3,c1,c2);
   1135 	daddu	$25,$1
   1136 	daddu	$2,$25
   1137 	sltu	$1,$2,$25
   1138 	daddu	$3,$1
   1139 	mflo	($24,$16,$17)
   1140 	mfhi	($25,$16,$17)
   1141 	daddu	$7,$24
   1142 	sltu	$1,$7,$24
   1143 	dmultu	($15,$19)		# mul_add_c(a[3],b[5],c3,c1,c2);
   1144 	daddu	$25,$1
   1145 	daddu	$2,$25
   1146 	sltu	$1,$2,$25
   1147 	daddu	$3,$1
   1148 	mflo	($24,$15,$19)
   1149 	mfhi	($25,$15,$19)
   1150 	daddu	$7,$24
   1151 	sltu	$1,$7,$24
   1152 	dmultu	($14,$21)		# mul_add_c(a[2],b[6],c3,c1,c2);
   1153 	daddu	$25,$1
   1154 	daddu	$2,$25
   1155 	sltu	$1,$2,$25
   1156 	daddu	$3,$1
   1157 	mflo	($24,$14,$21)
   1158 	mfhi	($25,$14,$21)
   1159 	daddu	$7,$24
   1160 	sltu	$1,$7,$24
   1161 	dmultu	($13,$6)		# mul_add_c(a[1],b[7],c3,c1,c2);
   1162 	daddu	$25,$1
   1163 	daddu	$2,$25
   1164 	sltu	$1,$2,$25
   1165 	daddu	$3,$1
   1166 	mflo	($24,$13,$6)
   1167 	mfhi	($25,$13,$6)
   1168 	daddu	$7,$24
   1169 	sltu	$1,$7,$24
   1170 	 dmultu	($14,$6)		# mul_add_c(a[2],b[7],c1,c2,c3);
   1171 	daddu	$25,$1
   1172 	daddu	$2,$25
   1173 	sltu	$1,$2,$25
   1174 	daddu	$3,$1
   1175 	sd	$7,8*8($4)	# r[8]=c3;
   1176 
   1177 	mflo	($24,$14,$6)
   1178 	mfhi	($25,$14,$6)
   1179 	daddu	$2,$24
   1180 	sltu	$1,$2,$24
   1181 	dmultu	($15,$21)		# mul_add_c(a[3],b[6],c1,c2,c3);
   1182 	daddu	$25,$1
   1183 	daddu	$3,$25
   1184 	sltu	$7,$3,$25
   1185 	mflo	($24,$15,$21)
   1186 	mfhi	($25,$15,$21)
   1187 	daddu	$2,$24
   1188 	sltu	$1,$2,$24
   1189 	dmultu	($16,$19)		# mul_add_c(a[4],b[5],c1,c2,c3);
   1190 	daddu	$25,$1
   1191 	daddu	$3,$25
   1192 	sltu	$1,$3,$25
   1193 	daddu	$7,$1
   1194 	mflo	($24,$16,$19)
   1195 	mfhi	($25,$16,$19)
   1196 	daddu	$2,$24
   1197 	sltu	$1,$2,$24
   1198 	dmultu	($18,$17)		# mul_add_c(a[5],b[4],c1,c2,c3);
   1199 	daddu	$25,$1
   1200 	daddu	$3,$25
   1201 	sltu	$1,$3,$25
   1202 	daddu	$7,$1
   1203 	mflo	($24,$18,$17)
   1204 	mfhi	($25,$18,$17)
   1205 	daddu	$2,$24
   1206 	sltu	$1,$2,$24
   1207 	dmultu	($20,$11)		# mul_add_c(a[6],b[3],c1,c2,c3);
   1208 	daddu	$25,$1
   1209 	daddu	$3,$25
   1210 	sltu	$1,$3,$25
   1211 	daddu	$7,$1
   1212 	mflo	($24,$20,$11)
   1213 	mfhi	($25,$20,$11)
   1214 	daddu	$2,$24
   1215 	sltu	$1,$2,$24
   1216 	dmultu	($5,$10)		# mul_add_c(a[7],b[2],c1,c2,c3);
   1217 	daddu	$25,$1
   1218 	daddu	$3,$25
   1219 	sltu	$1,$3,$25
   1220 	daddu	$7,$1
   1221 	mflo	($24,$5,$10)
   1222 	mfhi	($25,$5,$10)
   1223 	daddu	$2,$24
   1224 	sltu	$1,$2,$24
   1225 	 dmultu	($5,$11)		# mul_add_c(a[7],b[3],c2,c3,c1);
   1226 	daddu	$25,$1
   1227 	daddu	$3,$25
   1228 	sltu	$1,$3,$25
   1229 	daddu	$7,$1
   1230 	sd	$2,9*8($4)	# r[9]=c1;
   1231 
   1232 	mflo	($24,$5,$11)
   1233 	mfhi	($25,$5,$11)
   1234 	daddu	$3,$24
   1235 	sltu	$1,$3,$24
   1236 	dmultu	($20,$17)		# mul_add_c(a[6],b[4],c2,c3,c1);
   1237 	daddu	$25,$1
   1238 	daddu	$7,$25
   1239 	sltu	$2,$7,$25
   1240 	mflo	($24,$20,$17)
   1241 	mfhi	($25,$20,$17)
   1242 	daddu	$3,$24
   1243 	sltu	$1,$3,$24
   1244 	dmultu	($18,$19)		# mul_add_c(a[5],b[5],c2,c3,c1);
   1245 	daddu	$25,$1
   1246 	daddu	$7,$25
   1247 	sltu	$1,$7,$25
   1248 	daddu	$2,$1
   1249 	mflo	($24,$18,$19)
   1250 	mfhi	($25,$18,$19)
   1251 	daddu	$3,$24
   1252 	sltu	$1,$3,$24
   1253 	dmultu	($16,$21)		# mul_add_c(a[4],b[6],c2,c3,c1);
   1254 	daddu	$25,$1
   1255 	daddu	$7,$25
   1256 	sltu	$1,$7,$25
   1257 	daddu	$2,$1
   1258 	mflo	($24,$16,$21)
   1259 	mfhi	($25,$16,$21)
   1260 	daddu	$3,$24
   1261 	sltu	$1,$3,$24
   1262 	dmultu	($15,$6)		# mul_add_c(a[3],b[7],c2,c3,c1);
   1263 	daddu	$25,$1
   1264 	daddu	$7,$25
   1265 	sltu	$1,$7,$25
   1266 	daddu	$2,$1
   1267 	mflo	($24,$15,$6)
   1268 	mfhi	($25,$15,$6)
   1269 	daddu	$3,$24
   1270 	sltu	$1,$3,$24
   1271 	dmultu	($16,$6)		# mul_add_c(a[4],b[7],c3,c1,c2);
   1272 	daddu	$25,$1
   1273 	daddu	$7,$25
   1274 	sltu	$1,$7,$25
   1275 	daddu	$2,$1
   1276 	sd	$3,10*8($4)	# r[10]=c2;
   1277 
   1278 	mflo	($24,$16,$6)
   1279 	mfhi	($25,$16,$6)
   1280 	daddu	$7,$24
   1281 	sltu	$1,$7,$24
   1282 	dmultu	($18,$21)		# mul_add_c(a[5],b[6],c3,c1,c2);
   1283 	daddu	$25,$1
   1284 	daddu	$2,$25
   1285 	sltu	$3,$2,$25
   1286 	mflo	($24,$18,$21)
   1287 	mfhi	($25,$18,$21)
   1288 	daddu	$7,$24
   1289 	sltu	$1,$7,$24
   1290 	dmultu	($20,$19)		# mul_add_c(a[6],b[5],c3,c1,c2);
   1291 	daddu	$25,$1
   1292 	daddu	$2,$25
   1293 	sltu	$1,$2,$25
   1294 	daddu	$3,$1
   1295 	mflo	($24,$20,$19)
   1296 	mfhi	($25,$20,$19)
   1297 	daddu	$7,$24
   1298 	sltu	$1,$7,$24
   1299 	dmultu	($5,$17)		# mul_add_c(a[7],b[4],c3,c1,c2);
   1300 	daddu	$25,$1
   1301 	daddu	$2,$25
   1302 	sltu	$1,$2,$25
   1303 	daddu	$3,$1
   1304 	mflo	($24,$5,$17)
   1305 	mfhi	($25,$5,$17)
   1306 	daddu	$7,$24
   1307 	sltu	$1,$7,$24
   1308 	 dmultu	($5,$19)		# mul_add_c(a[7],b[5],c1,c2,c3);
   1309 	daddu	$25,$1
   1310 	daddu	$2,$25
   1311 	sltu	$1,$2,$25
   1312 	daddu	$3,$1
   1313 	sd	$7,11*8($4)	# r[11]=c3;
   1314 
   1315 	mflo	($24,$5,$19)
   1316 	mfhi	($25,$5,$19)
   1317 	daddu	$2,$24
   1318 	sltu	$1,$2,$24
   1319 	dmultu	($20,$21)		# mul_add_c(a[6],b[6],c1,c2,c3);
   1320 	daddu	$25,$1
   1321 	daddu	$3,$25
   1322 	sltu	$7,$3,$25
   1323 	mflo	($24,$20,$21)
   1324 	mfhi	($25,$20,$21)
   1325 	daddu	$2,$24
   1326 	sltu	$1,$2,$24
   1327 	dmultu	($18,$6)		# mul_add_c(a[5],b[7],c1,c2,c3);
   1328 	daddu	$25,$1
   1329 	daddu	$3,$25
   1330 	sltu	$1,$3,$25
   1331 	daddu	$7,$1
   1332 	mflo	($24,$18,$6)
   1333 	mfhi	($25,$18,$6)
   1334 	daddu	$2,$24
   1335 	sltu	$1,$2,$24
   1336 	 dmultu	($20,$6)		# mul_add_c(a[6],b[7],c2,c3,c1);
   1337 	daddu	$25,$1
   1338 	daddu	$3,$25
   1339 	sltu	$1,$3,$25
   1340 	daddu	$7,$1
   1341 	sd	$2,12*8($4)	# r[12]=c1;
   1342 
   1343 	mflo	($24,$20,$6)
   1344 	mfhi	($25,$20,$6)
   1345 	daddu	$3,$24
   1346 	sltu	$1,$3,$24
   1347 	dmultu	($5,$21)		# mul_add_c(a[7],b[6],c2,c3,c1);
   1348 	daddu	$25,$1
   1349 	daddu	$7,$25
   1350 	sltu	$2,$7,$25
   1351 	mflo	($24,$5,$21)
   1352 	mfhi	($25,$5,$21)
   1353 	daddu	$3,$24
   1354 	sltu	$1,$3,$24
   1355 	dmultu	($5,$6)		# mul_add_c(a[7],b[7],c3,c1,c2);
   1356 	daddu	$25,$1
   1357 	daddu	$7,$25
   1358 	sltu	$1,$7,$25
   1359 	daddu	$2,$1
   1360 	sd	$3,13*8($4)	# r[13]=c2;
   1361 
   1362 	mflo	($24,$5,$6)
   1363 	mfhi	($25,$5,$6)
   1364 	daddu	$7,$24
   1365 	sltu	$1,$7,$24
   1366 	daddu	$25,$1
   1367 	daddu	$2,$25
   1368 	sd	$7,14*8($4)	# r[14]=c3;
   1369 	sd	$2,15*8($4)	# r[15]=c1;
   1370 
   1371 	.set	noreorder
   1372 	ld	$21,5*8($29)
   1373 	ld	$20,4*8($29)
   1374 	ld	$19,3*8($29)
   1375 	ld	$18,2*8($29)
   1376 	ld	$17,1*8($29)
   1377 	ld	$16,0*8($29)
   1378 	jr	$31
   1379 	daddu $29,6*8
   1380 .end	bn_mul_comba8
   1381 
   1382 .align	5
   1383 .globl	bn_mul_comba4
   1384 .ent	bn_mul_comba4
	# bn_mul_comba4 - column-wise (comba) product of two 4-word operands.
	#
	# In:   $4 = r, receives eight 64-bit words r[0]..r[7]
	#       $5 = a, four 64-bit words a[0]..a[3]
	#       $6 = b, four 64-bit words b[0]..b[3]
	# Out:  r[0]..r[7] stored; r[0] holds the low half of a[0]*b[0].
	#
	# Register roles: $12-$15 = a[0..3], $8-$11 = b[0..3].
	# $2/$3/$7 are the rotating accumulator words c1/c2/c3, $24/$25
	# take the low/high halves of the current product, and $1 holds
	# the carry produced by each sltu.  No stack is used.
	#
	# A dmultu indented by one extra space is issued ahead of time:
	# its product is consumed after the store that follows it.
   1385 bn_mul_comba4:
   1386 	.set	reorder
   1387 	ld	$12,0($5)	# a[0]
   1388 	ld	$8,0($6)	# b[0]
   1389 	ld	$13,8($5)	# a[1]
   1390 	ld	$14,2*8($5)	# a[2]
   1391 	dmultu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
   1392 	ld	$15,3*8($5)	# a[3]
   1393 	ld	$9,8($6)	# b[1]
   1394 	ld	$10,2*8($6)	# b[2]
   1395 	ld	$11,3*8($6)	# b[3]
   1396 	mflo	($2,$12,$8)
   1397 	mfhi	($3,$12,$8)
   1398 	sd	$2,0($4)	# r[0]=c1;
   1399 
   1400 	dmultu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
   1401 	mflo	($24,$12,$9)
   1402 	mfhi	($25,$12,$9)
   1403 	daddu	$3,$24
   1404 	sltu	$1,$3,$24
   1405 	dmultu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
   1406 	daddu	$7,$25,$1	# c3 is first written here: high half plus carry
   1407 	mflo	($24,$13,$8)
   1408 	mfhi	($25,$13,$8)
   1409 	daddu	$3,$24
   1410 	sltu	$1,$3,$24
   1411 	 dmultu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
   1412 	daddu	$25,$1
   1413 	daddu	$7,$25
   1414 	sltu	$2,$7,$25	# c1 is first written here: carry out of c3
   1415 	sd	$3,8($4)	# r[1]=c2;
   1416 
   1417 	mflo	($24,$14,$8)
   1418 	mfhi	($25,$14,$8)
   1419 	daddu	$7,$24
   1420 	sltu	$1,$7,$24
   1421 	dmultu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
   1422 	daddu	$25,$1
   1423 	daddu	$2,$25
   1424 	mflo	($24,$13,$9)
   1425 	mfhi	($25,$13,$9)
   1426 	daddu	$7,$24
   1427 	sltu	$1,$7,$24
   1428 	dmultu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
   1429 	daddu	$25,$1
   1430 	daddu	$2,$25
   1431 	sltu	$3,$2,$25	# c2 restarts as the carry out of c1
   1432 	mflo	($24,$12,$10)
   1433 	mfhi	($25,$12,$10)
   1434 	daddu	$7,$24
   1435 	sltu	$1,$7,$24
   1436 	 dmultu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
   1437 	daddu	$25,$1
   1438 	daddu	$2,$25
   1439 	sltu	$1,$2,$25
   1440 	daddu	$3,$1
   1441 	sd	$7,2*8($4)	# r[2]=c3;
   1442 
   1443 	mflo	($24,$12,$11)
   1444 	mfhi	($25,$12,$11)
   1445 	daddu	$2,$24
   1446 	sltu	$1,$2,$24
   1447 	dmultu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
   1448 	daddu	$25,$1
   1449 	daddu	$3,$25
   1450 	sltu	$7,$3,$25	# c3 restarts as the carry out of c2
   1451 	mflo	($24,$13,$10)
   1452 	mfhi	($25,$13,$10)
   1453 	daddu	$2,$24
   1454 	sltu	$1,$2,$24
   1455 	dmultu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
   1456 	daddu	$25,$1
   1457 	daddu	$3,$25
   1458 	sltu	$1,$3,$25
   1459 	daddu	$7,$1
   1460 	mflo	($24,$14,$9)
   1461 	mfhi	($25,$14,$9)
   1462 	daddu	$2,$24
   1463 	sltu	$1,$2,$24
   1464 	dmultu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
   1465 	daddu	$25,$1
   1466 	daddu	$3,$25
   1467 	sltu	$1,$3,$25
   1468 	daddu	$7,$1
   1469 	mflo	($24,$15,$8)
   1470 	mfhi	($25,$15,$8)
   1471 	daddu	$2,$24
   1472 	sltu	$1,$2,$24
   1473 	 dmultu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
   1474 	daddu	$25,$1
   1475 	daddu	$3,$25
   1476 	sltu	$1,$3,$25
   1477 	daddu	$7,$1
   1478 	sd	$2,3*8($4)	# r[3]=c1;
   1479 
   1480 	mflo	($24,$15,$9)
   1481 	mfhi	($25,$15,$9)
   1482 	daddu	$3,$24
   1483 	sltu	$1,$3,$24
   1484 	dmultu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
   1485 	daddu	$25,$1
   1486 	daddu	$7,$25
   1487 	sltu	$2,$7,$25	# c1 restarts as the carry out of c3
   1488 	mflo	($24,$14,$10)
   1489 	mfhi	($25,$14,$10)
   1490 	daddu	$3,$24
   1491 	sltu	$1,$3,$24
   1492 	dmultu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
   1493 	daddu	$25,$1
   1494 	daddu	$7,$25
   1495 	sltu	$1,$7,$25
   1496 	daddu	$2,$1
   1497 	mflo	($24,$13,$11)
   1498 	mfhi	($25,$13,$11)
   1499 	daddu	$3,$24
   1500 	sltu	$1,$3,$24
   1501 	 dmultu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
   1502 	daddu	$25,$1
   1503 	daddu	$7,$25
   1504 	sltu	$1,$7,$25
   1505 	daddu	$2,$1
   1506 	sd	$3,4*8($4)	# r[4]=c2;
   1507 
   1508 	mflo	($24,$14,$11)
   1509 	mfhi	($25,$14,$11)
   1510 	daddu	$7,$24
   1511 	sltu	$1,$7,$24
   1512 	dmultu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
   1513 	daddu	$25,$1
   1514 	daddu	$2,$25
   1515 	sltu	$3,$2,$25	# c2 restarts as the carry out of c1
   1516 	mflo	($24,$15,$10)
   1517 	mfhi	($25,$15,$10)
   1518 	daddu	$7,$24
   1519 	sltu	$1,$7,$24
   1520 	 dmultu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
   1521 	daddu	$25,$1
   1522 	daddu	$2,$25
   1523 	sltu	$1,$2,$25
   1524 	daddu	$3,$1
   1525 	sd	$7,5*8($4)	# r[5]=c3;
   1526 
   1527 	mflo	($24,$15,$11)
   1528 	mfhi	($25,$15,$11)
   1529 	daddu	$2,$24
   1530 	sltu	$1,$2,$24
   1531 	daddu	$25,$1
   1532 	daddu	$3,$25
   1533 	sd	$2,6*8($4)	# r[6]=c1;
   1534 	sd	$3,7*8($4)	# r[7]=c2;
   1535 
   1536 	.set	noreorder
   1537 	jr	$31
   1538 	nop			# fills the delay slot of jr
   1539 .end	bn_mul_comba4
   1540 
   1541 .align	5
   1542 .globl	bn_sqr_comba8
   1543 .ent	bn_sqr_comba8
	# bn_sqr_comba8 - column-wise (comba) square of an 8-word operand.
	#
	# In:   $4 = r, receives sixteen 64-bit words r[0]..r[15]
	#       $5 = a, eight 64-bit words a[0]..a[7]
	# Out:  r[0]..r[15] stored; r[0] holds the low half of a[0]*a[0].
	#
	# Register roles: $12-$15 = a[0..3], $8-$11 = a[4..7].
	# $2/$3/$7 are the rotating accumulator words c1/c2/c3, $24/$25
	# take the low/high halves of the current product, $1 holds
	# carries, and $6 is used once as scratch.  No stack is used.
	#
	# The b[j] in the inline comments refers to a[j]; only $5 is read.
	# Cross products a[i]*a[j] with i != j are accumulated twice:
	# a[0]*a[1] by a one-bit left shift, the others by adding the
	# low and high halves two times.  Squares a[i]*a[i] are added once.
	# Each dmultu marked as forward multiplication starts the product
	# that the next mflo/mfhi pair reads.
   1544 bn_sqr_comba8:
   1545 	.set	reorder
   1546 	ld	$12,0($5)	# a[0]
   1547 	ld	$13,8($5)	# a[1]
   1548 	ld	$14,2*8($5)	# a[2]
   1549 	ld	$15,3*8($5)	# a[3]
   1550 
   1551 	dmultu	($12,$12)		# mul_add_c(a[0],b[0],c1,c2,c3);
   1552 	ld	$8,4*8($5)	# a[4]
   1553 	ld	$9,5*8($5)	# a[5]
   1554 	ld	$10,6*8($5)	# a[6]
   1555 	ld	$11,7*8($5)	# a[7]
   1556 	mflo	($2,$12,$12)
   1557 	mfhi	($3,$12,$12)
   1558 	sd	$2,0($4)	# r[0]=c1;
   1559 
   1560 	dmultu	($12,$13)		# mul_add_c2(a[0],b[1],c2,c3,c1);
   1561 	mflo	($24,$12,$13)
   1562 	mfhi	($25,$12,$13)
   1563 	slt	$2,$25,$0	# c1 = top bit of the high half
   1564 	dsll	$25,1		# double the high half
   1565 	 dmultu	($14,$12)		# mul_add_c2(a[2],b[0],c3,c1,c2);
   1566 	slt	$6,$24,$0	# top bit of the low half
   1567 	daddu	$25,$6		# moves into bit 0 of the high half
   1568 	dsll	$24,1		# double the low half
   1569 	daddu	$3,$24
   1570 	sltu	$1,$3,$24
   1571 	daddu	$7,$25,$1	# c3 is first written here
   1572 	sd	$3,8($4)	# r[1]=c2;
   1573 	mflo	($24,$14,$12)
   1574 	mfhi	($25,$14,$12)
   1575 	daddu	$7,$24
   1576 	sltu	$1,$7,$24
   1577 	 dmultu	($13,$13)		# forward multiplication
   1578 	daddu	$7,$24
   1579 	daddu	$1,$25
   1580 	sltu	$24,$7,$24
   1581 	daddu	$2,$1
   1582 	daddu	$25,$24
   1583 	sltu	$3,$2,$1
   1584 	daddu	$2,$25
   1585 	sltu	$25,$2,$25
   1586 	daddu	$3,$25
   1587 	mflo	($24,$13,$13)
   1588 	mfhi	($25,$13,$13)
   1589 	daddu	$7,$24
   1590 	sltu	$1,$7,$24
   1591 	 dmultu	($12,$15)		# mul_add_c2(a[0],b[3],c1,c2,c3);
   1592 	daddu	$25,$1
   1593 	daddu	$2,$25
   1594 	sltu	$1,$2,$25
   1595 	daddu	$3,$1
   1596 	sd	$7,2*8($4)	# r[2]=c3;
   1597 	mflo	($24,$12,$15)
   1598 	mfhi	($25,$12,$15)
   1599 	daddu	$2,$24
   1600 	sltu	$1,$2,$24
   1601 	 dmultu	($13,$14)		# forward multiplication
   1602 	daddu	$2,$24
   1603 	daddu	$1,$25
   1604 	sltu	$24,$2,$24
   1605 	daddu	$3,$1
   1606 	daddu	$25,$24
   1607 	sltu	$7,$3,$1
   1608 	daddu	$3,$25
   1609 	sltu	$25,$3,$25
   1610 	daddu	$7,$25
   1611 	mflo	($24,$13,$14)
   1612 	mfhi	($25,$13,$14)
   1613 	daddu	$2,$24
   1614 	sltu	$1,$2,$24
   1615 	 dmultu	($8,$12)		# forward multiplication
   1616 	daddu	$2,$24
   1617 	daddu	$1,$25
   1618 	sltu	$24,$2,$24
   1619 	daddu	$3,$1
   1620 	daddu	$25,$24
   1621 	sltu	$1,$3,$1
   1622 	daddu	$3,$25
   1623 	daddu	$7,$1
   1624 	sltu	$25,$3,$25
   1625 	daddu	$7,$25
   1626 	mflo	($24,$8,$12)
   1627 	mfhi	($25,$8,$12)
   1628 	sd	$2,3*8($4)	# r[3]=c1;
   1629 	daddu	$3,$24
   1630 	sltu	$1,$3,$24
   1631 	 dmultu	($15,$13)		# forward multiplication
   1632 	daddu	$3,$24
   1633 	daddu	$1,$25
   1634 	sltu	$24,$3,$24
   1635 	daddu	$7,$1
   1636 	daddu	$25,$24
   1637 	sltu	$2,$7,$1
   1638 	daddu	$7,$25
   1639 	sltu	$25,$7,$25
   1640 	daddu	$2,$25
   1641 	mflo	($24,$15,$13)
   1642 	mfhi	($25,$15,$13)
   1643 	daddu	$3,$24
   1644 	sltu	$1,$3,$24
   1645 	 dmultu	($14,$14)		# forward multiplication
   1646 	daddu	$3,$24
   1647 	daddu	$1,$25
   1648 	sltu	$24,$3,$24
   1649 	daddu	$7,$1
   1650 	daddu	$25,$24
   1651 	sltu	$1,$7,$1
   1652 	daddu	$7,$25
   1653 	daddu	$2,$1
   1654 	sltu	$25,$7,$25
   1655 	daddu	$2,$25
   1656 	mflo	($24,$14,$14)
   1657 	mfhi	($25,$14,$14)
   1658 	daddu	$3,$24
   1659 	sltu	$1,$3,$24
   1660 	 dmultu	($12,$9)		# mul_add_c2(a[0],b[5],c3,c1,c2);
   1661 	daddu	$25,$1
   1662 	daddu	$7,$25
   1663 	sltu	$1,$7,$25
   1664 	daddu	$2,$1
   1665 	sd	$3,4*8($4)	# r[4]=c2;
   1666 	mflo	($24,$12,$9)
   1667 	mfhi	($25,$12,$9)
   1668 	daddu	$7,$24
   1669 	sltu	$1,$7,$24
   1670 	 dmultu	($13,$8)		# forward multiplication
   1671 	daddu	$7,$24
   1672 	daddu	$1,$25
   1673 	sltu	$24,$7,$24
   1674 	daddu	$2,$1
   1675 	daddu	$25,$24
   1676 	sltu	$3,$2,$1
   1677 	daddu	$2,$25
   1678 	sltu	$25,$2,$25
   1679 	daddu	$3,$25
   1680 	mflo	($24,$13,$8)
   1681 	mfhi	($25,$13,$8)
   1682 	daddu	$7,$24
   1683 	sltu	$1,$7,$24
   1684 	 dmultu	($14,$15)		# forward multiplication
   1685 	daddu	$7,$24
   1686 	daddu	$1,$25
   1687 	sltu	$24,$7,$24
   1688 	daddu	$2,$1
   1689 	daddu	$25,$24
   1690 	sltu	$1,$2,$1
   1691 	daddu	$2,$25
   1692 	daddu	$3,$1
   1693 	sltu	$25,$2,$25
   1694 	daddu	$3,$25
   1695 	mflo	($24,$14,$15)
   1696 	mfhi	($25,$14,$15)
   1697 	daddu	$7,$24
   1698 	sltu	$1,$7,$24
   1699 	 dmultu	($10,$12)		# forward multiplication
   1700 	daddu	$7,$24
   1701 	daddu	$1,$25
   1702 	sltu	$24,$7,$24
   1703 	daddu	$2,$1
   1704 	daddu	$25,$24
   1705 	sltu	$1,$2,$1
   1706 	daddu	$2,$25
   1707 	daddu	$3,$1
   1708 	sltu	$25,$2,$25
   1709 	daddu	$3,$25
   1710 	mflo	($24,$10,$12)
   1711 	mfhi	($25,$10,$12)
   1712 	sd	$7,5*8($4)	# r[5]=c3;
   1713 	daddu	$2,$24
   1714 	sltu	$1,$2,$24
   1715 	 dmultu	($9,$13)		# forward multiplication
   1716 	daddu	$2,$24
   1717 	daddu	$1,$25
   1718 	sltu	$24,$2,$24
   1719 	daddu	$3,$1
   1720 	daddu	$25,$24
   1721 	sltu	$7,$3,$1
   1722 	daddu	$3,$25
   1723 	sltu	$25,$3,$25
   1724 	daddu	$7,$25
   1725 	mflo	($24,$9,$13)
   1726 	mfhi	($25,$9,$13)
   1727 	daddu	$2,$24
   1728 	sltu	$1,$2,$24
   1729 	 dmultu	($8,$14)		# forward multiplication
   1730 	daddu	$2,$24
   1731 	daddu	$1,$25
   1732 	sltu	$24,$2,$24
   1733 	daddu	$3,$1
   1734 	daddu	$25,$24
   1735 	sltu	$1,$3,$1
   1736 	daddu	$3,$25
   1737 	daddu	$7,$1
   1738 	sltu	$25,$3,$25
   1739 	daddu	$7,$25
   1740 	mflo	($24,$8,$14)
   1741 	mfhi	($25,$8,$14)
   1742 	daddu	$2,$24
   1743 	sltu	$1,$2,$24
   1744 	 dmultu	($15,$15)		# forward multiplication
   1745 	daddu	$2,$24
   1746 	daddu	$1,$25
   1747 	sltu	$24,$2,$24
   1748 	daddu	$3,$1
   1749 	daddu	$25,$24
   1750 	sltu	$1,$3,$1
   1751 	daddu	$3,$25
   1752 	daddu	$7,$1
   1753 	sltu	$25,$3,$25
   1754 	daddu	$7,$25
   1755 	mflo	($24,$15,$15)
   1756 	mfhi	($25,$15,$15)
   1757 	daddu	$2,$24
   1758 	sltu	$1,$2,$24
   1759 	 dmultu	($12,$11)		# mul_add_c2(a[0],b[7],c2,c3,c1);
   1760 	daddu	$25,$1
   1761 	daddu	$3,$25
   1762 	sltu	$1,$3,$25
   1763 	daddu	$7,$1
   1764 	sd	$2,6*8($4)	# r[6]=c1;
   1765 	mflo	($24,$12,$11)
   1766 	mfhi	($25,$12,$11)
   1767 	daddu	$3,$24
   1768 	sltu	$1,$3,$24
   1769 	 dmultu	($13,$10)		# forward multiplication
   1770 	daddu	$3,$24
   1771 	daddu	$1,$25
   1772 	sltu	$24,$3,$24
   1773 	daddu	$7,$1
   1774 	daddu	$25,$24
   1775 	sltu	$2,$7,$1
   1776 	daddu	$7,$25
   1777 	sltu	$25,$7,$25
   1778 	daddu	$2,$25
   1779 	mflo	($24,$13,$10)
   1780 	mfhi	($25,$13,$10)
   1781 	daddu	$3,$24
   1782 	sltu	$1,$3,$24
   1783 	 dmultu	($14,$9)		# forward multiplication
   1784 	daddu	$3,$24
   1785 	daddu	$1,$25
   1786 	sltu	$24,$3,$24
   1787 	daddu	$7,$1
   1788 	daddu	$25,$24
   1789 	sltu	$1,$7,$1
   1790 	daddu	$7,$25
   1791 	daddu	$2,$1
   1792 	sltu	$25,$7,$25
   1793 	daddu	$2,$25
   1794 	mflo	($24,$14,$9)
   1795 	mfhi	($25,$14,$9)
   1796 	daddu	$3,$24
   1797 	sltu	$1,$3,$24
   1798 	 dmultu	($15,$8)		# forward multiplication
   1799 	daddu	$3,$24
   1800 	daddu	$1,$25
   1801 	sltu	$24,$3,$24
   1802 	daddu	$7,$1
   1803 	daddu	$25,$24
   1804 	sltu	$1,$7,$1
   1805 	daddu	$7,$25
   1806 	daddu	$2,$1
   1807 	sltu	$25,$7,$25
   1808 	daddu	$2,$25
   1809 	mflo	($24,$15,$8)
   1810 	mfhi	($25,$15,$8)
   1811 	daddu	$3,$24
   1812 	sltu	$1,$3,$24
   1813 	 dmultu	($11,$13)		# forward multiplication
   1814 	daddu	$3,$24
   1815 	daddu	$1,$25
   1816 	sltu	$24,$3,$24
   1817 	daddu	$7,$1
   1818 	daddu	$25,$24
   1819 	sltu	$1,$7,$1
   1820 	daddu	$7,$25
   1821 	daddu	$2,$1
   1822 	sltu	$25,$7,$25
   1823 	daddu	$2,$25
   1824 	mflo	($24,$11,$13)
   1825 	mfhi	($25,$11,$13)
   1826 	sd	$3,7*8($4)	# r[7]=c2;
   1827 	daddu	$7,$24
   1828 	sltu	$1,$7,$24
   1829 	 dmultu	($10,$14)		# forward multiplication
   1830 	daddu	$7,$24
   1831 	daddu	$1,$25
   1832 	sltu	$24,$7,$24
   1833 	daddu	$2,$1
   1834 	daddu	$25,$24
   1835 	sltu	$3,$2,$1
   1836 	daddu	$2,$25
   1837 	sltu	$25,$2,$25
   1838 	daddu	$3,$25
   1839 	mflo	($24,$10,$14)
   1840 	mfhi	($25,$10,$14)
   1841 	daddu	$7,$24
   1842 	sltu	$1,$7,$24
   1843 	 dmultu	($9,$15)		# forward multiplication
   1844 	daddu	$7,$24
   1845 	daddu	$1,$25
   1846 	sltu	$24,$7,$24
   1847 	daddu	$2,$1
   1848 	daddu	$25,$24
   1849 	sltu	$1,$2,$1
   1850 	daddu	$2,$25
   1851 	daddu	$3,$1
   1852 	sltu	$25,$2,$25
   1853 	daddu	$3,$25
   1854 	mflo	($24,$9,$15)
   1855 	mfhi	($25,$9,$15)
   1856 	daddu	$7,$24
   1857 	sltu	$1,$7,$24
   1858 	 dmultu	($8,$8)		# forward multiplication
   1859 	daddu	$7,$24
   1860 	daddu	$1,$25
   1861 	sltu	$24,$7,$24
   1862 	daddu	$2,$1
   1863 	daddu	$25,$24
   1864 	sltu	$1,$2,$1
   1865 	daddu	$2,$25
   1866 	daddu	$3,$1
   1867 	sltu	$25,$2,$25
   1868 	daddu	$3,$25
   1869 	mflo	($24,$8,$8)
   1870 	mfhi	($25,$8,$8)
   1871 	daddu	$7,$24
   1872 	sltu	$1,$7,$24
   1873 	 dmultu	($14,$11)		# mul_add_c2(a[2],b[7],c1,c2,c3);
   1874 	daddu	$25,$1
   1875 	daddu	$2,$25
   1876 	sltu	$1,$2,$25
   1877 	daddu	$3,$1
   1878 	sd	$7,8*8($4)	# r[8]=c3;
   1879 	mflo	($24,$14,$11)
   1880 	mfhi	($25,$14,$11)
   1881 	daddu	$2,$24
   1882 	sltu	$1,$2,$24
   1883 	 dmultu	($15,$10)		# forward multiplication
   1884 	daddu	$2,$24
   1885 	daddu	$1,$25
   1886 	sltu	$24,$2,$24
   1887 	daddu	$3,$1
   1888 	daddu	$25,$24
   1889 	sltu	$7,$3,$1
   1890 	daddu	$3,$25
   1891 	sltu	$25,$3,$25
   1892 	daddu	$7,$25
   1893 	mflo	($24,$15,$10)
   1894 	mfhi	($25,$15,$10)
   1895 	daddu	$2,$24
   1896 	sltu	$1,$2,$24
   1897 	 dmultu	($8,$9)		# forward multiplication
   1898 	daddu	$2,$24
   1899 	daddu	$1,$25
   1900 	sltu	$24,$2,$24
   1901 	daddu	$3,$1
   1902 	daddu	$25,$24
   1903 	sltu	$1,$3,$1
   1904 	daddu	$3,$25
   1905 	daddu	$7,$1
   1906 	sltu	$25,$3,$25
   1907 	daddu	$7,$25
   1908 	mflo	($24,$8,$9)
   1909 	mfhi	($25,$8,$9)
   1910 	daddu	$2,$24
   1911 	sltu	$1,$2,$24
   1912 	 dmultu	($11,$15)		# forward multiplication
   1913 	daddu	$2,$24
   1914 	daddu	$1,$25
   1915 	sltu	$24,$2,$24
   1916 	daddu	$3,$1
   1917 	daddu	$25,$24
   1918 	sltu	$1,$3,$1
   1919 	daddu	$3,$25
   1920 	daddu	$7,$1
   1921 	sltu	$25,$3,$25
   1922 	daddu	$7,$25
   1923 	mflo	($24,$11,$15)
   1924 	mfhi	($25,$11,$15)
   1925 	sd	$2,9*8($4)	# r[9]=c1;
   1926 	daddu	$3,$24
   1927 	sltu	$1,$3,$24
   1928 	 dmultu	($10,$8)		# forward multiplication
   1929 	daddu	$3,$24
   1930 	daddu	$1,$25
   1931 	sltu	$24,$3,$24
   1932 	daddu	$7,$1
   1933 	daddu	$25,$24
   1934 	sltu	$2,$7,$1
   1935 	daddu	$7,$25
   1936 	sltu	$25,$7,$25
   1937 	daddu	$2,$25
   1938 	mflo	($24,$10,$8)
   1939 	mfhi	($25,$10,$8)
   1940 	daddu	$3,$24
   1941 	sltu	$1,$3,$24
   1942 	 dmultu	($9,$9)		# forward multiplication
   1943 	daddu	$3,$24
   1944 	daddu	$1,$25
   1945 	sltu	$24,$3,$24
   1946 	daddu	$7,$1
   1947 	daddu	$25,$24
   1948 	sltu	$1,$7,$1
   1949 	daddu	$7,$25
   1950 	daddu	$2,$1
   1951 	sltu	$25,$7,$25
   1952 	daddu	$2,$25
   1953 	mflo	($24,$9,$9)
   1954 	mfhi	($25,$9,$9)
   1955 	daddu	$3,$24
   1956 	sltu	$1,$3,$24
   1957 	 dmultu	($8,$11)		# mul_add_c2(a[4],b[7],c3,c1,c2);
   1958 	daddu	$25,$1
   1959 	daddu	$7,$25
   1960 	sltu	$1,$7,$25
   1961 	daddu	$2,$1
   1962 	sd	$3,10*8($4)	# r[10]=c2;
   1963 	mflo	($24,$8,$11)
   1964 	mfhi	($25,$8,$11)
   1965 	daddu	$7,$24
   1966 	sltu	$1,$7,$24
   1967 	 dmultu	($9,$10)		# forward multiplication
   1968 	daddu	$7,$24
   1969 	daddu	$1,$25
   1970 	sltu	$24,$7,$24
   1971 	daddu	$2,$1
   1972 	daddu	$25,$24
   1973 	sltu	$3,$2,$1
   1974 	daddu	$2,$25
   1975 	sltu	$25,$2,$25
   1976 	daddu	$3,$25
   1977 	mflo	($24,$9,$10)
   1978 	mfhi	($25,$9,$10)
   1979 	daddu	$7,$24
   1980 	sltu	$1,$7,$24
   1981 	 dmultu	($11,$9)		# forward multiplication
   1982 	daddu	$7,$24
   1983 	daddu	$1,$25
   1984 	sltu	$24,$7,$24
   1985 	daddu	$2,$1
   1986 	daddu	$25,$24
   1987 	sltu	$1,$2,$1
   1988 	daddu	$2,$25
   1989 	daddu	$3,$1
   1990 	sltu	$25,$2,$25
   1991 	daddu	$3,$25
   1992 	mflo	($24,$11,$9)
   1993 	mfhi	($25,$11,$9)
   1994 	sd	$7,11*8($4)	# r[11]=c3;
   1995 	daddu	$2,$24
   1996 	sltu	$1,$2,$24
   1997 	 dmultu	($10,$10)		# forward multiplication
   1998 	daddu	$2,$24
   1999 	daddu	$1,$25
   2000 	sltu	$24,$2,$24
   2001 	daddu	$3,$1
   2002 	daddu	$25,$24
   2003 	sltu	$7,$3,$1
   2004 	daddu	$3,$25
   2005 	sltu	$25,$3,$25
   2006 	daddu	$7,$25
   2007 	mflo	($24,$10,$10)
   2008 	mfhi	($25,$10,$10)
   2009 	daddu	$2,$24
   2010 	sltu	$1,$2,$24
   2011 	 dmultu	($10,$11)		# mul_add_c2(a[6],b[7],c2,c3,c1);
   2012 	daddu	$25,$1
   2013 	daddu	$3,$25
   2014 	sltu	$1,$3,$25
   2015 	daddu	$7,$1
   2016 	sd	$2,12*8($4)	# r[12]=c1;
   2017 	mflo	($24,$10,$11)
   2018 	mfhi	($25,$10,$11)
   2019 	daddu	$3,$24
   2020 	sltu	$1,$3,$24
   2021 	 dmultu	($11,$11)		# forward multiplication
   2022 	daddu	$3,$24
   2023 	daddu	$1,$25
   2024 	sltu	$24,$3,$24
   2025 	daddu	$7,$1
   2026 	daddu	$25,$24
   2027 	sltu	$2,$7,$1
   2028 	daddu	$7,$25
   2029 	sltu	$25,$7,$25
   2030 	daddu	$2,$25
   2031 	mflo	($24,$11,$11)
   2032 	mfhi	($25,$11,$11)
   2033 	sd	$3,13*8($4)	# r[13]=c2;
   2034 
   2035 	daddu	$7,$24
   2036 	sltu	$1,$7,$24
   2037 	daddu	$25,$1
   2038 	daddu	$2,$25
   2039 	sd	$7,14*8($4)	# r[14]=c3;
   2040 	sd	$2,15*8($4)	# r[15]=c1;
   2041 
   2042 	.set	noreorder
   2043 	jr	$31
   2044 	nop			# fills the delay slot of jr
   2045 .end	bn_sqr_comba8
   2046 
   2047 .align	5
   2048 .globl	bn_sqr_comba4
   2049 .ent	bn_sqr_comba4
	# bn_sqr_comba4 - column-wise (comba) square of a 4-word operand.
	#
	# In:   $4 = r, receives eight 64-bit words r[0]..r[7]
	#       $5 = a, four 64-bit words a[0]..a[3]
	# Out:  r[0]..r[7] stored; r[0] holds the low half of a[0]*a[0].
	#
	# Register roles: $12-$15 = a[0..3].  $2/$3/$7 are the rotating
	# accumulator words c1/c2/c3, $24/$25 take the low/high halves
	# of the current product, $1 holds carries, and $6 is used once
	# as scratch.  No stack is used.
	#
	# The b[j] in the inline comments refers to a[j]; only $5 is read.
	# Cross products a[i]*a[j] with i != j are accumulated twice:
	# a[0]*a[1] by a one-bit left shift, the others by adding the
	# low and high halves two times.  Squares a[i]*a[i] are added once.
	# Each dmultu marked as forward multiplication starts the product
	# that the next mflo/mfhi pair reads.
   2050 bn_sqr_comba4:
   2051 	.set	reorder
   2052 	ld	$12,0($5)	# a[0]
   2053 	ld	$13,8($5)	# a[1]
   2054 	dmultu	($12,$12)		# mul_add_c(a[0],b[0],c1,c2,c3);
   2055 	ld	$14,2*8($5)	# a[2]
   2056 	ld	$15,3*8($5)	# a[3]
   2057 	mflo	($2,$12,$12)
   2058 	mfhi	($3,$12,$12)
   2059 	sd	$2,0($4)	# r[0]=c1;
   2060 
   2061 	dmultu	($12,$13)		# mul_add_c2(a[0],b[1],c2,c3,c1);
   2062 	mflo	($24,$12,$13)
   2063 	mfhi	($25,$12,$13)
   2064 	slt	$2,$25,$0	# c1 = top bit of the high half
   2065 	dsll	$25,1		# double the high half
   2066 	 dmultu	($14,$12)		# mul_add_c2(a[2],b[0],c3,c1,c2);
   2067 	slt	$6,$24,$0	# top bit of the low half
   2068 	daddu	$25,$6		# moves into bit 0 of the high half
   2069 	dsll	$24,1		# double the low half
   2070 	daddu	$3,$24
   2071 	sltu	$1,$3,$24
   2072 	daddu	$7,$25,$1	# c3 is first written here
   2073 	sd	$3,8($4)	# r[1]=c2;
   2074 	mflo	($24,$14,$12)
   2075 	mfhi	($25,$14,$12)
   2076 	daddu	$7,$24
   2077 	sltu	$1,$7,$24
   2078 	 dmultu	($13,$13)		# forward multiplication
   2079 	daddu	$7,$24
   2080 	daddu	$1,$25
   2081 	sltu	$24,$7,$24
   2082 	daddu	$2,$1
   2083 	daddu	$25,$24
   2084 	sltu	$3,$2,$1
   2085 	daddu	$2,$25
   2086 	sltu	$25,$2,$25
   2087 	daddu	$3,$25
   2088 	mflo	($24,$13,$13)
   2089 	mfhi	($25,$13,$13)
   2090 	daddu	$7,$24
   2091 	sltu	$1,$7,$24
   2092 	 dmultu	($12,$15)		# mul_add_c2(a[0],b[3],c1,c2,c3);
   2093 	daddu	$25,$1
   2094 	daddu	$2,$25
   2095 	sltu	$1,$2,$25
   2096 	daddu	$3,$1
   2097 	sd	$7,2*8($4)	# r[2]=c3;
   2098 	mflo	($24,$12,$15)
   2099 	mfhi	($25,$12,$15)
   2100 	daddu	$2,$24
   2101 	sltu	$1,$2,$24
   2102 	 dmultu	($13,$14)		# forward multiplication
   2103 	daddu	$2,$24
   2104 	daddu	$1,$25
   2105 	sltu	$24,$2,$24
   2106 	daddu	$3,$1
   2107 	daddu	$25,$24
   2108 	sltu	$7,$3,$1
   2109 	daddu	$3,$25
   2110 	sltu	$25,$3,$25
   2111 	daddu	$7,$25
   2112 	mflo	($24,$13,$14)
   2113 	mfhi	($25,$13,$14)
   2114 	daddu	$2,$24
   2115 	sltu	$1,$2,$24
   2116 	 dmultu	($15,$13)		# forward multiplication
   2117 	daddu	$2,$24
   2118 	daddu	$1,$25
   2119 	sltu	$24,$2,$24
   2120 	daddu	$3,$1
   2121 	daddu	$25,$24
   2122 	sltu	$1,$3,$1
   2123 	daddu	$3,$25
   2124 	daddu	$7,$1
   2125 	sltu	$25,$3,$25
   2126 	daddu	$7,$25
   2127 	mflo	($24,$15,$13)
   2128 	mfhi	($25,$15,$13)
   2129 	sd	$2,3*8($4)	# r[3]=c1;
   2130 	daddu	$3,$24
   2131 	sltu	$1,$3,$24
   2132 	 dmultu	($14,$14)		# forward multiplication
   2133 	daddu	$3,$24
   2134 	daddu	$1,$25
   2135 	sltu	$24,$3,$24
   2136 	daddu	$7,$1
   2137 	daddu	$25,$24
   2138 	sltu	$2,$7,$1
   2139 	daddu	$7,$25
   2140 	sltu	$25,$7,$25
   2141 	daddu	$2,$25
   2142 	mflo	($24,$14,$14)
   2143 	mfhi	($25,$14,$14)
   2144 	daddu	$3,$24
   2145 	sltu	$1,$3,$24
   2146 	 dmultu	($14,$15)		# mul_add_c2(a[2],b[3],c3,c1,c2);
   2147 	daddu	$25,$1
   2148 	daddu	$7,$25
   2149 	sltu	$1,$7,$25
   2150 	daddu	$2,$1
   2151 	sd	$3,4*8($4)	# r[4]=c2;
   2152 	mflo	($24,$14,$15)
   2153 	mfhi	($25,$14,$15)
   2154 	daddu	$7,$24
   2155 	sltu	$1,$7,$24
   2156 	 dmultu	($15,$15)		# forward multiplication
   2157 	daddu	$7,$24
   2158 	daddu	$1,$25
   2159 	sltu	$24,$7,$24
   2160 	daddu	$2,$1
   2161 	daddu	$25,$24
   2162 	sltu	$3,$2,$1
   2163 	daddu	$2,$25
   2164 	sltu	$25,$2,$25
   2165 	daddu	$3,$25
   2166 	mflo	($24,$15,$15)
   2167 	mfhi	($25,$15,$15)
   2168 	sd	$7,5*8($4)	# r[5]=c3;
   2169 
   2170 	daddu	$2,$24
   2171 	sltu	$1,$2,$24
   2172 	daddu	$25,$1
   2173 	daddu	$3,$25
   2174 	sd	$2,6*8($4)	# r[6]=c1;
   2175 	sd	$3,7*8($4)	# r[7]=c2;
   2176 
   2177 	.set	noreorder
   2178 	jr	$31
   2179 	nop			# fills the delay slot of jr
   2180 .end	bn_sqr_comba4
   2181