Home | History | Annotate | Line # | Download | only in mips
mips.S revision 1.4
      1 #if !(defined (__mips_isa_rev) && (__mips_isa_rev >= 6))
      2 .set     mips2
      3 #endif
      4 #include "mips_arch.h"
      5 
      6 #if defined(_MIPS_ARCH_MIPS64R6)
        /*
         * Divide helpers used by bn_div_words_internal.
         * MIPS64R6 branch: the two-operand divide step expands to nothing and
         * mfqt / mfrm each expand to a complete three-operand ddivu / dmodu.
         */
      7 # define ddivu(rs,rt)
      8 # define mfqt(rd,rs,rt)	ddivu	rd,rs,rt
      9 # define mfrm(rd,rs,rt)	dmodu	rd,rs,rt
     10 #elif defined(_MIPS_ARCH_MIPS32R6)
        /* MIPS32R6 branch: same scheme using the divu / modu instructions. */
     11 # define divu(rs,rt)
     12 # define mfqt(rd,rs,rt)	divu	rd,rs,rt
     13 # define mfrm(rd,rs,rt)	modu	rd,rs,rt
     14 #else
        /*
         * All other targets: the divide step is a real divu with $0 as the
         * destination operand, and mfqt / mfrm fetch the result with
         * mflo / mfhi respectively (the rs and rt arguments are ignored).
         */
     15 # define divu(rs,rt)	divu	$0,rs,rt
     16 # define mfqt(rd,rs,rt)	mflo	rd
     17 # define mfrm(rd,rs,rt)	mfhi	rd
     18 #endif
     19 
     20 .rdata
     21 .asciiz	"mips3.s, Version 1.2"
     22 .asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro (at) fy.chalmers.se>"
     23 
     24 .text
     25 .set	noat
     26 
     27 .align	5
        /*
         * bn_mul_add_words -- exported entry point.
         *
         * Guards the word count in $6: when $6 > 0 control transfers to
         * bn_mul_add_words_internal with $2 (the running carry) already
         * cleared by the branch delay slot; otherwise it returns at once
         * with $2 = 0, mirrored into $4 by the jr delay slot.
         *
         * NOTE(review): presumably $4 = result array, $5 = source array,
         * $6 = word count, $7 = multiplier word, matching the C routine of
         * the same name -- confirm against the caller.
         */
     28 .globl	bn_mul_add_words
     29 .ent	bn_mul_add_words
     30 bn_mul_add_words:
     31 	.set	noreorder
     32 	bgtz	$6,bn_mul_add_words_internal
     33 	move	$2,$0		# delay slot: carry = 0 on both paths
     34 	jr	$31
     35 	move	$4,$2		# delay slot: copy the result into $4
     36 .end	bn_mul_add_words
     37 
     38 .align	5
        /*
         * bn_mul_add_words_internal -- multiply-accumulate over $6 words.
         *
         * For every word:  {carry, word at $4} = (word at $5) * $7
         *                                        + (word at $4) + carry
         * The carry lives in $2 (the exported stub clears it before jumping
         * here).  The main loop processes four words per pass while
         * ($6 & -4) is positive; the tail then handles up to three more.
         * Both pointers advance by 16 bytes per main-loop pass.
         *
         * Registers: $3 = constant -4 mask, $1 = scratch (carry-out of the
         * low-word addition), $8-$15 = loaded words and product halves.
         * Result: final carry in $2, also copied to $4 on return.
         *
         * NOTE(review): the parenthesised multu / mflo / mfhi forms are
         * presumably macros supplied by mips_arch.h, which is not visible
         * here -- confirm.
         */
     39 .ent	bn_mul_add_words_internal
     40 bn_mul_add_words_internal:
     41 	.set	reorder
     42 	li	$3,-4
     43 	and	$8,$6,$3	# $8 = count with the low two bits cleared
     44 	beqz	$8,.L_bn_mul_add_words_tail	# no full group of four
     45 
     46 .L_bn_mul_add_words_loop:
     47 	lw	$12,0($5)
     48 	multu	($12,$7)
     49 	lw	$13,0($4)
     50 	lw	$14,4($5)
     51 	lw	$15,4($4)
     52 	lw	$8,2*4($5)
     53 	lw	$9,2*4($4)
     54 	addu	$13,$2
     55 	sltu	$2,$13,$2	# All manuals say it "compares 32-bit
     56 				# values", but it seems to work fine
     57 				# even on 64-bit registers.
     58 	mflo	($1,$12,$7)
     59 	mfhi	($12,$12,$7)
     60 	addu	$13,$1
     61 	addu	$2,$12
     62 	 multu	($14,$7)
     63 	sltu	$1,$13,$1
     64 	sw	$13,0($4)
     65 	addu	$2,$1		# carry = high product word + both carry-outs
     66 
     67 	lw	$10,3*4($5)
     68 	lw	$11,3*4($4)
     69 	addu	$15,$2
     70 	sltu	$2,$15,$2
     71 	mflo	($1,$14,$7)
     72 	mfhi	($14,$14,$7)
     73 	addu	$15,$1
     74 	addu	$2,$14
     75 	 multu	($8,$7)
     76 	sltu	$1,$15,$1
     77 	sw	$15,4($4)
     78 	addu	$2,$1
     79 
     80 	subu	$6,4		# four words consumed by this pass
     81 	addu $4,4*4
     82 	addu $5,4*4
     83 	addu	$9,$2
     84 	sltu	$2,$9,$2
     85 	mflo	($1,$8,$7)
     86 	mfhi	($8,$8,$7)
     87 	addu	$9,$1
     88 	addu	$2,$8
     89 	 multu	($10,$7)
     90 	sltu	$1,$9,$1
     91 	sw	$9,-2*4($4)	# pointers already advanced, hence negative offsets
     92 	addu	$2,$1
     93 
     94 
     95 	and	$8,$6,$3	# loop condition: any full group of four left
     96 	addu	$11,$2
     97 	sltu	$2,$11,$2
     98 	mflo	($1,$10,$7)
     99 	mfhi	($10,$10,$7)
    100 	addu	$11,$1
    101 	addu	$2,$10
    102 	sltu	$1,$11,$1
    103 	sw	$11,-4($4)
    104 	.set	noreorder
    105 	bgtz	$8,.L_bn_mul_add_words_loop
    106 	addu	$2,$1		# delay slot: finish the carry for word 3
    107 
    108 	beqz	$6,.L_bn_mul_add_words_return
    109 	nop
    110 
    111 .L_bn_mul_add_words_tail:
    112 	.set	reorder
    113 	lw	$12,0($5)	# first leftover word
    114 	multu	($12,$7)
    115 	lw	$13,0($4)
    116 	subu	$6,1
    117 	addu	$13,$2
    118 	sltu	$2,$13,$2
    119 	mflo	($1,$12,$7)
    120 	mfhi	($12,$12,$7)
    121 	addu	$13,$1
    122 	addu	$2,$12
    123 	sltu	$1,$13,$1
    124 	sw	$13,0($4)
    125 	addu	$2,$1
    126 	beqz	$6,.L_bn_mul_add_words_return
    127 
    128 	lw	$12,4($5)	# second leftover word
    129 	multu	($12,$7)
    130 	lw	$13,4($4)
    131 	subu	$6,1
    132 	addu	$13,$2
    133 	sltu	$2,$13,$2
    134 	mflo	($1,$12,$7)
    135 	mfhi	($12,$12,$7)
    136 	addu	$13,$1
    137 	addu	$2,$12
    138 	sltu	$1,$13,$1
    139 	sw	$13,4($4)
    140 	addu	$2,$1
    141 	beqz	$6,.L_bn_mul_add_words_return
    142 
    143 	lw	$12,2*4($5)	# third leftover word; no count update needed
    144 	multu	($12,$7)
    145 	lw	$13,2*4($4)
    146 	addu	$13,$2
    147 	sltu	$2,$13,$2
    148 	mflo	($1,$12,$7)
    149 	mfhi	($12,$12,$7)
    150 	addu	$13,$1
    151 	addu	$2,$12
    152 	sltu	$1,$13,$1
    153 	sw	$13,2*4($4)
    154 	addu	$2,$1
    155 
    156 .L_bn_mul_add_words_return:
    157 	.set	noreorder
    158 	jr	$31
    159 	move	$4,$2		# delay slot: copy the carry into $4
    160 .end	bn_mul_add_words_internal
    161 
    162 .align	5
        /*
         * bn_mul_words -- exported entry point.
         *
         * When the word count in $6 is positive, jumps to
         * bn_mul_words_internal with the carry register $2 cleared by the
         * delay slot; otherwise returns immediately with $2 = $4 = 0.
         *
         * NOTE(review): presumably $4 = result array, $5 = source array,
         * $6 = word count, $7 = multiplier word -- confirm against the
         * C prototype.
         */
    163 .globl	bn_mul_words
    164 .ent	bn_mul_words
    165 bn_mul_words:
    166 	.set	noreorder
    167 	bgtz	$6,bn_mul_words_internal
    168 	move	$2,$0		# delay slot: carry = 0 on both paths
    169 	jr	$31
    170 	move	$4,$2		# delay slot: copy the result into $4
    171 .end	bn_mul_words
    172 
    173 .align	5
        /*
         * bn_mul_words_internal -- multiply $6 words by the word in $7.
         *
         * For every word:  {carry, word at $4} = (word at $5) * $7 + carry
         * Unlike the multiply-accumulate routine, the destination is only
         * written, never read.  The carry is kept in $2 (cleared by the
         * exported stub).  Four words are handled per main-loop pass while
         * ($6 & -4) is positive; the tail handles up to three more.
         *
         * Registers: $3 = constant -4 mask, $1 = low product word,
         * $9/$11/$13/$15 = carry-out of the low-word addition.
         * Result: final carry in $2, also copied to $4 on return.
         */
    174 .ent	bn_mul_words_internal
    175 bn_mul_words_internal:
    176 	.set	reorder
    177 	li	$3,-4
    178 	and	$8,$6,$3	# $8 = count with the low two bits cleared
    179 	beqz	$8,.L_bn_mul_words_tail	# no full group of four
    180 
    181 .L_bn_mul_words_loop:
    182 	lw	$12,0($5)
    183 	multu	($12,$7)
    184 	lw	$14,4($5)
    185 	lw	$8,2*4($5)
    186 	lw	$10,3*4($5)
    187 	mflo	($1,$12,$7)
    188 	mfhi	($12,$12,$7)
    189 	addu	$2,$1
    190 	sltu	$13,$2,$1	# carry-out of low word + incoming carry
    191 	 multu	($14,$7)
    192 	sw	$2,0($4)
    193 	addu	$2,$13,$12	# next carry = carry-out + high product word
    194 
    195 	subu	$6,4		# four words consumed by this pass
    196 	addu $4,4*4
    197 	addu $5,4*4
    198 	mflo	($1,$14,$7)
    199 	mfhi	($14,$14,$7)
    200 	addu	$2,$1
    201 	sltu	$15,$2,$1
    202 	 multu	($8,$7)
    203 	sw	$2,-3*4($4)	# pointers already advanced, hence negative offsets
    204 	addu	$2,$15,$14
    205 
    206 	mflo	($1,$8,$7)
    207 	mfhi	($8,$8,$7)
    208 	addu	$2,$1
    209 	sltu	$9,$2,$1
    210 	 multu	($10,$7)
    211 	sw	$2,-2*4($4)
    212 	addu	$2,$9,$8
    213 
    214 	and	$8,$6,$3	# loop condition: any full group of four left
    215 	mflo	($1,$10,$7)
    216 	mfhi	($10,$10,$7)
    217 	addu	$2,$1
    218 	sltu	$11,$2,$1
    219 	sw	$2,-4($4)
    220 	.set	noreorder
    221 	bgtz	$8,.L_bn_mul_words_loop
    222 	addu	$2,$11,$10	# delay slot: carry for the next word
    223 
    224 	beqz	$6,.L_bn_mul_words_return
    225 	nop
    226 
    227 .L_bn_mul_words_tail:
    228 	.set	reorder
    229 	lw	$12,0($5)	# first leftover word
    230 	multu	($12,$7)
    231 	subu	$6,1
    232 	mflo	($1,$12,$7)
    233 	mfhi	($12,$12,$7)
    234 	addu	$2,$1
    235 	sltu	$13,$2,$1
    236 	sw	$2,0($4)
    237 	addu	$2,$13,$12
    238 	beqz	$6,.L_bn_mul_words_return
    239 
    240 	lw	$12,4($5)	# second leftover word
    241 	multu	($12,$7)
    242 	subu	$6,1
    243 	mflo	($1,$12,$7)
    244 	mfhi	($12,$12,$7)
    245 	addu	$2,$1
    246 	sltu	$13,$2,$1
    247 	sw	$2,4($4)
    248 	addu	$2,$13,$12
    249 	beqz	$6,.L_bn_mul_words_return
    250 
    251 	lw	$12,2*4($5)	# third leftover word; no count update needed
    252 	multu	($12,$7)
    253 	mflo	($1,$12,$7)
    254 	mfhi	($12,$12,$7)
    255 	addu	$2,$1
    256 	sltu	$13,$2,$1
    257 	sw	$2,2*4($4)
    258 	addu	$2,$13,$12
    259 
    260 .L_bn_mul_words_return:
    261 	.set	noreorder
    262 	jr	$31
    263 	move	$4,$2		# delay slot: copy the carry into $4
    264 .end	bn_mul_words_internal
    265 
    266 .align	5
        /*
         * bn_sqr_words -- exported entry point.
         *
         * When the word count in $6 is positive, jumps to
         * bn_sqr_words_internal (the delay slot clears $2 first);
         * otherwise returns immediately with $2 = $4 = 0.
         *
         * NOTE(review): presumably $4 = result array, $5 = source array,
         * $6 = number of source words -- confirm against the C prototype.
         */
    267 .globl	bn_sqr_words
    268 .ent	bn_sqr_words
    269 bn_sqr_words:
    270 	.set	noreorder
    271 	bgtz	$6,bn_sqr_words_internal
    272 	move	$2,$0		# delay slot: $2 = 0 on both paths
    273 	jr	$31
    274 	move	$4,$2		# delay slot: copy the result into $4
    275 .end	bn_sqr_words
    276 
    277 .align	5
        /*
         * bn_sqr_words_internal -- square each of $6 source words.
         *
         * Every word read from $5 is multiplied by itself and the two
         * product halves are stored as a pair at $4: low half first, high
         * half in the following word.  There is no carry between words.
         * A main-loop pass consumes four source words (16 bytes of $5) and
         * produces eight result words (32 bytes of $4); the tail handles up
         * to three more source words.
         *
         * $2 is never written here, so the value returned in $2 / $4 is
         * whatever the caller left in $2 (the exported stub leaves 0).
         */
    278 .ent	bn_sqr_words_internal
    279 bn_sqr_words_internal:
    280 	.set	reorder
    281 	li	$3,-4
    282 	and	$8,$6,$3	# $8 = count with the low two bits cleared
    283 	beqz	$8,.L_bn_sqr_words_tail	# no full group of four
    284 
    285 .L_bn_sqr_words_loop:
    286 	lw	$12,0($5)
    287 	multu	($12,$12)
    288 	lw	$14,4($5)
    289 	lw	$8,2*4($5)
    290 	lw	$10,3*4($5)
    291 	mflo	($13,$12,$12)
    292 	mfhi	($12,$12,$12)
    293 	sw	$13,0($4)	# low half of square 0
    294 	sw	$12,4($4)	# high half of square 0
    295 
    296 	multu	($14,$14)
    297 	subu	$6,4		# four source words consumed by this pass
    298 	addu $4,8*4		# result pointer moves twice as far as source
    299 	addu $5,4*4
    300 	mflo	($15,$14,$14)
    301 	mfhi	($14,$14,$14)
    302 	sw	$15,-6*4($4)	# pointers already advanced, hence negative offsets
    303 	sw	$14,-5*4($4)
    304 
    305 	multu	($8,$8)
    306 	mflo	($9,$8,$8)
    307 	mfhi	($8,$8,$8)
    308 	sw	$9,-4*4($4)
    309 	sw	$8,-3*4($4)
    310 
    311 
    312 	multu	($10,$10)
    313 	and	$8,$6,$3	# loop condition: any full group of four left
    314 	mflo	($11,$10,$10)
    315 	mfhi	($10,$10,$10)
    316 	sw	$11,-2*4($4)
    317 
    318 	.set	noreorder
    319 	bgtz	$8,.L_bn_sqr_words_loop
    320 	sw	$10,-4($4)	# delay slot: high half of square 3
    321 
    322 	beqz	$6,.L_bn_sqr_words_return
    323 	nop
    324 
    325 .L_bn_sqr_words_tail:
    326 	.set	reorder
    327 	lw	$12,0($5)	# first leftover word
    328 	multu	($12,$12)
    329 	subu	$6,1
    330 	mflo	($13,$12,$12)
    331 	mfhi	($12,$12,$12)
    332 	sw	$13,0($4)
    333 	sw	$12,4($4)
    334 	beqz	$6,.L_bn_sqr_words_return
    335 
    336 	lw	$12,4($5)	# second leftover word
    337 	multu	($12,$12)
    338 	subu	$6,1
    339 	mflo	($13,$12,$12)
    340 	mfhi	($12,$12,$12)
    341 	sw	$13,2*4($4)
    342 	sw	$12,3*4($4)
    343 	beqz	$6,.L_bn_sqr_words_return
    344 
    345 	lw	$12,2*4($5)	# third leftover word; no count update needed
    346 	multu	($12,$12)
    347 	mflo	($13,$12,$12)
    348 	mfhi	($12,$12,$12)
    349 	sw	$13,4*4($4)
    350 	sw	$12,5*4($4)
    351 
    352 .L_bn_sqr_words_return:
    353 	.set	noreorder
    354 	jr	$31
    355 	move	$4,$2		# delay slot: copy $2 into $4
    356 
    357 .end	bn_sqr_words_internal
    358 
    359 .align	5
        /*
         * bn_add_words -- exported entry point.
         *
         * When the word count in $7 is positive, jumps to
         * bn_add_words_internal with the carry register $2 cleared by the
         * delay slot; otherwise returns immediately with $2 = $4 = 0.
         *
         * NOTE(review): presumably $4 = result array, $5 and $6 = the two
         * source arrays, $7 = word count -- confirm against the C prototype.
         */
    360 .globl	bn_add_words
    361 .ent	bn_add_words
    362 bn_add_words:
    363 	.set	noreorder
    364 	bgtz	$7,bn_add_words_internal
    365 	move	$2,$0		# delay slot: carry = 0 on both paths
    366 	jr	$31
    367 	move	$4,$2		# delay slot: copy the result into $4
    368 .end	bn_add_words
    369 
    370 .align	5
        /*
         * bn_add_words_internal -- word-wise addition with carry.
         *
         * For every one of $7 words:
         *     word at $4 = (word at $5) + (word at $6) + carry
         * The carry is kept in $2 (cleared by the exported stub).  Each
         * word needs two additions, so two carry-outs are collected: the
         * first in $24 or $25, the second directly in $2; their sum is the
         * carry into the next word.
         * Four words are handled per main-loop pass while ($7 & -4) is
         * positive; the tail handles up to three more.
         *
         * Registers: $3 = constant -4 mask, $1 = loop condition.
         * Result: final carry in $2, also copied to $4 on return.
         */
    371 .ent	bn_add_words_internal
    372 bn_add_words_internal:
    373 	.set	reorder
    374 	li	$3,-4
    375 	and	$1,$7,$3	# $1 = count with the low two bits cleared
    376 	beqz	$1,.L_bn_add_words_tail	# no full group of four
    377 
    378 .L_bn_add_words_loop:
    379 	lw	$12,0($5)
    380 	lw	$8,0($6)
    381 	subu	$7,4		# four words consumed by this pass
    382 	lw	$13,4($5)
    383 	and	$1,$7,$3	# loop condition, tested at the bottom
    384 	lw	$14,2*4($5)
    385 	addu $6,4*4
    386 	lw	$15,3*4($5)
    387 	addu $4,4*4
    388 	lw	$9,-3*4($6)	# $6 already advanced, hence negative offsets
    389 	addu $5,4*4
    390 	lw	$10,-2*4($6)
    391 	lw	$11,-4($6)
    392 	addu	$8,$12
    393 	sltu	$24,$8,$12	# carry-out of the word + word addition
    394 	addu	$12,$8,$2
    395 	sltu	$2,$12,$8	# carry-out of adding the incoming carry
    396 	sw	$12,-4*4($4)
    397 	addu	$2,$24
    398 
    399 	addu	$9,$13
    400 	sltu	$25,$9,$13
    401 	addu	$13,$9,$2
    402 	sltu	$2,$13,$9
    403 	sw	$13,-3*4($4)
    404 	addu	$2,$25
    405 
    406 	addu	$10,$14
    407 	sltu	$24,$10,$14
    408 	addu	$14,$10,$2
    409 	sltu	$2,$14,$10
    410 	sw	$14,-2*4($4)
    411 	addu	$2,$24
    412 
    413 	addu	$11,$15
    414 	sltu	$25,$11,$15
    415 	addu	$15,$11,$2
    416 	sltu	$2,$15,$11
    417 	sw	$15,-4($4)
    418 
    419 	.set	noreorder
    420 	bgtz	$1,.L_bn_add_words_loop
    421 	addu	$2,$25		# delay slot: finish the carry for word 3
    422 
    423 	beqz	$7,.L_bn_add_words_return
    424 	nop
    425 
    426 .L_bn_add_words_tail:
    427 	.set	reorder
    428 	lw	$12,0($5)	# first leftover word
    429 	lw	$8,0($6)
    430 	addu	$8,$12
    431 	subu	$7,1
    432 	sltu	$24,$8,$12
    433 	addu	$12,$8,$2
    434 	sltu	$2,$12,$8
    435 	sw	$12,0($4)
    436 	addu	$2,$24
    437 	beqz	$7,.L_bn_add_words_return
    438 
    439 	lw	$13,4($5)	# second leftover word
    440 	lw	$9,4($6)
    441 	addu	$9,$13
    442 	subu	$7,1
    443 	sltu	$25,$9,$13
    444 	addu	$13,$9,$2
    445 	sltu	$2,$13,$9
    446 	sw	$13,4($4)
    447 	addu	$2,$25
    448 	beqz	$7,.L_bn_add_words_return
    449 
    450 	lw	$14,2*4($5)	# third leftover word; no count update needed
    451 	lw	$10,2*4($6)
    452 	addu	$10,$14
    453 	sltu	$24,$10,$14
    454 	addu	$14,$10,$2
    455 	sltu	$2,$14,$10
    456 	sw	$14,2*4($4)
    457 	addu	$2,$24
    458 
    459 .L_bn_add_words_return:
    460 	.set	noreorder
    461 	jr	$31
    462 	move	$4,$2		# delay slot: copy the carry into $4
    463 
    464 .end	bn_add_words_internal
    465 
    466 .align	5
        /*
         * bn_sub_words -- exported entry point.
         *
         * When the word count in $7 is positive, jumps to
         * bn_sub_words_internal with the borrow register $2 cleared by the
         * delay slot; otherwise returns immediately with $2 = $4 = 0.
         * This stub zeroes $4 from $0 whereas the sibling stubs copy $2;
         * the effect is the same because $2 is already 0 at that point.
         *
         * NOTE(review): presumably $4 = result array, $5 = minuend array,
         * $6 = subtrahend array, $7 = word count -- confirm against the
         * C prototype.
         */
    467 .globl	bn_sub_words
    468 .ent	bn_sub_words
    469 bn_sub_words:
    470 	.set	noreorder
    471 	bgtz	$7,bn_sub_words_internal
    472 	move	$2,$0		# delay slot: borrow = 0 on both paths
    473 	jr	$31
    474 	move	$4,$0		# delay slot: $4 = 0
    475 .end	bn_sub_words
    476 
    477 .align	5
        /*
         * bn_sub_words_internal -- word-wise subtraction with borrow.
         *
         * For every one of $7 words:
         *     word at $4 = (word at $5) - (word at $6) - borrow
         * The borrow is kept in $2 (cleared by the exported stub).  Two
         * borrow-outs are collected per word: $24 or $25 records whether
         * the minuend word is below the subtrahend word, and $2 records
         * whether subtracting the incoming borrow wrapped (result greater
         * than the intermediate difference); their sum is the borrow into
         * the next word.
         * Four words are handled per main-loop pass while ($7 & -4) is
         * positive; the tail handles up to three more.
         *
         * Registers: $3 = constant -4 mask, $1 = loop condition.
         * Result: final borrow in $2, also copied to $4 on return.
         */
    478 .ent	bn_sub_words_internal
    479 bn_sub_words_internal:
    480 	.set	reorder
    481 	li	$3,-4
    482 	and	$1,$7,$3	# $1 = count with the low two bits cleared
    483 	beqz	$1,.L_bn_sub_words_tail	# no full group of four
    484 
    485 .L_bn_sub_words_loop:
    486 	lw	$12,0($5)
    487 	lw	$8,0($6)
    488 	subu	$7,4		# four words consumed by this pass
    489 	lw	$13,4($5)
    490 	and	$1,$7,$3	# loop condition, tested at the bottom
    491 	lw	$14,2*4($5)
    492 	addu $6,4*4
    493 	lw	$15,3*4($5)
    494 	addu $4,4*4
    495 	lw	$9,-3*4($6)	# $6 already advanced, hence negative offsets
    496 	addu $5,4*4
    497 	lw	$10,-2*4($6)
    498 	lw	$11,-4($6)
    499 	sltu	$24,$12,$8	# borrow-out of the word - word subtraction
    500 	subu	$8,$12,$8
    501 	subu	$12,$8,$2
    502 	sgtu	$2,$12,$8	# borrow-out of subtracting the incoming borrow
    503 	sw	$12,-4*4($4)
    504 	addu	$2,$24
    505 
    506 	sltu	$25,$13,$9
    507 	subu	$9,$13,$9
    508 	subu	$13,$9,$2
    509 	sgtu	$2,$13,$9
    510 	sw	$13,-3*4($4)
    511 	addu	$2,$25
    512 
    513 
    514 	sltu	$24,$14,$10
    515 	subu	$10,$14,$10
    516 	subu	$14,$10,$2
    517 	sgtu	$2,$14,$10
    518 	sw	$14,-2*4($4)
    519 	addu	$2,$24
    520 
    521 	sltu	$25,$15,$11
    522 	subu	$11,$15,$11
    523 	subu	$15,$11,$2
    524 	sgtu	$2,$15,$11
    525 	sw	$15,-4($4)
    526 
    527 	.set	noreorder
    528 	bgtz	$1,.L_bn_sub_words_loop
    529 	addu	$2,$25		# delay slot: finish the borrow for word 3
    530 
    531 	beqz	$7,.L_bn_sub_words_return
    532 	nop
    533 
    534 .L_bn_sub_words_tail:
    535 	.set	reorder
    536 	lw	$12,0($5)	# first leftover word
    537 	lw	$8,0($6)
    538 	subu	$7,1
    539 	sltu	$24,$12,$8
    540 	subu	$8,$12,$8
    541 	subu	$12,$8,$2
    542 	sgtu	$2,$12,$8
    543 	sw	$12,0($4)
    544 	addu	$2,$24
    545 	beqz	$7,.L_bn_sub_words_return
    546 
    547 	lw	$13,4($5)	# second leftover word
    548 	subu	$7,1
    549 	lw	$9,4($6)
    550 	sltu	$25,$13,$9
    551 	subu	$9,$13,$9
    552 	subu	$13,$9,$2
    553 	sgtu	$2,$13,$9
    554 	sw	$13,4($4)
    555 	addu	$2,$25
    556 	beqz	$7,.L_bn_sub_words_return
    557 
    558 	lw	$14,2*4($5)	# third leftover word; no count update needed
    559 	lw	$10,2*4($6)
    560 	sltu	$24,$14,$10
    561 	subu	$10,$14,$10
    562 	subu	$14,$10,$2
    563 	sgtu	$2,$14,$10
    564 	sw	$14,2*4($4)
    565 	addu	$2,$24
    566 
    567 .L_bn_sub_words_return:
    568 	.set	noreorder
    569 	jr	$31
    570 	move	$4,$2		# delay slot: copy the borrow into $4
    571 .end	bn_sub_words_internal
    572 
    573 #if 0
    574 /*
    575  * The bn_div_3_words entry point is re-used for constant-time interface.
    576  * Implementation is retained as historical reference.
    577  */
        /*
         * Everything up to the matching #endif is excluded from the build.
         *
         * bn_div_3_words: loads the word at ($4) and the word just below
         * it, keeps the original $4 and $5 in $7 and $10, and returns -1
         * when the loaded top word equals $6; otherwise it continues in
         * bn_div_3_words_internal.
         *
         * bn_div_3_words_internal: calls bn_div_words_internal for an
         * initial quotient in $2, multiplies it by the saved $10 and then
         * loops, decrementing $2 while the product still compares too
         * large against the running remainder.  The return address is
         * parked in $11 around the call instead of on the stack.
         */
    578 .align 5
    579 .globl	bn_div_3_words
    580 .ent	bn_div_3_words
    581 bn_div_3_words:
    582 	.set	noreorder
    583 	move	$7,$4		# we know that bn_div_words does not
    584 				# touch $7, $10, $11 and preserves $6
    585 				# so that we can save two arguments
    586 				# and return address in registers
    587 				# instead of stack:-)
    588 
    589 	lw	$4,($7)
    590 	move	$10,$5
    591 	bne	$4,$6,bn_div_3_words_internal
    592 	 lw	$5,-4($7)
    593 	li	$2,-1
    594 	jr	$31
    595 	move	$4,$2
    596 .end	bn_div_3_words
    597 
    598 .align	5
    599 .ent	bn_div_3_words_internal
    600 bn_div_3_words_internal:
    601 	.set	reorder
    602 	move	$11,$31
    603 	bal	bn_div_words_internal
    604 	move	$31,$11
    605 	multu	($10,$2)
    606 	lw	$14,-2*4($7)
    607 	move	$8,$0
    608 	mfhi	($13,$10,$2)
    609 	mflo	($12,$10,$2)
    610 	sltu	$24,$13,$5
    611 .L_bn_div_3_words_inner_loop:
    612 	bnez	$24,.L_bn_div_3_words_inner_loop_done
    613 	sgeu	$1,$14,$12
    614 	seq	$25,$13,$5
    615 	and	$1,$25
    616 	sltu	$15,$12,$10
    617 	addu	$5,$6
    618 	subu	$13,$15
    619 	subu	$12,$10
    620 	sltu	$24,$13,$5
    621 	sltu	$8,$5,$6
    622 	or	$24,$8
    623 	.set	noreorder
    624 	beqz	$1,.L_bn_div_3_words_inner_loop
    625 	subu	$2,1
    626 	addu	$2,1
    627 	.set	reorder
    628 .L_bn_div_3_words_inner_loop_done:
    629 	.set	noreorder
    630 	jr	$31
    631 	move	$4,$2
    632 .end	bn_div_3_words_internal
    633 #endif
    634 
    635 .align	5
        /*
         * bn_div_words -- exported entry point.
         *
         * When the divisor in $6 is non-zero, jumps to
         * bn_div_words_internal.  A zero divisor is not trapped: the
         * routine returns -1 in $2 (mirrored into $4) instead.  The li in
         * the branch delay slot runs on both paths; the internal routine
         * overwrites $2 before using it.
         *
         * NOTE(review): presumably $4 = high dividend word, $5 = low
         * dividend word, $6 = divisor -- confirm against the C prototype.
         */
    636 .globl	bn_div_words
    637 .ent	bn_div_words
    638 bn_div_words:
    639 	.set	noreorder
    640 	bnez	$6,bn_div_words_internal
    641 	li	$2,-1		# I would rather signal div-by-zero
    642 				# which can be done with 'break 7'
    643 	jr	$31
    644 	move	$4,$2
    645 .end	bn_div_words
    646 
    647 .align	5
        /*
         * bn_div_words_internal -- divide the two-word value $4:$5 by $6.
         *
         * Entered in noreorder mode (inherited from bn_div_words above).
         *
         * 1. Normalise: shift the divisor left until its top bit is set,
         *    counting the shifts in $25.  The dividend is shifted by the
         *    same amount; if any non-zero bits of $4 would be shifted out,
         *    "break 6" is executed to signal overflow.
         * 2. If $4 >= $6, subtract $6 from $4 once.
         * 3. Produce the quotient as two 16-bit digits.  For each digit an
         *    estimate is obtained by dividing $4 by the upper half of the
         *    divisor ($3), or taken as all-ones when the upper halves are
         *    equal; the estimate is then decremented while estimate * $6
         *    exceeds the current three-half-word remainder ($9 : $15).
         * 4. The digits are combined in $2; the remainder is shifted back
         *    down by $25 into $3 and the divisor in $6 is restored.
         *
         * Returns: quotient in $2 (also in $4), remainder in $3 (also $5).
         * Scratch: $1, $8, $9, $12-$15, $24, $25.
         *
         * The ".-4" and ".+12" branch targets are relative to the branch
         * itself and rely on fixed 4-byte instructions.
         */
    648 .ent	bn_div_words_internal
    649 bn_div_words_internal:
    650 	move	$3,$0
    651 	bltz	$6,.L_bn_div_words_body	# top bit already set: no shift
    652 	move	$25,$3		# delay slot: shift count = 0
    653 	sll	$6,1
    654 	bgtz	$6,.-4		# repeat the sll until the top bit is set
    655 	addu	$25,1		# delay slot: one more shift counted
    656 
    657 	.set	reorder
    658 	negu	$13,$25
    659 	li	$14,-1
    660 	sll	$14,$13		# mask of the $25 uppermost bits
    661 	and	$14,$4		# bits of $4 that the shift would lose
    662 	srl	$1,$5,$13	# bits of $5 that move up into $4
    663 	.set	noreorder
    664 	beqz	$14,.+12	# nothing lost: skip the break below
    665 	nop
    666 	break	6		# signal overflow
    667 	.set	reorder
    668 	sll	$4,$25
    669 	sll	$5,$25
    670 	or	$4,$1
    671 .L_bn_div_words_body:
    672 	srl	$3,$6,4*4	# bits
    673 	sgeu	$1,$4,$6
    674 	.set	noreorder
    675 	beqz	$1,.+12		# $4 < $6: skip the subtraction below
    676 	nop
    677 	subu	$4,$6
    678 	.set	reorder
    679 
    680 	li	$8,-1
    681 	srl	$9,$4,4*4	# bits
    682 	srl	$8,4*4	# q=0xffff (largest 16-bit digit)
    683 	beq	$3,$9,.L_bn_div_words_skip_div1
    684 	divu	($4,$3)
    685 	mfqt	($8,$4,$3)
    686 .L_bn_div_words_skip_div1:
    687 	multu	($6,$8)
    688 	sll	$15,$4,4*4	# bits
    689 	srl	$1,$5,4*4	# bits
    690 	or	$15,$1
    691 	mflo	($12,$6,$8)
    692 	mfhi	($13,$6,$8)
    693 .L_bn_div_words_inner_loop1:
    694 	sltu	$14,$15,$12
    695 	seq	$24,$9,$13
    696 	sltu	$1,$9,$13
    697 	and	$14,$24
    698 	sltu	$2,$12,$6	# borrow for the two-word subtraction below
    699 	or	$1,$14		# set while the product exceeds the remainder
    700 	.set	noreorder
    701 	beqz	$1,.L_bn_div_words_inner_loop1_done
    702 	subu	$13,$2
    703 	subu	$12,$6
    704 	b	.L_bn_div_words_inner_loop1
    705 	subu	$8,1		# delay slot: lower the digit estimate
    706 	.set	reorder
    707 .L_bn_div_words_inner_loop1_done:
    708 
    709 	sll	$5,4*4	# bits
    710 	subu	$4,$15,$12
    711 	sll	$2,$8,4*4	# bits
    712 
    713 	li	$8,-1
    714 	srl	$9,$4,4*4	# bits
    715 	srl	$8,4*4	# q=0xffff (largest 16-bit digit)
    716 	beq	$3,$9,.L_bn_div_words_skip_div2
    717 	divu	($4,$3)
    718 	mfqt	($8,$4,$3)
    719 .L_bn_div_words_skip_div2:
    720 	multu	($6,$8)
    721 	sll	$15,$4,4*4	# bits
    722 	srl	$1,$5,4*4	# bits
    723 	or	$15,$1
    724 	mflo	($12,$6,$8)
    725 	mfhi	($13,$6,$8)
    726 .L_bn_div_words_inner_loop2:
    727 	sltu	$14,$15,$12
    728 	seq	$24,$9,$13
    729 	sltu	$1,$9,$13
    730 	and	$14,$24
    731 	sltu	$3,$12,$6	# $3 is free now and serves as the borrow
    732 	or	$1,$14
    733 	.set	noreorder
    734 	beqz	$1,.L_bn_div_words_inner_loop2_done
    735 	subu	$13,$3
    736 	subu	$12,$6
    737 	b	.L_bn_div_words_inner_loop2
    738 	subu	$8,1		# delay slot: lower the digit estimate
    739 	.set	reorder
    740 .L_bn_div_words_inner_loop2_done:
    741 
    742 	subu	$4,$15,$12
    743 	or	$2,$8
    744 	srl	$3,$4,$25	# $3 contains remainder if anybody wants it
    745 	srl	$6,$25		# restore $6
    746 
    747 	.set	noreorder
    748 	move	$5,$3
    749 	jr	$31
    750 	move	$4,$2
    751 .end	bn_div_words_internal
    752 
    753 .align	5
    754 .globl	bn_mul_comba8
    755 .ent	bn_mul_comba8
    756 bn_mul_comba8:
    757 	.set	noreorder
    758 	.frame	$29,6*4,$31
    759 	.mask	0x003f0000,-4
    760 	subu $29,6*4
    761 	sw	$21,5*4($29)
    762 	sw	$20,4*4($29)
    763 	sw	$19,3*4($29)
    764 	sw	$18,2*4($29)
    765 	sw	$17,1*4($29)
    766 	sw	$16,0*4($29)
    767 
    768 	.set	reorder
    769 	lw	$12,0($5)	# If compiled with -mips3 option on
    770 				# R5000 box assembler barks on this
    771 				# 1ine with "should not have mult/div
    772 				# as last instruction in bb (R10K
    773 				# bug)" warning. If anybody out there
    774 				# has a clue about how to circumvent
    775 				# this do send me a note.
    776 				#		<appro@fy.chalmers.se>
    777 
    778 	lw	$8,0($6)
    779 	lw	$13,4($5)
    780 	lw	$14,2*4($5)
    781 	multu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
    782 	lw	$15,3*4($5)
    783 	lw	$9,4($6)
    784 	lw	$10,2*4($6)
    785 	lw	$11,3*4($6)
    786 	mflo	($2,$12,$8)
    787 	mfhi	($3,$12,$8)
    788 
    789 	lw	$16,4*4($5)
    790 	lw	$18,5*4($5)
    791 	multu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
    792 	lw	$20,6*4($5)
    793 	lw	$5,7*4($5)
    794 	lw	$17,4*4($6)
    795 	lw	$19,5*4($6)
    796 	mflo	($24,$12,$9)
    797 	mfhi	($25,$12,$9)
    798 	addu	$3,$24
    799 	sltu	$1,$3,$24
    800 	multu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
    801 	addu	$7,$25,$1
    802 	lw	$21,6*4($6)
    803 	lw	$6,7*4($6)
    804 	sw	$2,0($4)	# r[0]=c1;
    805 	mflo	($24,$13,$8)
    806 	mfhi	($25,$13,$8)
    807 	addu	$3,$24
    808 	sltu	$1,$3,$24
    809 	 multu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
    810 	addu	$25,$1
    811 	addu	$7,$25
    812 	sltu	$2,$7,$25
    813 	sw	$3,4($4)	# r[1]=c2;
    814 
    815 	mflo	($24,$14,$8)
    816 	mfhi	($25,$14,$8)
    817 	addu	$7,$24
    818 	sltu	$1,$7,$24
    819 	multu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
    820 	addu	$25,$1
    821 	addu	$2,$25
    822 	mflo	($24,$13,$9)
    823 	mfhi	($25,$13,$9)
    824 	addu	$7,$24
    825 	sltu	$1,$7,$24
    826 	multu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
    827 	addu	$25,$1
    828 	addu	$2,$25
    829 	sltu	$3,$2,$25
    830 	mflo	($24,$12,$10)
    831 	mfhi	($25,$12,$10)
    832 	addu	$7,$24
    833 	sltu	$1,$7,$24
    834 	 multu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
    835 	addu	$25,$1
    836 	addu	$2,$25
    837 	sltu	$1,$2,$25
    838 	addu	$3,$1
    839 	sw	$7,2*4($4)	# r[2]=c3;
    840 
    841 	mflo	($24,$12,$11)
    842 	mfhi	($25,$12,$11)
    843 	addu	$2,$24
    844 	sltu	$1,$2,$24
    845 	multu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
    846 	addu	$25,$1
    847 	addu	$3,$25
    848 	sltu	$7,$3,$25
    849 	mflo	($24,$13,$10)
    850 	mfhi	($25,$13,$10)
    851 	addu	$2,$24
    852 	sltu	$1,$2,$24
    853 	multu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
    854 	addu	$25,$1
    855 	addu	$3,$25
    856 	sltu	$1,$3,$25
    857 	addu	$7,$1
    858 	mflo	($24,$14,$9)
    859 	mfhi	($25,$14,$9)
    860 	addu	$2,$24
    861 	sltu	$1,$2,$24
    862 	multu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
    863 	addu	$25,$1
    864 	addu	$3,$25
    865 	sltu	$1,$3,$25
    866 	addu	$7,$1
    867 	mflo	($24,$15,$8)
    868 	mfhi	($25,$15,$8)
    869 	addu	$2,$24
    870 	sltu	$1,$2,$24
    871 	 multu	($16,$8)		# mul_add_c(a[4],b[0],c2,c3,c1);
    872 	addu	$25,$1
    873 	addu	$3,$25
    874 	sltu	$1,$3,$25
    875 	addu	$7,$1
    876 	sw	$2,3*4($4)	# r[3]=c1;
    877 
    878 	mflo	($24,$16,$8)
    879 	mfhi	($25,$16,$8)
    880 	addu	$3,$24
    881 	sltu	$1,$3,$24
    882 	multu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
    883 	addu	$25,$1
    884 	addu	$7,$25
    885 	sltu	$2,$7,$25
    886 	mflo	($24,$15,$9)
    887 	mfhi	($25,$15,$9)
    888 	addu	$3,$24
    889 	sltu	$1,$3,$24
    890 	multu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
    891 	addu	$25,$1
    892 	addu	$7,$25
    893 	sltu	$1,$7,$25
    894 	addu	$2,$1
    895 	mflo	($24,$14,$10)
    896 	mfhi	($25,$14,$10)
    897 	addu	$3,$24
    898 	sltu	$1,$3,$24
    899 	multu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
    900 	addu	$25,$1
    901 	addu	$7,$25
    902 	sltu	$1,$7,$25
    903 	addu	$2,$1
    904 	mflo	($24,$13,$11)
    905 	mfhi	($25,$13,$11)
    906 	addu	$3,$24
    907 	sltu	$1,$3,$24
    908 	multu	($12,$17)		# mul_add_c(a[0],b[4],c2,c3,c1);
    909 	addu	$25,$1
    910 	addu	$7,$25
    911 	sltu	$1,$7,$25
    912 	addu	$2,$1
    913 	mflo	($24,$12,$17)
    914 	mfhi	($25,$12,$17)
    915 	addu	$3,$24
    916 	sltu	$1,$3,$24
    917 	 multu	($12,$19)		# mul_add_c(a[0],b[5],c3,c1,c2);
    918 	addu	$25,$1
    919 	addu	$7,$25
    920 	sltu	$1,$7,$25
    921 	addu	$2,$1
    922 	sw	$3,4*4($4)	# r[4]=c2;
    923 
    924 	mflo	($24,$12,$19)
    925 	mfhi	($25,$12,$19)
    926 	addu	$7,$24
    927 	sltu	$1,$7,$24
    928 	multu	($13,$17)		# mul_add_c(a[1],b[4],c3,c1,c2);
    929 	addu	$25,$1
    930 	addu	$2,$25
    931 	sltu	$3,$2,$25
    932 	mflo	($24,$13,$17)
    933 	mfhi	($25,$13,$17)
    934 	addu	$7,$24
    935 	sltu	$1,$7,$24
    936 	multu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
    937 	addu	$25,$1
    938 	addu	$2,$25
    939 	sltu	$1,$2,$25
    940 	addu	$3,$1
    941 	mflo	($24,$14,$11)
    942 	mfhi	($25,$14,$11)
    943 	addu	$7,$24
    944 	sltu	$1,$7,$24
    945 	multu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
    946 	addu	$25,$1
    947 	addu	$2,$25
    948 	sltu	$1,$2,$25
    949 	addu	$3,$1
    950 	mflo	($24,$15,$10)
    951 	mfhi	($25,$15,$10)
    952 	addu	$7,$24
    953 	sltu	$1,$7,$24
    954 	multu	($16,$9)		# mul_add_c(a[4],b[1],c3,c1,c2);
    955 	addu	$25,$1
    956 	addu	$2,$25
    957 	sltu	$1,$2,$25
    958 	addu	$3,$1
    959 	mflo	($24,$16,$9)
    960 	mfhi	($25,$16,$9)
    961 	addu	$7,$24
    962 	sltu	$1,$7,$24
    963 	multu	($18,$8)		# mul_add_c(a[5],b[0],c3,c1,c2);
    964 	addu	$25,$1
    965 	addu	$2,$25
    966 	sltu	$1,$2,$25
    967 	addu	$3,$1
    968 	mflo	($24,$18,$8)
    969 	mfhi	($25,$18,$8)
    970 	addu	$7,$24
    971 	sltu	$1,$7,$24
    972 	 multu	($20,$8)		# mul_add_c(a[6],b[0],c1,c2,c3);
    973 	addu	$25,$1
    974 	addu	$2,$25
    975 	sltu	$1,$2,$25
    976 	addu	$3,$1
    977 	sw	$7,5*4($4)	# r[5]=c3;
    978 
    979 	mflo	($24,$20,$8)
    980 	mfhi	($25,$20,$8)
    981 	addu	$2,$24
    982 	sltu	$1,$2,$24
    983 	multu	($18,$9)		# mul_add_c(a[5],b[1],c1,c2,c3);
    984 	addu	$25,$1
    985 	addu	$3,$25
    986 	sltu	$7,$3,$25
    987 	mflo	($24,$18,$9)
    988 	mfhi	($25,$18,$9)
    989 	addu	$2,$24
    990 	sltu	$1,$2,$24
    991 	multu	($16,$10)		# mul_add_c(a[4],b[2],c1,c2,c3);
    992 	addu	$25,$1
    993 	addu	$3,$25
    994 	sltu	$1,$3,$25
    995 	addu	$7,$1
    996 	mflo	($24,$16,$10)
    997 	mfhi	($25,$16,$10)
    998 	addu	$2,$24
    999 	sltu	$1,$2,$24
   1000 	multu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
   1001 	addu	$25,$1
   1002 	addu	$3,$25
   1003 	sltu	$1,$3,$25
   1004 	addu	$7,$1
   1005 	mflo	($24,$15,$11)
   1006 	mfhi	($25,$15,$11)
   1007 	addu	$2,$24
   1008 	sltu	$1,$2,$24
   1009 	multu	($14,$17)		# mul_add_c(a[2],b[4],c1,c2,c3);
   1010 	addu	$25,$1
   1011 	addu	$3,$25
   1012 	sltu	$1,$3,$25
   1013 	addu	$7,$1
   1014 	mflo	($24,$14,$17)
   1015 	mfhi	($25,$14,$17)
   1016 	addu	$2,$24
   1017 	sltu	$1,$2,$24
   1018 	multu	($13,$19)		# mul_add_c(a[1],b[5],c1,c2,c3);
   1019 	addu	$25,$1
   1020 	addu	$3,$25
   1021 	sltu	$1,$3,$25
   1022 	addu	$7,$1
   1023 	mflo	($24,$13,$19)
   1024 	mfhi	($25,$13,$19)
   1025 	addu	$2,$24
   1026 	sltu	$1,$2,$24
   1027 	multu	($12,$21)		# mul_add_c(a[0],b[6],c1,c2,c3);
   1028 	addu	$25,$1
   1029 	addu	$3,$25
   1030 	sltu	$1,$3,$25
   1031 	addu	$7,$1
   1032 	mflo	($24,$12,$21)
   1033 	mfhi	($25,$12,$21)
   1034 	addu	$2,$24
   1035 	sltu	$1,$2,$24
   1036 	 multu	($12,$6)		# mul_add_c(a[0],b[7],c2,c3,c1);
   1037 	addu	$25,$1
   1038 	addu	$3,$25
   1039 	sltu	$1,$3,$25
   1040 	addu	$7,$1
   1041 	sw	$2,6*4($4)	# r[6]=c1;
   1042 
   1043 	mflo	($24,$12,$6)
   1044 	mfhi	($25,$12,$6)
   1045 	addu	$3,$24
   1046 	sltu	$1,$3,$24
   1047 	multu	($13,$21)		# mul_add_c(a[1],b[6],c2,c3,c1);
   1048 	addu	$25,$1
   1049 	addu	$7,$25
   1050 	sltu	$2,$7,$25
   1051 	mflo	($24,$13,$21)
   1052 	mfhi	($25,$13,$21)
   1053 	addu	$3,$24
   1054 	sltu	$1,$3,$24
   1055 	multu	($14,$19)		# mul_add_c(a[2],b[5],c2,c3,c1);
   1056 	addu	$25,$1
   1057 	addu	$7,$25
   1058 	sltu	$1,$7,$25
   1059 	addu	$2,$1
   1060 	mflo	($24,$14,$19)
   1061 	mfhi	($25,$14,$19)
   1062 	addu	$3,$24
   1063 	sltu	$1,$3,$24
   1064 	multu	($15,$17)		# mul_add_c(a[3],b[4],c2,c3,c1);
   1065 	addu	$25,$1
   1066 	addu	$7,$25
   1067 	sltu	$1,$7,$25
   1068 	addu	$2,$1
   1069 	mflo	($24,$15,$17)
   1070 	mfhi	($25,$15,$17)
   1071 	addu	$3,$24
   1072 	sltu	$1,$3,$24
   1073 	multu	($16,$11)		# mul_add_c(a[4],b[3],c2,c3,c1);
   1074 	addu	$25,$1
   1075 	addu	$7,$25
   1076 	sltu	$1,$7,$25
   1077 	addu	$2,$1
   1078 	mflo	($24,$16,$11)
   1079 	mfhi	($25,$16,$11)
   1080 	addu	$3,$24
   1081 	sltu	$1,$3,$24
   1082 	multu	($18,$10)		# mul_add_c(a[5],b[2],c2,c3,c1);
   1083 	addu	$25,$1
   1084 	addu	$7,$25
   1085 	sltu	$1,$7,$25
   1086 	addu	$2,$1
   1087 	mflo	($24,$18,$10)
   1088 	mfhi	($25,$18,$10)
   1089 	addu	$3,$24
   1090 	sltu	$1,$3,$24
   1091 	multu	($20,$9)		# mul_add_c(a[6],b[1],c2,c3,c1);
   1092 	addu	$25,$1
   1093 	addu	$7,$25
   1094 	sltu	$1,$7,$25
   1095 	addu	$2,$1
   1096 	mflo	($24,$20,$9)
   1097 	mfhi	($25,$20,$9)
   1098 	addu	$3,$24
   1099 	sltu	$1,$3,$24
   1100 	multu	($5,$8)		# mul_add_c(a[7],b[0],c2,c3,c1);
   1101 	addu	$25,$1
   1102 	addu	$7,$25
   1103 	sltu	$1,$7,$25
   1104 	addu	$2,$1
   1105 	mflo	($24,$5,$8)
   1106 	mfhi	($25,$5,$8)
   1107 	addu	$3,$24
   1108 	sltu	$1,$3,$24
   1109 	 multu	($5,$9)		# mul_add_c(a[7],b[1],c3,c1,c2);
   1110 	addu	$25,$1
   1111 	addu	$7,$25
   1112 	sltu	$1,$7,$25
   1113 	addu	$2,$1
   1114 	sw	$3,7*4($4)	# r[7]=c2;
   1115 
   1116 	mflo	($24,$5,$9)
   1117 	mfhi	($25,$5,$9)
   1118 	addu	$7,$24
   1119 	sltu	$1,$7,$24
   1120 	multu	($20,$10)		# mul_add_c(a[6],b[2],c3,c1,c2);
   1121 	addu	$25,$1
   1122 	addu	$2,$25
   1123 	sltu	$3,$2,$25
   1124 	mflo	($24,$20,$10)
   1125 	mfhi	($25,$20,$10)
   1126 	addu	$7,$24
   1127 	sltu	$1,$7,$24
   1128 	multu	($18,$11)		# mul_add_c(a[5],b[3],c3,c1,c2);
   1129 	addu	$25,$1
   1130 	addu	$2,$25
   1131 	sltu	$1,$2,$25
   1132 	addu	$3,$1
   1133 	mflo	($24,$18,$11)
   1134 	mfhi	($25,$18,$11)
   1135 	addu	$7,$24
   1136 	sltu	$1,$7,$24
   1137 	multu	($16,$17)		# mul_add_c(a[4],b[4],c3,c1,c2);
   1138 	addu	$25,$1
   1139 	addu	$2,$25
   1140 	sltu	$1,$2,$25
   1141 	addu	$3,$1
   1142 	mflo	($24,$16,$17)
   1143 	mfhi	($25,$16,$17)
   1144 	addu	$7,$24
   1145 	sltu	$1,$7,$24
   1146 	multu	($15,$19)		# mul_add_c(a[3],b[5],c3,c1,c2);
   1147 	addu	$25,$1
   1148 	addu	$2,$25
   1149 	sltu	$1,$2,$25
   1150 	addu	$3,$1
   1151 	mflo	($24,$15,$19)
   1152 	mfhi	($25,$15,$19)
   1153 	addu	$7,$24
   1154 	sltu	$1,$7,$24
   1155 	multu	($14,$21)		# mul_add_c(a[2],b[6],c3,c1,c2);
   1156 	addu	$25,$1
   1157 	addu	$2,$25
   1158 	sltu	$1,$2,$25
   1159 	addu	$3,$1
   1160 	mflo	($24,$14,$21)
   1161 	mfhi	($25,$14,$21)
   1162 	addu	$7,$24
   1163 	sltu	$1,$7,$24
   1164 	multu	($13,$6)		# mul_add_c(a[1],b[7],c3,c1,c2);
   1165 	addu	$25,$1
   1166 	addu	$2,$25
   1167 	sltu	$1,$2,$25
   1168 	addu	$3,$1
   1169 	mflo	($24,$13,$6)
   1170 	mfhi	($25,$13,$6)
   1171 	addu	$7,$24
   1172 	sltu	$1,$7,$24
   1173 	 multu	($14,$6)		# mul_add_c(a[2],b[7],c1,c2,c3);
   1174 	addu	$25,$1
   1175 	addu	$2,$25
   1176 	sltu	$1,$2,$25
   1177 	addu	$3,$1
   1178 	sw	$7,8*4($4)	# r[8]=c3;
   1179 
   1180 	mflo	($24,$14,$6)
   1181 	mfhi	($25,$14,$6)
   1182 	addu	$2,$24
   1183 	sltu	$1,$2,$24
   1184 	multu	($15,$21)		# mul_add_c(a[3],b[6],c1,c2,c3);
   1185 	addu	$25,$1
   1186 	addu	$3,$25
   1187 	sltu	$7,$3,$25
   1188 	mflo	($24,$15,$21)
   1189 	mfhi	($25,$15,$21)
   1190 	addu	$2,$24
   1191 	sltu	$1,$2,$24
   1192 	multu	($16,$19)		# mul_add_c(a[4],b[5],c1,c2,c3);
   1193 	addu	$25,$1
   1194 	addu	$3,$25
   1195 	sltu	$1,$3,$25
   1196 	addu	$7,$1
   1197 	mflo	($24,$16,$19)
   1198 	mfhi	($25,$16,$19)
   1199 	addu	$2,$24
   1200 	sltu	$1,$2,$24
   1201 	multu	($18,$17)		# mul_add_c(a[5],b[4],c1,c2,c3);
   1202 	addu	$25,$1
   1203 	addu	$3,$25
   1204 	sltu	$1,$3,$25
   1205 	addu	$7,$1
   1206 	mflo	($24,$18,$17)
   1207 	mfhi	($25,$18,$17)
   1208 	addu	$2,$24
   1209 	sltu	$1,$2,$24
   1210 	multu	($20,$11)		# mul_add_c(a[6],b[3],c1,c2,c3);
   1211 	addu	$25,$1
   1212 	addu	$3,$25
   1213 	sltu	$1,$3,$25
   1214 	addu	$7,$1
   1215 	mflo	($24,$20,$11)
   1216 	mfhi	($25,$20,$11)
   1217 	addu	$2,$24
   1218 	sltu	$1,$2,$24
   1219 	multu	($5,$10)		# mul_add_c(a[7],b[2],c1,c2,c3);
   1220 	addu	$25,$1
   1221 	addu	$3,$25
   1222 	sltu	$1,$3,$25
   1223 	addu	$7,$1
   1224 	mflo	($24,$5,$10)
   1225 	mfhi	($25,$5,$10)
   1226 	addu	$2,$24
   1227 	sltu	$1,$2,$24
   1228 	 multu	($5,$11)		# mul_add_c(a[7],b[3],c2,c3,c1);
   1229 	addu	$25,$1
   1230 	addu	$3,$25
   1231 	sltu	$1,$3,$25
   1232 	addu	$7,$1
   1233 	sw	$2,9*4($4)	# r[9]=c1;
   1234 
   1235 	mflo	($24,$5,$11)
   1236 	mfhi	($25,$5,$11)
   1237 	addu	$3,$24
   1238 	sltu	$1,$3,$24
   1239 	multu	($20,$17)		# mul_add_c(a[6],b[4],c2,c3,c1);
   1240 	addu	$25,$1
   1241 	addu	$7,$25
   1242 	sltu	$2,$7,$25
   1243 	mflo	($24,$20,$17)
   1244 	mfhi	($25,$20,$17)
   1245 	addu	$3,$24
   1246 	sltu	$1,$3,$24
   1247 	multu	($18,$19)		# mul_add_c(a[5],b[5],c2,c3,c1);
   1248 	addu	$25,$1
   1249 	addu	$7,$25
   1250 	sltu	$1,$7,$25
   1251 	addu	$2,$1
   1252 	mflo	($24,$18,$19)
   1253 	mfhi	($25,$18,$19)
   1254 	addu	$3,$24
   1255 	sltu	$1,$3,$24
   1256 	multu	($16,$21)		# mul_add_c(a[4],b[6],c2,c3,c1);
   1257 	addu	$25,$1
   1258 	addu	$7,$25
   1259 	sltu	$1,$7,$25
   1260 	addu	$2,$1
   1261 	mflo	($24,$16,$21)
   1262 	mfhi	($25,$16,$21)
   1263 	addu	$3,$24
   1264 	sltu	$1,$3,$24
   1265 	multu	($15,$6)		# mul_add_c(a[3],b[7],c2,c3,c1);
   1266 	addu	$25,$1
   1267 	addu	$7,$25
   1268 	sltu	$1,$7,$25
   1269 	addu	$2,$1
   1270 	mflo	($24,$15,$6)
   1271 	mfhi	($25,$15,$6)
   1272 	addu	$3,$24
   1273 	sltu	$1,$3,$24
   1274 	multu	($16,$6)		# mul_add_c(a[4],b[7],c3,c1,c2);
   1275 	addu	$25,$1
   1276 	addu	$7,$25
   1277 	sltu	$1,$7,$25
   1278 	addu	$2,$1
   1279 	sw	$3,10*4($4)	# r[10]=c2;
   1280 
   1281 	mflo	($24,$16,$6)
   1282 	mfhi	($25,$16,$6)
   1283 	addu	$7,$24
   1284 	sltu	$1,$7,$24
   1285 	multu	($18,$21)		# mul_add_c(a[5],b[6],c3,c1,c2);
   1286 	addu	$25,$1
   1287 	addu	$2,$25
   1288 	sltu	$3,$2,$25
   1289 	mflo	($24,$18,$21)
   1290 	mfhi	($25,$18,$21)
   1291 	addu	$7,$24
   1292 	sltu	$1,$7,$24
   1293 	multu	($20,$19)		# mul_add_c(a[6],b[5],c3,c1,c2);
   1294 	addu	$25,$1
   1295 	addu	$2,$25
   1296 	sltu	$1,$2,$25
   1297 	addu	$3,$1
   1298 	mflo	($24,$20,$19)
   1299 	mfhi	($25,$20,$19)
   1300 	addu	$7,$24
   1301 	sltu	$1,$7,$24
   1302 	multu	($5,$17)		# mul_add_c(a[7],b[4],c3,c1,c2);
   1303 	addu	$25,$1
   1304 	addu	$2,$25
   1305 	sltu	$1,$2,$25
   1306 	addu	$3,$1
   1307 	mflo	($24,$5,$17)
   1308 	mfhi	($25,$5,$17)
   1309 	addu	$7,$24
   1310 	sltu	$1,$7,$24
   1311 	 multu	($5,$19)		# mul_add_c(a[7],b[5],c1,c2,c3);
   1312 	addu	$25,$1
   1313 	addu	$2,$25
   1314 	sltu	$1,$2,$25
   1315 	addu	$3,$1
   1316 	sw	$7,11*4($4)	# r[11]=c3;
   1317 
   1318 	mflo	($24,$5,$19)
   1319 	mfhi	($25,$5,$19)
   1320 	addu	$2,$24
   1321 	sltu	$1,$2,$24
   1322 	multu	($20,$21)		# mul_add_c(a[6],b[6],c1,c2,c3);
   1323 	addu	$25,$1
   1324 	addu	$3,$25
   1325 	sltu	$7,$3,$25
   1326 	mflo	($24,$20,$21)
   1327 	mfhi	($25,$20,$21)
   1328 	addu	$2,$24
   1329 	sltu	$1,$2,$24
   1330 	multu	($18,$6)		# mul_add_c(a[5],b[7],c1,c2,c3);
   1331 	addu	$25,$1
   1332 	addu	$3,$25
   1333 	sltu	$1,$3,$25
   1334 	addu	$7,$1
   1335 	mflo	($24,$18,$6)
   1336 	mfhi	($25,$18,$6)
   1337 	addu	$2,$24
   1338 	sltu	$1,$2,$24
   1339 	 multu	($20,$6)		# mul_add_c(a[6],b[7],c2,c3,c1);
   1340 	addu	$25,$1
   1341 	addu	$3,$25
   1342 	sltu	$1,$3,$25
   1343 	addu	$7,$1
   1344 	sw	$2,12*4($4)	# r[12]=c1;
   1345 
   1346 	mflo	($24,$20,$6)
   1347 	mfhi	($25,$20,$6)
   1348 	addu	$3,$24
   1349 	sltu	$1,$3,$24
   1350 	multu	($5,$21)		# mul_add_c(a[7],b[6],c2,c3,c1);
   1351 	addu	$25,$1
   1352 	addu	$7,$25
   1353 	sltu	$2,$7,$25
   1354 	mflo	($24,$5,$21)
   1355 	mfhi	($25,$5,$21)
   1356 	addu	$3,$24
   1357 	sltu	$1,$3,$24
   1358 	multu	($5,$6)		# mul_add_c(a[7],b[7],c3,c1,c2);
   1359 	addu	$25,$1
   1360 	addu	$7,$25
   1361 	sltu	$1,$7,$25
   1362 	addu	$2,$1
   1363 	sw	$3,13*4($4)	# r[13]=c2;
   1364 
   1365 	mflo	($24,$5,$6)
   1366 	mfhi	($25,$5,$6)
   1367 	addu	$7,$24
   1368 	sltu	$1,$7,$24
   1369 	addu	$25,$1
   1370 	addu	$2,$25
   1371 	sw	$7,14*4($4)	# r[14]=c3;
   1372 	sw	$2,15*4($4)	# r[15]=c1;
   1373 
   1374 	.set	noreorder
   1375 	lw	$21,5*4($29)
   1376 	lw	$20,4*4($29)
   1377 	lw	$19,3*4($29)
   1378 	lw	$18,2*4($29)
   1379 	lw	$17,1*4($29)
   1380 	lw	$16,0*4($29)
   1381 	jr	$31
   1382 	addu $29,6*4
   1383 .end	bn_mul_comba8
   1384 
   1385 .align	5
   1386 .globl	bn_mul_comba4
   1387 .ent	bn_mul_comba4
        # bn_mul_comba4: 4x4-word multiplication on 32-bit words,
        # r[0..7] = a[0..3] * b[0..3], computed one result column at a time.
        # The names r, a, b and c1..c3 follow the mul_add_c annotations below.
        #
        # Registers on entry:
        #   $4 = r (eight words are stored), $5 = a, $6 = b (four words are
        #   loaded from each).
        # Working registers:
        #   $12-$15 = a[0..3], $8-$11 = b[0..3]
        #   $2/$3/$7 = c1/c2/c3, a three-word column accumulator whose roles
        #              rotate by one position after every column
        #   $24/$25  = low/high half of the product being accumulated
        #   $1       = carry bit produced by sltu (the file head sets noat)
        # No stack frame is used and no register is saved or restored.
        #
        # multu()/mflo()/mfhi() with parenthesised operands are preprocessor
        # macros defined outside this chunk -- presumably in mips_arch.h.
        # A multu written with an extra leading space is the multiply for the
        # NEXT column, issued early so it overlaps the carry handling.
   1388 bn_mul_comba4:
   1389 	.set	reorder
   1390 	lw	$12,0($5)		# a[0]
   1391 	lw	$8,0($6)		# b[0]
   1392 	lw	$13,4($5)		# a[1]
   1393 	lw	$14,2*4($5)		# a[2]
   1394 	multu	($12,$8)		# mul_add_c(a[0],b[0],c1,c2,c3);
   1395 	lw	$15,3*4($5)		# a[3]
   1396 	lw	$9,4($6)		# b[1]
   1397 	lw	$10,2*4($6)		# b[2]
   1398 	lw	$11,3*4($6)		# b[3]
   1399 	mflo	($2,$12,$8)
   1400 	mfhi	($3,$12,$8)
   1401 	sw	$2,0($4)		# r[0]=c1;
   1402 
        	# Column 1 (-> r[1]): a[0]*b[1] + a[1]*b[0]
   1403 	multu	($12,$9)		# mul_add_c(a[0],b[1],c2,c3,c1);
   1404 	mflo	($24,$12,$9)
   1405 	mfhi	($25,$12,$9)
   1406 	addu	$3,$24
   1407 	sltu	$1,$3,$24		# $1 = carry out of c2
   1408 	multu	($13,$8)		# mul_add_c(a[1],b[0],c2,c3,c1);
   1409 	addu	$7,$25,$1		# first write of c3: high half + carry
   1410 	mflo	($24,$13,$8)
   1411 	mfhi	($25,$13,$8)
   1412 	addu	$3,$24
   1413 	sltu	$1,$3,$24
   1414 	 multu	($14,$8)		# mul_add_c(a[2],b[0],c3,c1,c2);
   1415 	addu	$25,$1
   1416 	addu	$7,$25
   1417 	sltu	$2,$7,$25		# c1 restarts as the carry out of c3
   1418 	sw	$3,4($4)		# r[1]=c2;
   1419 
        	# Column 2 (-> r[2]): a[2]*b[0] + a[1]*b[1] + a[0]*b[2]
   1420 	mflo	($24,$14,$8)
   1421 	mfhi	($25,$14,$8)
   1422 	addu	$7,$24
   1423 	sltu	$1,$7,$24
   1424 	multu	($13,$9)		# mul_add_c(a[1],b[1],c3,c1,c2);
   1425 	addu	$25,$1
   1426 	addu	$2,$25			# c1 is 0 or 1 here; no carry-out is recorded for this add
   1427 	mflo	($24,$13,$9)
   1428 	mfhi	($25,$13,$9)
   1429 	addu	$7,$24
   1430 	sltu	$1,$7,$24
   1431 	multu	($12,$10)		# mul_add_c(a[0],b[2],c3,c1,c2);
   1432 	addu	$25,$1
   1433 	addu	$2,$25
   1434 	sltu	$3,$2,$25		# c2 restarts as the carry out of c1
   1435 	mflo	($24,$12,$10)
   1436 	mfhi	($25,$12,$10)
   1437 	addu	$7,$24
   1438 	sltu	$1,$7,$24
   1439 	 multu	($12,$11)		# mul_add_c(a[0],b[3],c1,c2,c3);
   1440 	addu	$25,$1
   1441 	addu	$2,$25
   1442 	sltu	$1,$2,$25
   1443 	addu	$3,$1
   1444 	sw	$7,2*4($4)		# r[2]=c3;
   1445 
        	# Column 3 (-> r[3]): a[0]*b[3] + a[1]*b[2] + a[2]*b[1] + a[3]*b[0]
   1446 	mflo	($24,$12,$11)
   1447 	mfhi	($25,$12,$11)
   1448 	addu	$2,$24
   1449 	sltu	$1,$2,$24
   1450 	multu	($13,$10)		# mul_add_c(a[1],b[2],c1,c2,c3);
   1451 	addu	$25,$1
   1452 	addu	$3,$25
   1453 	sltu	$7,$3,$25		# c3 restarts as the carry out of c2
   1454 	mflo	($24,$13,$10)
   1455 	mfhi	($25,$13,$10)
   1456 	addu	$2,$24
   1457 	sltu	$1,$2,$24
   1458 	multu	($14,$9)		# mul_add_c(a[2],b[1],c1,c2,c3);
   1459 	addu	$25,$1
   1460 	addu	$3,$25
   1461 	sltu	$1,$3,$25
   1462 	addu	$7,$1
   1463 	mflo	($24,$14,$9)
   1464 	mfhi	($25,$14,$9)
   1465 	addu	$2,$24
   1466 	sltu	$1,$2,$24
   1467 	multu	($15,$8)		# mul_add_c(a[3],b[0],c1,c2,c3);
   1468 	addu	$25,$1
   1469 	addu	$3,$25
   1470 	sltu	$1,$3,$25
   1471 	addu	$7,$1
   1472 	mflo	($24,$15,$8)
   1473 	mfhi	($25,$15,$8)
   1474 	addu	$2,$24
   1475 	sltu	$1,$2,$24
   1476 	 multu	($15,$9)		# mul_add_c(a[3],b[1],c2,c3,c1);
   1477 	addu	$25,$1
   1478 	addu	$3,$25
   1479 	sltu	$1,$3,$25
   1480 	addu	$7,$1
   1481 	sw	$2,3*4($4)		# r[3]=c1;
   1482 
        	# Column 4 (-> r[4]): a[3]*b[1] + a[2]*b[2] + a[1]*b[3]
   1483 	mflo	($24,$15,$9)
   1484 	mfhi	($25,$15,$9)
   1485 	addu	$3,$24
   1486 	sltu	$1,$3,$24
   1487 	multu	($14,$10)		# mul_add_c(a[2],b[2],c2,c3,c1);
   1488 	addu	$25,$1
   1489 	addu	$7,$25
   1490 	sltu	$2,$7,$25		# c1 restarts as the carry out of c3
   1491 	mflo	($24,$14,$10)
   1492 	mfhi	($25,$14,$10)
   1493 	addu	$3,$24
   1494 	sltu	$1,$3,$24
   1495 	multu	($13,$11)		# mul_add_c(a[1],b[3],c2,c3,c1);
   1496 	addu	$25,$1
   1497 	addu	$7,$25
   1498 	sltu	$1,$7,$25
   1499 	addu	$2,$1
   1500 	mflo	($24,$13,$11)
   1501 	mfhi	($25,$13,$11)
   1502 	addu	$3,$24
   1503 	sltu	$1,$3,$24
   1504 	 multu	($14,$11)		# mul_add_c(a[2],b[3],c3,c1,c2);
   1505 	addu	$25,$1
   1506 	addu	$7,$25
   1507 	sltu	$1,$7,$25
   1508 	addu	$2,$1
   1509 	sw	$3,4*4($4)		# r[4]=c2;
   1510 
        	# Column 5 (-> r[5]): a[2]*b[3] + a[3]*b[2]
   1511 	mflo	($24,$14,$11)
   1512 	mfhi	($25,$14,$11)
   1513 	addu	$7,$24
   1514 	sltu	$1,$7,$24
   1515 	multu	($15,$10)		# mul_add_c(a[3],b[2],c3,c1,c2);
   1516 	addu	$25,$1
   1517 	addu	$2,$25
   1518 	sltu	$3,$2,$25		# c2 restarts as the carry out of c1
   1519 	mflo	($24,$15,$10)
   1520 	mfhi	($25,$15,$10)
   1521 	addu	$7,$24
   1522 	sltu	$1,$7,$24
   1523 	 multu	($15,$11)		# mul_add_c(a[3],b[3],c1,c2,c3);
   1524 	addu	$25,$1
   1525 	addu	$2,$25
   1526 	sltu	$1,$2,$25
   1527 	addu	$3,$1
   1528 	sw	$7,5*4($4)		# r[5]=c3;
   1529 
        	# Column 6 (-> r[6]): a[3]*b[3]; the remaining high word goes to r[7]
   1530 	mflo	($24,$15,$11)
   1531 	mfhi	($25,$15,$11)
   1532 	addu	$2,$24
   1533 	sltu	$1,$2,$24
   1534 	addu	$25,$1
   1535 	addu	$3,$25
   1536 	sw	$2,6*4($4)		# r[6]=c1;
   1537 	sw	$3,7*4($4)		# r[7]=c2;
   1538 
   1539 	.set	noreorder
   1540 	jr	$31
   1541 	nop				# explicit filler for the slot after jr (noreorder is in effect)
   1542 .end	bn_mul_comba4
   1543 
   1544 .align	5
   1545 .globl	bn_sqr_comba8
   1546 .ent	bn_sqr_comba8
        # bn_sqr_comba8: 8-word squaring on 32-bit words,
        # r[0..15] = a[0..7] * a[0..7], computed one result column at a time.
        # In the annotations below "b[j]" denotes the same array as a[j]:
        # only $5 is ever loaded from.
        #
        # Registers on entry:
        #   $4 = r (sixteen words are stored), $5 = a (eight words are loaded).
        #   $6 is never read as an input; it is overwritten as scratch.
        # Working registers:
        #   $12-$15 = a[0..3], $8-$11 = a[4..7]
        #   $2/$3/$7 = c1/c2/c3, a three-word column accumulator whose roles
        #              rotate by one position after every column
        #   $24/$25  = low/high half of the product being accumulated
        #   $1       = carry scratch (the file head sets noat)
        # No stack frame is used and no register is saved or restored.
        #
        # Off-diagonal products (a[i]*a[j] with i and j different) are counted
        # twice.  Column 1 doubles the product by shifting; every later
        # off-diagonal product is added to the accumulator two times.
        # Lines tagged "forward multiplication" start the multiply whose
        # result is fetched by the next mflo/mfhi pair, so that it overlaps
        # the carry handling of the current product.
        # multu()/mflo()/mfhi() with parenthesised operands are preprocessor
        # macros defined outside this chunk -- presumably in mips_arch.h.
   1547 bn_sqr_comba8:
   1548 	.set	reorder
   1549 	lw	$12,0($5)		# a[0]
   1550 	lw	$13,4($5)		# a[1]
   1551 	lw	$14,2*4($5)		# a[2]
   1552 	lw	$15,3*4($5)		# a[3]
   1553 
   1554 	multu	($12,$12)		# mul_add_c(a[0],b[0],c1,c2,c3);
   1555 	lw	$8,4*4($5)		# a[4]
   1556 	lw	$9,5*4($5)		# a[5]
   1557 	lw	$10,6*4($5)		# a[6]
   1558 	lw	$11,7*4($5)		# a[7]
   1559 	mflo	($2,$12,$12)
   1560 	mfhi	($3,$12,$12)
   1561 	sw	$2,0($4)		# r[0]=c1;
   1562 
        	# Column 1 (-> r[1]): 2*a[0]*a[1], doubled by a 64-bit left shift
   1563 	multu	($12,$13)		# mul_add_c2(a[0],b[1],c2,c3,c1);
   1564 	mflo	($24,$12,$13)
   1565 	mfhi	($25,$12,$13)
   1566 	slt	$2,$25,$0		# c1 = top bit of hi (bit shifted out by the doubling)
   1567 	sll	$25,1
   1568 	 multu	($14,$12)		# mul_add_c2(a[2],b[0],c3,c1,c2);
   1569 	slt	$6,$24,$0		# $6 = top bit of lo, moves into bit 0 of hi
   1570 	addu	$25,$6
   1571 	sll	$24,1
   1572 	addu	$3,$24
   1573 	sltu	$1,$3,$24
   1574 	addu	$7,$25,$1		# first write of c3: doubled hi + carry
   1575 	sw	$3,4($4)		# r[1]=c2;
        	# Column 2 (-> r[2]): 2*a[2]*a[0] + a[1]*a[1]
   1576 	mflo	($24,$14,$12)
   1577 	mfhi	($25,$14,$12)
   1578 	addu	$7,$24			# c3 += lo (first copy)
   1579 	sltu	$1,$7,$24		# $1 = carry of the first add
   1580 	 multu	($13,$13)		# forward multiplication: a[1]*a[1]
   1581 	addu	$7,$24			# c3 += lo (second copy)
   1582 	addu	$1,$25			# $1 = hi + first carry
   1583 	sltu	$24,$7,$24		# $24 = carry of the second add
   1584 	addu	$2,$1			# c1 += hi + first carry
   1585 	addu	$25,$24			# $25 = hi + second carry
   1586 	sltu	$3,$2,$1		# c2 restarts as the carry out of c1
   1587 	addu	$2,$25			# c1 += hi + second carry
   1588 	sltu	$25,$2,$25
   1589 	addu	$3,$25			# c2 += carry out of c1
   1590 	mflo	($24,$13,$13)
   1591 	mfhi	($25,$13,$13)
   1592 	addu	$7,$24
   1593 	sltu	$1,$7,$24
   1594 	 multu	($12,$15)		# mul_add_c2(a[0],b[3],c1,c2,c3);
   1595 	addu	$25,$1
   1596 	addu	$2,$25
   1597 	sltu	$1,$2,$25
   1598 	addu	$3,$1
   1599 	sw	$7,2*4($4)		# r[2]=c3;
        	# Column 3 (-> r[3]): 2*a[0]*a[3] + 2*a[1]*a[2]
   1600 	mflo	($24,$12,$15)
   1601 	mfhi	($25,$12,$15)
   1602 	addu	$2,$24
   1603 	sltu	$1,$2,$24
   1604 	 multu	($13,$14)		# forward multiplication: a[1]*a[2]
   1605 	addu	$2,$24
   1606 	addu	$1,$25
   1607 	sltu	$24,$2,$24
   1608 	addu	$3,$1
   1609 	addu	$25,$24
   1610 	sltu	$7,$3,$1		# c3 restarts as the carry out of c2
   1611 	addu	$3,$25
   1612 	sltu	$25,$3,$25
   1613 	addu	$7,$25
   1614 	mflo	($24,$13,$14)
   1615 	mfhi	($25,$13,$14)
   1616 	addu	$2,$24
   1617 	sltu	$1,$2,$24
   1618 	 multu	($8,$12)		# forward multiplication: a[4]*a[0]
   1619 	addu	$2,$24
   1620 	addu	$1,$25
   1621 	sltu	$24,$2,$24
   1622 	addu	$3,$1
   1623 	addu	$25,$24
   1624 	sltu	$1,$3,$1		# c3 is already live in this column:
   1625 	addu	$3,$25
   1626 	addu	$7,$1			# ... so the carry is added, not assigned
   1627 	sltu	$25,$3,$25
   1628 	addu	$7,$25
        	# Column 4 (-> r[4]): 2*a[4]*a[0] + 2*a[3]*a[1] + a[2]*a[2]
   1629 	mflo	($24,$8,$12)
   1630 	mfhi	($25,$8,$12)
   1631 	sw	$2,3*4($4)		# r[3]=c1;
   1632 	addu	$3,$24
   1633 	sltu	$1,$3,$24
   1634 	 multu	($15,$13)		# forward multiplication: a[3]*a[1]
   1635 	addu	$3,$24
   1636 	addu	$1,$25
   1637 	sltu	$24,$3,$24
   1638 	addu	$7,$1
   1639 	addu	$25,$24
   1640 	sltu	$2,$7,$1		# c1 restarts as the carry out of c3
   1641 	addu	$7,$25
   1642 	sltu	$25,$7,$25
   1643 	addu	$2,$25
   1644 	mflo	($24,$15,$13)
   1645 	mfhi	($25,$15,$13)
   1646 	addu	$3,$24
   1647 	sltu	$1,$3,$24
   1648 	 multu	($14,$14)		# forward multiplication: a[2]*a[2]
   1649 	addu	$3,$24
   1650 	addu	$1,$25
   1651 	sltu	$24,$3,$24
   1652 	addu	$7,$1
   1653 	addu	$25,$24
   1654 	sltu	$1,$7,$1
   1655 	addu	$7,$25
   1656 	addu	$2,$1
   1657 	sltu	$25,$7,$25
   1658 	addu	$2,$25
   1659 	mflo	($24,$14,$14)
   1660 	mfhi	($25,$14,$14)
   1661 	addu	$3,$24
   1662 	sltu	$1,$3,$24
   1663 	 multu	($12,$9)		# mul_add_c2(a[0],b[5],c3,c1,c2);
   1664 	addu	$25,$1
   1665 	addu	$7,$25
   1666 	sltu	$1,$7,$25
   1667 	addu	$2,$1
   1668 	sw	$3,4*4($4)		# r[4]=c2;
        	# Column 5 (-> r[5]): 2*a[0]*a[5] + 2*a[1]*a[4] + 2*a[2]*a[3]
   1669 	mflo	($24,$12,$9)
   1670 	mfhi	($25,$12,$9)
   1671 	addu	$7,$24
   1672 	sltu	$1,$7,$24
   1673 	 multu	($13,$8)		# forward multiplication: a[1]*a[4]
   1674 	addu	$7,$24
   1675 	addu	$1,$25
   1676 	sltu	$24,$7,$24
   1677 	addu	$2,$1
   1678 	addu	$25,$24
   1679 	sltu	$3,$2,$1		# c2 restarts as the carry out of c1
   1680 	addu	$2,$25
   1681 	sltu	$25,$2,$25
   1682 	addu	$3,$25
   1683 	mflo	($24,$13,$8)
   1684 	mfhi	($25,$13,$8)
   1685 	addu	$7,$24
   1686 	sltu	$1,$7,$24
   1687 	 multu	($14,$15)		# forward multiplication: a[2]*a[3]
   1688 	addu	$7,$24
   1689 	addu	$1,$25
   1690 	sltu	$24,$7,$24
   1691 	addu	$2,$1
   1692 	addu	$25,$24
   1693 	sltu	$1,$2,$1
   1694 	addu	$2,$25
   1695 	addu	$3,$1
   1696 	sltu	$25,$2,$25
   1697 	addu	$3,$25
   1698 	mflo	($24,$14,$15)
   1699 	mfhi	($25,$14,$15)
   1700 	addu	$7,$24
   1701 	sltu	$1,$7,$24
   1702 	 multu	($10,$12)		# forward multiplication: a[6]*a[0]
   1703 	addu	$7,$24
   1704 	addu	$1,$25
   1705 	sltu	$24,$7,$24
   1706 	addu	$2,$1
   1707 	addu	$25,$24
   1708 	sltu	$1,$2,$1
   1709 	addu	$2,$25
   1710 	addu	$3,$1
   1711 	sltu	$25,$2,$25
   1712 	addu	$3,$25
        	# Column 6 (-> r[6]): 2*a[6]*a[0] + 2*a[5]*a[1] + 2*a[4]*a[2] + a[3]*a[3]
   1713 	mflo	($24,$10,$12)
   1714 	mfhi	($25,$10,$12)
   1715 	sw	$7,5*4($4)		# r[5]=c3;
   1716 	addu	$2,$24
   1717 	sltu	$1,$2,$24
   1718 	 multu	($9,$13)		# forward multiplication: a[5]*a[1]
   1719 	addu	$2,$24
   1720 	addu	$1,$25
   1721 	sltu	$24,$2,$24
   1722 	addu	$3,$1
   1723 	addu	$25,$24
   1724 	sltu	$7,$3,$1		# c3 restarts as the carry out of c2
   1725 	addu	$3,$25
   1726 	sltu	$25,$3,$25
   1727 	addu	$7,$25
   1728 	mflo	($24,$9,$13)
   1729 	mfhi	($25,$9,$13)
   1730 	addu	$2,$24
   1731 	sltu	$1,$2,$24
   1732 	 multu	($8,$14)		# forward multiplication: a[4]*a[2]
   1733 	addu	$2,$24
   1734 	addu	$1,$25
   1735 	sltu	$24,$2,$24
   1736 	addu	$3,$1
   1737 	addu	$25,$24
   1738 	sltu	$1,$3,$1
   1739 	addu	$3,$25
   1740 	addu	$7,$1
   1741 	sltu	$25,$3,$25
   1742 	addu	$7,$25
   1743 	mflo	($24,$8,$14)
   1744 	mfhi	($25,$8,$14)
   1745 	addu	$2,$24
   1746 	sltu	$1,$2,$24
   1747 	 multu	($15,$15)		# forward multiplication: a[3]*a[3]
   1748 	addu	$2,$24
   1749 	addu	$1,$25
   1750 	sltu	$24,$2,$24
   1751 	addu	$3,$1
   1752 	addu	$25,$24
   1753 	sltu	$1,$3,$1
   1754 	addu	$3,$25
   1755 	addu	$7,$1
   1756 	sltu	$25,$3,$25
   1757 	addu	$7,$25
   1758 	mflo	($24,$15,$15)
   1759 	mfhi	($25,$15,$15)
   1760 	addu	$2,$24
   1761 	sltu	$1,$2,$24
   1762 	 multu	($12,$11)		# mul_add_c2(a[0],b[7],c2,c3,c1);
   1763 	addu	$25,$1
   1764 	addu	$3,$25
   1765 	sltu	$1,$3,$25
   1766 	addu	$7,$1
   1767 	sw	$2,6*4($4)		# r[6]=c1;
        	# Column 7 (-> r[7]): 2*(a[0]*a[7] + a[1]*a[6] + a[2]*a[5] + a[3]*a[4])
   1768 	mflo	($24,$12,$11)
   1769 	mfhi	($25,$12,$11)
   1770 	addu	$3,$24
   1771 	sltu	$1,$3,$24
   1772 	 multu	($13,$10)		# forward multiplication: a[1]*a[6]
   1773 	addu	$3,$24
   1774 	addu	$1,$25
   1775 	sltu	$24,$3,$24
   1776 	addu	$7,$1
   1777 	addu	$25,$24
   1778 	sltu	$2,$7,$1		# c1 restarts as the carry out of c3
   1779 	addu	$7,$25
   1780 	sltu	$25,$7,$25
   1781 	addu	$2,$25
   1782 	mflo	($24,$13,$10)
   1783 	mfhi	($25,$13,$10)
   1784 	addu	$3,$24
   1785 	sltu	$1,$3,$24
   1786 	 multu	($14,$9)		# forward multiplication: a[2]*a[5]
   1787 	addu	$3,$24
   1788 	addu	$1,$25
   1789 	sltu	$24,$3,$24
   1790 	addu	$7,$1
   1791 	addu	$25,$24
   1792 	sltu	$1,$7,$1
   1793 	addu	$7,$25
   1794 	addu	$2,$1
   1795 	sltu	$25,$7,$25
   1796 	addu	$2,$25
   1797 	mflo	($24,$14,$9)
   1798 	mfhi	($25,$14,$9)
   1799 	addu	$3,$24
   1800 	sltu	$1,$3,$24
   1801 	 multu	($15,$8)		# forward multiplication: a[3]*a[4]
   1802 	addu	$3,$24
   1803 	addu	$1,$25
   1804 	sltu	$24,$3,$24
   1805 	addu	$7,$1
   1806 	addu	$25,$24
   1807 	sltu	$1,$7,$1
   1808 	addu	$7,$25
   1809 	addu	$2,$1
   1810 	sltu	$25,$7,$25
   1811 	addu	$2,$25
   1812 	mflo	($24,$15,$8)
   1813 	mfhi	($25,$15,$8)
   1814 	addu	$3,$24
   1815 	sltu	$1,$3,$24
   1816 	 multu	($11,$13)		# forward multiplication: a[7]*a[1]
   1817 	addu	$3,$24
   1818 	addu	$1,$25
   1819 	sltu	$24,$3,$24
   1820 	addu	$7,$1
   1821 	addu	$25,$24
   1822 	sltu	$1,$7,$1
   1823 	addu	$7,$25
   1824 	addu	$2,$1
   1825 	sltu	$25,$7,$25
   1826 	addu	$2,$25
        	# Column 8 (-> r[8]): 2*(a[7]*a[1] + a[6]*a[2] + a[5]*a[3]) + a[4]*a[4]
   1827 	mflo	($24,$11,$13)
   1828 	mfhi	($25,$11,$13)
   1829 	sw	$3,7*4($4)		# r[7]=c2;
   1830 	addu	$7,$24
   1831 	sltu	$1,$7,$24
   1832 	 multu	($10,$14)		# forward multiplication: a[6]*a[2]
   1833 	addu	$7,$24
   1834 	addu	$1,$25
   1835 	sltu	$24,$7,$24
   1836 	addu	$2,$1
   1837 	addu	$25,$24
   1838 	sltu	$3,$2,$1		# c2 restarts as the carry out of c1
   1839 	addu	$2,$25
   1840 	sltu	$25,$2,$25
   1841 	addu	$3,$25
   1842 	mflo	($24,$10,$14)
   1843 	mfhi	($25,$10,$14)
   1844 	addu	$7,$24
   1845 	sltu	$1,$7,$24
   1846 	 multu	($9,$15)		# forward multiplication: a[5]*a[3]
   1847 	addu	$7,$24
   1848 	addu	$1,$25
   1849 	sltu	$24,$7,$24
   1850 	addu	$2,$1
   1851 	addu	$25,$24
   1852 	sltu	$1,$2,$1
   1853 	addu	$2,$25
   1854 	addu	$3,$1
   1855 	sltu	$25,$2,$25
   1856 	addu	$3,$25
   1857 	mflo	($24,$9,$15)
   1858 	mfhi	($25,$9,$15)
   1859 	addu	$7,$24
   1860 	sltu	$1,$7,$24
   1861 	 multu	($8,$8)		# forward multiplication: a[4]*a[4]
   1862 	addu	$7,$24
   1863 	addu	$1,$25
   1864 	sltu	$24,$7,$24
   1865 	addu	$2,$1
   1866 	addu	$25,$24
   1867 	sltu	$1,$2,$1
   1868 	addu	$2,$25
   1869 	addu	$3,$1
   1870 	sltu	$25,$2,$25
   1871 	addu	$3,$25
   1872 	mflo	($24,$8,$8)
   1873 	mfhi	($25,$8,$8)
   1874 	addu	$7,$24
   1875 	sltu	$1,$7,$24
   1876 	 multu	($14,$11)		# mul_add_c2(a[2],b[7],c1,c2,c3);
   1877 	addu	$25,$1
   1878 	addu	$2,$25
   1879 	sltu	$1,$2,$25
   1880 	addu	$3,$1
   1881 	sw	$7,8*4($4)		# r[8]=c3;
        	# Column 9 (-> r[9]): 2*(a[2]*a[7] + a[3]*a[6] + a[4]*a[5])
   1882 	mflo	($24,$14,$11)
   1883 	mfhi	($25,$14,$11)
   1884 	addu	$2,$24
   1885 	sltu	$1,$2,$24
   1886 	 multu	($15,$10)		# forward multiplication: a[3]*a[6]
   1887 	addu	$2,$24
   1888 	addu	$1,$25
   1889 	sltu	$24,$2,$24
   1890 	addu	$3,$1
   1891 	addu	$25,$24
   1892 	sltu	$7,$3,$1		# c3 restarts as the carry out of c2
   1893 	addu	$3,$25
   1894 	sltu	$25,$3,$25
   1895 	addu	$7,$25
   1896 	mflo	($24,$15,$10)
   1897 	mfhi	($25,$15,$10)
   1898 	addu	$2,$24
   1899 	sltu	$1,$2,$24
   1900 	 multu	($8,$9)		# forward multiplication: a[4]*a[5]
   1901 	addu	$2,$24
   1902 	addu	$1,$25
   1903 	sltu	$24,$2,$24
   1904 	addu	$3,$1
   1905 	addu	$25,$24
   1906 	sltu	$1,$3,$1
   1907 	addu	$3,$25
   1908 	addu	$7,$1
   1909 	sltu	$25,$3,$25
   1910 	addu	$7,$25
   1911 	mflo	($24,$8,$9)
   1912 	mfhi	($25,$8,$9)
   1913 	addu	$2,$24
   1914 	sltu	$1,$2,$24
   1915 	 multu	($11,$15)		# forward multiplication: a[7]*a[3]
   1916 	addu	$2,$24
   1917 	addu	$1,$25
   1918 	sltu	$24,$2,$24
   1919 	addu	$3,$1
   1920 	addu	$25,$24
   1921 	sltu	$1,$3,$1
   1922 	addu	$3,$25
   1923 	addu	$7,$1
   1924 	sltu	$25,$3,$25
   1925 	addu	$7,$25
        	# Column 10 (-> r[10]): 2*(a[7]*a[3] + a[6]*a[4]) + a[5]*a[5]
   1926 	mflo	($24,$11,$15)
   1927 	mfhi	($25,$11,$15)
   1928 	sw	$2,9*4($4)		# r[9]=c1;
   1929 	addu	$3,$24
   1930 	sltu	$1,$3,$24
   1931 	 multu	($10,$8)		# forward multiplication: a[6]*a[4]
   1932 	addu	$3,$24
   1933 	addu	$1,$25
   1934 	sltu	$24,$3,$24
   1935 	addu	$7,$1
   1936 	addu	$25,$24
   1937 	sltu	$2,$7,$1		# c1 restarts as the carry out of c3
   1938 	addu	$7,$25
   1939 	sltu	$25,$7,$25
   1940 	addu	$2,$25
   1941 	mflo	($24,$10,$8)
   1942 	mfhi	($25,$10,$8)
   1943 	addu	$3,$24
   1944 	sltu	$1,$3,$24
   1945 	 multu	($9,$9)		# forward multiplication: a[5]*a[5]
   1946 	addu	$3,$24
   1947 	addu	$1,$25
   1948 	sltu	$24,$3,$24
   1949 	addu	$7,$1
   1950 	addu	$25,$24
   1951 	sltu	$1,$7,$1
   1952 	addu	$7,$25
   1953 	addu	$2,$1
   1954 	sltu	$25,$7,$25
   1955 	addu	$2,$25
   1956 	mflo	($24,$9,$9)
   1957 	mfhi	($25,$9,$9)
   1958 	addu	$3,$24
   1959 	sltu	$1,$3,$24
   1960 	 multu	($8,$11)		# mul_add_c2(a[4],b[7],c3,c1,c2);
   1961 	addu	$25,$1
   1962 	addu	$7,$25
   1963 	sltu	$1,$7,$25
   1964 	addu	$2,$1
   1965 	sw	$3,10*4($4)		# r[10]=c2;
        	# Column 11 (-> r[11]): 2*(a[4]*a[7] + a[5]*a[6])
   1966 	mflo	($24,$8,$11)
   1967 	mfhi	($25,$8,$11)
   1968 	addu	$7,$24
   1969 	sltu	$1,$7,$24
   1970 	 multu	($9,$10)		# forward multiplication: a[5]*a[6]
   1971 	addu	$7,$24
   1972 	addu	$1,$25
   1973 	sltu	$24,$7,$24
   1974 	addu	$2,$1
   1975 	addu	$25,$24
   1976 	sltu	$3,$2,$1		# c2 restarts as the carry out of c1
   1977 	addu	$2,$25
   1978 	sltu	$25,$2,$25
   1979 	addu	$3,$25
   1980 	mflo	($24,$9,$10)
   1981 	mfhi	($25,$9,$10)
   1982 	addu	$7,$24
   1983 	sltu	$1,$7,$24
   1984 	 multu	($11,$9)		# forward multiplication: a[7]*a[5]
   1985 	addu	$7,$24
   1986 	addu	$1,$25
   1987 	sltu	$24,$7,$24
   1988 	addu	$2,$1
   1989 	addu	$25,$24
   1990 	sltu	$1,$2,$1
   1991 	addu	$2,$25
   1992 	addu	$3,$1
   1993 	sltu	$25,$2,$25
   1994 	addu	$3,$25
        	# Column 12 (-> r[12]): 2*a[7]*a[5] + a[6]*a[6]
   1995 	mflo	($24,$11,$9)
   1996 	mfhi	($25,$11,$9)
   1997 	sw	$7,11*4($4)		# r[11]=c3;
   1998 	addu	$2,$24
   1999 	sltu	$1,$2,$24
   2000 	 multu	($10,$10)		# forward multiplication: a[6]*a[6]
   2001 	addu	$2,$24
   2002 	addu	$1,$25
   2003 	sltu	$24,$2,$24
   2004 	addu	$3,$1
   2005 	addu	$25,$24
   2006 	sltu	$7,$3,$1		# c3 restarts as the carry out of c2
   2007 	addu	$3,$25
   2008 	sltu	$25,$3,$25
   2009 	addu	$7,$25
   2010 	mflo	($24,$10,$10)
   2011 	mfhi	($25,$10,$10)
   2012 	addu	$2,$24
   2013 	sltu	$1,$2,$24
   2014 	 multu	($10,$11)		# mul_add_c2(a[6],b[7],c2,c3,c1);
   2015 	addu	$25,$1
   2016 	addu	$3,$25
   2017 	sltu	$1,$3,$25
   2018 	addu	$7,$1
   2019 	sw	$2,12*4($4)		# r[12]=c1;
        	# Column 13 (-> r[13]): 2*a[6]*a[7]
   2020 	mflo	($24,$10,$11)
   2021 	mfhi	($25,$10,$11)
   2022 	addu	$3,$24
   2023 	sltu	$1,$3,$24
   2024 	 multu	($11,$11)		# forward multiplication: a[7]*a[7]
   2025 	addu	$3,$24
   2026 	addu	$1,$25
   2027 	sltu	$24,$3,$24
   2028 	addu	$7,$1
   2029 	addu	$25,$24
   2030 	sltu	$2,$7,$1		# c1 restarts as the carry out of c3
   2031 	addu	$7,$25
   2032 	sltu	$25,$7,$25
   2033 	addu	$2,$25
        	# Column 14 (-> r[14]): a[7]*a[7]; the remaining high word goes to r[15]
   2034 	mflo	($24,$11,$11)
   2035 	mfhi	($25,$11,$11)
   2036 	sw	$3,13*4($4)		# r[13]=c2;
   2037 
   2038 	addu	$7,$24
   2039 	sltu	$1,$7,$24
   2040 	addu	$25,$1
   2041 	addu	$2,$25
   2042 	sw	$7,14*4($4)		# r[14]=c3;
   2043 	sw	$2,15*4($4)		# r[15]=c1;
   2044 
   2045 	.set	noreorder
   2046 	jr	$31
   2047 	nop				# explicit filler for the slot after jr (noreorder is in effect)
   2048 .end	bn_sqr_comba8
   2049 
   2050 .align	5
   2051 .globl	bn_sqr_comba4
   2052 .ent	bn_sqr_comba4
        # bn_sqr_comba4: 4-word squaring on 32-bit words,
        # r[0..7] = a[0..3] * a[0..3], computed one result column at a time.
        # In the annotations below "b[j]" denotes the same array as a[j]:
        # only $5 is ever loaded from.
        #
        # Registers on entry:
        #   $4 = r (eight words are stored), $5 = a (four words are loaded).
        #   $6 is never read as an input; it is overwritten as scratch.
        # Working registers:
        #   $12-$15 = a[0..3]
        #   $2/$3/$7 = c1/c2/c3, a three-word column accumulator whose roles
        #              rotate by one position after every column
        #   $24/$25  = low/high half of the product being accumulated
        #   $1       = carry scratch (the file head sets noat)
        # No stack frame is used and no register is saved or restored.
        #
        # Off-diagonal products (a[i]*a[j] with i and j different) are counted
        # twice.  Column 1 doubles the product by shifting; every later
        # off-diagonal product is added to the accumulator two times.
        # Lines tagged "forward multiplication" start the multiply whose
        # result is fetched by the next mflo/mfhi pair.
        # multu()/mflo()/mfhi() with parenthesised operands are preprocessor
        # macros defined outside this chunk -- presumably in mips_arch.h.
   2053 bn_sqr_comba4:
   2054 	.set	reorder
   2055 	lw	$12,0($5)		# a[0]
   2056 	lw	$13,4($5)		# a[1]
   2057 	multu	($12,$12)		# mul_add_c(a[0],b[0],c1,c2,c3);
   2058 	lw	$14,2*4($5)		# a[2]
   2059 	lw	$15,3*4($5)		# a[3]
   2060 	mflo	($2,$12,$12)
   2061 	mfhi	($3,$12,$12)
   2062 	sw	$2,0($4)		# r[0]=c1;
   2063 
        	# Column 1 (-> r[1]): 2*a[0]*a[1], doubled by a 64-bit left shift
   2064 	multu	($12,$13)		# mul_add_c2(a[0],b[1],c2,c3,c1);
   2065 	mflo	($24,$12,$13)
   2066 	mfhi	($25,$12,$13)
   2067 	slt	$2,$25,$0		# c1 = top bit of hi (bit shifted out by the doubling)
   2068 	sll	$25,1
   2069 	 multu	($14,$12)		# mul_add_c2(a[2],b[0],c3,c1,c2);
   2070 	slt	$6,$24,$0		# $6 = top bit of lo, moves into bit 0 of hi
   2071 	addu	$25,$6
   2072 	sll	$24,1
   2073 	addu	$3,$24
   2074 	sltu	$1,$3,$24
   2075 	addu	$7,$25,$1		# first write of c3: doubled hi + carry
   2076 	sw	$3,4($4)		# r[1]=c2;
        	# Column 2 (-> r[2]): 2*a[2]*a[0] + a[1]*a[1]
   2077 	mflo	($24,$14,$12)
   2078 	mfhi	($25,$14,$12)
   2079 	addu	$7,$24			# c3 += lo (first copy)
   2080 	sltu	$1,$7,$24		# $1 = carry of the first add
   2081 	 multu	($13,$13)		# forward multiplication: a[1]*a[1]
   2082 	addu	$7,$24			# c3 += lo (second copy)
   2083 	addu	$1,$25			# $1 = hi + first carry
   2084 	sltu	$24,$7,$24		# $24 = carry of the second add
   2085 	addu	$2,$1			# c1 += hi + first carry
   2086 	addu	$25,$24			# $25 = hi + second carry
   2087 	sltu	$3,$2,$1		# c2 restarts as the carry out of c1
   2088 	addu	$2,$25			# c1 += hi + second carry
   2089 	sltu	$25,$2,$25
   2090 	addu	$3,$25			# c2 += carry out of c1
   2091 	mflo	($24,$13,$13)
   2092 	mfhi	($25,$13,$13)
   2093 	addu	$7,$24
   2094 	sltu	$1,$7,$24
   2095 	 multu	($12,$15)		# mul_add_c2(a[0],b[3],c1,c2,c3);
   2096 	addu	$25,$1
   2097 	addu	$2,$25
   2098 	sltu	$1,$2,$25
   2099 	addu	$3,$1
   2100 	sw	$7,2*4($4)		# r[2]=c3;
        	# Column 3 (-> r[3]): 2*a[0]*a[3] + 2*a[1]*a[2]
   2101 	mflo	($24,$12,$15)
   2102 	mfhi	($25,$12,$15)
   2103 	addu	$2,$24
   2104 	sltu	$1,$2,$24
   2105 	 multu	($13,$14)		# forward multiplication: a[1]*a[2]
   2106 	addu	$2,$24
   2107 	addu	$1,$25
   2108 	sltu	$24,$2,$24
   2109 	addu	$3,$1
   2110 	addu	$25,$24
   2111 	sltu	$7,$3,$1		# c3 restarts as the carry out of c2
   2112 	addu	$3,$25
   2113 	sltu	$25,$3,$25
   2114 	addu	$7,$25
   2115 	mflo	($24,$13,$14)
   2116 	mfhi	($25,$13,$14)
   2117 	addu	$2,$24
   2118 	sltu	$1,$2,$24
   2119 	 multu	($15,$13)		# forward multiplication: a[3]*a[1]
   2120 	addu	$2,$24
   2121 	addu	$1,$25
   2122 	sltu	$24,$2,$24
   2123 	addu	$3,$1
   2124 	addu	$25,$24
   2125 	sltu	$1,$3,$1		# c3 is already live in this column:
   2126 	addu	$3,$25
   2127 	addu	$7,$1			# ... so the carry is added, not assigned
   2128 	sltu	$25,$3,$25
   2129 	addu	$7,$25
        	# Column 4 (-> r[4]): 2*a[3]*a[1] + a[2]*a[2]
   2130 	mflo	($24,$15,$13)
   2131 	mfhi	($25,$15,$13)
   2132 	sw	$2,3*4($4)		# r[3]=c1;
   2133 	addu	$3,$24
   2134 	sltu	$1,$3,$24
   2135 	 multu	($14,$14)		# forward multiplication: a[2]*a[2]
   2136 	addu	$3,$24
   2137 	addu	$1,$25
   2138 	sltu	$24,$3,$24
   2139 	addu	$7,$1
   2140 	addu	$25,$24
   2141 	sltu	$2,$7,$1		# c1 restarts as the carry out of c3
   2142 	addu	$7,$25
   2143 	sltu	$25,$7,$25
   2144 	addu	$2,$25
   2145 	mflo	($24,$14,$14)
   2146 	mfhi	($25,$14,$14)
   2147 	addu	$3,$24
   2148 	sltu	$1,$3,$24
   2149 	 multu	($14,$15)		# mul_add_c2(a[2],b[3],c3,c1,c2);
   2150 	addu	$25,$1
   2151 	addu	$7,$25
   2152 	sltu	$1,$7,$25
   2153 	addu	$2,$1
   2154 	sw	$3,4*4($4)		# r[4]=c2;
        	# Column 5 (-> r[5]): 2*a[2]*a[3]
   2155 	mflo	($24,$14,$15)
   2156 	mfhi	($25,$14,$15)
   2157 	addu	$7,$24
   2158 	sltu	$1,$7,$24
   2159 	 multu	($15,$15)		# forward multiplication: a[3]*a[3]
   2160 	addu	$7,$24
   2161 	addu	$1,$25
   2162 	sltu	$24,$7,$24
   2163 	addu	$2,$1
   2164 	addu	$25,$24
   2165 	sltu	$3,$2,$1		# c2 restarts as the carry out of c1
   2166 	addu	$2,$25
   2167 	sltu	$25,$2,$25
   2168 	addu	$3,$25
        	# Column 6 (-> r[6]): a[3]*a[3]; the remaining high word goes to r[7]
   2169 	mflo	($24,$15,$15)
   2170 	mfhi	($25,$15,$15)
   2171 	sw	$7,5*4($4)		# r[5]=c3;
   2172 
   2173 	addu	$2,$24
   2174 	sltu	$1,$2,$24
   2175 	addu	$25,$1
   2176 	addu	$3,$25
   2177 	sw	$2,6*4($4)		# r[6]=c1;
   2178 	sw	$3,7*4($4)		# r[7]=c2;
   2179 
   2180 	.set	noreorder
   2181 	jr	$31
   2182 	nop				# explicit filler for the slot after jr (noreorder is in effect)
   2183 .end	bn_sqr_comba4
   2184