mips.S revision 1.4 1 #if !(defined (__mips_isa_rev) && (__mips_isa_rev >= 6))
2 .set mips2
3 #endif
4 #include "mips_arch.h"
5
# Divide helpers, selected by target architecture.
#   MIPS64R6 and MIPS32R6: the two-operand divide macro expands to nothing
#     and mfqt and mfrm expand to three-operand divide and modulo
#     instructions that write the result directly into rd.
#   Otherwise: the two-operand divide macro issues the divide with $0 as
#     destination, and mfqt and mfrm fetch the result with mflo and mfhi.
# NOTE(review): the MIPS64R6 branch defines ddivu, while the code in this
# file invokes divu - confirm this flavour is never built for MIPS64R6.
6 #if defined(_MIPS_ARCH_MIPS64R6)
7 # define ddivu(rs,rt)
8 # define mfqt(rd,rs,rt) ddivu rd,rs,rt
9 # define mfrm(rd,rs,rt) dmodu rd,rs,rt
10 #elif defined(_MIPS_ARCH_MIPS32R6)
11 # define divu(rs,rt)
12 # define mfqt(rd,rs,rt) divu rd,rs,rt
13 # define mfrm(rd,rs,rt) modu rd,rs,rt
14 #else
15 # define divu(rs,rt) divu $0,rs,rt
16 # define mfqt(rd,rs,rt) mflo rd
17 # define mfrm(rd,rs,rt) mfhi rd
18 #endif
19
# Read-only data: two NUL-terminated identification strings that end up in
# the object file.
20 .rdata
21 .asciiz "mips3.s, Version 1.2"
22 .asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro (at) fy.chalmers.se>"
23
# Code section. With noat in effect the assembler does not use $1 ($at) on
# its own; the routines in this file use $1 explicitly as a scratch register.
24 .text
25 .set noat
26
# bn_mul_add_words - exported entry point.
# Registers as used by the code:
#   $4  words that are read, accumulated into and written back
#   $5  source words
#   $6  word count (tested as a signed value)
#   $7  multiplier word
# Returns the final carry word in $2; a count that is not positive returns 0
# without touching memory.
# NOTE(review): presumably the OpenSSL bn_mul_add_words(rp, ap, num, w)
# primitive under the o32 argument registers - confirm against the C prototype.
27 .align 5
28 .globl bn_mul_add_words
29 .ent bn_mul_add_words
30 bn_mul_add_words:
31 .set noreorder
32 bgtz $6,bn_mul_add_words_internal # count > 0: go do the work
33 move $2,$0 # delay slot: carry starts at zero on both paths
34 jr $31 # count <= 0: return 0
35 move $4,$2 # delay slot: the result is mirrored into $4
36 .end bn_mul_add_words
37
# bn_mul_add_words_internal - worker reached from bn_mul_add_words with
# $6 > 0 and $2 = 0.
# For every word: the word at $5 is multiplied by $7, the low product half
# and the incoming carry $2 are added to the word at $4, the sum is stored
# back, and $2 becomes the high product half plus the carries out of the
# two additions.
# The main loop handles four words per pass while ($6 & -4) is positive; the
# tail then handles the remaining one to three words.
# Scratch registers: $1, $3 (constant -4), $8-$15. Result: $2, mirrored in $4.
# The parenthesised multu, mflo and mfhi forms are macro invocations that are
# not defined in this file - presumably supplied by mips_arch.h.
38 .align 5
39 .ent bn_mul_add_words_internal
40 bn_mul_add_words_internal:
41 .set reorder
42 li $3,-4 # mask that clears the two low bits of the count
43 and $8,$6,$3 # count rounded down to a multiple of four
44 beqz $8,.L_bn_mul_add_words_tail # fewer than four words: tail only
45
46 .L_bn_mul_add_words_loop:
# Word 0 (operands for words 1 and 2 are loaded early).
47 lw $12,0($5)
48 multu ($12,$7)
49 lw $13,0($4)
50 lw $14,4($5)
51 lw $15,4($4)
52 lw $8,2*4($5)
53 lw $9,2*4($4)
54 addu $13,$2
55 sltu $2,$13,$2 # All manuals say it "compares 32-bit
56 # values", but it seems to work fine
57 # even on 64-bit registers.
58 mflo ($1,$12,$7)
59 mfhi ($12,$12,$7)
60 addu $13,$1
61 addu $2,$12
62 multu ($14,$7)
63 sltu $1,$13,$1
64 sw $13,0($4)
65 addu $2,$1
66
# Word 1 (operands for word 3 are loaded here).
67 lw $10,3*4($5)
68 lw $11,3*4($4)
69 addu $15,$2
70 sltu $2,$15,$2
71 mflo ($1,$14,$7)
72 mfhi ($14,$14,$7)
73 addu $15,$1
74 addu $2,$14
75 multu ($8,$7)
76 sltu $1,$15,$1
77 sw $15,4($4)
78 addu $2,$1
79
# Word 2. Count and both pointers advance here, which is why the remaining
# stores of this pass use negative offsets.
80 subu $6,4
81 addu $4,4*4
82 addu $5,4*4
83 addu $9,$2
84 sltu $2,$9,$2
85 mflo ($1,$8,$7)
86 mfhi ($8,$8,$7)
87 addu $9,$1
88 addu $2,$8
89 multu ($10,$7)
90 sltu $1,$9,$1
91 sw $9,-2*4($4)
92 addu $2,$1
93
94
# Word 3. $8 is recomputed as the number of words left in whole groups of four.
95 and $8,$6,$3
96 addu $11,$2
97 sltu $2,$11,$2
98 mflo ($1,$10,$7)
99 mfhi ($10,$10,$7)
100 addu $11,$1
101 addu $2,$10
102 sltu $1,$11,$1
103 sw $11,-4($4)
104 .set noreorder
105 bgtz $8,.L_bn_mul_add_words_loop
106 addu $2,$1 # delay slot: fold the last carry into $2
107
108 beqz $6,.L_bn_mul_add_words_return # nothing left over
109 nop
110
# Tail: one to three remaining words, same per-word sequence as the loop.
111 .L_bn_mul_add_words_tail:
112 .set reorder
113 lw $12,0($5)
114 multu ($12,$7)
115 lw $13,0($4)
116 subu $6,1
117 addu $13,$2
118 sltu $2,$13,$2
119 mflo ($1,$12,$7)
120 mfhi ($12,$12,$7)
121 addu $13,$1
122 addu $2,$12
123 sltu $1,$13,$1
124 sw $13,0($4)
125 addu $2,$1
126 beqz $6,.L_bn_mul_add_words_return
127
128 lw $12,4($5)
129 multu ($12,$7)
130 lw $13,4($4)
131 subu $6,1
132 addu $13,$2
133 sltu $2,$13,$2
134 mflo ($1,$12,$7)
135 mfhi ($12,$12,$7)
136 addu $13,$1
137 addu $2,$12
138 sltu $1,$13,$1
139 sw $13,4($4)
140 addu $2,$1
141 beqz $6,.L_bn_mul_add_words_return
142
# Third tail word: no count check is needed, at most three words reach the tail.
143 lw $12,2*4($5)
144 multu ($12,$7)
145 lw $13,2*4($4)
146 addu $13,$2
147 sltu $2,$13,$2
148 mflo ($1,$12,$7)
149 mfhi ($12,$12,$7)
150 addu $13,$1
151 addu $2,$12
152 sltu $1,$13,$1
153 sw $13,2*4($4)
154 addu $2,$1
155
156 .L_bn_mul_add_words_return:
157 .set noreorder
158 jr $31
159 move $4,$2 # delay slot: the carry is mirrored into $4
160 .end bn_mul_add_words_internal
161
# bn_mul_words - exported entry point.
# Registers as used by the code:
#   $4  destination words (written only)
#   $5  source words
#   $6  word count (tested as a signed value)
#   $7  multiplier word
# Returns the final carry word in $2; a count that is not positive returns 0
# without touching memory.
# NOTE(review): presumably the OpenSSL bn_mul_words(rp, ap, num, w) primitive -
# confirm against the C prototype.
162 .align 5
163 .globl bn_mul_words
164 .ent bn_mul_words
165 bn_mul_words:
166 .set noreorder
167 bgtz $6,bn_mul_words_internal # count > 0: go do the work
168 move $2,$0 # delay slot: carry starts at zero on both paths
169 jr $31 # count <= 0: return 0
170 move $4,$2 # delay slot: the result is mirrored into $4
171 .end bn_mul_words
172
# bn_mul_words_internal - worker reached from bn_mul_words with $6 > 0 and
# $2 = 0.
# For every word: the word at $5 is multiplied by $7, the low product half
# plus the incoming carry $2 is stored at $4, and $2 becomes the high
# product half plus the carry out of that addition.
# The main loop handles four words per pass while ($6 & -4) is positive; the
# tail then handles the remaining one to three words.
# Scratch registers: $1, $3 (constant -4), $8-$15. Result: $2, mirrored in $4.
# The parenthesised multu, mflo and mfhi forms are macro invocations that are
# not defined in this file - presumably supplied by mips_arch.h.
173 .align 5
174 .ent bn_mul_words_internal
175 bn_mul_words_internal:
176 .set reorder
177 li $3,-4 # mask that clears the two low bits of the count
178 and $8,$6,$3 # count rounded down to a multiple of four
179 beqz $8,.L_bn_mul_words_tail # fewer than four words: tail only
180
181 .L_bn_mul_words_loop:
# Word 0 (the other three source words of this pass are loaded up front).
182 lw $12,0($5)
183 multu ($12,$7)
184 lw $14,4($5)
185 lw $8,2*4($5)
186 lw $10,3*4($5)
187 mflo ($1,$12,$7)
188 mfhi ($12,$12,$7)
189 addu $2,$1
190 sltu $13,$2,$1
191 multu ($14,$7)
192 sw $2,0($4)
193 addu $2,$13,$12
194
# Word 1. Count and both pointers advance here, which is why the remaining
# stores of this pass use negative offsets.
195 subu $6,4
196 addu $4,4*4
197 addu $5,4*4
198 mflo ($1,$14,$7)
199 mfhi ($14,$14,$7)
200 addu $2,$1
201 sltu $15,$2,$1
202 multu ($8,$7)
203 sw $2,-3*4($4)
204 addu $2,$15,$14
205
# Word 2.
206 mflo ($1,$8,$7)
207 mfhi ($8,$8,$7)
208 addu $2,$1
209 sltu $9,$2,$1
210 multu ($10,$7)
211 sw $2,-2*4($4)
212 addu $2,$9,$8
213
# Word 3. $8 is recomputed as the number of words left in whole groups of four.
214 and $8,$6,$3
215 mflo ($1,$10,$7)
216 mfhi ($10,$10,$7)
217 addu $2,$1
218 sltu $11,$2,$1
219 sw $2,-4($4)
220 .set noreorder
221 bgtz $8,.L_bn_mul_words_loop
222 addu $2,$11,$10 # delay slot: next carry = carry out + high half
223
224 beqz $6,.L_bn_mul_words_return # nothing left over
225 nop
226
# Tail: one to three remaining words, same per-word sequence as the loop.
227 .L_bn_mul_words_tail:
228 .set reorder
229 lw $12,0($5)
230 multu ($12,$7)
231 subu $6,1
232 mflo ($1,$12,$7)
233 mfhi ($12,$12,$7)
234 addu $2,$1
235 sltu $13,$2,$1
236 sw $2,0($4)
237 addu $2,$13,$12
238 beqz $6,.L_bn_mul_words_return
239
240 lw $12,4($5)
241 multu ($12,$7)
242 subu $6,1
243 mflo ($1,$12,$7)
244 mfhi ($12,$12,$7)
245 addu $2,$1
246 sltu $13,$2,$1
247 sw $2,4($4)
248 addu $2,$13,$12
249 beqz $6,.L_bn_mul_words_return
250
# Third tail word: no count check is needed, at most three words reach the tail.
251 lw $12,2*4($5)
252 multu ($12,$7)
253 mflo ($1,$12,$7)
254 mfhi ($12,$12,$7)
255 addu $2,$1
256 sltu $13,$2,$1
257 sw $2,2*4($4)
258 addu $2,$13,$12
259
260 .L_bn_mul_words_return:
261 .set noreorder
262 jr $31
263 move $4,$2 # delay slot: the carry is mirrored into $4
264 .end bn_mul_words_internal
265
# bn_sqr_words - exported entry point.
# Registers as used by the code:
#   $4  destination, two words written per source word
#   $5  source words
#   $6  word count (tested as a signed value)
# $2 is cleared here; a count that is not positive returns without touching
# memory.
# NOTE(review): presumably the OpenSSL bn_sqr_words(rp, ap, num) primitive -
# confirm against the C prototype.
266 .align 5
267 .globl bn_sqr_words
268 .ent bn_sqr_words
269 bn_sqr_words:
270 .set noreorder
271 bgtz $6,bn_sqr_words_internal # count > 0: go do the work
272 move $2,$0 # delay slot: $2 is zero on both paths
273 jr $31 # count <= 0: nothing to do
274 move $4,$2 # delay slot: $4 = 0 as well
275 .end bn_sqr_words
276
# bn_sqr_words_internal - worker reached from bn_sqr_words with $6 > 0 and
# $2 = 0.
# Every source word at $5 is multiplied by itself; the low product half and
# then the high product half are stored as two consecutive words at $4.
# No carry is propagated between words.
# The main loop handles four source words (eight result words) per pass while
# ($6 & -4) is positive; the tail handles the remaining one to three words.
# Scratch registers: $3 (constant -4), $8-$15. $2 is never written here, so
# the value copied into $4 at the end is the zero set by the entry stub.
# The parenthesised multu, mflo and mfhi forms are macro invocations that are
# not defined in this file - presumably supplied by mips_arch.h.
277 .align 5
278 .ent bn_sqr_words_internal
279 bn_sqr_words_internal:
280 .set reorder
281 li $3,-4 # mask that clears the two low bits of the count
282 and $8,$6,$3 # count rounded down to a multiple of four
283 beqz $8,.L_bn_sqr_words_tail # fewer than four words: tail only
284
285 .L_bn_sqr_words_loop:
# Source word 0 (the other three source words are loaded up front).
286 lw $12,0($5)
287 multu ($12,$12)
288 lw $14,4($5)
289 lw $8,2*4($5)
290 lw $10,3*4($5)
291 mflo ($13,$12,$12)
292 mfhi ($12,$12,$12)
293 sw $13,0($4)
294 sw $12,4($4)
295
# Source word 1. Count and pointers advance here ($4 by eight words, $5 by
# four), so the remaining stores of this pass use negative offsets.
296 multu ($14,$14)
297 subu $6,4
298 addu $4,8*4
299 addu $5,4*4
300 mflo ($15,$14,$14)
301 mfhi ($14,$14,$14)
302 sw $15,-6*4($4)
303 sw $14,-5*4($4)
304
# Source word 2.
305 multu ($8,$8)
306 mflo ($9,$8,$8)
307 mfhi ($8,$8,$8)
308 sw $9,-4*4($4)
309 sw $8,-3*4($4)
310
311
# Source word 3. $8 is recomputed as the number of words left in whole
# groups of four.
312 multu ($10,$10)
313 and $8,$6,$3
314 mflo ($11,$10,$10)
315 mfhi ($10,$10,$10)
316 sw $11,-2*4($4)
317
318 .set noreorder
319 bgtz $8,.L_bn_sqr_words_loop
320 sw $10,-4($4) # delay slot: store the last high half
321
322 beqz $6,.L_bn_sqr_words_return # nothing left over
323 nop
324
# Tail: one to three remaining source words.
325 .L_bn_sqr_words_tail:
326 .set reorder
327 lw $12,0($5)
328 multu ($12,$12)
329 subu $6,1
330 mflo ($13,$12,$12)
331 mfhi ($12,$12,$12)
332 sw $13,0($4)
333 sw $12,4($4)
334 beqz $6,.L_bn_sqr_words_return
335
336 lw $12,4($5)
337 multu ($12,$12)
338 subu $6,1
339 mflo ($13,$12,$12)
340 mfhi ($12,$12,$12)
341 sw $13,2*4($4)
342 sw $12,3*4($4)
343 beqz $6,.L_bn_sqr_words_return
344
# Third tail word: no count check is needed, at most three words reach the tail.
345 lw $12,2*4($5)
346 multu ($12,$12)
347 mflo ($13,$12,$12)
348 mfhi ($12,$12,$12)
349 sw $13,4*4($4)
350 sw $12,5*4($4)
351
352 .L_bn_sqr_words_return:
353 .set noreorder
354 jr $31
355 move $4,$2 # delay slot: $4 = $2, which is still zero
356
357 .end bn_sqr_words_internal
358
# bn_add_words - exported entry point.
# Registers as used by the code:
#   $4  destination words
#   $5  first operand words
#   $6  second operand words
#   $7  word count (tested as a signed value)
# Returns the final carry in $2; a count that is not positive returns 0
# without touching memory.
# NOTE(review): presumably the OpenSSL bn_add_words(rp, ap, bp, num)
# primitive - confirm against the C prototype.
359 .align 5
360 .globl bn_add_words
361 .ent bn_add_words
362 bn_add_words:
363 .set noreorder
364 bgtz $7,bn_add_words_internal # count > 0: go do the work
365 move $2,$0 # delay slot: carry starts at zero on both paths
366 jr $31 # count <= 0: return 0
367 move $4,$2 # delay slot: the result is mirrored into $4
368 .end bn_add_words
369
# bn_add_words_internal - worker reached from bn_add_words with $7 > 0 and
# $2 = 0.
# For every word: the words at $5 and $6 are added, the incoming carry $2 is
# added on top, the result is stored at $4, and $2 becomes the sum of the
# carries out of the two additions.
# The main loop handles four words per pass while ($7 & -4) is positive; the
# tail then handles the remaining one to three words.
# Scratch registers: $1, $3 (constant -4), $8-$15, $24, $25.
# Result: $2, mirrored in $4.
370 .align 5
371 .ent bn_add_words_internal
372 bn_add_words_internal:
373 .set reorder
374 li $3,-4 # mask that clears the two low bits of the count
375 and $1,$7,$3 # count rounded down to a multiple of four
376 beqz $1,.L_bn_add_words_tail # fewer than four words: tail only
377
378 .L_bn_add_words_loop:
# Load all eight operands; count and pointers are advanced in between, so
# the later loads and all stores of this pass use negative offsets.
379 lw $12,0($5)
380 lw $8,0($6)
381 subu $7,4
382 lw $13,4($5)
383 and $1,$7,$3 # words left in whole groups of four, tested at the loop end
384 lw $14,2*4($5)
385 addu $6,4*4
386 lw $15,3*4($5)
387 addu $4,4*4
388 lw $9,-3*4($6)
389 addu $5,4*4
390 lw $10,-2*4($6)
391 lw $11,-4($6)
# Word 0: operand sum, then carry-in; the two carries out are summed into $2.
392 addu $8,$12
393 sltu $24,$8,$12
394 addu $12,$8,$2
395 sltu $2,$12,$8
396 sw $12,-4*4($4)
397 addu $2,$24
398
# Word 1.
399 addu $9,$13
400 sltu $25,$9,$13
401 addu $13,$9,$2
402 sltu $2,$13,$9
403 sw $13,-3*4($4)
404 addu $2,$25
405
# Word 2.
406 addu $10,$14
407 sltu $24,$10,$14
408 addu $14,$10,$2
409 sltu $2,$14,$10
410 sw $14,-2*4($4)
411 addu $2,$24
412
# Word 3.
413 addu $11,$15
414 sltu $25,$11,$15
415 addu $15,$11,$2
416 sltu $2,$15,$11
417 sw $15,-4($4)
418
419 .set noreorder
420 bgtz $1,.L_bn_add_words_loop
421 addu $2,$25 # delay slot: fold the last carry into $2
422
423 beqz $7,.L_bn_add_words_return # nothing left over
424 nop
425
# Tail: one to three remaining words, same per-word sequence as the loop.
426 .L_bn_add_words_tail:
427 .set reorder
428 lw $12,0($5)
429 lw $8,0($6)
430 addu $8,$12
431 subu $7,1
432 sltu $24,$8,$12
433 addu $12,$8,$2
434 sltu $2,$12,$8
435 sw $12,0($4)
436 addu $2,$24
437 beqz $7,.L_bn_add_words_return
438
439 lw $13,4($5)
440 lw $9,4($6)
441 addu $9,$13
442 subu $7,1
443 sltu $25,$9,$13
444 addu $13,$9,$2
445 sltu $2,$13,$9
446 sw $13,4($4)
447 addu $2,$25
448 beqz $7,.L_bn_add_words_return
449
# Third tail word: no count check is needed, at most three words reach the tail.
450 lw $14,2*4($5)
451 lw $10,2*4($6)
452 addu $10,$14
453 sltu $24,$10,$14
454 addu $14,$10,$2
455 sltu $2,$14,$10
456 sw $14,2*4($4)
457 addu $2,$24
458
459 .L_bn_add_words_return:
460 .set noreorder
461 jr $31
462 move $4,$2 # delay slot: the carry is mirrored into $4
463
464 .end bn_add_words_internal
465
# bn_sub_words - exported entry point.
# Registers as used by the code:
#   $4  destination words
#   $5  minuend words
#   $6  subtrahend words
#   $7  word count (tested as a signed value)
# Returns the final borrow in $2; a count that is not positive returns 0
# without touching memory.
# NOTE(review): presumably the OpenSSL bn_sub_words(rp, ap, bp, num)
# primitive - confirm against the C prototype.
466 .align 5
467 .globl bn_sub_words
468 .ent bn_sub_words
469 bn_sub_words:
470 .set noreorder
471 bgtz $7,bn_sub_words_internal # count > 0: go do the work
472 move $2,$0 # delay slot: borrow starts at zero on both paths
473 jr $31 # count <= 0: return 0
474 move $4,$0 # delay slot: $4 = 0; same effect as the move $4,$2 in the sibling stubs, since $2 is zero here
475 .end bn_sub_words
476
# bn_sub_words_internal - worker reached from bn_sub_words with $7 > 0 and
# $2 = 0.
# For every word: the word at $6 is subtracted from the word at $5, the
# incoming borrow $2 is subtracted on top, the result is stored at $4, and
# $2 becomes the sum of the borrows out of the two subtractions.
# The main loop handles four words per pass while ($7 & -4) is positive; the
# tail then handles the remaining one to three words.
# Scratch registers: $1, $3 (constant -4), $8-$15, $24, $25.
# Result: $2, mirrored in $4.
477 .align 5
478 .ent bn_sub_words_internal
479 bn_sub_words_internal:
480 .set reorder
481 li $3,-4 # mask that clears the two low bits of the count
482 and $1,$7,$3 # count rounded down to a multiple of four
483 beqz $1,.L_bn_sub_words_tail # fewer than four words: tail only
484
485 .L_bn_sub_words_loop:
# Load all eight operands; count and pointers are advanced in between, so
# the later loads and all stores of this pass use negative offsets.
486 lw $12,0($5)
487 lw $8,0($6)
488 subu $7,4
489 lw $13,4($5)
490 and $1,$7,$3 # words left in whole groups of four, tested at the loop end
491 lw $14,2*4($5)
492 addu $6,4*4
493 lw $15,3*4($5)
494 addu $4,4*4
495 lw $9,-3*4($6)
496 addu $5,4*4
497 lw $10,-2*4($6)
498 lw $11,-4($6)
# Word 0: borrow of the operand difference goes to $24, borrow of the
# borrow-in subtraction goes to $2, then both are summed into $2.
499 sltu $24,$12,$8
500 subu $8,$12,$8
501 subu $12,$8,$2
502 sgtu $2,$12,$8
503 sw $12,-4*4($4)
504 addu $2,$24
505
# Word 1.
506 sltu $25,$13,$9
507 subu $9,$13,$9
508 subu $13,$9,$2
509 sgtu $2,$13,$9
510 sw $13,-3*4($4)
511 addu $2,$25
512
513
# Word 2.
514 sltu $24,$14,$10
515 subu $10,$14,$10
516 subu $14,$10,$2
517 sgtu $2,$14,$10
518 sw $14,-2*4($4)
519 addu $2,$24
520
# Word 3.
521 sltu $25,$15,$11
522 subu $11,$15,$11
523 subu $15,$11,$2
524 sgtu $2,$15,$11
525 sw $15,-4($4)
526
527 .set noreorder
528 bgtz $1,.L_bn_sub_words_loop
529 addu $2,$25 # delay slot: fold the last borrow into $2
530
531 beqz $7,.L_bn_sub_words_return # nothing left over
532 nop
533
# Tail: one to three remaining words, same per-word sequence as the loop.
534 .L_bn_sub_words_tail:
535 .set reorder
536 lw $12,0($5)
537 lw $8,0($6)
538 subu $7,1
539 sltu $24,$12,$8
540 subu $8,$12,$8
541 subu $12,$8,$2
542 sgtu $2,$12,$8
543 sw $12,0($4)
544 addu $2,$24
545 beqz $7,.L_bn_sub_words_return
546
547 lw $13,4($5)
548 subu $7,1
549 lw $9,4($6)
550 sltu $25,$13,$9
551 subu $9,$13,$9
552 subu $13,$9,$2
553 sgtu $2,$13,$9
554 sw $13,4($4)
555 addu $2,$25
556 beqz $7,.L_bn_sub_words_return
557
# Third tail word: no count check is needed, at most three words reach the tail.
558 lw $14,2*4($5)
559 lw $10,2*4($6)
560 sltu $24,$14,$10
561 subu $10,$14,$10
562 subu $14,$10,$2
563 sgtu $2,$14,$10
564 sw $14,2*4($4)
565 addu $2,$24
566
567 .L_bn_sub_words_return:
568 .set noreorder
569 jr $31
570 move $4,$2 # delay slot: the borrow is mirrored into $4
571 .end bn_sub_words_internal
572
573 #if 0
574 /*
575 * The bn_div_3_words entry point is re-used for constant-time interface.
576 * Implementation is retained as historical reference.
577 */
# bn_div_3_words - entry stub. This routine sits inside an #if 0 region and
# is therefore not assembled.
# On entry $4 is a pointer, $5 and $6 are values. The stub keeps the pointer
# in $7 and the incoming $5 in $10, loads the word at the pointer into $4
# and the word just below it into $5, and returns all ones (-1) in $2 when
# the word at the pointer equals $6. Otherwise control moves on to
# bn_div_3_words_internal.
578 .align 5
579 .globl bn_div_3_words
580 .ent bn_div_3_words
581 bn_div_3_words:
582 .set noreorder
583 move $7,$4 # we know that bn_div_words does not
584 # touch $7, $10, $11 and preserves $6
585 # so that we can save two arguments
586 # and return address in registers
587 # instead of stack:-)
588
589 lw $4,($7)
590 move $10,$5
591 bne $4,$6,bn_div_3_words_internal
592 lw $5,-4($7) # delay slot: executed on both paths
593 li $2,-1
594 jr $31
595 move $4,$2 # delay slot: the result is mirrored into $4
596 .end bn_div_3_words
597
# bn_div_3_words_internal - continuation of bn_div_3_words. This routine sits
# inside an #if 0 region and is therefore not assembled.
# Calls bn_div_words_internal with the return address parked in $11, then
# multiplies the value saved in $10 by the quotient returned in $2 and runs
# a correction loop that lowers $2 by one per iteration.
# NOTE(review): the loop appears to compare the product against the
# remainder left in $5 and the word at -2*4($7), in the manner of a
# three-word quotient estimate correction - confirm before relying on it.
598 .align 5
599 .ent bn_div_3_words_internal
600 bn_div_3_words_internal:
601 .set reorder
602 move $11,$31 # park the return address across the call
603 bal bn_div_words_internal
604 move $31,$11 # put the return address back
605 multu ($10,$2)
606 lw $14,-2*4($7)
607 move $8,$0
608 mfhi ($13,$10,$2)
609 mflo ($12,$10,$2)
610 sltu $24,$13,$5
611 .L_bn_div_3_words_inner_loop:
612 bnez $24,.L_bn_div_3_words_inner_loop_done
613 sgeu $1,$14,$12
614 seq $25,$13,$5
615 and $1,$25
616 sltu $15,$12,$10
617 addu $5,$6
618 subu $13,$15
619 subu $12,$10
620 sltu $24,$13,$5
621 sltu $8,$5,$6
622 or $24,$8
623 .set noreorder
624 beqz $1,.L_bn_div_3_words_inner_loop
625 subu $2,1 # delay slot: runs whether or not the branch is taken
626 addu $2,1 # reached only on fall-through: cancels the decrement above
627 .set reorder
628 .L_bn_div_3_words_inner_loop_done:
629 .set noreorder
630 jr $31
631 move $4,$2 # delay slot: the result is mirrored into $4
632 .end bn_div_3_words_internal
633 #endif
634
# bn_div_words - exported entry point.
# Registers as used by the code:
#   $4  high word of the dividend
#   $5  low word of the dividend
#   $6  divisor
# A zero divisor returns all ones (-1) in $2; any other divisor continues in
# bn_div_words_internal.
# NOTE(review): presumably the OpenSSL bn_div_words(h, l, d) primitive -
# confirm against the C prototype.
635 .align 5
636 .globl bn_div_words
637 .ent bn_div_words
638 bn_div_words:
639 .set noreorder
640 bnez $6,bn_div_words_internal
641 li $2,-1 # I would rather signal div-by-zero
642 # which can be done with 'break 7'
643 jr $31
644 move $4,$2 # delay slot: the result is mirrored into $4
645 .end bn_div_words
646
# bn_div_words_internal - divides the two-word value $4:$5 by $6, reached
# with $6 non-zero.
# Steps:
#   - The divisor is shifted left until its top bit is set; the shift count
#     is kept in $25 and the dividend is shifted by the same amount. Should
#     dividend bits be shifted out of $4, break 6 is executed.
#   - The quotient is produced as two 16-bit halves. Each half starts from an
#     estimate (dividend high part divided by the divisor high half, or
#     0xffff when their high halves are equal) and is then lowered by one per
#     iteration of a correction loop that compares divisor times estimate
#     against the current dividend.
# Results: quotient in $2 (mirrored in $4), remainder in $3 (mirrored in $5).
# $6 is shifted back to its original value before returning.
# Scratch registers: $1, $8, $9, $12-$15, $24, $25.
# Entered with noreorder in effect from the preceding stub, so the first
# branches below have explicit delay slots.
647 .align 5
648 .ent bn_div_words_internal
649 bn_div_words_internal:
650 move $3,$0
651 bltz $6,.L_bn_div_words_body # top bit already set: no shifting needed
652 move $25,$3 # delay slot: shift count = 0
653 sll $6,1
654 bgtz $6,.-4 # loop back to the shift above until the top bit is set
655 addu $25,1 # delay slot: count one shift
656
657 .set reorder
658 negu $13,$25 # shift amount for the complementary right shifts
659 li $14,-1
660 sll $14,$13
661 and $14,$4 # dividend bits that the left shift would discard
662 srl $1,$5,$13 # low-word bits that move up into the high word
663 .set noreorder
664 beqz $14,.+12 # nothing discarded: skip the break
665 nop
666 break 6 # signal overflow
667 .set reorder
668 sll $4,$25
669 sll $5,$25
670 or $4,$1
671 .L_bn_div_words_body:
672 srl $3,$6,4*4 # bits
673 sgeu $1,$4,$6
674 .set noreorder
675 beqz $1,.+12 # high word below the divisor: skip the subtraction
676 nop
677 subu $4,$6
678 .set reorder
679
# First quotient half.
680 li $8,-1
681 srl $9,$4,4*4 # bits
682 srl $8,4*4 # q=0xffff (all ones shifted right by 16)
683 beq $3,$9,.L_bn_div_words_skip_div1
684 divu ($4,$3)
685 mfqt ($8,$4,$3)
686 .L_bn_div_words_skip_div1:
687 multu ($6,$8)
688 sll $15,$4,4*4 # bits
689 srl $1,$5,4*4 # bits
690 or $15,$1
691 mflo ($12,$6,$8)
692 mfhi ($13,$6,$8)
693 .L_bn_div_words_inner_loop1:
694 sltu $14,$15,$12
695 seq $24,$9,$13
696 sltu $1,$9,$13
697 and $14,$24
698 sltu $2,$12,$6
699 or $1,$14
700 .set noreorder
701 beqz $1,.L_bn_div_words_inner_loop1_done
702 subu $13,$2 # delay slot: runs on both paths
703 subu $12,$6
704 b .L_bn_div_words_inner_loop1
705 subu $8,1 # delay slot: lower the estimate by one
706 .set reorder
707 .L_bn_div_words_inner_loop1_done:
708
709 sll $5,4*4 # bits
710 subu $4,$15,$12
711 sll $2,$8,4*4 # bits
712
# Second quotient half.
713 li $8,-1
714 srl $9,$4,4*4 # bits
715 srl $8,4*4 # q=0xffff (all ones shifted right by 16)
716 beq $3,$9,.L_bn_div_words_skip_div2
717 divu ($4,$3)
718 mfqt ($8,$4,$3)
719 .L_bn_div_words_skip_div2:
720 multu ($6,$8)
721 sll $15,$4,4*4 # bits
722 srl $1,$5,4*4 # bits
723 or $15,$1
724 mflo ($12,$6,$8)
725 mfhi ($13,$6,$8)
726 .L_bn_div_words_inner_loop2:
727 sltu $14,$15,$12
728 seq $24,$9,$13
729 sltu $1,$9,$13
730 and $14,$24
731 sltu $3,$12,$6
732 or $1,$14
733 .set noreorder
734 beqz $1,.L_bn_div_words_inner_loop2_done
735 subu $13,$3 # delay slot: runs on both paths
736 subu $12,$6
737 b .L_bn_div_words_inner_loop2
738 subu $8,1 # delay slot: lower the estimate by one
739 .set reorder
740 .L_bn_div_words_inner_loop2_done:
741
742 subu $4,$15,$12
743 or $2,$8 # combine both quotient halves
744 srl $3,$4,$25 # $3 contains remainder if anybody wants it
745 srl $6,$25 # restore $6
746
747 .set noreorder
748 move $5,$3
749 jr $31
750 move $4,$2 # delay slot: the quotient is mirrored into $4
751 .end bn_div_words_internal
752
# bn_mul_comba8 - multiplies the eight words at $5 by the eight words at $6
# and stores the sixteen result words at $4, one result column at a time.
# Register roles, as set up by the loads below:
#   a[0..3] in $12-$15, a[4] in $16, a[5] in $18, a[6] in $20, a[7] in $5
#   b[0..3] in $8-$11,  b[4] in $17, b[5] in $19, b[6] in $21, b[7] in $6
#   (the two operand pointers are overwritten by a[7] and b[7])
#   $2, $3 and $7 rotate as the three-word column accumulator (c1, c2, c3 in
#   the per-product comments); $24 and $25 hold the low and high half of the
#   current product; $1 holds the carry of the most recent addition.
# Each product is read back while the next multiply is already issued, so
# the instruction order is significant.
# Stack: a 24-byte frame that saves and restores $16-$21.
# The parenthesised multu, mflo and mfhi forms are macro invocations that are
# not defined in this file - presumably supplied by mips_arch.h.
753 .align 5
754 .globl bn_mul_comba8
755 .ent bn_mul_comba8
756 bn_mul_comba8:
757 .set noreorder
758 .frame $29,6*4,$31
759 .mask 0x003f0000,-4
760 subu $29,6*4
761 sw $21,5*4($29)
762 sw $20,4*4($29)
763 sw $19,3*4($29)
764 sw $18,2*4($29)
765 sw $17,1*4($29)
766 sw $16,0*4($29)
767
768 .set reorder
769 lw $12,0($5) # If compiled with -mips3 option on
770 # R5000 box assembler barks on this
771 # line with "should not have mult/div
772 # as last instruction in bb (R10K
773 # bug)" warning. If anybody out there
774 # has a clue about how to circumvent
775 # this do send me a note.
776 # <appro@fy.chalmers.se>
777
778 lw $8,0($6)
779 lw $13,4($5)
780 lw $14,2*4($5)
781 multu ($12,$8) # mul_add_c(a[0],b[0],c1,c2,c3);
782 lw $15,3*4($5)
783 lw $9,4($6)
784 lw $10,2*4($6)
785 lw $11,3*4($6)
786 mflo ($2,$12,$8)
787 mfhi ($3,$12,$8)
788
789 lw $16,4*4($5)
790 lw $18,5*4($5)
791 multu ($12,$9) # mul_add_c(a[0],b[1],c2,c3,c1);
792 lw $20,6*4($5)
793 lw $5,7*4($5)
794 lw $17,4*4($6)
795 lw $19,5*4($6)
796 mflo ($24,$12,$9)
797 mfhi ($25,$12,$9)
798 addu $3,$24
799 sltu $1,$3,$24
800 multu ($13,$8) # mul_add_c(a[1],b[0],c2,c3,c1);
801 addu $7,$25,$1
802 lw $21,6*4($6)
803 lw $6,7*4($6)
804 sw $2,0($4) # r[0]=c1;
805 mflo ($24,$13,$8)
806 mfhi ($25,$13,$8)
807 addu $3,$24
808 sltu $1,$3,$24
809 multu ($14,$8) # mul_add_c(a[2],b[0],c3,c1,c2);
810 addu $25,$1
811 addu $7,$25
812 sltu $2,$7,$25
813 sw $3,4($4) # r[1]=c2;
814
815 mflo ($24,$14,$8)
816 mfhi ($25,$14,$8)
817 addu $7,$24
818 sltu $1,$7,$24
819 multu ($13,$9) # mul_add_c(a[1],b[1],c3,c1,c2);
820 addu $25,$1
821 addu $2,$25
822 mflo ($24,$13,$9)
823 mfhi ($25,$13,$9)
824 addu $7,$24
825 sltu $1,$7,$24
826 multu ($12,$10) # mul_add_c(a[0],b[2],c3,c1,c2);
827 addu $25,$1
828 addu $2,$25
829 sltu $3,$2,$25
830 mflo ($24,$12,$10)
831 mfhi ($25,$12,$10)
832 addu $7,$24
833 sltu $1,$7,$24
834 multu ($12,$11) # mul_add_c(a[0],b[3],c1,c2,c3);
835 addu $25,$1
836 addu $2,$25
837 sltu $1,$2,$25
838 addu $3,$1
839 sw $7,2*4($4) # r[2]=c3;
840
841 mflo ($24,$12,$11)
842 mfhi ($25,$12,$11)
843 addu $2,$24
844 sltu $1,$2,$24
845 multu ($13,$10) # mul_add_c(a[1],b[2],c1,c2,c3);
846 addu $25,$1
847 addu $3,$25
848 sltu $7,$3,$25
849 mflo ($24,$13,$10)
850 mfhi ($25,$13,$10)
851 addu $2,$24
852 sltu $1,$2,$24
853 multu ($14,$9) # mul_add_c(a[2],b[1],c1,c2,c3);
854 addu $25,$1
855 addu $3,$25
856 sltu $1,$3,$25
857 addu $7,$1
858 mflo ($24,$14,$9)
859 mfhi ($25,$14,$9)
860 addu $2,$24
861 sltu $1,$2,$24
862 multu ($15,$8) # mul_add_c(a[3],b[0],c1,c2,c3);
863 addu $25,$1
864 addu $3,$25
865 sltu $1,$3,$25
866 addu $7,$1
867 mflo ($24,$15,$8)
868 mfhi ($25,$15,$8)
869 addu $2,$24
870 sltu $1,$2,$24
871 multu ($16,$8) # mul_add_c(a[4],b[0],c2,c3,c1);
872 addu $25,$1
873 addu $3,$25
874 sltu $1,$3,$25
875 addu $7,$1
876 sw $2,3*4($4) # r[3]=c1;
877
878 mflo ($24,$16,$8)
879 mfhi ($25,$16,$8)
880 addu $3,$24
881 sltu $1,$3,$24
882 multu ($15,$9) # mul_add_c(a[3],b[1],c2,c3,c1);
883 addu $25,$1
884 addu $7,$25
885 sltu $2,$7,$25
886 mflo ($24,$15,$9)
887 mfhi ($25,$15,$9)
888 addu $3,$24
889 sltu $1,$3,$24
890 multu ($14,$10) # mul_add_c(a[2],b[2],c2,c3,c1);
891 addu $25,$1
892 addu $7,$25
893 sltu $1,$7,$25
894 addu $2,$1
895 mflo ($24,$14,$10)
896 mfhi ($25,$14,$10)
897 addu $3,$24
898 sltu $1,$3,$24
899 multu ($13,$11) # mul_add_c(a[1],b[3],c2,c3,c1);
900 addu $25,$1
901 addu $7,$25
902 sltu $1,$7,$25
903 addu $2,$1
904 mflo ($24,$13,$11)
905 mfhi ($25,$13,$11)
906 addu $3,$24
907 sltu $1,$3,$24
908 multu ($12,$17) # mul_add_c(a[0],b[4],c2,c3,c1);
909 addu $25,$1
910 addu $7,$25
911 sltu $1,$7,$25
912 addu $2,$1
913 mflo ($24,$12,$17)
914 mfhi ($25,$12,$17)
915 addu $3,$24
916 sltu $1,$3,$24
917 multu ($12,$19) # mul_add_c(a[0],b[5],c3,c1,c2);
918 addu $25,$1
919 addu $7,$25
920 sltu $1,$7,$25
921 addu $2,$1
922 sw $3,4*4($4) # r[4]=c2;
923
924 mflo ($24,$12,$19)
925 mfhi ($25,$12,$19)
926 addu $7,$24
927 sltu $1,$7,$24
928 multu ($13,$17) # mul_add_c(a[1],b[4],c3,c1,c2);
929 addu $25,$1
930 addu $2,$25
931 sltu $3,$2,$25
932 mflo ($24,$13,$17)
933 mfhi ($25,$13,$17)
934 addu $7,$24
935 sltu $1,$7,$24
936 multu ($14,$11) # mul_add_c(a[2],b[3],c3,c1,c2);
937 addu $25,$1
938 addu $2,$25
939 sltu $1,$2,$25
940 addu $3,$1
941 mflo ($24,$14,$11)
942 mfhi ($25,$14,$11)
943 addu $7,$24
944 sltu $1,$7,$24
945 multu ($15,$10) # mul_add_c(a[3],b[2],c3,c1,c2);
946 addu $25,$1
947 addu $2,$25
948 sltu $1,$2,$25
949 addu $3,$1
950 mflo ($24,$15,$10)
951 mfhi ($25,$15,$10)
952 addu $7,$24
953 sltu $1,$7,$24
954 multu ($16,$9) # mul_add_c(a[4],b[1],c3,c1,c2);
955 addu $25,$1
956 addu $2,$25
957 sltu $1,$2,$25
958 addu $3,$1
959 mflo ($24,$16,$9)
960 mfhi ($25,$16,$9)
961 addu $7,$24
962 sltu $1,$7,$24
963 multu ($18,$8) # mul_add_c(a[5],b[0],c3,c1,c2);
964 addu $25,$1
965 addu $2,$25
966 sltu $1,$2,$25
967 addu $3,$1
968 mflo ($24,$18,$8)
969 mfhi ($25,$18,$8)
970 addu $7,$24
971 sltu $1,$7,$24
972 multu ($20,$8) # mul_add_c(a[6],b[0],c1,c2,c3);
973 addu $25,$1
974 addu $2,$25
975 sltu $1,$2,$25
976 addu $3,$1
977 sw $7,5*4($4) # r[5]=c3;
978
979 mflo ($24,$20,$8)
980 mfhi ($25,$20,$8)
981 addu $2,$24
982 sltu $1,$2,$24
983 multu ($18,$9) # mul_add_c(a[5],b[1],c1,c2,c3);
984 addu $25,$1
985 addu $3,$25
986 sltu $7,$3,$25
987 mflo ($24,$18,$9)
988 mfhi ($25,$18,$9)
989 addu $2,$24
990 sltu $1,$2,$24
991 multu ($16,$10) # mul_add_c(a[4],b[2],c1,c2,c3);
992 addu $25,$1
993 addu $3,$25
994 sltu $1,$3,$25
995 addu $7,$1
996 mflo ($24,$16,$10)
997 mfhi ($25,$16,$10)
998 addu $2,$24
999 sltu $1,$2,$24
1000 multu ($15,$11) # mul_add_c(a[3],b[3],c1,c2,c3);
1001 addu $25,$1
1002 addu $3,$25
1003 sltu $1,$3,$25
1004 addu $7,$1
1005 mflo ($24,$15,$11)
1006 mfhi ($25,$15,$11)
1007 addu $2,$24
1008 sltu $1,$2,$24
1009 multu ($14,$17) # mul_add_c(a[2],b[4],c1,c2,c3);
1010 addu $25,$1
1011 addu $3,$25
1012 sltu $1,$3,$25
1013 addu $7,$1
1014 mflo ($24,$14,$17)
1015 mfhi ($25,$14,$17)
1016 addu $2,$24
1017 sltu $1,$2,$24
1018 multu ($13,$19) # mul_add_c(a[1],b[5],c1,c2,c3);
1019 addu $25,$1
1020 addu $3,$25
1021 sltu $1,$3,$25
1022 addu $7,$1
1023 mflo ($24,$13,$19)
1024 mfhi ($25,$13,$19)
1025 addu $2,$24
1026 sltu $1,$2,$24
1027 multu ($12,$21) # mul_add_c(a[0],b[6],c1,c2,c3);
1028 addu $25,$1
1029 addu $3,$25
1030 sltu $1,$3,$25
1031 addu $7,$1
1032 mflo ($24,$12,$21)
1033 mfhi ($25,$12,$21)
1034 addu $2,$24
1035 sltu $1,$2,$24
1036 multu ($12,$6) # mul_add_c(a[0],b[7],c2,c3,c1);
1037 addu $25,$1
1038 addu $3,$25
1039 sltu $1,$3,$25
1040 addu $7,$1
1041 sw $2,6*4($4) # r[6]=c1;
1042
1043 mflo ($24,$12,$6)
1044 mfhi ($25,$12,$6)
1045 addu $3,$24
1046 sltu $1,$3,$24
1047 multu ($13,$21) # mul_add_c(a[1],b[6],c2,c3,c1);
1048 addu $25,$1
1049 addu $7,$25
1050 sltu $2,$7,$25
1051 mflo ($24,$13,$21)
1052 mfhi ($25,$13,$21)
1053 addu $3,$24
1054 sltu $1,$3,$24
1055 multu ($14,$19) # mul_add_c(a[2],b[5],c2,c3,c1);
1056 addu $25,$1
1057 addu $7,$25
1058 sltu $1,$7,$25
1059 addu $2,$1
1060 mflo ($24,$14,$19)
1061 mfhi ($25,$14,$19)
1062 addu $3,$24
1063 sltu $1,$3,$24
1064 multu ($15,$17) # mul_add_c(a[3],b[4],c2,c3,c1);
1065 addu $25,$1
1066 addu $7,$25
1067 sltu $1,$7,$25
1068 addu $2,$1
1069 mflo ($24,$15,$17)
1070 mfhi ($25,$15,$17)
1071 addu $3,$24
1072 sltu $1,$3,$24
1073 multu ($16,$11) # mul_add_c(a[4],b[3],c2,c3,c1);
1074 addu $25,$1
1075 addu $7,$25
1076 sltu $1,$7,$25
1077 addu $2,$1
1078 mflo ($24,$16,$11)
1079 mfhi ($25,$16,$11)
1080 addu $3,$24
1081 sltu $1,$3,$24
1082 multu ($18,$10) # mul_add_c(a[5],b[2],c2,c3,c1);
1083 addu $25,$1
1084 addu $7,$25
1085 sltu $1,$7,$25
1086 addu $2,$1
1087 mflo ($24,$18,$10)
1088 mfhi ($25,$18,$10)
1089 addu $3,$24
1090 sltu $1,$3,$24
1091 multu ($20,$9) # mul_add_c(a[6],b[1],c2,c3,c1);
1092 addu $25,$1
1093 addu $7,$25
1094 sltu $1,$7,$25
1095 addu $2,$1
1096 mflo ($24,$20,$9)
1097 mfhi ($25,$20,$9)
1098 addu $3,$24
1099 sltu $1,$3,$24
1100 multu ($5,$8) # mul_add_c(a[7],b[0],c2,c3,c1);
1101 addu $25,$1
1102 addu $7,$25
1103 sltu $1,$7,$25
1104 addu $2,$1
1105 mflo ($24,$5,$8)
1106 mfhi ($25,$5,$8)
1107 addu $3,$24
1108 sltu $1,$3,$24
1109 multu ($5,$9) # mul_add_c(a[7],b[1],c3,c1,c2);
1110 addu $25,$1
1111 addu $7,$25
1112 sltu $1,$7,$25
1113 addu $2,$1
1114 sw $3,7*4($4) # r[7]=c2;
1115
1116 mflo ($24,$5,$9)
1117 mfhi ($25,$5,$9)
1118 addu $7,$24
1119 sltu $1,$7,$24
1120 multu ($20,$10) # mul_add_c(a[6],b[2],c3,c1,c2);
1121 addu $25,$1
1122 addu $2,$25
1123 sltu $3,$2,$25
1124 mflo ($24,$20,$10)
1125 mfhi ($25,$20,$10)
1126 addu $7,$24
1127 sltu $1,$7,$24
1128 multu ($18,$11) # mul_add_c(a[5],b[3],c3,c1,c2);
1129 addu $25,$1
1130 addu $2,$25
1131 sltu $1,$2,$25
1132 addu $3,$1
1133 mflo ($24,$18,$11)
1134 mfhi ($25,$18,$11)
1135 addu $7,$24
1136 sltu $1,$7,$24
1137 multu ($16,$17) # mul_add_c(a[4],b[4],c3,c1,c2);
1138 addu $25,$1
1139 addu $2,$25
1140 sltu $1,$2,$25
1141 addu $3,$1
1142 mflo ($24,$16,$17)
1143 mfhi ($25,$16,$17)
1144 addu $7,$24
1145 sltu $1,$7,$24
1146 multu ($15,$19) # mul_add_c(a[3],b[5],c3,c1,c2);
1147 addu $25,$1
1148 addu $2,$25
1149 sltu $1,$2,$25
1150 addu $3,$1
1151 mflo ($24,$15,$19)
1152 mfhi ($25,$15,$19)
1153 addu $7,$24
1154 sltu $1,$7,$24
1155 multu ($14,$21) # mul_add_c(a[2],b[6],c3,c1,c2);
1156 addu $25,$1
1157 addu $2,$25
1158 sltu $1,$2,$25
1159 addu $3,$1
1160 mflo ($24,$14,$21)
1161 mfhi ($25,$14,$21)
1162 addu $7,$24
1163 sltu $1,$7,$24
1164 multu ($13,$6) # mul_add_c(a[1],b[7],c3,c1,c2);
1165 addu $25,$1
1166 addu $2,$25
1167 sltu $1,$2,$25
1168 addu $3,$1
1169 mflo ($24,$13,$6)
1170 mfhi ($25,$13,$6)
1171 addu $7,$24
1172 sltu $1,$7,$24
1173 multu ($14,$6) # mul_add_c(a[2],b[7],c1,c2,c3);
1174 addu $25,$1
1175 addu $2,$25
1176 sltu $1,$2,$25
1177 addu $3,$1
1178 sw $7,8*4($4) # r[8]=c3;
1179
1180 mflo ($24,$14,$6)
1181 mfhi ($25,$14,$6)
1182 addu $2,$24
1183 sltu $1,$2,$24
1184 multu ($15,$21) # mul_add_c(a[3],b[6],c1,c2,c3);
1185 addu $25,$1
1186 addu $3,$25
1187 sltu $7,$3,$25
1188 mflo ($24,$15,$21)
1189 mfhi ($25,$15,$21)
1190 addu $2,$24
1191 sltu $1,$2,$24
1192 multu ($16,$19) # mul_add_c(a[4],b[5],c1,c2,c3);
1193 addu $25,$1
1194 addu $3,$25
1195 sltu $1,$3,$25
1196 addu $7,$1
1197 mflo ($24,$16,$19)
1198 mfhi ($25,$16,$19)
1199 addu $2,$24
1200 sltu $1,$2,$24
1201 multu ($18,$17) # mul_add_c(a[5],b[4],c1,c2,c3);
1202 addu $25,$1
1203 addu $3,$25
1204 sltu $1,$3,$25
1205 addu $7,$1
1206 mflo ($24,$18,$17)
1207 mfhi ($25,$18,$17)
1208 addu $2,$24
1209 sltu $1,$2,$24
1210 multu ($20,$11) # mul_add_c(a[6],b[3],c1,c2,c3);
1211 addu $25,$1
1212 addu $3,$25
1213 sltu $1,$3,$25
1214 addu $7,$1
1215 mflo ($24,$20,$11)
1216 mfhi ($25,$20,$11)
1217 addu $2,$24
1218 sltu $1,$2,$24
1219 multu ($5,$10) # mul_add_c(a[7],b[2],c1,c2,c3);
1220 addu $25,$1
1221 addu $3,$25
1222 sltu $1,$3,$25
1223 addu $7,$1
1224 mflo ($24,$5,$10)
1225 mfhi ($25,$5,$10)
1226 addu $2,$24
1227 sltu $1,$2,$24
1228 multu ($5,$11) # mul_add_c(a[7],b[3],c2,c3,c1);
1229 addu $25,$1
1230 addu $3,$25
1231 sltu $1,$3,$25
1232 addu $7,$1
1233 sw $2,9*4($4) # r[9]=c1;
1234
1235 mflo ($24,$5,$11)
1236 mfhi ($25,$5,$11)
1237 addu $3,$24
1238 sltu $1,$3,$24
1239 multu ($20,$17) # mul_add_c(a[6],b[4],c2,c3,c1);
1240 addu $25,$1
1241 addu $7,$25
1242 sltu $2,$7,$25
1243 mflo ($24,$20,$17)
1244 mfhi ($25,$20,$17)
1245 addu $3,$24
1246 sltu $1,$3,$24
1247 multu ($18,$19) # mul_add_c(a[5],b[5],c2,c3,c1);
1248 addu $25,$1
1249 addu $7,$25
1250 sltu $1,$7,$25
1251 addu $2,$1
1252 mflo ($24,$18,$19)
1253 mfhi ($25,$18,$19)
1254 addu $3,$24
1255 sltu $1,$3,$24
1256 multu ($16,$21) # mul_add_c(a[4],b[6],c2,c3,c1);
1257 addu $25,$1
1258 addu $7,$25
1259 sltu $1,$7,$25
1260 addu $2,$1
1261 mflo ($24,$16,$21)
1262 mfhi ($25,$16,$21)
1263 addu $3,$24
1264 sltu $1,$3,$24
1265 multu ($15,$6) # mul_add_c(a[3],b[7],c2,c3,c1);
1266 addu $25,$1
1267 addu $7,$25
1268 sltu $1,$7,$25
1269 addu $2,$1
1270 mflo ($24,$15,$6)
1271 mfhi ($25,$15,$6)
1272 addu $3,$24
1273 sltu $1,$3,$24
1274 multu ($16,$6) # mul_add_c(a[4],b[7],c3,c1,c2);
1275 addu $25,$1
1276 addu $7,$25
1277 sltu $1,$7,$25
1278 addu $2,$1
1279 sw $3,10*4($4) # r[10]=c2;
1280
1281 mflo ($24,$16,$6)
1282 mfhi ($25,$16,$6)
1283 addu $7,$24
1284 sltu $1,$7,$24
1285 multu ($18,$21) # mul_add_c(a[5],b[6],c3,c1,c2);
1286 addu $25,$1
1287 addu $2,$25
1288 sltu $3,$2,$25
1289 mflo ($24,$18,$21)
1290 mfhi ($25,$18,$21)
1291 addu $7,$24
1292 sltu $1,$7,$24
1293 multu ($20,$19) # mul_add_c(a[6],b[5],c3,c1,c2);
1294 addu $25,$1
1295 addu $2,$25
1296 sltu $1,$2,$25
1297 addu $3,$1
1298 mflo ($24,$20,$19)
1299 mfhi ($25,$20,$19)
1300 addu $7,$24
1301 sltu $1,$7,$24
1302 multu ($5,$17) # mul_add_c(a[7],b[4],c3,c1,c2);
1303 addu $25,$1
1304 addu $2,$25
1305 sltu $1,$2,$25
1306 addu $3,$1
1307 mflo ($24,$5,$17)
1308 mfhi ($25,$5,$17)
1309 addu $7,$24
1310 sltu $1,$7,$24
1311 multu ($5,$19) # mul_add_c(a[7],b[5],c1,c2,c3);
1312 addu $25,$1
1313 addu $2,$25
1314 sltu $1,$2,$25
1315 addu $3,$1
1316 sw $7,11*4($4) # r[11]=c3;
1317
1318 mflo ($24,$5,$19)
1319 mfhi ($25,$5,$19)
1320 addu $2,$24
1321 sltu $1,$2,$24
1322 multu ($20,$21) # mul_add_c(a[6],b[6],c1,c2,c3);
1323 addu $25,$1
1324 addu $3,$25
1325 sltu $7,$3,$25
1326 mflo ($24,$20,$21)
1327 mfhi ($25,$20,$21)
1328 addu $2,$24
1329 sltu $1,$2,$24
1330 multu ($18,$6) # mul_add_c(a[5],b[7],c1,c2,c3);
1331 addu $25,$1
1332 addu $3,$25
1333 sltu $1,$3,$25
1334 addu $7,$1
1335 mflo ($24,$18,$6)
1336 mfhi ($25,$18,$6)
1337 addu $2,$24
1338 sltu $1,$2,$24
1339 multu ($20,$6) # mul_add_c(a[6],b[7],c2,c3,c1);
1340 addu $25,$1
1341 addu $3,$25
1342 sltu $1,$3,$25
1343 addu $7,$1
1344 sw $2,12*4($4) # r[12]=c1;
1345
1346 mflo ($24,$20,$6)
1347 mfhi ($25,$20,$6)
1348 addu $3,$24
1349 sltu $1,$3,$24
1350 multu ($5,$21) # mul_add_c(a[7],b[6],c2,c3,c1);
1351 addu $25,$1
1352 addu $7,$25
1353 sltu $2,$7,$25
1354 mflo ($24,$5,$21)
1355 mfhi ($25,$5,$21)
1356 addu $3,$24
1357 sltu $1,$3,$24
1358 multu ($5,$6) # mul_add_c(a[7],b[7],c3,c1,c2);
1359 addu $25,$1
1360 addu $7,$25
1361 sltu $1,$7,$25
1362 addu $2,$1
1363 sw $3,13*4($4) # r[13]=c2;
1364
1365 mflo ($24,$5,$6)
1366 mfhi ($25,$5,$6)
1367 addu $7,$24
1368 sltu $1,$7,$24
1369 addu $25,$1
1370 addu $2,$25
1371 sw $7,14*4($4) # r[14]=c3;
1372 sw $2,15*4($4) # r[15]=c1;
1373
# Epilogue: restore $16-$21 and release the frame in the return delay slot.
1374 .set noreorder
1375 lw $21,5*4($29)
1376 lw $20,4*4($29)
1377 lw $19,3*4($29)
1378 lw $18,2*4($29)
1379 lw $17,1*4($29)
1380 lw $16,0*4($29)
1381 jr $31
1382 addu $29,6*4
1383 .end bn_mul_comba8
1384
# bn_mul_comba4 - multiplies the four words at $5 by the four words at $6
# and stores the eight result words at $4, one result column at a time.
# Register roles, as set up by the loads below:
#   a[0..3] in $12-$15, b[0..3] in $8-$11
#   $2, $3 and $7 rotate as the three-word column accumulator (c1, c2, c3 in
#   the per-product comments); $24 and $25 hold the low and high half of the
#   current product; $1 holds the carry of the most recent addition.
# Each product is read back while the next multiply is already issued, so
# the instruction order is significant.
# No stack frame is used and none of $16-$23 is touched.
# The parenthesised multu, mflo and mfhi forms are macro invocations that are
# not defined in this file - presumably supplied by mips_arch.h.
1385 .align 5
1386 .globl bn_mul_comba4
1387 .ent bn_mul_comba4
1388 bn_mul_comba4:
1389 .set reorder
1390 lw $12,0($5)
1391 lw $8,0($6)
1392 lw $13,4($5)
1393 lw $14,2*4($5)
1394 multu ($12,$8) # mul_add_c(a[0],b[0],c1,c2,c3);
1395 lw $15,3*4($5)
1396 lw $9,4($6)
1397 lw $10,2*4($6)
1398 lw $11,3*4($6)
1399 mflo ($2,$12,$8)
1400 mfhi ($3,$12,$8)
1401 sw $2,0($4) # r[0]=c1;
1402
1403 multu ($12,$9) # mul_add_c(a[0],b[1],c2,c3,c1);
1404 mflo ($24,$12,$9)
1405 mfhi ($25,$12,$9)
1406 addu $3,$24
1407 sltu $1,$3,$24
1408 multu ($13,$8) # mul_add_c(a[1],b[0],c2,c3,c1);
1409 addu $7,$25,$1
1410 mflo ($24,$13,$8)
1411 mfhi ($25,$13,$8)
1412 addu $3,$24
1413 sltu $1,$3,$24
1414 multu ($14,$8) # mul_add_c(a[2],b[0],c3,c1,c2);
1415 addu $25,$1
1416 addu $7,$25
1417 sltu $2,$7,$25
1418 sw $3,4($4) # r[1]=c2;
1419
1420 mflo ($24,$14,$8)
1421 mfhi ($25,$14,$8)
1422 addu $7,$24
1423 sltu $1,$7,$24
1424 multu ($13,$9) # mul_add_c(a[1],b[1],c3,c1,c2);
1425 addu $25,$1
1426 addu $2,$25
1427 mflo ($24,$13,$9)
1428 mfhi ($25,$13,$9)
1429 addu $7,$24
1430 sltu $1,$7,$24
1431 multu ($12,$10) # mul_add_c(a[0],b[2],c3,c1,c2);
1432 addu $25,$1
1433 addu $2,$25
1434 sltu $3,$2,$25
1435 mflo ($24,$12,$10)
1436 mfhi ($25,$12,$10)
1437 addu $7,$24
1438 sltu $1,$7,$24
1439 multu ($12,$11) # mul_add_c(a[0],b[3],c1,c2,c3);
1440 addu $25,$1
1441 addu $2,$25
1442 sltu $1,$2,$25
1443 addu $3,$1
1444 sw $7,2*4($4) # r[2]=c3;
1445
1446 mflo ($24,$12,$11)
1447 mfhi ($25,$12,$11)
1448 addu $2,$24
1449 sltu $1,$2,$24
1450 multu ($13,$10) # mul_add_c(a[1],b[2],c1,c2,c3);
1451 addu $25,$1
1452 addu $3,$25
1453 sltu $7,$3,$25
1454 mflo ($24,$13,$10)
1455 mfhi ($25,$13,$10)
1456 addu $2,$24
1457 sltu $1,$2,$24
1458 multu ($14,$9) # mul_add_c(a[2],b[1],c1,c2,c3);
1459 addu $25,$1
1460 addu $3,$25
1461 sltu $1,$3,$25
1462 addu $7,$1
1463 mflo ($24,$14,$9)
1464 mfhi ($25,$14,$9)
1465 addu $2,$24
1466 sltu $1,$2,$24
1467 multu ($15,$8) # mul_add_c(a[3],b[0],c1,c2,c3);
1468 addu $25,$1
1469 addu $3,$25
1470 sltu $1,$3,$25
1471 addu $7,$1
1472 mflo ($24,$15,$8)
1473 mfhi ($25,$15,$8)
1474 addu $2,$24
1475 sltu $1,$2,$24
1476 multu ($15,$9) # mul_add_c(a[3],b[1],c2,c3,c1);
1477 addu $25,$1
1478 addu $3,$25
1479 sltu $1,$3,$25
1480 addu $7,$1
1481 sw $2,3*4($4) # r[3]=c1;
1482
1483 mflo ($24,$15,$9)
1484 mfhi ($25,$15,$9)
1485 addu $3,$24
1486 sltu $1,$3,$24
1487 multu ($14,$10) # mul_add_c(a[2],b[2],c2,c3,c1);
1488 addu $25,$1
1489 addu $7,$25
1490 sltu $2,$7,$25
1491 mflo ($24,$14,$10)
1492 mfhi ($25,$14,$10)
1493 addu $3,$24
1494 sltu $1,$3,$24
1495 multu ($13,$11) # mul_add_c(a[1],b[3],c2,c3,c1);
1496 addu $25,$1
1497 addu $7,$25
1498 sltu $1,$7,$25
1499 addu $2,$1
1500 mflo ($24,$13,$11)
1501 mfhi ($25,$13,$11)
1502 addu $3,$24
1503 sltu $1,$3,$24
1504 multu ($14,$11) # mul_add_c(a[2],b[3],c3,c1,c2);
1505 addu $25,$1
1506 addu $7,$25
1507 sltu $1,$7,$25
1508 addu $2,$1
1509 sw $3,4*4($4) # r[4]=c2;
1510
1511 mflo ($24,$14,$11)
1512 mfhi ($25,$14,$11)
1513 addu $7,$24
1514 sltu $1,$7,$24
1515 multu ($15,$10) # mul_add_c(a[3],b[2],c3,c1,c2);
1516 addu $25,$1
1517 addu $2,$25
1518 sltu $3,$2,$25
1519 mflo ($24,$15,$10)
1520 mfhi ($25,$15,$10)
1521 addu $7,$24
1522 sltu $1,$7,$24
1523 multu ($15,$11) # mul_add_c(a[3],b[3],c1,c2,c3);
1524 addu $25,$1
1525 addu $2,$25
1526 sltu $1,$2,$25
1527 addu $3,$1
1528 sw $7,5*4($4) # r[5]=c3;
1529
1530 mflo ($24,$15,$11)
1531 mfhi ($25,$15,$11)
1532 addu $2,$24
1533 sltu $1,$2,$24
1534 addu $25,$1
1535 addu $3,$25
1536 sw $2,6*4($4) # r[6]=c1;
1537 sw $3,7*4($4) # r[7]=c2;
1538
1539 .set noreorder
1540 jr $31
1541 nop
1542 .end bn_mul_comba4
1543
1544 .align 5
1545 .globl bn_sqr_comba8
1546 .ent bn_sqr_comba8
#
# bn_sqr_comba8 -- square an 8-word number, one result column at a time.
#
# Reads the eight 32-bit words a[0..7] through $5 and writes the sixteen
# 32-bit words r[0..15] of a*a through $4. Presumably called from C as
# bn_sqr_comba8(r, a) with r in $4 and a in $5; the prototype is not
# visible in this file, confirm against the BN headers.
#
# Register roles:
#   $12,$13,$14,$15   a[0],a[1],a[2],a[3]
#   $8,$9,$10,$11     a[4],a[5],a[6],a[7]
#   $2,$3,$7          three-word column accumulator (low, middle, high);
#                     the roles rotate by one register after each
#                     result word has been stored
#   $24,$25           low / high word of the product being accumulated
#   $1                carry scratch (the file is assembled under .set noat)
#   $6                scratch, used once while doubling a[0]*a[1]
#
# Only the registers listed above (plus HI/LO) are written and no stack
# is used. All eight input words are loaded before the first store to r[].
#
# The inherited mul_add_c/mul_add_c2 comments name the second factor
# b[..]; both factors always come from a[], since $5 is the only load base.
# The three-operand multu/mflo/mfhi spellings repeat the two factors next
# to the destination; how they expand is decided outside this block
# (presumably by mips_arch.h -- confirm).
#
# Two accumulation shapes are used, with (c0,c1,c2) = (low, middle, high):
#   single (squares a[i]*a[i]):
#       c0 += lo; hi += carry; c1 += hi; c2 += carry
#   double (cross products, counted twice):
#       c0 += lo twice; c1 += hi twice, each with the carry out of c0;
#       carries out of c1 go to c2. The first double step of a column
#       writes c2 with "sltu c2,c1,$1" instead of adding to it, because
#       that register still holds an already stored result word.
#   The high word of a 32x32 product is at most 2^32-2, so adding one
#   carry bit to it cannot wrap.
#
# Each multu is issued while the previous product is still being
# accumulated ("forward multiplication"), presumably to hide the
# multiplier latency.
#
1547 bn_sqr_comba8:
1548 .set reorder
1549 lw $12,0($5) # a[0]
1550 lw $13,4($5) # a[1]
1551 lw $14,2*4($5) # a[2]
1552 lw $15,3*4($5) # a[3]
1553
1554 multu ($12,$12) # mul_add_c(a[0],b[0],c1,c2,c3);
1555 lw $8,4*4($5) # a[4]
1556 lw $9,5*4($5) # a[5]
1557 lw $10,6*4($5) # a[6]
1558 lw $11,7*4($5) # a[7]
1559 mflo ($2,$12,$12)
1560 mfhi ($3,$12,$12)
1561 sw $2,0($4) # r[0] = low word of a[0]*a[0]; $3 carries the high word on
1562
# -- r[1] = 2*a[0]*a[1]; the product is doubled by shifting, the bits
# -- shifted out are captured with slt against $0 (sign bit test).
1563 multu ($12,$13) # mul_add_c2(a[0],b[1],c2,c3,c1);
1564 mflo ($24,$12,$13)
1565 mfhi ($25,$12,$13)
1566 slt $2,$25,$0 # top bit of hi, lost by the shift below; seeds the r[3] weight
1567 sll $25,1
1568 multu ($14,$12) # mul_add_c2(a[2],b[0],c3,c1,c2);
1569 slt $6,$24,$0 # top bit of lo, lost by the shift below
1570 addu $25,$6 # ... becomes bit 0 of the doubled hi
1571 sll $24,1
1572 addu $3,$24
1573 sltu $1,$3,$24 # carry out of the low word
1574 addu $7,$25,$1 # $7 starts the r[2] weight: doubled hi + carry
1575 sw $3,4($4) # r[1]
# -- r[2] = 2*a[2]*a[0] + a[1]*a[1]; accumulator low..high = $7,$2,$3
1576 mflo ($24,$14,$12)
1577 mfhi ($25,$14,$12)
1578 addu $7,$24
1579 sltu $1,$7,$24
1580 multu ($13,$13) # forward multiplication: a[1]*a[1]
1581 addu $7,$24
1582 addu $1,$25
1583 sltu $24,$7,$24
1584 addu $2,$1
1585 addu $25,$24
1586 sltu $3,$2,$1 # first double step of the column: $3 is written, not added to
1587 addu $2,$25
1588 sltu $25,$2,$25
1589 addu $3,$25
1590 mflo ($24,$13,$13)
1591 mfhi ($25,$13,$13)
1592 addu $7,$24
1593 sltu $1,$7,$24
1594 multu ($12,$15) # mul_add_c2(a[0],b[3],c1,c2,c3);
1595 addu $25,$1
1596 addu $2,$25
1597 sltu $1,$2,$25
1598 addu $3,$1
1599 sw $7,2*4($4) # r[2]
# -- r[3] = 2*a[0]*a[3] + 2*a[1]*a[2]; accumulator low..high = $2,$3,$7
1600 mflo ($24,$12,$15)
1601 mfhi ($25,$12,$15)
1602 addu $2,$24
1603 sltu $1,$2,$24
1604 multu ($13,$14) # forward multiplication: a[1]*a[2]
1605 addu $2,$24
1606 addu $1,$25
1607 sltu $24,$2,$24
1608 addu $3,$1
1609 addu $25,$24
1610 sltu $7,$3,$1
1611 addu $3,$25
1612 sltu $25,$3,$25
1613 addu $7,$25
1614 mflo ($24,$13,$14)
1615 mfhi ($25,$13,$14)
1616 addu $2,$24
1617 sltu $1,$2,$24
1618 multu ($8,$12) # forward multiplication: a[4]*a[0]
1619 addu $2,$24
1620 addu $1,$25
1621 sltu $24,$2,$24
1622 addu $3,$1
1623 addu $25,$24
1624 sltu $1,$3,$1
1625 addu $3,$25
1626 addu $7,$1
1627 sltu $25,$3,$25
1628 addu $7,$25
# -- r[4] = 2*a[4]*a[0] + 2*a[3]*a[1] + a[2]*a[2]; accumulator low..high = $3,$7,$2
1629 mflo ($24,$8,$12)
1630 mfhi ($25,$8,$12)
1631 sw $2,3*4($4) # r[3]
1632 addu $3,$24
1633 sltu $1,$3,$24
1634 multu ($15,$13) # forward multiplication: a[3]*a[1]
1635 addu $3,$24
1636 addu $1,$25
1637 sltu $24,$3,$24
1638 addu $7,$1
1639 addu $25,$24
1640 sltu $2,$7,$1
1641 addu $7,$25
1642 sltu $25,$7,$25
1643 addu $2,$25
1644 mflo ($24,$15,$13)
1645 mfhi ($25,$15,$13)
1646 addu $3,$24
1647 sltu $1,$3,$24
1648 multu ($14,$14) # forward multiplication: a[2]*a[2]
1649 addu $3,$24
1650 addu $1,$25
1651 sltu $24,$3,$24
1652 addu $7,$1
1653 addu $25,$24
1654 sltu $1,$7,$1
1655 addu $7,$25
1656 addu $2,$1
1657 sltu $25,$7,$25
1658 addu $2,$25
1659 mflo ($24,$14,$14)
1660 mfhi ($25,$14,$14)
1661 addu $3,$24
1662 sltu $1,$3,$24
1663 multu ($12,$9) # mul_add_c2(a[0],b[5],c3,c1,c2);
1664 addu $25,$1
1665 addu $7,$25
1666 sltu $1,$7,$25
1667 addu $2,$1
1668 sw $3,4*4($4) # r[4]
# -- r[5] = 2*a[0]*a[5] + 2*a[1]*a[4] + 2*a[2]*a[3]; accumulator low..high = $7,$2,$3
1669 mflo ($24,$12,$9)
1670 mfhi ($25,$12,$9)
1671 addu $7,$24
1672 sltu $1,$7,$24
1673 multu ($13,$8) # forward multiplication: a[1]*a[4]
1674 addu $7,$24
1675 addu $1,$25
1676 sltu $24,$7,$24
1677 addu $2,$1
1678 addu $25,$24
1679 sltu $3,$2,$1
1680 addu $2,$25
1681 sltu $25,$2,$25
1682 addu $3,$25
1683 mflo ($24,$13,$8)
1684 mfhi ($25,$13,$8)
1685 addu $7,$24
1686 sltu $1,$7,$24
1687 multu ($14,$15) # forward multiplication: a[2]*a[3]
1688 addu $7,$24
1689 addu $1,$25
1690 sltu $24,$7,$24
1691 addu $2,$1
1692 addu $25,$24
1693 sltu $1,$2,$1
1694 addu $2,$25
1695 addu $3,$1
1696 sltu $25,$2,$25
1697 addu $3,$25
1698 mflo ($24,$14,$15)
1699 mfhi ($25,$14,$15)
1700 addu $7,$24
1701 sltu $1,$7,$24
1702 multu ($10,$12) # forward multiplication: a[6]*a[0]
1703 addu $7,$24
1704 addu $1,$25
1705 sltu $24,$7,$24
1706 addu $2,$1
1707 addu $25,$24
1708 sltu $1,$2,$1
1709 addu $2,$25
1710 addu $3,$1
1711 sltu $25,$2,$25
1712 addu $3,$25
# -- r[6] = 2*a[6]*a[0] + 2*a[5]*a[1] + 2*a[4]*a[2] + a[3]*a[3]; accumulator low..high = $2,$3,$7
1713 mflo ($24,$10,$12)
1714 mfhi ($25,$10,$12)
1715 sw $7,5*4($4) # r[5]
1716 addu $2,$24
1717 sltu $1,$2,$24
1718 multu ($9,$13) # forward multiplication: a[5]*a[1]
1719 addu $2,$24
1720 addu $1,$25
1721 sltu $24,$2,$24
1722 addu $3,$1
1723 addu $25,$24
1724 sltu $7,$3,$1
1725 addu $3,$25
1726 sltu $25,$3,$25
1727 addu $7,$25
1728 mflo ($24,$9,$13)
1729 mfhi ($25,$9,$13)
1730 addu $2,$24
1731 sltu $1,$2,$24
1732 multu ($8,$14) # forward multiplication: a[4]*a[2]
1733 addu $2,$24
1734 addu $1,$25
1735 sltu $24,$2,$24
1736 addu $3,$1
1737 addu $25,$24
1738 sltu $1,$3,$1
1739 addu $3,$25
1740 addu $7,$1
1741 sltu $25,$3,$25
1742 addu $7,$25
1743 mflo ($24,$8,$14)
1744 mfhi ($25,$8,$14)
1745 addu $2,$24
1746 sltu $1,$2,$24
1747 multu ($15,$15) # forward multiplication: a[3]*a[3]
1748 addu $2,$24
1749 addu $1,$25
1750 sltu $24,$2,$24
1751 addu $3,$1
1752 addu $25,$24
1753 sltu $1,$3,$1
1754 addu $3,$25
1755 addu $7,$1
1756 sltu $25,$3,$25
1757 addu $7,$25
1758 mflo ($24,$15,$15)
1759 mfhi ($25,$15,$15)
1760 addu $2,$24
1761 sltu $1,$2,$24
1762 multu ($12,$11) # mul_add_c2(a[0],b[7],c2,c3,c1);
1763 addu $25,$1
1764 addu $3,$25
1765 sltu $1,$3,$25
1766 addu $7,$1
1767 sw $2,6*4($4) # r[6]
# -- r[7] = 2*a[0]*a[7] + 2*a[1]*a[6] + 2*a[2]*a[5] + 2*a[3]*a[4]; accumulator low..high = $3,$7,$2
1768 mflo ($24,$12,$11)
1769 mfhi ($25,$12,$11)
1770 addu $3,$24
1771 sltu $1,$3,$24
1772 multu ($13,$10) # forward multiplication: a[1]*a[6]
1773 addu $3,$24
1774 addu $1,$25
1775 sltu $24,$3,$24
1776 addu $7,$1
1777 addu $25,$24
1778 sltu $2,$7,$1
1779 addu $7,$25
1780 sltu $25,$7,$25
1781 addu $2,$25
1782 mflo ($24,$13,$10)
1783 mfhi ($25,$13,$10)
1784 addu $3,$24
1785 sltu $1,$3,$24
1786 multu ($14,$9) # forward multiplication: a[2]*a[5]
1787 addu $3,$24
1788 addu $1,$25
1789 sltu $24,$3,$24
1790 addu $7,$1
1791 addu $25,$24
1792 sltu $1,$7,$1
1793 addu $7,$25
1794 addu $2,$1
1795 sltu $25,$7,$25
1796 addu $2,$25
1797 mflo ($24,$14,$9)
1798 mfhi ($25,$14,$9)
1799 addu $3,$24
1800 sltu $1,$3,$24
1801 multu ($15,$8) # forward multiplication: a[3]*a[4]
1802 addu $3,$24
1803 addu $1,$25
1804 sltu $24,$3,$24
1805 addu $7,$1
1806 addu $25,$24
1807 sltu $1,$7,$1
1808 addu $7,$25
1809 addu $2,$1
1810 sltu $25,$7,$25
1811 addu $2,$25
1812 mflo ($24,$15,$8)
1813 mfhi ($25,$15,$8)
1814 addu $3,$24
1815 sltu $1,$3,$24
1816 multu ($11,$13) # forward multiplication: a[7]*a[1]
1817 addu $3,$24
1818 addu $1,$25
1819 sltu $24,$3,$24
1820 addu $7,$1
1821 addu $25,$24
1822 sltu $1,$7,$1
1823 addu $7,$25
1824 addu $2,$1
1825 sltu $25,$7,$25
1826 addu $2,$25
# -- r[8] = 2*a[7]*a[1] + 2*a[6]*a[2] + 2*a[5]*a[3] + a[4]*a[4]; accumulator low..high = $7,$2,$3
1827 mflo ($24,$11,$13)
1828 mfhi ($25,$11,$13)
1829 sw $3,7*4($4) # r[7]
1830 addu $7,$24
1831 sltu $1,$7,$24
1832 multu ($10,$14) # forward multiplication: a[6]*a[2]
1833 addu $7,$24
1834 addu $1,$25
1835 sltu $24,$7,$24
1836 addu $2,$1
1837 addu $25,$24
1838 sltu $3,$2,$1
1839 addu $2,$25
1840 sltu $25,$2,$25
1841 addu $3,$25
1842 mflo ($24,$10,$14)
1843 mfhi ($25,$10,$14)
1844 addu $7,$24
1845 sltu $1,$7,$24
1846 multu ($9,$15) # forward multiplication: a[5]*a[3]
1847 addu $7,$24
1848 addu $1,$25
1849 sltu $24,$7,$24
1850 addu $2,$1
1851 addu $25,$24
1852 sltu $1,$2,$1
1853 addu $2,$25
1854 addu $3,$1
1855 sltu $25,$2,$25
1856 addu $3,$25
1857 mflo ($24,$9,$15)
1858 mfhi ($25,$9,$15)
1859 addu $7,$24
1860 sltu $1,$7,$24
1861 multu ($8,$8) # forward multiplication: a[4]*a[4]
1862 addu $7,$24
1863 addu $1,$25
1864 sltu $24,$7,$24
1865 addu $2,$1
1866 addu $25,$24
1867 sltu $1,$2,$1
1868 addu $2,$25
1869 addu $3,$1
1870 sltu $25,$2,$25
1871 addu $3,$25
1872 mflo ($24,$8,$8)
1873 mfhi ($25,$8,$8)
1874 addu $7,$24
1875 sltu $1,$7,$24
1876 multu ($14,$11) # mul_add_c2(a[2],b[7],c1,c2,c3);
1877 addu $25,$1
1878 addu $2,$25
1879 sltu $1,$2,$25
1880 addu $3,$1
1881 sw $7,8*4($4) # r[8]
# -- r[9] = 2*a[2]*a[7] + 2*a[3]*a[6] + 2*a[4]*a[5]; accumulator low..high = $2,$3,$7
1882 mflo ($24,$14,$11)
1883 mfhi ($25,$14,$11)
1884 addu $2,$24
1885 sltu $1,$2,$24
1886 multu ($15,$10) # forward multiplication: a[3]*a[6]
1887 addu $2,$24
1888 addu $1,$25
1889 sltu $24,$2,$24
1890 addu $3,$1
1891 addu $25,$24
1892 sltu $7,$3,$1
1893 addu $3,$25
1894 sltu $25,$3,$25
1895 addu $7,$25
1896 mflo ($24,$15,$10)
1897 mfhi ($25,$15,$10)
1898 addu $2,$24
1899 sltu $1,$2,$24
1900 multu ($8,$9) # forward multiplication: a[4]*a[5]
1901 addu $2,$24
1902 addu $1,$25
1903 sltu $24,$2,$24
1904 addu $3,$1
1905 addu $25,$24
1906 sltu $1,$3,$1
1907 addu $3,$25
1908 addu $7,$1
1909 sltu $25,$3,$25
1910 addu $7,$25
1911 mflo ($24,$8,$9)
1912 mfhi ($25,$8,$9)
1913 addu $2,$24
1914 sltu $1,$2,$24
1915 multu ($11,$15) # forward multiplication: a[7]*a[3]
1916 addu $2,$24
1917 addu $1,$25
1918 sltu $24,$2,$24
1919 addu $3,$1
1920 addu $25,$24
1921 sltu $1,$3,$1
1922 addu $3,$25
1923 addu $7,$1
1924 sltu $25,$3,$25
1925 addu $7,$25
# -- r[10] = 2*a[7]*a[3] + 2*a[6]*a[4] + a[5]*a[5]; accumulator low..high = $3,$7,$2
1926 mflo ($24,$11,$15)
1927 mfhi ($25,$11,$15)
1928 sw $2,9*4($4) # r[9]
1929 addu $3,$24
1930 sltu $1,$3,$24
1931 multu ($10,$8) # forward multiplication: a[6]*a[4]
1932 addu $3,$24
1933 addu $1,$25
1934 sltu $24,$3,$24
1935 addu $7,$1
1936 addu $25,$24
1937 sltu $2,$7,$1
1938 addu $7,$25
1939 sltu $25,$7,$25
1940 addu $2,$25
1941 mflo ($24,$10,$8)
1942 mfhi ($25,$10,$8)
1943 addu $3,$24
1944 sltu $1,$3,$24
1945 multu ($9,$9) # forward multiplication: a[5]*a[5]
1946 addu $3,$24
1947 addu $1,$25
1948 sltu $24,$3,$24
1949 addu $7,$1
1950 addu $25,$24
1951 sltu $1,$7,$1
1952 addu $7,$25
1953 addu $2,$1
1954 sltu $25,$7,$25
1955 addu $2,$25
1956 mflo ($24,$9,$9)
1957 mfhi ($25,$9,$9)
1958 addu $3,$24
1959 sltu $1,$3,$24
1960 multu ($8,$11) # mul_add_c2(a[4],b[7],c3,c1,c2);
1961 addu $25,$1
1962 addu $7,$25
1963 sltu $1,$7,$25
1964 addu $2,$1
1965 sw $3,10*4($4) # r[10]
# -- r[11] = 2*a[4]*a[7] + 2*a[5]*a[6]; accumulator low..high = $7,$2,$3
1966 mflo ($24,$8,$11)
1967 mfhi ($25,$8,$11)
1968 addu $7,$24
1969 sltu $1,$7,$24
1970 multu ($9,$10) # forward multiplication: a[5]*a[6]
1971 addu $7,$24
1972 addu $1,$25
1973 sltu $24,$7,$24
1974 addu $2,$1
1975 addu $25,$24
1976 sltu $3,$2,$1
1977 addu $2,$25
1978 sltu $25,$2,$25
1979 addu $3,$25
1980 mflo ($24,$9,$10)
1981 mfhi ($25,$9,$10)
1982 addu $7,$24
1983 sltu $1,$7,$24
1984 multu ($11,$9) # forward multiplication: a[7]*a[5]
1985 addu $7,$24
1986 addu $1,$25
1987 sltu $24,$7,$24
1988 addu $2,$1
1989 addu $25,$24
1990 sltu $1,$2,$1
1991 addu $2,$25
1992 addu $3,$1
1993 sltu $25,$2,$25
1994 addu $3,$25
# -- r[12] = 2*a[7]*a[5] + a[6]*a[6]; accumulator low..high = $2,$3,$7
1995 mflo ($24,$11,$9)
1996 mfhi ($25,$11,$9)
1997 sw $7,11*4($4) # r[11]
1998 addu $2,$24
1999 sltu $1,$2,$24
2000 multu ($10,$10) # forward multiplication: a[6]*a[6]
2001 addu $2,$24
2002 addu $1,$25
2003 sltu $24,$2,$24
2004 addu $3,$1
2005 addu $25,$24
2006 sltu $7,$3,$1
2007 addu $3,$25
2008 sltu $25,$3,$25
2009 addu $7,$25
2010 mflo ($24,$10,$10)
2011 mfhi ($25,$10,$10)
2012 addu $2,$24
2013 sltu $1,$2,$24
2014 multu ($10,$11) # mul_add_c2(a[6],b[7],c2,c3,c1);
2015 addu $25,$1
2016 addu $3,$25
2017 sltu $1,$3,$25
2018 addu $7,$1
2019 sw $2,12*4($4) # r[12]
# -- r[13] = 2*a[6]*a[7]; accumulator low..high = $3,$7,$2
2020 mflo ($24,$10,$11)
2021 mfhi ($25,$10,$11)
2022 addu $3,$24
2023 sltu $1,$3,$24
2024 multu ($11,$11) # forward multiplication: a[7]*a[7]
2025 addu $3,$24
2026 addu $1,$25
2027 sltu $24,$3,$24
2028 addu $7,$1
2029 addu $25,$24
2030 sltu $2,$7,$1
2031 addu $7,$25
2032 sltu $25,$7,$25
2033 addu $2,$25
# -- r[14], r[15]: add a[7]*a[7] to the two remaining accumulator words $7,$2
2034 mflo ($24,$11,$11)
2035 mfhi ($25,$11,$11)
2036 sw $3,13*4($4) # r[13]
2037
2038 addu $7,$24
2039 sltu $1,$7,$24
2040 addu $25,$1
2041 addu $2,$25
2042 sw $7,14*4($4) # r[14]
2043 sw $2,15*4($4) # r[15]
2044
2045 .set noreorder
2046 jr $31 # return; under noreorder the delay slot is filled by hand
2047 nop
2048 .end bn_sqr_comba8
2049
2050 .align 5
2051 .globl bn_sqr_comba4
2052 .ent bn_sqr_comba4
#
# bn_sqr_comba4 -- square a 4-word number, one result column at a time.
#
# Reads the four 32-bit words a[0..3] through $5 and writes the eight
# 32-bit words r[0..7] of a*a through $4. Presumably called from C as
# bn_sqr_comba4(r, a) with r in $4 and a in $5; the prototype is not
# visible in this file, confirm against the BN headers.
#
# Register roles:
#   $12,$13,$14,$15   a[0],a[1],a[2],a[3]
#   $2,$3,$7          three-word column accumulator (low, middle, high);
#                     the roles rotate by one register after each
#                     result word has been stored
#   $24,$25           low / high word of the product being accumulated
#   $1                carry scratch (the file is assembled under .set noat)
#   $6                scratch, used once while doubling a[0]*a[1]
#
# Only the registers listed above (plus HI/LO) are written and no stack
# is used. All four input words are loaded before the first store to r[].
#
# The inherited mul_add_c/mul_add_c2 comments name the second factor
# b[..]; both factors always come from a[], since $5 is the only load base.
#
# Squares a[i]*a[i] are added once (low word, then high word plus carry,
# then the carry out of that). Cross products are added twice: the low
# word twice into the low accumulator word, the high word twice into the
# middle word together with the carries, and carries out of the middle
# word into the high word. The first double step of a column writes the
# high word with "sltu hi,mid,$1" instead of adding to it, because that
# register still holds an already stored result word.
#
# Each multu is issued while the previous product is still being
# accumulated ("forward multiplication"), presumably to hide the
# multiplier latency.
#
2053 bn_sqr_comba4:
2054 .set reorder
2055 lw $12,0($5) # a[0]
2056 lw $13,4($5) # a[1]
2057 multu ($12,$12) # mul_add_c(a[0],b[0],c1,c2,c3);
2058 lw $14,2*4($5) # a[2]
2059 lw $15,3*4($5) # a[3]
2060 mflo ($2,$12,$12)
2061 mfhi ($3,$12,$12)
2062 sw $2,0($4) # r[0] = low word of a[0]*a[0]; $3 carries the high word on
2063
# -- r[1] = 2*a[0]*a[1]; the product is doubled by shifting, the bits
# -- shifted out are captured with slt against $0 (sign bit test).
2064 multu ($12,$13) # mul_add_c2(a[0],b[1],c2,c3,c1);
2065 mflo ($24,$12,$13)
2066 mfhi ($25,$12,$13)
2067 slt $2,$25,$0 # top bit of hi, lost by the shift below; seeds the r[3] weight
2068 sll $25,1
2069 multu ($14,$12) # mul_add_c2(a[2],b[0],c3,c1,c2);
2070 slt $6,$24,$0 # top bit of lo, lost by the shift below
2071 addu $25,$6 # ... becomes bit 0 of the doubled hi
2072 sll $24,1
2073 addu $3,$24
2074 sltu $1,$3,$24 # carry out of the low word
2075 addu $7,$25,$1 # $7 starts the r[2] weight: doubled hi + carry
2076 sw $3,4($4) # r[1]
# -- r[2] = 2*a[2]*a[0] + a[1]*a[1]; accumulator low..high = $7,$2,$3
2077 mflo ($24,$14,$12)
2078 mfhi ($25,$14,$12)
2079 addu $7,$24
2080 sltu $1,$7,$24
2081 multu ($13,$13) # forward multiplication: a[1]*a[1]
2082 addu $7,$24
2083 addu $1,$25
2084 sltu $24,$7,$24
2085 addu $2,$1
2086 addu $25,$24
2087 sltu $3,$2,$1 # first double step of the column: $3 is written, not added to
2088 addu $2,$25
2089 sltu $25,$2,$25
2090 addu $3,$25
2091 mflo ($24,$13,$13)
2092 mfhi ($25,$13,$13)
2093 addu $7,$24
2094 sltu $1,$7,$24
2095 multu ($12,$15) # mul_add_c2(a[0],b[3],c1,c2,c3);
2096 addu $25,$1
2097 addu $2,$25
2098 sltu $1,$2,$25
2099 addu $3,$1
2100 sw $7,2*4($4) # r[2]
# -- r[3] = 2*a[0]*a[3] + 2*a[1]*a[2]; accumulator low..high = $2,$3,$7
2101 mflo ($24,$12,$15)
2102 mfhi ($25,$12,$15)
2103 addu $2,$24
2104 sltu $1,$2,$24
2105 multu ($13,$14) # forward multiplication: a[1]*a[2]
2106 addu $2,$24
2107 addu $1,$25
2108 sltu $24,$2,$24
2109 addu $3,$1
2110 addu $25,$24
2111 sltu $7,$3,$1
2112 addu $3,$25
2113 sltu $25,$3,$25
2114 addu $7,$25
2115 mflo ($24,$13,$14)
2116 mfhi ($25,$13,$14)
2117 addu $2,$24
2118 sltu $1,$2,$24
2119 multu ($15,$13) # forward multiplication: a[3]*a[1]
2120 addu $2,$24
2121 addu $1,$25
2122 sltu $24,$2,$24
2123 addu $3,$1
2124 addu $25,$24
2125 sltu $1,$3,$1
2126 addu $3,$25
2127 addu $7,$1
2128 sltu $25,$3,$25
2129 addu $7,$25
# -- r[4] = 2*a[3]*a[1] + a[2]*a[2]; accumulator low..high = $3,$7,$2
2130 mflo ($24,$15,$13)
2131 mfhi ($25,$15,$13)
2132 sw $2,3*4($4) # r[3]
2133 addu $3,$24
2134 sltu $1,$3,$24
2135 multu ($14,$14) # forward multiplication: a[2]*a[2]
2136 addu $3,$24
2137 addu $1,$25
2138 sltu $24,$3,$24
2139 addu $7,$1
2140 addu $25,$24
2141 sltu $2,$7,$1
2142 addu $7,$25
2143 sltu $25,$7,$25
2144 addu $2,$25
2145 mflo ($24,$14,$14)
2146 mfhi ($25,$14,$14)
2147 addu $3,$24
2148 sltu $1,$3,$24
2149 multu ($14,$15) # mul_add_c2(a[2],b[3],c3,c1,c2);
2150 addu $25,$1
2151 addu $7,$25
2152 sltu $1,$7,$25
2153 addu $2,$1
2154 sw $3,4*4($4) # r[4]
# -- r[5] = 2*a[2]*a[3]; accumulator low..high = $7,$2,$3
2155 mflo ($24,$14,$15)
2156 mfhi ($25,$14,$15)
2157 addu $7,$24
2158 sltu $1,$7,$24
2159 multu ($15,$15) # forward multiplication: a[3]*a[3]
2160 addu $7,$24
2161 addu $1,$25
2162 sltu $24,$7,$24
2163 addu $2,$1
2164 addu $25,$24
2165 sltu $3,$2,$1
2166 addu $2,$25
2167 sltu $25,$2,$25
2168 addu $3,$25
# -- r[6], r[7]: add a[3]*a[3] to the two remaining accumulator words $2,$3
2169 mflo ($24,$15,$15)
2170 mfhi ($25,$15,$15)
2171 sw $7,5*4($4) # r[5]
2172
2173 addu $2,$24
2174 sltu $1,$2,$24
2175 addu $25,$1
2176 addu $3,$25
2177 sw $2,6*4($4) # r[6]
2178 sw $3,7*4($4) # r[7]
2179
2180 .set noreorder
2181 jr $31 # return; under noreorder the delay slot is filled by hand
2182 nop
2183 .end bn_sqr_comba4
2184