Home | History | Annotate | Line # | Download | only in pa
milli64.S revision 1.1.1.2
      1 /* 32 and 64-bit millicode, original author Hewlett-Packard
      2    adapted for gcc by Paul Bame <bame (at) debian.org>
      3    and Alan Modra <alan (at) linuxcare.com.au>.
      4 
      5    Copyright (C) 2001-2015 Free Software Foundation, Inc.
      6 
      7 This file is part of GCC.
      8 
      9 GCC is free software; you can redistribute it and/or modify it under
     10 the terms of the GNU General Public License as published by the Free
     11 Software Foundation; either version 3, or (at your option) any later
     12 version.
     13 
     14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
     15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
     16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     17 for more details.
     18 
     19 Under Section 7 of GPL version 3, you are granted additional
     20 permissions described in the GCC Runtime Library Exception, version
     21 3.1, as published by the Free Software Foundation.
     22 
     23 You should have received a copy of the GNU General Public License and
     24 a copy of the GCC Runtime Library Exception along with this program;
     25 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     26 <http://www.gnu.org/licenses/>.  */
     27 
      28 #ifdef pa64
      29         .level  2.0w	/* select PA-RISC 2.0 "wide" (64-bit) instruction set */
      30 #endif
     31 
      32 /* Hardware General Registers.  Aliases so the bare names (r0..r31)
      can be used in place of the %-prefixed assembler names.  */
      33 r0:	.reg	%r0
      34 r1:	.reg	%r1
      35 r2:	.reg	%r2
      36 r3:	.reg	%r3
      37 r4:	.reg	%r4
      38 r5:	.reg	%r5
      39 r6:	.reg	%r6
      40 r7:	.reg	%r7
      41 r8:	.reg	%r8
      42 r9:	.reg	%r9
      43 r10:	.reg	%r10
      44 r11:	.reg	%r11
      45 r12:	.reg	%r12
      46 r13:	.reg	%r13
      47 r14:	.reg	%r14
      48 r15:	.reg	%r15
      49 r16:	.reg	%r16
      50 r17:	.reg	%r17
      51 r18:	.reg	%r18
      52 r19:	.reg	%r19
      53 r20:	.reg	%r20
      54 r21:	.reg	%r21
      55 r22:	.reg	%r22
      56 r23:	.reg	%r23
      57 r24:	.reg	%r24
      58 r25:	.reg	%r25
      59 r26:	.reg	%r26
      60 r27:	.reg	%r27
      61 r28:	.reg	%r28
      62 r29:	.reg	%r29
      63 r30:	.reg	%r30
      64 r31:	.reg	%r31
      65 
      66 /* Hardware Space Registers.  */
      67 sr0:	.reg	%sr0
      68 sr1:	.reg	%sr1
      69 sr2:	.reg	%sr2
      70 sr3:	.reg	%sr3
      71 sr4:	.reg	%sr4
      72 sr5:	.reg	%sr5
      73 sr6:	.reg	%sr6
      74 sr7:	.reg	%sr7
      75 
      76 /* Hardware Floating Point Registers.  */
      77 fr0:	.reg	%fr0
      78 fr1:	.reg	%fr1
      79 fr2:	.reg	%fr2
      80 fr3:	.reg	%fr3
      81 fr4:	.reg	%fr4
      82 fr5:	.reg	%fr5
      83 fr6:	.reg	%fr6
      84 fr7:	.reg	%fr7
      85 fr8:	.reg	%fr8
      86 fr9:	.reg	%fr9
      87 fr10:	.reg	%fr10
      88 fr11:	.reg	%fr11
      89 fr12:	.reg	%fr12
      90 fr13:	.reg	%fr13
      91 fr14:	.reg	%fr14
      92 fr15:	.reg	%fr15
      93 
      94 /* Hardware Control Registers.  */
      95 cr11:	.reg	%cr11
      96 sar:	.reg	%cr11	/* Shift Amount Register */
     97 
      98 /* Software Architecture General Registers.  Symbolic names for the
      calling-convention roles of the general registers.  */
      99 rp:	.reg    r2	/* return pointer */
     100 #ifdef pa64
     101 mrp:	.reg	r2 	/* millicode return pointer */
     102 #else
     103 mrp:	.reg	r31	/* millicode return pointer */
     104 #endif
     105 ret0:	.reg    r28	/* return value */
     106 ret1:	.reg    r29	/* return value (high part of double) */
     107 sp:	.reg 	r30	/* stack pointer */
     108 dp:	.reg	r27	/* data pointer */
     109 arg0:	.reg	r26	/* argument */
     110 arg1:	.reg	r25	/* argument or high part of double argument */
     111 arg2:	.reg	r24	/* argument */
     112 arg3:	.reg	r23	/* argument or high part of double argument */
     113 
     114 /* Software Architecture Space Registers.  */
     115 /* 		sr0	; return link from BLE */
     116 sret:	.reg	sr1	/* return value */
     117 sarg:	.reg	sr1	/* argument */
     118 /* 		sr4	; PC SPACE tracker */
     119 /* 		sr5	; process private data */
     120 
     121 /* Frame Offsets (millicode convention!)  Used when calling other
     122    millicode routines.  Stack unwinding is dependent upon these
     123    definitions.  Offsets are relative to the stack pointer.  */
     124 r31_slot:	.equ	-20	/* "current RP" slot */
     125 sr0_slot:	.equ	-16     /* "static link" slot */
     126 #if defined(pa64)
     127 mrp_slot:       .equ    -16	/* "current RP" slot */
     128 psp_slot:       .equ    -8	/* "previous SP" slot */
     129 #else
     130 mrp_slot:	.equ	-20     /* "current RP" slot (replacing "r31_slot") */
     131 #endif
    132 
    133 
     134 #define DEFINE(name,value)name:	.EQU	value
     135 #define RDEFINE(name,value)name:	.REG	value
     136 #ifdef milliext
/* External millicode: entered via BLE through space register sr7,
   which deposits the caller's space id in sr0; returns are therefore
   inter-space BE instructions through sr0.  The ",n" forms nullify
   the instruction in the branch delay slot.  */
     137 #define MILLI_BE(lbl)   BE    lbl(sr7,r0)
     138 #define MILLI_BEN(lbl)  BE,n  lbl(sr7,r0)
     139 #define MILLI_BLE(lbl)	BLE   lbl(sr7,r0)
     140 #define MILLI_BLEN(lbl)	BLE,n lbl(sr7,r0)
     141 #define MILLIRETN	BE,n  0(sr0,mrp)
     142 #define MILLIRET	BE    0(sr0,mrp)
     143 #define MILLI_RETN	BE,n  0(sr0,mrp)
     144 #define MILLI_RET	BE    0(sr0,mrp)
     145 #else
/* Internal millicode: plain local branches, and return is a BV
   through the millicode return pointer.  */
     146 #define MILLI_BE(lbl)	B     lbl
     147 #define MILLI_BEN(lbl)  B,n   lbl
     148 #define MILLI_BLE(lbl)	BL    lbl,mrp
     149 #define MILLI_BLEN(lbl)	BL,n  lbl,mrp
     150 #define MILLIRETN	BV,n  0(mrp)
     151 #define MILLIRET	BV    0(mrp)
     152 #define MILLI_RETN	BV,n  0(mrp)
     153 #define MILLI_RET	BV    0(mrp)
     154 #endif
     155 
     156 #ifdef __STDC__
     157 #define CAT(a,b)	a##b
     158 #else
     159 #define CAT(a,b)	a/**/b
     160 #endif
    161 
     162 #ifdef ELF
/* Object-format-specific section and symbol macros (this arm: ELF;
   below: COFF and SOM).  The "!" is the assembler's statement
   separator, used so one macro can emit two statements.  */
     163 #define SUBSPA_MILLI	 .section .text
     164 #define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16
     165 #define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16
     166 #define ATTR_MILLI
     167 #define SUBSPA_DATA	 .section .data
     168 #define ATTR_DATA
     169 #define GLOBAL		 $global$
     170 #define GSYM(sym) 	 !sym:
     171 #define LSYM(sym)	 !CAT(.L,sym:)
     172 #define LREF(sym)	 CAT(.L,sym)
     173 
     174 #else
     175 
     176 #ifdef coff
     177 /* This used to be .milli but since link32 places different named
     178    sections in different segments millicode ends up a long ways away
     179    from .text (1meg?).  This way they will be a lot closer.
     180 
     181    The SUBSPA_MILLI_* specify locality sets for certain millicode
     182    modules in order to ensure that modules that call one another are
     183    placed close together. Without locality sets this is unlikely to
     184    happen because of the Dynamite linker library search algorithm. We
     185    want these modules close together so that short calls always reach
     186    (we don't want to require long calls or use long call stubs).  */
     187 
     188 #define SUBSPA_MILLI	 .subspa .text
     189 #define SUBSPA_MILLI_DIV .subspa .text$dv,align=16
     190 #define SUBSPA_MILLI_MUL .subspa .text$mu,align=16
     191 #define ATTR_MILLI	 .attr code,read,execute
     192 #define SUBSPA_DATA	 .subspa .data
     193 #define ATTR_DATA	 .attr init_data,read,write
     194 #define GLOBAL		 _gp
     195 #else
/* SOM (HP-UX native object format).  */
     196 #define SUBSPA_MILLI	 .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8
     197 #define SUBSPA_MILLI_DIV SUBSPA_MILLI
     198 #define SUBSPA_MILLI_MUL SUBSPA_MILLI
     199 #define ATTR_MILLI
     200 #define SUBSPA_DATA	 .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero
     201 #define ATTR_DATA
     202 #define GLOBAL		 $global$
     203 #endif
     204 #define SPACE_DATA	 .space $PRIVATE$,spnum=1,sort=16
     205 
     206 #define GSYM(sym)	 !sym
     207 #define LSYM(sym)	 !CAT(L$,sym)
     208 #define LREF(sym)	 CAT(L$,sym)
     209 #endif
    210 
     211 #ifdef L_dyncall
/* $$dyncall: indirect-call helper.  %r22 holds either a plain code
   address, or (when bit 30 is set) the address of a plabel — a
   function descriptor from which the target address and the new
   linkage-table pointer (%r19) are loaded.  %r2 holds the caller's
   return address.  */
     212 	SUBSPA_MILLI
     213 	ATTR_DATA
     214 GSYM($$dyncall)
     215 	.export $$dyncall,millicode
     216 	.proc
     217 	.callinfo	millicode
     218 	.entry
     219 	bb,>=,n %r22,30,LREF(1)		; branch if not plabel address (bit 30 clear)
     220 	depi	0,31,2,%r22		; clear the two least significant bits
     221 	ldw	4(%r22),%r19		; load new LTP value
     222 	ldw	0(%r22),%r22		; load address of target
     223 LSYM(1)
     224 #ifdef LINUX
     225 	bv	%r0(%r22)		; branch to the real target
     226 #else
     227 	ldsid	(%sr0,%r22),%r1		; get the "space ident" selected by r22
     228 	mtsp	%r1,%sr0		; move that space identifier into sr0
     229 	be	0(%sr0,%r22)		; branch to the real target
     230 #endif
     231 	stw	%r2,-24(%r30)		; save return address into frame marker (branch delay slot)
     232 	.exit
     233 	.procend
     234 #endif
    235 
     236 #ifdef L_divI
     237 /* ROUTINES:	$$divI, $$divoI
     238 
     239    Single precision divide for signed binary integers.
     240 
     241    The quotient is truncated towards zero.
     242    The sign of the quotient is the XOR of the signs of the dividend and
     243    divisor.
     244    Divide by zero is trapped.
     245    Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI.
     246 
     247    INPUT REGISTERS:
     248    .	arg0 ==	dividend
     249    .	arg1 ==	divisor
     250    .	mrp  == return pc
     251    .	sr0  == return space when called externally
     252 
     253    OUTPUT REGISTERS:
     254    .	arg0 =	undefined
     255    .	arg1 =	undefined
     256    .	ret1 =	quotient
     257 
     258    OTHER REGISTERS AFFECTED:
     259    .	r1   =	undefined
     260 
     261    SIDE EFFECTS:
     262    .	Causes a trap under the following conditions:
     263    .		divisor is zero  (traps with ADDIT,=  0,25,0)
     264    .		dividend==-2**31  and divisor==-1 and routine is $$divoI
     265    .				 (traps with ADDO  26,25,0)
     266    .	Changes memory at the following places:
     267    .		NONE
     268 
     269    PERMISSIBLE CONTEXT:
     270    .	Unwindable.
     271    .	Suitable for internal or external millicode.
     272    .	Assumes the special millicode register conventions.
     273 
     274    DISCUSSION:
     275    .	Branches to other millicode routines using BE
     276    .		$$div_# for # being 2,3,4,5,6,7,8,9,10,12,14,15
     277    .
     278    .	For selected divisors, calls a divide by constant routine written by
     279    .	Karl Pettis.  Eligible divisors are 1..15 excluding 11 and 13.
     280    .
     281    .	The only overflow case is -2**31 divided by -1.
     282    .	Both routines return -2**31 but only $$divoI traps.  */
     283 
     284 RDEFINE(temp,r1)
     285 RDEFINE(retreg,ret1)	/*  r29 */
     286 RDEFINE(temp1,arg0)
     287 	SUBSPA_MILLI_DIV
     288 	ATTR_MILLI
     289 	.import $$divI_2,millicode
     290 	.import $$divI_3,millicode
     291 	.import $$divI_4,millicode
     292 	.import $$divI_5,millicode
     293 	.import $$divI_6,millicode
     294 	.import $$divI_7,millicode
     295 	.import $$divI_8,millicode
     296 	.import $$divI_9,millicode
     297 	.import $$divI_10,millicode
     298 	.import $$divI_12,millicode
     299 	.import $$divI_14,millicode
     300 	.import $$divI_15,millicode
     301 	.export $$divI,millicode
     302 	.export	$$divoI,millicode
     303 	.proc
     304 	.callinfo	millicode
     305 	.entry
     306 GSYM($$divoI)
     307 	comib,=,n  -1,arg1,LREF(negative1)	/*  when divisor == -1 */
     308 GSYM($$divI)
     309 	ldo	-1(arg1),temp		/*  is there at most one bit set ? */
     310 	and,<>	arg1,temp,r0		/*  if not, don't use power of 2 divide */
     311 	addi,>	0,arg1,r0		/*  if divisor > 0, use power of 2 divide */
     312 	b,n	LREF(neg_denom)
     313 LSYM(pow2)
     314 	addi,>=	0,arg0,retreg		/*  if numerator is negative, add the */
     315 	add	arg0,temp,retreg	/*  (denominator - 1) to correct for shifts */
     316 	extru,=	arg1,15,16,temp		/*  test denominator with 0xffff0000 */
     317 	extrs	retreg,15,16,retreg	/*  retreg = retreg >> 16 */
     318 	or	arg1,temp,arg1		/*  arg1 = arg1 | (arg1 >> 16) */
     319 	ldi	0xcc,temp1		/*  setup 0xcc in temp1 */
     320 	extru,= arg1,23,8,temp		/*  test denominator with 0xff00 */
     321 	extrs	retreg,23,24,retreg	/*  retreg = retreg >> 8 */
     322 	or	arg1,temp,arg1		/*  arg1 = arg1 | (arg1 >> 8) */
     323 	ldi	0xaa,temp		/*  setup 0xaa in temp */
     324 	extru,= arg1,27,4,r0		/*  test denominator with 0xf0 */
     325 	extrs	retreg,27,28,retreg	/*  retreg = retreg >> 4 */
     326 	and,=	arg1,temp1,r0		/*  test denominator with 0xcc */
     327 	extrs	retreg,29,30,retreg	/*  retreg = retreg >> 2 */
     328 	and,=	arg1,temp,r0		/*  test denominator with 0xaa */
     329 	extrs	retreg,30,31,retreg	/*  retreg = retreg >> 1 */
     330 	MILLIRETN
     331 LSYM(neg_denom)
     332 	addi,<	0,arg1,r0		/*  if arg1 >= 0, it's not power of 2 */
     333 	b,n	LREF(regular_seq)
     334 	sub	r0,arg1,temp		/*  make denominator positive */
     335 	comb,=,n  arg1,temp,LREF(regular_seq)	/*  test against 0x80000000 and 0 */
     336 	ldo	-1(temp),retreg		/*  is there at most one bit set ? */
     337 	and,=	temp,retreg,r0		/*  if so, the denominator is power of 2 */
     338 	b,n	LREF(regular_seq)
     339 	sub	r0,arg0,retreg		/*  negate numerator */
     340 	comb,=,n arg0,retreg,LREF(regular_seq) /*  test against 0x80000000 */
     341 	copy	retreg,arg0		/*  set up arg0, arg1 and temp	*/
     342 	copy	temp,arg1		/*  before branching to pow2 */
     343 	b	LREF(pow2)
     344 	ldo	-1(arg1),temp		/*  (delay slot) recompute mask */
     345 LSYM(regular_seq)
     346 	comib,>>=,n 15,arg1,LREF(small_divisor)
     347 	add,>=	0,arg0,retreg		/*  move dividend, if retreg < 0, */
     348 LSYM(normal)
     349 	subi	0,retreg,retreg		/*    make it positive */
     350 	sub	0,arg1,temp		/*  clear carry,  */
     351 					/*    negate the divisor */
     352 	ds	0,temp,0		/*  set V-bit to the comple- */
     353 					/*    ment of the divisor sign */
     354 	add	retreg,retreg,retreg	/*  shift msb bit into carry */
     355 	ds	r0,arg1,temp		/*  1st divide step, if no carry */
     356 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     357 	ds	temp,arg1,temp		/*  2nd divide step */
     358 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     359 	ds	temp,arg1,temp		/*  3rd divide step */
     360 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     361 	ds	temp,arg1,temp		/*  4th divide step */
     362 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     363 	ds	temp,arg1,temp		/*  5th divide step */
     364 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     365 	ds	temp,arg1,temp		/*  6th divide step */
     366 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     367 	ds	temp,arg1,temp		/*  7th divide step */
     368 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     369 	ds	temp,arg1,temp		/*  8th divide step */
     370 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     371 	ds	temp,arg1,temp		/*  9th divide step */
     372 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     373 	ds	temp,arg1,temp		/*  10th divide step */
     374 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     375 	ds	temp,arg1,temp		/*  11th divide step */
     376 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     377 	ds	temp,arg1,temp		/*  12th divide step */
     378 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     379 	ds	temp,arg1,temp		/*  13th divide step */
     380 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     381 	ds	temp,arg1,temp		/*  14th divide step */
     382 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     383 	ds	temp,arg1,temp		/*  15th divide step */
     384 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     385 	ds	temp,arg1,temp		/*  16th divide step */
     386 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     387 	ds	temp,arg1,temp		/*  17th divide step */
     388 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     389 	ds	temp,arg1,temp		/*  18th divide step */
     390 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     391 	ds	temp,arg1,temp		/*  19th divide step */
     392 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     393 	ds	temp,arg1,temp		/*  20th divide step */
     394 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     395 	ds	temp,arg1,temp		/*  21st divide step */
     396 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     397 	ds	temp,arg1,temp		/*  22nd divide step */
     398 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     399 	ds	temp,arg1,temp		/*  23rd divide step */
     400 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     401 	ds	temp,arg1,temp		/*  24th divide step */
     402 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     403 	ds	temp,arg1,temp		/*  25th divide step */
     404 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     405 	ds	temp,arg1,temp		/*  26th divide step */
     406 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     407 	ds	temp,arg1,temp		/*  27th divide step */
     408 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     409 	ds	temp,arg1,temp		/*  28th divide step */
     410 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     411 	ds	temp,arg1,temp		/*  29th divide step */
     412 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     413 	ds	temp,arg1,temp		/*  30th divide step */
     414 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     415 	ds	temp,arg1,temp		/*  31st divide step */
     416 	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
     417 	ds	temp,arg1,temp		/*  32nd divide step, */
     418 	addc	retreg,retreg,retreg	/*  shift last retreg bit into retreg */
     419 	xor,>=	arg0,arg1,0		/*  get correct sign of quotient */
     420 	  sub	0,retreg,retreg		/*    based on operand signs */
     421 	MILLIRETN
     422 	nop
     423 
     424 LSYM(small_divisor)
     425 
     426 #if defined(pa64)
     427 /*  Clear the upper 32 bits of the arg1 register.  We are working with	*/
     428 /*  small divisors (and 32-bit integers)   We must not be misled  */
     429 /*  by "1" bits left in the upper 32 bits.  */
     430 	depd %r0,31,32,%r25
     431 #endif
     432 	blr,n	arg1,r0
     433 	nop
     434 /*  table for divisor == 0,1, ... ,15; two instructions per entry */
     435 	addit,=	0,arg1,r0	/*  trap if divisor == 0 */
     436 	nop
     437 	MILLIRET		/*  divisor == 1 */
     438 	copy	arg0,retreg
     439 	MILLI_BEN($$divI_2)	/*  divisor == 2 */
     440 	nop
     441 	MILLI_BEN($$divI_3)	/*  divisor == 3 */
     442 	nop
     443 	MILLI_BEN($$divI_4)	/*  divisor == 4 */
     444 	nop
     445 	MILLI_BEN($$divI_5)	/*  divisor == 5 */
     446 	nop
     447 	MILLI_BEN($$divI_6)	/*  divisor == 6 */
     448 	nop
     449 	MILLI_BEN($$divI_7)	/*  divisor == 7 */
     450 	nop
     451 	MILLI_BEN($$divI_8)	/*  divisor == 8 */
     452 	nop
     453 	MILLI_BEN($$divI_9)	/*  divisor == 9 */
     454 	nop
     455 	MILLI_BEN($$divI_10)	/*  divisor == 10 */
     456 	nop
     457 	b	LREF(normal)		/*  divisor == 11 */
     458 	add,>=	0,arg0,retreg		/*  (delay slot) move dividend */
     459 	MILLI_BEN($$divI_12)	/*  divisor == 12 */
     460 	nop
     461 	b	LREF(normal)		/*  divisor == 13 */
     462 	add,>=	0,arg0,retreg		/*  (delay slot) move dividend */
     463 	MILLI_BEN($$divI_14)	/*  divisor == 14 */
     464 	nop
     465 	MILLI_BEN($$divI_15)	/*  divisor == 15 */
     466 	nop
     467 
     468 LSYM(negative1)
     469 	sub	0,arg0,retreg	/*  result is negation of dividend */
     470 	MILLIRET
     471 	addo	arg0,arg1,r0	/*  trap iff dividend==0x80000000 && divisor==-1 */
     472 	.exit
     473 	.procend
     474 	.end
     475 #endif
    476 
     477 #ifdef L_divU
     478 /* ROUTINE:	$$divU
     479    .
     480    .	Single precision divide for unsigned integers.
     481    .
     482    .	Quotient is truncated towards zero.
     483    .	Traps on divide by zero.
     484 
     485    INPUT REGISTERS:
     486    .	arg0 ==	dividend
     487    .	arg1 ==	divisor
     488    .	mrp  == return pc
     489    .	sr0  == return space when called externally
     490 
     491    OUTPUT REGISTERS:
     492    .	arg0 =	undefined
     493    .	arg1 =	undefined
     494    .	ret1 =	quotient
     495 
     496    OTHER REGISTERS AFFECTED:
     497    .	r1   =	undefined
     498 
     499    SIDE EFFECTS:
     500    .	Causes a trap under the following conditions:
     501    .		divisor is zero
     502    .	Changes memory at the following places:
     503    .		NONE
     504 
     505    PERMISSIBLE CONTEXT:
     506    .	Unwindable.
     507    .	Does not create a stack frame.
     508    .	Suitable for internal or external millicode.
     509    .	Assumes the special millicode register conventions.
     510 
     511    DISCUSSION:
     512    .	Branches to other millicode routines using BE:
     513    .		$$divU_# for 3,5,6,7,9,10,12,14,15
     514    .
     515    .	For selected small divisors calls the special divide by constant
     516    .	routines written by Karl Pettis.  These are: 3,5,6,7,9,10,12,14,15.  */
     517 
     518 RDEFINE(temp,r1)
     519 RDEFINE(retreg,ret1)	/* r29 */
     520 RDEFINE(temp1,arg0)
     521 	SUBSPA_MILLI_DIV
     522 	ATTR_MILLI
     523 	.export $$divU,millicode
     524 	.import $$divU_3,millicode
     525 	.import $$divU_5,millicode
     526 	.import $$divU_6,millicode
     527 	.import $$divU_7,millicode
     528 	.import $$divU_9,millicode
     529 	.import $$divU_10,millicode
     530 	.import $$divU_12,millicode
     531 	.import $$divU_14,millicode
     532 	.import $$divU_15,millicode
     533 	.proc
     534 	.callinfo	millicode
     535 	.entry
     536 GSYM($$divU)
     537 /* The subtract is not nullified since it does no harm and can be used
     538    by the two cases that branch back to "normal".  */
     539 	ldo	-1(arg1),temp		/* is there at most one bit set ? */
     540 	and,=	arg1,temp,r0		/* if so, denominator is power of 2 */
     541 	b	LREF(regular_seq)
     542 	addit,=	0,arg1,0		/* (delay slot) trap for zero dvr */
     543 	copy	arg0,retreg
     544 	extru,= arg1,15,16,temp		/* test denominator with 0xffff0000 */
     545 	extru	retreg,15,16,retreg	/* retreg = retreg >> 16 */
     546 	or	arg1,temp,arg1		/* arg1 = arg1 | (arg1 >> 16) */
     547 	ldi	0xcc,temp1		/* setup 0xcc in temp1 */
     548 	extru,= arg1,23,8,temp		/* test denominator with 0xff00 */
     549 	extru	retreg,23,24,retreg	/* retreg = retreg >> 8 */
     550 	or	arg1,temp,arg1		/* arg1 = arg1 | (arg1 >> 8) */
     551 	ldi	0xaa,temp		/* setup 0xaa in temp */
     552 	extru,= arg1,27,4,r0		/* test denominator with 0xf0 */
     553 	extru	retreg,27,28,retreg	/* retreg = retreg >> 4 */
     554 	and,=	arg1,temp1,r0		/* test denominator with 0xcc */
     555 	extru	retreg,29,30,retreg	/* retreg = retreg >> 2 */
     556 	and,=	arg1,temp,r0		/* test denominator with 0xaa */
     557 	extru	retreg,30,31,retreg	/* retreg = retreg >> 1 */
     558 	MILLIRETN
     559 	nop
     560 LSYM(regular_seq)
     561 	comib,>=  15,arg1,LREF(special_divisor)
     562 	subi	0,arg1,temp		/* clear carry, negate the divisor */
     563 	ds	r0,temp,r0		/* set V-bit to 1 */
     564 LSYM(normal)
     565 	add	arg0,arg0,retreg	/* shift msb bit into carry */
     566 	ds	r0,arg1,temp		/* 1st divide step, if no carry */
     567 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     568 	ds	temp,arg1,temp		/* 2nd divide step */
     569 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     570 	ds	temp,arg1,temp		/* 3rd divide step */
     571 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     572 	ds	temp,arg1,temp		/* 4th divide step */
     573 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     574 	ds	temp,arg1,temp		/* 5th divide step */
     575 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     576 	ds	temp,arg1,temp		/* 6th divide step */
     577 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     578 	ds	temp,arg1,temp		/* 7th divide step */
     579 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     580 	ds	temp,arg1,temp		/* 8th divide step */
     581 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     582 	ds	temp,arg1,temp		/* 9th divide step */
     583 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     584 	ds	temp,arg1,temp		/* 10th divide step */
     585 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     586 	ds	temp,arg1,temp		/* 11th divide step */
     587 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     588 	ds	temp,arg1,temp		/* 12th divide step */
     589 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     590 	ds	temp,arg1,temp		/* 13th divide step */
     591 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     592 	ds	temp,arg1,temp		/* 14th divide step */
     593 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     594 	ds	temp,arg1,temp		/* 15th divide step */
     595 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     596 	ds	temp,arg1,temp		/* 16th divide step */
     597 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     598 	ds	temp,arg1,temp		/* 17th divide step */
     599 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     600 	ds	temp,arg1,temp		/* 18th divide step */
     601 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     602 	ds	temp,arg1,temp		/* 19th divide step */
     603 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     604 	ds	temp,arg1,temp		/* 20th divide step */
     605 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     606 	ds	temp,arg1,temp		/* 21st divide step */
     607 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     608 	ds	temp,arg1,temp		/* 22nd divide step */
     609 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     610 	ds	temp,arg1,temp		/* 23rd divide step */
     611 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     612 	ds	temp,arg1,temp		/* 24th divide step */
     613 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     614 	ds	temp,arg1,temp		/* 25th divide step */
     615 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     616 	ds	temp,arg1,temp		/* 26th divide step */
     617 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     618 	ds	temp,arg1,temp		/* 27th divide step */
     619 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     620 	ds	temp,arg1,temp		/* 28th divide step */
     621 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     622 	ds	temp,arg1,temp		/* 29th divide step */
     623 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     624 	ds	temp,arg1,temp		/* 30th divide step */
     625 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     626 	ds	temp,arg1,temp		/* 31st divide step */
     627 	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
     628 	ds	temp,arg1,temp		/* 32nd divide step, */
     629 	MILLIRET
     630 	addc	retreg,retreg,retreg	/* shift last retreg bit into retreg */
     631 
     632 /* Handle the cases where divisor is a small constant or has high bit on.  */
     633 LSYM(special_divisor)
     634 /*	blr	arg1,r0 */
     635 /*	comib,>,n  0,arg1,LREF(big_divisor) ; nullify previous instruction */
     636 
     637 /* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from
     638    generating such a blr, comib sequence. A problem in nullification. So I
     639    rewrote this code.  */
     640 
     641 #if defined(pa64)
     642 /* Clear the upper 32 bits of the arg1 register.  We are working with
     643    small divisors (and 32-bit unsigned integers)   We must not be misled
     644    by "1" bits left in the upper 32 bits.  */
     645 	depd %r0,31,32,%r25
     646 #endif
     647 	comib,>	0,arg1,LREF(big_divisor)
     648 	nop
     649 	blr	arg1,r0
     650 	nop
     651 
     652 LSYM(zero_divisor)	/* this label is here to provide external visibility */
     653 	addit,=	0,arg1,0		/* trap for zero dvr */
     654 	nop
     655 	MILLIRET			/* divisor == 1 */
     656 	copy	arg0,retreg
     657 	MILLIRET			/* divisor == 2 */
     658 	extru	arg0,30,31,retreg
     659 	MILLI_BEN($$divU_3)		/* divisor == 3 */
     660 	nop
     661 	MILLIRET			/* divisor == 4 */
     662 	extru	arg0,29,30,retreg
     663 	MILLI_BEN($$divU_5)		/* divisor == 5 */
     664 	nop
     665 	MILLI_BEN($$divU_6)		/* divisor == 6 */
     666 	nop
     667 	MILLI_BEN($$divU_7)		/* divisor == 7 */
     668 	nop
     669 	MILLIRET			/* divisor == 8 */
     670 	extru	arg0,28,29,retreg
     671 	MILLI_BEN($$divU_9)		/* divisor == 9 */
     672 	nop
     673 	MILLI_BEN($$divU_10)		/* divisor == 10 */
     674 	nop
     675 	b	LREF(normal)		/* divisor == 11 */
     676 	ds	r0,temp,r0		/* set V-bit to 1 */
     677 	MILLI_BEN($$divU_12)		/* divisor == 12 */
     678 	nop
     679 	b	LREF(normal)		/* divisor == 13 */
     680 	ds	r0,temp,r0		/* set V-bit to 1 */
     681 	MILLI_BEN($$divU_14)		/* divisor == 14 */
     682 	nop
     683 	MILLI_BEN($$divU_15)		/* divisor == 15 */
     684 	nop
     685 
     686 /* Handle the case where the high bit is on in the divisor.
     687    Compute:	if( dividend>=divisor) quotient=1; else quotient=0;
     688    Note:	dividend>=divisor iff dividend-divisor does not borrow
     689    and		not borrow iff carry.  */
     690 LSYM(big_divisor)
     691 	sub	arg0,arg1,r0
     692 	MILLIRET
     693 	addc	r0,r0,retreg
     694 	.exit
     695 	.procend
     696 	.end
     697 #endif
    698 
    699 #ifdef L_remI
    700 /* ROUTINE:	$$remI
    701 
    702    DESCRIPTION:
    703    .	$$remI returns the remainder of the division of two signed 32-bit
    704    .	integers.  The sign of the remainder is the same as the sign of
    705    .	the dividend.
    706 
    707 
    708    INPUT REGISTERS:
    709    .	arg0 == dividend
    710    .	arg1 == divisor
    711    .	mrp  == return pc
    712    .	sr0  == return space when called externally
    713 
    714    OUTPUT REGISTERS:
    715    .	arg0 = destroyed
    716    .	arg1 = destroyed
    717    .	ret1 = remainder
    718 
    719    OTHER REGISTERS AFFECTED:
    720    .	r1   = undefined
    721 
    722    SIDE EFFECTS:
    723    .	Causes a trap under the following conditions:  DIVIDE BY ZERO
    724    .	Changes memory at the following places:  NONE
    725 
    726    PERMISSIBLE CONTEXT:
    727    .	Unwindable
    728    .	Does not create a stack frame
    729    .	Is usable for internal or external microcode
    730 
    731    DISCUSSION:
    732    .	Calls other millicode routines via mrp:  NONE
    733    .	Calls other millicode routines:  NONE  */
    734 
RDEFINE(tmp,r1)
RDEFINE(retreg,ret1)

	SUBSPA_MILLI
	ATTR_MILLI
	.proc
	.callinfo millicode
	.entry
/* $$remI / $$remoI: signed 32-bit remainder, arg0 rem arg1 -> ret1.
   Divisors that are (+/-) a power of 2 are handled with a mask on the
   magnitude of the dividend; all other divisors fall through to the
   classic 32-step DS divide sequence below.  NOTE(review): instruction
   order is load-bearing throughout (conditional nullification and
   branch delay slots) -- do not reorder.  */
GSYM($$remI)
GSYM($$remoI)
	.export $$remI,MILLICODE
	.export $$remoI,MILLICODE
	ldo		-1(arg1),tmp		/*  is there at most one bit set ? */
	and,<>		arg1,tmp,r0		/*  if not, don't use power of 2 */
	addi,>		0,arg1,r0		/*  if denominator > 0, use power */
						/*  of 2 */
	b,n		LREF(neg_denom)
/* Power-of-2 divisor: remainder = |dividend| & (divisor-1), then give
   the result the sign of the dividend.  */
LSYM(pow2)
	comb,>,n	0,arg0,LREF(neg_num)	/*  is numerator < 0 ? */
	and		arg0,tmp,retreg		/*  get the result */
	MILLIRETN
LSYM(neg_num)
	subi		0,arg0,arg0		/*  negate numerator */
	and		arg0,tmp,retreg		/*  get the result */
	subi		0,retreg,retreg		/*  negate result */
	MILLIRETN
/* Negative divisor: test whether -arg1 is a power of 2, being careful
   that 0x80000000 (and 0) negate to themselves.  */
LSYM(neg_denom)
	addi,<		0,arg1,r0		/*  if arg1 >= 0, it's not power */
						/*  of 2 */
	b,n		LREF(regular_seq)
	sub		r0,arg1,tmp		/*  make denominator positive */
	comb,=,n	arg1,tmp,LREF(regular_seq) /*  test against 0x80000000 and 0 */
	ldo		-1(tmp),retreg		/*  is there at most one bit set ? */
	and,=		tmp,retreg,r0		/*  if not, go to regular_seq */
	b,n		LREF(regular_seq)
	comb,>,n	0,arg0,LREF(neg_num_2)	/*  if arg0 < 0, negate it  */
	and		arg0,retreg,retreg
	MILLIRETN
LSYM(neg_num_2)
	subi		0,arg0,tmp		/*  test against 0x80000000 */
	and		tmp,retreg,retreg
	subi		0,retreg,retreg
	MILLIRETN
/* General case: make the dividend positive, run 32 conditional-subtract
   (DS) steps, correct the final partial remainder, and give it the sign
   of the dividend.  Divide by zero traps via ADDIT.  */
LSYM(regular_seq)
	addit,=		0,arg1,0		/*  trap if div by zero */
	add,>=		0,arg0,retreg		/*  move dividend, if retreg < 0, */
	sub		0,retreg,retreg		/*    make it positive */
	sub		0,arg1, tmp		/*  clear carry,  */
						/*    negate the divisor */
	ds		0, tmp,0		/*  set V-bit to the comple- */
						/*    ment of the divisor sign */
	or		0,0, tmp		/*  clear  tmp */
	add		retreg,retreg,retreg	/*  shift msb bit into carry */
	ds		 tmp,arg1, tmp		/*  1st divide step, if no carry */
						/*    out, msb of quotient = 0 */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
LSYM(t1)
	ds		 tmp,arg1, tmp		/*  2nd divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  3rd divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  4th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  5th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  6th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  7th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  8th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  9th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  10th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  11th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  12th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  13th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  14th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  15th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  16th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  17th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  18th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  19th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  20th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  21st divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  22nd divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  23rd divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  24th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  25th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  26th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  27th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  28th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  29th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  30th divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  31st divide step */
	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
	ds		 tmp,arg1, tmp		/*  32nd divide step, */
	addc		retreg,retreg,retreg	/*  shift last bit into retreg */
/* Correction: if the partial remainder in tmp went negative, add back
   |arg1| once; then flip the sign to match the original dividend.  */
	movb,>=,n	 tmp,retreg,LREF(finish) /*  branch if pos.  tmp */
	add,<		arg1,0,0		/*  if arg1 > 0, add arg1 */
	add,tr		 tmp,arg1,retreg	/*    for correcting remainder tmp */
	sub		 tmp,arg1,retreg	/*  else add absolute value arg1 */
LSYM(finish)
	add,>=		arg0,0,0		/*  set sign of remainder */
	sub		0,retreg,retreg		/*    to sign of dividend */
	MILLIRET
	nop
	.exit
	.procend
#ifdef milliext
	.origin 0x00000200
#endif
	.end
    869 #endif
    870 
    871 #ifdef L_remU
    872 /* ROUTINE:	$$remU
    873    .	Single precision divide for remainder with unsigned binary integers.
    874    .
    875    .	The remainder must be dividend-(dividend/divisor)*divisor.
    876    .	Divide by zero is trapped.
    877 
    878    INPUT REGISTERS:
    879    .	arg0 ==	dividend
    880    .	arg1 == divisor
    881    .	mrp  == return pc
    882    .	sr0  == return space when called externally
    883 
    884    OUTPUT REGISTERS:
    885    .	arg0 =	undefined
    886    .	arg1 =	undefined
    887    .	ret1 =	remainder
    888 
    889    OTHER REGISTERS AFFECTED:
    890    .	r1   =	undefined
    891 
    892    SIDE EFFECTS:
    893    .	Causes a trap under the following conditions:  DIVIDE BY ZERO
    894    .	Changes memory at the following places:  NONE
    895 
    896    PERMISSIBLE CONTEXT:
    897    .	Unwindable.
    898    .	Does not create a stack frame.
    899    .	Suitable for internal or external millicode.
    900    .	Assumes the special millicode register conventions.
    901 
    902    DISCUSSION:
    903    .	Calls other millicode routines using mrp: NONE
    904    .	Calls other millicode routines: NONE  */
    905 
    906 
/* $$remU: unsigned 32-bit remainder, arg0 rem arg1 -> ret1 (rmndr).
   Power-of-2 divisors are handled with a simple mask; divisors with the
   top bit set go to special_case (a single conditional subtract
   suffices); everything else uses the 32-step DS divide sequence.
   Divide by zero traps via ADDIT.  NOTE(review): instruction order is
   load-bearing (nullification and delay slots) -- do not reorder.  */
RDEFINE(temp,r1)
RDEFINE(rmndr,ret1)	/*  r29 */
	SUBSPA_MILLI
	ATTR_MILLI
	.export $$remU,millicode
	.proc
	.callinfo	millicode
	.entry
GSYM($$remU)
	ldo	-1(arg1),temp		/*  is there at most one bit set ? */
	and,=	arg1,temp,r0		/*  if not, don't use power of 2 */
	b	LREF(regular_seq)
	addit,=	0,arg1,r0		/*  trap on div by zero */
	and	arg0,temp,rmndr		/*  get the result for power of 2 */
	MILLIRETN
LSYM(regular_seq)
	comib,>=,n  0,arg1,LREF(special_case)
	subi	0,arg1,rmndr		/*  clear carry, negate the divisor */
	ds	r0,rmndr,r0		/*  set V-bit to 1 */
	add	arg0,arg0,temp		/*  shift msb bit into carry */
	ds	r0,arg1,rmndr		/*  1st divide step, if no carry */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  2nd divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  3rd divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  4th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  5th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  6th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  7th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  8th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  9th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  10th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  11th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  12th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  13th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  14th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  15th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  16th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  17th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  18th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  19th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  20th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  21st divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  22nd divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  23rd divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  24th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  25th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  26th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  27th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  28th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  29th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  30th divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  31st divide step */
	addc	temp,temp,temp		/*  shift temp with/into carry */
	ds	rmndr,arg1,rmndr		/*  32nd divide step, */
	comiclr,<= 0,rmndr,r0
	  add	rmndr,arg1,rmndr	/*  correction */
	MILLIRETN
	nop

/* Putting >= on the last DS and deleting COMICLR does not work!  */
/* Divisor has its top bit set (divisor > dividend/2 is possible only
   once): remainder is arg0 or arg0 - arg1, chosen by one conditional
   subtract.  */
LSYM(special_case)
	sub,>>=	arg0,arg1,rmndr
	  copy	arg0,rmndr
	MILLIRETN
	nop
	.exit
	.procend
	.end
   1004 #endif
   1005 
   1006 #ifdef L_div_const
   1007 /* ROUTINE:	$$divI_2
   1008    .		$$divI_3	$$divU_3
   1009    .		$$divI_4
   1010    .		$$divI_5	$$divU_5
   1011    .		$$divI_6	$$divU_6
   1012    .		$$divI_7	$$divU_7
   1013    .		$$divI_8
   1014    .		$$divI_9	$$divU_9
   1015    .		$$divI_10	$$divU_10
   1016    .
   1017    .		$$divI_12	$$divU_12
   1018    .
   1019    .		$$divI_14	$$divU_14
   1020    .		$$divI_15	$$divU_15
   1021    .		$$divI_16
   1022    .		$$divI_17	$$divU_17
   1023    .
   1024    .	Divide by selected constants for single precision binary integers.
   1025 
   1026    INPUT REGISTERS:
   1027    .	arg0 ==	dividend
   1028    .	mrp  == return pc
   1029    .	sr0  == return space when called externally
   1030 
   1031    OUTPUT REGISTERS:
   1032    .	arg0 =	undefined
   1033    .	arg1 =	undefined
   1034    .	ret1 =	quotient
   1035 
   1036    OTHER REGISTERS AFFECTED:
   1037    .	r1   =	undefined
   1038 
   1039    SIDE EFFECTS:
   1040    .	Causes a trap under the following conditions: NONE
   1041    .	Changes memory at the following places:  NONE
   1042 
   1043    PERMISSIBLE CONTEXT:
   1044    .	Unwindable.
   1045    .	Does not create a stack frame.
   1046    .	Suitable for internal or external millicode.
   1047    .	Assumes the special millicode register conventions.
   1048 
   1049    DISCUSSION:
   1050    .	Calls other millicode routines using mrp:  NONE
   1051    .	Calls other millicode routines:  NONE  */
   1052 
   1053 
   1054 /* TRUNCATED DIVISION BY SMALL INTEGERS
   1055 
   1056    We are interested in q(x) = floor(x/y), where x >= 0 and y > 0
   1057    (with y fixed).
   1058 
   1059    Let a = floor(z/y), for some choice of z.  Note that z will be
   1060    chosen so that division by z is cheap.
   1061 
   1062    Let r be the remainder(z/y).  In other words, r = z - ay.
   1063 
   1064    Now, our method is to choose a value for b such that
   1065 
   1066    q'(x) = floor((ax+b)/z)
   1067 
   1068    is equal to q(x) over as large a range of x as possible.  If the
   1069    two are equal over a sufficiently large range, and if it is easy to
   1070    form the product (ax), and it is easy to divide by z, then we can
   1071    perform the division much faster than the general division algorithm.
   1072 
   1073    So, we want the following to be true:
   1074 
   1075    .	For x in the following range:
   1076    .
   1077    .	    ky <= x < (k+1)y
   1078    .
   1079    .	implies that
   1080    .
   1081    .	    k <= (ax+b)/z < (k+1)
   1082 
   1083    We want to determine b such that this is true for all k in the
   1084    range {0..K} for some maximum K.
   1085 
   1086    Since (ax+b) is an increasing function of x, we can take each
   1087    bound separately to determine the "best" value for b.
   1088 
   1089    (ax+b)/z < (k+1)	       implies
   1090 
   1091    (a((k+1)y-1)+b < (k+1)z     implies
   1092 
   1093    b < a + (k+1)(z-ay)	       implies
   1094 
   1095    b < a + (k+1)r
   1096 
   1097    This needs to be true for all k in the range {0..K}.  In
   1098    particular, it is true for k = 0 and this leads to a maximum
   1099    acceptable value for b.
   1100 
   1101    b < a+r   or   b <= a+r-1
   1102 
   1103    Taking the other bound, we have
   1104 
   1105    k <= (ax+b)/z	       implies
   1106 
   1107    k <= (aky+b)/z	       implies
   1108 
   1109    k(z-ay) <= b		       implies
   1110 
   1111    kr <= b
   1112 
   1113    Clearly, the largest range for k will be achieved by maximizing b,
   1114    when r is not zero.	When r is zero, then the simplest choice for b
   1115    is 0.  When r is not 0, set
   1116 
   1117    .	b = a+r-1
   1118 
   1119    Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y)
   1120    for all x in the range:
   1121 
   1122    .	0 <= x < (K+1)y
   1123 
   1124    We need to determine what K is.  Of our two bounds,
   1125 
   1126    .	b < a+(k+1)r	is satisfied for all k >= 0, by construction.
   1127 
   1128    The other bound is
   1129 
   1130    .	kr <= b
   1131 
   1132    This is always true if r = 0.  If r is not 0 (the usual case), then
   1133    K = floor((a+r-1)/r), is the maximum value for k.
   1134 
   1135    Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct
   1136    answer for q(x) = floor(x/y) when x is in the range
   1137 
   1138    (0,(K+1)y-1)	       K = floor((a+r-1)/r)
   1139 
   1140    To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that
   1141    the formula for q'(x) yields the correct value of q(x) for all x
   1142    representable by a single word in HPPA.
   1143 
   1144    We are also constrained in that computing the product (ax), adding
   1145    b, and dividing by z must all be done quickly, otherwise we will be
   1146    better off going through the general algorithm using the DS
   1147    instruction, which uses approximately 70 cycles.
   1148 
   1149    For each y, there is a choice of z which satisfies the constraints
   1150    for (K+1)y >= 2**32.  We may not, however, be able to satisfy the
   1151    timing constraints for arbitrary y.	It seems that z being equal to
   1152    a power of 2 or a power of 2 minus 1 is as good as we can do, since
   1153    it minimizes the time to do division by z.  We want the choice of z
   1154    to also result in a value for (a) that minimizes the computation of
   1155    the product (ax).  This is best achieved if (a) has a regular bit
   1156    pattern (so the multiplication can be done with shifts and adds).
   1157    The value of (a) also needs to be less than 2**32 so the product is
   1158    always guaranteed to fit in 2 words.
   1159 
   1160    In actual practice, the following should be done:
   1161 
   1162    1) For negative x, you should take the absolute value and remember
   1163    .  the fact so that the result can be negated.  This obviously does
   1164    .  not apply in the unsigned case.
   1165    2) For even y, you should factor out the power of 2 that divides y
   1166    .  and divide x by it.  You can then proceed by dividing by the
   1167    .  odd factor of y.
   1168 
   1169    Here is a table of some odd values of y, and corresponding choices
   1170    for z which are "good".
   1171 
   1172     y	  z	  r	 a (hex)     max x (hex)
   1173 
   1174     3	2**32	  1	55555555      100000001
   1175     5	2**32	  1	33333333      100000003
   1176     7  2**24-1	  0	  249249     (infinite)
   1177     9  2**24-1	  0	  1c71c7     (infinite)
   1178    11  2**20-1	  0	   1745d     (infinite)
   1179    13  2**24-1	  0	  13b13b     (infinite)
   1180    15	2**32	  1	11111111      10000000d
   1181    17	2**32	  1	 f0f0f0f      10000000f
   1182 
   1183    If r is 1, then b = a+r-1 = a.  This simplifies the computation
   1184    of (ax+b), since you can compute (x+1)(a) instead.  If r is 0,
   1185    then b = 0 is ok to use which simplifies (ax+b).
   1186 
   1187    The bit patterns for 55555555, 33333333, and 11111111 are obviously
   1188    very regular.  The bit patterns for the other values of a above are:
   1189 
   1190     y	   (hex)	  (binary)
   1191 
   1192     7	  249249  001001001001001001001001  << regular >>
   1193     9	  1c71c7  000111000111000111000111  << regular >>
   1194    11	   1745d  000000010111010001011101  << irregular >>
   1195    13	  13b13b  000100111011000100111011  << irregular >>
   1196 
   1197    The bit patterns for (a) corresponding to (y) of 11 and 13 may be
   1198    too irregular to warrant using this method.
   1199 
   1200    When z is a power of 2 minus 1, then the division by z is slightly
   1201    more complicated, involving an iterative solution.
   1202 
   1203    The code presented here solves division by 1 through 17, except for
   1204    11 and 13. There are algorithms for both signed and unsigned
   1205    quantities given.
   1206 
   1207    TIMINGS (cycles)
   1208 
   1209    divisor  positive  negative	unsigned
   1210 
   1211    .   1	2	   2	     2
   1212    .   2	4	   4	     2
   1213    .   3       19	  21	    19
   1214    .   4	4	   4	     2
   1215    .   5       18	  22	    19
   1216    .   6       19	  22	    19
   1217    .   8	4	   4	     2
   1218    .  10       18	  19	    17
   1219    .  12       18	  20	    18
   1220    .  15       16	  18	    16
   1221    .  16	4	   4	     2
   1222    .  17       16	  18	    16
   1223 
   1224    Now, the algorithm for 7, 9, and 14 is an iterative one.  That is,
   1225    a loop body is executed until the tentative quotient is 0.  The
   1226    number of times the loop body is executed varies depending on the
   1227    dividend, but is never more than two times.	If the dividend is
   1228    less than the divisor, then the loop body is not executed at all.
   1229    Each iteration adds 4 cycles to the timings.
   1230 
   1231    divisor  positive  negative	unsigned
   1232 
   1233    .   7       19+4n	 20+4n	   20+4n    n = number of iterations
   1234    .   9       21+4n	 22+4n	   21+4n
   1235    .  14       21+4n	 22+4n	   20+4n
   1236 
   1237    To give an idea of how the number of iterations varies, here is a
   1238    table of dividend versus number of iterations when dividing by 7.
   1239 
   1240    smallest	 largest       required
   1241    dividend	dividend      iterations
   1242 
   1243    .	0	     6		    0
   1244    .	7	 0x6ffffff	    1
   1245    0x1000006	0xffffffff	    2
   1246 
   1247    There is some overlap in the range of numbers requiring 1 and 2
   1248    iterations.	*/
   1249 
/* Register aliases for all the divide-by-constant routines below.
   The double-word working value lives in the pair <x1,x2> (high,low),
   with t1/t2 as scratch.  x2 aliases arg0 (the dividend on entry) and
   x1 aliases ret1 (the quotient on exit).  */
RDEFINE(t2,r1)
RDEFINE(x2,arg0)	/*  r26 */
RDEFINE(t1,arg1)	/*  r25 */
RDEFINE(x1,ret1)	/*  r29 */

	SUBSPA_MILLI_DIV
	ATTR_MILLI

	.proc
	.callinfo	millicode
	.entry
   1261 /* NONE of these routines require a stack frame
   1262    ALL of these routines are unwindable from millicode	*/
   1263 
GSYM($$divide_by_constant)
	.export $$divide_by_constant,millicode
/*  Provides a "nice" label for the code covered by the unwind descriptor
    for things like gprof.  */

/* Signed division by 2**k: negative dividends are biased by (2**k - 1)
   before the arithmetic right shift (EXTRS) so the result truncates
   toward zero instead of toward minus infinity.  The COMCLR nullifies
   the bias ADDI when arg0 >= 0.  */

/* DIVISION BY 2 (shift by 1) */
GSYM($$divI_2)
	.export		$$divI_2,millicode
	comclr,>=	arg0,0,0
	addi		1,arg0,arg0
	MILLIRET
	extrs		arg0,30,31,ret1


/* DIVISION BY 4 (shift by 2) */
GSYM($$divI_4)
	.export		$$divI_4,millicode
	comclr,>=	arg0,0,0
	addi		3,arg0,arg0
	MILLIRET
	extrs		arg0,29,30,ret1


/* DIVISION BY 8 (shift by 3) */
GSYM($$divI_8)
	.export		$$divI_8,millicode
	comclr,>=	arg0,0,0
	addi		7,arg0,arg0
	MILLIRET
	extrs		arg0,28,29,ret1

/* DIVISION BY 16 (shift by 4) */
GSYM($$divI_16)
	.export		$$divI_16,millicode
	comclr,>=	arg0,0,0
	addi		15,arg0,arg0
	MILLIRET
	extrs		arg0,27,28,ret1
   1302 
   1303 /****************************************************************************
   1304 *
   1305 *	DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these
   1306 *
   1307 *	includes 3,5,15,17 and also 6,10,12
   1308 *
   1309 ****************************************************************************/
   1310 
   1311 /* DIVISION BY 3 (use z = 2**32; a = 55555555) */
   1312 
/* x/3 = high word of (|x|+1) * 0x55555555; 0x55555555 factors as
   5 * 0x11 * 0x101 * 0x10001, so after the "multiply by 5" prologue the
   remaining factors are applied by the shared tails at LSYM(pos)
   (positive result) and LSYM(neg) (negated result).  */
GSYM($$divI_3)
	.export		$$divI_3,millicode
	comb,<,N	x2,0,LREF(neg3)

	addi		1,x2,x2		/* this cannot overflow	*/
	extru		x2,1,2,x1	/* multiply by 5 to get started */
	sh2add		x2,x2,x2
	b		LREF(pos)
	addc		x1,0,x1

LSYM(neg3)
	subi		1,x2,x2		/* this cannot overflow	*/
	extru		x2,1,2,x1	/* multiply by 5 to get started */
	sh2add		x2,x2,x2
	b		LREF(neg)
	addc		x1,0,x1

GSYM($$divU_3)
	.export		$$divU_3,millicode
	addi		1,x2,x2		/* this CAN overflow */
	addc		0,0,x1
	shd		x1,x2,30,t1	/* multiply by 5 to get started */
	sh2add		x2,x2,x2
	b		LREF(pos)
	addc		x1,t1,x1
   1338 
   1339 /* DIVISION BY 5 (use z = 2**32; a = 33333333) */
   1340 
/* x/5 = high word of (|x|+1) * 0x33333333; 0x33333333 factors as
   3 * 0x11 * 0x101 * 0x10001, so after the "multiply by 3" prologue the
   remaining factors are applied by the shared tails LSYM(pos)/LSYM(neg).  */
GSYM($$divI_5)
	.export		$$divI_5,millicode
	comb,<,N	x2,0,LREF(neg5)

	addi		3,x2,t1		/* this cannot overflow	*/
	sh1add		x2,t1,x2	/* multiply by 3 to get started */
	b		LREF(pos)
	addc		0,0,x1

LSYM(neg5)
	sub		0,x2,x2		/* negate x2			*/
	addi		1,x2,x2		/* this cannot overflow	*/
	shd		0,x2,31,x1	/* get top bit (can be 1)	*/
	sh1add		x2,x2,x2	/* multiply by 3 to get started */
	b		LREF(neg)
	addc		x1,0,x1

GSYM($$divU_5)
	.export		$$divU_5,millicode
	addi		1,x2,x2		/* this CAN overflow */
	addc		0,0,x1
	shd		x1,x2,31,t1	/* multiply by 3 to get started */
	sh1add		x2,x2,x2
	b		LREF(pos)
	addc		t1,x1,x1
   1366 
   1367 /* DIVISION BY	6 (shift to divide by 2 then divide by 3) */
/* DIVISION BY	6 (shift to divide by 2 then divide by 3) */
/* Halve the (magnitude of the) dividend, then reuse the divide-by-3
   multiply sequence via the shared tails LSYM(pos)/LSYM(neg).  */
GSYM($$divI_6)
	.export		$$divI_6,millicode
	comb,<,N	x2,0,LREF(neg6)
	extru		x2,30,31,x2	/* divide by 2			*/
	addi		5,x2,t1		/* compute 5*(x2+1) = 5*x2+5	*/
	sh2add		x2,t1,x2	/* multiply by 5 to get started */
	b		LREF(pos)
	addc		0,0,x1

LSYM(neg6)
	subi		2,x2,x2		/* negate, divide by 2, and add 1 */
					/* negation and adding 1 are done */
					/* at the same time by the SUBI   */
	extru		x2,30,31,x2
	shd		0,x2,30,x1
	sh2add		x2,x2,x2	/* multiply by 5 to get started */
	b		LREF(neg)
	addc		x1,0,x1

GSYM($$divU_6)
	.export		$$divU_6,millicode
	extru		x2,30,31,x2	/* divide by 2 */
	addi		1,x2,x2		/* cannot carry */
	shd		0,x2,30,x1	/* multiply by 5 to get started */
	sh2add		x2,x2,x2
	b		LREF(pos)
	addc		x1,0,x1
   1395 
   1396 /* DIVISION BY 10 (shift to divide by 2 then divide by 5) */
/* DIVISION BY 10 (shift to divide by 2 then divide by 5) */
/* $$divU_10 falls straight through into LSYM(pos).  LSYM(pos) and
   LSYM(neg) are the shared multiply tails used by the /3, /5, /6, /10,
   /12 and /15 entries: they finish the multiplication by applying the
   factors 0x11, 0x101 and 0x10001, leaving the quotient (the high word
   of the double-word product) in x1 = ret1; LSYM(neg) additionally
   negates it.  LSYM(pos_for_17)/LSYM(neg_for_17) enter after the 0x11
   stage for the /17 entries.  */
GSYM($$divU_10)
	.export		$$divU_10,millicode
	extru		x2,30,31,x2	/* divide by 2 */
	addi		3,x2,t1		/* compute 3*(x2+1) = (3*x2)+3	*/
	sh1add		x2,t1,x2	/* multiply by 3 to get started */
	addc		0,0,x1
LSYM(pos)
	shd		x1,x2,28,t1	/* multiply by 0x11 */
	shd		x2,0,28,t2
	add		x2,t2,x2
	addc		x1,t1,x1
LSYM(pos_for_17)
	shd		x1,x2,24,t1	/* multiply by 0x101 */
	shd		x2,0,24,t2
	add		x2,t2,x2
	addc		x1,t1,x1

	shd		x1,x2,16,t1	/* multiply by 0x10001 */
	shd		x2,0,16,t2
	add		x2,t2,x2
	MILLIRET
	addc		x1,t1,x1

GSYM($$divI_10)
	.export		$$divI_10,millicode
	comb,<		x2,0,LREF(neg10)
	copy		0,x1
	extru		x2,30,31,x2	/* divide by 2 */
	addib,TR	1,x2,LREF(pos)	/* add 1 (cannot overflow)     */
	sh1add		x2,x2,x2	/* multiply by 3 to get started */

LSYM(neg10)
	subi		2,x2,x2		/* negate, divide by 2, and add 1 */
					/* negation and adding 1 are done */
					/* at the same time by the SUBI   */
	extru		x2,30,31,x2
	sh1add		x2,x2,x2	/* multiply by 3 to get started */
LSYM(neg)
	shd		x1,x2,28,t1	/* multiply by 0x11 */
	shd		x2,0,28,t2
	add		x2,t2,x2
	addc		x1,t1,x1
LSYM(neg_for_17)
	shd		x1,x2,24,t1	/* multiply by 0x101 */
	shd		x2,0,24,t2
	add		x2,t2,x2
	addc		x1,t1,x1

	shd		x1,x2,16,t1	/* multiply by 0x10001 */
	shd		x2,0,16,t2
	add		x2,t2,x2
	addc		x1,t1,x1
	MILLIRET
	sub		0,x1,x1
   1451 
   1452 /* DIVISION BY 12 (shift to divide by 4 then divide by 3) */
/* DIVISION BY 12 (shift to divide by 4 then divide by 3) */
/* Quarter the (magnitude of the) dividend, then reuse the divide-by-3
   multiply sequence via the shared tails LSYM(pos)/LSYM(neg).  */
GSYM($$divI_12)
	.export		$$divI_12,millicode
	comb,<		x2,0,LREF(neg12)
	copy		0,x1
	extru		x2,29,30,x2	/* divide by 4			*/
	addib,tr	1,x2,LREF(pos)	/* compute 5*(x2+1) = 5*x2+5    */
	sh2add		x2,x2,x2	/* multiply by 5 to get started */

LSYM(neg12)
	subi		4,x2,x2		/* negate, divide by 4, and add 1 */
					/* negation and adding 1 are done */
					/* at the same time by the SUBI   */
	extru		x2,29,30,x2
	b		LREF(neg)
	sh2add		x2,x2,x2	/* multiply by 5 to get started */

GSYM($$divU_12)
	.export		$$divU_12,millicode
	extru		x2,29,30,x2	/* divide by 4   */
	addi		5,x2,t1		/* cannot carry */
	sh2add		x2,t1,x2	/* multiply by 5 to get started */
	b		LREF(pos)
	addc		0,0,x1
   1476 
   1477 /* DIVISION BY 15 (use z = 2**32; a = 11111111) */
/* DIVISION BY 15 (use z = 2**32; a = 11111111) */
/* 0x11111111 = 0x11 * 0x101 * 0x10001, so no multiply prologue is
   needed: the ADDIB,TR branches to LREF(pos)+4 while its delay slot
   executes the first instruction of LSYM(pos) (the SHD below).  */
GSYM($$divI_15)
	.export		$$divI_15,millicode
	comb,<		x2,0,LREF(neg15)
	copy		0,x1
	addib,tr	1,x2,LREF(pos)+4
	shd		x1,x2,28,t1

LSYM(neg15)
	b		LREF(neg)
	subi		1,x2,x2

GSYM($$divU_15)
	.export		$$divU_15,millicode
	addi		1,x2,x2		/* this CAN overflow */
	b		LREF(pos)
	addc		0,0,x1
   1494 
   1495 /* DIVISION BY 17 (use z = 2**32; a =  f0f0f0f) */
/* DIVISION BY 17 (use z = 2**32; a =  f0f0f0f) */
/* 0xf0f0f0f = 0xf * 0x101 * 0x10001: multiply by 0xf here (as
   16*x - x via the SHD/SUB pair), then join the shared tails at
   LSYM(pos_for_17)/LSYM(neg_for_17), which skip the 0x11 stage.  */
GSYM($$divI_17)
	.export		$$divI_17,millicode
	comb,<,n	x2,0,LREF(neg17)
	addi		1,x2,x2		/* this cannot overflow */
	shd		0,x2,28,t1	/* multiply by 0xf to get started */
	shd		x2,0,28,t2
	sub		t2,x2,x2
	b		LREF(pos_for_17)
	subb		t1,0,x1

LSYM(neg17)
	subi		1,x2,x2		/* this cannot overflow */
	shd		0,x2,28,t1	/* multiply by 0xf to get started */
	shd		x2,0,28,t2
	sub		t2,x2,x2
	b		LREF(neg_for_17)
	subb		t1,0,x1

GSYM($$divU_17)
	.export		$$divU_17,millicode
	addi		1,x2,x2		/* this CAN overflow */
	addc		0,0,x1
	shd		x1,x2,28,t1	/* multiply by 0xf to get started */
LSYM(u17)
	shd		x2,0,28,t2
	sub		t2,x2,x2
	b		LREF(pos_for_17)
	subb		t1,x1,x1
   1524 
   1525 
   1526 /* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these
   1527    includes 7,9 and also 14
   1528 
   1529 
   1530    z = 2**24-1
   1531    r = z mod x = 0
   1532 
   1533    so choose b = 0
   1534 
   1535    Also, in order to divide by z = 2**24-1, we approximate by dividing
   1536    by (z+1) = 2**24 (which is easy), and then correcting.
   1537 
   1538    (ax) = (z+1)q' + r
   1539    .	= zq' + (q'+r)
   1540 
   1541    So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)
   1542    Then the true remainder of (ax)/z is (q'+r).  Repeat the process
   1543    with this new remainder, adding the tentative quotients together,
   1544    until a tentative quotient is 0 (and then we are done).  There is
   1545    one last correction to be done.  It is possible that (q'+r) = z.
   1546    If so, then (q'+r)/(z+1) = 0 and it looks like we are done.	But,
   1547    in fact, we need to add 1 more to the quotient.  Now, it turns
   1548    out that this happens if and only if the original value x is
   1549    an exact multiple of y.  So, to avoid a three instruction test at
   1550    the end, instead use 1 instruction to add 1 to x at the beginning.  */
   1551 
   1552 /* DIVISION BY 7 (use z = 2**24-1; a = 249249) */
/* DIVISION BY 7 (use z = 2**24-1; a = 249249) */
/* Multiply |x|+1 by a = 0x249249 (built as 9 * 0x41 * 0x1041 via the
   SH3ADD and SHD/ADD stages), giving the double word <t1,x2>.  Then
   divide that by 2**24-1 iteratively: each pass splits off a tentative
   quotient (bits above 24) and folds it back into the low 24 bits,
   accumulating quotients in x1 until the tentative quotient is 0
   (at most two passes -- see the discussion above).  */
GSYM($$divI_7)
	.export		$$divI_7,millicode
	comb,<,n	x2,0,LREF(neg7)
LSYM(7)
	addi		1,x2,x2		/* cannot overflow */
	shd		0,x2,29,x1
	sh3add		x2,x2,x2
	addc		x1,0,x1
LSYM(pos7)
	shd		x1,x2,26,t1
	shd		x2,0,26,t2
	add		x2,t2,x2
	addc		x1,t1,x1

	shd		x1,x2,20,t1
	shd		x2,0,20,t2
	add		x2,t2,x2
	addc		x1,t1,t1

	/* computed <t1,x2>.  Now divide it by (2**24 - 1)	*/

	copy		0,x1
	shd,=		t1,x2,24,t1	/* tentative quotient  */
LSYM(1)
	addb,tr		t1,x1,LREF(2)	/* add to previous quotient   */
	extru		x2,31,24,x2	/* new remainder (unadjusted) */

	MILLIRETN

LSYM(2)
	addb,tr		t1,x2,LREF(1)	/* adjust remainder */
	extru,=		x2,7,8,t1	/* new quotient     */

/* Negative dividend: same multiply on -x+1, with the quotient negated
   on exit (tail after LSYM(3)).  */
LSYM(neg7)
	subi		1,x2,x2		/* negate x2 and add 1 */
LSYM(8)
	shd		0,x2,29,x1
	sh3add		x2,x2,x2
	addc		x1,0,x1

LSYM(neg7_shift)
	shd		x1,x2,26,t1
	shd		x2,0,26,t2
	add		x2,t2,x2
	addc		x1,t1,x1

	shd		x1,x2,20,t1
	shd		x2,0,20,t2
	add		x2,t2,x2
	addc		x1,t1,t1

	/* computed <t1,x2>.  Now divide it by (2**24 - 1)	*/

	copy		0,x1
	shd,=		t1,x2,24,t1	/* tentative quotient  */
LSYM(3)
	addb,tr		t1,x1,LREF(4)	/* add to previous quotient   */
	extru		x2,31,24,x2	/* new remainder (unadjusted) */

	MILLIRET
	sub		0,x1,x1		/* negate result    */

LSYM(4)
	addb,tr		t1,x2,LREF(3)	/* adjust remainder */
	extru,=		x2,7,8,t1	/* new quotient     */

GSYM($$divU_7)
	.export		$$divU_7,millicode
	addi		1,x2,x2		/* can carry */
	addc		0,0,x1
	shd		x1,x2,29,t1
	sh3add		x2,x2,x2
	b		LREF(pos7)
	addc		t1,x1,x1
   1627 
/* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7)

   a = 0x1c71c7 = (2**24-1)/9 = 7 * 65 * 4097.  The *7 step
   (8*x - x) is done here; the *65 and *4097 steps and the final
   divide by 2**24-1 are shared with the divide-by-7 code via
   pos7 / neg7_shift.  */
GSYM($$divI_9)
	.export		$$divI_9,millicode
	comb,<,n	x2,0,LREF(neg9)	/* negative dividend: separate path */
	addi		1,x2,x2		/* cannot overflow */
	shd		0,x2,29,t1	/* t1 = top 3 bits of x2 */
	shd		x2,0,29,t2	/* t2 = x2 << 3 */
	sub		t2,x2,x2	/* <x1,x2> = 8*x2 - x2 = 7*x2 */
	b		LREF(pos7)	/* finish with the *65, *4097 chain */
	subb		t1,0,x1

LSYM(neg9)
	subi		1,x2,x2		/* negate and add 1 */
	shd		0,x2,29,t1	/* <x1,x2> = 7*x2, as above */
	shd		x2,0,29,t2
	sub		t2,x2,x2
	b		LREF(neg7_shift)	/* result negated on return */
	subb		t1,0,x1

GSYM($$divU_9)
	.export		$$divU_9,millicode
	addi		1,x2,x2		/* can carry */
	addc		0,0,x1		/* x1 = carry out of the add */
	shd		x1,x2,29,t1	/* <x1,x2> = 7 * <x1,x2> */
	shd		x2,0,29,t2
	sub		t2,x2,x2
	b		LREF(pos7)
	subb		t1,x1,x1
   1656 
/* DIVISION BY 14 (shift to divide by 2 then divide by 7) */
GSYM($$divI_14)
	.export		$$divI_14,millicode
	comb,<,n	x2,0,LREF(neg14)
GSYM($$divU_14)
	.export		$$divU_14,millicode
	b		LREF(7)		/* go to 7 case */
	extru		x2,30,31,x2	/* divide by 2  */

LSYM(neg14)
	subi		2,x2,x2		/* negate (and add 2) */
	b		LREF(8)		/* 7 case, skipping its add-1 */
	extru		x2,30,31,x2	/* divide by 2	      */
	.exit
	.procend
	.end
#endif
   1674 
   1675 #ifdef L_mulI
   1676 /* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */
   1677 /******************************************************************************
This routine is used on PA2.0 processors when gcc -mno-fpregs is used.
   1679 
   1680 ROUTINE:	$$mulI
   1681 
   1682 
   1683 DESCRIPTION:
   1684 
   1685 	$$mulI multiplies two single word integers, giving a single
   1686 	word result.
   1687 
   1688 
   1689 INPUT REGISTERS:
   1690 
   1691 	arg0 = Operand 1
   1692 	arg1 = Operand 2
   1693 	r31  == return pc
   1694 	sr0  == return space when called externally
   1695 
   1696 
   1697 OUTPUT REGISTERS:
   1698 
   1699 	arg0 = undefined
   1700 	arg1 = undefined
   1701 	ret1 = result
   1702 
   1703 OTHER REGISTERS AFFECTED:
   1704 
   1705 	r1   = undefined
   1706 
   1707 SIDE EFFECTS:
   1708 
   1709 	Causes a trap under the following conditions:  NONE
   1710 	Changes memory at the following places:  NONE
   1711 
   1712 PERMISSIBLE CONTEXT:
   1713 
   1714 	Unwindable
   1715 	Does not create a stack frame
   1716 	Is usable for internal or external microcode
   1717 
   1718 DISCUSSION:
   1719 
   1720 	Calls other millicode routines via mrp:  NONE
   1721 	Calls other millicode routines:  NONE
   1722 
   1723 ***************************************************************************/
   1724 
   1725 
/* Register shorthands for $$mulI.  */
#define	a0	%arg0
#define	a1	%arg1
#define	t0	%r1
#define	r	%ret1

/* Each macro name encodes its effect: t0__9a0 means t0 = 9*a0,
   r__r_2t0 means r = r + 2*t0, t0__t0ma0 means t0 = t0 - a0, and
   a0__256a0 means a0 = 256*a0 (a0 <<= 8).  The a1_ne_0_b_l* macros
   loop back for the next byte of a1 when a1 is not yet exhausted;
   the b_e_* macros branch to the shared tails at the end of the
   multiply table.  */
#define	a0__128a0	zdep	a0,24,25,a0
#define	a0__256a0	zdep	a0,23,24,a0
#define	a1_ne_0_b_l0	comb,<>	a1,0,LREF(l0)
#define	a1_ne_0_b_l1	comb,<>	a1,0,LREF(l1)
#define	a1_ne_0_b_l2	comb,<>	a1,0,LREF(l2)
#define	b_n_ret_t0	b,n	LREF(ret_t0)
#define	b_e_shift	b	LREF(e_shift)
#define	b_e_t0ma0	b	LREF(e_t0ma0)
#define	b_e_t0		b	LREF(e_t0)
#define	b_e_t0a0	b	LREF(e_t0a0)
#define	b_e_t02a0	b	LREF(e_t02a0)
#define	b_e_t04a0	b	LREF(e_t04a0)
#define	b_e_2t0		b	LREF(e_2t0)
#define	b_e_2t0a0	b	LREF(e_2t0a0)
#define	b_e_2t04a0	b	LREF(e2t04a0)
#define	b_e_3t0		b	LREF(e_3t0)
#define	b_e_4t0		b	LREF(e_4t0)
#define	b_e_4t0a0	b	LREF(e_4t0a0)
#define	b_e_4t08a0	b	LREF(e4t08a0)
#define	b_e_5t0		b	LREF(e_5t0)
#define	b_e_8t0		b	LREF(e_8t0)
#define	b_e_8t0a0	b	LREF(e_8t0a0)
/* Accumulation steps: r += {1,2,4,8} * {a0,t0}.  */
#define	r__r_a0		add	r,a0,r
#define	r__r_2a0	sh1add	a0,r,r
#define	r__r_4a0	sh2add	a0,r,r
#define	r__r_8a0	sh3add	a0,r,r
#define	r__r_t0		add	r,t0,r
#define	r__r_2t0	sh1add	t0,r,r
#define	r__r_4t0	sh2add	t0,r,r
#define	r__r_8t0	sh3add	t0,r,r
/* Partial-product construction steps in t0.  */
#define	t0__3a0		sh1add	a0,a0,t0
#define	t0__4a0		sh2add	a0,0,t0
#define	t0__5a0		sh2add	a0,a0,t0
#define	t0__8a0		sh3add	a0,0,t0
#define	t0__9a0		sh3add	a0,a0,t0
#define	t0__16a0	zdep	a0,27,28,t0
#define	t0__32a0	zdep	a0,26,27,t0
#define	t0__64a0	zdep	a0,25,26,t0
#define	t0__128a0	zdep	a0,24,25,t0
#define	t0__t0ma0	sub	t0,a0,t0
#define	t0__t0_a0	add	t0,a0,t0
#define	t0__t0_2a0	sh1add	a0,t0,t0
#define	t0__t0_4a0	sh2add	a0,t0,t0
#define	t0__t0_8a0	sh3add	a0,t0,t0
#define	t0__2t0_a0	sh1add	t0,a0,t0
#define	t0__3t0		sh1add	t0,t0,t0
#define	t0__4t0		sh2add	t0,0,t0
#define	t0__4t0_a0	sh2add	t0,a0,t0
#define	t0__5t0		sh2add	t0,t0,t0
#define	t0__8t0		sh3add	t0,0,t0
#define	t0__8t0_a0	sh3add	t0,a0,t0
#define	t0__9t0		sh3add	t0,t0,t0
#define	t0__16t0	zdep	t0,27,28,t0
#define	t0__32t0	zdep	t0,26,27,t0
#define	t0__256a0	zdep	a0,23,24,t0
   1786 
   1787 
	SUBSPA_MILLI
	ATTR_MILLI
	.align 16
	.proc
	.callinfo millicode
	.export $$mulI,millicode
GSYM($$mulI)
	combt,<<=	a1,a0,LREF(l4)	/* swap args if unsigned a1>a0 */
	copy		0,r		/* zero out the result */
	xor		a0,a1,a0	/* swap a0 & a1 using the */
	xor		a0,a1,a1	/*  old xor trick */
	xor		a0,a1,a0
LSYM(l4)
	combt,<=	0,a0,LREF(l3)		/* if a0>=0 then proceed like unsigned */
	zdep		a1,30,8,t0	/* t0 = (a1&0xff)<<1 ********* */
	sub,>		0,a1,t0		/* otherwise negate both and */
	combt,<=,n	a0,t0,LREF(l2)	/*  swap back if |a0|<|a1| */
	sub		0,a0,a1
	movb,tr,n	t0,a0,LREF(l2)	/* 10th inst.  */

/* Main loop: peel off the low 8 bits of a1, dispatch into the
   256-entry table below (blr scales t0 = (a1&0xff)<<1 by 8 bytes,
   so each table row is 4 instructions), and repeat until a1 == 0.  */
LSYM(l0)	r__r_t0				/* add in this partial product */
LSYM(l1)	a0__256a0			/* a0 <<= 8 ****************** */
LSYM(l2)	zdep		a1,30,8,t0	/* t0 = (a1&0xff)<<1 ********* */
LSYM(l3)	blr		t0,0		/* case on these 8 bits ****** */
		extru		a1,23,24,a1	/* a1 >>= 8 ****************** */

/*16 insts before this.  */
/*			  a0 <<= 8 ************************** */
/* Row N computes r += a0 * N via shift-and-add steps; a row either
   returns directly (MILLIRETN), loops back for the next byte of a1
   (the a1_ne_0_b_l* macros), or branches to a shared tail (b_e_*)
   below to finish the partial product.  */
LSYM(x0)	a1_ne_0_b_l2	! a0__256a0	! MILLIRETN	! nop
LSYM(x1)	a1_ne_0_b_l1	! r__r_a0	! MILLIRETN	! nop
LSYM(x2)	a1_ne_0_b_l1	! r__r_2a0	! MILLIRETN	! nop
LSYM(x3)	a1_ne_0_b_l0	! t0__3a0	! MILLIRET	! r__r_t0
LSYM(x4)	a1_ne_0_b_l1	! r__r_4a0	! MILLIRETN	! nop
LSYM(x5)	a1_ne_0_b_l0	! t0__5a0	! MILLIRET	! r__r_t0
LSYM(x6)	t0__3a0		! a1_ne_0_b_l1	! r__r_2t0	! MILLIRETN
LSYM(x7)	t0__3a0		! a1_ne_0_b_l0	! r__r_4a0	! b_n_ret_t0
LSYM(x8)	a1_ne_0_b_l1	! r__r_8a0	! MILLIRETN	! nop
LSYM(x9)	a1_ne_0_b_l0	! t0__9a0	! MILLIRET	! r__r_t0
LSYM(x10)	t0__5a0		! a1_ne_0_b_l1	! r__r_2t0	! MILLIRETN
LSYM(x11)	t0__3a0		! a1_ne_0_b_l0	! r__r_8a0	! b_n_ret_t0
LSYM(x12)	t0__3a0		! a1_ne_0_b_l1	! r__r_4t0	! MILLIRETN
LSYM(x13)	t0__5a0		! a1_ne_0_b_l0	! r__r_8a0	! b_n_ret_t0
LSYM(x14)	t0__3a0		! t0__2t0_a0	! b_e_shift	! r__r_2t0
LSYM(x15)	t0__5a0		! a1_ne_0_b_l0	! t0__3t0	! b_n_ret_t0
LSYM(x16)	t0__16a0	! a1_ne_0_b_l1	! r__r_t0	! MILLIRETN
LSYM(x17)	t0__9a0		! a1_ne_0_b_l0	! t0__t0_8a0	! b_n_ret_t0
LSYM(x18)	t0__9a0		! a1_ne_0_b_l1	! r__r_2t0	! MILLIRETN
LSYM(x19)	t0__9a0		! a1_ne_0_b_l0	! t0__2t0_a0	! b_n_ret_t0
LSYM(x20)	t0__5a0		! a1_ne_0_b_l1	! r__r_4t0	! MILLIRETN
LSYM(x21)	t0__5a0		! a1_ne_0_b_l0	! t0__4t0_a0	! b_n_ret_t0
LSYM(x22)	t0__5a0		! t0__2t0_a0	! b_e_shift	! r__r_2t0
LSYM(x23)	t0__5a0		! t0__2t0_a0	! b_e_t0	! t0__2t0_a0
LSYM(x24)	t0__3a0		! a1_ne_0_b_l1	! r__r_8t0	! MILLIRETN
LSYM(x25)	t0__5a0		! a1_ne_0_b_l0	! t0__5t0	! b_n_ret_t0
LSYM(x26)	t0__3a0		! t0__4t0_a0	! b_e_shift	! r__r_2t0
LSYM(x27)	t0__3a0		! a1_ne_0_b_l0	! t0__9t0	! b_n_ret_t0
LSYM(x28)	t0__3a0		! t0__2t0_a0	! b_e_shift	! r__r_4t0
LSYM(x29)	t0__3a0		! t0__2t0_a0	! b_e_t0	! t0__4t0_a0
LSYM(x30)	t0__5a0		! t0__3t0	! b_e_shift	! r__r_2t0
LSYM(x31)	t0__32a0	! a1_ne_0_b_l0	! t0__t0ma0	! b_n_ret_t0
LSYM(x32)	t0__32a0	! a1_ne_0_b_l1	! r__r_t0	! MILLIRETN
LSYM(x33)	t0__8a0		! a1_ne_0_b_l0	! t0__4t0_a0	! b_n_ret_t0
LSYM(x34)	t0__16a0	! t0__t0_a0	! b_e_shift	! r__r_2t0
LSYM(x35)	t0__9a0		! t0__3t0	! b_e_t0	! t0__t0_8a0
LSYM(x36)	t0__9a0		! a1_ne_0_b_l1	! r__r_4t0	! MILLIRETN
LSYM(x37)	t0__9a0		! a1_ne_0_b_l0	! t0__4t0_a0	! b_n_ret_t0
LSYM(x38)	t0__9a0		! t0__2t0_a0	! b_e_shift	! r__r_2t0
LSYM(x39)	t0__9a0		! t0__2t0_a0	! b_e_t0	! t0__2t0_a0
LSYM(x40)	t0__5a0		! a1_ne_0_b_l1	! r__r_8t0	! MILLIRETN
LSYM(x41)	t0__5a0		! a1_ne_0_b_l0	! t0__8t0_a0	! b_n_ret_t0
LSYM(x42)	t0__5a0		! t0__4t0_a0	! b_e_shift	! r__r_2t0
LSYM(x43)	t0__5a0		! t0__4t0_a0	! b_e_t0	! t0__2t0_a0
LSYM(x44)	t0__5a0		! t0__2t0_a0	! b_e_shift	! r__r_4t0
LSYM(x45)	t0__9a0		! a1_ne_0_b_l0	! t0__5t0	! b_n_ret_t0
LSYM(x46)	t0__9a0		! t0__5t0	! b_e_t0	! t0__t0_a0
LSYM(x47)	t0__9a0		! t0__5t0	! b_e_t0	! t0__t0_2a0
LSYM(x48)	t0__3a0		! a1_ne_0_b_l0	! t0__16t0	! b_n_ret_t0
LSYM(x49)	t0__9a0		! t0__5t0	! b_e_t0	! t0__t0_4a0
LSYM(x50)	t0__5a0		! t0__5t0	! b_e_shift	! r__r_2t0
LSYM(x51)	t0__9a0		! t0__t0_8a0	! b_e_t0	! t0__3t0
LSYM(x52)	t0__3a0		! t0__4t0_a0	! b_e_shift	! r__r_4t0
LSYM(x53)	t0__3a0		! t0__4t0_a0	! b_e_t0	! t0__4t0_a0
LSYM(x54)	t0__9a0		! t0__3t0	! b_e_shift	! r__r_2t0
LSYM(x55)	t0__9a0		! t0__3t0	! b_e_t0	! t0__2t0_a0
LSYM(x56)	t0__3a0		! t0__2t0_a0	! b_e_shift	! r__r_8t0
LSYM(x57)	t0__9a0		! t0__2t0_a0	! b_e_t0	! t0__3t0
LSYM(x58)	t0__3a0		! t0__2t0_a0	! b_e_2t0	! t0__4t0_a0
LSYM(x59)	t0__9a0		! t0__2t0_a0	! b_e_t02a0	! t0__3t0
LSYM(x60)	t0__5a0		! t0__3t0	! b_e_shift	! r__r_4t0
LSYM(x61)	t0__5a0		! t0__3t0	! b_e_t0	! t0__4t0_a0
LSYM(x62)	t0__32a0	! t0__t0ma0	! b_e_shift	! r__r_2t0
LSYM(x63)	t0__64a0	! a1_ne_0_b_l0	! t0__t0ma0	! b_n_ret_t0
LSYM(x64)	t0__64a0	! a1_ne_0_b_l1	! r__r_t0	! MILLIRETN
LSYM(x65)	t0__8a0		! a1_ne_0_b_l0	! t0__8t0_a0	! b_n_ret_t0
LSYM(x66)	t0__32a0	! t0__t0_a0	! b_e_shift	! r__r_2t0
LSYM(x67)	t0__8a0		! t0__4t0_a0	! b_e_t0	! t0__2t0_a0
LSYM(x68)	t0__8a0		! t0__2t0_a0	! b_e_shift	! r__r_4t0
LSYM(x69)	t0__8a0		! t0__2t0_a0	! b_e_t0	! t0__4t0_a0
LSYM(x70)	t0__64a0	! t0__t0_4a0	! b_e_t0	! t0__t0_2a0
LSYM(x71)	t0__9a0		! t0__8t0	! b_e_t0	! t0__t0ma0
LSYM(x72)	t0__9a0		! a1_ne_0_b_l1	! r__r_8t0	! MILLIRETN
LSYM(x73)	t0__9a0		! t0__8t0_a0	! b_e_shift	! r__r_t0
LSYM(x74)	t0__9a0		! t0__4t0_a0	! b_e_shift	! r__r_2t0
LSYM(x75)	t0__9a0		! t0__4t0_a0	! b_e_t0	! t0__2t0_a0
LSYM(x76)	t0__9a0		! t0__2t0_a0	! b_e_shift	! r__r_4t0
LSYM(x77)	t0__9a0		! t0__2t0_a0	! b_e_t0	! t0__4t0_a0
LSYM(x78)	t0__9a0		! t0__2t0_a0	! b_e_2t0	! t0__2t0_a0
LSYM(x79)	t0__16a0	! t0__5t0	! b_e_t0	! t0__t0ma0
LSYM(x80)	t0__16a0	! t0__5t0	! b_e_shift	! r__r_t0
LSYM(x81)	t0__9a0		! t0__9t0	! b_e_shift	! r__r_t0
LSYM(x82)	t0__5a0		! t0__8t0_a0	! b_e_shift	! r__r_2t0
LSYM(x83)	t0__5a0		! t0__8t0_a0	! b_e_t0	! t0__2t0_a0
LSYM(x84)	t0__5a0		! t0__4t0_a0	! b_e_shift	! r__r_4t0
LSYM(x85)	t0__8a0		! t0__2t0_a0	! b_e_t0	! t0__5t0
LSYM(x86)	t0__5a0		! t0__4t0_a0	! b_e_2t0	! t0__2t0_a0
LSYM(x87)	t0__9a0		! t0__9t0	! b_e_t02a0	! t0__t0_4a0
LSYM(x88)	t0__5a0		! t0__2t0_a0	! b_e_shift	! r__r_8t0
LSYM(x89)	t0__5a0		! t0__2t0_a0	! b_e_t0	! t0__8t0_a0
LSYM(x90)	t0__9a0		! t0__5t0	! b_e_shift	! r__r_2t0
LSYM(x91)	t0__9a0		! t0__5t0	! b_e_t0	! t0__2t0_a0
LSYM(x92)	t0__5a0		! t0__2t0_a0	! b_e_4t0	! t0__2t0_a0
LSYM(x93)	t0__32a0	! t0__t0ma0	! b_e_t0	! t0__3t0
LSYM(x94)	t0__9a0		! t0__5t0	! b_e_2t0	! t0__t0_2a0
LSYM(x95)	t0__9a0		! t0__2t0_a0	! b_e_t0	! t0__5t0
LSYM(x96)	t0__8a0		! t0__3t0	! b_e_shift	! r__r_4t0
LSYM(x97)	t0__8a0		! t0__3t0	! b_e_t0	! t0__4t0_a0
LSYM(x98)	t0__32a0	! t0__3t0	! b_e_t0	! t0__t0_2a0
LSYM(x99)	t0__8a0		! t0__4t0_a0	! b_e_t0	! t0__3t0
LSYM(x100)	t0__5a0		! t0__5t0	! b_e_shift	! r__r_4t0
LSYM(x101)	t0__5a0		! t0__5t0	! b_e_t0	! t0__4t0_a0
LSYM(x102)	t0__32a0	! t0__t0_2a0	! b_e_t0	! t0__3t0
LSYM(x103)	t0__5a0		! t0__5t0	! b_e_t02a0	! t0__4t0_a0
LSYM(x104)	t0__3a0		! t0__4t0_a0	! b_e_shift	! r__r_8t0
LSYM(x105)	t0__5a0		! t0__4t0_a0	! b_e_t0	! t0__5t0
LSYM(x106)	t0__3a0		! t0__4t0_a0	! b_e_2t0	! t0__4t0_a0
LSYM(x107)	t0__9a0		! t0__t0_4a0	! b_e_t02a0	! t0__8t0_a0
LSYM(x108)	t0__9a0		! t0__3t0	! b_e_shift	! r__r_4t0
LSYM(x109)	t0__9a0		! t0__3t0	! b_e_t0	! t0__4t0_a0
LSYM(x110)	t0__9a0		! t0__3t0	! b_e_2t0	! t0__2t0_a0
LSYM(x111)	t0__9a0		! t0__4t0_a0	! b_e_t0	! t0__3t0
LSYM(x112)	t0__3a0		! t0__2t0_a0	! b_e_t0	! t0__16t0
LSYM(x113)	t0__9a0		! t0__4t0_a0	! b_e_t02a0	! t0__3t0
LSYM(x114)	t0__9a0		! t0__2t0_a0	! b_e_2t0	! t0__3t0
LSYM(x115)	t0__9a0		! t0__2t0_a0	! b_e_2t0a0	! t0__3t0
LSYM(x116)	t0__3a0		! t0__2t0_a0	! b_e_4t0	! t0__4t0_a0
LSYM(x117)	t0__3a0		! t0__4t0_a0	! b_e_t0	! t0__9t0
LSYM(x118)	t0__3a0		! t0__4t0_a0	! b_e_t0a0	! t0__9t0
LSYM(x119)	t0__3a0		! t0__4t0_a0	! b_e_t02a0	! t0__9t0
LSYM(x120)	t0__5a0		! t0__3t0	! b_e_shift	! r__r_8t0
LSYM(x121)	t0__5a0		! t0__3t0	! b_e_t0	! t0__8t0_a0
LSYM(x122)	t0__5a0		! t0__3t0	! b_e_2t0	! t0__4t0_a0
LSYM(x123)	t0__5a0		! t0__8t0_a0	! b_e_t0	! t0__3t0
LSYM(x124)	t0__32a0	! t0__t0ma0	! b_e_shift	! r__r_4t0
LSYM(x125)	t0__5a0		! t0__5t0	! b_e_t0	! t0__5t0
LSYM(x126)	t0__64a0	! t0__t0ma0	! b_e_shift	! r__r_2t0
LSYM(x127)	t0__128a0	! a1_ne_0_b_l0	! t0__t0ma0	! b_n_ret_t0
LSYM(x128)	t0__128a0	! a1_ne_0_b_l1	! r__r_t0	! MILLIRETN
LSYM(x129)	t0__128a0	! a1_ne_0_b_l0	! t0__t0_a0	! b_n_ret_t0
LSYM(x130)	t0__64a0	! t0__t0_a0	! b_e_shift	! r__r_2t0
LSYM(x131)	t0__8a0		! t0__8t0_a0	! b_e_t0	! t0__2t0_a0
LSYM(x132)	t0__8a0		! t0__4t0_a0	! b_e_shift	! r__r_4t0
LSYM(x133)	t0__8a0		! t0__4t0_a0	! b_e_t0	! t0__4t0_a0
LSYM(x134)	t0__8a0		! t0__4t0_a0	! b_e_2t0	! t0__2t0_a0
LSYM(x135)	t0__9a0		! t0__5t0	! b_e_t0	! t0__3t0
LSYM(x136)	t0__8a0		! t0__2t0_a0	! b_e_shift	! r__r_8t0
LSYM(x137)	t0__8a0		! t0__2t0_a0	! b_e_t0	! t0__8t0_a0
LSYM(x138)	t0__8a0		! t0__2t0_a0	! b_e_2t0	! t0__4t0_a0
LSYM(x139)	t0__8a0		! t0__2t0_a0	! b_e_2t0a0	! t0__4t0_a0
LSYM(x140)	t0__3a0		! t0__2t0_a0	! b_e_4t0	! t0__5t0
LSYM(x141)	t0__8a0		! t0__2t0_a0	! b_e_4t0a0	! t0__2t0_a0
LSYM(x142)	t0__9a0		! t0__8t0	! b_e_2t0	! t0__t0ma0
LSYM(x143)	t0__16a0	! t0__9t0	! b_e_t0	! t0__t0ma0
LSYM(x144)	t0__9a0		! t0__8t0	! b_e_shift	! r__r_2t0
LSYM(x145)	t0__9a0		! t0__8t0	! b_e_t0	! t0__2t0_a0
LSYM(x146)	t0__9a0		! t0__8t0_a0	! b_e_shift	! r__r_2t0
LSYM(x147)	t0__9a0		! t0__8t0_a0	! b_e_t0	! t0__2t0_a0
LSYM(x148)	t0__9a0		! t0__4t0_a0	! b_e_shift	! r__r_4t0
LSYM(x149)	t0__9a0		! t0__4t0_a0	! b_e_t0	! t0__4t0_a0
LSYM(x150)	t0__9a0		! t0__4t0_a0	! b_e_2t0	! t0__2t0_a0
LSYM(x151)	t0__9a0		! t0__4t0_a0	! b_e_2t0a0	! t0__2t0_a0
LSYM(x152)	t0__9a0		! t0__2t0_a0	! b_e_shift	! r__r_8t0
LSYM(x153)	t0__9a0		! t0__2t0_a0	! b_e_t0	! t0__8t0_a0
LSYM(x154)	t0__9a0		! t0__2t0_a0	! b_e_2t0	! t0__4t0_a0
LSYM(x155)	t0__32a0	! t0__t0ma0	! b_e_t0	! t0__5t0
LSYM(x156)	t0__9a0		! t0__2t0_a0	! b_e_4t0	! t0__2t0_a0
LSYM(x157)	t0__32a0	! t0__t0ma0	! b_e_t02a0	! t0__5t0
LSYM(x158)	t0__16a0	! t0__5t0	! b_e_2t0	! t0__t0ma0
LSYM(x159)	t0__32a0	! t0__5t0	! b_e_t0	! t0__t0ma0
LSYM(x160)	t0__5a0		! t0__4t0	! b_e_shift	! r__r_8t0
LSYM(x161)	t0__8a0		! t0__5t0	! b_e_t0	! t0__4t0_a0
LSYM(x162)	t0__9a0		! t0__9t0	! b_e_shift	! r__r_2t0
LSYM(x163)	t0__9a0		! t0__9t0	! b_e_t0	! t0__2t0_a0
LSYM(x164)	t0__5a0		! t0__8t0_a0	! b_e_shift	! r__r_4t0
LSYM(x165)	t0__8a0		! t0__4t0_a0	! b_e_t0	! t0__5t0
LSYM(x166)	t0__5a0		! t0__8t0_a0	! b_e_2t0	! t0__2t0_a0
LSYM(x167)	t0__5a0		! t0__8t0_a0	! b_e_2t0a0	! t0__2t0_a0
LSYM(x168)	t0__5a0		! t0__4t0_a0	! b_e_shift	! r__r_8t0
LSYM(x169)	t0__5a0		! t0__4t0_a0	! b_e_t0	! t0__8t0_a0
LSYM(x170)	t0__32a0	! t0__t0_2a0	! b_e_t0	! t0__5t0
LSYM(x171)	t0__9a0		! t0__2t0_a0	! b_e_t0	! t0__9t0
LSYM(x172)	t0__5a0		! t0__4t0_a0	! b_e_4t0	! t0__2t0_a0
LSYM(x173)	t0__9a0		! t0__2t0_a0	! b_e_t02a0	! t0__9t0
LSYM(x174)	t0__32a0	! t0__t0_2a0	! b_e_t04a0	! t0__5t0
LSYM(x175)	t0__8a0		! t0__2t0_a0	! b_e_5t0	! t0__2t0_a0
LSYM(x176)	t0__5a0		! t0__4t0_a0	! b_e_8t0	! t0__t0_a0
LSYM(x177)	t0__5a0		! t0__4t0_a0	! b_e_8t0a0	! t0__t0_a0
LSYM(x178)	t0__5a0		! t0__2t0_a0	! b_e_2t0	! t0__8t0_a0
LSYM(x179)	t0__5a0		! t0__2t0_a0	! b_e_2t0a0	! t0__8t0_a0
LSYM(x180)	t0__9a0		! t0__5t0	! b_e_shift	! r__r_4t0
LSYM(x181)	t0__9a0		! t0__5t0	! b_e_t0	! t0__4t0_a0
LSYM(x182)	t0__9a0		! t0__5t0	! b_e_2t0	! t0__2t0_a0
LSYM(x183)	t0__9a0		! t0__5t0	! b_e_2t0a0	! t0__2t0_a0
LSYM(x184)	t0__5a0		! t0__9t0	! b_e_4t0	! t0__t0_a0
LSYM(x185)	t0__9a0		! t0__4t0_a0	! b_e_t0	! t0__5t0
LSYM(x186)	t0__32a0	! t0__t0ma0	! b_e_2t0	! t0__3t0
LSYM(x187)	t0__9a0		! t0__4t0_a0	! b_e_t02a0	! t0__5t0
LSYM(x188)	t0__9a0		! t0__5t0	! b_e_4t0	! t0__t0_2a0
LSYM(x189)	t0__5a0		! t0__4t0_a0	! b_e_t0	! t0__9t0
LSYM(x190)	t0__9a0		! t0__2t0_a0	! b_e_2t0	! t0__5t0
LSYM(x191)	t0__64a0	! t0__3t0	! b_e_t0	! t0__t0ma0
LSYM(x192)	t0__8a0		! t0__3t0	! b_e_shift	! r__r_8t0
LSYM(x193)	t0__8a0		! t0__3t0	! b_e_t0	! t0__8t0_a0
LSYM(x194)	t0__8a0		! t0__3t0	! b_e_2t0	! t0__4t0_a0
LSYM(x195)	t0__8a0		! t0__8t0_a0	! b_e_t0	! t0__3t0
LSYM(x196)	t0__8a0		! t0__3t0	! b_e_4t0	! t0__2t0_a0
LSYM(x197)	t0__8a0		! t0__3t0	! b_e_4t0a0	! t0__2t0_a0
LSYM(x198)	t0__64a0	! t0__t0_2a0	! b_e_t0	! t0__3t0
LSYM(x199)	t0__8a0		! t0__4t0_a0	! b_e_2t0a0	! t0__3t0
LSYM(x200)	t0__5a0		! t0__5t0	! b_e_shift	! r__r_8t0
LSYM(x201)	t0__5a0		! t0__5t0	! b_e_t0	! t0__8t0_a0
LSYM(x202)	t0__5a0		! t0__5t0	! b_e_2t0	! t0__4t0_a0
LSYM(x203)	t0__5a0		! t0__5t0	! b_e_2t0a0	! t0__4t0_a0
LSYM(x204)	t0__8a0		! t0__2t0_a0	! b_e_4t0	! t0__3t0
LSYM(x205)	t0__5a0		! t0__8t0_a0	! b_e_t0	! t0__5t0
LSYM(x206)	t0__64a0	! t0__t0_4a0	! b_e_t02a0	! t0__3t0
LSYM(x207)	t0__8a0		! t0__2t0_a0	! b_e_3t0	! t0__4t0_a0
LSYM(x208)	t0__5a0		! t0__5t0	! b_e_8t0	! t0__t0_a0
LSYM(x209)	t0__5a0		! t0__5t0	! b_e_8t0a0	! t0__t0_a0
LSYM(x210)	t0__5a0		! t0__4t0_a0	! b_e_2t0	! t0__5t0
LSYM(x211)	t0__5a0		! t0__4t0_a0	! b_e_2t0a0	! t0__5t0
LSYM(x212)	t0__3a0		! t0__4t0_a0	! b_e_4t0	! t0__4t0_a0
LSYM(x213)	t0__3a0		! t0__4t0_a0	! b_e_4t0a0	! t0__4t0_a0
LSYM(x214)	t0__9a0		! t0__t0_4a0	! b_e_2t04a0	! t0__8t0_a0
LSYM(x215)	t0__5a0		! t0__4t0_a0	! b_e_5t0	! t0__2t0_a0
LSYM(x216)	t0__9a0		! t0__3t0	! b_e_shift	! r__r_8t0
LSYM(x217)	t0__9a0		! t0__3t0	! b_e_t0	! t0__8t0_a0
LSYM(x218)	t0__9a0		! t0__3t0	! b_e_2t0	! t0__4t0_a0
LSYM(x219)	t0__9a0		! t0__8t0_a0	! b_e_t0	! t0__3t0
LSYM(x220)	t0__3a0		! t0__9t0	! b_e_4t0	! t0__2t0_a0
LSYM(x221)	t0__3a0		! t0__9t0	! b_e_4t0a0	! t0__2t0_a0
LSYM(x222)	t0__9a0		! t0__4t0_a0	! b_e_2t0	! t0__3t0
LSYM(x223)	t0__9a0		! t0__4t0_a0	! b_e_2t0a0	! t0__3t0
LSYM(x224)	t0__9a0		! t0__3t0	! b_e_8t0	! t0__t0_a0
LSYM(x225)	t0__9a0		! t0__5t0	! b_e_t0	! t0__5t0
LSYM(x226)	t0__3a0		! t0__2t0_a0	! b_e_t02a0	! t0__32t0
LSYM(x227)	t0__9a0		! t0__5t0	! b_e_t02a0	! t0__5t0
LSYM(x228)	t0__9a0		! t0__2t0_a0	! b_e_4t0	! t0__3t0
LSYM(x229)	t0__9a0		! t0__2t0_a0	! b_e_4t0a0	! t0__3t0
LSYM(x230)	t0__9a0		! t0__5t0	! b_e_5t0	! t0__t0_a0
LSYM(x231)	t0__9a0		! t0__2t0_a0	! b_e_3t0	! t0__4t0_a0
LSYM(x232)	t0__3a0		! t0__2t0_a0	! b_e_8t0	! t0__4t0_a0
LSYM(x233)	t0__3a0		! t0__2t0_a0	! b_e_8t0a0	! t0__4t0_a0
LSYM(x234)	t0__3a0		! t0__4t0_a0	! b_e_2t0	! t0__9t0
LSYM(x235)	t0__3a0		! t0__4t0_a0	! b_e_2t0a0	! t0__9t0
LSYM(x236)	t0__9a0		! t0__2t0_a0	! b_e_4t08a0	! t0__3t0
LSYM(x237)	t0__16a0	! t0__5t0	! b_e_3t0	! t0__t0ma0
LSYM(x238)	t0__3a0		! t0__4t0_a0	! b_e_2t04a0	! t0__9t0
LSYM(x239)	t0__16a0	! t0__5t0	! b_e_t0ma0	! t0__3t0
LSYM(x240)	t0__9a0		! t0__t0_a0	! b_e_8t0	! t0__3t0
LSYM(x241)	t0__9a0		! t0__t0_a0	! b_e_8t0a0	! t0__3t0
LSYM(x242)	t0__5a0		! t0__3t0	! b_e_2t0	! t0__8t0_a0
LSYM(x243)	t0__9a0		! t0__9t0	! b_e_t0	! t0__3t0
LSYM(x244)	t0__5a0		! t0__3t0	! b_e_4t0	! t0__4t0_a0
LSYM(x245)	t0__8a0		! t0__3t0	! b_e_5t0	! t0__2t0_a0
LSYM(x246)	t0__5a0		! t0__8t0_a0	! b_e_2t0	! t0__3t0
LSYM(x247)	t0__5a0		! t0__8t0_a0	! b_e_2t0a0	! t0__3t0
LSYM(x248)	t0__32a0	! t0__t0ma0	! b_e_shift	! r__r_8t0
LSYM(x249)	t0__32a0	! t0__t0ma0	! b_e_t0	! t0__8t0_a0
LSYM(x250)	t0__5a0		! t0__5t0	! b_e_2t0	! t0__5t0
LSYM(x251)	t0__5a0		! t0__5t0	! b_e_2t0a0	! t0__5t0
LSYM(x252)	t0__64a0	! t0__t0ma0	! b_e_shift	! r__r_4t0
LSYM(x253)	t0__64a0	! t0__t0ma0	! b_e_t0	! t0__4t0_a0
LSYM(x254)	t0__128a0	! t0__t0ma0	! b_e_shift	! r__r_2t0
LSYM(x255)	t0__256a0	! a1_ne_0_b_l0	! t0__t0ma0	! b_n_ret_t0
/*1040 insts before this.  */
/* Common tails: each finishes the current partial product (one last
   shift-and-add step and/or an accumulate into r), then either
   returns or re-enters the main loop (l0/l1/l2) when more bits of
   a1 remain.  */
LSYM(ret_t0)	MILLIRET
LSYM(e_t0)	r__r_t0
LSYM(e_shift)	a1_ne_0_b_l2
	a0__256a0	/* a0 <<= 8 *********** */
	MILLIRETN
LSYM(e_t0ma0)	a1_ne_0_b_l0
	t0__t0ma0
	MILLIRET
	r__r_t0
LSYM(e_t0a0)	a1_ne_0_b_l0
	t0__t0_a0
	MILLIRET
	r__r_t0
LSYM(e_t02a0)	a1_ne_0_b_l0
	t0__t0_2a0
	MILLIRET
	r__r_t0
LSYM(e_t04a0)	a1_ne_0_b_l0
	t0__t0_4a0
	MILLIRET
	r__r_t0
LSYM(e_2t0)	a1_ne_0_b_l1
	r__r_2t0
	MILLIRETN
LSYM(e_2t0a0)	a1_ne_0_b_l0
	t0__2t0_a0
	MILLIRET
	r__r_t0
LSYM(e2t04a0)	t0__t0_2a0
	a1_ne_0_b_l1
	r__r_2t0
	MILLIRETN
LSYM(e_3t0)	a1_ne_0_b_l0
	t0__3t0
	MILLIRET
	r__r_t0
LSYM(e_4t0)	a1_ne_0_b_l1
	r__r_4t0
	MILLIRETN
LSYM(e_4t0a0)	a1_ne_0_b_l0
	t0__4t0_a0
	MILLIRET
	r__r_t0
LSYM(e4t08a0)	t0__t0_2a0
	a1_ne_0_b_l1
	r__r_4t0
	MILLIRETN
LSYM(e_5t0)	a1_ne_0_b_l0
	t0__5t0
	MILLIRET
	r__r_t0
LSYM(e_8t0)	a1_ne_0_b_l1
	r__r_8t0
	MILLIRETN
LSYM(e_8t0a0)	a1_ne_0_b_l0
	t0__8t0_a0
	MILLIRET
	r__r_t0

	.procend
	.end
#endif
   2135