Home | History | Annotate | Line # | Download | only in ia64
moddi3.S revision 1.1
      1  1.1  cherry .file "__moddi3.s"
      2  1.1  cherry 
      3  1.1  cherry // $FreeBSD$
      4  1.1  cherry 
      5  1.1  cherry //-
      6  1.1  cherry // Copyright (c) 2000, Intel Corporation
      7  1.1  cherry // All rights reserved.
      8  1.1  cherry //
      9  1.1  cherry // Contributed 2/15/2000 by Marius Cornea, John Harrison, Cristina Iordache,
     10  1.1  cherry // Ted Kubaska, Bob Norin, and Shane Story of the Computational Software Lab,
     11  1.1  cherry // Intel Corporation.
     12  1.1  cherry //
     13  1.1  cherry // WARRANTY DISCLAIMER
     14  1.1  cherry //
     15  1.1  cherry // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     16  1.1  cherry // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     17  1.1  cherry // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     18  1.1  cherry // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
     19  1.1  cherry // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     20  1.1  cherry // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     21  1.1  cherry // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     22  1.1  cherry // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     23  1.1  cherry // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
     24  1.1  cherry // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     25  1.1  cherry // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  1.1  cherry //
     27  1.1  cherry // Intel Corporation is the author of this code, and requests that all
     28  1.1  cherry // problem reports or change requests be submitted to it directly at
     29  1.1  cherry // http://developer.intel.com/opensource.
     30  1.1  cherry //
     31  1.1  cherry 
     32  1.1  cherry .section .text
     33  1.1  cherry 
     34  1.1  cherry // 64-bit signed integer remainder: r8 = a - b * trunc(a / b), with a in r32 and b in r33
     35  1.1  cherry // The quotient is estimated in floating point (frcpa plus the refinement steps below), truncated to an integer, and the remainder is then formed with one integer multiply-add (xma.l).
     36  1.1  cherry .proc __moddi3#
     37  1.1  cherry .align 32
     38  1.1  cherry .global __moddi3#
     39  1.1  cherry .align 32  // NOTE(review): repeats the .align 32 two lines up
     40  1.1  cherry 
     41  1.1  cherry __moddi3:
     42  1.1  cherry 
     43  1.1  cherry { .mii
     44  1.1  cherry   alloc r31=ar.pfs,3,0,0,0  // frame declared with 3 inputs; only r32 and r33 are read below, and r31 is not read again
     45  1.1  cherry   nop.i 0
     46  1.1  cherry   nop.i 0
     47  1.1  cherry } { .mmb
     48  1.1  cherry 
     49  1.1  cherry   // 64-BIT SIGNED INTEGER REMAINDER BEGINS HERE
     50  1.1  cherry // NOTE(review): there is no explicit test for b == 0 anywhere in this routine; the result then depends on what frcpa leaves in f8 -- confirm against the ISA manual
     51  1.1  cherry   // general register used:
     52  1.1  cherry   //    r32 - 64-bit signed integer dividend, called a below
     53  1.1  cherry   //    r33 - 64-bit signed integer divisor, called b below (overwritten with -b further down)
     54  1.1  cherry   //    r8 - 64-bit signed integer result
     55  1.1  cherry   //    r2 - scratch register (written by the movl below, never read in this routine)
     56  1.1  cherry   // floating-point registers used: f6, f7, f8, f9, f10, f11, f12
     57  1.1  cherry   // predicate registers used: p6 (written by frcpa, predicates steps (2)-(10))
     58  1.1  cherry // register roles: f6 = a as float; f7 = b as float, later -b as integer; f8 = y0/y1/y2, then q3, q and finally r; f9 = e0 then q2; f10 = q0/q1 then r2; f11 = e1; f12 = a as integer
     59  1.1  cherry   setf.sig f12=r32  // holds a in integer form
     60  1.1  cherry   setf.sig f7=r33  // holds b in integer form (r33 is negated only after the stop below)
     61  1.1  cherry   nop.b 0
     62  1.1  cherry } { .mlx
     63  1.1  cherry   nop.m 0
     64  1.1  cherry   //movl r2=0x8000000000000000;;
     65  1.1  cherry   movl r2=0xffffffffffffffff;;  // r2 is not read afterwards; the ;; stop ends the instruction group that holds the setf.sig writes
     66  1.1  cherry } { .mfi
     67  1.1  cherry   // get the 2's complement of b: r33 = 0 - b (r0 reads as zero); it becomes the integer multiplier of step (12)
     68  1.1  cherry   sub r33=r0,r33
     69  1.1  cherry   fcvt.xf f6=f12  // a converted from signed integer to floating point
     70  1.1  cherry   nop.i 0
     71  1.1  cherry } { .mfi
     72  1.1  cherry   nop.m 0
     73  1.1  cherry   fcvt.xf f7=f7  // b converted from signed integer to floating point
     74  1.1  cherry   nop.i 0;;
     75  1.1  cherry } { .mfi
     76  1.1  cherry   nop.m 0
     77  1.1  cherry   // Step (1)
     78  1.1  cherry   // y0 = 1 / b in f8 (initial approximation); p6 is written here and predicates steps (2)-(10)
     79  1.1  cherry   frcpa.s1 f8,p6=f6,f7
     80  1.1  cherry   nop.i 0;;
     81  1.1  cherry } { .mfi
     82  1.1  cherry   nop.m 0
     83  1.1  cherry   // Step (2)
     84  1.1  cherry   // q0 = a * y0 in f10
     85  1.1  cherry   (p6) fma.s1 f10=f6,f8,f0
     86  1.1  cherry   nop.i 0
     87  1.1  cherry } { .mfi
     88  1.1  cherry   nop.m 0
     89  1.1  cherry   // Step (3)
     90  1.1  cherry   // e0 = 1 - b * y0 in f9 (error of the reciprocal approximation)
     91  1.1  cherry   (p6) fnma.s1 f9=f7,f8,f1
     92  1.1  cherry   nop.i 0;;
     93  1.1  cherry } { .mfi
     94  1.1  cherry   nop.m 0
     95  1.1  cherry   // Step (4)
     96  1.1  cherry   // q1 = q0 + e0 * q0 in f10 (overwrites q0)
     97  1.1  cherry   (p6) fma.s1 f10=f9,f10,f10
     98  1.1  cherry   nop.i 0
     99  1.1  cherry } { .mfi
    100  1.1  cherry   nop.m 0
    101  1.1  cherry   // Step (5)
    102  1.1  cherry   // e1 = e0 * e0 in f11
    103  1.1  cherry   (p6) fma.s1 f11=f9,f9,f0
    104  1.1  cherry   nop.i 0;;
    105  1.1  cherry } { .mfi
    106  1.1  cherry   nop.m 0
    107  1.1  cherry   // Step (6)
    108  1.1  cherry   // y1 = y0 + e0 * y0 in f8 (overwrites y0)
    109  1.1  cherry   (p6) fma.s1 f8=f9,f8,f8
    110  1.1  cherry   nop.i 0;;
    111  1.1  cherry } { .mfi
    112  1.1  cherry   nop.m 0
    113  1.1  cherry   // Step (7)
    114  1.1  cherry   // q2 = q1 + e1 * q1 in f9 (f9 is reused; e0 is not read after step (6))
    115  1.1  cherry   (p6) fma.s1 f9=f11,f10,f10
    116  1.1  cherry   nop.i 0;;
    117  1.1  cherry } { .mfi
    118  1.1  cherry   nop.m 0
    119  1.1  cherry   // Step (8)
    120  1.1  cherry   // y2 = y1 + e1 * y1 in f8 (overwrites y1)
    121  1.1  cherry   (p6) fma.s1 f8=f11,f8,f8
    122  1.1  cherry   nop.i 0;;
    123  1.1  cherry } { .mfi
    124  1.1  cherry   nop.m 0
    125  1.1  cherry   // Step (9)
    126  1.1  cherry   // r2 = a - b * q2 in f10 (r2 here names the floating-point remainder estimate, not general register r2)
    127  1.1  cherry   (p6) fnma.s1 f10=f7,f9,f6
    128  1.1  cherry   nop.i 0;;
    129  1.1  cherry } { .mfi
    130  1.1  cherry   setf.sig f7=r33  // f7 now holds -b in integer form; the float copy of b was last read in step (9)
    131  1.1  cherry   // Step (10)
    132  1.1  cherry   // q3 = q2 + r2 * y2 in f8
    133  1.1  cherry   (p6) fma.s1 f8=f10,f8,f9
    134  1.1  cherry   nop.i 0;;
    135  1.1  cherry } { .mfi
    136  1.1  cherry   nop.m 0
    137  1.1  cherry   // (11) q = trunc(q3); not predicated, so it also executes when p6 is clear, on f8 as frcpa wrote it
    138  1.1  cherry   fcvt.fx.trunc.s1 f8=f8
    139  1.1  cherry   nop.i 0;;
    140  1.1  cherry }  { .mfi
    141  1.1  cherry   nop.m 0
    142  1.1  cherry   // (12) r = a + (-b) * q, as an integer multiply-add on the significands (q in f8, -b in f7, a in f12)
    143  1.1  cherry   xma.l f8=f8,f7,f12
    144  1.1  cherry   nop.i 0;;
    145  1.1  cherry }  { .mib
    146  1.1  cherry   getf.sig r8=f8  // r8 = remainder, the documented result register
    147  1.1  cherry   nop.i 0
    148  1.1  cherry   nop.b 0
    149  1.1  cherry }
    150  1.1  cherry 
    151  1.1  cherry   // 64-BIT SIGNED INTEGER REMAINDER ENDS HERE
    152  1.1  cherry 
    153  1.1  cherry { .mib
    154  1.1  cherry   nop.m 0
    155  1.1  cherry   nop.i 0
    156  1.1  cherry   br.ret.sptk b0;;  // return through branch register b0
    157  1.1  cherry }
    158  1.1  cherry 
    159  1.1  cherry .endp __moddi3
    160