Home | History | Annotate | Line # | Download | only in ia64
      1 .file "__moddi3.s"
      2 
      3 // $NetBSD: moddi3.S,v 1.2 2006/04/07 14:27:33 cherry Exp $
      4 
      5 //-
      6 // Copyright (c) 2000, Intel Corporation
      7 // All rights reserved.
      8 //
      9 // Contributed 2/15/2000 by Marius Cornea, John Harrison, Cristina Iordache,
     10 // Ted Kubaska, Bob Norin, and Shane Story of the Computational Software Lab,
     11 // Intel Corporation.
     12 //
     13 // WARRANTY DISCLAIMER
     14 //
     15 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     16 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     17 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     18 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
     19 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     20 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     21 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     22 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     23 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
     24 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     25 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 //
     27 // Intel Corporation is the author of this code, and requests that all
     28 // problem reports or change requests be submitted to it directly at
     29 // http://developer.intel.com/opensource.
     30 //
     31 
     32 .section .text
     33 
     34 // 64-bit signed integer remainder
     35 
     36 .proc __moddi3#
     37 .align 32
     38 .global __moddi3#
     39 .align 32
     40 
        // __moddi3: 64-bit signed integer remainder.
        //   In:     r32 = dividend a, r33 = divisor b
        //   Out:    r8  = a + (-b) * trunc(q3), where q3 is the floating-point
        //           quotient estimate of a / b built in Steps (1)-(10) below
        //   Writes: r2, r8, r31, r33 (replaced by -b), f6-f12, p6
        //   There is no explicit test for b == 0 in this procedure.
        //   ';;' marks a stop (end of an instruction group); each step that
        //   consumes a result of the previous step sits after such a stop.
     41 __moddi3:
     42 
     43 { .mii
          // New register frame: 3 inputs, 0 locals, 0 outputs, 0 rotating;
          // the previous ar.pfs value is copied into r31.
          // NOTE(review): r31 is not read again in this procedure, and only
          // two of the three declared inputs (r32, r33) are used.
     44   alloc r31=ar.pfs,3,0,0,0
     45   nop.i 0
     46   nop.i 0
     47 } { .mmb
     48 
     49   // 64-BIT SIGNED INTEGER REMAINDER BEGINS HERE
     50 
     51   // general registers used:
     52   //    r32 - 64-bit signed integer dividend, called a below
     53   //    r33 - 64-bit signed integer divisor, called b below (later overwritten with -b)
     54   //    r8 - 64-bit signed integer result
     55   //    r2 - scratch register (written by the movl below, not read afterwards)
          //    r31 - receives the previous ar.pfs value from alloc
     56   // floating-point registers used: f6, f7, f8, f9, f10, f11, f12
     57   // predicate registers used: p6
     58 
     59   setf.sig f12=r32  // holds a in integer form
          // f7 = b in integer form (converted to floating point further down)
     60   setf.sig f7=r33
     61   nop.b 0
     62 } { .mlx
     63   nop.m 0
     64   //movl r2=0x8000000000000000;;
          // NOTE(review): r2 is never read after this load. The stop on this
          // line is still significant: it closes the instruction group that
          // contains the two setf.sig writes above, before fcvt.xf reads
          // f12 and f7.
     65   movl r2=0xffffffffffffffff;;
     66 } { .mfi
     67   // get the 2's complement of b: r33 = 0 - b = -b
          // (the original b was already copied into f7 in the previous group)
     68   sub r33=r0,r33
          // f6 = a converted from signed integer to floating point
     69   fcvt.xf f6=f12
     70   nop.i 0
     71 } { .mfi
     72   nop.m 0
          // f7 = b converted from signed integer to floating point
     73   fcvt.xf f7=f7
     74   nop.i 0;;
     75 } { .mfi
     76   nop.m 0
     77   // Step (1)
     78   // y0 = 1 / b in f8 (reciprocal approximation)
          // frcpa also writes predicate p6; Steps (2)-(10) run only when p6
          // is set.
          // NOTE(review): per the frcpa definition, p6 clear should mean f8
          // already holds the quotient result for a special-case operand
          // (e.g. b == 0) -- confirm against the ISA reference.
     79   frcpa.s1 f8,p6=f6,f7
     80   nop.i 0;;
     81 } { .mfi
     82   nop.m 0
     83   // Step (2)
     84   // q0 = a * y0 in f10
     85   (p6) fma.s1 f10=f6,f8,f0
     86   nop.i 0
     87 } { .mfi
     88   nop.m 0
     89   // Step (3)
     90   // e0 = 1 - b * y0 in f9
     91   (p6) fnma.s1 f9=f7,f8,f1
     92   nop.i 0;;
     93 } { .mfi
     94   nop.m 0
     95   // Step (4)
     96   // q1 = q0 + e0 * q0 in f10
     97   (p6) fma.s1 f10=f9,f10,f10
     98   nop.i 0
     99 } { .mfi
    100   nop.m 0
    101   // Step (5)
    102   // e1 = e0 * e0 in f11
    103   (p6) fma.s1 f11=f9,f9,f0
    104   nop.i 0;;
    105 } { .mfi
    106   nop.m 0
    107   // Step (6)
    108   // y1 = y0 + e0 * y0 in f8
    109   (p6) fma.s1 f8=f9,f8,f8
    110   nop.i 0;;
    111 } { .mfi
    112   nop.m 0
    113   // Step (7)
    114   // q2 = q1 + e1 * q1 in f9 (f9 no longer holds e0 after this)
    115   (p6) fma.s1 f9=f11,f10,f10
    116   nop.i 0;;
    117 } { .mfi
    118   nop.m 0
    119   // Step (8)
    120   // y2 = y1 + e1 * y1 in f8
    121   (p6) fma.s1 f8=f11,f8,f8
    122   nop.i 0;;
    123 } { .mfi
    124   nop.m 0
    125   // Step (9)
    126   // r2 = a - b * q2 in f10
          // ("r2" here names the remainder term of the iteration held in f10,
          // not general register r2)
    127   (p6) fnma.s1 f10=f7,f9,f6
    128   nop.i 0;;
    129 } { .mfi
          // f7 = -b in integer form (r33 was negated above). The
          // floating-point copy of b in f7 was last read by Step (9).
    130   setf.sig f7=r33
    131   // Step (10)
    132   // q3 = q2 + r2 * y2 in f8
    133   (p6) fma.s1 f8=f10,f8,f9
    134   nop.i 0;;
    135 } { .mfi
    136   nop.m 0
    137   // (11) q = trunc(q3), left in f8 as a signed integer
          // Not predicated: it also runs when p6 is clear, on the f8 value
          // written directly by frcpa.
    138   fcvt.fx.trunc.s1 f8=f8
    139   nop.i 0;;
    140 } { .mfi
    141   nop.m 0
    142   // (12) r = a + (-b) * q
          // integer multiply-add: f8 = low 64 bits of f8 * f7 + f12
    143   xma.l f8=f8,f7,f12
    144   nop.i 0;;
    145 }  { .mib
          // move the integer remainder from f8 into the result register r8
    146   getf.sig r8=f8
    147   nop.i 0
    148   nop.b 0
    149 }
    150 
    151   // 64-BIT SIGNED INTEGER REMAINDER ENDS HERE
    152 
    153 { .mib
    154   nop.m 0
    155   nop.i 0
          // return to the caller through branch register b0
    156   br.ret.sptk b0;;
    157 }
    158 
    159 .endp __moddi3
    160