Home | History | Annotate | Line # | Download | only in ia64
      1 .file "__umoddi3.s"  // file name given to the assembler; note it differs from the umoddi3.S named in the $NetBSD$ tag below
      2 
      3 // $NetBSD: umoddi3.S,v 1.2 2006/04/07 14:27:33 cherry Exp $
      4 
      5 //-
      6 // Copyright (c) 2000, Intel Corporation
      7 // All rights reserved.
      8 //
      9 // Contributed 2/15/2000 by Marius Cornea, John Harrison, Cristina Iordache,
     10 // Ted Kubaska, Bob Norin, and Shane Story of the Computational Software Lab,
     11 // Intel Corporation.
     12 //
     13 // WARRANTY DISCLAIMER
     14 //
     15 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     16 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     17 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     18 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
     19 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     20 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     21 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     22 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     23 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
     24 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     25 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 //
     27 // Intel Corporation is the author of this code, and requests that all
     28 // problem reports or change requests be submitted to it directly at
     29 // http://developer.intel.com/opensource.
     30 //
     31 
     32 .section .text
     33 
     34   // __umoddi3: 64-bit unsigned integer remainder, r8 = a mod b (a in r32, b in r33)
     35   // Straight-line code: q ~ a/b is refined in floating point (steps 1-10), truncated (11), then r = a - b*q is formed in integer form (12).
     36 .proc __umoddi3#
     37 .align 32
     38 .global __umoddi3#
     39 .align 32  // NOTE(review): repeats the .align 32 two lines above
     40 
     41 __umoddi3:  // entry point
     42 
     43 { .mii
     44   alloc r31=ar.pfs,3,0,0,0  // r31 = previous ar.pfs (not read again here); frame declares 3 inputs, though only r32 and r33 are read below
     45   nop.i 0
     46   nop.i 0
     47 } { .mmb
     48 
     49   // 64-BIT UNSIGNED INTEGER REMAINDER BEGINS HERE
     50 
     51   // general register used (r31 is additionally written by the alloc above):
     52   //    r32 - 64-bit unsigned integer dividend, called a below
     53   //    r33 - 64-bit unsigned integer divisor, called b below (overwritten with -b by the sub below)
     54   //    r8 - 64-bit unsigned integer result
     55   // floating-point registers used: f6, f7, f8, f9, f10, f11, f12
     56   // predicate registers used: p6 (written by frcpa in step 1; guards steps 2-10)
     57 
     58   setf.sig f12=r32  // holds a in integer form; kept unchanged for the final xma.l in step (12)
     59   setf.sig f7=r33  // f7 = b in integer form, converted to floating point below
     60   nop.b 0;;  // ;; closes the instruction group, so the reads of f12/f7 below see these writes
     61 } { .mfi
     62   // get 2's complement of b: r33 = 0 - b (b was already copied to f7 above; -b is moved to f7 in the step (10) bundle)
     63   sub r33=r0,r33
     64   fcvt.xuf.s1 f6=f12  // f6 = a converted from unsigned integer to floating point
     65   nop.i 0
     66 } { .mfi
     67   nop.m 0
     68   fcvt.xuf.s1 f7=f7  // f7 = b converted from unsigned integer to floating point
     69   nop.i 0;;
     70 } { .mfi
     71   nop.m 0
     72   // Step (1)
     73   // y0 = 1 / b in f8 (initial reciprocal approximation); p6 is written here as well
     74   frcpa.s1 f8,p6=f6,f7  // NOTE(review): if frcpa leaves p6 clear (per the ISA, for special operands such as b == 0), steps 2-10 are skipped but 11-12 still run - confirm the r8 value is acceptable to callers
     75   nop.i 0;;
     76 } { .mfi
     77   nop.m 0
     78   // Step (2)
     79   // q0 = a * y0 in f10
     80   (p6) fma.s1 f10=f6,f8,f0
     81   nop.i 0
     82 } { .mfi
     83   nop.m 0
     84   // Step (3)
     85   // e0 = 1 - b * y0 in f9 (same instruction group as step 2; both read only y0, a and b)
     86   (p6) fnma.s1 f9=f7,f8,f1
     87   nop.i 0;;
     88 } { .mfi
     89   nop.m 0
     90   // Step (4)
     91   // q1 = q0 + e0 * q0 in f10
     92   (p6) fma.s1 f10=f9,f10,f10
     93   nop.i 0
     94 } { .mfi
     95   nop.m 0
     96   // Step (5)
     97   // e1 = e0 * e0 in f11 (same instruction group as step 4; both read e0 from f9)
     98   (p6) fma.s1 f11=f9,f9,f0
     99   nop.i 0;;
    100 } { .mfi
    101   nop.m 0
    102   // Step (6)
    103   // y1 = y0 + e0 * y0 in f8 (last read of e0; f9 is reused for q2 in step 7)
    104   (p6) fma.s1 f8=f9,f8,f8
    105   nop.i 0;;
    106 } { .mfi
    107   nop.m 0
    108   // Step (7)
    109   // q2 = q1 + e1 * q1 in f9
    110   (p6) fma.s1 f9=f11,f10,f10
    111   nop.i 0;;
    112 } { .mfi
    113   nop.m 0
    114   // Step (8)
    115   // y2 = y1 + e1 * y1 in f8
    116   (p6) fma.s1 f8=f11,f8,f8
    117   nop.i 0;;
    118 } { .mfi
    119   nop.m 0
    120   // Step (9)
    121   // r2 = a - b * q2 in f10 (last read of b in floating-point form from f7)
    122   (p6) fnma.s1 f10=f7,f9,f6
    123   nop.i 0;;
    124 } { .mfi
    125   // f7=-b in integer form (r33 was negated near the top); the floating-point b in f7 is not read after step (9)
    126   setf.sig f7=r33
    127   // Step (10)
    128   // q3 = q2 + r2 * y2 in f8
    129   (p6) fma.s1 f8=f10,f8,f9
    130   nop.i 0;;
    131 } { .mfi
    132   nop.m 0
    133   // (11) q = trunc(q3), converted to an unsigned integer in f8; not predicated on p6
    134   fcvt.fxu.trunc.s1 f8=f8
    135   nop.i 0;;
    136 }  { .mfi
    137   nop.m 0
    138   // (12) r = a + (-b) * q, using the integer forms in f12 (a) and f7 (-b); xma.l keeps the low 64 bits of the result
    139   xma.l f8=f8,f7,f12
    140   nop.i 0;;
    141 }  { .mib
    142   getf.sig r8=f8  // move r from f8 into r8, the result register listed above
    143   nop.i 0
    144   nop.b 0
    145 }
    146 
    147   // 64-BIT UNSIGNED INTEGER REMAINDER ENDS HERE
    148 
    149 { .mib
    150   nop.m 0
    151   nop.i 0
    152   br.ret.sptk b0;;  // return to the caller through branch register b0
    153 }
    154 
    155 .endp __umoddi3
    156