Home | History | Annotate | Line # | Download | only in arm
      1      1.1  joerg //===-- comparesf2.S - Implement single-precision soft-float comparisons --===//
      2      1.1  joerg //
      3      1.1  joerg //                     The LLVM Compiler Infrastructure
      4      1.1  joerg //
      5      1.1  joerg // This file is dual licensed under the MIT and the University of Illinois Open
      6      1.1  joerg // Source Licenses. See LICENSE.TXT for details.
      7      1.1  joerg //
      8      1.1  joerg //===----------------------------------------------------------------------===//
      9      1.1  joerg //
      10      1.1  joerg // This file implements the following soft-float comparison routines:
     11      1.1  joerg //
     12      1.1  joerg //   __eqsf2   __gesf2   __unordsf2
     13      1.1  joerg //   __lesf2   __gtsf2
     14      1.1  joerg //   __ltsf2
     15      1.1  joerg //   __nesf2
     16      1.1  joerg //
     17      1.1  joerg // The semantics of the routines grouped in each column are identical, so there
     18      1.1  joerg // is a single implementation for each, with multiple names.
     19      1.1  joerg //
     20      1.1  joerg // The routines behave as follows:
     21      1.1  joerg //
     22      1.1  joerg //   __lesf2(a,b) returns -1 if a < b
     23      1.1  joerg //                         0 if a == b
     24      1.1  joerg //                         1 if a > b
     25      1.1  joerg //                         1 if either a or b is NaN
     26      1.1  joerg //
     27      1.1  joerg //   __gesf2(a,b) returns -1 if a < b
     28      1.1  joerg //                         0 if a == b
     29      1.1  joerg //                         1 if a > b
     30      1.1  joerg //                        -1 if either a or b is NaN
     31      1.1  joerg //
     32      1.1  joerg //   __unordsf2(a,b) returns 0 if both a and b are numbers
     33      1.1  joerg //                           1 if either a or b is NaN
     34      1.1  joerg //
     35      1.1  joerg // Note that __lesf2( ) and __gesf2( ) are identical except in their handling of
     36      1.1  joerg // NaN values.
     37      1.1  joerg //
     38      1.1  joerg //===----------------------------------------------------------------------===//
     39      1.1  joerg 
     40      1.1  joerg #include "../assembly.h"
     41      1.1  joerg .syntax unified
     42      1.1  joerg 
      43  1.1.1.2  joerg .p2align 2
                         // __eqsf2 (also exported as __lesf2, __ltsf2 and __nesf2): three-way
                         // comparison of two single-precision values, done entirely with integer
                         // operations on their bit patterns.
                         //
                         //   In:    r0 = a, r1 = b
                         //   Out:   r0 = -1 if a < b, 0 if a == b, 1 if a > b,
                         //               1 if either a or b is NaN
                         //   Clobb: r2, r3, r12 and the condition flags
      44      1.1  joerg DEFINE_COMPILERRT_FUNCTION(__eqsf2)
      45      1.1  joerg     // Make copies of a and b with the sign bit shifted off the top.  These will
      46      1.1  joerg     // be used to detect zeros and NaNs.
      47      1.1  joerg     mov     r2,         r0, lsl #1
      48      1.1  joerg     mov     r3,         r1, lsl #1
      49      1.1  joerg 
      50      1.1  joerg     // We do the comparison in three stages (ignoring NaN values for the time
      51      1.1  joerg     // being).  First, we orr the absolute values of a and b; this sets the Z
      52      1.1  joerg     // flag if both a and b are zero (of either sign).  The shift of r3 doesn't
      53      1.1  joerg     // affect this at all, but it *does* make sure that the C flag is clear for
      54      1.1  joerg     // the subsequent operations.
                             // (r3 was produced by lsl #1, so its bit 0 is zero, and that is the bit
                             // lsr #1 moves into C.)
      55      1.1  joerg     orrs    r12,    r2, r3, lsr #1
      56      1.1  joerg 
      57      1.1  joerg     // Next, we check if a and b have the same or different signs.  If they have
      58      1.1  joerg     // opposite signs, this eor will set the N flag.
                             // The eor is skipped when both inputs are zero, which leaves N clear and
                             // Z set.  It uses no shifted operand, so C stays clear either way.
      59      1.1  joerg     it ne
      60      1.1  joerg     eorsne  r12,    r0, r1
      61      1.1  joerg 
      62      1.1  joerg     // If a and b are equal (either both zeros or bit identical; again, we're
      63      1.1  joerg     // ignoring NaNs for now), this subtract will zero out r0.  If they have the
      64      1.1  joerg     // same sign, the flags are updated as they would be for a comparison of the
      65      1.1  joerg     // absolute values of a and b.
      66      1.1  joerg     it pl
      67      1.1  joerg     subspl  r0,     r2, r3
      68      1.1  joerg 
      69      1.1  joerg     // If a is smaller in magnitude than b and both have the same sign, place
      70      1.1  joerg     // the negation of the sign of b in r0.  Thus, if both are negative and
      71      1.1  joerg     // a > b, this sets r0 to 0; if both are positive and a < b, this sets
      72      1.1  joerg     // r0 to -1.
      73      1.1  joerg     //
      74      1.1  joerg     // This is also done if a and b have opposite signs and are not both zero,
      75      1.1  joerg     // because in that case the subtract was not performed and the C flag is
      76      1.1  joerg     // still clear from the shift argument in orrs; if a is positive and b
      77      1.1  joerg     // negative, this places 0 in r0; if a is negative and b positive, -1 is
      78      1.1  joerg     // placed in r0.
      79      1.1  joerg     it lo
      80      1.1  joerg     mvnlo   r0,         r1, asr #31
      81      1.1  joerg 
      82      1.1  joerg     // If a is greater in magnitude than b and both have the same sign, place
      83      1.1  joerg     // the sign of b in r0.  Thus, if both are negative and a < b, -1 is placed
      84      1.1  joerg     // in r0, which is the desired result.  Conversely, if both are positive
      85      1.1  joerg     // and a > b, zero is placed in r0.
      86      1.1  joerg     it hi
      87      1.1  joerg     movhi   r0,         r1, asr #31
      88      1.1  joerg 
      89      1.1  joerg     // If you've been keeping track, at this point r0 contains -1 if a < b and
      90      1.1  joerg     // 0 if a >= b.  All that remains to be done is to set it to 1 if a > b.
      91      1.1  joerg     // If a == b, then the Z flag is set, so we can get the correct final value
      92      1.1  joerg     // into r0 by simply or'ing with 1 if Z is clear.
                             // (Or'ing 1 into -1 leaves it at -1, so the a < b result is preserved.)
      93      1.1  joerg     it ne
      94      1.1  joerg     orrne   r0,     r0, #1
      95      1.1  joerg 
      96      1.1  joerg     // Finally, we need to deal with NaNs.  If either argument is NaN, replace
      97      1.1  joerg     // the value in r0 with 1.
                             //
                             // r2 and r3 hold the inputs shifted left by one, so infinity shows up as
                             // 0xff000000 and anything unsigned-higher is a NaN.  The second compare
                             // only runs when a is not a NaN (ls); movhi then acts on the flags of
                             // whichever compare executed last.
      98      1.1  joerg     cmp     r2,         #0xff000000
      99      1.1  joerg     ite ls
     100      1.1  joerg     cmpls   r3,         #0xff000000
     101      1.1  joerg     movhi   r0,         #1
                             // JMP comes from assembly.h; presumably a plain return through lr.
     102      1.1  joerg     JMP(lr)
     103      1.1  joerg END_COMPILERRT_FUNCTION(__eqsf2)
                         // The remaining names in this group have identical semantics, so they are
                         // exported as aliases of __eqsf2.
     104      1.1  joerg DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2)
     105      1.1  joerg DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2)
     106      1.1  joerg DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2)
    107      1.1  joerg 
     108  1.1.1.2  joerg .p2align 2
                         // __gtsf2 (also exported as __gesf2): three-way comparison of two
                         // single-precision values that reports NaN operands as -1.
                         //
                         //   In:    r0 = a, r1 = b
                         //   Out:   r0 = -1 if a < b, 0 if a == b, 1 if a > b,
                         //              -1 if either a or b is NaN
                         //   Clobb: r2, r3, r12 and the condition flags
     109      1.1  joerg DEFINE_COMPILERRT_FUNCTION(__gtsf2)
     110  1.1.1.3  joerg     // Identical to the preceding except in that we return -1 for NaN values.
     111      1.1  joerg     // Given that the two paths share so much code, one might be tempted to
     112      1.1  joerg     // unify them; however, the extra code needed to do so makes the code size
     113      1.1  joerg     // to performance tradeoff very hard to justify for such small functions.
                             //
                             // r2, r3 = a, b with the sign bit shifted off the top.
     114      1.1  joerg     mov     r2,         r0, lsl #1
     115      1.1  joerg     mov     r3,         r1, lsl #1
                             // Z is set if both inputs are zero (of either sign).  The bit that
                             // lsr #1 shifts out of r3 is always zero, so C is left clear.
     116      1.1  joerg     orrs    r12,    r2, r3, lsr #1
                             // Unless both are zero: N is set if the signs differ, Z if a and b are
                             // bit identical.
     117      1.1  joerg     it ne
     118      1.1  joerg     eorsne  r12,    r0, r1
                             // Same sign (or both zero): compare the magnitudes.  r0 becomes 0 when
                             // they are equal.
     119      1.1  joerg     it pl
     120      1.1  joerg     subspl  r0,     r2, r3
                             // C clear (smaller magnitude, or signs differ): r0 = -1 if b is
                             // non-negative, 0 if b is negative.
     121      1.1  joerg     it lo
     122      1.1  joerg     mvnlo   r0,         r1, asr #31
                             // Larger magnitude with the same sign: r0 = -1 if b is negative,
                             // 0 otherwise.
     123      1.1  joerg     it hi
     124      1.1  joerg     movhi   r0,         r1, asr #31
                             // Not equal: turn a 0 into 1 (a > b); -1 is unchanged by the orr.
     125      1.1  joerg     it ne
     126      1.1  joerg     orrne   r0,     r0, #1
                             // NaN check: with the sign shifted off, infinity is 0xff000000 and any
                             // unsigned-higher value is a NaN.  b is only examined when a is not a
                             // NaN; movhi acts on the flags of whichever compare executed last.
     127      1.1  joerg     cmp     r2,         #0xff000000
     128      1.1  joerg     ite ls
     129      1.1  joerg     cmpls   r3,         #0xff000000
     130      1.1  joerg     movhi   r0,         #-1
                             // JMP comes from assembly.h; presumably a plain return through lr.
     131      1.1  joerg     JMP(lr)
     132      1.1  joerg END_COMPILERRT_FUNCTION(__gtsf2)
                         // __gesf2 has identical semantics, so it is exported as an alias.
     133      1.1  joerg DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2)
    134      1.1  joerg 
     135  1.1.1.2  joerg .p2align 2
                         // __unordsf2: reports whether two single-precision values are unordered.
                         //
                         //   In:    r0 = a, r1 = b
                         //   Out:   r0 = 1 if either a or b is NaN, 0 otherwise
                         //   Clobb: r2, r3 and the condition flags
     136      1.1  joerg DEFINE_COMPILERRT_FUNCTION(__unordsf2)
     137      1.1  joerg     // Return 1 for NaN values, 0 otherwise.
                             //
                             // r2, r3 = a, b with the sign bit shifted off the top.
     138      1.1  joerg     mov     r2,         r0, lsl #1
     139      1.1  joerg     mov     r3,         r1, lsl #1
                             // a has been copied into r2, so r0 can now take the default result.
     140      1.1  joerg     mov     r0,         #0
                             // With the sign shifted off, infinity is 0xff000000 and any
                             // unsigned-higher value is a NaN.  b is only examined when a is not a
                             // NaN; movhi acts on the flags of whichever compare executed last.
     141      1.1  joerg     cmp     r2,         #0xff000000
     142      1.1  joerg     ite ls
     143      1.1  joerg     cmpls   r3,         #0xff000000
     144      1.1  joerg     movhi   r0,         #1
                             // JMP comes from assembly.h; presumably a plain return through lr.
     145      1.1  joerg     JMP(lr)
     146      1.1  joerg END_COMPILERRT_FUNCTION(__unordsf2)
     147      1.1  joerg 
                         // Presumably exports the same routine under its ARM EABI helper name;
                         // the alias macro is defined in assembly.h.
     148      1.1  joerg DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2)
    149