//===-- comparesf2.S - Implement single-precision soft-float comparisons --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the following soft-float comparison routines:
//
//   __eqsf2   __gesf2   __unordsf2
//   __lesf2   __gtsf2
//   __ltsf2
//   __nesf2
//
// The semantics of the routines grouped in each column are identical, so there
// is a single implementation for each, with multiple names.
//
// The routines behave as follows:
//
//   __lesf2(a,b) returns -1 if a < b
//                         0 if a == b
//                         1 if a > b
//                         1 if either a or b is NaN
//
//   __gesf2(a,b) returns -1 if a < b
//                         0 if a == b
//                         1 if a > b
//                        -1 if either a or b is NaN
//
//   __unordsf2(a,b) returns 0 if both a and b are numbers
//                           1 if either a or b is NaN
//
// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of
// NaN values.
//
// Register usage common to all three routines:
//
//   In:       r0 = a, r1 = b (single-precision bit patterns)
//   Out:      r0 = result as described above
//   Clobbers: r2, r3, condition flags (and r12 in __eqsf2 / __gtsf2)
//
// NaN detection: with the sign bit shifted off the top, a value is NaN exactly
// when (x << 1) is unsigned-greater than 0xff000000, i.e. the exponent field
// is all ones and the significand is non-zero.  Infinity shifts to exactly
// 0xff000000 and is therefore not treated as NaN.
//
//===----------------------------------------------------------------------===//

#include "../assembly.h"
.syntax unified

.p2align 2
DEFINE_COMPILERRT_FUNCTION(__eqsf2)
    // Three-way compare of a (r0) and b (r1); returns -1, 0 or 1 in r0, and
    // 1 if either argument is NaN.

    // Make copies of a and b with the sign bit shifted off the top.  These will
    // be used to detect zeros and NaNs.
    mov     r2,         r0, lsl #1
    mov     r3,         r1, lsl #1

    // We do the comparison in three stages (ignoring NaN values for the time
    // being).  First, we orr the absolute values of a and b; this sets the Z
    // flag if both a and b are zero (of either sign).  The shift of r3 doesn't
    // affect this at all, but it *does* make sure that the C flag is clear for
    // the subsequent operations.
    orrs    r12,    r2, r3, lsr #1

    // Next, we check if a and b have the same or different signs.  If they have
    // opposite signs, this eor will set the N flag.
    it ne
    eorsne  r12,    r0, r1

    // If a and b are equal (either both zeros or bit identical; again, we're
    // ignoring NaNs for now), this subtract will zero out r0.  If they have the
    // same sign, the flags are updated as they would be for a comparison of the
    // absolute values of a and b.
    it pl
    subspl  r0,     r2, r3

    // If a is smaller in magnitude than b and both have the same sign, place
    // the negation of the sign of b in r0.  Thus, if both are negative and
    // a > b, this sets r0 to 0; if both are positive and a < b, this sets
    // r0 to -1.
    //
    // This is also done if a and b have opposite signs and are not both zero,
    // because in that case the subtract was not performed and the C flag is
    // still clear from the shift argument in orrs; if a is positive and b
    // negative, this places 0 in r0; if a is negative and b positive, -1 is
    // placed in r0.
    it lo
    mvnlo   r0,         r1, asr #31

    // If a is greater in magnitude than b and both have the same sign, place
    // the sign of b in r0.  Thus, if both are negative and a < b, -1 is placed
    // in r0, which is the desired result.  Conversely, if both are positive
    // and a > b, zero is placed in r0.
    it hi
    movhi   r0,         r1, asr #31

    // If you've been keeping track, at this point r0 contains -1 if a < b and
    // 0 if a >= b.  All that remains to be done is to set it to 1 if a > b.
    // If a == b, then the Z flag is set, so we can get the correct final value
    // into r0 by simply or'ing with 1 if Z is clear.
    it ne
    orrne   r0,     r0, #1

    // Finally, we need to deal with NaNs.  If either argument is NaN, replace
    // the value in r0 with 1.  The second compare only runs when a is not NaN
    // (ls); movhi then fires if either compare found a NaN.
    cmp     r2,         #0xff000000
    ite ls
    cmpls   r3,         #0xff000000
    movhi   r0,         #1
    JMP(lr)
END_COMPILERRT_FUNCTION(__eqsf2)
DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2)
DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2)
DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2)

.p2align 2
DEFINE_COMPILERRT_FUNCTION(__gtsf2)
    // Identical to the preceding except in that we return -1 for NaN values.
    // Given that the two paths share so much code, one might be tempted to
    // unify them; however, the extra code needed to do so makes the code size
    // to performance tradeoff very hard to justify for such small functions.

    // r2, r3 = a, b with the sign bit shifted off.
    mov     r2,         r0, lsl #1
    mov     r3,         r1, lsl #1

    // Z set if both are zero (either sign); C is left clear by the lsr.
    orrs    r12,    r2, r3, lsr #1

    // N set if the signs of a and b differ.
    it ne
    eorsne  r12,    r0, r1

    // Same sign (or both zero): compare magnitudes, r0 = 0 when equal.
    it pl
    subspl  r0,     r2, r3

    // C clear: r0 = ~(sign of b), i.e. -1 or 0.
    it lo
    mvnlo   r0,         r1, asr #31

    // C set and Z clear: r0 = sign of b, i.e. 0 or -1.
    it hi
    movhi   r0,         r1, asr #31

    // Not equal: turn 0 into 1 (and leave -1 as -1).
    it ne
    orrne   r0,     r0, #1

    // If either argument is NaN, replace the value in r0 with -1.
    cmp     r2,         #0xff000000
    ite ls
    cmpls   r3,         #0xff000000
    movhi   r0,         #-1
    JMP(lr)
END_COMPILERRT_FUNCTION(__gtsf2)
DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2)

.p2align 2
DEFINE_COMPILERRT_FUNCTION(__unordsf2)
    // Return 1 for NaN values, 0 otherwise.

    // r2, r3 = a, b with the sign bit shifted off.
    mov     r2,         r0, lsl #1
    mov     r3,         r1, lsl #1

    // Default result: ordered.  mov does not touch the flags, but it is done
    // before the compares anyway so the result register is ready for movhi.
    mov     r0,         #0

    // hi after this sequence means a or b is NaN.
    cmp     r2,         #0xff000000
    ite ls
    cmpls   r3,         #0xff000000
    movhi   r0,         #1
    JMP(lr)
END_COMPILERRT_FUNCTION(__unordsf2)

DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2)