/* IEEE-754 single-precision functions for Xtensa
   Copyright (C) 2006-2016 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
25 1.1 mrg
/* Register aliases selected by target endianness.  Judging by the
   names, xh/xl and yh/yl are presumably the high and low words of
   two-register x and y operands (TODO confirm against the users of
   these names).  None of the single-precision code visible in this
   part of the file refers to them.  */
#ifdef __XTENSA_EB__
#define xh a2
#define xl a3
#define yh a4
#define yl a5
#else
#define xh a3
#define xl a2
#define yh a5
#define yl a4
#endif
37 1.1 mrg
/* Warning!  The branch displacements for some Xtensa branch instructions
   are quite small, and this code has been carefully laid out to keep
   branch targets in range.  If you change anything, be sure to check that
   the assembler is not relaxing anything to branch over a jump.  */
42 1.1 mrg
#ifdef L_negsf2

/* __negsf2: negate a single-precision value.

   In:       a2 = x
   Out:      a2 = x with bit 31 (the sign bit) inverted; every other
	     bit is left untouched
   Scratch:  a4

   leaf_entry and leaf_return are macros that are not defined in the
   visible part of this file.  */

	.align	4
	.global	__negsf2
	.type	__negsf2, @function
__negsf2:
	leaf_entry sp, 16
	movi	a4, 0x80000000		/* mask with only bit 31 set */
	xor	a2, a2, a4		/* flip the sign bit */
	leaf_return

#endif /* L_negsf2 */
55 1.1 mrg
56 1.1 mrg #ifdef L_addsubsf3
57 1.1 mrg
/* Addition

   __addsf3 adds two single-precision values.

   In:   a2 = x, a3 = y
   Out:  a2 = result

   Register roles on the main path:
     a6   0x7f800000, the exponent field mask
     a7   sign/exponent field of x (bits 23..31)
     a8   sign/exponent field of y (bits 23..31)
     a9   bits shifted out of the smaller operand, kept for rounding
     a10  scratch (exponent difference, result exponent, saved lsb)

   When the operand signs differ, the sign of y is flipped and control
   transfers to the subtraction code at .Lsub_same_sign.  The
   subtraction code enters this block at .Ladd_same_sign in the
   mirror-image case.  leaf_entry and leaf_return are macros that are
   not defined in the visible part of this file.  */

	.literal_position
__addsf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)  */

.Ladd_xnan_or_inf:
	/* If y is neither Infinity nor NaN, return x.  */
	bnall	a3, a6, .Ladd_return_nan_or_inf
	/* If x is a NaN, return it.  Otherwise, return y.  */
	slli	a7, a2, 9
	bnez	a7, .Ladd_return_nan

.Ladd_ynan_or_inf:
	/* Return y.  */
	mov	a2, a3

.Ladd_return_nan_or_inf:
	/* A nonzero mantissa means the value in a2 is a NaN, which must
	   be quieted before it is returned.  */
	slli	a7, a2, 9
	bnez	a7, .Ladd_return_nan
	leaf_return

.Ladd_return_nan:
	movi	a6, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a6
	leaf_return

.Ladd_opposite_signs:
	/* Operand signs differ.  Do a subtraction.  */
	slli	a7, a6, 8	/* 0x7f800000 << 8 == 0x80000000 */
	xor	a3, a3, a7
	j	.Lsub_same_sign

	.align	4
	.global	__addsf3
	.type	__addsf3, @function
__addsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000

	/* Check if the two operands have the same sign.  */
	xor	a7, a2, a3
	bltz	a7, .Ladd_opposite_signs

.Ladd_same_sign:
	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
	ball	a2, a6, .Ladd_xnan_or_inf
	ball	a3, a6, .Ladd_ynan_or_inf

	/* Compare the exponents.  The smaller operand will be shifted
	   right by the exponent difference and added to the larger
	   one.  */
	extui	a7, a2, 23, 9
	extui	a8, a3, 23, 9
	bltu	a7, a8, .Ladd_shiftx

.Ladd_shifty:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	a3, a6, .Ladd_yexpzero

	/* Replace y sign/exponent with 0x008.  */
	or	a3, a3, a6
	slli	a3, a3, 8
	srli	a3, a3, 8

.Ladd_yexpdiff:
	/* Compute the exponent difference.  */
	sub	a10, a7, a8

	/* Exponent difference >= 32 -- just return the bigger value.  */
	bgeui	a10, 32, 1f

	/* Shift y right by the exponent difference.  Any bits that are
	   shifted out of y are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, a3, a9
	srl	a3, a3

	/* Do the addition.  */
	add	a2, a2, a3

	/* Check if the add overflowed into the exponent.  */
	extui	a10, a2, 23, 9
	beq	a10, a7, .Ladd_round
	mov	a8, a7		/* .Ladd_carry expects the exponent in a8 */
	j	.Ladd_carry

.Ladd_yexpzero:
	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0", and increment the apparent exponent
	   because subnormals behave as if they had the minimum (nonzero)
	   exponent.  Test for the case when both exponents are zero.  */
	slli	a3, a3, 9
	srli	a3, a3, 9
	bnone	a2, a6, .Ladd_bothexpzero
	addi	a8, a8, 1
	j	.Ladd_yexpdiff

.Ladd_bothexpzero:
	/* Both exponents are zero.  Handle this as a special case.  There
	   is no need to shift or round, and the normal code for handling
	   a carry into the exponent field will not work because it
	   assumes there is an implicit "1.0" that needs to be added.  */
	add	a2, a2, a3
1:	leaf_return

.Ladd_xexpzero:
	/* Same as "yexpzero" except skip handling the case when both
	   exponents are zero.  */
	slli	a2, a2, 9
	srli	a2, a2, 9
	addi	a7, a7, 1
	j	.Ladd_xexpdiff

.Ladd_shiftx:
	/* Same thing as the "shifty" code, but with x and y swapped.  Also,
	   because the exponent difference is always nonzero in this version,
	   the shift sequence can use SLL and skip loading a constant zero.  */
	bnone	a2, a6, .Ladd_xexpzero

	or	a2, a2, a6
	slli	a2, a2, 8
	srli	a2, a2, 8

.Ladd_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Ladd_returny

	ssr	a10
	sll	a9, a2
	srl	a2, a2

	add	a2, a2, a3

	/* Check if the add overflowed into the exponent.  */
	extui	a10, a2, 23, 9
	bne	a10, a8, .Ladd_carry

.Ladd_round:
	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a9, 1f
	addi	a2, a2, 1

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return

.Ladd_returny:
	mov	a2, a3
	leaf_return

.Ladd_carry:
	/* The addition has overflowed into the exponent field, so the
	   value needs to be renormalized.  The mantissa of the result
	   can be recovered by subtracting the original exponent and
	   adding 0x800000 (which is the explicit "1.0" for the
	   mantissa of the non-shifted operand -- the "1.0" for the
	   shifted operand was already added).  The mantissa can then
	   be shifted right by one bit.  The explicit "1.0" of the
	   shifted mantissa then needs to be replaced by the exponent,
	   incremented by one to account for the normalizing shift.
	   It is faster to combine these operations: do the shift first
	   and combine the additions and subtractions.  If x is the
	   original exponent, the result is:
	       shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
	   or:
	       shifted mantissa + ((x + 1) << 22)
	   Note that the exponent is incremented here by leaving the
	   explicit "1.0" of the mantissa in the exponent field.  */

	/* Shift x right by one bit.  Save the lsb.  */
	mov	a10, a2
	srli	a2, a2, 1

	/* See explanation above.  The original exponent is in a8.  */
	addi	a8, a8, 1
	slli	a8, a8, 22
	add	a2, a2, a8

	/* Return an Infinity if the exponent overflowed.  */
	ball	a2, a6, .Ladd_infinity

	/* Same thing as the "round" code except the msb of the leftover
	   fraction is bit 0 of a10, with the rest of the fraction in a9.  */
	bbci.l	a10, 0, 1f
	addi	a2, a2, 1
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return

.Ladd_infinity:
	/* Clear the mantissa.  */
	srli	a2, a2, 23
	slli	a2, a2, 23

	/* The sign bit may have been lost in a carry-out.  Put it back.  */
	slli	a8, a8, 1
	or	a2, a2, a8
	leaf_return

.Ladd_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return
266 1.1 mrg
267 1.1 mrg
/* Subtraction

   __subsf3 subtracts two single-precision values.

   In:   a2 = x, a3 = y
   Out:  a2 = x - y

   Register roles on the main path:
     a6   0x7f800000, the exponent field mask (reused as the
	  normalizing shift count from .Lsub_borrow onwards)
     a7   exponent field of x (8 bits)
     a8   exponent field of y (8 bits); reused for the mantissa in
	  .Lsub_borrow
     a9   bits shifted out of the smaller operand (guard bits)
     a10  scratch (exponent difference, borrow candidate, exponent
	  of the raw difference)
     a11  scratch

   When the operand signs differ, the sign of y is flipped and control
   transfers to the addition code at .Ladd_same_sign.  The addition
   code enters this block at .Lsub_same_sign in the mirror-image case.
   leaf_entry, leaf_return and do_nsau are macros that are not defined
   in the visible part of this file.  */
__subsf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)  */

.Lsub_xnan_or_inf:
	/* If y is neither Infinity nor NaN, return x.  */
	bnall	a3, a6, .Lsub_return_nan_or_inf
	/* Both x and y are either NaN or Inf, so the result is NaN.  */

.Lsub_return_nan:
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
	leaf_return

.Lsub_ynan_or_inf:
	/* Negate y and return it.  */
	slli	a7, a6, 8	/* 0x7f800000 << 8 == 0x80000000 */
	xor	a2, a3, a7

.Lsub_return_nan_or_inf:
	/* A nonzero mantissa means the value in a2 is a NaN, which must
	   be quieted before it is returned.  */
	slli	a7, a2, 9
	bnez	a7, .Lsub_return_nan
	leaf_return

.Lsub_opposite_signs:
	/* Operand signs differ.  Do an addition.  */
	slli	a7, a6, 8
	xor	a3, a3, a7
	j	.Ladd_same_sign

	.align	4
	.global	__subsf3
	.type	__subsf3, @function
__subsf3:
	leaf_entry sp, 16
	movi	a6, 0x7f800000

	/* Check if the two operands have the same sign.  */
	xor	a7, a2, a3
	bltz	a7, .Lsub_opposite_signs

.Lsub_same_sign:
	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
	ball	a2, a6, .Lsub_xnan_or_inf
	ball	a3, a6, .Lsub_ynan_or_inf

	/* Compare the operands.  In contrast to addition, the entire
	   value matters here.  */
	extui	a7, a2, 23, 8
	extui	a8, a3, 23, 8
	bltu	a2, a3, .Lsub_xsmaller

.Lsub_ysmaller:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	a3, a6, .Lsub_yexpzero

	/* Replace y sign/exponent with 0x008.  */
	or	a3, a3, a6
	slli	a3, a3, 8
	srli	a3, a3, 8

.Lsub_yexpdiff:
	/* Compute the exponent difference.  */
	sub	a10, a7, a8

	/* Exponent difference >= 32 -- just return the bigger value.  */
	bgeui	a10, 32, 1f

	/* Shift y right by the exponent difference.  Any bits that are
	   shifted out of y are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, a3, a9
	srl	a3, a3

	sub	a2, a2, a3

	/* Subtract the leftover bits in a9 from zero and propagate any
	   borrow from a2.  */
	neg	a9, a9
	addi	a10, a2, -1
	movnez	a2, a10, a9

	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, a2, 23, 8
	beq	a10, a7, .Lsub_round
	j	.Lsub_borrow

.Lsub_yexpzero:
	/* Return zero if the inputs are equal.  (For the non-subnormal
	   case, subtracting the "1.0" will cause a borrow from the exponent
	   and this case can be detected when handling the borrow.)  */
	beq	a2, a3, .Lsub_return_zero

	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0".  Unless x is also a subnormal, increment
	   y's apparent exponent because subnormals behave as if they had
	   the minimum (nonzero) exponent.  */
	slli	a3, a3, 9
	srli	a3, a3, 9
	bnone	a2, a6, .Lsub_yexpdiff
	addi	a8, a8, 1
	j	.Lsub_yexpdiff

.Lsub_returny:
	/* Negate and return y.  */
	slli	a7, a6, 8
	xor	a2, a3, a7
1:	leaf_return

.Lsub_xsmaller:
	/* Same thing as the "ysmaller" code, but with x and y swapped and
	   with y negated.  */
	bnone	a2, a6, .Lsub_xexpzero

	or	a2, a2, a6
	slli	a2, a2, 8
	srli	a2, a2, 8

.Lsub_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Lsub_returny

	ssr	a10
	movi	a9, 0
	src	a9, a2, a9
	srl	a2, a2

	/* Negate y.  */
	slli	a11, a6, 8
	xor	a3, a3, a11

	sub	a2, a3, a2

	/* Same borrow propagation from the guard bits as in the
	   "ysmaller" path.  */
	neg	a9, a9
	addi	a10, a2, -1
	movnez	a2, a10, a9

	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, a2, 23, 8
	bne	a10, a8, .Lsub_borrow

.Lsub_round:
	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a9, 1f
	addi	a2, a2, 1

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Lsub_exactlyhalf
1:	leaf_return

.Lsub_xexpzero:
	/* Same as "yexpzero".  */
	beq	a2, a3, .Lsub_return_zero
	slli	a2, a2, 9
	srli	a2, a2, 9
	bnone	a3, a6, .Lsub_xexpdiff
	addi	a7, a7, 1
	j	.Lsub_xexpdiff

.Lsub_return_zero:
	movi	a2, 0
	leaf_return

.Lsub_borrow:
	/* The subtraction has underflowed into the exponent field, so the
	   value needs to be renormalized.  Shift the mantissa left as
	   needed to remove any leading zeros and adjust the exponent
	   accordingly.  If the exponent is not large enough to remove
	   all the leading zeros, the result will be a subnormal value.  */

	/* On entry a10 holds the exponent field of the raw difference
	   in a2.  do_nsau presumably leaves the number of leading zero
	   bits of a8 in a6, using a7 and a11 as temporaries (the macro
	   is defined elsewhere -- TODO confirm).  */
	slli	a8, a2, 9
	beqz	a8, .Lsub_xzero
	do_nsau	a6, a8, a7, a11
	srli	a8, a8, 9
	bge	a6, a10, .Lsub_subnormal
	addi	a6, a6, 1

.Lsub_normalize_shift:
	/* Shift the mantissa (a8/a9) left by a6.  */
	ssl	a6
	src	a8, a8, a9
	sll	a9, a9

	/* Combine the shifted mantissa with the sign and exponent,
	   decrementing the exponent by a6.  (The exponent has already
	   been decremented by one due to the borrow from the subtraction,
	   but adding the mantissa will increment the exponent by one.)  */
	srli	a2, a2, 23
	sub	a2, a2, a6
	slli	a2, a2, 23
	add	a2, a2, a8
	j	.Lsub_round

.Lsub_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return

.Lsub_xzero:
	/* If there was a borrow from the exponent, and the mantissa and
	   guard digits are all zero, then the inputs were equal and the
	   result should be zero.  */
	beqz	a9, .Lsub_return_zero

	/* Only the guard digit is nonzero.  Shift by min(24, a10).  */
	addi	a11, a10, -24
	movi	a6, 24
	movltz	a6, a10, a11
	j	.Lsub_normalize_shift

.Lsub_subnormal:
	/* The exponent is too small to shift away all the leading zeros.
	   Set a6 to the current exponent (which has already been
	   decremented by the borrow) so that the exponent of the result
	   will be zero.  Do not add 1 to a6 in this case, because: (1)
	   adding the mantissa will not increment the exponent, so there is
	   no need to subtract anything extra from the exponent to
	   compensate, and (2) the effective exponent of a subnormal is 1
	   not 0 so the shift amount must be 1 smaller than normal.  */
	mov	a6, a10
	j	.Lsub_normalize_shift
495 1.1 mrg
496 1.1 mrg #endif /* L_addsubsf3 */
497 1.1 mrg
498 1.1 mrg #ifdef L_mulsf3
499 1.1 mrg
/* Multiplication

   __mulsf3 multiplies two single-precision values.

   In:   a2 = x, a3 = y
   Out:  a2 = x * y

   Register roles on the main path:
     a6   0x7f800000, the exponent field mask; after the multiply it
	  holds the low word of the 64-bit product (rounding bits)
     a7   x ^ y; bit 31 is the sign of the result
     a8   exponent of x, then the exponent of the result
     a9   exponent of y, then a carry/partial-sum temporary
     a4, a5, a10, a11  scratch

   Under the CALL0 ABI the function saves a12-a15 in its stack frame
   on entry and restores them at .Lmul_done, so every exit path in
   this block goes through .Lmul_done.  leaf_entry, leaf_return and
   do_nsau are macros that are not defined in the visible part of this
   file.  */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

	.literal_position
__mulsf3_aux:

	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
	   (This code is placed before the start of the function just to
	   keep it in range of the limited branch displacements.)  */

.Lmul_xexpzero:
	/* Clear the sign bit of x.  */
	slli	a2, a2, 1
	srli	a2, a2, 1

	/* If x is zero, return zero.  */
	beqz	a2, .Lmul_return_zero

	/* Normalize x.  Adjust the exponent in a8.  */
	do_nsau	a10, a2, a11, a12
	addi	a10, a10, -8
	ssl	a10
	sll	a2, a2
	movi	a8, 1
	sub	a8, a8, a10
	j	.Lmul_xnormalized

.Lmul_yexpzero:
	/* Clear the sign bit of y.  */
	slli	a3, a3, 1
	srli	a3, a3, 1

	/* If y is zero, return zero.  */
	beqz	a3, .Lmul_return_zero

	/* Normalize y.  Adjust the exponent in a9.  */
	do_nsau	a10, a3, a11, a12
	addi	a10, a10, -8
	ssl	a10
	sll	a3, a3
	movi	a9, 1
	sub	a9, a9, a10
	j	.Lmul_ynormalized

.Lmul_return_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	j	.Lmul_done

.Lmul_xnan_or_inf:
	/* If y is zero, return NaN.  */
	slli	a8, a3, 1
	beqz	a8, .Lmul_return_nan
	/* If y is NaN, return y.  */
	bnall	a3, a6, .Lmul_returnx
	slli	a8, a3, 9
	beqz	a8, .Lmul_returnx

.Lmul_returny:
	mov	a2, a3

.Lmul_returnx:
	/* A nonzero mantissa means the value in a2 is a NaN, which must
	   be quieted before it is returned.  */
	slli	a8, a2, 9
	bnez	a8, .Lmul_return_nan
	/* Set the sign bit and return.  */
	extui	a7, a7, 31, 1
	slli	a2, a2, 1
	ssai	1
	src	a2, a7, a2
	j	.Lmul_done

.Lmul_ynan_or_inf:
	/* If x is zero, return NaN.  */
	slli	a8, a2, 1
	bnez	a8, .Lmul_returny
	mov	a2, a3

.Lmul_return_nan:
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
	j	.Lmul_done

	.align	4
	.global	__mulsf3
	.type	__mulsf3, @function
__mulsf3:
#if __XTENSA_CALL0_ABI__
	leaf_entry sp, 32
	addi	sp, sp, -32
	s32i	a12, sp, 16
	s32i	a13, sp, 20
	s32i	a14, sp, 24
	s32i	a15, sp, 28
#elif XCHAL_NO_MUL
	/* This is not really a leaf function; allocate enough stack space
	   to allow CALL12s to a helper function.  */
	leaf_entry sp, 64
#else
	leaf_entry sp, 32
#endif
	movi	a6, 0x7f800000

	/* Get the sign of the result.  */
	xor	a7, a2, a3

	/* Check for NaN and infinity.  */
	ball	a2, a6, .Lmul_xnan_or_inf
	ball	a3, a6, .Lmul_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, a2, 23, 8
	extui	a9, a3, 23, 8

	beqz	a8, .Lmul_xexpzero
.Lmul_xnormalized:
	beqz	a9, .Lmul_yexpzero
.Lmul_ynormalized:

	/* Add the exponents.  */
	add	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0xffffff
	or	a2, a2, a6
	and	a2, a2, a10
	or	a3, a3, a6
	and	a3, a3, a10

	/* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */

#if XCHAL_HAVE_MUL32_HIGH

	mull	a6, a2, a3
	muluh	a2, a2, a3

#else

	/* Break the inputs into 16-bit chunks and compute 4 32-bit partial
	   products.  These partial products are:

		0 xl * yl

		1 xl * yh
		2 xh * yl

		3 xh * yh

	   If using the Mul16 or Mul32 multiplier options, these input
	   chunks must be stored in separate registers.  For Mac16, the
	   UMUL.AA.* opcodes can specify that the inputs come from either
	   half of the registers, so there is no need to shift them out
	   ahead of time.  If there is no multiply hardware, the 16-bit
	   chunks can be extracted when setting up the arguments to the
	   separate multiply function.  */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Calling a separate multiply function will clobber a0 and requires
	   use of a8 as a temporary, so save those values now.  (The function
	   uses a custom ABI so nothing else needs to be saved.)  */
	s32i	a0, sp, 0
	s32i	a8, sp, 4
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

	/* Get the high halves of the inputs into registers.  */
	srli	a2h, a2, 16
	srli	a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */
	extui	a2, a2, 0, 16
	extui	a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


	/* do_mul(dst, xreg, xhalf, yreg, yhalf) computes the product of
	   the selected 16-bit halves ("l" or "h") of xreg and yreg into
	   dst.  Each multiplier configuration supplies its own
	   definition.  */

#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
	rsr	dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
	mov	dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a14, xreg); \
	set_arg_ ## yhalf (a15, yreg); \
	call12	.Lmul_mulsi3; \
	mov	dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */
	movi	a9, 0
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */
	ssai	16
	src	a9, a9, a6

	/* Compute the low word into a6.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */
	sll	a6, a6
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Compute the high word into a2.  */
	do_mul(a2, a2, h, a3, h)	/* pp 3 */
	add	a2, a2, a9

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Restore values saved on the stack during the multiplication.  */
	l32i	a0, sp, 0
	l32i	a8, sp, 4
#endif
#endif /* ! XCHAL_HAVE_MUL32_HIGH */

	/* Shift left by 9 bits, unless there was a carry-out from the
	   multiply, in which case, shift by 8 bits and increment the
	   exponent.  */
	movi	a4, 9
	srli	a5, a2, 24 - 9
	beqz	a5, 1f
	addi	a4, a4, -1
	addi	a8, a8, 1
1:	ssl	a4
	src	a2, a2, a6
	sll	a6, a6

	/* Subtract the extra bias from the exponent sum (plus one to account
	   for the explicit "1.0" of the mantissa that will be added to the
	   exponent in the final result).  */
	movi	a4, 0x80
	sub	a8, a8, a4

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..fd are OK here.  */
	movi	a4, 0xfe
	bgeu	a8, a4, .Lmul_overflow

.Lmul_round:
	/* Round.  */
	bgez	a6, .Lmul_rounded
	addi	a2, a2, 1
	slli	a6, a6, 1
	beqz	a6, .Lmul_exactlyhalf

.Lmul_rounded:
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 23
	add	a2, a2, a8

.Lmul_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	a2, a2, a7

.Lmul_done:
#if __XTENSA_CALL0_ABI__
	l32i	a12, sp, 16
	l32i	a13, sp, 20
	l32i	a14, sp, 24
	l32i	a15, sp, 28
	addi	sp, sp, 32
#endif
	leaf_return

.Lmul_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	j	.Lmul_rounded

.Lmul_overflow:
	/* The unsigned compare above sends both cases here; a negative
	   a8 means underflow rather than overflow.  */
	bltz	a8, .Lmul_underflow
	/* Return +/- Infinity.  */
	movi	a8, 0xff
	slli	a2, a8, 23
	j	.Lmul_addsign

.Lmul_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	mov	a9, a6
	ssr	a8
	bgeui	a8, 32, .Lmul_flush_to_zero

	/* Shift a2 right.  Any bits that are shifted out of a2 are saved
	   in a6 (combined with the shifted-out bits currently in a6) for
	   rounding the result.  */
	sll	a6, a2
	srl	a2, a2

	/* Set the exponent to zero.  */
	movi	a8, 0

	/* Pack any nonzero bits shifted out into a6.  */
	beqz	a9, .Lmul_round
	movi	a9, 1
	or	a6, a6, a9
	j	.Lmul_round

.Lmul_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	a2, a7, 31
	slli	a2, a2, 31
	j	.Lmul_done
858 1.1 mrg
#if XCHAL_NO_MUL

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying 16-bit chunks of
	   the floating-point mantissas.  When using CALL0, this function
	   uses a custom ABI: the inputs are passed in a13 and a14, the
	   result is returned in a12, and a8 and a15 are clobbered.

	   Otherwise the operands arrive in a2 and a3 and the product is
	   returned in a2, with a4-a6 used as temporaries.

	   mul_mulsi3_body dst, src1, src2, tmp1, tmp2
	     Accumulates src1 * src2 into dst by shift-and-add, consuming
	     four bits of src1 per loop iteration until src1 is zero.
	     src1, src2, tmp1 and tmp2 are all overwritten.  do_addx2,
	     do_addx4 and do_addx8 are macros defined elsewhere; from
	     their use here they presumably compute
	     tmp1 = src2 * {2,4,8} + dst (TODO confirm).  */
	.align	4
.Lmul_mulsi3:
	leaf_entry sp, 16
	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
	movi	\dst, 0
	/* Each group below adds src2 scaled by 1, 2, 4 or 8 into dst
	   when the matching low bit of src1 is set.  */
1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2

	/* Move on to the next four bits of src1.  */
	srli	\src1, \src1, 4
	slli	\src2, \src2, 4
	bnez	\src1, 1b
	.endm
#if __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8
#else
	/* The result will be written into a2, so save that argument in a4.  */
	mov	a4, a2
	mul_mulsi3_body a2, a4, a3, a5, a6
#endif
	leaf_return
#endif /* XCHAL_NO_MUL */
900 1.1 mrg #endif /* L_mulsf3 */
901 1.1 mrg
902 1.1 mrg #ifdef L_divsf3
903 1.1 mrg
904 1.1.1.3 mrg .literal_position
905 1.1 mrg /* Division. The labels below are the out-of-line special-case paths of __divsf3 and rely on the registers it sets up before branching here: a2 = x, a3 = y, a6 = 0x7f800000, a7 = x ^ y (result sign in bit 31) and, once the exponents have been extracted, a8 / a9 = exponent fields of x / y. */
906 1.1 mrg __divsf3_aux:
907 1.1 mrg
908 1.1 mrg /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
909 1.1 mrg (This code is placed before the start of the function just to
910 1.1 mrg keep it in range of the limited branch displacements.) */
911 1.1 mrg
912 1.1 mrg .Ldiv_yexpzero:
913 1.1 mrg /* Clear the sign bit of y. */
914 1.1 mrg slli a3, a3, 1
915 1.1 mrg srli a3, a3, 1
916 1.1 mrg
917 1.1 mrg /* Check for division by zero. */
918 1.1 mrg beqz a3, .Ldiv_yzero
919 1.1 mrg
920 1.1 mrg /* Normalize y. Adjust the exponent in a9. do_nsau is a macro defined outside this section; presumably it leaves the number of leading zero bits of a3 in a10, with a4 and a5 as scratch -- confirm against its definition. */
921 1.1 mrg do_nsau a10, a3, a4, a5
922 1.1 mrg addi a10, a10, -8 /* less the 8 bit positions above bit 23, where the leading one must end up */
923 1.1 mrg ssl a10 /* SAR = 32 - a10, so the sll below shifts left by a10 */
924 1.1 mrg sll a3, a3
925 1.1 mrg movi a9, 1
926 1.1 mrg sub a9, a9, a10 /* a9 = 1 - shift count */
927 1.1 mrg j .Ldiv_ynormalized
928 1.1 mrg
929 1.1 mrg .Ldiv_yzero:
930 1.1 mrg /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
931 1.1 mrg slli a4, a2, 1
932 1.1 mrg srli a4, a4, 1 /* a4 = x with its sign bit cleared */
933 1.1 mrg srli a2, a7, 31
934 1.1 mrg slli a2, a2, 31 /* a2 = sign bit of the result only */
935 1.1 mrg or a2, a2, a6 /* a2 = infinity with the result sign */
936 1.1 mrg bnez a4, 1f /* x is nonzero: return that infinity */
937 1.1 mrg movi a4, 0x400000 /* make it a quiet NaN */
938 1.1 mrg or a2, a2, a4
939 1.1 mrg 1: leaf_return
940 1.1 mrg
941 1.1 mrg .Ldiv_xexpzero:
942 1.1 mrg /* Clear the sign bit of x. */
943 1.1 mrg slli a2, a2, 1
944 1.1 mrg srli a2, a2, 1
945 1.1 mrg
946 1.1 mrg /* If x is zero, return zero. */
947 1.1 mrg beqz a2, .Ldiv_return_zero
948 1.1 mrg
949 1.1 mrg /* Normalize x. Adjust the exponent in a8. Same sequence as for y above, applied to a2 and a8. */
950 1.1 mrg do_nsau a10, a2, a4, a5
951 1.1 mrg addi a10, a10, -8
952 1.1 mrg ssl a10
953 1.1 mrg sll a2, a2
954 1.1 mrg movi a8, 1
955 1.1 mrg sub a8, a8, a10 /* a8 = 1 - shift count */
956 1.1 mrg j .Ldiv_xnormalized
957 1.1 mrg
958 1.1 mrg .Ldiv_return_zero:
959 1.1 mrg /* Return zero with the appropriate sign bit. */
960 1.1 mrg srli a2, a7, 31
961 1.1 mrg slli a2, a2, 31
962 1.1 mrg leaf_return
963 1.1 mrg
964 1.1 mrg .Ldiv_xnan_or_inf:
965 1.1 mrg /* Set the sign bit of the result. */
966 1.1 mrg srli a7, a3, 31
967 1.1 mrg slli a7, a7, 31 /* a7 = sign bit of y */
968 1.1 mrg xor a2, a2, a7 /* a2 = x with its sign flipped when y is negative */
969 1.1 mrg /* If y is NaN or Inf, return NaN. Otherwise return NaN if x itself is a NaN, else the signed infinity now in a2. */
970 1.1.1.5 mrg ball a3, a6, .Ldiv_return_nan
971 1.1.1.5 mrg slli a7, a2, 9 /* a7 = mantissa bits of x; nonzero means x is a NaN */
972 1.1.1.5 mrg bnez a7, .Ldiv_return_nan
973 1.1.1.5 mrg leaf_return
974 1.1 mrg
975 1.1 mrg .Ldiv_ynan_or_inf:
976 1.1 mrg /* If y is Infinity, return zero. */
977 1.1 mrg slli a8, a3, 9 /* a8 = mantissa bits of y; zero means y is an infinity */
978 1.1 mrg beqz a8, .Ldiv_return_zero
979 1.1 mrg /* y is NaN; return it. */
980 1.1 mrg mov a2, a3 /* falls through into .Ldiv_return_nan */
981 1.1.1.5 mrg
982 1.1.1.5 mrg .Ldiv_return_nan:
983 1.1.1.5 mrg movi a4, 0x400000 /* make it a quiet NaN */
984 1.1.1.5 mrg or a2, a2, a4
985 1.1 mrg leaf_return
986 1.1 mrg
987 1.1 mrg .align 4
988 1.1 mrg .global __divsf3
989 1.1 mrg .type __divsf3, @function
990 1.1 mrg __divsf3: /* single-precision divide on raw bit patterns: a2 = x / y, with x in a2 and y in a3 */
991 1.1 mrg leaf_entry sp, 16
992 1.1 mrg movi a6, 0x7f800000 /* exponent-field mask; kept in a6 for the special-case paths */
993 1.1 mrg
994 1.1 mrg /* Get the sign of the result. */
995 1.1 mrg xor a7, a2, a3 /* only bit 31 of a7 is used later */
996 1.1 mrg
997 1.1 mrg /* Check for NaN and infinity. */
998 1.1 mrg ball a2, a6, .Ldiv_xnan_or_inf /* taken when every exponent bit of x is set */
999 1.1 mrg ball a3, a6, .Ldiv_ynan_or_inf
1000 1.1 mrg
1001 1.1 mrg /* Extract the exponents. */
1002 1.1 mrg extui a8, a2, 23, 8
1003 1.1 mrg extui a9, a3, 23, 8
1004 1.1 mrg
1005 1.1 mrg beqz a9, .Ldiv_yexpzero /* y is zero or subnormal */
1006 1.1 mrg .Ldiv_ynormalized:
1007 1.1 mrg beqz a8, .Ldiv_xexpzero /* x is zero or subnormal */
1008 1.1 mrg .Ldiv_xnormalized:
1009 1.1 mrg
1010 1.1 mrg /* Subtract the exponents. */
1011 1.1 mrg sub a8, a8, a9
1012 1.1 mrg
1013 1.1 mrg /* Replace sign/exponent fields with explicit "1.0". */
1014 1.1 mrg movi a10, 0xffffff
1015 1.1 mrg or a2, a2, a6 /* sets bit 23 (among the exponent bits) */
1016 1.1 mrg and a2, a2, a10 /* keeps bits 0..23 only */
1017 1.1 mrg or a3, a3, a6
1018 1.1 mrg and a3, a3, a10
1019 1.1 mrg
1020 1.1 mrg /* The first digit of the mantissa division must be a one.
1021 1.1 mrg Shift x (and adjust the exponent) as needed to make this true. */
1022 1.1 mrg bltu a3, a2, 1f /* y < x: the first quotient digit is already a one */
1023 1.1 mrg slli a2, a2, 1
1024 1.1 mrg addi a8, a8, -1
1025 1.1 mrg 1:
1026 1.1 mrg /* Do the first subtraction and shift. */
1027 1.1 mrg sub a2, a2, a3
1028 1.1 mrg slli a2, a2, 1
1029 1.1 mrg
1030 1.1 mrg /* Put the quotient into a10. */
1031 1.1 mrg movi a10, 1
1032 1.1 mrg
1033 1.1 mrg /* Divide one bit at a time for 23 bits. */
1034 1.1 mrg movi a9, 23
1035 1.1 mrg #if XCHAL_HAVE_LOOPS
1036 1.1 mrg loop a9, .Ldiv_loopend /* hardware loop: the body up to .Ldiv_loopend runs a9 times */
1037 1.1 mrg #endif
1038 1.1 mrg .Ldiv_loop:
1039 1.1 mrg /* Shift the quotient << 1. */
1040 1.1 mrg slli a10, a10, 1
1041 1.1 mrg
1042 1.1 mrg /* Is this digit a 0 or 1? */
1043 1.1 mrg bltu a2, a3, 1f /* remainder < divisor: the digit is 0 */
1044 1.1 mrg
1045 1.1 mrg /* Output a 1 and subtract. */
1046 1.1 mrg addi a10, a10, 1
1047 1.1 mrg sub a2, a2, a3
1048 1.1 mrg
1049 1.1 mrg /* Shift the dividend << 1. */
1050 1.1 mrg 1: slli a2, a2, 1
1051 1.1 mrg
1052 1.1 mrg #if !XCHAL_HAVE_LOOPS
1053 1.1 mrg addi a9, a9, -1
1054 1.1 mrg bnez a9, .Ldiv_loop
1055 1.1 mrg #endif
1056 1.1 mrg .Ldiv_loopend:
1057 1.1 mrg
1058 1.1 mrg /* Add the exponent bias (less one to account for the explicit "1.0"
1059 1.1 mrg of the mantissa that will be added to the exponent in the final
1060 1.1 mrg result). */
1061 1.1 mrg addi a8, a8, 0x7e
1062 1.1 mrg
1063 1.1 mrg /* Check for over/underflow. The value in a8 is one less than the
1064 1.1 mrg final exponent, so values in the range 0..fd are OK here. */
1065 1.1 mrg movi a4, 0xfe
1066 1.1 mrg bgeu a8, a4, .Ldiv_overflow /* unsigned compare, so a negative a8 (underflow) is caught here too */
1067 1.1 mrg
1068 1.1 mrg .Ldiv_round:
1069 1.1 mrg /* Round. The remainder (<< 1) is in a2. */
1070 1.1 mrg bltu a2, a3, .Ldiv_rounded /* below one half: keep the truncated quotient */
1071 1.1 mrg addi a10, a10, 1
1072 1.1 mrg beq a2, a3, .Ldiv_exactlyhalf
1073 1.1 mrg
1074 1.1 mrg .Ldiv_rounded:
1075 1.1 mrg /* Add the exponent to the mantissa. */
1076 1.1 mrg slli a8, a8, 23
1077 1.1 mrg add a2, a10, a8 /* bit 23 of a10 (the explicit one) carries into the exponent field */
1078 1.1 mrg
1079 1.1 mrg .Ldiv_addsign:
1080 1.1 mrg /* Add the sign bit. */
1081 1.1 mrg srli a7, a7, 31
1082 1.1 mrg slli a7, a7, 31
1083 1.1 mrg or a2, a2, a7
1084 1.1 mrg leaf_return
1085 1.1 mrg
1086 1.1 mrg .Ldiv_overflow:
1087 1.1 mrg bltz a8, .Ldiv_underflow /* negative a8 reached here through the unsigned compare */
1088 1.1 mrg /* Return +/- Infinity. */
1089 1.1 mrg addi a8, a4, 1 /* 0xff */
1090 1.1 mrg slli a2, a8, 23
1091 1.1 mrg j .Ldiv_addsign
1092 1.1 mrg
1093 1.1 mrg .Ldiv_exactlyhalf:
1094 1.1 mrg /* Remainder is exactly half the divisor. Round even. */
1095 1.1 mrg srli a10, a10, 1
1096 1.1 mrg slli a10, a10, 1 /* clear bit 0 of the already incremented quotient */
1097 1.1 mrg j .Ldiv_rounded
1098 1.1 mrg
1099 1.1 mrg .Ldiv_underflow:
1100 1.1 mrg /* Create a subnormal value, where the exponent field contains zero,
1101 1.1 mrg but the effective exponent is 1. The value of a8 is one less than
1102 1.1 mrg the actual exponent, so just negate it to get the shift amount. */
1103 1.1 mrg neg a8, a8
1104 1.1 mrg ssr a8
1105 1.1 mrg bgeui a8, 32, .Ldiv_flush_to_zero /* shifting by 32 or more leaves nothing of the mantissa */
1106 1.1 mrg
1107 1.1 mrg /* Shift a10 right. Any bits that are shifted out of a10 are
1108 1.1 mrg saved in a6 for rounding the result. */
1109 1.1 mrg sll a6, a10 /* a6 = a10 << (32 - a8): the discarded bits, left-aligned */
1110 1.1 mrg srl a10, a10 /* a10 >>= a8 */
1111 1.1 mrg
1112 1.1 mrg /* Set the exponent to zero. */
1113 1.1 mrg movi a8, 0
1114 1.1 mrg
1115 1.1 mrg /* Pack any nonzero remainder (in a2) into a6. */
1116 1.1 mrg beqz a2, 1f
1117 1.1 mrg movi a9, 1
1118 1.1 mrg or a6, a6, a9 /* sticky bit: something nonzero lies below the discarded bits */
1119 1.1 mrg
1120 1.1 mrg /* Round a10 based on the bits shifted out into a6. */
1121 1.1 mrg 1: bgez a6, .Ldiv_rounded /* top discarded bit clear: below one half */
1122 1.1 mrg addi a10, a10, 1
1123 1.1 mrg slli a6, a6, 1
1124 1.1 mrg bnez a6, .Ldiv_rounded /* more than one half: keep the increment */
1125 1.1 mrg srli a10, a10, 1
1126 1.1 mrg slli a10, a10, 1 /* exactly one half: clear bit 0 to round to even */
1127 1.1 mrg j .Ldiv_rounded
1128 1.1 mrg
1129 1.1 mrg .Ldiv_flush_to_zero:
1130 1.1 mrg /* Return zero with the appropriate sign bit. */
1131 1.1 mrg srli a2, a7, 31
1132 1.1 mrg slli a2, a2, 31
1133 1.1 mrg leaf_return
1134 1.1 mrg
1135 1.1 mrg #endif /* L_divsf3 */
1136 1.1 mrg
1137 1.1 mrg #ifdef L_cmpsf2
1138 1.1 mrg
1139 1.1 mrg /* Equal and Not Equal: __eqsf2 (and its alias __nesf2) compare the single-precision bit patterns in a2 and a3 and return 0 in a2 when they compare equal, 1 otherwise. A NaN never compares equal; +0 and -0 do. */
1140 1.1 mrg
1141 1.1 mrg .align 4
1142 1.1 mrg .global __eqsf2
1143 1.1 mrg .global __nesf2
1144 1.1 mrg .set __nesf2, __eqsf2
1145 1.1 mrg .type __eqsf2, @function
1146 1.1 mrg __eqsf2:
1147 1.1 mrg leaf_entry sp, 16
1148 1.1 mrg bne a2, a3, 4f /* different bit patterns: equal only if both are zeros */
1149 1.1 mrg
1150 1.1 mrg /* The values are equal but NaN != NaN. Check the exponent. */
1151 1.1 mrg movi a6, 0x7f800000
1152 1.1 mrg ball a2, a6, 3f /* all exponent bits set: x is an infinity or a NaN */
1153 1.1 mrg
1154 1.1 mrg /* Equal. */
1155 1.1 mrg movi a2, 0
1156 1.1 mrg leaf_return
1157 1.1 mrg
1158 1.1 mrg /* Not equal. NOTE(review): no branch to label 2 is visible in this function, so this path looks unreachable -- confirm before relying on or removing it. */
1159 1.1 mrg 2: movi a2, 1
1160 1.1 mrg leaf_return
1161 1.1 mrg
1162 1.1 mrg /* Check if the mantissas are nonzero. */
1163 1.1 mrg 3: slli a7, a2, 9 /* shift out sign and exponent, leaving the mantissa bits */
1164 1.1 mrg j 5f
1165 1.1 mrg
1166 1.1 mrg /* Check if x and y are zero with different signs. */
1167 1.1 mrg 4: or a7, a2, a3
1168 1.1 mrg slli a7, a7, 1 /* drop the sign bit: zero only if both operands are +/-0 */
1169 1.1 mrg
1170 1.1 mrg /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1171 1.1 mrg of x when all exponent bits of x are set (mask 0x7f800000) and x == y. */
1172 1.1 mrg 5: movi a2, 0
1173 1.1 mrg movi a3, 1
1174 1.1 mrg movnez a2, a3, a7 /* a2 = 1 if a7 is nonzero */
1175 1.1 mrg leaf_return
1176 1.1 mrg
1177 1.1 mrg
1178 1.1 mrg /* Greater Than: __gtsf2 takes x in a2 and y in a3. It returns 0 in a2 if either operand is a NaN; otherwise it continues at .Lle_cmp, which returns 0 when x <= y and 1 when x > y. */
1179 1.1 mrg
1180 1.1 mrg .align 4
1181 1.1 mrg .global __gtsf2
1182 1.1 mrg .type __gtsf2, @function
1183 1.1 mrg __gtsf2:
1184 1.1 mrg leaf_entry sp, 16
1185 1.1 mrg movi a6, 0x7f800000
1186 1.1 mrg ball a2, a6, 2f /* all exponent bits of x set: infinity or NaN */
1187 1.1 mrg 1: bnall a3, a6, .Lle_cmp /* y is neither infinity nor NaN: do the ordered compare */
1188 1.1 mrg
1189 1.1 mrg /* Check if y is a NaN. */
1190 1.1 mrg slli a7, a3, 9 /* mantissa bits of y */
1191 1.1 mrg beqz a7, .Lle_cmp /* y is an infinity, which is ordered */
1192 1.1 mrg movi a2, 0
1193 1.1 mrg leaf_return
1194 1.1 mrg
1195 1.1 mrg /* Check if x is a NaN. */
1196 1.1 mrg 2: slli a7, a2, 9 /* mantissa bits of x */
1197 1.1 mrg beqz a7, 1b /* x is an infinity: go on to examine y */
1198 1.1 mrg movi a2, 0
1199 1.1 mrg leaf_return
1200 1.1 mrg
1201 1.1 mrg
1202 1.1 mrg /* Less Than or Equal: __lesf2 takes x in a2 and y in a3. It returns 1 in a2 if either operand is a NaN; otherwise .Lle_cmp returns 0 when x <= y and 1 when x > y. */
1203 1.1 mrg
1204 1.1 mrg .align 4
1205 1.1 mrg .global __lesf2
1206 1.1 mrg .type __lesf2, @function
1207 1.1 mrg __lesf2:
1208 1.1 mrg leaf_entry sp, 16
1209 1.1 mrg movi a6, 0x7f800000
1210 1.1 mrg ball a2, a6, 2f /* all exponent bits of x set: infinity or NaN */
1211 1.1 mrg 1: bnall a3, a6, .Lle_cmp /* y is neither infinity nor NaN */
1212 1.1 mrg
1213 1.1 mrg /* Check if y is a NaN. */
1214 1.1 mrg slli a7, a3, 9
1215 1.1 mrg beqz a7, .Lle_cmp /* y is an infinity, which is ordered */
1216 1.1 mrg movi a2, 1
1217 1.1 mrg leaf_return
1218 1.1 mrg
1219 1.1 mrg /* Check if x is a NaN. */
1220 1.1 mrg 2: slli a7, a2, 9
1221 1.1 mrg beqz a7, 1b /* x is an infinity: go on to examine y */
1222 1.1 mrg movi a2, 1
1223 1.1 mrg leaf_return
1224 1.1 mrg
1225 1.1 mrg .Lle_cmp:
1226 1.1 mrg /* Check if x and y have different signs. */
1227 1.1 mrg xor a7, a2, a3
1228 1.1 mrg bltz a7, .Lle_diff_signs
1229 1.1 mrg
1230 1.1 mrg /* Check if x is negative. */
1231 1.1 mrg bltz a2, .Lle_xneg
1232 1.1 mrg
1233 1.1 mrg /* Check if x <= y. Both are non-negative, so the bit patterns order like unsigned integers. */
1234 1.1 mrg bltu a3, a2, 5f
1235 1.1 mrg 4: movi a2, 0
1236 1.1 mrg leaf_return
1237 1.1 mrg
1238 1.1 mrg .Lle_xneg:
1239 1.1 mrg /* Both are negative, so the order of the bit patterns is reversed: x <= y exactly when pattern(x) >= pattern(y) as unsigned integers. */
1240 1.1 mrg bgeu a2, a3, 4b
1241 1.1 mrg 5: movi a2, 1
1242 1.1 mrg leaf_return
1243 1.1 mrg
1244 1.1 mrg .Lle_diff_signs:
1245 1.1 mrg bltz a2, 4b /* x negative, y non-negative: x <= y */
1246 1.1 mrg
1247 1.1 mrg /* Check if both x and y are zero. Here x is non-negative and y negative, so x > y unless they are +0 and -0. */
1248 1.1 mrg or a7, a2, a3
1249 1.1 mrg slli a7, a7, 1
1250 1.1 mrg movi a2, 1
1251 1.1 mrg movi a3, 0
1252 1.1 mrg moveqz a2, a3, a7 /* a2 = 0 if both are zero */
1253 1.1 mrg leaf_return
1254 1.1 mrg
1255 1.1 mrg
1256 1.1 mrg /* Greater Than or Equal: __gesf2 takes x in a2 and y in a3. It returns -1 in a2 if either operand is a NaN; otherwise it continues at .Llt_cmp, which returns -1 when x < y and 0 when x >= y. */
1257 1.1 mrg
1258 1.1 mrg .align 4
1259 1.1 mrg .global __gesf2
1260 1.1 mrg .type __gesf2, @function
1261 1.1 mrg __gesf2:
1262 1.1 mrg leaf_entry sp, 16
1263 1.1 mrg movi a6, 0x7f800000
1264 1.1 mrg ball a2, a6, 2f /* all exponent bits of x set: infinity or NaN */
1265 1.1 mrg 1: bnall a3, a6, .Llt_cmp /* y is neither infinity nor NaN */
1266 1.1 mrg
1267 1.1 mrg /* Check if y is a NaN. */
1268 1.1 mrg slli a7, a3, 9
1269 1.1 mrg beqz a7, .Llt_cmp /* y is an infinity, which is ordered */
1270 1.1 mrg movi a2, -1
1271 1.1 mrg leaf_return
1272 1.1 mrg
1273 1.1 mrg /* Check if x is a NaN. */
1274 1.1 mrg 2: slli a7, a2, 9
1275 1.1 mrg beqz a7, 1b /* x is an infinity: go on to examine y */
1276 1.1 mrg movi a2, -1
1277 1.1 mrg leaf_return
1278 1.1 mrg
1279 1.1 mrg
1280 1.1 mrg /* Less Than: __ltsf2 takes x in a2 and y in a3. It returns 0 in a2 if either operand is a NaN; otherwise .Llt_cmp returns -1 when x < y and 0 when x >= y. */
1281 1.1 mrg
1282 1.1 mrg .align 4
1283 1.1 mrg .global __ltsf2
1284 1.1 mrg .type __ltsf2, @function
1285 1.1 mrg __ltsf2:
1286 1.1 mrg leaf_entry sp, 16
1287 1.1 mrg movi a6, 0x7f800000
1288 1.1 mrg ball a2, a6, 2f /* all exponent bits of x set: infinity or NaN */
1289 1.1 mrg 1: bnall a3, a6, .Llt_cmp /* y is neither infinity nor NaN */
1290 1.1 mrg
1291 1.1 mrg /* Check if y is a NaN. */
1292 1.1 mrg slli a7, a3, 9
1293 1.1 mrg beqz a7, .Llt_cmp /* y is an infinity, which is ordered */
1294 1.1 mrg movi a2, 0
1295 1.1 mrg leaf_return
1296 1.1 mrg
1297 1.1 mrg /* Check if x is a NaN. */
1298 1.1 mrg 2: slli a7, a2, 9
1299 1.1 mrg beqz a7, 1b /* x is an infinity: go on to examine y */
1300 1.1 mrg movi a2, 0
1301 1.1 mrg leaf_return
1302 1.1 mrg
1303 1.1 mrg .Llt_cmp:
1304 1.1 mrg /* Check if x and y have different signs. */
1305 1.1 mrg xor a7, a2, a3
1306 1.1 mrg bltz a7, .Llt_diff_signs
1307 1.1 mrg
1308 1.1 mrg /* Check if x is negative. */
1309 1.1 mrg bltz a2, .Llt_xneg
1310 1.1 mrg
1311 1.1 mrg /* Check if x < y. Both are non-negative, so the bit patterns order like unsigned integers. */
1312 1.1 mrg bgeu a2, a3, 5f
1313 1.1 mrg 4: movi a2, -1
1314 1.1 mrg leaf_return
1315 1.1 mrg
1316 1.1 mrg .Llt_xneg:
1317 1.1 mrg /* Both are negative, so the order of the bit patterns is reversed: x < y exactly when pattern(y) < pattern(x) as unsigned integers. */
1318 1.1 mrg bltu a3, a2, 4b
1319 1.1 mrg 5: movi a2, 0
1320 1.1 mrg leaf_return
1321 1.1 mrg
1322 1.1 mrg .Llt_diff_signs:
1323 1.1 mrg bgez a2, 5b /* x non-negative, y negative: x is not less than y */
1324 1.1 mrg
1325 1.1 mrg /* Here x is negative and y non-negative, so x < y unless both are zero (-0 and +0). Check that they are not both zero. */
1326 1.1 mrg or a7, a2, a3
1327 1.1 mrg slli a7, a7, 1
1328 1.1 mrg movi a2, 0
1329 1.1 mrg movi a3, -1
1330 1.1 mrg movnez a2, a3, a7 /* a2 = -1 if either operand is nonzero */
1331 1.1 mrg leaf_return
1332 1.1 mrg
1333 1.1 mrg
1334 1.1 mrg /* Unordered: __unordsf2 takes x in a2 and y in a3 and returns 1 in a2 if either operand is a NaN, 0 otherwise. */
1335 1.1 mrg
1336 1.1 mrg .align 4
1337 1.1 mrg .global __unordsf2
1338 1.1 mrg .type __unordsf2, @function
1339 1.1 mrg __unordsf2:
1340 1.1 mrg leaf_entry sp, 16
1341 1.1 mrg movi a6, 0x7f800000
1342 1.1 mrg ball a2, a6, 3f /* all exponent bits of x set: infinity or NaN */
1343 1.1 mrg 1: ball a3, a6, 4f /* same test for y */
1344 1.1 mrg 2: movi a2, 0
1345 1.1 mrg leaf_return
1346 1.1 mrg
1347 1.1 mrg 3: slli a7, a2, 9 /* mantissa bits of x */
1348 1.1 mrg beqz a7, 1b /* x is an infinity: go on to examine y */
1349 1.1 mrg movi a2, 1
1350 1.1 mrg leaf_return
1351 1.1 mrg
1352 1.1 mrg 4: slli a7, a3, 9 /* mantissa bits of y */
1353 1.1 mrg beqz a7, 2b /* y is an infinity: ordered */
1354 1.1 mrg movi a2, 1
1355 1.1 mrg leaf_return
1356 1.1 mrg
1357 1.1 mrg #endif /* L_cmpsf2 */
1358 1.1 mrg
1359 1.1 mrg #ifdef L_fixsfsi
1360 1.1 mrg
1361 1.1 mrg .align 4
1362 1.1 mrg .global __fixsfsi
1363 1.1 mrg .type __fixsfsi, @function
1364 1.1 mrg __fixsfsi: /* convert the single-precision bit pattern in a2 to a signed 32-bit integer in a2, truncating the fraction and saturating when out of range */
1365 1.1 mrg leaf_entry sp, 16
1366 1.1 mrg
1367 1.1 mrg /* Check for NaN and Infinity. */
1368 1.1 mrg movi a6, 0x7f800000
1369 1.1 mrg ball a2, a6, .Lfixsfsi_nan_or_inf
1370 1.1 mrg
1371 1.1 mrg /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */
1372 1.1 mrg extui a4, a2, 23, 8
1373 1.1 mrg addi a4, a4, -0x7e
1374 1.1 mrg bgei a4, 32, .Lfixsfsi_maxint /* too large for 32 bits: saturate */
1375 1.1 mrg blti a4, 1, .Lfixsfsi_zero /* magnitude below 1: the result is 0 */
1376 1.1 mrg
1377 1.1 mrg /* Add explicit "1.0" and shift << 8. */
1378 1.1 mrg or a7, a2, a6 /* a7 keeps the sign of x in bit 31 */
1379 1.1 mrg slli a5, a7, 8 /* the explicit one is now in bit 31 */
1380 1.1 mrg
1381 1.1 mrg /* Shift back to the right, based on the exponent. */
1382 1.1 mrg ssl a4 /* shift by 32 - a4 */
1383 1.1 mrg srl a5, a5
1384 1.1 mrg
1385 1.1 mrg /* Negate the result if sign != 0. */
1386 1.1 mrg neg a2, a5
1387 1.1 mrg movgez a2, a5, a7 /* take the un-negated value when a7 (the sign) is non-negative */
1388 1.1 mrg leaf_return
1389 1.1 mrg
1390 1.1 mrg .Lfixsfsi_nan_or_inf:
1391 1.1 mrg /* Handle Infinity and NaN. */
1392 1.1 mrg slli a4, a2, 9 /* mantissa bits: zero for an infinity */
1393 1.1 mrg beqz a4, .Lfixsfsi_maxint
1394 1.1 mrg
1395 1.1 mrg /* Translate NaN to +maxint. */
1396 1.1 mrg movi a2, 0 /* makes the sign test below pick the positive limit */
1397 1.1 mrg
1398 1.1 mrg .Lfixsfsi_maxint:
1399 1.1 mrg slli a4, a6, 8 /* 0x80000000 */
1400 1.1 mrg addi a5, a4, -1 /* 0x7fffffff */
1401 1.1 mrg movgez a4, a5, a2 /* a2 non-negative: 0x7fffffff, else keep 0x80000000 */
1402 1.1 mrg mov a2, a4
1403 1.1 mrg leaf_return
1404 1.1 mrg
1405 1.1 mrg .Lfixsfsi_zero:
1406 1.1 mrg movi a2, 0
1407 1.1 mrg leaf_return
1408 1.1 mrg
1409 1.1 mrg #endif /* L_fixsfsi */
1410 1.1 mrg
1411 1.1 mrg #ifdef L_fixsfdi
1412 1.1 mrg
1413 1.1 mrg .align 4
1414 1.1 mrg .global __fixsfdi
1415 1.1 mrg .type __fixsfdi, @function
1416 1.1 mrg __fixsfdi: /* convert the single-precision bit pattern in a2 to a signed 64-bit integer in xh:xl (register names chosen by endianness at the top of the file), truncating and saturating */
1417 1.1 mrg leaf_entry sp, 16
1418 1.1 mrg
1419 1.1 mrg /* Check for NaN and Infinity. */
1420 1.1 mrg movi a6, 0x7f800000
1421 1.1 mrg ball a2, a6, .Lfixsfdi_nan_or_inf
1422 1.1 mrg
1423 1.1 mrg /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */
1424 1.1 mrg extui a4, a2, 23, 8
1425 1.1 mrg addi a4, a4, -0x7e
1426 1.1 mrg bgei a4, 64, .Lfixsfdi_maxint /* too large for 64 bits: saturate */
1427 1.1 mrg blti a4, 1, .Lfixsfdi_zero /* magnitude below 1: the result is 0 */
1428 1.1 mrg
1429 1.1 mrg /* Add explicit "1.0" and shift << 8. */
1430 1.1 mrg or a7, a2, a6 /* a7 keeps the sign; a2 may be overwritten below as xh or xl */
1431 1.1 mrg slli xh, a7, 8
1432 1.1 mrg
1433 1.1 mrg /* Shift back to the right, based on the exponent. */
1434 1.1 mrg ssl a4 /* shift by 64 - a4 */
1435 1.1 mrg bgei a4, 32, .Lfixsfdi_smallshift
1436 1.1 mrg srl xl, xh /* a4 < 32: the whole value fits in the low word */
1437 1.1 mrg movi xh, 0
1438 1.1 mrg
1439 1.1 mrg .Lfixsfdi_shifted:
1440 1.1 mrg /* Negate the result if sign != 0. */
1441 1.1 mrg bgez a7, 1f
1442 1.1 mrg neg xl, xl
1443 1.1 mrg neg xh, xh
1444 1.1 mrg beqz xl, 1f /* no borrow from the low word when it is zero */
1445 1.1 mrg addi xh, xh, -1 /* propagate the borrow into the high word */
1446 1.1 mrg 1: leaf_return
1447 1.1 mrg
1448 1.1 mrg .Lfixsfdi_smallshift:
1449 1.1 mrg movi xl, 0
1450 1.1 mrg sll xl, xh /* low word: the bits of xh that the right shift moves below bit 32 (this overwrites the zero just stored) */
1451 1.1 mrg srl xh, xh /* high word */
1452 1.1 mrg j .Lfixsfdi_shifted
1453 1.1 mrg
1454 1.1 mrg .Lfixsfdi_nan_or_inf:
1455 1.1 mrg /* Handle Infinity and NaN. */
1456 1.1 mrg slli a4, a2, 9 /* mantissa bits: zero for an infinity */
1457 1.1 mrg beqz a4, .Lfixsfdi_maxint
1458 1.1 mrg
1459 1.1 mrg /* Translate NaN to +maxint. */
1460 1.1 mrg movi a2, 0 /* makes the sign test below pick the positive limit */
1461 1.1 mrg
1462 1.1 mrg .Lfixsfdi_maxint:
1463 1.1 mrg slli a7, a6, 8 /* 0x80000000 */
1464 1.1 mrg bgez a2, 1f
1465 1.1 mrg mov xh, a7 /* negative: 0x80000000:00000000 */
1466 1.1 mrg movi xl, 0
1467 1.1 mrg leaf_return
1468 1.1 mrg
1469 1.1 mrg 1: addi xh, a7, -1 /* 0x7fffffff */
1470 1.1 mrg movi xl, -1
1471 1.1 mrg leaf_return
1472 1.1 mrg
1473 1.1 mrg .Lfixsfdi_zero:
1474 1.1 mrg movi xh, 0
1475 1.1 mrg movi xl, 0
1476 1.1 mrg leaf_return
1477 1.1 mrg
1478 1.1 mrg #endif /* L_fixsfdi */
1479 1.1 mrg
1480 1.1 mrg #ifdef L_fixunssfsi
1481 1.1 mrg
1482 1.1 mrg .align 4
1483 1.1 mrg .global __fixunssfsi
1484 1.1 mrg .type __fixunssfsi, @function
1485 1.1 mrg __fixunssfsi: /* convert the single-precision bit pattern in a2 to an unsigned 32-bit integer in a2, truncating the fraction; out-of-range and NaN inputs map to the fixed values below */
1486 1.1 mrg leaf_entry sp, 16
1487 1.1 mrg
1488 1.1 mrg /* Check for NaN and Infinity. */
1489 1.1 mrg movi a6, 0x7f800000
1490 1.1 mrg ball a2, a6, .Lfixunssfsi_nan_or_inf
1491 1.1 mrg
1492 1.1 mrg /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */
1493 1.1 mrg extui a4, a2, 23, 8
1494 1.1 mrg addi a4, a4, -0x7f
1495 1.1 mrg bgei a4, 32, .Lfixunssfsi_maxint
1496 1.1 mrg bltz a4, .Lfixunssfsi_zero /* magnitude below 1: the result is 0 */
1497 1.1 mrg
1498 1.1 mrg /* Add explicit "1.0" and shift << 8. */
1499 1.1 mrg or a7, a2, a6 /* a7 keeps the sign of x in bit 31 */
1500 1.1 mrg slli a5, a7, 8 /* the explicit one is now in bit 31 */
1501 1.1 mrg
1502 1.1 mrg /* Shift back to the right, based on the exponent. */
1503 1.1 mrg addi a4, a4, 1
1504 1.1 mrg beqi a4, 32, .Lfixunssfsi_bigexp /* a shift of zero is handled separately */
1505 1.1 mrg ssl a4 /* shift by 32 - a4 */
1506 1.1 mrg srl a5, a5
1507 1.1 mrg
1508 1.1 mrg /* Negate the result if sign != 0. */
1509 1.1 mrg neg a2, a5
1510 1.1 mrg movgez a2, a5, a7 /* take the un-negated value when a7 (the sign) is non-negative */
1511 1.1 mrg leaf_return
1512 1.1 mrg
1513 1.1 mrg .Lfixunssfsi_nan_or_inf:
1514 1.1 mrg /* Handle Infinity and NaN. */
1515 1.1 mrg slli a4, a2, 9 /* mantissa bits: zero for an infinity */
1516 1.1 mrg beqz a4, .Lfixunssfsi_maxint
1517 1.1 mrg
1518 1.1 mrg /* Translate NaN to 0xffffffff. */
1519 1.1 mrg movi a2, -1
1520 1.1 mrg leaf_return
1521 1.1 mrg
1522 1.1 mrg .Lfixunssfsi_maxint:
1523 1.1 mrg slli a4, a6, 8 /* 0x80000000 */
1524 1.1 mrg movi a5, -1 /* 0xffffffff */
1525 1.1 mrg movgez a4, a5, a2 /* a2 non-negative: 0xffffffff, else keep 0x80000000 */
1526 1.1 mrg mov a2, a4
1527 1.1 mrg leaf_return
1528 1.1 mrg
1529 1.1 mrg .Lfixunssfsi_zero:
1530 1.1 mrg movi a2, 0
1531 1.1 mrg leaf_return
1532 1.1 mrg
1533 1.1 mrg .Lfixunssfsi_bigexp:
1534 1.1 mrg /* Handle unsigned maximum exponent case. */
1535 1.1 mrg bltz a2, 1f
1536 1.1 mrg mov a2, a5 /* no shift needed */
1537 1.1 mrg leaf_return
1538 1.1 mrg
1539 1.1 mrg /* Return 0x80000000 if negative. */
1540 1.1 mrg 1: slli a2, a6, 8
1541 1.1 mrg leaf_return
1542 1.1 mrg
1543 1.1 mrg #endif /* L_fixunssfsi */
1544 1.1 mrg
1545 1.1 mrg #ifdef L_fixunssfdi
1546 1.1 mrg
1547 1.1 mrg .align 4
1548 1.1 mrg .global __fixunssfdi
1549 1.1 mrg .type __fixunssfdi, @function
1550 1.1 mrg __fixunssfdi: /* convert the single-precision bit pattern in a2 to an unsigned 64-bit integer in xh:xl (register names chosen by endianness at the top of the file), truncating the fraction */
1551 1.1 mrg leaf_entry sp, 16
1552 1.1 mrg
1553 1.1 mrg /* Check for NaN and Infinity. */
1554 1.1 mrg movi a6, 0x7f800000
1555 1.1 mrg ball a2, a6, .Lfixunssfdi_nan_or_inf
1556 1.1 mrg
1557 1.1 mrg /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */
1558 1.1 mrg extui a4, a2, 23, 8
1559 1.1 mrg addi a4, a4, -0x7f
1560 1.1 mrg bgei a4, 64, .Lfixunssfdi_maxint
1561 1.1 mrg bltz a4, .Lfixunssfdi_zero /* magnitude below 1: the result is 0 */
1562 1.1 mrg
1563 1.1 mrg /* Add explicit "1.0" and shift << 8. */
1564 1.1 mrg or a7, a2, a6 /* a7 keeps the sign; a2 may be overwritten below as xh or xl */
1565 1.1 mrg slli xh, a7, 8
1566 1.1 mrg
1567 1.1 mrg /* Shift back to the right, based on the exponent. */
1568 1.1 mrg addi a4, a4, 1
1569 1.1 mrg beqi a4, 64, .Lfixunssfdi_bigexp /* a shift of zero is handled separately */
1570 1.1 mrg ssl a4 /* shift by 64 - a4 */
1571 1.1 mrg bgei a4, 32, .Lfixunssfdi_smallshift
1572 1.1 mrg srl xl, xh /* a4 < 32: the whole value fits in the low word */
1573 1.1 mrg movi xh, 0
1574 1.1 mrg
1575 1.1 mrg .Lfixunssfdi_shifted:
1576 1.1 mrg /* Negate the result if sign != 0. */
1577 1.1 mrg bgez a7, 1f
1578 1.1 mrg neg xl, xl
1579 1.1 mrg neg xh, xh
1580 1.1 mrg beqz xl, 1f /* no borrow from the low word when it is zero */
1581 1.1 mrg addi xh, xh, -1 /* propagate the borrow into the high word */
1582 1.1 mrg 1: leaf_return
1583 1.1 mrg
1584 1.1 mrg .Lfixunssfdi_smallshift:
1585 1.1 mrg movi xl, 0
1586 1.1 mrg src xl, xh, xl /* low word of the pair xh:0 shifted right by SAR */
1587 1.1 mrg srl xh, xh /* high word */
1588 1.1 mrg j .Lfixunssfdi_shifted
1589 1.1 mrg
1590 1.1 mrg .Lfixunssfdi_nan_or_inf:
1591 1.1 mrg /* Handle Infinity and NaN. */
1592 1.1 mrg slli a4, a2, 9 /* mantissa bits: zero for an infinity */
1593 1.1 mrg beqz a4, .Lfixunssfdi_maxint
1594 1.1 mrg
1595 1.1 mrg /* Translate NaN to 0xffffffff.... */
1596 1.1 mrg 1: movi xh, -1
1597 1.1 mrg movi xl, -1
1598 1.1 mrg leaf_return
1599 1.1 mrg
1600 1.1 mrg .Lfixunssfdi_maxint:
1601 1.1 mrg bgez a2, 1b /* non-negative: all ones, via the NaN path just above */
1602 1.1 mrg 2: slli xh, a6, 8 /* 0x80000000 */
1603 1.1 mrg movi xl, 0
1604 1.1 mrg leaf_return
1605 1.1 mrg
1606 1.1 mrg .Lfixunssfdi_zero:
1607 1.1 mrg movi xh, 0
1608 1.1 mrg movi xl, 0
1609 1.1 mrg leaf_return
1610 1.1 mrg
1611 1.1 mrg .Lfixunssfdi_bigexp:
1612 1.1 mrg /* Handle unsigned maximum exponent case. */
1613 1.1 mrg bltz a7, 2b /* negative: return 0x80000000:00000000 */
1614 1.1 mrg movi xl, 0 /* xh already holds the mantissa shifted left by 8 */
1615 1.1 mrg leaf_return /* no shift needed */
1616 1.1 mrg
1617 1.1 mrg #endif /* L_fixunssfdi */
1618 1.1 mrg
1619 1.1 mrg #ifdef L_floatsisf
1620 1.1 mrg
1621 1.1 mrg .align 4
1622 1.1 mrg .global __floatunsisf
1623 1.1 mrg .type __floatunsisf, @function
1624 1.1 mrg __floatunsisf: /* convert the unsigned 32-bit integer in a2 to a single-precision bit pattern in a2; shares the tail of __floatsisf */
1625 1.1 mrg leaf_entry sp, 16
1626 1.1 mrg beqz a2, .Lfloatsisf_return /* zero is returned unchanged */
1627 1.1 mrg
1628 1.1 mrg /* Set the sign to zero and jump to the floatsisf code. */
1629 1.1 mrg movi a7, 0
1630 1.1 mrg j .Lfloatsisf_normalize
1631 1.1 mrg
1632 1.1 mrg .align 4
1633 1.1 mrg .global __floatsisf
1634 1.1 mrg .type __floatsisf, @function
1635 1.1 mrg __floatsisf: /* convert the signed 32-bit integer in a2 to a single-precision bit pattern in a2, rounding to nearest with ties to even */
1636 1.1 mrg leaf_entry sp, 16
1637 1.1 mrg
1638 1.1 mrg /* Check for zero. */
1639 1.1 mrg beqz a2, .Lfloatsisf_return
1640 1.1 mrg
1641 1.1 mrg /* Save the sign. */
1642 1.1 mrg extui a7, a2, 31, 1
1643 1.1 mrg
1644 1.1 mrg /* Get the absolute value. */
1645 1.1 mrg #if XCHAL_HAVE_ABS
1646 1.1 mrg abs a2, a2
1647 1.1 mrg #else
1648 1.1 mrg neg a4, a2
1649 1.1 mrg movltz a2, a4, a2 /* replace a2 by its negation only when a2 is negative */
1650 1.1 mrg #endif
1651 1.1 mrg
1652 1.1 mrg .Lfloatsisf_normalize:
1653 1.1 mrg /* Normalize with the first 1 bit in the msb. do_nsau is a macro defined outside this section; presumably it leaves the number of leading zero bits of a2 in a4, with a5 and a6 as scratch -- confirm against its definition. */
1654 1.1 mrg do_nsau a4, a2, a5, a6
1655 1.1 mrg ssl a4 /* SAR = 32 - a4, so the sll below shifts left by a4 */
1656 1.1 mrg sll a5, a2
1657 1.1 mrg
1658 1.1 mrg /* Shift the mantissa into position, with rounding bits in a6. */
1659 1.1 mrg srli a2, a5, 8 /* the leading one lands in bit 23 */
1660 1.1 mrg slli a6, a5, (32 - 8) /* the 8 discarded low bits, left-aligned */
1661 1.1 mrg
1662 1.1 mrg /* Set the exponent. */
1663 1.1 mrg movi a5, 0x9d /* 0x7e + 31 */
1664 1.1 mrg sub a5, a5, a4
1665 1.1 mrg slli a5, a5, 23
1666 1.1 mrg add a2, a2, a5 /* the leading one in bit 23 adds one more to the exponent field */
1667 1.1 mrg
1668 1.1 mrg /* Add the sign. */
1669 1.1 mrg slli a7, a7, 31
1670 1.1 mrg or a2, a2, a7
1671 1.1 mrg
1672 1.1 mrg /* Round up if the leftover fraction is >= 1/2. */
1673 1.1 mrg bgez a6, .Lfloatsisf_return /* top discarded bit clear: below one half */
1674 1.1 mrg addi a2, a2, 1 /* Overflow to the exponent is OK. */
1675 1.1 mrg
1676 1.1 mrg /* Check if the leftover fraction is exactly 1/2. */
1677 1.1 mrg slli a6, a6, 1
1678 1.1 mrg beqz a6, .Lfloatsisf_exactlyhalf
1679 1.1 mrg
1680 1.1 mrg .Lfloatsisf_return:
1681 1.1 mrg leaf_return
1682 1.1 mrg
1683 1.1 mrg .Lfloatsisf_exactlyhalf:
1684 1.1 mrg /* Round down to the nearest even value. */
1685 1.1 mrg srli a2, a2, 1
1686 1.1 mrg slli a2, a2, 1 /* clear bit 0 of the already incremented result */
1687 1.1 mrg leaf_return
1688 1.1 mrg
1689 1.1 mrg #endif /* L_floatsisf */
1690 1.1 mrg
1691 1.1 mrg #ifdef L_floatdisf
1692 1.1 mrg
1693 1.1 mrg .align 4
1694 1.1 mrg .global __floatundisf
1695 1.1 mrg .type __floatundisf, @function
1696 1.1 mrg __floatundisf: /* convert the unsigned 64-bit integer in xh:xl to a single-precision bit pattern in a2; shares the tail of __floatdisf */
1697 1.1 mrg leaf_entry sp, 16
1698 1.1 mrg
1699 1.1 mrg /* Check for zero. */
1700 1.1 mrg or a4, xh, xl
1701 1.1 mrg beqz a4, 2f /* label 2 is the leaf_return inside __floatdisf below; a2 is zero here whichever half it holds */
1702 1.1 mrg
1703 1.1 mrg /* Set the sign to zero and jump to the floatdisf code. */
1704 1.1 mrg movi a7, 0
1705 1.1 mrg j .Lfloatdisf_normalize
1706 1.1 mrg
1707 1.1 mrg .align 4
1708 1.1 mrg .global __floatdisf
1709 1.1 mrg .type __floatdisf, @function
1710 1.1 mrg __floatdisf: /* convert the signed 64-bit integer in xh:xl to a single-precision bit pattern in a2, rounding to nearest with ties to even */
1711 1.1 mrg leaf_entry sp, 16
1712 1.1 mrg
1713 1.1 mrg /* Check for zero. */
1714 1.1 mrg or a4, xh, xl
1715 1.1 mrg beqz a4, 2f /* both halves are zero, so a2 already holds the result */
1716 1.1 mrg
1717 1.1 mrg /* Save the sign. */
1718 1.1 mrg extui a7, xh, 31, 1
1719 1.1 mrg
1720 1.1 mrg /* Get the absolute value. */
1721 1.1 mrg bgez xh, .Lfloatdisf_normalize
1722 1.1 mrg neg xl, xl
1723 1.1 mrg neg xh, xh
1724 1.1 mrg beqz xl, .Lfloatdisf_normalize /* no borrow from the low word when it is zero */
1725 1.1 mrg addi xh, xh, -1 /* propagate the borrow into the high word */
1726 1.1 mrg
1727 1.1 mrg .Lfloatdisf_normalize:
1728 1.1 mrg /* Normalize with the first 1 bit in the msb of xh. do_nsau is a macro defined outside this section; presumably it leaves the number of leading zero bits of its second operand in a4 -- confirm against its definition. */
1729 1.1 mrg beqz xh, .Lfloatdisf_bigshift
1730 1.1 mrg do_nsau a4, xh, a5, a6
1731 1.1 mrg ssl a4 /* SAR = 32 - a4 */
1732 1.1 mrg src xh, xh, xl /* high word of the pair xh:xl shifted left by a4 */
1733 1.1 mrg sll xl, xl /* low word shifted left by a4 */
1734 1.1 mrg
1735 1.1 mrg .Lfloatdisf_shifted:
1736 1.1 mrg /* Shift the mantissa into position, with rounding bits in a6. */
1737 1.1 mrg ssai 8
1738 1.1 mrg sll a5, xl /* a5 = xl << 24: the bits that fall below a6 */
1739 1.1 mrg src a6, xh, xl /* a6 = low 8 bits of xh followed by the top 24 bits of xl */
1740 1.1 mrg srl xh, xh /* the leading one lands in bit 23 */
1741 1.1 mrg beqz a5, 1f
1742 1.1 mrg movi a5, 1
1743 1.1 mrg or a6, a6, a5 /* sticky bit: something nonzero lies below a6 */
1744 1.1 mrg 1:
1745 1.1 mrg /* Set the exponent. */
1746 1.1 mrg movi a5, 0xbd /* 0x7e + 63 */
1747 1.1 mrg sub a5, a5, a4
1748 1.1 mrg slli a5, a5, 23
1749 1.1 mrg add a2, xh, a5 /* the leading one in bit 23 adds one more to the exponent field */
1750 1.1 mrg
1751 1.1 mrg /* Add the sign. */
1752 1.1 mrg slli a7, a7, 31
1753 1.1 mrg or a2, a2, a7
1754 1.1 mrg
1755 1.1 mrg /* Round up if the leftover fraction is >= 1/2. */
1756 1.1 mrg bgez a6, 2f /* top discarded bit clear: below one half */
1757 1.1 mrg addi a2, a2, 1 /* Overflow to the exponent is OK. */
1758 1.1 mrg
1759 1.1 mrg /* Check if the leftover fraction is exactly 1/2. */
1760 1.1 mrg slli a6, a6, 1
1761 1.1 mrg beqz a6, .Lfloatdisf_exactlyhalf
1762 1.1 mrg 2: leaf_return
1763 1.1 mrg
1764 1.1 mrg .Lfloatdisf_bigshift:
1765 1.1 mrg /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
1766 1.1 mrg do_nsau a4, xl, a5, a6
1767 1.1 mrg ssl a4
1768 1.1 mrg sll xh, xl
1769 1.1 mrg movi xl, 0
1770 1.1 mrg addi a4, a4, 32 /* account for the 32 zero bits of the original xh */
1771 1.1 mrg j .Lfloatdisf_shifted
1772 1.1 mrg
1773 1.1 mrg .Lfloatdisf_exactlyhalf:
1774 1.1 mrg /* Round down to the nearest even value. */
1775 1.1 mrg srli a2, a2, 1
1776 1.1 mrg slli a2, a2, 1 /* clear bit 0 of the already incremented result */
1777 1.1 mrg leaf_return
1778 1.1 mrg
1779 1.1 mrg #endif /* L_floatdisf */
1780