ieee754-sf.S revision 1.1 1 1.1 mrg /* IEEE-754 single-precision functions for Xtensa
2 1.1 mrg Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 1.1 mrg Contributed by Bob Wilson (bwilson (at) tensilica.com) at Tensilica.
4 1.1 mrg
5 1.1 mrg This file is part of GCC.
6 1.1 mrg
7 1.1 mrg GCC is free software; you can redistribute it and/or modify it
8 1.1 mrg under the terms of the GNU General Public License as published by
9 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
10 1.1 mrg any later version.
11 1.1 mrg
12 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT
13 1.1 mrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 1.1 mrg License for more details.
16 1.1 mrg
17 1.1 mrg Under Section 7 of GPL version 3, you are granted additional
18 1.1 mrg permissions described in the GCC Runtime Library Exception, version
19 1.1 mrg 3.1, as published by the Free Software Foundation.
20 1.1 mrg
21 1.1 mrg You should have received a copy of the GNU General Public License and
22 1.1 mrg a copy of the GCC Runtime Library Exception along with this program;
23 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 1.1 mrg <http://www.gnu.org/licenses/>. */
25 1.1 mrg
/* Aliases for the register pairs a2/a3 (x) and a4/a5 (y).  Which
   register of a pair gets the "h" name and which the "l" name depends
   on __XTENSA_EB__.  NOTE(review): presumably "h"/"l" mean the high and
   low word of a two-word operand; none of the four names is referenced
   in the visible part of this file -- confirm they are still needed.  */
26 1.1 mrg #ifdef __XTENSA_EB__
27 1.1 mrg #define xh a2
28 1.1 mrg #define xl a3
29 1.1 mrg #define yh a4
30 1.1 mrg #define yl a5
31 1.1 mrg #else
32 1.1 mrg #define xh a3
33 1.1 mrg #define xl a2
34 1.1 mrg #define yh a5
35 1.1 mrg #define yl a4
36 1.1 mrg #endif
37 1.1 mrg
38 1.1 mrg /* Warning! The branch displacements for some Xtensa branch instructions
39 1.1 mrg are quite small, and this code has been carefully laid out to keep
40 1.1 mrg branch targets in range. If you change anything, be sure to check that
41 1.1 mrg the assembler is not relaxing anything to branch over a jump. */
42 1.1 mrg
43 1.1 mrg #ifdef L_negsf2
44 1.1 mrg
45 1.1 mrg .align 4
46 1.1 mrg .global __negsf2
47 1.1 mrg .type __negsf2, @function
48 1.1 mrg __negsf2:
49 1.1 mrg leaf_entry sp, 16
50 1.1 mrg movi a4, 0x80000000
51 1.1 mrg xor a2, a2, a4
52 1.1 mrg leaf_return
53 1.1 mrg
54 1.1 mrg #endif /* L_negsf2 */
55 1.1 mrg
56 1.1 mrg #ifdef L_addsubsf3
57 1.1 mrg
58 1.1 mrg /* Addition */
__addsf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)

	   On entry: a2 = x, a3 = y, a6 = 0x7f800000 (exponent mask) and
	   the operands have the same sign.  a7 is used as scratch.
	   Whenever the value returned is a NaN it is forced quiet, so a
	   signaling NaN operand is never handed back unchanged.  */

.Ladd_xnan_or_inf:
	/* x is NaN or Infinity.  If y is neither Infinity nor NaN,
	   return x.  */
	bnall	a3, a6, .Ladd_return_nan_or_inf
	/* If x is a NaN, return it.  Otherwise, return y.  */
	slli	a7, a2, 9
	bnez	a7, .Ladd_return_nan

.Ladd_ynan_or_inf:
	/* Return y.  */
	mov	a2, a3

.Ladd_return_nan_or_inf:
	/* a2 has all exponent bits set.  A nonzero mantissa (the bits
	   left after shifting out sign and exponent) means it is a NaN.  */
	slli	a7, a2, 9
	bnez	a7, .Ladd_return_nan
	leaf_return

.Ladd_return_nan:
	movi	a7, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a7
	leaf_return
77 1.1 mrg
/* Reached from __addsf3 when the sign bits of x (a2) and y (a3) differ.
   a6 holds 0x7f800000 at this point, so a6 << 8 is 0x80000000, i.e. the
   sign bit alone.  Flipping y's sign makes both signs equal, and the
   work is handed to the same-sign path of the subtraction.  */
78 1.1 mrg .Ladd_opposite_signs:
79 1.1 mrg /* Operand signs differ. Do a subtraction. */
80 1.1 mrg slli a7, a6, 8
81 1.1 mrg xor a3, a3, a7
82 1.1 mrg j .Lsub_same_sign
83 1.1 mrg
84 1.1 mrg .align 4
85 1.1 mrg .global __addsf3
86 1.1 mrg .type __addsf3, @function
/* __addsf3: single-precision addition.  x is in a2 and y in a3; the
   result is left in a2.  Register use in the code below:
     a6      0x7f800000, the mask of the exponent field
     a7, a8  bits 23..31 (sign and exponent together) of x and of y
     a9      bits shifted out of the smaller operand, kept for rounding
     a10     scratch (exponent difference, then the result's bits 23..31)  */
87 1.1 mrg __addsf3:
88 1.1 mrg leaf_entry sp, 16
89 1.1 mrg movi a6, 0x7f800000
90 1.1 mrg
91 1.1 mrg /* Check if the two operands have the same sign. */
92 1.1 mrg xor a7, a2, a3
93 1.1 mrg bltz a7, .Ladd_opposite_signs
94 1.1 mrg
/* .Ladd_same_sign is also the target of the jump in
   .Lsub_opposite_signs, after the sign of y has been flipped.  */
95 1.1 mrg .Ladd_same_sign:
96 1.1 mrg /* Check if either operand has every exponent bit set (all bits of the 0x7f800000 mask in a6), i.e., is NaN or Infinity. */
97 1.1 mrg ball a2, a6, .Ladd_xnan_or_inf
98 1.1 mrg ball a3, a6, .Ladd_ynan_or_inf
99 1.1 mrg
100 1.1 mrg /* Compare the exponents. The smaller operand will be shifted
101 1.1 mrg right by the exponent difference and added to the larger
102 1.1 mrg one. */
/* The signs are equal here, so including the sign bit in the 9-bit
   fields does not change the outcome of the comparison.  */
103 1.1 mrg extui a7, a2, 23, 9
104 1.1 mrg extui a8, a3, 23, 9
105 1.1 mrg bltu a7, a8, .Ladd_shiftx
106 1.1 mrg
107 1.1 mrg .Ladd_shifty:
108 1.1 mrg /* Check if the smaller (or equal) exponent is zero. */
109 1.1 mrg bnone a3, a6, .Ladd_yexpzero
110 1.1 mrg
111 1.1 mrg /* Replace y sign/exponent with 0x008. */
/* Setting bits 23..30 and then clearing bits 24..31 leaves bit 23
   set: the explicit "1.0" of y's mantissa.  */
112 1.1 mrg or a3, a3, a6
113 1.1 mrg slli a3, a3, 8
114 1.1 mrg srli a3, a3, 8
115 1.1 mrg
116 1.1 mrg .Ladd_yexpdiff:
117 1.1 mrg /* Compute the exponent difference. */
118 1.1 mrg sub a10, a7, a8
119 1.1 mrg
120 1.1 mrg /* Exponent difference >= 32 -- just return the bigger value. */
121 1.1 mrg bgeui a10, 32, 1f
122 1.1 mrg
123 1.1 mrg /* Shift y right by the exponent difference. Any bits that are
124 1.1 mrg shifted out of y are saved in a9 for rounding the result. */
125 1.1 mrg ssr a10
126 1.1 mrg movi a9, 0
127 1.1 mrg src a9, a3, a9
128 1.1 mrg srl a3, a3
129 1.1 mrg
130 1.1 mrg /* Do the addition. */
131 1.1 mrg add a2, a2, a3
132 1.1 mrg
133 1.1 mrg /* Check if the add overflowed into the exponent. */
134 1.1 mrg extui a10, a2, 23, 9
135 1.1 mrg beq a10, a7, .Ladd_round
/* .Ladd_carry expects the original sign/exponent field in a8.  */
136 1.1 mrg mov a8, a7
137 1.1 mrg j .Ladd_carry
138 1.1 mrg
/* Paths of the addition for operands whose exponent field is zero
   (zero or subnormal).  a2 = x, a3 = y, a6 = 0x7f800000, and a7/a8
   hold bits 23..31 of x/y.  */
139 1.1 mrg .Ladd_yexpzero:
140 1.1 mrg /* y is a subnormal value. Replace its sign/exponent with zero,
141 1.1 mrg i.e., no implicit "1.0", and increment the apparent exponent
142 1.1 mrg because subnormals behave as if they had the minimum (nonzero)
143 1.1 mrg exponent. Test for the case when both exponents are zero. */
144 1.1 mrg slli a3, a3, 9
145 1.1 mrg srli a3, a3, 9
146 1.1 mrg bnone a2, a6, .Ladd_bothexpzero
147 1.1 mrg addi a8, a8, 1
148 1.1 mrg j .Ladd_yexpdiff
149 1.1 mrg
150 1.1 mrg .Ladd_bothexpzero:
151 1.1 mrg /* Both exponents are zero. Handle this as a special case. There
152 1.1 mrg is no need to shift or round, and the normal code for handling
153 1.1 mrg a carry into the exponent field will not work because it
154 1.1 mrg assumes there is an implicit "1.0" that needs to be added. */
/* a3 was reduced to y's mantissa above, so x keeps its own sign bit.  */
155 1.1 mrg add a2, a2, a3
/* The "1:" below is also the target of the "bgeui a10, 32, 1f" in
   .Ladd_yexpdiff, which returns x unchanged.  */
156 1.1 mrg 1: leaf_return
157 1.1 mrg
158 1.1 mrg .Ladd_xexpzero:
159 1.1 mrg /* Same as "yexpzero" except skip handling the case when both
160 1.1 mrg exponents are zero. */
161 1.1 mrg slli a2, a2, 9
162 1.1 mrg srli a2, a2, 9
163 1.1 mrg addi a7, a7, 1
164 1.1 mrg j .Ladd_xexpdiff
165 1.1 mrg
/* Reached when x's sign/exponent field (a7) is below y's (a8): x is the
   operand that gets shifted right, and y (a3) keeps its sign and
   exponent so that the sum lands directly in result format.  */
166 1.1 mrg .Ladd_shiftx:
167 1.1 mrg /* Same thing as the "shifty" code, but with x and y swapped. Also,
168 1.1 mrg because the exponent difference is always nonzero in this version,
169 1.1 mrg the shift sequence can use SLL and skip loading a constant zero. */
170 1.1 mrg bnone a2, a6, .Ladd_xexpzero
171 1.1 mrg
172 1.1 mrg or a2, a2, a6
173 1.1 mrg slli a2, a2, 8
174 1.1 mrg srli a2, a2, 8
175 1.1 mrg
176 1.1 mrg .Ladd_xexpdiff:
177 1.1 mrg sub a10, a8, a7
178 1.1 mrg bgeui a10, 32, .Ladd_returny
179 1.1 mrg
/* With the shift amount loaded by SSR, SLL shifts left by 32 minus that
   amount, so a9 receives exactly the bits that SRL drops from a2.  */
180 1.1 mrg ssr a10
181 1.1 mrg sll a9, a2
182 1.1 mrg srl a2, a2
183 1.1 mrg
184 1.1 mrg add a2, a2, a3
185 1.1 mrg
186 1.1 mrg /* Check if the add overflowed into the exponent. */
187 1.1 mrg extui a10, a2, 23, 9
188 1.1 mrg bne a10, a8, .Ladd_carry
189 1.1 mrg
/* Shared by both operand orders.  a9 holds the leftover fraction with
   its most significant bit in bit 31, so "a9 < 0" means >= 1/2.  */
190 1.1 mrg .Ladd_round:
191 1.1 mrg /* Round up if the leftover fraction is >= 1/2. */
192 1.1 mrg bgez a9, 1f
193 1.1 mrg addi a2, a2, 1
194 1.1 mrg
195 1.1 mrg /* Check if the leftover fraction is exactly 1/2. */
196 1.1 mrg slli a9, a9, 1
197 1.1 mrg beqz a9, .Ladd_exactlyhalf
198 1.1 mrg 1: leaf_return
199 1.1 mrg
/* Exponent difference of 32 or more: x is too small to affect y.  */
200 1.1 mrg .Ladd_returny:
201 1.1 mrg mov a2, a3
202 1.1 mrg leaf_return
203 1.1 mrg
/* On entry: a2 = raw sum, a8 = original bits 23..31 (sign and exponent)
   of the larger operand, a9 = leftover fraction bits, a6 = 0x7f800000.  */
204 1.1 mrg .Ladd_carry:
205 1.1 mrg /* The addition has overflowed into the exponent field, so the
206 1.1 mrg value needs to be renormalized. The mantissa of the result
207 1.1 mrg can be recovered by subtracting the original exponent and
208 1.1 mrg adding 0x800000 (which is the explicit "1.0" for the
209 1.1 mrg mantissa of the non-shifted operand -- the "1.0" for the
210 1.1 mrg shifted operand was already added). The mantissa can then
211 1.1 mrg be shifted right by one bit. The explicit "1.0" of the
212 1.1 mrg shifted mantissa then needs to be replaced by the exponent,
213 1.1 mrg incremented by one to account for the normalizing shift.
214 1.1 mrg It is faster to combine these operations: do the shift first
215 1.1 mrg and combine the additions and subtractions. If x is the
216 1.1 mrg original exponent, the result is:
217 1.1 mrg shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
218 1.1 mrg or:
219 1.1 mrg shifted mantissa + ((x + 1) << 22)
220 1.1 mrg Note that the exponent is incremented here by leaving the
221 1.1 mrg explicit "1.0" of the mantissa in the exponent field. */
222 1.1 mrg
223 1.1 mrg /* Shift x right by one bit. Save the lsb. */
224 1.1 mrg mov a10, a2
225 1.1 mrg srli a2, a2, 1
226 1.1 mrg
227 1.1 mrg /* See explanation above. The original exponent is in a8. */
228 1.1 mrg addi a8, a8, 1
229 1.1 mrg slli a8, a8, 22
230 1.1 mrg add a2, a2, a8
231 1.1 mrg
232 1.1 mrg /* Return an Infinity if the exponent overflowed. */
233 1.1 mrg ball a2, a6, .Ladd_infinity
234 1.1 mrg
235 1.1 mrg /* Same thing as the "round" code except the msb of the leftover
236 1.1 mrg fraction is bit 0 of a10, with the rest of the fraction in a9. */
237 1.1 mrg bbci.l a10, 0, 1f
238 1.1 mrg addi a2, a2, 1
239 1.1 mrg beqz a9, .Ladd_exactlyhalf
240 1.1 mrg 1: leaf_return
241 1.1 mrg
242 1.1 mrg .Ladd_infinity:
243 1.1 mrg /* Clear the mantissa. */
244 1.1 mrg srli a2, a2, 23
245 1.1 mrg slli a2, a2, 23
246 1.1 mrg
247 1.1 mrg /* The sign bit may have been lost in a carry-out. Put it back. */
/* a8 is (original sign/exponent + 1) << 22 here; one more left shift
   moves the original sign bit into bit 31.  The exponent bits that
   come along are harmless because the exponent of a2 is all ones.  */
248 1.1 mrg slli a8, a8, 1
249 1.1 mrg or a2, a2, a8
250 1.1 mrg leaf_return
251 1.1 mrg
/* Reached after the result was incremented for a leftover fraction of
   exactly 1/2; clearing bit 0 yields the even neighbour.  */
252 1.1 mrg .Ladd_exactlyhalf:
253 1.1 mrg /* Round down to the nearest even value. */
254 1.1 mrg srli a2, a2, 1
255 1.1 mrg slli a2, a2, 1
256 1.1 mrg leaf_return
257 1.1 mrg
258 1.1 mrg
259 1.1 mrg /* Subtraction */
__subsf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)

	   On entry: a2 = x, a3 = y, a6 = 0x7f800000 (exponent mask) and
	   the operands have the same sign.  a4 and a7 are scratch.
	   Whenever the value returned is a NaN it is forced quiet, so a
	   signaling NaN operand is never handed back unchanged.  */

.Lsub_xnan_or_inf:
	/* x is NaN or Infinity.  If y is neither Infinity nor NaN,
	   return x.  */
	bnall	a3, a6, .Lsub_return_nan_or_inf
	/* Both x and y are either NaN or Inf, so the result is NaN.  */

.Lsub_return_nan:
	movi	a4, 0x400000	/* make it a quiet NaN */
	or	a2, a2, a4
	leaf_return

.Lsub_ynan_or_inf:
	/* Negate y and return it.  (a6 << 8 is the sign bit.)  */
	slli	a7, a6, 8
	xor	a2, a3, a7

.Lsub_return_nan_or_inf:
	/* a2 has all exponent bits set.  A nonzero mantissa (the bits
	   left after shifting out sign and exponent) means it is a NaN.  */
	slli	a7, a2, 9
	bnez	a7, .Lsub_return_nan
	leaf_return
279 1.1 mrg
/* Reached from __subsf3 when the sign bits of x (a2) and y (a3) differ.
   a6 holds 0x7f800000 at this point, so a6 << 8 is 0x80000000, i.e. the
   sign bit alone.  Flipping y's sign makes both signs equal, and the
   work is handed to the same-sign path of the addition.  */
280 1.1 mrg .Lsub_opposite_signs:
281 1.1 mrg /* Operand signs differ. Do an addition. */
282 1.1 mrg slli a7, a6, 8
283 1.1 mrg xor a3, a3, a7
284 1.1 mrg j .Ladd_same_sign
285 1.1 mrg
286 1.1 mrg .align 4
287 1.1 mrg .global __subsf3
288 1.1 mrg .type __subsf3, @function
/* __subsf3: single-precision subtraction, x - y.  x is in a2 and y in
   a3; the result is left in a2.  Register use in the code below:
     a6      0x7f800000, the mask of the exponent field
     a7, a8  8-bit exponent fields (bits 23..30) of x and of y
     a9      bits shifted out of the smaller operand, kept for rounding
     a10     scratch (exponent difference, borrow propagation, then the
             exponent field of the result)  */
289 1.1 mrg __subsf3:
290 1.1 mrg leaf_entry sp, 16
291 1.1 mrg movi a6, 0x7f800000
292 1.1 mrg
293 1.1 mrg /* Check if the two operands have the same sign. */
294 1.1 mrg xor a7, a2, a3
295 1.1 mrg bltz a7, .Lsub_opposite_signs
296 1.1 mrg
/* .Lsub_same_sign is also the target of the jump in
   .Ladd_opposite_signs, after the sign of y has been flipped.  */
297 1.1 mrg .Lsub_same_sign:
298 1.1 mrg /* Check if either operand has every exponent bit set (all bits of the 0x7f800000 mask in a6), i.e., is NaN or Infinity. */
299 1.1 mrg ball a2, a6, .Lsub_xnan_or_inf
300 1.1 mrg ball a3, a6, .Lsub_ynan_or_inf
301 1.1 mrg
302 1.1 mrg /* Compare the operands. In contrast to addition, the entire
303 1.1 mrg value matters here. */
304 1.1 mrg extui a7, a2, 23, 8
305 1.1 mrg extui a8, a3, 23, 8
306 1.1 mrg bltu a2, a3, .Lsub_xsmaller
307 1.1 mrg
308 1.1 mrg .Lsub_ysmaller:
309 1.1 mrg /* Check if the smaller (or equal) exponent is zero. */
310 1.1 mrg bnone a3, a6, .Lsub_yexpzero
311 1.1 mrg
312 1.1 mrg /* Replace y sign/exponent with 0x008. */
/* Setting bits 23..30 and then clearing bits 24..31 leaves bit 23
   set: the explicit "1.0" of y's mantissa.  */
313 1.1 mrg or a3, a3, a6
314 1.1 mrg slli a3, a3, 8
315 1.1 mrg srli a3, a3, 8
316 1.1 mrg
317 1.1 mrg .Lsub_yexpdiff:
318 1.1 mrg /* Compute the exponent difference. */
319 1.1 mrg sub a10, a7, a8
320 1.1 mrg
321 1.1 mrg /* Exponent difference >= 32 -- just return the bigger value. */
322 1.1 mrg bgeui a10, 32, 1f
323 1.1 mrg
324 1.1 mrg /* Shift y right by the exponent difference. Any bits that are
325 1.1 mrg shifted out of y are saved in a9 for rounding the result. */
326 1.1 mrg ssr a10
327 1.1 mrg movi a9, 0
328 1.1 mrg src a9, a3, a9
329 1.1 mrg srl a3, a3
330 1.1 mrg
331 1.1 mrg sub a2, a2, a3
332 1.1 mrg
333 1.1 mrg /* Subtract the leftover bits in a9 from zero and propagate any
334 1.1 mrg borrow from a2. */
/* A borrow is needed exactly when the leftover bits are nonzero, which
   is what MOVNEZ tests after the negation.  */
335 1.1 mrg neg a9, a9
336 1.1 mrg addi a10, a2, -1
337 1.1 mrg movnez a2, a10, a9
338 1.1 mrg
339 1.1 mrg /* Check if the subtract underflowed into the exponent. */
340 1.1 mrg extui a10, a2, 23, 8
341 1.1 mrg beq a10, a7, .Lsub_round
342 1.1 mrg j .Lsub_borrow
343 1.1 mrg
/* Reached from .Lsub_ysmaller when y's exponent field is zero (y is
   zero or subnormal).  a2 = x, a3 = y, a6 = 0x7f800000, a8 = exponent
   field of y.  */
344 1.1 mrg .Lsub_yexpzero:
345 1.1 mrg /* Return zero if the inputs are equal. (For the non-subnormal
346 1.1 mrg case, subtracting the "1.0" will cause a borrow from the exponent
347 1.1 mrg and this case can be detected when handling the borrow.) */
348 1.1 mrg beq a2, a3, .Lsub_return_zero
349 1.1 mrg
350 1.1 mrg /* y is a subnormal value. Replace its sign/exponent with zero,
351 1.1 mrg i.e., no implicit "1.0". Unless x is also a subnormal, increment
352 1.1 mrg y's apparent exponent because subnormals behave as if they had
353 1.1 mrg the minimum (nonzero) exponent. */
354 1.1 mrg slli a3, a3, 9
355 1.1 mrg srli a3, a3, 9
356 1.1 mrg bnone a2, a6, .Lsub_yexpdiff
357 1.1 mrg addi a8, a8, 1
358 1.1 mrg j .Lsub_yexpdiff
359 1.1 mrg
/* Exponent difference of 32 or more with x the smaller operand: the
   result is -y.  a6 << 8 is the sign bit.  */
360 1.1 mrg .Lsub_returny:
361 1.1 mrg /* Negate and return y. */
362 1.1 mrg slli a7, a6, 8
363 1.1 mrg xor a2, a3, a7
/* The "1:" below is also the target of the "bgeui a10, 32, 1f" in
   .Lsub_yexpdiff, which returns x unchanged.  */
364 1.1 mrg 1: leaf_return
365 1.1 mrg
/* Reached when a2 is below a3 in an unsigned comparison of the whole
   words.  x is shifted right and subtracted from the negated y, so the
   result carries the sign and exponent of -y.  */
366 1.1 mrg .Lsub_xsmaller:
367 1.1 mrg /* Same thing as the "ysmaller" code, but with x and y swapped and
368 1.1 mrg with y negated. */
369 1.1 mrg bnone a2, a6, .Lsub_xexpzero
370 1.1 mrg
371 1.1 mrg or a2, a2, a6
372 1.1 mrg slli a2, a2, 8
373 1.1 mrg srli a2, a2, 8
374 1.1 mrg
375 1.1 mrg .Lsub_xexpdiff:
376 1.1 mrg sub a10, a8, a7
377 1.1 mrg bgeui a10, 32, .Lsub_returny
378 1.1 mrg
379 1.1 mrg ssr a10
380 1.1 mrg movi a9, 0
381 1.1 mrg src a9, a2, a9
382 1.1 mrg srl a2, a2
383 1.1 mrg
384 1.1 mrg /* Negate y. */
385 1.1 mrg slli a11, a6, 8
386 1.1 mrg xor a3, a3, a11
387 1.1 mrg
388 1.1 mrg sub a2, a3, a2
389 1.1 mrg
/* Same borrow propagation as in .Lsub_yexpdiff: subtract one more from
   a2 when the leftover bits in a9 are nonzero.  */
390 1.1 mrg neg a9, a9
391 1.1 mrg addi a10, a2, -1
392 1.1 mrg movnez a2, a10, a9
393 1.1 mrg
394 1.1 mrg /* Check if the subtract underflowed into the exponent. */
395 1.1 mrg extui a10, a2, 23, 8
396 1.1 mrg bne a10, a8, .Lsub_borrow
397 1.1 mrg
/* Shared by both operand orders and by the renormalization code.  a9
   holds the leftover fraction with its most significant bit in bit 31,
   so "a9 < 0" means >= 1/2.  */
398 1.1 mrg .Lsub_round:
399 1.1 mrg /* Round up if the leftover fraction is >= 1/2. */
400 1.1 mrg bgez a9, 1f
401 1.1 mrg addi a2, a2, 1
402 1.1 mrg
403 1.1 mrg /* Check if the leftover fraction is exactly 1/2. */
404 1.1 mrg slli a9, a9, 1
405 1.1 mrg beqz a9, .Lsub_exactlyhalf
406 1.1 mrg 1: leaf_return
407 1.1 mrg
408 1.1 mrg .Lsub_xexpzero:
409 1.1 mrg /* Same as "yexpzero". */
410 1.1 mrg beq a2, a3, .Lsub_return_zero
411 1.1 mrg slli a2, a2, 9
412 1.1 mrg srli a2, a2, 9
413 1.1 mrg bnone a3, a6, .Lsub_xexpdiff
414 1.1 mrg addi a7, a7, 1
415 1.1 mrg j .Lsub_xexpdiff
416 1.1 mrg
/* Returns the all-zero bit pattern, i.e. zero with a clear sign bit.  */
417 1.1 mrg .Lsub_return_zero:
418 1.1 mrg movi a2, 0
419 1.1 mrg leaf_return
420 1.1 mrg
/* On entry: a2 = raw difference, a9 = leftover fraction bits, a10 =
   exponent field (bits 23..30) of a2 as extracted after the subtract.
   a6, a7, a8 and a11 are reused as scratch from here on.
   NOTE(review): do_nsau is a macro defined outside this file;
   presumably it stores the number of leading zero bits of its second
   operand in its first, using the last two as temporaries -- confirm.  */
421 1.1 mrg .Lsub_borrow:
422 1.1 mrg /* The subtraction has underflowed into the exponent field, so the
423 1.1 mrg value needs to be renormalized. Shift the mantissa left as
424 1.1 mrg needed to remove any leading zeros and adjust the exponent
425 1.1 mrg accordingly. If the exponent is not large enough to remove
426 1.1 mrg all the leading zeros, the result will be a subnormal value. */
427 1.1 mrg
/* a8 = the 23 mantissa bits of a2, left-justified for the zero count
   and then moved back down to bits 0..22.  */
428 1.1 mrg slli a8, a2, 9
429 1.1 mrg beqz a8, .Lsub_xzero
430 1.1 mrg do_nsau a6, a8, a7, a11
431 1.1 mrg srli a8, a8, 9
432 1.1 mrg bge a6, a10, .Lsub_subnormal
433 1.1 mrg addi a6, a6, 1
434 1.1 mrg
435 1.1 mrg .Lsub_normalize_shift:
436 1.1 mrg /* Shift the mantissa (a8/a9) left by a6. */
437 1.1 mrg ssl a6
438 1.1 mrg src a8, a8, a9
439 1.1 mrg sll a9, a9
440 1.1 mrg
441 1.1 mrg /* Combine the shifted mantissa with the sign and exponent,
442 1.1 mrg decrementing the exponent by a6. (The exponent has already
443 1.1 mrg been decremented by one due to the borrow from the subtraction,
444 1.1 mrg but adding the mantissa will increment the exponent by one.) */
445 1.1 mrg srli a2, a2, 23
446 1.1 mrg sub a2, a2, a6
447 1.1 mrg slli a2, a2, 23
448 1.1 mrg add a2, a2, a8
449 1.1 mrg j .Lsub_round
450 1.1 mrg
/* Reached after the result was incremented for a leftover fraction of
   exactly 1/2; clearing bit 0 yields the even neighbour.  */
451 1.1 mrg .Lsub_exactlyhalf:
452 1.1 mrg /* Round down to the nearest even value. */
453 1.1 mrg srli a2, a2, 1
454 1.1 mrg slli a2, a2, 1
455 1.1 mrg leaf_return
456 1.1 mrg
/* Reached from .Lsub_borrow when all 23 mantissa bits of a2 are zero;
   a8 is zero in that case.  */
457 1.1 mrg .Lsub_xzero:
458 1.1 mrg /* If there was a borrow from the exponent, and the mantissa and
459 1.1 mrg guard digits are all zero, then the inputs were equal and the
460 1.1 mrg result should be zero. */
461 1.1 mrg beqz a9, .Lsub_return_zero
462 1.1 mrg
463 1.1 mrg /* Only the guard digit is nonzero. Shift by min(24, a10). */
/* a11 = a10 - 24 is negative exactly when a10 < 24, and MOVLTZ then
   replaces the default of 24 with a10.  */
464 1.1 mrg addi a11, a10, -24
465 1.1 mrg movi a6, 24
466 1.1 mrg movltz a6, a10, a11
467 1.1 mrg j .Lsub_normalize_shift
468 1.1 mrg
469 1.1 mrg .Lsub_subnormal:
470 1.1 mrg /* The exponent is too small to shift away all the leading zeros.
471 1.1 mrg Set a6 to the current exponent (which has already been
472 1.1 mrg decremented by the borrow) so that the exponent of the result
473 1.1 mrg will be zero. Do not add 1 to a6 in this case, because: (1)
474 1.1 mrg adding the mantissa will not increment the exponent, so there is
475 1.1 mrg no need to subtract anything extra from the exponent to
476 1.1 mrg compensate, and (2) the effective exponent of a subnormal is 1
477 1.1 mrg not 0 so the shift amount must be 1 smaller than normal. */
478 1.1 mrg mov a6, a10
479 1.1 mrg j .Lsub_normalize_shift
480 1.1 mrg
481 1.1 mrg #endif /* L_addsubsf3 */
482 1.1 mrg
483 1.1 mrg #ifdef L_mulsf3
484 1.1 mrg
485 1.1 mrg /* Multiplication */
/* XCHAL_NO_MUL is defined when none of the MUL16, MUL32 and MAC16
   options is available; the multiply then goes through the software
   helper .Lmul_mulsi3 further down.  */
486 1.1 mrg #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
487 1.1 mrg #define XCHAL_NO_MUL 1
488 1.1 mrg #endif
489 1.1 mrg
490 1.1 mrg __mulsf3_aux:
491 1.1 mrg
492 1.1 mrg /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
493 1.1 mrg (This code is placed before the start of the function just to
494 1.1 mrg keep it in range of the limited branch displacements.) */
495 1.1 mrg
/* In the paths below a2 = x, a3 = y, a7 = x ^ y (bit 31 is the sign of
   the result) and a8/a9 are the exponents of x/y.  a10, a11 and a12
   are scratch.
   NOTE(review): do_nsau is a macro defined outside this file;
   presumably it stores the number of leading zero bits of its second
   operand in its first, using the last two as temporaries -- confirm.  */
496 1.1 mrg .Lmul_xexpzero:
497 1.1 mrg /* Clear the sign bit of x. */
498 1.1 mrg slli a2, a2, 1
499 1.1 mrg srli a2, a2, 1
500 1.1 mrg
501 1.1 mrg /* If x is zero, return zero. */
502 1.1 mrg beqz a2, .Lmul_return_zero
503 1.1 mrg
504 1.1 mrg /* Normalize x. Adjust the exponent in a8. */
/* Shifting left by (leading zeros - 8) brings the top mantissa bit to
   bit 23; the exponent becomes 1 minus that shift count.  */
505 1.1 mrg do_nsau a10, a2, a11, a12
506 1.1 mrg addi a10, a10, -8
507 1.1 mrg ssl a10
508 1.1 mrg sll a2, a2
509 1.1 mrg movi a8, 1
510 1.1 mrg sub a8, a8, a10
511 1.1 mrg j .Lmul_xnormalized
512 1.1 mrg
513 1.1 mrg .Lmul_yexpzero:
514 1.1 mrg /* Clear the sign bit of y. */
515 1.1 mrg slli a3, a3, 1
516 1.1 mrg srli a3, a3, 1
517 1.1 mrg
518 1.1 mrg /* If y is zero, return zero. */
519 1.1 mrg beqz a3, .Lmul_return_zero
520 1.1 mrg
521 1.1 mrg /* Normalize y. Adjust the exponent in a9. */
522 1.1 mrg do_nsau a10, a3, a11, a12
523 1.1 mrg addi a10, a10, -8
524 1.1 mrg ssl a10
525 1.1 mrg sll a3, a3
526 1.1 mrg movi a9, 1
527 1.1 mrg sub a9, a9, a10
528 1.1 mrg j .Lmul_ynormalized
529 1.1 mrg
530 1.1 mrg .Lmul_return_zero:
531 1.1 mrg /* Return zero with the appropriate sign bit. */
/* Only bit 31 of a7 survives the shift pair.  */
532 1.1 mrg srli a2, a7, 31
533 1.1 mrg slli a2, a2, 31
534 1.1 mrg j .Lmul_done
535 1.1 mrg
536 1.1 mrg .Lmul_xnan_or_inf:
537 1.1 mrg /* If y is zero, return NaN. */
538 1.1 mrg slli a8, a3, 1
539 1.1 mrg bnez a8, 1f
540 1.1 mrg movi a4, 0x400000 /* make it a quiet NaN */
541 1.1 mrg or a2, a2, a4
542 1.1 mrg j .Lmul_done
543 1.1 mrg 1:
544 1.1 mrg /* If y is NaN, return y. */
545 1.1 mrg bnall a3, a6, .Lmul_returnx
546 1.1 mrg slli a8, a3, 9
547 1.1 mrg beqz a8, .Lmul_returnx
548 1.1 mrg
549 1.1 mrg .Lmul_returny:
550 1.1 mrg mov a2, a3
551 1.1 mrg
552 1.1 mrg .Lmul_returnx:
553 1.1 mrg /* Set the sign bit and return. */
554 1.1 mrg extui a7, a7, 31, 1
555 1.1 mrg slli a2, a2, 1
556 1.1 mrg ssai 1
557 1.1 mrg src a2, a7, a2
558 1.1 mrg j .Lmul_done
559 1.1 mrg
560 1.1 mrg .Lmul_ynan_or_inf:
561 1.1 mrg /* If x is zero, return NaN. */
562 1.1 mrg slli a8, a2, 1
563 1.1 mrg bnez a8, .Lmul_returny
564 1.1 mrg movi a7, 0x400000 /* make it a quiet NaN */
565 1.1 mrg or a2, a3, a7
566 1.1 mrg j .Lmul_done
567 1.1 mrg
568 1.1 mrg .align 4
569 1.1 mrg .global __mulsf3
570 1.1 mrg .type __mulsf3, @function
/* __mulsf3: single-precision multiplication.  x is in a2 and y in a3;
   the result is left in a2.  Register use set up here:
     a6      0x7f800000, the mask of the exponent field
     a7      x ^ y; bit 31 is the sign of the result
     a8, a9  exponent fields of x and y, then a8 = their sum
     a10     0xffffff, mask of the 24-bit mantissa
   With the CALL0 ABI the function allocates 32 bytes of stack and
   saves a12-a15 at offsets 16..28; .Lmul_done restores them.  */
571 1.1 mrg __mulsf3:
572 1.1 mrg #if __XTENSA_CALL0_ABI__
573 1.1 mrg leaf_entry sp, 32
574 1.1 mrg addi sp, sp, -32
575 1.1 mrg s32i a12, sp, 16
576 1.1 mrg s32i a13, sp, 20
577 1.1 mrg s32i a14, sp, 24
578 1.1 mrg s32i a15, sp, 28
579 1.1 mrg #elif XCHAL_NO_MUL
580 1.1 mrg /* This is not really a leaf function; allocate enough stack space
581 1.1 mrg to allow CALL12s to a helper function. */
582 1.1 mrg leaf_entry sp, 64
583 1.1 mrg #else
584 1.1 mrg leaf_entry sp, 32
585 1.1 mrg #endif
586 1.1 mrg movi a6, 0x7f800000
587 1.1 mrg
588 1.1 mrg /* Get the sign of the result. */
589 1.1 mrg xor a7, a2, a3
590 1.1 mrg
591 1.1 mrg /* Check for NaN and infinity. */
592 1.1 mrg ball a2, a6, .Lmul_xnan_or_inf
593 1.1 mrg ball a3, a6, .Lmul_ynan_or_inf
594 1.1 mrg
595 1.1 mrg /* Extract the exponents. */
596 1.1 mrg extui a8, a2, 23, 8
597 1.1 mrg extui a9, a3, 23, 8
598 1.1 mrg
/* A zero exponent field means zero or subnormal.  The out-of-line
   handlers either return zero or normalize the operand and jump back
   to the label that follows the branch.  */
599 1.1 mrg beqz a8, .Lmul_xexpzero
600 1.1 mrg .Lmul_xnormalized:
601 1.1 mrg beqz a9, .Lmul_yexpzero
602 1.1 mrg .Lmul_ynormalized:
603 1.1 mrg
604 1.1 mrg /* Add the exponents. */
605 1.1 mrg add a8, a8, a9
606 1.1 mrg
607 1.1 mrg /* Replace sign/exponent fields with explicit "1.0". */
/* OR-ing with a6 sets bit 23 (among others); the AND with 0xffffff
   then clears bits 24..31.  */
608 1.1 mrg movi a10, 0xffffff
609 1.1 mrg or a2, a2, a6
610 1.1 mrg and a2, a2, a10
611 1.1 mrg or a3, a3, a6
612 1.1 mrg and a3, a3, a10
613 1.1 mrg
614 1.1 mrg /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */
/* a2 receives the high word and a6 the low word.  The inputs are the
   24-bit mantissas prepared above.  */
615 1.1 mrg
616 1.1 mrg #if XCHAL_HAVE_MUL32_HIGH
617 1.1 mrg
618 1.1 mrg mull a6, a2, a3
619 1.1 mrg muluh a2, a2, a3
620 1.1 mrg
621 1.1 mrg #else
622 1.1 mrg
623 1.1 mrg /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
624 1.1 mrg products. These partial products are:
625 1.1 mrg
626 1.1 mrg 0 xl * yl
627 1.1 mrg
628 1.1 mrg 1 xl * yh
629 1.1 mrg 2 xh * yl
630 1.1 mrg
631 1.1 mrg 3 xh * yh
632 1.1 mrg
633 1.1 mrg If using the Mul16 or Mul32 multiplier options, these input
634 1.1 mrg chunks must be stored in separate registers. For Mac16, the
635 1.1 mrg UMUL.AA.* opcodes can specify that the inputs come from either
636 1.1 mrg half of the registers, so there is no need to shift them out
637 1.1 mrg ahead of time. If there is no multiply hardware, the 16-bit
638 1.1 mrg chunks can be extracted when setting up the arguments to the
639 1.1 mrg separate multiply function. */
640 1.1 mrg
641 1.1 mrg #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
642 1.1 mrg /* Calling a separate multiply function will clobber a0 and requires
643 1.1 mrg use of a8 as a temporary, so save those values now. (The function
644 1.1 mrg uses a custom ABI so nothing else needs to be saved.) */
645 1.1 mrg s32i a0, sp, 0
646 1.1 mrg s32i a8, sp, 4
647 1.1 mrg #endif
648 1.1 mrg
649 1.1 mrg #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
650 1.1 mrg
/* a2h/a3h and a2l/a3l are the register names that do_mul builds by
   pasting the register name and the half letter together.  */
651 1.1 mrg #define a2h a4
652 1.1 mrg #define a3h a5
653 1.1 mrg
654 1.1 mrg /* Get the high halves of the inputs into registers. */
655 1.1 mrg srli a2h, a2, 16
656 1.1 mrg srli a3h, a3, 16
657 1.1 mrg
658 1.1 mrg #define a2l a2
659 1.1 mrg #define a3l a3
660 1.1 mrg
661 1.1 mrg #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
662 1.1 mrg /* Clear the high halves of the inputs. This does not matter
663 1.1 mrg for MUL16 because the high bits are ignored. */
664 1.1 mrg extui a2, a2, 0, 16
665 1.1 mrg extui a3, a3, 0, 16
666 1.1 mrg #endif
667 1.1 mrg #endif /* MUL16 || MUL32 */
668 1.1 mrg
669 1.1 mrg
/* do_mul(dst, xreg, xhalf, yreg, yhalf) puts the product of the chosen
   16-bit half ("l" or "h") of xreg and of yreg into dst.  One
   definition is selected per multiplier configuration.  */
670 1.1 mrg #if XCHAL_HAVE_MUL16
671 1.1 mrg
672 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
673 1.1 mrg mul16u dst, xreg ## xhalf, yreg ## yhalf
674 1.1 mrg
675 1.1 mrg #elif XCHAL_HAVE_MUL32
676 1.1 mrg
677 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
678 1.1 mrg mull dst, xreg ## xhalf, yreg ## yhalf
679 1.1 mrg
680 1.1 mrg #elif XCHAL_HAVE_MAC16
681 1.1 mrg
682 1.1 mrg /* The preprocessor insists on inserting a space when concatenating after
683 1.1 mrg a period in the definition of do_mul below. These macros are a workaround
684 1.1 mrg using underscores instead of periods when doing the concatenation. */
685 1.1 mrg #define umul_aa_ll umul.aa.ll
686 1.1 mrg #define umul_aa_lh umul.aa.lh
687 1.1 mrg #define umul_aa_hl umul.aa.hl
688 1.1 mrg #define umul_aa_hh umul.aa.hh
689 1.1 mrg
690 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
691 1.1 mrg umul_aa_ ## xhalf ## yhalf xreg, yreg; \
692 1.1 mrg rsr dst, ACCLO
693 1.1 mrg
694 1.1 mrg #else /* no multiply hardware */
695 1.1 mrg
696 1.1 mrg #define set_arg_l(dst, src) \
697 1.1 mrg extui dst, src, 0, 16
698 1.1 mrg #define set_arg_h(dst, src) \
699 1.1 mrg srli dst, src, 16
700 1.1 mrg
/* With CALL0 the helper takes its inputs in a13/a14 and returns the
   product in a12.  With CALL12 the arguments are placed in a14/a15 and
   the product is read back from a14.  */
701 1.1 mrg #if __XTENSA_CALL0_ABI__
702 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
703 1.1 mrg set_arg_ ## xhalf (a13, xreg); \
704 1.1 mrg set_arg_ ## yhalf (a14, yreg); \
705 1.1 mrg call0 .Lmul_mulsi3; \
706 1.1 mrg mov dst, a12
707 1.1 mrg #else
708 1.1 mrg #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
709 1.1 mrg set_arg_ ## xhalf (a14, xreg); \
710 1.1 mrg set_arg_ ## yhalf (a15, yreg); \
711 1.1 mrg call12 .Lmul_mulsi3; \
712 1.1 mrg mov dst, a14
713 1.1 mrg #endif /* __XTENSA_CALL0_ABI__ */
714 1.1 mrg
715 1.1 mrg #endif /* no multiply hardware */
716 1.1 mrg
717 1.1 mrg /* Add pp1 and pp2 into a6 with carry-out in a9. */
/* The sum wrapped around exactly when it is below one of its addends,
   which is what the unsigned compare against a11 detects.  */
718 1.1 mrg do_mul(a6, a2, l, a3, h) /* pp 1 */
719 1.1 mrg do_mul(a11, a2, h, a3, l) /* pp 2 */
720 1.1 mrg movi a9, 0
721 1.1 mrg add a6, a6, a11
722 1.1 mrg bgeu a6, a11, 1f
723 1.1 mrg addi a9, a9, 1
724 1.1 mrg 1:
725 1.1 mrg /* Shift the high half of a9/a6 into position in a9. Note that
726 1.1 mrg this value can be safely incremented without any carry-outs. */
727 1.1 mrg ssai 16
728 1.1 mrg src a9, a9, a6
729 1.1 mrg
730 1.1 mrg /* Compute the low word into a6. */
/* The shift amount register still holds 16 from the SSAI above, so the
   SLL moves the low half of a6 into the upper 16 bits.  */
731 1.1 mrg do_mul(a11, a2, l, a3, l) /* pp 0 */
732 1.1 mrg sll a6, a6
733 1.1 mrg add a6, a6, a11
734 1.1 mrg bgeu a6, a11, 1f
735 1.1 mrg addi a9, a9, 1
736 1.1 mrg 1:
737 1.1 mrg /* Compute the high word into a2. */
738 1.1 mrg do_mul(a2, a2, h, a3, h) /* pp 3 */
739 1.1 mrg add a2, a2, a9
740 1.1 mrg
741 1.1 mrg #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
742 1.1 mrg /* Restore values saved on the stack during the multiplication. */
743 1.1 mrg l32i a0, sp, 0
744 1.1 mrg l32i a8, sp, 4
745 1.1 mrg #endif
746 1.1 mrg #endif /* ! XCHAL_HAVE_MUL32_HIGH */
747 1.1 mrg
/* Normalize, round and pack the product.  On entry a2/a6 hold the
   high/low word of the 64-bit mantissa product, a8 the sum of the
   exponents and a7 the result sign in bit 31.  a4 and a5 are scratch.  */
748 1.1 mrg /* Shift left by 9 bits, unless there was a carry-out from the
749 1.1 mrg multiply, in which case, shift by 8 bits and increment the
750 1.1 mrg exponent. */
/* a5 is nonzero when bit 15 or above of the high word is set, i.e. the
   product of the two 24-bit mantissas reached bit 47.  After the shift
   a2 holds the mantissa and a6 the bits below it, used for rounding.  */
751 1.1 mrg movi a4, 9
752 1.1 mrg srli a5, a2, 24 - 9
753 1.1 mrg beqz a5, 1f
754 1.1 mrg addi a4, a4, -1
755 1.1 mrg addi a8, a8, 1
756 1.1 mrg 1: ssl a4
757 1.1 mrg src a2, a2, a6
758 1.1 mrg sll a6, a6
759 1.1 mrg
760 1.1 mrg /* Subtract the extra bias from the exponent sum (plus one to account
761 1.1 mrg for the explicit "1.0" of the mantissa that will be added to the
762 1.1 mrg exponent in the final result). */
763 1.1 mrg movi a4, 0x80
764 1.1 mrg sub a8, a8, a4
765 1.1 mrg
766 1.1 mrg /* Check for over/underflow. The value in a8 is one less than the
767 1.1 mrg final exponent, so values in the range 0..fd are OK here. */
/* The compare is unsigned, so a negative a8 (underflow) also takes the
   branch; .Lmul_overflow tells the two cases apart.  */
768 1.1 mrg movi a4, 0xfe
769 1.1 mrg bgeu a8, a4, .Lmul_overflow
770 1.1 mrg
771 1.1 mrg .Lmul_round:
772 1.1 mrg /* Round. */
/* Bit 31 of a6 is the first bit below the mantissa: round up when it is
   set, and go to the tie case when nothing else is set below it.  */
773 1.1 mrg bgez a6, .Lmul_rounded
774 1.1 mrg addi a2, a2, 1
775 1.1 mrg slli a6, a6, 1
776 1.1 mrg beqz a6, .Lmul_exactlyhalf
777 1.1 mrg
778 1.1 mrg .Lmul_rounded:
779 1.1 mrg /* Add the exponent to the mantissa. */
780 1.1 mrg slli a8, a8, 23
781 1.1 mrg add a2, a2, a8
782 1.1 mrg
783 1.1 mrg .Lmul_addsign:
784 1.1 mrg /* Add the sign bit. */
785 1.1 mrg srli a7, a7, 31
786 1.1 mrg slli a7, a7, 31
787 1.1 mrg or a2, a2, a7
788 1.1 mrg
/* Common exit: with the CALL0 ABI, reload the registers saved in the
   prologue and release the 32-byte frame.  */
789 1.1 mrg .Lmul_done:
790 1.1 mrg #if __XTENSA_CALL0_ABI__
791 1.1 mrg l32i a12, sp, 16
792 1.1 mrg l32i a13, sp, 20
793 1.1 mrg l32i a14, sp, 24
794 1.1 mrg l32i a15, sp, 28
795 1.1 mrg addi sp, sp, 32
796 1.1 mrg #endif
797 1.1 mrg leaf_return
798 1.1 mrg
/* Out-of-line tails of the multiplication: tie rounding, overflow to
   Infinity, and underflow to a subnormal or to zero.  a2 = mantissa,
   a6 = bits below the mantissa, a7 = result sign in bit 31, a8 =
   exponent value (one less than the final exponent).  */
799 1.1 mrg .Lmul_exactlyhalf:
800 1.1 mrg /* Round down to the nearest even value. */
801 1.1 mrg srli a2, a2, 1
802 1.1 mrg slli a2, a2, 1
803 1.1 mrg j .Lmul_rounded
804 1.1 mrg
/* Reached for any a8 outside 0..0xfd in the unsigned compare; negative
   values are underflows, the rest overflows.  */
805 1.1 mrg .Lmul_overflow:
806 1.1 mrg bltz a8, .Lmul_underflow
807 1.1 mrg /* Return +/- Infinity. */
808 1.1 mrg movi a8, 0xff
809 1.1 mrg slli a2, a8, 23
810 1.1 mrg j .Lmul_addsign
811 1.1 mrg
812 1.1 mrg .Lmul_underflow:
813 1.1 mrg /* Create a subnormal value, where the exponent field contains zero,
814 1.1 mrg but the effective exponent is 1. The value of a8 is one less than
815 1.1 mrg the actual exponent, so just negate it to get the shift amount. */
/* a9 keeps the old low-order bits so they can be folded back in as a
   sticky bit below.  */
816 1.1 mrg neg a8, a8
817 1.1 mrg mov a9, a6
818 1.1 mrg ssr a8
819 1.1 mrg bgeui a8, 32, .Lmul_flush_to_zero
820 1.1 mrg
821 1.1 mrg /* Shift a2 right. Any bits that are shifted out of a2 are saved
822 1.1 mrg in a6 (combined with the shifted-out bits currently in a6) for
823 1.1 mrg rounding the result. */
/* With the shift amount loaded by SSR, SLL shifts left by 32 minus that
   amount, so a6 receives exactly the bits that SRL drops from a2.  */
824 1.1 mrg sll a6, a2
825 1.1 mrg srl a2, a2
826 1.1 mrg
827 1.1 mrg /* Set the exponent to zero. */
828 1.1 mrg movi a8, 0
829 1.1 mrg
830 1.1 mrg /* Pack any nonzero bits shifted out into a6. */
831 1.1 mrg beqz a9, .Lmul_round
832 1.1 mrg movi a9, 1
833 1.1 mrg or a6, a6, a9
834 1.1 mrg j .Lmul_round
835 1.1 mrg
836 1.1 mrg .Lmul_flush_to_zero:
837 1.1 mrg /* Return zero with the appropriate sign bit. */
838 1.1 mrg srli a2, a7, 31
839 1.1 mrg slli a2, a2, 31
840 1.1 mrg j .Lmul_done
841 1.1 mrg
842 1.1 mrg #if XCHAL_NO_MUL
843 1.1 mrg
844 1.1 mrg /* For Xtensa processors with no multiply hardware, this simplified
845 1.1 mrg version of _mulsi3 is used for multiplying 16-bit chunks of
846 1.1 mrg the floating-point mantissas. When using CALL0, this function
847 1.1 mrg uses a custom ABI: the inputs are passed in a13 and a14, the
848 1.1 mrg result is returned in a12, and a8 and a15 are clobbered. */
849 1.1 mrg .align 4
850 1.1 mrg .Lmul_mulsi3:
851 1.1 mrg leaf_entry sp, 16
/* Shift-and-add multiply that consumes four bits of src1 per loop
   iteration: for each of bits 0..3 a candidate sum is formed in tmp1
   and committed to dst only when that bit of src1 is set.  src1 is
   then shifted right and src2 left by four, until src1 is zero.  Both
   src registers are destroyed.
   NOTE(review): do_addx2/do_addx4/do_addx8 are macros defined outside
   this file; presumably they compute (src2 * 2, 4 or 8) + dst into
   their first operand -- confirm.  */
852 1.1 mrg .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
853 1.1 mrg movi \dst, 0
854 1.1 mrg 1: add \tmp1, \src2, \dst
855 1.1 mrg extui \tmp2, \src1, 0, 1
856 1.1 mrg movnez \dst, \tmp1, \tmp2
857 1.1 mrg
858 1.1 mrg do_addx2 \tmp1, \src2, \dst, \tmp1
859 1.1 mrg extui \tmp2, \src1, 1, 1
860 1.1 mrg movnez \dst, \tmp1, \tmp2
861 1.1 mrg
862 1.1 mrg do_addx4 \tmp1, \src2, \dst, \tmp1
863 1.1 mrg extui \tmp2, \src1, 2, 1
864 1.1 mrg movnez \dst, \tmp1, \tmp2
865 1.1 mrg
866 1.1 mrg do_addx8 \tmp1, \src2, \dst, \tmp1
867 1.1 mrg extui \tmp2, \src1, 3, 1
868 1.1 mrg movnez \dst, \tmp1, \tmp2
869 1.1 mrg
870 1.1 mrg srli \src1, \src1, 4
871 1.1 mrg slli \src2, \src2, 4
872 1.1 mrg bnez \src1, 1b
873 1.1 mrg .endm
874 1.1 mrg #if __XTENSA_CALL0_ABI__
875 1.1 mrg mul_mulsi3_body a12, a13, a14, a15, a8
876 1.1 mrg #else
877 1.1 mrg /* The result will be written into a2, so save that argument in a4. */
878 1.1 mrg mov a4, a2
879 1.1 mrg mul_mulsi3_body a2, a4, a3, a5, a6
880 1.1 mrg #endif
881 1.1 mrg leaf_return
882 1.1 mrg #endif /* XCHAL_NO_MUL */
883 1.1 mrg #endif /* L_mulsf3 */
884 1.1 mrg
885 1.1 mrg #ifdef L_divsf3
886 1.1 mrg
887 1.1 mrg /* Division */
888 1.1 mrg __divsf3_aux:
889 1.1 mrg
890 1.1 mrg /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
891 1.1 mrg (This code is placed before the start of the function just to
892 1.1 mrg keep it in range of the limited branch displacements.) */
893 1.1 mrg
/* In the paths below a2 = x (dividend), a3 = y (divisor), a6 =
   0x7f800000 and a7 = x ^ y (bit 31 is the sign of the result), as set
   up at the start of __divsf3.  a4, a5 and a10 are scratch.
   NOTE(review): the branches that reach these labels are outside the
   visible part of the file, and do_nsau is a macro defined elsewhere;
   presumably it stores the number of leading zero bits of its second
   operand in its first -- confirm both.  */
894 1.1 mrg .Ldiv_yexpzero:
895 1.1 mrg /* Clear the sign bit of y. */
896 1.1 mrg slli a3, a3, 1
897 1.1 mrg srli a3, a3, 1
898 1.1 mrg
899 1.1 mrg /* Check for division by zero. */
900 1.1 mrg beqz a3, .Ldiv_yzero
901 1.1 mrg
902 1.1 mrg /* Normalize y. Adjust the exponent in a9. */
/* Shifting left by (leading zeros - 8) brings the top mantissa bit to
   bit 23; the exponent becomes 1 minus that shift count.  */
903 1.1 mrg do_nsau a10, a3, a4, a5
904 1.1 mrg addi a10, a10, -8
905 1.1 mrg ssl a10
906 1.1 mrg sll a3, a3
907 1.1 mrg movi a9, 1
908 1.1 mrg sub a9, a9, a10
909 1.1 mrg j .Ldiv_ynormalized
910 1.1 mrg
911 1.1 mrg .Ldiv_yzero:
912 1.1 mrg /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
/* a4 = x without its sign bit, used only for the zero test.  a2 is
   rebuilt as result sign | 0x7f800000, i.e. a signed Infinity.  */
913 1.1 mrg slli a4, a2, 1
914 1.1 mrg srli a4, a4, 1
915 1.1 mrg srli a2, a7, 31
916 1.1 mrg slli a2, a2, 31
917 1.1 mrg or a2, a2, a6
918 1.1 mrg bnez a4, 1f
919 1.1 mrg movi a4, 0x400000 /* make it a quiet NaN */
920 1.1 mrg or a2, a2, a4
921 1.1 mrg 1: leaf_return
922 1.1 mrg
923 1.1 mrg .Ldiv_xexpzero:
924 1.1 mrg /* Clear the sign bit of x. */
925 1.1 mrg slli a2, a2, 1
926 1.1 mrg srli a2, a2, 1
927 1.1 mrg
928 1.1 mrg /* If x is zero, return zero. */
929 1.1 mrg beqz a2, .Ldiv_return_zero
930 1.1 mrg
931 1.1 mrg /* Normalize x. Adjust the exponent in a8. */
932 1.1 mrg do_nsau a10, a2, a4, a5
933 1.1 mrg addi a10, a10, -8
934 1.1 mrg ssl a10
935 1.1 mrg sll a2, a2
936 1.1 mrg movi a8, 1
937 1.1 mrg sub a8, a8, a10
938 1.1 mrg j .Ldiv_xnormalized
939 1.1 mrg
940 1.1 mrg .Ldiv_return_zero:
941 1.1 mrg /* Return zero with the appropriate sign bit. */
/* Only bit 31 of a7 survives the shift pair.  */
942 1.1 mrg srli a2, a7, 31
943 1.1 mrg slli a2, a2, 31
944 1.1 mrg leaf_return
945 1.1 mrg
946 1.1 mrg .Ldiv_xnan_or_inf:
947 1.1 mrg /* Set the sign bit of the result. */
948 1.1 mrg srli a7, a3, 31
949 1.1 mrg slli a7, a7, 31
950 1.1 mrg xor a2, a2, a7
951 1.1 mrg /* If y is NaN or Inf, return NaN. */
952 1.1 mrg bnall a3, a6, 1f
953 1.1 mrg movi a4, 0x400000 /* make it a quiet NaN */
954 1.1 mrg or a2, a2, a4
955 1.1 mrg 1: leaf_return
956 1.1 mrg
957 1.1 mrg .Ldiv_ynan_or_inf:
958 1.1 mrg /* If y is Infinity, return zero. */
959 1.1 mrg slli a8, a3, 9
960 1.1 mrg beqz a8, .Ldiv_return_zero
961 1.1 mrg /* y is NaN; return it. */
962 1.1 mrg mov a2, a3
963 1.1 mrg leaf_return
964 1.1 mrg
965 1.1 mrg .align 4
966 1.1 mrg .global __divsf3
967 1.1 mrg .type __divsf3, @function
968 1.1 mrg __divsf3:
969 1.1 mrg leaf_entry sp, 16
970 1.1 mrg movi a6, 0x7f800000 /* mask of the 8-bit exponent field */
971 1.1 mrg /* Single-precision divide: a2 = x (a2) / y (a3) on IEEE-754 bit patterns, using restoring division and round-to-nearest-even. a7 = sign, a8/a9 = exponents, a10 = quotient. */
972 1.1 mrg /* Get the sign of the result. */
973 1.1 mrg xor a7, a2, a3 /* bit 31 of a7 is the result sign */
974 1.1 mrg
975 1.1 mrg /* Check for NaN and infinity. */
976 1.1 mrg ball a2, a6, .Ldiv_xnan_or_inf /* taken when every exponent bit of x is set */
977 1.1 mrg ball a3, a6, .Ldiv_ynan_or_inf /* taken when every exponent bit of y is set */
978 1.1 mrg
979 1.1 mrg /* Extract the exponents. */
980 1.1 mrg extui a8, a2, 23, 8
981 1.1 mrg extui a9, a3, 23, 8
982 1.1 mrg
983 1.1 mrg beqz a9, .Ldiv_yexpzero /* y is zero or subnormal; the subnormal path jumps back to .Ldiv_ynormalized */
984 1.1 mrg .Ldiv_ynormalized:
985 1.1 mrg beqz a8, .Ldiv_xexpzero /* x is zero or subnormal; the subnormal path jumps back to .Ldiv_xnormalized */
986 1.1 mrg .Ldiv_xnormalized:
987 1.1 mrg
988 1.1 mrg /* Subtract the exponents. */
989 1.1 mrg sub a8, a8, a9 /* the biases cancel; the bias is added back after the loop */
990 1.1 mrg
991 1.1 mrg /* Replace sign/exponent fields with explicit "1.0". */
992 1.1 mrg movi a10, 0xffffff /* mask of the low 24 bits */
993 1.1 mrg or a2, a2, a6 /* sets bit 23 along with the other exponent bits */
994 1.1 mrg and a2, a2, a10 /* keep bit 23 and the 23 mantissa bits */
995 1.1 mrg or a3, a3, a6
996 1.1 mrg and a3, a3, a10
997 1.1 mrg
998 1.1 mrg /* The first digit of the mantissa division must be a one.
999 1.1 mrg Shift x (and adjust the exponent) as needed to make this true. */
1000 1.1 mrg bltu a3, a2, 1f /* y < x already: no adjustment needed */
1001 1.1 mrg slli a2, a2, 1 /* x <= y: double x so that it exceeds y */
1002 1.1 mrg addi a8, a8, -1 /* and compensate in the exponent */
1003 1.1 mrg 1:
1004 1.1 mrg /* Do the first subtraction and shift. */
1005 1.1 mrg sub a2, a2, a3
1006 1.1 mrg slli a2, a2, 1 /* a2 now holds the remainder << 1 */
1007 1.1 mrg
1008 1.1 mrg /* Put the quotient into a10. */
1009 1.1 mrg movi a10, 1 /* the leading quotient digit produced above */
1010 1.1 mrg
1011 1.1 mrg /* Divide one bit at a time for 23 bits. */
1012 1.1 mrg movi a9, 23 /* a9 is reused as the loop count; the y exponent is already folded into a8 */
1013 1.1 mrg #if XCHAL_HAVE_LOOPS
1014 1.1 mrg loop a9, .Ldiv_loopend
1015 1.1 mrg #endif
1016 1.1 mrg .Ldiv_loop:
1017 1.1 mrg /* Shift the quotient << 1. */
1018 1.1 mrg slli a10, a10, 1
1019 1.1 mrg
1020 1.1 mrg /* Is this digit a 0 or 1? */
1021 1.1 mrg bltu a2, a3, 1f /* remainder < divisor: digit is 0 */
1022 1.1 mrg
1023 1.1 mrg /* Output a 1 and subtract. */
1024 1.1 mrg addi a10, a10, 1
1025 1.1 mrg sub a2, a2, a3
1026 1.1 mrg
1027 1.1 mrg /* Shift the dividend << 1. */
1028 1.1 mrg 1: slli a2, a2, 1
1029 1.1 mrg
1030 1.1 mrg #if !XCHAL_HAVE_LOOPS
1031 1.1 mrg addi a9, a9, -1
1032 1.1 mrg bnez a9, .Ldiv_loop
1033 1.1 mrg #endif
1034 1.1 mrg .Ldiv_loopend:
1035 1.1 mrg
1036 1.1 mrg /* Add the exponent bias (less one to account for the explicit "1.0"
1037 1.1 mrg of the mantissa that will be added to the exponent in the final
1038 1.1 mrg result). */
1039 1.1 mrg addi a8, a8, 0x7e
1040 1.1 mrg
1041 1.1 mrg /* Check for over/underflow. The value in a8 is one less than the
1042 1.1 mrg final exponent, so values in the range 0..fd are OK here. */
1043 1.1 mrg movi a4, 0xfe
1044 1.1 mrg bgeu a8, a4, .Ldiv_overflow /* unsigned compare, so negative a8 (underflow) is caught here too */
1045 1.1 mrg
1046 1.1 mrg .Ldiv_round:
1047 1.1 mrg /* Round. The remainder (<< 1) is in a2. */
1048 1.1 mrg bltu a2, a3, .Ldiv_rounded /* remainder below half the divisor: round down */
1049 1.1 mrg addi a10, a10, 1 /* at least half: round up */
1050 1.1 mrg beq a2, a3, .Ldiv_exactlyhalf /* exact tie: fix up to an even mantissa */
1051 1.1 mrg
1052 1.1 mrg .Ldiv_rounded:
1053 1.1 mrg /* Add the exponent to the mantissa. */
1054 1.1 mrg slli a8, a8, 23
1055 1.1 mrg add a2, a10, a8 /* bit 23 of a10 (the explicit 1) carries into the exponent field */
1056 1.1 mrg
1057 1.1 mrg .Ldiv_addsign:
1058 1.1 mrg /* Add the sign bit. */
1059 1.1 mrg srli a7, a7, 31
1060 1.1 mrg slli a7, a7, 31 /* keep only bit 31 of a7 */
1061 1.1 mrg or a2, a2, a7
1062 1.1 mrg leaf_return
1063 1.1 mrg
1064 1.1 mrg .Ldiv_overflow:
1065 1.1 mrg bltz a8, .Ldiv_underflow /* negative a8 means underflow; otherwise the exponent is too large */
1066 1.1 mrg /* Return +/- Infinity. */
1067 1.1 mrg addi a8, a4, 1 /* 0xff */
1068 1.1 mrg slli a2, a8, 23 /* exponent all ones, mantissa zero */
1069 1.1 mrg j .Ldiv_addsign
1070 1.1 mrg
1071 1.1 mrg .Ldiv_exactlyhalf:
1072 1.1 mrg /* Remainder is exactly half the divisor. Round even. */
1073 1.1 mrg srli a10, a10, 1
1074 1.1 mrg slli a10, a10, 1 /* clear the low bit of the already incremented quotient */
1075 1.1 mrg j .Ldiv_rounded
1076 1.1 mrg
1077 1.1 mrg .Ldiv_underflow:
1078 1.1 mrg /* Create a subnormal value, where the exponent field contains zero,
1079 1.1 mrg but the effective exponent is 1. The value of a8 is one less than
1080 1.1 mrg the actual exponent, so just negate it to get the shift amount. */
1081 1.1 mrg neg a8, a8
1082 1.1 mrg ssr a8 /* set up the shift amount register for a right shift by a8 */
1083 1.1 mrg bgeui a8, 32, .Ldiv_flush_to_zero /* shifting by 32 or more leaves nothing */
1084 1.1 mrg
1085 1.1 mrg /* Shift a10 right. Any bits that are shifted out of a10 are
1086 1.1 mrg saved in a6 for rounding the result. */
1087 1.1 mrg sll a6, a10 /* a6 = a10 << (32 - a8): the discarded bits, left-justified */
1088 1.1 mrg srl a10, a10 /* a10 >>= a8 */
1089 1.1 mrg
1090 1.1 mrg /* Set the exponent to zero. */
1091 1.1 mrg movi a8, 0
1092 1.1 mrg
1093 1.1 mrg /* Pack any nonzero remainder (in a2) into a6. */
1094 1.1 mrg beqz a2, 1f
1095 1.1 mrg movi a9, 1
1096 1.1 mrg or a6, a6, a9 /* sticky bit: the division was inexact */
1097 1.1 mrg
1098 1.1 mrg /* Round a10 based on the bits shifted out into a6. */
1099 1.1 mrg 1: bgez a6, .Ldiv_rounded /* top discarded bit clear: below half, round down */
1100 1.1 mrg addi a10, a10, 1 /* at least half: round up */
1101 1.1 mrg slli a6, a6, 1 /* look at the bits below the half bit */
1102 1.1 mrg bnez a6, .Ldiv_rounded /* more than half: keep the round-up */
1103 1.1 mrg srli a10, a10, 1
1104 1.1 mrg slli a10, a10, 1 /* exact tie: clear the low bit to round to even */
1105 1.1 mrg j .Ldiv_rounded
1106 1.1 mrg
1107 1.1 mrg .Ldiv_flush_to_zero:
1108 1.1 mrg /* Return zero with the appropriate sign bit. */
1109 1.1 mrg srli a2, a7, 31
1110 1.1 mrg slli a2, a2, 31
1111 1.1 mrg leaf_return
1112 1.1 mrg
1113 1.1 mrg #endif /* L_divsf3 */
1114 1.1 mrg
1115 1.1 mrg #ifdef L_cmpsf2
1116 1.1 mrg
1117 1.1 mrg /* Equal and Not Equal: with x in a2 and y in a3, return 0 in a2 when x == y and 1 otherwise (a NaN never compares equal; +0 and -0 do). */
1118 1.1 mrg
1119 1.1 mrg .align 4
1120 1.1 mrg .global __eqsf2
1121 1.1 mrg .global __nesf2
1122 1.1 mrg .set __nesf2, __eqsf2 /* __nesf2 is the same entry point as __eqsf2 */
1123 1.1 mrg .type __eqsf2, @function
1124 1.1 mrg __eqsf2:
1125 1.1 mrg leaf_entry sp, 16
1126 1.1 mrg bne a2, a3, 4f /* bit patterns differ: only +0 versus -0 can still be equal */
1127 1.1 mrg
1128 1.1 mrg /* The values are equal but NaN != NaN. Check the exponent. */
1129 1.1 mrg movi a6, 0x7f800000
1130 1.1 mrg ball a2, a6, 3f /* all exponent bits set: x is Infinity or NaN */
1131 1.1 mrg
1132 1.1 mrg /* Equal. */
1133 1.1 mrg movi a2, 0
1134 1.1 mrg leaf_return
1135 1.1 mrg
1136 1.1 mrg /* Not equal. NOTE(review): no branch in this function targets label 2, so these two instructions look unreachable - confirm. */
1137 1.1 mrg 2: movi a2, 1
1138 1.1 mrg leaf_return
1139 1.1 mrg
1140 1.1 mrg /* Check if the mantissas are nonzero. */
1141 1.1 mrg 3: slli a7, a2, 9 /* a7 = mantissa of x with sign and exponent shifted out; nonzero means NaN */
1142 1.1 mrg j 5f
1143 1.1 mrg
1144 1.1 mrg /* Check if x and y are zero with different signs. */
1145 1.1 mrg 4: or a7, a2, a3
1146 1.1 mrg slli a7, a7, 1 /* discard the sign bit */
1147 1.1 mrg
1148 1.1 mrg /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1149 1.1 mrg of x when the exponent bits of x are all ones (0x7f800000) and x == y. */
1150 1.1 mrg 5: movi a2, 0
1151 1.1 mrg movi a3, 1
1152 1.1 mrg movnez a2, a3, a7 /* a2 = 1 when a7 is nonzero */
1153 1.1 mrg leaf_return
1154 1.1 mrg
1155 1.1 mrg
1156 1.1 mrg /* Greater Than: with x in a2 and y in a3, return 1 in a2 when x > y and 0 otherwise; any NaN operand gives 0. Ordered operands are handled by .Lle_cmp, which lives in __lesf2. */
1157 1.1 mrg
1158 1.1 mrg .align 4
1159 1.1 mrg .global __gtsf2
1160 1.1 mrg .type __gtsf2, @function
1161 1.1 mrg __gtsf2:
1162 1.1 mrg leaf_entry sp, 16
1163 1.1 mrg movi a6, 0x7f800000 /* exponent field mask */
1164 1.1 mrg ball a2, a6, 2f /* x is Infinity or NaN */
1165 1.1 mrg 1: bnall a3, a6, .Lle_cmp /* y is finite: do the ordered comparison */
1166 1.1 mrg
1167 1.1 mrg /* Check if y is a NaN. */
1168 1.1 mrg slli a7, a3, 9 /* mantissa of y; zero means Infinity */
1169 1.1 mrg beqz a7, .Lle_cmp
1170 1.1 mrg movi a2, 0 /* unordered: report "not greater" */
1171 1.1 mrg leaf_return
1172 1.1 mrg
1173 1.1 mrg /* Check if x is a NaN. */
1174 1.1 mrg 2: slli a7, a2, 9 /* mantissa of x; zero means Infinity */
1175 1.1 mrg beqz a7, 1b /* x is Infinity: go on to check y */
1176 1.1 mrg movi a2, 0 /* unordered: report "not greater" */
1177 1.1 mrg leaf_return
1178 1.1 mrg
1179 1.1 mrg
1180 1.1 mrg /* Less Than or Equal: with x in a2 and y in a3, return 0 in a2 when x <= y and 1 otherwise; any NaN operand gives 1. .Lle_cmp below is also branched to from __gtsf2. */
1181 1.1 mrg
1182 1.1 mrg .align 4
1183 1.1 mrg .global __lesf2
1184 1.1 mrg .type __lesf2, @function
1185 1.1 mrg __lesf2:
1186 1.1 mrg leaf_entry sp, 16
1187 1.1 mrg movi a6, 0x7f800000 /* exponent field mask */
1188 1.1 mrg ball a2, a6, 2f /* x is Infinity or NaN */
1189 1.1 mrg 1: bnall a3, a6, .Lle_cmp /* y is finite: do the ordered comparison */
1190 1.1 mrg
1191 1.1 mrg /* Check if y is a NaN. */
1192 1.1 mrg slli a7, a3, 9 /* mantissa of y; zero means Infinity */
1193 1.1 mrg beqz a7, .Lle_cmp
1194 1.1 mrg movi a2, 1 /* unordered: report "not less or equal" */
1195 1.1 mrg leaf_return
1196 1.1 mrg
1197 1.1 mrg /* Check if x is a NaN. */
1198 1.1 mrg 2: slli a7, a2, 9 /* mantissa of x; zero means Infinity */
1199 1.1 mrg beqz a7, 1b /* x is Infinity: go on to check y */
1200 1.1 mrg movi a2, 1 /* unordered: report "not less or equal" */
1201 1.1 mrg leaf_return
1202 1.1 mrg
1203 1.1 mrg .Lle_cmp: /* ordered compare; returns 0 when x <= y and 1 when x > y */
1204 1.1 mrg /* Check if x and y have different signs. */
1205 1.1 mrg xor a7, a2, a3
1206 1.1 mrg bltz a7, .Lle_diff_signs
1207 1.1 mrg
1208 1.1 mrg /* Check if x is negative. */
1209 1.1 mrg bltz a2, .Lle_xneg
1210 1.1 mrg
1211 1.1 mrg /* Check if x <= y. Both are non-negative, so the bit patterns order like unsigned integers. */
1212 1.1 mrg bltu a3, a2, 5f /* y < x: return 1 */
1213 1.1 mrg 4: movi a2, 0
1214 1.1 mrg leaf_return
1215 1.1 mrg
1216 1.1 mrg .Lle_xneg:
1217 1.1 mrg /* Check if y <= x. Both are negative, so the larger bit pattern is the smaller value. */
1218 1.1 mrg bgeu a2, a3, 4b /* x pattern >= y pattern means x <= y: return 0 */
1219 1.1 mrg 5: movi a2, 1
1220 1.1 mrg leaf_return
1221 1.1 mrg
1222 1.1 mrg .Lle_diff_signs:
1223 1.1 mrg bltz a2, 4b /* x negative, y non-negative: x <= y, return 0 */
1224 1.1 mrg
1225 1.1 mrg /* Check if both x and y are zero. */
1226 1.1 mrg or a7, a2, a3
1227 1.1 mrg slli a7, a7, 1 /* discard the sign bit */
1228 1.1 mrg movi a2, 1
1229 1.1 mrg movi a3, 0
1230 1.1 mrg moveqz a2, a3, a7 /* +0 versus -0 counts as equal: return 0 */
1231 1.1 mrg leaf_return
1232 1.1 mrg
1233 1.1 mrg
1234 1.1 mrg /* Greater Than or Equal: with x in a2 and y in a3, return 0 in a2 when x >= y and -1 otherwise; any NaN operand gives -1. Ordered operands are handled by .Llt_cmp, which lives in __ltsf2. */
1235 1.1 mrg
1236 1.1 mrg .align 4
1237 1.1 mrg .global __gesf2
1238 1.1 mrg .type __gesf2, @function
1239 1.1 mrg __gesf2:
1240 1.1 mrg leaf_entry sp, 16
1241 1.1 mrg movi a6, 0x7f800000 /* exponent field mask */
1242 1.1 mrg ball a2, a6, 2f /* x is Infinity or NaN */
1243 1.1 mrg 1: bnall a3, a6, .Llt_cmp /* y is finite: do the ordered comparison */
1244 1.1 mrg
1245 1.1 mrg /* Check if y is a NaN. */
1246 1.1 mrg slli a7, a3, 9 /* mantissa of y; zero means Infinity */
1247 1.1 mrg beqz a7, .Llt_cmp
1248 1.1 mrg movi a2, -1 /* unordered: report "not greater or equal" */
1249 1.1 mrg leaf_return
1250 1.1 mrg
1251 1.1 mrg /* Check if x is a NaN. */
1252 1.1 mrg 2: slli a7, a2, 9 /* mantissa of x; zero means Infinity */
1253 1.1 mrg beqz a7, 1b /* x is Infinity: go on to check y */
1254 1.1 mrg movi a2, -1 /* unordered: report "not greater or equal" */
1255 1.1 mrg leaf_return
1256 1.1 mrg
1257 1.1 mrg
1258 1.1 mrg /* Less Than: with x in a2 and y in a3, return -1 in a2 when x < y and 0 otherwise; any NaN operand gives 0. .Llt_cmp below is also branched to from __gesf2. */
1259 1.1 mrg
1260 1.1 mrg .align 4
1261 1.1 mrg .global __ltsf2
1262 1.1 mrg .type __ltsf2, @function
1263 1.1 mrg __ltsf2:
1264 1.1 mrg leaf_entry sp, 16
1265 1.1 mrg movi a6, 0x7f800000 /* exponent field mask */
1266 1.1 mrg ball a2, a6, 2f /* x is Infinity or NaN */
1267 1.1 mrg 1: bnall a3, a6, .Llt_cmp /* y is finite: do the ordered comparison */
1268 1.1 mrg
1269 1.1 mrg /* Check if y is a NaN. */
1270 1.1 mrg slli a7, a3, 9 /* mantissa of y; zero means Infinity */
1271 1.1 mrg beqz a7, .Llt_cmp
1272 1.1 mrg movi a2, 0 /* unordered: report "not less" */
1273 1.1 mrg leaf_return
1274 1.1 mrg
1275 1.1 mrg /* Check if x is a NaN. */
1276 1.1 mrg 2: slli a7, a2, 9 /* mantissa of x; zero means Infinity */
1277 1.1 mrg beqz a7, 1b /* x is Infinity: go on to check y */
1278 1.1 mrg movi a2, 0 /* unordered: report "not less" */
1279 1.1 mrg leaf_return
1280 1.1 mrg
1281 1.1 mrg .Llt_cmp: /* ordered compare; returns -1 when x < y and 0 when x >= y */
1282 1.1 mrg /* Check if x and y have different signs. */
1283 1.1 mrg xor a7, a2, a3
1284 1.1 mrg bltz a7, .Llt_diff_signs
1285 1.1 mrg
1286 1.1 mrg /* Check if x is negative. */
1287 1.1 mrg bltz a2, .Llt_xneg
1288 1.1 mrg
1289 1.1 mrg /* Check if x < y. Both are non-negative, so the bit patterns order like unsigned integers. */
1290 1.1 mrg bgeu a2, a3, 5f /* x >= y: return 0 */
1291 1.1 mrg 4: movi a2, -1
1292 1.1 mrg leaf_return
1293 1.1 mrg
1294 1.1 mrg .Llt_xneg:
1295 1.1 mrg /* Check if y < x. Both are negative, so the larger bit pattern is the smaller value. */
1296 1.1 mrg bltu a3, a2, 4b /* y pattern < x pattern means x < y: return -1 */
1297 1.1 mrg 5: movi a2, 0
1298 1.1 mrg leaf_return
1299 1.1 mrg
1300 1.1 mrg .Llt_diff_signs:
1301 1.1 mrg bgez a2, 5b /* x non-negative, y negative: x >= y, return 0 */
1302 1.1 mrg
1303 1.1 mrg /* x is negative and y is not, so x < y unless both are zero (-0 and +0). */
1304 1.1 mrg or a7, a2, a3
1305 1.1 mrg slli a7, a7, 1 /* discard the sign bit */
1306 1.1 mrg movi a2, 0
1307 1.1 mrg movi a3, -1
1308 1.1 mrg movnez a2, a3, a7 /* return -1 when at least one operand is nonzero */
1309 1.1 mrg leaf_return
1310 1.1 mrg
1311 1.1 mrg
1312 1.1 mrg /* Unordered: with x in a2 and y in a3, return 1 in a2 when either operand is a NaN and 0 otherwise. */
1313 1.1 mrg
1314 1.1 mrg .align 4
1315 1.1 mrg .global __unordsf2
1316 1.1 mrg .type __unordsf2, @function
1317 1.1 mrg __unordsf2:
1318 1.1 mrg leaf_entry sp, 16
1319 1.1 mrg movi a6, 0x7f800000 /* exponent field mask */
1320 1.1 mrg ball a2, a6, 3f /* x is Infinity or NaN */
1321 1.1 mrg 1: ball a3, a6, 4f /* y is Infinity or NaN */
1322 1.1 mrg 2: movi a2, 0 /* neither operand is a NaN */
1323 1.1 mrg leaf_return
1324 1.1 mrg
1325 1.1 mrg 3: slli a7, a2, 9 /* mantissa of x; zero means Infinity */
1326 1.1 mrg beqz a7, 1b /* x is Infinity: still need to check y */
1327 1.1 mrg movi a2, 1 /* x is a NaN */
1328 1.1 mrg leaf_return
1329 1.1 mrg
1330 1.1 mrg 4: slli a7, a3, 9 /* mantissa of y; zero means Infinity */
1331 1.1 mrg beqz a7, 2b /* y is Infinity: ordered */
1332 1.1 mrg movi a2, 1 /* y is a NaN */
1333 1.1 mrg leaf_return
1334 1.1 mrg
1335 1.1 mrg #endif /* L_cmpsf2 */
1336 1.1 mrg
1337 1.1 mrg #ifdef L_fixsfsi
1338 1.1 mrg
1339 1.1 mrg .align 4
1340 1.1 mrg .global __fixsfsi
1341 1.1 mrg .type __fixsfsi, @function
1342 1.1 mrg __fixsfsi:
1343 1.1 mrg leaf_entry sp, 16
1344 1.1 mrg /* Convert the single-precision value in a2 to a signed 32-bit integer in a2, truncating toward zero. Out-of-range values saturate to 0x7fffffff or 0x80000000; NaN gives 0x7fffffff. */
1345 1.1 mrg /* Check for NaN and Infinity. */
1346 1.1 mrg movi a6, 0x7f800000 /* exponent field mask */
1347 1.1 mrg ball a2, a6, .Lfixsfsi_nan_or_inf
1348 1.1 mrg
1349 1.1 mrg /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */
1350 1.1 mrg extui a4, a2, 23, 8
1351 1.1 mrg addi a4, a4, -0x7e /* a4 = number of integer bits in the magnitude */
1352 1.1 mrg bgei a4, 32, .Lfixsfsi_maxint /* magnitude is 2^31 or more: saturate */
1353 1.1 mrg blti a4, 1, .Lfixsfsi_zero /* magnitude below 1: result is 0 */
1354 1.1 mrg
1355 1.1 mrg /* Add explicit "1.0" and shift << 8. */
1356 1.1 mrg or a7, a2, a6 /* sets bit 23; a7 also keeps the sign of x for the final select */
1357 1.1 mrg slli a5, a7, 8 /* leading 1 now at bit 31, sign and exponent bits shifted out */
1358 1.1 mrg
1359 1.1 mrg /* Shift back to the right, based on the exponent. */
1360 1.1 mrg ssl a4 /* shift by 32 - a4 */
1361 1.1 mrg srl a5, a5 /* a5 = integer part of the magnitude */
1362 1.1 mrg
1363 1.1 mrg /* Negate the result if sign != 0. */
1364 1.1 mrg neg a2, a5
1365 1.1 mrg movgez a2, a5, a7 /* use the un-negated value when the input was non-negative */
1366 1.1 mrg leaf_return
1367 1.1 mrg
1368 1.1 mrg .Lfixsfsi_nan_or_inf:
1369 1.1 mrg /* Handle Infinity and NaN. */
1370 1.1 mrg slli a4, a2, 9 /* mantissa of x; zero means Infinity */
1371 1.1 mrg beqz a4, .Lfixsfsi_maxint
1372 1.1 mrg
1373 1.1 mrg /* Translate NaN to +maxint. */
1374 1.1 mrg movi a2, 0 /* makes the sign test below pick the positive limit */
1375 1.1 mrg
1376 1.1 mrg .Lfixsfsi_maxint:
1377 1.1 mrg slli a4, a6, 8 /* 0x80000000 */
1378 1.1 mrg addi a5, a4, -1 /* 0x7fffffff */
1379 1.1 mrg movgez a4, a5, a2 /* non-negative input gets 0x7fffffff, negative gets 0x80000000 */
1380 1.1 mrg mov a2, a4
1381 1.1 mrg leaf_return
1382 1.1 mrg
1383 1.1 mrg .Lfixsfsi_zero:
1384 1.1 mrg movi a2, 0
1385 1.1 mrg leaf_return
1386 1.1 mrg
1387 1.1 mrg #endif /* L_fixsfsi */
1388 1.1 mrg
1389 1.1 mrg #ifdef L_fixsfdi
1390 1.1 mrg
1391 1.1 mrg .align 4
1392 1.1 mrg .global __fixsfdi
1393 1.1 mrg .type __fixsfdi, @function
1394 1.1 mrg __fixsfdi:
1395 1.1 mrg leaf_entry sp, 16
1396 1.1 mrg /* Convert the single-precision value in a2 to a signed 64-bit integer in xh:xl (a2/a3, assigned by endianness at the top of the file), truncating toward zero. Out-of-range values saturate; NaN gives the positive limit. */
1397 1.1 mrg /* Check for NaN and Infinity. */
1398 1.1 mrg movi a6, 0x7f800000 /* exponent field mask */
1399 1.1 mrg ball a2, a6, .Lfixsfdi_nan_or_inf
1400 1.1 mrg
1401 1.1 mrg /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */
1402 1.1 mrg extui a4, a2, 23, 8
1403 1.1 mrg addi a4, a4, -0x7e /* a4 = number of integer bits in the magnitude */
1404 1.1 mrg bgei a4, 64, .Lfixsfdi_maxint /* magnitude is 2^63 or more: saturate */
1405 1.1 mrg blti a4, 1, .Lfixsfdi_zero /* magnitude below 1: result is 0 */
1406 1.1 mrg
1407 1.1 mrg /* Add explicit "1.0" and shift << 8. */
1408 1.1 mrg or a7, a2, a6 /* sets bit 23; a7 also keeps the sign of x, since a2 may be overwritten as xh or xl */
1409 1.1 mrg slli xh, a7, 8 /* leading 1 now at bit 31 of xh */
1410 1.1 mrg
1411 1.1 mrg /* Shift back to the right, based on the exponent. */
1412 1.1 mrg ssl a4 /* shift by 64 - a4 */
1413 1.1 mrg bgei a4, 32, .Lfixsfdi_smallshift /* result needs more than 32 bits */
1414 1.1 mrg srl xl, xh /* result fits in the low word */
1415 1.1 mrg movi xh, 0
1416 1.1 mrg
1417 1.1 mrg .Lfixsfdi_shifted:
1418 1.1 mrg /* Negate the result if sign != 0. */
1419 1.1 mrg bgez a7, 1f
1420 1.1 mrg neg xl, xl
1421 1.1 mrg neg xh, xh
1422 1.1 mrg beqz xl, 1f /* no borrow out of the low word when it is zero */
1423 1.1 mrg addi xh, xh, -1 /* propagate the borrow into the high word */
1424 1.1 mrg 1: leaf_return
1425 1.1 mrg
1426 1.1 mrg .Lfixsfdi_smallshift:
1427 1.1 mrg movi xl, 0 /* NOTE(review): the sll on the next line overwrites xl, so this movi looks redundant - confirm */
1428 1.1 mrg sll xl, xh /* low word = bits of xh that fall below the high word */
1429 1.1 mrg srl xh, xh /* high word = xh shifted right by the amount set by ssl */
1430 1.1 mrg j .Lfixsfdi_shifted
1431 1.1 mrg
1432 1.1 mrg .Lfixsfdi_nan_or_inf:
1433 1.1 mrg /* Handle Infinity and NaN. */
1434 1.1 mrg slli a4, a2, 9 /* mantissa of x; zero means Infinity */
1435 1.1 mrg beqz a4, .Lfixsfdi_maxint
1436 1.1 mrg
1437 1.1 mrg /* Translate NaN to +maxint. */
1438 1.1 mrg movi a2, 0 /* makes the sign test below pick the positive limit */
1439 1.1 mrg
1440 1.1 mrg .Lfixsfdi_maxint:
1441 1.1 mrg slli a7, a6, 8 /* 0x80000000 */
1442 1.1 mrg bgez a2, 1f /* non-negative input: positive limit */
1443 1.1 mrg mov xh, a7 /* negative input: 0x80000000:00000000 */
1444 1.1 mrg movi xl, 0
1445 1.1 mrg leaf_return
1446 1.1 mrg
1447 1.1 mrg 1: addi xh, a7, -1 /* 0x7fffffff */
1448 1.1 mrg movi xl, -1 /* low word all ones */
1449 1.1 mrg leaf_return
1450 1.1 mrg
1451 1.1 mrg .Lfixsfdi_zero:
1452 1.1 mrg movi xh, 0
1453 1.1 mrg movi xl, 0
1454 1.1 mrg leaf_return
1455 1.1 mrg
1456 1.1 mrg #endif /* L_fixsfdi */
1457 1.1 mrg
1458 1.1 mrg #ifdef L_fixunssfsi
1459 1.1 mrg
1460 1.1 mrg .align 4
1461 1.1 mrg .global __fixunssfsi
1462 1.1 mrg .type __fixunssfsi, @function
1463 1.1 mrg __fixunssfsi:
1464 1.1 mrg leaf_entry sp, 16
1465 1.1 mrg /* Convert the single-precision value in a2 to a 32-bit unsigned integer in a2, truncating the fraction. In-range negative inputs give the negated magnitude; too-large inputs give 0xffffffff (positive) or 0x80000000 (negative); NaN gives 0xffffffff. */
1466 1.1 mrg /* Check for NaN and Infinity. */
1467 1.1 mrg movi a6, 0x7f800000 /* exponent field mask */
1468 1.1 mrg ball a2, a6, .Lfixunssfsi_nan_or_inf
1469 1.1 mrg
1470 1.1 mrg /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */
1471 1.1 mrg extui a4, a2, 23, 8
1472 1.1 mrg addi a4, a4, -0x7f /* a4 = unbiased exponent */
1473 1.1 mrg bgei a4, 32, .Lfixunssfsi_maxint /* magnitude is 2^32 or more: saturate */
1474 1.1 mrg bltz a4, .Lfixunssfsi_zero /* magnitude below 1: result is 0 */
1475 1.1 mrg
1476 1.1 mrg /* Add explicit "1.0" and shift << 8. */
1477 1.1 mrg or a7, a2, a6 /* sets bit 23; a7 also keeps the sign of x for the final select */
1478 1.1 mrg slli a5, a7, 8 /* leading 1 now at bit 31 */
1479 1.1 mrg
1480 1.1 mrg /* Shift back to the right, based on the exponent. */
1481 1.1 mrg addi a4, a4, 1 /* a4 = number of integer bits in the magnitude */
1482 1.1 mrg beqi a4, 32, .Lfixunssfsi_bigexp /* all 32 bits are integer bits: no shift needed */
1483 1.1 mrg ssl a4 /* shift by 32 - a4 */
1484 1.1 mrg srl a5, a5
1485 1.1 mrg
1486 1.1 mrg /* Negate the result if sign != 0. */
1487 1.1 mrg neg a2, a5
1488 1.1 mrg movgez a2, a5, a7 /* use the un-negated value when the input was non-negative */
1489 1.1 mrg leaf_return
1490 1.1 mrg
1491 1.1 mrg .Lfixunssfsi_nan_or_inf:
1492 1.1 mrg /* Handle Infinity and NaN. */
1493 1.1 mrg slli a4, a2, 9 /* mantissa of x; zero means Infinity */
1494 1.1 mrg beqz a4, .Lfixunssfsi_maxint
1495 1.1 mrg
1496 1.1 mrg /* Translate NaN to 0xffffffff. */
1497 1.1 mrg movi a2, -1
1498 1.1 mrg leaf_return
1499 1.1 mrg
1500 1.1 mrg .Lfixunssfsi_maxint:
1501 1.1 mrg slli a4, a6, 8 /* 0x80000000 */
1502 1.1 mrg movi a5, -1 /* 0xffffffff */
1503 1.1 mrg movgez a4, a5, a2 /* non-negative input gets 0xffffffff, negative gets 0x80000000 */
1504 1.1 mrg mov a2, a4
1505 1.1 mrg leaf_return
1506 1.1 mrg
1507 1.1 mrg .Lfixunssfsi_zero:
1508 1.1 mrg movi a2, 0
1509 1.1 mrg leaf_return
1510 1.1 mrg
1511 1.1 mrg .Lfixunssfsi_bigexp:
1512 1.1 mrg /* Handle unsigned maximum exponent case. */
1513 1.1 mrg bltz a2, 1f /* a2 still holds the original input here */
1514 1.1 mrg mov a2, a5 /* no shift needed */
1515 1.1 mrg leaf_return
1516 1.1 mrg
1517 1.1 mrg /* Return 0x80000000 if negative. */
1518 1.1 mrg 1: slli a2, a6, 8
1519 1.1 mrg leaf_return
1520 1.1 mrg
1521 1.1 mrg #endif /* L_fixunssfsi */
1522 1.1 mrg
1523 1.1 mrg #ifdef L_fixunssfdi
1524 1.1 mrg
1525 1.1 mrg .align 4
1526 1.1 mrg .global __fixunssfdi
1527 1.1 mrg .type __fixunssfdi, @function
1528 1.1 mrg __fixunssfdi:
1529 1.1 mrg leaf_entry sp, 16
1530 1.1 mrg /* Convert the single-precision value in a2 to a 64-bit unsigned integer in xh:xl (a2/a3, assigned by endianness at the top of the file), truncating the fraction. Too-large inputs give all ones (positive) or 0x80000000:00000000 (negative); NaN gives all ones. */
1531 1.1 mrg /* Check for NaN and Infinity. */
1532 1.1 mrg movi a6, 0x7f800000 /* exponent field mask */
1533 1.1 mrg ball a2, a6, .Lfixunssfdi_nan_or_inf
1534 1.1 mrg
1535 1.1 mrg /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */
1536 1.1 mrg extui a4, a2, 23, 8
1537 1.1 mrg addi a4, a4, -0x7f /* a4 = unbiased exponent */
1538 1.1 mrg bgei a4, 64, .Lfixunssfdi_maxint /* magnitude is 2^64 or more: saturate */
1539 1.1 mrg bltz a4, .Lfixunssfdi_zero /* magnitude below 1: result is 0 */
1540 1.1 mrg
1541 1.1 mrg /* Add explicit "1.0" and shift << 8. */
1542 1.1 mrg or a7, a2, a6 /* sets bit 23; a7 also keeps the sign of x, since a2 may be overwritten as xh or xl */
1543 1.1 mrg slli xh, a7, 8 /* leading 1 now at bit 31 of xh */
1544 1.1 mrg
1545 1.1 mrg /* Shift back to the right, based on the exponent. */
1546 1.1 mrg addi a4, a4, 1 /* a4 = number of integer bits in the magnitude */
1547 1.1 mrg beqi a4, 64, .Lfixunssfdi_bigexp /* all 64 bits are integer bits: no shift needed */
1548 1.1 mrg ssl a4 /* shift by 64 - a4 */
1549 1.1 mrg bgei a4, 32, .Lfixunssfdi_smallshift /* result needs more than 32 bits */
1550 1.1 mrg srl xl, xh /* result fits in the low word */
1551 1.1 mrg movi xh, 0
1552 1.1 mrg
1553 1.1 mrg .Lfixunssfdi_shifted:
1554 1.1 mrg /* Negate the result if sign != 0. */
1555 1.1 mrg bgez a7, 1f
1556 1.1 mrg neg xl, xl
1557 1.1 mrg neg xh, xh
1558 1.1 mrg beqz xl, 1f /* no borrow out of the low word when it is zero */
1559 1.1 mrg addi xh, xh, -1 /* propagate the borrow into the high word */
1560 1.1 mrg 1: leaf_return
1561 1.1 mrg
1562 1.1 mrg .Lfixunssfdi_smallshift:
1563 1.1 mrg movi xl, 0 /* zero low word feeds the funnel shift below */
1564 1.1 mrg src xl, xh, xl /* low word = low 32 bits of (xh:0) shifted right */
1565 1.1 mrg srl xh, xh /* high word = xh shifted right by the same amount */
1566 1.1 mrg j .Lfixunssfdi_shifted
1567 1.1 mrg
1568 1.1 mrg .Lfixunssfdi_nan_or_inf:
1569 1.1 mrg /* Handle Infinity and NaN. */
1570 1.1 mrg slli a4, a2, 9 /* mantissa of x; zero means Infinity */
1571 1.1 mrg beqz a4, .Lfixunssfdi_maxint
1572 1.1 mrg
1573 1.1 mrg /* Translate NaN to 0xffffffff.... */
1574 1.1 mrg 1: movi xh, -1
1575 1.1 mrg movi xl, -1
1576 1.1 mrg leaf_return
1577 1.1 mrg
1578 1.1 mrg .Lfixunssfdi_maxint:
1579 1.1 mrg bgez a2, 1b /* non-negative input: return all ones */
1580 1.1 mrg 2: slli xh, a6, 8 /* 0x80000000 */
1581 1.1 mrg movi xl, 0
1582 1.1 mrg leaf_return
1583 1.1 mrg
1584 1.1 mrg .Lfixunssfdi_zero:
1585 1.1 mrg movi xh, 0
1586 1.1 mrg movi xl, 0
1587 1.1 mrg leaf_return
1588 1.1 mrg
1589 1.1 mrg .Lfixunssfdi_bigexp:
1590 1.1 mrg /* Handle unsigned maximum exponent case. */
1591 1.1 mrg bltz a7, 2b /* negative input: return 0x80000000:00000000 */
1592 1.1 mrg movi xl, 0 /* xh already holds the top 32 bits */
1593 1.1 mrg leaf_return /* no shift needed */
1594 1.1 mrg
1595 1.1 mrg #endif /* L_fixunssfdi */
1596 1.1 mrg
1597 1.1 mrg #ifdef L_floatsisf
1598 1.1 mrg
1599 1.1 mrg .align 4
1600 1.1 mrg .global __floatunsisf
1601 1.1 mrg .type __floatunsisf, @function
1602 1.1 mrg __floatunsisf:
1603 1.1 mrg leaf_entry sp, 16
1604 1.1 mrg beqz a2, .Lfloatsisf_return /* zero converts to +0.0, which is the same bit pattern */
1605 1.1 mrg /* Unsigned 32-bit integer in a2 to single precision in a2; the conversion itself is the code shared with __floatsisf. */
1606 1.1 mrg /* Set the sign to zero and jump to the floatsisf code. */
1607 1.1 mrg movi a7, 0
1608 1.1 mrg j .Lfloatsisf_normalize
1609 1.1 mrg
1610 1.1 mrg .align 4
1611 1.1 mrg .global __floatsisf
1612 1.1 mrg .type __floatsisf, @function
1613 1.1 mrg __floatsisf:
1614 1.1 mrg leaf_entry sp, 16
1615 1.1 mrg /* Signed 32-bit integer in a2 to single precision in a2, rounded to nearest even. __floatunsisf enters at .Lfloatsisf_normalize with a7 = 0. */
1616 1.1 mrg /* Check for zero. */
1617 1.1 mrg beqz a2, .Lfloatsisf_return /* zero converts to +0.0, which is the same bit pattern */
1618 1.1 mrg
1619 1.1 mrg /* Save the sign. */
1620 1.1 mrg extui a7, a2, 31, 1 /* a7 = 1 when the input is negative */
1621 1.1 mrg
1622 1.1 mrg /* Get the absolute value. */
1623 1.1 mrg #if XCHAL_HAVE_ABS
1624 1.1 mrg abs a2, a2
1625 1.1 mrg #else
1626 1.1 mrg neg a4, a2
1627 1.1 mrg movltz a2, a4, a2 /* take the negated value only when a2 is negative */
1628 1.1 mrg #endif
1629 1.1 mrg
1630 1.1 mrg .Lfloatsisf_normalize:
1631 1.1 mrg /* Normalize with the first 1 bit in the msb. */
1632 1.1 mrg do_nsau a4, a2, a5, a6 /* presumably a4 = count of leading zero bits in a2, a5/a6 scratch; do_nsau is defined outside this chunk - confirm */
1633 1.1 mrg ssl a4
1634 1.1 mrg sll a5, a2 /* a5 = a2 << a4 */
1635 1.1 mrg
1636 1.1 mrg /* Shift the mantissa into position, with rounding bits in a6. */
1637 1.1 mrg srli a2, a5, 8 /* leading 1 lands on bit 23 */
1638 1.1 mrg slli a6, a5, (32 - 8) /* the 8 discarded low bits, left-justified */
1639 1.1 mrg
1640 1.1 mrg /* Set the exponent. */
1641 1.1 mrg movi a5, 0x9d /* 0x7e + 31 */
1642 1.1 mrg sub a5, a5, a4
1643 1.1 mrg slli a5, a5, 23
1644 1.1 mrg add a2, a2, a5 /* the leading 1 at bit 23 adds one more to the exponent field */
1645 1.1 mrg
1646 1.1 mrg /* Add the sign. */
1647 1.1 mrg slli a7, a7, 31
1648 1.1 mrg or a2, a2, a7
1649 1.1 mrg
1650 1.1 mrg /* Round up if the leftover fraction is >= 1/2. */
1651 1.1 mrg bgez a6, .Lfloatsisf_return /* top discarded bit clear: below 1/2, result is final */
1652 1.1 mrg addi a2, a2, 1 /* Overflow to the exponent is OK. */
1653 1.1 mrg
1654 1.1 mrg /* Check if the leftover fraction is exactly 1/2. */
1655 1.1 mrg slli a6, a6, 1 /* anything left below the 1/2 bit? */
1656 1.1 mrg beqz a6, .Lfloatsisf_exactlyhalf
1657 1.1 mrg
1658 1.1 mrg .Lfloatsisf_return:
1659 1.1 mrg leaf_return
1660 1.1 mrg
1661 1.1 mrg .Lfloatsisf_exactlyhalf:
1662 1.1 mrg /* Round down to the nearest even value. */
1663 1.1 mrg srli a2, a2, 1
1664 1.1 mrg slli a2, a2, 1 /* clear the low bit of the already incremented result */
1665 1.1 mrg leaf_return
1666 1.1 mrg
1667 1.1 mrg #endif /* L_floatsisf */
1668 1.1 mrg
1669 1.1 mrg #ifdef L_floatdisf
1670 1.1 mrg
1671 1.1 mrg .align 4
1672 1.1 mrg .global __floatundisf
1673 1.1 mrg .type __floatundisf, @function
1674 1.1 mrg __floatundisf:
1675 1.1 mrg leaf_entry sp, 16
1676 1.1 mrg /* Unsigned 64-bit integer in xh:xl to single precision in a2; the conversion itself is the code shared with __floatdisf. */
1677 1.1 mrg /* Check for zero. */
1678 1.1 mrg or a4, xh, xl
1679 1.1 mrg beqz a4, 2f /* both words zero: a2 is already 0, the pattern for +0.0 (label 2 is in __floatdisf) */
1680 1.1 mrg
1681 1.1 mrg /* Set the sign to zero and jump to the floatdisf code. */
1682 1.1 mrg movi a7, 0
1683 1.1 mrg j .Lfloatdisf_normalize
1684 1.1 mrg
1685 1.1 mrg .align 4
1686 1.1 mrg .global __floatdisf
1687 1.1 mrg .type __floatdisf, @function
1688 1.1 mrg __floatdisf:
1689 1.1 mrg leaf_entry sp, 16
1690 1.1 mrg /* Signed 64-bit integer in xh:xl to single precision in a2, rounded to nearest even. __floatundisf enters at .Lfloatdisf_normalize with a7 = 0. */
1691 1.1 mrg /* Check for zero. */
1692 1.1 mrg or a4, xh, xl
1693 1.1 mrg beqz a4, 2f /* both words zero: a2 is already 0, the pattern for +0.0 */
1694 1.1 mrg
1695 1.1 mrg /* Save the sign. */
1696 1.1 mrg extui a7, xh, 31, 1 /* a7 = 1 when the input is negative */
1697 1.1 mrg
1698 1.1 mrg /* Get the absolute value. */
1699 1.1 mrg bgez xh, .Lfloatdisf_normalize
1700 1.1 mrg neg xl, xl
1701 1.1 mrg neg xh, xh
1702 1.1 mrg beqz xl, .Lfloatdisf_normalize /* no borrow out of the low word when it is zero */
1703 1.1 mrg addi xh, xh, -1 /* propagate the borrow into the high word */
1704 1.1 mrg
1705 1.1 mrg .Lfloatdisf_normalize:
1706 1.1 mrg /* Normalize with the first 1 bit in the msb of xh. */
1707 1.1 mrg beqz xh, .Lfloatdisf_bigshift /* high word empty: the shift is 32 or more */
1708 1.1 mrg do_nsau a4, xh, a5, a6 /* presumably a4 = count of leading zero bits in xh, a5/a6 scratch; do_nsau is defined outside this chunk - confirm */
1709 1.1 mrg ssl a4
1710 1.1 mrg src xh, xh, xl /* xh = high word of (xh:xl) << a4 */
1711 1.1 mrg sll xl, xl /* xl <<= a4 */
1712 1.1 mrg
1713 1.1 mrg .Lfloatdisf_shifted:
1714 1.1 mrg /* Shift the mantissa into position, with rounding bits in a6. */
1715 1.1 mrg ssai 8
1716 1.1 mrg sll a5, xl /* a5 = xl << 24: the low 8 bits of xl, which fall below a6 */
1717 1.1 mrg src a6, xh, xl /* a6 = low 32 bits of (xh:xl) >> 8: the bits just below the mantissa */
1718 1.1 mrg srl xh, xh /* xh >>= 8, leading 1 lands on bit 23 */
1719 1.1 mrg beqz a5, 1f
1720 1.1 mrg movi a5, 1
1721 1.1 mrg or a6, a6, a5 /* sticky bit for the bits that did not fit in a6 */
1722 1.1 mrg 1:
1723 1.1 mrg /* Set the exponent. */
1724 1.1 mrg movi a5, 0xbd /* 0x7e + 63 */
1725 1.1 mrg sub a5, a5, a4
1726 1.1 mrg slli a5, a5, 23
1727 1.1 mrg add a2, xh, a5 /* the leading 1 at bit 23 adds one more to the exponent field */
1728 1.1 mrg
1729 1.1 mrg /* Add the sign. */
1730 1.1 mrg slli a7, a7, 31
1731 1.1 mrg or a2, a2, a7
1732 1.1 mrg
1733 1.1 mrg /* Round up if the leftover fraction is >= 1/2. */
1734 1.1 mrg bgez a6, 2f /* top discarded bit clear: below 1/2, result is final */
1735 1.1 mrg addi a2, a2, 1 /* Overflow to the exponent is OK. */
1736 1.1 mrg
1737 1.1 mrg /* Check if the leftover fraction is exactly 1/2. */
1738 1.1 mrg slli a6, a6, 1 /* anything left below the 1/2 bit? */
1739 1.1 mrg beqz a6, .Lfloatdisf_exactlyhalf
1740 1.1 mrg 2: leaf_return
1741 1.1 mrg
1742 1.1 mrg .Lfloatdisf_bigshift:
1743 1.1 mrg /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
1744 1.1 mrg do_nsau a4, xl, a5, a6 /* presumably a4 = count of leading zero bits in xl - confirm */
1745 1.1 mrg ssl a4
1746 1.1 mrg sll xh, xl /* xh = xl << a4 */
1747 1.1 mrg movi xl, 0
1748 1.1 mrg addi a4, a4, 32 /* total shift includes the 32 bits of the empty high word */
1749 1.1 mrg j .Lfloatdisf_shifted
1750 1.1 mrg
1751 1.1 mrg .Lfloatdisf_exactlyhalf:
1752 1.1 mrg /* Round down to the nearest even value. */
1753 1.1 mrg srli a2, a2, 1
1754 1.1 mrg slli a2, a2, 1 /* clear the low bit of the already incremented result */
1755 1.1 mrg leaf_return
1756 1.1 mrg
1757 1.1 mrg #endif /* L_floatdisf */
1758