;; Copyright (C) 2019-2022 Free Software Foundation, Inc.
;;
;; This file is part of LIBF7, which is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;; for more details.
;;
;; Under Section 7 of GPL version 3, you are granted additional
;; permissions described in the GCC Runtime Library Exception, version
;; 3.1, as published by the Free Software Foundation.
;;
;; You should have received a copy of the GNU General Public License and
;; a copy of the GCC Runtime Library Exception along with this program;
;; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
;; <http://www.gnu.org/licenses/>.

;; The code below is only assembled when not targeting AVR_TINY.
#ifndef __AVR_TINY__

;; NOTE(review): presumably tells asm-defs.h that this file brings its own
;; DEFUN / ENDF macros -- confirm against asm-defs.h.
#define ASM_DEFS_HAVE_DEFUN

#include "asm-defs.h"
#include "libf7.h"

;; Short names for the fixed zero and scratch registers.
#define ZERO __zero_reg__
#define TMP  __tmp_reg__

;; Symbol of the assembly routine NAME: F7_ applied to NAME with "_asm"
;; appended.  F7_ is not defined in this file.
#define F7(name)   F7_(name##_asm)

;; Call the libf7 assembly routine NAME: declare its F7 symbol global and
;; emit an XCALL to it.  XCALL is not defined in this file.
.macro F7call name
    .global F7(\name\())
    XCALL   F7(\name\())
.endm

;; Tail-jump to the libf7 assembly routine NAME: declare its F7 symbol
;; global and emit an XJMP to it.  XJMP is not defined in this file.
.macro F7jmp name
    .global F7(\name\())
    XJMP    F7(\name\())
.endm

;; Just for visibility in disassembly: emit a global label LLL.NAME
;; followed by one NOP instruction.
.macro LLL name
    .global LLL.\name
    LLL.\name:
    nop
.endm

;; Open the libf7 assembly function NAME: switch to its own section
;; .text.libf7.asm.NAME, export the F7 symbol of NAME, start a .func
;; region and define the entry label.  Close the function with ENDF.
.macro DEFUN name
    .section .text.libf7.asm.\name, "ax", @progbits
    .global F7(\name\())
    .func F7(\name\())
    F7(\name\()) :
.endm

;; Close a function opened with DEFUN: record the size of the F7 symbol of
;; NAME (current location minus its label) and end the .func region.
.macro ENDF name
    .size F7(\name\()), . - F7(\name\())
    .endfunc
.endm

;; Define an additional global entry point at the current location, named
;; by the F7 symbol of NAME.  Does not switch sections.
.macro LABEL name
    .global F7(\name\())
    F7(\name\()) :
.endm

;; Like DEFUN, but for a symbol that is used verbatim (no F7 decoration):
;; switch to section .text.libf7.asm.NAME and define NAME as a weak symbol
;; of type function.  Close with _ENDF.
.macro _DEFUN name
    .section .text.libf7.asm.\name, "ax", @progbits
    .weak \name
    .type \name, @function
    \name :
.endm

;; Record the size of NAME as the distance from its label to the current
;; location.
.macro _ENDF name
    .size \name, . - \name
.endm

;; Define NAME (used verbatim) as a weak symbol of type function at the
;; current location.  Does not switch sections.
.macro _LABEL name
    .weak \name
    .type \name, @function
    \name :
.endm

;; Apply F7_ to X; going through this extra macro level lets the
;; preprocessor expand X first.  F7_ is not defined in this file.
#define F7_NAME(X)   F7_(X)

;; Make a weak alias: define SYM as a weak symbol of type function at the
;; current location.
.macro  ALIAS  sym
    .weak \sym
    .type \sym, @function
    \sym:
.endm

;; Make a weak alias if double is 64 bits wide.
;; The condition is evaluated by the preprocessor; when it does not hold
;; (or WITH_LIBF7_MATH_SYMBOLS is undefined) the macro expands to nothing.
.macro  DALIAS  sym
#if defined (WITH_LIBF7_MATH_SYMBOLS) && __SIZEOF_DOUBLE__ == 8
ALIAS \sym
#endif
.endm

;; Make a weak alias if long double is 64 bits wide.
;; The condition is evaluated by the preprocessor; when it does not hold
;; (or WITH_LIBF7_MATH_SYMBOLS is undefined) the macro expands to nothing.
.macro  LALIAS  sym
#if defined (WITH_LIBF7_MATH_SYMBOLS) && __SIZEOF_LONG_DOUBLE__ == 8
ALIAS \sym
#endif
.endm

;; Byte offsets into an f7_t as used by the routines in this file:
;; the flags byte is at offset 0, the mantissa bytes start at offset 1,
;; and the two exponent bytes follow the F7_MANT_BYTES mantissa bytes.
#define     Off 1
#define     Expo (Off + F7_MANT_BYTES)

#ifdef F7MOD_classify_
;;  r24 = classify (*Z)
;;  NaN  ->  F7_FLAG_nan
;;  INF  ->  F7_FLAG_inf [ | F7_FLAG_sign ]
;;  ==0  ->  F7_FLAG_zero
;;  ...  ->  0 [ | F7_FLAG_sign ]
;;
;; In:   Z   = address of the f7_t to classify.
;; Out:  r24 = classification as listed above.
;; If any bit other than bit 0 is set in .flags, the .flags byte is
;; returned unchanged.  Otherwise a number whose top mantissa bit is clear
;; is reported as zero, and any other number yields just its sign.
;; NOTE(review): the sign is taken from bit 0 of .flags, which matches
;; F7_FLAG_sign only if that flag is bit 0 -- confirm against libf7.h.

;; Clobbers:  None (no TMP, no T).
DEFUN classify

    ld      r24,    Z
    ;; C = bit 0 of .flags, r24 = the remaining flag bits.
    lsr     r24
    brne .Lnan_or_inf

    ;; Only bit 0 may be set: look at the most significant mantissa byte.
    ;; LDD and TST do not change C, so C still carries bit 0 of .flags.
    ldd     r24,    Z+6+Off
    tst     r24
    brpl 0f
    ;; r24 = C ? 0xff : 0x00, then keep only the sign flag.
    sbc     r24,    r24
    andi    r24,    F7_FLAG_sign
    ret

0:  ldi     r24,    F7_FLAG_zero
    ret

.Lnan_or_inf:
    ;; Undo the LSR so that r24 is the original .flags byte again.
    rol     r24
    ret

ENDF classify
#endif /* F7MOD_classify_ */

#ifdef F7MOD_clr_
;; Overwrite *Z with the content of ZERO: the flags byte, the 7 mantissa
;; bytes and both exponent bytes.
;; In:  Z = address of the f7_t.
;; Only stores are executed, so no register and no status flag changes.
DEFUN clr
    std     Z+0,     ZERO
    std     Z+0+Off, ZERO
    std     Z+1+Off, ZERO
    std     Z+2+Off, ZERO
    std     Z+3+Off, ZERO
    std     Z+4+Off, ZERO
    std     Z+5+Off, ZERO
    std     Z+6+Off, ZERO
    std     Z+0+Expo, ZERO
    std     Z+1+Expo, ZERO
    ret
ENDF clr

#endif /* F7MOD_clr_ */

#ifdef F7MOD_clz_
;; The libgcc CLZ implementations like __clzsi2 aka. __builtin_clzl are
;; not very well suited for our purpose, so implement our own.

#define ZBITS   r26
;; Helper for clzdi2.  If REG is non-zero, it is OR-ed into ZERO and control
;; branches to the per-bit loop; otherwise 8 more zero bits are counted.
.macro  .test.byte  reg
    or      ZERO,   \reg
    brne    .Loop_bit
    subi    ZBITS, -8
.endm

;; R26 = CLZ (uint64_t R18);  CLZ (0) = 64.
;; The 64-bit input lives in r18..r25 with r25 as the most significant byte.
;; ZERO serves as scratch for the first non-zero byte and is cleared again
;; before returning on every path that modified it.
;; Unchanged: T
DEFUN clzdi2
    clr     ZBITS
    ;; Catch the common case of normalized .mant for speed-up.
    tst     r25
    brmi 9f
    ;; Walk from the most to the least significant byte, 8 bits at a time.
    .test.byte  r25
    .test.byte  r24
    .test.byte  r23
    .test.byte  r22
    .test.byte  r21
    .test.byte  r20
    .test.byte  r19
    .test.byte  r18
    ;; All bytes were zero: ZBITS is 64 here.
.Ldone:
    clr     ZERO
9:  ret

    ;; ZERO holds the first non-zero byte.  Shift it left until its top set
    ;; bit drops into C, counting one leading zero per shift before that.
.Loop_bit:
    lsl     ZERO
    brcs .Ldone
    inc     ZBITS
    rjmp .Loop_bit

ENDF clzdi2
#undef  ZBITS
#endif /* F7MOD_clz_ */

#ifdef F7MOD_cmp_mant_
;; Compare the 7-byte mantissas of the f7_t at X and the f7_t at Z as
;; unsigned numbers.
;; In:   X, Z = addresses of the two f7_t objects.
;; Out:  r24 = 0 if equal, 1 if X->mant is above Z->mant, -1 (0xff) if below.
;; X is moved during the comparison and is back at its entry value on
;; return.  Clobbers TMP and the status flags.
DEFUN cmp_mant

    ;; Compare the most significant bytes first; X = entry value + 7 now.
    adiw    X,   6 + Off
    ld      r24, X      $ ldd   TMP, Z+6+Off    $ SUB   r24, TMP
    brne .Lunequal

    ;; Back to the first mantissa byte, then subtract the lower 6 bytes
    ;; with borrow.  Z is only set at the end if all 6 differences are 0.
    sbiw    X,  6
    ld      r24, X+     $ ldd   TMP, Z+0+Off    $ SUB   r24, TMP
    ld      r24, X+     $ ldd   TMP, Z+1+Off    $ sbc   r24, TMP
    ld      r24, X+     $ ldd   TMP, Z+2+Off    $ sbc   r24, TMP
    ld      r24, X+     $ ldd   TMP, Z+3+Off    $ sbc   r24, TMP
    ld      r24, X+     $ ldd   TMP, Z+4+Off    $ sbc   r24, TMP
    ld      r24, X+     $ ldd   TMP, Z+5+Off    $ sbc   r24, TMP
    ;; MSBs are already known to be equal
    breq 9f
.Lunequal:
    ;; Turn the borrow into the result: C = 1 gives -1, C = 0 gives +1.
    sbc     r24,    r24
    sbci    r24,    -1
    ;; On both paths X is the entry value + 7 here; undo that.
9:  sbiw    X,      6 + Off
    ret
ENDF cmp_mant
#endif /* F7MOD_cmp_mant_ */

;; Register numbers of the 8-byte working value C[]:
;; CA is register 18, the byte below the mantissa; C0 up to C6 are
;; registers 19 up to 25 and hold the 7 mantissa bytes, C6 being the most
;; significant one.  Flags names the same register as CA.
;; Carry is r16.
#define     CA      18
#define     C0      CA+1
#define     C1      C0+1
#define     C2      C0+2
#define     C3      C0+3
#define     C4      C0+4
#define     C5      C0+5
#define     C6      C0+6
#define     Carry   r16
#define     Flags   18

#ifdef F7MOD_store_
;; Z->flags = CA.
;; Z->mant  = C[7].
;; In:  Z = address of the f7_t; CA and C0..C6 = the bytes to store.
;; Falls through into store_mant.  Registers and status flags are unchanged.
DEFUN store_mant.with_flags
    st      Z,      CA

;; Z->mant = C[7].
;; Second entry point that leaves Z->flags alone.
LABEL store_mant
    std     Z+0+Off, C0
    std     Z+1+Off, C1
    std     Z+2+Off, C2
    std     Z+3+Off, C3
    std     Z+4+Off, C4
    std     Z+5+Off, C5
    std     Z+6+Off, C6
    ret
ENDF store_mant.with_flags
#endif /* F7MOD_store_ */

#ifdef F7MOD_load_
;; CA   = Z->flags
;; C[7] = Z->mant
;; In:  Z = address of the f7_t.  Z is not modified.
;; The skipnext macro (not defined in this file) is used to jump over the
;; CLR of the next entry point so that CA keeps the loaded flags.
DEFUN load_mant.with_flags
    ld      CA,     Z
    skipnext

;; CA   = 0
;; C[7] = Z->mant
;; Both labels name the same entry point.
LABEL load_mant.clr_CA
LABEL load_mant.clr_flags
    clr     CA      ; May be skipped

;; C[7] = Z->mant
;; This entry point does not touch CA.
LABEL load_mant
    ldd     C0,     Z+0+Off
    ldd     C1,     Z+1+Off
    ldd     C2,     Z+2+Off
    ldd     C3,     Z+3+Off
    ldd     C4,     Z+4+Off
    ldd     C5,     Z+5+Off
    ldd     C6,     Z+6+Off
    ret
ENDF load_mant.with_flags
#endif /* F7MOD_load_ */

#ifdef F7MOD_copy_
;; Copy the 10 bytes of the f7_t at X to the f7_t at Z.
;; In:  X = source address, Z = destination address.
;; Nothing is done when X equals Z.  Otherwise the bytes are copied from
;; the highest address downwards, which leaves X and Z at their entry
;; values.  ZERO is borrowed as loop counter and is 0 again after the loop.
;; Clobbers TMP and the status flags; T is set on the copying path.
DEFUN copy
    cp      XL,     ZL
    cpc     XH,     ZH
    breq 9f
    ;; Point both registers one past the last byte.
    adiw    XL,     10
    adiw    ZL,     10
    set
    bld     ZERO,   1
    bld     ZERO,   3   ; ZERO = 0b1010 = 10.
.Loop:
    ld      TMP,    -X
    st      -Z,     TMP
    dec     ZERO
    brne .Loop
9:  ret
ENDF copy
#endif /* F7MOD_copy_ */

#ifdef F7MOD_copy_P_
;; Copy 10 bytes read with LPM from address Z to the memory at X.
;; In:  Z = source address for LPM, X = destination address.
;; X and Z are advanced by 10 during the copy and moved back before
;; returning.  ZERO is borrowed as loop counter and is 0 again after the
;; loop.  Clobbers TMP and the status flags; T is set.
DEFUN copy_P
    set
    bld     ZERO,   1
    bld     ZERO,   3   ; ZERO = 0b1010 = 10.
.Loop:
#ifdef __AVR_HAVE_LPMX__
    lpm     TMP,    Z+
#else
    ;; Plain LPM has no operands; the loaded byte is taken from TMP below.
    lpm
    adiw    Z,      1
#endif /* Have LPMx */
    st      X+,     TMP
    dec     ZERO
    brne .Loop
    sbiw    X,      10
    sbiw    Z,      10
    ret
ENDF copy_P
#endif /* F7MOD_copy_P_ */

#ifdef F7MOD_copy_mant_
;; Copy the 7 mantissa bytes of the f7_t at X to the f7_t at Z.
;; In:  X = source address, Z = destination address.
;; Nothing is done when X equals Z.  Otherwise X and Z are advanced by
;; 1 + 7 bytes during the copy and moved back by 8 before returning.
;; ZERO is borrowed as loop counter and is 0 again after the loop.
;; Clobbers TMP and the status flags; T is set on the copying path.
DEFUN copy_mant
    cp      XL,     ZL
    cpc     XH,     ZH
    breq 9f
    ;; Skip the flags byte.
    adiw    XL,     1
    adiw    ZL,     1
    set
    bld     ZERO,   3
    dec     ZERO        ; ZERO = 7
.Loop:
    ld      TMP,    X+
    st      Z+,     TMP
    dec     ZERO
    brne    .Loop
    sbiw    XL,     8
    sbiw    ZL,     8
9:  ret
ENDF copy_mant
#endif /* F7MOD_copy_mant_ */


#ifdef F7MOD_clr_mant_lsbs_
;; Store a copy of a mantissa with its low bits removed.
;; In:  r24 = address of the destination f7_t
;;      r22 = address of the source f7_t
;;      r20 = bit count
;; The source mantissa is loaded into C[], shifted right, CA is cleared,
;; the value is shifted left again and stored as the destination mantissa.
;; NOTE(review): assumes the lshrdi3 / ashldi3 helpers shift the 64-bit
;; value in r18..r25 by the count in r16 and preserve r16 and X -- confirm.
;; r16 is saved and restored around the operation.
DEFUN clr_mant_lsbs
    push    r16
    mov     r16,    r20
    ;; Park the destination address in X while Z addresses the source.
    wmov    XL,     r24

    wmov    ZL,     r22
    F7call  load_mant

    F7call  lshrdi3

    clr     CA

    F7call   ashldi3

    pop     r16

    wmov    ZL,     XL
    ;; Tail call: store_mant returns to our caller.
    F7jmp  store_mant

ENDF clr_mant_lsbs
#endif /* F7MOD_clr_mant_lsbs_ */


#ifdef F7MOD_normalize_with_carry_
;; Z = &f7_t
;; C[] = .mant may be not normalized
;; Carry === r16 = Addend to Z->expo in [-64, 128).
;; Normalize C[], set Flags, and adjust Z->expo.
;; Return CA (after normalization) in TMP.
;; Unchanged: T
;;
;; Clobbers r16, r17 (Addend), r26 and r27.  When the mantissa needed
;; shifting, the clzdi2 and ashldi3 helpers are called.
;; On the normal path the new exponent is stored to Z->expo and is also
;; left in r26:r27.  On exponent overflow the flags register is set to
;; Inf or NaN and Z->expo is not written.  For a zero value or on
;; exponent underflow TMP is cleared and control is passed to __clr_8.
;; NOTE(review): __clr_8 is not defined here; presumably it clears
;; r18..r25 and returns -- confirm.
#define Addend  r17
#define Zbits   r26
#define expL    r26
#define expH    r27
DEFUN normalize_with_carry
    mov     Addend, Carry
    ;; Top mantissa bit already set: no shift needed.
    tst     C6
    brmi .Lshift.0
    ;; r26 = CLZ (uint64_t R18)
    F7call  clzdi2
    cpi     Zbits,  64
    breq .Lclr
    ;; Shifting left by Zbits bits lowers the exponent by the same amount.
    sub     Addend, Zbits
    mov     r16,    Zbits

    F7call  ashldi3
    ;; Assert (R25.7 == 1)
.Lshift.0:
    ;; Save the byte below the mantissa before the register is reused
    ;; for the flags.
    mov     TMP,    CA
    ld      Flags,  Z

    ;; .expo += Addend
    ldd     expL,   Z+0+Expo
    ldd     expH,   Z+1+Expo
    ;; Sign-extend Addend
    clr     r16
    sbrc    Addend, 7
    com     r16

    ;; exp += (int8_t) Addend, i.e. sign-extend Addend.
    add     expL,   Addend
    adc     expH,   r16
    brvc .Lnormal
    ;; Signed overflow of the 16-bit sum: a negative Addend means underflow.
    tst     r16
    brmi .Lclr
    ;; Overflow
#if F7_HAVE_Inf == 1
    ori     Flags,  F7_FLAG_inf
#else
    ldi     Flags,  F7_FLAG_nan
#endif /* Have Inf */
    ret

.Lnormal:
    std     Z+0+Expo,   expL
    std     Z+1+Expo,   expH
    ret

.Lclr:
    ;; Underflow or Zero.
    clr     TMP
    .global __clr_8
    XJMP    __clr_8

;; Entry points that normalize C[] as above and then store flags and
;; mantissa to *Z.  They differ only in how T is set up:
;;   normalize.store_with_flags              T = 1, no rounding
;;   normalize.round.store_with_flags        T = 0, with rounding
;;   normalize.maybe_round.store_with_flags  T as supplied by the caller
;; Out: R24 = TMP.  The skipnext macro is not defined in this file.
LABEL normalize.store_with_flags
    ;; no rounding
    set
    skipnext
LABEL normalize.round.store_with_flags
    ;; with rounding
    clt     ; skipped ?
LABEL normalize.maybe_round.store_with_flags
    F7call  normalize_with_carry
    ;; We have:
    ;; Z   = &f7_t
    ;; X   = .expo
    ;; C[] = .mant
    ;; R18 = .flags
    ;; TMP = byte below .mant after normalization
    ;; T = 1  =>  no rounding.
    brts .Lstore
    ;; Round: add the top bit of the byte below .mant to the mantissa.
    lsl     TMP
    adc     C0,     ZERO
    brcc .Lstore
    adc     C1,     ZERO
    adc     C2,     ZERO
    adc     C3,     ZERO
    adc     C4,     ZERO
    adc     C5,     ZERO
    adc     C6,     ZERO
    brcc .Lstore
    ;; We only come here if C6 overflowed, i.e. C[] is 0 now.
    ;; .mant = 1.0 by restoring the MSbit.
    ror     C6
    ;; .expo += 1 and override the .expo stored during normalize.
    adiw    expL,   1
    std     Z+0+Expo,   expL
    std     Z+1+Expo,   expH

.Lstore:
    F7call  store_mant.with_flags

    ;; Return the byte below .mant after normalization.
    ;; This is only useful without rounding; the caller will know.
    mov     R24,    TMP
    ret
ENDF normalize_with_carry
#endif /* F7MOD_normalize_with_carry_ */


#ifdef F7MOD_normalize_
;; Using above functionality from C.
;; f7_t* normalize (f7_t *cc)
;; Adjusts cc->expo
;; Clears cc->flags
;;
;; In:   r24:r25 = cc.
;; Out:  r24:r25 = cc (copied back from Z).
;; r16 and r17 are saved here because the normalization code uses them.
DEFUN normalize
    push    r17
    push    r16
    wmov    ZL,     r24
    F7call  load_mant.clr_CA
    ;; No extra exponent addend.
    clr     Carry
    st      Z,      ZERO
    ;; Normalize without rounding and store the result to *Z.
    F7call  normalize.store_with_flags
    wmov    r24,    Z
    pop     r16
    pop     r17
    ret
ENDF normalize
#endif /* F7MOD_normalize_ */


#ifdef F7MOD_store_expo_
#define Done    r24
#define expLO   r24
#define expHI   r25
;; expo == INT16_MAX  =>  *Z = Inf,         return Done = true.
;; expo == INT16_MIN  =>  *Z = 0x0,         return Done = true.
;; else               =>  Z->expo = expo,   return Done = false.
;;
;; In:   Z = address of the f7_t, r24:r25 = expo.
;; Out:  r24 = Done.
;; In the INT16_MAX case only Z->flags is written: the sign flag is kept
;; and the Inf flag added, or the flags become NaN when F7_HAVE_Inf is
;; not 1.
DEFUN store_expo
    ;; Z flag is set only if expo is 0x8000.
    cpi     expHI,   0x80
    cpc     expLO,  ZERO
    breq .Ltiny
    ;; Adding 1 overflows (V = 1) exactly for expo = 0x7fff.
    adiw    expLO,  1
    brvs .Lhuge
    sbiw    expLO,  1
    std     Z+0+Expo,   expLO
    std     Z+1+Expo,   expHI
    ldi     Done,   0
    ret

.Lhuge:
#if F7_HAVE_Inf == 1
    ld      Done,   Z
    andi    Done,   F7_FLAG_sign
    ori     Done,   F7_FLAG_inf
#else
    ldi     Done,   F7_FLAG_nan
#endif /* Have Inf */
    st      Z,      Done
    ldi     Done,   1
    ret

.Ltiny:
    ldi     Done,   1
    ;; Tail call: the clr routine zeroes *Z and returns to our caller.
    F7jmp   clr
ENDF store_expo
#endif /* F7MOD_store_expo_ */


#ifdef F7MOD_set_u64_
;; Convert the 64-bit integer in r18..r25 to an f7_t.
;; In:   r16:r17 = address of the destination f7_t
;;       r18..r25 = the integer, r25 being the most significant byte
;; Out:  r24:r25 = address of the f7_t
;; Entry set_s64 treats the integer as signed (T = 1), entry set_u64 as
;; unsigned (T = 0).  A negative signed input is negated with __negdi2 and
;; the sign flag is set instead.  The value is then normalized with an
;; exponent addend of 63, rounded and stored.
;; r16:r17 is reloaded from Z at the end because the normalization code
;; clobbers it.  The skipnext macro is not defined in this file.
DEFUN set_s64
    set
    skipnext
    ;; ...
LABEL set_u64
    clt     ; Skipped?
    wmov    ZL,     r16
    ;; TMP holds .flags.
    clr     TMP
    brtc .Lnot.negative

    ;; Signed input: T = sign bit of the integer.
    bst     C6,     7
    brtc .Lnot.negative
    bld     TMP,    F7_FLAGNO_sign
    .global __negdi2
    XCALL   __negdi2

.Lnot.negative:
    st      Z,          TMP
    std     Z+0+Expo,   ZERO
    std     Z+1+Expo,   ZERO
    ldi     Carry,      63
    F7call  normalize.round.store_with_flags
    wmov    r24,        Z
    wmov    r16,        Z   ; Unclobber r16.
    ret
ENDF set_s64
#endif /* F7MOD_set_u64_ */


#ifdef F7MOD_to_integer_
#define Mask    r26
;; Convert the f7_t at r24:r25 to a signed integer.
;; In:   r24:r25 = address of the f7_t
;;       r22     = Mask.  Bit 5 set selects a 64-bit result, otherwise
;;                 bit 4 set selects 32 bits, otherwise 16 bits.
;;                 NOTE(review): presumably Mask is the bit width minus 1
;;                 (63, 31 or 15) -- confirm with the callers.
;; Out:  the integer in the upper bytes of r18..r25: r18..r25 for 64 bits,
;;       r22..r25 for 32 bits, r24:r25 for 16 bits.
;; Special cases:
;;   NaN                        =>  INTxx_MIN
;;   +Inf or too big            =>  INTxx_MAX          (if F7_HAVE_Inf)
;;   -Inf or too small          =>  INTxx_MIN + 1      (if F7_HAVE_Inf)
;;   saturation without F7_HAVE_Inf  =>  INTxx_MIN
;;   top mantissa bit clear, or .expo negative  =>  0
;; NOTE(review): __clr_8 and __sbc_8 are not defined here; presumably they
;; act on all of r18..r25 -- confirm.
;; Clobbers Z, r26, r27, TMP and T; r16 is saved around its use.
DEFUN to_integer
    wmov    ZL,     r24
    mov     Mask,   r22

    F7call  load_mant.with_flags

    sbrc    Flags, F7_FLAGNO_nan
    rjmp .Lset_0x8000

    sbrc    Flags, F7_FLAGNO_inf
    rjmp .Lsaturate

    sbrs    C6, 7
    rjmp .Lset_0x0000

    ;; T = sign of the number, used after the shift below.
    bst     Flags, F7_FLAGNO_sign
    ldd     r27,    Z+0+Expo
    ;; Does .expo have bits outside Mask? ...
    mov     TMP,    Mask
    com     TMP
    and     TMP,    r27
    ldd     r27,    Z+1+Expo
    tst     r27
    brmi .Lset_0x0000       ; ...yes: .expo is < 0  =>  return 0
    or      TMP,    r27
    brne .Lsaturate.T       ; ...yes: .expo > Mask  =>  saturate

    ;; ...no:  Shift right to meet .expo = 0.
    PUSH    r16
    ;; r16 = Mask - .expo, computed as ~.expo masked with Mask.
    ldd     r16,    Z+0+Expo
    eor     r16,    Mask
    and     r16,    Mask
    clr     CA
    F7call  lshrdi3
    POP     r16
    tst     C6
    brmi    .Lsaturate.T    ;   > INTxx_MAX  =>  saturate

    brtc 9f                 ;   >= 0         =>  return
    ;; Negative: negate at the selected width.  Each SBRC skips the jump
    ;; that follows it when the tested Mask bit is clear.
    sbrc    Mask,   5
    .global __negdi2
    XJMP    __negdi2
    sbrc    Mask,   4
    .global __negsi2
    XJMP    __negsi2
    ;; 16-bit negation of r25:r24.
    neg     C6
    neg     C5
    sbci    C6,     0
9:  ret

.Lsaturate:
    bst     Flags, F7_FLAGNO_sign
.Lsaturate.T:

#if F7_HAVE_Inf
    brtc .Lset_0x7fff
    ;; -Inf  =>  return 1 + INTxx_MIN
    mov     ZL,     Flags
    .global __clr_8
    XCALL   __clr_8
    ldi     C6,     0x80

    ;; Set the lowest byte of the result to 1 for every width that applies.
    ldi     CA+0,   0x01

    sbrs    Mask,   5
    ldi     CA+4,   0x01

    sbrs    Mask,   4
    ldi     CA+6,   0x01
    ret

.Lset_0x7fff:
    ;; +Inf  =>  return INTxx_MAX
    sec
    .global __sbc_8
    XCALL   __sbc_8
    ldi     C6,     0x7f
    ret
#endif /* F7_HAVE_Inf */

.Lset_0x8000:
    ;; NaN  =>  return INTxx_MIN
    .global __clr_8
    XCALL   __clr_8
    ldi     C6,     0x80
    ret

.Lset_0x0000:
    ;; Small value  =>  return 0x0
    .global __clr_8
    XJMP    __clr_8

ENDF to_integer
#endif /* F7MOD_to_integer_ */


#ifdef F7MOD_to_unsigned_
#define Mask    r26
;; Convert the f7_t at r24:r25 to an unsigned integer.
;; In:   r24:r25 = address of the f7_t
;;       r22     = Mask.
;;                 NOTE(review): presumably the bit width of the result
;;                 minus 1 -- confirm with the callers.
;; Out:  the integer in r18..r25, occupying the upper bytes for widths
;;       below 64 bits.
;; Special cases, tested in this order:
;;   NaN                                        =>  UINTxx_MAX
;;   sign flag set                              =>  0
;;   Inf                                        =>  UINTxx_MAX
;;   top mantissa bit clear, or .expo negative  =>  0
;;   .expo has bits outside Mask                =>  UINTxx_MAX
;; NOTE(review): __clr_8 and __sbc_8 are not defined here; presumably they
;; act on all of r18..r25 -- confirm.
;; Clobbers Z, r26, r27 and TMP; r16 is saved around its use.
DEFUN to_unsigned
    wmov    ZL,     r24
    mov     Mask,   r22

    F7call  load_mant.with_flags

    sbrc    Flags, F7_FLAGNO_nan
    rjmp .Lset_0xffff

    sbrc    Flags, F7_FLAGNO_sign
    rjmp .Lset_0x0000

    sbrc    Flags, F7_FLAGNO_inf
    rjmp .Lset_0xffff

    sbrs    C6, 7
    rjmp .Lset_0x0000

    ldd     r27,    Z+0+Expo
    ;; Does .expo have bits outside Mask? ...
    mov     TMP,    Mask
    com     TMP
    and     TMP,    r27
    ldd     r27,    Z+1+Expo
    tst     r27
    brmi .Lset_0x0000       ; ...yes: .expo is < 0  =>  return 0
    or      TMP,    r27
    brne .Lset_0xffff       ; ...yes: .expo > Mask  =>  saturate

    ;; ...no:  Shift right to meet .expo = 0.
    PUSH    r16
    ;; r16 = Mask - .expo, computed as ~.expo masked with Mask.
    ldd     r16,    Z+0+Expo
    eor     r16,    Mask
    and     r16,    Mask
    clr     CA
    F7call  lshrdi3
    POP     r16
    ret

.Lset_0xffff:
    ;; return UINTxx_MAX
    sec
    .global __sbc_8
    XJMP    __sbc_8

.Lset_0x0000:
    ;; Small value  =>  return 0x0
    .global __clr_8
    XJMP    __clr_8

ENDF to_unsigned
#endif /* F7MOD_to_unsigned_ */


#ifdef F7MOD_addsub_mant_scaled_
;; int8_t f7_addsub_mant_scaled_asm (f7_t *r24, const f7_t *r22,
;;                                   const f7_t *r20, uint8_t r18);
;; R18.0 = 1 : ADD
;; R18.0 = 0 : SUB
;; R18[7..1] : Scale
;; Compute the mantissa of *R22 plus (ADD) or minus (SUB) the mantissa of
;; *R20 shifted right by Scale bits, then normalize, round and store the
;; result to *R24.
;;
;; A SUB result that is zero or that borrowed clears *R24 instead.
;; An ADD that carries out of the top bit shifts the sum right by one bit
;; and passes an exponent addend of 1 to the normalization.
;; NOTE(review): __subdi3 and __adddi3 are not defined here; the code
;; assumes they combine r18..r25 with r10..r17 and leave the result in
;; r18..r25 with Z and C flags describing it -- confirm.
;; NOTE(review): do_prologue_saves / do_epilogue_restores come from
;; outside this file; the latter is presumably also the function return.

;; Register numbers of the second 8-byte operand B[]: registers 10 up to 17,
;; laid out like CA and C0..C6.
#define     BA      10
#define     B0      BA+1
#define     B1      B0+1
#define     B2      B0+2
#define     B3      B0+3
#define     B4      B0+4
#define     B5      B0+5
#define     B6      B0+6

DEFUN addsub_mant_scaled
    do_prologue_saves  10

    bst     r18,    0  ;; ADD ?
    ;; r16 = Scale, the shift count for lshrdi3.
    lsr     r18
    mov     r16,    r18

    wmov    ZL,     r20
    wmov    YL,     r22
    ;; C[] = bb >> shift
    wmov    XL,     r24

    F7call  load_mant.clr_CA
    F7call  lshrdi3

    ;; B[] = C[], copied as four register pairs.
    wmov    BA,     CA
    wmov    B1,     C1
    wmov    B3,     C3
    wmov    B5,     C5
    ;; C[] = mantissa of the first operand.
    wmov    ZL,     YL
    F7call  load_mant.clr_CA

    ;; Z = destination for the store routines below.
    wmov    ZL,     XL

    brts .Ladd

    .global __subdi3
    XCALL   __subdi3

    breq .Lzero
    brcc .Lround
    ;; C = 1: Can underflow happen at all ?
.Lzero:
    F7call  clr
    rjmp .Lepilogue

.Ladd:
    .global __adddi3
    XCALL   __adddi3
    brcc .Lround
    ;; Carry out of bit 63: shift right by 1, re-insert the carry as the
    ;; top bit, and keep Carry = 1 as exponent addend.
    ldi     Carry,  1
    .global __lshrdi3
    XCALL   __lshrdi3
    ori     C6, 1 << 7
    skipnext
.Lround:
    clr     Carry   ; skipped?
    F7call  normalize.round.store_with_flags

.Lepilogue:
    do_epilogue_restores 10

ENDF addsub_mant_scaled

    789      1.1  mrg #if !defined (__AVR_HAVE_MOVW__) || !defined (__AVR_HAVE_JMP_CALL__)
    790      1.1  mrg DEFUN lshrdi3
                          ;; Variant for devices without MOVW or without JMP/CALL (see the
                          ;; enclosing #if): no local pre-shifting, control simply continues
                          ;; in libgcc's __lshrdi3.
    791      1.1  mrg     .global __lshrdi3
    792      1.1  mrg     XJMP    __lshrdi3
    793      1.1  mrg ENDF lshrdi3
    794      1.1  mrg DEFUN ashldi3
                          ;; Variant for devices without MOVW or without JMP/CALL (see the
                          ;; enclosing #if): no local pre-shifting, control simply continues
                          ;; in libgcc's __ashldi3.
    795      1.1  mrg     .global __ashldi3
    796      1.1  mrg     XJMP    __ashldi3
    797      1.1  mrg ENDF ashldi3
    798      1.1  mrg #else
    799      1.1  mrg 
    800      1.1  mrg # Basically just a wrapper around libgcc's __lshrdi3.
                      ;; Logical right shift of C[] (CA = lowest byte ... C6 = highest byte)
                      ;; by the count in r16.  r16 is preserved by the push / pop pair.
                      ;; Count bits 5 and 4 are handled here by moving register pairs (32
                      ;; resp. 16 bit positions); only count bits 3...0 are passed on to
                      ;; __lshrdi3, and that call is skipped when they are all zero.
                      ;; Count bits 7...6 are not looked at.
    801      1.1  mrg DEFUN lshrdi3
    802      1.1  mrg     ;; Handle bit 5 of shift offset.
    803      1.1  mrg     sbrs    r16,    5
    804      1.1  mrg     rjmp 4f
                          ;; Shift by 32: upper four bytes move down, C3...C6 become 0.
    805      1.1  mrg     wmov    CA,     C3
    806      1.1  mrg     wmov    C1,     C5
    807      1.1  mrg     clr     C6          $   clr     C5  $   wmov    C3, C5
    808      1.1  mrg 4:
    809      1.1  mrg     ;; Handle bit 4 of shift offset.
    810      1.1  mrg     sbrs    r16,    4
    811      1.1  mrg     rjmp 3f
                          ;; Shift by 16: every pair moves down by one pair, C5:C6 become 0.
    812      1.1  mrg     wmov CA, C1
    813      1.1  mrg     wmov C1, C3
    814      1.1  mrg     wmov C3, C5
    815      1.1  mrg     clr     C6          $   clr     C5
    816      1.1  mrg 3:
    817      1.1  mrg     ;; Handle bits 3...0 of shift offset.
    818      1.1  mrg     push    r16
    819      1.1  mrg     andi    r16,    0xf
    820      1.1  mrg     breq 0f
    821      1.1  mrg 
    822      1.1  mrg     .global __lshrdi3
    823      1.1  mrg     XCALL   __lshrdi3
    824      1.1  mrg 0:
    825      1.1  mrg     pop     r16
    826      1.1  mrg     ret
    827      1.1  mrg ENDF lshrdi3
    828      1.1  mrg 
    829      1.1  mrg # Basically just a wrapper around libgcc's __ashldi3.
                      ;; Left shift of C[] (CA = lowest byte ... C6 = highest byte) by the
                      ;; count in r16.  r16 is preserved by the push / pop pair.
                      ;; Count bits 5 and 4 are handled here by moving register pairs (32
                      ;; resp. 16 bit positions); only count bits 3...0 are passed on to
                      ;; __ashldi3, and that call is skipped when they are all zero.
                      ;; Count bits 7...6 are not looked at.
    830      1.1  mrg DEFUN ashldi3
    831      1.1  mrg     ;; Handle bit 5 of shift offset.
    832      1.1  mrg     sbrs    r16,    5
    833      1.1  mrg     rjmp 4f
                          ;; Shift by 32: lower four bytes move up, CA...C2 become 0.
    834      1.1  mrg     wmov    C5,     C1
    835      1.1  mrg     wmov    C3,     CA
    836      1.1  mrg     clr     C2          $   clr     C1  $   wmov    CA, C1
    837      1.1  mrg 4:
    838      1.1  mrg     ;; Handle bit 4 of shift offset.
    839      1.1  mrg     sbrs    r16,    4
    840      1.1  mrg     rjmp 3f
                          ;; Shift by 16: every pair moves up by one pair, CA:C0 become 0.
    841      1.1  mrg     wmov C5, C3
    842      1.1  mrg     wmov C3, C1
    843      1.1  mrg     wmov C1, CA
    844      1.1  mrg     clr     CA          $   clr     C0
    845      1.1  mrg 3:
    846      1.1  mrg     ;; Handle bits 3...0 of shift offset.
    847      1.1  mrg     push    r16
    848      1.1  mrg     andi    r16,    0xf
    849      1.1  mrg     breq 0f
    850      1.1  mrg 
    851      1.1  mrg     .global __ashldi3
    852      1.1  mrg     XCALL   __ashldi3
    853      1.1  mrg 0:
    854      1.1  mrg     pop     r16
    855      1.1  mrg     ret
    856      1.1  mrg ENDF ashldi3
    857      1.1  mrg #endif /* Small device */
    858      1.1  mrg 
    859      1.1  mrg #endif /* F7MOD_addsub_mant_scaled_ */
    860      1.1  mrg 
    861      1.1  mrg #if defined F7MOD_mul_mant_ && defined (__AVR_HAVE_MUL__)
    862      1.1  mrg     #define     A0      11
    863      1.1  mrg     #define     A1      A0+1
    864      1.1  mrg     #define     A2      A0+2
    865      1.1  mrg     #define     A3      A0+3
    866      1.1  mrg     #define     A4      A0+4
    867      1.1  mrg     #define     A5      A0+5
    868      1.1  mrg     #define     A6      A0+6
    869      1.1  mrg 
    870      1.1  mrg     #define     TT0     26
    871      1.1  mrg     #define     TT1     TT0+1
    872      1.1  mrg     #define     TT2     28
    873      1.1  mrg     #define     TT3     TT2+1
    874      1.1  mrg 
    875      1.1  mrg     #define     BB      10
    876      1.1  mrg 
                      ;; Mantissa multiplication with the hardware multiplier.
                      ;;
                      ;; Registers as used by the code below:
                      ;;   R25:R24  pushed on entry, popped into Z right before the final
                      ;;            normalize / store call.
                      ;;   R23:R22  pointer whose 7 bytes at offset Off are loaded into
                      ;;            A0...A6 = r11...r17.
                      ;;   R21:R20  pointer kept in Z during the multiplication; its bytes
                      ;;            at offset Off are fetched one at a time into BB (r10),
                      ;;            TMP or Atmp.
                      ;;   TT0...TT3 = r26...r29 hold intermediate 16-bit products.
                      ;;
                      ;; Notation of the "i * j -> h:l" comments:  byte A[i] times byte j of
                      ;; the Z operand is accumulated into result bytes C[h]:C[l].  "a" is
                      ;; CA, "-" is a low product byte that is dropped, "." an unused slot.
                      ;;
                      ;; Every MUL overwrites R1:R0, i.e. ZERO and TMP.  ZERO is therefore
                      ;; re-cleared with CLR inside the ADC chains; CLR leaves the carry
                      ;; flag alone, so the chain continues across it.
    877      1.1  mrg ;; R18.0 = 1: No rounding.
    878      1.1  mrg 
    879      1.1  mrg DEFUN mul_mant
    880      1.1  mrg     do_prologue_saves 10
                          ;; T := R18.0.  NOTE(review): presumably consumed by
                          ;; normalize.maybe_round.store_with_flags -- confirm there.
    881      1.1  mrg     bst     r18,    0
    882      1.1  mrg     push    r25
    883      1.1  mrg     push    r24
    884      1.1  mrg     movw    ZL,     r22
    885      1.1  mrg     LDD     A0,     Z+0+Off
    886      1.1  mrg     LDD     A1,     Z+1+Off
    887      1.1  mrg     LDD     A2,     Z+2+Off
    888      1.1  mrg     LDD     A3,     Z+3+Off
    889      1.1  mrg     LDD     A4,     Z+4+Off
    890      1.1  mrg     LDD     A5,     Z+5+Off
    891      1.1  mrg     LDD     A6,     Z+6+Off
    892      1.1  mrg     movw    ZL,     r20
    893      1.1  mrg 
                          ;; The first four products initialize C[] with plain moves.
    894      1.1  mrg     ;; 6 * 6 -> 6:5
    895      1.1  mrg     ;; 4 * 6 -> 4:3
    896      1.1  mrg     ;; 2 * 6 -> 2:1
    897      1.1  mrg     ;; 0 * 6 -> 0:a
    898      1.1  mrg     ldd     BB, Z+6+Off
    899      1.1  mrg     mul     A6, BB      $   movw    C5, r0
    900      1.1  mrg     mul     A4, BB      $   movw    C3, r0
    901      1.1  mrg     mul     A2, BB      $   movw    C1, r0
    902      1.1  mrg     mul     A0, BB      $   movw    CA, r0
    903      1.1  mrg 
    904      1.1  mrg     ;; 5 * 6 -> 5:4
    905      1.1  mrg     ;; 3 * 6 -> 3:2
    906      1.1  mrg     ;; 1 * 6 -> 1:0
    907      1.1  mrg     mul     A5, BB      $   movw    TT2, r0
    908      1.1  mrg     mul     A3, BB      $   movw    TT0, r0
    909      1.1  mrg     mul     A1, BB
    910      1.1  mrg     ADD     C0, r0      $   adc     C1, r1
    911      1.1  mrg     adc     C2, TT0     $   adc     C3, TT1
    912      1.1  mrg     adc     C4, TT2     $   adc     C5, TT3     $   clr ZERO
    913      1.1  mrg     adc     C6, ZERO
    914      1.1  mrg     ;; Done B6
    915      1.1  mrg 
    916      1.1  mrg     ;; 3 * 3 -> 0:a
    917      1.1  mrg     ;; 4 * 4 -> 2:1
    918      1.1  mrg     ;; 5 * 5 -> 4:3
    919      1.1  mrg     ldd     BB, Z+3+Off $   mul     A3, BB      $   movw    TT0, r0
    920      1.1  mrg     ldd     BB, Z+4+Off $   mul     A4, BB      $   movw    TT2, r0
    921      1.1  mrg     ldd     BB, Z+5+Off $   mul     A5, BB
    922      1.1  mrg 
    923      1.1  mrg     ADD     CA, TT0     $   adc     C0, TT1
    924      1.1  mrg     adc     C1, TT2     $   adc     C2, TT3
    925      1.1  mrg     adc     C3, r0      $   adc     C4, r1
                          ;; Propagate a carry into C6:C5; brcc .+2 steps over the
                          ;; one-word adiw when there is none.
    926      1.1  mrg     brcc .+2
    927      1.1  mrg     adiw    C5, 1
    928      1.1  mrg 
                          ;; BB still holds byte 5 of the Z operand from the ldd above.
    929      1.1  mrg     ;; 6 * 5 -> 5:4
    930      1.1  mrg     ;; 4 * 5 -> 3:2
    931      1.1  mrg     ;; 2 * 5 -> 1:0
    932      1.1  mrg     ;; 0 * 5 -> a:-
    933      1.1  mrg     mul     A0, BB
    934      1.1  mrg     ;; A0 done
                      ;; From here on the register of A0 is recycled as scratch Atmp.
    935      1.1  mrg #define Atmp A0
    936      1.1  mrg 
    937      1.1  mrg     mov     Atmp, r1
    938      1.1  mrg     mul     A6, BB      $   movw    TT2, r0
    939      1.1  mrg     mul     A4, BB      $   movw    TT0, r0
    940      1.1  mrg     mul     A2, BB
    941      1.1  mrg 
    942      1.1  mrg     ADD     CA, Atmp
    943      1.1  mrg     adc     C0, r0      $   adc     C1, r1
    944      1.1  mrg     adc     C2, TT0     $   adc     C3, TT1
    945      1.1  mrg     adc     C4, TT2     $   adc     C5, TT3     $   clr ZERO
    946      1.1  mrg     adc     C6, ZERO
    947      1.1  mrg 
    948      1.1  mrg     ;; 1 * 5 -> 0:a
    949      1.1  mrg     ;; 3 * 5 -> 2:1
    950      1.1  mrg     ;; 6 * 4 -> 4:3
    951      1.1  mrg     mul     A1, BB      $   movw    TT0, r0
    952      1.1  mrg     mul     A3, BB      $   movw    TT2, r0
    953      1.1  mrg     ldd     BB, Z+4+Off
    954      1.1  mrg     mul     A6, BB
    955      1.1  mrg 
    956      1.1  mrg     ADD     CA, TT0     $   adc     C0, TT1
    957      1.1  mrg     adc     C1, TT2     $   adc     C2, TT3
    958      1.1  mrg     adc     C3, r0      $   adc     C4, r1      $   clr ZERO
    959      1.1  mrg     adc     C5, ZERO    $   adc     C6, ZERO
    960      1.1  mrg     ;; B5 done
    961      1.1  mrg 
                          ;; The remaining partial products go through the two local
                          ;; helpers defined after the epilogue.  For .Lmul.help.3 the
                          ;; caller sets up TT0, TMP, BB and Atmp.
    962      1.1  mrg     ;; 6 * 3 -> 3:2
    963      1.1  mrg     ;; 6 * 1 -> 1:0
    964      1.1  mrg     ;; 4 * 1 -> a:-
    965      1.1  mrg     mov     TT0, A6     $   ldd TMP,  Z+3+Off
    966      1.1  mrg     mov     BB,  A4     $   ldd Atmp, Z+1+Off
    967      1.1  mrg     rcall   .Lmul.help.3
    968      1.1  mrg 
    969      1.1  mrg     ;; 5 * 4 -> 3:2
    970      1.1  mrg     ;; 5 * 2 -> 1:0
    971      1.1  mrg     ;; 3 * 2 -> a:-
    972      1.1  mrg     mov     TT0, A5     $   ldd TMP,  Z+4+Off
    973      1.1  mrg     mov     BB,  A3     $   ldd Atmp, Z+2+Off
    974      1.1  mrg     rcall   .Lmul.help.3
    975      1.1  mrg 
    976      1.1  mrg     ;; 4 *   -> 3:2 (=0)
    977      1.1  mrg     ;; 4 * 3 -> 1:0
    978      1.1  mrg     ;; 2 * 3 -> a:-
    979      1.1  mrg     mov     TT0, A4     $   clr TMP
    980      1.1  mrg     mov     BB,  A2     $   ldd Atmp, Z+3+Off
    981      1.1  mrg     rcall   .Lmul.help.3
    982      1.1  mrg 
    983      1.1  mrg     ;; 3 * . -> 3:2 (=0)
    984      1.1  mrg     ;; 3 * 4 -> 1:0
    985      1.1  mrg     ;; 1 * 4 -> a:-
    986      1.1  mrg     mov     TT0, A3     $   clr TMP
    987      1.1  mrg     mov     BB,  A1     $   ldd Atmp, Z+4+Off
    988      1.1  mrg     rcall   .Lmul.help.3
    989      1.1  mrg 
                          ;; With TT0 = 0 both TT0 products in the helper are 0, so TMP
                          ;; needs no set-up for this call.
    990      1.1  mrg     ;; . * ? -> 3:2 (=0)
    991      1.1  mrg     ;; . * 0 -> 1:0 (=0)
    992      1.1  mrg     ;; 5 * 0 -> a:-
    993      1.1  mrg     clr     TT0
    994      1.1  mrg     mov     BB,  A5     $   ldd Atmp, Z+0+Off
    995      1.1  mrg     rcall   .Lmul.help.3
    996      1.1  mrg 
    997      1.1  mrg     clr TT3  ;; Asserted by .Lmul.help.2
                          ;; Atmp still holds byte 0 of the Z operand from the call above,
                          ;; which is why the second ldd below is commented out.
    998      1.1  mrg     ;; 6 * 2 -> 2:1
    999      1.1  mrg     ;; 6 * 0 -> 0:a
   1000      1.1  mrg                         $   ldd TMP,  Z+2+Off
   1001      1.1  mrg     mov     BB, A6     ;$   ldd Atmp, Z+0+Off
   1002      1.1  mrg     rcall   .Lmul.help.2
   1003      1.1  mrg 
   1004      1.1  mrg     ;; 5 * 3 -> 2:1
   1005      1.1  mrg     ;; 5 * 1 -> 0:a
   1006      1.1  mrg                         $   ldd TMP,  Z+3+Off
   1007      1.1  mrg     mov     BB, A5      $   ldd Atmp, Z+1+Off
   1008      1.1  mrg     rcall   .Lmul.help.2
   1009      1.1  mrg 
   1010      1.1  mrg     ;; 4 * . -> 2:1 (=0)
   1011      1.1  mrg     ;; 4 * 2 -> 0:a
   1012      1.1  mrg                         $   clr TMP
   1013      1.1  mrg     mov     BB, A4      $   ldd Atmp, Z+2+Off
   1014      1.1  mrg     rcall   .Lmul.help.2
   1015      1.1  mrg 
   1016      1.1  mrg     ;; 2 * . -> 2:1 (=0)
   1017      1.1  mrg     ;; 2 * 4 -> 0:a
   1018      1.1  mrg                         $   clr TMP
   1019      1.1  mrg     mov     BB, A2      $   ldd Atmp, Z+4+Off
   1020      1.1  mrg     rcall   .Lmul.help.2
   1021      1.1  mrg 
   1022      1.1  mrg     ;; Finally...
   1023      1.1  mrg 
                          ;; Z = the R25:R24 value pushed on entry.
   1024      1.1  mrg     pop     ZL
   1025      1.1  mrg     pop     ZH
   1026      1.1  mrg     ;; The high byte is at least 0x40 and at most 0xfe.
   1027      1.1  mrg     ;; The result has to be left-shifted by one in order to scale it
   1028      1.1  mrg     ;; correctly.
   1029      1.1  mrg 
   1030      1.1  mrg     ldi     Carry,  1
   1031      1.1  mrg     F7call  normalize.maybe_round.store_with_flags
   1032      1.1  mrg 
   1033      1.1  mrg     do_epilogue_restores 10
   1034      1.1  mrg 
   1035      1.1  mrg ;; TT0 * Tmp  -> 3:2
   1036      1.1  mrg ;; TT0 * Atmp -> 1:0
   1037      1.1  mrg ;; BB  * Atmp -> a:-
   1038      1.1  mrg ;;
   1039      1.1  mrg ;; Clobbers : TMP, TT0...TT3.
   1040      1.1  mrg ;; Sets     : ZERO = 0.
                      ;; Carries are propagated all the way up into C6.
   1041      1.1  mrg .Lmul.help.3:
   1042      1.1  mrg     mul     TT0, TMP    $   movw    TT2, r0
   1043      1.1  mrg     mul     TT0, Atmp   $   movw    TT0, r0
   1044      1.1  mrg     mul     BB,  Atmp
   1045      1.1  mrg 
                          ;; Only the high byte (r1) of BB * Atmp is added.
   1046      1.1  mrg     ADD     CA, r1
   1047      1.1  mrg     adc     C0, TT0     $   adc     C1, TT1
   1048      1.1  mrg     adc     C2, TT2
                      ;; Shared tail: .Lmul.help.2 jumps here with the carry of its own
                      ;; ADC chain still pending.
   1049      1.1  mrg .Lmul.help.3.C3:        $   adc     C3, TT3     $ clr ZERO
   1050      1.1  mrg     adc     C4, ZERO    $   adc     C5, ZERO
   1051      1.1  mrg     adc     C6, ZERO
   1052      1.1  mrg     ret
   1053      1.1  mrg 
   1054      1.1  mrg ;; BB * TMP  -> 2:1
   1055      1.1  mrg ;; BB * Atmp -> 0:a
   1056      1.1  mrg ;;
   1057      1.1  mrg ;; Asserts  : TT3 = 0
   1058      1.1  mrg ;; Clobbers : TMP, TT0, TT1.
   1059      1.1  mrg ;; Sets     : ZERO = 0.
                      ;; TT3 must be 0 because the shared tail adds it to C3.
   1060      1.1  mrg .Lmul.help.2:
   1061      1.1  mrg     mul     BB, TMP     $   movw    TT0, r0
   1062      1.1  mrg     mul     BB, Atmp
   1063      1.1  mrg     ADD     CA, r0      $   adc     C0, r1
   1064      1.1  mrg     adc     C1, TT0     $   adc     C2, TT1
   1065      1.1  mrg     rjmp .Lmul.help.3.C3
   1066      1.1  mrg 
   1067      1.1  mrg ENDF mul_mant
   1068      1.1  mrg #endif /* F7MOD_mul_mant_ && MUL */
   1069      1.1  mrg 
   1070      1.1  mrg 
   1071      1.1  mrg #if defined (F7MOD_div_)
   1072      1.1  mrg 
   1073      1.1  mrg ;; Dividend is C[]
                      ;;
                      ;; Bit-serial (shift / compare / subtract) division of two mantissae.
                      ;; Registers as read by the code below:
                      ;;   R25:R24  pointer handed to load_mant in Z (dividend -> C[]).
                      ;;   R23:R22  pointer whose 7 bytes at offset Off become the divisor A[].
                      ;;   R20      number of quotient bits to compute (kept in QBits).
                      ;; Each loop iteration yields one quotient bit, shifted into the
                      ;; 64-bit quotient Q0...Q7.
   1074      1.1  mrg 
   1075      1.1  mrg ;; Divisor
   1076      1.1  mrg #define A0       9
   1077      1.1  mrg #define A1      10
   1078      1.1  mrg #define A2      11
   1079      1.1  mrg #define A3      12
   1080      1.1  mrg #define A4      13
   1081      1.1  mrg #define A5      14
   1082      1.1  mrg #define A6      15
   1083      1.1  mrg 
   1084      1.1  mrg ;; Quotient
   1085      1.1  mrg #define Q0      0       /* === TMP  */
   1086      1.1  mrg #define Q1      Q0+1    /* === ZERO */
   1087      1.1  mrg #define Q2      26
   1088      1.1  mrg #define Q3      Q2+1
   1089      1.1  mrg #define Q4      28
   1090      1.1  mrg #define Q5      Q4+1
   1091      1.1  mrg #define Q6      16
   1092      1.1  mrg #define Q7      Q6+1
   1093      1.1  mrg 
                      ;; The loop counter lives in CA; the compare / subtract chains below
                      ;; only touch C0...C6.
   1094      1.1  mrg #define Cnt     CA
   1095      1.1  mrg #define QBits   r8
   1096      1.1  mrg 
   1097      1.1  mrg DEFUN div
   1098      1.1  mrg     do_prologue_saves 12
   1099      1.1  mrg 
   1100      1.1  mrg     ;; Number of bits requested for the quotient.
   1101      1.1  mrg     ;; This is usually 2 + F7_MANT_BITS.
   1102      1.1  mrg     mov     QBits,  r20
   1103      1.1  mrg     wmov    ZL,     r22
   1104      1.1  mrg     LDD     A0,     Z+0+Off
   1105      1.1  mrg     LDD     A1,     Z+1+Off
   1106      1.1  mrg     LDD     A2,     Z+2+Off
   1107      1.1  mrg     LDD     A3,     Z+3+Off
   1108      1.1  mrg     LDD     A4,     Z+4+Off
   1109      1.1  mrg     LDD     A5,     Z+5+Off
   1110      1.1  mrg     LDD     A6,     Z+6+Off
   1111      1.1  mrg     wmov    ZL,     r24
   1112      1.1  mrg     F7call  load_mant
   1113      1.1  mrg 
                          ;; Q1 is ZERO and is relied upon to still be 0 here, so copying
                          ;; the pair Q1:Q0 clears the other quotient pairs as well.
   1114      1.1  mrg     ;; Clear quotient Q[].
   1115      1.1  mrg     clr     Q0      ; === TMP
   1116      1.1  mrg     ;clr    Q1      ; === ZERO
   1117      1.1  mrg     wmov    Q2,     Q0
   1118      1.1  mrg     wmov    Q4,     Q0
   1119      1.1  mrg     wmov    Q6,     Q0
   1120      1.1  mrg 
   1121      1.1  mrg     ;; C[] and A[] are valid mantissae, i.e. their MSBit is set.  Therefore,
   1122      1.1  mrg     ;; quotient Q[] will be in  [0x0.ff..., 0x0.40...]  and to adjust Q[] we
   1123      1.1  mrg     ;; need at most 1 left-shift.  Compute F7_MANT_BITS + 2 bits of the
   1124      1.1  mrg     ;; quotient:  One bit is used for rounding, and one bit might be consumed
   1125      1.1  mrg     ;; by the mentioned left-shift.
   1126      1.1  mrg     mov     Cnt,    QBits
                          ;; The first quotient bit comes from comparing the unshifted
                          ;; dividend, hence the jump into the middle of the loop.
   1127      1.1  mrg     rjmp .Loop_start
   1128      1.1  mrg 
   1129      1.1  mrg .Loop:
   1130      1.1  mrg     ;; Shift dividend.
   1131      1.1  mrg     LSL     C0
   1132      1.1  mrg     rol     C1
   1133      1.1  mrg     rol     C2
   1134      1.1  mrg     rol     C3
   1135      1.1  mrg     rol     C4
   1136      1.1  mrg     rol     C5
   1137      1.1  mrg     rol     C6
                          ;; A 1 shifted out of C6 means the dividend exceeds any 56-bit
                          ;; divisor: subtract without comparing.
   1138      1.1  mrg     brcs .Lfits
   1139      1.1  mrg     ;; Compare dividend against divisor.
   1140      1.1  mrg .Loop_start:
   1141      1.1  mrg     CP      C0,     A0
   1142      1.1  mrg     cpc     C1,     A1
   1143      1.1  mrg     cpc     C2,     A2
   1144      1.1  mrg     cpc     C3,     A3
   1145      1.1  mrg     cpc     C4,     A4
   1146      1.1  mrg     cpc     C5,     A5
   1147      1.1  mrg     cpc     C6,     A6
   1148      1.1  mrg     ;; Shift 0 into quotient.
   1149      1.1  mrg     brlo 1f
   1150      1.1  mrg .Lfits:
   1151      1.1  mrg     ;; Divisor fits into dividend.
   1152      1.1  mrg     SUB     C0,     A0
   1153      1.1  mrg     sbc     C1,     A1
   1154      1.1  mrg     sbc     C2,     A2
   1155      1.1  mrg     sbc     C3,     A3
   1156      1.1  mrg     sbc     C4,     A4
   1157      1.1  mrg     sbc     C5,     A5
   1158      1.1  mrg     sbc     C6,     A6
   1159      1.1  mrg     ;; Shift 1 into quotient.
   1160      1.1  mrg     sec
   1161      1.1  mrg     rol     Q0
                          ;; skipnext is expected to step over the lsl at label 1 without
                          ;; disturbing the carry that the rol chain below consumes.
   1162      1.1  mrg     skipnext
   1163      1.1  mrg 1:  lsl     Q0
   1164      1.1  mrg     rol     Q1
   1165      1.1  mrg     rol     Q2
   1166      1.1  mrg     rol     Q3
   1167      1.1  mrg     rol     Q4
   1168      1.1  mrg     rol     Q5
   1169      1.1  mrg     rol     Q6
   1170      1.1  mrg     rol     Q7
   1171      1.1  mrg     dec     Cnt
   1172      1.1  mrg     brne .Loop
   1173      1.1  mrg 
                          ;; C[] = Q[], pair by pair.  ZERO was used as quotient byte Q1
                          ;; and has to be re-established afterwards.
   1174      1.1  mrg     wmov    CA,     Q0
   1175      1.1  mrg     wmov    C1,     Q2
   1176      1.1  mrg     wmov    C3,     Q4
   1177      1.1  mrg     wmov    C5,     Q6
   1178      1.1  mrg     clr     ZERO
   1179      1.1  mrg 
                          ;; Carry = 64 - QBits is passed on to the normalize routine.
   1180      1.1  mrg     ldi     Carry,  64
   1181      1.1  mrg     sub     Carry,  QBits
   1182      1.1  mrg     F7call  normalize.round.store_with_flags
   1183      1.1  mrg 
   1184      1.1  mrg     do_epilogue_restores 12
   1185      1.1  mrg ENDF div
   1186      1.1  mrg 
   1187      1.1  mrg #endif /* F7MOD_div_ */
   1188      1.1  mrg 
   1189      1.1  mrg 
   1190      1.1  mrg #if defined (F7MOD_sqrt16_) && defined (__AVR_HAVE_MUL__)
   1191      1.1  mrg 
   1192      1.1  mrg #define     Mask    C6
   1193      1.1  mrg #define     Q0      C3      /*  = R22  */
   1194      1.1  mrg #define     Q1      C4      /*  = R23  */
   1195      1.1  mrg 
   1196      1.1  mrg ;; uint16_t R24 = sqrt16_XXX (uint16_t R24);
   1197      1.1  mrg ;; Clobbers:   R22, R23, TMP.
   1198      1.1  mrg ;;
   1199      1.1  mrg ;; XXX = floor:  Return integral part of square-root of R25:R24 with R25 = 0.
   1200      1.1  mrg ;;               Error is in [0, -1 LSB).
   1201      1.1  mrg ;; XXX = round:  Return square-root of R25:R24 rounded to nearest integer.
   1202      1.1  mrg ;;               R25 = (Q[] >= 65281) = (Q > 0xff00),  i.e. if Q[] is not
   1203      1.1  mrg ;;               bigger than 0xff00, then the result fits in 8 bits.
   1204      1.1  mrg ;;               Return C = 0 if the result is the same as for XXX = floor,
   1205      1.1  mrg ;;               error in [0, -1/2 LSB)
   1206      1.1  mrg ;;               Return C = 1 if the result is one higher than for XXX = floor,
   1207      1.1  mrg ;;               error in [1/2 LSB, 0).
                      ;;
                      ;; The input is copied to Q1:Q0 (R23:R22), which is why those two
                      ;; registers are clobbered: on return they hold the input value.
                      ;; The root is built bit by bit in C5, from bit 7 down to bit 0, by
                      ;; squaring each candidate with MUL.  T selects rounding (T = 1) or
                      ;; floor (T = 0).
   1208      1.1  mrg DEFUN sqrt16_round
   1209      1.1  mrg     set
   1210      1.1  mrg     skipnext
   1211      1.1  mrg     ;; ...
   1212      1.1  mrg LABEL sqrt16_floor
   1213      1.1  mrg     clt ; Skipped?
   1214      1.1  mrg     movw    Q0,     r24
   1215      1.1  mrg     clr     C5
   1216      1.1  mrg     ldi     Mask,   1 << 7
   1217      1.1  mrg 
   1218      1.1  mrg .Loop_mask:
                          ;; Tentatively set the bit; keep it if C5^2 <= Q[], else undo.
   1219      1.1  mrg     add     C5,     Mask
   1220      1.1  mrg     mul     C5,     C5
   1221      1.1  mrg     cp      Q0,     R0
   1222      1.1  mrg     cpc     Q1,     R1
   1223      1.1  mrg     brsh 1f
   1224      1.1  mrg     sub     C5,     Mask
                          ;; Next lower bit.  The loop ends when the single mask bit has
                          ;; been shifted out, which leaves Mask = C6 = 0.
   1225      1.1  mrg 1:  lsr     Mask
   1226      1.1  mrg     brne .Loop_mask
   1227      1.1  mrg 
   1228      1.1  mrg     brtc .Ldone             ; No rounding  =>  C6 will be 0.
   1229      1.1  mrg 
   1230      1.1  mrg     ;; Rounding:  (X + 1/2)^2  =  X^2 + X + 1/4,  thus probing
   1231      1.1  mrg     ;; for bit -1 is testing Q[] against  C5^2 + C5.
   1232      1.1  mrg     mul     C5,     C5
   1233      1.1  mrg     add     R0,     C5
   1234      1.1  mrg     adc     R1,     C6      ; Exploit C6 === Mask = 0.
   1235      1.1  mrg     cp      R0,     Q0
   1236      1.1  mrg     cpc     R1,     Q1
                          ;; C5^2 + C5 >= Q[]: keep the floor value; C = 0 here.
   1237      1.1  mrg     brcc .Ldone
   1238      1.1  mrg     ;; If  C5^2 + C5 + 1/4  fits into Q[], then round up and C = 1.
   1239      1.1  mrg     adiw    C5,     1       ; Exploit C6 === Mask = 0.
   1240      1.1  mrg     sec
   1241      1.1  mrg 
   1242      1.1  mrg .Ldone:
                          ;; MUL has overwritten R1.  CLR does not change the carry flag,
                          ;; so the C result set up above survives.
   1243      1.1  mrg     clr     __zero_reg__
   1244      1.1  mrg     ret
   1245      1.1  mrg ENDF sqrt16_round
   1246      1.1  mrg #undef Mask
   1247      1.1  mrg #undef Q0
   1248      1.1  mrg #undef Q1
   1249      1.1  mrg #endif /* F7MOD_sqrt16_ && MUL */
   1250      1.1  mrg 
   1251      1.1  mrg #ifdef F7MOD_sqrt_approx_
   1252      1.1  mrg DEFUN sqrt_approx
   1253      1.1  mrg     push    r17
   1254      1.1  mrg     push    r16
   1255      1.1  mrg     wmov    XL,     r24
   1256      1.1  mrg     wmov    ZL,     r22
   1257      1.1  mrg 
   1258      1.1  mrg     ;; C[] = 0.
   1259      1.1  mrg     .global __clr_8
   1260      1.1  mrg     XCALL   __clr_8
   1261      1.1  mrg 
   1262      1.1  mrg     ldd     C5,     Z+5+Off
   1263      1.1  mrg     ldd     C6,     Z+6+Off
   1264      1.1  mrg 
   1265      1.1  mrg     ldd     Carry,  Z+0+Expo
   1266      1.1  mrg     ldd     TMP,    Z+1+Expo
   1267      1.1  mrg     wmov    ZL,     XL
   1268      1.1  mrg 
   1269      1.1  mrg     st      Z,      ZERO
   1270      1.1  mrg 
   1271      1.1  mrg     asr     TMP
   1272      1.1  mrg     ror     Carry
   1273      1.1  mrg     std     Z+1+Expo,   TMP
   1274      1.1  mrg     std     Z+0+Expo,   Carry
   1275      1.1  mrg 
   1276      1.1  mrg     ;; Re-interpreting our Q-format 1.xx mantissa as Q2.yy, we have to shift
   1277      1.1  mrg     ;; the mantissa to the right by 1.  As we need an even exponent, multiply
   1278      1.1  mrg     ;; the mantissa by 2 for odd exponents, i.e. only right-shift if .expo
   1279      1.1  mrg     ;; is even.
   1280      1.1  mrg 
   1281      1.1  mrg     brcs 1f
   1282      1.1  mrg     lsr     C6
   1283      1.1  mrg     ror     C5
   1284      1.1  mrg 
   1285      1.1  mrg 1:
   1286      1.1  mrg     F7call  sqrt16_round
   1287      1.1  mrg 
   1288      1.1  mrg     ;; sqrt16_round() returns:   C = 0:  error in [0, -1/2 LSB).
   1289      1.1  mrg     ;;                           C = 1:  error in [1/2 LSB, 0)
   1290      1.1  mrg 
   1291      1.1  mrg     brcc 2f
   1292      1.1  mrg     ;; Undo the round-up from sqrt16_round(); this will transform to
   1293      1.1  mrg     ;; error in [-1/2 LSB, -1 LSB).
   1294      1.1  mrg     sbiw    C5,     1
   1295      1.1  mrg     ;; Together with the correct bit C4.7, the error is in  [0, -1/2 LSB).
   1296      1.1  mrg     ori     C4,     1 << 7
   1297      1.1  mrg 
   1298      1.1  mrg 2:  ;; Setting C4.6 adds 1/4 LSB and the error is now in [1/4 LSB, -1/4 LSB)
   1299      1.1  mrg     ;; in either case.
   1300      1.1  mrg     ori     C4,     1 << 6
   1301      1.1  mrg 
   1302      1.1  mrg     ;; ????????????
   1303      1.1  mrg     ;; sqrt16_round() runs on integers which means that it computes the
   1304      1.1  mrg     ;; square root of  mant * 2^14  if we regard  mant as Q-format 2.yy,
   1305      1.1  mrg     ;; i.e. 2 integral bits.  The result is  sqrt(mant) * 2^7,
   1306      1.1  mrg     ;; and in order to get the same scaling like the input, .expo has to
   1307      1.1  mrg     ;; be adjusted by 7. ???????????????
   1308      1.1  mrg 
   1309      1.1  mrg     ldi     Carry,  8
   1310      1.1  mrg     F7call  normalize.store_with_flags
   1311      1.1  mrg 
   1312      1.1  mrg     pop     r16
   1313      1.1  mrg     pop     r17
   1314      1.1  mrg     ret
   1315      1.1  mrg 
   1316      1.1  mrg ENDF sqrt_approx
   1317      1.1  mrg #endif /* F7MOD_sqrt_approx_ */
   1318      1.1  mrg 
   1319      1.1  mrg 
                      ;; End of the scope of the C[] / Carry register aliases: the code
                      ;; below refers to registers by number only.
   1320      1.1  mrg #undef CA
   1321      1.1  mrg #undef C0
   1322      1.1  mrg #undef C1
   1323      1.1  mrg #undef C2
   1324      1.1  mrg #undef C3
   1325      1.1  mrg #undef C4
   1326      1.1  mrg #undef C5
   1327      1.1  mrg #undef C6
   1328      1.1  mrg #undef Carry
   1329      1.1  mrg 
   1330      1.1  mrg 
   1331      1.1  mrg #ifdef F7MOD_D_fabs_
                      ;; fabs for a double whose highest byte is R25: clear bit 7 of R25,
                      ;; the sign bit; all other bytes are left as they are.
                      ;; NOTE(review): DALIAS / LALIAS presumably attach the double and
                      ;; long double entry names -- see asm-defs.h.
   1332      1.1  mrg _DEFUN __fabs
   1333      1.1  mrg     DALIAS fabs
   1334      1.1  mrg     LALIAS fabsl
   1335      1.1  mrg     andi    R25,    0b01111111
   1336      1.1  mrg     ret
   1337      1.1  mrg _ENDF __fabs
   1338      1.1  mrg #endif /* F7MOD_D_fabs_ */
   1339      1.1  mrg 
   1340      1.1  mrg 
   1341      1.1  mrg #ifdef F7MOD_D_neg_
                      ;; Negate a double whose highest byte is R25.  Subtracting 0x80 from
                      ;; R25 toggles bit 7 (the sign) and leaves bits 6...0 unchanged.
   1342      1.1  mrg _DEFUN __neg
   1343      1.1  mrg _LABEL __negdf2
   1344      1.1  mrg     subi    R25,    0b10000000
   1345      1.1  mrg     ret
   1346      1.1  mrg _ENDF __neg
   1347      1.1  mrg #endif /* F7MOD_D_neg_ */
   1348      1.1  mrg 
   1349      1.1  mrg 
   1350      1.1  mrg #ifdef F7MOD_D_signbit_
                      ;; Return R25:R24 = 1 if bit 7 of R25 (the sign of the double) is
                      ;; set and R25:R24 = 0 otherwise.  The bit travels through T.
   1351      1.1  mrg _DEFUN __signbit
   1352      1.1  mrg     DALIAS signbit
   1353      1.1  mrg     LALIAS signbitl
   1354      1.1  mrg     bst     R25,    7
   1355      1.1  mrg     clr     R25
   1356      1.1  mrg     clr     R24
   1357      1.1  mrg     bld     R24,    0
   1358      1.1  mrg     ret
   1359      1.1  mrg _ENDF __signbit
   1360      1.1  mrg #endif /* F7MOD_D_signbit_ */
   1361      1.1  mrg 
   1362      1.1  mrg 
   1363      1.1  mrg #ifdef F7MOD_D_copysign_
                      ;; Copy bit 7 of R17 into bit 7 of R25 via T; nothing else changes.
                      ;; NOTE(review): presumably R17 is the highest byte of the second
                      ;; double argument (R10...R17) -- confirm against the avr-gcc ABI.
   1364      1.1  mrg _DEFUN __copysign
   1365      1.1  mrg     DALIAS copysign
   1366      1.1  mrg     LALIAS copysignl
   1367      1.1  mrg     bst     R17,    7
   1368      1.1  mrg     bld     R25,    7
   1369      1.1  mrg     ret
   1370      1.1  mrg _ENDF __copysign
   1371      1.1  mrg #endif /* F7MOD_D_copysign_ */
   1372      1.1  mrg 
   1373      1.1  mrg 
   1374      1.1  mrg #ifdef F7MOD_D_isinf_
                      ;; Return R25:R24 = 1 if class_D reports +-Inf (T = 1 and Z = 1),
                      ;; and R25:R24 = 0 otherwise.
   1375      1.1  mrg _DEFUN __isinf
   1376      1.1  mrg     DALIAS isinf
   1377      1.1  mrg     LALIAS isinfl
   1378      1.1  mrg     F7call  class_D
   1379      1.1  mrg     ;; Inf: T = Z = 1.
   1380      1.1  mrg     brtc 0f
                          ;; LDI leaves the flags alone, so breq still tests the Z flag
                          ;; delivered by class_D.
   1381      1.1  mrg     ldi     R24,    1
   1382      1.1  mrg     breq 1f
   1383      1.1  mrg 0:
   1384      1.1  mrg     clr     R24
   1385      1.1  mrg 1:
   1386      1.1  mrg     clr     R25
   1387      1.1  mrg     ret
   1388      1.1  mrg _ENDF __isinf
   1389      1.1  mrg #endif /* F7MOD_D_isinf_ */
   1390      1.1  mrg 
   1391      1.1  mrg 
   1392      1.1  mrg #ifdef F7MOD_D_isnan_
                      ;; Return R25:R24 = 1 if class_D reports NaN (T = 1 and Z = 0),
                      ;; and R25:R24 = 0 otherwise.
   1393      1.1  mrg _DEFUN __isnan
   1394      1.1  mrg     DALIAS isnan
   1395      1.1  mrg     LALIAS isnanl
   1396      1.1  mrg     F7call  class_D
   1397      1.1  mrg     ;; NaN: T = 1, Z = 0.
   1398      1.1  mrg     brtc 0f
                          ;; LDI leaves the flags alone, so brne still tests the Z flag
                          ;; delivered by class_D.
   1399      1.1  mrg     ldi     R24,    1
   1400      1.1  mrg     brne 1f
   1401      1.1  mrg 0:
   1402      1.1  mrg     clr     R24
   1403      1.1  mrg 1:
   1404      1.1  mrg     clr     R25
   1405      1.1  mrg     ret
   1406      1.1  mrg _ENDF __isnan
   1407      1.1  mrg #endif /* F7MOD_D_isnan_ */
   1408      1.1  mrg 
   1409      1.1  mrg 
   1410      1.1  mrg #ifdef F7MOD_D_isfinite_
                      ;; Return R25:R24 = 1 if class_D reports a number (T = 0), and
                      ;; R25:R24 = 0 for Inf / NaN (T = 1).
   1411      1.1  mrg _DEFUN __isfinite
   1412      1.1  mrg     DALIAS isfinite
   1413      1.1  mrg     LALIAS isfinitel
   1414      1.1  mrg     F7call  class_D
   1415      1.1  mrg     ;; Number <=> T = 0.
                          ;; R24.0 = T, then invert and mask:  R24 = !T.  Whatever the
                          ;; other bits of R24 held is removed by the andi.
   1416      1.1  mrg     bld     R24,    0
   1417      1.1  mrg     com     R24
   1418      1.1  mrg     andi    R24,    1
   1419      1.1  mrg     clr     R25
   1420      1.1  mrg     ret
   1421      1.1  mrg _ENDF __isfinite
   1422      1.1  mrg #endif /* F7MOD_D_isfinite_ */
   1423      1.1  mrg 
   1424      1.1  mrg 
   1425      1.1  mrg #ifdef F7MOD_D_class_
   1426      1.1  mrg ;; The encoded exponent has 11 Bits.
                      ;; Mask of the exponent field within the two highest bytes R25:R24.
   1427      1.1  mrg #define MAX_BIASED_EXPO 0b0111111111110000
   1428      1.1  mrg 
   1429      1.1  mrg ;; Classify a double in R18[]
   1430      1.1  mrg ;; Number: T-Flag = 0.
   1431      1.1  mrg ;; +-Inf : T-Flag = 1, Z-Flag = 1.
   1432      1.1  mrg ;; NaN   : T-Flag = 1, Z-Flag = 0.
                      ;;
                      ;; Clobbers R26 and R27.  On the Inf / NaN path R24 and R25 are
                      ;; modified as well (sign and exponent bits cleared), and the final
                      ;; flags come from libgcc's __cmpdi2_s8.
   1433      1.1  mrg DEFUN class_D
                          ;; R27:R26 = exponent field; it becomes 0 after the subtraction
                          ;; exactly when all exponent bits are set.
   1434      1.1  mrg     wmov    R26,    R24
   1435      1.1  mrg     andi    R26,    lo8 (MAX_BIASED_EXPO)
   1436      1.1  mrg     andi    R27,    hi8 (MAX_BIASED_EXPO)
   1437      1.1  mrg     subi    R26,    lo8 (MAX_BIASED_EXPO)
   1438      1.1  mrg     sbci    R27,    hi8 (MAX_BIASED_EXPO)
                          ;; CLT leaves Z untouched, so brne still sees the result of the
                          ;; 16-bit subtraction.
   1439      1.1  mrg     clt
   1440      1.1  mrg     brne .L.number
   1441      1.1  mrg     set
   1442      1.1  mrg     ;; Set sign and expo to 0.
   1443      1.1  mrg     clr     R25
   1444      1.1  mrg     andi    R24,    lo8 (~MAX_BIASED_EXPO)
   1445      1.1  mrg     ;; What remains is the mantissa.
   1446      1.1  mrg     ;; Mantissa == 0  =>  +/-Inf.
   1447      1.1  mrg     ;; Mantissa != 0  =>  NaN.
   1448      1.1  mrg     ;; Compare R18[] against sign_extend(R26) with R26 = 0.
   1449      1.1  mrg     .global __cmpdi2_s8
   1450      1.1  mrg     XJMP    __cmpdi2_s8
   1451      1.1  mrg .L.number:
   1452      1.1  mrg     ret
   1453      1.1  mrg 
   1454      1.1  mrg ENDF class_D
   1455      1.1  mrg #endif /* F7MOD_D_class_ */
   1456      1.1  mrg 
   1457      1.1  mrg 
   1458      1.1  mrg #ifdef F7MOD_call_dd_
   1459      1.1  mrg 
   1460      1.1  mrg ;; Provide double wrappers for functions that operate on f7_t and get f7_t*.
   1461      1.1  mrg ;;
   1462      1.1  mrg ;; We set up a frame of sizeof(f7_t), convert the input double in R18[] to
   1463      1.1  mrg ;; f7_t in that frame location, then call *Z and finally convert the result f7_t
   1464      1.1  mrg ;; to double R18[] if that's requested.
   1465      1.1  mrg ;;
   1466      1.1  mrg ;; call_dd:     double func (double A)
   1467      1.1  mrg ;;              void (*Z) (f7_t *aa, const f7_t *aa)
   1468      1.1  mrg ;;
   1469      1.1  mrg ;; call_dx:     double func (type_t A)  , sizeof(type_t) <= 4
   1470      1.1  mrg ;;              void (*Z) (f7_t *aa, type_t)
   1471      1.1  mrg ;;
   1472      1.1  mrg ;; call_xd:     type_t func (double A)
   1473      1.1  mrg ;;              type_t (*Z) (const f7_t *aa)
   1474      1.1  mrg ;;
   1475      1.1  mrg ;; call_ddx:    double func (double A, word_t)  , sizeof (word_t) <= 2
   1476      1.1  mrg ;;              void (*Z) (f7_t *aa, const f7_t *aa, word_t)
                      ;;
                      ;; The four entry points fall through into each other.  Every INC of
                      ;; ZERO passed on the way counts up a selector that is then moved to
                      ;; WHAT; this relies on ZERO being 0 on entry.  ZERO is cleared again
                      ;; right after the selector has been copied.
                      ;;
                      ;; Selector handling below:
                      ;;   0 (call_dx) : no double input; scalar goes to R20[4]; result -> double.
                      ;;   1 (call_ddx): double input plus word argument in R20/R21; result -> double.
                      ;;   2 (call_xd) : double input; result of *Z is returned as is.
                      ;;   3 (call_dd) : double input; result -> double.
                      ;;
                      ;; do_prologue_saves / do_epilogue_restores, wmov, XCALL and XICALL are
                      ;; macros defined outside this section.
                      ;; NOTE(review): n_pushed, n_frame and .Lepilogue are defined again in the
                      ;; F7MOD_call_ddd_ section; presumably only one F7MOD_* module is enabled
                      ;; per assembler run -- confirm.
   1477      1.1  mrg 
   1478      1.1  mrg #define WHAT    R13
   1479      1.1  mrg 
   1480      1.1  mrg DEFUN call_dd   ; WHAT = R13 = 3
   1481      1.1  mrg     inc     ZERO
   1482      1.1  mrg LABEL call_xd   ; WHAT = R13 = 2
   1483      1.1  mrg     inc     ZERO
   1484      1.1  mrg LABEL call_ddx  ; WHAT = R13 = 1
   1485      1.1  mrg     inc     ZERO
   1486      1.1  mrg LABEL call_dx   ; WHAT = R13 = 0
                          ;; Save the old R13, then take over the selector and restore ZERO = 0.
   1487      1.1  mrg     push    WHAT
   1488      1.1  mrg     mov     WHAT,   ZERO
   1489      1.1  mrg     clr     ZERO
   1490      1.1  mrg     ;; R14/R15 hold Z, the address of the f7_worker function, until we need it.
   1491      1.1  mrg     push    r14
   1492      1.1  mrg     push    r15
   1493      1.1  mrg     wmov    r14,     Z
   1494      1.1  mrg 
                      ;; n_pushed: number of registers handled by do_prologue_saves; the LDD
                      ;;           offsets further down show that R16 and R17 are among them.
                      ;; n_frame:  frame size in bytes = sizeof (f7_t), see the header comment.
   1495      1.1  mrg #define n_pushed    4
   1496      1.1  mrg #define n_frame     10
   1497      1.1  mrg 
   1498      1.1  mrg     do_prologue_saves n_pushed, n_frame
   1499      1.1  mrg     ;; Y = FramePointer + 1
                          ;; From here on Y is the address of the f7_t in the frame.
   1500      1.1  mrg     adiw    Y,      1
                          ;; WHAT = selector - 1; negative only for call_dx, which has no
                          ;; double input to convert.
   1501      1.1  mrg     dec     WHAT
   1502      1.1  mrg     brmi .Ldx                   ; WHAT was initially 0.
   1503      1.1  mrg     ;; FP + 1 = (f7_t) arg1
   1504      1.1  mrg     wmov    r16,    Y
   1505      1.1  mrg     ;; The double argument is in R18[].
                          ;; R16/R17 carry the address of the f7_t that receives the conversion
                          ;; (presumably the destination pointer of set_double_impl).
   1506      1.1  mrg     XCALL   F7_NAME (set_double_impl)
                          ;; WHAT == 0 here means the selector was 1, i.e. call_ddx.
   1507      1.1  mrg     tst     WHAT
   1508      1.1  mrg     brne .Lno.ddx               ; WHAT was initially != 1.
   1509      1.1  mrg     ;; call_ddx: Set R20/21 to the 2-byte scalar / pointer argument.
   1510      1.1  mrg     ;; Fetch it from where prologue_saves put it.
                          ;; The caller passed it in R16/R17, which were reused above as pointer.
   1511      1.1  mrg     ldd     r20,    Y + n_frame + 3     ; Saved R16
   1512      1.1  mrg     ldd     r21,    Y + n_frame + 2     ; Saved R17
   1513      1.1  mrg .Lno.ddx:
   1514      1.1  mrg     wmov    r22,    Y           ; &arg1 (input)
   1515      1.1  mrg .Ldo.dx:
                          ;; Common part: first argument of *Z is the frame f7_t, then call the
                          ;; worker whose address was parked in R14/R15.
   1516      1.1  mrg     wmov    r24,    Y           ; &arg1 (output)
   1517      1.1  mrg     wmov    Z,      r14
   1518      1.1  mrg     XICALL
                          ;; WHAT = selector - 2; zero only for call_xd, whose worker result is
                          ;; returned unchanged.
   1519      1.1  mrg     dec     WHAT
   1520      1.1  mrg     breq .Lepilogue             ; WHAT was initially 2: Return non-double.
                          ;; All other entries: convert the f7_t in the frame to the double
                          ;; return value (R18[] according to the header comment).
   1521      1.1  mrg     wmov    r24,    Y           ; &arg1
   1522      1.1  mrg     XCALL   F7_NAME (get_double)
   1523      1.1  mrg .Lepilogue:
   1524      1.1  mrg     ;; + 3 to account for R13...R15 pushed prior to do_prologue_saves.
                          ;; Control does not continue past this macro: the code at .Ldx below
                          ;; is reached only through the BRMI above.
   1525      1.1  mrg     do_epilogue_restores n_pushed + 3, n_frame
   1526      1.1  mrg 
   1527      1.1  mrg .Ldx:
   1528      1.1  mrg     ;; call_dx: Copy the 4-byte input scalar from R22[4] to R20[4].
                          ;; R24/R25 must be moved down because .Ldo.dx overwrites them with
                          ;; the address of the result f7_t.
   1529      1.1  mrg     wmov    r20,    r22
   1530      1.1  mrg     wmov    r22,    r24
   1531      1.1  mrg     rjmp .Ldo.dx
   1532      1.1  mrg 
   1533      1.1  mrg ENDF call_dd
   1534      1.1  mrg #endif /* F7MOD_call_dd_ */
   1535      1.1  mrg 
   1536      1.1  mrg 
   1537      1.1  mrg #ifdef F7MOD_call_ddd_
   1538      1.1  mrg 
   1539      1.1  mrg ;; Provide double wrappers for functions that operate on f7_t and get f7_t*.
   1540      1.1  mrg ;;
   1541      1.1  mrg ;; We set up a frame of 2 * sizeof(f7_t), convert the input doubles in R18[]
   1542      1.1  mrg ;; and R10[] to f7_t in these frame locations, then call *Z and finally
   1543      1.1  mrg ;; convert the result f7_t to double R18[] if that's requested.
   1544      1.1  mrg ;;
   1545      1.1  mrg ;; call_ddd:    double func (double A, double B)
   1546      1.1  mrg ;;              void (*Z) (f7_t *aa, const f7_t *aa, const f7_t *bb)
   1547      1.1  mrg ;;
   1548      1.1  mrg ;; call_xdd:    type_t func (double A, double B)
   1549      1.1  mrg ;;              type_t (*Z) (const f7_t *aa, const f7_t *bb)
                      ;;
                      ;; The entry stub turns the entry point into a one-byte flag (1 for
                      ;; call_ddd, 0 for call_xdd; relies on ZERO being 0 on entry), pushes it
                      ;; and calls the local worker call.2, which reads the flag back from the
                      ;; stack.  R8/R9 are saved and restored by the stub itself.
                      ;;
                      ;; do_prologue_saves / do_epilogue_restores, wmov, XCALL, XICALL and
                      ;; PC_SIZE are defined outside this section.
   1550      1.1  mrg 
   1551      1.1  mrg DEFUN call_ddd
                          ;; call_ddd only: flag = 1.  call_xdd enters below with flag = 0.
   1552      1.1  mrg     inc     ZERO
   1553      1.1  mrg LABEL call_xdd
   1554      1.1  mrg     ;; R8/R9 hold Z, the address of the f7_worker function, until we need it.
   1555      1.1  mrg     push    r9
   1556      1.1  mrg     push    r8
   1557      1.1  mrg     wmov    r8,     Z
   1558      1.1  mrg     ;; This is an argument to call.2 and will be accessed by the arg pointer.
   1559      1.1  mrg     push    ZERO
   1560      1.1  mrg     clr     ZERO
   1561      1.1  mrg     rcall   call.2
                          ;; Drop the flag byte again, then undo the R8/R9 pushes in reverse order.
   1562      1.1  mrg     pop     TMP
   1563      1.1  mrg     pop     r8
   1564      1.1  mrg     pop     r9
   1565      1.1  mrg     ret
   1566      1.1  mrg 
                      ;; n_pushed: number of registers handled by do_prologue_saves; the LDD
                      ;;           offsets below show that R16 and R17 are among them.
                      ;; n_frame:  frame size in bytes = 2 * sizeof (f7_t); arg1 is at Y + 0,
                      ;;           arg2 at Y + 10.
   1567      1.1  mrg #define n_pushed    4
   1568      1.1  mrg #define n_frame     20
   1569      1.1  mrg 
   1570      1.1  mrg call.2:
   1571      1.1  mrg     do_prologue_saves n_pushed, n_frame
   1572      1.1  mrg     ;; Y = FramePointer + 1
   1573      1.1  mrg     adiw    Y,      1
   1574      1.1  mrg     ;; FP + 1 = (f7_t) arg1
   1575      1.1  mrg     wmov    r16,    Y
   1576      1.1  mrg     ;; First double argument is already in R18[].
   1577      1.1  mrg     XCALL   F7_NAME (set_double_impl)
   1578      1.1  mrg     ;; FP + 11 = (f7_t) arg2
                          ;; Subtracting -10 adds 10 = sizeof (f7_t) to the pointer in R17:R16.
   1579      1.1  mrg     wmov    r16,    Y
   1580      1.1  mrg     subi    r16,    lo8 (-10)
   1581      1.1  mrg     sbci    r17,    hi8 (-10)
   1582      1.1  mrg     ;; Move second double argument to R18[].
                          ;; Its low 6 bytes are still in R10...R15.
   1583      1.1  mrg     wmov    r18,    r10
   1584      1.1  mrg     wmov    r20,    r12
   1585      1.1  mrg     wmov    r22,    r14
   1586      1.1  mrg     ;; Get high word of arg2 from where prologue_saves put it.
                          ;; R16/R17 themselves now hold the &arg2 pointer, so the original
                          ;; values have to come from the save area.
   1587      1.1  mrg     ldd     r24,    Y + n_frame + 3     ; Saved R16
   1588      1.1  mrg     ldd     r25,    Y + n_frame + 2     ; Saved R17
   1589      1.1  mrg     XCALL   F7_NAME (set_double_impl)
   1590      1.1  mrg     ;; Z (f7_t *arg1, const f7_t *arg1, const f7_t *arg2)
   1591      1.1  mrg     wmov    Z,      r8
   1592      1.1  mrg     wmov    r24,    Y                   ; &arg1
   1593      1.1  mrg     ;; WHAT == 0  =>  call_xdd
   1594      1.1  mrg     ;; WHAT != 0  =>  call_ddd
                          ;; "WHAT" is the flag byte pushed by the entry stub.  Counted from Y it
                          ;; lies above the frame (n_frame), the registers saved by the prologue
                          ;; (n_pushed) and the return address of the RCALL (PC_SIZE).
   1595      1.1  mrg     ldd     TMP,    Y + n_frame + n_pushed + PC_SIZE
   1596      1.1  mrg     tst     TMP
   1597      1.1  mrg     breq .Lxdd
                          ;; call_ddd: R24 = result, R22 = &arg1, R20 = &arg2.  R16/R17 are used
                          ;; as &arg2 here, i.e. they are expected to survive set_double_impl.
   1598      1.1  mrg     wmov    r22,    Y                   ; &arg1
   1599      1.1  mrg     wmov    r20,    r16                 ; &arg2
   1600      1.1  mrg     XICALL
                          ;; Convert the f7_t result in the frame to the double return value.
   1601      1.1  mrg     wmov    r24,    Y                   ; &arg1
   1602      1.1  mrg     XCALL   F7_NAME (get_double)
   1603      1.1  mrg .Lepilogue:
                          ;; Control does not continue past this macro: .Lxdd below is reached
                          ;; only through the BREQ above.
   1604      1.1  mrg     do_epilogue_restores n_pushed, n_frame
   1605      1.1  mrg .Lxdd:
                          ;; call_xdd: R24 = &arg1 (set above), R22 = &arg2.  The value returned
                          ;; by *Z is passed on unchanged, no get_double.
   1606      1.1  mrg     wmov    r22,    r16                 ; &arg2
   1607      1.1  mrg     XICALL
   1608      1.1  mrg     rjmp .Lepilogue
   1609      1.1  mrg ENDF call_ddd
   1610      1.1  mrg #endif /* F7MOD_call_ddd_ */
   1611      1.1  mrg 
   1612      1.1  mrg #include "f7-wraps.h"
   1613      1.1  mrg 
   1614      1.1  mrg #endif /* !AVR_TINY */
   1615