/* Unsigned 32 bit division optimized for Epiphany.
   Copyright (C) 2009-2022 Free Software Foundation, Inc.
   Contributed by Embecosm on behalf of Adapteva, Inc.

This file is part of GCC.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#include "epiphany-asm.h"
;-----------------------------------------------------------------------
; unsigned int __udivsi3 (unsigned int a, unsigned int b)
;
; In:   r0 = dividend, r1 = divisor
; Out:  r0 = a / b (truncated quotient; divisor == 0 is undefined,
;       as usual for libgcc division helpers)
; Uses: r1, TMP0-TMP3 (scratch registers defined in epiphany-asm.h),
;       integer and FPU flags.
;
; Method: the FPU's int->float conversion serves as a fast
; floor(log2(x)) estimator -- the exponent fields of (float)a and
; (float)b give the shift needed to left-align the divisor with the
; dividend.  A short subtract loop produces the top quotient bits;
; the remaining bits come from a computed jump into an unrolled
; sequence of (at most 30) shift-and-subtract steps.
;-----------------------------------------------------------------------
	FSTAB (__udivsi3,T_UINT)
	.global SYM(__udivsi3)
	.balign 4
	HIDDEN_FUNC(__udivsi3)
SYM(__udivsi3):
	sub TMP0,r0,r1		; dividend < divisor (unsigned)?
	bltu .Lret0		; yes -> quotient is 0
	; The two float conversions overlap with the integer instructions
	; below; the extra-indented mov/movt pair fills the FPU latency.
	float TMP2,r0		; TMP2 = IEEE bit pattern of (float)dividend
	  mov TMP1,%low(0xb0800000) ; ??? this would be faster with small data
	float TMP3,r1		; TMP3 = (float)divisor; also sets FPU flags
	  movt TMP1,%high(0xb0800000)
	; (float)r0 rounds to nearest, which can round the value UP for
	; r0 >= 2^31 and overestimate the shift.  Build a round-toward-zero
	; bit pattern instead: for r0 >= 2^31, (r0 asr 8) equals
	; 0xff000000 + (r0 lsr 8), so subtracting 0xb0800000 yields
	; 0x4f000000 + ((r0 lsr 8) & 0x7fffff) -- exponent 158 plus the
	; truncated mantissa, i.e. the truncated float representation.
	asr TMP0,r0,8
	sub TMP0,TMP0,TMP1	; no borrow exactly when r0 >= 2^31
	movt TMP1,%high(0x00810000) ; %low(0xb0800000) is 0, so a single
				; movt turns TMP1 into 0x00810000 (bias)
	movgteu TMP2,TMP0	; huge dividend: use the exact pattern
	bblt .Lret1		; FPU flags from 'float TMP3,r1' (integer ops
				; only touch the integer flags): (float)divisor
				; negative means divisor >= 2^31, and we already
				; know dividend >= divisor -> quotient is 1
	; Shift count = exponent difference - 1, clamped to >= 0.  The
	; extra 0x00010000 in the 0x00810000 bias compensates for the
	; mantissa-field borrow in this subtraction.
	; NOTE(review): bias derivation not re-verified here.
	sub TMP2,TMP2,TMP1
	sub TMP2,TMP2,TMP3
	mov TMP3,0
	movltu TMP2,TMP3	; clamp a negative difference to zero
	lsr TMP2,TMP2,23	; TMP2 = shift count (0..30)
	lsl r1,r1,TMP2		; left-align divisor with dividend
	mov TMP0,1
	lsl TMP0,TMP0,TMP2	; TMP0 = 1<<shift = weight of one subtraction
	; Top quotient bits: repeatedly subtract the aligned divisor,
	; adding TMP0 into TMP3 per successful subtraction.  The first
	; two iterations are unrolled ahead of the loop label.
	sub r0,r0,r1
	bltu .Ladd_back
	add TMP3,TMP3,TMP0
	sub r0,r0,r1
	bltu .Ladd_back
.Lsub_loop:; More than two iterations are rare, so it makes sense to leave
           ; this label here to reduce average branch penalties.
	add TMP3,TMP3,TMP0
	sub r0,r0,r1
	bgteu .Lsub_loop
.Ladd_back:
	add r0,r0,r1		; undo the final over-subtraction
	; Remaining quotient bits: jump into the unrolled sequence so that
	; exactly TMP2 of the 30 steps execute.  Each step is 8 bytes --
	; the .l suffix forces the 32-bit encoding of the sub so the step
	; size is fixed -- hence 'lsl TMP2,TMP2,3' below.
	sub TMP1,r1,1		; TMP1 = aligned divisor - 1 (see step comment)
	mov r1,%low(.L0step)	; r1 is free now; reuse it as jump target
	movt r1,%high(.L0step)
	lsl TMP2,TMP2,3		; byte offset = shift count * 8
	sub r1,r1,TMP2
	jr r1
	; One division step per remaining quotient bit.  Subtracting
	; (divisor - 1) instead of the divisor makes a successful trial
	; subtraction also set bit 0 of r0 -- the bit just vacated by the
	; shift -- so the low bits of r0 accumulate the quotient in place;
	; they are separated from the remainder bits by the mask below.
	.rep 30
	lsl r0,r0,1
	sub.l r1,r0,TMP1
	movgteu r0,r1
	.endr
.L0step:sub r1,TMP0,1 ; mask result bits from steps ...
	and r0,r0,r1
	orr r0,r0,TMP3 ; ... and combine with first bits.
	rts
.Lret0:	mov r0,0	; dividend < divisor
	rts
.Lret1:	mov r0,1	; divisor >= 2^31 and dividend >= divisor
	rts
	ENDFUNC(__udivsi3)
     84