/* Unsigned 32 bit division optimized for Epiphany.
   Copyright (C) 2009-2024 Free Software Foundation, Inc.
   Contributed by Embecosm on behalf of Adapteva, Inc.

This file is part of GCC.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#include "epiphany-asm.h"

	FSTAB (__udivsi3,T_UINT)
	.global SYM(__udivsi3)
	.balign 4
	HIDDEN_FUNC(__udivsi3)
; Input: r0 = dividend, r1 = divisor.  Output: r0 = quotient.
SYM(__udivsi3):
	sub TMP0,r0,r1		; dividend < divisor: quotient is 0.
	bltu .Lret0
	float TMP2,r0		; exponent fields give floor(log2) of operands.
	mov TMP1,%low(0xb0800000) ; ??? this would be faster with small data
	float TMP3,r1
	movt TMP1,%high(0xb0800000)
	; For dividends with bit 31 set, the signed int->float conversion
	; above is wrong; construct the truncated float bit pattern
	; directly from r0 >> 8 instead.
	asr TMP0,r0,8
	sub TMP0,TMP0,TMP1
	movt TMP1,%high(0x00810000)
	movgteu TMP2,TMP0
	; The FP flags still reflect 'float TMP3,r1': a negative result
	; means the divisor has bit 31 set; since we already know that
	; dividend >= divisor, the quotient is 1.
	bblt .Lret1
	; Compute a conservative shift count n from the difference of the
	; float encodings; the 0x00810000 bias compensates for conversion
	; rounding so that n never overestimates log2(dividend/divisor).
	sub TMP2,TMP2,TMP1
	sub TMP2,TMP2,TMP3
	mov TMP3,0
	movltu TMP2,TMP3	; clamp a negative estimate to a shift of 0.
	lsr TMP2,TMP2,23	; TMP2 = n.
	lsl r1,r1,TMP2		; align divisor with dividend.
	mov TMP0,1
	lsl TMP0,TMP0,TMP2	; TMP0 = 1 << n, weight of one subtraction.
	; Compute the top quotient bits in TMP3 by repeated subtraction
	; of the aligned divisor.
	sub r0,r0,r1
	bltu .Ladd_back
	add TMP3,TMP3,TMP0
	sub r0,r0,r1
	bltu .Ladd_back
.Lsub_loop:; More than two iterations are rare, so it makes sense to leave
	; this label here to reduce average branch penalties.
	add TMP3,TMP3,TMP0
	sub r0,r0,r1
	bgteu .Lsub_loop
.Ladd_back:
	add r0,r0,r1		; undo the subtraction that went negative.
	sub TMP1,r1,1		; TMP1 = aligned divisor - 1, for the
				; subtract-and-set-bit trick below.
	mov r1,%low(.L0step)
	movt r1,%high(.L0step)
	lsl TMP2,TMP2,3		; each unrolled step occupies 8 bytes;
	sub r1,r1,TMP2		; enter the chain so that n steps execute.
	jr r1
	; Restoring shift-and-subtract: shifting r0 left makes room for a
	; quotient bit at the bottom; subtracting (divisor - 1) both
	; subtracts the divisor and sets that bit.
	.rep 30
	lsl r0,r0,1
	sub.l r1,r0,TMP1
	movgteu r0,r1
	.endr
.L0step:sub r1,TMP0,1 ; mask result bits from steps ...
	and r0,r0,r1
	orr r0,r0,TMP3 ; ... and combine with first bits.
	rts
.Lret0:	mov r0,0
	rts
.Lret1:	mov r0,1
	rts
	ENDFUNC(__udivsi3)