Home | History | Annotate | Line # | Download | only in fuc
      1 /*
      2  * Copyright 2014 Martin Peres <martin.peres (at) free.fr>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20  * OTHER DEALINGS IN THE SOFTWARE.
     21  *
     22  * Authors: Martin Peres
     23  */
     24 
     25 /******************************************************************************
     26  * arith data segment
     27  *****************************************************************************/
     28 #ifdef INCLUDE_PROC
     29 #endif
     30 
     31 #ifdef INCLUDE_DATA
     32 #endif
     33 
     34 /******************************************************************************
     35  * arith code segment
     36  *****************************************************************************/
     37 #ifdef INCLUDE_CODE
     38 
     39 // does a 32x32 -> 64 multiplication, built from four 16x16 partial products
     40 //
     41 // A * B = A_lo * B_lo
     42 //        + ( A_hi * B_lo ) << 16
     43 //        + ( A_lo * B_hi ) << 16
     44 //        + ( A_hi * B_hi ) << 32
     45 // (X_lo / X_hi = low / high 16 bits of X; $r1-$r4 are saved on entry and restored on exit)
     46 // $r15 - current (not referenced by this routine)
     47 // $r14 - A (input, only used as a source operand here)
     48 // $r13 - B (input, only used as a source operand here)
     49 // $r12 - mul_lo (return): low 32 bits of A * B
     50 // $r11 - mul_hi (return): high 32 bits of A * B
     51 // $r0  - zero (not referenced by this routine)
     52 mulu32_32_64:
     53 	push $r1 // A_hi (scratch registers are saved here and popped in reverse order before ret)
     54 	push $r2 // B_hi
     55 	push $r3 // tmp0
     56 	push $r4 // tmp1
     57 
     58 	shr b32 $r1 $r14 16 // A_hi = A >> 16
     59 	shr b32 $r2 $r13 16 // B_hi = B >> 16
     60 
     61 	clear b32 $r12 // both result words start from zero; mul_hi is only ever accumulated into
     62 	clear b32 $r11
     63 
     64 	// A_lo * B_lo
     65 	mulu $r12 $r14 $r13 // mul_lo = A_lo * B_lo (mulu presumably uses only the low 16 bits of each source - confirm against ISA docs)
     66 
     67 	// ( A_hi * B_lo ) << 16
     68 	mulu $r3 $r1 $r13 // tmp0 = A_hi * B_lo
     69 	mov b32 $r4 $r3 // keep a copy: the 32-bit product straddles mul_lo and mul_hi once shifted by 16
     70 	and $r3 0xffff // tmp0 = tmp0_lo
     71 	shl b32 $r3 16 // tmp0_lo moved to bits 31..16, where it lines up with mul_lo
     72 	shr b32 $r4 16 // tmp1 = tmp0_hi
     73 	add b32 $r12 $r3 // mul_lo += tmp0_lo << 16
     74 	adc b32 $r11 $r4 // mul_hi += tmp0_hi + carry out of the mul_lo add directly above
     75 
     76 	// ( A_lo * B_hi ) << 16
     77 	mulu $r3 $r14 $r2 // tmp0 = A_lo * B_hi
     78 	mov b32 $r4 $r3 // same split as for the previous cross term
     79 	and $r3 0xffff // tmp0 = tmp0_lo
     80 	shl b32 $r3 16 // tmp0_lo moved to bits 31..16
     81 	shr b32 $r4 16 // tmp1 = tmp0_hi
     82 	add b32 $r12 $r3 // mul_lo += tmp0_lo << 16
     83 	adc b32 $r11 $r4 // mul_hi += tmp0_hi + carry out of the mul_lo add directly above
     84 
     85 	// ( A_hi * B_hi ) << 32
     86 	mulu $r3 $r1 $r2 // tmp0 = A_hi * B_hi
     87 	add b32 $r11 $r3 // shifted by 32, this product lands entirely in mul_hi
     88 
     89 	pop $r4 // restore scratch registers in reverse push order
     90 	pop $r3
     91 	pop $r2
     92 	pop $r1
     93 	ret
     94 #endif
     95