Home | History | Annotate | Line # | Download | only in ia64
      1 dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
      2 dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
      3 
      4 dnl  Contributed to the GNU project by Torbjorn Granlund.
      5 
      6 dnl  Copyright 2003-2005 Free Software Foundation, Inc.
      7 
      8 dnl  This file is part of the GNU MP Library.
      9 dnl
     10 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     11 dnl  it under the terms of either:
     12 dnl
     13 dnl    * the GNU Lesser General Public License as published by the Free
     14 dnl      Software Foundation; either version 3 of the License, or (at your
     15 dnl      option) any later version.
     16 dnl
     17 dnl  or
     18 dnl
     19 dnl    * the GNU General Public License as published by the Free Software
     20 dnl      Foundation; either version 2 of the License, or (at your option) any
     21 dnl      later version.
     22 dnl
     23 dnl  or both in parallel, as here.
     24 dnl
     25 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     26 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     27 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     28 dnl  for more details.
     29 dnl
     30 dnl  You should have received copies of the GNU General Public License and the
     31 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     32 dnl  see https://www.gnu.org/licenses/.
     33 
     34 include(`../config.m4')
     35 
     36 C           cycles/limb
     37 C Itanium:      2
     38 C Itanium 2:    1
     39 
     40 C TODO
     41 C  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in
     42 C    wind-down code).
     43 
      44 C INPUT PARAMETERS
         C  rp  destination pointer; results are written with post-incrementing st8
         C  up  first source pointer; limbs are read with post-incrementing ld8
         C  vp  second source pointer; limbs are read with post-incrementing ld8
         C  n   number of 8-byte limbs to process
      45 define(`rp', `r32')
      46 define(`up', `r33')
      47 define(`vp', `r34')
      48 define(`n', `r35')
      49 
         C  Operation selection.  Each ifdef below is guarded by one OPERATION_xxx
         C  symbol (presumably defined by the build system -- not visible in this
         C  file).  The branch that matches defines three macros used by the code:
         C    func      name of the function being assembled
         C    logop     two-input instruction, dst = op(src1, src2); the code below
         C              always passes a limb of up as src1 and a limb of vp as src2
         C    notormov  final step applied to each logop result: mov copies it,
         C              while sub dst = -1, src gives -1 - src, i.e. its bitwise
         C              complement
         C
         C  and_n:  u AND v
      50 ifdef(`OPERATION_and_n',
      51 `	define(`func',`mpn_and_n')
      52 	define(`logop',		`and	$1 = $2, $3')
      53 	define(`notormov',	`mov	$1 = $2')')
         C  andn_n:  u AND NOT v (andcm complements its second source operand)
      54 ifdef(`OPERATION_andn_n',
      55 `	define(`func',`mpn_andn_n')
      56 	define(`logop',		`andcm	$1 = $2, $3')
      57 	define(`notormov',	`mov	$1 = $2')')
         C  nand_n:  NOT (u AND v)
      58 ifdef(`OPERATION_nand_n',
      59 `	define(`func',`mpn_nand_n')
      60 	define(`logop',		`and	$1 = $2, $3')
      61 	define(`notormov',	`sub	$1 = -1, $2')')
         C  ior_n:  u OR v
      62 ifdef(`OPERATION_ior_n',
      63 `	define(`func',`mpn_ior_n')
      64 	define(`logop',		`or	$1 = $2, $3')
      65 	define(`notormov',	`mov	$1 = $2')')
         C  iorn_n:  u OR NOT v.  The andcm operands are swapped on purpose: it
         C  produces v AND NOT u, and complementing that gives u OR NOT v.
      66 ifdef(`OPERATION_iorn_n',
      67 `	define(`func',`mpn_iorn_n')
      68 	define(`logop',		`andcm	$1 = $3, $2')
      69 	define(`notormov',	`sub	$1 = -1, $2')')
         C  nior_n:  NOT (u OR v)
      70 ifdef(`OPERATION_nior_n',
      71 `	define(`func',`mpn_nior_n')
      72 	define(`logop',		`or	$1 = $2, $3')
      73 	define(`notormov',	`sub	$1 = -1, $2')')
         C  xor_n:  u XOR v
      74 ifdef(`OPERATION_xor_n',
      75 `	define(`func',`mpn_xor_n')
      76 	define(`logop',		`xor	$1 = $2, $3')
      77 	define(`notormov',	`mov	$1 = $2')')
         C  xnor_n:  NOT (u XOR v)
      78 ifdef(`OPERATION_xnor_n',
      79 `	define(`func',`mpn_xnor_n')
      80 	define(`logop',		`xor	$1 = $2, $3')
      81 	define(`notormov',	`sub	$1 = -1, $2')')
     82 
      83 MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
      84 
         C  func(rp, up, vp, n)
         C
         C  For i = 0 .. n-1, combines limb i of up with limb i of vp using logop,
         C  passes the result through notormov, and stores it as limb i of rp.
         C
         C  The first limb pair is loaded unconditionally and the n mod 4 == 0 path
         C  always processes at least four limbs, so n = 0 is not handled here.
         C
         C  Layout:
         C    entry      save ar.lc, load limb 0, dispatch on n mod 4
         C    .Lb00 / .Lb01 / .Lb10 / .Lb11
         C               feed-in code, one variant per value of n mod 4; small n go
         C               straight to the wind-down chain, larger n preload operands
         C               and join the loop at the matching stage
         C    .Loop      4-way unrolled loop, one limb stored per stage
         C    .Lcj6 ... .Lcj1
         C               wind-down chain; entering at .LcjK stores the last K limbs
         C
         C  Register use:
         C    r10, r11   limb 0 of up and of vp
         C    r16-r19    limbs of up waiting to be combined
         C    r20-r23    the corresponding limbs of vp
         C    r14, r15   logop results, used alternately
         C    r8, r9     notormov results waiting to be stored, used alternately
         C    r2         caller value of ar.lc, restored before returning
         C    p15        set when n > 4; p6 / p7 / p8 select n mod 4 == 1 / 2 / 3
         C
         C  NOTE(review): the M / I / I0 / B letters in the trailing comments appear
         C  to name the execution unit types each instruction can issue on.
      85 ASM_START()
      86 PROLOGUE(func)
      87 	.prologue
         C  Unwind annotation: ar.lc is kept in r2 (copied below, restored at exit).
      88 	.save	ar.lc, r2
      89 	.body
         C  32-bit ABI only: widen the three pointers with addp4 and zero-extend n.
      90 ifdef(`HAVE_ABI_32',
      91 `	addp4	rp = 0, rp			C			M I
      92 	addp4	up = 0, up			C			M I
      93 	addp4	vp = 0, vp			C			M I
      94 	nop.m		0
      95 	nop.m		0
      96 	zxt4	n = n				C			I
      97 	;;
      98 ')
         C  Load limb 0 of both sources and save ar.lc.
      99 {.mmi
     100 	ld8		r10 = [up], 8		C			M
     101 	ld8		r11 = [vp], 8		C			M
     102 	mov.i		r2 = ar.lc		C			I0
     103 }
         C  r14 = n mod 4, p15 = (n > 4), n = n / 4 (adjusted further per path).
         C  p14 receives the complementary compare result and is not used.
     104 {.mmi
     105 	and		r14 = 3, n		C			M I
     106 	cmp.lt		p15, p14 = 4, n		C			M I
     107 	shr.u		n = n, 2		C			I0
     108 	;;
     109 }
         C  Dispatch on n mod 4; the value 0 falls through to .Lb00.
     110 {.mmi
     111 	cmp.eq		p6, p0 = 1, r14		C			M I
     112 	cmp.eq		p7, p0 = 2, r14		C			M I
     113 	cmp.eq		p8, p0 = 3, r14		C			M I
     114 }
     115 {.bbb
     116    (p6)	br.dptk		.Lb01			C			B
     117    (p7)	br.dptk		.Lb10			C			B
     118    (p8)	br.dptk		.Lb11			C			B
     119 }
     120 
         C  n mod 4 == 0.  Load limbs 1-3; the loop count becomes n/4 - 2.
     121 .Lb00:	ld8		r17 = [up], 8		C			M
     122 	ld8		r21 = [vp], 8		C			M
     123 	add		n = -2, n		C			M I
     124 	;;
     125 	ld8		r18 = [up], 8		C			M
     126 	ld8		r22 = [vp], 8		C			M
     127 	;;
     128 	ld8		r19 = [up], 8		C			M
     129 	ld8		r23 = [vp], 8		C			M
     130   (p15)	br.cond.dpnt	.grt4			C			B
     131 
         C  n == 4: combine limbs 0 and 1 here; the wind-down chain does the rest.
     132 	logop(		r14, r10, r11)		C			M I
     133 	;;
     134 	logop(		r15, r17, r21)		C			M I
     135 	notormov(	r8, r14)		C			M I
     136 	br		.Lcj4			C			B
     137 
         C  n >= 8: preload limbs 4 and 5, set the loop count, join the loop at .LL00.
     138 .grt4:	logop(		r14, r10, r11)		C			M I
     139 	ld8		r16 = [up], 8		C			M
     140 	ld8		r20 = [vp], 8		C			M
     141 	;;
     142 	logop(		r15, r17, r21)		C			M I
     143 	ld8		r17 = [up], 8		C			M
     144 	mov.i		ar.lc = n		C			I0
     145 	notormov(	r8, r14)		C			M I
     146 	ld8		r21 = [vp], 8		C			M
     147 	br		.LL00			C			B
     148 
         C  n mod 4 == 1.  The loop count becomes n/4 - 1.
     149 .Lb01:	add		n = -1, n		C			M I
     150 	logop(		r15, r10, r11)		C			M I
     151   (p15)	br.cond.dpnt	.grt1			C			B
     152 	;;
     153 
         C  n == 1: only limb 0 is left to finish and store.
     154 	notormov(	r9, r15)		C			M I
     155 	br		.Lcj1			C			B
     156 
         C  n >= 5: preload limbs 1-4 and set the loop count.
     157 .grt1:	ld8		r16 = [up], 8		C			M
     158 	ld8		r20 = [vp], 8		C			M
     159 	;;
     160 	ld8		r17 = [up], 8		C			M
     161 	ld8		r21 = [vp], 8		C			M
     162 	mov.i		ar.lc = n		C			I0
     163 	;;
     164 	ld8		r18 = [up], 8		C			M
     165 	ld8		r22 = [vp], 8		C			M
     166 	;;
     167 	ld8		r19 = [up], 8		C			M
     168 	ld8		r23 = [vp], 8		C			M
     169 	br.cloop.dptk	.grt5			C			B
     170 	;;
     171 
         C  Loop count was zero (n == 5): finish through the wind-down chain.
     172 	logop(		r14, r16, r20)		C			M I
     173 	notormov(	r9, r15)		C			M I
     174 	br		.Lcj5			C			B
     175 
         C  n >= 9: preload limb 5 and join the loop at .LL01.
     176 .grt5:	logop(		r14, r16, r20)		C			M I
     177 	ld8		r16 = [up], 8		C			M
     178 	notormov(	r9, r15)		C			M I
     179 	ld8		r20 = [vp], 8		C			M
     180 	br		.LL01			C			B
     181 
         C  n mod 4 == 2.  Load limb 1.
     182 .Lb10:	ld8		r19 = [up], 8		C			M
     183 	ld8		r23 = [vp], 8		C			M
     184   (p15)	br.cond.dpnt	.grt2			C			B
     185 
         C  n == 2: combine both limbs here and store them via .Lcj2.
     186 	logop(		r14, r10, r11)		C			M I
     187 	;;
     188 	logop(		r15, r19, r23)		C			M I
     189 	notormov(	r8, r14)		C			M I
     190 	br		.Lcj2			C			B
     191 
         C  n >= 6: preload limbs 2-5; the loop count becomes n/4 - 1.  Enter the
         C  loop at its top when the count is nonzero (n >= 10), otherwise wind
         C  down from .Lcj6.
     192 .grt2:	ld8		r16 = [up], 8		C			M
     193 	ld8		r20 = [vp], 8		C			M
     194 	add		n = -1, n		C			M I
     195 	;;
     196 	ld8		r17 = [up], 8		C			M
     197 	ld8		r21 = [vp], 8		C			M
     198 	logop(		r14, r10, r11)		C			M I
     199 	;;
     200 	ld8		r18 = [up], 8		C			M
     201 	ld8		r22 = [vp], 8		C			M
     202 	mov.i		ar.lc = n		C			I0
     203 	;;
     204 	logop(		r15, r19, r23)		C			M I
     205 	ld8		r19 = [up], 8		C			M
     206 	notormov(	r8, r14)		C			M I
     207 	ld8		r23 = [vp], 8		C			M
     208 	br.cloop.dptk	.Loop			C			B
     209 	br		.Lcj6			C			B
     210 
         C  n mod 4 == 3.  Load limbs 1 and 2; the loop count becomes n/4 - 1.
     211 .Lb11:	ld8		r18 = [up], 8		C			M
     212 	ld8		r22 = [vp], 8		C			M
     213 	add		n = -1, n		C			M I
     214 	;;
     215 	ld8		r19 = [up], 8		C			M
     216 	ld8		r23 = [vp], 8		C			M
     217 	logop(		r15, r10, r11)		C			M I
     218   (p15)	br.cond.dpnt	.grt3			C			B
     219 	;;
     220 
         C  n == 3: combine limb 1 here and store all three limbs via .Lcj3.
     221 	logop(		r14, r18, r22)		C			M I
     222 	notormov(	r9, r15)		C			M I
     223 	br		.Lcj3			C			B
     224 
         C  n >= 7: preload limbs 3-5, set the loop count, join the loop at .LL11.
     225 .grt3:	ld8		r16 = [up], 8		C			M
     226 	ld8		r20 = [vp], 8		C			M
     227 	;;
     228 	ld8		r17 = [up], 8		C			M
     229 	ld8		r21 = [vp], 8		C			M
     230 	mov.i		ar.lc = n		C			I0
     231 	;;
     232 	logop(		r14, r18, r22)		C			M I
     233 	ld8		r18 = [up], 8		C			M
     234 	notormov(	r9, r15)		C			M I
     235 	ld8		r22 = [vp], 8		C			M
     236 	br		.LL11			C			B
     237 
         C  Each of the four stages below stores the pending notormov result, runs
         C  logop on the next operand pair, runs notormov on the previous logop
         C  result, and reloads the operand registers it just consumed with the
         C  pair four limbs further on.  The closing br.cloop repeats the loop
         C  while ar.lc is nonzero, decrementing it each time.
     238 C *** MAIN LOOP START ***
     239 	ALIGN(32)
     240 .Loop:	st8		[rp] = r8, 8		C			M
     241 	logop(		r14, r16, r20)		C			M I
     242 	notormov(	r9, r15)		C			M I
     243 	ld8		r16 = [up], 8		C			M
     244 	ld8		r20 = [vp], 8		C			M
     245 	nop.b		0
     246 	;;
     247 .LL01:	st8		[rp] = r9, 8		C			M
     248 	logop(		r15, r17, r21)		C			M I
     249 	notormov(	r8, r14)		C			M I
     250 	ld8		r17 = [up], 8		C			M
     251 	ld8		r21 = [vp], 8		C			M
     252 	nop.b		0
     253 	;;
     254 .LL00:	st8		[rp] = r8, 8		C			M
     255 	logop(		r14, r18, r22)		C			M I
     256 	notormov(	r9, r15)		C			M I
     257 	ld8		r18 = [up], 8		C			M
     258 	ld8		r22 = [vp], 8		C			M
     259 	nop.b		0
     260 	;;
     261 .LL11:	st8		[rp] = r9, 8		C			M
     262 	logop(		r15, r19, r23)		C			M I
     263 	notormov(	r8, r14)		C			M I
     264 	ld8		r19 = [up], 8		C			M
     265 	ld8		r23 = [vp], 8		C			M
     266 	br.cloop.dptk	.Loop	;;		C			B
     267 C *** MAIN LOOP END ***
     268 
         C  Wind-down chain: the same stages as the loop, without the loads.
         C  Entering at .LcjK stores the final K limbs.
     269 .Lcj6:	st8		[rp] = r8, 8		C			M
     270 	logop(		r14, r16, r20)		C			M I
     271 	notormov(	r9, r15)		C			M I
     272 	;;
     273 .Lcj5:	st8		[rp] = r9, 8		C			M
     274 	logop(		r15, r17, r21)		C			M I
     275 	notormov(	r8, r14)		C			M I
     276 	;;
     277 .Lcj4:	st8		[rp] = r8, 8		C			M
     278 	logop(		r14, r18, r22)		C			M I
     279 	notormov(	r9, r15)		C			M I
     280 	;;
     281 .Lcj3:	st8		[rp] = r9, 8		C			M
     282 	logop(		r15, r19, r23)		C			M I
     283 	notormov(	r8, r14)		C			M I
     284 	;;
     285 .Lcj2:	st8		[rp] = r8, 8		C			M
     286 	notormov(	r9, r15)		C			M I
     287 	;;
         C  Store the last limb, restore the caller value of ar.lc and return.
     288 .Lcj1:	st8		[rp] = r9, 8		C			M
     289 	mov.i		ar.lc = r2		C			I0
     290 	br.ret.sptk.many b0			C			B
     291 EPILOGUE()
     292 ASM_END()
    293