Home | History | Annotate | Line # | Download | only in fpe
      1 /*	$NetBSD: fpu_cordic.c,v 1.4 2016/12/06 05:58:19 isaki Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2013 Tetsuya Isaki. All rights reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     20  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     22  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     23  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     25  * SUCH DAMAGE.
     26  */
     27 
     28 #include <sys/cdefs.h>
     29 __KERNEL_RCSID(0, "$NetBSD: fpu_cordic.c,v 1.4 2016/12/06 05:58:19 isaki Exp $");
     30 
     31 #include <machine/ieee.h>
     32 
     33 #include "fpu_emulate.h"
     34 
     35 /*
     36  * sfpn = shoftened fp number; the idea is from fpu_log.c but not the same.
     37  * The most significant byte of sp_m0 is EXP (signed byte) and the rest
     38  * of sp_m0 is fp_mant[0].
     39  */
     40 struct sfpn {
     41 	uint32_t sp_m0;
     42 	uint32_t sp_m1;
     43 	uint32_t sp_m2;
     44 };
     45 
     46 #if defined(CORDIC_BOOTSTRAP)
     47 /*
     48  * This is a bootstrap code to generate a pre-calculated tables such as
     49  * atan_table[].  However, it's just for reference.
     50  * If you want to run the bootstrap, you will define CORDIC_BOOTSTRAP
     51  * and modify these files as a userland application.
     52  */
     53 
     54 #include <stdio.h>
     55 #include <stdlib.h>
     56 #include <string.h>
     57 #include <float.h>
     58 
     59 static void prepare_cordic_const(struct fpemu *);
     60 static struct fpn *fpu_gain1_cordic(struct fpemu *);
     61 static struct fpn *fpu_atan_taylor(struct fpemu *);
     62 static void printf_fpn(const struct fpn *);
     63 static void printf_sfpn(const struct sfpn *);
     64 static void fpn_to_sfpn(struct sfpn *, const struct fpn *);
     65 
     66 static struct sfpn atan_table[EXT_FRACBITS];
     67 static struct fpn inv_gain1;
     68 
     69 int
     70 main(int argc, char *argv[])
     71 {
     72 	struct fpemu dummyfe;
     73 	int i;
     74 	struct fpn fp;
     75 
     76 	memset(&dummyfe, 0, sizeof(dummyfe));
     77 	prepare_cordic_const(&dummyfe);
     78 
     79 	/* output as source code */
     80 	printf("static const struct sfpn atan_table[] = {\n");
     81 	for (i = 0; i < EXT_FRACBITS; i++) {
     82 		printf("\t");
     83 		printf_sfpn(&atan_table[i]);
     84 		printf(",\n");
     85 	}
     86 	printf("};\n\n");
     87 
     88 	printf("const struct fpn fpu_cordic_inv_gain1 =\n\t");
     89 	printf_fpn(&inv_gain1);
     90 	printf(";\n\n");
     91 }
     92 
     93 /*
     94  * This routine uses fpu_const(), fpu_add(), fpu_div(), fpu_logn()
     95  * and fpu_atan_taylor() as bootstrap.
     96  */
     97 static void
     98 prepare_cordic_const(struct fpemu *fe)
     99 {
    100 	struct fpn t;
    101 	struct fpn x;
    102 	struct fpn *r;
    103 	int i;
    104 
    105 	/* atan_table */
    106 	fpu_const(&t, FPU_CONST_1);
    107 	for (i = 0; i < EXT_FRACBITS; i++) {
    108 		/* atan(t) */
    109 		CPYFPN(&fe->fe_f2, &t);
    110 		r = fpu_atan_taylor(fe);
    111 		fpn_to_sfpn(&atan_table[i], r);
    112 
    113 		/* t /= 2 */
    114 		t.fp_exp--;
    115 	}
    116 
    117 	/* inv_gain1 = 1 / gain1cordic() */
    118 	r = fpu_gain1_cordic(fe);
    119 	CPYFPN(&fe->fe_f2, r);
    120 	fpu_const(&fe->fe_f1, FPU_CONST_1);
    121 	r = fpu_div(fe);
    122 	CPYFPN(&inv_gain1, r);
    123 }
    124 
    125 static struct fpn *
    126 fpu_gain1_cordic(struct fpemu *fe)
    127 {
    128 	struct fpn x;
    129 	struct fpn y;
    130 	struct fpn z;
    131 	struct fpn v;
    132 
    133 	fpu_const(&x, FPU_CONST_1);
    134 	fpu_const(&y, FPU_CONST_0);
    135 	fpu_const(&z, FPU_CONST_0);
    136 	CPYFPN(&v, &x);
    137 	v.fp_sign = !v.fp_sign;
    138 
    139 	fpu_cordit1(fe, &x, &y, &z, &v);
    140 	CPYFPN(&fe->fe_f2, &x);
    141 	return &fe->fe_f2;
    142 }
    143 
    144 /*
    145  * arctan(x) = pi/4 (for |x| = 1)
    146  *
    147  *                 x^3   x^5   x^7
    148  * arctan(x) = x - --- + --- - --- + ...   (for |x| < 1)
    149  *                  3     5     7
    150  */
    151 static struct fpn *
    152 fpu_atan_taylor(struct fpemu *fe)
    153 {
    154 	struct fpn res;
    155 	struct fpn x2;
    156 	struct fpn s0;
    157 	struct fpn *s1;
    158 	struct fpn *r;
    159 	uint32_t k;
    160 
    161 	/* arctan(1) is pi/4 */
    162 	if (fe->fe_f2.fp_exp == 0) {
    163 		fpu_const(&fe->fe_f2, FPU_CONST_PI);
    164 		fe->fe_f2.fp_exp -= 2;
    165 		return &fe->fe_f2;
    166 	}
    167 
    168 	/* s0 := x */
    169 	CPYFPN(&s0, &fe->fe_f2);
    170 
    171 	/* res := x */
    172 	CPYFPN(&res, &fe->fe_f2);
    173 
    174 	/* x2 := x * x */
    175 	CPYFPN(&fe->fe_f1, &fe->fe_f2);
    176 	r = fpu_mul(fe);
    177 	CPYFPN(&x2, r);
    178 
    179 	k = 3;
    180 	for (;;) {
    181 		/* s1 := -s0 * x2 */
    182 		CPYFPN(&fe->fe_f1, &s0);
    183 		CPYFPN(&fe->fe_f2, &x2);
    184 		s1 = fpu_mul(fe);
    185 		s1->fp_sign ^= 1;
    186 		CPYFPN(&fe->fe_f1, s1);
    187 
    188 		/* s0 := s1 for next loop */
    189 		CPYFPN(&s0, s1);
    190 
    191 		/* s1 := s1 / k */
    192 		fpu_explode(fe, &fe->fe_f2, FTYPE_LNG, &k);
    193 		s1 = fpu_div(fe);
    194 
    195 		/* break if s1 is enough small */
    196 		if (ISZERO(s1))
    197 			break;
    198 		if (res.fp_exp - s1->fp_exp >= FP_NMANT)
    199 			break;
    200 
    201 		/* res += s1 */
    202 		CPYFPN(&fe->fe_f2, s1);
    203 		CPYFPN(&fe->fe_f1, &res);
    204 		r = fpu_add(fe);
    205 		CPYFPN(&res, r);
    206 
    207 		k += 2;
    208 	}
    209 
    210 	CPYFPN(&fe->fe_f2, &res);
    211 	return &fe->fe_f2;
    212 }
    213 
    214 static void
    215 printf_fpn(const struct fpn *fp)
    216 {
    217 	printf("{ %d, %d, %3d, %d, { 0x%08x, 0x%08x, 0x%08x, }, }",
    218 		fp->fp_class, fp->fp_sign, fp->fp_exp, fp->fp_sticky ? 1 : 0,
    219 		fp->fp_mant[0], fp->fp_mant[1], fp->fp_mant[2]);
    220 }
    221 
    222 static void
    223 printf_sfpn(const struct sfpn *sp)
    224 {
    225 	printf("{ 0x%08x, 0x%08x, 0x%08x, }",
    226 		sp->sp_m0, sp->sp_m1, sp->sp_m2);
    227 }
    228 
    229 static void
    230 fpn_to_sfpn(struct sfpn *sp, const struct fpn *fp)
    231 {
    232 	sp->sp_m0 = (fp->fp_exp << 24) | fp->fp_mant[0];
    233 	sp->sp_m1 = fp->fp_mant[1];
    234 	sp->sp_m2 = fp->fp_mant[2];
    235 }
    236 
    237 #else /* CORDIC_BOOTSTRAP */
    238 
    239 static const struct sfpn atan_table[] = {
    240 	{ 0xff06487e, 0xd5110b46, 0x11a80000, },
    241 	{ 0xfe076b19, 0xc1586ed3, 0xda2b7f0d, },
    242 	{ 0xfd07d6dd, 0x7e4b2037, 0x58ab6e33, },
    243 	{ 0xfc07f56e, 0xa6ab0bdb, 0x719644b5, },
    244 	{ 0xfb07fd56, 0xedcb3f7a, 0x71b65937, },
    245 	{ 0xfa07ff55, 0x6eea5d89, 0x2a13bce7, },
    246 	{ 0xf907ffd5, 0x56eedca6, 0xaddf3c5f, },
    247 	{ 0xf807fff5, 0x556eeea5, 0xcb403117, },
    248 	{ 0xf707fffd, 0x5556eeed, 0xca5d8956, },
    249 	{ 0xf607ffff, 0x55556eee, 0xea5ca6ab, },
    250 	{ 0xf507ffff, 0xd55556ee, 0xeedca5c8, },
    251 	{ 0xf407ffff, 0xf555556e, 0xeeeea5c8, },
    252 	{ 0xf307ffff, 0xfd555556, 0xeeeeedc8, },
    253 	{ 0xf207ffff, 0xff555555, 0x6eeeeee8, },
    254 	{ 0xf107ffff, 0xffd55555, 0x56eeeeed, },
    255 	{ 0xf007ffff, 0xfff55555, 0x556eeeed, },
    256 	{ 0xef07ffff, 0xfffd5555, 0x5556eeed, },
    257 	{ 0xee07ffff, 0xffff5555, 0x55556eed, },
    258 	{ 0xed07ffff, 0xffffd555, 0x555556ed, },
    259 	{ 0xec07ffff, 0xfffff555, 0x5555556d, },
    260 	{ 0xeb07ffff, 0xfffffd55, 0x55555555, },
    261 	{ 0xea07ffff, 0xffffff55, 0x55555554, },
    262 	{ 0xe907ffff, 0xffffffd5, 0x55555554, },
    263 	{ 0xe807ffff, 0xfffffff5, 0x55555554, },
    264 	{ 0xe707ffff, 0xfffffffd, 0x55555554, },
    265 	{ 0xe607ffff, 0xffffffff, 0x55555554, },
    266 	{ 0xe507ffff, 0xffffffff, 0xd5555554, },
    267 	{ 0xe407ffff, 0xffffffff, 0xf5555554, },
    268 	{ 0xe307ffff, 0xffffffff, 0xfd555554, },
    269 	{ 0xe207ffff, 0xffffffff, 0xff555554, },
    270 	{ 0xe107ffff, 0xffffffff, 0xffd55554, },
    271 	{ 0xe007ffff, 0xffffffff, 0xfff55554, },
    272 	{ 0xdf07ffff, 0xffffffff, 0xfffd5554, },
    273 	{ 0xde07ffff, 0xffffffff, 0xffff5554, },
    274 	{ 0xdd07ffff, 0xffffffff, 0xffffd554, },
    275 	{ 0xdc07ffff, 0xffffffff, 0xfffff554, },
    276 	{ 0xdb07ffff, 0xffffffff, 0xfffffd54, },
    277 	{ 0xda07ffff, 0xffffffff, 0xffffff54, },
    278 	{ 0xd907ffff, 0xffffffff, 0xffffffd4, },
    279 	{ 0xd807ffff, 0xffffffff, 0xfffffff4, },
    280 	{ 0xd707ffff, 0xffffffff, 0xfffffffc, },
    281 	{ 0xd7040000, 0x00000000, 0x00000000, },
    282 	{ 0xd6040000, 0x00000000, 0x00000000, },
    283 	{ 0xd5040000, 0x00000000, 0x00000000, },
    284 	{ 0xd4040000, 0x00000000, 0x00000000, },
    285 	{ 0xd3040000, 0x00000000, 0x00000000, },
    286 	{ 0xd2040000, 0x00000000, 0x00000000, },
    287 	{ 0xd1040000, 0x00000000, 0x00000000, },
    288 	{ 0xd0040000, 0x00000000, 0x00000000, },
    289 	{ 0xcf040000, 0x00000000, 0x00000000, },
    290 	{ 0xce040000, 0x00000000, 0x00000000, },
    291 	{ 0xcd040000, 0x00000000, 0x00000000, },
    292 	{ 0xcc040000, 0x00000000, 0x00000000, },
    293 	{ 0xcb040000, 0x00000000, 0x00000000, },
    294 	{ 0xca040000, 0x00000000, 0x00000000, },
    295 	{ 0xc9040000, 0x00000000, 0x00000000, },
    296 	{ 0xc8040000, 0x00000000, 0x00000000, },
    297 	{ 0xc7040000, 0x00000000, 0x00000000, },
    298 	{ 0xc6040000, 0x00000000, 0x00000000, },
    299 	{ 0xc5040000, 0x00000000, 0x00000000, },
    300 	{ 0xc4040000, 0x00000000, 0x00000000, },
    301 	{ 0xc3040000, 0x00000000, 0x00000000, },
    302 	{ 0xc2040000, 0x00000000, 0x00000000, },
    303 	{ 0xc1040000, 0x00000000, 0x00000000, },
    304 };
    305 
    306 const struct fpn fpu_cordic_inv_gain1 =
    307 	{ 1, 0,  -1, 1, { 0x0004dba7, 0x6d421af2, 0xd33fafd1, }, };
    308 
    309 #endif /* CORDIC_BOOTSTRAP */
    310 
    311 static inline void
    312 sfpn_to_fpn(struct fpn *fp, const struct sfpn *s)
    313 {
    314 	fp->fp_class = FPC_NUM;
    315 	fp->fp_sign = 0;
    316 	fp->fp_sticky = 0;
    317 	fp->fp_exp = s->sp_m0 >> 24;
    318 	if (fp->fp_exp & 0x80) {
    319 		fp->fp_exp |= 0xffffff00;
    320 	}
    321 	fp->fp_mant[0] = s->sp_m0 & 0x000fffff;
    322 	fp->fp_mant[1] = s->sp_m1;
    323 	fp->fp_mant[2] = s->sp_m2;
    324 }
    325 
    326 void
    327 fpu_cordit1(struct fpemu *fe, struct fpn *x0, struct fpn *y0, struct fpn *z0,
    328 	const struct fpn *vecmode)
    329 {
    330 	struct fpn t;
    331 	struct fpn x;
    332 	struct fpn y;
    333 	struct fpn z;
    334 	struct fpn *r;
    335 	int i;
    336 	int sign;
    337 
    338 	fpu_const(&t, FPU_CONST_1);
    339 	CPYFPN(&x, x0);
    340 	CPYFPN(&y, y0);
    341 	CPYFPN(&z, z0);
    342 
    343 	for (i = 0; i < EXT_FRACBITS; i++) {
    344 		struct fpn x1;
    345 
    346 		/* y < vecmode */
    347 		CPYFPN(&fe->fe_f1, &y);
    348 		CPYFPN(&fe->fe_f2, vecmode);
    349 		fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
    350 		r = fpu_add(fe);
    351 
    352 		if ((vecmode->fp_sign == 0 && r->fp_sign) ||
    353 		    (vecmode->fp_sign && z.fp_sign == 0)) {
    354 			sign = 1;
    355 		} else {
    356 			sign = 0;
    357 		}
    358 
    359 		/* y * t */
    360 		CPYFPN(&fe->fe_f1, &y);
    361 		CPYFPN(&fe->fe_f2, &t);
    362 		r = fpu_mul(fe);
    363 
    364 		/*
    365 		 * x1 = x - y*t (if sign)
    366 		 * x1 = x + y*t
    367 		 */
    368 		CPYFPN(&fe->fe_f2, r);
    369 		if (sign)
    370 			fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
    371 		CPYFPN(&fe->fe_f1, &x);
    372 		r = fpu_add(fe);
    373 		CPYFPN(&x1, r);
    374 
    375 		/* x * t */
    376 		CPYFPN(&fe->fe_f1, &x);
    377 		CPYFPN(&fe->fe_f2, &t);
    378 		r = fpu_mul(fe);
    379 
    380 		/*
    381 		 * y = y + x*t (if sign)
    382 		 * y = y - x*t
    383 		 */
    384 		CPYFPN(&fe->fe_f2, r);
    385 		if (!sign)
    386 			fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
    387 		CPYFPN(&fe->fe_f1, &y);
    388 		r = fpu_add(fe);
    389 		CPYFPN(&y, r);
    390 
    391 		/*
    392 		 * z = z - atan_table[i] (if sign)
    393 		 * z = z + atan_table[i]
    394 		 */
    395 		CPYFPN(&fe->fe_f1, &z);
    396 		sfpn_to_fpn(&fe->fe_f2, &atan_table[i]);
    397 		if (sign)
    398 			fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
    399 		r = fpu_add(fe);
    400 		CPYFPN(&z, r);
    401 
    402 		/* x = x1 */
    403 		CPYFPN(&x, &x1);
    404 
    405 		/* t /= 2 */
    406 		t.fp_exp--;
    407 	}
    408 
    409 	CPYFPN(x0, &x);
    410 	CPYFPN(y0, &y);
    411 	CPYFPN(z0, &z);
    412 }
    413