Home | History | Annotate | Line # | Download | only in libfile
      1 /* @(#)apprentice.c	1.13 09/07/11 joerg */
      2 #ifndef lint
      3 static	const char sccsid[] =
      4 	"@(#)apprentice.c	1.13 09/07/11 joerg";
      5 #endif
      6 /*
      7 **	find file types by using a modified "magic" file
      8 **
      9 **	based on file v3.22 by Ian F. Darwin (see below)
     10 **
     11 **	Modified for mkhybrid James Pearson 19/5/98
     12 */
     13 
     14 /*
     15  * apprentice - make one pass through /etc/magic, learning its secrets.
     16  *
     17  * Copyright (c) Ian F. Darwin, 1987.
     18  * Written by Ian F. Darwin.
     19  *
     20  * This software is not subject to any export provision of the United States
     21  * Department of Commerce, and may be exported to any country or planet.
     22  *
     23  * Redistribution and use in source and binary forms, with or without
     24  * modification, are permitted provided that the following conditions
     25  * are met:
     26  * 1. Redistributions of source code must retain the above copyright
     27  *    notice immediately at the beginning of the file, without modification,
     28  *    this list of conditions, and the following disclaimer.
     29  * 2. Redistributions in binary form must reproduce the above copyright
     30  *    notice, this list of conditions and the following disclaimer in the
     31  *    documentation and/or other materials provided with the distribution.
     32  *
     33  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     34  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     35  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     36  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
     37  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     38  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     39  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     40  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     41  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     42  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     43  * SUCH DAMAGE.
     44  */
     45 
     46 #include <stdio.h>
     47 #include <stdlib.h>
     48 #include <string.h>
     49 #include <ctype.h>
     50 #include "proto.h"
     51 #include "file.h"
     52 
     53 #ifndef	lint
     54 static const char moduleid[] =
     55 	"@(#)Id: apprentice.c,v 1.25 1997/01/15 17:23:24 christos Exp";
     56 #endif	/* lint */
     57 
/*
 * Shared magic table state.  parse() stores each accepted entry at
 * __f_magic[__f_nmagic] and then increments __f_nmagic; init_magic()
 * allocates the array.
 */
int	__f_nmagic = 0;		/* number of valid magic[]s 		*/
#if	defined(IS_MACOS_X)
/*
 * The Mac OS X linker does not grok "common" variables.
 * Make __f_magic a "data" variable by giving it an explicit initializer.
 */
struct  magic *__f_magic = 0;	/* array of magic entries		*/
#else
struct  magic *__f_magic;	/* array of magic entries		*/
#endif
     68 
/*
 * EATAB - advance the scan pointer past a run of ASCII whitespace.
 *
 * The macro operates on a variable named `l` (a char *) that must be in
 * scope at the point of use.  It is wrapped in do { } while (0) so that
 * "EATAB;" is a single statement and is safe in an unbraced if/else.
 * The "< 0x80" test is the ISO C spelling of isascii() for an
 * unsigned char value.
 */
#define	EATAB do { \
		while ((unsigned char) *l < 0x80 && \
		       isspace((unsigned char) *l)) \
			++l; \
	} while (0)

/*
 * LOWCASE - lower-case version of an upper-case character; any other
 * value is returned unchanged.  The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
			tolower((unsigned char) (l)) : (l))
     73 
     74 
/* Forward declarations of the file-local helpers defined further down. */
static int getvalue	(struct magic *, char **);
static int hextoint	(int);
static char *apgetstr	(char *, char *, int, int *);
static int parse	(char *, int *, int);
static void eatsize	(char **);

/*
 * Number of entries currently allocated for __f_magic: set by init_magic()
 * and increased by parse() when the table has to grow.
 */
static int maxmagic = 0;

static int apprentice_1	(char *, int);
     84 
     85 /*
     86  * init_magic - read magic file and set up mapping
     87  * based on the original apprentice()
     88  */
     89 int
     90 init_magic(
     91 char *fn			/* list of magic files */
     92 )
     93 {
     94         maxmagic = MAXMAGIS;
     95 	__f_magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
     96 	if (__f_magic == NULL)
     97 		return -1;
     98 
     99 	return(apprentice_1(fn, 0));
    100 }
    101 
    102 static int
    103 apprentice_1(
    104 char *fn,			/* name of magic file */
    105 int check			/* non-zero? checking-only run. */
    106 )
    107 {
    108 	static const char hdr[] =
    109 		"cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
    110 	FILE *f;
    111 	char line[BUFSIZ+1];
    112 	int errs = 0;
    113 	int lineno;
    114 
    115 	f = fopen(fn, "r");
    116 	if (f==NULL) {
    117 		return -1;
    118 	}
    119 
    120 	/* parse it */
    121 	if (check)	/* print silly verbose header for USG compat. */
    122 		(void) printf("%s\n", hdr);
    123 
    124 	for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
    125 		if (line[0]=='#')	/* comment, do not parse */
    126 			continue;
    127 		if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
    128 			continue;
    129 		line[strlen(line)-1] = '\0'; /* delete newline */
    130 		if (parse(line, &__f_nmagic, check) != 0)
    131 			errs = 1;
    132 	}
    133 
    134 	(void) fclose(f);
    135 	return errs;
    136 }
    137 
    138 /*
    139  * extend the sign bit if the comparison is to be signed
    140  * XXX is uint32 really a good idea XXX JS
    141  */
    142 UInt32_t
    143 signextend(struct magic *m, UInt32_t v)
    144 {
    145 	if (!(m->flag & UNSIGNED))
    146 		switch(m->type) {
    147 		/*
    148 		 * Do not remove the casts below.  They are
    149 		 * vital.  When later compared with the data,
    150 		 * the sign extension must have happened.
    151 		 */
    152 		case BYTE:
    153 			v = (char) v;
    154 			break;
    155 		case SHORT:
    156 		case BESHORT:
    157 		case LESHORT:
    158 			v = (short) v;
    159 			break;
    160 		case DATE:
    161 		case BEDATE:
    162 		case LEDATE:
    163 		case LONG:
    164 		case BELONG:
    165 		case LELONG:
    166 			v = (Int32_t) v;
    167 			break;
    168 		case STRING:
    169 			break;
    170 		default:
    171 			return -1;
    172 		}
    173 	return v;
    174 }
    175 
    176 /*
    177  * parse one line from magic file, put into magic[index++] if valid
    178  */
    179 static int
    180 parse(char *l, int *ndx, int check)
    181 {
    182 	int i = 0, nd = *ndx;
    183 	struct magic *m;
    184 	char *t, *s;
    185 
    186 #define ALLOC_INCR	20
    187 	if (nd+1 >= maxmagic){
    188 	    maxmagic += ALLOC_INCR;
    189 	    if ((__f_magic = (struct magic *) realloc(__f_magic,
    190 						  sizeof(struct magic) *
    191 						  maxmagic)) == NULL) {
    192 #ifdef	MAIN
    193 		(void) fprintf(stderr, "%s: Out of memory.\n", progname);
    194 #else
    195 		(void) fprintf(stderr, "libfile: Out of memory.\n");
    196 #endif
    197 		if (check)
    198 			return -1;
    199 		else
    200 			exit(1);
    201 	    }
    202 	    memset(&__f_magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
    203 	}
    204 	m = &__f_magic[*ndx];
    205 	m->flag = 0;
    206 	m->cont_level = 0;
    207 
    208 	while (*l == '>') {
    209 		++l;		/* step over */
    210 		m->cont_level++;
    211 	}
    212 
    213 	if (m->cont_level != 0 && *l == '(') {
    214 		++l;		/* step over */
    215 		m->flag |= INDIR;
    216 	}
    217 	if (m->cont_level != 0 && *l == '&') {
    218                 ++l;            /* step over */
    219                 m->flag |= ADD;
    220         }
    221 
    222 	/* get offset, then skip over it */
    223 	m->offset = (int) strtoul(l,&t,0);
    224 /*
    225         if (l == t)
    226 		magwarn("offset %s invalid", l);
    227 */
    228         l = t;
    229 
    230 	if (m->flag & INDIR) {
    231 		m->in.type = LONG;
    232 		m->in.offset = 0;
    233 		/*
    234 		 * read [.lbs][+-]nnnnn)
    235 		 */
    236 		if (*l == '.') {
    237 			l++;
    238 			switch (LOWCASE(*l)) {
    239 			case 'l':
    240 				m->in.type = LONG;
    241 				break;
    242 			case 'h':
    243 			case 's':
    244 				m->in.type = SHORT;
    245 				break;
    246 			case 'c':
    247 			case 'b':
    248 				m->in.type = BYTE;
    249 				break;
    250 			default:
    251 				break;
    252 			}
    253 			l++;
    254 		}
    255 		s = l;
    256 		if (*l == '+' || *l == '-') l++;
    257 		if (isdigit((unsigned char)*l)) {
    258 			m->in.offset = strtoul(l, &t, 0);
    259 			if (*s == '-') m->in.offset = - m->in.offset;
    260 		}
    261 		else
    262 			t = l;
    263 /*
    264 		if (*t++ != ')')
    265 			magwarn("missing ')' in indirect offset");
    266 */
    267 		l = t;
    268 	}
    269 
    270 
    271 	while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
    272 		++l;
    273 	EATAB;
    274 
    275 #define NBYTE		4
    276 #define NSHORT		5
    277 #define NLONG		4
    278 #define NSTRING 	6
    279 #define NDATE		4
    280 #define NBESHORT	7
    281 #define NBELONG		6
    282 #define NBEDATE		6
    283 #define NLESHORT	7
    284 #define NLELONG		6
    285 #define NLEDATE		6
    286 
    287 	if (*l == 'u') {
    288 		++l;
    289 		m->flag |= UNSIGNED;
    290 	}
    291 
    292 	/* get type, skip it */
    293 	if (strncmp(l, "byte", NBYTE)==0) {
    294 		m->type = BYTE;
    295 		l += NBYTE;
    296 	} else if (strncmp(l, "short", NSHORT)==0) {
    297 		m->type = SHORT;
    298 		l += NSHORT;
    299 	} else if (strncmp(l, "long", NLONG)==0) {
    300 		m->type = LONG;
    301 		l += NLONG;
    302 	} else if (strncmp(l, "string", NSTRING)==0) {
    303 		m->type = STRING;
    304 		l += NSTRING;
    305 	} else if (strncmp(l, "date", NDATE)==0) {
    306 		m->type = DATE;
    307 		l += NDATE;
    308 	} else if (strncmp(l, "beshort", NBESHORT)==0) {
    309 		m->type = BESHORT;
    310 		l += NBESHORT;
    311 	} else if (strncmp(l, "belong", NBELONG)==0) {
    312 		m->type = BELONG;
    313 		l += NBELONG;
    314 	} else if (strncmp(l, "bedate", NBEDATE)==0) {
    315 		m->type = BEDATE;
    316 		l += NBEDATE;
    317 	} else if (strncmp(l, "leshort", NLESHORT)==0) {
    318 		m->type = LESHORT;
    319 		l += NLESHORT;
    320 	} else if (strncmp(l, "lelong", NLELONG)==0) {
    321 		m->type = LELONG;
    322 		l += NLELONG;
    323 	} else if (strncmp(l, "ledate", NLEDATE)==0) {
    324 		m->type = LEDATE;
    325 		l += NLEDATE;
    326 	} else {
    327 		return -1;
    328 	}
    329 	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
    330 	if (*l == '&') {
    331 		++l;
    332 		m->mask = signextend(m, (UInt32_t)strtoul(l, &l, 0)); /* XXX JS uint32 cat may be wrong */
    333 		eatsize(&l);
    334 	} else
    335 		m->mask = ~0L;
    336 	EATAB;
    337 
    338 	switch (*l) {
    339 	case '>':
    340 	case '<':
    341 	/* Old-style anding: "0 byte &0x80 dynamically linked" */
    342 	case '&':
    343 	case '^':
    344 	case '=':
    345   		m->reln = *l;
    346   		++l;
    347 		break;
    348 	case '!':
    349 		if (m->type != STRING) {
    350 			m->reln = *l;
    351 			++l;
    352 			break;
    353 		}
    354 		/* FALL THROUGH */
    355 	default:
    356 		if (*l == 'x' && isascii((unsigned char)l[1]) &&
    357 		    isspace((unsigned char)l[1])) {
    358 			m->reln = *l;
    359 			++l;
    360 			goto GetDesc;	/* Bill The Cat */
    361 		}
    362   		m->reln = '=';
    363 		break;
    364 	}
    365   	EATAB;
    366 
    367 	if (getvalue(m, &l))
    368 		return -1;
    369 	/*
    370 	 * TODO finish this macro and start using it!
    371 	 * #define offsetcheck {if (offset > HOWMANY-1)
    372 	 *	magwarn("offset too big"); }
    373 	 */
    374 
    375 	/*
    376 	 * now get last part - the description
    377 	 */
    378 GetDesc:
    379 	EATAB;
    380 	if (l[0] == '\b') {
    381 		++l;
    382 		m->nospflag = 1;
    383 	} else if ((l[0] == '\\') && (l[1] == 'b')) {
    384 		++l;
    385 		++l;
    386 		m->nospflag = 1;
    387 	} else
    388 		m->nospflag = 0;
    389 	while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
    390 		/* NULLBODY */;
    391 
    392 	++(*ndx);		/* make room for next */
    393 	return 0;
    394 }
    395 
    396 /*
    397  * Read a numeric value from a pointer, into the value union of a magic
    398  * pointer, according to the magic type.  Update the string pointer to point
    399  * just after the number read.  Return 0 for success, non-zero for failure.
    400  */
    401 static int
    402 getvalue(struct magic *m, char **p)
    403 {
    404 	int slen;
    405 
    406 	if (m->type == STRING) {
    407 		*p = apgetstr(*p, m->value.s, sizeof(m->value.s), &slen);
    408 		m->vallen = slen;
    409 	} else
    410 		if (m->reln != 'x') {
    411 			m->value.l = signextend(m, (UInt32_t)strtoul(*p, p, 0)); /* XXX JS uint32 cat may be wrong */
    412 			eatsize(p);
    413 		}
    414 	return 0;
    415 }
    416 
/*
 * apgetstr - convert a string containing C character escapes.
 *
 * Scanning stops at the first unescaped whitespace character, at the end
 * of the input, or when the output buffer is full (a message is then
 * written to stderr).
 *
 * s    - input text
 * p    - output buffer, always NUL-terminated on return
 * plen - size of the output buffer in bytes
 * slen - receives the number of bytes stored, not counting the NUL
 *
 * Returns the updated scan pointer.  It points just past the character
 * that stopped the scan, except at the end of the input, where it points
 * at the terminating NUL so that the caller never reads beyond the string.
 */
static char *
apgetstr(char *s, char *p, int plen, int *slen)
{
	char	*origs = s, *origp = p;
	char	*pmax;
	int	c;
	int	val;

	if (plen <= 0) {	/* no room even for the terminator */
		*slen = 0;
		return s;
	}
	pmax = p + plen - 1;

	while ((c = *s++) != '\0') {
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if (c != '\\') {
			*p++ = (char)c;
			continue;
		}

		switch (c = *s++) {

		case '\0':
			--s;	/* lone backslash at end: stay on the NUL */
			goto out;

		default:
			*p++ = (char) c;
			break;

		case 'n':
			*p++ = '\n';
			break;

		case 'r':
			*p++ = '\r';
			break;

		case 'b':
			*p++ = '\b';
			break;

		case 't':
			*p++ = '\t';
			break;

		case 'f':
			*p++ = '\f';
			break;

		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			c = *s++;  /* try for 2 */
			if (c >= '0' && c <= '7') {
				val = (val<<3) | (c - '0');
				c = *s++;  /* try for 3 */
				if (c >= '0' && c <= '7')
					val = (val<<3) | (c-'0');
				else
					--s;
			} else
				--s;
			*p++ = (char)val;
			break;

		/* \x and up to 2 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			c = hextoint(*s++);	/* Get next char */
			if (c >= 0) {
				val = c;
				c = hextoint(*s++);
				if (c >= 0)
					val = (val << 4) + c;
				else
					--s;
			} else
				--s;
			*p++ = (char)val;
			break;
		}
	}
	/*
	 * The loop condition has already stepped over the terminator when
	 * the input ran out; step back so the result points at the NUL.
	 */
	if (c == '\0')
		--s;
out:
	*p = '\0';
	*slen = p - origp;
	return s;
}
    520 
    521 
/*
 * hextoint - value of a single hexadecimal digit character.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything
 * else, including every character outside the 7-bit ASCII range.
 */
static int
hextoint(int c)
{
	unsigned char low = (unsigned char) c;

	if (low > 0x7f)			/* not ASCII */
		return -1;
	if (isdigit(low))
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
    532 
    533 
/*
 * showstr - print a string, writing unprintable bytes as C escapes.
 *
 * fp  - stream to write to
 * s   - bytes to print
 * len - number of bytes to print; a negative value means "print up to
 *       the terminating NUL"
 *
 * Bytes in the range 040..0176 are written as they are.  Newline,
 * carriage return, backspace, tab, form feed and vertical tab become
 * \n \r \b \t \f \v; every other byte is written as a backslash followed
 * by three octal digits.  With a counted length exactly len bytes are
 * read from s.
 */
void
showstr(FILE *fp, const char *s, int len)
{
	char	c;

	for (;;) {
		if (len < 0) {
			c = *s++;
			if (c == '\0')
				break;
		} else {
			/* test the count first so s[len] is never read */
			if (len-- == 0)
				break;
			c = *s++;
		}
		if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		(void) fputc('\\', fp);
		switch (c) {

		case '\n':
			(void) fputc('n', fp);
			break;

		case '\r':
			(void) fputc('r', fp);
			break;

		case '\b':
			(void) fputc('b', fp);
			break;

		case '\t':
			(void) fputc('t', fp);
			break;

		case '\f':
			(void) fputc('f', fp);
			break;

		case '\v':
			(void) fputc('v', fp);
			break;

		default:
			(void) fprintf(fp, "%.3o", c & 0377);
			break;
		}
	}
}
    589 
/*
 * eatsize(): step over the size suffix that may follow a number
 * [eg. 10UL].
 *
 * An optional 'u' and then at most one of 'l', 's', 'h', 'b' or 'c' are
 * skipped, in either letter case.  *p is left on the first character
 * that is not part of such a suffix.
 */
static void
eatsize(char **p)
{
	char *cur = *p;
	int ch;

	/* optional unsigned marker */
	if (tolower((unsigned char) *cur) == 'u')
		cur++;

	/* optional width letter: long, short (s/h) or byte (b/c) */
	ch = tolower((unsigned char) *cur);
	if (ch == 'l' || ch == 's' || ch == 'h' || ch == 'b' || ch == 'c')
		cur++;

	*p = cur;
}
    615