parse.c revision 1.12 1 /* $NetBSD: parse.c,v 1.12 2001/12/07 13:37:39 bjh21 Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #include <sys/cdefs.h>
37 #ifndef lint
38 #if 0
39 static char sccsid[] = "@(#)parse.c 8.1 (Berkeley) 6/6/93";
40 #else
41 __RCSID("$NetBSD: parse.c,v 1.12 2001/12/07 13:37:39 bjh21 Exp $");
42 #endif
43 #endif /* not lint */
44
45 #include <sys/types.h>
46 #include <sys/file.h>
47
48 #include <ctype.h>
49 #include <err.h>
50 #include <errno.h>
51 #include <fcntl.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55
56 #include "hexdump.h"
57
58 FU *endfu; /* format at end-of-data */
59
/*
 * addfile --
 *	Read format strings from the file `name', one per line, and pass
 *	each of them to add().  Leading white space is skipped; empty lines
 *	and lines whose first non-blank character is '#' are ignored.
 *	Lines that do not fit in the line buffer are reported with warnx()
 *	and skipped.  Exits through err() if the file cannot be opened.
 */
void
addfile(name)
	char *name;
{
	char *p;
	FILE *fp;
	int ch;
	char buf[2048 + 1];

	if ((fp = fopen(name, "r")) == NULL)
		err(1, "fopen %s", name);
	while (fgets(buf, sizeof(buf), fp) != NULL) {
		if ((p = strchr(buf, '\n')) != NULL)
			*p = '\0';
		else if (!feof(fp)) {
			/*
			 * The buffer filled up before a newline was seen.
			 * Discard the remainder of the line from the format
			 * file itself; reading stdin here would consume the
			 * data that is about to be dumped.
			 */
			warnx("line too long.");
			while ((ch = getc(fp)) != '\n' && ch != EOF)
				continue;
			continue;
		}
		/*
		 * No newline but end-of-file was reached: this is a final,
		 * unterminated line, which is used as it stands.
		 */

		for (p = buf; *p && isspace((unsigned char)*p); ++p)
			continue;
		if (!*p || *p == '#')
			continue;
		add(p);
	}
	(void)fclose(fp);
}
85
86 void
87 add(fmt)
88 const char *fmt;
89 {
90 const char *p;
91 static FS **nextfs;
92 FS *tfs;
93 FU *tfu, **nextfu;
94 const char *savep;
95
96 /* start new linked list of format units */
97 tfs = emalloc(sizeof(FS));
98 if (!fshead)
99 fshead = tfs;
100 else
101 *nextfs = tfs;
102 nextfs = &tfs->nextfs;
103 nextfu = &tfs->nextfu;
104
105 /* take the format string and break it up into format units */
106 for (p = fmt;;) {
107 /* skip leading white space */
108 for (; isspace((unsigned char)*p); ++p);
109 if (!*p)
110 break;
111
112 /* allocate a new format unit and link it in */
113 tfu = emalloc(sizeof(FU));
114 *nextfu = tfu;
115 nextfu = &tfu->nextfu;
116 tfu->reps = 1;
117
118 /* if leading digit, repetition count */
119 if (isdigit((unsigned char)*p)) {
120 for (savep = p; isdigit((unsigned char)*p); ++p);
121 if (!isspace((unsigned char)*p) && *p != '/')
122 badfmt(fmt);
123 /* may overwrite either white space or slash */
124 tfu->reps = atoi(savep);
125 tfu->flags = F_SETREP;
126 /* skip trailing white space */
127 for (++p; isspace((unsigned char)*p); ++p);
128 }
129
130 /* skip slash and trailing white space */
131 if (*p == '/')
132 while (isspace((unsigned char)*++p));
133
134 /* byte count */
135 if (isdigit((unsigned char)*p)) {
136 for (savep = p; isdigit((unsigned char)*p); ++p);
137 if (!isspace((unsigned char)*p))
138 badfmt(fmt);
139 tfu->bcnt = atoi(savep);
140 /* skip trailing white space */
141 for (++p; isspace((unsigned char)*p); ++p);
142 }
143
144 /* format */
145 if (*p != '"')
146 badfmt(fmt);
147 for (savep = ++p; *p != '"';)
148 if (*p++ == 0)
149 badfmt(fmt);
150 if (!(tfu->fmt = malloc(p - savep + 1)))
151 nomem();
152 (void) strncpy(tfu->fmt, savep, p - savep);
153 tfu->fmt[p - savep] = '\0';
154 escape(tfu->fmt);
155 p++;
156 }
157 }
158
159 static const char *spec = ".#-+ 0123456789";
160
161 int
162 size(fs)
163 FS *fs;
164 {
165 FU *fu;
166 int bcnt, cursize;
167 char *fmt;
168 int prec;
169
170 /* figure out the data block size needed for each format unit */
171 for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
172 if (fu->bcnt) {
173 cursize += fu->bcnt * fu->reps;
174 continue;
175 }
176 for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
177 if (*fmt != '%')
178 continue;
179 /*
180 * skip any special chars -- save precision in
181 * case it's a %s format.
182 */
183 while (strchr(spec + 1, *++fmt));
184 if (*fmt == '.' && isdigit((unsigned char)*++fmt)) {
185 prec = atoi(fmt);
186 while (isdigit((unsigned char)*++fmt));
187 }
188 switch(*fmt) {
189 case 'c':
190 bcnt += 1;
191 break;
192 case 'd': case 'i': case 'o': case 'u':
193 case 'x': case 'X':
194 bcnt += 4;
195 break;
196 case 'e': case 'E': case 'f': case 'g': case 'G':
197 bcnt += 8;
198 break;
199 case 's':
200 bcnt += prec;
201 break;
202 case '_':
203 switch(*++fmt) {
204 case 'c': case 'p': case 'u':
205 bcnt += 1;
206 break;
207 }
208 }
209 }
210 cursize += bcnt * fu->reps;
211 }
212 return (cursize);
213 }
214
215 void
216 rewrite(fs)
217 FS *fs;
218 {
219 enum { NOTOKAY, USEBCNT, USEPREC } sokay;
220 PR *pr, **nextpr;
221 FU *fu;
222 char *p1, *p2;
223 char savech, *fmtp, cs[3];
224 int nconv, prec;
225
226 nextpr = NULL;
227 prec = 0;
228 for (fu = fs->nextfu; fu; fu = fu->nextfu) {
229 /*
230 * Break each format unit into print units; each conversion
231 * character gets its own.
232 */
233 for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
234 pr = emalloc(sizeof(PR));
235 if (!fu->nextpr)
236 fu->nextpr = pr;
237 else
238 *nextpr = pr;
239
240 /* Skip preceding text and up to the next % sign. */
241 for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
242
243 /* Only text in the string. */
244 if (!*p1) {
245 pr->fmt = fmtp;
246 pr->flags = F_TEXT;
247 break;
248 }
249
250 /*
251 * Get precision for %s -- if have a byte count, don't
252 * need it.
253 */
254 if (fu->bcnt) {
255 sokay = USEBCNT;
256 /* Skip to conversion character. */
257 for (++p1; strchr(spec, *p1); ++p1);
258 } else {
259 /* Skip any special chars, field width. */
260 while (strchr(spec + 1, *++p1));
261 if (*p1 == '.' &&
262 isdigit((unsigned char)*++p1)) {
263 sokay = USEPREC;
264 prec = atoi(p1);
265 while (isdigit((unsigned char)*++p1))
266 continue;
267 } else
268 sokay = NOTOKAY;
269 }
270
271 p2 = p1 + 1; /* Set end pointer. */
272 cs[0] = *p1; /* Set conversion string. */
273 cs[1] = '\0';
274
275 /*
276 * Figure out the byte count for each conversion;
277 * rewrite the format as necessary, set up blank-
278 * padding for end of data.
279 */
280 switch(cs[0]) {
281 case 'c':
282 pr->flags = F_CHAR;
283 switch(fu->bcnt) {
284 case 0: case 1:
285 pr->bcnt = 1;
286 break;
287 default:
288 p1[1] = '\0';
289 badcnt(p1);
290 }
291 break;
292 case 'd': case 'i':
293 pr->flags = F_INT;
294 goto isint;
295 case 'o': case 'u': case 'x': case 'X':
296 pr->flags = F_UINT;
297 isint: cs[2] = '\0';
298 cs[1] = cs[0];
299 cs[0] = 'q';
300 switch(fu->bcnt) {
301 case 0: case 4:
302 pr->bcnt = 4;
303 break;
304 case 1:
305 pr->bcnt = 1;
306 break;
307 case 2:
308 pr->bcnt = 2;
309 break;
310 case 8:
311 pr->bcnt = 8;
312 break;
313 default:
314 p1[1] = '\0';
315 badcnt(p1);
316 }
317 break;
318 case 'e': case 'E': case 'f': case 'g': case 'G':
319 pr->flags = F_DBL;
320 switch(fu->bcnt) {
321 case 0: case 8:
322 pr->bcnt = 8;
323 break;
324 case 4:
325 pr->bcnt = 4;
326 break;
327 default:
328 p1[1] = '\0';
329 badcnt(p1);
330 }
331 break;
332 case 's':
333 pr->flags = F_STR;
334 switch(sokay) {
335 case NOTOKAY:
336 badsfmt();
337 case USEBCNT:
338 pr->bcnt = fu->bcnt;
339 break;
340 case USEPREC:
341 pr->bcnt = prec;
342 break;
343 }
344 break;
345 case '_':
346 ++p2;
347 switch(p1[1]) {
348 case 'A':
349 endfu = fu;
350 fu->flags |= F_IGNORE;
351 /* FALLTHROUGH */
352 case 'a':
353 pr->flags = F_ADDRESS;
354 ++p2;
355 switch(p1[2]) {
356 case 'd': case 'o': case'x':
357 cs[0] = 'q';
358 cs[1] = p1[2];
359 cs[2] = '\0';
360 break;
361 default:
362 p1[3] = '\0';
363 badconv(p1);
364 }
365 break;
366 case 'c':
367 pr->flags = F_C;
368 /* cs[0] = 'c'; set in conv_c */
369 goto isint2;
370 case 'p':
371 pr->flags = F_P;
372 cs[0] = 'c';
373 goto isint2;
374 case 'u':
375 pr->flags = F_U;
376 /* cs[0] = 'c'; set in conv_u */
377 isint2: switch(fu->bcnt) {
378 case 0: case 1:
379 pr->bcnt = 1;
380 break;
381 default:
382 p1[2] = '\0';
383 badcnt(p1);
384 }
385 break;
386 default:
387 p1[2] = '\0';
388 badconv(p1);
389 }
390 break;
391 default:
392 p1[1] = '\0';
393 badconv(p1);
394 }
395
396 /*
397 * Copy to PR format string, set conversion character
398 * pointer, update original.
399 */
400 savech = *p2;
401 p1[0] = '\0';
402 pr->fmt = emalloc(strlen(fmtp) + strlen(cs) + 1);
403 (void)strcpy(pr->fmt, fmtp);
404 (void)strcat(pr->fmt, cs);
405 *p2 = savech;
406 pr->cchar = pr->fmt + (p1 - fmtp);
407 fmtp = p2;
408
409 /* Only one conversion character if byte count. */
410 if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
411 errx(1,
412 "byte count with multiple conversion characters");
413 }
414 /*
415 * If format unit byte count not specified, figure it out
416 * so can adjust rep count later.
417 */
418 if (!fu->bcnt)
419 for (pr = fu->nextpr; pr; pr = pr->nextpr)
420 fu->bcnt += pr->bcnt;
421 }
422 /*
423 * If the format string interprets any data at all, and it's
424 * not the same as the blocksize, and its last format unit
425 * interprets any data at all, and has no iteration count,
426 * repeat it as necessary.
427 *
428 * If, rep count is greater than 1, no trailing whitespace
429 * gets output from the last iteration of the format unit.
430 */
431 for (fu = fs->nextfu; fu; fu = fu->nextfu) {
432 if (!fu->nextfu && fs->bcnt < blocksize &&
433 !(fu->flags&F_SETREP) && fu->bcnt)
434 fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
435 if (fu->reps > 1) {
436 for (pr = fu->nextpr;; pr = pr->nextpr)
437 if (!pr->nextpr)
438 break;
439 for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
440 p2 = isspace((unsigned char)*p1) ? p1 : NULL;
441 if (p2)
442 pr->nospace = p2;
443 }
444 }
445 #ifdef DEBUG
446 for (fu = fs->nextfu; fu; fu = fu->nextfu) {
447 (void)printf("fmt:");
448 for (pr = fu->nextpr; pr; pr = pr->nextpr)
449 (void)printf(" {%s}", pr->fmt);
450 (void)printf("\n");
451 }
452 #endif
453 }
454
/*
 * escape --
 *	Translate the backslash escape sequences in the string `p1' in
 *	place.  \a, \b, \f, \n, \r, \t and \v become the corresponding
 *	control characters; a backslash followed by any other character
 *	yields that character; a backslash that is the last character of
 *	the string is kept as a literal backslash.  The result is never
 *	longer than the input.
 */
void
escape(p1)
	char *p1;
{
	char *p2;

	/* alphabetic escape sequences have to be done in place */
	for (p2 = p1;; ++p1, ++p2) {
		if (*p1 != '\\') {
			/*
			 * Ordinary character: once an escape has been
			 * collapsed the write position lags behind the read
			 * position, so the character must be copied down.
			 */
			*p2 = *p1;
			if (*p1 == '\0')
				break;
			continue;
		}
		switch (*++p1) {
		case '\0':
			/*
			 * Trailing backslash: keep it and stop here, so the
			 * loop does not step past the terminator.
			 */
			*p2 = '\\';
			*++p2 = '\0';
			return;
		case 'a':
			*p2 = '\007';		/* BEL */
			break;
		case 'b':
			*p2 = '\b';
			break;
		case 'f':
			*p2 = '\f';
			break;
		case 'n':
			*p2 = '\n';
			break;
		case 'r':
			*p2 = '\r';
			break;
		case 't':
			*p2 = '\t';
			break;
		case 'v':
			*p2 = '\v';
			break;
		default:
			*p2 = *p1;
			break;
		}
	}
}
497
/*
 * badcnt --
 *	Report that the byte count given for the conversion `s' is not
 *	one that the conversion accepts, and exit with status 1.
 */
void
badcnt(s)
	char *s;
{

	errx(1, "%s: bad byte count", s);
}
504
/*
 * badsfmt --
 *	Report a %s conversion that has neither a precision nor a byte
 *	count, and exit with status 1.
 */
void
badsfmt()
{

	/* errx() terminates the message with a newline itself. */
	errx(1, "%%s: requires a precision or a byte count");
}
510
/*
 * badfmt --
 *	Report that the format string `fmt' could not be parsed, and exit
 *	with status 1.
 */
void
badfmt(fmt)
	const char *fmt;
{

	/* errx() terminates the message with a newline itself. */
	errx(1, "\"%s\": bad format", fmt);
}
517
/*
 * badconv --
 *	Report an unknown conversion and exit with status 1.  `ch' is the
 *	text of the conversion without its leading '%'.
 */
void
badconv(ch)
	char *ch;
{

	/* errx() terminates the message with a newline itself. */
	errx(1, "%%%s: bad conversion character", ch);
}
524