parse.c revision 1.11 1 /* $NetBSD: parse.c,v 1.11 2001/02/07 18:32:07 christos Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #include <sys/cdefs.h>
37 #ifndef lint
38 #if 0
39 static char sccsid[] = "@(#)parse.c 8.1 (Berkeley) 6/6/93";
40 #else
41 __RCSID("$NetBSD: parse.c,v 1.11 2001/02/07 18:32:07 christos Exp $");
42 #endif
43 #endif /* not lint */
44
45 #include <sys/types.h>
46 #include <sys/file.h>
47
48 #include <ctype.h>
49 #include <err.h>
50 #include <errno.h>
51 #include <fcntl.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55
56 #include "hexdump.h"
57
58 FU *endfu; /* format at end-of-data: the unit containing %_A, set by rewrite() */
59
/*
 * addfile --
 *	Read format strings from the file `name', one per line, and hand
 *	each one to add().  Leading white space is skipped; empty lines and
 *	lines whose first non-blank character is '#' are ignored.
 *
 *	Exits via err() if the file cannot be opened.  A line that does not
 *	fit in the buffer is reported with warnx() and skipped.
 */
void
addfile(name)
	char *name;
{
	char *p;
	FILE *fp;
	int ch;
	char buf[2048 + 1];

	if ((fp = fopen(name, "r")) == NULL)
		err(1, "fopen %s", name);
	while (fgets(buf, sizeof(buf), fp)) {
		p = strchr(buf, '\n');
		if (p != NULL)
			*p = '\0';
		else if ((ch = getc(fp)) != EOF) {
			/*
			 * No newline and more input follows: the line did
			 * not fit.  Discard the remainder from the format
			 * file itself (not from stdin).
			 */
			warnx("line too long.");
			while (ch != '\n' && ch != EOF)
				ch = getc(fp);
			continue;
		}
		/*
		 * Otherwise fgets() stopped at end-of-file: this is a final
		 * line without a trailing newline, which is still valid.
		 */

		for (p = buf; *p && isspace((unsigned char)*p); ++p)
			continue;
		if (!*p || *p == '#')
			continue;
		add(p);
	}
	(void)fclose(fp);
}
85
86 void
87 add(fmt)
88 const char *fmt;
89 {
90 const char *p;
91 static FS **nextfs;
92 FS *tfs;
93 FU *tfu, **nextfu;
94 const char *savep;
95
96 /* start new linked list of format units */
97 tfs = emalloc(sizeof(FS));
98 if (!fshead)
99 fshead = tfs;
100 else
101 *nextfs = tfs;
102 nextfs = &tfs->nextfs;
103 nextfu = &tfs->nextfu;
104
105 /* take the format string and break it up into format units */
106 for (p = fmt;;) {
107 /* skip leading white space */
108 for (; isspace((unsigned char)*p); ++p);
109 if (!*p)
110 break;
111
112 /* allocate a new format unit and link it in */
113 tfu = emalloc(sizeof(FU));
114 *nextfu = tfu;
115 nextfu = &tfu->nextfu;
116 tfu->reps = 1;
117
118 /* if leading digit, repetition count */
119 if (isdigit((unsigned char)*p)) {
120 for (savep = p; isdigit((unsigned char)*p); ++p);
121 if (!isspace((unsigned char)*p) && *p != '/')
122 badfmt(fmt);
123 /* may overwrite either white space or slash */
124 tfu->reps = atoi(savep);
125 tfu->flags = F_SETREP;
126 /* skip trailing white space */
127 for (++p; isspace((unsigned char)*p); ++p);
128 }
129
130 /* skip slash and trailing white space */
131 if (*p == '/')
132 while (isspace((unsigned char)*++p));
133
134 /* byte count */
135 if (isdigit((unsigned char)*p)) {
136 for (savep = p; isdigit((unsigned char)*p); ++p);
137 if (!isspace((unsigned char)*p))
138 badfmt(fmt);
139 tfu->bcnt = atoi(savep);
140 /* skip trailing white space */
141 for (++p; isspace((unsigned char)*p); ++p);
142 }
143
144 /* format */
145 if (*p != '"')
146 badfmt(fmt);
147 for (savep = ++p; *p != '"';)
148 if (*p++ == 0)
149 badfmt(fmt);
150 if (!(tfu->fmt = malloc(p - savep + 1)))
151 nomem();
152 (void) strncpy(tfu->fmt, savep, p - savep);
153 tfu->fmt[p - savep] = '\0';
154 escape(tfu->fmt);
155 p++;
156 }
157 }
158
/*
 * Characters that may appear between a '%' and its conversion character.
 * size() and rewrite() scan with "spec + 1" when they want to stop at the
 * '.' that introduces a precision, and with "spec" to skip it as well.
 */
159 static const char *spec = ".#-+ 0123456789";
160
161 int
162 size(fs)
163 FS *fs;
164 {
165 FU *fu;
166 int bcnt, cursize;
167 char *fmt;
168 int prec;
169
170 /* figure out the data block size needed for each format unit */
171 for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
172 if (fu->bcnt) {
173 cursize += fu->bcnt * fu->reps;
174 continue;
175 }
176 for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) {
177 if (*fmt != '%')
178 continue;
179 /*
180 * skip any special chars -- save precision in
181 * case it's a %s format.
182 */
183 while (strchr(spec + 1, *++fmt));
184 if (*fmt == '.' && isdigit((unsigned char)*++fmt)) {
185 prec = atoi(fmt);
186 while (isdigit((unsigned char)*++fmt));
187 }
188 switch(*fmt) {
189 case 'c':
190 bcnt += 1;
191 break;
192 case 'd': case 'i': case 'o': case 'u':
193 case 'x': case 'X':
194 bcnt += 4;
195 break;
196 case 'e': case 'E': case 'f': case 'g': case 'G':
197 bcnt += 8;
198 break;
199 case 's':
200 bcnt += prec;
201 break;
202 case '_':
203 switch(*++fmt) {
204 case 'c': case 'p': case 'u':
205 bcnt += 1;
206 break;
207 }
208 }
209 }
210 cursize += bcnt * fu->reps;
211 }
212 return (cursize);
213 }
214
215 void
216 rewrite(fs)
217 FS *fs;
218 {
219 enum { NOTOKAY, USEBCNT, USEPREC } sokay;
220 PR *pr, **nextpr;
221 FU *fu;
222 char *p1, *p2;
223 char savech, *fmtp, cs[3];
224 int nconv, prec;
225
226 nextpr = NULL;
227 prec = 0;
228 for (fu = fs->nextfu; fu; fu = fu->nextfu) {
229 /*
230 * Break each format unit into print units; each conversion
231 * character gets its own.
232 */
233 for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) {
234 pr = emalloc(sizeof(PR));
235 if (!fu->nextpr)
236 fu->nextpr = pr;
237 else
238 *nextpr = pr;
239
240 /* Skip preceding text and up to the next % sign. */
241 for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
242
243 /* Only text in the string. */
244 if (!*p1) {
245 pr->fmt = fmtp;
246 pr->flags = F_TEXT;
247 break;
248 }
249
250 /*
251 * Get precision for %s -- if have a byte count, don't
252 * need it.
253 */
254 if (fu->bcnt) {
255 sokay = USEBCNT;
256 /* Skip to conversion character. */
257 for (++p1; strchr(spec, *p1); ++p1);
258 } else {
259 /* Skip any special chars, field width. */
260 while (strchr(spec + 1, *++p1));
261 if (*p1 == '.' &&
262 isdigit((unsigned char)*++p1)) {
263 sokay = USEPREC;
264 prec = atoi(p1);
265 while (isdigit((unsigned char)*++p1))
266 continue;
267 } else
268 sokay = NOTOKAY;
269 }
270
271 p2 = p1 + 1; /* Set end pointer. */
272 cs[0] = *p1; /* Set conversion string. */
273 cs[1] = '\0';
274
275 /*
276 * Figure out the byte count for each conversion;
277 * rewrite the format as necessary, set up blank-
278 * padding for end of data.
279 */
280 switch(cs[0]) {
281 case 'c':
282 pr->flags = F_CHAR;
283 switch(fu->bcnt) {
284 case 0: case 1:
285 pr->bcnt = 1;
286 break;
287 default:
288 p1[1] = '\0';
289 badcnt(p1);
290 }
291 break;
292 case 'd': case 'i':
293 pr->flags = F_INT;
294 goto isint;
295 case 'o': case 'u': case 'x': case 'X':
296 pr->flags = F_UINT;
297 isint: cs[2] = '\0';
298 cs[1] = cs[0];
299 cs[0] = 'q';
300 switch(fu->bcnt) {
301 case 0: case 4:
302 pr->bcnt = 4;
303 break;
304 case 1:
305 pr->bcnt = 1;
306 break;
307 case 2:
308 pr->bcnt = 2;
309 break;
310 default:
311 p1[1] = '\0';
312 badcnt(p1);
313 }
314 break;
315 case 'e': case 'E': case 'f': case 'g': case 'G':
316 pr->flags = F_DBL;
317 switch(fu->bcnt) {
318 case 0: case 8:
319 pr->bcnt = 8;
320 break;
321 case 4:
322 pr->bcnt = 4;
323 break;
324 default:
325 p1[1] = '\0';
326 badcnt(p1);
327 }
328 break;
329 case 's':
330 pr->flags = F_STR;
331 switch(sokay) {
332 case NOTOKAY:
333 badsfmt();
334 case USEBCNT:
335 pr->bcnt = fu->bcnt;
336 break;
337 case USEPREC:
338 pr->bcnt = prec;
339 break;
340 }
341 break;
342 case '_':
343 ++p2;
344 switch(p1[1]) {
345 case 'A':
346 endfu = fu;
347 fu->flags |= F_IGNORE;
348 /* FALLTHROUGH */
349 case 'a':
350 pr->flags = F_ADDRESS;
351 ++p2;
352 switch(p1[2]) {
353 case 'd': case 'o': case'x':
354 cs[0] = 'q';
355 cs[1] = p1[2];
356 cs[2] = '\0';
357 break;
358 default:
359 p1[3] = '\0';
360 badconv(p1);
361 }
362 break;
363 case 'c':
364 pr->flags = F_C;
365 /* cs[0] = 'c'; set in conv_c */
366 goto isint2;
367 case 'p':
368 pr->flags = F_P;
369 cs[0] = 'c';
370 goto isint2;
371 case 'u':
372 pr->flags = F_U;
373 /* cs[0] = 'c'; set in conv_u */
374 isint2: switch(fu->bcnt) {
375 case 0: case 1:
376 pr->bcnt = 1;
377 break;
378 default:
379 p1[2] = '\0';
380 badcnt(p1);
381 }
382 break;
383 default:
384 p1[2] = '\0';
385 badconv(p1);
386 }
387 break;
388 default:
389 p1[1] = '\0';
390 badconv(p1);
391 }
392
393 /*
394 * Copy to PR format string, set conversion character
395 * pointer, update original.
396 */
397 savech = *p2;
398 p1[0] = '\0';
399 pr->fmt = emalloc(strlen(fmtp) + strlen(cs) + 1);
400 (void)strcpy(pr->fmt, fmtp);
401 (void)strcat(pr->fmt, cs);
402 *p2 = savech;
403 pr->cchar = pr->fmt + (p1 - fmtp);
404 fmtp = p2;
405
406 /* Only one conversion character if byte count. */
407 if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
408 errx(1,
409 "byte count with multiple conversion characters");
410 }
411 /*
412 * If format unit byte count not specified, figure it out
413 * so can adjust rep count later.
414 */
415 if (!fu->bcnt)
416 for (pr = fu->nextpr; pr; pr = pr->nextpr)
417 fu->bcnt += pr->bcnt;
418 }
419 /*
420 * If the format string interprets any data at all, and it's
421 * not the same as the blocksize, and its last format unit
422 * interprets any data at all, and has no iteration count,
423 * repeat it as necessary.
424 *
425 * If, rep count is greater than 1, no trailing whitespace
426 * gets output from the last iteration of the format unit.
427 */
428 for (fu = fs->nextfu; fu; fu = fu->nextfu) {
429 if (!fu->nextfu && fs->bcnt < blocksize &&
430 !(fu->flags&F_SETREP) && fu->bcnt)
431 fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
432 if (fu->reps > 1) {
433 for (pr = fu->nextpr;; pr = pr->nextpr)
434 if (!pr->nextpr)
435 break;
436 for (p1 = pr->fmt, p2 = NULL; *p1; ++p1)
437 p2 = isspace((unsigned char)*p1) ? p1 : NULL;
438 if (p2)
439 pr->nospace = p2;
440 }
441 }
442 #ifdef DEBUG
443 for (fu = fs->nextfu; fu; fu = fu->nextfu) {
444 (void)printf("fmt:");
445 for (pr = fu->nextpr; pr; pr = pr->nextpr)
446 (void)printf(" {%s}", pr->fmt);
447 (void)printf("\n");
448 }
449 #endif
450 }
451
/*
 * escape --
 *	Translate backslash escape sequences in the NUL-terminated string
 *	`p1' in place.  \a \b \f \n \r \t \v become the corresponding
 *	control characters; a backslash followed by any other character
 *	yields that character.  A lone backslash at the very end of the
 *	string is kept as is.
 *
 *	The result is never longer than the input, so p2 (the write
 *	position) never overtakes p1 (the read position).
 */
void
escape(p1)
	char *p1;
{
	char *p2;

	/* alphabetic escape sequences have to be done in place */
	for (p2 = p1;; ++p1, ++p2) {
		if (*p1 != '\\') {
			/*
			 * Ordinary characters must be moved down as well,
			 * since earlier escapes have shortened the string.
			 */
			*p2 = *p1;
			if (*p1 == '\0')
				return;
			continue;
		}
		switch (*++p1) {
		case '\0':
			/* Incomplete escape: keep the backslash and stop. */
			*p2++ = '\\';
			*p2 = '\0';
			return;
		case 'a':
			/* *p2 = '\a'; */
			*p2 = '\007';
			break;
		case 'b':
			*p2 = '\b';
			break;
		case 'f':
			*p2 = '\f';
			break;
		case 'n':
			*p2 = '\n';
			break;
		case 'r':
			*p2 = '\r';
			break;
		case 't':
			*p2 = '\t';
			break;
		case 'v':
			*p2 = '\v';
			break;
		default:
			*p2 = *p1;
			break;
		}
	}
}
494
/*
 * badcnt --
 *	Fatal error: the byte count of a format unit is not valid for the
 *	conversion `s'.  Prints the message with errx() and exits with
 *	status 1.
 */
void
badcnt(s)
	char *s;
{
	errx(1, "%s: bad byte count", s);
}
501
/*
 * badsfmt --
 *	Fatal error: a %s conversion was given neither a precision nor a
 *	byte count.  Prints the message with errx() and exits with status 1.
 *	errx() supplies the trailing newline itself.
 */
void
badsfmt()
{
	errx(1, "%%s: requires a precision or a byte count");
}
507
/*
 * badfmt --
 *	Fatal error: the format string `fmt' could not be parsed.  Prints
 *	the quoted string with errx() and exits with status 1.  errx()
 *	supplies the trailing newline itself.
 */
void
badfmt(fmt)
	const char *fmt;
{
	errx(1, "\"%s\": bad format", fmt);
}
514
/*
 * badconv --
 *	Fatal error: `ch' (the text following a '%') is not a supported
 *	conversion.  Prints the message with errx() and exits with status 1.
 *	errx() supplies the trailing newline itself.
 */
void
badconv(ch)
	char *ch;
{
	errx(1, "%%%s: bad conversion character", ch);
}
521