lexi.c revision 1.3 1 /* $NetBSD: lexi.c,v 1.3 1997/01/09 20:20:17 tls Exp $ */
2
3 /*
4 * Copyright (c) 1985 Sun Microsystems, Inc.
5 * Copyright (c) 1980 The Regents of the University of California.
6 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 */
37
38 #ifndef lint
39 /*static char sccsid[] = "from: @(#)lexi.c 5.16 (Berkeley) 2/26/91";*/
40 static char rcsid[] = "$NetBSD: lexi.c,v 1.3 1997/01/09 20:20:17 tls Exp $";
41 #endif /* not lint */
42
43 /*
44 * Here we have the token scanner for indent. It scans off one token and puts
45 * it in the global variable "token". It returns a code, indicating the type
46 * of token scanned.
47 */
48
49 #include <stdio.h>
50 #include <ctype.h>
51 #include <stdlib.h>
52 #include <string.h>
53 #include "indent_globs.h"
54 #include "indent_codes.h"
55
/*
 * Character classes stored in the chartype[] table below.  A table entry
 * of 0 means the character belongs to neither class.
 */
#define alphanum	1	/* may appear in an identifier or a number */
#define opchar		3	/* may appear in an operator */
58
/*
 * One entry of the keyword table: the keyword text and the class code that
 * lexi() switches on when the scanned token matches it.
 */
struct templ {
    char       *rwd;		/* keyword text; 0 marks the end of the table */
    int         rwcode;		/* keyword class, see the switch in lexi() */
};

/*
 * Keyword table.  The used part is terminated by an entry whose rwd is 0;
 * the remaining slots (up to 100 in total) are free for addkey() to fill.
 *
 * Class codes, as interpreted by lexi():
 *   0  no special class (returned as a plain identifier)
 *   1  switch
 *   2  case / default
 *   3  struct, union, enum
 *   4  declaration keyword
 *   5  keyword followed by a parenthesised expression (if, while, for)
 *   6  keyword not followed by parentheses (else, do)
 *   7  sizeof
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},		/* was misspelled "typdef" and never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}			/* terminator */
};
96
/*
 * Character classification table, indexed by 7-bit ASCII code.  Each entry
 * is 1 for characters that may appear in identifiers and numbers (letters,
 * digits, '_' and '$'), 3 for characters that may appear in operators, and
 * 0 for everything else.
 */
char        chartype[128] =
{
    /* 0x00 - 0x0f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2f: space ! " # $ % & ' ( ) * + , - . / */
    0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
    /* 0x30 - 0x3f: 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 0x40 - 0x4f: @ A B C D E F G H I J K L M N O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5f: P Q R S T U V W X Y Z [ \ ] ^ _ */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 0x60 - 0x6f: ` a b c d e f g h i j k l m n o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7f: p q r s t u v w x y z { | } ~ DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
118
119
120
121
122 int
123 lexi()
124 {
125 int unary_delim; /* this is set to 1 if the current token
126 *
127 * forces a following operator to be unary */
128 static int last_code; /* the last token type returned */
129 static int l_struct; /* set to 1 if the last token was 'struct' */
130 int code; /* internal code to be returned */
131 char qchar; /* the delimiter character for a string */
132
133 e_token = s_token; /* point to start of place to save token */
134 unary_delim = false;
135 ps.col_1 = ps.last_nl; /* tell world that this token started in
136 * column 1 iff the last thing scanned was nl */
137 ps.last_nl = false;
138
139 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
140 ps.col_1 = false; /* leading blanks imply token is not in column
141 * 1 */
142 if (++buf_ptr >= buf_end)
143 fill_buffer();
144 }
145
146 /* Scan an alphanumeric token */
147 if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
148 /*
149 * we have a character or number
150 */
151 register char *j; /* used for searching thru list of
152 *
153 * reserved words */
154 register struct templ *p;
155
156 if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
157 int seendot = 0,
158 seenexp = 0;
159 if (*buf_ptr == '0' &&
160 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
161 *e_token++ = *buf_ptr++;
162 *e_token++ = *buf_ptr++;
163 while (isxdigit(*buf_ptr)) {
164 CHECK_SIZE_TOKEN;
165 *e_token++ = *buf_ptr++;
166 }
167 }
168 else
169 while (1) {
170 if (*buf_ptr == '.')
171 if (seendot)
172 break;
173 else
174 seendot++;
175 CHECK_SIZE_TOKEN;
176 *e_token++ = *buf_ptr++;
177 if (!isdigit(*buf_ptr) && *buf_ptr != '.')
178 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
179 break;
180 else {
181 seenexp++;
182 seendot++;
183 CHECK_SIZE_TOKEN;
184 *e_token++ = *buf_ptr++;
185 if (*buf_ptr == '+' || *buf_ptr == '-')
186 *e_token++ = *buf_ptr++;
187 }
188 }
189 if (*buf_ptr == 'L' || *buf_ptr == 'l')
190 *e_token++ = *buf_ptr++;
191 }
192 else
193 while (chartype[*buf_ptr] == alphanum) { /* copy it over */
194 CHECK_SIZE_TOKEN;
195 *e_token++ = *buf_ptr++;
196 if (buf_ptr >= buf_end)
197 fill_buffer();
198 }
199 *e_token++ = '\0';
200 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
201 if (++buf_ptr >= buf_end)
202 fill_buffer();
203 }
204 ps.its_a_keyword = false;
205 ps.sizeof_keyword = false;
206 if (l_struct) { /* if last token was 'struct', then this token
207 * should be treated as a declaration */
208 l_struct = false;
209 last_code = ident;
210 ps.last_u_d = true;
211 return (decl);
212 }
213 ps.last_u_d = false; /* Operator after indentifier is binary */
214 last_code = ident; /* Remember that this is the code we will
215 * return */
216
217 /*
218 * This loop will check if the token is a keyword.
219 */
220 for (p = specials; (j = p->rwd) != 0; p++) {
221 register char *p = s_token; /* point at scanned token */
222 if (*j++ != *p++ || *j++ != *p++)
223 continue; /* This test depends on the fact that
224 * identifiers are always at least 1 character
225 * long (ie. the first two bytes of the
226 * identifier are always meaningful) */
227 if (p[-1] == 0)
228 break; /* If its a one-character identifier */
229 while (*p++ == *j)
230 if (*j++ == 0)
231 goto found_keyword; /* I wish that C had a multi-level
232 * break... */
233 }
234 if (p->rwd) { /* we have a keyword */
235 found_keyword:
236 ps.its_a_keyword = true;
237 ps.last_u_d = true;
238 switch (p->rwcode) {
239 case 1: /* it is a switch */
240 return (swstmt);
241 case 2: /* a case or default */
242 return (casestmt);
243
244 case 3: /* a "struct" */
245 if (ps.p_l_follow)
246 break; /* inside parens: cast */
247 l_struct = true;
248
249 /*
250 * Next time around, we will want to know that we have had a
251 * 'struct'
252 */
253 case 4: /* one of the declaration keywords */
254 if (ps.p_l_follow) {
255 ps.cast_mask |= 1 << ps.p_l_follow;
256 break; /* inside parens: cast */
257 }
258 last_code = decl;
259 return (decl);
260
261 case 5: /* if, while, for */
262 return (sp_paren);
263
264 case 6: /* do, else */
265 return (sp_nparen);
266
267 case 7:
268 ps.sizeof_keyword = true;
269 default: /* all others are treated like any other
270 * identifier */
271 return (ident);
272 } /* end of switch */
273 } /* end of if (found_it) */
274 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
275 register char *tp = buf_ptr;
276 while (tp < buf_end)
277 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
278 goto not_proc;
279 strncpy(ps.procname, token, sizeof ps.procname - 1);
280 ps.in_parameter_declaration = 1;
281 rparen_count = 1;
282 not_proc:;
283 }
284 /*
285 * The following hack attempts to guess whether or not the current
286 * token is in fact a declaration keyword -- one that has been
287 * typedefd
288 */
289 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
290 && !ps.p_l_follow
291 && !ps.block_init
292 && (ps.last_token == rparen || ps.last_token == semicolon ||
293 ps.last_token == decl ||
294 ps.last_token == lbrace || ps.last_token == rbrace)) {
295 ps.its_a_keyword = true;
296 ps.last_u_d = true;
297 last_code = decl;
298 return decl;
299 }
300 if (last_code == decl) /* if this is a declared variable, then
301 * following sign is unary */
302 ps.last_u_d = true; /* will make "int a -1" work */
303 last_code = ident;
304 return (ident); /* the ident is not in the list */
305 } /* end of procesing for alpanum character */
306
307 /* Scan a non-alphanumeric token */
308
309 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
310 * moved here */
311 *e_token = '\0';
312 if (++buf_ptr >= buf_end)
313 fill_buffer();
314
315 switch (*token) {
316 case '\n':
317 unary_delim = ps.last_u_d;
318 ps.last_nl = true; /* remember that we just had a newline */
319 code = (had_eof ? 0 : newline);
320
321 /*
322 * if data has been exausted, the newline is a dummy, and we should
323 * return code to stop
324 */
325 break;
326
327 case '\'': /* start of quoted character */
328 case '"': /* start of string */
329 qchar = *token;
330 if (troff) {
331 e_token[-1] = '`';
332 if (qchar == '"')
333 *e_token++ = '`';
334 e_token = chfont(&bodyf, &stringf, e_token);
335 }
336 do { /* copy the string */
337 while (1) { /* move one character or [/<char>]<char> */
338 if (*buf_ptr == '\n') {
339 printf("%d: Unterminated literal\n", line_no);
340 goto stop_lit;
341 }
342 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
343 * since CHECK_SIZE guarantees that there
344 * are at least 5 entries left */
345 *e_token = *buf_ptr++;
346 if (buf_ptr >= buf_end)
347 fill_buffer();
348 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
349 if (*buf_ptr == '\n') /* check for escaped newline */
350 ++line_no;
351 if (troff) {
352 *++e_token = BACKSLASH;
353 if (*buf_ptr == BACKSLASH)
354 *++e_token = BACKSLASH;
355 }
356 *++e_token = *buf_ptr++;
357 ++e_token; /* we must increment this again because we
358 * copied two chars */
359 if (buf_ptr >= buf_end)
360 fill_buffer();
361 }
362 else
363 break; /* we copied one character */
364 } /* end of while (1) */
365 } while (*e_token++ != qchar);
366 if (troff) {
367 e_token = chfont(&stringf, &bodyf, e_token - 1);
368 if (qchar == '"')
369 *e_token++ = '\'';
370 }
371 stop_lit:
372 code = ident;
373 break;
374
375 case ('('):
376 case ('['):
377 unary_delim = true;
378 code = lparen;
379 break;
380
381 case (')'):
382 case (']'):
383 code = rparen;
384 break;
385
386 case '#':
387 unary_delim = ps.last_u_d;
388 code = preesc;
389 break;
390
391 case '?':
392 unary_delim = true;
393 code = question;
394 break;
395
396 case (':'):
397 code = colon;
398 unary_delim = true;
399 break;
400
401 case (';'):
402 unary_delim = true;
403 code = semicolon;
404 break;
405
406 case ('{'):
407 unary_delim = true;
408
409 /*
410 * if (ps.in_or_st) ps.block_init = 1;
411 */
412 /* ? code = ps.block_init ? lparen : lbrace; */
413 code = lbrace;
414 break;
415
416 case ('}'):
417 unary_delim = true;
418 /* ? code = ps.block_init ? rparen : rbrace; */
419 code = rbrace;
420 break;
421
422 case 014: /* a form feed */
423 unary_delim = ps.last_u_d;
424 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
425 * right */
426 code = form_feed;
427 break;
428
429 case (','):
430 unary_delim = true;
431 code = comma;
432 break;
433
434 case '.':
435 unary_delim = false;
436 code = period;
437 break;
438
439 case '-':
440 case '+': /* check for -, +, --, ++ */
441 code = (ps.last_u_d ? unary_op : binary_op);
442 unary_delim = true;
443
444 if (*buf_ptr == token[0]) {
445 /* check for doubled character */
446 *e_token++ = *buf_ptr++;
447 /* buffer overflow will be checked at end of loop */
448 if (last_code == ident || last_code == rparen) {
449 code = (ps.last_u_d ? unary_op : postop);
450 /* check for following ++ or -- */
451 unary_delim = false;
452 }
453 }
454 else if (*buf_ptr == '=')
455 /* check for operator += */
456 *e_token++ = *buf_ptr++;
457 else if (*buf_ptr == '>') {
458 /* check for operator -> */
459 *e_token++ = *buf_ptr++;
460 if (!pointer_as_binop) {
461 unary_delim = false;
462 code = unary_op;
463 ps.want_blank = false;
464 }
465 }
466 break; /* buffer overflow will be checked at end of
467 * switch */
468
469 case '=':
470 if (ps.in_or_st)
471 ps.block_init = 1;
472 #ifdef undef
473 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
474 e_token[-1] = *buf_ptr++;
475 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
476 *e_token++ = *buf_ptr++;
477 *e_token++ = '='; /* Flip =+ to += */
478 *e_token = 0;
479 }
480 #else
481 if (*buf_ptr == '=') {/* == */
482 *e_token++ = '='; /* Flip =+ to += */
483 buf_ptr++;
484 *e_token = 0;
485 }
486 #endif
487 code = binary_op;
488 unary_delim = true;
489 break;
490 /* can drop thru!!! */
491
492 case '>':
493 case '<':
494 case '!': /* ops like <, <<, <=, !=, etc */
495 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
496 *e_token++ = *buf_ptr;
497 if (++buf_ptr >= buf_end)
498 fill_buffer();
499 }
500 if (*buf_ptr == '=')
501 *e_token++ = *buf_ptr++;
502 code = (ps.last_u_d ? unary_op : binary_op);
503 unary_delim = true;
504 break;
505
506 default:
507 if (token[0] == '/' && *buf_ptr == '*') {
508 /* it is start of comment */
509 *e_token++ = '*';
510
511 if (++buf_ptr >= buf_end)
512 fill_buffer();
513
514 code = comment;
515 unary_delim = ps.last_u_d;
516 break;
517 }
518 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
519 /*
520 * handle ||, &&, etc, and also things as in int *****i
521 */
522 *e_token++ = *buf_ptr;
523 if (++buf_ptr >= buf_end)
524 fill_buffer();
525 }
526 code = (ps.last_u_d ? unary_op : binary_op);
527 unary_delim = true;
528
529
530 } /* end of switch */
531 if (code != newline) {
532 l_struct = false;
533 last_code = code;
534 }
535 if (buf_ptr >= buf_end) /* check for input buffer empty */
536 fill_buffer();
537 ps.last_u_d = unary_delim;
538 *e_token = '\0'; /* null terminate the token */
539 return (code);
540 }
541
542 /*
543 * Add the given keyword to the keyword table, using val as the keyword type
544 */
545 addkey(key, val)
546 char *key;
547 {
548 register struct templ *p = specials;
549 while (p->rwd)
550 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
551 return;
552 else
553 p++;
554 if (p >= specials + sizeof specials / sizeof specials[0])
555 return; /* For now, table overflows are silently
556 * ignored */
557 p->rwd = key;
558 p->rwcode = val;
559 p[1].rwd = 0;
560 p[1].rwcode = 0;
561 return;
562 }
563