lexi.c revision 1.2 1 /*
2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980 The Regents of the University of California.
4 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #ifndef lint
37 /*static char sccsid[] = "from: @(#)lexi.c 5.16 (Berkeley) 2/26/91";*/
38 static char rcsid[] = "$Id: lexi.c,v 1.2 1993/08/01 18:14:31 mycroft Exp $";
39 #endif /* not lint */
40
41 /*
42 * Here we have the token scanner for indent. It scans off one token and puts
43 * it in the global variable "token". It returns a code, indicating the type
44 * of token scanned.
45 */
46
47 #include <stdio.h>
48 #include <ctype.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include "indent_globs.h"
52 #include "indent_codes.h"
53
54 #define alphanum 1
55 #define opchar 3
56
/*
 * One entry of the reserved-word table.
 *
 * rwd    - spelling of the reserved word (a NULL pointer terminates the table)
 * rwcode - class of the word, as dispatched on by lexi():
 *            0  no special treatment (returned as a plain identifier)
 *            1  "switch"
 *            2  "case" / "default"
 *            3  "struct" / "union" / "enum"
 *            4  declaration keyword (type name, storage class, ...)
 *            5  "if" / "while" / "for"  (followed by a parenthesised part)
 *            6  "else" / "do"           (no parenthesised part)
 *            7  "sizeof"
 */
struct templ {
    char       *rwd;
    int         rwcode;
};

/*
 * The reserved-word table.  Only the leading entries are initialised; the
 * remaining slots of the 100-entry array are zero and are available to
 * addkey() for keywords added at run time.  The list is terminated by an
 * entry whose rwd is 0.
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},		/* was misspelled "typdef", so it never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
94
/*
 * Character classification table, indexed by 7-bit ASCII code.  It lets the
 * scanner decide quickly what kind of token a character can start or
 * continue:
 *
 *   1  alphanumeric: letters, digits, underscore and dollar sign
 *   3  operator character
 *   0  anything else (white space, brackets, quotes, punctuation, controls)
 *
 * The table has exactly 128 entries, so it must only be indexed with values
 * in the range 0..127.
 */
char        chartype[128] =
{
    /* 0x00 - 0x0f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2f: space and punctuation up to the slash */
    0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
    /* 0x30 - 0x3f: digits, then colon through question mark */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 0x40 - 0x4f: at-sign, then upper-case A through O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5f: upper-case P through Z, brackets, caret, underscore */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 0x60 - 0x6f: back-quote, then lower-case a through o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7f: lower-case p through z, braces, bar, tilde, DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
116
117
118
119
120 int
121 lexi()
122 {
123 int unary_delim; /* this is set to 1 if the current token
124 *
125 * forces a following operator to be unary */
126 static int last_code; /* the last token type returned */
127 static int l_struct; /* set to 1 if the last token was 'struct' */
128 int code; /* internal code to be returned */
129 char qchar; /* the delimiter character for a string */
130
131 e_token = s_token; /* point to start of place to save token */
132 unary_delim = false;
133 ps.col_1 = ps.last_nl; /* tell world that this token started in
134 * column 1 iff the last thing scanned was nl */
135 ps.last_nl = false;
136
137 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
138 ps.col_1 = false; /* leading blanks imply token is not in column
139 * 1 */
140 if (++buf_ptr >= buf_end)
141 fill_buffer();
142 }
143
144 /* Scan an alphanumeric token */
145 if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
146 /*
147 * we have a character or number
148 */
149 register char *j; /* used for searching thru list of
150 *
151 * reserved words */
152 register struct templ *p;
153
154 if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
155 int seendot = 0,
156 seenexp = 0;
157 if (*buf_ptr == '0' &&
158 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
159 *e_token++ = *buf_ptr++;
160 *e_token++ = *buf_ptr++;
161 while (isxdigit(*buf_ptr)) {
162 CHECK_SIZE_TOKEN;
163 *e_token++ = *buf_ptr++;
164 }
165 }
166 else
167 while (1) {
168 if (*buf_ptr == '.')
169 if (seendot)
170 break;
171 else
172 seendot++;
173 CHECK_SIZE_TOKEN;
174 *e_token++ = *buf_ptr++;
175 if (!isdigit(*buf_ptr) && *buf_ptr != '.')
176 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
177 break;
178 else {
179 seenexp++;
180 seendot++;
181 CHECK_SIZE_TOKEN;
182 *e_token++ = *buf_ptr++;
183 if (*buf_ptr == '+' || *buf_ptr == '-')
184 *e_token++ = *buf_ptr++;
185 }
186 }
187 if (*buf_ptr == 'L' || *buf_ptr == 'l')
188 *e_token++ = *buf_ptr++;
189 }
190 else
191 while (chartype[*buf_ptr] == alphanum) { /* copy it over */
192 CHECK_SIZE_TOKEN;
193 *e_token++ = *buf_ptr++;
194 if (buf_ptr >= buf_end)
195 fill_buffer();
196 }
197 *e_token++ = '\0';
198 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
199 if (++buf_ptr >= buf_end)
200 fill_buffer();
201 }
202 ps.its_a_keyword = false;
203 ps.sizeof_keyword = false;
204 if (l_struct) { /* if last token was 'struct', then this token
205 * should be treated as a declaration */
206 l_struct = false;
207 last_code = ident;
208 ps.last_u_d = true;
209 return (decl);
210 }
211 ps.last_u_d = false; /* Operator after indentifier is binary */
212 last_code = ident; /* Remember that this is the code we will
213 * return */
214
215 /*
216 * This loop will check if the token is a keyword.
217 */
218 for (p = specials; (j = p->rwd) != 0; p++) {
219 register char *p = s_token; /* point at scanned token */
220 if (*j++ != *p++ || *j++ != *p++)
221 continue; /* This test depends on the fact that
222 * identifiers are always at least 1 character
223 * long (ie. the first two bytes of the
224 * identifier are always meaningful) */
225 if (p[-1] == 0)
226 break; /* If its a one-character identifier */
227 while (*p++ == *j)
228 if (*j++ == 0)
229 goto found_keyword; /* I wish that C had a multi-level
230 * break... */
231 }
232 if (p->rwd) { /* we have a keyword */
233 found_keyword:
234 ps.its_a_keyword = true;
235 ps.last_u_d = true;
236 switch (p->rwcode) {
237 case 1: /* it is a switch */
238 return (swstmt);
239 case 2: /* a case or default */
240 return (casestmt);
241
242 case 3: /* a "struct" */
243 if (ps.p_l_follow)
244 break; /* inside parens: cast */
245 l_struct = true;
246
247 /*
248 * Next time around, we will want to know that we have had a
249 * 'struct'
250 */
251 case 4: /* one of the declaration keywords */
252 if (ps.p_l_follow) {
253 ps.cast_mask |= 1 << ps.p_l_follow;
254 break; /* inside parens: cast */
255 }
256 last_code = decl;
257 return (decl);
258
259 case 5: /* if, while, for */
260 return (sp_paren);
261
262 case 6: /* do, else */
263 return (sp_nparen);
264
265 case 7:
266 ps.sizeof_keyword = true;
267 default: /* all others are treated like any other
268 * identifier */
269 return (ident);
270 } /* end of switch */
271 } /* end of if (found_it) */
272 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
273 register char *tp = buf_ptr;
274 while (tp < buf_end)
275 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
276 goto not_proc;
277 strncpy(ps.procname, token, sizeof ps.procname - 1);
278 ps.in_parameter_declaration = 1;
279 rparen_count = 1;
280 not_proc:;
281 }
282 /*
283 * The following hack attempts to guess whether or not the current
284 * token is in fact a declaration keyword -- one that has been
285 * typedefd
286 */
287 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
288 && !ps.p_l_follow
289 && !ps.block_init
290 && (ps.last_token == rparen || ps.last_token == semicolon ||
291 ps.last_token == decl ||
292 ps.last_token == lbrace || ps.last_token == rbrace)) {
293 ps.its_a_keyword = true;
294 ps.last_u_d = true;
295 last_code = decl;
296 return decl;
297 }
298 if (last_code == decl) /* if this is a declared variable, then
299 * following sign is unary */
300 ps.last_u_d = true; /* will make "int a -1" work */
301 last_code = ident;
302 return (ident); /* the ident is not in the list */
303 } /* end of procesing for alpanum character */
304
305 /* Scan a non-alphanumeric token */
306
307 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
308 * moved here */
309 *e_token = '\0';
310 if (++buf_ptr >= buf_end)
311 fill_buffer();
312
313 switch (*token) {
314 case '\n':
315 unary_delim = ps.last_u_d;
316 ps.last_nl = true; /* remember that we just had a newline */
317 code = (had_eof ? 0 : newline);
318
319 /*
320 * if data has been exausted, the newline is a dummy, and we should
321 * return code to stop
322 */
323 break;
324
325 case '\'': /* start of quoted character */
326 case '"': /* start of string */
327 qchar = *token;
328 if (troff) {
329 e_token[-1] = '`';
330 if (qchar == '"')
331 *e_token++ = '`';
332 e_token = chfont(&bodyf, &stringf, e_token);
333 }
334 do { /* copy the string */
335 while (1) { /* move one character or [/<char>]<char> */
336 if (*buf_ptr == '\n') {
337 printf("%d: Unterminated literal\n", line_no);
338 goto stop_lit;
339 }
340 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
341 * since CHECK_SIZE guarantees that there
342 * are at least 5 entries left */
343 *e_token = *buf_ptr++;
344 if (buf_ptr >= buf_end)
345 fill_buffer();
346 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
347 if (*buf_ptr == '\n') /* check for escaped newline */
348 ++line_no;
349 if (troff) {
350 *++e_token = BACKSLASH;
351 if (*buf_ptr == BACKSLASH)
352 *++e_token = BACKSLASH;
353 }
354 *++e_token = *buf_ptr++;
355 ++e_token; /* we must increment this again because we
356 * copied two chars */
357 if (buf_ptr >= buf_end)
358 fill_buffer();
359 }
360 else
361 break; /* we copied one character */
362 } /* end of while (1) */
363 } while (*e_token++ != qchar);
364 if (troff) {
365 e_token = chfont(&stringf, &bodyf, e_token - 1);
366 if (qchar == '"')
367 *e_token++ = '\'';
368 }
369 stop_lit:
370 code = ident;
371 break;
372
373 case ('('):
374 case ('['):
375 unary_delim = true;
376 code = lparen;
377 break;
378
379 case (')'):
380 case (']'):
381 code = rparen;
382 break;
383
384 case '#':
385 unary_delim = ps.last_u_d;
386 code = preesc;
387 break;
388
389 case '?':
390 unary_delim = true;
391 code = question;
392 break;
393
394 case (':'):
395 code = colon;
396 unary_delim = true;
397 break;
398
399 case (';'):
400 unary_delim = true;
401 code = semicolon;
402 break;
403
404 case ('{'):
405 unary_delim = true;
406
407 /*
408 * if (ps.in_or_st) ps.block_init = 1;
409 */
410 /* ? code = ps.block_init ? lparen : lbrace; */
411 code = lbrace;
412 break;
413
414 case ('}'):
415 unary_delim = true;
416 /* ? code = ps.block_init ? rparen : rbrace; */
417 code = rbrace;
418 break;
419
420 case 014: /* a form feed */
421 unary_delim = ps.last_u_d;
422 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
423 * right */
424 code = form_feed;
425 break;
426
427 case (','):
428 unary_delim = true;
429 code = comma;
430 break;
431
432 case '.':
433 unary_delim = false;
434 code = period;
435 break;
436
437 case '-':
438 case '+': /* check for -, +, --, ++ */
439 code = (ps.last_u_d ? unary_op : binary_op);
440 unary_delim = true;
441
442 if (*buf_ptr == token[0]) {
443 /* check for doubled character */
444 *e_token++ = *buf_ptr++;
445 /* buffer overflow will be checked at end of loop */
446 if (last_code == ident || last_code == rparen) {
447 code = (ps.last_u_d ? unary_op : postop);
448 /* check for following ++ or -- */
449 unary_delim = false;
450 }
451 }
452 else if (*buf_ptr == '=')
453 /* check for operator += */
454 *e_token++ = *buf_ptr++;
455 else if (*buf_ptr == '>') {
456 /* check for operator -> */
457 *e_token++ = *buf_ptr++;
458 if (!pointer_as_binop) {
459 unary_delim = false;
460 code = unary_op;
461 ps.want_blank = false;
462 }
463 }
464 break; /* buffer overflow will be checked at end of
465 * switch */
466
467 case '=':
468 if (ps.in_or_st)
469 ps.block_init = 1;
470 #ifdef undef
471 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
472 e_token[-1] = *buf_ptr++;
473 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
474 *e_token++ = *buf_ptr++;
475 *e_token++ = '='; /* Flip =+ to += */
476 *e_token = 0;
477 }
478 #else
479 if (*buf_ptr == '=') {/* == */
480 *e_token++ = '='; /* Flip =+ to += */
481 buf_ptr++;
482 *e_token = 0;
483 }
484 #endif
485 code = binary_op;
486 unary_delim = true;
487 break;
488 /* can drop thru!!! */
489
490 case '>':
491 case '<':
492 case '!': /* ops like <, <<, <=, !=, etc */
493 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
494 *e_token++ = *buf_ptr;
495 if (++buf_ptr >= buf_end)
496 fill_buffer();
497 }
498 if (*buf_ptr == '=')
499 *e_token++ = *buf_ptr++;
500 code = (ps.last_u_d ? unary_op : binary_op);
501 unary_delim = true;
502 break;
503
504 default:
505 if (token[0] == '/' && *buf_ptr == '*') {
506 /* it is start of comment */
507 *e_token++ = '*';
508
509 if (++buf_ptr >= buf_end)
510 fill_buffer();
511
512 code = comment;
513 unary_delim = ps.last_u_d;
514 break;
515 }
516 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
517 /*
518 * handle ||, &&, etc, and also things as in int *****i
519 */
520 *e_token++ = *buf_ptr;
521 if (++buf_ptr >= buf_end)
522 fill_buffer();
523 }
524 code = (ps.last_u_d ? unary_op : binary_op);
525 unary_delim = true;
526
527
528 } /* end of switch */
529 if (code != newline) {
530 l_struct = false;
531 last_code = code;
532 }
533 if (buf_ptr >= buf_end) /* check for input buffer empty */
534 fill_buffer();
535 ps.last_u_d = unary_delim;
536 *e_token = '\0'; /* null terminate the token */
537 return (code);
538 }
539
540 /*
541 * Add the given keyword to the keyword table, using val as the keyword type
542 */
543 addkey(key, val)
544 char *key;
545 {
546 register struct templ *p = specials;
547 while (p->rwd)
548 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
549 return;
550 else
551 p++;
552 if (p >= specials + sizeof specials / sizeof specials[0])
553 return; /* For now, table overflows are silently
554 * ignored */
555 p->rwd = key;
556 p->rwcode = val;
557 p[1].rwd = 0;
558 p[1].rwcode = 0;
559 return;
560 }
561