Home | History | Annotate | Line # | Download | only in indent
lexi.c revision 1.106
      1  1.106  rillig /*	$NetBSD: lexi.c,v 1.106 2021/10/28 21:56:26 rillig Exp $	*/
      2    1.3     tls 
      3   1.16   kamil /*-
      4   1.16   kamil  * SPDX-License-Identifier: BSD-4-Clause
      5   1.16   kamil  *
      6   1.16   kamil  * Copyright (c) 1985 Sun Microsystems, Inc.
      7    1.5     mrg  * Copyright (c) 1980, 1993
      8    1.5     mrg  *	The Regents of the University of California.  All rights reserved.
      9    1.1     cgd  * All rights reserved.
     10    1.1     cgd  *
     11    1.1     cgd  * Redistribution and use in source and binary forms, with or without
     12    1.1     cgd  * modification, are permitted provided that the following conditions
     13    1.1     cgd  * are met:
     14    1.1     cgd  * 1. Redistributions of source code must retain the above copyright
     15    1.1     cgd  *    notice, this list of conditions and the following disclaimer.
     16    1.1     cgd  * 2. Redistributions in binary form must reproduce the above copyright
     17    1.1     cgd  *    notice, this list of conditions and the following disclaimer in the
     18    1.1     cgd  *    documentation and/or other materials provided with the distribution.
     19    1.1     cgd  * 3. All advertising materials mentioning features or use of this software
     20    1.1     cgd  *    must display the following acknowledgement:
     21    1.1     cgd  *	This product includes software developed by the University of
     22    1.1     cgd  *	California, Berkeley and its contributors.
     23    1.1     cgd  * 4. Neither the name of the University nor the names of its contributors
     24    1.1     cgd  *    may be used to endorse or promote products derived from this software
     25    1.1     cgd  *    without specific prior written permission.
     26    1.1     cgd  *
     27    1.1     cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     28    1.1     cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29    1.1     cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30    1.1     cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     31    1.1     cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     32    1.1     cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     33    1.1     cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     34    1.1     cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     35    1.1     cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     36    1.1     cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     37    1.1     cgd  * SUCH DAMAGE.
     38    1.1     cgd  */
     39    1.1     cgd 
     40   1.16   kamil #if 0
     41   1.16   kamil static char sccsid[] = "@(#)lexi.c	8.1 (Berkeley) 6/6/93";
     42   1.16   kamil #endif
     43   1.16   kamil 
     44    1.6   lukem #include <sys/cdefs.h>
     45   1.16   kamil #if defined(__NetBSD__)
     46  1.106  rillig __RCSID("$NetBSD: lexi.c,v 1.106 2021/10/28 21:56:26 rillig Exp $");
     47   1.16   kamil #elif defined(__FreeBSD__)
     48   1.16   kamil __FBSDID("$FreeBSD: head/usr.bin/indent/lexi.c 337862 2018-08-15 18:19:45Z pstef $");
     49   1.16   kamil #endif
     50    1.1     cgd 
     51   1.93  rillig #include <sys/param.h>
     52   1.20  rillig #include <assert.h>
     53    1.1     cgd #include <stdio.h>
     54    1.1     cgd #include <ctype.h>
     55    1.1     cgd #include <stdlib.h>
     56    1.1     cgd #include <string.h>
     57   1.16   kamil 
     58   1.16   kamil #include "indent.h"
     59    1.1     cgd 
     60   1.60  rillig /* must be sorted alphabetically, is used in binary search */
     61   1.62  rillig static const struct keyword {
     62   1.62  rillig     const char *name;
     63   1.62  rillig     enum keyword_kind kind;
     64   1.62  rillig } keywords[] = {
     65   1.62  rillig     {"_Bool", kw_type},
     66   1.62  rillig     {"_Complex", kw_type},
     67   1.62  rillig     {"_Imaginary", kw_type},
     68   1.62  rillig     {"auto", kw_storage_class},
     69   1.62  rillig     {"bool", kw_type},
     70   1.62  rillig     {"break", kw_jump},
     71   1.62  rillig     {"case", kw_case_or_default},
     72   1.62  rillig     {"char", kw_type},
     73   1.62  rillig     {"complex", kw_type},
     74   1.62  rillig     {"const", kw_type},
     75   1.62  rillig     {"continue", kw_jump},
     76   1.62  rillig     {"default", kw_case_or_default},
     77   1.97  rillig     {"do", kw_do},
     78   1.62  rillig     {"double", kw_type},
     79   1.97  rillig     {"else", kw_else},
     80   1.62  rillig     {"enum", kw_struct_or_union_or_enum},
     81   1.62  rillig     {"extern", kw_storage_class},
     82   1.62  rillig     {"float", kw_type},
     83   1.98  rillig     {"for", kw_for},
     84   1.62  rillig     {"goto", kw_jump},
     85   1.98  rillig     {"if", kw_if},
     86   1.62  rillig     {"imaginary", kw_type},
     87   1.62  rillig     {"inline", kw_inline_or_restrict},
     88   1.62  rillig     {"int", kw_type},
     89   1.62  rillig     {"long", kw_type},
     90   1.62  rillig     {"offsetof", kw_offsetof},
     91   1.62  rillig     {"register", kw_storage_class},
     92   1.62  rillig     {"restrict", kw_inline_or_restrict},
     93   1.62  rillig     {"return", kw_jump},
     94   1.62  rillig     {"short", kw_type},
     95   1.62  rillig     {"signed", kw_type},
     96   1.62  rillig     {"sizeof", kw_sizeof},
     97   1.62  rillig     {"static", kw_storage_class},
     98   1.62  rillig     {"struct", kw_struct_or_union_or_enum},
     99   1.62  rillig     {"switch", kw_switch},
    100   1.62  rillig     {"typedef", kw_typedef},
    101   1.62  rillig     {"union", kw_struct_or_union_or_enum},
    102   1.62  rillig     {"unsigned", kw_type},
    103   1.62  rillig     {"void", kw_type},
    104   1.62  rillig     {"volatile", kw_type},
    105   1.98  rillig     {"while", kw_while}
    106    1.1     cgd };
    107    1.1     cgd 
    108   1.84  rillig static struct {
    109   1.64  rillig     const char **items;
    110   1.64  rillig     unsigned int len;
    111   1.64  rillig     unsigned int cap;
    112   1.64  rillig } typenames;
    113   1.16   kamil 
    114   1.16   kamil /*
    115   1.16   kamil  * The transition table below was rewritten by hand from lx's output, given
    116   1.16   kamil  * the following definitions. lx is Katherine Flavel's lexer generator.
    117   1.16   kamil  *
    118   1.16   kamil  * O  = /[0-7]/;        D  = /[0-9]/;          NZ = /[1-9]/;
    119   1.16   kamil  * H  = /[a-f0-9]/i;    B  = /[0-1]/;          HP = /0x/i;
    120   1.16   kamil  * BP = /0b/i;          E  = /e[+\-]?/i D+;    P  = /p[+\-]?/i D+;
    121   1.16   kamil  * FS = /[fl]/i;        IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?;
    122   1.16   kamil  *
    123   1.16   kamil  * D+           E  FS? -> $float;
    124   1.16   kamil  * D*    "." D+ E? FS? -> $float;
    125   1.16   kamil  * D+    "."    E? FS? -> $float;    HP H+           IS? -> $int;
    126   1.16   kamil  * HP H+        P  FS? -> $float;    NZ D*           IS? -> $int;
    127   1.16   kamil  * HP H* "." H+ P  FS? -> $float;    "0" O*          IS? -> $int;
    128   1.16   kamil  * HP H+ "."    P  FS  -> $float;    BP B+           IS? -> $int;
    129   1.16   kamil  */
    130   1.71  rillig /* INDENT OFF */
    131   1.82  rillig static const unsigned char lex_number_state[][26] = {
    132   1.16   kamil     /*                examples:
    133   1.16   kamil                                      00
    134   1.16   kamil              s                      0xx
    135   1.16   kamil              t                    00xaa
    136   1.16   kamil              a     11       101100xxa..
    137   1.16   kamil              r   11ee0001101lbuuxx.a.pp
    138   1.16   kamil              t.01.e+008bLuxll0Ll.aa.p+0
    139   1.16   kamil     states:  ABCDEFGHIJKLMNOPQRSTUVWXYZ */
    140   1.83  rillig     [0] =   "uuiifuufiuuiiuiiiiiuiuuuuu",	/* (other) */
    141   1.83  rillig     [1] =   "CEIDEHHHIJQ  U  Q  VUVVZZZ",	/* 0 */
    142   1.83  rillig     [2] =   "DEIDEHHHIJQ  U  Q  VUVVZZZ",	/* 1 */
    143   1.83  rillig     [3] =   "DEIDEHHHIJ   U     VUVVZZZ",	/* 2 3 4 5 6 7 */
    144   1.83  rillig     [4] =   "DEJDEHHHJJ   U     VUVVZZZ",	/* 8 9 */
    145   1.83  rillig     [5] =   "             U     VUVV   ",	/* A a C c D d */
    146   1.83  rillig     [6] =   "  K          U     VUVV   ",	/* B b */
    147   1.83  rillig     [7] =   "  FFF   FF   U     VUVV   ",	/* E e */
    148   1.83  rillig     [8] =   "    f  f     U     VUVV  f",	/* F f */
    149   1.83  rillig     [9] =   "  LLf  fL  PR   Li  L    f",	/* L */
    150   1.83  rillig     [10] =  "  OOf  fO   S P O i O    f",	/* l */
    151   1.83  rillig     [11] =  "                    FFX   ",	/* P p */
    152   1.83  rillig     [12] =  "  MM    M  i  iiM   M     ",	/* U u */
    153   1.83  rillig     [13] =  "  N                       ",	/* X x */
    154   1.83  rillig     [14] =  "     G                 Y  ",	/* + - */
    155   1.83  rillig     [15] =  "B EE    EE   T      W     ",	/* . */
    156   1.16   kamil     /*       ABCDEFGHIJKLMNOPQRSTUVWXYZ */
    157    1.1     cgd };
    158   1.71  rillig /* INDENT ON */
    159    1.1     cgd 
    160   1.82  rillig static const uint8_t lex_number_row[] = {
    161   1.56  rillig     ['0'] = 1,
    162   1.56  rillig     ['1'] = 2,
    163   1.56  rillig     ['2'] = 3, ['3'] = 3, ['4'] = 3, ['5'] = 3, ['6'] = 3, ['7'] = 3,
    164   1.56  rillig     ['8'] = 4, ['9'] = 4,
    165   1.56  rillig     ['A'] = 5, ['a'] = 5, ['C'] = 5, ['c'] = 5, ['D'] = 5, ['d'] = 5,
    166   1.56  rillig     ['B'] = 6, ['b'] = 6,
    167   1.56  rillig     ['E'] = 7, ['e'] = 7,
    168   1.56  rillig     ['F'] = 8, ['f'] = 8,
    169   1.56  rillig     ['L'] = 9,
    170   1.56  rillig     ['l'] = 10,
    171   1.56  rillig     ['P'] = 11, ['p'] = 11,
    172   1.56  rillig     ['U'] = 12, ['u'] = 12,
    173   1.56  rillig     ['X'] = 13, ['x'] = 13,
    174   1.56  rillig     ['+'] = 14, ['-'] = 14,
    175   1.56  rillig     ['.'] = 15,
    176   1.56  rillig };
    177   1.36  rillig 
    178   1.32  rillig static char
    179   1.32  rillig inbuf_peek(void)
    180   1.32  rillig {
    181   1.78  rillig     return *inp.s;
    182   1.32  rillig }
    183   1.32  rillig 
    184   1.66  rillig void
    185   1.32  rillig inbuf_skip(void)
    186   1.32  rillig {
    187   1.78  rillig     inp.s++;
    188   1.78  rillig     if (inp.s >= inp.e)
    189   1.81  rillig 	inbuf_read_line();
    190   1.32  rillig }
    191   1.32  rillig 
    192   1.66  rillig char
    193   1.32  rillig inbuf_next(void)
    194   1.32  rillig {
    195   1.32  rillig     char ch = inbuf_peek();
    196   1.32  rillig     inbuf_skip();
    197   1.32  rillig     return ch;
    198   1.32  rillig }
    199   1.32  rillig 
    200   1.25  rillig static void
    201   1.25  rillig check_size_token(size_t desired_size)
    202   1.25  rillig {
    203   1.58  rillig     if (token.e + desired_size >= token.l)
    204   1.58  rillig 	buf_expand(&token, desired_size);
    205   1.25  rillig }
    206   1.25  rillig 
    207   1.87  rillig static void
    208   1.87  rillig token_add_char(char ch)
    209   1.87  rillig {
    210   1.87  rillig     check_size_token(1);
    211   1.87  rillig     *token.e++ = ch;
    212   1.87  rillig }
    213   1.87  rillig 
    214   1.16   kamil static int
    215   1.62  rillig cmp_keyword_by_name(const void *key, const void *elem)
    216   1.16   kamil {
    217   1.62  rillig     return strcmp(key, ((const struct keyword *)elem)->name);
    218   1.27  rillig }
    219   1.27  rillig 
    220   1.20  rillig #ifdef debug
    221  1.100  rillig static const char *
    222  1.100  rillig lsym_name(lexer_symbol sym)
    223   1.20  rillig {
    224   1.20  rillig     static const char *const name[] = {
    225  1.100  rillig 	"eof",
    226  1.100  rillig 	"preprocessing",
    227  1.100  rillig 	"newline",
    228  1.100  rillig 	"form_feed",
    229  1.100  rillig 	"comment",
    230  1.100  rillig 	"lparen_or_lbracket",
    231  1.100  rillig 	"rparen_or_rbracket",
    232  1.100  rillig 	"lbrace",
    233  1.100  rillig 	"rbrace",
    234  1.100  rillig 	"period",
    235  1.100  rillig 	"unary_op",
    236  1.100  rillig 	"binary_op",
    237  1.100  rillig 	"postfix_op",
    238  1.100  rillig 	"question",
    239  1.100  rillig 	"colon",
    240  1.100  rillig 	"comma",
    241  1.100  rillig 	"semicolon",
    242  1.100  rillig 	"typedef",
    243  1.100  rillig 	"storage_class",
    244  1.100  rillig 	"type",
    245  1.100  rillig 	"tag",
    246  1.100  rillig 	"case_label",
    247  1.100  rillig 	"string_prefix",
    248  1.100  rillig 	"ident",
    249  1.100  rillig 	"funcname",
    250  1.100  rillig 	"do",
    251  1.100  rillig 	"else",
    252  1.100  rillig 	"for",
    253  1.100  rillig 	"if",
    254  1.100  rillig 	"switch",
    255  1.100  rillig 	"while",
    256   1.20  rillig     };
    257   1.20  rillig 
    258  1.100  rillig     assert(array_length(name) == (int)lsym_while + 1);
    259   1.20  rillig 
    260  1.100  rillig     return name[sym];
    261   1.20  rillig }
    262   1.20  rillig 
    263  1.101  rillig static const char *
    264  1.103  rillig kw_name(enum keyword_kind kw)
    265  1.103  rillig {
    266  1.101  rillig     static const char *name[] = {
    267  1.101  rillig 	"0",
    268  1.101  rillig 	"offsetof",
    269  1.101  rillig 	"sizeof",
    270  1.101  rillig 	"struct_or_union_or_enum",
    271  1.101  rillig 	"type",
    272  1.101  rillig 	"for",
    273  1.101  rillig 	"if",
    274  1.101  rillig 	"while",
    275  1.101  rillig 	"do",
    276  1.101  rillig 	"else",
    277  1.101  rillig 	"switch",
    278  1.101  rillig 	"case_or_default",
    279  1.101  rillig 	"jump",
    280  1.101  rillig 	"storage_class",
    281  1.101  rillig 	"typedef",
    282  1.101  rillig 	"inline_or_restrict",
    283  1.101  rillig     };
    284  1.101  rillig 
    285  1.101  rillig     return name[kw];
    286  1.101  rillig }
    287  1.101  rillig 
    288   1.20  rillig static void
    289   1.72  rillig debug_print_buf(const char *name, const struct buffer *buf)
    290   1.20  rillig {
    291   1.72  rillig     if (buf->s < buf->e) {
    292  1.101  rillig 	debug_printf("%s ", name);
    293  1.101  rillig 	debug_vis_range("\"", buf->s, buf->e, "\"\n");
    294   1.20  rillig     }
    295   1.20  rillig }
    296   1.20  rillig 
    297  1.101  rillig static void
    298  1.104  rillig debug_lexi(const struct parser_state *state, lexer_symbol lsym)
    299   1.20  rillig {
    300  1.104  rillig     debug_println("");
    301  1.101  rillig     debug_printf("line %d\n", line_no);
    302  1.104  rillig     if (state != &ps)
    303  1.104  rillig 	debug_println("transient state");
    304   1.72  rillig     debug_print_buf("label", &lab);
    305   1.72  rillig     debug_print_buf("code", &code);
    306   1.72  rillig     debug_print_buf("comment", &com);
    307  1.101  rillig     debug_printf("lexi returns '%s'", lsym_name(lsym));
    308  1.105  rillig     if (state->curr_keyword != kw_0)
    309  1.105  rillig 	debug_printf(" keyword '%s'", kw_name(state->curr_keyword));
    310  1.105  rillig     if (state->prev_keyword != kw_0)
    311  1.105  rillig 	debug_printf(" previous keyword '%s'", kw_name(state->prev_keyword));
    312  1.104  rillig     debug_println("");
    313  1.101  rillig     debug_print_buf("token", &token);
    314  1.101  rillig }
    315   1.96  rillig #endif
    316   1.20  rillig 
    317  1.104  rillig /* ARGSUSED */
    318  1.101  rillig static lexer_symbol
    319  1.104  rillig lexi_end(const struct parser_state *state, lexer_symbol lsym)
    320  1.101  rillig {
    321  1.101  rillig #ifdef debug
    322  1.104  rillig     debug_lexi(state, lsym);
    323  1.101  rillig #endif
    324  1.100  rillig     return lsym;
    325   1.20  rillig }
    326   1.20  rillig 
    327   1.43  rillig static void
    328   1.43  rillig lex_number(void)
    329   1.43  rillig {
    330   1.71  rillig     for (uint8_t s = 'A'; s != 'f' && s != 'i' && s != 'u';) {
    331   1.78  rillig 	uint8_t ch = (uint8_t)*inp.s;
    332   1.94  rillig 	if (ch >= array_length(lex_number_row) || lex_number_row[ch] == 0)
    333   1.56  rillig 	    break;
    334   1.75  rillig 
    335   1.82  rillig 	uint8_t row = lex_number_row[ch];
    336   1.82  rillig 	if (lex_number_state[row][s - 'A'] == ' ') {
    337   1.71  rillig 	    /*-
    338   1.82  rillig 	     * lex_number_state[0][s - 'A'] now indicates the type:
    339   1.74  rillig 	     * f = floating, i = integer, u = unknown
    340   1.56  rillig 	     */
    341   1.43  rillig 	    break;
    342   1.43  rillig 	}
    343   1.75  rillig 
    344   1.82  rillig 	s = lex_number_state[row][s - 'A'];
    345   1.87  rillig 	token_add_char(inbuf_next());
    346   1.43  rillig     }
    347   1.43  rillig }
    348   1.43  rillig 
    349   1.43  rillig static void
    350   1.43  rillig lex_word(void)
    351   1.43  rillig {
    352   1.78  rillig     while (isalnum((unsigned char)*inp.s) ||
    353   1.95  rillig 	    *inp.s == '\\' ||
    354   1.95  rillig 	    *inp.s == '_' || *inp.s == '$') {
    355   1.75  rillig 
    356   1.78  rillig 	if (*inp.s == '\\') {
    357   1.78  rillig 	    if (inp.s[1] == '\n') {
    358   1.78  rillig 		inp.s += 2;
    359   1.78  rillig 		if (inp.s >= inp.e)
    360   1.81  rillig 		    inbuf_read_line();
    361   1.43  rillig 	    } else
    362   1.43  rillig 		break;
    363   1.43  rillig 	}
    364   1.75  rillig 
    365   1.87  rillig 	token_add_char(inbuf_next());
    366   1.43  rillig     }
    367   1.43  rillig }
    368   1.43  rillig 
    369   1.43  rillig static void
    370   1.43  rillig lex_char_or_string(void)
    371   1.43  rillig {
    372   1.52  rillig     for (char delim = *token.s;;) {
    373   1.78  rillig 	if (*inp.s == '\n') {
    374   1.52  rillig 	    diag(1, "Unterminated literal");
    375   1.52  rillig 	    return;
    376   1.52  rillig 	}
    377   1.75  rillig 
    378   1.87  rillig 	token_add_char(inbuf_next());
    379   1.52  rillig 	if (token.e[-1] == delim)
    380   1.52  rillig 	    return;
    381   1.75  rillig 
    382   1.52  rillig 	if (token.e[-1] == '\\') {
    383   1.78  rillig 	    if (*inp.s == '\n')
    384   1.52  rillig 		++line_no;
    385   1.87  rillig 	    token_add_char(inbuf_next());
    386   1.52  rillig 	}
    387   1.52  rillig     }
    388   1.43  rillig }
    389   1.43  rillig 
    390   1.84  rillig /* Guess whether the current token is a declared type. */
    391   1.57  rillig static bool
    392   1.84  rillig probably_typename(const struct parser_state *state)
    393   1.57  rillig {
    394   1.70  rillig     if (state->p_l_follow != 0)
    395   1.70  rillig 	return false;
    396   1.70  rillig     if (state->block_init || state->in_stmt)
    397   1.70  rillig 	return false;
    398   1.78  rillig     if (inp.s[0] == '*' && inp.s[1] != '=')
    399   1.70  rillig 	goto maybe;
    400   1.78  rillig     if (isalpha((unsigned char)*inp.s))
    401   1.70  rillig 	goto maybe;
    402   1.70  rillig     return false;
    403   1.70  rillig maybe:
    404  1.100  rillig     return state->last_token == lsym_semicolon ||
    405  1.100  rillig 	state->last_token == lsym_lbrace ||
    406  1.100  rillig 	state->last_token == lsym_rbrace;
    407   1.57  rillig }
    408   1.57  rillig 
    409   1.84  rillig static int
    410   1.84  rillig bsearch_typenames(const char *key)
    411   1.84  rillig {
    412   1.84  rillig     const char **arr = typenames.items;
    413   1.84  rillig     int lo = 0;
    414   1.84  rillig     int hi = (int)typenames.len - 1;
    415   1.84  rillig 
    416   1.84  rillig     while (lo <= hi) {
    417   1.84  rillig 	int mid = (int)((unsigned)(lo + hi) >> 1);
    418   1.84  rillig 	int cmp = strcmp(arr[mid], key);
    419   1.84  rillig 	if (cmp < 0)
    420   1.84  rillig 	    lo = mid + 1;
    421   1.84  rillig 	else if (cmp > 0)
    422   1.84  rillig 	    hi = mid - 1;
    423   1.84  rillig 	else
    424   1.84  rillig 	    return mid;
    425   1.84  rillig     }
    426   1.84  rillig     return -(lo + 1);
    427   1.84  rillig }
    428   1.84  rillig 
    429   1.63  rillig static bool
    430   1.63  rillig is_typename(void)
    431   1.63  rillig {
    432   1.84  rillig     if (opt.auto_typedefs &&
    433   1.84  rillig 	token.e - token.s >= 2 && memcmp(token.e - 2, "_t", 2) == 0)
    434   1.84  rillig 	return true;
    435   1.63  rillig 
    436   1.84  rillig     return bsearch_typenames(token.s) >= 0;
    437   1.63  rillig }
    438   1.63  rillig 
    439   1.90  rillig /* Read an alphanumeric token into 'token', or return end_of_file. */
    440  1.100  rillig static lexer_symbol
    441   1.89  rillig lexi_alnum(struct parser_state *state)
    442    1.1     cgd {
    443   1.89  rillig     if (isdigit((unsigned char)*inp.s) ||
    444   1.89  rillig 	(inp.s[0] == '.' && isdigit((unsigned char)inp.s[1]))) {
    445   1.89  rillig 	lex_number();
    446  1.103  rillig     } else if (isalnum((unsigned char)*inp.s) ||
    447  1.103  rillig 	    *inp.s == '_' || *inp.s == '$') {
    448   1.89  rillig 	lex_word();
    449  1.102  rillig     } else
    450  1.102  rillig 	return lsym_eof;	/* just as a placeholder */
    451  1.102  rillig 
    452   1.89  rillig     *token.e = '\0';
    453   1.16   kamil 
    454   1.89  rillig     if (token.s[0] == 'L' && token.s[1] == '\0' &&
    455   1.89  rillig 	(*inp.s == '"' || *inp.s == '\''))
    456  1.100  rillig 	return lsym_string_prefix;
    457   1.16   kamil 
    458   1.89  rillig     while (is_hspace(inbuf_peek()))
    459   1.32  rillig 	inbuf_skip();
    460   1.89  rillig 
    461  1.103  rillig     if (state->last_token == lsym_tag && state->p_l_follow == 0) {
    462   1.92  rillig 	state->next_unary = true;
    463  1.100  rillig 	return lsym_type;
    464   1.16   kamil     }
    465    1.6   lukem 
    466   1.89  rillig     /* Operator after identifier is binary unless last token was 'struct'. */
    467  1.100  rillig     state->next_unary = state->last_token == lsym_tag;
    468   1.16   kamil 
    469   1.89  rillig     const struct keyword *kw = bsearch(token.s, keywords,
    470   1.94  rillig 	array_length(keywords), sizeof(keywords[0]), cmp_keyword_by_name);
    471   1.89  rillig     if (kw == NULL) {
    472   1.89  rillig 	if (is_typename()) {
    473  1.105  rillig 	    state->curr_keyword = kw_type;
    474   1.92  rillig 	    state->next_unary = true;
    475   1.89  rillig 	    goto found_typename;
    476   1.16   kamil 	}
    477   1.89  rillig 
    478   1.89  rillig     } else {			/* we have a keyword */
    479  1.105  rillig 	state->curr_keyword = kw->kind;
    480   1.92  rillig 	state->next_unary = true;
    481   1.89  rillig 
    482   1.89  rillig 	switch (kw->kind) {
    483   1.89  rillig 	case kw_switch:
    484  1.100  rillig 	    return lsym_switch;
    485   1.89  rillig 
    486   1.89  rillig 	case kw_case_or_default:
    487  1.100  rillig 	    return lsym_case_label;
    488   1.89  rillig 
    489   1.89  rillig 	case kw_struct_or_union_or_enum:
    490   1.89  rillig 	case kw_type:
    491   1.89  rillig     found_typename:
    492   1.89  rillig 	    if (state->p_l_follow != 0) {
    493   1.89  rillig 		/* inside parens: cast, param list, offsetof or sizeof */
    494   1.89  rillig 		state->cast_mask |= (1 << state->p_l_follow) & ~state->not_cast_mask;
    495   1.89  rillig 	    }
    496  1.100  rillig 	    if (state->last_token == lsym_period ||
    497  1.105  rillig 		    state->last_token == lsym_unary_op)
    498   1.89  rillig 		break;
    499   1.89  rillig 	    if (kw != NULL && kw->kind == kw_struct_or_union_or_enum)
    500  1.100  rillig 		return lsym_tag;
    501   1.89  rillig 	    if (state->p_l_follow != 0)
    502   1.89  rillig 		break;
    503  1.100  rillig 	    return lsym_type;
    504   1.75  rillig 
    505   1.98  rillig 	case kw_for:
    506  1.100  rillig 	    return lsym_for;
    507   1.98  rillig 
    508   1.98  rillig 	case kw_if:
    509  1.100  rillig 	    return lsym_if;
    510   1.98  rillig 
    511   1.98  rillig 	case kw_while:
    512  1.100  rillig 	    return lsym_while;
    513   1.75  rillig 
    514   1.97  rillig 	case kw_do:
    515  1.100  rillig 	    return lsym_do;
    516   1.97  rillig 
    517   1.97  rillig 	case kw_else:
    518  1.100  rillig 	    return lsym_else;
    519   1.16   kamil 
    520   1.89  rillig 	case kw_storage_class:
    521  1.100  rillig 	    return lsym_storage_class;
    522   1.16   kamil 
    523   1.89  rillig 	case kw_typedef:
    524  1.100  rillig 	    return lsym_typedef;
    525   1.16   kamil 
    526   1.89  rillig 	default:		/* all others are treated like any other
    527   1.16   kamil 				 * identifier */
    528  1.100  rillig 	    return lsym_ident;
    529   1.90  rillig 	}
    530   1.90  rillig     }
    531   1.89  rillig 
    532   1.89  rillig     if (*inp.s == '(' && state->tos <= 1 && state->ind_level == 0 &&
    533   1.89  rillig 	!state->in_parameter_declaration && !state->block_init) {
    534   1.89  rillig 
    535   1.89  rillig 	for (const char *p = inp.s; p < inp.e;)
    536   1.89  rillig 	    if (*p++ == ')' && (*p == ';' || *p == ','))
    537   1.89  rillig 		goto not_proc;
    538   1.89  rillig 
    539   1.89  rillig 	strncpy(state->procname, token.s, sizeof state->procname - 1);
    540   1.89  rillig 	if (state->in_decl)
    541   1.89  rillig 	    state->in_parameter_declaration = true;
    542  1.100  rillig 	return lsym_funcname;
    543   1.89  rillig not_proc:;
    544   1.89  rillig 
    545   1.89  rillig     } else if (probably_typename(state)) {
    546  1.105  rillig 	state->curr_keyword = kw_type;
    547   1.92  rillig 	state->next_unary = true;
    548  1.100  rillig 	return lsym_type;
    549   1.89  rillig     }
    550   1.89  rillig 
    551  1.100  rillig     if (state->last_token == lsym_type)	/* if this is a declared variable,
    552   1.89  rillig 					 * then following sign is unary */
    553   1.92  rillig 	state->next_unary = true;	/* will make "int a -1" work */
    554   1.89  rillig 
    555  1.100  rillig     return lsym_ident;		/* the ident is not in the list */
    556   1.89  rillig }
    557   1.75  rillig 
    558   1.89  rillig /* Reads the next token, placing it in the global variable "token". */
    559  1.100  rillig lexer_symbol
    560  1.106  rillig lexi(void)
    561   1.89  rillig {
    562  1.106  rillig     struct parser_state *state = &ps;
    563  1.106  rillig 
    564   1.90  rillig     token.e = token.s;
    565   1.90  rillig     state->col_1 = state->last_nl;
    566   1.89  rillig     state->last_nl = false;
    567  1.105  rillig     state->prev_keyword = state->curr_keyword;
    568  1.105  rillig     state->curr_keyword = kw_0;
    569   1.75  rillig 
    570   1.89  rillig     while (is_hspace(*inp.s)) {
    571   1.89  rillig 	state->col_1 = false;
    572   1.89  rillig 	inbuf_skip();
    573   1.89  rillig     }
    574   1.75  rillig 
    575  1.100  rillig     lexer_symbol alnum_lsym = lexi_alnum(state);
    576  1.100  rillig     if (alnum_lsym != lsym_eof)
    577  1.104  rillig 	return lexi_end(state, alnum_lsym);
    578   1.16   kamil 
    579   1.16   kamil     /* Scan a non-alphanumeric token */
    580   1.16   kamil 
    581   1.90  rillig     check_size_token(3);	/* for things like "<<=" */
    582   1.90  rillig     *token.e++ = inbuf_next();
    583   1.50  rillig     *token.e = '\0';
    584   1.16   kamil 
    585  1.100  rillig     lexer_symbol lsym;
    586   1.89  rillig     bool unary_delim = false;	/* whether the current token forces a
    587   1.89  rillig 				 * following operator to be unary */
    588   1.89  rillig 
    589   1.50  rillig     switch (*token.s) {
    590   1.16   kamil     case '\n':
    591   1.92  rillig 	unary_delim = state->next_unary;
    592   1.16   kamil 	state->last_nl = true;	/* remember that we just had a newline */
    593   1.47  rillig 	/* if data has been exhausted, the newline is a dummy. */
    594  1.100  rillig 	lsym = had_eof ? lsym_eof : lsym_newline;
    595   1.16   kamil 	break;
    596   1.16   kamil 
    597   1.43  rillig     case '\'':
    598   1.43  rillig     case '"':
    599   1.44  rillig 	lex_char_or_string();
    600  1.100  rillig 	lsym = lsym_ident;
    601   1.16   kamil 	break;
    602    1.6   lukem 
    603   1.40  rillig     case '(':
    604   1.40  rillig     case '[':
    605   1.16   kamil 	unary_delim = true;
    606  1.100  rillig 	lsym = lsym_lparen_or_lbracket;
    607   1.16   kamil 	break;
    608   1.16   kamil 
    609   1.40  rillig     case ')':
    610   1.40  rillig     case ']':
    611  1.100  rillig 	lsym = lsym_rparen_or_rbracket;
    612   1.16   kamil 	break;
    613   1.16   kamil 
    614   1.16   kamil     case '#':
    615   1.92  rillig 	unary_delim = state->next_unary;
    616  1.100  rillig 	lsym = lsym_preprocessing;
    617   1.16   kamil 	break;
    618   1.16   kamil 
    619   1.16   kamil     case '?':
    620   1.16   kamil 	unary_delim = true;
    621  1.100  rillig 	lsym = lsym_question;
    622   1.16   kamil 	break;
    623   1.16   kamil 
    624   1.40  rillig     case ':':
    625  1.100  rillig 	lsym = lsym_colon;
    626   1.16   kamil 	unary_delim = true;
    627   1.16   kamil 	break;
    628   1.16   kamil 
    629   1.40  rillig     case ';':
    630   1.16   kamil 	unary_delim = true;
    631  1.100  rillig 	lsym = lsym_semicolon;
    632   1.16   kamil 	break;
    633   1.16   kamil 
    634   1.40  rillig     case '{':
    635   1.16   kamil 	unary_delim = true;
    636  1.100  rillig 	lsym = lsym_lbrace;
    637   1.16   kamil 	break;
    638   1.16   kamil 
    639   1.40  rillig     case '}':
    640   1.16   kamil 	unary_delim = true;
    641  1.100  rillig 	lsym = lsym_rbrace;
    642   1.16   kamil 	break;
    643   1.16   kamil 
    644   1.69  rillig     case '\f':
    645   1.92  rillig 	unary_delim = state->next_unary;
    646   1.74  rillig 	state->last_nl = true;	/* remember this, so we can set 'state->col_1'
    647   1.16   kamil 				 * right */
    648  1.100  rillig 	lsym = lsym_form_feed;
    649   1.16   kamil 	break;
    650   1.16   kamil 
    651   1.40  rillig     case ',':
    652   1.16   kamil 	unary_delim = true;
    653  1.100  rillig 	lsym = lsym_comma;
    654   1.16   kamil 	break;
    655   1.16   kamil 
    656   1.16   kamil     case '.':
    657   1.16   kamil 	unary_delim = false;
    658  1.100  rillig 	lsym = lsym_period;
    659   1.16   kamil 	break;
    660    1.1     cgd 
    661   1.16   kamil     case '-':
    662   1.90  rillig     case '+':
    663  1.100  rillig 	lsym = state->next_unary ? lsym_unary_op : lsym_binary_op;
    664   1.16   kamil 	unary_delim = true;
    665   1.16   kamil 
    666   1.90  rillig 	if (*inp.s == token.s[0]) {	/* ++, -- */
    667   1.78  rillig 	    *token.e++ = *inp.s++;
    668  1.100  rillig 	    if (state->last_token == lsym_ident ||
    669  1.100  rillig 		    state->last_token == lsym_rparen_or_rbracket) {
    670  1.100  rillig 		lsym = state->next_unary ? lsym_unary_op : lsym_postfix_op;
    671    1.1     cgd 		unary_delim = false;
    672   1.16   kamil 	    }
    673   1.75  rillig 
    674   1.90  rillig 	} else if (*inp.s == '=') {	/* += */
    675   1.78  rillig 	    *token.e++ = *inp.s++;
    676   1.75  rillig 
    677   1.90  rillig 	} else if (*inp.s == '>') {	/* -> */
    678   1.78  rillig 	    *token.e++ = *inp.s++;
    679   1.16   kamil 	    unary_delim = false;
    680  1.100  rillig 	    lsym = lsym_unary_op;
    681   1.16   kamil 	    state->want_blank = false;
    682   1.16   kamil 	}
    683   1.90  rillig 	break;
    684   1.16   kamil 
    685   1.16   kamil     case '=':
    686   1.86  rillig 	if (state->init_or_struct)
    687   1.54  rillig 	    state->block_init = true;
    688   1.78  rillig 	if (*inp.s == '=') {	/* == */
    689   1.78  rillig 	    *token.e++ = *inp.s++;
    690   1.67  rillig 	    *token.e = '\0';
    691   1.16   kamil 	}
    692  1.100  rillig 	lsym = lsym_binary_op;
    693   1.16   kamil 	unary_delim = true;
    694   1.16   kamil 	break;
    695   1.16   kamil 
    696   1.16   kamil     case '>':
    697   1.16   kamil     case '<':
    698   1.16   kamil     case '!':			/* ops like <, <<, <=, !=, etc */
    699   1.78  rillig 	if (*inp.s == '>' || *inp.s == '<' || *inp.s == '=')
    700   1.50  rillig 	    *token.e++ = inbuf_next();
    701   1.78  rillig 	if (*inp.s == '=')
    702   1.78  rillig 	    *token.e++ = *inp.s++;
    703  1.100  rillig 	lsym = state->next_unary ? lsym_unary_op : lsym_binary_op;
    704   1.16   kamil 	unary_delim = true;
    705   1.16   kamil 	break;
    706   1.16   kamil 
    707   1.16   kamil     case '*':
    708   1.16   kamil 	unary_delim = true;
    709   1.92  rillig 	if (!state->next_unary) {
    710   1.78  rillig 	    if (*inp.s == '=')
    711   1.78  rillig 		*token.e++ = *inp.s++;
    712  1.100  rillig 	    lsym = lsym_binary_op;
    713   1.16   kamil 	    break;
    714   1.16   kamil 	}
    715   1.75  rillig 
    716   1.78  rillig 	while (*inp.s == '*' || isspace((unsigned char)*inp.s)) {
    717   1.87  rillig 	    if (*inp.s == '*')
    718   1.87  rillig 		token_add_char('*');
    719   1.32  rillig 	    inbuf_skip();
    720   1.16   kamil 	}
    721   1.75  rillig 
    722   1.16   kamil 	if (ps.in_decl) {
    723   1.78  rillig 	    char *tp = inp.s;
    724    1.6   lukem 
    725   1.16   kamil 	    while (isalpha((unsigned char)*tp) ||
    726  1.103  rillig 		    isspace((unsigned char)*tp)) {
    727   1.78  rillig 		if (++tp >= inp.e)
    728   1.81  rillig 		    inbuf_read_line();
    729   1.16   kamil 	    }
    730   1.16   kamil 	    if (*tp == '(')
    731   1.16   kamil 		ps.procname[0] = ' ';
    732   1.16   kamil 	}
    733   1.75  rillig 
    734  1.100  rillig 	lsym = lsym_unary_op;
    735   1.16   kamil 	break;
    736    1.1     cgd 
    737   1.16   kamil     default:
    738   1.78  rillig 	if (token.s[0] == '/' && (*inp.s == '*' || *inp.s == '/')) {
    739   1.16   kamil 	    /* it is start of comment */
    740   1.50  rillig 	    *token.e++ = inbuf_next();
    741    1.1     cgd 
    742  1.100  rillig 	    lsym = lsym_comment;
    743   1.92  rillig 	    unary_delim = state->next_unary;
    744   1.16   kamil 	    break;
    745    1.1     cgd 	}
    746   1.75  rillig 
    747   1.78  rillig 	while (token.e[-1] == *inp.s || *inp.s == '=') {
    748   1.87  rillig 	    /* handle '||', '&&', etc., and also things as in 'int *****i' */
    749   1.87  rillig 	    token_add_char(inbuf_next());
    750   1.16   kamil 	}
    751   1.75  rillig 
    752  1.100  rillig 	lsym = state->next_unary ? lsym_unary_op : lsym_binary_op;
    753   1.16   kamil 	unary_delim = true;
    754   1.47  rillig     }
    755   1.16   kamil 
    756   1.95  rillig     if (inp.s >= inp.e)		/* check for input buffer empty */
    757   1.81  rillig 	inbuf_read_line();
    758   1.75  rillig 
    759   1.92  rillig     state->next_unary = unary_delim;
    760   1.75  rillig 
    761   1.25  rillig     check_size_token(1);
    762   1.50  rillig     *token.e = '\0';
    763   1.75  rillig 
    764  1.104  rillig     return lexi_end(state, lsym);
    765    1.1     cgd }
    766   1.16   kamil 
    767    1.6   lukem void
    768   1.64  rillig add_typename(const char *name)
    769    1.1     cgd {
    770   1.64  rillig     if (typenames.len >= typenames.cap) {
    771   1.64  rillig 	typenames.cap = 16 + 2 * typenames.cap;
    772   1.64  rillig 	typenames.items = xrealloc(typenames.items,
    773   1.64  rillig 	    sizeof(typenames.items[0]) * typenames.cap);
    774   1.64  rillig     }
    775   1.16   kamil 
    776   1.84  rillig     int pos = bsearch_typenames(name);
    777   1.64  rillig     if (pos >= 0)
    778   1.64  rillig 	return;			/* already in the list */
    779   1.75  rillig 
    780   1.64  rillig     pos = -(pos + 1);
    781   1.64  rillig     memmove(typenames.items + pos + 1, typenames.items + pos,
    782   1.73  rillig 	sizeof(typenames.items[0]) * (typenames.len++ - (unsigned)pos));
    783   1.64  rillig     typenames.items[pos] = xstrdup(name);
    784    1.1     cgd }
    785