Home | History | Annotate | Line # | Download | only in indent
lexi.c revision 1.231
      1  1.231  rillig /*	$NetBSD: lexi.c,v 1.231 2023/06/17 22:28:49 rillig Exp $	*/
      2    1.3     tls 
      3   1.16   kamil /*-
      4   1.16   kamil  * SPDX-License-Identifier: BSD-4-Clause
      5   1.16   kamil  *
      6   1.16   kamil  * Copyright (c) 1985 Sun Microsystems, Inc.
      7    1.5     mrg  * Copyright (c) 1980, 1993
      8    1.5     mrg  *	The Regents of the University of California.  All rights reserved.
      9    1.1     cgd  * All rights reserved.
     10    1.1     cgd  *
     11    1.1     cgd  * Redistribution and use in source and binary forms, with or without
     12    1.1     cgd  * modification, are permitted provided that the following conditions
     13    1.1     cgd  * are met:
     14    1.1     cgd  * 1. Redistributions of source code must retain the above copyright
     15    1.1     cgd  *    notice, this list of conditions and the following disclaimer.
     16    1.1     cgd  * 2. Redistributions in binary form must reproduce the above copyright
     17    1.1     cgd  *    notice, this list of conditions and the following disclaimer in the
     18    1.1     cgd  *    documentation and/or other materials provided with the distribution.
     19    1.1     cgd  * 3. All advertising materials mentioning features or use of this software
     20    1.1     cgd  *    must display the following acknowledgement:
     21    1.1     cgd  *	This product includes software developed by the University of
     22    1.1     cgd  *	California, Berkeley and its contributors.
     23    1.1     cgd  * 4. Neither the name of the University nor the names of its contributors
     24    1.1     cgd  *    may be used to endorse or promote products derived from this software
     25    1.1     cgd  *    without specific prior written permission.
     26    1.1     cgd  *
     27    1.1     cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     28    1.1     cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29    1.1     cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30    1.1     cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     31    1.1     cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     32    1.1     cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     33    1.1     cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     34    1.1     cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     35    1.1     cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     36    1.1     cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     37    1.1     cgd  * SUCH DAMAGE.
     38    1.1     cgd  */
     39    1.1     cgd 
     40    1.6   lukem #include <sys/cdefs.h>
     41  1.231  rillig __RCSID("$NetBSD: lexi.c,v 1.231 2023/06/17 22:28:49 rillig Exp $");
     42    1.1     cgd 
     43    1.1     cgd #include <stdlib.h>
     44    1.1     cgd #include <string.h>
     45   1.16   kamil 
     46   1.16   kamil #include "indent.h"
     47    1.1     cgd 
/*
 * The words that the lexer treats specially, each with the lexer symbol that
 * describes its role.
 *
 * must be sorted alphabetically, is used in binary search
 *
 * "Alphabetically" means the order of strcmp, since that is what the lookup
 * compares with; the names starting with '_' therefore come first.
 */
static const struct keyword {
	const char name[12];	/* NUL-terminated; the longest name,
				 * "_Imaginary", has 10 characters */
	lexer_symbol lsym;	/* the symbol for this keyword */
} keywords[] = {
	{"_Bool", lsym_type},
	{"_Complex", lsym_type},
	{"_Imaginary", lsym_type},
	{"auto", lsym_modifier},
	{"bool", lsym_type},
	{"break", lsym_word},
	{"case", lsym_case},
	{"char", lsym_type},
	{"complex", lsym_type},
	{"const", lsym_modifier},
	{"continue", lsym_word},
	{"default", lsym_default},
	{"do", lsym_do},
	{"double", lsym_type},
	{"else", lsym_else},
	{"enum", lsym_tag},
	{"extern", lsym_modifier},
	{"float", lsym_type},
	{"for", lsym_for},
	{"goto", lsym_word},
	{"if", lsym_if},
	{"imaginary", lsym_type},
	{"inline", lsym_modifier},
	{"int", lsym_type},
	{"long", lsym_type},
	{"offsetof", lsym_offsetof},
	{"register", lsym_modifier},
	{"restrict", lsym_word},
	{"return", lsym_return},
	{"short", lsym_type},
	{"signed", lsym_type},
	{"sizeof", lsym_sizeof},
	{"static", lsym_modifier},
	{"struct", lsym_tag},
	{"switch", lsym_switch},
	{"typedef", lsym_typedef},
	{"union", lsym_tag},
	{"unsigned", lsym_type},
	{"void", lsym_type},
	{"volatile", lsym_modifier},
	{"while", lsym_while}
};
     95    1.1     cgd 
/*
 * The type names registered via register_typename, kept sorted in strcmp
 * order so that they can be found by binary search.
 */
static struct {
	const char **items;	/* the names, each a separately allocated
				 * string */
	unsigned int len;	/* number of items in use */
	unsigned int cap;	/* number of items allocated */
} typenames;
    101   1.16   kamil 
/*-
 * The transition table below was rewritten by hand from lx's output, given
 * the following definitions. lx is Katherine Flavel's lexer generator.
 *
 * O  = /[0-7]/;        D  = /[0-9]/;          NZ = /[1-9]/;
 * H  = /[a-f0-9]/i;    B  = /[0-1]/;          HP = /0x/i;
 * BP = /0b/i;          E  = /e[+\-]?/i D+;    P  = /p[+\-]?/i D+;
 * FS = /[fl]/i;        IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?;
 *
 * D+           E  FS? -> $float;
 * D*    "." D+ E? FS? -> $float;
 * D+    "."    E? FS? -> $float;    HP H+           IS? -> $int;
 * HP H+        P  FS? -> $float;    NZ D*           IS? -> $int;
 * HP H* "." H+ P  FS? -> $float;    "0" O*          IS? -> $int;
 * HP H+ "."    P  FS  -> $float;    BP B+           IS? -> $int;
 *
 * How the table is read by lex_number: each column is one of the states 'A'
 * to 'Z', 'A' being the start state; each row from 1 on is a class of
 * characters, see lex_number_row.  An entry names the state to go to after
 * consuming a character of that class; a space means that the character
 * cannot continue the number.  The lowercase entries 'f', 'i' and 'u' are
 * final, they end the scan.  Row 0 is not used for transitions, it tells
 * for each state which type of number has been read up to there:
 * f = floating, i = integer, u = unknown.
 */
/* INDENT OFF */
static const unsigned char lex_number_state[][26] = {
	/*                examples:
	                                 00
	         s                      0xx
	         t                    00xaa
	         a     11       101100xxa..
	         r   11ee0001101lbuuxx.a.pp
	         t.01.e+008bLuxll0Ll.aa.p+0
	states:  ABCDEFGHIJKLMNOPQRSTUVWXYZ */
	[0] =   "uuiifuufiuuiiuiiiiiuiuuuuu",	/* (other) */
	[1] =   "CEIDEHHHIJQ  U  Q  VUVVZZZ",	/* 0 */
	[2] =   "DEIDEHHHIJQ  U  Q  VUVVZZZ",	/* 1 */
	[3] =   "DEIDEHHHIJ   U     VUVVZZZ",	/* 2 3 4 5 6 7 */
	[4] =   "DEJDEHHHJJ   U     VUVVZZZ",	/* 8 9 */
	[5] =   "             U     VUVV   ",	/* A a C c D d */
	[6] =   "  K          U     VUVV   ",	/* B b */
	[7] =   "  FFF   FF   U     VUVV   ",	/* E e */
	[8] =   "    f  f     U     VUVV  f",	/* F f */
	[9] =   "  LLf  fL  PR   Li  L    f",	/* L */
	[10] =  "  OOf  fO   S P O i O    f",	/* l */
	[11] =  "                    FFX   ",	/* P p */
	[12] =  "  MM    M  i  iiM   M     ",	/* U u */
	[13] =  "  N                       ",	/* X x */
	[14] =  "     G                 Y  ",	/* + - */
	[15] =  "B EE    EE   T      W     ",	/* . */
	/*       ABCDEFGHIJKLMNOPQRSTUVWXYZ */
};
/* INDENT ON */
    147    1.1     cgd 
/*
 * Maps a character to its row in lex_number_state.  All characters that are
 * not listed here get row 0, which lex_number takes as "this character
 * cannot be part of a number".  The array only extends up to the largest
 * listed character, so the index must be range-checked before the lookup.
 */
static const unsigned char lex_number_row[] = {
	['0'] = 1,
	['1'] = 2,
	['2'] = 3, ['3'] = 3, ['4'] = 3, ['5'] = 3, ['6'] = 3, ['7'] = 3,
	['8'] = 4, ['9'] = 4,
	['A'] = 5, ['a'] = 5, ['C'] = 5, ['c'] = 5, ['D'] = 5, ['d'] = 5,
	['B'] = 6, ['b'] = 6,
	['E'] = 7, ['e'] = 7,
	['F'] = 8, ['f'] = 8,
	['L'] = 9,
	['l'] = 10,
	['P'] = 11, ['p'] = 11,
	['U'] = 12, ['u'] = 12,
	['X'] = 13, ['x'] = 13,
	['+'] = 14, ['-'] = 14,
	['.'] = 15,
};
    165   1.36  rillig 
    166  1.215  rillig 
/*
 * Tell whether ch may be the first character of an identifier.  Besides
 * letters and '_', the '$' is accepted as well.
 */
static bool
is_identifier_start(char ch)
{
	if (ch == '_' || ch == '$')
		return true;
	return ch_isalpha(ch);
}
    172  1.225  rillig 
/*
 * Tell whether ch may occur in an identifier after its first character.
 * Besides letters, digits and '_', the '$' is accepted as well.
 */
static bool
is_identifier_part(char ch)
{
	if (ch == '_' || ch == '$')
		return true;
	return ch_isalnum(ch);
}
    178  1.225  rillig 
/* Append a single character to the global token buffer. */
static void
token_add_char(char ch)
{
	buf_add_char(&token, ch);
}
    184   1.87  rillig 
    185   1.43  rillig static void
    186   1.43  rillig lex_number(void)
    187   1.43  rillig {
    188  1.198  rillig 	for (unsigned char s = 'A'; s != 'f' && s != 'i' && s != 'u';) {
    189  1.231  rillig 		unsigned char ch = (unsigned char)*inp_p;
    190  1.212  rillig 		if (ch == '\\' && inp_p[1] == '\n') {
    191  1.212  rillig 			inp_p++;
    192  1.198  rillig 			inp_skip();
    193  1.198  rillig 			line_no++;
    194  1.198  rillig 			continue;
    195  1.198  rillig 		}
    196  1.199  rillig 		if (ch >= array_length(lex_number_row)
    197  1.199  rillig 		    || lex_number_row[ch] == 0)
    198  1.198  rillig 			break;
    199  1.198  rillig 
    200  1.198  rillig 		unsigned char row = lex_number_row[ch];
    201  1.198  rillig 		if (lex_number_state[row][s - 'A'] == ' ') {
    202  1.231  rillig 		        // lex_number_state[0][s - 'A'] now indicates the type:
    203  1.231  rillig 		        // f = floating, i = integer, u = unknown
    204  1.198  rillig 			return;
    205  1.198  rillig 		}
    206  1.198  rillig 
    207  1.198  rillig 		s = lex_number_state[row][s - 'A'];
    208  1.198  rillig 		token_add_char(inp_next());
    209   1.43  rillig 	}
    210   1.43  rillig }
    211   1.43  rillig 
    212   1.43  rillig static void
    213   1.43  rillig lex_word(void)
    214   1.43  rillig {
    215  1.198  rillig 	for (;;) {
    216  1.212  rillig 		if (is_identifier_part(inp_p[0]))
    217  1.212  rillig 			token_add_char(*inp_p++);
    218  1.212  rillig 		else if (inp_p[0] == '\\' && inp_p[1] == '\n') {
    219  1.212  rillig 			inp_p++;
    220  1.198  rillig 			inp_skip();
    221  1.198  rillig 			line_no++;
    222  1.198  rillig 		} else
    223  1.198  rillig 			return;
    224  1.198  rillig 	}
    225   1.43  rillig }
    226   1.43  rillig 
    227   1.43  rillig static void
    228   1.43  rillig lex_char_or_string(void)
    229   1.43  rillig {
    230  1.212  rillig 	for (char delim = token.s[token.len - 1];;) {
    231  1.231  rillig 		if (*inp_p == '\n') {
    232  1.198  rillig 			diag(1, "Unterminated literal");
    233  1.198  rillig 			return;
    234  1.198  rillig 		}
    235  1.198  rillig 
    236  1.212  rillig 		token_add_char(*inp_p++);
    237  1.212  rillig 		if (token.s[token.len - 1] == delim)
    238  1.198  rillig 			return;
    239  1.198  rillig 
    240  1.212  rillig 		if (token.s[token.len - 1] == '\\') {
    241  1.231  rillig 			if (*inp_p == '\n')
    242  1.231  rillig 				line_no++;
    243  1.198  rillig 			token_add_char(inp_next());
    244  1.198  rillig 		}
    245   1.52  rillig 	}
    246   1.43  rillig }
    247   1.43  rillig 
    248   1.84  rillig /* Guess whether the current token is a declared type. */
    249   1.57  rillig static bool
    250  1.107  rillig probably_typename(void)
    251   1.57  rillig {
    252  1.211  rillig 	if (ps.prev_lsym == lsym_modifier)
    253  1.198  rillig 		return true;
    254  1.221  rillig 	if (ps.in_init)
    255  1.198  rillig 		return false;
    256  1.198  rillig 	if (ps.in_stmt_or_decl)	/* XXX: this condition looks incorrect */
    257  1.198  rillig 		return false;
    258  1.220  rillig 	if (ps.prev_lsym == lsym_semicolon
    259  1.220  rillig 	    || ps.prev_lsym == lsym_lbrace
    260  1.220  rillig 	    || ps.prev_lsym == lsym_rbrace) {
    261  1.220  rillig 		if (inp_p[0] == '*' && inp_p[1] != '=')
    262  1.220  rillig 			return true;
    263  1.220  rillig 		/* XXX: is_identifier_start */
    264  1.220  rillig 		if (ch_isalpha(inp_p[0]))
    265  1.220  rillig 			return true;
    266  1.220  rillig 	}
    267   1.70  rillig 	return false;
    268   1.57  rillig }
    269   1.57  rillig 
    270   1.84  rillig static int
    271   1.84  rillig bsearch_typenames(const char *key)
    272   1.84  rillig {
    273  1.198  rillig 	const char **arr = typenames.items;
    274  1.225  rillig 	unsigned lo = 0;
    275  1.225  rillig 	unsigned hi = typenames.len;
    276  1.198  rillig 
    277  1.225  rillig 	while (lo < hi) {
    278  1.225  rillig 		unsigned mid = (lo + hi) / 2;
    279  1.198  rillig 		int cmp = strcmp(arr[mid], key);
    280  1.198  rillig 		if (cmp < 0)
    281  1.198  rillig 			lo = mid + 1;
    282  1.198  rillig 		else if (cmp > 0)
    283  1.225  rillig 			hi = mid;
    284  1.198  rillig 		else
    285  1.225  rillig 			return (int)mid;
    286  1.198  rillig 	}
    287  1.225  rillig 	return -1 - (int)lo;
    288   1.84  rillig }
    289   1.84  rillig 
    290   1.63  rillig static bool
    291   1.63  rillig is_typename(void)
    292   1.63  rillig {
    293  1.198  rillig 	if (opt.auto_typedefs &&
    294  1.212  rillig 	    token.len >= 2 && memcmp(token.s + token.len - 2, "_t", 2) == 0)
    295  1.198  rillig 		return true;
    296   1.63  rillig 
    297  1.212  rillig 	return bsearch_typenames(token.s) >= 0;
    298   1.63  rillig }
    299   1.63  rillig 
    300  1.225  rillig void
    301  1.225  rillig register_typename(const char *name)
    302  1.225  rillig {
    303  1.225  rillig 	if (typenames.len >= typenames.cap) {
    304  1.225  rillig 		typenames.cap = 16 + 2 * typenames.cap;
    305  1.225  rillig 		typenames.items = nonnull(realloc(typenames.items,
    306  1.225  rillig 			sizeof(typenames.items[0]) * typenames.cap));
    307  1.225  rillig 	}
    308  1.225  rillig 
    309  1.225  rillig 	int pos = bsearch_typenames(name);
    310  1.225  rillig 	if (pos >= 0)
    311  1.225  rillig 		return;		/* already in the list */
    312  1.225  rillig 
    313  1.225  rillig 	pos = -1 - pos;
    314  1.225  rillig 	memmove(typenames.items + pos + 1, typenames.items + pos,
    315  1.225  rillig 	    sizeof(typenames.items[0]) * (typenames.len++ - (unsigned)pos));
    316  1.225  rillig 	typenames.items[pos] = nonnull(strdup(name));
    317  1.225  rillig }
    318  1.225  rillig 
    319  1.115  rillig static int
    320  1.115  rillig cmp_keyword_by_name(const void *key, const void *elem)
    321  1.115  rillig {
    322  1.198  rillig 	return strcmp(key, ((const struct keyword *)elem)->name);
    323  1.115  rillig }
    324  1.115  rillig 
    325  1.165  rillig /*
    326  1.231  rillig  * Looking at the '(', guess whether this starts a function definition or a
    327  1.231  rillig  * function declaration.
    328  1.165  rillig  */
    329  1.155  rillig static bool
    330  1.224  rillig probably_function_definition(void)
    331  1.155  rillig {
    332  1.198  rillig 	int paren_level = 0;
    333  1.212  rillig 	for (const char *p = inp_p; *p != '\n'; p++) {
    334  1.198  rillig 		if (*p == '(')
    335  1.198  rillig 			paren_level++;
    336  1.198  rillig 		if (*p == ')' && --paren_level == 0) {
    337  1.198  rillig 			p++;
    338  1.198  rillig 
    339  1.199  rillig 			while (*p != '\n'
    340  1.199  rillig 			    && (ch_isspace(*p) || is_identifier_part(*p)))
    341  1.198  rillig 				p++;	/* '__dead' or '__unused' */
    342  1.198  rillig 
    343  1.198  rillig 			if (*p == '\n')	/* func(...) */
    344  1.198  rillig 				break;
    345  1.198  rillig 			if (*p == ';')	/* func(...); */
    346  1.198  rillig 				return false;
    347  1.198  rillig 			if (*p == ',')	/* double abs(), pi; */
    348  1.198  rillig 				return false;
    349  1.198  rillig 			if (*p == '(')	/* func(...) __attribute__((...)) */
    350  1.198  rillig 				paren_level++;	/* func(...) __printflike(...)
    351  1.198  rillig 						 */
    352  1.198  rillig 			else
    353  1.198  rillig 				break;	/* func(...) { ... */
    354  1.198  rillig 		}
    355  1.219  rillig 
    356  1.219  rillig 		if (paren_level == 1 && p[0] == '*' && p[1] == ',')
    357  1.219  rillig 			return false;
    358  1.198  rillig 	}
    359  1.198  rillig 
    360  1.231  rillig 	/*
    361  1.231  rillig 	 * To further reduce the cases where indent wrongly treats an
    362  1.198  rillig 	 * incomplete function declaration as a function definition, thus
    363  1.198  rillig 	 * adding a newline before the function name, it may be worth looking
    364  1.198  rillig 	 * for parameter names, as these are often omitted in function
    365  1.198  rillig 	 * declarations and only included in function definitions. Or just
    366  1.198  rillig 	 * increase the lookahead to more than just the current line of input,
    367  1.231  rillig 	 * until the next '{'.
    368  1.231  rillig 	 */
    369  1.198  rillig 	return true;
    370  1.155  rillig }
    371  1.155  rillig 
/*
 * Try to read a number or a word from the input into the token, and
 * classify it.
 *
 * Returns lsym_eof as a placeholder if the input starts with neither a
 * number nor an identifier; nothing is consumed in that case.  Otherwise
 * returns the symbol for the token and updates ps.next_unary; blanks after
 * the token are skipped.
 */
static lexer_symbol
lexi_alnum(void)
{
	if (ch_isdigit(inp_p[0]) ||
	    (inp_p[0] == '.' && ch_isdigit(inp_p[1]))) {
		lex_number();
	} else if (is_identifier_start(inp_p[0])) {
		lex_word();

		/* a literal with the prefix 'L', as in L'x' or L"..." */
		if (token.len == 1 && token.s[0] == 'L' &&
		    (inp_p[0] == '"' || inp_p[0] == '\'')) {
			token_add_char(*inp_p++);
			lex_char_or_string();
			ps.next_unary = false;
			return lsym_word;
		}
	} else
		return lsym_eof;	/* just as a placeholder */

	/* the checks below look at the first non-blank after the token */
	while (ch_isblank(*inp_p))
		inp_p++;

	ps.next_unary = ps.prev_lsym == lsym_tag
	    || ps.prev_lsym == lsym_typedef;

	/* the word directly after a tag keyword, outside of parentheses */
	if (ps.prev_lsym == lsym_tag && ps.paren.len == 0)
		return lsym_type;

	/* strcmp in the lookups below needs the NUL, token.len excludes it */
	token_add_char('\0');		// Terminate in non-debug mode as well.
	token.len--;
	const struct keyword *kw = bsearch(token.s, keywords,
	    array_length(keywords), sizeof(keywords[0]), cmp_keyword_by_name);
	lexer_symbol lsym = lsym_word;
	if (kw != NULL) {
		if (kw->lsym == lsym_type)
			lsym = lsym_type;
		ps.next_unary = true;
		/* tag and type keywords share the code for type names */
		if (kw->lsym == lsym_tag || kw->lsym == lsym_type)
			goto found_typename;
		return kw->lsym;
	}

	if (is_typename()) {
		lsym = lsym_type;
		ps.next_unary = true;
found_typename:
		if (ps.paren.len > 0) {
			/* inside parentheses: cast, param list, offsetof or
			 * sizeof */
			struct paren_level *paren_level =
			    ps.paren.item + ps.paren.len - 1;
			if (paren_level->cast == cast_unknown)
				paren_level->cast = cast_maybe;
		}
		if (ps.prev_lsym != lsym_period
		    && ps.prev_lsym != lsym_unary_op) {
			if (kw != NULL && kw->lsym == lsym_tag)
				return lsym_tag;
			if (ps.paren.len == 0)
				return lsym_type;
		}
	}

	/*
	 * A word followed by '(' near the top level may be the name in a
	 * function definition; any other word may be a type name that has
	 * not been registered.
	 */
	if (*inp_p == '(' && ps.psyms.len < 3 && ps.ind_level == 0 &&
	    !ps.in_func_def_params && !ps.in_init) {

		if (ps.paren.len == 0 && probably_function_definition()) {
			ps.line_has_func_def = true;
			if (ps.in_decl)
				ps.in_func_def_params = true;
			return lsym_funcname;
		}

	} else if (ps.paren.len == 0 && probably_typename()) {
		ps.next_unary = true;
		return lsym_type;
	}

	return lsym;
}
    452   1.75  rillig 
    453  1.163  rillig static bool
    454  1.224  rillig is_asterisk_pointer(void)
    455  1.163  rillig {
    456  1.217  rillig 	if (inp_p[strspn(inp_p, "* \t")] == ')')
    457  1.204  rillig 		return true;
    458  1.198  rillig 	if (ps.next_unary || ps.in_func_def_params)
    459  1.198  rillig 		return true;
    460  1.211  rillig 	if (ps.prev_lsym == lsym_word ||
    461  1.211  rillig 	    ps.prev_lsym == lsym_rparen ||
    462  1.211  rillig 	    ps.prev_lsym == lsym_rbracket)
    463  1.198  rillig 		return false;
    464  1.228  rillig 	return ps.in_decl && ps.paren.len > 0;
    465  1.163  rillig }
    466  1.163  rillig 
    467  1.200  rillig static bool
    468  1.200  rillig probably_in_function_definition(void)
    469  1.200  rillig {
    470  1.231  rillig 	for (const char *p = inp_p; *p != '\n';) {
    471  1.231  rillig 		if (ch_isspace(*p))
    472  1.231  rillig 			p++;
    473  1.231  rillig 		else if (is_identifier_start(*p)) {
    474  1.231  rillig 			p++;
    475  1.231  rillig 			while (is_identifier_part(*p))
    476  1.231  rillig 				p++;
    477  1.200  rillig 		} else
    478  1.231  rillig 			return *p == '(';
    479  1.200  rillig 	}
    480  1.200  rillig 	return false;
    481  1.200  rillig }
    482  1.200  rillig 
    483  1.161  rillig static void
    484  1.224  rillig lex_asterisk_pointer(void)
    485  1.161  rillig {
    486  1.231  rillig 	while (*inp_p == '*' || ch_isspace(*inp_p)) {
    487  1.231  rillig 		if (*inp_p == '*')
    488  1.198  rillig 			token_add_char('*');
    489  1.198  rillig 		inp_skip();
    490  1.198  rillig 	}
    491  1.198  rillig 
    492  1.200  rillig 	if (ps.in_decl && probably_in_function_definition())
    493  1.222  rillig 		ps.line_has_func_def = true;
    494  1.161  rillig }
    495  1.161  rillig 
/*
 * Advance *pp over any blanks and then, if the text there starts with s,
 * over s as well.
 *
 * Returns whether s was found.  The blanks stay skipped even if it was not.
 */
static bool
skip(const char **pp, const char *s)
{
	const char *p = *pp;

	while (ch_isblank(*p))
		p++;
	*pp = p;

	size_t len = strlen(s);
	if (strncmp(p, s, len) != 0)
		return false;
	*pp = p + len;
	return true;
}
    508  1.193  rillig 
    509  1.194  rillig static void
    510  1.193  rillig lex_indent_comment(void)
    511  1.193  rillig {
    512  1.212  rillig 	const char *p = inp.s;
    513  1.225  rillig 	if (skip(&p, "/*") && skip(&p, "INDENT")) {
    514  1.225  rillig 		enum indent_enabled enabled;
    515  1.225  rillig 		if (skip(&p, "ON") || *p == '*')
    516  1.225  rillig 			enabled = indent_last_off_line;
    517  1.225  rillig 		else if (skip(&p, "OFF"))
    518  1.225  rillig 			enabled = indent_off;
    519  1.225  rillig 		else
    520  1.225  rillig 			return;
    521  1.225  rillig 		if (skip(&p, "*/\n")) {
    522  1.225  rillig 			if (lab.len > 0 || code.len > 0 || com.len > 0)
    523  1.225  rillig 				output_line();
    524  1.225  rillig 			indent_enabled = enabled;
    525  1.225  rillig 		}
    526  1.225  rillig 	}
    527  1.193  rillig }
    528  1.193  rillig 
/*
 * Reads the next token, placing it in the global variable "token".
 *
 * Returns the symbol that classifies the token.  As a side effect,
 * ps.next_unary is set to tell whether an operator directly following this
 * token is to be taken as a unary operator.
 */
lexer_symbol
lexi(void)
{
	buf_clear(&token);

	/* skip blanks and backslash-newline before the token */
	for (;;) {
		if (ch_isblank(inp_p[0]))
			inp_p++;
		else if (inp_p[0] == '\\' && inp_p[1] == '\n') {
			inp_p++;
			inp_skip();
			line_no++;
		} else
			break;
	}

	/* lsym_eof means "neither a number nor a word" here */
	lexer_symbol alnum_lsym = lexi_alnum();
	if (alnum_lsym != lsym_eof)
		return alnum_lsym;

	/* Scan a non-alphanumeric token */

	token_add_char(inp_next());

	lexer_symbol lsym;
	/* becomes ps.next_unary at the end; the cases below still read the
	 * value from before this token */
	bool next_unary;

	switch (token.s[token.len - 1]) {

	case '#':
		lsym = lsym_preprocessing;
		next_unary = ps.next_unary;
		break;

	case '\n':
		/* if data has been exhausted, the '\n' is a dummy. */
		lsym = had_eof ? lsym_eof : lsym_newline;
		next_unary = ps.next_unary;
		break;

	/* INDENT OFF */
	case '(':	lsym = lsym_lparen;	next_unary = true;	break;
	case ')':	lsym = lsym_rparen;	next_unary = false;	break;
	case '[':	lsym = lsym_lbracket;	next_unary = true;	break;
	case ']':	lsym = lsym_rbracket;	next_unary = false;	break;
	case '{':	lsym = lsym_lbrace;	next_unary = true;	break;
	case '}':	lsym = lsym_rbrace;	next_unary = true;	break;
	case '.':	lsym = lsym_period;	next_unary = false;	break;
	case '?':	lsym = lsym_question;	next_unary = true;	break;
	case ',':	lsym = lsym_comma;	next_unary = true;	break;
	case ';':	lsym = lsym_semicolon;	next_unary = true;	break;
	/* INDENT ON */

	case '+':
	case '-':
		lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
		next_unary = true;

		/* '++' or '--' */
		if (*inp_p == token.s[token.len - 1]) {
			token_add_char(*inp_p++);
			/* after an operand, it may be a postfix operator */
			if (ps.prev_lsym == lsym_word ||
			    ps.prev_lsym == lsym_rparen ||
			    ps.prev_lsym == lsym_rbracket) {
				lsym = ps.next_unary
				    ? lsym_unary_op : lsym_postfix_op;
				next_unary = false;
			}

		} else if (*inp_p == '=') {	/* '+=' or '-=' */
			token_add_char(*inp_p++);

		} else if (*inp_p == '>') {	/* '->' */
			token_add_char(*inp_p++);
			lsym = lsym_unary_op;
			next_unary = false;
			ps.want_blank = false;
		}
		break;

	case ':':
		/* a ':' that belongs to a pending '?' uses up that '?' */
		lsym = ps.quest_level > 0
		    ? (ps.quest_level--, lsym_question_colon)
		    : ps.in_var_decl ? lsym_other_colon : lsym_label_colon;
		next_unary = true;
		break;

	case '*':
		if (*inp_p == '=') {
			token_add_char(*inp_p++);
			lsym = lsym_binary_op;
		} else if (is_asterisk_pointer()) {
			lex_asterisk_pointer();
			lsym = lsym_unary_op;
		} else
			lsym = lsym_binary_op;
		next_unary = true;
		break;

	case '=':
		if (ps.in_var_decl)
			ps.in_init = true;
		if (*inp_p == '=')
			token_add_char(*inp_p++);
		lsym = lsym_binary_op;
		next_unary = true;
		break;

	case '>':
	case '<':
	case '!':		/* ops like <, <<, <=, !=, etc. */
		if (*inp_p == '>' || *inp_p == '<' || *inp_p == '=')
			token_add_char(*inp_p++);
		/* the '=' of a three-character operator like '<<=' */
		if (*inp_p == '=')
			token_add_char(*inp_p++);
		lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
		next_unary = true;
		break;

	case '\'':
	case '"':
		lex_char_or_string();
		lsym = lsym_word;
		next_unary = false;
		break;

	default:
		/* the start of a comment; only its first two characters
		 * are made the token here */
		if (token.s[token.len - 1] == '/'
		    && (*inp_p == '*' || *inp_p == '/')) {
			enum indent_enabled prev = indent_enabled;
			lex_indent_comment();
			if (prev == indent_on && indent_enabled == indent_off)
				buf_clear(&out.indent_off_text);
			token_add_char(*inp_p++);
			lsym = lsym_comment;
			next_unary = ps.next_unary;
			break;
		}

		/* punctuation like '%', '&&', '/', '^', '||', '~' */
		lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
		/* a doubled character or a following '=' makes the
		 * operator binary in any case */
		if (*inp_p == token.s[token.len - 1])
			token_add_char(*inp_p++), lsym = lsym_binary_op;
		if (*inp_p == '=')
			token_add_char(*inp_p++), lsym = lsym_binary_op;

		next_unary = true;
	}

	ps.next_unary = next_unary;

	return lsym;
}
    683