Home | History | Annotate | Line # | Download | only in indent
      1  1.242  rillig /*	$NetBSD: lexi.c,v 1.242 2023/12/03 21:44:42 rillig Exp $	*/
      2    1.3     tls 
      3   1.16   kamil /*-
      4   1.16   kamil  * SPDX-License-Identifier: BSD-4-Clause
      5   1.16   kamil  *
      6   1.16   kamil  * Copyright (c) 1985 Sun Microsystems, Inc.
      7    1.5     mrg  * Copyright (c) 1980, 1993
      8    1.5     mrg  *	The Regents of the University of California.  All rights reserved.
      9    1.1     cgd  * All rights reserved.
     10    1.1     cgd  *
     11    1.1     cgd  * Redistribution and use in source and binary forms, with or without
     12    1.1     cgd  * modification, are permitted provided that the following conditions
     13    1.1     cgd  * are met:
     14    1.1     cgd  * 1. Redistributions of source code must retain the above copyright
     15    1.1     cgd  *    notice, this list of conditions and the following disclaimer.
     16    1.1     cgd  * 2. Redistributions in binary form must reproduce the above copyright
     17    1.1     cgd  *    notice, this list of conditions and the following disclaimer in the
     18    1.1     cgd  *    documentation and/or other materials provided with the distribution.
     19    1.1     cgd  * 3. All advertising materials mentioning features or use of this software
     20    1.1     cgd  *    must display the following acknowledgement:
     21    1.1     cgd  *	This product includes software developed by the University of
     22    1.1     cgd  *	California, Berkeley and its contributors.
     23    1.1     cgd  * 4. Neither the name of the University nor the names of its contributors
     24    1.1     cgd  *    may be used to endorse or promote products derived from this software
     25    1.1     cgd  *    without specific prior written permission.
     26    1.1     cgd  *
     27    1.1     cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     28    1.1     cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29    1.1     cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30    1.1     cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     31    1.1     cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     32    1.1     cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     33    1.1     cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     34    1.1     cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     35    1.1     cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     36    1.1     cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     37    1.1     cgd  * SUCH DAMAGE.
     38    1.1     cgd  */
     39    1.1     cgd 
     40    1.6   lukem #include <sys/cdefs.h>
     41  1.242  rillig __RCSID("$NetBSD: lexi.c,v 1.242 2023/12/03 21:44:42 rillig Exp $");
     42    1.1     cgd 
     43    1.1     cgd #include <stdlib.h>
     44    1.1     cgd #include <string.h>
     45   1.16   kamil 
     46   1.16   kamil #include "indent.h"
     47    1.1     cgd 
     48   1.60  rillig /* must be sorted alphabetically, is used in binary search */
                       /*
                        * Table of reserved words and the lexer symbol each one maps to.
                        * lexi_alnum looks tokens up here with bsearch() and cmp_keyword_by_name,
                        * which compares using strcmp(), so "sorted" means strcmp() order (the
                        * names starting with '_' come before the lowercase ones).
                        * The name field is a fixed 12-byte array; every name listed below,
                        * including its terminating NUL, fits into it.
                        */
     49   1.62  rillig static const struct keyword {
     50  1.198  rillig 	const char name[12];
     51  1.198  rillig 	lexer_symbol lsym;
     52   1.62  rillig } keywords[] = {
     53  1.198  rillig 	{"_Bool", lsym_type},
     54  1.235  rillig 	{"_Complex", lsym_modifier},
     55  1.235  rillig 	{"_Imaginary", lsym_modifier},
     56  1.209  rillig 	{"auto", lsym_modifier},
     57  1.198  rillig 	{"bool", lsym_type},
     58  1.198  rillig 	{"break", lsym_word},
     59  1.210  rillig 	{"case", lsym_case},
     60  1.198  rillig 	{"char", lsym_type},
     61  1.235  rillig 	{"complex", lsym_modifier},
     62  1.209  rillig 	{"const", lsym_modifier},
     63  1.198  rillig 	{"continue", lsym_word},
     64  1.210  rillig 	{"default", lsym_default},
     65  1.198  rillig 	{"do", lsym_do},
     66  1.198  rillig 	{"double", lsym_type},
     67  1.198  rillig 	{"else", lsym_else},
     68  1.198  rillig 	{"enum", lsym_tag},
     69  1.209  rillig 	{"extern", lsym_modifier},
     70  1.198  rillig 	{"float", lsym_type},
     71  1.198  rillig 	{"for", lsym_for},
     72  1.198  rillig 	{"goto", lsym_word},
     73  1.198  rillig 	{"if", lsym_if},
     74  1.235  rillig 	{"imaginary", lsym_modifier},
     75  1.209  rillig 	{"inline", lsym_modifier},
     76  1.198  rillig 	{"int", lsym_type},
     77  1.198  rillig 	{"long", lsym_type},
     78  1.198  rillig 	{"offsetof", lsym_offsetof},
     79  1.209  rillig 	{"register", lsym_modifier},
     80  1.198  rillig 	{"restrict", lsym_word},
     81  1.198  rillig 	{"return", lsym_return},
     82  1.198  rillig 	{"short", lsym_type},
     83  1.198  rillig 	{"signed", lsym_type},
     84  1.198  rillig 	{"sizeof", lsym_sizeof},
     85  1.209  rillig 	{"static", lsym_modifier},
     86  1.198  rillig 	{"struct", lsym_tag},
     87  1.198  rillig 	{"switch", lsym_switch},
     88  1.198  rillig 	{"typedef", lsym_typedef},
     89  1.198  rillig 	{"union", lsym_tag},
     90  1.198  rillig 	{"unsigned", lsym_type},
     91  1.198  rillig 	{"void", lsym_type},
     92  1.209  rillig 	{"volatile", lsym_modifier},
     93  1.198  rillig 	{"while", lsym_while}
     94    1.1     cgd };
     95    1.1     cgd 
                       /*
                        * The type names added via register_typename.
                        * items is a heap array that register_typename keeps in strcmp() order
                        * so that bsearch_typenames can search it; len is the number of entries
                        * in use and cap the number of entries the array has room for.
                        */
     96   1.84  rillig static struct {
     97  1.198  rillig 	const char **items;
     98  1.198  rillig 	unsigned int len;
     99  1.198  rillig 	unsigned int cap;
    100   1.64  rillig } typenames;
    101   1.16   kamil 
    102  1.196  rillig /*-
    103   1.16   kamil  * The transition table below was rewritten by hand from lx's output, given
    104   1.16   kamil  * the following definitions. lx is Katherine Flavel's lexer generator.
    105   1.16   kamil  *
    106   1.16   kamil  * O  = /[0-7]/;        D  = /[0-9]/;          NZ = /[1-9]/;
    107   1.16   kamil  * H  = /[a-f0-9]/i;    B  = /[0-1]/;          HP = /0x/i;
    108   1.16   kamil  * BP = /0b/i;          E  = /e[+\-]?/i D+;    P  = /p[+\-]?/i D+;
    109   1.16   kamil  * FS = /[fl]/i;        IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?;
    110   1.16   kamil  *
    111   1.16   kamil  * D+           E  FS? -> $float;
    112   1.16   kamil  * D*    "." D+ E? FS? -> $float;
    113   1.16   kamil  * D+    "."    E? FS? -> $float;    HP H+           IS? -> $int;
    114   1.16   kamil  * HP H+        P  FS? -> $float;    NZ D*           IS? -> $int;
    115   1.16   kamil  * HP H* "." H+ P  FS? -> $float;    "0" O*          IS? -> $int;
    116   1.16   kamil  * HP H+ "."    P  FS  -> $float;    BP B+           IS? -> $int;
    117   1.16   kamil  */
    118   1.71  rillig /* INDENT OFF */
                       /*
                        * State transition table used by lex_number.
                        * It is indexed as lex_number_state[row][state - 'A'], where row is the
                        * character class from lex_number_row and state is one of the 26 states
                        * 'A' to 'Z'; lex_number starts in state 'A'.
                        * An uppercase entry is the next state, a space means that the character
                        * cannot continue the number in this state, and a lowercase 'f', 'i' or
                        * 'u' makes lex_number stop after consuming the character.
                        * Row 0 is not reachable through lex_number_row (class 0 ends the
                        * number); it describes the kind of number for each state.
                        */
    119   1.82  rillig static const unsigned char lex_number_state[][26] = {
    120  1.199  rillig 	/*                examples:
    121  1.199  rillig 	                                 00
    122  1.199  rillig 	         s                      0xx
    123  1.199  rillig 	         t                    00xaa
    124  1.199  rillig 	         a     11       101100xxa..
    125  1.199  rillig 	         r   11ee0001101lbuuxx.a.pp
    126  1.199  rillig 	         t.01.e+008bLuxll0Ll.aa.p+0
    127  1.199  rillig 	states:  ABCDEFGHIJKLMNOPQRSTUVWXYZ */
    128  1.199  rillig 	[0] =   "uuiifuufiuuiiuiiiiiuiuuuuu",	/* (other) */
    129  1.199  rillig 	[1] =   "CEIDEHHHIJQ  U  Q  VUVVZZZ",	/* 0 */
    130  1.199  rillig 	[2] =   "DEIDEHHHIJQ  U  Q  VUVVZZZ",	/* 1 */
    131  1.199  rillig 	[3] =   "DEIDEHHHIJ   U     VUVVZZZ",	/* 2 3 4 5 6 7 */
    132  1.199  rillig 	[4] =   "DEJDEHHHJJ   U     VUVVZZZ",	/* 8 9 */
    133  1.199  rillig 	[5] =   "             U     VUVV   ",	/* A a C c D d */
    134  1.199  rillig 	[6] =   "  K          U     VUVV   ",	/* B b */
    135  1.199  rillig 	[7] =   "  FFF   FF   U     VUVV   ",	/* E e */
    136  1.199  rillig 	[8] =   "    f  f     U     VUVV  f",	/* F f */
    137  1.199  rillig 	[9] =   "  LLf  fL  PR   Li  L    f",	/* L */
    138  1.199  rillig 	[10] =  "  OOf  fO   S P O i O    f",	/* l */
    139  1.199  rillig 	[11] =  "                    FFX   ",	/* P p */
    140  1.199  rillig 	[12] =  "  MM    M  i  iiM   M     ",	/* U u */
    141  1.199  rillig 	[13] =  "  N                       ",	/* X x */
    142  1.199  rillig 	[14] =  "     G                 Y  ",	/* + - */
    143  1.199  rillig 	[15] =  "B EE    EE   T      W     ",	/* . */
    144  1.199  rillig 	/*       ABCDEFGHIJKLMNOPQRSTUVWXYZ */
    145    1.1     cgd };
    146   1.71  rillig /* INDENT ON */
    147    1.1     cgd 
                       /*
                        * Maps an input character to its row in lex_number_state.
                        * Characters that are not listed get the value 0, which lex_number
                        * treats as "not part of a number". The array only extends up to the
                        * largest listed character, so lex_number checks the character against
                        * the array length before indexing.
                        */
    148  1.115  rillig static const unsigned char lex_number_row[] = {
    149  1.198  rillig 	['0'] = 1,
    150  1.198  rillig 	['1'] = 2,
    151  1.198  rillig 	['2'] = 3, ['3'] = 3, ['4'] = 3, ['5'] = 3, ['6'] = 3, ['7'] = 3,
    152  1.198  rillig 	['8'] = 4, ['9'] = 4,
    153  1.198  rillig 	['A'] = 5, ['a'] = 5, ['C'] = 5, ['c'] = 5, ['D'] = 5, ['d'] = 5,
    154  1.198  rillig 	['B'] = 6, ['b'] = 6,
    155  1.198  rillig 	['E'] = 7, ['e'] = 7,
    156  1.198  rillig 	['F'] = 8, ['f'] = 8,
    157  1.198  rillig 	['L'] = 9,
    158  1.198  rillig 	['l'] = 10,
    159  1.198  rillig 	['P'] = 11, ['p'] = 11,
    160  1.198  rillig 	['U'] = 12, ['u'] = 12,
    161  1.198  rillig 	['X'] = 13, ['x'] = 13,
    162  1.198  rillig 	['+'] = 14, ['-'] = 14,
    163  1.198  rillig 	['.'] = 15,
    164   1.56  rillig };
    165   1.36  rillig 
    166  1.215  rillig 
/* Tell whether ch can begin an identifier: a letter, '_' or '$'. */
static bool
is_identifier_start(char ch)
{
	if (ch == '_' || ch == '$')
		return true;
	return ch_isalpha(ch);
}
    172  1.225  rillig 
/* Tell whether ch can continue an identifier: a letter, digit, '_' or '$'. */
static bool
is_identifier_part(char ch)
{
	if (ch == '_' || ch == '$')
		return true;
	return ch_isalnum(ch);
}
    178  1.225  rillig 
                       /* Append the character ch to the global buffer "token". */
    179   1.25  rillig static void
    180   1.87  rillig token_add_char(char ch)
    181   1.87  rillig {
    182  1.198  rillig 	buf_add_char(&token, ch);
    183   1.87  rillig }
    184   1.87  rillig 
    185  1.232  rillig static bool
    186  1.232  rillig skip_line_continuation(void)
    187  1.232  rillig {
    188  1.242  rillig 	if (in.p[0] == '\\' && in.p[1] == '\n') {
    189  1.242  rillig 		in.p++;
    190  1.232  rillig 		inp_skip();
    191  1.242  rillig 		in.token_end_line++;
    192  1.232  rillig 		return true;
    193  1.232  rillig 	}
    194  1.232  rillig 	return false;
    195  1.232  rillig }
    196  1.232  rillig 
    197   1.43  rillig static void
    198   1.43  rillig lex_number(void)
    199   1.43  rillig {
    200  1.198  rillig 	for (unsigned char s = 'A'; s != 'f' && s != 'i' && s != 'u';) {
    201  1.242  rillig 		unsigned char ch = (unsigned char)*in.p;
    202  1.232  rillig 		if (skip_line_continuation())
    203  1.198  rillig 			continue;
    204  1.199  rillig 		if (ch >= array_length(lex_number_row)
    205  1.199  rillig 		    || lex_number_row[ch] == 0)
    206  1.198  rillig 			break;
    207  1.198  rillig 
    208  1.198  rillig 		unsigned char row = lex_number_row[ch];
    209  1.198  rillig 		if (lex_number_state[row][s - 'A'] == ' ') {
    210  1.237  rillig 			// lex_number_state[0][s - 'A'] now indicates the type:
    211  1.237  rillig 			// f = floating, i = integer, u = unknown
    212  1.198  rillig 			return;
    213  1.198  rillig 		}
    214  1.198  rillig 
    215  1.198  rillig 		s = lex_number_state[row][s - 'A'];
    216  1.198  rillig 		token_add_char(inp_next());
    217   1.43  rillig 	}
    218   1.43  rillig }
    219   1.43  rillig 
    220   1.43  rillig static void
    221   1.43  rillig lex_word(void)
    222   1.43  rillig {
    223  1.198  rillig 	for (;;) {
    224  1.242  rillig 		if (is_identifier_part(*in.p))
    225  1.242  rillig 			token_add_char(*in.p++);
    226  1.232  rillig 		else if (skip_line_continuation())
    227  1.232  rillig 			continue;
    228  1.232  rillig 		else
    229  1.198  rillig 			return;
    230  1.198  rillig 	}
    231   1.43  rillig }
    232   1.43  rillig 
    233   1.43  rillig static void
    234   1.43  rillig lex_char_or_string(void)
    235   1.43  rillig {
    236  1.212  rillig 	for (char delim = token.s[token.len - 1];;) {
    237  1.242  rillig 		if (*in.p == '\n') {
    238  1.198  rillig 			diag(1, "Unterminated literal");
    239  1.198  rillig 			return;
    240  1.198  rillig 		}
    241  1.198  rillig 
    242  1.242  rillig 		token_add_char(*in.p++);
    243  1.212  rillig 		if (token.s[token.len - 1] == delim)
    244  1.198  rillig 			return;
    245  1.198  rillig 
    246  1.212  rillig 		if (token.s[token.len - 1] == '\\') {
    247  1.242  rillig 			if (*in.p == '\n')
    248  1.242  rillig 				in.token_end_line++;
    249  1.198  rillig 			token_add_char(inp_next());
    250  1.198  rillig 		}
    251   1.52  rillig 	}
    252   1.43  rillig }
    253   1.43  rillig 
    254   1.84  rillig /* Guess whether the current token is a declared type. */
    255   1.57  rillig static bool
    256  1.107  rillig probably_typename(void)
    257   1.57  rillig {
    258  1.211  rillig 	if (ps.prev_lsym == lsym_modifier)
    259  1.198  rillig 		return true;
    260  1.221  rillig 	if (ps.in_init)
    261  1.198  rillig 		return false;
    262  1.198  rillig 	if (ps.in_stmt_or_decl)	/* XXX: this condition looks incorrect */
    263  1.198  rillig 		return false;
    264  1.220  rillig 	if (ps.prev_lsym == lsym_semicolon
    265  1.220  rillig 	    || ps.prev_lsym == lsym_lbrace
    266  1.220  rillig 	    || ps.prev_lsym == lsym_rbrace) {
    267  1.242  rillig 		if (in.p[0] == '*' && in.p[1] != '=')
    268  1.220  rillig 			return true;
    269  1.220  rillig 		/* XXX: is_identifier_start */
    270  1.242  rillig 		if (ch_isalpha(in.p[0]))
    271  1.220  rillig 			return true;
    272  1.220  rillig 	}
    273   1.70  rillig 	return false;
    274   1.57  rillig }
    275   1.57  rillig 
    276   1.84  rillig static int
    277   1.84  rillig bsearch_typenames(const char *key)
    278   1.84  rillig {
    279  1.198  rillig 	const char **arr = typenames.items;
    280  1.225  rillig 	unsigned lo = 0;
    281  1.225  rillig 	unsigned hi = typenames.len;
    282  1.198  rillig 
    283  1.225  rillig 	while (lo < hi) {
    284  1.225  rillig 		unsigned mid = (lo + hi) / 2;
    285  1.198  rillig 		int cmp = strcmp(arr[mid], key);
    286  1.198  rillig 		if (cmp < 0)
    287  1.198  rillig 			lo = mid + 1;
    288  1.198  rillig 		else if (cmp > 0)
    289  1.225  rillig 			hi = mid;
    290  1.198  rillig 		else
    291  1.225  rillig 			return (int)mid;
    292  1.198  rillig 	}
    293  1.225  rillig 	return -1 - (int)lo;
    294   1.84  rillig }
    295   1.84  rillig 
    296   1.63  rillig static bool
    297   1.63  rillig is_typename(void)
    298   1.63  rillig {
    299  1.236  rillig 	if (ps.prev_lsym == lsym_tag)
    300  1.236  rillig 		return true;
    301  1.198  rillig 	if (opt.auto_typedefs &&
    302  1.212  rillig 	    token.len >= 2 && memcmp(token.s + token.len - 2, "_t", 2) == 0)
    303  1.198  rillig 		return true;
    304   1.63  rillig 
    305  1.212  rillig 	return bsearch_typenames(token.s) >= 0;
    306   1.63  rillig }
    307   1.63  rillig 
    308  1.225  rillig void
    309  1.225  rillig register_typename(const char *name)
    310  1.225  rillig {
    311  1.225  rillig 	if (typenames.len >= typenames.cap) {
    312  1.225  rillig 		typenames.cap = 16 + 2 * typenames.cap;
    313  1.225  rillig 		typenames.items = nonnull(realloc(typenames.items,
    314  1.225  rillig 			sizeof(typenames.items[0]) * typenames.cap));
    315  1.225  rillig 	}
    316  1.225  rillig 
    317  1.225  rillig 	int pos = bsearch_typenames(name);
    318  1.225  rillig 	if (pos >= 0)
    319  1.225  rillig 		return;		/* already in the list */
    320  1.225  rillig 
    321  1.225  rillig 	pos = -1 - pos;
    322  1.225  rillig 	memmove(typenames.items + pos + 1, typenames.items + pos,
    323  1.225  rillig 	    sizeof(typenames.items[0]) * (typenames.len++ - (unsigned)pos));
    324  1.225  rillig 	typenames.items[pos] = nonnull(strdup(name));
    325  1.225  rillig }
    326  1.225  rillig 
    327  1.115  rillig static int
    328  1.115  rillig cmp_keyword_by_name(const void *key, const void *elem)
    329  1.115  rillig {
    330  1.198  rillig 	return strcmp(key, ((const struct keyword *)elem)->name);
    331  1.115  rillig }
    332  1.115  rillig 
/*
 * Starting at the '(' that p points to, scan the rest of the line and guess
 * whether it belongs to a function definition (true) or to a function
 * declaration (false).
 */
static bool
probably_function_definition(const char *p)
{
	// TODO: Don't look at characters in comments, see lsym_funcname.c.
	int depth = 0;

	while (*p != '\n') {
		if (*p == '(')
			depth++;
		else if (*p == ')' && --depth == 0) {
			p++;

			/* Skip words like '__dead' or '__unused'. */
			while (*p != '\n'
			    && (ch_isspace(*p) || is_identifier_part(*p)))
				p++;

			switch (*p) {
			case ';':	/* func(...); */
			case ',':	/* double abs(), pi; */
				return false;
			case '(':	/* func(...) __attribute__((...)) */
				depth++;	/* func(...) __printflike(...) */
				break;
			default:	/* func(...) at the end of the line, or
					 * func(...) { ... */
				return true;
			}
		}

		if (depth == 1 && p[0] == '*' && p[1] == ',')
			return false;
		p++;
	}

	/*
	 * To further reduce the cases where indent wrongly treats an
	 * incomplete function declaration as a function definition, thus
	 * adding a newline before the function name, it may be worth looking
	 * for parameter names, as these are often omitted in function
	 * declarations and only included in function definitions. Or just
	 * increase the lookahead to more than just the current line of input,
	 * until the next '{'.
	 */
	return true;
}
    380  1.155  rillig 
                       /*
                        * Try to read a number or a word (identifier, keyword or type name) from
                        * the current input position into the token and classify it.
                        *
                        * Returns lsym_eof as a placeholder if the input starts with neither a
                        * number nor an identifier; in that case nothing has been consumed.
                        * Otherwise returns the symbol for the token and updates ps.next_unary,
                        * and possibly ps.line_has_func_def and ps.in_func_def_params.
                        */
    381  1.100  rillig static lexer_symbol
    382  1.107  rillig lexi_alnum(void)
    383    1.1     cgd {
    384  1.242  rillig 	if (ch_isdigit(in.p[0]) ||
    385  1.242  rillig 	    (in.p[0] == '.' && ch_isdigit(in.p[1]))) {
    386  1.198  rillig 		lex_number();
    387  1.242  rillig 	} else if (is_identifier_start(in.p[0])) {
    388  1.198  rillig 		lex_word();
    389  1.198  rillig 
                       		/* The single letter L directly before a quote: L"..." or L'...'. */
    390  1.212  rillig 		if (token.len == 1 && token.s[0] == 'L' &&
    391  1.242  rillig 		    (in.p[0] == '"' || in.p[0] == '\'')) {
    392  1.242  rillig 			token_add_char(*in.p++);
    393  1.198  rillig 			lex_char_or_string();
    394  1.198  rillig 			ps.next_unary = false;
    395  1.198  rillig 			return lsym_word;
    396  1.198  rillig 		}
    397  1.198  rillig 	} else
    398  1.198  rillig 		return lsym_eof;	/* just as a placeholder */
    399  1.198  rillig 
                       	/* Skip blanks so that *in.p is the first character after the token. */
    400  1.242  rillig 	while (ch_isblank(*in.p))
    401  1.242  rillig 		in.p++;
    402  1.198  rillig 
    403  1.211  rillig 	ps.next_unary = ps.prev_lsym == lsym_tag
    404  1.238  rillig 	    || ps.prev_lsym == lsym_typedef
    405  1.242  rillig 	    || (ps.prev_lsym == lsym_modifier && *in.p == '*');
    406  1.198  rillig 
                       	/* The word directly after a tag keyword, outside of parentheses. */
    407  1.228  rillig 	if (ps.prev_lsym == lsym_tag && ps.paren.len == 0)
    408  1.230  rillig 		return lsym_type;
                       	/* The first word inside the parentheses of a 'for', followed by '*'. */
    409  1.239  rillig 	if (ps.spaced_expr_psym == psym_for_exprs
    410  1.239  rillig 	    && ps.prev_lsym == lsym_lparen && ps.paren.len == 1
    411  1.242  rillig 	    && *in.p == '*') {
    412  1.239  rillig 		ps.next_unary = true;
    413  1.239  rillig 		return lsym_type;
    414  1.239  rillig 	}
    415  1.198  rillig 
                       	/*
                       	 * Add a NUL after the token without counting it in token.len, so that
                       	 * the strcmp-based lookups below can treat token.s as a C string.
                       	 */
    416  1.237  rillig 	token_add_char('\0');	// Terminate in non-debug mode as well.
    417  1.198  rillig 	token.len--;
    418  1.212  rillig 	const struct keyword *kw = bsearch(token.s, keywords,
    419  1.198  rillig 	    array_length(keywords), sizeof(keywords[0]), cmp_keyword_by_name);
    420  1.201  rillig 	lexer_symbol lsym = lsym_word;
    421  1.201  rillig 	if (kw != NULL) {
    422  1.236  rillig 		lsym = kw->lsym;
    423  1.201  rillig 		ps.next_unary = true;
                       		/*
                       		 * Tag and type keywords share the handling of type names below;
                       		 * all other keywords are returned as they are.
                       		 */
    424  1.236  rillig 		if (lsym == lsym_tag || lsym == lsym_type)
    425  1.198  rillig 			goto found_typename;
    426  1.236  rillig 		return lsym;
    427  1.201  rillig 	}
    428  1.198  rillig 
    429  1.201  rillig 	if (is_typename()) {
    430  1.230  rillig 		lsym = lsym_type;
    431  1.198  rillig 		ps.next_unary = true;
                       		/* Also reached via goto from the keyword lookup above. */
    432  1.118  rillig found_typename:
                       		/*
                       		 * Not directly after '.' or a unary operator (lexi reports '->'
                       		 * as lsym_unary_op).
                       		 */
    433  1.211  rillig 		if (ps.prev_lsym != lsym_period
    434  1.211  rillig 		    && ps.prev_lsym != lsym_unary_op) {
    435  1.236  rillig 			if (lsym == lsym_tag)
    436  1.198  rillig 				return lsym_tag;
    437  1.228  rillig 			if (ps.paren.len == 0)
    438  1.230  rillig 				return lsym_type;
    439  1.198  rillig 		}
    440   1.90  rillig 	}
    441   1.89  rillig 
                       	/*
                       	 * Look ahead for '(' or ')(' after the word, to guess whether the word
                       	 * is the name of a function that is being defined.
                       	 */
    442  1.242  rillig 	const char *p = in.p;
    443  1.234  rillig 	if (*p == ')')
    444  1.234  rillig 		p++;
    445  1.234  rillig 	if (*p == '(' && ps.psyms.len < 3 && ps.ind_level == 0 &&
    446  1.221  rillig 	    !ps.in_func_def_params && !ps.in_init) {
    447   1.89  rillig 
                       		/*
                       		 * If the word is followed by ')', it must be the only thing
                       		 * inside one level of parentheses, as in '(name)(...)'.
                       		 */
    448  1.242  rillig 		bool maybe_function_definition = *in.p == ')'
    449  1.234  rillig 		    ? ps.paren.len == 1 && ps.prev_lsym != lsym_unary_op
    450  1.234  rillig 		    : ps.paren.len == 0;
    451  1.234  rillig 		if (maybe_function_definition
    452  1.234  rillig 		    && probably_function_definition(p)) {
    453  1.222  rillig 			ps.line_has_func_def = true;
    454  1.198  rillig 			if (ps.in_decl)
    455  1.198  rillig 				ps.in_func_def_params = true;
    456  1.198  rillig 			return lsym_funcname;
    457  1.198  rillig 		}
    458  1.198  rillig 
    459  1.228  rillig 	} else if (ps.paren.len == 0 && probably_typename()) {
    460  1.198  rillig 		ps.next_unary = true;
    461  1.230  rillig 		return lsym_type;
    462  1.155  rillig 	}
    463   1.89  rillig 
    464  1.201  rillig 	return lsym;
    465   1.89  rillig }
    466   1.75  rillig 
    467  1.234  rillig static void
    468  1.234  rillig check_parenthesized_function_definition(void)
    469  1.234  rillig {
    470  1.242  rillig 	const char *p = in.p;
    471  1.234  rillig 	while (ch_isblank(*p))
    472  1.234  rillig 		p++;
    473  1.234  rillig 	if (is_identifier_start(*p))
    474  1.234  rillig 		while (is_identifier_part(*p))
    475  1.234  rillig 			p++;
    476  1.234  rillig 	while (ch_isblank(*p))
    477  1.234  rillig 		p++;
    478  1.234  rillig 	if (*p == ')') {
    479  1.234  rillig 		p++;
    480  1.234  rillig 		while (ch_isblank(*p))
    481  1.234  rillig 			p++;
    482  1.234  rillig 		if (*p == '(' && probably_function_definition(p))
    483  1.234  rillig 			ps.line_has_func_def = true;
    484  1.234  rillig 	}
    485  1.234  rillig }
    486  1.234  rillig 
    487  1.163  rillig static bool
    488  1.234  rillig is_asterisk_unary(void)
    489  1.163  rillig {
    490  1.242  rillig 	const char *p = in.p;
    491  1.233  rillig 	while (*p == '*' || ch_isblank(*p))
    492  1.233  rillig 		p++;
    493  1.233  rillig 	if (*p == ')')
    494  1.204  rillig 		return true;
    495  1.198  rillig 	if (ps.next_unary || ps.in_func_def_params)
    496  1.198  rillig 		return true;
    497  1.211  rillig 	if (ps.prev_lsym == lsym_word ||
    498  1.211  rillig 	    ps.prev_lsym == lsym_rparen ||
    499  1.211  rillig 	    ps.prev_lsym == lsym_rbracket)
    500  1.198  rillig 		return false;
    501  1.228  rillig 	return ps.in_decl && ps.paren.len > 0;
    502  1.163  rillig }
    503  1.163  rillig 
    504  1.200  rillig static bool
    505  1.200  rillig probably_in_function_definition(void)
    506  1.200  rillig {
    507  1.242  rillig 	for (const char *p = in.p; *p != '\n';) {
    508  1.231  rillig 		if (ch_isspace(*p))
    509  1.231  rillig 			p++;
    510  1.231  rillig 		else if (is_identifier_start(*p)) {
    511  1.231  rillig 			p++;
    512  1.231  rillig 			while (is_identifier_part(*p))
    513  1.231  rillig 				p++;
    514  1.200  rillig 		} else
    515  1.231  rillig 			return *p == '(';
    516  1.200  rillig 	}
    517  1.200  rillig 	return false;
    518  1.200  rillig }
    519  1.200  rillig 
    520  1.161  rillig static void
    521  1.234  rillig lex_asterisk_unary(void)
    522  1.161  rillig {
    523  1.242  rillig 	while (*in.p == '*' || ch_isspace(*in.p)) {
    524  1.242  rillig 		if (*in.p == '*')
    525  1.198  rillig 			token_add_char('*');
    526  1.242  rillig 		if (*in.p == '\n')
    527  1.242  rillig 			in.token_end_line++;
    528  1.198  rillig 		inp_skip();
    529  1.198  rillig 	}
    530  1.198  rillig 
    531  1.200  rillig 	if (ps.in_decl && probably_in_function_definition())
    532  1.222  rillig 		ps.line_has_func_def = true;
    533  1.161  rillig }
    534  1.161  rillig 
/*
 * Advance *pp over leading blanks and then, if the text at that position
 * starts with s, over s as well.
 *
 * Returns whether s was found. The blanks stay skipped in either case.
 */
static bool
skip(const char **pp, const char *s)
{
	const char *cur = *pp;

	while (ch_isblank(*cur))
		cur++;
	*pp = cur;

	size_t len = strlen(s);
	if (strncmp(cur, s, len) != 0)
		return false;

	*pp = cur + len;
	return true;
}
    547  1.193  rillig 
    548  1.194  rillig static void
    549  1.193  rillig lex_indent_comment(void)
    550  1.193  rillig {
    551  1.242  rillig 	const char *p = in.line.s;
    552  1.225  rillig 	if (skip(&p, "/*") && skip(&p, "INDENT")) {
    553  1.225  rillig 		enum indent_enabled enabled;
    554  1.225  rillig 		if (skip(&p, "ON") || *p == '*')
    555  1.225  rillig 			enabled = indent_last_off_line;
    556  1.225  rillig 		else if (skip(&p, "OFF"))
    557  1.225  rillig 			enabled = indent_off;
    558  1.225  rillig 		else
    559  1.225  rillig 			return;
    560  1.225  rillig 		if (skip(&p, "*/\n")) {
    561  1.225  rillig 			if (lab.len > 0 || code.len > 0 || com.len > 0)
    562  1.225  rillig 				output_line();
    563  1.225  rillig 			indent_enabled = enabled;
    564  1.225  rillig 		}
    565  1.225  rillig 	}
    566  1.193  rillig }
    567  1.193  rillig 
    568   1.89  rillig /* Reads the next token, placing it in the global variable "token". */
                       /*
                        * Returns the lexer symbol that classifies the token. For all tokens that
                        * are not handled by lexi_alnum, ps.next_unary is set at the end to tell
                        * whether an operator following this token is to be taken as unary.
                        */
    569  1.100  rillig lexer_symbol
    570  1.106  rillig lexi(void)
    571   1.89  rillig {
    572  1.223  rillig 	buf_clear(&token);
    573  1.198  rillig 
                       	/* Skip blanks and line continuations before the token. */
    574  1.198  rillig 	for (;;) {
    575  1.242  rillig 		if (ch_isblank(*in.p))
    576  1.242  rillig 			in.p++;
    577  1.232  rillig 		else if (skip_line_continuation())
    578  1.232  rillig 			continue;
    579  1.232  rillig 		else
    580  1.198  rillig 			break;
    581  1.198  rillig 	}
    582  1.242  rillig 	in.token_start_line = in.token_end_line;
    583  1.198  rillig 
                       	/* lsym_eof is the placeholder for "neither a number nor a word". */
    584  1.198  rillig 	lexer_symbol alnum_lsym = lexi_alnum();
    585  1.205  rillig 	if (alnum_lsym != lsym_eof)
    586  1.198  rillig 		return alnum_lsym;
    587   1.75  rillig 
    588  1.198  rillig 	/* Scan a non-alphanumeric token */
    589   1.16   kamil 
    590  1.198  rillig 	token_add_char(inp_next());
    591   1.16   kamil 
                       	/*
                       	 * Each case below sets both lsym and next_unary; next_unary is copied
                       	 * to ps.next_unary after the switch.
                       	 */
    592  1.198  rillig 	lexer_symbol lsym;
    593  1.198  rillig 	bool next_unary;
    594   1.89  rillig 
    595  1.212  rillig 	switch (token.s[token.len - 1]) {
    596  1.160  rillig 
    597  1.220  rillig 	case '#':
    598  1.220  rillig 		lsym = lsym_preprocessing;
    599  1.220  rillig 		next_unary = ps.next_unary;
    600  1.208  rillig 		break;
    601  1.208  rillig 
    602  1.198  rillig 	case '\n':
    603  1.198  rillig 		/* if data has been exhausted, the '\n' is a dummy. */
    604  1.198  rillig 		lsym = had_eof ? lsym_eof : lsym_newline;
    605  1.198  rillig 		next_unary = ps.next_unary;
    606  1.198  rillig 		break;
    607  1.198  rillig 
    608  1.220  rillig 	/* INDENT OFF */
    609  1.220  rillig 	case ')':	lsym = lsym_rparen;	next_unary = false;	break;
    610  1.220  rillig 	case '[':	lsym = lsym_lbracket;	next_unary = true;	break;
    611  1.220  rillig 	case ']':	lsym = lsym_rbracket;	next_unary = false;	break;
    612  1.220  rillig 	case '{':	lsym = lsym_lbrace;	next_unary = true;	break;
    613  1.220  rillig 	case '}':	lsym = lsym_rbrace;	next_unary = true;	break;
    614  1.220  rillig 	case '.':	lsym = lsym_period;	next_unary = false;	break;
    615  1.220  rillig 	case '?':	lsym = lsym_question;	next_unary = true;	break;
    616  1.220  rillig 	case ',':	lsym = lsym_comma;	next_unary = true;	break;
    617  1.220  rillig 	case ';':	lsym = lsym_semicolon;	next_unary = true;	break;
    618  1.220  rillig 	/* INDENT ON */
    619  1.198  rillig 
    620  1.234  rillig 	case '(':
                       		/* The '(' that was just read is the first character of the line. */
    621  1.242  rillig 		if (in.p == in.line.s + 1)
    622  1.234  rillig 			check_parenthesized_function_definition();
    623  1.234  rillig 		lsym = lsym_lparen;
    624  1.234  rillig 		next_unary = true;
    625  1.234  rillig 		break;
    626  1.234  rillig 
    627  1.231  rillig 	case '+':
    628  1.198  rillig 	case '-':
    629  1.198  rillig 		lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
    630  1.198  rillig 		next_unary = true;
    631  1.198  rillig 
    632  1.199  rillig 		/* '++' or '--' */
    633  1.242  rillig 		if (*in.p == token.s[token.len - 1]) {
    634  1.242  rillig 			token_add_char(*in.p++);
                       			/* After an operand, it is a postfix operator unless a
                       			 * unary operator was expected. */
    635  1.211  rillig 			if (ps.prev_lsym == lsym_word ||
    636  1.211  rillig 			    ps.prev_lsym == lsym_rparen ||
    637  1.211  rillig 			    ps.prev_lsym == lsym_rbracket) {
    638  1.199  rillig 				lsym = ps.next_unary
    639  1.199  rillig 				    ? lsym_unary_op : lsym_postfix_op;
    640  1.198  rillig 				next_unary = false;
    641  1.198  rillig 			}
    642  1.198  rillig 
    643  1.242  rillig 		} else if (*in.p == '=') {	/* '+=' or '-=' */
    644  1.242  rillig 			token_add_char(*in.p++);
    645  1.198  rillig 
    646  1.242  rillig 		} else if (*in.p == '>') {	/* '->' */
    647  1.242  rillig 			token_add_char(*in.p++);
    648  1.198  rillig 			lsym = lsym_unary_op;
    649  1.198  rillig 			next_unary = false;
    650  1.198  rillig 			ps.want_blank = false;
    651  1.198  rillig 		}
    652  1.198  rillig 		break;
    653  1.198  rillig 
    654  1.220  rillig 	case ':':
                       		/*
                       		 * A ':' closes the innermost open '?' if there is one; otherwise
                       		 * its kind depends on whether a variable is being declared.
                       		 */
    655  1.220  rillig 		lsym = ps.quest_level > 0
    656  1.225  rillig 		    ? (ps.quest_level--, lsym_question_colon)
    657  1.225  rillig 		    : ps.in_var_decl ? lsym_other_colon : lsym_label_colon;
    658  1.220  rillig 		next_unary = true;
    659  1.220  rillig 		break;
    660  1.220  rillig 
    661  1.220  rillig 	case '*':
    662  1.242  rillig 		if (*in.p == '=') {
    663  1.242  rillig 			token_add_char(*in.p++);
    664  1.220  rillig 			lsym = lsym_binary_op;
    665  1.234  rillig 		} else if (is_asterisk_unary()) {
                       			/* Collects all directly following asterisks into this token. */
    666  1.234  rillig 			lex_asterisk_unary();
    667  1.220  rillig 			lsym = lsym_unary_op;
    668  1.220  rillig 		} else
    669  1.220  rillig 			lsym = lsym_binary_op;
    670  1.220  rillig 		next_unary = true;
    671  1.220  rillig 		break;
    672  1.220  rillig 
    673  1.198  rillig 	case '=':
    674  1.221  rillig 		if (ps.in_var_decl)
    675  1.221  rillig 			ps.in_init = true;
                       		/* '==' */
    676  1.242  rillig 		if (*in.p == '=')
    677  1.242  rillig 			token_add_char(*in.p++);
    678  1.198  rillig 		lsym = lsym_binary_op;
    679  1.198  rillig 		next_unary = true;
    680  1.198  rillig 		break;
    681   1.75  rillig 
    682  1.198  rillig 	case '>':
    683  1.198  rillig 	case '<':
    684  1.225  rillig 	case '!':		/* ops like <, <<, <=, !=, etc. */
                       		/* Up to two more characters, to cover '<<=' and '>>=' as well. */
    685  1.242  rillig 		if (*in.p == '>' || *in.p == '<' || *in.p == '=')
    686  1.242  rillig 			token_add_char(*in.p++);
    687  1.242  rillig 		if (*in.p == '=')
    688  1.242  rillig 			token_add_char(*in.p++);
    689  1.198  rillig 		lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
    690  1.198  rillig 		next_unary = true;
    691  1.198  rillig 		break;
    692   1.75  rillig 
    693  1.220  rillig 	case '\'':
    694  1.220  rillig 	case '"':
    695  1.220  rillig 		lex_char_or_string();
    696  1.220  rillig 		lsym = lsym_word;
    697  1.220  rillig 		next_unary = false;
    698  1.198  rillig 		break;
    699    1.1     cgd 
    700  1.198  rillig 	default:
                       		/*
                       		 * A slash followed by '*' or '/' starts a comment. Only the
                       		 * two-character comment opener becomes the token here.
                       		 */
    701  1.212  rillig 		if (token.s[token.len - 1] == '/'
    702  1.242  rillig 		    && (*in.p == '*' || *in.p == '/')) {
    703  1.198  rillig 			enum indent_enabled prev = indent_enabled;
    704  1.198  rillig 			lex_indent_comment();
                       			/* Indentation was just switched off by this comment. */
    705  1.198  rillig 			if (prev == indent_on && indent_enabled == indent_off)
    706  1.223  rillig 				buf_clear(&out.indent_off_text);
    707  1.242  rillig 			token_add_char(*in.p++);
    708  1.198  rillig 			lsym = lsym_comment;
    709  1.198  rillig 			next_unary = ps.next_unary;
    710  1.198  rillig 			break;
    711  1.198  rillig 		}
    712  1.198  rillig 
    713  1.225  rillig 		/* punctuation like '%', '&&', '/', '^', '||', '~' */
    714  1.214  rillig 		lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
                       		/* A doubled character or a trailing '=' makes it a binary operator. */
    715  1.242  rillig 		if (*in.p == token.s[token.len - 1])
    716  1.242  rillig 			token_add_char(*in.p++), lsym = lsym_binary_op;
    717  1.242  rillig 		if (*in.p == '=')
    718  1.242  rillig 			token_add_char(*in.p++), lsym = lsym_binary_op;
    719  1.198  rillig 
    720  1.198  rillig 		next_unary = true;
    721  1.198  rillig 	}
    722  1.198  rillig 
    723  1.198  rillig 	ps.next_unary = next_unary;
    724   1.75  rillig 
    725  1.198  rillig 	return lsym;
    726    1.1     cgd }
    727