lsym_word.c revision 1.7 1 1.7 rillig /* $NetBSD: lsym_word.c,v 1.7 2023/06/17 22:09:24 rillig Exp $ */
2 1.1 rillig
3 1.1 rillig /*
4 1.1 rillig * Tests for the token lsym_word, which represents a constant, a string
5 1.1 rillig * literal or an identifier.
6 1.1 rillig *
7 1.1 rillig * See also:
8 1.1 rillig * lsym_funcname.c for an identifier followed by '('
9 1.1 rillig */
10 1.1 rillig
11 1.1 rillig // TODO: Is '"string"(' syntactically valid in any context?
12 1.1 rillig // TODO: Is '123(' syntactically valid in any context?
13 1.1 rillig // TODO: Would the output of the above depend on -pcs/-npcs?
14 1.6 rillig // TODO: Add more systematic tests.
15 1.6 rillig // TODO: Completely cover each state transition in lex_number_state.
16 1.1 rillig
17 1.7 rillig // TODO: Consider splitting this token into lsym_name and lsym_value, to
18 1.7 rillig // TODO: make it easier to skip tokens during lookahead, for example since
19 1.7 rillig // TODO: L"" is not an identifier but a string literal.
20 1.7 rillig
21 1.5 rillig //indent input
22 1.1 rillig // TODO: add input
23 1.5 rillig //indent end
24 1.1 rillig
25 1.5 rillig //indent run-equals-input
26 1.2 rillig
27 1.2 rillig
28 1.2 rillig /*
29 1.2 rillig * Since 2019-04-04 and before NetBSD lexi.c 1.149 from 2021-11-20, the first
30 1.2 rillig * character after a backslash continuation was always considered part of a
31 1.2 rillig * word, no matter whether it was a word character or not.
32 1.2 rillig */
33 1.5 rillig //indent input
34 1.2 rillig int var\
35 1.2 rillig +name = 4;
36 1.5 rillig //indent end
37 1.2 rillig
38 1.5 rillig //indent run
39 1.3 rillig int var + name = 4;
40 1.5 rillig //indent end
41 1.6 rillig
42 1.6 rillig
43 1.6 rillig //indent input
44 1.6 rillig wchar_t wide_string[] = L"wide string";
45 1.6 rillig //indent end
46 1.6 rillig
47 1.6 rillig /*
48 1.6 rillig * Regardless of the line length, the 'L' must never be separated from the
49 1.6 rillig * string literal. Before lexi.c 1.167 from 2021-11-28, the 'L' was a
50 1.6 rillig * separate token, which could have resulted in accidental spacing between the
51 1.6 rillig * 'L' and the following "".
52 1.6 rillig */
53 1.6 rillig //indent run-equals-input -di0
54 1.6 rillig
55 1.6 rillig //indent run-equals-input -di0 -l25
56 1.6 rillig
57 1.6 rillig //indent run-equals-input -di0 -l1
58 1.6 rillig
59 1.6 rillig
60 1.6 rillig //indent input
61 1.6 rillig wchar_t wide_char[] = L'w';
62 1.6 rillig //indent end
63 1.6 rillig
64 1.6 rillig //indent run-equals-input -di0
65 1.6 rillig
66 1.6 rillig
67 1.6 rillig /* Binary number literals, a GCC extension that was standardized in C23. */
68 1.6 rillig //indent input
69 1.6 rillig #define b00101010 -1
70 1.6 rillig void t(void) {
71 1.6 rillig unsigned a[] = {0b00101010, 0x00005678, 02, 17U};
72 1.6 rillig float x[] = {.7f, 0.7f};
73 1.6 rillig unsigned long ul[] = {0b00001111UL, 0x01010101UL, 02UL, 17UL};
74 1.6 rillig
75 1.6 rillig if (0 b00101010)
76 1.6 rillig return;
77 1.6 rillig /* $ '0r' is not a number base prefix, so the tokens are split. */
78 1.6 rillig if (0r12345)
79 1.6 rillig return;
80 1.6 rillig }
81 1.6 rillig //indent end
82 1.6 rillig
83 1.6 rillig //indent run
84 1.6 rillig #define b00101010 -1
85 1.6 rillig void
86 1.6 rillig t(void)
87 1.6 rillig {
88 1.6 rillig unsigned a[] = {0b00101010, 0x00005678, 02, 17U};
89 1.6 rillig float x[] = {.7f, 0.7f};
90 1.6 rillig unsigned long ul[] = {0b00001111UL, 0x01010101UL, 02UL, 17UL};
91 1.6 rillig
92 1.6 rillig if (0 b00101010)
93 1.6 rillig return;
94 1.6 rillig if (0 r12345)
95 1.6 rillig return;
96 1.6 rillig }
97 1.6 rillig //indent end
98 1.6 rillig
99 1.6 rillig
100 1.6 rillig /* Floating point numbers. */
101 1.6 rillig //indent input
102 1.6 rillig void t(void) {
103 1.6 rillig unsigned long x = 314UL;
104 1.6 rillig double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L};
105 1.6 rillig int z = 0b0101;
106 1.6 rillig DO_NOTHING;
107 1.6 rillig x._y = 5;
108 1.6 rillig }
109 1.6 rillig //indent end
110 1.6 rillig
111 1.6 rillig //indent run
112 1.6 rillig void
113 1.6 rillig t(void)
114 1.6 rillig {
115 1.6 rillig unsigned long x = 314UL;
116 1.6 rillig double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L};
117 1.6 rillig int z = 0b0101;
118 1.6 rillig DO_NOTHING;
119 1.6 rillig x._y = 5;
120 1.6 rillig }
121 1.6 rillig //indent end
122 1.6 rillig
123 1.6 rillig
124 1.6 rillig /*
125 1.6 rillig * Test identifiers containing '$', which some compilers support as an
126 1.6 rillig * extension to the C standard.
127 1.6 rillig */
128 1.6 rillig //indent input
129 1.6 rillig int $ = jQuery; // just kidding
130 1.6 rillig const char SYS$LOGIN[]="$HOME";
131 1.6 rillig //indent end
132 1.6 rillig
133 1.6 rillig //indent run
134 1.6 rillig int $ = jQuery; // just kidding
135 1.6 rillig const char SYS$LOGIN[] = "$HOME";
136 1.6 rillig //indent end
137 1.6 rillig
138 1.6 rillig
139 1.6 rillig /*
140 1.6 rillig * Test the tokenizer for number constants.
141 1.6 rillig *
142 1.6 rillig * When the tokenizer has read a prefix that is not a valid token by itself
143 1.6 rillig * (such as '0x') but could be extended to form one (such as '0x123'), indent
144 1.6 rillig * does not care and returns the incomplete prefix as a token nevertheless.
145 1.6 rillig */
146 1.6 rillig //indent input
147 1.6 rillig int unfinished_hex_prefix = 0x;
148 1.6 rillig double unfinished_hex_float = 0x123p;
149 1.6 rillig //indent end
150 1.6 rillig
151 1.6 rillig //indent run-equals-input -di0
152