rust-parse.c revision 1.1.1.3 1 1.1 christos /* Rust expression parsing for GDB, the GNU debugger.
2 1.1 christos
3 1.1.1.2 christos Copyright (C) 2016-2024 Free Software Foundation, Inc.
4 1.1 christos
5 1.1 christos This file is part of GDB.
6 1.1 christos
7 1.1 christos This program is free software; you can redistribute it and/or modify
8 1.1 christos it under the terms of the GNU General Public License as published by
9 1.1 christos the Free Software Foundation; either version 3 of the License, or
10 1.1 christos (at your option) any later version.
11 1.1 christos
12 1.1 christos This program is distributed in the hope that it will be useful,
13 1.1 christos but WITHOUT ANY WARRANTY; without even the implied warranty of
14 1.1 christos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 1.1 christos GNU General Public License for more details.
16 1.1 christos
17 1.1 christos You should have received a copy of the GNU General Public License
18 1.1 christos along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 1.1 christos
20 1.1 christos
21 1.1 christos #include "block.h"
22 1.1 christos #include "charset.h"
23 1.1 christos #include "cp-support.h"
24 1.1 christos #include "gdbsupport/gdb_obstack.h"
25 1.1 christos #include "gdbsupport/gdb_regex.h"
26 1.1 christos #include "rust-lang.h"
27 1.1 christos #include "parser-defs.h"
28 1.1 christos #include "gdbsupport/selftest.h"
29 1.1 christos #include "value.h"
30 1.1 christos #include "gdbarch.h"
31 1.1 christos #include "rust-exp.h"
32 1.1.1.2 christos #include "inferior.h"
33 1.1 christos
34 1.1 christos using namespace expr;
35 1.1 christos
/* The pattern used to recognize Rust numeric literals.  It is written
   as a sequence of adjacent string literals so that each alternative
   can carry its own commentary.  The macros defined alongside name
   the parenthesized subexpressions that callers inspect.  */

static const char number_regex_text[] =
  /* Subexpression 1 wraps the whole alternation and is anchored at
     the start of the input.  */
  "^("

  /* Alternative: floating point with digits on both sides of the
     decimal point, then an optional exponent (subexpression 2) and
     an optional type suffix (subexpression 3).  */
  "[0-9][0-9_]*\\.[0-9][0-9_]*([eE][-+]?[0-9][0-9_]*)?(f32|f64)?"
#define FLOAT_TYPE1 3
  "|"

  /* Alternative: floating point with an exponent but no decimal
     point, then an optional type suffix (subexpression 4).  */
#define FLOAT_TYPE2 4
  "[0-9][0-9_]*[eE][-+]?[0-9][0-9_]*(f32|f64)?"
  "|"

  /* Alternative: a bare trailing ".".  "23." is a valid float while
     "23.e5" and "23.f32" are not, which is why this case stands
     alone.  */
  "[0-9][0-9_]*\\."
  "|"

  /* Alternative: integers.  Subexpression 5 is the integer text,
     subexpression 6 the optional type suffix, and subexpression 7
     the alternation nested inside that suffix.  */
#define INT_TEXT 5
#define INT_TYPE 6
  "(0x[a-fA-F0-9_]+|0o[0-7_]+|0b[01_]+|[0-9][0-9_]*)"
  "([iu](size|8|16|32|64|128))?"
  ")";

/* How many subexpression slots a match needs: the seven groups above
   plus the "0th" slot for the whole match.  */
#define NUM_SUBEXPRESSIONS 8

/* Compiled form of number_regex_text.  */

static regex_t number_regex;
81 1.1 christos
/* Token kinds produced by the lexer.  A single-character token is
   represented by its own character code (so '[' is itself a token);
   the named kinds below therefore begin at 256, above every ASCII
   character.  */
enum token_type : int
{
  /* Tokens that may carry a payload.  */
  GDBVAR = 256, IDENT, COMPLETE,
  INTEGER, DECIMAL_INTEGER,
  STRING, BYTESTRING,
  FLOAT,
  COMPOUND_ASSIGN,

  /* Keywords.  */
  KW_AS, KW_IF, KW_TRUE, KW_FALSE,
  KW_SUPER, KW_SELF, KW_MUT, KW_EXTERN,
  KW_CONST, KW_FN, KW_SIZEOF,

  /* Multi-character operators.  */
  DOTDOT, DOTDOTEQ,
  OROR, ANDAND,
  EQEQ, NOTEQ, LTEQ, GTEQ,
  LSH, RSH,
  COLONCOLON, ARROW,
};
124 1.1 christos
125 1.1 christos /* A typed integer constant. */
126 1.1 christos
127 1.1 christos struct typed_val_int
128 1.1 christos {
129 1.1.1.2 christos gdb_mpz val;
130 1.1 christos struct type *type;
131 1.1 christos };
132 1.1 christos
133 1.1 christos /* A typed floating point constant. */
134 1.1 christos
135 1.1 christos struct typed_val_float
136 1.1 christos {
137 1.1 christos float_data val;
138 1.1 christos struct type *type;
139 1.1 christos };
140 1.1 christos
141 1.1 christos /* A struct of this type is used to describe a token. */
142 1.1 christos
143 1.1 christos struct token_info
144 1.1 christos {
145 1.1 christos const char *name;
146 1.1 christos int value;
147 1.1 christos enum exp_opcode opcode;
148 1.1 christos };
149 1.1 christos
150 1.1 christos /* Identifier tokens. */
151 1.1 christos
152 1.1 christos static const struct token_info identifier_tokens[] =
153 1.1 christos {
154 1.1 christos { "as", KW_AS, OP_NULL },
155 1.1 christos { "false", KW_FALSE, OP_NULL },
156 1.1 christos { "if", 0, OP_NULL },
157 1.1 christos { "mut", KW_MUT, OP_NULL },
158 1.1 christos { "const", KW_CONST, OP_NULL },
159 1.1 christos { "self", KW_SELF, OP_NULL },
160 1.1 christos { "super", KW_SUPER, OP_NULL },
161 1.1 christos { "true", KW_TRUE, OP_NULL },
162 1.1 christos { "extern", KW_EXTERN, OP_NULL },
163 1.1 christos { "fn", KW_FN, OP_NULL },
164 1.1 christos { "sizeof", KW_SIZEOF, OP_NULL },
165 1.1 christos };
166 1.1 christos
167 1.1 christos /* Operator tokens, sorted longest first. */
168 1.1 christos
169 1.1 christos static const struct token_info operator_tokens[] =
170 1.1 christos {
171 1.1 christos { ">>=", COMPOUND_ASSIGN, BINOP_RSH },
172 1.1 christos { "<<=", COMPOUND_ASSIGN, BINOP_LSH },
173 1.1 christos
174 1.1 christos { "<<", LSH, OP_NULL },
175 1.1 christos { ">>", RSH, OP_NULL },
176 1.1 christos { "&&", ANDAND, OP_NULL },
177 1.1 christos { "||", OROR, OP_NULL },
178 1.1 christos { "==", EQEQ, OP_NULL },
179 1.1 christos { "!=", NOTEQ, OP_NULL },
180 1.1 christos { "<=", LTEQ, OP_NULL },
181 1.1 christos { ">=", GTEQ, OP_NULL },
182 1.1 christos { "+=", COMPOUND_ASSIGN, BINOP_ADD },
183 1.1 christos { "-=", COMPOUND_ASSIGN, BINOP_SUB },
184 1.1 christos { "*=", COMPOUND_ASSIGN, BINOP_MUL },
185 1.1 christos { "/=", COMPOUND_ASSIGN, BINOP_DIV },
186 1.1 christos { "%=", COMPOUND_ASSIGN, BINOP_REM },
187 1.1 christos { "&=", COMPOUND_ASSIGN, BINOP_BITWISE_AND },
188 1.1 christos { "|=", COMPOUND_ASSIGN, BINOP_BITWISE_IOR },
189 1.1 christos { "^=", COMPOUND_ASSIGN, BINOP_BITWISE_XOR },
190 1.1 christos { "..=", DOTDOTEQ, OP_NULL },
191 1.1 christos
192 1.1 christos { "::", COLONCOLON, OP_NULL },
193 1.1 christos { "..", DOTDOT, OP_NULL },
194 1.1 christos { "->", ARROW, OP_NULL }
195 1.1 christos };
196 1.1 christos
197 1.1 christos /* An instance of this is created before parsing, and destroyed when
198 1.1 christos parsing is finished. */
199 1.1 christos
200 1.1 christos struct rust_parser
201 1.1 christos {
202 1.1 christos explicit rust_parser (struct parser_state *state)
203 1.1 christos : pstate (state)
204 1.1 christos {
205 1.1 christos }
206 1.1 christos
207 1.1 christos DISABLE_COPY_AND_ASSIGN (rust_parser);
208 1.1 christos
209 1.1 christos /* Return the parser's language. */
210 1.1 christos const struct language_defn *language () const
211 1.1 christos {
212 1.1 christos return pstate->language ();
213 1.1 christos }
214 1.1 christos
215 1.1 christos /* Return the parser's gdbarch. */
216 1.1 christos struct gdbarch *arch () const
217 1.1 christos {
218 1.1 christos return pstate->gdbarch ();
219 1.1 christos }
220 1.1 christos
221 1.1 christos /* A helper to look up a Rust type, or fail. This only works for
222 1.1 christos types defined by rust_language_arch_info. */
223 1.1 christos
224 1.1 christos struct type *get_type (const char *name)
225 1.1 christos {
226 1.1 christos struct type *type;
227 1.1 christos
228 1.1 christos type = language_lookup_primitive_type (language (), arch (), name);
229 1.1 christos if (type == NULL)
230 1.1 christos error (_("Could not find Rust type %s"), name);
231 1.1 christos return type;
232 1.1 christos }
233 1.1 christos
234 1.1 christos std::string crate_name (const std::string &name);
235 1.1 christos std::string super_name (const std::string &ident, unsigned int n_supers);
236 1.1 christos
237 1.1 christos int lex_character ();
238 1.1 christos int lex_number ();
239 1.1 christos int lex_string ();
240 1.1 christos int lex_identifier ();
241 1.1 christos uint32_t lex_hex (int min, int max);
242 1.1.1.3 christos uint32_t lex_escape (bool is_byte);
243 1.1 christos int lex_operator ();
244 1.1 christos int lex_one_token ();
245 1.1 christos void push_back (char c);
246 1.1 christos
247 1.1 christos /* The main interface to lexing. Lexes one token and updates the
248 1.1 christos internal state. */
249 1.1 christos void lex ()
250 1.1 christos {
251 1.1 christos current_token = lex_one_token ();
252 1.1 christos }
253 1.1 christos
254 1.1 christos /* Assuming the current token is TYPE, lex the next token. */
255 1.1 christos void assume (int type)
256 1.1 christos {
257 1.1 christos gdb_assert (current_token == type);
258 1.1 christos lex ();
259 1.1 christos }
260 1.1 christos
261 1.1 christos /* Require the single-character token C, and lex the next token; or
262 1.1 christos throw an exception. */
263 1.1 christos void require (char type)
264 1.1 christos {
265 1.1 christos if (current_token != type)
266 1.1 christos error (_("'%c' expected"), type);
267 1.1 christos lex ();
268 1.1 christos }
269 1.1 christos
270 1.1 christos /* Entry point for all parsing. */
271 1.1 christos operation_up parse_entry_point ()
272 1.1 christos {
273 1.1 christos lex ();
274 1.1 christos operation_up result = parse_expr ();
275 1.1 christos if (current_token != 0)
276 1.1 christos error (_("Syntax error near '%s'"), pstate->prev_lexptr);
277 1.1 christos return result;
278 1.1 christos }
279 1.1 christos
280 1.1 christos operation_up parse_tuple ();
281 1.1 christos operation_up parse_array ();
282 1.1 christos operation_up name_to_operation (const std::string &name);
283 1.1 christos operation_up parse_struct_expr (struct type *type);
284 1.1 christos operation_up parse_binop (bool required);
285 1.1 christos operation_up parse_range ();
286 1.1 christos operation_up parse_expr ();
287 1.1 christos operation_up parse_sizeof ();
288 1.1 christos operation_up parse_addr ();
289 1.1 christos operation_up parse_field (operation_up &&);
290 1.1 christos operation_up parse_index (operation_up &&);
291 1.1 christos std::vector<operation_up> parse_paren_args ();
292 1.1 christos operation_up parse_call (operation_up &&);
293 1.1 christos std::vector<struct type *> parse_type_list ();
294 1.1 christos std::vector<struct type *> parse_maybe_type_list ();
295 1.1 christos struct type *parse_array_type ();
296 1.1 christos struct type *parse_slice_type ();
297 1.1 christos struct type *parse_pointer_type ();
298 1.1 christos struct type *parse_function_type ();
299 1.1 christos struct type *parse_tuple_type ();
300 1.1 christos struct type *parse_type ();
301 1.1 christos std::string parse_path (bool for_expr);
302 1.1 christos operation_up parse_string ();
303 1.1 christos operation_up parse_tuple_struct (struct type *type);
304 1.1 christos operation_up parse_path_expr ();
305 1.1 christos operation_up parse_atom (bool required);
306 1.1 christos
307 1.1 christos void update_innermost_block (struct block_symbol sym);
308 1.1 christos struct block_symbol lookup_symbol (const char *name,
309 1.1 christos const struct block *block,
310 1.1.1.2 christos const domain_search_flags domain);
311 1.1 christos struct type *rust_lookup_type (const char *name);
312 1.1 christos
313 1.1 christos /* Clear some state. This is only used for testing. */
314 1.1 christos #if GDB_SELF_TEST
315 1.1 christos void reset (const char *input)
316 1.1 christos {
317 1.1 christos pstate->prev_lexptr = nullptr;
318 1.1 christos pstate->lexptr = input;
319 1.1 christos paren_depth = 0;
320 1.1 christos current_token = 0;
321 1.1 christos current_int_val = {};
322 1.1 christos current_float_val = {};
323 1.1 christos current_string_val = {};
324 1.1 christos current_opcode = OP_NULL;
325 1.1 christos }
326 1.1 christos #endif /* GDB_SELF_TEST */
327 1.1 christos
328 1.1 christos /* Return the token's string value as a string. */
329 1.1 christos std::string get_string () const
330 1.1 christos {
331 1.1 christos return std::string (current_string_val.ptr, current_string_val.length);
332 1.1 christos }
333 1.1 christos
334 1.1 christos /* A pointer to this is installed globally. */
335 1.1 christos auto_obstack obstack;
336 1.1 christos
337 1.1 christos /* The parser state gdb gave us. */
338 1.1 christos struct parser_state *pstate;
339 1.1 christos
340 1.1 christos /* Depth of parentheses. */
341 1.1 christos int paren_depth = 0;
342 1.1 christos
343 1.1 christos /* The current token's type. */
344 1.1 christos int current_token = 0;
345 1.1 christos /* The current token's payload, if any. */
346 1.1 christos typed_val_int current_int_val {};
347 1.1 christos typed_val_float current_float_val {};
348 1.1 christos struct stoken current_string_val {};
349 1.1 christos enum exp_opcode current_opcode = OP_NULL;
350 1.1 christos
351 1.1 christos /* When completing, this may be set to the field operation to
352 1.1 christos complete. */
353 1.1 christos operation_up completion_op;
354 1.1 christos };
355 1.1 christos
356 1.1 christos /* Return an string referring to NAME, but relative to the crate's
357 1.1 christos name. */
358 1.1 christos
359 1.1 christos std::string
360 1.1 christos rust_parser::crate_name (const std::string &name)
361 1.1 christos {
362 1.1 christos std::string crate = rust_crate_for_block (pstate->expression_context_block);
363 1.1 christos
364 1.1 christos if (crate.empty ())
365 1.1 christos error (_("Could not find crate for current location"));
366 1.1 christos return "::" + crate + "::" + name;
367 1.1 christos }
368 1.1 christos
369 1.1 christos /* Return a string referring to a "super::" qualified name. IDENT is
370 1.1 christos the base name and N_SUPERS is how many "super::"s were provided.
371 1.1 christos N_SUPERS can be zero. */
372 1.1 christos
373 1.1 christos std::string
374 1.1 christos rust_parser::super_name (const std::string &ident, unsigned int n_supers)
375 1.1 christos {
376 1.1.1.2 christos const char *scope = "";
377 1.1.1.2 christos if (pstate->expression_context_block != nullptr)
378 1.1.1.2 christos scope = pstate->expression_context_block->scope ();
379 1.1 christos int offset;
380 1.1 christos
381 1.1 christos if (scope[0] == '\0')
382 1.1 christos error (_("Couldn't find namespace scope for self::"));
383 1.1 christos
384 1.1 christos if (n_supers > 0)
385 1.1 christos {
386 1.1 christos int len;
387 1.1 christos std::vector<int> offsets;
388 1.1 christos unsigned int current_len;
389 1.1 christos
390 1.1 christos current_len = cp_find_first_component (scope);
391 1.1 christos while (scope[current_len] != '\0')
392 1.1 christos {
393 1.1 christos offsets.push_back (current_len);
394 1.1 christos gdb_assert (scope[current_len] == ':');
395 1.1 christos /* The "::". */
396 1.1 christos current_len += 2;
397 1.1 christos current_len += cp_find_first_component (scope
398 1.1 christos + current_len);
399 1.1 christos }
400 1.1 christos
401 1.1 christos len = offsets.size ();
402 1.1 christos if (n_supers >= len)
403 1.1 christos error (_("Too many super:: uses from '%s'"), scope);
404 1.1 christos
405 1.1 christos offset = offsets[len - n_supers];
406 1.1 christos }
407 1.1 christos else
408 1.1 christos offset = strlen (scope);
409 1.1 christos
410 1.1 christos return "::" + std::string (scope, offset) + "::" + ident;
411 1.1 christos }
412 1.1 christos
413 1.1 christos /* A helper to appropriately munge NAME and BLOCK depending on the
414 1.1 christos presence of a leading "::". */
415 1.1 christos
416 1.1 christos static void
417 1.1 christos munge_name_and_block (const char **name, const struct block **block)
418 1.1 christos {
419 1.1 christos /* If it is a global reference, skip the current block in favor of
420 1.1 christos the static block. */
421 1.1 christos if (startswith (*name, "::"))
422 1.1 christos {
423 1.1 christos *name += 2;
424 1.1.1.2 christos *block = (*block)->static_block ();
425 1.1 christos }
426 1.1 christos }
427 1.1 christos
428 1.1 christos /* Like lookup_symbol, but handles Rust namespace conventions, and
429 1.1 christos doesn't require field_of_this_result. */
430 1.1 christos
431 1.1 christos struct block_symbol
432 1.1 christos rust_parser::lookup_symbol (const char *name, const struct block *block,
433 1.1.1.2 christos const domain_search_flags domain)
434 1.1 christos {
435 1.1 christos struct block_symbol result;
436 1.1 christos
437 1.1 christos munge_name_and_block (&name, &block);
438 1.1 christos
439 1.1 christos result = ::lookup_symbol (name, block, domain, NULL);
440 1.1 christos if (result.symbol != NULL)
441 1.1 christos update_innermost_block (result);
442 1.1 christos return result;
443 1.1 christos }
444 1.1 christos
445 1.1 christos /* Look up a type, following Rust namespace conventions. */
446 1.1 christos
447 1.1 christos struct type *
448 1.1 christos rust_parser::rust_lookup_type (const char *name)
449 1.1 christos {
450 1.1 christos struct block_symbol result;
451 1.1 christos struct type *type;
452 1.1 christos
453 1.1 christos const struct block *block = pstate->expression_context_block;
454 1.1 christos munge_name_and_block (&name, &block);
455 1.1 christos
456 1.1.1.2 christos result = ::lookup_symbol (name, block, SEARCH_TYPE_DOMAIN, nullptr);
457 1.1 christos if (result.symbol != NULL)
458 1.1 christos {
459 1.1 christos update_innermost_block (result);
460 1.1 christos return result.symbol->type ();
461 1.1 christos }
462 1.1 christos
463 1.1 christos type = lookup_typename (language (), name, NULL, 1);
464 1.1 christos if (type != NULL)
465 1.1 christos return type;
466 1.1 christos
467 1.1 christos /* Last chance, try a built-in type. */
468 1.1 christos return language_lookup_primitive_type (language (), arch (), name);
469 1.1 christos }
470 1.1 christos
471 1.1 christos /* A helper that updates the innermost block as appropriate. */
472 1.1 christos
473 1.1 christos void
474 1.1 christos rust_parser::update_innermost_block (struct block_symbol sym)
475 1.1 christos {
476 1.1 christos if (symbol_read_needs_frame (sym.symbol))
477 1.1 christos pstate->block_tracker->update (sym);
478 1.1 christos }
479 1.1 christos
480 1.1 christos /* Lex a hex number with at least MIN digits and at most MAX
481 1.1 christos digits. */
482 1.1 christos
483 1.1 christos uint32_t
484 1.1 christos rust_parser::lex_hex (int min, int max)
485 1.1 christos {
486 1.1 christos uint32_t result = 0;
487 1.1 christos int len = 0;
488 1.1 christos /* We only want to stop at MAX if we're lexing a byte escape. */
489 1.1 christos int check_max = min == max;
490 1.1 christos
491 1.1 christos while ((check_max ? len <= max : 1)
492 1.1 christos && ((pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'f')
493 1.1 christos || (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'F')
494 1.1 christos || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9')))
495 1.1 christos {
496 1.1 christos result *= 16;
497 1.1 christos if (pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'f')
498 1.1 christos result = result + 10 + pstate->lexptr[0] - 'a';
499 1.1 christos else if (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'F')
500 1.1 christos result = result + 10 + pstate->lexptr[0] - 'A';
501 1.1 christos else
502 1.1 christos result = result + pstate->lexptr[0] - '0';
503 1.1 christos ++pstate->lexptr;
504 1.1 christos ++len;
505 1.1 christos }
506 1.1 christos
507 1.1 christos if (len < min)
508 1.1 christos error (_("Not enough hex digits seen"));
509 1.1 christos if (len > max)
510 1.1 christos {
511 1.1 christos gdb_assert (min != max);
512 1.1 christos error (_("Overlong hex escape"));
513 1.1 christos }
514 1.1 christos
515 1.1 christos return result;
516 1.1 christos }
517 1.1 christos
518 1.1 christos /* Lex an escape. IS_BYTE is true if we're lexing a byte escape;
519 1.1 christos otherwise we're lexing a character escape. */
520 1.1 christos
521 1.1 christos uint32_t
522 1.1.1.3 christos rust_parser::lex_escape (bool is_byte)
523 1.1 christos {
524 1.1 christos uint32_t result;
525 1.1 christos
526 1.1 christos gdb_assert (pstate->lexptr[0] == '\\');
527 1.1 christos ++pstate->lexptr;
528 1.1 christos switch (pstate->lexptr[0])
529 1.1 christos {
530 1.1 christos case 'x':
531 1.1 christos ++pstate->lexptr;
532 1.1 christos result = lex_hex (2, 2);
533 1.1 christos break;
534 1.1 christos
535 1.1 christos case 'u':
536 1.1 christos if (is_byte)
537 1.1 christos error (_("Unicode escape in byte literal"));
538 1.1 christos ++pstate->lexptr;
539 1.1 christos if (pstate->lexptr[0] != '{')
540 1.1 christos error (_("Missing '{' in Unicode escape"));
541 1.1 christos ++pstate->lexptr;
542 1.1 christos result = lex_hex (1, 6);
543 1.1 christos /* Could do range checks here. */
544 1.1 christos if (pstate->lexptr[0] != '}')
545 1.1 christos error (_("Missing '}' in Unicode escape"));
546 1.1 christos ++pstate->lexptr;
547 1.1 christos break;
548 1.1 christos
549 1.1 christos case 'n':
550 1.1 christos result = '\n';
551 1.1 christos ++pstate->lexptr;
552 1.1 christos break;
553 1.1 christos case 'r':
554 1.1 christos result = '\r';
555 1.1 christos ++pstate->lexptr;
556 1.1 christos break;
557 1.1 christos case 't':
558 1.1 christos result = '\t';
559 1.1 christos ++pstate->lexptr;
560 1.1 christos break;
561 1.1 christos case '\\':
562 1.1 christos result = '\\';
563 1.1 christos ++pstate->lexptr;
564 1.1 christos break;
565 1.1 christos case '0':
566 1.1 christos result = '\0';
567 1.1 christos ++pstate->lexptr;
568 1.1 christos break;
569 1.1 christos case '\'':
570 1.1 christos result = '\'';
571 1.1 christos ++pstate->lexptr;
572 1.1 christos break;
573 1.1 christos case '"':
574 1.1 christos result = '"';
575 1.1 christos ++pstate->lexptr;
576 1.1 christos break;
577 1.1 christos
578 1.1 christos default:
579 1.1 christos error (_("Invalid escape \\%c in literal"), pstate->lexptr[0]);
580 1.1 christos }
581 1.1 christos
582 1.1 christos return result;
583 1.1 christos }
584 1.1 christos
585 1.1 christos /* A helper for lex_character. Search forward for the closing single
586 1.1 christos quote, then convert the bytes from the host charset to UTF-32. */
587 1.1 christos
588 1.1 christos static uint32_t
589 1.1 christos lex_multibyte_char (const char *text, int *len)
590 1.1 christos {
591 1.1 christos /* Only look a maximum of 5 bytes for the closing quote. This is
592 1.1 christos the maximum for UTF-8. */
593 1.1 christos int quote;
594 1.1 christos gdb_assert (text[0] != '\'');
595 1.1 christos for (quote = 1; text[quote] != '\0' && text[quote] != '\''; ++quote)
596 1.1 christos ;
597 1.1 christos *len = quote;
598 1.1 christos /* The caller will issue an error. */
599 1.1 christos if (text[quote] == '\0')
600 1.1 christos return 0;
601 1.1 christos
602 1.1 christos auto_obstack result;
603 1.1 christos convert_between_encodings (host_charset (), HOST_UTF32,
604 1.1 christos (const gdb_byte *) text,
605 1.1 christos quote, 1, &result, translit_none);
606 1.1 christos
607 1.1 christos int size = obstack_object_size (&result);
608 1.1 christos if (size > 4)
609 1.1 christos error (_("overlong character literal"));
610 1.1 christos uint32_t value;
611 1.1 christos memcpy (&value, obstack_finish (&result), size);
612 1.1 christos return value;
613 1.1 christos }
614 1.1 christos
615 1.1 christos /* Lex a character constant. */
616 1.1 christos
617 1.1 christos int
618 1.1 christos rust_parser::lex_character ()
619 1.1 christos {
620 1.1.1.3 christos bool is_byte = false;
621 1.1 christos uint32_t value;
622 1.1 christos
623 1.1 christos if (pstate->lexptr[0] == 'b')
624 1.1 christos {
625 1.1.1.3 christos is_byte = true;
626 1.1 christos ++pstate->lexptr;
627 1.1 christos }
628 1.1 christos gdb_assert (pstate->lexptr[0] == '\'');
629 1.1 christos ++pstate->lexptr;
630 1.1 christos if (pstate->lexptr[0] == '\'')
631 1.1 christos error (_("empty character literal"));
632 1.1 christos else if (pstate->lexptr[0] == '\\')
633 1.1 christos value = lex_escape (is_byte);
634 1.1 christos else
635 1.1 christos {
636 1.1 christos int len;
637 1.1 christos value = lex_multibyte_char (&pstate->lexptr[0], &len);
638 1.1 christos pstate->lexptr += len;
639 1.1 christos }
640 1.1 christos
641 1.1 christos if (pstate->lexptr[0] != '\'')
642 1.1 christos error (_("Unterminated character literal"));
643 1.1 christos ++pstate->lexptr;
644 1.1 christos
645 1.1 christos current_int_val.val = value;
646 1.1 christos current_int_val.type = get_type (is_byte ? "u8" : "char");
647 1.1 christos
648 1.1 christos return INTEGER;
649 1.1 christos }
650 1.1 christos
/* Decide whether STR begins a raw string, that is an 'r', then any
   number of '#' characters, then a double quote.  If so, return the
   offset of that double quote from STR; otherwise return 0.  */

static int
starts_raw_string (const char *str)
{
  if (*str != 'r')
    return 0;

  /* Skip the hashes that follow the 'r'.  */
  int pos = 1;
  while (str[pos] == '#')
    ++pos;

  return str[pos] == '"' ? pos : 0;
}
668 1.1 christos
669 1.1 christos /* Return true if STR looks like the end of a raw string that had N
670 1.1 christos hashes at the start. */
671 1.1 christos
672 1.1 christos static bool
673 1.1 christos ends_raw_string (const char *str, int n)
674 1.1 christos {
675 1.1 christos gdb_assert (str[0] == '"');
676 1.1.1.3 christos for (int i = 0; i < n; ++i)
677 1.1 christos if (str[i + 1] != '#')
678 1.1 christos return false;
679 1.1 christos return true;
680 1.1 christos }
681 1.1 christos
682 1.1 christos /* Lex a string constant. */
683 1.1 christos
684 1.1 christos int
685 1.1 christos rust_parser::lex_string ()
686 1.1 christos {
687 1.1 christos int is_byte = pstate->lexptr[0] == 'b';
688 1.1 christos int raw_length;
689 1.1 christos
690 1.1 christos if (is_byte)
691 1.1 christos ++pstate->lexptr;
692 1.1 christos raw_length = starts_raw_string (pstate->lexptr);
693 1.1 christos pstate->lexptr += raw_length;
694 1.1 christos gdb_assert (pstate->lexptr[0] == '"');
695 1.1 christos ++pstate->lexptr;
696 1.1 christos
697 1.1 christos while (1)
698 1.1 christos {
699 1.1 christos uint32_t value;
700 1.1 christos
701 1.1 christos if (raw_length > 0)
702 1.1 christos {
703 1.1 christos if (pstate->lexptr[0] == '"' && ends_raw_string (pstate->lexptr,
704 1.1 christos raw_length - 1))
705 1.1 christos {
706 1.1 christos /* Exit with lexptr pointing after the final "#". */
707 1.1 christos pstate->lexptr += raw_length;
708 1.1 christos break;
709 1.1 christos }
710 1.1 christos else if (pstate->lexptr[0] == '\0')
711 1.1 christos error (_("Unexpected EOF in string"));
712 1.1 christos
713 1.1 christos value = pstate->lexptr[0] & 0xff;
714 1.1 christos if (is_byte && value > 127)
715 1.1 christos error (_("Non-ASCII value in raw byte string"));
716 1.1 christos obstack_1grow (&obstack, value);
717 1.1 christos
718 1.1 christos ++pstate->lexptr;
719 1.1 christos }
720 1.1 christos else if (pstate->lexptr[0] == '"')
721 1.1 christos {
722 1.1 christos /* Make sure to skip the quote. */
723 1.1 christos ++pstate->lexptr;
724 1.1 christos break;
725 1.1 christos }
726 1.1 christos else if (pstate->lexptr[0] == '\\')
727 1.1 christos {
728 1.1 christos value = lex_escape (is_byte);
729 1.1 christos
730 1.1 christos if (is_byte)
731 1.1 christos obstack_1grow (&obstack, value);
732 1.1 christos else
733 1.1 christos convert_between_encodings (HOST_UTF32, "UTF-8",
734 1.1 christos (gdb_byte *) &value,
735 1.1 christos sizeof (value), sizeof (value),
736 1.1 christos &obstack, translit_none);
737 1.1 christos }
738 1.1 christos else if (pstate->lexptr[0] == '\0')
739 1.1 christos error (_("Unexpected EOF in string"));
740 1.1 christos else
741 1.1 christos {
742 1.1 christos value = pstate->lexptr[0] & 0xff;
743 1.1 christos if (is_byte && value > 127)
744 1.1 christos error (_("Non-ASCII value in byte string"));
745 1.1 christos obstack_1grow (&obstack, value);
746 1.1 christos ++pstate->lexptr;
747 1.1 christos }
748 1.1 christos }
749 1.1 christos
750 1.1 christos current_string_val.length = obstack_object_size (&obstack);
751 1.1 christos current_string_val.ptr = (const char *) obstack_finish (&obstack);
752 1.1 christos return is_byte ? BYTESTRING : STRING;
753 1.1 christos }
754 1.1 christos
/* Return true if STRING consists of at least one space or tab
   followed directly by a decimal digit.  */

static bool
space_then_number (const char *string)
{
  /* Count the leading blanks.  */
  int blanks = 0;
  while (string[blanks] == ' ' || string[blanks] == '\t')
    ++blanks;

  /* Without any leading whitespace there is nothing to match.  */
  if (blanks == 0)
    return false;

  const char next = string[blanks];
  return next >= '0' && next <= '9';
}
769 1.1 christos
/* Return true if C may be the first character of an identifier: an
   ASCII letter, '_', '$', or any byte with the high bit set.  */

static bool
rust_identifier_start_p (char c)
{
  /* Every non-ASCII byte is accepted; there doesn't seem to be a
     need to be picky about this.  */
  if ((c & 0x80) != 0)
    return true;

  if (c == '_' || c == '$')
    return true;

  if (c >= 'a' && c <= 'z')
    return true;

  return c >= 'A' && c <= 'Z';
}
783 1.1 christos
784 1.1 christos /* Lex an identifier. */
785 1.1 christos
786 1.1 christos int
787 1.1 christos rust_parser::lex_identifier ()
788 1.1 christos {
789 1.1 christos unsigned int length;
790 1.1 christos const struct token_info *token;
791 1.1 christos int is_gdb_var = pstate->lexptr[0] == '$';
792 1.1 christos
793 1.1 christos bool is_raw = false;
794 1.1 christos if (pstate->lexptr[0] == 'r'
795 1.1 christos && pstate->lexptr[1] == '#'
796 1.1 christos && rust_identifier_start_p (pstate->lexptr[2]))
797 1.1 christos {
798 1.1 christos is_raw = true;
799 1.1 christos pstate->lexptr += 2;
800 1.1 christos }
801 1.1 christos
802 1.1 christos const char *start = pstate->lexptr;
803 1.1 christos gdb_assert (rust_identifier_start_p (pstate->lexptr[0]));
804 1.1 christos
805 1.1 christos ++pstate->lexptr;
806 1.1 christos
807 1.1 christos /* Allow any non-ASCII character here. This "handles" UTF-8 by
808 1.1 christos passing it through. */
809 1.1 christos while ((pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'z')
810 1.1 christos || (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'Z')
811 1.1 christos || pstate->lexptr[0] == '_'
812 1.1 christos || (is_gdb_var && pstate->lexptr[0] == '$')
813 1.1 christos || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9')
814 1.1 christos || (pstate->lexptr[0] & 0x80) != 0)
815 1.1 christos ++pstate->lexptr;
816 1.1 christos
817 1.1 christos
818 1.1 christos length = pstate->lexptr - start;
819 1.1 christos token = NULL;
820 1.1 christos if (!is_raw)
821 1.1 christos {
822 1.1 christos for (const auto &candidate : identifier_tokens)
823 1.1 christos {
824 1.1 christos if (length == strlen (candidate.name)
825 1.1 christos && strncmp (candidate.name, start, length) == 0)
826 1.1 christos {
827 1.1 christos token = &candidate;
828 1.1 christos break;
829 1.1 christos }
830 1.1 christos }
831 1.1 christos }
832 1.1 christos
833 1.1 christos if (token != NULL)
834 1.1 christos {
835 1.1 christos if (token->value == 0)
836 1.1 christos {
837 1.1 christos /* Leave the terminating token alone. */
838 1.1 christos pstate->lexptr = start;
839 1.1 christos return 0;
840 1.1 christos }
841 1.1 christos }
842 1.1 christos else if (token == NULL
843 1.1 christos && !is_raw
844 1.1 christos && (strncmp (start, "thread", length) == 0
845 1.1 christos || strncmp (start, "task", length) == 0)
846 1.1 christos && space_then_number (pstate->lexptr))
847 1.1 christos {
848 1.1 christos /* "task" or "thread" followed by a number terminates the
849 1.1 christos parse, per gdb rules. */
850 1.1 christos pstate->lexptr = start;
851 1.1 christos return 0;
852 1.1 christos }
853 1.1 christos
854 1.1 christos if (token == NULL || (pstate->parse_completion && pstate->lexptr[0] == '\0'))
855 1.1 christos {
856 1.1 christos current_string_val.length = length;
857 1.1 christos current_string_val.ptr = start;
858 1.1 christos }
859 1.1 christos
860 1.1 christos if (pstate->parse_completion && pstate->lexptr[0] == '\0')
861 1.1 christos {
862 1.1 christos /* Prevent rustyylex from returning two COMPLETE tokens. */
863 1.1 christos pstate->prev_lexptr = pstate->lexptr;
864 1.1 christos return COMPLETE;
865 1.1 christos }
866 1.1 christos
867 1.1 christos if (token != NULL)
868 1.1 christos return token->value;
869 1.1 christos if (is_gdb_var)
870 1.1 christos return GDBVAR;
871 1.1 christos return IDENT;
872 1.1 christos }
873 1.1 christos
874 1.1 christos /* Lex an operator. */
875 1.1 christos
876 1.1 christos int
877 1.1 christos rust_parser::lex_operator ()
878 1.1 christos {
879 1.1 christos const struct token_info *token = NULL;
880 1.1 christos
881 1.1 christos for (const auto &candidate : operator_tokens)
882 1.1 christos {
883 1.1 christos if (strncmp (candidate.name, pstate->lexptr,
884 1.1 christos strlen (candidate.name)) == 0)
885 1.1 christos {
886 1.1 christos pstate->lexptr += strlen (candidate.name);
887 1.1 christos token = &candidate;
888 1.1 christos break;
889 1.1 christos }
890 1.1 christos }
891 1.1 christos
892 1.1 christos if (token != NULL)
893 1.1 christos {
894 1.1 christos current_opcode = token->opcode;
895 1.1 christos return token->value;
896 1.1 christos }
897 1.1 christos
898 1.1 christos return *pstate->lexptr++;
899 1.1 christos }
900 1.1 christos
901 1.1 christos /* Lex a number. */
902 1.1 christos
903 1.1 christos int
904 1.1 christos rust_parser::lex_number ()
905 1.1 christos {
906 1.1 christos regmatch_t subexps[NUM_SUBEXPRESSIONS];
907 1.1 christos int match;
908 1.1.1.3 christos bool is_integer = false;
909 1.1.1.3 christos bool could_be_decimal = true;
910 1.1.1.3 christos bool implicit_i32 = false;
911 1.1 christos const char *type_name = NULL;
912 1.1 christos struct type *type;
913 1.1 christos int end_index;
914 1.1 christos int type_index = -1;
915 1.1 christos
916 1.1 christos match = regexec (&number_regex, pstate->lexptr, ARRAY_SIZE (subexps),
917 1.1 christos subexps, 0);
918 1.1 christos /* Failure means the regexp is broken. */
919 1.1 christos gdb_assert (match == 0);
920 1.1 christos
921 1.1 christos if (subexps[INT_TEXT].rm_so != -1)
922 1.1 christos {
923 1.1 christos /* Integer part matched. */
924 1.1.1.3 christos is_integer = true;
925 1.1 christos end_index = subexps[INT_TEXT].rm_eo;
926 1.1 christos if (subexps[INT_TYPE].rm_so == -1)
927 1.1 christos {
928 1.1 christos type_name = "i32";
929 1.1.1.3 christos implicit_i32 = true;
930 1.1 christos }
931 1.1 christos else
932 1.1 christos {
933 1.1 christos type_index = INT_TYPE;
934 1.1.1.3 christos could_be_decimal = false;
935 1.1 christos }
936 1.1 christos }
937 1.1 christos else if (subexps[FLOAT_TYPE1].rm_so != -1)
938 1.1 christos {
939 1.1 christos /* Found floating point type suffix. */
940 1.1 christos end_index = subexps[FLOAT_TYPE1].rm_so;
941 1.1 christos type_index = FLOAT_TYPE1;
942 1.1 christos }
943 1.1 christos else if (subexps[FLOAT_TYPE2].rm_so != -1)
944 1.1 christos {
945 1.1 christos /* Found floating point type suffix. */
946 1.1 christos end_index = subexps[FLOAT_TYPE2].rm_so;
947 1.1 christos type_index = FLOAT_TYPE2;
948 1.1 christos }
949 1.1 christos else
950 1.1 christos {
951 1.1 christos /* Any other floating point match. */
952 1.1 christos end_index = subexps[0].rm_eo;
953 1.1 christos type_name = "f64";
954 1.1 christos }
955 1.1 christos
956 1.1 christos /* We need a special case if the final character is ".". In this
957 1.1 christos case we might need to parse an integer. For example, "23.f()" is
958 1.1 christos a request for a trait method call, not a syntax error involving
959 1.1 christos the floating point number "23.". */
960 1.1 christos gdb_assert (subexps[0].rm_eo > 0);
961 1.1 christos if (pstate->lexptr[subexps[0].rm_eo - 1] == '.')
962 1.1 christos {
963 1.1 christos const char *next = skip_spaces (&pstate->lexptr[subexps[0].rm_eo]);
964 1.1 christos
965 1.1 christos if (rust_identifier_start_p (*next) || *next == '.')
966 1.1 christos {
967 1.1 christos --subexps[0].rm_eo;
968 1.1.1.3 christos is_integer = true;
969 1.1 christos end_index = subexps[0].rm_eo;
970 1.1 christos type_name = "i32";
971 1.1.1.3 christos could_be_decimal = true;
972 1.1.1.3 christos implicit_i32 = true;
973 1.1 christos }
974 1.1 christos }
975 1.1 christos
976 1.1 christos /* Compute the type name if we haven't already. */
977 1.1 christos std::string type_name_holder;
978 1.1 christos if (type_name == NULL)
979 1.1 christos {
980 1.1 christos gdb_assert (type_index != -1);
981 1.1 christos type_name_holder = std::string ((pstate->lexptr
982 1.1 christos + subexps[type_index].rm_so),
983 1.1 christos (subexps[type_index].rm_eo
984 1.1 christos - subexps[type_index].rm_so));
985 1.1 christos type_name = type_name_holder.c_str ();
986 1.1 christos }
987 1.1 christos
988 1.1 christos /* Look up the type. */
989 1.1 christos type = get_type (type_name);
990 1.1 christos
991 1.1 christos /* Copy the text of the number and remove the "_"s. */
992 1.1 christos std::string number;
993 1.1.1.3 christos for (int i = 0; i < end_index && pstate->lexptr[i]; ++i)
994 1.1 christos {
995 1.1 christos if (pstate->lexptr[i] == '_')
996 1.1.1.3 christos could_be_decimal = false;
997 1.1 christos else
998 1.1 christos number.push_back (pstate->lexptr[i]);
999 1.1 christos }
1000 1.1 christos
1001 1.1 christos /* Advance past the match. */
1002 1.1 christos pstate->lexptr += subexps[0].rm_eo;
1003 1.1 christos
1004 1.1 christos /* Parse the number. */
1005 1.1 christos if (is_integer)
1006 1.1 christos {
1007 1.1 christos int radix = 10;
1008 1.1 christos int offset = 0;
1009 1.1 christos
1010 1.1 christos if (number[0] == '0')
1011 1.1 christos {
1012 1.1 christos if (number[1] == 'x')
1013 1.1 christos radix = 16;
1014 1.1 christos else if (number[1] == 'o')
1015 1.1 christos radix = 8;
1016 1.1 christos else if (number[1] == 'b')
1017 1.1 christos radix = 2;
1018 1.1 christos if (radix != 10)
1019 1.1 christos {
1020 1.1 christos offset = 2;
1021 1.1.1.3 christos could_be_decimal = false;
1022 1.1 christos }
1023 1.1 christos }
1024 1.1 christos
1025 1.1.1.2 christos if (!current_int_val.val.set (number.c_str () + offset, radix))
1026 1.1.1.2 christos {
1027 1.1.1.2 christos /* Shouldn't be possible. */
1028 1.1.1.2 christos error (_("Invalid integer"));
1029 1.1.1.2 christos }
1030 1.1.1.2 christos if (implicit_i32)
1031 1.1.1.2 christos {
1032 1.1.1.2 christos static gdb_mpz sixty_three_bit = gdb_mpz::pow (2, 63);
1033 1.1.1.2 christos static gdb_mpz thirty_one_bit = gdb_mpz::pow (2, 31);
1034 1.1.1.2 christos
1035 1.1.1.2 christos if (current_int_val.val >= sixty_three_bit)
1036 1.1.1.2 christos type = get_type ("i128");
1037 1.1.1.2 christos else if (current_int_val.val >= thirty_one_bit)
1038 1.1.1.2 christos type = get_type ("i64");
1039 1.1.1.2 christos }
1040 1.1 christos
1041 1.1 christos current_int_val.type = type;
1042 1.1 christos }
1043 1.1 christos else
1044 1.1 christos {
1045 1.1 christos current_float_val.type = type;
1046 1.1 christos bool parsed = parse_float (number.c_str (), number.length (),
1047 1.1 christos current_float_val.type,
1048 1.1 christos current_float_val.val.data ());
1049 1.1 christos gdb_assert (parsed);
1050 1.1 christos }
1051 1.1 christos
1052 1.1 christos return is_integer ? (could_be_decimal ? DECIMAL_INTEGER : INTEGER) : FLOAT;
1053 1.1 christos }
1054 1.1 christos
1055 1.1 christos /* The lexer. */
1056 1.1 christos
1057 1.1 christos int
1058 1.1 christos rust_parser::lex_one_token ()
1059 1.1 christos {
1060 1.1 christos /* Skip all leading whitespace. */
1061 1.1 christos while (pstate->lexptr[0] == ' '
1062 1.1 christos || pstate->lexptr[0] == '\t'
1063 1.1 christos || pstate->lexptr[0] == '\r'
1064 1.1 christos || pstate->lexptr[0] == '\n')
1065 1.1 christos ++pstate->lexptr;
1066 1.1 christos
1067 1.1 christos /* If we hit EOF and we're completing, then return COMPLETE -- maybe
1068 1.1 christos we're completing an empty string at the end of a field_expr.
1069 1.1 christos But, we don't want to return two COMPLETE tokens in a row. */
1070 1.1 christos if (pstate->lexptr[0] == '\0' && pstate->lexptr == pstate->prev_lexptr)
1071 1.1 christos return 0;
1072 1.1 christos pstate->prev_lexptr = pstate->lexptr;
1073 1.1 christos if (pstate->lexptr[0] == '\0')
1074 1.1 christos {
1075 1.1 christos if (pstate->parse_completion)
1076 1.1 christos {
1077 1.1 christos current_string_val.length =0;
1078 1.1 christos current_string_val.ptr = "";
1079 1.1 christos return COMPLETE;
1080 1.1 christos }
1081 1.1 christos return 0;
1082 1.1 christos }
1083 1.1 christos
1084 1.1 christos if (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9')
1085 1.1 christos return lex_number ();
1086 1.1 christos else if (pstate->lexptr[0] == 'b' && pstate->lexptr[1] == '\'')
1087 1.1 christos return lex_character ();
1088 1.1 christos else if (pstate->lexptr[0] == 'b' && pstate->lexptr[1] == '"')
1089 1.1 christos return lex_string ();
1090 1.1 christos else if (pstate->lexptr[0] == 'b' && starts_raw_string (pstate->lexptr + 1))
1091 1.1 christos return lex_string ();
1092 1.1 christos else if (starts_raw_string (pstate->lexptr))
1093 1.1 christos return lex_string ();
1094 1.1 christos else if (rust_identifier_start_p (pstate->lexptr[0]))
1095 1.1 christos return lex_identifier ();
1096 1.1 christos else if (pstate->lexptr[0] == '"')
1097 1.1 christos return lex_string ();
1098 1.1 christos else if (pstate->lexptr[0] == '\'')
1099 1.1 christos return lex_character ();
1100 1.1 christos else if (pstate->lexptr[0] == '}' || pstate->lexptr[0] == ']')
1101 1.1 christos {
1102 1.1 christos /* Falls through to lex_operator. */
1103 1.1 christos --paren_depth;
1104 1.1 christos }
1105 1.1 christos else if (pstate->lexptr[0] == '(' || pstate->lexptr[0] == '{')
1106 1.1 christos {
1107 1.1 christos /* Falls through to lex_operator. */
1108 1.1 christos ++paren_depth;
1109 1.1 christos }
1110 1.1 christos else if (pstate->lexptr[0] == ',' && pstate->comma_terminates
1111 1.1 christos && paren_depth == 0)
1112 1.1 christos return 0;
1113 1.1 christos
1114 1.1 christos return lex_operator ();
1115 1.1 christos }
1116 1.1 christos
1117 1.1 christos /* Push back a single character to be re-lexed. */
1118 1.1 christos
1119 1.1 christos void
1120 1.1 christos rust_parser::push_back (char c)
1121 1.1 christos {
1122 1.1 christos /* Can't be called before any lexing. */
1123 1.1 christos gdb_assert (pstate->prev_lexptr != NULL);
1124 1.1 christos
1125 1.1 christos --pstate->lexptr;
1126 1.1 christos gdb_assert (*pstate->lexptr == c);
1127 1.1 christos }
1128 1.1 christos
1129 1.1 christos
1130 1.1 christos
1132 1.1 christos /* Parse a tuple or paren expression. */
1133 1.1 christos
1134 1.1 christos operation_up
1135 1.1 christos rust_parser::parse_tuple ()
1136 1.1 christos {
1137 1.1 christos assume ('(');
1138 1.1 christos
1139 1.1 christos if (current_token == ')')
1140 1.1 christos {
1141 1.1 christos lex ();
1142 1.1 christos struct type *unit = get_type ("()");
1143 1.1 christos return make_operation<long_const_operation> (unit, 0);
1144 1.1 christos }
1145 1.1 christos
1146 1.1 christos operation_up expr = parse_expr ();
1147 1.1 christos if (current_token == ')')
1148 1.1 christos {
1149 1.1 christos /* Parenthesized expression. */
1150 1.1 christos lex ();
1151 1.1 christos return make_operation<rust_parenthesized_operation> (std::move (expr));
1152 1.1 christos }
1153 1.1 christos
1154 1.1 christos std::vector<operation_up> ops;
1155 1.1 christos ops.push_back (std::move (expr));
1156 1.1 christos while (current_token != ')')
1157 1.1 christos {
1158 1.1 christos if (current_token != ',')
1159 1.1 christos error (_("',' or ')' expected"));
1160 1.1 christos lex ();
1161 1.1 christos
1162 1.1 christos /* A trailing "," is ok. */
1163 1.1 christos if (current_token != ')')
1164 1.1 christos ops.push_back (parse_expr ());
1165 1.1 christos }
1166 1.1 christos
1167 1.1 christos assume (')');
1168 1.1 christos
1169 1.1 christos error (_("Tuple expressions not supported yet"));
1170 1.1 christos }
1171 1.1 christos
1172 1.1 christos /* Parse an array expression. */
1173 1.1 christos
1174 1.1 christos operation_up
1175 1.1 christos rust_parser::parse_array ()
1176 1.1 christos {
1177 1.1 christos assume ('[');
1178 1.1 christos
1179 1.1 christos if (current_token == KW_MUT)
1180 1.1 christos lex ();
1181 1.1 christos
1182 1.1 christos operation_up result;
1183 1.1 christos operation_up expr = parse_expr ();
1184 1.1 christos if (current_token == ';')
1185 1.1 christos {
1186 1.1 christos lex ();
1187 1.1 christos operation_up rhs = parse_expr ();
1188 1.1 christos result = make_operation<rust_array_operation> (std::move (expr),
1189 1.1 christos std::move (rhs));
1190 1.1.1.2 christos }
1191 1.1 christos else if (current_token == ',' || current_token == ']')
1192 1.1 christos {
1193 1.1 christos std::vector<operation_up> ops;
1194 1.1 christos ops.push_back (std::move (expr));
1195 1.1 christos while (current_token != ']')
1196 1.1 christos {
1197 1.1 christos if (current_token != ',')
1198 1.1 christos error (_("',' or ']' expected"));
1199 1.1 christos lex ();
1200 1.1 christos ops.push_back (parse_expr ());
1201 1.1 christos }
1202 1.1 christos ops.shrink_to_fit ();
1203 1.1 christos int len = ops.size () - 1;
1204 1.1 christos result = make_operation<array_operation> (0, len, std::move (ops));
1205 1.1.1.2 christos }
1206 1.1 christos else
1207 1.1 christos error (_("',', ';', or ']' expected"));
1208 1.1 christos
1209 1.1 christos require (']');
1210 1.1 christos
1211 1.1 christos return result;
1212 1.1 christos }
1213 1.1 christos
1214 1.1 christos /* Turn a name into an operation. */
1215 1.1 christos
1216 1.1 christos operation_up
1217 1.1 christos rust_parser::name_to_operation (const std::string &name)
1218 1.1 christos {
1219 1.1 christos struct block_symbol sym = lookup_symbol (name.c_str (),
1220 1.1.1.2 christos pstate->expression_context_block,
1221 1.1 christos SEARCH_VFT);
1222 1.1 christos if (sym.symbol != nullptr && sym.symbol->aclass () != LOC_TYPEDEF)
1223 1.1 christos return make_operation<var_value_operation> (sym);
1224 1.1 christos
1225 1.1 christos struct type *type = nullptr;
1226 1.1 christos
1227 1.1 christos if (sym.symbol != nullptr)
1228 1.1 christos {
1229 1.1 christos gdb_assert (sym.symbol->aclass () == LOC_TYPEDEF);
1230 1.1 christos type = sym.symbol->type ();
1231 1.1 christos }
1232 1.1 christos if (type == nullptr)
1233 1.1 christos type = rust_lookup_type (name.c_str ());
1234 1.1 christos if (type == nullptr)
1235 1.1 christos error (_("No symbol '%s' in current context"), name.c_str ());
1236 1.1 christos
1237 1.1 christos if (type->code () == TYPE_CODE_STRUCT && type->num_fields () == 0)
1238 1.1 christos {
1239 1.1 christos /* A unit-like struct. */
1240 1.1 christos operation_up result (new rust_aggregate_operation (type, {}, {}));
1241 1.1 christos return result;
1242 1.1 christos }
1243 1.1 christos else
1244 1.1 christos return make_operation<type_operation> (type);
1245 1.1 christos }
1246 1.1 christos
1247 1.1 christos /* Parse a struct expression. */
1248 1.1 christos
1249 1.1 christos operation_up
1250 1.1 christos rust_parser::parse_struct_expr (struct type *type)
1251 1.1 christos {
1252 1.1 christos assume ('{');
1253 1.1 christos
1254 1.1 christos if (type->code () != TYPE_CODE_STRUCT
1255 1.1 christos || rust_tuple_type_p (type)
1256 1.1 christos || rust_tuple_struct_type_p (type))
1257 1.1 christos error (_("Struct expression applied to non-struct type"));
1258 1.1 christos
1259 1.1 christos std::vector<std::pair<std::string, operation_up>> field_v;
1260 1.1 christos while (current_token != '}' && current_token != DOTDOT)
1261 1.1 christos {
1262 1.1 christos if (current_token != IDENT)
1263 1.1 christos error (_("'}', '..', or identifier expected"));
1264 1.1 christos
1265 1.1 christos std::string name = get_string ();
1266 1.1 christos lex ();
1267 1.1 christos
1268 1.1 christos operation_up expr;
1269 1.1 christos if (current_token == ',' || current_token == '}'
1270 1.1 christos || current_token == DOTDOT)
1271 1.1 christos expr = name_to_operation (name);
1272 1.1 christos else
1273 1.1 christos {
1274 1.1 christos require (':');
1275 1.1 christos expr = parse_expr ();
1276 1.1 christos }
1277 1.1 christos field_v.emplace_back (std::move (name), std::move (expr));
1278 1.1 christos
1279 1.1 christos /* A trailing "," is ok. */
1280 1.1 christos if (current_token == ',')
1281 1.1 christos lex ();
1282 1.1 christos }
1283 1.1 christos
1284 1.1 christos operation_up others;
1285 1.1 christos if (current_token == DOTDOT)
1286 1.1 christos {
1287 1.1 christos lex ();
1288 1.1 christos others = parse_expr ();
1289 1.1 christos }
1290 1.1 christos
1291 1.1 christos require ('}');
1292 1.1 christos
1293 1.1 christos return make_operation<rust_aggregate_operation> (type,
1294 1.1 christos std::move (others),
1295 1.1 christos std::move (field_v));
1296 1.1 christos }
1297 1.1 christos
1298 1.1 christos /* Used by the operator precedence parser. */
1299 1.1 christos struct rustop_item
1300 1.1 christos {
1301 1.1 christos rustop_item (int token_, int precedence_, enum exp_opcode opcode_,
1302 1.1 christos operation_up &&op_)
1303 1.1 christos : token (token_),
1304 1.1 christos precedence (precedence_),
1305 1.1 christos opcode (opcode_),
1306 1.1 christos op (std::move (op_))
1307 1.1 christos {
1308 1.1 christos }
1309 1.1 christos
1310 1.1 christos /* The token value. */
1311 1.1 christos int token;
1312 1.1 christos /* Precedence of this operator. */
1313 1.1 christos int precedence;
1314 1.1 christos /* This is used only for assign-modify. */
1315 1.1 christos enum exp_opcode opcode;
1316 1.1 christos /* The right hand side of this operation. */
1317 1.1 christos operation_up op;
1318 1.1 christos };
1319 1.1 christos
1320 1.1 christos /* An operator precedence parser for binary operations, including
1321 1.1 christos "as". */
1322 1.1 christos
1323 1.1 christos operation_up
1324 1.1 christos rust_parser::parse_binop (bool required)
1325 1.1 christos {
1326 1.1 christos /* All the binary operators. Each one is of the form
1327 1.1 christos OPERATION(TOKEN, PRECEDENCE, TYPE)
1328 1.1 christos TOKEN is the corresponding operator token.
1329 1.1 christos PRECEDENCE is a value indicating relative precedence.
1330 1.1 christos TYPE is the operation type corresponding to the operator.
1331 1.1 christos Assignment operations are handled specially, not via this
1332 1.1 christos table; they have precedence 0. */
1333 1.1 christos #define ALL_OPS \
1334 1.1 christos OPERATION ('*', 10, mul_operation) \
1335 1.1 christos OPERATION ('/', 10, div_operation) \
1336 1.1 christos OPERATION ('%', 10, rem_operation) \
1337 1.1 christos OPERATION ('@', 9, repeat_operation) \
1338 1.1 christos OPERATION ('+', 8, add_operation) \
1339 1.1 christos OPERATION ('-', 8, sub_operation) \
1340 1.1 christos OPERATION (LSH, 7, lsh_operation) \
1341 1.1 christos OPERATION (RSH, 7, rsh_operation) \
1342 1.1 christos OPERATION ('&', 6, bitwise_and_operation) \
1343 1.1 christos OPERATION ('^', 5, bitwise_xor_operation) \
1344 1.1 christos OPERATION ('|', 4, bitwise_ior_operation) \
1345 1.1 christos OPERATION (EQEQ, 3, equal_operation) \
1346 1.1 christos OPERATION (NOTEQ, 3, notequal_operation) \
1347 1.1 christos OPERATION ('<', 3, less_operation) \
1348 1.1 christos OPERATION (LTEQ, 3, leq_operation) \
1349 1.1 christos OPERATION ('>', 3, gtr_operation) \
1350 1.1 christos OPERATION (GTEQ, 3, geq_operation) \
1351 1.1 christos OPERATION (ANDAND, 2, logical_and_operation) \
1352 1.1 christos OPERATION (OROR, 1, logical_or_operation)
1353 1.1 christos
1354 1.1 christos #define ASSIGN_PREC 0
1355 1.1 christos
1356 1.1 christos operation_up start = parse_atom (required);
1357 1.1 christos if (start == nullptr)
1358 1.1 christos {
1359 1.1 christos gdb_assert (!required);
1360 1.1 christos return start;
1361 1.1 christos }
1362 1.1 christos
1363 1.1 christos std::vector<rustop_item> operator_stack;
1364 1.1 christos operator_stack.emplace_back (0, -1, OP_NULL, std::move (start));
1365 1.1 christos
1366 1.1 christos while (true)
1367 1.1 christos {
1368 1.1 christos int this_token = current_token;
1369 1.1 christos enum exp_opcode compound_assign_op = OP_NULL;
1370 1.1 christos int precedence = -2;
1371 1.1 christos
1372 1.1 christos switch (this_token)
1373 1.1 christos {
1374 1.1 christos #define OPERATION(TOKEN, PRECEDENCE, TYPE) \
1375 1.1 christos case TOKEN: \
1376 1.1 christos precedence = PRECEDENCE; \
1377 1.1 christos lex (); \
1378 1.1 christos break;
1379 1.1 christos
1380 1.1 christos ALL_OPS
1381 1.1 christos
1382 1.1 christos #undef OPERATION
1383 1.1 christos
1384 1.1 christos case COMPOUND_ASSIGN:
1385 1.1.1.2 christos compound_assign_op = current_opcode;
1386 1.1 christos [[fallthrough]];
1387 1.1 christos case '=':
1388 1.1 christos precedence = ASSIGN_PREC;
1389 1.1 christos lex ();
1390 1.1 christos break;
1391 1.1 christos
1392 1.1 christos /* "as" must be handled specially. */
1393 1.1 christos case KW_AS:
1394 1.1 christos {
1395 1.1 christos lex ();
1396 1.1 christos rustop_item &lhs = operator_stack.back ();
1397 1.1 christos struct type *type = parse_type ();
1398 1.1 christos lhs.op = make_operation<unop_cast_operation> (std::move (lhs.op),
1399 1.1 christos type);
1400 1.1 christos }
1401 1.1 christos /* Bypass the rest of the loop. */
1402 1.1 christos continue;
1403 1.1 christos
1404 1.1 christos default:
1405 1.1 christos /* Arrange to pop the entire stack. */
1406 1.1 christos precedence = -2;
1407 1.1.1.2 christos break;
1408 1.1 christos }
1409 1.1 christos
1410 1.1 christos /* Make sure that assignments are right-associative while other
1411 1.1 christos operations are left-associative. */
1412 1.1 christos while ((precedence == ASSIGN_PREC
1413 1.1 christos ? precedence < operator_stack.back ().precedence
1414 1.1 christos : precedence <= operator_stack.back ().precedence)
1415 1.1 christos && operator_stack.size () > 1)
1416 1.1 christos {
1417 1.1 christos rustop_item rhs = std::move (operator_stack.back ());
1418 1.1 christos operator_stack.pop_back ();
1419 1.1 christos
1420 1.1 christos rustop_item &lhs = operator_stack.back ();
1421 1.1 christos
1422 1.1 christos switch (rhs.token)
1423 1.1 christos {
1424 1.1 christos #define OPERATION(TOKEN, PRECEDENCE, TYPE) \
1425 1.1 christos case TOKEN: \
1426 1.1 christos lhs.op = make_operation<TYPE> (std::move (lhs.op), \
1427 1.1 christos std::move (rhs.op)); \
1428 1.1 christos break;
1429 1.1 christos
1430 1.1 christos ALL_OPS
1431 1.1 christos
1432 1.1 christos #undef OPERATION
1433 1.1 christos
1434 1.1 christos case '=':
1435 1.1 christos case COMPOUND_ASSIGN:
1436 1.1 christos {
1437 1.1 christos if (rhs.token == '=')
1438 1.1 christos lhs.op = (make_operation<assign_operation>
1439 1.1 christos (std::move (lhs.op), std::move (rhs.op)));
1440 1.1 christos else
1441 1.1 christos lhs.op = (make_operation<assign_modify_operation>
1442 1.1 christos (rhs.opcode, std::move (lhs.op),
1443 1.1 christos std::move (rhs.op)));
1444 1.1 christos
1445 1.1 christos struct type *unit_type = get_type ("()");
1446 1.1 christos
1447 1.1 christos operation_up nil (new long_const_operation (unit_type, 0));
1448 1.1 christos lhs.op = (make_operation<comma_operation>
1449 1.1 christos (std::move (lhs.op), std::move (nil)));
1450 1.1 christos }
1451 1.1 christos break;
1452 1.1 christos
1453 1.1 christos default:
1454 1.1 christos gdb_assert_not_reached ("bad binary operator");
1455 1.1 christos }
1456 1.1 christos }
1457 1.1 christos
1458 1.1 christos if (precedence == -2)
1459 1.1 christos break;
1460 1.1 christos
1461 1.1 christos operator_stack.emplace_back (this_token, precedence, compound_assign_op,
1462 1.1 christos parse_atom (true));
1463 1.1 christos }
1464 1.1 christos
1465 1.1 christos gdb_assert (operator_stack.size () == 1);
1466 1.1 christos return std::move (operator_stack[0].op);
1467 1.1 christos #undef ALL_OPS
1468 1.1 christos }
1469 1.1 christos
1470 1.1 christos /* Parse a range expression. */
1471 1.1 christos
1472 1.1 christos operation_up
1473 1.1 christos rust_parser::parse_range ()
1474 1.1 christos {
1475 1.1 christos enum range_flag kind = (RANGE_HIGH_BOUND_DEFAULT
1476 1.1 christos | RANGE_LOW_BOUND_DEFAULT);
1477 1.1 christos
1478 1.1 christos operation_up lhs;
1479 1.1 christos if (current_token != DOTDOT && current_token != DOTDOTEQ)
1480 1.1 christos {
1481 1.1 christos lhs = parse_binop (true);
1482 1.1 christos kind &= ~RANGE_LOW_BOUND_DEFAULT;
1483 1.1 christos }
1484 1.1 christos
1485 1.1 christos if (current_token == DOTDOT)
1486 1.1 christos kind |= RANGE_HIGH_BOUND_EXCLUSIVE;
1487 1.1 christos else if (current_token != DOTDOTEQ)
1488 1.1 christos return lhs;
1489 1.1 christos lex ();
1490 1.1 christos
1491 1.1 christos /* A "..=" range requires a high bound, but otherwise it is
1492 1.1 christos optional. */
1493 1.1 christos operation_up rhs = parse_binop ((kind & RANGE_HIGH_BOUND_EXCLUSIVE) == 0);
1494 1.1 christos if (rhs != nullptr)
1495 1.1 christos kind &= ~RANGE_HIGH_BOUND_DEFAULT;
1496 1.1 christos
1497 1.1 christos return make_operation<rust_range_operation> (kind,
1498 1.1 christos std::move (lhs),
1499 1.1 christos std::move (rhs));
1500 1.1 christos }
1501 1.1 christos
1502 1.1 christos /* Parse an expression. */
1503 1.1 christos
1504 1.1 christos operation_up
1505 1.1 christos rust_parser::parse_expr ()
1506 1.1 christos {
1507 1.1 christos return parse_range ();
1508 1.1 christos }
1509 1.1 christos
1510 1.1 christos /* Parse a sizeof expression. */
1511 1.1 christos
1512 1.1 christos operation_up
1513 1.1 christos rust_parser::parse_sizeof ()
1514 1.1 christos {
1515 1.1 christos assume (KW_SIZEOF);
1516 1.1 christos
1517 1.1 christos require ('(');
1518 1.1 christos operation_up result = make_operation<unop_sizeof_operation> (parse_expr ());
1519 1.1 christos require (')');
1520 1.1 christos return result;
1521 1.1 christos }
1522 1.1 christos
1523 1.1 christos /* Parse an address-of operation. */
1524 1.1 christos
1525 1.1 christos operation_up
1526 1.1 christos rust_parser::parse_addr ()
1527 1.1 christos {
1528 1.1 christos assume ('&');
1529 1.1 christos
1530 1.1 christos if (current_token == KW_MUT)
1531 1.1 christos lex ();
1532 1.1 christos
1533 1.1 christos return make_operation<rust_unop_addr_operation> (parse_atom (true));
1534 1.1 christos }
1535 1.1 christos
1536 1.1 christos /* Parse a field expression. */
1537 1.1 christos
1538 1.1 christos operation_up
1539 1.1 christos rust_parser::parse_field (operation_up &&lhs)
1540 1.1 christos {
1541 1.1 christos assume ('.');
1542 1.1 christos
1543 1.1 christos operation_up result;
1544 1.1 christos switch (current_token)
1545 1.1 christos {
1546 1.1 christos case IDENT:
1547 1.1 christos case COMPLETE:
1548 1.1 christos {
1549 1.1 christos bool is_complete = current_token == COMPLETE;
1550 1.1 christos auto struct_op = new rust_structop (std::move (lhs), get_string ());
1551 1.1 christos lex ();
1552 1.1 christos if (is_complete)
1553 1.1 christos {
1554 1.1 christos completion_op.reset (struct_op);
1555 1.1 christos pstate->mark_struct_expression (struct_op);
1556 1.1 christos /* Throw to the outermost level of the parser. */
1557 1.1 christos error (_("not really an error"));
1558 1.1 christos }
1559 1.1 christos result.reset (struct_op);
1560 1.1 christos }
1561 1.1 christos break;
1562 1.1 christos
1563 1.1.1.2 christos case DECIMAL_INTEGER:
1564 1.1.1.2 christos {
1565 1.1.1.2 christos int idx = current_int_val.val.as_integer<int> ();
1566 1.1.1.2 christos result = make_operation<rust_struct_anon> (idx, std::move (lhs));
1567 1.1.1.2 christos lex ();
1568 1.1 christos }
1569 1.1 christos break;
1570 1.1 christos
1571 1.1 christos case INTEGER:
1572 1.1 christos error (_("'_' not allowed in integers in anonymous field references"));
1573 1.1 christos
1574 1.1 christos default:
1575 1.1 christos error (_("field name expected"));
1576 1.1 christos }
1577 1.1 christos
1578 1.1 christos return result;
1579 1.1 christos }
1580 1.1 christos
1581 1.1 christos /* Parse an index expression. */
1582 1.1 christos
1583 1.1 christos operation_up
1584 1.1 christos rust_parser::parse_index (operation_up &&lhs)
1585 1.1 christos {
1586 1.1 christos assume ('[');
1587 1.1 christos operation_up rhs = parse_expr ();
1588 1.1 christos require (']');
1589 1.1 christos
1590 1.1 christos return make_operation<rust_subscript_operation> (std::move (lhs),
1591 1.1 christos std::move (rhs));
1592 1.1 christos }
1593 1.1 christos
1594 1.1 christos /* Parse a sequence of comma-separated expressions in parens. */
1595 1.1 christos
1596 1.1 christos std::vector<operation_up>
1597 1.1 christos rust_parser::parse_paren_args ()
1598 1.1 christos {
1599 1.1 christos assume ('(');
1600 1.1 christos
1601 1.1 christos std::vector<operation_up> args;
1602 1.1 christos while (current_token != ')')
1603 1.1 christos {
1604 1.1 christos if (!args.empty ())
1605 1.1 christos {
1606 1.1 christos if (current_token != ',')
1607 1.1 christos error (_("',' or ')' expected"));
1608 1.1 christos lex ();
1609 1.1 christos }
1610 1.1 christos
1611 1.1 christos args.push_back (parse_expr ());
1612 1.1 christos }
1613 1.1 christos
1614 1.1 christos assume (')');
1615 1.1 christos
1616 1.1 christos return args;
1617 1.1 christos }
1618 1.1 christos
1619 1.1 christos /* Parse the parenthesized part of a function call. */
1620 1.1 christos
1621 1.1 christos operation_up
1622 1.1 christos rust_parser::parse_call (operation_up &&lhs)
1623 1.1 christos {
1624 1.1 christos std::vector<operation_up> args = parse_paren_args ();
1625 1.1 christos
1626 1.1 christos return make_operation<funcall_operation> (std::move (lhs),
1627 1.1 christos std::move (args));
1628 1.1 christos }
1629 1.1 christos
1630 1.1 christos /* Parse a list of types. */
1631 1.1 christos
1632 1.1 christos std::vector<struct type *>
1633 1.1 christos rust_parser::parse_type_list ()
1634 1.1 christos {
1635 1.1 christos std::vector<struct type *> result;
1636 1.1 christos result.push_back (parse_type ());
1637 1.1 christos while (current_token == ',')
1638 1.1 christos {
1639 1.1 christos lex ();
1640 1.1 christos result.push_back (parse_type ());
1641 1.1 christos }
1642 1.1 christos return result;
1643 1.1 christos }
1644 1.1 christos
1645 1.1 christos /* Parse a possibly-empty list of types, surrounded in parens. */
1646 1.1 christos
1647 1.1 christos std::vector<struct type *>
1648 1.1 christos rust_parser::parse_maybe_type_list ()
1649 1.1 christos {
1650 1.1 christos assume ('(');
1651 1.1 christos std::vector<struct type *> types;
1652 1.1 christos if (current_token != ')')
1653 1.1 christos types = parse_type_list ();
1654 1.1 christos require (')');
1655 1.1 christos return types;
1656 1.1 christos }
1657 1.1 christos
1658 1.1 christos /* Parse an array type. */
1659 1.1 christos
1660 1.1 christos struct type *
1661 1.1 christos rust_parser::parse_array_type ()
1662 1.1 christos {
1663 1.1 christos assume ('[');
1664 1.1 christos struct type *elt_type = parse_type ();
1665 1.1 christos require (';');
1666 1.1 christos
1667 1.1 christos if (current_token != INTEGER && current_token != DECIMAL_INTEGER)
1668 1.1.1.2 christos error (_("integer expected"));
1669 1.1 christos ULONGEST val = current_int_val.val.as_integer<ULONGEST> ();
1670 1.1 christos lex ();
1671 1.1 christos require (']');
1672 1.1 christos
1673 1.1 christos return lookup_array_range_type (elt_type, 0, val - 1);
1674 1.1 christos }
1675 1.1 christos
1676 1.1 christos /* Parse a slice type. */
1677 1.1 christos
1678 1.1 christos struct type *
1679 1.1 christos rust_parser::parse_slice_type ()
1680 1.1 christos {
1681 1.1 christos assume ('&');
1682 1.1.1.2 christos
1683 1.1.1.2 christos /* Handle &str specially. This is an important type in Rust. While
1684 1.1.1.2 christos the compiler does emit the "&str" type in the DWARF, just "str"
1685 1.1.1.2 christos itself isn't always available -- but it's handy if this works
1686 1.1.1.2 christos seamlessly. */
1687 1.1.1.2 christos if (current_token == IDENT && get_string () == "str")
1688 1.1.1.2 christos {
1689 1.1.1.2 christos lex ();
1690 1.1.1.2 christos return rust_slice_type ("&str", get_type ("u8"), get_type ("usize"));
1691 1.1.1.2 christos }
1692 1.1 christos
1693 1.1 christos bool is_slice = current_token == '[';
1694 1.1 christos if (is_slice)
1695 1.1 christos lex ();
1696 1.1 christos
1697 1.1 christos struct type *target = parse_type ();
1698 1.1 christos
1699 1.1 christos if (is_slice)
1700 1.1 christos {
1701 1.1 christos require (']');
1702 1.1 christos return rust_slice_type ("&[*gdb*]", target, get_type ("usize"));
1703 1.1 christos }
1704 1.1 christos
1705 1.1 christos /* For now we treat &x and *x identically. */
1706 1.1 christos return lookup_pointer_type (target);
1707 1.1 christos }
1708 1.1 christos
1709 1.1 christos /* Parse a pointer type. */
1710 1.1 christos
1711 1.1 christos struct type *
1712 1.1 christos rust_parser::parse_pointer_type ()
1713 1.1 christos {
1714 1.1 christos assume ('*');
1715 1.1 christos
1716 1.1 christos if (current_token == KW_MUT || current_token == KW_CONST)
1717 1.1 christos lex ();
1718 1.1 christos
1719 1.1 christos struct type *target = parse_type ();
1720 1.1 christos /* For the time being we ignore mut/const. */
1721 1.1 christos return lookup_pointer_type (target);
1722 1.1 christos }
1723 1.1 christos
1724 1.1 christos /* Parse a function type. */
1725 1.1 christos
1726 1.1 christos struct type *
1727 1.1 christos rust_parser::parse_function_type ()
1728 1.1 christos {
1729 1.1 christos assume (KW_FN);
1730 1.1 christos
1731 1.1 christos if (current_token != '(')
1732 1.1 christos error (_("'(' expected"));
1733 1.1 christos
1734 1.1 christos std::vector<struct type *> types = parse_maybe_type_list ();
1735 1.1 christos
1736 1.1 christos if (current_token != ARROW)
1737 1.1 christos error (_("'->' expected"));
1738 1.1 christos lex ();
1739 1.1 christos
1740 1.1 christos struct type *result_type = parse_type ();
1741 1.1 christos
1742 1.1 christos struct type **argtypes = nullptr;
1743 1.1 christos if (!types.empty ())
1744 1.1 christos argtypes = types.data ();
1745 1.1 christos
1746 1.1 christos result_type = lookup_function_type_with_arguments (result_type,
1747 1.1 christos types.size (),
1748 1.1 christos argtypes);
1749 1.1 christos return lookup_pointer_type (result_type);
1750 1.1 christos }
1751 1.1 christos
1752 1.1 christos /* Parse a tuple type. */
1753 1.1 christos
1754 1.1 christos struct type *
1755 1.1 christos rust_parser::parse_tuple_type ()
1756 1.1 christos {
1757 1.1 christos std::vector<struct type *> types = parse_maybe_type_list ();
1758 1.1 christos
1759 1.1 christos auto_obstack obstack;
1760 1.1 christos obstack_1grow (&obstack, '(');
1761 1.1 christos for (int i = 0; i < types.size (); ++i)
1762 1.1 christos {
1763 1.1 christos std::string type_name = type_to_string (types[i]);
1764 1.1 christos
1765 1.1 christos if (i > 0)
1766 1.1 christos obstack_1grow (&obstack, ',');
1767 1.1 christos obstack_grow_str (&obstack, type_name.c_str ());
1768 1.1 christos }
1769 1.1 christos
1770 1.1 christos obstack_grow_str0 (&obstack, ")");
1771 1.1 christos const char *name = (const char *) obstack_finish (&obstack);
1772 1.1 christos
1773 1.1 christos /* We don't allow creating new tuple types (yet), but we do allow
1774 1.1 christos looking up existing tuple types. */
1775 1.1 christos struct type *result = rust_lookup_type (name);
1776 1.1 christos if (result == nullptr)
1777 1.1 christos error (_("could not find tuple type '%s'"), name);
1778 1.1 christos
1779 1.1 christos return result;
1780 1.1 christos }
1781 1.1 christos
1782 1.1 christos /* Parse a type. */
1783 1.1 christos
1784 1.1 christos struct type *
1785 1.1 christos rust_parser::parse_type ()
1786 1.1 christos {
1787 1.1 christos switch (current_token)
1788 1.1 christos {
1789 1.1 christos case '[':
1790 1.1 christos return parse_array_type ();
1791 1.1 christos case '&':
1792 1.1 christos return parse_slice_type ();
1793 1.1 christos case '*':
1794 1.1 christos return parse_pointer_type ();
1795 1.1 christos case KW_FN:
1796 1.1 christos return parse_function_type ();
1797 1.1 christos case '(':
1798 1.1 christos return parse_tuple_type ();
1799 1.1 christos case KW_SELF:
1800 1.1 christos case KW_SUPER:
1801 1.1 christos case COLONCOLON:
1802 1.1 christos case KW_EXTERN:
1803 1.1 christos case IDENT:
1804 1.1 christos {
1805 1.1 christos std::string path = parse_path (false);
1806 1.1 christos struct type *result = rust_lookup_type (path.c_str ());
1807 1.1 christos if (result == nullptr)
1808 1.1 christos error (_("No type name '%s' in current context"), path.c_str ());
1809 1.1 christos return result;
1810 1.1 christos }
1811 1.1 christos default:
1812 1.1 christos error (_("type expected"));
1813 1.1 christos }
1814 1.1 christos }
1815 1.1 christos
1816 1.1 christos /* Parse a path. */
1817 1.1 christos
1818 1.1 christos std::string
1819 1.1 christos rust_parser::parse_path (bool for_expr)
1820 1.1 christos {
1821 1.1 christos unsigned n_supers = 0;
1822 1.1 christos int first_token = current_token;
1823 1.1 christos
1824 1.1 christos switch (current_token)
1825 1.1 christos {
1826 1.1 christos case KW_SELF:
1827 1.1 christos lex ();
1828 1.1 christos if (current_token != COLONCOLON)
1829 1.1 christos return "self";
1830 1.1.1.2 christos lex ();
1831 1.1 christos [[fallthrough]];
1832 1.1 christos case KW_SUPER:
1833 1.1 christos while (current_token == KW_SUPER)
1834 1.1 christos {
1835 1.1 christos ++n_supers;
1836 1.1 christos lex ();
1837 1.1 christos if (current_token != COLONCOLON)
1838 1.1 christos error (_("'::' expected"));
1839 1.1 christos lex ();
1840 1.1 christos }
1841 1.1 christos break;
1842 1.1 christos
1843 1.1 christos case COLONCOLON:
1844 1.1 christos lex ();
1845 1.1 christos break;
1846 1.1 christos
1847 1.1 christos case KW_EXTERN:
1848 1.1 christos /* This is a gdb extension to make it possible to refer to items
1849 1.1 christos in other crates. It just bypasses adding the current crate
1850 1.1 christos to the front of the name. */
1851 1.1 christos lex ();
1852 1.1 christos break;
1853 1.1 christos }
1854 1.1 christos
1855 1.1 christos if (current_token != IDENT)
1856 1.1 christos error (_("identifier expected"));
1857 1.1 christos std::string path = get_string ();
1858 1.1 christos bool saw_ident = true;
1859 1.1 christos lex ();
1860 1.1 christos
1861 1.1 christos /* The condition here lets us enter the loop even if we see
1862 1.1 christos "ident<...>". */
1863 1.1 christos while (current_token == COLONCOLON || current_token == '<')
1864 1.1 christos {
1865 1.1 christos if (current_token == COLONCOLON)
1866 1.1 christos {
1867 1.1 christos lex ();
1868 1.1 christos saw_ident = false;
1869 1.1 christos
1870 1.1 christos if (current_token == IDENT)
1871 1.1 christos {
1872 1.1 christos path = path + "::" + get_string ();
1873 1.1 christos lex ();
1874 1.1 christos saw_ident = true;
1875 1.1 christos }
1876 1.1 christos else if (current_token == COLONCOLON)
1877 1.1 christos {
1878 1.1 christos /* The code below won't detect this scenario. */
1879 1.1 christos error (_("unexpected '::'"));
1880 1.1 christos }
1881 1.1 christos }
1882 1.1 christos
1883 1.1 christos if (current_token != '<')
1884 1.1 christos continue;
1885 1.1 christos
1886 1.1 christos /* Expression use name::<...>, whereas types use name<...>. */
1887 1.1 christos if (for_expr)
1888 1.1 christos {
1889 1.1 christos /* Expressions use "name::<...>", so if we saw an identifier
1890 1.1 christos after the "::", we ignore the "<" here. */
1891 1.1 christos if (saw_ident)
1892 1.1 christos break;
1893 1.1 christos }
1894 1.1 christos else
1895 1.1 christos {
1896 1.1 christos /* Types use "name<...>", so we need to have seen the
1897 1.1 christos identifier. */
1898 1.1 christos if (!saw_ident)
1899 1.1 christos break;
1900 1.1 christos }
1901 1.1 christos
1902 1.1 christos lex ();
1903 1.1 christos std::vector<struct type *> types = parse_type_list ();
1904 1.1 christos if (current_token == '>')
1905 1.1 christos lex ();
1906 1.1 christos else if (current_token == RSH)
1907 1.1 christos {
1908 1.1 christos push_back ('>');
1909 1.1 christos lex ();
1910 1.1 christos }
1911 1.1 christos else
1912 1.1 christos error (_("'>' expected"));
1913 1.1 christos
1914 1.1 christos path += "<";
1915 1.1 christos for (int i = 0; i < types.size (); ++i)
1916 1.1 christos {
1917 1.1 christos if (i > 0)
1918 1.1 christos path += ",";
1919 1.1 christos path += type_to_string (types[i]);
1920 1.1 christos }
1921 1.1 christos path += ">";
1922 1.1 christos break;
1923 1.1 christos }
1924 1.1 christos
1925 1.1 christos switch (first_token)
1926 1.1 christos {
1927 1.1 christos case KW_SELF:
1928 1.1 christos case KW_SUPER:
1929 1.1 christos return super_name (path, n_supers);
1930 1.1 christos
1931 1.1 christos case COLONCOLON:
1932 1.1 christos return crate_name (path);
1933 1.1 christos
1934 1.1 christos case KW_EXTERN:
1935 1.1 christos return "::" + path;
1936 1.1 christos
1937 1.1 christos case IDENT:
1938 1.1 christos return path;
1939 1.1 christos
1940 1.1 christos default:
1941 1.1 christos gdb_assert_not_reached ("missing case in path parsing");
1942 1.1 christos }
1943 1.1 christos }
1944 1.1 christos
1945 1.1 christos /* Handle the parsing for a string expression. */
1946 1.1 christos
1947 1.1 christos operation_up
1948 1.1 christos rust_parser::parse_string ()
1949 1.1 christos {
1950 1.1 christos gdb_assert (current_token == STRING);
1951 1.1 christos
1952 1.1 christos /* Wrap the raw string in the &str struct. */
1953 1.1 christos struct type *type = rust_lookup_type ("&str");
1954 1.1 christos if (type == nullptr)
1955 1.1 christos error (_("Could not find type '&str'"));
1956 1.1 christos
1957 1.1 christos std::vector<std::pair<std::string, operation_up>> field_v;
1958 1.1 christos
1959 1.1 christos size_t len = current_string_val.length;
1960 1.1 christos operation_up str = make_operation<string_operation> (get_string ());
1961 1.1 christos operation_up addr
1962 1.1 christos = make_operation<rust_unop_addr_operation> (std::move (str));
1963 1.1 christos field_v.emplace_back ("data_ptr", std::move (addr));
1964 1.1 christos
1965 1.1 christos struct type *valtype = get_type ("usize");
1966 1.1 christos operation_up lenop = make_operation<long_const_operation> (valtype, len);
1967 1.1 christos field_v.emplace_back ("length", std::move (lenop));
1968 1.1 christos
1969 1.1 christos return make_operation<rust_aggregate_operation> (type,
1970 1.1 christos operation_up (),
1971 1.1 christos std::move (field_v));
1972 1.1 christos }
1973 1.1 christos
1974 1.1 christos /* Parse a tuple struct expression. */
1975 1.1 christos
1976 1.1 christos operation_up
1977 1.1 christos rust_parser::parse_tuple_struct (struct type *type)
1978 1.1 christos {
1979 1.1 christos std::vector<operation_up> args = parse_paren_args ();
1980 1.1 christos
1981 1.1 christos std::vector<std::pair<std::string, operation_up>> field_v (args.size ());
1982 1.1 christos for (int i = 0; i < args.size (); ++i)
1983 1.1 christos field_v[i] = { string_printf ("__%d", i), std::move (args[i]) };
1984 1.1 christos
1985 1.1 christos return (make_operation<rust_aggregate_operation>
1986 1.1 christos (type, operation_up (), std::move (field_v)));
1987 1.1 christos }
1988 1.1 christos
1989 1.1 christos /* Parse a path expression. */
1990 1.1 christos
1991 1.1 christos operation_up
1992 1.1 christos rust_parser::parse_path_expr ()
1993 1.1 christos {
1994 1.1 christos std::string path = parse_path (true);
1995 1.1 christos
1996 1.1 christos if (current_token == '{')
1997 1.1 christos {
1998 1.1 christos struct type *type = rust_lookup_type (path.c_str ());
1999 1.1 christos if (type == nullptr)
2000 1.1 christos error (_("Could not find type '%s'"), path.c_str ());
2001 1.1 christos
2002 1.1 christos return parse_struct_expr (type);
2003 1.1 christos }
2004 1.1 christos else if (current_token == '(')
2005 1.1 christos {
2006 1.1 christos struct type *type = rust_lookup_type (path.c_str ());
2007 1.1 christos /* If this is actually a tuple struct expression, handle it
2008 1.1 christos here. If it is a call, it will be handled elsewhere. */
2009 1.1 christos if (type != nullptr)
2010 1.1 christos {
2011 1.1 christos if (!rust_tuple_struct_type_p (type))
2012 1.1 christos error (_("Type %s is not a tuple struct"), path.c_str ());
2013 1.1 christos return parse_tuple_struct (type);
2014 1.1 christos }
2015 1.1 christos }
2016 1.1 christos
2017 1.1 christos return name_to_operation (path);
2018 1.1 christos }
2019 1.1 christos
2020 1.1 christos /* Parse an atom. "Atom" isn't a Rust term, but this refers to a
2021 1.1 christos single unitary item in the grammar; but here including some unary
2022 1.1 christos prefix and postfix expressions. */
2023 1.1 christos
2024 1.1 christos operation_up
2025 1.1 christos rust_parser::parse_atom (bool required)
2026 1.1 christos {
2027 1.1 christos operation_up result;
2028 1.1 christos
2029 1.1 christos switch (current_token)
2030 1.1 christos {
2031 1.1 christos case '(':
2032 1.1 christos result = parse_tuple ();
2033 1.1 christos break;
2034 1.1 christos
2035 1.1 christos case '[':
2036 1.1 christos result = parse_array ();
2037 1.1 christos break;
2038 1.1 christos
2039 1.1 christos case INTEGER:
2040 1.1 christos case DECIMAL_INTEGER:
2041 1.1 christos result = make_operation<long_const_operation> (current_int_val.type,
2042 1.1 christos current_int_val.val);
2043 1.1 christos lex ();
2044 1.1 christos break;
2045 1.1 christos
2046 1.1 christos case FLOAT:
2047 1.1 christos result = make_operation<float_const_operation> (current_float_val.type,
2048 1.1 christos current_float_val.val);
2049 1.1 christos lex ();
2050 1.1 christos break;
2051 1.1 christos
2052 1.1 christos case STRING:
2053 1.1 christos result = parse_string ();
2054 1.1 christos lex ();
2055 1.1 christos break;
2056 1.1 christos
2057 1.1 christos case BYTESTRING:
2058 1.1 christos result = make_operation<string_operation> (get_string ());
2059 1.1 christos lex ();
2060 1.1 christos break;
2061 1.1 christos
2062 1.1 christos case KW_TRUE:
2063 1.1 christos case KW_FALSE:
2064 1.1 christos result = make_operation<bool_operation> (current_token == KW_TRUE);
2065 1.1 christos lex ();
2066 1.1 christos break;
2067 1.1 christos
2068 1.1 christos case GDBVAR:
2069 1.1 christos /* This is kind of a hacky approach. */
2070 1.1 christos {
2071 1.1 christos pstate->push_dollar (current_string_val);
2072 1.1 christos result = pstate->pop ();
2073 1.1 christos lex ();
2074 1.1 christos }
2075 1.1 christos break;
2076 1.1 christos
2077 1.1 christos case KW_SELF:
2078 1.1 christos case KW_SUPER:
2079 1.1 christos case COLONCOLON:
2080 1.1 christos case KW_EXTERN:
2081 1.1 christos case IDENT:
2082 1.1 christos result = parse_path_expr ();
2083 1.1 christos break;
2084 1.1 christos
2085 1.1 christos case '*':
2086 1.1 christos lex ();
2087 1.1 christos result = make_operation<rust_unop_ind_operation> (parse_atom (true));
2088 1.1 christos break;
2089 1.1 christos case '+':
2090 1.1 christos lex ();
2091 1.1 christos result = make_operation<unary_plus_operation> (parse_atom (true));
2092 1.1 christos break;
2093 1.1 christos case '-':
2094 1.1 christos lex ();
2095 1.1 christos result = make_operation<unary_neg_operation> (parse_atom (true));
2096 1.1 christos break;
2097 1.1 christos case '!':
2098 1.1 christos lex ();
2099 1.1 christos result = make_operation<rust_unop_compl_operation> (parse_atom (true));
2100 1.1 christos break;
2101 1.1 christos case KW_SIZEOF:
2102 1.1 christos result = parse_sizeof ();
2103 1.1 christos break;
2104 1.1 christos case '&':
2105 1.1 christos result = parse_addr ();
2106 1.1 christos break;
2107 1.1 christos
2108 1.1 christos default:
2109 1.1 christos if (!required)
2110 1.1 christos return {};
2111 1.1 christos error (_("unexpected token"));
2112 1.1 christos }
2113 1.1 christos
2114 1.1 christos /* Now parse suffixes. */
2115 1.1 christos while (true)
2116 1.1 christos {
2117 1.1 christos switch (current_token)
2118 1.1 christos {
2119 1.1 christos case '.':
2120 1.1 christos result = parse_field (std::move (result));
2121 1.1 christos break;
2122 1.1 christos
2123 1.1 christos case '[':
2124 1.1 christos result = parse_index (std::move (result));
2125 1.1 christos break;
2126 1.1 christos
2127 1.1 christos case '(':
2128 1.1 christos result = parse_call (std::move (result));
2129 1.1 christos break;
2130 1.1 christos
2131 1.1 christos default:
2132 1.1 christos return result;
2133 1.1 christos }
2134 1.1 christos }
2135 1.1 christos }
2136 1.1 christos
2137 1.1 christos
2138 1.1 christos
2140 1.1 christos /* The parser as exposed to gdb. */
2141 1.1 christos
2142 1.1 christos int
2143 1.1 christos rust_language::parser (struct parser_state *state) const
2144 1.1 christos {
2145 1.1 christos rust_parser parser (state);
2146 1.1 christos
2147 1.1 christos operation_up result;
2148 1.1 christos try
2149 1.1 christos {
2150 1.1 christos result = parser.parse_entry_point ();
2151 1.1 christos }
2152 1.1 christos catch (const gdb_exception &exc)
2153 1.1 christos {
2154 1.1 christos if (state->parse_completion)
2155 1.1 christos {
2156 1.1 christos result = std::move (parser.completion_op);
2157 1.1 christos if (result == nullptr)
2158 1.1 christos throw;
2159 1.1 christos }
2160 1.1 christos else
2161 1.1 christos throw;
2162 1.1 christos }
2163 1.1 christos
2164 1.1 christos state->set_operation (std::move (result));
2165 1.1 christos
2166 1.1 christos return 0;
2167 1.1 christos }
2168 1.1 christos
2169 1.1 christos
2170 1.1 christos
2172 1.1 christos #if GDB_SELF_TEST
2173 1.1 christos
2174 1.1 christos /* A test helper that lexes a string, expecting a single token. */
2175 1.1 christos
2176 1.1 christos static void
2177 1.1 christos rust_lex_test_one (rust_parser *parser, const char *input, int expected)
2178 1.1 christos {
2179 1.1 christos int token;
2180 1.1 christos
2181 1.1 christos parser->reset (input);
2182 1.1 christos
2183 1.1 christos token = parser->lex_one_token ();
2184 1.1 christos SELF_CHECK (token == expected);
2185 1.1 christos
2186 1.1 christos if (token)
2187 1.1 christos {
2188 1.1 christos token = parser->lex_one_token ();
2189 1.1 christos SELF_CHECK (token == 0);
2190 1.1 christos }
2191 1.1 christos }
2192 1.1 christos
2193 1.1 christos /* Test that INPUT lexes as the integer VALUE. */
2194 1.1 christos
2195 1.1 christos static void
2196 1.1 christos rust_lex_int_test (rust_parser *parser, const char *input,
2197 1.1 christos ULONGEST value, int kind)
2198 1.1 christos {
2199 1.1 christos rust_lex_test_one (parser, input, kind);
2200 1.1 christos SELF_CHECK (parser->current_int_val.val == value);
2201 1.1 christos }
2202 1.1 christos
2203 1.1 christos /* Test that INPUT throws an exception with text ERR. */
2204 1.1 christos
2205 1.1 christos static void
2206 1.1 christos rust_lex_exception_test (rust_parser *parser, const char *input,
2207 1.1 christos const char *err)
2208 1.1 christos {
2209 1.1 christos try
2210 1.1 christos {
2211 1.1 christos /* The "kind" doesn't matter. */
2212 1.1 christos rust_lex_test_one (parser, input, DECIMAL_INTEGER);
2213 1.1 christos SELF_CHECK (0);
2214 1.1 christos }
2215 1.1 christos catch (const gdb_exception_error &except)
2216 1.1 christos {
2217 1.1 christos SELF_CHECK (strcmp (except.what (), err) == 0);
2218 1.1 christos }
2219 1.1 christos }
2220 1.1 christos
2221 1.1 christos /* Test that INPUT lexes as the identifier, string, or byte-string
2222 1.1 christos VALUE. KIND holds the expected token kind. */
2223 1.1 christos
2224 1.1 christos static void
2225 1.1 christos rust_lex_stringish_test (rust_parser *parser, const char *input,
2226 1.1 christos const char *value, int kind)
2227 1.1 christos {
2228 1.1 christos rust_lex_test_one (parser, input, kind);
2229 1.1 christos SELF_CHECK (parser->get_string () == value);
2230 1.1 christos }
2231 1.1 christos
2232 1.1 christos /* Helper to test that a string parses as a given token sequence. */
2233 1.1 christos
2234 1.1 christos static void
2235 1.1 christos rust_lex_test_sequence (rust_parser *parser, const char *input, int len,
2236 1.1 christos const int expected[])
2237 1.1.1.3 christos {
2238 1.1 christos parser->reset (input);
2239 1.1 christos
2240 1.1 christos for (int i = 0; i < len; ++i)
2241 1.1 christos {
2242 1.1 christos int token = parser->lex_one_token ();
2243 1.1 christos SELF_CHECK (token == expected[i]);
2244 1.1 christos }
2245 1.1 christos }
2246 1.1 christos
2247 1.1 christos /* Tests for an integer-parsing corner case. */
2248 1.1 christos
2249 1.1 christos static void
2250 1.1 christos rust_lex_test_trailing_dot (rust_parser *parser)
2251 1.1 christos {
2252 1.1 christos const int expected1[] = { DECIMAL_INTEGER, '.', IDENT, '(', ')', 0 };
2253 1.1 christos const int expected2[] = { INTEGER, '.', IDENT, '(', ')', 0 };
2254 1.1 christos const int expected3[] = { FLOAT, EQEQ, '(', ')', 0 };
2255 1.1 christos const int expected4[] = { DECIMAL_INTEGER, DOTDOT, DECIMAL_INTEGER, 0 };
2256 1.1 christos
2257 1.1 christos rust_lex_test_sequence (parser, "23.g()", ARRAY_SIZE (expected1), expected1);
2258 1.1 christos rust_lex_test_sequence (parser, "23_0.g()", ARRAY_SIZE (expected2),
2259 1.1 christos expected2);
2260 1.1 christos rust_lex_test_sequence (parser, "23.==()", ARRAY_SIZE (expected3),
2261 1.1 christos expected3);
2262 1.1 christos rust_lex_test_sequence (parser, "23..25", ARRAY_SIZE (expected4), expected4);
2263 1.1 christos }
2264 1.1 christos
2265 1.1 christos /* Tests of completion. */
2266 1.1 christos
2267 1.1 christos static void
2268 1.1 christos rust_lex_test_completion (rust_parser *parser)
2269 1.1.1.3 christos {
2270 1.1 christos const int expected[] = { IDENT, '.', COMPLETE, 0 };
2271 1.1 christos
2272 1.1 christos parser->pstate->parse_completion = true;
2273 1.1 christos
2274 1.1 christos rust_lex_test_sequence (parser, "something.wha", ARRAY_SIZE (expected),
2275 1.1 christos expected);
2276 1.1.1.3 christos rust_lex_test_sequence (parser, "something.", ARRAY_SIZE (expected),
2277 1.1 christos expected);
2278 1.1 christos
2279 1.1 christos parser->pstate->parse_completion = false;
2280 1.1 christos }
2281 1.1 christos
2282 1.1 christos /* Test pushback. */
2283 1.1 christos
2284 1.1 christos static void
2285 1.1 christos rust_lex_test_push_back (rust_parser *parser)
2286 1.1 christos {
2287 1.1 christos int token;
2288 1.1 christos
2289 1.1 christos parser->reset (">>=");
2290 1.1 christos
2291 1.1 christos token = parser->lex_one_token ();
2292 1.1 christos SELF_CHECK (token == COMPOUND_ASSIGN);
2293 1.1 christos SELF_CHECK (parser->current_opcode == BINOP_RSH);
2294 1.1 christos
2295 1.1 christos parser->push_back ('=');
2296 1.1 christos
2297 1.1 christos token = parser->lex_one_token ();
2298 1.1 christos SELF_CHECK (token == '=');
2299 1.1 christos
2300 1.1 christos token = parser->lex_one_token ();
2301 1.1 christos SELF_CHECK (token == 0);
2302 1.1 christos }
2303 1.1 christos
2304 1.1 christos /* Unit test the lexer. */
2305 1.1 christos
2306 1.1 christos static void
2307 1.1.1.2 christos rust_lex_tests (void)
2308 1.1.1.2 christos {
2309 1.1 christos /* Set up dummy "parser", so that rust_type works. */
2310 1.1 christos parser_state ps (language_def (language_rust), current_inferior ()->arch (),
2311 1.1 christos nullptr, 0, 0, nullptr, 0, nullptr);
2312 1.1 christos rust_parser parser (&ps);
2313 1.1 christos
2314 1.1 christos rust_lex_test_one (&parser, "", 0);
2315 1.1 christos rust_lex_test_one (&parser, " \t \n \r ", 0);
2316 1.1 christos rust_lex_test_one (&parser, "thread 23", 0);
2317 1.1 christos rust_lex_test_one (&parser, "task 23", 0);
2318 1.1 christos rust_lex_test_one (&parser, "th 104", 0);
2319 1.1 christos rust_lex_test_one (&parser, "ta 97", 0);
2320 1.1 christos
2321 1.1 christos rust_lex_int_test (&parser, "'z'", 'z', INTEGER);
2322 1.1 christos rust_lex_int_test (&parser, "'\\xff'", 0xff, INTEGER);
2323 1.1 christos rust_lex_int_test (&parser, "'\\u{1016f}'", 0x1016f, INTEGER);
2324 1.1 christos rust_lex_int_test (&parser, "b'z'", 'z', INTEGER);
2325 1.1 christos rust_lex_int_test (&parser, "b'\\xfe'", 0xfe, INTEGER);
2326 1.1 christos rust_lex_int_test (&parser, "b'\\xFE'", 0xfe, INTEGER);
2327 1.1 christos rust_lex_int_test (&parser, "b'\\xfE'", 0xfe, INTEGER);
2328 1.1 christos
2329 1.1 christos /* Test all escapes in both modes. */
2330 1.1 christos rust_lex_int_test (&parser, "'\\n'", '\n', INTEGER);
2331 1.1 christos rust_lex_int_test (&parser, "'\\r'", '\r', INTEGER);
2332 1.1 christos rust_lex_int_test (&parser, "'\\t'", '\t', INTEGER);
2333 1.1 christos rust_lex_int_test (&parser, "'\\\\'", '\\', INTEGER);
2334 1.1 christos rust_lex_int_test (&parser, "'\\0'", '\0', INTEGER);
2335 1.1 christos rust_lex_int_test (&parser, "'\\''", '\'', INTEGER);
2336 1.1 christos rust_lex_int_test (&parser, "'\\\"'", '"', INTEGER);
2337 1.1 christos
2338 1.1 christos rust_lex_int_test (&parser, "b'\\n'", '\n', INTEGER);
2339 1.1 christos rust_lex_int_test (&parser, "b'\\r'", '\r', INTEGER);
2340 1.1 christos rust_lex_int_test (&parser, "b'\\t'", '\t', INTEGER);
2341 1.1 christos rust_lex_int_test (&parser, "b'\\\\'", '\\', INTEGER);
2342 1.1 christos rust_lex_int_test (&parser, "b'\\0'", '\0', INTEGER);
2343 1.1 christos rust_lex_int_test (&parser, "b'\\''", '\'', INTEGER);
2344 1.1 christos rust_lex_int_test (&parser, "b'\\\"'", '"', INTEGER);
2345 1.1 christos
2346 1.1 christos rust_lex_exception_test (&parser, "'z", "Unterminated character literal");
2347 1.1 christos rust_lex_exception_test (&parser, "b'\\x0'", "Not enough hex digits seen");
2348 1.1 christos rust_lex_exception_test (&parser, "b'\\u{0}'",
2349 1.1 christos "Unicode escape in byte literal");
2350 1.1 christos rust_lex_exception_test (&parser, "'\\x0'", "Not enough hex digits seen");
2351 1.1 christos rust_lex_exception_test (&parser, "'\\u0'", "Missing '{' in Unicode escape");
2352 1.1 christos rust_lex_exception_test (&parser, "'\\u{0", "Missing '}' in Unicode escape");
2353 1.1 christos rust_lex_exception_test (&parser, "'\\u{0000007}", "Overlong hex escape");
2354 1.1 christos rust_lex_exception_test (&parser, "'\\u{}", "Not enough hex digits seen");
2355 1.1 christos rust_lex_exception_test (&parser, "'\\Q'", "Invalid escape \\Q in literal");
2356 1.1 christos rust_lex_exception_test (&parser, "b'\\Q'", "Invalid escape \\Q in literal");
2357 1.1 christos
2358 1.1 christos rust_lex_int_test (&parser, "23", 23, DECIMAL_INTEGER);
2359 1.1 christos rust_lex_int_test (&parser, "2_344__29", 234429, INTEGER);
2360 1.1 christos rust_lex_int_test (&parser, "0x1f", 0x1f, INTEGER);
2361 1.1 christos rust_lex_int_test (&parser, "23usize", 23, INTEGER);
2362 1.1 christos rust_lex_int_test (&parser, "23i32", 23, INTEGER);
2363 1.1 christos rust_lex_int_test (&parser, "0x1_f", 0x1f, INTEGER);
2364 1.1 christos rust_lex_int_test (&parser, "0b1_101011__", 0x6b, INTEGER);
2365 1.1 christos rust_lex_int_test (&parser, "0o001177i64", 639, INTEGER);
2366 1.1 christos rust_lex_int_test (&parser, "0x123456789u64", 0x123456789ull, INTEGER);
2367 1.1 christos
2368 1.1 christos rust_lex_test_trailing_dot (&parser);
2369 1.1 christos
2370 1.1 christos rust_lex_test_one (&parser, "23.", FLOAT);
2371 1.1 christos rust_lex_test_one (&parser, "23.99f32", FLOAT);
2372 1.1 christos rust_lex_test_one (&parser, "23e7", FLOAT);
2373 1.1 christos rust_lex_test_one (&parser, "23E-7", FLOAT);
2374 1.1 christos rust_lex_test_one (&parser, "23e+7", FLOAT);
2375 1.1 christos rust_lex_test_one (&parser, "23.99e+7f64", FLOAT);
2376 1.1 christos rust_lex_test_one (&parser, "23.82f32", FLOAT);
2377 1.1 christos
2378 1.1 christos rust_lex_stringish_test (&parser, "hibob", "hibob", IDENT);
2379 1.1 christos rust_lex_stringish_test (&parser, "hibob__93", "hibob__93", IDENT);
2380 1.1 christos rust_lex_stringish_test (&parser, "thread", "thread", IDENT);
2381 1.1 christos rust_lex_stringish_test (&parser, "r#true", "true", IDENT);
2382 1.1 christos
2383 1.1 christos const int expected1[] = { IDENT, DECIMAL_INTEGER, 0 };
2384 1.1 christos rust_lex_test_sequence (&parser, "r#thread 23", ARRAY_SIZE (expected1),
2385 1.1 christos expected1);
2386 1.1 christos const int expected2[] = { IDENT, '#', 0 };
2387 1.1 christos rust_lex_test_sequence (&parser, "r#", ARRAY_SIZE (expected2), expected2);
2388 1.1 christos
2389 1.1 christos rust_lex_stringish_test (&parser, "\"string\"", "string", STRING);
2390 1.1 christos rust_lex_stringish_test (&parser, "\"str\\ting\"", "str\ting", STRING);
2391 1.1 christos rust_lex_stringish_test (&parser, "\"str\\\"ing\"", "str\"ing", STRING);
2392 1.1 christos rust_lex_stringish_test (&parser, "r\"str\\ing\"", "str\\ing", STRING);
2393 1.1 christos rust_lex_stringish_test (&parser, "r#\"str\\ting\"#", "str\\ting", STRING);
2394 1.1 christos rust_lex_stringish_test (&parser, "r###\"str\\\"ing\"###", "str\\\"ing",
2395 1.1 christos STRING);
2396 1.1 christos
2397 1.1 christos rust_lex_stringish_test (&parser, "b\"string\"", "string", BYTESTRING);
2398 1.1 christos rust_lex_stringish_test (&parser, "b\"\x73tring\"", "string", BYTESTRING);
2399 1.1 christos rust_lex_stringish_test (&parser, "b\"str\\\"ing\"", "str\"ing", BYTESTRING);
2400 1.1 christos rust_lex_stringish_test (&parser, "br####\"\\x73tring\"####", "\\x73tring",
2401 1.1 christos BYTESTRING);
2402 1.1 christos
2403 1.1 christos for (const auto &candidate : identifier_tokens)
2404 1.1 christos rust_lex_test_one (&parser, candidate.name, candidate.value);
2405 1.1 christos
2406 1.1 christos for (const auto &candidate : operator_tokens)
2407 1.1 christos rust_lex_test_one (&parser, candidate.name, candidate.value);
2408 1.1 christos
2409 1.1 christos rust_lex_test_completion (&parser);
2410 1.1 christos rust_lex_test_push_back (&parser);
2411 1.1 christos }
2412 1.1 christos
2413 1.1 christos #endif /* GDB_SELF_TEST */
2414 1.1 christos
2415 1.1 christos
2416 1.1 christos
2418 1.1 christos void _initialize_rust_exp ();
2419 1.1 christos void
2420 1.1 christos _initialize_rust_exp ()
2421 1.1 christos {
2422 1.1 christos int code = regcomp (&number_regex, number_regex_text, REG_EXTENDED);
2423 1.1 christos /* If the regular expression was incorrect, it was a programming
2424 1.1 christos error. */
2425 1.1 christos gdb_assert (code == 0);
2426 1.1 christos
2427 #if GDB_SELF_TEST
2428 selftests::register_test ("rust-lex", rust_lex_tests);
2429 #endif
2430 }
2431