rust-parse.c revision 1.1.1.2 1 1.1 christos /* Rust expression parsing for GDB, the GNU debugger.
2 1.1 christos
3 1.1.1.2 christos Copyright (C) 2016-2024 Free Software Foundation, Inc.
4 1.1 christos
5 1.1 christos This file is part of GDB.
6 1.1 christos
7 1.1 christos This program is free software; you can redistribute it and/or modify
8 1.1 christos it under the terms of the GNU General Public License as published by
9 1.1 christos the Free Software Foundation; either version 3 of the License, or
10 1.1 christos (at your option) any later version.
11 1.1 christos
12 1.1 christos This program is distributed in the hope that it will be useful,
13 1.1 christos but WITHOUT ANY WARRANTY; without even the implied warranty of
14 1.1 christos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 1.1 christos GNU General Public License for more details.
16 1.1 christos
17 1.1 christos You should have received a copy of the GNU General Public License
18 1.1 christos along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 1.1 christos
20 1.1 christos
21 1.1 christos #include "block.h"
22 1.1 christos #include "charset.h"
23 1.1 christos #include "cp-support.h"
24 1.1 christos #include "gdbsupport/gdb_obstack.h"
25 1.1 christos #include "gdbsupport/gdb_regex.h"
26 1.1 christos #include "rust-lang.h"
27 1.1 christos #include "parser-defs.h"
28 1.1 christos #include "gdbsupport/selftest.h"
29 1.1 christos #include "value.h"
30 1.1 christos #include "gdbarch.h"
31 1.1 christos #include "rust-exp.h"
32 1.1.1.2 christos #include "inferior.h"
33 1.1 christos
34 1.1 christos using namespace expr;
35 1.1 christos
/* The text of the regular expression that recognizes Rust numeric
   literals.  It is assembled from adjacent string literals so that
   every alternative can carry its own comment.  The macros defined
   next to each alternative give the index of the capture group of
   interest.  */

static const char number_regex_text[] =
  /* Group 1 only exists to wrap the alternation.  */
  "^("

  /* Alternative 1: floating point with digits on both sides of the
     decimal point, an optional exponent and an optional type suffix.
     Group 2 only exists so the exponent can be made optional.
     Group 3 is the type suffix, if present.  */
#define FLOAT_TYPE1 3
  "[0-9][0-9_]*\\.[0-9][0-9_]*"
  "([eE][-+]?[0-9][0-9_]*)?"
  "(f32|f64)?"
  "|"

  /* Alternative 2: floating point written with an exponent but no
     decimal point, and an optional type suffix.
     Group 4 is the type suffix, if present.  */
#define FLOAT_TYPE2 4
  "[0-9][0-9_]*[eE][-+]?[0-9][0-9_]*"
  "(f32|f64)?"
  "|"

  /* Alternative 3: "23." is a valid floating point number, but
     "23.e5" and "23.f32" are not, so the trailing-"." form gets an
     alternative of its own.  */
  "[0-9][0-9_]*\\."
  "|"

  /* Alternative 4: integers.
     Group 5 is the text of the integer.
     Group 6 is the type suffix, if present.
     Group 7 only exists to wrap the alternation of suffix widths.  */
#define INT_TEXT 5
#define INT_TYPE 6
  "(0x[a-fA-F0-9_]+"
  "|0o[0-7_]+"
  "|0b[01_]+"
  "|[0-9][0-9_]*)"
  "([iu](size|8|16|32|64|128))?"

  ")";

/* How many match slots are needed: the seven groups above plus the
   "0th" slot that describes the whole match.  */
#define NUM_SUBEXPRESSIONS 8
77 1.1 christos
78 1.1 christos /* The compiled number-matching regex. */
79 1.1 christos
80 1.1 christos static regex_t number_regex;
81 1.1 christos
/* All multi-character token kinds.  A single-character token is
   represented by its own character code (for instance '[' is a
   token), which is why numbering here begins above the ASCII
   range.  */
enum token_type : int
{
  /* Non-keyword, non-operator tokens.  The first value must lie
     after any ASCII character.  */
  GDBVAR = 256,
  IDENT,
  COMPLETE,
  INTEGER,
  DECIMAL_INTEGER,
  STRING,
  BYTESTRING,
  FLOAT,
  COMPOUND_ASSIGN,

  /* Keywords.  */
  KW_AS,
  KW_IF,
  KW_TRUE,
  KW_FALSE,
  KW_SUPER,
  KW_SELF,
  KW_MUT,
  KW_EXTERN,
  KW_CONST,
  KW_FN,
  KW_SIZEOF,

  /* Multi-character operators.  */
  DOTDOT,
  DOTDOTEQ,
  OROR,
  ANDAND,
  EQEQ,
  NOTEQ,
  LTEQ,
  GTEQ,
  LSH,
  RSH,
  COLONCOLON,
  ARROW,
};
124 1.1 christos
125 1.1 christos /* A typed integer constant. */
126 1.1 christos
127 1.1 christos struct typed_val_int
128 1.1 christos {
129 1.1.1.2 christos gdb_mpz val;
130 1.1 christos struct type *type;
131 1.1 christos };
132 1.1 christos
133 1.1 christos /* A typed floating point constant. */
134 1.1 christos
135 1.1 christos struct typed_val_float
136 1.1 christos {
137 1.1 christos float_data val;
138 1.1 christos struct type *type;
139 1.1 christos };
140 1.1 christos
141 1.1 christos /* A struct of this type is used to describe a token. */
142 1.1 christos
143 1.1 christos struct token_info
144 1.1 christos {
145 1.1 christos const char *name;
146 1.1 christos int value;
147 1.1 christos enum exp_opcode opcode;
148 1.1 christos };
149 1.1 christos
150 1.1 christos /* Identifier tokens. */
151 1.1 christos
152 1.1 christos static const struct token_info identifier_tokens[] =
153 1.1 christos {
154 1.1 christos { "as", KW_AS, OP_NULL },
155 1.1 christos { "false", KW_FALSE, OP_NULL },
156 1.1 christos { "if", 0, OP_NULL },
157 1.1 christos { "mut", KW_MUT, OP_NULL },
158 1.1 christos { "const", KW_CONST, OP_NULL },
159 1.1 christos { "self", KW_SELF, OP_NULL },
160 1.1 christos { "super", KW_SUPER, OP_NULL },
161 1.1 christos { "true", KW_TRUE, OP_NULL },
162 1.1 christos { "extern", KW_EXTERN, OP_NULL },
163 1.1 christos { "fn", KW_FN, OP_NULL },
164 1.1 christos { "sizeof", KW_SIZEOF, OP_NULL },
165 1.1 christos };
166 1.1 christos
167 1.1 christos /* Operator tokens, sorted longest first. */
168 1.1 christos
169 1.1 christos static const struct token_info operator_tokens[] =
170 1.1 christos {
171 1.1 christos { ">>=", COMPOUND_ASSIGN, BINOP_RSH },
172 1.1 christos { "<<=", COMPOUND_ASSIGN, BINOP_LSH },
173 1.1 christos
174 1.1 christos { "<<", LSH, OP_NULL },
175 1.1 christos { ">>", RSH, OP_NULL },
176 1.1 christos { "&&", ANDAND, OP_NULL },
177 1.1 christos { "||", OROR, OP_NULL },
178 1.1 christos { "==", EQEQ, OP_NULL },
179 1.1 christos { "!=", NOTEQ, OP_NULL },
180 1.1 christos { "<=", LTEQ, OP_NULL },
181 1.1 christos { ">=", GTEQ, OP_NULL },
182 1.1 christos { "+=", COMPOUND_ASSIGN, BINOP_ADD },
183 1.1 christos { "-=", COMPOUND_ASSIGN, BINOP_SUB },
184 1.1 christos { "*=", COMPOUND_ASSIGN, BINOP_MUL },
185 1.1 christos { "/=", COMPOUND_ASSIGN, BINOP_DIV },
186 1.1 christos { "%=", COMPOUND_ASSIGN, BINOP_REM },
187 1.1 christos { "&=", COMPOUND_ASSIGN, BINOP_BITWISE_AND },
188 1.1 christos { "|=", COMPOUND_ASSIGN, BINOP_BITWISE_IOR },
189 1.1 christos { "^=", COMPOUND_ASSIGN, BINOP_BITWISE_XOR },
190 1.1 christos { "..=", DOTDOTEQ, OP_NULL },
191 1.1 christos
192 1.1 christos { "::", COLONCOLON, OP_NULL },
193 1.1 christos { "..", DOTDOT, OP_NULL },
194 1.1 christos { "->", ARROW, OP_NULL }
195 1.1 christos };
196 1.1 christos
197 1.1 christos /* An instance of this is created before parsing, and destroyed when
198 1.1 christos parsing is finished. */
199 1.1 christos
200 1.1 christos struct rust_parser
201 1.1 christos {
202 1.1 christos explicit rust_parser (struct parser_state *state)
203 1.1 christos : pstate (state)
204 1.1 christos {
205 1.1 christos }
206 1.1 christos
207 1.1 christos DISABLE_COPY_AND_ASSIGN (rust_parser);
208 1.1 christos
209 1.1 christos /* Return the parser's language. */
210 1.1 christos const struct language_defn *language () const
211 1.1 christos {
212 1.1 christos return pstate->language ();
213 1.1 christos }
214 1.1 christos
215 1.1 christos /* Return the parser's gdbarch. */
216 1.1 christos struct gdbarch *arch () const
217 1.1 christos {
218 1.1 christos return pstate->gdbarch ();
219 1.1 christos }
220 1.1 christos
221 1.1 christos /* A helper to look up a Rust type, or fail. This only works for
222 1.1 christos types defined by rust_language_arch_info. */
223 1.1 christos
224 1.1 christos struct type *get_type (const char *name)
225 1.1 christos {
226 1.1 christos struct type *type;
227 1.1 christos
228 1.1 christos type = language_lookup_primitive_type (language (), arch (), name);
229 1.1 christos if (type == NULL)
230 1.1 christos error (_("Could not find Rust type %s"), name);
231 1.1 christos return type;
232 1.1 christos }
233 1.1 christos
234 1.1 christos std::string crate_name (const std::string &name);
235 1.1 christos std::string super_name (const std::string &ident, unsigned int n_supers);
236 1.1 christos
237 1.1 christos int lex_character ();
238 1.1 christos int lex_number ();
239 1.1 christos int lex_string ();
240 1.1 christos int lex_identifier ();
241 1.1 christos uint32_t lex_hex (int min, int max);
242 1.1 christos uint32_t lex_escape (int is_byte);
243 1.1 christos int lex_operator ();
244 1.1 christos int lex_one_token ();
245 1.1 christos void push_back (char c);
246 1.1 christos
247 1.1 christos /* The main interface to lexing. Lexes one token and updates the
248 1.1 christos internal state. */
249 1.1 christos void lex ()
250 1.1 christos {
251 1.1 christos current_token = lex_one_token ();
252 1.1 christos }
253 1.1 christos
254 1.1 christos /* Assuming the current token is TYPE, lex the next token. */
255 1.1 christos void assume (int type)
256 1.1 christos {
257 1.1 christos gdb_assert (current_token == type);
258 1.1 christos lex ();
259 1.1 christos }
260 1.1 christos
261 1.1 christos /* Require the single-character token C, and lex the next token; or
262 1.1 christos throw an exception. */
263 1.1 christos void require (char type)
264 1.1 christos {
265 1.1 christos if (current_token != type)
266 1.1 christos error (_("'%c' expected"), type);
267 1.1 christos lex ();
268 1.1 christos }
269 1.1 christos
270 1.1 christos /* Entry point for all parsing. */
271 1.1 christos operation_up parse_entry_point ()
272 1.1 christos {
273 1.1 christos lex ();
274 1.1 christos operation_up result = parse_expr ();
275 1.1 christos if (current_token != 0)
276 1.1 christos error (_("Syntax error near '%s'"), pstate->prev_lexptr);
277 1.1 christos return result;
278 1.1 christos }
279 1.1 christos
280 1.1 christos operation_up parse_tuple ();
281 1.1 christos operation_up parse_array ();
282 1.1 christos operation_up name_to_operation (const std::string &name);
283 1.1 christos operation_up parse_struct_expr (struct type *type);
284 1.1 christos operation_up parse_binop (bool required);
285 1.1 christos operation_up parse_range ();
286 1.1 christos operation_up parse_expr ();
287 1.1 christos operation_up parse_sizeof ();
288 1.1 christos operation_up parse_addr ();
289 1.1 christos operation_up parse_field (operation_up &&);
290 1.1 christos operation_up parse_index (operation_up &&);
291 1.1 christos std::vector<operation_up> parse_paren_args ();
292 1.1 christos operation_up parse_call (operation_up &&);
293 1.1 christos std::vector<struct type *> parse_type_list ();
294 1.1 christos std::vector<struct type *> parse_maybe_type_list ();
295 1.1 christos struct type *parse_array_type ();
296 1.1 christos struct type *parse_slice_type ();
297 1.1 christos struct type *parse_pointer_type ();
298 1.1 christos struct type *parse_function_type ();
299 1.1 christos struct type *parse_tuple_type ();
300 1.1 christos struct type *parse_type ();
301 1.1 christos std::string parse_path (bool for_expr);
302 1.1 christos operation_up parse_string ();
303 1.1 christos operation_up parse_tuple_struct (struct type *type);
304 1.1 christos operation_up parse_path_expr ();
305 1.1 christos operation_up parse_atom (bool required);
306 1.1 christos
307 1.1 christos void update_innermost_block (struct block_symbol sym);
308 1.1 christos struct block_symbol lookup_symbol (const char *name,
309 1.1 christos const struct block *block,
310 1.1.1.2 christos const domain_search_flags domain);
311 1.1 christos struct type *rust_lookup_type (const char *name);
312 1.1 christos
313 1.1 christos /* Clear some state. This is only used for testing. */
314 1.1 christos #if GDB_SELF_TEST
315 1.1 christos void reset (const char *input)
316 1.1 christos {
317 1.1 christos pstate->prev_lexptr = nullptr;
318 1.1 christos pstate->lexptr = input;
319 1.1 christos paren_depth = 0;
320 1.1 christos current_token = 0;
321 1.1 christos current_int_val = {};
322 1.1 christos current_float_val = {};
323 1.1 christos current_string_val = {};
324 1.1 christos current_opcode = OP_NULL;
325 1.1 christos }
326 1.1 christos #endif /* GDB_SELF_TEST */
327 1.1 christos
328 1.1 christos /* Return the token's string value as a string. */
329 1.1 christos std::string get_string () const
330 1.1 christos {
331 1.1 christos return std::string (current_string_val.ptr, current_string_val.length);
332 1.1 christos }
333 1.1 christos
334 1.1 christos /* A pointer to this is installed globally. */
335 1.1 christos auto_obstack obstack;
336 1.1 christos
337 1.1 christos /* The parser state gdb gave us. */
338 1.1 christos struct parser_state *pstate;
339 1.1 christos
340 1.1 christos /* Depth of parentheses. */
341 1.1 christos int paren_depth = 0;
342 1.1 christos
343 1.1 christos /* The current token's type. */
344 1.1 christos int current_token = 0;
345 1.1 christos /* The current token's payload, if any. */
346 1.1 christos typed_val_int current_int_val {};
347 1.1 christos typed_val_float current_float_val {};
348 1.1 christos struct stoken current_string_val {};
349 1.1 christos enum exp_opcode current_opcode = OP_NULL;
350 1.1 christos
351 1.1 christos /* When completing, this may be set to the field operation to
352 1.1 christos complete. */
353 1.1 christos operation_up completion_op;
354 1.1 christos };
355 1.1 christos
356 1.1 christos /* Return an string referring to NAME, but relative to the crate's
357 1.1 christos name. */
358 1.1 christos
359 1.1 christos std::string
360 1.1 christos rust_parser::crate_name (const std::string &name)
361 1.1 christos {
362 1.1 christos std::string crate = rust_crate_for_block (pstate->expression_context_block);
363 1.1 christos
364 1.1 christos if (crate.empty ())
365 1.1 christos error (_("Could not find crate for current location"));
366 1.1 christos return "::" + crate + "::" + name;
367 1.1 christos }
368 1.1 christos
369 1.1 christos /* Return a string referring to a "super::" qualified name. IDENT is
370 1.1 christos the base name and N_SUPERS is how many "super::"s were provided.
371 1.1 christos N_SUPERS can be zero. */
372 1.1 christos
373 1.1 christos std::string
374 1.1 christos rust_parser::super_name (const std::string &ident, unsigned int n_supers)
375 1.1 christos {
376 1.1.1.2 christos const char *scope = "";
377 1.1.1.2 christos if (pstate->expression_context_block != nullptr)
378 1.1.1.2 christos scope = pstate->expression_context_block->scope ();
379 1.1 christos int offset;
380 1.1 christos
381 1.1 christos if (scope[0] == '\0')
382 1.1 christos error (_("Couldn't find namespace scope for self::"));
383 1.1 christos
384 1.1 christos if (n_supers > 0)
385 1.1 christos {
386 1.1 christos int len;
387 1.1 christos std::vector<int> offsets;
388 1.1 christos unsigned int current_len;
389 1.1 christos
390 1.1 christos current_len = cp_find_first_component (scope);
391 1.1 christos while (scope[current_len] != '\0')
392 1.1 christos {
393 1.1 christos offsets.push_back (current_len);
394 1.1 christos gdb_assert (scope[current_len] == ':');
395 1.1 christos /* The "::". */
396 1.1 christos current_len += 2;
397 1.1 christos current_len += cp_find_first_component (scope
398 1.1 christos + current_len);
399 1.1 christos }
400 1.1 christos
401 1.1 christos len = offsets.size ();
402 1.1 christos if (n_supers >= len)
403 1.1 christos error (_("Too many super:: uses from '%s'"), scope);
404 1.1 christos
405 1.1 christos offset = offsets[len - n_supers];
406 1.1 christos }
407 1.1 christos else
408 1.1 christos offset = strlen (scope);
409 1.1 christos
410 1.1 christos return "::" + std::string (scope, offset) + "::" + ident;
411 1.1 christos }
412 1.1 christos
413 1.1 christos /* A helper to appropriately munge NAME and BLOCK depending on the
414 1.1 christos presence of a leading "::". */
415 1.1 christos
416 1.1 christos static void
417 1.1 christos munge_name_and_block (const char **name, const struct block **block)
418 1.1 christos {
419 1.1 christos /* If it is a global reference, skip the current block in favor of
420 1.1 christos the static block. */
421 1.1 christos if (startswith (*name, "::"))
422 1.1 christos {
423 1.1 christos *name += 2;
424 1.1.1.2 christos *block = (*block)->static_block ();
425 1.1 christos }
426 1.1 christos }
427 1.1 christos
428 1.1 christos /* Like lookup_symbol, but handles Rust namespace conventions, and
429 1.1 christos doesn't require field_of_this_result. */
430 1.1 christos
431 1.1 christos struct block_symbol
432 1.1 christos rust_parser::lookup_symbol (const char *name, const struct block *block,
433 1.1.1.2 christos const domain_search_flags domain)
434 1.1 christos {
435 1.1 christos struct block_symbol result;
436 1.1 christos
437 1.1 christos munge_name_and_block (&name, &block);
438 1.1 christos
439 1.1 christos result = ::lookup_symbol (name, block, domain, NULL);
440 1.1 christos if (result.symbol != NULL)
441 1.1 christos update_innermost_block (result);
442 1.1 christos return result;
443 1.1 christos }
444 1.1 christos
445 1.1 christos /* Look up a type, following Rust namespace conventions. */
446 1.1 christos
447 1.1 christos struct type *
448 1.1 christos rust_parser::rust_lookup_type (const char *name)
449 1.1 christos {
450 1.1 christos struct block_symbol result;
451 1.1 christos struct type *type;
452 1.1 christos
453 1.1 christos const struct block *block = pstate->expression_context_block;
454 1.1 christos munge_name_and_block (&name, &block);
455 1.1 christos
456 1.1.1.2 christos result = ::lookup_symbol (name, block, SEARCH_TYPE_DOMAIN, nullptr);
457 1.1 christos if (result.symbol != NULL)
458 1.1 christos {
459 1.1 christos update_innermost_block (result);
460 1.1 christos return result.symbol->type ();
461 1.1 christos }
462 1.1 christos
463 1.1 christos type = lookup_typename (language (), name, NULL, 1);
464 1.1 christos if (type != NULL)
465 1.1 christos return type;
466 1.1 christos
467 1.1 christos /* Last chance, try a built-in type. */
468 1.1 christos return language_lookup_primitive_type (language (), arch (), name);
469 1.1 christos }
470 1.1 christos
471 1.1 christos /* A helper that updates the innermost block as appropriate. */
472 1.1 christos
473 1.1 christos void
474 1.1 christos rust_parser::update_innermost_block (struct block_symbol sym)
475 1.1 christos {
476 1.1 christos if (symbol_read_needs_frame (sym.symbol))
477 1.1 christos pstate->block_tracker->update (sym);
478 1.1 christos }
479 1.1 christos
480 1.1 christos /* Lex a hex number with at least MIN digits and at most MAX
481 1.1 christos digits. */
482 1.1 christos
483 1.1 christos uint32_t
484 1.1 christos rust_parser::lex_hex (int min, int max)
485 1.1 christos {
486 1.1 christos uint32_t result = 0;
487 1.1 christos int len = 0;
488 1.1 christos /* We only want to stop at MAX if we're lexing a byte escape. */
489 1.1 christos int check_max = min == max;
490 1.1 christos
491 1.1 christos while ((check_max ? len <= max : 1)
492 1.1 christos && ((pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'f')
493 1.1 christos || (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'F')
494 1.1 christos || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9')))
495 1.1 christos {
496 1.1 christos result *= 16;
497 1.1 christos if (pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'f')
498 1.1 christos result = result + 10 + pstate->lexptr[0] - 'a';
499 1.1 christos else if (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'F')
500 1.1 christos result = result + 10 + pstate->lexptr[0] - 'A';
501 1.1 christos else
502 1.1 christos result = result + pstate->lexptr[0] - '0';
503 1.1 christos ++pstate->lexptr;
504 1.1 christos ++len;
505 1.1 christos }
506 1.1 christos
507 1.1 christos if (len < min)
508 1.1 christos error (_("Not enough hex digits seen"));
509 1.1 christos if (len > max)
510 1.1 christos {
511 1.1 christos gdb_assert (min != max);
512 1.1 christos error (_("Overlong hex escape"));
513 1.1 christos }
514 1.1 christos
515 1.1 christos return result;
516 1.1 christos }
517 1.1 christos
518 1.1 christos /* Lex an escape. IS_BYTE is true if we're lexing a byte escape;
519 1.1 christos otherwise we're lexing a character escape. */
520 1.1 christos
521 1.1 christos uint32_t
522 1.1 christos rust_parser::lex_escape (int is_byte)
523 1.1 christos {
524 1.1 christos uint32_t result;
525 1.1 christos
526 1.1 christos gdb_assert (pstate->lexptr[0] == '\\');
527 1.1 christos ++pstate->lexptr;
528 1.1 christos switch (pstate->lexptr[0])
529 1.1 christos {
530 1.1 christos case 'x':
531 1.1 christos ++pstate->lexptr;
532 1.1 christos result = lex_hex (2, 2);
533 1.1 christos break;
534 1.1 christos
535 1.1 christos case 'u':
536 1.1 christos if (is_byte)
537 1.1 christos error (_("Unicode escape in byte literal"));
538 1.1 christos ++pstate->lexptr;
539 1.1 christos if (pstate->lexptr[0] != '{')
540 1.1 christos error (_("Missing '{' in Unicode escape"));
541 1.1 christos ++pstate->lexptr;
542 1.1 christos result = lex_hex (1, 6);
543 1.1 christos /* Could do range checks here. */
544 1.1 christos if (pstate->lexptr[0] != '}')
545 1.1 christos error (_("Missing '}' in Unicode escape"));
546 1.1 christos ++pstate->lexptr;
547 1.1 christos break;
548 1.1 christos
549 1.1 christos case 'n':
550 1.1 christos result = '\n';
551 1.1 christos ++pstate->lexptr;
552 1.1 christos break;
553 1.1 christos case 'r':
554 1.1 christos result = '\r';
555 1.1 christos ++pstate->lexptr;
556 1.1 christos break;
557 1.1 christos case 't':
558 1.1 christos result = '\t';
559 1.1 christos ++pstate->lexptr;
560 1.1 christos break;
561 1.1 christos case '\\':
562 1.1 christos result = '\\';
563 1.1 christos ++pstate->lexptr;
564 1.1 christos break;
565 1.1 christos case '0':
566 1.1 christos result = '\0';
567 1.1 christos ++pstate->lexptr;
568 1.1 christos break;
569 1.1 christos case '\'':
570 1.1 christos result = '\'';
571 1.1 christos ++pstate->lexptr;
572 1.1 christos break;
573 1.1 christos case '"':
574 1.1 christos result = '"';
575 1.1 christos ++pstate->lexptr;
576 1.1 christos break;
577 1.1 christos
578 1.1 christos default:
579 1.1 christos error (_("Invalid escape \\%c in literal"), pstate->lexptr[0]);
580 1.1 christos }
581 1.1 christos
582 1.1 christos return result;
583 1.1 christos }
584 1.1 christos
585 1.1 christos /* A helper for lex_character. Search forward for the closing single
586 1.1 christos quote, then convert the bytes from the host charset to UTF-32. */
587 1.1 christos
588 1.1 christos static uint32_t
589 1.1 christos lex_multibyte_char (const char *text, int *len)
590 1.1 christos {
591 1.1 christos /* Only look a maximum of 5 bytes for the closing quote. This is
592 1.1 christos the maximum for UTF-8. */
593 1.1 christos int quote;
594 1.1 christos gdb_assert (text[0] != '\'');
595 1.1 christos for (quote = 1; text[quote] != '\0' && text[quote] != '\''; ++quote)
596 1.1 christos ;
597 1.1 christos *len = quote;
598 1.1 christos /* The caller will issue an error. */
599 1.1 christos if (text[quote] == '\0')
600 1.1 christos return 0;
601 1.1 christos
602 1.1 christos auto_obstack result;
603 1.1 christos convert_between_encodings (host_charset (), HOST_UTF32,
604 1.1 christos (const gdb_byte *) text,
605 1.1 christos quote, 1, &result, translit_none);
606 1.1 christos
607 1.1 christos int size = obstack_object_size (&result);
608 1.1 christos if (size > 4)
609 1.1 christos error (_("overlong character literal"));
610 1.1 christos uint32_t value;
611 1.1 christos memcpy (&value, obstack_finish (&result), size);
612 1.1 christos return value;
613 1.1 christos }
614 1.1 christos
615 1.1 christos /* Lex a character constant. */
616 1.1 christos
617 1.1 christos int
618 1.1 christos rust_parser::lex_character ()
619 1.1 christos {
620 1.1 christos int is_byte = 0;
621 1.1 christos uint32_t value;
622 1.1 christos
623 1.1 christos if (pstate->lexptr[0] == 'b')
624 1.1 christos {
625 1.1 christos is_byte = 1;
626 1.1 christos ++pstate->lexptr;
627 1.1 christos }
628 1.1 christos gdb_assert (pstate->lexptr[0] == '\'');
629 1.1 christos ++pstate->lexptr;
630 1.1 christos if (pstate->lexptr[0] == '\'')
631 1.1 christos error (_("empty character literal"));
632 1.1 christos else if (pstate->lexptr[0] == '\\')
633 1.1 christos value = lex_escape (is_byte);
634 1.1 christos else
635 1.1 christos {
636 1.1 christos int len;
637 1.1 christos value = lex_multibyte_char (&pstate->lexptr[0], &len);
638 1.1 christos pstate->lexptr += len;
639 1.1 christos }
640 1.1 christos
641 1.1 christos if (pstate->lexptr[0] != '\'')
642 1.1 christos error (_("Unterminated character literal"));
643 1.1 christos ++pstate->lexptr;
644 1.1 christos
645 1.1 christos current_int_val.val = value;
646 1.1 christos current_int_val.type = get_type (is_byte ? "u8" : "char");
647 1.1 christos
648 1.1 christos return INTEGER;
649 1.1 christos }
650 1.1 christos
/* If STR looks like the start of a raw string -- an 'r', any number
   of '#' characters, then a double quote -- return the offset of that
   double quote from STR.  Return 0 if STR does not start a raw
   string.  */

static int
starts_raw_string (const char *str)
{
  if (*str != 'r')
    return 0;

  /* Skip the 'r' and then every '#' that follows it.  */
  int offset = 1;
  while (str[offset] == '#')
    ++offset;

  return str[offset] == '"' ? offset : 0;
}
668 1.1 christos
669 1.1 christos /* Return true if STR looks like the end of a raw string that had N
670 1.1 christos hashes at the start. */
671 1.1 christos
672 1.1 christos static bool
673 1.1 christos ends_raw_string (const char *str, int n)
674 1.1 christos {
675 1.1 christos int i;
676 1.1 christos
677 1.1 christos gdb_assert (str[0] == '"');
678 1.1 christos for (i = 0; i < n; ++i)
679 1.1 christos if (str[i + 1] != '#')
680 1.1 christos return false;
681 1.1 christos return true;
682 1.1 christos }
683 1.1 christos
684 1.1 christos /* Lex a string constant. */
685 1.1 christos
686 1.1 christos int
687 1.1 christos rust_parser::lex_string ()
688 1.1 christos {
689 1.1 christos int is_byte = pstate->lexptr[0] == 'b';
690 1.1 christos int raw_length;
691 1.1 christos
692 1.1 christos if (is_byte)
693 1.1 christos ++pstate->lexptr;
694 1.1 christos raw_length = starts_raw_string (pstate->lexptr);
695 1.1 christos pstate->lexptr += raw_length;
696 1.1 christos gdb_assert (pstate->lexptr[0] == '"');
697 1.1 christos ++pstate->lexptr;
698 1.1 christos
699 1.1 christos while (1)
700 1.1 christos {
701 1.1 christos uint32_t value;
702 1.1 christos
703 1.1 christos if (raw_length > 0)
704 1.1 christos {
705 1.1 christos if (pstate->lexptr[0] == '"' && ends_raw_string (pstate->lexptr,
706 1.1 christos raw_length - 1))
707 1.1 christos {
708 1.1 christos /* Exit with lexptr pointing after the final "#". */
709 1.1 christos pstate->lexptr += raw_length;
710 1.1 christos break;
711 1.1 christos }
712 1.1 christos else if (pstate->lexptr[0] == '\0')
713 1.1 christos error (_("Unexpected EOF in string"));
714 1.1 christos
715 1.1 christos value = pstate->lexptr[0] & 0xff;
716 1.1 christos if (is_byte && value > 127)
717 1.1 christos error (_("Non-ASCII value in raw byte string"));
718 1.1 christos obstack_1grow (&obstack, value);
719 1.1 christos
720 1.1 christos ++pstate->lexptr;
721 1.1 christos }
722 1.1 christos else if (pstate->lexptr[0] == '"')
723 1.1 christos {
724 1.1 christos /* Make sure to skip the quote. */
725 1.1 christos ++pstate->lexptr;
726 1.1 christos break;
727 1.1 christos }
728 1.1 christos else if (pstate->lexptr[0] == '\\')
729 1.1 christos {
730 1.1 christos value = lex_escape (is_byte);
731 1.1 christos
732 1.1 christos if (is_byte)
733 1.1 christos obstack_1grow (&obstack, value);
734 1.1 christos else
735 1.1 christos convert_between_encodings (HOST_UTF32, "UTF-8",
736 1.1 christos (gdb_byte *) &value,
737 1.1 christos sizeof (value), sizeof (value),
738 1.1 christos &obstack, translit_none);
739 1.1 christos }
740 1.1 christos else if (pstate->lexptr[0] == '\0')
741 1.1 christos error (_("Unexpected EOF in string"));
742 1.1 christos else
743 1.1 christos {
744 1.1 christos value = pstate->lexptr[0] & 0xff;
745 1.1 christos if (is_byte && value > 127)
746 1.1 christos error (_("Non-ASCII value in byte string"));
747 1.1 christos obstack_1grow (&obstack, value);
748 1.1 christos ++pstate->lexptr;
749 1.1 christos }
750 1.1 christos }
751 1.1 christos
752 1.1 christos current_string_val.length = obstack_object_size (&obstack);
753 1.1 christos current_string_val.ptr = (const char *) obstack_finish (&obstack);
754 1.1 christos return is_byte ? BYTESTRING : STRING;
755 1.1 christos }
756 1.1 christos
/* Return true if STRING starts with at least one space or tab and the
   first character after that run of whitespace is a decimal digit.  */

static bool
space_then_number (const char *string)
{
  const char *cursor = string;
  while (*cursor == ' ' || *cursor == '\t')
    ++cursor;

  const bool saw_space = cursor != string;
  return saw_space && *cursor >= '0' && *cursor <= '9';
}
771 1.1 christos
/* Return true if C can start an identifier: an ASCII letter, '_',
   '$', or any byte with the high bit set.  */

static bool
rust_identifier_start_p (char c)
{
  if (c == '_' || c == '$')
    return true;

  if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
    return true;

  /* Allow any non-ASCII character as an identifier.  There doesn't
     seem to be a need to be picky about this.  */
  return (c & 0x80) != 0;
}
785 1.1 christos
786 1.1 christos /* Lex an identifier. */
787 1.1 christos
788 1.1 christos int
789 1.1 christos rust_parser::lex_identifier ()
790 1.1 christos {
791 1.1 christos unsigned int length;
792 1.1 christos const struct token_info *token;
793 1.1 christos int is_gdb_var = pstate->lexptr[0] == '$';
794 1.1 christos
795 1.1 christos bool is_raw = false;
796 1.1 christos if (pstate->lexptr[0] == 'r'
797 1.1 christos && pstate->lexptr[1] == '#'
798 1.1 christos && rust_identifier_start_p (pstate->lexptr[2]))
799 1.1 christos {
800 1.1 christos is_raw = true;
801 1.1 christos pstate->lexptr += 2;
802 1.1 christos }
803 1.1 christos
804 1.1 christos const char *start = pstate->lexptr;
805 1.1 christos gdb_assert (rust_identifier_start_p (pstate->lexptr[0]));
806 1.1 christos
807 1.1 christos ++pstate->lexptr;
808 1.1 christos
809 1.1 christos /* Allow any non-ASCII character here. This "handles" UTF-8 by
810 1.1 christos passing it through. */
811 1.1 christos while ((pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'z')
812 1.1 christos || (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'Z')
813 1.1 christos || pstate->lexptr[0] == '_'
814 1.1 christos || (is_gdb_var && pstate->lexptr[0] == '$')
815 1.1 christos || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9')
816 1.1 christos || (pstate->lexptr[0] & 0x80) != 0)
817 1.1 christos ++pstate->lexptr;
818 1.1 christos
819 1.1 christos
820 1.1 christos length = pstate->lexptr - start;
821 1.1 christos token = NULL;
822 1.1 christos if (!is_raw)
823 1.1 christos {
824 1.1 christos for (const auto &candidate : identifier_tokens)
825 1.1 christos {
826 1.1 christos if (length == strlen (candidate.name)
827 1.1 christos && strncmp (candidate.name, start, length) == 0)
828 1.1 christos {
829 1.1 christos token = &candidate;
830 1.1 christos break;
831 1.1 christos }
832 1.1 christos }
833 1.1 christos }
834 1.1 christos
835 1.1 christos if (token != NULL)
836 1.1 christos {
837 1.1 christos if (token->value == 0)
838 1.1 christos {
839 1.1 christos /* Leave the terminating token alone. */
840 1.1 christos pstate->lexptr = start;
841 1.1 christos return 0;
842 1.1 christos }
843 1.1 christos }
844 1.1 christos else if (token == NULL
845 1.1 christos && !is_raw
846 1.1 christos && (strncmp (start, "thread", length) == 0
847 1.1 christos || strncmp (start, "task", length) == 0)
848 1.1 christos && space_then_number (pstate->lexptr))
849 1.1 christos {
850 1.1 christos /* "task" or "thread" followed by a number terminates the
851 1.1 christos parse, per gdb rules. */
852 1.1 christos pstate->lexptr = start;
853 1.1 christos return 0;
854 1.1 christos }
855 1.1 christos
856 1.1 christos if (token == NULL || (pstate->parse_completion && pstate->lexptr[0] == '\0'))
857 1.1 christos {
858 1.1 christos current_string_val.length = length;
859 1.1 christos current_string_val.ptr = start;
860 1.1 christos }
861 1.1 christos
862 1.1 christos if (pstate->parse_completion && pstate->lexptr[0] == '\0')
863 1.1 christos {
864 1.1 christos /* Prevent rustyylex from returning two COMPLETE tokens. */
865 1.1 christos pstate->prev_lexptr = pstate->lexptr;
866 1.1 christos return COMPLETE;
867 1.1 christos }
868 1.1 christos
869 1.1 christos if (token != NULL)
870 1.1 christos return token->value;
871 1.1 christos if (is_gdb_var)
872 1.1 christos return GDBVAR;
873 1.1 christos return IDENT;
874 1.1 christos }
875 1.1 christos
876 1.1 christos /* Lex an operator. */
877 1.1 christos
878 1.1 christos int
879 1.1 christos rust_parser::lex_operator ()
880 1.1 christos {
881 1.1 christos const struct token_info *token = NULL;
882 1.1 christos
883 1.1 christos for (const auto &candidate : operator_tokens)
884 1.1 christos {
885 1.1 christos if (strncmp (candidate.name, pstate->lexptr,
886 1.1 christos strlen (candidate.name)) == 0)
887 1.1 christos {
888 1.1 christos pstate->lexptr += strlen (candidate.name);
889 1.1 christos token = &candidate;
890 1.1 christos break;
891 1.1 christos }
892 1.1 christos }
893 1.1 christos
894 1.1 christos if (token != NULL)
895 1.1 christos {
896 1.1 christos current_opcode = token->opcode;
897 1.1 christos return token->value;
898 1.1 christos }
899 1.1 christos
900 1.1 christos return *pstate->lexptr++;
901 1.1 christos }
902 1.1 christos
903 1.1 christos /* Lex a number. */
904 1.1 christos
905 1.1 christos int
906 1.1 christos rust_parser::lex_number ()
907 1.1 christos {
908 1.1 christos regmatch_t subexps[NUM_SUBEXPRESSIONS];
909 1.1 christos int match;
910 1.1 christos int is_integer = 0;
911 1.1 christos int could_be_decimal = 1;
912 1.1 christos int implicit_i32 = 0;
913 1.1 christos const char *type_name = NULL;
914 1.1 christos struct type *type;
915 1.1 christos int end_index;
916 1.1 christos int type_index = -1;
917 1.1 christos int i;
918 1.1 christos
919 1.1 christos match = regexec (&number_regex, pstate->lexptr, ARRAY_SIZE (subexps),
920 1.1 christos subexps, 0);
921 1.1 christos /* Failure means the regexp is broken. */
922 1.1 christos gdb_assert (match == 0);
923 1.1 christos
924 1.1 christos if (subexps[INT_TEXT].rm_so != -1)
925 1.1 christos {
926 1.1 christos /* Integer part matched. */
927 1.1 christos is_integer = 1;
928 1.1 christos end_index = subexps[INT_TEXT].rm_eo;
929 1.1 christos if (subexps[INT_TYPE].rm_so == -1)
930 1.1 christos {
931 1.1 christos type_name = "i32";
932 1.1 christos implicit_i32 = 1;
933 1.1 christos }
934 1.1 christos else
935 1.1 christos {
936 1.1 christos type_index = INT_TYPE;
937 1.1 christos could_be_decimal = 0;
938 1.1 christos }
939 1.1 christos }
940 1.1 christos else if (subexps[FLOAT_TYPE1].rm_so != -1)
941 1.1 christos {
942 1.1 christos /* Found floating point type suffix. */
943 1.1 christos end_index = subexps[FLOAT_TYPE1].rm_so;
944 1.1 christos type_index = FLOAT_TYPE1;
945 1.1 christos }
946 1.1 christos else if (subexps[FLOAT_TYPE2].rm_so != -1)
947 1.1 christos {
948 1.1 christos /* Found floating point type suffix. */
949 1.1 christos end_index = subexps[FLOAT_TYPE2].rm_so;
950 1.1 christos type_index = FLOAT_TYPE2;
951 1.1 christos }
952 1.1 christos else
953 1.1 christos {
954 1.1 christos /* Any other floating point match. */
955 1.1 christos end_index = subexps[0].rm_eo;
956 1.1 christos type_name = "f64";
957 1.1 christos }
958 1.1 christos
959 1.1 christos /* We need a special case if the final character is ".". In this
960 1.1 christos case we might need to parse an integer. For example, "23.f()" is
961 1.1 christos a request for a trait method call, not a syntax error involving
962 1.1 christos the floating point number "23.". */
963 1.1 christos gdb_assert (subexps[0].rm_eo > 0);
964 1.1 christos if (pstate->lexptr[subexps[0].rm_eo - 1] == '.')
965 1.1 christos {
966 1.1 christos const char *next = skip_spaces (&pstate->lexptr[subexps[0].rm_eo]);
967 1.1 christos
968 1.1 christos if (rust_identifier_start_p (*next) || *next == '.')
969 1.1 christos {
970 1.1 christos --subexps[0].rm_eo;
971 1.1 christos is_integer = 1;
972 1.1 christos end_index = subexps[0].rm_eo;
973 1.1 christos type_name = "i32";
974 1.1 christos could_be_decimal = 1;
975 1.1 christos implicit_i32 = 1;
976 1.1 christos }
977 1.1 christos }
978 1.1 christos
979 1.1 christos /* Compute the type name if we haven't already. */
980 1.1 christos std::string type_name_holder;
981 1.1 christos if (type_name == NULL)
982 1.1 christos {
983 1.1 christos gdb_assert (type_index != -1);
984 1.1 christos type_name_holder = std::string ((pstate->lexptr
985 1.1 christos + subexps[type_index].rm_so),
986 1.1 christos (subexps[type_index].rm_eo
987 1.1 christos - subexps[type_index].rm_so));
988 1.1 christos type_name = type_name_holder.c_str ();
989 1.1 christos }
990 1.1 christos
991 1.1 christos /* Look up the type. */
992 1.1 christos type = get_type (type_name);
993 1.1 christos
994 1.1 christos /* Copy the text of the number and remove the "_"s. */
995 1.1 christos std::string number;
996 1.1 christos for (i = 0; i < end_index && pstate->lexptr[i]; ++i)
997 1.1 christos {
998 1.1 christos if (pstate->lexptr[i] == '_')
999 1.1 christos could_be_decimal = 0;
1000 1.1 christos else
1001 1.1 christos number.push_back (pstate->lexptr[i]);
1002 1.1 christos }
1003 1.1 christos
1004 1.1 christos /* Advance past the match. */
1005 1.1 christos pstate->lexptr += subexps[0].rm_eo;
1006 1.1 christos
1007 1.1 christos /* Parse the number. */
1008 1.1 christos if (is_integer)
1009 1.1 christos {
1010 1.1 christos int radix = 10;
1011 1.1 christos int offset = 0;
1012 1.1 christos
1013 1.1 christos if (number[0] == '0')
1014 1.1 christos {
1015 1.1 christos if (number[1] == 'x')
1016 1.1 christos radix = 16;
1017 1.1 christos else if (number[1] == 'o')
1018 1.1 christos radix = 8;
1019 1.1 christos else if (number[1] == 'b')
1020 1.1 christos radix = 2;
1021 1.1 christos if (radix != 10)
1022 1.1 christos {
1023 1.1 christos offset = 2;
1024 1.1 christos could_be_decimal = 0;
1025 1.1 christos }
1026 1.1 christos }
1027 1.1 christos
1028 1.1.1.2 christos if (!current_int_val.val.set (number.c_str () + offset, radix))
1029 1.1.1.2 christos {
1030 1.1.1.2 christos /* Shouldn't be possible. */
1031 1.1.1.2 christos error (_("Invalid integer"));
1032 1.1.1.2 christos }
1033 1.1.1.2 christos if (implicit_i32)
1034 1.1.1.2 christos {
1035 1.1.1.2 christos static gdb_mpz sixty_three_bit = gdb_mpz::pow (2, 63);
1036 1.1.1.2 christos static gdb_mpz thirty_one_bit = gdb_mpz::pow (2, 31);
1037 1.1.1.2 christos
1038 1.1.1.2 christos if (current_int_val.val >= sixty_three_bit)
1039 1.1.1.2 christos type = get_type ("i128");
1040 1.1.1.2 christos else if (current_int_val.val >= thirty_one_bit)
1041 1.1.1.2 christos type = get_type ("i64");
1042 1.1.1.2 christos }
1043 1.1 christos
1044 1.1 christos current_int_val.type = type;
1045 1.1 christos }
1046 1.1 christos else
1047 1.1 christos {
1048 1.1 christos current_float_val.type = type;
1049 1.1 christos bool parsed = parse_float (number.c_str (), number.length (),
1050 1.1 christos current_float_val.type,
1051 1.1 christos current_float_val.val.data ());
1052 1.1 christos gdb_assert (parsed);
1053 1.1 christos }
1054 1.1 christos
1055 1.1 christos return is_integer ? (could_be_decimal ? DECIMAL_INTEGER : INTEGER) : FLOAT;
1056 1.1 christos }
1057 1.1 christos
1058 1.1 christos /* The lexer. */
1059 1.1 christos
1060 1.1 christos int
1061 1.1 christos rust_parser::lex_one_token ()
1062 1.1 christos {
1063 1.1 christos /* Skip all leading whitespace. */
1064 1.1 christos while (pstate->lexptr[0] == ' '
1065 1.1 christos || pstate->lexptr[0] == '\t'
1066 1.1 christos || pstate->lexptr[0] == '\r'
1067 1.1 christos || pstate->lexptr[0] == '\n')
1068 1.1 christos ++pstate->lexptr;
1069 1.1 christos
1070 1.1 christos /* If we hit EOF and we're completing, then return COMPLETE -- maybe
1071 1.1 christos we're completing an empty string at the end of a field_expr.
1072 1.1 christos But, we don't want to return two COMPLETE tokens in a row. */
1073 1.1 christos if (pstate->lexptr[0] == '\0' && pstate->lexptr == pstate->prev_lexptr)
1074 1.1 christos return 0;
1075 1.1 christos pstate->prev_lexptr = pstate->lexptr;
1076 1.1 christos if (pstate->lexptr[0] == '\0')
1077 1.1 christos {
1078 1.1 christos if (pstate->parse_completion)
1079 1.1 christos {
1080 1.1 christos current_string_val.length =0;
1081 1.1 christos current_string_val.ptr = "";
1082 1.1 christos return COMPLETE;
1083 1.1 christos }
1084 1.1 christos return 0;
1085 1.1 christos }
1086 1.1 christos
1087 1.1 christos if (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9')
1088 1.1 christos return lex_number ();
1089 1.1 christos else if (pstate->lexptr[0] == 'b' && pstate->lexptr[1] == '\'')
1090 1.1 christos return lex_character ();
1091 1.1 christos else if (pstate->lexptr[0] == 'b' && pstate->lexptr[1] == '"')
1092 1.1 christos return lex_string ();
1093 1.1 christos else if (pstate->lexptr[0] == 'b' && starts_raw_string (pstate->lexptr + 1))
1094 1.1 christos return lex_string ();
1095 1.1 christos else if (starts_raw_string (pstate->lexptr))
1096 1.1 christos return lex_string ();
1097 1.1 christos else if (rust_identifier_start_p (pstate->lexptr[0]))
1098 1.1 christos return lex_identifier ();
1099 1.1 christos else if (pstate->lexptr[0] == '"')
1100 1.1 christos return lex_string ();
1101 1.1 christos else if (pstate->lexptr[0] == '\'')
1102 1.1 christos return lex_character ();
1103 1.1 christos else if (pstate->lexptr[0] == '}' || pstate->lexptr[0] == ']')
1104 1.1 christos {
1105 1.1 christos /* Falls through to lex_operator. */
1106 1.1 christos --paren_depth;
1107 1.1 christos }
1108 1.1 christos else if (pstate->lexptr[0] == '(' || pstate->lexptr[0] == '{')
1109 1.1 christos {
1110 1.1 christos /* Falls through to lex_operator. */
1111 1.1 christos ++paren_depth;
1112 1.1 christos }
1113 1.1 christos else if (pstate->lexptr[0] == ',' && pstate->comma_terminates
1114 1.1 christos && paren_depth == 0)
1115 1.1 christos return 0;
1116 1.1 christos
1117 1.1 christos return lex_operator ();
1118 1.1 christos }
1119 1.1 christos
1120 1.1 christos /* Push back a single character to be re-lexed. */
1121 1.1 christos
1122 1.1 christos void
1123 1.1 christos rust_parser::push_back (char c)
1124 1.1 christos {
1125 1.1 christos /* Can't be called before any lexing. */
1126 1.1 christos gdb_assert (pstate->prev_lexptr != NULL);
1127 1.1 christos
1128 1.1 christos --pstate->lexptr;
1129 1.1 christos gdb_assert (*pstate->lexptr == c);
1130 1.1 christos }
1131 1.1 christos
1132 1.1 christos
1133 1.1 christos
1135 1.1 christos /* Parse a tuple or paren expression. */
1136 1.1 christos
1137 1.1 christos operation_up
1138 1.1 christos rust_parser::parse_tuple ()
1139 1.1 christos {
1140 1.1 christos assume ('(');
1141 1.1 christos
1142 1.1 christos if (current_token == ')')
1143 1.1 christos {
1144 1.1 christos lex ();
1145 1.1 christos struct type *unit = get_type ("()");
1146 1.1 christos return make_operation<long_const_operation> (unit, 0);
1147 1.1 christos }
1148 1.1 christos
1149 1.1 christos operation_up expr = parse_expr ();
1150 1.1 christos if (current_token == ')')
1151 1.1 christos {
1152 1.1 christos /* Parenthesized expression. */
1153 1.1 christos lex ();
1154 1.1 christos return make_operation<rust_parenthesized_operation> (std::move (expr));
1155 1.1 christos }
1156 1.1 christos
1157 1.1 christos std::vector<operation_up> ops;
1158 1.1 christos ops.push_back (std::move (expr));
1159 1.1 christos while (current_token != ')')
1160 1.1 christos {
1161 1.1 christos if (current_token != ',')
1162 1.1 christos error (_("',' or ')' expected"));
1163 1.1 christos lex ();
1164 1.1 christos
1165 1.1 christos /* A trailing "," is ok. */
1166 1.1 christos if (current_token != ')')
1167 1.1 christos ops.push_back (parse_expr ());
1168 1.1 christos }
1169 1.1 christos
1170 1.1 christos assume (')');
1171 1.1 christos
1172 1.1 christos error (_("Tuple expressions not supported yet"));
1173 1.1 christos }
1174 1.1 christos
1175 1.1 christos /* Parse an array expression. */
1176 1.1 christos
1177 1.1 christos operation_up
1178 1.1 christos rust_parser::parse_array ()
1179 1.1 christos {
1180 1.1 christos assume ('[');
1181 1.1 christos
1182 1.1 christos if (current_token == KW_MUT)
1183 1.1 christos lex ();
1184 1.1 christos
1185 1.1 christos operation_up result;
1186 1.1 christos operation_up expr = parse_expr ();
1187 1.1 christos if (current_token == ';')
1188 1.1 christos {
1189 1.1 christos lex ();
1190 1.1 christos operation_up rhs = parse_expr ();
1191 1.1 christos result = make_operation<rust_array_operation> (std::move (expr),
1192 1.1 christos std::move (rhs));
1193 1.1.1.2 christos }
1194 1.1 christos else if (current_token == ',' || current_token == ']')
1195 1.1 christos {
1196 1.1 christos std::vector<operation_up> ops;
1197 1.1 christos ops.push_back (std::move (expr));
1198 1.1 christos while (current_token != ']')
1199 1.1 christos {
1200 1.1 christos if (current_token != ',')
1201 1.1 christos error (_("',' or ']' expected"));
1202 1.1 christos lex ();
1203 1.1 christos ops.push_back (parse_expr ());
1204 1.1 christos }
1205 1.1 christos ops.shrink_to_fit ();
1206 1.1 christos int len = ops.size () - 1;
1207 1.1 christos result = make_operation<array_operation> (0, len, std::move (ops));
1208 1.1.1.2 christos }
1209 1.1 christos else
1210 1.1 christos error (_("',', ';', or ']' expected"));
1211 1.1 christos
1212 1.1 christos require (']');
1213 1.1 christos
1214 1.1 christos return result;
1215 1.1 christos }
1216 1.1 christos
1217 1.1 christos /* Turn a name into an operation. */
1218 1.1 christos
1219 1.1 christos operation_up
1220 1.1 christos rust_parser::name_to_operation (const std::string &name)
1221 1.1 christos {
1222 1.1 christos struct block_symbol sym = lookup_symbol (name.c_str (),
1223 1.1.1.2 christos pstate->expression_context_block,
1224 1.1 christos SEARCH_VFT);
1225 1.1 christos if (sym.symbol != nullptr && sym.symbol->aclass () != LOC_TYPEDEF)
1226 1.1 christos return make_operation<var_value_operation> (sym);
1227 1.1 christos
1228 1.1 christos struct type *type = nullptr;
1229 1.1 christos
1230 1.1 christos if (sym.symbol != nullptr)
1231 1.1 christos {
1232 1.1 christos gdb_assert (sym.symbol->aclass () == LOC_TYPEDEF);
1233 1.1 christos type = sym.symbol->type ();
1234 1.1 christos }
1235 1.1 christos if (type == nullptr)
1236 1.1 christos type = rust_lookup_type (name.c_str ());
1237 1.1 christos if (type == nullptr)
1238 1.1 christos error (_("No symbol '%s' in current context"), name.c_str ());
1239 1.1 christos
1240 1.1 christos if (type->code () == TYPE_CODE_STRUCT && type->num_fields () == 0)
1241 1.1 christos {
1242 1.1 christos /* A unit-like struct. */
1243 1.1 christos operation_up result (new rust_aggregate_operation (type, {}, {}));
1244 1.1 christos return result;
1245 1.1 christos }
1246 1.1 christos else
1247 1.1 christos return make_operation<type_operation> (type);
1248 1.1 christos }
1249 1.1 christos
1250 1.1 christos /* Parse a struct expression. */
1251 1.1 christos
1252 1.1 christos operation_up
1253 1.1 christos rust_parser::parse_struct_expr (struct type *type)
1254 1.1 christos {
1255 1.1 christos assume ('{');
1256 1.1 christos
1257 1.1 christos if (type->code () != TYPE_CODE_STRUCT
1258 1.1 christos || rust_tuple_type_p (type)
1259 1.1 christos || rust_tuple_struct_type_p (type))
1260 1.1 christos error (_("Struct expression applied to non-struct type"));
1261 1.1 christos
1262 1.1 christos std::vector<std::pair<std::string, operation_up>> field_v;
1263 1.1 christos while (current_token != '}' && current_token != DOTDOT)
1264 1.1 christos {
1265 1.1 christos if (current_token != IDENT)
1266 1.1 christos error (_("'}', '..', or identifier expected"));
1267 1.1 christos
1268 1.1 christos std::string name = get_string ();
1269 1.1 christos lex ();
1270 1.1 christos
1271 1.1 christos operation_up expr;
1272 1.1 christos if (current_token == ',' || current_token == '}'
1273 1.1 christos || current_token == DOTDOT)
1274 1.1 christos expr = name_to_operation (name);
1275 1.1 christos else
1276 1.1 christos {
1277 1.1 christos require (':');
1278 1.1 christos expr = parse_expr ();
1279 1.1 christos }
1280 1.1 christos field_v.emplace_back (std::move (name), std::move (expr));
1281 1.1 christos
1282 1.1 christos /* A trailing "," is ok. */
1283 1.1 christos if (current_token == ',')
1284 1.1 christos lex ();
1285 1.1 christos }
1286 1.1 christos
1287 1.1 christos operation_up others;
1288 1.1 christos if (current_token == DOTDOT)
1289 1.1 christos {
1290 1.1 christos lex ();
1291 1.1 christos others = parse_expr ();
1292 1.1 christos }
1293 1.1 christos
1294 1.1 christos require ('}');
1295 1.1 christos
1296 1.1 christos return make_operation<rust_aggregate_operation> (type,
1297 1.1 christos std::move (others),
1298 1.1 christos std::move (field_v));
1299 1.1 christos }
1300 1.1 christos
1301 1.1 christos /* Used by the operator precedence parser. */
1302 1.1 christos struct rustop_item
1303 1.1 christos {
1304 1.1 christos rustop_item (int token_, int precedence_, enum exp_opcode opcode_,
1305 1.1 christos operation_up &&op_)
1306 1.1 christos : token (token_),
1307 1.1 christos precedence (precedence_),
1308 1.1 christos opcode (opcode_),
1309 1.1 christos op (std::move (op_))
1310 1.1 christos {
1311 1.1 christos }
1312 1.1 christos
1313 1.1 christos /* The token value. */
1314 1.1 christos int token;
1315 1.1 christos /* Precedence of this operator. */
1316 1.1 christos int precedence;
1317 1.1 christos /* This is used only for assign-modify. */
1318 1.1 christos enum exp_opcode opcode;
1319 1.1 christos /* The right hand side of this operation. */
1320 1.1 christos operation_up op;
1321 1.1 christos };
1322 1.1 christos
1323 1.1 christos /* An operator precedence parser for binary operations, including
1324 1.1 christos "as". */
1325 1.1 christos
1326 1.1 christos operation_up
1327 1.1 christos rust_parser::parse_binop (bool required)
1328 1.1 christos {
1329 1.1 christos /* All the binary operators. Each one is of the form
1330 1.1 christos OPERATION(TOKEN, PRECEDENCE, TYPE)
1331 1.1 christos TOKEN is the corresponding operator token.
1332 1.1 christos PRECEDENCE is a value indicating relative precedence.
1333 1.1 christos TYPE is the operation type corresponding to the operator.
1334 1.1 christos Assignment operations are handled specially, not via this
1335 1.1 christos table; they have precedence 0. */
1336 1.1 christos #define ALL_OPS \
1337 1.1 christos OPERATION ('*', 10, mul_operation) \
1338 1.1 christos OPERATION ('/', 10, div_operation) \
1339 1.1 christos OPERATION ('%', 10, rem_operation) \
1340 1.1 christos OPERATION ('@', 9, repeat_operation) \
1341 1.1 christos OPERATION ('+', 8, add_operation) \
1342 1.1 christos OPERATION ('-', 8, sub_operation) \
1343 1.1 christos OPERATION (LSH, 7, lsh_operation) \
1344 1.1 christos OPERATION (RSH, 7, rsh_operation) \
1345 1.1 christos OPERATION ('&', 6, bitwise_and_operation) \
1346 1.1 christos OPERATION ('^', 5, bitwise_xor_operation) \
1347 1.1 christos OPERATION ('|', 4, bitwise_ior_operation) \
1348 1.1 christos OPERATION (EQEQ, 3, equal_operation) \
1349 1.1 christos OPERATION (NOTEQ, 3, notequal_operation) \
1350 1.1 christos OPERATION ('<', 3, less_operation) \
1351 1.1 christos OPERATION (LTEQ, 3, leq_operation) \
1352 1.1 christos OPERATION ('>', 3, gtr_operation) \
1353 1.1 christos OPERATION (GTEQ, 3, geq_operation) \
1354 1.1 christos OPERATION (ANDAND, 2, logical_and_operation) \
1355 1.1 christos OPERATION (OROR, 1, logical_or_operation)
1356 1.1 christos
1357 1.1 christos #define ASSIGN_PREC 0
1358 1.1 christos
1359 1.1 christos operation_up start = parse_atom (required);
1360 1.1 christos if (start == nullptr)
1361 1.1 christos {
1362 1.1 christos gdb_assert (!required);
1363 1.1 christos return start;
1364 1.1 christos }
1365 1.1 christos
1366 1.1 christos std::vector<rustop_item> operator_stack;
1367 1.1 christos operator_stack.emplace_back (0, -1, OP_NULL, std::move (start));
1368 1.1 christos
1369 1.1 christos while (true)
1370 1.1 christos {
1371 1.1 christos int this_token = current_token;
1372 1.1 christos enum exp_opcode compound_assign_op = OP_NULL;
1373 1.1 christos int precedence = -2;
1374 1.1 christos
1375 1.1 christos switch (this_token)
1376 1.1 christos {
1377 1.1 christos #define OPERATION(TOKEN, PRECEDENCE, TYPE) \
1378 1.1 christos case TOKEN: \
1379 1.1 christos precedence = PRECEDENCE; \
1380 1.1 christos lex (); \
1381 1.1 christos break;
1382 1.1 christos
1383 1.1 christos ALL_OPS
1384 1.1 christos
1385 1.1 christos #undef OPERATION
1386 1.1 christos
1387 1.1 christos case COMPOUND_ASSIGN:
1388 1.1.1.2 christos compound_assign_op = current_opcode;
1389 1.1 christos [[fallthrough]];
1390 1.1 christos case '=':
1391 1.1 christos precedence = ASSIGN_PREC;
1392 1.1 christos lex ();
1393 1.1 christos break;
1394 1.1 christos
1395 1.1 christos /* "as" must be handled specially. */
1396 1.1 christos case KW_AS:
1397 1.1 christos {
1398 1.1 christos lex ();
1399 1.1 christos rustop_item &lhs = operator_stack.back ();
1400 1.1 christos struct type *type = parse_type ();
1401 1.1 christos lhs.op = make_operation<unop_cast_operation> (std::move (lhs.op),
1402 1.1 christos type);
1403 1.1 christos }
1404 1.1 christos /* Bypass the rest of the loop. */
1405 1.1 christos continue;
1406 1.1 christos
1407 1.1 christos default:
1408 1.1 christos /* Arrange to pop the entire stack. */
1409 1.1 christos precedence = -2;
1410 1.1.1.2 christos break;
1411 1.1 christos }
1412 1.1 christos
1413 1.1 christos /* Make sure that assignments are right-associative while other
1414 1.1 christos operations are left-associative. */
1415 1.1 christos while ((precedence == ASSIGN_PREC
1416 1.1 christos ? precedence < operator_stack.back ().precedence
1417 1.1 christos : precedence <= operator_stack.back ().precedence)
1418 1.1 christos && operator_stack.size () > 1)
1419 1.1 christos {
1420 1.1 christos rustop_item rhs = std::move (operator_stack.back ());
1421 1.1 christos operator_stack.pop_back ();
1422 1.1 christos
1423 1.1 christos rustop_item &lhs = operator_stack.back ();
1424 1.1 christos
1425 1.1 christos switch (rhs.token)
1426 1.1 christos {
1427 1.1 christos #define OPERATION(TOKEN, PRECEDENCE, TYPE) \
1428 1.1 christos case TOKEN: \
1429 1.1 christos lhs.op = make_operation<TYPE> (std::move (lhs.op), \
1430 1.1 christos std::move (rhs.op)); \
1431 1.1 christos break;
1432 1.1 christos
1433 1.1 christos ALL_OPS
1434 1.1 christos
1435 1.1 christos #undef OPERATION
1436 1.1 christos
1437 1.1 christos case '=':
1438 1.1 christos case COMPOUND_ASSIGN:
1439 1.1 christos {
1440 1.1 christos if (rhs.token == '=')
1441 1.1 christos lhs.op = (make_operation<assign_operation>
1442 1.1 christos (std::move (lhs.op), std::move (rhs.op)));
1443 1.1 christos else
1444 1.1 christos lhs.op = (make_operation<assign_modify_operation>
1445 1.1 christos (rhs.opcode, std::move (lhs.op),
1446 1.1 christos std::move (rhs.op)));
1447 1.1 christos
1448 1.1 christos struct type *unit_type = get_type ("()");
1449 1.1 christos
1450 1.1 christos operation_up nil (new long_const_operation (unit_type, 0));
1451 1.1 christos lhs.op = (make_operation<comma_operation>
1452 1.1 christos (std::move (lhs.op), std::move (nil)));
1453 1.1 christos }
1454 1.1 christos break;
1455 1.1 christos
1456 1.1 christos default:
1457 1.1 christos gdb_assert_not_reached ("bad binary operator");
1458 1.1 christos }
1459 1.1 christos }
1460 1.1 christos
1461 1.1 christos if (precedence == -2)
1462 1.1 christos break;
1463 1.1 christos
1464 1.1 christos operator_stack.emplace_back (this_token, precedence, compound_assign_op,
1465 1.1 christos parse_atom (true));
1466 1.1 christos }
1467 1.1 christos
1468 1.1 christos gdb_assert (operator_stack.size () == 1);
1469 1.1 christos return std::move (operator_stack[0].op);
1470 1.1 christos #undef ALL_OPS
1471 1.1 christos }
1472 1.1 christos
1473 1.1 christos /* Parse a range expression. */
1474 1.1 christos
1475 1.1 christos operation_up
1476 1.1 christos rust_parser::parse_range ()
1477 1.1 christos {
1478 1.1 christos enum range_flag kind = (RANGE_HIGH_BOUND_DEFAULT
1479 1.1 christos | RANGE_LOW_BOUND_DEFAULT);
1480 1.1 christos
1481 1.1 christos operation_up lhs;
1482 1.1 christos if (current_token != DOTDOT && current_token != DOTDOTEQ)
1483 1.1 christos {
1484 1.1 christos lhs = parse_binop (true);
1485 1.1 christos kind &= ~RANGE_LOW_BOUND_DEFAULT;
1486 1.1 christos }
1487 1.1 christos
1488 1.1 christos if (current_token == DOTDOT)
1489 1.1 christos kind |= RANGE_HIGH_BOUND_EXCLUSIVE;
1490 1.1 christos else if (current_token != DOTDOTEQ)
1491 1.1 christos return lhs;
1492 1.1 christos lex ();
1493 1.1 christos
1494 1.1 christos /* A "..=" range requires a high bound, but otherwise it is
1495 1.1 christos optional. */
1496 1.1 christos operation_up rhs = parse_binop ((kind & RANGE_HIGH_BOUND_EXCLUSIVE) == 0);
1497 1.1 christos if (rhs != nullptr)
1498 1.1 christos kind &= ~RANGE_HIGH_BOUND_DEFAULT;
1499 1.1 christos
1500 1.1 christos return make_operation<rust_range_operation> (kind,
1501 1.1 christos std::move (lhs),
1502 1.1 christos std::move (rhs));
1503 1.1 christos }
1504 1.1 christos
1505 1.1 christos /* Parse an expression. */
1506 1.1 christos
1507 1.1 christos operation_up
1508 1.1 christos rust_parser::parse_expr ()
1509 1.1 christos {
1510 1.1 christos return parse_range ();
1511 1.1 christos }
1512 1.1 christos
1513 1.1 christos /* Parse a sizeof expression. */
1514 1.1 christos
1515 1.1 christos operation_up
1516 1.1 christos rust_parser::parse_sizeof ()
1517 1.1 christos {
1518 1.1 christos assume (KW_SIZEOF);
1519 1.1 christos
1520 1.1 christos require ('(');
1521 1.1 christos operation_up result = make_operation<unop_sizeof_operation> (parse_expr ());
1522 1.1 christos require (')');
1523 1.1 christos return result;
1524 1.1 christos }
1525 1.1 christos
1526 1.1 christos /* Parse an address-of operation. */
1527 1.1 christos
1528 1.1 christos operation_up
1529 1.1 christos rust_parser::parse_addr ()
1530 1.1 christos {
1531 1.1 christos assume ('&');
1532 1.1 christos
1533 1.1 christos if (current_token == KW_MUT)
1534 1.1 christos lex ();
1535 1.1 christos
1536 1.1 christos return make_operation<rust_unop_addr_operation> (parse_atom (true));
1537 1.1 christos }
1538 1.1 christos
1539 1.1 christos /* Parse a field expression. */
1540 1.1 christos
1541 1.1 christos operation_up
1542 1.1 christos rust_parser::parse_field (operation_up &&lhs)
1543 1.1 christos {
1544 1.1 christos assume ('.');
1545 1.1 christos
1546 1.1 christos operation_up result;
1547 1.1 christos switch (current_token)
1548 1.1 christos {
1549 1.1 christos case IDENT:
1550 1.1 christos case COMPLETE:
1551 1.1 christos {
1552 1.1 christos bool is_complete = current_token == COMPLETE;
1553 1.1 christos auto struct_op = new rust_structop (std::move (lhs), get_string ());
1554 1.1 christos lex ();
1555 1.1 christos if (is_complete)
1556 1.1 christos {
1557 1.1 christos completion_op.reset (struct_op);
1558 1.1 christos pstate->mark_struct_expression (struct_op);
1559 1.1 christos /* Throw to the outermost level of the parser. */
1560 1.1 christos error (_("not really an error"));
1561 1.1 christos }
1562 1.1 christos result.reset (struct_op);
1563 1.1 christos }
1564 1.1 christos break;
1565 1.1 christos
1566 1.1.1.2 christos case DECIMAL_INTEGER:
1567 1.1.1.2 christos {
1568 1.1.1.2 christos int idx = current_int_val.val.as_integer<int> ();
1569 1.1.1.2 christos result = make_operation<rust_struct_anon> (idx, std::move (lhs));
1570 1.1.1.2 christos lex ();
1571 1.1 christos }
1572 1.1 christos break;
1573 1.1 christos
1574 1.1 christos case INTEGER:
1575 1.1 christos error (_("'_' not allowed in integers in anonymous field references"));
1576 1.1 christos
1577 1.1 christos default:
1578 1.1 christos error (_("field name expected"));
1579 1.1 christos }
1580 1.1 christos
1581 1.1 christos return result;
1582 1.1 christos }
1583 1.1 christos
1584 1.1 christos /* Parse an index expression. */
1585 1.1 christos
1586 1.1 christos operation_up
1587 1.1 christos rust_parser::parse_index (operation_up &&lhs)
1588 1.1 christos {
1589 1.1 christos assume ('[');
1590 1.1 christos operation_up rhs = parse_expr ();
1591 1.1 christos require (']');
1592 1.1 christos
1593 1.1 christos return make_operation<rust_subscript_operation> (std::move (lhs),
1594 1.1 christos std::move (rhs));
1595 1.1 christos }
1596 1.1 christos
1597 1.1 christos /* Parse a sequence of comma-separated expressions in parens. */
1598 1.1 christos
1599 1.1 christos std::vector<operation_up>
1600 1.1 christos rust_parser::parse_paren_args ()
1601 1.1 christos {
1602 1.1 christos assume ('(');
1603 1.1 christos
1604 1.1 christos std::vector<operation_up> args;
1605 1.1 christos while (current_token != ')')
1606 1.1 christos {
1607 1.1 christos if (!args.empty ())
1608 1.1 christos {
1609 1.1 christos if (current_token != ',')
1610 1.1 christos error (_("',' or ')' expected"));
1611 1.1 christos lex ();
1612 1.1 christos }
1613 1.1 christos
1614 1.1 christos args.push_back (parse_expr ());
1615 1.1 christos }
1616 1.1 christos
1617 1.1 christos assume (')');
1618 1.1 christos
1619 1.1 christos return args;
1620 1.1 christos }
1621 1.1 christos
1622 1.1 christos /* Parse the parenthesized part of a function call. */
1623 1.1 christos
1624 1.1 christos operation_up
1625 1.1 christos rust_parser::parse_call (operation_up &&lhs)
1626 1.1 christos {
1627 1.1 christos std::vector<operation_up> args = parse_paren_args ();
1628 1.1 christos
1629 1.1 christos return make_operation<funcall_operation> (std::move (lhs),
1630 1.1 christos std::move (args));
1631 1.1 christos }
1632 1.1 christos
1633 1.1 christos /* Parse a list of types. */
1634 1.1 christos
1635 1.1 christos std::vector<struct type *>
1636 1.1 christos rust_parser::parse_type_list ()
1637 1.1 christos {
1638 1.1 christos std::vector<struct type *> result;
1639 1.1 christos result.push_back (parse_type ());
1640 1.1 christos while (current_token == ',')
1641 1.1 christos {
1642 1.1 christos lex ();
1643 1.1 christos result.push_back (parse_type ());
1644 1.1 christos }
1645 1.1 christos return result;
1646 1.1 christos }
1647 1.1 christos
1648 1.1 christos /* Parse a possibly-empty list of types, surrounded in parens. */
1649 1.1 christos
1650 1.1 christos std::vector<struct type *>
1651 1.1 christos rust_parser::parse_maybe_type_list ()
1652 1.1 christos {
1653 1.1 christos assume ('(');
1654 1.1 christos std::vector<struct type *> types;
1655 1.1 christos if (current_token != ')')
1656 1.1 christos types = parse_type_list ();
1657 1.1 christos require (')');
1658 1.1 christos return types;
1659 1.1 christos }
1660 1.1 christos
1661 1.1 christos /* Parse an array type. */
1662 1.1 christos
1663 1.1 christos struct type *
1664 1.1 christos rust_parser::parse_array_type ()
1665 1.1 christos {
1666 1.1 christos assume ('[');
1667 1.1 christos struct type *elt_type = parse_type ();
1668 1.1 christos require (';');
1669 1.1 christos
1670 1.1 christos if (current_token != INTEGER && current_token != DECIMAL_INTEGER)
1671 1.1.1.2 christos error (_("integer expected"));
1672 1.1 christos ULONGEST val = current_int_val.val.as_integer<ULONGEST> ();
1673 1.1 christos lex ();
1674 1.1 christos require (']');
1675 1.1 christos
1676 1.1 christos return lookup_array_range_type (elt_type, 0, val - 1);
1677 1.1 christos }
1678 1.1 christos
1679 1.1 christos /* Parse a slice type. */
1680 1.1 christos
1681 1.1 christos struct type *
1682 1.1 christos rust_parser::parse_slice_type ()
1683 1.1 christos {
1684 1.1 christos assume ('&');
1685 1.1.1.2 christos
1686 1.1.1.2 christos /* Handle &str specially. This is an important type in Rust. While
1687 1.1.1.2 christos the compiler does emit the "&str" type in the DWARF, just "str"
1688 1.1.1.2 christos itself isn't always available -- but it's handy if this works
1689 1.1.1.2 christos seamlessly. */
1690 1.1.1.2 christos if (current_token == IDENT && get_string () == "str")
1691 1.1.1.2 christos {
1692 1.1.1.2 christos lex ();
1693 1.1.1.2 christos return rust_slice_type ("&str", get_type ("u8"), get_type ("usize"));
1694 1.1.1.2 christos }
1695 1.1 christos
1696 1.1 christos bool is_slice = current_token == '[';
1697 1.1 christos if (is_slice)
1698 1.1 christos lex ();
1699 1.1 christos
1700 1.1 christos struct type *target = parse_type ();
1701 1.1 christos
1702 1.1 christos if (is_slice)
1703 1.1 christos {
1704 1.1 christos require (']');
1705 1.1 christos return rust_slice_type ("&[*gdb*]", target, get_type ("usize"));
1706 1.1 christos }
1707 1.1 christos
1708 1.1 christos /* For now we treat &x and *x identically. */
1709 1.1 christos return lookup_pointer_type (target);
1710 1.1 christos }
1711 1.1 christos
1712 1.1 christos /* Parse a pointer type. */
1713 1.1 christos
1714 1.1 christos struct type *
1715 1.1 christos rust_parser::parse_pointer_type ()
1716 1.1 christos {
1717 1.1 christos assume ('*');
1718 1.1 christos
1719 1.1 christos if (current_token == KW_MUT || current_token == KW_CONST)
1720 1.1 christos lex ();
1721 1.1 christos
1722 1.1 christos struct type *target = parse_type ();
1723 1.1 christos /* For the time being we ignore mut/const. */
1724 1.1 christos return lookup_pointer_type (target);
1725 1.1 christos }
1726 1.1 christos
1727 1.1 christos /* Parse a function type. */
1728 1.1 christos
1729 1.1 christos struct type *
1730 1.1 christos rust_parser::parse_function_type ()
1731 1.1 christos {
1732 1.1 christos assume (KW_FN);
1733 1.1 christos
1734 1.1 christos if (current_token != '(')
1735 1.1 christos error (_("'(' expected"));
1736 1.1 christos
1737 1.1 christos std::vector<struct type *> types = parse_maybe_type_list ();
1738 1.1 christos
1739 1.1 christos if (current_token != ARROW)
1740 1.1 christos error (_("'->' expected"));
1741 1.1 christos lex ();
1742 1.1 christos
1743 1.1 christos struct type *result_type = parse_type ();
1744 1.1 christos
1745 1.1 christos struct type **argtypes = nullptr;
1746 1.1 christos if (!types.empty ())
1747 1.1 christos argtypes = types.data ();
1748 1.1 christos
1749 1.1 christos result_type = lookup_function_type_with_arguments (result_type,
1750 1.1 christos types.size (),
1751 1.1 christos argtypes);
1752 1.1 christos return lookup_pointer_type (result_type);
1753 1.1 christos }
1754 1.1 christos
1755 1.1 christos /* Parse a tuple type. */
1756 1.1 christos
1757 1.1 christos struct type *
1758 1.1 christos rust_parser::parse_tuple_type ()
1759 1.1 christos {
1760 1.1 christos std::vector<struct type *> types = parse_maybe_type_list ();
1761 1.1 christos
1762 1.1 christos auto_obstack obstack;
1763 1.1 christos obstack_1grow (&obstack, '(');
1764 1.1 christos for (int i = 0; i < types.size (); ++i)
1765 1.1 christos {
1766 1.1 christos std::string type_name = type_to_string (types[i]);
1767 1.1 christos
1768 1.1 christos if (i > 0)
1769 1.1 christos obstack_1grow (&obstack, ',');
1770 1.1 christos obstack_grow_str (&obstack, type_name.c_str ());
1771 1.1 christos }
1772 1.1 christos
1773 1.1 christos obstack_grow_str0 (&obstack, ")");
1774 1.1 christos const char *name = (const char *) obstack_finish (&obstack);
1775 1.1 christos
1776 1.1 christos /* We don't allow creating new tuple types (yet), but we do allow
1777 1.1 christos looking up existing tuple types. */
1778 1.1 christos struct type *result = rust_lookup_type (name);
1779 1.1 christos if (result == nullptr)
1780 1.1 christos error (_("could not find tuple type '%s'"), name);
1781 1.1 christos
1782 1.1 christos return result;
1783 1.1 christos }
1784 1.1 christos
1785 1.1 christos /* Parse a type. */
1786 1.1 christos
1787 1.1 christos struct type *
1788 1.1 christos rust_parser::parse_type ()
1789 1.1 christos {
1790 1.1 christos switch (current_token)
1791 1.1 christos {
1792 1.1 christos case '[':
1793 1.1 christos return parse_array_type ();
1794 1.1 christos case '&':
1795 1.1 christos return parse_slice_type ();
1796 1.1 christos case '*':
1797 1.1 christos return parse_pointer_type ();
1798 1.1 christos case KW_FN:
1799 1.1 christos return parse_function_type ();
1800 1.1 christos case '(':
1801 1.1 christos return parse_tuple_type ();
1802 1.1 christos case KW_SELF:
1803 1.1 christos case KW_SUPER:
1804 1.1 christos case COLONCOLON:
1805 1.1 christos case KW_EXTERN:
1806 1.1 christos case IDENT:
1807 1.1 christos {
1808 1.1 christos std::string path = parse_path (false);
1809 1.1 christos struct type *result = rust_lookup_type (path.c_str ());
1810 1.1 christos if (result == nullptr)
1811 1.1 christos error (_("No type name '%s' in current context"), path.c_str ());
1812 1.1 christos return result;
1813 1.1 christos }
1814 1.1 christos default:
1815 1.1 christos error (_("type expected"));
1816 1.1 christos }
1817 1.1 christos }
1818 1.1 christos
1819 1.1 christos /* Parse a path. */
1820 1.1 christos
1821 1.1 christos std::string
1822 1.1 christos rust_parser::parse_path (bool for_expr)
1823 1.1 christos {
1824 1.1 christos unsigned n_supers = 0;
1825 1.1 christos int first_token = current_token;
1826 1.1 christos
1827 1.1 christos switch (current_token)
1828 1.1 christos {
1829 1.1 christos case KW_SELF:
1830 1.1 christos lex ();
1831 1.1 christos if (current_token != COLONCOLON)
1832 1.1 christos return "self";
1833 1.1.1.2 christos lex ();
1834 1.1 christos [[fallthrough]];
1835 1.1 christos case KW_SUPER:
1836 1.1 christos while (current_token == KW_SUPER)
1837 1.1 christos {
1838 1.1 christos ++n_supers;
1839 1.1 christos lex ();
1840 1.1 christos if (current_token != COLONCOLON)
1841 1.1 christos error (_("'::' expected"));
1842 1.1 christos lex ();
1843 1.1 christos }
1844 1.1 christos break;
1845 1.1 christos
1846 1.1 christos case COLONCOLON:
1847 1.1 christos lex ();
1848 1.1 christos break;
1849 1.1 christos
1850 1.1 christos case KW_EXTERN:
1851 1.1 christos /* This is a gdb extension to make it possible to refer to items
1852 1.1 christos in other crates. It just bypasses adding the current crate
1853 1.1 christos to the front of the name. */
1854 1.1 christos lex ();
1855 1.1 christos break;
1856 1.1 christos }
1857 1.1 christos
1858 1.1 christos if (current_token != IDENT)
1859 1.1 christos error (_("identifier expected"));
1860 1.1 christos std::string path = get_string ();
1861 1.1 christos bool saw_ident = true;
1862 1.1 christos lex ();
1863 1.1 christos
1864 1.1 christos /* The condition here lets us enter the loop even if we see
1865 1.1 christos "ident<...>". */
1866 1.1 christos while (current_token == COLONCOLON || current_token == '<')
1867 1.1 christos {
1868 1.1 christos if (current_token == COLONCOLON)
1869 1.1 christos {
1870 1.1 christos lex ();
1871 1.1 christos saw_ident = false;
1872 1.1 christos
1873 1.1 christos if (current_token == IDENT)
1874 1.1 christos {
1875 1.1 christos path = path + "::" + get_string ();
1876 1.1 christos lex ();
1877 1.1 christos saw_ident = true;
1878 1.1 christos }
1879 1.1 christos else if (current_token == COLONCOLON)
1880 1.1 christos {
1881 1.1 christos /* The code below won't detect this scenario. */
1882 1.1 christos error (_("unexpected '::'"));
1883 1.1 christos }
1884 1.1 christos }
1885 1.1 christos
1886 1.1 christos if (current_token != '<')
1887 1.1 christos continue;
1888 1.1 christos
1889 1.1 christos /* Expression use name::<...>, whereas types use name<...>. */
1890 1.1 christos if (for_expr)
1891 1.1 christos {
1892 1.1 christos /* Expressions use "name::<...>", so if we saw an identifier
1893 1.1 christos after the "::", we ignore the "<" here. */
1894 1.1 christos if (saw_ident)
1895 1.1 christos break;
1896 1.1 christos }
1897 1.1 christos else
1898 1.1 christos {
1899 1.1 christos /* Types use "name<...>", so we need to have seen the
1900 1.1 christos identifier. */
1901 1.1 christos if (!saw_ident)
1902 1.1 christos break;
1903 1.1 christos }
1904 1.1 christos
1905 1.1 christos lex ();
1906 1.1 christos std::vector<struct type *> types = parse_type_list ();
1907 1.1 christos if (current_token == '>')
1908 1.1 christos lex ();
1909 1.1 christos else if (current_token == RSH)
1910 1.1 christos {
1911 1.1 christos push_back ('>');
1912 1.1 christos lex ();
1913 1.1 christos }
1914 1.1 christos else
1915 1.1 christos error (_("'>' expected"));
1916 1.1 christos
1917 1.1 christos path += "<";
1918 1.1 christos for (int i = 0; i < types.size (); ++i)
1919 1.1 christos {
1920 1.1 christos if (i > 0)
1921 1.1 christos path += ",";
1922 1.1 christos path += type_to_string (types[i]);
1923 1.1 christos }
1924 1.1 christos path += ">";
1925 1.1 christos break;
1926 1.1 christos }
1927 1.1 christos
1928 1.1 christos switch (first_token)
1929 1.1 christos {
1930 1.1 christos case KW_SELF:
1931 1.1 christos case KW_SUPER:
1932 1.1 christos return super_name (path, n_supers);
1933 1.1 christos
1934 1.1 christos case COLONCOLON:
1935 1.1 christos return crate_name (path);
1936 1.1 christos
1937 1.1 christos case KW_EXTERN:
1938 1.1 christos return "::" + path;
1939 1.1 christos
1940 1.1 christos case IDENT:
1941 1.1 christos return path;
1942 1.1 christos
1943 1.1 christos default:
1944 1.1 christos gdb_assert_not_reached ("missing case in path parsing");
1945 1.1 christos }
1946 1.1 christos }
1947 1.1 christos
1948 1.1 christos /* Handle the parsing for a string expression. */
1949 1.1 christos
1950 1.1 christos operation_up
1951 1.1 christos rust_parser::parse_string ()
1952 1.1 christos {
1953 1.1 christos gdb_assert (current_token == STRING);
1954 1.1 christos
1955 1.1 christos /* Wrap the raw string in the &str struct. */
1956 1.1 christos struct type *type = rust_lookup_type ("&str");
1957 1.1 christos if (type == nullptr)
1958 1.1 christos error (_("Could not find type '&str'"));
1959 1.1 christos
1960 1.1 christos std::vector<std::pair<std::string, operation_up>> field_v;
1961 1.1 christos
1962 1.1 christos size_t len = current_string_val.length;
1963 1.1 christos operation_up str = make_operation<string_operation> (get_string ());
1964 1.1 christos operation_up addr
1965 1.1 christos = make_operation<rust_unop_addr_operation> (std::move (str));
1966 1.1 christos field_v.emplace_back ("data_ptr", std::move (addr));
1967 1.1 christos
1968 1.1 christos struct type *valtype = get_type ("usize");
1969 1.1 christos operation_up lenop = make_operation<long_const_operation> (valtype, len);
1970 1.1 christos field_v.emplace_back ("length", std::move (lenop));
1971 1.1 christos
1972 1.1 christos return make_operation<rust_aggregate_operation> (type,
1973 1.1 christos operation_up (),
1974 1.1 christos std::move (field_v));
1975 1.1 christos }
1976 1.1 christos
1977 1.1 christos /* Parse a tuple struct expression. */
1978 1.1 christos
1979 1.1 christos operation_up
1980 1.1 christos rust_parser::parse_tuple_struct (struct type *type)
1981 1.1 christos {
1982 1.1 christos std::vector<operation_up> args = parse_paren_args ();
1983 1.1 christos
1984 1.1 christos std::vector<std::pair<std::string, operation_up>> field_v (args.size ());
1985 1.1 christos for (int i = 0; i < args.size (); ++i)
1986 1.1 christos field_v[i] = { string_printf ("__%d", i), std::move (args[i]) };
1987 1.1 christos
1988 1.1 christos return (make_operation<rust_aggregate_operation>
1989 1.1 christos (type, operation_up (), std::move (field_v)));
1990 1.1 christos }
1991 1.1 christos
1992 1.1 christos /* Parse a path expression. */
1993 1.1 christos
1994 1.1 christos operation_up
1995 1.1 christos rust_parser::parse_path_expr ()
1996 1.1 christos {
1997 1.1 christos std::string path = parse_path (true);
1998 1.1 christos
1999 1.1 christos if (current_token == '{')
2000 1.1 christos {
2001 1.1 christos struct type *type = rust_lookup_type (path.c_str ());
2002 1.1 christos if (type == nullptr)
2003 1.1 christos error (_("Could not find type '%s'"), path.c_str ());
2004 1.1 christos
2005 1.1 christos return parse_struct_expr (type);
2006 1.1 christos }
2007 1.1 christos else if (current_token == '(')
2008 1.1 christos {
2009 1.1 christos struct type *type = rust_lookup_type (path.c_str ());
2010 1.1 christos /* If this is actually a tuple struct expression, handle it
2011 1.1 christos here. If it is a call, it will be handled elsewhere. */
2012 1.1 christos if (type != nullptr)
2013 1.1 christos {
2014 1.1 christos if (!rust_tuple_struct_type_p (type))
2015 1.1 christos error (_("Type %s is not a tuple struct"), path.c_str ());
2016 1.1 christos return parse_tuple_struct (type);
2017 1.1 christos }
2018 1.1 christos }
2019 1.1 christos
2020 1.1 christos return name_to_operation (path);
2021 1.1 christos }
2022 1.1 christos
2023 1.1 christos /* Parse an atom. "Atom" isn't a Rust term, but this refers to a
2024 1.1 christos single unitary item in the grammar; but here including some unary
2025 1.1 christos prefix and postfix expressions. */
2026 1.1 christos
2027 1.1 christos operation_up
2028 1.1 christos rust_parser::parse_atom (bool required)
2029 1.1 christos {
2030 1.1 christos operation_up result;
2031 1.1 christos
2032 1.1 christos switch (current_token)
2033 1.1 christos {
2034 1.1 christos case '(':
2035 1.1 christos result = parse_tuple ();
2036 1.1 christos break;
2037 1.1 christos
2038 1.1 christos case '[':
2039 1.1 christos result = parse_array ();
2040 1.1 christos break;
2041 1.1 christos
2042 1.1 christos case INTEGER:
2043 1.1 christos case DECIMAL_INTEGER:
2044 1.1 christos result = make_operation<long_const_operation> (current_int_val.type,
2045 1.1 christos current_int_val.val);
2046 1.1 christos lex ();
2047 1.1 christos break;
2048 1.1 christos
2049 1.1 christos case FLOAT:
2050 1.1 christos result = make_operation<float_const_operation> (current_float_val.type,
2051 1.1 christos current_float_val.val);
2052 1.1 christos lex ();
2053 1.1 christos break;
2054 1.1 christos
2055 1.1 christos case STRING:
2056 1.1 christos result = parse_string ();
2057 1.1 christos lex ();
2058 1.1 christos break;
2059 1.1 christos
2060 1.1 christos case BYTESTRING:
2061 1.1 christos result = make_operation<string_operation> (get_string ());
2062 1.1 christos lex ();
2063 1.1 christos break;
2064 1.1 christos
2065 1.1 christos case KW_TRUE:
2066 1.1 christos case KW_FALSE:
2067 1.1 christos result = make_operation<bool_operation> (current_token == KW_TRUE);
2068 1.1 christos lex ();
2069 1.1 christos break;
2070 1.1 christos
2071 1.1 christos case GDBVAR:
2072 1.1 christos /* This is kind of a hacky approach. */
2073 1.1 christos {
2074 1.1 christos pstate->push_dollar (current_string_val);
2075 1.1 christos result = pstate->pop ();
2076 1.1 christos lex ();
2077 1.1 christos }
2078 1.1 christos break;
2079 1.1 christos
2080 1.1 christos case KW_SELF:
2081 1.1 christos case KW_SUPER:
2082 1.1 christos case COLONCOLON:
2083 1.1 christos case KW_EXTERN:
2084 1.1 christos case IDENT:
2085 1.1 christos result = parse_path_expr ();
2086 1.1 christos break;
2087 1.1 christos
2088 1.1 christos case '*':
2089 1.1 christos lex ();
2090 1.1 christos result = make_operation<rust_unop_ind_operation> (parse_atom (true));
2091 1.1 christos break;
2092 1.1 christos case '+':
2093 1.1 christos lex ();
2094 1.1 christos result = make_operation<unary_plus_operation> (parse_atom (true));
2095 1.1 christos break;
2096 1.1 christos case '-':
2097 1.1 christos lex ();
2098 1.1 christos result = make_operation<unary_neg_operation> (parse_atom (true));
2099 1.1 christos break;
2100 1.1 christos case '!':
2101 1.1 christos lex ();
2102 1.1 christos result = make_operation<rust_unop_compl_operation> (parse_atom (true));
2103 1.1 christos break;
2104 1.1 christos case KW_SIZEOF:
2105 1.1 christos result = parse_sizeof ();
2106 1.1 christos break;
2107 1.1 christos case '&':
2108 1.1 christos result = parse_addr ();
2109 1.1 christos break;
2110 1.1 christos
2111 1.1 christos default:
2112 1.1 christos if (!required)
2113 1.1 christos return {};
2114 1.1 christos error (_("unexpected token"));
2115 1.1 christos }
2116 1.1 christos
2117 1.1 christos /* Now parse suffixes. */
2118 1.1 christos while (true)
2119 1.1 christos {
2120 1.1 christos switch (current_token)
2121 1.1 christos {
2122 1.1 christos case '.':
2123 1.1 christos result = parse_field (std::move (result));
2124 1.1 christos break;
2125 1.1 christos
2126 1.1 christos case '[':
2127 1.1 christos result = parse_index (std::move (result));
2128 1.1 christos break;
2129 1.1 christos
2130 1.1 christos case '(':
2131 1.1 christos result = parse_call (std::move (result));
2132 1.1 christos break;
2133 1.1 christos
2134 1.1 christos default:
2135 1.1 christos return result;
2136 1.1 christos }
2137 1.1 christos }
2138 1.1 christos }
2139 1.1 christos
2140 1.1 christos
2141 1.1 christos
2143 1.1 christos /* The parser as exposed to gdb. */
2144 1.1 christos
2145 1.1 christos int
2146 1.1 christos rust_language::parser (struct parser_state *state) const
2147 1.1 christos {
2148 1.1 christos rust_parser parser (state);
2149 1.1 christos
2150 1.1 christos operation_up result;
2151 1.1 christos try
2152 1.1 christos {
2153 1.1 christos result = parser.parse_entry_point ();
2154 1.1 christos }
2155 1.1 christos catch (const gdb_exception &exc)
2156 1.1 christos {
2157 1.1 christos if (state->parse_completion)
2158 1.1 christos {
2159 1.1 christos result = std::move (parser.completion_op);
2160 1.1 christos if (result == nullptr)
2161 1.1 christos throw;
2162 1.1 christos }
2163 1.1 christos else
2164 1.1 christos throw;
2165 1.1 christos }
2166 1.1 christos
2167 1.1 christos state->set_operation (std::move (result));
2168 1.1 christos
2169 1.1 christos return 0;
2170 1.1 christos }
2171 1.1 christos
2172 1.1 christos
2173 1.1 christos
2175 1.1 christos #if GDB_SELF_TEST
2176 1.1 christos
2177 1.1 christos /* A test helper that lexes a string, expecting a single token. */
2178 1.1 christos
2179 1.1 christos static void
2180 1.1 christos rust_lex_test_one (rust_parser *parser, const char *input, int expected)
2181 1.1 christos {
2182 1.1 christos int token;
2183 1.1 christos
2184 1.1 christos parser->reset (input);
2185 1.1 christos
2186 1.1 christos token = parser->lex_one_token ();
2187 1.1 christos SELF_CHECK (token == expected);
2188 1.1 christos
2189 1.1 christos if (token)
2190 1.1 christos {
2191 1.1 christos token = parser->lex_one_token ();
2192 1.1 christos SELF_CHECK (token == 0);
2193 1.1 christos }
2194 1.1 christos }
2195 1.1 christos
2196 1.1 christos /* Test that INPUT lexes as the integer VALUE. */
2197 1.1 christos
2198 1.1 christos static void
2199 1.1 christos rust_lex_int_test (rust_parser *parser, const char *input,
2200 1.1 christos ULONGEST value, int kind)
2201 1.1 christos {
2202 1.1 christos rust_lex_test_one (parser, input, kind);
2203 1.1 christos SELF_CHECK (parser->current_int_val.val == value);
2204 1.1 christos }
2205 1.1 christos
2206 1.1 christos /* Test that INPUT throws an exception with text ERR. */
2207 1.1 christos
2208 1.1 christos static void
2209 1.1 christos rust_lex_exception_test (rust_parser *parser, const char *input,
2210 1.1 christos const char *err)
2211 1.1 christos {
2212 1.1 christos try
2213 1.1 christos {
2214 1.1 christos /* The "kind" doesn't matter. */
2215 1.1 christos rust_lex_test_one (parser, input, DECIMAL_INTEGER);
2216 1.1 christos SELF_CHECK (0);
2217 1.1 christos }
2218 1.1 christos catch (const gdb_exception_error &except)
2219 1.1 christos {
2220 1.1 christos SELF_CHECK (strcmp (except.what (), err) == 0);
2221 1.1 christos }
2222 1.1 christos }
2223 1.1 christos
2224 1.1 christos /* Test that INPUT lexes as the identifier, string, or byte-string
2225 1.1 christos VALUE. KIND holds the expected token kind. */
2226 1.1 christos
2227 1.1 christos static void
2228 1.1 christos rust_lex_stringish_test (rust_parser *parser, const char *input,
2229 1.1 christos const char *value, int kind)
2230 1.1 christos {
2231 1.1 christos rust_lex_test_one (parser, input, kind);
2232 1.1 christos SELF_CHECK (parser->get_string () == value);
2233 1.1 christos }
2234 1.1 christos
2235 1.1 christos /* Helper to test that a string parses as a given token sequence. */
2236 1.1 christos
2237 1.1 christos static void
2238 1.1 christos rust_lex_test_sequence (rust_parser *parser, const char *input, int len,
2239 1.1 christos const int expected[])
2240 1.1 christos {
2241 1.1 christos int i;
2242 1.1 christos
2243 1.1 christos parser->reset (input);
2244 1.1 christos
2245 1.1 christos for (i = 0; i < len; ++i)
2246 1.1 christos {
2247 1.1 christos int token = parser->lex_one_token ();
2248 1.1 christos SELF_CHECK (token == expected[i]);
2249 1.1 christos }
2250 1.1 christos }
2251 1.1 christos
2252 1.1 christos /* Tests for an integer-parsing corner case. */
2253 1.1 christos
2254 1.1 christos static void
2255 1.1 christos rust_lex_test_trailing_dot (rust_parser *parser)
2256 1.1 christos {
2257 1.1 christos const int expected1[] = { DECIMAL_INTEGER, '.', IDENT, '(', ')', 0 };
2258 1.1 christos const int expected2[] = { INTEGER, '.', IDENT, '(', ')', 0 };
2259 1.1 christos const int expected3[] = { FLOAT, EQEQ, '(', ')', 0 };
2260 1.1 christos const int expected4[] = { DECIMAL_INTEGER, DOTDOT, DECIMAL_INTEGER, 0 };
2261 1.1 christos
2262 1.1 christos rust_lex_test_sequence (parser, "23.g()", ARRAY_SIZE (expected1), expected1);
2263 1.1 christos rust_lex_test_sequence (parser, "23_0.g()", ARRAY_SIZE (expected2),
2264 1.1 christos expected2);
2265 1.1 christos rust_lex_test_sequence (parser, "23.==()", ARRAY_SIZE (expected3),
2266 1.1 christos expected3);
2267 1.1 christos rust_lex_test_sequence (parser, "23..25", ARRAY_SIZE (expected4), expected4);
2268 1.1 christos }
2269 1.1 christos
2270 1.1 christos /* Tests of completion. */
2271 1.1 christos
2272 1.1 christos static void
2273 1.1 christos rust_lex_test_completion (rust_parser *parser)
2274 1.1 christos {
2275 1.1 christos const int expected[] = { IDENT, '.', COMPLETE, 0 };
2276 1.1 christos
2277 1.1 christos parser->pstate->parse_completion = 1;
2278 1.1 christos
2279 1.1 christos rust_lex_test_sequence (parser, "something.wha", ARRAY_SIZE (expected),
2280 1.1 christos expected);
2281 1.1 christos rust_lex_test_sequence (parser, "something.", ARRAY_SIZE (expected),
2282 1.1 christos expected);
2283 1.1 christos
2284 1.1 christos parser->pstate->parse_completion = 0;
2285 1.1 christos }
2286 1.1 christos
2287 1.1 christos /* Test pushback. */
2288 1.1 christos
2289 1.1 christos static void
2290 1.1 christos rust_lex_test_push_back (rust_parser *parser)
2291 1.1 christos {
2292 1.1 christos int token;
2293 1.1 christos
2294 1.1 christos parser->reset (">>=");
2295 1.1 christos
2296 1.1 christos token = parser->lex_one_token ();
2297 1.1 christos SELF_CHECK (token == COMPOUND_ASSIGN);
2298 1.1 christos SELF_CHECK (parser->current_opcode == BINOP_RSH);
2299 1.1 christos
2300 1.1 christos parser->push_back ('=');
2301 1.1 christos
2302 1.1 christos token = parser->lex_one_token ();
2303 1.1 christos SELF_CHECK (token == '=');
2304 1.1 christos
2305 1.1 christos token = parser->lex_one_token ();
2306 1.1 christos SELF_CHECK (token == 0);
2307 1.1 christos }
2308 1.1 christos
2309 1.1 christos /* Unit test the lexer. */
2310 1.1 christos
2311 1.1 christos static void
2312 1.1.1.2 christos rust_lex_tests (void)
2313 1.1.1.2 christos {
2314 1.1 christos /* Set up dummy "parser", so that rust_type works. */
2315 1.1 christos parser_state ps (language_def (language_rust), current_inferior ()->arch (),
2316 1.1 christos nullptr, 0, 0, nullptr, 0, nullptr);
2317 1.1 christos rust_parser parser (&ps);
2318 1.1 christos
2319 1.1 christos rust_lex_test_one (&parser, "", 0);
2320 1.1 christos rust_lex_test_one (&parser, " \t \n \r ", 0);
2321 1.1 christos rust_lex_test_one (&parser, "thread 23", 0);
2322 1.1 christos rust_lex_test_one (&parser, "task 23", 0);
2323 1.1 christos rust_lex_test_one (&parser, "th 104", 0);
2324 1.1 christos rust_lex_test_one (&parser, "ta 97", 0);
2325 1.1 christos
2326 1.1 christos rust_lex_int_test (&parser, "'z'", 'z', INTEGER);
2327 1.1 christos rust_lex_int_test (&parser, "'\\xff'", 0xff, INTEGER);
2328 1.1 christos rust_lex_int_test (&parser, "'\\u{1016f}'", 0x1016f, INTEGER);
2329 1.1 christos rust_lex_int_test (&parser, "b'z'", 'z', INTEGER);
2330 1.1 christos rust_lex_int_test (&parser, "b'\\xfe'", 0xfe, INTEGER);
2331 1.1 christos rust_lex_int_test (&parser, "b'\\xFE'", 0xfe, INTEGER);
2332 1.1 christos rust_lex_int_test (&parser, "b'\\xfE'", 0xfe, INTEGER);
2333 1.1 christos
2334 1.1 christos /* Test all escapes in both modes. */
2335 1.1 christos rust_lex_int_test (&parser, "'\\n'", '\n', INTEGER);
2336 1.1 christos rust_lex_int_test (&parser, "'\\r'", '\r', INTEGER);
2337 1.1 christos rust_lex_int_test (&parser, "'\\t'", '\t', INTEGER);
2338 1.1 christos rust_lex_int_test (&parser, "'\\\\'", '\\', INTEGER);
2339 1.1 christos rust_lex_int_test (&parser, "'\\0'", '\0', INTEGER);
2340 1.1 christos rust_lex_int_test (&parser, "'\\''", '\'', INTEGER);
2341 1.1 christos rust_lex_int_test (&parser, "'\\\"'", '"', INTEGER);
2342 1.1 christos
2343 1.1 christos rust_lex_int_test (&parser, "b'\\n'", '\n', INTEGER);
2344 1.1 christos rust_lex_int_test (&parser, "b'\\r'", '\r', INTEGER);
2345 1.1 christos rust_lex_int_test (&parser, "b'\\t'", '\t', INTEGER);
2346 1.1 christos rust_lex_int_test (&parser, "b'\\\\'", '\\', INTEGER);
2347 1.1 christos rust_lex_int_test (&parser, "b'\\0'", '\0', INTEGER);
2348 1.1 christos rust_lex_int_test (&parser, "b'\\''", '\'', INTEGER);
2349 1.1 christos rust_lex_int_test (&parser, "b'\\\"'", '"', INTEGER);
2350 1.1 christos
2351 1.1 christos rust_lex_exception_test (&parser, "'z", "Unterminated character literal");
2352 1.1 christos rust_lex_exception_test (&parser, "b'\\x0'", "Not enough hex digits seen");
2353 1.1 christos rust_lex_exception_test (&parser, "b'\\u{0}'",
2354 1.1 christos "Unicode escape in byte literal");
2355 1.1 christos rust_lex_exception_test (&parser, "'\\x0'", "Not enough hex digits seen");
2356 1.1 christos rust_lex_exception_test (&parser, "'\\u0'", "Missing '{' in Unicode escape");
2357 1.1 christos rust_lex_exception_test (&parser, "'\\u{0", "Missing '}' in Unicode escape");
2358 1.1 christos rust_lex_exception_test (&parser, "'\\u{0000007}", "Overlong hex escape");
2359 1.1 christos rust_lex_exception_test (&parser, "'\\u{}", "Not enough hex digits seen");
2360 1.1 christos rust_lex_exception_test (&parser, "'\\Q'", "Invalid escape \\Q in literal");
2361 1.1 christos rust_lex_exception_test (&parser, "b'\\Q'", "Invalid escape \\Q in literal");
2362 1.1 christos
2363 1.1 christos rust_lex_int_test (&parser, "23", 23, DECIMAL_INTEGER);
2364 1.1 christos rust_lex_int_test (&parser, "2_344__29", 234429, INTEGER);
2365 1.1 christos rust_lex_int_test (&parser, "0x1f", 0x1f, INTEGER);
2366 1.1 christos rust_lex_int_test (&parser, "23usize", 23, INTEGER);
2367 1.1 christos rust_lex_int_test (&parser, "23i32", 23, INTEGER);
2368 1.1 christos rust_lex_int_test (&parser, "0x1_f", 0x1f, INTEGER);
2369 1.1 christos rust_lex_int_test (&parser, "0b1_101011__", 0x6b, INTEGER);
2370 1.1 christos rust_lex_int_test (&parser, "0o001177i64", 639, INTEGER);
2371 1.1 christos rust_lex_int_test (&parser, "0x123456789u64", 0x123456789ull, INTEGER);
2372 1.1 christos
2373 1.1 christos rust_lex_test_trailing_dot (&parser);
2374 1.1 christos
2375 1.1 christos rust_lex_test_one (&parser, "23.", FLOAT);
2376 1.1 christos rust_lex_test_one (&parser, "23.99f32", FLOAT);
2377 1.1 christos rust_lex_test_one (&parser, "23e7", FLOAT);
2378 1.1 christos rust_lex_test_one (&parser, "23E-7", FLOAT);
2379 1.1 christos rust_lex_test_one (&parser, "23e+7", FLOAT);
2380 1.1 christos rust_lex_test_one (&parser, "23.99e+7f64", FLOAT);
2381 1.1 christos rust_lex_test_one (&parser, "23.82f32", FLOAT);
2382 1.1 christos
2383 1.1 christos rust_lex_stringish_test (&parser, "hibob", "hibob", IDENT);
2384 1.1 christos rust_lex_stringish_test (&parser, "hibob__93", "hibob__93", IDENT);
2385 1.1 christos rust_lex_stringish_test (&parser, "thread", "thread", IDENT);
2386 1.1 christos rust_lex_stringish_test (&parser, "r#true", "true", IDENT);
2387 1.1 christos
2388 1.1 christos const int expected1[] = { IDENT, DECIMAL_INTEGER, 0 };
2389 1.1 christos rust_lex_test_sequence (&parser, "r#thread 23", ARRAY_SIZE (expected1),
2390 1.1 christos expected1);
2391 1.1 christos const int expected2[] = { IDENT, '#', 0 };
2392 1.1 christos rust_lex_test_sequence (&parser, "r#", ARRAY_SIZE (expected2), expected2);
2393 1.1 christos
2394 1.1 christos rust_lex_stringish_test (&parser, "\"string\"", "string", STRING);
2395 1.1 christos rust_lex_stringish_test (&parser, "\"str\\ting\"", "str\ting", STRING);
2396 1.1 christos rust_lex_stringish_test (&parser, "\"str\\\"ing\"", "str\"ing", STRING);
2397 1.1 christos rust_lex_stringish_test (&parser, "r\"str\\ing\"", "str\\ing", STRING);
2398 1.1 christos rust_lex_stringish_test (&parser, "r#\"str\\ting\"#", "str\\ting", STRING);
2399 1.1 christos rust_lex_stringish_test (&parser, "r###\"str\\\"ing\"###", "str\\\"ing",
2400 1.1 christos STRING);
2401 1.1 christos
2402 1.1 christos rust_lex_stringish_test (&parser, "b\"string\"", "string", BYTESTRING);
2403 1.1 christos rust_lex_stringish_test (&parser, "b\"\x73tring\"", "string", BYTESTRING);
2404 1.1 christos rust_lex_stringish_test (&parser, "b\"str\\\"ing\"", "str\"ing", BYTESTRING);
2405 1.1 christos rust_lex_stringish_test (&parser, "br####\"\\x73tring\"####", "\\x73tring",
2406 1.1 christos BYTESTRING);
2407 1.1 christos
2408 1.1 christos for (const auto &candidate : identifier_tokens)
2409 1.1 christos rust_lex_test_one (&parser, candidate.name, candidate.value);
2410 1.1 christos
2411 1.1 christos for (const auto &candidate : operator_tokens)
2412 1.1 christos rust_lex_test_one (&parser, candidate.name, candidate.value);
2413 1.1 christos
2414 1.1 christos rust_lex_test_completion (&parser);
2415 1.1 christos rust_lex_test_push_back (&parser);
2416 1.1 christos }
2417 1.1 christos
2418 1.1 christos #endif /* GDB_SELF_TEST */
2419 1.1 christos
2420 1.1 christos
2421 1.1 christos
2423 1.1 christos void _initialize_rust_exp ();
2424 1.1 christos void
2425 1.1 christos _initialize_rust_exp ()
2426 1.1 christos {
2427 1.1 christos int code = regcomp (&number_regex, number_regex_text, REG_EXTENDED);
2428 1.1 christos /* If the regular expression was incorrect, it was a programming
2429 1.1 christos error. */
2430 1.1 christos gdb_assert (code == 0);
2431 1.1 christos
2432 #if GDB_SELF_TEST
2433 selftests::register_test ("rust-lex", rust_lex_tests);
2434 #endif
2435 }
2436