/* Copyright (C) 2023 Free Software Foundation, Inc.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18 #include "defs.h"
19 #include "gdbsupport/gdb_assert.h"
20 #include "gdbsupport/selftest.h"
21 #include "test-target.h"
22 #include "scoped-mock-context.h"
23 #include "break-cond-parse.h"
24 #include "tid-parse.h"
25 #include "ada-lang.h"
26 #include "exceptions.h"
27
/* The direction in which the next token is extracted from a string.

   Consider the following string, with 'ptr' sitting between DEF and GHI:

     ABC DEF GHI JKL
            ^
           ptr

   A 'forward' parse yields the token 'GHI' and leaves 'ptr' between GHI
   and JKL.  A 'backward' parse yields the token 'DEF' and leaves 'ptr'
   between ABC and DEF.  */

enum class parse_direction
{
  /* Extract the token that follows the current position.  */
  forward,

  /* Extract the token that precedes the current position.  */
  backward
};
49
50 /* Find the next token in DIRECTION from *CURR. */
51
52 static std::string_view
53 find_next_token (const char **curr, parse_direction direction)
54 {
55 const char *tok_start, *tok_end;
56
57 gdb_assert (**curr != '\0');
58
59 if (direction == parse_direction::forward)
60 {
61 *curr = skip_spaces (*curr);
62 tok_start = *curr;
63 *curr = skip_to_space (*curr);
64 tok_end = *curr - 1;
65 }
66 else
67 {
68 gdb_assert (direction == parse_direction::backward);
69
70 while (isspace (**curr))
71 --(*curr);
72
73 tok_end = *curr;
74
75 while (!isspace (**curr))
76 --(*curr);
77
78 tok_start = (*curr) + 1;
79 }
80
81 return std::string_view (tok_start, tok_end - tok_start + 1);
82 }
83
84 /* A class that represents a complete parsed token. Each token has a type
85 and a std::string_view into the original breakpoint condition string. */
86
87 struct token
88 {
89 /* The types a token might take. */
90 enum class type
91 {
92 /* These are the token types for the 'if', 'thread', 'inferior', and
93 'task' keywords. The m_content for these token types is the value
94 passed to the keyword, not the keyword itself. */
95 CONDITION,
96 THREAD,
97 INFERIOR,
98 TASK,
99
100 /* This is the token used when we find unknown content, the m_content
101 for this token is the rest of the input string. */
102 REST,
103
104 /* This is the token for the -force-condition token, the m_content for
105 this token contains the keyword itself. */
106 FORCE
107 };
108
109 token (enum type type, std::string_view content)
110 : m_type (type),
111 m_content (std::move (content))
112 {
113 /* Nothing. */
114 }
115
116 /* Return a string representing this token. Only used for debug. */
117 std::string to_string () const
118 {
119 switch (m_type)
120 {
121 case type::CONDITION:
122 return string_printf ("{ CONDITION: \"%s\" }",
123 std::string (m_content).c_str ());
124 case type::THREAD:
125 return string_printf ("{ THREAD: \"%s\" }",
126 std::string (m_content).c_str ());
127 case type::INFERIOR:
128 return string_printf ("{ INFERIOR: \"%s\" }",
129 std::string (m_content).c_str ());
130 case type::TASK:
131 return string_printf ("{ TASK: \"%s\" }",
132 std::string (m_content).c_str ());
133 case type::REST:
134 return string_printf ("{ REST: \"%s\" }",
135 std::string (m_content).c_str ());
136 case type::FORCE:
137 return string_printf ("{ FORCE }");
138 default:
139 return "** unknown **";
140 }
141 }
142
143 /* The type of this token. */
144 const type &get_type () const
145 {
146 return m_type;
147 }
148
149 /* Return the value of this token. */
150 const std::string_view &get_value () const
151 {
152 gdb_assert (m_content.size () > 0);
153 return m_content;
154 }
155
156 /* Extend this token with the contents of OTHER. This only makes sense
157 if OTHER is the next token after this one in the original string,
158 however, enforcing that restriction is left to the caller of this
159 function.
160
161 When OTHER is a keyword/value token, e.g. 'thread 1', the m_content
162 for OTHER will only point to the '1'. However, as the m_content is a
163 std::string_view, then when we merge the m_content of OTHER into this
164 token we automatically merge in the 'thread' part too, as it
165 naturally sits between this token and OTHER. */
166
167 void
168 extend (const token &other)
169 {
170 m_content = std::string_view (this->m_content.data (),
171 (other.m_content.data ()
172 - this->m_content.data ()
173 + other.m_content.size ()));
174 }
175
176 private:
177 /* The type of this token. */
178 type m_type;
179
180 /* The important content part of this token. The extend member function
181 depends on this being a std::string_view. */
182 std::string_view m_content;
183 };
184
185 /* Split STR, a breakpoint condition string, into a vector of tokens where
186 each token represents a component of the condition. Tokens are first
187 parsed from the front of STR until we encounter an 'if' token. At this
188 point tokens are parsed from the end of STR until we encounter an
189 unknown token, which we assume is the other end of the 'if' condition.
190 If when scanning forward we encounter an unknown token then the
191 remainder of STR is placed into a 'rest' token (the rest of the
192 string), and no backward scan is performed. */
193
194 static std::vector<token>
195 parse_all_tokens (const char *str)
196 {
197 gdb_assert (str != nullptr);
198
199 std::vector<token> forward_results;
200 std::vector<token> backward_results;
201
202 const char *cond_start = nullptr;
203 const char *cond_end = nullptr;
204 parse_direction direction = parse_direction::forward;
205 std::vector<token> *curr_results = &forward_results;
206 while (*str != '\0')
207 {
208 /* Find the next token. If moving backward and this token starts at
209 the same location as the condition then we must have found the
210 other end of the condition string -- we're done. */
211 std::string_view t = find_next_token (&str, direction);
212 if (direction == parse_direction::backward && t.data () <= cond_start)
213 {
214 cond_end = &t.back ();
215 break;
216 }
217
218 /* We only have a single flag option to check for. All the other
219 options take a value so require an additional token to be found.
220 Additionally, we require that this flag be at least '-f', we
221 don't allow it to be abbreviated to '-'. */
222 if (t.length () > 1 && startswith ("-force-condition", t))
223 {
224 curr_results->emplace_back (token::type::FORCE, t);
225 continue;
226 }
227
228 /* Maybe the first token was the last token in the string. If this
229 is the case then we definitely can't try to extract a value
230 token. This also means that the token T is meaningless. Reset
231 TOK to point at the start of the unknown content and break out of
232 the loop. We'll record the unknown part of the string outside of
233 the scanning loop (below). */
234 if (direction == parse_direction::forward && *str == '\0')
235 {
236 str = t.data ();
237 break;
238 }
239
240 /* As before, find the next token and, if we are scanning backwards,
241 check that we have not reached the start of the condition string. */
242 std::string_view v = find_next_token (&str, direction);
243 if (direction == parse_direction::backward && v.data () <= cond_start)
244 {
245 /* Use token T here as that must also be part of the condition
246 string. */
247 cond_end = &t.back ();
248 break;
249 }
250
251 /* When moving backward we will first parse the value token then the
252 keyword token, so swap them now. */
253 if (direction == parse_direction::backward)
254 std::swap (t, v);
255
256 /* Check for valid option in token T. If we find a valid option then
257 parse the value from the token V. Except for 'if', that's handled
258 differently.
259
260 For the 'if' token we need to capture the entire condition
261 string, so record the start of the condition string and then
262 start scanning backwards looking for the end of the condition
263 string.
264
265 The order of these checks is important, at least the check for
266 'thread' must occur before the check for 'task'. We accept
267 abbreviations of these token names, and 't' should resolve to
268 'thread', which will only happen if we check 'thread' first. */
269 if (direction == parse_direction::forward && startswith ("if", t))
270 {
271 cond_start = v.data ();
272 str = str + strlen (str);
273 gdb_assert (*str == '\0');
274 --str;
275 direction = parse_direction::backward;
276 curr_results = &backward_results;
277 continue;
278 }
279 else if (startswith ("thread", t))
280 curr_results->emplace_back (token::type::THREAD, v);
281 else if (startswith ("inferior", t))
282 curr_results->emplace_back (token::type::INFERIOR, v);
283 else if (startswith ("task", t))
284 curr_results->emplace_back (token::type::TASK, v);
285 else
286 {
287 /* An unknown token. If we are scanning forward then reset TOK
288 to point at the start of the unknown content, we record this
289 outside of the scanning loop (below).
290
291 If we are scanning backward then unknown content is assumed to
292 be the other end of the condition string, obviously, this is
293 just a heuristic, we could be looking at a mistyped command
294 line, but this will be spotted when the condition is
295 eventually evaluated.
296
297 Either way, no more scanning is required after this. */
298 if (direction == parse_direction::forward)
299 str = t.data ();
300 else
301 {
302 gdb_assert (direction == parse_direction::backward);
303 cond_end = &v.back ();
304 }
305 break;
306 }
307 }
308
309 if (cond_start != nullptr)
310 {
311 /* If we found the start of a condition string then we should have
312 switched to backward scan mode, and found the end of the condition
313 string. Capture the whole condition string into COND_STRING
314 now. */
315 gdb_assert (direction == parse_direction::backward);
316 gdb_assert (cond_end != nullptr);
317
318 std::string_view v (cond_start, cond_end - cond_start + 1);
319
320 forward_results.emplace_back (token::type::CONDITION, v);
321 }
322 else if (*str != '\0')
323 {
324 /* If we didn't have a condition start pointer then we should still
325 be in forward scanning mode. If we didn't reach the end of the
326 input string (TOK is not at the null character) then the rest of
327 the input string is garbage that we didn't understand.
328
329 Record the unknown content into REST. The caller of this function
330 will report this as an error later on. We could report the error
331 here, but we prefer to allow the caller to run other checks, and
332 prioritise other errors before reporting this problem. */
333 gdb_assert (direction == parse_direction::forward);
334 gdb_assert (cond_end == nullptr);
335
336 std::string_view v (str, strlen (str));
337
338 forward_results.emplace_back (token::type::REST, v);
339 }
340
341 /* If we have tokens in the BACKWARD_RESULTS vector then this means that
342 we found an 'if' condition (which will be the last thing in the
343 FORWARD_RESULTS vector), and then we started a backward scan.
344
345 The last tokens from the input string (those after the 'if' condition)
346 will be the first tokens added to the BACKWARD_RESULTS vector, so the
347 last items in the BACKWARD_RESULTS vector are those next to the 'if'
348 condition.
349
350 Check the tokens in the BACKWARD_RESULTS vector from back to front.
351 If the tokens look invalid then we assume that they are actually part
352 of the 'if' condition, and merge the token with the 'if' condition.
353 If it turns out that this was incorrect and that instead the user just
354 messed up entering the token value, then this will show as an error
355 when parsing the 'if' condition.
356
357 Doing this allows us to handle things like:
358
359 break function if ( variable == thread )
360
361 Where 'thread' is a local variable within 'function'. When parsing
362 this we will initially see 'thread )' as a thread token with ')' as
363 the value. However, the following code will spot that ')' is not a
364 valid thread-id, and so we merge 'thread )' into the 'if' condition
365 string.
366
367 This code also handles the special treatment for '-force-condition',
368 which exists for backwards compatibility reasons. Traditionally this
369 flag, if it occurred immediately after the 'if' condition, would be
370 treated as part of the 'if' condition. When the breakpoint condition
371 parsing code was rewritten, this behavior was retained. */
372 gdb_assert (backward_results.empty ()
373 || (forward_results.back ().get_type ()
374 == token::type::CONDITION));
375 while (!backward_results.empty ())
376 {
377 token &t = backward_results.back ();
378
379 if (t.get_type () == token::type::FORCE)
380 forward_results.back ().extend (std::move (t));
381 else if (t.get_type () == token::type::THREAD)
382 {
383 const char *end;
384 std::string v (t.get_value ());
385 if (is_thread_id (v.c_str (), &end) && *end == '\0')
386 break;
387 forward_results.back ().extend (std::move (t));
388 }
389 else if (t.get_type () == token::type::INFERIOR
390 || t.get_type () == token::type::TASK)
391 {
392 /* Place the token's value into a null-terminated string, parse
393 the string as a number and check that the entire string was
394 parsed. If this is true then this looks like a valid inferior
395 or task number, otherwise, assume an invalid id, and merge
396 this token with the 'if' token. */
397 char *end;
398 std::string v (t.get_value ());
399 (void) strtol (v.c_str (), &end, 0);
400 if (end > v.c_str () && *end == '\0')
401 break;
402 forward_results.back ().extend (std::move (t));
403 }
404 else
405 gdb_assert_not_reached ("unexpected token type");
406
407 /* If we found an actual valid token above then we will have broken
408 out of the loop. We only get here if the token was merged with
409 the 'if' condition, in which case we can discard the last token
410 and then check the token before that. */
411 backward_results.pop_back ();
412 }
413
414 /* If after the above checks we still have some tokens in the
415 BACKWARD_RESULTS vector, then these need to be appended to the
416 FORWARD_RESULTS vector. However, we first reverse the order so that
417 FORWARD_RESULTS retains the tokens in the order they appeared in the
418 input string. */
419 if (!backward_results.empty ())
420 forward_results.insert (forward_results.end (),
421 backward_results.rbegin (),
422 backward_results.rend ());
423
424 return forward_results;
425 }
426
427 /* Called when the global debug_breakpoint is true. Prints VEC to the
428 debug output stream. */
429
430 static void
431 dump_condition_tokens (const std::vector<token> &vec)
432 {
433 gdb_assert (debug_breakpoint);
434
435 bool first = true;
436 std::string str = "Tokens: ";
437 for (const token &t : vec)
438 {
439 if (!first)
440 str += " ";
441 first = false;
442 str += t.to_string ();
443 }
444 breakpoint_debug_printf ("%s", str.c_str ());
445 }
446
447 /* See break-cond-parse.h. */
448
449 void
450 create_breakpoint_parse_arg_string
451 (const char *str, gdb::unique_xmalloc_ptr<char> *cond_string_ptr,
452 int *thread_ptr, int *inferior_ptr, int *task_ptr,
453 gdb::unique_xmalloc_ptr<char> *rest_ptr, bool *force_ptr)
454 {
455 /* Set up the defaults. */
456 cond_string_ptr->reset ();
457 rest_ptr->reset ();
458 *thread_ptr = -1;
459 *inferior_ptr = -1;
460 *task_ptr = -1;
461 *force_ptr = false;
462
463 if (str == nullptr)
464 return;
465
466 /* Split STR into a series of tokens. */
467 std::vector<token> tokens = parse_all_tokens (str);
468 if (debug_breakpoint)
469 dump_condition_tokens (tokens);
470
471 /* Temporary variables. Initialised to the default state, then updated
472 as we parse TOKENS. If all of TOKENS is parsed successfully then the
473 state from these variables is copied into the output arguments before
474 the function returns. */
475 int thread = -1, inferior = -1, task = -1;
476 bool force = false;
477 gdb::unique_xmalloc_ptr<char> cond_string, rest;
478
479 for (const token &t : tokens)
480 {
481 std::string tok_value (t.get_value ());
482 switch (t.get_type ())
483 {
484 case token::type::FORCE:
485 force = true;
486 break;
487 case token::type::THREAD:
488 {
489 if (thread != -1)
490 error ("You can specify only one thread.");
491 if (task != -1 || inferior != -1)
492 error ("You can specify only one of thread, inferior, or task.");
493 const char *tmptok;
494 thread_info *thr = parse_thread_id (tok_value.c_str (), &tmptok);
495 gdb_assert (*tmptok == '\0');
496 thread = thr->global_num;
497 }
498 break;
499 case token::type::INFERIOR:
500 {
501 if (inferior != -1)
502 error ("You can specify only one inferior.");
503 if (task != -1 || thread != -1)
504 error ("You can specify only one of thread, inferior, or task.");
505 char *tmptok;
506 long inferior_id = strtol (tok_value.c_str (), &tmptok, 0);
507 if (*tmptok != '\0')
508 error (_("Junk '%s' after inferior keyword."), tmptok);
509 if (inferior_id > INT_MAX)
510 error (_("No inferior number '%ld'"), inferior_id);
511 inferior = static_cast<int> (inferior_id);
512 struct inferior *inf = find_inferior_id (inferior);
513 if (inf == nullptr)
514 error (_("No inferior number '%d'"), inferior);
515 }
516 break;
517 case token::type::TASK:
518 {
519 if (task != -1)
520 error ("You can specify only one task.");
521 if (inferior != -1 || thread != -1)
522 error ("You can specify only one of thread, inferior, or task.");
523 char *tmptok;
524 long task_id = strtol (tok_value.c_str (), &tmptok, 0);
525 if (*tmptok != '\0')
526 error (_("Junk '%s' after task keyword."), tmptok);
527 if (task_id > INT_MAX)
528 error (_("Unknown task %ld"), task_id);
529 task = static_cast<int> (task_id);
530 if (!valid_task_id (task))
531 error (_("Unknown task %d."), task);
532 }
533 break;
534 case token::type::CONDITION:
535 cond_string.reset (savestring (t.get_value ().data (),
536 t.get_value ().size ()));
537 break;
538 case token::type::REST:
539 rest.reset (savestring (t.get_value ().data (),
540 t.get_value ().size ()));
541 break;
542 }
543 }
544
545 /* Move results into the output locations. */
546 *force_ptr = force;
547 *thread_ptr = thread;
548 *inferior_ptr = inferior;
549 *task_ptr = task;
550 rest_ptr->reset (rest.release ());
551 cond_string_ptr->reset (cond_string.release ());
552 }
553
554 #if GDB_SELF_TEST
555
556 namespace selftests {
557
558 /* Run a single test of the create_breakpoint_parse_arg_string function.
559 INPUT is passed to create_breakpoint_parse_arg_string while all other
560 arguments are the expected output from
561 create_breakpoint_parse_arg_string. */
562
563 static void
564 test (const char *input, const char *condition, int thread = -1,
565 int inferior = -1, int task = -1, bool force = false,
566 const char *rest = nullptr, const char *error_msg = nullptr)
567 {
568 gdb::unique_xmalloc_ptr<char> extracted_condition;
569 gdb::unique_xmalloc_ptr<char> extracted_rest;
570 int extracted_thread, extracted_inferior, extracted_task;
571 bool extracted_force_condition;
572 std::string exception_msg, error_str;
573
574 if (error_msg != nullptr)
575 error_str = std::string (error_msg) + "\n";
576
577 try
578 {
579 create_breakpoint_parse_arg_string (input, &extracted_condition,
580 &extracted_thread,
581 &extracted_inferior,
582 &extracted_task, &extracted_rest,
583 &extracted_force_condition);
584 }
585 catch (const gdb_exception_error &ex)
586 {
587 string_file buf;
588
589 exception_print (&buf, ex);
590 exception_msg = buf.release ();
591 }
592
593 if ((condition == nullptr) != (extracted_condition.get () == nullptr)
594 || (condition != nullptr
595 && strcmp (condition, extracted_condition.get ()) != 0)
596 || (rest == nullptr) != (extracted_rest.get () == nullptr)
597 || (rest != nullptr && strcmp (rest, extracted_rest.get ()) != 0)
598 || thread != extracted_thread
599 || inferior != extracted_inferior
600 || task != extracted_task
601 || force != extracted_force_condition
602 || exception_msg != error_str)
603 {
604 if (run_verbose ())
605 {
606 debug_printf ("input: '%s'\n", input);
607 debug_printf ("condition: '%s'\n", extracted_condition.get ());
608 debug_printf ("rest: '%s'\n", extracted_rest.get ());
609 debug_printf ("thread: %d\n", extracted_thread);
610 debug_printf ("inferior: %d\n", extracted_inferior);
611 debug_printf ("task: %d\n", extracted_task);
612 debug_printf ("forced: %s\n",
613 extracted_force_condition ? "true" : "false");
614 debug_printf ("exception: '%s'\n", exception_msg.c_str ());
615 }
616
617 /* Report the failure. */
618 SELF_CHECK (false);
619 }
620 }
621
622 /* Wrapper for test function. Pass through the default values for all
623 parameters, except the last parameter, which indicates that we expect
624 INPUT to trigger an error. */
625
626 static void
627 test_error (const char *input, const char *error_msg)
628 {
629 test (input, nullptr, -1, -1, -1, false, nullptr, error_msg);
630 }
631
632 /* Test the create_breakpoint_parse_arg_string function. Just wraps
633 multiple calls to the test function above. */
634
635 static void
636 create_breakpoint_parse_arg_string_tests ()
637 {
638 gdbarch *arch = current_inferior ()->arch ();
639 scoped_restore_current_pspace_and_thread restore;
640 scoped_mock_context<test_target_ops> mock_target (arch);
641
642 int global_thread_num = mock_target.mock_thread.global_num;
643
644 /* Test parsing valid breakpoint condition strings. */
645 test (" if blah ", "blah");
646 test (" if blah thread 1", "blah", global_thread_num);
647 test (" if blah inferior 1", "blah", -1, 1);
648 test (" if blah thread 1 ", "blah", global_thread_num);
649 test ("thread 1 woof", nullptr, global_thread_num, -1, -1, false, "woof");
650 test ("thread 1 X", nullptr, global_thread_num, -1, -1, false, "X");
651 test (" if blah thread 1 -force-condition", "blah", global_thread_num,
652 -1, -1, true);
653 test (" -force-condition if blah thread 1", "blah", global_thread_num,
654 -1, -1, true);
655 test (" -force-condition if blah thread 1 ", "blah", global_thread_num,
656 -1, -1, true);
657 test ("thread 1 -force-condition if blah", "blah", global_thread_num,
658 -1, -1, true);
659 test ("if (A::outer::func ())", "(A::outer::func ())");
660 test ("if ( foo == thread )", "( foo == thread )");
661 test ("if ( foo == thread ) inferior 1", "( foo == thread )", -1, 1);
662 test ("if ( foo == thread ) thread 1", "( foo == thread )",
663 global_thread_num);
664 test ("if foo == thread", "foo == thread");
665 test ("if foo == thread 1", "foo ==", global_thread_num);
666
667 /* Test parsing some invalid breakpoint condition strings. */
668 test_error ("thread 1 if foo == 123 thread 1",
669 "You can specify only one thread.");
670 test_error ("thread 1 if foo == 123 inferior 1",
671 "You can specify only one of thread, inferior, or task.");
672 test_error ("thread 1 if foo == 123 task 1",
673 "You can specify only one of thread, inferior, or task.");
674 test_error ("inferior 1 if foo == 123 inferior 1",
675 "You can specify only one inferior.");
676 test_error ("inferior 1 if foo == 123 thread 1",
677 "You can specify only one of thread, inferior, or task.");
678 test_error ("inferior 1 if foo == 123 task 1",
679 "You can specify only one of thread, inferior, or task.");
680 test_error ("thread 1.2.3", "Invalid thread ID: 1.2.3");
681 test_error ("thread 1/2", "Invalid thread ID: 1/2");
682 test_error ("thread 1xxx", "Invalid thread ID: 1xxx");
683 test_error ("inferior 1xxx", "Junk 'xxx' after inferior keyword.");
684 test_error ("task 1xxx", "Junk 'xxx' after task keyword.");
685 }
686
687 } // namespace selftests
688 #endif /* GDB_SELF_TEST */
689
/* Register the create_breakpoint_parse_arg_string self test when GDB is
   built with GDB_SELF_TEST enabled; otherwise this does nothing.  */

void _initialize_break_cond_parse ();

void
_initialize_break_cond_parse ()
{
#if GDB_SELF_TEST
  selftests::register_test
    ("create_breakpoint_parse_arg_string",
     selftests::create_breakpoint_parse_arg_string_tests);
#endif
}
700