1 /* Copyright (C) 2023 Free Software Foundation, Inc. 2 3 This file is part of GDB. 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18 #include "defs.h" 19 #include "gdbsupport/gdb_assert.h" 20 #include "gdbsupport/selftest.h" 21 #include "test-target.h" 22 #include "scoped-mock-context.h" 23 #include "break-cond-parse.h" 24 #include "tid-parse.h" 25 #include "ada-lang.h" 26 #include "exceptions.h" 27 28 /* When parsing tokens from a string, which direction are we parsing? 29 30 Given the following string and pointer 'ptr': 31 32 ABC DEF GHI JKL 33 ^ 34 ptr 35 36 Parsing 'forward' will return the token 'GHI' and update 'ptr' to point 37 between GHI and JKL. Parsing 'backward' will return the token 'DEF' and 38 update 'ptr' to point between ABC and DEF. 39 */ 40 41 enum class parse_direction 42 { 43 /* Parse the next token forwards. */ 44 forward, 45 46 /* Parse the previous token backwards. */ 47 backward 48 }; 49 50 /* Find the next token in DIRECTION from *CURR. */ 51 52 static std::string_view 53 find_next_token (const char **curr, parse_direction direction) 54 { 55 const char *tok_start, *tok_end; 56 57 gdb_assert (**curr != '\0'); 58 59 if (direction == parse_direction::forward) 60 { 61 *curr = skip_spaces (*curr); 62 tok_start = *curr; 63 *curr = skip_to_space (*curr); 64 tok_end = *curr - 1; 65 } 66 else 67 { 68 gdb_assert (direction == parse_direction::backward); 69 70 while (isspace (**curr)) 71 --(*curr); 72 73 tok_end = *curr; 74 75 while (!isspace (**curr)) 76 --(*curr); 77 78 tok_start = (*curr) + 1; 79 } 80 81 return std::string_view (tok_start, tok_end - tok_start + 1); 82 } 83 84 /* A class that represents a complete parsed token. Each token has a type 85 and a std::string_view into the original breakpoint condition string. */ 86 87 struct token 88 { 89 /* The types a token might take. */ 90 enum class type 91 { 92 /* These are the token types for the 'if', 'thread', 'inferior', and 93 'task' keywords. The m_content for these token types is the value 94 passed to the keyword, not the keyword itself. */ 95 CONDITION, 96 THREAD, 97 INFERIOR, 98 TASK, 99 100 /* This is the token used when we find unknown content, the m_content 101 for this token is the rest of the input string. */ 102 REST, 103 104 /* This is the token for the -force-condition token, the m_content for 105 this token contains the keyword itself. */ 106 FORCE 107 }; 108 109 token (enum type type, std::string_view content) 110 : m_type (type), 111 m_content (std::move (content)) 112 { 113 /* Nothing. */ 114 } 115 116 /* Return a string representing this token. Only used for debug. */ 117 std::string to_string () const 118 { 119 switch (m_type) 120 { 121 case type::CONDITION: 122 return string_printf ("{ CONDITION: \"%s\" }", 123 std::string (m_content).c_str ()); 124 case type::THREAD: 125 return string_printf ("{ THREAD: \"%s\" }", 126 std::string (m_content).c_str ()); 127 case type::INFERIOR: 128 return string_printf ("{ INFERIOR: \"%s\" }", 129 std::string (m_content).c_str ()); 130 case type::TASK: 131 return string_printf ("{ TASK: \"%s\" }", 132 std::string (m_content).c_str ()); 133 case type::REST: 134 return string_printf ("{ REST: \"%s\" }", 135 std::string (m_content).c_str ()); 136 case type::FORCE: 137 return string_printf ("{ FORCE }"); 138 default: 139 return "** unknown **"; 140 } 141 } 142 143 /* The type of this token. */ 144 const type &get_type () const 145 { 146 return m_type; 147 } 148 149 /* Return the value of this token. */ 150 const std::string_view &get_value () const 151 { 152 gdb_assert (m_content.size () > 0); 153 return m_content; 154 } 155 156 /* Extend this token with the contents of OTHER. This only makes sense 157 if OTHER is the next token after this one in the original string, 158 however, enforcing that restriction is left to the caller of this 159 function. 160 161 When OTHER is a keyword/value token, e.g. 'thread 1', the m_content 162 for OTHER will only point to the '1'. However, as the m_content is a 163 std::string_view, then when we merge the m_content of OTHER into this 164 token we automatically merge in the 'thread' part too, as it 165 naturally sits between this token and OTHER. */ 166 167 void 168 extend (const token &other) 169 { 170 m_content = std::string_view (this->m_content.data (), 171 (other.m_content.data () 172 - this->m_content.data () 173 + other.m_content.size ())); 174 } 175 176 private: 177 /* The type of this token. */ 178 type m_type; 179 180 /* The important content part of this token. The extend member function 181 depends on this being a std::string_view. */ 182 std::string_view m_content; 183 }; 184 185 /* Split STR, a breakpoint condition string, into a vector of tokens where 186 each token represents a component of the condition. Tokens are first 187 parsed from the front of STR until we encounter an 'if' token. At this 188 point tokens are parsed from the end of STR until we encounter an 189 unknown token, which we assume is the other end of the 'if' condition. 190 If when scanning forward we encounter an unknown token then the 191 remainder of STR is placed into a 'rest' token (the rest of the 192 string), and no backward scan is performed. */ 193 194 static std::vector<token> 195 parse_all_tokens (const char *str) 196 { 197 gdb_assert (str != nullptr); 198 199 std::vector<token> forward_results; 200 std::vector<token> backward_results; 201 202 const char *cond_start = nullptr; 203 const char *cond_end = nullptr; 204 parse_direction direction = parse_direction::forward; 205 std::vector<token> *curr_results = &forward_results; 206 while (*str != '\0') 207 { 208 /* Find the next token. If moving backward and this token starts at 209 the same location as the condition then we must have found the 210 other end of the condition string -- we're done. */ 211 std::string_view t = find_next_token (&str, direction); 212 if (direction == parse_direction::backward && t.data () <= cond_start) 213 { 214 cond_end = &t.back (); 215 break; 216 } 217 218 /* We only have a single flag option to check for. All the other 219 options take a value so require an additional token to be found. 220 Additionally, we require that this flag be at least '-f', we 221 don't allow it to be abbreviated to '-'. */ 222 if (t.length () > 1 && startswith ("-force-condition", t)) 223 { 224 curr_results->emplace_back (token::type::FORCE, t); 225 continue; 226 } 227 228 /* Maybe the first token was the last token in the string. If this 229 is the case then we definitely can't try to extract a value 230 token. This also means that the token T is meaningless. Reset 231 TOK to point at the start of the unknown content and break out of 232 the loop. We'll record the unknown part of the string outside of 233 the scanning loop (below). */ 234 if (direction == parse_direction::forward && *str == '\0') 235 { 236 str = t.data (); 237 break; 238 } 239 240 /* As before, find the next token and, if we are scanning backwards, 241 check that we have not reached the start of the condition string. */ 242 std::string_view v = find_next_token (&str, direction); 243 if (direction == parse_direction::backward && v.data () <= cond_start) 244 { 245 /* Use token T here as that must also be part of the condition 246 string. */ 247 cond_end = &t.back (); 248 break; 249 } 250 251 /* When moving backward we will first parse the value token then the 252 keyword token, so swap them now. */ 253 if (direction == parse_direction::backward) 254 std::swap (t, v); 255 256 /* Check for valid option in token T. If we find a valid option then 257 parse the value from the token V. Except for 'if', that's handled 258 differently. 259 260 For the 'if' token we need to capture the entire condition 261 string, so record the start of the condition string and then 262 start scanning backwards looking for the end of the condition 263 string. 264 265 The order of these checks is important, at least the check for 266 'thread' must occur before the check for 'task'. We accept 267 abbreviations of these token names, and 't' should resolve to 268 'thread', which will only happen if we check 'thread' first. */ 269 if (direction == parse_direction::forward && startswith ("if", t)) 270 { 271 cond_start = v.data (); 272 str = str + strlen (str); 273 gdb_assert (*str == '\0'); 274 --str; 275 direction = parse_direction::backward; 276 curr_results = &backward_results; 277 continue; 278 } 279 else if (startswith ("thread", t)) 280 curr_results->emplace_back (token::type::THREAD, v); 281 else if (startswith ("inferior", t)) 282 curr_results->emplace_back (token::type::INFERIOR, v); 283 else if (startswith ("task", t)) 284 curr_results->emplace_back (token::type::TASK, v); 285 else 286 { 287 /* An unknown token. If we are scanning forward then reset TOK 288 to point at the start of the unknown content, we record this 289 outside of the scanning loop (below). 290 291 If we are scanning backward then unknown content is assumed to 292 be the other end of the condition string, obviously, this is 293 just a heuristic, we could be looking at a mistyped command 294 line, but this will be spotted when the condition is 295 eventually evaluated. 296 297 Either way, no more scanning is required after this. */ 298 if (direction == parse_direction::forward) 299 str = t.data (); 300 else 301 { 302 gdb_assert (direction == parse_direction::backward); 303 cond_end = &v.back (); 304 } 305 break; 306 } 307 } 308 309 if (cond_start != nullptr) 310 { 311 /* If we found the start of a condition string then we should have 312 switched to backward scan mode, and found the end of the condition 313 string. Capture the whole condition string into COND_STRING 314 now. */ 315 gdb_assert (direction == parse_direction::backward); 316 gdb_assert (cond_end != nullptr); 317 318 std::string_view v (cond_start, cond_end - cond_start + 1); 319 320 forward_results.emplace_back (token::type::CONDITION, v); 321 } 322 else if (*str != '\0') 323 { 324 /* If we didn't have a condition start pointer then we should still 325 be in forward scanning mode. If we didn't reach the end of the 326 input string (TOK is not at the null character) then the rest of 327 the input string is garbage that we didn't understand. 328 329 Record the unknown content into REST. The caller of this function 330 will report this as an error later on. We could report the error 331 here, but we prefer to allow the caller to run other checks, and 332 prioritise other errors before reporting this problem. */ 333 gdb_assert (direction == parse_direction::forward); 334 gdb_assert (cond_end == nullptr); 335 336 std::string_view v (str, strlen (str)); 337 338 forward_results.emplace_back (token::type::REST, v); 339 } 340 341 /* If we have tokens in the BACKWARD_RESULTS vector then this means that 342 we found an 'if' condition (which will be the last thing in the 343 FORWARD_RESULTS vector), and then we started a backward scan. 344 345 The last tokens from the input string (those after the 'if' condition) 346 will be the first tokens added to the BACKWARD_RESULTS vector, so the 347 last items in the BACKWARD_RESULTS vector are those next to the 'if' 348 condition. 349 350 Check the tokens in the BACKWARD_RESULTS vector from back to front. 351 If the tokens look invalid then we assume that they are actually part 352 of the 'if' condition, and merge the token with the 'if' condition. 353 If it turns out that this was incorrect and that instead the user just 354 messed up entering the token value, then this will show as an error 355 when parsing the 'if' condition. 356 357 Doing this allows us to handle things like: 358 359 break function if ( variable == thread ) 360 361 Where 'thread' is a local variable within 'function'. When parsing 362 this we will initially see 'thread )' as a thread token with ')' as 363 the value. However, the following code will spot that ')' is not a 364 valid thread-id, and so we merge 'thread )' into the 'if' condition 365 string. 366 367 This code also handles the special treatment for '-force-condition', 368 which exists for backwards compatibility reasons. Traditionally this 369 flag, if it occurred immediately after the 'if' condition, would be 370 treated as part of the 'if' condition. When the breakpoint condition 371 parsing code was rewritten, this behavior was retained. */ 372 gdb_assert (backward_results.empty () 373 || (forward_results.back ().get_type () 374 == token::type::CONDITION)); 375 while (!backward_results.empty ()) 376 { 377 token &t = backward_results.back (); 378 379 if (t.get_type () == token::type::FORCE) 380 forward_results.back ().extend (std::move (t)); 381 else if (t.get_type () == token::type::THREAD) 382 { 383 const char *end; 384 std::string v (t.get_value ()); 385 if (is_thread_id (v.c_str (), &end) && *end == '\0') 386 break; 387 forward_results.back ().extend (std::move (t)); 388 } 389 else if (t.get_type () == token::type::INFERIOR 390 || t.get_type () == token::type::TASK) 391 { 392 /* Place the token's value into a null-terminated string, parse 393 the string as a number and check that the entire string was 394 parsed. If this is true then this looks like a valid inferior 395 or task number, otherwise, assume an invalid id, and merge 396 this token with the 'if' token. */ 397 char *end; 398 std::string v (t.get_value ()); 399 (void) strtol (v.c_str (), &end, 0); 400 if (end > v.c_str () && *end == '\0') 401 break; 402 forward_results.back ().extend (std::move (t)); 403 } 404 else 405 gdb_assert_not_reached ("unexpected token type"); 406 407 /* If we found an actual valid token above then we will have broken 408 out of the loop. We only get here if the token was merged with 409 the 'if' condition, in which case we can discard the last token 410 and then check the token before that. */ 411 backward_results.pop_back (); 412 } 413 414 /* If after the above checks we still have some tokens in the 415 BACKWARD_RESULTS vector, then these need to be appended to the 416 FORWARD_RESULTS vector. However, we first reverse the order so that 417 FORWARD_RESULTS retains the tokens in the order they appeared in the 418 input string. */ 419 if (!backward_results.empty ()) 420 forward_results.insert (forward_results.end (), 421 backward_results.rbegin (), 422 backward_results.rend ()); 423 424 return forward_results; 425 } 426 427 /* Called when the global debug_breakpoint is true. Prints VEC to the 428 debug output stream. */ 429 430 static void 431 dump_condition_tokens (const std::vector<token> &vec) 432 { 433 gdb_assert (debug_breakpoint); 434 435 bool first = true; 436 std::string str = "Tokens: "; 437 for (const token &t : vec) 438 { 439 if (!first) 440 str += " "; 441 first = false; 442 str += t.to_string (); 443 } 444 breakpoint_debug_printf ("%s", str.c_str ()); 445 } 446 447 /* See break-cond-parse.h. */ 448 449 void 450 create_breakpoint_parse_arg_string 451 (const char *str, gdb::unique_xmalloc_ptr<char> *cond_string_ptr, 452 int *thread_ptr, int *inferior_ptr, int *task_ptr, 453 gdb::unique_xmalloc_ptr<char> *rest_ptr, bool *force_ptr) 454 { 455 /* Set up the defaults. */ 456 cond_string_ptr->reset (); 457 rest_ptr->reset (); 458 *thread_ptr = -1; 459 *inferior_ptr = -1; 460 *task_ptr = -1; 461 *force_ptr = false; 462 463 if (str == nullptr) 464 return; 465 466 /* Split STR into a series of tokens. */ 467 std::vector<token> tokens = parse_all_tokens (str); 468 if (debug_breakpoint) 469 dump_condition_tokens (tokens); 470 471 /* Temporary variables. Initialised to the default state, then updated 472 as we parse TOKENS. If all of TOKENS is parsed successfully then the 473 state from these variables is copied into the output arguments before 474 the function returns. */ 475 int thread = -1, inferior = -1, task = -1; 476 bool force = false; 477 gdb::unique_xmalloc_ptr<char> cond_string, rest; 478 479 for (const token &t : tokens) 480 { 481 std::string tok_value (t.get_value ()); 482 switch (t.get_type ()) 483 { 484 case token::type::FORCE: 485 force = true; 486 break; 487 case token::type::THREAD: 488 { 489 if (thread != -1) 490 error ("You can specify only one thread."); 491 if (task != -1 || inferior != -1) 492 error ("You can specify only one of thread, inferior, or task."); 493 const char *tmptok; 494 thread_info *thr = parse_thread_id (tok_value.c_str (), &tmptok); 495 gdb_assert (*tmptok == '\0'); 496 thread = thr->global_num; 497 } 498 break; 499 case token::type::INFERIOR: 500 { 501 if (inferior != -1) 502 error ("You can specify only one inferior."); 503 if (task != -1 || thread != -1) 504 error ("You can specify only one of thread, inferior, or task."); 505 char *tmptok; 506 long inferior_id = strtol (tok_value.c_str (), &tmptok, 0); 507 if (*tmptok != '\0') 508 error (_("Junk '%s' after inferior keyword."), tmptok); 509 if (inferior_id > INT_MAX) 510 error (_("No inferior number '%ld'"), inferior_id); 511 inferior = static_cast<int> (inferior_id); 512 struct inferior *inf = find_inferior_id (inferior); 513 if (inf == nullptr) 514 error (_("No inferior number '%d'"), inferior); 515 } 516 break; 517 case token::type::TASK: 518 { 519 if (task != -1) 520 error ("You can specify only one task."); 521 if (inferior != -1 || thread != -1) 522 error ("You can specify only one of thread, inferior, or task."); 523 char *tmptok; 524 long task_id = strtol (tok_value.c_str (), &tmptok, 0); 525 if (*tmptok != '\0') 526 error (_("Junk '%s' after task keyword."), tmptok); 527 if (task_id > INT_MAX) 528 error (_("Unknown task %ld"), task_id); 529 task = static_cast<int> (task_id); 530 if (!valid_task_id (task)) 531 error (_("Unknown task %d."), task); 532 } 533 break; 534 case token::type::CONDITION: 535 cond_string.reset (savestring (t.get_value ().data (), 536 t.get_value ().size ())); 537 break; 538 case token::type::REST: 539 rest.reset (savestring (t.get_value ().data (), 540 t.get_value ().size ())); 541 break; 542 } 543 } 544 545 /* Move results into the output locations. */ 546 *force_ptr = force; 547 *thread_ptr = thread; 548 *inferior_ptr = inferior; 549 *task_ptr = task; 550 rest_ptr->reset (rest.release ()); 551 cond_string_ptr->reset (cond_string.release ()); 552 } 553 554 #if GDB_SELF_TEST 555 556 namespace selftests { 557 558 /* Run a single test of the create_breakpoint_parse_arg_string function. 559 INPUT is passed to create_breakpoint_parse_arg_string while all other 560 arguments are the expected output from 561 create_breakpoint_parse_arg_string. */ 562 563 static void 564 test (const char *input, const char *condition, int thread = -1, 565 int inferior = -1, int task = -1, bool force = false, 566 const char *rest = nullptr, const char *error_msg = nullptr) 567 { 568 gdb::unique_xmalloc_ptr<char> extracted_condition; 569 gdb::unique_xmalloc_ptr<char> extracted_rest; 570 int extracted_thread, extracted_inferior, extracted_task; 571 bool extracted_force_condition; 572 std::string exception_msg, error_str; 573 574 if (error_msg != nullptr) 575 error_str = std::string (error_msg) + "\n"; 576 577 try 578 { 579 create_breakpoint_parse_arg_string (input, &extracted_condition, 580 &extracted_thread, 581 &extracted_inferior, 582 &extracted_task, &extracted_rest, 583 &extracted_force_condition); 584 } 585 catch (const gdb_exception_error &ex) 586 { 587 string_file buf; 588 589 exception_print (&buf, ex); 590 exception_msg = buf.release (); 591 } 592 593 if ((condition == nullptr) != (extracted_condition.get () == nullptr) 594 || (condition != nullptr 595 && strcmp (condition, extracted_condition.get ()) != 0) 596 || (rest == nullptr) != (extracted_rest.get () == nullptr) 597 || (rest != nullptr && strcmp (rest, extracted_rest.get ()) != 0) 598 || thread != extracted_thread 599 || inferior != extracted_inferior 600 || task != extracted_task 601 || force != extracted_force_condition 602 || exception_msg != error_str) 603 { 604 if (run_verbose ()) 605 { 606 debug_printf ("input: '%s'\n", input); 607 debug_printf ("condition: '%s'\n", extracted_condition.get ()); 608 debug_printf ("rest: '%s'\n", extracted_rest.get ()); 609 debug_printf ("thread: %d\n", extracted_thread); 610 debug_printf ("inferior: %d\n", extracted_inferior); 611 debug_printf ("task: %d\n", extracted_task); 612 debug_printf ("forced: %s\n", 613 extracted_force_condition ? "true" : "false"); 614 debug_printf ("exception: '%s'\n", exception_msg.c_str ()); 615 } 616 617 /* Report the failure. */ 618 SELF_CHECK (false); 619 } 620 } 621 622 /* Wrapper for test function. Pass through the default values for all 623 parameters, except the last parameter, which indicates that we expect 624 INPUT to trigger an error. */ 625 626 static void 627 test_error (const char *input, const char *error_msg) 628 { 629 test (input, nullptr, -1, -1, -1, false, nullptr, error_msg); 630 } 631 632 /* Test the create_breakpoint_parse_arg_string function. Just wraps 633 multiple calls to the test function above. */ 634 635 static void 636 create_breakpoint_parse_arg_string_tests () 637 { 638 gdbarch *arch = current_inferior ()->arch (); 639 scoped_restore_current_pspace_and_thread restore; 640 scoped_mock_context<test_target_ops> mock_target (arch); 641 642 int global_thread_num = mock_target.mock_thread.global_num; 643 644 /* Test parsing valid breakpoint condition strings. */ 645 test (" if blah ", "blah"); 646 test (" if blah thread 1", "blah", global_thread_num); 647 test (" if blah inferior 1", "blah", -1, 1); 648 test (" if blah thread 1 ", "blah", global_thread_num); 649 test ("thread 1 woof", nullptr, global_thread_num, -1, -1, false, "woof"); 650 test ("thread 1 X", nullptr, global_thread_num, -1, -1, false, "X"); 651 test (" if blah thread 1 -force-condition", "blah", global_thread_num, 652 -1, -1, true); 653 test (" -force-condition if blah thread 1", "blah", global_thread_num, 654 -1, -1, true); 655 test (" -force-condition if blah thread 1 ", "blah", global_thread_num, 656 -1, -1, true); 657 test ("thread 1 -force-condition if blah", "blah", global_thread_num, 658 -1, -1, true); 659 test ("if (A::outer::func ())", "(A::outer::func ())"); 660 test ("if ( foo == thread )", "( foo == thread )"); 661 test ("if ( foo == thread ) inferior 1", "( foo == thread )", -1, 1); 662 test ("if ( foo == thread ) thread 1", "( foo == thread )", 663 global_thread_num); 664 test ("if foo == thread", "foo == thread"); 665 test ("if foo == thread 1", "foo ==", global_thread_num); 666 667 /* Test parsing some invalid breakpoint condition strings. */ 668 test_error ("thread 1 if foo == 123 thread 1", 669 "You can specify only one thread."); 670 test_error ("thread 1 if foo == 123 inferior 1", 671 "You can specify only one of thread, inferior, or task."); 672 test_error ("thread 1 if foo == 123 task 1", 673 "You can specify only one of thread, inferior, or task."); 674 test_error ("inferior 1 if foo == 123 inferior 1", 675 "You can specify only one inferior."); 676 test_error ("inferior 1 if foo == 123 thread 1", 677 "You can specify only one of thread, inferior, or task."); 678 test_error ("inferior 1 if foo == 123 task 1", 679 "You can specify only one of thread, inferior, or task."); 680 test_error ("thread 1.2.3", "Invalid thread ID: 1.2.3"); 681 test_error ("thread 1/2", "Invalid thread ID: 1/2"); 682 test_error ("thread 1xxx", "Invalid thread ID: 1xxx"); 683 test_error ("inferior 1xxx", "Junk 'xxx' after inferior keyword."); 684 test_error ("task 1xxx", "Junk 'xxx' after task keyword."); 685 } 686 687 } // namespace selftests 688 #endif /* GDB_SELF_TEST */ 689 690 void _initialize_break_cond_parse (); 691 void 692 _initialize_break_cond_parse () 693 { 694 #if GDB_SELF_TEST 695 selftests::register_test 696 ("create_breakpoint_parse_arg_string", 697 selftests::create_breakpoint_parse_arg_string_tests); 698 #endif 699 } 700