Home | History | Annotate | Line # | Download | only in gdbsupport
      1 /* Parse a printf-style format string.
      2 
      3    Copyright (C) 1986-2024 Free Software Foundation, Inc.
      4 
      5    This file is part of GDB.
      6 
      7    This program is free software; you can redistribute it and/or modify
      8    it under the terms of the GNU General Public License as published by
      9    the Free Software Foundation; either version 3 of the License, or
     10    (at your option) any later version.
     11 
     12    This program is distributed in the hope that it will be useful,
     13    but WITHOUT ANY WARRANTY; without even the implied warranty of
     14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15    GNU General Public License for more details.
     16 
     17    You should have received a copy of the GNU General Public License
     18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
     19 
     20 #include "format.h"
     21 
     22 format_pieces::format_pieces (const char **arg, bool gdb_extensions,
     23 			      bool value_extension)
     24 {
     25   const char *s;
     26   const char *string;
     27   const char *prev_start;
     28   const char *percent_loc;
     29   char *sub_start, *current_substring;
     30   enum argclass this_argclass;
     31 
     32   s = *arg;
     33 
     34   if (gdb_extensions)
     35     {
     36       string = *arg;
     37       *arg += strlen (*arg);
     38     }
     39   else
     40     {
     41       /* Parse the format-control string and copy it into the string STRING,
     42 	 processing some kinds of escape sequence.  */
     43 
     44       char *f = (char *) alloca (strlen (s) + 1);
     45       string = f;
     46 
     47       while (*s != '"' && *s != '\0')
     48 	{
     49 	  int c = *s++;
     50 	  switch (c)
     51 	    {
     52 	    case '\0':
     53 	      continue;
     54 
     55 	    case '\\':
     56 	      switch (c = *s++)
     57 		{
     58 		case '\\':
     59 		  *f++ = '\\';
     60 		  break;
     61 		case 'a':
     62 		  *f++ = '\a';
     63 		  break;
     64 		case 'b':
     65 		  *f++ = '\b';
     66 		  break;
     67 		case 'e':
     68 		  *f++ = '\e';
     69 		  break;
     70 		case 'f':
     71 		  *f++ = '\f';
     72 		  break;
     73 		case 'n':
     74 		  *f++ = '\n';
     75 		  break;
     76 		case 'r':
     77 		  *f++ = '\r';
     78 		  break;
     79 		case 't':
     80 		  *f++ = '\t';
     81 		  break;
     82 		case 'v':
     83 		  *f++ = '\v';
     84 		  break;
     85 		case '"':
     86 		  *f++ = '"';
     87 		  break;
     88 		default:
     89 		  /* ??? TODO: handle other escape sequences.  */
     90 		  error (_("Unrecognized escape character \\%c in format string."),
     91 			 c);
     92 		}
     93 	      break;
     94 
     95 	    default:
     96 	      *f++ = c;
     97 	    }
     98 	}
     99 
    100       /* Terminate our escape-processed copy.  */
    101       *f++ = '\0';
    102 
    103       /* Whether the format string ended with double-quote or zero, we're
    104 	 done with it; it's up to callers to complain about syntax.  */
    105       *arg = s;
    106     }
    107 
    108   /* Need extra space for the '\0's.  Doubling the size is sufficient.  */
    109 
    110   current_substring = (char *) xmalloc (strlen (string) * 2 + 1000);
    111   m_storage.reset (current_substring);
    112 
    113   /* Now scan the string for %-specs and see what kinds of args they want.
    114      argclass classifies the %-specs so we can give printf-type functions
    115      something of the right size.  */
    116 
    117   const char *f = string;
    118   prev_start = string;
    119   while (*f)
    120     if (*f++ == '%')
    121       {
    122 	int seen_hash = 0, seen_zero = 0, lcount = 0, seen_prec = 0;
    123 	int seen_space = 0, seen_plus = 0;
    124 	int seen_big_l = 0, seen_h = 0, seen_big_h = 0;
    125 	int seen_big_d = 0, seen_double_big_d = 0;
    126 	int seen_size_t = 0;
    127 	int bad = 0;
    128 	int n_int_args = 0;
    129 	bool seen_i64 = false;
    130 
    131 	/* Skip over "%%", it will become part of a literal piece.  */
    132 	if (*f == '%')
    133 	  {
    134 	    f++;
    135 	    continue;
    136 	  }
    137 
    138 	sub_start = current_substring;
    139 
    140 	strncpy (current_substring, prev_start, f - 1 - prev_start);
    141 	current_substring += f - 1 - prev_start;
    142 	*current_substring++ = '\0';
    143 
    144 	if (*sub_start != '\0')
    145 	  m_pieces.emplace_back (sub_start, literal_piece, 0);
    146 
    147 	percent_loc = f - 1;
    148 
    149 	/* Check the validity of the format specifier, and work
    150 	   out what argument it expects.  We only accept C89
    151 	   format strings, with the exception of long long (which
    152 	   we autoconf for).  */
    153 
    154 	/* The first part of a format specifier is a set of flag
    155 	   characters.  */
    156 	while (*f != '\0' && strchr ("0-+ #", *f))
    157 	  {
    158 	    if (*f == '#')
    159 	      seen_hash = 1;
    160 	    else if (*f == '0')
    161 	      seen_zero = 1;
    162 	    else if (*f == ' ')
    163 	      seen_space = 1;
    164 	    else if (*f == '+')
    165 	      seen_plus = 1;
    166 	    f++;
    167 	  }
    168 
    169 	/* The next part of a format specifier is a width.  */
    170 	if (gdb_extensions && *f == '*')
    171 	  {
    172 	    ++f;
    173 	    ++n_int_args;
    174 	  }
    175 	else
    176 	  {
    177 	    while (*f != '\0' && strchr ("0123456789", *f))
    178 	      f++;
    179 	  }
    180 
    181 	/* The next part of a format specifier is a precision.  */
    182 	if (*f == '.')
    183 	  {
    184 	    seen_prec = 1;
    185 	    f++;
    186 	    if (gdb_extensions && *f == '*')
    187 	      {
    188 		++f;
    189 		++n_int_args;
    190 	      }
    191 	    else
    192 	      {
    193 		while (*f != '\0' && strchr ("0123456789", *f))
    194 		  f++;
    195 	      }
    196 	  }
    197 
    198 	/* The next part of a format specifier is a length modifier.  */
    199 	switch (*f)
    200 	  {
    201 	  case 'h':
    202 	    seen_h = 1;
    203 	    f++;
    204 	    break;
    205 	  case 'l':
    206 	    f++;
    207 	    lcount++;
    208 	    if (*f == 'l')
    209 	      {
    210 		f++;
    211 		lcount++;
    212 	      }
    213 	    break;
    214 	  case 'L':
    215 	    seen_big_l = 1;
    216 	    f++;
    217 	    break;
    218 	  case 'H':
    219 	    /* Decimal32 modifier.  */
    220 	    seen_big_h = 1;
    221 	    f++;
    222 	    break;
    223 	  case 'D':
    224 	    /* Decimal64 and Decimal128 modifiers.  */
    225 	    f++;
    226 
    227 	    /* Check for a Decimal128.  */
    228 	    if (*f == 'D')
    229 	      {
    230 		f++;
    231 		seen_double_big_d = 1;
    232 	      }
    233 	    else
    234 	      seen_big_d = 1;
    235 	    break;
    236 	  case 'z':
    237 	    /* For size_t or ssize_t.  */
    238 	    seen_size_t = 1;
    239 	    f++;
    240 	    break;
    241 	  case 'I':
    242 	    /* Support the Windows '%I64' extension, because an
    243 	       earlier call to format_pieces might have converted %lld
    244 	       to %I64d.  */
    245 	    if (f[1] == '6' && f[2] == '4')
    246 	      {
    247 		f += 3;
    248 		lcount = 2;
    249 		seen_i64 = true;
    250 	      }
    251 	    break;
    252 	}
    253 
    254 	switch (*f)
    255 	  {
    256 	  case 'u':
    257 	    if (seen_hash)
    258 	      bad = 1;
    259 	    [[fallthrough]];
    260 
    261 	  case 'o':
    262 	  case 'x':
    263 	  case 'X':
    264 	    if (seen_space || seen_plus)
    265 	      bad = 1;
    266 	  [[fallthrough]];
    267 
    268 	  case 'd':
    269 	  case 'i':
    270 	    if (seen_size_t)
    271 	      this_argclass = size_t_arg;
    272 	    else if (lcount == 0)
    273 	      this_argclass = int_arg;
    274 	    else if (lcount == 1)
    275 	      this_argclass = long_arg;
    276 	    else
    277 	      this_argclass = long_long_arg;
    278 
    279 	    if (seen_big_l)
    280 	      bad = 1;
    281 	    break;
    282 
    283 	  case 'c':
    284 	    this_argclass = lcount == 0 ? int_arg : wide_char_arg;
    285 	    if (lcount > 1 || seen_h || seen_big_l)
    286 	      bad = 1;
    287 	    if (seen_prec || seen_zero || seen_space || seen_plus)
    288 	      bad = 1;
    289 	    break;
    290 
    291 	  case 'p':
    292 	    this_argclass = ptr_arg;
    293 	    if (lcount || seen_h || seen_big_l)
    294 	      bad = 1;
    295 	    if (seen_prec)
    296 	      bad = 1;
    297 	    if (seen_hash || seen_zero || seen_space || seen_plus)
    298 	      bad = 1;
    299 
    300 	    if (gdb_extensions)
    301 	      {
    302 		switch (f[1])
    303 		  {
    304 		  case 's':
    305 		  case 'F':
    306 		  case '[':
    307 		  case ']':
    308 		    f++;
    309 		    break;
    310 		  }
    311 	      }
    312 
    313 	    break;
    314 
    315 	  case 's':
    316 	    this_argclass = lcount == 0 ? string_arg : wide_string_arg;
    317 	    if (lcount > 1 || seen_h || seen_big_l)
    318 	      bad = 1;
    319 	    if (seen_zero || seen_space || seen_plus)
    320 	      bad = 1;
    321 	    break;
    322 
    323 	  case 'e':
    324 	  case 'f':
    325 	  case 'g':
    326 	  case 'E':
    327 	  case 'G':
    328 	    if (seen_double_big_d)
    329 	      this_argclass = dec128float_arg;
    330 	    else if (seen_big_d)
    331 	      this_argclass = dec64float_arg;
    332 	    else if (seen_big_h)
    333 	      this_argclass = dec32float_arg;
    334 	    else if (seen_big_l)
    335 	      this_argclass = long_double_arg;
    336 	    else
    337 	      this_argclass = double_arg;
    338 
    339 	    if (lcount || seen_h)
    340 	      bad = 1;
    341 	    break;
    342 
    343 	  case 'V':
    344 	    if (!value_extension)
    345 	      error (_("Unrecognized format specifier '%c' in printf"), *f);
    346 
    347 	    if (lcount > 1 || seen_h || seen_big_h || seen_big_h
    348 		|| seen_big_d || seen_double_big_d || seen_size_t
    349 		|| seen_prec || seen_zero || seen_space || seen_plus)
    350 	      bad = 1;
    351 
    352 	    this_argclass = value_arg;
    353 
    354 	    if (f[1] == '[')
    355 	      {
    356 		/* Move F forward to the next ']' character if such a
    357 		   character exists, otherwise leave F unchanged.  */
    358 		const char *tmp = strchr (f, ']');
    359 		if (tmp != nullptr)
    360 		  f = tmp;
    361 	      }
    362 	    break;
    363 
    364 	  case '*':
    365 	    error (_("`*' not supported for precision or width in printf"));
    366 
    367 	  case 'n':
    368 	    error (_("Format specifier `n' not supported in printf"));
    369 
    370 	  case '\0':
    371 	    error (_("Incomplete format specifier at end of format string"));
    372 
    373 	  default:
    374 	    error (_("Unrecognized format specifier '%c' in printf"), *f);
    375 	  }
    376 
    377 	if (bad)
    378 	  error (_("Inappropriate modifiers to "
    379 		   "format specifier '%c' in printf"),
    380 		 *f);
    381 
    382 	f++;
    383 
    384 	sub_start = current_substring;
    385 
    386 	if (lcount > 1 && !seen_i64 && USE_PRINTF_I64)
    387 	  {
    388 	    /* Windows' printf does support long long, but not the usual way.
    389 	       Convert %lld to %I64d.  */
    390 	    int length_before_ll = f - percent_loc - 1 - lcount;
    391 
    392 	    strncpy (current_substring, percent_loc, length_before_ll);
    393 	    strcpy (current_substring + length_before_ll, "I64");
    394 	    current_substring[length_before_ll + 3] =
    395 	      percent_loc[length_before_ll + lcount];
    396 	    current_substring += length_before_ll + 4;
    397 	  }
    398 	else if (this_argclass == wide_string_arg
    399 		 || this_argclass == wide_char_arg)
    400 	  {
    401 	    /* Convert %ls or %lc to %s.  */
    402 	    int length_before_ls = f - percent_loc - 2;
    403 
    404 	    strncpy (current_substring, percent_loc, length_before_ls);
    405 	    strcpy (current_substring + length_before_ls, "s");
    406 	    current_substring += length_before_ls + 2;
    407 	  }
    408 	else
    409 	  {
    410 	    strncpy (current_substring, percent_loc, f - percent_loc);
    411 	    current_substring += f - percent_loc;
    412 	  }
    413 
    414 	*current_substring++ = '\0';
    415 
    416 	prev_start = f;
    417 
    418 	m_pieces.emplace_back (sub_start, this_argclass, n_int_args);
    419       }
    420 
    421   /* Record the remainder of the string.  */
    422 
    423   if (f > prev_start)
    424     {
    425       sub_start = current_substring;
    426 
    427       strncpy (current_substring, prev_start, f - prev_start);
    428       current_substring += f - prev_start;
    429       *current_substring++ = '\0';
    430 
    431       m_pieces.emplace_back (sub_start, literal_piece, 0);
    432     }
    433 }
    434