Home | History | Annotate | Line # | Download | only in libiberty
rust-demangle.c revision 1.1.1.6
      1 /* Demangler for the Rust programming language
      2    Copyright (C) 2016-2020 Free Software Foundation, Inc.
      3    Written by David Tolnay (dtolnay (at) gmail.com).
      4 
      5 This file is part of the libiberty library.
      6 Libiberty is free software; you can redistribute it and/or
      7 modify it under the terms of the GNU Library General Public
      8 License as published by the Free Software Foundation; either
      9 version 2 of the License, or (at your option) any later version.
     10 
     11 In addition to the permissions in the GNU Library General Public
     12 License, the Free Software Foundation gives you unlimited permission
     13 to link the compiled version of this file into combinations with other
     14 programs, and to distribute those combinations without any restriction
     15 coming from the use of this file.  (The Library Public License
     16 restrictions do apply in other respects; for example, they cover
     17 modification of the file, and distribution when not linked into a
     18 combined executable.)
     19 
     20 Libiberty is distributed in the hope that it will be useful,
     21 but WITHOUT ANY WARRANTY; without even the implied warranty of
     22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     23 Library General Public License for more details.
     24 
     25 You should have received a copy of the GNU Library General Public
     26 License along with libiberty; see the file COPYING.LIB.
     27 If not, see <http://www.gnu.org/licenses/>.  */
     28 
     29 
     30 #ifdef HAVE_CONFIG_H
     31 #include "config.h"
     32 #endif
     33 
     34 #include "safe-ctype.h"
     35 
     36 #include <inttypes.h>
     37 #include <sys/types.h>
     38 #include <string.h>
     39 #include <stdio.h>
     40 #include <stdlib.h>
     41 
     42 #ifdef HAVE_STRING_H
     43 #include <string.h>
     44 #else
     45 extern size_t strlen(const char *s);
     46 extern int strncmp(const char *s1, const char *s2, size_t n);
     47 extern void *memset(void *s, int c, size_t n);
     48 #endif
     49 
     50 #include <demangle.h>
     51 #include "libiberty.h"
     52 
     53 struct rust_demangler
     54 {
     55   const char *sym;
     56   size_t sym_len;
     57 
     58   void *callback_opaque;
     59   demangle_callbackref callback;
     60 
     61   /* Position of the next character to read from the symbol. */
     62   size_t next;
     63 
     64   /* Non-zero if any error occurred. */
     65   int errored;
     66 
     67   /* Non-zero if printing should be verbose (e.g. include hashes). */
     68   int verbose;
     69 
     70   /* Rust mangling version, with legacy mangling being -1. */
     71   int version;
     72 };
     73 
     74 /* Parsing functions. */
     75 
     76 static char
     77 peek (const struct rust_demangler *rdm)
     78 {
     79   if (rdm->next < rdm->sym_len)
     80     return rdm->sym[rdm->next];
     81   return 0;
     82 }
     83 
     84 static char
     85 next (struct rust_demangler *rdm)
     86 {
     87   char c = peek (rdm);
     88   if (!c)
     89     rdm->errored = 1;
     90   else
     91     rdm->next++;
     92   return c;
     93 }
     94 
/* One identifier (path segment) located inside the mangled symbol. */
struct rust_mangled_ident
{
  /* Start of the identifier's ASCII text inside the symbol (not
     NUL-terminated), and its length in characters.  parse_ident
     leaves ASCII as NULL when the length is zero or parsing failed. */
  const char *ascii;
  size_t ascii_len;
};
    101 
    102 static struct rust_mangled_ident
    103 parse_ident (struct rust_demangler *rdm)
    104 {
    105   char c;
    106   size_t start, len;
    107   struct rust_mangled_ident ident;
    108 
    109   ident.ascii = NULL;
    110   ident.ascii_len = 0;
    111 
    112   c = next (rdm);
    113   if (!ISDIGIT (c))
    114     {
    115       rdm->errored = 1;
    116       return ident;
    117     }
    118   len = c - '0';
    119 
    120   if (c != '0')
    121     while (ISDIGIT (peek (rdm)))
    122       len = len * 10 + (next (rdm) - '0');
    123 
    124   start = rdm->next;
    125   rdm->next += len;
    126   /* Check for overflows. */
    127   if ((start > rdm->next) || (rdm->next > rdm->sym_len))
    128     {
    129       rdm->errored = 1;
    130       return ident;
    131     }
    132 
    133   ident.ascii = rdm->sym + start;
    134   ident.ascii_len = len;
    135 
    136   if (ident.ascii_len == 0)
    137     ident.ascii = NULL;
    138 
    139   return ident;
    140 }
    141 
    142 /* Printing functions. */
    143 
    144 static void
    145 print_str (struct rust_demangler *rdm, const char *data, size_t len)
    146 {
    147   if (!rdm->errored)
    148     rdm->callback (data, len, rdm->callback_opaque);
    149 }
    150 
    151 #define PRINT(s) print_str (rdm, s, strlen (s))
    152 
/* Map a lowercase hexadecimal digit ('0'-'9' or 'a'-'f') to its value
   0x0-0xf.  Any other character, uppercase hex digits included,
   yields -1. */
static int
decode_lower_hex_nibble (char nibble)
{
  /* A character below the start of a range gives a negative
     difference, which becomes a huge unsigned value and therefore
     fails the single upper-bound comparison. */
  unsigned int from_digit = (unsigned int) (nibble - '0');
  unsigned int from_alpha = (unsigned int) (nibble - 'a');

  if (from_digit < 10u)
    return (int) from_digit;
  if (from_alpha < 6u)
    return (int) (0xa + from_alpha);
  return -1;
}
    163 
/* Decode the legacy "$...$" escape sequence that starts at E, where
   LEN is the number of characters available from E.  On success,
   return the unescaped character and store the total length of the
   sequence (both '$' delimiters included) in *OUT_LEN.  Return 0,
   leaving *OUT_LEN untouched, when E does not start a recognised,
   properly terminated escape. */
static char
decode_legacy_escape (const char *e, size_t len, size_t *out_len)
{
  /* Two-letter escape names; the third character is the replacement. */
  static const char pairs[][4] = {
    "SP@", "BP*", "RF&", "LT<", "GT>", "LP(", "RP)"
  };
  const char *body;
  size_t body_len, code_len, i;
  int high, low;
  char decoded;

  if (len < 3 || e[0] != '$')
    return 0;

  /* Step over the opening '$'. */
  body = e + 1;
  body_len = len - 1;
  code_len = 0;
  decoded = 0;

  if (body[0] == 'C')
    {
      code_len = 1;
      decoded = ',';
    }
  else if (body_len > 2)
    {
      code_len = 2;

      for (i = 0; i < sizeof pairs / sizeof pairs[0]; i++)
        if (body[0] == pairs[i][0] && body[1] == pairs[i][1])
          {
            decoded = pairs[i][2];
            break;
          }

      /* "uXY": a character given as two lowercase hex digits. */
      if (!decoded && body[0] == 'u' && body_len > 3)
        {
          code_len = 3;

          high = decode_lower_hex_nibble (body[1]);
          low = decode_lower_hex_nibble (body[2]);

          /* Both digits must be valid, and a high nibble above 7
             would leave the ASCII range. */
          if (high < 0 || low < 0 || high > 7)
            return 0;

          decoded = (char) ((high << 4) | low);

          /* Values below 0x20 are control characters. */
          if (decoded < 0x20)
            return 0;
        }
    }

  /* The escape must have been recognised and be closed by a '$'. */
  if (!decoded || body_len <= code_len || body[code_len] != '$')
    return 0;

  *out_len = 2 + code_len;
  return decoded;
}
    228 
    229 static void
    230 print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
    231 {
    232   char unescaped;
    233   size_t len;
    234 
    235   if (rdm->errored)
    236     return;
    237 
    238   if (rdm->version == -1)
    239     {
    240       /* Ignore leading underscores preceding escape sequences.
    241          The mangler inserts an underscore to make sure the
    242          identifier begins with a XID_Start character. */
    243       if (ident.ascii_len >= 2 && ident.ascii[0] == '_'
    244           && ident.ascii[1] == '$')
    245         {
    246           ident.ascii++;
    247           ident.ascii_len--;
    248         }
    249 
    250       while (ident.ascii_len > 0)
    251         {
    252           /* Handle legacy escape sequences ("$...$", ".." or "."). */
    253           if (ident.ascii[0] == '$')
    254             {
    255               unescaped
    256                   = decode_legacy_escape (ident.ascii, ident.ascii_len, &len);
    257               if (unescaped)
    258                 print_str (rdm, &unescaped, 1);
    259               else
    260                 {
    261                   /* Unexpected escape sequence, print the rest verbatim. */
    262                   print_str (rdm, ident.ascii, ident.ascii_len);
    263                   return;
    264                 }
    265             }
    266           else if (ident.ascii[0] == '.')
    267             {
    268               if (ident.ascii_len >= 2 && ident.ascii[1] == '.')
    269                 {
    270                   /* ".." becomes "::" */
    271                   PRINT ("::");
    272                   len = 2;
    273                 }
    274               else
    275                 {
    276                   /* "." becomes "-" */
    277                   PRINT ("-");
    278                   len = 1;
    279                 }
    280             }
    281           else
    282             {
    283               /* Print everything before the next escape sequence, at once. */
    284               for (len = 0; len < ident.ascii_len; len++)
    285                 if (ident.ascii[len] == '$' || ident.ascii[len] == '.')
    286                   break;
    287 
    288               print_str (rdm, ident.ascii, len);
    289             }
    290 
    291           ident.ascii += len;
    292           ident.ascii_len -= len;
    293         }
    294 
    295       return;
    296     }
    297 }
    298 
    299 /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
    300    The hex digits must contain at least 5 distinct digits. */
    301 static int
    302 is_legacy_prefixed_hash (struct rust_mangled_ident ident)
    303 {
    304   uint16_t seen;
    305   int nibble;
    306   size_t i, count;
    307 
    308   if (ident.ascii_len != 17 || ident.ascii[0] != 'h')
    309     return 0;
    310 
    311   seen = 0;
    312   for (i = 0; i < 16; i++)
    313     {
    314       nibble = decode_lower_hex_nibble (ident.ascii[1 + i]);
    315       if (nibble < 0)
    316         return 0;
    317       seen |= (uint16_t)1 << nibble;
    318     }
    319 
    320   /* Count how many distinct digits were seen. */
    321   count = 0;
    322   while (seen)
    323     {
    324       if (seen & 1)
    325         count++;
    326       seen >>= 1;
    327     }
    328 
    329   return count >= 5;
    330 }
    331 
    332 int
    333 rust_demangle_callback (const char *mangled, int options,
    334                         demangle_callbackref callback, void *opaque)
    335 {
    336   const char *p;
    337   struct rust_demangler rdm;
    338   struct rust_mangled_ident ident;
    339 
    340   rdm.sym = mangled;
    341   rdm.sym_len = 0;
    342 
    343   rdm.callback_opaque = opaque;
    344   rdm.callback = callback;
    345 
    346   rdm.next = 0;
    347   rdm.errored = 0;
    348   rdm.verbose = (options & DMGL_VERBOSE) != 0;
    349   rdm.version = 0;
    350 
    351   /* Rust symbols always start with _ZN (legacy). */
    352   if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
    353     {
    354       rdm.sym += 3;
    355       rdm.version = -1;
    356     }
    357   else
    358     return 0;
    359 
    360   /* Legacy Rust symbols use only [_0-9a-zA-Z.:$] characters. */
    361   for (p = rdm.sym; *p; p++)
    362     {
    363       rdm.sym_len++;
    364 
    365       if (*p == '_' || ISALNUM (*p))
    366         continue;
    367 
    368       if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'))
    369         continue;
    370 
    371       return 0;
    372     }
    373 
    374   /* Legacy Rust symbols need to be handled separately. */
    375   if (rdm.version == -1)
    376     {
    377       /* Legacy Rust symbols always end with E. */
    378       if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
    379         return 0;
    380       rdm.sym_len--;
    381 
    382       /* Legacy Rust symbols also always end with a path segment
    383          that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'.
    384          This early check, before any parse_ident calls, should
    385          quickly filter out most C++ symbols unrelated to Rust. */
    386       if (!(rdm.sym_len > 19
    387             && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3)))
    388         return 0;
    389 
    390       do
    391         {
    392           ident = parse_ident (&rdm);
    393           if (rdm.errored || !ident.ascii)
    394             return 0;
    395         }
    396       while (rdm.next < rdm.sym_len);
    397 
    398       /* The last path segment should be the hash. */
    399       if (!is_legacy_prefixed_hash (ident))
    400         return 0;
    401 
    402       /* Reset the state for a second pass, to print the symbol. */
    403       rdm.next = 0;
    404       if (!rdm.verbose && rdm.sym_len > 19)
    405         {
    406           /* Hide the last segment, containing the hash, if not verbose. */
    407           rdm.sym_len -= 19;
    408         }
    409 
    410       do
    411         {
    412           if (rdm.next > 0)
    413             print_str (&rdm, "::", 2);
    414 
    415           ident = parse_ident (&rdm);
    416           print_ident (&rdm, ident);
    417         }
    418       while (rdm.next < rdm.sym_len);
    419     }
    420   else
    421     return 0;
    422 
    423   return !rdm.errored;
    424 }
    425 
/* Growable string buffers. */
struct str_buf
{
  /* Heap storage (NULL while nothing is allocated), the number of
     bytes in use, and the number of bytes allocated. */
  char *ptr;
  size_t len;
  size_t cap;

  /* Non-zero once growing the buffer has failed; every later reserve
     or append is then a no-op. */
  int errored;
};

/* Make sure BUF has room for at least EXTRA more bytes past its
   current length, growing the allocation by repeated doubling (from a
   minimum of 4 bytes).

   If the required size does not fit in a size_t, or realloc fails,
   the storage is released, PTR/LEN/CAP are reset to NULL/0/0 and
   ERRORED is set, so a failed buffer never exposes stale or
   unterminated contents.  Does nothing if BUF has already errored. */
static void
str_buf_reserve (struct str_buf *buf, size_t extra)
{
  size_t available, min_new_cap, new_cap;
  char *new_ptr;

  /* Allocation failed before. */
  if (buf->errored)
    return;

  available = buf->cap - buf->len;
  if (extra <= available)
    return;

  min_new_cap = buf->cap + (extra - available);

  /* The addition wrapped around. */
  if (min_new_cap < buf->cap)
    goto fail;

  new_cap = buf->cap ? buf->cap : 4;

  /* Double capacity until sufficiently large. */
  while (new_cap < min_new_cap)
    {
      size_t doubled = new_cap * 2;

      /* Compare against the value just doubled rather than against
         buf->cap: with an empty buffer (cap == 0) a wrap to zero
         would otherwise go unnoticed and the loop would never end. */
      if (doubled <= new_cap)
        goto fail;

      new_cap = doubled;
    }

  new_ptr = (char *) realloc (buf->ptr, new_cap);
  if (new_ptr == NULL)
    goto fail;

  buf->ptr = new_ptr;
  buf->cap = new_cap;
  return;

fail:
  /* realloc leaves the old block untouched on failure, so it is
     released here together with the rest of the state. */
  free (buf->ptr);
  buf->ptr = NULL;
  buf->len = 0;
  buf->cap = 0;
  buf->errored = 1;
}
    492 
    493 static void
    494 str_buf_append (struct str_buf *buf, const char *data, size_t len)
    495 {
    496   str_buf_reserve (buf, len);
    497   if (buf->errored)
    498     return;
    499 
    500   memcpy (buf->ptr + buf->len, data, len);
    501   buf->len += len;
    502 }
    503 
/* Demangler output callback that appends each piece of text to the
   struct str_buf passed as OPAQUE. */
static void
str_buf_demangle_callback (const char *data, size_t len, void *opaque)
{
  struct str_buf *out = (struct str_buf *) opaque;

  str_buf_append (out, data, len);
}
    509 
    510 char *
    511 rust_demangle (const char *mangled, int options)
    512 {
    513   struct str_buf out;
    514   int success;
    515 
    516   out.ptr = NULL;
    517   out.len = 0;
    518   out.cap = 0;
    519   out.errored = 0;
    520 
    521   success = rust_demangle_callback (mangled, options,
    522                                     str_buf_demangle_callback, &out);
    523 
    524   if (!success)
    525     {
    526       free (out.ptr);
    527       return NULL;
    528     }
    529 
    530   str_buf_append (&out, "\0", 1);
    531   return out.ptr;
    532 }
    533