Home | History | Annotate | Line # | Download | only in libopts
cook.c revision 1.1.1.11
      1 /*	$NetBSD: cook.c,v 1.1.1.11 2024/08/18 20:37:43 christos Exp $	*/
      2 
      3 /**
      4  * \file cook.c
      5  *
      6  *  This file contains the routines that deal with processing quoted strings
      7  *  into an internal format.
      8  *
      9  * @addtogroup autoopts
     10  * @{
     11  */
     12 /*
     13  *  This file is part of AutoOpts, a companion to AutoGen.
     14  *  AutoOpts is free software.
     15  *  AutoOpts is Copyright (C) 1992-2018 by Bruce Korb - all rights reserved
     16  *
     17  *  AutoOpts is available under any one of two licenses.  The license
     18  *  in use must be one of these two and the choice is under the control
     19  *  of the user of the license.
     20  *
     21  *   The GNU Lesser General Public License, version 3 or later
     22  *      See the files "COPYING.lgplv3" and "COPYING.gplv3"
     23  *
     24  *   The Modified Berkeley Software Distribution License
     25  *      See the file "COPYING.mbsd"
     26  *
     27  *  These files have the following sha256 sums:
     28  *
     29  *  8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95  COPYING.gplv3
     30  *  4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b  COPYING.lgplv3
     31  *  13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239  COPYING.mbsd
     32  */
     33 
     34 /*=export_func  ao_string_cook_escape_char
     35  * private:
     36  *
     37  * what:  escape-process a string fragment
     38  * arg:   + char const * + pzScan  + points to character after the escape +
     39  * arg:   + char *       + pRes    + Where to put the result byte +
     40  * arg:   + unsigned int + nl_ch   + replacement char if scanned char is \n +
     41  *
     42  * ret-type: unsigned int
     43  * ret-desc: The number of bytes consumed processing the escaped character.
     44  *
     45  * doc:
     46  *
     47  *  This function converts "t" into "\t" and all your other favorite
     48  *  escapes, including numeric ones:  hex and octal, too.
     49  *  The returned result tells the caller how far to advance the
     50  *  scan pointer (passed in).  The default is to just pass through the
     51  *  escaped character and advance the scan by one.
     52  *
     53  *  Some applications need to keep an escaped newline, others need to
     54  *  suppress it.  This is accomplished by supplying a '\n' replacement
     55  *  character that is different from \n, if need be.  For example, use
     56  *  0x7F and never emit a 0x7F.
     57  *
     58  * err:  Zero is returned if the escape is followed by the end of the string.
     59 =*/
     60 unsigned int
     61 ao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl)
     62 {
     63     unsigned int res = 1;
     64 
     65     switch (*pRes = *pzIn++) {
     66     case NUL:         /* NUL - end of input string */
     67         return 0;
     68     case '\r':
     69         if (*pzIn != NL)
     70             return 1;
     71         res++;
     72         /* FALLTHROUGH */
     73     case NL:        /* NL  - emit newline        */
     74         *pRes = (char)nl;
     75         return res;
     76 
     77     case 'a': *pRes = '\a'; break;
     78     case 'b': *pRes = '\b'; break;
     79     case 'f': *pRes = '\f'; break;
     80     case 'n': *pRes = NL;   break;
     81     case 'r': *pRes = '\r'; break;
     82     case 't': *pRes = '\t'; break;
     83     case 'v': *pRes = '\v'; break;
     84 
     85     case 'x':
     86     case 'X':         /* HEX Escape       */
     87         if (IS_HEX_DIGIT_CHAR(*pzIn))  {
     88             char z[4];
     89             unsigned int ct = 0;
     90 
     91             do  {
     92                 z[ct] = pzIn[ct];
     93                 if (++ct >= 2)
     94                     break;
     95             } while (IS_HEX_DIGIT_CHAR(pzIn[ct]));
     96             z[ct] = NUL;
     97             *pRes = (char)strtoul(z, NULL, 16);
     98             return ct + 1;
     99         }
    100         break;
    101 
    102     case '0': case '1': case '2': case '3':
    103     case '4': case '5': case '6': case '7':
    104     {
    105         /*
    106          *  IF the character copied was an octal digit,
    107          *  THEN set the output character to an octal value.
    108          *  The 3 octal digit result might exceed 0xFF, so check it.
    109          */
    110         char z[4];
    111         unsigned long val;
    112         unsigned int  ct = 0;
    113 
    114         z[ct++] = *--pzIn;
    115         while (IS_OCT_DIGIT_CHAR(pzIn[ct])) {
    116             z[ct] = pzIn[ct];
    117             if (++ct >= 3)
    118                 break;
    119         }
    120 
    121         z[ct] = NUL;
    122         val = strtoul(z, NULL, 8);
    123         if (val > 0xFF)
    124             val = 0xFF;
    125         *pRes = (char)val;
    126         return ct;
    127     }
    128 
    129     default: /* quoted character is result character */;
    130     }
    131 
    132     return res;
    133 }
    134 
    135 /**
    136  * count newlines between start and end
    137  */
    138 static char *
    139 nl_count(char * start, char * end, int * lnct_p)
    140 {
    141     while (start < end) {
    142         if (*(start++) == NL)
    143             (*lnct_p)++;
    144     }
    145     return end;
    146 }
    147 
    148 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
    149  *
    150  *  A quoted string has been found.
    151  *  Find the end of it and compress any escape sequences.
    152  */
    153 static bool
    154 contiguous_quote(char ** pps, char * pq, int * lnct_p)
    155 {
    156     char * ps = *pps + 1;
    157 
    158     for (;;) {
    159         while (IS_WHITESPACE_CHAR(*ps))
    160             if (*(ps++) == NL)
    161                 (*lnct_p)++;
    162 
    163         /*
    164          *  IF the next character is a quote character,
    165          *  THEN we will concatenate the strings.
    166          */
    167         switch (*ps) {
    168         case '"':
    169         case '\'':
    170             *pq  = *(ps++);  /* assign new quote character and return */
    171             *pps = ps;
    172             return true;
    173 
    174         case '/':
    175             /*
    176              *  Allow for a comment embedded in the concatenated string.
    177              */
    178             switch (ps[1]) {
    179             default:
    180                 goto fail_return;
    181 
    182             case '/':
    183                 /*
    184                  *  Skip to end of line
    185                  */
    186                 ps = strchr(ps, NL);
    187                 if (ps == NULL)
    188                     goto fail_return;
    189                 break;
    190 
    191             case '*':
    192                 ps = nl_count(ps + 2, strstr(ps + 2, "*/"), lnct_p);
    193                 if (ps == NULL)
    194                     goto fail_return;
    195                 ps += 2;
    196             }
    197             continue;
    198 
    199         default:
    200             /*
    201              *  The next non-whitespace character is not a quote.
    202              *  The series of quoted strings has come to an end.
    203              */
    204             *pps = ps;
    205             return false;
    206         }
    207     }
    208 
    209  fail_return:
    210     *pps = NULL;
    211     return false;
    212 }
    213 
    214 /*=export_func  ao_string_cook
    215  * private:
    216  *
    217  * what:  concatenate and escape-process strings
    218  * arg:   + char * + pzScan  + The *MODIFIABLE* input buffer +
    219  * arg:   + int *  + lnct_p  + The (possibly NULL) pointer to a line count +
    220  *
    221  * ret-type: char *
    222  * ret-desc: The address of the text following the processed strings.
    223  *           The return value is NULL if the strings are ill-formed.
    224  *
    225  * doc:
    226  *
    227  *  A series of one or more quoted strings are concatenated together.
    228  *  If they are quoted with double quotes (@code{"}), then backslash
    229  *  escapes are processed per the C programming language.  If they are
    230  *  single quote strings, then the backslashes are honored only when they
    231  *  precede another backslash or a single quote character.
    232  *
    233  * err:  @code{NULL} is returned if the string(s) is/are mal-formed.
    234 =*/
    235 char *
    236 ao_string_cook(char * pzScan, int * lnct_p)
    237 {
    238     int   l = 0;
    239     char  q = *pzScan;
    240 
    241     /*
    242      *  It is a quoted string.  Process the escape sequence characters
    243      *  (in the set "abfnrtv") and make sure we find a closing quote.
    244      */
    245     char * pzD = pzScan++;
    246     char * pzS = pzScan;
    247 
    248     if (lnct_p == NULL)
    249         lnct_p = &l;
    250 
    251     for (;;) {
    252         /*
    253          *  IF the next character is the quote character, THEN we may end the
    254          *  string.  We end it unless the next non-blank character *after* the
    255          *  string happens to also be a quote.  If it is, then we will change
    256          *  our quote character to the new quote character and continue
    257          *  condensing text.
    258          */
    259         while (*pzS == q) {
    260             *pzD = NUL; /* This is probably the end of the line */
    261             if (! contiguous_quote(&pzS, &q, lnct_p))
    262                 return pzS;
    263         }
    264 
    265         /*
    266          *  We are inside a quoted string.  Copy text.
    267          */
    268         switch (*(pzD++) = *(pzS++)) {
    269         case NUL:
    270             return NULL;
    271 
    272         case NL:
    273             (*lnct_p)++;
    274             break;
    275 
    276         case '\\':
    277             /*
    278              *  IF we are escaping a new line,
    279              *  THEN drop both the escape and the newline from
    280              *       the result string.
    281              */
    282             if (*pzS == NL) {
    283                 pzS++;
    284                 pzD--;
    285                 (*lnct_p)++;
    286             }
    287 
    288             /*
    289              *  ELSE IF the quote character is '"' or '`',
    290              *  THEN we do the full escape character processing
    291              */
    292             else if (q != '\'') {
    293                 unsigned int ct;
    294                 ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL);
    295                 if (ct == 0)
    296                     return NULL;
    297 
    298                 pzS += ct;
    299             }     /* if (q != '\'')                  */
    300 
    301             /*
    302              *  OTHERWISE, we only process "\\", "\'" and "\#" sequences.
    303              *  The latter only to easily hide preprocessing directives.
    304              */
    305             else switch (*pzS) {
    306             case '\\':
    307             case '\'':
    308             case '#':
    309                 pzD[-1] = *pzS++;
    310             }
    311         }     /* switch (*(pzD++) = *(pzS++))    */
    312     }         /* for (;;)                        */
    313 }
    314 
    315 /** @}
    316  *
    317  * Local Variables:
    318  * mode: C
    319  * c-file-style: "stroustrup"
    320  * indent-tabs-mode: nil
    321  * End:
    322  * end of autoopts/cook.c */
    323