Home | History | Annotate | Line # | Download | only in libopts
cook.c revision 1.5
      1 /*	$NetBSD: cook.c,v 1.5 2020/05/25 20:47:34 christos Exp $	*/
      2 
      3 /**
      4  * \file cook.c
      5  *
      6  *  This file contains the routines that deal with processing quoted strings
      7  *  into an internal format.
      8  *
      9  * @addtogroup autoopts
     10  * @{
     11  */
     12 /*
     13  *  This file is part of AutoOpts, a companion to AutoGen.
     14  *  AutoOpts is free software.
     15  *  AutoOpts is Copyright (C) 1992-2015 by Bruce Korb - all rights reserved
     16  *
     17  *  AutoOpts is available under any one of two licenses.  The license
     18  *  in use must be one of these two and the choice is under the control
     19  *  of the user of the license.
     20  *
     21  *   The GNU Lesser General Public License, version 3 or later
     22  *      See the files "COPYING.lgplv3" and "COPYING.gplv3"
     23  *
     24  *   The Modified Berkeley Software Distribution License
     25  *      See the file "COPYING.mbsd"
     26  *
     27  *  These files have the following sha256 sums:
     28  *
     29  *  8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95  COPYING.gplv3
     30  *  4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b  COPYING.lgplv3
     31  *  13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239  COPYING.mbsd
     32  */
     33 
     34 /* = = = START-STATIC-FORWARD = = = */
     35 static bool
     36 contiguous_quote(char ** pps, char * pq, int * lnct_p);
     37 /* = = = END-STATIC-FORWARD = = = */
     38 
     39 /*=export_func  ao_string_cook_escape_char
     40  * private:
     41  *
     42  * what:  escape-process a string fragment
     43  * arg:   + char const * + pzScan  + points to character after the escape +
     44  * arg:   + char *       + pRes    + Where to put the result byte +
     45  * arg:   + unsigned int + nl_ch   + replacement char if scanned char is \n +
     46  *
     47  * ret-type: unsigned int
     48  * ret-desc: The number of bytes consumed processing the escaped character.
     49  *
     50  * doc:
     51  *
     52  *  This function converts "t" into "\t" and all your other favorite
     53  *  escapes, including numeric ones:  hex and octal, too.
     54  *  The returned result tells the caller how far to advance the
     55  *  scan pointer (passed in).  The default is to just pass through the
     56  *  escaped character and advance the scan by one.
     57  *
     58  *  Some applications need to keep an escaped newline, others need to
     59  *  suppress it.  This is accomplished by supplying a '\n' replacement
     60  *  character that is different from \n, if need be.  For example, use
     61  *  0x7F and never emit a 0x7F.
     62  *
     63  * err:  zero is returned if the escape is followed by the terminating NUL.
     64 =*/
     65 unsigned int
     66 ao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl)
     67 {
     68     unsigned int res = 1;
     69 
     70     switch (*pRes = *pzIn++) {
     71     case NUL:         /* NUL - end of input string */
     72         return 0;
     73     case '\r':
     74         if (*pzIn != NL)
     75             return 1;
     76         res++;
     77         /* FALLTHROUGH */
     78     case NL:        /* NL  - emit newline        */
     79         *pRes = (char)nl;
     80         return res;
     81 
     82     case 'a': *pRes = '\a'; break;
     83     case 'b': *pRes = '\b'; break;
     84     case 'f': *pRes = '\f'; break;
     85     case 'n': *pRes = NL;   break;
     86     case 'r': *pRes = '\r'; break;
     87     case 't': *pRes = '\t'; break;
     88     case 'v': *pRes = '\v'; break;
     89 
     90     case 'x':
     91     case 'X':         /* HEX Escape       */
     92         if (IS_HEX_DIGIT_CHAR(*pzIn))  {
     93             char z[4];
     94             unsigned int ct = 0;
     95 
     96             do  {
     97                 z[ct] = pzIn[ct];
     98                 if (++ct >= 2)
     99                     break;
    100             } while (IS_HEX_DIGIT_CHAR(pzIn[ct]));
    101             z[ct] = NUL;
    102             *pRes = (char)strtoul(z, NULL, 16);
    103             return ct + 1;
    104         }
    105         break;
    106 
    107     case '0': case '1': case '2': case '3':
    108     case '4': case '5': case '6': case '7':
    109     {
    110         /*
    111          *  IF the character copied was an octal digit,
    112          *  THEN set the output character to an octal value.
    113          *  The 3 octal digit result might exceed 0xFF, so check it.
    114          */
    115         char z[4];
    116         unsigned long val;
    117         unsigned int  ct = 0;
    118 
    119         z[ct++] = *--pzIn;
    120         while (IS_OCT_DIGIT_CHAR(pzIn[ct])) {
    121             z[ct] = pzIn[ct];
    122             if (++ct >= 3)
    123                 break;
    124         }
    125 
    126         z[ct] = NUL;
    127         val = strtoul(z, NULL, 8);
    128         if (val > 0xFF)
    129             val = 0xFF;
    130         *pRes = (char)val;
    131         return ct;
    132     }
    133 
    134     default: /* quoted character is result character */;
    135     }
    136 
    137     return res;
    138 }
    139 
    140 
    141 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
    142  *
    143  *  A quoted string has been found.
    144  *  Find the end of it and compress any escape sequences.
    145  */
    146 static bool
    147 contiguous_quote(char ** pps, char * pq, int * lnct_p)
    148 {
    149     char * ps = *pps + 1;
    150 
    151     for (;;) {
    152         while (IS_WHITESPACE_CHAR(*ps))
    153             if (*(ps++) == NL)
    154                 (*lnct_p)++;
    155 
    156         /*
    157          *  IF the next character is a quote character,
    158          *  THEN we will concatenate the strings.
    159          */
    160         switch (*ps) {
    161         case '"':
    162         case '\'':
    163             *pq  = *(ps++);  /* assign new quote character and return */
    164             *pps = ps;
    165             return true;
    166 
    167         case '/':
    168             /*
    169              *  Allow for a comment embedded in the concatenated string.
    170              */
    171             switch (ps[1]) {
    172             default:
    173                 *pps = NULL;
    174                 return false;
    175 
    176             case '/':
    177                 /*
    178                  *  Skip to end of line
    179                  */
    180                 ps = strchr(ps, NL);
    181                 if (ps == NULL) {
    182                     *pps = NULL;
    183                     return false;
    184                 }
    185                 break;
    186 
    187             case '*':
    188             {
    189                 char * p = strstr( ps+2, "*/" );
    190                 /*
    191                  *  Skip to terminating star slash
    192                  */
    193                 if (p == NULL) {
    194                     *pps = NULL;
    195                     return false;
    196                 }
    197 
    198                 while (ps < p) {
    199                     if (*(ps++) == NL)
    200                         (*lnct_p)++;
    201                 }
    202 
    203                 ps = p + 2;
    204             }
    205             }
    206             continue;
    207 
    208         default:
    209             /*
    210              *  The next non-whitespace character is not a quote.
    211              *  The series of quoted strings has come to an end.
    212              */
    213             *pps = ps;
    214             return false;
    215         }
    216     }
    217 }
    218 
    219 /*=export_func  ao_string_cook
    220  * private:
    221  *
    222  * what:  concatenate and escape-process strings
    223  * arg:   + char * + pzScan  + The *MODIFIABLE* input buffer +
    224  * arg:   + int *  + lnct_p  + The (possibly NULL) pointer to a line count +
    225  *
    226  * ret-type: char *
    227  * ret-desc: The address of the text following the processed strings.
    228  *           The return value is NULL if the strings are ill-formed.
    229  *
    230  * doc:
    231  *
    232  *  A series of one or more quoted strings are concatenated together.
    233  *  If they are quoted with double quotes (@code{"}), then backslash
    234  *  escapes are processed per the C programming language.  If they are
    235  *  single quote strings, then the backslashes are honored only when they
    236  *  precede another backslash, a single quote or a hash mark (@code{#}).
    237  *
    238  * err:  @code{NULL} is returned if the string(s) is/are mal-formed.
    239 =*/
    240 char *
    241 ao_string_cook(char * pzScan, int * lnct_p)
    242 {
    243     int   l = 0;
    244     char  q = *pzScan;
    245 
    246     /*
    247      *  It is a quoted string.  Process the escape sequence characters
    248      *  (in the set "abfnrtv") and make sure we find a closing quote.
    249      */
    250     char * pzD = pzScan++;
    251     char * pzS = pzScan;
    252 
    253     if (lnct_p == NULL)
    254         lnct_p = &l;
    255 
    256     for (;;) {
    257         /*
    258          *  IF the next character is the quote character, THEN we may end the
    259          *  string.  We end it unless the next non-blank character *after* the
    260          *  string happens to also be a quote.  If it is, then we will change
    261          *  our quote character to the new quote character and continue
    262          *  condensing text.
    263          */
    264         while (*pzS == q) {
    265             *pzD = NUL; /* This is probably the end of the line */
    266             if (! contiguous_quote(&pzS, &q, lnct_p))
    267                 return pzS;
    268         }
    269 
    270         /*
    271          *  We are inside a quoted string.  Copy text.
    272          */
    273         switch (*(pzD++) = *(pzS++)) {
    274         case NUL:
    275             return NULL;
    276 
    277         case NL:
    278             (*lnct_p)++;
    279             break;
    280 
    281         case '\\':
    282             /*
    283              *  IF we are escaping a new line,
    284              *  THEN drop both the escape and the newline from
    285              *       the result string.
    286              */
    287             if (*pzS == NL) {
    288                 pzS++;
    289                 pzD--;
    290                 (*lnct_p)++;
    291             }
    292 
    293             /*
    294              *  ELSE IF the quote character is '"' or '`',
    295              *  THEN we do the full escape character processing
    296              */
    297             else if (q != '\'') {
    298                 unsigned int ct;
    299                 ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL);
    300                 if (ct == 0)
    301                     return NULL;
    302 
    303                 pzS += ct;
    304             }     /* if (q != '\'')                  */
    305 
    306             /*
    307              *  OTHERWISE, we only process "\\", "\'" and "\#" sequences.
    308              *  The latter only to easily hide preprocessing directives.
    309              */
    310             else switch (*pzS) {
    311             case '\\':
    312             case '\'':
    313             case '#':
    314                 pzD[-1] = *pzS++;
    315             }
    316         }     /* switch (*(pzD++) = *(pzS++))    */
    317     }         /* for (;;)                        */
    318 }
    319 
    320 /** @}
    321  *
    322  * Local Variables:
    323  * mode: C
    324  * c-file-style: "stroustrup"
    325  * indent-tabs-mode: nil
    326  * End:
    327  * end of autoopts/cook.c */
    328