Home | History | Annotate | Line # | Download | only in libopts
cook.c revision 1.1.1.5
      1 /**
      2  * \file cook.c
      3  *
      4  *  This file contains the routines that deal with processing quoted strings
      5  *  into an internal format.
      6  *
      7  * @addtogroup autoopts
      8  * @{
      9  */
     10 /*
     11  *  This file is part of AutoOpts, a companion to AutoGen.
     12  *  AutoOpts is free software.
     13  *  AutoOpts is Copyright (C) 1992-2015 by Bruce Korb - all rights reserved
     14  *
     15  *  AutoOpts is available under any one of two licenses.  The license
     16  *  in use must be one of these two and the choice is under the control
     17  *  of the user of the license.
     18  *
     19  *   The GNU Lesser General Public License, version 3 or later
     20  *      See the files "COPYING.lgplv3" and "COPYING.gplv3"
     21  *
     22  *   The Modified Berkeley Software Distribution License
     23  *      See the file "COPYING.mbsd"
     24  *
     25  *  These files have the following sha256 sums:
     26  *
     27  *  8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95  COPYING.gplv3
     28  *  4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b  COPYING.lgplv3
     29  *  13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239  COPYING.mbsd
     30  */
     31 
     32 /* = = = START-STATIC-FORWARD = = = */
     33 static bool
     34 contiguous_quote(char ** pps, char * pq, int * lnct_p);
     35 /* = = = END-STATIC-FORWARD = = = */
     36 
     37 /*=export_func  ao_string_cook_escape_char
     38  * private:
     39  *
     40  * what:  escape-process a string fragment
     41  * arg:   + char const * + pzScan  + points to character after the escape +
     42  * arg:   + char *       + pRes    + Where to put the result byte +
     43  * arg:   + unsigned int + nl_ch   + replacement char if scanned char is \n +
     44  *
     45  * ret-type: unsigned int
     46  * ret-desc: The number of bytes consumed processing the escaped character.
     47  *
     48  * doc:
     49  *
     50  *  This function converts "t" into "\t" and all your other favorite
     51  *  escapes, including numeric ones:  hex and octal, too.
     52  *  The returned result tells the caller how far to advance the
     53  *  scan pointer (passed in).  The default is to just pass through the
     54  *  escaped character and advance the scan by one.
     55  *
     56  *  Some applications need to keep an escaped newline, others need to
     57  *  suppress it.  This is accomplished by supplying a '\n' replacement
     58  *  character that is different from \n, if need be.  For example, use
     59  *  0x7F and never emit a 0x7F.
     60  *
     61  * err:  Zero is returned if the escape is the last character of the string.
     62 =*/
     63 unsigned int
     64 ao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl)
     65 {
     66     unsigned int res = 1;
     67 
     68     switch (*pRes = *pzIn++) {
     69     case NUL:         /* NUL - end of input string */
     70         return 0;
     71     case '\r':
     72         if (*pzIn != NL)
     73             return 1;
     74         res++;
     75         /* FALLTHROUGH */
     76     case NL:        /* NL  - emit newline        */
     77         *pRes = (char)nl;
     78         return res;
     79 
     80     case 'a': *pRes = '\a'; break;
     81     case 'b': *pRes = '\b'; break;
     82     case 'f': *pRes = '\f'; break;
     83     case 'n': *pRes = NL;   break;
     84     case 'r': *pRes = '\r'; break;
     85     case 't': *pRes = '\t'; break;
     86     case 'v': *pRes = '\v'; break;
     87 
     88     case 'x':
     89     case 'X':         /* HEX Escape       */
     90         if (IS_HEX_DIGIT_CHAR(*pzIn))  {
     91             char z[4];
     92             unsigned int ct = 0;
     93 
     94             do  {
     95                 z[ct] = pzIn[ct];
     96                 if (++ct >= 2)
     97                     break;
     98             } while (IS_HEX_DIGIT_CHAR(pzIn[ct]));
     99             z[ct] = NUL;
    100             *pRes = (char)strtoul(z, NULL, 16);
    101             return ct + 1;
    102         }
    103         break;
    104 
    105     case '0': case '1': case '2': case '3':
    106     case '4': case '5': case '6': case '7':
    107     {
    108         /*
    109          *  IF the character copied was an octal digit,
    110          *  THEN set the output character to an octal value.
    111          *  The 3 octal digit result might exceed 0xFF, so check it.
    112          */
    113         char z[4];
    114         unsigned long val;
    115         unsigned int  ct = 0;
    116 
    117         z[ct++] = *--pzIn;
    118         while (IS_OCT_DIGIT_CHAR(pzIn[ct])) {
    119             z[ct] = pzIn[ct];
    120             if (++ct >= 3)
    121                 break;
    122         }
    123 
    124         z[ct] = NUL;
    125         val = strtoul(z, NULL, 8);
    126         if (val > 0xFF)
    127             val = 0xFF;
    128         *pRes = (char)val;
    129         return ct;
    130     }
    131 
    132     default: /* quoted character is result character */;
    133     }
    134 
    135     return res;
    136 }
    137 
    138 
    139 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
    140  *
    141  *  A quoted string has been found.
    142  *  Find the end of it and compress any escape sequences.
    143  */
    144 static bool
    145 contiguous_quote(char ** pps, char * pq, int * lnct_p)
    146 {
    147     char * ps = *pps + 1;
    148 
    149     for (;;) {
    150         while (IS_WHITESPACE_CHAR(*ps))
    151             if (*(ps++) == NL)
    152                 (*lnct_p)++;
    153 
    154         /*
    155          *  IF the next character is a quote character,
    156          *  THEN we will concatenate the strings.
    157          */
    158         switch (*ps) {
    159         case '"':
    160         case '\'':
    161             *pq  = *(ps++);  /* assign new quote character and return */
    162             *pps = ps;
    163             return true;
    164 
    165         case '/':
    166             /*
    167              *  Allow for a comment embedded in the concatenated string.
    168              */
    169             switch (ps[1]) {
    170             default:
    171                 *pps = NULL;
    172                 return false;
    173 
    174             case '/':
    175                 /*
    176                  *  Skip to end of line
    177                  */
    178                 ps = strchr(ps, NL);
    179                 if (ps == NULL) {
    180                     *pps = NULL;
    181                     return false;
    182                 }
    183                 break;
    184 
    185             case '*':
    186             {
    187                 char * p = strstr( ps+2, "*/" );
    188                 /*
    189                  *  Skip to terminating star slash
    190                  */
    191                 if (p == NULL) {
    192                     *pps = NULL;
    193                     return false;
    194                 }
    195 
    196                 while (ps < p) {
    197                     if (*(ps++) == NL)
    198                         (*lnct_p)++;
    199                 }
    200 
    201                 ps = p + 2;
    202             }
    203             }
    204             continue;
    205 
    206         default:
    207             /*
    208              *  The next non-whitespace character is not a quote.
    209              *  The series of quoted strings has come to an end.
    210              */
    211             *pps = ps;
    212             return false;
    213         }
    214     }
    215 }
    216 
    217 /*=export_func  ao_string_cook
    218  * private:
    219  *
    220  * what:  concatenate and escape-process strings
    221  * arg:   + char * + pzScan  + The *MODIFIABLE* input buffer +
    222  * arg:   + int *  + lnct_p  + The (possibly NULL) pointer to a line count +
    223  *
    224  * ret-type: char *
    225  * ret-desc: The address of the text following the processed strings.
    226  *           The return value is NULL if the strings are ill-formed.
    227  *
    228  * doc:
    229  *
    230  *  A series of one or more quoted strings are concatenated together.
    231  *  If they are quoted with double quotes (@code{"}), then backslash
    232  *  escapes are processed per the C programming language.  If they are
    233  *  single quote strings, then the backslashes are honored only when they
    234  *  precede another backslash or a single quote character.
    235  *
    236  * err:  @code{NULL} is returned if the string(s) is/are mal-formed.
    237 =*/
    238 char *
    239 ao_string_cook(char * pzScan, int * lnct_p)
    240 {
    241     int   l = 0;
    242     char  q = *pzScan;
    243 
    244     /*
    245      *  It is a quoted string.  Process the escape sequence characters
    246      *  (in the set "abfnrtv") and make sure we find a closing quote.
    247      */
    248     char * pzD = pzScan++;
    249     char * pzS = pzScan;
    250 
    251     if (lnct_p == NULL)
    252         lnct_p = &l;
    253 
    254     for (;;) {
    255         /*
    256          *  IF the next character is the quote character, THEN we may end the
    257          *  string.  We end it unless the next non-blank character *after* the
    258          *  string happens to also be a quote.  If it is, then we will change
    259          *  our quote character to the new quote character and continue
    260          *  condensing text.
    261          */
    262         while (*pzS == q) {
    263             *pzD = NUL; /* This is probably the end of the line */
    264             if (! contiguous_quote(&pzS, &q, lnct_p))
    265                 return pzS;
    266         }
    267 
    268         /*
    269          *  We are inside a quoted string.  Copy text.
    270          */
    271         switch (*(pzD++) = *(pzS++)) {
    272         case NUL:
    273             return NULL;
    274 
    275         case NL:
    276             (*lnct_p)++;
    277             break;
    278 
    279         case '\\':
    280             /*
    281              *  IF we are escaping a new line,
    282              *  THEN drop both the escape and the newline from
    283              *       the result string.
    284              */
    285             if (*pzS == NL) {
    286                 pzS++;
    287                 pzD--;
    288                 (*lnct_p)++;
    289             }
    290 
    291             /*
    292              *  ELSE IF the quote character is '"' or '`',
    293              *  THEN we do the full escape character processing
    294              */
    295             else if (q != '\'') {
    296                 unsigned int ct;
    297                 ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL);
    298                 if (ct == 0)
    299                     return NULL;
    300 
    301                 pzS += ct;
    302             }     /* if (q != '\'')                  */
    303 
    304             /*
    305              *  OTHERWISE, we only process "\\", "\'" and "\#" sequences.
    306              *  The latter only to easily hide preprocessing directives.
    307              */
    308             else switch (*pzS) {
    309             case '\\':
    310             case '\'':
    311             case '#':
    312                 pzD[-1] = *pzS++;
    313             }
    314         }     /* switch (*(pzD++) = *(pzS++))    */
    315     }         /* for (;;)                        */
    316 }
    317 
    318 /** @}
    319  *
    320  * Local Variables:
    321  * mode: C
    322  * c-file-style: "stroustrup"
    323  * indent-tabs-mode: nil
    324  * End:
    325  * end of autoopts/cook.c */
    326