Home | History | Annotate | Line # | Download | only in libopts
cook.c revision 1.1.1.11
      1  1.1.1.10  christos /*	$NetBSD: cook.c,v 1.1.1.11 2024/08/18 20:37:43 christos Exp $	*/
      2  1.1.1.10  christos 
      3   1.1.1.2    kardel /**
      4   1.1.1.2    kardel  * \file cook.c
      5   1.1.1.2    kardel  *
      6       1.1    kardel  *  This file contains the routines that deal with processing quoted strings
      7       1.1    kardel  *  into an internal format.
      8       1.1    kardel  *
      9   1.1.1.3  christos  * @addtogroup autoopts
     10   1.1.1.3  christos  * @{
     11   1.1.1.3  christos  */
     12   1.1.1.3  christos /*
     13       1.1    kardel  *  This file is part of AutoOpts, a companion to AutoGen.
     14       1.1    kardel  *  AutoOpts is free software.
     15  1.1.1.11  christos  *  AutoOpts is Copyright (C) 1992-2018 by Bruce Korb - all rights reserved
     16       1.1    kardel  *
     17       1.1    kardel  *  AutoOpts is available under any one of two licenses.  The license
     18       1.1    kardel  *  in use must be one of these two and the choice is under the control
     19       1.1    kardel  *  of the user of the license.
     20       1.1    kardel  *
     21       1.1    kardel  *   The GNU Lesser General Public License, version 3 or later
     22       1.1    kardel  *      See the files "COPYING.lgplv3" and "COPYING.gplv3"
     23       1.1    kardel  *
     24       1.1    kardel  *   The Modified Berkeley Software Distribution License
     25       1.1    kardel  *      See the file "COPYING.mbsd"
     26       1.1    kardel  *
     27   1.1.1.3  christos  *  These files have the following sha256 sums:
     28       1.1    kardel  *
     29   1.1.1.3  christos  *  8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95  COPYING.gplv3
     30   1.1.1.3  christos  *  4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b  COPYING.lgplv3
     31   1.1.1.3  christos  *  13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239  COPYING.mbsd
     32       1.1    kardel  */
     33       1.1    kardel 
     34       1.1    kardel /*=export_func  ao_string_cook_escape_char
     35       1.1    kardel  * private:
     36       1.1    kardel  *
     37       1.1    kardel  * what:  escape-process a string fragment
     38   1.1.1.5  christos  * arg:   + char const * + pzScan  + points to character after the escape +
     39   1.1.1.5  christos  * arg:   + char *       + pRes    + Where to put the result byte +
     40       1.1    kardel  * arg:   + unsigned int + nl_ch   + replacement char if scanned char is \n +
     41       1.1    kardel  *
     42       1.1    kardel  * ret-type: unsigned int
     43       1.1    kardel  * ret-desc: The number of bytes consumed processing the escaped character.
     44       1.1    kardel  *
     45       1.1    kardel  * doc:
     46       1.1    kardel  *
     47       1.1    kardel  *  This function converts "t" into "\t" and all your other favorite
     48       1.1    kardel  *  escapes, including numeric ones:  hex and octal, too.
     49       1.1    kardel  *  The returned result tells the caller how far to advance the
     50       1.1    kardel  *  scan pointer (passed in).  The default is to just pass through the
     51       1.1    kardel  *  escaped character and advance the scan by one.
     52       1.1    kardel  *
     53       1.1    kardel  *  Some applications need to keep an escaped newline, others need to
     54       1.1    kardel  *  suppress it.  This is accomplished by supplying a '\n' replacement
     55       1.1    kardel  *  character that is different from \n, if need be.  For example, use
     56       1.1    kardel  *  0x7F and never emit a 0x7F.
     57       1.1    kardel  *
     58       1.1    kardel  * err:  zero is returned if the input ends (NUL) right after the escape.
     59       1.1    kardel =*/
     60       1.1    kardel unsigned int
     61   1.1.1.3  christos ao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl)
     62       1.1    kardel {
     63   1.1.1.3  christos     unsigned int res = 1;
     64       1.1    kardel 
     65       1.1    kardel     switch (*pRes = *pzIn++) {
     66       1.1    kardel     case NUL:         /* NUL - end of input string */
     67       1.1    kardel         return 0;
     68       1.1    kardel     case '\r':
     69   1.1.1.3  christos         if (*pzIn != NL)
     70       1.1    kardel             return 1;
     71       1.1    kardel         res++;
     72       1.1    kardel         /* FALLTHROUGH */
     73   1.1.1.3  christos     case NL:        /* NL  - emit newline        */
     74       1.1    kardel         *pRes = (char)nl;
     75       1.1    kardel         return res;
     76       1.1    kardel 
     77       1.1    kardel     case 'a': *pRes = '\a'; break;
     78       1.1    kardel     case 'b': *pRes = '\b'; break;
     79       1.1    kardel     case 'f': *pRes = '\f'; break;
     80   1.1.1.3  christos     case 'n': *pRes = NL;   break;
     81       1.1    kardel     case 'r': *pRes = '\r'; break;
     82       1.1    kardel     case 't': *pRes = '\t'; break;
     83       1.1    kardel     case 'v': *pRes = '\v'; break;
     84       1.1    kardel 
     85       1.1    kardel     case 'x':
     86       1.1    kardel     case 'X':         /* HEX Escape       */
     87       1.1    kardel         if (IS_HEX_DIGIT_CHAR(*pzIn))  {
     88   1.1.1.3  christos             char z[4];
     89   1.1.1.3  christos             unsigned int ct = 0;
     90       1.1    kardel 
     91   1.1.1.3  christos             do  {
     92   1.1.1.3  christos                 z[ct] = pzIn[ct];
     93   1.1.1.3  christos                 if (++ct >= 2)
     94   1.1.1.3  christos                     break;
     95   1.1.1.3  christos             } while (IS_HEX_DIGIT_CHAR(pzIn[ct]));
     96   1.1.1.3  christos             z[ct] = NUL;
     97   1.1.1.3  christos             *pRes = (char)strtoul(z, NULL, 16);
     98   1.1.1.3  christos             return ct + 1;
     99       1.1    kardel         }
    100       1.1    kardel         break;
    101       1.1    kardel 
    102       1.1    kardel     case '0': case '1': case '2': case '3':
    103       1.1    kardel     case '4': case '5': case '6': case '7':
    104       1.1    kardel     {
    105       1.1    kardel         /*
    106       1.1    kardel          *  IF the character copied was an octal digit,
    107   1.1.1.3  christos          *  THEN set the output character to an octal value.
    108   1.1.1.3  christos          *  The 3 octal digit result might exceed 0xFF, so check it.
    109       1.1    kardel          */
    110   1.1.1.3  christos         char z[4];
    111       1.1    kardel         unsigned long val;
    112   1.1.1.3  christos         unsigned int  ct = 0;
    113   1.1.1.3  christos 
    114   1.1.1.3  christos         z[ct++] = *--pzIn;
    115   1.1.1.3  christos         while (IS_OCT_DIGIT_CHAR(pzIn[ct])) {
    116   1.1.1.3  christos             z[ct] = pzIn[ct];
    117   1.1.1.3  christos             if (++ct >= 3)
    118   1.1.1.3  christos                 break;
    119   1.1.1.3  christos         }
    120       1.1    kardel 
    121   1.1.1.3  christos         z[ct] = NUL;
    122       1.1    kardel         val = strtoul(z, NULL, 8);
    123       1.1    kardel         if (val > 0xFF)
    124       1.1    kardel             val = 0xFF;
    125   1.1.1.3  christos         *pRes = (char)val;
    126   1.1.1.3  christos         return ct;
    127       1.1    kardel     }
    128       1.1    kardel 
    129   1.1.1.3  christos     default: /* quoted character is result character */;
    130       1.1    kardel     }
    131       1.1    kardel 
    132       1.1    kardel     return res;
    133       1.1    kardel }
    134       1.1    kardel 
    135  1.1.1.11  christos /**
    136  1.1.1.11  christos  * count newlines between start and end
    137  1.1.1.11  christos  */
    138  1.1.1.11  christos static char *
    139  1.1.1.11  christos nl_count(char * start, char * end, int * lnct_p)
    140  1.1.1.11  christos {
    141  1.1.1.11  christos     while (start < end) {
    142  1.1.1.11  christos         if (*(start++) == NL)
    143  1.1.1.11  christos             (*lnct_p)++;
    144  1.1.1.11  christos     }
    145  1.1.1.11  christos     return end;
    146  1.1.1.11  christos }
    147       1.1    kardel 
    148       1.1    kardel /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
    149       1.1    kardel  *
    150       1.1    kardel  *  A quoted string has been found.
    151       1.1    kardel  *  Find the end of it and compress any escape sequences.
    152       1.1    kardel  */
    153   1.1.1.3  christos static bool
    154   1.1.1.2    kardel contiguous_quote(char ** pps, char * pq, int * lnct_p)
    155   1.1.1.2    kardel {
    156   1.1.1.2    kardel     char * ps = *pps + 1;
    157   1.1.1.2    kardel 
    158   1.1.1.2    kardel     for (;;) {
    159   1.1.1.2    kardel         while (IS_WHITESPACE_CHAR(*ps))
    160   1.1.1.3  christos             if (*(ps++) == NL)
    161   1.1.1.2    kardel                 (*lnct_p)++;
    162   1.1.1.2    kardel 
    163   1.1.1.2    kardel         /*
    164   1.1.1.2    kardel          *  IF the next character is a quote character,
    165   1.1.1.2    kardel          *  THEN we will concatenate the strings.
    166   1.1.1.2    kardel          */
    167   1.1.1.2    kardel         switch (*ps) {
    168   1.1.1.2    kardel         case '"':
    169   1.1.1.2    kardel         case '\'':
    170   1.1.1.2    kardel             *pq  = *(ps++);  /* assign new quote character and return */
    171   1.1.1.2    kardel             *pps = ps;
    172   1.1.1.3  christos             return true;
    173   1.1.1.2    kardel 
    174   1.1.1.2    kardel         case '/':
    175   1.1.1.2    kardel             /*
    176   1.1.1.2    kardel              *  Allow for a comment embedded in the concatenated string.
    177   1.1.1.2    kardel              */
    178   1.1.1.2    kardel             switch (ps[1]) {
    179   1.1.1.2    kardel             default:
    180  1.1.1.11  christos                 goto fail_return;
    181   1.1.1.2    kardel 
    182   1.1.1.2    kardel             case '/':
    183   1.1.1.2    kardel                 /*
    184   1.1.1.2    kardel                  *  Skip to end of line
    185   1.1.1.2    kardel                  */
    186   1.1.1.3  christos                 ps = strchr(ps, NL);
    187  1.1.1.11  christos                 if (ps == NULL)
    188  1.1.1.11  christos                     goto fail_return;
    189   1.1.1.2    kardel                 break;
    190   1.1.1.2    kardel 
    191   1.1.1.2    kardel             case '*':
    192  1.1.1.11  christos                 ps = nl_count(ps + 2, strstr(ps + 2, "*/"), lnct_p);
    193  1.1.1.11  christos                 if (ps == NULL)
    194  1.1.1.11  christos                     goto fail_return;
    195  1.1.1.11  christos                 ps += 2;
    196   1.1.1.2    kardel             }
    197   1.1.1.2    kardel             continue;
    198   1.1.1.2    kardel 
    199   1.1.1.2    kardel         default:
    200   1.1.1.2    kardel             /*
    201   1.1.1.2    kardel              *  The next non-whitespace character is not a quote.
    202   1.1.1.2    kardel              *  The series of quoted strings has come to an end.
    203   1.1.1.2    kardel              */
    204   1.1.1.2    kardel             *pps = ps;
    205   1.1.1.3  christos             return false;
    206   1.1.1.2    kardel         }
    207   1.1.1.2    kardel     }
    208  1.1.1.11  christos 
    209  1.1.1.11  christos  fail_return:
    210  1.1.1.11  christos     *pps = NULL;
    211  1.1.1.11  christos     return false;
    212   1.1.1.2    kardel }
    213   1.1.1.2    kardel 
    214       1.1    kardel /*=export_func  ao_string_cook
    215       1.1    kardel  * private:
    216       1.1    kardel  *
    217       1.1    kardel  * what:  concatenate and escape-process strings
    218   1.1.1.5  christos  * arg:   + char * + pzScan  + The *MODIFIABLE* input buffer +
    219   1.1.1.5  christos  * arg:   + int *  + lnct_p  + The (possibly NULL) pointer to a line count +
    220       1.1    kardel  *
    221   1.1.1.5  christos  * ret-type: char *
    222       1.1    kardel  * ret-desc: The address of the text following the processed strings.
    223       1.1    kardel  *           The return value is NULL if the strings are ill-formed.
    224       1.1    kardel  *
    225       1.1    kardel  * doc:
    226       1.1    kardel  *
    227       1.1    kardel  *  A series of one or more quoted strings are concatenated together.
    228       1.1    kardel  *  If they are quoted with double quotes (@code{"}), then backslash
    229       1.1    kardel  *  escapes are processed per the C programming language.  If they are
    230       1.1    kardel  *  single quote strings, then the backslashes are honored only when they
    231       1.1    kardel  *  precede another backslash or a single quote character.
    232       1.1    kardel  *
    233       1.1    kardel  * err:  @code{NULL} is returned if the string(s) is/are mal-formed.
    234       1.1    kardel =*/
    235   1.1.1.2    kardel char *
    236   1.1.1.2    kardel ao_string_cook(char * pzScan, int * lnct_p)
    237       1.1    kardel {
    238       1.1    kardel     int   l = 0;
    239       1.1    kardel     char  q = *pzScan;
    240       1.1    kardel 
    241       1.1    kardel     /*
    242       1.1    kardel      *  It is a quoted string.  Process the escape sequence characters
    243       1.1    kardel      *  (in the set "abfnrtv") and make sure we find a closing quote.
    244       1.1    kardel      */
    245   1.1.1.5  christos     char * pzD = pzScan++;
    246   1.1.1.5  christos     char * pzS = pzScan;
    247       1.1    kardel 
    248   1.1.1.2    kardel     if (lnct_p == NULL)
    249   1.1.1.2    kardel         lnct_p = &l;
    250       1.1    kardel 
    251       1.1    kardel     for (;;) {
    252       1.1    kardel         /*
    253       1.1    kardel          *  IF the next character is the quote character, THEN we may end the
    254       1.1    kardel          *  string.  We end it unless the next non-blank character *after* the
    255       1.1    kardel          *  string happens to also be a quote.  If it is, then we will change
    256       1.1    kardel          *  our quote character to the new quote character and continue
    257       1.1    kardel          *  condensing text.
    258       1.1    kardel          */
    259       1.1    kardel         while (*pzS == q) {
    260       1.1    kardel             *pzD = NUL; /* This is probably the end of the line */
    261   1.1.1.2    kardel             if (! contiguous_quote(&pzS, &q, lnct_p))
    262       1.1    kardel                 return pzS;
    263       1.1    kardel         }
    264       1.1    kardel 
    265       1.1    kardel         /*
    266       1.1    kardel          *  We are inside a quoted string.  Copy text.
    267       1.1    kardel          */
    268       1.1    kardel         switch (*(pzD++) = *(pzS++)) {
    269       1.1    kardel         case NUL:
    270       1.1    kardel             return NULL;
    271       1.1    kardel 
    272   1.1.1.3  christos         case NL:
    273   1.1.1.2    kardel             (*lnct_p)++;
    274       1.1    kardel             break;
    275       1.1    kardel 
    276       1.1    kardel         case '\\':
    277       1.1    kardel             /*
    278       1.1    kardel              *  IF we are escaping a new line,
    279       1.1    kardel              *  THEN drop both the escape and the newline from
    280       1.1    kardel              *       the result string.
    281       1.1    kardel              */
    282   1.1.1.3  christos             if (*pzS == NL) {
    283       1.1    kardel                 pzS++;
    284       1.1    kardel                 pzD--;
    285   1.1.1.2    kardel                 (*lnct_p)++;
    286       1.1    kardel             }
    287       1.1    kardel 
    288       1.1    kardel             /*
    289       1.1    kardel              *  ELSE IF the quote character is '"' or '`',
    290       1.1    kardel              *  THEN we do the full escape character processing
    291       1.1    kardel              */
    292       1.1    kardel             else if (q != '\'') {
    293   1.1.1.3  christos                 unsigned int ct;
    294   1.1.1.3  christos                 ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL);
    295       1.1    kardel                 if (ct == 0)
    296       1.1    kardel                     return NULL;
    297       1.1    kardel 
    298       1.1    kardel                 pzS += ct;
    299       1.1    kardel             }     /* if (q != '\'')                  */
    300       1.1    kardel 
    301       1.1    kardel             /*
    302       1.1    kardel              *  OTHERWISE, we only process "\\", "\'" and "\#" sequences.
    303       1.1    kardel              *  The latter only to easily hide preprocessing directives.
    304       1.1    kardel              */
    305       1.1    kardel             else switch (*pzS) {
    306       1.1    kardel             case '\\':
    307       1.1    kardel             case '\'':
    308       1.1    kardel             case '#':
    309       1.1    kardel                 pzD[-1] = *pzS++;
    310       1.1    kardel             }
    311       1.1    kardel         }     /* switch (*(pzD++) = *(pzS++))    */
    312       1.1    kardel     }         /* for (;;)                        */
    313       1.1    kardel }
    314   1.1.1.3  christos 
    315   1.1.1.3  christos /** @}
    316   1.1.1.3  christos  *
    317       1.1    kardel  * Local Variables:
    318       1.1    kardel  * mode: C
    319       1.1    kardel  * c-file-style: "stroustrup"
    320       1.1    kardel  * indent-tabs-mode: nil
    321       1.1    kardel  * End:
    322       1.1    kardel  * end of autoopts/cook.c */
    323