1 1.5 christos /* $NetBSD: cook.c,v 1.6 2024/08/18 20:47:24 christos Exp $ */ 2 1.1 kardel 3 1.2 christos /** 4 1.2 christos * \file cook.c 5 1.1 kardel * 6 1.1 kardel * This file contains the routines that deal with processing quoted strings 7 1.1 kardel * into an internal format. 8 1.1 kardel * 9 1.2 christos * @addtogroup autoopts 10 1.2 christos * @{ 11 1.2 christos */ 12 1.2 christos /* 13 1.1 kardel * This file is part of AutoOpts, a companion to AutoGen. 14 1.1 kardel * AutoOpts is free software. 15 1.6 christos * AutoOpts is Copyright (C) 1992-2018 by Bruce Korb - all rights reserved 16 1.1 kardel * 17 1.1 kardel * AutoOpts is available under any one of two licenses. The license 18 1.1 kardel * in use must be one of these two and the choice is under the control 19 1.1 kardel * of the user of the license. 20 1.1 kardel * 21 1.1 kardel * The GNU Lesser General Public License, version 3 or later 22 1.1 kardel * See the files "COPYING.lgplv3" and "COPYING.gplv3" 23 1.1 kardel * 24 1.1 kardel * The Modified Berkeley Software Distribution License 25 1.1 kardel * See the file "COPYING.mbsd" 26 1.1 kardel * 27 1.2 christos * These files have the following sha256 sums: 28 1.1 kardel * 29 1.2 christos * 8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95 COPYING.gplv3 30 1.2 christos * 4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b COPYING.lgplv3 31 1.2 christos * 13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239 COPYING.mbsd 32 1.1 kardel */ 33 1.1 kardel 34 1.1 kardel /*=export_func ao_string_cook_escape_char 35 1.1 kardel * private: 36 1.1 kardel * 37 1.1 kardel * what: escape-process a string fragment 38 1.3 christos * arg: + char const * + pzScan + points to character after the escape + 39 1.3 christos * arg: + char * + pRes + Where to put the result byte + 40 1.1 kardel * arg: + unsigned int + nl_ch + replacement char if scanned char is \n + 41 1.1 kardel * 42 1.1 kardel * ret-type: unsigned int 43 1.1 kardel * ret-desc: The number of bytes consumed processing the escaped character. 44 1.1 kardel * 45 1.1 kardel * doc: 46 1.1 kardel * 47 1.1 kardel * This function converts "t" into "\t" and all your other favorite 48 1.1 kardel * escapes, including numeric ones: hex and ocatal, too. 49 1.1 kardel * The returned result tells the caller how far to advance the 50 1.1 kardel * scan pointer (passed in). The default is to just pass through the 51 1.1 kardel * escaped character and advance the scan by one. 52 1.1 kardel * 53 1.1 kardel * Some applications need to keep an escaped newline, others need to 54 1.1 kardel * suppress it. This is accomplished by supplying a '\n' replacement 55 1.1 kardel * character that is different from \n, if need be. For example, use 56 1.1 kardel * 0x7F and never emit a 0x7F. 57 1.1 kardel * 58 1.1 kardel * err: @code{NULL} is returned if the string is mal-formed. 59 1.1 kardel =*/ 60 1.1 kardel unsigned int 61 1.2 christos ao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl) 62 1.1 kardel { 63 1.2 christos unsigned int res = 1; 64 1.1 kardel 65 1.1 kardel switch (*pRes = *pzIn++) { 66 1.1 kardel case NUL: /* NUL - end of input string */ 67 1.1 kardel return 0; 68 1.1 kardel case '\r': 69 1.2 christos if (*pzIn != NL) 70 1.1 kardel return 1; 71 1.1 kardel res++; 72 1.1 kardel /* FALLTHROUGH */ 73 1.2 christos case NL: /* NL - emit newline */ 74 1.1 kardel *pRes = (char)nl; 75 1.1 kardel return res; 76 1.1 kardel 77 1.1 kardel case 'a': *pRes = '\a'; break; 78 1.1 kardel case 'b': *pRes = '\b'; break; 79 1.1 kardel case 'f': *pRes = '\f'; break; 80 1.2 christos case 'n': *pRes = NL; break; 81 1.1 kardel case 'r': *pRes = '\r'; break; 82 1.1 kardel case 't': *pRes = '\t'; break; 83 1.1 kardel case 'v': *pRes = '\v'; break; 84 1.1 kardel 85 1.1 kardel case 'x': 86 1.1 kardel case 'X': /* HEX Escape */ 87 1.1 kardel if (IS_HEX_DIGIT_CHAR(*pzIn)) { 88 1.2 christos char z[4]; 89 1.2 christos unsigned int ct = 0; 90 1.1 kardel 91 1.2 christos do { 92 1.2 christos z[ct] = pzIn[ct]; 93 1.2 christos if (++ct >= 2) 94 1.2 christos break; 95 1.2 christos } while (IS_HEX_DIGIT_CHAR(pzIn[ct])); 96 1.2 christos z[ct] = NUL; 97 1.2 christos *pRes = (char)strtoul(z, NULL, 16); 98 1.2 christos return ct + 1; 99 1.1 kardel } 100 1.1 kardel break; 101 1.1 kardel 102 1.1 kardel case '0': case '1': case '2': case '3': 103 1.1 kardel case '4': case '5': case '6': case '7': 104 1.1 kardel { 105 1.1 kardel /* 106 1.1 kardel * IF the character copied was an octal digit, 107 1.2 christos * THEN set the output character to an octal value. 108 1.2 christos * The 3 octal digit result might exceed 0xFF, so check it. 109 1.1 kardel */ 110 1.2 christos char z[4]; 111 1.1 kardel unsigned long val; 112 1.2 christos unsigned int ct = 0; 113 1.1 kardel 114 1.2 christos z[ct++] = *--pzIn; 115 1.2 christos while (IS_OCT_DIGIT_CHAR(pzIn[ct])) { 116 1.2 christos z[ct] = pzIn[ct]; 117 1.2 christos if (++ct >= 3) 118 1.2 christos break; 119 1.2 christos } 120 1.2 christos 121 1.2 christos z[ct] = NUL; 122 1.1 kardel val = strtoul(z, NULL, 8); 123 1.1 kardel if (val > 0xFF) 124 1.1 kardel val = 0xFF; 125 1.2 christos *pRes = (char)val; 126 1.2 christos return ct; 127 1.1 kardel } 128 1.1 kardel 129 1.2 christos default: /* quoted character is result character */; 130 1.1 kardel } 131 1.1 kardel 132 1.1 kardel return res; 133 1.1 kardel } 134 1.1 kardel 135 1.6 christos /** 136 1.6 christos * count newlines between start and end 137 1.6 christos */ 138 1.6 christos static char * 139 1.6 christos nl_count(char * start, char * end, int * lnct_p) 140 1.6 christos { 141 1.6 christos while (start < end) { 142 1.6 christos if (*(start++) == NL) 143 1.6 christos (*lnct_p)++; 144 1.6 christos } 145 1.6 christos return end; 146 1.6 christos } 147 1.1 kardel 148 1.1 kardel /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 149 1.1 kardel * 150 1.1 kardel * A quoted string has been found. 151 1.1 kardel * Find the end of it and compress any escape sequences. 152 1.1 kardel */ 153 1.2 christos static bool 154 1.2 christos contiguous_quote(char ** pps, char * pq, int * lnct_p) 155 1.2 christos { 156 1.2 christos char * ps = *pps + 1; 157 1.2 christos 158 1.2 christos for (;;) { 159 1.2 christos while (IS_WHITESPACE_CHAR(*ps)) 160 1.2 christos if (*(ps++) == NL) 161 1.2 christos (*lnct_p)++; 162 1.2 christos 163 1.2 christos /* 164 1.2 christos * IF the next character is a quote character, 165 1.2 christos * THEN we will concatenate the strings. 166 1.2 christos */ 167 1.2 christos switch (*ps) { 168 1.2 christos case '"': 169 1.2 christos case '\'': 170 1.2 christos *pq = *(ps++); /* assign new quote character and return */ 171 1.2 christos *pps = ps; 172 1.2 christos return true; 173 1.2 christos 174 1.2 christos case '/': 175 1.2 christos /* 176 1.2 christos * Allow for a comment embedded in the concatenated string. 177 1.2 christos */ 178 1.2 christos switch (ps[1]) { 179 1.2 christos default: 180 1.6 christos goto fail_return; 181 1.2 christos 182 1.2 christos case '/': 183 1.2 christos /* 184 1.2 christos * Skip to end of line 185 1.2 christos */ 186 1.2 christos ps = strchr(ps, NL); 187 1.6 christos if (ps == NULL) 188 1.6 christos goto fail_return; 189 1.2 christos break; 190 1.2 christos 191 1.2 christos case '*': 192 1.6 christos ps = nl_count(ps + 2, strstr(ps + 2, "*/"), lnct_p); 193 1.6 christos if (ps == NULL) 194 1.6 christos goto fail_return; 195 1.6 christos ps += 2; 196 1.2 christos } 197 1.2 christos continue; 198 1.2 christos 199 1.2 christos default: 200 1.2 christos /* 201 1.2 christos * The next non-whitespace character is not a quote. 202 1.2 christos * The series of quoted strings has come to an end. 203 1.2 christos */ 204 1.2 christos *pps = ps; 205 1.2 christos return false; 206 1.2 christos } 207 1.2 christos } 208 1.6 christos 209 1.6 christos fail_return: 210 1.6 christos *pps = NULL; 211 1.6 christos return false; 212 1.2 christos } 213 1.2 christos 214 1.1 kardel /*=export_func ao_string_cook 215 1.1 kardel * private: 216 1.1 kardel * 217 1.1 kardel * what: concatenate and escape-process strings 218 1.3 christos * arg: + char * + pzScan + The *MODIFIABLE* input buffer + 219 1.3 christos * arg: + int * + lnct_p + The (possibly NULL) pointer to a line count + 220 1.1 kardel * 221 1.3 christos * ret-type: char * 222 1.1 kardel * ret-desc: The address of the text following the processed strings. 223 1.1 kardel * The return value is NULL if the strings are ill-formed. 224 1.1 kardel * 225 1.1 kardel * doc: 226 1.1 kardel * 227 1.1 kardel * A series of one or more quoted strings are concatenated together. 228 1.1 kardel * If they are quoted with double quotes (@code{"}), then backslash 229 1.1 kardel * escapes are processed per the C programming language. If they are 230 1.1 kardel * single quote strings, then the backslashes are honored only when they 231 1.1 kardel * precede another backslash or a single quote character. 232 1.1 kardel * 233 1.1 kardel * err: @code{NULL} is returned if the string(s) is/are mal-formed. 234 1.1 kardel =*/ 235 1.2 christos char * 236 1.2 christos ao_string_cook(char * pzScan, int * lnct_p) 237 1.1 kardel { 238 1.1 kardel int l = 0; 239 1.1 kardel char q = *pzScan; 240 1.1 kardel 241 1.1 kardel /* 242 1.1 kardel * It is a quoted string. Process the escape sequence characters 243 1.1 kardel * (in the set "abfnrtv") and make sure we find a closing quote. 244 1.1 kardel */ 245 1.3 christos char * pzD = pzScan++; 246 1.3 christos char * pzS = pzScan; 247 1.1 kardel 248 1.2 christos if (lnct_p == NULL) 249 1.2 christos lnct_p = &l; 250 1.1 kardel 251 1.1 kardel for (;;) { 252 1.1 kardel /* 253 1.1 kardel * IF the next character is the quote character, THEN we may end the 254 1.1 kardel * string. We end it unless the next non-blank character *after* the 255 1.1 kardel * string happens to also be a quote. If it is, then we will change 256 1.1 kardel * our quote character to the new quote character and continue 257 1.1 kardel * condensing text. 258 1.1 kardel */ 259 1.1 kardel while (*pzS == q) { 260 1.1 kardel *pzD = NUL; /* This is probably the end of the line */ 261 1.2 christos if (! contiguous_quote(&pzS, &q, lnct_p)) 262 1.1 kardel return pzS; 263 1.1 kardel } 264 1.1 kardel 265 1.1 kardel /* 266 1.1 kardel * We are inside a quoted string. Copy text. 267 1.1 kardel */ 268 1.1 kardel switch (*(pzD++) = *(pzS++)) { 269 1.1 kardel case NUL: 270 1.1 kardel return NULL; 271 1.1 kardel 272 1.2 christos case NL: 273 1.2 christos (*lnct_p)++; 274 1.1 kardel break; 275 1.1 kardel 276 1.1 kardel case '\\': 277 1.1 kardel /* 278 1.1 kardel * IF we are escaping a new line, 279 1.1 kardel * THEN drop both the escape and the newline from 280 1.1 kardel * the result string. 281 1.1 kardel */ 282 1.2 christos if (*pzS == NL) { 283 1.1 kardel pzS++; 284 1.1 kardel pzD--; 285 1.2 christos (*lnct_p)++; 286 1.1 kardel } 287 1.1 kardel 288 1.1 kardel /* 289 1.1 kardel * ELSE IF the quote character is '"' or '`', 290 1.1 kardel * THEN we do the full escape character processing 291 1.1 kardel */ 292 1.1 kardel else if (q != '\'') { 293 1.2 christos unsigned int ct; 294 1.2 christos ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL); 295 1.1 kardel if (ct == 0) 296 1.1 kardel return NULL; 297 1.1 kardel 298 1.1 kardel pzS += ct; 299 1.1 kardel } /* if (q != '\'') */ 300 1.1 kardel 301 1.1 kardel /* 302 1.1 kardel * OTHERWISE, we only process "\\", "\'" and "\#" sequences. 303 1.1 kardel * The latter only to easily hide preprocessing directives. 304 1.1 kardel */ 305 1.1 kardel else switch (*pzS) { 306 1.1 kardel case '\\': 307 1.1 kardel case '\'': 308 1.1 kardel case '#': 309 1.1 kardel pzD[-1] = *pzS++; 310 1.1 kardel } 311 1.1 kardel } /* switch (*(pzD++) = *(pzS++)) */ 312 1.1 kardel } /* for (;;) */ 313 1.1 kardel } 314 1.2 christos 315 1.2 christos /** @} 316 1.2 christos * 317 1.1 kardel * Local Variables: 318 1.1 kardel * mode: C 319 1.1 kardel * c-file-style: "stroustrup" 320 1.1 kardel * indent-tabs-mode: nil 321 1.1 kardel * End: 322 1.1 kardel * end of autoopts/cook.c */ 323