cook.c revision 1.1.1.11 1 /* $NetBSD: cook.c,v 1.1.1.11 2024/08/18 20:37:43 christos Exp $ */
2
3 /**
4 * \file cook.c
5 *
6 * This file contains the routines that deal with processing quoted strings
7 * into an internal format.
8 *
9 * @addtogroup autoopts
10 * @{
11 */
12 /*
13 * This file is part of AutoOpts, a companion to AutoGen.
14 * AutoOpts is free software.
15 * AutoOpts is Copyright (C) 1992-2018 by Bruce Korb - all rights reserved
16 *
17 * AutoOpts is available under any one of two licenses. The license
18 * in use must be one of these two and the choice is under the control
19 * of the user of the license.
20 *
21 * The GNU Lesser General Public License, version 3 or later
22 * See the files "COPYING.lgplv3" and "COPYING.gplv3"
23 *
24 * The Modified Berkeley Software Distribution License
25 * See the file "COPYING.mbsd"
26 *
27 * These files have the following sha256 sums:
28 *
29 * 8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95 COPYING.gplv3
30 * 4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b COPYING.lgplv3
31 * 13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239 COPYING.mbsd
32 */
33
34 /*=export_func ao_string_cook_escape_char
35 * private:
36 *
37 * what: escape-process a string fragment
38 * arg: + char const * + pzScan + points to character after the escape +
39 * arg: + char * + pRes + Where to put the result byte +
40 * arg: + unsigned int + nl_ch + replacement char if scanned char is \n +
41 *
42 * ret-type: unsigned int
43 * ret-desc: The number of bytes consumed processing the escaped character.
44 *
45 * doc:
46 *
47 * This function converts "t" into "\t" and all your other favorite
48 * escapes, including numeric ones: hex and octal, too.
49 * The returned result tells the caller how far to advance the
50 * scan pointer (passed in). The default is to just pass through the
51 * escaped character and advance the scan by one.
52 *
53 * Some applications need to keep an escaped newline, others need to
54 * suppress it. This is accomplished by supplying a '\n' replacement
55 * character that is different from \n, if need be. For example, use
56 * 0x7F and never emit a 0x7F.
57 *
58 * err: @code{0} is returned if the string ends immediately after the escape.
59 =*/
60 unsigned int
61 ao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl)
62 {
63 unsigned int res = 1;
64
65 switch (*pRes = *pzIn++) {
66 case NUL: /* NUL - end of input string */
67 return 0;
68 case '\r':
69 if (*pzIn != NL)
70 return 1;
71 res++;
72 /* FALLTHROUGH */
73 case NL: /* NL - emit newline */
74 *pRes = (char)nl;
75 return res;
76
77 case 'a': *pRes = '\a'; break;
78 case 'b': *pRes = '\b'; break;
79 case 'f': *pRes = '\f'; break;
80 case 'n': *pRes = NL; break;
81 case 'r': *pRes = '\r'; break;
82 case 't': *pRes = '\t'; break;
83 case 'v': *pRes = '\v'; break;
84
85 case 'x':
86 case 'X': /* HEX Escape */
87 if (IS_HEX_DIGIT_CHAR(*pzIn)) {
88 char z[4];
89 unsigned int ct = 0;
90
91 do {
92 z[ct] = pzIn[ct];
93 if (++ct >= 2)
94 break;
95 } while (IS_HEX_DIGIT_CHAR(pzIn[ct]));
96 z[ct] = NUL;
97 *pRes = (char)strtoul(z, NULL, 16);
98 return ct + 1;
99 }
100 break;
101
102 case '0': case '1': case '2': case '3':
103 case '4': case '5': case '6': case '7':
104 {
105 /*
106 * IF the character copied was an octal digit,
107 * THEN set the output character to an octal value.
108 * The 3 octal digit result might exceed 0xFF, so check it.
109 */
110 char z[4];
111 unsigned long val;
112 unsigned int ct = 0;
113
114 z[ct++] = *--pzIn;
115 while (IS_OCT_DIGIT_CHAR(pzIn[ct])) {
116 z[ct] = pzIn[ct];
117 if (++ct >= 3)
118 break;
119 }
120
121 z[ct] = NUL;
122 val = strtoul(z, NULL, 8);
123 if (val > 0xFF)
124 val = 0xFF;
125 *pRes = (char)val;
126 return ct;
127 }
128
129 default: /* quoted character is result character */;
130 }
131
132 return res;
133 }
134
135 /**
136 * count newlines between start and end
137 */
138 static char *
139 nl_count(char * start, char * end, int * lnct_p)
140 {
141 while (start < end) {
142 if (*(start++) == NL)
143 (*lnct_p)++;
144 }
145 return end;
146 }
147
148 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
149 *
150 * A quoted string has been found.
151 * Find the end of it and compress any escape sequences.
152 */
153 static bool
154 contiguous_quote(char ** pps, char * pq, int * lnct_p)
155 {
156 char * ps = *pps + 1;
157
158 for (;;) {
159 while (IS_WHITESPACE_CHAR(*ps))
160 if (*(ps++) == NL)
161 (*lnct_p)++;
162
163 /*
164 * IF the next character is a quote character,
165 * THEN we will concatenate the strings.
166 */
167 switch (*ps) {
168 case '"':
169 case '\'':
170 *pq = *(ps++); /* assign new quote character and return */
171 *pps = ps;
172 return true;
173
174 case '/':
175 /*
176 * Allow for a comment embedded in the concatenated string.
177 */
178 switch (ps[1]) {
179 default:
180 goto fail_return;
181
182 case '/':
183 /*
184 * Skip to end of line
185 */
186 ps = strchr(ps, NL);
187 if (ps == NULL)
188 goto fail_return;
189 break;
190
191 case '*':
192 ps = nl_count(ps + 2, strstr(ps + 2, "*/"), lnct_p);
193 if (ps == NULL)
194 goto fail_return;
195 ps += 2;
196 }
197 continue;
198
199 default:
200 /*
201 * The next non-whitespace character is not a quote.
202 * The series of quoted strings has come to an end.
203 */
204 *pps = ps;
205 return false;
206 }
207 }
208
209 fail_return:
210 *pps = NULL;
211 return false;
212 }
213
214 /*=export_func ao_string_cook
215 * private:
216 *
217 * what: concatenate and escape-process strings
218 * arg: + char * + pzScan + The *MODIFIABLE* input buffer +
219 * arg: + int * + lnct_p + The (possibly NULL) pointer to a line count +
220 *
221 * ret-type: char *
222 * ret-desc: The address of the text following the processed strings.
223 * The return value is NULL if the strings are ill-formed.
224 *
225 * doc:
226 *
227 * A series of one or more quoted strings are concatenated together.
228 * If they are quoted with double quotes (@code{"}), then backslash
229 * escapes are processed per the C programming language. If they are
230 * single quote strings, then the backslashes are honored only when they
231 * precede another backslash or a single quote character.
232 *
233 * err: @code{NULL} is returned if the string(s) is/are mal-formed.
234 =*/
235 char *
236 ao_string_cook(char * pzScan, int * lnct_p)
237 {
238 int l = 0;
239 char q = *pzScan;
240
241 /*
242 * It is a quoted string. Process the escape sequence characters
243 * (in the set "abfnrtv") and make sure we find a closing quote.
244 */
245 char * pzD = pzScan++;
246 char * pzS = pzScan;
247
248 if (lnct_p == NULL)
249 lnct_p = &l;
250
251 for (;;) {
252 /*
253 * IF the next character is the quote character, THEN we may end the
254 * string. We end it unless the next non-blank character *after* the
255 * string happens to also be a quote. If it is, then we will change
256 * our quote character to the new quote character and continue
257 * condensing text.
258 */
259 while (*pzS == q) {
260 *pzD = NUL; /* This is probably the end of the line */
261 if (! contiguous_quote(&pzS, &q, lnct_p))
262 return pzS;
263 }
264
265 /*
266 * We are inside a quoted string. Copy text.
267 */
268 switch (*(pzD++) = *(pzS++)) {
269 case NUL:
270 return NULL;
271
272 case NL:
273 (*lnct_p)++;
274 break;
275
276 case '\\':
277 /*
278 * IF we are escaping a new line,
279 * THEN drop both the escape and the newline from
280 * the result string.
281 */
282 if (*pzS == NL) {
283 pzS++;
284 pzD--;
285 (*lnct_p)++;
286 }
287
288 /*
289 * ELSE IF the quote character is '"' or '`',
290 * THEN we do the full escape character processing
291 */
292 else if (q != '\'') {
293 unsigned int ct;
294 ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL);
295 if (ct == 0)
296 return NULL;
297
298 pzS += ct;
299 } /* if (q != '\'') */
300
301 /*
302 * OTHERWISE, we only process "\\", "\'" and "\#" sequences.
303 * The latter only to easily hide preprocessing directives.
304 */
305 else switch (*pzS) {
306 case '\\':
307 case '\'':
308 case '#':
309 pzD[-1] = *pzS++;
310 }
311 } /* switch (*(pzD++) = *(pzS++)) */
312 } /* for (;;) */
313 }
314
315 /** @}
316 *
317 * Local Variables:
318 * mode: C
319 * c-file-style: "stroustrup"
320 * indent-tabs-mode: nil
321 * End:
322 * end of autoopts/cook.c */
323