cook.c revision 1.1.1.9 1 /**
2 * \file cook.c
3 *
4 * This file contains the routines that deal with processing quoted strings
5 * into an internal format.
6 *
7 * @addtogroup autoopts
8 * @{
9 */
10 /*
11 * This file is part of AutoOpts, a companion to AutoGen.
12 * AutoOpts is free software.
13 * AutoOpts is Copyright (C) 1992-2015 by Bruce Korb - all rights reserved
14 *
15 * AutoOpts is available under any one of two licenses. The license
16 * in use must be one of these two and the choice is under the control
17 * of the user of the license.
18 *
19 * The GNU Lesser General Public License, version 3 or later
20 * See the files "COPYING.lgplv3" and "COPYING.gplv3"
21 *
22 * The Modified Berkeley Software Distribution License
23 * See the file "COPYING.mbsd"
24 *
25 * These files have the following sha256 sums:
26 *
27 * 8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95 COPYING.gplv3
28 * 4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b COPYING.lgplv3
29 * 13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239 COPYING.mbsd
30 */
31
32 /* = = = START-STATIC-FORWARD = = = */
33 static bool
34 contiguous_quote(char ** pps, char * pq, int * lnct_p);
35 /* = = = END-STATIC-FORWARD = = = */
36
37 /*=export_func ao_string_cook_escape_char
38 * private:
39 *
40 * what: escape-process a string fragment
41 * arg: + char const * + pzScan + points to character after the escape +
42 * arg: + char * + pRes + Where to put the result byte +
43 * arg: + unsigned int + nl_ch + replacement char if scanned char is \n +
44 *
45 * ret-type: unsigned int
46 * ret-desc: The number of bytes consumed processing the escaped character.
47 *
48 * doc:
49 *
50 * This function converts "t" into "\t" and all your other favorite
 * escapes, including numeric ones: hex and octal, too.
52 * The returned result tells the caller how far to advance the
53 * scan pointer (passed in). The default is to just pass through the
54 * escaped character and advance the scan by one.
55 *
56 * Some applications need to keep an escaped newline, others need to
57 * suppress it. This is accomplished by supplying a '\n' replacement
58 * character that is different from \n, if need be. For example, use
59 * 0x7F and never emit a 0x7F.
60 *
 * err: Zero is returned if the string is mal-formed, i.e. the input ends
 * (NUL byte) where the escaped character was expected.
62 =*/
63 unsigned int
64 ao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl)
65 {
66 unsigned int res = 1;
67
68 switch (*pRes = *pzIn++) {
69 case NUL: /* NUL - end of input string */
70 return 0;
71 case '\r':
72 if (*pzIn != NL)
73 return 1;
74 res++;
75 /* FALLTHROUGH */
76 case NL: /* NL - emit newline */
77 *pRes = (char)nl;
78 return res;
79
80 case 'a': *pRes = '\a'; break;
81 case 'b': *pRes = '\b'; break;
82 case 'f': *pRes = '\f'; break;
83 case 'n': *pRes = NL; break;
84 case 'r': *pRes = '\r'; break;
85 case 't': *pRes = '\t'; break;
86 case 'v': *pRes = '\v'; break;
87
88 case 'x':
89 case 'X': /* HEX Escape */
90 if (IS_HEX_DIGIT_CHAR(*pzIn)) {
91 char z[4];
92 unsigned int ct = 0;
93
94 do {
95 z[ct] = pzIn[ct];
96 if (++ct >= 2)
97 break;
98 } while (IS_HEX_DIGIT_CHAR(pzIn[ct]));
99 z[ct] = NUL;
100 *pRes = (char)strtoul(z, NULL, 16);
101 return ct + 1;
102 }
103 break;
104
105 case '0': case '1': case '2': case '3':
106 case '4': case '5': case '6': case '7':
107 {
108 /*
109 * IF the character copied was an octal digit,
110 * THEN set the output character to an octal value.
111 * The 3 octal digit result might exceed 0xFF, so check it.
112 */
113 char z[4];
114 unsigned long val;
115 unsigned int ct = 0;
116
117 z[ct++] = *--pzIn;
118 while (IS_OCT_DIGIT_CHAR(pzIn[ct])) {
119 z[ct] = pzIn[ct];
120 if (++ct >= 3)
121 break;
122 }
123
124 z[ct] = NUL;
125 val = strtoul(z, NULL, 8);
126 if (val > 0xFF)
127 val = 0xFF;
128 *pRes = (char)val;
129 return ct;
130 }
131
132 default: /* quoted character is result character */;
133 }
134
135 return res;
136 }
137
138
139 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
140 *
141 * A quoted string has been found.
142 * Find the end of it and compress any escape sequences.
143 */
144 static bool
145 contiguous_quote(char ** pps, char * pq, int * lnct_p)
146 {
147 char * ps = *pps + 1;
148
149 for (;;) {
150 while (IS_WHITESPACE_CHAR(*ps))
151 if (*(ps++) == NL)
152 (*lnct_p)++;
153
154 /*
155 * IF the next character is a quote character,
156 * THEN we will concatenate the strings.
157 */
158 switch (*ps) {
159 case '"':
160 case '\'':
161 *pq = *(ps++); /* assign new quote character and return */
162 *pps = ps;
163 return true;
164
165 case '/':
166 /*
167 * Allow for a comment embedded in the concatenated string.
168 */
169 switch (ps[1]) {
170 default:
171 *pps = NULL;
172 return false;
173
174 case '/':
175 /*
176 * Skip to end of line
177 */
178 ps = strchr(ps, NL);
179 if (ps == NULL) {
180 *pps = NULL;
181 return false;
182 }
183 break;
184
185 case '*':
186 {
187 char * p = strstr( ps+2, "*/" );
188 /*
189 * Skip to terminating star slash
190 */
191 if (p == NULL) {
192 *pps = NULL;
193 return false;
194 }
195
196 while (ps < p) {
197 if (*(ps++) == NL)
198 (*lnct_p)++;
199 }
200
201 ps = p + 2;
202 }
203 }
204 continue;
205
206 default:
207 /*
208 * The next non-whitespace character is not a quote.
209 * The series of quoted strings has come to an end.
210 */
211 *pps = ps;
212 return false;
213 }
214 }
215 }
216
217 /*=export_func ao_string_cook
218 * private:
219 *
220 * what: concatenate and escape-process strings
221 * arg: + char * + pzScan + The *MODIFIABLE* input buffer +
222 * arg: + int * + lnct_p + The (possibly NULL) pointer to a line count +
223 *
224 * ret-type: char *
225 * ret-desc: The address of the text following the processed strings.
226 * The return value is NULL if the strings are ill-formed.
227 *
228 * doc:
229 *
230 * A series of one or more quoted strings are concatenated together.
231 * If they are quoted with double quotes (@code{"}), then backslash
232 * escapes are processed per the C programming language. If they are
233 * single quote strings, then the backslashes are honored only when they
234 * precede another backslash or a single quote character.
235 *
236 * err: @code{NULL} is returned if the string(s) is/are mal-formed.
237 =*/
238 char *
239 ao_string_cook(char * pzScan, int * lnct_p)
240 {
241 int l = 0;
242 char q = *pzScan;
243
244 /*
245 * It is a quoted string. Process the escape sequence characters
246 * (in the set "abfnrtv") and make sure we find a closing quote.
247 */
248 char * pzD = pzScan++;
249 char * pzS = pzScan;
250
251 if (lnct_p == NULL)
252 lnct_p = &l;
253
254 for (;;) {
255 /*
256 * IF the next character is the quote character, THEN we may end the
257 * string. We end it unless the next non-blank character *after* the
258 * string happens to also be a quote. If it is, then we will change
259 * our quote character to the new quote character and continue
260 * condensing text.
261 */
262 while (*pzS == q) {
263 *pzD = NUL; /* This is probably the end of the line */
264 if (! contiguous_quote(&pzS, &q, lnct_p))
265 return pzS;
266 }
267
268 /*
269 * We are inside a quoted string. Copy text.
270 */
271 switch (*(pzD++) = *(pzS++)) {
272 case NUL:
273 return NULL;
274
275 case NL:
276 (*lnct_p)++;
277 break;
278
279 case '\\':
280 /*
281 * IF we are escaping a new line,
282 * THEN drop both the escape and the newline from
283 * the result string.
284 */
285 if (*pzS == NL) {
286 pzS++;
287 pzD--;
288 (*lnct_p)++;
289 }
290
291 /*
292 * ELSE IF the quote character is '"' or '`',
293 * THEN we do the full escape character processing
294 */
295 else if (q != '\'') {
296 unsigned int ct;
297 ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL);
298 if (ct == 0)
299 return NULL;
300
301 pzS += ct;
302 } /* if (q != '\'') */
303
304 /*
305 * OTHERWISE, we only process "\\", "\'" and "\#" sequences.
306 * The latter only to easily hide preprocessing directives.
307 */
308 else switch (*pzS) {
309 case '\\':
310 case '\'':
311 case '#':
312 pzD[-1] = *pzS++;
313 }
314 } /* switch (*(pzD++) = *(pzS++)) */
315 } /* for (;;) */
316 }
317
318 /** @}
319 *
320 * Local Variables:
321 * mode: C
322 * c-file-style: "stroustrup"
323 * indent-tabs-mode: nil
324 * End:
325 * end of autoopts/cook.c */
326