cook.c revision 1.1.1.2 1 /* $NetBSD: cook.c,v 1.1.1.2 2012/01/31 21:27:47 kardel Exp $ */
2
3 /**
4 * \file cook.c
5 *
6 * Time-stamp: "2011-03-12 15:05:26 bkorb"
7 *
8 * This file contains the routines that deal with processing quoted strings
9 * into an internal format.
10 *
11 * This file is part of AutoOpts, a companion to AutoGen.
12 * AutoOpts is free software.
13 * AutoOpts is Copyright (c) 1992-2011 by Bruce Korb - all rights reserved
14 *
15 * AutoOpts is available under any one of two licenses. The license
16 * in use must be one of these two and the choice is under the control
17 * of the user of the license.
18 *
19 * The GNU Lesser General Public License, version 3 or later
20 * See the files "COPYING.lgplv3" and "COPYING.gplv3"
21 *
22 * The Modified Berkeley Software Distribution License
23 * See the file "COPYING.mbsd"
24 *
25 * These files have the following md5sums:
26 *
27 * 43b91e8ca915626ed3818ffb1b71248b pkg/libopts/COPYING.gplv3
28 * 06a1a2e4760c90ea5e1dad8dfaac4d39 pkg/libopts/COPYING.lgplv3
29 * 66a5cedaf62c4b2637025f049f9b826f pkg/libopts/COPYING.mbsd
30 */
31
32 /* = = = START-STATIC-FORWARD = = = */
33 static ag_bool
34 contiguous_quote(char ** pps, char * pq, int * lnct_p);
35 /* = = = END-STATIC-FORWARD = = = */
36
37 /*=export_func ao_string_cook_escape_char
38 * private:
39 *
40 * what: escape-process a string fragment
41 * arg: + char const* + pzScan + points to character after the escape +
42 * arg: + char* + pRes + Where to put the result byte +
43 * arg: + unsigned int + nl_ch + replacement char if scanned char is \n +
44 *
45 * ret-type: unsigned int
46 * ret-desc: The number of bytes consumed processing the escaped character.
47 *
48 * doc:
49 *
50 * This function converts "t" into "\t" and all your other favorite
51 * escapes, including numeric ones: hex and octal, too.
52 * The returned result tells the caller how far to advance the
53 * scan pointer (passed in). The default is to just pass through the
54 * escaped character and advance the scan by one.
55 *
56 * Some applications need to keep an escaped newline, others need to
57 * suppress it. This is accomplished by supplying a '\n' replacement
58 * character that is different from \n, if need be. For example, use
59 * 0x7F and never emit a 0x7F.
60 *
61 * err: Zero is returned if the escaped character is the NUL at the end of the input string (the string is mal-formed).
62 =*/
63 unsigned int
64 ao_string_cook_escape_char( char const* pzIn, char* pRes, u_int nl )
65 {
66 unsigned int res = 1;
67
68 switch (*pRes = *pzIn++) {
69 case NUL: /* NUL - end of input string */
70 return 0;
71 case '\r':
72 if (*pzIn != '\n')
73 return 1;
74 res++;
75 /* FALLTHROUGH */
76 case '\n': /* NL - emit newline */
77 *pRes = (char)nl;
78 return res;
79
80 case 'a': *pRes = '\a'; break;
81 case 'b': *pRes = '\b'; break;
82 case 'f': *pRes = '\f'; break;
83 case 'n': *pRes = '\n'; break;
84 case 'r': *pRes = '\r'; break;
85 case 't': *pRes = '\t'; break;
86 case 'v': *pRes = '\v'; break;
87
88 case 'x':
89 case 'X': /* HEX Escape */
90 if (IS_HEX_DIGIT_CHAR(*pzIn)) {
91 char z[4], *pz = z;
92
93 do *(pz++) = *(pzIn++);
94 while (IS_HEX_DIGIT_CHAR(*pzIn) && (pz < z + 2));
95 *pz = NUL;
96 *pRes = (unsigned char)strtoul(z, NULL, 16);
97 res += pz - z;
98 }
99 break;
100
101 case '0': case '1': case '2': case '3':
102 case '4': case '5': case '6': case '7':
103 {
104 /*
105 * IF the character copied was an octal digit,
106 * THEN set the output character to an octal value
107 */
108 char z[4], *pz = z + 1;
109 unsigned long val;
110 z[0] = *pRes;
111
112 while (IS_OCT_DIGIT_CHAR(*pzIn) && (pz < z + 3))
113 *(pz++) = *(pzIn++);
114 *pz = NUL;
115 val = strtoul(z, NULL, 8);
116 if (val > 0xFF)
117 val = 0xFF;
118 *pRes = (unsigned char)val;
119 res = pz - z;
120 break;
121 }
122
123 default: ;
124 }
125
126 return res;
127 }
128
129
130 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
131 *
132 * A quoted string has been found.
133 * Find the end of it and compress any escape sequences.
134 */
135 static ag_bool
136 contiguous_quote(char ** pps, char * pq, int * lnct_p)
137 {
138 char * ps = *pps + 1;
139
140 for (;;) {
141 while (IS_WHITESPACE_CHAR(*ps))
142 if (*(ps++) == '\n')
143 (*lnct_p)++;
144
145 /*
146 * IF the next character is a quote character,
147 * THEN we will concatenate the strings.
148 */
149 switch (*ps) {
150 case '"':
151 case '\'':
152 *pq = *(ps++); /* assign new quote character and return */
153 *pps = ps;
154 return AG_TRUE;
155
156 case '/':
157 /*
158 * Allow for a comment embedded in the concatenated string.
159 */
160 switch (ps[1]) {
161 default:
162 *pps = NULL;
163 return AG_FALSE;
164
165 case '/':
166 /*
167 * Skip to end of line
168 */
169 ps = strchr(ps, '\n');
170 if (ps == NULL) {
171 *pps = NULL;
172 return AG_FALSE;
173 }
174 break;
175
176 case '*':
177 {
178 char* p = strstr( ps+2, "*/" );
179 /*
180 * Skip to terminating star slash
181 */
182 if (p == NULL) {
183 *pps = NULL;
184 return AG_FALSE;
185 }
186
187 while (ps < p) {
188 if (*(ps++) == '\n')
189 (*lnct_p)++;
190 }
191
192 ps = p + 2;
193 }
194 }
195 continue;
196
197 default:
198 /*
199 * The next non-whitespace character is not a quote.
200 * The series of quoted strings has come to an end.
201 */
202 *pps = ps;
203 return AG_FALSE;
204 }
205 }
206 }
207
208 /*=export_func ao_string_cook
209 * private:
210 *
211 * what: concatenate and escape-process strings
212 * arg: + char* + pzScan + The *MODIFIABLE* input buffer +
213 * arg: + int* + lnct_p + The (possibly NULL) pointer to a line count +
214 *
215 * ret-type: char*
216 * ret-desc: The address of the text following the processed strings.
217 * The return value is NULL if the strings are ill-formed.
218 *
219 * doc:
220 *
221 * A series of one or more quoted strings are concatenated together.
222 * If they are quoted with double quotes (@code{"}), then backslash
223 * escapes are processed per the C programming language. If they are
224 * single quote strings, then the backslashes are honored only when they
225 * precede another backslash or a single quote character.
226 *
227 * err: @code{NULL} is returned if the string(s) is/are mal-formed.
228 =*/
229 char *
230 ao_string_cook(char * pzScan, int * lnct_p)
231 {
232 int l = 0;
233 char q = *pzScan;
234
235 /*
236 * It is a quoted string. Process the escape sequence characters
237 * (in the set "abfnrtv") and make sure we find a closing quote.
238 */
239 char* pzD = pzScan++;
240 char* pzS = pzScan;
241
242 if (lnct_p == NULL)
243 lnct_p = &l;
244
245 for (;;) {
246 /*
247 * IF the next character is the quote character, THEN we may end the
248 * string. We end it unless the next non-blank character *after* the
249 * string happens to also be a quote. If it is, then we will change
250 * our quote character to the new quote character and continue
251 * condensing text.
252 */
253 while (*pzS == q) {
254 *pzD = NUL; /* This is probably the end of the line */
255 if (! contiguous_quote(&pzS, &q, lnct_p))
256 return pzS;
257 }
258
259 /*
260 * We are inside a quoted string. Copy text.
261 */
262 switch (*(pzD++) = *(pzS++)) {
263 case NUL:
264 return NULL;
265
266 case '\n':
267 (*lnct_p)++;
268 break;
269
270 case '\\':
271 /*
272 * IF we are escaping a new line,
273 * THEN drop both the escape and the newline from
274 * the result string.
275 */
276 if (*pzS == '\n') {
277 pzS++;
278 pzD--;
279 (*lnct_p)++;
280 }
281
282 /*
283 * ELSE IF the quote character is '"' or '`',
284 * THEN we do the full escape character processing
285 */
286 else if (q != '\'') {
287 int ct = ao_string_cook_escape_char( pzS, pzD-1, (u_int)'\n' );
288 if (ct == 0)
289 return NULL;
290
291 pzS += ct;
292 } /* if (q != '\'') */
293
294 /*
295 * OTHERWISE, we only process "\\", "\'" and "\#" sequences.
296 * The latter only to easily hide preprocessing directives.
297 */
298 else switch (*pzS) {
299 case '\\':
300 case '\'':
301 case '#':
302 pzD[-1] = *pzS++;
303 }
304 } /* switch (*(pzD++) = *(pzS++)) */
305 } /* for (;;) */
306 }
307 /*
308 * Local Variables:
309 * mode: C
310 * c-file-style: "stroustrup"
311 * indent-tabs-mode: nil
312 * End:
313 * end of autoopts/cook.c */
314