lib.c revision 1.15 1 1.1 christos /****************************************************************
2 1.1 christos Copyright (C) Lucent Technologies 1997
3 1.1 christos All Rights Reserved
4 1.1 christos
5 1.1 christos Permission to use, copy, modify, and distribute this software and
6 1.1 christos its documentation for any purpose and without fee is hereby
7 1.1 christos granted, provided that the above copyright notice appear in all
8 1.1 christos copies and that both that the copyright notice and this
9 1.1 christos permission notice and warranty disclaimer appear in supporting
10 1.1 christos documentation, and that the name Lucent Technologies or any of
11 1.1 christos its entities not be used in advertising or publicity pertaining
12 1.1 christos to distribution of the software without specific, written prior
13 1.1 christos permission.
14 1.1 christos
15 1.1 christos LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 1.1 christos INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 1.1 christos IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 1.1 christos SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 1.1 christos WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 1.1 christos IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 1.1 christos ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 1.1 christos THIS SOFTWARE.
23 1.1 christos ****************************************************************/
24 1.1 christos
25 1.2 christos #if HAVE_NBTOOL_CONFIG_H
26 1.2 christos #include "nbtool_config.h"
27 1.2 christos #endif
28 1.2 christos
29 1.1 christos #define DEBUG
30 1.1 christos #include <stdio.h>
31 1.1 christos #include <string.h>
32 1.15 christos #include <strings.h>
33 1.1 christos #include <ctype.h>
34 1.1 christos #include <errno.h>
35 1.1 christos #include <stdlib.h>
36 1.1 christos #include <stdarg.h>
37 1.10 christos #include <limits.h>
38 1.15 christos #include <math.h>
39 1.1 christos #include "awk.h"
40 1.2 christos #include "awkgram.h"
41 1.1 christos
42 1.15 christos extern int u8_nextlen(const char *s);
43 1.15 christos
/*
 * File-scope state shared by the record and field routines below.
 */
44 1.2 christos char EMPTY[] = { '\0' }; /* shared empty string: initial `file`, and the sval given to cleared fields */
45 1.1 christos FILE *infile = NULL; /* current input stream; NULL means getrec must open the next operand */
46 1.11 christos bool innew; /* true = infile has not been read by readrec */
47 1.2 christos char *file = EMPTY; /* current input operand as returned by getargv */
48 1.10 christos char *record; /* buffer that recinit installs as the text of $0 */
49 1.1 christos int recsize = RECSIZE; /* size passed to adjbuf together with record */
50 1.1 christos char *fields; /* the split field strings, stored back to back with \0 terminators */
51 1.1 christos int fieldssize = RECSIZE; /* longest $0 that fields[] is assumed to accommodate without reallocation */
52 1.1 christos
53 1.1 christos Cell **fldtab; /* pointers to Cells */
54 1.2 christos static size_t len_inputFS = 0; /* bytes allocated for inputFS by savefs */
55 1.12 ad static char *inputFS; /* FS at time of input, for field splitting */
56 1.1 christos
57 1.1 christos #define MAXFLD 2 /* initial value of nfields */
58 1.1 christos int nfields = MAXFLD; /* last allocated slot for $i */
59 1.1 christos
60 1.10 christos bool donefld; /* true = implies rec broken into fields */
61 1.10 christos bool donerec; /* true = record is valid (no flds have changed) */
62 1.1 christos
63 1.1 christos int lastfld = 0; /* last used field */
64 1.1 christos int argno = 1; /* current input argument number */
65 1.1 christos extern Awkfloat *ARGC; /* defined elsewhere; compared against argno when scanning operands */
66 1.1 christos
/* Templates: recinit copies dollar0 into fldtab[0]; makefields copies dollar1 into every other field cell. */
67 1.10 christos static Cell dollar0 = { OCELL, CFLD, NULL, EMPTY, 0.0, REC|STR|DONTFREE, NULL, NULL };
68 1.10 christos static Cell dollar1 = { OCELL, CFLD, NULL, EMPTY, 0.0, FLD|STR|DONTFREE, NULL, NULL };
69 1.1 christos
70 1.1 christos void recinit(unsigned int n)
71 1.1 christos {
72 1.15 christos if ( (record = (char *) malloc(n)) == NULL
73 1.15 christos || (fields = (char *) malloc(n+1)) == NULL
74 1.15 christos || (fldtab = (Cell **) calloc(nfields+2, sizeof(*fldtab))) == NULL
75 1.15 christos || (fldtab[0] = (Cell *) malloc(sizeof(**fldtab))) == NULL)
76 1.1 christos FATAL("out of space for $0 and fields");
77 1.10 christos *record = '\0';
78 1.1 christos *fldtab[0] = dollar0;
79 1.1 christos fldtab[0]->sval = record;
80 1.1 christos fldtab[0]->nval = tostring("0");
81 1.1 christos makefields(1, nfields);
82 1.12 ad inputFS = strdup("");
83 1.1 christos }
84 1.1 christos
85 1.1 christos void makefields(int n1, int n2) /* create $n1..$n2 inclusive */
86 1.1 christos {
87 1.1 christos char temp[50];
88 1.1 christos int i;
89 1.1 christos
90 1.1 christos for (i = n1; i <= n2; i++) {
91 1.15 christos fldtab[i] = (Cell *) malloc(sizeof(**fldtab));
92 1.1 christos if (fldtab[i] == NULL)
93 1.1 christos FATAL("out of space in makefields %d", i);
94 1.1 christos *fldtab[i] = dollar1;
95 1.2 christos snprintf(temp, sizeof(temp), "%d", i);
96 1.1 christos fldtab[i]->nval = tostring(temp);
97 1.1 christos }
98 1.1 christos }
99 1.1 christos
100 1.1 christos void initgetrec(void)
101 1.1 christos {
102 1.1 christos int i;
103 1.1 christos char *p;
104 1.1 christos
105 1.1 christos for (i = 1; i < *ARGC; i++) {
106 1.6 christos p = getargv(i); /* find 1st real filename */
107 1.6 christos if (p == NULL || *p == '\0') { /* deleted or zapped */
108 1.6 christos argno++;
109 1.6 christos continue;
110 1.6 christos }
111 1.6 christos if (!isclvar(p)) {
112 1.6 christos setsval(lookup("FILENAME", symtab), p);
113 1.1 christos return;
114 1.1 christos }
115 1.1 christos setclvar(p); /* a commandline assignment before filename */
116 1.1 christos argno++;
117 1.1 christos }
118 1.1 christos infile = stdin; /* no filenames, so use stdin */
119 1.11 christos innew = true;
120 1.1 christos }
121 1.1 christos
122 1.10 christos /*
123 1.10 christos * POSIX specifies that fields are supposed to be evaluated as if they were
124 1.10 christos * split using the value of FS at the time that the record's value ($0) was
125 1.10 christos * read.
126 1.10 christos *
127 1.10 christos * Since field-splitting is done lazily, we save the current value of FS
128 1.10 christos * whenever a new record is read in (implicitly or via getline), or when
129 1.10 christos * a new value is assigned to $0.
130 1.10 christos */
131 1.10 christos void savefs(void)
132 1.10 christos {
133 1.10 christos size_t len;
134 1.10 christos if ((len = strlen(getsval(fsloc))) < len_inputFS) {
135 1.10 christos strcpy(inputFS, *FS); /* for subsequent field splitting */
136 1.10 christos return;
137 1.10 christos }
138 1.10 christos
139 1.10 christos len_inputFS = len + 1;
140 1.15 christos inputFS = (char *) realloc(inputFS, len_inputFS);
141 1.10 christos if (inputFS == NULL)
142 1.10 christos FATAL("field separator %.10s... is too long", *FS);
143 1.10 christos memcpy(inputFS, *FS, len_inputFS);
144 1.10 christos }
145 1.10 christos
146 1.10 christos static bool firsttime = true;
147 1.1 christos
148 1.10 christos int getrec(char **pbuf, int *pbufsize, bool isrecord) /* get next input record */
149 1.1 christos { /* note: cares whether buf == record */
150 1.1 christos int c;
151 1.10 christos char *buf = *pbuf;
152 1.1 christos uschar saveb0;
153 1.1 christos int bufsize = *pbufsize, savebufsize = bufsize;
154 1.1 christos
155 1.1 christos if (firsttime) {
156 1.10 christos firsttime = false;
157 1.1 christos initgetrec();
158 1.1 christos }
159 1.15 christos DPRINTF("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
160 1.15 christos *RS, *FS, *ARGC, *FILENAME);
161 1.1 christos saveb0 = buf[0];
162 1.1 christos buf[0] = 0;
163 1.1 christos while (argno < *ARGC || infile == stdin) {
164 1.15 christos DPRINTF("argno=%d, file=|%s|\n", argno, file);
165 1.1 christos if (infile == NULL) { /* have to open a new file */
166 1.1 christos file = getargv(argno);
167 1.6 christos if (file == NULL || *file == '\0') { /* deleted or zapped */
168 1.1 christos argno++;
169 1.1 christos continue;
170 1.1 christos }
171 1.1 christos if (isclvar(file)) { /* a var=value arg */
172 1.1 christos setclvar(file);
173 1.1 christos argno++;
174 1.1 christos continue;
175 1.1 christos }
176 1.1 christos *FILENAME = file;
177 1.15 christos DPRINTF("opening file %s\n", file);
178 1.1 christos if (*file == '-' && *(file+1) == '\0')
179 1.1 christos infile = stdin;
180 1.1 christos else if ((infile = fopen(file, "r")) == NULL)
181 1.1 christos FATAL("can't open file %s", file);
182 1.15 christos innew = true;
183 1.1 christos setfval(fnrloc, 0.0);
184 1.1 christos }
185 1.11 christos c = readrec(&buf, &bufsize, infile, innew);
186 1.11 christos if (innew)
187 1.11 christos innew = false;
188 1.1 christos if (c != 0 || buf[0] != '\0') { /* normal record */
189 1.1 christos if (isrecord) {
190 1.1 christos if (freeable(fldtab[0]))
191 1.1 christos xfree(fldtab[0]->sval);
192 1.1 christos fldtab[0]->sval = buf; /* buf == record */
193 1.1 christos fldtab[0]->tval = REC | STR | DONTFREE;
194 1.13 christos check_number(fldtab[0]);
195 1.15 christos donefld = false;
196 1.15 christos donerec = true;
197 1.15 christos savefs();
198 1.1 christos }
199 1.1 christos setfval(nrloc, nrloc->fval+1);
200 1.1 christos setfval(fnrloc, fnrloc->fval+1);
201 1.1 christos *pbuf = buf;
202 1.1 christos *pbufsize = bufsize;
203 1.1 christos return 1;
204 1.1 christos }
205 1.1 christos /* EOF arrived on this file; set up next */
206 1.1 christos if (infile != stdin)
207 1.1 christos fclose(infile);
208 1.1 christos infile = NULL;
209 1.1 christos argno++;
210 1.1 christos }
211 1.1 christos buf[0] = saveb0;
212 1.1 christos *pbuf = buf;
213 1.1 christos *pbufsize = savebufsize;
214 1.1 christos return 0; /* true end of file */
215 1.1 christos }
216 1.1 christos
217 1.1 christos void nextfile(void)
218 1.1 christos {
219 1.1 christos if (infile != NULL && infile != stdin)
220 1.1 christos fclose(infile);
221 1.1 christos infile = NULL;
222 1.1 christos argno++;
223 1.1 christos }
224 1.1 christos
225 1.15 christos extern int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag);
226 1.15 christos
227 1.11 christos int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* read one record into buf */
228 1.1 christos {
229 1.15 christos int sep, c, isrec; // POTENTIAL BUG? isrec is a macro in awk.h
230 1.15 christos char *rr = *pbuf, *buf = *pbuf;
231 1.1 christos int bufsize = *pbufsize;
232 1.10 christos char *rs = getsval(rsloc);
233 1.10 christos
234 1.15 christos if (CSV) {
235 1.15 christos c = readcsvrec(&buf, &bufsize, inf, newflag);
236 1.15 christos isrec = (c == EOF && rr == buf) ? false : true;
237 1.15 christos } else if (*rs && rs[1]) {
238 1.10 christos bool found;
239 1.1 christos
240 1.15 christos memset(buf, 0, bufsize);
241 1.10 christos fa *pfa = makedfa(rs, 1);
242 1.11 christos if (newflag)
243 1.11 christos found = fnematch(pfa, inf, &buf, &bufsize, recsize);
244 1.11 christos else {
245 1.11 christos int tempstat = pfa->initstat;
246 1.11 christos pfa->initstat = 2;
247 1.11 christos found = fnematch(pfa, inf, &buf, &bufsize, recsize);
248 1.11 christos pfa->initstat = tempstat;
249 1.11 christos }
250 1.5 christos if (found)
251 1.10 christos setptr(patbeg, '\0');
252 1.14 christos isrec = found != 0 || *buf != '\0';
253 1.15 christos
254 1.2 christos } else {
255 1.10 christos if ((sep = *rs) == 0) {
256 1.5 christos sep = '\n';
257 1.5 christos while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
258 1.5 christos ;
259 1.5 christos if (c != EOF)
260 1.5 christos ungetc(c, inf);
261 1.5 christos }
262 1.2 christos for (rr = buf; ; ) {
263 1.2 christos for (; (c=getc(inf)) != sep && c != EOF; ) {
264 1.2 christos if (rr-buf+1 > bufsize)
265 1.2 christos if (!adjbuf(&buf, &bufsize, 1+rr-buf,
266 1.2 christos recsize, &rr, "readrec 1"))
267 1.10 christos FATAL("input record `%.30s...' too long", buf);
268 1.2 christos *rr++ = c;
269 1.2 christos }
270 1.10 christos if (*rs == sep || c == EOF)
271 1.2 christos break;
272 1.10 christos if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
273 1.2 christos break;
274 1.2 christos if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr,
275 1.2 christos "readrec 2"))
276 1.2 christos FATAL("input record `%.30s...' too long", buf);
277 1.2 christos *rr++ = '\n';
278 1.1 christos *rr++ = c;
279 1.1 christos }
280 1.5 christos if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
281 1.5 christos FATAL("input record `%.30s...' too long", buf);
282 1.5 christos *rr = 0;
283 1.14 christos isrec = c != EOF || rr != buf;
284 1.1 christos }
285 1.1 christos *pbuf = buf;
286 1.1 christos *pbufsize = bufsize;
287 1.15 christos DPRINTF("readrec saw <%s>, returns %d\n", buf, isrec);
288 1.5 christos return isrec;
289 1.1 christos }
290 1.1 christos
291 1.15 christos
292 1.15 christos /*******************
293 1.15 christos * loose ends here:
294 1.15 christos * \r\n should become \n
295 1.15 christos * what about bare \r? Excel uses that for embedded newlines
296 1.15 christos * can't have "" in unquoted fields, according to RFC 4180
297 1.15 christos */
298 1.15 christos
299 1.15 christos
300 1.15 christos int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* csv can have \n's */
301 1.15 christos { /* so read a complete record that might be multiple lines */
302 1.15 christos int sep, c;
303 1.15 christos char *rr = *pbuf, *buf = *pbuf;
304 1.15 christos int bufsize = *pbufsize;
305 1.15 christos bool in_quote = false;
306 1.15 christos
307 1.15 christos sep = '\n'; /* the only separator; have to skip over \n embedded in "..." */
308 1.15 christos rr = buf;
309 1.15 christos while ((c = getc(inf)) != EOF) {
310 1.15 christos if (c == sep) {
311 1.15 christos if (! in_quote)
312 1.15 christos break;
313 1.15 christos if (rr > buf && rr[-1] == '\r') // remove \r if was \r\n
314 1.15 christos rr--;
315 1.15 christos }
316 1.15 christos
317 1.15 christos if (rr-buf+1 > bufsize)
318 1.15 christos if (!adjbuf(&buf, &bufsize, 1+rr-buf,
319 1.15 christos recsize, &rr, "readcsvrec 1"))
320 1.15 christos FATAL("input record `%.30s...' too long", buf);
321 1.15 christos *rr++ = c;
322 1.15 christos if (c == '"')
323 1.15 christos in_quote = ! in_quote;
324 1.15 christos }
325 1.15 christos if (c == '\n' && rr > buf && rr[-1] == '\r') // remove \r if was \r\n
326 1.15 christos rr--;
327 1.15 christos
328 1.15 christos if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readcsvrec 4"))
329 1.15 christos FATAL("input record `%.30s...' too long", buf);
330 1.15 christos *rr = 0;
331 1.15 christos *pbuf = buf;
332 1.15 christos *pbufsize = bufsize;
333 1.15 christos DPRINTF("readcsvrec saw <%s>, returns %d\n", buf, c);
334 1.15 christos return c;
335 1.15 christos }
336 1.15 christos
337 1.1 christos char *getargv(int n) /* get ARGV[n] */
338 1.1 christos {
339 1.15 christos Array *ap;
340 1.1 christos Cell *x;
341 1.1 christos char *s, temp[50];
342 1.15 christos extern Cell *ARGVcell;
343 1.1 christos
344 1.15 christos ap = (Array *)ARGVcell->sval;
345 1.2 christos snprintf(temp, sizeof(temp), "%d", n);
346 1.15 christos if (lookup(temp, ap) == NULL)
347 1.6 christos return NULL;
348 1.15 christos x = setsymtab(temp, "", 0.0, STR, ap);
349 1.1 christos s = getsval(x);
350 1.15 christos DPRINTF("getargv(%d) returns |%s|\n", n, s);
351 1.1 christos return s;
352 1.1 christos }
353 1.1 christos
354 1.1 christos void setclvar(char *s) /* set var=value from s */
355 1.1 christos {
356 1.15 christos char *e, *p;
357 1.1 christos Cell *q;
358 1.1 christos
359 1.15 christos /* commit f3d9187d4e0f02294fb1b0e31152070506314e67 broke T.argv test */
360 1.15 christos /* I don't understand why it was changed. */
361 1.15 christos
362 1.1 christos for (p=s; *p != '='; p++)
363 1.1 christos ;
364 1.15 christos e = p;
365 1.1 christos *p++ = 0;
366 1.1 christos p = qstring(p, '\0');
367 1.1 christos q = setsymtab(s, p, 0.0, STR, symtab);
368 1.1 christos setsval(q, p);
369 1.13 christos check_number(q);
370 1.15 christos DPRINTF("command line set %s to |%s|\n", s, p);
371 1.15 christos free(p);
372 1.15 christos *e = '=';
373 1.1 christos }
374 1.1 christos
375 1.1 christos
376 1.1 christos void fldbld(void) /* create fields from current record */
377 1.1 christos {
378 1.1 christos /* this relies on having fields[] the same length as $0 */
379 1.1 christos /* the fields are all stored in this one array with \0's */
380 1.6 christos /* possibly with a final trailing \0 not associated with any field */
381 1.1 christos char *r, *fr, sep;
382 1.1 christos Cell *p;
383 1.1 christos int i, j, n;
384 1.1 christos
385 1.1 christos if (donefld)
386 1.1 christos return;
387 1.1 christos if (!isstr(fldtab[0]))
388 1.1 christos getsval(fldtab[0]);
389 1.1 christos r = fldtab[0]->sval;
390 1.1 christos n = strlen(r);
391 1.1 christos if (n > fieldssize) {
392 1.1 christos xfree(fields);
393 1.15 christos if ((fields = (char *) malloc(n+2)) == NULL) /* possibly 2 final \0s */
394 1.1 christos FATAL("out of space for fields in fldbld %d", n);
395 1.1 christos fieldssize = n;
396 1.1 christos }
397 1.1 christos fr = fields;
398 1.1 christos i = 0; /* number of fields accumulated here */
399 1.15 christos if (inputFS == NULL) /* make sure we have a copy of FS */
400 1.15 christos savefs();
401 1.15 christos if (!CSV && strlen(inputFS) > 1) { /* it's a regular expression */
402 1.1 christos i = refldbld(r, inputFS);
403 1.15 christos } else if (!CSV && (sep = *inputFS) == ' ') { /* default whitespace */
404 1.1 christos for (i = 0; ; ) {
405 1.1 christos while (*r == ' ' || *r == '\t' || *r == '\n')
406 1.1 christos r++;
407 1.1 christos if (*r == 0)
408 1.1 christos break;
409 1.1 christos i++;
410 1.1 christos if (i > nfields)
411 1.1 christos growfldtab(i);
412 1.1 christos if (freeable(fldtab[i]))
413 1.1 christos xfree(fldtab[i]->sval);
414 1.1 christos fldtab[i]->sval = fr;
415 1.1 christos fldtab[i]->tval = FLD | STR | DONTFREE;
416 1.1 christos do
417 1.1 christos *fr++ = *r++;
418 1.1 christos while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
419 1.1 christos *fr++ = 0;
420 1.1 christos }
421 1.1 christos *fr = 0;
422 1.15 christos } else if (CSV) { /* CSV processing. no error handling */
423 1.15 christos if (*r != 0) {
424 1.15 christos for (;;) {
425 1.15 christos i++;
426 1.15 christos if (i > nfields)
427 1.15 christos growfldtab(i);
428 1.15 christos if (freeable(fldtab[i]))
429 1.15 christos xfree(fldtab[i]->sval);
430 1.15 christos fldtab[i]->sval = fr;
431 1.15 christos fldtab[i]->tval = FLD | STR | DONTFREE;
432 1.15 christos if (*r == '"' ) { /* start of "..." */
433 1.15 christos for (r++ ; *r != '\0'; ) {
434 1.15 christos if (*r == '"' && r[1] != '\0' && r[1] == '"') {
435 1.15 christos r += 2; /* doubled quote */
436 1.15 christos *fr++ = '"';
437 1.15 christos } else if (*r == '"' && (r[1] == '\0' || r[1] == ',')) {
438 1.15 christos r++; /* skip over closing quote */
439 1.15 christos break;
440 1.15 christos } else {
441 1.15 christos *fr++ = *r++;
442 1.15 christos }
443 1.15 christos }
444 1.15 christos *fr++ = 0;
445 1.15 christos } else { /* unquoted field */
446 1.15 christos while (*r != ',' && *r != '\0')
447 1.15 christos *fr++ = *r++;
448 1.15 christos *fr++ = 0;
449 1.15 christos }
450 1.15 christos if (*r++ == 0)
451 1.15 christos break;
452 1.15 christos
453 1.15 christos }
454 1.15 christos }
455 1.15 christos *fr = 0;
456 1.15 christos } else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
457 1.15 christos for (i = 0; *r != '\0'; ) {
458 1.15 christos char buf[10];
459 1.1 christos i++;
460 1.1 christos if (i > nfields)
461 1.1 christos growfldtab(i);
462 1.1 christos if (freeable(fldtab[i]))
463 1.1 christos xfree(fldtab[i]->sval);
464 1.15 christos n = u8_nextlen(r);
465 1.15 christos for (j = 0; j < n; j++)
466 1.15 christos buf[j] = *r++;
467 1.15 christos buf[j] = '\0';
468 1.1 christos fldtab[i]->sval = tostring(buf);
469 1.1 christos fldtab[i]->tval = FLD | STR;
470 1.1 christos }
471 1.1 christos *fr = 0;
472 1.1 christos } else if (*r != 0) { /* if 0, it's a null field */
473 1.15 christos /* subtle case: if length(FS) == 1 && length(RS > 0)
474 1.1 christos * \n is NOT a field separator (cf awk book 61,84).
475 1.1 christos * this variable is tested in the inner while loop.
476 1.1 christos */
477 1.1 christos int rtest = '\n'; /* normal case */
478 1.1 christos if (strlen(*RS) > 0)
479 1.1 christos rtest = '\0';
480 1.1 christos for (;;) {
481 1.1 christos i++;
482 1.1 christos if (i > nfields)
483 1.1 christos growfldtab(i);
484 1.1 christos if (freeable(fldtab[i]))
485 1.1 christos xfree(fldtab[i]->sval);
486 1.1 christos fldtab[i]->sval = fr;
487 1.1 christos fldtab[i]->tval = FLD | STR | DONTFREE;
488 1.1 christos while (*r != sep && *r != rtest && *r != '\0') /* \n is always a separator */
489 1.1 christos *fr++ = *r++;
490 1.1 christos *fr++ = 0;
491 1.1 christos if (*r++ == 0)
492 1.1 christos break;
493 1.1 christos }
494 1.1 christos *fr = 0;
495 1.1 christos }
496 1.1 christos if (i > nfields)
497 1.1 christos FATAL("record `%.30s...' has too many fields; can't happen", r);
498 1.1 christos cleanfld(i+1, lastfld); /* clean out junk from previous record */
499 1.1 christos lastfld = i;
500 1.10 christos donefld = true;
501 1.1 christos for (j = 1; j <= lastfld; j++) {
502 1.1 christos p = fldtab[j];
503 1.13 christos check_number(p);
504 1.1 christos }
505 1.1 christos setfval(nfloc, (Awkfloat) lastfld);
506 1.10 christos donerec = true; /* restore */
507 1.1 christos if (dbg) {
508 1.1 christos for (j = 0; j <= lastfld; j++) {
509 1.1 christos p = fldtab[j];
510 1.1 christos printf("field %d (%s): |%s|\n", j, p->nval, p->sval);
511 1.1 christos }
512 1.1 christos }
513 1.1 christos }
514 1.1 christos
515 1.1 christos void cleanfld(int n1, int n2) /* clean out fields n1 .. n2 inclusive */
516 1.1 christos { /* nvals remain intact */
517 1.1 christos Cell *p;
518 1.1 christos int i;
519 1.1 christos
520 1.1 christos for (i = n1; i <= n2; i++) {
521 1.1 christos p = fldtab[i];
522 1.1 christos if (freeable(p))
523 1.1 christos xfree(p->sval);
524 1.10 christos p->sval = EMPTY,
525 1.1 christos p->tval = FLD | STR | DONTFREE;
526 1.1 christos }
527 1.1 christos }
528 1.1 christos
529 1.1 christos void newfld(int n) /* add field n after end of existing lastfld */
530 1.1 christos {
531 1.1 christos if (n > nfields)
532 1.1 christos growfldtab(n);
533 1.1 christos cleanfld(lastfld+1, n);
534 1.1 christos lastfld = n;
535 1.1 christos setfval(nfloc, (Awkfloat) n);
536 1.1 christos }
537 1.1 christos
538 1.3 christos void setlastfld(int n) /* set lastfld cleaning fldtab cells if necessary */
539 1.3 christos {
540 1.10 christos if (n < 0)
541 1.10 christos FATAL("cannot set NF to a negative value");
542 1.3 christos if (n > nfields)
543 1.3 christos growfldtab(n);
544 1.3 christos
545 1.3 christos if (lastfld < n)
546 1.3 christos cleanfld(lastfld+1, n);
547 1.3 christos else
548 1.3 christos cleanfld(n+1, lastfld);
549 1.3 christos
550 1.3 christos lastfld = n;
551 1.3 christos }
552 1.3 christos
553 1.1 christos Cell *fieldadr(int n) /* get nth field */
554 1.1 christos {
555 1.1 christos if (n < 0)
556 1.1 christos FATAL("trying to access out of range field %d", n);
557 1.1 christos if (n > nfields) /* fields after NF are empty */
558 1.1 christos growfldtab(n); /* but does not increase NF */
559 1.1 christos return(fldtab[n]);
560 1.1 christos }
561 1.1 christos
562 1.1 christos void growfldtab(int n) /* make new fields up to at least $n */
563 1.1 christos {
564 1.1 christos int nf = 2 * nfields;
565 1.1 christos size_t s;
566 1.1 christos
567 1.1 christos if (n > nf)
568 1.1 christos nf = n;
569 1.1 christos s = (nf+1) * (sizeof (struct Cell *)); /* freebsd: how much do we need? */
570 1.2 christos if (s / sizeof(struct Cell *) - 1 == (size_t)nf) /* didn't overflow */
571 1.2 christos fldtab = realloc(fldtab, s);
572 1.1 christos else /* overflow sizeof int */
573 1.1 christos xfree(fldtab); /* make it null */
574 1.1 christos if (fldtab == NULL)
575 1.1 christos FATAL("out of space creating %d fields", nf);
576 1.1 christos makefields(nfields+1, nf);
577 1.1 christos nfields = nf;
578 1.1 christos }
579 1.1 christos
580 1.1 christos int refldbld(const char *rec, const char *fs) /* build fields from reg expr in FS */
581 1.1 christos {
582 1.1 christos /* this relies on having fields[] the same length as $0 */
583 1.1 christos /* the fields are all stored in this one array with \0's */
584 1.1 christos char *fr;
585 1.1 christos int i, tempstat, n;
586 1.1 christos fa *pfa;
587 1.1 christos
588 1.1 christos n = strlen(rec);
589 1.1 christos if (n > fieldssize) {
590 1.1 christos xfree(fields);
591 1.15 christos if ((fields = (char *) malloc(n+1)) == NULL)
592 1.1 christos FATAL("out of space for fields in refldbld %d", n);
593 1.1 christos fieldssize = n;
594 1.1 christos }
595 1.1 christos fr = fields;
596 1.1 christos *fr = '\0';
597 1.1 christos if (*rec == '\0')
598 1.1 christos return 0;
599 1.1 christos pfa = makedfa(fs, 1);
600 1.15 christos DPRINTF("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs);
601 1.1 christos tempstat = pfa->initstat;
602 1.1 christos for (i = 1; ; i++) {
603 1.1 christos if (i > nfields)
604 1.1 christos growfldtab(i);
605 1.1 christos if (freeable(fldtab[i]))
606 1.1 christos xfree(fldtab[i]->sval);
607 1.1 christos fldtab[i]->tval = FLD | STR | DONTFREE;
608 1.1 christos fldtab[i]->sval = fr;
609 1.15 christos DPRINTF("refldbld: i=%d\n", i);
610 1.1 christos if (nematch(pfa, rec)) {
611 1.1 christos pfa->initstat = 2; /* horrible coupling to b.c */
612 1.15 christos DPRINTF("match %s (%d chars)\n", patbeg, patlen);
613 1.10 christos strncpy(fr, rec, patbeg-rec);
614 1.10 christos fr += patbeg - rec + 1;
615 1.1 christos *(fr-1) = '\0';
616 1.1 christos rec = patbeg + patlen;
617 1.1 christos } else {
618 1.15 christos DPRINTF("no match %s\n", rec);
619 1.1 christos strcpy(fr, rec);
620 1.1 christos pfa->initstat = tempstat;
621 1.1 christos break;
622 1.1 christos }
623 1.1 christos }
624 1.9 christos return i;
625 1.1 christos }
626 1.1 christos
627 1.1 christos void recbld(void) /* create $0 from $1..$NF if necessary */
628 1.1 christos {
629 1.1 christos int i;
630 1.10 christos char *r, *p;
631 1.10 christos char *sep = getsval(ofsloc);
632 1.1 christos
633 1.10 christos if (donerec)
634 1.1 christos return;
635 1.1 christos r = record;
636 1.1 christos for (i = 1; i <= *NF; i++) {
637 1.1 christos p = getsval(fldtab[i]);
638 1.1 christos if (!adjbuf(&record, &recsize, 1+strlen(p)+r-record, recsize, &r, "recbld 1"))
639 1.1 christos FATAL("created $0 `%.30s...' too long", record);
640 1.1 christos while ((*r = *p++) != 0)
641 1.1 christos r++;
642 1.1 christos if (i < *NF) {
643 1.10 christos if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2"))
644 1.1 christos FATAL("created $0 `%.30s...' too long", record);
645 1.10 christos for (p = sep; (*r = *p++) != 0; )
646 1.1 christos r++;
647 1.1 christos }
648 1.1 christos }
649 1.1 christos if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3"))
650 1.1 christos FATAL("built giant record `%.30s...'", record);
651 1.1 christos *r = '\0';
652 1.15 christos DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
653 1.1 christos
654 1.1 christos if (freeable(fldtab[0]))
655 1.1 christos xfree(fldtab[0]->sval);
656 1.1 christos fldtab[0]->tval = REC | STR | DONTFREE;
657 1.1 christos fldtab[0]->sval = record;
658 1.1 christos
659 1.15 christos DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
660 1.15 christos DPRINTF("recbld = |%s|\n", record);
661 1.10 christos donerec = true;
662 1.1 christos }
663 1.1 christos
int errorflag = 0;	/* set to 2 by SYNTAX() */

/* yyerror: report the message s as a syntax error via SYNTAX(). */
void yyerror(const char *s)
{
	SYNTAX("%s", s);
}
670 1.1 christos
671 1.1 christos void SYNTAX(const char *fmt, ...)
672 1.1 christos {
673 1.1 christos extern char *cmdname, *curfname;
674 1.1 christos static int been_here = 0;
675 1.1 christos va_list varg;
676 1.1 christos
677 1.1 christos if (been_here++ > 2)
678 1.1 christos return;
679 1.1 christos fprintf(stderr, "%s: ", cmdname);
680 1.1 christos va_start(varg, fmt);
681 1.1 christos vfprintf(stderr, fmt, varg);
682 1.1 christos va_end(varg);
683 1.1 christos fprintf(stderr, " at source line %d", lineno);
684 1.1 christos if (curfname != NULL)
685 1.1 christos fprintf(stderr, " in function %s", curfname);
686 1.10 christos if (compile_time == COMPILING && cursource() != NULL)
687 1.1 christos fprintf(stderr, " source file %s", cursource());
688 1.1 christos fprintf(stderr, "\n");
689 1.1 christos errorflag = 2;
690 1.1 christos eprint();
691 1.1 christos }
692 1.1 christos
extern int bracecnt, brackcnt, parencnt;	/* bracket balances, updated by bclass() */

/*
 * bracecheck: on the first call only, read the rest of the input through
 * input(), feeding every character to bclass(), then report unbalanced
 * braces, brackets and parentheses.
 */
void bracecheck(void)
{
	static int beenhere = 0;
	int c;

	if (beenhere++ != 0)
		return;
	for (c = input(); c != EOF && c != '\0'; c = input())
		bclass(c);
	bcheck2(bracecnt, '{', '}');
	bcheck2(brackcnt, '[', ']');
	bcheck2(parencnt, '(', ')');
}
708 1.1 christos
/*
 * bcheck2: report an imbalance of n for one kind of bracket on stderr.
 * A positive n means closers (c2) are missing; a negative n means there
 * are extra ones; zero prints nothing.  c1 is not used.
 */
void bcheck2(int n, int c1, int c2)
{
	if (n == 0)
		return;
	if (n > 0) {
		if (n == 1)
			fprintf(stderr, "\tmissing %c\n", c2);
		else
			fprintf(stderr, "\t%d missing %c's\n", n, c2);
	} else {
		if (n == -1)
			fprintf(stderr, "\textra %c\n", c2);
		else
			fprintf(stderr, "\t%d extra %c's\n", -n, c2);
	}
}
720 1.1 christos
721 1.1 christos void FATAL(const char *fmt, ...)
722 1.1 christos {
723 1.1 christos extern char *cmdname;
724 1.1 christos va_list varg;
725 1.1 christos
726 1.1 christos fflush(stdout);
727 1.1 christos fprintf(stderr, "%s: ", cmdname);
728 1.1 christos va_start(varg, fmt);
729 1.1 christos vfprintf(stderr, fmt, varg);
730 1.1 christos va_end(varg);
731 1.1 christos error();
732 1.1 christos if (dbg > 1) /* core dump if serious debugging on */
733 1.1 christos abort();
734 1.1 christos exit(2);
735 1.1 christos }
736 1.1 christos
/*
 * WARNING: flush stdout, print "cmdname: " and the printf-style message
 * on stderr, and add the location details from error().  Execution
 * continues afterwards.
 */
void WARNING(const char *fmt, ...)
{
	extern char *cmdname;
	va_list ap;

	fflush(stdout);
	fprintf(stderr, "%s: ", cmdname);
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	error();
}
749 1.1 christos
750 1.1 christos void error()
751 1.1 christos {
752 1.1 christos extern Node *curnode;
753 1.1 christos
754 1.1 christos fprintf(stderr, "\n");
755 1.10 christos if (compile_time != ERROR_PRINTING) {
756 1.10 christos if (NR && *NR > 0) {
757 1.10 christos fprintf(stderr, " input record number %d", (int) (*FNR));
758 1.10 christos if (strcmp(*FILENAME, "-") != 0)
759 1.10 christos fprintf(stderr, ", file %s", *FILENAME);
760 1.10 christos fprintf(stderr, "\n");
761 1.10 christos }
762 1.10 christos if (curnode)
763 1.10 christos fprintf(stderr, " source line number %d", curnode->lineno);
764 1.10 christos else if (lineno)
765 1.10 christos fprintf(stderr, " source line number %d", lineno);
766 1.15 christos if (compile_time == COMPILING && cursource() != NULL)
767 1.15 christos fprintf(stderr, " source file %s", cursource());
768 1.15 christos fprintf(stderr, "\n");
769 1.15 christos eprint();
770 1.10 christos }
771 1.1 christos }
772 1.1 christos
773 1.1 christos void eprint(void) /* try to print context around error */
774 1.1 christos {
775 1.1 christos char *p, *q;
776 1.10 christos int c;
777 1.1 christos static int been_here = 0;
778 1.1 christos extern char ebuf[], *ep;
779 1.1 christos
780 1.10 christos if (compile_time != COMPILING || been_here++ > 0 || ebuf == ep)
781 1.10 christos return;
782 1.10 christos if (ebuf == ep)
783 1.1 christos return;
784 1.1 christos p = ep - 1;
785 1.1 christos if (p > ebuf && *p == '\n')
786 1.1 christos p--;
787 1.1 christos for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--)
788 1.1 christos ;
789 1.1 christos while (*p == '\n')
790 1.1 christos p++;
791 1.1 christos fprintf(stderr, " context is\n\t");
792 1.1 christos for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
793 1.1 christos ;
794 1.1 christos for ( ; p < q; p++)
795 1.1 christos if (*p)
796 1.1 christos putc(*p, stderr);
797 1.1 christos fprintf(stderr, " >>> ");
798 1.1 christos for ( ; p < ep; p++)
799 1.1 christos if (*p)
800 1.1 christos putc(*p, stderr);
801 1.1 christos fprintf(stderr, " <<< ");
802 1.10 christos if (*ep)
803 1.1 christos while ((c = input()) != '\n' && c != '\0' && c != EOF) {
804 1.1 christos putc(c, stderr);
805 1.1 christos bclass(c);
806 1.1 christos }
807 1.1 christos putc('\n', stderr);
808 1.1 christos ep = ebuf;
809 1.1 christos }
810 1.1 christos
811 1.1 christos void bclass(int c)
812 1.1 christos {
813 1.1 christos switch (c) {
814 1.1 christos case '{': bracecnt++; break;
815 1.1 christos case '}': bracecnt--; break;
816 1.1 christos case '[': brackcnt++; break;
817 1.1 christos case ']': brackcnt--; break;
818 1.1 christos case '(': parencnt++; break;
819 1.1 christos case ')': parencnt--; break;
820 1.1 christos }
821 1.1 christos }
822 1.1 christos
/*
 * errcheck: inspect errno after a math operation.
 *
 * x is the computed value and s the text used to name the operation in
 * the warning.  If errno is EDOM or ERANGE, errno is cleared, a warning
 * is issued and 1 is returned in place of x; otherwise x is returned
 * unchanged.
 */
double errcheck(double x, const char *s)
{
	switch (errno) {
	case EDOM:
		errno = 0;
		WARNING("%s argument out of domain", s);
		return 1;
	case ERANGE:
		errno = 0;
		WARNING("%s result out of range", s);
		return 1;
	default:
		return x;
	}
}
837 1.1 christos
/*
 * isclvar: is s of form var=something ?
 *
 * Returns 1 when s begins with an identifier (a letter or '_' followed
 * by any number of letters, digits or '_') immediately followed by '=',
 * and 0 otherwise.
 */
int isclvar(const char *s)
{
	/*
	 * The <ctype.h> classifiers are only defined for EOF and values
	 * representable as unsigned char, so never hand them a plain char
	 * that may be negative.
	 */
	const unsigned char *p = (const unsigned char *) s;

	if (!isalpha(*p) && *p != '_')
		return 0;

	/* first character already accepted; skip the rest of the name */
	for (p++; isalnum(*p) || *p == '_'; p++)
		;

	return *p == '=';
}
849 1.1 christos
850 1.1 christos /* strtod is supposed to be a proper test of what's a valid number */
851 1.1 christos /* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */
852 1.1 christos /* wrong: violates 4.10.1.4 of ansi C standard */
853 1.1 christos
854 1.15 christos /* well, not quite. As of C99, hex floating point is allowed. so this is
855 1.15 christos * a bit of a mess. We work around the mess by checking for a hexadecimal
856 1.15 christos * value and disallowing it. Similarly, we now follow gawk and allow only
857 1.15 christos * +nan, -nan, +inf, and -inf for NaN and infinity values.
858 1.15 christos */
859 1.15 christos
860 1.15 christos /*
861 1.15 christos * This routine now has a more complicated interface, the main point
862 1.15 christos * being to avoid the double conversion of a string to double, and
863 1.15 christos * also to convey out, if requested, the information that the numeric
864 1.15 christos * value was a leading string or is all of the string. The latter bit
865 1.15 christos * is used in getfval().
866 1.15 christos */
867 1.13 christos
/*
 * is_valid_number: decide whether s looks like a decimal number and
 * convert it.
 *
 *   s                  string to examine; leading white space is skipped
 *   trailing_stuff_ok  if true, non-blank text after the number is
 *                      tolerated
 *   no_trailing        if non-NULL, set to true when nothing but white
 *                      space follows the number, false otherwise
 *                      (including on every failure return)
 *   result             if non-NULL, receives the converted value whenever
 *                      the conversion itself succeeds, even if the
 *                      function then returns false because of trailing
 *                      text
 *
 * Returns true when s starts with an acceptable number and either
 * nothing follows it or trailing_stuff_ok is set.  Hexadecimal input
 * (signed or unsigned), unsigned "nan"/"inf", and out-of-range values
 * are rejected.
 */
bool is_valid_number(const char *s, bool trailing_stuff_ok,
					bool *no_trailing, double *result)
{
	double r;
	char *ep;
	const char *body;	/* text after the optional sign */
	bool has_sign;

	if (no_trailing != NULL)
		*no_trailing = false;

	/* ctype functions need unsigned char values, not plain char */
	while (isspace((unsigned char) *s))
		s++;

	has_sign = (s[0] == '+' || s[0] == '-');
	body = has_sign ? s + 1 : s;

	/*
	 * no hex floating point, sorry.  Look past the sign as well:
	 * otherwise "-0x1A" passes the digit test below and a C99 strtod
	 * happily converts it.
	 */
	if (body[0] == '0' && tolower((unsigned char) body[1]) == 'x')
		return false;

	/* allow +nan, -nan, +inf, -inf, any other letter, no */
	if (has_sign
	    && (strncasecmp(body, "nan", 3) == 0
		|| strncasecmp(body, "inf", 3) == 0)
	    && (isspace((unsigned char) body[3]) || body[3] == '\0')) {
		/* signed NaN or infinity: let strtod convert it */
	} else if (!isdigit((unsigned char) body[0]) && body[0] != '.') {
		return false;
	}

	errno = 0;
	r = strtod(s, &ep);
	if (ep == s || errno == ERANGE)
		return false;

	/* make sure "-nan" really carries a negative sign */
	if (isnan(r) && s[0] == '-' && signbit(r) == 0)
		r = -r;

	if (result != NULL)
		*result = r;

	/*
	 * check for trailing stuff
	 */
	while (isspace((unsigned char) *ep))
		ep++;

	if (no_trailing != NULL)
		*no_trailing = (*ep == '\0');

	/* return true if found the end, or trailing stuff is allowed */
	return *ep == '\0' || trailing_stuff_ok;
}
926 1.13 christos
927 1.13 christos void check_number(Cell *x)
928 1.13 christos {
929 1.15 christos if (is_valid_number(x->sval, false, NULL, &x->fval))
930 1.13 christos x->tval |= NUM;
931 1.13 christos }
932