compile.c revision 1.19 1 /* $NetBSD: compile.c,v 1.19 2020/03/28 15:27:54 roy Exp $ */
2
3 /*
4 * Copyright (c) 2009, 2010, 2011, 2020 The NetBSD Foundation, Inc.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Roy Marples.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #if HAVE_NBTOOL_CONFIG_H
31 #include "nbtool_config.h"
32 #endif
33
34 #include <sys/cdefs.h>
35 __RCSID("$NetBSD: compile.c,v 1.19 2020/03/28 15:27:54 roy Exp $");
36
37 #if !HAVE_NBTOOL_CONFIG_H || HAVE_SYS_ENDIAN_H
38 #include <sys/endian.h>
39 #endif
40
41 #include <assert.h>
42 #include <ctype.h>
43 #include <err.h>
44 #include <errno.h>
45 #include <limits.h>
46 #include <stdarg.h>
47 #include <stdlib.h>
48 #include <stdint.h>
49 #include <stdio.h>
50 #include <string.h>
51 #include <term_private.h>
52 #include <term.h>
53
54 static void __printflike(2, 3)
55 dowarn(int flags, const char *fmt, ...)
56 {
57 va_list va;
58
59 errno = EINVAL;
60 if (flags & TIC_WARNING) {
61 va_start(va, fmt);
62 vwarnx(fmt, va);
63 va_end(va);
64 }
65 }
66
67 char *
68 _ti_grow_tbuf(TBUF *tbuf, size_t len)
69 {
70 char *buf;
71 size_t l;
72
73 _DIAGASSERT(tbuf != NULL);
74
75 l = tbuf->bufpos + len;
76 if (l > tbuf->buflen) {
77 if (tbuf->buflen == 0)
78 buf = malloc(l);
79 else
80 buf = realloc(tbuf->buf, l);
81 if (buf == NULL)
82 return NULL;
83 tbuf->buf = buf;
84 tbuf->buflen = l;
85 }
86 return tbuf->buf;
87 }
88
89 const char *
90 _ti_find_cap(TIC *tic, TBUF *tbuf, char type, short ind)
91 {
92 size_t n;
93 uint16_t num;
94 const char *cap;
95
96 _DIAGASSERT(tbuf != NULL);
97
98 cap = tbuf->buf;
99 for (n = tbuf->entries; n > 0; n--) {
100 num = _ti_decode_16(&cap);
101 if ((short)num == ind)
102 return cap;
103 switch (type) {
104 case 'f':
105 cap++;
106 break;
107 case 'n':
108 cap += _ti_numsize(tic);
109 break;
110 case 's':
111 num = _ti_decode_16(&cap);
112 cap += num;
113 break;
114 }
115 }
116
117 errno = ESRCH;
118 return NULL;
119 }
120
121 const char *
122 _ti_find_extra(TIC *tic, TBUF *tbuf, const char *code)
123 {
124 size_t n;
125 uint16_t num;
126 const char *cap;
127
128 _DIAGASSERT(tbuf != NULL);
129 _DIAGASSERT(code != NULL);
130
131 cap = tbuf->buf;
132 for (n = tbuf->entries; n > 0; n--) {
133 num = _ti_decode_16(&cap);
134 if (strcmp(cap, code) == 0)
135 return cap + num;
136 cap += num;
137 switch (*cap++) {
138 case 'f':
139 cap++;
140 break;
141 case 'n':
142 cap += _ti_numsize(tic);
143 break;
144 case 's':
145 num = _ti_decode_16(&cap);
146 cap += num;
147 break;
148 }
149 }
150
151 errno = ESRCH;
152 return NULL;
153 }
154
155 char *
156 _ti_getname(int rtype, const char *orig)
157 {
158 char *name;
159
160 if (rtype == TERMINFO_RTYPE) {
161 /* , and | are the two print characters now allowed
162 * in terminfo aliases or long descriptions.
163 * As | is generally used to delimit aliases inside the
164 * description, we use a comma. */
165 if (asprintf(&name, "%s,v3", orig) < 0)
166 name = NULL;
167 } else {
168 name = strdup(orig);
169 }
170 return name;
171 }
172
173 size_t
174 _ti_store_extra(TIC *tic, int wrn, const char *id, char type, char flag,
175 int num, const char *str, size_t strl, int flags)
176 {
177 size_t l;
178
179 _DIAGASSERT(tic != NULL);
180
181 if (strcmp(id, "use") != 0) {
182 if (_ti_find_extra(tic, &tic->extras, id) != NULL)
183 return 0;
184 if (!(flags & TIC_EXTRA)) {
185 if (wrn != 0)
186 dowarn(flags, "%s: %s: unknown capability",
187 tic->name, id);
188 return 0;
189 }
190 }
191
192 l = strlen(id) + 1;
193 if (l > UINT16_T_MAX) {
194 dowarn(flags, "%s: %s: cap name is too long", tic->name, id);
195 return 0;
196 }
197
198 if (!_ti_grow_tbuf(&tic->extras,
199 l + strl + sizeof(uint16_t) + _ti_numsize(tic) + 1))
200 return 0;
201 _ti_encode_buf_count_str(&tic->extras, id, l);
202 tic->extras.buf[tic->extras.bufpos++] = type;
203 switch (type) {
204 case 'f':
205 tic->extras.buf[tic->extras.bufpos++] = flag;
206 break;
207 case 'n':
208 _ti_encode_buf_num(&tic->extras, num, tic->rtype);
209 break;
210 case 's':
211 _ti_encode_buf_count_str(&tic->extras, str, strl);
212 break;
213 }
214 tic->extras.entries++;
215 return 1;
216 }
217
218 static void
219 _ti_encode_buf(char **cap, const TBUF *buf)
220 {
221 if (buf->entries == 0) {
222 _ti_encode_16(cap, 0);
223 } else {
224 _ti_encode_16(cap, buf->bufpos + sizeof(uint16_t));
225 _ti_encode_16(cap, buf->entries);
226 _ti_encode_str(cap, buf->buf, buf->bufpos);
227 }
228 }
229
230 ssize_t
231 _ti_flatten(uint8_t **buf, const TIC *tic)
232 {
233 size_t buflen, len, alen, dlen;
234 char *cap;
235
236 _DIAGASSERT(buf != NULL);
237 _DIAGASSERT(tic != NULL);
238
239 len = strlen(tic->name) + 1;
240 if (tic->alias == NULL)
241 alen = 0;
242 else
243 alen = strlen(tic->alias) + 1;
244 if (tic->desc == NULL)
245 dlen = 0;
246 else
247 dlen = strlen(tic->desc) + 1;
248
249 buflen = sizeof(char) +
250 sizeof(uint16_t) + len +
251 sizeof(uint16_t) + alen +
252 sizeof(uint16_t) + dlen +
253 (sizeof(uint16_t) * 2) + tic->flags.bufpos +
254 (sizeof(uint16_t) * 2) + tic->nums.bufpos +
255 (sizeof(uint16_t) * 2) + tic->strs.bufpos +
256 (sizeof(uint16_t) * 2) + tic->extras.bufpos;
257
258 *buf = malloc(buflen);
259 if (*buf == NULL)
260 return -1;
261
262 cap = (char *)*buf;
263 *cap++ = tic->rtype;
264
265 _ti_encode_count_str(&cap, tic->name, len);
266 _ti_encode_count_str(&cap, tic->alias, alen);
267 _ti_encode_count_str(&cap, tic->desc, dlen);
268
269 _ti_encode_buf(&cap, &tic->flags);
270
271 _ti_encode_buf(&cap, &tic->nums);
272 _ti_encode_buf(&cap, &tic->strs);
273 _ti_encode_buf(&cap, &tic->extras);
274
275 return (uint8_t *)cap - *buf;
276 }
277
278 static int
279 encode_string(const char *term, const char *cap, TBUF *tbuf, const char *str,
280 int flags)
281 {
282 int slash, i, num;
283 char ch, *p, *s, last;
284
285 if (_ti_grow_tbuf(tbuf, strlen(str) + 1) == NULL)
286 return -1;
287 p = s = tbuf->buf + tbuf->bufpos;
288 slash = 0;
289 last = '\0';
290 /* Convert escape codes */
291 while ((ch = *str++) != '\0') {
292 if (ch == '\n') {
293 /* Following a newline, strip leading whitespace from
294 * capability strings. */
295 while (isspace((unsigned char)*str))
296 str++;
297 continue;
298 }
299 if (slash == 0 && ch == '\\') {
300 slash = 1;
301 continue;
302 }
303 if (slash == 0) {
304 if (last != '%' && ch == '^') {
305 ch = *str++;
306 if (((unsigned char)ch) >= 128)
307 dowarn(flags,
308 "%s: %s: illegal ^ character",
309 term, cap);
310 if (ch == '\0')
311 break;
312 if (ch == '?')
313 ch = '\177';
314 else if ((ch &= 037) == 0)
315 ch = (char)128;
316 } else if (!isprint((unsigned char)ch))
317 dowarn(flags,
318 "%s: %s: unprintable character",
319 term, cap);
320 *p++ = ch;
321 last = ch;
322 continue;
323 }
324 slash = 0;
325 if (ch >= '0' && ch <= '7') {
326 num = ch - '0';
327 for (i = 0; i < 2; i++) {
328 if (*str < '0' || *str > '7') {
329 if (isdigit((unsigned char)*str))
330 dowarn(flags,
331 "%s: %s: non octal"
332 " digit", term, cap);
333 else
334 break;
335 }
336 num = num * 8 + *str++ - '0';
337 }
338 if (num == 0)
339 num = 0200;
340 *p++ = (char)num;
341 continue;
342 }
343 switch (ch) {
344 case 'a':
345 *p++ = '\a';
346 break;
347 case 'b':
348 *p++ = '\b';
349 break;
350 case 'e': /* FALLTHROUGH */
351 case 'E':
352 *p++ = '\033';
353 break;
354 case 'f':
355 *p++ = '\014';
356 break;
357 case 'l': /* FALLTHROUGH */
358 case 'n':
359 *p++ = '\n';
360 break;
361 case 'r':
362 *p++ = '\r';
363 break;
364 case 's':
365 *p++ = ' ';
366 break;
367 case 't':
368 *p++ = '\t';
369 break;
370 default:
371 /* We should warn here */
372 case '^':
373 case ',':
374 case ':':
375 case '|':
376 *p++ = ch;
377 break;
378 }
379 last = ch;
380 }
381 *p++ = '\0';
382 tbuf->bufpos += (size_t)(p - s);
383 return 0;
384 }
385
/*
 * Split the next sep-delimited token off the front of *cap.
 *
 * Leading whitespace is skipped.  The separator is overwritten with a
 * NUL and *cap is advanced past it (or left at the terminating NUL when
 * the input ends first).  Returns the token, or NULL when only
 * whitespace or nothing is left.
 *
 * A character following '\' or '^' never ends the token.
 * stresep(3) cannot be used here because two escape characters are
 * needed.
 */
char *
_ti_get_token(char **cap, char sep)
{
	char *cur, *start;
	char pending;

	cur = *cap;
	while (isspace((unsigned char)*cur))
		cur++;
	if (*cur == '\0') {
		*cap = cur;
		return NULL;
	}

	start = cur;
	pending = '\0';
	while (*cur != '\0' && (pending != '\0' || *cur != sep)) {
		if (pending != '\0') {
			/* termcap /E/ is valid */
			if (sep == ':' && pending == '\\' && *cur == 'E')
				pending = 'x';
			else
				pending = '\0';
		} else if (*cur == '\\' || *cur == '^') {
			pending = *cur;
		}
		cur++;
	}

	if (*cur != '\0')
		*cur++ = '\0';
	*cap = cur;
	return start;
}
419
420 static int
421 _ti_find_rtype(const char *cap)
422 {
423 const char *ptr;
424
425 for (ptr = cap; (ptr = strchr(ptr, '#')) != NULL;) {
426 if (strtol(++ptr, NULL, 0) > SHRT_MAX) {
427 return TERMINFO_RTYPE;
428 }
429 }
430 return TERMINFO_RTYPE_O1;
431 }
432
433 int
434 _ti_encode_buf_id_num(TBUF *tbuf, int ind, int num, size_t len)
435 {
436 if (!_ti_grow_tbuf(tbuf, sizeof(uint16_t) + len))
437 return 0;
438 _ti_encode_buf_16(tbuf, ind);
439 if (len == sizeof(uint32_t))
440 _ti_encode_buf_32(tbuf, num);
441 else
442 _ti_encode_buf_16(tbuf, num);
443 tbuf->entries++;
444 return 1;
445 }
446
447 int
448 _ti_encode_buf_id_count_str(TBUF *tbuf, int ind, const void *buf, size_t len)
449 {
450 if (!_ti_grow_tbuf(tbuf, 2 * sizeof(uint16_t) + len))
451 return 0;
452 _ti_encode_buf_16(tbuf, ind);
453 _ti_encode_buf_count_str(tbuf, buf, len);
454 tbuf->entries++;
455 return 1;
456 }
457
458 int
459 _ti_encode_buf_id_flags(TBUF *tbuf, int ind, int flag)
460 {
461 if (!_ti_grow_tbuf(tbuf, sizeof(uint16_t) + 1))
462 return 0;
463 _ti_encode_buf_16(tbuf, ind);
464 tbuf->buf[tbuf->bufpos++] = flag;
465 tbuf->entries++;
466 return 1;
467 }
468
469 TIC *
470 _ti_compile(char *cap, int flags)
471 {
472 char *token, *p, *e, *name, *desc, *alias;
473 signed char flag;
474 long cnum;
475 short ind;
476 int num;
477 size_t len;
478 TBUF buf;
479 TIC *tic;
480
481 _DIAGASSERT(cap != NULL);
482
483 name = _ti_get_token(&cap, ',');
484 if (name == NULL) {
485 dowarn(flags, "no separator found: %s", cap);
486 return NULL;
487 }
488 desc = strrchr(name, '|');
489 if (desc != NULL)
490 *desc++ = '\0';
491 alias = strchr(name, '|');
492 if (alias != NULL)
493 *alias++ = '\0';
494
495 if (strlen(name) > UINT16_MAX - 1) {
496 dowarn(flags, "%s: name too long", name);
497 return NULL;
498 }
499 if (desc != NULL && strlen(desc) > UINT16_MAX - 1) {
500 dowarn(flags, "%s: description too long: %s", name, desc);
501 return NULL;
502 }
503 if (alias != NULL && strlen(alias) > UINT16_MAX - 1) {
504 dowarn(flags, "%s: alias too long: %s", name, alias);
505 return NULL;
506 }
507
508 tic = calloc(sizeof(*tic), 1);
509 if (tic == NULL)
510 return NULL;
511
512 tic->rtype = (flags & TIC_COMPAT_V1) ? TERMINFO_RTYPE_O1 :
513 _ti_find_rtype(cap);
514 buf.buf = NULL;
515 buf.buflen = 0;
516
517 tic->name = _ti_getname(tic->rtype, name);
518 if (tic->name == NULL)
519 goto error;
520 if (alias != NULL && flags & TIC_ALIAS) {
521 tic->alias = _ti_getname(tic->rtype, alias);
522 if (tic->alias == NULL)
523 goto error;
524 }
525 if (desc != NULL && flags & TIC_DESCRIPTION) {
526 tic->desc = strdup(desc);
527 if (tic->desc == NULL)
528 goto error;
529 }
530
531 for (token = _ti_get_token(&cap, ',');
532 token != NULL && *token != '\0';
533 token = _ti_get_token(&cap, ','))
534 {
535 /* Skip commented caps */
536 if (!(flags & TIC_COMMENT) && token[0] == '.')
537 continue;
538
539 /* Obsolete entries */
540 if (token[0] == 'O' && token[1] == 'T') {
541 if (!(flags & TIC_EXTRA))
542 continue;
543 token += 2;
544 }
545
546 /* str cap */
547 p = strchr(token, '=');
548 if (p != NULL) {
549 *p++ = '\0';
550 /* Don't use the string if we already have it */
551 ind = (short)_ti_strindex(token);
552 if (ind != -1 &&
553 _ti_find_cap(tic, &tic->strs, 's', ind) != NULL)
554 continue;
555
556 /* Encode the string to our scratch buffer */
557 buf.bufpos = 0;
558 if (encode_string(tic->name, token,
559 &buf, p, flags) == -1)
560 goto error;
561 if (buf.bufpos > UINT16_MAX - 1) {
562 dowarn(flags, "%s: %s: string is too long",
563 tic->name, token);
564 continue;
565 }
566 if (!VALID_STRING(buf.buf)) {
567 dowarn(flags, "%s: %s: invalid string",
568 tic->name, token);
569 continue;
570 }
571
572 if (ind == -1) {
573 if (!_ti_store_extra(tic, 1, token, 's', -1, -2,
574 buf.buf, buf.bufpos, flags))
575 goto error;
576 } else {
577 if (!_ti_encode_buf_id_count_str(&tic->strs,
578 ind, buf.buf, buf.bufpos))
579 goto error;
580 }
581 continue;
582 }
583
584 /* num cap */
585 p = strchr(token, '#');
586 if (p != NULL) {
587 *p++ = '\0';
588 /* Don't use the number if we already have it */
589 ind = (short)_ti_numindex(token);
590 if (ind != -1 &&
591 _ti_find_cap(tic, &tic->nums, 'n', ind) != NULL)
592 continue;
593
594 cnum = strtol(p, &e, 0);
595 if (*e != '\0') {
596 dowarn(flags, "%s: %s: not a number",
597 tic->name, token);
598 continue;
599 }
600 if (!VALID_NUMERIC(cnum) || cnum > INT32_MAX) {
601 dowarn(flags, "%s: %s: number %ld out of range",
602 tic->name, token, cnum);
603 continue;
604 }
605
606 num = (int)cnum;
607 if (ind == -1) {
608 if (!_ti_store_extra(tic, 1, token, 'n', -1,
609 num, NULL, 0, flags))
610 goto error;
611 } else {
612 if (!_ti_encode_buf_id_num(&tic->nums,
613 ind, num, _ti_numsize(tic)))
614 goto error;
615 }
616 continue;
617 }
618
619 flag = 1;
620 len = strlen(token) - 1;
621 if (token[len] == '@') {
622 flag = CANCELLED_BOOLEAN;
623 token[len] = '\0';
624 }
625 ind = (short)_ti_flagindex(token);
626 if (ind == -1 && flag == CANCELLED_BOOLEAN) {
627 if ((ind = (short)_ti_numindex(token)) != -1) {
628 if (_ti_find_cap(tic, &tic->nums, 'n', ind)
629 != NULL)
630 continue;
631 if (!_ti_encode_buf_id_num(&tic->nums, ind,
632 CANCELLED_NUMERIC, _ti_numsize(tic)))
633 goto error;
634 continue;
635 } else if ((ind = (short)_ti_strindex(token)) != -1) {
636 if (_ti_find_cap(tic, &tic->strs, 's', ind)
637 != NULL)
638 continue;
639 if (!_ti_encode_buf_id_num(
640 &tic->strs, ind, 0, sizeof(uint16_t)))
641 goto error;
642 continue;
643 }
644 }
645 if (ind == -1) {
646 if (!_ti_store_extra(tic, 1, token, 'f', flag, 0, NULL,
647 0, flags))
648 goto error;
649 } else if (_ti_find_cap(tic, &tic->flags, 'f', ind) == NULL) {
650 if (!_ti_encode_buf_id_flags(&tic->flags, ind, flags))
651 goto error;
652 }
653 }
654
655 free(buf.buf);
656 return tic;
657
658 error:
659 free(buf.buf);
660 _ti_freetic(tic);
661 return NULL;
662 }
663
664 void
665 _ti_freetic(TIC *tic)
666 {
667
668 if (tic != NULL) {
669 free(tic->name);
670 free(tic->alias);
671 free(tic->desc);
672 free(tic->extras.buf);
673 free(tic->flags.buf);
674 free(tic->nums.buf);
675 free(tic->strs.buf);
676 free(tic);
677 }
678 }
679