prop_intern.c revision 1.2 1 /* $NetBSD: prop_intern.c,v 1.2 2025/05/14 03:25:46 thorpej Exp $ */
2
3 /*-
4 * Copyright (c) 2006, 2007, 2025 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include "prop_object_impl.h"
33 #include <prop/prop_object.h>
34
35 #if !defined(_KERNEL) && !defined(_STANDALONE)
36 #include <sys/mman.h>
37 #include <sys/stat.h>
38 #include <fcntl.h>
39 #include <unistd.h>
40 #endif /* !_KERNEL && !_STANDALONE */
41
/*
 * _prop_intern_skip_whitespace --
 *	Skip any span of whitespace.  Returns a pointer to the first
 *	character at or after cp for which _PROP_ISSPACE() is false.
 */
const char *
_prop_intern_skip_whitespace(const char *cp)
{
	for (; _PROP_ISSPACE(*cp); cp++) {
		continue;
	}
	return cp;
}
54
/*
 * _prop_intern_match --
 *	Compare two length-delimited character streams.  Returns true
 *	only when both have the same length and identical contents;
 *	neither stream needs to be NUL-terminated.
 */
bool
_prop_intern_match(const char *str1, size_t len1,
		   const char *str2, size_t len2)
{
	/* Different lengths can never match; skip the byte comparison. */
	if (len1 != len2) {
		return false;
	}
	return memcmp(str1, str2, len1) == 0;
}
65
66 /*
67 * _prop_xml_intern_skip_comment --
68 * Skip the body and end tag of an XML comment.
69 */
70 static bool
71 _prop_xml_intern_skip_comment(struct _prop_object_internalize_context *ctx)
72 {
73 const char *cp = ctx->poic_cp;
74
75 for (cp = ctx->poic_cp; !_PROP_EOF(*cp); cp++) {
76 if (cp[0] == '-' &&
77 cp[1] == '-' &&
78 cp[2] == '>') {
79 ctx->poic_cp = cp + 3;
80 return true;
81 }
82 }
83
84 return false; /* ran out of buffer */
85 }
86
87 /*
88 * _prop_xml_intern_find_tag --
89 * Find the next tag in an XML stream. Optionally compare the found
90 * tag to an expected tag name. State of the context is undefined
91 * if this routine returns false. Upon success, the context points
92 * to the first octet after the tag.
93 */
94 bool
95 _prop_xml_intern_find_tag(struct _prop_object_internalize_context *ctx,
96 const char *tag, _prop_tag_type_t type)
97 {
98 const char *cp;
99 size_t taglen;
100
101 taglen = tag != NULL ? strlen(tag) : 0;
102
103 start_over:
104 cp = ctx->poic_cp;
105
106 /*
107 * Find the start of the tag.
108 */
109 cp = _prop_intern_skip_whitespace(cp);
110 if (*cp != '<') {
111 return false;
112 }
113
114 ctx->poic_tag_start = cp++;
115 if (_PROP_EOF(*cp)) {
116 return false;
117 }
118
119 if (*cp == '!') {
120 if (cp[1] != '-' || cp[2] != '-') {
121 return false;
122 }
123 /*
124 * Comment block -- only allowed if we are allowed to
125 * return a start tag.
126 */
127 if (type == _PROP_TAG_TYPE_END) {
128 return false;
129 }
130 ctx->poic_cp = cp + 3;
131 if (_prop_xml_intern_skip_comment(ctx) == false) {
132 return false;
133 }
134 goto start_over;
135 }
136
137 if (*cp == '/') {
138 if (type != _PROP_TAG_TYPE_END &&
139 type != _PROP_TAG_TYPE_EITHER) {
140 return false;
141 }
142 cp++;
143 if (_PROP_EOF(*cp)) {
144 return false;
145 }
146 ctx->poic_tag_type = _PROP_TAG_TYPE_END;
147 } else {
148 if (type != _PROP_TAG_TYPE_START &&
149 type != _PROP_TAG_TYPE_EITHER) {
150 return false;
151 }
152 ctx->poic_tag_type = _PROP_TAG_TYPE_START;
153 }
154
155 ctx->poic_tagname = cp;
156
157 while (!_PROP_ISSPACE(*cp) && *cp != '/' && *cp != '>') {
158 if (_PROP_EOF(*cp)) {
159 return false;
160 }
161 cp++;
162 }
163
164 ctx->poic_tagname_len = cp - ctx->poic_tagname;
165
166 /* Make sure this is the tag we're looking for. */
167 if (tag != NULL &&
168 (taglen != ctx->poic_tagname_len ||
169 memcmp(tag, ctx->poic_tagname, taglen) != 0)) {
170 return false;
171 }
172
173 /* Check for empty tag. */
174 if (*cp == '/') {
175 if (ctx->poic_tag_type != _PROP_TAG_TYPE_START) {
176 return false; /* only valid on start tags */
177 }
178 ctx->poic_is_empty_element = true;
179 cp++;
180 if (_PROP_EOF(*cp) || *cp != '>') {
181 return false;
182 }
183 } else {
184 ctx->poic_is_empty_element = false;
185 }
186
187 /* Easy case of no arguments. */
188 if (*cp == '>') {
189 ctx->poic_tagattr = NULL;
190 ctx->poic_tagattr_len = 0;
191 ctx->poic_tagattrval = NULL;
192 ctx->poic_tagattrval_len = 0;
193 ctx->poic_cp = cp + 1;
194 return true;
195 }
196
197 _PROP_ASSERT(!_PROP_EOF(*cp));
198 cp++;
199 if (_PROP_EOF(*cp)) {
200 return false;
201 }
202
203 cp = _prop_intern_skip_whitespace(cp);
204 if (_PROP_EOF(*cp)) {
205 return false;
206 }
207
208 ctx->poic_tagattr = cp;
209
210 while (!_PROP_ISSPACE(*cp) && *cp != '=') {
211 if (_PROP_EOF(*cp)) {
212 return false;
213 }
214 cp++;
215 }
216
217 ctx->poic_tagattr_len = cp - ctx->poic_tagattr;
218
219 cp++;
220 if (*cp != '\"') {
221 return false;
222 }
223 cp++;
224 if (_PROP_EOF(*cp)) {
225 return false;
226 }
227
228 ctx->poic_tagattrval = cp;
229 while (*cp != '\"') {
230 if (_PROP_EOF(*cp)) {
231 return false;
232 }
233 cp++;
234 }
235 ctx->poic_tagattrval_len = cp - ctx->poic_tagattrval;
236
237 cp++;
238 if (*cp != '>') {
239 return false;
240 }
241
242 ctx->poic_cp = cp + 1;
243 return true;
244 }
245
246 #define INTERNALIZER(t, f) \
247 { t, sizeof(t) - 1, f }
248
249 static const struct _prop_object_internalizer {
250 const char *poi_tag;
251 size_t poi_taglen;
252 prop_object_internalizer_t poi_intern;
253 } _prop_object_internalizer_table[] = {
254 INTERNALIZER("array", _prop_array_internalize),
255
256 INTERNALIZER("true", _prop_bool_internalize),
257 INTERNALIZER("false", _prop_bool_internalize),
258
259 INTERNALIZER("data", _prop_data_internalize),
260
261 INTERNALIZER("dict", _prop_dictionary_internalize),
262
263 INTERNALIZER("integer", _prop_number_internalize),
264
265 INTERNALIZER("string", _prop_string_internalize),
266
267 { 0, 0, NULL }
268 };
269
270 #undef INTERNALIZER
271
272 /*
273 * _prop_xml_intern_by_tag --
274 * Determine the object type from the tag in the context and
275 * internalize it.
276 */
277 static prop_object_t
278 _prop_xml_intern_by_tag(struct _prop_object_internalize_context *ctx)
279 {
280 const struct _prop_object_internalizer *poi;
281 prop_object_t obj, parent_obj;
282 void *data, *iter;
283 prop_object_internalizer_continue_t iter_func;
284 struct _prop_stack stack;
285
286 _prop_stack_init(&stack);
287
288 match_start:
289 for (poi = _prop_object_internalizer_table;
290 poi->poi_tag != NULL; poi++) {
291 if (_prop_intern_match(ctx->poic_tagname,
292 ctx->poic_tagname_len,
293 poi->poi_tag,
294 poi->poi_taglen)) {
295 break;
296 }
297 }
298 if (poi == NULL || poi->poi_tag == NULL) {
299 while (_prop_stack_pop(&stack, &obj, &iter, &data, NULL)) {
300 iter_func = (prop_object_internalizer_continue_t)iter;
301 (*iter_func)(&stack, &obj, ctx, data, NULL);
302 }
303 return NULL;
304 }
305
306 obj = NULL;
307 if (!(*poi->poi_intern)(&stack, &obj, ctx)) {
308 goto match_start;
309 }
310
311 parent_obj = obj;
312 while (_prop_stack_pop(&stack, &parent_obj, &iter, &data, NULL)) {
313 iter_func = (prop_object_internalizer_continue_t)iter;
314 if (!(*iter_func)(&stack, &parent_obj, ctx, data, obj)) {
315 goto match_start;
316 }
317 obj = parent_obj;
318 }
319
320 return parent_obj;
321 }
322
/*
 * ADDCHAR --
 *	Emit one decoded character.  Relies on the enclosing function
 *	having locals named target, targsize and tarindex and returning
 *	bool.  With a NULL target only the count is advanced; otherwise
 *	the character is stored, and the enclosing function returns
 *	false if the target buffer is already full.
 */
#define	ADDCHAR(x)							\
	do {								\
		if (target != NULL && tarindex >= targsize) {		\
			return false;					\
		}							\
		if (target != NULL) {					\
			target[tarindex] = (x);				\
		}							\
		tarindex++;						\
	} while (/*CONSTCOND*/0)
333
/*
 * _prop_json_intern_decode_uesc_getu16 --
 *	Get the 16-bit value from a "u-escape" ("\uXXXX") that starts at
 *	src[idx].  On success the value is stored in *valp and the index
 *	just past the escape (idx + 6) is returned.  Returns 0, leaving
 *	*valp untouched, if src[idx] does not start a backslash-u escape
 *	or any of the four following characters is not a hex digit.
 */
static unsigned int
_prop_json_intern_decode_uesc_getu16(const char *src, unsigned int idx,
    uint16_t *valp)
{
	unsigned int i;
	uint16_t val;
	unsigned char c;

	if (src[idx] != '\\' || src[idx + 1] != 'u') {
		return 0;
	}

	/*
	 * Accumulate four hex digits, most significant first.  A NUL
	 * is not a hex digit, so a truncated escape fails here before
	 * anything past the terminator is read.
	 */
	for (val = 0, i = 2; i < 6; i++) {
		val <<= 4;
		c = src[idx + i];
		if (c >= 'A' && c <= 'F') {
			val |= 10 + (c - 'A');
		} else if (c >= 'a' && c <= 'f') {
			val |= 10 + (c - 'a');
		} else if (c >= '0' && c <= '9') {
			val |= c - '0';
		} else {
			return 0;
		}
	}

	*valp = val;
	return idx + i;
}

/* UTF-16 surrogate ranges; these macros are private to the decoder. */
#define	HS_FIRST	0xd800
#define	HS_LAST		0xdbff
#define	HS_SHIFT	10
#define	LS_FIRST	0xdc00
#define	LS_LAST		0xdfff

#define	HIGH_SURROGATE_P(x)						\
	((x) >= HS_FIRST && (x) <= HS_LAST)
#define	LOW_SURROGATE_P(x)						\
	((x) >= LS_FIRST && (x) <= LS_LAST)
#define	SURROGATE_P(x)							\
	(HIGH_SURROGATE_P(x) || LOW_SURROGATE_P(x))

/*
 * _prop_json_intern_decode_uesc --
 *	Decode a JSON UTF-16 "u-escape" ("\uXXXX"), or a surrogate pair
 *	of two consecutive escapes, found at the start of src, and
 *	convert the resulting code point to UTF-8.
 *
 *	src	Input, positioned at the backslash.
 *	c	Output buffer; must have room for 4 bytes.
 *	cszp	Receives the number of bytes stored in c (1 to 4).
 *
 *	Returns the number of input characters consumed (6, or 12 for a
 *	surrogate pair), or 0 on error: malformed escape, a high
 *	surrogate not followed by a low surrogate, a low surrogate on
 *	its own, or an escape that encodes NUL.
 */
static int
_prop_json_intern_decode_uesc(const char *src, char *c, unsigned int *cszp)
{
	unsigned int idx = 0;
	uint32_t code;
	uint16_t code16[2] = { 0, 0 };

	idx = _prop_json_intern_decode_uesc_getu16(src, idx, &code16[0]);
	if (idx == 0) {
		return 0;
	}
	if (! SURROGATE_P(code16[0])) {
		/* Simple case: not a surrogate pair */
		code = code16[0];
	} else if (HIGH_SURROGATE_P(code16[0])) {
		idx = _prop_json_intern_decode_uesc_getu16(src, idx,
		    &code16[1]);
		if (idx == 0) {
			return 0;
		}
		/* Next code must be the low surrogate. */
		if (! LOW_SURROGATE_P(code16[1])) {
			return 0;
		}
		code = (((uint32_t)code16[0] - HS_FIRST) << HS_SHIFT) +
		       (          code16[1] - LS_FIRST) +
		       0x10000;
	} else {
		/* Got the low surrogate first; this is an error. */
		return 0;
	}

	/*
	 * Ok, we have the code point.  Now convert it to UTF-8.
	 * First we'll just split into nybbles.
	 */
	uint8_t u = (code >> 20) & 0xf;
	uint8_t v = (code >> 16) & 0xf;
	uint8_t w = (code >> 12) & 0xf;
	uint8_t x = (code >>  8) & 0xf;
	uint8_t y = (code >>  4) & 0xf;
	uint8_t z = (code      ) & 0xf;

	/*
	 * ...and swizzle the nybbles accordingly.
	 *
	 * N.B. we explicitly disallow inserting a NUL into the string
	 * by way of a \uXXXX escape.
	 */
	if (code == 0) {
		/* Not allowed. */
		return 0;
	} else if (/*code >= 0x0000 &&*/ code <= 0x007f) {
		c[0] = (char)code;	/* == (y << 4) | z */
		*cszp = 1;
	} else if (/*code >= 0x0080 &&*/ code <= 0x07ff) {
		c[0] = 0xc0 | (x << 2) | (y >> 2);
		c[1] = 0x80 | ((y & 3) << 4) | z;
		*cszp = 2;
	} else if (/*code >= 0x0800 &&*/ code <= 0xffff) {
		c[0] = 0xe0 | w;
		c[1] = 0x80 | (x << 2) | (y >> 2);
		c[2] = 0x80 | ((y & 3) << 4) | z;
		*cszp = 3;
	} else if (/*code >= 0x010000 &&*/ code <= 0x10ffff) {
		c[0] = 0xf0 | ((u & 1) << 2) | (v >> 2);
		c[1] = 0x80 | ((v & 3) << 4) | w;
		c[2] = 0x80 | (x << 2) | (y >> 2);
		c[3] = 0x80 | ((y & 3) << 4) | z;
		*cszp = 4;
	} else {
		/* Invalid code. */
		return 0;
	}

	return idx;	/* advance input by this much */
}

#undef HS_FIRST
#undef HS_LAST
#undef HS_SHIFT
#undef LS_FIRST
#undef LS_LAST
#undef HIGH_SURROGATE_P
#undef LOW_SURROGATE_P
#undef SURROGATE_P
470
471 /*
472 * _prop_json_intern_decode_string --
473 * Decode a JSON-encoded string.
474 */
475 static bool
476 _prop_json_intern_decode_string(struct _prop_object_internalize_context *ctx,
477 char *target, size_t targsize, size_t *sizep,
478 const char **cpp)
479 {
480 const char *src;
481 size_t tarindex;
482 char c[4];
483 unsigned int csz;
484
485 tarindex = 0;
486 src = ctx->poic_cp;
487
488 for (;;) {
489 if (_PROP_EOF(*src)) {
490 return false;
491 }
492 if (*src == '"') {
493 break;
494 }
495
496 csz = 1;
497 if ((c[0] = *src) == '\\') {
498 int advance = 2;
499
500 switch ((c[0] = src[1])) {
501 case '"': /* quotation mark */
502 case '\\': /* reverse solidus */
503 case '/': /* solidus */
504 /* identity mapping */
505 break;
506
507 case 'b': /* backspace */
508 c[0] = 0x08;
509 break;
510
511 case 'f': /* form feed */
512 c[0] = 0x0c;
513 break;
514
515 case 'n': /* line feed */
516 c[0] = 0x0a;
517 break;
518
519 case 'r': /* carriage return */
520 c[0] = 0x0d;
521 break;
522
523 case 't': /* tab */
524 c[0] = 0x09;
525 break;
526
527 case 'u':
528 advance = _prop_json_intern_decode_uesc(
529 src, c, &csz);
530 if (advance == 0) {
531 return false;
532 }
533 break;
534
535 default:
536 /* invalid escape */
537 return false;
538 }
539 src += advance;
540 } else {
541 src++;
542 }
543 for (unsigned int i = 0; i < csz; i++) {
544 ADDCHAR(c[i]);
545 }
546 }
547
548 _PROP_ASSERT(*src == '"');
549 if (sizep != NULL) {
550 *sizep = tarindex;
551 }
552 if (cpp != NULL) {
553 *cpp = src;
554 }
555
556 return true;
557 }
558
559 /*
560 * _prop_xml_intern_decode_string --
561 * Decode an XML-encoded string.
562 */
563 static bool
564 _prop_xml_intern_decode_string(struct _prop_object_internalize_context *ctx,
565 char *target, size_t targsize, size_t *sizep,
566 const char **cpp)
567 {
568 const char *src;
569 size_t tarindex;
570 char c;
571
572 tarindex = 0;
573 src = ctx->poic_cp;
574
575 for (;;) {
576 if (_PROP_EOF(*src)) {
577 return true;
578 }
579 if (*src == '<') {
580 break;
581 }
582
583 if ((c = *src) == '&') {
584 if (src[1] == 'a' &&
585 src[2] == 'm' &&
586 src[3] == 'p' &&
587 src[4] == ';') {
588 c = '&';
589 src += 5;
590 } else if (src[1] == 'l' &&
591 src[2] == 't' &&
592 src[3] == ';') {
593 c = '<';
594 src += 4;
595 } else if (src[1] == 'g' &&
596 src[2] == 't' &&
597 src[3] == ';') {
598 c = '>';
599 src += 4;
600 } else if (src[1] == 'a' &&
601 src[2] == 'p' &&
602 src[3] == 'o' &&
603 src[4] == 's' &&
604 src[5] == ';') {
605 c = '\'';
606 src += 6;
607 } else if (src[1] == 'q' &&
608 src[2] == 'u' &&
609 src[3] == 'o' &&
610 src[4] == 't' &&
611 src[5] == ';') {
612 c = '\"';
613 src += 6;
614 } else {
615 return false;
616 }
617 } else {
618 src++;
619 }
620 ADDCHAR(c);
621 }
622
623 _PROP_ASSERT(*src == '<');
624 if (sizep != NULL) {
625 *sizep = tarindex;
626 }
627 if (cpp != NULL) {
628 *cpp = src;
629 }
630
631 return true;
632 }
633
634 #undef ADDCHAR
635
636 /*
637 * _prop_intern_decode_string --
638 * Decode an encoded string.
639 */
640 bool
641 _prop_intern_decode_string(struct _prop_object_internalize_context *ctx,
642 char *target, size_t targsize, size_t *sizep,
643 const char **cpp)
644 {
645 _PROP_ASSERT(ctx->poic_format == PROP_FORMAT_XML ||
646 ctx->poic_format == PROP_FORMAT_JSON);
647
648 switch (ctx->poic_format) {
649 case PROP_FORMAT_JSON:
650 return _prop_json_intern_decode_string(ctx, target, targsize,
651 sizep, cpp);
652
653 default: /* PROP_FORMAT_XML */
654 return _prop_xml_intern_decode_string(ctx, target, targsize,
655 sizep, cpp);
656 }
657 }
658
659 /*
660 * _prop_intern_context_alloc --
661 * Allocate an internalize context.
662 */
663 static struct _prop_object_internalize_context *
664 _prop_intern_context_alloc(const char *data, prop_format_t fmt)
665 {
666 struct _prop_object_internalize_context *ctx;
667
668 ctx = _PROP_MALLOC(sizeof(*ctx), M_TEMP);
669 if (ctx == NULL) {
670 return NULL;
671 }
672
673 ctx->poic_format = fmt;
674 ctx->poic_data = ctx->poic_cp = data;
675
676 /*
677 * If we're digesting JSON, check for a byte order mark and
678 * skip it, if present. We should never see one, but we're
679 * allowed to detect and ignore it. (RFC 8259 section 8.1)
680 */
681 if (fmt == PROP_FORMAT_JSON) {
682 if (((unsigned char)data[0] == 0xff &&
683 (unsigned char)data[1] == 0xfe) ||
684 ((unsigned char)data[0] == 0xfe &&
685 (unsigned char)data[1] == 0xff)) {
686 ctx->poic_cp = data + 2;
687 }
688
689 /* No additional processing work to do for JSON. */
690 return ctx;
691 }
692
693 /*
694 * Skip any whitespace and XML preamble stuff that we don't
695 * know about / care about.
696 */
697 for (;;) {
698 data = _prop_intern_skip_whitespace(data);
699 if (_PROP_EOF(*data) || *data != '<') {
700 goto bad;
701 }
702
703 #define MATCH(str) (strncmp(&data[1], str, strlen(str)) == 0)
704
705 /*
706 * Skip over the XML preamble that Apple XML property
707 * lists usually include at the top of the file.
708 */
709 if (MATCH("?xml ") ||
710 MATCH("!DOCTYPE plist")) {
711 while (*data != '>' && !_PROP_EOF(*data)) {
712 data++;
713 }
714 if (_PROP_EOF(*data)) {
715 goto bad;
716 }
717 data++; /* advance past the '>' */
718 continue;
719 }
720
721 if (MATCH("<!--")) {
722 ctx->poic_cp = data + 4;
723 if (_prop_xml_intern_skip_comment(ctx) == false) {
724 goto bad;
725 }
726 data = ctx->poic_cp;
727 continue;
728 }
729
730 #undef MATCH
731
732 /*
733 * We don't think we should skip it, so let's hope we can
734 * parse it.
735 */
736 break;
737 }
738
739 ctx->poic_cp = data;
740 return ctx;
741 bad:
742 _PROP_FREE(ctx, M_TEMP);
743 return NULL;
744 }
745
746 /*
747 * _prop_intern_context_free --
748 * Free an internalize context.
749 */
750 static void
751 _prop_intern_context_free(struct _prop_object_internalize_context *ctx)
752 {
753 _PROP_FREE(ctx, M_TEMP);
754 }
755
756 /*
757 * _prop_object_internalize_json --
758 * Internalize a property list from JSON data.
759 */
760 static prop_object_t
761 _prop_object_internalize_json(struct _prop_object_internalize_context *ctx,
762 const struct _prop_object_type_tags *initial_tag __unused)
763 {
764 prop_object_t obj, parent_obj;
765 void *data, *iter;
766 prop_object_internalizer_continue_t iter_func;
767 struct _prop_stack stack;
768 bool (*intern)(prop_stack_t, prop_object_t *,
769 struct _prop_object_internalize_context *);
770
771 _prop_stack_init(&stack);
772
773 match_start:
774 intern = NULL;
775 ctx->poic_tagname = ctx->poic_tagattr = ctx->poic_tagattrval = NULL;
776 ctx->poic_tagname_len = ctx->poic_tagattr_len =
777 ctx->poic_tagattrval_len = 0;
778 ctx->poic_is_empty_element = false;
779 ctx->poic_cp = _prop_intern_skip_whitespace(ctx->poic_cp);
780 switch (ctx->poic_cp[0]) {
781 case '{':
782 ctx->poic_cp++;
783 intern = _prop_dictionary_internalize;
784 break;
785
786 case '[':
787 ctx->poic_cp++;
788 intern = _prop_array_internalize;
789 break;
790
791 case '"':
792 ctx->poic_cp++;
793 /* XXX Slightly gross. */
794 if (*ctx->poic_cp == '"') {
795 ctx->poic_cp++;
796 ctx->poic_is_empty_element = true;
797 }
798 intern = _prop_string_internalize;
799 break;
800
801 case 't':
802 if (ctx->poic_cp[1] == 'r' &&
803 ctx->poic_cp[2] == 'u' &&
804 ctx->poic_cp[3] == 'e') {
805 /* XXX Slightly gross. */
806 ctx->poic_tagname = ctx->poic_cp;
807 ctx->poic_tagname_len = 4;
808 ctx->poic_is_empty_element = true;
809 intern = _prop_bool_internalize;
810 ctx->poic_cp += 4;
811 }
812 break;
813
814 case 'f':
815 if (ctx->poic_cp[1] == 'a' &&
816 ctx->poic_cp[2] == 'l' &&
817 ctx->poic_cp[3] == 's' &&
818 ctx->poic_cp[4] == 'e') {
819 /* XXX Slightly gross. */
820 ctx->poic_tagname = ctx->poic_cp;
821 ctx->poic_tagname_len = 5;
822 ctx->poic_is_empty_element = true;
823 intern = _prop_bool_internalize;
824 ctx->poic_cp += 5;
825 }
826 break;
827
828 default:
829 if (ctx->poic_cp[0] == '+' ||
830 ctx->poic_cp[0] == '-' ||
831 (ctx->poic_cp[0] >= '0' && ctx->poic_cp[0] <= '9')) {
832 intern = _prop_number_internalize;
833 }
834 break;
835 }
836
837 if (intern == NULL) {
838 while (_prop_stack_pop(&stack, &obj, &iter, &data, NULL)) {
839 iter_func = (prop_object_internalizer_continue_t)iter;
840 (*iter_func)(&stack, &obj, ctx, data, NULL);
841 }
842 return NULL;
843 }
844
845 obj = NULL;
846 if ((*intern)(&stack, &obj, ctx) == false) {
847 goto match_start;
848 }
849
850 parent_obj = obj;
851 while (_prop_stack_pop(&stack, &parent_obj, &iter, &data, NULL)) {
852 iter_func = (prop_object_internalizer_continue_t)iter;
853 if ((*iter_func)(&stack, &parent_obj, ctx, data,
854 obj) == false) {
855 goto match_start;
856 }
857 obj = parent_obj;
858 }
859
860 /* Ensure there's no trailing junk. */
861 if (parent_obj != NULL) {
862 ctx->poic_cp = _prop_intern_skip_whitespace(ctx->poic_cp);
863 if (!_PROP_EOF(*ctx->poic_cp)) {
864 prop_object_release(parent_obj);
865 parent_obj = NULL;
866 }
867 }
868 return parent_obj;
869 }
870
871 /*
872 * _prop_object_internalize_xml --
873 * Internalize a property list from XML data.
874 */
875 static prop_object_t
876 _prop_object_internalize_xml(struct _prop_object_internalize_context *ctx,
877 const struct _prop_object_type_tags *initial_tag)
878 {
879 prop_object_t obj = NULL;
880
881 /* We start with a <plist> tag. */
882 if (_prop_xml_intern_find_tag(ctx, "plist",
883 _PROP_TAG_TYPE_START) == false) {
884 goto out;
885 }
886
887 /* Plist elements cannot be empty. */
888 if (ctx->poic_is_empty_element) {
889 goto out;
890 }
891
892 /*
893 * We don't understand any plist attributes, but Apple XML
894 * property lists often have a "version" attribute. If we
895 * see that one, we simply ignore it.
896 */
897 if (ctx->poic_tagattr != NULL &&
898 !_PROP_TAGATTR_MATCH(ctx, "version")) {
899 goto out;
900 }
901
902 /* Next we expect to see opening main tag. */
903 if (_prop_xml_intern_find_tag(ctx,
904 initial_tag != NULL ? initial_tag->xml_tag
905 : NULL,
906 _PROP_TAG_TYPE_START) == false) {
907 goto out;
908 }
909
910 obj = _prop_xml_intern_by_tag(ctx);
911 if (obj == NULL) {
912 goto out;
913 }
914
915 /*
916 * We've advanced past the closing main tag.
917 * Now we want </plist>.
918 */
919 if (_prop_xml_intern_find_tag(ctx, "plist",
920 _PROP_TAG_TYPE_END) == false) {
921 prop_object_release(obj);
922 obj = NULL;
923 }
924 out:
925 return obj;
926 }
927
928 /*
929 * _prop_object_internalize --
930 * Internalize a property list from a NUL-terminated data blob.
931 */
932 prop_object_t
933 _prop_object_internalize(const char *data,
934 const struct _prop_object_type_tags *initial_tag)
935 {
936 struct _prop_object_internalize_context *ctx;
937 prop_object_t obj;
938 prop_format_t fmt;
939
940 /*
941 * Skip all whitespace until and look at the first
942 * non-whitespace character to determine the format:
943 * An XML plist will always have '<' as the first non-ws
944 * character. If we encounter something else, we assume
945 * it is JSON.
946 */
947 data = _prop_intern_skip_whitespace(data);
948 if (_PROP_EOF(*data)) {
949 return NULL;
950 }
951
952 fmt = *data == '<' ? PROP_FORMAT_XML : PROP_FORMAT_JSON;
953
954 ctx = _prop_intern_context_alloc(data, fmt);
955 if (ctx == NULL) {
956 return NULL;
957 }
958
959 switch (fmt) {
960 case PROP_FORMAT_JSON:
961 obj = _prop_object_internalize_json(ctx, initial_tag);
962 break;
963
964 default: /* PROP_FORMAT_XML */
965 obj = _prop_object_internalize_xml(ctx, initial_tag);
966 break;
967 }
968
969 _prop_intern_context_free(ctx);
970 return obj;
971 }
972
973 _PROP_EXPORT prop_object_t
974 prop_object_internalize(const char *data)
975 {
976 return _prop_object_internalize(data, NULL);
977 }
978
979 #if !defined(_KERNEL) && !defined(_STANDALONE)
/*
 * Bookkeeping for a file mapped into memory for internalizing.
 */
struct _prop_intern_mapped_file {
	char	*pimf_data;	/* start of the mapping */
	size_t	pimf_mapsize;	/* length passed to munmap() */
};
984
985 /*
986 * _prop_intern_map_file --
987 * Map a file for the purpose of internalizing it.
988 */
989 static struct _prop_intern_mapped_file *
990 _prop_intern_map_file(const char *fname)
991 {
992 struct stat sb;
993 struct _prop_intern_mapped_file *mf;
994 size_t pgsize = (size_t)sysconf(_SC_PAGESIZE);
995 size_t pgmask = pgsize - 1;
996 int fd;
997
998 mf = _PROP_MALLOC(sizeof(*mf), M_TEMP);
999 if (mf == NULL) {
1000 return NULL;
1001 }
1002
1003 fd = open(fname, O_RDONLY, 0400);
1004 if (fd == -1) {
1005 _PROP_FREE(mf, M_TEMP);
1006 return NULL;
1007 }
1008
1009 if (fstat(fd, &sb) == -1) {
1010 (void) close(fd);
1011 _PROP_FREE(mf, M_TEMP);
1012 return NULL;
1013 }
1014 mf->pimf_mapsize = ((size_t)sb.st_size + pgmask) & ~pgmask;
1015 if (mf->pimf_mapsize < (size_t)sb.st_size) {
1016 (void) close(fd);
1017 _PROP_FREE(mf, M_TEMP);
1018 return NULL;
1019 }
1020
1021 /*
1022 * If the file length is an integral number of pages, then we
1023 * need to map a guard page at the end in order to provide the
1024 * necessary NUL-termination of the buffer.
1025 */
1026 bool need_guard = (sb.st_size & pgmask) == 0;
1027
1028 mf->pimf_data = mmap(NULL, need_guard ? mf->pimf_mapsize + pgsize
1029 : mf->pimf_mapsize,
1030 PROT_READ, MAP_FILE|MAP_SHARED, fd, (off_t)0);
1031 (void) close(fd);
1032 if (mf->pimf_data == MAP_FAILED) {
1033 _PROP_FREE(mf, M_TEMP);
1034 return (NULL);
1035 }
1036 #ifdef POSIX_MADV_SEQUENTIAL
1037 (void) posix_madvise(mf->pimf_data, mf->pimf_mapsize,
1038 POSIX_MADV_SEQUENTIAL);
1039 #endif
1040
1041 if (need_guard) {
1042 if (mmap(mf->pimf_data + mf->pimf_mapsize,
1043 pgsize, PROT_READ,
1044 MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1,
1045 (off_t)0) == MAP_FAILED) {
1046 (void) munmap(mf->pimf_data, mf->pimf_mapsize);
1047 _PROP_FREE(mf, M_TEMP);
1048 return NULL;
1049 }
1050 mf->pimf_mapsize += pgsize;
1051 }
1052 return mf;
1053 }
1054
1055 /*
1056 * _prop_intern_unmap_file --
1057 * Unmap a file previously mapped for internalizing.
1058 */
1059 static void
1060 _prop_intern_unmap_file(struct _prop_intern_mapped_file *mf)
1061 {
1062 #ifdef POSIX_MADV_DONTNEED
1063 (void) posix_madvise(mf->pimf_data, mf->pimf_mapsize,
1064 POSIX_MADV_DONTNEED);
1065 #endif
1066 (void) munmap(mf->pimf_data, mf->pimf_mapsize);
1067 _PROP_FREE(mf, M_TEMP);
1068 }
1069
1070 /*
1071 * _prop_object_internalize_from_file --
1072 * Internalize a property list from a file.
1073 */
1074 prop_object_t
1075 _prop_object_internalize_from_file(const char *fname,
1076 const struct _prop_object_type_tags *initial_tag)
1077 {
1078 struct _prop_intern_mapped_file *mf;
1079 prop_object_t obj;
1080
1081 mf = _prop_intern_map_file(fname);
1082 if (mf == NULL) {
1083 return NULL;
1084 }
1085 obj = _prop_object_internalize(mf->pimf_data, initial_tag);
1086 _prop_intern_unmap_file(mf);
1087
1088 return obj;
1089 }
1090
1091 _PROP_EXPORT prop_object_t
1092 prop_object_internalize_from_file(const char *fname)
1093 {
1094 return _prop_object_internalize_from_file(fname, NULL);
1095 }
1096 #endif /* !_KERNEL && !_STANDALONE */
1097