Home | History | Annotate | Line # | Download | only in libprop
      1 /*	$NetBSD: prop_intern.c,v 1.2 2025/05/14 03:25:46 thorpej Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2006, 2007, 2025 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include "prop_object_impl.h"
     33 #include <prop/prop_object.h>
     34 
     35 #if !defined(_KERNEL) && !defined(_STANDALONE)
     36 #include <sys/mman.h>
     37 #include <sys/stat.h>
     38 #include <fcntl.h>
     39 #include <unistd.h>
     40 #endif /* !_KERNEL && !_STANDALONE */
     41 
/*
 * _prop_intern_skip_whitespace --
 *	Skip any span of whitespace.  Returns a pointer to the first
 *	character at or after cp for which _PROP_ISSPACE() is false.
 */
const char *
_prop_intern_skip_whitespace(const char *cp)
{
	for (; _PROP_ISSPACE(*cp); cp++) {
		continue;
	}
	return cp;
}
     54 
/*
 * _prop_intern_match --
 *	Compare two length-delimited character streams.  Returns true
 *	only when both have the same length and identical contents.
 */
bool
_prop_intern_match(const char *str1, size_t len1,
		   const char *str2, size_t len2)
{
	if (len1 != len2) {
		return false;
	}
	return memcmp(str1, str2, len1) == 0;
}
     65 
     66 /*
     67  * _prop_xml_intern_skip_comment --
     68  *	Skip the body and end tag of an XML comment.
     69  */
     70 static bool
     71 _prop_xml_intern_skip_comment(struct _prop_object_internalize_context *ctx)
     72 {
     73 	const char *cp = ctx->poic_cp;
     74 
     75 	for (cp = ctx->poic_cp; !_PROP_EOF(*cp); cp++) {
     76 		if (cp[0] == '-' &&
     77 		    cp[1] == '-' &&
     78 		    cp[2] == '>') {
     79 			ctx->poic_cp = cp + 3;
     80 			return true;
     81 		}
     82 	}
     83 
     84 	return false;		/* ran out of buffer */
     85 }
     86 
     87 /*
     88  * _prop_xml_intern_find_tag --
     89  *	Find the next tag in an XML stream.  Optionally compare the found
     90  *	tag to an expected tag name.  State of the context is undefined
     91  *	if this routine returns false.  Upon success, the context points
     92  *	to the first octet after the tag.
     93  */
     94 bool
     95 _prop_xml_intern_find_tag(struct _prop_object_internalize_context *ctx,
     96     const char *tag, _prop_tag_type_t type)
     97 {
     98 	const char *cp;
     99 	size_t taglen;
    100 
    101 	taglen = tag != NULL ? strlen(tag) : 0;
    102 
    103  start_over:
    104 	cp = ctx->poic_cp;
    105 
    106 	/*
    107 	 * Find the start of the tag.
    108 	 */
    109 	cp = _prop_intern_skip_whitespace(cp);
    110 	if (*cp != '<') {
    111 		return false;
    112 	}
    113 
    114 	ctx->poic_tag_start = cp++;
    115 	if (_PROP_EOF(*cp)) {
    116 		return false;
    117 	}
    118 
    119 	if (*cp == '!') {
    120 		if (cp[1] != '-' || cp[2] != '-') {
    121 			return false;
    122 		}
    123 		/*
    124 		 * Comment block -- only allowed if we are allowed to
    125 		 * return a start tag.
    126 		 */
    127 		if (type == _PROP_TAG_TYPE_END) {
    128 			return false;
    129 		}
    130 		ctx->poic_cp = cp + 3;
    131 		if (_prop_xml_intern_skip_comment(ctx) == false) {
    132 			return false;
    133 		}
    134 		goto start_over;
    135 	}
    136 
    137 	if (*cp == '/') {
    138 		if (type != _PROP_TAG_TYPE_END &&
    139 		    type != _PROP_TAG_TYPE_EITHER) {
    140 			return false;
    141 		}
    142 		cp++;
    143 		if (_PROP_EOF(*cp)) {
    144 			return false;
    145 		}
    146 		ctx->poic_tag_type = _PROP_TAG_TYPE_END;
    147 	} else {
    148 		if (type != _PROP_TAG_TYPE_START &&
    149 		    type != _PROP_TAG_TYPE_EITHER) {
    150 			return false;
    151 		}
    152 		ctx->poic_tag_type = _PROP_TAG_TYPE_START;
    153 	}
    154 
    155 	ctx->poic_tagname = cp;
    156 
    157 	while (!_PROP_ISSPACE(*cp) && *cp != '/' && *cp != '>') {
    158 		if (_PROP_EOF(*cp)) {
    159 			return false;
    160 		}
    161 		cp++;
    162 	}
    163 
    164 	ctx->poic_tagname_len = cp - ctx->poic_tagname;
    165 
    166 	/* Make sure this is the tag we're looking for. */
    167 	if (tag != NULL &&
    168 	    (taglen != ctx->poic_tagname_len ||
    169 	     memcmp(tag, ctx->poic_tagname, taglen) != 0)) {
    170 		return false;
    171 	}
    172 
    173 	/* Check for empty tag. */
    174 	if (*cp == '/') {
    175 		if (ctx->poic_tag_type != _PROP_TAG_TYPE_START) {
    176 			return false;		/* only valid on start tags */
    177 		}
    178 		ctx->poic_is_empty_element = true;
    179 		cp++;
    180 		if (_PROP_EOF(*cp) || *cp != '>') {
    181 			return false;
    182 		}
    183 	} else {
    184 		ctx->poic_is_empty_element = false;
    185 	}
    186 
    187 	/* Easy case of no arguments. */
    188 	if (*cp == '>') {
    189 		ctx->poic_tagattr = NULL;
    190 		ctx->poic_tagattr_len = 0;
    191 		ctx->poic_tagattrval = NULL;
    192 		ctx->poic_tagattrval_len = 0;
    193 		ctx->poic_cp = cp + 1;
    194 		return true;
    195 	}
    196 
    197 	_PROP_ASSERT(!_PROP_EOF(*cp));
    198 	cp++;
    199 	if (_PROP_EOF(*cp)) {
    200 		return false;
    201 	}
    202 
    203 	cp = _prop_intern_skip_whitespace(cp);
    204 	if (_PROP_EOF(*cp)) {
    205 		return false;
    206 	}
    207 
    208 	ctx->poic_tagattr = cp;
    209 
    210 	while (!_PROP_ISSPACE(*cp) && *cp != '=') {
    211 		if (_PROP_EOF(*cp)) {
    212 			return false;
    213 		}
    214 		cp++;
    215 	}
    216 
    217 	ctx->poic_tagattr_len = cp - ctx->poic_tagattr;
    218 
    219 	cp++;
    220 	if (*cp != '\"') {
    221 		return false;
    222 	}
    223 	cp++;
    224 	if (_PROP_EOF(*cp)) {
    225 		return false;
    226 	}
    227 
    228 	ctx->poic_tagattrval = cp;
    229 	while (*cp != '\"') {
    230 		if (_PROP_EOF(*cp)) {
    231 			return false;
    232 		}
    233 		cp++;
    234 	}
    235 	ctx->poic_tagattrval_len = cp - ctx->poic_tagattrval;
    236 
    237 	cp++;
    238 	if (*cp != '>') {
    239 		return false;
    240 	}
    241 
    242 	ctx->poic_cp = cp + 1;
    243 	return true;
    244 }
    245 
    246 #define	INTERNALIZER(t, f)			\
    247 {	t,	sizeof(t) - 1,		f	}
    248 
    249 static const struct _prop_object_internalizer {
    250 	const char			*poi_tag;
    251 	size_t				poi_taglen;
    252 	prop_object_internalizer_t	poi_intern;
    253 } _prop_object_internalizer_table[] = {
    254 	INTERNALIZER("array", _prop_array_internalize),
    255 
    256 	INTERNALIZER("true", _prop_bool_internalize),
    257 	INTERNALIZER("false", _prop_bool_internalize),
    258 
    259 	INTERNALIZER("data", _prop_data_internalize),
    260 
    261 	INTERNALIZER("dict", _prop_dictionary_internalize),
    262 
    263 	INTERNALIZER("integer", _prop_number_internalize),
    264 
    265 	INTERNALIZER("string", _prop_string_internalize),
    266 
    267 	{ 0, 0, NULL }
    268 };
    269 
    270 #undef INTERNALIZER
    271 
    272 /*
    273  * _prop_xml_intern_by_tag --
    274  *	Determine the object type from the tag in the context and
    275  *	internalize it.
    276  */
    277 static prop_object_t
    278 _prop_xml_intern_by_tag(struct _prop_object_internalize_context *ctx)
    279 {
    280 	const struct _prop_object_internalizer *poi;
    281 	prop_object_t obj, parent_obj;
    282 	void *data, *iter;
    283 	prop_object_internalizer_continue_t iter_func;
    284 	struct _prop_stack stack;
    285 
    286 	_prop_stack_init(&stack);
    287 
    288   match_start:
    289  	for (poi = _prop_object_internalizer_table;
    290 	     poi->poi_tag != NULL; poi++) {
    291 		if (_prop_intern_match(ctx->poic_tagname,
    292 				       ctx->poic_tagname_len,
    293 				       poi->poi_tag,
    294 				       poi->poi_taglen)) {
    295 			break;
    296 		}
    297 	}
    298 	if (poi == NULL || poi->poi_tag == NULL) {
    299 		while (_prop_stack_pop(&stack, &obj, &iter, &data, NULL)) {
    300 			iter_func = (prop_object_internalizer_continue_t)iter;
    301 			(*iter_func)(&stack, &obj, ctx, data, NULL);
    302 		}
    303 		return NULL;
    304 	}
    305 
    306 	obj = NULL;
    307 	if (!(*poi->poi_intern)(&stack, &obj, ctx)) {
    308 		goto match_start;
    309 	}
    310 
    311 	parent_obj = obj;
    312 	while (_prop_stack_pop(&stack, &parent_obj, &iter, &data, NULL)) {
    313 		iter_func = (prop_object_internalizer_continue_t)iter;
    314 		if (!(*iter_func)(&stack, &parent_obj, ctx, data, obj)) {
    315 			goto match_start;
    316 		}
    317 		obj = parent_obj;
    318 	}
    319 
    320 	return parent_obj;
    321 }
    322 
/*
 * ADDCHAR --
 *	Append one decoded byte to the output of a string decoder.
 *	Must be used inside a function returning bool that has
 *	"target", "targsize" and "tarindex" in scope.  When target is
 *	NULL nothing is stored and only the count advances.  When the
 *	buffer is already full the enclosing function returns false.
 */
#define	ADDCHAR(x)							\
	do {								\
		if (target != NULL) {					\
			if (targsize <= tarindex) {			\
				return false;				\
			}						\
			target[tarindex] = (x);				\
		}							\
		tarindex += 1;						\
	} while (/*CONSTCOND*/0)
    333 
/*
 * _prop_json_intern_decode_uesc_getu16 --
 *	Get the 16-bit value from a "u-escape" ("\uXXXX") located at
 *	src[idx].  Upper- and lower-case hex digits are accepted.
 *
 *	On success the value is stored in *valp and the index of the
 *	first character after the escape (idx + 6) is returned.  On a
 *	malformed escape 0 is returned and *valp is left untouched.
 */
static unsigned int
_prop_json_intern_decode_uesc_getu16(const char *src, unsigned int idx,
    uint16_t *valp)
{
	const char *esc = &src[idx];
	uint16_t accum = 0;

	if (esc[0] != '\\' || esc[1] != 'u') {
		return 0;
	}

	/* Exactly four hex digits follow the "\u". */
	for (unsigned int pos = 2; pos < 6; pos++) {
		const unsigned char ch = esc[pos];
		unsigned int digit;

		if (ch >= '0' && ch <= '9') {
			digit = ch - '0';
		} else if (ch >= 'a' && ch <= 'f') {
			digit = 10 + (ch - 'a');
		} else if (ch >= 'A' && ch <= 'F') {
			digit = 10 + (ch - 'A');
		} else {
			/* Includes running into the end of the buffer. */
			return 0;
		}
		accum = (uint16_t)((accum << 4) | digit);
	}

	*valp = accum;
	return idx + 6;
}
    367 
/*
 * UTF-16 surrogate ranges, private to _prop_json_intern_decode_uesc().
 * A code point above 0xffff is written as a high surrogate followed by
 * a low surrogate, each contributing HS_SHIFT (10) bits.
 */
#define	HS_FIRST	0xd800
#define	HS_LAST		0xdbff
#define	HS_SHIFT	10
#define	LS_FIRST	0xdc00
#define	LS_LAST		0xdfff

#define	HIGH_SURROGATE_P(x)	\
	((x) >= HS_FIRST && (x) <= HS_LAST)
#define	LOW_SURROGATE_P(x)	\
	((x) >= LS_FIRST && (x) <= LS_LAST)
#define	SURROGATE_P(x)		\
	(HIGH_SURROGATE_P(x) || LOW_SURROGATE_P(x))

/*
 * _prop_json_intern_decode_uesc --
 *	Decode a JSON UTF-16 "u-escape" ("\uXXXX"), or a surrogate pair
 *	of two such escapes, into UTF-8.
 *
 *	src	Points at the backslash of the first escape.
 *	c	Receives the UTF-8 encoding; must have room for 4 bytes.
 *	cszp	Receives the number of bytes stored in c (1 to 4).
 *
 *	Returns the number of input characters consumed (6 or 12), or 0
 *	if the escape is malformed, is an unpaired or misordered
 *	surrogate, or encodes U+0000.
 */
static int
_prop_json_intern_decode_uesc(const char *src, char *c, unsigned int *cszp)
{
	unsigned int idx = 0;
	uint32_t code;
	uint16_t code16[2] = { 0, 0 };

	idx = _prop_json_intern_decode_uesc_getu16(src, idx, &code16[0]);
	if (idx == 0) {
		return 0;
	}
	if (! SURROGATE_P(code16[0])) {
		/* Simple case: not a surrogate pair */
		code = code16[0];
	} else if (HIGH_SURROGATE_P(code16[0])) {
		idx = _prop_json_intern_decode_uesc_getu16(src, idx,
							   &code16[1]);
		if (idx == 0) {
			return 0;
		}
		/* Next code must be the low surrogate. */
		if (! LOW_SURROGATE_P(code16[1])) {
			return 0;
		}
		code = (((uint32_t)code16[0] - HS_FIRST) << HS_SHIFT) +
		        (          code16[1] - LS_FIRST)              +
		       0x10000;
	} else {
		/* Got the low surrogate first; this is an error. */
		return 0;
	}

	/*
	 * Ok, we have the code point.  Now convert it to UTF-8.
	 * First we'll just split into nybbles.
	 */
	uint8_t u = (code >> 20) & 0xf;
	uint8_t v = (code >> 16) & 0xf;
	uint8_t w = (code >> 12) & 0xf;
	uint8_t x = (code >>  8) & 0xf;
	uint8_t y = (code >>  4) & 0xf;
	uint8_t z = (code      ) & 0xf;

	/*
	 * ...and swizzle the nybbles accordingly.
	 *
	 * N.B. we explicitly disallow inserting a NUL into the string
	 * by way of a \uXXXX escape.
	 */
	if (code == 0) {
		/* Not allowed. */
		return 0;
	} else if (/*code >= 0x0000 &&*/ code <= 0x007f) {
		/* 0yyyzzzz */
		c[0] = (char)code;	/* == (y << 4) | z */
		*cszp = 1;
	} else if (/*code >= 0x0080 &&*/ code <= 0x07ff) {
		/* 110xxxyy 10yyzzzz */
		c[0] = 0xc0 | (x << 2) | (y >> 2);
		c[1] = 0x80 | ((y & 3) << 4) | z;
		*cszp = 2;
	} else if (/*code >= 0x0800 &&*/ code <= 0xffff) {
		/* 1110wwww 10xxxxyy 10yyzzzz */
		c[0] = 0xe0 | w;
		c[1] = 0x80 | (x << 2) | (y >> 2);
		c[2] = 0x80 | ((y & 3) << 4) | z;
		*cszp = 3;
	} else if (/*code >= 0x010000 &&*/ code <= 0x10ffff) {
		/* 11110uvv 10vvwwww 10xxxxyy 10yyzzzz */
		c[0] = 0xf0 | ((u & 1) << 2) | (v >> 2);
		c[1] = 0x80 | ((v & 3) << 4) | w;
		c[2] = 0x80 | (x << 2) | (y >> 2);
		c[3] = 0x80 | ((y & 3) << 4) | z;
		*cszp = 4;
	} else {
		/* Invalid code. */
		return 0;
	}

	return idx;	/* advance input by this much */
}

/* Drop every helper macro defined above, including HS_SHIFT. */
#undef HS_FIRST
#undef HS_LAST
#undef HS_SHIFT
#undef LS_FIRST
#undef LS_LAST
#undef HIGH_SURROGATE_P
#undef LOW_SURROGATE_P
#undef SURROGATE_P
    470 
    471 /*
    472  * _prop_json_intern_decode_string --
    473  *	Decode a JSON-encoded string.
    474  */
    475 static bool
    476 _prop_json_intern_decode_string(struct _prop_object_internalize_context *ctx,
    477     char *target, size_t targsize, size_t *sizep,
    478     const char **cpp)
    479 {
    480 	const char *src;
    481 	size_t tarindex;
    482 	char c[4];
    483 	unsigned int csz;
    484 
    485 	tarindex = 0;
    486 	src = ctx->poic_cp;
    487 
    488 	for (;;) {
    489 		if (_PROP_EOF(*src)) {
    490 			return false;
    491 		}
    492 		if (*src == '"') {
    493 			break;
    494 		}
    495 
    496 		csz = 1;
    497 		if ((c[0] = *src) == '\\') {
    498 			int advance = 2;
    499 
    500 			switch ((c[0] = src[1])) {
    501 			case '"':		/* quotation mark */
    502 			case '\\':		/* reverse solidus */
    503 			case '/':		/* solidus */
    504 				/* identity mapping */
    505 				break;
    506 
    507 			case 'b':		/* backspace */
    508 				c[0] = 0x08;
    509 				break;
    510 
    511 			case 'f':		/* form feed */
    512 				c[0] = 0x0c;
    513 				break;
    514 
    515 			case 'n':		/* line feed */
    516 				c[0] = 0x0a;
    517 				break;
    518 
    519 			case 'r':		/* carriage return */
    520 				c[0] = 0x0d;
    521 				break;
    522 
    523 			case 't':		/* tab */
    524 				c[0] = 0x09;
    525 				break;
    526 
    527 			case 'u':
    528 				advance = _prop_json_intern_decode_uesc(
    529 				    src, c, &csz);
    530 				if (advance == 0) {
    531 					return false;
    532 				}
    533 				break;
    534 
    535 			default:
    536 				/* invalid escape */
    537 				return false;
    538 			}
    539 			src += advance;
    540 		} else {
    541 			src++;
    542 		}
    543 		for (unsigned int i = 0; i < csz; i++) {
    544 			ADDCHAR(c[i]);
    545 		}
    546 	}
    547 
    548 	_PROP_ASSERT(*src == '"');
    549 	if (sizep != NULL) {
    550 		*sizep = tarindex;
    551 	}
    552 	if (cpp != NULL) {
    553 		*cpp = src;
    554 	}
    555 
    556 	return true;
    557 }
    558 
    559 /*
    560  * _prop_xml_intern_decode_string --
    561  *	Decode an XML-encoded string.
    562  */
    563 static bool
    564 _prop_xml_intern_decode_string(struct _prop_object_internalize_context *ctx,
    565     char *target, size_t targsize, size_t *sizep,
    566     const char **cpp)
    567 {
    568 	const char *src;
    569 	size_t tarindex;
    570 	char c;
    571 
    572 	tarindex = 0;
    573 	src = ctx->poic_cp;
    574 
    575 	for (;;) {
    576 		if (_PROP_EOF(*src)) {
    577 			return true;
    578 		}
    579 		if (*src == '<') {
    580 			break;
    581 		}
    582 
    583 		if ((c = *src) == '&') {
    584 			if (src[1] == 'a' &&
    585 			    src[2] == 'm' &&
    586 			    src[3] == 'p' &&
    587 			    src[4] == ';') {
    588 				c = '&';
    589 				src += 5;
    590 			} else if (src[1] == 'l' &&
    591 				   src[2] == 't' &&
    592 				   src[3] == ';') {
    593 				c = '<';
    594 				src += 4;
    595 			} else if (src[1] == 'g' &&
    596 				   src[2] == 't' &&
    597 				   src[3] == ';') {
    598 				c = '>';
    599 				src += 4;
    600 			} else if (src[1] == 'a' &&
    601 				   src[2] == 'p' &&
    602 				   src[3] == 'o' &&
    603 				   src[4] == 's' &&
    604 				   src[5] == ';') {
    605 				c = '\'';
    606 				src += 6;
    607 			} else if (src[1] == 'q' &&
    608 				   src[2] == 'u' &&
    609 				   src[3] == 'o' &&
    610 				   src[4] == 't' &&
    611 				   src[5] == ';') {
    612 				c = '\"';
    613 				src += 6;
    614 			} else {
    615 				return false;
    616 			}
    617 		} else {
    618 			src++;
    619 		}
    620 		ADDCHAR(c);
    621 	}
    622 
    623 	_PROP_ASSERT(*src == '<');
    624 	if (sizep != NULL) {
    625 		*sizep = tarindex;
    626 	}
    627 	if (cpp != NULL) {
    628 		*cpp = src;
    629 	}
    630 
    631 	return true;
    632 }
    633 
    634 #undef ADDCHAR
    635 
    636 /*
    637  * _prop_intern_decode_string --
    638  *	Decode an encoded string.
    639  */
    640 bool
    641 _prop_intern_decode_string(struct _prop_object_internalize_context *ctx,
    642     char *target, size_t targsize, size_t *sizep,
    643     const char **cpp)
    644 {
    645 	_PROP_ASSERT(ctx->poic_format == PROP_FORMAT_XML ||
    646 		     ctx->poic_format == PROP_FORMAT_JSON);
    647 
    648 	switch (ctx->poic_format) {
    649 	case PROP_FORMAT_JSON:
    650 		return _prop_json_intern_decode_string(ctx, target, targsize,
    651 		    sizep, cpp);
    652 
    653 	default:		/* PROP_FORMAT_XML */
    654 		return _prop_xml_intern_decode_string(ctx, target, targsize,
    655 		    sizep, cpp);
    656 	}
    657 }
    658 
    659 /*
    660  * _prop_intern_context_alloc --
    661  *	Allocate an internalize context.
    662  */
    663 static struct _prop_object_internalize_context *
    664 _prop_intern_context_alloc(const char *data, prop_format_t fmt)
    665 {
    666 	struct _prop_object_internalize_context *ctx;
    667 
    668 	ctx = _PROP_MALLOC(sizeof(*ctx), M_TEMP);
    669 	if (ctx == NULL) {
    670 		return NULL;
    671 	}
    672 
    673 	ctx->poic_format = fmt;
    674 	ctx->poic_data = ctx->poic_cp = data;
    675 
    676 	/*
    677 	 * If we're digesting JSON, check for a byte order mark and
    678 	 * skip it, if present.  We should never see one, but we're
    679 	 * allowed to detect and ignore it.  (RFC 8259 section 8.1)
    680 	 */
    681 	if (fmt == PROP_FORMAT_JSON) {
    682 		if (((unsigned char)data[0] == 0xff &&
    683 		     (unsigned char)data[1] == 0xfe) ||
    684 		    ((unsigned char)data[0] == 0xfe &&
    685 		     (unsigned char)data[1] == 0xff)) {
    686 			ctx->poic_cp = data + 2;
    687 		}
    688 
    689 		/* No additional processing work to do for JSON. */
    690 		return ctx;
    691 	}
    692 
    693 	/*
    694 	 * Skip any whitespace and XML preamble stuff that we don't
    695 	 * know about / care about.
    696 	 */
    697 	for (;;) {
    698 		data = _prop_intern_skip_whitespace(data);
    699 		if (_PROP_EOF(*data) || *data != '<') {
    700 			goto bad;
    701 		}
    702 
    703 #define	MATCH(str)	(strncmp(&data[1], str, strlen(str)) == 0)
    704 
    705 		/*
    706 		 * Skip over the XML preamble that Apple XML property
    707 		 * lists usually include at the top of the file.
    708 		 */
    709 		if (MATCH("?xml ") ||
    710 		    MATCH("!DOCTYPE plist")) {
    711 			while (*data != '>' && !_PROP_EOF(*data)) {
    712 				data++;
    713 			}
    714 			if (_PROP_EOF(*data)) {
    715 				goto bad;
    716 			}
    717 			data++;	/* advance past the '>' */
    718 			continue;
    719 		}
    720 
    721 		if (MATCH("<!--")) {
    722 			ctx->poic_cp = data + 4;
    723 			if (_prop_xml_intern_skip_comment(ctx) == false) {
    724 				goto bad;
    725 			}
    726 			data = ctx->poic_cp;
    727 			continue;
    728 		}
    729 
    730 #undef MATCH
    731 
    732 		/*
    733 		 * We don't think we should skip it, so let's hope we can
    734 		 * parse it.
    735 		 */
    736 		break;
    737 	}
    738 
    739 	ctx->poic_cp = data;
    740 	return ctx;
    741  bad:
    742 	_PROP_FREE(ctx, M_TEMP);
    743 	return NULL;
    744 }
    745 
    746 /*
    747  * _prop_intern_context_free --
    748  *	Free an internalize context.
    749  */
    750 static void
    751 _prop_intern_context_free(struct _prop_object_internalize_context *ctx)
    752 {
    753 	_PROP_FREE(ctx, M_TEMP);
    754 }
    755 
    756 /*
    757  * _prop_object_internalize_json --
    758  *	Internalize a property list from JSON data.
    759  */
    760 static prop_object_t
    761 _prop_object_internalize_json(struct _prop_object_internalize_context *ctx,
    762     const struct _prop_object_type_tags *initial_tag __unused)
    763 {
    764 	prop_object_t obj, parent_obj;
    765 	void *data, *iter;
    766 	prop_object_internalizer_continue_t iter_func;
    767 	struct _prop_stack stack;
    768 	bool (*intern)(prop_stack_t, prop_object_t *,
    769 		       struct _prop_object_internalize_context *);
    770 
    771 	_prop_stack_init(&stack);
    772 
    773  match_start:
    774 	intern = NULL;
    775 	ctx->poic_tagname = ctx->poic_tagattr = ctx->poic_tagattrval = NULL;
    776 	ctx->poic_tagname_len = ctx->poic_tagattr_len =
    777 	    ctx->poic_tagattrval_len = 0;
    778 	ctx->poic_is_empty_element = false;
    779 	ctx->poic_cp = _prop_intern_skip_whitespace(ctx->poic_cp);
    780 	switch (ctx->poic_cp[0]) {
    781 	case '{':
    782 		ctx->poic_cp++;
    783 		intern = _prop_dictionary_internalize;
    784 		break;
    785 
    786 	case '[':
    787 		ctx->poic_cp++;
    788 		intern = _prop_array_internalize;
    789 		break;
    790 
    791 	case '"':
    792 		ctx->poic_cp++;
    793 		/* XXX Slightly gross. */
    794 		if (*ctx->poic_cp == '"') {
    795 			ctx->poic_cp++;
    796 			ctx->poic_is_empty_element = true;
    797 		}
    798 		intern = _prop_string_internalize;
    799 		break;
    800 
    801 	case 't':
    802 		if (ctx->poic_cp[1] == 'r' &&
    803 		    ctx->poic_cp[2] == 'u' &&
    804 		    ctx->poic_cp[3] == 'e') {
    805 			/* XXX Slightly gross. */
    806 			ctx->poic_tagname = ctx->poic_cp;
    807 			ctx->poic_tagname_len = 4;
    808 			ctx->poic_is_empty_element = true;
    809 			intern = _prop_bool_internalize;
    810 			ctx->poic_cp += 4;
    811 		}
    812 		break;
    813 
    814 	case 'f':
    815 		if (ctx->poic_cp[1] == 'a' &&
    816 		    ctx->poic_cp[2] == 'l' &&
    817 		    ctx->poic_cp[3] == 's' &&
    818 		    ctx->poic_cp[4] == 'e') {
    819 			/* XXX Slightly gross. */
    820 			ctx->poic_tagname = ctx->poic_cp;
    821 			ctx->poic_tagname_len = 5;
    822 			ctx->poic_is_empty_element = true;
    823 			intern = _prop_bool_internalize;
    824 			ctx->poic_cp += 5;
    825 		}
    826 		break;
    827 
    828 	default:
    829 		if (ctx->poic_cp[0] == '+' ||
    830 		    ctx->poic_cp[0] == '-' ||
    831 		    (ctx->poic_cp[0] >= '0' && ctx->poic_cp[0] <= '9')) {
    832 			intern = _prop_number_internalize;
    833 		}
    834 		break;
    835 	}
    836 
    837 	if (intern == NULL) {
    838 		while (_prop_stack_pop(&stack, &obj, &iter, &data, NULL)) {
    839 			iter_func = (prop_object_internalizer_continue_t)iter;
    840 			(*iter_func)(&stack, &obj, ctx, data, NULL);
    841 		}
    842 		return NULL;
    843 	}
    844 
    845 	obj = NULL;
    846 	if ((*intern)(&stack, &obj, ctx) == false) {
    847 		goto match_start;
    848 	}
    849 
    850 	parent_obj = obj;
    851 	while (_prop_stack_pop(&stack, &parent_obj, &iter, &data, NULL)) {
    852 		iter_func = (prop_object_internalizer_continue_t)iter;
    853 		if ((*iter_func)(&stack, &parent_obj, ctx, data,
    854 				 obj) == false) {
    855 			goto match_start;
    856 		}
    857 		obj = parent_obj;
    858 	}
    859 
    860 	/* Ensure there's no trailing junk. */
    861 	if (parent_obj != NULL) {
    862 		ctx->poic_cp = _prop_intern_skip_whitespace(ctx->poic_cp);
    863 		if (!_PROP_EOF(*ctx->poic_cp)) {
    864 			prop_object_release(parent_obj);
    865 			parent_obj = NULL;
    866 		}
    867 	}
    868 	return parent_obj;
    869 }
    870 
    871 /*
    872  * _prop_object_internalize_xml --
    873  *	Internalize a property list from XML data.
    874  */
    875 static prop_object_t
    876 _prop_object_internalize_xml(struct _prop_object_internalize_context *ctx,
    877     const struct _prop_object_type_tags *initial_tag)
    878 {
    879 	prop_object_t obj = NULL;
    880 
    881 	/* We start with a <plist> tag. */
    882 	if (_prop_xml_intern_find_tag(ctx, "plist",
    883 				      _PROP_TAG_TYPE_START) == false) {
    884 		goto out;
    885 	}
    886 
    887 	/* Plist elements cannot be empty. */
    888 	if (ctx->poic_is_empty_element) {
    889 		goto out;
    890 	}
    891 
    892 	/*
    893 	 * We don't understand any plist attributes, but Apple XML
    894 	 * property lists often have a "version" attribute.  If we
    895 	 * see that one, we simply ignore it.
    896 	 */
    897 	if (ctx->poic_tagattr != NULL &&
    898 	    !_PROP_TAGATTR_MATCH(ctx, "version")) {
    899 		goto out;
    900 	}
    901 
    902 	/* Next we expect to see opening main tag. */
    903 	if (_prop_xml_intern_find_tag(ctx,
    904 				initial_tag != NULL ? initial_tag->xml_tag
    905 						    : NULL,
    906 				_PROP_TAG_TYPE_START) == false) {
    907 		goto out;
    908 	}
    909 
    910 	obj = _prop_xml_intern_by_tag(ctx);
    911 	if (obj == NULL) {
    912 		goto out;
    913 	}
    914 
    915 	/*
    916 	 * We've advanced past the closing main tag.
    917 	 * Now we want </plist>.
    918 	 */
    919 	if (_prop_xml_intern_find_tag(ctx, "plist",
    920 				      _PROP_TAG_TYPE_END) == false) {
    921 		prop_object_release(obj);
    922 		obj = NULL;
    923 	}
    924  out:
    925 	return obj;
    926 }
    927 
    928 /*
    929  * _prop_object_internalize --
    930  *	Internalize a property list from a NUL-terminated data blob.
    931  */
    932 prop_object_t
    933 _prop_object_internalize(const char *data,
    934     const struct _prop_object_type_tags *initial_tag)
    935 {
    936 	struct _prop_object_internalize_context *ctx;
    937 	prop_object_t obj;
    938 	prop_format_t fmt;
    939 
    940 	/*
    941 	 * Skip all whitespace until and look at the first
    942 	 * non-whitespace character to determine the format:
    943 	 * An XML plist will always have '<' as the first non-ws
    944 	 * character.  If we encounter something else, we assume
    945 	 * it is JSON.
    946 	 */
    947 	data = _prop_intern_skip_whitespace(data);
    948 	if (_PROP_EOF(*data)) {
    949 		return NULL;
    950 	}
    951 
    952 	fmt = *data == '<' ? PROP_FORMAT_XML : PROP_FORMAT_JSON;
    953 
    954 	ctx = _prop_intern_context_alloc(data, fmt);
    955 	if (ctx == NULL) {
    956 		return NULL;
    957 	}
    958 
    959 	switch (fmt) {
    960 	case PROP_FORMAT_JSON:
    961 		obj = _prop_object_internalize_json(ctx, initial_tag);
    962 		break;
    963 
    964 	default:		/* PROP_FORMAT_XML */
    965 		obj = _prop_object_internalize_xml(ctx, initial_tag);
    966 		break;
    967 	}
    968 
    969 	_prop_intern_context_free(ctx);
    970 	return obj;
    971 }
    972 
    973 _PROP_EXPORT prop_object_t
    974 prop_object_internalize(const char *data)
    975 {
    976 	return _prop_object_internalize(data, NULL);
    977 }
    978 
    979 #if !defined(_KERNEL) && !defined(_STANDALONE)
/*
 * Bookkeeping for a file mapped by _prop_intern_map_file().
 */
struct _prop_intern_mapped_file {
	char	*pimf_data;	/* base address of the mapping */
	size_t	 pimf_mapsize;	/* bytes mapped at pimf_data */
};
    984 
    985 /*
    986  * _prop_intern_map_file --
    987  *	Map a file for the purpose of internalizing it.
    988  */
    989 static struct _prop_intern_mapped_file *
    990 _prop_intern_map_file(const char *fname)
    991 {
    992 	struct stat sb;
    993 	struct _prop_intern_mapped_file *mf;
    994 	size_t pgsize = (size_t)sysconf(_SC_PAGESIZE);
    995 	size_t pgmask = pgsize - 1;
    996 	int fd;
    997 
    998 	mf = _PROP_MALLOC(sizeof(*mf), M_TEMP);
    999 	if (mf == NULL) {
   1000 		return NULL;
   1001 	}
   1002 
   1003 	fd = open(fname, O_RDONLY, 0400);
   1004 	if (fd == -1) {
   1005 		_PROP_FREE(mf, M_TEMP);
   1006 		return NULL;
   1007 	}
   1008 
   1009 	if (fstat(fd, &sb) == -1) {
   1010 		(void) close(fd);
   1011 		_PROP_FREE(mf, M_TEMP);
   1012 		return NULL;
   1013 	}
   1014 	mf->pimf_mapsize = ((size_t)sb.st_size + pgmask) & ~pgmask;
   1015 	if (mf->pimf_mapsize < (size_t)sb.st_size) {
   1016 		(void) close(fd);
   1017 		_PROP_FREE(mf, M_TEMP);
   1018 		return NULL;
   1019 	}
   1020 
   1021 	/*
   1022 	 * If the file length is an integral number of pages, then we
   1023 	 * need to map a guard page at the end in order to provide the
   1024 	 * necessary NUL-termination of the buffer.
   1025 	 */
   1026 	bool need_guard = (sb.st_size & pgmask) == 0;
   1027 
   1028 	mf->pimf_data = mmap(NULL, need_guard ? mf->pimf_mapsize + pgsize
   1029 					      : mf->pimf_mapsize,
   1030 			     PROT_READ, MAP_FILE|MAP_SHARED, fd, (off_t)0);
   1031 	(void) close(fd);
   1032 	if (mf->pimf_data == MAP_FAILED) {
   1033 		_PROP_FREE(mf, M_TEMP);
   1034 		return (NULL);
   1035 	}
   1036 #ifdef POSIX_MADV_SEQUENTIAL
   1037 	(void) posix_madvise(mf->pimf_data, mf->pimf_mapsize,
   1038 	    POSIX_MADV_SEQUENTIAL);
   1039 #endif
   1040 
   1041 	if (need_guard) {
   1042 		if (mmap(mf->pimf_data + mf->pimf_mapsize,
   1043 			 pgsize, PROT_READ,
   1044 			 MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1,
   1045 			 (off_t)0) == MAP_FAILED) {
   1046 			(void) munmap(mf->pimf_data, mf->pimf_mapsize);
   1047 			_PROP_FREE(mf, M_TEMP);
   1048 			return NULL;
   1049 		}
   1050 		mf->pimf_mapsize += pgsize;
   1051 	}
   1052 	return mf;
   1053 }
   1054 
   1055 /*
   1056  * _prop_intern_unmap_file --
   1057  *	Unmap a file previously mapped for internalizing.
   1058  */
   1059 static void
   1060 _prop_intern_unmap_file(struct _prop_intern_mapped_file *mf)
   1061 {
   1062 #ifdef POSIX_MADV_DONTNEED
   1063 	(void) posix_madvise(mf->pimf_data, mf->pimf_mapsize,
   1064 	    POSIX_MADV_DONTNEED);
   1065 #endif
   1066 	(void) munmap(mf->pimf_data, mf->pimf_mapsize);
   1067 	_PROP_FREE(mf, M_TEMP);
   1068 }
   1069 
   1070 /*
   1071  * _prop_object_internalize_from_file --
   1072  *	Internalize a property list from a file.
   1073  */
   1074 prop_object_t
   1075 _prop_object_internalize_from_file(const char *fname,
   1076     const struct _prop_object_type_tags *initial_tag)
   1077 {
   1078 	struct _prop_intern_mapped_file *mf;
   1079 	prop_object_t obj;
   1080 
   1081 	mf = _prop_intern_map_file(fname);
   1082 	if (mf == NULL) {
   1083 		return NULL;
   1084 	}
   1085 	obj = _prop_object_internalize(mf->pimf_data, initial_tag);
   1086 	_prop_intern_unmap_file(mf);
   1087 
   1088 	return obj;
   1089 }
   1090 
   1091 _PROP_EXPORT prop_object_t
   1092 prop_object_internalize_from_file(const char *fname)
   1093 {
   1094 	return _prop_object_internalize_from_file(fname, NULL);
   1095 }
   1096 #endif /* !_KERNEL && !_STANDALONE */
   1097