/* $NetBSD: prop_intern.c,v 1.2 2025/05/14 03:25:46 thorpej Exp $ */

/*-
 * Copyright (c) 2006, 2007, 2025 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "prop_object_impl.h"
#include <prop/prop_object.h>

#if !defined(_KERNEL) && !defined(_STANDALONE)
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#endif /* !_KERNEL && !_STANDALONE */

/*
 * _prop_intern_skip_whitespace --
 *	Skip a span of whitespace.
45 */ 46 const char * 47 _prop_intern_skip_whitespace(const char *cp) 48 { 49 while (_PROP_ISSPACE(*cp)) { 50 cp++; 51 } 52 return cp; 53 } 54 55 /* 56 * _prop_intern_match -- 57 * Returns true if the two character streams match. 58 */ 59 bool 60 _prop_intern_match(const char *str1, size_t len1, 61 const char *str2, size_t len2) 62 { 63 return (len1 == len2 && memcmp(str1, str2, len1) == 0); 64 } 65 66 /* 67 * _prop_xml_intern_skip_comment -- 68 * Skip the body and end tag of an XML comment. 69 */ 70 static bool 71 _prop_xml_intern_skip_comment(struct _prop_object_internalize_context *ctx) 72 { 73 const char *cp = ctx->poic_cp; 74 75 for (cp = ctx->poic_cp; !_PROP_EOF(*cp); cp++) { 76 if (cp[0] == '-' && 77 cp[1] == '-' && 78 cp[2] == '>') { 79 ctx->poic_cp = cp + 3; 80 return true; 81 } 82 } 83 84 return false; /* ran out of buffer */ 85 } 86 87 /* 88 * _prop_xml_intern_find_tag -- 89 * Find the next tag in an XML stream. Optionally compare the found 90 * tag to an expected tag name. State of the context is undefined 91 * if this routine returns false. Upon success, the context points 92 * to the first octet after the tag. 93 */ 94 bool 95 _prop_xml_intern_find_tag(struct _prop_object_internalize_context *ctx, 96 const char *tag, _prop_tag_type_t type) 97 { 98 const char *cp; 99 size_t taglen; 100 101 taglen = tag != NULL ? strlen(tag) : 0; 102 103 start_over: 104 cp = ctx->poic_cp; 105 106 /* 107 * Find the start of the tag. 108 */ 109 cp = _prop_intern_skip_whitespace(cp); 110 if (*cp != '<') { 111 return false; 112 } 113 114 ctx->poic_tag_start = cp++; 115 if (_PROP_EOF(*cp)) { 116 return false; 117 } 118 119 if (*cp == '!') { 120 if (cp[1] != '-' || cp[2] != '-') { 121 return false; 122 } 123 /* 124 * Comment block -- only allowed if we are allowed to 125 * return a start tag. 
126 */ 127 if (type == _PROP_TAG_TYPE_END) { 128 return false; 129 } 130 ctx->poic_cp = cp + 3; 131 if (_prop_xml_intern_skip_comment(ctx) == false) { 132 return false; 133 } 134 goto start_over; 135 } 136 137 if (*cp == '/') { 138 if (type != _PROP_TAG_TYPE_END && 139 type != _PROP_TAG_TYPE_EITHER) { 140 return false; 141 } 142 cp++; 143 if (_PROP_EOF(*cp)) { 144 return false; 145 } 146 ctx->poic_tag_type = _PROP_TAG_TYPE_END; 147 } else { 148 if (type != _PROP_TAG_TYPE_START && 149 type != _PROP_TAG_TYPE_EITHER) { 150 return false; 151 } 152 ctx->poic_tag_type = _PROP_TAG_TYPE_START; 153 } 154 155 ctx->poic_tagname = cp; 156 157 while (!_PROP_ISSPACE(*cp) && *cp != '/' && *cp != '>') { 158 if (_PROP_EOF(*cp)) { 159 return false; 160 } 161 cp++; 162 } 163 164 ctx->poic_tagname_len = cp - ctx->poic_tagname; 165 166 /* Make sure this is the tag we're looking for. */ 167 if (tag != NULL && 168 (taglen != ctx->poic_tagname_len || 169 memcmp(tag, ctx->poic_tagname, taglen) != 0)) { 170 return false; 171 } 172 173 /* Check for empty tag. */ 174 if (*cp == '/') { 175 if (ctx->poic_tag_type != _PROP_TAG_TYPE_START) { 176 return false; /* only valid on start tags */ 177 } 178 ctx->poic_is_empty_element = true; 179 cp++; 180 if (_PROP_EOF(*cp) || *cp != '>') { 181 return false; 182 } 183 } else { 184 ctx->poic_is_empty_element = false; 185 } 186 187 /* Easy case of no arguments. 
*/ 188 if (*cp == '>') { 189 ctx->poic_tagattr = NULL; 190 ctx->poic_tagattr_len = 0; 191 ctx->poic_tagattrval = NULL; 192 ctx->poic_tagattrval_len = 0; 193 ctx->poic_cp = cp + 1; 194 return true; 195 } 196 197 _PROP_ASSERT(!_PROP_EOF(*cp)); 198 cp++; 199 if (_PROP_EOF(*cp)) { 200 return false; 201 } 202 203 cp = _prop_intern_skip_whitespace(cp); 204 if (_PROP_EOF(*cp)) { 205 return false; 206 } 207 208 ctx->poic_tagattr = cp; 209 210 while (!_PROP_ISSPACE(*cp) && *cp != '=') { 211 if (_PROP_EOF(*cp)) { 212 return false; 213 } 214 cp++; 215 } 216 217 ctx->poic_tagattr_len = cp - ctx->poic_tagattr; 218 219 cp++; 220 if (*cp != '\"') { 221 return false; 222 } 223 cp++; 224 if (_PROP_EOF(*cp)) { 225 return false; 226 } 227 228 ctx->poic_tagattrval = cp; 229 while (*cp != '\"') { 230 if (_PROP_EOF(*cp)) { 231 return false; 232 } 233 cp++; 234 } 235 ctx->poic_tagattrval_len = cp - ctx->poic_tagattrval; 236 237 cp++; 238 if (*cp != '>') { 239 return false; 240 } 241 242 ctx->poic_cp = cp + 1; 243 return true; 244 } 245 246 #define INTERNALIZER(t, f) \ 247 { t, sizeof(t) - 1, f } 248 249 static const struct _prop_object_internalizer { 250 const char *poi_tag; 251 size_t poi_taglen; 252 prop_object_internalizer_t poi_intern; 253 } _prop_object_internalizer_table[] = { 254 INTERNALIZER("array", _prop_array_internalize), 255 256 INTERNALIZER("true", _prop_bool_internalize), 257 INTERNALIZER("false", _prop_bool_internalize), 258 259 INTERNALIZER("data", _prop_data_internalize), 260 261 INTERNALIZER("dict", _prop_dictionary_internalize), 262 263 INTERNALIZER("integer", _prop_number_internalize), 264 265 INTERNALIZER("string", _prop_string_internalize), 266 267 { 0, 0, NULL } 268 }; 269 270 #undef INTERNALIZER 271 272 /* 273 * _prop_xml_intern_by_tag -- 274 * Determine the object type from the tag in the context and 275 * internalize it. 
276 */ 277 static prop_object_t 278 _prop_xml_intern_by_tag(struct _prop_object_internalize_context *ctx) 279 { 280 const struct _prop_object_internalizer *poi; 281 prop_object_t obj, parent_obj; 282 void *data, *iter; 283 prop_object_internalizer_continue_t iter_func; 284 struct _prop_stack stack; 285 286 _prop_stack_init(&stack); 287 288 match_start: 289 for (poi = _prop_object_internalizer_table; 290 poi->poi_tag != NULL; poi++) { 291 if (_prop_intern_match(ctx->poic_tagname, 292 ctx->poic_tagname_len, 293 poi->poi_tag, 294 poi->poi_taglen)) { 295 break; 296 } 297 } 298 if (poi == NULL || poi->poi_tag == NULL) { 299 while (_prop_stack_pop(&stack, &obj, &iter, &data, NULL)) { 300 iter_func = (prop_object_internalizer_continue_t)iter; 301 (*iter_func)(&stack, &obj, ctx, data, NULL); 302 } 303 return NULL; 304 } 305 306 obj = NULL; 307 if (!(*poi->poi_intern)(&stack, &obj, ctx)) { 308 goto match_start; 309 } 310 311 parent_obj = obj; 312 while (_prop_stack_pop(&stack, &parent_obj, &iter, &data, NULL)) { 313 iter_func = (prop_object_internalizer_continue_t)iter; 314 if (!(*iter_func)(&stack, &parent_obj, ctx, data, obj)) { 315 goto match_start; 316 } 317 obj = parent_obj; 318 } 319 320 return parent_obj; 321 } 322 323 #define ADDCHAR(x) \ 324 do { \ 325 if (target) { \ 326 if (tarindex >= targsize) { \ 327 return false; \ 328 } \ 329 target[tarindex] = (x); \ 330 } \ 331 tarindex++; \ 332 } while (/*CONSTCOND*/0) 333 334 /* 335 * _prop_json_intern_decode_uesc_getu16 -- 336 * Get the 16-bit value from a "u-escape" ("\uXXXX"). 
337 */ 338 static unsigned int 339 _prop_json_intern_decode_uesc_getu16(const char *src, unsigned int idx, 340 uint16_t *valp) 341 { 342 unsigned int i; 343 uint16_t val; 344 unsigned char c; 345 346 if (src[idx] != '\\' || src[idx + 1] != 'u') { 347 return 0; 348 } 349 350 for (val = 0, i = 2; i < 6; i++) { 351 val <<= 4; 352 c = src[idx + i]; 353 if (c >= 'A' && c <= 'F') { 354 val |= 10 + (c - 'A'); 355 } else if (c >= 'a' && c <= 'f') { 356 val |= 10 + (c - 'a'); 357 } else if (c >= '0' && c <= '9') { 358 val |= c - '0'; 359 } else { 360 return 0; 361 } 362 } 363 364 *valp = val; 365 return idx + i; 366 } 367 368 #define HS_FIRST 0xd800 369 #define HS_LAST 0xdbff 370 #define HS_SHIFT 10 371 #define LS_FIRST 0xdc00 372 #define LS_LAST 0xdfff 373 374 #define HIGH_SURROGAGE_P(x) \ 375 ((x) >= HS_FIRST && (x) <= HS_LAST) 376 #define LOW_SURROGATE_P(x) \ 377 ((x) >= LS_FIRST && (x) <= LS_LAST) 378 #define SURROGATE_P(x) \ 379 (HIGH_SURROGAGE_P(x) || LOW_SURROGATE_P(x)) 380 381 /* 382 * _prop_json_intern_decode_uesc -- 383 * Decode a JSON UTF-16 "u-escape" ("\uXXXX"). 384 */ 385 static int 386 _prop_json_intern_decode_uesc(const char *src, char *c, unsigned int *cszp) 387 { 388 unsigned int idx = 0; 389 uint32_t code; 390 uint16_t code16[2] = { 0, 0 }; 391 392 idx = _prop_json_intern_decode_uesc_getu16(src, idx, &code16[0]); 393 if (idx == 0) { 394 return 0; 395 } 396 if (! SURROGATE_P(code16[0])) { 397 /* Simple case: not a surrogate pair */ 398 code = code16[0]; 399 } else if (HIGH_SURROGAGE_P(code16[0])) { 400 idx = _prop_json_intern_decode_uesc_getu16(src, idx, 401 &code16[1]); 402 if (idx == 0) { 403 return 0; 404 } 405 /* Next code must be the low surrogate. */ 406 if (! LOW_SURROGATE_P(code16[1])) { 407 return 0; 408 } 409 code = (((uint32_t)code16[0] - HS_FIRST) << HS_SHIFT) + 410 ( code16[1] - LS_FIRST) + 411 0x10000; 412 } else { 413 /* Got the low surrogate first; this is an error. */ 414 return 0; 415 } 416 417 /* 418 * Ok, we have the code point. 
Now convert it to UTF-8. 419 * First we'll just split into nybbles. 420 */ 421 uint8_t u = (code >> 20) & 0xf; 422 uint8_t v = (code >> 16) & 0xf; 423 uint8_t w = (code >> 12) & 0xf; 424 uint8_t x = (code >> 8) & 0xf; 425 uint8_t y = (code >> 4) & 0xf; 426 uint8_t z = (code ) & 0xf; 427 428 /* 429 * ...and swizzle the nybbles accordingly. 430 * 431 * N.B. we expcitly disallow inserting a NUL into the string 432 * by way of a \uXXXX escape. 433 */ 434 if (code == 0) { 435 /* Not allowed. */ 436 return 0; 437 } else if (/*code >= 0x0000 &&*/ code <= 0x007f) { 438 c[0] = (char)code; /* == (y << 4) | z */ 439 *cszp = 1; 440 } else if (/*code >= 0x0080 &&*/ code <= 0x07ff) { 441 c[0] = 0xc0 | (x << 2) | (y >> 2); 442 c[1] = 0x80 | ((y & 3) << 4) | z; 443 *cszp = 2; 444 } else if (/*code >= 0x0800 &&*/ code <= 0xffff) { 445 c[0] = 0xe0 | w; 446 c[1] = 0x80 | (x << 2) | (y >> 2); 447 c[2] = 0x80 | ((y & 3) << 4) | z; 448 *cszp = 3; 449 } else if (/*code >= 0x010000 &&*/ code <= 0x10ffff) { 450 c[0] = 0xf0 | ((u & 1) << 2) | (v >> 2); 451 c[1] = 0x80 | ((v & 3) << 4) | w; 452 c[2] = 0x80 | (x << 2) | (y >> 2); 453 c[3] = 0x80 | ((y & 3) << 4) | z; 454 *cszp = 4; 455 } else { 456 /* Invalid code. */ 457 return 0; 458 } 459 460 return idx; /* advance input by this much */ 461 } 462 463 #undef HS_FIRST 464 #undef HS_LAST 465 #undef LS_FIRST 466 #undef LS_LAST 467 #undef HIGH_SURROGAGE_P 468 #undef LOW_SURROGATE_P 469 #undef SURROGATE_P 470 471 /* 472 * _prop_json_intern_decode_string -- 473 * Decode a JSON-encoded string. 
474 */ 475 static bool 476 _prop_json_intern_decode_string(struct _prop_object_internalize_context *ctx, 477 char *target, size_t targsize, size_t *sizep, 478 const char **cpp) 479 { 480 const char *src; 481 size_t tarindex; 482 char c[4]; 483 unsigned int csz; 484 485 tarindex = 0; 486 src = ctx->poic_cp; 487 488 for (;;) { 489 if (_PROP_EOF(*src)) { 490 return false; 491 } 492 if (*src == '"') { 493 break; 494 } 495 496 csz = 1; 497 if ((c[0] = *src) == '\\') { 498 int advance = 2; 499 500 switch ((c[0] = src[1])) { 501 case '"': /* quotation mark */ 502 case '\\': /* reverse solidus */ 503 case '/': /* solidus */ 504 /* identity mapping */ 505 break; 506 507 case 'b': /* backspace */ 508 c[0] = 0x08; 509 break; 510 511 case 'f': /* form feed */ 512 c[0] = 0x0c; 513 break; 514 515 case 'n': /* line feed */ 516 c[0] = 0x0a; 517 break; 518 519 case 'r': /* carriage return */ 520 c[0] = 0x0d; 521 break; 522 523 case 't': /* tab */ 524 c[0] = 0x09; 525 break; 526 527 case 'u': 528 advance = _prop_json_intern_decode_uesc( 529 src, c, &csz); 530 if (advance == 0) { 531 return false; 532 } 533 break; 534 535 default: 536 /* invalid escape */ 537 return false; 538 } 539 src += advance; 540 } else { 541 src++; 542 } 543 for (unsigned int i = 0; i < csz; i++) { 544 ADDCHAR(c[i]); 545 } 546 } 547 548 _PROP_ASSERT(*src == '"'); 549 if (sizep != NULL) { 550 *sizep = tarindex; 551 } 552 if (cpp != NULL) { 553 *cpp = src; 554 } 555 556 return true; 557 } 558 559 /* 560 * _prop_xml_intern_decode_string -- 561 * Decode an XML-encoded string. 
562 */ 563 static bool 564 _prop_xml_intern_decode_string(struct _prop_object_internalize_context *ctx, 565 char *target, size_t targsize, size_t *sizep, 566 const char **cpp) 567 { 568 const char *src; 569 size_t tarindex; 570 char c; 571 572 tarindex = 0; 573 src = ctx->poic_cp; 574 575 for (;;) { 576 if (_PROP_EOF(*src)) { 577 return true; 578 } 579 if (*src == '<') { 580 break; 581 } 582 583 if ((c = *src) == '&') { 584 if (src[1] == 'a' && 585 src[2] == 'm' && 586 src[3] == 'p' && 587 src[4] == ';') { 588 c = '&'; 589 src += 5; 590 } else if (src[1] == 'l' && 591 src[2] == 't' && 592 src[3] == ';') { 593 c = '<'; 594 src += 4; 595 } else if (src[1] == 'g' && 596 src[2] == 't' && 597 src[3] == ';') { 598 c = '>'; 599 src += 4; 600 } else if (src[1] == 'a' && 601 src[2] == 'p' && 602 src[3] == 'o' && 603 src[4] == 's' && 604 src[5] == ';') { 605 c = '\''; 606 src += 6; 607 } else if (src[1] == 'q' && 608 src[2] == 'u' && 609 src[3] == 'o' && 610 src[4] == 't' && 611 src[5] == ';') { 612 c = '\"'; 613 src += 6; 614 } else { 615 return false; 616 } 617 } else { 618 src++; 619 } 620 ADDCHAR(c); 621 } 622 623 _PROP_ASSERT(*src == '<'); 624 if (sizep != NULL) { 625 *sizep = tarindex; 626 } 627 if (cpp != NULL) { 628 *cpp = src; 629 } 630 631 return true; 632 } 633 634 #undef ADDCHAR 635 636 /* 637 * _prop_intern_decode_string -- 638 * Decode an encoded string. 
639 */ 640 bool 641 _prop_intern_decode_string(struct _prop_object_internalize_context *ctx, 642 char *target, size_t targsize, size_t *sizep, 643 const char **cpp) 644 { 645 _PROP_ASSERT(ctx->poic_format == PROP_FORMAT_XML || 646 ctx->poic_format == PROP_FORMAT_JSON); 647 648 switch (ctx->poic_format) { 649 case PROP_FORMAT_JSON: 650 return _prop_json_intern_decode_string(ctx, target, targsize, 651 sizep, cpp); 652 653 default: /* PROP_FORMAT_XML */ 654 return _prop_xml_intern_decode_string(ctx, target, targsize, 655 sizep, cpp); 656 } 657 } 658 659 /* 660 * _prop_intern_context_alloc -- 661 * Allocate an internalize context. 662 */ 663 static struct _prop_object_internalize_context * 664 _prop_intern_context_alloc(const char *data, prop_format_t fmt) 665 { 666 struct _prop_object_internalize_context *ctx; 667 668 ctx = _PROP_MALLOC(sizeof(*ctx), M_TEMP); 669 if (ctx == NULL) { 670 return NULL; 671 } 672 673 ctx->poic_format = fmt; 674 ctx->poic_data = ctx->poic_cp = data; 675 676 /* 677 * If we're digesting JSON, check for a byte order mark and 678 * skip it, if present. We should never see one, but we're 679 * allowed to detect and ignore it. (RFC 8259 section 8.1) 680 */ 681 if (fmt == PROP_FORMAT_JSON) { 682 if (((unsigned char)data[0] == 0xff && 683 (unsigned char)data[1] == 0xfe) || 684 ((unsigned char)data[0] == 0xfe && 685 (unsigned char)data[1] == 0xff)) { 686 ctx->poic_cp = data + 2; 687 } 688 689 /* No additional processing work to do for JSON. */ 690 return ctx; 691 } 692 693 /* 694 * Skip any whitespace and XML preamble stuff that we don't 695 * know about / care about. 696 */ 697 for (;;) { 698 data = _prop_intern_skip_whitespace(data); 699 if (_PROP_EOF(*data) || *data != '<') { 700 goto bad; 701 } 702 703 #define MATCH(str) (strncmp(&data[1], str, strlen(str)) == 0) 704 705 /* 706 * Skip over the XML preamble that Apple XML property 707 * lists usually include at the top of the file. 
708 */ 709 if (MATCH("?xml ") || 710 MATCH("!DOCTYPE plist")) { 711 while (*data != '>' && !_PROP_EOF(*data)) { 712 data++; 713 } 714 if (_PROP_EOF(*data)) { 715 goto bad; 716 } 717 data++; /* advance past the '>' */ 718 continue; 719 } 720 721 if (MATCH("<!--")) { 722 ctx->poic_cp = data + 4; 723 if (_prop_xml_intern_skip_comment(ctx) == false) { 724 goto bad; 725 } 726 data = ctx->poic_cp; 727 continue; 728 } 729 730 #undef MATCH 731 732 /* 733 * We don't think we should skip it, so let's hope we can 734 * parse it. 735 */ 736 break; 737 } 738 739 ctx->poic_cp = data; 740 return ctx; 741 bad: 742 _PROP_FREE(ctx, M_TEMP); 743 return NULL; 744 } 745 746 /* 747 * _prop_intern_context_free -- 748 * Free an internalize context. 749 */ 750 static void 751 _prop_intern_context_free(struct _prop_object_internalize_context *ctx) 752 { 753 _PROP_FREE(ctx, M_TEMP); 754 } 755 756 /* 757 * _prop_object_internalize_json -- 758 * Internalize a property list from JSON data. 759 */ 760 static prop_object_t 761 _prop_object_internalize_json(struct _prop_object_internalize_context *ctx, 762 const struct _prop_object_type_tags *initial_tag __unused) 763 { 764 prop_object_t obj, parent_obj; 765 void *data, *iter; 766 prop_object_internalizer_continue_t iter_func; 767 struct _prop_stack stack; 768 bool (*intern)(prop_stack_t, prop_object_t *, 769 struct _prop_object_internalize_context *); 770 771 _prop_stack_init(&stack); 772 773 match_start: 774 intern = NULL; 775 ctx->poic_tagname = ctx->poic_tagattr = ctx->poic_tagattrval = NULL; 776 ctx->poic_tagname_len = ctx->poic_tagattr_len = 777 ctx->poic_tagattrval_len = 0; 778 ctx->poic_is_empty_element = false; 779 ctx->poic_cp = _prop_intern_skip_whitespace(ctx->poic_cp); 780 switch (ctx->poic_cp[0]) { 781 case '{': 782 ctx->poic_cp++; 783 intern = _prop_dictionary_internalize; 784 break; 785 786 case '[': 787 ctx->poic_cp++; 788 intern = _prop_array_internalize; 789 break; 790 791 case '"': 792 ctx->poic_cp++; 793 /* XXX Slightly gross. 
*/ 794 if (*ctx->poic_cp == '"') { 795 ctx->poic_cp++; 796 ctx->poic_is_empty_element = true; 797 } 798 intern = _prop_string_internalize; 799 break; 800 801 case 't': 802 if (ctx->poic_cp[1] == 'r' && 803 ctx->poic_cp[2] == 'u' && 804 ctx->poic_cp[3] == 'e') { 805 /* XXX Slightly gross. */ 806 ctx->poic_tagname = ctx->poic_cp; 807 ctx->poic_tagname_len = 4; 808 ctx->poic_is_empty_element = true; 809 intern = _prop_bool_internalize; 810 ctx->poic_cp += 4; 811 } 812 break; 813 814 case 'f': 815 if (ctx->poic_cp[1] == 'a' && 816 ctx->poic_cp[2] == 'l' && 817 ctx->poic_cp[3] == 's' && 818 ctx->poic_cp[4] == 'e') { 819 /* XXX Slightly gross. */ 820 ctx->poic_tagname = ctx->poic_cp; 821 ctx->poic_tagname_len = 5; 822 ctx->poic_is_empty_element = true; 823 intern = _prop_bool_internalize; 824 ctx->poic_cp += 5; 825 } 826 break; 827 828 default: 829 if (ctx->poic_cp[0] == '+' || 830 ctx->poic_cp[0] == '-' || 831 (ctx->poic_cp[0] >= '0' && ctx->poic_cp[0] <= '9')) { 832 intern = _prop_number_internalize; 833 } 834 break; 835 } 836 837 if (intern == NULL) { 838 while (_prop_stack_pop(&stack, &obj, &iter, &data, NULL)) { 839 iter_func = (prop_object_internalizer_continue_t)iter; 840 (*iter_func)(&stack, &obj, ctx, data, NULL); 841 } 842 return NULL; 843 } 844 845 obj = NULL; 846 if ((*intern)(&stack, &obj, ctx) == false) { 847 goto match_start; 848 } 849 850 parent_obj = obj; 851 while (_prop_stack_pop(&stack, &parent_obj, &iter, &data, NULL)) { 852 iter_func = (prop_object_internalizer_continue_t)iter; 853 if ((*iter_func)(&stack, &parent_obj, ctx, data, 854 obj) == false) { 855 goto match_start; 856 } 857 obj = parent_obj; 858 } 859 860 /* Ensure there's no trailing junk. 
*/ 861 if (parent_obj != NULL) { 862 ctx->poic_cp = _prop_intern_skip_whitespace(ctx->poic_cp); 863 if (!_PROP_EOF(*ctx->poic_cp)) { 864 prop_object_release(parent_obj); 865 parent_obj = NULL; 866 } 867 } 868 return parent_obj; 869 } 870 871 /* 872 * _prop_object_internalize_xml -- 873 * Internalize a property list from XML data. 874 */ 875 static prop_object_t 876 _prop_object_internalize_xml(struct _prop_object_internalize_context *ctx, 877 const struct _prop_object_type_tags *initial_tag) 878 { 879 prop_object_t obj = NULL; 880 881 /* We start with a <plist> tag. */ 882 if (_prop_xml_intern_find_tag(ctx, "plist", 883 _PROP_TAG_TYPE_START) == false) { 884 goto out; 885 } 886 887 /* Plist elements cannot be empty. */ 888 if (ctx->poic_is_empty_element) { 889 goto out; 890 } 891 892 /* 893 * We don't understand any plist attributes, but Apple XML 894 * property lists often have a "version" attribute. If we 895 * see that one, we simply ignore it. 896 */ 897 if (ctx->poic_tagattr != NULL && 898 !_PROP_TAGATTR_MATCH(ctx, "version")) { 899 goto out; 900 } 901 902 /* Next we expect to see opening main tag. */ 903 if (_prop_xml_intern_find_tag(ctx, 904 initial_tag != NULL ? initial_tag->xml_tag 905 : NULL, 906 _PROP_TAG_TYPE_START) == false) { 907 goto out; 908 } 909 910 obj = _prop_xml_intern_by_tag(ctx); 911 if (obj == NULL) { 912 goto out; 913 } 914 915 /* 916 * We've advanced past the closing main tag. 917 * Now we want </plist>. 918 */ 919 if (_prop_xml_intern_find_tag(ctx, "plist", 920 _PROP_TAG_TYPE_END) == false) { 921 prop_object_release(obj); 922 obj = NULL; 923 } 924 out: 925 return obj; 926 } 927 928 /* 929 * _prop_object_internalize -- 930 * Internalize a property list from a NUL-terminated data blob. 
931 */ 932 prop_object_t 933 _prop_object_internalize(const char *data, 934 const struct _prop_object_type_tags *initial_tag) 935 { 936 struct _prop_object_internalize_context *ctx; 937 prop_object_t obj; 938 prop_format_t fmt; 939 940 /* 941 * Skip all whitespace until and look at the first 942 * non-whitespace character to determine the format: 943 * An XML plist will always have '<' as the first non-ws 944 * character. If we encounter something else, we assume 945 * it is JSON. 946 */ 947 data = _prop_intern_skip_whitespace(data); 948 if (_PROP_EOF(*data)) { 949 return NULL; 950 } 951 952 fmt = *data == '<' ? PROP_FORMAT_XML : PROP_FORMAT_JSON; 953 954 ctx = _prop_intern_context_alloc(data, fmt); 955 if (ctx == NULL) { 956 return NULL; 957 } 958 959 switch (fmt) { 960 case PROP_FORMAT_JSON: 961 obj = _prop_object_internalize_json(ctx, initial_tag); 962 break; 963 964 default: /* PROP_FORMAT_XML */ 965 obj = _prop_object_internalize_xml(ctx, initial_tag); 966 break; 967 } 968 969 _prop_intern_context_free(ctx); 970 return obj; 971 } 972 973 _PROP_EXPORT prop_object_t 974 prop_object_internalize(const char *data) 975 { 976 return _prop_object_internalize(data, NULL); 977 } 978 979 #if !defined(_KERNEL) && !defined(_STANDALONE) 980 struct _prop_intern_mapped_file { 981 char * pimf_data; 982 size_t pimf_mapsize; 983 }; 984 985 /* 986 * _prop_intern_map_file -- 987 * Map a file for the purpose of internalizing it. 
988 */ 989 static struct _prop_intern_mapped_file * 990 _prop_intern_map_file(const char *fname) 991 { 992 struct stat sb; 993 struct _prop_intern_mapped_file *mf; 994 size_t pgsize = (size_t)sysconf(_SC_PAGESIZE); 995 size_t pgmask = pgsize - 1; 996 int fd; 997 998 mf = _PROP_MALLOC(sizeof(*mf), M_TEMP); 999 if (mf == NULL) { 1000 return NULL; 1001 } 1002 1003 fd = open(fname, O_RDONLY, 0400); 1004 if (fd == -1) { 1005 _PROP_FREE(mf, M_TEMP); 1006 return NULL; 1007 } 1008 1009 if (fstat(fd, &sb) == -1) { 1010 (void) close(fd); 1011 _PROP_FREE(mf, M_TEMP); 1012 return NULL; 1013 } 1014 mf->pimf_mapsize = ((size_t)sb.st_size + pgmask) & ~pgmask; 1015 if (mf->pimf_mapsize < (size_t)sb.st_size) { 1016 (void) close(fd); 1017 _PROP_FREE(mf, M_TEMP); 1018 return NULL; 1019 } 1020 1021 /* 1022 * If the file length is an integral number of pages, then we 1023 * need to map a guard page at the end in order to provide the 1024 * necessary NUL-termination of the buffer. 1025 */ 1026 bool need_guard = (sb.st_size & pgmask) == 0; 1027 1028 mf->pimf_data = mmap(NULL, need_guard ? mf->pimf_mapsize + pgsize 1029 : mf->pimf_mapsize, 1030 PROT_READ, MAP_FILE|MAP_SHARED, fd, (off_t)0); 1031 (void) close(fd); 1032 if (mf->pimf_data == MAP_FAILED) { 1033 _PROP_FREE(mf, M_TEMP); 1034 return (NULL); 1035 } 1036 #ifdef POSIX_MADV_SEQUENTIAL 1037 (void) posix_madvise(mf->pimf_data, mf->pimf_mapsize, 1038 POSIX_MADV_SEQUENTIAL); 1039 #endif 1040 1041 if (need_guard) { 1042 if (mmap(mf->pimf_data + mf->pimf_mapsize, 1043 pgsize, PROT_READ, 1044 MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 1045 (off_t)0) == MAP_FAILED) { 1046 (void) munmap(mf->pimf_data, mf->pimf_mapsize); 1047 _PROP_FREE(mf, M_TEMP); 1048 return NULL; 1049 } 1050 mf->pimf_mapsize += pgsize; 1051 } 1052 return mf; 1053 } 1054 1055 /* 1056 * _prop_intern_unmap_file -- 1057 * Unmap a file previously mapped for internalizing. 
1058 */ 1059 static void 1060 _prop_intern_unmap_file(struct _prop_intern_mapped_file *mf) 1061 { 1062 #ifdef POSIX_MADV_DONTNEED 1063 (void) posix_madvise(mf->pimf_data, mf->pimf_mapsize, 1064 POSIX_MADV_DONTNEED); 1065 #endif 1066 (void) munmap(mf->pimf_data, mf->pimf_mapsize); 1067 _PROP_FREE(mf, M_TEMP); 1068 } 1069 1070 /* 1071 * _prop_object_internalize_from_file -- 1072 * Internalize a property list from a file. 1073 */ 1074 prop_object_t 1075 _prop_object_internalize_from_file(const char *fname, 1076 const struct _prop_object_type_tags *initial_tag) 1077 { 1078 struct _prop_intern_mapped_file *mf; 1079 prop_object_t obj; 1080 1081 mf = _prop_intern_map_file(fname); 1082 if (mf == NULL) { 1083 return NULL; 1084 } 1085 obj = _prop_object_internalize(mf->pimf_data, initial_tag); 1086 _prop_intern_unmap_file(mf); 1087 1088 return obj; 1089 } 1090 1091 _PROP_EXPORT prop_object_t 1092 prop_object_internalize_from_file(const char *fname) 1093 { 1094 return _prop_object_internalize_from_file(fname, NULL); 1095 } 1096 #endif /* !_KERNEL && !_STANDALONE */ 1097