1 1.1 christos #include "test/jemalloc_test.h" 2 1.1 christos 3 1.1 christos #include "jemalloc/internal/util.h" 4 1.1 christos 5 1.1 christos typedef enum { 6 1.1 christos TOKEN_TYPE_NONE, 7 1.1 christos TOKEN_TYPE_ERROR, 8 1.1 christos TOKEN_TYPE_EOI, 9 1.1 christos TOKEN_TYPE_NULL, 10 1.1 christos TOKEN_TYPE_FALSE, 11 1.1 christos TOKEN_TYPE_TRUE, 12 1.1 christos TOKEN_TYPE_LBRACKET, 13 1.1 christos TOKEN_TYPE_RBRACKET, 14 1.1 christos TOKEN_TYPE_LBRACE, 15 1.1 christos TOKEN_TYPE_RBRACE, 16 1.1 christos TOKEN_TYPE_COLON, 17 1.1 christos TOKEN_TYPE_COMMA, 18 1.1 christos TOKEN_TYPE_STRING, 19 1.1 christos TOKEN_TYPE_NUMBER 20 1.1 christos } token_type_t; 21 1.1 christos 22 1.1 christos typedef struct parser_s parser_t; 23 1.1 christos typedef struct { 24 1.1 christos parser_t *parser; 25 1.1 christos token_type_t token_type; 26 1.1 christos size_t pos; 27 1.1 christos size_t len; 28 1.1 christos size_t line; 29 1.1 christos size_t col; 30 1.1 christos } token_t; 31 1.1 christos 32 1.1 christos struct parser_s { 33 1.1 christos bool verbose; 34 1.1 christos char *buf; /* '\0'-terminated. */ 35 1.1 christos size_t len; /* Number of characters preceding '\0' in buf. */ 36 1.1 christos size_t pos; 37 1.1 christos size_t line; 38 1.1 christos size_t col; 39 1.1 christos token_t token; 40 1.1 christos }; 41 1.1 christos 42 1.1 christos static void 43 1.1 christos token_init(token_t *token, parser_t *parser, token_type_t token_type, 44 1.1 christos size_t pos, size_t len, size_t line, size_t col) { 45 1.1 christos token->parser = parser; 46 1.1 christos token->token_type = token_type; 47 1.1 christos token->pos = pos; 48 1.1 christos token->len = len; 49 1.1 christos token->line = line; 50 1.1 christos token->col = col; 51 1.1 christos } 52 1.1 christos 53 1.1 christos static void 54 1.1 christos token_error(token_t *token) { 55 1.1 christos if (!token->parser->verbose) { 56 1.1 christos return; 57 1.1 christos } 58 1.1 christos switch (token->token_type) { 59 1.1 christos case TOKEN_TYPE_NONE: 60 1.1 christos not_reached(); 61 1.1 christos case TOKEN_TYPE_ERROR: 62 1.1 christos malloc_printf("%zu:%zu: Unexpected character in token: ", 63 1.1 christos token->line, token->col); 64 1.1 christos break; 65 1.1 christos default: 66 1.1 christos malloc_printf("%zu:%zu: Unexpected token: ", token->line, 67 1.1 christos token->col); 68 1.1 christos break; 69 1.1 christos } 70 1.1 christos UNUSED ssize_t err = malloc_write_fd(STDERR_FILENO, 71 1.1 christos &token->parser->buf[token->pos], token->len); 72 1.1 christos malloc_printf("\n"); 73 1.1 christos } 74 1.1 christos 75 1.1 christos static void 76 1.1 christos parser_init(parser_t *parser, bool verbose) { 77 1.1 christos parser->verbose = verbose; 78 1.1 christos parser->buf = NULL; 79 1.1 christos parser->len = 0; 80 1.1 christos parser->pos = 0; 81 1.1 christos parser->line = 1; 82 1.1 christos parser->col = 0; 83 1.1 christos } 84 1.1 christos 85 1.1 christos static void 86 1.1 christos parser_fini(parser_t *parser) { 87 1.1 christos if (parser->buf != NULL) { 88 1.1 christos dallocx(parser->buf, MALLOCX_TCACHE_NONE); 89 1.1 christos } 90 1.1 christos } 91 1.1 christos 92 1.1 christos static bool 93 1.1 christos parser_append(parser_t *parser, const char *str) { 94 1.1 christos size_t len = strlen(str); 95 1.1 christos char *buf = (parser->buf == NULL) ? mallocx(len + 1, 96 1.1 christos MALLOCX_TCACHE_NONE) : rallocx(parser->buf, parser->len + len + 1, 97 1.1 christos MALLOCX_TCACHE_NONE); 98 1.1 christos if (buf == NULL) { 99 1.1 christos return true; 100 1.1 christos } 101 1.1 christos memcpy(&buf[parser->len], str, len + 1); 102 1.1 christos parser->buf = buf; 103 1.1 christos parser->len += len; 104 1.1 christos return false; 105 1.1 christos } 106 1.1 christos 107 1.1 christos static bool 108 1.1 christos parser_tokenize(parser_t *parser) { 109 1.1 christos enum { 110 1.1 christos STATE_START, 111 1.1 christos STATE_EOI, 112 1.1 christos STATE_N, STATE_NU, STATE_NUL, STATE_NULL, 113 1.1 christos STATE_F, STATE_FA, STATE_FAL, STATE_FALS, STATE_FALSE, 114 1.1 christos STATE_T, STATE_TR, STATE_TRU, STATE_TRUE, 115 1.1 christos STATE_LBRACKET, 116 1.1 christos STATE_RBRACKET, 117 1.1 christos STATE_LBRACE, 118 1.1 christos STATE_RBRACE, 119 1.1 christos STATE_COLON, 120 1.1 christos STATE_COMMA, 121 1.1 christos STATE_CHARS, 122 1.1 christos STATE_CHAR_ESCAPE, 123 1.1 christos STATE_CHAR_U, STATE_CHAR_UD, STATE_CHAR_UDD, STATE_CHAR_UDDD, 124 1.1 christos STATE_STRING, 125 1.1 christos STATE_MINUS, 126 1.1 christos STATE_LEADING_ZERO, 127 1.1 christos STATE_DIGITS, 128 1.1 christos STATE_DECIMAL, 129 1.1 christos STATE_FRAC_DIGITS, 130 1.1 christos STATE_EXP, 131 1.1 christos STATE_EXP_SIGN, 132 1.1 christos STATE_EXP_DIGITS, 133 1.1 christos STATE_ACCEPT 134 1.1 christos } state = STATE_START; 135 1.1 christos size_t token_pos JEMALLOC_CC_SILENCE_INIT(0); 136 1.1 christos size_t token_line JEMALLOC_CC_SILENCE_INIT(1); 137 1.1 christos size_t token_col JEMALLOC_CC_SILENCE_INIT(0); 138 1.1 christos 139 1.1 christos assert_zu_le(parser->pos, parser->len, 140 1.1 christos "Position is past end of buffer"); 141 1.1 christos 142 1.1 christos while (state != STATE_ACCEPT) { 143 1.1 christos char c = parser->buf[parser->pos]; 144 1.1 christos 145 1.1 christos switch (state) { 146 1.1 christos case STATE_START: 147 1.1 christos token_pos = parser->pos; 148 1.1 christos token_line = parser->line; 149 1.1 christos token_col = parser->col; 150 1.1 christos switch (c) { 151 1.1 christos case ' ': case '\b': case '\n': case '\r': case '\t': 152 1.1 christos break; 153 1.1 christos case '\0': 154 1.1 christos state = STATE_EOI; 155 1.1 christos break; 156 1.1 christos case 'n': 157 1.1 christos state = STATE_N; 158 1.1 christos break; 159 1.1 christos case 'f': 160 1.1 christos state = STATE_F; 161 1.1 christos break; 162 1.1 christos case 't': 163 1.1 christos state = STATE_T; 164 1.1 christos break; 165 1.1 christos case '[': 166 1.1 christos state = STATE_LBRACKET; 167 1.1 christos break; 168 1.1 christos case ']': 169 1.1 christos state = STATE_RBRACKET; 170 1.1 christos break; 171 1.1 christos case '{': 172 1.1 christos state = STATE_LBRACE; 173 1.1 christos break; 174 1.1 christos case '}': 175 1.1 christos state = STATE_RBRACE; 176 1.1 christos break; 177 1.1 christos case ':': 178 1.1 christos state = STATE_COLON; 179 1.1 christos break; 180 1.1 christos case ',': 181 1.1 christos state = STATE_COMMA; 182 1.1 christos break; 183 1.1 christos case '"': 184 1.1 christos state = STATE_CHARS; 185 1.1 christos break; 186 1.1 christos case '-': 187 1.1 christos state = STATE_MINUS; 188 1.1 christos break; 189 1.1 christos case '0': 190 1.1 christos state = STATE_LEADING_ZERO; 191 1.1 christos break; 192 1.1 christos case '1': case '2': case '3': case '4': 193 1.1 christos case '5': case '6': case '7': case '8': case '9': 194 1.1 christos state = STATE_DIGITS; 195 1.1 christos break; 196 1.1 christos default: 197 1.1 christos token_init(&parser->token, parser, 198 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 199 1.1 christos - token_pos, token_line, token_col); 200 1.1 christos return true; 201 1.1 christos } 202 1.1 christos break; 203 1.1 christos case STATE_EOI: 204 1.1 christos token_init(&parser->token, parser, 205 1.1 christos TOKEN_TYPE_EOI, token_pos, parser->pos - 206 1.1 christos token_pos, token_line, token_col); 207 1.1 christos state = STATE_ACCEPT; 208 1.1 christos break; 209 1.1 christos case STATE_N: 210 1.1 christos switch (c) { 211 1.1 christos case 'u': 212 1.1 christos state = STATE_NU; 213 1.1 christos break; 214 1.1 christos default: 215 1.1 christos token_init(&parser->token, parser, 216 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 217 1.1 christos - token_pos, token_line, token_col); 218 1.1 christos return true; 219 1.1 christos } 220 1.1 christos break; 221 1.1 christos case STATE_NU: 222 1.1 christos switch (c) { 223 1.1 christos case 'l': 224 1.1 christos state = STATE_NUL; 225 1.1 christos break; 226 1.1 christos default: 227 1.1 christos token_init(&parser->token, parser, 228 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 229 1.1 christos - token_pos, token_line, token_col); 230 1.1 christos return true; 231 1.1 christos } 232 1.1 christos break; 233 1.1 christos case STATE_NUL: 234 1.1 christos switch (c) { 235 1.1 christos case 'l': 236 1.1 christos state = STATE_NULL; 237 1.1 christos break; 238 1.1 christos default: 239 1.1 christos token_init(&parser->token, parser, 240 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 241 1.1 christos - token_pos, token_line, token_col); 242 1.1 christos return true; 243 1.1 christos } 244 1.1 christos break; 245 1.1 christos case STATE_NULL: 246 1.1 christos switch (c) { 247 1.1 christos case ' ': case '\b': case '\n': case '\r': case '\t': 248 1.1 christos case '\0': 249 1.1 christos case '[': case ']': case '{': case '}': case ':': 250 1.1 christos case ',': 251 1.1 christos break; 252 1.1 christos default: 253 1.1 christos token_init(&parser->token, parser, 254 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 255 1.1 christos - token_pos, token_line, token_col); 256 1.1 christos return true; 257 1.1 christos } 258 1.1 christos token_init(&parser->token, parser, TOKEN_TYPE_NULL, 259 1.1 christos token_pos, parser->pos - token_pos, token_line, 260 1.1 christos token_col); 261 1.1 christos state = STATE_ACCEPT; 262 1.1 christos break; 263 1.1 christos case STATE_F: 264 1.1 christos switch (c) { 265 1.1 christos case 'a': 266 1.1 christos state = STATE_FA; 267 1.1 christos break; 268 1.1 christos default: 269 1.1 christos token_init(&parser->token, parser, 270 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 271 1.1 christos - token_pos, token_line, token_col); 272 1.1 christos return true; 273 1.1 christos } 274 1.1 christos break; 275 1.1 christos case STATE_FA: 276 1.1 christos switch (c) { 277 1.1 christos case 'l': 278 1.1 christos state = STATE_FAL; 279 1.1 christos break; 280 1.1 christos default: 281 1.1 christos token_init(&parser->token, parser, 282 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 283 1.1 christos - token_pos, token_line, token_col); 284 1.1 christos return true; 285 1.1 christos } 286 1.1 christos break; 287 1.1 christos case STATE_FAL: 288 1.1 christos switch (c) { 289 1.1 christos case 's': 290 1.1 christos state = STATE_FALS; 291 1.1 christos break; 292 1.1 christos default: 293 1.1 christos token_init(&parser->token, parser, 294 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 295 1.1 christos - token_pos, token_line, token_col); 296 1.1 christos return true; 297 1.1 christos } 298 1.1 christos break; 299 1.1 christos case STATE_FALS: 300 1.1 christos switch (c) { 301 1.1 christos case 'e': 302 1.1 christos state = STATE_FALSE; 303 1.1 christos break; 304 1.1 christos default: 305 1.1 christos token_init(&parser->token, parser, 306 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 307 1.1 christos - token_pos, token_line, token_col); 308 1.1 christos return true; 309 1.1 christos } 310 1.1 christos break; 311 1.1 christos case STATE_FALSE: 312 1.1 christos switch (c) { 313 1.1 christos case ' ': case '\b': case '\n': case '\r': case '\t': 314 1.1 christos case '\0': 315 1.1 christos case '[': case ']': case '{': case '}': case ':': 316 1.1 christos case ',': 317 1.1 christos break; 318 1.1 christos default: 319 1.1 christos token_init(&parser->token, parser, 320 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 321 1.1 christos - token_pos, token_line, token_col); 322 1.1 christos return true; 323 1.1 christos } 324 1.1 christos token_init(&parser->token, parser, 325 1.1 christos TOKEN_TYPE_FALSE, token_pos, parser->pos - 326 1.1 christos token_pos, token_line, token_col); 327 1.1 christos state = STATE_ACCEPT; 328 1.1 christos break; 329 1.1 christos case STATE_T: 330 1.1 christos switch (c) { 331 1.1 christos case 'r': 332 1.1 christos state = STATE_TR; 333 1.1 christos break; 334 1.1 christos default: 335 1.1 christos token_init(&parser->token, parser, 336 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 337 1.1 christos - token_pos, token_line, token_col); 338 1.1 christos return true; 339 1.1 christos } 340 1.1 christos break; 341 1.1 christos case STATE_TR: 342 1.1 christos switch (c) { 343 1.1 christos case 'u': 344 1.1 christos state = STATE_TRU; 345 1.1 christos break; 346 1.1 christos default: 347 1.1 christos token_init(&parser->token, parser, 348 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 349 1.1 christos - token_pos, token_line, token_col); 350 1.1 christos return true; 351 1.1 christos } 352 1.1 christos break; 353 1.1 christos case STATE_TRU: 354 1.1 christos switch (c) { 355 1.1 christos case 'e': 356 1.1 christos state = STATE_TRUE; 357 1.1 christos break; 358 1.1 christos default: 359 1.1 christos token_init(&parser->token, parser, 360 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 361 1.1 christos - token_pos, token_line, token_col); 362 1.1 christos return true; 363 1.1 christos } 364 1.1 christos break; 365 1.1 christos case STATE_TRUE: 366 1.1 christos switch (c) { 367 1.1 christos case ' ': case '\b': case '\n': case '\r': case '\t': 368 1.1 christos case '\0': 369 1.1 christos case '[': case ']': case '{': case '}': case ':': 370 1.1 christos case ',': 371 1.1 christos break; 372 1.1 christos default: 373 1.1 christos token_init(&parser->token, parser, 374 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 375 1.1 christos - token_pos, token_line, token_col); 376 1.1 christos return true; 377 1.1 christos } 378 1.1 christos token_init(&parser->token, parser, TOKEN_TYPE_TRUE, 379 1.1 christos token_pos, parser->pos - token_pos, token_line, 380 1.1 christos token_col); 381 1.1 christos state = STATE_ACCEPT; 382 1.1 christos break; 383 1.1 christos case STATE_LBRACKET: 384 1.1 christos token_init(&parser->token, parser, TOKEN_TYPE_LBRACKET, 385 1.1 christos token_pos, parser->pos - token_pos, token_line, 386 1.1 christos token_col); 387 1.1 christos state = STATE_ACCEPT; 388 1.1 christos break; 389 1.1 christos case STATE_RBRACKET: 390 1.1 christos token_init(&parser->token, parser, TOKEN_TYPE_RBRACKET, 391 1.1 christos token_pos, parser->pos - token_pos, token_line, 392 1.1 christos token_col); 393 1.1 christos state = STATE_ACCEPT; 394 1.1 christos break; 395 1.1 christos case STATE_LBRACE: 396 1.1 christos token_init(&parser->token, parser, TOKEN_TYPE_LBRACE, 397 1.1 christos token_pos, parser->pos - token_pos, token_line, 398 1.1 christos token_col); 399 1.1 christos state = STATE_ACCEPT; 400 1.1 christos break; 401 1.1 christos case STATE_RBRACE: 402 1.1 christos token_init(&parser->token, parser, TOKEN_TYPE_RBRACE, 403 1.1 christos token_pos, parser->pos - token_pos, token_line, 404 1.1 christos token_col); 405 1.1 christos state = STATE_ACCEPT; 406 1.1 christos break; 407 1.1 christos case STATE_COLON: 408 1.1 christos token_init(&parser->token, parser, TOKEN_TYPE_COLON, 409 1.1 christos token_pos, parser->pos - token_pos, token_line, 410 1.1 christos token_col); 411 1.1 christos state = STATE_ACCEPT; 412 1.1 christos break; 413 1.1 christos case STATE_COMMA: 414 1.1 christos token_init(&parser->token, parser, TOKEN_TYPE_COMMA, 415 1.1 christos token_pos, parser->pos - token_pos, token_line, 416 1.1 christos token_col); 417 1.1 christos state = STATE_ACCEPT; 418 1.1 christos break; 419 1.1 christos case STATE_CHARS: 420 1.1 christos switch (c) { 421 1.1 christos case '\\': 422 1.1 christos state = STATE_CHAR_ESCAPE; 423 1.1 christos break; 424 1.1 christos case '"': 425 1.1 christos state = STATE_STRING; 426 1.1 christos break; 427 1.1 christos case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: 428 1.1 christos case 0x05: case 0x06: case 0x07: case 0x08: case 0x09: 429 1.1 christos case 0x0a: case 0x0b: case 0x0c: case 0x0d: case 0x0e: 430 1.1 christos case 0x0f: case 0x10: case 0x11: case 0x12: case 0x13: 431 1.1 christos case 0x14: case 0x15: case 0x16: case 0x17: case 0x18: 432 1.1 christos case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: 433 1.1 christos case 0x1e: case 0x1f: 434 1.1 christos token_init(&parser->token, parser, 435 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 436 1.1 christos - token_pos, token_line, token_col); 437 1.1 christos return true; 438 1.1 christos default: 439 1.1 christos break; 440 1.1 christos } 441 1.1 christos break; 442 1.1 christos case STATE_CHAR_ESCAPE: 443 1.1 christos switch (c) { 444 1.1 christos case '"': case '\\': case '/': case 'b': case 'n': 445 1.1 christos case 'r': case 't': 446 1.1 christos state = STATE_CHARS; 447 1.1 christos break; 448 1.1 christos case 'u': 449 1.1 christos state = STATE_CHAR_U; 450 1.1 christos break; 451 1.1 christos default: 452 1.1 christos token_init(&parser->token, parser, 453 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 454 1.1 christos - token_pos, token_line, token_col); 455 1.1 christos return true; 456 1.1 christos } 457 1.1 christos break; 458 1.1 christos case STATE_CHAR_U: 459 1.1 christos switch (c) { 460 1.1 christos case '0': case '1': case '2': case '3': case '4': 461 1.1 christos case '5': case '6': case '7': case '8': case '9': 462 1.1 christos case 'a': case 'b': case 'c': case 'd': case 'e': 463 1.1 christos case 'f': 464 1.1 christos case 'A': case 'B': case 'C': case 'D': case 'E': 465 1.1 christos case 'F': 466 1.1 christos state = STATE_CHAR_UD; 467 1.1 christos break; 468 1.1 christos default: 469 1.1 christos token_init(&parser->token, parser, 470 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 471 1.1 christos - token_pos, token_line, token_col); 472 1.1 christos return true; 473 1.1 christos } 474 1.1 christos break; 475 1.1 christos case STATE_CHAR_UD: 476 1.1 christos switch (c) { 477 1.1 christos case '0': case '1': case '2': case '3': case '4': 478 1.1 christos case '5': case '6': case '7': case '8': case '9': 479 1.1 christos case 'a': case 'b': case 'c': case 'd': case 'e': 480 1.1 christos case 'f': 481 1.1 christos case 'A': case 'B': case 'C': case 'D': case 'E': 482 1.1 christos case 'F': 483 1.1 christos state = STATE_CHAR_UDD; 484 1.1 christos break; 485 1.1 christos default: 486 1.1 christos token_init(&parser->token, parser, 487 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 488 1.1 christos - token_pos, token_line, token_col); 489 1.1 christos return true; 490 1.1 christos } 491 1.1 christos break; 492 1.1 christos case STATE_CHAR_UDD: 493 1.1 christos switch (c) { 494 1.1 christos case '0': case '1': case '2': case '3': case '4': 495 1.1 christos case '5': case '6': case '7': case '8': case '9': 496 1.1 christos case 'a': case 'b': case 'c': case 'd': case 'e': 497 1.1 christos case 'f': 498 1.1 christos case 'A': case 'B': case 'C': case 'D': case 'E': 499 1.1 christos case 'F': 500 1.1 christos state = STATE_CHAR_UDDD; 501 1.1 christos break; 502 1.1 christos default: 503 1.1 christos token_init(&parser->token, parser, 504 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 505 1.1 christos - token_pos, token_line, token_col); 506 1.1 christos return true; 507 1.1 christos } 508 1.1 christos break; 509 1.1 christos case STATE_CHAR_UDDD: 510 1.1 christos switch (c) { 511 1.1 christos case '0': case '1': case '2': case '3': case '4': 512 1.1 christos case '5': case '6': case '7': case '8': case '9': 513 1.1 christos case 'a': case 'b': case 'c': case 'd': case 'e': 514 1.1 christos case 'f': 515 1.1 christos case 'A': case 'B': case 'C': case 'D': case 'E': 516 1.1 christos case 'F': 517 1.1 christos state = STATE_CHARS; 518 1.1 christos break; 519 1.1 christos default: 520 1.1 christos token_init(&parser->token, parser, 521 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 522 1.1 christos - token_pos, token_line, token_col); 523 1.1 christos return true; 524 1.1 christos } 525 1.1 christos break; 526 1.1 christos case STATE_STRING: 527 1.1 christos token_init(&parser->token, parser, TOKEN_TYPE_STRING, 528 1.1 christos token_pos, parser->pos - token_pos, token_line, 529 1.1 christos token_col); 530 1.1 christos state = STATE_ACCEPT; 531 1.1 christos break; 532 1.1 christos case STATE_MINUS: 533 1.1 christos switch (c) { 534 1.1 christos case '0': 535 1.1 christos state = STATE_LEADING_ZERO; 536 1.1 christos break; 537 1.1 christos case '1': case '2': case '3': case '4': 538 1.1 christos case '5': case '6': case '7': case '8': case '9': 539 1.1 christos state = STATE_DIGITS; 540 1.1 christos break; 541 1.1 christos default: 542 1.1 christos token_init(&parser->token, parser, 543 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 544 1.1 christos - token_pos, token_line, token_col); 545 1.1 christos return true; 546 1.1 christos } 547 1.1 christos break; 548 1.1 christos case STATE_LEADING_ZERO: 549 1.1 christos switch (c) { 550 1.1 christos case '.': 551 1.1 christos state = STATE_DECIMAL; 552 1.1 christos break; 553 1.1 christos default: 554 1.1 christos token_init(&parser->token, parser, 555 1.1 christos TOKEN_TYPE_NUMBER, token_pos, parser->pos - 556 1.1 christos token_pos, token_line, token_col); 557 1.1 christos state = STATE_ACCEPT; 558 1.1 christos break; 559 1.1 christos } 560 1.1 christos break; 561 1.1 christos case STATE_DIGITS: 562 1.1 christos switch (c) { 563 1.1 christos case '0': case '1': case '2': case '3': case '4': 564 1.1 christos case '5': case '6': case '7': case '8': case '9': 565 1.1 christos break; 566 1.1 christos case '.': 567 1.1 christos state = STATE_DECIMAL; 568 1.1 christos break; 569 1.1 christos default: 570 1.1 christos token_init(&parser->token, parser, 571 1.1 christos TOKEN_TYPE_NUMBER, token_pos, parser->pos - 572 1.1 christos token_pos, token_line, token_col); 573 1.1 christos state = STATE_ACCEPT; 574 1.1 christos break; 575 1.1 christos } 576 1.1 christos break; 577 1.1 christos case STATE_DECIMAL: 578 1.1 christos switch (c) { 579 1.1 christos case '0': case '1': case '2': case '3': case '4': 580 1.1 christos case '5': case '6': case '7': case '8': case '9': 581 1.1 christos state = STATE_FRAC_DIGITS; 582 1.1 christos break; 583 1.1 christos default: 584 1.1 christos token_init(&parser->token, parser, 585 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 586 1.1 christos - token_pos, token_line, token_col); 587 1.1 christos return true; 588 1.1 christos } 589 1.1 christos break; 590 1.1 christos case STATE_FRAC_DIGITS: 591 1.1 christos switch (c) { 592 1.1 christos case '0': case '1': case '2': case '3': case '4': 593 1.1 christos case '5': case '6': case '7': case '8': case '9': 594 1.1 christos break; 595 1.1 christos case 'e': case 'E': 596 1.1 christos state = STATE_EXP; 597 1.1 christos break; 598 1.1 christos default: 599 1.1 christos token_init(&parser->token, parser, 600 1.1 christos TOKEN_TYPE_NUMBER, token_pos, parser->pos - 601 1.1 christos token_pos, token_line, token_col); 602 1.1 christos state = STATE_ACCEPT; 603 1.1 christos break; 604 1.1 christos } 605 1.1 christos break; 606 1.1 christos case STATE_EXP: 607 1.1 christos switch (c) { 608 1.1 christos case '-': case '+': 609 1.1 christos state = STATE_EXP_SIGN; 610 1.1 christos break; 611 1.1 christos case '0': case '1': case '2': case '3': case '4': 612 1.1 christos case '5': case '6': case '7': case '8': case '9': 613 1.1 christos state = STATE_EXP_DIGITS; 614 1.1 christos break; 615 1.1 christos default: 616 1.1 christos token_init(&parser->token, parser, 617 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 618 1.1 christos - token_pos, token_line, token_col); 619 1.1 christos return true; 620 1.1 christos } 621 1.1 christos break; 622 1.1 christos case STATE_EXP_SIGN: 623 1.1 christos switch (c) { 624 1.1 christos case '0': case '1': case '2': case '3': case '4': 625 1.1 christos case '5': case '6': case '7': case '8': case '9': 626 1.1 christos state = STATE_EXP_DIGITS; 627 1.1 christos break; 628 1.1 christos default: 629 1.1 christos token_init(&parser->token, parser, 630 1.1 christos TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 631 1.1 christos - token_pos, token_line, token_col); 632 1.1 christos return true; 633 1.1 christos } 634 1.1 christos break; 635 1.1 christos case STATE_EXP_DIGITS: 636 1.1 christos switch (c) { 637 1.1 christos case '0': case '1': case '2': case '3': case '4': 638 1.1 christos case '5': case '6': case '7': case '8': case '9': 639 1.1 christos break; 640 1.1 christos default: 641 1.1 christos token_init(&parser->token, parser, 642 1.1 christos TOKEN_TYPE_NUMBER, token_pos, parser->pos - 643 1.1 christos token_pos, token_line, token_col); 644 1.1 christos state = STATE_ACCEPT; 645 1.1 christos break; 646 1.1 christos } 647 1.1 christos break; 648 1.1 christos default: 649 1.1 christos not_reached(); 650 1.1 christos } 651 1.1 christos 652 1.1 christos if (state != STATE_ACCEPT) { 653 1.1 christos if (c == '\n') { 654 1.1 christos parser->line++; 655 1.1 christos parser->col = 0; 656 1.1 christos } else { 657 1.1 christos parser->col++; 658 1.1 christos } 659 1.1 christos parser->pos++; 660 1.1 christos } 661 1.1 christos } 662 1.1 christos return false; 663 1.1 christos } 664 1.1 christos 665 1.1 christos static bool parser_parse_array(parser_t *parser); 666 1.1 christos static bool parser_parse_object(parser_t *parser); 667 1.1 christos 668 1.1 christos static bool 669 1.1 christos parser_parse_value(parser_t *parser) { 670 1.1 christos switch (parser->token.token_type) { 671 1.1 christos case TOKEN_TYPE_NULL: 672 1.1 christos case TOKEN_TYPE_FALSE: 673 1.1 christos case TOKEN_TYPE_TRUE: 674 1.1 christos case TOKEN_TYPE_STRING: 675 1.1 christos case TOKEN_TYPE_NUMBER: 676 1.1 christos return false; 677 1.1 christos case TOKEN_TYPE_LBRACE: 678 1.1 christos return parser_parse_object(parser); 679 1.1 christos case TOKEN_TYPE_LBRACKET: 680 1.1 christos return parser_parse_array(parser); 681 1.1 christos default: 682 1.1 christos return true; 683 1.1 christos } 684 1.1 christos not_reached(); 685 1.1 christos } 686 1.1 christos 687 1.1 christos static bool 688 1.1 christos parser_parse_pair(parser_t *parser) { 689 1.1 christos assert_d_eq(parser->token.token_type, TOKEN_TYPE_STRING, 690 1.1 christos "Pair should start with string"); 691 1.1 christos if (parser_tokenize(parser)) { 692 1.1 christos return true; 693 1.1 christos } 694 1.1 christos switch (parser->token.token_type) { 695 1.1 christos case TOKEN_TYPE_COLON: 696 1.1 christos if (parser_tokenize(parser)) { 697 1.1 christos return true; 698 1.1 christos } 699 1.1 christos return parser_parse_value(parser); 700 1.1 christos default: 701 1.1 christos return true; 702 1.1 christos } 703 1.1 christos } 704 1.1 christos 705 1.1 christos static bool 706 1.1 christos parser_parse_values(parser_t *parser) { 707 1.1 christos if (parser_parse_value(parser)) { 708 1.1 christos return true; 709 1.1 christos } 710 1.1 christos 711 1.1 christos while (true) { 712 1.1 christos if (parser_tokenize(parser)) { 713 1.1 christos return true; 714 1.1 christos } 715 1.1 christos switch (parser->token.token_type) { 716 1.1 christos case TOKEN_TYPE_COMMA: 717 1.1 christos if (parser_tokenize(parser)) { 718 1.1 christos return true; 719 1.1 christos } 720 1.1 christos if (parser_parse_value(parser)) { 721 1.1 christos return true; 722 1.1 christos } 723 1.1 christos break; 724 1.1 christos case TOKEN_TYPE_RBRACKET: 725 1.1 christos return false; 726 1.1 christos default: 727 1.1 christos return true; 728 1.1 christos } 729 1.1 christos } 730 1.1 christos } 731 1.1 christos 732 1.1 christos static bool 733 1.1 christos parser_parse_array(parser_t *parser) { 734 1.1 christos assert_d_eq(parser->token.token_type, TOKEN_TYPE_LBRACKET, 735 1.1 christos "Array should start with ["); 736 1.1 christos if (parser_tokenize(parser)) { 737 1.1 christos return true; 738 1.1 christos } 739 1.1 christos switch (parser->token.token_type) { 740 1.1 christos case TOKEN_TYPE_RBRACKET: 741 1.1 christos return false; 742 1.1 christos default: 743 1.1 christos return parser_parse_values(parser); 744 1.1 christos } 745 1.1 christos not_reached(); 746 1.1 christos } 747 1.1 christos 748 1.1 christos static bool 749 1.1 christos parser_parse_pairs(parser_t *parser) { 750 1.1 christos assert_d_eq(parser->token.token_type, TOKEN_TYPE_STRING, 751 1.1 christos "Object should start with string"); 752 1.1 christos if (parser_parse_pair(parser)) { 753 1.1 christos return true; 754 1.1 christos } 755 1.1 christos 756 1.1 christos while (true) { 757 1.1 christos if (parser_tokenize(parser)) { 758 1.1 christos return true; 759 1.1 christos } 760 1.1 christos switch (parser->token.token_type) { 761 1.1 christos case TOKEN_TYPE_COMMA: 762 1.1 christos if (parser_tokenize(parser)) { 763 1.1 christos return true; 764 1.1 christos } 765 1.1 christos switch (parser->token.token_type) { 766 1.1 christos case TOKEN_TYPE_STRING: 767 1.1 christos if (parser_parse_pair(parser)) { 768 1.1 christos return true; 769 1.1 christos } 770 1.1 christos break; 771 1.1 christos default: 772 1.1 christos return true; 773 1.1 christos } 774 1.1 christos break; 775 1.1 christos case TOKEN_TYPE_RBRACE: 776 1.1 christos return false; 777 1.1 christos default: 778 1.1 christos return true; 779 1.1 christos } 780 1.1 christos } 781 1.1 christos } 782 1.1 christos 783 1.1 christos static bool 784 1.1 christos parser_parse_object(parser_t *parser) { 785 1.1 christos assert_d_eq(parser->token.token_type, TOKEN_TYPE_LBRACE, 786 1.1 christos "Object should start with {"); 787 1.1 christos if (parser_tokenize(parser)) { 788 1.1 christos return true; 789 1.1 christos } 790 1.1 christos switch (parser->token.token_type) { 791 1.1 christos case TOKEN_TYPE_STRING: 792 1.1 christos return parser_parse_pairs(parser); 793 1.1 christos case TOKEN_TYPE_RBRACE: 794 1.1 christos return false; 795 1.1 christos default: 796 1.1 christos return true; 797 1.1 christos } 798 1.1 christos not_reached(); 799 1.1 christos } 800 1.1 christos 801 1.1 christos static bool 802 1.1 christos parser_parse(parser_t *parser) { 803 1.1 christos if (parser_tokenize(parser)) { 804 1.1 christos goto label_error; 805 1.1 christos } 806 1.1 christos if (parser_parse_value(parser)) { 807 1.1 christos goto label_error; 808 1.1 christos } 809 1.1 christos 810 1.1 christos if (parser_tokenize(parser)) { 811 1.1 christos goto label_error; 812 1.1 christos } 813 1.1 christos switch (parser->token.token_type) { 814 1.1 christos case TOKEN_TYPE_EOI: 815 1.1 christos return false; 816 1.1 christos default: 817 1.1 christos goto label_error; 818 1.1 christos } 819 1.1 christos not_reached(); 820 1.1 christos 821 1.1 christos label_error: 822 1.1 christos token_error(&parser->token); 823 1.1 christos return true; 824 1.1 christos } 825 1.1 christos 826 1.1 christos TEST_BEGIN(test_json_parser) { 827 1.1 christos size_t i; 828 1.1 christos const char *invalid_inputs[] = { 829 1.1 christos /* Tokenizer error case tests. */ 830 1.1 christos "{ \"string\": X }", 831 1.1 christos "{ \"string\": nXll }", 832 1.1 christos "{ \"string\": nuXl }", 833 1.1 christos "{ \"string\": nulX }", 834 1.1 christos "{ \"string\": nullX }", 835 1.1 christos "{ \"string\": fXlse }", 836 1.1 christos "{ \"string\": faXse }", 837 1.1 christos "{ \"string\": falXe }", 838 1.1 christos "{ \"string\": falsX }", 839 1.1 christos "{ \"string\": falseX }", 840 1.1 christos "{ \"string\": tXue }", 841 1.1 christos "{ \"string\": trXe }", 842 1.1 christos "{ \"string\": truX }", 843 1.1 christos "{ \"string\": trueX }", 844 1.1 christos "{ \"string\": \"\n\" }", 845 1.1 christos "{ \"string\": \"\\z\" }", 846 1.1 christos "{ \"string\": \"\\uX000\" }", 847 1.1 christos "{ \"string\": \"\\u0X00\" }", 848 1.1 christos "{ \"string\": \"\\u00X0\" }", 849 1.1 christos "{ \"string\": \"\\u000X\" }", 850 1.1 christos "{ \"string\": -X }", 851 1.1 christos "{ \"string\": 0.X }", 852 1.1 christos "{ \"string\": 0.0eX }", 853 1.1 christos "{ \"string\": 0.0e+X }", 854 1.1 christos 855 1.1 christos /* Parser error test cases. */ 856 1.1 christos "{\"string\": }", 857 1.1 christos "{\"string\" }", 858 1.1 christos "{\"string\": [ 0 }", 859 1.1 christos "{\"string\": {\"a\":0, 1 } }", 860 1.1 christos "{\"string\": {\"a\":0: } }", 861 1.1 christos "{", 862 1.1 christos "{}{", 863 1.1 christos }; 864 1.1 christos const char *valid_inputs[] = { 865 1.1 christos /* Token tests. */ 866 1.1 christos "null", 867 1.1 christos "false", 868 1.1 christos "true", 869 1.1 christos "{}", 870 1.1 christos "{\"a\": 0}", 871 1.1 christos "[]", 872 1.1 christos "[0, 1]", 873 1.1 christos "0", 874 1.1 christos "1", 875 1.1 christos "10", 876 1.1 christos "-10", 877 1.1 christos "10.23", 878 1.1 christos "10.23e4", 879 1.1 christos "10.23e-4", 880 1.1 christos "10.23e+4", 881 1.1 christos "10.23E4", 882 1.1 christos "10.23E-4", 883 1.1 christos "10.23E+4", 884 1.1 christos "-10.23", 885 1.1 christos "-10.23e4", 886 1.1 christos "-10.23e-4", 887 1.1 christos "-10.23e+4", 888 1.1 christos "-10.23E4", 889 1.1 christos "-10.23E-4", 890 1.1 christos "-10.23E+4", 891 1.1 christos "\"value\"", 892 1.1 christos "\" \\\" \\/ \\b \\n \\r \\t \\u0abc \\u1DEF \"", 893 1.1 christos 894 1.1 christos /* Parser test with various nesting. */ 895 1.1 christos "{\"a\":null, \"b\":[1,[{\"c\":2},3]], \"d\":{\"e\":true}}", 896 1.1 christos }; 897 1.1 christos 898 1.1 christos for (i = 0; i < sizeof(invalid_inputs)/sizeof(const char *); i++) { 899 1.1 christos const char *input = invalid_inputs[i]; 900 1.1 christos parser_t parser; 901 1.1 christos parser_init(&parser, false); 902 1.1 christos assert_false(parser_append(&parser, input), 903 1.1 christos "Unexpected input appending failure"); 904 1.1 christos assert_true(parser_parse(&parser), 905 1.1 christos "Unexpected parse success for input: %s", input); 906 1.1 christos parser_fini(&parser); 907 1.1 christos } 908 1.1 christos 909 1.1 christos for (i = 0; i < sizeof(valid_inputs)/sizeof(const char *); i++) { 910 1.1 christos const char *input = valid_inputs[i]; 911 1.1 christos parser_t parser; 912 1.1 christos parser_init(&parser, true); 913 1.1 christos assert_false(parser_append(&parser, input), 914 1.1 christos "Unexpected input appending failure"); 915 1.1 christos assert_false(parser_parse(&parser), 916 1.1 christos "Unexpected parse error for input: %s", input); 917 1.1 christos parser_fini(&parser); 918 1.1 christos } 919 1.1 christos } 920 1.1 christos TEST_END 921 1.1 christos 922 1.1 christos void 923 1.1 christos write_cb(void *opaque, const char *str) { 924 1.1 christos parser_t *parser = (parser_t *)opaque; 925 1.1 christos if (parser_append(parser, str)) { 926 1.1 christos test_fail("Unexpected input appending failure"); 927 1.1 christos } 928 1.1 christos } 929 1.1 christos 930 1.1 christos TEST_BEGIN(test_stats_print_json) { 931 1.1 christos const char *opts[] = { 932 1.1 christos "J", 933 1.1 christos "Jg", 934 1.1 christos "Jm", 935 1.1 christos "Jd", 936 1.1 christos "Jmd", 937 1.1 christos "Jgd", 938 1.1 christos "Jgm", 939 1.1 christos "Jgmd", 940 1.1 christos "Ja", 941 1.1 christos "Jb", 942 1.1 christos "Jl", 943 1.1 christos "Jx", 944 1.1 christos "Jbl", 945 1.1 christos "Jal", 946 1.1 christos "Jab", 947 1.1 christos "Jabl", 948 1.1 christos "Jax", 949 1.1 christos "Jbx", 950 1.1 christos "Jlx", 951 1.1 christos "Jablx", 952 1.1 christos "Jgmdablx", 953 1.1 christos }; 954 1.1 christos unsigned arena_ind, i; 955 1.1 christos 956 1.1 christos for (i = 0; i < 3; i++) { 957 1.1 christos unsigned j; 958 1.1 christos 959 1.1 christos switch (i) { 960 1.1 christos case 0: 961 1.1 christos break; 962 1.1 christos case 1: { 963 1.1 christos size_t sz = sizeof(arena_ind); 964 1.1 christos assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, 965 1.1 christos &sz, NULL, 0), 0, "Unexpected mallctl failure"); 966 1.1 christos break; 967 1.1 christos } case 2: { 968 1.1 christos size_t mib[3]; 969 1.1 christos size_t miblen = sizeof(mib)/sizeof(size_t); 970 1.1 christos assert_d_eq(mallctlnametomib("arena.0.destroy", 971 1.1 christos mib, &miblen), 0, 972 1.1 christos "Unexpected mallctlnametomib failure"); 973 1.1 christos mib[1] = arena_ind; 974 1.1 christos assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 975 1.1 christos 0), 0, "Unexpected mallctlbymib failure"); 976 1.1 christos break; 977 1.1 christos } default: 978 1.1 christos not_reached(); 979 1.1 christos } 980 1.1 christos 981 1.1 christos for (j = 0; j < sizeof(opts)/sizeof(const char *); j++) { 982 1.1 christos parser_t parser; 983 1.1 christos 984 1.1 christos parser_init(&parser, true); 985 1.1 christos malloc_stats_print(write_cb, (void *)&parser, opts[j]); 986 1.1 christos assert_false(parser_parse(&parser), 987 1.1 christos "Unexpected parse error, opts=\"%s\"", opts[j]); 988 1.1 christos parser_fini(&parser); 989 1.1 christos } 990 1.1 christos } 991 1.1 christos } 992 1.1 christos TEST_END 993 1.1 christos 994 1.1 christos int 995 1.1 christos main(void) { 996 1.1 christos return test( 997 1.1 christos test_json_parser, 998 1.1 christos test_stats_print_json); 999 1.1 christos } 1000