1 1.5 andvar /* $NetBSD: plural_parser.c,v 1.5 2025/02/26 04:49:45 andvar Exp $ */ 2 1.1 tshiozak 3 1.1 tshiozak /*- 4 1.1 tshiozak * Copyright (c) 2005 Citrus Project, 5 1.1 tshiozak * All rights reserved. 6 1.1 tshiozak * 7 1.1 tshiozak * Redistribution and use in source and binary forms, with or without 8 1.1 tshiozak * modification, are permitted provided that the following conditions 9 1.1 tshiozak * are met: 10 1.1 tshiozak * 1. Redistributions of source code must retain the above copyright 11 1.1 tshiozak * notice, this list of conditions and the following disclaimer. 12 1.1 tshiozak * 2. Redistributions in binary form must reproduce the above copyright 13 1.1 tshiozak * notice, this list of conditions and the following disclaimer in the 14 1.1 tshiozak * documentation and/or other materials provided with the distribution. 15 1.1 tshiozak * 16 1.1 tshiozak * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 1.1 tshiozak * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 1.1 tshiozak * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 1.1 tshiozak * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 1.1 tshiozak * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 1.1 tshiozak * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 1.1 tshiozak * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 1.1 tshiozak * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 1.1 tshiozak * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 1.1 tshiozak * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 1.1 tshiozak * SUCH DAMAGE. 27 1.1 tshiozak * 28 1.1 tshiozak */ 29 1.1 tshiozak 30 1.1 tshiozak #include <sys/cdefs.h> 31 1.5 andvar __RCSID("$NetBSD: plural_parser.c,v 1.5 2025/02/26 04:49:45 andvar Exp $"); 32 1.1 tshiozak 33 1.1 tshiozak #include <assert.h> 34 1.1 tshiozak #include <stdio.h> 35 1.1 tshiozak #include <stdlib.h> 36 1.1 tshiozak #include <string.h> 37 1.1 tshiozak #include <citrus/citrus_namespace.h> 38 1.1 tshiozak #include <citrus/citrus_region.h> 39 1.1 tshiozak #include <citrus/citrus_memstream.h> 40 1.1 tshiozak #include <citrus/citrus_bcs.h> 41 1.1 tshiozak #include "plural_parser.h" 42 1.1 tshiozak 43 1.1 tshiozak #if defined(TEST_TOKENIZER) || defined(TEST_PARSER) 44 1.1 tshiozak #define ALLOW_EMPTY 45 1.1 tshiozak #define ALLOW_ARBITRARY_IDENTIFIER 46 1.1 tshiozak #endif 47 1.1 tshiozak 48 1.1 tshiozak #define MAX_LEN_ATOM 10 49 1.1 tshiozak #define MAX_NUM_OPERANDS 3 50 1.1 tshiozak 51 1.1 tshiozak #define T_EOF EOF 52 1.1 tshiozak #define T_NONE 0x100 53 1.1 tshiozak #define T_LAND 0x101 /* && */ 54 1.1 tshiozak #define T_LOR 0x102 /* || */ 55 1.1 tshiozak #define T_EQUALITY 0x103 /* == or != */ 56 1.1 tshiozak #define T_RELATIONAL 0x104 /* <, >, <= or >= */ 57 1.1 tshiozak #define T_ADDITIVE 0x105 /* + or - */ 58 1.1 tshiozak #define T_MULTIPLICATIVE 0x106 /* *, / or % */ 59 1.1 tshiozak #define T_IDENTIFIER 0x200 60 1.1 tshiozak #define T_CONSTANT 0x201 61 1.1 tshiozak #define T_ILCHAR 0x300 62 1.1 tshiozak #define T_TOOLONG 0x301 63 1.1 tshiozak #define T_ILTOKEN 0x302 64 1.1 tshiozak #define T_ILEND 0x303 65 1.1 tshiozak #define T_NOMEM 0x304 66 1.1 tshiozak #define T_NOTFOUND 0x305 67 1.1 tshiozak #define T_ILPLURAL 0x306 68 1.1 tshiozak #define T_IS_OPERATOR(t) ((t) < 0x200) 69 1.1 tshiozak #define T_IS_ERROR(t) ((t) >= 0x300) 70 1.1 tshiozak 71 1.1 tshiozak #define OP_EQ ('='+'=') 72 1.1 tshiozak #define OP_NEQ ('!'+'=') 73 1.1 tshiozak #define OP_LTEQ ('<'+'=') 74 1.1 tshiozak #define OP_GTEQ ('>'+'=') 75 1.1 tshiozak 76 1.1 tshiozak #define PLURAL_NUMBER_SYMBOL "n" 77 1.1 tshiozak #define NPLURALS_SYMBOL "nplurals" 78 1.1 tshiozak #define LEN_NPLURAL_SYMBOL (sizeof (NPLURALS_SYMBOL) -1) 79 1.1 tshiozak #define PLURAL_SYMBOL "plural" 80 1.1 tshiozak #define LEN_PLURAL_SYMBOL (sizeof (PLURAL_SYMBOL) -1) 81 1.1 tshiozak #define PLURAL_FORMS "Plural-Forms:" 82 1.1 tshiozak #define LEN_PLURAL_FORMS (sizeof (PLURAL_FORMS) -1) 83 1.1 tshiozak 84 1.1 tshiozak /* ---------------------------------------------------------------------- 85 1.1 tshiozak * tokenizer part 86 1.1 tshiozak */ 87 1.1 tshiozak 88 1.1 tshiozak union token_data 89 1.1 tshiozak { 90 1.1 tshiozak unsigned long constant; 91 1.1 tshiozak #ifdef ALLOW_ARBITRARY_IDENTIFIER 92 1.1 tshiozak char identifier[MAX_LEN_ATOM+1]; 93 1.1 tshiozak #endif 94 1.1 tshiozak char op; 95 1.1 tshiozak }; 96 1.1 tshiozak 97 1.1 tshiozak struct tokenizer_context 98 1.1 tshiozak { 99 1.1 tshiozak struct _memstream memstream; 100 1.1 tshiozak struct { 101 1.1 tshiozak int token; 102 1.1 tshiozak union token_data token_data; 103 1.1 tshiozak } token0; 104 1.1 tshiozak }; 105 1.1 tshiozak 106 1.1 tshiozak /* initialize a tokenizer context */ 107 1.1 tshiozak static void 108 1.1 tshiozak init_tokenizer_context(struct tokenizer_context *tcx) 109 1.1 tshiozak { 110 1.1 tshiozak tcx->token0.token = T_NONE; 111 1.1 tshiozak } 112 1.1 tshiozak 113 1.1 tshiozak /* get an atom (identifier or constant) */ 114 1.1 tshiozak static int 115 1.1 tshiozak tokenize_atom(struct tokenizer_context *tcx, union token_data *token_data) 116 1.1 tshiozak { 117 1.1 tshiozak int ch, len; 118 1.1 tshiozak char buf[MAX_LEN_ATOM+1]; 119 1.1 tshiozak 120 1.1 tshiozak len = 0; 121 1.1 tshiozak while (/*CONSTCOND*/1) { 122 1.1 tshiozak ch = _memstream_getc(&tcx->memstream); 123 1.1 tshiozak if (!(_bcs_isalnum(ch) || ch == '_')) { 124 1.1 tshiozak _memstream_ungetc(&tcx->memstream, ch); 125 1.1 tshiozak break; 126 1.1 tshiozak } 127 1.1 tshiozak if (len == MAX_LEN_ATOM) 128 1.1 tshiozak return T_TOOLONG; 129 1.1 tshiozak buf[len++] = ch; 130 1.1 tshiozak } 131 1.1 tshiozak buf[len] = '\0'; 132 1.1 tshiozak if (len == 0) 133 1.1 tshiozak return T_ILCHAR; 134 1.1 tshiozak 135 1.1 tshiozak if (_bcs_isdigit((int)(unsigned char)buf[0])) { 136 1.1 tshiozak unsigned long ul; 137 1.1 tshiozak char *post; 138 1.1 tshiozak ul = strtoul(buf, &post, 0); 139 1.1 tshiozak if (buf+len != post) 140 1.1 tshiozak return T_ILCHAR; 141 1.1 tshiozak token_data->constant = ul; 142 1.1 tshiozak return T_CONSTANT; 143 1.1 tshiozak } 144 1.1 tshiozak 145 1.1 tshiozak #ifdef ALLOW_ARBITRARY_IDENTIFIER 146 1.1 tshiozak strcpy(token_data->identifier, buf); 147 1.1 tshiozak return T_IDENTIFIER; 148 1.1 tshiozak #else 149 1.1 tshiozak if (!strcmp(buf, PLURAL_NUMBER_SYMBOL)) 150 1.1 tshiozak return T_IDENTIFIER; 151 1.1 tshiozak return T_ILCHAR; 152 1.1 tshiozak #endif 153 1.1 tshiozak } 154 1.1 tshiozak 155 1.1 tshiozak /* tokenizer main routine */ 156 1.1 tshiozak static int 157 1.1 tshiozak tokenize(struct tokenizer_context *tcx, union token_data *token_data) 158 1.1 tshiozak { 159 1.1 tshiozak int ch, prevch; 160 1.1 tshiozak 161 1.1 tshiozak retry: 162 1.1 tshiozak ch = _memstream_getc(&tcx->memstream); 163 1.1 tshiozak if (_bcs_isspace(ch)) 164 1.1 tshiozak goto retry; 165 1.1 tshiozak 166 1.1 tshiozak switch (ch) { 167 1.1 tshiozak case T_EOF: 168 1.1 tshiozak return ch; 169 1.1 tshiozak case '+': case '-': 170 1.1 tshiozak token_data->op = ch; 171 1.1 tshiozak return T_ADDITIVE; 172 1.1 tshiozak case '*': case '/': case '%': 173 1.1 tshiozak token_data->op = ch; 174 1.1 tshiozak return T_MULTIPLICATIVE; 175 1.1 tshiozak case '?': case ':': case '(': case ')': 176 1.1 tshiozak token_data->op = ch; 177 1.1 tshiozak return ch; 178 1.1 tshiozak case '&': case '|': 179 1.1 tshiozak prevch = ch; 180 1.1 tshiozak ch = _memstream_getc(&tcx->memstream); 181 1.1 tshiozak if (ch != prevch) { 182 1.1 tshiozak _memstream_ungetc(&tcx->memstream, ch); 183 1.1 tshiozak return T_ILCHAR; 184 1.1 tshiozak } 185 1.1 tshiozak token_data->op = ch; 186 1.1 tshiozak switch (ch) { 187 1.1 tshiozak case '&': 188 1.1 tshiozak return T_LAND; 189 1.1 tshiozak case '|': 190 1.1 tshiozak return T_LOR; 191 1.3 christos default: 192 1.3 christos return T_ILTOKEN; 193 1.1 tshiozak } 194 1.1 tshiozak case '=': case '!': case '<': case '>': 195 1.1 tshiozak prevch = ch; 196 1.1 tshiozak ch = _memstream_getc(&tcx->memstream); 197 1.1 tshiozak if (ch != '=') { 198 1.1 tshiozak _memstream_ungetc(&tcx->memstream, ch); 199 1.1 tshiozak switch (prevch) { 200 1.1 tshiozak case '=': 201 1.1 tshiozak return T_ILCHAR; 202 1.1 tshiozak case '!': 203 1.1 tshiozak return '!'; 204 1.1 tshiozak case '<': 205 1.1 tshiozak case '>': 206 1.1 tshiozak token_data->op = prevch; /* OP_LT or OP_GT */ 207 1.1 tshiozak return T_RELATIONAL; 208 1.1 tshiozak } 209 1.1 tshiozak } 210 1.1 tshiozak /* '==', '!=', '<=' or '>=' */ 211 1.1 tshiozak token_data->op = ch+prevch; 212 1.1 tshiozak switch (prevch) { 213 1.1 tshiozak case '=': 214 1.1 tshiozak case '!': 215 1.1 tshiozak return T_EQUALITY; 216 1.1 tshiozak case '<': 217 1.1 tshiozak case '>': 218 1.1 tshiozak return T_RELATIONAL; 219 1.1 tshiozak } 220 1.1 tshiozak /*NOTREACHED*/ 221 1.1 tshiozak } 222 1.1 tshiozak 223 1.1 tshiozak _memstream_ungetc(&tcx->memstream, ch); 224 1.1 tshiozak return tokenize_atom(tcx, token_data); 225 1.1 tshiozak } 226 1.1 tshiozak 227 1.1 tshiozak /* get the next token */ 228 1.1 tshiozak static int 229 1.1 tshiozak get_token(struct tokenizer_context *tcx, union token_data *token_data) 230 1.1 tshiozak { 231 1.1 tshiozak if (tcx->token0.token != T_NONE) { 232 1.1 tshiozak int token = tcx->token0.token; 233 1.1 tshiozak tcx->token0.token = T_NONE; 234 1.1 tshiozak *token_data = tcx->token0.token_data; 235 1.1 tshiozak return token; 236 1.1 tshiozak } 237 1.1 tshiozak return tokenize(tcx, token_data); 238 1.1 tshiozak } 239 1.1 tshiozak 240 1.1 tshiozak /* push back the last token */ 241 1.1 tshiozak static void 242 1.1 tshiozak unget_token(struct tokenizer_context *tcx, 243 1.1 tshiozak int token, union token_data *token_data) 244 1.1 tshiozak { 245 1.1 tshiozak tcx->token0.token = token; 246 1.1 tshiozak tcx->token0.token_data = *token_data; 247 1.1 tshiozak } 248 1.1 tshiozak 249 1.1 tshiozak #ifdef TEST_TOKENIZER 250 1.1 tshiozak 251 1.1 tshiozak int 252 1.1 tshiozak main(int argc, char **argv) 253 1.1 tshiozak { 254 1.1 tshiozak struct tokenizer_context tcx; 255 1.1 tshiozak union token_data token_data; 256 1.1 tshiozak int token; 257 1.1 tshiozak 258 1.1 tshiozak if (argc != 2) { 259 1.1 tshiozak fprintf(stderr, "usage: %s <expression>\n", argv[0]); 260 1.1 tshiozak return EXIT_FAILURE; 261 1.1 tshiozak } 262 1.1 tshiozak 263 1.1 tshiozak init_tokenizer_context(&tcx); 264 1.1 tshiozak _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1])); 265 1.1 tshiozak 266 1.1 tshiozak while (1) { 267 1.1 tshiozak token = get_token(&tcx, &token_data); 268 1.1 tshiozak switch (token) { 269 1.1 tshiozak case T_EOF: 270 1.1 tshiozak goto quit; 271 1.1 tshiozak case T_ILCHAR: 272 1.1 tshiozak printf("illegal character.\n"); 273 1.1 tshiozak goto quit; 274 1.1 tshiozak case T_TOOLONG: 275 1.1 tshiozak printf("too long atom.\n"); 276 1.1 tshiozak goto quit; 277 1.1 tshiozak case T_CONSTANT: 278 1.1 tshiozak printf("constant: %lu\n", token_data.constant); 279 1.1 tshiozak break; 280 1.1 tshiozak case T_IDENTIFIER: 281 1.1 tshiozak printf("symbol: %s\n", token_data.identifier); 282 1.1 tshiozak break; 283 1.1 tshiozak default: 284 1.1 tshiozak printf("operator: "); 285 1.1 tshiozak switch (token) { 286 1.1 tshiozak case T_LAND: 287 1.1 tshiozak printf("&&\n"); 288 1.1 tshiozak break; 289 1.1 tshiozak case T_LOR: 290 1.1 tshiozak printf("||\n"); 291 1.1 tshiozak break; 292 1.1 tshiozak case T_EQUALITY: 293 1.1 tshiozak printf("%c=\n", token_data.op-'='); 294 1.1 tshiozak break; 295 1.1 tshiozak case T_RELATIONAL: 296 1.1 tshiozak switch(token_data.op) { 297 1.1 tshiozak case OP_LTEQ: 298 1.1 tshiozak case OP_GTEQ: 299 1.1 tshiozak printf("%c=\n", token_data.op-'='); 300 1.1 tshiozak break; 301 1.1 tshiozak default: 302 1.1 tshiozak printf("%c\n", token_data.op); 303 1.1 tshiozak break; 304 1.1 tshiozak } 305 1.1 tshiozak break; 306 1.1 tshiozak case T_ADDITIVE: 307 1.1 tshiozak case T_MULTIPLICATIVE: 308 1.1 tshiozak printf("%c\n", token_data.op); 309 1.1 tshiozak break; 310 1.1 tshiozak default: 311 1.1 tshiozak printf("operator: %c\n", token); 312 1.1 tshiozak } 313 1.1 tshiozak } 314 1.1 tshiozak } 315 1.1 tshiozak quit: 316 1.1 tshiozak return 0; 317 1.1 tshiozak } 318 1.1 tshiozak #endif /* TEST_TOKENIZER */ 319 1.1 tshiozak 320 1.1 tshiozak 321 1.1 tshiozak /* ---------------------------------------------------------------------- 322 1.1 tshiozak * parser part 323 1.1 tshiozak * 324 1.1 tshiozak * exp := cond 325 1.1 tshiozak * 326 1.1 tshiozak * cond := lor | lor '?' cond ':' cond 327 1.1 tshiozak * 328 1.1 tshiozak * lor := land ( '||' land )* 329 1.1 tshiozak * 330 1.1 tshiozak * land := equality ( '&&' equality )* 331 1.1 tshiozak * 332 1.1 tshiozak * equality := relational ( equalityops relational )* 333 1.1 tshiozak * equalityops := '==' | '!=' 334 1.1 tshiozak * 335 1.1 tshiozak * relational := additive ( relationalops additive )* 336 1.1 tshiozak * relationalops := '<' | '>' | '<=' | '>=' 337 1.1 tshiozak * 338 1.1 tshiozak * additive := multiplicative ( additiveops multiplicative )* 339 1.1 tshiozak * additiveops := '+' | '-' 340 1.1 tshiozak * 341 1.1 tshiozak * multiplicative := lnot ( multiplicativeops lnot )* 342 1.1 tshiozak * multiplicativeops := '*' | '/' | '%' 343 1.1 tshiozak * 344 1.1 tshiozak * lnot := '!' lnot | term 345 1.1 tshiozak * 346 1.1 tshiozak * term := literal | identifier | '(' exp ')' 347 1.1 tshiozak * 348 1.1 tshiozak */ 349 1.1 tshiozak 350 1.1 tshiozak #define T_ENSURE_OK(token, label) \ 351 1.1 tshiozak do { \ 352 1.1 tshiozak if (T_IS_ERROR(token)) \ 353 1.1 tshiozak goto label; \ 354 1.4 rillig } while (0) 355 1.1 tshiozak #define T_ENSURE_SOMETHING(token, label) \ 356 1.1 tshiozak do { \ 357 1.1 tshiozak if ((token) == T_EOF) { \ 358 1.1 tshiozak token = T_ILEND; \ 359 1.1 tshiozak goto label; \ 360 1.1 tshiozak } else if (T_IS_ERROR(token)) \ 361 1.1 tshiozak goto label; \ 362 1.4 rillig } while (0) 363 1.1 tshiozak 364 1.1 tshiozak #define parser_element plural_element 365 1.1 tshiozak 366 1.1 tshiozak struct parser_element; 367 1.1 tshiozak struct parser_op 368 1.1 tshiozak { 369 1.1 tshiozak char op; 370 1.1 tshiozak struct parser_element *operands[MAX_NUM_OPERANDS]; 371 1.1 tshiozak }; 372 1.1 tshiozak struct parser_element 373 1.1 tshiozak { 374 1.1 tshiozak int kind; 375 1.1 tshiozak union 376 1.1 tshiozak { 377 1.1 tshiozak struct parser_op parser_op; 378 1.1 tshiozak union token_data token_data; 379 1.1 tshiozak } u; 380 1.1 tshiozak }; 381 1.1 tshiozak 382 1.1 tshiozak struct parser_op2_transition 383 1.1 tshiozak { 384 1.1 tshiozak int kind; 385 1.1 tshiozak const struct parser_op2_transition *next; 386 1.1 tshiozak }; 387 1.1 tshiozak 388 1.1 tshiozak /* prototypes */ 389 1.1 tshiozak static int parse_cond(struct tokenizer_context *, struct parser_element *); 390 1.1 tshiozak 391 1.1 tshiozak 392 1.1 tshiozak /* transition table for the 2-operand operators */ 393 1.1 tshiozak #define DEF_TR(t, k, n) \ 394 1.1 tshiozak static struct parser_op2_transition exp_tr_##t = { \ 395 1.1 tshiozak k, &exp_tr_##n \ 396 1.1 tshiozak } 397 1.1 tshiozak #define DEF_TR0(t, k) \ 398 1.1 tshiozak static struct parser_op2_transition exp_tr_##t = { \ 399 1.1 tshiozak k, NULL /* expect lnot */ \ 400 1.1 tshiozak } 401 1.1 tshiozak 402 1.1 tshiozak DEF_TR0(multiplicative, T_MULTIPLICATIVE); 403 1.1 tshiozak DEF_TR(additive, T_ADDITIVE, multiplicative); 404 1.1 tshiozak DEF_TR(relational, T_RELATIONAL, additive); 405 1.1 tshiozak DEF_TR(equality, T_EQUALITY, relational); 406 1.1 tshiozak DEF_TR(land, T_LAND, equality); 407 1.1 tshiozak DEF_TR(lor, T_LOR, land); 408 1.1 tshiozak 409 1.1 tshiozak /* init a parser element structure */ 410 1.1 tshiozak static void 411 1.1 tshiozak init_parser_element(struct parser_element *pe) 412 1.1 tshiozak { 413 1.1 tshiozak int i; 414 1.1 tshiozak 415 1.1 tshiozak pe->kind = T_NONE; 416 1.1 tshiozak for (i=0; i<MAX_NUM_OPERANDS; i++) 417 1.1 tshiozak pe->u.parser_op.operands[i] = NULL; 418 1.1 tshiozak } 419 1.1 tshiozak 420 1.1 tshiozak /* uninitialize a parser element structure with freeing children */ 421 1.1 tshiozak static void free_parser_element(struct parser_element *); 422 1.1 tshiozak static void 423 1.1 tshiozak uninit_parser_element(struct parser_element *pe) 424 1.1 tshiozak { 425 1.1 tshiozak int i; 426 1.1 tshiozak 427 1.1 tshiozak if (T_IS_OPERATOR(pe->kind)) 428 1.1 tshiozak for (i=0; i<MAX_NUM_OPERANDS; i++) 429 1.1 tshiozak if (pe->u.parser_op.operands[i]) 430 1.1 tshiozak free_parser_element( 431 1.1 tshiozak pe->u.parser_op.operands[i]); 432 1.1 tshiozak } 433 1.1 tshiozak 434 1.1 tshiozak /* free a parser element structure with freeing children */ 435 1.1 tshiozak static void 436 1.1 tshiozak free_parser_element(struct parser_element *pe) 437 1.1 tshiozak { 438 1.1 tshiozak if (pe) { 439 1.1 tshiozak uninit_parser_element(pe); 440 1.1 tshiozak free(pe); 441 1.1 tshiozak } 442 1.1 tshiozak } 443 1.1 tshiozak 444 1.1 tshiozak 445 1.1 tshiozak /* copy a parser element structure shallowly */ 446 1.1 tshiozak static void 447 1.1 tshiozak copy_parser_element(struct parser_element *dpe, 448 1.1 tshiozak const struct parser_element *spe) 449 1.1 tshiozak { 450 1.1 tshiozak memcpy(dpe, spe, sizeof *dpe); 451 1.1 tshiozak } 452 1.1 tshiozak 453 1.1 tshiozak /* duplicate a parser element structure shallowly */ 454 1.1 tshiozak static struct parser_element * 455 1.1 tshiozak dup_parser_element(const struct parser_element *pe) 456 1.1 tshiozak { 457 1.1 tshiozak struct parser_element *dpe = malloc(sizeof *dpe); 458 1.1 tshiozak if (dpe) 459 1.1 tshiozak copy_parser_element(dpe, pe); 460 1.1 tshiozak return dpe; 461 1.1 tshiozak } 462 1.1 tshiozak 463 1.1 tshiozak /* term := identifier | constant | '(' exp ')' */ 464 1.1 tshiozak static int 465 1.1 tshiozak parse_term(struct tokenizer_context *tcx, struct parser_element *pelem) 466 1.1 tshiozak { 467 1.1 tshiozak struct parser_element pe0; 468 1.1 tshiozak int token; 469 1.1 tshiozak union token_data token_data; 470 1.1 tshiozak 471 1.1 tshiozak token = get_token(tcx, &token_data); 472 1.1 tshiozak switch (token) { 473 1.1 tshiozak case '(': 474 1.1 tshiozak /* '(' exp ')' */ 475 1.1 tshiozak init_parser_element(&pe0); 476 1.1 tshiozak /* expect exp */ 477 1.1 tshiozak token = parse_cond(tcx, &pe0); 478 1.1 tshiozak T_ENSURE_OK(token, err); 479 1.1 tshiozak /* expect ')' */ 480 1.1 tshiozak token = get_token(tcx, &token_data); 481 1.1 tshiozak T_ENSURE_SOMETHING(token, err); 482 1.1 tshiozak if (token != ')') { 483 1.1 tshiozak unget_token(tcx, token, &token_data); 484 1.1 tshiozak token = T_ILTOKEN; 485 1.1 tshiozak goto err; 486 1.1 tshiozak } 487 1.1 tshiozak copy_parser_element(pelem, &pe0); 488 1.1 tshiozak return token; 489 1.1 tshiozak err: 490 1.1 tshiozak uninit_parser_element(&pe0); 491 1.1 tshiozak return token; 492 1.1 tshiozak case T_IDENTIFIER: 493 1.1 tshiozak case T_CONSTANT: 494 1.1 tshiozak pelem->kind = token; 495 1.1 tshiozak pelem->u.token_data = token_data; 496 1.1 tshiozak return token; 497 1.1 tshiozak case T_EOF: 498 1.1 tshiozak return T_ILEND; 499 1.1 tshiozak default: 500 1.1 tshiozak return T_ILTOKEN; 501 1.1 tshiozak } 502 1.1 tshiozak } 503 1.1 tshiozak 504 1.1 tshiozak /* lnot := '!' lnot | term */ 505 1.1 tshiozak static int 506 1.1 tshiozak parse_lnot(struct tokenizer_context *tcx, struct parser_element *pelem) 507 1.1 tshiozak { 508 1.1 tshiozak struct parser_element pe0; 509 1.1 tshiozak int token; 510 1.1 tshiozak union token_data token_data; 511 1.1 tshiozak 512 1.1 tshiozak init_parser_element(&pe0); 513 1.1 tshiozak 514 1.1 tshiozak /* '!' or not */ 515 1.1 tshiozak token = get_token(tcx, &token_data); 516 1.1 tshiozak if (token != '!') { 517 1.1 tshiozak /* stop: term */ 518 1.1 tshiozak unget_token(tcx, token, &token_data); 519 1.1 tshiozak return parse_term(tcx, pelem); 520 1.1 tshiozak } 521 1.1 tshiozak 522 1.1 tshiozak /* '!' term */ 523 1.1 tshiozak token = parse_lnot(tcx, &pe0); 524 1.1 tshiozak T_ENSURE_OK(token, err); 525 1.1 tshiozak 526 1.1 tshiozak pelem->kind = '!'; 527 1.1 tshiozak pelem->u.parser_op.operands[0] = dup_parser_element(&pe0); 528 1.1 tshiozak return pelem->kind; 529 1.1 tshiozak err: 530 1.1 tshiozak uninit_parser_element(&pe0); 531 1.1 tshiozak return token; 532 1.1 tshiozak } 533 1.1 tshiozak 534 1.1 tshiozak /* ext_op := ext_next ( op ext_next )* */ 535 1.1 tshiozak static int 536 1.1 tshiozak parse_op2(struct tokenizer_context *tcx, struct parser_element *pelem, 537 1.1 tshiozak const struct parser_op2_transition *tr) 538 1.1 tshiozak { 539 1.1 tshiozak struct parser_element pe0, pe1, peop; 540 1.1 tshiozak int token; 541 1.1 tshiozak union token_data token_data; 542 1.1 tshiozak char op; 543 1.1 tshiozak 544 1.1 tshiozak /* special case: expect lnot */ 545 1.1 tshiozak if (tr == NULL) 546 1.1 tshiozak return parse_lnot(tcx, pelem); 547 1.1 tshiozak 548 1.1 tshiozak init_parser_element(&pe0); 549 1.1 tshiozak init_parser_element(&pe1); 550 1.1 tshiozak token = parse_op2(tcx, &pe0, tr->next); 551 1.1 tshiozak T_ENSURE_OK(token, err); 552 1.1 tshiozak 553 1.1 tshiozak while (/*CONSTCOND*/1) { 554 1.1 tshiozak /* expect op or empty */ 555 1.1 tshiozak token = get_token(tcx, &token_data); 556 1.1 tshiozak if (token != tr->kind) { 557 1.1 tshiozak /* stop */ 558 1.1 tshiozak unget_token(tcx, token, &token_data); 559 1.1 tshiozak copy_parser_element(pelem, &pe0); 560 1.1 tshiozak break; 561 1.1 tshiozak } 562 1.1 tshiozak op = token_data.op; 563 1.1 tshiozak /* right hand */ 564 1.1 tshiozak token = parse_op2(tcx, &pe1, tr->next); 565 1.1 tshiozak T_ENSURE_OK(token, err); 566 1.1 tshiozak 567 1.1 tshiozak init_parser_element(&peop); 568 1.1 tshiozak peop.kind = tr->kind; 569 1.1 tshiozak peop.u.parser_op.op = op; 570 1.1 tshiozak peop.u.parser_op.operands[0] = dup_parser_element(&pe0); 571 1.1 tshiozak init_parser_element(&pe0); 572 1.1 tshiozak peop.u.parser_op.operands[1] = dup_parser_element(&pe1); 573 1.1 tshiozak init_parser_element(&pe1); 574 1.1 tshiozak copy_parser_element(&pe0, &peop); 575 1.1 tshiozak } 576 1.1 tshiozak return pelem->kind; 577 1.1 tshiozak err: 578 1.1 tshiozak uninit_parser_element(&pe1); 579 1.1 tshiozak uninit_parser_element(&pe0); 580 1.1 tshiozak return token; 581 1.1 tshiozak } 582 1.1 tshiozak 583 1.1 tshiozak /* cond := lor | lor '?' cond ':' cond */ 584 1.1 tshiozak static int 585 1.1 tshiozak parse_cond(struct tokenizer_context *tcx, struct parser_element *pelem) 586 1.1 tshiozak { 587 1.1 tshiozak struct parser_element pe0, pe1, pe2; 588 1.1 tshiozak int token; 589 1.1 tshiozak union token_data token_data; 590 1.1 tshiozak 591 1.1 tshiozak init_parser_element(&pe0); 592 1.1 tshiozak init_parser_element(&pe1); 593 1.1 tshiozak init_parser_element(&pe2); 594 1.1 tshiozak 595 1.1 tshiozak /* expect lor or empty */ 596 1.1 tshiozak token = parse_op2(tcx, &pe0, &exp_tr_lor); 597 1.1 tshiozak T_ENSURE_OK(token, err); 598 1.1 tshiozak 599 1.1 tshiozak /* '?' or not */ 600 1.1 tshiozak token = get_token(tcx, &token_data); 601 1.1 tshiozak if (token != '?') { 602 1.1 tshiozak /* stop: lor */ 603 1.1 tshiozak unget_token(tcx, token, &token_data); 604 1.1 tshiozak copy_parser_element(pelem, &pe0); 605 1.1 tshiozak return pe0.kind; 606 1.1 tshiozak } 607 1.1 tshiozak 608 1.1 tshiozak /* lor '?' cond ':' cond */ 609 1.1 tshiozak /* expect cond */ 610 1.1 tshiozak token = parse_cond(tcx, &pe1); 611 1.1 tshiozak T_ENSURE_OK(token, err); 612 1.1 tshiozak 613 1.1 tshiozak /* expect ':' */ 614 1.1 tshiozak token = get_token(tcx, &token_data); 615 1.1 tshiozak T_ENSURE_OK(token, err); 616 1.1 tshiozak if (token != ':') { 617 1.1 tshiozak unget_token(tcx, token, &token_data); 618 1.1 tshiozak token = T_ILTOKEN; 619 1.1 tshiozak goto err; 620 1.1 tshiozak } 621 1.1 tshiozak 622 1.1 tshiozak /* expect cond */ 623 1.1 tshiozak token = parse_cond(tcx, &pe2); 624 1.1 tshiozak T_ENSURE_OK(token, err); 625 1.1 tshiozak 626 1.1 tshiozak pelem->kind = '?'; 627 1.1 tshiozak pelem->u.parser_op.operands[0] = dup_parser_element(&pe0); 628 1.1 tshiozak pelem->u.parser_op.operands[1] = dup_parser_element(&pe1); 629 1.1 tshiozak pelem->u.parser_op.operands[2] = dup_parser_element(&pe2); 630 1.1 tshiozak return pelem->kind; 631 1.1 tshiozak err: 632 1.1 tshiozak uninit_parser_element(&pe2); 633 1.1 tshiozak uninit_parser_element(&pe1); 634 1.1 tshiozak uninit_parser_element(&pe0); 635 1.1 tshiozak return token; 636 1.1 tshiozak } 637 1.1 tshiozak 638 1.1 tshiozak static int 639 1.1 tshiozak parse_exp(struct tokenizer_context *tcx, struct parser_element *pelem) 640 1.1 tshiozak { 641 1.1 tshiozak int token, token1; 642 1.1 tshiozak union token_data token_data; 643 1.1 tshiozak 644 1.1 tshiozak #ifdef ALLOW_EMPTY 645 1.1 tshiozak /* empty check */ 646 1.1 tshiozak token = get_token(tcx, &token_data); 647 1.1 tshiozak if (token == T_EOF) 648 1.1 tshiozak return token; 649 1.1 tshiozak unget_token(tcx, token, &token_data); 650 1.1 tshiozak #endif 651 1.1 tshiozak 652 1.1 tshiozak token = parse_cond(tcx, pelem); 653 1.1 tshiozak if (!T_IS_ERROR(token)) { 654 1.1 tshiozak /* termination check */ 655 1.1 tshiozak token1 = get_token(tcx, &token_data); 656 1.1 tshiozak if (token1 == T_EOF) 657 1.1 tshiozak return token; 658 1.1 tshiozak else if (!T_IS_ERROR(token)) 659 1.1 tshiozak unget_token(tcx, token1, &token_data); 660 1.1 tshiozak return T_ILTOKEN; 661 1.1 tshiozak } 662 1.1 tshiozak return token; 663 1.1 tshiozak } 664 1.1 tshiozak 665 1.1 tshiozak 666 1.1 tshiozak #if defined(TEST_PARSER) || defined(TEST_PARSE_PLURAL) 667 1.1 tshiozak #include <stdio.h> 668 1.1 tshiozak 669 1.1 tshiozak static void dump_elem(struct parser_element *); 670 1.1 tshiozak 671 1.1 tshiozak static void 672 1.1 tshiozak dump_op2(struct parser_element *pelem) 673 1.1 tshiozak { 674 1.1 tshiozak dump_elem(pelem->u.parser_op.operands[0]); 675 1.1 tshiozak printf(" "); 676 1.1 tshiozak dump_elem(pelem->u.parser_op.operands[1]); 677 1.1 tshiozak printf(")"); 678 1.1 tshiozak } 679 1.1 tshiozak 680 1.1 tshiozak static void 681 1.1 tshiozak dump_op3(struct parser_element *pelem) 682 1.1 tshiozak { 683 1.1 tshiozak dump_elem(pelem->u.parser_op.operands[0]); 684 1.1 tshiozak printf(" "); 685 1.1 tshiozak dump_elem(pelem->u.parser_op.operands[1]); 686 1.1 tshiozak printf(" "); 687 1.1 tshiozak dump_elem(pelem->u.parser_op.operands[2]); 688 1.1 tshiozak printf(")"); 689 1.1 tshiozak } 690 1.1 tshiozak 691 1.1 tshiozak static void 692 1.1 tshiozak dump_elem(struct parser_element *pelem) 693 1.1 tshiozak { 694 1.1 tshiozak switch (pelem->kind) { 695 1.1 tshiozak case T_LAND: 696 1.1 tshiozak printf("(&& "); 697 1.1 tshiozak dump_op2(pelem); 698 1.1 tshiozak break; 699 1.1 tshiozak case T_LOR: 700 1.1 tshiozak printf("(|| "); 701 1.1 tshiozak dump_op2(pelem); 702 1.1 tshiozak break; 703 1.1 tshiozak case T_EQUALITY: 704 1.1 tshiozak switch (pelem->u.parser_op.op) { 705 1.1 tshiozak case OP_EQ: 706 1.1 tshiozak printf("(== "); 707 1.1 tshiozak break; 708 1.1 tshiozak case OP_NEQ: 709 1.1 tshiozak printf("(!= "); 710 1.1 tshiozak break; 711 1.1 tshiozak } 712 1.1 tshiozak dump_op2(pelem); 713 1.1 tshiozak break; 714 1.1 tshiozak case T_RELATIONAL: 715 1.1 tshiozak switch (pelem->u.parser_op.op) { 716 1.1 tshiozak case '<': 717 1.1 tshiozak case '>': 718 1.1 tshiozak printf("(%c ", pelem->u.parser_op.op); 719 1.1 tshiozak break; 720 1.1 tshiozak case OP_LTEQ: 721 1.1 tshiozak case OP_GTEQ: 722 1.1 tshiozak printf("(%c= ", pelem->u.parser_op.op-'='); 723 1.1 tshiozak break; 724 1.1 tshiozak } 725 1.1 tshiozak dump_op2(pelem); 726 1.1 tshiozak break; 727 1.1 tshiozak case T_ADDITIVE: 728 1.1 tshiozak case T_MULTIPLICATIVE: 729 1.1 tshiozak printf("(%c ", pelem->u.parser_op.op); 730 1.1 tshiozak dump_op2(pelem); 731 1.1 tshiozak break; 732 1.1 tshiozak case '!': 733 1.1 tshiozak printf("(! "); 734 1.1 tshiozak dump_elem(pelem->u.parser_op.operands[0]); 735 1.1 tshiozak printf(")"); 736 1.1 tshiozak break; 737 1.1 tshiozak case '?': 738 1.1 tshiozak printf("(? "); 739 1.1 tshiozak dump_op3(pelem); 740 1.1 tshiozak break; 741 1.1 tshiozak case T_CONSTANT: 742 1.1 tshiozak printf("%d", pelem->u.token_data.constant); 743 1.1 tshiozak break; 744 1.1 tshiozak case T_IDENTIFIER: 745 1.1 tshiozak #ifdef ALLOW_ARBITRARY_IDENTIFIER 746 1.1 tshiozak printf("%s", pelem->u.token_data.identifier); 747 1.1 tshiozak #else 748 1.1 tshiozak printf(PLURAL_NUMBER_SYMBOL); 749 1.1 tshiozak #endif 750 1.1 tshiozak break; 751 1.1 tshiozak } 752 1.1 tshiozak } 753 1.1 tshiozak #endif 754 1.1 tshiozak #ifdef TEST_PARSER 755 1.1 tshiozak int 756 1.1 tshiozak main(int argc, char **argv) 757 1.1 tshiozak { 758 1.1 tshiozak struct tokenizer_context tcx; 759 1.1 tshiozak struct parser_element pelem; 760 1.1 tshiozak int token; 761 1.1 tshiozak 762 1.1 tshiozak if (argc != 2) { 763 1.1 tshiozak fprintf(stderr, "usage: %s <expression>\n", argv[0]); 764 1.1 tshiozak return EXIT_FAILURE; 765 1.1 tshiozak } 766 1.1 tshiozak 767 1.1 tshiozak init_tokenizer_context(&tcx); 768 1.1 tshiozak _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1])); 769 1.1 tshiozak 770 1.1 tshiozak init_parser_element(&pelem); 771 1.1 tshiozak token = parse_exp(&tcx, &pelem); 772 1.1 tshiozak 773 1.1 tshiozak if (token == T_EOF) 774 1.1 tshiozak printf("none"); 775 1.1 tshiozak else if (T_IS_ERROR(token)) 776 1.1 tshiozak printf("error: 0x%X", token); 777 1.1 tshiozak else 778 1.1 tshiozak dump_elem(&pelem); 779 1.1 tshiozak printf("\n"); 780 1.1 tshiozak 781 1.1 tshiozak uninit_parser_element(&pelem); 782 1.1 tshiozak 783 1.1 tshiozak return EXIT_SUCCESS; 784 1.1 tshiozak } 785 1.1 tshiozak #endif /* TEST_PARSER */ 786 1.1 tshiozak 787 1.1 tshiozak /* ---------------------------------------------------------------------- 788 1.5 andvar * calculate plural number 789 1.1 tshiozak */ 790 1.1 tshiozak static unsigned long 791 1.1 tshiozak calculate_plural(const struct parser_element *pe, unsigned long n) 792 1.1 tshiozak { 793 1.1 tshiozak unsigned long val0, val1; 794 1.1 tshiozak switch (pe->kind) { 795 1.1 tshiozak case T_IDENTIFIER: 796 1.1 tshiozak return n; 797 1.1 tshiozak case T_CONSTANT: 798 1.1 tshiozak return pe->u.token_data.constant; 799 1.1 tshiozak case '?': 800 1.1 tshiozak val0 = calculate_plural(pe->u.parser_op.operands[0], n); 801 1.1 tshiozak if (val0) 802 1.1 tshiozak val1=calculate_plural(pe->u.parser_op.operands[1], n); 803 1.1 tshiozak else 804 1.1 tshiozak val1=calculate_plural(pe->u.parser_op.operands[2], n); 805 1.1 tshiozak return val1; 806 1.1 tshiozak case '!': 807 1.1 tshiozak return !calculate_plural(pe->u.parser_op.operands[0], n); 808 1.1 tshiozak case T_MULTIPLICATIVE: 809 1.1 tshiozak case T_ADDITIVE: 810 1.1 tshiozak case T_RELATIONAL: 811 1.1 tshiozak case T_EQUALITY: 812 1.1 tshiozak case T_LOR: 813 1.1 tshiozak case T_LAND: 814 1.1 tshiozak val0 = calculate_plural(pe->u.parser_op.operands[0], n); 815 1.1 tshiozak val1 = calculate_plural(pe->u.parser_op.operands[1], n); 816 1.1 tshiozak switch (pe->u.parser_op.op) { 817 1.1 tshiozak case '*': 818 1.1 tshiozak return val0*val1; 819 1.1 tshiozak case '/': 820 1.1 tshiozak return val0/val1; 821 1.1 tshiozak case '%': 822 1.1 tshiozak return val0%val1; 823 1.1 tshiozak case '+': 824 1.1 tshiozak return val0+val1; 825 1.1 tshiozak case '-': 826 1.1 tshiozak return val0-val1; 827 1.1 tshiozak case '<': 828 1.1 tshiozak return val0<val1; 829 1.1 tshiozak case '>': 830 1.1 tshiozak return val0>val1; 831 1.1 tshiozak case OP_LTEQ: 832 1.1 tshiozak return val0<=val1; 833 1.1 tshiozak case OP_GTEQ: 834 1.1 tshiozak return val0>=val1; 835 1.1 tshiozak case OP_EQ: 836 1.1 tshiozak return val0==val1; 837 1.1 tshiozak case OP_NEQ: 838 1.1 tshiozak return val0!=val1; 839 1.1 tshiozak case '|': 840 1.1 tshiozak return val0||val1; 841 1.1 tshiozak case '&': 842 1.1 tshiozak return val0&&val1; 843 1.1 tshiozak } 844 1.1 tshiozak } 845 1.1 tshiozak return 0; 846 1.1 tshiozak } 847 1.1 tshiozak 848 1.1 tshiozak #ifdef TEST_CALC_PLURAL 849 1.1 tshiozak #include <stdio.h> 850 1.1 tshiozak 851 1.1 tshiozak int 852 1.1 tshiozak main(int argc, char **argv) 853 1.1 tshiozak { 854 1.1 tshiozak struct tokenizer_context tcx; 855 1.1 tshiozak struct parser_element pelem; 856 1.1 tshiozak int token; 857 1.1 tshiozak 858 1.1 tshiozak if (argc != 3) { 859 1.1 tshiozak fprintf(stderr, "usage: %s <expression> <n>\n", argv[0]); 860 1.1 tshiozak return EXIT_FAILURE; 861 1.1 tshiozak } 862 1.1 tshiozak 863 1.1 tshiozak init_tokenizer_context(&tcx); 864 1.1 tshiozak _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1])); 865 1.1 tshiozak 866 1.1 tshiozak init_parser_element(&pelem); 867 1.1 tshiozak token = parse_exp(&tcx, &pelem); 868 1.1 tshiozak 869 1.1 tshiozak if (token == T_EOF) 870 1.1 tshiozak printf("none"); 871 1.1 tshiozak else if (T_IS_ERROR(token)) 872 1.1 tshiozak printf("error: 0x%X", token); 873 1.1 tshiozak else { 874 1.1 tshiozak printf("plural = %lu", 875 1.1 tshiozak calculate_plural(&pelem, atoi(argv[2]))); 876 1.1 tshiozak } 877 1.1 tshiozak printf("\n"); 878 1.1 tshiozak 879 1.1 tshiozak uninit_parser_element(&pelem); 880 1.1 tshiozak 881 1.1 tshiozak return EXIT_SUCCESS; 882 1.1 tshiozak } 883 1.1 tshiozak #endif /* TEST_CALC_PLURAL */ 884 1.1 tshiozak 885 1.1 tshiozak 886 1.1 tshiozak /* ---------------------------------------------------------------------- 887 1.1 tshiozak * parse plural forms 888 1.1 tshiozak */ 889 1.1 tshiozak 890 1.1 tshiozak static void 891 1.1 tshiozak region_skip_ws(struct _region *r) 892 1.1 tshiozak { 893 1.1 tshiozak const char *str = _region_head(r); 894 1.1 tshiozak size_t len = _region_size(r); 895 1.1 tshiozak 896 1.1 tshiozak str = _bcs_skip_ws_len(str, &len); 897 1.1 tshiozak _region_init(r, __UNCONST(str), len); 898 1.1 tshiozak } 899 1.1 tshiozak 900 1.1 tshiozak static void 901 1.1 tshiozak region_trunc_rws(struct _region *r) 902 1.1 tshiozak { 903 1.1 tshiozak const char *str = _region_head(r); 904 1.1 tshiozak size_t len = _region_size(r); 905 1.1 tshiozak 906 1.1 tshiozak _bcs_trunc_rws_len(str, &len); 907 1.1 tshiozak _region_init(r, __UNCONST(str), len); 908 1.1 tshiozak } 909 1.1 tshiozak 910 1.1 tshiozak static int 911 1.1 tshiozak region_check_prefix(struct _region *r, const char *pre, size_t prelen, 912 1.1 tshiozak int ignorecase) 913 1.1 tshiozak { 914 1.1 tshiozak if (_region_size(r) < prelen) 915 1.1 tshiozak return -1; 916 1.1 tshiozak 917 1.1 tshiozak if (ignorecase) { 918 1.1 tshiozak if (_bcs_strncasecmp(_region_head(r), pre, prelen)) 919 1.1 tshiozak return -1; 920 1.1 tshiozak } else { 921 1.1 tshiozak if (memcmp(_region_head(r), pre, prelen)) 922 1.1 tshiozak return -1; 923 1.1 tshiozak } 924 1.1 tshiozak return 0; 925 1.1 tshiozak } 926 1.1 tshiozak 927 1.1 tshiozak static int 928 1.1 tshiozak cut_trailing_semicolon(struct _region *r) 929 1.1 tshiozak { 930 1.1 tshiozak 931 1.1 tshiozak region_trunc_rws(r); 932 1.1 tshiozak if (_region_size(r) == 0 || _region_peek8(r, _region_size(r)-1) != ';') 933 1.1 tshiozak return -1; 934 1.1 tshiozak _region_get_subregion(r, r, 0, _region_size(r)-1); 935 1.1 tshiozak return 0; 936 1.1 tshiozak } 937 1.1 tshiozak 938 1.1 tshiozak static int 939 1.1 tshiozak find_plural_forms(struct _region *r) 940 1.1 tshiozak { 941 1.1 tshiozak struct _memstream ms; 942 1.1 tshiozak struct _region rr; 943 1.1 tshiozak 944 1.1 tshiozak _memstream_bind(&ms, r); 945 1.1 tshiozak 946 1.1 tshiozak while (!_memstream_getln_region(&ms, &rr)) { 947 1.1 tshiozak if (!region_check_prefix(&rr, 948 1.1 tshiozak PLURAL_FORMS, LEN_PLURAL_FORMS, 1)) { 949 1.1 tshiozak _region_get_subregion( 950 1.1 tshiozak r, &rr, LEN_PLURAL_FORMS, 951 1.1 tshiozak _region_size(&rr)-LEN_PLURAL_FORMS); 952 1.1 tshiozak region_skip_ws(r); 953 1.1 tshiozak region_trunc_rws(r); 954 1.1 tshiozak return 0; 955 1.1 tshiozak } 956 1.1 tshiozak } 957 1.1 tshiozak return -1; 958 1.1 tshiozak } 959 1.1 tshiozak 960 1.1 tshiozak static int 961 1.1 tshiozak skip_assignment(struct _region *r, const char *sym, size_t symlen) 962 1.1 tshiozak { 963 1.1 tshiozak region_skip_ws(r); 964 1.1 tshiozak if (region_check_prefix(r, sym, symlen, 0)) 965 1.1 tshiozak return -1; 966 1.1 tshiozak _region_get_subregion(r, r, symlen, _region_size(r)-symlen); 967 1.1 tshiozak region_skip_ws(r); 968 1.1 tshiozak if (_region_size(r) == 0 || _region_peek8(r, 0) != '=') 969 1.1 tshiozak return -1; 970 1.1 tshiozak _region_get_subregion(r, r, 1, _region_size(r)-1); 971 1.1 tshiozak region_skip_ws(r); 972 1.1 tshiozak return 0; 973 1.1 tshiozak } 974 1.1 tshiozak 975 1.1 tshiozak static int 976 1.1 tshiozak skip_nplurals(struct _region *r, unsigned long *rnp) 977 1.1 tshiozak { 978 1.1 tshiozak unsigned long np; 979 1.1 tshiozak char buf[MAX_LEN_ATOM+2], *endptr; 980 1.1 tshiozak const char *endptrconst; 981 1.1 tshiozak size_t ofs; 982 1.1 tshiozak 983 1.1 tshiozak if (skip_assignment(r, NPLURALS_SYMBOL, LEN_NPLURAL_SYMBOL)) 984 1.1 tshiozak return -1; 985 1.1 tshiozak if (_region_size(r) == 0 || !_bcs_isdigit(_region_peek8(r, 0))) 986 1.1 tshiozak return -1; 987 1.1 tshiozak strlcpy(buf, _region_head(r), sizeof (buf)); 988 1.1 tshiozak np = strtoul(buf, &endptr, 0); 989 1.1 tshiozak endptrconst = _bcs_skip_ws(endptr); 990 1.1 tshiozak if (*endptrconst != ';') 991 1.1 tshiozak return -1; 992 1.1 tshiozak ofs = endptrconst+1-buf; 993 1.1 tshiozak if (_region_get_subregion(r, r, ofs, _region_size(r)-ofs)) 994 1.1 tshiozak return -1; 995 1.1 tshiozak if (rnp) 996 1.1 tshiozak *rnp = np; 997 1.1 tshiozak return 0; 998 1.1 tshiozak } 999 1.1 tshiozak 1000 1.1 tshiozak static int 1001 1.1 tshiozak parse_plural_body(struct _region *r, struct parser_element **rpe) 1002 1.1 tshiozak { 1003 1.1 tshiozak int token; 1004 1.1 tshiozak struct tokenizer_context tcx; 1005 1.1 tshiozak struct parser_element pelem, *ppe; 1006 1.1 tshiozak 1007 1.1 tshiozak init_tokenizer_context(&tcx); 1008 1.1 tshiozak _memstream_bind(&tcx.memstream, r); 1009 1.1 tshiozak 1010 1.1 tshiozak init_parser_element(&pelem); 1011 1.1 tshiozak token = parse_exp(&tcx, &pelem); 1012 1.1 tshiozak if (T_IS_ERROR(token)) 1013 1.1 tshiozak return token; 1014 1.1 tshiozak 1015 1.1 tshiozak ppe = dup_parser_element(&pelem); 1016 1.1 tshiozak if (ppe == NULL) { 1017 1.1 tshiozak uninit_parser_element(&pelem); 1018 1.1 tshiozak return T_NOMEM; 1019 1.1 tshiozak } 1020 1.1 tshiozak 1021 1.1 tshiozak *rpe = ppe; 1022 1.1 tshiozak 1023 1.1 tshiozak return 0; 1024 1.1 tshiozak } 1025 1.1 tshiozak 1026 1.1 tshiozak static int 1027 1.1 tshiozak parse_plural(struct parser_element **rpe, unsigned long *rnp, 1028 1.1 tshiozak const char *str, size_t len) 1029 1.1 tshiozak { 1030 1.1 tshiozak struct _region r; 1031 1.1 tshiozak 1032 1.1 tshiozak _region_init(&r, __UNCONST(str), len); 1033 1.1 tshiozak 1034 1.1 tshiozak if (find_plural_forms(&r)) 1035 1.1 tshiozak return T_NOTFOUND; 1036 1.1 tshiozak if (skip_nplurals(&r, rnp)) 1037 1.1 tshiozak return T_ILPLURAL; 1038 1.1 tshiozak if (skip_assignment(&r, PLURAL_SYMBOL, LEN_PLURAL_SYMBOL)) 1039 1.1 tshiozak return T_ILPLURAL; 1040 1.1 tshiozak if (cut_trailing_semicolon(&r)) 1041 1.1 tshiozak return T_ILPLURAL; 1042 1.1 tshiozak return parse_plural_body(&r, rpe); 1043 1.1 tshiozak } 1044 1.1 tshiozak 1045 1.1 tshiozak #ifdef TEST_PARSE_PLURAL 1046 1.1 tshiozak int 1047 1.1 tshiozak main(int argc, char **argv) 1048 1.1 tshiozak { 1049 1.1 tshiozak int ret; 1050 1.1 tshiozak struct parser_element *pelem; 1051 1.1 tshiozak unsigned long np; 1052 1.1 tshiozak 1053 1.1 tshiozak if (argc != 2 && argc != 3) { 1054 1.1 tshiozak fprintf(stderr, "usage: %s <mime-header> [n]\n", argv[0]); 1055 1.1 tshiozak return EXIT_FAILURE; 1056 1.1 tshiozak } 1057 1.1 tshiozak 1058 1.1 tshiozak ret = parse_plural(&pelem, &np, argv[1], strlen(argv[1])); 1059 1.1 tshiozak 1060 1.1 tshiozak if (ret == T_EOF) 1061 1.1 tshiozak printf("none"); 1062 1.1 tshiozak else if (T_IS_ERROR(ret)) 1063 1.1 tshiozak printf("error: 0x%X", ret); 1064 1.1 tshiozak else { 1065 1.1 tshiozak printf("syntax tree: "); 1066 1.1 tshiozak dump_elem(pelem); 1067 1.1 tshiozak printf("\nnplurals = %lu", np); 1068 1.1 tshiozak if (argv[2]) 1069 1.1 tshiozak printf(", plural = %lu", 1070 1.1 tshiozak calculate_plural(pelem, atoi(argv[2]))); 1071 1.1 tshiozak free_parser_element(pelem); 1072 1.1 tshiozak } 1073 1.1 tshiozak printf("\n"); 1074 1.1 tshiozak 1075 1.1 tshiozak 1076 1.1 tshiozak return EXIT_SUCCESS; 1077 1.1 tshiozak } 1078 1.1 tshiozak #endif /* TEST_PARSE_PLURAL */ 1079 1.1 tshiozak 1080 1.1 tshiozak /* 1081 1.1 tshiozak * external interface 1082 1.1 tshiozak */ 1083 1.1 tshiozak 1084 1.1 tshiozak int 1085 1.1 tshiozak _gettext_parse_plural(struct gettext_plural **rpe, unsigned long *rnp, 1086 1.1 tshiozak const char *str, size_t len) 1087 1.1 tshiozak { 1088 1.1 tshiozak return parse_plural((struct parser_element **)rpe, rnp, str, len); 1089 1.1 tshiozak } 1090 1.1 tshiozak 1091 1.1 tshiozak unsigned long 1092 1.1 tshiozak _gettext_calculate_plural(const struct gettext_plural *pe, unsigned long n) 1093 1.1 tshiozak { 1094 1.1 tshiozak return calculate_plural((void *)__UNCONST(pe), n); 1095 1.1 tshiozak } 1096 1.1 tshiozak 1097 1.1 tshiozak void 1098 1.1 tshiozak _gettext_free_plural(struct gettext_plural *pe) 1099 1.1 tshiozak { 1100 1.1 tshiozak free_parser_element((void *)pe); 1101 1.1 tshiozak } 1102 1.1 tshiozak 1103 1.1 tshiozak #ifdef TEST_PLURAL 1104 1.1 tshiozak #include <libintl.h> 1105 1.1 tshiozak #include <locale.h> 1106 1.1 tshiozak 1107 1.1 tshiozak #define PR(n) printf("n=%d: \"%s\"\n", n, dngettext("test", "1", "2", n)) 1108 1.1 tshiozak 1109 1.1 tshiozak int 1110 1.1 tshiozak main(void) 1111 1.1 tshiozak { 1112 1.1 tshiozak bindtextdomain("test", "."); /* ./LANG/LC_MESSAGES/test.mo */ 1113 1.1 tshiozak PR(1); 1114 1.1 tshiozak PR(2); 1115 1.1 tshiozak PR(3); 1116 1.1 tshiozak PR(4); 1117 1.1 tshiozak 1118 1.1 tshiozak return 0; 1119 1.1 tshiozak } 1120 1.1 tshiozak #endif 1121