/*	$NetBSD: plural_parser.c,v 1.5 2025/02/26 04:49:45 andvar Exp $	*/
2 1.1 tshiozak
3 1.1 tshiozak /*-
4 1.1 tshiozak * Copyright (c) 2005 Citrus Project,
5 1.1 tshiozak * All rights reserved.
6 1.1 tshiozak *
7 1.1 tshiozak * Redistribution and use in source and binary forms, with or without
8 1.1 tshiozak * modification, are permitted provided that the following conditions
9 1.1 tshiozak * are met:
10 1.1 tshiozak * 1. Redistributions of source code must retain the above copyright
11 1.1 tshiozak * notice, this list of conditions and the following disclaimer.
12 1.1 tshiozak * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 tshiozak * notice, this list of conditions and the following disclaimer in the
14 1.1 tshiozak * documentation and/or other materials provided with the distribution.
15 1.1 tshiozak *
16 1.1 tshiozak * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 1.1 tshiozak * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 1.1 tshiozak * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 1.1 tshiozak * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 1.1 tshiozak * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 1.1 tshiozak * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 1.1 tshiozak * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 1.1 tshiozak * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 1.1 tshiozak * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 1.1 tshiozak * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 1.1 tshiozak * SUCH DAMAGE.
27 1.1 tshiozak *
28 1.1 tshiozak */
29 1.1 tshiozak
30 1.1 tshiozak #include <sys/cdefs.h>
31 1.5 andvar __RCSID("$NetBSD: plural_parser.c,v 1.5 2025/02/26 04:49:45 andvar Exp $");
32 1.1 tshiozak
33 1.1 tshiozak #include <assert.h>
34 1.1 tshiozak #include <stdio.h>
35 1.1 tshiozak #include <stdlib.h>
36 1.1 tshiozak #include <string.h>
37 1.1 tshiozak #include <citrus/citrus_namespace.h>
38 1.1 tshiozak #include <citrus/citrus_region.h>
39 1.1 tshiozak #include <citrus/citrus_memstream.h>
40 1.1 tshiozak #include <citrus/citrus_bcs.h>
41 1.1 tshiozak #include "plural_parser.h"
42 1.1 tshiozak
/*
 * The stand-alone test drivers accept an empty expression and arbitrary
 * identifiers instead of only the plural-number symbol.
 */
#if defined(TEST_TOKENIZER) || defined(TEST_PARSER)
#define ALLOW_EMPTY
#define ALLOW_ARBITRARY_IDENTIFIER
#endif

/* limits */
#define MAX_LEN_ATOM		10	/* longest identifier/constant, in chars */
#define MAX_NUM_OPERANDS	3	/* operand slots per operator node */

/*
 * Token kinds.  Values below 0x200 are operators (this range also holds
 * T_EOF, T_NONE and the single-character tokens, which are returned as
 * the character itself), 0x200.. are atoms, 0x300.. are errors.
 */
#define T_EOF			EOF
#define T_NONE			0x100
#define T_LAND			0x101	/* && */
#define T_LOR			0x102	/* || */
#define T_EQUALITY		0x103	/* == or != */
#define T_RELATIONAL		0x104	/* <, >, <= or >= */
#define T_ADDITIVE		0x105	/* + or - */
#define T_MULTIPLICATIVE	0x106	/* *, / or % */

#define T_IDENTIFIER		0x200
#define T_CONSTANT		0x201

#define T_ILCHAR		0x300
#define T_TOOLONG		0x301
#define T_ILTOKEN		0x302
#define T_ILEND			0x303
#define T_NOMEM			0x304
#define T_NOTFOUND		0x305
#define T_ILPLURAL		0x306

#define T_IS_OPERATOR(t)	((t) < 0x200)
#define T_IS_ERROR(t)		((t) >= 0x300)

/* two-character operators are encoded as the sum of their characters */
#define OP_EQ			('='+'=')
#define OP_NEQ			('!'+'=')
#define OP_LTEQ			('<'+'=')
#define OP_GTEQ			('>'+'=')

/* keywords of the Plural-Forms header, with their lengths sans NUL */
#define PLURAL_NUMBER_SYMBOL	"n"
#define NPLURALS_SYMBOL		"nplurals"
#define LEN_NPLURAL_SYMBOL	(sizeof(NPLURALS_SYMBOL) - 1)
#define PLURAL_SYMBOL		"plural"
#define LEN_PLURAL_SYMBOL	(sizeof(PLURAL_SYMBOL) - 1)
#define PLURAL_FORMS		"Plural-Forms:"
#define LEN_PLURAL_FORMS	(sizeof(PLURAL_FORMS) - 1)
83 1.1 tshiozak
84 1.1 tshiozak /* ----------------------------------------------------------------------
85 1.1 tshiozak * tokenizer part
86 1.1 tshiozak */
87 1.1 tshiozak
/*
 * Payload attached to a token.  Which member is meaningful depends on the
 * token kind returned by the tokenizer.
 */
union token_data
{
	unsigned long constant;		/* value of a T_CONSTANT */
#ifdef ALLOW_ARBITRARY_IDENTIFIER
	char identifier[MAX_LEN_ATOM+1];	/* spelling of a T_IDENTIFIER */
#endif
	char op;	/* operator character, or the OP_* sum for 2-char ops */
};
96 1.1 tshiozak
/*
 * Tokenizer state: the input stream plus a single slot holding a token
 * that has been pushed back with unget_token() (T_NONE when empty).
 */
struct tokenizer_context
{
	struct _memstream memstream;	/* input being tokenized */
	struct {
		int token;		/* pushed-back token kind, or T_NONE */
		union token_data token_data;	/* its payload */
	} token0;
};
105 1.1 tshiozak
106 1.1 tshiozak /* initialize a tokenizer context */
107 1.1 tshiozak static void
108 1.1 tshiozak init_tokenizer_context(struct tokenizer_context *tcx)
109 1.1 tshiozak {
110 1.1 tshiozak tcx->token0.token = T_NONE;
111 1.1 tshiozak }
112 1.1 tshiozak
113 1.1 tshiozak /* get an atom (identifier or constant) */
114 1.1 tshiozak static int
115 1.1 tshiozak tokenize_atom(struct tokenizer_context *tcx, union token_data *token_data)
116 1.1 tshiozak {
117 1.1 tshiozak int ch, len;
118 1.1 tshiozak char buf[MAX_LEN_ATOM+1];
119 1.1 tshiozak
120 1.1 tshiozak len = 0;
121 1.1 tshiozak while (/*CONSTCOND*/1) {
122 1.1 tshiozak ch = _memstream_getc(&tcx->memstream);
123 1.1 tshiozak if (!(_bcs_isalnum(ch) || ch == '_')) {
124 1.1 tshiozak _memstream_ungetc(&tcx->memstream, ch);
125 1.1 tshiozak break;
126 1.1 tshiozak }
127 1.1 tshiozak if (len == MAX_LEN_ATOM)
128 1.1 tshiozak return T_TOOLONG;
129 1.1 tshiozak buf[len++] = ch;
130 1.1 tshiozak }
131 1.1 tshiozak buf[len] = '\0';
132 1.1 tshiozak if (len == 0)
133 1.1 tshiozak return T_ILCHAR;
134 1.1 tshiozak
135 1.1 tshiozak if (_bcs_isdigit((int)(unsigned char)buf[0])) {
136 1.1 tshiozak unsigned long ul;
137 1.1 tshiozak char *post;
138 1.1 tshiozak ul = strtoul(buf, &post, 0);
139 1.1 tshiozak if (buf+len != post)
140 1.1 tshiozak return T_ILCHAR;
141 1.1 tshiozak token_data->constant = ul;
142 1.1 tshiozak return T_CONSTANT;
143 1.1 tshiozak }
144 1.1 tshiozak
145 1.1 tshiozak #ifdef ALLOW_ARBITRARY_IDENTIFIER
146 1.1 tshiozak strcpy(token_data->identifier, buf);
147 1.1 tshiozak return T_IDENTIFIER;
148 1.1 tshiozak #else
149 1.1 tshiozak if (!strcmp(buf, PLURAL_NUMBER_SYMBOL))
150 1.1 tshiozak return T_IDENTIFIER;
151 1.1 tshiozak return T_ILCHAR;
152 1.1 tshiozak #endif
153 1.1 tshiozak }
154 1.1 tshiozak
155 1.1 tshiozak /* tokenizer main routine */
156 1.1 tshiozak static int
157 1.1 tshiozak tokenize(struct tokenizer_context *tcx, union token_data *token_data)
158 1.1 tshiozak {
159 1.1 tshiozak int ch, prevch;
160 1.1 tshiozak
161 1.1 tshiozak retry:
162 1.1 tshiozak ch = _memstream_getc(&tcx->memstream);
163 1.1 tshiozak if (_bcs_isspace(ch))
164 1.1 tshiozak goto retry;
165 1.1 tshiozak
166 1.1 tshiozak switch (ch) {
167 1.1 tshiozak case T_EOF:
168 1.1 tshiozak return ch;
169 1.1 tshiozak case '+': case '-':
170 1.1 tshiozak token_data->op = ch;
171 1.1 tshiozak return T_ADDITIVE;
172 1.1 tshiozak case '*': case '/': case '%':
173 1.1 tshiozak token_data->op = ch;
174 1.1 tshiozak return T_MULTIPLICATIVE;
175 1.1 tshiozak case '?': case ':': case '(': case ')':
176 1.1 tshiozak token_data->op = ch;
177 1.1 tshiozak return ch;
178 1.1 tshiozak case '&': case '|':
179 1.1 tshiozak prevch = ch;
180 1.1 tshiozak ch = _memstream_getc(&tcx->memstream);
181 1.1 tshiozak if (ch != prevch) {
182 1.1 tshiozak _memstream_ungetc(&tcx->memstream, ch);
183 1.1 tshiozak return T_ILCHAR;
184 1.1 tshiozak }
185 1.1 tshiozak token_data->op = ch;
186 1.1 tshiozak switch (ch) {
187 1.1 tshiozak case '&':
188 1.1 tshiozak return T_LAND;
189 1.1 tshiozak case '|':
190 1.1 tshiozak return T_LOR;
191 1.3 christos default:
192 1.3 christos return T_ILTOKEN;
193 1.1 tshiozak }
194 1.1 tshiozak case '=': case '!': case '<': case '>':
195 1.1 tshiozak prevch = ch;
196 1.1 tshiozak ch = _memstream_getc(&tcx->memstream);
197 1.1 tshiozak if (ch != '=') {
198 1.1 tshiozak _memstream_ungetc(&tcx->memstream, ch);
199 1.1 tshiozak switch (prevch) {
200 1.1 tshiozak case '=':
201 1.1 tshiozak return T_ILCHAR;
202 1.1 tshiozak case '!':
203 1.1 tshiozak return '!';
204 1.1 tshiozak case '<':
205 1.1 tshiozak case '>':
206 1.1 tshiozak token_data->op = prevch; /* OP_LT or OP_GT */
207 1.1 tshiozak return T_RELATIONAL;
208 1.1 tshiozak }
209 1.1 tshiozak }
210 1.1 tshiozak /* '==', '!=', '<=' or '>=' */
211 1.1 tshiozak token_data->op = ch+prevch;
212 1.1 tshiozak switch (prevch) {
213 1.1 tshiozak case '=':
214 1.1 tshiozak case '!':
215 1.1 tshiozak return T_EQUALITY;
216 1.1 tshiozak case '<':
217 1.1 tshiozak case '>':
218 1.1 tshiozak return T_RELATIONAL;
219 1.1 tshiozak }
220 1.1 tshiozak /*NOTREACHED*/
221 1.1 tshiozak }
222 1.1 tshiozak
223 1.1 tshiozak _memstream_ungetc(&tcx->memstream, ch);
224 1.1 tshiozak return tokenize_atom(tcx, token_data);
225 1.1 tshiozak }
226 1.1 tshiozak
227 1.1 tshiozak /* get the next token */
228 1.1 tshiozak static int
229 1.1 tshiozak get_token(struct tokenizer_context *tcx, union token_data *token_data)
230 1.1 tshiozak {
231 1.1 tshiozak if (tcx->token0.token != T_NONE) {
232 1.1 tshiozak int token = tcx->token0.token;
233 1.1 tshiozak tcx->token0.token = T_NONE;
234 1.1 tshiozak *token_data = tcx->token0.token_data;
235 1.1 tshiozak return token;
236 1.1 tshiozak }
237 1.1 tshiozak return tokenize(tcx, token_data);
238 1.1 tshiozak }
239 1.1 tshiozak
240 1.1 tshiozak /* push back the last token */
241 1.1 tshiozak static void
242 1.1 tshiozak unget_token(struct tokenizer_context *tcx,
243 1.1 tshiozak int token, union token_data *token_data)
244 1.1 tshiozak {
245 1.1 tshiozak tcx->token0.token = token;
246 1.1 tshiozak tcx->token0.token_data = *token_data;
247 1.1 tshiozak }
248 1.1 tshiozak
249 1.1 tshiozak #ifdef TEST_TOKENIZER
250 1.1 tshiozak
251 1.1 tshiozak int
252 1.1 tshiozak main(int argc, char **argv)
253 1.1 tshiozak {
254 1.1 tshiozak struct tokenizer_context tcx;
255 1.1 tshiozak union token_data token_data;
256 1.1 tshiozak int token;
257 1.1 tshiozak
258 1.1 tshiozak if (argc != 2) {
259 1.1 tshiozak fprintf(stderr, "usage: %s <expression>\n", argv[0]);
260 1.1 tshiozak return EXIT_FAILURE;
261 1.1 tshiozak }
262 1.1 tshiozak
263 1.1 tshiozak init_tokenizer_context(&tcx);
264 1.1 tshiozak _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));
265 1.1 tshiozak
266 1.1 tshiozak while (1) {
267 1.1 tshiozak token = get_token(&tcx, &token_data);
268 1.1 tshiozak switch (token) {
269 1.1 tshiozak case T_EOF:
270 1.1 tshiozak goto quit;
271 1.1 tshiozak case T_ILCHAR:
272 1.1 tshiozak printf("illegal character.\n");
273 1.1 tshiozak goto quit;
274 1.1 tshiozak case T_TOOLONG:
275 1.1 tshiozak printf("too long atom.\n");
276 1.1 tshiozak goto quit;
277 1.1 tshiozak case T_CONSTANT:
278 1.1 tshiozak printf("constant: %lu\n", token_data.constant);
279 1.1 tshiozak break;
280 1.1 tshiozak case T_IDENTIFIER:
281 1.1 tshiozak printf("symbol: %s\n", token_data.identifier);
282 1.1 tshiozak break;
283 1.1 tshiozak default:
284 1.1 tshiozak printf("operator: ");
285 1.1 tshiozak switch (token) {
286 1.1 tshiozak case T_LAND:
287 1.1 tshiozak printf("&&\n");
288 1.1 tshiozak break;
289 1.1 tshiozak case T_LOR:
290 1.1 tshiozak printf("||\n");
291 1.1 tshiozak break;
292 1.1 tshiozak case T_EQUALITY:
293 1.1 tshiozak printf("%c=\n", token_data.op-'=');
294 1.1 tshiozak break;
295 1.1 tshiozak case T_RELATIONAL:
296 1.1 tshiozak switch(token_data.op) {
297 1.1 tshiozak case OP_LTEQ:
298 1.1 tshiozak case OP_GTEQ:
299 1.1 tshiozak printf("%c=\n", token_data.op-'=');
300 1.1 tshiozak break;
301 1.1 tshiozak default:
302 1.1 tshiozak printf("%c\n", token_data.op);
303 1.1 tshiozak break;
304 1.1 tshiozak }
305 1.1 tshiozak break;
306 1.1 tshiozak case T_ADDITIVE:
307 1.1 tshiozak case T_MULTIPLICATIVE:
308 1.1 tshiozak printf("%c\n", token_data.op);
309 1.1 tshiozak break;
310 1.1 tshiozak default:
311 1.1 tshiozak printf("operator: %c\n", token);
312 1.1 tshiozak }
313 1.1 tshiozak }
314 1.1 tshiozak }
315 1.1 tshiozak quit:
316 1.1 tshiozak return 0;
317 1.1 tshiozak }
318 1.1 tshiozak #endif /* TEST_TOKENIZER */
319 1.1 tshiozak
320 1.1 tshiozak
321 1.1 tshiozak /* ----------------------------------------------------------------------
322 1.1 tshiozak * parser part
323 1.1 tshiozak *
324 1.1 tshiozak * exp := cond
325 1.1 tshiozak *
326 1.1 tshiozak * cond := lor | lor '?' cond ':' cond
327 1.1 tshiozak *
328 1.1 tshiozak * lor := land ( '||' land )*
329 1.1 tshiozak *
330 1.1 tshiozak * land := equality ( '&&' equality )*
331 1.1 tshiozak *
332 1.1 tshiozak * equality := relational ( equalityops relational )*
333 1.1 tshiozak * equalityops := '==' | '!='
334 1.1 tshiozak *
335 1.1 tshiozak * relational := additive ( relationalops additive )*
336 1.1 tshiozak * relationalops := '<' | '>' | '<=' | '>='
337 1.1 tshiozak *
338 1.1 tshiozak * additive := multiplicative ( additiveops multiplicative )*
339 1.1 tshiozak * additiveops := '+' | '-'
340 1.1 tshiozak *
341 1.1 tshiozak * multiplicative := lnot ( multiplicativeops lnot )*
342 1.1 tshiozak * multiplicativeops := '*' | '/' | '%'
343 1.1 tshiozak *
344 1.1 tshiozak * lnot := '!' lnot | term
345 1.1 tshiozak *
346 1.1 tshiozak * term := literal | identifier | '(' exp ')'
347 1.1 tshiozak *
348 1.1 tshiozak */
349 1.1 tshiozak
/* Jump to `label' when `token' is an error token (see T_IS_ERROR). */
#define T_ENSURE_OK(token, label)					\
do {									\
	if (T_IS_ERROR(token))						\
		goto label;						\
} while (0)
/*
 * Like T_ENSURE_OK, but end of input counts as an error too: `token' is
 * overwritten with T_ILEND before jumping, so it must be an lvalue.
 */
#define T_ENSURE_SOMETHING(token, label)				\
do {									\
	if ((token) == T_EOF) {						\
		token = T_ILEND;					\
		goto label;						\
	} else if (T_IS_ERROR(token))					\
		goto label;						\
} while (0)
363 1.1 tshiozak
/*
 * Within this file the tree node is called parser_element; the macro
 * maps that name onto struct plural_element.
 * NOTE(review): presumably that is the tag declared in plural_parser.h --
 * confirm.
 */
#define parser_element	plural_element

struct parser_element;
/* operator node payload: the operator and up to MAX_NUM_OPERANDS children */
struct parser_op
{
	char op;	/* operator character or OP_* sum */
	struct parser_element *operands[MAX_NUM_OPERANDS];	/* NULL if unused */
};
/* one node of the parsed expression tree */
struct parser_element
{
	int kind;	/* token kind of this node (T_* or a character) */
	union
	{
		struct parser_op parser_op;	/* when kind is an operator */
		union token_data token_data;	/* when kind is an atom */
	} u;
};

/*
 * One precedence level of the binary operators: the token kind handled at
 * this level and the next (tighter binding) level, NULL meaning lnot.
 */
struct parser_op2_transition
{
	int kind;
	const struct parser_op2_transition *next;
};
387 1.1 tshiozak
388 1.1 tshiozak /* prototypes */
389 1.1 tshiozak static int parse_cond(struct tokenizer_context *, struct parser_element *);
390 1.1 tshiozak
391 1.1 tshiozak
392 1.1 tshiozak /* transition table for the 2-operand operators */
393 1.1 tshiozak #define DEF_TR(t, k, n) \
394 1.1 tshiozak static struct parser_op2_transition exp_tr_##t = { \
395 1.1 tshiozak k, &exp_tr_##n \
396 1.1 tshiozak }
397 1.1 tshiozak #define DEF_TR0(t, k) \
398 1.1 tshiozak static struct parser_op2_transition exp_tr_##t = { \
399 1.1 tshiozak k, NULL /* expect lnot */ \
400 1.1 tshiozak }
401 1.1 tshiozak
402 1.1 tshiozak DEF_TR0(multiplicative, T_MULTIPLICATIVE);
403 1.1 tshiozak DEF_TR(additive, T_ADDITIVE, multiplicative);
404 1.1 tshiozak DEF_TR(relational, T_RELATIONAL, additive);
405 1.1 tshiozak DEF_TR(equality, T_EQUALITY, relational);
406 1.1 tshiozak DEF_TR(land, T_LAND, equality);
407 1.1 tshiozak DEF_TR(lor, T_LOR, land);
408 1.1 tshiozak
409 1.1 tshiozak /* init a parser element structure */
410 1.1 tshiozak static void
411 1.1 tshiozak init_parser_element(struct parser_element *pe)
412 1.1 tshiozak {
413 1.1 tshiozak int i;
414 1.1 tshiozak
415 1.1 tshiozak pe->kind = T_NONE;
416 1.1 tshiozak for (i=0; i<MAX_NUM_OPERANDS; i++)
417 1.1 tshiozak pe->u.parser_op.operands[i] = NULL;
418 1.1 tshiozak }
419 1.1 tshiozak
420 1.1 tshiozak /* uninitialize a parser element structure with freeing children */
421 1.1 tshiozak static void free_parser_element(struct parser_element *);
422 1.1 tshiozak static void
423 1.1 tshiozak uninit_parser_element(struct parser_element *pe)
424 1.1 tshiozak {
425 1.1 tshiozak int i;
426 1.1 tshiozak
427 1.1 tshiozak if (T_IS_OPERATOR(pe->kind))
428 1.1 tshiozak for (i=0; i<MAX_NUM_OPERANDS; i++)
429 1.1 tshiozak if (pe->u.parser_op.operands[i])
430 1.1 tshiozak free_parser_element(
431 1.1 tshiozak pe->u.parser_op.operands[i]);
432 1.1 tshiozak }
433 1.1 tshiozak
434 1.1 tshiozak /* free a parser element structure with freeing children */
/*
 * Free a heap-allocated parser element together with all of its
 * descendants.  A NULL pointer is accepted and ignored.
 */
static void
free_parser_element(struct parser_element *pe)
{

	if (pe == NULL)
		return;

	uninit_parser_element(pe);
	free(pe);
}
443 1.1 tshiozak
444 1.1 tshiozak
445 1.1 tshiozak /* copy a parser element structure shallowly */
446 1.1 tshiozak static void
447 1.1 tshiozak copy_parser_element(struct parser_element *dpe,
448 1.1 tshiozak const struct parser_element *spe)
449 1.1 tshiozak {
450 1.1 tshiozak memcpy(dpe, spe, sizeof *dpe);
451 1.1 tshiozak }
452 1.1 tshiozak
453 1.1 tshiozak /* duplicate a parser element structure shallowly */
454 1.1 tshiozak static struct parser_element *
455 1.1 tshiozak dup_parser_element(const struct parser_element *pe)
456 1.1 tshiozak {
457 1.1 tshiozak struct parser_element *dpe = malloc(sizeof *dpe);
458 1.1 tshiozak if (dpe)
459 1.1 tshiozak copy_parser_element(dpe, pe);
460 1.1 tshiozak return dpe;
461 1.1 tshiozak }
462 1.1 tshiozak
463 1.1 tshiozak /* term := identifier | constant | '(' exp ')' */
464 1.1 tshiozak static int
465 1.1 tshiozak parse_term(struct tokenizer_context *tcx, struct parser_element *pelem)
466 1.1 tshiozak {
467 1.1 tshiozak struct parser_element pe0;
468 1.1 tshiozak int token;
469 1.1 tshiozak union token_data token_data;
470 1.1 tshiozak
471 1.1 tshiozak token = get_token(tcx, &token_data);
472 1.1 tshiozak switch (token) {
473 1.1 tshiozak case '(':
474 1.1 tshiozak /* '(' exp ')' */
475 1.1 tshiozak init_parser_element(&pe0);
476 1.1 tshiozak /* expect exp */
477 1.1 tshiozak token = parse_cond(tcx, &pe0);
478 1.1 tshiozak T_ENSURE_OK(token, err);
479 1.1 tshiozak /* expect ')' */
480 1.1 tshiozak token = get_token(tcx, &token_data);
481 1.1 tshiozak T_ENSURE_SOMETHING(token, err);
482 1.1 tshiozak if (token != ')') {
483 1.1 tshiozak unget_token(tcx, token, &token_data);
484 1.1 tshiozak token = T_ILTOKEN;
485 1.1 tshiozak goto err;
486 1.1 tshiozak }
487 1.1 tshiozak copy_parser_element(pelem, &pe0);
488 1.1 tshiozak return token;
489 1.1 tshiozak err:
490 1.1 tshiozak uninit_parser_element(&pe0);
491 1.1 tshiozak return token;
492 1.1 tshiozak case T_IDENTIFIER:
493 1.1 tshiozak case T_CONSTANT:
494 1.1 tshiozak pelem->kind = token;
495 1.1 tshiozak pelem->u.token_data = token_data;
496 1.1 tshiozak return token;
497 1.1 tshiozak case T_EOF:
498 1.1 tshiozak return T_ILEND;
499 1.1 tshiozak default:
500 1.1 tshiozak return T_ILTOKEN;
501 1.1 tshiozak }
502 1.1 tshiozak }
503 1.1 tshiozak
504 1.1 tshiozak /* lnot := '!' lnot | term */
505 1.1 tshiozak static int
506 1.1 tshiozak parse_lnot(struct tokenizer_context *tcx, struct parser_element *pelem)
507 1.1 tshiozak {
508 1.1 tshiozak struct parser_element pe0;
509 1.1 tshiozak int token;
510 1.1 tshiozak union token_data token_data;
511 1.1 tshiozak
512 1.1 tshiozak init_parser_element(&pe0);
513 1.1 tshiozak
514 1.1 tshiozak /* '!' or not */
515 1.1 tshiozak token = get_token(tcx, &token_data);
516 1.1 tshiozak if (token != '!') {
517 1.1 tshiozak /* stop: term */
518 1.1 tshiozak unget_token(tcx, token, &token_data);
519 1.1 tshiozak return parse_term(tcx, pelem);
520 1.1 tshiozak }
521 1.1 tshiozak
522 1.1 tshiozak /* '!' term */
523 1.1 tshiozak token = parse_lnot(tcx, &pe0);
524 1.1 tshiozak T_ENSURE_OK(token, err);
525 1.1 tshiozak
526 1.1 tshiozak pelem->kind = '!';
527 1.1 tshiozak pelem->u.parser_op.operands[0] = dup_parser_element(&pe0);
528 1.1 tshiozak return pelem->kind;
529 1.1 tshiozak err:
530 1.1 tshiozak uninit_parser_element(&pe0);
531 1.1 tshiozak return token;
532 1.1 tshiozak }
533 1.1 tshiozak
534 1.1 tshiozak /* ext_op := ext_next ( op ext_next )* */
535 1.1 tshiozak static int
536 1.1 tshiozak parse_op2(struct tokenizer_context *tcx, struct parser_element *pelem,
537 1.1 tshiozak const struct parser_op2_transition *tr)
538 1.1 tshiozak {
539 1.1 tshiozak struct parser_element pe0, pe1, peop;
540 1.1 tshiozak int token;
541 1.1 tshiozak union token_data token_data;
542 1.1 tshiozak char op;
543 1.1 tshiozak
544 1.1 tshiozak /* special case: expect lnot */
545 1.1 tshiozak if (tr == NULL)
546 1.1 tshiozak return parse_lnot(tcx, pelem);
547 1.1 tshiozak
548 1.1 tshiozak init_parser_element(&pe0);
549 1.1 tshiozak init_parser_element(&pe1);
550 1.1 tshiozak token = parse_op2(tcx, &pe0, tr->next);
551 1.1 tshiozak T_ENSURE_OK(token, err);
552 1.1 tshiozak
553 1.1 tshiozak while (/*CONSTCOND*/1) {
554 1.1 tshiozak /* expect op or empty */
555 1.1 tshiozak token = get_token(tcx, &token_data);
556 1.1 tshiozak if (token != tr->kind) {
557 1.1 tshiozak /* stop */
558 1.1 tshiozak unget_token(tcx, token, &token_data);
559 1.1 tshiozak copy_parser_element(pelem, &pe0);
560 1.1 tshiozak break;
561 1.1 tshiozak }
562 1.1 tshiozak op = token_data.op;
563 1.1 tshiozak /* right hand */
564 1.1 tshiozak token = parse_op2(tcx, &pe1, tr->next);
565 1.1 tshiozak T_ENSURE_OK(token, err);
566 1.1 tshiozak
567 1.1 tshiozak init_parser_element(&peop);
568 1.1 tshiozak peop.kind = tr->kind;
569 1.1 tshiozak peop.u.parser_op.op = op;
570 1.1 tshiozak peop.u.parser_op.operands[0] = dup_parser_element(&pe0);
571 1.1 tshiozak init_parser_element(&pe0);
572 1.1 tshiozak peop.u.parser_op.operands[1] = dup_parser_element(&pe1);
573 1.1 tshiozak init_parser_element(&pe1);
574 1.1 tshiozak copy_parser_element(&pe0, &peop);
575 1.1 tshiozak }
576 1.1 tshiozak return pelem->kind;
577 1.1 tshiozak err:
578 1.1 tshiozak uninit_parser_element(&pe1);
579 1.1 tshiozak uninit_parser_element(&pe0);
580 1.1 tshiozak return token;
581 1.1 tshiozak }
582 1.1 tshiozak
583 1.1 tshiozak /* cond := lor | lor '?' cond ':' cond */
584 1.1 tshiozak static int
585 1.1 tshiozak parse_cond(struct tokenizer_context *tcx, struct parser_element *pelem)
586 1.1 tshiozak {
587 1.1 tshiozak struct parser_element pe0, pe1, pe2;
588 1.1 tshiozak int token;
589 1.1 tshiozak union token_data token_data;
590 1.1 tshiozak
591 1.1 tshiozak init_parser_element(&pe0);
592 1.1 tshiozak init_parser_element(&pe1);
593 1.1 tshiozak init_parser_element(&pe2);
594 1.1 tshiozak
595 1.1 tshiozak /* expect lor or empty */
596 1.1 tshiozak token = parse_op2(tcx, &pe0, &exp_tr_lor);
597 1.1 tshiozak T_ENSURE_OK(token, err);
598 1.1 tshiozak
599 1.1 tshiozak /* '?' or not */
600 1.1 tshiozak token = get_token(tcx, &token_data);
601 1.1 tshiozak if (token != '?') {
602 1.1 tshiozak /* stop: lor */
603 1.1 tshiozak unget_token(tcx, token, &token_data);
604 1.1 tshiozak copy_parser_element(pelem, &pe0);
605 1.1 tshiozak return pe0.kind;
606 1.1 tshiozak }
607 1.1 tshiozak
608 1.1 tshiozak /* lor '?' cond ':' cond */
609 1.1 tshiozak /* expect cond */
610 1.1 tshiozak token = parse_cond(tcx, &pe1);
611 1.1 tshiozak T_ENSURE_OK(token, err);
612 1.1 tshiozak
613 1.1 tshiozak /* expect ':' */
614 1.1 tshiozak token = get_token(tcx, &token_data);
615 1.1 tshiozak T_ENSURE_OK(token, err);
616 1.1 tshiozak if (token != ':') {
617 1.1 tshiozak unget_token(tcx, token, &token_data);
618 1.1 tshiozak token = T_ILTOKEN;
619 1.1 tshiozak goto err;
620 1.1 tshiozak }
621 1.1 tshiozak
622 1.1 tshiozak /* expect cond */
623 1.1 tshiozak token = parse_cond(tcx, &pe2);
624 1.1 tshiozak T_ENSURE_OK(token, err);
625 1.1 tshiozak
626 1.1 tshiozak pelem->kind = '?';
627 1.1 tshiozak pelem->u.parser_op.operands[0] = dup_parser_element(&pe0);
628 1.1 tshiozak pelem->u.parser_op.operands[1] = dup_parser_element(&pe1);
629 1.1 tshiozak pelem->u.parser_op.operands[2] = dup_parser_element(&pe2);
630 1.1 tshiozak return pelem->kind;
631 1.1 tshiozak err:
632 1.1 tshiozak uninit_parser_element(&pe2);
633 1.1 tshiozak uninit_parser_element(&pe1);
634 1.1 tshiozak uninit_parser_element(&pe0);
635 1.1 tshiozak return token;
636 1.1 tshiozak }
637 1.1 tshiozak
638 1.1 tshiozak static int
639 1.1 tshiozak parse_exp(struct tokenizer_context *tcx, struct parser_element *pelem)
640 1.1 tshiozak {
641 1.1 tshiozak int token, token1;
642 1.1 tshiozak union token_data token_data;
643 1.1 tshiozak
644 1.1 tshiozak #ifdef ALLOW_EMPTY
645 1.1 tshiozak /* empty check */
646 1.1 tshiozak token = get_token(tcx, &token_data);
647 1.1 tshiozak if (token == T_EOF)
648 1.1 tshiozak return token;
649 1.1 tshiozak unget_token(tcx, token, &token_data);
650 1.1 tshiozak #endif
651 1.1 tshiozak
652 1.1 tshiozak token = parse_cond(tcx, pelem);
653 1.1 tshiozak if (!T_IS_ERROR(token)) {
654 1.1 tshiozak /* termination check */
655 1.1 tshiozak token1 = get_token(tcx, &token_data);
656 1.1 tshiozak if (token1 == T_EOF)
657 1.1 tshiozak return token;
658 1.1 tshiozak else if (!T_IS_ERROR(token))
659 1.1 tshiozak unget_token(tcx, token1, &token_data);
660 1.1 tshiozak return T_ILTOKEN;
661 1.1 tshiozak }
662 1.1 tshiozak return token;
663 1.1 tshiozak }
664 1.1 tshiozak
665 1.1 tshiozak
666 1.1 tshiozak #if defined(TEST_PARSER) || defined(TEST_PARSE_PLURAL)
667 1.1 tshiozak #include <stdio.h>
668 1.1 tshiozak
669 1.1 tshiozak static void dump_elem(struct parser_element *);
670 1.1 tshiozak
671 1.1 tshiozak static void
672 1.1 tshiozak dump_op2(struct parser_element *pelem)
673 1.1 tshiozak {
674 1.1 tshiozak dump_elem(pelem->u.parser_op.operands[0]);
675 1.1 tshiozak printf(" ");
676 1.1 tshiozak dump_elem(pelem->u.parser_op.operands[1]);
677 1.1 tshiozak printf(")");
678 1.1 tshiozak }
679 1.1 tshiozak
680 1.1 tshiozak static void
681 1.1 tshiozak dump_op3(struct parser_element *pelem)
682 1.1 tshiozak {
683 1.1 tshiozak dump_elem(pelem->u.parser_op.operands[0]);
684 1.1 tshiozak printf(" ");
685 1.1 tshiozak dump_elem(pelem->u.parser_op.operands[1]);
686 1.1 tshiozak printf(" ");
687 1.1 tshiozak dump_elem(pelem->u.parser_op.operands[2]);
688 1.1 tshiozak printf(")");
689 1.1 tshiozak }
690 1.1 tshiozak
691 1.1 tshiozak static void
692 1.1 tshiozak dump_elem(struct parser_element *pelem)
693 1.1 tshiozak {
694 1.1 tshiozak switch (pelem->kind) {
695 1.1 tshiozak case T_LAND:
696 1.1 tshiozak printf("(&& ");
697 1.1 tshiozak dump_op2(pelem);
698 1.1 tshiozak break;
699 1.1 tshiozak case T_LOR:
700 1.1 tshiozak printf("(|| ");
701 1.1 tshiozak dump_op2(pelem);
702 1.1 tshiozak break;
703 1.1 tshiozak case T_EQUALITY:
704 1.1 tshiozak switch (pelem->u.parser_op.op) {
705 1.1 tshiozak case OP_EQ:
706 1.1 tshiozak printf("(== ");
707 1.1 tshiozak break;
708 1.1 tshiozak case OP_NEQ:
709 1.1 tshiozak printf("(!= ");
710 1.1 tshiozak break;
711 1.1 tshiozak }
712 1.1 tshiozak dump_op2(pelem);
713 1.1 tshiozak break;
714 1.1 tshiozak case T_RELATIONAL:
715 1.1 tshiozak switch (pelem->u.parser_op.op) {
716 1.1 tshiozak case '<':
717 1.1 tshiozak case '>':
718 1.1 tshiozak printf("(%c ", pelem->u.parser_op.op);
719 1.1 tshiozak break;
720 1.1 tshiozak case OP_LTEQ:
721 1.1 tshiozak case OP_GTEQ:
722 1.1 tshiozak printf("(%c= ", pelem->u.parser_op.op-'=');
723 1.1 tshiozak break;
724 1.1 tshiozak }
725 1.1 tshiozak dump_op2(pelem);
726 1.1 tshiozak break;
727 1.1 tshiozak case T_ADDITIVE:
728 1.1 tshiozak case T_MULTIPLICATIVE:
729 1.1 tshiozak printf("(%c ", pelem->u.parser_op.op);
730 1.1 tshiozak dump_op2(pelem);
731 1.1 tshiozak break;
732 1.1 tshiozak case '!':
733 1.1 tshiozak printf("(! ");
734 1.1 tshiozak dump_elem(pelem->u.parser_op.operands[0]);
735 1.1 tshiozak printf(")");
736 1.1 tshiozak break;
737 1.1 tshiozak case '?':
738 1.1 tshiozak printf("(? ");
739 1.1 tshiozak dump_op3(pelem);
740 1.1 tshiozak break;
741 1.1 tshiozak case T_CONSTANT:
742 1.1 tshiozak printf("%d", pelem->u.token_data.constant);
743 1.1 tshiozak break;
744 1.1 tshiozak case T_IDENTIFIER:
745 1.1 tshiozak #ifdef ALLOW_ARBITRARY_IDENTIFIER
746 1.1 tshiozak printf("%s", pelem->u.token_data.identifier);
747 1.1 tshiozak #else
748 1.1 tshiozak printf(PLURAL_NUMBER_SYMBOL);
749 1.1 tshiozak #endif
750 1.1 tshiozak break;
751 1.1 tshiozak }
752 1.1 tshiozak }
753 1.1 tshiozak #endif
754 1.1 tshiozak #ifdef TEST_PARSER
755 1.1 tshiozak int
756 1.1 tshiozak main(int argc, char **argv)
757 1.1 tshiozak {
758 1.1 tshiozak struct tokenizer_context tcx;
759 1.1 tshiozak struct parser_element pelem;
760 1.1 tshiozak int token;
761 1.1 tshiozak
762 1.1 tshiozak if (argc != 2) {
763 1.1 tshiozak fprintf(stderr, "usage: %s <expression>\n", argv[0]);
764 1.1 tshiozak return EXIT_FAILURE;
765 1.1 tshiozak }
766 1.1 tshiozak
767 1.1 tshiozak init_tokenizer_context(&tcx);
768 1.1 tshiozak _memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));
769 1.1 tshiozak
770 1.1 tshiozak init_parser_element(&pelem);
771 1.1 tshiozak token = parse_exp(&tcx, &pelem);
772 1.1 tshiozak
773 1.1 tshiozak if (token == T_EOF)
774 1.1 tshiozak printf("none");
775 1.1 tshiozak else if (T_IS_ERROR(token))
776 1.1 tshiozak printf("error: 0x%X", token);
777 1.1 tshiozak else
778 1.1 tshiozak dump_elem(&pelem);
779 1.1 tshiozak printf("\n");
780 1.1 tshiozak
781 1.1 tshiozak uninit_parser_element(&pelem);
782 1.1 tshiozak
783 1.1 tshiozak return EXIT_SUCCESS;
784 1.1 tshiozak }
785 1.1 tshiozak #endif /* TEST_PARSER */
786 1.1 tshiozak
787 1.1 tshiozak /* ----------------------------------------------------------------------
788 1.5 andvar * calculate plural number
789 1.1 tshiozak */
790 1.1 tshiozak static unsigned long
791 1.1 tshiozak calculate_plural(const struct parser_element *pe, unsigned long n)
792 1.1 tshiozak {
793 1.1 tshiozak unsigned long val0, val1;
794 1.1 tshiozak switch (pe->kind) {
795 1.1 tshiozak case T_IDENTIFIER:
796 1.1 tshiozak return n;
797 1.1 tshiozak case T_CONSTANT:
798 1.1 tshiozak return pe->u.token_data.constant;
799 1.1 tshiozak case '?':
800 1.1 tshiozak val0 = calculate_plural(pe->u.parser_op.operands[0], n);
801 1.1 tshiozak if (val0)
802 1.1 tshiozak val1=calculate_plural(pe->u.parser_op.operands[1], n);
803 1.1 tshiozak else
804 1.1 tshiozak val1=calculate_plural(pe->u.parser_op.operands[2], n);
805 1.1 tshiozak return val1;
806 1.1 tshiozak case '!':
807 1.1 tshiozak return !calculate_plural(pe->u.parser_op.operands[0], n);
808 1.1 tshiozak case T_MULTIPLICATIVE:
809 1.1 tshiozak case T_ADDITIVE:
810 1.1 tshiozak case T_RELATIONAL:
811 1.1 tshiozak case T_EQUALITY:
812 1.1 tshiozak case T_LOR:
813 1.1 tshiozak case T_LAND:
814 1.1 tshiozak val0 = calculate_plural(pe->u.parser_op.operands[0], n);
815 1.1 tshiozak val1 = calculate_plural(pe->u.parser_op.operands[1], n);
816 1.1 tshiozak switch (pe->u.parser_op.op) {
817 1.1 tshiozak case '*':
818 1.1 tshiozak return val0*val1;
819 1.1 tshiozak case '/':
820 1.1 tshiozak return val0/val1;
821 1.1 tshiozak case '%':
822 1.1 tshiozak return val0%val1;
823 1.1 tshiozak case '+':
824 1.1 tshiozak return val0+val1;
825 1.1 tshiozak case '-':
826 1.1 tshiozak return val0-val1;
827 1.1 tshiozak case '<':
828 1.1 tshiozak return val0<val1;
829 1.1 tshiozak case '>':
830 1.1 tshiozak return val0>val1;
831 1.1 tshiozak case OP_LTEQ:
832 1.1 tshiozak return val0<=val1;
833 1.1 tshiozak case OP_GTEQ:
834 1.1 tshiozak return val0>=val1;
835 1.1 tshiozak case OP_EQ:
836 1.1 tshiozak return val0==val1;
837 1.1 tshiozak case OP_NEQ:
838 1.1 tshiozak return val0!=val1;
839 1.1 tshiozak case '|':
840 1.1 tshiozak return val0||val1;
841 1.1 tshiozak case '&':
842 1.1 tshiozak return val0&&val1;
843 1.1 tshiozak }
844 1.1 tshiozak }
845 1.1 tshiozak return 0;
846 1.1 tshiozak }
847 1.1 tshiozak
#ifdef TEST_CALC_PLURAL
#include <stdio.h>

/*
 * Test driver for calculate_plural(): parses argv[1] as an expression with
 * parse_exp() and prints its value for n = argv[2].
 *
 * Prints "none" when parse_exp() returns T_EOF and "error: 0x..." when it
 * returns an error token.  Exits with EXIT_FAILURE only on a usage error.
 */
int
main(int argc, char **argv)
{
	struct tokenizer_context tcx;
	struct parser_element pelem;
	int token;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <expression> <n>\n", argv[0]);
		return EXIT_FAILURE;
	}

	init_tokenizer_context(&tcx);
	_memstream_bind_ptr(&tcx.memstream, argv[1], strlen(argv[1]));

	init_parser_element(&pelem);
	token = parse_exp(&tcx, &pelem);

	if (token == T_EOF)
		printf("none");
	else if (T_IS_ERROR(token))
		printf("error: 0x%X", token);
	else {
		/*
		 * n is an unsigned long; strtoul() covers its whole range,
		 * whereas atoi() is undefined outside the range of int.
		 */
		printf("plural = %lu",
		    calculate_plural(&pelem, strtoul(argv[2], NULL, 10)));
	}
	printf("\n");

	uninit_parser_element(&pelem);

	return EXIT_SUCCESS;
}
#endif /* TEST_CALC_PLURAL */
884 1.1 tshiozak
885 1.1 tshiozak
886 1.1 tshiozak /* ----------------------------------------------------------------------
887 1.1 tshiozak * parse plural forms
888 1.1 tshiozak */
889 1.1 tshiozak
/*
 * Re-initialise r with the pointer and length produced by
 * _bcs_skip_ws_len() (judging by the helper's name: the same data with
 * leading whitespace skipped).
 */
static void
region_skip_ws(struct _region *r)
{
	const char *head;
	size_t remaining;

	head = _region_head(r);
	remaining = _region_size(r);

	head = _bcs_skip_ws_len(head, &remaining);
	_region_init(r, __UNCONST(head), remaining);
}
899 1.1 tshiozak
/*
 * Re-initialise r with its current head and the length as adjusted by
 * _bcs_trunc_rws_len() (judging by the helper's name: trailing whitespace
 * cut off).  The head pointer itself is not moved.
 */
static void
region_trunc_rws(struct _region *r)
{
	const char *head;
	size_t trimmed;

	head = _region_head(r);
	trimmed = _region_size(r);

	_bcs_trunc_rws_len(head, &trimmed);
	_region_init(r, __UNCONST(head), trimmed);
}
909 1.1 tshiozak
/*
 * Check whether r begins with the prelen bytes at pre.
 *
 * With ignorecase non-zero the comparison is done by _bcs_strncasecmp(),
 * otherwise by memcmp().  Returns 0 on a match and -1 when the region is
 * shorter than prelen or the bytes differ.  r is not modified.
 */
static int
region_check_prefix(struct _region *r, const char *pre, size_t prelen,
    int ignorecase)
{
	int differs;

	if (prelen > _region_size(r))
		return -1;

	if (ignorecase)
		differs = _bcs_strncasecmp(_region_head(r), pre, prelen);
	else
		differs = memcmp(_region_head(r), pre, prelen);

	return differs ? -1 : 0;
}
926 1.1 tshiozak
/*
 * Trim r with region_trunc_rws() and then require its last byte to be ';'.
 *
 * On success the ';' is dropped from the region and 0 is returned.  If the
 * trimmed region is empty or does not end in ';', -1 is returned (the
 * trimming has already been applied to r by then).
 */
static int
cut_trailing_semicolon(struct _region *r)
{
	size_t len;

	region_trunc_rws(r);

	len = _region_size(r);
	if (len == 0)
		return -1;
	if (_region_peek8(r, len - 1) != ';')
		return -1;

	_region_get_subregion(r, r, 0, len - 1);
	return 0;
}
937 1.1 tshiozak
938 1.1 tshiozak static int
939 1.1 tshiozak find_plural_forms(struct _region *r)
940 1.1 tshiozak {
941 1.1 tshiozak struct _memstream ms;
942 1.1 tshiozak struct _region rr;
943 1.1 tshiozak
944 1.1 tshiozak _memstream_bind(&ms, r);
945 1.1 tshiozak
946 1.1 tshiozak while (!_memstream_getln_region(&ms, &rr)) {
947 1.1 tshiozak if (!region_check_prefix(&rr,
948 1.1 tshiozak PLURAL_FORMS, LEN_PLURAL_FORMS, 1)) {
949 1.1 tshiozak _region_get_subregion(
950 1.1 tshiozak r, &rr, LEN_PLURAL_FORMS,
951 1.1 tshiozak _region_size(&rr)-LEN_PLURAL_FORMS);
952 1.1 tshiozak region_skip_ws(r);
953 1.1 tshiozak region_trunc_rws(r);
954 1.1 tshiozak return 0;
955 1.1 tshiozak }
956 1.1 tshiozak }
957 1.1 tshiozak return -1;
958 1.1 tshiozak }
959 1.1 tshiozak
/*
 * Consume "<sym> =" from the front of r.
 *
 * Whitespace is skipped (region_skip_ws()) before the symbol, between the
 * symbol and '=', and after '='.  The symbol is compared case-sensitively
 * against the symlen bytes at sym.
 *
 * Returns 0 with r positioned after the '=' and the following whitespace,
 * or -1 if the symbol or the '=' is missing.  On failure r may already have
 * been advanced past whatever matched up to that point.
 */
static int
skip_assignment(struct _region *r, const char *sym, size_t symlen)
{
	/* the symbol itself */
	region_skip_ws(r);
	if (region_check_prefix(r, sym, symlen, 0) != 0)
		return -1;
	_region_get_subregion(r, r, symlen, _region_size(r) - symlen);

	/* the '=' sign */
	region_skip_ws(r);
	if (_region_size(r) == 0)
		return -1;
	if (_region_peek8(r, 0) != '=')
		return -1;
	_region_get_subregion(r, r, 1, _region_size(r) - 1);

	region_skip_ws(r);
	return 0;
}
974 1.1 tshiozak
975 1.1 tshiozak static int
976 1.1 tshiozak skip_nplurals(struct _region *r, unsigned long *rnp)
977 1.1 tshiozak {
978 1.1 tshiozak unsigned long np;
979 1.1 tshiozak char buf[MAX_LEN_ATOM+2], *endptr;
980 1.1 tshiozak const char *endptrconst;
981 1.1 tshiozak size_t ofs;
982 1.1 tshiozak
983 1.1 tshiozak if (skip_assignment(r, NPLURALS_SYMBOL, LEN_NPLURAL_SYMBOL))
984 1.1 tshiozak return -1;
985 1.1 tshiozak if (_region_size(r) == 0 || !_bcs_isdigit(_region_peek8(r, 0)))
986 1.1 tshiozak return -1;
987 1.1 tshiozak strlcpy(buf, _region_head(r), sizeof (buf));
988 1.1 tshiozak np = strtoul(buf, &endptr, 0);
989 1.1 tshiozak endptrconst = _bcs_skip_ws(endptr);
990 1.1 tshiozak if (*endptrconst != ';')
991 1.1 tshiozak return -1;
992 1.1 tshiozak ofs = endptrconst+1-buf;
993 1.1 tshiozak if (_region_get_subregion(r, r, ofs, _region_size(r)-ofs))
994 1.1 tshiozak return -1;
995 1.1 tshiozak if (rnp)
996 1.1 tshiozak *rnp = np;
997 1.1 tshiozak return 0;
998 1.1 tshiozak }
999 1.1 tshiozak
1000 1.1 tshiozak static int
1001 1.1 tshiozak parse_plural_body(struct _region *r, struct parser_element **rpe)
1002 1.1 tshiozak {
1003 1.1 tshiozak int token;
1004 1.1 tshiozak struct tokenizer_context tcx;
1005 1.1 tshiozak struct parser_element pelem, *ppe;
1006 1.1 tshiozak
1007 1.1 tshiozak init_tokenizer_context(&tcx);
1008 1.1 tshiozak _memstream_bind(&tcx.memstream, r);
1009 1.1 tshiozak
1010 1.1 tshiozak init_parser_element(&pelem);
1011 1.1 tshiozak token = parse_exp(&tcx, &pelem);
1012 1.1 tshiozak if (T_IS_ERROR(token))
1013 1.1 tshiozak return token;
1014 1.1 tshiozak
1015 1.1 tshiozak ppe = dup_parser_element(&pelem);
1016 1.1 tshiozak if (ppe == NULL) {
1017 1.1 tshiozak uninit_parser_element(&pelem);
1018 1.1 tshiozak return T_NOMEM;
1019 1.1 tshiozak }
1020 1.1 tshiozak
1021 1.1 tshiozak *rpe = ppe;
1022 1.1 tshiozak
1023 1.1 tshiozak return 0;
1024 1.1 tshiozak }
1025 1.1 tshiozak
1026 1.1 tshiozak static int
1027 1.1 tshiozak parse_plural(struct parser_element **rpe, unsigned long *rnp,
1028 1.1 tshiozak const char *str, size_t len)
1029 1.1 tshiozak {
1030 1.1 tshiozak struct _region r;
1031 1.1 tshiozak
1032 1.1 tshiozak _region_init(&r, __UNCONST(str), len);
1033 1.1 tshiozak
1034 1.1 tshiozak if (find_plural_forms(&r))
1035 1.1 tshiozak return T_NOTFOUND;
1036 1.1 tshiozak if (skip_nplurals(&r, rnp))
1037 1.1 tshiozak return T_ILPLURAL;
1038 1.1 tshiozak if (skip_assignment(&r, PLURAL_SYMBOL, LEN_PLURAL_SYMBOL))
1039 1.1 tshiozak return T_ILPLURAL;
1040 1.1 tshiozak if (cut_trailing_semicolon(&r))
1041 1.1 tshiozak return T_ILPLURAL;
1042 1.1 tshiozak return parse_plural_body(&r, rpe);
1043 1.1 tshiozak }
1044 1.1 tshiozak
#ifdef TEST_PARSE_PLURAL
/*
 * Test driver for parse_plural(): parses argv[1] as a header containing a
 * plural-forms line, dumps the syntax tree and nplurals, and, when a third
 * argument is given, also prints the plural value for n = argv[2].
 *
 * Exits with EXIT_FAILURE only on a usage error.
 */
int
main(int argc, char **argv)
{
	int ret;
	struct parser_element *pelem;
	unsigned long np;

	if (argc != 2 && argc != 3) {
		fprintf(stderr, "usage: %s <mime-header> [n]\n", argv[0]);
		return EXIT_FAILURE;
	}

	ret = parse_plural(&pelem, &np, argv[1], strlen(argv[1]));

	if (ret == T_EOF)
		printf("none");
	else if (T_IS_ERROR(ret))
		printf("error: 0x%X", ret);
	else {
		printf("syntax tree: ");
		dump_elem(pelem);
		printf("\nnplurals = %lu", np);
		/*
		 * n is an unsigned long; strtoul() covers its whole range,
		 * whereas atoi() is undefined outside the range of int.
		 */
		if (argv[2])
			printf(", plural = %lu",
			    calculate_plural(pelem,
			    strtoul(argv[2], NULL, 10)));
		free_parser_element(pelem);
	}
	printf("\n");


	return EXIT_SUCCESS;
}
#endif /* TEST_PARSE_PLURAL */
1079 1.1 tshiozak
1080 1.1 tshiozak /*
1081 1.1 tshiozak * external interface
1082 1.1 tshiozak */
1083 1.1 tshiozak
/*
 * Public entry point for parse_plural().  The struct gettext_plural handle
 * is the parser's struct parser_element under a different name: the
 * pointer is only cast, never converted.  Arguments and return value are
 * passed through unchanged.
 */
int
_gettext_parse_plural(struct gettext_plural **rpe, unsigned long *rnp,
    const char *str, size_t len)
{
	struct parser_element **tree = (struct parser_element **)rpe;

	return parse_plural(tree, rnp, str, len);
}
1090 1.1 tshiozak
/*
 * Public entry point for calculate_plural(): evaluates the expression
 * behind the handle pe for the count n and returns the result.
 */
unsigned long
_gettext_calculate_plural(const struct gettext_plural *pe, unsigned long n)
{
	const struct parser_element *tree = (const void *)pe;

	return calculate_plural(tree, n);
}
1096 1.1 tshiozak
/*
 * Public entry point for free_parser_element(): releases the expression
 * behind the handle pe.
 */
void
_gettext_free_plural(struct gettext_plural *pe)
{
	void *tree = pe;

	free_parser_element(tree);
}
1102 1.1 tshiozak
#ifdef TEST_PLURAL
#include <libintl.h>
#include <locale.h>

/* Print what dngettext() returns for count n in the "test" domain. */
#define PR(n) printf("n=%d: \"%s\"\n", n, dngettext("test", "1", "2", n))

/*
 * Test driver: binds the "test" domain to the current directory and prints
 * the dngettext() result for the counts 1 through 4.
 */
int
main(void)
{
	int count;

	bindtextdomain("test", "."); /* ./LANG/LC_MESSAGES/test.mo */

	for (count = 1; count <= 4; count++)
		PR(count);

	return 0;
}
#endif
1121