lalr.c revision 1.11.8.1 1 /* $NetBSD: lalr.c,v 1.11.8.1 2025/08/02 05:20:54 perseant Exp $ */
2
3 #include "defs.h"
4 /* Id: lalr.c,v 1.14 2021/05/20 23:57:23 tom Exp */
5
6 #include <sys/cdefs.h>
7 __RCSID("$NetBSD: lalr.c,v 1.11.8.1 2025/08/02 05:20:54 perseant Exp $");
8
9 typedef struct shorts
10 {
11 struct shorts *next;
12 Value_t value;
13 }
14 shorts;
15
16 static Value_t map_goto(int state, int symbol);
17 static Value_t **transpose(Value_t **R, int n);
18 static void add_lookback_edge(int stateno, int ruleno, int gotono);
19 static void build_relations(void);
20 static void compute_FOLLOWS(void);
21 static void compute_lookaheads(void);
22 static void digraph(Value_t **relation);
23 static void initialize_F(void);
24 static void initialize_LA(void);
25 static void set_accessing_symbol(void);
26 static void set_goto_map(void);
27 static void set_maxrhs(void);
28 static void set_reduction_table(void);
29 static void set_shift_table(void);
30 static void set_state_table(void);
31 static void traverse(int i);
32
33 static int tokensetsize;
34 Value_t *lookaheads;
35 Value_t *LAruleno;
36 unsigned *LA;
37 Value_t *accessing_symbol;
38 core **state_table;
39 shifts **shift_table;
40 reductions **reduction_table;
41 Value_t *goto_base;
42 Value_t *goto_map;
43 Value_t *from_state;
44 Value_t *to_state;
45
46 static Value_t infinity;
47 static int maxrhs;
48 static Value_t ngotos;
49 static unsigned *F;
50 static Value_t **includes;
51 static shorts **lookback;
52 static Value_t **R;
53 static Value_t *INDEX;
54 static Value_t *VERTICES;
55 static Value_t top;
56
57 void
58 lalr(void)
59 {
60 tokensetsize = WORDSIZE(ntokens);
61
62 set_state_table();
63 set_accessing_symbol();
64 set_shift_table();
65 set_reduction_table();
66 set_maxrhs();
67 initialize_LA();
68 set_goto_map();
69 initialize_F();
70 build_relations();
71 compute_FOLLOWS();
72 compute_lookaheads();
73 }
74
75 static void
76 set_state_table(void)
77 {
78 core *sp;
79
80 state_table = NEW2(nstates, core *);
81 for (sp = first_state; sp; sp = sp->next)
82 state_table[sp->number] = sp;
83 }
84
85 static void
86 set_accessing_symbol(void)
87 {
88 core *sp;
89
90 accessing_symbol = NEW2(nstates, Value_t);
91 for (sp = first_state; sp; sp = sp->next)
92 accessing_symbol[sp->number] = sp->accessing_symbol;
93 }
94
95 static void
96 set_shift_table(void)
97 {
98 shifts *sp;
99
100 shift_table = NEW2(nstates, shifts *);
101 for (sp = first_shift; sp; sp = sp->next)
102 shift_table[sp->number] = sp;
103 }
104
105 static void
106 set_reduction_table(void)
107 {
108 reductions *rp;
109
110 reduction_table = NEW2(nstates, reductions *);
111 for (rp = first_reduction; rp; rp = rp->next)
112 reduction_table[rp->number] = rp;
113 }
114
115 static void
116 set_maxrhs(void)
117 {
118 Value_t *itemp;
119 Value_t *item_end;
120 int length;
121 int max;
122
123 length = 0;
124 max = 0;
125 item_end = ritem + nitems;
126 for (itemp = ritem; itemp < item_end; itemp++)
127 {
128 if (*itemp >= 0)
129 {
130 length++;
131 }
132 else
133 {
134 if (length > max)
135 max = length;
136 length = 0;
137 }
138 }
139
140 maxrhs = max;
141 }
142
143 static void
144 initialize_LA(void)
145 {
146 int i, j, k;
147 reductions *rp;
148
149 lookaheads = NEW2(nstates + 1, Value_t);
150
151 k = 0;
152 for (i = 0; i < nstates; i++)
153 {
154 lookaheads[i] = (Value_t)k;
155 rp = reduction_table[i];
156 if (rp)
157 k += rp->nreds;
158 }
159 lookaheads[nstates] = (Value_t)k;
160
161 LA = NEW2(k * tokensetsize, unsigned);
162 LAruleno = NEW2(k, Value_t);
163 lookback = NEW2(k, shorts *);
164
165 k = 0;
166 for (i = 0; i < nstates; i++)
167 {
168 rp = reduction_table[i];
169 if (rp)
170 {
171 for (j = 0; j < rp->nreds; j++)
172 {
173 LAruleno[k] = rp->rules[j];
174 k++;
175 }
176 }
177 }
178 }
179
180 static void
181 set_goto_map(void)
182 {
183 shifts *sp;
184 int i;
185 int symbol;
186 int k;
187 Value_t *temp_base;
188 Value_t *temp_map;
189 Value_t state2;
190
191 goto_base = NEW2(nvars + 1, Value_t);
192 temp_base = NEW2(nvars + 1, Value_t);
193
194 goto_map = goto_base - ntokens;
195 temp_map = temp_base - ntokens;
196
197 ngotos = 0;
198 for (sp = first_shift; sp; sp = sp->next)
199 {
200 for (i = sp->nshifts - 1; i >= 0; i--)
201 {
202 symbol = accessing_symbol[sp->shift[i]];
203
204 if (ISTOKEN(symbol))
205 break;
206
207 if (ngotos == MAXYYINT)
208 fatal("too many gotos");
209
210 ngotos++;
211 goto_map[symbol]++;
212 }
213 }
214
215 k = 0;
216 for (i = ntokens; i < nsyms; i++)
217 {
218 temp_map[i] = (Value_t)k;
219 k += goto_map[i];
220 }
221
222 for (i = ntokens; i < nsyms; i++)
223 goto_map[i] = temp_map[i];
224
225 goto_map[nsyms] = (Value_t)ngotos;
226 temp_map[nsyms] = (Value_t)ngotos;
227
228 from_state = NEW2(ngotos, Value_t);
229 to_state = NEW2(ngotos, Value_t);
230
231 for (sp = first_shift; sp; sp = sp->next)
232 {
233 Value_t state1 = sp->number;
234
235 for (i = sp->nshifts - 1; i >= 0; i--)
236 {
237 state2 = sp->shift[i];
238 symbol = accessing_symbol[state2];
239
240 if (ISTOKEN(symbol))
241 break;
242
243 k = temp_map[symbol]++;
244 from_state[k] = state1;
245 to_state[k] = state2;
246 }
247 }
248
249 FREE(temp_base);
250 }
251
252 /* Map_goto maps a state/symbol pair into its numeric representation. */
253
254 static Value_t
255 map_goto(int state, int symbol)
256 {
257 int low = goto_map[symbol];
258 int high = goto_map[symbol + 1];
259
260 for (;;)
261 {
262 int middle;
263 int s;
264
265 assert(low <= high);
266 middle = (low + high) >> 1;
267 s = from_state[middle];
268 if (s == state)
269 return (Value_t)(middle);
270 else if (s < state)
271 low = middle + 1;
272 else
273 high = middle - 1;
274 }
275 }
276
277 static void
278 initialize_F(void)
279 {
280 int i;
281 int j;
282 int k;
283 shifts *sp;
284 Value_t *edge;
285 unsigned *rowp;
286 Value_t *rp;
287 Value_t **reads;
288 int nedges;
289 int symbol;
290 int nwords;
291
292 nwords = ngotos * tokensetsize;
293 F = NEW2(nwords, unsigned);
294
295 reads = NEW2(ngotos, Value_t *);
296 edge = NEW2(ngotos + 1, Value_t);
297 nedges = 0;
298
299 rowp = F;
300 for (i = 0; i < ngotos; i++)
301 {
302 int stateno = to_state[i];
303
304 sp = shift_table[stateno];
305
306 if (sp)
307 {
308 k = sp->nshifts;
309
310 for (j = 0; j < k; j++)
311 {
312 symbol = accessing_symbol[sp->shift[j]];
313 if (ISVAR(symbol))
314 break;
315 SETBIT(rowp, symbol);
316 }
317
318 for (; j < k; j++)
319 {
320 symbol = accessing_symbol[sp->shift[j]];
321 if (nullable[symbol])
322 edge[nedges++] = map_goto(stateno, symbol);
323 }
324
325 if (nedges)
326 {
327 reads[i] = rp = NEW2(nedges + 1, Value_t);
328
329 for (j = 0; j < nedges; j++)
330 rp[j] = edge[j];
331
332 rp[nedges] = -1;
333 nedges = 0;
334 }
335 }
336
337 rowp += tokensetsize;
338 }
339
340 SETBIT(F, 0);
341 digraph(reads);
342
343 for (i = 0; i < ngotos; i++)
344 {
345 if (reads[i])
346 FREE(reads[i]);
347 }
348
349 FREE(reads);
350 FREE(edge);
351 }
352
353 static void
354 build_relations(void)
355 {
356 int i;
357 int j;
358 int k;
359 Value_t *rulep;
360 Value_t *rp;
361 shifts *sp;
362 int length;
363 int done_flag;
364 Value_t stateno;
365 int symbol2;
366 Value_t *shortp;
367 Value_t *edge;
368 Value_t *states;
369 Value_t **new_includes;
370
371 includes = NEW2(ngotos, Value_t *);
372 edge = NEW2(ngotos + 1, Value_t);
373 states = NEW2(maxrhs + 1, Value_t);
374
375 for (i = 0; i < ngotos; i++)
376 {
377 int nedges = 0;
378 int symbol1 = accessing_symbol[to_state[i]];
379 Value_t state1 = from_state[i];
380
381 for (rulep = derives[symbol1]; *rulep >= 0; rulep++)
382 {
383 length = 1;
384 states[0] = state1;
385 stateno = state1;
386
387 for (rp = ritem + rrhs[*rulep]; *rp >= 0; rp++)
388 {
389 symbol2 = *rp;
390 sp = shift_table[stateno];
391 k = sp->nshifts;
392
393 for (j = 0; j < k; j++)
394 {
395 stateno = sp->shift[j];
396 if (accessing_symbol[stateno] == symbol2)
397 break;
398 }
399
400 states[length++] = stateno;
401 }
402
403 add_lookback_edge(stateno, *rulep, i);
404
405 length--;
406 done_flag = 0;
407 while (!done_flag)
408 {
409 done_flag = 1;
410 rp--;
411 if (ISVAR(*rp))
412 {
413 stateno = states[--length];
414 edge[nedges++] = map_goto(stateno, *rp);
415 if (nullable[*rp] && length > 0)
416 done_flag = 0;
417 }
418 }
419 }
420
421 if (nedges)
422 {
423 includes[i] = shortp = NEW2(nedges + 1, Value_t);
424 for (j = 0; j < nedges; j++)
425 shortp[j] = edge[j];
426 shortp[nedges] = -1;
427 }
428 }
429
430 new_includes = transpose(includes, ngotos);
431
432 for (i = 0; i < ngotos; i++)
433 if (includes[i])
434 FREE(includes[i]);
435
436 FREE(includes);
437
438 includes = new_includes;
439
440 FREE(edge);
441 FREE(states);
442 }
443
444 static void
445 add_lookback_edge(int stateno, int ruleno, int gotono)
446 {
447 int i, k;
448 int found;
449 shorts *sp;
450
451 i = lookaheads[stateno];
452 k = lookaheads[stateno + 1];
453 found = 0;
454 while (!found && i < k)
455 {
456 if (LAruleno[i] == ruleno)
457 found = 1;
458 else
459 ++i;
460 }
461 assert(found);
462
463 sp = NEW(shorts);
464 sp->next = lookback[i];
465 sp->value = (Value_t)gotono;
466 lookback[i] = sp;
467 }
468
469 static Value_t **
470 transpose(Value_t **R2, int n)
471 {
472 Value_t **new_R;
473 Value_t **temp_R;
474 Value_t *nedges;
475 Value_t *sp;
476 int i;
477
478 nedges = NEW2(n, Value_t);
479
480 for (i = 0; i < n; i++)
481 {
482 sp = R2[i];
483 if (sp)
484 {
485 while (*sp >= 0)
486 nedges[*sp++]++;
487 }
488 }
489
490 new_R = NEW2(n, Value_t *);
491 temp_R = NEW2(n, Value_t *);
492
493 for (i = 0; i < n; i++)
494 {
495 int k = nedges[i];
496
497 if (k > 0)
498 {
499 sp = NEW2(k + 1, Value_t);
500 new_R[i] = sp;
501 temp_R[i] = sp;
502 sp[k] = -1;
503 }
504 }
505
506 FREE(nedges);
507
508 for (i = 0; i < n; i++)
509 {
510 sp = R2[i];
511 if (sp)
512 {
513 while (*sp >= 0)
514 *temp_R[*sp++]++ = (Value_t)i;
515 }
516 }
517
518 FREE(temp_R);
519
520 return (new_R);
521 }
522
523 static void
524 compute_FOLLOWS(void)
525 {
526 digraph(includes);
527 }
528
529 static void
530 compute_lookaheads(void)
531 {
532 int i, n;
533 unsigned *fp1, *fp2, *fp3;
534 shorts *sp, *next;
535 unsigned *rowp;
536
537 rowp = LA;
538 n = lookaheads[nstates];
539 for (i = 0; i < n; i++)
540 {
541 fp3 = rowp + tokensetsize;
542 for (sp = lookback[i]; sp; sp = sp->next)
543 {
544 fp1 = rowp;
545 fp2 = F + tokensetsize * sp->value;
546 while (fp1 < fp3)
547 *fp1++ |= *fp2++;
548 }
549 rowp = fp3;
550 }
551
552 for (i = 0; i < n; i++)
553 for (sp = lookback[i]; sp; sp = next)
554 {
555 next = sp->next;
556 FREE(sp);
557 }
558
559 FREE(lookback);
560 FREE(F);
561 }
562
563 static void
564 digraph(Value_t **relation)
565 {
566 int i;
567
568 infinity = (Value_t)(ngotos + 2);
569 INDEX = NEW2(ngotos + 1, Value_t);
570 VERTICES = NEW2(ngotos + 1, Value_t);
571 top = 0;
572
573 R = relation;
574
575 for (i = 0; i < ngotos; i++)
576 INDEX[i] = 0;
577
578 for (i = 0; i < ngotos; i++)
579 {
580 if (INDEX[i] == 0 && R[i])
581 traverse(i);
582 }
583
584 FREE(INDEX);
585 FREE(VERTICES);
586 }
587
588 static void
589 traverse(int i)
590 {
591 unsigned *fp1;
592 unsigned *fp2;
593 unsigned *fp3;
594 int j;
595 Value_t *rp;
596
597 Value_t height;
598 unsigned *base;
599
600 VERTICES[++top] = (Value_t)i;
601 INDEX[i] = height = top;
602
603 base = F + i * tokensetsize;
604 fp3 = base + tokensetsize;
605
606 rp = R[i];
607 if (rp)
608 {
609 while ((j = *rp++) >= 0)
610 {
611 if (INDEX[j] == 0)
612 traverse(j);
613
614 if (INDEX[i] > INDEX[j])
615 INDEX[i] = INDEX[j];
616
617 fp1 = base;
618 fp2 = F + j * tokensetsize;
619
620 while (fp1 < fp3)
621 *fp1++ |= *fp2++;
622 }
623 }
624
625 if (INDEX[i] == height)
626 {
627 for (;;)
628 {
629 j = VERTICES[top--];
630 INDEX[j] = infinity;
631
632 if (i == j)
633 break;
634
635 fp1 = base;
636 fp2 = F + j * tokensetsize;
637
638 while (fp1 < fp3)
639 *fp2++ = *fp1++;
640 }
641 }
642 }
643
644 #ifdef NO_LEAKS
645 void
646 lalr_leaks(void)
647 {
648 if (includes != 0)
649 {
650 int i;
651
652 for (i = 0; i < ngotos; i++)
653 {
654 free(includes[i]);
655 }
656 DO_FREE(includes);
657 }
658 }
659 #endif
660