1/*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25#include <limits.h>
26
27#include "gpir.h"
28
29/*
30 * GP schedule algorithm (by Connor Abbott <cwabbott0@gmail.com>)
31 *
32 * Pre schedule phase:
33 * 1. order all nodes in a sequence
34 * 2. convert the real reg read/write to GP load/store node, now all
35 *    variable is SSA
36 * 3. do reg alloc for all SSA with 11 reg (value reg) and spill with
37 *    load/store to real reg if needed
38 * 4. add fake dependency like this:
39 *    after step 3, node sequence is
40 *      01: r1=r2+r3
41 *      02: r4=r1+r2
42 *      03: r1=r5+r6
43 *    we should add a fake dependency of node 3 to node 2 like a
44 *    write-after-read dep. But this is not really write-after-read
45 *    dep because there's no r1 really, because it's a value register.
46 *    We need this fake dep in the schedule phase to make sure in any
 *    schedule point, there are only <=11 inputs needed by the past
48 *    scheduled nodes.
49 * 5. build DAG according to all the real and fake dep
50 *
51 * Schedule phase:
52 * 1. Compute the nodes ready to schedule, if no nodes, exit
53 * 2. Create a new GP instruction, and call it as current instr
54 * 3. For any nodes with a use 2 cycles ago with a definition ready to
55 *    schedule, schedule that definition immediately if possible, or else
56 *    schedule a move.
57 * 4. For any nodes with a use 2 cycles ago but the definition not
58 *    scheduled and not ready to schedule, schedule a move immediately
59 *    to prevent the value from falling off the queue.
60 * 5. Calculate the number of remaining nodes with a use 1 cycle ago but
61 *    the definition not yet scheduled, and if there are more than 5,
62 *    schedule moves or definitions for the rest now.
63 * 6. Schedule the rest of the available nodes using your favorite heuristic
64 *    to current instr.
65 * 7. go to step 1
66 *
67 * Step 5 for the current instruction guarantees that steps 3 and 4 for
68 * the next instruction will always succeed, so it's only step 5 that can
69 * possibly fail. Now, note that the nodes whose definitions have not yet
70 * been scheduled but one or more use has been scheduled, are exactly the
71 * nodes that are live in the final schedule. Therefore there will never
 * be more than 11 of them (guaranteed by the 11 value reg alloc and the
73 * fake dep added before schedule). The worst case for step 5 is that all of
74 * these nodes had a use 1 cycle ago, which means that none of them hit
75 * case 3 or 4 already, so there are 6 slots still available so step 5
76 * will always succeed. In general, even if there are exactly 11 values
 * live, if n are scheduled in steps 3 and 4, there are 11-n left for step
 * 5, so at most 11-n-5 = 6-n are scheduled in step 5 and therefore 6 are
79 * scheduled total, below the limit. So the algorithm will always succeed.
80 */
81
82static int gpir_min_dist_alu(gpir_dep *dep)
83{
84   switch (dep->pred->op) {
85   case gpir_op_load_uniform:
86   case gpir_op_load_temp:
87   case gpir_op_load_reg:
88   case gpir_op_load_attribute:
89      return 0;
90
91   case gpir_op_complex1:
92      return 2;
93
94   default:
95      return 1;
96   }
97}
98
99static int gpir_get_min_dist(gpir_dep *dep)
100{
101   switch (dep->type) {
102   case GPIR_DEP_INPUT:
103      switch (dep->succ->op) {
104      case gpir_op_store_temp:
105      case gpir_op_store_reg:
106      case gpir_op_store_varying:
107         /* store must use alu node as input */
108         if (dep->pred->type == gpir_node_type_load)
109            return INT_MAX >> 2;
110         else
111            return 0;
112
113      default:
114         return gpir_min_dist_alu(dep);
115      }
116
117   case GPIR_DEP_OFFSET:
118      assert(dep->succ->op == gpir_op_store_temp);
119      return gpir_min_dist_alu(dep);
120
121   case GPIR_DEP_READ_AFTER_WRITE:
122      switch (dep->succ->op) {
123      case gpir_op_load_temp:
124         assert(dep->pred->op == gpir_op_store_temp);
125         return 4;
126      case gpir_op_load_reg:
127         assert(dep->pred->op == gpir_op_store_reg);
128         return 3;
129      case gpir_op_load_uniform:
130         assert(dep->pred->op == gpir_op_store_temp_load_off0 ||
131                dep->pred->op == gpir_op_store_temp_load_off1 ||
132                dep->pred->op == gpir_op_store_temp_load_off2);
133         return 4;
134      default:
135         assert(0);
136      }
137
138   case GPIR_DEP_WRITE_AFTER_READ:
139      switch (dep->pred->op) {
140      case gpir_op_load_temp:
141         assert(dep->succ->op == gpir_op_store_temp);
142         return -3;
143      case gpir_op_load_reg:
144         assert(dep->succ->op == gpir_op_store_reg);
145         return -2;
146      case gpir_op_load_uniform:
147         assert(dep->succ->op == gpir_op_store_temp_load_off0 ||
148                dep->succ->op == gpir_op_store_temp_load_off1 ||
149                dep->succ->op == gpir_op_store_temp_load_off2);
150         return -3;
151      default:
152         assert(0);
153      }
154
155   case GPIR_DEP_VREG_WRITE_AFTER_READ:
156      return 0;
157
158   case GPIR_DEP_VREG_READ_AFTER_WRITE:
159      assert(0); /* not possible, this is GPIR_DEP_INPUT */
160   }
161
162   return 0;
163}
164
165static int gpir_max_dist_alu(gpir_dep *dep)
166{
167   switch (dep->pred->op) {
168   case gpir_op_load_uniform:
169   case gpir_op_load_temp:
170      return 0;
171   case gpir_op_load_attribute:
172      return 1;
173   case gpir_op_load_reg:
174      if (dep->pred->sched.pos < GPIR_INSTR_SLOT_REG0_LOAD0 ||
175          dep->pred->sched.pos > GPIR_INSTR_SLOT_REG0_LOAD3)
176         return 0;
177      else
178         return 1;
179   case gpir_op_exp2_impl:
180   case gpir_op_log2_impl:
181   case gpir_op_rcp_impl:
182   case gpir_op_rsqrt_impl:
183   case gpir_op_store_temp_load_off0:
184   case gpir_op_store_temp_load_off1:
185   case gpir_op_store_temp_load_off2:
186      return 1;
187   case gpir_op_mov:
188      if (dep->pred->sched.pos == GPIR_INSTR_SLOT_COMPLEX)
189         return 1;
190      else
191         return 2;
192   default:
193      return 2;
194   }
195}
196
197static int gpir_get_max_dist(gpir_dep *dep)
198{
199   switch (dep->type) {
200   case GPIR_DEP_INPUT:
201      switch (dep->succ->op) {
202      case gpir_op_store_temp:
203      case gpir_op_store_reg:
204      case gpir_op_store_varying:
205         return 0;
206
207      default:
208         return gpir_max_dist_alu(dep);
209      }
210
211   case GPIR_DEP_OFFSET:
212      assert(dep->succ->op == gpir_op_store_temp);
213      return gpir_max_dist_alu(dep);
214
215   default:
216      return INT_MAX >> 2; /* Don't want to overflow... */
217   }
218}
219
220static void schedule_update_distance(gpir_node *node)
221{
222   if (gpir_node_is_leaf(node)) {
223      node->sched.dist = 0;
224      return;
225   }
226
227   gpir_node_foreach_pred(node, dep) {
228      gpir_node *pred = dep->pred;
229
230      if (pred->sched.dist < 0)
231         schedule_update_distance(pred);
232
233      int dist = pred->sched.dist + 1;
234      if (node->sched.dist < dist)
235         node->sched.dist = dist;
236   }
237}
238
/* Insert @insert_node into @ready_list if it has become fully or
 * partially ready, keeping the list sorted by descending sched.dist.
 */
static void schedule_insert_ready_list(struct list_head *ready_list,
                                       gpir_node *insert_node)
{
   /* if this node is fully ready or partially ready
    *   fully ready: all successors have been scheduled
    *   partially ready: part of input successors have been scheduled
    *
    * either fully ready or partially ready node need be inserted to
    * the ready list, but we only schedule a move node for partially
    * ready node.
    */
   bool ready = true, insert = false;
   gpir_node_foreach_succ(insert_node, dep) {
      gpir_node *succ = dep->succ;
      if (succ->sched.instr >= 0) {
         /* a scheduled successor that actually consumes the value makes
          * this node worth inserting (its value is now live) */
         if (dep->type == GPIR_DEP_INPUT)
            insert = true;
      }
      else
         ready = false;
   }

   insert_node->sched.ready = ready;
   /* for root node */
   insert |= ready;

   if (!insert || insert_node->sched.inserted)
      return;

   /* keep the list ordered by descending dist so the critical path is
    * considered first; insert before the first node with smaller dist */
   struct list_head *insert_pos = ready_list;
   list_for_each_entry(gpir_node, node, ready_list, list) {
      if (insert_node->sched.dist > node->sched.dist) {
         insert_pos = &node->list;
         break;
      }
   }

   list_addtail(&insert_node->list, insert_pos);
   insert_node->sched.inserted = true;
}
279
280static int gpir_get_max_start(gpir_node *node)
281{
282   int max_start = 0;
283
284   /* find the max start instr constrainted by all successors */
285   gpir_node_foreach_succ(node, dep) {
286      gpir_node *succ = dep->succ;
287      if (succ->sched.instr < 0)
288         continue;
289
290      int start = succ->sched.instr + gpir_get_min_dist(dep);
291      if (start > max_start)
292         max_start = start;
293   }
294
295   return max_start;
296}
297
298static int gpir_get_min_end(gpir_node *node)
299{
300   int min_end = INT_MAX;
301
302   /* find the min end instr constrainted by all successors */
303   gpir_node_foreach_succ(node, dep) {
304      gpir_node *succ = dep->succ;
305      if (succ->sched.instr < 0)
306         continue;
307
308      int end = succ->sched.instr + gpir_get_max_dist(dep);
309      if (end < min_end)
310         min_end = end;
311   }
312
313   return min_end;
314}
315
316static gpir_node *gpir_sched_instr_has_load(gpir_instr *instr, gpir_node *node)
317{
318   gpir_load_node *load = gpir_node_to_load(node);
319
320   for (int i = GPIR_INSTR_SLOT_REG0_LOAD0; i <= GPIR_INSTR_SLOT_MEM_LOAD3; i++) {
321      if (!instr->slots[i])
322         continue;
323
324      gpir_load_node *iload = gpir_node_to_load(instr->slots[i]);
325      if (load->node.op == iload->node.op &&
326          load->index == iload->index &&
327          load->component == iload->component)
328         return &iload->node;
329   }
330   return NULL;
331}
332
/* Try to place @node into @instr.  On success node->sched.instr/pos are
 * set and true is returned; on failure both are reset to -1.
 */
static bool schedule_try_place_node(gpir_instr *instr, gpir_node *node)
{
   if (node->type == gpir_node_type_load) {
      /* an identical load may already sit in this instr; if so reuse
       * its slot instead of occupying another one */
      gpir_node *load = gpir_sched_instr_has_load(instr, node);
      if (load) {
         gpir_debug("same load %d in instr %d for node %d\n",
                    load->index, instr->index, node->index);

         /* not really merge two node, just fake scheduled same place */
         node->sched.instr = load->sched.instr;
         node->sched.pos = load->sched.pos;
         return true;
      }
   }

   node->sched.instr = instr->index;

   /* try each slot this op may occupy, honoring the
    * [max_start, min_end] window imposed by scheduled successors */
   int *slots = gpir_op_infos[node->op].slots;
   for (int i = 0; slots[i] != GPIR_INSTR_SLOT_END; i++) {
      node->sched.pos = slots[i];
      if (node->sched.instr >= gpir_get_max_start(node) &&
          node->sched.instr <= gpir_get_min_end(node) &&
          gpir_instr_try_insert_node(instr, node))
         return true;
   }

   node->sched.instr = -1;
   node->sched.pos = -1;
   return false;
}
363
364static gpir_node *schedule_create_move_node(gpir_node *node)
365{
366   gpir_alu_node *move = gpir_node_create(node->block, gpir_op_mov);
367   if (unlikely(!move))
368      return NULL;
369
370   move->children[0] = node;
371   move->num_child = 1;
372
373   move->node.sched.instr = -1;
374   move->node.sched.pos = -1;
375   move->node.sched.dist = node->sched.dist;
376
377   gpir_debug("create move %d for %d\n", move->node.index, node->index);
378   return &move->node;
379}
380
/* Schedule @node into @instr if it is a mov, otherwise insert a fresh
 * move between @node and its successors and return that move (still
 * unscheduled, to be re-inserted into the ready list by the caller).
 */
static gpir_node *gpir_sched_node(gpir_instr *instr, gpir_node *node)
{
   if (node->op == gpir_op_mov) {
      gpir_node *child = gpir_node_to_alu(node)->children[0];
      /* successors this instr cannot serve (unscheduled, or closer
       * than the required min dist) are re-pointed at the move's
       * child so they can get their own move later */
      gpir_node_foreach_succ_safe(node, dep) {
         gpir_node *succ = dep->succ;
         if (succ->sched.instr < 0 ||
             instr->index < succ->sched.instr + gpir_get_min_dist(dep)) {
            gpir_node_replace_pred(dep, child);
            if (dep->type == GPIR_DEP_INPUT)
               gpir_node_replace_child(succ, node, child);
         }
      }
      /* placing the mov is expected to always succeed here */
      MAYBE_UNUSED bool result = schedule_try_place_node(instr, node);
      assert(result);
      return node;
   }
   else {
      /* take @node off the ready list and put the new move between it
       * and all of its successors */
      gpir_node *move = schedule_create_move_node(node);
      list_del(&node->list);
      node->sched.ready = false;
      node->sched.inserted = false;
      gpir_node_replace_succ(move, node);
      gpir_node_add_dep(move, node, GPIR_DEP_INPUT);
      return move;
   }
}
408
409static bool gpir_is_input_node(gpir_node *node)
410{
411   gpir_node_foreach_succ(node, dep) {
412      if (dep->type == GPIR_DEP_INPUT)
413         return true;
414   }
415   return false;
416}
417
418static int gpir_get_min_scheduled_succ(gpir_node *node)
419{
420   int min = INT_MAX;
421   gpir_node_foreach_succ(node, dep) {
422      gpir_node *succ = dep->succ;
423      if (succ->sched.instr >= 0 && dep->type == GPIR_DEP_INPUT) {
424         if (min > succ->sched.instr)
425            min = succ->sched.instr;
426      }
427   }
428   return min;
429}
430
/* One scheduling step for @instr, implementing steps 3-6 of the
 * algorithm described at the top of this file.  Returns the node that
 * was placed, or a newly created move that still needs placing (the
 * caller re-inserts it into the ready list), or NULL when nothing
 * more can be scheduled into this instr.
 */
static gpir_node *gpir_sched_instr_pass(gpir_instr *instr,
                                        struct list_head *ready_list)
{
   /* fully ready node reach its max dist with any of its successor */
   list_for_each_entry_safe(gpir_node, node, ready_list, list) {
      if (node->sched.ready) {
         int end = gpir_get_min_end(node);
         assert(end >= instr->index);
         /* deadline not hit yet, handle it in a later pass */
         if (instr->index < end)
            continue;

         gpir_debug("fully ready max node %d\n", node->index);

         if (schedule_try_place_node(instr, node))
            return node;

         /* could not be placed directly: fall back to a move */
         return gpir_sched_node(instr, node);
      }
   }

   /* partially ready node reach its max dist with any of its successor */
   list_for_each_entry_safe(gpir_node, node, ready_list, list) {
      if (!node->sched.ready) {
         int end = gpir_get_min_end(node);
         assert(end >= instr->index);
         if (instr->index < end)
            continue;

         gpir_debug("partially ready max node %d\n", node->index);

         return gpir_sched_node(instr, node);
      }
   }

   /* schedule node used by previous instr when count > 5 */
   int count = 0;
   gpir_node *two_slot_node = NULL;
   list_for_each_entry(gpir_node, node, ready_list, list) {
      if (gpir_is_input_node(node)) {
         int min = gpir_get_min_scheduled_succ(node);
         assert(min >= instr->index - 1);
         if (min == instr->index - 1) {
            /* two-slot ops count double against the limit */
            if (gpir_op_infos[node->op].may_consume_two_slots) {
               two_slot_node = node;
               count += 2;
            }
            else
               count++;
         }
      }
   }

   if (count > 5) {
      /* When no slot available, must schedule a move for two slot node
       * to reduce the count. This results from the dummy_m/f method.
       */
      if (gpir_instr_alu_slot_is_full(instr)) {
         assert(two_slot_node);
         gpir_debug("instr is full, schedule move node for two slot node %d\n",
                    two_slot_node->index);
         return gpir_sched_node(instr, two_slot_node);
      }

      /* schedule fully ready node first */
      list_for_each_entry(gpir_node, node, ready_list, list) {
         if (gpir_is_input_node(node)) {
            int min = gpir_get_min_scheduled_succ(node);
            if (min == instr->index - 1 && node->sched.ready) {
               gpir_debug(">5 ready node %d\n", node->index);

               if (schedule_try_place_node(instr, node))
                  return node;
            }
         }
      }

      /* no fully ready node be scheduled, schedule partially ready node */
      list_for_each_entry_safe(gpir_node, node, ready_list, list) {
         if (gpir_is_input_node(node)) {
            int min = gpir_get_min_scheduled_succ(node);
            if (min == instr->index - 1 && !node->sched.ready) {
               gpir_debug(">5 partially ready node %d\n", node->index);

               return gpir_sched_node(instr, node);
            }
         }
      }

      /* finally schedule move for fully ready node */
      list_for_each_entry_safe(gpir_node, node, ready_list, list) {
         if (gpir_is_input_node(node)) {
            int min = gpir_get_min_scheduled_succ(node);
            if (min == instr->index - 1 && node->sched.ready) {
               gpir_debug(">5 fully ready move node %d\n", node->index);

               return gpir_sched_node(instr, node);
            }
         }
      }
   }

   /* schedule remain fully ready nodes */
   list_for_each_entry(gpir_node, node, ready_list, list) {
      if (node->sched.ready) {
         gpir_debug("remain fully ready node %d\n", node->index);

         if (schedule_try_place_node(instr, node))
            return node;
      }
   }

   return NULL;
}
544
545static void schedule_print_pre_one_instr(gpir_instr *instr,
546                                         struct list_head *ready_list)
547{
548   if (!(lima_debug & LIMA_DEBUG_GP))
549      return;
550
551   printf("instr %d for ready list:", instr->index);
552   list_for_each_entry(gpir_node, node, ready_list, list) {
553      printf(" %d/%c", node->index, node->sched.ready ? 'r' : 'p');
554   }
555   printf("\n");
556}
557
558static void schedule_print_post_one_instr(gpir_instr *instr)
559{
560   if (!(lima_debug & LIMA_DEBUG_GP))
561      return;
562
563   printf("post schedule instr");
564   for (int i = 0; i < GPIR_INSTR_SLOT_NUM; i++) {
565      if (instr->slots[i])
566         printf(" %d/%d", i, instr->slots[i]->index);
567   }
568   printf("\n");
569}
570
571
/* Create one new instr for @block and fill it by repeatedly running
 * gpir_sched_instr_pass() until nothing more fits.  Returns false only
 * on allocation failure.
 */
static bool schedule_one_instr(gpir_block *block, struct list_head *ready_list)
{
   gpir_instr *instr = gpir_instr_create(block);
   if (unlikely(!instr))
      return false;

   schedule_print_pre_one_instr(instr, ready_list);

   while (true) {
      gpir_node *node = gpir_sched_instr_pass(instr, ready_list);
      if (!node)
         break;

      /* a freshly created, not-yet-placed move: queue it so a later
       * pass can schedule it */
      if (node->sched.instr < 0)
         schedule_insert_ready_list(ready_list, node);
      else {
         /* placed: move the node back to the block's node list and let
          * its predecessors become (partially) ready */
         list_del(&node->list);
         list_add(&node->list, &block->node_list);

         gpir_node_foreach_pred(node, dep) {
            gpir_node *pred = dep->pred;
            schedule_insert_ready_list(ready_list, pred);
         }
      }
   }

   schedule_print_post_one_instr(instr);
   return true;
}
601
602static bool schedule_block(gpir_block *block)
603{
604   /* calculate distance */
605   list_for_each_entry(gpir_node, node, &block->node_list, list) {
606      if (gpir_node_is_root(node))
607         schedule_update_distance(node);
608   }
609
610   struct list_head ready_list;
611   list_inithead(&ready_list);
612
613   /* construct the ready list from root nodes */
614   list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
615      if (gpir_node_is_root(node))
616         schedule_insert_ready_list(&ready_list, node);
617   }
618
619   list_inithead(&block->node_list);
620   while (!list_empty(&ready_list)) {
621      if (!schedule_one_instr(block, &ready_list))
622         return false;
623   }
624
625   return true;
626}
627
/* Add the fake write-after-read deps between nodes reusing the same
 * value register (step 4 of the pre-schedule phase described at the
 * top of this file), then fold dummy_m/dummy_f helper nodes back into
 * the node they were created from.
 */
static void schedule_build_vreg_dependency(gpir_block *block)
{
   /* last writer seen so far for each value register */
   gpir_node *regs[GPIR_VALUE_REG_NUM] = {0};
   list_for_each_entry(gpir_node, node, &block->node_list, list) {
      /* store node has no value reg assigned */
      if (node->value_reg < 0)
         continue;

      gpir_node *reg = regs[node->value_reg];
      if (reg) {
         gpir_node_foreach_succ(reg, dep) {
            /* write after read dep should only apply to real 'read' */
            if (dep->type != GPIR_DEP_INPUT)
               continue;

            gpir_node *succ = dep->succ;
            gpir_node_add_dep(node, succ, GPIR_DEP_VREG_WRITE_AFTER_READ);
         }
      }
      regs[node->value_reg] = node;
   }

   /* merge dummy_f/m to the node created from */
   list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
      if (node->op == gpir_op_dummy_m) {
         gpir_alu_node *alu = gpir_node_to_alu(node);
         gpir_node *origin = alu->children[0];
         gpir_node *dummy_f = alu->children[1];

         /* re-point every successor of the dummy_m at the origin node */
         gpir_node_foreach_succ(node, dep) {
            gpir_node *succ = dep->succ;
            /* origin and node may have same succ (by VREG/INPUT or
             * VREG/VREG dep), so use gpir_node_add_dep() instead of
             * gpir_node_replace_pred() */
            gpir_node_add_dep(succ, origin, dep->type);
            gpir_node_replace_child(succ, node, origin);
         }
         gpir_node_delete(dummy_f);
         gpir_node_delete(node);
      }
   }
}
670
/* Build read-after-write / write-after-read deps between the
 * load/store nodes of each physical register, based on the nodes'
 * pre-schedule order (sched.index).
 */
static void schedule_build_preg_dependency(gpir_compiler *comp)
{
   /* merge reg with the same index */
   gpir_reg *regs[GPIR_VALUE_REG_NUM] = {0};
   list_for_each_entry(gpir_reg, reg, &comp->reg_list, list) {
      if (!regs[reg->index])
         regs[reg->index] = reg;
      else {
         list_splicetail(&reg->defs_list, &regs[reg->index]->defs_list);
         list_splicetail(&reg->uses_list, &regs[reg->index]->uses_list);
      }
   }

   /* calculate physical reg read/write dependency for load/store nodes */
   for (int i = 0; i < GPIR_VALUE_REG_NUM; i++) {
      gpir_reg *reg = regs[i];
      if (!reg)
         continue;

      /* sort reg write (insertion sort by ascending sched.index) */
      struct list_head tmp_list;
      list_replace(&reg->defs_list, &tmp_list);
      list_inithead(&reg->defs_list);
      list_for_each_entry_safe(gpir_store_node, store, &tmp_list, reg_link) {
         struct list_head *insert_pos = &reg->defs_list;
         list_for_each_entry(gpir_store_node, st, &reg->defs_list, reg_link) {
            if (st->node.sched.index > store->node.sched.index) {
               insert_pos = &st->reg_link;
               break;
            }
         }
         list_del(&store->reg_link);
         list_addtail(&store->reg_link, insert_pos);
      }

      /* sort reg read (same insertion sort, for the loads) */
      list_replace(&reg->uses_list, &tmp_list);
      list_inithead(&reg->uses_list);
      list_for_each_entry_safe(gpir_load_node, load, &tmp_list, reg_link) {
         struct list_head *insert_pos = &reg->uses_list;
         list_for_each_entry(gpir_load_node, ld, &reg->uses_list, reg_link) {
            if (ld->node.sched.index > load->node.sched.index) {
               insert_pos = &ld->reg_link;
               break;
            }
         }
         list_del(&load->reg_link);
         list_addtail(&load->reg_link, insert_pos);
      }

      /* insert dependency: walk the loads in order while tracking the
       * store preceding each load (store) and the following one (next) */
      gpir_store_node *store =
         list_first_entry(&reg->defs_list, gpir_store_node, reg_link);
      gpir_store_node *next = store->reg_link.next != &reg->defs_list ?
         list_first_entry(&store->reg_link, gpir_store_node, reg_link) : NULL;

      list_for_each_entry(gpir_load_node, load, &reg->uses_list, reg_link) {
         /* loop until load is between store and next */
         while (next && next->node.sched.index < load->node.sched.index) {
            store = next;
            next = store->reg_link.next != &reg->defs_list ?
               list_first_entry(&store->reg_link, gpir_store_node, reg_link) : NULL;
         }

         /* the load reads store's value and must come before the
          * following write (if any) clobbers it */
         gpir_node_add_dep(&load->node, &store->node, GPIR_DEP_READ_AFTER_WRITE);
         if (next)
            gpir_node_add_dep(&next->node, &load->node, GPIR_DEP_WRITE_AFTER_READ);
      }
   }
}
741
742static void print_statistic(gpir_compiler *comp, int save_index)
743{
744   int num_nodes[gpir_op_num] = {0};
745   int num_created_nodes[gpir_op_num] = {0};
746
747   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
748      list_for_each_entry(gpir_node, node, &block->node_list, list) {
749         num_nodes[node->op]++;
750         if (node->index >= save_index)
751            num_created_nodes[node->op]++;
752      }
753   }
754
755   printf("====== gpir scheduler statistic ======\n");
756   printf("---- how many nodes are scheduled ----\n");
757   int n = 0, l = 0;
758   for (int i = 0; i < gpir_op_num; i++) {
759      if (num_nodes[i]) {
760         printf("%10s:%-6d", gpir_op_infos[i].name, num_nodes[i]);
761         n += num_nodes[i];
762         if (!(++l % 4))
763            printf("\n");
764      }
765   }
766   if (l % 4)
767      printf("\n");
768   printf("\ntotal: %d\n", n);
769
770   printf("---- how many nodes are created ----\n");
771   n = l = 0;
772   for (int i = 0; i < gpir_op_num; i++) {
773      if (num_created_nodes[i]) {
774         printf("%10s:%-6d", gpir_op_infos[i].name, num_created_nodes[i]);
775         n += num_created_nodes[i];
776         if (!(++l % 4))
777            printf("\n");
778      }
779   }
780   if (l % 4)
781      printf("\n");
782   printf("\ntotal: %d\n", n);
783   printf("------------------------------------\n");
784}
785
786bool gpir_schedule_prog(gpir_compiler *comp)
787{
788   int save_index = comp->cur_index;
789
790   /* init schedule info */
791   int index = 0;
792   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
793      block->sched.instr_index = 0;
794      list_for_each_entry(gpir_node, node, &block->node_list, list) {
795         node->sched.instr = -1;
796         node->sched.pos = -1;
797         node->sched.index = index++;
798         node->sched.dist = -1;
799         node->sched.ready = false;
800         node->sched.inserted = false;
801      }
802   }
803
804   /* build fake/virtual dependency */
805   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
806      schedule_build_vreg_dependency(block);
807   }
808   schedule_build_preg_dependency(comp);
809
810   //gpir_debug("after scheduler build reg dependency\n");
811   //gpir_node_print_prog_dep(comp);
812
813   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
814      if (!schedule_block(block)) {
815         gpir_error("fail schedule block\n");
816         return false;
817      }
818   }
819
820   if (lima_debug & LIMA_DEBUG_GP) {
821      print_statistic(comp, save_index);
822      gpir_instr_print_prog(comp);
823   }
824
825   return true;
826}
827