1/*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25#include "util/u_math.h"
26#include "util/ralloc.h"
27#include "util/bitscan.h"
28
29#include "ppir.h"
30
31const ppir_op_info ppir_op_infos[] = {
32   [ppir_op_mov] = {
33      .name = "mov",
34      .slots = (int []) {
35         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL,
36         PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,
37         PPIR_INSTR_SLOT_END
38      },
39   },
40   [ppir_op_abs] = {
41      .name = "abs",
42   },
43   [ppir_op_neg] = {
44      .name = "neg",
45   },
46   [ppir_op_sat] = {
47      .name = "sat",
48   },
49   [ppir_op_mul] = {
50      .name = "mul",
51      .slots = (int []) {
52         PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
53         PPIR_INSTR_SLOT_END
54      },
55   },
56   [ppir_op_add] = {
57      .name = "add",
58      .slots = (int []) {
59         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
60         PPIR_INSTR_SLOT_END
61      },
62   },
63   [ppir_op_sum3] = {
64      .name = "sum3",
65      .slots = (int []) {
66         PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_END
67      },
68   },
69   [ppir_op_sum4] = {
70      .name = "sum4",
71      .slots = (int []) {
72         PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_END
73      },
74   },
75   [ppir_op_rsqrt] = {
76      .name = "rsqrt",
77      .slots = (int []) {
78         PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
79      },
80   },
81   [ppir_op_log2] = {
82      .name = "log2",
83      .slots = (int []) {
84         PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
85      },
86   },
87   [ppir_op_exp2] = {
88      .name = "exp2",
89      .slots = (int []) {
90         PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
91      },
92   },
93   [ppir_op_sqrt] = {
94      .name = "sqrt",
95      .slots = (int []) {
96         PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
97      },
98   },
99   [ppir_op_sin] = {
100      .name = "sin",
101      .slots = (int []) {
102         PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
103      },
104   },
105   [ppir_op_cos] = {
106      .name = "cos",
107      .slots = (int []) {
108         PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
109      },
110   },
111   [ppir_op_max] = {
112      .name = "max",
113      .slots = (int []) {
114         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL,
115         PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,
116         PPIR_INSTR_SLOT_END
117      },
118   },
119   [ppir_op_min] = {
120      .name = "min",
121      .slots = (int []) {
122         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL,
123         PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,
124         PPIR_INSTR_SLOT_END
125      },
126   },
127   [ppir_op_floor] = {
128      .name = "floor",
129      .slots = (int []) {
130         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
131         PPIR_INSTR_SLOT_END
132      },
133   },
134   [ppir_op_ceil] = {
135      .name = "ceil",
136      .slots = (int []) {
137         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
138         PPIR_INSTR_SLOT_END
139      },
140   },
141   [ppir_op_fract] = {
142      .name = "fract",
143      .slots = (int []) {
144         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
145         PPIR_INSTR_SLOT_END
146      },
147   },
148   [ppir_op_ddx] = {
149      .name = "ddx",
150      .slots = (int []) {
151         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
152         PPIR_INSTR_SLOT_END
153      },
154   },
155   [ppir_op_ddy] = {
156      .name = "ddy",
157      .slots = (int []) {
158         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
159         PPIR_INSTR_SLOT_END
160      },
161   },
162   [ppir_op_and] = {
163      .name = "and",
164      .slots = (int []) {
165         PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
166         PPIR_INSTR_SLOT_END
167      },
168   },
169   [ppir_op_or] = {
170      .name = "or",
171      .slots = (int []) {
172         PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
173         PPIR_INSTR_SLOT_END
174      },
175   },
176   [ppir_op_xor] = {
177      .name = "xor",
178      .slots = (int []) {
179         PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
180         PPIR_INSTR_SLOT_END
181      },
182   },
183   [ppir_op_not] = {
184      .name = "not",
185      .slots = (int []) {
186         PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
187         PPIR_INSTR_SLOT_END
188      },
189   },
190   [ppir_op_lt] = {
191      .name = "lt",
192   },
193   [ppir_op_le] = {
194      .name = "le",
195   },
196   [ppir_op_gt] = {
197      .name = "gt",
198      .slots = (int []) {
199         PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
200         PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
201         PPIR_INSTR_SLOT_END
202      },
203   },
204   [ppir_op_ge] = {
205      .name = "ge",
206      .slots = (int []) {
207         PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
208         PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
209         PPIR_INSTR_SLOT_END
210      },
211   },
212   [ppir_op_eq] = {
213      .name = "eq",
214      .slots = (int []) {
215         PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
216         PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
217         PPIR_INSTR_SLOT_END
218      },
219   },
220   [ppir_op_ne] = {
221      .name = "ne",
222      .slots = (int []) {
223         PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
224         PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
225         PPIR_INSTR_SLOT_END
226      },
227   },
228   [ppir_op_select] = {
229      .name = "select",
230      .slots = (int []) {
231         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
232         PPIR_INSTR_SLOT_END
233      },
234   },
235   [ppir_op_rcp] = {
236      .name = "rcp",
237      .slots = (int []) {
238         PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
239      },
240   },
241   [ppir_op_load_varying] = {
242      .name = "ld_var",
243      .type = ppir_node_type_load,
244      .slots = (int []) {
245         PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
246      },
247   },
248   [ppir_op_load_coords] = {
249      .name = "ld_coords",
250      .type = ppir_node_type_load,
251      .slots = (int []) {
252         PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
253      },
254   },
255   [ppir_op_load_coords_reg] = {
256      .name = "ld_coords_reg",
257      .type = ppir_node_type_load,
258      .slots = (int []) {
259         PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
260      },
261   },
262   [ppir_op_load_fragcoord] = {
263      .name = "ld_fragcoord",
264      .type = ppir_node_type_load,
265      .slots = (int []) {
266         PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
267      },
268   },
269   [ppir_op_load_pointcoord] = {
270      .name = "ld_pointcoord",
271      .type = ppir_node_type_load,
272      .slots = (int []) {
273         PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
274      },
275   },
276   [ppir_op_load_frontface] = {
277      .name = "ld_frontface",
278      .type = ppir_node_type_load,
279      .slots = (int []) {
280         PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
281      },
282   },
283   [ppir_op_load_uniform] = {
284      .name = "ld_uni",
285      .type = ppir_node_type_load,
286      .slots = (int []) {
287         PPIR_INSTR_SLOT_UNIFORM, PPIR_INSTR_SLOT_END
288      },
289   },
290   [ppir_op_load_texture] = {
291      .name = "ld_tex",
292      .type = ppir_node_type_load_texture,
293      .slots = (int []) {
294         PPIR_INSTR_SLOT_TEXLD, PPIR_INSTR_SLOT_END
295      },
296   },
297   [ppir_op_load_temp] = {
298      .name = "ld_temp",
299      .type = ppir_node_type_load,
300      .slots = (int []) {
301         PPIR_INSTR_SLOT_UNIFORM, PPIR_INSTR_SLOT_END
302      },
303   },
304   [ppir_op_const] = {
305      .name = "const",
306      .type = ppir_node_type_const,
307   },
308   [ppir_op_store_temp] = {
309      .name = "st_temp",
310      .type = ppir_node_type_store,
311      .slots = (int []) {
312         PPIR_INSTR_SLOT_STORE_TEMP, PPIR_INSTR_SLOT_END
313      },
314   },
315   [ppir_op_discard] = {
316      .name = "discard",
317      .type = ppir_node_type_discard,
318      .slots = (int []) {
319         PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END
320      },
321   },
322   [ppir_op_branch] = {
323      .name = "branch",
324      .type = ppir_node_type_branch,
325      .slots = (int []) {
326         PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END
327      },
328   },
329   [ppir_op_undef] = {
330      .name = "undef",
331      .type = ppir_node_type_alu,
332      .slots = (int []) {
333      },
334   },
335   [ppir_op_dummy] = {
336      .name = "dummy",
337      .type = ppir_node_type_alu,
338      .slots = (int []) {
339      },
340   },
341};
342
343void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask)
344{
345   ppir_compiler *comp = block->comp;
346   static const int node_size[] = {
347      [ppir_node_type_alu] = sizeof(ppir_alu_node),
348      [ppir_node_type_const] = sizeof(ppir_const_node),
349      [ppir_node_type_load] = sizeof(ppir_load_node),
350      [ppir_node_type_store] = sizeof(ppir_store_node),
351      [ppir_node_type_load_texture] = sizeof(ppir_load_texture_node),
352      [ppir_node_type_discard] = sizeof(ppir_discard_node),
353      [ppir_node_type_branch] = sizeof(ppir_branch_node),
354   };
355
356   ppir_node_type type = ppir_op_infos[op].type;
357   int size = node_size[type];
358   ppir_node *node = rzalloc_size(block, size);
359   if (!node)
360      return NULL;
361
362   list_inithead(&node->succ_list);
363   list_inithead(&node->pred_list);
364
365   if (index >= 0) {
366      if (mask) {
367         /* reg has 4 slots for each component write node */
368         while (mask)
369            comp->var_nodes[(index << 2) + comp->reg_base + u_bit_scan(&mask)] = node;
370         snprintf(node->name, sizeof(node->name), "reg%d", index);
371      } else {
372         comp->var_nodes[index] = node;
373         snprintf(node->name, sizeof(node->name), "ssa%d", index);
374      }
375   }
376   else
377      snprintf(node->name, sizeof(node->name), "new");
378
379   node->op = op;
380   node->type = type;
381   node->index = comp->cur_index++;
382   node->block = block;
383
384   return node;
385}
386
387void ppir_node_add_dep(ppir_node *succ, ppir_node *pred,
388                       ppir_dep_type type)
389{
390   /* don't add dep for two nodes from different block */
391   if (succ->block != pred->block) {
392      pred->succ_different_block = true;
393      return;
394   }
395
396   /* don't add duplicated dep */
397   ppir_node_foreach_pred(succ, dep) {
398      if (dep->pred == pred)
399         return;
400   }
401
402   ppir_dep *dep = ralloc(succ, ppir_dep);
403   dep->pred = pred;
404   dep->succ = succ;
405   dep->type = type;
406   list_addtail(&dep->pred_link, &succ->pred_list);
407   list_addtail(&dep->succ_link, &pred->succ_list);
408}
409
410void ppir_node_remove_dep(ppir_dep *dep)
411{
412   list_del(&dep->succ_link);
413   list_del(&dep->pred_link);
414   ralloc_free(dep);
415}
416
417static void _ppir_node_replace_child(ppir_src *src, ppir_node *old_child, ppir_node *new_child)
418{
419   ppir_dest *od = ppir_node_get_dest(old_child);
420   if (ppir_node_target_equal(src, od)) {
421      ppir_node_target_assign(src, new_child);
422   }
423}
424
425void ppir_node_replace_child(ppir_node *parent, ppir_node *old_child, ppir_node *new_child)
426{
427   switch (parent->type) {
428   case ppir_node_type_alu:
429   {
430      ppir_alu_node *alu = ppir_node_to_alu(parent);
431      for (int i = 0; i < alu->num_src; i++)
432         _ppir_node_replace_child(alu->src + i, old_child, new_child);
433      break;
434   }
435   case ppir_node_type_branch:
436   {
437      ppir_branch_node *branch = ppir_node_to_branch(parent);
438      for (int i = 0; i < 2; i++)
439         _ppir_node_replace_child(branch->src + i, old_child, new_child);
440      break;
441   }
442   case ppir_node_type_load:
443   {
444      ppir_load_node *load = ppir_node_to_load(parent);
445      _ppir_node_replace_child(&load->src, old_child, new_child);
446      break;
447   }
448   case ppir_node_type_load_texture:
449   {
450      ppir_load_texture_node *load_texture = ppir_node_to_load_texture(parent);
451      for (int i = 0; i < load_texture->num_src; i++)
452         _ppir_node_replace_child(ppir_node_get_src(parent, i), old_child, new_child);
453      break;
454   }
455   case ppir_node_type_store:
456   {
457      ppir_store_node *store = ppir_node_to_store(parent);
458      _ppir_node_replace_child(&store->src, old_child, new_child);
459      break;
460   }
461   default:
462      ppir_debug("unknown node type in %s\n", __func__);
463      break;
464   }
465}
466
467void ppir_node_replace_pred(ppir_dep *dep, ppir_node *new_pred)
468{
469   list_del(&dep->succ_link);
470   dep->pred = new_pred;
471   list_addtail(&dep->succ_link, &new_pred->succ_list);
472}
473
474ppir_dep *ppir_dep_for_pred(ppir_node *node, ppir_node *pred)
475{
476   if (!pred)
477      return NULL;
478
479   if (node->block != pred->block)
480      return NULL;
481
482   ppir_node_foreach_pred(node, dep) {
483      if (dep->pred == pred)
484         return dep;
485   }
486   return NULL;
487}
488
489void ppir_node_replace_all_succ(ppir_node *dst, ppir_node *src)
490{
491   ppir_node_foreach_succ_safe(src, dep) {
492      ppir_node_replace_pred(dep, dst);
493      ppir_node_replace_child(dep->succ, src, dst);
494   }
495}
496
497void ppir_node_delete(ppir_node *node)
498{
499   ppir_node_foreach_succ_safe(node, dep)
500      ppir_node_remove_dep(dep);
501
502   ppir_node_foreach_pred_safe(node, dep)
503      ppir_node_remove_dep(dep);
504
505   list_del(&node->list);
506   ralloc_free(node);
507}
508
509static void ppir_node_print_dest(ppir_dest *dest)
510{
511   switch (dest->type) {
512   case ppir_target_ssa:
513      printf("ssa%d", dest->ssa.index);
514      break;
515   case ppir_target_pipeline:
516      printf("pipeline %d", dest->pipeline);
517      break;
518   case ppir_target_register:
519      printf("reg %d", dest->reg->index);
520      break;
521   }
522}
523
524static void ppir_node_print_src(ppir_src *src)
525{
526   switch (src->type) {
527   case ppir_target_ssa: {
528      if (src->node)
529         printf("ssa node %d", src->node->index);
530      else
531         printf("ssa idx %d", src->ssa ? src->ssa->index : -1);
532      break;
533   }
534   case ppir_target_pipeline:
535      if (src->node)
536         printf("pipeline %d node %d", src->pipeline, src->node->index);
537      else
538         printf("pipeline %d", src->pipeline);
539      break;
540   case ppir_target_register:
541      printf("reg %d", src->reg->index);
542      break;
543   }
544}
545
546static void ppir_node_print_node(ppir_node *node, int space)
547{
548   for (int i = 0; i < space; i++)
549      printf(" ");
550
551   printf("%s%d: %s %s: ", node->printed && !ppir_node_is_leaf(node) ? "+" : "",
552          node->index, ppir_op_infos[node->op].name, node->name);
553
554   ppir_dest *dest = ppir_node_get_dest(node);
555   if (dest) {
556      printf("dest: ");
557      ppir_node_print_dest(dest);
558   }
559
560   if (ppir_node_get_src_num(node) > 0) {
561      printf(" src: ");
562   }
563   for (int i = 0; i < ppir_node_get_src_num(node); i++) {
564      ppir_node_print_src(ppir_node_get_src(node, i));
565      if (i != (ppir_node_get_src_num(node) - 1))
566         printf(", ");
567   }
568   printf("\n");
569
570   if (!node->printed) {
571      ppir_node_foreach_pred(node, dep) {
572         ppir_node *pred = dep->pred;
573         ppir_node_print_node(pred, space + 2);
574      }
575
576      node->printed = true;
577   }
578}
579
580void ppir_node_print_prog(ppir_compiler *comp)
581{
582   if (!(lima_debug & LIMA_DEBUG_PP))
583      return;
584
585   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
586      list_for_each_entry(ppir_node, node, &block->node_list, list) {
587         node->printed = false;
588      }
589   }
590
591   printf("========prog========\n");
592   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
593      printf("-------block %3d-------\n", block->index);
594      list_for_each_entry(ppir_node, node, &block->node_list, list) {
595         if (ppir_node_is_root(node))
596            ppir_node_print_node(node, 0);
597      }
598   }
599   printf("====================\n");
600}
601
602static ppir_node *ppir_node_insert_mov_local(ppir_node *node)
603{
604   ppir_node *move = ppir_node_create(node->block, ppir_op_mov, -1, 0);
605   if (unlikely(!move))
606      return NULL;
607
608   ppir_dest *dest = ppir_node_get_dest(node);
609   ppir_alu_node *alu = ppir_node_to_alu(move);
610   alu->dest = *dest;
611   alu->num_src = 1;
612   ppir_node_target_assign(alu->src, node);
613
614   for (int s = 0; s < 4; s++)
615      alu->src->swizzle[s] = s;
616
617   ppir_node_replace_all_succ(move, node);
618   ppir_node_add_dep(move, node, ppir_dep_src);
619   list_addtail(&move->list, &node->list);
620
621   if (node->is_end) {
622      node->is_end = false;
623      move->is_end = true;
624   }
625
626   return move;
627}
628
629ppir_node *ppir_node_insert_mov(ppir_node *old)
630{
631   ppir_node *move = ppir_node_insert_mov_local(old);
632   ppir_compiler *comp = old->block->comp;
633
634   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
635      if (old->block == block)
636         continue;
637      list_for_each_entry_safe(ppir_node, node, &block->node_list, list) {
638         for (int i = 0; i < ppir_node_get_src_num(node); i++){
639            ppir_src *src = ppir_node_get_src(node, i);
640            if (!src)
641               continue;
642            if (src->node == old)
643               ppir_node_target_assign(src, move);
644         }
645      }
646   }
647
648   return move;
649}
650
651bool ppir_node_has_single_src_succ(ppir_node *node)
652{
653   if (ppir_node_has_single_succ(node) &&
654       list_first_entry(&node->succ_list,
655                        ppir_dep, succ_link)->type == ppir_dep_src)
656      return true;
657
658   int cnt = 0;
659   ppir_node_foreach_succ(node, dep) {
660      if (dep->type != ppir_dep_src)
661         continue;
662      cnt++;
663   }
664
665   return cnt == 1;
666}
667