1/*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25#include "util/ralloc.h"
26
27#include "gpir.h"
28#include "lima_context.h"
29
30static gpir_node *
31gpir_lower_create_insert_node(gpir_node *parent, gpir_node *child,
32                              gpir_node *child2, gpir_op op)
33{
34   gpir_node *node = gpir_node_create(parent->block, op);
35   if (!node)
36      return NULL;
37
38   gpir_alu_node *alu = gpir_node_to_alu(node);
39   alu->children[0] = child;
40   alu->children[1] = child2;
41   alu->num_child = 2;
42   gpir_node_insert_child(parent, child, node);
43   gpir_node_add_dep(node, child2, GPIR_DEP_INPUT);
44   list_addtail(&node->list, &parent->list);
45   return node;
46}
47
/* Lower the position output into a viewport transform:
 * the xyz components of varying 0 are rewritten as
 *    xyz' = xyz * (1/w) * scale + translate
 * where scale/translate come from two uniform vectors appended at
 * comp->constant_base (presumably filled in with the viewport state by
 * the driver -- TODO confirm against the state-emission code).
 * Returns false on allocation failure. */
static bool gpir_lower_viewport_transform(gpir_compiler *comp)
{
   gpir_node *rcpw = NULL;

   /* rcpw = 1 / w */
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry(gpir_node, node, &block->node_list, list) {
         if (node->op == gpir_op_store_varying) {
            gpir_store_node *store = gpir_node_to_store(node);
            /* varying 0 component 3 is the w component of the position
             * output (assumed -- TODO confirm varying slot assignment) */
            if (store->index == 0 && store->component == 3) {
               gpir_node *w = store->child;

               rcpw = gpir_node_create(block, gpir_op_rcp);
               if (!rcpw)
                  return false;
               /* place the rcp immediately before the store */
               list_addtail(&rcpw->list, &node->list);

               gpir_alu_node *alu = gpir_node_to_alu(rcpw);
               alu->children[0] = w;
               alu->num_child = 1;
               /* the store now writes 1/w instead of w */
               store->child = rcpw;

               gpir_node_insert_child(node, w, rcpw);
               goto found;
            }
         }
      }
   }

found:
   /* every vertex shader must write position, so the w store must exist;
    * note that falling out of the loops without a match also reaches this
    * label, relying on the assert to catch it */
   assert(rcpw);

   /* xyz = xyz * rcpw * scale + transition */
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry(gpir_node, node, &block->node_list, list) {
         if (node->op == gpir_op_store_varying) {
            gpir_store_node *store = gpir_node_to_store(node);
            if (store->index == 0 && store->component < 3) {
               gpir_node *xyz = store->child;

               /* mul1 = xyz * (1/w) */
               gpir_node *mul1 =
                  gpir_lower_create_insert_node(node, xyz, rcpw, gpir_op_mul);
               if (!mul1)
                  return false;

               /* viewport scale, one component per store */
               gpir_load_node *scale = gpir_node_create(block, gpir_op_load_uniform);
               if (!scale)
                  return false;
               scale->index = comp->constant_base;
               scale->component = store->component;
               list_addtail(&scale->node.list, &node->list);

               /* mul2 = mul1 * scale */
               gpir_node *mul2 =
                  gpir_lower_create_insert_node(node, mul1, &scale->node, gpir_op_mul);
               if (!mul2)
                  return false;

               /* viewport translate */
               gpir_load_node *translate = gpir_node_create(block, gpir_op_load_uniform);
               if (!translate)
                  return false;
               translate->index = comp->constant_base + 1;
               translate->component = store->component;
               list_addtail(&translate->node.list, &node->list);

               /* add = mul2 + translate; this becomes the stored value */
               gpir_node *add =
                  gpir_lower_create_insert_node(node, mul2, &translate->node, gpir_op_add);
               if (!add)
                  return false;

               store->child = add;
            }
         }
      }
   }

   /* reserve the two uniform vectors consumed above */
   comp->constant_base += 2;
   return true;
}
126
/* Replace constant nodes with uniform loads: all live const values are
 * gathered into prog->constant (uploaded after the user uniforms, starting
 * at comp->constant_base) and each use is rewritten to a load_uniform.
 * Unused consts are simply deleted.  Returns false on allocation failure. */
static bool gpir_lower_const(gpir_compiler *comp)
{
   /* pass 1: delete consts with no users, count the rest */
   int num_constant = 0;
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (node->op == gpir_op_const) {
            if (gpir_node_is_root(node))
               gpir_node_delete(node);
            else
               num_constant++;
         }
      }
   }

   if (num_constant) {
      /* backing store for the constant values; owned by prog via ralloc */
      union fi *constant = ralloc_array(comp->prog, union fi, num_constant);
      if (!constant)
         return false;

      comp->prog->constant = constant;
      comp->prog->constant_size = num_constant * sizeof(union fi);

      /* pass 2: replace each remaining const with a load_uniform */
      int index = 0;
      list_for_each_entry(gpir_block, block, &comp->block_list, list) {
         list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
            if (node->op == gpir_op_const) {
               gpir_const_node *c = gpir_node_to_const(node);

               if (!gpir_node_is_root(node)) {
                  gpir_load_node *load = gpir_node_create(block, gpir_op_load_uniform);
                  if (unlikely(!load))
                     return false;

                  /* constants are packed 4 scalars per uniform vector */
                  load->index = comp->constant_base + (index >> 2);
                  load->component = index % 4;
                  constant[index++] = c->value;

                  /* point all users of the const at the new load */
                  gpir_node_replace_succ(&load->node, node);

                  list_addtail(&load->node.list, &node->list);

                  gpir_debug("lower const create uniform %d for const %d\n",
                             load->node.index, node->index);
               }

               gpir_node_delete(node);
            }
         }
      }
   }

   return true;
}
180
/* Duplicate each load node so every successor gets its own copy.
 * The first successor keeps the original; each further successor gets a
 * fresh clone with the same index/component (and register link, for
 * register loads).  Presumably this gives the scheduler the freedom to
 * place each load next to its single user -- TODO confirm motivation. */
static bool gpir_lower_load(gpir_compiler *comp)
{
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (node->type == gpir_node_type_load) {
            gpir_load_node *load = gpir_node_to_load(node);

            bool first = true;
            gpir_node_foreach_succ_safe(node, dep) {
               gpir_node *succ = dep->succ;

               /* the first successor keeps the original load */
               if (first) {
                  first = false;
                  continue;
               }

               /* clone lives in the successor's block, right before it */
               gpir_node *new = gpir_node_create(succ->block, node->op);
               if (unlikely(!new))
                  return false;
               list_addtail(&new->list, &succ->list);

               gpir_debug("lower load create %d from %d for succ %d\n",
                          new->index, node->index, succ->index);

               gpir_load_node *nload = gpir_node_to_load(new);
               nload->index = load->index;
               nload->component = load->component;
               if (load->reg) {
                  /* register loads track their register's use list too */
                  nload->reg = load->reg;
                  list_addtail(&nload->reg_link, &load->reg->uses_list);
               }

               /* retarget this successor's dep and child pointer */
               gpir_node_replace_pred(dep, new);
               gpir_node_replace_child(succ, node, new);
            }
         }
      }
   }

   return true;
}
223
/* Fold a standalone negate node into its neighbours: either flip the
 * dest-negate flag on its only ALU child, or flip the per-source negate
 * flag on each ALU successor that supports it.  Successors that cannot
 * absorb the negate keep consuming the original node, which is only
 * deleted once nothing uses it any more. */
static bool gpir_lower_neg(gpir_block *block, gpir_node *node)
{
   gpir_alu_node *neg = gpir_node_to_alu(node);
   gpir_node *child = neg->children[0];

   /* check if child can dest negate */
   if (child->type == gpir_node_type_alu) {
      /* negate must be its only successor */
      if (list_is_singular(&child->succ_list) &&
          gpir_op_infos[child->op].dest_neg) {
         gpir_alu_node *alu = gpir_node_to_alu(child);
         /* flip (not set): the child may already negate its result */
         alu->dest_negate = !alu->dest_negate;

         /* all users of the neg now consume the child directly */
         gpir_node_replace_succ(child, node);
         gpir_node_delete(node);
         return true;
      }
   }

   /* check if child can src negate */
   gpir_node_foreach_succ_safe(node, dep) {
      gpir_node *succ = dep->succ;
      if (succ->type != gpir_node_type_alu)
         continue;

      bool success = true;
      gpir_alu_node *alu = gpir_node_to_alu(dep->succ);
      for (int i = 0; i < alu->num_child; i++) {
         if (alu->children[i] == node) {
            if (gpir_op_infos[succ->op].src_neg[i]) {
               /* absorb the negate into this source slot */
               alu->children_negate[i] = !alu->children_negate[i];
               alu->children[i] = child;
            }
            else
               success = false;
         }
      }

      /* only retarget the dep if every use in this successor was absorbed */
      if (success)
         gpir_node_replace_pred(dep, child);
   }

   /* delete the neg once no successor consumes it any more */
   if (gpir_node_is_root(node))
      gpir_node_delete(node);

   return true;
}
271
/* Expand rcp/rsqrt into the hardware's three-op sequence:
 *    complex1(impl(x), complex2(x), x)
 * where impl is rcp_impl/rsqrt_impl.  The exact refinement semantics of
 * complex1/complex2 are hardware-defined (presumably an initial estimate
 * plus a correction step -- TODO confirm against the Mali GP ISA notes).
 * The original node is mutated in place into the complex1. */
static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
{
   gpir_alu_node *alu = gpir_node_to_alu(node);
   gpir_node *child = alu->children[0];

   gpir_alu_node *complex2 = gpir_node_create(block, gpir_op_complex2);
   if (unlikely(!complex2))
      return false;

   complex2->children[0] = child;
   complex2->num_child = 1;
   gpir_node_add_dep(&complex2->node, child, GPIR_DEP_INPUT);
   list_addtail(&complex2->node.list, &node->list);

   /* pick the estimate op matching the original operation */
   int impl_op = 0;
   switch (node->op) {
   case gpir_op_rcp:
      impl_op = gpir_op_rcp_impl;
      break;
   case gpir_op_rsqrt:
      impl_op = gpir_op_rsqrt_impl;
      break;
   default:
      /* only rcp/rsqrt are registered for this lowering */
      assert(0);
   }

   gpir_alu_node *impl = gpir_node_create(block, impl_op);
   if (unlikely(!impl))
      return false;

   impl->children[0] = child;
   impl->num_child = 1;
   gpir_node_add_dep(&impl->node, child, GPIR_DEP_INPUT);
   list_addtail(&impl->node.list, &node->list);

   /* change node to complex1 node */
   node->op = gpir_op_complex1;
   alu->children[0] = &impl->node;
   alu->children[1] = &complex2->node;
   alu->children[2] = child;
   alu->num_child = 3;
   /* node already depends on child; add the two new inputs */
   gpir_node_add_dep(node, &impl->node, GPIR_DEP_INPUT);
   gpir_node_add_dep(node, &complex2->node, GPIR_DEP_INPUT);

   return true;
}
318
/* Wrap every node that may occupy two instruction slots in auxiliary
 * dummy nodes so that value register allocation can treat it uniformly;
 * see the in-body comment for the full rationale. */
static bool gpir_lower_node_may_consume_two_slots(gpir_compiler *comp)
{
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (gpir_op_infos[node->op].may_consume_two_slots) {
            /* dummy_f/m are auxiliary nodes for value reg alloc:
             * 1. before reg alloc, create fake nodes dummy_f, dummy_m,
             *    so the tree become: (dummy_m (node dummy_f))
             *    dummy_m can be spilled, but other nodes in the tree can't
             *    be spilled.
             * 2. After reg allocation and fake dep add, merge all deps of
             *    dummy_m and dummy_f to node and remove dummy_m & dummy_f
             *
             * We may also not use dummy_f/m, but alloc two value reg for
             * node. But that means we need to make sure there're 2 free
             * slot after the node successors, but we just need one slot
             * after to be able to schedule it because we can use one move for
             * the two slot node. It's also not easy to handle the spill case
             * for the alloc 2 value method.
             *
             * With the dummy_f/m method, there's no such requirement, the
             * node can be scheduled only when there's two slots for it,
             * otherwise a move. And the node can be spilled with one reg.
             */
            gpir_node *dummy_m = gpir_node_create(block, gpir_op_dummy_m);
            if (unlikely(!dummy_m))
               return false;
            /* list_add places the dummies right after node */
            list_add(&dummy_m->list, &node->list);

            gpir_node *dummy_f = gpir_node_create(block, gpir_op_dummy_f);
            if (unlikely(!dummy_f))
               return false;
            list_add(&dummy_f->list, &node->list);

            /* dummy_m consumes both the real node and dummy_f */
            gpir_alu_node *alu = gpir_node_to_alu(dummy_m);
            alu->children[0] = node;
            alu->children[1] = dummy_f;
            alu->num_child = 2;

            /* users of node now consume dummy_m instead */
            gpir_node_replace_succ(dummy_m, node);
            gpir_node_add_dep(dummy_m, node, GPIR_DEP_INPUT);
            gpir_node_add_dep(dummy_m, dummy_f, GPIR_DEP_INPUT);

         }
      }
   }

   return true;
}
368
369/*
370 * There are no 'equal' or 'not-equal' opcodes.
371 * eq (a == b) is lowered to and(a >= b, b >= a)
372 * ne (a != b) is lowered to or(a < b, b < a)
373 */
374static bool gpir_lower_eq_ne(gpir_block *block, gpir_node *node)
375{
376   gpir_op cmp_node_op;
377   gpir_op node_new_op;
378   switch (node->op) {
379      case gpir_op_eq:
380         cmp_node_op = gpir_op_ge;
381         node_new_op = gpir_op_min; /* and */
382         break;
383      case gpir_op_ne:
384         cmp_node_op = gpir_op_lt;
385         node_new_op = gpir_op_max; /* or */
386         break;
387      default:
388         assert(0);
389   }
390
391   gpir_alu_node *e = gpir_node_to_alu(node);
392
393   gpir_alu_node *cmp1 = gpir_node_create(block, cmp_node_op);
394   list_addtail(&cmp1->node.list, &node->list);
395   gpir_alu_node *cmp2 = gpir_node_create(block, cmp_node_op);
396   list_addtail(&cmp2->node.list, &node->list);
397
398   cmp1->children[0] = e->children[0];
399   cmp1->children[1] = e->children[1];
400   cmp1->num_child = 2;
401
402   cmp2->children[0] = e->children[1];
403   cmp2->children[1] = e->children[0];
404   cmp2->num_child = 2;
405
406   gpir_node_add_dep(&cmp1->node, e->children[0], GPIR_DEP_INPUT);
407   gpir_node_add_dep(&cmp1->node, e->children[1], GPIR_DEP_INPUT);
408
409   gpir_node_add_dep(&cmp2->node, e->children[0], GPIR_DEP_INPUT);
410   gpir_node_add_dep(&cmp2->node, e->children[1], GPIR_DEP_INPUT);
411
412   gpir_node_foreach_pred_safe(node, dep) {
413      gpir_node_remove_dep(node, dep->pred);
414   }
415
416   gpir_node_add_dep(node, &cmp1->node, GPIR_DEP_INPUT);
417   gpir_node_add_dep(node, &cmp2->node, GPIR_DEP_INPUT);
418
419   node->op = node_new_op;
420   e->children[0] = &cmp1->node;
421   e->children[1] = &cmp2->node;
422   e->num_child = 2;
423
424   return true;
425}
426
427/*
428 * There is no 'abs' opcode.
429 * abs(a) is lowered to max(a, -a)
430 */
431static bool gpir_lower_abs(gpir_block *block, gpir_node *node)
432{
433   gpir_alu_node *alu = gpir_node_to_alu(node);
434
435   assert(node->op == gpir_op_abs);
436
437   node->op = gpir_op_max;
438
439   alu->children[1] = alu->children[0];
440   alu->children_negate[1] = true;
441   alu->num_child = 2;
442
443   return true;
444}
445
446/*
447 * There is no 'not' opcode.
448 * not(a) is lowered to add(1, -a)
449 */
450static bool gpir_lower_not(gpir_block *block, gpir_node *node)
451{
452   gpir_alu_node *alu = gpir_node_to_alu(node);
453
454   assert(alu->node.op == gpir_op_not);
455
456   node->op = gpir_op_add;
457
458   gpir_node *node_const = gpir_node_create(block, gpir_op_const);
459   gpir_const_node *c = gpir_node_to_const(node_const);
460
461   assert(c->node.op == gpir_op_const);
462
463   list_addtail(&c->node.list, &node->list);
464   c->value.f = 1.0f;
465   gpir_node_add_dep(&alu->node, &c->node, GPIR_DEP_INPUT);
466
467   alu->children_negate[1] = !alu->children_negate[0];
468   alu->children[1] = alu->children[0];
469   alu->children[0] = &c->node;
470   alu->num_child = 2;
471
472   return true;
473}
474
475
/* Per-op lowering callbacks run before register scheduling,
 * indexed by gpir_op; NULL entries mean no lowering needed. */
static bool (*gpir_pre_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node *) = {
   [gpir_op_not] = gpir_lower_not,
};
479
/* Per-op lowering callbacks run after register scheduling,
 * indexed by gpir_op; NULL entries mean no lowering needed. */
static bool (*gpir_post_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node *) = {
   [gpir_op_neg] = gpir_lower_neg,
   [gpir_op_rcp] = gpir_lower_complex,
   [gpir_op_rsqrt] = gpir_lower_complex,
   [gpir_op_eq] = gpir_lower_eq_ne,
   [gpir_op_ne] = gpir_lower_eq_ne,
   [gpir_op_abs] = gpir_lower_abs,
};
488
489bool gpir_pre_rsched_lower_prog(gpir_compiler *comp)
490{
491   if (!gpir_lower_viewport_transform(comp))
492      return false;
493
494   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
495      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
496         if (gpir_pre_rsched_lower_funcs[node->op] &&
497             !gpir_pre_rsched_lower_funcs[node->op](block, node))
498            return false;
499      }
500   }
501
502   if (!gpir_lower_const(comp))
503      return false;
504
505   if (!gpir_lower_load(comp))
506      return false;
507
508   gpir_debug("pre rsched lower prog\n");
509   gpir_node_print_prog_seq(comp);
510   return true;
511}
512
513bool gpir_post_rsched_lower_prog(gpir_compiler *comp)
514{
515   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
516      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
517         if (gpir_post_rsched_lower_funcs[node->op] &&
518             !gpir_post_rsched_lower_funcs[node->op](block, node))
519            return false;
520      }
521   }
522
523   if (!gpir_lower_node_may_consume_two_slots(comp))
524      return false;
525
526   gpir_debug("post rsched lower prog\n");
527   gpir_node_print_prog_seq(comp);
528   return true;
529}
530