1/*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25#include "util/bitscan.h"
26#include "util/ralloc.h"
27
28#include "ppir.h"
29
30static bool ppir_lower_const(ppir_block *block, ppir_node *node)
31{
32   if (ppir_node_is_root(node)) {
33      ppir_node_delete(node);
34      return true;
35   }
36
37   assert(ppir_node_has_single_succ(node));
38
39   ppir_node *succ = ppir_node_first_succ(node);
40   ppir_dest *dest = ppir_node_get_dest(node);
41
42   switch (succ->type) {
43   case ppir_node_type_alu:
44   case ppir_node_type_branch:
45      /* ALU and branch can consume consts directly */
46      dest->type = ppir_target_pipeline;
47      /* Reg will be updated in node_to_instr later */
48      dest->pipeline = ppir_pipeline_reg_const0;
49
50      /* single succ can still have multiple references to this node */
51      for (int i = 0; i < ppir_node_get_src_num(succ); i++) {
52         ppir_src *src = ppir_node_get_src(succ, i);
53         if (src && src->node == node) {
54            src->type = ppir_target_pipeline;
55            src->pipeline = ppir_pipeline_reg_const0;
56         }
57      }
58      return true;
59   default:
60      /* Create a move for everyone else */
61      break;
62   }
63
64   ppir_node *move = ppir_node_insert_mov(node);
65   if (unlikely(!move))
66      return false;
67
68   ppir_debug("lower const create move %d for %d\n",
69              move->index, node->index);
70
71   /* Need to be careful with changing src/dst type here:
72    * it has to be done *after* successors have their children
73    * replaced, otherwise ppir_node_replace_child() won't find
74    * matching src/dst and as result won't work
75    */
76   ppir_src *mov_src = ppir_node_get_src(move, 0);
77   mov_src->type = dest->type = ppir_target_pipeline;
78   mov_src->pipeline = dest->pipeline = ppir_pipeline_reg_const0;
79
80   return true;
81}
82
83static bool ppir_lower_swap_args(ppir_block *block, ppir_node *node)
84{
85   /* swapped op must be the next op */
86   node->op++;
87
88   assert(node->type == ppir_node_type_alu);
89   ppir_alu_node *alu = ppir_node_to_alu(node);
90   assert(alu->num_src == 2);
91
92   ppir_src tmp = alu->src[0];
93   alu->src[0] = alu->src[1];
94   alu->src[1] = tmp;
95   return true;
96}
97
/* Lower a uniform/temporary load: feed the loaded value to its consumer
 * through the uniform pipeline register when possible, otherwise insert
 * a mov after the load. Returns false only on allocation failure. */
static bool ppir_lower_load(ppir_block *block, ppir_node *node)
{
   ppir_dest *dest = ppir_node_get_dest(node);
   /* A load with no users and an SSA destination has no effect - drop it. */
   if (ppir_node_is_root(node) && dest->type == ppir_target_ssa) {
      ppir_node_delete(node);
      return true;
   }

   /* load can have multiple successors in case if we duplicated load node
    * that has load node in source
    */
   /* NOTE(review): a root node reaching here must have a register dest
    * (SSA roots were deleted above), so the register check below keeps
    * ppir_node_first_succ() from running on a succ-less node - confirm. */
   if ((ppir_node_has_single_src_succ(node) || ppir_node_is_root(node)) &&
      dest->type != ppir_target_register) {
      ppir_node *succ = ppir_node_first_succ(node);
      switch (succ->type) {
      case ppir_node_type_alu:
      case ppir_node_type_branch: {
         /* single succ can still have multiple references to this node */
         for (int i = 0; i < ppir_node_get_src_num(succ); i++) {
            ppir_src *src = ppir_node_get_src(succ, i);
            if (src && src->node == node) {
               /* Can consume uniforms directly */
               src->type = dest->type = ppir_target_pipeline;
               src->pipeline = dest->pipeline = ppir_pipeline_reg_uniform;
            }
         }
         return true;
      }
      default:
         /* Create mov for everyone else */
         break;
      }
   }

   /* Fallback: route the loaded value through a mov so it still goes
    * via the uniform pipeline register. */
   ppir_node *move = ppir_node_insert_mov(node);
   if (unlikely(!move))
      return false;

   ppir_src *mov_src = ppir_node_get_src(move, 0);
   mov_src->type = dest->type = ppir_target_pipeline;
   mov_src->pipeline = dest->pipeline = ppir_pipeline_reg_uniform;

   return true;
}
142
143static bool ppir_lower_ddxy(ppir_block *block, ppir_node *node)
144{
145   assert(node->type == ppir_node_type_alu);
146   ppir_alu_node *alu = ppir_node_to_alu(node);
147
148   alu->src[1] = alu->src[0];
149   if (node->op == ppir_op_ddx)
150      alu->src[1].negate = !alu->src[1].negate;
151   else if (node->op == ppir_op_ddy)
152      alu->src[0].negate = !alu->src[0].negate;
153   else
154      assert(0);
155
156   alu->num_src = 2;
157
158   return true;
159}
160
161static bool ppir_lower_texture(ppir_block *block, ppir_node *node)
162{
163   ppir_dest *dest = ppir_node_get_dest(node);
164
165   if (ppir_node_has_single_succ(node) && dest->type == ppir_target_ssa) {
166      ppir_node *succ = ppir_node_first_succ(node);
167      dest->type = ppir_target_pipeline;
168      dest->pipeline = ppir_pipeline_reg_sampler;
169
170      for (int i = 0; i < ppir_node_get_src_num(succ); i++) {
171         ppir_src *src = ppir_node_get_src(succ, i);
172         if (src && src->node == node) {
173            src->type = ppir_target_pipeline;
174            src->pipeline = ppir_pipeline_reg_sampler;
175         }
176      }
177      return true;
178   }
179
180   /* Create move node as fallback */
181   ppir_node *move = ppir_node_insert_mov(node);
182   if (unlikely(!move))
183      return false;
184
185   ppir_debug("lower texture create move %d for %d\n",
186              move->index, node->index);
187
188   ppir_src *mov_src = ppir_node_get_src(move, 0);
189   mov_src->type = dest->type = ppir_target_pipeline;
190   mov_src->pipeline = dest->pipeline = ppir_pipeline_reg_sampler;
191
192   return true;
193}
194
/* Check the select condition and ensure it can be inserted into
 * the scalar mul slot (it must reach the select through the ^fmul
 * pipeline register). */
static bool ppir_lower_select(ppir_block *block, ppir_node *node)
{
   ppir_alu_node *alu = ppir_node_to_alu(node);
   /* src0 is the condition; src1/src2 are the two select operands. */
   ppir_src *src0 = &alu->src[0];
   ppir_src *src1 = &alu->src[1];
   ppir_src *src2 = &alu->src[2];

   /* If the condition is already an alu scalar whose only successor
    * is the select node, just turn it into pipeline output. */
   /* The (src2->node == cond) case is a tricky exception.
    * The reason is that we must force cond to output to ^fmul -- but
    * then it no longer writes to a register and it is impossible to
    * reference ^fmul in src2. So in that exceptional case, also fall
    * back to the mov. */
   ppir_node *cond = src0->node;
   if (cond &&
       cond->type == ppir_node_type_alu &&
       ppir_node_has_single_succ(cond) &&
       ppir_target_is_scalar(ppir_node_get_dest(cond)) &&
       ppir_node_schedulable_slot(cond, PPIR_INSTR_SLOT_ALU_SCL_MUL) &&
       src2->node != cond) {

      /* Redirect the condition's output into ^fmul. */
      ppir_dest *cond_dest = ppir_node_get_dest(cond);
      cond_dest->type = ppir_target_pipeline;
      cond_dest->pipeline = ppir_pipeline_reg_fmul;

      ppir_node_target_assign(src0, cond);

      /* src1 could also be a reference from the same node as
       * the condition, so update it in that case. */
      if (src1->node && src1->node == cond)
         ppir_node_target_assign(src1, cond);

      return true;
   }

   /* If the condition can't be used for any reason, insert a mov
    * so that the condition can end up in ^fmul */
   ppir_node *move = ppir_node_create(block, ppir_op_mov, -1, 0);
   if (!move)
      return false;
   list_addtail(&move->list, &node->list);

   /* The mov copies the condition (a single scalar component, hence
    * only swizzle[0]) and writes it to the ^fmul pipeline register. */
   ppir_alu_node *move_alu = ppir_node_to_alu(move);
   ppir_src *move_src = move_alu->src;
   move_src->type = src0->type;
   move_src->ssa = src0->ssa;
   move_src->swizzle[0] = src0->swizzle[0];
   move_alu->num_src = 1;

   ppir_dest *move_dest = &move_alu->dest;
   move_dest->type = ppir_target_pipeline;
   move_dest->pipeline = ppir_pipeline_reg_fmul;
   move_dest->write_mask = 1;

   /* Rewire the dependency: select now depends on the mov, and the mov
    * depends on the old condition producer (if any). */
   ppir_node *pred = src0->node;
   ppir_dep *dep = ppir_dep_for_pred(node, pred);
   if (dep)
      ppir_node_replace_pred(dep, move);
   else
      ppir_node_add_dep(node, move, ppir_dep_src);

   /* pred can be a register */
   if (pred)
      ppir_node_add_dep(move, pred, ppir_dep_src);

   ppir_node_target_assign(src0, move);

   /* src1 could also be a reference from the same node as
    * the condition, so update it in that case. */
   if (src1->node && src1->node == pred)
      ppir_node_target_assign(src1, move);

   return true;
}
272
273static bool ppir_lower_trunc(ppir_block *block, ppir_node *node)
274{
275   /* Turn it into a mov with a round to integer output modifier */
276   ppir_alu_node *alu = ppir_node_to_alu(node);
277   ppir_dest *move_dest = &alu->dest;
278   move_dest->modifier = ppir_outmod_round;
279   node->op = ppir_op_mov;
280
281   return true;
282}
283
284static bool ppir_lower_abs(ppir_block *block, ppir_node *node)
285{
286   /* Turn it into a mov and set the absolute modifier */
287   ppir_alu_node *alu = ppir_node_to_alu(node);
288
289   assert(alu->num_src == 1);
290
291   alu->src[0].absolute = true;
292   alu->src[0].negate = false;
293   node->op = ppir_op_mov;
294
295   return true;
296}
297
298static bool ppir_lower_neg(ppir_block *block, ppir_node *node)
299{
300   /* Turn it into a mov and set the negate modifier */
301   ppir_alu_node *alu = ppir_node_to_alu(node);
302
303   assert(alu->num_src == 1);
304
305   alu->src[0].negate = !alu->src[0].negate;
306   node->op = ppir_op_mov;
307
308   return true;
309}
310
311static bool ppir_lower_sat(ppir_block *block, ppir_node *node)
312{
313   /* Turn it into a mov with the saturate output modifier */
314   ppir_alu_node *alu = ppir_node_to_alu(node);
315
316   assert(alu->num_src == 1);
317
318   ppir_dest *move_dest = &alu->dest;
319   move_dest->modifier = ppir_outmod_clamp_fraction;
320   node->op = ppir_op_mov;
321
322   return true;
323}
324
/* Lower a conditional branch: synthesize a constant 0 as the second
 * comparison operand and set the condition bits so the branch fires on
 * (cond != 0), or on (cond == 0) when negated. Returns false only on
 * allocation failure. */
static bool ppir_lower_branch(ppir_block *block, ppir_node *node)
{
   ppir_branch_node *branch = ppir_node_to_branch(node);

   /* Unconditional branch */
   if (branch->num_src == 0)
      return true;

   /* Scalar constant 0.0 delivered through the const0 pipeline reg. */
   ppir_const_node *zero = ppir_node_create(block, ppir_op_const, -1, 0);

   if (!zero)
      return false;

   zero->constant.value[0].f = 0;
   zero->constant.num = 1;
   zero->dest.type = ppir_target_pipeline;
   zero->dest.pipeline = ppir_pipeline_reg_const0;
   zero->dest.ssa.num_components = 1;
   zero->dest.write_mask = 0x01;

   /* For now we're just comparing branch condition with 0,
    * in future we should look whether it's possible to move
    * comparison node into branch itself and use current
    * way as a fallback for complex conditions.
    */
   ppir_node_target_assign(&branch->src[1], &zero->node);

   /* negate: take the branch when cond == 0; otherwise take it when
    * cond != 0, expressed as (cond > 0 || cond < 0). */
   if (branch->negate)
      branch->cond_eq = true;
   else {
      branch->cond_gt = true;
      branch->cond_lt = true;
   }

   branch->num_src = 2;

   ppir_node_add_dep(&branch->node, &zero->node, ppir_dep_src);
   list_addtail(&zero->node.list, &node->list);

   return true;
}
366
367static bool ppir_lower_accum(ppir_block *block, ppir_node *node)
368{
369    /* If the last argument of a node placed in PPIR_INSTR_SLOT_ALU_SCL_ADD
370    * (or PPIR_INSTR_SLOT_ALU_VEC_ADD) is placed in
371    * PPIR_INSTR_SLOT_ALU_SCL_MUL (or PPIR_INSTR_SLOT_ALU_VEC_MUL) we cannot
372    * save a register (and an instruction) by using a pipeline register.
373    * Therefore it is interesting to make sure arguments of that type are
374    * the first argument by swapping arguments (if possible) */
375   ppir_alu_node *alu = ppir_node_to_alu(node);
376
377   assert(alu->num_src >= 2);
378
379   if (alu->src[0].type == ppir_target_pipeline)
380      return true;
381
382   if (alu->src[0].type == ppir_target_ssa) {
383      int *src_0_slots = ppir_op_infos[alu->src[0].node->op].slots;
384      if (src_0_slots) {
385         for (int i = 0; src_0_slots[i] != PPIR_INSTR_SLOT_END; i++) {
386            if ((src_0_slots[i] == PPIR_INSTR_SLOT_ALU_SCL_MUL) ||
387               (src_0_slots[i] == PPIR_INSTR_SLOT_ALU_VEC_MUL)) {
388               return true;
389            }
390         }
391      }
392   }
393
394   int src_to_swap = -1;
395   for (int j = 1; j < alu->num_src; j++) {
396      if (alu->src[j].type != ppir_target_ssa)
397         continue;
398      int *src_slots = ppir_op_infos[alu->src[j].node->op].slots;
399      if (!src_slots)
400         continue;
401      for (int i = 0; src_slots[i] != PPIR_INSTR_SLOT_END; i++) {
402         if ((src_slots[i] == PPIR_INSTR_SLOT_ALU_SCL_MUL) ||
403             (src_slots[i] == PPIR_INSTR_SLOT_ALU_VEC_MUL)) {
404            src_to_swap = j;
405            break;
406         }
407      }
408      if (src_to_swap > 0)
409         break;
410   }
411
412   if (src_to_swap < 0)
413      return true;
414
415   /* Swap arguments so that we can use a pipeline register later on */
416   ppir_src tmp = alu->src[0];
417   alu->src[0] = alu->src[src_to_swap];
418   alu->src[src_to_swap] = tmp;
419
420   return true;
421}
422
/* Per-op lowering callbacks, indexed by ppir_op. Ops with a NULL entry
 * need no lowering; each callback returns false on failure. */
static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
   [ppir_op_abs] = ppir_lower_abs,
   [ppir_op_neg] = ppir_lower_neg,
   [ppir_op_const] = ppir_lower_const,
   [ppir_op_ddx] = ppir_lower_ddxy,
   [ppir_op_ddy] = ppir_lower_ddxy,
   [ppir_op_lt] = ppir_lower_swap_args,
   [ppir_op_le] = ppir_lower_swap_args,
   [ppir_op_load_texture] = ppir_lower_texture,
   [ppir_op_select] = ppir_lower_select,
   [ppir_op_trunc] = ppir_lower_trunc,
   [ppir_op_sat] = ppir_lower_sat,
   [ppir_op_branch] = ppir_lower_branch,
   [ppir_op_load_uniform] = ppir_lower_load,
   [ppir_op_load_temp] = ppir_lower_load,
   [ppir_op_add] = ppir_lower_accum,
   [ppir_op_max] = ppir_lower_accum,
   [ppir_op_min] = ppir_lower_accum,
   [ppir_op_eq] = ppir_lower_accum,
   [ppir_op_ne] = ppir_lower_accum,
};
444
445bool ppir_lower_prog(ppir_compiler *comp)
446{
447   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
448      list_for_each_entry_safe(ppir_node, node, &block->node_list, list) {
449         if (ppir_lower_funcs[node->op] &&
450             !ppir_lower_funcs[node->op](block, node))
451            return false;
452      }
453   }
454
455   return true;
456}
457