1/*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25#include <string.h>
26
27#include "util/ralloc.h"
28#include "util/bitscan.h"
29#include "compiler/nir/nir.h"
30
31#include "ppir.h"
32
33static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
34{
35   ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
36   if (!node)
37      return NULL;
38
39   ppir_dest *dest = ppir_node_get_dest(node);
40   dest->type = ppir_target_ssa;
41   dest->ssa.num_components = ssa->num_components;
42   dest->ssa.live_in = INT_MAX;
43   dest->ssa.live_out = 0;
44   dest->write_mask = u_bit_consecutive(0, ssa->num_components);
45
46   if (node->type == ppir_node_type_load ||
47       node->type == ppir_node_type_store)
48      dest->ssa.is_head = true;
49
50   return node;
51}
52
53static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
54                                  nir_reg_dest *reg, unsigned mask)
55{
56   ppir_node *node = ppir_node_create(block, op, reg->reg->index, mask);
57   if (!node)
58      return NULL;
59
60   ppir_dest *dest = ppir_node_get_dest(node);
61
62   list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
63      if (r->index == reg->reg->index) {
64         dest->reg = r;
65         break;
66      }
67   }
68
69   dest->type = ppir_target_register;
70   dest->write_mask = mask;
71
72   if (node->type == ppir_node_type_load ||
73       node->type == ppir_node_type_store)
74      dest->reg->is_head = true;
75
76   return node;
77}
78
79static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
80                                   nir_dest *dest, unsigned mask)
81{
82   unsigned index = -1;
83
84   if (dest) {
85      if (dest->is_ssa)
86         return ppir_node_create_ssa(block, op, &dest->ssa);
87      else
88         return ppir_node_create_reg(block, op, &dest->reg, mask);
89   }
90
91   return ppir_node_create(block, op, index, 0);
92}
93
94static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
95                              ppir_src *ps, nir_src *ns, unsigned mask)
96{
97   ppir_node *child = NULL;
98
99   if (ns->is_ssa) {
100      child = comp->var_nodes[ns->ssa->index];
101      ppir_node_add_dep(node, child);
102   }
103   else {
104      nir_register *reg = ns->reg.reg;
105      while (mask) {
106         int swizzle = ps->swizzle[u_bit_scan(&mask)];
107         child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
108         ppir_node_add_dep(node, child);
109      }
110   }
111
112   ppir_dest *dest = ppir_node_get_dest(child);
113   ppir_node_target_assign(ps, dest);
114}
115
116static int nir_to_ppir_opcodes[nir_num_opcodes] = {
117   /* not supported */
118   [0 ... nir_last_opcode] = -1,
119
120   [nir_op_fmov] = ppir_op_mov,
121   [nir_op_imov] = ppir_op_mov,
122   [nir_op_fmul] = ppir_op_mul,
123   [nir_op_fadd] = ppir_op_add,
124   [nir_op_fdot2] = ppir_op_dot2,
125   [nir_op_fdot3] = ppir_op_dot3,
126   [nir_op_fdot4] = ppir_op_dot4,
127   [nir_op_frsq] = ppir_op_rsqrt,
128   [nir_op_flog2] = ppir_op_log2,
129   [nir_op_fexp2] = ppir_op_exp2,
130   [nir_op_fsqrt] = ppir_op_sqrt,
131   [nir_op_fsin] = ppir_op_sin,
132   [nir_op_fcos] = ppir_op_cos,
133   [nir_op_fmax] = ppir_op_max,
134   [nir_op_fmin] = ppir_op_min,
135   [nir_op_frcp] = ppir_op_rcp,
136   [nir_op_ffloor] = ppir_op_floor,
137   [nir_op_fceil] = ppir_op_ceil,
138   [nir_op_ffract] = ppir_op_fract,
139   [nir_op_fand] = ppir_op_and,
140   [nir_op_for] = ppir_op_or,
141   [nir_op_fxor] = ppir_op_xor,
142   [nir_op_sge] = ppir_op_ge,
143   [nir_op_fge] = ppir_op_ge,
144   [nir_op_slt] = ppir_op_lt,
145   [nir_op_flt] = ppir_op_lt,
146   [nir_op_seq] = ppir_op_eq,
147   [nir_op_feq] = ppir_op_eq,
148   [nir_op_sne] = ppir_op_ne,
149   [nir_op_fne] = ppir_op_ne,
150   [nir_op_fnot] = ppir_op_not,
151   [nir_op_fcsel] = ppir_op_select,
152   [nir_op_inot] = ppir_op_not,
153   [nir_op_ftrunc] = ppir_op_trunc,
154};
155
156static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni)
157{
158   nir_alu_instr *instr = nir_instr_as_alu(ni);
159   int op = nir_to_ppir_opcodes[instr->op];
160
161   if (op < 0) {
162      ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
163      return NULL;
164   }
165
166   ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest,
167                                               instr->dest.write_mask);
168   if (!node)
169      return NULL;
170
171   ppir_dest *pd = &node->dest;
172   nir_alu_dest *nd = &instr->dest;
173   if (nd->saturate)
174      pd->modifier = ppir_outmod_clamp_fraction;
175
176   unsigned src_mask;
177   switch (op) {
178   case ppir_op_dot2:
179      src_mask = 0b0011;
180      break;
181   case ppir_op_dot3:
182      src_mask = 0b0111;
183      break;
184   case ppir_op_dot4:
185      src_mask = 0b1111;
186      break;
187   default:
188      src_mask = pd->write_mask;
189      break;
190   }
191
192   unsigned num_child = nir_op_infos[instr->op].num_inputs;
193   node->num_src = num_child;
194
195   for (int i = 0; i < num_child; i++) {
196      nir_alu_src *ns = instr->src + i;
197      ppir_src *ps = node->src + i;
198      memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle));
199      ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask);
200
201      ps->absolute = ns->abs;
202      ps->negate = ns->negate;
203   }
204
205   return &node->node;
206}
207
208static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
209{
210   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
211   unsigned mask = 0;
212   ppir_load_node *lnode;
213   ppir_store_node *snode;
214
215   switch (instr->intrinsic) {
216   case nir_intrinsic_load_input:
217      if (!instr->dest.is_ssa)
218         mask = u_bit_consecutive(0, instr->num_components);
219
220      lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
221      if (!lnode)
222         return NULL;
223
224      lnode->num_components = instr->num_components;
225      lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
226      return &lnode->node;
227
228   case nir_intrinsic_load_frag_coord:
229      if (!instr->dest.is_ssa)
230         mask = u_bit_consecutive(0, instr->num_components);
231
232      lnode = ppir_node_create_dest(block, ppir_op_load_fragcoord, &instr->dest, mask);
233      if (!lnode)
234         return NULL;
235
236      lnode->num_components = instr->num_components;
237      return &lnode->node;
238
239   case nir_intrinsic_load_uniform:
240      if (!instr->dest.is_ssa)
241         mask = u_bit_consecutive(0, instr->num_components);
242
243      lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
244      if (!lnode)
245         return NULL;
246
247      lnode->num_components = instr->num_components;
248      lnode->index = nir_intrinsic_base(instr);
249      lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
250
251      return &lnode->node;
252
253   case nir_intrinsic_store_output:
254      snode = ppir_node_create_dest(block, ppir_op_store_color, NULL, 0);
255      if (!snode)
256         return NULL;
257
258      snode->index = nir_intrinsic_base(instr);
259
260      for (int i = 0; i < instr->num_components; i++)
261         snode->src.swizzle[i] = i;
262
263      ppir_node_add_src(block->comp, &snode->node, &snode->src, instr->src,
264                        u_bit_consecutive(0, instr->num_components));
265
266      return &snode->node;
267
268   default:
269      ppir_error("unsupported nir_intrinsic_instr %d\n", instr->intrinsic);
270      return NULL;
271   }
272}
273
274static ppir_node *ppir_emit_load_const(ppir_block *block, nir_instr *ni)
275{
276   nir_load_const_instr *instr = nir_instr_as_load_const(ni);
277   ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
278   if (!node)
279      return NULL;
280
281   assert(instr->def.bit_size == 32);
282
283   for (int i = 0; i < instr->def.num_components; i++)
284      node->constant.value[i].i = instr->value[i].i32;
285   node->constant.num = instr->def.num_components;
286
287   return &node->node;
288}
289
290static ppir_node *ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
291{
292   ppir_error("nir_ssa_undef_instr not support\n");
293   return NULL;
294}
295
296static ppir_node *ppir_emit_tex(ppir_block *block, nir_instr *ni)
297{
298   nir_tex_instr *instr = nir_instr_as_tex(ni);
299   ppir_load_texture_node *node;
300
301   if (instr->op != nir_texop_tex) {
302      ppir_error("unsupported texop %d\n", instr->op);
303      return NULL;
304   }
305
306   node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, 0);
307   if (!node)
308      return NULL;
309
310   node->sampler = instr->texture_index;
311
312   switch (instr->sampler_dim) {
313   case GLSL_SAMPLER_DIM_2D:
314   case GLSL_SAMPLER_DIM_RECT:
315   case GLSL_SAMPLER_DIM_EXTERNAL:
316      break;
317   default:
318      ppir_debug("unsupported sampler dim: %d\n", instr->sampler_dim);
319      return NULL;
320   }
321
322   node->sampler_dim = instr->sampler_dim;
323
324   for (int i = 0; i < instr->coord_components; i++)
325         node->src_coords.swizzle[i] = i;
326
327   assert(instr->num_srcs == 1);
328   for (int i = 0; i < instr->num_srcs; i++) {
329      switch (instr->src[i].src_type) {
330      case nir_tex_src_coord:
331         ppir_node_add_src(block->comp, &node->node, &node->src_coords, &instr->src[i].src,
332                           u_bit_consecutive(0, instr->coord_components));
333         break;
334      default:
335         ppir_debug("unknown texture source");
336         return NULL;
337      }
338   }
339
340   return &node->node;
341}
342
343static ppir_node *ppir_emit_jump(ppir_block *block, nir_instr *ni)
344{
345   ppir_error("nir_jump_instr not support\n");
346   return NULL;
347}
348
349static ppir_node *(*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
350   [nir_instr_type_alu]        = ppir_emit_alu,
351   [nir_instr_type_intrinsic]  = ppir_emit_intrinsic,
352   [nir_instr_type_load_const] = ppir_emit_load_const,
353   [nir_instr_type_ssa_undef]  = ppir_emit_ssa_undef,
354   [nir_instr_type_tex]        = ppir_emit_tex,
355   [nir_instr_type_jump]       = ppir_emit_jump,
356};
357
358static ppir_block *ppir_block_create(ppir_compiler *comp)
359{
360   ppir_block *block = rzalloc(comp, ppir_block);
361   if (!block)
362      return NULL;
363
364   list_inithead(&block->node_list);
365   list_inithead(&block->instr_list);
366
367   return block;
368}
369
370static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
371{
372   ppir_block *block = ppir_block_create(comp);
373   if (!block)
374      return false;
375
376   list_addtail(&block->list, &comp->block_list);
377   block->comp = comp;
378
379   nir_foreach_instr(instr, nblock) {
380      assert(instr->type < nir_instr_type_phi);
381      ppir_node *node = ppir_emit_instr[instr->type](block, instr);
382      if (node)
383         list_addtail(&node->list, &block->node_list);
384   }
385
386   return true;
387}
388
389static bool ppir_emit_if(ppir_compiler *comp, nir_if *nif)
390{
391   ppir_error("if nir_cf_node not support\n");
392   return false;
393}
394
395static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
396{
397   ppir_error("loop nir_cf_node not support\n");
398   return false;
399}
400
401static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
402{
403   ppir_error("function nir_cf_node not support\n");
404   return false;
405}
406
407static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
408{
409   foreach_list_typed(nir_cf_node, node, node, list) {
410      bool ret;
411
412      switch (node->type) {
413      case nir_cf_node_block:
414         ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
415         break;
416      case nir_cf_node_if:
417         ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
418         break;
419      case nir_cf_node_loop:
420         ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
421         break;
422      case nir_cf_node_function:
423         ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
424         break;
425      default:
426         ppir_error("unknown NIR node type %d\n", node->type);
427         return false;
428      }
429
430      if (!ret)
431         return false;
432   }
433
434   return true;
435}
436
437static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
438{
439   ppir_compiler *comp = rzalloc_size(
440      prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
441   if (!comp)
442      return NULL;
443
444   list_inithead(&comp->block_list);
445   list_inithead(&comp->reg_list);
446
447   comp->var_nodes = (ppir_node **)(comp + 1);
448   comp->reg_base = num_ssa;
449   comp->prog = prog;
450   return comp;
451}
452
453bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
454                      struct ra_regs *ra)
455{
456   nir_function_impl *func = nir_shader_get_entrypoint(nir);
457   ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
458   if (!comp)
459      return false;
460
461   comp->ra = ra;
462
463   foreach_list_typed(nir_register, reg, node, &func->registers) {
464      ppir_reg *r = rzalloc(comp, ppir_reg);
465      if (!r)
466         return false;
467
468      r->index = reg->index;
469      r->num_components = reg->num_components;
470      r->live_in = INT_MAX;
471      r->live_out = 0;
472      r->is_head = false;
473      list_addtail(&r->list, &comp->reg_list);
474   }
475
476   if (!ppir_emit_cf_list(comp, &func->body))
477      goto err_out0;
478   ppir_node_print_prog(comp);
479
480   if (!ppir_lower_prog(comp))
481      goto err_out0;
482
483   if (!ppir_node_to_instr(comp))
484      goto err_out0;
485
486   if (!ppir_schedule_prog(comp))
487      goto err_out0;
488
489   if (!ppir_regalloc_prog(comp))
490      goto err_out0;
491
492   if (!ppir_codegen_prog(comp))
493      goto err_out0;
494
495   ralloc_free(comp);
496   return true;
497
498err_out0:
499   ralloc_free(comp);
500   return false;
501}
502
503