1/*
2 * Copyright (c) 2017 Lima Project
3 * Copyright (c) 2013 Connor Abbott
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a copy
6 * of this software and associated documentation files (the "Software"), to deal
7 * in the Software without restriction, including without limitation the rights
8 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 * copies of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 * THE SOFTWARE.
22 *
23 */
24
25#ifndef LIMA_IR_GP_GPIR_H
26#define LIMA_IR_GP_GPIR_H
27
28#include "util/list.h"
29#include "util/u_math.h"
30
31#include "ir/lima_ir.h"
32
/* List of all operations a gpir node can perform, grouped by the kind of
 * instruction slot able to execute them.  Ops in the "emulated" groups
 * have no direct hardware encoding and are lowered to real ops earlier
 * in the compile pipeline.
 */
typedef enum {
   gpir_op_mov,

   /* mul ops */
   gpir_op_mul,
   gpir_op_select,
   gpir_op_complex1,
   gpir_op_complex2,

   /* add ops */
   gpir_op_add,
   gpir_op_floor,
   gpir_op_sign,
   gpir_op_ge,
   gpir_op_lt,
   gpir_op_min,
   gpir_op_max,
   gpir_op_abs,
   gpir_op_not,

   /* mul/add ops */
   gpir_op_neg,

   /* passthrough ops */
   gpir_op_clamp_const,
   gpir_op_preexp2,
   gpir_op_postlog2,

   /* complex ops */
   gpir_op_exp2_impl,
   gpir_op_log2_impl,
   gpir_op_rcp_impl,
   gpir_op_rsqrt_impl,

   /* load/store ops */
   gpir_op_load_uniform,
   gpir_op_load_temp,
   gpir_op_load_attribute,
   gpir_op_load_reg,
   gpir_op_store_temp,
   gpir_op_store_reg,
   gpir_op_store_varying,
   gpir_op_store_temp_load_off0,
   gpir_op_store_temp_load_off1,
   gpir_op_store_temp_load_off2,

   /* branch */
   gpir_op_branch_cond,

   /* const (emulated) */
   gpir_op_const,

   /* emulated ops */
   gpir_op_exp2,
   gpir_op_log2,
   gpir_op_rcp,
   gpir_op_rsqrt,
   gpir_op_ceil,
   gpir_op_exp,
   gpir_op_log,
   gpir_op_sin,
   gpir_op_cos,
   gpir_op_tan,
   gpir_op_branch_uncond,
   gpir_op_eq,
   gpir_op_ne,

   /* auxiliary ops */
   gpir_op_dummy_f,
   gpir_op_dummy_m,

   /* total number of ops; keep last */
   gpir_op_num,
} gpir_op;
107
/* Coarse node category.  Selects which wrapper struct (gpir_alu_node,
 * gpir_const_node, gpir_load_node, ...) the base gpir_node is embedded in.
 */
typedef enum {
   gpir_node_type_alu,
   gpir_node_type_const,
   gpir_node_type_load,
   gpir_node_type_store,
   gpir_node_type_branch,
} gpir_node_type;
115
/* Static, per-op description; gpir_op_infos[] is indexed by gpir_op. */
typedef struct {
   char *name;          /* mnemonic used when printing the IR */
   bool dest_neg;       /* op supports negating its destination */
   bool src_neg[4];     /* op supports negating each of up to 4 sources */
   /* List of instruction slots this op may occupy.
    * NOTE(review): presumably terminated by GPIR_INSTR_SLOT_END — confirm
    * against the gpir_op_infos[] definitions. */
   int *slots;
   gpir_node_type type;
   bool spillless;      /* NOTE(review): presumably means "must not be
                         * spilled to temp memory" — confirm with regalloc */
   bool schedule_first;
   bool may_consume_two_slots;
} gpir_op_info;

extern const gpir_op_info gpir_op_infos[];
128
/* A dependency edge between two nodes: pred must execute before succ.
 * Each edge is linked into both endpoints' lists via pred_link/succ_link.
 */
typedef struct {
   enum {
      GPIR_DEP_INPUT,     /* def is the input of use */
      GPIR_DEP_OFFSET,    /* def is the offset of use (i.e. temp store) */
      GPIR_DEP_READ_AFTER_WRITE,
      GPIR_DEP_WRITE_AFTER_READ,
   } type;

   /* node execute before succ */
   struct gpir_node *pred;
   /* node execute after pred */
   struct gpir_node *succ;

   /* for node pred_list */
   struct list_head pred_link;
   /* for node succ_list */
   struct list_head succ_link;
} gpir_dep;
147
struct gpir_instr;
struct gpir_store_node;

/* Base of every IR node.  Op-specific data lives in the wrapper structs
 * (gpir_alu_node, gpir_load_node, ...) that embed this as their first
 * member; downcast with the gpir_node_to_*() macros below.
 */
typedef struct gpir_node {
   struct list_head list;     /* link in the owning block's node_list */
   gpir_op op;
   gpir_node_type type;
   int index;                 /* unique node id within the shader */
   char name[16];
   bool printed;              /* scratch flag for the IR printers */
   struct gpir_block *block;  /* block this node belongs to */

   /* for nodes relationship */
   /* for node who uses this node (successor) */
   struct list_head succ_list;
   /* for node this node uses (predecessor) */
   struct list_head pred_list;

   /* for scheduler and regalloc */
   int value_reg;
   /* Per-pass scratch state; only one member is meaningful at a time,
    * depending on which pass is currently running. */
   union {
      /* instruction scheduler state */
      struct {
         struct gpir_instr *instr;   /* instruction the node was placed in */
         struct gpir_store_node *physreg_store;
         int pos;                    /* slot position within instr */
         int dist;
         int index;
         bool ready;
         bool inserted;
         bool max_node, next_max_node;
         bool complex_allowed;
      } sched;
      /* reduce-register-pressure scheduler state */
      struct {
         int parent_index;
         float reg_pressure;
         int est;
         bool scheduled;
      } rsched;
      /* value-register allocation state */
      struct {
         float index;
         struct gpir_node *last;
      } vreg;
      /* physical-register allocation state */
      struct {
         int index;
      } preg;
   };
} gpir_node;
195
/* ALU operation node: up to three source operands, each optionally
 * negated, plus optional negation of the result.
 */
typedef struct {
   gpir_node node;

   gpir_node *children[3];    /* source operand nodes */
   bool children_negate[3];   /* negate the corresponding source */
   int num_child;             /* number of valid entries in children[] */

   bool dest_negate;          /* negate the result */
} gpir_alu_node;
205
/* Constant node; value is a raw 32-bit float/int union (union fi from
 * u_math.h). */
typedef struct {
   gpir_node node;
   union fi value;
} gpir_const_node;
210
/* A register created during compilation, later assigned to one of the
 * physical registers by regalloc. */
typedef struct {
   int index;                /* register number */
   struct list_head list;    /* link in comp->reg_list */
} gpir_reg;
215
/* Load node (uniform/temp/attribute/register, per node.op). */
typedef struct {
   gpir_node node;

   unsigned index;       /* index of the uniform/temp/attribute/register */
   unsigned component;   /* component within that vector — presumably 0-3 */

   gpir_reg *reg;               /* for gpir_op_load_reg: source register */
   struct list_head reg_link;   /* NOTE(review): presumably links loads of
                                 * the same register — confirm with users */
} gpir_load_node;
225
/* Store node (temp/register/varying, per node.op). */
typedef struct gpir_store_node {
   gpir_node node;

   unsigned index;       /* destination index */
   unsigned component;   /* destination component — presumably 0-3 */
   gpir_node *child;     /* node producing the value being stored */

   gpir_reg *reg;        /* for gpir_op_store_reg: destination register */
} gpir_store_node;
235
/* Slots within one instruction word.  The aliases after
 * GPIR_INSTR_SLOT_END name slot ranges used by the scheduler. */
enum gpir_instr_slot {
   GPIR_INSTR_SLOT_MUL0,
   GPIR_INSTR_SLOT_MUL1,
   GPIR_INSTR_SLOT_ADD0,
   GPIR_INSTR_SLOT_ADD1,
   GPIR_INSTR_SLOT_PASS,
   GPIR_INSTR_SLOT_COMPLEX,
   GPIR_INSTR_SLOT_REG0_LOAD0,
   GPIR_INSTR_SLOT_REG0_LOAD1,
   GPIR_INSTR_SLOT_REG0_LOAD2,
   GPIR_INSTR_SLOT_REG0_LOAD3,
   GPIR_INSTR_SLOT_REG1_LOAD0,
   GPIR_INSTR_SLOT_REG1_LOAD1,
   GPIR_INSTR_SLOT_REG1_LOAD2,
   GPIR_INSTR_SLOT_REG1_LOAD3,
   GPIR_INSTR_SLOT_MEM_LOAD0,
   GPIR_INSTR_SLOT_MEM_LOAD1,
   GPIR_INSTR_SLOT_MEM_LOAD2,
   GPIR_INSTR_SLOT_MEM_LOAD3,
   GPIR_INSTR_SLOT_STORE0,
   GPIR_INSTR_SLOT_STORE1,
   GPIR_INSTR_SLOT_STORE2,
   GPIR_INSTR_SLOT_STORE3,
   /* number of real slots; sizes gpir_instr::slots[] */
   GPIR_INSTR_SLOT_NUM,
   /* sentinel — NOTE(review): presumably terminates the
    * gpir_op_info::slots lists; confirm */
   GPIR_INSTR_SLOT_END,
   GPIR_INSTR_SLOT_ALU_BEGIN      = GPIR_INSTR_SLOT_MUL0,
   GPIR_INSTR_SLOT_ALU_END        = GPIR_INSTR_SLOT_COMPLEX,
   GPIR_INSTR_SLOT_DIST_TWO_BEGIN = GPIR_INSTR_SLOT_MUL0,
   GPIR_INSTR_SLOT_DIST_TWO_END   = GPIR_INSTR_SLOT_PASS,
};
266
/* One scheduled instruction word and the scheduler bookkeeping needed to
 * decide whether more nodes/moves can still be inserted into it. */
typedef struct gpir_instr {
   int index;                /* position of this instr within its block */
   struct list_head list;    /* link in block->instr_list */

   /* node occupying each slot, or NULL if the slot is free */
   gpir_node *slots[GPIR_INSTR_SLOT_NUM];

   /* The number of ALU slots free for moves. */
   int alu_num_slot_free;

   /* The number of ALU slots free for moves, except for the complex slot. */
   int alu_non_cplx_slot_free;

   /* We need to make sure that we can insert moves in the following cases:
    * (1) There was a use of a value two cycles ago.
    * (2) There were more than 5 uses of a value 1 cycle ago (or else we can't
    *     possibly satisfy (1) for the next cycle).
    * (3) There is a store instruction scheduled, but not its child.
    *
    * The complex slot cannot be used for a move in case (1), since it only
    * has a FIFO depth of 1, but it can be used for (2) as well as (3) as long
    * as the uses aren't in certain slots. It turns out that we don't have to
    * worry about nodes that can't use the complex slot for (2), since there
    * are at most 4 uses 1 cycle ago that can't use the complex slot, but we
    * do have to worry about (3). This means tracking stores whose children
    * cannot be in the complex slot. In order to ensure that we have enough
    * space for all three, we maintain the following invariants:
    *
    * (1) alu_num_slot_free >= alu_num_slot_needed_by_store +
    *       alu_num_slot_needed_by_max +
    *       max(alu_num_unscheduled_next_max - alu_max_allowed_next_max, 0)
    * (2) alu_non_cplx_slot_free >= alu_num_slot_needed_by_max +
    *       alu_num_slot_needed_by_non_cplx_store
    *
    * alu_max_allowed_next_max is normally 5 (since there can be at most 5 max
    * nodes for the next instruction) but when there is a complex1 node in
    * this instruction it reduces to 4 to reserve a slot for complex2 in the
    * next instruction.
    */
   int alu_num_slot_needed_by_store;
   int alu_num_slot_needed_by_non_cplx_store;
   int alu_num_slot_needed_by_max;
   int alu_num_unscheduled_next_max;
   int alu_max_allowed_next_max;

   /* Used to communicate to the scheduler how many slots need to be cleared
    * up in order to satisfy the invariants.
    */
   int slot_difference;
   int non_cplx_slot_difference;

   /* Bookkeeping for the two register-load units and the memory-load unit.
    * NOTE(review): the use counts and shared index per unit suggest a unit
    * can feed several slots only when they read the same index — confirm
    * with gpir_instr_try_insert_node(). */
   int reg0_use_count;
   bool reg0_is_attr;   /* reg0 unit loads an attribute, not a register */
   int reg0_index;

   int reg1_use_count;
   int reg1_index;

   int mem_use_count;
   bool mem_is_temp;    /* memory unit loads a temp, not e.g. a uniform */
   int mem_index;

   /* What kind of data each of the two store units writes this cycle,
    * and to which index. */
   enum {
      GPIR_INSTR_STORE_NONE,
      GPIR_INSTR_STORE_VARYING,
      GPIR_INSTR_STORE_REG,
      GPIR_INSTR_STORE_TEMP,
   } store_content[2];
   int store_index[2];
} gpir_instr;
336
/* A basic block: its nodes, its scheduled instructions, its CFG edges,
 * and per-pass liveness/scheduling state. */
typedef struct gpir_block {
   struct list_head list;        /* link in comp->block_list */
   struct list_head node_list;   /* all gpir_nodes of this block */
   struct list_head instr_list;  /* scheduled gpir_instrs of this block */
   struct gpir_compiler *comp;

   /* CFG edges.  NOTE(review): presumably successors[1] is NULL when the
    * block has a single (fall-through) successor — confirm with CFG code. */
   struct gpir_block *successors[2];
   struct list_head predecessors;
   struct list_head predecessors_node;

   /* for regalloc */

   /* The set of live registers, i.e. registers whose value may be used
    * eventually, at the beginning of the block.
    */
   BITSET_WORD *live_in;

   /* Set of live registers at the end of the block. */
   BITSET_WORD *live_out;

   /* Set of registers that may have a value defined at the end of the
    * block.
    */
   BITSET_WORD *def_out;

   /* After register allocation, the set of live physical registers at the end
    * of the block. Needed for scheduling.
    */
   uint64_t live_out_phys;

   /* For codegen, the offset in the final program. */
   unsigned instr_offset;

   /* for scheduler */
   union {
      struct {
         int instr_index;
      } sched;
      struct {
         int node_index;
      } rsched;
   };
} gpir_block;
380
/* Branch node. */
typedef struct {
   gpir_node node;
   gpir_block *dest;   /* branch target block */
   gpir_node *cond;    /* condition value — NOTE(review): presumably NULL
                        * for gpir_op_branch_uncond; confirm */
} gpir_branch_node;
386
struct lima_vs_compiled_shader;

/* Indices into gpir_compiler::vector_ssa for the built-in vector values. */
#define GPIR_VECTOR_SSA_VIEWPORT_SCALE  0
#define GPIR_VECTOR_SSA_VIEWPORT_OFFSET 1
#define GPIR_VECTOR_SSA_NUM 2

/* Per-shader compilation context: owns all blocks, nodes and registers,
 * plus the NIR->gpir lookup tables used while building the IR. */
typedef struct gpir_compiler {
   struct list_head block_list;
   int cur_index;   /* next free gpir_node index */

   /* Find the gpir node for a given NIR SSA def. */
   gpir_node **node_for_ssa;

   /* Find the gpir node for a given NIR register. */
   gpir_node **node_for_reg;

   /* Find the gpir register for a given NIR SSA def. */
   gpir_reg **reg_for_ssa;

   /* Find the gpir register for a given NIR register. */
   gpir_reg **reg_for_reg;

   /* gpir block for NIR block. */
   gpir_block **blocks;

   /* for physical reg */
   struct list_head reg_list;
   int cur_reg;   /* next free gpir_reg index */

   /* lookup for vector ssa */
   struct {
      int ssa;
      gpir_node *nodes[4];
   } vector_ssa[GPIR_VECTOR_SSA_NUM];

   struct lima_vs_compiled_shader *prog;
   int constant_base;

   /* shaderdb */
   int num_instr;
   int num_loops;
   int num_spills;
   int num_fills;
} gpir_compiler;
431
/* Number of value registers available to the scheduler. */
#define GPIR_VALUE_REG_NUM 11
/* Number of physical register components — NOTE(review): presumably
 * 16 vec4 registers x 4 components; confirm with regalloc. */
#define GPIR_PHYSICAL_REG_NUM 64

/* Node creation, dependency-graph editing, and debug printing helpers. */
void *gpir_node_create(gpir_block *block, gpir_op op);
gpir_dep *gpir_node_add_dep(gpir_node *succ, gpir_node *pred, int type);
void gpir_node_remove_dep(gpir_node *succ, gpir_node *pred);
void gpir_node_replace_succ(gpir_node *dst, gpir_node *src);
void gpir_node_replace_pred(gpir_dep *dep, gpir_node *new_pred);
void gpir_node_replace_child(gpir_node *parent, gpir_node *old_child, gpir_node *new_child);
void gpir_node_insert_child(gpir_node *parent, gpir_node *child, gpir_node *insert_child);
void gpir_node_delete(gpir_node *node);
void gpir_node_print_prog_dep(gpir_compiler *comp);
void gpir_node_print_prog_seq(gpir_compiler *comp);
445
/* Iterate over a node's dependency edges; dep is bound to a gpir_dep *.
 * Use the _safe variants when removing edges while iterating. */
#define gpir_node_foreach_succ(node, dep) \
   list_for_each_entry(gpir_dep, dep, &node->succ_list, succ_link)
#define gpir_node_foreach_succ_safe(node, dep) \
   list_for_each_entry_safe(gpir_dep, dep, &node->succ_list, succ_link)
#define gpir_node_foreach_pred(node, dep) \
   list_for_each_entry(gpir_dep, dep, &node->pred_list, pred_link)
#define gpir_node_foreach_pred_safe(node, dep) \
   list_for_each_entry_safe(gpir_dep, dep, &node->pred_list, pred_link)
454
455static inline bool gpir_node_is_root(gpir_node *node)
456{
457   return list_is_empty(&node->succ_list);
458}
459
460static inline bool gpir_node_is_leaf(gpir_node *node)
461{
462   return list_is_empty(&node->pred_list);
463}
464
/* Downcasts from the embedded base gpir_node to its op-specific wrapper;
 * only valid when node->type matches. */
#define gpir_node_to_alu(node) ((gpir_alu_node *)(node))
#define gpir_node_to_const(node) ((gpir_const_node *)(node))
#define gpir_node_to_load(node) ((gpir_load_node *)(node))
#define gpir_node_to_store(node) ((gpir_store_node *)(node))
#define gpir_node_to_branch(node) ((gpir_branch_node *)(node))

/* Instruction construction/editing used by the scheduler. */
gpir_instr *gpir_instr_create(gpir_block *block);
bool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node);
void gpir_instr_remove_node(gpir_instr *instr, gpir_node *node);
void gpir_instr_print_prog(gpir_compiler *comp);

bool gpir_codegen_acc_same_op(gpir_op op1, gpir_op op2);

/* Compiler passes; each returns false on failure.
 * NOTE(review): listed in what appears to be their run order — confirm
 * against the driver's compile entry point. */
bool gpir_optimize(gpir_compiler *comp);
bool gpir_pre_rsched_lower_prog(gpir_compiler *comp);
bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp);
bool gpir_regalloc_prog(gpir_compiler *comp);
bool gpir_schedule_prog(gpir_compiler *comp);
bool gpir_codegen_prog(gpir_compiler *comp);

gpir_reg *gpir_create_reg(gpir_compiler *comp);
486
487#endif
488