/*
 * Copyright (c) 2017 Lima Project
 * Copyright (c) 2013 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 */

#ifndef LIMA_IR_GP_GPIR_H
#define LIMA_IR_GP_GPIR_H

#include "util/list.h"
#include "util/u_math.h"

#include "ir/lima_ir.h"

/* List of operations that a node can do. Ops are grouped by the kind of
 * hardware slot they can occupy (see gpir_instr_slot below); the trailing
 * groups are ops that do not exist in hardware and are lowered/emulated
 * before scheduling.
 */
typedef enum {
   gpir_op_mov,

   /* mul ops */
   gpir_op_mul,
   gpir_op_select,
   gpir_op_complex1,
   gpir_op_complex2,

   /* add ops */
   gpir_op_add,
   gpir_op_floor,
   gpir_op_sign,
   gpir_op_ge,
   gpir_op_lt,
   gpir_op_min,
   gpir_op_max,
   gpir_op_abs,
   gpir_op_not,

   /* mul/add ops */
   gpir_op_neg,

   /* passthrough ops */
   gpir_op_clamp_const,
   gpir_op_preexp2,
   gpir_op_postlog2,

   /* complex ops */
   gpir_op_exp2_impl,
   gpir_op_log2_impl,
   gpir_op_rcp_impl,
   gpir_op_rsqrt_impl,

   /* load/store ops */
   gpir_op_load_uniform,
   gpir_op_load_temp,
   gpir_op_load_attribute,
   gpir_op_load_reg,
   gpir_op_store_temp,
   gpir_op_store_reg,
   gpir_op_store_varying,
   gpir_op_store_temp_load_off0,
   gpir_op_store_temp_load_off1,
   gpir_op_store_temp_load_off2,

   /* branch */
   gpir_op_branch_cond,

   /* const (emulated) */
   gpir_op_const,

   /* emulated ops */
   gpir_op_exp2,
   gpir_op_log2,
   gpir_op_rcp,
   gpir_op_rsqrt,
   gpir_op_ceil,
   gpir_op_exp,
   gpir_op_log,
   gpir_op_sin,
   gpir_op_cos,
   gpir_op_tan,
   gpir_op_branch_uncond,
   gpir_op_eq,
   gpir_op_ne,

   /* auxiliary ops */
   gpir_op_dummy_f,
   gpir_op_dummy_m,

   gpir_op_num,
} gpir_op;

/* Broad category of a node; selects which gpir_node subclass struct
 * (gpir_alu_node, gpir_const_node, ...) the node actually is.
 */
typedef enum {
   gpir_node_type_alu,
   gpir_node_type_const,
   gpir_node_type_load,
   gpir_node_type_store,
   gpir_node_type_branch,
} gpir_node_type;

/* Static per-opcode properties; the gpir_op_infos[] table below is
 * presumably indexed by gpir_op (defined elsewhere — confirm in the .c file).
 */
typedef struct {
   char *name;
   bool dest_neg;
   bool src_neg[4];
   int *slots;
   gpir_node_type type;
   bool spillless;
   bool schedule_first;
   bool may_consume_two_slots;
} gpir_op_info;

extern const gpir_op_info gpir_op_infos[];

/* A dependency edge between two nodes: pred must execute before succ.
 * Each edge is linked into both endpoints' lists (see the foreach macros
 * near the end of this header).
 */
typedef struct {
   enum {
      GPIR_DEP_INPUT,            /* def is the input of use */
      GPIR_DEP_OFFSET,           /* def is the offset of use (i.e. temp store) */
      GPIR_DEP_READ_AFTER_WRITE,
      GPIR_DEP_WRITE_AFTER_READ,
   } type;

   /* node that executes before succ */
   struct gpir_node *pred;
   /* node that executes after pred */
   struct gpir_node *succ;

   /* link for a node's pred_list (typo "ndoe" in the old comment fixed) */
   struct list_head pred_link;
   /* link for a node's succ_list */
   struct list_head succ_link;
} gpir_dep;

struct gpir_instr;
struct gpir_store_node;

/* Base struct embedded as the first member of every node subclass, so the
 * gpir_node_to_* macros below can downcast with a plain pointer cast.
 */
typedef struct gpir_node {
   struct list_head list;
   gpir_op op;
   gpir_node_type type;
   int index;
   char name[16];
   bool printed;
   struct gpir_block *block;

   /* for nodes relationship */
   /* deps whose pred is this node, i.e. the nodes that use it (successors) */
   struct list_head succ_list;
   /* deps whose succ is this node, i.e. the nodes it uses (predecessors) */
   struct list_head pred_list;

   /* for scheduler and regalloc */
   int value_reg;
   /* Per-pass scratch state; as a union, only one pass's data is valid at
    * a time.
    */
   union {
      /* final (instruction) scheduler state */
      struct {
         struct gpir_instr *instr;
         struct gpir_store_node *physreg_store;
         int pos;
         int dist;
         int index;
         bool ready;
         bool inserted;
         bool max_node, next_max_node;
         bool complex_allowed;
      } sched;
      /* register-pressure-reduction scheduler state */
      struct {
         int parent_index;
         float reg_pressure;
         int est;
         bool scheduled;
      } rsched;
      /* value register allocation state */
      struct {
         float index;
         struct gpir_node *last;
      } vreg;
      /* physical register allocation state */
      struct {
         int index;
      } preg;
   };
} gpir_node;

/* ALU node: up to 3 children (sources), each optionally negated. */
typedef struct {
   gpir_node node;

   gpir_node *children[3];
   bool children_negate[3];
   int num_child;

   bool dest_negate;
} gpir_alu_node;

/* Constant node; value is a float/int pun (union fi from u_math.h). */
typedef struct {
   gpir_node node;
   union fi value;
} gpir_const_node;

/* A virtual register, created via gpir_create_reg() and linked into
 * gpir_compiler::reg_list.
 */
typedef struct {
   int index;
   struct list_head list;
} gpir_reg;

/* Load node (uniform/temp/attribute/reg, per op). */
typedef struct {
   gpir_node node;

   unsigned index;
   unsigned component;

   /* set when loading a register (gpir_op_load_reg); reg_link chains this
    * load — presumably on a per-register list, confirm in the .c files
    */
   gpir_reg *reg;
   struct list_head reg_link;
} gpir_load_node;

/* Store node (temp/reg/varying, per op); child is the value stored. */
typedef struct gpir_store_node {
   gpir_node node;

   unsigned index;
   unsigned component;
   gpir_node *child;

   gpir_reg *reg;
} gpir_store_node;

/* The slots of one VLIW instruction word. The aliases at the end mark the
 * ALU sub-range and the range whose results stay readable for two cycles.
 */
enum gpir_instr_slot {
   GPIR_INSTR_SLOT_MUL0,
   GPIR_INSTR_SLOT_MUL1,
   GPIR_INSTR_SLOT_ADD0,
   GPIR_INSTR_SLOT_ADD1,
   GPIR_INSTR_SLOT_PASS,
   GPIR_INSTR_SLOT_COMPLEX,
   GPIR_INSTR_SLOT_REG0_LOAD0,
   GPIR_INSTR_SLOT_REG0_LOAD1,
   GPIR_INSTR_SLOT_REG0_LOAD2,
   GPIR_INSTR_SLOT_REG0_LOAD3,
   GPIR_INSTR_SLOT_REG1_LOAD0,
   GPIR_INSTR_SLOT_REG1_LOAD1,
   GPIR_INSTR_SLOT_REG1_LOAD2,
   GPIR_INSTR_SLOT_REG1_LOAD3,
   GPIR_INSTR_SLOT_MEM_LOAD0,
   GPIR_INSTR_SLOT_MEM_LOAD1,
   GPIR_INSTR_SLOT_MEM_LOAD2,
   GPIR_INSTR_SLOT_MEM_LOAD3,
   GPIR_INSTR_SLOT_STORE0,
   GPIR_INSTR_SLOT_STORE1,
   GPIR_INSTR_SLOT_STORE2,
   GPIR_INSTR_SLOT_STORE3,
   GPIR_INSTR_SLOT_NUM,
   GPIR_INSTR_SLOT_END,
   GPIR_INSTR_SLOT_ALU_BEGIN = GPIR_INSTR_SLOT_MUL0,
   GPIR_INSTR_SLOT_ALU_END = GPIR_INSTR_SLOT_COMPLEX,
   GPIR_INSTR_SLOT_DIST_TWO_BEGIN = GPIR_INSTR_SLOT_MUL0,
   GPIR_INSTR_SLOT_DIST_TWO_END = GPIR_INSTR_SLOT_PASS,
};

/* One scheduled VLIW instruction: the nodes occupying each slot plus the
 * bookkeeping the scheduler needs to keep room for moves and stores.
 */
typedef struct gpir_instr {
   int index;
   struct list_head list;

   gpir_node *slots[GPIR_INSTR_SLOT_NUM];

   /* The number of ALU slots free for moves. */
   int alu_num_slot_free;

   /* The number of ALU slots free for moves, except for the complex slot. */
   int alu_non_cplx_slot_free;

   /* We need to make sure that we can insert moves in the following cases:
    * (1) There was a use of a value two cycles ago.
    * (2) There were more than 5 uses of a value 1 cycle ago (or else we can't
    *     possibly satisfy (1) for the next cycle).
    * (3) There is a store instruction scheduled, but not its child.
    *
    * The complex slot cannot be used for a move in case (1), since it only
    * has a FIFO depth of 1, but it can be used for (2) as well as (3) as long
    * as the uses aren't in certain slots. It turns out that we don't have to
    * worry about nodes that can't use the complex slot for (2), since there
    * are at most 4 uses 1 cycle ago that can't use the complex slot, but we
    * do have to worry about (3). This means tracking stores whose children
    * cannot be in the complex slot. In order to ensure that we have enough
    * space for all three, we maintain the following invariants:
    *
    * (1) alu_num_slot_free >= alu_num_slot_needed_by_store +
    *                          alu_num_slot_needed_by_max +
    *                          max(alu_num_unscheduled_next_max - alu_max_allowed_next_max, 0)
    * (2) alu_non_cplx_slot_free >= alu_num_slot_needed_by_max +
    *                               alu_num_slot_needed_by_non_cplx_store
    *
    * alu_max_allowed_next_max is normally 5 (since there can be at most 5 max
    * nodes for the next instruction) but when there is a complex1 node in
    * this instruction it reduces to 4 to reserve a slot for complex2 in the
    * next instruction.
    */
   int alu_num_slot_needed_by_store;
   int alu_num_slot_needed_by_non_cplx_store;
   int alu_num_slot_needed_by_max;
   int alu_num_unscheduled_next_max;
   int alu_max_allowed_next_max;

   /* Used to communicate to the scheduler how many slots need to be cleared
    * up in order to satisfy the invariants.
    */
   int slot_difference;
   int non_cplx_slot_difference;

   /* usage/identity of the reg0, reg1 and memory load units this cycle */
   int reg0_use_count;
   bool reg0_is_attr;
   int reg0_index;

   int reg1_use_count;
   int reg1_index;

   int mem_use_count;
   bool mem_is_temp;
   int mem_index;

   /* what each of the two store units writes this cycle */
   enum {
      GPIR_INSTR_STORE_NONE,
      GPIR_INSTR_STORE_VARYING,
      GPIR_INSTR_STORE_REG,
      GPIR_INSTR_STORE_TEMP,
   } store_content[2];
   int store_index[2];
} gpir_instr;

/* A basic block: its nodes (pre-schedule), scheduled instructions, CFG
 * links, and per-pass liveness/codegen state.
 */
typedef struct gpir_block {
   struct list_head list;
   struct list_head node_list;
   struct list_head instr_list;
   struct gpir_compiler *comp;

   struct gpir_block *successors[2];
   struct list_head predecessors;
   struct list_head predecessors_node;

   /* for regalloc */

   /* The set of live registers, i.e. registers whose value may be used
    * eventually, at the beginning of the block.
    */
   BITSET_WORD *live_in;

   /* Set of live registers at the end of the block. */
   BITSET_WORD *live_out;

   /* Set of registers that may have a value defined at the end of the
    * block.
    */
   BITSET_WORD *def_out;

   /* After register allocation, the set of live physical registers at the end
    * of the block. Needed for scheduling.
    */
   uint64_t live_out_phys;

   /* For codegen, the offset in the final program. */
   unsigned instr_offset;

   /* for scheduler */
   union {
      struct {
         int instr_index;
      } sched;
      struct {
         int node_index;
      } rsched;
   };
} gpir_block;

/* Branch node: jumps to dest; cond is the condition node (see
 * gpir_op_branch_cond / gpir_op_branch_uncond).
 */
typedef struct {
   gpir_node node;
   gpir_block *dest;
   gpir_node *cond;
} gpir_branch_node;

struct lima_vs_compiled_shader;

/* Indices into gpir_compiler::vector_ssa for the vector system values. */
#define GPIR_VECTOR_SSA_VIEWPORT_SCALE  0
#define GPIR_VECTOR_SSA_VIEWPORT_OFFSET 1
#define GPIR_VECTOR_SSA_NUM 2

/* Per-shader compilation context, including the NIR -> gpir lookup tables
 * used while building the IR.
 */
typedef struct gpir_compiler {
   struct list_head block_list;
   int cur_index;

   /* Find the gpir node for a given NIR SSA def. */
   gpir_node **node_for_ssa;

   /* Find the gpir node for a given NIR register. */
   gpir_node **node_for_reg;

   /* Find the gpir register for a given NIR SSA def. */
   gpir_reg **reg_for_ssa;

   /* Find the gpir register for a given NIR register. */
   gpir_reg **reg_for_reg;

   /* gpir block for NIR block. */
   gpir_block **blocks;

   /* for physical reg */
   struct list_head reg_list;
   int cur_reg;

   /* lookup for vector ssa */
   struct {
      int ssa;
      gpir_node *nodes[4];
   } vector_ssa[GPIR_VECTOR_SSA_NUM];

   struct lima_vs_compiled_shader *prog;
   int constant_base;

   /* shaderdb */
   int num_instr;
   int num_loops;
   int num_spills;
   int num_fills;
} gpir_compiler;

#define GPIR_VALUE_REG_NUM 11
#define GPIR_PHYSICAL_REG_NUM 64

/* Node construction and dependency-graph manipulation (see gpir_dep). */
void *gpir_node_create(gpir_block *block, gpir_op op);
gpir_dep *gpir_node_add_dep(gpir_node *succ, gpir_node *pred, int type);
void gpir_node_remove_dep(gpir_node *succ, gpir_node *pred);
void gpir_node_replace_succ(gpir_node *dst, gpir_node *src);
void gpir_node_replace_pred(gpir_dep *dep, gpir_node *new_pred);
void gpir_node_replace_child(gpir_node *parent, gpir_node *old_child, gpir_node *new_child);
void gpir_node_insert_child(gpir_node *parent, gpir_node *child, gpir_node *insert_child);
void gpir_node_delete(gpir_node *node);
void gpir_node_print_prog_dep(gpir_compiler *comp);
void gpir_node_print_prog_seq(gpir_compiler *comp);

/* Iterate the gpir_dep edges on a node's successor/predecessor lists;
 * use the _safe variants when removing edges while iterating.
 */
#define gpir_node_foreach_succ(node, dep) \
   list_for_each_entry(gpir_dep, dep, &node->succ_list, succ_link)
#define gpir_node_foreach_succ_safe(node, dep) \
   list_for_each_entry_safe(gpir_dep, dep, &node->succ_list, succ_link)
#define gpir_node_foreach_pred(node, dep) \
   list_for_each_entry(gpir_dep, dep, &node->pred_list, pred_link)
#define gpir_node_foreach_pred_safe(node, dep) \
   list_for_each_entry_safe(gpir_dep, dep, &node->pred_list, pred_link)
/* A root node has an empty succ_list: no other node consumes its value. */
static inline bool gpir_node_is_root(gpir_node *node)
{
   return list_is_empty(&node->succ_list);
}

/* A leaf node has an empty pred_list: it depends on no other node. */
static inline bool gpir_node_is_leaf(gpir_node *node)
{
   return list_is_empty(&node->pred_list);
}

/* Downcasts from the embedded base gpir_node to the containing subclass;
 * valid because "node" is the first member of each subclass struct.
 */
#define gpir_node_to_alu(node) ((gpir_alu_node *)(node))
#define gpir_node_to_const(node) ((gpir_const_node *)(node))
#define gpir_node_to_load(node) ((gpir_load_node *)(node))
#define gpir_node_to_store(node) ((gpir_store_node *)(node))
#define gpir_node_to_branch(node) ((gpir_branch_node *)(node))

/* Instruction construction and slot assignment. */
gpir_instr *gpir_instr_create(gpir_block *block);
bool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node);
void gpir_instr_remove_node(gpir_instr *instr, gpir_node *node);
void gpir_instr_print_prog(gpir_compiler *comp);

bool gpir_codegen_acc_same_op(gpir_op op1, gpir_op op2);

/* The compiler passes, in roughly the order they run; each returns false
 * on failure.
 */
bool gpir_optimize(gpir_compiler *comp);
bool gpir_pre_rsched_lower_prog(gpir_compiler *comp);
bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp);
bool gpir_regalloc_prog(gpir_compiler *comp);
bool gpir_schedule_prog(gpir_compiler *comp);
bool gpir_codegen_prog(gpir_compiler *comp);

gpir_reg *gpir_create_reg(gpir_compiler *comp);

#endif