/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include "util/ralloc.h"

#include "gpir.h"
#include "lima_context.h"

/* Create an ALU node with op `op` and the two given children, splice it
 * between `parent` and `child` (i.e. `parent` now consumes the new node
 * where it used to consume `child`), and place it in the block's node list
 * just before `parent`.  Returns NULL on allocation failure. */
static gpir_node *
gpir_lower_create_insert_node(gpir_node *parent, gpir_node *child,
                              gpir_node *child2, gpir_op op)
{
   gpir_node *node = gpir_node_create(parent->block, op);
   if (!node)
      return NULL;

   gpir_alu_node *alu = gpir_node_to_alu(node);
   alu->children[0] = child;
   alu->children[1] = child2;
   alu->num_child = 2;
   /* rewires parent's child/dep from `child` to the new node */
   gpir_node_insert_child(parent, child, node);
   gpir_node_add_dep(node, child2, GPIR_DEP_INPUT);
   /* insert before parent so the new node precedes its consumer */
   list_addtail(&node->list, &parent->list);
   return node;
}

/* Insert the viewport transform into the program: the stored position
 * (varying index 0) is rewritten so that w becomes 1/w and xyz become
 * xyz * (1/w) * scale + translate, where scale/translate are read from
 * two uniforms appended at comp->constant_base. */
static bool gpir_lower_viewport_transform(gpir_compiler *comp)
{
   gpir_node *rcpw = NULL;

   /* rcpw = 1 / w */
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry(gpir_node, node, &block->node_list, list) {
         if (node->op == gpir_op_store_varying) {
            gpir_store_node *store = gpir_node_to_store(node);
            /* varying 0, component 3 is the position's w component */
            if (store->index == 0 && store->component == 3) {
               gpir_node *w = store->child;

               rcpw = gpir_node_create(block, gpir_op_rcp);
               if (!rcpw)
                  return false;
               list_addtail(&rcpw->list, &node->list);

               gpir_alu_node *alu = gpir_node_to_alu(rcpw);
               alu->children[0] = w;
               alu->num_child = 1;
               store->child = rcpw;

               gpir_node_insert_child(node, w, rcpw);
               goto found;
            }
         }
      }
   }

   /* NOTE(review): if no w store exists, control falls through to this
    * label with rcpw == NULL; the assert only catches that in debug
    * builds — presumably a position store is guaranteed upstream. */
found:
   assert(rcpw);

   /* xyz = xyz * rcpw * scale + transition */
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry(gpir_node, node, &block->node_list, list) {
         if (node->op == gpir_op_store_varying) {
            gpir_store_node *store = gpir_node_to_store(node);
            if (store->index == 0 && store->component < 3) {
               gpir_node *xyz = store->child;

               /* mul1 = xyz * (1/w) */
               gpir_node *mul1 =
                  gpir_lower_create_insert_node(node, xyz, rcpw, gpir_op_mul);
               if (!mul1)
                  return false;

               /* per-component viewport scale from uniform constant_base */
               gpir_load_node *scale = gpir_node_create(block, gpir_op_load_uniform);
               if (!scale)
                  return false;
               scale->index = comp->constant_base;
               scale->component = store->component;
               list_addtail(&scale->node.list, &node->list);

               /* mul2 = mul1 * scale */
               gpir_node *mul2 =
                  gpir_lower_create_insert_node(node, mul1, &scale->node, gpir_op_mul);
               if (!mul2)
                  return false;

               /* per-component viewport translate from constant_base + 1 */
               gpir_load_node *translate = gpir_node_create(block, gpir_op_load_uniform);
               if (!translate)
                  return false;
               translate->index = comp->constant_base + 1;
               translate->component = store->component;
               list_addtail(&translate->node.list, &node->list);

               /* add = mul2 + translate; becomes the stored value */
               gpir_node *add =
                  gpir_lower_create_insert_node(node, mul2, &translate->node, gpir_op_add);
               if (!add)
                  return false;

               store->child = add;
            }
         }
      }
   }

   /* the two uniforms (scale, translate) are now owned by this pass */
   comp->constant_base += 2;
   return true;
}

/* Replace every const node with a load_uniform reading from a constant
 * table appended to the program: consts are packed four per uniform vec4
 * (index >> 2 selects the uniform, index % 4 the component).  Dead
 * (root) const nodes are simply deleted. */
static bool gpir_lower_const(gpir_compiler *comp)
{
   /* first pass: drop unused consts, count the live ones */
   int num_constant = 0;
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (node->op == gpir_op_const) {
            if (gpir_node_is_root(node))
               gpir_node_delete(node);
            else
               num_constant++;
         }
      }
   }

   if (num_constant) {
      union fi *constant = ralloc_array(comp->prog, union fi, num_constant);
      if (!constant)
         return false;

      comp->prog->constant = constant;
      comp->prog->constant_size = num_constant * sizeof(union fi);

      /* second pass: replace each live const with a uniform load */
      int index = 0;
      list_for_each_entry(gpir_block, block, &comp->block_list, list) {
         list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
            if (node->op == gpir_op_const) {
               gpir_const_node *c = gpir_node_to_const(node);

               if (!gpir_node_is_root(node)) {
                  gpir_load_node *load = gpir_node_create(block, gpir_op_load_uniform);
                  if (unlikely(!load))
                     return false;

                  load->index = comp->constant_base + (index >> 2);
                  load->component = index % 4;
                  constant[index++] = c->value;

                  /* successors of the const now consume the load */
                  gpir_node_replace_succ(&load->node, node);

                  list_addtail(&load->node.list, &node->list);

                  gpir_debug("lower const create uniform %d for const %d\n",
                             load->node.index, node->index);
               }

               gpir_node_delete(node);
            }
         }
      }
   }

   return true;
}

/* duplicate load to all its successors */
static bool gpir_lower_load(gpir_compiler *comp)
{
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (node->type == gpir_node_type_load) {
            gpir_load_node *load = gpir_node_to_load(node);

            /* the first successor keeps the original load; each further
             * successor gets its own clone placed just before it */
            bool first = true;
            gpir_node_foreach_succ_safe(node, dep) {
               gpir_node *succ = dep->succ;

               if (first) {
                  first = false;
                  continue;
               }

               gpir_node *new = gpir_node_create(succ->block, node->op);
               if (unlikely(!new))
                  return false;
               list_addtail(&new->list, &succ->list);

               gpir_debug("lower load create %d from %d for succ %d\n",
                          new->index, node->index, succ->index);

               gpir_load_node *nload = gpir_node_to_load(new);
               nload->index = load->index;
               nload->component = load->component;
               if (load->reg) {
                  /* register loads must also join the reg's uses list */
                  nload->reg = load->reg;
                  list_addtail(&nload->reg_link, &load->reg->uses_list);
               }

               gpir_node_replace_pred(dep, new);
               gpir_node_replace_child(succ, node, new);
            }
         }
      }
   }

   return true;
}

/* Fold a neg node away by absorbing the negation into either the child's
 * destination-negate modifier or the successors' source-negate modifiers. */
static bool gpir_lower_neg(gpir_block *block, gpir_node *node)
{
   gpir_alu_node *neg = gpir_node_to_alu(node);
   gpir_node *child = neg->children[0];

   /* check if child can dest negate */
   if (child->type == gpir_node_type_alu) {
      /* negate must be its only successor */
      if (list_is_singular(&child->succ_list) &&
          gpir_op_infos[child->op].dest_neg) {
         gpir_alu_node *alu = gpir_node_to_alu(child);
         alu->dest_negate = !alu->dest_negate;

         gpir_node_replace_succ(child, node);
         gpir_node_delete(node);
         return true;
      }
   }

   /* check if child can src negate */
   gpir_node_foreach_succ_safe(node, dep) {
      gpir_node *succ = dep->succ;
      if (succ->type != gpir_node_type_alu)
         continue;

      /* a successor is only rewired if *every* one of its inputs that
       * referenced the neg supports a source negate */
      bool success = true;
      gpir_alu_node *alu = gpir_node_to_alu(dep->succ);
      for (int i = 0; i < alu->num_child; i++) {
         if (alu->children[i] == node) {
            if (gpir_op_infos[succ->op].src_neg[i]) {
               alu->children_negate[i] = !alu->children_negate[i];
               alu->children[i] = child;
            }
            else
               success = false;
         }
      }

      if (success)
         gpir_node_replace_pred(dep, child);
   }

   /* the neg may still have successors that couldn't absorb it; only
    * delete it once nothing consumes it anymore */
   if (gpir_node_is_root(node))
      gpir_node_delete(node);

   return true;
}

/* Expand rcp/rsqrt into the hardware's three-node form: the node itself
 * becomes complex1 with inputs (op_impl, complex2, child).  Returns false
 * on allocation failure. */
static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
{
   gpir_alu_node *alu = gpir_node_to_alu(node);
   gpir_node *child = alu->children[0];

   gpir_alu_node *complex2 = gpir_node_create(block, gpir_op_complex2);
   if (unlikely(!complex2))
      return false;

   complex2->children[0] = child;
   complex2->num_child = 1;
   gpir_node_add_dep(&complex2->node, child, GPIR_DEP_INPUT);
   list_addtail(&complex2->node.list, &node->list);

   /* pick the impl op matching the original complex op */
   int impl_op = 0;
   switch (node->op) {
   case gpir_op_rcp:
      impl_op = gpir_op_rcp_impl;
      break;
   case gpir_op_rsqrt:
      impl_op = gpir_op_rsqrt_impl;
      break;
   default:
      assert(0);
   }

   gpir_alu_node *impl = gpir_node_create(block, impl_op);
   if (unlikely(!impl))
      return false;

   impl->children[0] = child;
   impl->num_child = 1;
   gpir_node_add_dep(&impl->node, child, GPIR_DEP_INPUT);
   list_addtail(&impl->node.list, &node->list);

   /* change node to complex1 node */
   node->op = gpir_op_complex1;
   alu->children[0] = &impl->node;
   alu->children[1] = &complex2->node;
   alu->children[2] = child;
   alu->num_child = 3;
   gpir_node_add_dep(node, &impl->node, GPIR_DEP_INPUT);
   gpir_node_add_dep(node, &complex2->node, GPIR_DEP_INPUT);

   return true;
}

/* Wrap every two-slot node in dummy_m/dummy_f helper nodes so that value
 * register allocation can treat it like a one-slot node (see the comment
 * below for the full rationale). */
static bool gpir_lower_node_may_consume_two_slots(gpir_compiler *comp)
{
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (gpir_op_infos[node->op].may_consume_two_slots) {
            /* dummy_f/m are auxiliary nodes for value reg alloc:
             * 1. before reg alloc, create fake nodes dummy_f, dummy_m,
             *    so the tree become: (dummy_m (node dummy_f))
             *    dummy_m can be spilled, but other nodes in the tree can't
             *    be spilled.
             * 2. After reg allocation and fake dep add, merge all deps of
             *    dummy_m and dummy_f to node and remove dummy_m & dummy_f
             *
             * We may also not use dummy_f/m, but alloc two value reg for
             * node. But that means we need to make sure there're 2 free
             * slot after the node successors, but we just need one slot
             * after to be able to schedule it because we can use one move for
             * the two slot node. It's also not easy to handle the spill case
             * for the alloc 2 value method.
             *
             * With the dummy_f/m method, there's no such requirement, the
             * node can be scheduled only when there's two slots for it,
             * otherwise a move. And the node can be spilled with one reg.
             */
            gpir_node *dummy_m = gpir_node_create(block, gpir_op_dummy_m);
            if (unlikely(!dummy_m))
               return false;
            list_add(&dummy_m->list, &node->list);

            gpir_node *dummy_f = gpir_node_create(block, gpir_op_dummy_f);
            if (unlikely(!dummy_f))
               return false;
            list_add(&dummy_f->list, &node->list);

            gpir_alu_node *alu = gpir_node_to_alu(dummy_m);
            alu->children[0] = node;
            alu->children[1] = dummy_f;
            alu->num_child = 2;

            /* successors of node now consume dummy_m instead */
            gpir_node_replace_succ(dummy_m, node);
            gpir_node_add_dep(dummy_m, node, GPIR_DEP_INPUT);
            gpir_node_add_dep(dummy_m, dummy_f, GPIR_DEP_INPUT);

         }
      }
   }

   return true;
}

/*
 * There are no 'equal' or 'not-equal' opcodes.
371 * eq (a == b) is lowered to and(a >= b, b >= a) 372 * ne (a != b) is lowered to or(a < b, b < a) 373 */ 374static bool gpir_lower_eq_ne(gpir_block *block, gpir_node *node) 375{ 376 gpir_op cmp_node_op; 377 gpir_op node_new_op; 378 switch (node->op) { 379 case gpir_op_eq: 380 cmp_node_op = gpir_op_ge; 381 node_new_op = gpir_op_min; /* and */ 382 break; 383 case gpir_op_ne: 384 cmp_node_op = gpir_op_lt; 385 node_new_op = gpir_op_max; /* or */ 386 break; 387 default: 388 assert(0); 389 } 390 391 gpir_alu_node *e = gpir_node_to_alu(node); 392 393 gpir_alu_node *cmp1 = gpir_node_create(block, cmp_node_op); 394 list_addtail(&cmp1->node.list, &node->list); 395 gpir_alu_node *cmp2 = gpir_node_create(block, cmp_node_op); 396 list_addtail(&cmp2->node.list, &node->list); 397 398 cmp1->children[0] = e->children[0]; 399 cmp1->children[1] = e->children[1]; 400 cmp1->num_child = 2; 401 402 cmp2->children[0] = e->children[1]; 403 cmp2->children[1] = e->children[0]; 404 cmp2->num_child = 2; 405 406 gpir_node_add_dep(&cmp1->node, e->children[0], GPIR_DEP_INPUT); 407 gpir_node_add_dep(&cmp1->node, e->children[1], GPIR_DEP_INPUT); 408 409 gpir_node_add_dep(&cmp2->node, e->children[0], GPIR_DEP_INPUT); 410 gpir_node_add_dep(&cmp2->node, e->children[1], GPIR_DEP_INPUT); 411 412 gpir_node_foreach_pred_safe(node, dep) { 413 gpir_node_remove_dep(node, dep->pred); 414 } 415 416 gpir_node_add_dep(node, &cmp1->node, GPIR_DEP_INPUT); 417 gpir_node_add_dep(node, &cmp2->node, GPIR_DEP_INPUT); 418 419 node->op = node_new_op; 420 e->children[0] = &cmp1->node; 421 e->children[1] = &cmp2->node; 422 e->num_child = 2; 423 424 return true; 425} 426 427/* 428 * There is no 'abs' opcode. 
429 * abs(a) is lowered to max(a, -a) 430 */ 431static bool gpir_lower_abs(gpir_block *block, gpir_node *node) 432{ 433 gpir_alu_node *alu = gpir_node_to_alu(node); 434 435 assert(node->op == gpir_op_abs); 436 437 node->op = gpir_op_max; 438 439 alu->children[1] = alu->children[0]; 440 alu->children_negate[1] = true; 441 alu->num_child = 2; 442 443 return true; 444} 445 446/* 447 * There is no 'not' opcode. 448 * not(a) is lowered to add(1, -a) 449 */ 450static bool gpir_lower_not(gpir_block *block, gpir_node *node) 451{ 452 gpir_alu_node *alu = gpir_node_to_alu(node); 453 454 assert(alu->node.op == gpir_op_not); 455 456 node->op = gpir_op_add; 457 458 gpir_node *node_const = gpir_node_create(block, gpir_op_const); 459 gpir_const_node *c = gpir_node_to_const(node_const); 460 461 assert(c->node.op == gpir_op_const); 462 463 list_addtail(&c->node.list, &node->list); 464 c->value.f = 1.0f; 465 gpir_node_add_dep(&alu->node, &c->node, GPIR_DEP_INPUT); 466 467 alu->children_negate[1] = !alu->children_negate[0]; 468 alu->children[1] = alu->children[0]; 469 alu->children[0] = &c->node; 470 alu->num_child = 2; 471 472 return true; 473} 474 475 476static bool (*gpir_pre_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node *) = { 477 [gpir_op_not] = gpir_lower_not, 478}; 479 480static bool (*gpir_post_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node *) = { 481 [gpir_op_neg] = gpir_lower_neg, 482 [gpir_op_rcp] = gpir_lower_complex, 483 [gpir_op_rsqrt] = gpir_lower_complex, 484 [gpir_op_eq] = gpir_lower_eq_ne, 485 [gpir_op_ne] = gpir_lower_eq_ne, 486 [gpir_op_abs] = gpir_lower_abs, 487}; 488 489bool gpir_pre_rsched_lower_prog(gpir_compiler *comp) 490{ 491 if (!gpir_lower_viewport_transform(comp)) 492 return false; 493 494 list_for_each_entry(gpir_block, block, &comp->block_list, list) { 495 list_for_each_entry_safe(gpir_node, node, &block->node_list, list) { 496 if (gpir_pre_rsched_lower_funcs[node->op] && 497 !gpir_pre_rsched_lower_funcs[node->op](block, node)) 498 
return false; 499 } 500 } 501 502 if (!gpir_lower_const(comp)) 503 return false; 504 505 if (!gpir_lower_load(comp)) 506 return false; 507 508 gpir_debug("pre rsched lower prog\n"); 509 gpir_node_print_prog_seq(comp); 510 return true; 511} 512 513bool gpir_post_rsched_lower_prog(gpir_compiler *comp) 514{ 515 list_for_each_entry(gpir_block, block, &comp->block_list, list) { 516 list_for_each_entry_safe(gpir_node, node, &block->node_list, list) { 517 if (gpir_post_rsched_lower_funcs[node->op] && 518 !gpir_post_rsched_lower_funcs[node->op](block, node)) 519 return false; 520 } 521 } 522 523 if (!gpir_lower_node_may_consume_two_slots(comp)) 524 return false; 525 526 gpir_debug("post rsched lower prog\n"); 527 gpir_node_print_prog_seq(comp); 528 return true; 529} 530