/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include <limits.h>

#include "gpir.h"

/*
 * GP scheduling algorithm (by Connor Abbott <cwabbott0@gmail.com>)
 *
 * Pre-schedule phase:
 * 1. Order all nodes in a sequence.
 * 2. Convert real register reads/writes into GP load/store nodes, so
 *    that every variable is in SSA form.
 * 3. Register-allocate all SSA values to the 11 value registers,
 *    spilling to real registers with load/store nodes where needed.
 * 4. Add fake dependencies. After step 3 the node sequence may look
 *    like:
 *       01: r1 = r2 + r3
 *       02: r4 = r1 + r2
 *       03: r1 = r5 + r6
 *    Here we add a fake dependency from node 3 to node 2, like a
 *    write-after-read dependency. It is not a true write-after-read
 *    dependency, because r1 is a value register rather than a real one.
 *    The fake dependency is needed in the schedule phase to guarantee
 *    that at any schedule point, the already-scheduled nodes need at
 *    most 11 live inputs.
 * 5. Build the DAG from all the real and fake dependencies.
 *
 * Schedule phase:
 * 1. Compute the set of nodes ready to schedule; if it is empty, exit.
 * 2. Create a new GP instruction and call it the current instruction.
 * 3. For any node with a use 2 cycles ago whose definition is ready to
 *    schedule, schedule that definition immediately if possible, or
 *    else schedule a move.
 * 4. For any node with a use 2 cycles ago whose definition is neither
 *    scheduled nor ready to schedule, schedule a move immediately to
 *    prevent the value from falling off the queue.
 * 5. Count the remaining nodes with a use 1 cycle ago whose definition
 *    is not yet scheduled; if there are more than 5, schedule moves or
 *    definitions for the rest now.
 * 6. Schedule the rest of the available nodes into the current
 *    instruction using your favorite heuristic.
 * 7. Go to step 1.
 *
 * Step 5 for the current instruction guarantees that steps 3 and 4 for
 * the next instruction will always succeed, so only step 5 can possibly
 * fail. Now, note that the nodes whose definitions have not yet been
 * scheduled but which have one or more scheduled uses are exactly the
 * nodes that are live in the final schedule, so there are never more
 * than 11 of them (guaranteed by the 11-value-register allocation and
 * the fake dependencies added before scheduling). The worst case for
 * step 5 is that all of these nodes had a use 1 cycle ago, which means
 * that none of them hit case 3 or 4, so there are 6 slots still
 * available and step 5 will always succeed. In general, even if exactly
 * 11 values are live and n nodes are scheduled in steps 3 and 4, then
 * 11-n are left for step 5, so at most 11-n-5 = 6-n are scheduled in
 * step 5 and at most 6 are scheduled in total, below the limit. So the
 * algorithm will always succeed.
 */
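/* A summary of how the dist helpers below fit together (an illustrative
 * reading of this file, see gpir_get_max_start() and gpir_get_min_end()
 * further down for the exact usage): scheduling is bottom-up -- the
 * ready list is seeded with root nodes and a node's predecessors only
 * become candidates after the node itself is placed -- so instructions
 * are created in reverse order and a higher instruction index means
 * earlier in program order. For a dependency pred -> succ, the chosen
 * instruction indices must then satisfy
 *
 *    succ->sched.instr + gpir_get_min_dist(dep) <= pred->sched.instr
 *    pred->sched.instr <= succ->sched.instr + gpir_get_max_dist(dep)
 */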
static int gpir_min_dist_alu(gpir_dep *dep)
{
   switch (dep->pred->op) {
   case gpir_op_load_uniform:
   case gpir_op_load_temp:
   case gpir_op_load_reg:
   case gpir_op_load_attribute:
      return 0;

   case gpir_op_complex1:
      return 2;

   default:
      return 1;
   }
}

static int gpir_get_min_dist(gpir_dep *dep)
{
   switch (dep->type) {
   case GPIR_DEP_INPUT:
      switch (dep->succ->op) {
      case gpir_op_store_temp:
      case gpir_op_store_reg:
      case gpir_op_store_varying:
         /* stores must use an ALU node as input */
         if (dep->pred->type == gpir_node_type_load)
            return INT_MAX >> 2;
         else
            return 0;

      default:
         return gpir_min_dist_alu(dep);
      }

   case GPIR_DEP_OFFSET:
      assert(dep->succ->op == gpir_op_store_temp);
      return gpir_min_dist_alu(dep);

   case GPIR_DEP_READ_AFTER_WRITE:
      switch (dep->succ->op) {
      case gpir_op_load_temp:
         assert(dep->pred->op == gpir_op_store_temp);
         return 4;
      case gpir_op_load_reg:
         assert(dep->pred->op == gpir_op_store_reg);
         return 3;
      case gpir_op_load_uniform:
         assert(dep->pred->op == gpir_op_store_temp_load_off0 ||
                dep->pred->op == gpir_op_store_temp_load_off1 ||
                dep->pred->op == gpir_op_store_temp_load_off2);
         return 4;
      default:
         assert(0);
      }

   case GPIR_DEP_WRITE_AFTER_READ:
      switch (dep->pred->op) {
      case gpir_op_load_temp:
         assert(dep->succ->op == gpir_op_store_temp);
         return -3;
      case gpir_op_load_reg:
         assert(dep->succ->op == gpir_op_store_reg);
         return -2;
      case gpir_op_load_uniform:
         assert(dep->succ->op == gpir_op_store_temp_load_off0 ||
                dep->succ->op == gpir_op_store_temp_load_off1 ||
                dep->succ->op == gpir_op_store_temp_load_off2);
         return -3;
      default:
         assert(0);
      }

   case GPIR_DEP_VREG_WRITE_AFTER_READ:
      return 0;

   case GPIR_DEP_VREG_READ_AFTER_WRITE:
      assert(0); /* not possible, this is GPIR_DEP_INPUT */
   }

   return 0;
}

static int gpir_max_dist_alu(gpir_dep *dep)
{
   switch (dep->pred->op) {
   case gpir_op_load_uniform:
   case gpir_op_load_temp:
      return 0;
   case gpir_op_load_attribute:
      return 1;
   case gpir_op_load_reg:
      if (dep->pred->sched.pos < GPIR_INSTR_SLOT_REG0_LOAD0 ||
          dep->pred->sched.pos > GPIR_INSTR_SLOT_REG0_LOAD3)
         return 0;
      else
         return 1;
   case gpir_op_exp2_impl:
   case gpir_op_log2_impl:
   case gpir_op_rcp_impl:
   case gpir_op_rsqrt_impl:
   case gpir_op_store_temp_load_off0:
   case gpir_op_store_temp_load_off1:
   case gpir_op_store_temp_load_off2:
      return 1;
   case gpir_op_mov:
      if (dep->pred->sched.pos == GPIR_INSTR_SLOT_COMPLEX)
         return 1;
      else
         return 2;
   default:
      return 2;
   }
}

static int gpir_get_max_dist(gpir_dep *dep)
{
   switch (dep->type) {
   case GPIR_DEP_INPUT:
      switch (dep->succ->op) {
      case gpir_op_store_temp:
      case gpir_op_store_reg:
      case gpir_op_store_varying:
         return 0;

      default:
         return gpir_max_dist_alu(dep);
      }

   case GPIR_DEP_OFFSET:
      assert(dep->succ->op == gpir_op_store_temp);
      return gpir_max_dist_alu(dep);

   default:
      return INT_MAX >> 2; /* Don't want to overflow... */
   }
}
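/* Worked example (illustrative, reading the tables above): suppose a
 * complex1 node feeds an add that is already placed in instr index 3.
 * gpir_min_dist_alu() returns 2 for complex1, so the complex1 must be
 * placed in instr index >= 5, i.e. its result takes two instructions to
 * become readable. Conversely, a mov placed in the complex slot has a
 * max dist of 1 (and a min dist of 1), so an ALU consumer of it must
 * sit exactly one instruction later.
 */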
static void schedule_update_distance(gpir_node *node)
{
   if (gpir_node_is_leaf(node)) {
      node->sched.dist = 0;
      return;
   }

   gpir_node_foreach_pred(node, dep) {
      gpir_node *pred = dep->pred;

      if (pred->sched.dist < 0)
         schedule_update_distance(pred);

      int dist = pred->sched.dist + 1;
      if (node->sched.dist < dist)
         node->sched.dist = dist;
   }
}

static void schedule_insert_ready_list(struct list_head *ready_list,
                                       gpir_node *insert_node)
{
   /* A node is fully ready when all of its successors have been
    * scheduled, and partially ready when only some of its input
    * successors have been. Both kinds must be inserted into the ready
    * list, but for a partially ready node we only ever schedule a move.
    */
   bool ready = true, insert = false;
   gpir_node_foreach_succ(insert_node, dep) {
      gpir_node *succ = dep->succ;
      if (succ->sched.instr >= 0) {
         if (dep->type == GPIR_DEP_INPUT)
            insert = true;
      }
      else
         ready = false;
   }

   insert_node->sched.ready = ready;
   /* for root nodes */
   insert |= ready;

   if (!insert || insert_node->sched.inserted)
      return;

   struct list_head *insert_pos = ready_list;
   list_for_each_entry(gpir_node, node, ready_list, list) {
      if (insert_node->sched.dist > node->sched.dist) {
         insert_pos = &node->list;
         break;
      }
   }

   list_addtail(&insert_node->list, insert_pos);
   insert_node->sched.inserted = true;
}

static int gpir_get_max_start(gpir_node *node)
{
   int max_start = 0;

   /* find the max start instr constrained by all successors */
   gpir_node_foreach_succ(node, dep) {
      gpir_node *succ = dep->succ;
      if (succ->sched.instr < 0)
         continue;

      int start = succ->sched.instr + gpir_get_min_dist(dep);
      if (start > max_start)
         max_start = start;
   }

   return max_start;
}

static int gpir_get_min_end(gpir_node *node)
{
   int min_end = INT_MAX;

   /* find the min end instr constrained by all successors */
   gpir_node_foreach_succ(node, dep) {
      gpir_node *succ = dep->succ;
      if (succ->sched.instr < 0)
         continue;

      int end = succ->sched.instr + gpir_get_max_dist(dep);
      if (end < min_end)
         min_end = end;
   }

   return min_end;
}

static gpir_node *gpir_sched_instr_has_load(gpir_instr *instr, gpir_node *node)
{
   gpir_load_node *load = gpir_node_to_load(node);

   for (int i = GPIR_INSTR_SLOT_REG0_LOAD0; i <= GPIR_INSTR_SLOT_MEM_LOAD3; i++) {
      if (!instr->slots[i])
         continue;

      gpir_load_node *iload = gpir_node_to_load(instr->slots[i]);
      if (load->node.op == iload->node.op &&
          load->index == iload->index &&
          load->component == iload->component)
         return &iload->node;
   }
   return NULL;
}

static bool schedule_try_place_node(gpir_instr *instr, gpir_node *node)
{
   if (node->type == gpir_node_type_load) {
      gpir_node *load = gpir_sched_instr_has_load(instr, node);
      if (load) {
         gpir_debug("same load %d in instr %d for node %d\n",
                    load->index, instr->index, node->index);

         /* don't really merge the two nodes, just pretend this one was
          * scheduled to the same place */
         node->sched.instr = load->sched.instr;
         node->sched.pos = load->sched.pos;
         return true;
      }
   }

   node->sched.instr = instr->index;

   int *slots = gpir_op_infos[node->op].slots;
   for (int i = 0; slots[i] != GPIR_INSTR_SLOT_END; i++) {
      node->sched.pos = slots[i];
      if (node->sched.instr >= gpir_get_max_start(node) &&
          node->sched.instr <= gpir_get_min_end(node) &&
          gpir_instr_try_insert_node(instr, node))
         return true;
   }

   node->sched.instr = -1;
   node->sched.pos = -1;
   return false;
}
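/* The two helpers below deal in moves (an illustrative summary). When a
 * value would otherwise exceed its max dist (the current instruction is
 * its last chance to be read), it is relayed through a new mov node:
 * the mov takes over all of the node's successors and the node becomes
 * the mov's only input, splitting one over-long edge in two:
 *
 *    def ------------> use   becomes   def --> mov --> use
 *
 * When the node being placed is itself a mov, successors that it could
 * no longer legally feed from the current instruction are first
 * reattached directly to the mov's child, so chains of moves stay as
 * short as possible.
 */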
static gpir_node *schedule_create_move_node(gpir_node *node)
{
   gpir_alu_node *move = gpir_node_create(node->block, gpir_op_mov);
   if (unlikely(!move))
      return NULL;

   move->children[0] = node;
   move->num_child = 1;

   move->node.sched.instr = -1;
   move->node.sched.pos = -1;
   move->node.sched.dist = node->sched.dist;

   gpir_debug("create move %d for %d\n", move->node.index, node->index);
   return &move->node;
}

static gpir_node *gpir_sched_node(gpir_instr *instr, gpir_node *node)
{
   if (node->op == gpir_op_mov) {
      gpir_node *child = gpir_node_to_alu(node)->children[0];
      gpir_node_foreach_succ_safe(node, dep) {
         gpir_node *succ = dep->succ;
         if (succ->sched.instr < 0 ||
             instr->index < succ->sched.instr + gpir_get_min_dist(dep)) {
            gpir_node_replace_pred(dep, child);
            if (dep->type == GPIR_DEP_INPUT)
               gpir_node_replace_child(succ, node, child);
         }
      }
      MAYBE_UNUSED bool result = schedule_try_place_node(instr, node);
      assert(result);
      return node;
   }
   else {
      gpir_node *move = schedule_create_move_node(node);
      list_del(&node->list);
      node->sched.ready = false;
      node->sched.inserted = false;
      gpir_node_replace_succ(move, node);
      gpir_node_add_dep(move, node, GPIR_DEP_INPUT);
      return move;
   }
}

static bool gpir_is_input_node(gpir_node *node)
{
   gpir_node_foreach_succ(node, dep) {
      if (dep->type == GPIR_DEP_INPUT)
         return true;
   }
   return false;
}

static int gpir_get_min_scheduled_succ(gpir_node *node)
{
   int min = INT_MAX;
   gpir_node_foreach_succ(node, dep) {
      gpir_node *succ = dep->succ;
      if (succ->sched.instr >= 0 && dep->type == GPIR_DEP_INPUT) {
         if (min > succ->sched.instr)
            min = succ->sched.instr;
      }
   }
   return min;
}

static gpir_node *gpir_sched_instr_pass(gpir_instr *instr,
                                        struct list_head *ready_list)
{
   /* fully ready nodes that have reached their max dist with some successor */
   list_for_each_entry_safe(gpir_node, node, ready_list, list) {
      if (node->sched.ready) {
         int end = gpir_get_min_end(node);
         assert(end >= instr->index);
         if (instr->index < end)
            continue;

         gpir_debug("fully ready max node %d\n", node->index);

         if (schedule_try_place_node(instr, node))
            return node;

         return gpir_sched_node(instr, node);
      }
   }

   /* partially ready nodes that have reached their max dist with some successor */
   list_for_each_entry_safe(gpir_node, node, ready_list, list) {
      if (!node->sched.ready) {
         int end = gpir_get_min_end(node);
         assert(end >= instr->index);
         if (instr->index < end)
            continue;

         gpir_debug("partially ready max node %d\n", node->index);

         return gpir_sched_node(instr, node);
      }
   }

   /* schedule nodes used by the previous instr when there are more than 5 */
   int count = 0;
   gpir_node *two_slot_node = NULL;
   list_for_each_entry(gpir_node, node, ready_list, list) {
      if (gpir_is_input_node(node)) {
         int min = gpir_get_min_scheduled_succ(node);
         assert(min >= instr->index - 1);
         if (min == instr->index - 1) {
            if (gpir_op_infos[node->op].may_consume_two_slots) {
               two_slot_node = node;
               count += 2;
            }
            else
               count++;
         }
      }
   }

   if (count > 5) {
      /* When no slot is available, we must schedule a move for a
       * two-slot node to reduce the count. This results from the
       * dummy_m/f method.
       */
      if (gpir_instr_alu_slot_is_full(instr)) {
         assert(two_slot_node);
         gpir_debug("instr is full, schedule move node for two slot node %d\n",
                    two_slot_node->index);
         return gpir_sched_node(instr, two_slot_node);
      }

      /* schedule fully ready nodes first */
      list_for_each_entry(gpir_node, node, ready_list, list) {
         if (gpir_is_input_node(node)) {
            int min = gpir_get_min_scheduled_succ(node);
            if (min == instr->index - 1 && node->sched.ready) {
               gpir_debug(">5 ready node %d\n", node->index);

               if (schedule_try_place_node(instr, node))
                  return node;
            }
         }
      }

      /* no fully ready node could be scheduled, try partially ready nodes */
      list_for_each_entry_safe(gpir_node, node, ready_list, list) {
         if (gpir_is_input_node(node)) {
            int min = gpir_get_min_scheduled_succ(node);
            if (min == instr->index - 1 && !node->sched.ready) {
               gpir_debug(">5 partially ready node %d\n", node->index);

               return gpir_sched_node(instr, node);
            }
         }
      }

      /* finally schedule moves for fully ready nodes */
      list_for_each_entry_safe(gpir_node, node, ready_list, list) {
         if (gpir_is_input_node(node)) {
            int min = gpir_get_min_scheduled_succ(node);
            if (min == instr->index - 1 && node->sched.ready) {
               gpir_debug(">5 fully ready move node %d\n", node->index);

               return gpir_sched_node(instr, node);
            }
         }
      }
   }

   /* schedule the remaining fully ready nodes */
   list_for_each_entry(gpir_node, node, ready_list, list) {
      if (node->sched.ready) {
         gpir_debug("remain fully ready node %d\n", node->index);

         if (schedule_try_place_node(instr, node))
            return node;
      }
   }

   return NULL;
}
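/* Debug helpers: with the LIMA_DEBUG_GP flag set, they print the ready
 * list before and the slot assignment after each instruction. The
 * output looks roughly like this (node/slot indices are made up):
 *
 *    instr 5 for ready list: 12/r 9/p 7/r
 *    post schedule instr 0/12 5/9
 *
 * where 'r' marks a fully ready node, 'p' a partially ready one, and
 * each post-schedule pair is slot/node.
 */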
static void schedule_print_pre_one_instr(gpir_instr *instr,
                                         struct list_head *ready_list)
{
   if (!(lima_debug & LIMA_DEBUG_GP))
      return;

   printf("instr %d for ready list:", instr->index);
   list_for_each_entry(gpir_node, node, ready_list, list) {
      printf(" %d/%c", node->index, node->sched.ready ? 'r' : 'p');
   }
   printf("\n");
}

static void schedule_print_post_one_instr(gpir_instr *instr)
{
   if (!(lima_debug & LIMA_DEBUG_GP))
      return;

   printf("post schedule instr");
   for (int i = 0; i < GPIR_INSTR_SLOT_NUM; i++) {
      if (instr->slots[i])
         printf(" %d/%d", i, instr->slots[i]->index);
   }
   printf("\n");
}
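/* Fill one instruction: keep asking gpir_sched_instr_pass() for nodes
 * until it cannot place anything more. A returned node with
 * sched.instr < 0 is a freshly created move that still needs to be
 * placed, so it goes onto the ready list; a placed node is retired to
 * block->node_list and its predecessors become ready-list candidates.
 */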
static bool schedule_one_instr(gpir_block *block, struct list_head *ready_list)
{
   gpir_instr *instr = gpir_instr_create(block);
   if (unlikely(!instr))
      return false;

   schedule_print_pre_one_instr(instr, ready_list);

   while (true) {
      gpir_node *node = gpir_sched_instr_pass(instr, ready_list);
      if (!node)
         break;

      if (node->sched.instr < 0)
         schedule_insert_ready_list(ready_list, node);
      else {
         list_del(&node->list);
         list_add(&node->list, &block->node_list);

         gpir_node_foreach_pred(node, dep) {
            gpir_node *pred = dep->pred;
            schedule_insert_ready_list(ready_list, pred);
         }
      }
   }

   schedule_print_post_one_instr(instr);
   return true;
}

static bool schedule_block(gpir_block *block)
{
   /* calculate distance */
   list_for_each_entry(gpir_node, node, &block->node_list, list) {
      if (gpir_node_is_root(node))
         schedule_update_distance(node);
   }

   struct list_head ready_list;
   list_inithead(&ready_list);

   /* construct the ready list from root nodes */
   list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
      if (gpir_node_is_root(node))
         schedule_insert_ready_list(&ready_list, node);
   }

   list_inithead(&block->node_list);
   while (!list_empty(&ready_list)) {
      if (!schedule_one_instr(block, &ready_list))
         return false;
   }

   return true;
}

static void schedule_build_vreg_dependency(gpir_block *block)
{
   gpir_node *regs[GPIR_VALUE_REG_NUM] = {0};
   list_for_each_entry(gpir_node, node, &block->node_list, list) {
      /* store nodes have no value reg assigned */
      if (node->value_reg < 0)
         continue;

      gpir_node *reg = regs[node->value_reg];
      if (reg) {
         gpir_node_foreach_succ(reg, dep) {
            /* write-after-read deps should only apply to real 'reads' */
            if (dep->type != GPIR_DEP_INPUT)
               continue;

            gpir_node *succ = dep->succ;
            gpir_node_add_dep(node, succ, GPIR_DEP_VREG_WRITE_AFTER_READ);
         }
      }
      regs[node->value_reg] = node;
   }

   /* merge dummy_f/m nodes back into the node they were created from */
   list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
      if (node->op == gpir_op_dummy_m) {
         gpir_alu_node *alu = gpir_node_to_alu(node);
         gpir_node *origin = alu->children[0];
         gpir_node *dummy_f = alu->children[1];

         gpir_node_foreach_succ(node, dep) {
            gpir_node *succ = dep->succ;
            /* origin and node may have the same succ (by VREG/INPUT or
             * VREG/VREG dep), so use gpir_node_add_dep() instead of
             * gpir_node_replace_pred() */
            gpir_node_add_dep(succ, origin, dep->type);
            gpir_node_replace_child(succ, node, origin);
         }
         gpir_node_delete(dummy_f);
         gpir_node_delete(node);
      }
   }
}
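/* Physical registers are handled separately below: writes (stores) and
 * reads (loads) of each register are sorted by the original sequence
 * number (sched.index), then each load gets a read-after-write dep on
 * the store preceding it, and the store following it gets a
 * write-after-read dep on the load. E.g. for accesses ordered
 * S1 L1 L2 S2, L1 and L2 each depend on S1, and S2 depends on both L1
 * and L2, which pins the two loads between the two stores in the final
 * schedule.
 */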
®s[reg->index]->uses_list); 681 } 682 } 683 684 /* calculate physical reg read/write dependency for load/store nodes */ 685 for (int i = 0; i < GPIR_VALUE_REG_NUM; i++) { 686 gpir_reg *reg = regs[i]; 687 if (!reg) 688 continue; 689 690 /* sort reg write */ 691 struct list_head tmp_list; 692 list_replace(®->defs_list, &tmp_list); 693 list_inithead(®->defs_list); 694 list_for_each_entry_safe(gpir_store_node, store, &tmp_list, reg_link) { 695 struct list_head *insert_pos = ®->defs_list; 696 list_for_each_entry(gpir_store_node, st, ®->defs_list, reg_link) { 697 if (st->node.sched.index > store->node.sched.index) { 698 insert_pos = &st->reg_link; 699 break; 700 } 701 } 702 list_del(&store->reg_link); 703 list_addtail(&store->reg_link, insert_pos); 704 } 705 706 /* sort reg read */ 707 list_replace(®->uses_list, &tmp_list); 708 list_inithead(®->uses_list); 709 list_for_each_entry_safe(gpir_load_node, load, &tmp_list, reg_link) { 710 struct list_head *insert_pos = ®->uses_list; 711 list_for_each_entry(gpir_load_node, ld, ®->uses_list, reg_link) { 712 if (ld->node.sched.index > load->node.sched.index) { 713 insert_pos = &ld->reg_link; 714 break; 715 } 716 } 717 list_del(&load->reg_link); 718 list_addtail(&load->reg_link, insert_pos); 719 } 720 721 /* insert dependency */ 722 gpir_store_node *store = 723 list_first_entry(®->defs_list, gpir_store_node, reg_link); 724 gpir_store_node *next = store->reg_link.next != ®->defs_list ? 725 list_first_entry(&store->reg_link, gpir_store_node, reg_link) : NULL; 726 727 list_for_each_entry(gpir_load_node, load, ®->uses_list, reg_link) { 728 /* loop until load is between store and next */ 729 while (next && next->node.sched.index < load->node.sched.index) { 730 store = next; 731 next = store->reg_link.next != ®->defs_list ? 
static void print_statistic(gpir_compiler *comp, int save_index)
{
   int num_nodes[gpir_op_num] = {0};
   int num_created_nodes[gpir_op_num] = {0};

   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry(gpir_node, node, &block->node_list, list) {
         num_nodes[node->op]++;
         if (node->index >= save_index)
            num_created_nodes[node->op]++;
      }
   }

   printf("====== gpir scheduler statistics ======\n");
   printf("---- how many nodes are scheduled ----\n");
   int n = 0, l = 0;
   for (int i = 0; i < gpir_op_num; i++) {
      if (num_nodes[i]) {
         printf("%10s:%-6d", gpir_op_infos[i].name, num_nodes[i]);
         n += num_nodes[i];
         if (!(++l % 4))
            printf("\n");
      }
   }
   if (l % 4)
      printf("\n");
   printf("\ntotal: %d\n", n);

   printf("---- how many nodes are created ----\n");
   n = l = 0;
   for (int i = 0; i < gpir_op_num; i++) {
      if (num_created_nodes[i]) {
         printf("%10s:%-6d", gpir_op_infos[i].name, num_created_nodes[i]);
         n += num_created_nodes[i];
         if (!(++l % 4))
            printf("\n");
      }
   }
   if (l % 4)
      printf("\n");
   printf("\ntotal: %d\n", n);
   printf("------------------------------------\n");
}

bool gpir_schedule_prog(gpir_compiler *comp)
{
   int save_index = comp->cur_index;

   /* init schedule info */
   int index = 0;
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      block->sched.instr_index = 0;
      list_for_each_entry(gpir_node, node, &block->node_list, list) {
         node->sched.instr = -1;
         node->sched.pos = -1;
         node->sched.index = index++;
         node->sched.dist = -1;
         node->sched.ready = false;
         node->sched.inserted = false;
      }
   }

   /* build fake/virtual dependencies */
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      schedule_build_vreg_dependency(block);
   }
   schedule_build_preg_dependency(comp);

   //gpir_debug("after scheduler build reg dependency\n");
   //gpir_node_print_prog_dep(comp);

   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      if (!schedule_block(block)) {
         gpir_error("failed to schedule block\n");
         return false;
      }
   }

   if (lima_debug & LIMA_DEBUG_GP) {
      print_statistic(comp, save_index);
      gpir_instr_print_prog(comp);
   }

   return true;
}