1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2016 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include "nir_phi_builder.h" 25b8e80941Smrg#include "nir/nir_vla.h" 26b8e80941Smrg 27b8e80941Smrgstruct nir_phi_builder { 28b8e80941Smrg nir_shader *shader; 29b8e80941Smrg nir_function_impl *impl; 30b8e80941Smrg 31b8e80941Smrg /* Copied from the impl for easy access */ 32b8e80941Smrg unsigned num_blocks; 33b8e80941Smrg 34b8e80941Smrg /* Array of all blocks indexed by block->index. */ 35b8e80941Smrg nir_block **blocks; 36b8e80941Smrg 37b8e80941Smrg /* Hold on to the values so we can easily iterate over them. */ 38b8e80941Smrg struct exec_list values; 39b8e80941Smrg 40b8e80941Smrg /* Worklist for phi adding */ 41b8e80941Smrg unsigned iter_count; 42b8e80941Smrg unsigned *work; 43b8e80941Smrg nir_block **W; 44b8e80941Smrg}; 45b8e80941Smrg 46b8e80941Smrg#define NEEDS_PHI ((nir_ssa_def *)(intptr_t)-1) 47b8e80941Smrg 48b8e80941Smrgstruct nir_phi_builder_value { 49b8e80941Smrg struct exec_node node; 50b8e80941Smrg 51b8e80941Smrg struct nir_phi_builder *builder; 52b8e80941Smrg 53b8e80941Smrg /* Needed so we can create phis and undefs */ 54b8e80941Smrg unsigned num_components; 55b8e80941Smrg unsigned bit_size; 56b8e80941Smrg 57b8e80941Smrg /* The list of phi nodes associated with this value. Phi nodes are not 58b8e80941Smrg * added directly. Instead, they are created, the instr->block pointer 59b8e80941Smrg * set, and then added to this list. Later, in phi_builder_finish, we 60b8e80941Smrg * set up their sources and add them to the top of their respective 61b8e80941Smrg * blocks. 62b8e80941Smrg */ 63b8e80941Smrg struct exec_list phis; 64b8e80941Smrg 65b8e80941Smrg /* Array of SSA defs, indexed by block. For each block, this array has has 66b8e80941Smrg * one of three types of values: 67b8e80941Smrg * 68b8e80941Smrg * - NULL. Indicates that there is no known definition in this block. If 69b8e80941Smrg * you need to find one, look at the block's immediate dominator. 70b8e80941Smrg * 71b8e80941Smrg * - NEEDS_PHI. Indicates that the block may need a phi node but none has 72b8e80941Smrg * been created yet. If a def is requested for a block, a phi will need 73b8e80941Smrg * to be created. 74b8e80941Smrg * 75b8e80941Smrg * - A regular SSA def. This will be either the result of a phi node or 76b8e80941Smrg * one of the defs provided by nir_phi_builder_value_set_blocK_def(). 77b8e80941Smrg */ 78b8e80941Smrg struct hash_table ht; 79b8e80941Smrg}; 80b8e80941Smrg 81b8e80941Smrg/** 82b8e80941Smrg * Convert a block index into a value that can be used as a key for a hash table 83b8e80941Smrg * 84b8e80941Smrg * The hash table functions want a pointer that is not \c NULL. 85b8e80941Smrg * _mesa_hash_pointer drops the two least significant bits, but that's where 86b8e80941Smrg * most of our data likely is. Shift by 2 and add 1 to make everything happy. 87b8e80941Smrg */ 88b8e80941Smrg#define INDEX_TO_KEY(x) ((void *)(uintptr_t) ((x << 2) + 1)) 89b8e80941Smrg 90b8e80941Smrgstruct nir_phi_builder * 91b8e80941Smrgnir_phi_builder_create(nir_function_impl *impl) 92b8e80941Smrg{ 93b8e80941Smrg struct nir_phi_builder *pb = rzalloc(NULL, struct nir_phi_builder); 94b8e80941Smrg 95b8e80941Smrg pb->shader = impl->function->shader; 96b8e80941Smrg pb->impl = impl; 97b8e80941Smrg 98b8e80941Smrg assert(impl->valid_metadata & (nir_metadata_block_index | 99b8e80941Smrg nir_metadata_dominance)); 100b8e80941Smrg 101b8e80941Smrg pb->num_blocks = impl->num_blocks; 102b8e80941Smrg pb->blocks = ralloc_array(pb, nir_block *, pb->num_blocks); 103b8e80941Smrg nir_foreach_block(block, impl) { 104b8e80941Smrg pb->blocks[block->index] = block; 105b8e80941Smrg } 106b8e80941Smrg 107b8e80941Smrg exec_list_make_empty(&pb->values); 108b8e80941Smrg 109b8e80941Smrg pb->iter_count = 0; 110b8e80941Smrg pb->work = rzalloc_array(pb, unsigned, pb->num_blocks); 111b8e80941Smrg pb->W = ralloc_array(pb, nir_block *, pb->num_blocks); 112b8e80941Smrg 113b8e80941Smrg return pb; 114b8e80941Smrg} 115b8e80941Smrg 116b8e80941Smrgstruct nir_phi_builder_value * 117b8e80941Smrgnir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components, 118b8e80941Smrg unsigned bit_size, const BITSET_WORD *defs) 119b8e80941Smrg{ 120b8e80941Smrg struct nir_phi_builder_value *val; 121b8e80941Smrg unsigned i, w_start = 0, w_end = 0; 122b8e80941Smrg 123b8e80941Smrg val = rzalloc_size(pb, sizeof(*val)); 124b8e80941Smrg val->builder = pb; 125b8e80941Smrg val->num_components = num_components; 126b8e80941Smrg val->bit_size = bit_size; 127b8e80941Smrg exec_list_make_empty(&val->phis); 128b8e80941Smrg exec_list_push_tail(&pb->values, &val->node); 129b8e80941Smrg 130b8e80941Smrg _mesa_hash_table_init(&val->ht, pb, _mesa_hash_pointer, 131b8e80941Smrg _mesa_key_pointer_equal); 132b8e80941Smrg 133b8e80941Smrg pb->iter_count++; 134b8e80941Smrg 135b8e80941Smrg BITSET_WORD tmp; 136b8e80941Smrg BITSET_FOREACH_SET(i, tmp, defs, pb->num_blocks) { 137b8e80941Smrg if (pb->work[i] < pb->iter_count) 138b8e80941Smrg pb->W[w_end++] = pb->blocks[i]; 139b8e80941Smrg pb->work[i] = pb->iter_count; 140b8e80941Smrg } 141b8e80941Smrg 142b8e80941Smrg while (w_start != w_end) { 143b8e80941Smrg nir_block *cur = pb->W[w_start++]; 144b8e80941Smrg set_foreach(cur->dom_frontier, dom_entry) { 145b8e80941Smrg nir_block *next = (nir_block *) dom_entry->key; 146b8e80941Smrg 147b8e80941Smrg /* If there's more than one return statement, then the end block 148b8e80941Smrg * can be a join point for some definitions. However, there are 149b8e80941Smrg * no instructions in the end block, so nothing would use those 150b8e80941Smrg * phi nodes. Of course, we couldn't place those phi nodes 151b8e80941Smrg * anyways due to the restriction of having no instructions in the 152b8e80941Smrg * end block... 153b8e80941Smrg */ 154b8e80941Smrg if (next == pb->impl->end_block) 155b8e80941Smrg continue; 156b8e80941Smrg 157b8e80941Smrg if (_mesa_hash_table_search(&val->ht, INDEX_TO_KEY(next->index)) == NULL) { 158b8e80941Smrg /* Instead of creating a phi node immediately, we simply set the 159b8e80941Smrg * value to the magic value NEEDS_PHI. Later, we create phi nodes 160b8e80941Smrg * on demand in nir_phi_builder_value_get_block_def(). 161b8e80941Smrg */ 162b8e80941Smrg nir_phi_builder_value_set_block_def(val, next, NEEDS_PHI); 163b8e80941Smrg 164b8e80941Smrg if (pb->work[next->index] < pb->iter_count) { 165b8e80941Smrg pb->work[next->index] = pb->iter_count; 166b8e80941Smrg pb->W[w_end++] = next; 167b8e80941Smrg } 168b8e80941Smrg } 169b8e80941Smrg } 170b8e80941Smrg } 171b8e80941Smrg 172b8e80941Smrg return val; 173b8e80941Smrg} 174b8e80941Smrg 175b8e80941Smrgvoid 176b8e80941Smrgnir_phi_builder_value_set_block_def(struct nir_phi_builder_value *val, 177b8e80941Smrg nir_block *block, nir_ssa_def *def) 178b8e80941Smrg{ 179b8e80941Smrg _mesa_hash_table_insert(&val->ht, INDEX_TO_KEY(block->index), def); 180b8e80941Smrg} 181b8e80941Smrg 182b8e80941Smrgnir_ssa_def * 183b8e80941Smrgnir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val, 184b8e80941Smrg nir_block *block) 185b8e80941Smrg{ 186b8e80941Smrg /* Crawl up the dominance tree and find the closest dominator for which we 187b8e80941Smrg * have a valid ssa_def, if any. 188b8e80941Smrg */ 189b8e80941Smrg nir_block *dom = block; 190b8e80941Smrg struct hash_entry *he = NULL; 191b8e80941Smrg 192b8e80941Smrg while (dom != NULL) { 193b8e80941Smrg he = _mesa_hash_table_search(&val->ht, INDEX_TO_KEY(dom->index)); 194b8e80941Smrg if (he != NULL) 195b8e80941Smrg break; 196b8e80941Smrg 197b8e80941Smrg dom = dom->imm_dom; 198b8e80941Smrg } 199b8e80941Smrg 200b8e80941Smrg /* Exactly one of (he != NULL) and (dom == NULL) must be true. */ 201b8e80941Smrg assert((he != NULL) != (dom == NULL)); 202b8e80941Smrg 203b8e80941Smrg nir_ssa_def *def; 204b8e80941Smrg if (dom == NULL) { 205b8e80941Smrg /* No dominator means either that we crawled to the top without ever 206b8e80941Smrg * finding a definition or that this block is unreachable. In either 207b8e80941Smrg * case, the value is undefined so we need an SSA undef. 208b8e80941Smrg */ 209b8e80941Smrg nir_ssa_undef_instr *undef = 210b8e80941Smrg nir_ssa_undef_instr_create(val->builder->shader, 211b8e80941Smrg val->num_components, 212b8e80941Smrg val->bit_size); 213b8e80941Smrg nir_instr_insert(nir_before_cf_list(&val->builder->impl->body), 214b8e80941Smrg &undef->instr); 215b8e80941Smrg def = &undef->def; 216b8e80941Smrg } else if (he->data == NEEDS_PHI) { 217b8e80941Smrg /* The magic value NEEDS_PHI indicates that the block needs a phi node 218b8e80941Smrg * but none has been created. We need to create one now so we can 219b8e80941Smrg * return it to the caller. 220b8e80941Smrg * 221b8e80941Smrg * Because a phi node may use SSA defs that it does not dominate (this 222b8e80941Smrg * happens in loops), we do not yet have enough information to fully 223b8e80941Smrg * fill out the phi node. Instead, the phi nodes we create here will be 224b8e80941Smrg * empty (have no sources) and won't actually be placed in the block's 225b8e80941Smrg * instruction list yet. Later, in nir_phi_builder_finish(), we walk 226b8e80941Smrg * over all of the phi instructions, fill out the sources lists, and 227b8e80941Smrg * place them at the top of their respective block's instruction list. 228b8e80941Smrg * 229b8e80941Smrg * Creating phi nodes on-demand allows us to avoid creating dead phi 230b8e80941Smrg * nodes that will just get deleted later. While this probably isn't a 231b8e80941Smrg * big win for a full into-SSA pass, other users may use the phi builder 232b8e80941Smrg * to make small SSA form repairs where most of the phi nodes will never 233b8e80941Smrg * be used. 234b8e80941Smrg */ 235b8e80941Smrg nir_phi_instr *phi = nir_phi_instr_create(val->builder->shader); 236b8e80941Smrg nir_ssa_dest_init(&phi->instr, &phi->dest, val->num_components, 237b8e80941Smrg val->bit_size, NULL); 238b8e80941Smrg phi->instr.block = dom; 239b8e80941Smrg exec_list_push_tail(&val->phis, &phi->instr.node); 240b8e80941Smrg def = &phi->dest.ssa; 241b8e80941Smrg he->data = def; 242b8e80941Smrg } else { 243b8e80941Smrg /* In this case, we have an actual SSA def. It's either the result of a 244b8e80941Smrg * phi node created by the case above or one passed to us through 245b8e80941Smrg * nir_phi_builder_value_set_block_def(). 246b8e80941Smrg */ 247b8e80941Smrg def = (struct nir_ssa_def *) he->data; 248b8e80941Smrg } 249b8e80941Smrg 250b8e80941Smrg /* Walk the chain and stash the def in all of the applicable blocks. We do 251b8e80941Smrg * this for two reasons: 252b8e80941Smrg * 253b8e80941Smrg * 1) To speed up lookup next time even if the next time is called from a 254b8e80941Smrg * block that is not dominated by this one. 255b8e80941Smrg * 2) To avoid unneeded recreation of phi nodes and undefs. 256b8e80941Smrg */ 257b8e80941Smrg for (dom = block; dom != NULL; dom = dom->imm_dom) { 258b8e80941Smrg if (_mesa_hash_table_search(&val->ht, INDEX_TO_KEY(dom->index)) != NULL) 259b8e80941Smrg break; 260b8e80941Smrg 261b8e80941Smrg nir_phi_builder_value_set_block_def(val, dom, def); 262b8e80941Smrg } 263b8e80941Smrg 264b8e80941Smrg return def; 265b8e80941Smrg} 266b8e80941Smrg 267b8e80941Smrgstatic int 268b8e80941Smrgcompare_blocks(const void *_a, const void *_b) 269b8e80941Smrg{ 270b8e80941Smrg const nir_block * const * a = _a; 271b8e80941Smrg const nir_block * const * b = _b; 272b8e80941Smrg 273b8e80941Smrg return (*a)->index - (*b)->index; 274b8e80941Smrg} 275b8e80941Smrg 276b8e80941Smrgvoid 277b8e80941Smrgnir_phi_builder_finish(struct nir_phi_builder *pb) 278b8e80941Smrg{ 279b8e80941Smrg const unsigned num_blocks = pb->num_blocks; 280b8e80941Smrg NIR_VLA(nir_block *, preds, num_blocks); 281b8e80941Smrg 282b8e80941Smrg foreach_list_typed(struct nir_phi_builder_value, val, node, &pb->values) { 283b8e80941Smrg /* We treat the linked list of phi nodes like a worklist. The list is 284b8e80941Smrg * pre-populated by calls to nir_phi_builder_value_get_block_def() that 285b8e80941Smrg * create phi nodes. As we fill in the sources of phi nodes, more may 286b8e80941Smrg * be created and are added to the end of the list. 287b8e80941Smrg * 288b8e80941Smrg * Because we are adding and removing phi nodes from the list as we go, 289b8e80941Smrg * we can't iterate over it normally. Instead, we just iterate until 290b8e80941Smrg * the list is empty. 291b8e80941Smrg */ 292b8e80941Smrg while (!exec_list_is_empty(&val->phis)) { 293b8e80941Smrg struct exec_node *head = exec_list_get_head(&val->phis); 294b8e80941Smrg nir_phi_instr *phi = exec_node_data(nir_phi_instr, head, instr.node); 295b8e80941Smrg assert(phi->instr.type == nir_instr_type_phi); 296b8e80941Smrg 297b8e80941Smrg exec_node_remove(&phi->instr.node); 298b8e80941Smrg 299b8e80941Smrg /* Construct an array of predecessors. We sort it to ensure 300b8e80941Smrg * determinism in the phi insertion algorithm. 301b8e80941Smrg * 302b8e80941Smrg * XXX: Calling qsort this many times seems expensive. 303b8e80941Smrg */ 304b8e80941Smrg int num_preds = 0; 305b8e80941Smrg set_foreach(phi->instr.block->predecessors, entry) 306b8e80941Smrg preds[num_preds++] = (nir_block *)entry->key; 307b8e80941Smrg qsort(preds, num_preds, sizeof(*preds), compare_blocks); 308b8e80941Smrg 309b8e80941Smrg for (unsigned i = 0; i < num_preds; i++) { 310b8e80941Smrg nir_phi_src *src = ralloc(phi, nir_phi_src); 311b8e80941Smrg src->pred = preds[i]; 312b8e80941Smrg src->src = nir_src_for_ssa( 313b8e80941Smrg nir_phi_builder_value_get_block_def(val, preds[i])); 314b8e80941Smrg exec_list_push_tail(&phi->srcs, &src->node); 315b8e80941Smrg } 316b8e80941Smrg 317b8e80941Smrg nir_instr_insert(nir_before_block(phi->instr.block), &phi->instr); 318b8e80941Smrg } 319b8e80941Smrg } 320b8e80941Smrg 321b8e80941Smrg ralloc_free(pb); 322b8e80941Smrg} 323