1/* 2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27#define IFC_DEBUG 0 28 29#if IFC_DEBUG 30#define IFC_DUMP(q) do { q } while (0) 31#else 32#define IFC_DUMP(q) 33#endif 34 35#include "sb_shader.h" 36#include "sb_pass.h" 37 38namespace r600_sb { 39 40int if_conversion::run() { 41 42 regions_vec &rv = sh.get_regions(); 43 44 unsigned converted = 0; 45 for (regions_vec::reverse_iterator I = rv.rbegin(); I != rv.rend(); ) { 46 region_node *r = *I; 47 if (run_on(r)) { 48 I = regions_vec::reverse_iterator(rv.erase((++I).base())); 49 ++converted; 50 } else 51 ++I; 52 } 53 return 0; 54} 55 56void if_conversion::convert_kill_instructions(region_node *r, 57 value *em, bool branch, 58 container_node *c) { 59 value *cnd = NULL; 60 61 for (node_iterator I = c->begin(), E = c->end(), N; I != E; I = N) { 62 N = I + 1; 63 64 if (!I->is_alu_inst()) 65 continue; 66 67 alu_node *a = static_cast<alu_node*>(*I); 68 unsigned flags = a->bc.op_ptr->flags; 69 70 if (!(flags & AF_KILL)) 71 continue; 72 73 // ignore predicated or non-const kill instructions 74 if (a->pred || !a->src[0]->is_const() || !a->src[1]->is_const()) 75 continue; 76 77 literal l0 = a->src[0]->literal_value; 78 literal l1 = a->src[1]->literal_value; 79 80 expr_handler::apply_alu_src_mod(a->bc, 0, l0); 81 expr_handler::apply_alu_src_mod(a->bc, 1, l1); 82 83 if (expr_handler::evaluate_condition(flags, l0, l1)) { 84 // kill with constant 'true' condition, we'll convert it to the 85 // conditional kill outside of the if-then-else block 86 87 a->remove(); 88 89 if (!cnd) { 90 cnd = get_select_value_for_em(sh, em); 91 } else { 92 // more than one kill with the same condition, just remove it 93 continue; 94 } 95 96 r->insert_before(a); 97 a->bc.set_op(branch ? ALU_OP2_KILLE_INT : ALU_OP2_KILLNE_INT); 98 99 a->src[0] = cnd; 100 a->src[1] = sh.get_const_value(0); 101 // clear modifiers 102 memset(&a->bc.src[0], 0, sizeof(bc_alu_src)); 103 memset(&a->bc.src[1], 0, sizeof(bc_alu_src)); 104 } else { 105 // kill with constant 'false' condition, this shouldn't happen 106 // but remove it anyway 107 a->remove(); 108 } 109 } 110} 111 112bool if_conversion::check_and_convert(region_node *r) { 113 114 depart_node *nd1 = static_cast<depart_node*>(r->first); 115 if (!nd1->is_depart() || nd1->target != r) 116 return false; 117 if_node *nif = static_cast<if_node*>(nd1->first); 118 if (!nif->is_if()) 119 return false; 120 depart_node *nd2 = static_cast<depart_node*>(nif->first); 121 if (!nd2->is_depart() || nd2->target != r) 122 return false; 123 124 value* &em = nif->cond; 125 126 node_stats s; 127 128 r->collect_stats(s); 129 130 IFC_DUMP( 131 sblog << "ifcvt: region " << r->region_id << " :\n"; 132 s.dump(); 133 ); 134 135 if (s.region_count || s.fetch_count || s.alu_kill_count || 136 s.if_count != 1 || s.repeat_count || s.uses_ar) 137 return false; 138 139 unsigned real_alu_count = s.alu_count - s.alu_copy_mov_count; 140 141 // if_conversion allows to eliminate JUMP-ALU_POP_AFTER or 142 // JUMP-ALU-ELSE-ALU_POP_AFTER, for now let's assume that 3 CF instructions 143 // are eliminated. According to the docs, cost of CF instruction is 144 // equal to ~40 ALU VLIW instructions (instruction groups), 145 // so we have eliminated cost equal to ~120 groups in total. 146 // Let's also assume that we have avg 3 ALU instructions per group, 147 // This means that potential eliminated cost is about 360 single alu inst. 148 // On the other hand, we are speculatively executing conditional code now, 149 // so we are increasing the cost in some cases. In the worst case, we'll 150 // have to execute real_alu_count additional alu instructions instead of 151 // jumping over them. Let's assume for now that average added cost is 152 // 153 // (0.9 * real_alu_count) 154 // 155 // So we should perform if_conversion if 156 // 157 // (0.9 * real_alu_count) < 360, or 158 // 159 // real_alu_count < 400 160 // 161 // So if real_alu_count is more than 400, than we think that if_conversion 162 // doesn't make sense. 163 164 // FIXME: We can use more precise heuristic, taking into account sizes of 165 // the branches and their probability instead of total size. 166 // Another way to improve this is to consider the number of the groups 167 // instead of the number of instructions (taking into account actual VLIW 168 // packing). 169 // (Currently we don't know anything about packing at this stage, but 170 // probably we can make some more precise estimations anyway) 171 172 if (real_alu_count > 400) 173 return false; 174 175 IFC_DUMP( sblog << "if_cvt: processing...\n"; ); 176 177 value *select = get_select_value_for_em(sh, em); 178 179 if (!select) 180 return false; 181 182 for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E; ++I) { 183 node *n = *I; 184 185 alu_node *ns = convert_phi(select, n); 186 187 if (ns) 188 r->insert_after(ns); 189 } 190 191 nd2->expand(); 192 nif->expand(); 193 nd1->expand(); 194 r->expand(); 195 196 return true; 197} 198 199bool if_conversion::run_on(region_node* r) { 200 201 if (r->dep_count() != 2 || r->rep_count() != 1) 202 return false; 203 204 depart_node *nd1 = static_cast<depart_node*>(r->first); 205 if (!nd1->is_depart()) 206 return false; 207 if_node *nif = static_cast<if_node*>(nd1->first); 208 if (!nif->is_if()) 209 return false; 210 depart_node *nd2 = static_cast<depart_node*>(nif->first); 211 if (!nd2->is_depart()) 212 return false; 213 214 value* &em = nif->cond; 215 216 convert_kill_instructions(r, em, true, nd2); 217 convert_kill_instructions(r, em, false, nd1); 218 219 if (check_and_convert(r)) 220 return true; 221 222 if (nd2->empty() && nif->next) { 223 // empty true branch, non-empty false branch 224 // we'll invert it to get rid of 'else' 225 226 assert(em && em->def); 227 228 alu_node *predset = static_cast<alu_node*>(em->def); 229 230 // create clone of PREDSET instruction with inverted condition. 231 // PREDSET has 3 dst operands in our IR (value written to gpr, 232 // predicate value and exec mask value), we'll split it such that 233 // new PREDSET will define exec mask value only, and two others will 234 // be defined in the old PREDSET (if they are not used then DCE will 235 // simply remove old PREDSET). 236 237 alu_node *newpredset = sh.clone(predset); 238 predset->insert_after(newpredset); 239 240 predset->dst[2] = NULL; 241 242 newpredset->dst[0] = NULL; 243 newpredset->dst[1] = NULL; 244 245 em->def = newpredset; 246 247 unsigned cc = newpredset->bc.op_ptr->flags & AF_CC_MASK; 248 unsigned cmptype = newpredset->bc.op_ptr->flags & AF_CMP_TYPE_MASK; 249 bool swapargs = false; 250 251 cc = invert_setcc_condition(cc, swapargs); 252 253 if (swapargs) { 254 std::swap(newpredset->src[0], newpredset->src[1]); 255 std::swap(newpredset->bc.src[0], newpredset->bc.src[1]); 256 } 257 258 unsigned newopcode = get_predsetcc_op(cc, cmptype); 259 newpredset->bc.set_op(newopcode); 260 261 // move the code from the 'false' branch ('else') to the 'true' branch 262 nd2->move(nif->next, NULL); 263 264 // swap phi operands 265 for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E; 266 ++I) { 267 node *p = *I; 268 assert(p->src.size() == 2); 269 std::swap(p->src[0], p->src[1]); 270 } 271 } 272 273 return false; 274} 275 276alu_node* if_conversion::convert_phi(value* select, node* phi) { 277 assert(phi->dst.size() == 1 || phi->src.size() == 2); 278 279 value *d = phi->dst[0]; 280 value *v1 = phi->src[0]; 281 value *v2 = phi->src[1]; 282 283 assert(d); 284 285 if (!d->is_any_gpr()) 286 return NULL; 287 288 if (v1->is_undef()) { 289 if (v2->is_undef()) { 290 return NULL; 291 } else { 292 return sh.create_mov(d, v2); 293 } 294 } else if (v2->is_undef()) 295 return sh.create_mov(d, v1); 296 297 alu_node* n = sh.create_alu(); 298 299 n->bc.set_op(ALU_OP3_CNDE_INT); 300 n->dst.push_back(d); 301 n->src.push_back(select); 302 n->src.push_back(v1); 303 n->src.push_back(v2); 304 305 return n; 306} 307 308} // namespace r600_sb 309