1/* 2 * Copyright © 2014 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "brw_fs.h" 25#include "brw_cfg.h" 26#include "brw_eu.h" 27 28/** @file brw_fs_cmod_propagation.cpp 29 * 30 * Implements a pass that propagates the conditional modifier from a CMP x 0.0 31 * instruction into the instruction that generated x. For instance, in this 32 * sequence 33 * 34 * add(8) g70<1>F g69<8,8,1>F 4096F 35 * cmp.ge.f0(8) null g70<8,8,1>F 0F 36 * 37 * we can do the comparison as part of the ADD instruction directly: 38 * 39 * add.ge.f0(8) g70<1>F g69<8,8,1>F 4096F 40 * 41 * If there had been a use of the flag register and another CMP using g70 42 * 43 * add.ge.f0(8) g70<1>F g69<8,8,1>F 4096F 44 * (+f0) sel(8) g71<F> g72<8,8,1>F g73<8,8,1>F 45 * cmp.ge.f0(8) null g70<8,8,1>F 0F 46 * 47 * we can recognize that the CMP is generating the flag value that already 48 * exists and therefore remove the instruction. 49 */ 50 51static bool 52cmod_propagate_cmp_to_add(const gen_device_info *devinfo, bblock_t *block, 53 fs_inst *inst) 54{ 55 bool read_flag = false; 56 57 foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { 58 if (scan_inst->opcode == BRW_OPCODE_ADD && 59 !scan_inst->is_partial_write() && 60 scan_inst->exec_size == inst->exec_size) { 61 bool negate; 62 63 /* A CMP is basically a subtraction. The result of the 64 * subtraction must be the same as the result of the addition. 65 * This means that one of the operands must be negated. So (a + 66 * b) vs (a == -b) or (a + -b) vs (a == b). 67 */ 68 if ((inst->src[0].equals(scan_inst->src[0]) && 69 inst->src[1].negative_equals(scan_inst->src[1])) || 70 (inst->src[0].equals(scan_inst->src[1]) && 71 inst->src[1].negative_equals(scan_inst->src[0]))) { 72 negate = false; 73 } else if ((inst->src[0].negative_equals(scan_inst->src[0]) && 74 inst->src[1].equals(scan_inst->src[1])) || 75 (inst->src[0].negative_equals(scan_inst->src[1]) && 76 inst->src[1].equals(scan_inst->src[0]))) { 77 negate = true; 78 } else { 79 goto not_match; 80 } 81 82 /* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods": 83 * 84 * * Note that the [post condition signal] bits generated at 85 * the output of a compute are before the .sat. 86 * 87 * So we don't have to bail if scan_inst has saturate. 88 */ 89 /* Otherwise, try propagating the conditional. */ 90 const enum brw_conditional_mod cond = 91 negate ? brw_swap_cmod(inst->conditional_mod) 92 : inst->conditional_mod; 93 94 if (scan_inst->can_do_cmod() && 95 ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) || 96 scan_inst->conditional_mod == cond)) { 97 scan_inst->conditional_mod = cond; 98 inst->remove(block); 99 return true; 100 } 101 break; 102 } 103 104 not_match: 105 if (scan_inst->flags_written()) 106 break; 107 108 read_flag = read_flag || scan_inst->flags_read(devinfo); 109 } 110 111 return false; 112} 113 114/** 115 * Propagate conditional modifiers from NOT instructions 116 * 117 * Attempt to convert sequences like 118 * 119 * or(8) g78<8,8,1> g76<8,8,1>UD g77<8,8,1>UD 120 * ... 121 * not.nz.f0(8) null g78<8,8,1>UD 122 * 123 * into 124 * 125 * or.z.f0(8) g78<8,8,1> g76<8,8,1>UD g77<8,8,1>UD 126 */ 127static bool 128cmod_propagate_not(const gen_device_info *devinfo, bblock_t *block, 129 fs_inst *inst) 130{ 131 const enum brw_conditional_mod cond = brw_negate_cmod(inst->conditional_mod); 132 bool read_flag = false; 133 134 if (cond != BRW_CONDITIONAL_Z && cond != BRW_CONDITIONAL_NZ) 135 return false; 136 137 foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { 138 if (regions_overlap(scan_inst->dst, scan_inst->size_written, 139 inst->src[0], inst->size_read(0))) { 140 if (scan_inst->opcode != BRW_OPCODE_OR && 141 scan_inst->opcode != BRW_OPCODE_AND) 142 break; 143 144 if (scan_inst->is_partial_write() || 145 scan_inst->dst.offset != inst->src[0].offset || 146 scan_inst->exec_size != inst->exec_size) 147 break; 148 149 if (scan_inst->can_do_cmod() && 150 ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) || 151 scan_inst->conditional_mod == cond)) { 152 scan_inst->conditional_mod = cond; 153 inst->remove(block); 154 return true; 155 } 156 break; 157 } 158 159 if (scan_inst->flags_written()) 160 break; 161 162 read_flag = read_flag || scan_inst->flags_read(devinfo); 163 } 164 165 return false; 166} 167 168static bool 169opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block) 170{ 171 bool progress = false; 172 int ip = block->end_ip + 1; 173 174 foreach_inst_in_block_reverse_safe(fs_inst, inst, block) { 175 ip--; 176 177 if ((inst->opcode != BRW_OPCODE_AND && 178 inst->opcode != BRW_OPCODE_CMP && 179 inst->opcode != BRW_OPCODE_MOV && 180 inst->opcode != BRW_OPCODE_NOT) || 181 inst->predicate != BRW_PREDICATE_NONE || 182 !inst->dst.is_null() || 183 (inst->src[0].file != VGRF && inst->src[0].file != ATTR && 184 inst->src[0].file != UNIFORM)) 185 continue; 186 187 /* An ABS source modifier can only be handled when processing a compare 188 * with a value other than zero. 189 */ 190 if (inst->src[0].abs && 191 (inst->opcode != BRW_OPCODE_CMP || inst->src[1].is_zero())) 192 continue; 193 194 /* Only an AND.NZ can be propagated. Many AND.Z instructions are 195 * generated (for ir_unop_not in fs_visitor::emit_bool_to_cond_code). 196 * Propagating those would require inverting the condition on the CMP. 197 * This changes both the flag value and the register destination of the 198 * CMP. That result may be used elsewhere, so we can't change its value 199 * on a whim. 200 */ 201 if (inst->opcode == BRW_OPCODE_AND && 202 !(inst->src[1].is_one() && 203 inst->conditional_mod == BRW_CONDITIONAL_NZ && 204 !inst->src[0].negate)) 205 continue; 206 207 if (inst->opcode == BRW_OPCODE_MOV && 208 inst->conditional_mod != BRW_CONDITIONAL_NZ) 209 continue; 210 211 /* A CMP with a second source of zero can match with anything. A CMP 212 * with a second source that is not zero can only match with an ADD 213 * instruction. 214 * 215 * Only apply this optimization to float-point sources. It can fail for 216 * integers. For inputs a = 0x80000000, b = 4, int(0x80000000) < 4, but 217 * int(0x80000000) - 4 overflows and results in 0x7ffffffc. that's not 218 * less than zero, so the flags get set differently than for (a < b). 219 */ 220 if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) { 221 if (brw_reg_type_is_floating_point(inst->src[0].type) && 222 cmod_propagate_cmp_to_add(devinfo, block, inst)) 223 progress = true; 224 225 continue; 226 } 227 228 if (inst->opcode == BRW_OPCODE_NOT) { 229 progress = cmod_propagate_not(devinfo, block, inst) || progress; 230 continue; 231 } 232 233 bool read_flag = false; 234 foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { 235 if (regions_overlap(scan_inst->dst, scan_inst->size_written, 236 inst->src[0], inst->size_read(0))) { 237 if (scan_inst->is_partial_write() || 238 scan_inst->dst.offset != inst->src[0].offset || 239 scan_inst->exec_size != inst->exec_size) 240 break; 241 242 /* CMP's result is the same regardless of dest type. */ 243 if (inst->conditional_mod == BRW_CONDITIONAL_NZ && 244 scan_inst->opcode == BRW_OPCODE_CMP && 245 brw_reg_type_is_integer(inst->dst.type)) { 246 inst->remove(block); 247 progress = true; 248 break; 249 } 250 251 /* If the AND wasn't handled by the previous case, it isn't safe 252 * to remove it. 253 */ 254 if (inst->opcode == BRW_OPCODE_AND) 255 break; 256 257 /* Not safe to use inequality operators if the types are different 258 */ 259 if (scan_inst->dst.type != inst->src[0].type && 260 inst->conditional_mod != BRW_CONDITIONAL_Z && 261 inst->conditional_mod != BRW_CONDITIONAL_NZ) 262 break; 263 264 /* Comparisons operate differently for ints and floats */ 265 if (scan_inst->dst.type != inst->dst.type) { 266 /* Comparison result may be altered if the bit-size changes 267 * since that affects range, denorms, etc 268 */ 269 if (type_sz(scan_inst->dst.type) != type_sz(inst->dst.type)) 270 break; 271 272 /* We should propagate from a MOV to another instruction in a 273 * sequence like: 274 * 275 * and(16) g31<1>UD g20<8,8,1>UD g22<8,8,1>UD 276 * mov.nz.f0(16) null<1>F g31<8,8,1>D 277 */ 278 if (inst->opcode == BRW_OPCODE_MOV) { 279 if ((inst->src[0].type != BRW_REGISTER_TYPE_D && 280 inst->src[0].type != BRW_REGISTER_TYPE_UD) || 281 (scan_inst->dst.type != BRW_REGISTER_TYPE_D && 282 scan_inst->dst.type != BRW_REGISTER_TYPE_UD)) { 283 break; 284 } 285 } else if (brw_reg_type_is_floating_point(scan_inst->dst.type) != 286 brw_reg_type_is_floating_point(inst->dst.type)) { 287 break; 288 } 289 } 290 291 /* If the instruction generating inst's source also wrote the 292 * flag, and inst is doing a simple .nz comparison, then inst 293 * is redundant - the appropriate value is already in the flag 294 * register. Delete inst. 295 */ 296 if (inst->conditional_mod == BRW_CONDITIONAL_NZ && 297 !inst->src[0].negate && 298 scan_inst->flags_written()) { 299 inst->remove(block); 300 progress = true; 301 break; 302 } 303 304 /* The conditional mod of the CMP/CMPN instructions behaves 305 * specially because the flag output is not calculated from the 306 * result of the instruction, but the other way around, which 307 * means that even if the condmod to propagate and the condmod 308 * from the CMP instruction are the same they will in general give 309 * different results because they are evaluated based on different 310 * inputs. 311 */ 312 if (scan_inst->opcode == BRW_OPCODE_CMP || 313 scan_inst->opcode == BRW_OPCODE_CMPN) 314 break; 315 316 /* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods": 317 * 318 * * Note that the [post condition signal] bits generated at 319 * the output of a compute are before the .sat. 320 */ 321 if (scan_inst->saturate) 322 break; 323 324 /* From the Sky Lake PRM, Vol 2a, "Multiply": 325 * 326 * "When multiplying integer data types, if one of the sources 327 * is a DW, the resulting full precision data is stored in 328 * the accumulator. However, if the destination data type is 329 * either W or DW, the low bits of the result are written to 330 * the destination register and the remaining high bits are 331 * discarded. This results in undefined Overflow and Sign 332 * flags. Therefore, conditional modifiers and saturation 333 * (.sat) cannot be used in this case." 334 * 335 * We just disallow cmod propagation on all integer multiplies. 336 */ 337 if (!brw_reg_type_is_floating_point(scan_inst->dst.type) && 338 scan_inst->opcode == BRW_OPCODE_MUL) 339 break; 340 341 /* Otherwise, try propagating the conditional. */ 342 enum brw_conditional_mod cond = 343 inst->src[0].negate ? brw_swap_cmod(inst->conditional_mod) 344 : inst->conditional_mod; 345 346 if (scan_inst->can_do_cmod() && 347 ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) || 348 scan_inst->conditional_mod == cond)) { 349 scan_inst->conditional_mod = cond; 350 inst->remove(block); 351 progress = true; 352 } 353 break; 354 } 355 356 if (scan_inst->flags_written()) 357 break; 358 359 read_flag = read_flag || scan_inst->flags_read(devinfo); 360 } 361 } 362 363 return progress; 364} 365 366bool 367fs_visitor::opt_cmod_propagation() 368{ 369 bool progress = false; 370 371 foreach_block_reverse(block, cfg) { 372 progress = opt_cmod_propagation_local(devinfo, block) || progress; 373 } 374 375 if (progress) 376 invalidate_live_intervals(); 377 378 return progress; 379} 380