1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2013 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21b8e80941Smrg * DEALINGS IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg/** 25b8e80941Smrg * \file opt_vectorize.cpp 26b8e80941Smrg * 27b8e80941Smrg * Combines scalar assignments of the same expression (modulo swizzle) to 28b8e80941Smrg * multiple channels of the same variable into a single vectorized expression 29b8e80941Smrg * and assignment. 30b8e80941Smrg * 31b8e80941Smrg * Many generated shaders contain scalarized code. That is, they contain 32b8e80941Smrg * 33b8e80941Smrg * r1.x = log2(v0.x); 34b8e80941Smrg * r1.y = log2(v0.y); 35b8e80941Smrg * r1.z = log2(v0.z); 36b8e80941Smrg * 37b8e80941Smrg * rather than 38b8e80941Smrg * 39b8e80941Smrg * r1.xyz = log2(v0.xyz); 40b8e80941Smrg * 41b8e80941Smrg * We look for consecutive assignments of the same expression (modulo swizzle) 42b8e80941Smrg * to each channel of the same variable. 43b8e80941Smrg * 44b8e80941Smrg * For instance, we want to convert these three scalar operations 45b8e80941Smrg * 46b8e80941Smrg * (assign (x) (var_ref r1) (expression float log2 (swiz x (var_ref v0)))) 47b8e80941Smrg * (assign (y) (var_ref r1) (expression float log2 (swiz y (var_ref v0)))) 48b8e80941Smrg * (assign (z) (var_ref r1) (expression float log2 (swiz z (var_ref v0)))) 49b8e80941Smrg * 50b8e80941Smrg * into a single vector operation 51b8e80941Smrg * 52b8e80941Smrg * (assign (xyz) (var_ref r1) (expression vec3 log2 (swiz xyz (var_ref v0)))) 53b8e80941Smrg */ 54b8e80941Smrg 55b8e80941Smrg#include "ir.h" 56b8e80941Smrg#include "ir_visitor.h" 57b8e80941Smrg#include "ir_optimization.h" 58b8e80941Smrg#include "compiler/glsl_types.h" 59b8e80941Smrg#include "program/prog_instruction.h" 60b8e80941Smrg 61b8e80941Smrgnamespace { 62b8e80941Smrg 63b8e80941Smrgclass ir_vectorize_visitor : public ir_hierarchical_visitor { 64b8e80941Smrgpublic: 65b8e80941Smrg void clear() 66b8e80941Smrg { 67b8e80941Smrg assignment[0] = NULL; 68b8e80941Smrg assignment[1] = NULL; 69b8e80941Smrg assignment[2] = NULL; 70b8e80941Smrg assignment[3] = NULL; 71b8e80941Smrg current_assignment = NULL; 72b8e80941Smrg last_assignment = NULL; 73b8e80941Smrg channels = 0; 74b8e80941Smrg has_swizzle = false; 75b8e80941Smrg } 76b8e80941Smrg 77b8e80941Smrg ir_vectorize_visitor() 78b8e80941Smrg { 79b8e80941Smrg clear(); 80b8e80941Smrg progress = false; 81b8e80941Smrg } 82b8e80941Smrg 83b8e80941Smrg virtual ir_visitor_status visit_enter(ir_assignment *); 84b8e80941Smrg virtual ir_visitor_status visit_enter(ir_swizzle *); 85b8e80941Smrg virtual ir_visitor_status visit_enter(ir_dereference_array *); 86b8e80941Smrg virtual ir_visitor_status visit_enter(ir_expression *); 87b8e80941Smrg virtual ir_visitor_status visit_enter(ir_if *); 88b8e80941Smrg virtual ir_visitor_status visit_enter(ir_loop *); 89b8e80941Smrg virtual ir_visitor_status visit_enter(ir_texture *); 90b8e80941Smrg 91b8e80941Smrg virtual ir_visitor_status visit_leave(ir_assignment *); 92b8e80941Smrg 93b8e80941Smrg void try_vectorize(); 94b8e80941Smrg 95b8e80941Smrg ir_assignment *assignment[4]; 96b8e80941Smrg ir_assignment *current_assignment, *last_assignment; 97b8e80941Smrg unsigned channels; 98b8e80941Smrg bool has_swizzle; 99b8e80941Smrg 100b8e80941Smrg bool progress; 101b8e80941Smrg}; 102b8e80941Smrg 103b8e80941Smrg} /* unnamed namespace */ 104b8e80941Smrg 105b8e80941Smrg/** 106b8e80941Smrg * Rewrites the swizzles and types of a right-hand side of an assignment. 107b8e80941Smrg * 108b8e80941Smrg * From the example above, this function would be called (by visit_tree()) on 109b8e80941Smrg * the nodes of the tree (expression float log2 (swiz z (var_ref v0))), 110b8e80941Smrg * rewriting it into (expression vec3 log2 (swiz xyz (var_ref v0))). 111b8e80941Smrg * 112b8e80941Smrg * The function operates on ir_expressions (and its operands) and ir_swizzles. 113b8e80941Smrg * For expressions it sets a new type and swizzles any non-expression and non- 114b8e80941Smrg * swizzle scalar operands into appropriately sized vector arguments. For 115b8e80941Smrg * example, if combining 116b8e80941Smrg * 117b8e80941Smrg * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0) (var_ref v1)))) 118b8e80941Smrg * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0) (var_ref v1)))) 119b8e80941Smrg * 120b8e80941Smrg * where v1 is a scalar, rewrite_swizzle() would insert a swizzle on 121b8e80941Smrg * (var_ref v1) such that the final result was 122b8e80941Smrg * 123b8e80941Smrg * (assign (xy) (var_ref r1) (expression vec2 + (swiz xy (var_ref v0)) 124b8e80941Smrg * (swiz xx (var_ref v1)))) 125b8e80941Smrg * 126b8e80941Smrg * For swizzles, it sets a new type, and if the variable being swizzled is a 127b8e80941Smrg * vector it overwrites the swizzle mask with the ir_swizzle_mask passed as the 128b8e80941Smrg * data parameter. If the swizzled variable is scalar, then the swizzle was 129b8e80941Smrg * added by an earlier call to rewrite_swizzle() on an expression, so the 130b8e80941Smrg * mask should not be modified. 131b8e80941Smrg */ 132b8e80941Smrgstatic void 133b8e80941Smrgrewrite_swizzle(ir_instruction *ir, void *data) 134b8e80941Smrg{ 135b8e80941Smrg ir_swizzle_mask *mask = (ir_swizzle_mask *)data; 136b8e80941Smrg 137b8e80941Smrg switch (ir->ir_type) { 138b8e80941Smrg case ir_type_swizzle: { 139b8e80941Smrg ir_swizzle *swz = (ir_swizzle *)ir; 140b8e80941Smrg if (swz->val->type->is_vector()) { 141b8e80941Smrg swz->mask = *mask; 142b8e80941Smrg } 143b8e80941Smrg swz->type = glsl_type::get_instance(swz->type->base_type, 144b8e80941Smrg mask->num_components, 1); 145b8e80941Smrg break; 146b8e80941Smrg } 147b8e80941Smrg case ir_type_expression: { 148b8e80941Smrg ir_expression *expr = (ir_expression *)ir; 149b8e80941Smrg expr->type = glsl_type::get_instance(expr->type->base_type, 150b8e80941Smrg mask->num_components, 1); 151b8e80941Smrg for (unsigned i = 0; i < 4; i++) { 152b8e80941Smrg if (expr->operands[i]) { 153b8e80941Smrg ir_rvalue *rval = expr->operands[i]->as_rvalue(); 154b8e80941Smrg if (rval && rval->type->is_scalar() && 155b8e80941Smrg !rval->as_expression() && !rval->as_swizzle()) { 156b8e80941Smrg expr->operands[i] = new(ir) ir_swizzle(rval, 0, 0, 0, 0, 157b8e80941Smrg mask->num_components); 158b8e80941Smrg } 159b8e80941Smrg } 160b8e80941Smrg } 161b8e80941Smrg break; 162b8e80941Smrg } 163b8e80941Smrg default: 164b8e80941Smrg break; 165b8e80941Smrg } 166b8e80941Smrg} 167b8e80941Smrg 168b8e80941Smrg/** 169b8e80941Smrg * Attempt to vectorize the previously saved assignments, and clear them from 170b8e80941Smrg * consideration. 171b8e80941Smrg * 172b8e80941Smrg * If the assignments are able to be combined, it modifies in-place the last 173b8e80941Smrg * assignment seen to be an equivalent vector form of the scalar assignments. 174b8e80941Smrg * It then removes the other now obsolete scalar assignments. 175b8e80941Smrg */ 176b8e80941Smrgvoid 177b8e80941Smrgir_vectorize_visitor::try_vectorize() 178b8e80941Smrg{ 179b8e80941Smrg if (this->last_assignment && this->channels > 1) { 180b8e80941Smrg ir_swizzle_mask mask = {0, 0, 0, 0, channels, 0}; 181b8e80941Smrg 182b8e80941Smrg this->last_assignment->write_mask = 0; 183b8e80941Smrg 184b8e80941Smrg for (unsigned i = 0, j = 0; i < 4; i++) { 185b8e80941Smrg if (this->assignment[i]) { 186b8e80941Smrg this->last_assignment->write_mask |= 1 << i; 187b8e80941Smrg 188b8e80941Smrg if (this->assignment[i] != this->last_assignment) { 189b8e80941Smrg this->assignment[i]->remove(); 190b8e80941Smrg } 191b8e80941Smrg 192b8e80941Smrg switch (j) { 193b8e80941Smrg case 0: mask.x = i; break; 194b8e80941Smrg case 1: mask.y = i; break; 195b8e80941Smrg case 2: mask.z = i; break; 196b8e80941Smrg case 3: mask.w = i; break; 197b8e80941Smrg } 198b8e80941Smrg 199b8e80941Smrg j++; 200b8e80941Smrg } 201b8e80941Smrg } 202b8e80941Smrg 203b8e80941Smrg visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask); 204b8e80941Smrg 205b8e80941Smrg this->progress = true; 206b8e80941Smrg } 207b8e80941Smrg clear(); 208b8e80941Smrg} 209b8e80941Smrg 210b8e80941Smrg/** 211b8e80941Smrg * Returns whether the write mask is a single channel. 212b8e80941Smrg */ 213b8e80941Smrgstatic bool 214b8e80941Smrgsingle_channel_write_mask(unsigned write_mask) 215b8e80941Smrg{ 216b8e80941Smrg return write_mask != 0 && (write_mask & (write_mask - 1)) == 0; 217b8e80941Smrg} 218b8e80941Smrg 219b8e80941Smrg/** 220b8e80941Smrg * Translates single-channeled write mask to single-channeled swizzle. 221b8e80941Smrg */ 222b8e80941Smrgstatic unsigned 223b8e80941Smrgwrite_mask_to_swizzle(unsigned write_mask) 224b8e80941Smrg{ 225b8e80941Smrg switch (write_mask) { 226b8e80941Smrg case WRITEMASK_X: return SWIZZLE_X; 227b8e80941Smrg case WRITEMASK_Y: return SWIZZLE_Y; 228b8e80941Smrg case WRITEMASK_Z: return SWIZZLE_Z; 229b8e80941Smrg case WRITEMASK_W: return SWIZZLE_W; 230b8e80941Smrg } 231b8e80941Smrg unreachable("not reached"); 232b8e80941Smrg} 233b8e80941Smrg 234b8e80941Smrg/** 235b8e80941Smrg * Returns whether a single-channeled write mask matches a swizzle. 236b8e80941Smrg */ 237b8e80941Smrgstatic bool 238b8e80941Smrgwrite_mask_matches_swizzle(unsigned write_mask, 239b8e80941Smrg const ir_swizzle *swz) 240b8e80941Smrg{ 241b8e80941Smrg return ((write_mask == WRITEMASK_X && swz->mask.x == SWIZZLE_X) || 242b8e80941Smrg (write_mask == WRITEMASK_Y && swz->mask.x == SWIZZLE_Y) || 243b8e80941Smrg (write_mask == WRITEMASK_Z && swz->mask.x == SWIZZLE_Z) || 244b8e80941Smrg (write_mask == WRITEMASK_W && swz->mask.x == SWIZZLE_W)); 245b8e80941Smrg} 246b8e80941Smrg 247b8e80941Smrg/** 248b8e80941Smrg * Upon entering an ir_assignment, attempt to vectorize the currently tracked 249b8e80941Smrg * assignments if the current assignment is not suitable. Keep a pointer to 250b8e80941Smrg * the current assignment. 251b8e80941Smrg */ 252b8e80941Smrgir_visitor_status 253b8e80941Smrgir_vectorize_visitor::visit_enter(ir_assignment *ir) 254b8e80941Smrg{ 255b8e80941Smrg ir_dereference *lhs = this->last_assignment != NULL ? 256b8e80941Smrg this->last_assignment->lhs : NULL; 257b8e80941Smrg ir_rvalue *rhs = this->last_assignment != NULL ? 258b8e80941Smrg this->last_assignment->rhs : NULL; 259b8e80941Smrg 260b8e80941Smrg if (ir->condition || 261b8e80941Smrg this->channels >= 4 || 262b8e80941Smrg !single_channel_write_mask(ir->write_mask) || 263b8e80941Smrg this->assignment[write_mask_to_swizzle(ir->write_mask)] != NULL || 264b8e80941Smrg (lhs && !ir->lhs->equals(lhs)) || 265b8e80941Smrg (rhs && !ir->rhs->equals(rhs, ir_type_swizzle))) { 266b8e80941Smrg try_vectorize(); 267b8e80941Smrg } 268b8e80941Smrg 269b8e80941Smrg this->current_assignment = ir; 270b8e80941Smrg 271b8e80941Smrg return visit_continue; 272b8e80941Smrg} 273b8e80941Smrg 274b8e80941Smrg/** 275b8e80941Smrg * Upon entering an ir_swizzle, set ::has_swizzle if we're visiting from an 276b8e80941Smrg * ir_assignment (i.e., that ::current_assignment is set) and the swizzle mask 277b8e80941Smrg * matches the current assignment's write mask. 278b8e80941Smrg * 279b8e80941Smrg * If the write mask doesn't match the swizzle mask, remove the current 280b8e80941Smrg * assignment from further consideration. 281b8e80941Smrg */ 282b8e80941Smrgir_visitor_status 283b8e80941Smrgir_vectorize_visitor::visit_enter(ir_swizzle *ir) 284b8e80941Smrg{ 285b8e80941Smrg if (this->current_assignment) { 286b8e80941Smrg if (write_mask_matches_swizzle(this->current_assignment->write_mask, ir)) { 287b8e80941Smrg this->has_swizzle = true; 288b8e80941Smrg } else { 289b8e80941Smrg this->current_assignment = NULL; 290b8e80941Smrg } 291b8e80941Smrg } 292b8e80941Smrg return visit_continue; 293b8e80941Smrg} 294b8e80941Smrg 295b8e80941Smrg/* Upon entering an ir_array_dereference, remove the current assignment from 296b8e80941Smrg * further consideration. Since the index of an array dereference must scalar, 297b8e80941Smrg * we are not able to vectorize it. 298b8e80941Smrg * 299b8e80941Smrg * FINISHME: If all of scalar indices are identical we could vectorize. 300b8e80941Smrg */ 301b8e80941Smrgir_visitor_status 302b8e80941Smrgir_vectorize_visitor::visit_enter(ir_dereference_array *) 303b8e80941Smrg{ 304b8e80941Smrg this->current_assignment = NULL; 305b8e80941Smrg return visit_continue_with_parent; 306b8e80941Smrg} 307b8e80941Smrg 308b8e80941Smrg/** 309b8e80941Smrg * Upon entering an ir_expression, remove the current assignment from further 310b8e80941Smrg * consideration if the expression operates horizontally on vectors. 311b8e80941Smrg */ 312b8e80941Smrgir_visitor_status 313b8e80941Smrgir_vectorize_visitor::visit_enter(ir_expression *ir) 314b8e80941Smrg{ 315b8e80941Smrg if (ir->is_horizontal()) { 316b8e80941Smrg this->current_assignment = NULL; 317b8e80941Smrg return visit_continue_with_parent; 318b8e80941Smrg } 319b8e80941Smrg return visit_continue; 320b8e80941Smrg} 321b8e80941Smrg 322b8e80941Smrg/* Since there is no statement to visit between the "then" and "else" 323b8e80941Smrg * instructions try to vectorize before, in between, and after them to avoid 324b8e80941Smrg * combining statements from different basic blocks. 325b8e80941Smrg */ 326b8e80941Smrgir_visitor_status 327b8e80941Smrgir_vectorize_visitor::visit_enter(ir_if *ir) 328b8e80941Smrg{ 329b8e80941Smrg try_vectorize(); 330b8e80941Smrg 331b8e80941Smrg visit_list_elements(this, &ir->then_instructions); 332b8e80941Smrg try_vectorize(); 333b8e80941Smrg 334b8e80941Smrg visit_list_elements(this, &ir->else_instructions); 335b8e80941Smrg try_vectorize(); 336b8e80941Smrg 337b8e80941Smrg return visit_continue_with_parent; 338b8e80941Smrg} 339b8e80941Smrg 340b8e80941Smrg/* Since there is no statement to visit between the instructions in the body of 341b8e80941Smrg * the loop and the instructions after it try to vectorize before and after the 342b8e80941Smrg * body to avoid combining statements from different basic blocks. 343b8e80941Smrg */ 344b8e80941Smrgir_visitor_status 345b8e80941Smrgir_vectorize_visitor::visit_enter(ir_loop *ir) 346b8e80941Smrg{ 347b8e80941Smrg try_vectorize(); 348b8e80941Smrg 349b8e80941Smrg visit_list_elements(this, &ir->body_instructions); 350b8e80941Smrg try_vectorize(); 351b8e80941Smrg 352b8e80941Smrg return visit_continue_with_parent; 353b8e80941Smrg} 354b8e80941Smrg 355b8e80941Smrg/** 356b8e80941Smrg * Upon entering an ir_texture, remove the current assignment from 357b8e80941Smrg * further consideration. Vectorizing multiple texture lookups into one 358b8e80941Smrg * is wrong. 359b8e80941Smrg */ 360b8e80941Smrgir_visitor_status 361b8e80941Smrgir_vectorize_visitor::visit_enter(ir_texture *) 362b8e80941Smrg{ 363b8e80941Smrg this->current_assignment = NULL; 364b8e80941Smrg return visit_continue_with_parent; 365b8e80941Smrg} 366b8e80941Smrg 367b8e80941Smrg/** 368b8e80941Smrg * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if 369b8e80941Smrg * the swizzle mask(s) found were appropriate. Also save a pointer in 370b8e80941Smrg * ::last_assignment so that we can compare future assignments with it. 371b8e80941Smrg * 372b8e80941Smrg * Finally, clear ::current_assignment and ::has_swizzle. 373b8e80941Smrg */ 374b8e80941Smrgir_visitor_status 375b8e80941Smrgir_vectorize_visitor::visit_leave(ir_assignment *ir) 376b8e80941Smrg{ 377b8e80941Smrg if (this->has_swizzle && this->current_assignment) { 378b8e80941Smrg assert(this->current_assignment == ir); 379b8e80941Smrg 380b8e80941Smrg unsigned channel = write_mask_to_swizzle(this->current_assignment->write_mask); 381b8e80941Smrg this->assignment[channel] = ir; 382b8e80941Smrg this->channels++; 383b8e80941Smrg 384b8e80941Smrg this->last_assignment = this->current_assignment; 385b8e80941Smrg } 386b8e80941Smrg this->current_assignment = NULL; 387b8e80941Smrg this->has_swizzle = false; 388b8e80941Smrg return visit_continue; 389b8e80941Smrg} 390b8e80941Smrg 391b8e80941Smrg/** 392b8e80941Smrg * Combines scalar assignments of the same expression (modulo swizzle) to 393b8e80941Smrg * multiple channels of the same variable into a single vectorized expression 394b8e80941Smrg * and assignment. 395b8e80941Smrg */ 396b8e80941Smrgbool 397b8e80941Smrgdo_vectorize(exec_list *instructions) 398b8e80941Smrg{ 399b8e80941Smrg ir_vectorize_visitor v; 400b8e80941Smrg 401b8e80941Smrg v.run(instructions); 402b8e80941Smrg 403b8e80941Smrg /* Try to vectorize the last assignments seen. */ 404b8e80941Smrg v.try_vectorize(); 405b8e80941Smrg 406b8e80941Smrg return v.progress; 407b8e80941Smrg} 408