101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2013 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 2101e04c3fSmrg * DEALINGS IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg/** 2501e04c3fSmrg * \file opt_vectorize.cpp 2601e04c3fSmrg * 2701e04c3fSmrg * Combines scalar assignments of the same expression (modulo swizzle) to 2801e04c3fSmrg * multiple channels of the same variable into a single vectorized expression 2901e04c3fSmrg * and assignment. 3001e04c3fSmrg * 3101e04c3fSmrg * Many generated shaders contain scalarized code. That is, they contain 3201e04c3fSmrg * 3301e04c3fSmrg * r1.x = log2(v0.x); 3401e04c3fSmrg * r1.y = log2(v0.y); 3501e04c3fSmrg * r1.z = log2(v0.z); 3601e04c3fSmrg * 3701e04c3fSmrg * rather than 3801e04c3fSmrg * 3901e04c3fSmrg * r1.xyz = log2(v0.xyz); 4001e04c3fSmrg * 4101e04c3fSmrg * We look for consecutive assignments of the same expression (modulo swizzle) 4201e04c3fSmrg * to each channel of the same variable. 4301e04c3fSmrg * 4401e04c3fSmrg * For instance, we want to convert these three scalar operations 4501e04c3fSmrg * 4601e04c3fSmrg * (assign (x) (var_ref r1) (expression float log2 (swiz x (var_ref v0)))) 4701e04c3fSmrg * (assign (y) (var_ref r1) (expression float log2 (swiz y (var_ref v0)))) 4801e04c3fSmrg * (assign (z) (var_ref r1) (expression float log2 (swiz z (var_ref v0)))) 4901e04c3fSmrg * 5001e04c3fSmrg * into a single vector operation 5101e04c3fSmrg * 5201e04c3fSmrg * (assign (xyz) (var_ref r1) (expression vec3 log2 (swiz xyz (var_ref v0)))) 5301e04c3fSmrg */ 5401e04c3fSmrg 5501e04c3fSmrg#include "ir.h" 5601e04c3fSmrg#include "ir_visitor.h" 5701e04c3fSmrg#include "ir_optimization.h" 5801e04c3fSmrg#include "compiler/glsl_types.h" 5901e04c3fSmrg#include "program/prog_instruction.h" 6001e04c3fSmrg 6101e04c3fSmrgnamespace { 6201e04c3fSmrg 6301e04c3fSmrgclass ir_vectorize_visitor : public ir_hierarchical_visitor { 6401e04c3fSmrgpublic: 6501e04c3fSmrg void clear() 6601e04c3fSmrg { 6701e04c3fSmrg assignment[0] = NULL; 6801e04c3fSmrg assignment[1] = NULL; 6901e04c3fSmrg assignment[2] = NULL; 7001e04c3fSmrg assignment[3] = NULL; 7101e04c3fSmrg current_assignment = NULL; 7201e04c3fSmrg last_assignment = NULL; 7301e04c3fSmrg channels = 0; 7401e04c3fSmrg has_swizzle = false; 7501e04c3fSmrg } 7601e04c3fSmrg 7701e04c3fSmrg ir_vectorize_visitor() 7801e04c3fSmrg { 7901e04c3fSmrg clear(); 8001e04c3fSmrg progress = false; 8101e04c3fSmrg } 8201e04c3fSmrg 8301e04c3fSmrg virtual ir_visitor_status visit_enter(ir_assignment *); 8401e04c3fSmrg virtual ir_visitor_status visit_enter(ir_swizzle *); 8501e04c3fSmrg virtual ir_visitor_status visit_enter(ir_dereference_array *); 8601e04c3fSmrg virtual ir_visitor_status visit_enter(ir_expression *); 8701e04c3fSmrg virtual ir_visitor_status visit_enter(ir_if *); 8801e04c3fSmrg virtual ir_visitor_status visit_enter(ir_loop *); 8901e04c3fSmrg virtual ir_visitor_status visit_enter(ir_texture *); 9001e04c3fSmrg 9101e04c3fSmrg virtual ir_visitor_status visit_leave(ir_assignment *); 9201e04c3fSmrg 9301e04c3fSmrg void try_vectorize(); 9401e04c3fSmrg 9501e04c3fSmrg ir_assignment *assignment[4]; 9601e04c3fSmrg ir_assignment *current_assignment, *last_assignment; 9701e04c3fSmrg unsigned channels; 9801e04c3fSmrg bool has_swizzle; 9901e04c3fSmrg 10001e04c3fSmrg bool progress; 10101e04c3fSmrg}; 10201e04c3fSmrg 10301e04c3fSmrg} /* unnamed namespace */ 10401e04c3fSmrg 10501e04c3fSmrg/** 10601e04c3fSmrg * Rewrites the swizzles and types of a right-hand side of an assignment. 10701e04c3fSmrg * 10801e04c3fSmrg * From the example above, this function would be called (by visit_tree()) on 10901e04c3fSmrg * the nodes of the tree (expression float log2 (swiz z (var_ref v0))), 11001e04c3fSmrg * rewriting it into (expression vec3 log2 (swiz xyz (var_ref v0))). 11101e04c3fSmrg * 11201e04c3fSmrg * The function operates on ir_expressions (and its operands) and ir_swizzles. 11301e04c3fSmrg * For expressions it sets a new type and swizzles any non-expression and non- 11401e04c3fSmrg * swizzle scalar operands into appropriately sized vector arguments. For 11501e04c3fSmrg * example, if combining 11601e04c3fSmrg * 11701e04c3fSmrg * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0) (var_ref v1)))) 11801e04c3fSmrg * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0) (var_ref v1)))) 11901e04c3fSmrg * 12001e04c3fSmrg * where v1 is a scalar, rewrite_swizzle() would insert a swizzle on 12101e04c3fSmrg * (var_ref v1) such that the final result was 12201e04c3fSmrg * 12301e04c3fSmrg * (assign (xy) (var_ref r1) (expression vec2 + (swiz xy (var_ref v0)) 12401e04c3fSmrg * (swiz xx (var_ref v1)))) 12501e04c3fSmrg * 12601e04c3fSmrg * For swizzles, it sets a new type, and if the variable being swizzled is a 12701e04c3fSmrg * vector it overwrites the swizzle mask with the ir_swizzle_mask passed as the 12801e04c3fSmrg * data parameter. If the swizzled variable is scalar, then the swizzle was 12901e04c3fSmrg * added by an earlier call to rewrite_swizzle() on an expression, so the 13001e04c3fSmrg * mask should not be modified. 13101e04c3fSmrg */ 13201e04c3fSmrgstatic void 13301e04c3fSmrgrewrite_swizzle(ir_instruction *ir, void *data) 13401e04c3fSmrg{ 13501e04c3fSmrg ir_swizzle_mask *mask = (ir_swizzle_mask *)data; 13601e04c3fSmrg 13701e04c3fSmrg switch (ir->ir_type) { 13801e04c3fSmrg case ir_type_swizzle: { 13901e04c3fSmrg ir_swizzle *swz = (ir_swizzle *)ir; 14001e04c3fSmrg if (swz->val->type->is_vector()) { 14101e04c3fSmrg swz->mask = *mask; 14201e04c3fSmrg } 14301e04c3fSmrg swz->type = glsl_type::get_instance(swz->type->base_type, 14401e04c3fSmrg mask->num_components, 1); 14501e04c3fSmrg break; 14601e04c3fSmrg } 14701e04c3fSmrg case ir_type_expression: { 14801e04c3fSmrg ir_expression *expr = (ir_expression *)ir; 14901e04c3fSmrg expr->type = glsl_type::get_instance(expr->type->base_type, 15001e04c3fSmrg mask->num_components, 1); 15101e04c3fSmrg for (unsigned i = 0; i < 4; i++) { 15201e04c3fSmrg if (expr->operands[i]) { 15301e04c3fSmrg ir_rvalue *rval = expr->operands[i]->as_rvalue(); 15401e04c3fSmrg if (rval && rval->type->is_scalar() && 15501e04c3fSmrg !rval->as_expression() && !rval->as_swizzle()) { 15601e04c3fSmrg expr->operands[i] = new(ir) ir_swizzle(rval, 0, 0, 0, 0, 15701e04c3fSmrg mask->num_components); 15801e04c3fSmrg } 15901e04c3fSmrg } 16001e04c3fSmrg } 16101e04c3fSmrg break; 16201e04c3fSmrg } 16301e04c3fSmrg default: 16401e04c3fSmrg break; 16501e04c3fSmrg } 16601e04c3fSmrg} 16701e04c3fSmrg 16801e04c3fSmrg/** 16901e04c3fSmrg * Attempt to vectorize the previously saved assignments, and clear them from 17001e04c3fSmrg * consideration. 17101e04c3fSmrg * 17201e04c3fSmrg * If the assignments are able to be combined, it modifies in-place the last 17301e04c3fSmrg * assignment seen to be an equivalent vector form of the scalar assignments. 17401e04c3fSmrg * It then removes the other now obsolete scalar assignments. 17501e04c3fSmrg */ 17601e04c3fSmrgvoid 17701e04c3fSmrgir_vectorize_visitor::try_vectorize() 17801e04c3fSmrg{ 17901e04c3fSmrg if (this->last_assignment && this->channels > 1) { 18001e04c3fSmrg ir_swizzle_mask mask = {0, 0, 0, 0, channels, 0}; 18101e04c3fSmrg 18201e04c3fSmrg this->last_assignment->write_mask = 0; 18301e04c3fSmrg 18401e04c3fSmrg for (unsigned i = 0, j = 0; i < 4; i++) { 18501e04c3fSmrg if (this->assignment[i]) { 18601e04c3fSmrg this->last_assignment->write_mask |= 1 << i; 18701e04c3fSmrg 18801e04c3fSmrg if (this->assignment[i] != this->last_assignment) { 18901e04c3fSmrg this->assignment[i]->remove(); 19001e04c3fSmrg } 19101e04c3fSmrg 19201e04c3fSmrg switch (j) { 19301e04c3fSmrg case 0: mask.x = i; break; 19401e04c3fSmrg case 1: mask.y = i; break; 19501e04c3fSmrg case 2: mask.z = i; break; 19601e04c3fSmrg case 3: mask.w = i; break; 19701e04c3fSmrg } 19801e04c3fSmrg 19901e04c3fSmrg j++; 20001e04c3fSmrg } 20101e04c3fSmrg } 20201e04c3fSmrg 20301e04c3fSmrg visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask); 20401e04c3fSmrg 20501e04c3fSmrg this->progress = true; 20601e04c3fSmrg } 20701e04c3fSmrg clear(); 20801e04c3fSmrg} 20901e04c3fSmrg 21001e04c3fSmrg/** 21101e04c3fSmrg * Returns whether the write mask is a single channel. 21201e04c3fSmrg */ 21301e04c3fSmrgstatic bool 21401e04c3fSmrgsingle_channel_write_mask(unsigned write_mask) 21501e04c3fSmrg{ 21601e04c3fSmrg return write_mask != 0 && (write_mask & (write_mask - 1)) == 0; 21701e04c3fSmrg} 21801e04c3fSmrg 21901e04c3fSmrg/** 22001e04c3fSmrg * Translates single-channeled write mask to single-channeled swizzle. 22101e04c3fSmrg */ 22201e04c3fSmrgstatic unsigned 22301e04c3fSmrgwrite_mask_to_swizzle(unsigned write_mask) 22401e04c3fSmrg{ 22501e04c3fSmrg switch (write_mask) { 22601e04c3fSmrg case WRITEMASK_X: return SWIZZLE_X; 22701e04c3fSmrg case WRITEMASK_Y: return SWIZZLE_Y; 22801e04c3fSmrg case WRITEMASK_Z: return SWIZZLE_Z; 22901e04c3fSmrg case WRITEMASK_W: return SWIZZLE_W; 23001e04c3fSmrg } 23101e04c3fSmrg unreachable("not reached"); 23201e04c3fSmrg} 23301e04c3fSmrg 23401e04c3fSmrg/** 23501e04c3fSmrg * Returns whether a single-channeled write mask matches a swizzle. 23601e04c3fSmrg */ 23701e04c3fSmrgstatic bool 23801e04c3fSmrgwrite_mask_matches_swizzle(unsigned write_mask, 23901e04c3fSmrg const ir_swizzle *swz) 24001e04c3fSmrg{ 24101e04c3fSmrg return ((write_mask == WRITEMASK_X && swz->mask.x == SWIZZLE_X) || 24201e04c3fSmrg (write_mask == WRITEMASK_Y && swz->mask.x == SWIZZLE_Y) || 24301e04c3fSmrg (write_mask == WRITEMASK_Z && swz->mask.x == SWIZZLE_Z) || 24401e04c3fSmrg (write_mask == WRITEMASK_W && swz->mask.x == SWIZZLE_W)); 24501e04c3fSmrg} 24601e04c3fSmrg 24701e04c3fSmrg/** 24801e04c3fSmrg * Upon entering an ir_assignment, attempt to vectorize the currently tracked 24901e04c3fSmrg * assignments if the current assignment is not suitable. Keep a pointer to 25001e04c3fSmrg * the current assignment. 25101e04c3fSmrg */ 25201e04c3fSmrgir_visitor_status 25301e04c3fSmrgir_vectorize_visitor::visit_enter(ir_assignment *ir) 25401e04c3fSmrg{ 25501e04c3fSmrg ir_dereference *lhs = this->last_assignment != NULL ? 25601e04c3fSmrg this->last_assignment->lhs : NULL; 25701e04c3fSmrg ir_rvalue *rhs = this->last_assignment != NULL ? 25801e04c3fSmrg this->last_assignment->rhs : NULL; 25901e04c3fSmrg 26001e04c3fSmrg if (ir->condition || 26101e04c3fSmrg this->channels >= 4 || 26201e04c3fSmrg !single_channel_write_mask(ir->write_mask) || 26301e04c3fSmrg this->assignment[write_mask_to_swizzle(ir->write_mask)] != NULL || 26401e04c3fSmrg (lhs && !ir->lhs->equals(lhs)) || 26501e04c3fSmrg (rhs && !ir->rhs->equals(rhs, ir_type_swizzle))) { 26601e04c3fSmrg try_vectorize(); 26701e04c3fSmrg } 26801e04c3fSmrg 26901e04c3fSmrg this->current_assignment = ir; 27001e04c3fSmrg 27101e04c3fSmrg return visit_continue; 27201e04c3fSmrg} 27301e04c3fSmrg 27401e04c3fSmrg/** 27501e04c3fSmrg * Upon entering an ir_swizzle, set ::has_swizzle if we're visiting from an 27601e04c3fSmrg * ir_assignment (i.e., that ::current_assignment is set) and the swizzle mask 27701e04c3fSmrg * matches the current assignment's write mask. 27801e04c3fSmrg * 27901e04c3fSmrg * If the write mask doesn't match the swizzle mask, remove the current 28001e04c3fSmrg * assignment from further consideration. 28101e04c3fSmrg */ 28201e04c3fSmrgir_visitor_status 28301e04c3fSmrgir_vectorize_visitor::visit_enter(ir_swizzle *ir) 28401e04c3fSmrg{ 28501e04c3fSmrg if (this->current_assignment) { 28601e04c3fSmrg if (write_mask_matches_swizzle(this->current_assignment->write_mask, ir)) { 28701e04c3fSmrg this->has_swizzle = true; 28801e04c3fSmrg } else { 28901e04c3fSmrg this->current_assignment = NULL; 29001e04c3fSmrg } 29101e04c3fSmrg } 29201e04c3fSmrg return visit_continue; 29301e04c3fSmrg} 29401e04c3fSmrg 29501e04c3fSmrg/* Upon entering an ir_array_dereference, remove the current assignment from 29601e04c3fSmrg * further consideration. Since the index of an array dereference must scalar, 29701e04c3fSmrg * we are not able to vectorize it. 29801e04c3fSmrg * 29901e04c3fSmrg * FINISHME: If all of scalar indices are identical we could vectorize. 30001e04c3fSmrg */ 30101e04c3fSmrgir_visitor_status 30201e04c3fSmrgir_vectorize_visitor::visit_enter(ir_dereference_array *) 30301e04c3fSmrg{ 30401e04c3fSmrg this->current_assignment = NULL; 30501e04c3fSmrg return visit_continue_with_parent; 30601e04c3fSmrg} 30701e04c3fSmrg 30801e04c3fSmrg/** 30901e04c3fSmrg * Upon entering an ir_expression, remove the current assignment from further 31001e04c3fSmrg * consideration if the expression operates horizontally on vectors. 31101e04c3fSmrg */ 31201e04c3fSmrgir_visitor_status 31301e04c3fSmrgir_vectorize_visitor::visit_enter(ir_expression *ir) 31401e04c3fSmrg{ 31501e04c3fSmrg if (ir->is_horizontal()) { 31601e04c3fSmrg this->current_assignment = NULL; 31701e04c3fSmrg return visit_continue_with_parent; 31801e04c3fSmrg } 31901e04c3fSmrg return visit_continue; 32001e04c3fSmrg} 32101e04c3fSmrg 32201e04c3fSmrg/* Since there is no statement to visit between the "then" and "else" 32301e04c3fSmrg * instructions try to vectorize before, in between, and after them to avoid 32401e04c3fSmrg * combining statements from different basic blocks. 32501e04c3fSmrg */ 32601e04c3fSmrgir_visitor_status 32701e04c3fSmrgir_vectorize_visitor::visit_enter(ir_if *ir) 32801e04c3fSmrg{ 32901e04c3fSmrg try_vectorize(); 33001e04c3fSmrg 33101e04c3fSmrg visit_list_elements(this, &ir->then_instructions); 33201e04c3fSmrg try_vectorize(); 33301e04c3fSmrg 33401e04c3fSmrg visit_list_elements(this, &ir->else_instructions); 33501e04c3fSmrg try_vectorize(); 33601e04c3fSmrg 33701e04c3fSmrg return visit_continue_with_parent; 33801e04c3fSmrg} 33901e04c3fSmrg 34001e04c3fSmrg/* Since there is no statement to visit between the instructions in the body of 34101e04c3fSmrg * the loop and the instructions after it try to vectorize before and after the 34201e04c3fSmrg * body to avoid combining statements from different basic blocks. 34301e04c3fSmrg */ 34401e04c3fSmrgir_visitor_status 34501e04c3fSmrgir_vectorize_visitor::visit_enter(ir_loop *ir) 34601e04c3fSmrg{ 34701e04c3fSmrg try_vectorize(); 34801e04c3fSmrg 34901e04c3fSmrg visit_list_elements(this, &ir->body_instructions); 35001e04c3fSmrg try_vectorize(); 35101e04c3fSmrg 35201e04c3fSmrg return visit_continue_with_parent; 35301e04c3fSmrg} 35401e04c3fSmrg 35501e04c3fSmrg/** 35601e04c3fSmrg * Upon entering an ir_texture, remove the current assignment from 35701e04c3fSmrg * further consideration. Vectorizing multiple texture lookups into one 35801e04c3fSmrg * is wrong. 35901e04c3fSmrg */ 36001e04c3fSmrgir_visitor_status 36101e04c3fSmrgir_vectorize_visitor::visit_enter(ir_texture *) 36201e04c3fSmrg{ 36301e04c3fSmrg this->current_assignment = NULL; 36401e04c3fSmrg return visit_continue_with_parent; 36501e04c3fSmrg} 36601e04c3fSmrg 36701e04c3fSmrg/** 36801e04c3fSmrg * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if 36901e04c3fSmrg * the swizzle mask(s) found were appropriate. Also save a pointer in 37001e04c3fSmrg * ::last_assignment so that we can compare future assignments with it. 37101e04c3fSmrg * 37201e04c3fSmrg * Finally, clear ::current_assignment and ::has_swizzle. 37301e04c3fSmrg */ 37401e04c3fSmrgir_visitor_status 37501e04c3fSmrgir_vectorize_visitor::visit_leave(ir_assignment *ir) 37601e04c3fSmrg{ 37701e04c3fSmrg if (this->has_swizzle && this->current_assignment) { 37801e04c3fSmrg assert(this->current_assignment == ir); 37901e04c3fSmrg 38001e04c3fSmrg unsigned channel = write_mask_to_swizzle(this->current_assignment->write_mask); 38101e04c3fSmrg this->assignment[channel] = ir; 38201e04c3fSmrg this->channels++; 38301e04c3fSmrg 38401e04c3fSmrg this->last_assignment = this->current_assignment; 38501e04c3fSmrg } 38601e04c3fSmrg this->current_assignment = NULL; 38701e04c3fSmrg this->has_swizzle = false; 38801e04c3fSmrg return visit_continue; 38901e04c3fSmrg} 39001e04c3fSmrg 39101e04c3fSmrg/** 39201e04c3fSmrg * Combines scalar assignments of the same expression (modulo swizzle) to 39301e04c3fSmrg * multiple channels of the same variable into a single vectorized expression 39401e04c3fSmrg * and assignment. 39501e04c3fSmrg */ 39601e04c3fSmrgbool 39701e04c3fSmrgdo_vectorize(exec_list *instructions) 39801e04c3fSmrg{ 39901e04c3fSmrg ir_vectorize_visitor v; 40001e04c3fSmrg 40101e04c3fSmrg v.run(instructions); 40201e04c3fSmrg 40301e04c3fSmrg /* Try to vectorize the last assignments seen. */ 40401e04c3fSmrg v.try_vectorize(); 40501e04c3fSmrg 40601e04c3fSmrg return v.progress; 40701e04c3fSmrg} 408