/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_int64.cpp
 *
 * Lower 64-bit operations to 32-bit operations.  Each 64-bit value is lowered
 * to a uvec2.  For each operation that can be lowered, there is a function
 * called __builtin_foo with the same number of parameters that takes uvec2
 * sources and produces uvec2 results.
An operation like 3101e04c3fSmrg * 3201e04c3fSmrg * uint64_t(x) * uint64_t(y) 3301e04c3fSmrg * 3401e04c3fSmrg * becomes 3501e04c3fSmrg * 3601e04c3fSmrg * packUint2x32(__builtin_umul64(unpackUint2x32(x), unpackUint2x32(y))); 3701e04c3fSmrg */ 3801e04c3fSmrg 3901e04c3fSmrg#include "main/macros.h" 4001e04c3fSmrg#include "compiler/glsl_types.h" 4101e04c3fSmrg#include "ir.h" 4201e04c3fSmrg#include "ir_rvalue_visitor.h" 4301e04c3fSmrg#include "ir_builder.h" 4401e04c3fSmrg#include "ir_optimization.h" 4501e04c3fSmrg#include "util/hash_table.h" 4601e04c3fSmrg#include "builtin_functions.h" 4701e04c3fSmrg 4801e04c3fSmrgtypedef ir_function_signature *(*function_generator)(void *mem_ctx, 4901e04c3fSmrg builtin_available_predicate avail); 5001e04c3fSmrg 5101e04c3fSmrgusing namespace ir_builder; 5201e04c3fSmrg 5301e04c3fSmrgnamespace lower_64bit { 5401e04c3fSmrgvoid expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src); 5501e04c3fSmrg 5601e04c3fSmrgir_dereference_variable *compact_destination(ir_factory &, 5701e04c3fSmrg const glsl_type *type, 5801e04c3fSmrg ir_variable *result[4]); 5901e04c3fSmrg 6001e04c3fSmrgir_rvalue *lower_op_to_function_call(ir_instruction *base_ir, 6101e04c3fSmrg ir_expression *ir, 6201e04c3fSmrg ir_function_signature *callee); 6301e04c3fSmrg}; 6401e04c3fSmrg 6501e04c3fSmrgusing namespace lower_64bit; 6601e04c3fSmrg 6701e04c3fSmrgnamespace { 6801e04c3fSmrg 6901e04c3fSmrgclass lower_64bit_visitor : public ir_rvalue_visitor { 7001e04c3fSmrgpublic: 7101e04c3fSmrg lower_64bit_visitor(void *mem_ctx, exec_list *instructions, unsigned lower) 7201e04c3fSmrg : progress(false), lower(lower), 7301e04c3fSmrg function_list(), added_functions(&function_list, mem_ctx) 7401e04c3fSmrg { 7501e04c3fSmrg functions = _mesa_hash_table_create(mem_ctx, 767ec681f3Smrg _mesa_hash_string, 7701e04c3fSmrg _mesa_key_string_equal); 7801e04c3fSmrg 7901e04c3fSmrg foreach_in_list(ir_instruction, node, instructions) { 8001e04c3fSmrg ir_function *const f = 
node->as_function(); 8101e04c3fSmrg 8201e04c3fSmrg if (f == NULL || strncmp(f->name, "__builtin_", 10) != 0) 8301e04c3fSmrg continue; 8401e04c3fSmrg 8501e04c3fSmrg add_function(f); 8601e04c3fSmrg } 8701e04c3fSmrg } 8801e04c3fSmrg 8901e04c3fSmrg ~lower_64bit_visitor() 9001e04c3fSmrg { 9101e04c3fSmrg _mesa_hash_table_destroy(functions, NULL); 9201e04c3fSmrg } 9301e04c3fSmrg 9401e04c3fSmrg void handle_rvalue(ir_rvalue **rvalue); 9501e04c3fSmrg 9601e04c3fSmrg void add_function(ir_function *f) 9701e04c3fSmrg { 9801e04c3fSmrg _mesa_hash_table_insert(functions, f->name, f); 9901e04c3fSmrg } 10001e04c3fSmrg 10101e04c3fSmrg ir_function *find_function(const char *name) 10201e04c3fSmrg { 10301e04c3fSmrg struct hash_entry *const entry = 10401e04c3fSmrg _mesa_hash_table_search(functions, name); 10501e04c3fSmrg 10601e04c3fSmrg return entry != NULL ? (ir_function *) entry->data : NULL; 10701e04c3fSmrg } 10801e04c3fSmrg 10901e04c3fSmrg bool progress; 11001e04c3fSmrg 11101e04c3fSmrgprivate: 11201e04c3fSmrg unsigned lower; /** Bitfield of which operations to lower */ 11301e04c3fSmrg 11401e04c3fSmrg /** Hashtable containing all of the known functions in the IR */ 11501e04c3fSmrg struct hash_table *functions; 11601e04c3fSmrg 11701e04c3fSmrgpublic: 11801e04c3fSmrg exec_list function_list; 11901e04c3fSmrg 12001e04c3fSmrgprivate: 12101e04c3fSmrg ir_factory added_functions; 12201e04c3fSmrg 12301e04c3fSmrg ir_rvalue *handle_op(ir_expression *ir, const char *function_name, 12401e04c3fSmrg function_generator generator); 12501e04c3fSmrg}; 12601e04c3fSmrg 12701e04c3fSmrg} /* anonymous namespace */ 12801e04c3fSmrg 12901e04c3fSmrg/** 13001e04c3fSmrg * Determine if a particular type of lowering should occur 13101e04c3fSmrg */ 13201e04c3fSmrg#define lowering(x) (this->lower & x) 13301e04c3fSmrg 13401e04c3fSmrgbool 13501e04c3fSmrglower_64bit_integer_instructions(exec_list *instructions, 13601e04c3fSmrg unsigned what_to_lower) 13701e04c3fSmrg{ 13801e04c3fSmrg if (instructions->is_empty()) 
13901e04c3fSmrg return false; 14001e04c3fSmrg 14101e04c3fSmrg ir_instruction *first_inst = (ir_instruction *) instructions->get_head_raw(); 14201e04c3fSmrg void *const mem_ctx = ralloc_parent(first_inst); 14301e04c3fSmrg lower_64bit_visitor v(mem_ctx, instructions, what_to_lower); 14401e04c3fSmrg 14501e04c3fSmrg visit_list_elements(&v, instructions); 14601e04c3fSmrg 14701e04c3fSmrg if (v.progress && !v.function_list.is_empty()) { 14801e04c3fSmrg /* Move all of the nodes from function_list to the head if the incoming 14901e04c3fSmrg * instruction list. 15001e04c3fSmrg */ 15101e04c3fSmrg exec_node *const after = &instructions->head_sentinel; 15201e04c3fSmrg exec_node *const before = instructions->head_sentinel.next; 15301e04c3fSmrg exec_node *const head = v.function_list.head_sentinel.next; 15401e04c3fSmrg exec_node *const tail = v.function_list.tail_sentinel.prev; 15501e04c3fSmrg 15601e04c3fSmrg before->next = head; 15701e04c3fSmrg head->prev = before; 15801e04c3fSmrg 15901e04c3fSmrg after->prev = tail; 16001e04c3fSmrg tail->next = after; 16101e04c3fSmrg } 16201e04c3fSmrg 16301e04c3fSmrg return v.progress; 16401e04c3fSmrg} 16501e04c3fSmrg 16601e04c3fSmrg 16701e04c3fSmrg/** 16801e04c3fSmrg * Expand individual 64-bit values to uvec2 values 16901e04c3fSmrg * 17001e04c3fSmrg * Each operation is in one of a few forms. 17101e04c3fSmrg * 17201e04c3fSmrg * vector op vector 17301e04c3fSmrg * vector op scalar 17401e04c3fSmrg * scalar op vector 17501e04c3fSmrg * scalar op scalar 17601e04c3fSmrg * 17701e04c3fSmrg * In the 'vector op vector' case, the two vectors must have the same size. 17801e04c3fSmrg * In a way, the 'scalar op scalar' form is special case of the 'vector op 17901e04c3fSmrg * vector' form. 18001e04c3fSmrg * 18101e04c3fSmrg * This method generates a new set of uvec2 values for each element of a 18201e04c3fSmrg * single operand. If the operand is a scalar, the uvec2 is replicated 18301e04c3fSmrg * multiple times. 
A value like 18401e04c3fSmrg * 18501e04c3fSmrg * u64vec3(a) + u64vec3(b) 18601e04c3fSmrg * 18701e04c3fSmrg * becomes 18801e04c3fSmrg * 18901e04c3fSmrg * u64vec3 tmp0 = u64vec3(a) + u64vec3(b); 19001e04c3fSmrg * uvec2 tmp1 = unpackUint2x32(tmp0.x); 19101e04c3fSmrg * uvec2 tmp2 = unpackUint2x32(tmp0.y); 19201e04c3fSmrg * uvec2 tmp3 = unpackUint2x32(tmp0.z); 19301e04c3fSmrg * 19401e04c3fSmrg * and the returned operands array contains ir_variable pointers to 19501e04c3fSmrg * 19601e04c3fSmrg * { tmp1, tmp2, tmp3, tmp1 } 19701e04c3fSmrg */ 19801e04c3fSmrgvoid 19901e04c3fSmrglower_64bit::expand_source(ir_factory &body, 20001e04c3fSmrg ir_rvalue *val, 20101e04c3fSmrg ir_variable **expanded_src) 20201e04c3fSmrg{ 20301e04c3fSmrg assert(val->type->is_integer_64()); 20401e04c3fSmrg 20501e04c3fSmrg ir_variable *const temp = body.make_temp(val->type, "tmp"); 20601e04c3fSmrg 20701e04c3fSmrg body.emit(assign(temp, val)); 20801e04c3fSmrg 20901e04c3fSmrg const ir_expression_operation unpack_opcode = 21001e04c3fSmrg val->type->base_type == GLSL_TYPE_UINT64 21101e04c3fSmrg ? ir_unop_unpack_uint_2x32 : ir_unop_unpack_int_2x32; 21201e04c3fSmrg 21301e04c3fSmrg const glsl_type *const type = 21401e04c3fSmrg val->type->base_type == GLSL_TYPE_UINT64 21501e04c3fSmrg ? 
glsl_type::uvec2_type : glsl_type::ivec2_type; 21601e04c3fSmrg 21701e04c3fSmrg unsigned i; 21801e04c3fSmrg for (i = 0; i < val->type->vector_elements; i++) { 21901e04c3fSmrg expanded_src[i] = body.make_temp(type, "expanded_64bit_source"); 22001e04c3fSmrg 22101e04c3fSmrg body.emit(assign(expanded_src[i], 22201e04c3fSmrg expr(unpack_opcode, swizzle(temp, i, 1)))); 22301e04c3fSmrg } 22401e04c3fSmrg 22501e04c3fSmrg for (/* empty */; i < 4; i++) 22601e04c3fSmrg expanded_src[i] = expanded_src[0]; 22701e04c3fSmrg} 22801e04c3fSmrg 22901e04c3fSmrg/** 23001e04c3fSmrg * Convert a series of uvec2 results into a single 64-bit integer vector 23101e04c3fSmrg */ 23201e04c3fSmrgir_dereference_variable * 23301e04c3fSmrglower_64bit::compact_destination(ir_factory &body, 23401e04c3fSmrg const glsl_type *type, 23501e04c3fSmrg ir_variable *result[4]) 23601e04c3fSmrg{ 23701e04c3fSmrg const ir_expression_operation pack_opcode = 23801e04c3fSmrg type->base_type == GLSL_TYPE_UINT64 23901e04c3fSmrg ? ir_unop_pack_uint_2x32 : ir_unop_pack_int_2x32; 24001e04c3fSmrg 24101e04c3fSmrg ir_variable *const compacted_result = 24201e04c3fSmrg body.make_temp(type, "compacted_64bit_result"); 24301e04c3fSmrg 24401e04c3fSmrg for (unsigned i = 0; i < type->vector_elements; i++) { 24501e04c3fSmrg body.emit(assign(compacted_result, 24601e04c3fSmrg expr(pack_opcode, result[i]), 24701e04c3fSmrg 1U << i)); 24801e04c3fSmrg } 24901e04c3fSmrg 25001e04c3fSmrg void *const mem_ctx = ralloc_parent(compacted_result); 25101e04c3fSmrg return new(mem_ctx) ir_dereference_variable(compacted_result); 25201e04c3fSmrg} 25301e04c3fSmrg 25401e04c3fSmrgir_rvalue * 25501e04c3fSmrglower_64bit::lower_op_to_function_call(ir_instruction *base_ir, 25601e04c3fSmrg ir_expression *ir, 25701e04c3fSmrg ir_function_signature *callee) 25801e04c3fSmrg{ 25901e04c3fSmrg const unsigned num_operands = ir->num_operands; 26001e04c3fSmrg ir_variable *src[4][4]; 26101e04c3fSmrg ir_variable *dst[4]; 26201e04c3fSmrg void *const mem_ctx = 
ralloc_parent(ir); 26301e04c3fSmrg exec_list instructions; 26401e04c3fSmrg unsigned source_components = 0; 26501e04c3fSmrg const glsl_type *const result_type = 26601e04c3fSmrg ir->type->base_type == GLSL_TYPE_UINT64 26701e04c3fSmrg ? glsl_type::uvec2_type : glsl_type::ivec2_type; 26801e04c3fSmrg 26901e04c3fSmrg ir_factory body(&instructions, mem_ctx); 27001e04c3fSmrg 27101e04c3fSmrg for (unsigned i = 0; i < num_operands; i++) { 27201e04c3fSmrg expand_source(body, ir->operands[i], src[i]); 27301e04c3fSmrg 27401e04c3fSmrg if (ir->operands[i]->type->vector_elements > source_components) 27501e04c3fSmrg source_components = ir->operands[i]->type->vector_elements; 27601e04c3fSmrg } 27701e04c3fSmrg 27801e04c3fSmrg for (unsigned i = 0; i < source_components; i++) { 27901e04c3fSmrg dst[i] = body.make_temp(result_type, "expanded_64bit_result"); 28001e04c3fSmrg 28101e04c3fSmrg exec_list parameters; 28201e04c3fSmrg 28301e04c3fSmrg for (unsigned j = 0; j < num_operands; j++) 28401e04c3fSmrg parameters.push_tail(new(mem_ctx) ir_dereference_variable(src[j][i])); 28501e04c3fSmrg 28601e04c3fSmrg ir_dereference_variable *const return_deref = 28701e04c3fSmrg new(mem_ctx) ir_dereference_variable(dst[i]); 28801e04c3fSmrg 28901e04c3fSmrg ir_call *const c = new(mem_ctx) ir_call(callee, 29001e04c3fSmrg return_deref, 29101e04c3fSmrg ¶meters); 29201e04c3fSmrg 29301e04c3fSmrg body.emit(c); 29401e04c3fSmrg } 29501e04c3fSmrg 29601e04c3fSmrg ir_rvalue *const rv = compact_destination(body, ir->type, dst); 29701e04c3fSmrg 29801e04c3fSmrg /* Move all of the nodes from instructions between base_ir and the 29901e04c3fSmrg * instruction before it. 
30001e04c3fSmrg */ 30101e04c3fSmrg exec_node *const after = base_ir; 30201e04c3fSmrg exec_node *const before = after->prev; 30301e04c3fSmrg exec_node *const head = instructions.head_sentinel.next; 30401e04c3fSmrg exec_node *const tail = instructions.tail_sentinel.prev; 30501e04c3fSmrg 30601e04c3fSmrg before->next = head; 30701e04c3fSmrg head->prev = before; 30801e04c3fSmrg 30901e04c3fSmrg after->prev = tail; 31001e04c3fSmrg tail->next = after; 31101e04c3fSmrg 31201e04c3fSmrg return rv; 31301e04c3fSmrg} 31401e04c3fSmrg 31501e04c3fSmrgir_rvalue * 31601e04c3fSmrglower_64bit_visitor::handle_op(ir_expression *ir, 31701e04c3fSmrg const char *function_name, 31801e04c3fSmrg function_generator generator) 31901e04c3fSmrg{ 32001e04c3fSmrg for (unsigned i = 0; i < ir->num_operands; i++) 32101e04c3fSmrg if (!ir->operands[i]->type->is_integer_64()) 32201e04c3fSmrg return ir; 32301e04c3fSmrg 32401e04c3fSmrg /* Get a handle to the correct ir_function_signature for the core 32501e04c3fSmrg * operation. 32601e04c3fSmrg */ 32701e04c3fSmrg ir_function_signature *callee = NULL; 32801e04c3fSmrg ir_function *f = find_function(function_name); 32901e04c3fSmrg 33001e04c3fSmrg if (f != NULL) { 33101e04c3fSmrg callee = (ir_function_signature *) f->signatures.get_head(); 33201e04c3fSmrg assert(callee != NULL && callee->ir_type == ir_type_function_signature); 33301e04c3fSmrg } else { 33401e04c3fSmrg f = new(base_ir) ir_function(function_name); 33501e04c3fSmrg callee = generator(base_ir, NULL); 33601e04c3fSmrg 33701e04c3fSmrg f->add_signature(callee); 33801e04c3fSmrg 33901e04c3fSmrg add_function(f); 34001e04c3fSmrg } 34101e04c3fSmrg 34201e04c3fSmrg this->progress = true; 34301e04c3fSmrg return lower_op_to_function_call(this->base_ir, ir, callee); 34401e04c3fSmrg} 34501e04c3fSmrg 34601e04c3fSmrgvoid 34701e04c3fSmrglower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue) 34801e04c3fSmrg{ 34901e04c3fSmrg if (*rvalue == NULL || (*rvalue)->ir_type != ir_type_expression) 35001e04c3fSmrg return; 
35101e04c3fSmrg 35201e04c3fSmrg ir_expression *const ir = (*rvalue)->as_expression(); 35301e04c3fSmrg assert(ir != NULL); 35401e04c3fSmrg 35501e04c3fSmrg switch (ir->operation) { 35601e04c3fSmrg case ir_unop_sign: 35701e04c3fSmrg if (lowering(SIGN64)) { 35801e04c3fSmrg *rvalue = handle_op(ir, "__builtin_sign64", generate_ir::sign64); 35901e04c3fSmrg } 36001e04c3fSmrg break; 36101e04c3fSmrg 36201e04c3fSmrg case ir_binop_div: 36301e04c3fSmrg if (lowering(DIV64)) { 36401e04c3fSmrg if (ir->type->base_type == GLSL_TYPE_UINT64) { 36501e04c3fSmrg *rvalue = handle_op(ir, "__builtin_udiv64", generate_ir::udiv64); 36601e04c3fSmrg } else { 36701e04c3fSmrg *rvalue = handle_op(ir, "__builtin_idiv64", generate_ir::idiv64); 36801e04c3fSmrg } 36901e04c3fSmrg } 37001e04c3fSmrg break; 37101e04c3fSmrg 37201e04c3fSmrg case ir_binop_mod: 37301e04c3fSmrg if (lowering(MOD64)) { 37401e04c3fSmrg if (ir->type->base_type == GLSL_TYPE_UINT64) { 37501e04c3fSmrg *rvalue = handle_op(ir, "__builtin_umod64", generate_ir::umod64); 37601e04c3fSmrg } else { 37701e04c3fSmrg *rvalue = handle_op(ir, "__builtin_imod64", generate_ir::imod64); 37801e04c3fSmrg } 37901e04c3fSmrg } 38001e04c3fSmrg break; 38101e04c3fSmrg 38201e04c3fSmrg case ir_binop_mul: 38301e04c3fSmrg if (lowering(MUL64)) { 38401e04c3fSmrg *rvalue = handle_op(ir, "__builtin_umul64", generate_ir::umul64); 38501e04c3fSmrg } 38601e04c3fSmrg break; 38701e04c3fSmrg 38801e04c3fSmrg default: 38901e04c3fSmrg break; 39001e04c3fSmrg } 39101e04c3fSmrg} 392