1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2016 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21b8e80941Smrg * DEALINGS IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg/** 25b8e80941Smrg * \file lower_int64.cpp 26b8e80941Smrg * 27b8e80941Smrg * Lower 64-bit operations to 32-bit operations. Each 64-bit value is lowered 28b8e80941Smrg * to a uvec2. For each operation that can be lowered, there is a function 29b8e80941Smrg * called __builtin_foo with the same number of parameters that takes uvec2 30b8e80941Smrg * sources and produces uvec2 results. An operation like 31b8e80941Smrg * 32b8e80941Smrg * uint64_t(x) * uint64_t(y) 33b8e80941Smrg * 34b8e80941Smrg * becomes 35b8e80941Smrg * 36b8e80941Smrg * packUint2x32(__builtin_umul64(unpackUint2x32(x), unpackUint2x32(y))); 37b8e80941Smrg */ 38b8e80941Smrg 39b8e80941Smrg#include "main/macros.h" 40b8e80941Smrg#include "compiler/glsl_types.h" 41b8e80941Smrg#include "ir.h" 42b8e80941Smrg#include "ir_rvalue_visitor.h" 43b8e80941Smrg#include "ir_builder.h" 44b8e80941Smrg#include "ir_optimization.h" 45b8e80941Smrg#include "util/hash_table.h" 46b8e80941Smrg#include "builtin_functions.h" 47b8e80941Smrg 48b8e80941Smrgtypedef ir_function_signature *(*function_generator)(void *mem_ctx, 49b8e80941Smrg builtin_available_predicate avail); 50b8e80941Smrg 51b8e80941Smrgusing namespace ir_builder; 52b8e80941Smrg 53b8e80941Smrgnamespace lower_64bit { 54b8e80941Smrgvoid expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src); 55b8e80941Smrg 56b8e80941Smrgir_dereference_variable *compact_destination(ir_factory &, 57b8e80941Smrg const glsl_type *type, 58b8e80941Smrg ir_variable *result[4]); 59b8e80941Smrg 60b8e80941Smrgir_rvalue *lower_op_to_function_call(ir_instruction *base_ir, 61b8e80941Smrg ir_expression *ir, 62b8e80941Smrg ir_function_signature *callee); 63b8e80941Smrg}; 64b8e80941Smrg 65b8e80941Smrgusing namespace lower_64bit; 66b8e80941Smrg 67b8e80941Smrgnamespace { 68b8e80941Smrg 69b8e80941Smrgclass lower_64bit_visitor : public ir_rvalue_visitor { 70b8e80941Smrgpublic: 71b8e80941Smrg lower_64bit_visitor(void *mem_ctx, exec_list *instructions, unsigned lower) 72b8e80941Smrg : progress(false), lower(lower), 73b8e80941Smrg function_list(), added_functions(&function_list, mem_ctx) 74b8e80941Smrg { 75b8e80941Smrg functions = _mesa_hash_table_create(mem_ctx, 76b8e80941Smrg _mesa_key_hash_string, 77b8e80941Smrg _mesa_key_string_equal); 78b8e80941Smrg 79b8e80941Smrg foreach_in_list(ir_instruction, node, instructions) { 80b8e80941Smrg ir_function *const f = node->as_function(); 81b8e80941Smrg 82b8e80941Smrg if (f == NULL || strncmp(f->name, "__builtin_", 10) != 0) 83b8e80941Smrg continue; 84b8e80941Smrg 85b8e80941Smrg add_function(f); 86b8e80941Smrg } 87b8e80941Smrg } 88b8e80941Smrg 89b8e80941Smrg ~lower_64bit_visitor() 90b8e80941Smrg { 91b8e80941Smrg _mesa_hash_table_destroy(functions, NULL); 92b8e80941Smrg } 93b8e80941Smrg 94b8e80941Smrg void handle_rvalue(ir_rvalue **rvalue); 95b8e80941Smrg 96b8e80941Smrg void add_function(ir_function *f) 97b8e80941Smrg { 98b8e80941Smrg _mesa_hash_table_insert(functions, f->name, f); 99b8e80941Smrg } 100b8e80941Smrg 101b8e80941Smrg ir_function *find_function(const char *name) 102b8e80941Smrg { 103b8e80941Smrg struct hash_entry *const entry = 104b8e80941Smrg _mesa_hash_table_search(functions, name); 105b8e80941Smrg 106b8e80941Smrg return entry != NULL ? (ir_function *) entry->data : NULL; 107b8e80941Smrg } 108b8e80941Smrg 109b8e80941Smrg bool progress; 110b8e80941Smrg 111b8e80941Smrgprivate: 112b8e80941Smrg unsigned lower; /** Bitfield of which operations to lower */ 113b8e80941Smrg 114b8e80941Smrg /** Hashtable containing all of the known functions in the IR */ 115b8e80941Smrg struct hash_table *functions; 116b8e80941Smrg 117b8e80941Smrgpublic: 118b8e80941Smrg exec_list function_list; 119b8e80941Smrg 120b8e80941Smrgprivate: 121b8e80941Smrg ir_factory added_functions; 122b8e80941Smrg 123b8e80941Smrg ir_rvalue *handle_op(ir_expression *ir, const char *function_name, 124b8e80941Smrg function_generator generator); 125b8e80941Smrg}; 126b8e80941Smrg 127b8e80941Smrg} /* anonymous namespace */ 128b8e80941Smrg 129b8e80941Smrg/** 130b8e80941Smrg * Determine if a particular type of lowering should occur 131b8e80941Smrg */ 132b8e80941Smrg#define lowering(x) (this->lower & x) 133b8e80941Smrg 134b8e80941Smrgbool 135b8e80941Smrglower_64bit_integer_instructions(exec_list *instructions, 136b8e80941Smrg unsigned what_to_lower) 137b8e80941Smrg{ 138b8e80941Smrg if (instructions->is_empty()) 139b8e80941Smrg return false; 140b8e80941Smrg 141b8e80941Smrg ir_instruction *first_inst = (ir_instruction *) instructions->get_head_raw(); 142b8e80941Smrg void *const mem_ctx = ralloc_parent(first_inst); 143b8e80941Smrg lower_64bit_visitor v(mem_ctx, instructions, what_to_lower); 144b8e80941Smrg 145b8e80941Smrg visit_list_elements(&v, instructions); 146b8e80941Smrg 147b8e80941Smrg if (v.progress && !v.function_list.is_empty()) { 148b8e80941Smrg /* Move all of the nodes from function_list to the head if the incoming 149b8e80941Smrg * instruction list. 150b8e80941Smrg */ 151b8e80941Smrg exec_node *const after = &instructions->head_sentinel; 152b8e80941Smrg exec_node *const before = instructions->head_sentinel.next; 153b8e80941Smrg exec_node *const head = v.function_list.head_sentinel.next; 154b8e80941Smrg exec_node *const tail = v.function_list.tail_sentinel.prev; 155b8e80941Smrg 156b8e80941Smrg before->next = head; 157b8e80941Smrg head->prev = before; 158b8e80941Smrg 159b8e80941Smrg after->prev = tail; 160b8e80941Smrg tail->next = after; 161b8e80941Smrg } 162b8e80941Smrg 163b8e80941Smrg return v.progress; 164b8e80941Smrg} 165b8e80941Smrg 166b8e80941Smrg 167b8e80941Smrg/** 168b8e80941Smrg * Expand individual 64-bit values to uvec2 values 169b8e80941Smrg * 170b8e80941Smrg * Each operation is in one of a few forms. 171b8e80941Smrg * 172b8e80941Smrg * vector op vector 173b8e80941Smrg * vector op scalar 174b8e80941Smrg * scalar op vector 175b8e80941Smrg * scalar op scalar 176b8e80941Smrg * 177b8e80941Smrg * In the 'vector op vector' case, the two vectors must have the same size. 178b8e80941Smrg * In a way, the 'scalar op scalar' form is special case of the 'vector op 179b8e80941Smrg * vector' form. 180b8e80941Smrg * 181b8e80941Smrg * This method generates a new set of uvec2 values for each element of a 182b8e80941Smrg * single operand. If the operand is a scalar, the uvec2 is replicated 183b8e80941Smrg * multiple times. A value like 184b8e80941Smrg * 185b8e80941Smrg * u64vec3(a) + u64vec3(b) 186b8e80941Smrg * 187b8e80941Smrg * becomes 188b8e80941Smrg * 189b8e80941Smrg * u64vec3 tmp0 = u64vec3(a) + u64vec3(b); 190b8e80941Smrg * uvec2 tmp1 = unpackUint2x32(tmp0.x); 191b8e80941Smrg * uvec2 tmp2 = unpackUint2x32(tmp0.y); 192b8e80941Smrg * uvec2 tmp3 = unpackUint2x32(tmp0.z); 193b8e80941Smrg * 194b8e80941Smrg * and the returned operands array contains ir_variable pointers to 195b8e80941Smrg * 196b8e80941Smrg * { tmp1, tmp2, tmp3, tmp1 } 197b8e80941Smrg */ 198b8e80941Smrgvoid 199b8e80941Smrglower_64bit::expand_source(ir_factory &body, 200b8e80941Smrg ir_rvalue *val, 201b8e80941Smrg ir_variable **expanded_src) 202b8e80941Smrg{ 203b8e80941Smrg assert(val->type->is_integer_64()); 204b8e80941Smrg 205b8e80941Smrg ir_variable *const temp = body.make_temp(val->type, "tmp"); 206b8e80941Smrg 207b8e80941Smrg body.emit(assign(temp, val)); 208b8e80941Smrg 209b8e80941Smrg const ir_expression_operation unpack_opcode = 210b8e80941Smrg val->type->base_type == GLSL_TYPE_UINT64 211b8e80941Smrg ? ir_unop_unpack_uint_2x32 : ir_unop_unpack_int_2x32; 212b8e80941Smrg 213b8e80941Smrg const glsl_type *const type = 214b8e80941Smrg val->type->base_type == GLSL_TYPE_UINT64 215b8e80941Smrg ? glsl_type::uvec2_type : glsl_type::ivec2_type; 216b8e80941Smrg 217b8e80941Smrg unsigned i; 218b8e80941Smrg for (i = 0; i < val->type->vector_elements; i++) { 219b8e80941Smrg expanded_src[i] = body.make_temp(type, "expanded_64bit_source"); 220b8e80941Smrg 221b8e80941Smrg body.emit(assign(expanded_src[i], 222b8e80941Smrg expr(unpack_opcode, swizzle(temp, i, 1)))); 223b8e80941Smrg } 224b8e80941Smrg 225b8e80941Smrg for (/* empty */; i < 4; i++) 226b8e80941Smrg expanded_src[i] = expanded_src[0]; 227b8e80941Smrg} 228b8e80941Smrg 229b8e80941Smrg/** 230b8e80941Smrg * Convert a series of uvec2 results into a single 64-bit integer vector 231b8e80941Smrg */ 232b8e80941Smrgir_dereference_variable * 233b8e80941Smrglower_64bit::compact_destination(ir_factory &body, 234b8e80941Smrg const glsl_type *type, 235b8e80941Smrg ir_variable *result[4]) 236b8e80941Smrg{ 237b8e80941Smrg const ir_expression_operation pack_opcode = 238b8e80941Smrg type->base_type == GLSL_TYPE_UINT64 239b8e80941Smrg ? ir_unop_pack_uint_2x32 : ir_unop_pack_int_2x32; 240b8e80941Smrg 241b8e80941Smrg ir_variable *const compacted_result = 242b8e80941Smrg body.make_temp(type, "compacted_64bit_result"); 243b8e80941Smrg 244b8e80941Smrg for (unsigned i = 0; i < type->vector_elements; i++) { 245b8e80941Smrg body.emit(assign(compacted_result, 246b8e80941Smrg expr(pack_opcode, result[i]), 247b8e80941Smrg 1U << i)); 248b8e80941Smrg } 249b8e80941Smrg 250b8e80941Smrg void *const mem_ctx = ralloc_parent(compacted_result); 251b8e80941Smrg return new(mem_ctx) ir_dereference_variable(compacted_result); 252b8e80941Smrg} 253b8e80941Smrg 254b8e80941Smrgir_rvalue * 255b8e80941Smrglower_64bit::lower_op_to_function_call(ir_instruction *base_ir, 256b8e80941Smrg ir_expression *ir, 257b8e80941Smrg ir_function_signature *callee) 258b8e80941Smrg{ 259b8e80941Smrg const unsigned num_operands = ir->num_operands; 260b8e80941Smrg ir_variable *src[4][4]; 261b8e80941Smrg ir_variable *dst[4]; 262b8e80941Smrg void *const mem_ctx = ralloc_parent(ir); 263b8e80941Smrg exec_list instructions; 264b8e80941Smrg unsigned source_components = 0; 265b8e80941Smrg const glsl_type *const result_type = 266b8e80941Smrg ir->type->base_type == GLSL_TYPE_UINT64 267b8e80941Smrg ? glsl_type::uvec2_type : glsl_type::ivec2_type; 268b8e80941Smrg 269b8e80941Smrg ir_factory body(&instructions, mem_ctx); 270b8e80941Smrg 271b8e80941Smrg for (unsigned i = 0; i < num_operands; i++) { 272b8e80941Smrg expand_source(body, ir->operands[i], src[i]); 273b8e80941Smrg 274b8e80941Smrg if (ir->operands[i]->type->vector_elements > source_components) 275b8e80941Smrg source_components = ir->operands[i]->type->vector_elements; 276b8e80941Smrg } 277b8e80941Smrg 278b8e80941Smrg for (unsigned i = 0; i < source_components; i++) { 279b8e80941Smrg dst[i] = body.make_temp(result_type, "expanded_64bit_result"); 280b8e80941Smrg 281b8e80941Smrg exec_list parameters; 282b8e80941Smrg 283b8e80941Smrg for (unsigned j = 0; j < num_operands; j++) 284b8e80941Smrg parameters.push_tail(new(mem_ctx) ir_dereference_variable(src[j][i])); 285b8e80941Smrg 286b8e80941Smrg ir_dereference_variable *const return_deref = 287b8e80941Smrg new(mem_ctx) ir_dereference_variable(dst[i]); 288b8e80941Smrg 289b8e80941Smrg ir_call *const c = new(mem_ctx) ir_call(callee, 290b8e80941Smrg return_deref, 291b8e80941Smrg ¶meters); 292b8e80941Smrg 293b8e80941Smrg body.emit(c); 294b8e80941Smrg } 295b8e80941Smrg 296b8e80941Smrg ir_rvalue *const rv = compact_destination(body, ir->type, dst); 297b8e80941Smrg 298b8e80941Smrg /* Move all of the nodes from instructions between base_ir and the 299b8e80941Smrg * instruction before it. 300b8e80941Smrg */ 301b8e80941Smrg exec_node *const after = base_ir; 302b8e80941Smrg exec_node *const before = after->prev; 303b8e80941Smrg exec_node *const head = instructions.head_sentinel.next; 304b8e80941Smrg exec_node *const tail = instructions.tail_sentinel.prev; 305b8e80941Smrg 306b8e80941Smrg before->next = head; 307b8e80941Smrg head->prev = before; 308b8e80941Smrg 309b8e80941Smrg after->prev = tail; 310b8e80941Smrg tail->next = after; 311b8e80941Smrg 312b8e80941Smrg return rv; 313b8e80941Smrg} 314b8e80941Smrg 315b8e80941Smrgir_rvalue * 316b8e80941Smrglower_64bit_visitor::handle_op(ir_expression *ir, 317b8e80941Smrg const char *function_name, 318b8e80941Smrg function_generator generator) 319b8e80941Smrg{ 320b8e80941Smrg for (unsigned i = 0; i < ir->num_operands; i++) 321b8e80941Smrg if (!ir->operands[i]->type->is_integer_64()) 322b8e80941Smrg return ir; 323b8e80941Smrg 324b8e80941Smrg /* Get a handle to the correct ir_function_signature for the core 325b8e80941Smrg * operation. 326b8e80941Smrg */ 327b8e80941Smrg ir_function_signature *callee = NULL; 328b8e80941Smrg ir_function *f = find_function(function_name); 329b8e80941Smrg 330b8e80941Smrg if (f != NULL) { 331b8e80941Smrg callee = (ir_function_signature *) f->signatures.get_head(); 332b8e80941Smrg assert(callee != NULL && callee->ir_type == ir_type_function_signature); 333b8e80941Smrg } else { 334b8e80941Smrg f = new(base_ir) ir_function(function_name); 335b8e80941Smrg callee = generator(base_ir, NULL); 336b8e80941Smrg 337b8e80941Smrg f->add_signature(callee); 338b8e80941Smrg 339b8e80941Smrg add_function(f); 340b8e80941Smrg } 341b8e80941Smrg 342b8e80941Smrg this->progress = true; 343b8e80941Smrg return lower_op_to_function_call(this->base_ir, ir, callee); 344b8e80941Smrg} 345b8e80941Smrg 346b8e80941Smrgvoid 347b8e80941Smrglower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue) 348b8e80941Smrg{ 349b8e80941Smrg if (*rvalue == NULL || (*rvalue)->ir_type != ir_type_expression) 350b8e80941Smrg return; 351b8e80941Smrg 352b8e80941Smrg ir_expression *const ir = (*rvalue)->as_expression(); 353b8e80941Smrg assert(ir != NULL); 354b8e80941Smrg 355b8e80941Smrg switch (ir->operation) { 356b8e80941Smrg case ir_unop_sign: 357b8e80941Smrg if (lowering(SIGN64)) { 358b8e80941Smrg *rvalue = handle_op(ir, "__builtin_sign64", generate_ir::sign64); 359b8e80941Smrg } 360b8e80941Smrg break; 361b8e80941Smrg 362b8e80941Smrg case ir_binop_div: 363b8e80941Smrg if (lowering(DIV64)) { 364b8e80941Smrg if (ir->type->base_type == GLSL_TYPE_UINT64) { 365b8e80941Smrg *rvalue = handle_op(ir, "__builtin_udiv64", generate_ir::udiv64); 366b8e80941Smrg } else { 367b8e80941Smrg *rvalue = handle_op(ir, "__builtin_idiv64", generate_ir::idiv64); 368b8e80941Smrg } 369b8e80941Smrg } 370b8e80941Smrg break; 371b8e80941Smrg 372b8e80941Smrg case ir_binop_mod: 373b8e80941Smrg if (lowering(MOD64)) { 374b8e80941Smrg if (ir->type->base_type == GLSL_TYPE_UINT64) { 375b8e80941Smrg *rvalue = handle_op(ir, "__builtin_umod64", generate_ir::umod64); 376b8e80941Smrg } else { 377b8e80941Smrg *rvalue = handle_op(ir, "__builtin_imod64", generate_ir::imod64); 378b8e80941Smrg } 379b8e80941Smrg } 380b8e80941Smrg break; 381b8e80941Smrg 382b8e80941Smrg case ir_binop_mul: 383b8e80941Smrg if (lowering(MUL64)) { 384b8e80941Smrg *rvalue = handle_op(ir, "__builtin_umul64", generate_ir::umul64); 385b8e80941Smrg } 386b8e80941Smrg break; 387b8e80941Smrg 388b8e80941Smrg default: 389b8e80941Smrg break; 390b8e80941Smrg } 391b8e80941Smrg} 392