/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_int64.cpp
 *
 * Lower 64-bit operations to 32-bit operations.  Each 64-bit value is lowered
 * to a uvec2.  For each operation that can be lowered, there is a function
 * called __builtin_foo with the same number of parameters that takes uvec2
 * sources and produces uvec2 results.  An operation like
 *
 *     uint64_t(x) * uint64_t(y)
 *
 * becomes
 *
 *     packUint2x32(__builtin_umul64(unpackUint2x32(x), unpackUint2x32(y)));
 */
3801e04c3fSmrg
3901e04c3fSmrg#include "main/macros.h"
4001e04c3fSmrg#include "compiler/glsl_types.h"
4101e04c3fSmrg#include "ir.h"
4201e04c3fSmrg#include "ir_rvalue_visitor.h"
4301e04c3fSmrg#include "ir_builder.h"
4401e04c3fSmrg#include "ir_optimization.h"
4501e04c3fSmrg#include "util/hash_table.h"
4601e04c3fSmrg#include "builtin_functions.h"
4701e04c3fSmrg
4801e04c3fSmrgtypedef ir_function_signature *(*function_generator)(void *mem_ctx,
4901e04c3fSmrg                                                     builtin_available_predicate avail);
5001e04c3fSmrg
5101e04c3fSmrgusing namespace ir_builder;
5201e04c3fSmrg
namespace lower_64bit {
/** Expand each component of a 64-bit integer rvalue into a uvec2/ivec2 temp. */
void expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src);

/** Pack an array of uvec2/ivec2 results back into a single 64-bit vector. */
ir_dereference_variable *compact_destination(ir_factory &,
                                             const glsl_type *type,
                                             ir_variable *result[4]);

/** Replace a 64-bit expression with a call to a lowering builtin function. */
ir_rvalue *lower_op_to_function_call(ir_instruction *base_ir,
                                     ir_expression *ir,
                                     ir_function_signature *callee);
};
6401e04c3fSmrg
6501e04c3fSmrgusing namespace lower_64bit;
6601e04c3fSmrg
6701e04c3fSmrgnamespace {
6801e04c3fSmrg
/**
 * Visitor that rewrites selected 64-bit integer expressions into calls to
 * __builtin_* lowering functions, tracking any builtins it generates.
 */
class lower_64bit_visitor : public ir_rvalue_visitor {
public:
   /**
    * \param mem_ctx      ralloc context for the function hash table and for
    *                     the list of generated builtin functions
    * \param instructions IR stream; scanned up front for __builtin_* functions
    *                     already present so they are reused, not regenerated
    * \param lower        Bitfield of which operations to lower
    */
   lower_64bit_visitor(void *mem_ctx, exec_list *instructions, unsigned lower)
      : progress(false), lower(lower),
        function_list(), added_functions(&function_list, mem_ctx)
   {
      functions = _mesa_hash_table_create(mem_ctx,
                                          _mesa_hash_string,
                                          _mesa_key_string_equal);

      /* Pre-register any lowering builtins already in the IR so that
       * handle_op() finds them instead of generating duplicates.
       */
      foreach_in_list(ir_instruction, node, instructions) {
         ir_function *const f = node->as_function();

         if (f == NULL || strncmp(f->name, "__builtin_", 10) != 0)
            continue;

         add_function(f);
      }
   }

   ~lower_64bit_visitor()
   {
      /* Keys are the functions' own name strings; no destructor needed. */
      _mesa_hash_table_destroy(functions, NULL);
   }

   void handle_rvalue(ir_rvalue **rvalue);

   /** Record \p f so later lookups by name will find it. */
   void add_function(ir_function *f)
   {
      _mesa_hash_table_insert(functions, f->name, f);
   }

   /** Look up a previously added function by name; NULL if unknown. */
   ir_function *find_function(const char *name)
   {
      struct hash_entry *const entry =
         _mesa_hash_table_search(functions, name);

      return entry != NULL ? (ir_function *) entry->data : NULL;
   }

   /** Set to true whenever an expression is actually lowered. */
   bool progress;

private:
   unsigned lower; /** Bitfield of which operations to lower */

   /** Hashtable containing all of the known functions in the IR */
   struct hash_table *functions;

public:
   /** Builtin functions generated during this run, to be spliced into the
    * caller's instruction stream afterward.
    */
   exec_list function_list;

private:
   /** Factory that emits generated builtins onto function_list. */
   ir_factory added_functions;

   /** Common path for lowering one expression: find or generate the named
    * builtin, then replace \p ir with a call to it.
    */
   ir_rvalue *handle_op(ir_expression *ir, const char *function_name,
                        function_generator generator);
};
12601e04c3fSmrg
12701e04c3fSmrg} /* anonymous namespace */
12801e04c3fSmrg
12901e04c3fSmrg/**
13001e04c3fSmrg * Determine if a particular type of lowering should occur
13101e04c3fSmrg */
13201e04c3fSmrg#define lowering(x) (this->lower & x)
13301e04c3fSmrg
/**
 * Run the 64-bit integer lowering pass over \p instructions.
 *
 * \param what_to_lower  Bitfield selecting which operations to lower
 * \return true if any instruction was changed
 */
bool
lower_64bit_integer_instructions(exec_list *instructions,
                                 unsigned what_to_lower)
{
   if (instructions->is_empty())
      return false;

   /* Allocate the visitor's data from the same ralloc context as the IR. */
   ir_instruction *first_inst = (ir_instruction *) instructions->get_head_raw();
   void *const mem_ctx = ralloc_parent(first_inst);
   lower_64bit_visitor v(mem_ctx, instructions, what_to_lower);

   visit_list_elements(&v, instructions);

   if (v.progress && !v.function_list.is_empty()) {
      /* Move all of the nodes from function_list to the head of the incoming
       * instruction list, so the generated builtins are defined before any
       * calls to them.
       */
      exec_node *const after = &instructions->head_sentinel;
      exec_node *const before = instructions->head_sentinel.next;
      exec_node *const head = v.function_list.head_sentinel.next;
      exec_node *const tail = v.function_list.tail_sentinel.prev;

      before->next = head;
      head->prev = before;

      after->prev = tail;
      tail->next = after;
   }

   return v.progress;
}
16501e04c3fSmrg
16601e04c3fSmrg
16701e04c3fSmrg/**
16801e04c3fSmrg * Expand individual 64-bit values to uvec2 values
16901e04c3fSmrg *
17001e04c3fSmrg * Each operation is in one of a few forms.
17101e04c3fSmrg *
17201e04c3fSmrg *     vector op vector
17301e04c3fSmrg *     vector op scalar
17401e04c3fSmrg *     scalar op vector
17501e04c3fSmrg *     scalar op scalar
17601e04c3fSmrg *
17701e04c3fSmrg * In the 'vector op vector' case, the two vectors must have the same size.
17801e04c3fSmrg * In a way, the 'scalar op scalar' form is special case of the 'vector op
17901e04c3fSmrg * vector' form.
18001e04c3fSmrg *
18101e04c3fSmrg * This method generates a new set of uvec2 values for each element of a
18201e04c3fSmrg * single operand.  If the operand is a scalar, the uvec2 is replicated
18301e04c3fSmrg * multiple times.  A value like
18401e04c3fSmrg *
18501e04c3fSmrg *     u64vec3(a) + u64vec3(b)
18601e04c3fSmrg *
18701e04c3fSmrg * becomes
18801e04c3fSmrg *
18901e04c3fSmrg *     u64vec3 tmp0 = u64vec3(a) + u64vec3(b);
19001e04c3fSmrg *     uvec2 tmp1 = unpackUint2x32(tmp0.x);
19101e04c3fSmrg *     uvec2 tmp2 = unpackUint2x32(tmp0.y);
19201e04c3fSmrg *     uvec2 tmp3 = unpackUint2x32(tmp0.z);
19301e04c3fSmrg *
19401e04c3fSmrg * and the returned operands array contains ir_variable pointers to
19501e04c3fSmrg *
19601e04c3fSmrg *     { tmp1, tmp2, tmp3, tmp1 }
19701e04c3fSmrg */
19801e04c3fSmrgvoid
19901e04c3fSmrglower_64bit::expand_source(ir_factory &body,
20001e04c3fSmrg                           ir_rvalue *val,
20101e04c3fSmrg                           ir_variable **expanded_src)
20201e04c3fSmrg{
20301e04c3fSmrg   assert(val->type->is_integer_64());
20401e04c3fSmrg
20501e04c3fSmrg   ir_variable *const temp = body.make_temp(val->type, "tmp");
20601e04c3fSmrg
20701e04c3fSmrg   body.emit(assign(temp, val));
20801e04c3fSmrg
20901e04c3fSmrg   const ir_expression_operation unpack_opcode =
21001e04c3fSmrg      val->type->base_type == GLSL_TYPE_UINT64
21101e04c3fSmrg      ? ir_unop_unpack_uint_2x32 : ir_unop_unpack_int_2x32;
21201e04c3fSmrg
21301e04c3fSmrg   const glsl_type *const type =
21401e04c3fSmrg      val->type->base_type == GLSL_TYPE_UINT64
21501e04c3fSmrg      ? glsl_type::uvec2_type : glsl_type::ivec2_type;
21601e04c3fSmrg
21701e04c3fSmrg   unsigned i;
21801e04c3fSmrg   for (i = 0; i < val->type->vector_elements; i++) {
21901e04c3fSmrg      expanded_src[i] = body.make_temp(type, "expanded_64bit_source");
22001e04c3fSmrg
22101e04c3fSmrg      body.emit(assign(expanded_src[i],
22201e04c3fSmrg                       expr(unpack_opcode, swizzle(temp, i, 1))));
22301e04c3fSmrg   }
22401e04c3fSmrg
22501e04c3fSmrg   for (/* empty */; i < 4; i++)
22601e04c3fSmrg      expanded_src[i] = expanded_src[0];
22701e04c3fSmrg}
22801e04c3fSmrg
22901e04c3fSmrg/**
23001e04c3fSmrg * Convert a series of uvec2 results into a single 64-bit integer vector
23101e04c3fSmrg */
23201e04c3fSmrgir_dereference_variable *
23301e04c3fSmrglower_64bit::compact_destination(ir_factory &body,
23401e04c3fSmrg                                 const glsl_type *type,
23501e04c3fSmrg                                 ir_variable *result[4])
23601e04c3fSmrg{
23701e04c3fSmrg   const ir_expression_operation pack_opcode =
23801e04c3fSmrg      type->base_type == GLSL_TYPE_UINT64
23901e04c3fSmrg      ? ir_unop_pack_uint_2x32 : ir_unop_pack_int_2x32;
24001e04c3fSmrg
24101e04c3fSmrg   ir_variable *const compacted_result =
24201e04c3fSmrg      body.make_temp(type, "compacted_64bit_result");
24301e04c3fSmrg
24401e04c3fSmrg   for (unsigned i = 0; i < type->vector_elements; i++) {
24501e04c3fSmrg      body.emit(assign(compacted_result,
24601e04c3fSmrg                       expr(pack_opcode, result[i]),
24701e04c3fSmrg                       1U << i));
24801e04c3fSmrg   }
24901e04c3fSmrg
25001e04c3fSmrg   void *const mem_ctx = ralloc_parent(compacted_result);
25101e04c3fSmrg   return new(mem_ctx) ir_dereference_variable(compacted_result);
25201e04c3fSmrg}
25301e04c3fSmrg
/**
 * Replace the 64-bit expression \p ir with per-component calls to \p callee.
 *
 * The generated setup code (source expansion, the calls themselves, and the
 * result compaction) is spliced into the instruction stream immediately
 * before \p base_ir, and the compacted result is returned as the replacement
 * rvalue.
 */
ir_rvalue *
lower_64bit::lower_op_to_function_call(ir_instruction *base_ir,
                                       ir_expression *ir,
                                       ir_function_signature *callee)
{
   const unsigned num_operands = ir->num_operands;
   ir_variable *src[4][4];   /* src[operand][component] as uvec2/ivec2 temps */
   ir_variable *dst[4];      /* per-component call results */
   void *const mem_ctx = ralloc_parent(ir);
   exec_list instructions;
   unsigned source_components = 0;
   const glsl_type *const result_type =
      ir->type->base_type == GLSL_TYPE_UINT64
      ? glsl_type::uvec2_type : glsl_type::ivec2_type;

   ir_factory body(&instructions, mem_ctx);

   /* Expand every operand; track the widest operand so scalars broadcast
    * against vectors (expand_source replicates scalar components).
    */
   for (unsigned i = 0; i < num_operands; i++) {
      expand_source(body, ir->operands[i], src[i]);

      if (ir->operands[i]->type->vector_elements > source_components)
         source_components = ir->operands[i]->type->vector_elements;
   }

   /* Emit one call to the builtin per result component. */
   for (unsigned i = 0; i < source_components; i++) {
      dst[i] = body.make_temp(result_type, "expanded_64bit_result");

      exec_list parameters;

      for (unsigned j = 0; j < num_operands; j++)
         parameters.push_tail(new(mem_ctx) ir_dereference_variable(src[j][i]));

      ir_dereference_variable *const return_deref =
         new(mem_ctx) ir_dereference_variable(dst[i]);

      ir_call *const c = new(mem_ctx) ir_call(callee,
                                              return_deref,
                                              &parameters);

      body.emit(c);
   }

   ir_rvalue *const rv = compact_destination(body, ir->type, dst);

   /* Splice all of the generated instructions into the stream between
    * base_ir and the instruction that precedes it.
    */
   exec_node *const after = base_ir;
   exec_node *const before = after->prev;
   exec_node *const head = instructions.head_sentinel.next;
   exec_node *const tail = instructions.tail_sentinel.prev;

   before->next = head;
   head->prev = before;

   after->prev = tail;
   tail->next = after;

   return rv;
}
31401e04c3fSmrg
31501e04c3fSmrgir_rvalue *
31601e04c3fSmrglower_64bit_visitor::handle_op(ir_expression *ir,
31701e04c3fSmrg                               const char *function_name,
31801e04c3fSmrg                               function_generator generator)
31901e04c3fSmrg{
32001e04c3fSmrg   for (unsigned i = 0; i < ir->num_operands; i++)
32101e04c3fSmrg      if (!ir->operands[i]->type->is_integer_64())
32201e04c3fSmrg         return ir;
32301e04c3fSmrg
32401e04c3fSmrg   /* Get a handle to the correct ir_function_signature for the core
32501e04c3fSmrg    * operation.
32601e04c3fSmrg    */
32701e04c3fSmrg   ir_function_signature *callee = NULL;
32801e04c3fSmrg   ir_function *f = find_function(function_name);
32901e04c3fSmrg
33001e04c3fSmrg   if (f != NULL) {
33101e04c3fSmrg      callee = (ir_function_signature *) f->signatures.get_head();
33201e04c3fSmrg      assert(callee != NULL && callee->ir_type == ir_type_function_signature);
33301e04c3fSmrg   } else {
33401e04c3fSmrg      f = new(base_ir) ir_function(function_name);
33501e04c3fSmrg      callee = generator(base_ir, NULL);
33601e04c3fSmrg
33701e04c3fSmrg      f->add_signature(callee);
33801e04c3fSmrg
33901e04c3fSmrg      add_function(f);
34001e04c3fSmrg   }
34101e04c3fSmrg
34201e04c3fSmrg   this->progress = true;
34301e04c3fSmrg   return lower_op_to_function_call(this->base_ir, ir, callee);
34401e04c3fSmrg}
34501e04c3fSmrg
/**
 * Visitor callback: if *rvalue is a 64-bit expression of a kind selected in
 * the `lower` bitfield, replace it with a call to the matching builtin.
 */
void
lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (*rvalue == NULL || (*rvalue)->ir_type != ir_type_expression)
      return;

   ir_expression *const ir = (*rvalue)->as_expression();
   assert(ir != NULL);

   switch (ir->operation) {
   case ir_unop_sign:
      if (lowering(SIGN64)) {
         *rvalue = handle_op(ir, "__builtin_sign64", generate_ir::sign64);
      }
      break;

   case ir_binop_div:
      /* Division is signedness-dependent, so pick udiv64 or idiv64. */
      if (lowering(DIV64)) {
         if (ir->type->base_type == GLSL_TYPE_UINT64) {
            *rvalue = handle_op(ir, "__builtin_udiv64", generate_ir::udiv64);
         } else {
            *rvalue = handle_op(ir, "__builtin_idiv64", generate_ir::idiv64);
         }
      }
      break;

   case ir_binop_mod:
      /* Modulus is also signedness-dependent. */
      if (lowering(MOD64)) {
         if (ir->type->base_type == GLSL_TYPE_UINT64) {
            *rvalue = handle_op(ir, "__builtin_umod64", generate_ir::umod64);
         } else {
            *rvalue = handle_op(ir, "__builtin_imod64", generate_ir::imod64);
         }
      }
      break;

   case ir_binop_mul:
      /* In two's complement, the low 64 bits of a product are the same for
       * signed and unsigned operands, so umul64 serves both types.
       */
      if (lowering(MUL64)) {
         *rvalue = handle_op(ir, "__builtin_umul64", generate_ir::umul64);
      }
      break;

   default:
      break;
   }
}
392