/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
23b8e80941Smrg
/**
 * \file lower_int64.cpp
 *
 * Lower 64-bit operations to 32-bit operations.  Each 64-bit value is lowered
 * to a uvec2.  For each operation that can be lowered, there is a function
 * called __builtin_foo with the same number of parameters that takes uvec2
 * sources and produces uvec2 results.  An operation like
 *
 *     uint64_t(x) * uint64_t(y)
 *
 * becomes
 *
 *     packUint2x32(__builtin_umul64(unpackUint2x32(x), unpackUint2x32(y)));
 */
38b8e80941Smrg
39b8e80941Smrg#include "main/macros.h"
40b8e80941Smrg#include "compiler/glsl_types.h"
41b8e80941Smrg#include "ir.h"
42b8e80941Smrg#include "ir_rvalue_visitor.h"
43b8e80941Smrg#include "ir_builder.h"
44b8e80941Smrg#include "ir_optimization.h"
45b8e80941Smrg#include "util/hash_table.h"
46b8e80941Smrg#include "builtin_functions.h"
47b8e80941Smrg
/* Signature shared by the generate_ir::* builders (e.g. generate_ir::umul64):
 * each returns a freshly allocated ir_function_signature for one lowered
 * __builtin_* helper, allocated out of \c mem_ctx.
 */
typedef ir_function_signature *(*function_generator)(void *mem_ctx,
                                                     builtin_available_predicate avail);

using namespace ir_builder;
52b8e80941Smrg
53b8e80941Smrgnamespace lower_64bit {
54b8e80941Smrgvoid expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src);
55b8e80941Smrg
56b8e80941Smrgir_dereference_variable *compact_destination(ir_factory &,
57b8e80941Smrg                                             const glsl_type *type,
58b8e80941Smrg                                             ir_variable *result[4]);
59b8e80941Smrg
60b8e80941Smrgir_rvalue *lower_op_to_function_call(ir_instruction *base_ir,
61b8e80941Smrg                                     ir_expression *ir,
62b8e80941Smrg                                     ir_function_signature *callee);
63b8e80941Smrg};
64b8e80941Smrg
65b8e80941Smrgusing namespace lower_64bit;
66b8e80941Smrg
67b8e80941Smrgnamespace {
68b8e80941Smrg
69b8e80941Smrgclass lower_64bit_visitor : public ir_rvalue_visitor {
70b8e80941Smrgpublic:
71b8e80941Smrg   lower_64bit_visitor(void *mem_ctx, exec_list *instructions, unsigned lower)
72b8e80941Smrg      : progress(false), lower(lower),
73b8e80941Smrg        function_list(), added_functions(&function_list, mem_ctx)
74b8e80941Smrg   {
75b8e80941Smrg      functions = _mesa_hash_table_create(mem_ctx,
76b8e80941Smrg                                          _mesa_key_hash_string,
77b8e80941Smrg                                          _mesa_key_string_equal);
78b8e80941Smrg
79b8e80941Smrg      foreach_in_list(ir_instruction, node, instructions) {
80b8e80941Smrg         ir_function *const f = node->as_function();
81b8e80941Smrg
82b8e80941Smrg         if (f == NULL || strncmp(f->name, "__builtin_", 10) != 0)
83b8e80941Smrg            continue;
84b8e80941Smrg
85b8e80941Smrg         add_function(f);
86b8e80941Smrg      }
87b8e80941Smrg   }
88b8e80941Smrg
89b8e80941Smrg   ~lower_64bit_visitor()
90b8e80941Smrg   {
91b8e80941Smrg      _mesa_hash_table_destroy(functions, NULL);
92b8e80941Smrg   }
93b8e80941Smrg
94b8e80941Smrg   void handle_rvalue(ir_rvalue **rvalue);
95b8e80941Smrg
96b8e80941Smrg   void add_function(ir_function *f)
97b8e80941Smrg   {
98b8e80941Smrg      _mesa_hash_table_insert(functions, f->name, f);
99b8e80941Smrg   }
100b8e80941Smrg
101b8e80941Smrg   ir_function *find_function(const char *name)
102b8e80941Smrg   {
103b8e80941Smrg      struct hash_entry *const entry =
104b8e80941Smrg         _mesa_hash_table_search(functions, name);
105b8e80941Smrg
106b8e80941Smrg      return entry != NULL ? (ir_function *) entry->data : NULL;
107b8e80941Smrg   }
108b8e80941Smrg
109b8e80941Smrg   bool progress;
110b8e80941Smrg
111b8e80941Smrgprivate:
112b8e80941Smrg   unsigned lower; /** Bitfield of which operations to lower */
113b8e80941Smrg
114b8e80941Smrg   /** Hashtable containing all of the known functions in the IR */
115b8e80941Smrg   struct hash_table *functions;
116b8e80941Smrg
117b8e80941Smrgpublic:
118b8e80941Smrg   exec_list function_list;
119b8e80941Smrg
120b8e80941Smrgprivate:
121b8e80941Smrg   ir_factory added_functions;
122b8e80941Smrg
123b8e80941Smrg   ir_rvalue *handle_op(ir_expression *ir, const char *function_name,
124b8e80941Smrg                        function_generator generator);
125b8e80941Smrg};
126b8e80941Smrg
127b8e80941Smrg} /* anonymous namespace */
128b8e80941Smrg
/**
 * Determine if a particular type of lowering should occur
 *
 * The parameter is parenthesized in the expansion so the test is safe even
 * when the caller passes a compound expression (e.g. an OR of flags).
 */
#define lowering(x) (this->lower & (x))
133b8e80941Smrg
134b8e80941Smrgbool
135b8e80941Smrglower_64bit_integer_instructions(exec_list *instructions,
136b8e80941Smrg                                 unsigned what_to_lower)
137b8e80941Smrg{
138b8e80941Smrg   if (instructions->is_empty())
139b8e80941Smrg      return false;
140b8e80941Smrg
141b8e80941Smrg   ir_instruction *first_inst = (ir_instruction *) instructions->get_head_raw();
142b8e80941Smrg   void *const mem_ctx = ralloc_parent(first_inst);
143b8e80941Smrg   lower_64bit_visitor v(mem_ctx, instructions, what_to_lower);
144b8e80941Smrg
145b8e80941Smrg   visit_list_elements(&v, instructions);
146b8e80941Smrg
147b8e80941Smrg   if (v.progress && !v.function_list.is_empty()) {
148b8e80941Smrg      /* Move all of the nodes from function_list to the head if the incoming
149b8e80941Smrg       * instruction list.
150b8e80941Smrg       */
151b8e80941Smrg      exec_node *const after = &instructions->head_sentinel;
152b8e80941Smrg      exec_node *const before = instructions->head_sentinel.next;
153b8e80941Smrg      exec_node *const head = v.function_list.head_sentinel.next;
154b8e80941Smrg      exec_node *const tail = v.function_list.tail_sentinel.prev;
155b8e80941Smrg
156b8e80941Smrg      before->next = head;
157b8e80941Smrg      head->prev = before;
158b8e80941Smrg
159b8e80941Smrg      after->prev = tail;
160b8e80941Smrg      tail->next = after;
161b8e80941Smrg   }
162b8e80941Smrg
163b8e80941Smrg   return v.progress;
164b8e80941Smrg}
165b8e80941Smrg
166b8e80941Smrg
167b8e80941Smrg/**
168b8e80941Smrg * Expand individual 64-bit values to uvec2 values
169b8e80941Smrg *
170b8e80941Smrg * Each operation is in one of a few forms.
171b8e80941Smrg *
172b8e80941Smrg *     vector op vector
173b8e80941Smrg *     vector op scalar
174b8e80941Smrg *     scalar op vector
175b8e80941Smrg *     scalar op scalar
176b8e80941Smrg *
177b8e80941Smrg * In the 'vector op vector' case, the two vectors must have the same size.
178b8e80941Smrg * In a way, the 'scalar op scalar' form is special case of the 'vector op
179b8e80941Smrg * vector' form.
180b8e80941Smrg *
181b8e80941Smrg * This method generates a new set of uvec2 values for each element of a
182b8e80941Smrg * single operand.  If the operand is a scalar, the uvec2 is replicated
183b8e80941Smrg * multiple times.  A value like
184b8e80941Smrg *
185b8e80941Smrg *     u64vec3(a) + u64vec3(b)
186b8e80941Smrg *
187b8e80941Smrg * becomes
188b8e80941Smrg *
189b8e80941Smrg *     u64vec3 tmp0 = u64vec3(a) + u64vec3(b);
190b8e80941Smrg *     uvec2 tmp1 = unpackUint2x32(tmp0.x);
191b8e80941Smrg *     uvec2 tmp2 = unpackUint2x32(tmp0.y);
192b8e80941Smrg *     uvec2 tmp3 = unpackUint2x32(tmp0.z);
193b8e80941Smrg *
194b8e80941Smrg * and the returned operands array contains ir_variable pointers to
195b8e80941Smrg *
196b8e80941Smrg *     { tmp1, tmp2, tmp3, tmp1 }
197b8e80941Smrg */
198b8e80941Smrgvoid
199b8e80941Smrglower_64bit::expand_source(ir_factory &body,
200b8e80941Smrg                           ir_rvalue *val,
201b8e80941Smrg                           ir_variable **expanded_src)
202b8e80941Smrg{
203b8e80941Smrg   assert(val->type->is_integer_64());
204b8e80941Smrg
205b8e80941Smrg   ir_variable *const temp = body.make_temp(val->type, "tmp");
206b8e80941Smrg
207b8e80941Smrg   body.emit(assign(temp, val));
208b8e80941Smrg
209b8e80941Smrg   const ir_expression_operation unpack_opcode =
210b8e80941Smrg      val->type->base_type == GLSL_TYPE_UINT64
211b8e80941Smrg      ? ir_unop_unpack_uint_2x32 : ir_unop_unpack_int_2x32;
212b8e80941Smrg
213b8e80941Smrg   const glsl_type *const type =
214b8e80941Smrg      val->type->base_type == GLSL_TYPE_UINT64
215b8e80941Smrg      ? glsl_type::uvec2_type : glsl_type::ivec2_type;
216b8e80941Smrg
217b8e80941Smrg   unsigned i;
218b8e80941Smrg   for (i = 0; i < val->type->vector_elements; i++) {
219b8e80941Smrg      expanded_src[i] = body.make_temp(type, "expanded_64bit_source");
220b8e80941Smrg
221b8e80941Smrg      body.emit(assign(expanded_src[i],
222b8e80941Smrg                       expr(unpack_opcode, swizzle(temp, i, 1))));
223b8e80941Smrg   }
224b8e80941Smrg
225b8e80941Smrg   for (/* empty */; i < 4; i++)
226b8e80941Smrg      expanded_src[i] = expanded_src[0];
227b8e80941Smrg}
228b8e80941Smrg
229b8e80941Smrg/**
230b8e80941Smrg * Convert a series of uvec2 results into a single 64-bit integer vector
231b8e80941Smrg */
232b8e80941Smrgir_dereference_variable *
233b8e80941Smrglower_64bit::compact_destination(ir_factory &body,
234b8e80941Smrg                                 const glsl_type *type,
235b8e80941Smrg                                 ir_variable *result[4])
236b8e80941Smrg{
237b8e80941Smrg   const ir_expression_operation pack_opcode =
238b8e80941Smrg      type->base_type == GLSL_TYPE_UINT64
239b8e80941Smrg      ? ir_unop_pack_uint_2x32 : ir_unop_pack_int_2x32;
240b8e80941Smrg
241b8e80941Smrg   ir_variable *const compacted_result =
242b8e80941Smrg      body.make_temp(type, "compacted_64bit_result");
243b8e80941Smrg
244b8e80941Smrg   for (unsigned i = 0; i < type->vector_elements; i++) {
245b8e80941Smrg      body.emit(assign(compacted_result,
246b8e80941Smrg                       expr(pack_opcode, result[i]),
247b8e80941Smrg                       1U << i));
248b8e80941Smrg   }
249b8e80941Smrg
250b8e80941Smrg   void *const mem_ctx = ralloc_parent(compacted_result);
251b8e80941Smrg   return new(mem_ctx) ir_dereference_variable(compacted_result);
252b8e80941Smrg}
253b8e80941Smrg
/**
 * Rewrite a 64-bit expression as per-component calls to \c callee
 *
 * Each operand is expanded into uvec2/ivec2 temporaries, \c callee is
 * invoked once per result component, and the per-component results are
 * packed back into a 64-bit vector.  The generated instructions are spliced
 * into the instruction stream immediately before \c base_ir.
 *
 * \return a dereference of the variable holding the compacted 64-bit result
 */
ir_rvalue *
lower_64bit::lower_op_to_function_call(ir_instruction *base_ir,
                                       ir_expression *ir,
                                       ir_function_signature *callee)
{
   const unsigned num_operands = ir->num_operands;
   /* NOTE(review): fixed sizes assume at most 4 operands and at most 4
    * vector components per operand — consistent with GLSL IR expressions
    * and vector types, but not asserted here.
    */
   ir_variable *src[4][4];
   ir_variable *dst[4];
   void *const mem_ctx = ralloc_parent(ir);
   exec_list instructions;
   unsigned source_components = 0;
   const glsl_type *const result_type =
      ir->type->base_type == GLSL_TYPE_UINT64
      ? glsl_type::uvec2_type : glsl_type::ivec2_type;

   ir_factory body(&instructions, mem_ctx);

   /* Expand each operand; track the widest operand, which determines how
    * many per-component calls must be emitted.
    */
   for (unsigned i = 0; i < num_operands; i++) {
      expand_source(body, ir->operands[i], src[i]);

      if (ir->operands[i]->type->vector_elements > source_components)
         source_components = ir->operands[i]->type->vector_elements;
   }

   /* Emit one call to the builtin per result component. */
   for (unsigned i = 0; i < source_components; i++) {
      dst[i] = body.make_temp(result_type, "expanded_64bit_result");

      exec_list parameters;

      for (unsigned j = 0; j < num_operands; j++)
         parameters.push_tail(new(mem_ctx) ir_dereference_variable(src[j][i]));

      ir_dereference_variable *const return_deref =
         new(mem_ctx) ir_dereference_variable(dst[i]);

      ir_call *const c = new(mem_ctx) ir_call(callee,
                                              return_deref,
                                              &parameters);

      body.emit(c);
   }

   ir_rvalue *const rv = compact_destination(body, ir->type, dst);

   /* Splice the generated instructions into the stream between base_ir and
    * the node that precedes it, preserving their emission order.
    */
   exec_node *const after = base_ir;
   exec_node *const before = after->prev;
   exec_node *const head = instructions.head_sentinel.next;
   exec_node *const tail = instructions.tail_sentinel.prev;

   before->next = head;
   head->prev = before;

   after->prev = tail;
   tail->next = after;

   return rv;
}
314b8e80941Smrg
315b8e80941Smrgir_rvalue *
316b8e80941Smrglower_64bit_visitor::handle_op(ir_expression *ir,
317b8e80941Smrg                               const char *function_name,
318b8e80941Smrg                               function_generator generator)
319b8e80941Smrg{
320b8e80941Smrg   for (unsigned i = 0; i < ir->num_operands; i++)
321b8e80941Smrg      if (!ir->operands[i]->type->is_integer_64())
322b8e80941Smrg         return ir;
323b8e80941Smrg
324b8e80941Smrg   /* Get a handle to the correct ir_function_signature for the core
325b8e80941Smrg    * operation.
326b8e80941Smrg    */
327b8e80941Smrg   ir_function_signature *callee = NULL;
328b8e80941Smrg   ir_function *f = find_function(function_name);
329b8e80941Smrg
330b8e80941Smrg   if (f != NULL) {
331b8e80941Smrg      callee = (ir_function_signature *) f->signatures.get_head();
332b8e80941Smrg      assert(callee != NULL && callee->ir_type == ir_type_function_signature);
333b8e80941Smrg   } else {
334b8e80941Smrg      f = new(base_ir) ir_function(function_name);
335b8e80941Smrg      callee = generator(base_ir, NULL);
336b8e80941Smrg
337b8e80941Smrg      f->add_signature(callee);
338b8e80941Smrg
339b8e80941Smrg      add_function(f);
340b8e80941Smrg   }
341b8e80941Smrg
342b8e80941Smrg   this->progress = true;
343b8e80941Smrg   return lower_op_to_function_call(this->base_ir, ir, callee);
344b8e80941Smrg}
345b8e80941Smrg
346b8e80941Smrgvoid
347b8e80941Smrglower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
348b8e80941Smrg{
349b8e80941Smrg   if (*rvalue == NULL || (*rvalue)->ir_type != ir_type_expression)
350b8e80941Smrg      return;
351b8e80941Smrg
352b8e80941Smrg   ir_expression *const ir = (*rvalue)->as_expression();
353b8e80941Smrg   assert(ir != NULL);
354b8e80941Smrg
355b8e80941Smrg   switch (ir->operation) {
356b8e80941Smrg   case ir_unop_sign:
357b8e80941Smrg      if (lowering(SIGN64)) {
358b8e80941Smrg         *rvalue = handle_op(ir, "__builtin_sign64", generate_ir::sign64);
359b8e80941Smrg      }
360b8e80941Smrg      break;
361b8e80941Smrg
362b8e80941Smrg   case ir_binop_div:
363b8e80941Smrg      if (lowering(DIV64)) {
364b8e80941Smrg         if (ir->type->base_type == GLSL_TYPE_UINT64) {
365b8e80941Smrg            *rvalue = handle_op(ir, "__builtin_udiv64", generate_ir::udiv64);
366b8e80941Smrg         } else {
367b8e80941Smrg            *rvalue = handle_op(ir, "__builtin_idiv64", generate_ir::idiv64);
368b8e80941Smrg         }
369b8e80941Smrg      }
370b8e80941Smrg      break;
371b8e80941Smrg
372b8e80941Smrg   case ir_binop_mod:
373b8e80941Smrg      if (lowering(MOD64)) {
374b8e80941Smrg         if (ir->type->base_type == GLSL_TYPE_UINT64) {
375b8e80941Smrg            *rvalue = handle_op(ir, "__builtin_umod64", generate_ir::umod64);
376b8e80941Smrg         } else {
377b8e80941Smrg            *rvalue = handle_op(ir, "__builtin_imod64", generate_ir::imod64);
378b8e80941Smrg         }
379b8e80941Smrg      }
380b8e80941Smrg      break;
381b8e80941Smrg
382b8e80941Smrg   case ir_binop_mul:
383b8e80941Smrg      if (lowering(MUL64)) {
384b8e80941Smrg         *rvalue = handle_op(ir, "__builtin_umul64", generate_ir::umul64);
385b8e80941Smrg      }
386b8e80941Smrg      break;
387b8e80941Smrg
388b8e80941Smrg   default:
389b8e80941Smrg      break;
390b8e80941Smrg   }
391b8e80941Smrg}
392