1/*
2 * Copyright © 2015 Red Hat
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Rob Clark <robclark@freedesktop.org>
25 */
26
27#include "nir.h"
28#include "nir_builder.h"
29
30/* Lowers idiv/udiv/umod
31 * Based on NV50LegalizeSSA::handleDIV()
32 *
33 * Note that this is probably not enough precision for compute shaders.
34 * Perhaps we want a second higher precision (looping) version of this?
35 * Or perhaps we assume if you can do compute shaders you can also
36 * branch out to a pre-optimized shader library routine..
37 */
38
39static bool
40convert_instr(nir_builder *bld, nir_alu_instr *alu)
41{
42   nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r;
43   nir_op op = alu->op;
44   bool is_signed;
45
46   if ((op != nir_op_idiv) &&
47       (op != nir_op_udiv) &&
48       (op != nir_op_umod))
49      return false;
50
51   is_signed = (op == nir_op_idiv);
52
53   bld->cursor = nir_before_instr(&alu->instr);
54
55   numer = nir_ssa_for_alu_src(bld, alu, 0);
56   denom = nir_ssa_for_alu_src(bld, alu, 1);
57
58   if (is_signed) {
59      af = nir_i2f32(bld, numer);
60      bf = nir_i2f32(bld, denom);
61      af = nir_fabs(bld, af);
62      bf = nir_fabs(bld, bf);
63      a  = nir_iabs(bld, numer);
64      b  = nir_iabs(bld, denom);
65   } else {
66      af = nir_u2f32(bld, numer);
67      bf = nir_u2f32(bld, denom);
68      a  = numer;
69      b  = denom;
70   }
71
72   /* get first result: */
73   bf = nir_frcp(bld, bf);
74   bf = nir_isub(bld, bf, nir_imm_int(bld, 2));  /* yes, really */
75   q  = nir_fmul(bld, af, bf);
76
77   if (is_signed) {
78      q = nir_f2i32(bld, q);
79   } else {
80      q = nir_f2u32(bld, q);
81   }
82
83   /* get error of first result: */
84   r = nir_imul(bld, q, b);
85   r = nir_isub(bld, a, r);
86   r = nir_u2f32(bld, r);
87   r = nir_fmul(bld, r, bf);
88   r = nir_f2u32(bld, r);
89
90   /* add quotients: */
91   q = nir_iadd(bld, q, r);
92
93   /* correction: if modulus >= divisor, add 1 */
94   r = nir_imul(bld, q, b);
95   r = nir_isub(bld, a, r);
96
97   r = nir_uge(bld, r, b);
98   r = nir_b2i32(bld, r);
99
100   q = nir_iadd(bld, q, r);
101   if (is_signed)  {
102      /* fix the sign: */
103      r = nir_ixor(bld, numer, denom);
104      r = nir_ilt(bld, r, nir_imm_int(bld, 0));
105      b = nir_ineg(bld, q);
106      q = nir_bcsel(bld, r, b, q);
107   }
108
109   if (op == nir_op_umod) {
110      /* division result in q */
111      r = nir_imul(bld, q, b);
112      q = nir_isub(bld, a, r);
113   }
114
115   assert(alu->dest.dest.is_ssa);
116   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(q));
117
118   return true;
119}
120
121static bool
122convert_impl(nir_function_impl *impl)
123{
124   nir_builder b;
125   nir_builder_init(&b, impl);
126   bool progress = false;
127
128   nir_foreach_block(block, impl) {
129      nir_foreach_instr_safe(instr, block) {
130         if (instr->type == nir_instr_type_alu)
131            progress |= convert_instr(&b, nir_instr_as_alu(instr));
132      }
133   }
134
135   nir_metadata_preserve(impl, nir_metadata_block_index |
136                               nir_metadata_dominance);
137
138   return progress;
139}
140
141bool
142nir_lower_idiv(nir_shader *shader)
143{
144   bool progress = false;
145
146   nir_foreach_function(function, shader) {
147      if (function->impl)
148         progress |= convert_impl(function->impl);
149   }
150
151   return progress;
152}
153