1/* -*- mesa-c++  -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27
28#include "sfn_emitaluinstruction.h"
29#include "sfn_debug.h"
30
31#include "gallium/drivers/r600/r600_shader.h"
32
33namespace r600 {
34
35using std::vector;
36
37EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor):
38   EmitInstruction (processor)
39{
40
41}
42
43bool EmitAluInstruction::do_emit(nir_instr* ir)
44{
45   const nir_alu_instr& instr = *nir_instr_as_alu(ir);
46
47   r600::sfn_log << SfnLog::instr << "emit '"
48                 << *ir
49                 << " bitsize: " << static_cast<int>(instr.dest.dest.ssa.bit_size)
50                 << "' (" << __func__ << ")\n";
51
52   preload_src(instr);
53
54   if (get_chip_class() == CAYMAN) {
55      switch (instr.op) {
56      case nir_op_fcos_r600: return emit_alu_cm_trig(instr, op1_cos);
57      case nir_op_fexp2: return emit_alu_cm_trig(instr, op1_exp_ieee);
58      case nir_op_flog2: return emit_alu_cm_trig(instr, op1_log_clamped);
59      case nir_op_frcp: return emit_alu_cm_trig(instr, op1_recip_ieee);
60      case nir_op_frsq: return emit_alu_cm_trig(instr, op1_recipsqrt_ieee1);
61      case nir_op_fsin_r600: return emit_alu_cm_trig(instr, op1_sin);
62      case nir_op_fsqrt: return emit_alu_cm_trig(instr, op1_sqrt_ieee);
63         default:
64            ;
65      }
66   }
67
68   switch (instr.op) {
69    /* These are in the ALU instruction list, but they should be texture instructions */
70   case nir_op_b2b1: return emit_mov(instr);
71   case nir_op_b2b32: return emit_mov(instr);
72   case nir_op_b2f32: return emit_alu_b2f(instr);
73   case nir_op_b2i32: return emit_b2i32(instr);
74   case nir_op_b32all_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
75   case nir_op_b32all_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
76   case nir_op_b32all_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
77   case nir_op_b32all_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
78   case nir_op_b32all_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
79   case nir_op_b32all_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
80   case nir_op_b32any_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
81   case nir_op_b32any_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
82   case nir_op_b32any_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
83   case nir_op_b32any_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
84   case nir_op_b32any_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
85   case nir_op_b32any_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
86   case nir_op_b32csel: return emit_alu_op3(instr, op3_cnde_int,  {0, 2, 1});
87   case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
88   case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
89   case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
90   case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
91   case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
92   case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
93   case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
94   case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
95   case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
96   case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
97   case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
98   case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
99   case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int,  {0, 2, 1});
100   case nir_op_bfm: return emit_alu_op2_int(instr, op2_bfm_int);
101   case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int);
102
103   case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int);
104   case nir_op_bitfield_select: return emit_alu_op3(instr, op3_bfi_int);
105   case nir_op_cube_r600: return emit_cube(instr);
106   case nir_op_f2b1: return emit_alu_i2orf2_b1(instr, op2_setne_dx10);
107   case nir_op_f2b32: return emit_alu_f2b32(instr);
108   case nir_op_f2i32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_int);
109   case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint);
110   case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs});
111   case nir_op_fadd: return emit_alu_op2(instr, op2_add);
112   case nir_op_fceil: return emit_alu_op1(instr, op1_ceil);
113   case nir_op_fcos_r600: return emit_alu_trans_op1(instr, op1_cos);
114   case nir_op_fcsel: return emit_alu_op3(instr, op3_cnde, {0, 2, 1});
115   case nir_op_fcsel_ge: return emit_alu_op3(instr, op3_cndge, {0, 1, 2});
116   case nir_op_fcsel_gt: return emit_alu_op3(instr, op3_cndgt, {0, 1, 2});
117
118    /* These are in the ALU instruction list, but they should be texture instructions */
119   case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
120   case nir_op_fddx_coarse: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
121   case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true);
122   case nir_op_fddy: return emit_tex_fdd(instr,TexInstruction::get_gradient_v, false);
123   case nir_op_fddy_coarse:
124   case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v,  true);
125   case nir_op_fdot2: return emit_dot(instr, 2);
126   case nir_op_fdot3: return emit_dot(instr, 3);
127   case nir_op_fdot4: return emit_dot(instr, 4);
128   case nir_op_fdph:  return emit_fdph(instr);
129   case nir_op_feq32: return emit_alu_op2(instr, op2_sete_dx10);
130   case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10);
131   case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee);
132   case nir_op_ffloor: return emit_alu_op1(instr, op1_floor);
133   case nir_op_ffma: return emit_alu_op3(instr, op3_muladd_ieee);
134   case nir_op_ffract: return emit_alu_op1(instr, op1_fract);
135   case nir_op_fge32: return emit_alu_op2(instr, op2_setge_dx10);
136   case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10);
137   case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int);
138   case nir_op_flog2: return emit_alu_trans_op1(instr, op1_log_clamped);
139   case nir_op_flt32: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);
140   case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);
141   case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10);
142   case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10);
143   case nir_op_fmul: return emit_alu_op2(instr, op2_mul_ieee);
144   case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg});
145   case nir_op_fneu32: return emit_alu_op2(instr, op2_setne_dx10);
146   case nir_op_fneu: return emit_alu_op2(instr, op2_setne_dx10);
147   case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee);
148   case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne);
149   case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1);
150   case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp});
151   case nir_op_fsin_r600: return emit_alu_trans_op1(instr, op1_sin);
152   case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee);
153   case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1);
154   case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc);
155   case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int);
156   case nir_op_i2b32: return emit_alu_i2orf2_b1(instr, op2_setne_int);
157   case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt);
158   case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int);
159   case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int);
160   case nir_op_ibfe: return emit_alu_op3(instr, op3_bfe_int);
161   case nir_op_i32csel_ge: return emit_alu_op3(instr, op3_cndge_int,  {0, 1, 2});
162   case nir_op_i32csel_gt: return emit_alu_op3(instr, op3_cndgt_int,  {0, 1, 2});
163   case nir_op_ieq32: return emit_alu_op2_int(instr, op2_sete_int);
164   case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int);
165   case nir_op_ifind_msb_rev: return emit_alu_op1(instr, op1_ffbh_int);
166   case nir_op_ige32: return emit_alu_op2_int(instr, op2_setge_int);
167   case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int);
168   case nir_op_ilt32: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
169   case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
170   case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int);
171   case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int);
172   case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int);
173   case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int);
174   case nir_op_ine32: return emit_alu_op2_int(instr, op2_setne_int);
175   case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int);
176   case nir_op_ineg: return emit_alu_ineg(instr);
177   case nir_op_inot: return emit_alu_op1(instr, op1_not_int);
178   case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int);
179   case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int);
180   case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int);
181   case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int);
182   case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int);
183   case nir_op_mov:return emit_mov(instr);
184   case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr);
185   case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr);
186   case nir_op_slt: return emit_alu_op2(instr, op2_setgt, op2_opt_reverse);
187   case nir_op_sge: return emit_alu_op2(instr, op2_setge);
188   case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt);
189   case nir_op_ubfe: return emit_alu_op3(instr, op3_bfe_uint);
190   case nir_op_ufind_msb_rev: return emit_alu_op1(instr, op1_ffbh_uint);
191   case nir_op_uge32: return emit_alu_op2_int(instr, op2_setge_uint);
192   case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint);
193   case nir_op_ult32: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
194   case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
195   case nir_op_umad24: return emit_alu_op3(instr, op3_muladd_uint24,  {0, 1, 2});
196   case nir_op_umax: return emit_alu_op2_int(instr, op2_max_uint);
197   case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint);
198   case nir_op_umul24: return emit_alu_op2(instr, op2_mul_uint24);
199   case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint);
200   case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0);
201   case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1);
202   case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr);
203   case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr);
204   case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int);
205   case nir_op_vec2: return emit_create_vec(instr, 2);
206   case nir_op_vec3: return emit_create_vec(instr, 3);
207   case nir_op_vec4: return emit_create_vec(instr, 4);
208   default:
209      return false;
210   }
211}
212
213void EmitAluInstruction::preload_src(const nir_alu_instr& instr)
214{
215   const nir_op_info *op_info = &nir_op_infos[instr.op];
216   assert(op_info->num_inputs <= 4);
217
218   unsigned nsrc_comp = num_src_comp(instr);
219   sfn_log << SfnLog::reg << "Preload:\n";
220   for (unsigned i = 0; i < op_info->num_inputs; ++i) {
221      for (unsigned c = 0; c < nsrc_comp; ++c) {
222         m_src[i][c] = from_nir(instr.src[i], c);
223         sfn_log << SfnLog::reg << " " << *m_src[i][c];
224
225      }
226      sfn_log << SfnLog::reg << "\n";
227   }
228   if (instr.op == nir_op_fdph) {
229      m_src[1][3] = from_nir(instr.src[1], 3);
230      sfn_log << SfnLog::reg << " extra:" << *m_src[1][3] << "\n";
231   }
232
233   split_constants(instr, nsrc_comp);
234}
235
236unsigned EmitAluInstruction::num_src_comp(const nir_alu_instr& instr)
237{
238   switch (instr.op) {
239   case nir_op_fdot2:
240   case nir_op_bany_inequal2:
241   case nir_op_ball_iequal2:
242   case nir_op_bany_fnequal2:
243   case nir_op_ball_fequal2:
244   case nir_op_b32any_inequal2:
245   case nir_op_b32all_iequal2:
246   case nir_op_b32any_fnequal2:
247   case nir_op_b32all_fequal2:
248   case nir_op_unpack_64_2x32_split_y:
249      return 2;
250
251   case nir_op_fdot3:
252   case nir_op_bany_inequal3:
253   case nir_op_ball_iequal3:
254   case nir_op_bany_fnequal3:
255   case nir_op_ball_fequal3:
256   case nir_op_b32any_inequal3:
257   case nir_op_b32all_iequal3:
258   case nir_op_b32any_fnequal3:
259   case nir_op_b32all_fequal3:
260   case nir_op_cube_r600:
261      return 3;
262
263   case nir_op_fdot4:
264   case nir_op_fdph:
265   case nir_op_bany_inequal4:
266   case nir_op_ball_iequal4:
267   case nir_op_bany_fnequal4:
268   case nir_op_ball_fequal4:
269   case nir_op_b32any_inequal4:
270   case nir_op_b32all_iequal4:
271   case nir_op_b32any_fnequal4:
272   case nir_op_b32all_fequal4:
273      return 4;
274
275   case nir_op_vec2:
276   case nir_op_vec3:
277   case nir_op_vec4:
278      return 1;
279
280   default:
281      return nir_dest_num_components(instr.dest.dest);
282
283   }
284}
285
286bool EmitAluInstruction::emit_cube(const nir_alu_instr& instr)
287{
288   AluInstruction *ir = nullptr;
289   const uint16_t src0_chan[4] = {2, 2, 0, 1};
290   const uint16_t src1_chan[4] = {1, 0, 2, 2};
291
292   for (int i = 0; i < 4; ++i)  {
293      ir = new AluInstruction(op2_cube, from_nir(instr.dest, i),
294                              from_nir(instr.src[0], src0_chan[i]),
295                              from_nir(instr.src[0], src1_chan[i]), {alu_write});
296      emit_instruction(ir);
297   }
298   ir->set_flag(alu_last_instr);
299   return true;
300}
301
302void EmitAluInstruction::split_constants(const nir_alu_instr& instr, unsigned nsrc_comp)
303{
304    const nir_op_info *op_info = &nir_op_infos[instr.op];
305    if (op_info->num_inputs < 2)
306       return;
307
308    int nconst = 0;
309    std::array<const UniformValue *,4> c;
310    std::array<int,4> idx;
311    for (unsigned i = 0; i < op_info->num_inputs; ++i) {
312       PValue& src = m_src[i][0];
313       assert(src);
314       sfn_log << SfnLog::reg << "Split test " << *src;
315
316       if (src->type() == Value::kconst) {
317          c[nconst] = static_cast<const UniformValue *>(src.get());
318          idx[nconst++] = i;
319          sfn_log << SfnLog::reg << " is constant " << i;
320       }
321       sfn_log << SfnLog::reg << "\n";
322    }
323
324    if (nconst < 2)
325       return;
326
327    unsigned sel = c[0]->sel();
328    unsigned kcache =  c[0]->kcache_bank();
329    sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ;
330
331    for (int i = 1; i < nconst; ++i) {
332       sfn_log << "sel[" << i << "] = " <<  c[i]->sel() << "\n";
333       if (c[i]->sel() != sel || c[i]->kcache_bank() != kcache) {
334          AluInstruction *ir = nullptr;
335          auto v = get_temp_vec4();
336          for (unsigned k = 0; k < nsrc_comp; ++k) {
337             ir = new AluInstruction(op1_mov, v[k], m_src[idx[i]][k], {write});
338             emit_instruction(ir);
339             m_src[idx[i]][k] = v[k];
340          }
341          make_last(ir);
342       }
343    }
344}
345
346bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr)
347{
348   if (instr.src[0].negate || instr.src[0].abs) {
349      std::cerr << "source modifiers not supported with int ops\n";
350      return false;
351   }
352
353   AluInstruction *ir = nullptr;
354   for (int i = 0; i < 4 ; ++i) {
355      if (instr.dest.write_mask & (1 << i)){
356         ir = new AluInstruction(op1_not_int, from_nir(instr.dest, i),
357                                 m_src[0][i], write);
358         emit_instruction(ir);
359      }
360   }
361   make_last(ir);
362   return true;
363}
364
365bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode,
366                                      const AluOpFlags& flags)
367{
368   AluInstruction *ir = nullptr;
369   for (int i = 0; i < 4 ; ++i) {
370      if (instr.dest.write_mask & (1 << i)){
371         ir = new AluInstruction(opcode, from_nir(instr.dest, i),
372                                 m_src[0][i], write);
373
374         if (flags.test(alu_src0_abs) || instr.src[0].abs)
375            ir->set_flag(alu_src0_abs);
376
377         if (instr.src[0].negate ^ flags.test(alu_src0_neg))
378            ir->set_flag(alu_src0_neg);
379
380         if (flags.test(alu_dst_clamp) || instr.dest.saturate)
381             ir->set_flag(alu_dst_clamp);
382
383         emit_instruction(ir);
384      }
385   }
386   make_last(ir);
387
388   return true;
389}
390
391bool EmitAluInstruction::emit_mov(const nir_alu_instr& instr)
392{
393   /* If the op is a plain move beween SSA values we can just forward
394    * the register reference to the original register */
395   if (instr.dest.dest.is_ssa && instr.src[0].src.is_ssa &&
396       !instr.src[0].abs && !instr.src[0].negate  && !instr.dest.saturate) {
397      bool result = true;
398      for (int i = 0; i < 4 ; ++i) {
399         if (instr.dest.write_mask & (1 << i)){
400            result &= inject_register(instr.dest.dest.ssa.index, i,
401                                      m_src[0][i], true);
402         }
403      }
404      return result;
405   } else {
406      return emit_alu_op1(instr, op1_mov);
407   }
408}
409
410bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode,
411                                            bool absolute)
412{
413   AluInstruction *ir = nullptr;
414   std::set<int> src_idx;
415
416   if (get_chip_class() == CAYMAN) {
417      int last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3;
418      for (int i = 0; i < last_slot; ++i) {
419         bool write_comp = instr.dest.write_mask & (1 << i);
420         ir = new AluInstruction(opcode, from_nir(instr.dest, i),
421                                 m_src[0][write_comp ? i : 0], write_comp ? write : empty);
422         if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
423         if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
424         if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
425
426         if (i == (last_slot - 1)) ir->set_flag(alu_last_instr);
427
428         emit_instruction(ir);
429      }
430   } else {
431      for (int i = 0; i < 4 ; ++i) {
432         if (instr.dest.write_mask & (1 << i)){
433            ir = new AluInstruction(opcode, from_nir(instr.dest, i),
434                                    m_src[0][i], last_write);
435            if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
436            if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
437            if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
438            emit_instruction(ir);
439         }
440      }
441   }
442   return true;
443}
444
445bool EmitAluInstruction::emit_alu_cm_trig(const nir_alu_instr& instr, EAluOp opcode)
446{
447   AluInstruction *ir = nullptr;
448   std::set<int> src_idx;
449
450   unsigned last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3;
451
452   for (unsigned j = 0; j < nir_dest_num_components(instr.dest.dest); ++j) {
453      for (unsigned i = 0; i < last_slot; ++i) {
454         bool write_comp = instr.dest.write_mask & (1 << j) && (i == j);
455         ir = new AluInstruction(opcode, from_nir(instr.dest, i),
456                                 m_src[0][j], write_comp ? write : empty);
457         if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
458         if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
459         if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
460
461         if (i == (last_slot - 1)) ir->set_flag(alu_last_instr);
462
463         emit_instruction(ir);
464      }
465   }
466   return true;
467}
468
469
470bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op)
471{
472   AluInstruction *ir = nullptr;
473
474   if (get_chip_class() < CAYMAN) {
475      std::array<PValue, 4> v;
476
477      for (int i = 0; i < 4; ++i) {
478         if (!(instr.dest.write_mask & (1 << i)))
479            continue;
480         v[i] = from_nir(instr.dest, i);
481         ir = new AluInstruction(op1_trunc, v[i], m_src[0][i], {alu_write});
482         if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
483         if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
484         emit_instruction(ir);
485      }
486      make_last(ir);
487
488      for (int i = 0; i < 4; ++i) {
489         if (!(instr.dest.write_mask & (1 << i)))
490            continue;
491         ir = new AluInstruction(op, v[i], v[i], {alu_write});
492         emit_instruction(ir);
493         if (op == op1_flt_to_uint)
494            make_last(ir);
495      }
496      make_last(ir);
497   } else {
498      for (int i = 0; i < 4; ++i) {
499         if (!(instr.dest.write_mask & (1 << i)))
500            continue;
501         ir = new AluInstruction(op, from_nir(instr.dest, i), m_src[0][i], {alu_write});
502         if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
503         if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
504         emit_instruction(ir);
505         if (op == op1_flt_to_uint)
506            make_last(ir);
507      }
508      make_last(ir);
509   }
510
511   return true;
512}
513
514bool EmitAluInstruction::emit_alu_f2b32(const nir_alu_instr& instr)
515{
516   AluInstruction *ir = nullptr;
517   for (int i = 0; i < 4 ; ++i) {
518      if (instr.dest.write_mask & (1 << i)){
519         ir = new AluInstruction(op2_setne_dx10, from_nir(instr.dest, i),
520                                 m_src[0][i], literal(0.0f), write);
521         emit_instruction(ir);
522      }
523   }
524   make_last(ir);
525   return true;
526}
527
528bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr)
529{
530   AluInstruction *ir = nullptr;
531   for (int i = 0; i < 4 ; ++i) {
532      if (!(instr.dest.write_mask & (1 << i)))
533         continue;
534
535      ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
536                              m_src[0][i], Value::one_i, write);
537     emit_instruction(ir);
538   }
539   make_last(ir);
540
541   return true;
542}
543
544bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr)
545{
546   AluInstruction *ir = nullptr;
547   for (unsigned i = 0; i < 2; ++i) {
548      if (!(instr.dest.write_mask & (1 << i)))
549         continue;
550     ir = new AluInstruction(op1_mov, from_nir(instr.dest, i),
551                             m_src[0][i], write);
552     emit_instruction(ir);
553   }
554   ir->set_flag(alu_last_instr);
555   return true;
556}
557
558bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp)
559{
560   emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0),
561                                       m_src[0][comp], last_write));
562   return true;
563}
564
565bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc)
566{
567   AluInstruction *ir = nullptr;
568   std::set<int> src_slot;
569   for(unsigned i = 0; i < nc; ++i) {
570      if (instr.dest.write_mask & (1 << i)){
571         auto src = m_src[i][0];
572         ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write);
573         if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
574
575         // FIXME: This is a rather crude approach to fix the problem that
576         // r600 can't read from four different slots of the same component
577         // here we check only for the register index
578         if (src->type() == Value::gpr)
579            src_slot.insert(src->sel());
580         if (src_slot.size() >= 3) {
581            src_slot.clear();
582            ir->set_flag(alu_last_instr);
583         }
584         emit_instruction(ir);
585      }
586   }
587   if (ir)
588      ir->set_flag(alu_last_instr);
589   return true;
590}
591
592bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n)
593{
594   const nir_alu_src& src0 = instr.src[0];
595   const nir_alu_src& src1 = instr.src[1];
596
597   AluInstruction *ir = nullptr;
598   for (int i = 0; i < n ; ++i) {
599      ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
600                              m_src[0][i], m_src[1][i],
601                              instr.dest.write_mask & (1 << i) ? write : empty);
602
603      if (src0.negate) ir->set_flag(alu_src0_neg);
604      if (src0.abs) ir->set_flag(alu_src0_abs);
605      if (src1.negate) ir->set_flag(alu_src1_neg);
606      if (src1.abs) ir->set_flag(alu_src1_abs);
607
608      if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
609      emit_instruction(ir);
610   }
611   for (int i = n; i < 4 ; ++i) {
612      ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
613                              Value::zero, Value::zero,
614                              instr.dest.write_mask & (1 << i) ? write : empty);
615      emit_instruction(ir);
616   }
617
618   if (ir)
619      ir->set_flag(alu_last_instr);
620   return true;
621}
622
623bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr)
624{
625   const nir_alu_src& src0 = instr.src[0];
626   const nir_alu_src& src1 = instr.src[1];
627
628   AluInstruction *ir = nullptr;
629   for (int i = 0; i < 3 ; ++i) {
630      ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
631                              m_src[0][i], m_src[1][i],
632                              instr.dest.write_mask & (1 << i) ? write : empty);
633      if (src0.negate) ir->set_flag(alu_src0_neg);
634      if (src0.abs) ir->set_flag(alu_src0_abs);
635      if (src1.negate) ir->set_flag(alu_src1_neg);
636      if (src1.abs) ir->set_flag(alu_src1_abs);
637      if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
638      emit_instruction(ir);
639   }
640
641   ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f,
642                           m_src[1][3], (instr.dest.write_mask) & (1 << 3) ? write : empty);
643   if (src1.negate) ir->set_flag(alu_src1_neg);
644   if (src1.abs) ir->set_flag(alu_src1_abs);
645   emit_instruction(ir);
646
647   ir->set_flag(alu_last_instr);
648   return true;
649
650}
651
652bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op)
653{
654   AluInstruction *ir = nullptr;
655   for (int i = 0; i < 4 ; ++i) {
656      if (instr.dest.write_mask & (1 << i)) {
657         ir = new AluInstruction(op, from_nir(instr.dest, i),
658                                 m_src[0][i], Value::zero,
659                                 write);
660         emit_instruction(ir);
661      }
662   }
663   if (ir)
664      ir->set_flag(alu_last_instr);
665   return true;
666}
667
668bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr)
669{
670   AluInstruction *ir = nullptr;
671   for (int i = 0; i < 4 ; ++i) {
672      if (instr.dest.write_mask & (1 << i)){
673         ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
674                                 m_src[0][i], Value::one_f, write);
675         if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
676         if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
677         if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
678         emit_instruction(ir);
679      }
680   }
681   if (ir)
682      ir->set_flag(alu_last_instr);
683   return true;
684}
685
686bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
687{
688
689   AluInstruction *ir = nullptr;
690   PValue v[4]; // this might need some additional temp register creation
691   for (unsigned i = 0; i < 4 ; ++i)
692      v[i] = from_nir(instr.dest, i);
693
694   EAluOp combine = all ? op2_and_int : op2_or_int;
695
696   /* For integers we can not use the modifiers, so this needs some emulation */
697   /* Should actually be lowered with NIR */
698   if (instr.src[0].negate == instr.src[1].negate &&
699       instr.src[0].abs == instr.src[1].abs) {
700
701      for (unsigned i = 0; i < nc ; ++i) {
702         ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
703         emit_instruction(ir);
704      }
705      if (ir)
706         ir->set_flag(alu_last_instr);
707   } else {
708      std::cerr << "Negate in iequal/inequal not (yet) supported\n";
709      return false;
710   }
711
712   for (unsigned i = 0; i < nc/2 ; ++i) {
713      ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write);
714      emit_instruction(ir);
715   }
716   if (ir)
717      ir->set_flag(alu_last_instr);
718
719   if (nc > 2) {
720      ir = new AluInstruction(combine, v[0], v[0], v[2], last_write);
721      emit_instruction(ir);
722   }
723
724   return true;
725}
726
727bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
728{
729   AluInstruction *ir = nullptr;
730   PValue v[4]; // this might need some additional temp register creation
731   for (unsigned i = 0; i < 4 ; ++i)
732      v[i] = from_nir(instr.dest, i);
733
734   for (unsigned i = 0; i < nc ; ++i) {
735      ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
736
737      if (instr.src[0].abs)
738         ir->set_flag(alu_src0_abs);
739      if (instr.src[0].negate)
740         ir->set_flag(alu_src0_neg);
741
742      if (instr.src[1].abs)
743         ir->set_flag(alu_src1_abs);
744      if (instr.src[1].negate)
745         ir->set_flag(alu_src1_neg);
746
747      emit_instruction(ir);
748   }
749   if (ir)
750      ir->set_flag(alu_last_instr);
751
752   for (unsigned i = 0; i < nc ; ++i) {
753      ir = new AluInstruction(op1_max4, v[i], v[i], write);
754      if (all) ir->set_flag(alu_src0_neg);
755      emit_instruction(ir);
756   }
757
758   for (unsigned i = nc; i < 4 ; ++i) {
759      ir = new AluInstruction(op1_max4, v[i],
760                              all ? Value::one_f : Value::zero, write);
761      if (all)
762         ir->set_flag(alu_src0_neg);
763
764      emit_instruction(ir);
765   }
766
767   ir->set_flag(alu_last_instr);
768
769   if (all)
770      op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10;
771   else
772      op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10;
773
774   ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write);
775   if (all)
776      ir->set_flag(alu_src1_neg);
777   emit_instruction(ir);
778
779   return true;
780}
781
782bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all)
783{
784   AluInstruction *ir = nullptr;
785   PValue v[4]; // this might need some additional temp register creation
786   for (unsigned i = 0; i < 4 ; ++i)
787      v[i] = from_nir(instr.dest, i);
788
789   for (unsigned i = 0; i < 2 ; ++i) {
790      ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
791      if (instr.src[0].abs)
792         ir->set_flag(alu_src0_abs);
793      if (instr.src[0].negate)
794         ir->set_flag(alu_src0_neg);
795
796      if (instr.src[1].abs)
797         ir->set_flag(alu_src1_abs);
798      if (instr.src[1].negate)
799         ir->set_flag(alu_src1_neg);
800
801      emit_instruction(ir);
802   }
803   if (ir)
804      ir->set_flag(alu_last_instr);
805
806   op = (op == op2_setne_dx10) ? op2_or_int: op2_and_int;
807   ir = new AluInstruction(op, v[0], v[0], v[1], last_write);
808   emit_instruction(ir);
809
810   return true;
811}
812
813bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode)
814{
815   const nir_alu_src& src0 = instr.src[0];
816   const nir_alu_src& src1 = instr.src[1];
817
818   AluInstruction *ir = nullptr;
819
820   if (get_chip_class() == CAYMAN) {
821      for (int k = 0; k < 4; ++k) {
822         if (instr.dest.write_mask & (1 << k)) {
823
824            for (int i = 0; i < 4; i++) {
825               ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][k], m_src[1][k], (i == k) ? write : empty);
826               if (src0.negate) ir->set_flag(alu_src0_neg);
827               if (src0.abs) ir->set_flag(alu_src0_abs);
828               if (src1.negate) ir->set_flag(alu_src1_neg);
829               if (src1.abs) ir->set_flag(alu_src1_abs);
830               if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
831               if (i == 3) ir->set_flag(alu_last_instr);
832               emit_instruction(ir);
833            }
834         }
835      }
836   } else {
837      for (int i = 0; i < 4 ; ++i) {
838         if (instr.dest.write_mask & (1 << i)){
839            ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][i], m_src[1][i], last_write);
840            if (src0.negate) ir->set_flag(alu_src0_neg);
841            if (src0.abs) ir->set_flag(alu_src0_abs);
842            if (src1.negate) ir->set_flag(alu_src1_neg);
843            if (src1.abs) ir->set_flag(alu_src1_abs);
844            if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
845            emit_instruction(ir);
846         }
847      }
848   }
849   return true;
850}
851
852bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts)
853{
854
855   const nir_alu_src& src0 = instr.src[0];
856   const nir_alu_src& src1 = instr.src[1];
857
858   if (src0.negate || src1.negate ||
859       src0.abs || src1.abs) {
860      std::cerr << "R600: don't support modifiers with integer operations";
861      return false;
862   }
863   return emit_alu_op2(instr, opcode, opts);
864}
865
866bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
867{
868   const nir_alu_src *src0 = &instr.src[0];
869   const nir_alu_src *src1 = &instr.src[1];
870
871   int idx0 = 0;
872   int idx1 = 1;
873   if (ops & op2_opt_reverse) {
874      std::swap(src0, src1);
875      std::swap(idx0, idx1);
876   }
877
878   bool src1_negate = (ops & op2_opt_neg_src1) ^ src1->negate;
879
880   AluInstruction *ir = nullptr;
881   for (int i = 0; i < 4 ; ++i) {
882      if (instr.dest.write_mask & (1 << i)){
883         ir = new AluInstruction(opcode, from_nir(instr.dest, i),
884                                 m_src[idx0][i], m_src[idx1][i], write);
885
886         if (src0->negate) ir->set_flag(alu_src0_neg);
887         if (src0->abs) ir->set_flag(alu_src0_abs);
888         if (src1_negate) ir->set_flag(alu_src1_neg);
889         if (src1->abs) ir->set_flag(alu_src1_abs);
890         if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
891         emit_instruction(ir);
892      }
893   }
894   if (ir)
895      ir->set_flag(alu_last_instr);
896   return true;
897}
898
899bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode,
900                                      std::array<uint8_t, 3> reorder)
901{
902   const nir_alu_src *src[3];
903   src[0] = &instr.src[reorder[0]];
904   src[1] = &instr.src[reorder[1]];
905   src[2] = &instr.src[reorder[2]];
906
907   AluInstruction *ir = nullptr;
908   for (int i = 0; i < 4 ; ++i) {
909      if (instr.dest.write_mask & (1 << i)){
910         ir = new AluInstruction(opcode, from_nir(instr.dest, i),
911                                 m_src[reorder[0]][i],
912                                 m_src[reorder[1]][i],
913                                 m_src[reorder[2]][i],
914               write);
915
916         if (src[0]->negate) ir->set_flag(alu_src0_neg);
917         if (src[1]->negate) ir->set_flag(alu_src1_neg);
918         if (src[2]->negate) ir->set_flag(alu_src2_neg);
919
920         if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
921         ir->set_flag(alu_write);
922         emit_instruction(ir);
923      }
924   }
925   make_last(ir);
926   return true;
927}
928
929bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr)
930{
931   AluInstruction *ir = nullptr;
932   for (int i = 0; i < 4 ; ++i) {
933      if (instr.dest.write_mask & (1 << i)){
934         ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero,
935                                 m_src[0][i], write);
936         emit_instruction(ir);
937      }
938   }
939   if (ir)
940      ir->set_flag(alu_last_instr);
941
942   return true;
943}
944
/* Characters naming the swizzle selectors 0..7 -- presumably intended for
 * debug output. NOTE(review): nothing after this point in the file refers
 * to it; confirm whether it is still needed. */
static constexpr char swz[] = "xyzw01?_";
946
947void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src,
948                                             const GPRVector::Values& v, GPRVector::Values& out, int ncomp)
949{
950
951   AluInstruction *alu = nullptr;
952   for (int i = 0; i < ncomp; ++i) {
953      alu  = new AluInstruction(op1_mov,  out[i], v[i], {alu_write});
954      if (src.abs)
955         alu->set_flag(alu_src0_abs);
956      if (src.negate)
957         alu->set_flag(alu_src0_neg);
958      emit_instruction(alu);
959   }
960   make_last(alu);
961}
962
963bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op,
964                                      bool fine)
965{
966
967   GPRVector::Values v;
968   std::array<int, 4> writemask = {0,1,2,3};
969
970   int ncomp = nir_dest_num_components(instr.dest.dest);
971   GPRVector::Swizzle src_swz = {7,7,7,7};
972   for (auto i = 0; i < ncomp; ++i)
973      src_swz[i] = instr.src[0].swizzle[i];
974
975   auto src = vec_from_nir_with_fetch_constant(instr.src[0].src, (1 << ncomp) - 1, src_swz);
976
977   if (instr.src[0].abs || instr.src[0].negate) {
978      GPRVector tmp = get_temp_vec4();
979      split_alu_modifiers(instr.src[0], src.values(), tmp.values(), ncomp);
980      src = tmp;
981   }
982
983   for (int i = 0; i < 4; ++i) {
984      writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7;
985      v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0);
986   }
987
988   /* This is querying the dreivatives of the output fb, so we would either need
989    * access to the neighboring pixels or to the framebuffer. Neither is currently
990    * implemented */
991   GPRVector dst(v);
992
993   auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue());
994   tex->set_dest_swizzle(writemask);
995
996   if (fine)
997      tex->set_flag(TexInstruction::grad_fine);
998
999   emit_instruction(tex);
1000
1001   return true;
1002}
1003
1004bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr)
1005{
1006   auto tmp = get_temp_register();
1007   emit_instruction(op2_lshr_int, tmp,
1008   {m_src[0][0], PValue(new LiteralValue(16))},
1009   {alu_write, alu_last_instr});
1010
1011   emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1012                                  {tmp}, {alu_write, alu_last_instr});
1013
1014   return true;
1015}
1016
1017bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr)
1018{
1019   emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1020   {m_src[0][0]},{alu_write, alu_last_instr});
1021   return true;
1022}
1023
1024bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr)
1025{
1026   PValue x = get_temp_register();
1027   PValue y = get_temp_register();
1028
1029   emit_instruction(op1_flt32_to_flt16, x,{m_src[0][0]},{alu_write});
1030   emit_instruction(op1_flt32_to_flt16, y,{m_src[1][0]},{alu_write, alu_last_instr});
1031
1032   emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))},{alu_write, alu_last_instr});
1033
1034   emit_instruction(op2_or_int, {from_nir(instr.dest, 0)} , {x, y},{alu_write, alu_last_instr});
1035
1036   return true;
1037}
1038
1039}
1040