1/* -*- mesa-c++  -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27#include "../r600_pipe.h"
28#include "../r600_shader.h"
29#include "sfn_shader_vertex.h"
30
31#include "sfn_shader_compute.h"
32#include "sfn_shader_fragment.h"
33#include "sfn_shader_geometry.h"
34#include "sfn_liverange.h"
35#include "sfn_ir_to_assembly.h"
36#include "sfn_nir.h"
37#include "sfn_instruction_misc.h"
38#include "sfn_instruction_fetch.h"
39#include "sfn_instruction_lds.h"
40
41#include <iostream>
42
/* Debug switch for this translation unit. ENABLE_DEBUG is defined
 * unconditionally right here, so DEBUG_SFN(X) currently always expands to
 * the wrapped statement. */
#define ENABLE_DEBUG 1

#ifdef ENABLE_DEBUG
/* Run X as exactly one statement, so that "DEBUG_SFN(foo);" is safe inside
 * an unbraced if/else. */
#define DEBUG_SFN(X)  \
   do {\
      X; \
   } while (0)
#else
/* Disabled variant: X is dropped, but the macro still expands to one complete
 * statement, so both variants need the trailing ';' and neither leaves an
 * empty if/else body behind. */
#define DEBUG_SFN(X)  \
   do {\
   } while (0)
#endif
53
54namespace r600 {
55
56using namespace std;
57
58
59ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
60                                               r600_pipe_shader_selector& sel,
61                                               r600_shader &sh_info, int scratch_size,
62                                               enum chip_class chip_class,
63                                               int atomic_base):
64   m_processor_type(ptype),
65   m_nesting_depth(0),
66   m_block_number(0),
67   m_export_output(0, -1),
68   m_sh_info(sh_info),
69   m_chip_class(chip_class),
70   m_tex_instr(*this),
71   m_alu_instr(*this),
72   m_ssbo_instr(*this),
73   m_pending_else(nullptr),
74   m_scratch_size(scratch_size),
75   m_next_hwatomic_loc(0),
76   m_sel(sel),
77   m_atomic_base(atomic_base),
78   m_image_count(0),
79   last_emitted_alu(nullptr)
80{
81   m_sh_info.processor_type = ptype;
82
83}
84
85
86ShaderFromNirProcessor::~ShaderFromNirProcessor()
87{
88}
89
90bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
91{
92   switch (instr->type) {
93   case nir_instr_type_tex: {
94      nir_tex_instr *t = nir_instr_as_tex(instr);
95      if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
96         sh_info().uses_tex_buffers = true;
97      if (t->op == nir_texop_txs &&
98          t->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
99          t->is_array)
100         sh_info().has_txq_cube_array_z_comp = true;
101      break;
102   }
103   case nir_instr_type_intrinsic: {
104      auto *i = nir_instr_as_intrinsic(instr);
105      switch (i->intrinsic) {
106      case nir_intrinsic_ssbo_atomic_add:
107      case nir_intrinsic_image_atomic_add:
108      case nir_intrinsic_ssbo_atomic_and:
109      case nir_intrinsic_image_atomic_and:
110      case nir_intrinsic_ssbo_atomic_or:
111      case nir_intrinsic_image_atomic_or:
112      case nir_intrinsic_ssbo_atomic_imin:
113      case nir_intrinsic_image_atomic_imin:
114      case nir_intrinsic_ssbo_atomic_imax:
115      case nir_intrinsic_image_atomic_imax:
116      case nir_intrinsic_ssbo_atomic_umin:
117      case nir_intrinsic_image_atomic_umin:
118      case nir_intrinsic_ssbo_atomic_umax:
119      case nir_intrinsic_image_atomic_umax:
120      case nir_intrinsic_ssbo_atomic_xor:
121      case nir_intrinsic_image_atomic_xor:
122      case nir_intrinsic_ssbo_atomic_exchange:
123      case nir_intrinsic_image_atomic_exchange:
124      case nir_intrinsic_image_atomic_comp_swap:
125      case nir_intrinsic_ssbo_atomic_comp_swap:
126         m_sel.info.writes_memory = 1;
127         FALLTHROUGH;
128      case nir_intrinsic_image_load:
129         m_ssbo_instr.set_require_rat_return_address();
130         break;
131      case nir_intrinsic_image_size: {
132         if (nir_intrinsic_image_dim(i) == GLSL_SAMPLER_DIM_CUBE &&
133             nir_intrinsic_image_array(i) && nir_dest_num_components(i->dest) > 2)
134            sh_info().has_txq_cube_array_z_comp = true;
135      }
136
137
138
139      default:
140         ;
141      }
142
143
144   }
145   default:
146      ;
147   }
148
149   return scan_sysvalue_access(instr);
150}
151
152enum chip_class ShaderFromNirProcessor::get_chip_class(void) const
153{
154  return m_chip_class;
155}
156
157bool ShaderFromNirProcessor::allocate_reserved_registers()
158{
159   bool retval = do_allocate_reserved_registers();
160   m_ssbo_instr.load_rat_return_address();
161   if (sh_info().uses_atomics)
162      m_ssbo_instr.load_atomic_inc_limits();
163   m_ssbo_instr.set_ssbo_offset(m_image_count);
164   return retval;
165}
166
167static void remap_shader_info(r600_shader& sh_info,
168                              std::vector<rename_reg_pair>& map,
169                              UNUSED ValueMap& values)
170{
171   for (unsigned i = 0; i < sh_info.num_arrays; ++i) {
172      auto new_index = map[sh_info.arrays[i].gpr_start];
173      if (new_index.valid)
174         sh_info.arrays[i].gpr_start = new_index.new_reg;
175      map[sh_info.arrays[i].gpr_start].used = true;
176   }
177
178   for (unsigned i = 0; i < sh_info.ninput; ++i) {
179      sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr
180              << " of map.size()\n";
181
182      assert(sh_info.input[i].gpr < map.size());
183      auto new_index = map[sh_info.input[i].gpr];
184      if (new_index.valid)
185         sh_info.input[i].gpr = new_index.new_reg;
186      map[sh_info.input[i].gpr].used = true;
187   }
188
189   for (unsigned i = 0; i < sh_info.noutput; ++i) {
190      assert(sh_info.output[i].gpr < map.size());
191      auto new_index = map[sh_info.output[i].gpr];
192      if (new_index.valid)
193         sh_info.output[i].gpr = new_index.new_reg;
194      map[sh_info.output[i].gpr].used = true;
195   }
196}
197
198void ShaderFromNirProcessor::remap_registers()
199{
200   // register renumbering
201   auto rc = register_count();
202   if (!rc)
203      return;
204
205   std::vector<register_live_range> register_live_ranges(rc);
206
207   auto temp_register_map = get_temp_registers();
208
209   Shader sh{m_output, temp_register_map};
210   LiverangeEvaluator().run(sh, register_live_ranges);
211   auto register_map = get_temp_registers_remapping(register_live_ranges);
212
213   sfn_log << SfnLog::merge << "=========Mapping===========\n";
214   for (size_t  i = 0; i < register_map.size(); ++i)
215      if (register_map[i].valid)
216         sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n";
217
218   ValueRemapper vmap0(register_map, temp_register_map);
219   for (auto& block: m_output)
220      block.remap_registers(vmap0);
221
222   remap_shader_info(m_sh_info, register_map, temp_register_map);
223
224   /* Mark inputs as used registers, these registers should no be remapped */
225   for (auto& v: sh.m_temp) {
226      if (v.second->type() == Value::gpr) {
227         const auto& g = static_cast<const GPRValue&>(*v.second);
228         if (g.is_input())
229            register_map[g.sel()].used = true;
230      }
231   }
232
233   int new_index = 0;
234   for (auto& i : register_map) {
235      i.valid = i.used;
236      if (i.used)
237         i.new_reg = new_index++;
238   }
239
240   ValueRemapper vmap1(register_map, temp_register_map);
241   for (auto& ir: m_output)
242      ir.remap_registers(vmap1);
243
244   remap_shader_info(m_sh_info, register_map, temp_register_map);
245}
246
247bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
248{
249   // m_uniform_type_map
250   m_uniform_type_map[uniform->data.location] = uniform->type;
251
252   if (uniform->type->contains_atomic()) {
253      int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
254      sh_info().nhwatomic += natomics;
255
256      if (uniform->type->is_array())
257         sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;
258
259      sh_info().uses_atomics = 1;
260
261      struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
262      ++sh_info().nhwatomic_ranges;
263      atom.buffer_id = uniform->data.binding;
264      atom.hw_idx = m_atomic_base + m_next_hwatomic_loc;
265
266      atom.start = uniform->data.offset >> 2;
267      atom.end = atom.start + natomics - 1;
268
269      if (m_atomic_base_map.find(uniform->data.binding) ==
270          m_atomic_base_map.end())
271         m_atomic_base_map[uniform->data.binding] = m_next_hwatomic_loc;
272
273      m_next_hwatomic_loc += natomics;
274
275      m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end  - atom.start + 1;
276
277      sfn_log << SfnLog::io << "HW_ATOMIC file count: "
278              << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
279   }
280
281   auto type = uniform->type->is_array() ? uniform->type->without_array(): uniform->type;
282   if (type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
283      sh_info().uses_images = 1;
284      if (uniform->type->is_array() && ! (uniform->data.mode == nir_var_mem_ssbo))
285         sh_info().indirect_files |= 1 << TGSI_FILE_IMAGE;
286   }
287
288   return true;
289}
290
291void ShaderFromNirProcessor::set_shader_info(const nir_shader *sh)
292{
293   m_image_count = sh->info.num_images;
294   do_set_shader_info(sh);
295}
296
297void ShaderFromNirProcessor::do_set_shader_info(const nir_shader *sh)
298{
299   (void)sh;
300}
301
302bool ShaderFromNirProcessor::scan_inputs_read(const nir_shader *sh)
303{
304   return true;
305}
306
307void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
308{
309   auto& dest = instr->dest;
310   unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
311   assert(util_bitcount(instr->modes) == 1);
312   m_var_mode[instr->var] = instr->modes;
313   m_var_derefs[index] = instr->var;
314
315   sfn_log << SfnLog::io << "Add var deref:" << index
316           << " with DDL:" << instr->var->data.driver_location << "\n";
317}
318
319void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
320{
321   switch (io.name) {
322   case TGSI_SEMANTIC_POSITION:
323   case TGSI_SEMANTIC_PSIZE:
324   case TGSI_SEMANTIC_EDGEFLAG:
325   case TGSI_SEMANTIC_FACE:
326   case TGSI_SEMANTIC_SAMPLEMASK:
327   case TGSI_SEMANTIC_CLIPVERTEX:
328      io.spi_sid = 0;
329      break;
330   case TGSI_SEMANTIC_GENERIC:
331   case TGSI_SEMANTIC_TEXCOORD:
332   case TGSI_SEMANTIC_PCOORD:
333      io.spi_sid = io.sid + 1;
334      break;
335   default:
336      /* For non-generic params - pack name and sid into 8 bits */
337      io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
338   }
339}
340
341const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
342{
343   unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;
344
345   sfn_log << SfnLog::io << "Search for deref:" << index << "\n";
346
347   auto v = m_var_derefs.find(index);
348   if (v != m_var_derefs.end())
349      return v->second;
350
351     fprintf(stderr, "R600: could not find deref with index %d\n", index);
352
353     return nullptr;
354
355   /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
356   return  nir_deref_instr_get_variable(deref); */
357}
358
359bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
360{
361   return m_tex_instr.emit(instr);
362}
363
364void ShaderFromNirProcessor::emit_instruction(AluInstruction *ir)
365{
366   if (last_emitted_alu && !last_emitted_alu->flag(alu_last_instr)) {
367      for (unsigned i = 0; i < ir->n_sources(); ++i) {
368         auto& s = ir->src(i);
369         if (s.type() == Value::kconst) {
370            auto& c = static_cast<UniformValue&>(s);
371            if (c.addr()) {
372               last_emitted_alu->set_flag(alu_last_instr);
373               break;
374            }
375         }
376      }
377   }
378   last_emitted_alu = ir;
379   emit_instruction_internal(ir);
380}
381
382
383void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
384{
385
386   emit_instruction_internal(ir);
387   last_emitted_alu = nullptr;
388}
389
390void ShaderFromNirProcessor::emit_instruction_internal(Instruction *ir)
391{
392   if (m_pending_else) {
393      append_block(-1);
394      m_output.back().emit(PInstruction(m_pending_else));
395      append_block(1);
396      m_pending_else = nullptr;
397   }
398
399   r600::sfn_log << SfnLog::instr << "     as '" << *ir << "'\n";
400   if (m_output.empty())
401      append_block(0);
402
403   m_output.back().emit(Instruction::Pointer(ir));
404}
405
406void ShaderFromNirProcessor::emit_shader_start()
407{
408   /* placeholder, may become an abstract method */
409   m_ssbo_instr.set_ssbo_offset(m_image_count);
410}
411
412bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
413{
414   switch (instr->type) {
415   case nir_jump_break: {
416      auto b = new LoopBreakInstruction();
417      emit_instruction(b);
418      return true;
419   }
420   case nir_jump_continue: {
421      auto  b = new LoopContInstruction();
422      emit_instruction(b);
423      return true;
424   }
425   default: {
426      nir_instr *i = reinterpret_cast<nir_instr*>(instr);
427      sfn_log << SfnLog::err << "Jump instrunction " << *i <<  " not supported\n";
428      return false;
429   }
430   }
431   return true;
432}
433
434bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
435{
436   return m_alu_instr.emit(instr);
437}
438
439bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
440{
441   return false;
442}
443
444bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
445{
446   LoopBeginInstruction *loop = new LoopBeginInstruction();
447   emit_instruction(loop);
448   m_loop_begin_block_map[loop_id] = loop;
449   append_block(1);
450   return true;
451}
452bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
453{
454   auto start = m_loop_begin_block_map.find(loop_id);
455   if (start == m_loop_begin_block_map.end()) {
456      sfn_log << SfnLog::err  << "End loop: Loop start for "
457              << loop_id << "  not found\n";
458      return false;
459   }
460   m_nesting_depth--;
461   m_block_number++;
462   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number));
463   LoopEndInstruction *loop = new LoopEndInstruction(start->second);
464   emit_instruction(loop);
465
466   m_loop_begin_block_map.erase(start);
467   return true;
468}
469
470bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
471{
472
473   auto value = from_nir(if_stmt->condition, 0, 0);
474   AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
475                                             value, Value::zero, EmitInstruction::last);
476   pred->set_flag(alu_update_exec);
477   pred->set_flag(alu_update_pred);
478   pred->set_cf_type(cf_alu_push_before);
479
480   append_block(1);
481
482   IfInstruction *ir = new IfInstruction(pred);
483   emit_instruction(ir);
484   assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
485   m_if_block_start_map[if_id] = ir;
486   return true;
487}
488
489bool ShaderFromNirProcessor::emit_else_start(int if_id)
490{
491   auto iif = m_if_block_start_map.find(if_id);
492   if (iif == m_if_block_start_map.end()) {
493      std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
494      return false;
495   }
496
497   if (iif->second->type() != Instruction::cond_if) {
498      std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
499      return false;
500   }
501   IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
502   ElseInstruction *ir = new ElseInstruction(if_instr);
503   m_if_block_start_map[if_id] = ir;
504   m_pending_else = ir;
505
506   return true;
507}
508
509bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
510{
511   auto ifelse = m_if_block_start_map.find(if_id);
512   if (ifelse == m_if_block_start_map.end()) {
513      std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
514      return false;
515   }
516
517   if (ifelse->second->type() != Instruction::cond_if &&
518       ifelse->second->type() != Instruction::cond_else) {
519      std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
520      return false;
521   }
522   /* Clear pending else, if the else branch was empty, non will be emitted */
523
524   m_pending_else = nullptr;
525
526   append_block(-1);
527   IfElseEndInstruction *ir = new IfElseEndInstruction();
528   emit_instruction(ir);
529
530   return true;
531}
532
533bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
534{
535   PValue src = get_temp_register();
536   emit_instruction(new AluInstruction(op1_mov, src, Value::zero, {alu_write, alu_last_instr}));
537
538   GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
539   emit_instruction(new FetchTCSIOParam(dest, src, offset));
540
541   return true;
542
543}
544
545bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr)
546{
547   auto address = varvec_from_nir(instr->src[0], instr->num_components);
548   auto dest_value = varvec_from_nir(instr->dest, instr->num_components);
549
550   emit_instruction(new LDSReadInstruction(address, dest_value));
551   return true;
552}
553
554static unsigned
555lds_op_from_intrinsic(nir_intrinsic_op op) {
556   switch (op) {
557   case nir_intrinsic_shared_atomic_add:
558      return LDS_OP2_LDS_ADD_RET;
559   case nir_intrinsic_shared_atomic_and:
560      return LDS_OP2_LDS_AND_RET;
561   case nir_intrinsic_shared_atomic_or:
562      return LDS_OP2_LDS_OR_RET;
563   case nir_intrinsic_shared_atomic_imax:
564      return LDS_OP2_LDS_MAX_INT_RET;
565   case nir_intrinsic_shared_atomic_umax:
566      return LDS_OP2_LDS_MAX_UINT_RET;
567   case nir_intrinsic_shared_atomic_imin:
568      return LDS_OP2_LDS_MIN_INT_RET;
569   case nir_intrinsic_shared_atomic_umin:
570      return LDS_OP2_LDS_MIN_UINT_RET;
571   case nir_intrinsic_shared_atomic_xor:
572      return LDS_OP2_LDS_XOR_RET;
573   case nir_intrinsic_shared_atomic_exchange:
574      return LDS_OP2_LDS_XCHG_RET;
575   case nir_intrinsic_shared_atomic_comp_swap:
576      return LDS_OP3_LDS_CMP_XCHG_RET;
577   default:
578      unreachable("Unsupported shared atomic opcode");
579   }
580}
581
582bool ShaderFromNirProcessor::emit_atomic_local_shared(nir_intrinsic_instr* instr)
583{
584   auto address = from_nir(instr->src[0], 0);
585   auto dest_value = from_nir(instr->dest, 0);
586   auto value = from_nir(instr->src[1], 0);
587   auto op = lds_op_from_intrinsic(instr->intrinsic);
588
589   if (unlikely(instr->intrinsic ==nir_intrinsic_shared_atomic_comp_swap)) {
590      auto value2 = from_nir(instr->src[2], 0);
591      emit_instruction(new LDSAtomicInstruction(dest_value, value, value2, address, op));
592   } else {
593      emit_instruction(new LDSAtomicInstruction(dest_value, value, address, op));
594   }
595   return true;
596}
597
598
599bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr)
600{
601   unsigned write_mask = nir_intrinsic_write_mask(instr);
602
603   auto address = from_nir(instr->src[1], 0);
604   int swizzle_base = (write_mask & 0x3) ? 0 : 2;
605   write_mask |= write_mask >> 2;
606
607   auto value =  from_nir(instr->src[0], swizzle_base);
608   if (!(write_mask & 2)) {
609      emit_instruction(new LDSWriteInstruction(address, 0, value));
610   } else {
611      auto value1 = from_nir(instr->src[0], swizzle_base + 1);
612      emit_instruction(new LDSWriteInstruction(address, 0, value, value1));
613   }
614
615   return true;
616}
617
618bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
619{
620   r600::sfn_log << SfnLog::instr << "emit '"
621                 << *reinterpret_cast<nir_instr*>(instr)
622                 << "' (" << __func__ << ")\n";
623
624   if (emit_intrinsic_instruction_override(instr))
625      return true;
626
627   if (m_ssbo_instr.emit(&instr->instr)) {
628      m_sel.info.writes_memory = true;
629      return true;
630   }
631
632   switch (instr->intrinsic) {
633   case nir_intrinsic_load_deref: {
634      auto var = get_deref_location(instr->src[0]);
635      if (!var)
636         return false;
637      auto mode_helper = m_var_mode.find(var);
638      if (mode_helper == m_var_mode.end()) {
639         cerr << "r600-nir: variable '" << var->name << "' not found\n";
640         return false;
641      }
642      switch (mode_helper->second) {
643      case nir_var_function_temp:
644         return emit_load_function_temp(var, instr);
645      default:
646         cerr << "r600-nir: Unsupported mode" << mode_helper->second
647              << "for src variable\n";
648         return false;
649      }
650   }
651   case nir_intrinsic_store_scratch:
652      return emit_store_scratch(instr);
653   case nir_intrinsic_load_scratch:
654      return emit_load_scratch(instr);
655   case nir_intrinsic_load_uniform:
656      return load_uniform(instr);
657   case nir_intrinsic_discard:
658   case nir_intrinsic_discard_if:
659      return emit_discard_if(instr);
660   case nir_intrinsic_load_ubo_vec4:
661      return emit_load_ubo_vec4(instr);
662   case nir_intrinsic_load_tcs_in_param_base_r600:
663      return emit_load_tcs_param_base(instr, 0);
664   case nir_intrinsic_load_tcs_out_param_base_r600:
665      return emit_load_tcs_param_base(instr, 16);
666   case nir_intrinsic_load_local_shared_r600:
667   case nir_intrinsic_load_shared:
668      return emit_load_local_shared(instr);
669   case nir_intrinsic_store_local_shared_r600:
670   case nir_intrinsic_store_shared:
671      return emit_store_local_shared(instr);
672   case nir_intrinsic_control_barrier:
673   case nir_intrinsic_memory_barrier_tcs_patch:
674   case nir_intrinsic_memory_barrier_shared:
675   case nir_intrinsic_memory_barrier_buffer:
676   case nir_intrinsic_memory_barrier:
677   case nir_intrinsic_memory_barrier_image:
678   case nir_intrinsic_group_memory_barrier:
679      return emit_barrier(instr);
680   case nir_intrinsic_memory_barrier_atomic_counter:
681      return true;
682   case nir_intrinsic_shared_atomic_add:
683   case nir_intrinsic_shared_atomic_and:
684   case nir_intrinsic_shared_atomic_or:
685   case nir_intrinsic_shared_atomic_imax:
686   case nir_intrinsic_shared_atomic_umax:
687   case nir_intrinsic_shared_atomic_imin:
688   case nir_intrinsic_shared_atomic_umin:
689   case nir_intrinsic_shared_atomic_xor:
690   case nir_intrinsic_shared_atomic_exchange:
691   case nir_intrinsic_shared_atomic_comp_swap:
692      return emit_atomic_local_shared(instr);
693   case nir_intrinsic_shader_clock:
694      return emit_shader_clock(instr);
695   case nir_intrinsic_copy_deref:
696   case nir_intrinsic_load_constant:
697   case nir_intrinsic_load_input:
698   case nir_intrinsic_store_output:
699
700   default:
701      fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
702      return false;
703   }
704   return false;
705}
706
707bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
708{
709   return false;
710}
711
712bool
713ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
714{
715   return false;
716}
717
718bool ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr* instr)
719{
720   AluInstruction *ir = new AluInstruction(op0_group_barrier);
721   ir->set_flag(alu_last_instr);
722   emit_instruction(ir);
723   return true;
724}
725
726
727bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
728{
729   if (!dest.is_ssa) {
730      auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
731      if (as_last)
732         ir->set_flag(alu_last_instr);
733      emit_instruction(ir);
734   } else {
735      inject_register(dest.ssa.index, chan, value, true);
736   }
737   return true;
738}
739
740bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
741{
742   PValue address = from_nir(instr->src[1], 0, 0);
743
744   auto value = vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
745         swizzle_from_comps(instr->num_components));
746
747   int writemask = nir_intrinsic_write_mask(instr);
748   int align = nir_intrinsic_align_mul(instr);
749   int align_offset = nir_intrinsic_align_offset(instr);
750
751   WriteScratchInstruction *ir = nullptr;
752   if (address->type() == Value::literal) {
753      const auto& lv = static_cast<const LiteralValue&>(*address);
754      ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
755   } else {
756      address = from_nir_with_fetch_constant(instr->src[1], 0);
757      ir = new WriteScratchInstruction(address, value, align, align_offset,
758                                       writemask, m_scratch_size);
759   }
760   emit_instruction(ir);
761   sh_info().needs_scratch_space = 1;
762   return true;
763}
764
765bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
766{
767   PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
768   std::array<PValue, 4> dst_val;
769   for (int i = 0; i < 4; ++i)
770      dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);
771
772   GPRVector dst(dst_val);
773   auto ir = new LoadFromScratch(dst, address, m_scratch_size);
774   ir->prelude_append(new WaitAck(0));
775   emit_instruction(ir);
776   sh_info().needs_scratch_space = 1;
777   return true;
778}
779
780bool ShaderFromNirProcessor::emit_shader_clock(nir_intrinsic_instr* instr)
781{
782   emit_instruction(new AluInstruction(op1_mov, from_nir(instr->dest, 0),
783                                       PValue(new InlineConstValue(ALU_SRC_TIME_LO, 0)), EmitInstruction::write));
784   emit_instruction(new AluInstruction(op1_mov, from_nir(instr->dest, 1),
785                                       PValue(new InlineConstValue(ALU_SRC_TIME_HI, 0)), EmitInstruction::last_write));
786   return true;
787}
788
789GPRVector ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
790                                                                   unsigned mask,
791                                                                   const GPRVector::Swizzle& swizzle,
792                                                                   bool match)
793{
794   bool use_same = true;
795   GPRVector::Values v;
796
797   std::array<bool,4> used_swizzles = {false, false, false, false};
798
799   /* Check whether all sources come from a GPR, and,
800    * if requested, whether they are swizzled as expected */
801
802   for (int i = 0; i < 4 && use_same; ++i)  {
803      if ((1 << i) & mask) {
804         if (swizzle[i] < 4) {
805            v[i] = from_nir(src, swizzle[i]);
806            assert(v[i]);
807            use_same &= (v[i]->type() == Value::gpr);
808            if (match) {
809               use_same &= (v[i]->chan() == swizzle[i]);
810            }
811            used_swizzles[v[i]->chan()] = true;
812         }
813      }
814   }
815
816
817   /* Now check whether all inputs come from the same GPR, and fill
818    * empty slots in the vector with unused swizzles, bail out if
819    * the sources are not from the same GPR
820    */
821
822   if (use_same) {
823      int next_free_swizzle = 0;
824      while (used_swizzles[next_free_swizzle] && next_free_swizzle < 4)
825         next_free_swizzle++;
826
827      /* Find the first GPR index used */
828      int i = 0;
829      while (!v[i] && i < 4) ++i;
830      assert(i < 4);
831      unsigned sel = v[i]->sel();
832
833
834      for (i = 0; i < 4 && use_same; ++i) {
835         if (!v[i]) {
836            if (swizzle[i] >= 4)
837               v[i] = PValue(new GPRValue(sel, swizzle[i]));
838            else {
839               assert(next_free_swizzle < 4);
840               v[i] = PValue(new GPRValue(sel, next_free_swizzle));
841               used_swizzles[next_free_swizzle] = true;
842               while (next_free_swizzle < 4 && used_swizzles[next_free_swizzle])
843                  next_free_swizzle++;
844            }
845         }
846         else
847            use_same &= v[i]->sel() == sel;
848      }
849   }
850
851   /* We can't re-use the source data because they either need re-swizzling, or
852    * they didn't come all from a GPR or the same GPR, so copy to a new vector
853    */
854   if (!use_same) {
855      AluInstruction *ir = nullptr;
856      GPRVector result = get_temp_vec4(swizzle);
857      for (int i = 0; i < 4; ++i) {
858         if (swizzle[i] < 4 && (mask & (1 << i))) {
859            ir = new AluInstruction(op1_mov, result[i], from_nir(src, swizzle[i]),
860                                    EmitInstruction::write);
861            emit_instruction(ir);
862         }
863      }
864      if (ir)
865         ir->set_flag(alu_last_instr);
866      return result;
867   } else
868      return GPRVector(v);;
869}
870
871bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
872{
873   auto bufid = nir_src_as_const_value(instr->src[0]);
874   auto buf_offset = nir_src_as_const_value(instr->src[1]);
875
876   if (!buf_offset) {
877      /* TODO: if buf_offset is constant then this can also be solved by using the CF indes
878       * on the ALU block, and this would probably make sense when there are more then one
879       * loads with the same buffer ID. */
880
881      PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
882      GPRVector trgt;
883      std::array<int, 4> swz = {7,7,7,7};
884      for (unsigned i = 0; i < 4; ++i) {
885         if (i < nir_dest_num_components(instr->dest)) {
886            trgt.set_reg_i(i, from_nir(instr->dest, i));
887            swz[i] = i + nir_intrinsic_component(instr);
888         } else {
889            trgt.set_reg_i(i, from_nir(instr->dest, 7));
890         }
891      }
892
893      FetchInstruction *ir;
894      if (bufid) {
895         ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
896                                              1 + bufid->u32, nullptr, bim_none);
897      } else {
898         PValue bufid = from_nir(instr->src[0], 0, 0);
899         ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
900                                              1, bufid, bim_zero);
901      }
902      ir->set_dest_swizzle(swz);
903      emit_instruction(ir);
904      m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
905      return true;
906   }
907
908
909   if (bufid) {
910      int buf_cmp = nir_intrinsic_component(instr);
911      AluInstruction *ir = nullptr;
912      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
913         int cmp = buf_cmp + i;
914         assert(cmp < 4);
915         auto u = PValue(new UniformValue(512 +  buf_offset->u32, cmp, bufid->u32 + 1));
916         if (instr->dest.is_ssa)
917            load_preloaded_value(instr->dest, i, u);
918         else {
919            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
920            emit_instruction(ir);
921         }
922      }
923      if (ir)
924         ir->set_flag(alu_last_instr);
925      return true;
926
927   } else {
928      int buf_cmp = nir_intrinsic_component(instr);
929      AluInstruction *ir = nullptr;
930      auto kc_id = from_nir(instr->src[0], 0);
931      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
932         int cmp = buf_cmp + i;
933         auto u = PValue(new UniformValue(512 +  buf_offset->u32, cmp, kc_id));
934         if (instr->dest.is_ssa)
935            load_preloaded_value(instr->dest, i, u);
936         else {
937            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
938            emit_instruction(ir);
939         }
940      }
941      if (ir)
942         ir->set_flag(alu_last_instr);
943      return true;
944   }
945}
946
947bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
948{
949   r600::sfn_log << SfnLog::instr << "emit '"
950                 << *reinterpret_cast<nir_instr*>(instr)
951                 << "' (" << __func__ << ")\n";
952
953   if (instr->intrinsic == nir_intrinsic_discard_if) {
954      emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
955                          {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));
956
957   } else {
958      emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
959                       {Value::zero, Value::zero}, {alu_last_instr}));
960   }
961   m_sh_info.uses_kill = 1;
962   return true;
963}
964
965bool ShaderFromNirProcessor::load_uniform(nir_intrinsic_instr* instr)
966{
967   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
968                 << *reinterpret_cast<nir_instr*>(instr)
969                 << "'\n";
970
971
972   /* If the target register is a SSA register and the loading is not
973    * indirect then we can do lazy loading, i.e. the uniform value can
974    * be used directly. Otherwise we have to load the data for real
975    * rigt away.
976    */
977   auto literal = nir_src_as_const_value(instr->src[0]);
978   int base = nir_intrinsic_base(instr);
979
980   if (literal) {
981      AluInstruction *ir = nullptr;
982      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
983         PValue u = PValue(new UniformValue(512 + literal->u32 + base, i));
984         sfn_log << SfnLog::io << "uniform "
985                 << instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n";
986
987         if (instr->dest.is_ssa)
988            load_preloaded_value(instr->dest, i, u);
989         else {
990            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
991                                                   u, {alu_write});
992             emit_instruction(ir);
993         }
994      }
995      if (ir)
996         ir->set_flag(alu_last_instr);
997   } else {
998      PValue addr = from_nir(instr->src[0], 0, 0);
999      return load_uniform_indirect(instr, addr, 16 * base, 0);
1000   }
1001   return true;
1002}
1003
1004bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offest, int bufferid)
1005{
1006   if (!addr) {
1007      std::cerr << "r600-nir: don't know how uniform is addressed\n";
1008      return false;
1009   }
1010
1011   GPRVector trgt;
1012   std::array<int, 4> swz = {7,7,7,7};
1013   for (int i = 0; i < 4; ++i) {
1014      trgt.set_reg_i(i, from_nir(instr->dest, i));
1015      swz[i] = i;
1016   }
1017
1018   if (addr->type() != Value::gpr) {
1019      emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
1020      addr = trgt.reg_i(0);
1021   }
1022
1023   auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offest,
1024                                  bufferid, PValue(), bim_none);
1025   ir->set_dest_swizzle(swz);
1026   emit_instruction(ir);
1027   m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
1028   return true;
1029}
1030
1031AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask)
1032{
1033   AluInstruction *ir = nullptr;
1034   for (int i = 0; i < literal->def.num_components ; ++i) {
1035      if (writemask & (1 << i)){
1036         PValue lsrc;
1037         switch (literal->def.bit_size) {
1038
1039         case 1:
1040            sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
1041            lsrc = literal->value[i].b ?
1042                     PValue(new LiteralValue( 0xffffffff, i)) :
1043                     Value::zero;
1044            break;
1045         case 32:
1046            sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
1047            if (literal->value[i].u32 == 0)
1048               lsrc = Value::zero;
1049            else if (literal->value[i].u32 == 1)
1050               lsrc = Value::one_i;
1051            else if (literal->value[i].f32 == 1.0f)
1052               lsrc = Value::one_f;
1053            else if (literal->value[i].f32 == 0.5f)
1054               lsrc = Value::zero_dot_5;
1055            else
1056               lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
1057            break;
1058         default:
1059            sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
1060                    << " falling back to 32 bit\n";
1061            lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
1062         }
1063         ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);
1064
1065         emit_instruction(ir);
1066      }
1067   }
1068   return ir;
1069}
1070
1071PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel)
1072{
1073   PValue value = from_nir(src, component);
1074   if (value->type() != Value::gpr &&
1075       value->type() != Value::gpr_vector &&
1076       value->type() != Value::gpr_array_value) {
1077      PValue retval = get_temp_register(channel);
1078      emit_instruction(new AluInstruction(op1_mov, retval, value,
1079                                          EmitInstruction::last_write));
1080      value = retval;
1081   }
1082   return value;
1083}
1084
1085bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
1086{
1087   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
1088                 << *reinterpret_cast<nir_instr*>(instr)
1089                 << "'\n";
1090
1091   /* Give the specific shader type a chance to process this, i.e. Geometry and
1092    * tesselation shaders need specialized deref_array, for the other shaders
1093    * it is lowered.
1094    */
1095   if (emit_deref_instruction_override(instr))
1096      return true;
1097
1098   switch (instr->deref_type) {
1099   case nir_deref_type_var:
1100      set_var_address(instr);
1101      return true;
1102   case nir_deref_type_array:
1103   case nir_deref_type_array_wildcard:
1104   case nir_deref_type_struct:
1105   case nir_deref_type_cast:
1106   default:
1107      fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
1108   }
1109   return false;
1110}
1111
1112bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
1113                                              std::vector<PValue> srcs,
1114                                              const std::set<AluModifiers>& m_flags)
1115{
1116   AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
1117   emit_instruction(ir);
1118   return true;
1119}
1120
1121void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
1122{
1123   m_output_register_map[loc] = gpr;
1124}
1125
1126void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
1127{
1128   r600::sfn_log << SfnLog::instr << "     as '" << *ir << "'\n";
1129   m_export_output.emit(PInstruction(ir));
1130}
1131
1132const GPRVector * ShaderFromNirProcessor::output_register(unsigned location) const
1133{
1134   const GPRVector *retval = nullptr;
1135   auto val = m_output_register_map.find(location);
1136   if (val != m_output_register_map.end())
1137      retval =  val->second;
1138   return retval;
1139}
1140
1141void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
1142{
1143   r600::sfn_log << SfnLog::io << "Set input[" << pos << "] =" << *var <<  "\n";
1144   m_inputs[pos] = var;
1145}
1146
1147void ShaderFromNirProcessor::set_output(unsigned pos, int sel)
1148{
1149   r600::sfn_log << SfnLog::io << "Set output[" << pos << "] =" << sel <<  "\n";
1150   m_outputs[pos] = sel;
1151}
1152
1153void ShaderFromNirProcessor::append_block(int nesting_change)
1154{
1155   m_nesting_depth += nesting_change;
1156   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++));
1157}
1158
1159void ShaderFromNirProcessor::get_array_info(r600_shader& shader) const
1160{
1161   shader.num_arrays = m_reg_arrays.size();
1162   if (shader.num_arrays) {
1163      shader.arrays = (r600_shader_array *)calloc(shader.num_arrays, sizeof(r600_shader_array));
1164      for (unsigned i = 0; i < shader.num_arrays; ++i) {
1165         shader.arrays[i].comp_mask = m_reg_arrays[i]->mask();
1166         shader.arrays[i].gpr_start = m_reg_arrays[i]->sel();
1167         shader.arrays[i].gpr_count = m_reg_arrays[i]->size();
1168      }
1169      shader.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
1170   }
1171}
1172
1173void ShaderFromNirProcessor::finalize()
1174{
1175   do_finalize();
1176
1177   for (auto& i : m_inputs)
1178      m_sh_info.input[i.first].gpr = i.second->sel();
1179
1180   for (auto& i : m_outputs)
1181      m_sh_info.output[i.first].gpr = i.second;
1182
1183   m_output.push_back(m_export_output);
1184}
1185
1186}
1187