1#include "sfn_emitssboinstruction.h"
2
3#include "sfn_instruction_fetch.h"
4#include "sfn_instruction_gds.h"
5#include "sfn_instruction_misc.h"
6#include "sfn_instruction_tex.h"
7#include "../r600_pipe.h"
8#include "../r600_asm.h"
9
10namespace r600 {
11
/* Selector of the constant-buffer slot that holds the per-resource buffer
 * info; used below to look up the layer count of cube array images.
 * NOTE(review): the division by 16 presumably converts a byte offset into a
 * vec4 slot index, and 512 looks like the base selector for constant-file
 * reads -- confirm against r600_pipe.h / the ISA documentation. */
#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
13
14EmitSSBOInstruction::EmitSSBOInstruction(ShaderFromNirProcessor& processor):
15   EmitInstruction(processor),
16   m_require_rat_return_address(false),
17   m_ssbo_image_offset(0)
18{
19}
20
21void EmitSSBOInstruction::set_ssbo_offset(int offset)
22{
23   m_ssbo_image_offset = offset;
24}
25
26
27void EmitSSBOInstruction::set_require_rat_return_address()
28{
29   m_require_rat_return_address = true;
30}
31
32bool
33EmitSSBOInstruction::load_rat_return_address()
34{
35   if (m_require_rat_return_address) {
36      m_rat_return_address = get_temp_vec4();
37      emit_instruction(new AluInstruction(op1_mbcnt_32lo_accum_prev_int, m_rat_return_address.reg_i(0), literal(-1), {alu_write}));
38      emit_instruction(new AluInstruction(op1_mbcnt_32hi_int, m_rat_return_address.reg_i(1), literal(-1), {alu_write}));
39      emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(2), PValue(new InlineConstValue(ALU_SRC_SE_ID, 0)),
40                                          literal(256), PValue(new InlineConstValue(ALU_SRC_HW_WAVE_ID, 0)), {alu_write, alu_last_instr}));
41      emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(1),
42                                          m_rat_return_address.reg_i(2), literal(0x40), m_rat_return_address.reg_i(0),
43      {alu_write, alu_last_instr}));
44      m_require_rat_return_address = false;
45   }
46   return true;
47}
48
49
50bool EmitSSBOInstruction::do_emit(nir_instr* instr)
51{
52   const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
53   switch (intr->intrinsic) {
54   case nir_intrinsic_atomic_counter_add:
55   case nir_intrinsic_atomic_counter_and:
56   case nir_intrinsic_atomic_counter_exchange:
57   case nir_intrinsic_atomic_counter_max:
58   case nir_intrinsic_atomic_counter_min:
59   case nir_intrinsic_atomic_counter_or:
60   case nir_intrinsic_atomic_counter_xor:
61   case nir_intrinsic_atomic_counter_comp_swap:
62      return emit_atomic(intr);
63   case nir_intrinsic_atomic_counter_read:
64   case nir_intrinsic_atomic_counter_post_dec:
65      return emit_unary_atomic(intr);
66   case nir_intrinsic_atomic_counter_inc:
67      return emit_atomic_inc(intr);
68   case nir_intrinsic_atomic_counter_pre_dec:
69      return emit_atomic_pre_dec(intr);
70   case nir_intrinsic_load_ssbo:
71       return emit_load_ssbo(intr);
72   case nir_intrinsic_store_ssbo:
73      return emit_store_ssbo(intr);
74   case nir_intrinsic_ssbo_atomic_add:
75   case nir_intrinsic_ssbo_atomic_comp_swap:
76   case nir_intrinsic_ssbo_atomic_or:
77   case nir_intrinsic_ssbo_atomic_xor:
78   case nir_intrinsic_ssbo_atomic_imax:
79   case nir_intrinsic_ssbo_atomic_imin:
80   case nir_intrinsic_ssbo_atomic_umax:
81   case nir_intrinsic_ssbo_atomic_umin:
82   case nir_intrinsic_ssbo_atomic_and:
83   case nir_intrinsic_ssbo_atomic_exchange:
84      return emit_ssbo_atomic_op(intr);
85   case nir_intrinsic_image_store:
86      return emit_image_store(intr);
87   case nir_intrinsic_image_load:
88   case nir_intrinsic_image_atomic_add:
89   case nir_intrinsic_image_atomic_and:
90   case nir_intrinsic_image_atomic_or:
91   case nir_intrinsic_image_atomic_xor:
92   case nir_intrinsic_image_atomic_exchange:
93   case nir_intrinsic_image_atomic_comp_swap:
94   case nir_intrinsic_image_atomic_umin:
95   case nir_intrinsic_image_atomic_umax:
96   case nir_intrinsic_image_atomic_imin:
97   case nir_intrinsic_image_atomic_imax:
98      return emit_image_load(intr);
99   case nir_intrinsic_image_size:
100      return emit_image_size(intr);
101   case nir_intrinsic_get_ssbo_size:
102      return emit_buffer_size(intr);
103   case nir_intrinsic_memory_barrier:
104   case nir_intrinsic_memory_barrier_image:
105   case nir_intrinsic_memory_barrier_buffer:
106   case nir_intrinsic_group_memory_barrier:
107      return make_stores_ack_and_waitack();
108   default:
109      return false;
110   }
111}
112
113bool EmitSSBOInstruction::emit_atomic(const nir_intrinsic_instr* instr)
114{
115   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
116
117   ESDOp op = read_result ? get_opcode(instr->intrinsic) :
118                            get_opcode_wo(instr->intrinsic);
119
120   if (DS_OP_INVALID == op)
121      return false;
122
123
124
125   GPRVector dest = read_result ? make_dest(instr) : GPRVector(0, {7,7,7,7});
126
127   int base = remap_atomic_base(nir_intrinsic_base(instr));
128
129   PValue uav_id = from_nir(instr->src[0], 0);
130
131   PValue value = from_nir_with_fetch_constant(instr->src[1], 0);
132
133   GDSInstr *ir = nullptr;
134   if (instr->intrinsic == nir_intrinsic_atomic_counter_comp_swap)  {
135      PValue value2 = from_nir_with_fetch_constant(instr->src[2], 0);
136      ir = new GDSInstr(op, dest, value, value2, uav_id, base);
137   } else {
138      ir = new GDSInstr(op, dest, value, uav_id, base);
139   }
140
141   emit_instruction(ir);
142   return true;
143}
144
145bool EmitSSBOInstruction::emit_unary_atomic(const nir_intrinsic_instr* instr)
146{
147   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
148
149   ESDOp op = read_result ? get_opcode(instr->intrinsic) : get_opcode_wo(instr->intrinsic);
150
151   if (DS_OP_INVALID == op)
152      return false;
153
154   GPRVector dest = read_result ? make_dest(instr) : GPRVector(0, {7,7,7,7});
155
156   PValue uav_id = from_nir(instr->src[0], 0);
157
158   auto ir = new GDSInstr(op, dest, uav_id, remap_atomic_base(nir_intrinsic_base(instr)));
159
160   emit_instruction(ir);
161   return true;
162}
163
164ESDOp EmitSSBOInstruction::get_opcode(const nir_intrinsic_op opcode) const
165{
166   switch (opcode) {
167   case nir_intrinsic_atomic_counter_add:
168      return DS_OP_ADD_RET;
169   case nir_intrinsic_atomic_counter_and:
170      return DS_OP_AND_RET;
171   case nir_intrinsic_atomic_counter_exchange:
172      return DS_OP_XCHG_RET;
173   case nir_intrinsic_atomic_counter_inc:
174      return DS_OP_INC_RET;
175   case nir_intrinsic_atomic_counter_max:
176      return DS_OP_MAX_UINT_RET;
177   case nir_intrinsic_atomic_counter_min:
178      return DS_OP_MIN_UINT_RET;
179   case nir_intrinsic_atomic_counter_or:
180      return DS_OP_OR_RET;
181   case nir_intrinsic_atomic_counter_read:
182      return DS_OP_READ_RET;
183   case nir_intrinsic_atomic_counter_xor:
184      return DS_OP_XOR_RET;
185   case nir_intrinsic_atomic_counter_post_dec:
186      return DS_OP_DEC_RET;
187   case nir_intrinsic_atomic_counter_comp_swap:
188      return DS_OP_CMP_XCHG_RET;
189   case nir_intrinsic_atomic_counter_pre_dec:
190   default:
191      return DS_OP_INVALID;
192   }
193}
194
195ESDOp EmitSSBOInstruction::get_opcode_wo(const nir_intrinsic_op opcode) const
196{
197   switch (opcode) {
198   case nir_intrinsic_atomic_counter_add:
199      return DS_OP_ADD;
200   case nir_intrinsic_atomic_counter_and:
201      return DS_OP_AND;
202   case nir_intrinsic_atomic_counter_inc:
203      return DS_OP_INC;
204   case nir_intrinsic_atomic_counter_max:
205      return DS_OP_MAX_UINT;
206   case nir_intrinsic_atomic_counter_min:
207      return DS_OP_MIN_UINT;
208   case nir_intrinsic_atomic_counter_or:
209      return DS_OP_OR;
210   case nir_intrinsic_atomic_counter_xor:
211      return DS_OP_XOR;
212   case nir_intrinsic_atomic_counter_post_dec:
213      return DS_OP_DEC;
214   case nir_intrinsic_atomic_counter_comp_swap:
215      return DS_OP_CMP_XCHG_RET;
216   case nir_intrinsic_atomic_counter_exchange:
217      return DS_OP_XCHG_RET;
218   case nir_intrinsic_atomic_counter_pre_dec:
219   default:
220      return DS_OP_INVALID;
221   }
222}
223
224RatInstruction::ERatOp
225EmitSSBOInstruction::get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const
226{
227   switch (opcode) {
228   case nir_intrinsic_ssbo_atomic_add:
229   case nir_intrinsic_image_atomic_add:
230      return RatInstruction::ADD_RTN;
231   case nir_intrinsic_ssbo_atomic_and:
232   case nir_intrinsic_image_atomic_and:
233      return RatInstruction::AND_RTN;
234   case nir_intrinsic_ssbo_atomic_exchange:
235   case nir_intrinsic_image_atomic_exchange:
236      return RatInstruction::XCHG_RTN;
237   case nir_intrinsic_ssbo_atomic_or:
238   case nir_intrinsic_image_atomic_or:
239      return RatInstruction::OR_RTN;
240   case nir_intrinsic_ssbo_atomic_imin:
241   case nir_intrinsic_image_atomic_imin:
242      return RatInstruction::MIN_INT_RTN;
243   case nir_intrinsic_ssbo_atomic_imax:
244   case nir_intrinsic_image_atomic_imax:
245      return RatInstruction::MAX_INT_RTN;
246   case nir_intrinsic_ssbo_atomic_umin:
247   case nir_intrinsic_image_atomic_umin:
248      return RatInstruction::MIN_UINT_RTN;
249   case nir_intrinsic_ssbo_atomic_umax:
250   case nir_intrinsic_image_atomic_umax:
251      return RatInstruction::MAX_UINT_RTN;
252   case nir_intrinsic_ssbo_atomic_xor:
253   case nir_intrinsic_image_atomic_xor:
254      return RatInstruction::XOR_RTN;
255   case nir_intrinsic_ssbo_atomic_comp_swap:
256   case nir_intrinsic_image_atomic_comp_swap:
257      if (util_format_is_float(format))
258         return RatInstruction::CMPXCHG_FLT_RTN;
259      else
260         return RatInstruction::CMPXCHG_INT_RTN;
261   case nir_intrinsic_image_load:
262      return RatInstruction::NOP_RTN;
263   default:
264      unreachable("Unsupported RAT instruction");
265   }
266}
267
268RatInstruction::ERatOp
269EmitSSBOInstruction::get_rat_opcode_wo(const nir_intrinsic_op opcode, pipe_format format) const
270{
271	switch (opcode) {
272   case nir_intrinsic_ssbo_atomic_add:
273   case nir_intrinsic_image_atomic_add:
274      return RatInstruction::ADD;
275   case nir_intrinsic_ssbo_atomic_and:
276   case nir_intrinsic_image_atomic_and:
277      return RatInstruction::AND;
278   case nir_intrinsic_ssbo_atomic_or:
279   case nir_intrinsic_image_atomic_or:
280      return RatInstruction::OR;
281   case nir_intrinsic_ssbo_atomic_imin:
282   case nir_intrinsic_image_atomic_imin:
283      return RatInstruction::MIN_INT;
284   case nir_intrinsic_ssbo_atomic_imax:
285   case nir_intrinsic_image_atomic_imax:
286      return RatInstruction::MAX_INT;
287   case nir_intrinsic_ssbo_atomic_umin:
288   case nir_intrinsic_image_atomic_umin:
289      return RatInstruction::MIN_UINT;
290   case nir_intrinsic_ssbo_atomic_umax:
291   case nir_intrinsic_image_atomic_umax:
292      return RatInstruction::MAX_UINT;
293   case nir_intrinsic_ssbo_atomic_xor:
294   case nir_intrinsic_image_atomic_xor:
295      return RatInstruction::XOR;
296   case nir_intrinsic_ssbo_atomic_comp_swap:
297   case nir_intrinsic_image_atomic_comp_swap:
298      if (util_format_is_float(format))
299         return RatInstruction::CMPXCHG_FLT;
300      else
301         return RatInstruction::CMPXCHG_INT;
302   default:
303      unreachable("Unsupported WO RAT instruction");
304   }
305}
306
307bool EmitSSBOInstruction::load_atomic_inc_limits()
308{
309   m_atomic_update = get_temp_register();
310   m_atomic_update->set_keep_alive();
311   emit_instruction(new AluInstruction(op1_mov, m_atomic_update, literal(1),
312   {alu_write, alu_last_instr}));
313   return true;
314}
315
316bool EmitSSBOInstruction::emit_atomic_inc(const nir_intrinsic_instr* instr)
317{
318   bool read_result = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
319   PValue uav_id = from_nir(instr->src[0], 0);
320   GPRVector dest = read_result ? make_dest(instr): GPRVector(0, {7,7,7,7});
321   auto ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD, dest,
322                          m_atomic_update, uav_id,
323                          remap_atomic_base(nir_intrinsic_base(instr)));
324   emit_instruction(ir);
325   return true;
326}
327
328bool EmitSSBOInstruction::emit_atomic_pre_dec(const nir_intrinsic_instr *instr)
329{
330   GPRVector dest = make_dest(instr);
331
332   PValue uav_id = from_nir(instr->src[0], 0);
333
334   auto ir = new GDSInstr(DS_OP_SUB_RET, dest, m_atomic_update, uav_id,
335                          remap_atomic_base(nir_intrinsic_base(instr)));
336   emit_instruction(ir);
337
338   emit_instruction(new AluInstruction(op2_sub_int,  dest.x(), dest.x(), literal(1), last_write));
339
340   return true;
341}
342
343bool EmitSSBOInstruction::emit_load_ssbo(const nir_intrinsic_instr* instr)
344{
345   GPRVector dest = make_dest(instr);
346
347   /** src0 not used, should be some offset */
348   auto addr = from_nir(instr->src[1], 0);
349   PValue addr_temp = create_register_from_nir_src(instr->src[1], 1);
350
351   /** Should be lowered in nir */
352   emit_instruction(new AluInstruction(op2_lshr_int, addr_temp, {addr, PValue(new LiteralValue(2))},
353                    {alu_write, alu_last_instr}));
354
355   const EVTXDataFormat formats[4] = {
356      fmt_32,
357      fmt_32_32,
358      fmt_32_32_32,
359      fmt_32_32_32_32
360   };
361
362   const std::array<int,4> dest_swt[4] = {
363      {0,7,7,7},
364      {0,1,7,7},
365      {0,1,2,7},
366      {0,1,2,3}
367   };
368
369   /* TODO fix resource index */
370   auto ir = new FetchInstruction(dest, addr_temp,
371                                  R600_IMAGE_REAL_RESOURCE_OFFSET + m_ssbo_image_offset
372                                  , from_nir(instr->src[0], 0),
373                                  formats[nir_dest_num_components(instr->dest) - 1], vtx_nf_int);
374   ir->set_dest_swizzle(dest_swt[nir_dest_num_components(instr->dest) - 1]);
375   ir->set_flag(vtx_use_tc);
376
377   emit_instruction(ir);
378   return true;
379}
380
381bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr* instr)
382{
383
384   GPRVector::Swizzle swz = {7,7,7,7};
385   for (unsigned i = 0; i <  nir_src_num_components(instr->src[0]); ++i)
386      swz[i] = i;
387
388   auto orig_addr = from_nir(instr->src[2], 0);
389
390   GPRVector addr_vec = get_temp_vec4({0,1,2,7});
391
392   auto temp2 = get_temp_vec4();
393
394   auto rat_id = from_nir(instr->src[1], 0);
395
396   emit_instruction(new AluInstruction(op2_lshr_int, addr_vec.reg_i(0), orig_addr,
397                                       PValue(new LiteralValue(2)), write));
398   emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(1), Value::zero, write));
399   emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(2), Value::zero, last_write));
400
401
402   auto values = vec_from_nir_with_fetch_constant(instr->src[0],
403         (1 << nir_src_num_components(instr->src[0])) - 1, {0,1,2,3}, true);
404
405   auto cf_op = cf_mem_rat;
406   //auto cf_op = nir_intrinsic_access(instr) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
407   auto store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
408                                   values, addr_vec, m_ssbo_image_offset, rat_id, 1,
409                                   1, 0, false);
410   emit_instruction(store);
411   m_store_ops.push_back(store);
412
413   for (unsigned i = 1; i < nir_src_num_components(instr->src[0]); ++i) {
414      emit_instruction(new AluInstruction(op1_mov, temp2.reg_i(0), from_nir(instr->src[0], i), get_chip_class() == CAYMAN  ?  last_write : write));
415      emit_instruction(new AluInstruction(op2_add_int, addr_vec.reg_i(0),
416                                          {addr_vec.reg_i(0), Value::one_i}, last_write));
417      store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
418                                 temp2, addr_vec, m_ssbo_image_offset, rat_id, 1,
419                                 1, 0, false);
420      emit_instruction(store);
421      if (!(nir_intrinsic_access(instr) & ACCESS_COHERENT))
422         m_store_ops.push_back(store);
423   }
424
425   return true;
426}
427
428bool
429EmitSSBOInstruction::emit_image_store(const nir_intrinsic_instr *intrin)
430{
431   int imageid = 0;
432   PValue image_offset;
433
434   if (nir_src_is_const(intrin->src[0]))
435      imageid = nir_src_as_int(intrin->src[0]);
436   else
437      image_offset = from_nir(intrin->src[0], 0);
438
439   auto coord =  vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, {0,1,2,3});
440   auto undef = from_nir(intrin->src[2], 0);
441   auto value = vec_from_nir_with_fetch_constant(intrin->src[3],  0xf, {0,1,2,3});
442   auto unknown  = from_nir(intrin->src[4], 0);
443
444   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
445       nir_intrinsic_image_array(intrin)) {
446      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
447      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
448   }
449
450   auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
451   auto store = new RatInstruction(op, RatInstruction::STORE_TYPED, value, coord, imageid,
452                                   image_offset, 1, 0xf, 0, false);
453
454   //if (!(nir_intrinsic_access(intrin) & ACCESS_COHERENT))
455      m_store_ops.push_back(store);
456
457   emit_instruction(store);
458   return true;
459}
460
461bool
462EmitSSBOInstruction::emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin)
463{
464   int imageid = 0;
465   PValue image_offset;
466
467   if (nir_src_is_const(intrin->src[0]))
468      imageid = nir_src_as_int(intrin->src[0]);
469   else
470      image_offset = from_nir(intrin->src[0], 0);
471
472   bool read_result = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
473   auto opcode = read_result ? get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT) :
474                               get_rat_opcode_wo(intrin->intrinsic, PIPE_FORMAT_R32_UINT);
475
476   auto coord_orig =  from_nir(intrin->src[1], 0, 0);
477   auto coord = get_temp_register(0);
478
479   emit_instruction(new AluInstruction(op2_lshr_int, coord, coord_orig, literal(2), last_write));
480
481   if (intrin->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
482      emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
483                                          from_nir(intrin->src[3], 0), {alu_write}));
484      emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(get_chip_class() == CAYMAN ? 2 : 3),
485                                          from_nir(intrin->src[2], 0), {alu_last_instr, alu_write}));
486   } else {
487      emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
488                                          from_nir(intrin->src[2], 0), {alu_write}));
489      emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(2), Value::zero, last_write));
490   }
491
492
493   GPRVector out_vec({coord, coord, coord, coord});
494
495   auto atomic = new RatInstruction(cf_mem_rat, opcode, m_rat_return_address, out_vec, imageid + m_ssbo_image_offset,
496                                   image_offset, 1, 0xf, 0, true);
497   emit_instruction(atomic);
498
499   if (read_result) {
500      emit_instruction(new WaitAck(0));
501
502      GPRVector dest = vec_from_nir(intrin->dest, intrin->dest.ssa.num_components);
503      auto fetch = new FetchInstruction(vc_fetch,
504                                        no_index_offset,
505                                        fmt_32,
506                                        vtx_nf_int,
507                                        vtx_es_none,
508                                        m_rat_return_address.reg_i(1),
509                                        dest,
510                                        0,
511                                        false,
512                                        0xf,
513                                        R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
514                                        0,
515                                        bim_none,
516                                        false,
517                                        false,
518                                        0,
519                                        0,
520                                        0,
521                                        image_offset,
522                                        {0,7,7,7});
523      fetch->set_flag(vtx_srf_mode);
524      fetch->set_flag(vtx_use_tc);
525      fetch->set_flag(vtx_vpm);
526      emit_instruction(fetch);
527   }
528
529   return true;
530
531}
532
533bool
534EmitSSBOInstruction::emit_image_load(const nir_intrinsic_instr *intrin)
535{
536   int imageid = 0;
537   PValue image_offset;
538
539   if (nir_src_is_const(intrin->src[0]))
540      imageid = nir_src_as_int(intrin->src[0]);
541   else
542      image_offset = from_nir(intrin->src[0], 0);
543
544   bool read_retvalue = !intrin->dest.is_ssa || !list_is_empty(&intrin->dest.ssa.uses);
545   auto rat_op = read_retvalue ? get_rat_opcode(intrin->intrinsic, nir_intrinsic_format(intrin)):
546                                 get_rat_opcode_wo(intrin->intrinsic, nir_intrinsic_format(intrin));
547
548   GPRVector::Swizzle swz = {0,1,2,3};
549   auto coord =  vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, swz);
550
551   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
552       nir_intrinsic_image_array(intrin)) {
553      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
554      emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
555   }
556
557   if (intrin->intrinsic != nir_intrinsic_image_load) {
558      if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
559         emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
560                                             from_nir(intrin->src[4], 0), {alu_write}));
561         emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(get_chip_class() == CAYMAN ? 2 : 3),
562                                             from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
563      } else {
564         emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
565                                             from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
566      }
567   }
568   auto cf_op = cf_mem_rat;// nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
569
570   auto store = new RatInstruction(cf_op, rat_op, m_rat_return_address, coord, imageid,
571                                   image_offset, 1, 0xf, 0, true);
572   emit_instruction(store);
573   return read_retvalue ? fetch_return_value(intrin) : true;
574}
575
576bool EmitSSBOInstruction::fetch_return_value(const nir_intrinsic_instr *intrin)
577{
578   emit_instruction(new WaitAck(0));
579
580   pipe_format format = nir_intrinsic_format(intrin);
581   unsigned fmt = fmt_32;
582   unsigned num_format = 0;
583   unsigned format_comp = 0;
584   unsigned endian = 0;
585
586   int imageid = 0;
587   PValue image_offset;
588
589   if (nir_src_is_const(intrin->src[0]))
590      imageid = nir_src_as_int(intrin->src[0]);
591   else
592      image_offset = from_nir(intrin->src[0], 0);
593
594   r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);
595
596   GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
597
598   auto fetch = new FetchInstruction(vc_fetch,
599                                     no_index_offset,
600                                     (EVTXDataFormat)fmt,
601                                     (EVFetchNumFormat)num_format,
602                                     (EVFetchEndianSwap)endian,
603                                     m_rat_return_address.reg_i(1),
604                                     dest,
605                                     0,
606                                     false,
607                                     0x3,
608                                     R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
609                                     0,
610                                     bim_none,
611                                     false,
612                                     false,
613                                     0,
614                                     0,
615                                     0,
616                                     image_offset, {0,1,2,3});
617   fetch->set_flag(vtx_srf_mode);
618   fetch->set_flag(vtx_use_tc);
619   fetch->set_flag(vtx_vpm);
620   if (format_comp)
621      fetch->set_flag(vtx_format_comp_signed);
622
623   emit_instruction(fetch);
624   return true;
625}
626
627bool EmitSSBOInstruction::emit_image_size(const nir_intrinsic_instr *intrin)
628{
629   GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
630   GPRVector src{0,{4,4,4,4}};
631
632   assert(nir_src_as_uint(intrin->src[1]) == 0);
633
634   auto const_offset = nir_src_as_const_value(intrin->src[0]);
635   auto dyn_offset = PValue();
636   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
637   if (const_offset)
638      res_id += const_offset[0].u32;
639   else
640      dyn_offset = from_nir(intrin->src[0], 0);
641
642   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
643      emit_instruction(new FetchInstruction(dest, PValue(new GPRValue(0, 7)),
644                       res_id,
645                       bim_none));
646      return true;
647   } else {
648      emit_instruction(new TexInstruction(TexInstruction::get_resinfo, dest, src,
649                                             0/* ?? */,
650                                             res_id, dyn_offset));
651      if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
652          nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) {
653         /* Need to load the layers from a const buffer */
654
655         set_has_txs_cube_array_comp();
656
657         if (const_offset) {
658            unsigned lookup_resid = const_offset[0].u32;
659            emit_instruction(new AluInstruction(op1_mov, dest.reg_i(2),
660                                                PValue(new UniformValue(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4,
661                                                                        R600_BUFFER_INFO_CONST_BUFFER)),
662                                                EmitInstruction::last_write));
663         } else {
664            /* If the adressing is indirect we have to get the z-value by using a binary search */
665            GPRVector trgt;
666            GPRVector help;
667
668            auto addr = help.reg_i(0);
669            auto comp = help.reg_i(1);
670            auto low_bit = help.reg_i(2);
671            auto high_bit = help.reg_i(3);
672
673            emit_instruction(new AluInstruction(op2_lshr_int, addr, from_nir(intrin->src[0], 0),
674                             literal(2), EmitInstruction::write));
675            emit_instruction(new AluInstruction(op2_and_int, comp, from_nir(intrin->src[0], 0),
676                             literal(3), EmitInstruction::last_write));
677
678            emit_instruction(new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, R600_SHADER_BUFFER_INFO_SEL,
679                                                  R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none));
680
681            emit_instruction(new AluInstruction(op3_cnde_int, comp, high_bit, trgt.reg_i(0), trgt.reg_i(2),
682                                                EmitInstruction::write));
683            emit_instruction(new AluInstruction(op3_cnde_int, high_bit, high_bit, trgt.reg_i(1), trgt.reg_i(3),
684                                                EmitInstruction::last_write));
685
686            emit_instruction(new AluInstruction(op3_cnde_int, dest.reg_i(2), low_bit, comp, high_bit, EmitInstruction::last_write));
687         }
688      }
689   }
690   return true;
691}
692
693bool EmitSSBOInstruction::emit_buffer_size(const nir_intrinsic_instr *intr)
694{
695   std::array<PValue,4> dst_elms;
696
697
698   for (uint16_t i = 0; i < 4; ++i) {
699      dst_elms[i] = from_nir(intr->dest, (i < intr->dest.ssa.num_components) ? i : 7);
700   }
701
702   GPRVector dst(dst_elms);
703   GPRVector src(0,{4,4,4,4});
704
705   auto const_offset = nir_src_as_const_value(intr->src[0]);
706   auto dyn_offset = PValue();
707   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
708   if (const_offset)
709      res_id += const_offset[0].u32;
710   else
711      assert(0 && "dynamic buffer offset not supported in buffer_size");
712
713   emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)),
714                    res_id, bim_none));
715
716   return true;
717}
718
719bool EmitSSBOInstruction::make_stores_ack_and_waitack()
720{
721   for (auto&& store: m_store_ops)
722      store->set_ack();
723
724   if (!m_store_ops.empty())
725      emit_instruction(new WaitAck(0));
726
727   m_store_ops.clear();
728
729   return true;
730}
731
732GPRVector EmitSSBOInstruction::make_dest(const nir_intrinsic_instr* ir)
733{
734   GPRVector::Values v;
735   int i;
736   for (i = 0; i < 4; ++i)
737      v[i] = from_nir(ir->dest, i);
738   return GPRVector(v);
739}
740
741}
742