1/*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25#include "util/ralloc.h"
26#include "util/u_half.h"
27#include "util/bitscan.h"
28
29#include "ppir.h"
30#include "codegen.h"
31#include "lima_context.h"
32
/* Pack a four-entry swizzle into consecutive 2-bit selectors.
 *
 * Each entry has @shift added and is wrapped to 0..3; entry i is stored in
 * selector slot (i + @dest_shift), i.e. at bit 2 * (i + dest_shift).
 *
 * @swizzle:    at least four component selectors
 * @shift:      offset added to every selector before wrapping
 * @dest_shift: number of 2-bit slots skipped before the first selector
 *
 * Returns the packed selectors.
 */
static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
{
   unsigned packed = 0;
   int bit = dest_shift * 2;

   for (int chan = 0; chan < 4; chan++, bit += 2) {
      int selector = (swizzle[chan] + shift) & 0x3;
      packed |= selector << bit;
   }

   return packed;
}
40
41static int get_scl_reg_index(ppir_src *src, int component)
42{
43   int ret = ppir_target_get_src_reg_index(src);
44   ret += src->swizzle[component];
45   return ret;
46}
47
48static void ppir_codegen_encode_varying(ppir_node *node, void *code)
49{
50   ppir_codegen_field_varying *f = code;
51   ppir_load_node *load = ppir_node_to_load(node);
52   ppir_dest *dest = &load->dest;
53   int index = ppir_target_get_dest_reg_index(dest);
54   int num_components = load->num_components;
55
56   if (num_components) {
57      assert(node->op == ppir_op_load_varying ||
58             node->op == ppir_op_load_coords ||
59             node->op == ppir_op_load_fragcoord);
60
61      f->imm.dest = index >> 2;
62      f->imm.mask = dest->write_mask << (index & 0x3);
63
64      int alignment = num_components == 3 ? 3 : num_components - 1;
65      f->imm.alignment = alignment;
66      f->imm.offset_vector = 0xf;
67
68      if (alignment == 3)
69         f->imm.index = load->index >> 2;
70      else
71         f->imm.index = load->index >> alignment;
72
73      if (node->op == ppir_op_load_fragcoord) {
74         f->imm.source_type = 2;
75         f->imm.perspective = 3;
76      }
77   }
78   else {
79      assert(node->op == ppir_op_load_coords);
80
81      f->reg.dest = index >> 2;
82      f->reg.mask = dest->write_mask << (index & 0x3);
83
84      f->reg.source_type = 1;
85
86      ppir_src *src = &load->src;
87      index = ppir_target_get_src_reg_index(src);
88      f->reg.source = index >> 2;
89      f->reg.negate = src->negate;
90      f->reg.absolute = src->absolute;
91      f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0);
92   }
93}
94
95static void ppir_codegen_encode_texld(ppir_node *node, void *code)
96{
97   ppir_codegen_field_sampler *f = code;
98   ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);
99
100   f->index = ldtex->sampler;
101   f->lod_bias_en = 0;
102   f->type = ppir_codegen_sampler_type_2d;
103   f->offset_en = 0;
104   f->unknown_2 = 0x39001;
105}
106
107static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
108{
109   ppir_codegen_field_uniform *f = code;
110   ppir_load_node *load = ppir_node_to_load(node);
111
112   switch (node->op) {
113      case ppir_op_load_uniform:
114         f->source = ppir_codegen_uniform_src_uniform;
115         break;
116      case ppir_op_load_temp:
117         f->source = ppir_codegen_uniform_src_temporary;
118         break;
119      default:
120         assert(0);
121   }
122
123   int num_components = load->num_components;
124   int alignment = num_components == 4 ? 2 : num_components - 1;
125
126   f->alignment = alignment;
127
128   /* TODO: uniform can be also combined like varying */
129   f->index = load->index << (2 - alignment);
130}
131
/* Map a shift amount in [-3, 3] to an op field value: non-negative shifts
 * map to themselves, negative shifts map to shift + 8 (so -1 -> 7, -3 -> 5).
 */
static unsigned shift_to_op(int shift)
{
   assert(shift >= -3 && shift <= 3);

   if (shift >= 0)
      return shift;

   return shift + 8;
}
137
138static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
139{
140   ppir_codegen_field_vec4_mul *f = code;
141   ppir_alu_node *alu = ppir_node_to_alu(node);
142
143   ppir_dest *dest = &alu->dest;
144   int dest_shift = 0;
145   if (dest->type != ppir_target_pipeline) {
146      int index = ppir_target_get_dest_reg_index(dest);
147      dest_shift = index & 0x3;
148      f->dest = index >> 2;
149      f->mask = dest->write_mask << dest_shift;
150   }
151   f->dest_modifier = dest->modifier;
152
153   switch (node->op) {
154   case ppir_op_mul:
155      f->op = shift_to_op(alu->shift);
156      break;
157   case ppir_op_mov:
158      f->op = ppir_codegen_vec4_mul_op_mov;
159      break;
160   case ppir_op_max:
161      f->op = ppir_codegen_vec4_mul_op_max;
162      break;
163   case ppir_op_min:
164      f->op = ppir_codegen_vec4_mul_op_min;
165      break;
166   case ppir_op_and:
167      f->op = ppir_codegen_vec4_mul_op_and;
168      break;
169   case ppir_op_or:
170      f->op = ppir_codegen_vec4_mul_op_or;
171      break;
172   case ppir_op_xor:
173      f->op = ppir_codegen_vec4_mul_op_xor;
174      break;
175   case ppir_op_gt:
176      f->op = ppir_codegen_vec4_mul_op_gt;
177      break;
178   case ppir_op_ge:
179      f->op = ppir_codegen_vec4_mul_op_ge;
180      break;
181   case ppir_op_eq:
182      f->op = ppir_codegen_vec4_mul_op_eq;
183      break;
184   case ppir_op_ne:
185      f->op = ppir_codegen_vec4_mul_op_ne;
186      break;
187   case ppir_op_not:
188      f->op = ppir_codegen_vec4_mul_op_not;
189      break;
190   default:
191      break;
192   }
193
194   ppir_src *src = alu->src;
195   int index = ppir_target_get_src_reg_index(src);
196   f->arg0_source = index >> 2;
197   f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
198   f->arg0_absolute = src->absolute;
199   f->arg0_negate = src->negate;
200
201   if (alu->num_src == 2) {
202      src = alu->src + 1;
203      index = ppir_target_get_src_reg_index(src);
204      f->arg1_source = index >> 2;
205      f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
206      f->arg1_absolute = src->absolute;
207      f->arg1_negate = src->negate;
208   }
209}
210
211static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
212{
213   ppir_codegen_field_float_mul *f = code;
214   ppir_alu_node *alu = ppir_node_to_alu(node);
215
216   ppir_dest *dest = &alu->dest;
217   int dest_component = ffs(dest->write_mask) - 1;
218   assert(dest_component >= 0);
219
220   if (dest->type != ppir_target_pipeline) {
221      f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
222      f->output_en = true;
223   }
224   f->dest_modifier = dest->modifier;
225
226   switch (node->op) {
227   case ppir_op_mul:
228      f->op = shift_to_op(alu->shift);
229      break;
230   case ppir_op_mov:
231      f->op = ppir_codegen_float_mul_op_mov;
232      break;
233   case ppir_op_max:
234      f->op = ppir_codegen_float_mul_op_max;
235      break;
236   case ppir_op_min:
237      f->op = ppir_codegen_float_mul_op_min;
238      break;
239   case ppir_op_and:
240      f->op = ppir_codegen_float_mul_op_and;
241      break;
242   case ppir_op_or:
243      f->op = ppir_codegen_float_mul_op_or;
244      break;
245   case ppir_op_xor:
246      f->op = ppir_codegen_float_mul_op_xor;
247      break;
248   case ppir_op_gt:
249      f->op = ppir_codegen_float_mul_op_gt;
250      break;
251   case ppir_op_ge:
252      f->op = ppir_codegen_float_mul_op_ge;
253      break;
254   case ppir_op_eq:
255      f->op = ppir_codegen_float_mul_op_eq;
256      break;
257   case ppir_op_ne:
258      f->op = ppir_codegen_float_mul_op_ne;
259      break;
260   case ppir_op_not:
261      f->op = ppir_codegen_float_mul_op_not;
262      break;
263   default:
264      break;
265   }
266
267   ppir_src *src = alu->src;
268   f->arg0_source = get_scl_reg_index(src, dest_component);
269   f->arg0_absolute = src->absolute;
270   f->arg0_negate = src->negate;
271
272   if (alu->num_src == 2) {
273      src = alu->src + 1;
274      f->arg1_source = get_scl_reg_index(src, dest_component);
275      f->arg1_absolute = src->absolute;
276      f->arg1_negate = src->negate;
277   }
278}
279
280static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
281{
282   ppir_codegen_field_vec4_acc *f = code;
283   ppir_alu_node *alu = ppir_node_to_alu(node);
284
285   ppir_dest *dest = &alu->dest;
286   int index = ppir_target_get_dest_reg_index(dest);
287   int dest_shift = index & 0x3;
288   f->dest = index >> 2;
289   f->mask = dest->write_mask << dest_shift;
290   f->dest_modifier = dest->modifier;
291
292   switch (node->op) {
293   case ppir_op_add:
294      f->op = ppir_codegen_vec4_acc_op_add;
295      break;
296   case ppir_op_mov:
297      f->op = ppir_codegen_vec4_acc_op_mov;
298      break;
299   case ppir_op_sum3:
300      f->op = ppir_codegen_vec4_acc_op_sum3;
301      dest_shift = 0;
302      break;
303   case ppir_op_sum4:
304      f->op = ppir_codegen_vec4_acc_op_sum4;
305      dest_shift = 0;
306      break;
307   case ppir_op_floor:
308      f->op = ppir_codegen_vec4_acc_op_floor;
309      break;
310   case ppir_op_ceil:
311      f->op = ppir_codegen_vec4_acc_op_ceil;
312      break;
313   case ppir_op_fract:
314      f->op = ppir_codegen_vec4_acc_op_fract;
315      break;
316   case ppir_op_gt:
317      f->op = ppir_codegen_vec4_acc_op_gt;
318      break;
319   case ppir_op_ge:
320      f->op = ppir_codegen_vec4_acc_op_ge;
321      break;
322   case ppir_op_eq:
323      f->op = ppir_codegen_vec4_acc_op_eq;
324      break;
325   case ppir_op_ne:
326      f->op = ppir_codegen_vec4_acc_op_ne;
327      break;
328   case ppir_op_select:
329      f->op = ppir_codegen_vec4_acc_op_sel;
330      break;
331   default:
332      break;
333   }
334
335   ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src;
336   index = ppir_target_get_src_reg_index(src);
337
338   if (src->type == ppir_target_pipeline &&
339       src->pipeline == ppir_pipeline_reg_vmul)
340      f->mul_in = true;
341   else
342      f->arg0_source = index >> 2;
343
344   f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
345   f->arg0_absolute = src->absolute;
346   f->arg0_negate = src->negate;
347
348   if (++src < alu->src + alu->num_src) {
349      index = ppir_target_get_src_reg_index(src);
350      f->arg1_source = index >> 2;
351      f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
352      f->arg1_absolute = src->absolute;
353      f->arg1_negate = src->negate;
354   }
355}
356
357static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
358{
359   ppir_codegen_field_float_acc *f = code;
360   ppir_alu_node *alu = ppir_node_to_alu(node);
361
362   ppir_dest *dest = &alu->dest;
363   int dest_component = ffs(dest->write_mask) - 1;
364   assert(dest_component >= 0);
365
366   f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
367   f->output_en = true;
368   f->dest_modifier = dest->modifier;
369
370   switch (node->op) {
371   case ppir_op_add:
372      f->op = shift_to_op(alu->shift);
373      break;
374   case ppir_op_mov:
375      f->op = ppir_codegen_float_acc_op_mov;
376      break;
377   case ppir_op_max:
378      f->op = ppir_codegen_float_acc_op_max;
379      break;
380   case ppir_op_min:
381      f->op = ppir_codegen_float_acc_op_min;
382      break;
383   case ppir_op_floor:
384      f->op = ppir_codegen_float_acc_op_floor;
385      break;
386   case ppir_op_ceil:
387      f->op = ppir_codegen_float_acc_op_ceil;
388      break;
389   case ppir_op_fract:
390      f->op = ppir_codegen_float_acc_op_fract;
391      break;
392   case ppir_op_gt:
393      f->op = ppir_codegen_float_acc_op_gt;
394      break;
395   case ppir_op_ge:
396      f->op = ppir_codegen_float_acc_op_ge;
397      break;
398   case ppir_op_eq:
399      f->op = ppir_codegen_float_acc_op_eq;
400      break;
401   case ppir_op_ne:
402      f->op = ppir_codegen_float_acc_op_ne;
403      break;
404   case ppir_op_select:
405      f->op = ppir_codegen_float_acc_op_sel;
406      break;
407   default:
408      break;
409   }
410
411   ppir_src *src = node->op == ppir_op_select ? alu->src + 1: alu->src;
412   if (src->type == ppir_target_pipeline &&
413       src->pipeline == ppir_pipeline_reg_fmul)
414      f->mul_in = true;
415   else
416      f->arg0_source = get_scl_reg_index(src, dest_component);
417   f->arg0_absolute = src->absolute;
418   f->arg0_negate = src->negate;
419
420   if (++src < alu->src + alu->num_src) {
421      f->arg1_source = get_scl_reg_index(src, dest_component);
422      f->arg1_absolute = src->absolute;
423      f->arg1_negate = src->negate;
424   }
425}
426
427static void ppir_codegen_encode_combine(ppir_node *node, void *code)
428{
429   ppir_codegen_field_combine *f = code;
430   ppir_alu_node *alu = ppir_node_to_alu(node);
431
432   switch (node->op) {
433   case ppir_op_rsqrt:
434   case ppir_op_log2:
435   case ppir_op_exp2:
436   case ppir_op_rcp:
437   case ppir_op_sqrt:
438   case ppir_op_sin:
439   case ppir_op_cos:
440   {
441      f->scalar.dest_vec = false;
442      f->scalar.arg1_en = false;
443
444      ppir_dest *dest = &alu->dest;
445      int dest_component = ffs(dest->write_mask) - 1;
446      assert(dest_component >= 0);
447      f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component;
448      f->scalar.dest_modifier = dest->modifier;
449
450      ppir_src *src = alu->src;
451      f->scalar.arg0_src = get_scl_reg_index(src, dest_component);
452      f->scalar.arg0_absolute = src->absolute;
453      f->scalar.arg0_negate = src->negate;
454
455      switch (node->op) {
456      case ppir_op_rsqrt:
457         f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt;
458         break;
459      case ppir_op_log2:
460         f->scalar.op = ppir_codegen_combine_scalar_op_log2;
461         break;
462      case ppir_op_exp2:
463         f->scalar.op = ppir_codegen_combine_scalar_op_exp2;
464         break;
465      case ppir_op_rcp:
466         f->scalar.op = ppir_codegen_combine_scalar_op_rcp;
467         break;
468      case ppir_op_sqrt:
469         f->scalar.op = ppir_codegen_combine_scalar_op_sqrt;
470         break;
471      case ppir_op_sin:
472         f->scalar.op = ppir_codegen_combine_scalar_op_sin;
473         break;
474      case ppir_op_cos:
475         f->scalar.op = ppir_codegen_combine_scalar_op_cos;
476         break;
477      default:
478         break;
479      }
480   }
481   default:
482      break;
483   }
484}
485
486static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
487{
488   assert(node->op == ppir_op_store_temp);
489
490   ppir_codegen_field_temp_write *f = code;
491   ppir_store_node *snode = ppir_node_to_store(node);
492   int num_components = snode->num_components;
493
494   f->temp_write.dest = 0x03; // 11 - temporary
495   f->temp_write.source = snode->src.reg->index;
496
497   int alignment = num_components == 4 ? 2 : num_components - 1;
498   f->temp_write.alignment = alignment;
499   f->temp_write.index = snode->index << (2 - alignment);
500
501   f->temp_write.offset_reg = snode->index >> 2;
502}
503
504static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
505{
506   for (int i = 0; i < constant->num; i++)
507      code[i] = util_float_to_half(constant->value[i].f);
508}
509
510typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);
511
512static const ppir_codegen_instr_slot_encode_func
513ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
514   [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
515   [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
516   [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
517   [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
518   [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
519   [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
520   [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
521   [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
522   [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
523};
524
/* Size in bits of each encoded instruction field, indexed by slot number.
 * NOTE(review): the entry order presumably follows the PPIR_INSTR_SLOT_*
 * numbering -- confirm against ppir.h.
 */
static const int ppir_codegen_field_size[] = {
   34,
   62,
   41,
   43,
   30,
   44,
   31,
   30,
   41,
   73,
};
528
/* Number of 32-bit words needed to hold @size bits (rounds up).
 * Note that the result is a word count, not a bit count.
 */
static inline int align_to_word(int size)
{
   const int round_up = 0x1f;   /* bits per word minus one */

   return (size + round_up) >> 5;
}
533
534static int get_instr_encode_size(ppir_instr *instr)
535{
536   int size = 0;
537
538   for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
539      if (instr->slots[i])
540         size += ppir_codegen_field_size[i];
541   }
542
543   for (int i = 0; i < 2; i++) {
544      if (instr->constant[i].num)
545         size += 64;
546   }
547
548   return align_to_word(size) + 1;
549}
550
/* Merge a bit string into an array of 32-bit words at a bit offset.
 *
 * @dst:        destination word array (must be suitably aligned for
 *              uint32_t)
 * @dst_offset: bit offset into @dst at which the first source bit lands
 * @src:        source bits; read in whole 32-bit words but may have any
 *              alignment and any declared element type
 * @src_size:   number of valid bits in @src
 *
 * When @dst_offset is a multiple of 32 the source words are copied over
 * the destination. Otherwise they are OR-ed in, so the destination bits
 * being written must be zero beforehand. In both cases whole source words
 * are consumed: the buffer behind @src must cover src_size rounded up to
 * 32 bits, and any bits past @src_size in the last word are merged as
 * well, so callers must keep them zero.
 *
 * The source is loaded with memcpy() rather than through a uint32_t
 * pointer because callers pass byte and half-word arrays, which must not
 * be dereferenced as uint32_t.
 */
static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
{
   int off1 = dst_offset & 0x1f;
   uint32_t *cpy_dst = dst;
   const uint8_t *cpy_src = src;

   cpy_dst += (dst_offset >> 5);

   if (!off1) {
      /* word aligned: copy whole source words (same rounding as
       * align_to_word())
       */
      int src_words = (src_size + 0x1f) >> 5;

      memcpy(cpy_dst, cpy_src, src_words * sizeof(uint32_t));
      return;
   }

   int off2 = 32 - off1;
   int cpy_size = 0;

   while (1) {
      uint32_t word;

      memcpy(&word, cpy_src, sizeof(word));

      /* low off2 bits of the source word fill the rest of this dst word */
      *cpy_dst |= word << off1;
      cpy_dst++;

      cpy_size += off2;
      if (cpy_size >= src_size)
         break;

      /* remaining high off1 bits spill into the next dst word */
      *cpy_dst |= word >> off2;
      cpy_src += sizeof(word);

      cpy_size += off1;
      if (cpy_size >= src_size)
         break;
   }
}
580
581static int encode_instr(ppir_instr *instr, void *code, void *last_code)
582{
583   int size = 0;
584   ppir_codegen_ctrl *ctrl = code;
585
586   for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
587      if (instr->slots[i]) {
588         /* max field size (73), align to dword */
589         uint8_t output[12] = {0};
590
591         ppir_codegen_encode_slot[i](instr->slots[i], output);
592         bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]);
593
594         size += ppir_codegen_field_size[i];
595         ctrl->fields |= 1 << i;
596      }
597   }
598
599   if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
600      ctrl->sync = true;
601
602   for (int i = 0; i < 2; i++) {
603      if (instr->constant[i].num) {
604         uint16_t output[4] = {0};
605
606         ppir_codegen_encode_const(instr->constant + i, output);
607         bitcopy(ctrl + 1, size, output, instr->constant[i].num * 16);
608
609         size += 64;
610         ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i);
611      }
612   }
613
614   size = align_to_word(size) + 1;
615
616   ctrl->count = size;
617   if (instr->is_end)
618      ctrl->stop = true;
619
620   if (last_code) {
621      ppir_codegen_ctrl *last_ctrl = last_code;
622      last_ctrl->next_count = size;
623      last_ctrl->prefetch = true;
624   }
625
626   return size;
627}
628
629static void ppir_codegen_print_prog(ppir_compiler *comp)
630{
631   uint32_t *prog = comp->prog->shader;
632   unsigned offset = 0;
633
634   printf("========ppir codegen========\n");
635   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
636      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
637         printf("%03d: ", instr->index);
638         int n = prog[0] & 0x1f;
639         for (int i = 0; i < n; i++) {
640            if (i && i % 6 == 0)
641               printf("\n    ");
642            printf("%08x ", prog[i]);
643         }
644         printf("\n");
645         ppir_disassemble_instr(prog, offset);
646         prog += n;
647         offset += n;
648      }
649   }
650   printf("-----------------------\n");
651}
652
653bool ppir_codegen_prog(ppir_compiler *comp)
654{
655   int size = 0;
656   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
657      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
658         size += get_instr_encode_size(instr);
659      }
660   }
661
662   uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t));
663   if (!prog)
664      return false;
665
666   uint32_t *code = prog, *last_code = NULL;
667   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
668      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
669         int offset = encode_instr(instr, code, last_code);
670         last_code = code;
671         code += offset;
672      }
673   }
674
675   comp->prog->shader = prog;
676   comp->prog->shader_size = size * sizeof(uint32_t);
677
678   if (lima_debug & LIMA_DEBUG_PP)
679      ppir_codegen_print_prog(comp);
680
681   return true;
682}
683