1/*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25#include "util/ralloc.h"
26#include "util/half_float.h"
27#include "util/bitscan.h"
28
29#include "ppir.h"
30#include "codegen.h"
31#include "lima_context.h"
32
/*
 * Pack a four-entry swizzle into consecutive 2-bit selectors.
 *
 * `shift` is added to every entry and the sum is wrapped to 0..3. The
 * selector for entry i lands at bit (i + dest_shift) * 2 of the result.
 */
static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
{
   unsigned packed = 0;
   int bit_pos = dest_shift * 2;

   for (int chan = 0; chan < 4; chan++, bit_pos += 2) {
      int selector = (swizzle[chan] + shift) & 0x3;
      packed |= selector << bit_pos;
   }

   return packed;
}
40
41static int get_scl_reg_index(ppir_src *src, int component)
42{
43   int ret = ppir_target_get_src_reg_index(src);
44   ret += src->swizzle[component];
45   return ret;
46}
47
48static void ppir_codegen_encode_varying(ppir_node *node, void *code)
49{
50   ppir_codegen_field_varying *f = code;
51   ppir_load_node *load = ppir_node_to_load(node);
52   ppir_dest *dest = &load->dest;
53   int index = ppir_target_get_dest_reg_index(dest);
54   int num_components = load->num_components;
55
56   if (node->op != ppir_op_load_coords_reg) {
57      assert(node->op == ppir_op_load_varying ||
58             node->op == ppir_op_load_coords ||
59             node->op == ppir_op_load_fragcoord ||
60             node->op == ppir_op_load_pointcoord ||
61             node->op == ppir_op_load_frontface);
62
63      f->imm.dest = index >> 2;
64      f->imm.mask = dest->write_mask << (index & 0x3);
65
66      int alignment = num_components == 3 ? 3 : num_components - 1;
67      f->imm.alignment = alignment;
68
69      if (load->num_src) {
70         index = ppir_target_get_src_reg_index(&load->src);
71         f->imm.offset_vector = index >> 2;
72         f->imm.offset_scalar = index & 0x3;
73      } else
74         f->imm.offset_vector = 0xf;
75
76      if (alignment == 3)
77         f->imm.index = load->index >> 2;
78      else
79         f->imm.index = load->index >> alignment;
80
81      switch (node->op) {
82         case ppir_op_load_fragcoord:
83            f->imm.source_type = 2;
84            f->imm.perspective = 3;
85            break;
86         case ppir_op_load_pointcoord:
87            f->imm.source_type = 3;
88            break;
89         case ppir_op_load_frontface:
90            f->imm.source_type = 3;
91            f->imm.perspective = 1;
92            break;
93         case ppir_op_load_coords:
94            /* num_components == 3 implies cubemap as we don't support 3D textures */
95            f->imm.source_type = num_components == 3 ? 2 : 0;
96            break;
97         default:
98            break;
99      }
100   }
101   else {  /* node->op == ppir_op_load_coords_reg */
102      f->reg.dest = index >> 2;
103      f->reg.mask = dest->write_mask << (index & 0x3);
104
105      if (load->num_src) {
106         /* num_components == 3 implies cubemap as we don't support 3D textures */
107         if (num_components == 3) {
108            f->reg.source_type = 2;
109            f->reg.perspective = 1;
110         } else {
111            f->reg.source_type = 1;
112         }
113         ppir_src *src = &load->src;
114         index = ppir_target_get_src_reg_index(src);
115         f->reg.source = index >> 2;
116         f->reg.negate = src->negate;
117         f->reg.absolute = src->absolute;
118         f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0);
119      }
120   }
121}
122
123static void ppir_codegen_encode_texld(ppir_node *node, void *code)
124{
125   ppir_codegen_field_sampler *f = code;
126   ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);
127
128   f->index = ldtex->sampler;
129
130   f->lod_bias_en = ldtex->lod_bias_en;
131   f->explicit_lod = ldtex->explicit_lod;
132   if (ldtex->lod_bias_en)
133      f->lod_bias = ppir_target_get_src_reg_index(&ldtex->src[1]);
134
135   switch (ldtex->sampler_dim) {
136   case GLSL_SAMPLER_DIM_2D:
137   case GLSL_SAMPLER_DIM_RECT:
138   case GLSL_SAMPLER_DIM_EXTERNAL:
139      f->type = ppir_codegen_sampler_type_2d;
140      break;
141   case GLSL_SAMPLER_DIM_CUBE:
142      f->type = ppir_codegen_sampler_type_cube;
143      break;
144   default:
145      break;
146   }
147
148   f->offset_en = 0;
149   f->unknown_2 = 0x39001;
150}
151
152static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
153{
154   ppir_codegen_field_uniform *f = code;
155   ppir_load_node *load = ppir_node_to_load(node);
156
157   switch (node->op) {
158      case ppir_op_load_uniform:
159         f->source = ppir_codegen_uniform_src_uniform;
160         break;
161      case ppir_op_load_temp:
162         f->source = ppir_codegen_uniform_src_temporary;
163         break;
164      default:
165         assert(0);
166   }
167
168   /* Uniforms are always aligned to vec4 boundary */
169   f->alignment = 2;
170   f->index = load->index;
171
172   if (load->num_src) {
173      f->offset_en = 1;
174      f->offset_reg = ppir_target_get_src_reg_index(&load->src);
175   }
176}
177
/*
 * Map a shift amount in [-3, 3] to an op value: non-negative shifts map to
 * themselves, negative shifts map to shift + 8 (so -1 -> 7, -3 -> 5).
 */
static unsigned shift_to_op(int shift)
{
   assert(shift >= -3 && shift <= 3);

   if (shift >= 0)
      return shift;

   return shift + 8;
}
183
184static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
185{
186   ppir_codegen_field_vec4_mul *f = code;
187   ppir_alu_node *alu = ppir_node_to_alu(node);
188
189   ppir_dest *dest = &alu->dest;
190   int dest_shift = 0;
191   if (dest->type != ppir_target_pipeline) {
192      int index = ppir_target_get_dest_reg_index(dest);
193      dest_shift = index & 0x3;
194      f->dest = index >> 2;
195      f->mask = dest->write_mask << dest_shift;
196   }
197   f->dest_modifier = dest->modifier;
198
199   switch (node->op) {
200   case ppir_op_mul:
201      f->op = shift_to_op(alu->shift);
202      break;
203   case ppir_op_mov:
204      f->op = ppir_codegen_vec4_mul_op_mov;
205      break;
206   case ppir_op_max:
207      f->op = ppir_codegen_vec4_mul_op_max;
208      break;
209   case ppir_op_min:
210      f->op = ppir_codegen_vec4_mul_op_min;
211      break;
212   case ppir_op_and:
213      f->op = ppir_codegen_vec4_mul_op_and;
214      break;
215   case ppir_op_or:
216      f->op = ppir_codegen_vec4_mul_op_or;
217      break;
218   case ppir_op_xor:
219      f->op = ppir_codegen_vec4_mul_op_xor;
220      break;
221   case ppir_op_gt:
222      f->op = ppir_codegen_vec4_mul_op_gt;
223      break;
224   case ppir_op_ge:
225      f->op = ppir_codegen_vec4_mul_op_ge;
226      break;
227   case ppir_op_eq:
228      f->op = ppir_codegen_vec4_mul_op_eq;
229      break;
230   case ppir_op_ne:
231      f->op = ppir_codegen_vec4_mul_op_ne;
232      break;
233   case ppir_op_not:
234      f->op = ppir_codegen_vec4_mul_op_not;
235      break;
236   default:
237      break;
238   }
239
240   ppir_src *src = alu->src;
241   int index = ppir_target_get_src_reg_index(src);
242   f->arg0_source = index >> 2;
243   f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
244   f->arg0_absolute = src->absolute;
245   f->arg0_negate = src->negate;
246
247   if (alu->num_src == 2) {
248      src = alu->src + 1;
249      index = ppir_target_get_src_reg_index(src);
250      f->arg1_source = index >> 2;
251      f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
252      f->arg1_absolute = src->absolute;
253      f->arg1_negate = src->negate;
254   }
255}
256
257static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
258{
259   ppir_codegen_field_float_mul *f = code;
260   ppir_alu_node *alu = ppir_node_to_alu(node);
261
262   ppir_dest *dest = &alu->dest;
263   int dest_component = ffs(dest->write_mask) - 1;
264   assert(dest_component >= 0);
265
266   if (dest->type != ppir_target_pipeline) {
267      f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
268      f->output_en = true;
269   }
270   f->dest_modifier = dest->modifier;
271
272   switch (node->op) {
273   case ppir_op_mul:
274      f->op = shift_to_op(alu->shift);
275      break;
276   case ppir_op_mov:
277      f->op = ppir_codegen_float_mul_op_mov;
278      break;
279   case ppir_op_max:
280      f->op = ppir_codegen_float_mul_op_max;
281      break;
282   case ppir_op_min:
283      f->op = ppir_codegen_float_mul_op_min;
284      break;
285   case ppir_op_and:
286      f->op = ppir_codegen_float_mul_op_and;
287      break;
288   case ppir_op_or:
289      f->op = ppir_codegen_float_mul_op_or;
290      break;
291   case ppir_op_xor:
292      f->op = ppir_codegen_float_mul_op_xor;
293      break;
294   case ppir_op_gt:
295      f->op = ppir_codegen_float_mul_op_gt;
296      break;
297   case ppir_op_ge:
298      f->op = ppir_codegen_float_mul_op_ge;
299      break;
300   case ppir_op_eq:
301      f->op = ppir_codegen_float_mul_op_eq;
302      break;
303   case ppir_op_ne:
304      f->op = ppir_codegen_float_mul_op_ne;
305      break;
306   case ppir_op_not:
307      f->op = ppir_codegen_float_mul_op_not;
308      break;
309   default:
310      break;
311   }
312
313   ppir_src *src = alu->src;
314   f->arg0_source = get_scl_reg_index(src, dest_component);
315   f->arg0_absolute = src->absolute;
316   f->arg0_negate = src->negate;
317
318   if (alu->num_src == 2) {
319      src = alu->src + 1;
320      f->arg1_source = get_scl_reg_index(src, dest_component);
321      f->arg1_absolute = src->absolute;
322      f->arg1_negate = src->negate;
323   }
324}
325
326static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
327{
328   ppir_codegen_field_vec4_acc *f = code;
329   ppir_alu_node *alu = ppir_node_to_alu(node);
330
331   ppir_dest *dest = &alu->dest;
332   int index = ppir_target_get_dest_reg_index(dest);
333   int dest_shift = index & 0x3;
334   f->dest = index >> 2;
335   f->mask = dest->write_mask << dest_shift;
336   f->dest_modifier = dest->modifier;
337
338   switch (node->op) {
339   case ppir_op_add:
340      f->op = ppir_codegen_vec4_acc_op_add;
341      break;
342   case ppir_op_mov:
343      f->op = ppir_codegen_vec4_acc_op_mov;
344      break;
345   case ppir_op_sum3:
346      f->op = ppir_codegen_vec4_acc_op_sum3;
347      dest_shift = 0;
348      break;
349   case ppir_op_sum4:
350      f->op = ppir_codegen_vec4_acc_op_sum4;
351      dest_shift = 0;
352      break;
353   case ppir_op_floor:
354      f->op = ppir_codegen_vec4_acc_op_floor;
355      break;
356   case ppir_op_ceil:
357      f->op = ppir_codegen_vec4_acc_op_ceil;
358      break;
359   case ppir_op_fract:
360      f->op = ppir_codegen_vec4_acc_op_fract;
361      break;
362   case ppir_op_gt:
363      f->op = ppir_codegen_vec4_acc_op_gt;
364      break;
365   case ppir_op_ge:
366      f->op = ppir_codegen_vec4_acc_op_ge;
367      break;
368   case ppir_op_eq:
369      f->op = ppir_codegen_vec4_acc_op_eq;
370      break;
371   case ppir_op_ne:
372      f->op = ppir_codegen_vec4_acc_op_ne;
373      break;
374   case ppir_op_select:
375      f->op = ppir_codegen_vec4_acc_op_sel;
376      break;
377   case ppir_op_max:
378      f->op = ppir_codegen_vec4_acc_op_max;
379      break;
380   case ppir_op_min:
381      f->op = ppir_codegen_vec4_acc_op_min;
382      break;
383   case ppir_op_ddx:
384      f->op = ppir_codegen_vec4_acc_op_dFdx;
385      break;
386   case ppir_op_ddy:
387      f->op = ppir_codegen_vec4_acc_op_dFdy;
388      break;
389   default:
390      break;
391   }
392
393   ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src;
394   index = ppir_target_get_src_reg_index(src);
395
396   if (src->type == ppir_target_pipeline &&
397       src->pipeline == ppir_pipeline_reg_vmul)
398      f->mul_in = true;
399   else
400      f->arg0_source = index >> 2;
401
402   f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
403   f->arg0_absolute = src->absolute;
404   f->arg0_negate = src->negate;
405
406   if (++src < alu->src + alu->num_src) {
407      index = ppir_target_get_src_reg_index(src);
408      f->arg1_source = index >> 2;
409      f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
410      f->arg1_absolute = src->absolute;
411      f->arg1_negate = src->negate;
412   }
413}
414
415static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
416{
417   ppir_codegen_field_float_acc *f = code;
418   ppir_alu_node *alu = ppir_node_to_alu(node);
419
420   ppir_dest *dest = &alu->dest;
421   int dest_component = ffs(dest->write_mask) - 1;
422   assert(dest_component >= 0);
423
424   f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
425   f->output_en = true;
426   f->dest_modifier = dest->modifier;
427
428   switch (node->op) {
429   case ppir_op_add:
430      f->op = shift_to_op(alu->shift);
431      break;
432   case ppir_op_mov:
433      f->op = ppir_codegen_float_acc_op_mov;
434      break;
435   case ppir_op_max:
436      f->op = ppir_codegen_float_acc_op_max;
437      break;
438   case ppir_op_min:
439      f->op = ppir_codegen_float_acc_op_min;
440      break;
441   case ppir_op_floor:
442      f->op = ppir_codegen_float_acc_op_floor;
443      break;
444   case ppir_op_ceil:
445      f->op = ppir_codegen_float_acc_op_ceil;
446      break;
447   case ppir_op_fract:
448      f->op = ppir_codegen_float_acc_op_fract;
449      break;
450   case ppir_op_gt:
451      f->op = ppir_codegen_float_acc_op_gt;
452      break;
453   case ppir_op_ge:
454      f->op = ppir_codegen_float_acc_op_ge;
455      break;
456   case ppir_op_eq:
457      f->op = ppir_codegen_float_acc_op_eq;
458      break;
459   case ppir_op_ne:
460      f->op = ppir_codegen_float_acc_op_ne;
461      break;
462   case ppir_op_select:
463      f->op = ppir_codegen_float_acc_op_sel;
464      break;
465   case ppir_op_ddx:
466      f->op = ppir_codegen_float_acc_op_dFdx;
467      break;
468   case ppir_op_ddy:
469      f->op = ppir_codegen_float_acc_op_dFdy;
470      break;
471   default:
472      break;
473   }
474
475   ppir_src *src = node->op == ppir_op_select ? alu->src + 1: alu->src;
476   if (src->type == ppir_target_pipeline &&
477       src->pipeline == ppir_pipeline_reg_fmul)
478      f->mul_in = true;
479   else
480      f->arg0_source = get_scl_reg_index(src, dest_component);
481   f->arg0_absolute = src->absolute;
482   f->arg0_negate = src->negate;
483
484   if (++src < alu->src + alu->num_src) {
485      f->arg1_source = get_scl_reg_index(src, dest_component);
486      f->arg1_absolute = src->absolute;
487      f->arg1_negate = src->negate;
488   }
489}
490
491static void ppir_codegen_encode_combine(ppir_node *node, void *code)
492{
493   ppir_codegen_field_combine *f = code;
494   ppir_alu_node *alu = ppir_node_to_alu(node);
495
496   switch (node->op) {
497   case ppir_op_rsqrt:
498   case ppir_op_log2:
499   case ppir_op_exp2:
500   case ppir_op_rcp:
501   case ppir_op_sqrt:
502   case ppir_op_sin:
503   case ppir_op_cos:
504   {
505      f->scalar.dest_vec = false;
506      f->scalar.arg1_en = false;
507
508      ppir_dest *dest = &alu->dest;
509      int dest_component = ffs(dest->write_mask) - 1;
510      assert(dest_component >= 0);
511      f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component;
512      f->scalar.dest_modifier = dest->modifier;
513
514      ppir_src *src = alu->src;
515      f->scalar.arg0_src = get_scl_reg_index(src, dest_component);
516      f->scalar.arg0_absolute = src->absolute;
517      f->scalar.arg0_negate = src->negate;
518
519      switch (node->op) {
520      case ppir_op_rsqrt:
521         f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt;
522         break;
523      case ppir_op_log2:
524         f->scalar.op = ppir_codegen_combine_scalar_op_log2;
525         break;
526      case ppir_op_exp2:
527         f->scalar.op = ppir_codegen_combine_scalar_op_exp2;
528         break;
529      case ppir_op_rcp:
530         f->scalar.op = ppir_codegen_combine_scalar_op_rcp;
531         break;
532      case ppir_op_sqrt:
533         f->scalar.op = ppir_codegen_combine_scalar_op_sqrt;
534         break;
535      case ppir_op_sin:
536         f->scalar.op = ppir_codegen_combine_scalar_op_sin;
537         break;
538      case ppir_op_cos:
539         f->scalar.op = ppir_codegen_combine_scalar_op_cos;
540         break;
541      default:
542         break;
543      }
544      break;
545   }
546   default:
547      break;
548   }
549}
550
551static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
552{
553   assert(node->op == ppir_op_store_temp);
554
555   ppir_codegen_field_temp_write *f = code;
556   ppir_store_node *snode = ppir_node_to_store(node);
557   int num_components = snode->num_components;
558
559   f->temp_write.dest = 0x03; // 11 - temporary
560   f->temp_write.source = snode->src.reg->index;
561
562   int alignment = num_components == 4 ? 2 : num_components - 1;
563   f->temp_write.alignment = alignment;
564   f->temp_write.index = snode->index << (2 - alignment);
565
566   f->temp_write.offset_reg = snode->index >> 2;
567}
568
569static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
570{
571   for (int i = 0; i < constant->num; i++)
572      code[i] = _mesa_float_to_half(constant->value[i].f);
573}
574
575static void ppir_codegen_encode_discard(ppir_node *node, void *code)
576{
577   ppir_codegen_field_branch *b = code;
578   assert(node->op == ppir_op_discard);
579
580   b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
581   b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
582   b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
583}
584
585static void ppir_codegen_encode_branch(ppir_node *node, void *code)
586{
587   ppir_codegen_field_branch *b = code;
588   ppir_branch_node *branch;
589   ppir_instr *target_instr;
590   ppir_block *target;
591   if (node->op == ppir_op_discard) {
592      ppir_codegen_encode_discard(node, code);
593      return;
594   }
595
596   assert(node->op == ppir_op_branch);
597   branch = ppir_node_to_branch(node);
598
599   b->branch.unknown_0 = 0x0;
600   b->branch.unknown_1 = 0x0;
601
602   if (branch->num_src == 2) {
603      b->branch.arg0_source = get_scl_reg_index(&branch->src[0], 0);
604      b->branch.arg1_source = get_scl_reg_index(&branch->src[1], 0);
605      b->branch.cond_gt = branch->cond_gt;
606      b->branch.cond_eq = branch->cond_eq;
607      b->branch.cond_lt = branch->cond_lt;
608   } else if (branch->num_src == 0) {
609      /* Unconditional branch */
610      b->branch.arg0_source = 0;
611      b->branch.arg1_source = 0;
612      b->branch.cond_gt = true;
613      b->branch.cond_eq = true;
614      b->branch.cond_lt = true;
615   } else {
616      assert(false);
617   }
618
619   target = branch->target;
620   while (list_is_empty(&target->instr_list)) {
621      if (!target->list.next)
622         break;
623      target = LIST_ENTRY(ppir_block, target->list.next, list);
624   }
625
626   assert(!list_is_empty(&target->instr_list));
627
628   target_instr = list_first_entry(&target->instr_list, ppir_instr, list);
629   b->branch.target = target_instr->offset - node->instr->offset;
630   b->branch.next_count = target_instr->encode_size;
631}
632
633typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);
634
635static const ppir_codegen_instr_slot_encode_func
636ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
637   [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
638   [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
639   [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
640   [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
641   [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
642   [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
643   [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
644   [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
645   [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
646   [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
647};
648
/*
 * Size in bits of each slot's encoded field, indexed by slot number (the
 * same index used for instr->slots[] and ppir_codegen_encode_slot[]).
 * NOTE(review): the table is positional, so it depends on the numeric
 * order of the PPIR_INSTR_SLOT_* values - confirm against their definition.
 */
static const int ppir_codegen_field_size[] = {
   34,
   62,
   41,
   43,
   30,
   44,
   31,
   30,
   41,
   73,
};
652
/*
 * Return the number of 32-bit words needed to hold `size` bits, i.e. `size`
 * rounded up to a multiple of 32 and divided by 32.
 */
static inline int align_to_word(int size)
{
   const int word_shift = 5;                      /* 32 bits per word */
   const int round_up = (1 << word_shift) - 1;    /* 0x1f */

   return (size + round_up) >> word_shift;
}
657
658static int get_instr_encode_size(ppir_instr *instr)
659{
660   int size = 0;
661
662   for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
663      if (instr->slots[i])
664         size += ppir_codegen_field_size[i];
665   }
666
667   for (int i = 0; i < 2; i++) {
668      if (instr->constant[i].num)
669         size += 64;
670   }
671
672   return align_to_word(size) + 1;
673}
674
/*
 * Merge `src_size` bits from `src` into `dst`, starting at bit `dst_offset`.
 *
 * When the offset is word-aligned the source words are copied with memcpy.
 * Otherwise each source word is split across two destination words and
 * OR-ed in, so the destination bits being written must already be zero.
 * Whole source words are used, so bits of `src` past `src_size` should be
 * zero as well.
 */
static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
{
   const int bit_in_word = dst_offset & 0x1f;
   uint32_t *out = (uint32_t *)dst + (dst_offset >> 5);
   uint32_t *in = src;

   if (!bit_in_word) {
      memcpy(out, in, align_to_word(src_size) * 4);
      return;
   }

   /* Number of bits of each source word that fit in the current output word. */
   const int fits = 32 - bit_in_word;
   int copied = 0;

   for (;;) {
      /* Low part of the source word fills the rest of the output word. */
      *out |= *in << bit_in_word;
      out++;
      copied += fits;
      if (copied >= src_size)
         break;

      /* High part of the source word starts the next output word. */
      *out |= *in >> fits;
      in++;
      copied += bit_in_word;
      if (copied >= src_size)
         break;
   }
}
704
705static int encode_instr(ppir_instr *instr, void *code, void *last_code)
706{
707   int size = 0;
708   ppir_codegen_ctrl *ctrl = code;
709
710   for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
711      if (instr->slots[i]) {
712         /* max field size (73), align to dword */
713         uint8_t output[12] = {0};
714
715         ppir_codegen_encode_slot[i](instr->slots[i], output);
716         bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]);
717
718         size += ppir_codegen_field_size[i];
719         ctrl->fields |= 1 << i;
720      }
721   }
722
723   if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
724      ctrl->sync = true;
725
726   if (instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD]) {
727      ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD];
728      if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
729         ctrl->sync = true;
730   }
731
732   if (instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD]) {
733      ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD];
734      if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
735         ctrl->sync = true;
736   }
737
738   for (int i = 0; i < 2; i++) {
739      if (instr->constant[i].num) {
740         uint16_t output[4] = {0};
741
742         ppir_codegen_encode_const(instr->constant + i, output);
743         bitcopy(ctrl + 1, size, output, instr->constant[i].num * 16);
744
745         size += 64;
746         ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i);
747      }
748   }
749
750   size = align_to_word(size) + 1;
751
752   ctrl->count = size;
753   if (instr->is_end)
754      ctrl->stop = true;
755
756   if (last_code) {
757      ppir_codegen_ctrl *last_ctrl = last_code;
758      last_ctrl->next_count = size;
759      last_ctrl->prefetch = true;
760   }
761
762   return size;
763}
764
765static void ppir_codegen_print_prog(ppir_compiler *comp)
766{
767   uint32_t *prog = comp->prog->shader;
768   unsigned offset = 0;
769
770   printf("========ppir codegen========\n");
771   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
772      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
773         printf("%03d (@%6d): ", instr->index, instr->offset);
774         int n = prog[0] & 0x1f;
775         for (int i = 0; i < n; i++) {
776            if (i && i % 6 == 0)
777               printf("\n    ");
778            printf("%08x ", prog[i]);
779         }
780         printf("\n");
781         ppir_disassemble_instr(prog, offset, stdout);
782         prog += n;
783         offset += n;
784      }
785   }
786   printf("-----------------------\n");
787}
788
789bool ppir_codegen_prog(ppir_compiler *comp)
790{
791   int size = 0;
792   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
793      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
794         instr->offset = size;
795         instr->encode_size = get_instr_encode_size(instr);
796         size += instr->encode_size;
797      }
798   }
799
800   uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t));
801   if (!prog)
802      return false;
803
804   uint32_t *code = prog, *last_code = NULL;
805   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
806      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
807         int offset = encode_instr(instr, code, last_code);
808         last_code = code;
809         code += offset;
810      }
811   }
812
813   if (comp->prog->shader)
814      ralloc_free(comp->prog->shader);
815
816   comp->prog->shader = prog;
817   comp->prog->state.shader_size = size * sizeof(uint32_t);
818
819   if (lima_debug & LIMA_DEBUG_PP)
820      ppir_codegen_print_prog(comp);
821
822   return true;
823}
824