1/*
2 * Copyright © 2016-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "broadcom/common/v3d_device_info.h"
25#include "v3d_compiler.h"
26
27int
28vir_get_nsrc(struct qinst *inst)
29{
30        switch (inst->qpu.type) {
31        case V3D_QPU_INSTR_TYPE_BRANCH:
32                return 0;
33        case V3D_QPU_INSTR_TYPE_ALU:
34                if (inst->qpu.alu.add.op != V3D_QPU_A_NOP)
35                        return v3d_qpu_add_op_num_src(inst->qpu.alu.add.op);
36                else
37                        return v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op);
38        }
39
40        return 0;
41}
42
43/**
44 * Returns whether the instruction has any side effects that must be
45 * preserved.
46 */
47bool
48vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
49{
50        switch (inst->qpu.type) {
51        case V3D_QPU_INSTR_TYPE_BRANCH:
52                return true;
53        case V3D_QPU_INSTR_TYPE_ALU:
54                switch (inst->qpu.alu.add.op) {
55                case V3D_QPU_A_SETREVF:
56                case V3D_QPU_A_SETMSF:
57                case V3D_QPU_A_VPMSETUP:
58                case V3D_QPU_A_STVPMV:
59                case V3D_QPU_A_STVPMD:
60                case V3D_QPU_A_STVPMP:
61                case V3D_QPU_A_VPMWT:
62                case V3D_QPU_A_TMUWT:
63                        return true;
64                default:
65                        break;
66                }
67
68                switch (inst->qpu.alu.mul.op) {
69                case V3D_QPU_M_MULTOP:
70                        return true;
71                default:
72                        break;
73                }
74        }
75
76        if (inst->qpu.sig.ldtmu ||
77            inst->qpu.sig.ldvary ||
78            inst->qpu.sig.wrtmuc ||
79            inst->qpu.sig.thrsw) {
80                return true;
81        }
82
83        return false;
84}
85
86bool
87vir_is_raw_mov(struct qinst *inst)
88{
89        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
90            (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV &&
91             inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) {
92                return false;
93        }
94
95        if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
96            inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
97                return false;
98        }
99
100        if (inst->qpu.alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
101            inst->qpu.alu.add.b_unpack != V3D_QPU_UNPACK_NONE ||
102            inst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
103            inst->qpu.alu.mul.b_unpack != V3D_QPU_UNPACK_NONE) {
104                return false;
105        }
106
107        if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
108            inst->qpu.flags.mc != V3D_QPU_COND_NONE)
109                return false;
110
111        return true;
112}
113
114bool
115vir_is_add(struct qinst *inst)
116{
117        return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
118                inst->qpu.alu.add.op != V3D_QPU_A_NOP);
119}
120
121bool
122vir_is_mul(struct qinst *inst)
123{
124        return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
125                inst->qpu.alu.mul.op != V3D_QPU_M_NOP);
126}
127
128bool
129vir_is_tex(struct qinst *inst)
130{
131        if (inst->dst.file == QFILE_MAGIC)
132                return v3d_qpu_magic_waddr_is_tmu(inst->dst.index);
133
134        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
135            inst->qpu.alu.add.op == V3D_QPU_A_TMUWT) {
136                return true;
137        }
138
139        return false;
140}
141
142bool
143vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
144{
145        for (int i = 0; i < vir_get_nsrc(inst); i++) {
146                switch (inst->src[i].file) {
147                case QFILE_VPM:
148                        return true;
149                default:
150                        break;
151                }
152        }
153
154        if (devinfo->ver < 41 && (inst->qpu.sig.ldvary ||
155                                  inst->qpu.sig.ldtlb ||
156                                  inst->qpu.sig.ldtlbu ||
157                                  inst->qpu.sig.ldvpm)) {
158                return true;
159        }
160
161        return false;
162}
163
164bool
165vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
166{
167        switch (inst->dst.file) {
168        case QFILE_MAGIC:
169                switch (inst->dst.index) {
170                case V3D_QPU_WADDR_RECIP:
171                case V3D_QPU_WADDR_RSQRT:
172                case V3D_QPU_WADDR_EXP:
173                case V3D_QPU_WADDR_LOG:
174                case V3D_QPU_WADDR_SIN:
175                        return true;
176                }
177                break;
178        default:
179                break;
180        }
181
182        if (devinfo->ver < 41 && inst->qpu.sig.ldtmu)
183                return true;
184
185        return false;
186}
187
188void
189vir_set_unpack(struct qinst *inst, int src,
190               enum v3d_qpu_input_unpack unpack)
191{
192        assert(src == 0 || src == 1);
193
194        if (vir_is_add(inst)) {
195                if (src == 0)
196                        inst->qpu.alu.add.a_unpack = unpack;
197                else
198                        inst->qpu.alu.add.b_unpack = unpack;
199        } else {
200                assert(vir_is_mul(inst));
201                if (src == 0)
202                        inst->qpu.alu.mul.a_unpack = unpack;
203                else
204                        inst->qpu.alu.mul.b_unpack = unpack;
205        }
206}
207
208void
209vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
210{
211        if (vir_is_add(inst)) {
212                inst->qpu.flags.ac = cond;
213        } else {
214                assert(vir_is_mul(inst));
215                inst->qpu.flags.mc = cond;
216        }
217}
218
219void
220vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf)
221{
222        if (vir_is_add(inst)) {
223                inst->qpu.flags.apf = pf;
224        } else {
225                assert(vir_is_mul(inst));
226                inst->qpu.flags.mpf = pf;
227        }
228}
229
230void
231vir_set_uf(struct qinst *inst, enum v3d_qpu_uf uf)
232{
233        if (vir_is_add(inst)) {
234                inst->qpu.flags.auf = uf;
235        } else {
236                assert(vir_is_mul(inst));
237                inst->qpu.flags.muf = uf;
238        }
239}
240
#if 0
/* NOTE(review): dead code, compiled out with #if 0.  The QPU_PACK_* enum
 * names used here appear to be from the older vc4 compiler rather than this
 * driver's V3D_QPU_* names, so it would not build if enabled — presumably
 * retained as a reference for a future port; confirm before reviving.
 */
uint8_t
vir_channels_written(struct qinst *inst)
{
        if (vir_is_mul(inst)) {
                switch (inst->dst.pack) {
                case QPU_PACK_MUL_NOP:
                case QPU_PACK_MUL_8888:
                        return 0xf;
                case QPU_PACK_MUL_8A:
                        return 0x1;
                case QPU_PACK_MUL_8B:
                        return 0x2;
                case QPU_PACK_MUL_8C:
                        return 0x4;
                case QPU_PACK_MUL_8D:
                        return 0x8;
                }
        } else {
                switch (inst->dst.pack) {
                case QPU_PACK_A_NOP:
                case QPU_PACK_A_8888:
                case QPU_PACK_A_8888_SAT:
                case QPU_PACK_A_32_SAT:
                        return 0xf;
                case QPU_PACK_A_8A:
                case QPU_PACK_A_8A_SAT:
                        return 0x1;
                case QPU_PACK_A_8B:
                case QPU_PACK_A_8B_SAT:
                        return 0x2;
                case QPU_PACK_A_8C:
                case QPU_PACK_A_8C_SAT:
                        return 0x4;
                case QPU_PACK_A_8D:
                case QPU_PACK_A_8D_SAT:
                        return 0x8;
                case QPU_PACK_A_16A:
                case QPU_PACK_A_16A_SAT:
                        return 0x3;
                case QPU_PACK_A_16B:
                case QPU_PACK_A_16B_SAT:
                        return 0xc;
                }
        }
        unreachable("Bad pack field");
}
#endif
289
290struct qreg
291vir_get_temp(struct v3d_compile *c)
292{
293        struct qreg reg;
294
295        reg.file = QFILE_TEMP;
296        reg.index = c->num_temps++;
297
298        if (c->num_temps > c->defs_array_size) {
299                uint32_t old_size = c->defs_array_size;
300                c->defs_array_size = MAX2(old_size * 2, 16);
301
302                c->defs = reralloc(c, c->defs, struct qinst *,
303                                   c->defs_array_size);
304                memset(&c->defs[old_size], 0,
305                       sizeof(c->defs[0]) * (c->defs_array_size - old_size));
306
307                c->spillable = reralloc(c, c->spillable,
308                                        BITSET_WORD,
309                                        BITSET_WORDS(c->defs_array_size));
310                for (int i = old_size; i < c->defs_array_size; i++)
311                        BITSET_SET(c->spillable, i);
312        }
313
314        return reg;
315}
316
317struct qinst *
318vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1)
319{
320        struct qinst *inst = calloc(1, sizeof(*inst));
321
322        inst->qpu = v3d_qpu_nop();
323        inst->qpu.alu.add.op = op;
324
325        inst->dst = dst;
326        inst->src[0] = src0;
327        inst->src[1] = src1;
328        inst->uniform = ~0;
329
330        return inst;
331}
332
333struct qinst *
334vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1)
335{
336        struct qinst *inst = calloc(1, sizeof(*inst));
337
338        inst->qpu = v3d_qpu_nop();
339        inst->qpu.alu.mul.op = op;
340
341        inst->dst = dst;
342        inst->src[0] = src0;
343        inst->src[1] = src1;
344        inst->uniform = ~0;
345
346        return inst;
347}
348
349struct qinst *
350vir_branch_inst(struct v3d_compile *c, enum v3d_qpu_branch_cond cond)
351{
352        struct qinst *inst = calloc(1, sizeof(*inst));
353
354        inst->qpu = v3d_qpu_nop();
355        inst->qpu.type = V3D_QPU_INSTR_TYPE_BRANCH;
356        inst->qpu.branch.cond = cond;
357        inst->qpu.branch.msfign = V3D_QPU_MSFIGN_NONE;
358        inst->qpu.branch.bdi = V3D_QPU_BRANCH_DEST_REL;
359        inst->qpu.branch.ub = true;
360        inst->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL;
361
362        inst->dst = vir_nop_reg();
363        inst->uniform = vir_get_uniform_index(c, QUNIFORM_CONSTANT, 0);
364
365        return inst;
366}
367
368static void
369vir_emit(struct v3d_compile *c, struct qinst *inst)
370{
371        switch (c->cursor.mode) {
372        case vir_cursor_add:
373                list_add(&inst->link, c->cursor.link);
374                break;
375        case vir_cursor_addtail:
376                list_addtail(&inst->link, c->cursor.link);
377                break;
378        }
379
380        c->cursor = vir_after_inst(inst);
381        c->live_intervals_valid = false;
382}
383
384/* Updates inst to write to a new temporary, emits it, and notes the def. */
385struct qreg
386vir_emit_def(struct v3d_compile *c, struct qinst *inst)
387{
388        assert(inst->dst.file == QFILE_NULL);
389
390        /* If we're emitting an instruction that's a def, it had better be
391         * writing a register.
392         */
393        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
394                assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP ||
395                       v3d_qpu_add_op_has_dst(inst->qpu.alu.add.op));
396                assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP ||
397                       v3d_qpu_mul_op_has_dst(inst->qpu.alu.mul.op));
398        }
399
400        inst->dst = vir_get_temp(c);
401
402        if (inst->dst.file == QFILE_TEMP)
403                c->defs[inst->dst.index] = inst;
404
405        vir_emit(c, inst);
406
407        return inst->dst;
408}
409
410struct qinst *
411vir_emit_nondef(struct v3d_compile *c, struct qinst *inst)
412{
413        if (inst->dst.file == QFILE_TEMP)
414                c->defs[inst->dst.index] = NULL;
415
416        vir_emit(c, inst);
417
418        return inst;
419}
420
421struct qblock *
422vir_new_block(struct v3d_compile *c)
423{
424        struct qblock *block = rzalloc(c, struct qblock);
425
426        list_inithead(&block->instructions);
427
428        block->predecessors = _mesa_set_create(block,
429                                               _mesa_hash_pointer,
430                                               _mesa_key_pointer_equal);
431
432        block->index = c->next_block_index++;
433
434        return block;
435}
436
437void
438vir_set_emit_block(struct v3d_compile *c, struct qblock *block)
439{
440        c->cur_block = block;
441        c->cursor = vir_after_block(block);
442        list_addtail(&block->link, &c->blocks);
443}
444
445struct qblock *
446vir_entry_block(struct v3d_compile *c)
447{
448        return list_first_entry(&c->blocks, struct qblock, link);
449}
450
451struct qblock *
452vir_exit_block(struct v3d_compile *c)
453{
454        return list_last_entry(&c->blocks, struct qblock, link);
455}
456
457void
458vir_link_blocks(struct qblock *predecessor, struct qblock *successor)
459{
460        _mesa_set_add(successor->predecessors, predecessor);
461        if (predecessor->successors[0]) {
462                assert(!predecessor->successors[1]);
463                predecessor->successors[1] = successor;
464        } else {
465                predecessor->successors[0] = successor;
466        }
467}
468
469const struct v3d_compiler *
470v3d_compiler_init(const struct v3d_device_info *devinfo)
471{
472        struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler);
473        if (!compiler)
474                return NULL;
475
476        compiler->devinfo = devinfo;
477
478        if (!vir_init_reg_sets(compiler)) {
479                ralloc_free(compiler);
480                return NULL;
481        }
482
483        return compiler;
484}
485
/** Frees the compiler and everything rallocated off of it. */
void
v3d_compiler_free(const struct v3d_compiler *compiler)
{
        /* Cast away const: the compiler owns this allocation. */
        ralloc_free((void *)compiler);
}
491
/* Creates a per-shader compile context.  The nir_shader is cloned onto the
 * context, so the caller's copy is left untouched; everything allocated
 * here is freed along with the context.
 */
static struct v3d_compile *
vir_compile_init(const struct v3d_compiler *compiler,
                 struct v3d_key *key,
                 nir_shader *s,
                 void (*debug_output)(const char *msg,
                                      void *debug_output_data),
                 void *debug_output_data,
                 int program_id, int variant_id)
{
        struct v3d_compile *c = rzalloc(NULL, struct v3d_compile);

        c->compiler = compiler;
        c->devinfo = compiler->devinfo;
        c->key = key;
        c->program_id = program_id;
        c->variant_id = variant_id;
        /* Start out assuming four-way threading; later stages may lower it. */
        c->threads = 4;
        c->debug_output = debug_output;
        c->debug_output_data = debug_output_data;

        s = nir_shader_clone(c, s);
        c->s = s;

        /* Start the program with a single empty block as the emit target. */
        list_inithead(&c->blocks);
        vir_set_emit_block(c, vir_new_block(c));

        /* -1 means "not written by this shader". */
        c->output_position_index = -1;
        c->output_sample_mask_index = -1;

        c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);

        return c;
}
526
/* Type-size callback for nir_lower_io: counts I/O slots in vec4 units.
 * The bindless flag is unused here.
 */
static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
        (void)bindless;

        return glsl_count_attribute_slots(type, false);
}
532
/* Runs the key-dependent NIR lowering shared by all stages: texture
 * swizzle/clamp/packing lowering, system values, and scratch access.
 */
static void
v3d_lower_nir(struct v3d_compile *c)
{
        struct nir_lower_tex_options tex_options = {
                .lower_txd = true,
                .lower_tg4_broadcom_swizzle = true,

                .lower_rect = false, /* XXX: Use this on V3D 3.x */
                .lower_txp = ~0,
                /* Apply swizzles to all samplers. */
                .swizzle_result = ~0,
        };

        /* Lower the format swizzle and (for 32-bit returns)
         * ARB_texture_swizzle-style swizzle.
         */
        for (int i = 0; i < ARRAY_SIZE(c->key->tex); i++) {
                for (int j = 0; j < 4; j++)
                        tex_options.swizzles[i][j] = c->key->tex[i].swizzle[j];

                /* Per-coordinate clamping requested by the key. */
                if (c->key->tex[i].clamp_s)
                        tex_options.saturate_s |= 1 << i;
                if (c->key->tex[i].clamp_t)
                        tex_options.saturate_t |= 1 << i;
                if (c->key->tex[i].clamp_r)
                        tex_options.saturate_r |= 1 << i;
                /* 16-bit TMU returns need the packing lowered in NIR. */
                if (c->key->tex[i].return_size == 16) {
                        tex_options.lower_tex_packing[i] =
                                nir_lower_tex_packing_16;
                }
        }

        /* CS textures may not have return_size reflecting the shadow state. */
        nir_foreach_variable(var, &c->s->uniforms) {
                const struct glsl_type *type = glsl_without_array(var->type);
                unsigned array_len = MAX2(glsl_get_length(var->type), 1);

                if (!glsl_type_is_sampler(type) ||
                    !glsl_sampler_type_is_shadow(type))
                        continue;

                /* Mark every element of a shadow sampler array as 16-bit. */
                for (int i = 0; i < array_len; i++) {
                        tex_options.lower_tex_packing[var->data.binding + i] =
                                nir_lower_tex_packing_16;
                }
        }

        NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
        NIR_PASS_V(c->s, nir_lower_system_values);

        /* Demote function-temp variables to scratch, then lower the scratch
         * intrinsics for this backend.
         */
        NIR_PASS_V(c->s, nir_lower_vars_to_scratch,
                   nir_var_function_temp,
                   0,
                   glsl_get_natural_size_align_bytes);
        NIR_PASS_V(c->s, v3d_nir_lower_scratch);
}
589
590static void
591v3d_set_prog_data_uniforms(struct v3d_compile *c,
592                           struct v3d_prog_data *prog_data)
593{
594        int count = c->num_uniforms;
595        struct v3d_uniform_list *ulist = &prog_data->uniforms;
596
597        ulist->count = count;
598        ulist->data = ralloc_array(prog_data, uint32_t, count);
599        memcpy(ulist->data, c->uniform_data,
600               count * sizeof(*ulist->data));
601        ulist->contents = ralloc_array(prog_data, enum quniform_contents, count);
602        memcpy(ulist->contents, c->uniform_contents,
603               count * sizeof(*ulist->contents));
604}
605
/* Fills in the VS-specific prog_data: attribute sizes, VPM segment sizing,
 * and the VCM cache size.
 */
static void
v3d_vs_set_prog_data(struct v3d_compile *c,
                     struct v3d_vs_prog_data *prog_data)
{
        /* The vertex data gets format converted by the VPM so that
         * each attribute channel takes up a VPM column.  Precompute
         * the sizes for the shader record.
         */
        for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) {
                prog_data->vattr_sizes[i] = c->vattr_sizes[i];
                prog_data->vpm_input_size += c->vattr_sizes[i];
        }

        /* Whether the shader reads gl_VertexID / gl_InstanceID; each one
         * read takes an extra VPM input column.
         */
        prog_data->uses_vid = (c->s->info.system_values_read &
                               (1ull << SYSTEM_VALUE_VERTEX_ID));
        prog_data->uses_iid = (c->s->info.system_values_read &
                               (1ull << SYSTEM_VALUE_INSTANCE_ID));

        if (prog_data->uses_vid)
                prog_data->vpm_input_size++;
        if (prog_data->uses_iid)
                prog_data->vpm_input_size++;

        /* Input/output segment size are in sectors (8 rows of 32 bits per
         * channel).
         */
        prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
        prog_data->vpm_output_size = align(c->vpm_output_size, 8) / 8;

        /* Set us up for shared input/output segments.  This is apparently
         * necessary for our VCM setup to avoid varying corruption.
         */
        prog_data->separate_segments = false;
        prog_data->vpm_output_size = MAX2(prog_data->vpm_output_size,
                                          prog_data->vpm_input_size);
        prog_data->vpm_input_size = 0;

        /* Compute VCM cache size.  We set up our program to take up less than
         * half of the VPM, so that any set of bin and render programs won't
         * run out of space.  We need space for at least one input segment,
         * and then allocate the rest to output segments (one for the current
         * program, the rest to VCM).  The valid range of the VCM cache size
         * field is 1-4 16-vertex batches, but GFXH-1744 limits us to 2-4
         * batches.
         */
        assert(c->devinfo->vpm_size);
        int sector_size = V3D_CHANNELS * sizeof(uint32_t) * 8;
        int vpm_size_in_sectors = c->devinfo->vpm_size / sector_size;
        int half_vpm = vpm_size_in_sectors / 2;
        int vpm_output_sectors = half_vpm - prog_data->vpm_input_size;
        int vpm_output_batches = vpm_output_sectors / prog_data->vpm_output_size;
        assert(vpm_output_batches >= 2);
        prog_data->vcm_cache_size = CLAMP(vpm_output_batches - 1, 2, 4);
}
660
661static void
662v3d_set_fs_prog_data_inputs(struct v3d_compile *c,
663                            struct v3d_fs_prog_data *prog_data)
664{
665        prog_data->num_inputs = c->num_inputs;
666        memcpy(prog_data->input_slots, c->input_slots,
667               c->num_inputs * sizeof(*c->input_slots));
668
669        STATIC_ASSERT(ARRAY_SIZE(prog_data->flat_shade_flags) >
670                      (V3D_MAX_FS_INPUTS - 1) / 24);
671        for (int i = 0; i < V3D_MAX_FS_INPUTS; i++) {
672                if (BITSET_TEST(c->flat_shade_flags, i))
673                        prog_data->flat_shade_flags[i / 24] |= 1 << (i % 24);
674
675                if (BITSET_TEST(c->noperspective_flags, i))
676                        prog_data->noperspective_flags[i / 24] |= 1 << (i % 24);
677
678                if (BITSET_TEST(c->centroid_flags, i))
679                        prog_data->centroid_flags[i / 24] |= 1 << (i % 24);
680        }
681}
682
683static void
684v3d_fs_set_prog_data(struct v3d_compile *c,
685                     struct v3d_fs_prog_data *prog_data)
686{
687        v3d_set_fs_prog_data_inputs(c, prog_data);
688        prog_data->writes_z = c->writes_z;
689        prog_data->disable_ez = !c->s->info.fs.early_fragment_tests;
690        prog_data->uses_center_w = c->uses_center_w;
691}
692
693static void
694v3d_cs_set_prog_data(struct v3d_compile *c,
695                     struct v3d_compute_prog_data *prog_data)
696{
697        prog_data->shared_size = c->s->info.cs.shared_size;
698}
699
700static void
701v3d_set_prog_data(struct v3d_compile *c,
702                  struct v3d_prog_data *prog_data)
703{
704        prog_data->threads = c->threads;
705        prog_data->single_seg = !c->last_thrsw;
706        prog_data->spill_size = c->spill_size;
707
708        v3d_set_prog_data_uniforms(c, prog_data);
709
710        if (c->s->info.stage == MESA_SHADER_COMPUTE) {
711                v3d_cs_set_prog_data(c, (struct v3d_compute_prog_data *)prog_data);
712        } else if (c->s->info.stage == MESA_SHADER_VERTEX) {
713                v3d_vs_set_prog_data(c, (struct v3d_vs_prog_data *)prog_data);
714        } else {
715                assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
716                v3d_fs_set_prog_data(c, (struct v3d_fs_prog_data *)prog_data);
717        }
718}
719
720static uint64_t *
721v3d_return_qpu_insts(struct v3d_compile *c, uint32_t *final_assembly_size)
722{
723        *final_assembly_size = c->qpu_inst_count * sizeof(uint64_t);
724
725        uint64_t *qpu_insts = malloc(*final_assembly_size);
726        if (!qpu_insts)
727                return NULL;
728
729        memcpy(qpu_insts, c->qpu_insts, *final_assembly_size);
730
731        vir_compile_destroy(c);
732
733        return qpu_insts;
734}
735
/* Early VS-only lowering: scalarizes I/O and strips output components
 * that the paired fragment shader (per the key's fs_inputs list) never
 * reads.
 */
static void
v3d_nir_lower_vs_early(struct v3d_compile *c)
{
        /* Split our I/O vars and dead code eliminate the unused
         * components.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar_early,
                   nir_var_shader_in | nir_var_shader_out);
        /* used_outputs is indexed by component; each entry is a bitmask of
         * used varying slots.
         */
        uint64_t used_outputs[4] = {0};
        for (int i = 0; i < c->vs_key->num_fs_inputs; i++) {
                int slot = v3d_slot_get_slot(c->vs_key->fs_inputs[i]);
                int comp = v3d_slot_get_component(c->vs_key->fs_inputs[i]);
                used_outputs[comp] |= 1ull << slot;
        }
        NIR_PASS_V(c->s, nir_remove_unused_io_vars,
                   &c->s->outputs, used_outputs, NULL); /* demotes to globals */
        NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in);
        NIR_PASS_V(c->s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
                   type_size_vec4,
                   (nir_lower_io_options)0);
}
759
760static void
761v3d_fixup_fs_output_types(struct v3d_compile *c)
762{
763        nir_foreach_variable(var, &c->s->outputs) {
764                uint32_t mask = 0;
765
766                switch (var->data.location) {
767                case FRAG_RESULT_COLOR:
768                        mask = ~0;
769                        break;
770                case FRAG_RESULT_DATA0:
771                case FRAG_RESULT_DATA1:
772                case FRAG_RESULT_DATA2:
773                case FRAG_RESULT_DATA3:
774                        mask = 1 << (var->data.location - FRAG_RESULT_DATA0);
775                        break;
776                }
777
778                if (c->fs_key->int_color_rb & mask) {
779                        var->type =
780                                glsl_vector_type(GLSL_TYPE_INT,
781                                                 glsl_get_components(var->type));
782                } else if (c->fs_key->uint_color_rb & mask) {
783                        var->type =
784                                glsl_vector_type(GLSL_TYPE_UINT,
785                                                 glsl_get_components(var->type));
786                }
787        }
788}
789
790static void
791v3d_nir_lower_fs_early(struct v3d_compile *c)
792{
793        if (c->fs_key->int_color_rb || c->fs_key->uint_color_rb)
794                v3d_fixup_fs_output_types(c);
795
796        /* If the shader has no non-TLB side effects, we can promote it to
797         * enabling early_fragment_tests even if the user didn't.
798         */
799        if (!(c->s->info.num_images ||
800              c->s->info.num_ssbos ||
801              c->s->info.num_abos)) {
802                c->s->info.fs.early_fragment_tests = true;
803        }
804}
805
806static void
807v3d_nir_lower_vs_late(struct v3d_compile *c)
808{
809        if (c->vs_key->clamp_color)
810                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);
811
812        if (c->key->ucp_enables) {
813                NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables,
814                           false);
815                NIR_PASS_V(c->s, nir_lower_io_to_scalar,
816                           nir_var_shader_out);
817        }
818
819        /* Note: VS output scalarizing must happen after nir_lower_clip_vs. */
820        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out);
821}
822
/* Late FS-only lowering that depends on key state (two-sided lighting,
 * color clamping, alpha test, user clip planes), followed by input
 * scalarization.
 */
static void
v3d_nir_lower_fs_late(struct v3d_compile *c)
{
        if (c->fs_key->light_twoside)
                NIR_PASS_V(c->s, nir_lower_two_sided_color);

        if (c->fs_key->clamp_color)
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);

        if (c->fs_key->alpha_test) {
                NIR_PASS_V(c->s, nir_lower_alpha_test,
                           c->fs_key->alpha_test_func,
                           false);
        }

        if (c->key->ucp_enables)
                NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables);

        /* Note: FS input scalarizing must happen after
         * nir_lower_two_sided_color, which only handles a vec4 at a time.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in);
}
846
847static uint32_t
848vir_get_max_temps(struct v3d_compile *c)
849{
850        int max_ip = 0;
851        vir_for_each_inst_inorder(inst, c)
852                max_ip++;
853
854        uint32_t *pressure = rzalloc_array(NULL, uint32_t, max_ip);
855
856        for (int t = 0; t < c->num_temps; t++) {
857                for (int i = c->temp_start[t]; (i < c->temp_end[t] &&
858                                                i < max_ip); i++) {
859                        if (i > max_ip)
860                                break;
861                        pressure[i]++;
862                }
863        }
864
865        uint32_t max_temps = 0;
866        for (int i = 0; i < max_ip; i++)
867                max_temps = MAX2(max_temps, pressure[i]);
868
869        ralloc_free(pressure);
870
871        return max_temps;
872}
873
/* Compiles a NIR shader to V3D QPU instructions for the stage given by its
 * info.  Returns a malloc()ed buffer owned by the caller (size stored in
 * *final_assembly_size) and the stage's prog_data in *out_prog_data; the
 * compile context is destroyed before returning.
 */
uint64_t *v3d_compile(const struct v3d_compiler *compiler,
                      struct v3d_key *key,
                      struct v3d_prog_data **out_prog_data,
                      nir_shader *s,
                      void (*debug_output)(const char *msg,
                                           void *debug_output_data),
                      void *debug_output_data,
                      int program_id, int variant_id,
                      uint32_t *final_assembly_size)
{
        struct v3d_prog_data *prog_data;
        struct v3d_compile *c = vir_compile_init(compiler, key, s,
                                                 debug_output, debug_output_data,
                                                 program_id, variant_id);

        /* Allocate the stage-specific prog_data and stash the downcast key
         * pointer (compute only uses the base key).
         */
        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                c->vs_key = (struct v3d_vs_key *)key;
                prog_data = rzalloc_size(NULL, sizeof(struct v3d_vs_prog_data));
                break;
        case MESA_SHADER_FRAGMENT:
                c->fs_key = (struct v3d_fs_key *)key;
                prog_data = rzalloc_size(NULL, sizeof(struct v3d_fs_prog_data));
                break;
        case MESA_SHADER_COMPUTE:
                prog_data = rzalloc_size(NULL,
                                         sizeof(struct v3d_compute_prog_data));
                break;
        default:
                unreachable("unsupported shader stage");
        }

        /* Stage-specific lowering that must run before the shared
         * key-dependent lowering.
         */
        if (c->s->info.stage == MESA_SHADER_VERTEX) {
                v3d_nir_lower_vs_early(c);
        } else if (c->s->info.stage != MESA_SHADER_COMPUTE) {
                assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
                v3d_nir_lower_fs_early(c);
        }

        v3d_lower_nir(c);

        /* Stage-specific lowering that runs after the shared lowering. */
        if (c->s->info.stage == MESA_SHADER_VERTEX) {
                v3d_nir_lower_vs_late(c);
        } else if (c->s->info.stage != MESA_SHADER_COMPUTE)  {
                assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
                v3d_nir_lower_fs_late(c);
        }

        /* Backend-specific lowering, then take the NIR out of SSA before
         * translating to VIR.
         */
        NIR_PASS_V(c->s, v3d_nir_lower_io, c);
        NIR_PASS_V(c->s, v3d_nir_lower_txf_ms, c);
        NIR_PASS_V(c->s, v3d_nir_lower_image_load_store);
        NIR_PASS_V(c->s, nir_lower_idiv);

        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_lower_bool_to_int32);
        NIR_PASS_V(c->s, nir_convert_from_ssa, true);

        v3d_nir_to_vir(c);

        v3d_set_prog_data(c, prog_data);

        *out_prog_data = prog_data;

        /* Report the shader-db stats line through the debug callback (and
         * stderr when V3D_DEBUG_SHADERDB is set).
         */
        char *shaderdb;
        int ret = asprintf(&shaderdb,
                           "%s shader: %d inst, %d threads, %d loops, "
                           "%d uniforms, %d max-temps, %d:%d spills:fills",
                           vir_get_stage_name(c),
                           c->qpu_inst_count,
                           c->threads,
                           c->loops,
                           c->num_uniforms,
                           vir_get_max_temps(c),
                           c->spills,
                           c->fills);
        if (ret >= 0) {
                if (V3D_DEBUG & V3D_DEBUG_SHADERDB)
                        fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

                c->debug_output(shaderdb, c->debug_output_data);
                free(shaderdb);
        }

       return v3d_return_qpu_insts(c, final_assembly_size);
}
959
960void
961vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst)
962{
963        if (qinst->dst.file == QFILE_TEMP)
964                c->defs[qinst->dst.index] = NULL;
965
966        assert(&qinst->link != c->cursor.link);
967
968        list_del(&qinst->link);
969        free(qinst);
970
971        c->live_intervals_valid = false;
972}
973
/* Currently a no-op: the MOV-chasing logic below (inherited from the VC4
 * compiler, tracking a register back through trivial MOV/FMOV defs) has not
 * been ported to V3D yet, so the register is returned unchanged.
 */
struct qreg
vir_follow_movs(struct v3d_compile *c, struct qreg reg)
{
        /* XXX
        int pack = reg.pack;

        while (reg.file == QFILE_TEMP &&
               c->defs[reg.index] &&
               (c->defs[reg.index]->op == QOP_MOV ||
                c->defs[reg.index]->op == QOP_FMOV) &&
               !c->defs[reg.index]->dst.pack &&
               !c->defs[reg.index]->src[0].pack) {
                reg = c->defs[reg.index]->src[0];
        }

        reg.pack = pack;
        */
        return reg;
}
993
994void
995vir_compile_destroy(struct v3d_compile *c)
996{
997        /* Defuse the assert that we aren't removing the cursor's instruction.
998         */
999        c->cursor.link = NULL;
1000
1001        vir_for_each_block(block, c) {
1002                while (!list_empty(&block->instructions)) {
1003                        struct qinst *qinst =
1004                                list_first_entry(&block->instructions,
1005                                                 struct qinst, link);
1006                        vir_remove_instruction(c, qinst);
1007                }
1008        }
1009
1010        ralloc_free(c);
1011}
1012
1013uint32_t
1014vir_get_uniform_index(struct v3d_compile *c,
1015                      enum quniform_contents contents,
1016                      uint32_t data)
1017{
1018        for (int i = 0; i < c->num_uniforms; i++) {
1019                if (c->uniform_contents[i] == contents &&
1020                    c->uniform_data[i] == data) {
1021                        return i;
1022                }
1023        }
1024
1025        uint32_t uniform = c->num_uniforms++;
1026
1027        if (uniform >= c->uniform_array_size) {
1028                c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
1029                                             c->uniform_array_size * 2);
1030
1031                c->uniform_data = reralloc(c, c->uniform_data,
1032                                           uint32_t,
1033                                           c->uniform_array_size);
1034                c->uniform_contents = reralloc(c, c->uniform_contents,
1035                                               enum quniform_contents,
1036                                               c->uniform_array_size);
1037        }
1038
1039        c->uniform_contents[uniform] = contents;
1040        c->uniform_data[uniform] = data;
1041
1042        return uniform;
1043}
1044
1045struct qreg
1046vir_uniform(struct v3d_compile *c,
1047            enum quniform_contents contents,
1048            uint32_t data)
1049{
1050        struct qinst *inst = vir_NOP(c);
1051        inst->qpu.sig.ldunif = true;
1052        inst->uniform = vir_get_uniform_index(c, contents, data);
1053        inst->dst = vir_get_temp(c);
1054        c->defs[inst->dst.index] = inst;
1055        return inst->dst;
1056}
1057
/* Runs one VIR optimization pass and ORs its result into "progress".
 *
 * Relies on the locals "c", "progress", "pass", and "print_opt_debug"
 * being in scope at the expansion site (see vir_optimize()).
 */
#define OPTPASS(func)                                                   \
        do {                                                            \
                bool stage_progress = func(c);                          \
                if (stage_progress) {                                   \
                        progress = true;                                \
                        if (print_opt_debug) {                          \
                                fprintf(stderr,                         \
                                        "VIR opt pass %2d: %s progress\n", \
                                        pass, #func);                   \
                        }                                               \
                        /*XXX vir_validate(c);*/                        \
                }                                                       \
        } while (0)
1071
1072void
1073vir_optimize(struct v3d_compile *c)
1074{
1075        bool print_opt_debug = false;
1076        int pass = 1;
1077
1078        while (true) {
1079                bool progress = false;
1080
1081                OPTPASS(vir_opt_copy_propagate);
1082                OPTPASS(vir_opt_redundant_flags);
1083                OPTPASS(vir_opt_dead_code);
1084                OPTPASS(vir_opt_small_immediates);
1085
1086                if (!progress)
1087                        break;
1088
1089                pass++;
1090        }
1091}
1092
1093const char *
1094vir_get_stage_name(struct v3d_compile *c)
1095{
1096        if (c->vs_key && c->vs_key->is_coord)
1097                return "MESA_SHADER_COORD";
1098        else
1099                return gl_shader_stage_name(c->s->info.stage);
1100}
1101