1b8e80941Smrg/*
2b8e80941Smrg * © Copyright 2018 Alyssa Rosenzweig
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20b8e80941Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21b8e80941Smrg * SOFTWARE.
22b8e80941Smrg *
23b8e80941Smrg */
24b8e80941Smrg
25b8e80941Smrg#include <stdio.h>
26b8e80941Smrg#include <stdlib.h>
27b8e80941Smrg#include <string.h>
28b8e80941Smrg#include "pan_context.h"
29b8e80941Smrg
30b8e80941Smrg#include "compiler/nir/nir.h"
31b8e80941Smrg#include "nir/tgsi_to_nir.h"
32b8e80941Smrg#include "midgard/midgard_compile.h"
33b8e80941Smrg#include "util/u_dynarray.h"
34b8e80941Smrg
35b8e80941Smrg#include "tgsi/tgsi_dump.h"
36b8e80941Smrg
37b8e80941Smrgvoid
38b8e80941Smrgpanfrost_shader_compile(struct panfrost_context *ctx, struct mali_shader_meta *meta, const char *src, int type, struct panfrost_shader_state *state)
39b8e80941Smrg{
40b8e80941Smrg        uint8_t *dst;
41b8e80941Smrg
42b8e80941Smrg        nir_shader *s;
43b8e80941Smrg
44b8e80941Smrg        struct pipe_shader_state *cso = state->base;
45b8e80941Smrg
46b8e80941Smrg        if (cso->type == PIPE_SHADER_IR_NIR) {
47b8e80941Smrg                s = nir_shader_clone(NULL, cso->ir.nir);
48b8e80941Smrg        } else {
49b8e80941Smrg                assert (cso->type == PIPE_SHADER_IR_TGSI);
50b8e80941Smrg                //tgsi_dump(cso->tokens, 0);
51b8e80941Smrg                s = tgsi_to_nir(cso->tokens, ctx->base.screen);
52b8e80941Smrg        }
53b8e80941Smrg
54b8e80941Smrg        s->info.stage = type == JOB_TYPE_VERTEX ? MESA_SHADER_VERTEX : MESA_SHADER_FRAGMENT;
55b8e80941Smrg
56b8e80941Smrg        if (s->info.stage == MESA_SHADER_FRAGMENT) {
57b8e80941Smrg                /* Inject the alpha test now if we need to */
58b8e80941Smrg
59b8e80941Smrg                if (state->alpha_state.enabled) {
60b8e80941Smrg                        NIR_PASS_V(s, nir_lower_alpha_test, state->alpha_state.func, false);
61b8e80941Smrg                }
62b8e80941Smrg        }
63b8e80941Smrg
64b8e80941Smrg        /* Call out to Midgard compiler given the above NIR */
65b8e80941Smrg
66b8e80941Smrg        midgard_program program = {
67b8e80941Smrg                .alpha_ref = state->alpha_state.ref_value
68b8e80941Smrg        };
69b8e80941Smrg
70b8e80941Smrg        midgard_compile_shader_nir(s, &program, false);
71b8e80941Smrg
72b8e80941Smrg        /* Prepare the compiled binary for upload */
73b8e80941Smrg        int size = program.compiled.size;
74b8e80941Smrg        dst = program.compiled.data;
75b8e80941Smrg
76b8e80941Smrg        /* Upload the shader. The lookahead tag is ORed on as a tagged pointer.
77b8e80941Smrg         * I bet someone just thought that would be a cute pun. At least,
78b8e80941Smrg         * that's how I'd do it. */
79b8e80941Smrg
80b8e80941Smrg        meta->shader = panfrost_upload(&ctx->shaders, dst, size, true) | program.first_tag;
81b8e80941Smrg
82b8e80941Smrg        util_dynarray_fini(&program.compiled);
83b8e80941Smrg
84b8e80941Smrg        /* Sysvals are prepended */
85b8e80941Smrg        program.uniform_count += program.sysval_count;
86b8e80941Smrg        state->sysval_count = program.sysval_count;
87b8e80941Smrg        memcpy(state->sysval, program.sysvals, sizeof(state->sysval[0]) * state->sysval_count);
88b8e80941Smrg
89b8e80941Smrg        meta->midgard1.uniform_count = MIN2(program.uniform_count, program.uniform_cutoff);
90b8e80941Smrg        meta->attribute_count = program.attribute_count;
91b8e80941Smrg        meta->varying_count = program.varying_count;
92b8e80941Smrg        meta->midgard1.work_count = program.work_register_count;
93b8e80941Smrg
94b8e80941Smrg        state->can_discard = program.can_discard;
95b8e80941Smrg        state->writes_point_size = program.writes_point_size;
96b8e80941Smrg        state->reads_point_coord = false;
97b8e80941Smrg
98b8e80941Smrg        /* Separate as primary uniform count is truncated */
99b8e80941Smrg        state->uniform_count = program.uniform_count;
100b8e80941Smrg
101b8e80941Smrg        meta->midgard1.unknown2 = 8; /* XXX */
102b8e80941Smrg
103b8e80941Smrg        unsigned default_vec1_swizzle = panfrost_get_default_swizzle(1);
104b8e80941Smrg        unsigned default_vec2_swizzle = panfrost_get_default_swizzle(2);
105b8e80941Smrg        unsigned default_vec4_swizzle = panfrost_get_default_swizzle(4);
106b8e80941Smrg
107b8e80941Smrg        /* Iterate the varyings and emit the corresponding descriptor */
108b8e80941Smrg        unsigned general_purpose_count = 0;
109b8e80941Smrg
110b8e80941Smrg        for (unsigned i = 0; i < program.varying_count; ++i) {
111b8e80941Smrg                unsigned location = program.varyings[i];
112b8e80941Smrg
113b8e80941Smrg                /* Default to a vec4 varying */
114b8e80941Smrg                struct mali_attr_meta v = {
115b8e80941Smrg                        .format = MALI_RGBA32F,
116b8e80941Smrg                        .swizzle = default_vec4_swizzle,
117b8e80941Smrg                        .unknown1 = 0x2,
118b8e80941Smrg                };
119b8e80941Smrg
120b8e80941Smrg                /* Check for special cases, otherwise assume general varying */
121b8e80941Smrg
122b8e80941Smrg                if (location == VARYING_SLOT_POS) {
123b8e80941Smrg                        v.index = 1;
124b8e80941Smrg                        v.format = MALI_VARYING_POS;
125b8e80941Smrg                } else if (location == VARYING_SLOT_PSIZ) {
126b8e80941Smrg                        v.index = 2;
127b8e80941Smrg                        v.format = MALI_R16F;
128b8e80941Smrg                        v.swizzle = default_vec1_swizzle;
129b8e80941Smrg
130b8e80941Smrg                        state->writes_point_size = true;
131b8e80941Smrg                } else if (location == VARYING_SLOT_PNTC) {
132b8e80941Smrg                        v.index = 3;
133b8e80941Smrg                        v.format = MALI_RG16F;
134b8e80941Smrg                        v.swizzle = default_vec2_swizzle;
135b8e80941Smrg
136b8e80941Smrg                        state->reads_point_coord = true;
137b8e80941Smrg                } else {
138b8e80941Smrg                        v.index = 0;
139b8e80941Smrg                        v.src_offset = 16 * (general_purpose_count++);
140b8e80941Smrg                }
141b8e80941Smrg
142b8e80941Smrg                state->varyings[i] = v;
143b8e80941Smrg        }
144b8e80941Smrg
145b8e80941Smrg        /* Set the stride for the general purpose fp32 vec4 varyings */
146b8e80941Smrg        state->general_varying_stride = (4 * 4) * general_purpose_count;
147b8e80941Smrg}
148