1/*
2 * Copyright © 2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/**
25 * Implements most of the fixed function fragment pipeline in shader code.
26 *
27 * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
28 * or color mask.  Instead, you read the current contents of the destination
29 * from the tile buffer after having waited for the scoreboard (which is
30 * handled by vc4_qpu_emit.c), then do math using your output color and that
31 * destination value, and update the output color appropriately.
32 *
33 * Once this pass is done, the color write will either have one component (for
34 * single sample) with packed argb8888, or 4 components with the per-sample
35 * argb8888 result.
36 */
37
38/**
39 * Lowers fixed-function blending to a load of the destination color and a
40 * series of ALU operations before the store of the output.
41 */
42#include "util/u_format.h"
43#include "vc4_qir.h"
44#include "compiler/nir/nir_builder.h"
45#include "compiler/nir/nir_format_convert.h"
46#include "vc4_context.h"
47
48static bool
49blend_depends_on_dst_color(struct vc4_compile *c)
50{
51        return (c->fs_key->blend.blend_enable ||
52                c->fs_key->blend.colormask != 0xf ||
53                c->fs_key->logicop_func != PIPE_LOGICOP_COPY);
54}
55
56/** Emits a load of the previous fragment color from the tile buffer. */
57static nir_ssa_def *
58vc4_nir_get_dst_color(nir_builder *b, int sample)
59{
60        nir_intrinsic_instr *load =
61                nir_intrinsic_instr_create(b->shader,
62                                           nir_intrinsic_load_input);
63        load->num_components = 1;
64        nir_intrinsic_set_base(load, VC4_NIR_TLB_COLOR_READ_INPUT + sample);
65        load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
66        nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
67        nir_builder_instr_insert(b, &load->instr);
68        return &load->dest.ssa;
69}
70
71static nir_ssa_def *
72vc4_blend_channel_f(nir_builder *b,
73                    nir_ssa_def **src,
74                    nir_ssa_def **dst,
75                    unsigned factor,
76                    int channel)
77{
78        switch(factor) {
79        case PIPE_BLENDFACTOR_ONE:
80                return nir_imm_float(b, 1.0);
81        case PIPE_BLENDFACTOR_SRC_COLOR:
82                return src[channel];
83        case PIPE_BLENDFACTOR_SRC_ALPHA:
84                return src[3];
85        case PIPE_BLENDFACTOR_DST_ALPHA:
86                return dst[3];
87        case PIPE_BLENDFACTOR_DST_COLOR:
88                return dst[channel];
89        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
90                if (channel != 3) {
91                        return nir_fmin(b,
92                                        src[3],
93                                        nir_fsub(b,
94                                                 nir_imm_float(b, 1.0),
95                                                 dst[3]));
96                } else {
97                        return nir_imm_float(b, 1.0);
98                }
99        case PIPE_BLENDFACTOR_CONST_COLOR:
100                return nir_load_system_value(b,
101                                             nir_intrinsic_load_blend_const_color_r_float +
102                                             channel,
103                                             0, 32);
104        case PIPE_BLENDFACTOR_CONST_ALPHA:
105                return nir_load_blend_const_color_a_float(b);
106        case PIPE_BLENDFACTOR_ZERO:
107                return nir_imm_float(b, 0.0);
108        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
109                return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);
110        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
111                return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);
112        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
113                return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);
114        case PIPE_BLENDFACTOR_INV_DST_COLOR:
115                return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
116        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
117                return nir_fsub(b, nir_imm_float(b, 1.0),
118                                nir_load_system_value(b,
119                                                      nir_intrinsic_load_blend_const_color_r_float +
120                                                      channel,
121                                                      0, 32));
122        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
123                return nir_fsub(b, nir_imm_float(b, 1.0),
124                                nir_load_blend_const_color_a_float(b));
125
126        default:
127        case PIPE_BLENDFACTOR_SRC1_COLOR:
128        case PIPE_BLENDFACTOR_SRC1_ALPHA:
129        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
130        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
131                /* Unsupported. */
132                fprintf(stderr, "Unknown blend factor %d\n", factor);
133                return nir_imm_float(b, 1.0);
134        }
135}
136
137static nir_ssa_def *
138vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,
139                        int chan)
140{
141        unsigned chan_mask = 0xff << (chan * 8);
142        return nir_ior(b,
143                       nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),
144                       nir_iand(b, src1, nir_imm_int(b, chan_mask)));
145}
146
147static nir_ssa_def *
148vc4_blend_channel_i(nir_builder *b,
149                    nir_ssa_def *src,
150                    nir_ssa_def *dst,
151                    nir_ssa_def *src_a,
152                    nir_ssa_def *dst_a,
153                    unsigned factor,
154                    int a_chan)
155{
156        switch (factor) {
157        case PIPE_BLENDFACTOR_ONE:
158                return nir_imm_int(b, ~0);
159        case PIPE_BLENDFACTOR_SRC_COLOR:
160                return src;
161        case PIPE_BLENDFACTOR_SRC_ALPHA:
162                return src_a;
163        case PIPE_BLENDFACTOR_DST_ALPHA:
164                return dst_a;
165        case PIPE_BLENDFACTOR_DST_COLOR:
166                return dst;
167        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
168                return vc4_nir_set_packed_chan(b,
169                                               nir_umin_4x8(b,
170                                                            src_a,
171                                                            nir_inot(b, dst_a)),
172                                               nir_imm_int(b, ~0),
173                                               a_chan);
174        case PIPE_BLENDFACTOR_CONST_COLOR:
175                return nir_load_blend_const_color_rgba8888_unorm(b);
176        case PIPE_BLENDFACTOR_CONST_ALPHA:
177                return nir_load_blend_const_color_aaaa8888_unorm(b);
178        case PIPE_BLENDFACTOR_ZERO:
179                return nir_imm_int(b, 0);
180        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
181                return nir_inot(b, src);
182        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
183                return nir_inot(b, src_a);
184        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
185                return nir_inot(b, dst_a);
186        case PIPE_BLENDFACTOR_INV_DST_COLOR:
187                return nir_inot(b, dst);
188        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
189                return nir_inot(b,
190                                nir_load_blend_const_color_rgba8888_unorm(b));
191        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
192                return nir_inot(b,
193                                nir_load_blend_const_color_aaaa8888_unorm(b));
194
195        default:
196        case PIPE_BLENDFACTOR_SRC1_COLOR:
197        case PIPE_BLENDFACTOR_SRC1_ALPHA:
198        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
199        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
200                /* Unsupported. */
201                fprintf(stderr, "Unknown blend factor %d\n", factor);
202                return nir_imm_int(b, ~0);
203        }
204}
205
206static nir_ssa_def *
207vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
208                 unsigned func)
209{
210        switch (func) {
211        case PIPE_BLEND_ADD:
212                return nir_fadd(b, src, dst);
213        case PIPE_BLEND_SUBTRACT:
214                return nir_fsub(b, src, dst);
215        case PIPE_BLEND_REVERSE_SUBTRACT:
216                return nir_fsub(b, dst, src);
217        case PIPE_BLEND_MIN:
218                return nir_fmin(b, src, dst);
219        case PIPE_BLEND_MAX:
220                return nir_fmax(b, src, dst);
221
222        default:
223                /* Unsupported. */
224                fprintf(stderr, "Unknown blend func %d\n", func);
225                return src;
226
227        }
228}
229
230static nir_ssa_def *
231vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
232                 unsigned func)
233{
234        switch (func) {
235        case PIPE_BLEND_ADD:
236                return nir_usadd_4x8(b, src, dst);
237        case PIPE_BLEND_SUBTRACT:
238                return nir_ussub_4x8(b, src, dst);
239        case PIPE_BLEND_REVERSE_SUBTRACT:
240                return nir_ussub_4x8(b, dst, src);
241        case PIPE_BLEND_MIN:
242                return nir_umin_4x8(b, src, dst);
243        case PIPE_BLEND_MAX:
244                return nir_umax_4x8(b, src, dst);
245
246        default:
247                /* Unsupported. */
248                fprintf(stderr, "Unknown blend func %d\n", func);
249                return src;
250
251        }
252}
253
254static void
255vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
256                  nir_ssa_def **src_color, nir_ssa_def **dst_color)
257{
258        struct pipe_rt_blend_state *blend = &c->fs_key->blend;
259
260        if (!blend->blend_enable) {
261                for (int i = 0; i < 4; i++)
262                        result[i] = src_color[i];
263                return;
264        }
265
266        /* Clamp the src color to [0, 1].  Dest is already clamped. */
267        for (int i = 0; i < 4; i++)
268                src_color[i] = nir_fsat(b, src_color[i]);
269
270        nir_ssa_def *src_blend[4], *dst_blend[4];
271        for (int i = 0; i < 4; i++) {
272                int src_factor = ((i != 3) ? blend->rgb_src_factor :
273                                  blend->alpha_src_factor);
274                int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
275                                  blend->alpha_dst_factor);
276                src_blend[i] = nir_fmul(b, src_color[i],
277                                        vc4_blend_channel_f(b,
278                                                            src_color, dst_color,
279                                                            src_factor, i));
280                dst_blend[i] = nir_fmul(b, dst_color[i],
281                                        vc4_blend_channel_f(b,
282                                                            src_color, dst_color,
283                                                            dst_factor, i));
284        }
285
286        for (int i = 0; i < 4; i++) {
287                result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
288                                             ((i != 3) ? blend->rgb_func :
289                                              blend->alpha_func));
290        }
291}
292
293static nir_ssa_def *
294vc4_nir_splat(nir_builder *b, nir_ssa_def *src)
295{
296        nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));
297        return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));
298}
299
300static nir_ssa_def *
301vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
302                  nir_ssa_def *src_color, nir_ssa_def *dst_color,
303                  nir_ssa_def *src_float_a)
304{
305        struct pipe_rt_blend_state *blend = &c->fs_key->blend;
306
307        if (!blend->blend_enable)
308                return src_color;
309
310        enum pipe_format color_format = c->fs_key->color_format;
311        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
312        nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);
313        nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
314        nir_ssa_def *dst_a;
315        int alpha_chan;
316        for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
317                if (format_swiz[alpha_chan] == 3)
318                        break;
319        }
320        if (alpha_chan != 4) {
321                nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);
322                dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,
323                                                              shift), imm_0xff));
324        } else {
325                dst_a = nir_imm_int(b, ~0);
326        }
327
328        nir_ssa_def *src_factor = vc4_blend_channel_i(b,
329                                                      src_color, dst_color,
330                                                      src_a, dst_a,
331                                                      blend->rgb_src_factor,
332                                                      alpha_chan);
333        nir_ssa_def *dst_factor = vc4_blend_channel_i(b,
334                                                      src_color, dst_color,
335                                                      src_a, dst_a,
336                                                      blend->rgb_dst_factor,
337                                                      alpha_chan);
338
339        if (alpha_chan != 4 &&
340            blend->alpha_src_factor != blend->rgb_src_factor) {
341                nir_ssa_def *src_alpha_factor =
342                        vc4_blend_channel_i(b,
343                                            src_color, dst_color,
344                                            src_a, dst_a,
345                                            blend->alpha_src_factor,
346                                            alpha_chan);
347                src_factor = vc4_nir_set_packed_chan(b, src_factor,
348                                                     src_alpha_factor,
349                                                     alpha_chan);
350        }
351        if (alpha_chan != 4 &&
352            blend->alpha_dst_factor != blend->rgb_dst_factor) {
353                nir_ssa_def *dst_alpha_factor =
354                        vc4_blend_channel_i(b,
355                                            src_color, dst_color,
356                                            src_a, dst_a,
357                                            blend->alpha_dst_factor,
358                                            alpha_chan);
359                dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
360                                                     dst_alpha_factor,
361                                                     alpha_chan);
362        }
363        nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor);
364        nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor);
365
366        nir_ssa_def *result =
367                vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
368        if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
369                nir_ssa_def *result_a = vc4_blend_func_i(b,
370                                                         src_blend,
371                                                         dst_blend,
372                                                         blend->alpha_func);
373                result = vc4_nir_set_packed_chan(b, result, result_a,
374                                                 alpha_chan);
375        }
376        return result;
377}
378
379static nir_ssa_def *
380vc4_logicop(nir_builder *b, int logicop_func,
381            nir_ssa_def *src, nir_ssa_def *dst)
382{
383        switch (logicop_func) {
384        case PIPE_LOGICOP_CLEAR:
385                return nir_imm_int(b, 0);
386        case PIPE_LOGICOP_NOR:
387                return nir_inot(b, nir_ior(b, src, dst));
388        case PIPE_LOGICOP_AND_INVERTED:
389                return nir_iand(b, nir_inot(b, src), dst);
390        case PIPE_LOGICOP_COPY_INVERTED:
391                return nir_inot(b, src);
392        case PIPE_LOGICOP_AND_REVERSE:
393                return nir_iand(b, src, nir_inot(b, dst));
394        case PIPE_LOGICOP_INVERT:
395                return nir_inot(b, dst);
396        case PIPE_LOGICOP_XOR:
397                return nir_ixor(b, src, dst);
398        case PIPE_LOGICOP_NAND:
399                return nir_inot(b, nir_iand(b, src, dst));
400        case PIPE_LOGICOP_AND:
401                return nir_iand(b, src, dst);
402        case PIPE_LOGICOP_EQUIV:
403                return nir_inot(b, nir_ixor(b, src, dst));
404        case PIPE_LOGICOP_NOOP:
405                return dst;
406        case PIPE_LOGICOP_OR_INVERTED:
407                return nir_ior(b, nir_inot(b, src), dst);
408        case PIPE_LOGICOP_OR_REVERSE:
409                return nir_ior(b, src, nir_inot(b, dst));
410        case PIPE_LOGICOP_OR:
411                return nir_ior(b, src, dst);
412        case PIPE_LOGICOP_SET:
413                return nir_imm_int(b, ~0);
414        default:
415                fprintf(stderr, "Unknown logic op %d\n", logicop_func);
416                /* FALLTHROUGH */
417        case PIPE_LOGICOP_COPY:
418                return src;
419        }
420}
421
422static nir_ssa_def *
423vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
424                         nir_ssa_def **colors)
425{
426        enum pipe_format color_format = c->fs_key->color_format;
427        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
428
429        nir_ssa_def *swizzled[4];
430        for (int i = 0; i < 4; i++) {
431                swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
432                                                           format_swiz[i]);
433        }
434
435        return nir_pack_unorm_4x8(b,
436                                  nir_vec4(b,
437                                           swizzled[0], swizzled[1],
438                                           swizzled[2], swizzled[3]));
439
440}
441
442static nir_ssa_def *
443vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,
444                       int sample)
445{
446        enum pipe_format color_format = c->fs_key->color_format;
447        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
448        bool srgb = util_format_is_srgb(color_format);
449
450        /* Pull out the float src/dst color components. */
451        nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);
452        nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
453        nir_ssa_def *src_color[4], *unpacked_dst_color[4];
454        for (unsigned i = 0; i < 4; i++) {
455                src_color[i] = nir_channel(b, src, i);
456                unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
457        }
458
459        if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)
460                src_color[3] = nir_imm_float(b, 1.0);
461
462        nir_ssa_def *packed_color;
463        if (srgb) {
464                /* Unswizzle the destination color. */
465                nir_ssa_def *dst_color[4];
466                for (unsigned i = 0; i < 4; i++) {
467                        dst_color[i] = vc4_nir_get_swizzled_channel(b,
468                                                                    unpacked_dst_color,
469                                                                    format_swiz[i]);
470                }
471
472                /* Turn dst color to linear. */
473                for (int i = 0; i < 3; i++)
474                        dst_color[i] = nir_format_srgb_to_linear(b, dst_color[i]);
475
476                nir_ssa_def *blend_color[4];
477                vc4_do_blending_f(c, b, blend_color, src_color, dst_color);
478
479                /* sRGB encode the output color */
480                for (int i = 0; i < 3; i++)
481                        blend_color[i] = nir_format_linear_to_srgb(b, blend_color[i]);
482
483                packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
484        } else {
485                nir_ssa_def *packed_src_color =
486                        vc4_nir_swizzle_and_pack(c, b, src_color);
487
488                packed_color =
489                        vc4_do_blending_i(c, b,
490                                          packed_src_color, packed_dst_color,
491                                          src_color[3]);
492        }
493
494        packed_color = vc4_logicop(b, c->fs_key->logicop_func,
495                                   packed_color, packed_dst_color);
496
497        /* If the bit isn't set in the color mask, then just return the
498         * original dst color, instead.
499         */
500        uint32_t colormask = 0xffffffff;
501        for (int i = 0; i < 4; i++) {
502                if (format_swiz[i] < 4 &&
503                    !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
504                        colormask &= ~(0xff << (i * 8));
505                }
506        }
507
508        return nir_ior(b,
509                       nir_iand(b, packed_color,
510                                nir_imm_int(b, colormask)),
511                       nir_iand(b, packed_dst_color,
512                                nir_imm_int(b, ~colormask)));
513}
514
515static int
516vc4_nir_next_output_driver_location(nir_shader *s)
517{
518        int maxloc = -1;
519
520        nir_foreach_variable(var, &s->outputs)
521                maxloc = MAX2(maxloc, (int)var->data.driver_location);
522
523        return maxloc + 1;
524}
525
526static void
527vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
528                          nir_ssa_def *val)
529{
530        nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,
531                                                        glsl_uint_type(),
532                                                        "sample_mask");
533        sample_mask->data.driver_location =
534                vc4_nir_next_output_driver_location(c->s);
535        sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;
536
537        nir_intrinsic_instr *intr =
538                nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
539        intr->num_components = 1;
540        nir_intrinsic_set_base(intr, sample_mask->data.driver_location);
541
542        intr->src[0] = nir_src_for_ssa(val);
543        intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
544        nir_builder_instr_insert(b, &intr->instr);
545}
546
547static void
548vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
549                          nir_intrinsic_instr *intr)
550{
551        nir_ssa_def *frag_color = intr->src[0].ssa;
552
553        if (c->fs_key->sample_alpha_to_coverage) {
554                nir_ssa_def *a = nir_channel(b, frag_color, 3);
555
556                /* XXX: We should do a nice dither based on the fragment
557                 * coordinate, instead.
558                 */
559                nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);
560                nir_ssa_def *num_bits = nir_f2i32(b, nir_fmul(b, a, num_samples));
561                nir_ssa_def *bitmask = nir_isub(b,
562                                                nir_ishl(b,
563                                                         nir_imm_int(b, 1),
564                                                         num_bits),
565                                                nir_imm_int(b, 1));
566                vc4_nir_store_sample_mask(c, b, bitmask);
567        }
568
569        /* The TLB color read returns each sample in turn, so if our blending
570         * depends on the destination color, we're going to have to run the
571         * blending function separately for each destination sample value, and
572         * then output the per-sample color using TLB_COLOR_MS.
573         */
574        nir_ssa_def *blend_output;
575        if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {
576                c->msaa_per_sample_output = true;
577
578                nir_ssa_def *samples[4];
579                for (int i = 0; i < VC4_MAX_SAMPLES; i++)
580                        samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);
581                blend_output = nir_vec4(b,
582                                        samples[0], samples[1],
583                                        samples[2], samples[3]);
584        } else {
585                blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);
586        }
587
588        nir_instr_rewrite_src(&intr->instr, &intr->src[0],
589                              nir_src_for_ssa(blend_output));
590        intr->num_components = blend_output->num_components;
591}
592
593static bool
594vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c)
595{
596        nir_foreach_instr_safe(instr, block) {
597                if (instr->type != nir_instr_type_intrinsic)
598                        continue;
599                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
600                if (intr->intrinsic != nir_intrinsic_store_output)
601                        continue;
602
603                nir_variable *output_var = NULL;
604                nir_foreach_variable(var, &c->s->outputs) {
605                        if (var->data.driver_location ==
606                            nir_intrinsic_base(intr)) {
607                                output_var = var;
608                                break;
609                        }
610                }
611                assert(output_var);
612
613                if (output_var->data.location != FRAG_RESULT_COLOR &&
614                    output_var->data.location != FRAG_RESULT_DATA0) {
615                        continue;
616                }
617
618                nir_function_impl *impl =
619                        nir_cf_node_get_function(&block->cf_node);
620                nir_builder b;
621                nir_builder_init(&b, impl);
622                b.cursor = nir_before_instr(&intr->instr);
623                vc4_nir_lower_blend_instr(c, &b, intr);
624        }
625        return true;
626}
627
628void
629vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c)
630{
631        nir_foreach_function(function, s) {
632                if (function->impl) {
633                        nir_foreach_block(block, function->impl) {
634                                vc4_nir_lower_blend_block(block, c);
635                        }
636
637                        nir_metadata_preserve(function->impl,
638                                              nir_metadata_block_index |
639                                              nir_metadata_dominance);
640                }
641        }
642
643        /* If we didn't do alpha-to-coverage on the output color, we still
644         * need to pass glSampleMask() through.
645         */
646        if (c->fs_key->sample_coverage && !c->fs_key->sample_alpha_to_coverage) {
647                nir_function_impl *impl = nir_shader_get_entrypoint(s);
648                nir_builder b;
649                nir_builder_init(&b, impl);
650                b.cursor = nir_after_block(nir_impl_last_block(impl));
651
652                vc4_nir_store_sample_mask(c, &b, nir_load_sample_mask_in(&b));
653        }
654}
655