17ec681f3Smrg/*
27ec681f3Smrg * Copyright (C) 2019-2021 Collabora, Ltd.
37ec681f3Smrg * Copyright (C) 2019 Alyssa Rosenzweig
47ec681f3Smrg *
57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
67ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
77ec681f3Smrg * to deal in the Software without restriction, including without limitation
87ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
97ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
107ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
117ec681f3Smrg *
127ec681f3Smrg * The above copyright notice and this permission notice (including the next
137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
147ec681f3Smrg * Software.
157ec681f3Smrg *
167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
197ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
207ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
217ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
227ec681f3Smrg * IN THE SOFTWARE.
237ec681f3Smrg */
247ec681f3Smrg
257ec681f3Smrg/**
267ec681f3Smrg * @file
277ec681f3Smrg *
287ec681f3Smrg * Implements the fragment pipeline (blending and writeout) in software, to be
297ec681f3Smrg * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment
307ec681f3Smrg * shader variant on typical GPUs. This pass is useful if hardware lacks
317ec681f3Smrg * fixed-function blending in part or in full.
327ec681f3Smrg */
337ec681f3Smrg
347ec681f3Smrg#include "compiler/nir/nir.h"
357ec681f3Smrg#include "compiler/nir/nir_builder.h"
367ec681f3Smrg#include "compiler/nir/nir_format_convert.h"
377ec681f3Smrg#include "nir_lower_blend.h"
387ec681f3Smrg
397ec681f3Smrg/* Given processed factors, combine them per a blend function */
407ec681f3Smrg
417ec681f3Smrgstatic nir_ssa_def *
427ec681f3Smrgnir_blend_func(
437ec681f3Smrg   nir_builder *b,
447ec681f3Smrg   enum blend_func func,
457ec681f3Smrg   nir_ssa_def *src, nir_ssa_def *dst)
467ec681f3Smrg{
477ec681f3Smrg   switch (func) {
487ec681f3Smrg   case BLEND_FUNC_ADD:
497ec681f3Smrg      return nir_fadd(b, src, dst);
507ec681f3Smrg   case BLEND_FUNC_SUBTRACT:
517ec681f3Smrg      return nir_fsub(b, src, dst);
527ec681f3Smrg   case BLEND_FUNC_REVERSE_SUBTRACT:
537ec681f3Smrg      return nir_fsub(b, dst, src);
547ec681f3Smrg   case BLEND_FUNC_MIN:
557ec681f3Smrg      return nir_fmin(b, src, dst);
567ec681f3Smrg   case BLEND_FUNC_MAX:
577ec681f3Smrg      return nir_fmax(b, src, dst);
587ec681f3Smrg   }
597ec681f3Smrg
607ec681f3Smrg   unreachable("Invalid blend function");
617ec681f3Smrg}
627ec681f3Smrg
637ec681f3Smrg/* Does this blend function multiply by a blend factor? */
647ec681f3Smrg
657ec681f3Smrgstatic bool
667ec681f3Smrgnir_blend_factored(enum blend_func func)
677ec681f3Smrg{
687ec681f3Smrg   switch (func) {
697ec681f3Smrg   case BLEND_FUNC_ADD:
707ec681f3Smrg   case BLEND_FUNC_SUBTRACT:
717ec681f3Smrg   case BLEND_FUNC_REVERSE_SUBTRACT:
727ec681f3Smrg      return true;
737ec681f3Smrg   default:
747ec681f3Smrg      return false;
757ec681f3Smrg   }
767ec681f3Smrg}
777ec681f3Smrg
787ec681f3Smrg/* Compute a src_alpha_saturate factor */
797ec681f3Smrgstatic nir_ssa_def *
807ec681f3Smrgnir_alpha_saturate(
817ec681f3Smrg   nir_builder *b,
827ec681f3Smrg   nir_ssa_def *src, nir_ssa_def *dst,
837ec681f3Smrg   unsigned chan)
847ec681f3Smrg{
857ec681f3Smrg   nir_ssa_def *Asrc = nir_channel(b, src, 3);
867ec681f3Smrg   nir_ssa_def *Adst = nir_channel(b, dst, 3);
877ec681f3Smrg   nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, src->bit_size);
887ec681f3Smrg   nir_ssa_def *Adsti = nir_fsub(b, one, Adst);
897ec681f3Smrg
907ec681f3Smrg   return (chan < 3) ? nir_fmin(b, Asrc, Adsti) : one;
917ec681f3Smrg}
927ec681f3Smrg
937ec681f3Smrg/* Returns a scalar single factor, unmultiplied */
947ec681f3Smrg
957ec681f3Smrgstatic nir_ssa_def *
967ec681f3Smrgnir_blend_factor_value(
977ec681f3Smrg   nir_builder *b,
987ec681f3Smrg   nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
997ec681f3Smrg   unsigned chan,
1007ec681f3Smrg   enum blend_factor factor)
1017ec681f3Smrg{
1027ec681f3Smrg   switch (factor) {
1037ec681f3Smrg   case BLEND_FACTOR_ZERO:
1047ec681f3Smrg      return nir_imm_floatN_t(b, 0.0, src->bit_size);
1057ec681f3Smrg   case BLEND_FACTOR_SRC_COLOR:
1067ec681f3Smrg      return nir_channel(b, src, chan);
1077ec681f3Smrg   case BLEND_FACTOR_SRC1_COLOR:
1087ec681f3Smrg      return nir_channel(b, src1, chan);
1097ec681f3Smrg   case BLEND_FACTOR_DST_COLOR:
1107ec681f3Smrg      return nir_channel(b, dst, chan);
1117ec681f3Smrg   case BLEND_FACTOR_SRC_ALPHA:
1127ec681f3Smrg      return nir_channel(b, src, 3);
1137ec681f3Smrg   case BLEND_FACTOR_SRC1_ALPHA:
1147ec681f3Smrg      return nir_channel(b, src1, 3);
1157ec681f3Smrg   case BLEND_FACTOR_DST_ALPHA:
1167ec681f3Smrg      return nir_channel(b, dst, 3);
1177ec681f3Smrg   case BLEND_FACTOR_CONSTANT_COLOR:
1187ec681f3Smrg      return nir_channel(b, bconst, chan);
1197ec681f3Smrg   case BLEND_FACTOR_CONSTANT_ALPHA:
1207ec681f3Smrg      return nir_channel(b, bconst, 3);
1217ec681f3Smrg   case BLEND_FACTOR_SRC_ALPHA_SATURATE:
1227ec681f3Smrg      return nir_alpha_saturate(b, src, dst, chan);
1237ec681f3Smrg   }
1247ec681f3Smrg
1257ec681f3Smrg   unreachable("Invalid blend factor");
1267ec681f3Smrg}
1277ec681f3Smrg
1287ec681f3Smrgstatic nir_ssa_def *
1297ec681f3Smrgnir_blend_factor(
1307ec681f3Smrg   nir_builder *b,
1317ec681f3Smrg   nir_ssa_def *raw_scalar,
1327ec681f3Smrg   nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
1337ec681f3Smrg   unsigned chan,
1347ec681f3Smrg   enum blend_factor factor,
1357ec681f3Smrg   bool inverted)
1367ec681f3Smrg{
1377ec681f3Smrg   nir_ssa_def *f =
1387ec681f3Smrg      nir_blend_factor_value(b, src, src1, dst, bconst, chan, factor);
1397ec681f3Smrg
1407ec681f3Smrg   if (inverted)
1417ec681f3Smrg      f = nir_fadd_imm(b, nir_fneg(b, f), 1.0);
1427ec681f3Smrg
1437ec681f3Smrg   return nir_fmul(b, raw_scalar, f);
1447ec681f3Smrg}
1457ec681f3Smrg
1467ec681f3Smrg/* Given a colormask, "blend" with the destination */
1477ec681f3Smrg
1487ec681f3Smrgstatic nir_ssa_def *
1497ec681f3Smrgnir_color_mask(
1507ec681f3Smrg   nir_builder *b,
1517ec681f3Smrg   unsigned mask,
1527ec681f3Smrg   nir_ssa_def *src,
1537ec681f3Smrg   nir_ssa_def *dst)
1547ec681f3Smrg{
1557ec681f3Smrg   return nir_vec4(b,
1567ec681f3Smrg         nir_channel(b, (mask & (1 << 0)) ? src : dst, 0),
1577ec681f3Smrg         nir_channel(b, (mask & (1 << 1)) ? src : dst, 1),
1587ec681f3Smrg         nir_channel(b, (mask & (1 << 2)) ? src : dst, 2),
1597ec681f3Smrg         nir_channel(b, (mask & (1 << 3)) ? src : dst, 3));
1607ec681f3Smrg}
1617ec681f3Smrg
1627ec681f3Smrgstatic nir_ssa_def *
1637ec681f3Smrgnir_logicop_func(
1647ec681f3Smrg   nir_builder *b,
1657ec681f3Smrg   unsigned func,
1667ec681f3Smrg   nir_ssa_def *src, nir_ssa_def *dst)
1677ec681f3Smrg{
1687ec681f3Smrg   switch (func) {
1697ec681f3Smrg   case PIPE_LOGICOP_CLEAR:
1707ec681f3Smrg      return nir_imm_ivec4(b, 0, 0, 0, 0);
1717ec681f3Smrg   case PIPE_LOGICOP_NOR:
1727ec681f3Smrg      return nir_inot(b, nir_ior(b, src, dst));
1737ec681f3Smrg   case PIPE_LOGICOP_AND_INVERTED:
1747ec681f3Smrg      return nir_iand(b, nir_inot(b, src), dst);
1757ec681f3Smrg   case PIPE_LOGICOP_COPY_INVERTED:
1767ec681f3Smrg      return nir_inot(b, src);
1777ec681f3Smrg   case PIPE_LOGICOP_AND_REVERSE:
1787ec681f3Smrg      return nir_iand(b, src, nir_inot(b, dst));
1797ec681f3Smrg   case PIPE_LOGICOP_INVERT:
1807ec681f3Smrg      return nir_inot(b, dst);
1817ec681f3Smrg   case PIPE_LOGICOP_XOR:
1827ec681f3Smrg      return nir_ixor(b, src, dst);
1837ec681f3Smrg   case PIPE_LOGICOP_NAND:
1847ec681f3Smrg      return nir_inot(b, nir_iand(b, src, dst));
1857ec681f3Smrg   case PIPE_LOGICOP_AND:
1867ec681f3Smrg      return nir_iand(b, src, dst);
1877ec681f3Smrg   case PIPE_LOGICOP_EQUIV:
1887ec681f3Smrg      return nir_inot(b, nir_ixor(b, src, dst));
1897ec681f3Smrg   case PIPE_LOGICOP_NOOP:
1907ec681f3Smrg      return dst;
1917ec681f3Smrg   case PIPE_LOGICOP_OR_INVERTED:
1927ec681f3Smrg      return nir_ior(b, nir_inot(b, src), dst);
1937ec681f3Smrg   case PIPE_LOGICOP_COPY:
1947ec681f3Smrg      return src;
1957ec681f3Smrg   case PIPE_LOGICOP_OR_REVERSE:
1967ec681f3Smrg      return nir_ior(b, src, nir_inot(b, dst));
1977ec681f3Smrg   case PIPE_LOGICOP_OR:
1987ec681f3Smrg      return nir_ior(b, src, dst);
1997ec681f3Smrg   case PIPE_LOGICOP_SET:
2007ec681f3Smrg      return nir_imm_ivec4(b, ~0, ~0, ~0, ~0);
2017ec681f3Smrg   }
2027ec681f3Smrg
2037ec681f3Smrg   unreachable("Invalid logciop function");
2047ec681f3Smrg}
2057ec681f3Smrg
2067ec681f3Smrgstatic nir_ssa_def *
2077ec681f3Smrgnir_blend_logicop(
2087ec681f3Smrg   nir_builder *b,
2097ec681f3Smrg   nir_lower_blend_options options,
2107ec681f3Smrg   unsigned rt,
2117ec681f3Smrg   nir_ssa_def *src, nir_ssa_def *dst)
2127ec681f3Smrg{
2137ec681f3Smrg   unsigned bit_size = src->bit_size;
2147ec681f3Smrg   const struct util_format_description *format_desc =
2157ec681f3Smrg      util_format_description(options.format[rt]);
2167ec681f3Smrg
2177ec681f3Smrg   if (bit_size != 32) {
2187ec681f3Smrg      src = nir_f2f32(b, src);
2197ec681f3Smrg      dst = nir_f2f32(b, dst);
2207ec681f3Smrg   }
2217ec681f3Smrg
2227ec681f3Smrg   assert(src->num_components <= 4);
2237ec681f3Smrg   assert(dst->num_components <= 4);
2247ec681f3Smrg
2257ec681f3Smrg   unsigned bits[4];
2267ec681f3Smrg   for (int i = 0; i < 4; ++i)
2277ec681f3Smrg       bits[i] = format_desc->channel[i].size;
2287ec681f3Smrg
2297ec681f3Smrg   src = nir_format_float_to_unorm(b, src, bits);
2307ec681f3Smrg   dst = nir_format_float_to_unorm(b, dst, bits);
2317ec681f3Smrg
2327ec681f3Smrg   nir_ssa_def *out = nir_logicop_func(b, options.logicop_func, src, dst);
2337ec681f3Smrg
2347ec681f3Smrg   if (bits[0] < 32) {
2357ec681f3Smrg       nir_const_value mask[4];
2367ec681f3Smrg       for (int i = 0; i < 4; ++i)
2377ec681f3Smrg           mask[i] = nir_const_value_for_int((1u << bits[i]) - 1, 32);
2387ec681f3Smrg
2397ec681f3Smrg       out = nir_iand(b, out, nir_build_imm(b, 4, 32, mask));
2407ec681f3Smrg   }
2417ec681f3Smrg
2427ec681f3Smrg   out = nir_format_unorm_to_float(b, out, bits);
2437ec681f3Smrg
2447ec681f3Smrg   if (bit_size == 16)
2457ec681f3Smrg      out = nir_f2f16(b, out);
2467ec681f3Smrg
2477ec681f3Smrg   return out;
2487ec681f3Smrg}
2497ec681f3Smrg
2507ec681f3Smrg/* Given a blend state, the source color, and the destination color,
2517ec681f3Smrg * return the blended color
2527ec681f3Smrg */
2537ec681f3Smrg
2547ec681f3Smrgstatic nir_ssa_def *
2557ec681f3Smrgnir_blend(
2567ec681f3Smrg   nir_builder *b,
2577ec681f3Smrg   nir_lower_blend_options options,
2587ec681f3Smrg   unsigned rt,
2597ec681f3Smrg   nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst)
2607ec681f3Smrg{
2617ec681f3Smrg   /* Grab the blend constant ahead of time */
2627ec681f3Smrg   nir_ssa_def *bconst;
2637ec681f3Smrg   if (options.scalar_blend_const) {
2647ec681f3Smrg      bconst = nir_vec4(b,
2657ec681f3Smrg                        nir_load_blend_const_color_r_float(b),
2667ec681f3Smrg                        nir_load_blend_const_color_g_float(b),
2677ec681f3Smrg                        nir_load_blend_const_color_b_float(b),
2687ec681f3Smrg                        nir_load_blend_const_color_a_float(b));
2697ec681f3Smrg   } else {
2707ec681f3Smrg      bconst = nir_load_blend_const_color_rgba(b);
2717ec681f3Smrg   }
2727ec681f3Smrg
2737ec681f3Smrg   if (src->bit_size == 16)
2747ec681f3Smrg      bconst = nir_f2f16(b, bconst);
2757ec681f3Smrg
2767ec681f3Smrg   /* Fixed-point framebuffers require their inputs clamped. */
2777ec681f3Smrg   enum pipe_format format = options.format[rt];
2787ec681f3Smrg
2797ec681f3Smrg   if (!util_format_is_float(format))
2807ec681f3Smrg      src = nir_fsat(b, src);
2817ec681f3Smrg
2827ec681f3Smrg   /* DST_ALPHA reads back 1.0 if there is no alpha channel */
2837ec681f3Smrg   const struct util_format_description *desc =
2847ec681f3Smrg      util_format_description(format);
2857ec681f3Smrg
2867ec681f3Smrg   if (desc->nr_channels < 4) {
2877ec681f3Smrg      nir_ssa_def *zero = nir_imm_floatN_t(b, 0.0, dst->bit_size);
2887ec681f3Smrg      nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, dst->bit_size);
2897ec681f3Smrg
2907ec681f3Smrg      dst = nir_vec4(b, nir_channel(b, dst, 0),
2917ec681f3Smrg            desc->nr_channels > 1 ? nir_channel(b, dst, 1) : zero,
2927ec681f3Smrg            desc->nr_channels > 2 ? nir_channel(b, dst, 2) : zero,
2937ec681f3Smrg            desc->nr_channels > 3 ? nir_channel(b, dst, 3) : one);
2947ec681f3Smrg   }
2957ec681f3Smrg
2967ec681f3Smrg   /* We blend per channel and recombine later */
2977ec681f3Smrg   nir_ssa_def *channels[4];
2987ec681f3Smrg
2997ec681f3Smrg   for (unsigned c = 0; c < 4; ++c) {
3007ec681f3Smrg      /* Decide properties based on channel */
3017ec681f3Smrg      nir_lower_blend_channel chan =
3027ec681f3Smrg         (c < 3) ? options.rt[rt].rgb : options.rt[rt].alpha;
3037ec681f3Smrg
3047ec681f3Smrg      nir_ssa_def *psrc = nir_channel(b, src, c);
3057ec681f3Smrg      nir_ssa_def *pdst = nir_channel(b, dst, c);
3067ec681f3Smrg
3077ec681f3Smrg      if (nir_blend_factored(chan.func)) {
3087ec681f3Smrg         psrc = nir_blend_factor(
3097ec681f3Smrg                   b, psrc,
3107ec681f3Smrg                   src, src1, dst, bconst, c,
3117ec681f3Smrg                   chan.src_factor, chan.invert_src_factor);
3127ec681f3Smrg
3137ec681f3Smrg         pdst = nir_blend_factor(
3147ec681f3Smrg                   b, pdst,
3157ec681f3Smrg                   src, src1, dst, bconst, c,
3167ec681f3Smrg                   chan.dst_factor, chan.invert_dst_factor);
3177ec681f3Smrg      }
3187ec681f3Smrg
3197ec681f3Smrg      channels[c] = nir_blend_func(b, chan.func, psrc, pdst);
3207ec681f3Smrg   }
3217ec681f3Smrg
3227ec681f3Smrg   return nir_vec(b, channels, 4);
3237ec681f3Smrg}
3247ec681f3Smrg
3257ec681f3Smrgstatic bool
3267ec681f3Smrgnir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data)
3277ec681f3Smrg{
3287ec681f3Smrg   nir_lower_blend_options *options = data;
3297ec681f3Smrg   if (instr->type != nir_instr_type_intrinsic)
3307ec681f3Smrg      return false;
3317ec681f3Smrg
3327ec681f3Smrg   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3337ec681f3Smrg   if (intr->intrinsic != nir_intrinsic_store_deref)
3347ec681f3Smrg      return false;
3357ec681f3Smrg
3367ec681f3Smrg   nir_variable *var = nir_intrinsic_get_var(intr, 0);
3377ec681f3Smrg   if (var->data.mode != nir_var_shader_out ||
3387ec681f3Smrg         (var->data.location != FRAG_RESULT_COLOR &&
3397ec681f3Smrg         var->data.location < FRAG_RESULT_DATA0))
3407ec681f3Smrg      return false;
3417ec681f3Smrg
3427ec681f3Smrg   /* Determine render target for per-RT blending */
3437ec681f3Smrg   unsigned rt =
3447ec681f3Smrg      (var->data.location == FRAG_RESULT_COLOR) ? 0 :
3457ec681f3Smrg      (var->data.location - FRAG_RESULT_DATA0);
3467ec681f3Smrg
3477ec681f3Smrg   /* No blend lowering requested on this RT */
3487ec681f3Smrg   if (options->format[rt] == PIPE_FORMAT_NONE)
3497ec681f3Smrg      return false;
3507ec681f3Smrg
3517ec681f3Smrg   b->cursor = nir_before_instr(instr);
3527ec681f3Smrg
3537ec681f3Smrg   /* Grab the input color */
3547ec681f3Smrg   unsigned src_num_comps = nir_src_num_components(intr->src[1]);
3557ec681f3Smrg   nir_ssa_def *src =
3567ec681f3Smrg      nir_pad_vector(b, nir_ssa_for_src(b, intr->src[1], src_num_comps), 4);
3577ec681f3Smrg
3587ec681f3Smrg   /* Grab the previous fragment color */
3597ec681f3Smrg   var->data.fb_fetch_output = true;
3607ec681f3Smrg   b->shader->info.outputs_read |= BITFIELD64_BIT(var->data.location);
3617ec681f3Smrg   b->shader->info.fs.uses_fbfetch_output = true;
3627ec681f3Smrg   nir_ssa_def *dst = nir_load_var(b, var);
3637ec681f3Smrg
3647ec681f3Smrg   /* Blend the two colors per the passed options */
3657ec681f3Smrg   nir_ssa_def *blended = src;
3667ec681f3Smrg
3677ec681f3Smrg   if (options->logicop_enable) {
3687ec681f3Smrg      blended = nir_blend_logicop(b, *options, rt, src, dst);
3697ec681f3Smrg   } else if (!util_format_is_pure_integer(options->format[rt])) {
3707ec681f3Smrg      assert(!util_format_is_scaled(options->format[rt]));
3717ec681f3Smrg      blended = nir_blend(b, *options, rt, src, options->src1, dst);
3727ec681f3Smrg   }
3737ec681f3Smrg
3747ec681f3Smrg   /* Apply a colormask */
3757ec681f3Smrg   blended = nir_color_mask(b, options->rt[rt].colormask, blended, dst);
3767ec681f3Smrg
3777ec681f3Smrg   if (src_num_comps != 4)
378024565cbSchristos      blended = nir_channels(b, blended, (nir_component_mask_t)BITFIELD_MASK(src_num_comps));
3797ec681f3Smrg
3807ec681f3Smrg   /* Write out the final color instead of the input */
3817ec681f3Smrg   nir_instr_rewrite_src_ssa(instr, &intr->src[1], blended);
3827ec681f3Smrg   return true;
3837ec681f3Smrg}
3847ec681f3Smrg
3857ec681f3Smrgvoid
3867ec681f3Smrgnir_lower_blend(nir_shader *shader, nir_lower_blend_options options)
3877ec681f3Smrg{
3887ec681f3Smrg   assert(shader->info.stage == MESA_SHADER_FRAGMENT);
3897ec681f3Smrg
3907ec681f3Smrg   nir_shader_instructions_pass(shader, nir_lower_blend_instr,
3917ec681f3Smrg         nir_metadata_block_index | nir_metadata_dominance, &options);
3927ec681f3Smrg}
393