17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2019-2021 Collabora, Ltd. 37ec681f3Smrg * Copyright (C) 2019 Alyssa Rosenzweig 47ec681f3Smrg * 57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 67ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 77ec681f3Smrg * to deal in the Software without restriction, including without limitation 87ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 97ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 107ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 117ec681f3Smrg * 127ec681f3Smrg * The above copyright notice and this permission notice (including the next 137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 147ec681f3Smrg * Software. 157ec681f3Smrg * 167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 197ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 207ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 217ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 227ec681f3Smrg * IN THE SOFTWARE. 237ec681f3Smrg */ 247ec681f3Smrg 257ec681f3Smrg/** 267ec681f3Smrg * @file 277ec681f3Smrg * 287ec681f3Smrg * Implements the fragment pipeline (blending and writeout) in software, to be 297ec681f3Smrg * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment 307ec681f3Smrg * shader variant on typical GPUs. This pass is useful if hardware lacks 317ec681f3Smrg * fixed-function blending in part or in full. 327ec681f3Smrg */ 337ec681f3Smrg 347ec681f3Smrg#include "compiler/nir/nir.h" 357ec681f3Smrg#include "compiler/nir/nir_builder.h" 367ec681f3Smrg#include "compiler/nir/nir_format_convert.h" 377ec681f3Smrg#include "nir_lower_blend.h" 387ec681f3Smrg 397ec681f3Smrg/* Given processed factors, combine them per a blend function */ 407ec681f3Smrg 417ec681f3Smrgstatic nir_ssa_def * 427ec681f3Smrgnir_blend_func( 437ec681f3Smrg nir_builder *b, 447ec681f3Smrg enum blend_func func, 457ec681f3Smrg nir_ssa_def *src, nir_ssa_def *dst) 467ec681f3Smrg{ 477ec681f3Smrg switch (func) { 487ec681f3Smrg case BLEND_FUNC_ADD: 497ec681f3Smrg return nir_fadd(b, src, dst); 507ec681f3Smrg case BLEND_FUNC_SUBTRACT: 517ec681f3Smrg return nir_fsub(b, src, dst); 527ec681f3Smrg case BLEND_FUNC_REVERSE_SUBTRACT: 537ec681f3Smrg return nir_fsub(b, dst, src); 547ec681f3Smrg case BLEND_FUNC_MIN: 557ec681f3Smrg return nir_fmin(b, src, dst); 567ec681f3Smrg case BLEND_FUNC_MAX: 577ec681f3Smrg return nir_fmax(b, src, dst); 587ec681f3Smrg } 597ec681f3Smrg 607ec681f3Smrg unreachable("Invalid blend function"); 617ec681f3Smrg} 627ec681f3Smrg 637ec681f3Smrg/* Does this blend function multiply by a blend factor? */ 647ec681f3Smrg 657ec681f3Smrgstatic bool 667ec681f3Smrgnir_blend_factored(enum blend_func func) 677ec681f3Smrg{ 687ec681f3Smrg switch (func) { 697ec681f3Smrg case BLEND_FUNC_ADD: 707ec681f3Smrg case BLEND_FUNC_SUBTRACT: 717ec681f3Smrg case BLEND_FUNC_REVERSE_SUBTRACT: 727ec681f3Smrg return true; 737ec681f3Smrg default: 747ec681f3Smrg return false; 757ec681f3Smrg } 767ec681f3Smrg} 777ec681f3Smrg 787ec681f3Smrg/* Compute a src_alpha_saturate factor */ 797ec681f3Smrgstatic nir_ssa_def * 807ec681f3Smrgnir_alpha_saturate( 817ec681f3Smrg nir_builder *b, 827ec681f3Smrg nir_ssa_def *src, nir_ssa_def *dst, 837ec681f3Smrg unsigned chan) 847ec681f3Smrg{ 857ec681f3Smrg nir_ssa_def *Asrc = nir_channel(b, src, 3); 867ec681f3Smrg nir_ssa_def *Adst = nir_channel(b, dst, 3); 877ec681f3Smrg nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, src->bit_size); 887ec681f3Smrg nir_ssa_def *Adsti = nir_fsub(b, one, Adst); 897ec681f3Smrg 907ec681f3Smrg return (chan < 3) ? nir_fmin(b, Asrc, Adsti) : one; 917ec681f3Smrg} 927ec681f3Smrg 937ec681f3Smrg/* Returns a scalar single factor, unmultiplied */ 947ec681f3Smrg 957ec681f3Smrgstatic nir_ssa_def * 967ec681f3Smrgnir_blend_factor_value( 977ec681f3Smrg nir_builder *b, 987ec681f3Smrg nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst, 997ec681f3Smrg unsigned chan, 1007ec681f3Smrg enum blend_factor factor) 1017ec681f3Smrg{ 1027ec681f3Smrg switch (factor) { 1037ec681f3Smrg case BLEND_FACTOR_ZERO: 1047ec681f3Smrg return nir_imm_floatN_t(b, 0.0, src->bit_size); 1057ec681f3Smrg case BLEND_FACTOR_SRC_COLOR: 1067ec681f3Smrg return nir_channel(b, src, chan); 1077ec681f3Smrg case BLEND_FACTOR_SRC1_COLOR: 1087ec681f3Smrg return nir_channel(b, src1, chan); 1097ec681f3Smrg case BLEND_FACTOR_DST_COLOR: 1107ec681f3Smrg return nir_channel(b, dst, chan); 1117ec681f3Smrg case BLEND_FACTOR_SRC_ALPHA: 1127ec681f3Smrg return nir_channel(b, src, 3); 1137ec681f3Smrg case BLEND_FACTOR_SRC1_ALPHA: 1147ec681f3Smrg return nir_channel(b, src1, 3); 1157ec681f3Smrg case BLEND_FACTOR_DST_ALPHA: 1167ec681f3Smrg return nir_channel(b, dst, 3); 1177ec681f3Smrg case BLEND_FACTOR_CONSTANT_COLOR: 1187ec681f3Smrg return nir_channel(b, bconst, chan); 1197ec681f3Smrg case BLEND_FACTOR_CONSTANT_ALPHA: 1207ec681f3Smrg return nir_channel(b, bconst, 3); 1217ec681f3Smrg case BLEND_FACTOR_SRC_ALPHA_SATURATE: 1227ec681f3Smrg return nir_alpha_saturate(b, src, dst, chan); 1237ec681f3Smrg } 1247ec681f3Smrg 1257ec681f3Smrg unreachable("Invalid blend factor"); 1267ec681f3Smrg} 1277ec681f3Smrg 1287ec681f3Smrgstatic nir_ssa_def * 1297ec681f3Smrgnir_blend_factor( 1307ec681f3Smrg nir_builder *b, 1317ec681f3Smrg nir_ssa_def *raw_scalar, 1327ec681f3Smrg nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst, 1337ec681f3Smrg unsigned chan, 1347ec681f3Smrg enum blend_factor factor, 1357ec681f3Smrg bool inverted) 1367ec681f3Smrg{ 1377ec681f3Smrg nir_ssa_def *f = 1387ec681f3Smrg nir_blend_factor_value(b, src, src1, dst, bconst, chan, factor); 1397ec681f3Smrg 1407ec681f3Smrg if (inverted) 1417ec681f3Smrg f = nir_fadd_imm(b, nir_fneg(b, f), 1.0); 1427ec681f3Smrg 1437ec681f3Smrg return nir_fmul(b, raw_scalar, f); 1447ec681f3Smrg} 1457ec681f3Smrg 1467ec681f3Smrg/* Given a colormask, "blend" with the destination */ 1477ec681f3Smrg 1487ec681f3Smrgstatic nir_ssa_def * 1497ec681f3Smrgnir_color_mask( 1507ec681f3Smrg nir_builder *b, 1517ec681f3Smrg unsigned mask, 1527ec681f3Smrg nir_ssa_def *src, 1537ec681f3Smrg nir_ssa_def *dst) 1547ec681f3Smrg{ 1557ec681f3Smrg return nir_vec4(b, 1567ec681f3Smrg nir_channel(b, (mask & (1 << 0)) ? src : dst, 0), 1577ec681f3Smrg nir_channel(b, (mask & (1 << 1)) ? src : dst, 1), 1587ec681f3Smrg nir_channel(b, (mask & (1 << 2)) ? src : dst, 2), 1597ec681f3Smrg nir_channel(b, (mask & (1 << 3)) ? src : dst, 3)); 1607ec681f3Smrg} 1617ec681f3Smrg 1627ec681f3Smrgstatic nir_ssa_def * 1637ec681f3Smrgnir_logicop_func( 1647ec681f3Smrg nir_builder *b, 1657ec681f3Smrg unsigned func, 1667ec681f3Smrg nir_ssa_def *src, nir_ssa_def *dst) 1677ec681f3Smrg{ 1687ec681f3Smrg switch (func) { 1697ec681f3Smrg case PIPE_LOGICOP_CLEAR: 1707ec681f3Smrg return nir_imm_ivec4(b, 0, 0, 0, 0); 1717ec681f3Smrg case PIPE_LOGICOP_NOR: 1727ec681f3Smrg return nir_inot(b, nir_ior(b, src, dst)); 1737ec681f3Smrg case PIPE_LOGICOP_AND_INVERTED: 1747ec681f3Smrg return nir_iand(b, nir_inot(b, src), dst); 1757ec681f3Smrg case PIPE_LOGICOP_COPY_INVERTED: 1767ec681f3Smrg return nir_inot(b, src); 1777ec681f3Smrg case PIPE_LOGICOP_AND_REVERSE: 1787ec681f3Smrg return nir_iand(b, src, nir_inot(b, dst)); 1797ec681f3Smrg case PIPE_LOGICOP_INVERT: 1807ec681f3Smrg return nir_inot(b, dst); 1817ec681f3Smrg case PIPE_LOGICOP_XOR: 1827ec681f3Smrg return nir_ixor(b, src, dst); 1837ec681f3Smrg case PIPE_LOGICOP_NAND: 1847ec681f3Smrg return nir_inot(b, nir_iand(b, src, dst)); 1857ec681f3Smrg case PIPE_LOGICOP_AND: 1867ec681f3Smrg return nir_iand(b, src, dst); 1877ec681f3Smrg case PIPE_LOGICOP_EQUIV: 1887ec681f3Smrg return nir_inot(b, nir_ixor(b, src, dst)); 1897ec681f3Smrg case PIPE_LOGICOP_NOOP: 1907ec681f3Smrg return dst; 1917ec681f3Smrg case PIPE_LOGICOP_OR_INVERTED: 1927ec681f3Smrg return nir_ior(b, nir_inot(b, src), dst); 1937ec681f3Smrg case PIPE_LOGICOP_COPY: 1947ec681f3Smrg return src; 1957ec681f3Smrg case PIPE_LOGICOP_OR_REVERSE: 1967ec681f3Smrg return nir_ior(b, src, nir_inot(b, dst)); 1977ec681f3Smrg case PIPE_LOGICOP_OR: 1987ec681f3Smrg return nir_ior(b, src, dst); 1997ec681f3Smrg case PIPE_LOGICOP_SET: 2007ec681f3Smrg return nir_imm_ivec4(b, ~0, ~0, ~0, ~0); 2017ec681f3Smrg } 2027ec681f3Smrg 2037ec681f3Smrg unreachable("Invalid logciop function"); 2047ec681f3Smrg} 2057ec681f3Smrg 2067ec681f3Smrgstatic nir_ssa_def * 2077ec681f3Smrgnir_blend_logicop( 2087ec681f3Smrg nir_builder *b, 2097ec681f3Smrg nir_lower_blend_options options, 2107ec681f3Smrg unsigned rt, 2117ec681f3Smrg nir_ssa_def *src, nir_ssa_def *dst) 2127ec681f3Smrg{ 2137ec681f3Smrg unsigned bit_size = src->bit_size; 2147ec681f3Smrg const struct util_format_description *format_desc = 2157ec681f3Smrg util_format_description(options.format[rt]); 2167ec681f3Smrg 2177ec681f3Smrg if (bit_size != 32) { 2187ec681f3Smrg src = nir_f2f32(b, src); 2197ec681f3Smrg dst = nir_f2f32(b, dst); 2207ec681f3Smrg } 2217ec681f3Smrg 2227ec681f3Smrg assert(src->num_components <= 4); 2237ec681f3Smrg assert(dst->num_components <= 4); 2247ec681f3Smrg 2257ec681f3Smrg unsigned bits[4]; 2267ec681f3Smrg for (int i = 0; i < 4; ++i) 2277ec681f3Smrg bits[i] = format_desc->channel[i].size; 2287ec681f3Smrg 2297ec681f3Smrg src = nir_format_float_to_unorm(b, src, bits); 2307ec681f3Smrg dst = nir_format_float_to_unorm(b, dst, bits); 2317ec681f3Smrg 2327ec681f3Smrg nir_ssa_def *out = nir_logicop_func(b, options.logicop_func, src, dst); 2337ec681f3Smrg 2347ec681f3Smrg if (bits[0] < 32) { 2357ec681f3Smrg nir_const_value mask[4]; 2367ec681f3Smrg for (int i = 0; i < 4; ++i) 2377ec681f3Smrg mask[i] = nir_const_value_for_int((1u << bits[i]) - 1, 32); 2387ec681f3Smrg 2397ec681f3Smrg out = nir_iand(b, out, nir_build_imm(b, 4, 32, mask)); 2407ec681f3Smrg } 2417ec681f3Smrg 2427ec681f3Smrg out = nir_format_unorm_to_float(b, out, bits); 2437ec681f3Smrg 2447ec681f3Smrg if (bit_size == 16) 2457ec681f3Smrg out = nir_f2f16(b, out); 2467ec681f3Smrg 2477ec681f3Smrg return out; 2487ec681f3Smrg} 2497ec681f3Smrg 2507ec681f3Smrg/* Given a blend state, the source color, and the destination color, 2517ec681f3Smrg * return the blended color 2527ec681f3Smrg */ 2537ec681f3Smrg 2547ec681f3Smrgstatic nir_ssa_def * 2557ec681f3Smrgnir_blend( 2567ec681f3Smrg nir_builder *b, 2577ec681f3Smrg nir_lower_blend_options options, 2587ec681f3Smrg unsigned rt, 2597ec681f3Smrg nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst) 2607ec681f3Smrg{ 2617ec681f3Smrg /* Grab the blend constant ahead of time */ 2627ec681f3Smrg nir_ssa_def *bconst; 2637ec681f3Smrg if (options.scalar_blend_const) { 2647ec681f3Smrg bconst = nir_vec4(b, 2657ec681f3Smrg nir_load_blend_const_color_r_float(b), 2667ec681f3Smrg nir_load_blend_const_color_g_float(b), 2677ec681f3Smrg nir_load_blend_const_color_b_float(b), 2687ec681f3Smrg nir_load_blend_const_color_a_float(b)); 2697ec681f3Smrg } else { 2707ec681f3Smrg bconst = nir_load_blend_const_color_rgba(b); 2717ec681f3Smrg } 2727ec681f3Smrg 2737ec681f3Smrg if (src->bit_size == 16) 2747ec681f3Smrg bconst = nir_f2f16(b, bconst); 2757ec681f3Smrg 2767ec681f3Smrg /* Fixed-point framebuffers require their inputs clamped. */ 2777ec681f3Smrg enum pipe_format format = options.format[rt]; 2787ec681f3Smrg 2797ec681f3Smrg if (!util_format_is_float(format)) 2807ec681f3Smrg src = nir_fsat(b, src); 2817ec681f3Smrg 2827ec681f3Smrg /* DST_ALPHA reads back 1.0 if there is no alpha channel */ 2837ec681f3Smrg const struct util_format_description *desc = 2847ec681f3Smrg util_format_description(format); 2857ec681f3Smrg 2867ec681f3Smrg if (desc->nr_channels < 4) { 2877ec681f3Smrg nir_ssa_def *zero = nir_imm_floatN_t(b, 0.0, dst->bit_size); 2887ec681f3Smrg nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, dst->bit_size); 2897ec681f3Smrg 2907ec681f3Smrg dst = nir_vec4(b, nir_channel(b, dst, 0), 2917ec681f3Smrg desc->nr_channels > 1 ? nir_channel(b, dst, 1) : zero, 2927ec681f3Smrg desc->nr_channels > 2 ? nir_channel(b, dst, 2) : zero, 2937ec681f3Smrg desc->nr_channels > 3 ? nir_channel(b, dst, 3) : one); 2947ec681f3Smrg } 2957ec681f3Smrg 2967ec681f3Smrg /* We blend per channel and recombine later */ 2977ec681f3Smrg nir_ssa_def *channels[4]; 2987ec681f3Smrg 2997ec681f3Smrg for (unsigned c = 0; c < 4; ++c) { 3007ec681f3Smrg /* Decide properties based on channel */ 3017ec681f3Smrg nir_lower_blend_channel chan = 3027ec681f3Smrg (c < 3) ? options.rt[rt].rgb : options.rt[rt].alpha; 3037ec681f3Smrg 3047ec681f3Smrg nir_ssa_def *psrc = nir_channel(b, src, c); 3057ec681f3Smrg nir_ssa_def *pdst = nir_channel(b, dst, c); 3067ec681f3Smrg 3077ec681f3Smrg if (nir_blend_factored(chan.func)) { 3087ec681f3Smrg psrc = nir_blend_factor( 3097ec681f3Smrg b, psrc, 3107ec681f3Smrg src, src1, dst, bconst, c, 3117ec681f3Smrg chan.src_factor, chan.invert_src_factor); 3127ec681f3Smrg 3137ec681f3Smrg pdst = nir_blend_factor( 3147ec681f3Smrg b, pdst, 3157ec681f3Smrg src, src1, dst, bconst, c, 3167ec681f3Smrg chan.dst_factor, chan.invert_dst_factor); 3177ec681f3Smrg } 3187ec681f3Smrg 3197ec681f3Smrg channels[c] = nir_blend_func(b, chan.func, psrc, pdst); 3207ec681f3Smrg } 3217ec681f3Smrg 3227ec681f3Smrg return nir_vec(b, channels, 4); 3237ec681f3Smrg} 3247ec681f3Smrg 3257ec681f3Smrgstatic bool 3267ec681f3Smrgnir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data) 3277ec681f3Smrg{ 3287ec681f3Smrg nir_lower_blend_options *options = data; 3297ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 3307ec681f3Smrg return false; 3317ec681f3Smrg 3327ec681f3Smrg nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 3337ec681f3Smrg if (intr->intrinsic != nir_intrinsic_store_deref) 3347ec681f3Smrg return false; 3357ec681f3Smrg 3367ec681f3Smrg nir_variable *var = nir_intrinsic_get_var(intr, 0); 3377ec681f3Smrg if (var->data.mode != nir_var_shader_out || 3387ec681f3Smrg (var->data.location != FRAG_RESULT_COLOR && 3397ec681f3Smrg var->data.location < FRAG_RESULT_DATA0)) 3407ec681f3Smrg return false; 3417ec681f3Smrg 3427ec681f3Smrg /* Determine render target for per-RT blending */ 3437ec681f3Smrg unsigned rt = 3447ec681f3Smrg (var->data.location == FRAG_RESULT_COLOR) ? 0 : 3457ec681f3Smrg (var->data.location - FRAG_RESULT_DATA0); 3467ec681f3Smrg 3477ec681f3Smrg /* No blend lowering requested on this RT */ 3487ec681f3Smrg if (options->format[rt] == PIPE_FORMAT_NONE) 3497ec681f3Smrg return false; 3507ec681f3Smrg 3517ec681f3Smrg b->cursor = nir_before_instr(instr); 3527ec681f3Smrg 3537ec681f3Smrg /* Grab the input color */ 3547ec681f3Smrg unsigned src_num_comps = nir_src_num_components(intr->src[1]); 3557ec681f3Smrg nir_ssa_def *src = 3567ec681f3Smrg nir_pad_vector(b, nir_ssa_for_src(b, intr->src[1], src_num_comps), 4); 3577ec681f3Smrg 3587ec681f3Smrg /* Grab the previous fragment color */ 3597ec681f3Smrg var->data.fb_fetch_output = true; 3607ec681f3Smrg b->shader->info.outputs_read |= BITFIELD64_BIT(var->data.location); 3617ec681f3Smrg b->shader->info.fs.uses_fbfetch_output = true; 3627ec681f3Smrg nir_ssa_def *dst = nir_load_var(b, var); 3637ec681f3Smrg 3647ec681f3Smrg /* Blend the two colors per the passed options */ 3657ec681f3Smrg nir_ssa_def *blended = src; 3667ec681f3Smrg 3677ec681f3Smrg if (options->logicop_enable) { 3687ec681f3Smrg blended = nir_blend_logicop(b, *options, rt, src, dst); 3697ec681f3Smrg } else if (!util_format_is_pure_integer(options->format[rt])) { 3707ec681f3Smrg assert(!util_format_is_scaled(options->format[rt])); 3717ec681f3Smrg blended = nir_blend(b, *options, rt, src, options->src1, dst); 3727ec681f3Smrg } 3737ec681f3Smrg 3747ec681f3Smrg /* Apply a colormask */ 3757ec681f3Smrg blended = nir_color_mask(b, options->rt[rt].colormask, blended, dst); 3767ec681f3Smrg 3777ec681f3Smrg if (src_num_comps != 4) 378024565cbSchristos blended = nir_channels(b, blended, (nir_component_mask_t)BITFIELD_MASK(src_num_comps)); 3797ec681f3Smrg 3807ec681f3Smrg /* Write out the final color instead of the input */ 3817ec681f3Smrg nir_instr_rewrite_src_ssa(instr, &intr->src[1], blended); 3827ec681f3Smrg return true; 3837ec681f3Smrg} 3847ec681f3Smrg 3857ec681f3Smrgvoid 3867ec681f3Smrgnir_lower_blend(nir_shader *shader, nir_lower_blend_options options) 3877ec681f3Smrg{ 3887ec681f3Smrg assert(shader->info.stage == MESA_SHADER_FRAGMENT); 3897ec681f3Smrg 3907ec681f3Smrg nir_shader_instructions_pass(shader, nir_lower_blend_instr, 3917ec681f3Smrg nir_metadata_block_index | nir_metadata_dominance, &options); 3927ec681f3Smrg} 393