nir_lower_blend.c revision 024565cb
1/* 2 * Copyright (C) 2019-2021 Collabora, Ltd. 3 * Copyright (C) 2019 Alyssa Rosenzweig 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 */ 24 25/** 26 * @file 27 * 28 * Implements the fragment pipeline (blending and writeout) in software, to be 29 * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment 30 * shader variant on typical GPUs. This pass is useful if hardware lacks 31 * fixed-function blending in part or in full. 32 */ 33 34#include "compiler/nir/nir.h" 35#include "compiler/nir/nir_builder.h" 36#include "compiler/nir/nir_format_convert.h" 37#include "nir_lower_blend.h" 38 39/* Given processed factors, combine them per a blend function */ 40 41static nir_ssa_def * 42nir_blend_func( 43 nir_builder *b, 44 enum blend_func func, 45 nir_ssa_def *src, nir_ssa_def *dst) 46{ 47 switch (func) { 48 case BLEND_FUNC_ADD: 49 return nir_fadd(b, src, dst); 50 case BLEND_FUNC_SUBTRACT: 51 return nir_fsub(b, src, dst); 52 case BLEND_FUNC_REVERSE_SUBTRACT: 53 return nir_fsub(b, dst, src); 54 case BLEND_FUNC_MIN: 55 return nir_fmin(b, src, dst); 56 case BLEND_FUNC_MAX: 57 return nir_fmax(b, src, dst); 58 } 59 60 unreachable("Invalid blend function"); 61} 62 63/* Does this blend function multiply by a blend factor? */ 64 65static bool 66nir_blend_factored(enum blend_func func) 67{ 68 switch (func) { 69 case BLEND_FUNC_ADD: 70 case BLEND_FUNC_SUBTRACT: 71 case BLEND_FUNC_REVERSE_SUBTRACT: 72 return true; 73 default: 74 return false; 75 } 76} 77 78/* Compute a src_alpha_saturate factor */ 79static nir_ssa_def * 80nir_alpha_saturate( 81 nir_builder *b, 82 nir_ssa_def *src, nir_ssa_def *dst, 83 unsigned chan) 84{ 85 nir_ssa_def *Asrc = nir_channel(b, src, 3); 86 nir_ssa_def *Adst = nir_channel(b, dst, 3); 87 nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, src->bit_size); 88 nir_ssa_def *Adsti = nir_fsub(b, one, Adst); 89 90 return (chan < 3) ? nir_fmin(b, Asrc, Adsti) : one; 91} 92 93/* Returns a scalar single factor, unmultiplied */ 94 95static nir_ssa_def * 96nir_blend_factor_value( 97 nir_builder *b, 98 nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst, 99 unsigned chan, 100 enum blend_factor factor) 101{ 102 switch (factor) { 103 case BLEND_FACTOR_ZERO: 104 return nir_imm_floatN_t(b, 0.0, src->bit_size); 105 case BLEND_FACTOR_SRC_COLOR: 106 return nir_channel(b, src, chan); 107 case BLEND_FACTOR_SRC1_COLOR: 108 return nir_channel(b, src1, chan); 109 case BLEND_FACTOR_DST_COLOR: 110 return nir_channel(b, dst, chan); 111 case BLEND_FACTOR_SRC_ALPHA: 112 return nir_channel(b, src, 3); 113 case BLEND_FACTOR_SRC1_ALPHA: 114 return nir_channel(b, src1, 3); 115 case BLEND_FACTOR_DST_ALPHA: 116 return nir_channel(b, dst, 3); 117 case BLEND_FACTOR_CONSTANT_COLOR: 118 return nir_channel(b, bconst, chan); 119 case BLEND_FACTOR_CONSTANT_ALPHA: 120 return nir_channel(b, bconst, 3); 121 case BLEND_FACTOR_SRC_ALPHA_SATURATE: 122 return nir_alpha_saturate(b, src, dst, chan); 123 } 124 125 unreachable("Invalid blend factor"); 126} 127 128static nir_ssa_def * 129nir_blend_factor( 130 nir_builder *b, 131 nir_ssa_def *raw_scalar, 132 nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst, 133 unsigned chan, 134 enum blend_factor factor, 135 bool inverted) 136{ 137 nir_ssa_def *f = 138 nir_blend_factor_value(b, src, src1, dst, bconst, chan, factor); 139 140 if (inverted) 141 f = nir_fadd_imm(b, nir_fneg(b, f), 1.0); 142 143 return nir_fmul(b, raw_scalar, f); 144} 145 146/* Given a colormask, "blend" with the destination */ 147 148static nir_ssa_def * 149nir_color_mask( 150 nir_builder *b, 151 unsigned mask, 152 nir_ssa_def *src, 153 nir_ssa_def *dst) 154{ 155 return nir_vec4(b, 156 nir_channel(b, (mask & (1 << 0)) ? src : dst, 0), 157 nir_channel(b, (mask & (1 << 1)) ? src : dst, 1), 158 nir_channel(b, (mask & (1 << 2)) ? src : dst, 2), 159 nir_channel(b, (mask & (1 << 3)) ? src : dst, 3)); 160} 161 162static nir_ssa_def * 163nir_logicop_func( 164 nir_builder *b, 165 unsigned func, 166 nir_ssa_def *src, nir_ssa_def *dst) 167{ 168 switch (func) { 169 case PIPE_LOGICOP_CLEAR: 170 return nir_imm_ivec4(b, 0, 0, 0, 0); 171 case PIPE_LOGICOP_NOR: 172 return nir_inot(b, nir_ior(b, src, dst)); 173 case PIPE_LOGICOP_AND_INVERTED: 174 return nir_iand(b, nir_inot(b, src), dst); 175 case PIPE_LOGICOP_COPY_INVERTED: 176 return nir_inot(b, src); 177 case PIPE_LOGICOP_AND_REVERSE: 178 return nir_iand(b, src, nir_inot(b, dst)); 179 case PIPE_LOGICOP_INVERT: 180 return nir_inot(b, dst); 181 case PIPE_LOGICOP_XOR: 182 return nir_ixor(b, src, dst); 183 case PIPE_LOGICOP_NAND: 184 return nir_inot(b, nir_iand(b, src, dst)); 185 case PIPE_LOGICOP_AND: 186 return nir_iand(b, src, dst); 187 case PIPE_LOGICOP_EQUIV: 188 return nir_inot(b, nir_ixor(b, src, dst)); 189 case PIPE_LOGICOP_NOOP: 190 return dst; 191 case PIPE_LOGICOP_OR_INVERTED: 192 return nir_ior(b, nir_inot(b, src), dst); 193 case PIPE_LOGICOP_COPY: 194 return src; 195 case PIPE_LOGICOP_OR_REVERSE: 196 return nir_ior(b, src, nir_inot(b, dst)); 197 case PIPE_LOGICOP_OR: 198 return nir_ior(b, src, dst); 199 case PIPE_LOGICOP_SET: 200 return nir_imm_ivec4(b, ~0, ~0, ~0, ~0); 201 } 202 203 unreachable("Invalid logciop function"); 204} 205 206static nir_ssa_def * 207nir_blend_logicop( 208 nir_builder *b, 209 nir_lower_blend_options options, 210 unsigned rt, 211 nir_ssa_def *src, nir_ssa_def *dst) 212{ 213 unsigned bit_size = src->bit_size; 214 const struct util_format_description *format_desc = 215 util_format_description(options.format[rt]); 216 217 if (bit_size != 32) { 218 src = nir_f2f32(b, src); 219 dst = nir_f2f32(b, dst); 220 } 221 222 assert(src->num_components <= 4); 223 assert(dst->num_components <= 4); 224 225 unsigned bits[4]; 226 for (int i = 0; i < 4; ++i) 227 bits[i] = format_desc->channel[i].size; 228 229 src = nir_format_float_to_unorm(b, src, bits); 230 dst = nir_format_float_to_unorm(b, dst, bits); 231 232 nir_ssa_def *out = nir_logicop_func(b, options.logicop_func, src, dst); 233 234 if (bits[0] < 32) { 235 nir_const_value mask[4]; 236 for (int i = 0; i < 4; ++i) 237 mask[i] = nir_const_value_for_int((1u << bits[i]) - 1, 32); 238 239 out = nir_iand(b, out, nir_build_imm(b, 4, 32, mask)); 240 } 241 242 out = nir_format_unorm_to_float(b, out, bits); 243 244 if (bit_size == 16) 245 out = nir_f2f16(b, out); 246 247 return out; 248} 249 250/* Given a blend state, the source color, and the destination color, 251 * return the blended color 252 */ 253 254static nir_ssa_def * 255nir_blend( 256 nir_builder *b, 257 nir_lower_blend_options options, 258 unsigned rt, 259 nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst) 260{ 261 /* Grab the blend constant ahead of time */ 262 nir_ssa_def *bconst; 263 if (options.scalar_blend_const) { 264 bconst = nir_vec4(b, 265 nir_load_blend_const_color_r_float(b), 266 nir_load_blend_const_color_g_float(b), 267 nir_load_blend_const_color_b_float(b), 268 nir_load_blend_const_color_a_float(b)); 269 } else { 270 bconst = nir_load_blend_const_color_rgba(b); 271 } 272 273 if (src->bit_size == 16) 274 bconst = nir_f2f16(b, bconst); 275 276 /* Fixed-point framebuffers require their inputs clamped. */ 277 enum pipe_format format = options.format[rt]; 278 279 if (!util_format_is_float(format)) 280 src = nir_fsat(b, src); 281 282 /* DST_ALPHA reads back 1.0 if there is no alpha channel */ 283 const struct util_format_description *desc = 284 util_format_description(format); 285 286 if (desc->nr_channels < 4) { 287 nir_ssa_def *zero = nir_imm_floatN_t(b, 0.0, dst->bit_size); 288 nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, dst->bit_size); 289 290 dst = nir_vec4(b, nir_channel(b, dst, 0), 291 desc->nr_channels > 1 ? nir_channel(b, dst, 1) : zero, 292 desc->nr_channels > 2 ? nir_channel(b, dst, 2) : zero, 293 desc->nr_channels > 3 ? nir_channel(b, dst, 3) : one); 294 } 295 296 /* We blend per channel and recombine later */ 297 nir_ssa_def *channels[4]; 298 299 for (unsigned c = 0; c < 4; ++c) { 300 /* Decide properties based on channel */ 301 nir_lower_blend_channel chan = 302 (c < 3) ? options.rt[rt].rgb : options.rt[rt].alpha; 303 304 nir_ssa_def *psrc = nir_channel(b, src, c); 305 nir_ssa_def *pdst = nir_channel(b, dst, c); 306 307 if (nir_blend_factored(chan.func)) { 308 psrc = nir_blend_factor( 309 b, psrc, 310 src, src1, dst, bconst, c, 311 chan.src_factor, chan.invert_src_factor); 312 313 pdst = nir_blend_factor( 314 b, pdst, 315 src, src1, dst, bconst, c, 316 chan.dst_factor, chan.invert_dst_factor); 317 } 318 319 channels[c] = nir_blend_func(b, chan.func, psrc, pdst); 320 } 321 322 return nir_vec(b, channels, 4); 323} 324 325static bool 326nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data) 327{ 328 nir_lower_blend_options *options = data; 329 if (instr->type != nir_instr_type_intrinsic) 330 return false; 331 332 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 333 if (intr->intrinsic != nir_intrinsic_store_deref) 334 return false; 335 336 nir_variable *var = nir_intrinsic_get_var(intr, 0); 337 if (var->data.mode != nir_var_shader_out || 338 (var->data.location != FRAG_RESULT_COLOR && 339 var->data.location < FRAG_RESULT_DATA0)) 340 return false; 341 342 /* Determine render target for per-RT blending */ 343 unsigned rt = 344 (var->data.location == FRAG_RESULT_COLOR) ? 0 : 345 (var->data.location - FRAG_RESULT_DATA0); 346 347 /* No blend lowering requested on this RT */ 348 if (options->format[rt] == PIPE_FORMAT_NONE) 349 return false; 350 351 b->cursor = nir_before_instr(instr); 352 353 /* Grab the input color */ 354 unsigned src_num_comps = nir_src_num_components(intr->src[1]); 355 nir_ssa_def *src = 356 nir_pad_vector(b, nir_ssa_for_src(b, intr->src[1], src_num_comps), 4); 357 358 /* Grab the previous fragment color */ 359 var->data.fb_fetch_output = true; 360 b->shader->info.outputs_read |= BITFIELD64_BIT(var->data.location); 361 b->shader->info.fs.uses_fbfetch_output = true; 362 nir_ssa_def *dst = nir_load_var(b, var); 363 364 /* Blend the two colors per the passed options */ 365 nir_ssa_def *blended = src; 366 367 if (options->logicop_enable) { 368 blended = nir_blend_logicop(b, *options, rt, src, dst); 369 } else if (!util_format_is_pure_integer(options->format[rt])) { 370 assert(!util_format_is_scaled(options->format[rt])); 371 blended = nir_blend(b, *options, rt, src, options->src1, dst); 372 } 373 374 /* Apply a colormask */ 375 blended = nir_color_mask(b, options->rt[rt].colormask, blended, dst); 376 377 if (src_num_comps != 4) 378 blended = nir_channels(b, blended, (nir_component_mask_t)BITFIELD_MASK(src_num_comps)); 379 380 /* Write out the final color instead of the input */ 381 nir_instr_rewrite_src_ssa(instr, &intr->src[1], blended); 382 return true; 383} 384 385void 386nir_lower_blend(nir_shader *shader, nir_lower_blend_options options) 387{ 388 assert(shader->info.stage == MESA_SHADER_FRAGMENT); 389 390 nir_shader_instructions_pass(shader, nir_lower_blend_instr, 391 nir_metadata_block_index | nir_metadata_dominance, &options); 392} 393