1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29/** 30 * @file 31 * Blend LLVM IR generation -- AoS layout. 32 * 33 * AoS blending is in general much slower than SoA, but there are some cases 34 * where it might be faster. In particular, if a pixel is rendered only once 35 * then the overhead of tiling and untiling will dominate over the speedup that 36 * SoA gives. So we might want to detect such cases and fallback to AoS in the 37 * future, but for now this function is here for historical/benchmarking 38 * purposes. 39 * 40 * Run lp_blend_test after any change to this file. 41 * 42 * @author Jose Fonseca <jfonseca@vmware.com> 43 */ 44 45 46#include "pipe/p_state.h" 47#include "util/u_debug.h" 48#include "util/format/u_format.h" 49 50#include "gallivm/lp_bld_type.h" 51#include "gallivm/lp_bld_const.h" 52#include "gallivm/lp_bld_arit.h" 53#include "gallivm/lp_bld_logic.h" 54#include "gallivm/lp_bld_swizzle.h" 55#include "gallivm/lp_bld_bitarit.h" 56#include "gallivm/lp_bld_debug.h" 57 58#include "lp_bld_blend.h" 59 60 61/** 62 * We may the same values several times, so we keep them here to avoid 63 * recomputing them. Also reusing the values allows us to do simplifications 64 * that LLVM optimization passes wouldn't normally be able to do. 65 */ 66struct lp_build_blend_aos_context 67{ 68 struct lp_build_context base; 69 70 LLVMValueRef src; 71 LLVMValueRef src_alpha; 72 LLVMValueRef src1; 73 LLVMValueRef src1_alpha; 74 LLVMValueRef dst; 75 LLVMValueRef const_; 76 LLVMValueRef const_alpha; 77 boolean has_dst_alpha; 78 79 LLVMValueRef inv_src; 80 LLVMValueRef inv_src_alpha; 81 LLVMValueRef inv_dst; 82 LLVMValueRef inv_const; 83 LLVMValueRef inv_const_alpha; 84 LLVMValueRef saturate; 85 86 LLVMValueRef rgb_src_factor; 87 LLVMValueRef alpha_src_factor; 88 LLVMValueRef rgb_dst_factor; 89 LLVMValueRef alpha_dst_factor; 90}; 91 92 93static LLVMValueRef 94lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld, 95 unsigned factor, 96 boolean alpha) 97{ 98 LLVMValueRef src_alpha = bld->src_alpha ? bld->src_alpha : bld->src; 99 LLVMValueRef src1_alpha = bld->src1_alpha ? bld->src1_alpha : bld->src1; 100 LLVMValueRef const_alpha = bld->const_alpha ? bld->const_alpha : bld->const_; 101 102 switch (factor) { 103 case PIPE_BLENDFACTOR_ZERO: 104 return bld->base.zero; 105 case PIPE_BLENDFACTOR_ONE: 106 return bld->base.one; 107 case PIPE_BLENDFACTOR_SRC_COLOR: 108 return bld->src; 109 case PIPE_BLENDFACTOR_SRC_ALPHA: 110 return src_alpha; 111 case PIPE_BLENDFACTOR_DST_COLOR: 112 case PIPE_BLENDFACTOR_DST_ALPHA: 113 return bld->dst; 114 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 115 if (alpha) 116 return bld->base.one; 117 else { 118 /* 119 * If there's no dst alpha the complement is zero but for unclamped 120 * float inputs (or snorm inputs) min can be non-zero (negative). 121 */ 122 if (!bld->saturate) { 123 if (!bld->has_dst_alpha) { 124 bld->saturate = lp_build_min(&bld->base, src_alpha, bld->base.zero); 125 } 126 else if (bld->base.type.norm && bld->base.type.sign) { 127 /* 128 * The complement/min totally doesn't work, since 129 * the complement is in range [0,2] but the other 130 * min input is [-1,1]. However, we can just clamp to 0 131 * before doing the complement... 132 */ 133 LLVMValueRef inv_dst; 134 inv_dst = lp_build_max(&bld->base, bld->base.zero, bld->dst); 135 inv_dst = lp_build_comp(&bld->base, inv_dst); 136 bld->saturate = lp_build_min(&bld->base, src_alpha, inv_dst); 137 } else { 138 if (!bld->inv_dst) { 139 bld->inv_dst = lp_build_comp(&bld->base, bld->dst); 140 } 141 bld->saturate = lp_build_min(&bld->base, src_alpha, bld->inv_dst); 142 } 143 } 144 return bld->saturate; 145 } 146 case PIPE_BLENDFACTOR_CONST_COLOR: 147 return bld->const_; 148 case PIPE_BLENDFACTOR_CONST_ALPHA: 149 return const_alpha; 150 case PIPE_BLENDFACTOR_SRC1_COLOR: 151 return bld->src1; 152 case PIPE_BLENDFACTOR_SRC1_ALPHA: 153 return src1_alpha; 154 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 155 if (!bld->inv_src) 156 bld->inv_src = lp_build_comp(&bld->base, bld->src); 157 return bld->inv_src; 158 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 159 if (!bld->inv_src_alpha) 160 bld->inv_src_alpha = lp_build_comp(&bld->base, src_alpha); 161 return bld->inv_src_alpha; 162 case PIPE_BLENDFACTOR_INV_DST_COLOR: 163 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 164 if (!bld->inv_dst) 165 bld->inv_dst = lp_build_comp(&bld->base, bld->dst); 166 return bld->inv_dst; 167 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 168 if (!bld->inv_const) 169 bld->inv_const = lp_build_comp(&bld->base, bld->const_); 170 return bld->inv_const; 171 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 172 if (!bld->inv_const_alpha) 173 bld->inv_const_alpha = lp_build_comp(&bld->base, const_alpha); 174 return bld->inv_const_alpha; 175 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 176 return lp_build_comp(&bld->base, bld->src1); 177 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 178 return lp_build_comp(&bld->base, src1_alpha); 179 default: 180 assert(0); 181 return bld->base.zero; 182 } 183} 184 185 186enum lp_build_blend_swizzle { 187 LP_BUILD_BLEND_SWIZZLE_RGBA = 0, 188 LP_BUILD_BLEND_SWIZZLE_AAAA = 1 189}; 190 191 192/** 193 * How should we shuffle the base factor. 194 */ 195static enum lp_build_blend_swizzle 196lp_build_blend_factor_swizzle(unsigned factor) 197{ 198 switch (factor) { 199 case PIPE_BLENDFACTOR_ONE: 200 case PIPE_BLENDFACTOR_ZERO: 201 case PIPE_BLENDFACTOR_SRC_COLOR: 202 case PIPE_BLENDFACTOR_DST_COLOR: 203 case PIPE_BLENDFACTOR_CONST_COLOR: 204 case PIPE_BLENDFACTOR_SRC1_COLOR: 205 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 206 case PIPE_BLENDFACTOR_INV_DST_COLOR: 207 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 208 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 209 return LP_BUILD_BLEND_SWIZZLE_RGBA; 210 case PIPE_BLENDFACTOR_SRC_ALPHA: 211 case PIPE_BLENDFACTOR_DST_ALPHA: 212 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 213 case PIPE_BLENDFACTOR_SRC1_ALPHA: 214 case PIPE_BLENDFACTOR_CONST_ALPHA: 215 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 216 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 217 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 218 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 219 return LP_BUILD_BLEND_SWIZZLE_AAAA; 220 default: 221 assert(0); 222 return LP_BUILD_BLEND_SWIZZLE_RGBA; 223 } 224} 225 226 227static LLVMValueRef 228lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld, 229 LLVMValueRef rgb, 230 LLVMValueRef alpha, 231 enum lp_build_blend_swizzle rgb_swizzle, 232 unsigned alpha_swizzle, 233 unsigned num_channels) 234{ 235 LLVMValueRef swizzled_rgb; 236 237 switch (rgb_swizzle) { 238 case LP_BUILD_BLEND_SWIZZLE_RGBA: 239 swizzled_rgb = rgb; 240 break; 241 case LP_BUILD_BLEND_SWIZZLE_AAAA: 242 swizzled_rgb = lp_build_swizzle_scalar_aos(&bld->base, rgb, alpha_swizzle, num_channels); 243 break; 244 default: 245 assert(0); 246 swizzled_rgb = bld->base.undef; 247 } 248 249 if (rgb != alpha) { 250 swizzled_rgb = lp_build_select_aos(&bld->base, 1 << alpha_swizzle, 251 alpha, swizzled_rgb, 252 num_channels); 253 } 254 255 return swizzled_rgb; 256} 257 258/** 259 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml 260 */ 261static LLVMValueRef 262lp_build_blend_factor(struct lp_build_blend_aos_context *bld, 263 unsigned rgb_factor, 264 unsigned alpha_factor, 265 unsigned alpha_swizzle, 266 unsigned num_channels) 267{ 268 LLVMValueRef rgb_factor_, alpha_factor_; 269 enum lp_build_blend_swizzle rgb_swizzle; 270 271 if (alpha_swizzle == PIPE_SWIZZLE_X && num_channels == 1) { 272 return lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE); 273 } 274 275 rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE); 276 277 if (alpha_swizzle != PIPE_SWIZZLE_NONE) { 278 rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor); 279 alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE); 280 return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, 281 alpha_swizzle, num_channels); 282 } else { 283 return rgb_factor_; 284 } 285} 286 287 288/** 289 * Performs blending of src and dst pixels 290 * 291 * @param blend the blend state of the shader variant 292 * @param cbuf_format format of the colour buffer 293 * @param type data type of the pixel vector 294 * @param rt render target index 295 * @param src blend src 296 * @param src_alpha blend src alpha (if not included in src) 297 * @param src1 second blend src (for dual source blend) 298 * @param src1_alpha second blend src alpha (if not included in src1) 299 * @param dst blend dst 300 * @param mask optional mask to apply to the blending result 301 * @param const_ const blend color 302 * @param const_alpha const blend color alpha (if not included in const_) 303 * @param swizzle swizzle values for RGBA 304 * 305 * @return the result of blending src and dst 306 */ 307LLVMValueRef 308lp_build_blend_aos(struct gallivm_state *gallivm, 309 const struct pipe_blend_state *blend, 310 enum pipe_format cbuf_format, 311 struct lp_type type, 312 unsigned rt, 313 LLVMValueRef src, 314 LLVMValueRef src_alpha, 315 LLVMValueRef src1, 316 LLVMValueRef src1_alpha, 317 LLVMValueRef dst, 318 LLVMValueRef mask, 319 LLVMValueRef const_, 320 LLVMValueRef const_alpha, 321 const unsigned char swizzle[4], 322 int nr_channels) 323{ 324 const struct pipe_rt_blend_state * state = &blend->rt[rt]; 325 const struct util_format_description * desc; 326 struct lp_build_blend_aos_context bld; 327 LLVMValueRef src_factor, dst_factor; 328 LLVMValueRef result; 329 unsigned alpha_swizzle = PIPE_SWIZZLE_NONE; 330 unsigned i; 331 332 desc = util_format_description(cbuf_format); 333 334 /* Setup build context */ 335 memset(&bld, 0, sizeof bld); 336 lp_build_context_init(&bld.base, gallivm, type); 337 bld.src = src; 338 bld.src1 = src1; 339 bld.dst = dst; 340 bld.const_ = const_; 341 bld.src_alpha = src_alpha; 342 bld.src1_alpha = src1_alpha; 343 bld.const_alpha = const_alpha; 344 bld.has_dst_alpha = FALSE; 345 346 /* Find the alpha channel if not provided separately */ 347 if (!src_alpha) { 348 for (i = 0; i < 4; ++i) { 349 if (swizzle[i] == 3) { 350 alpha_swizzle = i; 351 } 352 } 353 /* 354 * Note that we may get src_alpha included from source (and 4 channels) 355 * even if the destination doesn't have an alpha channel (for rgbx 356 * formats). Generally this shouldn't make much of a difference (we're 357 * relying on blend factors being sanitized already if there's no 358 * dst alpha). 359 */ 360 bld.has_dst_alpha = desc->swizzle[3] <= PIPE_SWIZZLE_W; 361 } 362 363 if (blend->logicop_enable) { 364 if (!type.floating) { 365 result = lp_build_logicop(gallivm->builder, blend->logicop_func, src, dst); 366 } 367 else { 368 result = src; 369 } 370 } else if (!state->blend_enable) { 371 result = src; 372 } else { 373 boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor && 374 state->alpha_src_factor == state->alpha_dst_factor) || 375 nr_channels == 1; 376 boolean alpha_only = nr_channels == 1 && alpha_swizzle == PIPE_SWIZZLE_X; 377 378 src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor, 379 state->alpha_src_factor, 380 alpha_swizzle, 381 nr_channels); 382 383 dst_factor = lp_build_blend_factor(&bld, state->rgb_dst_factor, 384 state->alpha_dst_factor, 385 alpha_swizzle, 386 nr_channels); 387 388 result = lp_build_blend(&bld.base, 389 state->rgb_func, 390 alpha_only ? state->alpha_src_factor : state->rgb_src_factor, 391 alpha_only ? state->alpha_dst_factor : state->rgb_dst_factor, 392 src, 393 dst, 394 src_factor, 395 dst_factor, 396 rgb_alpha_same, 397 false); 398 399 if (state->rgb_func != state->alpha_func && nr_channels > 1 && 400 alpha_swizzle != PIPE_SWIZZLE_NONE) { 401 LLVMValueRef alpha; 402 403 alpha = lp_build_blend(&bld.base, 404 state->alpha_func, 405 state->alpha_src_factor, 406 state->alpha_dst_factor, 407 src, 408 dst, 409 src_factor, 410 dst_factor, 411 rgb_alpha_same, 412 false); 413 414 result = lp_build_blend_swizzle(&bld, 415 result, 416 alpha, 417 LP_BUILD_BLEND_SWIZZLE_RGBA, 418 alpha_swizzle, 419 nr_channels); 420 } 421 } 422 423 /* Check if color mask is necessary */ 424 if (!util_format_colormask_full(desc, state->colormask)) { 425 LLVMValueRef color_mask; 426 427 color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, 428 state->colormask, nr_channels, swizzle); 429 lp_build_name(color_mask, "color_mask"); 430 431 /* Combine with input mask if necessary */ 432 if (mask) { 433 /* We can be blending floating values but masks are always integer... */ 434 unsigned floating = bld.base.type.floating; 435 bld.base.type.floating = 0; 436 437 mask = lp_build_and(&bld.base, color_mask, mask); 438 439 bld.base.type.floating = floating; 440 } else { 441 mask = color_mask; 442 } 443 } 444 445 /* Apply mask, if one exists */ 446 if (mask) { 447 result = lp_build_select(&bld.base, mask, result, dst); 448 } 449 450 return result; 451} 452