1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29/**
30 * @file
31 * Blend LLVM IR generation -- AoS layout.
32 *
33 * AoS blending is in general much slower than SoA, but there are some cases
34 * where it might be faster. In particular, if a pixel is rendered only once
35 * then the overhead of tiling and untiling will dominate over the speedup that
36 * SoA gives. So we might want to detect such cases and fallback to AoS in the
37 * future, but for now this function is here for historical/benchmarking
38 * purposes.
39 *
40 * Run lp_blend_test after any change to this file.
41 *
42 * @author Jose Fonseca <jfonseca@vmware.com>
43 */
44
45
46#include "pipe/p_state.h"
47#include "util/u_debug.h"
48#include "util/u_format.h"
49
50#include "gallivm/lp_bld_type.h"
51#include "gallivm/lp_bld_const.h"
52#include "gallivm/lp_bld_arit.h"
53#include "gallivm/lp_bld_logic.h"
54#include "gallivm/lp_bld_swizzle.h"
55#include "gallivm/lp_bld_bitarit.h"
56#include "gallivm/lp_bld_debug.h"
57
58#include "lp_bld_blend.h"
59
60
61/**
62 * We may the same values several times, so we keep them here to avoid
63 * recomputing them. Also reusing the values allows us to do simplifications
64 * that LLVM optimization passes wouldn't normally be able to do.
65 */
66struct lp_build_blend_aos_context
67{
68   struct lp_build_context base;
69
70   LLVMValueRef src;
71   LLVMValueRef src_alpha;
72   LLVMValueRef src1;
73   LLVMValueRef src1_alpha;
74   LLVMValueRef dst;
75   LLVMValueRef const_;
76   LLVMValueRef const_alpha;
77   boolean has_dst_alpha;
78
79   LLVMValueRef inv_src;
80   LLVMValueRef inv_src_alpha;
81   LLVMValueRef inv_dst;
82   LLVMValueRef inv_const;
83   LLVMValueRef inv_const_alpha;
84   LLVMValueRef saturate;
85
86   LLVMValueRef rgb_src_factor;
87   LLVMValueRef alpha_src_factor;
88   LLVMValueRef rgb_dst_factor;
89   LLVMValueRef alpha_dst_factor;
90};
91
92
93static LLVMValueRef
94lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
95                                 unsigned factor,
96                                 boolean alpha)
97{
98   LLVMValueRef src_alpha = bld->src_alpha ? bld->src_alpha : bld->src;
99   LLVMValueRef src1_alpha = bld->src1_alpha ? bld->src1_alpha : bld->src1;
100   LLVMValueRef const_alpha = bld->const_alpha ? bld->const_alpha : bld->const_;
101
102   switch (factor) {
103   case PIPE_BLENDFACTOR_ZERO:
104      return bld->base.zero;
105   case PIPE_BLENDFACTOR_ONE:
106      return bld->base.one;
107   case PIPE_BLENDFACTOR_SRC_COLOR:
108      return bld->src;
109   case PIPE_BLENDFACTOR_SRC_ALPHA:
110      return src_alpha;
111   case PIPE_BLENDFACTOR_DST_COLOR:
112   case PIPE_BLENDFACTOR_DST_ALPHA:
113      return bld->dst;
114   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
115      if (alpha)
116         return bld->base.one;
117      else {
118         /*
119          * If there's no dst alpha the complement is zero but for unclamped
120          * float inputs (or snorm inputs) min can be non-zero (negative).
121          */
122         if (!bld->saturate) {
123            if (!bld->has_dst_alpha) {
124               bld->saturate = lp_build_min(&bld->base, src_alpha, bld->base.zero);
125            }
126            else if (bld->base.type.norm && bld->base.type.sign) {
127               /*
128                * The complement/min totally doesn't work, since
129                * the complement is in range [0,2] but the other
130                * min input is [-1,1]. However, we can just clamp to 0
131                * before doing the complement...
132                */
133               LLVMValueRef inv_dst;
134               inv_dst = lp_build_max(&bld->base, bld->base.zero, bld->dst);
135               inv_dst = lp_build_comp(&bld->base, inv_dst);
136               bld->saturate = lp_build_min(&bld->base, src_alpha, inv_dst);
137            } else {
138               if (!bld->inv_dst) {
139                  bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
140               }
141               bld->saturate = lp_build_min(&bld->base, src_alpha, bld->inv_dst);
142            }
143         }
144         return bld->saturate;
145      }
146   case PIPE_BLENDFACTOR_CONST_COLOR:
147      return bld->const_;
148   case PIPE_BLENDFACTOR_CONST_ALPHA:
149      return const_alpha;
150   case PIPE_BLENDFACTOR_SRC1_COLOR:
151      return bld->src1;
152   case PIPE_BLENDFACTOR_SRC1_ALPHA:
153      return src1_alpha;
154   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
155      if (!bld->inv_src)
156         bld->inv_src = lp_build_comp(&bld->base, bld->src);
157      return bld->inv_src;
158   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
159      if (!bld->inv_src_alpha)
160         bld->inv_src_alpha = lp_build_comp(&bld->base, src_alpha);
161      return bld->inv_src_alpha;
162   case PIPE_BLENDFACTOR_INV_DST_COLOR:
163   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
164      if (!bld->inv_dst)
165         bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
166      return bld->inv_dst;
167   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
168      if (!bld->inv_const)
169         bld->inv_const = lp_build_comp(&bld->base, bld->const_);
170      return bld->inv_const;
171   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
172      if (!bld->inv_const_alpha)
173         bld->inv_const_alpha = lp_build_comp(&bld->base, const_alpha);
174      return bld->inv_const_alpha;
175   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
176      return lp_build_comp(&bld->base, bld->src1);
177   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
178      return lp_build_comp(&bld->base, src1_alpha);
179   default:
180      assert(0);
181      return bld->base.zero;
182   }
183}
184
185
/**
 * How an unswizzled blend factor has to be shuffled before it is applied.
 */
enum lp_build_blend_swizzle {
   /** Use the factor's channels as they are. */
   LP_BUILD_BLEND_SWIZZLE_RGBA = 0,
   /** Broadcast the factor's alpha channel to the channels. */
   LP_BUILD_BLEND_SWIZZLE_AAAA = 1
};
190
191
192/**
193 * How should we shuffle the base factor.
194 */
195static enum lp_build_blend_swizzle
196lp_build_blend_factor_swizzle(unsigned factor)
197{
198   switch (factor) {
199   case PIPE_BLENDFACTOR_ONE:
200   case PIPE_BLENDFACTOR_ZERO:
201   case PIPE_BLENDFACTOR_SRC_COLOR:
202   case PIPE_BLENDFACTOR_DST_COLOR:
203   case PIPE_BLENDFACTOR_CONST_COLOR:
204   case PIPE_BLENDFACTOR_SRC1_COLOR:
205   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
206   case PIPE_BLENDFACTOR_INV_DST_COLOR:
207   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
208   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
209      return LP_BUILD_BLEND_SWIZZLE_RGBA;
210   case PIPE_BLENDFACTOR_SRC_ALPHA:
211   case PIPE_BLENDFACTOR_DST_ALPHA:
212   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
213   case PIPE_BLENDFACTOR_SRC1_ALPHA:
214   case PIPE_BLENDFACTOR_CONST_ALPHA:
215   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
216   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
217   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
218   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
219      return LP_BUILD_BLEND_SWIZZLE_AAAA;
220   default:
221      assert(0);
222      return LP_BUILD_BLEND_SWIZZLE_RGBA;
223   }
224}
225
226
227static LLVMValueRef
228lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
229                       LLVMValueRef rgb,
230                       LLVMValueRef alpha,
231                       enum lp_build_blend_swizzle rgb_swizzle,
232                       unsigned alpha_swizzle,
233                       unsigned num_channels)
234{
235   LLVMValueRef swizzled_rgb;
236
237   switch (rgb_swizzle) {
238   case LP_BUILD_BLEND_SWIZZLE_RGBA:
239      swizzled_rgb = rgb;
240      break;
241   case LP_BUILD_BLEND_SWIZZLE_AAAA:
242      swizzled_rgb = lp_build_swizzle_scalar_aos(&bld->base, rgb, alpha_swizzle, num_channels);
243      break;
244   default:
245      assert(0);
246      swizzled_rgb = bld->base.undef;
247   }
248
249   if (rgb != alpha) {
250      swizzled_rgb = lp_build_select_aos(&bld->base, 1 << alpha_swizzle,
251                                         alpha, swizzled_rgb,
252                                         num_channels);
253   }
254
255   return swizzled_rgb;
256}
257
258/**
259 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
260 */
261static LLVMValueRef
262lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
263                      unsigned rgb_factor,
264                      unsigned alpha_factor,
265                      unsigned alpha_swizzle,
266                      unsigned num_channels)
267{
268   LLVMValueRef rgb_factor_, alpha_factor_;
269   enum lp_build_blend_swizzle rgb_swizzle;
270
271   if (alpha_swizzle == PIPE_SWIZZLE_X && num_channels == 1) {
272      return lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
273   }
274
275   rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
276
277   if (alpha_swizzle != PIPE_SWIZZLE_NONE) {
278      rgb_swizzle   = lp_build_blend_factor_swizzle(rgb_factor);
279      alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
280      return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle,
281                                    alpha_swizzle, num_channels);
282   } else {
283      return rgb_factor_;
284   }
285}
286
287
288/**
289 * Performs blending of src and dst pixels
290 *
291 * @param blend         the blend state of the shader variant
292 * @param cbuf_format   format of the colour buffer
293 * @param type          data type of the pixel vector
294 * @param rt            render target index
295 * @param src           blend src
296 * @param src_alpha     blend src alpha (if not included in src)
297 * @param src1          second blend src (for dual source blend)
298 * @param src1_alpha    second blend src alpha (if not included in src1)
299 * @param dst           blend dst
300 * @param mask          optional mask to apply to the blending result
301 * @param const_        const blend color
302 * @param const_alpha   const blend color alpha (if not included in const_)
303 * @param swizzle       swizzle values for RGBA
304 *
305 * @return the result of blending src and dst
306 */
307LLVMValueRef
308lp_build_blend_aos(struct gallivm_state *gallivm,
309                   const struct pipe_blend_state *blend,
310                   enum pipe_format cbuf_format,
311                   struct lp_type type,
312                   unsigned rt,
313                   LLVMValueRef src,
314                   LLVMValueRef src_alpha,
315                   LLVMValueRef src1,
316                   LLVMValueRef src1_alpha,
317                   LLVMValueRef dst,
318                   LLVMValueRef mask,
319                   LLVMValueRef const_,
320                   LLVMValueRef const_alpha,
321                   const unsigned char swizzle[4],
322                   int nr_channels)
323{
324   const struct pipe_rt_blend_state * state = &blend->rt[rt];
325   const struct util_format_description * desc;
326   struct lp_build_blend_aos_context bld;
327   LLVMValueRef src_factor, dst_factor;
328   LLVMValueRef result;
329   unsigned alpha_swizzle = PIPE_SWIZZLE_NONE;
330   unsigned i;
331
332   desc = util_format_description(cbuf_format);
333
334   /* Setup build context */
335   memset(&bld, 0, sizeof bld);
336   lp_build_context_init(&bld.base, gallivm, type);
337   bld.src = src;
338   bld.src1 = src1;
339   bld.dst = dst;
340   bld.const_ = const_;
341   bld.src_alpha = src_alpha;
342   bld.src1_alpha = src1_alpha;
343   bld.const_alpha = const_alpha;
344   bld.has_dst_alpha = FALSE;
345
346   /* Find the alpha channel if not provided separately */
347   if (!src_alpha) {
348      for (i = 0; i < 4; ++i) {
349         if (swizzle[i] == 3) {
350            alpha_swizzle = i;
351         }
352      }
353      /*
354       * Note that we may get src_alpha included from source (and 4 channels)
355       * even if the destination doesn't have an alpha channel (for rgbx
356       * formats). Generally this shouldn't make much of a difference (we're
357       * relying on blend factors being sanitized already if there's no
358       * dst alpha).
359       */
360      bld.has_dst_alpha = desc->swizzle[3] <= PIPE_SWIZZLE_W;
361   }
362
363   if (blend->logicop_enable) {
364      if (!type.floating) {
365         result = lp_build_logicop(gallivm->builder, blend->logicop_func, src, dst);
366      }
367      else {
368         result = src;
369      }
370   } else if (!state->blend_enable) {
371      result = src;
372   } else {
373      boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor &&
374                                state->alpha_src_factor == state->alpha_dst_factor) ||
375                               nr_channels == 1;
376      boolean alpha_only = nr_channels == 1 && alpha_swizzle == PIPE_SWIZZLE_X;
377
378      src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor,
379                                         state->alpha_src_factor,
380                                         alpha_swizzle,
381                                         nr_channels);
382
383      dst_factor = lp_build_blend_factor(&bld, state->rgb_dst_factor,
384                                         state->alpha_dst_factor,
385                                         alpha_swizzle,
386                                         nr_channels);
387
388      result = lp_build_blend(&bld.base,
389                              state->rgb_func,
390                              alpha_only ? state->alpha_src_factor : state->rgb_src_factor,
391                              alpha_only ? state->alpha_dst_factor : state->rgb_dst_factor,
392                              src,
393                              dst,
394                              src_factor,
395                              dst_factor,
396                              rgb_alpha_same,
397                              false);
398
399      if (state->rgb_func != state->alpha_func && nr_channels > 1 &&
400          alpha_swizzle != PIPE_SWIZZLE_NONE) {
401         LLVMValueRef alpha;
402
403         alpha = lp_build_blend(&bld.base,
404                                state->alpha_func,
405                                state->alpha_src_factor,
406                                state->alpha_dst_factor,
407                                src,
408                                dst,
409                                src_factor,
410                                dst_factor,
411                                rgb_alpha_same,
412                                false);
413
414         result = lp_build_blend_swizzle(&bld,
415                                         result,
416                                         alpha,
417                                         LP_BUILD_BLEND_SWIZZLE_RGBA,
418                                         alpha_swizzle,
419                                         nr_channels);
420      }
421   }
422
423   /* Check if color mask is necessary */
424   if (!util_format_colormask_full(desc, state->colormask)) {
425      LLVMValueRef color_mask;
426
427      color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type,
428                                                    state->colormask, nr_channels, swizzle);
429      lp_build_name(color_mask, "color_mask");
430
431      /* Combine with input mask if necessary */
432      if (mask) {
433         /* We can be blending floating values but masks are always integer... */
434         unsigned floating = bld.base.type.floating;
435         bld.base.type.floating = 0;
436
437         mask = lp_build_and(&bld.base, color_mask, mask);
438
439         bld.base.type.floating = floating;
440      } else {
441         mask = color_mask;
442      }
443   }
444
445   /* Apply mask, if one exists */
446   if (mask) {
447      result = lp_build_select(&bld.base, mask, result, dst);
448   }
449
450   return result;
451}
452