1/**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * quad blending
30 * \author Brian Paul
31 */
32
33#include "pipe/p_defines.h"
34#include "util/u_math.h"
35#include "util/u_memory.h"
36#include "util/format/u_format.h"
37#include "util/u_dual_blend.h"
38#include "sp_context.h"
39#include "sp_state.h"
40#include "sp_quad.h"
41#include "sp_tile_cache.h"
42#include "sp_quad_pipe.h"
43
44
/**
 * Base (channel-layout) format of a color buffer, as consumed by
 * rebase_colors().
 */
enum format
{
   RGBA            = 0,  /**< all four channels stored; no rebasing */
   RGB             = 1,  /**< no alpha channel; A is forced to 1 */
   LUMINANCE       = 2,  /**< R replicated to G and B; A is forced to 1 */
   LUMINANCE_ALPHA = 3,  /**< R replicated to G and B; A kept */
   INTENSITY       = 4   /**< R replicated to G, B and A */
};
53
54
55/** Subclass of quad_stage */
56struct blend_quad_stage
57{
58   struct quad_stage base;
59   boolean clamp[PIPE_MAX_COLOR_BUFS];  /**< clamp colors to [0,1]? */
60   enum format base_format[PIPE_MAX_COLOR_BUFS];
61   enum util_format_type format_type[PIPE_MAX_COLOR_BUFS];
62};
63
64
/**
 * Downcast a generic quad_stage pointer to the blend stage that embeds it.
 * This is a plain pointer cast; no checking is performed.
 */
static inline struct blend_quad_stage *
blend_quad_stage(struct quad_stage *stage)
{
   struct blend_quad_stage *bqs = (struct blend_quad_stage *) stage;

   return bqs;
}
71
72
/*
 * Component-wise helpers for 4-element vectors.
 *
 * Every macro parameter is parenthesised in the expansion so that callers
 * may pass arbitrary expressions (for example a dereferenced pointer such
 * as *row, or base + offset).  Parameters are still evaluated once per
 * component, so they must not have side effects.
 */

/** DST[i] = SRC[i] */
#define VEC4_COPY(DST, SRC) \
do { \
   (DST)[0] = (SRC)[0]; \
   (DST)[1] = (SRC)[1]; \
   (DST)[2] = (SRC)[2]; \
   (DST)[3] = (SRC)[3]; \
} while (0)

/** DST[i] = SRC, where SRC is a scalar expression */
#define VEC4_SCALAR(DST, SRC) \
do { \
   (DST)[0] = (SRC); \
   (DST)[1] = (SRC); \
   (DST)[2] = (SRC); \
   (DST)[3] = (SRC); \
} while (0)

/** R[i] = A[i] + B[i] */
#define VEC4_ADD(R, A, B) \
do { \
   (R)[0] = (A)[0] + (B)[0]; \
   (R)[1] = (A)[1] + (B)[1]; \
   (R)[2] = (A)[2] + (B)[2]; \
   (R)[3] = (A)[3] + (B)[3]; \
} while (0)

/** R[i] = A[i] - B[i] */
#define VEC4_SUB(R, A, B) \
do { \
   (R)[0] = (A)[0] - (B)[0]; \
   (R)[1] = (A)[1] - (B)[1]; \
   (R)[2] = (A)[2] - (B)[2]; \
   (R)[3] = (A)[3] - (B)[3]; \
} while (0)

/** Add and limit result to ceiling of 1.0 */
#define VEC4_ADD_SAT(R, A, B) \
do { \
   (R)[0] = (A)[0] + (B)[0];  if ((R)[0] > 1.0f) (R)[0] = 1.0f; \
   (R)[1] = (A)[1] + (B)[1];  if ((R)[1] > 1.0f) (R)[1] = 1.0f; \
   (R)[2] = (A)[2] + (B)[2];  if ((R)[2] > 1.0f) (R)[2] = 1.0f; \
   (R)[3] = (A)[3] + (B)[3];  if ((R)[3] > 1.0f) (R)[3] = 1.0f; \
} while (0)

/** Subtract and limit result to floor of 0.0 */
#define VEC4_SUB_SAT(R, A, B) \
do { \
   (R)[0] = (A)[0] - (B)[0];  if ((R)[0] < 0.0f) (R)[0] = 0.0f; \
   (R)[1] = (A)[1] - (B)[1];  if ((R)[1] < 0.0f) (R)[1] = 0.0f; \
   (R)[2] = (A)[2] - (B)[2];  if ((R)[2] < 0.0f) (R)[2] = 0.0f; \
   (R)[3] = (A)[3] - (B)[3];  if ((R)[3] < 0.0f) (R)[3] = 0.0f; \
} while (0)

/** R[i] = A[i] * B[i] */
#define VEC4_MUL(R, A, B) \
do { \
   (R)[0] = (A)[0] * (B)[0]; \
   (R)[1] = (A)[1] * (B)[1]; \
   (R)[2] = (A)[2] * (B)[2]; \
   (R)[3] = (A)[3] * (B)[3]; \
} while (0)

/** R[i] = A[i] if A[i] < B[i], else B[i] */
#define VEC4_MIN(R, A, B) \
do { \
   (R)[0] = ((A)[0] < (B)[0]) ? (A)[0] : (B)[0]; \
   (R)[1] = ((A)[1] < (B)[1]) ? (A)[1] : (B)[1]; \
   (R)[2] = ((A)[2] < (B)[2]) ? (A)[2] : (B)[2]; \
   (R)[3] = ((A)[3] < (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)

/** R[i] = A[i] if A[i] > B[i], else B[i] */
#define VEC4_MAX(R, A, B) \
do { \
   (R)[0] = ((A)[0] > (B)[0]) ? (A)[0] : (B)[0]; \
   (R)[1] = ((A)[1] > (B)[1]) ? (A)[1] : (B)[1]; \
   (R)[2] = ((A)[2] > (B)[2]) ? (A)[2] : (B)[2]; \
   (R)[3] = ((A)[3] > (B)[3]) ? (A)[3] : (B)[3]; \
} while (0)
146
147
148
149static void
150logicop_quad(struct quad_stage *qs,
151             float (*quadColor)[4],
152             float (*dest)[4])
153{
154   struct softpipe_context *softpipe = qs->softpipe;
155   ubyte src[4][4], dst[4][4], res[4][4];
156   uint *src4 = (uint *) src;
157   uint *dst4 = (uint *) dst;
158   uint *res4 = (uint *) res;
159   uint j;
160
161
162   /* convert to ubyte */
163   for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
164      dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
165      dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
166      dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
167      dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
168
169      src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
170      src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
171      src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
172      src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
173
174      res[j][0] = 0;
175   }
176
177   switch (softpipe->blend->logicop_func) {
178   case PIPE_LOGICOP_CLEAR:
179      for (j = 0; j < 4; j++)
180         res4[j] = 0;
181      break;
182   case PIPE_LOGICOP_NOR:
183      for (j = 0; j < 4; j++)
184         res4[j] = ~(src4[j] | dst4[j]);
185      break;
186   case PIPE_LOGICOP_AND_INVERTED:
187      for (j = 0; j < 4; j++)
188         res4[j] = ~src4[j] & dst4[j];
189      break;
190   case PIPE_LOGICOP_COPY_INVERTED:
191      for (j = 0; j < 4; j++)
192         res4[j] = ~src4[j];
193      break;
194   case PIPE_LOGICOP_AND_REVERSE:
195      for (j = 0; j < 4; j++)
196         res4[j] = src4[j] & ~dst4[j];
197      break;
198   case PIPE_LOGICOP_INVERT:
199      for (j = 0; j < 4; j++)
200         res4[j] = ~dst4[j];
201      break;
202   case PIPE_LOGICOP_XOR:
203      for (j = 0; j < 4; j++)
204         res4[j] = dst4[j] ^ src4[j];
205      break;
206   case PIPE_LOGICOP_NAND:
207      for (j = 0; j < 4; j++)
208         res4[j] = ~(src4[j] & dst4[j]);
209      break;
210   case PIPE_LOGICOP_AND:
211      for (j = 0; j < 4; j++)
212         res4[j] = src4[j] & dst4[j];
213      break;
214   case PIPE_LOGICOP_EQUIV:
215      for (j = 0; j < 4; j++)
216         res4[j] = ~(src4[j] ^ dst4[j]);
217      break;
218   case PIPE_LOGICOP_NOOP:
219      for (j = 0; j < 4; j++)
220         res4[j] = dst4[j];
221      break;
222   case PIPE_LOGICOP_OR_INVERTED:
223      for (j = 0; j < 4; j++)
224         res4[j] = ~src4[j] | dst4[j];
225      break;
226   case PIPE_LOGICOP_COPY:
227      for (j = 0; j < 4; j++)
228         res4[j] = src4[j];
229      break;
230   case PIPE_LOGICOP_OR_REVERSE:
231      for (j = 0; j < 4; j++)
232         res4[j] = src4[j] | ~dst4[j];
233      break;
234   case PIPE_LOGICOP_OR:
235      for (j = 0; j < 4; j++)
236         res4[j] = src4[j] | dst4[j];
237      break;
238   case PIPE_LOGICOP_SET:
239      for (j = 0; j < 4; j++)
240         res4[j] = ~0;
241      break;
242   default:
243      assert(0 && "invalid logicop mode");
244   }
245
246   for (j = 0; j < 4; j++) {
247      quadColor[j][0] = ubyte_to_float(res[j][0]);
248      quadColor[j][1] = ubyte_to_float(res[j][1]);
249      quadColor[j][2] = ubyte_to_float(res[j][2]);
250      quadColor[j][3] = ubyte_to_float(res[j][3]);
251   }
252}
253
254
255
256/**
257 * Do blending for a 2x2 quad for one color buffer.
258 * \param quadColor  the incoming quad colors
259 * \param dest  the destination/framebuffer quad colors
260 * \param const_blend_color  the constant blend color
261 * \param blend_index  which set of blending terms to use
262 */
263static void
264blend_quad(struct quad_stage *qs,
265           float (*quadColor)[4],
266           float (*quadColor2)[4],
267           float (*dest)[4],
268           const float const_blend_color[4],
269           unsigned blend_index)
270{
271   static const float zero[4] = { 0, 0, 0, 0 };
272   static const float one[4] = { 1, 1, 1, 1 };
273   struct softpipe_context *softpipe = qs->softpipe;
274   float source[4][TGSI_QUAD_SIZE] = { { 0 } };
275   float blend_dest[4][TGSI_QUAD_SIZE];
276
277   /*
278    * Compute src/first term RGB
279    */
280   switch (softpipe->blend->rt[blend_index].rgb_src_factor) {
281   case PIPE_BLENDFACTOR_ONE:
282      VEC4_COPY(source[0], quadColor[0]); /* R */
283      VEC4_COPY(source[1], quadColor[1]); /* G */
284      VEC4_COPY(source[2], quadColor[2]); /* B */
285      break;
286   case PIPE_BLENDFACTOR_SRC_COLOR:
287      VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
288      VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
289      VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
290      break;
291   case PIPE_BLENDFACTOR_SRC_ALPHA:
292      {
293         const float *alpha = quadColor[3];
294         VEC4_MUL(source[0], quadColor[0], alpha); /* R */
295         VEC4_MUL(source[1], quadColor[1], alpha); /* G */
296         VEC4_MUL(source[2], quadColor[2], alpha); /* B */
297      }
298      break;
299   case PIPE_BLENDFACTOR_DST_COLOR:
300      VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
301      VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
302      VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
303      break;
304   case PIPE_BLENDFACTOR_DST_ALPHA:
305      {
306         const float *alpha = dest[3];
307         VEC4_MUL(source[0], quadColor[0], alpha); /* R */
308         VEC4_MUL(source[1], quadColor[1], alpha); /* G */
309         VEC4_MUL(source[2], quadColor[2], alpha); /* B */
310      }
311      break;
312   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
313      {
314         const float *alpha = quadColor[3];
315         float diff[4], temp[4];
316         VEC4_SUB(diff, one, dest[3]);
317         VEC4_MIN(temp, alpha, diff);
318         VEC4_MUL(source[0], quadColor[0], temp); /* R */
319         VEC4_MUL(source[1], quadColor[1], temp); /* G */
320         VEC4_MUL(source[2], quadColor[2], temp); /* B */
321      }
322      break;
323   case PIPE_BLENDFACTOR_CONST_COLOR:
324      {
325         float comp[4];
326         VEC4_SCALAR(comp, const_blend_color[0]); /* R */
327         VEC4_MUL(source[0], quadColor[0], comp); /* R */
328         VEC4_SCALAR(comp, const_blend_color[1]); /* G */
329         VEC4_MUL(source[1], quadColor[1], comp); /* G */
330         VEC4_SCALAR(comp, const_blend_color[2]); /* B */
331         VEC4_MUL(source[2], quadColor[2], comp); /* B */
332      }
333      break;
334   case PIPE_BLENDFACTOR_CONST_ALPHA:
335      {
336         float alpha[4];
337         VEC4_SCALAR(alpha, const_blend_color[3]);
338         VEC4_MUL(source[0], quadColor[0], alpha); /* R */
339         VEC4_MUL(source[1], quadColor[1], alpha); /* G */
340         VEC4_MUL(source[2], quadColor[2], alpha); /* B */
341      }
342      break;
343   case PIPE_BLENDFACTOR_SRC1_COLOR:
344      VEC4_MUL(source[0], quadColor[0], quadColor2[0]); /* R */
345      VEC4_MUL(source[1], quadColor[1], quadColor2[1]); /* G */
346      VEC4_MUL(source[2], quadColor[2], quadColor2[2]); /* B */
347      break;
348   case PIPE_BLENDFACTOR_SRC1_ALPHA:
349      {
350         const float *alpha = quadColor2[3];
351         VEC4_MUL(source[0], quadColor[0], alpha); /* R */
352         VEC4_MUL(source[1], quadColor[1], alpha); /* G */
353         VEC4_MUL(source[2], quadColor[2], alpha); /* B */
354      }
355      break;
356   case PIPE_BLENDFACTOR_ZERO:
357      VEC4_COPY(source[0], zero); /* R */
358      VEC4_COPY(source[1], zero); /* G */
359      VEC4_COPY(source[2], zero); /* B */
360      break;
361   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
362      {
363         float inv_comp[4];
364         VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
365         VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
366         VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
367         VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
368         VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
369         VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
370      }
371      break;
372   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
373      {
374         float inv_alpha[4];
375         VEC4_SUB(inv_alpha, one, quadColor[3]);
376         VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
377         VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
378         VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
379      }
380      break;
381   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
382      {
383         float inv_alpha[4];
384         VEC4_SUB(inv_alpha, one, dest[3]);
385         VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
386         VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
387         VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
388      }
389      break;
390   case PIPE_BLENDFACTOR_INV_DST_COLOR:
391      {
392         float inv_comp[4];
393         VEC4_SUB(inv_comp, one, dest[0]); /* R */
394         VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
395         VEC4_SUB(inv_comp, one, dest[1]); /* G */
396         VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
397         VEC4_SUB(inv_comp, one, dest[2]); /* B */
398         VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
399      }
400      break;
401   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
402      {
403         float inv_comp[4];
404         /* R */
405         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
406         VEC4_MUL(source[0], quadColor[0], inv_comp);
407         /* G */
408         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
409         VEC4_MUL(source[1], quadColor[1], inv_comp);
410         /* B */
411         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
412         VEC4_MUL(source[2], quadColor[2], inv_comp);
413      }
414      break;
415   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
416      {
417         float inv_alpha[4];
418         VEC4_SCALAR(inv_alpha, 1.0f - const_blend_color[3]);
419         VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
420         VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
421         VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
422      }
423      break;
424   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
425      {
426         float inv_comp[4];
427         VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
428         VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
429         VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
430         VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
431         VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
432         VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
433      }
434      break;
435   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
436      {
437         float inv_alpha[4];
438         VEC4_SUB(inv_alpha, one, quadColor2[3]);
439         VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
440         VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
441         VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
442      }
443      break;
444   default:
445      assert(0 && "invalid rgb src factor");
446   }
447
448   /*
449    * Compute src/first term A
450    */
451   switch (softpipe->blend->rt[blend_index].alpha_src_factor) {
452   case PIPE_BLENDFACTOR_ONE:
453      VEC4_COPY(source[3], quadColor[3]); /* A */
454      break;
455   case PIPE_BLENDFACTOR_SRC_COLOR:
456      FALLTHROUGH;
457   case PIPE_BLENDFACTOR_SRC_ALPHA:
458      {
459         const float *alpha = quadColor[3];
460         VEC4_MUL(source[3], quadColor[3], alpha); /* A */
461      }
462      break;
463   case PIPE_BLENDFACTOR_DST_COLOR:
464      FALLTHROUGH;
465   case PIPE_BLENDFACTOR_DST_ALPHA:
466      VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
467      break;
468   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
469      /* multiply alpha by 1.0 */
470      VEC4_COPY(source[3], quadColor[3]); /* A */
471      break;
472   case PIPE_BLENDFACTOR_CONST_COLOR:
473      FALLTHROUGH;
474   case PIPE_BLENDFACTOR_CONST_ALPHA:
475      {
476         float comp[4];
477         VEC4_SCALAR(comp, const_blend_color[3]); /* A */
478         VEC4_MUL(source[3], quadColor[3], comp); /* A */
479      }
480      break;
481   case PIPE_BLENDFACTOR_ZERO:
482      VEC4_COPY(source[3], zero); /* A */
483      break;
484   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
485      FALLTHROUGH;
486   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
487      {
488         float inv_alpha[4];
489         VEC4_SUB(inv_alpha, one, quadColor[3]);
490         VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
491      }
492      break;
493   case PIPE_BLENDFACTOR_INV_DST_COLOR:
494      FALLTHROUGH;
495   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
496      {
497         float inv_alpha[4];
498         VEC4_SUB(inv_alpha, one, dest[3]);
499         VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
500      }
501      break;
502   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
503      FALLTHROUGH;
504   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
505      {
506         float inv_comp[4];
507         /* A */
508         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
509         VEC4_MUL(source[3], quadColor[3], inv_comp);
510      }
511      break;
512   case PIPE_BLENDFACTOR_SRC1_COLOR:
513      FALLTHROUGH;
514   case PIPE_BLENDFACTOR_SRC1_ALPHA:
515      {
516         const float *alpha = quadColor2[3];
517         VEC4_MUL(source[3], quadColor[3], alpha); /* A */
518      }
519      break;
520   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
521      FALLTHROUGH;
522   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
523      {
524         float inv_alpha[4];
525         VEC4_SUB(inv_alpha, one, quadColor2[3]);
526         VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
527      }
528      break;
529   default:
530      assert(0 && "invalid alpha src factor");
531   }
532
533   /* Save the original dest for use in masking */
534   VEC4_COPY(blend_dest[0], dest[0]);
535   VEC4_COPY(blend_dest[1], dest[1]);
536   VEC4_COPY(blend_dest[2], dest[2]);
537   VEC4_COPY(blend_dest[3], dest[3]);
538
539
540   /*
541    * Compute blend_dest/second term RGB
542    */
543   switch (softpipe->blend->rt[blend_index].rgb_dst_factor) {
544   case PIPE_BLENDFACTOR_ONE:
545      /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
546      break;
547   case PIPE_BLENDFACTOR_SRC_COLOR:
548      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[0]); /* R */
549      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[1]); /* G */
550      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[2]); /* B */
551      break;
552   case PIPE_BLENDFACTOR_SRC_ALPHA:
553      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[3]); /* R * A */
554      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[3]); /* G * A */
555      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[3]); /* B * A */
556      break;
557   case PIPE_BLENDFACTOR_DST_ALPHA:
558      VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[3]); /* R * A */
559      VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[3]); /* G * A */
560      VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[3]); /* B * A */
561      break;
562   case PIPE_BLENDFACTOR_DST_COLOR:
563      VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[0]); /* R */
564      VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[1]); /* G */
565      VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[2]); /* B */
566      break;
567   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
568      {
569         const float *alpha = quadColor[3];
570         float diff[4], temp[4];
571         VEC4_SUB(diff, one, blend_dest[3]);
572         VEC4_MIN(temp, alpha, diff);
573         VEC4_MUL(blend_dest[0], blend_dest[0], temp); /* R */
574         VEC4_MUL(blend_dest[1], blend_dest[1], temp); /* G */
575         VEC4_MUL(blend_dest[2], blend_dest[2], temp); /* B */
576      }
577      break;
578   case PIPE_BLENDFACTOR_CONST_COLOR:
579      {
580         float comp[4];
581         VEC4_SCALAR(comp, const_blend_color[0]); /* R */
582         VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
583         VEC4_SCALAR(comp, const_blend_color[1]); /* G */
584         VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
585         VEC4_SCALAR(comp, const_blend_color[2]); /* B */
586         VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
587      }
588      break;
589   case PIPE_BLENDFACTOR_CONST_ALPHA:
590      {
591         float comp[4];
592         VEC4_SCALAR(comp, const_blend_color[3]); /* A */
593         VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
594         VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
595         VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
596      }
597      break;
598   case PIPE_BLENDFACTOR_ZERO:
599      VEC4_COPY(blend_dest[0], zero); /* R */
600      VEC4_COPY(blend_dest[1], zero); /* G */
601      VEC4_COPY(blend_dest[2], zero); /* B */
602      break;
603   case PIPE_BLENDFACTOR_SRC1_COLOR:
604      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[0]); /* R */
605      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[1]); /* G */
606      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[2]); /* B */
607      break;
608   case PIPE_BLENDFACTOR_SRC1_ALPHA:
609      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[3]); /* R * A */
610      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[3]); /* G * A */
611      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[3]); /* B * A */
612      break;
613   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
614      {
615         float inv_comp[4];
616         VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
617         VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
618         VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
619         VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
620         VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
621         VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
622      }
623      break;
624   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
625      {
626         float one_minus_alpha[TGSI_QUAD_SIZE];
627         VEC4_SUB(one_minus_alpha, one, quadColor[3]);
628         VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
629         VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
630         VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
631      }
632      break;
633   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
634      {
635         float inv_comp[4];
636         VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
637         VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
638         VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
639         VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
640      }
641      break;
642   case PIPE_BLENDFACTOR_INV_DST_COLOR:
643      {
644         float inv_comp[4];
645         VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */
646         VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */
647         VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */
648         VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */
649         VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */
650         VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */
651      }
652      break;
653   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
654      {
655         float inv_comp[4];
656         /* R */
657         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
658         VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
659         /* G */
660         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
661         VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
662         /* B */
663         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
664         VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
665      }
666      break;
667   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
668      {
669         float inv_comp[4];
670         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
671         VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
672         VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
673         VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
674      }
675      break;
676   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
677      {
678         float inv_comp[4];
679         VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
680         VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
681         VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
682         VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
683         VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
684         VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
685      }
686      break;
687   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
688      {
689         float one_minus_alpha[TGSI_QUAD_SIZE];
690         VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
691         VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
692         VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
693         VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
694      }
695      break;
696   default:
697      assert(0 && "invalid rgb dst factor");
698   }
699
700   /*
701    * Compute blend_dest/second term A
702    */
703   switch (softpipe->blend->rt[blend_index].alpha_dst_factor) {
704   case PIPE_BLENDFACTOR_ONE:
705      /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
706      break;
707   case PIPE_BLENDFACTOR_SRC_COLOR:
708      FALLTHROUGH;
709   case PIPE_BLENDFACTOR_SRC_ALPHA:
710      VEC4_MUL(blend_dest[3], blend_dest[3], quadColor[3]); /* A * A */
711      break;
712   case PIPE_BLENDFACTOR_DST_COLOR:
713      FALLTHROUGH;
714   case PIPE_BLENDFACTOR_DST_ALPHA:
715      VEC4_MUL(blend_dest[3], blend_dest[3], blend_dest[3]); /* A */
716      break;
717   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
718      /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
719      break;
720   case PIPE_BLENDFACTOR_CONST_COLOR:
721      FALLTHROUGH;
722   case PIPE_BLENDFACTOR_CONST_ALPHA:
723      {
724         float comp[4];
725         VEC4_SCALAR(comp, const_blend_color[3]); /* A */
726         VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */
727      }
728      break;
729   case PIPE_BLENDFACTOR_ZERO:
730      VEC4_COPY(blend_dest[3], zero); /* A */
731      break;
732   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
733      FALLTHROUGH;
734   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
735      {
736         float one_minus_alpha[TGSI_QUAD_SIZE];
737         VEC4_SUB(one_minus_alpha, one, quadColor[3]);
738         VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
739      }
740      break;
741   case PIPE_BLENDFACTOR_INV_DST_COLOR:
742      FALLTHROUGH;
743   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
744      {
745         float inv_comp[4];
746         VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
747         VEC4_MUL(blend_dest[3], inv_comp, blend_dest[3]); /* A */
748      }
749      break;
750   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
751      FALLTHROUGH;
752   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
753      {
754         float inv_comp[4];
755         VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
756         VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp);
757      }
758      break;
759   case PIPE_BLENDFACTOR_SRC1_COLOR:
760      FALLTHROUGH;
761   case PIPE_BLENDFACTOR_SRC1_ALPHA:
762      VEC4_MUL(blend_dest[3], blend_dest[3], quadColor2[3]); /* A * A */
763      break;
764   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
765      FALLTHROUGH;
766   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
767      {
768         float one_minus_alpha[TGSI_QUAD_SIZE];
769         VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
770         VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
771      }
772      break;
773   default:
774      assert(0 && "invalid alpha dst factor");
775   }
776
777   /*
778    * Combine RGB terms
779    */
780   switch (softpipe->blend->rt[blend_index].rgb_func) {
781   case PIPE_BLEND_ADD:
782      VEC4_ADD(quadColor[0], source[0], blend_dest[0]); /* R */
783      VEC4_ADD(quadColor[1], source[1], blend_dest[1]); /* G */
784      VEC4_ADD(quadColor[2], source[2], blend_dest[2]); /* B */
785      break;
786   case PIPE_BLEND_SUBTRACT:
787      VEC4_SUB(quadColor[0], source[0], blend_dest[0]); /* R */
788      VEC4_SUB(quadColor[1], source[1], blend_dest[1]); /* G */
789      VEC4_SUB(quadColor[2], source[2], blend_dest[2]); /* B */
790      break;
791   case PIPE_BLEND_REVERSE_SUBTRACT:
792      VEC4_SUB(quadColor[0], blend_dest[0], source[0]); /* R */
793      VEC4_SUB(quadColor[1], blend_dest[1], source[1]); /* G */
794      VEC4_SUB(quadColor[2], blend_dest[2], source[2]); /* B */
795      break;
796   case PIPE_BLEND_MIN:
797      VEC4_MIN(quadColor[0], source[0], blend_dest[0]); /* R */
798      VEC4_MIN(quadColor[1], source[1], blend_dest[1]); /* G */
799      VEC4_MIN(quadColor[2], source[2], blend_dest[2]); /* B */
800      break;
801   case PIPE_BLEND_MAX:
802      VEC4_MAX(quadColor[0], source[0], blend_dest[0]); /* R */
803      VEC4_MAX(quadColor[1], source[1], blend_dest[1]); /* G */
804      VEC4_MAX(quadColor[2], source[2], blend_dest[2]); /* B */
805      break;
806   default:
807      assert(0 && "invalid rgb blend func");
808   }
809
810   /*
811    * Combine A terms
812    */
813   switch (softpipe->blend->rt[blend_index].alpha_func) {
814   case PIPE_BLEND_ADD:
815      VEC4_ADD(quadColor[3], source[3], blend_dest[3]); /* A */
816      break;
817   case PIPE_BLEND_SUBTRACT:
818      VEC4_SUB(quadColor[3], source[3], blend_dest[3]); /* A */
819      break;
820   case PIPE_BLEND_REVERSE_SUBTRACT:
821      VEC4_SUB(quadColor[3], blend_dest[3], source[3]); /* A */
822      break;
823   case PIPE_BLEND_MIN:
824      VEC4_MIN(quadColor[3], source[3], blend_dest[3]); /* A */
825      break;
826   case PIPE_BLEND_MAX:
827      VEC4_MAX(quadColor[3], source[3], blend_dest[3]); /* A */
828      break;
829   default:
830      assert(0 && "invalid alpha blend func");
831   }
832}
833
834static void
835colormask_quad(unsigned colormask,
836               float (*quadColor)[4],
837               float (*dest)[4])
838{
839   /* R */
840   if (!(colormask & PIPE_MASK_R))
841      COPY_4V(quadColor[0], dest[0]);
842
843   /* G */
844   if (!(colormask & PIPE_MASK_G))
845      COPY_4V(quadColor[1], dest[1]);
846
847   /* B */
848   if (!(colormask & PIPE_MASK_B))
849      COPY_4V(quadColor[2], dest[2]);
850
851   /* A */
852   if (!(colormask & PIPE_MASK_A))
853      COPY_4V(quadColor[3], dest[3]);
854}
855
856
857/**
858 * Clamp all colors in a quad to [0, 1]
859 */
860static void
861clamp_colors(float (*quadColor)[4])
862{
863   unsigned i, j;
864
865   for (i = 0; i < 4; i++) {
866      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
867         quadColor[i][j] = CLAMP(quadColor[i][j], 0.0F, 1.0F);
868      }
869   }
870}
871
872
873/**
874 * If we're drawing to a luminance, luminance/alpha or intensity surface
875 * we have to adjust (rebase) the fragment/quad colors before writing them
876 * to the tile cache.  The tile cache always stores RGBA colors but if
877 * we're caching a L/A surface (for example) we need to be sure that R=G=B
878 * so that subsequent reads from the surface cache appear to return L/A
879 * values.
880 * The piglit fbo-blending-formats test will exercise this.
881 */
882static void
883rebase_colors(enum format base_format, float (*quadColor)[4])
884{
885   unsigned i;
886
887   switch (base_format) {
888   case RGB:
889      for (i = 0; i < 4; i++) {
890         /* A = 1 */
891         quadColor[3][i] = 1.0F;
892      }
893      break;
894   case LUMINANCE:
895      for (i = 0; i < 4; i++) {
896         /* B = G = R */
897         quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
898         /* A = 1 */
899         quadColor[3][i] = 1.0F;
900      }
901      break;
902   case LUMINANCE_ALPHA:
903      for (i = 0; i < 4; i++) {
904         /* B = G = R */
905         quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
906      }
907      break;
908   case INTENSITY:
909      for (i = 0; i < 4; i++) {
910         /* A = B = G = R */
911         quadColor[3][i] = quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
912      }
913      break;
914   default:
915      ; /* nothing */
916   }
917}
918
919static void
920blend_fallback(struct quad_stage *qs,
921               struct quad_header *quads[],
922               unsigned nr)
923{
924   const struct blend_quad_stage *bqs = blend_quad_stage(qs);
925   struct softpipe_context *softpipe = qs->softpipe;
926   const struct pipe_blend_state *blend = softpipe->blend;
927   unsigned cbuf;
928   boolean write_all =
929      softpipe->fs_variant->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS];
930
931   for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
932      if (softpipe->framebuffer.cbufs[cbuf]) {
933         /* which blend/mask state index to use: */
934         const uint blend_buf = blend->independent_blend_enable ? cbuf : 0;
935         float dest[4][TGSI_QUAD_SIZE];
936         struct softpipe_cached_tile *tile
937            = sp_get_cached_tile(softpipe->cbuf_cache[cbuf],
938                                 quads[0]->input.x0,
939                                 quads[0]->input.y0, quads[0]->input.layer);
940         const boolean clamp = bqs->clamp[cbuf];
941         const float *blend_color;
942         const boolean dual_source_blend = util_blend_state_is_dual(blend, cbuf);
943         uint q, i, j;
944
945         if (clamp)
946            blend_color = softpipe->blend_color_clamped.color;
947         else
948            blend_color = softpipe->blend_color.color;
949
950         for (q = 0; q < nr; q++) {
951            struct quad_header *quad = quads[q];
952            float (*quadColor)[4];
953            float (*quadColor2)[4] = NULL;
954            float temp_quad_color[TGSI_QUAD_SIZE][4];
955            const int itx = (quad->input.x0 & (TILE_SIZE-1));
956            const int ity = (quad->input.y0 & (TILE_SIZE-1));
957
958            if (write_all) {
959               for (j = 0; j < TGSI_QUAD_SIZE; j++) {
960                  for (i = 0; i < 4; i++) {
961                     temp_quad_color[i][j] = quad->output.color[0][i][j];
962                  }
963               }
964               quadColor = temp_quad_color;
965            } else {
966               quadColor = quad->output.color[cbuf];
967               if (dual_source_blend)
968                  quadColor2 = quad->output.color[cbuf + 1];
969            }
970
971            /* If fixed-point dest color buffer, need to clamp the incoming
972             * fragment colors now.
973             */
974            if (clamp || softpipe->rasterizer->clamp_fragment_color) {
975               clamp_colors(quadColor);
976            }
977
978            /* get/swizzle dest colors
979             */
980            for (j = 0; j < TGSI_QUAD_SIZE; j++) {
981               int x = itx + (j & 1);
982               int y = ity + (j >> 1);
983               for (i = 0; i < 4; i++) {
984                  dest[i][j] = tile->data.color[y][x][i];
985               }
986            }
987
988
989            if (blend->logicop_enable) {
990               if (bqs->format_type[cbuf] != UTIL_FORMAT_TYPE_FLOAT) {
991                  logicop_quad( qs, quadColor, dest );
992               }
993            }
994            else if (blend->rt[blend_buf].blend_enable) {
995               blend_quad(qs, quadColor, quadColor2, dest, blend_color, blend_buf);
996
997               /* If fixed-point dest color buffer, need to clamp the outgoing
998                * fragment colors now.
999                */
1000               if (clamp) {
1001                  clamp_colors(quadColor);
1002               }
1003            }
1004
1005            rebase_colors(bqs->base_format[cbuf], quadColor);
1006
1007            if (blend->rt[blend_buf].colormask != 0xf)
1008               colormask_quad( blend->rt[blend_buf].colormask, quadColor, dest);
1009
1010            /* Output color values
1011             */
1012            for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1013               if (quad->inout.mask & (1 << j)) {
1014                  int x = itx + (j & 1);
1015                  int y = ity + (j >> 1);
1016                  for (i = 0; i < 4; i++) { /* loop over color chans */
1017                     tile->data.color[y][x][i] = quadColor[i][j];
1018                  }
1019               }
1020            }
1021         }
1022      }
1023   }
1024}
1025
1026
1027static void
1028blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs,
1029                                         struct quad_header *quads[],
1030                                         unsigned nr)
1031{
1032   const struct blend_quad_stage *bqs = blend_quad_stage(qs);
1033   static const float one[4] = { 1, 1, 1, 1 };
1034   float one_minus_alpha[TGSI_QUAD_SIZE];
1035   float dest[4][TGSI_QUAD_SIZE];
1036   float source[4][TGSI_QUAD_SIZE];
1037   uint i, j, q;
1038
1039   struct softpipe_cached_tile *tile
1040      = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
1041                           quads[0]->input.x0,
1042                           quads[0]->input.y0, quads[0]->input.layer);
1043
1044   for (q = 0; q < nr; q++) {
1045      struct quad_header *quad = quads[q];
1046      float (*quadColor)[4] = quad->output.color[0];
1047      const float *alpha = quadColor[3];
1048      const int itx = (quad->input.x0 & (TILE_SIZE-1));
1049      const int ity = (quad->input.y0 & (TILE_SIZE-1));
1050
1051      /* get/swizzle dest colors */
1052      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1053         int x = itx + (j & 1);
1054         int y = ity + (j >> 1);
1055         for (i = 0; i < 4; i++) {
1056            dest[i][j] = tile->data.color[y][x][i];
1057         }
1058      }
1059
1060      /* If fixed-point dest color buffer, need to clamp the incoming
1061       * fragment colors now.
1062       */
1063      if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
1064         clamp_colors(quadColor);
1065      }
1066
1067      VEC4_MUL(source[0], quadColor[0], alpha); /* R */
1068      VEC4_MUL(source[1], quadColor[1], alpha); /* G */
1069      VEC4_MUL(source[2], quadColor[2], alpha); /* B */
1070      VEC4_MUL(source[3], quadColor[3], alpha); /* A */
1071
1072      VEC4_SUB(one_minus_alpha, one, alpha);
1073      VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
1074      VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
1075      VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
1076      VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
1077
1078      VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */
1079      VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */
1080      VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */
1081      VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */
1082
1083      /* If fixed-point dest color buffer, need to clamp the outgoing
1084       * fragment colors now.
1085       */
1086      if (bqs->clamp[0]) {
1087         clamp_colors(quadColor);
1088      }
1089
1090      rebase_colors(bqs->base_format[0], quadColor);
1091
1092      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1093         if (quad->inout.mask & (1 << j)) {
1094            int x = itx + (j & 1);
1095            int y = ity + (j >> 1);
1096            for (i = 0; i < 4; i++) { /* loop over color chans */
1097               tile->data.color[y][x][i] = quadColor[i][j];
1098            }
1099         }
1100      }
1101   }
1102}
1103
1104static void
1105blend_single_add_one_one(struct quad_stage *qs,
1106                         struct quad_header *quads[],
1107                         unsigned nr)
1108{
1109   const struct blend_quad_stage *bqs = blend_quad_stage(qs);
1110   float dest[4][TGSI_QUAD_SIZE];
1111   uint i, j, q;
1112
1113   struct softpipe_cached_tile *tile
1114      = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
1115                           quads[0]->input.x0,
1116                           quads[0]->input.y0, quads[0]->input.layer);
1117
1118   for (q = 0; q < nr; q++) {
1119      struct quad_header *quad = quads[q];
1120      float (*quadColor)[4] = quad->output.color[0];
1121      const int itx = (quad->input.x0 & (TILE_SIZE-1));
1122      const int ity = (quad->input.y0 & (TILE_SIZE-1));
1123
1124      /* get/swizzle dest colors */
1125      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1126         int x = itx + (j & 1);
1127         int y = ity + (j >> 1);
1128         for (i = 0; i < 4; i++) {
1129            dest[i][j] = tile->data.color[y][x][i];
1130         }
1131      }
1132
1133      /* If fixed-point dest color buffer, need to clamp the incoming
1134       * fragment colors now.
1135       */
1136      if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
1137         clamp_colors(quadColor);
1138      }
1139
1140      VEC4_ADD(quadColor[0], quadColor[0], dest[0]); /* R */
1141      VEC4_ADD(quadColor[1], quadColor[1], dest[1]); /* G */
1142      VEC4_ADD(quadColor[2], quadColor[2], dest[2]); /* B */
1143      VEC4_ADD(quadColor[3], quadColor[3], dest[3]); /* A */
1144
1145      /* If fixed-point dest color buffer, need to clamp the outgoing
1146       * fragment colors now.
1147       */
1148      if (bqs->clamp[0]) {
1149         clamp_colors(quadColor);
1150      }
1151
1152      rebase_colors(bqs->base_format[0], quadColor);
1153
1154      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1155         if (quad->inout.mask & (1 << j)) {
1156            int x = itx + (j & 1);
1157            int y = ity + (j >> 1);
1158            for (i = 0; i < 4; i++) { /* loop over color chans */
1159               tile->data.color[y][x][i] = quadColor[i][j];
1160            }
1161         }
1162      }
1163   }
1164}
1165
1166
1167/**
1168 * Just copy the quad color to the framebuffer tile (respecting the writemask),
1169 * for one color buffer.
1170 * Clamping will be done, if needed (depending on the color buffer's
1171 * datatype) when we write/pack the colors later.
1172 */
1173static void
1174single_output_color(struct quad_stage *qs,
1175                    struct quad_header *quads[],
1176                    unsigned nr)
1177{
1178   const struct blend_quad_stage *bqs = blend_quad_stage(qs);
1179   uint i, j, q;
1180
1181   struct softpipe_cached_tile *tile
1182      = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
1183                           quads[0]->input.x0,
1184                           quads[0]->input.y0, quads[0]->input.layer);
1185
1186   for (q = 0; q < nr; q++) {
1187      struct quad_header *quad = quads[q];
1188      float (*quadColor)[4] = quad->output.color[0];
1189      const int itx = (quad->input.x0 & (TILE_SIZE-1));
1190      const int ity = (quad->input.y0 & (TILE_SIZE-1));
1191
1192      if (qs->softpipe->rasterizer->clamp_fragment_color)
1193         clamp_colors(quadColor);
1194
1195      rebase_colors(bqs->base_format[0], quadColor);
1196
1197      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1198         if (quad->inout.mask & (1 << j)) {
1199            int x = itx + (j & 1);
1200            int y = ity + (j >> 1);
1201            for (i = 0; i < 4; i++) { /* loop over color chans */
1202               tile->data.color[y][x][i] = quadColor[i][j];
1203            }
1204         }
1205      }
1206   }
1207}
1208
/**
 * Run function that does nothing.  choose_blend_quad() installs it when
 * there are no color buffers, or when the only color buffer slot is
 * unbound.
 */
static void
blend_noop(struct quad_stage *qs,
           struct quad_header *quads[],
           unsigned nr)
{
   /* intentionally empty: all arguments are unused */
   (void) qs;
   (void) quads;
   (void) nr;
}
1215
1216
1217static void
1218choose_blend_quad(struct quad_stage *qs,
1219                  struct quad_header *quads[],
1220                  unsigned nr)
1221{
1222   struct blend_quad_stage *bqs = blend_quad_stage(qs);
1223   struct softpipe_context *softpipe = qs->softpipe;
1224   const struct pipe_blend_state *blend = softpipe->blend;
1225   unsigned i;
1226
1227   qs->run = blend_fallback;
1228
1229   if (softpipe->framebuffer.nr_cbufs == 0) {
1230      qs->run = blend_noop;
1231   }
1232   else if (!softpipe->blend->logicop_enable &&
1233            softpipe->blend->rt[0].colormask == 0xf &&
1234            softpipe->framebuffer.nr_cbufs == 1)
1235   {
1236      if (softpipe->framebuffer.cbufs[0] == NULL) {
1237         qs->run = blend_noop;
1238      }
1239      else if (!blend->rt[0].blend_enable) {
1240         qs->run = single_output_color;
1241      }
1242      else if (blend->rt[0].rgb_src_factor == blend->rt[0].alpha_src_factor &&
1243               blend->rt[0].rgb_dst_factor == blend->rt[0].alpha_dst_factor &&
1244               blend->rt[0].rgb_func == blend->rt[0].alpha_func)
1245      {
1246         if (blend->rt[0].alpha_func == PIPE_BLEND_ADD) {
1247            if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
1248                blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ONE) {
1249               qs->run = blend_single_add_one_one;
1250            }
1251            else if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA &&
1252                blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
1253               qs->run = blend_single_add_src_alpha_inv_src_alpha;
1254
1255         }
1256      }
1257   }
1258
1259   /* For each color buffer, determine if the buffer has destination alpha and
1260    * whether color clamping is needed.
1261    */
1262   for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
1263      if (softpipe->framebuffer.cbufs[i]) {
1264         const enum pipe_format format = softpipe->framebuffer.cbufs[i]->format;
1265         const struct util_format_description *desc =
1266            util_format_description(format);
1267         /* assuming all or no color channels are normalized: */
1268         bqs->clamp[i] = desc->channel[0].normalized;
1269         bqs->format_type[i] = desc->channel[0].type;
1270
1271         if (util_format_is_intensity(format))
1272            bqs->base_format[i] = INTENSITY;
1273         else if (util_format_is_luminance(format))
1274            bqs->base_format[i] = LUMINANCE;
1275         else if (util_format_is_luminance_alpha(format))
1276            bqs->base_format[i] = LUMINANCE_ALPHA;
1277         else if (!util_format_has_alpha(format))
1278            bqs->base_format[i] = RGB;
1279         else
1280            bqs->base_format[i] = RGBA;
1281      }
1282   }
1283
1284   qs->run(qs, quads, nr);
1285}
1286
1287
1288static void blend_begin(struct quad_stage *qs)
1289{
1290   qs->run = choose_blend_quad;
1291}
1292
1293
/**
 * Destroy callback of the blend stage: release the stage object.
 */
static void
blend_destroy(struct quad_stage *qs)
{
   FREE(qs);
}
1298
1299
1300struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe )
1301{
1302   struct blend_quad_stage *stage = CALLOC_STRUCT(blend_quad_stage);
1303
1304   if (!stage)
1305      return NULL;
1306
1307   stage->base.softpipe = softpipe;
1308   stage->base.begin = blend_begin;
1309   stage->base.run = choose_blend_quad;
1310   stage->base.destroy = blend_destroy;
1311
1312   return &stage->base;
1313}
1314