1/*
2 * Copyright © 2010-2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Chris Wilson <chris@chris-wilson.co.uk>
25 *
26 */
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include "sna.h"
33#include "sna_render.h"
34#include "sna_render_inline.h"
35#include "sna_reg.h"
36#include "sna_video.h"
37
38#include "gen3_render.h"
39
/*
 * Compile-time knobs, one per render entry point; all are 0 here.
 * NOTE(review): presumably a non-zero value steers the named operation
 * away from the 3D path for debugging -- the use sites are outside this
 * view, confirm there.
 */
#define NO_COMPOSITE		0
#define NO_COMPOSITE_SPANS	0
#define NO_COPY			0
#define NO_COPY_BOXES		0
#define NO_FILL			0
#define NO_FILL_ONE		0
#define NO_FILL_BOXES		0

/* NOTE(review): name suggests fills favour the BLT engine; confirm at the use site. */
#define PREFER_BLT_FILL		1
49
/*
 * Shader kinds stored in a composite channel's u.gen3.type.
 * The numeric values are spelled out; they match the implicit
 * numbering of a plain enumerator list starting at 0.
 */
enum {
	SHADER_NONE     = 0,
	SHADER_ZERO     = 1,
	SHADER_BLACK    = 2,
	SHADER_WHITE    = 3,
	SHADER_CONSTANT = 4,
	SHADER_LINEAR   = 5,
	SHADER_RADIAL   = 6,
	SHADER_TEXTURE  = 7,
	SHADER_OPACITY  = 8,
};
61
/* Largest width/height and largest pitch accepted by the checks in this file. */
#define MAX_3D_SIZE	2048
#define MAX_3D_PITCH	8192

/*
 * Emission shorthands.  Each expands to a call that picks up a variable
 * named `sna` from the enclosing scope, so they can only be used where
 * such a variable is visible.
 */
#define OUT_BATCH(v)	batch_emit(sna, v)
#define OUT_BATCH_F(v)	batch_emit_float(sna, v)
#define OUT_VERTEX(v)	vertex_emit(sna, v)
68
/* Radial gradient flavours; values written out explicitly (0 and 1). */
enum gen3_radial_mode {
	RADIAL_ONE = 0,
	RADIAL_TWO = 1
};
73
74static const struct blendinfo {
75	bool dst_alpha;
76	bool src_alpha;
77	uint32_t src_blend;
78	uint32_t dst_blend;
79} gen3_blend_op[] = {
80	/* Clear */	{0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO},
81	/* Src */	{0, 0, BLENDFACT_ONE, BLENDFACT_ZERO},
82	/* Dst */	{0, 0, BLENDFACT_ZERO, BLENDFACT_ONE},
83	/* Over */	{0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA},
84	/* OverReverse */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE},
85	/* In */	{1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO},
86	/* InReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA},
87	/* Out */	{1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO},
88	/* OutReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA},
89	/* Atop */	{1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
90	/* AtopReverse */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA},
91	/* Xor */	{1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
92	/* Add */	{0, 0, BLENDFACT_ONE, BLENDFACT_ONE},
93};
94
/*
 * S6 bits for a plain colour write: ADD blend function with factors
 * ONE (source) and ZERO (destination), and no S6_CBUF_BLEND_ENABLE.
 */
#define S6_COLOR_WRITE_ONLY \
	(S6_COLOR_WRITE_ENABLE | \
	 (BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) | \
	 (BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT) | \
	 (BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT))
100
101static const struct formatinfo {
102	unsigned int fmt, xfmt;
103	uint32_t card_fmt;
104	bool rb_reversed;
105} gen3_tex_formats[] = {
106	{PICT_a8, 0, MAPSURF_8BIT | MT_8BIT_A8, false},
107	{PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, false},
108	{PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, false},
109	{PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, false},
110	{PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false},
111#ifdef PICT_a2r10g10b10
112	{PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, false},
113	{PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, false},
114#endif
115	{PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, false},
116	{PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, true},
117	{PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, false},
118	{PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, true},
119	{PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, false},
120	{PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, true},
121};
122
/* Local alias that forwards to pixman_fixed_to_double(). */
#define xFixedToDouble(f)	pixman_fixed_to_double(f)
124
125static inline bool too_large(int width, int height)
126{
127	return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
128}
129
130static inline uint32_t gen3_buf_tiling(uint32_t tiling)
131{
132	uint32_t v = 0;
133	switch (tiling) {
134	case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y;
135	case I915_TILING_X: v |= BUF_3D_TILED_SURFACE;
136	case I915_TILING_NONE: break;
137	}
138	return v;
139}
140
141static inline bool
142gen3_check_pitch_3d(struct kgem_bo *bo)
143{
144	return bo->pitch <= MAX_3D_PITCH;
145}
146
147static uint32_t gen3_get_blend_cntl(int op,
148				    bool has_component_alpha,
149				    uint32_t dst_format)
150{
151	uint32_t sblend = gen3_blend_op[op].src_blend;
152	uint32_t dblend = gen3_blend_op[op].dst_blend;
153
154	if (op <= PictOpSrc) /* for clear and src disable blending */
155		return S6_COLOR_WRITE_ONLY;
156
157	/* If there's no dst alpha channel, adjust the blend op so that we'll
158	 * treat it as always 1.
159	 */
160	if (gen3_blend_op[op].dst_alpha) {
161		if (PICT_FORMAT_A(dst_format) == 0) {
162			if (sblend == BLENDFACT_DST_ALPHA)
163				sblend = BLENDFACT_ONE;
164			else if (sblend == BLENDFACT_INV_DST_ALPHA)
165				sblend = BLENDFACT_ZERO;
166		}
167
168		/* gen3 engine reads 8bit color buffer into green channel
169		 * in cases like color buffer blending etc., and also writes
170		 * back green channel.  So with dst_alpha blend we should use
171		 * color factor. See spec on "8-bit rendering".
172		 */
173		if (dst_format == PICT_a8) {
174			if (sblend == BLENDFACT_DST_ALPHA)
175				sblend = BLENDFACT_DST_COLR;
176			else if (sblend == BLENDFACT_INV_DST_ALPHA)
177				sblend = BLENDFACT_INV_DST_COLR;
178		}
179	}
180
181	/* If the source alpha is being used, then we should only be in a case
182	 * where the source blend factor is 0, and the source blend value is the
183	 * mask channels multiplied by the source picture's alpha.
184	 */
185	if (has_component_alpha && gen3_blend_op[op].src_alpha) {
186		if (dblend == BLENDFACT_SRC_ALPHA)
187			dblend = BLENDFACT_SRC_COLR;
188		else if (dblend == BLENDFACT_INV_SRC_ALPHA)
189			dblend = BLENDFACT_INV_SRC_COLR;
190	}
191
192	return (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
193		BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
194		sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT |
195		dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
196}
197
198static bool gen3_check_dst_format(uint32_t format)
199{
200	switch (format) {
201	case PICT_a8r8g8b8:
202	case PICT_x8r8g8b8:
203	case PICT_a8b8g8r8:
204	case PICT_x8b8g8r8:
205	case PICT_r5g6b5:
206	case PICT_b5g6r5:
207	case PICT_a1r5g5b5:
208	case PICT_x1r5g5b5:
209	case PICT_a1b5g5r5:
210	case PICT_x1b5g5r5:
211#ifdef PICT_a2r10g10b10
212	case PICT_a2r10g10b10:
213	case PICT_x2r10g10b10:
214	case PICT_a2b10g10r10:
215	case PICT_x2b10g10r10:
216#endif
217	case PICT_a8:
218	case PICT_a4r4g4b4:
219	case PICT_x4r4g4b4:
220	case PICT_a4b4g4r4:
221	case PICT_x4b4g4r4:
222		return true;
223	default:
224		return false;
225	}
226}
227
228static bool gen3_dst_rb_reversed(uint32_t format)
229{
230	switch (format) {
231	case PICT_a8r8g8b8:
232	case PICT_x8r8g8b8:
233	case PICT_r5g6b5:
234	case PICT_a1r5g5b5:
235	case PICT_x1r5g5b5:
236#ifdef PICT_a2r10g10b10
237	case PICT_a2r10g10b10:
238	case PICT_x2r10g10b10:
239#endif
240	case PICT_a8:
241	case PICT_a4r4g4b4:
242	case PICT_x4r4g4b4:
243		return false;
244	default:
245		return true;
246	}
247}
248
/* Place a bias value at bit 20 (horizontal) or bit 16 (vertical). */
#define DSTORG_HORT_BIAS(x)	((x)<<20)
#define DSTORG_VERT_BIAS(x)	((x)<<16)
251
252static uint32_t gen3_get_dst_format(uint32_t format)
253{
254#define BIAS (DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8))
255	switch (format) {
256	default:
257	case PICT_a8r8g8b8:
258	case PICT_x8r8g8b8:
259	case PICT_a8b8g8r8:
260	case PICT_x8b8g8r8:
261		return BIAS | COLR_BUF_ARGB8888;
262	case PICT_r5g6b5:
263	case PICT_b5g6r5:
264		return BIAS | COLR_BUF_RGB565;
265	case PICT_a1r5g5b5:
266	case PICT_x1r5g5b5:
267	case PICT_a1b5g5r5:
268	case PICT_x1b5g5r5:
269		return BIAS | COLR_BUF_ARGB1555;
270#ifdef PICT_a2r10g10b10
271	case PICT_a2r10g10b10:
272	case PICT_x2r10g10b10:
273	case PICT_a2b10g10r10:
274	case PICT_x2b10g10r10:
275		return BIAS | COLR_BUF_ARGB2AAA;
276#endif
277	case PICT_a8:
278		return BIAS | COLR_BUF_8BIT;
279	case PICT_a4r4g4b4:
280	case PICT_x4r4g4b4:
281	case PICT_a4b4g4r4:
282	case PICT_x4b4g4r4:
283		return BIAS | COLR_BUF_ARGB4444;
284	}
285#undef BIAS
286}
287
288static bool gen3_check_format(PicturePtr p)
289{
290	switch (p->format) {
291	case PICT_a8:
292	case PICT_a8r8g8b8:
293	case PICT_x8r8g8b8:
294	case PICT_a8b8g8r8:
295	case PICT_x8b8g8r8:
296#ifdef PICT_a2r10g10b10
297	case PICT_a2r10g10b10:
298	case PICT_a2b10g10r10:
299#endif
300	case PICT_r5g6b5:
301	case PICT_b5g6r5:
302	case PICT_a1r5g5b5:
303	case PICT_a1b5g5r5:
304	case PICT_a4r4g4b4:
305	case PICT_a4b4g4r4:
306		return true;
307	default:
308		return false;
309	}
310}
311
312static bool gen3_check_xformat(PicturePtr p)
313{
314	switch (p->format) {
315	case PICT_a8r8g8b8:
316	case PICT_x8r8g8b8:
317	case PICT_a8b8g8r8:
318	case PICT_x8b8g8r8:
319	case PICT_r5g6b5:
320	case PICT_b5g6r5:
321	case PICT_a1r5g5b5:
322	case PICT_x1r5g5b5:
323	case PICT_a1b5g5r5:
324	case PICT_x1b5g5r5:
325#ifdef PICT_a2r10g10b10
326	case PICT_a2r10g10b10:
327	case PICT_x2r10g10b10:
328	case PICT_a2b10g10r10:
329	case PICT_x2b10g10r10:
330#endif
331	case PICT_a8:
332	case PICT_a4r4g4b4:
333	case PICT_x4r4g4b4:
334	case PICT_a4b4g4r4:
335	case PICT_x4b4g4r4:
336		return true;
337	default:
338		return false;
339	}
340}
341
342static uint32_t gen3_texture_repeat(uint32_t repeat)
343{
344#define REPEAT(x) \
345	(SS3_NORMALIZED_COORDS | \
346	 TEXCOORDMODE_##x << SS3_TCX_ADDR_MODE_SHIFT | \
347	 TEXCOORDMODE_##x << SS3_TCY_ADDR_MODE_SHIFT)
348	switch (repeat) {
349	default:
350	case RepeatNone:
351		return REPEAT(CLAMP_BORDER);
352	case RepeatNormal:
353		return REPEAT(WRAP);
354	case RepeatPad:
355		return REPEAT(CLAMP_EDGE);
356	case RepeatReflect:
357		return REPEAT(MIRROR);
358	}
359#undef REPEAT
360}
361
362static uint32_t gen3_gradient_repeat(uint32_t repeat)
363{
364#define REPEAT(x) \
365	(SS3_NORMALIZED_COORDS | \
366	 TEXCOORDMODE_##x  << SS3_TCX_ADDR_MODE_SHIFT | \
367	 TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT)
368	switch (repeat) {
369	default:
370	case RepeatNone:
371		return REPEAT(CLAMP_BORDER);
372	case RepeatNormal:
373		return REPEAT(WRAP);
374	case RepeatPad:
375		return REPEAT(CLAMP_EDGE);
376	case RepeatReflect:
377		return REPEAT(MIRROR);
378	}
379#undef REPEAT
380}
381
382static bool gen3_check_repeat(PicturePtr p)
383{
384	if (!p->repeat)
385		return true;
386
387	switch (p->repeatType) {
388	case RepeatNone:
389	case RepeatNormal:
390	case RepeatPad:
391	case RepeatReflect:
392		return true;
393	default:
394		return false;
395	}
396}
397
398static uint32_t gen3_filter(uint32_t filter)
399{
400	switch (filter) {
401	default:
402		assert(0);
403	case PictFilterNearest:
404		return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
405			FILTER_NEAREST << SS2_MIN_FILTER_SHIFT |
406			MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
407	case PictFilterBilinear:
408		return (FILTER_LINEAR  << SS2_MAG_FILTER_SHIFT |
409			FILTER_LINEAR  << SS2_MIN_FILTER_SHIFT |
410			MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
411	}
412}
413
414static bool gen3_check_filter(PicturePtr p)
415{
416	switch (p->filter) {
417	case PictFilterNearest:
418	case PictFilterBilinear:
419		return true;
420	default:
421		return false;
422	}
423}
424
/* Emit a destination position as two vertex values, x first and then y. */
static inline void
gen3_emit_composite_dstcoord(struct sna *sna, int16_t dstX, int16_t dstY)
{
	vertex_emit(sna, dstX);
	vertex_emit(sna, dstY);
}
431
432fastcall static void
433gen3_emit_composite_primitive_constant(struct sna *sna,
434				       const struct sna_composite_op *op,
435				       const struct sna_composite_rectangles *r)
436{
437	int16_t dst_x = r->dst.x + op->dst.x;
438	int16_t dst_y = r->dst.y + op->dst.y;
439
440	gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
441	gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
442	gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
443}
444
445fastcall static void
446gen3_emit_composite_boxes_constant(const struct sna_composite_op *op,
447				   const BoxRec *box, int nbox,
448				   float *v)
449{
450	do {
451		v[0] = box->x2;
452		v[1] = box->y2;
453
454		v[2] = box->x1;
455		v[3] = box->y2;
456
457		v[4] = box->x1;
458		v[5] = box->y1;
459
460		box++;
461		v += 6;
462	} while (--nbox);
463}
464
465fastcall static void
466gen3_emit_composite_primitive_identity_gradient(struct sna *sna,
467						const struct sna_composite_op *op,
468						const struct sna_composite_rectangles *r)
469{
470	int16_t dst_x, dst_y;
471	int16_t src_x, src_y;
472
473	dst_x = r->dst.x + op->dst.x;
474	dst_y = r->dst.y + op->dst.y;
475	src_x = r->src.x + op->src.offset[0];
476	src_y = r->src.y + op->src.offset[1];
477
478	gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
479	OUT_VERTEX(src_x + r->width);
480	OUT_VERTEX(src_y + r->height);
481
482	gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
483	OUT_VERTEX(src_x);
484	OUT_VERTEX(src_y + r->height);
485
486	gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
487	OUT_VERTEX(src_x);
488	OUT_VERTEX(src_y);
489}
490
491fastcall static void
492gen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op,
493					    const BoxRec *box, int nbox,
494					    float *v)
495{
496	do {
497		v[0] = box->x2;
498		v[1] = box->y2;
499		v[2] = box->x2 + op->src.offset[0];
500		v[3] = box->y2 + op->src.offset[1];
501
502		v[4] = box->x1;
503		v[5] = box->y2;
504		v[6] = box->x1 + op->src.offset[0];
505		v[7] = box->y2 + op->src.offset[1];
506
507		v[8] = box->x1;
508		v[9] = box->y1;
509		v[10] = box->x1 + op->src.offset[0];
510		v[11] = box->y1 + op->src.offset[1];
511
512		v += 12;
513		box++;
514	} while (--nbox);
515}
516
517fastcall static void
518gen3_emit_composite_primitive_affine_gradient(struct sna *sna,
519					      const struct sna_composite_op *op,
520					      const struct sna_composite_rectangles *r)
521{
522	PictTransform *transform = op->src.transform;
523	int16_t dst_x, dst_y;
524	int16_t src_x, src_y;
525	float *v;
526
527	dst_x = r->dst.x + op->dst.x;
528	dst_y = r->dst.y + op->dst.y;
529	src_x = r->src.x + op->src.offset[0];
530	src_y = r->src.y + op->src.offset[1];
531
532	v = sna->render.vertices + sna->render.vertex_used;
533	sna->render.vertex_used += 12;
534
535	v[0] = dst_x + r->width;
536	v[1] = dst_y + r->height;
537	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
538				    transform, op->src.scale,
539				    &v[2], &v[3]);
540
541	v[4] = dst_x;
542	v[5] = dst_y + r->height;
543	_sna_get_transformed_scaled(src_x, src_y + r->height,
544				    transform, op->src.scale,
545				    &v[6], &v[7]);
546
547	v[8] = dst_x;
548	v[9] = dst_y;
549	_sna_get_transformed_scaled(src_x, src_y,
550				    transform, op->src.scale,
551				    &v[10], &v[11]);
552}
553
554fastcall static void
555gen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op,
556					  const BoxRec *box, int nbox,
557					  float *v)
558{
559	const PictTransform *transform = op->src.transform;
560
561	do {
562		v[0] = box->x2;
563		v[1] = box->y2;
564		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
565					    box->y2 + op->src.offset[1],
566					    transform, op->src.scale,
567					    &v[2], &v[3]);
568
569		v[4] = box->x1;
570		v[5] = box->y2;
571		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
572					    box->y2 + op->src.offset[1],
573					    transform, op->src.scale,
574					    &v[6], &v[7]);
575
576		v[8] = box->x1;
577		v[9] = box->y1;
578		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
579					    box->y1 + op->src.offset[1],
580					    transform, op->src.scale,
581					    &v[10], &v[11]);
582
583		box++;
584		v += 12;
585	} while (--nbox);
586}
587
588fastcall static void
589gen3_emit_composite_primitive_identity_source(struct sna *sna,
590					      const struct sna_composite_op *op,
591					      const struct sna_composite_rectangles *r)
592{
593	float w = r->width;
594	float h = r->height;
595	float *v;
596
597	v = sna->render.vertices + sna->render.vertex_used;
598	sna->render.vertex_used += 12;
599
600	v[8] = v[4] = r->dst.x + op->dst.x;
601	v[0] = v[4] + w;
602
603	v[9] = r->dst.y + op->dst.y;
604	v[5] = v[1] = v[9] + h;
605
606	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
607	v[2] = v[6] + w * op->src.scale[0];
608
609	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
610	v[7] = v[3] = v[11] + h * op->src.scale[1];
611}
612
613fastcall static void
614gen3_emit_composite_boxes_identity_source(const struct sna_composite_op *op,
615					  const BoxRec *box, int nbox,
616					  float *v)
617{
618	do {
619		v[0] = box->x2 + op->dst.x;
620		v[8] = v[4] = box->x1 + op->dst.x;
621		v[5] = v[1] = box->y2 + op->dst.y;
622		v[9] = box->y1 + op->dst.y;
623
624		v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
625		v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
626
627		v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
628		v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
629
630		v += 12;
631		box++;
632	} while (--nbox);
633}
634
635fastcall static void
636gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
637							const struct sna_composite_op *op,
638							const struct sna_composite_rectangles *r)
639{
640	float w = r->width;
641	float h = r->height;
642	float *v;
643
644	v = sna->render.vertices + sna->render.vertex_used;
645	sna->render.vertex_used += 12;
646
647	v[8] = v[4] = r->dst.x;
648	v[9] = r->dst.y;
649
650	v[0] = v[4] + w;
651	v[5] = v[1] = v[9] + h;
652
653	v[10] = v[6] = r->src.x * op->src.scale[0];
654	v[11] = r->src.y * op->src.scale[1];
655
656	v[2] = v[6] + w * op->src.scale[0];
657	v[7] = v[3] = v[11] + h * op->src.scale[1];
658}
659
660fastcall static void
661gen3_emit_composite_boxes_identity_source_no_offset(const struct sna_composite_op *op,
662						    const BoxRec *box, int nbox,
663						    float *v)
664{
665	do {
666		v[0] = box->x2;
667		v[8] = v[4] = box->x1;
668		v[5] = v[1] = box->y2;
669		v[9] = box->y1;
670
671		v[10] = v[6] = box->x1 * op->src.scale[0];
672		v[2] = box->x2 * op->src.scale[0];
673
674		v[11] = box->y1 * op->src.scale[1];
675		v[7] = v[3] = box->y2 * op->src.scale[1];
676
677		v += 12;
678		box++;
679	} while (--nbox);
680}
681
682fastcall static void
683gen3_emit_composite_primitive_affine_source(struct sna *sna,
684					    const struct sna_composite_op *op,
685					    const struct sna_composite_rectangles *r)
686{
687	PictTransform *transform = op->src.transform;
688	int16_t dst_x = r->dst.x + op->dst.x;
689	int16_t dst_y = r->dst.y + op->dst.y;
690	int src_x = r->src.x + (int)op->src.offset[0];
691	int src_y = r->src.y + (int)op->src.offset[1];
692	float *v;
693
694	v = sna->render.vertices + sna->render.vertex_used;
695	sna->render.vertex_used += 12;
696
697	v[0] = dst_x + r->width;
698	v[5] = v[1] = dst_y + r->height;
699	v[8] = v[4] = dst_x;
700	v[9] = dst_y;
701
702	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
703				    transform, op->src.scale,
704				    &v[2], &v[3]);
705
706	_sna_get_transformed_scaled(src_x, src_y + r->height,
707				    transform, op->src.scale,
708				    &v[6], &v[7]);
709
710	_sna_get_transformed_scaled(src_x, src_y,
711				    transform, op->src.scale,
712				    &v[10], &v[11]);
713}
714
715fastcall static void
716gen3_emit_composite_boxes_affine_source(const struct sna_composite_op *op,
717					const BoxRec *box, int nbox,
718					float *v)
719{
720	const PictTransform *transform = op->src.transform;
721
722	do {
723		v[0] = box->x2;
724		v[5] = v[1] = box->y2;
725		v[8] = v[4] = box->x1;
726		v[9] = box->y1;
727
728		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
729					    box->y2 + op->src.offset[1],
730					    transform, op->src.scale,
731					    &v[2], &v[3]);
732
733		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
734					    box->y2 + op->src.offset[1],
735					    transform, op->src.scale,
736					    &v[6], &v[7]);
737
738		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
739					    box->y1 + op->src.offset[1],
740					    transform, op->src.scale,
741					    &v[10], &v[11]);
742
743		v += 12;
744		box++;
745	} while (--nbox);
746}
747
748fastcall static void
749gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna,
750						     const struct sna_composite_op *op,
751						     const struct sna_composite_rectangles *r)
752{
753	float w = r->width;
754	float h = r->height;
755	float *v;
756
757	v = sna->render.vertices + sna->render.vertex_used;
758	sna->render.vertex_used += 12;
759
760	v[8] = v[4] = r->dst.x + op->dst.x;
761	v[0] = v[4] + w;
762
763	v[9] = r->dst.y + op->dst.y;
764	v[5] = v[1] = v[9] + h;
765
766	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
767	v[2] = v[6] + w * op->mask.scale[0];
768
769	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
770	v[7] = v[3] = v[11] + h * op->mask.scale[1];
771}
772
773fastcall static void
774gen3_emit_composite_primitive_constant_identity_mask_no_offset(struct sna *sna,
775							       const struct sna_composite_op *op,
776							       const struct sna_composite_rectangles *r)
777{
778	float w = r->width;
779	float h = r->height;
780	float *v;
781
782	v = sna->render.vertices + sna->render.vertex_used;
783	sna->render.vertex_used += 12;
784
785	v[8] = v[4] = r->dst.x;
786	v[9] = r->dst.y;
787
788	v[0] = v[4] + w;
789	v[5] = v[1] = v[9] + h;
790
791	v[10] = v[6] = r->mask.x * op->mask.scale[0];
792	v[11] = r->mask.y * op->mask.scale[1];
793
794	v[2] = v[6] + w * op->mask.scale[0];
795	v[7] = v[3] = v[11] + h * op->mask.scale[1];
796}
797
798fastcall static void
799gen3_emit_composite_primitive_identity_source_mask(struct sna *sna,
800						   const struct sna_composite_op *op,
801						   const struct sna_composite_rectangles *r)
802{
803	float dst_x, dst_y;
804	float src_x, src_y;
805	float msk_x, msk_y;
806	float w, h;
807	float *v;
808
809	dst_x = r->dst.x + op->dst.x;
810	dst_y = r->dst.y + op->dst.y;
811	src_x = r->src.x + op->src.offset[0];
812	src_y = r->src.y + op->src.offset[1];
813	msk_x = r->mask.x + op->mask.offset[0];
814	msk_y = r->mask.y + op->mask.offset[1];
815	w = r->width;
816	h = r->height;
817
818	v = sna->render.vertices + sna->render.vertex_used;
819	sna->render.vertex_used += 18;
820
821	v[0] = dst_x + w;
822	v[1] = dst_y + h;
823	v[2] = (src_x + w) * op->src.scale[0];
824	v[3] = (src_y + h) * op->src.scale[1];
825	v[4] = (msk_x + w) * op->mask.scale[0];
826	v[5] = (msk_y + h) * op->mask.scale[1];
827
828	v[6] = dst_x;
829	v[7] = v[1];
830	v[8] = src_x * op->src.scale[0];
831	v[9] = v[3];
832	v[10] = msk_x * op->mask.scale[0];
833	v[11] =v[5];
834
835	v[12] = v[6];
836	v[13] = dst_y;
837	v[14] = v[8];
838	v[15] = src_y * op->src.scale[1];
839	v[16] = v[10];
840	v[17] = msk_y * op->mask.scale[1];
841}
842
843fastcall static void
844gen3_emit_composite_primitive_affine_source_mask(struct sna *sna,
845						 const struct sna_composite_op *op,
846						 const struct sna_composite_rectangles *r)
847{
848	int16_t src_x, src_y;
849	float dst_x, dst_y;
850	float msk_x, msk_y;
851	float w, h;
852	float *v;
853
854	dst_x = r->dst.x + op->dst.x;
855	dst_y = r->dst.y + op->dst.y;
856	src_x = r->src.x + op->src.offset[0];
857	src_y = r->src.y + op->src.offset[1];
858	msk_x = r->mask.x + op->mask.offset[0];
859	msk_y = r->mask.y + op->mask.offset[1];
860	w = r->width;
861	h = r->height;
862
863	v = sna->render.vertices + sna->render.vertex_used;
864	sna->render.vertex_used += 18;
865
866	v[0] = dst_x + w;
867	v[1] = dst_y + h;
868	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
869				    op->src.transform, op->src.scale,
870				    &v[2], &v[3]);
871	v[4] = (msk_x + w) * op->mask.scale[0];
872	v[5] = (msk_y + h) * op->mask.scale[1];
873
874	v[6] = dst_x;
875	v[7] = v[1];
876	_sna_get_transformed_scaled(src_x, src_y + r->height,
877				    op->src.transform, op->src.scale,
878				    &v[8], &v[9]);
879	v[10] = msk_x * op->mask.scale[0];
880	v[11] =v[5];
881
882	v[12] = v[6];
883	v[13] = dst_y;
884	_sna_get_transformed_scaled(src_x, src_y,
885				    op->src.transform, op->src.scale,
886				    &v[14], &v[15]);
887	v[16] = v[10];
888	v[17] = msk_y * op->mask.scale[1];
889}
890
891static void
892gen3_emit_composite_texcoord(struct sna *sna,
893			     const struct sna_composite_channel *channel,
894			     int16_t x, int16_t y)
895{
896	float s = 0, t = 0, w = 1;
897
898	switch (channel->u.gen3.type) {
899	case SHADER_OPACITY:
900	case SHADER_NONE:
901	case SHADER_ZERO:
902	case SHADER_BLACK:
903	case SHADER_WHITE:
904	case SHADER_CONSTANT:
905		break;
906
907	case SHADER_LINEAR:
908	case SHADER_RADIAL:
909	case SHADER_TEXTURE:
910		x += channel->offset[0];
911		y += channel->offset[1];
912		if (channel->is_affine) {
913			sna_get_transformed_coordinates(x, y,
914							channel->transform,
915							&s, &t);
916			OUT_VERTEX(s * channel->scale[0]);
917			OUT_VERTEX(t * channel->scale[1]);
918		} else {
919			sna_get_transformed_coordinates_3d(x, y,
920							   channel->transform,
921							   &s, &t, &w);
922			OUT_VERTEX(s * channel->scale[0]);
923			OUT_VERTEX(t * channel->scale[1]);
924			OUT_VERTEX(0);
925			OUT_VERTEX(w);
926		}
927		break;
928	}
929}
930
931static void
932gen3_emit_composite_vertex(struct sna *sna,
933			   const struct sna_composite_op *op,
934			   int16_t srcX, int16_t srcY,
935			   int16_t maskX, int16_t maskY,
936			   int16_t dstX, int16_t dstY)
937{
938	gen3_emit_composite_dstcoord(sna, dstX, dstY);
939	gen3_emit_composite_texcoord(sna, &op->src, srcX, srcY);
940	gen3_emit_composite_texcoord(sna, &op->mask, maskX, maskY);
941}
942
943fastcall static void
944gen3_emit_composite_primitive(struct sna *sna,
945			      const struct sna_composite_op *op,
946			      const struct sna_composite_rectangles *r)
947{
948	gen3_emit_composite_vertex(sna, op,
949				   r->src.x + r->width,
950				   r->src.y + r->height,
951				   r->mask.x + r->width,
952				   r->mask.y + r->height,
953				   op->dst.x + r->dst.x + r->width,
954				   op->dst.y + r->dst.y + r->height);
955	gen3_emit_composite_vertex(sna, op,
956				   r->src.x,
957				   r->src.y + r->height,
958				   r->mask.x,
959				   r->mask.y + r->height,
960				   op->dst.x + r->dst.x,
961				   op->dst.y + r->dst.y + r->height);
962	gen3_emit_composite_vertex(sna, op,
963				   r->src.x,
964				   r->src.y,
965				   r->mask.x,
966				   r->mask.y,
967				   op->dst.x + r->dst.x,
968				   op->dst.y + r->dst.y);
969}
970
971#if defined(sse2) && !defined(__x86_64__)
972sse2 fastcall static void
973gen3_emit_composite_primitive_constant__sse2(struct sna *sna,
974					     const struct sna_composite_op *op,
975					     const struct sna_composite_rectangles *r)
976{
977	float *v;
978
979	v = sna->render.vertices + sna->render.vertex_used;
980	sna->render.vertex_used += 6;
981
982	v[4] = v[2] = r->dst.x + op->dst.x;
983	v[5] = r->dst.y + op->dst.y;
984
985	v[0] = v[2] + r->width;
986	v[3] = v[1] = v[5] + r->height;
987
988}
989
990sse2 fastcall static void
991gen3_emit_composite_boxes_constant__sse2(const struct sna_composite_op *op,
992					 const BoxRec *box, int nbox,
993					 float *v)
994{
995	do {
996		v[0] = box->x2;
997		v[3] = v[1] = box->y2;
998		v[4] = v[2] = box->x1;
999		v[5] = box->y1;
1000
1001		box++;
1002		v += 6;
1003	} while (--nbox);
1004}
1005
1006sse2 fastcall static void
1007gen3_emit_composite_primitive_identity_gradient__sse2(struct sna *sna,
1008						      const struct sna_composite_op *op,
1009						      const struct sna_composite_rectangles *r)
1010{
1011	int16_t x, y;
1012	float *v;
1013
1014	v = sna->render.vertices + sna->render.vertex_used;
1015	sna->render.vertex_used += 12;
1016
1017	x = r->dst.x + op->dst.x;
1018	y = r->dst.y + op->dst.y;
1019	v[0] = x + r->width;
1020	v[5] = v[1] = y + r->height;
1021	v[8] = v[4] = x;
1022	v[9] = y;
1023
1024	x = r->src.x + op->src.offset[0];
1025	y = r->src.y + op->src.offset[1];
1026	v[2] = x + r->width;
1027	v[7] = v[3] = y + r->height;
1028	v[10] = v[6] = x;
1029	v[11] = y;
1030}
1031
1032sse2 fastcall static void
1033gen3_emit_composite_boxes_identity_gradient__sse2(const struct sna_composite_op *op,
1034						  const BoxRec *box, int nbox,
1035						  float *v)
1036{
1037	do {
1038		v[0] = box->x2;
1039		v[5] = v[1] = box->y2;
1040		v[8] = v[4] = box->x1;
1041		v[9] = box->y1;
1042
1043		v[2] = box->x2 + op->src.offset[0];
1044		v[7] = v[3] = box->y2 + op->src.offset[1];
1045		v[10] = v[6] = box->x1 + op->src.offset[0];
1046		v[11] = box->y1 + op->src.offset[1];
1047
1048		v += 12;
1049		box++;
1050	} while (--nbox);
1051}
1052
1053sse2 fastcall static void
1054gen3_emit_composite_primitive_affine_gradient__sse2(struct sna *sna,
1055						    const struct sna_composite_op *op,
1056						    const struct sna_composite_rectangles *r)
1057{
1058	PictTransform *transform = op->src.transform;
1059	int16_t dst_x, dst_y;
1060	int16_t src_x, src_y;
1061	float *v;
1062
1063	dst_x = r->dst.x + op->dst.x;
1064	dst_y = r->dst.y + op->dst.y;
1065	src_x = r->src.x + op->src.offset[0];
1066	src_y = r->src.y + op->src.offset[1];
1067
1068	v = sna->render.vertices + sna->render.vertex_used;
1069	sna->render.vertex_used += 12;
1070
1071	v[0] = dst_x + r->width;
1072	v[1] = dst_y + r->height;
1073	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
1074				    transform, op->src.scale,
1075				    &v[2], &v[3]);
1076
1077	v[4] = dst_x;
1078	v[5] = dst_y + r->height;
1079	_sna_get_transformed_scaled(src_x, src_y + r->height,
1080				    transform, op->src.scale,
1081				    &v[6], &v[7]);
1082
1083	v[8] = dst_x;
1084	v[9] = dst_y;
1085	_sna_get_transformed_scaled(src_x, src_y,
1086				    transform, op->src.scale,
1087				    &v[10], &v[11]);
1088}
1089
1090sse2 fastcall static void
1091gen3_emit_composite_boxes_affine_gradient__sse2(const struct sna_composite_op *op,
1092						const BoxRec *box, int nbox,
1093						float *v)
1094{
1095	const PictTransform *transform = op->src.transform;
1096
1097	do {
1098		v[0] = box->x2;
1099		v[1] = box->y2;
1100		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
1101					    box->y2 + op->src.offset[1],
1102					    transform, op->src.scale,
1103					    &v[2], &v[3]);
1104
1105		v[4] = box->x1;
1106		v[5] = box->y2;
1107		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
1108					    box->y2 + op->src.offset[1],
1109					    transform, op->src.scale,
1110					    &v[6], &v[7]);
1111
1112		v[8] = box->x1;
1113		v[9] = box->y1;
1114		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
1115					    box->y1 + op->src.offset[1],
1116					    transform, op->src.scale,
1117					    &v[10], &v[11]);
1118
1119		box++;
1120		v += 12;
1121	} while (--nbox);
1122}
1123
1124sse2 fastcall static void
1125gen3_emit_composite_primitive_identity_source__sse2(struct sna *sna,
1126						    const struct sna_composite_op *op,
1127						    const struct sna_composite_rectangles *r)
1128{
1129	float w = r->width;
1130	float h = r->height;
1131	float *v;
1132
1133	v = sna->render.vertices + sna->render.vertex_used;
1134	sna->render.vertex_used += 12;
1135
1136	v[8] = v[4] = r->dst.x + op->dst.x;
1137	v[0] = v[4] + w;
1138
1139	v[9] = r->dst.y + op->dst.y;
1140	v[5] = v[1] = v[9] + h;
1141
1142	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
1143	v[2] = v[6] + w * op->src.scale[0];
1144
1145	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
1146	v[7] = v[3] = v[11] + h * op->src.scale[1];
1147}
1148
1149sse2 fastcall static void
1150gen3_emit_composite_boxes_identity_source__sse2(const struct sna_composite_op *op,
1151						const BoxRec *box, int nbox,
1152						float *v)
1153{
1154	do {
1155		v[0] = box->x2 + op->dst.x;
1156		v[8] = v[4] = box->x1 + op->dst.x;
1157		v[5] = v[1] = box->y2 + op->dst.y;
1158		v[9] = box->y1 + op->dst.y;
1159
1160		v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
1161		v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
1162
1163		v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
1164		v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
1165
1166		v += 12;
1167		box++;
1168	} while (--nbox);
1169}
1170
1171sse2 fastcall static void
1172gen3_emit_composite_primitive_identity_source_no_offset__sse2(struct sna *sna,
1173							      const struct sna_composite_op *op,
1174							      const struct sna_composite_rectangles *r)
1175{
1176	float w = r->width;
1177	float h = r->height;
1178	float *v;
1179
1180	v = sna->render.vertices + sna->render.vertex_used;
1181	sna->render.vertex_used += 12;
1182
1183	v[8] = v[4] = r->dst.x;
1184	v[9] = r->dst.y;
1185
1186	v[0] = v[4] + w;
1187	v[5] = v[1] = v[9] + h;
1188
1189	v[10] = v[6] = r->src.x * op->src.scale[0];
1190	v[11] = r->src.y * op->src.scale[1];
1191
1192	v[2] = v[6] + w * op->src.scale[0];
1193	v[7] = v[3] = v[11] + h * op->src.scale[1];
1194}
1195
1196sse2 fastcall static void
1197gen3_emit_composite_boxes_identity_source_no_offset__sse2(const struct sna_composite_op *op,
1198							  const BoxRec *box, int nbox,
1199							  float *v)
1200{
1201	do {
1202		v[0] = box->x2;
1203		v[8] = v[4] = box->x1;
1204		v[5] = v[1] = box->y2;
1205		v[9] = box->y1;
1206
1207		v[10] = v[6] = box->x1 * op->src.scale[0];
1208		v[2] = box->x2 * op->src.scale[0];
1209
1210		v[11] = box->y1 * op->src.scale[1];
1211		v[7] = v[3] = box->y2 * op->src.scale[1];
1212
1213		v += 12;
1214		box++;
1215	} while (--nbox);
1216}
1217
1218sse2 fastcall static void
1219gen3_emit_composite_primitive_affine_source__sse2(struct sna *sna,
1220						  const struct sna_composite_op *op,
1221						  const struct sna_composite_rectangles *r)
1222{
1223	PictTransform *transform = op->src.transform;
1224	int16_t dst_x = r->dst.x + op->dst.x;
1225	int16_t dst_y = r->dst.y + op->dst.y;
1226	int src_x = r->src.x + (int)op->src.offset[0];
1227	int src_y = r->src.y + (int)op->src.offset[1];
1228	float *v;
1229
1230	v = sna->render.vertices + sna->render.vertex_used;
1231	sna->render.vertex_used += 12;
1232
1233	v[0] = dst_x + r->width;
1234	v[5] = v[1] = dst_y + r->height;
1235	v[8] = v[4] = dst_x;
1236	v[9] = dst_y;
1237
1238	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
1239				    transform, op->src.scale,
1240				    &v[2], &v[3]);
1241
1242	_sna_get_transformed_scaled(src_x, src_y + r->height,
1243				    transform, op->src.scale,
1244				    &v[6], &v[7]);
1245
1246	_sna_get_transformed_scaled(src_x, src_y,
1247				    transform, op->src.scale,
1248				    &v[10], &v[11]);
1249}
1250
1251sse2 fastcall static void
1252gen3_emit_composite_boxes_affine_source__sse2(const struct sna_composite_op *op,
1253					      const BoxRec *box, int nbox,
1254					      float *v)
1255{
1256	const PictTransform *transform = op->src.transform;
1257
1258	do {
1259		v[0] = box->x2;
1260		v[5] = v[1] = box->y2;
1261		v[8] = v[4] = box->x1;
1262		v[9] = box->y1;
1263
1264		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
1265					    box->y2 + op->src.offset[1],
1266					    transform, op->src.scale,
1267					    &v[2], &v[3]);
1268
1269		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
1270					    box->y2 + op->src.offset[1],
1271					    transform, op->src.scale,
1272					    &v[6], &v[7]);
1273
1274		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
1275					    box->y1 + op->src.offset[1],
1276					    transform, op->src.scale,
1277					    &v[10], &v[11]);
1278
1279		v += 12;
1280		box++;
1281	} while (--nbox);
1282}
1283
1284sse2 fastcall static void
1285gen3_emit_composite_primitive_constant_identity_mask__sse2(struct sna *sna,
1286							   const struct sna_composite_op *op,
1287							   const struct sna_composite_rectangles *r)
1288{
1289	float w = r->width;
1290	float h = r->height;
1291	float *v;
1292
1293	v = sna->render.vertices + sna->render.vertex_used;
1294	sna->render.vertex_used += 12;
1295
1296	v[8] = v[4] = r->dst.x + op->dst.x;
1297	v[0] = v[4] + w;
1298
1299	v[9] = r->dst.y + op->dst.y;
1300	v[5] = v[1] = v[9] + h;
1301
1302	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
1303	v[2] = v[6] + w * op->mask.scale[0];
1304
1305	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
1306	v[7] = v[3] = v[11] + h * op->mask.scale[1];
1307}
1308
1309sse2 fastcall static void
1310gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2(struct sna *sna,
1311								     const struct sna_composite_op *op,
1312								     const struct sna_composite_rectangles *r)
1313{
1314	float w = r->width;
1315	float h = r->height;
1316	float *v;
1317
1318	v = sna->render.vertices + sna->render.vertex_used;
1319	sna->render.vertex_used += 12;
1320
1321	v[8] = v[4] = r->dst.x;
1322	v[9] = r->dst.y;
1323
1324	v[0] = v[4] + w;
1325	v[5] = v[1] = v[9] + h;
1326
1327	v[10] = v[6] = r->mask.x * op->mask.scale[0];
1328	v[11] = r->mask.y * op->mask.scale[1];
1329
1330	v[2] = v[6] + w * op->mask.scale[0];
1331	v[7] = v[3] = v[11] + h * op->mask.scale[1];
1332}
1333
1334sse2 fastcall static void
1335gen3_emit_composite_primitive_identity_source_mask__sse2(struct sna *sna,
1336							 const struct sna_composite_op *op,
1337							 const struct sna_composite_rectangles *r)
1338{
1339	float dst_x, dst_y;
1340	float src_x, src_y;
1341	float msk_x, msk_y;
1342	float w, h;
1343	float *v;
1344
1345	dst_x = r->dst.x + op->dst.x;
1346	dst_y = r->dst.y + op->dst.y;
1347	src_x = r->src.x + op->src.offset[0];
1348	src_y = r->src.y + op->src.offset[1];
1349	msk_x = r->mask.x + op->mask.offset[0];
1350	msk_y = r->mask.y + op->mask.offset[1];
1351	w = r->width;
1352	h = r->height;
1353
1354	v = sna->render.vertices + sna->render.vertex_used;
1355	sna->render.vertex_used += 18;
1356
1357	v[0] = dst_x + w;
1358	v[1] = dst_y + h;
1359	v[2] = (src_x + w) * op->src.scale[0];
1360	v[3] = (src_y + h) * op->src.scale[1];
1361	v[4] = (msk_x + w) * op->mask.scale[0];
1362	v[5] = (msk_y + h) * op->mask.scale[1];
1363
1364	v[6] = dst_x;
1365	v[7] = v[1];
1366	v[8] = src_x * op->src.scale[0];
1367	v[9] = v[3];
1368	v[10] = msk_x * op->mask.scale[0];
1369	v[11] =v[5];
1370
1371	v[12] = v[6];
1372	v[13] = dst_y;
1373	v[14] = v[8];
1374	v[15] = src_y * op->src.scale[1];
1375	v[16] = v[10];
1376	v[17] = msk_y * op->mask.scale[1];
1377}
1378
1379sse2 fastcall static void
1380gen3_emit_composite_primitive_affine_source_mask__sse2(struct sna *sna,
1381						       const struct sna_composite_op *op,
1382						       const struct sna_composite_rectangles *r)
1383{
1384	int16_t src_x, src_y;
1385	float dst_x, dst_y;
1386	float msk_x, msk_y;
1387	float w, h;
1388	float *v;
1389
1390	dst_x = r->dst.x + op->dst.x;
1391	dst_y = r->dst.y + op->dst.y;
1392	src_x = r->src.x + op->src.offset[0];
1393	src_y = r->src.y + op->src.offset[1];
1394	msk_x = r->mask.x + op->mask.offset[0];
1395	msk_y = r->mask.y + op->mask.offset[1];
1396	w = r->width;
1397	h = r->height;
1398
1399	v = sna->render.vertices + sna->render.vertex_used;
1400	sna->render.vertex_used += 18;
1401
1402	v[0] = dst_x + w;
1403	v[1] = dst_y + h;
1404	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
1405				    op->src.transform, op->src.scale,
1406				    &v[2], &v[3]);
1407	v[4] = (msk_x + w) * op->mask.scale[0];
1408	v[5] = (msk_y + h) * op->mask.scale[1];
1409
1410	v[6] = dst_x;
1411	v[7] = v[1];
1412	_sna_get_transformed_scaled(src_x, src_y + r->height,
1413				    op->src.transform, op->src.scale,
1414				    &v[8], &v[9]);
1415	v[10] = msk_x * op->mask.scale[0];
1416	v[11] =v[5];
1417
1418	v[12] = v[6];
1419	v[13] = dst_y;
1420	_sna_get_transformed_scaled(src_x, src_y,
1421				    op->src.transform, op->src.scale,
1422				    &v[14], &v[15]);
1423	v[16] = v[10];
1424	v[17] = msk_y * op->mask.scale[1];
1425}
1426#endif
1427
/*
 * Emit the two fragment-shader instructions of a perspective divide:
 * first the reciprocal of in.W is written to 'out', then
 * (in.X, in.Y, 0, 1) is multiplied by 'out' and stored back in 'out'.
 *
 * 'in' and 'out' are shader register identifiers handed to the gen3_fs_*
 * helpers.  'sna' is not named in the body; presumably the gen3_fs_*
 * helpers are macros that emit through it -- confirm in gen3_render.h.
 */
static inline void
gen3_2d_perspective(struct sna *sna, int in, int out)
{
	/* out = 1 / in.W (the 0 sits where other calls pass a MASK_* value) */
	gen3_fs_rcp(out, 0, gen3_fs_operand(in, W, W, W, W));
	/* out = (in.X, in.Y, 0, 1) * out */
	gen3_fs_mul(out,
		    gen3_fs_operand(in, X, Y, ZERO, ONE),
		    gen3_fs_operand_reg(out));
}
1436
/*
 * Emit the shader instructions that compute the lookup coordinate for a
 * linear gradient channel.
 *
 * The register index of the channel's constants is read from
 * channel->u.gen3.constants.  For a non-affine channel the input is first
 * passed through gen3_2d_perspective() into FS_U0, which then replaces
 * 'in'.  'out' is cleared and its X component (MASK_X) receives the dp3 of
 * (in.X, in.Y, 1) with the constant register.
 */
static inline void
gen3_linear_coord(struct sna *sna,
		  const struct sna_composite_channel *channel,
		  int in, int out)
{
	int c = channel->u.gen3.constants;

	if (!channel->is_affine) {
		gen3_2d_perspective(sna, in, FS_U0);
		in = FS_U0;
	}

	/* Zero every component first; the dp3 below writes only MASK_X. */
	gen3_fs_mov(out, gen3_fs_operand_zero());
	gen3_fs_dp3(out, MASK_X,
		    gen3_fs_operand(in, X, Y, ONE, ZERO),
		    gen3_fs_operand_reg(c));
}
1454
/*
 * Emit the shader instructions that compute the lookup coordinate for a
 * radial gradient channel, leaving the result in out.X.
 *
 * The register index of the channel's constants is read from
 * channel->u.gen3.constants and the variant from channel->u.gen3.mode
 * (RADIAL_ONE or RADIAL_TWO; any other mode emits nothing beyond the
 * optional perspective divide).  FS_U0 and, for RADIAL_TWO, FS_U1 are used
 * as scratch registers.  For a non-affine channel the input is first passed
 * through gen3_2d_perspective() into FS_U0.
 *
 * NOTE(review): the layout of the constant registers (what c.x/y/z/w and
 * (c+1).x/y/z/w hold) is set up elsewhere; the per-instruction comments
 * below describe what the layout is presumed to be -- confirm against the
 * code that fills op->u.gen3.constants.
 */
static void
gen3_radial_coord(struct sna *sna,
		  const struct sna_composite_channel *channel,
		  int in, int out)
{
	int c = channel->u.gen3.constants;

	if (!channel->is_affine) {
		gen3_2d_perspective(sna, in, FS_U0);
		in = FS_U0;
	}

	switch (channel->u.gen3.mode) {
	case RADIAL_ONE:
		/*
		   pdx = (x - c1x) / dr, pdy = (y - c1y) / dr;
		   r² = pdx*pdx + pdy*pdy
		   t = r²/sqrt(r²) - r1/dr;
		   */
		/* u0.xy = in.xy * c.z - c.xy */
		gen3_fs_mad(FS_U0, MASK_X | MASK_Y,
			    gen3_fs_operand(in, X, Y, ZERO, ZERO),
			    gen3_fs_operand(c, Z, Z, ZERO, ZERO),
			    gen3_fs_operand(c, NEG_X, NEG_Y, ZERO, ZERO));
		/* u0.x = u0.x*u0.x + u0.y*u0.y  [r²] */
		gen3_fs_dp2add(FS_U0, MASK_X,
			       gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
			       gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
			       gen3_fs_operand_zero());
		/* out.x = 1/sqrt(r²), then out = r² * out.x + c.w */
		gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U0, X, X, X, X));
		gen3_fs_mad(out, 0,
			    gen3_fs_operand(FS_U0, X, ZERO, ZERO, ZERO),
			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
			    gen3_fs_operand(c, W, ZERO, ZERO, ZERO));
		break;

	case RADIAL_TWO:
		/*
		   pdx = x - c1x, pdy = y - c1y;
		   A = dx² + dy² - dr²
		   B = -2*(pdx*dx + pdy*dy + r1*dr);
		   C = pdx² + pdy² - r1²;
		   det = B*B - 4*A*C;
		   t = (-B + sqrt (det)) / (2 * A)
		   */

		/* u0.x = pdx, u0.y = pdy, u[0].z = r1; */
		gen3_fs_add(FS_U0,
			    gen3_fs_operand(in, X, Y, ZERO, ZERO),
			    gen3_fs_operand(c, X, Y, Z, ZERO));
		/* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */
		gen3_fs_dp3(FS_U0, MASK_W,
			    gen3_fs_operand(FS_U0, X, Y, ONE, ZERO),
			    gen3_fs_operand(c+1, X, Y, Z, ZERO));
		/* u1.x = pdx² + pdy² - r1²; [C] */
		gen3_fs_dp3(FS_U1, MASK_X,
			    gen3_fs_operand(FS_U0, X, Y, Z, ZERO),
			    gen3_fs_operand(FS_U0, X, Y, NEG_Z, ZERO));
		/* u1.x = C, u1.y = B, u1.z=-4*A; */
		gen3_fs_mov_masked(FS_U1, MASK_Y, gen3_fs_operand(FS_U0, W, W, W, W));
		gen3_fs_mov_masked(FS_U1, MASK_Z, gen3_fs_operand(c, W, W, W, W));
		/* u1.x = B² - 4*A*C */
		gen3_fs_dp2add(FS_U1, MASK_X,
			       gen3_fs_operand(FS_U1, X, Y, ZERO, ZERO),
			       gen3_fs_operand(FS_U1, Z, Y, ZERO, ZERO),
			       gen3_fs_operand_zero());
		/* out.x = -B + sqrt (B² - 4*A*C), */
		/* (the square root is formed as det * 1/sqrt(det)) */
		gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U1, X, X, X, X));
		gen3_fs_mad(out, MASK_X,
			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
			    gen3_fs_operand(FS_U1, X, ZERO, ZERO, ZERO),
			    gen3_fs_operand(FS_U0, NEG_W, ZERO, ZERO, ZERO));
		/* out.x = (-B + sqrt (B² - 4*A*C)) / (2 * A), */
		gen3_fs_mul(out,
			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
			    gen3_fs_operand(c+1, W, ZERO, ZERO, ZERO));
		break;
	}
}
1532
/*
 * Emit the pixel shader program for a composite operation, unless an
 * identical one was the last emitted.
 *
 * A 32-bit id is packed from the properties that select the program (source
 * and mask shader types, affine/alpha-fixup/rb-reversed flags, whether the
 * blend uses source alpha, component alpha, alpha-only destination and the
 * op-level rb_reversed flag).  If it equals state->last_shader nothing is
 * emitted.
 *
 * Otherwise one batch dword is reserved for the program header, the
 * declarations and instructions are emitted through the gen3_fs_* helpers,
 * and at 'done' the header is filled in with
 * _3DSTATE_PIXEL_SHADER_PROGRAM | (length - 2), length being the number of
 * dwords emitted including the header.
 *
 * 'blend' indexes gen3_blend_op[] and is only consulted for its src_alpha
 * field.
 */
static void
gen3_composite_emit_shader(struct sna *sna,
			   const struct sna_composite_op *op,
			   uint8_t blend)
{
	/* A destination format with no RGB bits is treated as alpha-only. */
	bool dst_is_alpha = PIXMAN_FORMAT_RGB(op->dst.format) == 0;
	const struct sna_composite_channel *src, *mask;
	struct gen3_render_state *state = &sna->render_state.gen3;
	uint32_t shader_offset, id;
	int src_reg, mask_reg;
	int t, length;

	src = &op->src;
	mask = &op->mask;
	if (mask->u.gen3.type == SHADER_NONE)
		mask = NULL;

	/*
	 * Bits 0-6 describe the source, bits 8-16 the mask (if any),
	 * bits 24-25 the destination/operation.
	 */
	id = (src->u.gen3.type |
	      src->is_affine << 4 |
	      src->alpha_fixup << 5 |
	      src->rb_reversed << 6);
	if (mask) {
		id |= (mask->u.gen3.type << 8 |
		       mask->is_affine << 12 |
		       gen3_blend_op[blend].src_alpha << 13 |
		       op->has_component_alpha << 14 |
		       mask->alpha_fixup << 15 |
		       mask->rb_reversed << 16);
	}
	id |= dst_is_alpha << 24;
	id |= op->rb_reversed << 25;

	if (id == state->last_shader)
		return;

	state->last_shader = id;

	/* Reserve the header dword; it is written at 'done'. */
	shader_offset = sna->kgem.nbatch++;
	/*
	 * Declare the source inputs.  t counts the sampler/texcoord pair
	 * taken by the source so that the mask declarations below use
	 * FS_S0 + t / FS_T0 + t.
	 */
	t = 0;
	switch (src->u.gen3.type) {
	case SHADER_NONE:
	case SHADER_OPACITY:
		assert(0);
		/* no break: continues into the next case if assert() is compiled out */
	case SHADER_ZERO:
	case SHADER_BLACK:
	case SHADER_WHITE:
		break;
	case SHADER_CONSTANT:
		gen3_fs_dcl(FS_T8);
		src_reg = FS_T8;
		break;
	case SHADER_TEXTURE:
	case SHADER_RADIAL:
	case SHADER_LINEAR:
		gen3_fs_dcl(FS_S0);
		gen3_fs_dcl(FS_T0);
		t++;
		break;
	}

	if (mask == NULL) {
		/* Solid sources need a single move into the output colour. */
		switch (src->u.gen3.type) {
		case SHADER_ZERO:
			gen3_fs_mov(FS_OC, gen3_fs_operand_zero());
			goto done;
		case SHADER_BLACK:
			if (dst_is_alpha)
				gen3_fs_mov(FS_OC, gen3_fs_operand_one());
			else
				gen3_fs_mov(FS_OC, gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ONE));
			goto done;
		case SHADER_WHITE:
			gen3_fs_mov(FS_OC, gen3_fs_operand_one());
			goto done;
		}
		/* Alpha forced to one and only alpha is written: output is one. */
		if (src->alpha_fixup && dst_is_alpha) {
			gen3_fs_mov(FS_OC, gen3_fs_operand_one());
			goto done;
		}
		/* No mask, so load directly to output color */
		if (src->u.gen3.type != SHADER_CONSTANT) {
			/* ... unless a swizzle is needed afterwards, then go via R0 */
			if (dst_is_alpha || src->rb_reversed ^ op->rb_reversed)
				src_reg = FS_R0;
			else
				src_reg = FS_OC;
		}
		switch (src->u.gen3.type) {
		case SHADER_LINEAR:
			gen3_linear_coord(sna, src, FS_T0, FS_R0);
			gen3_fs_texld(src_reg, FS_S0, FS_R0);
			break;

		case SHADER_RADIAL:
			gen3_radial_coord(sna, src, FS_T0, FS_R0);
			gen3_fs_texld(src_reg, FS_S0, FS_R0);
			break;

		case SHADER_TEXTURE:
			if (src->is_affine)
				gen3_fs_texld(src_reg, FS_S0, FS_T0);
			else
				gen3_fs_texldp(src_reg, FS_S0, FS_T0);
			break;

		case SHADER_NONE:
		case SHADER_WHITE:
		case SHADER_BLACK:
		case SHADER_ZERO:
			assert(0);
			/* no break: continues into the next case if assert() is compiled out */
		case SHADER_CONSTANT:
			break;
		}

		/* Move/swizzle into the output colour if not loaded there already. */
		if (src_reg != FS_OC) {
			if (src->alpha_fixup)
				gen3_fs_mov(FS_OC,
					    src->rb_reversed ^ op->rb_reversed ?
					    gen3_fs_operand(src_reg, Z, Y, X, ONE) :
					    gen3_fs_operand(src_reg, X, Y, Z, ONE));
			else if (dst_is_alpha)
				gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, W, W, W, W));
			else if (src->rb_reversed ^ op->rb_reversed)
				gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, Z, Y, X, W));
			else
				gen3_fs_mov(FS_OC, gen3_fs_operand_reg(src_reg));
		} else if (src->alpha_fixup)
			gen3_fs_mov_masked(FS_OC, MASK_W, gen3_fs_operand_one());
	} else {
		/*
		 * With a reversed destination the result is built in FS_U0
		 * and swizzled into FS_OC at mask_done.
		 */
		int out_reg = FS_OC;
		if (op->rb_reversed)
			out_reg = FS_U0;

		/* Declare the mask inputs after those of the source. */
		switch (mask->u.gen3.type) {
		case SHADER_CONSTANT:
			gen3_fs_dcl(FS_T9);
			mask_reg = FS_T9;
			break;
		case SHADER_TEXTURE:
		case SHADER_LINEAR:
		case SHADER_RADIAL:
			gen3_fs_dcl(FS_S0 + t);
			/* fall through */
		case SHADER_OPACITY:
			gen3_fs_dcl(FS_T0 + t);
			break;
		case SHADER_ZERO:
		case SHADER_BLACK:
			assert(0);
			/* no break: continues into the next case if assert() is compiled out */
		case SHADER_NONE:
		case SHADER_WHITE:
			break;
		}

		/* Load the source into R0; t is recounted for the mask loads. */
		t = 0;
		switch (src->u.gen3.type) {
		case SHADER_LINEAR:
			gen3_linear_coord(sna, src, FS_T0, FS_R0);
			gen3_fs_texld(FS_R0, FS_S0, FS_R0);
			src_reg = FS_R0;
			t++;
			break;

		case SHADER_RADIAL:
			gen3_radial_coord(sna, src, FS_T0, FS_R0);
			gen3_fs_texld(FS_R0, FS_S0, FS_R0);
			src_reg = FS_R0;
			t++;
			break;

		case SHADER_TEXTURE:
			if (src->is_affine)
				gen3_fs_texld(FS_R0, FS_S0, FS_T0);
			else
				gen3_fs_texldp(FS_R0, FS_S0, FS_T0);
			src_reg = FS_R0;
			t++;
			break;

		case SHADER_CONSTANT:
		case SHADER_NONE:
		case SHADER_ZERO:
		case SHADER_BLACK:
		case SHADER_WHITE:
			break;
		}
		/*
		 * NOTE(review): src_reg is only assigned for CONSTANT, LINEAR,
		 * RADIAL and TEXTURE sources; these fixups presumably never
		 * apply to the solid types -- confirm with the callers.
		 */
		if (src->alpha_fixup)
			gen3_fs_mov_masked(src_reg, MASK_W, gen3_fs_operand_one());
		if (src->rb_reversed)
			gen3_fs_mov(src_reg, gen3_fs_operand(src_reg, Z, Y, X, W));

		/* Load the mask into R1, or combine directly for an opacity mask. */
		switch (mask->u.gen3.type) {
		case SHADER_LINEAR:
			gen3_linear_coord(sna, mask, FS_T0 + t, FS_R1);
			gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
			mask_reg = FS_R1;
			break;

		case SHADER_RADIAL:
			gen3_radial_coord(sna, mask, FS_T0 + t, FS_R1);
			gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
			mask_reg = FS_R1;
			break;

		case SHADER_TEXTURE:
			if (mask->is_affine)
				gen3_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
			else
				gen3_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
			mask_reg = FS_R1;
			break;

		case SHADER_OPACITY:
			/* The opacity is the X component of texcoord FS_T0 + t. */
			switch (src->u.gen3.type) {
			case SHADER_BLACK:
			case SHADER_WHITE:
				if (dst_is_alpha || src->u.gen3.type == SHADER_WHITE) {
					gen3_fs_mov(out_reg,
						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
				} else {
					gen3_fs_mov(out_reg,
						    gen3_fs_operand(FS_T0 + t, ZERO, ZERO, ZERO, X));
				}
				break;
			default:
				if (dst_is_alpha) {
					gen3_fs_mul(out_reg,
						    gen3_fs_operand(src_reg, W, W, W, W),
						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
				} else {
					gen3_fs_mul(out_reg,
						    gen3_fs_operand(src_reg, X, Y, Z, W),
						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
				}
			}
			goto mask_done;

		case SHADER_CONSTANT:
		case SHADER_ZERO:
		case SHADER_BLACK:
		case SHADER_WHITE:
		case SHADER_NONE:
			break;
		}
		if (mask->alpha_fixup)
			gen3_fs_mov_masked(mask_reg, MASK_W, gen3_fs_operand_one());
		if (mask->rb_reversed)
			gen3_fs_mov(mask_reg, gen3_fs_operand(mask_reg, Z, Y, X, W));

		if (dst_is_alpha) {
			/* Alpha-only destination: only the alpha channels matter. */
			switch (src->u.gen3.type) {
			case SHADER_BLACK:
			case SHADER_WHITE:
				gen3_fs_mov(out_reg,
					    gen3_fs_operand(mask_reg, W, W, W, W));
				break;
			default:
				gen3_fs_mul(out_reg,
					    gen3_fs_operand(src_reg, W, W, W, W),
					    gen3_fs_operand(mask_reg, W, W, W, W));
				break;
			}
		} else {
			/* If component alpha is active in the mask and the blend
			 * operation uses the source alpha, then we know we don't
			 * need the source value (otherwise we would have hit a
			 * fallback earlier), so we provide the source alpha (src.A *
			 * mask.X) as output color.
			 * Conversely, if CA is set and we don't need the source alpha,
			 * then we produce the source value (src.X * mask.X) and the
			 * source alpha is unused.  Otherwise, we provide the non-CA
			 * source value (src.X * mask.A).
			 */
			if (op->has_component_alpha) {
				switch (src->u.gen3.type) {
				case SHADER_BLACK:
					if (gen3_blend_op[blend].src_alpha)
						gen3_fs_mov(out_reg,
							    gen3_fs_operand_reg(mask_reg));
					else
						gen3_fs_mov(out_reg,
							    gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
					break;
				case SHADER_WHITE:
					gen3_fs_mov(out_reg,
						    gen3_fs_operand_reg(mask_reg));
					break;
				default:
					if (gen3_blend_op[blend].src_alpha)
						gen3_fs_mul(out_reg,
							    gen3_fs_operand(src_reg, W, W, W, W),
							    gen3_fs_operand_reg(mask_reg));
					else
						gen3_fs_mul(out_reg,
							    gen3_fs_operand_reg(src_reg),
							    gen3_fs_operand_reg(mask_reg));
					break;
				}
			} else {
				switch (src->u.gen3.type) {
				case SHADER_WHITE:
					gen3_fs_mov(out_reg,
						    gen3_fs_operand(mask_reg, W, W, W, W));
					break;
				case SHADER_BLACK:
					gen3_fs_mov(out_reg,
						    gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
					break;
				default:
					gen3_fs_mul(out_reg,
						    gen3_fs_operand_reg(src_reg),
						    gen3_fs_operand(mask_reg, W, W, W, W));
					break;
				}
			}
		}
mask_done:
		/* Swap red and blue while copying the result to the output colour. */
		if (op->rb_reversed)
			gen3_fs_mov(FS_OC, gen3_fs_operand(FS_U0, Z, Y, X, W));
	}

done:
	/* Back-patch the reserved header with the final program length. */
	length = sna->kgem.nbatch - shader_offset;
	sna->kgem.batch[shader_offset] =
		_3DSTATE_PIXEL_SHADER_PROGRAM | (length - 2);
}
1858
1859static uint32_t gen3_ms_tiling(uint32_t tiling)
1860{
1861	uint32_t v = 0;
1862	switch (tiling) {
1863	case I915_TILING_Y: v |= MS3_TILE_WALK;
1864	case I915_TILING_X: v |= MS3_TILED_SURFACE;
1865	case I915_TILING_NONE: break;
1866	}
1867	return v;
1868}
1869
1870static void gen3_emit_invariant(struct sna *sna)
1871{
1872	/* Disable independent alpha blend */
1873	OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE |
1874		  IAB_MODIFY_FUNC | BLENDFUNC_ADD << IAB_FUNC_SHIFT |
1875		  IAB_MODIFY_SRC_FACTOR | BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT |
1876		  IAB_MODIFY_DST_FACTOR | BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT);
1877
1878	OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
1879		  CSB_TCB(0, 0) |
1880		  CSB_TCB(1, 1) |
1881		  CSB_TCB(2, 2) |
1882		  CSB_TCB(3, 3) |
1883		  CSB_TCB(4, 4) |
1884		  CSB_TCB(5, 5) |
1885		  CSB_TCB(6, 6) |
1886		  CSB_TCB(7, 7));
1887
1888	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3);
1889	OUT_BATCH(0); /* Disable texture coordinate wrap-shortest */
1890	OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) |
1891		  S4_LINE_WIDTH_ONE |
1892		  S4_CULLMODE_NONE |
1893		  S4_VFMT_XY);
1894	OUT_BATCH(0); /* Disable fog/stencil. *Enable* write mask. */
1895	OUT_BATCH(S6_COLOR_WRITE_ONLY); /* Disable blending, depth */
1896
1897	OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
1898	OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
1899
1900	OUT_BATCH(_3DSTATE_LOAD_INDIRECT);
1901	OUT_BATCH(0x00000000);
1902
1903	OUT_BATCH(_3DSTATE_STIPPLE);
1904	OUT_BATCH(0x00000000);
1905
1906	sna->render_state.gen3.need_invariant = false;
1907}
1908
1909#define MAX_OBJECTS 3 /* worst case: dst + src + mask  */
1910
1911static void
1912gen3_get_batch(struct sna *sna, const struct sna_composite_op *op)
1913{
1914	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
1915
1916	if (!kgem_check_batch(&sna->kgem, 200)) {
1917		DBG(("%s: flushing batch: size %d > %d\n",
1918		     __FUNCTION__, 200,
1919		     sna->kgem.surface-sna->kgem.nbatch));
1920		kgem_submit(&sna->kgem);
1921		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1922	}
1923
1924	if (!kgem_check_reloc(&sna->kgem, MAX_OBJECTS)) {
1925		DBG(("%s: flushing batch: reloc %d >= %d\n",
1926		     __FUNCTION__,
1927		     sna->kgem.nreloc,
1928		     (int)KGEM_RELOC_SIZE(&sna->kgem) - MAX_OBJECTS));
1929		kgem_submit(&sna->kgem);
1930		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1931	}
1932
1933	if (!kgem_check_exec(&sna->kgem, MAX_OBJECTS)) {
1934		DBG(("%s: flushing batch: exec %d >= %d\n",
1935		     __FUNCTION__,
1936		     sna->kgem.nexec,
1937		     (int)KGEM_EXEC_SIZE(&sna->kgem) - MAX_OBJECTS - 1));
1938		kgem_submit(&sna->kgem);
1939		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1940	}
1941
1942	if (sna->render_state.gen3.need_invariant)
1943		gen3_emit_invariant(sna);
1944#undef MAX_OBJECTS
1945}
1946
1947static void gen3_emit_target(struct sna *sna,
1948			     struct kgem_bo *bo,
1949			     int width,
1950			     int height,
1951			     int format)
1952{
1953	struct gen3_render_state *state = &sna->render_state.gen3;
1954
1955	assert(!too_large(width, height));
1956
1957	/* BUF_INFO is an implicit flush, so skip if the target is unchanged. */
1958	assert(bo->unique_id != 0);
1959	if (bo->unique_id != state->current_dst) {
1960		uint32_t v;
1961
1962		DBG(("%s: setting new target id=%d, handle=%d\n",
1963		     __FUNCTION__, bo->unique_id, bo->handle));
1964
1965		OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
1966		OUT_BATCH(BUF_3D_ID_COLOR_BACK |
1967			  gen3_buf_tiling(bo->tiling) |
1968			  bo->pitch);
1969		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
1970					 bo,
1971					 I915_GEM_DOMAIN_RENDER << 16 |
1972					 I915_GEM_DOMAIN_RENDER,
1973					 0));
1974
1975		OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
1976		OUT_BATCH(gen3_get_dst_format(format));
1977
1978		v = DRAW_YMAX(height - 1) | DRAW_XMAX(width - 1);
1979		if (v != state->last_drawrect_limit) {
1980			OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
1981			OUT_BATCH(0); /* XXX dither origin? */
1982			OUT_BATCH(0);
1983			OUT_BATCH(v);
1984			OUT_BATCH(0);
1985			state->last_drawrect_limit = v;
1986		}
1987
1988		state->current_dst = bo->unique_id;
1989	}
1990	assert(bo->exec);
1991	kgem_bo_mark_dirty(bo);
1992}
1993
/*
 * Emit all state needed before drawing with composite operation 'op':
 * cache flush (if a texture was rendered to), render target, vertex format
 * and blend control, pixel shader constants, texture maps, samplers and
 * finally the pixel shader itself.
 *
 * Several sections avoid re-emitting state identical to what is already in
 * the batch: the new commands are written, compared against the previously
 * recorded copy (state->last_blend / last_constants / last_sampler, or the
 * cached state->tex_* values) and, for the blend and sampler sections, the
 * batch write pointer is rewound when nothing changed.
 */
static void gen3_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	struct gen3_render_state *state = &sna->render_state.gen3;
	uint32_t map[4];	/* two dwords per texture */
	uint32_t sampler[4];	/* two dwords per texture */
	struct kgem_bo *bo[2];
	unsigned int tex_count, n;
	uint32_t ss2;

	gen3_get_batch(sna, op);

	/*
	 * A source or mask that is dirty needs a flush before it is
	 * sampled; a full MI_FLUSH is used when it is also the destination.
	 */
	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
			OUT_BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
		else
			OUT_BATCH(_3DSTATE_MODES_5_CMD |
				  PIPELINE_FLUSH_RENDER_CACHE |
				  PIPELINE_FLUSH_TEXTURE_CACHE);
		kgem_clear_dirty(&sna->kgem);
	}

	gen3_emit_target(sna,
			 op->dst.bo,
			 op->dst.width,
			 op->dst.height,
			 op->dst.format);

	/*
	 * ss2 starts with every bit set; each channel that supplies a
	 * texture coordinate clears its field and sets the actual format.
	 */
	ss2 = ~0;
	tex_count = 0;
	switch (op->src.u.gen3.type) {
	case SHADER_OPACITY:
	case SHADER_NONE:
		assert(0);
		/* no break: continues into the next case if assert() is compiled out */
	case SHADER_ZERO:
	case SHADER_BLACK:
	case SHADER_WHITE:
		break;
	case SHADER_CONSTANT:
		/* The constant source colour is passed as the default diffuse. */
		if (op->src.u.gen3.mode != state->last_diffuse) {
			OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
			OUT_BATCH(op->src.u.gen3.mode);
			state->last_diffuse = op->src.u.gen3.mode;
		}
		break;
	case SHADER_LINEAR:
	case SHADER_RADIAL:
	case SHADER_TEXTURE:
		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
		ss2 |= S2_TEXCOORD_FMT(tex_count,
				       op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
		assert(op->src.card_format);
		map[tex_count * 2 + 0] =
			op->src.card_format |
			gen3_ms_tiling(op->src.bo->tiling) |
			(op->src.height - 1) << MS3_HEIGHT_SHIFT |
			(op->src.width - 1) << MS3_WIDTH_SHIFT;
		map[tex_count * 2 + 1] =
			(op->src.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;

		sampler[tex_count * 2 + 0] = op->src.filter;
		sampler[tex_count * 2 + 1] =
			op->src.repeat |
			tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
		bo[tex_count] = op->src.bo;
		tex_count++;
		break;
	}
	switch (op->mask.u.gen3.type) {
	case SHADER_NONE:
	case SHADER_ZERO:
	case SHADER_BLACK:
	case SHADER_WHITE:
		break;
	case SHADER_CONSTANT:
		/* The constant mask colour is passed as the default specular. */
		if (op->mask.u.gen3.mode != state->last_specular) {
			OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
			OUT_BATCH(op->mask.u.gen3.mode);
			state->last_specular = op->mask.u.gen3.mode;
		}
		break;
	case SHADER_LINEAR:
	case SHADER_RADIAL:
	case SHADER_TEXTURE:
		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
		ss2 |= S2_TEXCOORD_FMT(tex_count,
				       op->mask.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
		assert(op->mask.card_format);
		map[tex_count * 2 + 0] =
			op->mask.card_format |
			gen3_ms_tiling(op->mask.bo->tiling) |
			(op->mask.height - 1) << MS3_HEIGHT_SHIFT |
			(op->mask.width - 1) << MS3_WIDTH_SHIFT;
		map[tex_count * 2 + 1] =
			(op->mask.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;

		sampler[tex_count * 2 + 0] = op->mask.filter;
		sampler[tex_count * 2 + 1] =
			op->mask.repeat |
			tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
		bo[tex_count] = op->mask.bo;
		tex_count++;
		break;
	case SHADER_OPACITY:
		/* Opacity travels as a 1D texcoord; no texture is bound for it. */
		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
		ss2 |= S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_1D);
		break;
	}

	{
		uint32_t blend_offset = sna->kgem.nbatch;

		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
		OUT_BATCH(ss2);
		OUT_BATCH(gen3_get_blend_cntl(op->op,
					      op->has_component_alpha,
					      op->dst.format));

		/*
		 * Compare the two payload dwords with those recorded at
		 * last_blend; if equal, rewind to drop what was just written.
		 */
		if (memcmp(sna->kgem.batch + state->last_blend + 1,
			   sna->kgem.batch + blend_offset + 1,
			   2 * 4) == 0)
			sna->kgem.nbatch = blend_offset;
		else
			state->last_blend = blend_offset;
	}

	if (op->u.gen3.num_constants) {
		int count = op->u.gen3.num_constants;
		/* Skip if the last constants command had the same mask and payload. */
		if (state->last_constants) {
			int last = sna->kgem.batch[state->last_constants+1];
			if (last == (1 << (count >> 2)) - 1 &&
			    memcmp(&sna->kgem.batch[state->last_constants+2],
				   op->u.gen3.constants,
				   count * sizeof(uint32_t)) == 0)
				count = 0;
		}
		if (count) {
			state->last_constants = sna->kgem.nbatch;
			OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | count);
			/* One mask bit per group of four dwords. */
			OUT_BATCH((1 << (count >> 2)) - 1);

			memcpy(sna->kgem.batch + sna->kgem.nbatch,
			       op->u.gen3.constants,
			       count * sizeof(uint32_t));
			sna->kgem.nbatch += count;
		}
	}

	if (tex_count != 0) {
		uint32_t rewind;

		/*
		 * n ends equal to tex_count only when the count and every
		 * cached map dword, handle and delta match.
		 */
		n = 0;
		if (tex_count == state->tex_count) {
			for (; n < tex_count; n++) {
				if (map[2*n+0] != state->tex_map[2*n+0] ||
				    map[2*n+1] != state->tex_map[2*n+1] ||
				    state->tex_handle[n] != bo[n]->handle ||
				    state->tex_delta[n] != bo[n]->delta)
					break;
			}
		}
		if (n < tex_count) {
			OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
			OUT_BATCH((1 << tex_count) - 1);
			for (n = 0; n < tex_count; n++) {
				OUT_BATCH(kgem_add_reloc(&sna->kgem,
							 sna->kgem.nbatch,
							 bo[n],
							 I915_GEM_DOMAIN_SAMPLER<< 16,
							 0));
				OUT_BATCH(map[2*n + 0]);
				OUT_BATCH(map[2*n + 1]);

				state->tex_map[2*n+0] = map[2*n+0];
				state->tex_map[2*n+1] = map[2*n+1];
				state->tex_handle[n] = bo[n]->handle;
				state->tex_delta[n] = bo[n]->delta;
			}
			state->tex_count = n;
		}

		/* Write the sampler state, then drop it if it repeats the last one. */
		rewind = sna->kgem.nbatch;
		OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count));
		OUT_BATCH((1 << tex_count) - 1);
		for (n = 0; n < tex_count; n++) {
			OUT_BATCH(sampler[2*n + 0]);
			OUT_BATCH(sampler[2*n + 1]);
			OUT_BATCH(0);
		}
		if (state->last_sampler &&
		    memcmp(&sna->kgem.batch[state->last_sampler+1],
			   &sna->kgem.batch[rewind + 1],
			   (3*tex_count + 1)*sizeof(uint32_t)) == 0)
			sna->kgem.nbatch = rewind;
		else
			state->last_sampler = rewind;
	}

	gen3_composite_emit_shader(sna, op, op->op);
}
2194
/*
 * Emit the additional PictOpAdd pass for operations flagged with
 * need_magic_ca_pass.
 *
 * Returns false without touching the batch when the op does not need the
 * pass.  Otherwise S6 is reloaded with the PictOpAdd blend control, the
 * PictOpAdd shader is emitted and a RECTLIST covering the vertices
 * [vertex_start, vertex_index) is issued; true is returned to tell the
 * caller that the blend and shader now differ from those of op->op.
 */
static bool gen3_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	if (!op->need_magic_ca_pass)
		return false;

	DBG(("%s(%d)\n", __FUNCTION__,
	     sna->render.vertex_index - sna->render.vertex_start));

	/* Switch the blend control (S6) and the shader over to PictOpAdd */
	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
	OUT_BATCH(gen3_get_blend_cntl(PictOpAdd, true, op->dst.format));
	gen3_composite_emit_shader(sna, op, PictOpAdd);

	/* Same vertex range as the primitive closed by gen3_vertex_flush() */
	OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
		  (sna->render.vertex_index - sna->render.vertex_start));
	OUT_BATCH(sna->render.vertex_start);

	/* S6 no longer holds the blend for op->op: drop the cached value */
	sna->render_state.gen3.last_blend = 0;
	return true;
}
2215
/*
 * Close the primitive that is currently open in the batch.
 *
 * The two dwords reserved at render.vertex_offset are overwritten with the
 * RECTLIST command covering the vertices [vertex_start, vertex_index), and
 * vertex_offset is cleared to record that no primitive is open any more.
 * Must only be called while a primitive is open (vertex_offset != 0).
 */
static void gen3_vertex_flush(struct sna *sna)
{
	assert(sna->render.vertex_offset);

	DBG(("%s[%x] = %d\n", __FUNCTION__,
	     4*sna->render.vertex_offset,
	     sna->render.vertex_index - sna->render.vertex_start));

	/* First dword: primitive type and number of vertices */
	sna->kgem.batch[sna->render.vertex_offset] =
		PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
		(sna->render.vertex_index - sna->render.vertex_start);
	/* Second dword: index of the first vertex */
	sna->kgem.batch[sna->render.vertex_offset + 1] =
		sna->render.vertex_start;

	sna->render.vertex_offset = 0;
}
2232
/*
 * Retire the current vertex storage and switch to a fresh, mapped vbo.
 *
 * If a vbo is active, any pending vertex relocation is resolved against
 * it, the vertex counters are reset and it is released with
 * kgem_bo_destroy().  A new 256KiB linear bo is then created and mapped;
 * vertices accumulated so far in the embedded vertex_data array
 * (vertex_used != 0) are copied into it.
 *
 * Must be called with no primitive open and with at least one float used.
 *
 * Returns the number of floats still free in the new buffer, or 0 if the
 * bo could not be created or mapped (render.vbo and render.vertices are
 * both NULL in that case).
 */
static int gen3_vertex_finish(struct sna *sna)
{
	struct kgem_bo *bo;

	DBG(("%s: used=%d/%d, vbo active? %d\n",
	     __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
	     sna->render.vbo ? sna->render.vbo->handle : 0));
	assert(sna->render.vertex_offset == 0);
	assert(sna->render.vertex_used);
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	sna_vertex_wait__locked(&sna->render);

	bo = sna->render.vbo;
	if (bo) {
		DBG(("%s: reloc = %d\n", __FUNCTION__,
		     sna->render.vertex_reloc[0]));

		/* Point the reserved batch dword at the vbo being retired */
		if (sna->render.vertex_reloc[0]) {
			sna->kgem.batch[sna->render.vertex_reloc[0]] =
				kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
					       bo, I915_GEM_DOMAIN_VERTEX << 16, 0);

			sna->render.vertex_reloc[0] = 0;
		}
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
		sna->render.vbo = NULL;

		kgem_bo_destroy(&sna->kgem, bo);
	}

	sna->render.vertices = NULL;
	sna->render.vbo = kgem_create_linear(&sna->kgem,
					     256*1024, CREATE_GTT_MAP);
	if (sna->render.vbo)
		sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
	if (sna->render.vertices == NULL) {
		/* Either the allocation or the mapping failed */
		if (sna->render.vbo)
			kgem_bo_destroy(&sna->kgem, sna->render.vbo);
		sna->render.vbo = NULL;
		return 0;
	}
	assert(sna->render.vbo->snoop == false);

	/* Only non-zero when we were accumulating in vertex_data (no vbo) */
	if (sna->render.vertex_used) {
		memcpy(sna->render.vertices,
		       sna->render.vertex_data,
		       sizeof(float)*sna->render.vertex_used);
	}
	/* Capacity in floats: one short of 256KiB / sizeof(float) */
	sna->render.vertex_size = 64 * 1024 - 1;
	return sna->render.vertex_size - sna->render.vertex_used;
}
2286
/*
 * Resolve the pending vertex buffer relocation, if there is one.
 *
 * Nothing is done when vertex_reloc[0] is 0.  Otherwise the vertices are
 * supplied from one of:
 *  - the active vbo, which is discarded afterwards if fewer than 64 floats
 *    remain in it or if its CPU mapping cannot be replaced by a GTT one;
 *  - the end of the batch itself, when nbatch + vertex_used does not
 *    exceed kgem.surface;
 *  - a temporary linear bo into which vertex_data is written.
 * The reserved batch dword is then filled in with the relocation.
 *
 * Must be called with no primitive open (vertex_offset == 0).
 */
static void gen3_vertex_close(struct sna *sna)
{
	struct kgem_bo *bo, *free_bo = NULL;
	unsigned int delta = 0;

	assert(sna->render.vertex_offset == 0);
	if (sna->render.vertex_reloc[0] == 0)
		return;

	DBG(("%s: used=%d/%d, vbo active? %d\n",
	     __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
	     sna->render.vbo ? sna->render.vbo->handle : 0));

	bo = sna->render.vbo;
	if (bo) {
		if (sna->render.vertex_size - sna->render.vertex_used < 64) {
			DBG(("%s: discarding full vbo\n", __FUNCTION__));
			sna->render.vbo = NULL;
			sna->render.vertices = sna->render.vertex_data;
			sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
			free_bo = bo;
		} else if (sna->render.vertices == MAP(bo->map__cpu)) {
			DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
			sna->render.vertices = kgem_bo_map__gtt(&sna->kgem, bo);
			if (sna->render.vertices == NULL) {
				DBG(("%s: discarding non-mappable vertices\n",__FUNCTION__));
				sna->render.vbo = NULL;
				sna->render.vertices = sna->render.vertex_data;
				sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
				free_bo = bo;
			}
		}
	} else {
		if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
			/* Append the vertices to the batch; delta is their
			 * byte offset within it and bo stays NULL.
			 */
			DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
			     sna->render.vertex_used, sna->kgem.nbatch));
			memcpy(sna->kgem.batch + sna->kgem.nbatch,
			       sna->render.vertex_data,
			       sna->render.vertex_used * 4);
			delta = sna->kgem.nbatch * 4;
			bo = NULL;
			sna->kgem.nbatch += sna->render.vertex_used;
		} else {
			DBG(("%s: new vbo: %d\n", __FUNCTION__,
			     sna->render.vertex_used));
			bo = kgem_create_linear(&sna->kgem,
						4*sna->render.vertex_used,
						CREATE_NO_THROTTLE);
			if (bo) {
				assert(bo->snoop == false);
				kgem_bo_write(&sna->kgem, bo,
					      sna->render.vertex_data,
					      4*sna->render.vertex_used);
			}
			/* NOTE(review): if the allocation failed, bo is NULL
			 * here and the relocation below is emitted with a
			 * NULL bo and delta 0 - confirm this is intended.
			 */
			free_bo = bo;
		}
	}

	DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0]));
	sna->kgem.batch[sna->render.vertex_reloc[0]] =
		kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
			       bo, I915_GEM_DOMAIN_VERTEX << 16, delta);
	sna->render.vertex_reloc[0] = 0;

	/* Without a persistent vbo the next batch starts from an empty
	 * vertex_data array.
	 */
	if (sna->render.vbo == NULL) {
		DBG(("%s: resetting vbo\n", __FUNCTION__));
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
		assert(sna->render.vertices == sna->render.vertex_data);
		assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
	}

	if (free_bo)
		kgem_bo_destroy(&sna->kgem, free_bo);
}
2362
/*
 * Open a RECTLIST primitive in the batch.
 *
 * Emits LOAD_STATE_IMMEDIATE_1 for S0 (reserving a dword for the vertex
 * buffer relocation) and/or S1 (vertex width and pitch) when they are not
 * yet current.  Then either reopens the previous primitive, if its two
 * dwords are the last thing in the batch and no magic CA pass is needed,
 * or reserves two new dwords for gen3_vertex_flush() to fill in.
 *
 * Returns false if the batch has no room for the commands, true otherwise.
 */
static bool gen3_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	struct gen3_render_state *state = &sna->render_state.gen3;
	int ndwords, i1_cmd = 0, i1_len = 0;

	/* A primitive is already open once the wait reports true */
	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* Two dwords for the primitive itself, plus extra headroom when a
	 * magic CA pass will follow (presumably for the state it emits).
	 */
	ndwords = 2;
	if (op->need_magic_ca_pass)
		ndwords += 100;
	if (sna->render.vertex_reloc[0] == 0)
		i1_len++, i1_cmd |= I1_LOAD_S(0), ndwords++;
	if (state->floats_per_vertex != op->floats_per_vertex)
		i1_len++, i1_cmd |= I1_LOAD_S(1), ndwords++;

	if (!kgem_check_batch(&sna->kgem, ndwords+1))
		return false;

	if (i1_cmd) {
		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | i1_cmd | (i1_len - 1));
		/* S0: remember where the vertex buffer address must go */
		if (sna->render.vertex_reloc[0] == 0)
			sna->render.vertex_reloc[0] = sna->kgem.nbatch++;
		/* S1: vertex width and pitch, both in floats */
		if (state->floats_per_vertex != op->floats_per_vertex) {
			state->floats_per_vertex = op->floats_per_vertex;
			OUT_BATCH(state->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT |
				  state->floats_per_vertex << S1_VERTEX_PITCH_SHIFT);
		}
	}

	if (sna->kgem.nbatch == 2 + state->last_vertex_offset &&
	    !op->need_magic_ca_pass) {
		/* Nothing was emitted since the last primitive: extend it,
		 * keeping its vertex_start.
		 */
		sna->render.vertex_offset = state->last_vertex_offset;
	} else {
		sna->render.vertex_offset = sna->kgem.nbatch;
		OUT_BATCH(MI_NOOP); /* to be filled later */
		OUT_BATCH(MI_NOOP);
		sna->render.vertex_start = sna->render.vertex_index;
		state->last_vertex_offset = sna->render.vertex_offset;
	}

	return true;
}
2407
/*
 * Slow path of gen3_get_rectangles(): the vertex buffer is full.
 *
 * Closes the open primitive (running the magic CA pass and then restoring
 * the blend and shader of op->op if needed) and moves on to a new vertex
 * buffer via gen3_vertex_finish().
 *
 * Returns the number of floats available afterwards, or 0 if the batch
 * lacks the room to do so or no new vertex buffer could be set up; the
 * caller then has to submit the batch.
 */
static int gen3_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Prevent discarding a new vbo after lock contention: if there is
	 * room for a rectangle once the wait reports true, just use it.
	 */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 105: 5))
		return 0;
	/* presumably for the relocation added by gen3_vertex_finish() */
	if (!kgem_check_reloc_and_exec(&sna->kgem, 1))
		return 0;

	if (sna->render.vertex_offset) {
		gen3_vertex_flush(sna);
		if (gen3_magic_ca_pass(sna, op)) {
			/* The CA pass left PictOpAdd state behind; restore
			 * the blend and shader for the real operator.
			 */
			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
			OUT_BATCH(gen3_get_blend_cntl(op->op,
						      op->has_component_alpha,
						      op->dst.format));
			gen3_composite_emit_shader(sna, op, op->op);
		}
	}

	return gen3_vertex_finish(sna);
}
2436
/*
 * Reserve vertex indices for up to 'want' rectangles of op.
 *
 * Makes sure there is room in the vertex buffer for at least one rectangle
 * and that a primitive is open, flushing the vertex buffer or submitting
 * the batch (and re-emitting the composite state) as required.
 *
 * Returns the number of rectangles reserved: at least 1 and at most want.
 * render.vertex_index is advanced by three vertices per rectangle;
 * render.vertex_used is not touched here.
 */
inline static int gen3_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want)
{
	int rem;

	DBG(("%s: want=%d, rem=%d\n",
	     __FUNCTION__, want*op->floats_per_rect, vertex_space(sna)));

	assert(want);
	assert(sna->render.vertex_index * op->floats_per_vertex == sna->render.vertex_used);

start:
	rem = vertex_space(sna);
	if (unlikely(op->floats_per_rect > rem)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen3_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	/* No primitive open: start one, then re-evaluate from the top */
	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen3_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	/* Clamp the request to what fits in the remaining space */
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;
	sna->render.vertex_index += 3*want;

	assert(want);
	assert(sna->render.vertex_index * op->floats_per_vertex <= sna->render.vertex_size);
	return want;

flush:
	/* Out of batch or vertex space: close the primitive, submit the
	 * batch and start over with freshly emitted state.
	 */
	DBG(("%s: flushing batch\n", __FUNCTION__));
	if (sna->render.vertex_offset) {
		gen3_vertex_flush(sna);
		gen3_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	gen3_emit_composite_state(sna, op);
	assert(sna->render.vertex_offset == 0);
	assert(sna->render.vertex_reloc[0] == 0);
	goto start;
}
2489
/*
 * Composite a single rectangle described by r: reserve space for one
 * rectangle and let the op's prim_emit callback emit it.
 */
fastcall static void
gen3_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", __FUNCTION__,
	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
	     r->width, r->height));

	/* May flush the vertex buffer or submit the batch to make room */
	gen3_get_rectangles(sna, op, 1);

	op->prim_emit(sna, op, r);
}
2505
2506fastcall static void
2507gen3_render_composite_box(struct sna *sna,
2508			  const struct sna_composite_op *op,
2509			  const BoxRec *box)
2510{
2511	struct sna_composite_rectangles r;
2512
2513	DBG(("%s: src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
2514	     __FUNCTION__,
2515	     op->src.offset[0], op->src.offset[1],
2516	     op->mask.offset[0], op->mask.offset[1],
2517	     op->dst.x, op->dst.y));
2518
2519	gen3_get_rectangles(sna, op, 1);
2520
2521	r.dst.x  = box->x1;
2522	r.dst.y  = box->y1;
2523	r.width  = box->x2 - box->x1;
2524	r.height = box->y2 - box->y1;
2525	r.src = r.mask = r.dst;
2526
2527	op->prim_emit(sna, op, &r);
2528}
2529
/*
 * Composite nbox boxes, one prim_emit call per box.
 *
 * Space is reserved in batches of as many rectangles as
 * gen3_get_rectangles() grants; each box origin serves as the coordinate
 * for destination, source and mask.  nbox must be non-zero.
 */
static void
gen3_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->src.offset[0], op->src.offset[1],
	     op->mask.offset[0], op->mask.offset[1],
	     op->dst.x, op->dst.y));

	do {
		int nbox_this_time;

		/* gen3_get_rectangles() asserts that it grants at least one */
		nbox_this_time = gen3_get_rectangles(sna, op, nbox);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			     box->x1, box->y1,
			     box->x2 - box->x1,
			     box->y2 - box->y1));

			r.dst.x  = box->x1; r.dst.y  = box->y1;
			r.width = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.src = r.mask = r.dst;

			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}
2565
/*
 * Composite nbox boxes using the op's bulk emit_boxes callback.
 *
 * For each batch of rectangles granted by gen3_get_rectangles(), the
 * matching range of the vertex buffer is claimed by advancing vertex_used
 * and handed to emit_boxes to fill in.  nbox must be non-zero.
 */
static void
gen3_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen3_get_rectangles(sna, op, nbox);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Read vertices only now: gen3_get_rectangles() may have
		 * switched to a different vertex buffer.
		 */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}
2588
/*
 * Variant of gen3_render_composite_boxes() that takes the vertex lock.
 *
 * Space is reserved (vertex_index and vertex_used advanced) while holding
 * the vertex lock.  The lock is dropped around the emit_boxes call, which
 * is bracketed by sna_vertex_acquire__locked() and
 * sna_vertex_release__locked() - presumably so that a concurrent flush
 * can wait for outstanding writers via sna_vertex_wait__locked().
 */
static void
gen3_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen3_get_rectangles(sna, op, nbox);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Claim our slice of the vertex buffer under the lock */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* Fill in the claimed vertices without holding the lock */
		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
2619
/*
 * Finish a composite operation.
 *
 * Closes any open primitive (running the magic CA pass if the op needs
 * one), releases op->mask.bo and op->src.bo with kgem_bo_destroy() when
 * they are set, and calls sna_render_composite_redirect_done().
 */
static void
gen3_render_composite_done(struct sna *sna,
			   const struct sna_composite_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	if (sna->render.vertex_offset) {
		gen3_vertex_flush(sna);
		gen3_magic_ca_pass(sna, op);
	}

	/* Channels without a bo (bo == NULL) have nothing to release */
	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);

	sna_render_composite_redirect_done(sna, op);
}
2638
2639static void
2640discard_vbo(struct sna *sna)
2641{
2642	kgem_bo_destroy(&sna->kgem, sna->render.vbo);
2643	sna->render.vbo = NULL;
2644	sna->render.vertices = sna->render.vertex_data;
2645	sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
2646	sna->render.vertex_used = 0;
2647	sna->render.vertex_index = 0;
2648}
2649
2650static void
2651gen3_render_reset(struct sna *sna)
2652{
2653	struct gen3_render_state *state = &sna->render_state.gen3;
2654
2655	state->need_invariant = true;
2656	state->current_dst = 0;
2657	state->tex_count = 0;
2658	state->last_drawrect_limit = ~0U;
2659	state->last_target = 0;
2660	state->last_blend = 0;
2661	state->last_constants = 0;
2662	state->last_sampler = 0;
2663	state->last_shader = 0x7fffffff;
2664	state->last_diffuse = 0xcc00ffee;
2665	state->last_specular = 0xcc00ffee;
2666
2667	state->floats_per_vertex = 0;
2668	state->last_floats_per_vertex = 0;
2669	state->last_vertex_offset = 0;
2670
2671	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
2672		DBG(("%s: discarding vbo as next access will stall: %lx\n",
2673		     __FUNCTION__, (long)sna->render.vbo->presumed_offset));
2674		discard_vbo(sna);
2675	}
2676
2677	sna->render.vertex_reloc[0] = 0;
2678	sna->render.vertex_offset = 0;
2679}
2680
2681static void
2682gen3_render_retire(struct kgem *kgem)
2683{
2684	struct sna *sna;
2685
2686	sna = container_of(kgem, struct sna, kgem);
2687	if (sna->render.vertex_reloc[0] == 0 &&
2688	    sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
2689		DBG(("%s: resetting idle vbo\n", __FUNCTION__));
2690		sna->render.vertex_used = 0;
2691		sna->render.vertex_index = 0;
2692	}
2693}
2694
2695static void
2696gen3_render_expire(struct kgem *kgem)
2697{
2698	struct sna *sna;
2699
2700	sna = container_of(kgem, struct sna, kgem);
2701	if (sna->render.vbo && !sna->render.vertex_used) {
2702		DBG(("%s: discarding vbo\n", __FUNCTION__));
2703		discard_vbo(sna);
2704	}
2705}
2706
2707static bool gen3_composite_channel_set_format(struct sna_composite_channel *channel,
2708					      CARD32 format)
2709{
2710	unsigned int i;
2711
2712	for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
2713		if (gen3_tex_formats[i].fmt == format) {
2714			channel->card_format = gen3_tex_formats[i].card_fmt;
2715			channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
2716			return true;
2717		}
2718	}
2719	return false;
2720}
2721
2722static bool source_is_covered(PicturePtr picture,
2723			      int x, int y,
2724			      int width, int height)
2725{
2726	int x1, y1, x2, y2;
2727
2728	if (picture->repeat && picture->repeatType != RepeatNone)
2729		return true;
2730
2731	if (picture->pDrawable == NULL)
2732		return false;
2733
2734	if (picture->transform) {
2735		pixman_box16_t sample;
2736
2737		sample.x1 = x;
2738		sample.y1 = y;
2739		sample.x2 = x + width;
2740		sample.y2 = y + height;
2741
2742		pixman_transform_bounds(picture->transform, &sample);
2743
2744		x1 = sample.x1;
2745		x2 = sample.x2;
2746		y1 = sample.y1;
2747		y2 = sample.y2;
2748	} else {
2749		x1 = x;
2750		y1 = y;
2751		x2 = x + width;
2752		y2 = y + height;
2753	}
2754
2755	return
2756		x1 >= 0 && y1 >= 0 &&
2757		x2 <= picture->pDrawable->width &&
2758		y2 <= picture->pDrawable->height;
2759}
2760
2761static bool gen3_composite_channel_set_xformat(PicturePtr picture,
2762					       struct sna_composite_channel *channel,
2763					       int x, int y,
2764					       int width, int height)
2765{
2766	unsigned int i;
2767
2768	if (PICT_FORMAT_A(picture->format) != 0)
2769		return false;
2770
2771	if (width == 0 || height == 0)
2772		return false;
2773
2774	if (!source_is_covered(picture, x, y, width, height))
2775		return false;
2776
2777	for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
2778		if (gen3_tex_formats[i].xfmt == picture->format) {
2779			channel->card_format = gen3_tex_formats[i].card_fmt;
2780			channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
2781			channel->alpha_fixup = true;
2782			return true;
2783		}
2784	}
2785
2786	return false;
2787}
2788
2789static int
2790gen3_init_solid(struct sna_composite_channel *channel, uint32_t color)
2791{
2792	channel->u.gen3.mode = color;
2793	channel->u.gen3.type = SHADER_CONSTANT;
2794	if (color == 0)
2795		channel->u.gen3.type = SHADER_ZERO;
2796	else if (color == 0xff000000)
2797		channel->u.gen3.type = SHADER_BLACK;
2798	else if (color == 0xffffffff)
2799		channel->u.gen3.type = SHADER_WHITE;
2800
2801	channel->bo = NULL;
2802	channel->is_opaque = (color >> 24) == 0xff;
2803	channel->is_affine = 1;
2804	channel->alpha_fixup = 0;
2805	channel->rb_reversed = 0;
2806
2807	DBG(("%s: color=%08x, is_opaque=%d, type=%d\n",
2808	     __FUNCTION__, color, channel->is_opaque, channel->u.gen3.type));
2809
2810	/* for consistency */
2811	channel->repeat = RepeatNormal;
2812	channel->filter = PictFilterNearest;
2813	channel->pict_format = PICT_a8r8g8b8;
2814	channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
2815
2816	return 1;
2817}
2818
2819static void gen3_composite_channel_convert(struct sna_composite_channel *channel)
2820{
2821	if (channel->u.gen3.type == SHADER_TEXTURE)
2822		channel->repeat = gen3_texture_repeat(channel->repeat);
2823	else
2824		channel->repeat = gen3_gradient_repeat(channel->repeat);
2825
2826	channel->filter = gen3_filter(channel->filter);
2827	if (channel->card_format == 0)
2828		gen3_composite_channel_set_format(channel, channel->pict_format);
2829	assert(channel->card_format);
2830}
2831
/*
 * Common channel setup for linear and radial gradients.
 *
 * Takes the repeat mode from the picture, obtains the gradient bo from
 * sna_render_get_gradient() and fills in the channel as an a8r8g8b8
 * texture that is one row high and bo->pitch / 4 pixels wide, sampled
 * with the nearest filter.  A transform recognised as an integer
 * translation is folded into the offset (ox, oy) instead of being kept.
 *
 * Returns false for an unknown repeat type or if no gradient bo could be
 * obtained, true otherwise.
 */
static bool gen3_gradient_setup(struct sna *sna,
				PicturePtr picture,
				struct sna_composite_channel *channel,
				int16_t ox, int16_t oy)
{
	int16_t dx, dy;

	if (picture->repeat == 0) {
		channel->repeat = RepeatNone;
	} else switch (picture->repeatType) {
	case RepeatNone:
	case RepeatNormal:
	case RepeatPad:
	case RepeatReflect:
		channel->repeat = picture->repeatType;
		break;
	default:
		return false;
	}

	channel->bo =
		sna_render_get_gradient(sna,
					(PictGradient *)picture->pSourcePict);
	if (channel->bo == NULL)
		return false;

	channel->pict_format = PICT_a8r8g8b8;
	channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
	channel->filter = PictFilterNearest;
	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_imprecise_integer_translation(picture->transform, PictFilterNearest, false, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		/* Fold the translation into the offset, drop the transform */
		ox += dx;
		oy += dy;
		channel->transform = NULL;
	} else
		channel->transform = picture->transform;
	/* 4 bytes per pixel, a single row */
	channel->width  = channel->bo->pitch / 4;
	channel->height = 1;
	channel->offset[0] = ox;
	channel->offset[1] = oy;
	channel->scale[0] = channel->scale[1] = 1;
	return true;
}
2877
/*
 * Set up a channel for a linear gradient.
 *
 * Appends four shader constants to the op: the gradient vector
 * (p2 - p1) divided by its squared length (two components), the negated
 * dot product of that scaled vector with p1, and 0.  The channel records
 * which constant register they start at.
 *
 * Returns 0 for a degenerate gradient (p1 == p2), -1 if
 * gen3_gradient_setup() fails (the op's constant count is then left
 * unchanged), and 1 on success.
 */
static int
gen3_init_linear(struct sna *sna,
		 PicturePtr picture,
		 struct sna_composite_op *op,
		 struct sna_composite_channel *channel,
		 int ox, int oy)
{
	PictLinearGradient *linear =
		(PictLinearGradient *)picture->pSourcePict;
	float x0, y0, sf;
	float dx, dy, offset;
	int n;

	DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n",
	     __FUNCTION__,
	     xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y),
	     xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y)));

	/* Also guarantees that sf below is non-zero */
	if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
		return 0;

	/* Scale the direction by 1/|p2 - p1|^2 */
	dx = xFixedToDouble(linear->p2.x - linear->p1.x);
	dy = xFixedToDouble(linear->p2.y - linear->p1.y);
	sf = dx*dx + dy*dy;
	dx /= sf;
	dy /= sf;

	x0 = xFixedToDouble(linear->p1.x);
	y0 = xFixedToDouble(linear->p1.y);
	offset = dx*x0 + dy*y0;

	/* Four constants per register, hence n / 4 */
	n = op->u.gen3.num_constants;
	channel->u.gen3.constants = FS_C0 + n / 4;
	op->u.gen3.constants[n++] = dx;
	op->u.gen3.constants[n++] = dy;
	op->u.gen3.constants[n++] = -offset;
	op->u.gen3.constants[n++] = 0;

	if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
		return -1;

	/* Commit the constants only once the channel is fully set up */
	channel->u.gen3.type = SHADER_LINEAR;
	op->u.gen3.num_constants = n;

	DBG(("%s: dx=%f, dy=%f, offset=%f, constants=%d\n",
	     __FUNCTION__, dx, dy, -offset, channel->u.gen3.constants - FS_C0));
	return 1;
}
2926
/*
 * Set up a channel for a radial gradient.
 *
 * Two forms are distinguished:
 *  - RADIAL_ONE, when both circles share a centre: four constants
 *    (c1.x / dr, c1.y / dr, 1 / dr, -r1 / dr);
 *  - RADIAL_TWO otherwise: eight constants derived from the centre of
 *    the first circle, its radius and the deltas between the circles.
 * If the two circles are identical the channel type is set to
 * SHADER_ZERO and 1 is returned straight away, without calling
 * gen3_gradient_setup().
 *
 * Returns -1 if gen3_gradient_setup() fails (the op's constant count is
 * then left unchanged) and 1 otherwise.
 */
static int
gen3_init_radial(struct sna *sna,
		 PicturePtr picture,
		 struct sna_composite_op *op,
		 struct sna_composite_channel *channel,
		 int ox, int oy)
{
	PictRadialGradient *radial = (PictRadialGradient *)picture->pSourcePict;
	double dx, dy, dr, r1;
	int n;

	dx = xFixedToDouble(radial->c2.x - radial->c1.x);
	dy = xFixedToDouble(radial->c2.y - radial->c1.y);
	dr = xFixedToDouble(radial->c2.radius - radial->c1.radius);

	r1 = xFixedToDouble(radial->c1.radius);

	/* Four constants per register, hence n / 4 */
	n = op->u.gen3.num_constants;
	channel->u.gen3.constants = FS_C0 + n / 4;
	if (radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) {
		if (radial->c2.radius == radial->c1.radius) {
			channel->u.gen3.type = SHADER_ZERO;
			return 1;
		}

		/* Concentric circles: dr is non-zero here */
		op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.x) / dr;
		op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.y) / dr;
		op->u.gen3.constants[n++] = 1. / dr;
		op->u.gen3.constants[n++] = -r1 / dr;

		channel->u.gen3.mode = RADIAL_ONE;
	} else {
		op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.x);
		op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.y);
		op->u.gen3.constants[n++] = r1;
		op->u.gen3.constants[n++] = -4 * (dx*dx + dy*dy - dr*dr);

		op->u.gen3.constants[n++] = -2 * dx;
		op->u.gen3.constants[n++] = -2 * dy;
		op->u.gen3.constants[n++] = -2 * r1 * dr;
		/* NOTE(review): the divisor is zero when
		 * dx*dx + dy*dy == dr*dr - confirm that case is handled.
		 */
		op->u.gen3.constants[n++] = 1 / (2 * (dx*dx + dy*dy - dr*dr));

		channel->u.gen3.mode = RADIAL_TWO;
	}

	if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
		return -1;

	/* Commit the constants only once the channel is fully set up */
	channel->u.gen3.type = SHADER_RADIAL;
	op->u.gen3.num_constants = n;
	return 1;
}
2979
2980static bool
2981sna_picture_is_clear(PicturePtr picture,
2982		     int x, int y, int w, int h,
2983		     uint32_t *color)
2984{
2985	struct sna_pixmap *priv;
2986
2987	if (!picture->pDrawable)
2988		return false;
2989
2990	priv = sna_pixmap(get_drawable_pixmap(picture->pDrawable));
2991	if (priv == NULL || !priv->clear)
2992		return false;
2993
2994	if (!source_is_covered(picture, x, y, w, h))
2995		return false;
2996
2997	*color = priv->clear_color;
2998	return true;
2999}
3000
3001static int
3002gen3_composite_picture(struct sna *sna,
3003		       PicturePtr picture,
3004		       struct sna_composite_op *op,
3005		       struct sna_composite_channel *channel,
3006		       int16_t x, int16_t y,
3007		       int16_t w, int16_t h,
3008		       int16_t dst_x, int16_t dst_y,
3009		       bool precise)
3010{
3011	PixmapPtr pixmap;
3012	uint32_t color;
3013	int16_t dx, dy;
3014
3015	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
3016	     __FUNCTION__, x, y, w, h, dst_x, dst_y));
3017
3018	channel->card_format = 0;
3019
3020	if (picture->pDrawable == NULL) {
3021		SourcePict *source = picture->pSourcePict;
3022		int ret = -1;
3023
3024		switch (source->type) {
3025		case SourcePictTypeSolidFill:
3026			DBG(("%s: solid fill [%08x], format %08x\n",
3027			     __FUNCTION__,
3028			     (unsigned)source->solidFill.color,
3029			     (unsigned)picture->format));
3030			ret = gen3_init_solid(channel, source->solidFill.color);
3031			break;
3032
3033		case SourcePictTypeLinear:
3034			ret = gen3_init_linear(sna, picture, op, channel,
3035					       x - dst_x, y - dst_y);
3036			break;
3037
3038		case SourcePictTypeRadial:
3039			ret = gen3_init_radial(sna, picture, op, channel,
3040					       x - dst_x, y - dst_y);
3041			break;
3042		}
3043
3044		if (ret == -1) {
3045			if (!precise)
3046				ret = sna_render_picture_approximate_gradient(sna, picture, channel,
3047									      x, y, w, h, dst_x, dst_y);
3048			if (ret == -1)
3049				ret = sna_render_picture_fixup(sna, picture, channel,
3050							       x, y, w, h, dst_x, dst_y);
3051		}
3052		return ret;
3053	}
3054
3055	if (picture->alphaMap) {
3056		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
3057		return sna_render_picture_fixup(sna, picture, channel,
3058						x, y, w, h, dst_x, dst_y);
3059	}
3060
3061	if (sna_picture_is_solid(picture, &color)) {
3062		DBG(("%s: solid drawable [%08x]\n", __FUNCTION__, color));
3063		return gen3_init_solid(channel, color);
3064	}
3065
3066	if (sna_picture_is_clear(picture, x, y, w, h, &color)) {
3067		DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color));
3068		return gen3_init_solid(channel, color_convert(color, picture->format, PICT_a8r8g8b8));
3069	}
3070
3071	if (!gen3_check_repeat(picture))
3072		return sna_render_picture_fixup(sna, picture, channel,
3073						x, y, w, h, dst_x, dst_y);
3074
3075	if (!gen3_check_filter(picture))
3076		return sna_render_picture_fixup(sna, picture, channel,
3077						x, y, w, h, dst_x, dst_y);
3078
3079	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
3080	channel->filter = picture->filter;
3081	channel->pict_format = picture->format;
3082
3083	pixmap = get_drawable_pixmap(picture->pDrawable);
3084	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
3085
3086	x += dx + picture->pDrawable->x;
3087	y += dy + picture->pDrawable->y;
3088
3089	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
3090		DBG(("%s: integer translation (%d, %d), removing\n",
3091		     __FUNCTION__, dx, dy));
3092		x += dx;
3093		y += dy;
3094		channel->transform = NULL;
3095		channel->filter = PictFilterNearest;
3096
3097		if (channel->repeat ||
3098		    (x >= 0 &&
3099		     y >= 0 &&
3100		     x + w < pixmap->drawable.width &&
3101		     y + h < pixmap->drawable.height)) {
3102			struct sna_pixmap *priv = sna_pixmap(pixmap);
3103			if (priv && priv->clear) {
3104				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
3105				return gen3_init_solid(channel, priv->clear_color);
3106			}
3107		}
3108	} else {
3109		channel->transform = picture->transform;
3110		channel->is_affine = sna_transform_is_affine(picture->transform);
3111	}
3112
3113	if (!gen3_composite_channel_set_format(channel, picture->format) &&
3114	    !gen3_composite_channel_set_xformat(picture, channel, x, y, w, h))
3115		return sna_render_picture_convert(sna, picture, channel, pixmap,
3116						  x, y, w, h, dst_x, dst_y,
3117						  false);
3118	assert(channel->card_format);
3119
3120	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
3121		DBG(("%s: pixmap too large (%dx%d), extracting (%d, %d)x(%d,%d)\n",
3122		     __FUNCTION__,
3123		     pixmap->drawable.width, pixmap->drawable.height,
3124		     x, y, w, h));
3125		return sna_render_picture_extract(sna, picture, channel,
3126						  x, y, w, h, dst_x, dst_y);
3127	}
3128
3129	return sna_render_pixmap_bo(sna, channel, pixmap,
3130				    x, y, w, h, dst_x, dst_y);
3131}
3132
/*
 * Realign the vertex buffer position when the vertex size changes.
 *
 * Nothing is done if op uses the same floats_per_vertex as
 * last_floats_per_vertex.  Otherwise vertex_used is rounded up to a whole
 * number of op-sized vertices so that vertex_index addresses the next
 * free vertex.  If fewer than two rectangles would fit after rounding,
 * the vertex buffer is finished first, and the batch is submitted when
 * gen3_vertex_finish() reports less than 2 * floats_per_vertex free.
 *
 * NOTE(review): last_floats_per_vertex is not updated here - presumably
 * the state emission does that; confirm.
 */
static void
gen3_align_vertex(struct sna *sna,
		  const struct sna_composite_op *op)
{
	int vertex_index;

	if (op->floats_per_vertex == sna->render_state.gen3.last_floats_per_vertex)
		return;

	DBG(("aligning vertex: was %d, now %d floats per vertex\n",
	     sna->render_state.gen3.last_floats_per_vertex,
	     op->floats_per_vertex));

	assert(op->floats_per_rect == 3*op->floats_per_vertex);

	/* Round up to the next whole vertex of the new size */
	vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
	if ((int)sna->render.vertex_size - vertex_index * op->floats_per_vertex < 2*op->floats_per_rect) {
		DBG(("%s: flushing vertex buffer: new index=%d, max=%d\n",
		     __FUNCTION__, vertex_index, sna->render.vertex_size / op->floats_per_vertex));
		if (gen3_vertex_finish(sna) < 2*op->floats_per_vertex)
			kgem_submit(&sna->kgem);

		/* vertex_used may have changed, so round again */
		vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
	}

	sna->render.vertex_index = vertex_index;
	sna->render.vertex_used = vertex_index * op->floats_per_vertex;
}
3161
/*
 * Set up op->dst for rendering to the picture dst.
 *
 * (x, y) + (w, h) is the area about to be drawn; if w or h is zero the
 * picture extents are used instead.  partial indicates that the area
 * will not be overwritten completely, which keeps the IGNORE_DAMAGE and
 * REPLACES hints from being passed to sna_drawable_use_bo().
 *
 * Returns false if no bo could be obtained for the drawable, if its
 * pitch is not a multiple of two pixels, or if the target exceeds the 3D
 * limits and could not be redirected; true otherwise.
 */
static bool
gen3_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;
	unsigned hint;

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
	op->dst.width = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;

	if (w && h) {
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	hint = PREFER_GPU | FORCE_GPU | RENDER_GPU;
	if (!partial) {
		hint |= IGNORE_DAMAGE;
		/* The operation covers the whole pixmap */
		if (w == op->dst.width && h == op->dst.height)
			hint |= REPLACES;
	}

	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	if (hint & REPLACES) {
		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
	}

	assert(op->dst.bo->unique_id);

	/* For single-stream mode there should be no minimum alignment
	 * required, except that the width must be at least 2 elements.
	 * Furthermore, it appears that the pitch must be a multiple of
	 * 2 elements.
	 */
	if (op->dst.bo->pitch & ((2*op->dst.pixmap->drawable.bitsPerPixel >> 3) - 1))
		return false;

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	/* Too big or too wide a pitch for the 3D pipeline: redirect */
	if ((too_large(op->dst.width, op->dst.height) ||
	     !gen3_check_pitch_3d(op->dst.bo)) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
		return false;

	return true;
}
3231
/*
 * Multiply two 8-bit normalised values, approximating (a * b) / 255
 * without a division: the biased product is folded onto itself and the
 * top byte is kept.
 */
static inline uint8_t
mul_8_8(uint8_t a, uint8_t b)
{
	const uint16_t biased = a * (uint16_t)b + 0x7f;
	const int folded = (biased >> 8) + biased;

	return folded >> 8;
}
3238
/*
 * Scale one 8-bit channel of @s by the alpha channel (top byte) of @m.
 *
 * @shift selects the channel of @s (its bit offset: 0, 8, 16 or 24 in the
 * visible callers); the scaled channel is returned at that same offset
 * with all other bits zero.
 */
static inline uint32_t multa(uint32_t s, uint32_t m, int shift)
{
	/* Widen to uint32_t before shifting: the uint8_t result would
	 * otherwise be promoted to int, and shifting a value >= 0x80 left
	 * by 24 overflows a signed int (undefined behaviour).
	 */
	return (uint32_t)mul_8_8((s >> shift) & 0xff, m >> 24) << shift;
}
3243
3244static inline bool is_constant_ps(uint32_t type)
3245{
3246	switch (type) {
3247	case SHADER_NONE: /* be warned! */
3248	case SHADER_ZERO:
3249	case SHADER_BLACK:
3250	case SHADER_WHITE:
3251	case SHADER_CONSTANT:
3252		return true;
3253	default:
3254		return false;
3255	}
3256}
3257
3258static bool
3259has_alphamap(PicturePtr p)
3260{
3261	return p->alphaMap != NULL;
3262}
3263
3264static bool
3265need_upload(PicturePtr p)
3266{
3267	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
3268}
3269
3270static bool
3271source_is_busy(PixmapPtr pixmap)
3272{
3273	struct sna_pixmap *priv = sna_pixmap(pixmap);
3274	if (priv == NULL)
3275		return false;
3276
3277	if (priv->clear)
3278		return false;
3279
3280	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
3281		return true;
3282
3283	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
3284		return true;
3285
3286	return priv->gpu_damage && !priv->cpu_damage;
3287}
3288
3289static bool
3290is_unhandled_gradient(PicturePtr picture, bool precise)
3291{
3292	if (picture->pDrawable)
3293		return false;
3294
3295	switch (picture->pSourcePict->type) {
3296	case SourcePictTypeSolidFill:
3297	case SourcePictTypeLinear:
3298	case SourcePictTypeRadial:
3299		return false;
3300	default:
3301		return precise;
3302	}
3303}
3304
3305static bool
3306source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
3307{
3308	if (sna_picture_is_solid(p, NULL))
3309		return false;
3310
3311	if (is_unhandled_gradient(p, precise))
3312		return true;
3313
3314	if (!gen3_check_xformat(p) || !gen3_check_repeat(p))
3315		return true;
3316
3317	if (pixmap && source_is_busy(pixmap))
3318		return false;
3319
3320	return has_alphamap(p) || !gen3_check_filter(p) || need_upload(p);
3321}
3322
/*
 * Decide whether a composite operation should be abandoned by the 3D
 * path.  The caller jumps to its fallback handling when this returns true
 * and carries on with GPU setup when it returns false.
 *
 * The checks run in order:
 *  - an unsupported destination format always falls back;
 *  - a source or mask that aliases the destination and would itself need
 *    a fallback falls back;
 *  - a component-alpha mask combined with an op that needs both source
 *    alpha and source colour falls back, except for PictOpOver;
 *  - if the destination, or a source/mask that does not need a fallback,
 *    is backed by a pixmap suitable for the GPU, the GPU is used;
 *  - otherwise a source/mask fallback, or a too-large CPU destination,
 *    falls back;
 *  - finally the answer is whatever dst_use_cpu() reports.
 */
static bool
gen3_composite_fallback(struct sna *sna,
			uint8_t op,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen3_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	/* Source-only pictures (no drawable) have no backing pixmap. */
	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the cpu.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* Same component-alpha restriction that gen3_render_composite()
	 * enforces later; checking it here avoids setting up the channels.
	 */
	if (mask &&
	    mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
	    gen3_blend_op[op].src_alpha &&
	    gen3_blend_op[op].src_blend != BLENDFACT_ZERO &&
	    op != PictOpOver) {
		DBG(("%s: component-alpha mask with op=%d, should fallback\n",
		     __FUNCTION__, op));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
	if (src_fallback) {
		DBG(("%s: dst is on the CPU and src will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (mask && mask_fallback) {
		DBG(("%s: dst is on the CPU and mask will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (too_large(dst_pixmap->drawable.width,
		      dst_pixmap->drawable.height) &&
	    dst_is_cpu(dst_pixmap)) {
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
		return true;
	}

	DBG(("%s: dst is not on the GPU and the operation should not fallback: use-cpu? %d\n",
	     __FUNCTION__, dst_use_cpu(dst_pixmap)));
	return dst_use_cpu(dst_pixmap);
}
3424
3425static int
3426reuse_source(struct sna *sna,
3427	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
3428	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
3429{
3430	if (src_x != msk_x || src_y != msk_y)
3431		return false;
3432
3433	if (mask == src) {
3434		*mc = *sc;
3435		if (mc->bo)
3436			kgem_bo_reference(mc->bo);
3437		return true;
3438	}
3439
3440	if ((src->pDrawable == NULL || mask->pDrawable != src->pDrawable))
3441		return false;
3442
3443	if (sc->is_solid)
3444		return false;
3445
3446	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));
3447
3448	if (!sna_transform_equal(src->transform, mask->transform))
3449		return false;
3450
3451	if (!sna_picture_alphamap_equal(src, mask))
3452		return false;
3453
3454	if (!gen3_check_repeat(mask))
3455		return false;
3456
3457	if (!gen3_check_filter(mask))
3458		return false;
3459
3460	if (!gen3_check_format(mask))
3461		return false;
3462
3463	DBG(("%s: reusing source channel for mask with a twist\n",
3464	     __FUNCTION__));
3465
3466	*mc = *sc;
3467	mc->repeat = gen3_texture_repeat(mask->repeat ? mask->repeatType : RepeatNone);
3468	mc->filter = gen3_filter(mask->filter);
3469	mc->pict_format = mask->format;
3470	gen3_composite_channel_set_format(mc, mask->format);
3471	assert(mc->card_format);
3472	if (mc->bo)
3473		kgem_bo_reference(mc->bo);
3474	return true;
3475}
3476
/*
 * Prepare a Render composite operation for the gen3 3D pipeline.
 *
 * For mask-less operations the BLT path (sna_blt_composite) is tried
 * first.  Otherwise the render target, the source channel and the optional
 * mask channel are set up in @tmp, a vertex emitter matching the channel
 * types is selected, and the composite state is emitted.
 *
 * Returns true once @tmp has been prepared by either path.  Oversized
 * operations return whatever sna_tiling_composite() reports.  On failure
 * every channel bo acquired here is released and, for mask-less
 * operations only, one more attempt is made through sna_blt_composite()
 * with COMPOSITE_FALLBACK set; with a mask the result is false.
 */
static bool
gen3_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x,  int16_t src_y,
		      int16_t mask_x, int16_t mask_y,
		      int16_t dst_x,  int16_t dst_y,
		      int16_t width,  int16_t height,
		      unsigned flags,
		      struct sna_composite_op *tmp)
{
	DBG(("%s()\n", __FUNCTION__));

	/* op indexes gen3_blend_op[] below, so reject anything out of range. */
	if (op >= ARRAY_SIZE(gen3_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	/* Try to use the BLT engine unless it implies a
	 * 3D -> 2D context switch.
	 */
	if (mask == NULL &&
	    sna_blt_composite(sna,
			      op, src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      flags, tmp))
		return true;

	if (gen3_composite_fallback(sna, op, src, mask, dst))
		goto fallback;

	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x,  src_y,
					    mask_x, mask_y,
					    dst_x,  dst_y,
					    width,  height,
					    tmp);

	/* Any op above PictOpSrc is treated as a partial update of the
	 * target, as is an explicit COMPOSITE_PARTIAL request.
	 */
	if (!gen3_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       flags & COMPOSITE_PARTIAL || op > PictOpSrc)) {
		DBG(("%s: unable to set render target\n",
		     __FUNCTION__));
		goto fallback;
	}

	tmp->op = op;
	tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);
	tmp->u.gen3.num_constants = 0;
	tmp->src.u.gen3.type = SHADER_TEXTURE;
	tmp->src.is_affine = true;
	DBG(("%s: preparing source\n", __FUNCTION__));
	/* gen3_composite_picture() result: -1 abandons the operation, 0 makes
	 * the channel SHADER_ZERO, 1 means the channel was prepared and still
	 * needs gen3_composite_channel_convert().
	 */
	switch (gen3_composite_picture(sna, src, tmp, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		tmp->src.u.gen3.type = SHADER_ZERO;
		break;
	case 1:
		/* With a real source bo and no mask, give the BLT one more
		 * chance now that the channel has been resolved.
		 */
		if (mask == NULL && tmp->src.bo &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen3_composite_channel_convert(&tmp->src);
		break;
	}
	DBG(("%s: source type=%d\n", __FUNCTION__, tmp->src.u.gen3.type));

	tmp->mask.u.gen3.type = SHADER_NONE;
	tmp->mask.is_affine = true;
	tmp->need_magic_ca_pass = false;
	tmp->has_component_alpha = false;
	/* A zero source makes the mask irrelevant, so skip preparing it. */
	if (mask && tmp->src.u.gen3.type != SHADER_ZERO) {
		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, mask_x, mask_y)) {
			tmp->mask.u.gen3.type = SHADER_TEXTURE;
			DBG(("%s: preparing mask\n", __FUNCTION__));
			switch (gen3_composite_picture(sna, mask, tmp, &tmp->mask,
						       mask_x, mask_y,
						       width,  height,
						       dst_x,  dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				goto cleanup_src;
			case 0:
				tmp->mask.u.gen3.type = SHADER_ZERO;
				break;
			case 1:
				gen3_composite_channel_convert(&tmp->mask);
				break;
			}
		}
		DBG(("%s: mask type=%d\n", __FUNCTION__, tmp->mask.u.gen3.type));
		/* A zero mask zeroes the source as well: drop the source bo
		 * and collapse to a mask-less SHADER_ZERO operation.
		 */
		if (tmp->mask.u.gen3.type == SHADER_ZERO) {
			if (tmp->src.bo) {
				kgem_bo_destroy(&sna->kgem,
						tmp->src.bo);
				tmp->src.bo = NULL;
			}
			tmp->src.u.gen3.type = SHADER_ZERO;
			tmp->mask.u.gen3.type = SHADER_NONE;
		}

		if (tmp->mask.u.gen3.type != SHADER_NONE) {
			if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
				/* Check if it's component alpha that relies on a source alpha
				 * and on the source value.  We can only get one of those
				 * into the single source value that we get to blend with.
				 */
				DBG(("%s: component-alpha mask: %d\n",
				     __FUNCTION__, tmp->mask.u.gen3.type));
				tmp->has_component_alpha = true;
				if (tmp->mask.u.gen3.type == SHADER_WHITE) {
					/* A white mask changes nothing: drop it. */
					tmp->mask.u.gen3.type = SHADER_NONE;
					tmp->has_component_alpha = false;
				} else if (gen3_blend_op[op].src_alpha &&
					   gen3_blend_op[op].src_blend != BLENDFACT_ZERO) {
					if (op != PictOpOver)
						goto cleanup_mask;

					/* PictOpOver is handled by running
					 * PictOpOutReverse here and flagging
					 * that an extra pass is needed.
					 */
					tmp->need_magic_ca_pass = true;
					tmp->op = PictOpOutReverse;
				}
			} else {
				if (tmp->mask.is_opaque) {
					tmp->mask.u.gen3.type = SHADER_NONE;
				} else if (is_constant_ps(tmp->src.u.gen3.type) &&
					   is_constant_ps(tmp->mask.u.gen3.type)) {
					uint32_t v;

					/* Both channels are constants: scale
					 * each source channel by the mask
					 * alpha now and drop the mask.
					 */
					v = multa(tmp->src.u.gen3.mode,
						  tmp->mask.u.gen3.mode,
						  24);
					v |= multa(tmp->src.u.gen3.mode,
						   tmp->mask.u.gen3.mode,
						   16);
					v |= multa(tmp->src.u.gen3.mode,
						   tmp->mask.u.gen3.mode,
						   8);
					v |= multa(tmp->src.u.gen3.mode,
						   tmp->mask.u.gen3.mode,
						   0);

					DBG(("%s: combining constant source/mask: %x x %x -> %x\n",
					     __FUNCTION__,
					     tmp->src.u.gen3.mode,
					     tmp->mask.u.gen3.mode,
					     v));

					tmp->src.u.gen3.type = SHADER_CONSTANT;
					tmp->src.u.gen3.mode = v;
					tmp->src.is_opaque = false;

					tmp->mask.u.gen3.type = SHADER_NONE;
				}
			}
		}
	}
	DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__,
	     tmp->src.u.gen3.type, tmp->mask.u.gen3.type,
	     tmp->src.is_affine, tmp->mask.is_affine));

	/* Start from the generic emitter and replace it with a specialised
	 * one where the channel combination allows.  Combinations not matched
	 * below (e.g. a non-affine source) keep the generic emitter.
	 * NOTE(review): tmp->emit_boxes is only assigned in the constant-mask
	 * branches and is tested further down without being cleared here;
	 * presumably the caller zeroes *tmp - confirm.
	 */
	tmp->prim_emit = gen3_emit_composite_primitive;
	if (is_constant_ps(tmp->mask.u.gen3.type)) {
		switch (tmp->src.u.gen3.type) {
		case SHADER_NONE:
		case SHADER_ZERO:
		case SHADER_BLACK:
		case SHADER_WHITE:
		case SHADER_CONSTANT:
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen3_emit_composite_primitive_constant__sse2;
				tmp->emit_boxes = gen3_emit_composite_boxes_constant__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen3_emit_composite_primitive_constant;
				tmp->emit_boxes = gen3_emit_composite_boxes_constant;
			}

			break;
		case SHADER_LINEAR:
		case SHADER_RADIAL:
			if (tmp->src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient__sse2;
					tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
					tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient;
				}
			} else if (tmp->src.is_affine) {
				/* Store the reciprocal of matrix[2][2] as the scale. */
				tmp->src.scale[1] = tmp->src.scale[0] = 1. / tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient__sse2;
					tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
					tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient;
				}
			}
			break;
		case SHADER_TEXTURE:
			if (tmp->src.transform == NULL) {
				/* All offsets zero: use the no_offset emitters. */
				if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
#if defined(sse2) && !defined(__x86_64__)
					if (sna->cpu_features & SSE2) {
						tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset__sse2;
						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset__sse2;
					} else
#endif
					{
						tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset;
						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset;
					}
				} else {
#if defined(sse2) && !defined(__x86_64__)
					if (sna->cpu_features & SSE2) {
						tmp->prim_emit = gen3_emit_composite_primitive_identity_source__sse2;
						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source__sse2;
					} else
#endif
					{
						tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source;
					}
				}
			} else if (tmp->src.is_affine) {
				/* Divide the existing scale by matrix[2][2]. */
				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen3_emit_composite_primitive_affine_source__sse2;
					tmp->emit_boxes = gen3_emit_composite_boxes_affine_source__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
					tmp->emit_boxes = gen3_emit_composite_boxes_affine_source;
				}
			}
			break;
		}
	} else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) {
		/* Textured mask: only an untransformed mask gets a specialised
		 * emitter, and none of these set emit_boxes.
		 */
		if (tmp->mask.transform == NULL) {
			if (is_constant_ps(tmp->src.u.gen3.type)) {
				if ((tmp->mask.offset[0]|tmp->mask.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
#if defined(sse2) && !defined(__x86_64__)
					if (sna->cpu_features & SSE2) {
						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2;
					} else
#endif
					{
						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset;
					}
				} else {
#if defined(sse2) && !defined(__x86_64__)
					if (sna->cpu_features & SSE2) {
						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask__sse2;
					} else
#endif
					{
						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask;
					}
				}
			} else if (tmp->src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
				}
			} else if (tmp->src.is_affine) {
				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask;
				}
			}
		}
	}

	/* Two floats for the destination position, plus two (affine) or four
	 * floats for every non-constant channel.
	 */
	tmp->floats_per_vertex = 2;
	if (!is_constant_ps(tmp->src.u.gen3.type))
		tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
	if (!is_constant_ps(tmp->mask.u.gen3.type))
		tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
	DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__,
	     !is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0,
	     !is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0,
	     tmp->floats_per_vertex,
	     tmp->prim_emit != gen3_emit_composite_primitive));
	/* Each rectangle is emitted as three vertices. */
	tmp->floats_per_rect = 3 * tmp->floats_per_vertex;

	tmp->blt   = gen3_render_composite_blt;
	tmp->box   = gen3_render_composite_box;
	tmp->boxes = gen3_render_composite_boxes__blt;
	/* A bulk box emitter enables the batched and threaded box paths. */
	if (tmp->emit_boxes) {
		tmp->boxes = gen3_render_composite_boxes;
		tmp->thread_boxes = gen3_render_composite_boxes__thread;
	}
	tmp->done  = gen3_render_composite_done;

	/* If the bo set fails the check, submit the batch and retry once. */
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL))
			goto cleanup_mask;
	}

	gen3_align_vertex(sna, tmp);
	gen3_emit_composite_state(sna, tmp);
	return true;

	/* Error unwinding: each label releases what was acquired up to that
	 * stage and falls through to the next.
	 */
cleanup_mask:
	if (tmp->mask.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
		tmp->mask.bo = NULL;
	}
cleanup_src:
	if (tmp->src.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
		tmp->src.bo = NULL;
	}
cleanup_dst:
	/* A set redirect.real_bo is taken to mean dst.bo is the temporary
	 * redirect target, which is destroyed here.
	 */
	if (tmp->redirect.real_bo) {
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
		tmp->redirect.real_bo = NULL;
	}
fallback:
	return (mask == NULL &&
		sna_blt_composite(sna,
				  op, src, dst,
				  src_x, src_y,
				  dst_x, dst_y,
				  width, height,
				  flags | COMPOSITE_FALLBACK, tmp));
}
3845
3846static void
3847gen3_emit_composite_spans_vertex(struct sna *sna,
3848				 const struct sna_composite_spans_op *op,
3849				 int16_t x, int16_t y,
3850				 float opacity)
3851{
3852	gen3_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y);
3853	gen3_emit_composite_texcoord(sna, &op->base.src, x, y);
3854	OUT_VERTEX(opacity);
3855}
3856
3857fastcall static void
3858gen3_emit_composite_spans_primitive_zero(struct sna *sna,
3859					 const struct sna_composite_spans_op *op,
3860					 const BoxRec *box,
3861					 float opacity)
3862{
3863	float *v = sna->render.vertices + sna->render.vertex_used;
3864	sna->render.vertex_used += 6;
3865
3866	v[0] = op->base.dst.x + box->x2;
3867	v[1] = op->base.dst.y + box->y2;
3868
3869	v[2] = op->base.dst.x + box->x1;
3870	v[3] = v[1];
3871
3872	v[4] = v[2];
3873	v[5] = op->base.dst.x + box->y1;
3874}
3875
3876fastcall static void
3877gen3_emit_composite_spans_primitive_zero__boxes(const struct sna_composite_spans_op *op,
3878						const struct sna_opacity_box *b,
3879						int nbox, float *v)
3880{
3881	do {
3882		v[0] = op->base.dst.x + b->box.x2;
3883		v[1] = op->base.dst.y + b->box.y2;
3884
3885		v[2] = op->base.dst.x + b->box.x1;
3886		v[3] = v[1];
3887
3888		v[4] = v[2];
3889		v[5] = op->base.dst.x + b->box.y1;
3890
3891		v += 6;
3892		b++;
3893	} while (--nbox);
3894}
3895
3896fastcall static void
3897gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna,
3898						   const struct sna_composite_spans_op *op,
3899						   const BoxRec *box,
3900						   float opacity)
3901{
3902	float *v = sna->render.vertices + sna->render.vertex_used;
3903	sna->render.vertex_used += 6;
3904
3905	v[0] = box->x2;
3906	v[3] = v[1] = box->y2;
3907	v[4] = v[2] = box->x1;
3908	v[5] = box->y1;
3909}
3910
3911fastcall static void
3912gen3_emit_composite_spans_primitive_zero_no_offset__boxes(const struct sna_composite_spans_op *op,
3913							  const struct sna_opacity_box *b,
3914							  int nbox, float *v)
3915{
3916	do {
3917		v[0] = b->box.x2;
3918		v[3] = v[1] = b->box.y2;
3919		v[4] = v[2] = b->box.x1;
3920		v[5] = b->box.y1;
3921
3922		b++;
3923		v += 6;
3924	} while (--nbox);
3925}
3926
3927fastcall static void
3928gen3_emit_composite_spans_primitive_constant(struct sna *sna,
3929					     const struct sna_composite_spans_op *op,
3930					     const BoxRec *box,
3931					     float opacity)
3932{
3933	float *v = sna->render.vertices + sna->render.vertex_used;
3934	sna->render.vertex_used += 9;
3935
3936	v[0] = op->base.dst.x + box->x2;
3937	v[6] = v[3] = op->base.dst.x + box->x1;
3938	v[4] = v[1] = op->base.dst.y + box->y2;
3939	v[7] = op->base.dst.y + box->y1;
3940	v[8] = v[5] = v[2] = opacity;
3941}
3942
3943fastcall static void
3944gen3_emit_composite_spans_primitive_constant__boxes(const struct sna_composite_spans_op *op,
3945						    const struct sna_opacity_box *b,
3946						    int nbox,
3947						    float *v)
3948{
3949	do {
3950		v[0] = op->base.dst.x + b->box.x2;
3951		v[6] = v[3] = op->base.dst.x + b->box.x1;
3952		v[4] = v[1] = op->base.dst.y + b->box.y2;
3953		v[7] = op->base.dst.y + b->box.y1;
3954		v[8] = v[5] = v[2] = b->alpha;
3955
3956		v += 9;
3957		b++;
3958	} while (--nbox);
3959}
3960
3961fastcall static void
3962gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna,
3963						       const struct sna_composite_spans_op *op,
3964						       const BoxRec *box,
3965						       float opacity)
3966{
3967	float *v = sna->render.vertices + sna->render.vertex_used;
3968	sna->render.vertex_used += 9;
3969
3970	v[0] = box->x2;
3971	v[6] = v[3] = box->x1;
3972	v[4] = v[1] = box->y2;
3973	v[7] = box->y1;
3974	v[8] = v[5] = v[2] = opacity;
3975}
3976
3977fastcall static void
3978gen3_emit_composite_spans_primitive_constant_no_offset__boxes(const struct sna_composite_spans_op *op,
3979							      const struct sna_opacity_box *b,
3980							      int nbox, float *v)
3981{
3982	do {
3983		v[0] = b->box.x2;
3984		v[6] = v[3] = b->box.x1;
3985		v[4] = v[1] = b->box.y2;
3986		v[7] = b->box.y1;
3987		v[8] = v[5] = v[2] = b->alpha;
3988
3989		v += 9;
3990		b++;
3991	} while (--nbox);
3992}
3993
3994fastcall static void
3995gen3_emit_composite_spans_primitive_identity_source(struct sna *sna,
3996						    const struct sna_composite_spans_op *op,
3997						    const BoxRec *box,
3998						    float opacity)
3999{
4000	float *v = sna->render.vertices + sna->render.vertex_used;
4001	sna->render.vertex_used += 15;
4002
4003	v[0] = op->base.dst.x + box->x2;
4004	v[1] = op->base.dst.y + box->y2;
4005	v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
4006	v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
4007	v[4] = opacity;
4008
4009	v[5] = op->base.dst.x + box->x1;
4010	v[6] = v[1];
4011	v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
4012	v[8] = v[3];
4013	v[9] = opacity;
4014
4015	v[10] = v[5];
4016	v[11] = op->base.dst.y + box->y1;
4017	v[12] = v[7];
4018	v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
4019	v[14] = opacity;
4020}
4021
4022fastcall static void
4023gen3_emit_composite_spans_primitive_identity_source__boxes(const struct sna_composite_spans_op *op,
4024							   const struct sna_opacity_box *b,
4025							   int nbox,
4026							   float *v)
4027{
4028	do {
4029		v[0] = op->base.dst.x + b->box.x2;
4030		v[1] = op->base.dst.y + b->box.y2;
4031		v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0];
4032		v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1];
4033		v[4] = b->alpha;
4034
4035		v[5] = op->base.dst.x + b->box.x1;
4036		v[6] = v[1];
4037		v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0];
4038		v[8] = v[3];
4039		v[9] = b->alpha;
4040
4041		v[10] = v[5];
4042		v[11] = op->base.dst.y + b->box.y1;
4043		v[12] = v[7];
4044		v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1];
4045		v[14] = b->alpha;
4046
4047		v += 15;
4048		b++;
4049	} while (--nbox);
4050}
4051
4052fastcall static void
4053gen3_emit_composite_spans_primitive_affine_source(struct sna *sna,
4054						  const struct sna_composite_spans_op *op,
4055						  const BoxRec *box,
4056						  float opacity)
4057{
4058	PictTransform *transform = op->base.src.transform;
4059	float *v;
4060
4061	v = sna->render.vertices + sna->render.vertex_used;
4062	sna->render.vertex_used += 15;
4063
4064	v[0]  = op->base.dst.x + box->x2;
4065	v[6]  = v[1] = op->base.dst.y + box->y2;
4066	v[10] = v[5] = op->base.dst.x + box->x1;
4067	v[11] = op->base.dst.y + box->y1;
4068	v[14] = v[9] = v[4]  = opacity;
4069
4070	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
4071				    (int)op->base.src.offset[1] + box->y2,
4072				    transform, op->base.src.scale,
4073				    &v[2], &v[3]);
4074
4075	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
4076				    (int)op->base.src.offset[1] + box->y2,
4077				    transform, op->base.src.scale,
4078				    &v[7], &v[8]);
4079
4080	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
4081				    (int)op->base.src.offset[1] + box->y1,
4082				    transform, op->base.src.scale,
4083				    &v[12], &v[13]);
4084}
4085
4086fastcall static void
4087gen3_emit_composite_spans_primitive_affine_source__boxes(const struct sna_composite_spans_op *op,
4088							 const struct sna_opacity_box *b,
4089							 int nbox,
4090							 float *v)
4091{
4092	PictTransform *transform = op->base.src.transform;
4093
4094	do {
4095		v[0]  = op->base.dst.x + b->box.x2;
4096		v[6]  = v[1] = op->base.dst.y + b->box.y2;
4097		v[10] = v[5] = op->base.dst.x + b->box.x1;
4098		v[11] = op->base.dst.y + b->box.y1;
4099		v[14] = v[9] = v[4]  = b->alpha;
4100
4101		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2,
4102					    (int)op->base.src.offset[1] + b->box.y2,
4103					    transform, op->base.src.scale,
4104					    &v[2], &v[3]);
4105
4106		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
4107					    (int)op->base.src.offset[1] + b->box.y2,
4108					    transform, op->base.src.scale,
4109					    &v[7], &v[8]);
4110
4111		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
4112					    (int)op->base.src.offset[1] + b->box.y1,
4113					    transform, op->base.src.scale,
4114					    &v[12], &v[13]);
4115		v += 15;
4116		b++;
4117	} while (--nbox);
4118}
4119
4120fastcall static void
4121gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna,
4122						      const struct sna_composite_spans_op *op,
4123						      const BoxRec *box,
4124						      float opacity)
4125{
4126	float *v = sna->render.vertices + sna->render.vertex_used;
4127	sna->render.vertex_used += 15;
4128
4129	v[0] = op->base.dst.x + box->x2;
4130	v[1] = op->base.dst.y + box->y2;
4131	v[2] = op->base.src.offset[0] + box->x2;
4132	v[3] = op->base.src.offset[1] + box->y2;
4133	v[4] = opacity;
4134
4135	v[5] = op->base.dst.x + box->x1;
4136	v[6] = v[1];
4137	v[7] = op->base.src.offset[0] + box->x1;
4138	v[8] = v[3];
4139	v[9] = opacity;
4140
4141	v[10] = v[5];
4142	v[11] = op->base.dst.y + box->y1;
4143	v[12] = v[7];
4144	v[13] = op->base.src.offset[1] + box->y1;
4145	v[14] = opacity;
4146}
4147
4148fastcall static void
4149gen3_emit_composite_spans_primitive_identity_gradient__boxes(const struct sna_composite_spans_op *op,
4150							     const struct sna_opacity_box *b,
4151							     int nbox,
4152							     float *v)
4153{
4154	do {
4155		v[0] = op->base.dst.x + b->box.x2;
4156		v[1] = op->base.dst.y + b->box.y2;
4157		v[2] = op->base.src.offset[0] + b->box.x2;
4158		v[3] = op->base.src.offset[1] + b->box.y2;
4159		v[4] = b->alpha;
4160
4161		v[5] = op->base.dst.x + b->box.x1;
4162		v[6] = v[1];
4163		v[7] = op->base.src.offset[0] + b->box.x1;
4164		v[8] = v[3];
4165		v[9] = b->alpha;
4166
4167		v[10] = v[5];
4168		v[11] = op->base.dst.y + b->box.y1;
4169		v[12] = v[7];
4170		v[13] = op->base.src.offset[1] + b->box.y1;
4171		v[14] = b->alpha;
4172
4173		v += 15;
4174		b++;
4175	} while (--nbox);
4176}
4177
4178#if defined(sse2) && !defined(__x86_64__)
4179sse2 fastcall static void
4180gen3_emit_composite_spans_primitive_constant__sse2(struct sna *sna,
4181						   const struct sna_composite_spans_op *op,
4182						   const BoxRec *box,
4183						   float opacity)
4184{
4185	float *v = sna->render.vertices + sna->render.vertex_used;
4186	sna->render.vertex_used += 9;
4187
4188	v[0] = op->base.dst.x + box->x2;
4189	v[6] = v[3] = op->base.dst.x + box->x1;
4190	v[4] = v[1] = op->base.dst.y + box->y2;
4191	v[7] = op->base.dst.y + box->y1;
4192	v[8] = v[5] = v[2] = opacity;
4193}
4194
4195sse2 fastcall static void
4196gen3_emit_composite_spans_primitive_constant__sse2__boxes(const struct sna_composite_spans_op *op,
4197							  const struct sna_opacity_box *b,
4198							  int nbox,
4199							  float *v)
4200{
4201	do {
4202		v[0] = op->base.dst.x + b->box.x2;
4203		v[6] = v[3] = op->base.dst.x + b->box.x1;
4204		v[4] = v[1] = op->base.dst.y + b->box.y2;
4205		v[7] = op->base.dst.y + b->box.y1;
4206		v[8] = v[5] = v[2] = b->alpha;
4207
4208		v += 9;
4209		b++;
4210	} while (--nbox);
4211}
4212
4213sse2 fastcall static void
4214gen3_render_composite_spans_constant_box__sse2(struct sna *sna,
4215					       const struct sna_composite_spans_op *op,
4216					       const BoxRec *box, float opacity)
4217{
4218	float *v;
4219	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
4220	     __FUNCTION__,
4221	     op->base.src.offset[0], op->base.src.offset[1],
4222	     opacity,
4223	     op->base.dst.x, op->base.dst.y,
4224	     box->x1, box->y1,
4225	     box->x2 - box->x1,
4226	     box->y2 - box->y1));
4227
4228	gen3_get_rectangles(sna, &op->base, 1);
4229
4230	v = sna->render.vertices + sna->render.vertex_used;
4231	sna->render.vertex_used += 9;
4232
4233	v[0] = box->x2;
4234	v[6] = v[3] = box->x1;
4235	v[4] = v[1] = box->y2;
4236	v[7] = box->y1;
4237	v[8] = v[5] = v[2] = opacity;
4238}
4239
/*
 * Threaded, constant-source, no-offset box renderer (SSE2 build).
 *
 * Repeatedly asks gen3_get_rectangles() for up to nbox rectangles, claims
 * 9 floats per rectangle in the vertex array while holding the vertex lock,
 * then drops the lock while the (x, y, alpha) vertices are written and
 * retakes it afterwards.  Loops until every box has been emitted.
 */
sse2 fastcall static void
gen3_render_composite_spans_constant_thread__sse2__boxes(struct sna *sna,
							 const struct sna_composite_spans_op *op,
							 const struct sna_opacity_box *box,
							 int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		/* The inner do/while below requires a non-zero count. */
		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Claim the vertex range for this batch while still locked. */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * 9;

		/*
		 * NOTE(review): acquire/release presumably track writers that
		 * are still filling claimed vertex space -- confirm against
		 * the sna_vertex_* helpers.
		 */
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* Fill the claimed range with the lock dropped. */
		do {
			v[0] = box->box.x2;
			v[6] = v[3] = box->box.x1;
			v[4] = v[1] = box->box.y2;
			v[7] = box->box.y1;
			v[8] = v[5] = v[2] = box->alpha;
			v += 9;
			box++;
		} while (--nbox_this_time);

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
4281
4282sse2 fastcall static void
4283gen3_emit_composite_spans_primitive_constant__sse2__no_offset(struct sna *sna,
4284							      const struct sna_composite_spans_op *op,
4285							      const BoxRec *box,
4286							      float opacity)
4287{
4288	float *v = sna->render.vertices + sna->render.vertex_used;
4289	sna->render.vertex_used += 9;
4290
4291	v[0] = box->x2;
4292	v[6] = v[3] = box->x1;
4293	v[4] = v[1] = box->y2;
4294	v[7] = box->y1;
4295	v[8] = v[5] = v[2] = opacity;
4296}
4297
4298sse2 fastcall static void
4299gen3_emit_composite_spans_primitive_constant__sse2__no_offset__boxes(const struct sna_composite_spans_op *op,
4300								     const struct sna_opacity_box *b,
4301								     int nbox, float *v)
4302{
4303	do {
4304		v[0] = b->box.x2;
4305		v[6] = v[3] = b->box.x1;
4306		v[4] = v[1] = b->box.y2;
4307		v[7] = b->box.y1;
4308		v[8] = v[5] = v[2] = b->alpha;
4309
4310		v += 9;
4311		b++;
4312	} while (--nbox);
4313}
4314
4315sse2 fastcall static void
4316gen3_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna,
4317							  const struct sna_composite_spans_op *op,
4318							  const BoxRec *box,
4319							  float opacity)
4320{
4321	float *v = sna->render.vertices + sna->render.vertex_used;
4322	sna->render.vertex_used += 15;
4323
4324	v[0] = op->base.dst.x + box->x2;
4325	v[1] = op->base.dst.y + box->y2;
4326	v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
4327	v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
4328	v[4] = opacity;
4329
4330	v[5] = op->base.dst.x + box->x1;
4331	v[6] = v[1];
4332	v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
4333	v[8] = v[3];
4334	v[9] = opacity;
4335
4336	v[10] = v[5];
4337	v[11] = op->base.dst.y + box->y1;
4338	v[12] = v[7];
4339	v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
4340	v[14] = opacity;
4341}
4342
4343sse2 fastcall static void
4344gen3_emit_composite_spans_primitive_identity_source__sse2__boxes(const struct sna_composite_spans_op *op,
4345								 const struct sna_opacity_box *b,
4346								 int nbox,
4347								 float *v)
4348{
4349	do {
4350		v[0] = op->base.dst.x + b->box.x2;
4351		v[1] = op->base.dst.y + b->box.y2;
4352		v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0];
4353		v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1];
4354		v[4] = b->alpha;
4355
4356		v[5] = op->base.dst.x + b->box.x1;
4357		v[6] = v[1];
4358		v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0];
4359		v[8] = v[3];
4360		v[9] = b->alpha;
4361
4362		v[10] = v[5];
4363		v[11] = op->base.dst.y + b->box.y1;
4364		v[12] = v[7];
4365		v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1];
4366		v[14] = b->alpha;
4367
4368		v += 15;
4369		b++;
4370	} while (--nbox);
4371}
4372sse2 fastcall static void
4373gen3_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna,
4374							const struct sna_composite_spans_op *op,
4375							const BoxRec *box,
4376							float opacity)
4377{
4378	PictTransform *transform = op->base.src.transform;
4379	float *v;
4380
4381	v = sna->render.vertices + sna->render.vertex_used;
4382	sna->render.vertex_used += 15;
4383
4384	v[0]  = op->base.dst.x + box->x2;
4385	v[6]  = v[1] = op->base.dst.y + box->y2;
4386	v[10] = v[5] = op->base.dst.x + box->x1;
4387	v[11] = op->base.dst.y + box->y1;
4388	v[14] = v[9] = v[4]  = opacity;
4389
4390	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
4391				    (int)op->base.src.offset[1] + box->y2,
4392				    transform, op->base.src.scale,
4393				    &v[2], &v[3]);
4394
4395	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
4396				    (int)op->base.src.offset[1] + box->y2,
4397				    transform, op->base.src.scale,
4398				    &v[7], &v[8]);
4399
4400	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
4401				    (int)op->base.src.offset[1] + box->y1,
4402				    transform, op->base.src.scale,
4403				    &v[12], &v[13]);
4404}
4405
4406sse2 fastcall static void
4407gen3_emit_composite_spans_primitive_affine_source__sse2__boxes(const struct sna_composite_spans_op *op,
4408							       const struct sna_opacity_box *b,
4409							       int nbox,
4410							       float *v)
4411{
4412	PictTransform *transform = op->base.src.transform;
4413
4414	do {
4415		v[0]  = op->base.dst.x + b->box.x2;
4416		v[6]  = v[1] = op->base.dst.y + b->box.y2;
4417		v[10] = v[5] = op->base.dst.x + b->box.x1;
4418		v[11] = op->base.dst.y + b->box.y1;
4419		v[14] = v[9] = v[4]  = b->alpha;
4420
4421		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2,
4422					    (int)op->base.src.offset[1] + b->box.y2,
4423					    transform, op->base.src.scale,
4424					    &v[2], &v[3]);
4425
4426		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
4427					    (int)op->base.src.offset[1] + b->box.y2,
4428					    transform, op->base.src.scale,
4429					    &v[7], &v[8]);
4430
4431		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
4432					    (int)op->base.src.offset[1] + b->box.y1,
4433					    transform, op->base.src.scale,
4434					    &v[12], &v[13]);
4435		v += 15;
4436		b++;
4437	} while (--nbox);
4438}
4439
4440sse2 fastcall static void
4441gen3_emit_composite_spans_primitive_identity_gradient__sse2(struct sna *sna,
4442							    const struct sna_composite_spans_op *op,
4443							    const BoxRec *box,
4444							    float opacity)
4445{
4446	float *v = sna->render.vertices + sna->render.vertex_used;
4447	sna->render.vertex_used += 15;
4448
4449	v[0] = op->base.dst.x + box->x2;
4450	v[1] = op->base.dst.y + box->y2;
4451	v[2] = op->base.src.offset[0] + box->x2;
4452	v[3] = op->base.src.offset[1] + box->y2;
4453	v[4] = opacity;
4454
4455	v[5] = op->base.dst.x + box->x1;
4456	v[6] = v[1];
4457	v[7] = op->base.src.offset[0] + box->x1;
4458	v[8] = v[3];
4459	v[9] = opacity;
4460
4461	v[10] = v[5];
4462	v[11] = op->base.dst.y + box->y1;
4463	v[12] = v[7];
4464	v[13] = op->base.src.offset[1] + box->y1;
4465	v[14] = opacity;
4466}
4467
4468sse2 fastcall static void
4469gen3_emit_composite_spans_primitive_identity_gradient__sse2__boxes(const struct sna_composite_spans_op *op,
4470								   const struct sna_opacity_box *b,
4471								   int nbox,
4472								   float *v)
4473{
4474	do {
4475		v[0] = op->base.dst.x + b->box.x2;
4476		v[1] = op->base.dst.y + b->box.y2;
4477		v[2] = op->base.src.offset[0] + b->box.x2;
4478		v[3] = op->base.src.offset[1] + b->box.y2;
4479		v[4] = b->alpha;
4480
4481		v[5] = op->base.dst.x + b->box.x1;
4482		v[6] = v[1];
4483		v[7] = op->base.src.offset[0] + b->box.x1;
4484		v[8] = v[3];
4485		v[9] = b->alpha;
4486
4487		v[10] = v[5];
4488		v[11] = op->base.dst.y + b->box.y1;
4489		v[12] = v[7];
4490		v[13] = op->base.src.offset[1] + b->box.y1;
4491		v[14] = b->alpha;
4492
4493		v += 15;
4494		b++;
4495	} while (--nbox);
4496}
4497
4498sse2 fastcall static void
4499gen3_emit_composite_spans_primitive_affine_gradient__sse2(struct sna *sna,
4500							  const struct sna_composite_spans_op *op,
4501							  const BoxRec *box,
4502							  float opacity)
4503{
4504	PictTransform *transform = op->base.src.transform;
4505	float *v = sna->render.vertices + sna->render.vertex_used;
4506	sna->render.vertex_used += 15;
4507
4508	v[0] = op->base.dst.x + box->x2;
4509	v[1] = op->base.dst.y + box->y2;
4510	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x2,
4511				    op->base.src.offset[1] + box->y2,
4512				    transform, op->base.src.scale,
4513				    &v[2], &v[3]);
4514	v[4] = opacity;
4515
4516	v[5] = op->base.dst.x + box->x1;
4517	v[6] = v[1];
4518	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
4519				    op->base.src.offset[1] + box->y2,
4520				    transform, op->base.src.scale,
4521				    &v[7], &v[8]);
4522	v[9] = opacity;
4523
4524	v[10] = v[5];
4525	v[11] = op->base.dst.y + box->y1;
4526	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
4527				    op->base.src.offset[1] + box->y1,
4528				    transform, op->base.src.scale,
4529				    &v[12], &v[13]);
4530	v[14] = opacity;
4531}
4532
4533sse2 fastcall static void
4534gen3_emit_composite_spans_primitive_affine_gradient__sse2__boxes(const struct sna_composite_spans_op *op,
4535								 const struct sna_opacity_box *b,
4536								 int nbox,
4537								 float *v)
4538{
4539	PictTransform *transform = op->base.src.transform;
4540
4541	do {
4542		v[0] = op->base.dst.x + b->box.x2;
4543		v[1] = op->base.dst.y + b->box.y2;
4544		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
4545					    op->base.src.offset[1] + b->box.y2,
4546					    transform, op->base.src.scale,
4547					    &v[2], &v[3]);
4548		v[4] = b->alpha;
4549
4550		v[5] = op->base.dst.x + b->box.x1;
4551		v[6] = v[1];
4552		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
4553					    op->base.src.offset[1] + b->box.y2,
4554					    transform, op->base.src.scale,
4555					    &v[7], &v[8]);
4556		v[9] = b->alpha;
4557
4558		v[10] = v[5];
4559		v[11] = op->base.dst.y + b->box.y1;
4560		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
4561					    op->base.src.offset[1] + b->box.y1,
4562					    transform, op->base.src.scale,
4563					    &v[12], &v[13]);
4564		v[14] = b->alpha;
4565		v += 15;
4566		b++;
4567	} while (--nbox);
4568}
4569#endif
4570
4571fastcall static void
4572gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna,
4573						    const struct sna_composite_spans_op *op,
4574						    const BoxRec *box,
4575						    float opacity)
4576{
4577	PictTransform *transform = op->base.src.transform;
4578	float *v = sna->render.vertices + sna->render.vertex_used;
4579	sna->render.vertex_used += 15;
4580
4581	v[0] = op->base.dst.x + box->x2;
4582	v[1] = op->base.dst.y + box->y2;
4583	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x2,
4584				    op->base.src.offset[1] + box->y2,
4585				    transform, op->base.src.scale,
4586				    &v[2], &v[3]);
4587	v[4] = opacity;
4588
4589	v[5] = op->base.dst.x + box->x1;
4590	v[6] = v[1];
4591	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
4592				    op->base.src.offset[1] + box->y2,
4593				    transform, op->base.src.scale,
4594				    &v[7], &v[8]);
4595	v[9] = opacity;
4596
4597	v[10] = v[5];
4598	v[11] = op->base.dst.y + box->y1;
4599	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
4600				    op->base.src.offset[1] + box->y1,
4601				    transform, op->base.src.scale,
4602				    &v[12], &v[13]);
4603	v[14] = opacity;
4604}
4605
4606fastcall static void
4607gen3_emit_composite_spans_primitive_affine_gradient__boxes(const struct sna_composite_spans_op *op,
4608							   const struct sna_opacity_box *b,
4609							   int nbox,
4610							   float *v)
4611{
4612	PictTransform *transform = op->base.src.transform;
4613
4614	do {
4615		v[0] = op->base.dst.x + b->box.x2;
4616		v[1] = op->base.dst.y + b->box.y2;
4617		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
4618					    op->base.src.offset[1] + b->box.y2,
4619					    transform, op->base.src.scale,
4620					    &v[2], &v[3]);
4621		v[4] = b->alpha;
4622
4623		v[5] = op->base.dst.x + b->box.x1;
4624		v[6] = v[1];
4625		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
4626					    op->base.src.offset[1] + b->box.y2,
4627					    transform, op->base.src.scale,
4628					    &v[7], &v[8]);
4629		v[9] = b->alpha;
4630
4631		v[10] = v[5];
4632		v[11] = op->base.dst.y + b->box.y1;
4633		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
4634					    op->base.src.offset[1] + b->box.y1,
4635					    transform, op->base.src.scale,
4636					    &v[12], &v[13]);
4637		v[14] = b->alpha;
4638		v += 15;
4639		b++;
4640	} while (--nbox);
4641}
4642
4643fastcall static void
4644gen3_emit_composite_spans_primitive(struct sna *sna,
4645				    const struct sna_composite_spans_op *op,
4646				    const BoxRec *box,
4647				    float opacity)
4648{
4649	gen3_emit_composite_spans_vertex(sna, op,
4650					 box->x2, box->y2,
4651					 opacity);
4652	gen3_emit_composite_spans_vertex(sna, op,
4653					 box->x1, box->y2,
4654					 opacity);
4655	gen3_emit_composite_spans_vertex(sna, op,
4656					 box->x1, box->y1,
4657					 opacity);
4658}
4659
4660fastcall static void
4661gen3_render_composite_spans_constant_box(struct sna *sna,
4662					 const struct sna_composite_spans_op *op,
4663					 const BoxRec *box, float opacity)
4664{
4665	float *v;
4666	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
4667	     __FUNCTION__,
4668	     op->base.src.offset[0], op->base.src.offset[1],
4669	     opacity,
4670	     op->base.dst.x, op->base.dst.y,
4671	     box->x1, box->y1,
4672	     box->x2 - box->x1,
4673	     box->y2 - box->y1));
4674
4675	gen3_get_rectangles(sna, &op->base, 1);
4676
4677	v = sna->render.vertices + sna->render.vertex_used;
4678	sna->render.vertex_used += 9;
4679
4680	v[0] = box->x2;
4681	v[6] = v[3] = box->x1;
4682	v[4] = v[1] = box->y2;
4683	v[7] = box->y1;
4684	v[8] = v[5] = v[2] = opacity;
4685}
4686
/*
 * Threaded, constant-source, no-offset box renderer.
 *
 * Repeatedly asks gen3_get_rectangles() for up to nbox rectangles, claims
 * 9 floats per rectangle in the vertex array while holding the vertex lock,
 * then drops the lock while the (x, y, alpha) vertices are written and
 * retakes it afterwards.  Loops until every box has been emitted.
 */
fastcall static void
gen3_render_composite_spans_constant_thread_boxes(struct sna *sna,
						  const struct sna_composite_spans_op *op,
						  const struct sna_opacity_box *box,
						  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		/* The inner do/while below requires a non-zero count. */
		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Claim the vertex range for this batch while still locked. */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * 9;

		/*
		 * NOTE(review): acquire/release presumably track writers that
		 * are still filling claimed vertex space -- confirm against
		 * the sna_vertex_* helpers.
		 */
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* Fill the claimed range with the lock dropped. */
		do {
			v[0] = box->box.x2;
			v[6] = v[3] = box->box.x1;
			v[4] = v[1] = box->box.y2;
			v[7] = box->box.y1;
			v[8] = v[5] = v[2] = box->alpha;
			v += 9;
			box++;
		} while (--nbox_this_time);

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
4728
4729fastcall static void
4730gen3_render_composite_spans_box(struct sna *sna,
4731				const struct sna_composite_spans_op *op,
4732				const BoxRec *box, float opacity)
4733{
4734	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
4735	     __FUNCTION__,
4736	     op->base.src.offset[0], op->base.src.offset[1],
4737	     opacity,
4738	     op->base.dst.x, op->base.dst.y,
4739	     box->x1, box->y1,
4740	     box->x2 - box->x1,
4741	     box->y2 - box->y1));
4742
4743	gen3_get_rectangles(sna, &op->base, 1);
4744	op->prim_emit(sna, op, box, opacity);
4745}
4746
4747static void
4748gen3_render_composite_spans_boxes(struct sna *sna,
4749				  const struct sna_composite_spans_op *op,
4750				  const BoxRec *box, int nbox,
4751				  float opacity)
4752{
4753	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
4754	     __FUNCTION__, nbox,
4755	     op->base.src.offset[0], op->base.src.offset[1],
4756	     opacity,
4757	     op->base.dst.x, op->base.dst.y));
4758
4759	do {
4760		int nbox_this_time;
4761
4762		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
4763		nbox -= nbox_this_time;
4764
4765		do {
4766			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
4767			     box->x1, box->y1,
4768			     box->x2 - box->x1,
4769			     box->y2 - box->y1));
4770
4771			op->prim_emit(sna, op, box++, opacity);
4772		} while (--nbox_this_time);
4773	} while (nbox);
4774}
4775
/*
 * Threaded box renderer for the generic case.
 *
 * While holding the vertex lock, asks gen3_get_rectangles() for up to nbox
 * rectangles and claims floats_per_rect floats for each in the vertex array.
 * The lock is then dropped while op->emit_boxes() fills the claimed range,
 * and retaken afterwards.  Loops until every box has been emitted.
 */
fastcall static void
gen3_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Claim the vertex range for this batch while still locked. */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

		/*
		 * NOTE(review): acquire/release presumably track writers that
		 * are still filling claimed vertex space -- confirm against
		 * the sna_vertex_* helpers.
		 */
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* Fill the claimed range with the lock dropped. */
		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
4810
4811fastcall static void
4812gen3_render_composite_spans_done(struct sna *sna,
4813				 const struct sna_composite_spans_op *op)
4814{
4815	if (sna->render.vertex_offset)
4816		gen3_vertex_flush(sna);
4817
4818	DBG(("%s()\n", __FUNCTION__));
4819
4820	if (op->base.src.bo)
4821		kgem_bo_destroy(&sna->kgem, op->base.src.bo);
4822
4823	sna_render_composite_redirect_done(sna, &op->base);
4824}
4825
4826static bool
4827gen3_check_composite_spans(struct sna *sna,
4828			   uint8_t op, PicturePtr src, PicturePtr dst,
4829			   int16_t width, int16_t height, unsigned flags)
4830{
4831	if (op >= ARRAY_SIZE(gen3_blend_op))
4832		return false;
4833
4834	if (gen3_composite_fallback(sna, op, src, NULL, dst))
4835		return false;
4836
4837	if (need_tiling(sna, width, height) &&
4838	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
4839		DBG(("%s: fallback, tiled operation not on GPU\n",
4840		     __FUNCTION__));
4841		return false;
4842	}
4843
4844	return true;
4845}
4846
/*
 * Set up a spans composite operation in *tmp.
 *
 * Steps, in order:
 *   - hand off to sna_tiling_composite_spans() when need_tiling() says the
 *     extents require it;
 *   - set the render target via gen3_composite_set_target();
 *   - prepare the source channel via gen3_composite_picture();
 *   - pick the box/boxes/thread_boxes/done/prim_emit/emit_boxes callbacks
 *     according to the resulting source shader type;
 *   - compute the vertex layout (floats_per_vertex / floats_per_rect);
 *   - make sure the destination and source bos pass kgem_check_bo(),
 *     submitting the current batch once and retrying if they do not;
 *   - align the vertex buffer and emit the composite state.
 *
 * Returns true when the operation is ready to use (or the result of the
 * tiling helper), and false on failure after releasing what was acquired
 * through the cleanup labels.
 */
static bool
gen3_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x,  int16_t src_y,
			    int16_t dst_x,  int16_t dst_y,
			    int16_t width,  int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	bool no_offset;

	DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__,
	     src_x, src_y, dst_x, dst_y, width, height));

	/* Callers are expected to have passed the check already. */
	assert(gen3_check_composite_spans(sna, op, src, dst, width, height, flags));

	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	if (!gen3_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height,
				       true)) {
		DBG(("%s: unable to set render target\n",
		     __FUNCTION__));
		return false;
	}

	tmp->base.op = op;
	tmp->base.rb_reversed = gen3_dst_rb_reversed(tmp->base.dst.format);
	/* Defaults; gen3_composite_picture() may override them below. */
	tmp->base.src.u.gen3.type = SHADER_TEXTURE;
	tmp->base.src.is_affine = true;
	DBG(("%s: preparing source\n", __FUNCTION__));
	/* -1: failure, 0: treat the source as SHADER_ZERO, 1: source prepared. */
	switch (gen3_composite_picture(sna, src, &tmp->base, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		tmp->base.src.u.gen3.type = SHADER_ZERO;
		break;
	case 1:
		gen3_composite_channel_convert(&tmp->base.src);
		break;
	}
	DBG(("%s: source type=%d\n", __FUNCTION__, tmp->base.src.u.gen3.type));

	/* Every source type except SHADER_ZERO gets an opacity mask channel. */
	if (tmp->base.src.u.gen3.type != SHADER_ZERO)
		tmp->base.mask.u.gen3.type = SHADER_OPACITY;

	no_offset = tmp->base.dst.x == 0 && tmp->base.dst.y == 0;
	/* Generic callbacks; the switch below installs specialised ones. */
	tmp->box   = gen3_render_composite_spans_box;
	tmp->boxes = gen3_render_composite_spans_boxes;
	tmp->thread_boxes = gen3_render_composite_spans_boxes__thread;
	tmp->done  = gen3_render_composite_spans_done;
	tmp->prim_emit = gen3_emit_composite_spans_primitive;
	switch (tmp->base.src.u.gen3.type) {
	case SHADER_NONE:
		assert(0);
		/* fall through when assertions are compiled out */
	case SHADER_ZERO:
		if (no_offset) {
			tmp->prim_emit = gen3_emit_composite_spans_primitive_zero_no_offset;
			tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero_no_offset__boxes;
		} else {
			tmp->prim_emit = gen3_emit_composite_spans_primitive_zero;
			tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero__boxes;
		}
		break;
	case SHADER_BLACK:
	case SHADER_WHITE:
	case SHADER_CONSTANT:
		if (no_offset) {
			/* Only this case also replaces box and thread_boxes. */
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->box = gen3_render_composite_spans_constant_box__sse2;
				tmp->thread_boxes = gen3_render_composite_spans_constant_thread__sse2__boxes;
				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant__sse2__no_offset;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__sse2__no_offset__boxes;
			} else
#endif
			{
				tmp->box = gen3_render_composite_spans_constant_box;
				tmp->thread_boxes = gen3_render_composite_spans_constant_thread_boxes;
				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant_no_offset;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant_no_offset__boxes;
			}
		} else {
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant__sse2;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__sse2__boxes;
			} else
#endif
			{
				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__boxes;
			}
		}
		break;
	case SHADER_LINEAR:
	case SHADER_RADIAL:
		/* A non-affine transform keeps the generic prim_emit set above. */
		if (tmp->base.src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient__sse2;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__sse2__boxes;
			} else
#endif
			{
				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__boxes;
			}
		} else if (tmp->base.src.is_affine) {
			/* Gradients: the scale is set to 1/matrix[2][2] outright. */
			tmp->base.src.scale[1] = tmp->base.src.scale[0] = 1. / tmp->base.src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient__sse2;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__sse2__boxes;
			} else
#endif
			{
				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__boxes;
			}
		}
		break;
	case SHADER_TEXTURE:
		/* A non-affine transform keeps the generic prim_emit set above. */
		if (tmp->base.src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source__sse2;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__sse2__boxes;
			} else
#endif
			{
				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__boxes;
			}
		} else if (tmp->base.src.is_affine) {
			/* Textures: the existing scale is divided by matrix[2][2]. */
			tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
			tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source__sse2;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__sse2__boxes;
			} else
#endif
			{
				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source;
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__boxes;
			}
		}
		break;
	}
	/*
	 * Without a batched emitter the threaded path cannot be used.
	 * NOTE(review): emit_boxes is only ever assigned inside the switch
	 * above, so this test relies on the caller having zero-initialised
	 * *tmp -- confirm against the callers.
	 */
	if (tmp->emit_boxes == NULL)
		tmp->thread_boxes = NULL;

	tmp->base.mask.bo = NULL;

	/*
	 * Vertex layout: 2 floats to start with, plus 2 (affine) or 3
	 * (otherwise) when is_constant_ps() is false for the source type,
	 * plus 1 when the mask is SHADER_OPACITY.  A rectangle is three
	 * vertices.
	 */
	tmp->base.floats_per_vertex = 2;
	if (!is_constant_ps(tmp->base.src.u.gen3.type))
		tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
	tmp->base.floats_per_vertex +=
		tmp->base.mask.u.gen3.type == SHADER_OPACITY;
	tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;

	/* If the bos fail the check, submit the batch and retry exactly once. */
	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
	}

	gen3_align_vertex(sna, &tmp->base);
	gen3_emit_composite_state(sna, &tmp->base);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	/*
	 * NOTE(review): dst.bo is destroyed only when redirect.real_bo is
	 * set -- presumably dst.bo is then a temporary created by target
	 * redirection; confirm against gen3_composite_set_target().
	 */
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
5043
5044static void
5045gen3_emit_video_state(struct sna *sna,
5046		      struct sna_video *video,
5047		      struct sna_video_frame *frame,
5048		      PixmapPtr pixmap,
5049		      struct kgem_bo *dst_bo,
5050		      int width, int height,
5051		      bool bilinear)
5052{
5053	struct gen3_render_state *state = &sna->render_state.gen3;
5054	uint32_t id, ms3, rewind;
5055
5056	gen3_emit_target(sna, dst_bo, width, height,
5057			 sna_format_for_depth(pixmap->drawable.depth));
5058
5059	/* XXX share with composite? Is it worth the effort? */
5060	if ((state->last_shader & (1<<31)) == 0) {
5061		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
5062			  I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) |
5063			  2);
5064		OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
5065		OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
5066			  S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
5067			  S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
5068			  S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
5069			  S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
5070			  S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
5071			  S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
5072			  S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
5073		OUT_BATCH((2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
5074			  (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
5075			  S6_COLOR_WRITE_ENABLE);
5076
5077		state->last_blend = 0;
5078		state->floats_per_vertex = 4;
5079	}
5080
5081	if (!is_planar_fourcc(frame->id)) {
5082		rewind = sna->kgem.nbatch;
5083		OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
5084		OUT_BATCH(0x0000001);	/* constant 0 */
5085		/* constant 0: brightness/contrast */
5086		OUT_BATCH_F(video->brightness / 128.0);
5087		OUT_BATCH_F(video->contrast / 255.0);
5088		OUT_BATCH_F(0.0);
5089		OUT_BATCH_F(0.0);
5090		if (state->last_constants &&
5091		    memcmp(&sna->kgem.batch[state->last_constants],
5092			   &sna->kgem.batch[rewind],
5093			   6*sizeof(uint32_t)) == 0)
5094			sna->kgem.nbatch = rewind;
5095		else
5096			state->last_constants = rewind;
5097
5098		rewind = sna->kgem.nbatch;
5099		OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3);
5100		OUT_BATCH(0x00000001);
5101		OUT_BATCH(SS2_COLORSPACE_CONVERSION |
5102			  (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
5103			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
5104		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
5105			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
5106			  (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
5107			  SS3_NORMALIZED_COORDS);
5108		OUT_BATCH(0x00000000);
5109		if (state->last_sampler &&
5110		    memcmp(&sna->kgem.batch[state->last_sampler],
5111			   &sna->kgem.batch[rewind],
5112			   5*sizeof(uint32_t)) == 0)
5113			sna->kgem.nbatch = rewind;
5114		else
5115			state->last_sampler = rewind;
5116
5117		OUT_BATCH(_3DSTATE_MAP_STATE | 3);
5118		OUT_BATCH(0x00000001);	/* texture map #1 */
5119		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
5120					 frame->bo,
5121					 I915_GEM_DOMAIN_SAMPLER << 16,
5122					 0));
5123
5124		ms3 = MAPSURF_422;
5125		switch (frame->id) {
5126		case FOURCC_YUY2:
5127			ms3 |= MT_422_YCRCB_NORMAL;
5128			break;
5129		case FOURCC_UYVY:
5130			ms3 |= MT_422_YCRCB_SWAPY;
5131			break;
5132		}
5133		ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
5134		ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
5135		OUT_BATCH(ms3);
5136		OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
5137
5138		id = 1<<31 | 1<<1 | !!video->brightness;
5139		if (state->last_shader != id) {
5140			state->last_shader = id;
5141			id = sna->kgem.nbatch++;
5142
5143			gen3_fs_dcl(FS_S0);
5144			gen3_fs_dcl(FS_T0);
5145			gen3_fs_texld(FS_OC, FS_S0, FS_T0);
5146			if (video->brightness != 0) {
5147				gen3_fs_add(FS_OC,
5148					    gen3_fs_operand_reg(FS_OC),
5149					    gen3_fs_operand(FS_C0, X, X, X, ZERO));
5150			}
5151
5152			sna->kgem.batch[id] =
5153				_3DSTATE_PIXEL_SHADER_PROGRAM |
5154				(sna->kgem.nbatch - id - 2);
5155		}
5156	} else {
5157		/* For the planar formats, we set up three samplers --
5158		 * one for each plane, in a Y8 format.  Because I
5159		 * couldn't get the special PLANAR_TO_PACKED
5160		 * shader setup to work, I did the manual pixel shader:
5161		 *
5162		 * y' = y - .0625
5163		 * u' = u - .5
5164		 * v' = v - .5;
5165		 *
5166		 * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
5167		 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
5168		 * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
5169		 *
5170		 * register assignment:
5171		 * r0 = (y',u',v',0)
5172		 * r1 = (y,y,y,y)
5173		 * r2 = (u,u,u,u)
5174		 * r3 = (v,v,v,v)
5175		 * OC = (r,g,b,1)
5176		 */
5177		rewind = sna->kgem.nbatch;
5178		OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2));
5179		OUT_BATCH(0x000001f);	/* constants 0-4 */
5180		/* constant 0: normalization offsets */
5181		OUT_BATCH_F(-0.0625);
5182		OUT_BATCH_F(-0.5);
5183		OUT_BATCH_F(-0.5);
5184		OUT_BATCH_F(0.0);
5185		/* constant 1: r coefficients */
5186		OUT_BATCH_F(1.1643);
5187		OUT_BATCH_F(0.0);
5188		OUT_BATCH_F(1.5958);
5189		OUT_BATCH_F(0.0);
5190		/* constant 2: g coefficients */
5191		OUT_BATCH_F(1.1643);
5192		OUT_BATCH_F(-0.39173);
5193		OUT_BATCH_F(-0.81290);
5194		OUT_BATCH_F(0.0);
5195		/* constant 3: b coefficients */
5196		OUT_BATCH_F(1.1643);
5197		OUT_BATCH_F(2.017);
5198		OUT_BATCH_F(0.0);
5199		OUT_BATCH_F(0.0);
5200		/* constant 4: brightness/contrast */
5201		OUT_BATCH_F(video->brightness / 128.0);
5202		OUT_BATCH_F(video->contrast / 255.0);
5203		OUT_BATCH_F(0.0);
5204		OUT_BATCH_F(0.0);
5205		if (state->last_constants &&
5206		    memcmp(&sna->kgem.batch[state->last_constants],
5207			   &sna->kgem.batch[rewind],
5208			   22*sizeof(uint32_t)) == 0)
5209			sna->kgem.nbatch = rewind;
5210		else
5211			state->last_constants = rewind;
5212
5213		rewind = sna->kgem.nbatch;
5214		OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9);
5215		OUT_BATCH(0x00000007);
5216		/* sampler 0 */
5217		OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
5218			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
5219		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
5220			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
5221			  (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
5222			  SS3_NORMALIZED_COORDS);
5223		OUT_BATCH(0x00000000);
5224		/* sampler 1 */
5225		OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
5226			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
5227		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
5228			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
5229			  (1 << SS3_TEXTUREMAP_INDEX_SHIFT) |
5230			  SS3_NORMALIZED_COORDS);
5231		OUT_BATCH(0x00000000);
5232		/* sampler 2 */
5233		OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
5234			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
5235		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
5236			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
5237			  (2 << SS3_TEXTUREMAP_INDEX_SHIFT) |
5238			  SS3_NORMALIZED_COORDS);
5239		OUT_BATCH(0x00000000);
5240		if (state->last_sampler &&
5241		    memcmp(&sna->kgem.batch[state->last_sampler],
5242			   &sna->kgem.batch[rewind],
5243			   11*sizeof(uint32_t)) == 0)
5244			sna->kgem.nbatch = rewind;
5245		else
5246			state->last_sampler = rewind;
5247
5248		OUT_BATCH(_3DSTATE_MAP_STATE | 9);
5249		OUT_BATCH(0x00000007);
5250
5251		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
5252					 frame->bo,
5253					 I915_GEM_DOMAIN_SAMPLER << 16,
5254					 0));
5255
5256		ms3 = MAPSURF_8BIT | MT_8BIT_I8;
5257		ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
5258		ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
5259		OUT_BATCH(ms3);
5260		/* check to see if Y has special pitch than normal
5261		 * double u/v pitch, e.g i915 XvMC hw requires at
5262		 * least 1K alignment, so Y pitch might
5263		 * be same as U/V's.*/
5264		if (frame->pitch[1])
5265			OUT_BATCH(((frame->pitch[1] / 4) - 1) << MS4_PITCH_SHIFT);
5266		else
5267			OUT_BATCH(((frame->pitch[0] * 2 / 4) - 1) << MS4_PITCH_SHIFT);
5268
5269		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
5270					 frame->bo,
5271					 I915_GEM_DOMAIN_SAMPLER << 16,
5272					 frame->UBufOffset));
5273
5274		ms3 = MAPSURF_8BIT | MT_8BIT_I8;
5275		ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
5276		ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
5277		OUT_BATCH(ms3);
5278		OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
5279
5280		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
5281					 frame->bo,
5282					 I915_GEM_DOMAIN_SAMPLER << 16,
5283					 frame->VBufOffset));
5284
5285		ms3 = MAPSURF_8BIT | MT_8BIT_I8;
5286		ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
5287		ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
5288		OUT_BATCH(ms3);
5289		OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
5290
5291		id = 1<<31 | 2<<1 | !!video->brightness;
5292		if (state->last_shader != id) {
5293			state->last_shader = id;
5294			id = sna->kgem.nbatch++;
5295
5296			/* Declare samplers */
5297			gen3_fs_dcl(FS_S0);	/* Y */
5298			gen3_fs_dcl(FS_S1);	/* U */
5299			gen3_fs_dcl(FS_S2);	/* V */
5300			gen3_fs_dcl(FS_T0);	/* normalized coords */
5301
5302			/* Load samplers to temporaries. */
5303			gen3_fs_texld(FS_R1, FS_S0, FS_T0);
5304			gen3_fs_texld(FS_R2, FS_S1, FS_T0);
5305			gen3_fs_texld(FS_R3, FS_S2, FS_T0);
5306
5307			/* Move the sampled YUV data in R[123] to the first
5308			 * 3 channels of R0.
5309			 */
5310			gen3_fs_mov_masked(FS_R0, MASK_X,
5311					   gen3_fs_operand_reg(FS_R1));
5312			gen3_fs_mov_masked(FS_R0, MASK_Y,
5313					   gen3_fs_operand_reg(FS_R2));
5314			gen3_fs_mov_masked(FS_R0, MASK_Z,
5315					   gen3_fs_operand_reg(FS_R3));
5316
5317			/* Normalize the YUV data */
5318			gen3_fs_add(FS_R0, gen3_fs_operand_reg(FS_R0),
5319				    gen3_fs_operand_reg(FS_C0));
5320			/* dot-product the YUV data in R0 by the vectors of
5321			 * coefficients for calculating R, G, and B, storing
5322			 * the results in the R, G, or B channels of the output
5323			 * color.  The OC results are implicitly clamped
5324			 * at the end of the program.
5325			 */
5326			gen3_fs_dp3(FS_OC, MASK_X,
5327				    gen3_fs_operand_reg(FS_R0),
5328				    gen3_fs_operand_reg(FS_C1));
5329			gen3_fs_dp3(FS_OC, MASK_Y,
5330				    gen3_fs_operand_reg(FS_R0),
5331				    gen3_fs_operand_reg(FS_C2));
5332			gen3_fs_dp3(FS_OC, MASK_Z,
5333				    gen3_fs_operand_reg(FS_R0),
5334				    gen3_fs_operand_reg(FS_C3));
5335			/* Set alpha of the output to 1.0, by wiring W to 1
5336			 * and not actually using the source.
5337			 */
5338			gen3_fs_mov_masked(FS_OC, MASK_W,
5339					   gen3_fs_operand_one());
5340
5341			if (video->brightness != 0) {
5342				gen3_fs_add(FS_OC,
5343					    gen3_fs_operand_reg(FS_OC),
5344					    gen3_fs_operand(FS_C4, X, X, X, ZERO));
5345			}
5346
5347			sna->kgem.batch[id] =
5348				_3DSTATE_PIXEL_SHADER_PROGRAM |
5349				(sna->kgem.nbatch - id - 2);
5350		}
5351	}
5352}
5353
5354static void
5355gen3_video_get_batch(struct sna *sna, struct kgem_bo *bo)
5356{
5357	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
5358
5359	if (!kgem_check_batch(&sna->kgem, 120) ||
5360	    !kgem_check_reloc(&sna->kgem, 4) ||
5361	    !kgem_check_exec(&sna->kgem, 2)) {
5362		_kgem_submit(&sna->kgem);
5363		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
5364	}
5365
5366	if (sna->render_state.gen3.need_invariant)
5367		gen3_emit_invariant(sna);
5368}
5369
/*
 * Clamp the number of rectangles the caller would like to emit inline
 * to the number that fits in the batch.
 *
 * A rectangle takes three vertices of floats_per_vertex entries each.
 * One entry less than batch_space() reports is considered available
 * (presumably reserved for the primitive command dword the caller emits
 * ahead of the vertices).
 *
 * Returns want, or the smaller count that fits (which may be 0).
 */
static int
gen3_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex)
{
	const int per_rect = floats_per_vertex * 3;
	const int available = batch_space(sna) - 1;

	return per_rect * want > available ? available / per_rect : want;
}
5381
5382static bool
5383gen3_render_video(struct sna *sna,
5384		  struct sna_video *video,
5385		  struct sna_video_frame *frame,
5386		  RegionPtr dstRegion,
5387		  PixmapPtr pixmap)
5388{
5389	struct sna_pixmap *priv = sna_pixmap(pixmap);
5390	const BoxRec *pbox = region_rects(dstRegion);
5391	int nbox = region_num_rects(dstRegion);
5392	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
5393	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
5394	int src_width = frame->src.x2 - frame->src.x1;
5395	int src_height = frame->src.y2 - frame->src.y1;
5396	float src_offset_x, src_offset_y;
5397	float src_scale_x, src_scale_y;
5398	int pix_xoff, pix_yoff;
5399	struct kgem_bo *dst_bo;
5400	bool bilinear;
5401	int copy = 0;
5402
5403	DBG(("%s: src:%dx%d (frame:%dx%d) -> dst:%dx%d\n", __FUNCTION__,
5404	     src_width, src_height, frame->width, frame->height, dst_width, dst_height));
5405
5406	assert(priv->gpu_bo);
5407	dst_bo = priv->gpu_bo;
5408
5409	bilinear = src_width != dst_width || src_height != dst_height;
5410
5411	src_scale_x = (float)src_width / dst_width / frame->width;
5412	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
5413
5414	src_scale_y = (float)src_height / dst_height / frame->height;
5415	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
5416	DBG(("%s: src offset (%f, %f), scale (%f, %f)\n",
5417	     __FUNCTION__, src_offset_x, src_offset_y, src_scale_x, src_scale_y));
5418
5419	if (too_large(pixmap->drawable.width, pixmap->drawable.height) ||
5420	    !gen3_check_pitch_3d(dst_bo)) {
5421		int bpp = pixmap->drawable.bitsPerPixel;
5422
5423		if (too_large(dst_width, dst_height))
5424			return false;
5425
5426		dst_bo = kgem_create_2d(&sna->kgem,
5427					dst_width, dst_height, bpp,
5428					kgem_choose_tiling(&sna->kgem,
5429							   I915_TILING_X,
5430							   dst_width, dst_height, bpp),
5431					0);
5432		if (!dst_bo)
5433			return false;
5434
5435		pix_xoff = -dstRegion->extents.x1;
5436		pix_yoff = -dstRegion->extents.y1;
5437		copy = 1;
5438	} else {
5439		/* Set up the offset for translating from the given region
5440		 * (in screen coordinates) to the backing pixmap.
5441		 */
5442#ifdef COMPOSITE
5443		pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
5444		pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
5445#else
5446		pix_xoff = 0;
5447		pix_yoff = 0;
5448#endif
5449
5450		dst_width  = pixmap->drawable.width;
5451		dst_height = pixmap->drawable.height;
5452	}
5453
5454	gen3_video_get_batch(sna, dst_bo);
5455	gen3_emit_video_state(sna, video, frame, pixmap,
5456			      dst_bo, dst_width, dst_height, bilinear);
5457	do {
5458		int nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
5459		if (nbox_this_time == 0) {
5460			gen3_video_get_batch(sna, dst_bo);
5461			gen3_emit_video_state(sna, video, frame, pixmap,
5462					      dst_bo, dst_width, dst_height, bilinear);
5463			nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
5464			assert(nbox_this_time);
5465		}
5466		nbox -= nbox_this_time;
5467
5468		OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1));
5469		do {
5470			int box_x1 = pbox->x1;
5471			int box_y1 = pbox->y1;
5472			int box_x2 = pbox->x2;
5473			int box_y2 = pbox->y2;
5474
5475			pbox++;
5476
5477			DBG(("%s: dst (%d, %d), (%d, %d) + (%d, %d); src (%f, %f), (%f, %f)\n",
5478			     __FUNCTION__, box_x1, box_y1, box_x2, box_y2, pix_xoff, pix_yoff,
5479			     box_x1 * src_scale_x + src_offset_x,
5480			     box_y1 * src_scale_y + src_offset_y,
5481			     box_x2 * src_scale_x + src_offset_x,
5482			     box_y2 * src_scale_y + src_offset_y));
5483
5484			/* bottom right */
5485			OUT_BATCH_F(box_x2 + pix_xoff);
5486			OUT_BATCH_F(box_y2 + pix_yoff);
5487			OUT_BATCH_F(box_x2 * src_scale_x + src_offset_x);
5488			OUT_BATCH_F(box_y2 * src_scale_y + src_offset_y);
5489
5490			/* bottom left */
5491			OUT_BATCH_F(box_x1 + pix_xoff);
5492			OUT_BATCH_F(box_y2 + pix_yoff);
5493			OUT_BATCH_F(box_x1 * src_scale_x + src_offset_x);
5494			OUT_BATCH_F(box_y2 * src_scale_y + src_offset_y);
5495
5496			/* top left */
5497			OUT_BATCH_F(box_x1 + pix_xoff);
5498			OUT_BATCH_F(box_y1 + pix_yoff);
5499			OUT_BATCH_F(box_x1 * src_scale_x + src_offset_x);
5500			OUT_BATCH_F(box_y1 * src_scale_y + src_offset_y);
5501		} while (--nbox_this_time);
5502	} while (nbox);
5503
5504	if (copy) {
5505#ifdef COMPOSITE
5506		pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
5507		pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
5508#else
5509		pix_xoff = 0;
5510		pix_yoff = 0;
5511#endif
5512		sna_blt_copy_boxes(sna, GXcopy,
5513				   dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1,
5514				   priv->gpu_bo, pix_xoff, pix_yoff,
5515				   pixmap->drawable.bitsPerPixel,
5516				   region_rects(dstRegion),
5517				   region_num_rects(dstRegion));
5518
5519		kgem_bo_destroy(&sna->kgem, dst_bo);
5520	}
5521
5522	if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
5523		if ((pix_xoff | pix_yoff) == 0) {
5524			sna_damage_add(&priv->gpu_damage, dstRegion);
5525			sna_damage_subtract(&priv->cpu_damage, dstRegion);
5526		} else {
5527			sna_damage_add_boxes(&priv->gpu_damage,
5528					     region_rects(dstRegion),
5529					     region_num_rects(dstRegion),
5530					     pix_xoff, pix_yoff);
5531			sna_damage_subtract_boxes(&priv->cpu_damage,
5532						  region_rects(dstRegion),
5533						  region_num_rects(dstRegion),
5534						  pix_xoff, pix_yoff);
5535		}
5536	}
5537
5538	return true;
5539}
5540
5541static void
5542gen3_render_copy_setup_source(struct sna_composite_channel *channel,
5543			      const DrawableRec *draw,
5544			      struct kgem_bo *bo)
5545{
5546	int i;
5547
5548	channel->u.gen3.type = SHADER_TEXTURE;
5549	channel->filter = gen3_filter(PictFilterNearest);
5550	channel->repeat = gen3_texture_repeat(RepeatNone);
5551	channel->width  = draw->width;
5552	channel->height = draw->height;
5553	channel->scale[0] = 1.f/draw->width;
5554	channel->scale[1] = 1.f/draw->height;
5555	channel->offset[0] = 0;
5556	channel->offset[1] = 0;
5557
5558	channel->pict_format = sna_format_for_depth(draw->depth);
5559	if (!gen3_composite_channel_set_format(channel, channel->pict_format)) {
5560		for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
5561			if (gen3_tex_formats[i].xfmt == channel->pict_format) {
5562				channel->card_format = gen3_tex_formats[i].card_fmt;
5563				channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
5564				channel->alpha_fixup = true;
5565				break;
5566			}
5567		}
5568	}
5569	assert(channel->card_format);
5570
5571	channel->bo = bo;
5572	channel->is_affine = 1;
5573}
5574
5575static bool
5576gen3_render_copy_boxes(struct sna *sna, uint8_t alu,
5577		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
5578		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
5579		       const BoxRec *box, int n, unsigned flags)
5580{
5581	struct sna_composite_op tmp;
5582
5583#if NO_COPY_BOXES
5584	if (!sna_blt_compare_depth(src, dst))
5585		return false;
5586
5587	return sna_blt_copy_boxes(sna, alu,
5588				  src_bo, src_dx, src_dy,
5589				  dst_bo, dst_dx, dst_dy,
5590				  dst->bitsPerPixel,
5591				  box, n);
5592#endif
5593
5594	DBG(("%s (%d, %d)->(%d, %d) x %d\n",
5595	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
5596
5597	if (sna_blt_compare_depth(src, dst) &&
5598	    sna_blt_copy_boxes(sna, alu,
5599			       src_bo, src_dx, src_dy,
5600			       dst_bo, dst_dx, dst_dy,
5601			       dst->bitsPerPixel,
5602			       box, n))
5603		return true;
5604
5605	if (!(alu == GXcopy || alu == GXclear) ||
5606	    src_bo == dst_bo || /* XXX handle overlap using 3D ? */
5607	    src_bo->pitch > MAX_3D_PITCH ||
5608	    too_large(src->width, src->height)) {
5609fallback_blt:
5610		if (!kgem_bo_can_blt(&sna->kgem, src_bo) ||
5611		    !kgem_bo_can_blt(&sna->kgem, dst_bo))
5612			return false;
5613
5614		return sna_blt_copy_boxes_fallback(sna, alu,
5615						   src, src_bo, src_dx, src_dy,
5616						   dst, dst_bo, dst_dx, dst_dy,
5617						   box, n);
5618	}
5619
5620	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
5621		kgem_submit(&sna->kgem);
5622		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
5623			goto fallback_blt;
5624	}
5625
5626	memset(&tmp, 0, sizeof(tmp));
5627	tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
5628
5629	tmp.dst.pixmap = (PixmapPtr)dst;
5630	tmp.dst.width = dst->width;
5631	tmp.dst.height = dst->height;
5632	tmp.dst.format = sna_format_for_depth(dst->depth);
5633	tmp.dst.bo = dst_bo;
5634	tmp.dst.x = tmp.dst.y = 0;
5635	tmp.damage = NULL;
5636
5637	sna_render_composite_redirect_init(&tmp);
5638	if (too_large(tmp.dst.width, tmp.dst.height) ||
5639	    dst_bo->pitch > MAX_3D_PITCH) {
5640		BoxRec extents = box[0];
5641		int i;
5642
5643		for (i = 1; i < n; i++) {
5644			if (box[i].x1 < extents.x1)
5645				extents.x1 = box[i].x1;
5646			if (box[i].y1 < extents.y1)
5647				extents.y1 = box[i].y1;
5648
5649			if (box[i].x2 > extents.x2)
5650				extents.x2 = box[i].x2;
5651			if (box[i].y2 > extents.y2)
5652				extents.y2 = box[i].y2;
5653		}
5654		if (!sna_render_composite_redirect(sna, &tmp,
5655						   extents.x1 + dst_dx,
5656						   extents.y1 + dst_dy,
5657						   extents.x2 - extents.x1,
5658						   extents.y2 - extents.y1,
5659						   n > 1))
5660			goto fallback_tiled;
5661	}
5662
5663	gen3_render_copy_setup_source(&tmp.src, src, src_bo);
5664
5665	tmp.floats_per_vertex = 4;
5666	tmp.floats_per_rect = 12;
5667	tmp.mask.bo = NULL;
5668	tmp.mask.u.gen3.type = SHADER_NONE;
5669
5670	dst_dx += tmp.dst.x;
5671	dst_dy += tmp.dst.y;
5672	tmp.dst.x = tmp.dst.y = 0;
5673
5674	gen3_align_vertex(sna, &tmp);
5675	gen3_emit_composite_state(sna, &tmp);
5676
5677	do {
5678		int n_this_time;
5679
5680		n_this_time = gen3_get_rectangles(sna, &tmp, n);
5681		n -= n_this_time;
5682
5683		do {
5684			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
5685			     box->x1 + src_dx, box->y1 + src_dy,
5686			     box->x1 + dst_dx, box->y1 + dst_dy,
5687			     box->x2 - box->x1, box->y2 - box->y1));
5688			OUT_VERTEX(box->x2 + dst_dx);
5689			OUT_VERTEX(box->y2 + dst_dy);
5690			OUT_VERTEX((box->x2 + src_dx) * tmp.src.scale[0]);
5691			OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
5692
5693			OUT_VERTEX(box->x1 + dst_dx);
5694			OUT_VERTEX(box->y2 + dst_dy);
5695			OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
5696			OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
5697
5698			OUT_VERTEX(box->x1 + dst_dx);
5699			OUT_VERTEX(box->y1 + dst_dy);
5700			OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
5701			OUT_VERTEX((box->y1 + src_dy) * tmp.src.scale[1]);
5702
5703			box++;
5704		} while (--n_this_time);
5705	} while (n);
5706
5707	gen3_vertex_flush(sna);
5708	sna_render_composite_redirect_done(sna, &tmp);
5709	return true;
5710
5711fallback_tiled:
5712	return sna_tiling_copy_boxes(sna, alu,
5713				     src, src_bo, src_dx, src_dy,
5714				     dst, dst_bo, dst_dx, dst_dy,
5715				     box, n);
5716}
5717
5718static void
5719gen3_render_copy_blt(struct sna *sna,
5720		     const struct sna_copy_op *op,
5721		     int16_t sx, int16_t sy,
5722		     int16_t w, int16_t h,
5723		     int16_t dx, int16_t dy)
5724{
5725	gen3_get_rectangles(sna, &op->base, 1);
5726
5727	OUT_VERTEX(dx+w);
5728	OUT_VERTEX(dy+h);
5729	OUT_VERTEX((sx+w)*op->base.src.scale[0]);
5730	OUT_VERTEX((sy+h)*op->base.src.scale[1]);
5731
5732	OUT_VERTEX(dx);
5733	OUT_VERTEX(dy+h);
5734	OUT_VERTEX(sx*op->base.src.scale[0]);
5735	OUT_VERTEX((sy+h)*op->base.src.scale[1]);
5736
5737	OUT_VERTEX(dx);
5738	OUT_VERTEX(dy);
5739	OUT_VERTEX(sx*op->base.src.scale[0]);
5740	OUT_VERTEX(sy*op->base.src.scale[1]);
5741}
5742
5743static void
5744gen3_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
5745{
5746	if (sna->render.vertex_offset)
5747		gen3_vertex_flush(sna);
5748}
5749
5750static bool
5751gen3_render_copy(struct sna *sna, uint8_t alu,
5752		 PixmapPtr src, struct kgem_bo *src_bo,
5753		 PixmapPtr dst, struct kgem_bo *dst_bo,
5754		 struct sna_copy_op *tmp)
5755{
5756#if NO_COPY
5757	if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
5758		return false;
5759
5760	return sna_blt_copy(sna, alu,
5761			    src_bo, dst_bo,
5762			    dst->drawable.bitsPerPixel,
5763			    tmp);
5764#endif
5765
5766	/* Prefer to use the BLT */
5767	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
5768	    sna_blt_copy(sna, alu,
5769			 src_bo, dst_bo,
5770			 dst->drawable.bitsPerPixel,
5771			 tmp))
5772		return true;
5773
5774	/* Must use the BLT if we can't RENDER... */
5775	if (!(alu == GXcopy || alu == GXclear) ||
5776	    too_large(src->drawable.width, src->drawable.height) ||
5777	    too_large(dst->drawable.width, dst->drawable.height) ||
5778	    src_bo->pitch > MAX_3D_PITCH || dst_bo->pitch > MAX_3D_PITCH) {
5779fallback:
5780		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
5781			return false;
5782
5783		return sna_blt_copy(sna, alu, src_bo, dst_bo,
5784				    dst->drawable.bitsPerPixel,
5785				    tmp);
5786	}
5787
5788	tmp->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;
5789
5790	tmp->base.dst.pixmap = dst;
5791	tmp->base.dst.width = dst->drawable.width;
5792	tmp->base.dst.height = dst->drawable.height;
5793	tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
5794	tmp->base.dst.bo = dst_bo;
5795
5796	gen3_render_copy_setup_source(&tmp->base.src, &src->drawable, src_bo);
5797
5798	tmp->base.floats_per_vertex = 4;
5799	tmp->base.floats_per_rect = 12;
5800	tmp->base.mask.bo = NULL;
5801	tmp->base.mask.u.gen3.type = SHADER_NONE;
5802
5803	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
5804		kgem_submit(&sna->kgem);
5805		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
5806			goto fallback;
5807	}
5808
5809	tmp->blt  = gen3_render_copy_blt;
5810	tmp->done = gen3_render_copy_done;
5811
5812	gen3_align_vertex(sna, &tmp->base);
5813	gen3_emit_composite_state(sna, &tmp->base);
5814	return true;
5815}
5816
5817static bool
5818gen3_render_fill_boxes_try_blt(struct sna *sna,
5819			       CARD8 op, PictFormat format,
5820			       const xRenderColor *color,
5821			       const DrawableRec *dst, struct kgem_bo *dst_bo,
5822			       const BoxRec *box, int n)
5823{
5824	uint8_t alu;
5825	uint32_t pixel;
5826
5827	if (dst_bo->tiling == I915_TILING_Y) {
5828		DBG(("%s: y-tiling, can't blit\n", __FUNCTION__));
5829		assert(!too_large(dst->width, dst->height));
5830		return false;
5831	}
5832
5833	if (op > PictOpSrc)
5834		return false;
5835
5836	if (op == PictOpClear) {
5837		alu = GXclear;
5838		pixel = 0;
5839	} else if (!sna_get_pixel_from_rgba(&pixel,
5840					    color->red,
5841					    color->green,
5842					    color->blue,
5843					    color->alpha,
5844					    format))
5845		return false;
5846	else
5847		alu = GXcopy;
5848
5849	return sna_blt_fill_boxes(sna, alu,
5850				  dst_bo, dst->bitsPerPixel,
5851				  pixel, box, n);
5852}
5853
5854static inline bool prefer_fill_blt(struct sna *sna)
5855{
5856#if PREFER_BLT_FILL
5857	return true;
5858#else
5859	return sna->kgem.mode != KGEM_RENDER;
5860#endif
5861}
5862
5863static bool
5864gen3_render_fill_boxes(struct sna *sna,
5865		       CARD8 op,
5866		       PictFormat format,
5867		       const xRenderColor *color,
5868		       const DrawableRec *dst, struct kgem_bo *dst_bo,
5869		       const BoxRec *box, int n)
5870{
5871	struct sna_composite_op tmp;
5872	uint32_t pixel;
5873
5874	if (op >= ARRAY_SIZE(gen3_blend_op)) {
5875		DBG(("%s: fallback due to unhandled blend op: %d\n",
5876		     __FUNCTION__, op));
5877		return false;
5878	}
5879
5880#if NO_FILL_BOXES
5881	return gen3_render_fill_boxes_try_blt(sna, op, format, color,
5882					      dst, dst_bo,
5883					      box, n);
5884#endif
5885
5886	DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x))\n",
5887	     __FUNCTION__, op, (int)format,
5888	     color->red, color->green, color->blue, color->alpha));
5889
5890	if (too_large(dst->width, dst->height) ||
5891	    dst_bo->pitch > MAX_3D_PITCH ||
5892	    !gen3_check_dst_format(format)) {
5893		DBG(("%s: try blt, too large or incompatible destination\n",
5894		     __FUNCTION__));
5895		if (gen3_render_fill_boxes_try_blt(sna, op, format, color,
5896						   dst, dst_bo,
5897						   box, n))
5898			return true;
5899
5900		if (!gen3_check_dst_format(format))
5901			return false;
5902
5903		return sna_tiling_fill_boxes(sna, op, format, color,
5904					     dst, dst_bo, box, n);
5905	}
5906
5907	if (prefer_fill_blt(sna) &&
5908	    gen3_render_fill_boxes_try_blt(sna, op, format, color,
5909					   dst, dst_bo,
5910					   box, n))
5911		return true;
5912
5913	if (op == PictOpClear) {
5914		pixel = 0;
5915	} else {
5916		if (!sna_get_pixel_from_rgba(&pixel,
5917					     color->red,
5918					     color->green,
5919					     color->blue,
5920					     color->alpha,
5921					     PICT_a8r8g8b8)) {
5922			assert(0);
5923			return false;
5924		}
5925	}
5926	DBG(("%s: using shader for op=%d, format=%08x, pixel=%08x\n",
5927	     __FUNCTION__, op, (int)format, pixel));
5928
5929	tmp.op = op;
5930	tmp.dst.pixmap = (PixmapPtr)dst;
5931	tmp.dst.width = dst->width;
5932	tmp.dst.height = dst->height;
5933	tmp.dst.format = format;
5934	tmp.dst.bo = dst_bo;
5935	tmp.damage = NULL;
5936	tmp.floats_per_vertex = 2;
5937	tmp.floats_per_rect = 6;
5938	tmp.rb_reversed = 0;
5939	tmp.has_component_alpha = 0;
5940	tmp.need_magic_ca_pass = false;
5941
5942	gen3_init_solid(&tmp.src, pixel);
5943	tmp.mask.bo = NULL;
5944	tmp.mask.u.gen3.type = SHADER_NONE;
5945	tmp.u.gen3.num_constants = 0;
5946
5947	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
5948		kgem_submit(&sna->kgem);
5949		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL))
5950			return false;
5951	}
5952
5953	gen3_align_vertex(sna, &tmp);
5954	gen3_emit_composite_state(sna, &tmp);
5955
5956	do {
5957		int n_this_time;
5958
5959		n_this_time = gen3_get_rectangles(sna, &tmp, n);
5960		n -= n_this_time;
5961
5962		do {
5963			DBG(("	(%d, %d), (%d, %d): %x\n",
5964			     box->x1, box->y1, box->x2, box->y2, pixel));
5965			OUT_VERTEX(box->x2);
5966			OUT_VERTEX(box->y2);
5967			OUT_VERTEX(box->x1);
5968			OUT_VERTEX(box->y2);
5969			OUT_VERTEX(box->x1);
5970			OUT_VERTEX(box->y1);
5971			box++;
5972		} while (--n_this_time);
5973	} while (n);
5974
5975	gen3_vertex_flush(sna);
5976	return true;
5977}
5978
5979static void
5980gen3_render_fill_op_blt(struct sna *sna,
5981			const struct sna_fill_op *op,
5982			int16_t x, int16_t y, int16_t w, int16_t h)
5983{
5984	gen3_get_rectangles(sna, &op->base, 1);
5985
5986	OUT_VERTEX(x+w);
5987	OUT_VERTEX(y+h);
5988	OUT_VERTEX(x);
5989	OUT_VERTEX(y+h);
5990	OUT_VERTEX(x);
5991	OUT_VERTEX(y);
5992}
5993
5994fastcall static void
5995gen3_render_fill_op_box(struct sna *sna,
5996			const struct sna_fill_op *op,
5997			const BoxRec *box)
5998{
5999	gen3_get_rectangles(sna, &op->base, 1);
6000
6001	OUT_VERTEX(box->x2);
6002	OUT_VERTEX(box->y2);
6003	OUT_VERTEX(box->x1);
6004	OUT_VERTEX(box->y2);
6005	OUT_VERTEX(box->x1);
6006	OUT_VERTEX(box->y1);
6007}
6008
6009fastcall static void
6010gen3_render_fill_op_boxes(struct sna *sna,
6011			  const struct sna_fill_op *op,
6012			  const BoxRec *box,
6013			  int nbox)
6014{
6015	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
6016	     box->x1, box->y1, box->x2, box->y2, nbox));
6017
6018	do {
6019		int nbox_this_time;
6020
6021		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
6022		nbox -= nbox_this_time;
6023
6024		do {
6025			OUT_VERTEX(box->x2);
6026			OUT_VERTEX(box->y2);
6027			OUT_VERTEX(box->x1);
6028			OUT_VERTEX(box->y2);
6029			OUT_VERTEX(box->x1);
6030			OUT_VERTEX(box->y1);
6031			box++;
6032		} while (--nbox_this_time);
6033	} while (nbox);
6034}
6035
6036static void
6037gen3_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
6038{
6039	if (sna->render.vertex_offset)
6040		gen3_vertex_flush(sna);
6041}
6042
6043static bool
6044gen3_render_fill(struct sna *sna, uint8_t alu,
6045		 PixmapPtr dst, struct kgem_bo *dst_bo,
6046		 uint32_t color, unsigned flags,
6047		 struct sna_fill_op *tmp)
6048{
6049#if NO_FILL
6050	return sna_blt_fill(sna, alu,
6051			    dst_bo, dst->drawable.bitsPerPixel,
6052			    color,
6053			    tmp);
6054#endif
6055
6056	/* Prefer to use the BLT if already engaged */
6057	if (prefer_fill_blt(sna) &&
6058	    sna_blt_fill(sna, alu,
6059			 dst_bo, dst->drawable.bitsPerPixel,
6060			 color,
6061			 tmp))
6062		return true;
6063
6064	/* Must use the BLT if we can't RENDER... */
6065	if (!(alu == GXcopy || alu == GXclear) ||
6066	    too_large(dst->drawable.width, dst->drawable.height) ||
6067	    dst_bo->pitch > MAX_3D_PITCH)
6068		return sna_blt_fill(sna, alu,
6069				    dst_bo, dst->drawable.bitsPerPixel,
6070				    color,
6071				    tmp);
6072
6073	if (alu == GXclear)
6074		color = 0;
6075
6076	tmp->base.op = color == 0 ? PictOpClear : PictOpSrc;
6077	tmp->base.dst.pixmap = dst;
6078	tmp->base.dst.width = dst->drawable.width;
6079	tmp->base.dst.height = dst->drawable.height;
6080	tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
6081	tmp->base.dst.bo = dst_bo;
6082	tmp->base.floats_per_vertex = 2;
6083	tmp->base.floats_per_rect = 6;
6084	tmp->base.need_magic_ca_pass = 0;
6085	tmp->base.has_component_alpha = 0;
6086	tmp->base.rb_reversed = 0;
6087
6088	gen3_init_solid(&tmp->base.src,
6089			sna_rgba_for_color(color, dst->drawable.depth));
6090	tmp->base.mask.bo = NULL;
6091	tmp->base.mask.u.gen3.type = SHADER_NONE;
6092	tmp->base.u.gen3.num_constants = 0;
6093
6094	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
6095		kgem_submit(&sna->kgem);
6096		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL))
6097			return false;
6098	}
6099
6100	tmp->blt   = gen3_render_fill_op_blt;
6101	tmp->box   = gen3_render_fill_op_box;
6102	tmp->boxes = gen3_render_fill_op_boxes;
6103	tmp->points = NULL;
6104	tmp->done  = gen3_render_fill_op_done;
6105
6106	gen3_align_vertex(sna, &tmp->base);
6107	gen3_emit_composite_state(sna, &tmp->base);
6108	return true;
6109}
6110
6111static bool
6112gen3_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
6113			     uint32_t color,
6114			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
6115			     uint8_t alu)
6116{
6117	BoxRec box;
6118
6119	box.x1 = x1;
6120	box.y1 = y1;
6121	box.x2 = x2;
6122	box.y2 = y2;
6123
6124	return sna_blt_fill_boxes(sna, alu,
6125				  bo, dst->drawable.bitsPerPixel,
6126				  color, &box, 1);
6127}
6128
6129static bool
6130gen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
6131		     uint32_t color,
6132		     int16_t x1, int16_t y1,
6133		     int16_t x2, int16_t y2,
6134		     uint8_t alu)
6135{
6136	struct sna_composite_op tmp;
6137
6138#if NO_FILL_ONE
6139	return gen3_render_fill_one_try_blt(sna, dst, bo, color,
6140					    x1, y1, x2, y2, alu);
6141#endif
6142
6143	/* Prefer to use the BLT if already engaged */
6144	if (prefer_fill_blt(sna) &&
6145	    gen3_render_fill_one_try_blt(sna, dst, bo, color,
6146					 x1, y1, x2, y2, alu))
6147		return true;
6148
6149	/* Must use the BLT if we can't RENDER... */
6150	if (!(alu == GXcopy || alu == GXclear) ||
6151	    too_large(dst->drawable.width, dst->drawable.height) ||
6152	    bo->pitch > MAX_3D_PITCH)
6153		return gen3_render_fill_one_try_blt(sna, dst, bo, color,
6154						    x1, y1, x2, y2, alu);
6155
6156	if (alu == GXclear)
6157		color = 0;
6158
6159	tmp.op = color == 0 ? PictOpClear : PictOpSrc;
6160	tmp.dst.pixmap = dst;
6161	tmp.dst.width = dst->drawable.width;
6162	tmp.dst.height = dst->drawable.height;
6163	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
6164	tmp.dst.bo = bo;
6165	tmp.floats_per_vertex = 2;
6166	tmp.floats_per_rect = 6;
6167	tmp.need_magic_ca_pass = 0;
6168	tmp.has_component_alpha = 0;
6169	tmp.rb_reversed = 0;
6170
6171	gen3_init_solid(&tmp.src,
6172			sna_rgba_for_color(color, dst->drawable.depth));
6173	tmp.mask.bo = NULL;
6174	tmp.mask.u.gen3.type = SHADER_NONE;
6175	tmp.u.gen3.num_constants = 0;
6176
6177	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
6178		kgem_submit(&sna->kgem);
6179
6180		if (gen3_render_fill_one_try_blt(sna, dst, bo, color,
6181						 x1, y1, x2, y2, alu))
6182			return true;
6183
6184		if (!kgem_check_bo(&sna->kgem, bo, NULL))
6185			return false;
6186	}
6187
6188	gen3_align_vertex(sna, &tmp);
6189	gen3_emit_composite_state(sna, &tmp);
6190	gen3_get_rectangles(sna, &tmp, 1);
6191	DBG(("	(%d, %d), (%d, %d): %x\n", x1, y1, x2, y2, color));
6192	OUT_VERTEX(x2);
6193	OUT_VERTEX(y2);
6194	OUT_VERTEX(x1);
6195	OUT_VERTEX(y2);
6196	OUT_VERTEX(x1);
6197	OUT_VERTEX(y1);
6198	gen3_vertex_flush(sna);
6199
6200	return true;
6201}
6202
6203static void gen3_render_flush(struct sna *sna)
6204{
6205	gen3_vertex_close(sna);
6206
6207	assert(sna->render.vertex_reloc[0] == 0);
6208	assert(sna->render.vertex_offset == 0);
6209}
6210
/*
 * render->fini hook.  Intentionally empty: this function releases
 * nothing.
 */
static void
gen3_render_fini(struct sna *sna)
{
	/* nothing to do */
}
6215
6216const char *gen3_render_init(struct sna *sna, const char *backend)
6217{
6218	struct sna_render *render = &sna->render;
6219
6220#if !NO_COMPOSITE
6221	render->composite = gen3_render_composite;
6222	render->prefer_gpu |= PREFER_GPU_RENDER;
6223#endif
6224#if !NO_COMPOSITE_SPANS
6225	render->check_composite_spans = gen3_check_composite_spans;
6226	render->composite_spans = gen3_render_composite_spans;
6227	render->prefer_gpu |= PREFER_GPU_SPANS;
6228#endif
6229
6230	render->video = gen3_render_video;
6231
6232	render->copy_boxes = gen3_render_copy_boxes;
6233	render->copy = gen3_render_copy;
6234
6235	render->fill_boxes = gen3_render_fill_boxes;
6236	render->fill = gen3_render_fill;
6237	render->fill_one = gen3_render_fill_one;
6238
6239	render->reset = gen3_render_reset;
6240	render->flush = gen3_render_flush;
6241	render->fini = gen3_render_fini;
6242
6243	render->max_3d_size = MAX_3D_SIZE;
6244	render->max_3d_pitch = MAX_3D_PITCH;
6245
6246	sna->kgem.retire = gen3_render_retire;
6247	sna->kgem.expire = gen3_render_expire;
6248	return "Alviso (gen3)";
6249}
6250