1/*
2 * Copyright © 2006,2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Wang Zhenyu <zhenyu.z.wang@intel.com>
25 *    Eric Anholt <eric@anholt.net>
26 *    Chris Wilson <chris@chris-wilson.co.uk>
27 *
28 */
29
30#ifdef HAVE_CONFIG_H
31#include "config.h"
32#endif
33
34#include "sna.h"
35#include "sna_reg.h"
36#include "sna_render.h"
37#include "sna_render_inline.h"
38#include "sna_video.h"
39
40#include "gen2_render.h"
41
42#define NO_COMPOSITE 0
43#define NO_COMPOSITE_SPANS 0
44#define NO_COPY 0
45#define NO_COPY_BOXES 0
46#define NO_FILL 0
47#define NO_FILL_ONE 0
48#define NO_FILL_BOXES 0
49
50#define MAX_3D_SIZE 2048
51#define MAX_3D_PITCH 8192
52#define MAX_INLINE (1 << 18)
53
54#define BATCH(v) batch_emit(sna, v)
55#define BATCH_ALIGNED(v, a) batch_emit_aligned(sna, v, a)
56#define BATCH_F(v) batch_emit_float(sna, v)
57#define VERTEX(v) batch_emit_float(sna, v)
58
/* Map of Render PictOp operators onto gen2 colour-buffer blend factors,
 * indexed by the PictOp value.  dst_alpha/src_alpha record whether the
 * factor pair reads the destination or source alpha channel, so
 * gen2_get_blend_cntl() can substitute colour factors when the surface
 * carries no real alpha.
 */
static const struct blendinfo {
	bool dst_alpha;		/* factors read destination alpha */
	bool src_alpha;		/* factors read source alpha */
	uint32_t src_blend;	/* S8 source blend factor */
	uint32_t dst_blend;	/* S8 destination blend factor */
} gen2_blend_op[] = {
	/* Clear */
	{0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO},
	/* Src */
	{0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO},
	/* Dst */
	{0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE},
	/* Over */
	{0, 1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */
	{1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE},
	/* In */
	{1, 0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO},
	/* InReverse */
	{0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA},
	/* Out */
	{1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO},
	/* OutReverse */
	{0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */
	{1, 1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */
	{1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA},
	/* Xor */
	{1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */
	{0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ONE},
};
92
/* Texture (sampler) formats understood by gen2.  i8xx_tex_formats are
 * usable on every gen2 part; the alpha-less i85x_tex_formats are only
 * directly supported from 855 onwards — see gen2_get_card_format() for
 * the 830/845 fallback, which relies on i85x entry i mapping onto
 * i8xx entry 1+i (x8r8g8b8 -> a8r8g8b8, x8b8g8r8 -> a8b8g8r8).
 */
static const struct formatinfo {
	unsigned int fmt;	/* Render PICT_* format */
	uint32_t card_fmt;	/* TM0S1 surface format bits */
} i8xx_tex_formats[] = {
	{PICT_a8, MAPSURF_8BIT | MT_8BIT_A8},
	{PICT_a8r8g8b8, MAPSURF_32BIT | MT_32BIT_ARGB8888},
	{PICT_a8b8g8r8, MAPSURF_32BIT | MT_32BIT_ABGR8888},
	{PICT_r5g6b5, MAPSURF_16BIT | MT_16BIT_RGB565},
	{PICT_a1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555},
	{PICT_a4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444},
}, i85x_tex_formats[] = {
	{PICT_x8r8g8b8, MAPSURF_32BIT | MT_32BIT_XRGB8888},
	{PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888},
};
107
108static inline bool
109too_large(int width, int height)
110{
111	return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
112}
113
/* Translate a kgem tiling mode into 3DSTATE_BUF_INFO tiling bits.
 * The cases deliberately cascade: Y-tiling needs both the tile-walk
 * and the tiled-surface bits, X-tiling only the latter.
 */
static inline uint32_t
gen2_buf_tiling(uint32_t tiling)
{
	uint32_t v = 0;
	switch (tiling) {
	default: assert(0);	/* unknown tiling mode */
	case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y;	/* fallthrough */
	case I915_TILING_X: v |= BUF_3D_TILED_SURFACE;	/* fallthrough */
	case I915_TILING_NONE: break;
	}
	return v;
}
126
127static uint32_t
128gen2_get_dst_format(uint32_t format)
129{
130#define BIAS DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8)
131	switch (format) {
132	default:
133		assert(0);
134	case PICT_a8r8g8b8:
135	case PICT_x8r8g8b8:
136		return COLR_BUF_ARGB8888 | BIAS;
137	case PICT_r5g6b5:
138		return COLR_BUF_RGB565 | BIAS;
139	case PICT_a1r5g5b5:
140	case PICT_x1r5g5b5:
141		return COLR_BUF_ARGB1555 | BIAS;
142	case PICT_a8:
143		return COLR_BUF_8BIT | BIAS;
144	case PICT_a4r4g4b4:
145	case PICT_x4r4g4b4:
146		return COLR_BUF_ARGB4444 | BIAS;
147	}
148#undef BIAS
149}
150
151static bool
152gen2_check_dst_format(uint32_t format)
153{
154	switch (format) {
155	case PICT_a8r8g8b8:
156	case PICT_x8r8g8b8:
157	case PICT_r5g6b5:
158	case PICT_a1r5g5b5:
159	case PICT_x1r5g5b5:
160	case PICT_a8:
161	case PICT_a4r4g4b4:
162	case PICT_x4r4g4b4:
163		return true;
164	default:
165		return false;
166	}
167}
168
/* Translate a Render picture format into the TM0S1 sampler format bits.
 * The caller must have validated the format (gen2_check_format());
 * hitting the trailing assert means an unchecked format slipped through.
 */
static uint32_t
gen2_get_card_format(struct sna *sna, uint32_t format)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++)
		if (i8xx_tex_formats[i].fmt == format)
			return i8xx_tex_formats[i].card_fmt;

	if (sna->kgem.gen < 021) {
		/* Whilst these are not directly supported on 830/845,
		 * we only enable them when we can implicitly convert
		 * them to a supported variant through the texture
		 * combiners.
		 *
		 * The 1+i index maps each alpha-less i85x entry onto the
		 * matching alpha-carrying i8xx entry (x8r8g8b8 ->
		 * a8r8g8b8 at index 1, x8b8g8r8 -> a8b8g8r8 at index 2);
		 * this depends on the relative ordering of the tables.
		 */
		for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++)
			if (i85x_tex_formats[i].fmt == format)
				return i8xx_tex_formats[1+i].card_fmt;
	} else {
		for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++)
			if (i85x_tex_formats[i].fmt == format)
				return i85x_tex_formats[i].card_fmt;
	}

	assert(0);
	return 0;
}
196
197static uint32_t
198gen2_check_format(struct sna *sna, PicturePtr p)
199{
200	unsigned int i;
201
202	for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++)
203		if (i8xx_tex_formats[i].fmt == p->format)
204			return true;
205
206	if (sna->kgem.gen > 021) {
207		for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++)
208			if (i85x_tex_formats[i].fmt == p->format)
209				return true;
210	}
211
212	return false;
213}
214
/* Translate a kgem tiling mode into TM0S1 sampler tiling bits.
 * As in gen2_buf_tiling(), the cases deliberately cascade: Y-tiling
 * sets both the tile-walk and tiled-surface bits, X-tiling only the
 * latter.
 */
static uint32_t
gen2_sampler_tiling_bits(uint32_t tiling)
{
	uint32_t bits = 0;
	switch (tiling) {
	default:
		assert(0);	/* unknown tiling mode */
		/* fallthrough */
	case I915_TILING_Y:
		bits |= TM0S1_TILE_WALK;
		/* fallthrough */
	case I915_TILING_X:
		bits |= TM0S1_TILED_SURFACE;
		/* fallthrough */
	case I915_TILING_NONE:
		break;
	}
	return bits;
}
231
232static bool
233gen2_check_filter(PicturePtr picture)
234{
235	switch (picture->filter) {
236	case PictFilterNearest:
237	case PictFilterBilinear:
238		return true;
239	default:
240		return false;
241	}
242}
243
244static bool
245gen2_check_repeat(PicturePtr picture)
246{
247	if (!picture->repeat)
248		return true;
249
250	switch (picture->repeatType) {
251	case RepeatNone:
252	case RepeatNormal:
253	case RepeatPad:
254	case RepeatReflect:
255		return true;
256	default:
257		return false;
258	}
259}
260
/* Program one sampler unit for the given channel: map address + size,
 * pitch, filtering, and the per-unit coordinate wrap modes.  The five
 * dwords after LOAD_STATE_IMMEDIATE_2 must stay in this exact order
 * (TM0S0..TM0S4).
 */
static void
gen2_emit_texture(struct sna *sna,
		  const struct sna_composite_channel *channel,
		  int unit)
{
	uint32_t wrap_mode_u, wrap_mode_v;
	uint32_t texcoordtype;
	uint32_t filter;

	assert(channel->bo);

	/* Non-affine transforms carry a third (homogeneous) coordinate. */
	if (channel->is_affine)
		texcoordtype = TEXCOORDTYPE_CARTESIAN;
	else
		texcoordtype = TEXCOORDTYPE_HOMOGENEOUS;

	switch (channel->repeat) {
	default:
		assert(0);	/* filtered by gen2_check_repeat() */
		/* fallthrough */
	case RepeatNone:
		wrap_mode_u = TEXCOORDMODE_CLAMP_BORDER;
		break;
	case RepeatNormal:
		wrap_mode_u = TEXCOORDMODE_WRAP;
		break;
	case RepeatPad:
		wrap_mode_u = TEXCOORDMODE_CLAMP;
		break;
	case RepeatReflect:
		wrap_mode_u = TEXCOORDMODE_MIRROR;
		break;
	}
	/* Linear gradients always wrap in v — presumably sampled from a
	 * one-dimensional ramp texture; confirm against the gradient
	 * upload path. */
	if (channel->is_linear)
		wrap_mode_v = TEXCOORDMODE_WRAP;
	else
		wrap_mode_v = wrap_mode_u;

	switch (channel->filter) {
	default:
		assert(0);	/* filtered by gen2_check_filter() */
		/* fallthrough */
	case PictFilterNearest:
		filter = (FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT |
			  FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT |
			  MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT);
		break;
	case PictFilterBilinear:
		filter = (FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT |
			  FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT |
			  MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT);
		break;
	}

	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | LOAD_TEXTURE_MAP(unit) | 4);
	BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
			     channel->bo,
			     I915_GEM_DOMAIN_SAMPLER << 16,
			     0));
	BATCH(((channel->height - 1) << TM0S1_HEIGHT_SHIFT) |
	      ((channel->width - 1)  << TM0S1_WIDTH_SHIFT) |
	      gen2_get_card_format(sna, channel->pict_format) |
	      gen2_sampler_tiling_bits(channel->bo->tiling));
	/* Pitch is encoded in dwords, minus one. */
	BATCH((channel->bo->pitch / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D);
	BATCH(filter);
	BATCH(0);	/* default color */

	BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(unit) |
	      ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | texcoordtype |
	      ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(wrap_mode_v) |
	      ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(wrap_mode_u));
}
331
/* Build the texture-blend stage programs for the composite: *c_out gets
 * the colour pipe (TB0C) dword and *a_out the alpha pipe (TB0A) dword.
 * ARG1 selects the source channels, ARG2 the mask; they are combined
 * with MODULATE when a mask is present, otherwise ARG1 passes through.
 */
static void
gen2_get_blend_factors(const struct sna_composite_op *op,
		       int blend,
		       uint32_t *c_out,
		       uint32_t *a_out)
{
	uint32_t cblend, ablend;

	/* If component alpha is active in the mask and the blend operation
	 * uses the source alpha, then we know we don't need the source
	 * value (otherwise we would have hit a fallback earlier), so we
	 * provide the source alpha (src.A * mask.X) as output color.
	 * Conversely, if CA is set and we don't need the source alpha, then
	 * we produce the source value (src.X * mask.X) and the source alpha
	 * is unused..  Otherwise, we provide the non-CA source value
	 * (src.X * mask.A).
	 *
	 * The PICT_FORMAT_RGB(pict) == 0 fixups are not needed on 855+'s a8
	 * pictures, but we need to implement it for 830/845 and there's no
	 * harm done in leaving it in.
	 */
	cblend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OUTPUT_WRITE_CURRENT;
	ablend = TB0A_RESULT_SCALE_1X | TB0A_OUTPUT_WRITE_CURRENT;

	/* Get the source picture's channels into TBx_ARG1 */
	if ((op->has_component_alpha && gen2_blend_op[blend].src_alpha) ||
	    op->dst.format == PICT_a8) {
		/* Producing source alpha value, so the first set of channels
		 * is src.A instead of src.X.  We also do this if the destination
		 * is a8, in which case src.G is what's written, and the other
		 * channels are ignored.
		 */
		if (op->src.is_opaque) {
			/* Opaque source: alpha is constant 1.0. */
			ablend |= TB0C_ARG1_SEL_ONE;
			cblend |= TB0C_ARG1_SEL_ONE;
		} else if (op->src.is_solid) {
			/* Solid colour travels in the diffuse component. */
			ablend |= TB0C_ARG1_SEL_DIFFUSE;
			cblend |= TB0C_ARG1_SEL_DIFFUSE | TB0C_ARG1_REPLICATE_ALPHA;
		} else {
			ablend |= TB0C_ARG1_SEL_TEXEL0;
			cblend |= TB0C_ARG1_SEL_TEXEL0 | TB0C_ARG1_REPLICATE_ALPHA;
		}
	} else {
		if (op->src.is_solid)
			cblend |= TB0C_ARG1_SEL_DIFFUSE;
		else if (PICT_FORMAT_RGB(op->src.pict_format) != 0)
			cblend |= TB0C_ARG1_SEL_TEXEL0;
		else
			cblend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT;	/* 0.0 */

		if (op->src.is_opaque)
			ablend |= TB0A_ARG1_SEL_ONE;
		else if (op->src.is_solid)
			ablend |= TB0A_ARG1_SEL_DIFFUSE;
		else
			ablend |= TB0A_ARG1_SEL_TEXEL0;
	}

	if (op->mask.bo) {
		/* A solid source occupies the diffuse slot, so the mask
		 * texture lands in unit 0 rather than unit 1. */
		if (op->src.is_solid) {
			cblend |= TB0C_ARG2_SEL_TEXEL0;
			ablend |= TB0A_ARG2_SEL_TEXEL0;
		} else {
			cblend |= TB0C_ARG2_SEL_TEXEL1;
			ablend |= TB0A_ARG2_SEL_TEXEL1;
		}

		/* Without CA, all colour channels are scaled by mask.A. */
		if (op->dst.format == PICT_a8 || !op->has_component_alpha)
			cblend |= TB0C_ARG2_REPLICATE_ALPHA;

		cblend |= TB0C_OP_MODULATE;
		ablend |= TB0A_OP_MODULATE;
	} else if (op->mask.is_solid) {
		cblend |= TB0C_ARG2_SEL_DIFFUSE;
		ablend |= TB0A_ARG2_SEL_DIFFUSE;

		if (op->dst.format == PICT_a8 || !op->has_component_alpha)
			cblend |= TB0C_ARG2_REPLICATE_ALPHA;

		cblend |= TB0C_OP_MODULATE;
		ablend |= TB0A_OP_MODULATE;
	} else {
		/* No mask: pass the source straight through. */
		cblend |= TB0C_OP_ARG1;
		ablend |= TB0A_OP_ARG1;
	}

	*c_out = cblend;
	*a_out = ablend;
}
421
/* Translate a PictOp into the S8 blend-control dword for the given
 * destination format.  Clear and Src need no blending at all; for the
 * rest, factors referencing an alpha channel the destination doesn't
 * really have are rewritten so the hardware sees equivalent behaviour.
 */
static uint32_t gen2_get_blend_cntl(int op,
				    bool has_component_alpha,
				    uint32_t dst_format)
{
	uint32_t sblend, dblend;

	if (op <= PictOpSrc)
		return S8_ENABLE_COLOR_BUFFER_WRITE;

	sblend = gen2_blend_op[op].src_blend;
	dblend = gen2_blend_op[op].dst_blend;

	if (gen2_blend_op[op].dst_alpha) {
		/* If there's no dst alpha channel, adjust the blend op so that
		 * we'll treat it as always 1.
		 */
		if (PICT_FORMAT_A(dst_format) == 0) {
			if (sblend == BLENDFACTOR_DST_ALPHA)
				sblend = BLENDFACTOR_ONE;
			else if (sblend == BLENDFACTOR_INV_DST_ALPHA)
				sblend = BLENDFACTOR_ZERO;
		}

		/* gen2 engine reads 8bit color buffer into green channel
		 * in cases like color buffer blending etc., and also writes
		 * back green channel.  So with dst_alpha blend we should use
		 * color factor.
		 */
		if (dst_format == PICT_a8) {
			if (sblend == BLENDFACTOR_DST_ALPHA)
				sblend = BLENDFACTOR_DST_COLR;
			else if (sblend == BLENDFACTOR_INV_DST_ALPHA)
				sblend = BLENDFACTOR_INV_DST_COLR;
		}
	}

	/* If the source alpha is being used, then we should only be in a case
	 * where the source blend factor is 0, and the source blend value is
	 * the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen2_blend_op[op].src_alpha) {
		if (dblend == BLENDFACTOR_SRC_ALPHA)
			dblend = BLENDFACTOR_SRC_COLR;
		else if (dblend == BLENDFACTOR_INV_SRC_ALPHA)
			dblend = BLENDFACTOR_INV_SRC_COLR;
	}

	return (sblend << S8_SRC_BLEND_FACTOR_SHIFT |
		dblend << S8_DST_BLEND_FACTOR_SHIFT |
		S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
		S8_ENABLE_COLOR_BUFFER_WRITE);
}
474
/* Emit the static pipeline state the rest of this backend assumes:
 * sampler/coord-set bindings, disabled scissor/depth/stencil/fog, and a
 * pass-through texture-blend stage 0.  INVARIANT_SIZE (defined below)
 * must cover the number of dwords emitted here; gen2_get_batch() uses
 * it to reserve batch space before calling us.
 */
static void gen2_emit_invariant(struct sna *sna)
{
	int i;

	/* Bind each of the four map units to its own coord set. */
	for (i = 0; i < 4; i++) {
		BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(i));
		BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | MAP_UNIT(i) |
		      DISABLE_TEX_STREAM_BUMP |
		      ENABLE_TEX_STREAM_COORD_SET | TEX_STREAM_COORD_SET(i) |
		      ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(i));
		BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
		BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(i));
	}

	BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD);
	BATCH(TEXBIND_SET3(TEXCOORDSRC_VTXSET_3) |
	      TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) |
	      TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) |
	      TEXBIND_SET0(TEXCOORDSRC_VTXSET_0));

	BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);

	/* Vertices arrive in screen space already. */
	BATCH(_3DSTATE_VERTEX_TRANSFORM);
	BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE);

	BATCH(_3DSTATE_W_STATE_CMD);
	BATCH(MAGIC_W_STATE_DWORD1);
	BATCH_F(1.0);

	BATCH(_3DSTATE_INDPT_ALPHA_BLEND_CMD |
	      DISABLE_INDPT_ALPHA_BLEND |
	      ENABLE_ALPHA_BLENDFUNC | ABLENDFUNC_ADD);

	BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD);
	BATCH(0);

	/* Default blend state: src=ONE, dst=ZERO (i.e. plain copy). */
	BATCH(_3DSTATE_MODES_1_CMD |
	      ENABLE_COLR_BLND_FUNC | BLENDFUNC_ADD |
	      ENABLE_SRC_BLND_FACTOR | SRC_BLND_FACT(BLENDFACTOR_ONE) |
	      ENABLE_DST_BLND_FACTOR | DST_BLND_FACT(BLENDFACTOR_ZERO));

	BATCH(_3DSTATE_ENABLES_1_CMD |
	      DISABLE_LOGIC_OP |
	      DISABLE_STENCIL_TEST |
	      DISABLE_DEPTH_BIAS |
	      DISABLE_SPEC_ADD |
	      DISABLE_FOG |
	      DISABLE_ALPHA_TEST |
	      DISABLE_DEPTH_TEST |
	      ENABLE_COLOR_BLEND);

	BATCH(_3DSTATE_ENABLES_2_CMD |
	      DISABLE_STENCIL_WRITE |
	      DISABLE_DITHER |
	      DISABLE_DEPTH_WRITE |
	      ENABLE_COLOR_MASK |
	      ENABLE_COLOR_WRITE |
	      ENABLE_TEX_CACHE);

	BATCH(_3DSTATE_STIPPLE);
	BATCH(0);

	/* Texture-blend stage 0 defaults to passing ARG1 (diffuse)
	 * through unchanged; gen2_emit_composite_state() reprograms it
	 * per operation. */
	BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) |
	      TEXPIPE_COLOR |
	      ENABLE_TEXOUTPUT_WRT_SEL |
	      TEXOP_OUTPUT_CURRENT |
	      DISABLE_TEX_CNTRL_STAGE |
	      TEXOP_SCALE_1X |
	      TEXOP_MODIFY_PARMS | TEXOP_LAST_STAGE |
	      TEXBLENDOP_ARG1);
	BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) |
	      TEXPIPE_ALPHA |
	      ENABLE_TEXOUTPUT_WRT_SEL |
	      TEXOP_OUTPUT_CURRENT |
	      TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS |
	      TEXBLENDOP_ARG1);
	BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) |
	      TEXPIPE_COLOR |
	      TEXBLEND_ARG1 |
	      TEXBLENDARG_MODIFY_PARMS |
	      TEXBLENDARG_DIFFUSE);
	BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) |
	      TEXPIPE_ALPHA |
	      TEXBLEND_ARG1 |
	      TEXBLENDARG_MODIFY_PARMS |
	      TEXBLENDARG_DIFFUSE);

#define INVARIANT_SIZE 35

	sna->render_state.gen2.need_invariant = false;
}
566
/* Ensure the batch has room (dwords, relocations and exec slots) for a
 * composite state emission, flushing and restarting the batch if not,
 * and re-emit the invariant state after any restart.
 */
static void
gen2_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	/* +7 for i830 3DSTATE_BUFFER_INFO w/a */
	if (!kgem_check_batch(&sna->kgem, INVARIANT_SIZE+40+7)) {
		DBG(("%s: flushing batch: size %d > %d\n",
		     __FUNCTION__, INVARIANT_SIZE+40,
		     sna->kgem.surface-sna->kgem.nbatch));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	/* Up to 3 relocations: destination, source and mask bo. */
	if (!kgem_check_reloc(&sna->kgem, 3)) {
		DBG(("%s: flushing batch: reloc %d >= %d\n",
		     __FUNCTION__,
		     sna->kgem.nreloc + 3,
		     (int)KGEM_RELOC_SIZE(&sna->kgem)));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (!kgem_check_exec(&sna->kgem, 3)) {
		DBG(("%s: flushing batch: exec %d >= %d\n",
		     __FUNCTION__,
		     sna->kgem.nexec + 1,
		     (int)KGEM_EXEC_SIZE(&sna->kgem)));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	/* A fresh batch (or first use) needs the static state re-emitted. */
	if (sna->render_state.gen2.need_invariant)
		gen2_emit_invariant(sna);
}
602
/* Program the render target: buffer info, destination format and the
 * drawing rectangle.  Skipped entirely (bar a dirty mark) when the
 * current target is already this bo.
 */
static void gen2_emit_target(struct sna *sna,
			     struct kgem_bo *bo,
			     int width,
			     int height,
			     int format)
{
	assert(!too_large(width, height));
	assert(bo->pitch >= 8 && bo->pitch <= MAX_3D_PITCH);
	assert(sna->render.vertex_offset == 0);

	assert(bo->unique_id);
	if (sna->render_state.gen2.target == bo->unique_id) {
		/* Still mark it dirty: we are about to render into it. */
		kgem_bo_mark_dirty(bo);
		return;
	}

	/*
	 * i830 w/a: 3DSTATE_BUFFER_INFO
	 * must not straddle two cachelines.
	 */
	if (intel_get_device_id(sna->dev) == 0x3577)	/* i830 PCI id */
		BATCH_ALIGNED(_3DSTATE_BUF_INFO_CMD, 8);
	else
		BATCH(_3DSTATE_BUF_INFO_CMD);
	BATCH(BUF_3D_ID_COLOR_BACK |
	      gen2_buf_tiling(bo->tiling) |
	      BUF_3D_PITCH(bo->pitch));
	BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
			     bo,
			     I915_GEM_DOMAIN_RENDER << 16 |
			     I915_GEM_DOMAIN_RENDER,
			     0));

	BATCH(_3DSTATE_DST_BUF_VARS_CMD);
	BATCH(gen2_get_dst_format(format));

	BATCH(_3DSTATE_DRAW_RECT_CMD);
	BATCH(0);
	BATCH(0);	/* ymin, xmin */
	BATCH(DRAW_YMAX(height - 1) |
	      DRAW_XMAX(width - 1));
	BATCH(0);	/* yorig, xorig */

	sna->render_state.gen2.target = bo->unique_id;
}
648
649static void gen2_disable_logic_op(struct sna *sna)
650{
651	if (!sna->render_state.gen2.logic_op_enabled)
652		return;
653
654	DBG(("%s\n", __FUNCTION__));
655
656	BATCH(_3DSTATE_ENABLES_1_CMD |
657	      DISABLE_LOGIC_OP | ENABLE_COLOR_BLEND);
658
659	sna->render_state.gen2.logic_op_enabled = 0;
660}
661
662static void gen2_enable_logic_op(struct sna *sna, int op)
663{
664	static const uint8_t logic_op[] = {
665		LOGICOP_CLEAR,		/* GXclear */
666		LOGICOP_AND,		/* GXand */
667		LOGICOP_AND_RVRSE, 	/* GXandReverse */
668		LOGICOP_COPY,		/* GXcopy */
669		LOGICOP_AND_INV,	/* GXandInverted */
670		LOGICOP_NOOP,		/* GXnoop */
671		LOGICOP_XOR,		/* GXxor */
672		LOGICOP_OR,		/* GXor */
673		LOGICOP_NOR,		/* GXnor */
674		LOGICOP_EQUIV,		/* GXequiv */
675		LOGICOP_INV,		/* GXinvert */
676		LOGICOP_OR_RVRSE,	/* GXorReverse */
677		LOGICOP_COPY_INV,	/* GXcopyInverted */
678		LOGICOP_OR_INV,		/* GXorInverted */
679		LOGICOP_NAND,		/* GXnand */
680		LOGICOP_SET		/* GXset */
681	};
682
683	if (sna->render_state.gen2.logic_op_enabled != op+1) {
684		if (!sna->render_state.gen2.logic_op_enabled) {
685			if (op == GXclear || op == GXcopy)
686				return;
687
688			DBG(("%s\n", __FUNCTION__));
689
690			BATCH(_3DSTATE_ENABLES_1_CMD |
691			      ENABLE_LOGIC_OP | DISABLE_COLOR_BLEND);
692		}
693
694		BATCH(_3DSTATE_MODES_4_CMD |
695		      ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(logic_op[op]));
696		sna->render_state.gen2.logic_op_enabled = op+1;
697	}
698}
699
/* Emit the full pipeline state for a composite operation: cache
 * flushes, render target, S2/S3/S8 immediate state, texture-blend
 * program, samplers (or diffuse colour) and the vertex format.
 * The LS1/LS2 emissions are speculatively written and then rewound
 * (nbatch reset to `unwind`) if identical to what was last emitted.
 */
static void gen2_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	uint32_t texcoordfmt, v, unwind;
	uint32_t cblend, ablend;
	int tex;

	gen2_get_batch(sna, op);

	/* If we are about to sample from a bo with pending writes, flush.
	 * Reading the destination itself needs the heavier MI_FLUSH with
	 * map-cache invalidation. */
	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
			BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
		else
			BATCH(_3DSTATE_MODES_5_CMD |
			      PIPELINE_FLUSH_RENDER_CACHE |
			      PIPELINE_FLUSH_TEXTURE_CACHE);
		kgem_clear_dirty(&sna->kgem);
	}

	gen2_emit_target(sna,
			 op->dst.bo,
			 op->dst.width,
			 op->dst.height,
			 op->dst.format);

	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	/* S2: number of texture coordinate sets in the vertex. */
	BATCH((!op->src.is_solid + (op->mask.bo != NULL)) << 12);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
	BATCH(gen2_get_blend_cntl(op->op,
				  op->has_component_alpha,
				  op->dst.format));
	/* Rewind if identical to the previously emitted LS1 triple. */
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
		   sna->kgem.batch + unwind + 1,
		   3 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls1 = unwind;

	gen2_disable_logic_op(sna);

	gen2_get_blend_factors(op, op->op, &cblend, &ablend);
	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
	BATCH(cblend);
	BATCH(ablend);
	/* Same rewind trick for the texture-blend stage (LS2). */
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
		   sna->kgem.batch + unwind + 1,
		   2 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls2 = unwind;

	/* Program the samplers; solid channels ride in the diffuse
	 * colour instead of a texture unit. */
	tex = texcoordfmt = 0;
	if (!op->src.is_solid) {
		if (op->src.is_affine)
			texcoordfmt |= TEXCOORDFMT_2D << (2*tex);
		else
			texcoordfmt |= TEXCOORDFMT_3D << (2*tex);
		gen2_emit_texture(sna, &op->src, tex++);
	} else {
		if (op->src.u.gen2.pixel != sna->render_state.gen2.diffuse) {
			BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
			BATCH(op->src.u.gen2.pixel);
			sna->render_state.gen2.diffuse = op->src.u.gen2.pixel;
		}
	}
	if (op->mask.bo) {
		if (op->mask.is_affine)
			texcoordfmt |= TEXCOORDFMT_2D << (2*tex);
		else
			texcoordfmt |= TEXCOORDFMT_3D << (2*tex);
		gen2_emit_texture(sna, &op->mask, tex++);
	} else if (op->mask.is_solid) {
		if (op->mask.u.gen2.pixel != sna->render_state.gen2.diffuse) {
			BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
			BATCH(op->mask.u.gen2.pixel);
			sna->render_state.gen2.diffuse = op->mask.u.gen2.pixel;
		}
	}

	v = _3DSTATE_VERTEX_FORMAT_2_CMD | texcoordfmt;
	if (sna->render_state.gen2.vft != v) {
		BATCH(v);
		sna->render_state.gen2.vft = v;
	}
}
789
/* Emit the 2D destination position of one vertex as two floats. */
static inline void
gen2_emit_composite_dstcoord(struct sna *sna, int dstX, int dstY)
{
	batch_emit_float(sna, dstX);
	batch_emit_float(sna, dstY);
}
796
797inline static void
798gen2_emit_composite_linear(struct sna *sna,
799			   const struct sna_composite_channel *channel,
800			   int16_t x, int16_t y)
801{
802	float v;
803
804	v = (x * channel->u.linear.dx +
805	     y * channel->u.linear.dy +
806	     channel->u.linear.offset);
807	DBG(("%s: (%d, %d) -> %f\n", __FUNCTION__, x, y, v));
808	VERTEX(v);
809	VERTEX(v);
810}
811
812static void
813gen2_emit_composite_texcoord(struct sna *sna,
814			     const struct sna_composite_channel *channel,
815			     int16_t x, int16_t y)
816{
817	float s = 0, t = 0, w = 1;
818
819	x += channel->offset[0];
820	y += channel->offset[1];
821
822	if (channel->is_affine) {
823		sna_get_transformed_coordinates(x, y,
824						channel->transform,
825						&s, &t);
826		VERTEX(s * channel->scale[0]);
827		VERTEX(t * channel->scale[1]);
828	} else {
829		sna_get_transformed_coordinates_3d(x, y,
830						   channel->transform,
831						   &s, &t, &w);
832		VERTEX(s * channel->scale[0]);
833		VERTEX(t * channel->scale[1]);
834		VERTEX(w);
835	}
836}
837
/* Emit one vertex: destination position, then source coordinates, then
 * mask coordinates.  The emission order and the per-channel choice of
 * linear/texcoord/nothing must match the vertex format programmed in
 * gen2_emit_composite_state().
 */
static void
gen2_emit_composite_vertex(struct sna *sna,
			   const struct sna_composite_op *op,
			   int16_t srcX, int16_t srcY,
			   int16_t mskX, int16_t mskY,
			   int16_t dstX, int16_t dstY)
{
	gen2_emit_composite_dstcoord(sna, dstX, dstY);
	if (op->src.is_linear)
		gen2_emit_composite_linear(sna, &op->src, srcX, srcY);
	else if (!op->src.is_solid)
		gen2_emit_composite_texcoord(sna, &op->src, srcX, srcY);

	if (op->mask.is_linear)
		gen2_emit_composite_linear(sna, &op->mask, mskX, mskY);
	else if (op->mask.bo)
		gen2_emit_composite_texcoord(sna, &op->mask, mskX, mskY);
}
856
/* Generic rectangle emission: three vertices in the order bottom-right,
 * bottom-left, top-left.
 */
fastcall static void
gen2_emit_composite_primitive(struct sna *sna,
			      const struct sna_composite_op *op,
			      const struct sna_composite_rectangles *r)
{
	gen2_emit_composite_vertex(sna, op,
				   r->src.x + r->width,
				   r->src.y + r->height,
				   r->mask.x + r->width,
				   r->mask.y + r->height,
				   op->dst.x + r->dst.x + r->width,
				   op->dst.y + r->dst.y + r->height);
	gen2_emit_composite_vertex(sna, op,
				   r->src.x,
				   r->src.y + r->height,
				   r->mask.x,
				   r->mask.y + r->height,
				   op->dst.x + r->dst.x,
				   op->dst.y + r->dst.y + r->height);
	gen2_emit_composite_vertex(sna, op,
				   r->src.x,
				   r->src.y,
				   r->mask.x,
				   r->mask.y,
				   op->dst.x + r->dst.x,
				   op->dst.y + r->dst.y);
}
884
885fastcall static void
886gen2_emit_composite_primitive_constant(struct sna *sna,
887				       const struct sna_composite_op *op,
888				       const struct sna_composite_rectangles *r)
889{
890	int16_t dst_x = r->dst.x + op->dst.x;
891	int16_t dst_y = r->dst.y + op->dst.y;
892
893	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
894	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
895	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
896}
897
898fastcall static void
899gen2_emit_composite_primitive_linear(struct sna *sna,
900				       const struct sna_composite_op *op,
901				       const struct sna_composite_rectangles *r)
902{
903	int16_t dst_x = r->dst.x + op->dst.x;
904	int16_t dst_y = r->dst.y + op->dst.y;
905
906	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
907	gen2_emit_composite_linear(sna, &op->src,
908				   r->src.x + r->width, r->src.y + r->height);
909
910	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
911	gen2_emit_composite_linear(sna, &op->src,
912				   r->src.x, r->src.y + r->height);
913
914	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
915	gen2_emit_composite_linear(sna, &op->src,
916				   r->src.x, r->src.y);
917}
918
/* Fast path for an identity-transformed source with no mask: write the
 * 12 floats (3 vertices x (x, y, s, t)) for one rectangle straight into
 * the batch.  Vertex order: bottom-right (v[0..3]), bottom-left
 * (v[4..7]), top-left (v[8..11]).
 */
fastcall static void
gen2_emit_composite_primitive_identity(struct sna *sna,
				       const struct sna_composite_op *op,
				       const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	/* x: left edge shared by BL/TL, right edge = left + width. */
	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;

	/* y: top edge for TL, bottom edge shared by BR/BL. */
	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	/* s/t: identity transform, just offset and scale to [0,1]. */
	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
	v[2] = v[6] + w * op->src.scale[0];

	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
	v[7] = v[3] = v[11] + h * op->src.scale[1];
}
943
/* Fast path for an affine-transformed source with no mask: 12 floats
 * per rectangle, destination corners computed directly, source corners
 * run through the transform.  Vertex order: bottom-right, bottom-left,
 * top-left.
 */
fastcall static void
gen2_emit_composite_primitive_affine(struct sna *sna,
				     const struct sna_composite_op *op,
				     const struct sna_composite_rectangles *r)
{
	PictTransform *transform = op->src.transform;
	int src_x = r->src.x + (int)op->src.offset[0];
	int src_y = r->src.y + (int)op->src.offset[1];
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + r->width;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + r->height;

	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
				    transform, op->src.scale,
				    &v[2], &v[3]);

	_sna_get_transformed_scaled(src_x, src_y + r->height,
				    transform, op->src.scale,
				    &v[6], &v[7]);

	_sna_get_transformed_scaled(src_x, src_y,
				    transform, op->src.scale,
				    &v[10], &v[11]);
}
975
/* Fast path for a solid source under an identity-transformed mask: the
 * 12 floats hold (x, y, s, t) per vertex where s/t index the mask.
 * Vertex order: bottom-right, bottom-left, top-left.
 */
fastcall static void
gen2_emit_composite_primitive_constant_identity_mask(struct sna *sna,
						     const struct sna_composite_op *op,
						     const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
	v[2] = v[6] + w * op->mask.scale[0];

	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
	v[7] = v[3] = v[11] + h * op->mask.scale[1];
}
1000
1001#if defined(sse2) && !defined(__x86_64__)
/* SSE2-compiled twin of gen2_emit_composite_primitive_constant();
 * the bodies must be kept in sync.
 */
sse2 fastcall static void
gen2_emit_composite_primitive_constant__sse2(struct sna *sna,
					     const struct sna_composite_op *op,
					     const struct sna_composite_rectangles *r)
{
	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;

	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
}
1014
/* SSE2-compiled twin of gen2_emit_composite_primitive_linear();
 * the bodies must be kept in sync.
 */
sse2 fastcall static void
gen2_emit_composite_primitive_linear__sse2(struct sna *sna,
					   const struct sna_composite_op *op,
					   const struct sna_composite_rectangles *r)
{
	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;

	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x + r->width, r->src.y + r->height);

	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x, r->src.y + r->height);

	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x, r->src.y);
}
1035
/* SSE2-compiled twin of gen2_emit_composite_primitive_identity();
 * the bodies must be kept in sync.
 */
sse2 fastcall static void
gen2_emit_composite_primitive_identity__sse2(struct sna *sna,
					     const struct sna_composite_op *op,
					     const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
	v[2] = v[6] + w * op->src.scale[0];

	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
	v[7] = v[3] = v[11] + h * op->src.scale[1];
}
1060
/* Affine-transformed source: destination coordinates are emitted directly,
 * while each source corner is pushed through the picture transform and
 * scaled into normalised texture space by _sna_get_transformed_scaled().
 * Emits 3 vertices x (x, y, u, v) = 12 floats inline into the batch. */
sse2 fastcall static void
gen2_emit_composite_primitive_affine__sse2(struct sna *sna,
					   const struct sna_composite_op *op,
					   const struct sna_composite_rectangles *r)
{
	PictTransform *transform = op->src.transform;
	int src_x = r->src.x + (int)op->src.offset[0];
	int src_y = r->src.y + (int)op->src.offset[1];
	float *v;

	/* Reserve 12 dwords of inline vertex data. */
	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	/* Destination x repeats for the two left vertices... */
	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + r->width;

	/* ...and destination y for the two bottom vertices. */
	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + r->height;

	/* Transformed texcoords: bottom-right, bottom-left, top-left. */
	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
				    transform, op->src.scale,
				    &v[2], &v[3]);

	_sna_get_transformed_scaled(src_x, src_y + r->height,
				    transform, op->src.scale,
				    &v[6], &v[7]);

	_sna_get_transformed_scaled(src_x, src_y,
				    transform, op->src.scale,
				    &v[10], &v[11]);
}
1092
1093sse2 fastcall static void
1094gen2_emit_composite_primitive_constant_identity_mask__sse2(struct sna *sna,
1095							   const struct sna_composite_op *op,
1096							   const struct sna_composite_rectangles *r)
1097{
1098	float w = r->width;
1099	float h = r->height;
1100	float *v;
1101
1102	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
1103	sna->kgem.nbatch += 12;
1104
1105	v[8] = v[4] = r->dst.x + op->dst.x;
1106	v[0] = v[4] + w;
1107
1108	v[9] = r->dst.y + op->dst.y;
1109	v[5] = v[1] = v[9] + h;
1110
1111	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
1112	v[2] = v[6] + w * op->mask.scale[0];
1113
1114	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
1115	v[7] = v[3] = v[11] + h * op->mask.scale[1];
1116}
1117#endif
1118
/* Second pass for component-alpha rendering.  A CA OVER needs both
 * src*mask and srcA*mask in the blend, which exceeds a single gen2 blend
 * stage: the first pass was emitted as OutReverse (see
 * gen2_render_composite), and here the very same vertex data is replayed
 * with an additive ONE/ONE blend to accumulate the src*mask term. */
static void gen2_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	uint32_t ablend, cblend, *src, *dst;
	int n;

	if (!op->need_magic_ca_pass)
		return;

	DBG(("%s: batch=%x, vertex=%x\n", __FUNCTION__,
	     sna->kgem.nbatch, sna->render.vertex_offset));

	assert(op->mask.bo);
	assert(op->has_component_alpha);

	/* Switch the blend unit to additive ONE/ONE for this pass. */
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(8) | 0);
	BATCH(BLENDFACTOR_ONE << S8_SRC_BLEND_FACTOR_SHIFT |
	      BLENDFACTOR_ONE << S8_DST_BLEND_FACTOR_SHIFT |
	      S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
	      S8_ENABLE_COLOR_BUFFER_WRITE);
	sna->render_state.gen2.ls1 = 0;	/* cached S8 state is now stale */

	/* Reprogram texture blend stage 0 for PictOpAdd. */
	gen2_get_blend_factors(op, PictOpAdd, &cblend, &ablend);
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
	BATCH(cblend);
	BATCH(ablend);
	sna->render_state.gen2.ls2 = 0;	/* cached ls2 state is stale too */

	/* Replay the inline primitive: one dword for the (already
	 * length-patched) PRIM3D command plus all vertex dwords. */
	src = sna->kgem.batch + sna->render.vertex_offset;
	dst = sna->kgem.batch + sna->kgem.nbatch;
	n = 1 + sna->render.vertex_index;
	sna->kgem.nbatch += n;
	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
	while (n--)
		*dst++ = *src++;
}
1156
1157static void gen2_vertex_flush(struct sna *sna,
1158			      const struct sna_composite_op *op)
1159{
1160	if (sna->render.vertex_index == 0)
1161		return;
1162
1163	sna->kgem.batch[sna->render.vertex_offset] |=
1164		sna->render.vertex_index - 1;
1165
1166	gen2_magic_ca_pass(sna, op);
1167
1168	sna->render.vertex_offset = 0;
1169	sna->render.vertex_index = 0;
1170}
1171
/* Reserve space in the batch for up to 'want' inline rectangles, opening
 * a new PRIM3D_RECTLIST (or resuming the one that happens to end the
 * batch) as required.  Returns how many rectangles fit; returns 0 after
 * flushing and submitting the batch, in which case the caller must
 * re-emit the render state and retry. */
inline static int gen2_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want)
{
	int rem = batch_space(sna), size, need;

	DBG(("%s: want=%d, floats_per_vertex=%d, rem=%d\n",
	     __FUNCTION__, want, op->floats_per_vertex, rem));

	assert(op->floats_per_vertex);
	assert(op->floats_per_rect == 3 * op->floats_per_vertex);

	need = 1; /* one dword for the PRIM3D command itself */
	size = op->floats_per_rect;
	if (op->need_magic_ca_pass)
		/* The CA second pass replays all vertices emitted so far
		 * with fresh state: reserve room for both (6 state dwords
		 * plus a full copy of the current vertex data). */
		need += 6 + size*sna->render.vertex_index, size *= 2;

	DBG(("%s: want=%d, need=%d,size=%d, rem=%d\n",
	     __FUNCTION__, want, need, size, rem));
	if (rem < need + size) {
		/* Not even a single rectangle fits: flush and resubmit. */
		gen2_vertex_flush(sna, op);
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
		return 0;
	}

	rem -= need;
	if (sna->render.vertex_offset == 0) {
		if ((sna->kgem.batch[sna->kgem.nbatch-1] & ~0xffff) ==
		    (PRIM3D_INLINE | PRIM3D_RECTLIST)) {
			/* The last command is a RECTLIST: resume it,
			 * continuing the count from its stored length
			 * (which is dword count minus one). */
			uint32_t *b = &sna->kgem.batch[sna->kgem.nbatch-1];
			assert(*b & 0xffff);
			sna->render.vertex_index = 1 + (*b & 0xffff);
			*b = PRIM3D_INLINE | PRIM3D_RECTLIST;
			sna->render.vertex_offset = sna->kgem.nbatch - 1;
			assert(!op->need_magic_ca_pass);
		} else {
			/* Open a fresh inline RECTLIST. */
			sna->render.vertex_offset = sna->kgem.nbatch;
			BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST);
		}

		need = 0;
	} else
		/* Dwords already consumed by the open primitive. */
		need = sna->kgem.nbatch - sna->render.vertex_offset;

	/* An inline primitive is limited to MAX_INLINE dwords in total. */
	if (rem > MAX_INLINE - need)
		rem = MAX_INLINE -need;

	/* Clamp the request to what actually fits. */
	if (want > 1 && want * size > rem)
		want = rem / size;

	assert(want);
	sna->render.vertex_index += want*op->floats_per_rect;
	return want;
}
1227
1228fastcall static void
1229gen2_render_composite_blt(struct sna *sna,
1230			  const struct sna_composite_op *op,
1231			  const struct sna_composite_rectangles *r)
1232{
1233	if (!gen2_get_rectangles(sna, op, 1)) {
1234		gen2_emit_composite_state(sna, op);
1235		gen2_get_rectangles(sna, op, 1);
1236	}
1237
1238	op->prim_emit(sna, op, r);
1239}
1240
1241fastcall static void
1242gen2_render_composite_box(struct sna *sna,
1243			  const struct sna_composite_op *op,
1244			  const BoxRec *box)
1245{
1246	struct sna_composite_rectangles r;
1247
1248	if (!gen2_get_rectangles(sna, op, 1)) {
1249		gen2_emit_composite_state(sna, op);
1250		gen2_get_rectangles(sna, op, 1);
1251	}
1252
1253	DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
1254	     box->x1, box->y1,
1255	     box->x2 - box->x1,
1256	     box->y2 - box->y1));
1257
1258	r.dst.x  = box->x1; r.dst.y  = box->y1;
1259	r.width = box->x2 - box->x1;
1260	r.height = box->y2 - box->y1;
1261	r.src = r.mask = r.dst;
1262
1263	op->prim_emit(sna, op, &r);
1264}
1265
1266static void
1267gen2_render_composite_boxes(struct sna *sna,
1268			    const struct sna_composite_op *op,
1269			    const BoxRec *box, int nbox)
1270{
1271	do {
1272		int nbox_this_time;
1273
1274		nbox_this_time = gen2_get_rectangles(sna, op, nbox);
1275		if (nbox_this_time == 0) {
1276			gen2_emit_composite_state(sna, op);
1277			nbox_this_time = gen2_get_rectangles(sna, op, nbox);
1278		}
1279		nbox -= nbox_this_time;
1280
1281		do {
1282			struct sna_composite_rectangles r;
1283
1284			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
1285			     box->x1, box->y1,
1286			     box->x2 - box->x1,
1287			     box->y2 - box->y1));
1288
1289			r.dst.x  = box->x1; r.dst.y  = box->y1;
1290			r.width = box->x2 - box->x1;
1291			r.height = box->y2 - box->y1;
1292			r.src = r.mask = r.dst;
1293
1294			op->prim_emit(sna, op, &r);
1295			box++;
1296		} while (--nbox_this_time);
1297	} while (nbox);
1298}
1299
1300static void gen2_render_composite_done(struct sna *sna,
1301				       const struct sna_composite_op *op)
1302{
1303	gen2_vertex_flush(sna, op);
1304
1305	if (op->mask.bo)
1306		kgem_bo_destroy(&sna->kgem, op->mask.bo);
1307	if (op->src.bo)
1308		kgem_bo_destroy(&sna->kgem, op->src.bo);
1309	sna_render_composite_redirect_done(sna, op);
1310}
1311
1312static bool
1313gen2_composite_solid_init(struct sna *sna,
1314			  struct sna_composite_channel *channel,
1315			  uint32_t color)
1316{
1317	channel->filter = PictFilterNearest;
1318	channel->repeat = RepeatNormal;
1319	channel->is_solid  = true;
1320	channel->is_affine = true;
1321	channel->width  = 1;
1322	channel->height = 1;
1323	channel->pict_format = PICT_a8r8g8b8;
1324
1325	channel->bo = NULL;
1326	channel->u.gen2.pixel = color;
1327
1328	channel->scale[0]  = channel->scale[1]  = 1;
1329	channel->offset[0] = channel->offset[1] = 0;
1330	return true;
1331}
1332
1333#define xFixedToDouble(f) pixman_fixed_to_double(f)
1334
/* Initialise 'channel' to sample a linear gradient.  The colour ramp is
 * rendered into a 1xN a8r8g8b8 texture (channel->bo) and the per-vertex
 * texcoord is later derived from u.linear.dx/dy/offset.  Returns non-zero
 * on success, 0 (or the fixup result) on failure/fallback. */
static bool
gen2_composite_linear_init(struct sna *sna,
			   PicturePtr picture,
			   struct sna_composite_channel *channel,
			   int x, int y,
			   int w, int h,
			   int dst_x, int dst_y)
{
	PictLinearGradient *linear =
		(PictLinearGradient *)picture->pSourcePict;
	pixman_fixed_t tx, ty;
	float x0, y0, sf;
	float dx, dy;

	DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n",
	     __FUNCTION__,
	     xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y),
	     xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y)));

	/* Degenerate gradient: zero-length axis, nothing to sample. */
	if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
		return 0;

	if (!sna_transform_is_affine(picture->transform)) {
		DBG(("%s: fallback due to projective transform\n",
		     __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	/* Render the colour stops into a 1D gradient texture. */
	channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear);
	if (!channel->bo)
		return 0;

	channel->filter = PictFilterNearest;
	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->is_linear = true;
	channel->width  = channel->bo->pitch / 4;
	channel->height = 1;
	channel->pict_format = PICT_a8r8g8b8;

	channel->scale[0]  = channel->scale[1]  = 1;
	channel->offset[0] = channel->offset[1] = 0;

	if (sna_transform_is_translation(picture->transform, &tx, &ty)) {
		/* Pure translation: shift the gradient endpoints. */
		dx = xFixedToDouble(linear->p2.x - linear->p1.x);
		dy = xFixedToDouble(linear->p2.y - linear->p1.y);

		x0 = xFixedToDouble(linear->p1.x);
		y0 = xFixedToDouble(linear->p1.y);

		if (tx | ty) {	/* non-zero translation */
			x0 -= pixman_fixed_to_double(tx);
			y0 -= pixman_fixed_to_double(ty);
		}
	} else {
		/* General affine: map the endpoints back through the
		 * inverse transform into destination space. */
		struct pixman_f_vector p1, p2;
		struct pixman_f_transform m, inv;

		pixman_f_transform_from_pixman_transform(&m, picture->transform);
		DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n",
		     __FUNCTION__,
		     m.m[0][0], m.m[0][1], m.m[0][2],
		     m.m[1][0], m.m[1][1], m.m[1][2],
		     m.m[2][0], m.m[2][1], m.m[2][2]));
		if (!pixman_f_transform_invert(&inv, &m))
			return 0;

		p1.v[0] = pixman_fixed_to_double(linear->p1.x);
		p1.v[1] = pixman_fixed_to_double(linear->p1.y);
		p1.v[2] = 1.;
		pixman_f_transform_point(&inv, &p1);

		p2.v[0] = pixman_fixed_to_double(linear->p2.x);
		p2.v[1] = pixman_fixed_to_double(linear->p2.y);
		p2.v[2] = 1.;
		pixman_f_transform_point(&inv, &p2);

		DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n",
		     __FUNCTION__,
		     p1.v[0], p1.v[1], p1.v[2],
		     p2.v[0], p2.v[1], p2.v[2]));

		dx = p2.v[0] - p1.v[0];
		dy = p2.v[1] - p1.v[1];

		x0 = p1.v[0];
		y0 = p1.v[1];
	}

	/* Normalise so that dot((x,y) - p1, (dx,dy)) spans [0, 1] along
	 * the gradient axis. */
	sf = dx*dx + dy*dy;
	dx /= sf;
	dy /= sf;

	channel->u.linear.dx = dx;
	channel->u.linear.dy = dy;
	channel->u.linear.offset = -dx*(x0+dst_x-x) + -dy*(y0+dst_y-y);

	DBG(("%s: dx=%f, dy=%f, offset=%f\n",
	     __FUNCTION__, dx, dy, channel->u.linear.offset));

	return channel->bo != NULL;
}
1437
1438static bool source_is_covered(PicturePtr picture,
1439			      int x, int y,
1440			      int width, int height)
1441{
1442	int x1, y1, x2, y2;
1443
1444	if (picture->repeat && picture->repeatType != RepeatNone)
1445		return true;
1446
1447	if (picture->pDrawable == NULL)
1448		return false;
1449
1450	if (picture->transform) {
1451		pixman_box16_t sample;
1452
1453		sample.x1 = x;
1454		sample.y1 = y;
1455		sample.x2 = x + width;
1456		sample.y2 = y + height;
1457
1458		pixman_transform_bounds(picture->transform, &sample);
1459
1460		x1 = sample.x1;
1461		x2 = sample.x2;
1462		y1 = sample.y1;
1463		y2 = sample.y2;
1464	} else {
1465		x1 = x;
1466		y1 = y;
1467		x2 = x + width;
1468		y2 = y + height;
1469	}
1470
1471	return
1472		x1 >= 0 && y1 >= 0 &&
1473		x2 <= picture->pDrawable->width &&
1474		y2 <= picture->pDrawable->height;
1475}
1476
1477static bool
1478gen2_check_card_format(struct sna *sna,
1479		       PicturePtr picture,
1480		       struct sna_composite_channel *channel,
1481		       int x, int y, int w, int h,
1482		       bool *fixup_alpha)
1483{
1484	uint32_t format = picture->format;
1485	unsigned int i;
1486
1487	for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) {
1488		if (i8xx_tex_formats[i].fmt == format)
1489			return true;
1490	}
1491
1492	for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) {
1493		if (i85x_tex_formats[i].fmt == format) {
1494			if (sna->kgem.gen >= 021)
1495				return true;
1496
1497			if (source_is_covered(picture, x, y, w,h)) {
1498				channel->is_opaque = true;
1499				return true;
1500			}
1501
1502			*fixup_alpha = true;
1503			return false;
1504		}
1505	}
1506
1507	*fixup_alpha = false;
1508	return false;
1509}
1510
/* Prepare 'picture' for use as the source or mask channel of a gen2
 * composite.  Returns 1 when the channel is ready (possibly with a bo),
 * 0 when the caller should substitute a solid colour (also the failure
 * value of the bool-returning helpers), and -1 to request a fallback. */
static int
gen2_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;
	bool fixup_alpha;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	/* Reset the channel to known defaults before classifying. */
	channel->is_solid = false;
	channel->is_linear = false;
	channel->is_opaque = false;
	channel->is_affine = true;
	channel->transform = NULL;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen2_composite_solid_init(sna, channel, color);

	if (!gen2_check_repeat(picture)) {
		DBG(("%s -- fallback, unhandled repeat %d\n",
		     __FUNCTION__, picture->repeat));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen2_check_filter(picture)) {
		DBG(("%s -- fallback, unhandled filter %d\n",
		     __FUNCTION__, picture->filter));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (picture->pDrawable == NULL) {
		/* Source-only picture: a gradient or solid fill. */
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen2_composite_linear_init(sna, picture, channel,
							  x, y,
							  w, h,
							  dst_x, dst_y);

		DBG(("%s -- fallback, unhandled source %d\n",
		     __FUNCTION__, picture->pSourcePict->type));
		ret = -1;
		if (!precise)
			/* Imprecise rendering permits a cheaper
			 * approximation of the gradient. */
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
		/* An integer translation can be folded into the sample
		 * origin, dropping the transform entirely. */
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		if (channel->repeat &&
		    (x >= 0 &&
		     y >= 0 &&
		     x + w <= pixmap->drawable.width &&
		     y + h <= pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			if (priv && priv->clear) {
				/* A cleared pixmap is just a solid colour. */
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen2_composite_solid_init(sna, channel, solid_color(picture->format, priv->clear_color));
			}
		}
	} else
		channel->transform = picture->transform;

	/* Convert to a texture format the sampler understands. */
	if (!gen2_check_card_format(sna, picture, channel, x,  y, w ,h, &fixup_alpha))
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y, fixup_alpha);

	channel->pict_format = picture->format;
	/* Oversized pixmaps must be trimmed to the sampled region. */
	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}
1623
/* Bind the destination picture to op->dst, choosing a GPU bo for the
 * target drawable and redirecting through a proxy if the pixmap exceeds
 * the gen2 render limits.  Returns false if no usable bo is available. */
static bool
gen2_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;
	unsigned hint;

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
	op->dst.width = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;

	/* Damage extents: the explicit rectangle, or the whole picture
	 * when no size was supplied. */
	if (w && h) {
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	hint = PREFER_GPU | RENDER_GPU;
	if (!need_tiling(sna, op->dst.width, op->dst.height))
		hint |= FORCE_GPU;
	if (!partial) {
		hint |= IGNORE_DAMAGE;
		/* Overwriting the entire pixmap: previous contents are
		 * irrelevant. */
		if (w == op->dst.width && h == op->dst.height)
			hint |= REPLACES;
	}

	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	if (hint & REPLACES) {
		/* Discard any pending operations on the old contents. */
		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
	}

	/* gen2 requires a qword-aligned destination pitch. */
	assert((op->dst.bo->pitch & 7) == 0);

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	/* Targets beyond the 3D pipeline limits are redirected through a
	 * temporary render target. */
	if (((too_large(op->dst.width, op->dst.height) ||
	      op->dst.bo->pitch > MAX_3D_PITCH)) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
		return false;

	return true;
}
1687
1688static bool
1689is_unhandled_gradient(PicturePtr picture, bool precise)
1690{
1691	if (picture->pDrawable)
1692		return false;
1693
1694	switch (picture->pSourcePict->type) {
1695	case SourcePictTypeSolidFill:
1696	case SourcePictTypeLinear:
1697		return false;
1698	default:
1699		return precise;
1700	}
1701}
1702
1703static bool
1704has_alphamap(PicturePtr p)
1705{
1706	return p->alphaMap != NULL;
1707}
1708
1709static bool
1710need_upload(PicturePtr p)
1711{
1712	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
1713}
1714
1715static bool
1716source_is_busy(PixmapPtr pixmap)
1717{
1718	struct sna_pixmap *priv = sna_pixmap(pixmap);
1719	if (priv == NULL)
1720		return false;
1721
1722	if (priv->clear)
1723		return false;
1724
1725	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
1726		return true;
1727
1728	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
1729		return true;
1730
1731	return priv->gpu_damage && !priv->cpu_damage;
1732}
1733
1734static bool
1735source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
1736{
1737	if (sna_picture_is_solid(p, NULL))
1738		return false;
1739
1740	if (is_unhandled_gradient(p, precise) || !gen2_check_repeat(p))
1741		return true;
1742
1743	if (pixmap && source_is_busy(pixmap))
1744		return false;
1745
1746	return has_alphamap(p) || !gen2_check_filter(p) || need_upload(p);
1747}
1748
1749static bool
1750gen2_composite_fallback(struct sna *sna,
1751			PicturePtr src,
1752			PicturePtr mask,
1753			PicturePtr dst)
1754{
1755	PixmapPtr src_pixmap;
1756	PixmapPtr mask_pixmap;
1757	PixmapPtr dst_pixmap;
1758	bool src_fallback, mask_fallback;
1759
1760	if (!gen2_check_dst_format(dst->format)) {
1761		DBG(("%s: unknown destination format: %d\n",
1762		     __FUNCTION__, dst->format));
1763		return true;
1764	}
1765
1766	dst_pixmap = get_drawable_pixmap(dst->pDrawable);
1767
1768	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
1769	src_fallback = source_fallback(src, src_pixmap,
1770				       dst->polyMode == PolyModePrecise);
1771
1772	if (mask) {
1773		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
1774		mask_fallback = source_fallback(mask, mask_pixmap,
1775						dst->polyMode == PolyModePrecise);
1776	} else {
1777		mask_pixmap = NULL;
1778		mask_fallback = NULL;
1779	}
1780
1781	/* If we are using the destination as a source and need to
1782	 * readback in order to upload the source, do it all
1783	 * on the cpu.
1784	 */
1785	if (src_pixmap == dst_pixmap && src_fallback) {
1786		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
1787		return true;
1788	}
1789	if (mask_pixmap == dst_pixmap && mask_fallback) {
1790		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
1791		return true;
1792	}
1793
1794	/* If anything is on the GPU, push everything out to the GPU */
1795	if (dst_use_gpu(dst_pixmap)) {
1796		DBG(("%s: dst is already on the GPU, try to use GPU\n",
1797		     __FUNCTION__));
1798		return false;
1799	}
1800
1801	if (src_pixmap && !src_fallback) {
1802		DBG(("%s: src is already on the GPU, try to use GPU\n",
1803		     __FUNCTION__));
1804		return false;
1805	}
1806	if (mask_pixmap && !mask_fallback) {
1807		DBG(("%s: mask is already on the GPU, try to use GPU\n",
1808		     __FUNCTION__));
1809		return false;
1810	}
1811
1812	/* However if the dst is not on the GPU and we need to
1813	 * render one of the sources using the CPU, we may
1814	 * as well do the entire operation in place onthe CPU.
1815	 */
1816	if (src_fallback) {
1817		DBG(("%s: dst is on the CPU and src will fallback\n",
1818		     __FUNCTION__));
1819		return true;
1820	}
1821
1822	if (mask && mask_fallback) {
1823		DBG(("%s: dst is on the CPU and mask will fallback\n",
1824		     __FUNCTION__));
1825		return true;
1826	}
1827
1828	if (too_large(dst_pixmap->drawable.width,
1829		      dst_pixmap->drawable.height) &&
1830	    dst_is_cpu(dst_pixmap)) {
1831		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
1832		return true;
1833	}
1834
1835	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
1836	     __FUNCTION__));
1837	return dst_use_cpu(dst_pixmap);
1838}
1839
/* Try to initialise the mask channel by reusing the already-prepared
 * source channel, saving a second texture setup when both sample the
 * same data identically.  Returns non-zero on success (the mask channel
 * is then fully initialised, holding its own bo reference). */
static int
reuse_source(struct sna *sna,
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
	uint32_t color;

	/* Reuse requires identical sample origins. */
	if (src_x != msk_x || src_y != msk_y)
		return false;

	/* A solid mask is cheaper set up directly. */
	if (sna_picture_is_solid(mask, &color))
		return gen2_composite_solid_init(sna, mc, color);

	/* A solid source has no bo/texture state worth sharing. */
	if (sc->is_solid)
		return false;

	if (src == mask) {
		DBG(("%s: mask is source\n", __FUNCTION__));
		*mc = *sc;
		mc->bo = kgem_bo_reference(mc->bo);
		return true;
	}

	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
		return false;

	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

	/* Same drawable, different pictures: every sampling parameter
	 * must still match (or be independently supported). */
	if (!sna_transform_equal(src->transform, mask->transform))
		return false;

	if (!sna_picture_alphamap_equal(src, mask))
		return false;

	if (!gen2_check_repeat(mask))
		return false;

	if (!gen2_check_filter(mask))
		return false;

	if (!gen2_check_format(sna, mask))
		return false;

	DBG(("%s: reusing source channel for mask with a twist\n",
	     __FUNCTION__));

	/* Copy the channel, then override the per-picture parameters. */
	*mc = *sc;
	mc->repeat = mask->repeat ? mask->repeatType : RepeatNone;
	mc->filter = mask->filter;
	mc->pict_format = mask->format;
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}
1893
/* Top-level gen2 composite entry point: validate the request, prepare
 * destination/source/mask channels, select the fastest vertex emitter,
 * and emit the initial render state.  Returns true when the operation
 * has been set up in 'tmp'; false means the caller must fall back
 * (a BLT fallback is attempted on the way out). */
static bool
gen2_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x,  int16_t src_y,
		      int16_t mask_x, int16_t mask_y,
		      int16_t dst_x,  int16_t dst_y,
		      int16_t width,  int16_t height,
		      unsigned flags,
		      struct sna_composite_op *tmp)
{
	DBG(("%s()\n", __FUNCTION__));

	if (op >= ARRAY_SIZE(gen2_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	/* Unmasked operations may be expressible on the cheaper BLT ring. */
	if (mask == NULL &&
	    sna_blt_composite(sna, op, src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      flags, tmp))
		return true;

	if (gen2_composite_fallback(sna, src, mask, dst))
		goto fallback;

	/* Targets beyond the 3D limits are decomposed into tiles. */
	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x,  src_y,
					    mask_x, mask_y,
					    dst_x,  dst_y,
					    width,  height,
					    tmp);

	tmp->op = op;
	sna_render_composite_redirect_init(tmp);

	if (!gen2_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       flags & COMPOSITE_PARTIAL || op > PictOpSrc)) {
		DBG(("%s: unable to set render target\n",
		     __FUNCTION__));
		goto fallback;
	}

	/* -1: fallback; 0: substitute solid; 1: channel ready. */
	switch (gen2_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		DBG(("%s: fallback -- unable to prepare source\n",
		     __FUNCTION__));
		goto cleanup_dst;
	case 0:
		gen2_composite_solid_init(sna, &tmp->src, 0);
		break;
	case 1:
		/* A simple textured source may still be a BLT copy. */
		if (mask == NULL && tmp->src.bo &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;
		break;
	}

	if (mask) {
		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, mask_x, mask_y)) {
			switch (gen2_composite_picture(sna, mask, &tmp->mask,
						       mask_x, mask_y,
						       width,  height,
						       dst_x,  dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				DBG(("%s: fallback -- unable to prepare mask\n",
				     __FUNCTION__));
				goto cleanup_src;
			case 0:
				gen2_composite_solid_init(sna, &tmp->mask, 0);
				/* fall through */
			case 1:
				break;
			}
		}

		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			/* Check if it's component alpha that relies on a source alpha
			 * and on the source value.  We can only get one of those
			 * into the single source value that we get to blend with.
			 */
			tmp->has_component_alpha = true;
			if (gen2_blend_op[op].src_alpha &&
			    (gen2_blend_op[op].src_blend != BLENDFACTOR_ZERO)) {
				if (op != PictOpOver) {
					DBG(("%s: fallback -- unsupported CA blend (src_blend=%d)\n",
					     __FUNCTION__,
					     gen2_blend_op[op].src_blend));
					goto cleanup_src;
				}

				/* OVER is split into OutReverse now plus
				 * an additive pass in gen2_magic_ca_pass. */
				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		/* convert solid to a texture (pure convenience) */
		if (tmp->mask.is_solid && tmp->src.is_solid) {
			assert(tmp->mask.is_affine);
			tmp->mask.bo = sna_render_get_solid(sna, tmp->mask.u.gen2.pixel);
			if (!tmp->mask.bo)
				goto cleanup_src;
		}
	}

	/* Vertex layout: 2 floats for position, plus 2 (affine) or 3
	 * (projective) texcoord floats per non-solid channel. */
	tmp->floats_per_vertex = 2;
	if (!tmp->src.is_solid)
		tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 3;
	if (tmp->mask.bo)
		tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 3;
	tmp->floats_per_rect = 3*tmp->floats_per_vertex;

	/* Pick a specialised emitter when the channel setup allows it,
	 * preferring the SSE2 variants where the CPU supports them. */
	tmp->prim_emit = gen2_emit_composite_primitive;
	if (tmp->mask.bo) {
		if (tmp->mask.transform == NULL) {
			if (tmp->src.is_solid) {
				assert(tmp->floats_per_rect == 12);
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen2_emit_composite_primitive_constant_identity_mask__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen2_emit_composite_primitive_constant_identity_mask;
				}
			}
		}
	} else {
		if (tmp->src.is_solid) {
			assert(tmp->floats_per_rect == 6);
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_constant__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_constant;
			}
		} else if (tmp->src.is_linear) {
			assert(tmp->floats_per_rect == 12);
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_linear__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_linear;
			}
		} else if (tmp->src.transform == NULL) {
			assert(tmp->floats_per_rect == 12);
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_identity__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_identity;
			}
		} else if (tmp->src.is_affine) {
			assert(tmp->floats_per_rect == 12);
			/* Fold the homogeneous w into the scale factors. */
			tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
			tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_affine__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_affine;
			}
		}
	}

	tmp->blt   = gen2_render_composite_blt;
	tmp->box   = gen2_render_composite_box;
	tmp->boxes = gen2_render_composite_boxes;
	tmp->done  = gen2_render_composite_done;

	/* Make sure all three bo fit into the aperture together,
	 * submitting the outstanding batch and retrying once. */
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL)) {
			DBG(("%s: fallback, operation does not fit into GTT\n",
			     __FUNCTION__));
			goto cleanup_mask;
		}
	}

	gen2_emit_composite_state(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
		tmp->mask.bo = NULL;
	}
cleanup_src:
	if (tmp->src.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
		tmp->src.bo = NULL;
	}
cleanup_dst:
	if (tmp->redirect.real_bo) {
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
		tmp->redirect.real_bo = NULL;
	}
fallback:
	/* Last resort: let the BLT engine try, flagged as a fallback. */
	return (mask == NULL &&
		sna_blt_composite(sna, op, src, dst,
				  src_x, src_y,
				  dst_x, dst_y,
				  width, height,
				  flags | COMPOSITE_FALLBACK, tmp));
}
2127
2128fastcall static void
2129gen2_emit_composite_spans_primitive_constant(struct sna *sna,
2130					     const struct sna_composite_spans_op *op,
2131					     const BoxRec *box,
2132					     float opacity)
2133{
2134	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
2135	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
2136	sna->kgem.nbatch += 9;
2137
2138	v[0] = op->base.dst.x + box->x2;
2139	v[1] = op->base.dst.y + box->y2;
2140	*((uint32_t *)v + 2) = alpha;
2141
2142	v[3] = op->base.dst.x + box->x1;
2143	v[4] = v[1];
2144	*((uint32_t *)v + 5) = alpha;
2145
2146	v[6] = v[3];
2147	v[7] = op->base.dst.y + box->y1;
2148	*((uint32_t *)v + 8) = alpha;
2149}
2150
fastcall static void
gen2_emit_composite_spans_primitive_linear(struct sna *sna,
					     const struct sna_composite_spans_op *op,
					     const BoxRec *box,
					     float opacity)
{
	/* Emit one span triangle (x2,y2)-(x1,y2)-(x1,y1) for a linear
	 * gradient source: each vertex is dst x, y, packed diffuse alpha
	 * and the gradient coordinate from gen2_emit_composite_linear().
	 */
	union {
		float f;
		uint32_t u;
	} alpha;

	/* Scale opacity to 8 bits in the alpha byte of the ARGB diffuse
	 * colour; the union pushes the raw bits through the float-only
	 * VERTEX() macro without a pointer-cast aliasing violation.
	 */
	alpha.u = (uint8_t)(255 * opacity) << 24;

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x2,
				     op->base.dst.y + box->y2);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x2, box->y2);

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x1,
				     op->base.dst.y + box->y2);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y2);

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x1,
				     op->base.dst.y + box->y1);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y1);
}
2182
fastcall static void
gen2_emit_composite_spans_primitive_identity_source(struct sna *sna,
						    const struct sna_composite_spans_op *op,
						    const BoxRec *box,
						    float opacity)
{
	/* Span triangle for an untransformed texture source: 5 floats per
	 * vertex (dst x, dst y, packed alpha, tex u, tex v), 15 dwords
	 * written straight into the batch. Texcoords are the box corners
	 * offset into the source and normalised by src.scale[].
	 */
	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	sna->kgem.nbatch += 15;

	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	*((uint32_t *)v + 2) = alpha;
	v[3] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
	v[4] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];

	v[5] = op->base.dst.x + box->x1;
	v[6] = v[1];
	*((uint32_t *)v + 7) = alpha;
	v[8] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
	v[9] = v[4];

	v[10] = v[5];
	v[11] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 12) = alpha;
	v[13] = v[8];
	v[14] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
}
2211
fastcall static void
gen2_emit_composite_spans_primitive_affine_source(struct sna *sna,
						  const struct sna_composite_spans_op *op,
						  const BoxRec *box,
						  float opacity)
{
	/* Span triangle for an affine-transformed texture source: 5 floats
	 * per vertex (x, y, packed alpha, u, v). Dst coordinates and the
	 * shared alpha are written first, then each corner is pushed
	 * through the source transform and scale to produce the texcoords.
	 */
	PictTransform *transform = op->base.src.transform;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 15;

	/* Shared coordinates are assigned once and reused across vertices. */
	v[0]  = op->base.dst.x + box->x2;
	v[6]  = v[1] = op->base.dst.y + box->y2;
	v[10] = v[5] = op->base.dst.x + box->x1;
	v[11] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 2) = alpha;
	*((uint32_t *)v + 7) = alpha;
	*((uint32_t *)v + 12) = alpha;

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[3], &v[4]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[8], &v[9]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y1,
				    transform, op->base.src.scale,
				    &v[13], &v[14]);
}
2248
2249#if defined(sse2) && !defined(__x86_64__)
/* SSE2-attributed copy of gen2_emit_composite_spans_primitive_constant()
 * (32-bit builds only; the sse2 marker is presumably a target-attribute
 * macro enabling SSE2 codegen — confirm against the build flags).
 * Keep the body textually in sync with the plain version.
 */
sse2 fastcall static void
gen2_emit_composite_spans_primitive_constant__sse2(struct sna *sna,
						   const struct sna_composite_spans_op *op,
						   const BoxRec *box,
						   float opacity)
{
	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	sna->kgem.nbatch += 9;

	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	*((uint32_t *)v + 2) = alpha;

	v[3] = op->base.dst.x + box->x1;
	v[4] = v[1];
	*((uint32_t *)v + 5) = alpha;

	v[6] = v[3];
	v[7] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 8) = alpha;
}
2272
/* SSE2-attributed copy of gen2_emit_composite_spans_primitive_linear();
 * keep the body textually in sync with the plain version.
 */
sse2 fastcall static void
gen2_emit_composite_spans_primitive_linear__sse2(struct sna *sna,
						 const struct sna_composite_spans_op *op,
						 const BoxRec *box,
						 float opacity)
{
	/* Union lets the packed alpha bits pass through the float-only
	 * VERTEX() macro without aliasing trouble. */
	union {
		float f;
		uint32_t u;
	} alpha;

	alpha.u = (uint8_t)(255 * opacity) << 24;

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x2,
				     op->base.dst.y + box->y2);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x2, box->y2);

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x1,
				     op->base.dst.y + box->y2);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y2);

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x1,
				     op->base.dst.y + box->y1);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y1);
}
2304
/* SSE2-attributed copy of
 * gen2_emit_composite_spans_primitive_identity_source(); keep the body
 * textually in sync with the plain version.
 */
sse2 fastcall static void
gen2_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna,
							  const struct sna_composite_spans_op *op,
							  const BoxRec *box,
							  float opacity)
{
	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	sna->kgem.nbatch += 15;

	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	*((uint32_t *)v + 2) = alpha;
	v[3] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
	v[4] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];

	v[5] = op->base.dst.x + box->x1;
	v[6] = v[1];
	*((uint32_t *)v + 7) = alpha;
	v[8] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
	v[9] = v[4];

	v[10] = v[5];
	v[11] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 12) = alpha;
	v[13] = v[8];
	v[14] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
}
2333
/* SSE2-attributed copy of
 * gen2_emit_composite_spans_primitive_affine_source(); keep the body
 * textually in sync with the plain version.
 */
sse2 fastcall static void
gen2_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna,
							const struct sna_composite_spans_op *op,
							const BoxRec *box,
							float opacity)
{
	PictTransform *transform = op->base.src.transform;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 15;

	/* Shared dst coordinates and alpha first, then the three
	 * transformed texcoords. */
	v[0]  = op->base.dst.x + box->x2;
	v[6]  = v[1] = op->base.dst.y + box->y2;
	v[10] = v[5] = op->base.dst.x + box->x1;
	v[11] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 2) = alpha;
	*((uint32_t *)v + 7) = alpha;
	*((uint32_t *)v + 12) = alpha;

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[3], &v[4]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[8], &v[9]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y1,
				    transform, op->base.src.scale,
				    &v[13], &v[14]);
}
2370#endif
2371
static void
gen2_emit_composite_spans_vertex(struct sna *sna,
				 const struct sna_composite_spans_op *op,
				 int16_t x, int16_t y,
				 float opacity)
{
	/* Generic span vertex: dst x/y, packed diffuse alpha, then the
	 * source coordinate (linear gradient or texture). Solid sources
	 * are handled via the specular colour and must not reach here.
	 */
	gen2_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y);
	BATCH((uint8_t)(opacity * 255) << 24);
	assert(!op->base.src.is_solid);
	if (op->base.src.is_linear)
		gen2_emit_composite_linear(sna, &op->base.src, x, y);
	else
		gen2_emit_composite_texcoord(sna, &op->base.src, x, y);
}
2386
2387fastcall static void
2388gen2_emit_composite_spans_primitive(struct sna *sna,
2389				    const struct sna_composite_spans_op *op,
2390				    const BoxRec *box,
2391				    float opacity)
2392{
2393	gen2_emit_composite_spans_vertex(sna, op, box->x2, box->y2, opacity);
2394	gen2_emit_composite_spans_vertex(sna, op, box->x1, box->y2, opacity);
2395	gen2_emit_composite_spans_vertex(sna, op, box->x1, box->y1, opacity);
2396}
2397
static void
gen2_emit_spans_pipeline(struct sna *sna,
			 const struct sna_composite_spans_op *op)
{
	/* Program texture-blend stage 0 for span rendering: colour and
	 * alpha are the source (ARG2) modulated by the per-vertex diffuse
	 * alpha (ARG1). The emitted state is deduplicated against the
	 * last LOAD_STATE_IMMEDIATE_2 packet (see unwind below).
	 */
	uint32_t cblend, ablend;
	uint32_t unwind;

	cblend =
		TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_MODULATE |
		TB0C_ARG1_SEL_DIFFUSE | TB0C_ARG1_REPLICATE_ALPHA |
		TB0C_OUTPUT_WRITE_CURRENT;
	ablend =
		TB0A_RESULT_SCALE_1X | TB0A_OP_MODULATE |
		TB0A_ARG1_SEL_DIFFUSE |
		TB0A_OUTPUT_WRITE_CURRENT;

	if (op->base.src.is_solid) {
		/* Solid colour lives in the specular register. */
		ablend |= TB0A_ARG2_SEL_SPECULAR;
		cblend |= TB0C_ARG2_SEL_SPECULAR;
		if (op->base.dst.format == PICT_a8)
			cblend |= TB0C_ARG2_REPLICATE_ALPHA;
	} else if (op->base.dst.format == PICT_a8) {
		/* a8 destination only needs the source alpha channel. */
		ablend |= TB0A_ARG2_SEL_TEXEL0;
		cblend |= TB0C_ARG2_SEL_TEXEL0 | TB0C_ARG2_REPLICATE_ALPHA;
	} else {
		/* Alpha-only sources contribute white for the colour. */
		if (PICT_FORMAT_RGB(op->base.src.pict_format) != 0)
			cblend |= TB0C_ARG2_SEL_TEXEL0;
		else
			cblend |= TB0C_ARG2_SEL_ONE | TB0C_ARG2_INVERT;

		if (op->base.src.is_opaque)
			ablend |= TB0A_ARG2_SEL_ONE;
		else
			ablend |= TB0A_ARG2_SEL_TEXEL0;
	}

	/* Emit, then rewind the batch if this exactly matches the blend
	 * state already programmed (recorded at render_state.gen2.ls2).
	 */
	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
	BATCH(cblend);
	BATCH(ablend);
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
		   sna->kgem.batch + unwind + 1,
		   2 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls2 = unwind;
}
2446
/* (Re)emit the full 3D state needed for a spans operation: render target,
 * S2/S3/S8 immediate state, blend pipeline and either the solid specular
 * colour or the source texture. Called initially and whenever the batch
 * is flushed mid-operation.
 */
static void gen2_emit_composite_spans_state(struct sna *sna,
					    const struct sna_composite_spans_op *op)
{
	uint32_t unwind;

	gen2_get_batch(sna, &op->base);
	gen2_emit_target(sna,
			 op->base.dst.bo,
			 op->base.dst.width,
			 op->base.dst.height,
			 op->base.dst.format);

	/* S2: one texcoord set iff the source is not solid; S3: XY +
	 * per-vertex diffuse; S8: blend function. Deduplicated against
	 * the last LOAD_STATE_IMMEDIATE_1 (render_state.gen2.ls1).
	 */
	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	BATCH(!op->base.src.is_solid << 12);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY | S3_DIFFUSE_PRESENT);
	BATCH(gen2_get_blend_cntl(op->base.op, false, op->base.dst.format));
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
		   sna->kgem.batch + unwind + 1,
		   3 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls1 = unwind;

	gen2_disable_logic_op(sna);
	gen2_emit_spans_pipeline(sna, op);

	if (op->base.src.is_solid) {
		/* Solid colour goes in the default specular register. */
		if (op->base.src.u.gen2.pixel != sna->render_state.gen2.specular) {
			BATCH(_3DSTATE_DFLT_SPECULAR_CMD);
			BATCH(op->base.src.u.gen2.pixel);
			sna->render_state.gen2.specular = op->base.src.u.gen2.pixel;
		}
	} else {
		uint32_t v =_3DSTATE_VERTEX_FORMAT_2_CMD |
			(op->base.src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_3D);
		if (sna->render_state.gen2.vft != v) {
			BATCH(v);
			sna->render_state.gen2.vft = v;
		}
		gen2_emit_texture(sna, &op->base.src, 0);
	}
}
2491
fastcall static void
gen2_render_composite_spans_box(struct sna *sna,
				const struct sna_composite_spans_op *op,
				const BoxRec *box, float opacity)
{
	/* Emit a single span box. If the batch has no room for even one
	 * rectangle, a new batch was started: re-emit the state and retry.
	 */
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	if (gen2_get_rectangles(sna, &op->base, 1) == 0) {
		gen2_emit_composite_spans_state(sna, op);
		gen2_get_rectangles(sna, &op->base, 1);
	}

	op->prim_emit(sna, op, box, opacity);
}
2513
static void
gen2_render_composite_spans_boxes(struct sna *sna,
				  const struct sna_composite_spans_op *op,
				  const BoxRec *box, int nbox,
				  float opacity)
{
	/* Emit nbox span boxes, splitting across batches as needed:
	 * gen2_get_rectangles() returns how many fit; zero means the
	 * batch was flushed and the state must be re-emitted first.
	 */
	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y));

	do {
		int nbox_this_time;

		nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox);
		if (nbox_this_time == 0) {
			gen2_emit_composite_spans_state(sna, op);
			nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox);
		}
		nbox -= nbox_this_time;

		do {
			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			     box->x1, box->y1,
			     box->x2 - box->x1,
			     box->y2 - box->y1));

			op->prim_emit(sna, op, box++, opacity);
		} while (--nbox_this_time);
	} while (nbox);
}
2546
fastcall static void
gen2_render_composite_spans_done(struct sna *sna,
				 const struct sna_composite_spans_op *op)
{
	/* Finish a spans operation: flush pending vertices, drop the
	 * reference on the source bo and resolve any render-target
	 * redirection set up at init.
	 */
	DBG(("%s()\n", __FUNCTION__));

	gen2_vertex_flush(sna, &op->base);

	if (op->base.src.bo)
		kgem_bo_destroy(&sna->kgem, op->base.src.bo);

	sna_render_composite_redirect_done(sna, &op->base);
}
2560
2561static bool
2562gen2_check_composite_spans(struct sna *sna,
2563			   uint8_t op, PicturePtr src, PicturePtr dst,
2564			   int16_t width, int16_t height, unsigned flags)
2565{
2566	if (op >= ARRAY_SIZE(gen2_blend_op))
2567		return false;
2568
2569	if (gen2_composite_fallback(sna, src, NULL, dst))
2570		return false;
2571
2572	if (need_tiling(sna, width, height)) {
2573		if (!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
2574			DBG(("%s: fallback, tiled operation not on GPU\n",
2575			     __FUNCTION__));
2576			return false;
2577		}
2578	}
2579
2580	return true;
2581}
2582
/* Prepare a spans composite operation: validate/redirect the target,
 * resolve the source picture, pick the fastest primitive emitter for the
 * source type (solid / linear / identity / affine, with SSE2 variants on
 * 32-bit builds) and emit the initial state. Returns false on fallback;
 * tmp's callbacks drive the actual emission.
 */
static bool
gen2_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x,  int16_t src_y,
			    int16_t dst_x,  int16_t dst_y,
			    int16_t width,  int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__,
	     src_x, src_y, dst_x, dst_y, width, height));

	assert(gen2_check_composite_spans(sna, op, src, dst, width, height, flags));
	if (need_tiling(sna, width, height)) {
		/* Too large for the 3D pipe; split into tiles. */
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	tmp->base.op = op;
	sna_render_composite_redirect_init(&tmp->base);
	if (!gen2_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height,
				       true)) {
		DBG(("%s: unable to set render target\n",
		     __FUNCTION__));
		return false;
	}

	/* -1: error, 0: solid colour (convert), 1: texture source. */
	switch (gen2_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		gen2_composite_solid_init(sna, &tmp->base.src, 0);
		/* fall through */
	case 1:
		break;
	}
	assert(tmp->base.src.bo || tmp->base.src.is_solid);

	/* Select the specialised emitter; default handles every case. */
	tmp->prim_emit = gen2_emit_composite_spans_primitive;
	tmp->base.floats_per_vertex = 3;
	if (tmp->base.src.is_solid) {
#if defined(sse2) && !defined(__x86_64__)
		if (sna->cpu_features & SSE2) {
			tmp->prim_emit = gen2_emit_composite_spans_primitive_constant__sse2;
		} else
#endif
		{
			tmp->prim_emit = gen2_emit_composite_spans_primitive_constant;
		}
	} else if (tmp->base.src.is_linear) {
		tmp->base.floats_per_vertex += 2;
#if defined(sse2) && !defined(__x86_64__)
		if (sna->cpu_features & SSE2) {
			tmp->prim_emit = gen2_emit_composite_spans_primitive_linear__sse2;
		} else
#endif
		{
			tmp->prim_emit = gen2_emit_composite_spans_primitive_linear;
		}
	} else {
		assert(tmp->base.src.bo);
		tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
		if (tmp->base.src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_spans_primitive_identity_source__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_spans_primitive_identity_source;
			}
		} else if (tmp->base.src.is_affine) {
			/* Fold the homogeneous w into the scale factors so
			 * the emitter can skip the divide per vertex. */
			tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
			tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_spans_primitive_affine_source__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_spans_primitive_affine_source;
			}
		}
	}
	tmp->base.mask.bo = NULL;
	tmp->base.floats_per_rect = 3*tmp->base.floats_per_vertex;

	tmp->box   = gen2_render_composite_spans_box;
	tmp->boxes = gen2_render_composite_spans_boxes;
	tmp->done  = gen2_render_composite_spans_done;

	/* Make sure the operation fits in the GTT, flushing once if not. */
	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
	}

	gen2_emit_composite_spans_state(sna, tmp);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
2704
static void
gen2_emit_fill_pipeline(struct sna *sna, const struct sna_composite_op *op)
{
	/* Program texture-blend stage 0 for solid fills: pass the diffuse
	 * colour straight through (OP_ARG1). Deduplicated against the
	 * previously emitted LOAD_STATE_IMMEDIATE_2 (ls2).
	 */
	uint32_t blend, unwind;

	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);

	blend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_ARG1 |
		TB0C_ARG1_SEL_DIFFUSE |
		TB0C_OUTPUT_WRITE_CURRENT;
	/* a8 destinations take their value from the alpha channel. */
	if (op->dst.format == PICT_a8)
		blend |= TB0C_ARG1_REPLICATE_ALPHA;
	BATCH(blend);

	BATCH(TB0A_RESULT_SCALE_1X | TB0A_OP_ARG1 |
	      TB0A_ARG1_SEL_DIFFUSE |
	      TB0A_OUTPUT_WRITE_CURRENT);

	/* Rewind if identical to the state already programmed. */
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
		   sna->kgem.batch + unwind + 1,
		   2 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls2 = unwind;
}
2732
/* Emit the state for a render-style fill (used by fill_boxes): target,
 * S2/S3/S8 immediates, the fill pipeline and the diffuse colour.
 */
static void gen2_emit_fill_composite_state(struct sna *sna,
					   const struct sna_composite_op *op,
					   uint32_t pixel)
{
	uint32_t ls1;

	gen2_get_batch(sna, op);
	gen2_emit_target(sna,
			 op->dst.bo,
			 op->dst.width,
			 op->dst.height,
			 op->dst.format);

	/* No texcoords (S2=0), XY-only vertices, blend from the op. */
	ls1 = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	BATCH(0);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
	BATCH(gen2_get_blend_cntl(op->op, false, op->dst.format));
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
		   sna->kgem.batch + ls1 + 1,
		   3 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = ls1;
	else
		sna->render_state.gen2.ls1 = ls1;

	gen2_emit_fill_pipeline(sna, op);

	/* Update the default diffuse colour only when it changes. */
	if (pixel != sna->render_state.gen2.diffuse) {
		BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
		BATCH(pixel);
		sna->render_state.gen2.diffuse = pixel;
	}
}
2767
2768static bool
2769gen2_render_fill_boxes_try_blt(struct sna *sna,
2770			       CARD8 op, PictFormat format,
2771			       const xRenderColor *color,
2772			       const DrawableRec *dst, struct kgem_bo *dst_bo,
2773			       const BoxRec *box, int n)
2774{
2775	uint8_t alu;
2776	uint32_t pixel;
2777
2778	if (op > PictOpSrc)
2779		return false;
2780
2781	if (op == PictOpClear) {
2782		alu = GXclear;
2783		pixel = 0;
2784	} else if (!sna_get_pixel_from_rgba(&pixel,
2785					    color->red,
2786					    color->green,
2787					    color->blue,
2788					    color->alpha,
2789					    format))
2790		return false;
2791	else
2792		alu = GXcopy;
2793
2794	return sna_blt_fill_boxes(sna, alu,
2795				  dst_bo, dst->bitsPerPixel,
2796				  pixel, box, n);
2797}
2798
/* Fill n boxes with a colour using the blend op. Tries the BLT first,
 * then tiles oversized targets, and otherwise renders the boxes as
 * triangles through the 3D pipe with the colour in the diffuse register.
 */
static bool
gen2_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       const DrawableRec *dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	struct sna_composite_op tmp;
	uint32_t pixel;

	if (op >= ARRAY_SIZE(gen2_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

#if NO_FILL_BOXES
	return gen2_render_fill_boxes_try_blt(sna, op, format, color,
					      dst, dst_bo,
					      box, n);
#endif
	if (gen2_render_fill_boxes_try_blt(sna, op, format, color,
					   dst, dst_bo,
					   box, n))
		return true;


	DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x))\n",
	     __FUNCTION__, op, (int)format,
	     color->red, color->green, color->blue, color->alpha));

	/* Outside 3D pipeline limits: tile, unless the format itself is
	 * unsupported, in which case fall back entirely. */
	if (too_large(dst->width, dst->height) ||
	    dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH ||
	    !gen2_check_dst_format(format)) {
		DBG(("%s: try blt, too large or incompatible destination\n",
		     __FUNCTION__));
		if (!gen2_check_dst_format(format))
			return false;

		assert(dst_bo->pitch >= 8);
		return sna_tiling_fill_boxes(sna, op, format, color,
					     dst, dst_bo, box, n);
	}

	/* The diffuse colour register is always a8r8g8b8; the hardware
	 * converts to the destination format on write. */
	if (op == PictOpClear)
		pixel = 0;
	else if (!sna_get_pixel_from_rgba(&pixel,
					  color->red,
					  color->green,
					  color->blue,
					  color->alpha,
					  PICT_a8r8g8b8))
		return false;

	DBG(("%s: using shader for op=%d, format=%x, pixel=%x\n",
	     __FUNCTION__, op, (int)format, pixel));

	memset(&tmp, 0, sizeof(tmp));
	tmp.op = op;
	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;
	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;

	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL))
			return false;
	}

	gen2_emit_fill_composite_state(sna, &tmp, pixel);

	/* Emit boxes, re-emitting state after any mid-loop batch flush. */
	do {
		int n_this_time = gen2_get_rectangles(sna, &tmp, n);
		if (n_this_time == 0) {
			gen2_emit_fill_composite_state(sna, &tmp, pixel);
			n_this_time = gen2_get_rectangles(sna, &tmp, n);
		}
		n -= n_this_time;

		do {
			DBG(("	(%d, %d), (%d, %d): %x\n",
			     box->x1, box->y1, box->x2, box->y2, pixel));
			VERTEX(box->x2);
			VERTEX(box->y2);
			VERTEX(box->x1);
			VERTEX(box->y2);
			VERTEX(box->x1);
			VERTEX(box->y1);
			box++;
		} while (--n_this_time);
	} while (n);

	gen2_vertex_flush(sna, &tmp);
	return true;
}
2899
/* Emit the state for a fill-op (sna_fill_op) fill: like the composite
 * fill state but writes unconditionally (S8) and routes the alu through
 * the logic-op unit instead of blending.
 */
static void gen2_emit_fill_state(struct sna *sna,
				 const struct sna_composite_op *op)
{
	uint32_t ls1;

	gen2_get_batch(sna, op);
	gen2_emit_target(sna,
			 op->dst.bo,
			 op->dst.width,
			 op->dst.height,
			 op->dst.format);

	ls1 = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	BATCH(0);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
	BATCH(S8_ENABLE_COLOR_BUFFER_WRITE);
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
		   sna->kgem.batch + ls1 + 1,
		   3 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = ls1;
	else
		sna->render_state.gen2.ls1 = ls1;

	gen2_enable_logic_op(sna, op->op);
	gen2_emit_fill_pipeline(sna, op);

	/* Fill colour is pre-converted into src.u.gen2.pixel. */
	if (op->src.u.gen2.pixel != sna->render_state.gen2.diffuse) {
		BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
		BATCH(op->src.u.gen2.pixel);
		sna->render_state.gen2.diffuse = op->src.u.gen2.pixel;
	}
}
2934
static void
gen2_render_fill_op_blt(struct sna *sna,
			const struct sna_fill_op *op,
			int16_t x, int16_t y, int16_t w, int16_t h)
{
	/* Fill a w x h rectangle at (x, y): one triangle, XY-only
	 * vertices in the fixed bottom-right, bottom-left, top-left order.
	 * A zero return from get_rectangles means the batch was flushed
	 * and the fill state must be re-emitted before retrying.
	 */
	if (!gen2_get_rectangles(sna, &op->base, 1)) {
		gen2_emit_fill_state(sna, &op->base);
		gen2_get_rectangles(sna, &op->base, 1);
	}

	VERTEX(x+w);
	VERTEX(y+h);
	VERTEX(x);
	VERTEX(y+h);
	VERTEX(x);
	VERTEX(y);
}
2952
fastcall static void
gen2_render_fill_op_box(struct sna *sna,
			const struct sna_fill_op *op,
			const BoxRec *box)
{
	/* Fill a single box; see gen2_render_fill_op_blt for the vertex
	 * ordering and the batch-flush retry. */
	if (!gen2_get_rectangles(sna, &op->base, 1)) {
		gen2_emit_fill_state(sna, &op->base);
		gen2_get_rectangles(sna, &op->base, 1);
	}

	VERTEX(box->x2);
	VERTEX(box->y2);
	VERTEX(box->x1);
	VERTEX(box->y2);
	VERTEX(box->x1);
	VERTEX(box->y1);
}
2970
fastcall static void
gen2_render_fill_op_boxes(struct sna *sna,
			  const struct sna_fill_op *op,
			  const BoxRec *box,
			  int nbox)
{
	/* Fill nbox boxes, splitting across batches: get_rectangles
	 * returns how many fit; zero forces a state re-emit after the
	 * implicit flush, then a retry. */
	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2, nbox));

	do {
		int nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox);
		if (nbox_this_time == 0) {
			gen2_emit_fill_state(sna, &op->base);
			nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox);
		}
		nbox -= nbox_this_time;

		do {
			VERTEX(box->x2);
			VERTEX(box->y2);
			VERTEX(box->x1);
			VERTEX(box->y2);
			VERTEX(box->x1);
			VERTEX(box->y1);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}
2999
static void
gen2_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
{
	/* Flush any vertices still pending for this fill operation. */
	gen2_vertex_flush(sna, &op->base);
}
3005
/* Set up a reusable fill operation (tmp) for repeated blt/box/boxes
 * calls. Prefers the BLT engine; falls back to it entirely when the
 * target exceeds 3D pipeline limits or the GTT check fails.
 */
static bool
gen2_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color, unsigned flags,
		 struct sna_fill_op *tmp)
{
#if NO_FILL
	return sna_blt_fill(sna, alu,
			    dst_bo, dst->drawable.bitsPerPixel,
			    color,
			    tmp);
#endif

	/* Prefer to use the BLT if already engaged */
	if (sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 tmp))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height) ||
	    dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH)
		return false;

	tmp->base.op = alu;
	tmp->base.dst.pixmap = dst;
	tmp->base.dst.width = dst->drawable.width;
	tmp->base.dst.height = dst->drawable.height;
	tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp->base.dst.bo = dst_bo;
	tmp->base.dst.x = tmp->base.dst.y = 0;
	tmp->base.floats_per_vertex = 2;
	tmp->base.floats_per_rect = 6;

	/* Diffuse register wants the colour expanded to 8888. */
	tmp->base.src.u.gen2.pixel =
		sna_rgba_for_color(color, dst->drawable.depth);

	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    tmp);
	}

	tmp->blt   = gen2_render_fill_op_blt;
	tmp->box   = gen2_render_fill_op_box;
	tmp->boxes = gen2_render_fill_op_boxes;
	tmp->points = NULL;
	tmp->done  = gen2_render_fill_op_done;

	gen2_emit_fill_state(sna, &tmp->base);
	return true;
}
3061
3062static bool
3063gen2_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
3064			     uint32_t color,
3065			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
3066			     uint8_t alu)
3067{
3068	BoxRec box;
3069
3070	box.x1 = x1;
3071	box.y1 = y1;
3072	box.x2 = x2;
3073	box.y2 = y2;
3074
3075	return sna_blt_fill_boxes(sna, alu,
3076				  bo, dst->drawable.bitsPerPixel,
3077				  color, &box, 1);
3078}
3079
/* Fill a single rectangle (x1,y1)-(x2,y2). Tries the BLT first, then
 * renders one triangle through the 3D pipe with the colour in the
 * diffuse register.
 */
static bool
gen2_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	struct sna_composite_op tmp;

#if NO_FILL_ONE
	return gen2_render_fill_one_try_blt(sna, dst, bo, color,
					    x1, y1, x2, y2, alu);
#endif

	/* Prefer to use the BLT if already engaged */
	if (gen2_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height) ||
	    bo->pitch < 8 || bo->pitch > MAX_3D_PITCH)
		return false;

	/* Doesn't fit in the GTT: flush, retry the BLT, then re-check. */
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);

		if (gen2_render_fill_one_try_blt(sna, dst, bo, color,
						 x1, y1, x2, y2, alu))
			return true;

		if (!kgem_check_bo(&sna->kgem, bo, NULL))
			return false;
	}

	tmp.op = alu;
	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	/* Diffuse register wants the colour expanded to 8888. */
	tmp.src.u.gen2.pixel =
		sna_rgba_for_color(color, dst->drawable.depth);

	gen2_emit_fill_state(sna, &tmp);
	gen2_get_rectangles(sna, &tmp, 1);
	DBG(("%s: (%d, %d), (%d, %d): %x\n", __FUNCTION__,
	     x1, y1, x2, y2, tmp.src.u.gen2.pixel));
	VERTEX(x2);
	VERTEX(y2);
	VERTEX(x1);
	VERTEX(y2);
	VERTEX(x1);
	VERTEX(y1);
	gen2_vertex_flush(sna, &tmp);

	return true;
}
3142
/* Emit the full 3D pipeline state needed to stretch-blit a packed YUV
 * (YUY2/UYVY) video frame onto the target.  Colour conversion is done
 * by the fixed-function sampler (TM0S1_COLORSPACE_CONVERSION), so the
 * texture-blend stage just forwards the texel colour and forces alpha
 * to one.
 */
static void
gen2_emit_video_state(struct sna *sna,
		      struct sna_video *video,
		      struct sna_video_frame *frame,
		      PixmapPtr pixmap,
		      struct kgem_bo *dst_bo,
		      int width, int height,
		      bool bilinear)
{
	uint32_t ms1, v, unwind;

	gen2_emit_target(sna, dst_bo, width, height,
			 sna_format_for_depth(pixmap->drawable.depth));

	/* S2/S3/S8: xy-only vertices, no culling, colour writes enabled.
	 * If this packet is identical to the one cached at ls1, rewind
	 * the batch pointer instead of emitting a duplicate.
	 */
	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	BATCH(1 << 12);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
	BATCH(S8_ENABLE_COLOR_BUFFER_WRITE);
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
		   sna->kgem.batch + unwind + 1,
		   3 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls1 = unwind;

	gen2_disable_logic_op(sna);

	/* Texture blend stage 0: colour = texel0, alpha = 1.0.  Again
	 * rewound when it matches the cached ls2 packet.
	 */
	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
	BATCH(TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OUTPUT_WRITE_CURRENT |
	      TB0C_OP_ARG1 | TB0C_ARG1_SEL_TEXEL0);
	BATCH(TB0A_RESULT_SCALE_1X | TB0A_OUTPUT_WRITE_CURRENT |
	      TB0A_OP_ARG1 | TB0A_ARG1_SEL_ONE);
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
		   sna->kgem.batch + unwind + 1,
		   2 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls2 = unwind;

	/* Texture map 0 is the video frame itself (sampler-domain reloc). */
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | LOAD_TEXTURE_MAP(0) | 4);
	BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
			     frame->bo,
			     I915_GEM_DOMAIN_SAMPLER << 16,
			     0));
	ms1 = MAPSURF_422 | TM0S1_COLORSPACE_CONVERSION;
	switch (frame->id) {
	case FOURCC_YUY2:
		ms1 |= MT_422_YCRCB_NORMAL;
		break;
	case FOURCC_UYVY:
		ms1 |= MT_422_YCRCB_SWAPY;
		break;
	}
	BATCH(((frame->height - 1) << TM0S1_HEIGHT_SHIFT) |
	      ((frame->width - 1)  << TM0S1_WIDTH_SHIFT) |
	      ms1 |
	      gen2_sampler_tiling_bits(frame->bo->tiling));
	/* Pitch is programmed in dwords, minus one. */
	BATCH((frame->pitch[0] / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D);
	/* Bilinear filtering only when the caller detected scaling. */
	if (bilinear)
		BATCH(FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT |
		      FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT |
		      MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT);
	else
		BATCH(FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT |
		      FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT |
		      MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT);
	BATCH(0);	/* default color */

	/* Normalised cartesian texcoords, clamped in both directions. */
	BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(0) |
	      ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | TEXCOORDTYPE_CARTESIAN |
	      ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_CLAMP) |
	      ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_CLAMP));

	/* Vertex format: xy plus one 2D texcoord; emit only on change. */
	v = _3DSTATE_VERTEX_FORMAT_2_CMD | TEXCOORDFMT_2D;
	if (sna->render_state.gen2.vft != v) {
		BATCH(v);
		sna->render_state.gen2.vft = v;
	}
}
3226
3227static void
3228gen2_video_get_batch(struct sna *sna, struct kgem_bo *bo)
3229{
3230	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
3231
3232	if (!kgem_check_batch(&sna->kgem, 120) ||
3233	    !kgem_check_reloc(&sna->kgem, 4) ||
3234	    !kgem_check_exec(&sna->kgem, 2)) {
3235		_kgem_submit(&sna->kgem);
3236		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
3237	}
3238
3239	if (sna->render_state.gen2.need_invariant)
3240		gen2_emit_invariant(sna);
3241}
3242
3243static int
3244gen2_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex)
3245{
3246	int size = floats_per_vertex * 3;
3247	int rem = batch_space(sna) - 1;
3248
3249	if (rem > MAX_INLINE)
3250		rem = MAX_INLINE;
3251
3252	if (size * want > rem)
3253		want = rem / size;
3254
3255	return want;
3256}
3257
/* Render a video frame into the given destination region.  When the
 * destination pixmap exceeds the gen2 3D limits, render instead into a
 * temporary X-tiled bo sized to the region extents and blit the result
 * back afterwards (the "copy" path).
 */
static bool
gen2_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	const BoxRec *pbox = region_rects(dstRegion);
	int nbox = region_num_rects(dstRegion);
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	int pix_xoff, pix_yoff;
	struct kgem_bo *dst_bo;
	bool bilinear;
	int copy = 0;	/* non-zero when rendering via an intermediate bo */

	DBG(("%s: src:%dx%d (frame:%dx%d) -> dst:%dx%d\n", __FUNCTION__,
	     src_width, src_height, frame->width, frame->height, dst_width, dst_height));

	assert(priv->gpu_bo);
	dst_bo = priv->gpu_bo;

	/* Only filter bilinearly when actually scaling. */
	bilinear = src_width != dst_width || src_height != dst_height;

	/* Map destination pixel coordinates to normalised source
	 * texcoords: u = x * src_scale_x + src_offset_x, and likewise
	 * for v.  The offsets fold in both the frame's source origin
	 * and the region's extents.
	 */
	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
	DBG(("%s: src offset (%f, %f), scale (%f, %f)\n",
	     __FUNCTION__, src_offset_x, src_offset_y, src_scale_x, src_scale_y));

	if (too_large(pixmap->drawable.width, pixmap->drawable.height) ||
	    dst_bo->pitch > MAX_3D_PITCH) {
		/* Target too big for the 3D pipe: render into a
		 * region-sized temporary and blit back at the end.
		 */
		int bpp = pixmap->drawable.bitsPerPixel;

		if (too_large(dst_width, dst_height))
			return false;

		dst_bo = kgem_create_2d(&sna->kgem,
					dst_width, dst_height, bpp,
					kgem_choose_tiling(&sna->kgem,
							   I915_TILING_X,
							   dst_width, dst_height, bpp),
					0);
		if (!dst_bo)
			return false;

		/* Shift coordinates so the region origin lands at (0,0)
		 * of the temporary bo.
		 */
		pix_xoff = -dstRegion->extents.x1;
		pix_yoff = -dstRegion->extents.y1;
		copy = 1;
	} else {
		/* Set up the offset for translating from the given region
		 * (in screen coordinates) to the backing pixmap.
		 */
#ifdef COMPOSITE
		pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
		pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
		pix_xoff = 0;
		pix_yoff = 0;
#endif

		dst_width  = pixmap->drawable.width;
		dst_height = pixmap->drawable.height;
	}

	gen2_video_get_batch(sna, dst_bo);
	gen2_emit_video_state(sna, video, frame, pixmap,
			      dst_bo, dst_width, dst_height, bilinear);
	do {
		/* 4 floats per vertex: x, y, u, v. */
		int nbox_this_time = gen2_get_inline_rectangles(sna, nbox, 4);
		if (nbox_this_time == 0) {
			/* Batch full: flush and re-emit state. */
			gen2_video_get_batch(sna, dst_bo);
			gen2_emit_video_state(sna, video, frame, pixmap,
					      dst_bo, dst_width, dst_height, bilinear);
			nbox_this_time = gen2_get_inline_rectangles(sna, nbox, 4);
			assert(nbox_this_time);
		}
		nbox -= nbox_this_time;

		/* Inline RECTLIST: 12 dwords (3 vertices) per box. */
		BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST |
		      ((12 * nbox_this_time) - 1));
		do {
			int box_x1 = pbox->x1;
			int box_y1 = pbox->y1;
			int box_x2 = pbox->x2;
			int box_y2 = pbox->y2;

			pbox++;

			DBG(("%s: dst (%d, %d), (%d, %d) + (%d, %d); src (%f, %f), (%f, %f)\n",
			     __FUNCTION__, box_x1, box_y1, box_x2, box_y2, pix_xoff, pix_yoff,
			     box_x1 * src_scale_x + src_offset_x,
			     box_y1 * src_scale_y + src_offset_y,
			     box_x2 * src_scale_x + src_offset_x,
			     box_y2 * src_scale_y + src_offset_y));

			/* bottom right */
			BATCH_F(box_x2 + pix_xoff);
			BATCH_F(box_y2 + pix_yoff);
			BATCH_F(box_x2 * src_scale_x + src_offset_x);
			BATCH_F(box_y2 * src_scale_y + src_offset_y);

			/* bottom left */
			BATCH_F(box_x1 + pix_xoff);
			BATCH_F(box_y2 + pix_yoff);
			BATCH_F(box_x1 * src_scale_x + src_offset_x);
			BATCH_F(box_y2 * src_scale_y + src_offset_y);

			/* top left */
			BATCH_F(box_x1 + pix_xoff);
			BATCH_F(box_y1 + pix_yoff);
			BATCH_F(box_x1 * src_scale_x + src_offset_x);
			BATCH_F(box_y1 * src_scale_y + src_offset_y);
		} while (--nbox_this_time);
	} while (nbox);

	if (copy) {
		/* Blit the temporary bo back into the real pixmap,
		 * recomputing the screen->pixmap translation.
		 */
#ifdef COMPOSITE
		pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
		pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
		pix_xoff = 0;
		pix_yoff = 0;
#endif
		sna_blt_copy_boxes(sna, GXcopy,
				   dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1,
				   priv->gpu_bo, pix_xoff, pix_yoff,
				   pixmap->drawable.bitsPerPixel,
				   region_rects(dstRegion),
				   region_num_rects(dstRegion));

		kgem_bo_destroy(&sna->kgem, dst_bo);
	}

	/* Record GPU damage for the written region (translated by the
	 * pixmap offset when non-zero).
	 */
	if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
		if ((pix_xoff | pix_yoff) == 0) {
			sna_damage_add(&priv->gpu_damage, dstRegion);
		} else {
			sna_damage_add_boxes(&priv->gpu_damage,
					     region_rects(dstRegion),
					     region_num_rects(dstRegion),
					     pix_xoff, pix_yoff);
		}
	}

	return true;
}
3412
3413static void
3414gen2_render_copy_setup_source(struct sna_composite_channel *channel,
3415			      const DrawableRec *draw,
3416			      struct kgem_bo *bo)
3417{
3418	assert(draw->width && draw->height);
3419
3420	channel->filter = PictFilterNearest;
3421	channel->repeat = RepeatNone;
3422	channel->width  = draw->width;
3423	channel->height = draw->height;
3424	channel->scale[0] = 1.f/draw->width;
3425	channel->scale[1] = 1.f/draw->height;
3426	channel->offset[0] = 0;
3427	channel->offset[1] = 0;
3428	channel->pict_format = sna_format_for_depth(draw->depth);
3429	channel->bo = bo;
3430	channel->is_affine = 1;
3431
3432	DBG(("%s: source=%d, (%dx%d), format=%08x\n",
3433	     __FUNCTION__, bo->handle,
3434	     channel->width, channel->height,
3435	     channel->pict_format));
3436}
3437
/* Program texture-blend stage 0 for a copy: route the source texel
 * straight to the output, substituting constant 0.0/1.0 for channels
 * the source format lacks (and replicating alpha for a8 targets).
 * The packet is rewound if it matches the cached ls2 state.
 */
static void
gen2_emit_copy_pipeline(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t blend, unwind;

	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);

	blend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_ARG1 |
		TB0C_OUTPUT_WRITE_CURRENT;
	if (op->dst.format == PICT_a8)
		blend |= TB0C_ARG1_REPLICATE_ALPHA | TB0C_ARG1_SEL_TEXEL0;
	else if (PICT_FORMAT_RGB(op->src.pict_format) != 0)
		blend |= TB0C_ARG1_SEL_TEXEL0;
	else
		blend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT;	/* 0.0 */
	BATCH(blend);

	blend = TB0A_RESULT_SCALE_1X | TB0A_OP_ARG1 |
		TB0A_OUTPUT_WRITE_CURRENT;
	if (PICT_FORMAT_A(op->src.pict_format) == 0)
		blend |= TB0A_ARG1_SEL_ONE;	/* opaque source: alpha = 1.0 */
	else
		blend |= TB0A_ARG1_SEL_TEXEL0;
	BATCH(blend);

	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
		   sna->kgem.batch + unwind + 1,
		   2 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls2 = unwind;
}
3472
/* Emit all pipeline state for a copy operation: cache flushes for a
 * dirty source, render target, S-state, logic op, blend pipeline,
 * vertex format and the source texture map.
 */
static void gen2_emit_copy_state(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t ls1, v;

	gen2_get_batch(sna, op);

	/* If the source was written earlier in this batch, flush before
	 * sampling it; reading back from the render target itself needs
	 * the heavier MI_FLUSH with map-cache invalidation.
	 */
	if (kgem_bo_is_dirty(op->src.bo)) {
		if (op->src.bo == op->dst.bo)
			BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
		else
			BATCH(_3DSTATE_MODES_5_CMD |
			      PIPELINE_FLUSH_RENDER_CACHE |
			      PIPELINE_FLUSH_TEXTURE_CACHE);
		kgem_clear_dirty(&sna->kgem);
	}
	gen2_emit_target(sna,
			 op->dst.bo,
			 op->dst.width,
			 op->dst.height,
			 op->dst.format);

	/* S2/S3/S8: xy-only vertices, no culling, colour writes on;
	 * rewound when identical to the cached ls1 packet.
	 */
	ls1 = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	BATCH(1<<12);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
	BATCH(S8_ENABLE_COLOR_BUFFER_WRITE);
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
		   sna->kgem.batch + ls1 + 1,
		   3 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = ls1;
	else
		sna->render_state.gen2.ls1 = ls1;

	gen2_enable_logic_op(sna, op->op);
	gen2_emit_copy_pipeline(sna, op);

	/* Vertex format: xy plus one 2D texcoord; emit only on change. */
	v = _3DSTATE_VERTEX_FORMAT_2_CMD | TEXCOORDFMT_2D;
	if (sna->render_state.gen2.vft != v) {
		BATCH(v);
		sna->render_state.gen2.vft = v;
	}

	gen2_emit_texture(sna, &op->src, 0);
}
3518
3519static bool
3520gen2_render_copy_boxes(struct sna *sna, uint8_t alu,
3521		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
3522		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
3523		       const BoxRec *box, int n, unsigned flags)
3524{
3525	struct sna_composite_op tmp;
3526
3527#if NO_COPY_BOXES
3528	if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
3529		return false;
3530
3531	return sna_blt_copy_boxes(sna, alu,
3532				  src_bo, src_dx, src_dy,
3533				  dst_bo, dst_dx, dst_dy,
3534				  dst->drawable.bitsPerPixel,
3535				  box, n);
3536#endif
3537
3538	DBG(("%s (%d, %d)->(%d, %d) x %d\n",
3539	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
3540
3541	if (sna_blt_compare_depth(src, dst) &&
3542	    sna_blt_copy_boxes(sna, alu,
3543			       src_bo, src_dx, src_dy,
3544			       dst_bo, dst_dx, dst_dy,
3545			       dst->bitsPerPixel,
3546			       box, n))
3547		return true;
3548
3549	if (src_bo == dst_bo || /* XXX handle overlap using 3D ? */
3550	    too_large(src->width, src->height) ||
3551	    src_bo->pitch > MAX_3D_PITCH || dst_bo->pitch < 8) {
3552fallback:
3553		return sna_blt_copy_boxes_fallback(sna, alu,
3554						   src, src_bo, src_dx, src_dy,
3555						   dst, dst_bo, dst_dx, dst_dy,
3556						   box, n);
3557	}
3558
3559	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
3560		kgem_submit(&sna->kgem);
3561		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
3562			goto fallback;
3563	}
3564
3565	assert(dst_bo->pitch >= 8);
3566
3567	memset(&tmp, 0, sizeof(tmp));
3568	tmp.op = alu;
3569
3570	tmp.dst.pixmap = (PixmapPtr)dst;
3571	tmp.dst.width = dst->width;
3572	tmp.dst.height = dst->height;
3573	tmp.dst.format = sna_format_for_depth(dst->depth);
3574	tmp.dst.bo = dst_bo;
3575	tmp.dst.x = tmp.dst.y = 0;
3576	tmp.damage = NULL;
3577
3578	DBG(("%s: target=%d, format=%08x, size=%dx%d\n",
3579	     __FUNCTION__, dst_bo->handle,
3580	     (unsigned)tmp.dst.format,
3581	     tmp.dst.width,
3582	     tmp.dst.height));
3583
3584	sna_render_composite_redirect_init(&tmp);
3585	if (too_large(tmp.dst.width, tmp.dst.height) ||
3586	    dst_bo->pitch > MAX_3D_PITCH) {
3587		BoxRec extents = box[0];
3588		int i;
3589
3590		for (i = 1; i < n; i++) {
3591			if (box[i].x1 < extents.x1)
3592				extents.x1 = box[i].x1;
3593			if (box[i].y1 < extents.y1)
3594				extents.y1 = box[i].y1;
3595
3596			if (box[i].x2 > extents.x2)
3597				extents.x2 = box[i].x2;
3598			if (box[i].y2 > extents.y2)
3599				extents.y2 = box[i].y2;
3600		}
3601		if (!sna_render_composite_redirect(sna, &tmp,
3602						   extents.x1 + dst_dx,
3603						   extents.y1 + dst_dy,
3604						   extents.x2 - extents.x1,
3605						   extents.y2 - extents.y1,
3606						   alu != GXcopy || n > 1))
3607			goto fallback_tiled;
3608	}
3609
3610	tmp.floats_per_vertex = 4;
3611	tmp.floats_per_rect = 12;
3612
3613	dst_dx += tmp.dst.x;
3614	dst_dy += tmp.dst.y;
3615	tmp.dst.x = tmp.dst.y = 0;
3616
3617	gen2_render_copy_setup_source(&tmp.src, src, src_bo);
3618	gen2_emit_copy_state(sna, &tmp);
3619	do {
3620		int n_this_time;
3621
3622		n_this_time = gen2_get_rectangles(sna, &tmp, n);
3623		if (n_this_time == 0) {
3624			gen2_emit_copy_state(sna, &tmp);
3625			n_this_time = gen2_get_rectangles(sna, &tmp, n);
3626		}
3627		n -= n_this_time;
3628
3629		do {
3630			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
3631			     box->x1 + src_dx, box->y1 + src_dy,
3632			     box->x1 + dst_dx, box->y1 + dst_dy,
3633			     box->x2 - box->x1, box->y2 - box->y1));
3634			VERTEX(box->x2 + dst_dx);
3635			VERTEX(box->y2 + dst_dy);
3636			VERTEX((box->x2 + src_dx) * tmp.src.scale[0]);
3637			VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
3638
3639			VERTEX(box->x1 + dst_dx);
3640			VERTEX(box->y2 + dst_dy);
3641			VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
3642			VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
3643
3644			VERTEX(box->x1 + dst_dx);
3645			VERTEX(box->y1 + dst_dy);
3646			VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
3647			VERTEX((box->y1 + src_dy) * tmp.src.scale[1]);
3648
3649			box++;
3650		} while (--n_this_time);
3651	} while (n);
3652
3653	gen2_vertex_flush(sna, &tmp);
3654	sna_render_composite_redirect_done(sna, &tmp);
3655	return true;
3656
3657fallback_tiled:
3658	return sna_tiling_copy_boxes(sna, alu,
3659				     src, src_bo, src_dx, src_dy,
3660				     dst, dst_bo, dst_dx, dst_dy,
3661				     box, n);
3662}
3663
/* Queue a single w x h rectangle copy from (sx, sy) to (dx, dy),
 * re-emitting the copy state if the batch had to be flushed to make
 * room.  Each rectangle is three vertices — bottom-right, bottom-left,
 * top-left — with texcoords normalised by the source size (scale[]).
 */
static void
gen2_render_copy_blt(struct sna *sna,
		     const struct sna_copy_op *op,
		     int16_t sx, int16_t sy,
		     int16_t w, int16_t h,
		     int16_t dx, int16_t dy)
{
	if (!gen2_get_rectangles(sna, &op->base, 1)) {
		gen2_emit_copy_state(sna, &op->base);
		gen2_get_rectangles(sna, &op->base, 1);
	}

	/* bottom right */
	VERTEX(dx+w);
	VERTEX(dy+h);
	VERTEX((sx+w)*op->base.src.scale[0]);
	VERTEX((sy+h)*op->base.src.scale[1]);

	/* bottom left */
	VERTEX(dx);
	VERTEX(dy+h);
	VERTEX(sx*op->base.src.scale[0]);
	VERTEX((sy+h)*op->base.src.scale[1]);

	/* top left */
	VERTEX(dx);
	VERTEX(dy);
	VERTEX(sx*op->base.src.scale[0]);
	VERTEX(sy*op->base.src.scale[1]);
}
3691
/* Finish a streamed copy operation: flush any pending inline vertices. */
static void
gen2_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
	gen2_vertex_flush(sna, &op->base);
}
3697
/* Prepare a streamed copy operation (tmp->blt emits rectangles,
 * tmp->done flushes).  Prefers the BLT; only sets up the 3D pipeline
 * when both surfaces fit within the gen2 render limits.
 */
static bool
gen2_render_copy(struct sna *sna, uint8_t alu,
		 PixmapPtr src, struct kgem_bo *src_bo,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 struct sna_copy_op *tmp)
{
#if NO_COPY
	if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
		return false;

	return sna_blt_copy(sna, alu,
			    src_bo, dst_bo,
			    dst->drawable.bitsPerPixel,
			    tmp);
#endif

	/* Prefer to use the BLT */
	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	    sna_blt_copy(sna, alu,
			 src_bo, dst_bo,
			 dst->drawable.bitsPerPixel,
			 tmp))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(src->drawable.width, src->drawable.height) ||
	    too_large(dst->drawable.width, dst->drawable.height) ||
	    src_bo->pitch > MAX_3D_PITCH ||
	    dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH) {
fallback:
		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
			return false;

		return sna_blt_copy(sna, alu, src_bo, dst_bo,
				    dst->drawable.bitsPerPixel,
				    tmp);
	}

	tmp->base.op = alu;

	tmp->base.dst.pixmap = dst;
	tmp->base.dst.width = dst->drawable.width;
	tmp->base.dst.height = dst->drawable.height;
	tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp->base.dst.bo = dst_bo;

	gen2_render_copy_setup_source(&tmp->base.src, &src->drawable, src_bo);
	tmp->base.mask.bo = NULL;	/* no mask stage for a plain copy */

	tmp->base.floats_per_vertex = 4;	/* x, y, u, v */
	tmp->base.floats_per_rect = 12;

	/* Make sure both bos fit in the aperture, flushing once if not. */
	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
			goto fallback;
	}

	tmp->blt  = gen2_render_copy_blt;
	tmp->done = gen2_render_copy_done;

	gen2_emit_composite_state(sna, &tmp->base);
	return true;
}
3762
3763static void
3764gen2_render_reset(struct sna *sna)
3765{
3766	sna->render_state.gen2.need_invariant = true;
3767	sna->render_state.gen2.logic_op_enabled = 0;
3768	sna->render_state.gen2.target = 0;
3769
3770	sna->render_state.gen2.ls1 = 0;
3771	sna->render_state.gen2.ls2 = 0;
3772	sna->render_state.gen2.vft = 0;
3773
3774	sna->render_state.gen2.diffuse = 0x0c0ffee0;
3775	sna->render_state.gen2.specular = 0x0c0ffee0;
3776}
3777
/* gen2 emits its vertices inline in the batch, so there is nothing to
 * flush here — just sanity-check that no vertex data is left pending.
 */
static void
gen2_render_flush(struct sna *sna)
{
	assert(sna->render.vertex_index == 0);
	assert(sna->render.vertex_offset == 0);
}
3784
/* kgem callback invoked when switching rings (e.g. BLT <-> RENDER)
 * while a batch is being constructed.
 */
static void
gen2_render_context_switch(struct kgem *kgem,
			   int new_mode)
{
	struct sna *sna = container_of(kgem, struct sna, kgem);

	/* Nothing queued yet, nothing to do. */
	if (!kgem->nbatch)
		return;

	/* Reload BLT registers following a lost context */
	sna->blt_state.fill_bo = 0;

	/* If the GPU has gone idle, submit the accumulated batch now
	 * rather than letting it keep growing across the switch.
	 */
	if (kgem_ring_is_idle(kgem, kgem->ring)) {
		DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
		_kgem_submit(kgem);
	}
}
3802
3803const char *gen2_render_init(struct sna *sna, const char *backend)
3804{
3805	struct sna_render *render = &sna->render;
3806
3807	sna->kgem.context_switch = gen2_render_context_switch;
3808
3809	/* Use the BLT (and overlay) for everything except when forced to
3810	 * use the texture combiners.
3811	 */
3812#if !NO_COMPOSITE
3813	render->composite = gen2_render_composite;
3814	render->prefer_gpu |= PREFER_GPU_RENDER;
3815#endif
3816#if !NO_COMPOSITE_SPANS
3817	render->check_composite_spans = gen2_check_composite_spans;
3818	render->composite_spans = gen2_render_composite_spans;
3819	render->prefer_gpu |= PREFER_GPU_SPANS;
3820#endif
3821	render->fill_boxes = gen2_render_fill_boxes;
3822	render->fill = gen2_render_fill;
3823	render->fill_one = gen2_render_fill_one;
3824	render->copy = gen2_render_copy;
3825	render->copy_boxes = gen2_render_copy_boxes;
3826
3827	render->video = gen2_render_video;
3828
3829	render->reset = gen2_render_reset;
3830	render->flush = gen2_render_flush;
3831
3832	render->max_3d_size = MAX_3D_SIZE;
3833	render->max_3d_pitch = MAX_3D_PITCH;
3834	return "Almador (gen2)";
3835}
3836