gen2_render.c revision 03b705cf
1/*
2 * Copyright © 2006,2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Wang Zhenyu <zhenyu.z.wang@intel.com>
25 *    Eric Anholt <eric@anholt.net>
26 *    Chris Wilson <chris@chris-wilson.co.uk>
27 *
28 */
29
30#ifdef HAVE_CONFIG_H
31#include "config.h"
32#endif
33
34#include "sna.h"
35#include "sna_reg.h"
36#include "sna_render.h"
37#include "sna_render_inline.h"
38
39#include "gen2_render.h"
40
/* Debug knobs: set any of these to 1 to disable the corresponding
 * accelerated path and force the generic fallback instead.
 */
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0

/* gen2 3D pipeline limits: maximum renderable surface dimension and
 * maximum surface pitch (compared against bo->pitch below).
 */
#define MAX_3D_SIZE 2048
#define MAX_3D_PITCH 8192

/* Shorthands for appending a dword / float to the current batch. */
#define BATCH(v) batch_emit(sna, v)
#define BATCH_F(v) batch_emit_float(sna, v)
#define VERTEX(v) batch_emit_float(sna, v)
55
/* Per-PictOp blend factor table, indexed by the Render operator.
 * dst_alpha/src_alpha record whether the factors reference the
 * destination/source alpha channel; gen2_get_blend_cntl() uses these
 * to substitute factors when the destination has no alpha, and to
 * detect the component-alpha cases that need the source alpha folded
 * into the color channels.
 */
static const struct blendinfo {
	bool dst_alpha;
	bool src_alpha;
	uint32_t src_blend;
	uint32_t dst_blend;
} gen2_blend_op[] = {
	/* Clear */
	{0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO},
	/* Src */
	{0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO},
	/* Dst */
	{0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE},
	/* Over */
	{0, 1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */
	{1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE},
	/* In */
	{1, 0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO},
	/* InReverse */
	{0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA},
	/* Out */
	{1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO},
	/* OutReverse */
	{0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */
	{1, 1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */
	{1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA},
	/* Xor */
	{1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */
	{0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ONE},
};
89
/* Picture-format -> hw sampler format tables.  i8xx_tex_formats[] are
 * supported by all gen2 devices; i85x_tex_formats[] holds the RGBx
 * variants that 855+ can sample directly, and which 830/845 instead
 * substitute with their ARGB equivalents (see gen2_get_card_format()).
 */
static const struct formatinfo {
	unsigned int fmt;
	uint32_t card_fmt;
} i8xx_tex_formats[] = {
	{PICT_a8, MAPSURF_8BIT | MT_8BIT_A8},
	{PICT_a8r8g8b8, MAPSURF_32BIT | MT_32BIT_ARGB8888},
	{PICT_a8b8g8r8, MAPSURF_32BIT | MT_32BIT_ABGR8888},
	{PICT_r5g6b5, MAPSURF_16BIT | MT_16BIT_RGB565},
	{PICT_a1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555},
	{PICT_a4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444},
}, i85x_tex_formats[] = {
	{PICT_x8r8g8b8, MAPSURF_32BIT | MT_32BIT_XRGB8888},
	{PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888},
};
104
105static inline bool
106too_large(int width, int height)
107{
108	return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
109}
110
/* Translate an I915_TILING_* value into _3DSTATE_BUF_INFO bits.  The
 * switch deliberately cascades: Y-tiling needs both the Y tile-walk
 * and the tiled-surface bit, X-tiling only the latter.
 */
static inline uint32_t
gen2_buf_tiling(uint32_t tiling)
{
	uint32_t v = 0;
	switch (tiling) {
	default: assert(0); /* fallthrough */
	case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y; /* fallthrough */
	case I915_TILING_X: v |= BUF_3D_TILED_SURFACE; /* fallthrough */
	case I915_TILING_NONE: break;
	}
	return v;
}
123
/* Translate a destination PICT_* format into the COLR_BUF_* dword for
 * _3DSTATE_DST_BUF_VARS.  Every format also gets the same horizontal
 * and vertical destination-origin bias (0x8 in each axis — presumably
 * the pixel-centre offset; see the hw docs for DSTORG_*_BIAS).
 * Unlisted formats must have been rejected by gen2_check_dst_format().
 */
static uint32_t
gen2_get_dst_format(uint32_t format)
{
#define BIAS DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8)
	switch (format) {
	default:
		assert(0);
		/* fallthrough */
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return COLR_BUF_ARGB8888 | BIAS;
	case PICT_r5g6b5:
		return COLR_BUF_RGB565 | BIAS;
	case PICT_a1r5g5b5:
	case PICT_x1r5g5b5:
		return COLR_BUF_ARGB1555 | BIAS;
	case PICT_a8:
		return COLR_BUF_8BIT | BIAS;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return COLR_BUF_ARGB4444 | BIAS;
	}
#undef BIAS
}
147
148static bool
149gen2_check_dst_format(uint32_t format)
150{
151	switch (format) {
152	case PICT_a8r8g8b8:
153	case PICT_x8r8g8b8:
154	case PICT_r5g6b5:
155	case PICT_a1r5g5b5:
156	case PICT_x1r5g5b5:
157	case PICT_a8:
158	case PICT_a4r4g4b4:
159	case PICT_x4r4g4b4:
160		return true;
161	default:
162		return false;
163	}
164}
165
/* Translate a PICT_* texture format into the MAPSURF/MT sampler bits,
 * taking the device generation into account.  The format must have
 * passed gen2_check_format() beforehand; an unknown format asserts.
 */
static uint32_t
gen2_get_card_format(struct sna *sna, uint32_t format)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++)
		if (i8xx_tex_formats[i].fmt == format)
			return i8xx_tex_formats[i].card_fmt;

	if (sna->kgem.gen < 021) {
		/* Whilst these are not directly supported on 830/845,
		 * we only enable them when we can implicitly convert
		 * them to a supported variant through the texture
		 * combiners.
		 */
		for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++)
			if (i85x_tex_formats[i].fmt == format)
				/* i85x_tex_formats[] lists xRGB/xBGR in
				 * the same order as entries 1,2 of
				 * i8xx_tex_formats[] (ARGB/ABGR), so
				 * [1+i] selects the alpha-full
				 * equivalent of the matched format.
				 */
				return i8xx_tex_formats[1+i].card_fmt;
	} else {
		for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++)
			if (i85x_tex_formats[i].fmt == format)
				return i85x_tex_formats[i].card_fmt;
	}

	assert(0);
	return 0;
}
193
/* Can this picture's format be sampled as a texture on this device?
 *
 * NOTE(review): the i85x table is gated on gen > 021 here, whereas
 * gen2_get_card_format() splits on gen < 021 vs else — so for
 * gen == 021 these formats are rejected here yet would be handled
 * there.  Confirm whether the guard should be >= 021 (or dropped,
 * given the 830/845 conversion path in gen2_get_card_format()).
 */
static uint32_t
gen2_check_format(struct sna *sna, PicturePtr p)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++)
		if (i8xx_tex_formats[i].fmt == p->format)
			return true;

	if (sna->kgem.gen > 021) {
		for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++)
			if (i85x_tex_formats[i].fmt == p->format)
				return true;
	}

	return false;
}
211
/* Translate an I915_TILING_* value into the TM0S1 sampler bits.  As
 * in gen2_buf_tiling(), the cases deliberately cascade: Y-tiling sets
 * the tile-walk bit and then also the tiled-surface bit.
 */
static uint32_t
gen2_sampler_tiling_bits(uint32_t tiling)
{
	uint32_t bits = 0;
	switch (tiling) {
	default:
		assert(0);
		/* fallthrough */
	case I915_TILING_Y:
		bits |= TM0S1_TILE_WALK;
		/* fallthrough */
	case I915_TILING_X:
		bits |= TM0S1_TILED_SURFACE;
		/* fallthrough */
	case I915_TILING_NONE:
		break;
	}
	return bits;
}
228
229static bool
230gen2_check_filter(PicturePtr picture)
231{
232	switch (picture->filter) {
233	case PictFilterNearest:
234	case PictFilterBilinear:
235		return true;
236	default:
237		return false;
238	}
239}
240
241static bool
242gen2_check_repeat(PicturePtr picture)
243{
244	if (!picture->repeat)
245		return true;
246
247	switch (picture->repeatType) {
248	case RepeatNone:
249	case RepeatNormal:
250	case RepeatPad:
251	case RepeatReflect:
252		return true;
253	default:
254		return false;
255	}
256}
257
/* Program texture map @unit for the given channel: emit the map state
 * (_3DSTATE_LOAD_STATE_IMMEDIATE_2) with the bo relocation, size,
 * format, pitch and filter, followed by the coordinate-set command
 * with the wrap modes.  The statement order below is the command
 * stream — do not reorder.
 */
static void
gen2_emit_texture(struct sna *sna,
		  const struct sna_composite_channel *channel,
		  int unit)
{
	uint32_t wrap_mode_u, wrap_mode_v;
	uint32_t texcoordtype;
	uint32_t filter;

	/* Non-affine transforms need the homogeneous (s, t, w) path. */
	if (channel->is_affine)
		texcoordtype = TEXCOORDTYPE_CARTESIAN;
	else
		texcoordtype = TEXCOORDTYPE_HOMOGENEOUS;

	switch (channel->repeat) {
	default:
		assert(0);
		/* fallthrough */
	case RepeatNone:
		wrap_mode_u = TEXCOORDMODE_CLAMP_BORDER;
		break;
	case RepeatNormal:
		wrap_mode_u = TEXCOORDMODE_WRAP;
		break;
	case RepeatPad:
		wrap_mode_u = TEXCOORDMODE_CLAMP;
		break;
	case RepeatReflect:
		wrap_mode_u = TEXCOORDMODE_MIRROR;
		break;
	}
	/* Linear gradients always wrap in V (presumably a 1-D gradient
	 * texture whose single row is repeated — confirm against the
	 * gradient upload code); otherwise V follows U.
	 */
	if (channel->is_linear)
		wrap_mode_v = TEXCOORDMODE_WRAP;
	else
		wrap_mode_v = wrap_mode_u;

	switch (channel->filter) {
	default:
		assert(0);
		/* fallthrough */
	case PictFilterNearest:
		filter = (FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT |
			  FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT |
			  MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT);
		break;
	case PictFilterBilinear:
		filter = (FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT |
			  FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT |
			  MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT);
		break;
	}

	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | LOAD_TEXTURE_MAP(unit) | 4);
	BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
			     channel->bo,
			     I915_GEM_DOMAIN_SAMPLER << 16,
			     0));
	BATCH(((channel->height - 1) << TM0S1_HEIGHT_SHIFT) |
	      ((channel->width - 1)  << TM0S1_WIDTH_SHIFT) |
	      gen2_get_card_format(sna, channel->pict_format) |
	      gen2_sampler_tiling_bits(channel->bo->tiling));
	/* TM0S2 pitch field is in dwords, hence pitch / 4. */
	BATCH((channel->bo->pitch / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D);
	BATCH(filter);
	BATCH(0);	/* default color */

	BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(unit) |
	      ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | texcoordtype |
	      ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(wrap_mode_v) |
	      ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(wrap_mode_u));
}
326
/* Compute the texture-combiner stage programming producing the value
 * to be blended into the destination.
 *
 * @op     the composite operation (source/mask channels, dst format,
 *         component-alpha flag)
 * @blend  the Render operator being implemented (index into
 *         gen2_blend_op[])
 * @c_out  receives the TB0C (color stage) dword
 * @a_out  receives the TB0A (alpha stage) dword
 */
static void
gen2_get_blend_factors(const struct sna_composite_op *op,
		       int blend,
		       uint32_t *c_out,
		       uint32_t *a_out)
{
	uint32_t cblend, ablend;

	/* If component alpha is active in the mask and the blend operation
	 * uses the source alpha, then we know we don't need the source
	 * value (otherwise we would have hit a fallback earlier), so we
	 * provide the source alpha (src.A * mask.X) as output color.
	 * Conversely, if CA is set and we don't need the source alpha, then
	 * we produce the source value (src.X * mask.X) and the source alpha
	 * is unused..  Otherwise, we provide the non-CA source value
	 * (src.X * mask.A).
	 *
	 * The PICT_FORMAT_RGB(pict) == 0 fixups are not needed on 855+'s a8
	 * pictures, but we need to implement it for 830/845 and there's no
	 * harm done in leaving it in.
	 */
	cblend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OUTPUT_WRITE_CURRENT;
	ablend = TB0A_RESULT_SCALE_1X | TB0A_OUTPUT_WRITE_CURRENT;


	/* Get the source picture's channels into TBx_ARG1 */
	if ((op->has_component_alpha && gen2_blend_op[blend].src_alpha) ||
	    op->dst.format == PICT_a8) {
		/* Producing source alpha value, so the first set of channels
		 * is src.A instead of src.X.  We also do this if the destination
		 * is a8, in which case src.G is what's written, and the other
		 * channels are ignored.
		 */
		if (op->src.is_solid) {
			/* Solid sources live in the diffuse color register. */
			ablend |= TB0A_ARG1_SEL_DIFFUSE;
			cblend |= TB0C_ARG1_SEL_DIFFUSE | TB0C_ARG1_REPLICATE_ALPHA;
		} else {
			ablend |= TB0A_ARG1_SEL_TEXEL0;
			cblend |= TB0C_ARG1_SEL_TEXEL0 | TB0C_ARG1_REPLICATE_ALPHA;
		}
	} else {
		if (op->src.is_solid)
			cblend |= TB0C_ARG1_SEL_DIFFUSE;
		else if (PICT_FORMAT_RGB(op->src.pict_format) != 0)
			cblend |= TB0C_ARG1_SEL_TEXEL0;
		else
			cblend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT;	/* 0.0 */
		if (op->src.is_solid)
			ablend |= TB0A_ARG1_SEL_DIFFUSE;
		else if (op->src.is_opaque)
			ablend |= TB0A_ARG1_SEL_ONE;
		else
			ablend |= TB0A_ARG1_SEL_TEXEL0;
	}

	/* Multiply in the mask (TBx_ARG2), if any. */
	if (op->mask.bo) {
		/* The mask texture occupies unit 0 when the source is
		 * solid (diffuse), unit 1 otherwise — matching the
		 * sampler assignment in gen2_emit_composite_state().
		 */
		if (op->src.is_solid) {
			cblend |= TB0C_ARG2_SEL_TEXEL0;
			ablend |= TB0A_ARG2_SEL_TEXEL0;
		} else {
			cblend |= TB0C_ARG2_SEL_TEXEL1;
			ablend |= TB0A_ARG2_SEL_TEXEL1;
		}

		/* Without component alpha, all channels are scaled by mask.A. */
		if (op->dst.format == PICT_a8 || !op->has_component_alpha)
			cblend |= TB0C_ARG2_REPLICATE_ALPHA;

		cblend |= TB0C_OP_MODULATE;
		ablend |= TB0A_OP_MODULATE;
	} else if (op->mask.is_solid) {
		cblend |= TB0C_ARG2_SEL_DIFFUSE;
		ablend |= TB0A_ARG2_SEL_DIFFUSE;

		if (op->dst.format == PICT_a8 || !op->has_component_alpha)
			cblend |= TB0C_ARG2_REPLICATE_ALPHA;

		cblend |= TB0C_OP_MODULATE;
		ablend |= TB0A_OP_MODULATE;
	} else {
		/* No mask: pass the source through unchanged. */
		cblend |= TB0C_OP_ARG1;
		ablend |= TB0A_OP_ARG1;
	}

	*c_out = cblend;
	*a_out = ablend;
}
413
/* Build the S8 dword (blend factors + enables) implementing the given
 * Render operator for the given destination format.  Clear and Src
 * need no blending at all, just the color-buffer write enable.
 */
static uint32_t gen2_get_blend_cntl(int op,
				    bool has_component_alpha,
				    uint32_t dst_format)
{
	uint32_t sblend, dblend;

	if (op <= PictOpSrc)
		return S8_ENABLE_COLOR_BUFFER_WRITE;

	sblend = gen2_blend_op[op].src_blend;
	dblend = gen2_blend_op[op].dst_blend;

	/* If there's no dst alpha channel, adjust the blend op so that
	 * we'll treat it as always 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0 && gen2_blend_op[op].dst_alpha) {
		if (sblend == BLENDFACTOR_DST_ALPHA)
			sblend = BLENDFACTOR_ONE;
		else if (sblend == BLENDFACTOR_INV_DST_ALPHA)
			sblend = BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a case
	 * where the source blend factor is 0, and the source blend value is
	 * the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen2_blend_op[op].src_alpha) {
		if (dblend == BLENDFACTOR_SRC_ALPHA)
			dblend = BLENDFACTOR_SRC_COLR;
		else if (dblend == BLENDFACTOR_INV_SRC_ALPHA)
			dblend = BLENDFACTOR_INV_SRC_COLR;
	}

	return (sblend << S8_SRC_BLEND_FACTOR_SHIFT |
		dblend << S8_DST_BLEND_FACTOR_SHIFT |
		S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
		S8_ENABLE_COLOR_BUFFER_WRITE);
}
452
/* Emit the invariant (once-per-batch) 3D state: map/coordinate-set
 * bindings, disabled scissor/viewport transform, default blend modes
 * and enables, and a pass-through texture-blend stage 0.  The batch
 * space consumed here must stay within INVARIANT_SIZE dwords (see
 * the reservation in gen2_get_batch()) — update the define if dwords
 * are added or removed.
 */
static void gen2_emit_invariant(struct sna *sna)
{
	int i;

	/* Bind map i to coordinate set i / stream i, with transforms off. */
	for (i = 0; i < 4; i++) {
		BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(i));
		BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | MAP_UNIT(i) |
		      DISABLE_TEX_STREAM_BUMP |
		      ENABLE_TEX_STREAM_COORD_SET | TEX_STREAM_COORD_SET(i) |
		      ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(i));
		BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
		BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(i));
	}

	BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD);
	BATCH(TEXBIND_SET3(TEXCOORDSRC_VTXSET_3) |
	      TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) |
	      TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) |
	      TEXBIND_SET0(TEXCOORDSRC_VTXSET_0));

	BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);

	/* Vertices are emitted in screen coordinates already. */
	BATCH(_3DSTATE_VERTEX_TRANSFORM);
	BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE);

	BATCH(_3DSTATE_W_STATE_CMD);
	BATCH(MAGIC_W_STATE_DWORD1);
	BATCH_F(1.0);

	BATCH(_3DSTATE_INDPT_ALPHA_BLEND_CMD |
	      DISABLE_INDPT_ALPHA_BLEND |
	      ENABLE_ALPHA_BLENDFUNC | ABLENDFUNC_ADD);

	BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD);
	BATCH(0);

	BATCH(_3DSTATE_MODES_1_CMD |
	      ENABLE_COLR_BLND_FUNC | BLENDFUNC_ADD |
	      ENABLE_SRC_BLND_FACTOR | SRC_BLND_FACT(BLENDFACTOR_ONE) |
	      ENABLE_DST_BLND_FACTOR | DST_BLND_FACT(BLENDFACTOR_ZERO));

	BATCH(_3DSTATE_ENABLES_1_CMD |
	      DISABLE_LOGIC_OP |
	      DISABLE_STENCIL_TEST |
	      DISABLE_DEPTH_BIAS |
	      DISABLE_SPEC_ADD |
	      DISABLE_FOG |
	      DISABLE_ALPHA_TEST |
	      DISABLE_DEPTH_TEST |
	      ENABLE_COLOR_BLEND);

	BATCH(_3DSTATE_ENABLES_2_CMD |
	      DISABLE_STENCIL_WRITE |
	      DISABLE_DITHER |
	      DISABLE_DEPTH_WRITE |
	      ENABLE_COLOR_MASK |
	      ENABLE_COLOR_WRITE |
	      ENABLE_TEX_CACHE);

	BATCH(_3DSTATE_STIPPLE);
	BATCH(0);

	/* Texture blend stage 0: pass the diffuse color straight through. */
	BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) |
	      TEXPIPE_COLOR |
	      ENABLE_TEXOUTPUT_WRT_SEL |
	      TEXOP_OUTPUT_CURRENT |
	      DISABLE_TEX_CNTRL_STAGE |
	      TEXOP_SCALE_1X |
	      TEXOP_MODIFY_PARMS | TEXOP_LAST_STAGE |
	      TEXBLENDOP_ARG1);
	BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) |
	      TEXPIPE_ALPHA |
	      ENABLE_TEXOUTPUT_WRT_SEL |
	      TEXOP_OUTPUT_CURRENT |
	      TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS |
	      TEXBLENDOP_ARG1);
	BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) |
	      TEXPIPE_COLOR |
	      TEXBLEND_ARG1 |
	      TEXBLENDARG_MODIFY_PARMS |
	      TEXBLENDARG_DIFFUSE);
	BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) |
	      TEXPIPE_ALPHA |
	      TEXBLEND_ARG1 |
	      TEXBLENDARG_MODIFY_PARMS |
	      TEXBLENDARG_DIFFUSE);

/* Upper bound (in dwords) on what the invariant emission above uses;
 * reserved ahead of time by gen2_get_batch().
 */
#define INVARIANT_SIZE 35

	sna->render_state.gen2.need_invariant = false;
}
544
545static void
546gen2_get_batch(struct sna *sna, const struct sna_composite_op *op)
547{
548	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
549
550	if (!kgem_check_batch(&sna->kgem, INVARIANT_SIZE+40)) {
551		DBG(("%s: flushing batch: size %d > %d\n",
552		     __FUNCTION__, INVARIANT_SIZE+40,
553		     sna->kgem.surface-sna->kgem.nbatch));
554		kgem_submit(&sna->kgem);
555		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
556	}
557
558	if (!kgem_check_reloc(&sna->kgem, 3)) {
559		DBG(("%s: flushing batch: reloc %d >= %d\n",
560		     __FUNCTION__,
561		     sna->kgem.nreloc + 3,
562		     (int)KGEM_RELOC_SIZE(&sna->kgem)));
563		kgem_submit(&sna->kgem);
564		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
565	}
566
567	if (!kgem_check_exec(&sna->kgem, 3)) {
568		DBG(("%s: flushing batch: exec %d >= %d\n",
569		     __FUNCTION__,
570		     sna->kgem.nexec + 1,
571		     (int)KGEM_EXEC_SIZE(&sna->kgem)));
572		kgem_submit(&sna->kgem);
573		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
574	}
575
576	if (sna->render_state.gen2.need_invariant)
577		gen2_emit_invariant(sna);
578}
579
/* Program the render target: buffer info (with relocation), dst
 * format and the drawing rectangle.  Skipped (bar a dirty mark) when
 * the same bo is already bound as the target in this batch.
 */
static void gen2_emit_target(struct sna *sna, const struct sna_composite_op *op)
{
	assert(!too_large(op->dst.width, op->dst.height));
	assert(op->dst.bo->pitch >= 8 && op->dst.bo->pitch <= MAX_3D_PITCH);
	assert(sna->render.vertex_offset == 0);

	if (sna->render_state.gen2.target == op->dst.bo->unique_id) {
		/* Already bound; just note that it will be written. */
		kgem_bo_mark_dirty(op->dst.bo);
		return;
	}

	BATCH(_3DSTATE_BUF_INFO_CMD);
	BATCH(BUF_3D_ID_COLOR_BACK |
	      gen2_buf_tiling(op->dst.bo->tiling) |
	      BUF_3D_PITCH(op->dst.bo->pitch));
	BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
			     op->dst.bo,
			     I915_GEM_DOMAIN_RENDER << 16 |
			     I915_GEM_DOMAIN_RENDER,
			     0));

	BATCH(_3DSTATE_DST_BUF_VARS_CMD);
	BATCH(gen2_get_dst_format(op->dst.format));

	/* Clip rect covering the whole destination surface. */
	BATCH(_3DSTATE_DRAW_RECT_CMD);
	BATCH(0);
	BATCH(0);	/* ymin, xmin */
	BATCH(DRAW_YMAX(op->dst.height - 1) |
	      DRAW_XMAX(op->dst.width - 1));
	BATCH(0);	/* yorig, xorig */

	sna->render_state.gen2.target = op->dst.bo->unique_id;
}
613
614static void gen2_disable_logic_op(struct sna *sna)
615{
616	if (!sna->render_state.gen2.logic_op_enabled)
617		return;
618
619	DBG(("%s\n", __FUNCTION__));
620
621	BATCH(_3DSTATE_ENABLES_1_CMD |
622	      DISABLE_LOGIC_OP | ENABLE_COLOR_BLEND);
623
624	sna->render_state.gen2.logic_op_enabled = 0;
625}
626
627static void gen2_enable_logic_op(struct sna *sna, int op)
628{
629	static const uint8_t logic_op[] = {
630		LOGICOP_CLEAR,		/* GXclear */
631		LOGICOP_AND,		/* GXand */
632		LOGICOP_AND_RVRSE, 	/* GXandReverse */
633		LOGICOP_COPY,		/* GXcopy */
634		LOGICOP_AND_INV,	/* GXandInverted */
635		LOGICOP_NOOP,		/* GXnoop */
636		LOGICOP_XOR,		/* GXxor */
637		LOGICOP_OR,		/* GXor */
638		LOGICOP_NOR,		/* GXnor */
639		LOGICOP_EQUIV,		/* GXequiv */
640		LOGICOP_INV,		/* GXinvert */
641		LOGICOP_OR_RVRSE,	/* GXorReverse */
642		LOGICOP_COPY_INV,	/* GXcopyInverted */
643		LOGICOP_OR_INV,		/* GXorInverted */
644		LOGICOP_NAND,		/* GXnand */
645		LOGICOP_SET		/* GXset */
646	};
647
648	if (sna->render_state.gen2.logic_op_enabled != op+1) {
649		if (!sna->render_state.gen2.logic_op_enabled) {
650			if (op == GXclear || op == GXcopy)
651				return;
652
653			DBG(("%s\n", __FUNCTION__));
654
655			BATCH(_3DSTATE_ENABLES_1_CMD |
656			      ENABLE_LOGIC_OP | DISABLE_COLOR_BLEND);
657		}
658
659		BATCH(_3DSTATE_MODES_4_CMD |
660		      ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(logic_op[op]));
661		sna->render_state.gen2.logic_op_enabled = op+1;
662	}
663}
664
/* Emit the full pipeline state for a composite operation: cache
 * flushes if a texture source is dirty, the render target, the S2/S3/
 * S8 immediate state, the texture-blend stage, the texture maps (or
 * diffuse color for solids) and the vertex format.  Redundant
 * LOAD_STATE_IMMEDIATE packets are elided by comparing against the
 * last-emitted copies recorded at render_state.gen2.ls1/ls2.
 */
static void gen2_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	uint32_t texcoordfmt, v, unwind;
	uint32_t cblend, ablend;
	int tex;

	gen2_get_batch(sna, op);

	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		/* Reading from the bo we are also rendering to needs the
		 * heavier MI_FLUSH; otherwise flushing the render and
		 * texture caches suffices.
		 */
		if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
			BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
		else
			BATCH(_3DSTATE_MODES_5_CMD |
			      PIPELINE_FLUSH_RENDER_CACHE |
			      PIPELINE_FLUSH_TEXTURE_CACHE);
		kgem_clear_dirty(&sna->kgem);
	}

	gen2_emit_target(sna, op);

	/* S2 (number of active texcoord sets), S3, S8 (blend control).
	 * Record the batch position so the packet can be dropped again
	 * if it matches the state already loaded (at ls1).
	 */
	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	BATCH((!op->src.is_solid + (op->mask.bo != NULL)) << 12);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
	BATCH(gen2_get_blend_cntl(op->op,
				  op->has_component_alpha,
				  op->dst.format));
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
		   sna->kgem.batch + unwind + 1,
		   3 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls1 = unwind;

	gen2_disable_logic_op(sna);

	/* Texture-blend stage 0, elided the same way against ls2. */
	gen2_get_blend_factors(op, op->op, &cblend, &ablend);
	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
	BATCH(cblend);
	BATCH(ablend);
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
		   sna->kgem.batch + unwind + 1,
		   2 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls2 = unwind;

	/* Assign texture units: source first (unless solid, which uses
	 * the diffuse color register), then mask — matching the ARG1/
	 * ARG2 selections made in gen2_get_blend_factors().
	 */
	tex = texcoordfmt = 0;
	if (!op->src.is_solid) {
		if (op->src.is_affine)
			texcoordfmt |= TEXCOORDFMT_2D << (2*tex);
		else
			texcoordfmt |= TEXCOORDFMT_3D << (2*tex);
		gen2_emit_texture(sna, &op->src, tex++);
	} else {
		if (op->src.u.gen2.pixel != sna->render_state.gen2.diffuse) {
			BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
			BATCH(op->src.u.gen2.pixel);
			sna->render_state.gen2.diffuse = op->src.u.gen2.pixel;
		}
	}
	if (op->mask.bo) {
		if (op->mask.is_affine)
			texcoordfmt |= TEXCOORDFMT_2D << (2*tex);
		else
			texcoordfmt |= TEXCOORDFMT_3D << (2*tex);
		gen2_emit_texture(sna, &op->mask, tex++);
	} else if (op->mask.is_solid) {
		if (op->mask.u.gen2.pixel != sna->render_state.gen2.diffuse) {
			BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
			BATCH(op->mask.u.gen2.pixel);
			sna->render_state.gen2.diffuse = op->mask.u.gen2.pixel;
		}
	}

	v = _3DSTATE_VERTEX_FORMAT_2_CMD | texcoordfmt;
	if (sna->render_state.gen2.vft != v) {
		BATCH(v);
		sna->render_state.gen2.vft = v;
	}
}
750
/* Emit the (x, y) destination position of one vertex. */
static inline void
gen2_emit_composite_dstcoord(struct sna *sna, int dstX, int dstY)
{
	VERTEX(dstX);
	VERTEX(dstY);
}
757
/* Emit the texcoord for a linear-gradient channel: the gradient
 * parameter at (x, y), written to both components of the 2D texcoord
 * slot (the channel is advertised as affine, so two floats are
 * expected per vertex).
 */
inline static void
gen2_emit_composite_linear(struct sna *sna,
			   const struct sna_composite_channel *channel,
			   int16_t x, int16_t y)
{
	float v;

	v = (x * channel->u.linear.dx +
	     y * channel->u.linear.dy +
	     channel->u.linear.offset);
	DBG(("%s: (%d, %d) -> %f\n", __FUNCTION__, x, y, v));
	VERTEX(v);
	VERTEX(v);
}
772
/* Emit the texture coordinate for one vertex of @channel: (s, t) for
 * affine transforms, (s, t, w) homogeneous coordinates otherwise.
 * The channel's offset is applied in destination space before the
 * transform, and the result normalised by the channel scale.
 */
static void
gen2_emit_composite_texcoord(struct sna *sna,
			     const struct sna_composite_channel *channel,
			     int16_t x, int16_t y)
{
	float s = 0, t = 0, w = 1;

	x += channel->offset[0];
	y += channel->offset[1];

	if (channel->is_affine) {
		sna_get_transformed_coordinates(x, y,
						channel->transform,
						&s, &t);
		VERTEX(s * channel->scale[0]);
		VERTEX(t * channel->scale[1]);
	} else {
		sna_get_transformed_coordinates_3d(x, y,
						   channel->transform,
						   &s, &t, &w);
		VERTEX(s * channel->scale[0]);
		VERTEX(t * channel->scale[1]);
		VERTEX(w);
	}
}
798
/* Emit one vertex: destination position, then the source texcoord
 * (unless the source is solid) and the mask texcoord (if any) — in
 * the same order the texture units were assigned in
 * gen2_emit_composite_state().
 */
static void
gen2_emit_composite_vertex(struct sna *sna,
			   const struct sna_composite_op *op,
			   int16_t srcX, int16_t srcY,
			   int16_t mskX, int16_t mskY,
			   int16_t dstX, int16_t dstY)
{
	gen2_emit_composite_dstcoord(sna, dstX, dstY);
	if (op->src.is_linear)
		gen2_emit_composite_linear(sna, &op->src, srcX, srcY);
	else if (!op->src.is_solid)
		gen2_emit_composite_texcoord(sna, &op->src, srcX, srcY);

	if (op->mask.is_linear)
		gen2_emit_composite_linear(sna, &op->mask, mskX, mskY);
	else if (op->mask.bo)
		gen2_emit_composite_texcoord(sna, &op->mask, mskX, mskY);
}
817
/* Generic rectangle emitter: three vertices per rectangle —
 * bottom-right, bottom-left, top-left — with the hardware inferring
 * the remaining corner of the axis-aligned rect.
 */
fastcall static void
gen2_emit_composite_primitive(struct sna *sna,
			      const struct sna_composite_op *op,
			      const struct sna_composite_rectangles *r)
{
	gen2_emit_composite_vertex(sna, op,
				   r->src.x + r->width,
				   r->src.y + r->height,
				   r->mask.x + r->width,
				   r->mask.y + r->height,
				   op->dst.x + r->dst.x + r->width,
				   op->dst.y + r->dst.y + r->height);
	gen2_emit_composite_vertex(sna, op,
				   r->src.x,
				   r->src.y + r->height,
				   r->mask.x,
				   r->mask.y + r->height,
				   op->dst.x + r->dst.x,
				   op->dst.y + r->dst.y + r->height);
	gen2_emit_composite_vertex(sna, op,
				   r->src.x,
				   r->src.y,
				   r->mask.x,
				   r->mask.y,
				   op->dst.x + r->dst.x,
				   op->dst.y + r->dst.y);
}
845
/* Fast path for a solid (diffuse) source with no mask texture: only
 * the three destination coordinates are needed per rectangle.
 */
fastcall static void
gen2_emit_composite_primitive_constant(struct sna *sna,
				       const struct sna_composite_op *op,
				       const struct sna_composite_rectangles *r)
{
	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;

	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
}
858
/* Fast path for a linear-gradient source with no mask: destination
 * coordinate plus the gradient parameter per vertex.
 */
fastcall static void
gen2_emit_composite_primitive_linear(struct sna *sna,
				       const struct sna_composite_op *op,
				       const struct sna_composite_rectangles *r)
{
	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;

	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x + r->width, r->src.y + r->height);

	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x, r->src.y + r->height);

	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x, r->src.y);
}
879
/* Fast path for an untransformed (identity) textured source, no mask.
 * Writes the 12 floats of a rectangle directly into the batch, four
 * per vertex: {x, y, s, t} at indices [0..3] (bottom-right), [4..7]
 * (bottom-left) and [8..11] (top-left).
 */
fastcall static void
gen2_emit_composite_primitive_identity(struct sna *sna,
				       const struct sna_composite_op *op,
				       const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
	v[2] = v[6] + w * op->src.scale[0];

	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
	v[7] = v[3] = v[11] + h * op->src.scale[1];
}
904
/* Fast path for an affine-transformed textured source, no mask: same
 * 12-float {x, y, s, t} layout as the identity path, but with each
 * source corner run through the transform.
 */
fastcall static void
gen2_emit_composite_primitive_affine(struct sna *sna,
				     const struct sna_composite_op *op,
				     const struct sna_composite_rectangles *r)
{
	PictTransform *transform = op->src.transform;
	int src_x = r->src.x + (int)op->src.offset[0];
	int src_y = r->src.y + (int)op->src.offset[1];
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + r->width;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + r->height;

	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
				    transform, op->src.scale,
				    &v[2], &v[3]);

	_sna_get_transformed_scaled(src_x, src_y + r->height,
				    transform, op->src.scale,
				    &v[6], &v[7]);

	_sna_get_transformed_scaled(src_x, src_y,
				    transform, op->src.scale,
				    &v[10], &v[11]);
}
936
/* Fast path for a solid source with an untransformed mask texture:
 * 12 floats per rectangle, {x, y, mask.s, mask.t} per vertex in the
 * same [0..3]/[4..7]/[8..11] corner layout as the identity path.
 */
fastcall static void
gen2_emit_composite_primitive_constant_identity_mask(struct sna *sna,
						     const struct sna_composite_op *op,
						     const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
	v[2] = v[6] + w * op->mask.scale[0];

	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
	v[7] = v[3] = v[11] + h * op->mask.scale[1];
}
961
#if defined(sse2) && !defined(__x86_64__)
/* SSE2-compiled duplicates of the primitive emitters above, built
 * only for 32-bit x86 (on x86-64, SSE2 is the baseline so the plain
 * variants already get it).  Presumably selected at runtime based on
 * cpu features — confirm against the composite setup code.  Keep the
 * bodies in sync with the plain variants.
 */
sse2 fastcall static void
gen2_emit_composite_primitive_constant__sse2(struct sna *sna,
					     const struct sna_composite_op *op,
					     const struct sna_composite_rectangles *r)
{
	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;

	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
}

sse2 fastcall static void
gen2_emit_composite_primitive_linear__sse2(struct sna *sna,
					   const struct sna_composite_op *op,
					   const struct sna_composite_rectangles *r)
{
	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;

	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x + r->width, r->src.y + r->height);

	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x, r->src.y + r->height);

	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
	gen2_emit_composite_linear(sna, &op->src,
				   r->src.x, r->src.y);
}

sse2 fastcall static void
gen2_emit_composite_primitive_identity__sse2(struct sna *sna,
					     const struct sna_composite_op *op,
					     const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
	v[2] = v[6] + w * op->src.scale[0];

	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
	v[7] = v[3] = v[11] + h * op->src.scale[1];
}

sse2 fastcall static void
gen2_emit_composite_primitive_affine__sse2(struct sna *sna,
					   const struct sna_composite_op *op,
					   const struct sna_composite_rectangles *r)
{
	PictTransform *transform = op->src.transform;
	int src_x = r->src.x + (int)op->src.offset[0];
	int src_y = r->src.y + (int)op->src.offset[1];
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + r->width;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + r->height;

	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
				    transform, op->src.scale,
				    &v[2], &v[3]);

	_sna_get_transformed_scaled(src_x, src_y + r->height,
				    transform, op->src.scale,
				    &v[6], &v[7]);

	_sna_get_transformed_scaled(src_x, src_y,
				    transform, op->src.scale,
				    &v[10], &v[11]);
}

sse2 fastcall static void
gen2_emit_composite_primitive_constant_identity_mask__sse2(struct sna *sna,
							   const struct sna_composite_op *op,
							   const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
	v[2] = v[6] + w * op->mask.scale[0];

	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
	v[7] = v[3] = v[11] + h * op->mask.scale[1];
}
#endif
1079
/* Second pass used to emulate component-alpha PictOpOver: the first pass
 * rendered with PictOpOutReverse (see gen2_render_composite, which sets
 * need_magic_ca_pass and rewrites the op), and this pass re-emits the
 * identical vertex data with ONE/ONE additive blending so the source
 * values are accumulated on top, completing the OVER equation.
 */
static void gen2_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	uint32_t ablend, cblend, *src, *dst;
	int n;

	if (!op->need_magic_ca_pass)
		return;

	DBG(("%s: batch=%x, vertex=%x\n", __FUNCTION__,
	     sna->kgem.nbatch, sna->render.vertex_offset));

	assert(op->mask.bo);
	assert(op->has_component_alpha);

	/* Switch the blend unit to additive ONE/ONE blending. */
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(8) | 0);
	BATCH(BLENDFACTOR_ONE << S8_SRC_BLEND_FACTOR_SHIFT |
	      BLENDFACTOR_ONE << S8_DST_BLEND_FACTOR_SHIFT |
	      S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
	      S8_ENABLE_COLOR_BUFFER_WRITE);
	/* Zero the cached LS1 word so the regular state is re-emitted later. */
	sna->render_state.gen2.ls1 = 0;

	/* Reprogram the texture combiners for the PictOpAdd equation. */
	gen2_get_blend_factors(op, PictOpAdd, &cblend, &ablend);
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
	BATCH(cblend);
	BATCH(ablend);
	sna->render_state.gen2.ls2 = 0;

	/* Replay the inline primitive packet verbatim: header dword plus
	 * all vertex dwords emitted since vertex_offset.
	 */
	src = sna->kgem.batch + sna->render.vertex_offset;
	dst = sna->kgem.batch + sna->kgem.nbatch;
	n = 1 + sna->render.vertex_index;
	sna->kgem.nbatch += n;
	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
	while (n--)
		*dst++ = *src++;
}
1117
1118static void gen2_vertex_flush(struct sna *sna,
1119			      const struct sna_composite_op *op)
1120{
1121	if (sna->render.vertex_index == 0)
1122		return;
1123
1124	sna->kgem.batch[sna->render.vertex_offset] |=
1125		sna->render.vertex_index - 1;
1126
1127	gen2_magic_ca_pass(sna, op);
1128
1129	sna->render.vertex_offset = 0;
1130	sna->render.vertex_index = 0;
1131}
1132
1133inline static int gen2_get_rectangles(struct sna *sna,
1134				      const struct sna_composite_op *op,
1135				      int want)
1136{
1137	int rem = batch_space(sna), size, need;
1138
1139	DBG(("%s: want=%d, floats_per_vertex=%d, rem=%d\n",
1140	     __FUNCTION__, want, op->floats_per_vertex, rem));
1141
1142	assert(op->floats_per_vertex);
1143	assert(op->floats_per_rect == 3 * op->floats_per_vertex);
1144
1145	need = 1;
1146	size = op->floats_per_rect;
1147	if (op->need_magic_ca_pass)
1148		need += 6 + size*sna->render.vertex_index, size *= 2;
1149
1150	DBG(("%s: want=%d, need=%d,size=%d, rem=%d\n",
1151	     __FUNCTION__, want, need, size, rem));
1152	if (rem < need + size) {
1153		gen2_vertex_flush(sna, op);
1154		kgem_submit(&sna->kgem);
1155		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1156		return 0;
1157	}
1158
1159	rem -= need;
1160	if (sna->render.vertex_offset == 0) {
1161		if ((sna->kgem.batch[sna->kgem.nbatch-1] & ~0xffff) ==
1162		    (PRIM3D_INLINE | PRIM3D_RECTLIST)) {
1163			uint32_t *b = &sna->kgem.batch[sna->kgem.nbatch-1];
1164			assert(*b & 0xffff);
1165			sna->render.vertex_index = 1 + (*b & 0xffff);
1166			*b = PRIM3D_INLINE | PRIM3D_RECTLIST;
1167			sna->render.vertex_offset = sna->kgem.nbatch - 1;
1168			assert(!op->need_magic_ca_pass);
1169		} else {
1170			sna->render.vertex_offset = sna->kgem.nbatch;
1171			BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST);
1172		}
1173	}
1174
1175	if (want > 1 && want * size > rem)
1176		want = rem / size;
1177
1178	assert(want);
1179	sna->render.vertex_index += want*op->floats_per_rect;
1180	return want;
1181}
1182
1183fastcall static void
1184gen2_render_composite_blt(struct sna *sna,
1185			  const struct sna_composite_op *op,
1186			  const struct sna_composite_rectangles *r)
1187{
1188	if (!gen2_get_rectangles(sna, op, 1)) {
1189		gen2_emit_composite_state(sna, op);
1190		gen2_get_rectangles(sna, op, 1);
1191	}
1192
1193	op->prim_emit(sna, op, r);
1194}
1195
1196fastcall static void
1197gen2_render_composite_box(struct sna *sna,
1198			  const struct sna_composite_op *op,
1199			  const BoxRec *box)
1200{
1201	struct sna_composite_rectangles r;
1202
1203	if (!gen2_get_rectangles(sna, op, 1)) {
1204		gen2_emit_composite_state(sna, op);
1205		gen2_get_rectangles(sna, op, 1);
1206	}
1207
1208	DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
1209	     box->x1, box->y1,
1210	     box->x2 - box->x1,
1211	     box->y2 - box->y1));
1212
1213	r.dst.x  = box->x1; r.dst.y  = box->y1;
1214	r.width = box->x2 - box->x1;
1215	r.height = box->y2 - box->y1;
1216	r.src = r.mask = r.dst;
1217
1218	op->prim_emit(sna, op, &r);
1219}
1220
1221static void
1222gen2_render_composite_boxes(struct sna *sna,
1223			    const struct sna_composite_op *op,
1224			    const BoxRec *box, int nbox)
1225{
1226	do {
1227		int nbox_this_time;
1228
1229		nbox_this_time = gen2_get_rectangles(sna, op, nbox);
1230		if (nbox_this_time == 0) {
1231			gen2_emit_composite_state(sna, op);
1232			nbox_this_time = gen2_get_rectangles(sna, op, nbox);
1233		}
1234		nbox -= nbox_this_time;
1235
1236		do {
1237			struct sna_composite_rectangles r;
1238
1239			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
1240			     box->x1, box->y1,
1241			     box->x2 - box->x1,
1242			     box->y2 - box->y1));
1243
1244			r.dst.x  = box->x1; r.dst.y  = box->y1;
1245			r.width = box->x2 - box->x1;
1246			r.height = box->y2 - box->y1;
1247			r.src = r.mask = r.dst;
1248
1249			op->prim_emit(sna, op, &r);
1250			box++;
1251		} while (--nbox_this_time);
1252	} while (nbox);
1253}
1254
1255static void gen2_render_composite_done(struct sna *sna,
1256				       const struct sna_composite_op *op)
1257{
1258	gen2_vertex_flush(sna, op);
1259
1260	if (op->mask.bo)
1261		kgem_bo_destroy(&sna->kgem, op->mask.bo);
1262	if (op->src.bo)
1263		kgem_bo_destroy(&sna->kgem, op->src.bo);
1264	sna_render_composite_redirect_done(sna, op);
1265}
1266
1267static bool
1268gen2_composite_solid_init(struct sna *sna,
1269			  struct sna_composite_channel *channel,
1270			  uint32_t color)
1271{
1272	channel->filter = PictFilterNearest;
1273	channel->repeat = RepeatNormal;
1274	channel->is_solid  = true;
1275	channel->is_affine = true;
1276	channel->width  = 1;
1277	channel->height = 1;
1278	channel->pict_format = PICT_a8r8g8b8;
1279
1280	channel->bo = NULL;
1281	channel->u.gen2.pixel = color;
1282
1283	channel->scale[0]  = channel->scale[1]  = 1;
1284	channel->offset[0] = channel->offset[1] = 0;
1285	return true;
1286}
1287
1288#define xFixedToDouble(f) pixman_fixed_to_double(f)
1289
/* Prepare a linear-gradient source channel.  The gradient ramp is baked
 * into a 1D texture (sna_render_get_gradient) and (dx, dy, offset) are
 * chosen so that the texture coordinate dx*x + dy*y + offset maps a
 * destination pixel to its position along the p1->p2 axis (0 at p1,
 * 1 at p2) — consumed by gen2_emit_composite_linear.
 * Returns true on success, 0/false on failure or for degenerate or
 * projective-transformed gradients that are handled elsewhere.
 */
static bool
gen2_composite_linear_init(struct sna *sna,
			   PicturePtr picture,
			   struct sna_composite_channel *channel,
			   int x, int y,
			   int w, int h,
			   int dst_x, int dst_y)
{
	PictLinearGradient *linear =
		(PictLinearGradient *)picture->pSourcePict;
	pixman_fixed_t tx, ty;
	float x0, y0, sf;
	float dx, dy;

	DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n",
	     __FUNCTION__,
	     xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y),
	     xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y)));

	/* Degenerate gradient: no axis to project onto. */
	if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
		return 0;

	if (!sna_transform_is_affine(picture->transform)) {
		DBG(("%s: fallback due to projective transform\n",
		     __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear);
	if (!channel->bo)
		return 0;

	/* The ramp texture is a single row of 32bpp texels. */
	channel->filter = PictFilterNearest;
	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->is_linear = true;
	channel->width  = channel->bo->pitch / 4;
	channel->height = 1;
	channel->pict_format = PICT_a8r8g8b8;

	channel->scale[0]  = channel->scale[1]  = 1;
	channel->offset[0] = channel->offset[1] = 0;

	if (sna_transform_is_translation(picture->transform, &tx, &ty)) {
		/* Pure translation: shift the gradient endpoints instead of
		 * transforming every sample position.
		 */
		dx = xFixedToDouble(linear->p2.x - linear->p1.x);
		dy = xFixedToDouble(linear->p2.y - linear->p1.y);

		x0 = xFixedToDouble(linear->p1.x);
		y0 = xFixedToDouble(linear->p1.y);

		if (tx | ty) {
			x0 -= pixman_fixed_to_double(tx);
			y0 -= pixman_fixed_to_double(ty);
		}
	} else {
		/* General affine case: map the endpoints back into
		 * destination space through the inverse transform.
		 */
		struct pixman_f_vector p1, p2;
		struct pixman_f_transform m, inv;

		pixman_f_transform_from_pixman_transform(&m, picture->transform);
		DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n",
		     __FUNCTION__,
		     m.m[0][0], m.m[0][1], m.m[0][2],
		     m.m[1][0], m.m[1][1], m.m[1][2],
		     m.m[2][0], m.m[2][1], m.m[2][2]));
		if (!pixman_f_transform_invert(&inv, &m))
			return 0;

		p1.v[0] = pixman_fixed_to_double(linear->p1.x);
		p1.v[1] = pixman_fixed_to_double(linear->p1.y);
		p1.v[2] = 1.;
		pixman_f_transform_point(&inv, &p1);

		p2.v[0] = pixman_fixed_to_double(linear->p2.x);
		p2.v[1] = pixman_fixed_to_double(linear->p2.y);
		p2.v[2] = 1.;
		pixman_f_transform_point(&inv, &p2);

		DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n",
		     __FUNCTION__,
		     p1.v[0], p1.v[1], p1.v[2],
		     p2.v[0], p2.v[1], p2.v[2]));

		dx = p2.v[0] - p1.v[0];
		dy = p2.v[1] - p1.v[1];

		x0 = p1.v[0];
		y0 = p1.v[1];
	}

	/* Normalise so that projecting (p2 - p1) onto (dx, dy) yields 1. */
	sf = dx*dx + dy*dy;
	dx /= sf;
	dy /= sf;

	channel->u.linear.dx = dx;
	channel->u.linear.dy = dy;
	/* Fold the dst->src origin difference into the constant term. */
	channel->u.linear.offset = -dx*(x0+dst_x-x) + -dy*(y0+dst_y-y);

	DBG(("%s: dx=%f, dy=%f, offset=%f\n",
	     __FUNCTION__, dx, dy, channel->u.linear.offset));

	return channel->bo != NULL;
}
1392
1393static bool source_is_covered(PicturePtr picture,
1394			      int x, int y,
1395			      int width, int height)
1396{
1397	int x1, y1, x2, y2;
1398
1399	if (picture->repeat && picture->repeatType != RepeatNone)
1400		return true;
1401
1402	if (picture->pDrawable == NULL)
1403		return false;
1404
1405	if (picture->transform) {
1406		pixman_box16_t sample;
1407
1408		sample.x1 = x;
1409		sample.y1 = y;
1410		sample.x2 = x + width;
1411		sample.y2 = y + height;
1412
1413		pixman_transform_bounds(picture->transform, &sample);
1414
1415		x1 = sample.x1;
1416		x2 = sample.x2;
1417		y1 = sample.y1;
1418		y2 = sample.y2;
1419	} else {
1420		x1 = x;
1421		y1 = y;
1422		x2 = x + width;
1423		y2 = y + height;
1424	}
1425
1426	return
1427		x1 >= 0 && y1 >= 0 &&
1428		x2 <= picture->pDrawable->width &&
1429		y2 <= picture->pDrawable->height;
1430}
1431
1432static bool
1433gen2_check_card_format(struct sna *sna,
1434		       PicturePtr picture,
1435		       struct sna_composite_channel *channel,
1436		       int x, int y, int w, int h,
1437		       bool *fixup_alpha)
1438{
1439	uint32_t format = picture->format;
1440	unsigned int i;
1441
1442	for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) {
1443		if (i8xx_tex_formats[i].fmt == format)
1444			return true;
1445	}
1446
1447	for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) {
1448		if (i85x_tex_formats[i].fmt == format) {
1449			if (sna->kgem.gen >= 021)
1450				return true;
1451
1452			if (source_is_covered(picture, x, y, w,h)) {
1453				channel->is_opaque = true;
1454				return true;
1455			}
1456
1457			*fixup_alpha = true;
1458			return false;
1459		}
1460	}
1461
1462	*fixup_alpha = false;
1463	return false;
1464}
1465
/* Prepare a picture for use as a composite source or mask channel.
 * Returns the -1/0/1 convention switched on by gen2_render_composite:
 * 1 for success, 0 to substitute a clear solid, -1 on failure
 * (presumably the sna_render_picture_* helpers follow the same
 * convention — verify in sna_render.c).
 */
static int
gen2_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;
	bool fixup_alpha;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->is_linear = false;
	channel->is_opaque = false;
	channel->is_affine = true;
	channel->transform = NULL;

	/* Solid colours bypass all texture setup. */
	if (sna_picture_is_solid(picture, &color))
		return gen2_composite_solid_init(sna, channel, color);

	/* Unsupported repeat/filter modes get rendered to a temporary. */
	if (!gen2_check_repeat(picture)) {
		DBG(("%s -- fallback, unhandled repeat %d\n",
		     __FUNCTION__, picture->repeat));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen2_check_filter(picture)) {
		DBG(("%s -- fallback, unhandled filter %d\n",
		     __FUNCTION__, picture->filter));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (picture->pDrawable == NULL) {
		int ret;

		/* Gradient sources: only linear is handled natively. */
		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen2_composite_linear_init(sna, picture, channel,
							  x, y,
							  w, h,
							  dst_x, dst_y);

		DBG(("%s -- fallback, unhandled source %d\n",
		     __FUNCTION__, picture->pSourcePict->type));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	/* Convert from picture to pixmap coordinates. */
	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	/* An integer translation can be folded into the sample origin,
	 * letting us drop the transform (and any filtering) entirely.
	 */
	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;
	} else
		channel->transform = picture->transform;

	if (!gen2_check_card_format(sna, picture, channel, x,  y, w ,h, &fixup_alpha))
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y, fixup_alpha);

	channel->pict_format = picture->format;
	/* Oversized pixmaps: extract just the sampled subregion. */
	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}
1565
/* Bind the destination picture to a GPU buffer for rendering, replacing
 * the bo if its pitch is below the minimum the 3D pipe accepts.
 * Returns false if the destination cannot be placed on the GPU.
 */
static bool
gen2_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h)
{
	BoxRec box;

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
	op->dst.width = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;

	/* Damage/placement hint: the region we are about to render. */
	if (w && h) {
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
					  PREFER_GPU | FORCE_GPU | RENDER_GPU,
					  &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	/* The 3D pipeline cannot render to very narrow surfaces; swap in
	 * a replacement bo with a widened pitch (minimum 8 bytes).
	 */
	if (op->dst.bo->pitch < 8) {
		struct sna_pixmap *priv;
		struct kgem_bo *bo;

		priv = sna_pixmap_move_to_gpu (op->dst.pixmap,
					       MOVE_READ | MOVE_WRITE);
		if (priv == NULL || priv->pinned)
			return false;

		assert(op->dst.bo == priv->gpu_bo);
		bo = kgem_replace_bo(&sna->kgem, priv->gpu_bo,
				     op->dst.width, op->dst.height, 8,
				     op->dst.pixmap->drawable.bitsPerPixel);
		if (bo == NULL)
			return false;

		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
		priv->gpu_bo = bo;

		op->dst.bo = priv->gpu_bo;
		op->damage = &priv->gpu_damage;
		/* No need to track damage if everything is already dirty. */
		if (sna_damage_is_all(op->damage,
				      op->dst.width, op->dst.height))
			op->damage = NULL;
	}

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);
	return true;
}
1633
1634static bool
1635is_unhandled_gradient(PicturePtr picture, bool precise)
1636{
1637	if (picture->pDrawable)
1638		return false;
1639
1640	switch (picture->pSourcePict->type) {
1641	case SourcePictTypeSolidFill:
1642	case SourcePictTypeLinear:
1643		return false;
1644	default:
1645		return precise;
1646	}
1647}
1648
1649static bool
1650has_alphamap(PicturePtr p)
1651{
1652	return p->alphaMap != NULL;
1653}
1654
1655static bool
1656need_upload(PicturePtr p)
1657{
1658	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
1659}
1660
1661static bool
1662source_is_busy(PixmapPtr pixmap)
1663{
1664	struct sna_pixmap *priv = sna_pixmap(pixmap);
1665	if (priv == NULL)
1666		return false;
1667
1668	if (priv->clear)
1669		return false;
1670
1671	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
1672		return true;
1673
1674	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
1675		return true;
1676
1677	return priv->gpu_damage && !priv->cpu_damage;
1678}
1679
1680static bool
1681source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
1682{
1683	if (sna_picture_is_solid(p, NULL))
1684		return false;
1685
1686	if (is_unhandled_gradient(p, precise) || !gen2_check_repeat(p))
1687		return true;
1688
1689	if (pixmap && source_is_busy(pixmap))
1690		return false;
1691
1692	return has_alphamap(p) || !gen2_check_filter(p) || need_upload(p);
1693}
1694
1695static bool
1696gen2_composite_fallback(struct sna *sna,
1697			PicturePtr src,
1698			PicturePtr mask,
1699			PicturePtr dst)
1700{
1701	PixmapPtr src_pixmap;
1702	PixmapPtr mask_pixmap;
1703	PixmapPtr dst_pixmap;
1704	bool src_fallback, mask_fallback;
1705
1706	if (!gen2_check_dst_format(dst->format)) {
1707		DBG(("%s: unknown destination format: %d\n",
1708		     __FUNCTION__, dst->format));
1709		return true;
1710	}
1711
1712	dst_pixmap = get_drawable_pixmap(dst->pDrawable);
1713
1714	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
1715	src_fallback = source_fallback(src, src_pixmap,
1716				       dst->polyMode == PolyModePrecise);
1717
1718	if (mask) {
1719		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
1720		mask_fallback = source_fallback(mask, mask_pixmap,
1721						dst->polyMode == PolyModePrecise);
1722	} else {
1723		mask_pixmap = NULL;
1724		mask_fallback = NULL;
1725	}
1726
1727	/* If we are using the destination as a source and need to
1728	 * readback in order to upload the source, do it all
1729	 * on the cpu.
1730	 */
1731	if (src_pixmap == dst_pixmap && src_fallback) {
1732		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
1733		return true;
1734	}
1735	if (mask_pixmap == dst_pixmap && mask_fallback) {
1736		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
1737		return true;
1738	}
1739
1740	/* If anything is on the GPU, push everything out to the GPU */
1741	if (dst_use_gpu(dst_pixmap)) {
1742		DBG(("%s: dst is already on the GPU, try to use GPU\n",
1743		     __FUNCTION__));
1744		return false;
1745	}
1746
1747	if (src_pixmap && !src_fallback) {
1748		DBG(("%s: src is already on the GPU, try to use GPU\n",
1749		     __FUNCTION__));
1750		return false;
1751	}
1752	if (mask_pixmap && !mask_fallback) {
1753		DBG(("%s: mask is already on the GPU, try to use GPU\n",
1754		     __FUNCTION__));
1755		return false;
1756	}
1757
1758	/* However if the dst is not on the GPU and we need to
1759	 * render one of the sources using the CPU, we may
1760	 * as well do the entire operation in place onthe CPU.
1761	 */
1762	if (src_fallback) {
1763		DBG(("%s: dst is on the CPU and src will fallback\n",
1764		     __FUNCTION__));
1765		return true;
1766	}
1767
1768	if (mask && mask_fallback) {
1769		DBG(("%s: dst is on the CPU and mask will fallback\n",
1770		     __FUNCTION__));
1771		return true;
1772	}
1773
1774	if (too_large(dst_pixmap->drawable.width,
1775		      dst_pixmap->drawable.height) &&
1776	    dst_is_cpu(dst_pixmap)) {
1777		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
1778		return true;
1779	}
1780
1781	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
1782	     __FUNCTION__));
1783	return dst_use_cpu(dst_pixmap);
1784}
1785
1786static int
1787reuse_source(struct sna *sna,
1788	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
1789	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
1790{
1791	uint32_t color;
1792
1793	if (src_x != msk_x || src_y != msk_y)
1794		return false;
1795
1796	if (sna_picture_is_solid(mask, &color))
1797		return gen2_composite_solid_init(sna, mc, color);
1798
1799	if (sc->is_solid)
1800		return false;
1801
1802	if (src == mask) {
1803		DBG(("%s: mask is source\n", __FUNCTION__));
1804		*mc = *sc;
1805		mc->bo = kgem_bo_reference(mc->bo);
1806		return true;
1807	}
1808
1809	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
1810		return false;
1811
1812	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));
1813
1814	if (!sna_transform_equal(src->transform, mask->transform))
1815		return false;
1816
1817	if (!sna_picture_alphamap_equal(src, mask))
1818		return false;
1819
1820	if (!gen2_check_repeat(mask))
1821		return false;
1822
1823	if (!gen2_check_filter(mask))
1824		return false;
1825
1826	if (!gen2_check_format(sna, mask))
1827		return false;
1828
1829	DBG(("%s: reusing source channel for mask with a twist\n",
1830	     __FUNCTION__));
1831
1832	*mc = *sc;
1833	mc->repeat = mask->repeat ? mask->repeatType : RepeatNone;
1834	mc->filter = mask->filter;
1835	mc->pict_format = mask->format;
1836	mc->bo = kgem_bo_reference(mc->bo);
1837	return true;
1838}
1839
/* Top-level entry point for a RENDER composite on gen2 hardware.
 * Prepares tmp (channels, vertex layout, emit callbacks) and emits the
 * initial state; returns false to request the software fallback.
 */
static bool
gen2_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x,  int16_t src_y,
		      int16_t mask_x, int16_t mask_y,
		      int16_t dst_x,  int16_t dst_y,
		      int16_t width,  int16_t height,
		      struct sna_composite_op *tmp)
{
	DBG(("%s()\n", __FUNCTION__));

	if (op >= ARRAY_SIZE(gen2_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	/* Unmasked operations may be serviced by the cheaper 2D blitter. */
	if (mask == NULL &&
	    sna_blt_composite(sna, op, src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      tmp, false))
		return true;

	if (gen2_composite_fallback(sna, src, mask, dst))
		return false;

	/* Destinations beyond the hardware limits are split into tiles. */
	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x,  src_y,
					    mask_x, mask_y,
					    dst_x,  dst_y,
					    width,  height,
					    tmp);

	if (!gen2_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height)) {
		DBG(("%s: unable to set render target\n",
		     __FUNCTION__));
		return false;
	}

	tmp->op = op;

	/* Redirect rendering through a proxy if the target itself exceeds
	 * the hardware size/pitch limits.
	 */
	sna_render_composite_redirect_init(tmp);
	if (too_large(tmp->dst.width, tmp->dst.height) ||
	    tmp->dst.bo->pitch > MAX_3D_PITCH) {
		if (!sna_render_composite_redirect(sna, tmp,
						   dst_x, dst_y, width, height,
						   op > PictOpSrc || dst->pCompositeClip->data != NULL))
			return false;
	}

	/* Prepare the source channel: -1 = failure, 0 = substitute clear,
	 * 1 = success.
	 */
	switch (gen2_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		DBG(("%s: fallback -- unable to prepare source\n",
		     __FUNCTION__));
		goto cleanup_dst;
	case 0:
		gen2_composite_solid_init(sna, &tmp->src, 0);
		break;
	case 1:
		/* Simple texture sources may still go via the blitter. */
		if (mask == NULL && tmp->src.bo &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;
		break;
	}

	if (mask) {
		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, mask_x, mask_y)) {
			switch (gen2_composite_picture(sna, mask, &tmp->mask,
						       mask_x, mask_y,
						       width,  height,
						       dst_x,  dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				DBG(("%s: fallback -- unable to prepare mask\n",
				     __FUNCTION__));
				goto cleanup_src;
			case 0:
				gen2_composite_solid_init(sna, &tmp->mask, 0);
				/* fall through */
			case 1:
				break;
			}
		}

		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			/* Check if it's component alpha that relies on a source alpha
			 * and on the source value.  We can only get one of those
			 * into the single source value that we get to blend with.
			 */
			tmp->has_component_alpha = true;
			if (gen2_blend_op[op].src_alpha &&
			    (gen2_blend_op[op].src_blend != BLENDFACTOR_ZERO)) {
				if (op != PictOpOver) {
					DBG(("%s: fallback -- unsupported CA blend (src_blend=%d)\n",
					     __FUNCTION__,
					     gen2_blend_op[op].src_blend));
					goto cleanup_src;
				}

				/* Render OVER in two passes: OutReverse now,
				 * then an additive replay in
				 * gen2_magic_ca_pass.
				 */
				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		/* convert solid to a texture (pure convenience) */
		if (tmp->mask.is_solid && tmp->src.is_solid) {
			assert(tmp->mask.is_affine);
			tmp->mask.bo = sna_render_get_solid(sna, tmp->mask.u.gen2.pixel);
			if (!tmp->mask.bo)
				goto cleanup_src;
		}
	}

	/* Vertex layout: 2 floats for position plus 2 (affine) or 3
	 * (projective) texture coordinates per active channel.
	 */
	tmp->floats_per_vertex = 2;
	if (!tmp->src.is_solid)
		tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 3;
	if (tmp->mask.bo)
		tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 3;
	tmp->floats_per_rect = 3*tmp->floats_per_vertex;

	/* Pick the fastest primitive emitter for this channel combination,
	 * preferring the SSE2 variants when available (32-bit builds only;
	 * x86-64 always has SSE2 so the plain versions suffice there).
	 */
	tmp->prim_emit = gen2_emit_composite_primitive;
	if (tmp->mask.bo) {
		if (tmp->mask.transform == NULL) {
			if (tmp->src.is_solid) {
				assert(tmp->floats_per_rect == 12);
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen2_emit_composite_primitive_constant_identity_mask__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen2_emit_composite_primitive_constant_identity_mask;
				}
			}
		}
	} else {
		if (tmp->src.is_solid) {
			assert(tmp->floats_per_rect == 6);
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_constant__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_constant;
			}
		} else if (tmp->src.is_linear) {
			assert(tmp->floats_per_rect == 12);
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_linear__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_linear;
			}
		} else if (tmp->src.transform == NULL) {
			assert(tmp->floats_per_rect == 12);
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_identity__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_identity;
			}
		} else if (tmp->src.is_affine) {
			assert(tmp->floats_per_rect == 12);
			/* Fold the homogeneous divisor into the scale factors. */
			tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
			tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_primitive_affine__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_primitive_affine;
			}
		}
	}

	tmp->blt   = gen2_render_composite_blt;
	tmp->box   = gen2_render_composite_box;
	tmp->boxes = gen2_render_composite_boxes;
	tmp->done  = gen2_render_composite_done;

	/* Ensure all three bo fit in the aperture, flushing once if not. */
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL)) {
			DBG(("%s: fallback, operation does not fit into GTT\n",
			     __FUNCTION__));
			goto cleanup_mask;
		}
	}

	gen2_emit_composite_state(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo)
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
	if (tmp->src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
	if (tmp->redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
	return false;
}
2067
/* Emit one span rectangle for a solid source: three vertices of
 * (x, y, diffuse-colour), where the colour carries the span coverage in
 * its alpha byte.  The colour is stored as a raw uint32 into the float
 * vertex stream.
 * NOTE(review): the (uint32_t *)v stores type-pun the batch buffer;
 * presumably the driver is built without strict-aliasing optimisations —
 * confirm the build flags before touching this.
 */
fastcall static void
gen2_emit_composite_spans_primitive_constant(struct sna *sna,
					     const struct sna_composite_spans_op *op,
					     const BoxRec *box,
					     float opacity)
{
	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	/* Coverage packed into the alpha byte of an ARGB dword. */
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	sna->kgem.nbatch += 9;

	/* bottom-right */
	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	*((uint32_t *)v + 2) = alpha;

	/* bottom-left */
	v[3] = op->base.dst.x + box->x1;
	v[4] = v[1];
	*((uint32_t *)v + 5) = alpha;

	/* top-left */
	v[6] = v[3];
	v[7] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 8) = alpha;
}
2090
2091fastcall static void
2092gen2_emit_composite_spans_primitive_linear(struct sna *sna,
2093					     const struct sna_composite_spans_op *op,
2094					     const BoxRec *box,
2095					     float opacity)
2096{
2097	union {
2098		float f;
2099		uint32_t u;
2100	} alpha;
2101
2102	alpha.u = (uint8_t)(255 * opacity) << 24;
2103
2104	gen2_emit_composite_dstcoord(sna,
2105				     op->base.dst.x + box->x2,
2106				     op->base.dst.y + box->y2);
2107	VERTEX(alpha.f);
2108	gen2_emit_composite_linear(sna, &op->base.src, box->x2, box->y2);
2109
2110	gen2_emit_composite_dstcoord(sna,
2111				     op->base.dst.x + box->x1,
2112				     op->base.dst.y + box->y2);
2113	VERTEX(alpha.f);
2114	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y2);
2115
2116	gen2_emit_composite_dstcoord(sna,
2117				     op->base.dst.x + box->x1,
2118				     op->base.dst.y + box->y1);
2119	VERTEX(alpha.f);
2120	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y1);
2121}
2122
/* Emit one span rectangle for an untransformed source: three vertices of
 * (x, y, diffuse-colour, u, v); coverage travels in the colour's alpha
 * byte.  Later vertices reuse earlier slots (v[6]=v[1], v[9]=v[4], ...),
 * so the write order below matters.
 * NOTE(review): the (uint32_t *)v stores type-pun the batch buffer;
 * presumably relies on -fno-strict-aliasing or equivalent — confirm.
 */
fastcall static void
gen2_emit_composite_spans_primitive_identity_source(struct sna *sna,
						    const struct sna_composite_spans_op *op,
						    const BoxRec *box,
						    float opacity)
{
	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	/* Coverage packed into the alpha byte of an ARGB dword. */
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	sna->kgem.nbatch += 15;

	/* bottom-right */
	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	*((uint32_t *)v + 2) = alpha;
	v[3] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
	v[4] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];

	/* bottom-left */
	v[5] = op->base.dst.x + box->x1;
	v[6] = v[1];
	*((uint32_t *)v + 7) = alpha;
	v[8] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
	v[9] = v[4];

	/* top-left */
	v[10] = v[5];
	v[11] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 12) = alpha;
	v[13] = v[8];
	v[14] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
}
2151
/* Emit one span triangle for an affine-transformed texture source:
 * 3 vertices of 5 dwords each (x, y, packed diffuse alpha, s, t), with
 * the source coordinates run through the picture transform.
 */
fastcall static void
gen2_emit_composite_spans_primitive_affine_source(struct sna *sna,
						  const struct sna_composite_spans_op *op,
						  const BoxRec *box,
						  float opacity)
{
	PictTransform *transform = op->base.src.transform;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 15;

	/* Destination coords and alpha first; the shared values are
	 * written once and copied into the other vertices.
	 */
	v[0]  = op->base.dst.x + box->x2;
	v[6]  = v[1] = op->base.dst.y + box->y2;
	v[10] = v[5] = op->base.dst.x + box->x1;
	v[11] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 2) = alpha;
	*((uint32_t *)v + 7) = alpha;
	*((uint32_t *)v + 12) = alpha;

	/* Transformed texture coords for each corner. */
	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[3], &v[4]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[8], &v[9]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y1,
				    transform, op->base.src.scale,
				    &v[13], &v[14]);
}
2188
#if defined(sse2) && !defined(__x86_64__)
/* SSE2-compiled clones of the span emitters above, selected at runtime
 * on 32-bit builds when the CPU supports SSE2 (see
 * gen2_render_composite_spans).  The bodies are identical to their
 * scalar twins; only the per-function code-generation target differs.
 */
sse2 fastcall static void
gen2_emit_composite_spans_primitive_constant__sse2(struct sna *sna,
						   const struct sna_composite_spans_op *op,
						   const BoxRec *box,
						   float opacity)
{
	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	sna->kgem.nbatch += 9;

	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	*((uint32_t *)v + 2) = alpha;

	v[3] = op->base.dst.x + box->x1;
	v[4] = v[1];
	*((uint32_t *)v + 5) = alpha;

	v[6] = v[3];
	v[7] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 8) = alpha;
}

/* SSE2 twin of gen2_emit_composite_spans_primitive_linear. */
sse2 fastcall static void
gen2_emit_composite_spans_primitive_linear__sse2(struct sna *sna,
						 const struct sna_composite_spans_op *op,
						 const BoxRec *box,
						 float opacity)
{
	union {
		float f;
		uint32_t u;
	} alpha;

	alpha.u = (uint8_t)(255 * opacity) << 24;

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x2,
				     op->base.dst.y + box->y2);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x2, box->y2);

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x1,
				     op->base.dst.y + box->y2);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y2);

	gen2_emit_composite_dstcoord(sna,
				     op->base.dst.x + box->x1,
				     op->base.dst.y + box->y1);
	VERTEX(alpha.f);
	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y1);
}

/* SSE2 twin of gen2_emit_composite_spans_primitive_identity_source. */
sse2 fastcall static void
gen2_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna,
							  const struct sna_composite_spans_op *op,
							  const BoxRec *box,
							  float opacity)
{
	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	sna->kgem.nbatch += 15;

	v[0] = op->base.dst.x + box->x2;
	v[1] = op->base.dst.y + box->y2;
	*((uint32_t *)v + 2) = alpha;
	v[3] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
	v[4] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];

	v[5] = op->base.dst.x + box->x1;
	v[6] = v[1];
	*((uint32_t *)v + 7) = alpha;
	v[8] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
	v[9] = v[4];

	v[10] = v[5];
	v[11] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 12) = alpha;
	v[13] = v[8];
	v[14] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
}

/* SSE2 twin of gen2_emit_composite_spans_primitive_affine_source. */
sse2 fastcall static void
gen2_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna,
							const struct sna_composite_spans_op *op,
							const BoxRec *box,
							float opacity)
{
	PictTransform *transform = op->base.src.transform;
	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
	float *v;

	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
	sna->kgem.nbatch += 15;

	v[0]  = op->base.dst.x + box->x2;
	v[6]  = v[1] = op->base.dst.y + box->y2;
	v[10] = v[5] = op->base.dst.x + box->x1;
	v[11] = op->base.dst.y + box->y1;
	*((uint32_t *)v + 2) = alpha;
	*((uint32_t *)v + 7) = alpha;
	*((uint32_t *)v + 12) = alpha;

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[3], &v[4]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y2,
				    transform, op->base.src.scale,
				    &v[8], &v[9]);

	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
				    (int)op->base.src.offset[1] + box->y1,
				    transform, op->base.src.scale,
				    &v[13], &v[14]);
}
#endif
2311
/* Emit a single span vertex: destination coordinate, opacity packed into
 * the diffuse alpha byte, then the source coordinate appropriate to the
 * source type (gradient or texture).  Solid sources never reach here --
 * they use the specular colour channel instead of per-vertex data.
 */
static void
gen2_emit_composite_spans_vertex(struct sna *sna,
				 const struct sna_composite_spans_op *op,
				 int16_t x, int16_t y,
				 float opacity)
{
	gen2_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y);
	BATCH((uint8_t)(opacity * 255) << 24);
	assert(!op->base.src.is_solid);
	if (op->base.src.is_linear)
		gen2_emit_composite_linear(sna, &op->base.src, x, y);
	else
		gen2_emit_composite_texcoord(sna, &op->base.src, x, y);
}
2326
2327fastcall static void
2328gen2_emit_composite_spans_primitive(struct sna *sna,
2329				    const struct sna_composite_spans_op *op,
2330				    const BoxRec *box,
2331				    float opacity)
2332{
2333	gen2_emit_composite_spans_vertex(sna, op, box->x2, box->y2, opacity);
2334	gen2_emit_composite_spans_vertex(sna, op, box->x1, box->y2, opacity);
2335	gen2_emit_composite_spans_vertex(sna, op, box->x1, box->y1, opacity);
2336}
2337
/* Program texture-blend stage 0 for span rendering: colour and alpha are
 * modulated by the per-vertex diffuse alpha (the span opacity), with the
 * second argument selected according to the source type.  If the newly
 * built LS2 packet matches the one already live in the batch, the write
 * is unwound to avoid redundant state emission.
 */
static void
gen2_emit_spans_pipeline(struct sna *sna,
			 const struct sna_composite_spans_op *op)
{
	uint32_t cblend, ablend;
	uint32_t unwind;

	cblend =
		TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_MODULATE |
		TB0C_ARG1_SEL_DIFFUSE | TB0C_ARG1_REPLICATE_ALPHA |
		TB0C_OUTPUT_WRITE_CURRENT;
	ablend =
		TB0A_RESULT_SCALE_1X | TB0A_OP_MODULATE |
		TB0A_ARG1_SEL_DIFFUSE |
		TB0A_OUTPUT_WRITE_CURRENT;

	if (op->base.src.is_solid) {
		/* Solid colour is carried in the specular channel. */
		ablend |= TB0A_ARG2_SEL_SPECULAR;
		cblend |= TB0C_ARG2_SEL_SPECULAR;
		if (op->base.dst.format == PICT_a8)
			cblend |= TB0C_ARG2_REPLICATE_ALPHA;
	} else if (op->base.dst.format == PICT_a8) {
		/* a8 destination only cares about the alpha result. */
		ablend |= TB0A_ARG2_SEL_TEXEL0;
		cblend |= TB0C_ARG2_SEL_TEXEL0 | TB0C_ARG2_REPLICATE_ALPHA;
	} else {
		if (PICT_FORMAT_RGB(op->base.src.pict_format) != 0)
			cblend |= TB0C_ARG2_SEL_TEXEL0;
		else
			cblend |= TB0C_ARG2_SEL_ONE | TB0C_ARG2_INVERT;

		if (op->base.src.is_opaque)
			ablend |= TB0A_ARG2_SEL_ONE;
		else
			ablend |= TB0A_ARG2_SEL_TEXEL0;
	}

	/* Emit, then unwind if identical to the current LS2 state. */
	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
	BATCH(cblend);
	BATCH(ablend);
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
		   sna->kgem.batch + unwind + 1,
		   2 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls2 = unwind;
}
2386
/* Emit the full 3D state required before span rectangles can be added:
 * render target, vertex format (S2/S3/S8 with deduplication against the
 * live LS1 packet), blend control, the texture-blend pipeline, and
 * either the specular solid colour or the source texture state.
 */
static void gen2_emit_composite_spans_state(struct sna *sna,
					    const struct sna_composite_spans_op *op)
{
	uint32_t unwind;

	gen2_get_batch(sna, &op->base);
	gen2_emit_target(sna, &op->base);

	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	/* S2: one texture coordinate set only when sampling a texture. */
	BATCH(!op->base.src.is_solid << 12);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY | S3_DIFFUSE_PRESENT);
	BATCH(gen2_get_blend_cntl(op->base.op, false, op->base.dst.format));
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
		   sna->kgem.batch + unwind + 1,
		   3 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls1 = unwind;

	gen2_disable_logic_op(sna);
	gen2_emit_spans_pipeline(sna, op);

	if (op->base.src.is_solid) {
		/* Solid colour rides in the default specular register. */
		if (op->base.src.u.gen2.pixel != sna->render_state.gen2.specular) {
			BATCH(_3DSTATE_DFLT_SPECULAR_CMD);
			BATCH(op->base.src.u.gen2.pixel);
			sna->render_state.gen2.specular = op->base.src.u.gen2.pixel;
		}
	} else {
		uint32_t v =_3DSTATE_VERTEX_FORMAT_2_CMD |
			(op->base.src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_3D);
		if (sna->render_state.gen2.vft != v) {
			BATCH(v);
			sna->render_state.gen2.vft = v;
		}
		gen2_emit_texture(sna, &op->base.src, 0);
	}
}
2427
/* Render a single span box at the given opacity.  If the batch has no
 * room for another rectangle, the state is re-emitted into a fresh
 * batch before retrying.
 */
fastcall static void
gen2_render_composite_spans_box(struct sna *sna,
				const struct sna_composite_spans_op *op,
				const BoxRec *box, float opacity)
{
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	if (gen2_get_rectangles(sna, &op->base, 1) == 0) {
		gen2_emit_composite_spans_state(sna, op);
		gen2_get_rectangles(sna, &op->base, 1);
	}

	op->prim_emit(sna, op, box, opacity);
}
2449
/* Render an array of span boxes at a shared opacity, reserving as many
 * rectangles as fit in the current batch per iteration and re-emitting
 * state whenever the batch fills up.  Caller guarantees nbox > 0.
 */
static void
gen2_render_composite_spans_boxes(struct sna *sna,
				  const struct sna_composite_spans_op *op,
				  const BoxRec *box, int nbox,
				  float opacity)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y));

	do {
		int nbox_this_time;

		nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox);
		if (nbox_this_time == 0) {
			/* Batch full: start a new one and retry. */
			gen2_emit_composite_spans_state(sna, op);
			nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox);
		}
		nbox -= nbox_this_time;

		do {
			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			     box->x1, box->y1,
			     box->x2 - box->x1,
			     box->y2 - box->y1));

			op->prim_emit(sna, op, box++, opacity);
		} while (--nbox_this_time);
	} while (nbox);
}
2482
/* Finish a span operation: flush outstanding vertices, drop our
 * reference on the source bo, and resolve any redirected (proxy)
 * render target back to the real destination.
 */
fastcall static void
gen2_render_composite_spans_done(struct sna *sna,
				 const struct sna_composite_spans_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	gen2_vertex_flush(sna, &op->base);

	if (op->base.src.bo)
		kgem_bo_destroy(&sna->kgem, op->base.src.bo);

	sna_render_composite_redirect_done(sna, &op->base);
}
2496
2497static bool
2498gen2_check_composite_spans(struct sna *sna,
2499			   uint8_t op, PicturePtr src, PicturePtr dst,
2500			   int16_t width, int16_t height, unsigned flags)
2501{
2502	if (op >= ARRAY_SIZE(gen2_blend_op))
2503		return false;
2504
2505	if (gen2_composite_fallback(sna, src, NULL, dst))
2506		return false;
2507
2508	if (need_tiling(sna, width, height)) {
2509		if (!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
2510			DBG(("%s: fallback, tiled operation not on GPU\n",
2511			     __FUNCTION__));
2512			return false;
2513		}
2514	}
2515
2516	return true;
2517}
2518
/* Prepare a span-composite operation: bind the destination (redirecting
 * through a proxy if it exceeds the 2D pipeline limits), resolve the
 * source picture, then select the fastest primitive emitter for the
 * source type (solid / linear gradient / identity / affine texture),
 * preferring the SSE2-compiled clones on capable 32-bit CPUs.
 * Returns false (with everything cleaned up) on failure.
 */
static bool
gen2_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x,  int16_t src_y,
			    int16_t dst_x,  int16_t dst_y,
			    int16_t width,  int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__,
	     src_x, src_y, dst_x, dst_y, width, height));

	assert(gen2_check_composite_spans(sna, op, src, dst, width, height, flags));
	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	if (!gen2_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height)) {
		DBG(("%s: unable to set render target\n",
		     __FUNCTION__));
		return false;
	}

	tmp->base.op = op;

	/* Redirect through a smaller proxy target if the destination
	 * exceeds the gen2 render limits.
	 */
	sna_render_composite_redirect_init(&tmp->base);
	if (too_large(tmp->base.dst.width, tmp->base.dst.height) ||
	    tmp->base.dst.bo->pitch > MAX_3D_PITCH) {
		if (!sna_render_composite_redirect(sna, &tmp->base,
						   dst_x, dst_y, width, height,
						   true))
			return false;
	}

	switch (gen2_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		/* No usable picture: substitute transparent black. */
		gen2_composite_solid_init(sna, &tmp->base.src, 0);
		/* fall through */
	case 1:
		break;
	}

	/* Default to the generic emitter, then specialise. */
	tmp->prim_emit = gen2_emit_composite_spans_primitive;
	tmp->base.floats_per_vertex = 3;
	if (tmp->base.src.is_solid) {
#if defined(sse2) && !defined(__x86_64__)
		if (sna->cpu_features & SSE2) {
			tmp->prim_emit = gen2_emit_composite_spans_primitive_constant__sse2;
		} else
#endif
		{
			tmp->prim_emit = gen2_emit_composite_spans_primitive_constant;
		}
	} else if (tmp->base.src.is_linear) {
		tmp->base.floats_per_vertex += 2;
#if defined(sse2) && !defined(__x86_64__)
		if (sna->cpu_features & SSE2) {
			tmp->prim_emit = gen2_emit_composite_spans_primitive_linear__sse2;
		} else
#endif
		{
			tmp->prim_emit = gen2_emit_composite_spans_primitive_linear;
		}
	} else {
		assert(tmp->base.src.bo);
		tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
		if (tmp->base.src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_spans_primitive_identity_source__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_spans_primitive_identity_source;
			}
		} else if (tmp->base.src.is_affine) {
			/* Fold the homogeneous divisor into the scale. */
			tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
			tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_spans_primitive_affine_source__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_spans_primitive_affine_source;
			}
		}
	}
	tmp->base.mask.bo = NULL;
	tmp->base.floats_per_rect = 3*tmp->base.floats_per_vertex;

	tmp->box   = gen2_render_composite_spans_box;
	tmp->boxes = gen2_render_composite_spans_boxes;
	tmp->done  = gen2_render_composite_spans_done;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
	}

	gen2_emit_composite_spans_state(sna, tmp);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
2647
/* Program texture-blend stage 0 to pass the diffuse colour straight
 * through (solid fill).  The LS2 packet is unwound when identical to
 * the state already live in the batch.
 */
static void
gen2_emit_fill_pipeline(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t blend, unwind;

	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);

	blend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_ARG1 |
		TB0C_ARG1_SEL_DIFFUSE |
		TB0C_OUTPUT_WRITE_CURRENT;
	if (op->dst.format == PICT_a8)
		/* a8 destinations store alpha in the colour channel. */
		blend |= TB0C_ARG1_REPLICATE_ALPHA;
	BATCH(blend);

	BATCH(TB0A_RESULT_SCALE_1X | TB0A_OP_ARG1 |
	      TB0A_ARG1_SEL_DIFFUSE |
	      TB0A_OUTPUT_WRITE_CURRENT);

	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
		   sna->kgem.batch + unwind + 1,
		   2 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls2 = unwind;
}
2675
/* Emit state for a blended solid fill (fill_boxes path): render target,
 * xy-only vertex format with render blending, the pass-through colour
 * pipeline, and the diffuse fill colour.
 */
static void gen2_emit_fill_composite_state(struct sna *sna,
					   const struct sna_composite_op *op,
					   uint32_t pixel)
{
	uint32_t ls1;

	gen2_get_batch(sna, op);
	gen2_emit_target(sna, op);

	ls1 = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	BATCH(0);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
	BATCH(gen2_get_blend_cntl(op->op, false, op->dst.format));
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
		   sna->kgem.batch + ls1 + 1,
		   3 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = ls1;
	else
		sna->render_state.gen2.ls1 = ls1;

	gen2_emit_fill_pipeline(sna, op);

	/* Update the cached diffuse colour only when it changes. */
	if (pixel != sna->render_state.gen2.diffuse) {
		BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
		BATCH(pixel);
		sna->render_state.gen2.diffuse = pixel;
	}
}
2706
2707static bool
2708gen2_render_fill_boxes_try_blt(struct sna *sna,
2709			       CARD8 op, PictFormat format,
2710			       const xRenderColor *color,
2711			       PixmapPtr dst, struct kgem_bo *dst_bo,
2712			       const BoxRec *box, int n)
2713{
2714	uint8_t alu;
2715	uint32_t pixel;
2716
2717	if (op > PictOpSrc)
2718		return false;
2719
2720	if (op == PictOpClear) {
2721		alu = GXclear;
2722		pixel = 0;
2723	} else if (!sna_get_pixel_from_rgba(&pixel,
2724					    color->red,
2725					    color->green,
2726					    color->blue,
2727					    color->alpha,
2728					    format))
2729		return false;
2730	else
2731		alu = GXcopy;
2732
2733	return sna_blt_fill_boxes(sna, alu,
2734				  dst_bo, dst->drawable.bitsPerPixel,
2735				  pixel, box, n);
2736}
2737
/* Fill an array of boxes with a blended colour.  Tries the BLT first,
 * falls back to tiling for oversized or awkward destinations, and
 * otherwise renders the boxes with the 3D pipeline using the colour
 * pre-converted to a8r8g8b8 (the diffuse register format).
 */
static bool
gen2_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       PixmapPtr dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	struct sna_composite_op tmp;
	uint32_t pixel;

	if (op >= ARRAY_SIZE(gen2_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

#if NO_FILL_BOXES
	return gen2_render_fill_boxes_try_blt(sna, op, format, color,
					      dst, dst_bo,
					      box, n);
#endif
	/* Prefer the BLT when it can express the operation. */
	if (gen2_render_fill_boxes_try_blt(sna, op, format, color,
					   dst, dst_bo,
					   box, n))
		return true;


	DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x))\n",
	     __FUNCTION__, op, (int)format,
	     color->red, color->green, color->blue, color->alpha));

	if (too_large(dst->drawable.width, dst->drawable.height) ||
	    dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH ||
	    !gen2_check_dst_format(format)) {
		DBG(("%s: try blt, too large or incompatible destination\n",
		     __FUNCTION__));
		if (!gen2_check_dst_format(format))
			return false;

		assert(dst_bo->pitch >= 8);
		return sna_tiling_fill_boxes(sna, op, format, color,
					     dst, dst_bo, box, n);
	}

	/* Pre-convert the colour to a8r8g8b8 for the diffuse register. */
	if (op == PictOpClear)
		pixel = 0;
	else if (!sna_get_pixel_from_rgba(&pixel,
					  color->red,
					  color->green,
					  color->blue,
					  color->alpha,
					  PICT_a8r8g8b8))
		return false;

	DBG(("%s: using shader for op=%d, format=%x, pixel=%x\n",
	     __FUNCTION__, op, (int)format, pixel));

	memset(&tmp, 0, sizeof(tmp));
	tmp.op = op;
	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;
	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;

	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
	}

	gen2_emit_fill_composite_state(sna, &tmp, pixel);

	do {
		int n_this_time = gen2_get_rectangles(sna, &tmp, n);
		if (n_this_time == 0) {
			/* Batch full: start a new one and retry. */
			gen2_emit_fill_composite_state(sna, &tmp, pixel);
			n_this_time = gen2_get_rectangles(sna, &tmp, n);
		}
		n -= n_this_time;

		do {
			DBG(("	(%d, %d), (%d, %d): %x\n",
			     box->x1, box->y1, box->x2, box->y2, pixel));
			VERTEX(box->x2);
			VERTEX(box->y2);
			VERTEX(box->x1);
			VERTEX(box->y2);
			VERTEX(box->x1);
			VERTEX(box->y1);
			box++;
		} while (--n_this_time);
	} while (n);

	gen2_vertex_flush(sna, &tmp);
	return true;
}
2837
/* Emit state for an alu-based solid fill (fill/fill_one paths): render
 * target, xy-only vertex format with colour writes enabled, the logic
 * op for the requested alu, the pass-through pipeline, and the diffuse
 * fill colour.
 */
static void gen2_emit_fill_state(struct sna *sna,
				 const struct sna_composite_op *op)
{
	uint32_t ls1;

	gen2_get_batch(sna, op);
	gen2_emit_target(sna, op);

	ls1 = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	BATCH(0);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
	BATCH(S8_ENABLE_COLOR_BUFFER_WRITE);
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
		   sna->kgem.batch + ls1 + 1,
		   3 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = ls1;
	else
		sna->render_state.gen2.ls1 = ls1;

	gen2_enable_logic_op(sna, op->op);
	gen2_emit_fill_pipeline(sna, op);

	/* Update the cached diffuse colour only when it changes. */
	if (op->src.u.gen2.pixel != sna->render_state.gen2.diffuse) {
		BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
		BATCH(op->src.u.gen2.pixel);
		sna->render_state.gen2.diffuse = op->src.u.gen2.pixel;
	}
}
2868
2869static void
2870gen2_render_fill_op_blt(struct sna *sna,
2871			const struct sna_fill_op *op,
2872			int16_t x, int16_t y, int16_t w, int16_t h)
2873{
2874	if (!gen2_get_rectangles(sna, &op->base, 1)) {
2875		gen2_emit_fill_state(sna, &op->base);
2876		gen2_get_rectangles(sna, &op->base, 1);
2877	}
2878
2879	VERTEX(x+w);
2880	VERTEX(y+h);
2881	VERTEX(x);
2882	VERTEX(y+h);
2883	VERTEX(x);
2884	VERTEX(y);
2885}
2886
2887fastcall static void
2888gen2_render_fill_op_box(struct sna *sna,
2889			const struct sna_fill_op *op,
2890			const BoxRec *box)
2891{
2892	if (!gen2_get_rectangles(sna, &op->base, 1)) {
2893		gen2_emit_fill_state(sna, &op->base);
2894		gen2_get_rectangles(sna, &op->base, 1);
2895	}
2896
2897	VERTEX(box->x2);
2898	VERTEX(box->y2);
2899	VERTEX(box->x1);
2900	VERTEX(box->y2);
2901	VERTEX(box->x1);
2902	VERTEX(box->y1);
2903}
2904
/* Fill an array of boxes, reserving as many rectangles as fit in the
 * current batch per iteration and re-emitting state when the batch
 * fills.  Caller guarantees nbox > 0.
 */
fastcall static void
gen2_render_fill_op_boxes(struct sna *sna,
			  const struct sna_fill_op *op,
			  const BoxRec *box,
			  int nbox)
{
	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2, nbox));

	do {
		int nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox);
		if (nbox_this_time == 0) {
			gen2_emit_fill_state(sna, &op->base);
			nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox);
		}
		nbox -= nbox_this_time;

		do {
			VERTEX(box->x2);
			VERTEX(box->y2);
			VERTEX(box->x1);
			VERTEX(box->y2);
			VERTEX(box->x1);
			VERTEX(box->y1);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}
2933
/* Finish a fill operation by flushing any buffered vertices. */
static void
gen2_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
{
	gen2_vertex_flush(sna, &op->base);
}
2939
/* Set up a reusable solid-fill operation.  Prefers the BLT engine,
 * requires a 3D-capable destination otherwise, and populates the op
 * vtable (blt/box/boxes/done) for subsequent incremental fills.
 */
static bool
gen2_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color,
		 struct sna_fill_op *tmp)
{
#if NO_FILL
	return sna_blt_fill(sna, alu,
			    dst_bo, dst->drawable.bitsPerPixel,
			    color,
			    tmp);
#endif

	/* Prefer to use the BLT if already engaged */
	if (sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 tmp))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height) ||
	    dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH)
		return false;

	tmp->base.op = alu;
	tmp->base.dst.pixmap = dst;
	tmp->base.dst.width = dst->drawable.width;
	tmp->base.dst.height = dst->drawable.height;
	tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp->base.dst.bo = dst_bo;
	tmp->base.dst.x = tmp->base.dst.y = 0;
	tmp->base.floats_per_vertex = 2;
	tmp->base.floats_per_rect = 6;

	/* Colour is carried in the diffuse register, as a8r8g8b8. */
	tmp->base.src.u.gen2.pixel =
		sna_rgba_for_color(color, dst->drawable.depth);

	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		/* No room in the current batch: flush and retry with
		 * the BLT rather than stalling.
		 */
		kgem_submit(&sna->kgem);
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    tmp);
	}

	tmp->blt   = gen2_render_fill_op_blt;
	tmp->box   = gen2_render_fill_op_box;
	tmp->boxes = gen2_render_fill_op_boxes;
	tmp->done  = gen2_render_fill_op_done;

	gen2_emit_fill_state(sna, &tmp->base);
	return true;
}
2994
2995static bool
2996gen2_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
2997			     uint32_t color,
2998			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
2999			     uint8_t alu)
3000{
3001	BoxRec box;
3002
3003	box.x1 = x1;
3004	box.y1 = y1;
3005	box.x2 = x2;
3006	box.y2 = y2;
3007
3008	return sna_blt_fill_boxes(sna, alu,
3009				  bo, dst->drawable.bitsPerPixel,
3010				  color, &box, 1);
3011}
3012
/* Fill a single rectangle with a solid colour using the given alu.
 * Prefers the BLT; otherwise emits one rectangle through the 3D
 * pipeline and flushes immediately.
 */
static bool
gen2_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	struct sna_composite_op tmp;

#if NO_FILL_ONE
	return gen2_render_fill_one_try_blt(sna, dst, bo, color,
					    x1, y1, x2, y2, alu);
#endif

	/* Prefer to use the BLT if already engaged */
	if (gen2_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height) ||
	    bo->pitch < 8 || bo->pitch > MAX_3D_PITCH)
		return false;

	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		/* Batch full: flush and retry the BLT before falling
		 * back to the (now empty) 3D pipeline.
		 */
		kgem_submit(&sna->kgem);
		if (gen2_render_fill_one_try_blt(sna, dst, bo, color,
						 x1, y1, x2, y2, alu))
			return true;
		assert(kgem_check_bo(&sna->kgem, bo, NULL));
	}

	tmp.op = alu;
	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.src.u.gen2.pixel =
		sna_rgba_for_color(color, dst->drawable.depth);

	gen2_emit_fill_state(sna, &tmp);
	gen2_get_rectangles(sna, &tmp, 1);
	DBG(("%s: (%d, %d), (%d, %d): %x\n", __FUNCTION__,
	     x1, y1, x2, y2, tmp.src.u.gen2.pixel));
	VERTEX(x2);
	VERTEX(y2);
	VERTEX(x1);
	VERTEX(y2);
	VERTEX(x1);
	VERTEX(y1);
	gen2_vertex_flush(sna, &tmp);

	return true;
}
3072
3073static void
3074gen2_render_copy_setup_source(struct sna_composite_channel *channel,
3075			      PixmapPtr pixmap,
3076			      struct kgem_bo *bo)
3077{
3078	assert(pixmap->drawable.width && pixmap->drawable.height);
3079
3080	channel->filter = PictFilterNearest;
3081	channel->repeat = RepeatNone;
3082	channel->width  = pixmap->drawable.width;
3083	channel->height = pixmap->drawable.height;
3084	channel->scale[0] = 1.f/pixmap->drawable.width;
3085	channel->scale[1] = 1.f/pixmap->drawable.height;
3086	channel->offset[0] = 0;
3087	channel->offset[1] = 0;
3088	channel->pict_format = sna_format_for_depth(pixmap->drawable.depth);
3089	channel->bo = bo;
3090	channel->is_affine = 1;
3091
3092	DBG(("%s: source=%d, (%dx%d), format=%08x\n",
3093	     __FUNCTION__, bo->handle,
3094	     channel->width, channel->height,
3095	     channel->pict_format));
3096}
3097
/* Program texture-blend stage 0 for a copy: pass the sampled texel
 * through, substituting constant one/zero for missing source channels
 * (e.g. alpha-only or alpha-less formats).  The LS2 packet is unwound
 * when identical to the live state.
 */
static void
gen2_emit_copy_pipeline(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t blend, unwind;

	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);

	blend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_ARG1 |
		TB0C_OUTPUT_WRITE_CURRENT;
	if (op->dst.format == PICT_a8)
		blend |= TB0C_ARG1_REPLICATE_ALPHA | TB0C_ARG1_SEL_TEXEL0;
	else if (PICT_FORMAT_RGB(op->src.pict_format) != 0)
		blend |= TB0C_ARG1_SEL_TEXEL0;
	else
		blend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT;	/* 0.0 */
	BATCH(blend);

	blend = TB0A_RESULT_SCALE_1X | TB0A_OP_ARG1 |
		TB0A_OUTPUT_WRITE_CURRENT;
	if (PICT_FORMAT_A(op->src.pict_format) == 0)
		/* No source alpha: treat as opaque. */
		blend |= TB0A_ARG1_SEL_ONE;
	else
		blend |= TB0A_ARG1_SEL_TEXEL0;
	BATCH(blend);

	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
		   sna->kgem.batch + unwind + 1,
		   2 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls2 = unwind;
}
3132
/* Emit the full 3D state for a copy: flush caches if the source bo has
 * pending writes (a full MI_FLUSH when copying within the same bo),
 * bind the target, set the vertex format (xy + one 2D texcoord), the
 * logic op, the copy pipeline, and the source texture.
 */
static void gen2_emit_copy_state(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t ls1, v;

	gen2_get_batch(sna, op);

	if (kgem_bo_is_dirty(op->src.bo)) {
		if (op->src.bo == op->dst.bo)
			BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
		else
			BATCH(_3DSTATE_MODES_5_CMD |
			      PIPELINE_FLUSH_RENDER_CACHE |
			      PIPELINE_FLUSH_TEXTURE_CACHE);
		kgem_clear_dirty(&sna->kgem);
	}
	gen2_emit_target(sna, op);

	ls1 = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	/* S2: one texture coordinate set for the source sampler. */
	BATCH(1<<12);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
	BATCH(S8_ENABLE_COLOR_BUFFER_WRITE);
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
		   sna->kgem.batch + ls1 + 1,
		   3 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = ls1;
	else
		sna->render_state.gen2.ls1 = ls1;

	gen2_enable_logic_op(sna, op->op);
	gen2_emit_copy_pipeline(sna, op);

	v = _3DSTATE_VERTEX_FORMAT_2_CMD | TEXCOORDFMT_2D;
	if (sna->render_state.gen2.vft != v) {
		BATCH(v);
		sna->render_state.gen2.vft = v;
	}

	gen2_emit_texture(sna, &op->src, 0);
}
3174
/* Copy an array of boxes from src to dst with the 3D pipeline,
 * preferring the BLT engine when the depths are compatible and falling
 * back to tiled or software paths when the surfaces exceed the gen2
 * 3D limits.  Returns true if the copy was queued. */
static bool
gen2_render_copy_boxes(struct sna *sna, uint8_t alu,
		       PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
		       PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
		       const BoxRec *box, int n, unsigned flags)
{
	struct sna_composite_op tmp;

#if NO_COPY_BOXES
	if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
		return false;

	return sna_blt_copy_boxes(sna, alu,
				  src_bo, src_dx, src_dy,
				  dst_bo, dst_dx, dst_dy,
				  dst->drawable.bitsPerPixel,
				  box, n);
#endif

	DBG(("%s (%d, %d)->(%d, %d) x %d\n",
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));

	/* Prefer the BLT engine whenever it can handle the operation */
	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	    sna_blt_copy_boxes(sna, alu,
			       src_bo, src_dx, src_dy,
			       dst_bo, dst_dx, dst_dy,
			       dst->drawable.bitsPerPixel,
			       box, n))
		return true;

	/* The 3D path cannot handle overlapping buffers, oversized
	 * sources, or pitches outside the hardware limits. */
	if (src_bo == dst_bo || /* XXX handle overlap using 3D ? */
	    too_large(src->drawable.width, src->drawable.height) ||
	    src_bo->pitch > MAX_3D_PITCH || dst_bo->pitch < 8) {
fallback:
		return sna_blt_copy_boxes_fallback(sna, alu,
						   src, src_bo, src_dx, src_dy,
						   dst, dst_bo, dst_dx, dst_dy,
						   box, n);
	}

	/* Ensure both buffers fit in this batch, flushing once if needed */
	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
			goto fallback;
	}

	assert(dst_bo->pitch >= 8);

	memset(&tmp, 0, sizeof(tmp));
	tmp.op = alu;

	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	DBG(("%s: target=%d, format=%08x, size=%dx%d\n",
	     __FUNCTION__, dst_bo->handle,
	     (unsigned)tmp.dst.format,
	     tmp.dst.width,
	     tmp.dst.height));

	/* Destination too large for the 3D pipe: redirect rendering to a
	 * proxy surface covering the extents of all the boxes. */
	sna_render_composite_redirect_init(&tmp);
	if (too_large(tmp.dst.width, tmp.dst.height) ||
	    dst_bo->pitch > MAX_3D_PITCH) {
		BoxRec extents = box[0];
		int i;

		/* Accumulate the bounding box of every box in the list */
		for (i = 1; i < n; i++) {
			if (box[i].x1 < extents.x1)
				extents.x1 = box[i].x1;
			if (box[i].y1 < extents.y1)
				extents.y1 = box[i].y1;

			if (box[i].x2 > extents.x2)
				extents.x2 = box[i].x2;
			if (box[i].y2 > extents.y2)
				extents.y2 = box[i].y2;
		}
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1 + dst_dx,
						   extents.y1 + dst_dy,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   alu != GXcopy || n > 1))
			goto fallback_tiled;
	}

	tmp.floats_per_vertex = 4;	/* x, y, u, v */
	tmp.floats_per_rect = 12;

	/* Fold any proxy offset into the destination deltas */
	dst_dx += tmp.dst.x;
	dst_dy += tmp.dst.y;
	tmp.dst.x = tmp.dst.y = 0;

	gen2_render_copy_setup_source(&tmp.src, src, src_bo);
	gen2_emit_copy_state(sna, &tmp);
	do {
		int n_this_time;

		/* Reserve space for as many rectangles as fit in this
		 * batch, re-emitting all state after a forced flush. */
		n_this_time = gen2_get_rectangles(sna, &tmp, n);
		if (n_this_time == 0) {
			gen2_emit_copy_state(sna, &tmp);
			n_this_time = gen2_get_rectangles(sna, &tmp, n);
		}
		n -= n_this_time;

		/* Three vertices per rectangle — bottom-right, bottom-left,
		 * top-left — each carrying a scaled 2D texture coordinate. */
		do {
			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
			     box->x1 + src_dx, box->y1 + src_dy,
			     box->x1 + dst_dx, box->y1 + dst_dy,
			     box->x2 - box->x1, box->y2 - box->y1));
			VERTEX(box->x2 + dst_dx);
			VERTEX(box->y2 + dst_dy);
			VERTEX((box->x2 + src_dx) * tmp.src.scale[0]);
			VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);

			VERTEX(box->x1 + dst_dx);
			VERTEX(box->y2 + dst_dy);
			VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
			VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);

			VERTEX(box->x1 + dst_dx);
			VERTEX(box->y1 + dst_dy);
			VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
			VERTEX((box->y1 + src_dy) * tmp.src.scale[1]);

			box++;
		} while (--n_this_time);
	} while (n);

	gen2_vertex_flush(sna, &tmp);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;

fallback_tiled:
	return sna_tiling_copy_boxes(sna, alu,
				     src, src_bo, src_dx, src_dy,
				     dst, dst_bo, dst_dx, dst_dy,
				     box, n);
}
3319
3320static void
3321gen2_render_copy_blt(struct sna *sna,
3322		     const struct sna_copy_op *op,
3323		     int16_t sx, int16_t sy,
3324		     int16_t w, int16_t h,
3325		     int16_t dx, int16_t dy)
3326{
3327	if (!gen2_get_rectangles(sna, &op->base, 1)) {
3328		gen2_emit_copy_state(sna, &op->base);
3329		gen2_get_rectangles(sna, &op->base, 1);
3330	}
3331
3332	VERTEX(dx+w);
3333	VERTEX(dy+h);
3334	VERTEX((sx+w)*op->base.src.scale[0]);
3335	VERTEX((sy+h)*op->base.src.scale[1]);
3336
3337	VERTEX(dx);
3338	VERTEX(dy+h);
3339	VERTEX(sx*op->base.src.scale[0]);
3340	VERTEX((sy+h)*op->base.src.scale[1]);
3341
3342	VERTEX(dx);
3343	VERTEX(dy);
3344	VERTEX(sx*op->base.src.scale[0]);
3345	VERTEX(sy*op->base.src.scale[1]);
3346}
3347
/* Complete a copy operation started by gen2_render_copy(): flush any
 * vertices still buffered for the emitted rectangles. */
static void
gen2_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
	gen2_vertex_flush(sna, &op->base);
}
3353
3354static bool
3355gen2_render_copy(struct sna *sna, uint8_t alu,
3356		 PixmapPtr src, struct kgem_bo *src_bo,
3357		 PixmapPtr dst, struct kgem_bo *dst_bo,
3358		 struct sna_copy_op *tmp)
3359{
3360#if NO_COPY
3361	if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
3362		return false;
3363
3364	return sna_blt_copy(sna, alu,
3365			    src_bo, dst_bo,
3366			    dst->drawable.bitsPerPixel,
3367			    tmp);
3368#endif
3369
3370	/* Prefer to use the BLT */
3371	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
3372	    sna_blt_copy(sna, alu,
3373			 src_bo, dst_bo,
3374			 dst->drawable.bitsPerPixel,
3375			 tmp))
3376		return true;
3377
3378	/* Must use the BLT if we can't RENDER... */
3379	if (too_large(src->drawable.width, src->drawable.height) ||
3380	    too_large(dst->drawable.width, dst->drawable.height) ||
3381	    src_bo->pitch > MAX_3D_PITCH ||
3382	    dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH) {
3383fallback:
3384		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
3385			return false;
3386
3387		return sna_blt_copy(sna, alu, src_bo, dst_bo,
3388				    dst->drawable.bitsPerPixel,
3389				    tmp);
3390	}
3391
3392	tmp->base.op = alu;
3393
3394	tmp->base.dst.pixmap = dst;
3395	tmp->base.dst.width = dst->drawable.width;
3396	tmp->base.dst.height = dst->drawable.height;
3397	tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
3398	tmp->base.dst.bo = dst_bo;
3399
3400	gen2_render_copy_setup_source(&tmp->base.src, src, src_bo);
3401	tmp->base.mask.bo = NULL;
3402
3403	tmp->base.floats_per_vertex = 4;
3404	tmp->base.floats_per_rect = 12;
3405
3406	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
3407		kgem_submit(&sna->kgem);
3408		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
3409			goto fallback;
3410	}
3411
3412	tmp->blt  = gen2_render_copy_blt;
3413	tmp->done = gen2_render_copy_done;
3414
3415	gen2_emit_composite_state(sna, &tmp->base);
3416	return true;
3417}
3418
3419static void
3420gen2_render_reset(struct sna *sna)
3421{
3422	sna->render_state.gen2.need_invariant = true;
3423	sna->render_state.gen2.logic_op_enabled = 0;
3424	sna->render_state.gen2.target = 0;
3425
3426	sna->render_state.gen2.ls1 = 0;
3427	sna->render_state.gen2.ls2 = 0;
3428	sna->render_state.gen2.vft = 0;
3429
3430	sna->render_state.gen2.diffuse = 0x0c0ffee0;
3431	sna->render_state.gen2.specular = 0x0c0ffee0;
3432}
3433
/* Flush hook for the render core: on gen2 there is nothing to emit,
 * only sanity checks that no vertex data remains buffered. */
static void
gen2_render_flush(struct sna *sna)
{
	assert(sna->render.vertex_index == 0);
	assert(sna->render.vertex_offset == 0);
}
3440
/* kgem callback invoked when the batch switches mode (new_mode is
 * unused here).  Invalidates cached BLT register state and submits
 * the pending batch early if the target ring is already idle. */
static void
gen2_render_context_switch(struct kgem *kgem,
			   int new_mode)
{
	struct sna *sna = container_of(kgem, struct sna, kgem);

	/* Nothing queued, nothing to do */
	if (!kgem->nbatch)
		return;

	/* Reload BLT registers following a lost context */
	sna->blt_state.fill_bo = 0;

	/* If the GPU has gone idle, flush now rather than accumulating
	 * more work on top of the mode switch. */
	if (kgem_ring_is_idle(kgem, kgem->ring)) {
		DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
		_kgem_submit(kgem);
	}
}
3458
/* Hook the gen2 (i830/i845-class) 3D pipeline into the sna render
 * core and return the backend name.  The incoming 'backend' string is
 * unused here (presumably the name of the backend being replaced —
 * confirm against the caller). */
const char *gen2_render_init(struct sna *sna, const char *backend)
{
	struct sna_render *render = &sna->render;

	sna->kgem.context_switch = gen2_render_context_switch;

	/* Use the BLT (and overlay) for everything except when forced to
	 * use the texture combiners.
	 */
#if !NO_COMPOSITE
	render->composite = gen2_render_composite;
	render->prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	render->check_composite_spans = gen2_check_composite_spans;
	render->composite_spans = gen2_render_composite_spans;
	render->prefer_gpu |= PREFER_GPU_SPANS;
#endif
	render->fill_boxes = gen2_render_fill_boxes;
	render->fill = gen2_render_fill;
	render->fill_one = gen2_render_fill_one;
	render->copy = gen2_render_copy;
	render->copy_boxes = gen2_render_copy_boxes;

	/* XXX YUV color space conversion for video? */

	render->reset = gen2_render_reset;
	render->flush = gen2_render_flush;

	/* Hardware limits for the gen2 3D pipeline */
	render->max_3d_size = MAX_3D_SIZE;
	render->max_3d_pitch = MAX_3D_PITCH;
	return "Almador (gen2)";
}
3492