1428d7b3dSmrg/*
2428d7b3dSmrg * Copyright © 2006,2011 Intel Corporation
3428d7b3dSmrg *
4428d7b3dSmrg * Permission is hereby granted, free of charge, to any person obtaining a
5428d7b3dSmrg * copy of this software and associated documentation files (the "Software"),
6428d7b3dSmrg * to deal in the Software without restriction, including without limitation
7428d7b3dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8428d7b3dSmrg * and/or sell copies of the Software, and to permit persons to whom the
9428d7b3dSmrg * Software is furnished to do so, subject to the following conditions:
10428d7b3dSmrg *
11428d7b3dSmrg * The above copyright notice and this permission notice (including the next
12428d7b3dSmrg * paragraph) shall be included in all copies or substantial portions of the
13428d7b3dSmrg * Software.
14428d7b3dSmrg *
15428d7b3dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16428d7b3dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17428d7b3dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18428d7b3dSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19428d7b3dSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20428d7b3dSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21428d7b3dSmrg * SOFTWARE.
22428d7b3dSmrg *
23428d7b3dSmrg * Authors:
24428d7b3dSmrg *    Wang Zhenyu <zhenyu.z.wang@intel.com>
25428d7b3dSmrg *    Eric Anholt <eric@anholt.net>
26428d7b3dSmrg *    Chris Wilson <chris@chris-wilson.co.uk>
27428d7b3dSmrg *
28428d7b3dSmrg */
29428d7b3dSmrg
30428d7b3dSmrg#ifdef HAVE_CONFIG_H
31428d7b3dSmrg#include "config.h"
32428d7b3dSmrg#endif
33428d7b3dSmrg
34428d7b3dSmrg#include "sna.h"
35428d7b3dSmrg#include "sna_reg.h"
36428d7b3dSmrg#include "sna_render.h"
37428d7b3dSmrg#include "sna_render_inline.h"
38428d7b3dSmrg
39428d7b3dSmrg#include "gen2_render.h"
40428d7b3dSmrg
/*
 * Per-operation switches, all left at 0 here.
 * NOTE(review): their users are outside this part of the file; presumably a
 * non-zero value disables the matching accelerated path -- confirm at the
 * use sites.
 */
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0

/* Largest surface dimension (see too_large()) and largest destination
 * pitch (asserted in gen2_emit_target()) accepted for 3D rendering.
 */
#define MAX_3D_SIZE 2048
#define MAX_3D_PITCH 8192

/* Emit helpers; each expands to a call that uses a variable named `sna`
 * from the enclosing scope.
 */
#define BATCH(v) batch_emit(sna, v)
#define BATCH_F(v) batch_emit_float(sna, v)
#define VERTEX(v) batch_emit_float(sna, v)
55428d7b3dSmrg
56428d7b3dSmrgstatic const struct blendinfo {
57428d7b3dSmrg	bool dst_alpha;
58428d7b3dSmrg	bool src_alpha;
59428d7b3dSmrg	uint32_t src_blend;
60428d7b3dSmrg	uint32_t dst_blend;
61428d7b3dSmrg} gen2_blend_op[] = {
62428d7b3dSmrg	/* Clear */
63428d7b3dSmrg	{0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO},
64428d7b3dSmrg	/* Src */
65428d7b3dSmrg	{0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO},
66428d7b3dSmrg	/* Dst */
67428d7b3dSmrg	{0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE},
68428d7b3dSmrg	/* Over */
69428d7b3dSmrg	{0, 1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA},
70428d7b3dSmrg	/* OverReverse */
71428d7b3dSmrg	{1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE},
72428d7b3dSmrg	/* In */
73428d7b3dSmrg	{1, 0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO},
74428d7b3dSmrg	/* InReverse */
75428d7b3dSmrg	{0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA},
76428d7b3dSmrg	/* Out */
77428d7b3dSmrg	{1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO},
78428d7b3dSmrg	/* OutReverse */
79428d7b3dSmrg	{0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA},
80428d7b3dSmrg	/* Atop */
81428d7b3dSmrg	{1, 1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
82428d7b3dSmrg	/* AtopReverse */
83428d7b3dSmrg	{1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA},
84428d7b3dSmrg	/* Xor */
85428d7b3dSmrg	{1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
86428d7b3dSmrg	/* Add */
87428d7b3dSmrg	{0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ONE},
88428d7b3dSmrg};
89428d7b3dSmrg
90428d7b3dSmrgstatic const struct formatinfo {
91428d7b3dSmrg	unsigned int fmt;
92428d7b3dSmrg	uint32_t card_fmt;
93428d7b3dSmrg} i8xx_tex_formats[] = {
94428d7b3dSmrg	{PICT_a8, MAPSURF_8BIT | MT_8BIT_A8},
95428d7b3dSmrg	{PICT_a8r8g8b8, MAPSURF_32BIT | MT_32BIT_ARGB8888},
96428d7b3dSmrg	{PICT_a8b8g8r8, MAPSURF_32BIT | MT_32BIT_ABGR8888},
97428d7b3dSmrg	{PICT_r5g6b5, MAPSURF_16BIT | MT_16BIT_RGB565},
98428d7b3dSmrg	{PICT_a1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555},
99428d7b3dSmrg	{PICT_a4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444},
100428d7b3dSmrg}, i85x_tex_formats[] = {
101428d7b3dSmrg	{PICT_x8r8g8b8, MAPSURF_32BIT | MT_32BIT_XRGB8888},
102428d7b3dSmrg	{PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888},
103428d7b3dSmrg};
104428d7b3dSmrg
105428d7b3dSmrgstatic inline bool
106428d7b3dSmrgtoo_large(int width, int height)
107428d7b3dSmrg{
108428d7b3dSmrg	return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
109428d7b3dSmrg}
110428d7b3dSmrg
111428d7b3dSmrgstatic inline uint32_t
112428d7b3dSmrggen2_buf_tiling(uint32_t tiling)
113428d7b3dSmrg{
114428d7b3dSmrg	uint32_t v = 0;
115428d7b3dSmrg	switch (tiling) {
116428d7b3dSmrg	default: assert(0);
117428d7b3dSmrg	case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y;
118428d7b3dSmrg	case I915_TILING_X: v |= BUF_3D_TILED_SURFACE;
119428d7b3dSmrg	case I915_TILING_NONE: break;
120428d7b3dSmrg	}
121428d7b3dSmrg	return v;
122428d7b3dSmrg}
123428d7b3dSmrg
124428d7b3dSmrgstatic uint32_t
125428d7b3dSmrggen2_get_dst_format(uint32_t format)
126428d7b3dSmrg{
127428d7b3dSmrg#define BIAS DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8)
128428d7b3dSmrg	switch (format) {
129428d7b3dSmrg	default:
130428d7b3dSmrg		assert(0);
131428d7b3dSmrg	case PICT_a8r8g8b8:
132428d7b3dSmrg	case PICT_x8r8g8b8:
133428d7b3dSmrg		return COLR_BUF_ARGB8888 | BIAS;
134428d7b3dSmrg	case PICT_r5g6b5:
135428d7b3dSmrg		return COLR_BUF_RGB565 | BIAS;
136428d7b3dSmrg	case PICT_a1r5g5b5:
137428d7b3dSmrg	case PICT_x1r5g5b5:
138428d7b3dSmrg		return COLR_BUF_ARGB1555 | BIAS;
139428d7b3dSmrg	case PICT_a8:
140428d7b3dSmrg		return COLR_BUF_8BIT | BIAS;
141428d7b3dSmrg	case PICT_a4r4g4b4:
142428d7b3dSmrg	case PICT_x4r4g4b4:
143428d7b3dSmrg		return COLR_BUF_ARGB4444 | BIAS;
144428d7b3dSmrg	}
145428d7b3dSmrg#undef BIAS
146428d7b3dSmrg}
147428d7b3dSmrg
148428d7b3dSmrgstatic bool
149428d7b3dSmrggen2_check_dst_format(uint32_t format)
150428d7b3dSmrg{
151428d7b3dSmrg	switch (format) {
152428d7b3dSmrg	case PICT_a8r8g8b8:
153428d7b3dSmrg	case PICT_x8r8g8b8:
154428d7b3dSmrg	case PICT_r5g6b5:
155428d7b3dSmrg	case PICT_a1r5g5b5:
156428d7b3dSmrg	case PICT_x1r5g5b5:
157428d7b3dSmrg	case PICT_a8:
158428d7b3dSmrg	case PICT_a4r4g4b4:
159428d7b3dSmrg	case PICT_x4r4g4b4:
160428d7b3dSmrg		return true;
161428d7b3dSmrg	default:
162428d7b3dSmrg		return false;
163428d7b3dSmrg	}
164428d7b3dSmrg}
165428d7b3dSmrg
166428d7b3dSmrgstatic uint32_t
167428d7b3dSmrggen2_get_card_format(struct sna *sna, uint32_t format)
168428d7b3dSmrg{
169428d7b3dSmrg	unsigned int i;
170428d7b3dSmrg
171428d7b3dSmrg	for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++)
172428d7b3dSmrg		if (i8xx_tex_formats[i].fmt == format)
173428d7b3dSmrg			return i8xx_tex_formats[i].card_fmt;
174428d7b3dSmrg
175428d7b3dSmrg	if (sna->kgem.gen < 021) {
176428d7b3dSmrg		/* Whilst these are not directly supported on 830/845,
177428d7b3dSmrg		 * we only enable them when we can implicitly convert
178428d7b3dSmrg		 * them to a supported variant through the texture
179428d7b3dSmrg		 * combiners.
180428d7b3dSmrg		 */
181428d7b3dSmrg		for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++)
182428d7b3dSmrg			if (i85x_tex_formats[i].fmt == format)
183428d7b3dSmrg				return i8xx_tex_formats[1+i].card_fmt;
184428d7b3dSmrg	} else {
185428d7b3dSmrg		for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++)
186428d7b3dSmrg			if (i85x_tex_formats[i].fmt == format)
187428d7b3dSmrg				return i85x_tex_formats[i].card_fmt;
188428d7b3dSmrg	}
189428d7b3dSmrg
190428d7b3dSmrg	assert(0);
191428d7b3dSmrg	return 0;
192428d7b3dSmrg}
193428d7b3dSmrg
194428d7b3dSmrgstatic uint32_t
195428d7b3dSmrggen2_check_format(struct sna *sna, PicturePtr p)
196428d7b3dSmrg{
197428d7b3dSmrg	unsigned int i;
198428d7b3dSmrg
199428d7b3dSmrg	for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++)
200428d7b3dSmrg		if (i8xx_tex_formats[i].fmt == p->format)
201428d7b3dSmrg			return true;
202428d7b3dSmrg
203428d7b3dSmrg	if (sna->kgem.gen > 021) {
204428d7b3dSmrg		for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++)
205428d7b3dSmrg			if (i85x_tex_formats[i].fmt == p->format)
206428d7b3dSmrg				return true;
207428d7b3dSmrg	}
208428d7b3dSmrg
209428d7b3dSmrg	return false;
210428d7b3dSmrg}
211428d7b3dSmrg
212428d7b3dSmrgstatic uint32_t
213428d7b3dSmrggen2_sampler_tiling_bits(uint32_t tiling)
214428d7b3dSmrg{
215428d7b3dSmrg	uint32_t bits = 0;
216428d7b3dSmrg	switch (tiling) {
217428d7b3dSmrg	default:
218428d7b3dSmrg		assert(0);
219428d7b3dSmrg	case I915_TILING_Y:
220428d7b3dSmrg		bits |= TM0S1_TILE_WALK;
221428d7b3dSmrg	case I915_TILING_X:
222428d7b3dSmrg		bits |= TM0S1_TILED_SURFACE;
223428d7b3dSmrg	case I915_TILING_NONE:
224428d7b3dSmrg		break;
225428d7b3dSmrg	}
226428d7b3dSmrg	return bits;
227428d7b3dSmrg}
228428d7b3dSmrg
229428d7b3dSmrgstatic bool
230428d7b3dSmrggen2_check_filter(PicturePtr picture)
231428d7b3dSmrg{
232428d7b3dSmrg	switch (picture->filter) {
233428d7b3dSmrg	case PictFilterNearest:
234428d7b3dSmrg	case PictFilterBilinear:
235428d7b3dSmrg		return true;
236428d7b3dSmrg	default:
237428d7b3dSmrg		return false;
238428d7b3dSmrg	}
239428d7b3dSmrg}
240428d7b3dSmrg
241428d7b3dSmrgstatic bool
242428d7b3dSmrggen2_check_repeat(PicturePtr picture)
243428d7b3dSmrg{
244428d7b3dSmrg	if (!picture->repeat)
245428d7b3dSmrg		return true;
246428d7b3dSmrg
247428d7b3dSmrg	switch (picture->repeatType) {
248428d7b3dSmrg	case RepeatNone:
249428d7b3dSmrg	case RepeatNormal:
250428d7b3dSmrg	case RepeatPad:
251428d7b3dSmrg	case RepeatReflect:
252428d7b3dSmrg		return true;
253428d7b3dSmrg	default:
254428d7b3dSmrg		return false;
255428d7b3dSmrg	}
256428d7b3dSmrg}
257428d7b3dSmrg
258428d7b3dSmrgstatic void
259428d7b3dSmrggen2_emit_texture(struct sna *sna,
260428d7b3dSmrg		  const struct sna_composite_channel *channel,
261428d7b3dSmrg		  int unit)
262428d7b3dSmrg{
263428d7b3dSmrg	uint32_t wrap_mode_u, wrap_mode_v;
264428d7b3dSmrg	uint32_t texcoordtype;
265428d7b3dSmrg	uint32_t filter;
266428d7b3dSmrg
267428d7b3dSmrg	assert(channel->bo);
268428d7b3dSmrg
269428d7b3dSmrg	if (channel->is_affine)
270428d7b3dSmrg		texcoordtype = TEXCOORDTYPE_CARTESIAN;
271428d7b3dSmrg	else
272428d7b3dSmrg		texcoordtype = TEXCOORDTYPE_HOMOGENEOUS;
273428d7b3dSmrg
274428d7b3dSmrg	switch (channel->repeat) {
275428d7b3dSmrg	default:
276428d7b3dSmrg		assert(0);
277428d7b3dSmrg	case RepeatNone:
278428d7b3dSmrg		wrap_mode_u = TEXCOORDMODE_CLAMP_BORDER;
279428d7b3dSmrg		break;
280428d7b3dSmrg	case RepeatNormal:
281428d7b3dSmrg		wrap_mode_u = TEXCOORDMODE_WRAP;
282428d7b3dSmrg		break;
283428d7b3dSmrg	case RepeatPad:
284428d7b3dSmrg		wrap_mode_u = TEXCOORDMODE_CLAMP;
285428d7b3dSmrg		break;
286428d7b3dSmrg	case RepeatReflect:
287428d7b3dSmrg		wrap_mode_u = TEXCOORDMODE_MIRROR;
288428d7b3dSmrg		break;
289428d7b3dSmrg	}
290428d7b3dSmrg	if (channel->is_linear)
291428d7b3dSmrg		wrap_mode_v = TEXCOORDMODE_WRAP;
292428d7b3dSmrg	else
293428d7b3dSmrg		wrap_mode_v = wrap_mode_u;
294428d7b3dSmrg
295428d7b3dSmrg	switch (channel->filter) {
296428d7b3dSmrg	default:
297428d7b3dSmrg		assert(0);
298428d7b3dSmrg	case PictFilterNearest:
299428d7b3dSmrg		filter = (FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT |
300428d7b3dSmrg			  FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT |
301428d7b3dSmrg			  MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT);
302428d7b3dSmrg		break;
303428d7b3dSmrg	case PictFilterBilinear:
304428d7b3dSmrg		filter = (FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT |
305428d7b3dSmrg			  FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT |
306428d7b3dSmrg			  MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT);
307428d7b3dSmrg		break;
308428d7b3dSmrg	}
309428d7b3dSmrg
310428d7b3dSmrg	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | LOAD_TEXTURE_MAP(unit) | 4);
311428d7b3dSmrg	BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
312428d7b3dSmrg			     channel->bo,
313428d7b3dSmrg			     I915_GEM_DOMAIN_SAMPLER << 16,
314428d7b3dSmrg			     0));
315428d7b3dSmrg	BATCH(((channel->height - 1) << TM0S1_HEIGHT_SHIFT) |
316428d7b3dSmrg	      ((channel->width - 1)  << TM0S1_WIDTH_SHIFT) |
317428d7b3dSmrg	      gen2_get_card_format(sna, channel->pict_format) |
318428d7b3dSmrg	      gen2_sampler_tiling_bits(channel->bo->tiling));
319428d7b3dSmrg	BATCH((channel->bo->pitch / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D);
320428d7b3dSmrg	BATCH(filter);
321428d7b3dSmrg	BATCH(0);	/* default color */
322428d7b3dSmrg
323428d7b3dSmrg	BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(unit) |
324428d7b3dSmrg	      ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | texcoordtype |
325428d7b3dSmrg	      ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(wrap_mode_v) |
326428d7b3dSmrg	      ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(wrap_mode_u));
327428d7b3dSmrg}
328428d7b3dSmrg
329428d7b3dSmrgstatic void
330428d7b3dSmrggen2_get_blend_factors(const struct sna_composite_op *op,
331428d7b3dSmrg		       int blend,
332428d7b3dSmrg		       uint32_t *c_out,
333428d7b3dSmrg		       uint32_t *a_out)
334428d7b3dSmrg{
335428d7b3dSmrg	uint32_t cblend, ablend;
336428d7b3dSmrg
337428d7b3dSmrg	/* If component alpha is active in the mask and the blend operation
338428d7b3dSmrg	 * uses the source alpha, then we know we don't need the source
339428d7b3dSmrg	 * value (otherwise we would have hit a fallback earlier), so we
340428d7b3dSmrg	 * provide the source alpha (src.A * mask.X) as output color.
341428d7b3dSmrg	 * Conversely, if CA is set and we don't need the source alpha, then
342428d7b3dSmrg	 * we produce the source value (src.X * mask.X) and the source alpha
343428d7b3dSmrg	 * is unused..  Otherwise, we provide the non-CA source value
344428d7b3dSmrg	 * (src.X * mask.A).
345428d7b3dSmrg	 *
346428d7b3dSmrg	 * The PICT_FORMAT_RGB(pict) == 0 fixups are not needed on 855+'s a8
347428d7b3dSmrg	 * pictures, but we need to implement it for 830/845 and there's no
348428d7b3dSmrg	 * harm done in leaving it in.
349428d7b3dSmrg	 */
350428d7b3dSmrg	cblend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OUTPUT_WRITE_CURRENT;
351428d7b3dSmrg	ablend = TB0A_RESULT_SCALE_1X | TB0A_OUTPUT_WRITE_CURRENT;
352428d7b3dSmrg
353428d7b3dSmrg	/* Get the source picture's channels into TBx_ARG1 */
354428d7b3dSmrg	if ((op->has_component_alpha && gen2_blend_op[blend].src_alpha) ||
355428d7b3dSmrg	    op->dst.format == PICT_a8) {
356428d7b3dSmrg		/* Producing source alpha value, so the first set of channels
357428d7b3dSmrg		 * is src.A instead of src.X.  We also do this if the destination
358428d7b3dSmrg		 * is a8, in which case src.G is what's written, and the other
359428d7b3dSmrg		 * channels are ignored.
360428d7b3dSmrg		 */
361428d7b3dSmrg		if (op->src.is_opaque) {
362428d7b3dSmrg			ablend |= TB0C_ARG1_SEL_ONE;
363428d7b3dSmrg			cblend |= TB0C_ARG1_SEL_ONE;
364428d7b3dSmrg		} else if (op->src.is_solid) {
365428d7b3dSmrg			ablend |= TB0C_ARG1_SEL_DIFFUSE;
366428d7b3dSmrg			cblend |= TB0C_ARG1_SEL_DIFFUSE | TB0C_ARG1_REPLICATE_ALPHA;
367428d7b3dSmrg		} else {
368428d7b3dSmrg			ablend |= TB0C_ARG1_SEL_TEXEL0;
369428d7b3dSmrg			cblend |= TB0C_ARG1_SEL_TEXEL0 | TB0C_ARG1_REPLICATE_ALPHA;
370428d7b3dSmrg		}
371428d7b3dSmrg	} else {
372428d7b3dSmrg		if (op->src.is_solid)
373428d7b3dSmrg			cblend |= TB0C_ARG1_SEL_DIFFUSE;
374428d7b3dSmrg		else if (PICT_FORMAT_RGB(op->src.pict_format) != 0)
375428d7b3dSmrg			cblend |= TB0C_ARG1_SEL_TEXEL0;
376428d7b3dSmrg		else
377428d7b3dSmrg			cblend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT;	/* 0.0 */
378428d7b3dSmrg
379428d7b3dSmrg		if (op->src.is_opaque)
380428d7b3dSmrg			ablend |= TB0A_ARG1_SEL_ONE;
381428d7b3dSmrg		else if (op->src.is_solid)
382428d7b3dSmrg			ablend |= TB0A_ARG1_SEL_DIFFUSE;
383428d7b3dSmrg		else
384428d7b3dSmrg			ablend |= TB0A_ARG1_SEL_TEXEL0;
385428d7b3dSmrg	}
386428d7b3dSmrg
387428d7b3dSmrg	if (op->mask.bo) {
388428d7b3dSmrg		if (op->src.is_solid) {
389428d7b3dSmrg			cblend |= TB0C_ARG2_SEL_TEXEL0;
390428d7b3dSmrg			ablend |= TB0A_ARG2_SEL_TEXEL0;
391428d7b3dSmrg		} else {
392428d7b3dSmrg			cblend |= TB0C_ARG2_SEL_TEXEL1;
393428d7b3dSmrg			ablend |= TB0A_ARG2_SEL_TEXEL1;
394428d7b3dSmrg		}
395428d7b3dSmrg
396428d7b3dSmrg		if (op->dst.format == PICT_a8 || !op->has_component_alpha)
397428d7b3dSmrg			cblend |= TB0C_ARG2_REPLICATE_ALPHA;
398428d7b3dSmrg
399428d7b3dSmrg		cblend |= TB0C_OP_MODULATE;
400428d7b3dSmrg		ablend |= TB0A_OP_MODULATE;
401428d7b3dSmrg	} else if (op->mask.is_solid) {
402428d7b3dSmrg		cblend |= TB0C_ARG2_SEL_DIFFUSE;
403428d7b3dSmrg		ablend |= TB0A_ARG2_SEL_DIFFUSE;
404428d7b3dSmrg
405428d7b3dSmrg		if (op->dst.format == PICT_a8 || !op->has_component_alpha)
406428d7b3dSmrg			cblend |= TB0C_ARG2_REPLICATE_ALPHA;
407428d7b3dSmrg
408428d7b3dSmrg		cblend |= TB0C_OP_MODULATE;
409428d7b3dSmrg		ablend |= TB0A_OP_MODULATE;
410428d7b3dSmrg	} else {
411428d7b3dSmrg		cblend |= TB0C_OP_ARG1;
412428d7b3dSmrg		ablend |= TB0A_OP_ARG1;
413428d7b3dSmrg	}
414428d7b3dSmrg
415428d7b3dSmrg	*c_out = cblend;
416428d7b3dSmrg	*a_out = ablend;
417428d7b3dSmrg}
418428d7b3dSmrg
419428d7b3dSmrgstatic uint32_t gen2_get_blend_cntl(int op,
420428d7b3dSmrg				    bool has_component_alpha,
421428d7b3dSmrg				    uint32_t dst_format)
422428d7b3dSmrg{
423428d7b3dSmrg	uint32_t sblend, dblend;
424428d7b3dSmrg
425428d7b3dSmrg	if (op <= PictOpSrc)
426428d7b3dSmrg		return S8_ENABLE_COLOR_BUFFER_WRITE;
427428d7b3dSmrg
428428d7b3dSmrg	sblend = gen2_blend_op[op].src_blend;
429428d7b3dSmrg	dblend = gen2_blend_op[op].dst_blend;
430428d7b3dSmrg
431428d7b3dSmrg	if (gen2_blend_op[op].dst_alpha) {
432428d7b3dSmrg		/* If there's no dst alpha channel, adjust the blend op so that
433428d7b3dSmrg		 * we'll treat it as always 1.
434428d7b3dSmrg		 */
435428d7b3dSmrg		if (PICT_FORMAT_A(dst_format) == 0) {
436428d7b3dSmrg			if (sblend == BLENDFACTOR_DST_ALPHA)
437428d7b3dSmrg				sblend = BLENDFACTOR_ONE;
438428d7b3dSmrg			else if (sblend == BLENDFACTOR_INV_DST_ALPHA)
439428d7b3dSmrg				sblend = BLENDFACTOR_ZERO;
440428d7b3dSmrg		}
441428d7b3dSmrg
442428d7b3dSmrg		/* gen2 engine reads 8bit color buffer into green channel
443428d7b3dSmrg		 * in cases like color buffer blending etc., and also writes
444428d7b3dSmrg		 * back green channel.  So with dst_alpha blend we should use
445428d7b3dSmrg		 * color factor.
446428d7b3dSmrg		 */
447428d7b3dSmrg		if (dst_format == PICT_a8) {
448428d7b3dSmrg			if (sblend == BLENDFACTOR_DST_ALPHA)
449428d7b3dSmrg				sblend = BLENDFACTOR_DST_COLR;
450428d7b3dSmrg			else if (sblend == BLENDFACTOR_INV_DST_ALPHA)
451428d7b3dSmrg				sblend = BLENDFACTOR_INV_DST_COLR;
452428d7b3dSmrg		}
453428d7b3dSmrg	}
454428d7b3dSmrg
455428d7b3dSmrg	/* If the source alpha is being used, then we should only be in a case
456428d7b3dSmrg	 * where the source blend factor is 0, and the source blend value is
457428d7b3dSmrg	 * the mask channels multiplied by the source picture's alpha.
458428d7b3dSmrg	 */
459428d7b3dSmrg	if (has_component_alpha && gen2_blend_op[op].src_alpha) {
460428d7b3dSmrg		if (dblend == BLENDFACTOR_SRC_ALPHA)
461428d7b3dSmrg			dblend = BLENDFACTOR_SRC_COLR;
462428d7b3dSmrg		else if (dblend == BLENDFACTOR_INV_SRC_ALPHA)
463428d7b3dSmrg			dblend = BLENDFACTOR_INV_SRC_COLR;
464428d7b3dSmrg	}
465428d7b3dSmrg
466428d7b3dSmrg	return (sblend << S8_SRC_BLEND_FACTOR_SHIFT |
467428d7b3dSmrg		dblend << S8_DST_BLEND_FACTOR_SHIFT |
468428d7b3dSmrg		S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
469428d7b3dSmrg		S8_ENABLE_COLOR_BUFFER_WRITE);
470428d7b3dSmrg}
471428d7b3dSmrg
472428d7b3dSmrgstatic void gen2_emit_invariant(struct sna *sna)
473428d7b3dSmrg{
474428d7b3dSmrg	int i;
475428d7b3dSmrg
476428d7b3dSmrg	for (i = 0; i < 4; i++) {
477428d7b3dSmrg		BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(i));
478428d7b3dSmrg		BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | MAP_UNIT(i) |
479428d7b3dSmrg		      DISABLE_TEX_STREAM_BUMP |
480428d7b3dSmrg		      ENABLE_TEX_STREAM_COORD_SET | TEX_STREAM_COORD_SET(i) |
481428d7b3dSmrg		      ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(i));
482428d7b3dSmrg		BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
483428d7b3dSmrg		BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(i));
484428d7b3dSmrg	}
485428d7b3dSmrg
486428d7b3dSmrg	BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD);
487428d7b3dSmrg	BATCH(TEXBIND_SET3(TEXCOORDSRC_VTXSET_3) |
488428d7b3dSmrg	      TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) |
489428d7b3dSmrg	      TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) |
490428d7b3dSmrg	      TEXBIND_SET0(TEXCOORDSRC_VTXSET_0));
491428d7b3dSmrg
492428d7b3dSmrg	BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
493428d7b3dSmrg
494428d7b3dSmrg	BATCH(_3DSTATE_VERTEX_TRANSFORM);
495428d7b3dSmrg	BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE);
496428d7b3dSmrg
497428d7b3dSmrg	BATCH(_3DSTATE_W_STATE_CMD);
498428d7b3dSmrg	BATCH(MAGIC_W_STATE_DWORD1);
499428d7b3dSmrg	BATCH_F(1.0);
500428d7b3dSmrg
501428d7b3dSmrg	BATCH(_3DSTATE_INDPT_ALPHA_BLEND_CMD |
502428d7b3dSmrg	      DISABLE_INDPT_ALPHA_BLEND |
503428d7b3dSmrg	      ENABLE_ALPHA_BLENDFUNC | ABLENDFUNC_ADD);
504428d7b3dSmrg
505428d7b3dSmrg	BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD);
506428d7b3dSmrg	BATCH(0);
507428d7b3dSmrg
508428d7b3dSmrg	BATCH(_3DSTATE_MODES_1_CMD |
509428d7b3dSmrg	      ENABLE_COLR_BLND_FUNC | BLENDFUNC_ADD |
510428d7b3dSmrg	      ENABLE_SRC_BLND_FACTOR | SRC_BLND_FACT(BLENDFACTOR_ONE) |
511428d7b3dSmrg	      ENABLE_DST_BLND_FACTOR | DST_BLND_FACT(BLENDFACTOR_ZERO));
512428d7b3dSmrg
513428d7b3dSmrg	BATCH(_3DSTATE_ENABLES_1_CMD |
514428d7b3dSmrg	      DISABLE_LOGIC_OP |
515428d7b3dSmrg	      DISABLE_STENCIL_TEST |
516428d7b3dSmrg	      DISABLE_DEPTH_BIAS |
517428d7b3dSmrg	      DISABLE_SPEC_ADD |
518428d7b3dSmrg	      DISABLE_FOG |
519428d7b3dSmrg	      DISABLE_ALPHA_TEST |
520428d7b3dSmrg	      DISABLE_DEPTH_TEST |
521428d7b3dSmrg	      ENABLE_COLOR_BLEND);
522428d7b3dSmrg
523428d7b3dSmrg	BATCH(_3DSTATE_ENABLES_2_CMD |
524428d7b3dSmrg	      DISABLE_STENCIL_WRITE |
525428d7b3dSmrg	      DISABLE_DITHER |
526428d7b3dSmrg	      DISABLE_DEPTH_WRITE |
527428d7b3dSmrg	      ENABLE_COLOR_MASK |
528428d7b3dSmrg	      ENABLE_COLOR_WRITE |
529428d7b3dSmrg	      ENABLE_TEX_CACHE);
530428d7b3dSmrg
531428d7b3dSmrg	BATCH(_3DSTATE_STIPPLE);
532428d7b3dSmrg	BATCH(0);
533428d7b3dSmrg
534428d7b3dSmrg	BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) |
535428d7b3dSmrg	      TEXPIPE_COLOR |
536428d7b3dSmrg	      ENABLE_TEXOUTPUT_WRT_SEL |
537428d7b3dSmrg	      TEXOP_OUTPUT_CURRENT |
538428d7b3dSmrg	      DISABLE_TEX_CNTRL_STAGE |
539428d7b3dSmrg	      TEXOP_SCALE_1X |
540428d7b3dSmrg	      TEXOP_MODIFY_PARMS | TEXOP_LAST_STAGE |
541428d7b3dSmrg	      TEXBLENDOP_ARG1);
542428d7b3dSmrg	BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) |
543428d7b3dSmrg	      TEXPIPE_ALPHA |
544428d7b3dSmrg	      ENABLE_TEXOUTPUT_WRT_SEL |
545428d7b3dSmrg	      TEXOP_OUTPUT_CURRENT |
546428d7b3dSmrg	      TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS |
547428d7b3dSmrg	      TEXBLENDOP_ARG1);
548428d7b3dSmrg	BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) |
549428d7b3dSmrg	      TEXPIPE_COLOR |
550428d7b3dSmrg	      TEXBLEND_ARG1 |
551428d7b3dSmrg	      TEXBLENDARG_MODIFY_PARMS |
552428d7b3dSmrg	      TEXBLENDARG_DIFFUSE);
553428d7b3dSmrg	BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) |
554428d7b3dSmrg	      TEXPIPE_ALPHA |
555428d7b3dSmrg	      TEXBLEND_ARG1 |
556428d7b3dSmrg	      TEXBLENDARG_MODIFY_PARMS |
557428d7b3dSmrg	      TEXBLENDARG_DIFFUSE);
558428d7b3dSmrg
559428d7b3dSmrg#define INVARIANT_SIZE 35
560428d7b3dSmrg
561428d7b3dSmrg	sna->render_state.gen2.need_invariant = false;
562428d7b3dSmrg}
563428d7b3dSmrg
564428d7b3dSmrgstatic void
565428d7b3dSmrggen2_get_batch(struct sna *sna, const struct sna_composite_op *op)
566428d7b3dSmrg{
567428d7b3dSmrg	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
568428d7b3dSmrg
569428d7b3dSmrg	if (!kgem_check_batch(&sna->kgem, INVARIANT_SIZE+40)) {
570428d7b3dSmrg		DBG(("%s: flushing batch: size %d > %d\n",
571428d7b3dSmrg		     __FUNCTION__, INVARIANT_SIZE+40,
572428d7b3dSmrg		     sna->kgem.surface-sna->kgem.nbatch));
573428d7b3dSmrg		kgem_submit(&sna->kgem);
574428d7b3dSmrg		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
575428d7b3dSmrg	}
576428d7b3dSmrg
577428d7b3dSmrg	if (!kgem_check_reloc(&sna->kgem, 3)) {
578428d7b3dSmrg		DBG(("%s: flushing batch: reloc %d >= %d\n",
579428d7b3dSmrg		     __FUNCTION__,
580428d7b3dSmrg		     sna->kgem.nreloc + 3,
581428d7b3dSmrg		     (int)KGEM_RELOC_SIZE(&sna->kgem)));
582428d7b3dSmrg		kgem_submit(&sna->kgem);
583428d7b3dSmrg		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
584428d7b3dSmrg	}
585428d7b3dSmrg
586428d7b3dSmrg	if (!kgem_check_exec(&sna->kgem, 3)) {
587428d7b3dSmrg		DBG(("%s: flushing batch: exec %d >= %d\n",
588428d7b3dSmrg		     __FUNCTION__,
589428d7b3dSmrg		     sna->kgem.nexec + 1,
590428d7b3dSmrg		     (int)KGEM_EXEC_SIZE(&sna->kgem)));
591428d7b3dSmrg		kgem_submit(&sna->kgem);
592428d7b3dSmrg		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
593428d7b3dSmrg	}
594428d7b3dSmrg
595428d7b3dSmrg	if (sna->render_state.gen2.need_invariant)
596428d7b3dSmrg		gen2_emit_invariant(sna);
597428d7b3dSmrg}
598428d7b3dSmrg
599428d7b3dSmrgstatic void gen2_emit_target(struct sna *sna, const struct sna_composite_op *op)
600428d7b3dSmrg{
601428d7b3dSmrg	assert(!too_large(op->dst.width, op->dst.height));
602428d7b3dSmrg	assert(op->dst.bo->pitch >= 8 && op->dst.bo->pitch <= MAX_3D_PITCH);
603428d7b3dSmrg	assert(sna->render.vertex_offset == 0);
604428d7b3dSmrg
605428d7b3dSmrg	assert(op->dst.bo->unique_id);
606428d7b3dSmrg	if (sna->render_state.gen2.target == op->dst.bo->unique_id) {
607428d7b3dSmrg		kgem_bo_mark_dirty(op->dst.bo);
608428d7b3dSmrg		return;
609428d7b3dSmrg	}
610428d7b3dSmrg
611428d7b3dSmrg	BATCH(_3DSTATE_BUF_INFO_CMD);
612428d7b3dSmrg	BATCH(BUF_3D_ID_COLOR_BACK |
613428d7b3dSmrg	      gen2_buf_tiling(op->dst.bo->tiling) |
614428d7b3dSmrg	      BUF_3D_PITCH(op->dst.bo->pitch));
615428d7b3dSmrg	BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
616428d7b3dSmrg			     op->dst.bo,
617428d7b3dSmrg			     I915_GEM_DOMAIN_RENDER << 16 |
618428d7b3dSmrg			     I915_GEM_DOMAIN_RENDER,
619428d7b3dSmrg			     0));
620428d7b3dSmrg
621428d7b3dSmrg	BATCH(_3DSTATE_DST_BUF_VARS_CMD);
622428d7b3dSmrg	BATCH(gen2_get_dst_format(op->dst.format));
623428d7b3dSmrg
624428d7b3dSmrg	BATCH(_3DSTATE_DRAW_RECT_CMD);
625428d7b3dSmrg	BATCH(0);
626428d7b3dSmrg	BATCH(0);	/* ymin, xmin */
627428d7b3dSmrg	BATCH(DRAW_YMAX(op->dst.height - 1) |
628428d7b3dSmrg	      DRAW_XMAX(op->dst.width - 1));
629428d7b3dSmrg	BATCH(0);	/* yorig, xorig */
630428d7b3dSmrg
631428d7b3dSmrg	sna->render_state.gen2.target = op->dst.bo->unique_id;
632428d7b3dSmrg}
633428d7b3dSmrg
634428d7b3dSmrgstatic void gen2_disable_logic_op(struct sna *sna)
635428d7b3dSmrg{
636428d7b3dSmrg	if (!sna->render_state.gen2.logic_op_enabled)
637428d7b3dSmrg		return;
638428d7b3dSmrg
639428d7b3dSmrg	DBG(("%s\n", __FUNCTION__));
640428d7b3dSmrg
641428d7b3dSmrg	BATCH(_3DSTATE_ENABLES_1_CMD |
642428d7b3dSmrg	      DISABLE_LOGIC_OP | ENABLE_COLOR_BLEND);
643428d7b3dSmrg
644428d7b3dSmrg	sna->render_state.gen2.logic_op_enabled = 0;
645428d7b3dSmrg}
646428d7b3dSmrg
647428d7b3dSmrgstatic void gen2_enable_logic_op(struct sna *sna, int op)
648428d7b3dSmrg{
649428d7b3dSmrg	static const uint8_t logic_op[] = {
650428d7b3dSmrg		LOGICOP_CLEAR,		/* GXclear */
651428d7b3dSmrg		LOGICOP_AND,		/* GXand */
652428d7b3dSmrg		LOGICOP_AND_RVRSE, 	/* GXandReverse */
653428d7b3dSmrg		LOGICOP_COPY,		/* GXcopy */
654428d7b3dSmrg		LOGICOP_AND_INV,	/* GXandInverted */
655428d7b3dSmrg		LOGICOP_NOOP,		/* GXnoop */
656428d7b3dSmrg		LOGICOP_XOR,		/* GXxor */
657428d7b3dSmrg		LOGICOP_OR,		/* GXor */
658428d7b3dSmrg		LOGICOP_NOR,		/* GXnor */
659428d7b3dSmrg		LOGICOP_EQUIV,		/* GXequiv */
660428d7b3dSmrg		LOGICOP_INV,		/* GXinvert */
661428d7b3dSmrg		LOGICOP_OR_RVRSE,	/* GXorReverse */
662428d7b3dSmrg		LOGICOP_COPY_INV,	/* GXcopyInverted */
663428d7b3dSmrg		LOGICOP_OR_INV,		/* GXorInverted */
664428d7b3dSmrg		LOGICOP_NAND,		/* GXnand */
665428d7b3dSmrg		LOGICOP_SET		/* GXset */
666428d7b3dSmrg	};
667428d7b3dSmrg
668428d7b3dSmrg	if (sna->render_state.gen2.logic_op_enabled != op+1) {
669428d7b3dSmrg		if (!sna->render_state.gen2.logic_op_enabled) {
670428d7b3dSmrg			if (op == GXclear || op == GXcopy)
671428d7b3dSmrg				return;
672428d7b3dSmrg
673428d7b3dSmrg			DBG(("%s\n", __FUNCTION__));
674428d7b3dSmrg
675428d7b3dSmrg			BATCH(_3DSTATE_ENABLES_1_CMD |
676428d7b3dSmrg			      ENABLE_LOGIC_OP | DISABLE_COLOR_BLEND);
677428d7b3dSmrg		}
678428d7b3dSmrg
679428d7b3dSmrg		BATCH(_3DSTATE_MODES_4_CMD |
680428d7b3dSmrg		      ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(logic_op[op]));
681428d7b3dSmrg		sna->render_state.gen2.logic_op_enabled = op+1;
682428d7b3dSmrg	}
683428d7b3dSmrg}
684428d7b3dSmrg
/*
 * Emit all state needed before the vertices of a composite operation.
 *
 * In order: reserve batch space (and emit invariant state if pending),
 * flush caches if a source or mask bo is dirty, program the render target,
 * load the immediate blend-control state, turn logic ops off, load texture
 * blend stage 0, program the source and mask textures (or the diffuse
 * colour for solid channels), and finally the vertex format.
 *
 * Several pieces are skipped when they match what was emitted last: the
 * two LOAD_STATE_IMMEDIATE packets are compared against their previous
 * copies in the batch (offsets kept in render_state.gen2.ls1 / ls2), and
 * the diffuse colour and vertex format are compared against cached values.
 */
static void gen2_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	uint32_t texcoordfmt, v, unwind;
	uint32_t cblend, ablend;
	int tex;

	gen2_get_batch(sna, op);

	/* A dirty source or mask needs a flush before it is sampled.  Use the
	 * MI_FLUSH with map-cache invalidate when a channel aliases the
	 * destination, and a render+texture cache pipeline flush otherwise.
	 */
	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
			BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
		else
			BATCH(_3DSTATE_MODES_5_CMD |
			      PIPELINE_FLUSH_RENDER_CACHE |
			      PIPELINE_FLUSH_TEXTURE_CACHE);
		kgem_clear_dirty(&sna->kgem);
	}

	gen2_emit_target(sna, op);

	/* Write the S2/S3/S8 packet speculatively, remembering where it
	 * starts so it can be dropped again.
	 */
	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
	/* Number of textures in use: one unless the source is solid, plus
	 * one for a mask bo.
	 * NOTE(review): presumably this is the texcoord-count field of S2 --
	 * confirm against the hardware documentation.
	 */
	BATCH((!op->src.is_solid + (op->mask.bo != NULL)) << 12);
	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
	BATCH(gen2_get_blend_cntl(op->op,
				  op->has_component_alpha,
				  op->dst.format));
	/* If the three payload dwords equal those of the previously kept
	 * packet, rewind nbatch to discard the duplicate; otherwise this
	 * packet becomes the new reference.
	 */
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
		   sna->kgem.batch + unwind + 1,
		   3 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls1 = unwind;

	gen2_disable_logic_op(sna);

	/* Same speculative emit / compare / rewind scheme for the texture
	 * blend stage, with two payload dwords.
	 */
	gen2_get_blend_factors(op, op->op, &cblend, &ablend);
	unwind = sna->kgem.nbatch;
	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
	BATCH(cblend);
	BATCH(ablend);
	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
		   sna->kgem.batch + unwind + 1,
		   2 * sizeof(uint32_t)) == 0)
		sna->kgem.nbatch = unwind;
	else
		sna->render_state.gen2.ls2 = unwind;

	/* tex counts the texture units assigned so far; each unit gets a
	 * two-bit coordinate format (2D for affine, 3D otherwise) in
	 * texcoordfmt.
	 */
	tex = texcoordfmt = 0;
	if (!op->src.is_solid) {
		if (op->src.is_affine)
			texcoordfmt |= TEXCOORDFMT_2D << (2*tex);
		else
			texcoordfmt |= TEXCOORDFMT_3D << (2*tex);
		gen2_emit_texture(sna, &op->src, tex++);
	} else {
		/* Solid source: supplied as the default diffuse colour,
		 * re-emitted only when it differs from the cached value.
		 */
		if (op->src.u.gen2.pixel != sna->render_state.gen2.diffuse) {
			BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
			BATCH(op->src.u.gen2.pixel);
			sna->render_state.gen2.diffuse = op->src.u.gen2.pixel;
		}
	}
	if (op->mask.bo) {
		if (op->mask.is_affine)
			texcoordfmt |= TEXCOORDFMT_2D << (2*tex);
		else
			texcoordfmt |= TEXCOORDFMT_3D << (2*tex);
		gen2_emit_texture(sna, &op->mask, tex++);
	} else if (op->mask.is_solid) {
		/* Solid mask without a bo: also goes through the diffuse
		 * colour.
		 */
		if (op->mask.u.gen2.pixel != sna->render_state.gen2.diffuse) {
			BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
			BATCH(op->mask.u.gen2.pixel);
			sna->render_state.gen2.diffuse = op->mask.u.gen2.pixel;
		}
	}

	/* Emit the vertex format only when it changes. */
	v = _3DSTATE_VERTEX_FORMAT_2_CMD | texcoordfmt;
	if (sna->render_state.gen2.vft != v) {
		BATCH(v);
		sna->render_state.gen2.vft = v;
	}
}
770428d7b3dSmrg
/*
 * Emit the destination position of one vertex: dstX followed by dstY,
 * each pushed through the VERTEX() macro.
 */
static inline void
gen2_emit_composite_dstcoord(struct sna *sna, int dstX, int dstY)
{
	VERTEX(dstX);	/* x comes first ... */
	VERTEX(dstY);	/* ... then y */
}
777428d7b3dSmrg
778428d7b3dSmrginline static void
779428d7b3dSmrggen2_emit_composite_linear(struct sna *sna,
780428d7b3dSmrg			   const struct sna_composite_channel *channel,
781428d7b3dSmrg			   int16_t x, int16_t y)
782428d7b3dSmrg{
783428d7b3dSmrg	float v;
784428d7b3dSmrg
785428d7b3dSmrg	v = (x * channel->u.linear.dx +
786428d7b3dSmrg	     y * channel->u.linear.dy +
787428d7b3dSmrg	     channel->u.linear.offset);
788428d7b3dSmrg	DBG(("%s: (%d, %d) -> %f\n", __FUNCTION__, x, y, v));
789428d7b3dSmrg	VERTEX(v);
790428d7b3dSmrg	VERTEX(v);
791428d7b3dSmrg}
792428d7b3dSmrg
793428d7b3dSmrgstatic void
794428d7b3dSmrggen2_emit_composite_texcoord(struct sna *sna,
795428d7b3dSmrg			     const struct sna_composite_channel *channel,
796428d7b3dSmrg			     int16_t x, int16_t y)
797428d7b3dSmrg{
798428d7b3dSmrg	float s = 0, t = 0, w = 1;
799428d7b3dSmrg
800428d7b3dSmrg	x += channel->offset[0];
801428d7b3dSmrg	y += channel->offset[1];
802428d7b3dSmrg
803428d7b3dSmrg	if (channel->is_affine) {
804428d7b3dSmrg		sna_get_transformed_coordinates(x, y,
805428d7b3dSmrg						channel->transform,
806428d7b3dSmrg						&s, &t);
807428d7b3dSmrg		VERTEX(s * channel->scale[0]);
808428d7b3dSmrg		VERTEX(t * channel->scale[1]);
809428d7b3dSmrg	} else {
810428d7b3dSmrg		sna_get_transformed_coordinates_3d(x, y,
811428d7b3dSmrg						   channel->transform,
812428d7b3dSmrg						   &s, &t, &w);
813428d7b3dSmrg		VERTEX(s * channel->scale[0]);
814428d7b3dSmrg		VERTEX(t * channel->scale[1]);
815428d7b3dSmrg		VERTEX(w);
816428d7b3dSmrg	}
817428d7b3dSmrg}
818428d7b3dSmrg
819428d7b3dSmrgstatic void
820428d7b3dSmrggen2_emit_composite_vertex(struct sna *sna,
821428d7b3dSmrg			   const struct sna_composite_op *op,
822428d7b3dSmrg			   int16_t srcX, int16_t srcY,
823428d7b3dSmrg			   int16_t mskX, int16_t mskY,
824428d7b3dSmrg			   int16_t dstX, int16_t dstY)
825428d7b3dSmrg{
826428d7b3dSmrg	gen2_emit_composite_dstcoord(sna, dstX, dstY);
827428d7b3dSmrg	if (op->src.is_linear)
828428d7b3dSmrg		gen2_emit_composite_linear(sna, &op->src, srcX, srcY);
829428d7b3dSmrg	else if (!op->src.is_solid)
830428d7b3dSmrg		gen2_emit_composite_texcoord(sna, &op->src, srcX, srcY);
831428d7b3dSmrg
832428d7b3dSmrg	if (op->mask.is_linear)
833428d7b3dSmrg		gen2_emit_composite_linear(sna, &op->mask, mskX, mskY);
834428d7b3dSmrg	else if (op->mask.bo)
835428d7b3dSmrg		gen2_emit_composite_texcoord(sna, &op->mask, mskX, mskY);
836428d7b3dSmrg}
837428d7b3dSmrg
838428d7b3dSmrgfastcall static void
839428d7b3dSmrggen2_emit_composite_primitive(struct sna *sna,
840428d7b3dSmrg			      const struct sna_composite_op *op,
841428d7b3dSmrg			      const struct sna_composite_rectangles *r)
842428d7b3dSmrg{
843428d7b3dSmrg	gen2_emit_composite_vertex(sna, op,
844428d7b3dSmrg				   r->src.x + r->width,
845428d7b3dSmrg				   r->src.y + r->height,
846428d7b3dSmrg				   r->mask.x + r->width,
847428d7b3dSmrg				   r->mask.y + r->height,
848428d7b3dSmrg				   op->dst.x + r->dst.x + r->width,
849428d7b3dSmrg				   op->dst.y + r->dst.y + r->height);
850428d7b3dSmrg	gen2_emit_composite_vertex(sna, op,
851428d7b3dSmrg				   r->src.x,
852428d7b3dSmrg				   r->src.y + r->height,
853428d7b3dSmrg				   r->mask.x,
854428d7b3dSmrg				   r->mask.y + r->height,
855428d7b3dSmrg				   op->dst.x + r->dst.x,
856428d7b3dSmrg				   op->dst.y + r->dst.y + r->height);
857428d7b3dSmrg	gen2_emit_composite_vertex(sna, op,
858428d7b3dSmrg				   r->src.x,
859428d7b3dSmrg				   r->src.y,
860428d7b3dSmrg				   r->mask.x,
861428d7b3dSmrg				   r->mask.y,
862428d7b3dSmrg				   op->dst.x + r->dst.x,
863428d7b3dSmrg				   op->dst.y + r->dst.y);
864428d7b3dSmrg}
865428d7b3dSmrg
866428d7b3dSmrgfastcall static void
867428d7b3dSmrggen2_emit_composite_primitive_constant(struct sna *sna,
868428d7b3dSmrg				       const struct sna_composite_op *op,
869428d7b3dSmrg				       const struct sna_composite_rectangles *r)
870428d7b3dSmrg{
871428d7b3dSmrg	int16_t dst_x = r->dst.x + op->dst.x;
872428d7b3dSmrg	int16_t dst_y = r->dst.y + op->dst.y;
873428d7b3dSmrg
874428d7b3dSmrg	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
875428d7b3dSmrg	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
876428d7b3dSmrg	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
877428d7b3dSmrg}
878428d7b3dSmrg
879428d7b3dSmrgfastcall static void
880428d7b3dSmrggen2_emit_composite_primitive_linear(struct sna *sna,
881428d7b3dSmrg				       const struct sna_composite_op *op,
882428d7b3dSmrg				       const struct sna_composite_rectangles *r)
883428d7b3dSmrg{
884428d7b3dSmrg	int16_t dst_x = r->dst.x + op->dst.x;
885428d7b3dSmrg	int16_t dst_y = r->dst.y + op->dst.y;
886428d7b3dSmrg
887428d7b3dSmrg	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
888428d7b3dSmrg	gen2_emit_composite_linear(sna, &op->src,
889428d7b3dSmrg				   r->src.x + r->width, r->src.y + r->height);
890428d7b3dSmrg
891428d7b3dSmrg	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
892428d7b3dSmrg	gen2_emit_composite_linear(sna, &op->src,
893428d7b3dSmrg				   r->src.x, r->src.y + r->height);
894428d7b3dSmrg
895428d7b3dSmrg	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
896428d7b3dSmrg	gen2_emit_composite_linear(sna, &op->src,
897428d7b3dSmrg				   r->src.x, r->src.y);
898428d7b3dSmrg}
899428d7b3dSmrg
900428d7b3dSmrgfastcall static void
901428d7b3dSmrggen2_emit_composite_primitive_identity(struct sna *sna,
902428d7b3dSmrg				       const struct sna_composite_op *op,
903428d7b3dSmrg				       const struct sna_composite_rectangles *r)
904428d7b3dSmrg{
905428d7b3dSmrg	float w = r->width;
906428d7b3dSmrg	float h = r->height;
907428d7b3dSmrg	float *v;
908428d7b3dSmrg
909428d7b3dSmrg	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
910428d7b3dSmrg	sna->kgem.nbatch += 12;
911428d7b3dSmrg
912428d7b3dSmrg	v[8] = v[4] = r->dst.x + op->dst.x;
913428d7b3dSmrg	v[0] = v[4] + w;
914428d7b3dSmrg
915428d7b3dSmrg	v[9] = r->dst.y + op->dst.y;
916428d7b3dSmrg	v[5] = v[1] = v[9] + h;
917428d7b3dSmrg
918428d7b3dSmrg	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
919428d7b3dSmrg	v[2] = v[6] + w * op->src.scale[0];
920428d7b3dSmrg
921428d7b3dSmrg	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
922428d7b3dSmrg	v[7] = v[3] = v[11] + h * op->src.scale[1];
923428d7b3dSmrg}
924428d7b3dSmrg
925428d7b3dSmrgfastcall static void
926428d7b3dSmrggen2_emit_composite_primitive_affine(struct sna *sna,
927428d7b3dSmrg				     const struct sna_composite_op *op,
928428d7b3dSmrg				     const struct sna_composite_rectangles *r)
929428d7b3dSmrg{
930428d7b3dSmrg	PictTransform *transform = op->src.transform;
931428d7b3dSmrg	int src_x = r->src.x + (int)op->src.offset[0];
932428d7b3dSmrg	int src_y = r->src.y + (int)op->src.offset[1];
933428d7b3dSmrg	float *v;
934428d7b3dSmrg
935428d7b3dSmrg	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
936428d7b3dSmrg	sna->kgem.nbatch += 12;
937428d7b3dSmrg
938428d7b3dSmrg	v[8] = v[4] = r->dst.x + op->dst.x;
939428d7b3dSmrg	v[0] = v[4] + r->width;
940428d7b3dSmrg
941428d7b3dSmrg	v[9] = r->dst.y + op->dst.y;
942428d7b3dSmrg	v[5] = v[1] = v[9] + r->height;
943428d7b3dSmrg
944428d7b3dSmrg	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
945428d7b3dSmrg				    transform, op->src.scale,
946428d7b3dSmrg				    &v[2], &v[3]);
947428d7b3dSmrg
948428d7b3dSmrg	_sna_get_transformed_scaled(src_x, src_y + r->height,
949428d7b3dSmrg				    transform, op->src.scale,
950428d7b3dSmrg				    &v[6], &v[7]);
951428d7b3dSmrg
952428d7b3dSmrg	_sna_get_transformed_scaled(src_x, src_y,
953428d7b3dSmrg				    transform, op->src.scale,
954428d7b3dSmrg				    &v[10], &v[11]);
955428d7b3dSmrg}
956428d7b3dSmrg
957428d7b3dSmrgfastcall static void
958428d7b3dSmrggen2_emit_composite_primitive_constant_identity_mask(struct sna *sna,
959428d7b3dSmrg						     const struct sna_composite_op *op,
960428d7b3dSmrg						     const struct sna_composite_rectangles *r)
961428d7b3dSmrg{
962428d7b3dSmrg	float w = r->width;
963428d7b3dSmrg	float h = r->height;
964428d7b3dSmrg	float *v;
965428d7b3dSmrg
966428d7b3dSmrg	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
967428d7b3dSmrg	sna->kgem.nbatch += 12;
968428d7b3dSmrg
969428d7b3dSmrg	v[8] = v[4] = r->dst.x + op->dst.x;
970428d7b3dSmrg	v[0] = v[4] + w;
971428d7b3dSmrg
972428d7b3dSmrg	v[9] = r->dst.y + op->dst.y;
973428d7b3dSmrg	v[5] = v[1] = v[9] + h;
974428d7b3dSmrg
975428d7b3dSmrg	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
976428d7b3dSmrg	v[2] = v[6] + w * op->mask.scale[0];
977428d7b3dSmrg
978428d7b3dSmrg	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
979428d7b3dSmrg	v[7] = v[3] = v[11] + h * op->mask.scale[1];
980428d7b3dSmrg}
981428d7b3dSmrg
982428d7b3dSmrg#if defined(sse2) && !defined(__x86_64__)
983428d7b3dSmrgsse2 fastcall static void
984428d7b3dSmrggen2_emit_composite_primitive_constant__sse2(struct sna *sna,
985428d7b3dSmrg					     const struct sna_composite_op *op,
986428d7b3dSmrg					     const struct sna_composite_rectangles *r)
987428d7b3dSmrg{
988428d7b3dSmrg	int16_t dst_x = r->dst.x + op->dst.x;
989428d7b3dSmrg	int16_t dst_y = r->dst.y + op->dst.y;
990428d7b3dSmrg
991428d7b3dSmrg	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
992428d7b3dSmrg	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
993428d7b3dSmrg	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
994428d7b3dSmrg}
995428d7b3dSmrg
996428d7b3dSmrgsse2 fastcall static void
997428d7b3dSmrggen2_emit_composite_primitive_linear__sse2(struct sna *sna,
998428d7b3dSmrg					   const struct sna_composite_op *op,
999428d7b3dSmrg					   const struct sna_composite_rectangles *r)
1000428d7b3dSmrg{
1001428d7b3dSmrg	int16_t dst_x = r->dst.x + op->dst.x;
1002428d7b3dSmrg	int16_t dst_y = r->dst.y + op->dst.y;
1003428d7b3dSmrg
1004428d7b3dSmrg	gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
1005428d7b3dSmrg	gen2_emit_composite_linear(sna, &op->src,
1006428d7b3dSmrg				   r->src.x + r->width, r->src.y + r->height);
1007428d7b3dSmrg
1008428d7b3dSmrg	gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
1009428d7b3dSmrg	gen2_emit_composite_linear(sna, &op->src,
1010428d7b3dSmrg				   r->src.x, r->src.y + r->height);
1011428d7b3dSmrg
1012428d7b3dSmrg	gen2_emit_composite_dstcoord(sna, dst_x, dst_y);
1013428d7b3dSmrg	gen2_emit_composite_linear(sna, &op->src,
1014428d7b3dSmrg				   r->src.x, r->src.y);
1015428d7b3dSmrg}
1016428d7b3dSmrg
1017428d7b3dSmrgsse2 fastcall static void
1018428d7b3dSmrggen2_emit_composite_primitive_identity__sse2(struct sna *sna,
1019428d7b3dSmrg					     const struct sna_composite_op *op,
1020428d7b3dSmrg					     const struct sna_composite_rectangles *r)
1021428d7b3dSmrg{
1022428d7b3dSmrg	float w = r->width;
1023428d7b3dSmrg	float h = r->height;
1024428d7b3dSmrg	float *v;
1025428d7b3dSmrg
1026428d7b3dSmrg	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
1027428d7b3dSmrg	sna->kgem.nbatch += 12;
1028428d7b3dSmrg
1029428d7b3dSmrg	v[8] = v[4] = r->dst.x + op->dst.x;
1030428d7b3dSmrg	v[0] = v[4] + w;
1031428d7b3dSmrg
1032428d7b3dSmrg	v[9] = r->dst.y + op->dst.y;
1033428d7b3dSmrg	v[5] = v[1] = v[9] + h;
1034428d7b3dSmrg
1035428d7b3dSmrg	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
1036428d7b3dSmrg	v[2] = v[6] + w * op->src.scale[0];
1037428d7b3dSmrg
1038428d7b3dSmrg	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
1039428d7b3dSmrg	v[7] = v[3] = v[11] + h * op->src.scale[1];
1040428d7b3dSmrg}
1041428d7b3dSmrg
1042428d7b3dSmrgsse2 fastcall static void
1043428d7b3dSmrggen2_emit_composite_primitive_affine__sse2(struct sna *sna,
1044428d7b3dSmrg					   const struct sna_composite_op *op,
1045428d7b3dSmrg					   const struct sna_composite_rectangles *r)
1046428d7b3dSmrg{
1047428d7b3dSmrg	PictTransform *transform = op->src.transform;
1048428d7b3dSmrg	int src_x = r->src.x + (int)op->src.offset[0];
1049428d7b3dSmrg	int src_y = r->src.y + (int)op->src.offset[1];
1050428d7b3dSmrg	float *v;
1051428d7b3dSmrg
1052428d7b3dSmrg	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
1053428d7b3dSmrg	sna->kgem.nbatch += 12;
1054428d7b3dSmrg
1055428d7b3dSmrg	v[8] = v[4] = r->dst.x + op->dst.x;
1056428d7b3dSmrg	v[0] = v[4] + r->width;
1057428d7b3dSmrg
1058428d7b3dSmrg	v[9] = r->dst.y + op->dst.y;
1059428d7b3dSmrg	v[5] = v[1] = v[9] + r->height;
1060428d7b3dSmrg
1061428d7b3dSmrg	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
1062428d7b3dSmrg				    transform, op->src.scale,
1063428d7b3dSmrg				    &v[2], &v[3]);
1064428d7b3dSmrg
1065428d7b3dSmrg	_sna_get_transformed_scaled(src_x, src_y + r->height,
1066428d7b3dSmrg				    transform, op->src.scale,
1067428d7b3dSmrg				    &v[6], &v[7]);
1068428d7b3dSmrg
1069428d7b3dSmrg	_sna_get_transformed_scaled(src_x, src_y,
1070428d7b3dSmrg				    transform, op->src.scale,
1071428d7b3dSmrg				    &v[10], &v[11]);
1072428d7b3dSmrg}
1073428d7b3dSmrg
1074428d7b3dSmrgsse2 fastcall static void
1075428d7b3dSmrggen2_emit_composite_primitive_constant_identity_mask__sse2(struct sna *sna,
1076428d7b3dSmrg							   const struct sna_composite_op *op,
1077428d7b3dSmrg							   const struct sna_composite_rectangles *r)
1078428d7b3dSmrg{
1079428d7b3dSmrg	float w = r->width;
1080428d7b3dSmrg	float h = r->height;
1081428d7b3dSmrg	float *v;
1082428d7b3dSmrg
1083428d7b3dSmrg	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
1084428d7b3dSmrg	sna->kgem.nbatch += 12;
1085428d7b3dSmrg
1086428d7b3dSmrg	v[8] = v[4] = r->dst.x + op->dst.x;
1087428d7b3dSmrg	v[0] = v[4] + w;
1088428d7b3dSmrg
1089428d7b3dSmrg	v[9] = r->dst.y + op->dst.y;
1090428d7b3dSmrg	v[5] = v[1] = v[9] + h;
1091428d7b3dSmrg
1092428d7b3dSmrg	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
1093428d7b3dSmrg	v[2] = v[6] + w * op->mask.scale[0];
1094428d7b3dSmrg
1095428d7b3dSmrg	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
1096428d7b3dSmrg	v[7] = v[3] = v[11] + h * op->mask.scale[1];
1097428d7b3dSmrg}
1098428d7b3dSmrg#endif
1099428d7b3dSmrg
1100428d7b3dSmrgstatic void gen2_magic_ca_pass(struct sna *sna,
1101428d7b3dSmrg			       const struct sna_composite_op *op)
1102428d7b3dSmrg{
1103428d7b3dSmrg	uint32_t ablend, cblend, *src, *dst;
1104428d7b3dSmrg	int n;
1105428d7b3dSmrg
1106428d7b3dSmrg	if (!op->need_magic_ca_pass)
1107428d7b3dSmrg		return;
1108428d7b3dSmrg
1109428d7b3dSmrg	DBG(("%s: batch=%x, vertex=%x\n", __FUNCTION__,
1110428d7b3dSmrg	     sna->kgem.nbatch, sna->render.vertex_offset));
1111428d7b3dSmrg
1112428d7b3dSmrg	assert(op->mask.bo);
1113428d7b3dSmrg	assert(op->has_component_alpha);
1114428d7b3dSmrg
1115428d7b3dSmrg	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(8) | 0);
1116428d7b3dSmrg	BATCH(BLENDFACTOR_ONE << S8_SRC_BLEND_FACTOR_SHIFT |
1117428d7b3dSmrg	      BLENDFACTOR_ONE << S8_DST_BLEND_FACTOR_SHIFT |
1118428d7b3dSmrg	      S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
1119428d7b3dSmrg	      S8_ENABLE_COLOR_BUFFER_WRITE);
1120428d7b3dSmrg	sna->render_state.gen2.ls1 = 0;
1121428d7b3dSmrg
1122428d7b3dSmrg	gen2_get_blend_factors(op, PictOpAdd, &cblend, &ablend);
1123428d7b3dSmrg	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
1124428d7b3dSmrg	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
1125428d7b3dSmrg	BATCH(cblend);
1126428d7b3dSmrg	BATCH(ablend);
1127428d7b3dSmrg	sna->render_state.gen2.ls2 = 0;
1128428d7b3dSmrg
1129428d7b3dSmrg	src = sna->kgem.batch + sna->render.vertex_offset;
1130428d7b3dSmrg	dst = sna->kgem.batch + sna->kgem.nbatch;
1131428d7b3dSmrg	n = 1 + sna->render.vertex_index;
1132428d7b3dSmrg	sna->kgem.nbatch += n;
1133428d7b3dSmrg	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
1134428d7b3dSmrg	while (n--)
1135428d7b3dSmrg		*dst++ = *src++;
1136428d7b3dSmrg}
1137428d7b3dSmrg
1138428d7b3dSmrgstatic void gen2_vertex_flush(struct sna *sna,
1139428d7b3dSmrg			      const struct sna_composite_op *op)
1140428d7b3dSmrg{
1141428d7b3dSmrg	if (sna->render.vertex_index == 0)
1142428d7b3dSmrg		return;
1143428d7b3dSmrg
1144428d7b3dSmrg	sna->kgem.batch[sna->render.vertex_offset] |=
1145428d7b3dSmrg		sna->render.vertex_index - 1;
1146428d7b3dSmrg
1147428d7b3dSmrg	gen2_magic_ca_pass(sna, op);
1148428d7b3dSmrg
1149428d7b3dSmrg	sna->render.vertex_offset = 0;
1150428d7b3dSmrg	sna->render.vertex_index = 0;
1151428d7b3dSmrg}
1152428d7b3dSmrg
1153428d7b3dSmrginline static int gen2_get_rectangles(struct sna *sna,
1154428d7b3dSmrg				      const struct sna_composite_op *op,
1155428d7b3dSmrg				      int want)
1156428d7b3dSmrg{
1157428d7b3dSmrg	int rem = batch_space(sna), size, need;
1158428d7b3dSmrg
1159428d7b3dSmrg	DBG(("%s: want=%d, floats_per_vertex=%d, rem=%d\n",
1160428d7b3dSmrg	     __FUNCTION__, want, op->floats_per_vertex, rem));
1161428d7b3dSmrg
1162428d7b3dSmrg	assert(op->floats_per_vertex);
1163428d7b3dSmrg	assert(op->floats_per_rect == 3 * op->floats_per_vertex);
1164428d7b3dSmrg
1165428d7b3dSmrg	need = 1;
1166428d7b3dSmrg	size = op->floats_per_rect;
1167428d7b3dSmrg	if (op->need_magic_ca_pass)
1168428d7b3dSmrg		need += 6 + size*sna->render.vertex_index, size *= 2;
1169428d7b3dSmrg
1170428d7b3dSmrg	DBG(("%s: want=%d, need=%d,size=%d, rem=%d\n",
1171428d7b3dSmrg	     __FUNCTION__, want, need, size, rem));
1172428d7b3dSmrg	if (rem < need + size) {
1173428d7b3dSmrg		gen2_vertex_flush(sna, op);
1174428d7b3dSmrg		kgem_submit(&sna->kgem);
1175428d7b3dSmrg		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1176428d7b3dSmrg		return 0;
1177428d7b3dSmrg	}
1178428d7b3dSmrg
1179428d7b3dSmrg	rem -= need;
1180428d7b3dSmrg	if (sna->render.vertex_offset == 0) {
1181428d7b3dSmrg		if ((sna->kgem.batch[sna->kgem.nbatch-1] & ~0xffff) ==
1182428d7b3dSmrg		    (PRIM3D_INLINE | PRIM3D_RECTLIST)) {
1183428d7b3dSmrg			uint32_t *b = &sna->kgem.batch[sna->kgem.nbatch-1];
1184428d7b3dSmrg			assert(*b & 0xffff);
1185428d7b3dSmrg			sna->render.vertex_index = 1 + (*b & 0xffff);
1186428d7b3dSmrg			*b = PRIM3D_INLINE | PRIM3D_RECTLIST;
1187428d7b3dSmrg			sna->render.vertex_offset = sna->kgem.nbatch - 1;
1188428d7b3dSmrg			assert(!op->need_magic_ca_pass);
1189428d7b3dSmrg		} else {
1190428d7b3dSmrg			sna->render.vertex_offset = sna->kgem.nbatch;
1191428d7b3dSmrg			BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST);
1192428d7b3dSmrg		}
1193428d7b3dSmrg	}
1194428d7b3dSmrg
1195428d7b3dSmrg	if (want > 1 && want * size > rem)
1196428d7b3dSmrg		want = rem / size;
1197428d7b3dSmrg
1198428d7b3dSmrg	assert(want);
1199428d7b3dSmrg	sna->render.vertex_index += want*op->floats_per_rect;
1200428d7b3dSmrg	return want;
1201428d7b3dSmrg}
1202428d7b3dSmrg
1203428d7b3dSmrgfastcall static void
1204428d7b3dSmrggen2_render_composite_blt(struct sna *sna,
1205428d7b3dSmrg			  const struct sna_composite_op *op,
1206428d7b3dSmrg			  const struct sna_composite_rectangles *r)
1207428d7b3dSmrg{
1208428d7b3dSmrg	if (!gen2_get_rectangles(sna, op, 1)) {
1209428d7b3dSmrg		gen2_emit_composite_state(sna, op);
1210428d7b3dSmrg		gen2_get_rectangles(sna, op, 1);
1211428d7b3dSmrg	}
1212428d7b3dSmrg
1213428d7b3dSmrg	op->prim_emit(sna, op, r);
1214428d7b3dSmrg}
1215428d7b3dSmrg
1216428d7b3dSmrgfastcall static void
1217428d7b3dSmrggen2_render_composite_box(struct sna *sna,
1218428d7b3dSmrg			  const struct sna_composite_op *op,
1219428d7b3dSmrg			  const BoxRec *box)
1220428d7b3dSmrg{
1221428d7b3dSmrg	struct sna_composite_rectangles r;
1222428d7b3dSmrg
1223428d7b3dSmrg	if (!gen2_get_rectangles(sna, op, 1)) {
1224428d7b3dSmrg		gen2_emit_composite_state(sna, op);
1225428d7b3dSmrg		gen2_get_rectangles(sna, op, 1);
1226428d7b3dSmrg	}
1227428d7b3dSmrg
1228428d7b3dSmrg	DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
1229428d7b3dSmrg	     box->x1, box->y1,
1230428d7b3dSmrg	     box->x2 - box->x1,
1231428d7b3dSmrg	     box->y2 - box->y1));
1232428d7b3dSmrg
1233428d7b3dSmrg	r.dst.x  = box->x1; r.dst.y  = box->y1;
1234428d7b3dSmrg	r.width = box->x2 - box->x1;
1235428d7b3dSmrg	r.height = box->y2 - box->y1;
1236428d7b3dSmrg	r.src = r.mask = r.dst;
1237428d7b3dSmrg
1238428d7b3dSmrg	op->prim_emit(sna, op, &r);
1239428d7b3dSmrg}
1240428d7b3dSmrg
1241428d7b3dSmrgstatic void
1242428d7b3dSmrggen2_render_composite_boxes(struct sna *sna,
1243428d7b3dSmrg			    const struct sna_composite_op *op,
1244428d7b3dSmrg			    const BoxRec *box, int nbox)
1245428d7b3dSmrg{
1246428d7b3dSmrg	do {
1247428d7b3dSmrg		int nbox_this_time;
1248428d7b3dSmrg
1249428d7b3dSmrg		nbox_this_time = gen2_get_rectangles(sna, op, nbox);
1250428d7b3dSmrg		if (nbox_this_time == 0) {
1251428d7b3dSmrg			gen2_emit_composite_state(sna, op);
1252428d7b3dSmrg			nbox_this_time = gen2_get_rectangles(sna, op, nbox);
1253428d7b3dSmrg		}
1254428d7b3dSmrg		nbox -= nbox_this_time;
1255428d7b3dSmrg
1256428d7b3dSmrg		do {
1257428d7b3dSmrg			struct sna_composite_rectangles r;
1258428d7b3dSmrg
1259428d7b3dSmrg			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
1260428d7b3dSmrg			     box->x1, box->y1,
1261428d7b3dSmrg			     box->x2 - box->x1,
1262428d7b3dSmrg			     box->y2 - box->y1));
1263428d7b3dSmrg
1264428d7b3dSmrg			r.dst.x  = box->x1; r.dst.y  = box->y1;
1265428d7b3dSmrg			r.width = box->x2 - box->x1;
1266428d7b3dSmrg			r.height = box->y2 - box->y1;
1267428d7b3dSmrg			r.src = r.mask = r.dst;
1268428d7b3dSmrg
1269428d7b3dSmrg			op->prim_emit(sna, op, &r);
1270428d7b3dSmrg			box++;
1271428d7b3dSmrg		} while (--nbox_this_time);
1272428d7b3dSmrg	} while (nbox);
1273428d7b3dSmrg}
1274428d7b3dSmrg
1275428d7b3dSmrgstatic void gen2_render_composite_done(struct sna *sna,
1276428d7b3dSmrg				       const struct sna_composite_op *op)
1277428d7b3dSmrg{
1278428d7b3dSmrg	gen2_vertex_flush(sna, op);
1279428d7b3dSmrg
1280428d7b3dSmrg	if (op->mask.bo)
1281428d7b3dSmrg		kgem_bo_destroy(&sna->kgem, op->mask.bo);
1282428d7b3dSmrg	if (op->src.bo)
1283428d7b3dSmrg		kgem_bo_destroy(&sna->kgem, op->src.bo);
1284428d7b3dSmrg	sna_render_composite_redirect_done(sna, op);
1285428d7b3dSmrg}
1286428d7b3dSmrg
1287428d7b3dSmrgstatic bool
1288428d7b3dSmrggen2_composite_solid_init(struct sna *sna,
1289428d7b3dSmrg			  struct sna_composite_channel *channel,
1290428d7b3dSmrg			  uint32_t color)
1291428d7b3dSmrg{
1292428d7b3dSmrg	channel->filter = PictFilterNearest;
1293428d7b3dSmrg	channel->repeat = RepeatNormal;
1294428d7b3dSmrg	channel->is_solid  = true;
1295428d7b3dSmrg	channel->is_affine = true;
1296428d7b3dSmrg	channel->width  = 1;
1297428d7b3dSmrg	channel->height = 1;
1298428d7b3dSmrg	channel->pict_format = PICT_a8r8g8b8;
1299428d7b3dSmrg
1300428d7b3dSmrg	channel->bo = NULL;
1301428d7b3dSmrg	channel->u.gen2.pixel = color;
1302428d7b3dSmrg
1303428d7b3dSmrg	channel->scale[0]  = channel->scale[1]  = 1;
1304428d7b3dSmrg	channel->offset[0] = channel->offset[1] = 0;
1305428d7b3dSmrg	return true;
1306428d7b3dSmrg}
1307428d7b3dSmrg
1308428d7b3dSmrg#define xFixedToDouble(f) pixman_fixed_to_double(f)
1309428d7b3dSmrg
1310428d7b3dSmrgstatic bool
1311428d7b3dSmrggen2_composite_linear_init(struct sna *sna,
1312428d7b3dSmrg			   PicturePtr picture,
1313428d7b3dSmrg			   struct sna_composite_channel *channel,
1314428d7b3dSmrg			   int x, int y,
1315428d7b3dSmrg			   int w, int h,
1316428d7b3dSmrg			   int dst_x, int dst_y)
1317428d7b3dSmrg{
1318428d7b3dSmrg	PictLinearGradient *linear =
1319428d7b3dSmrg		(PictLinearGradient *)picture->pSourcePict;
1320428d7b3dSmrg	pixman_fixed_t tx, ty;
1321428d7b3dSmrg	float x0, y0, sf;
1322428d7b3dSmrg	float dx, dy;
1323428d7b3dSmrg
1324428d7b3dSmrg	DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n",
1325428d7b3dSmrg	     __FUNCTION__,
1326428d7b3dSmrg	     xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y),
1327428d7b3dSmrg	     xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y)));
1328428d7b3dSmrg
1329428d7b3dSmrg	if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
1330428d7b3dSmrg		return 0;
1331428d7b3dSmrg
1332428d7b3dSmrg	if (!sna_transform_is_affine(picture->transform)) {
1333428d7b3dSmrg		DBG(("%s: fallback due to projective transform\n",
1334428d7b3dSmrg		     __FUNCTION__));
1335428d7b3dSmrg		return sna_render_picture_fixup(sna, picture, channel,
1336428d7b3dSmrg						x, y, w, h, dst_x, dst_y);
1337428d7b3dSmrg	}
1338428d7b3dSmrg
1339428d7b3dSmrg	channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear);
1340428d7b3dSmrg	if (!channel->bo)
1341428d7b3dSmrg		return 0;
1342428d7b3dSmrg
1343428d7b3dSmrg	channel->filter = PictFilterNearest;
1344428d7b3dSmrg	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
1345428d7b3dSmrg	channel->is_linear = true;
1346428d7b3dSmrg	channel->width  = channel->bo->pitch / 4;
1347428d7b3dSmrg	channel->height = 1;
1348428d7b3dSmrg	channel->pict_format = PICT_a8r8g8b8;
1349428d7b3dSmrg
1350428d7b3dSmrg	channel->scale[0]  = channel->scale[1]  = 1;
1351428d7b3dSmrg	channel->offset[0] = channel->offset[1] = 0;
1352428d7b3dSmrg
1353428d7b3dSmrg	if (sna_transform_is_translation(picture->transform, &tx, &ty)) {
1354428d7b3dSmrg		dx = xFixedToDouble(linear->p2.x - linear->p1.x);
1355428d7b3dSmrg		dy = xFixedToDouble(linear->p2.y - linear->p1.y);
1356428d7b3dSmrg
1357428d7b3dSmrg		x0 = xFixedToDouble(linear->p1.x);
1358428d7b3dSmrg		y0 = xFixedToDouble(linear->p1.y);
1359428d7b3dSmrg
1360428d7b3dSmrg		if (tx | ty) {
1361428d7b3dSmrg			x0 -= pixman_fixed_to_double(tx);
1362428d7b3dSmrg			y0 -= pixman_fixed_to_double(ty);
1363428d7b3dSmrg		}
1364428d7b3dSmrg	} else {
1365428d7b3dSmrg		struct pixman_f_vector p1, p2;
1366428d7b3dSmrg		struct pixman_f_transform m, inv;
1367428d7b3dSmrg
1368428d7b3dSmrg		pixman_f_transform_from_pixman_transform(&m, picture->transform);
1369428d7b3dSmrg		DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n",
1370428d7b3dSmrg		     __FUNCTION__,
1371428d7b3dSmrg		     m.m[0][0], m.m[0][1], m.m[0][2],
1372428d7b3dSmrg		     m.m[1][0], m.m[1][1], m.m[1][2],
1373428d7b3dSmrg		     m.m[2][0], m.m[2][1], m.m[2][2]));
1374428d7b3dSmrg		if (!pixman_f_transform_invert(&inv, &m))
1375428d7b3dSmrg			return 0;
1376428d7b3dSmrg
1377428d7b3dSmrg		p1.v[0] = pixman_fixed_to_double(linear->p1.x);
1378428d7b3dSmrg		p1.v[1] = pixman_fixed_to_double(linear->p1.y);
1379428d7b3dSmrg		p1.v[2] = 1.;
1380428d7b3dSmrg		pixman_f_transform_point(&inv, &p1);
1381428d7b3dSmrg
1382428d7b3dSmrg		p2.v[0] = pixman_fixed_to_double(linear->p2.x);
1383428d7b3dSmrg		p2.v[1] = pixman_fixed_to_double(linear->p2.y);
1384428d7b3dSmrg		p2.v[2] = 1.;
1385428d7b3dSmrg		pixman_f_transform_point(&inv, &p2);
1386428d7b3dSmrg
1387428d7b3dSmrg		DBG(("%s: untransformed: p1=(%f, %f, %f), p2=(%f, %f, %f)\n",
1388428d7b3dSmrg		     __FUNCTION__,
1389428d7b3dSmrg		     p1.v[0], p1.v[1], p1.v[2],
1390428d7b3dSmrg		     p2.v[0], p2.v[1], p2.v[2]));
1391428d7b3dSmrg
1392428d7b3dSmrg		dx = p2.v[0] - p1.v[0];
1393428d7b3dSmrg		dy = p2.v[1] - p1.v[1];
1394428d7b3dSmrg
1395428d7b3dSmrg		x0 = p1.v[0];
1396428d7b3dSmrg		y0 = p1.v[1];
1397428d7b3dSmrg	}
1398428d7b3dSmrg
1399428d7b3dSmrg	sf = dx*dx + dy*dy;
1400428d7b3dSmrg	dx /= sf;
1401428d7b3dSmrg	dy /= sf;
1402428d7b3dSmrg
1403428d7b3dSmrg	channel->u.linear.dx = dx;
1404428d7b3dSmrg	channel->u.linear.dy = dy;
1405428d7b3dSmrg	channel->u.linear.offset = -dx*(x0+dst_x-x) + -dy*(y0+dst_y-y);
1406428d7b3dSmrg
1407428d7b3dSmrg	DBG(("%s: dx=%f, dy=%f, offset=%f\n",
1408428d7b3dSmrg	     __FUNCTION__, dx, dy, channel->u.linear.offset));
1409428d7b3dSmrg
1410428d7b3dSmrg	return channel->bo != NULL;
1411428d7b3dSmrg}
1412428d7b3dSmrg
1413428d7b3dSmrgstatic bool source_is_covered(PicturePtr picture,
1414428d7b3dSmrg			      int x, int y,
1415428d7b3dSmrg			      int width, int height)
1416428d7b3dSmrg{
1417428d7b3dSmrg	int x1, y1, x2, y2;
1418428d7b3dSmrg
1419428d7b3dSmrg	if (picture->repeat && picture->repeatType != RepeatNone)
1420428d7b3dSmrg		return true;
1421428d7b3dSmrg
1422428d7b3dSmrg	if (picture->pDrawable == NULL)
1423428d7b3dSmrg		return false;
1424428d7b3dSmrg
1425428d7b3dSmrg	if (picture->transform) {
1426428d7b3dSmrg		pixman_box16_t sample;
1427428d7b3dSmrg
1428428d7b3dSmrg		sample.x1 = x;
1429428d7b3dSmrg		sample.y1 = y;
1430428d7b3dSmrg		sample.x2 = x + width;
1431428d7b3dSmrg		sample.y2 = y + height;
1432428d7b3dSmrg
1433428d7b3dSmrg		pixman_transform_bounds(picture->transform, &sample);
1434428d7b3dSmrg
1435428d7b3dSmrg		x1 = sample.x1;
1436428d7b3dSmrg		x2 = sample.x2;
1437428d7b3dSmrg		y1 = sample.y1;
1438428d7b3dSmrg		y2 = sample.y2;
1439428d7b3dSmrg	} else {
1440428d7b3dSmrg		x1 = x;
1441428d7b3dSmrg		y1 = y;
1442428d7b3dSmrg		x2 = x + width;
1443428d7b3dSmrg		y2 = y + height;
1444428d7b3dSmrg	}
1445428d7b3dSmrg
1446428d7b3dSmrg	return
1447428d7b3dSmrg		x1 >= 0 && y1 >= 0 &&
1448428d7b3dSmrg		x2 <= picture->pDrawable->width &&
1449428d7b3dSmrg		y2 <= picture->pDrawable->height;
1450428d7b3dSmrg}
1451428d7b3dSmrg
1452428d7b3dSmrgstatic bool
1453428d7b3dSmrggen2_check_card_format(struct sna *sna,
1454428d7b3dSmrg		       PicturePtr picture,
1455428d7b3dSmrg		       struct sna_composite_channel *channel,
1456428d7b3dSmrg		       int x, int y, int w, int h,
1457428d7b3dSmrg		       bool *fixup_alpha)
1458428d7b3dSmrg{
1459428d7b3dSmrg	uint32_t format = picture->format;
1460428d7b3dSmrg	unsigned int i;
1461428d7b3dSmrg
1462428d7b3dSmrg	for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) {
1463428d7b3dSmrg		if (i8xx_tex_formats[i].fmt == format)
1464428d7b3dSmrg			return true;
1465428d7b3dSmrg	}
1466428d7b3dSmrg
1467428d7b3dSmrg	for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) {
1468428d7b3dSmrg		if (i85x_tex_formats[i].fmt == format) {
1469428d7b3dSmrg			if (sna->kgem.gen >= 021)
1470428d7b3dSmrg				return true;
1471428d7b3dSmrg
1472428d7b3dSmrg			if (source_is_covered(picture, x, y, w,h)) {
1473428d7b3dSmrg				channel->is_opaque = true;
1474428d7b3dSmrg				return true;
1475428d7b3dSmrg			}
1476428d7b3dSmrg
1477428d7b3dSmrg			*fixup_alpha = true;
1478428d7b3dSmrg			return false;
1479428d7b3dSmrg		}
1480428d7b3dSmrg	}
1481428d7b3dSmrg
1482428d7b3dSmrg	*fixup_alpha = false;
1483428d7b3dSmrg	return false;
1484428d7b3dSmrg}
1485428d7b3dSmrg
1486428d7b3dSmrgstatic int
1487428d7b3dSmrggen2_composite_picture(struct sna *sna,
1488428d7b3dSmrg		       PicturePtr picture,
1489428d7b3dSmrg		       struct sna_composite_channel *channel,
1490428d7b3dSmrg		       int x, int y,
1491428d7b3dSmrg		       int w, int h,
1492428d7b3dSmrg		       int dst_x, int dst_y,
1493428d7b3dSmrg		       bool precise)
1494428d7b3dSmrg{
1495428d7b3dSmrg	PixmapPtr pixmap;
1496428d7b3dSmrg	uint32_t color;
1497428d7b3dSmrg	int16_t dx, dy;
1498428d7b3dSmrg	bool fixup_alpha;
1499428d7b3dSmrg
1500428d7b3dSmrg	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
1501428d7b3dSmrg	     __FUNCTION__, x, y, w, h, dst_x, dst_y));
1502428d7b3dSmrg
1503428d7b3dSmrg	channel->is_solid = false;
1504428d7b3dSmrg	channel->is_linear = false;
1505428d7b3dSmrg	channel->is_opaque = false;
1506428d7b3dSmrg	channel->is_affine = true;
1507428d7b3dSmrg	channel->transform = NULL;
1508428d7b3dSmrg	channel->card_format = -1;
1509428d7b3dSmrg
1510428d7b3dSmrg	if (sna_picture_is_solid(picture, &color))
1511428d7b3dSmrg		return gen2_composite_solid_init(sna, channel, color);
1512428d7b3dSmrg
1513428d7b3dSmrg	if (!gen2_check_repeat(picture)) {
1514428d7b3dSmrg		DBG(("%s -- fallback, unhandled repeat %d\n",
1515428d7b3dSmrg		     __FUNCTION__, picture->repeat));
1516428d7b3dSmrg		return sna_render_picture_fixup(sna, picture, channel,
1517428d7b3dSmrg						x, y, w, h, dst_x, dst_y);
1518428d7b3dSmrg	}
1519428d7b3dSmrg
1520428d7b3dSmrg	if (!gen2_check_filter(picture)) {
1521428d7b3dSmrg		DBG(("%s -- fallback, unhandled filter %d\n",
1522428d7b3dSmrg		     __FUNCTION__, picture->filter));
1523428d7b3dSmrg		return sna_render_picture_fixup(sna, picture, channel,
1524428d7b3dSmrg						x, y, w, h, dst_x, dst_y);
1525428d7b3dSmrg	}
1526428d7b3dSmrg
1527428d7b3dSmrg	if (picture->pDrawable == NULL) {
1528428d7b3dSmrg		int ret;
1529428d7b3dSmrg
1530428d7b3dSmrg		if (picture->pSourcePict->type == SourcePictTypeLinear)
1531428d7b3dSmrg			return gen2_composite_linear_init(sna, picture, channel,
1532428d7b3dSmrg							  x, y,
1533428d7b3dSmrg							  w, h,
1534428d7b3dSmrg							  dst_x, dst_y);
1535428d7b3dSmrg
1536428d7b3dSmrg		DBG(("%s -- fallback, unhandled source %d\n",
1537428d7b3dSmrg		     __FUNCTION__, picture->pSourcePict->type));
1538428d7b3dSmrg		ret = -1;
1539428d7b3dSmrg		if (!precise)
1540428d7b3dSmrg			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
1541428d7b3dSmrg								      x, y, w, h, dst_x, dst_y);
1542428d7b3dSmrg		if (ret == -1)
1543428d7b3dSmrg			ret = sna_render_picture_fixup(sna, picture, channel,
1544428d7b3dSmrg						       x, y, w, h, dst_x, dst_y);
1545428d7b3dSmrg		return ret;
1546428d7b3dSmrg	}
1547428d7b3dSmrg
1548428d7b3dSmrg	if (picture->alphaMap) {
1549428d7b3dSmrg		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
1550428d7b3dSmrg		return sna_render_picture_fixup(sna, picture, channel,
1551428d7b3dSmrg						x, y, w, h, dst_x, dst_y);
1552428d7b3dSmrg	}
1553428d7b3dSmrg
1554428d7b3dSmrg	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
1555428d7b3dSmrg	channel->filter = picture->filter;
1556428d7b3dSmrg
1557428d7b3dSmrg	pixmap = get_drawable_pixmap(picture->pDrawable);
1558428d7b3dSmrg	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
1559428d7b3dSmrg
1560428d7b3dSmrg	x += dx + picture->pDrawable->x;
1561428d7b3dSmrg	y += dy + picture->pDrawable->y;
1562428d7b3dSmrg
1563428d7b3dSmrg	channel->is_affine = sna_transform_is_affine(picture->transform);
1564428d7b3dSmrg	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
1565428d7b3dSmrg		DBG(("%s: integer translation (%d, %d), removing\n",
1566428d7b3dSmrg		     __FUNCTION__, dx, dy));
1567428d7b3dSmrg		x += dx;
1568428d7b3dSmrg		y += dy;
1569428d7b3dSmrg		channel->transform = NULL;
1570428d7b3dSmrg		channel->filter = PictFilterNearest;
1571428d7b3dSmrg
1572428d7b3dSmrg		if (channel->repeat &&
1573428d7b3dSmrg		    (x >= 0 &&
1574428d7b3dSmrg		     y >= 0 &&
1575428d7b3dSmrg		     x + w < pixmap->drawable.width &&
1576428d7b3dSmrg		     y + h < pixmap->drawable.height)) {
1577428d7b3dSmrg			struct sna_pixmap *priv = sna_pixmap(pixmap);
1578428d7b3dSmrg			if (priv && priv->clear) {
1579428d7b3dSmrg				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
1580428d7b3dSmrg				return gen2_composite_solid_init(sna, channel, priv->clear_color);
1581428d7b3dSmrg			}
1582428d7b3dSmrg		}
1583428d7b3dSmrg	} else
1584428d7b3dSmrg		channel->transform = picture->transform;
1585428d7b3dSmrg
1586428d7b3dSmrg	if (!gen2_check_card_format(sna, picture, channel, x,  y, w ,h, &fixup_alpha))
1587428d7b3dSmrg		return sna_render_picture_convert(sna, picture, channel, pixmap,
1588428d7b3dSmrg						  x, y, w, h, dst_x, dst_y, fixup_alpha);
1589428d7b3dSmrg
1590428d7b3dSmrg	channel->pict_format = picture->format;
1591428d7b3dSmrg	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
1592428d7b3dSmrg		return sna_render_picture_extract(sna, picture, channel,
1593428d7b3dSmrg						  x, y, w, h, dst_x, dst_y);
1594428d7b3dSmrg
1595428d7b3dSmrg	return sna_render_pixmap_bo(sna, channel, pixmap,
1596428d7b3dSmrg				    x, y, w, h, dst_x, dst_y);
1597428d7b3dSmrg}
1598428d7b3dSmrg
1599428d7b3dSmrgstatic bool
1600428d7b3dSmrggen2_composite_set_target(struct sna *sna,
1601428d7b3dSmrg			  struct sna_composite_op *op,
1602428d7b3dSmrg			  PicturePtr dst,
1603428d7b3dSmrg			  int x, int y, int w, int h,
1604428d7b3dSmrg			  bool partial)
1605428d7b3dSmrg{
1606428d7b3dSmrg	BoxRec box;
1607428d7b3dSmrg	unsigned hint;
1608428d7b3dSmrg
1609428d7b3dSmrg	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
1610428d7b3dSmrg	op->dst.format = dst->format;
1611428d7b3dSmrg	op->dst.width = op->dst.pixmap->drawable.width;
1612428d7b3dSmrg	op->dst.height = op->dst.pixmap->drawable.height;
1613428d7b3dSmrg
1614428d7b3dSmrg	if (w && h) {
1615428d7b3dSmrg		box.x1 = x;
1616428d7b3dSmrg		box.y1 = y;
1617428d7b3dSmrg		box.x2 = x + w;
1618428d7b3dSmrg		box.y2 = y + h;
1619428d7b3dSmrg	} else
1620428d7b3dSmrg		sna_render_picture_extents(dst, &box);
1621428d7b3dSmrg
1622428d7b3dSmrg	hint = PREFER_GPU | FORCE_GPU | RENDER_GPU;
1623428d7b3dSmrg	if (!partial) {
1624428d7b3dSmrg		hint |= IGNORE_DAMAGE;
1625428d7b3dSmrg		if (w == op->dst.width && h == op->dst.height)
1626428d7b3dSmrg			hint |= REPLACES;
1627428d7b3dSmrg	}
1628428d7b3dSmrg
1629428d7b3dSmrg	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
1630428d7b3dSmrg	if (op->dst.bo == NULL)
1631428d7b3dSmrg		return false;
1632428d7b3dSmrg
1633428d7b3dSmrg	if (hint & REPLACES) {
1634428d7b3dSmrg		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
1635428d7b3dSmrg		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
1636428d7b3dSmrg	}
1637428d7b3dSmrg
1638428d7b3dSmrg	assert((op->dst.bo->pitch & 7) == 0);
1639428d7b3dSmrg
1640428d7b3dSmrg	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
1641428d7b3dSmrg			    &op->dst.x, &op->dst.y);
1642428d7b3dSmrg
1643428d7b3dSmrg	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
1644428d7b3dSmrg	     __FUNCTION__,
1645428d7b3dSmrg	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
1646428d7b3dSmrg	     op->dst.width, op->dst.height,
1647428d7b3dSmrg	     op->dst.bo->pitch,
1648428d7b3dSmrg	     op->dst.x, op->dst.y,
1649428d7b3dSmrg	     op->damage ? *op->damage : (void *)-1));
1650428d7b3dSmrg
1651428d7b3dSmrg	assert(op->dst.bo->proxy == NULL);
1652428d7b3dSmrg
1653428d7b3dSmrg	if (((too_large(op->dst.width, op->dst.height) ||
1654428d7b3dSmrg	      op->dst.bo->pitch > MAX_3D_PITCH)) &&
1655428d7b3dSmrg	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
1656428d7b3dSmrg		return false;
1657428d7b3dSmrg
1658428d7b3dSmrg	return true;
1659428d7b3dSmrg}
1660428d7b3dSmrg
1661428d7b3dSmrgstatic bool
1662428d7b3dSmrgis_unhandled_gradient(PicturePtr picture, bool precise)
1663428d7b3dSmrg{
1664428d7b3dSmrg	if (picture->pDrawable)
1665428d7b3dSmrg		return false;
1666428d7b3dSmrg
1667428d7b3dSmrg	switch (picture->pSourcePict->type) {
1668428d7b3dSmrg	case SourcePictTypeSolidFill:
1669428d7b3dSmrg	case SourcePictTypeLinear:
1670428d7b3dSmrg		return false;
1671428d7b3dSmrg	default:
1672428d7b3dSmrg		return precise;
1673428d7b3dSmrg	}
1674428d7b3dSmrg}
1675428d7b3dSmrg
1676428d7b3dSmrgstatic bool
1677428d7b3dSmrghas_alphamap(PicturePtr p)
1678428d7b3dSmrg{
1679428d7b3dSmrg	return p->alphaMap != NULL;
1680428d7b3dSmrg}
1681428d7b3dSmrg
1682428d7b3dSmrgstatic bool
1683428d7b3dSmrgneed_upload(PicturePtr p)
1684428d7b3dSmrg{
1685428d7b3dSmrg	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
1686428d7b3dSmrg}
1687428d7b3dSmrg
1688428d7b3dSmrgstatic bool
1689428d7b3dSmrgsource_is_busy(PixmapPtr pixmap)
1690428d7b3dSmrg{
1691428d7b3dSmrg	struct sna_pixmap *priv = sna_pixmap(pixmap);
1692428d7b3dSmrg	if (priv == NULL)
1693428d7b3dSmrg		return false;
1694428d7b3dSmrg
1695428d7b3dSmrg	if (priv->clear)
1696428d7b3dSmrg		return false;
1697428d7b3dSmrg
1698428d7b3dSmrg	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
1699428d7b3dSmrg		return true;
1700428d7b3dSmrg
1701428d7b3dSmrg	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
1702428d7b3dSmrg		return true;
1703428d7b3dSmrg
1704428d7b3dSmrg	return priv->gpu_damage && !priv->cpu_damage;
1705428d7b3dSmrg}
1706428d7b3dSmrg
1707428d7b3dSmrgstatic bool
1708428d7b3dSmrgsource_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
1709428d7b3dSmrg{
1710428d7b3dSmrg	if (sna_picture_is_solid(p, NULL))
1711428d7b3dSmrg		return false;
1712428d7b3dSmrg
1713428d7b3dSmrg	if (is_unhandled_gradient(p, precise) || !gen2_check_repeat(p))
1714428d7b3dSmrg		return true;
1715428d7b3dSmrg
1716428d7b3dSmrg	if (pixmap && source_is_busy(pixmap))
1717428d7b3dSmrg		return false;
1718428d7b3dSmrg
1719428d7b3dSmrg	return has_alphamap(p) || !gen2_check_filter(p) || need_upload(p);
1720428d7b3dSmrg}
1721428d7b3dSmrg
1722428d7b3dSmrgstatic bool
1723428d7b3dSmrggen2_composite_fallback(struct sna *sna,
1724428d7b3dSmrg			PicturePtr src,
1725428d7b3dSmrg			PicturePtr mask,
1726428d7b3dSmrg			PicturePtr dst)
1727428d7b3dSmrg{
1728428d7b3dSmrg	PixmapPtr src_pixmap;
1729428d7b3dSmrg	PixmapPtr mask_pixmap;
1730428d7b3dSmrg	PixmapPtr dst_pixmap;
1731428d7b3dSmrg	bool src_fallback, mask_fallback;
1732428d7b3dSmrg
1733428d7b3dSmrg	if (!gen2_check_dst_format(dst->format)) {
1734428d7b3dSmrg		DBG(("%s: unknown destination format: %d\n",
1735428d7b3dSmrg		     __FUNCTION__, dst->format));
1736428d7b3dSmrg		return true;
1737428d7b3dSmrg	}
1738428d7b3dSmrg
1739428d7b3dSmrg	dst_pixmap = get_drawable_pixmap(dst->pDrawable);
1740428d7b3dSmrg
1741428d7b3dSmrg	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
1742428d7b3dSmrg	src_fallback = source_fallback(src, src_pixmap,
1743428d7b3dSmrg				       dst->polyMode == PolyModePrecise);
1744428d7b3dSmrg
1745428d7b3dSmrg	if (mask) {
1746428d7b3dSmrg		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
1747428d7b3dSmrg		mask_fallback = source_fallback(mask, mask_pixmap,
1748428d7b3dSmrg						dst->polyMode == PolyModePrecise);
1749428d7b3dSmrg	} else {
1750428d7b3dSmrg		mask_pixmap = NULL;
1751428d7b3dSmrg		mask_fallback = NULL;
1752428d7b3dSmrg	}
1753428d7b3dSmrg
1754428d7b3dSmrg	/* If we are using the destination as a source and need to
1755428d7b3dSmrg	 * readback in order to upload the source, do it all
1756428d7b3dSmrg	 * on the cpu.
1757428d7b3dSmrg	 */
1758428d7b3dSmrg	if (src_pixmap == dst_pixmap && src_fallback) {
1759428d7b3dSmrg		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
1760428d7b3dSmrg		return true;
1761428d7b3dSmrg	}
1762428d7b3dSmrg	if (mask_pixmap == dst_pixmap && mask_fallback) {
1763428d7b3dSmrg		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
1764428d7b3dSmrg		return true;
1765428d7b3dSmrg	}
1766428d7b3dSmrg
1767428d7b3dSmrg	/* If anything is on the GPU, push everything out to the GPU */
1768428d7b3dSmrg	if (dst_use_gpu(dst_pixmap)) {
1769428d7b3dSmrg		DBG(("%s: dst is already on the GPU, try to use GPU\n",
1770428d7b3dSmrg		     __FUNCTION__));
1771428d7b3dSmrg		return false;
1772428d7b3dSmrg	}
1773428d7b3dSmrg
1774428d7b3dSmrg	if (src_pixmap && !src_fallback) {
1775428d7b3dSmrg		DBG(("%s: src is already on the GPU, try to use GPU\n",
1776428d7b3dSmrg		     __FUNCTION__));
1777428d7b3dSmrg		return false;
1778428d7b3dSmrg	}
1779428d7b3dSmrg	if (mask_pixmap && !mask_fallback) {
1780428d7b3dSmrg		DBG(("%s: mask is already on the GPU, try to use GPU\n",
1781428d7b3dSmrg		     __FUNCTION__));
1782428d7b3dSmrg		return false;
1783428d7b3dSmrg	}
1784428d7b3dSmrg
1785428d7b3dSmrg	/* However if the dst is not on the GPU and we need to
1786428d7b3dSmrg	 * render one of the sources using the CPU, we may
1787428d7b3dSmrg	 * as well do the entire operation in place onthe CPU.
1788428d7b3dSmrg	 */
1789428d7b3dSmrg	if (src_fallback) {
1790428d7b3dSmrg		DBG(("%s: dst is on the CPU and src will fallback\n",
1791428d7b3dSmrg		     __FUNCTION__));
1792428d7b3dSmrg		return true;
1793428d7b3dSmrg	}
1794428d7b3dSmrg
1795428d7b3dSmrg	if (mask && mask_fallback) {
1796428d7b3dSmrg		DBG(("%s: dst is on the CPU and mask will fallback\n",
1797428d7b3dSmrg		     __FUNCTION__));
1798428d7b3dSmrg		return true;
1799428d7b3dSmrg	}
1800428d7b3dSmrg
1801428d7b3dSmrg	if (too_large(dst_pixmap->drawable.width,
1802428d7b3dSmrg		      dst_pixmap->drawable.height) &&
1803428d7b3dSmrg	    dst_is_cpu(dst_pixmap)) {
1804428d7b3dSmrg		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
1805428d7b3dSmrg		return true;
1806428d7b3dSmrg	}
1807428d7b3dSmrg
1808428d7b3dSmrg	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
1809428d7b3dSmrg	     __FUNCTION__));
1810428d7b3dSmrg	return dst_use_cpu(dst_pixmap);
1811428d7b3dSmrg}
1812428d7b3dSmrg
1813428d7b3dSmrgstatic int
1814428d7b3dSmrgreuse_source(struct sna *sna,
1815428d7b3dSmrg	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
1816428d7b3dSmrg	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
1817428d7b3dSmrg{
1818428d7b3dSmrg	uint32_t color;
1819428d7b3dSmrg
1820428d7b3dSmrg	if (src_x != msk_x || src_y != msk_y)
1821428d7b3dSmrg		return false;
1822428d7b3dSmrg
1823428d7b3dSmrg	if (sna_picture_is_solid(mask, &color))
1824428d7b3dSmrg		return gen2_composite_solid_init(sna, mc, color);
1825428d7b3dSmrg
1826428d7b3dSmrg	if (sc->is_solid)
1827428d7b3dSmrg		return false;
1828428d7b3dSmrg
1829428d7b3dSmrg	if (src == mask) {
1830428d7b3dSmrg		DBG(("%s: mask is source\n", __FUNCTION__));
1831428d7b3dSmrg		*mc = *sc;
1832428d7b3dSmrg		mc->bo = kgem_bo_reference(mc->bo);
1833428d7b3dSmrg		return true;
1834428d7b3dSmrg	}
1835428d7b3dSmrg
1836428d7b3dSmrg	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
1837428d7b3dSmrg		return false;
1838428d7b3dSmrg
1839428d7b3dSmrg	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));
1840428d7b3dSmrg
1841428d7b3dSmrg	if (!sna_transform_equal(src->transform, mask->transform))
1842428d7b3dSmrg		return false;
1843428d7b3dSmrg
1844428d7b3dSmrg	if (!sna_picture_alphamap_equal(src, mask))
1845428d7b3dSmrg		return false;
1846428d7b3dSmrg
1847428d7b3dSmrg	if (!gen2_check_repeat(mask))
1848428d7b3dSmrg		return false;
1849428d7b3dSmrg
1850428d7b3dSmrg	if (!gen2_check_filter(mask))
1851428d7b3dSmrg		return false;
1852428d7b3dSmrg
1853428d7b3dSmrg	if (!gen2_check_format(sna, mask))
1854428d7b3dSmrg		return false;
1855428d7b3dSmrg
1856428d7b3dSmrg	DBG(("%s: reusing source channel for mask with a twist\n",
1857428d7b3dSmrg	     __FUNCTION__));
1858428d7b3dSmrg
1859428d7b3dSmrg	*mc = *sc;
1860428d7b3dSmrg	mc->repeat = mask->repeat ? mask->repeatType : RepeatNone;
1861428d7b3dSmrg	mc->filter = mask->filter;
1862428d7b3dSmrg	mc->pict_format = mask->format;
1863428d7b3dSmrg	mc->bo = kgem_bo_reference(mc->bo);
1864428d7b3dSmrg	return true;
1865428d7b3dSmrg}
1866428d7b3dSmrg
1867428d7b3dSmrgstatic bool
1868428d7b3dSmrggen2_render_composite(struct sna *sna,
1869428d7b3dSmrg		      uint8_t op,
1870428d7b3dSmrg		      PicturePtr src,
1871428d7b3dSmrg		      PicturePtr mask,
1872428d7b3dSmrg		      PicturePtr dst,
1873428d7b3dSmrg		      int16_t src_x,  int16_t src_y,
1874428d7b3dSmrg		      int16_t mask_x, int16_t mask_y,
1875428d7b3dSmrg		      int16_t dst_x,  int16_t dst_y,
1876428d7b3dSmrg		      int16_t width,  int16_t height,
1877428d7b3dSmrg		      unsigned flags,
1878428d7b3dSmrg		      struct sna_composite_op *tmp)
1879428d7b3dSmrg{
1880428d7b3dSmrg	DBG(("%s()\n", __FUNCTION__));
1881428d7b3dSmrg
1882428d7b3dSmrg	if (op >= ARRAY_SIZE(gen2_blend_op)) {
1883428d7b3dSmrg		DBG(("%s: fallback due to unhandled blend op: %d\n",
1884428d7b3dSmrg		     __FUNCTION__, op));
1885428d7b3dSmrg		return false;
1886428d7b3dSmrg	}
1887428d7b3dSmrg
1888428d7b3dSmrg	if (mask == NULL &&
1889428d7b3dSmrg	    sna_blt_composite(sna, op, src, dst,
1890428d7b3dSmrg			      src_x, src_y,
1891428d7b3dSmrg			      dst_x, dst_y,
1892428d7b3dSmrg			      width, height,
1893428d7b3dSmrg			      flags, tmp))
1894428d7b3dSmrg		return true;
1895428d7b3dSmrg
1896428d7b3dSmrg	if (gen2_composite_fallback(sna, src, mask, dst))
1897428d7b3dSmrg		goto fallback;
1898428d7b3dSmrg
1899428d7b3dSmrg	if (need_tiling(sna, width, height))
1900428d7b3dSmrg		return sna_tiling_composite(op, src, mask, dst,
1901428d7b3dSmrg					    src_x,  src_y,
1902428d7b3dSmrg					    mask_x, mask_y,
1903428d7b3dSmrg					    dst_x,  dst_y,
1904428d7b3dSmrg					    width,  height,
1905428d7b3dSmrg					    tmp);
1906428d7b3dSmrg
1907428d7b3dSmrg	tmp->op = op;
1908428d7b3dSmrg	sna_render_composite_redirect_init(tmp);
1909428d7b3dSmrg
1910428d7b3dSmrg	if (!gen2_composite_set_target(sna, tmp, dst,
1911428d7b3dSmrg				       dst_x, dst_y, width, height,
1912428d7b3dSmrg				       flags & COMPOSITE_PARTIAL || op > PictOpSrc)) {
1913428d7b3dSmrg		DBG(("%s: unable to set render target\n",
1914428d7b3dSmrg		     __FUNCTION__));
1915428d7b3dSmrg		goto fallback;
1916428d7b3dSmrg	}
1917428d7b3dSmrg
1918428d7b3dSmrg	switch (gen2_composite_picture(sna, src, &tmp->src,
1919428d7b3dSmrg				       src_x, src_y,
1920428d7b3dSmrg				       width, height,
1921428d7b3dSmrg				       dst_x, dst_y,
1922428d7b3dSmrg				       dst->polyMode == PolyModePrecise)) {
1923428d7b3dSmrg	case -1:
1924428d7b3dSmrg		DBG(("%s: fallback -- unable to prepare source\n",
1925428d7b3dSmrg		     __FUNCTION__));
1926428d7b3dSmrg		goto cleanup_dst;
1927428d7b3dSmrg	case 0:
1928428d7b3dSmrg		gen2_composite_solid_init(sna, &tmp->src, 0);
1929428d7b3dSmrg		break;
1930428d7b3dSmrg	case 1:
1931428d7b3dSmrg		if (mask == NULL && tmp->src.bo &&
1932428d7b3dSmrg		    sna_blt_composite__convert(sna,
1933428d7b3dSmrg					       dst_x, dst_y, width, height,
1934428d7b3dSmrg					       tmp))
1935428d7b3dSmrg			return true;
1936428d7b3dSmrg		break;
1937428d7b3dSmrg	}
1938428d7b3dSmrg
1939428d7b3dSmrg	if (mask) {
1940428d7b3dSmrg		if (!reuse_source(sna,
1941428d7b3dSmrg				  src, &tmp->src, src_x, src_y,
1942428d7b3dSmrg				  mask, &tmp->mask, mask_x, mask_y)) {
1943428d7b3dSmrg			switch (gen2_composite_picture(sna, mask, &tmp->mask,
1944428d7b3dSmrg						       mask_x, mask_y,
1945428d7b3dSmrg						       width,  height,
1946428d7b3dSmrg						       dst_x,  dst_y,
1947428d7b3dSmrg						       dst->polyMode == PolyModePrecise)) {
1948428d7b3dSmrg			case -1:
1949428d7b3dSmrg				DBG(("%s: fallback -- unable to prepare mask\n",
1950428d7b3dSmrg				     __FUNCTION__));
1951428d7b3dSmrg				goto cleanup_src;
1952428d7b3dSmrg			case 0:
1953428d7b3dSmrg				gen2_composite_solid_init(sna, &tmp->mask, 0);
1954428d7b3dSmrg			case 1:
1955428d7b3dSmrg				break;
1956428d7b3dSmrg			}
1957428d7b3dSmrg		}
1958428d7b3dSmrg
1959428d7b3dSmrg		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
1960428d7b3dSmrg			/* Check if it's component alpha that relies on a source alpha
1961428d7b3dSmrg			 * and on the source value.  We can only get one of those
1962428d7b3dSmrg			 * into the single source value that we get to blend with.
1963428d7b3dSmrg			 */
1964428d7b3dSmrg			tmp->has_component_alpha = true;
1965428d7b3dSmrg			if (gen2_blend_op[op].src_alpha &&
1966428d7b3dSmrg			    (gen2_blend_op[op].src_blend != BLENDFACTOR_ZERO)) {
1967428d7b3dSmrg				if (op != PictOpOver) {
1968428d7b3dSmrg					DBG(("%s: fallback -- unsupported CA blend (src_blend=%d)\n",
1969428d7b3dSmrg					     __FUNCTION__,
1970428d7b3dSmrg					     gen2_blend_op[op].src_blend));
1971428d7b3dSmrg					goto cleanup_src;
1972428d7b3dSmrg				}
1973428d7b3dSmrg
1974428d7b3dSmrg				tmp->need_magic_ca_pass = true;
1975428d7b3dSmrg				tmp->op = PictOpOutReverse;
1976428d7b3dSmrg			}
1977428d7b3dSmrg		}
1978428d7b3dSmrg
1979428d7b3dSmrg		/* convert solid to a texture (pure convenience) */
1980428d7b3dSmrg		if (tmp->mask.is_solid && tmp->src.is_solid) {
1981428d7b3dSmrg			assert(tmp->mask.is_affine);
1982428d7b3dSmrg			tmp->mask.bo = sna_render_get_solid(sna, tmp->mask.u.gen2.pixel);
1983428d7b3dSmrg			if (!tmp->mask.bo)
1984428d7b3dSmrg				goto cleanup_src;
1985428d7b3dSmrg		}
1986428d7b3dSmrg	}
1987428d7b3dSmrg
1988428d7b3dSmrg	tmp->floats_per_vertex = 2;
1989428d7b3dSmrg	if (!tmp->src.is_solid)
1990428d7b3dSmrg		tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 3;
1991428d7b3dSmrg	if (tmp->mask.bo)
1992428d7b3dSmrg		tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 3;
1993428d7b3dSmrg	tmp->floats_per_rect = 3*tmp->floats_per_vertex;
1994428d7b3dSmrg
1995428d7b3dSmrg	tmp->prim_emit = gen2_emit_composite_primitive;
1996428d7b3dSmrg	if (tmp->mask.bo) {
1997428d7b3dSmrg		if (tmp->mask.transform == NULL) {
1998428d7b3dSmrg			if (tmp->src.is_solid) {
1999428d7b3dSmrg				assert(tmp->floats_per_rect == 12);
2000428d7b3dSmrg#if defined(sse2) && !defined(__x86_64__)
2001428d7b3dSmrg				if (sna->cpu_features & SSE2) {
2002428d7b3dSmrg					tmp->prim_emit = gen2_emit_composite_primitive_constant_identity_mask__sse2;
2003428d7b3dSmrg				} else
2004428d7b3dSmrg#endif
2005428d7b3dSmrg				{
2006428d7b3dSmrg					tmp->prim_emit = gen2_emit_composite_primitive_constant_identity_mask;
2007428d7b3dSmrg				}
2008428d7b3dSmrg			}
2009428d7b3dSmrg		}
2010428d7b3dSmrg	} else {
2011428d7b3dSmrg		if (tmp->src.is_solid) {
2012428d7b3dSmrg			assert(tmp->floats_per_rect == 6);
2013428d7b3dSmrg#if defined(sse2) && !defined(__x86_64__)
2014428d7b3dSmrg			if (sna->cpu_features & SSE2) {
2015428d7b3dSmrg				tmp->prim_emit = gen2_emit_composite_primitive_constant__sse2;
2016428d7b3dSmrg			} else
2017428d7b3dSmrg#endif
2018428d7b3dSmrg			{
2019428d7b3dSmrg				tmp->prim_emit = gen2_emit_composite_primitive_constant;
2020428d7b3dSmrg			}
2021428d7b3dSmrg		} else if (tmp->src.is_linear) {
2022428d7b3dSmrg			assert(tmp->floats_per_rect == 12);
2023428d7b3dSmrg#if defined(sse2) && !defined(__x86_64__)
2024428d7b3dSmrg			if (sna->cpu_features & SSE2) {
2025428d7b3dSmrg				tmp->prim_emit = gen2_emit_composite_primitive_linear__sse2;
2026428d7b3dSmrg			} else
2027428d7b3dSmrg#endif
2028428d7b3dSmrg			{
2029428d7b3dSmrg				tmp->prim_emit = gen2_emit_composite_primitive_linear;
2030428d7b3dSmrg			}
2031428d7b3dSmrg		} else if (tmp->src.transform == NULL) {
2032428d7b3dSmrg			assert(tmp->floats_per_rect == 12);
2033428d7b3dSmrg#if defined(sse2) && !defined(__x86_64__)
2034428d7b3dSmrg			if (sna->cpu_features & SSE2) {
2035428d7b3dSmrg				tmp->prim_emit = gen2_emit_composite_primitive_identity__sse2;
2036428d7b3dSmrg			} else
2037428d7b3dSmrg#endif
2038428d7b3dSmrg			{
2039428d7b3dSmrg				tmp->prim_emit = gen2_emit_composite_primitive_identity;
2040428d7b3dSmrg			}
2041428d7b3dSmrg		} else if (tmp->src.is_affine) {
2042428d7b3dSmrg			assert(tmp->floats_per_rect == 12);
2043428d7b3dSmrg			tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
2044428d7b3dSmrg			tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
2045428d7b3dSmrg#if defined(sse2) && !defined(__x86_64__)
2046428d7b3dSmrg			if (sna->cpu_features & SSE2) {
2047428d7b3dSmrg				tmp->prim_emit = gen2_emit_composite_primitive_affine__sse2;
2048428d7b3dSmrg			} else
2049428d7b3dSmrg#endif
2050428d7b3dSmrg			{
2051428d7b3dSmrg				tmp->prim_emit = gen2_emit_composite_primitive_affine;
2052428d7b3dSmrg			}
2053428d7b3dSmrg		}
2054428d7b3dSmrg	}
2055428d7b3dSmrg
2056428d7b3dSmrg	tmp->blt   = gen2_render_composite_blt;
2057428d7b3dSmrg	tmp->box   = gen2_render_composite_box;
2058428d7b3dSmrg	tmp->boxes = gen2_render_composite_boxes;
2059428d7b3dSmrg	tmp->done  = gen2_render_composite_done;
2060428d7b3dSmrg
2061428d7b3dSmrg	if (!kgem_check_bo(&sna->kgem,
2062428d7b3dSmrg			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
2063428d7b3dSmrg			   NULL)) {
2064428d7b3dSmrg		kgem_submit(&sna->kgem);
2065428d7b3dSmrg		if (!kgem_check_bo(&sna->kgem,
2066428d7b3dSmrg				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
2067428d7b3dSmrg				   NULL)) {
2068428d7b3dSmrg			DBG(("%s: fallback, operation does not fit into GTT\n",
2069428d7b3dSmrg			     __FUNCTION__));
2070428d7b3dSmrg			goto cleanup_mask;
2071428d7b3dSmrg		}
2072428d7b3dSmrg	}
2073428d7b3dSmrg
2074428d7b3dSmrg	gen2_emit_composite_state(sna, tmp);
2075428d7b3dSmrg	return true;
2076428d7b3dSmrg
2077428d7b3dSmrgcleanup_mask:
2078428d7b3dSmrg	if (tmp->mask.bo) {
2079428d7b3dSmrg		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
2080428d7b3dSmrg		tmp->mask.bo = NULL;
2081428d7b3dSmrg	}
2082428d7b3dSmrgcleanup_src:
2083428d7b3dSmrg	if (tmp->src.bo) {
2084428d7b3dSmrg		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
2085428d7b3dSmrg		tmp->src.bo = NULL;
2086428d7b3dSmrg	}
2087428d7b3dSmrgcleanup_dst:
2088428d7b3dSmrg	if (tmp->redirect.real_bo) {
2089428d7b3dSmrg		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
2090428d7b3dSmrg		tmp->redirect.real_bo = NULL;
2091428d7b3dSmrg	}
2092428d7b3dSmrgfallback:
2093428d7b3dSmrg	return (mask == NULL &&
2094428d7b3dSmrg		sna_blt_composite(sna, op, src, dst,
2095428d7b3dSmrg				  src_x, src_y,
2096428d7b3dSmrg				  dst_x, dst_y,
2097428d7b3dSmrg				  width, height,
2098428d7b3dSmrg				  flags | COMPOSITE_FALLBACK, tmp));
2099428d7b3dSmrg}
2100428d7b3dSmrg
2101428d7b3dSmrgfastcall static void
2102428d7b3dSmrggen2_emit_composite_spans_primitive_constant(struct sna *sna,
2103428d7b3dSmrg					     const struct sna_composite_spans_op *op,
2104428d7b3dSmrg					     const BoxRec *box,
2105428d7b3dSmrg					     float opacity)
2106428d7b3dSmrg{
2107428d7b3dSmrg	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
2108428d7b3dSmrg	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
2109428d7b3dSmrg	sna->kgem.nbatch += 9;
2110428d7b3dSmrg
2111428d7b3dSmrg	v[0] = op->base.dst.x + box->x2;
2112428d7b3dSmrg	v[1] = op->base.dst.y + box->y2;
2113428d7b3dSmrg	*((uint32_t *)v + 2) = alpha;
2114428d7b3dSmrg
2115428d7b3dSmrg	v[3] = op->base.dst.x + box->x1;
2116428d7b3dSmrg	v[4] = v[1];
2117428d7b3dSmrg	*((uint32_t *)v + 5) = alpha;
2118428d7b3dSmrg
2119428d7b3dSmrg	v[6] = v[3];
2120428d7b3dSmrg	v[7] = op->base.dst.y + box->y1;
2121428d7b3dSmrg	*((uint32_t *)v + 8) = alpha;
2122428d7b3dSmrg}
2123428d7b3dSmrg
2124428d7b3dSmrgfastcall static void
2125428d7b3dSmrggen2_emit_composite_spans_primitive_linear(struct sna *sna,
2126428d7b3dSmrg					     const struct sna_composite_spans_op *op,
2127428d7b3dSmrg					     const BoxRec *box,
2128428d7b3dSmrg					     float opacity)
2129428d7b3dSmrg{
2130428d7b3dSmrg	union {
2131428d7b3dSmrg		float f;
2132428d7b3dSmrg		uint32_t u;
2133428d7b3dSmrg	} alpha;
2134428d7b3dSmrg
2135428d7b3dSmrg	alpha.u = (uint8_t)(255 * opacity) << 24;
2136428d7b3dSmrg
2137428d7b3dSmrg	gen2_emit_composite_dstcoord(sna,
2138428d7b3dSmrg				     op->base.dst.x + box->x2,
2139428d7b3dSmrg				     op->base.dst.y + box->y2);
2140428d7b3dSmrg	VERTEX(alpha.f);
2141428d7b3dSmrg	gen2_emit_composite_linear(sna, &op->base.src, box->x2, box->y2);
2142428d7b3dSmrg
2143428d7b3dSmrg	gen2_emit_composite_dstcoord(sna,
2144428d7b3dSmrg				     op->base.dst.x + box->x1,
2145428d7b3dSmrg				     op->base.dst.y + box->y2);
2146428d7b3dSmrg	VERTEX(alpha.f);
2147428d7b3dSmrg	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y2);
2148428d7b3dSmrg
2149428d7b3dSmrg	gen2_emit_composite_dstcoord(sna,
2150428d7b3dSmrg				     op->base.dst.x + box->x1,
2151428d7b3dSmrg				     op->base.dst.y + box->y1);
2152428d7b3dSmrg	VERTEX(alpha.f);
2153428d7b3dSmrg	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y1);
2154428d7b3dSmrg}
2155428d7b3dSmrg
2156428d7b3dSmrgfastcall static void
2157428d7b3dSmrggen2_emit_composite_spans_primitive_identity_source(struct sna *sna,
2158428d7b3dSmrg						    const struct sna_composite_spans_op *op,
2159428d7b3dSmrg						    const BoxRec *box,
2160428d7b3dSmrg						    float opacity)
2161428d7b3dSmrg{
2162428d7b3dSmrg	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
2163428d7b3dSmrg	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
2164428d7b3dSmrg	sna->kgem.nbatch += 15;
2165428d7b3dSmrg
2166428d7b3dSmrg	v[0] = op->base.dst.x + box->x2;
2167428d7b3dSmrg	v[1] = op->base.dst.y + box->y2;
2168428d7b3dSmrg	*((uint32_t *)v + 2) = alpha;
2169428d7b3dSmrg	v[3] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
2170428d7b3dSmrg	v[4] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
2171428d7b3dSmrg
2172428d7b3dSmrg	v[5] = op->base.dst.x + box->x1;
2173428d7b3dSmrg	v[6] = v[1];
2174428d7b3dSmrg	*((uint32_t *)v + 7) = alpha;
2175428d7b3dSmrg	v[8] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
2176428d7b3dSmrg	v[9] = v[4];
2177428d7b3dSmrg
2178428d7b3dSmrg	v[10] = v[5];
2179428d7b3dSmrg	v[11] = op->base.dst.y + box->y1;
2180428d7b3dSmrg	*((uint32_t *)v + 12) = alpha;
2181428d7b3dSmrg	v[13] = v[8];
2182428d7b3dSmrg	v[14] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
2183428d7b3dSmrg}
2184428d7b3dSmrg
2185428d7b3dSmrgfastcall static void
2186428d7b3dSmrggen2_emit_composite_spans_primitive_affine_source(struct sna *sna,
2187428d7b3dSmrg						  const struct sna_composite_spans_op *op,
2188428d7b3dSmrg						  const BoxRec *box,
2189428d7b3dSmrg						  float opacity)
2190428d7b3dSmrg{
2191428d7b3dSmrg	PictTransform *transform = op->base.src.transform;
2192428d7b3dSmrg	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
2193428d7b3dSmrg	float *v;
2194428d7b3dSmrg
2195428d7b3dSmrg	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
2196428d7b3dSmrg	sna->kgem.nbatch += 15;
2197428d7b3dSmrg
2198428d7b3dSmrg	v[0]  = op->base.dst.x + box->x2;
2199428d7b3dSmrg	v[6]  = v[1] = op->base.dst.y + box->y2;
2200428d7b3dSmrg	v[10] = v[5] = op->base.dst.x + box->x1;
2201428d7b3dSmrg	v[11] = op->base.dst.y + box->y1;
2202428d7b3dSmrg	*((uint32_t *)v + 2) = alpha;
2203428d7b3dSmrg	*((uint32_t *)v + 7) = alpha;
2204428d7b3dSmrg	*((uint32_t *)v + 12) = alpha;
2205428d7b3dSmrg
2206428d7b3dSmrg	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
2207428d7b3dSmrg				    (int)op->base.src.offset[1] + box->y2,
2208428d7b3dSmrg				    transform, op->base.src.scale,
2209428d7b3dSmrg				    &v[3], &v[4]);
2210428d7b3dSmrg
2211428d7b3dSmrg	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
2212428d7b3dSmrg				    (int)op->base.src.offset[1] + box->y2,
2213428d7b3dSmrg				    transform, op->base.src.scale,
2214428d7b3dSmrg				    &v[8], &v[9]);
2215428d7b3dSmrg
2216428d7b3dSmrg	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
2217428d7b3dSmrg				    (int)op->base.src.offset[1] + box->y1,
2218428d7b3dSmrg				    transform, op->base.src.scale,
2219428d7b3dSmrg				    &v[13], &v[14]);
2220428d7b3dSmrg}
2221428d7b3dSmrg
2222428d7b3dSmrg#if defined(sse2) && !defined(__x86_64__)
2223428d7b3dSmrgsse2 fastcall static void
2224428d7b3dSmrggen2_emit_composite_spans_primitive_constant__sse2(struct sna *sna,
2225428d7b3dSmrg						   const struct sna_composite_spans_op *op,
2226428d7b3dSmrg						   const BoxRec *box,
2227428d7b3dSmrg						   float opacity)
2228428d7b3dSmrg{
2229428d7b3dSmrg	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
2230428d7b3dSmrg	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
2231428d7b3dSmrg	sna->kgem.nbatch += 9;
2232428d7b3dSmrg
2233428d7b3dSmrg	v[0] = op->base.dst.x + box->x2;
2234428d7b3dSmrg	v[1] = op->base.dst.y + box->y2;
2235428d7b3dSmrg	*((uint32_t *)v + 2) = alpha;
2236428d7b3dSmrg
2237428d7b3dSmrg	v[3] = op->base.dst.x + box->x1;
2238428d7b3dSmrg	v[4] = v[1];
2239428d7b3dSmrg	*((uint32_t *)v + 5) = alpha;
2240428d7b3dSmrg
2241428d7b3dSmrg	v[6] = v[3];
2242428d7b3dSmrg	v[7] = op->base.dst.y + box->y1;
2243428d7b3dSmrg	*((uint32_t *)v + 8) = alpha;
2244428d7b3dSmrg}
2245428d7b3dSmrg
2246428d7b3dSmrgsse2 fastcall static void
2247428d7b3dSmrggen2_emit_composite_spans_primitive_linear__sse2(struct sna *sna,
2248428d7b3dSmrg						 const struct sna_composite_spans_op *op,
2249428d7b3dSmrg						 const BoxRec *box,
2250428d7b3dSmrg						 float opacity)
2251428d7b3dSmrg{
2252428d7b3dSmrg	union {
2253428d7b3dSmrg		float f;
2254428d7b3dSmrg		uint32_t u;
2255428d7b3dSmrg	} alpha;
2256428d7b3dSmrg
2257428d7b3dSmrg	alpha.u = (uint8_t)(255 * opacity) << 24;
2258428d7b3dSmrg
2259428d7b3dSmrg	gen2_emit_composite_dstcoord(sna,
2260428d7b3dSmrg				     op->base.dst.x + box->x2,
2261428d7b3dSmrg				     op->base.dst.y + box->y2);
2262428d7b3dSmrg	VERTEX(alpha.f);
2263428d7b3dSmrg	gen2_emit_composite_linear(sna, &op->base.src, box->x2, box->y2);
2264428d7b3dSmrg
2265428d7b3dSmrg	gen2_emit_composite_dstcoord(sna,
2266428d7b3dSmrg				     op->base.dst.x + box->x1,
2267428d7b3dSmrg				     op->base.dst.y + box->y2);
2268428d7b3dSmrg	VERTEX(alpha.f);
2269428d7b3dSmrg	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y2);
2270428d7b3dSmrg
2271428d7b3dSmrg	gen2_emit_composite_dstcoord(sna,
2272428d7b3dSmrg				     op->base.dst.x + box->x1,
2273428d7b3dSmrg				     op->base.dst.y + box->y1);
2274428d7b3dSmrg	VERTEX(alpha.f);
2275428d7b3dSmrg	gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y1);
2276428d7b3dSmrg}
2277428d7b3dSmrg
2278428d7b3dSmrgsse2 fastcall static void
2279428d7b3dSmrggen2_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna,
2280428d7b3dSmrg							  const struct sna_composite_spans_op *op,
2281428d7b3dSmrg							  const BoxRec *box,
2282428d7b3dSmrg							  float opacity)
2283428d7b3dSmrg{
2284428d7b3dSmrg	float *v = (float *)sna->kgem.batch + sna->kgem.nbatch;
2285428d7b3dSmrg	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
2286428d7b3dSmrg	sna->kgem.nbatch += 15;
2287428d7b3dSmrg
2288428d7b3dSmrg	v[0] = op->base.dst.x + box->x2;
2289428d7b3dSmrg	v[1] = op->base.dst.y + box->y2;
2290428d7b3dSmrg	*((uint32_t *)v + 2) = alpha;
2291428d7b3dSmrg	v[3] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
2292428d7b3dSmrg	v[4] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
2293428d7b3dSmrg
2294428d7b3dSmrg	v[5] = op->base.dst.x + box->x1;
2295428d7b3dSmrg	v[6] = v[1];
2296428d7b3dSmrg	*((uint32_t *)v + 7) = alpha;
2297428d7b3dSmrg	v[8] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
2298428d7b3dSmrg	v[9] = v[4];
2299428d7b3dSmrg
2300428d7b3dSmrg	v[10] = v[5];
2301428d7b3dSmrg	v[11] = op->base.dst.y + box->y1;
2302428d7b3dSmrg	*((uint32_t *)v + 12) = alpha;
2303428d7b3dSmrg	v[13] = v[8];
2304428d7b3dSmrg	v[14] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
2305428d7b3dSmrg}
2306428d7b3dSmrg
2307428d7b3dSmrgsse2 fastcall static void
2308428d7b3dSmrggen2_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna,
2309428d7b3dSmrg							const struct sna_composite_spans_op *op,
2310428d7b3dSmrg							const BoxRec *box,
2311428d7b3dSmrg							float opacity)
2312428d7b3dSmrg{
2313428d7b3dSmrg	PictTransform *transform = op->base.src.transform;
2314428d7b3dSmrg	uint32_t alpha = (uint8_t)(255 * opacity) << 24;
2315428d7b3dSmrg	float *v;
2316428d7b3dSmrg
2317428d7b3dSmrg	v = (float *)sna->kgem.batch + sna->kgem.nbatch;
2318428d7b3dSmrg	sna->kgem.nbatch += 15;
2319428d7b3dSmrg
2320428d7b3dSmrg	v[0]  = op->base.dst.x + box->x2;
2321428d7b3dSmrg	v[6]  = v[1] = op->base.dst.y + box->y2;
2322428d7b3dSmrg	v[10] = v[5] = op->base.dst.x + box->x1;
2323428d7b3dSmrg	v[11] = op->base.dst.y + box->y1;
2324428d7b3dSmrg	*((uint32_t *)v + 2) = alpha;
2325428d7b3dSmrg	*((uint32_t *)v + 7) = alpha;
2326428d7b3dSmrg	*((uint32_t *)v + 12) = alpha;
2327428d7b3dSmrg
2328428d7b3dSmrg	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
2329428d7b3dSmrg				    (int)op->base.src.offset[1] + box->y2,
2330428d7b3dSmrg				    transform, op->base.src.scale,
2331428d7b3dSmrg				    &v[3], &v[4]);
2332428d7b3dSmrg
2333428d7b3dSmrg	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
2334428d7b3dSmrg				    (int)op->base.src.offset[1] + box->y2,
2335428d7b3dSmrg				    transform, op->base.src.scale,
2336428d7b3dSmrg				    &v[8], &v[9]);
2337428d7b3dSmrg
2338428d7b3dSmrg	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
2339428d7b3dSmrg				    (int)op->base.src.offset[1] + box->y1,
2340428d7b3dSmrg				    transform, op->base.src.scale,
2341428d7b3dSmrg				    &v[13], &v[14]);
2342428d7b3dSmrg}
2343428d7b3dSmrg#endif
2344428d7b3dSmrg
2345428d7b3dSmrgstatic void
2346428d7b3dSmrggen2_emit_composite_spans_vertex(struct sna *sna,
2347428d7b3dSmrg				 const struct sna_composite_spans_op *op,
2348428d7b3dSmrg				 int16_t x, int16_t y,
2349428d7b3dSmrg				 float opacity)
2350428d7b3dSmrg{
2351428d7b3dSmrg	gen2_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y);
2352428d7b3dSmrg	BATCH((uint8_t)(opacity * 255) << 24);
2353428d7b3dSmrg	assert(!op->base.src.is_solid);
2354428d7b3dSmrg	if (op->base.src.is_linear)
2355428d7b3dSmrg		gen2_emit_composite_linear(sna, &op->base.src, x, y);
2356428d7b3dSmrg	else
2357428d7b3dSmrg		gen2_emit_composite_texcoord(sna, &op->base.src, x, y);
2358428d7b3dSmrg}
2359428d7b3dSmrg
2360428d7b3dSmrgfastcall static void
2361428d7b3dSmrggen2_emit_composite_spans_primitive(struct sna *sna,
2362428d7b3dSmrg				    const struct sna_composite_spans_op *op,
2363428d7b3dSmrg				    const BoxRec *box,
2364428d7b3dSmrg				    float opacity)
2365428d7b3dSmrg{
2366428d7b3dSmrg	gen2_emit_composite_spans_vertex(sna, op, box->x2, box->y2, opacity);
2367428d7b3dSmrg	gen2_emit_composite_spans_vertex(sna, op, box->x1, box->y2, opacity);
2368428d7b3dSmrg	gen2_emit_composite_spans_vertex(sna, op, box->x1, box->y1, opacity);
2369428d7b3dSmrg}
2370428d7b3dSmrg
2371428d7b3dSmrgstatic void
2372428d7b3dSmrggen2_emit_spans_pipeline(struct sna *sna,
2373428d7b3dSmrg			 const struct sna_composite_spans_op *op)
2374428d7b3dSmrg{
2375428d7b3dSmrg	uint32_t cblend, ablend;
2376428d7b3dSmrg	uint32_t unwind;
2377428d7b3dSmrg
2378428d7b3dSmrg	cblend =
2379428d7b3dSmrg		TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_MODULATE |
2380428d7b3dSmrg		TB0C_ARG1_SEL_DIFFUSE | TB0C_ARG1_REPLICATE_ALPHA |
2381428d7b3dSmrg		TB0C_OUTPUT_WRITE_CURRENT;
2382428d7b3dSmrg	ablend =
2383428d7b3dSmrg		TB0A_RESULT_SCALE_1X | TB0A_OP_MODULATE |
2384428d7b3dSmrg		TB0A_ARG1_SEL_DIFFUSE |
2385428d7b3dSmrg		TB0A_OUTPUT_WRITE_CURRENT;
2386428d7b3dSmrg
2387428d7b3dSmrg	if (op->base.src.is_solid) {
2388428d7b3dSmrg		ablend |= TB0A_ARG2_SEL_SPECULAR;
2389428d7b3dSmrg		cblend |= TB0C_ARG2_SEL_SPECULAR;
2390428d7b3dSmrg		if (op->base.dst.format == PICT_a8)
2391428d7b3dSmrg			cblend |= TB0C_ARG2_REPLICATE_ALPHA;
2392428d7b3dSmrg	} else if (op->base.dst.format == PICT_a8) {
2393428d7b3dSmrg		ablend |= TB0A_ARG2_SEL_TEXEL0;
2394428d7b3dSmrg		cblend |= TB0C_ARG2_SEL_TEXEL0 | TB0C_ARG2_REPLICATE_ALPHA;
2395428d7b3dSmrg	} else {
2396428d7b3dSmrg		if (PICT_FORMAT_RGB(op->base.src.pict_format) != 0)
2397428d7b3dSmrg			cblend |= TB0C_ARG2_SEL_TEXEL0;
2398428d7b3dSmrg		else
2399428d7b3dSmrg			cblend |= TB0C_ARG2_SEL_ONE | TB0C_ARG2_INVERT;
2400428d7b3dSmrg
2401428d7b3dSmrg		if (op->base.src.is_opaque)
2402428d7b3dSmrg			ablend |= TB0A_ARG2_SEL_ONE;
2403428d7b3dSmrg		else
2404428d7b3dSmrg			ablend |= TB0A_ARG2_SEL_TEXEL0;
2405428d7b3dSmrg	}
2406428d7b3dSmrg
2407428d7b3dSmrg	unwind = sna->kgem.nbatch;
2408428d7b3dSmrg	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
2409428d7b3dSmrg	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
2410428d7b3dSmrg	BATCH(cblend);
2411428d7b3dSmrg	BATCH(ablend);
2412428d7b3dSmrg	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
2413428d7b3dSmrg		   sna->kgem.batch + unwind + 1,
2414428d7b3dSmrg		   2 * sizeof(uint32_t)) == 0)
2415428d7b3dSmrg		sna->kgem.nbatch = unwind;
2416428d7b3dSmrg	else
2417428d7b3dSmrg		sna->render_state.gen2.ls2 = unwind;
2418428d7b3dSmrg}
2419428d7b3dSmrg
2420428d7b3dSmrgstatic void gen2_emit_composite_spans_state(struct sna *sna,
2421428d7b3dSmrg					    const struct sna_composite_spans_op *op)
2422428d7b3dSmrg{
2423428d7b3dSmrg	uint32_t unwind;
2424428d7b3dSmrg
2425428d7b3dSmrg	gen2_get_batch(sna, &op->base);
2426428d7b3dSmrg	gen2_emit_target(sna, &op->base);
2427428d7b3dSmrg
2428428d7b3dSmrg	unwind = sna->kgem.nbatch;
2429428d7b3dSmrg	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
2430428d7b3dSmrg	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
2431428d7b3dSmrg	BATCH(!op->base.src.is_solid << 12);
2432428d7b3dSmrg	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY | S3_DIFFUSE_PRESENT);
2433428d7b3dSmrg	BATCH(gen2_get_blend_cntl(op->base.op, false, op->base.dst.format));
2434428d7b3dSmrg	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
2435428d7b3dSmrg		   sna->kgem.batch + unwind + 1,
2436428d7b3dSmrg		   3 * sizeof(uint32_t)) == 0)
2437428d7b3dSmrg		sna->kgem.nbatch = unwind;
2438428d7b3dSmrg	else
2439428d7b3dSmrg		sna->render_state.gen2.ls1 = unwind;
2440428d7b3dSmrg
2441428d7b3dSmrg	gen2_disable_logic_op(sna);
2442428d7b3dSmrg	gen2_emit_spans_pipeline(sna, op);
2443428d7b3dSmrg
2444428d7b3dSmrg	if (op->base.src.is_solid) {
2445428d7b3dSmrg		if (op->base.src.u.gen2.pixel != sna->render_state.gen2.specular) {
2446428d7b3dSmrg			BATCH(_3DSTATE_DFLT_SPECULAR_CMD);
2447428d7b3dSmrg			BATCH(op->base.src.u.gen2.pixel);
2448428d7b3dSmrg			sna->render_state.gen2.specular = op->base.src.u.gen2.pixel;
2449428d7b3dSmrg		}
2450428d7b3dSmrg	} else {
2451428d7b3dSmrg		uint32_t v =_3DSTATE_VERTEX_FORMAT_2_CMD |
2452428d7b3dSmrg			(op->base.src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_3D);
2453428d7b3dSmrg		if (sna->render_state.gen2.vft != v) {
2454428d7b3dSmrg			BATCH(v);
2455428d7b3dSmrg			sna->render_state.gen2.vft = v;
2456428d7b3dSmrg		}
2457428d7b3dSmrg		gen2_emit_texture(sna, &op->base.src, 0);
2458428d7b3dSmrg	}
2459428d7b3dSmrg}
2460428d7b3dSmrg
2461428d7b3dSmrgfastcall static void
2462428d7b3dSmrggen2_render_composite_spans_box(struct sna *sna,
2463428d7b3dSmrg				const struct sna_composite_spans_op *op,
2464428d7b3dSmrg				const BoxRec *box, float opacity)
2465428d7b3dSmrg{
2466428d7b3dSmrg	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
2467428d7b3dSmrg	     __FUNCTION__,
2468428d7b3dSmrg	     op->base.src.offset[0], op->base.src.offset[1],
2469428d7b3dSmrg	     opacity,
2470428d7b3dSmrg	     op->base.dst.x, op->base.dst.y,
2471428d7b3dSmrg	     box->x1, box->y1,
2472428d7b3dSmrg	     box->x2 - box->x1,
2473428d7b3dSmrg	     box->y2 - box->y1));
2474428d7b3dSmrg
2475428d7b3dSmrg	if (gen2_get_rectangles(sna, &op->base, 1) == 0) {
2476428d7b3dSmrg		gen2_emit_composite_spans_state(sna, op);
2477428d7b3dSmrg		gen2_get_rectangles(sna, &op->base, 1);
2478428d7b3dSmrg	}
2479428d7b3dSmrg
2480428d7b3dSmrg	op->prim_emit(sna, op, box, opacity);
2481428d7b3dSmrg}
2482428d7b3dSmrg
2483428d7b3dSmrgstatic void
2484428d7b3dSmrggen2_render_composite_spans_boxes(struct sna *sna,
2485428d7b3dSmrg				  const struct sna_composite_spans_op *op,
2486428d7b3dSmrg				  const BoxRec *box, int nbox,
2487428d7b3dSmrg				  float opacity)
2488428d7b3dSmrg{
2489428d7b3dSmrg	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
2490428d7b3dSmrg	     __FUNCTION__, nbox,
2491428d7b3dSmrg	     op->base.src.offset[0], op->base.src.offset[1],
2492428d7b3dSmrg	     opacity,
2493428d7b3dSmrg	     op->base.dst.x, op->base.dst.y));
2494428d7b3dSmrg
2495428d7b3dSmrg	do {
2496428d7b3dSmrg		int nbox_this_time;
2497428d7b3dSmrg
2498428d7b3dSmrg		nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox);
2499428d7b3dSmrg		if (nbox_this_time == 0) {
2500428d7b3dSmrg			gen2_emit_composite_spans_state(sna, op);
2501428d7b3dSmrg			nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox);
2502428d7b3dSmrg		}
2503428d7b3dSmrg		nbox -= nbox_this_time;
2504428d7b3dSmrg
2505428d7b3dSmrg		do {
2506428d7b3dSmrg			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
2507428d7b3dSmrg			     box->x1, box->y1,
2508428d7b3dSmrg			     box->x2 - box->x1,
2509428d7b3dSmrg			     box->y2 - box->y1));
2510428d7b3dSmrg
2511428d7b3dSmrg			op->prim_emit(sna, op, box++, opacity);
2512428d7b3dSmrg		} while (--nbox_this_time);
2513428d7b3dSmrg	} while (nbox);
2514428d7b3dSmrg}
2515428d7b3dSmrg
2516428d7b3dSmrgfastcall static void
2517428d7b3dSmrggen2_render_composite_spans_done(struct sna *sna,
2518428d7b3dSmrg				 const struct sna_composite_spans_op *op)
2519428d7b3dSmrg{
2520428d7b3dSmrg	DBG(("%s()\n", __FUNCTION__));
2521428d7b3dSmrg
2522428d7b3dSmrg	gen2_vertex_flush(sna, &op->base);
2523428d7b3dSmrg
2524428d7b3dSmrg	if (op->base.src.bo)
2525428d7b3dSmrg		kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2526428d7b3dSmrg
2527428d7b3dSmrg	sna_render_composite_redirect_done(sna, &op->base);
2528428d7b3dSmrg}
2529428d7b3dSmrg
2530428d7b3dSmrgstatic bool
2531428d7b3dSmrggen2_check_composite_spans(struct sna *sna,
2532428d7b3dSmrg			   uint8_t op, PicturePtr src, PicturePtr dst,
2533428d7b3dSmrg			   int16_t width, int16_t height, unsigned flags)
2534428d7b3dSmrg{
2535428d7b3dSmrg	if (op >= ARRAY_SIZE(gen2_blend_op))
2536428d7b3dSmrg		return false;
2537428d7b3dSmrg
2538428d7b3dSmrg	if (gen2_composite_fallback(sna, src, NULL, dst))
2539428d7b3dSmrg		return false;
2540428d7b3dSmrg
2541428d7b3dSmrg	if (need_tiling(sna, width, height)) {
2542428d7b3dSmrg		if (!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
2543428d7b3dSmrg			DBG(("%s: fallback, tiled operation not on GPU\n",
2544428d7b3dSmrg			     __FUNCTION__));
2545428d7b3dSmrg			return false;
2546428d7b3dSmrg		}
2547428d7b3dSmrg	}
2548428d7b3dSmrg
2549428d7b3dSmrg	return true;
2550428d7b3dSmrg}
2551428d7b3dSmrg
2552428d7b3dSmrgstatic bool
2553428d7b3dSmrggen2_render_composite_spans(struct sna *sna,
2554428d7b3dSmrg			    uint8_t op,
2555428d7b3dSmrg			    PicturePtr src,
2556428d7b3dSmrg			    PicturePtr dst,
2557428d7b3dSmrg			    int16_t src_x,  int16_t src_y,
2558428d7b3dSmrg			    int16_t dst_x,  int16_t dst_y,
2559428d7b3dSmrg			    int16_t width,  int16_t height,
2560428d7b3dSmrg			    unsigned flags,
2561428d7b3dSmrg			    struct sna_composite_spans_op *tmp)
2562428d7b3dSmrg{
2563428d7b3dSmrg	DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__,
2564428d7b3dSmrg	     src_x, src_y, dst_x, dst_y, width, height));
2565428d7b3dSmrg
2566428d7b3dSmrg	assert(gen2_check_composite_spans(sna, op, src, dst, width, height, flags));
2567428d7b3dSmrg	if (need_tiling(sna, width, height)) {
2568428d7b3dSmrg		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
2569428d7b3dSmrg		     __FUNCTION__, width, height));
2570428d7b3dSmrg		return sna_tiling_composite_spans(op, src, dst,
2571428d7b3dSmrg						  src_x, src_y, dst_x, dst_y,
2572428d7b3dSmrg						  width, height, flags, tmp);
2573428d7b3dSmrg	}
2574428d7b3dSmrg
2575428d7b3dSmrg	tmp->base.op = op;
2576428d7b3dSmrg	sna_render_composite_redirect_init(&tmp->base);
2577428d7b3dSmrg	if (!gen2_composite_set_target(sna, &tmp->base, dst,
2578428d7b3dSmrg				       dst_x, dst_y, width, height,
2579428d7b3dSmrg				       true)) {
2580428d7b3dSmrg		DBG(("%s: unable to set render target\n",
2581428d7b3dSmrg		     __FUNCTION__));
2582428d7b3dSmrg		return false;
2583428d7b3dSmrg	}
2584428d7b3dSmrg
2585428d7b3dSmrg	switch (gen2_composite_picture(sna, src, &tmp->base.src,
2586428d7b3dSmrg				       src_x, src_y,
2587428d7b3dSmrg				       width, height,
2588428d7b3dSmrg				       dst_x, dst_y,
2589428d7b3dSmrg				       dst->polyMode == PolyModePrecise)) {
2590428d7b3dSmrg	case -1:
2591428d7b3dSmrg		goto cleanup_dst;
2592428d7b3dSmrg	case 0:
2593428d7b3dSmrg		gen2_composite_solid_init(sna, &tmp->base.src, 0);
2594428d7b3dSmrg	case 1:
2595428d7b3dSmrg		break;
2596428d7b3dSmrg	}
2597428d7b3dSmrg	assert(tmp->base.src.bo || tmp->base.src.is_solid);
2598428d7b3dSmrg
2599428d7b3dSmrg	tmp->prim_emit = gen2_emit_composite_spans_primitive;
2600428d7b3dSmrg	tmp->base.floats_per_vertex = 3;
2601428d7b3dSmrg	if (tmp->base.src.is_solid) {
2602428d7b3dSmrg#if defined(sse2) && !defined(__x86_64__)
2603428d7b3dSmrg		if (sna->cpu_features & SSE2) {
2604428d7b3dSmrg			tmp->prim_emit = gen2_emit_composite_spans_primitive_constant__sse2;
2605428d7b3dSmrg		} else
2606428d7b3dSmrg#endif
2607428d7b3dSmrg		{
2608428d7b3dSmrg			tmp->prim_emit = gen2_emit_composite_spans_primitive_constant;
2609428d7b3dSmrg		}
2610428d7b3dSmrg	} else if (tmp->base.src.is_linear) {
2611428d7b3dSmrg		tmp->base.floats_per_vertex += 2;
2612428d7b3dSmrg#if defined(sse2) && !defined(__x86_64__)
2613428d7b3dSmrg		if (sna->cpu_features & SSE2) {
2614428d7b3dSmrg			tmp->prim_emit = gen2_emit_composite_spans_primitive_linear__sse2;
2615428d7b3dSmrg		} else
2616428d7b3dSmrg#endif
2617428d7b3dSmrg		{
2618428d7b3dSmrg			tmp->prim_emit = gen2_emit_composite_spans_primitive_linear;
2619428d7b3dSmrg		}
2620428d7b3dSmrg	} else {
2621428d7b3dSmrg		assert(tmp->base.src.bo);
2622428d7b3dSmrg		tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
2623428d7b3dSmrg		if (tmp->base.src.transform == NULL) {
2624428d7b3dSmrg#if defined(sse2) && !defined(__x86_64__)
2625428d7b3dSmrg			if (sna->cpu_features & SSE2) {
2626428d7b3dSmrg				tmp->prim_emit = gen2_emit_composite_spans_primitive_identity_source__sse2;
2627428d7b3dSmrg			} else
2628428d7b3dSmrg#endif
2629428d7b3dSmrg			{
2630428d7b3dSmrg				tmp->prim_emit = gen2_emit_composite_spans_primitive_identity_source;
2631428d7b3dSmrg			}
2632428d7b3dSmrg		} else if (tmp->base.src.is_affine) {
2633428d7b3dSmrg			tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
2634428d7b3dSmrg			tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
2635428d7b3dSmrg#if defined(sse2) && !defined(__x86_64__)
2636428d7b3dSmrg			if (sna->cpu_features & SSE2) {
2637428d7b3dSmrg				tmp->prim_emit = gen2_emit_composite_spans_primitive_affine_source__sse2;
2638428d7b3dSmrg			} else
2639428d7b3dSmrg#endif
2640428d7b3dSmrg			{
2641428d7b3dSmrg				tmp->prim_emit = gen2_emit_composite_spans_primitive_affine_source;
2642428d7b3dSmrg			}
2643428d7b3dSmrg		}
2644428d7b3dSmrg	}
2645428d7b3dSmrg	tmp->base.mask.bo = NULL;
2646428d7b3dSmrg	tmp->base.floats_per_rect = 3*tmp->base.floats_per_vertex;
2647428d7b3dSmrg
2648428d7b3dSmrg	tmp->box   = gen2_render_composite_spans_box;
2649428d7b3dSmrg	tmp->boxes = gen2_render_composite_spans_boxes;
2650428d7b3dSmrg	tmp->done  = gen2_render_composite_spans_done;
2651428d7b3dSmrg
2652428d7b3dSmrg	if (!kgem_check_bo(&sna->kgem,
2653428d7b3dSmrg			   tmp->base.dst.bo, tmp->base.src.bo,
2654428d7b3dSmrg			   NULL)) {
2655428d7b3dSmrg		kgem_submit(&sna->kgem);
2656428d7b3dSmrg		if (!kgem_check_bo(&sna->kgem,
2657428d7b3dSmrg				   tmp->base.dst.bo, tmp->base.src.bo,
2658428d7b3dSmrg				   NULL))
2659428d7b3dSmrg			goto cleanup_src;
2660428d7b3dSmrg	}
2661428d7b3dSmrg
2662428d7b3dSmrg	gen2_emit_composite_spans_state(sna, tmp);
2663428d7b3dSmrg	return true;
2664428d7b3dSmrg
2665428d7b3dSmrgcleanup_src:
2666428d7b3dSmrg	if (tmp->base.src.bo)
2667428d7b3dSmrg		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
2668428d7b3dSmrgcleanup_dst:
2669428d7b3dSmrg	if (tmp->base.redirect.real_bo)
2670428d7b3dSmrg		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
2671428d7b3dSmrg	return false;
2672428d7b3dSmrg}
2673428d7b3dSmrg
2674428d7b3dSmrgstatic void
2675428d7b3dSmrggen2_emit_fill_pipeline(struct sna *sna, const struct sna_composite_op *op)
2676428d7b3dSmrg{
2677428d7b3dSmrg	uint32_t blend, unwind;
2678428d7b3dSmrg
2679428d7b3dSmrg	unwind = sna->kgem.nbatch;
2680428d7b3dSmrg	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
2681428d7b3dSmrg	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
2682428d7b3dSmrg
2683428d7b3dSmrg	blend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_ARG1 |
2684428d7b3dSmrg		TB0C_ARG1_SEL_DIFFUSE |
2685428d7b3dSmrg		TB0C_OUTPUT_WRITE_CURRENT;
2686428d7b3dSmrg	if (op->dst.format == PICT_a8)
2687428d7b3dSmrg		blend |= TB0C_ARG1_REPLICATE_ALPHA;
2688428d7b3dSmrg	BATCH(blend);
2689428d7b3dSmrg
2690428d7b3dSmrg	BATCH(TB0A_RESULT_SCALE_1X | TB0A_OP_ARG1 |
2691428d7b3dSmrg	      TB0A_ARG1_SEL_DIFFUSE |
2692428d7b3dSmrg	      TB0A_OUTPUT_WRITE_CURRENT);
2693428d7b3dSmrg
2694428d7b3dSmrg	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
2695428d7b3dSmrg		   sna->kgem.batch + unwind + 1,
2696428d7b3dSmrg		   2 * sizeof(uint32_t)) == 0)
2697428d7b3dSmrg		sna->kgem.nbatch = unwind;
2698428d7b3dSmrg	else
2699428d7b3dSmrg		sna->render_state.gen2.ls2 = unwind;
2700428d7b3dSmrg}
2701428d7b3dSmrg
2702428d7b3dSmrgstatic void gen2_emit_fill_composite_state(struct sna *sna,
2703428d7b3dSmrg					   const struct sna_composite_op *op,
2704428d7b3dSmrg					   uint32_t pixel)
2705428d7b3dSmrg{
2706428d7b3dSmrg	uint32_t ls1;
2707428d7b3dSmrg
2708428d7b3dSmrg	gen2_get_batch(sna, op);
2709428d7b3dSmrg	gen2_emit_target(sna, op);
2710428d7b3dSmrg
2711428d7b3dSmrg	ls1 = sna->kgem.nbatch;
2712428d7b3dSmrg	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
2713428d7b3dSmrg	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
2714428d7b3dSmrg	BATCH(0);
2715428d7b3dSmrg	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
2716428d7b3dSmrg	BATCH(gen2_get_blend_cntl(op->op, false, op->dst.format));
2717428d7b3dSmrg	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
2718428d7b3dSmrg		   sna->kgem.batch + ls1 + 1,
2719428d7b3dSmrg		   3 * sizeof(uint32_t)) == 0)
2720428d7b3dSmrg		sna->kgem.nbatch = ls1;
2721428d7b3dSmrg	else
2722428d7b3dSmrg		sna->render_state.gen2.ls1 = ls1;
2723428d7b3dSmrg
2724428d7b3dSmrg	gen2_emit_fill_pipeline(sna, op);
2725428d7b3dSmrg
2726428d7b3dSmrg	if (pixel != sna->render_state.gen2.diffuse) {
2727428d7b3dSmrg		BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
2728428d7b3dSmrg		BATCH(pixel);
2729428d7b3dSmrg		sna->render_state.gen2.diffuse = pixel;
2730428d7b3dSmrg	}
2731428d7b3dSmrg}
2732428d7b3dSmrg
2733428d7b3dSmrgstatic bool
2734428d7b3dSmrggen2_render_fill_boxes_try_blt(struct sna *sna,
2735428d7b3dSmrg			       CARD8 op, PictFormat format,
2736428d7b3dSmrg			       const xRenderColor *color,
2737428d7b3dSmrg			       const DrawableRec *dst, struct kgem_bo *dst_bo,
2738428d7b3dSmrg			       const BoxRec *box, int n)
2739428d7b3dSmrg{
2740428d7b3dSmrg	uint8_t alu;
2741428d7b3dSmrg	uint32_t pixel;
2742428d7b3dSmrg
2743428d7b3dSmrg	if (op > PictOpSrc)
2744428d7b3dSmrg		return false;
2745428d7b3dSmrg
2746428d7b3dSmrg	if (op == PictOpClear) {
2747428d7b3dSmrg		alu = GXclear;
2748428d7b3dSmrg		pixel = 0;
2749428d7b3dSmrg	} else if (!sna_get_pixel_from_rgba(&pixel,
2750428d7b3dSmrg					    color->red,
2751428d7b3dSmrg					    color->green,
2752428d7b3dSmrg					    color->blue,
2753428d7b3dSmrg					    color->alpha,
2754428d7b3dSmrg					    format))
2755428d7b3dSmrg		return false;
2756428d7b3dSmrg	else
2757428d7b3dSmrg		alu = GXcopy;
2758428d7b3dSmrg
2759428d7b3dSmrg	return sna_blt_fill_boxes(sna, alu,
2760428d7b3dSmrg				  dst_bo, dst->bitsPerPixel,
2761428d7b3dSmrg				  pixel, box, n);
2762428d7b3dSmrg}
2763428d7b3dSmrg
2764428d7b3dSmrgstatic bool
2765428d7b3dSmrggen2_render_fill_boxes(struct sna *sna,
2766428d7b3dSmrg		       CARD8 op,
2767428d7b3dSmrg		       PictFormat format,
2768428d7b3dSmrg		       const xRenderColor *color,
2769428d7b3dSmrg		       const DrawableRec *dst, struct kgem_bo *dst_bo,
2770428d7b3dSmrg		       const BoxRec *box, int n)
2771428d7b3dSmrg{
2772428d7b3dSmrg	struct sna_composite_op tmp;
2773428d7b3dSmrg	uint32_t pixel;
2774428d7b3dSmrg
2775428d7b3dSmrg	if (op >= ARRAY_SIZE(gen2_blend_op)) {
2776428d7b3dSmrg		DBG(("%s: fallback due to unhandled blend op: %d\n",
2777428d7b3dSmrg		     __FUNCTION__, op));
2778428d7b3dSmrg		return false;
2779428d7b3dSmrg	}
2780428d7b3dSmrg
2781428d7b3dSmrg#if NO_FILL_BOXES
2782428d7b3dSmrg	return gen2_render_fill_boxes_try_blt(sna, op, format, color,
2783428d7b3dSmrg					      dst, dst_bo,
2784428d7b3dSmrg					      box, n);
2785428d7b3dSmrg#endif
2786428d7b3dSmrg	if (gen2_render_fill_boxes_try_blt(sna, op, format, color,
2787428d7b3dSmrg					   dst, dst_bo,
2788428d7b3dSmrg					   box, n))
2789428d7b3dSmrg		return true;
2790428d7b3dSmrg
2791428d7b3dSmrg
2792428d7b3dSmrg	DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x))\n",
2793428d7b3dSmrg	     __FUNCTION__, op, (int)format,
2794428d7b3dSmrg	     color->red, color->green, color->blue, color->alpha));
2795428d7b3dSmrg
2796428d7b3dSmrg	if (too_large(dst->width, dst->height) ||
2797428d7b3dSmrg	    dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH ||
2798428d7b3dSmrg	    !gen2_check_dst_format(format)) {
2799428d7b3dSmrg		DBG(("%s: try blt, too large or incompatible destination\n",
2800428d7b3dSmrg		     __FUNCTION__));
2801428d7b3dSmrg		if (!gen2_check_dst_format(format))
2802428d7b3dSmrg			return false;
2803428d7b3dSmrg
2804428d7b3dSmrg		assert(dst_bo->pitch >= 8);
2805428d7b3dSmrg		return sna_tiling_fill_boxes(sna, op, format, color,
2806428d7b3dSmrg					     dst, dst_bo, box, n);
2807428d7b3dSmrg	}
2808428d7b3dSmrg
2809428d7b3dSmrg	if (op == PictOpClear)
2810428d7b3dSmrg		pixel = 0;
2811428d7b3dSmrg	else if (!sna_get_pixel_from_rgba(&pixel,
2812428d7b3dSmrg					  color->red,
2813428d7b3dSmrg					  color->green,
2814428d7b3dSmrg					  color->blue,
2815428d7b3dSmrg					  color->alpha,
2816428d7b3dSmrg					  PICT_a8r8g8b8))
2817428d7b3dSmrg		return false;
2818428d7b3dSmrg
2819428d7b3dSmrg	DBG(("%s: using shader for op=%d, format=%x, pixel=%x\n",
2820428d7b3dSmrg	     __FUNCTION__, op, (int)format, pixel));
2821428d7b3dSmrg
2822428d7b3dSmrg	memset(&tmp, 0, sizeof(tmp));
2823428d7b3dSmrg	tmp.op = op;
2824428d7b3dSmrg	tmp.dst.pixmap = (PixmapPtr)dst;
2825428d7b3dSmrg	tmp.dst.width = dst->width;
2826428d7b3dSmrg	tmp.dst.height = dst->height;
2827428d7b3dSmrg	tmp.dst.format = format;
2828428d7b3dSmrg	tmp.dst.bo = dst_bo;
2829428d7b3dSmrg	tmp.floats_per_vertex = 2;
2830428d7b3dSmrg	tmp.floats_per_rect = 6;
2831428d7b3dSmrg
2832428d7b3dSmrg	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
2833428d7b3dSmrg		kgem_submit(&sna->kgem);
2834428d7b3dSmrg		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL))
2835428d7b3dSmrg			return false;
2836428d7b3dSmrg	}
2837428d7b3dSmrg
2838428d7b3dSmrg	gen2_emit_fill_composite_state(sna, &tmp, pixel);
2839428d7b3dSmrg
2840428d7b3dSmrg	do {
2841428d7b3dSmrg		int n_this_time = gen2_get_rectangles(sna, &tmp, n);
2842428d7b3dSmrg		if (n_this_time == 0) {
2843428d7b3dSmrg			gen2_emit_fill_composite_state(sna, &tmp, pixel);
2844428d7b3dSmrg			n_this_time = gen2_get_rectangles(sna, &tmp, n);
2845428d7b3dSmrg		}
2846428d7b3dSmrg		n -= n_this_time;
2847428d7b3dSmrg
2848428d7b3dSmrg		do {
2849428d7b3dSmrg			DBG(("	(%d, %d), (%d, %d): %x\n",
2850428d7b3dSmrg			     box->x1, box->y1, box->x2, box->y2, pixel));
2851428d7b3dSmrg			VERTEX(box->x2);
2852428d7b3dSmrg			VERTEX(box->y2);
2853428d7b3dSmrg			VERTEX(box->x1);
2854428d7b3dSmrg			VERTEX(box->y2);
2855428d7b3dSmrg			VERTEX(box->x1);
2856428d7b3dSmrg			VERTEX(box->y1);
2857428d7b3dSmrg			box++;
2858428d7b3dSmrg		} while (--n_this_time);
2859428d7b3dSmrg	} while (n);
2860428d7b3dSmrg
2861428d7b3dSmrg	gen2_vertex_flush(sna, &tmp);
2862428d7b3dSmrg	return true;
2863428d7b3dSmrg}
2864428d7b3dSmrg
2865428d7b3dSmrgstatic void gen2_emit_fill_state(struct sna *sna,
2866428d7b3dSmrg				 const struct sna_composite_op *op)
2867428d7b3dSmrg{
2868428d7b3dSmrg	uint32_t ls1;
2869428d7b3dSmrg
2870428d7b3dSmrg	gen2_get_batch(sna, op);
2871428d7b3dSmrg	gen2_emit_target(sna, op);
2872428d7b3dSmrg
2873428d7b3dSmrg	ls1 = sna->kgem.nbatch;
2874428d7b3dSmrg	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
2875428d7b3dSmrg	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
2876428d7b3dSmrg	BATCH(0);
2877428d7b3dSmrg	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
2878428d7b3dSmrg	BATCH(S8_ENABLE_COLOR_BUFFER_WRITE);
2879428d7b3dSmrg	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
2880428d7b3dSmrg		   sna->kgem.batch + ls1 + 1,
2881428d7b3dSmrg		   3 * sizeof(uint32_t)) == 0)
2882428d7b3dSmrg		sna->kgem.nbatch = ls1;
2883428d7b3dSmrg	else
2884428d7b3dSmrg		sna->render_state.gen2.ls1 = ls1;
2885428d7b3dSmrg
2886428d7b3dSmrg	gen2_enable_logic_op(sna, op->op);
2887428d7b3dSmrg	gen2_emit_fill_pipeline(sna, op);
2888428d7b3dSmrg
2889428d7b3dSmrg	if (op->src.u.gen2.pixel != sna->render_state.gen2.diffuse) {
2890428d7b3dSmrg		BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
2891428d7b3dSmrg		BATCH(op->src.u.gen2.pixel);
2892428d7b3dSmrg		sna->render_state.gen2.diffuse = op->src.u.gen2.pixel;
2893428d7b3dSmrg	}
2894428d7b3dSmrg}
2895428d7b3dSmrg
2896428d7b3dSmrgstatic void
2897428d7b3dSmrggen2_render_fill_op_blt(struct sna *sna,
2898428d7b3dSmrg			const struct sna_fill_op *op,
2899428d7b3dSmrg			int16_t x, int16_t y, int16_t w, int16_t h)
2900428d7b3dSmrg{
2901428d7b3dSmrg	if (!gen2_get_rectangles(sna, &op->base, 1)) {
2902428d7b3dSmrg		gen2_emit_fill_state(sna, &op->base);
2903428d7b3dSmrg		gen2_get_rectangles(sna, &op->base, 1);
2904428d7b3dSmrg	}
2905428d7b3dSmrg
2906428d7b3dSmrg	VERTEX(x+w);
2907428d7b3dSmrg	VERTEX(y+h);
2908428d7b3dSmrg	VERTEX(x);
2909428d7b3dSmrg	VERTEX(y+h);
2910428d7b3dSmrg	VERTEX(x);
2911428d7b3dSmrg	VERTEX(y);
2912428d7b3dSmrg}
2913428d7b3dSmrg
2914428d7b3dSmrgfastcall static void
2915428d7b3dSmrggen2_render_fill_op_box(struct sna *sna,
2916428d7b3dSmrg			const struct sna_fill_op *op,
2917428d7b3dSmrg			const BoxRec *box)
2918428d7b3dSmrg{
2919428d7b3dSmrg	if (!gen2_get_rectangles(sna, &op->base, 1)) {
2920428d7b3dSmrg		gen2_emit_fill_state(sna, &op->base);
2921428d7b3dSmrg		gen2_get_rectangles(sna, &op->base, 1);
2922428d7b3dSmrg	}
2923428d7b3dSmrg
2924428d7b3dSmrg	VERTEX(box->x2);
2925428d7b3dSmrg	VERTEX(box->y2);
2926428d7b3dSmrg	VERTEX(box->x1);
2927428d7b3dSmrg	VERTEX(box->y2);
2928428d7b3dSmrg	VERTEX(box->x1);
2929428d7b3dSmrg	VERTEX(box->y1);
2930428d7b3dSmrg}
2931428d7b3dSmrg
2932428d7b3dSmrgfastcall static void
2933428d7b3dSmrggen2_render_fill_op_boxes(struct sna *sna,
2934428d7b3dSmrg			  const struct sna_fill_op *op,
2935428d7b3dSmrg			  const BoxRec *box,
2936428d7b3dSmrg			  int nbox)
2937428d7b3dSmrg{
2938428d7b3dSmrg	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
2939428d7b3dSmrg	     box->x1, box->y1, box->x2, box->y2, nbox));
2940428d7b3dSmrg
2941428d7b3dSmrg	do {
2942428d7b3dSmrg		int nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox);
2943428d7b3dSmrg		if (nbox_this_time == 0) {
2944428d7b3dSmrg			gen2_emit_fill_state(sna, &op->base);
2945428d7b3dSmrg			nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox);
2946428d7b3dSmrg		}
2947428d7b3dSmrg		nbox -= nbox_this_time;
2948428d7b3dSmrg
2949428d7b3dSmrg		do {
2950428d7b3dSmrg			VERTEX(box->x2);
2951428d7b3dSmrg			VERTEX(box->y2);
2952428d7b3dSmrg			VERTEX(box->x1);
2953428d7b3dSmrg			VERTEX(box->y2);
2954428d7b3dSmrg			VERTEX(box->x1);
2955428d7b3dSmrg			VERTEX(box->y1);
2956428d7b3dSmrg			box++;
2957428d7b3dSmrg		} while (--nbox_this_time);
2958428d7b3dSmrg	} while (nbox);
2959428d7b3dSmrg}
2960428d7b3dSmrg
2961428d7b3dSmrgstatic void
2962428d7b3dSmrggen2_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
2963428d7b3dSmrg{
2964428d7b3dSmrg	gen2_vertex_flush(sna, &op->base);
2965428d7b3dSmrg}
2966428d7b3dSmrg
2967428d7b3dSmrgstatic bool
2968428d7b3dSmrggen2_render_fill(struct sna *sna, uint8_t alu,
2969428d7b3dSmrg		 PixmapPtr dst, struct kgem_bo *dst_bo,
2970428d7b3dSmrg		 uint32_t color, unsigned flags,
2971428d7b3dSmrg		 struct sna_fill_op *tmp)
2972428d7b3dSmrg{
2973428d7b3dSmrg#if NO_FILL
2974428d7b3dSmrg	return sna_blt_fill(sna, alu,
2975428d7b3dSmrg			    dst_bo, dst->drawable.bitsPerPixel,
2976428d7b3dSmrg			    color,
2977428d7b3dSmrg			    tmp);
2978428d7b3dSmrg#endif
2979428d7b3dSmrg
2980428d7b3dSmrg	/* Prefer to use the BLT if already engaged */
2981428d7b3dSmrg	if (sna_blt_fill(sna, alu,
2982428d7b3dSmrg			 dst_bo, dst->drawable.bitsPerPixel,
2983428d7b3dSmrg			 color,
2984428d7b3dSmrg			 tmp))
2985428d7b3dSmrg		return true;
2986428d7b3dSmrg
2987428d7b3dSmrg	/* Must use the BLT if we can't RENDER... */
2988428d7b3dSmrg	if (too_large(dst->drawable.width, dst->drawable.height) ||
2989428d7b3dSmrg	    dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH)
2990428d7b3dSmrg		return false;
2991428d7b3dSmrg
2992428d7b3dSmrg	tmp->base.op = alu;
2993428d7b3dSmrg	tmp->base.dst.pixmap = dst;
2994428d7b3dSmrg	tmp->base.dst.width = dst->drawable.width;
2995428d7b3dSmrg	tmp->base.dst.height = dst->drawable.height;
2996428d7b3dSmrg	tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
2997428d7b3dSmrg	tmp->base.dst.bo = dst_bo;
2998428d7b3dSmrg	tmp->base.dst.x = tmp->base.dst.y = 0;
2999428d7b3dSmrg	tmp->base.floats_per_vertex = 2;
3000428d7b3dSmrg	tmp->base.floats_per_rect = 6;
3001428d7b3dSmrg
3002428d7b3dSmrg	tmp->base.src.u.gen2.pixel =
3003428d7b3dSmrg		sna_rgba_for_color(color, dst->drawable.depth);
3004428d7b3dSmrg
3005428d7b3dSmrg	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
3006428d7b3dSmrg		kgem_submit(&sna->kgem);
3007428d7b3dSmrg		return sna_blt_fill(sna, alu,
3008428d7b3dSmrg				    dst_bo, dst->drawable.bitsPerPixel,
3009428d7b3dSmrg				    color,
3010428d7b3dSmrg				    tmp);
3011428d7b3dSmrg	}
3012428d7b3dSmrg
3013428d7b3dSmrg	tmp->blt   = gen2_render_fill_op_blt;
3014428d7b3dSmrg	tmp->box   = gen2_render_fill_op_box;
3015428d7b3dSmrg	tmp->boxes = gen2_render_fill_op_boxes;
3016428d7b3dSmrg	tmp->points = NULL;
3017428d7b3dSmrg	tmp->done  = gen2_render_fill_op_done;
3018428d7b3dSmrg
3019428d7b3dSmrg	gen2_emit_fill_state(sna, &tmp->base);
3020428d7b3dSmrg	return true;
3021428d7b3dSmrg}
3022428d7b3dSmrg
3023428d7b3dSmrgstatic bool
3024428d7b3dSmrggen2_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
3025428d7b3dSmrg			     uint32_t color,
3026428d7b3dSmrg			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
3027428d7b3dSmrg			     uint8_t alu)
3028428d7b3dSmrg{
3029428d7b3dSmrg	BoxRec box;
3030428d7b3dSmrg
3031428d7b3dSmrg	box.x1 = x1;
3032428d7b3dSmrg	box.y1 = y1;
3033428d7b3dSmrg	box.x2 = x2;
3034428d7b3dSmrg	box.y2 = y2;
3035428d7b3dSmrg
3036428d7b3dSmrg	return sna_blt_fill_boxes(sna, alu,
3037428d7b3dSmrg				  bo, dst->drawable.bitsPerPixel,
3038428d7b3dSmrg				  color, &box, 1);
3039428d7b3dSmrg}
3040428d7b3dSmrg
3041428d7b3dSmrgstatic bool
3042428d7b3dSmrggen2_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
3043428d7b3dSmrg		     uint32_t color,
3044428d7b3dSmrg		     int16_t x1, int16_t y1,
3045428d7b3dSmrg		     int16_t x2, int16_t y2,
3046428d7b3dSmrg		     uint8_t alu)
3047428d7b3dSmrg{
3048428d7b3dSmrg	struct sna_composite_op tmp;
3049428d7b3dSmrg
3050428d7b3dSmrg#if NO_FILL_ONE
3051428d7b3dSmrg	return gen2_render_fill_one_try_blt(sna, dst, bo, color,
3052428d7b3dSmrg					    x1, y1, x2, y2, alu);
3053428d7b3dSmrg#endif
3054428d7b3dSmrg
3055428d7b3dSmrg	/* Prefer to use the BLT if already engaged */
3056428d7b3dSmrg	if (gen2_render_fill_one_try_blt(sna, dst, bo, color,
3057428d7b3dSmrg					 x1, y1, x2, y2, alu))
3058428d7b3dSmrg		return true;
3059428d7b3dSmrg
3060428d7b3dSmrg	/* Must use the BLT if we can't RENDER... */
3061428d7b3dSmrg	if (too_large(dst->drawable.width, dst->drawable.height) ||
3062428d7b3dSmrg	    bo->pitch < 8 || bo->pitch > MAX_3D_PITCH)
3063428d7b3dSmrg		return false;
3064428d7b3dSmrg
3065428d7b3dSmrg	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
3066428d7b3dSmrg		kgem_submit(&sna->kgem);
3067428d7b3dSmrg
3068428d7b3dSmrg		if (gen2_render_fill_one_try_blt(sna, dst, bo, color,
3069428d7b3dSmrg						 x1, y1, x2, y2, alu))
3070428d7b3dSmrg			return true;
3071428d7b3dSmrg
3072428d7b3dSmrg		if (!kgem_check_bo(&sna->kgem, bo, NULL))
3073428d7b3dSmrg			return false;
3074428d7b3dSmrg	}
3075428d7b3dSmrg
3076428d7b3dSmrg	tmp.op = alu;
3077428d7b3dSmrg	tmp.dst.pixmap = dst;
3078428d7b3dSmrg	tmp.dst.width = dst->drawable.width;
3079428d7b3dSmrg	tmp.dst.height = dst->drawable.height;
3080428d7b3dSmrg	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
3081428d7b3dSmrg	tmp.dst.bo = bo;
3082428d7b3dSmrg	tmp.floats_per_vertex = 2;
3083428d7b3dSmrg	tmp.floats_per_rect = 6;
3084428d7b3dSmrg	tmp.need_magic_ca_pass = false;
3085428d7b3dSmrg
3086428d7b3dSmrg	tmp.src.u.gen2.pixel =
3087428d7b3dSmrg		sna_rgba_for_color(color, dst->drawable.depth);
3088428d7b3dSmrg
3089428d7b3dSmrg	gen2_emit_fill_state(sna, &tmp);
3090428d7b3dSmrg	gen2_get_rectangles(sna, &tmp, 1);
3091428d7b3dSmrg	DBG(("%s: (%d, %d), (%d, %d): %x\n", __FUNCTION__,
3092428d7b3dSmrg	     x1, y1, x2, y2, tmp.src.u.gen2.pixel));
3093428d7b3dSmrg	VERTEX(x2);
3094428d7b3dSmrg	VERTEX(y2);
3095428d7b3dSmrg	VERTEX(x1);
3096428d7b3dSmrg	VERTEX(y2);
3097428d7b3dSmrg	VERTEX(x1);
3098428d7b3dSmrg	VERTEX(y1);
3099428d7b3dSmrg	gen2_vertex_flush(sna, &tmp);
3100428d7b3dSmrg
3101428d7b3dSmrg	return true;
3102428d7b3dSmrg}
3103428d7b3dSmrg
3104428d7b3dSmrgstatic void
3105428d7b3dSmrggen2_render_copy_setup_source(struct sna_composite_channel *channel,
3106428d7b3dSmrg			      const DrawableRec *draw,
3107428d7b3dSmrg			      struct kgem_bo *bo)
3108428d7b3dSmrg{
3109428d7b3dSmrg	assert(draw->width && draw->height);
3110428d7b3dSmrg
3111428d7b3dSmrg	channel->filter = PictFilterNearest;
3112428d7b3dSmrg	channel->repeat = RepeatNone;
3113428d7b3dSmrg	channel->width  = draw->width;
3114428d7b3dSmrg	channel->height = draw->height;
3115428d7b3dSmrg	channel->scale[0] = 1.f/draw->width;
3116428d7b3dSmrg	channel->scale[1] = 1.f/draw->height;
3117428d7b3dSmrg	channel->offset[0] = 0;
3118428d7b3dSmrg	channel->offset[1] = 0;
3119428d7b3dSmrg	channel->pict_format = sna_format_for_depth(draw->depth);
3120428d7b3dSmrg	channel->bo = bo;
3121428d7b3dSmrg	channel->is_affine = 1;
3122428d7b3dSmrg
3123428d7b3dSmrg	DBG(("%s: source=%d, (%dx%d), format=%08x\n",
3124428d7b3dSmrg	     __FUNCTION__, bo->handle,
3125428d7b3dSmrg	     channel->width, channel->height,
3126428d7b3dSmrg	     channel->pict_format));
3127428d7b3dSmrg}
3128428d7b3dSmrg
3129428d7b3dSmrgstatic void
3130428d7b3dSmrggen2_emit_copy_pipeline(struct sna *sna, const struct sna_composite_op *op)
3131428d7b3dSmrg{
3132428d7b3dSmrg	uint32_t blend, unwind;
3133428d7b3dSmrg
3134428d7b3dSmrg	unwind = sna->kgem.nbatch;
3135428d7b3dSmrg	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
3136428d7b3dSmrg	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
3137428d7b3dSmrg
3138428d7b3dSmrg	blend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_ARG1 |
3139428d7b3dSmrg		TB0C_OUTPUT_WRITE_CURRENT;
3140428d7b3dSmrg	if (op->dst.format == PICT_a8)
3141428d7b3dSmrg		blend |= TB0C_ARG1_REPLICATE_ALPHA | TB0C_ARG1_SEL_TEXEL0;
3142428d7b3dSmrg	else if (PICT_FORMAT_RGB(op->src.pict_format) != 0)
3143428d7b3dSmrg		blend |= TB0C_ARG1_SEL_TEXEL0;
3144428d7b3dSmrg	else
3145428d7b3dSmrg		blend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT;	/* 0.0 */
3146428d7b3dSmrg	BATCH(blend);
3147428d7b3dSmrg
3148428d7b3dSmrg	blend = TB0A_RESULT_SCALE_1X | TB0A_OP_ARG1 |
3149428d7b3dSmrg		TB0A_OUTPUT_WRITE_CURRENT;
3150428d7b3dSmrg	if (PICT_FORMAT_A(op->src.pict_format) == 0)
3151428d7b3dSmrg		blend |= TB0A_ARG1_SEL_ONE;
3152428d7b3dSmrg	else
3153428d7b3dSmrg		blend |= TB0A_ARG1_SEL_TEXEL0;
3154428d7b3dSmrg	BATCH(blend);
3155428d7b3dSmrg
3156428d7b3dSmrg	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
3157428d7b3dSmrg		   sna->kgem.batch + unwind + 1,
3158428d7b3dSmrg		   2 * sizeof(uint32_t)) == 0)
3159428d7b3dSmrg		sna->kgem.nbatch = unwind;
3160428d7b3dSmrg	else
3161428d7b3dSmrg		sna->render_state.gen2.ls2 = unwind;
3162428d7b3dSmrg}
3163428d7b3dSmrg
3164428d7b3dSmrgstatic void gen2_emit_copy_state(struct sna *sna, const struct sna_composite_op *op)
3165428d7b3dSmrg{
3166428d7b3dSmrg	uint32_t ls1, v;
3167428d7b3dSmrg
3168428d7b3dSmrg	gen2_get_batch(sna, op);
3169428d7b3dSmrg
3170428d7b3dSmrg	if (kgem_bo_is_dirty(op->src.bo)) {
3171428d7b3dSmrg		if (op->src.bo == op->dst.bo)
3172428d7b3dSmrg			BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
3173428d7b3dSmrg		else
3174428d7b3dSmrg			BATCH(_3DSTATE_MODES_5_CMD |
3175428d7b3dSmrg			      PIPELINE_FLUSH_RENDER_CACHE |
3176428d7b3dSmrg			      PIPELINE_FLUSH_TEXTURE_CACHE);
3177428d7b3dSmrg		kgem_clear_dirty(&sna->kgem);
3178428d7b3dSmrg	}
3179428d7b3dSmrg	gen2_emit_target(sna, op);
3180428d7b3dSmrg
3181428d7b3dSmrg	ls1 = sna->kgem.nbatch;
3182428d7b3dSmrg	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
3183428d7b3dSmrg	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
3184428d7b3dSmrg	BATCH(1<<12);
3185428d7b3dSmrg	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
3186428d7b3dSmrg	BATCH(S8_ENABLE_COLOR_BUFFER_WRITE);
3187428d7b3dSmrg	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
3188428d7b3dSmrg		   sna->kgem.batch + ls1 + 1,
3189428d7b3dSmrg		   3 * sizeof(uint32_t)) == 0)
3190428d7b3dSmrg		sna->kgem.nbatch = ls1;
3191428d7b3dSmrg	else
3192428d7b3dSmrg		sna->render_state.gen2.ls1 = ls1;
3193428d7b3dSmrg
3194428d7b3dSmrg	gen2_enable_logic_op(sna, op->op);
3195428d7b3dSmrg	gen2_emit_copy_pipeline(sna, op);
3196428d7b3dSmrg
3197428d7b3dSmrg	v = _3DSTATE_VERTEX_FORMAT_2_CMD | TEXCOORDFMT_2D;
3198428d7b3dSmrg	if (sna->render_state.gen2.vft != v) {
3199428d7b3dSmrg		BATCH(v);
3200428d7b3dSmrg		sna->render_state.gen2.vft = v;
3201428d7b3dSmrg	}
3202428d7b3dSmrg
3203428d7b3dSmrg	gen2_emit_texture(sna, &op->src, 0);
3204428d7b3dSmrg}
3205428d7b3dSmrg
3206428d7b3dSmrgstatic bool
3207428d7b3dSmrggen2_render_copy_boxes(struct sna *sna, uint8_t alu,
3208428d7b3dSmrg		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
3209428d7b3dSmrg		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
3210428d7b3dSmrg		       const BoxRec *box, int n, unsigned flags)
3211428d7b3dSmrg{
3212428d7b3dSmrg	struct sna_composite_op tmp;
3213428d7b3dSmrg
3214428d7b3dSmrg#if NO_COPY_BOXES
3215428d7b3dSmrg	if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
3216428d7b3dSmrg		return false;
3217428d7b3dSmrg
3218428d7b3dSmrg	return sna_blt_copy_boxes(sna, alu,
3219428d7b3dSmrg				  src_bo, src_dx, src_dy,
3220428d7b3dSmrg				  dst_bo, dst_dx, dst_dy,
3221428d7b3dSmrg				  dst->drawable.bitsPerPixel,
3222428d7b3dSmrg				  box, n);
3223428d7b3dSmrg#endif
3224428d7b3dSmrg
3225428d7b3dSmrg	DBG(("%s (%d, %d)->(%d, %d) x %d\n",
3226428d7b3dSmrg	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
3227428d7b3dSmrg
3228428d7b3dSmrg	if (sna_blt_compare_depth(src, dst) &&
3229428d7b3dSmrg	    sna_blt_copy_boxes(sna, alu,
3230428d7b3dSmrg			       src_bo, src_dx, src_dy,
3231428d7b3dSmrg			       dst_bo, dst_dx, dst_dy,
3232428d7b3dSmrg			       dst->bitsPerPixel,
3233428d7b3dSmrg			       box, n))
3234428d7b3dSmrg		return true;
3235428d7b3dSmrg
3236428d7b3dSmrg	if (src_bo == dst_bo || /* XXX handle overlap using 3D ? */
3237428d7b3dSmrg	    too_large(src->width, src->height) ||
3238428d7b3dSmrg	    src_bo->pitch > MAX_3D_PITCH || dst_bo->pitch < 8) {
3239428d7b3dSmrgfallback:
3240428d7b3dSmrg		return sna_blt_copy_boxes_fallback(sna, alu,
3241428d7b3dSmrg						   src, src_bo, src_dx, src_dy,
3242428d7b3dSmrg						   dst, dst_bo, dst_dx, dst_dy,
3243428d7b3dSmrg						   box, n);
3244428d7b3dSmrg	}
3245428d7b3dSmrg
3246428d7b3dSmrg	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
3247428d7b3dSmrg		kgem_submit(&sna->kgem);
3248428d7b3dSmrg		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
3249428d7b3dSmrg			goto fallback;
3250428d7b3dSmrg	}
3251428d7b3dSmrg
3252428d7b3dSmrg	assert(dst_bo->pitch >= 8);
3253428d7b3dSmrg
3254428d7b3dSmrg	memset(&tmp, 0, sizeof(tmp));
3255428d7b3dSmrg	tmp.op = alu;
3256428d7b3dSmrg
3257428d7b3dSmrg	tmp.dst.pixmap = (PixmapPtr)dst;
3258428d7b3dSmrg	tmp.dst.width = dst->width;
3259428d7b3dSmrg	tmp.dst.height = dst->height;
3260428d7b3dSmrg	tmp.dst.format = sna_format_for_depth(dst->depth);
3261428d7b3dSmrg	tmp.dst.bo = dst_bo;
3262428d7b3dSmrg	tmp.dst.x = tmp.dst.y = 0;
3263428d7b3dSmrg	tmp.damage = NULL;
3264428d7b3dSmrg
3265428d7b3dSmrg	DBG(("%s: target=%d, format=%08x, size=%dx%d\n",
3266428d7b3dSmrg	     __FUNCTION__, dst_bo->handle,
3267428d7b3dSmrg	     (unsigned)tmp.dst.format,
3268428d7b3dSmrg	     tmp.dst.width,
3269428d7b3dSmrg	     tmp.dst.height));
3270428d7b3dSmrg
3271428d7b3dSmrg	sna_render_composite_redirect_init(&tmp);
3272428d7b3dSmrg	if (too_large(tmp.dst.width, tmp.dst.height) ||
3273428d7b3dSmrg	    dst_bo->pitch > MAX_3D_PITCH) {
3274428d7b3dSmrg		BoxRec extents = box[0];
3275428d7b3dSmrg		int i;
3276428d7b3dSmrg
3277428d7b3dSmrg		for (i = 1; i < n; i++) {
3278428d7b3dSmrg			if (box[i].x1 < extents.x1)
3279428d7b3dSmrg				extents.x1 = box[i].x1;
3280428d7b3dSmrg			if (box[i].y1 < extents.y1)
3281428d7b3dSmrg				extents.y1 = box[i].y1;
3282428d7b3dSmrg
3283428d7b3dSmrg			if (box[i].x2 > extents.x2)
3284428d7b3dSmrg				extents.x2 = box[i].x2;
3285428d7b3dSmrg			if (box[i].y2 > extents.y2)
3286428d7b3dSmrg				extents.y2 = box[i].y2;
3287428d7b3dSmrg		}
3288428d7b3dSmrg		if (!sna_render_composite_redirect(sna, &tmp,
3289428d7b3dSmrg						   extents.x1 + dst_dx,
3290428d7b3dSmrg						   extents.y1 + dst_dy,
3291428d7b3dSmrg						   extents.x2 - extents.x1,
3292428d7b3dSmrg						   extents.y2 - extents.y1,
3293428d7b3dSmrg						   alu != GXcopy || n > 1))
3294428d7b3dSmrg			goto fallback_tiled;
3295428d7b3dSmrg	}
3296428d7b3dSmrg
3297428d7b3dSmrg	tmp.floats_per_vertex = 4;
3298428d7b3dSmrg	tmp.floats_per_rect = 12;
3299428d7b3dSmrg
3300428d7b3dSmrg	dst_dx += tmp.dst.x;
3301428d7b3dSmrg	dst_dy += tmp.dst.y;
3302428d7b3dSmrg	tmp.dst.x = tmp.dst.y = 0;
3303428d7b3dSmrg
3304428d7b3dSmrg	gen2_render_copy_setup_source(&tmp.src, src, src_bo);
3305428d7b3dSmrg	gen2_emit_copy_state(sna, &tmp);
3306428d7b3dSmrg	do {
3307428d7b3dSmrg		int n_this_time;
3308428d7b3dSmrg
3309428d7b3dSmrg		n_this_time = gen2_get_rectangles(sna, &tmp, n);
3310428d7b3dSmrg		if (n_this_time == 0) {
3311428d7b3dSmrg			gen2_emit_copy_state(sna, &tmp);
3312428d7b3dSmrg			n_this_time = gen2_get_rectangles(sna, &tmp, n);
3313428d7b3dSmrg		}
3314428d7b3dSmrg		n -= n_this_time;
3315428d7b3dSmrg
3316428d7b3dSmrg		do {
3317428d7b3dSmrg			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
3318428d7b3dSmrg			     box->x1 + src_dx, box->y1 + src_dy,
3319428d7b3dSmrg			     box->x1 + dst_dx, box->y1 + dst_dy,
3320428d7b3dSmrg			     box->x2 - box->x1, box->y2 - box->y1));
3321428d7b3dSmrg			VERTEX(box->x2 + dst_dx);
3322428d7b3dSmrg			VERTEX(box->y2 + dst_dy);
3323428d7b3dSmrg			VERTEX((box->x2 + src_dx) * tmp.src.scale[0]);
3324428d7b3dSmrg			VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
3325428d7b3dSmrg
3326428d7b3dSmrg			VERTEX(box->x1 + dst_dx);
3327428d7b3dSmrg			VERTEX(box->y2 + dst_dy);
3328428d7b3dSmrg			VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
3329428d7b3dSmrg			VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
3330428d7b3dSmrg
3331428d7b3dSmrg			VERTEX(box->x1 + dst_dx);
3332428d7b3dSmrg			VERTEX(box->y1 + dst_dy);
3333428d7b3dSmrg			VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
3334428d7b3dSmrg			VERTEX((box->y1 + src_dy) * tmp.src.scale[1]);
3335428d7b3dSmrg
3336428d7b3dSmrg			box++;
3337428d7b3dSmrg		} while (--n_this_time);
3338428d7b3dSmrg	} while (n);
3339428d7b3dSmrg
3340428d7b3dSmrg	gen2_vertex_flush(sna, &tmp);
3341428d7b3dSmrg	sna_render_composite_redirect_done(sna, &tmp);
3342428d7b3dSmrg	return true;
3343428d7b3dSmrg
3344428d7b3dSmrgfallback_tiled:
3345428d7b3dSmrg	return sna_tiling_copy_boxes(sna, alu,
3346428d7b3dSmrg				     src, src_bo, src_dx, src_dy,
3347428d7b3dSmrg				     dst, dst_bo, dst_dx, dst_dy,
3348428d7b3dSmrg				     box, n);
3349428d7b3dSmrg}
3350428d7b3dSmrg
3351428d7b3dSmrgstatic void
3352428d7b3dSmrggen2_render_copy_blt(struct sna *sna,
3353428d7b3dSmrg		     const struct sna_copy_op *op,
3354428d7b3dSmrg		     int16_t sx, int16_t sy,
3355428d7b3dSmrg		     int16_t w, int16_t h,
3356428d7b3dSmrg		     int16_t dx, int16_t dy)
3357428d7b3dSmrg{
3358428d7b3dSmrg	if (!gen2_get_rectangles(sna, &op->base, 1)) {
3359428d7b3dSmrg		gen2_emit_copy_state(sna, &op->base);
3360428d7b3dSmrg		gen2_get_rectangles(sna, &op->base, 1);
3361428d7b3dSmrg	}
3362428d7b3dSmrg
3363428d7b3dSmrg	VERTEX(dx+w);
3364428d7b3dSmrg	VERTEX(dy+h);
3365428d7b3dSmrg	VERTEX((sx+w)*op->base.src.scale[0]);
3366428d7b3dSmrg	VERTEX((sy+h)*op->base.src.scale[1]);
3367428d7b3dSmrg
3368428d7b3dSmrg	VERTEX(dx);
3369428d7b3dSmrg	VERTEX(dy+h);
3370428d7b3dSmrg	VERTEX(sx*op->base.src.scale[0]);
3371428d7b3dSmrg	VERTEX((sy+h)*op->base.src.scale[1]);
3372428d7b3dSmrg
3373428d7b3dSmrg	VERTEX(dx);
3374428d7b3dSmrg	VERTEX(dy);
3375428d7b3dSmrg	VERTEX(sx*op->base.src.scale[0]);
3376428d7b3dSmrg	VERTEX(sy*op->base.src.scale[1]);
3377428d7b3dSmrg}
3378428d7b3dSmrg
3379428d7b3dSmrgstatic void
3380428d7b3dSmrggen2_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
3381428d7b3dSmrg{
3382428d7b3dSmrg	gen2_vertex_flush(sna, &op->base);
3383428d7b3dSmrg}
3384428d7b3dSmrg
3385428d7b3dSmrgstatic bool
3386428d7b3dSmrggen2_render_copy(struct sna *sna, uint8_t alu,
3387428d7b3dSmrg		 PixmapPtr src, struct kgem_bo *src_bo,
3388428d7b3dSmrg		 PixmapPtr dst, struct kgem_bo *dst_bo,
3389428d7b3dSmrg		 struct sna_copy_op *tmp)
3390428d7b3dSmrg{
3391428d7b3dSmrg#if NO_COPY
3392428d7b3dSmrg	if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
3393428d7b3dSmrg		return false;
3394428d7b3dSmrg
3395428d7b3dSmrg	return sna_blt_copy(sna, alu,
3396428d7b3dSmrg			    src_bo, dst_bo,
3397428d7b3dSmrg			    dst->drawable.bitsPerPixel,
3398428d7b3dSmrg			    tmp);
3399428d7b3dSmrg#endif
3400428d7b3dSmrg
3401428d7b3dSmrg	/* Prefer to use the BLT */
3402428d7b3dSmrg	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
3403428d7b3dSmrg	    sna_blt_copy(sna, alu,
3404428d7b3dSmrg			 src_bo, dst_bo,
3405428d7b3dSmrg			 dst->drawable.bitsPerPixel,
3406428d7b3dSmrg			 tmp))
3407428d7b3dSmrg		return true;
3408428d7b3dSmrg
3409428d7b3dSmrg	/* Must use the BLT if we can't RENDER... */
3410428d7b3dSmrg	if (too_large(src->drawable.width, src->drawable.height) ||
3411428d7b3dSmrg	    too_large(dst->drawable.width, dst->drawable.height) ||
3412428d7b3dSmrg	    src_bo->pitch > MAX_3D_PITCH ||
3413428d7b3dSmrg	    dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH) {
3414428d7b3dSmrgfallback:
3415428d7b3dSmrg		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
3416428d7b3dSmrg			return false;
3417428d7b3dSmrg
3418428d7b3dSmrg		return sna_blt_copy(sna, alu, src_bo, dst_bo,
3419428d7b3dSmrg				    dst->drawable.bitsPerPixel,
3420428d7b3dSmrg				    tmp);
3421428d7b3dSmrg	}
3422428d7b3dSmrg
3423428d7b3dSmrg	tmp->base.op = alu;
3424428d7b3dSmrg
3425428d7b3dSmrg	tmp->base.dst.pixmap = dst;
3426428d7b3dSmrg	tmp->base.dst.width = dst->drawable.width;
3427428d7b3dSmrg	tmp->base.dst.height = dst->drawable.height;
3428428d7b3dSmrg	tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
3429428d7b3dSmrg	tmp->base.dst.bo = dst_bo;
3430428d7b3dSmrg
3431428d7b3dSmrg	gen2_render_copy_setup_source(&tmp->base.src, &src->drawable, src_bo);
3432428d7b3dSmrg	tmp->base.mask.bo = NULL;
3433428d7b3dSmrg
3434428d7b3dSmrg	tmp->base.floats_per_vertex = 4;
3435428d7b3dSmrg	tmp->base.floats_per_rect = 12;
3436428d7b3dSmrg
3437428d7b3dSmrg	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
3438428d7b3dSmrg		kgem_submit(&sna->kgem);
3439428d7b3dSmrg		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
3440428d7b3dSmrg			goto fallback;
3441428d7b3dSmrg	}
3442428d7b3dSmrg
3443428d7b3dSmrg	tmp->blt  = gen2_render_copy_blt;
3444428d7b3dSmrg	tmp->done = gen2_render_copy_done;
3445428d7b3dSmrg
3446428d7b3dSmrg	gen2_emit_composite_state(sna, &tmp->base);
3447428d7b3dSmrg	return true;
3448428d7b3dSmrg}
3449428d7b3dSmrg
3450428d7b3dSmrgstatic void
3451428d7b3dSmrggen2_render_reset(struct sna *sna)
3452428d7b3dSmrg{
3453428d7b3dSmrg	sna->render_state.gen2.need_invariant = true;
3454428d7b3dSmrg	sna->render_state.gen2.logic_op_enabled = 0;
3455428d7b3dSmrg	sna->render_state.gen2.target = 0;
3456428d7b3dSmrg
3457428d7b3dSmrg	sna->render_state.gen2.ls1 = 0;
3458428d7b3dSmrg	sna->render_state.gen2.ls2 = 0;
3459428d7b3dSmrg	sna->render_state.gen2.vft = 0;
3460428d7b3dSmrg
3461428d7b3dSmrg	sna->render_state.gen2.diffuse = 0x0c0ffee0;
3462428d7b3dSmrg	sna->render_state.gen2.specular = 0x0c0ffee0;
3463428d7b3dSmrg}
3464428d7b3dSmrg
3465428d7b3dSmrgstatic void
3466428d7b3dSmrggen2_render_flush(struct sna *sna)
3467428d7b3dSmrg{
3468428d7b3dSmrg	assert(sna->render.vertex_index == 0);
3469428d7b3dSmrg	assert(sna->render.vertex_offset == 0);
3470428d7b3dSmrg}
3471428d7b3dSmrg
3472428d7b3dSmrgstatic void
3473428d7b3dSmrggen2_render_context_switch(struct kgem *kgem,
3474428d7b3dSmrg			   int new_mode)
3475428d7b3dSmrg{
3476428d7b3dSmrg	struct sna *sna = container_of(kgem, struct sna, kgem);
3477428d7b3dSmrg
3478428d7b3dSmrg	if (!kgem->nbatch)
3479428d7b3dSmrg		return;
3480428d7b3dSmrg
3481428d7b3dSmrg	/* Reload BLT registers following a lost context */
3482428d7b3dSmrg	sna->blt_state.fill_bo = 0;
3483428d7b3dSmrg
3484428d7b3dSmrg	if (kgem_ring_is_idle(kgem, kgem->ring)) {
3485428d7b3dSmrg		DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
3486428d7b3dSmrg		_kgem_submit(kgem);
3487428d7b3dSmrg	}
3488428d7b3dSmrg}
3489428d7b3dSmrg
3490428d7b3dSmrgconst char *gen2_render_init(struct sna *sna, const char *backend)
3491428d7b3dSmrg{
3492428d7b3dSmrg	struct sna_render *render = &sna->render;
3493428d7b3dSmrg
3494428d7b3dSmrg	sna->kgem.context_switch = gen2_render_context_switch;
3495428d7b3dSmrg
3496428d7b3dSmrg	/* Use the BLT (and overlay) for everything except when forced to
3497428d7b3dSmrg	 * use the texture combiners.
3498428d7b3dSmrg	 */
3499428d7b3dSmrg#if !NO_COMPOSITE
3500428d7b3dSmrg	render->composite = gen2_render_composite;
3501428d7b3dSmrg	render->prefer_gpu |= PREFER_GPU_RENDER;
3502428d7b3dSmrg#endif
3503428d7b3dSmrg#if !NO_COMPOSITE_SPANS
3504428d7b3dSmrg	render->check_composite_spans = gen2_check_composite_spans;
3505428d7b3dSmrg	render->composite_spans = gen2_render_composite_spans;
3506428d7b3dSmrg	render->prefer_gpu |= PREFER_GPU_SPANS;
3507428d7b3dSmrg#endif
3508428d7b3dSmrg	render->fill_boxes = gen2_render_fill_boxes;
3509428d7b3dSmrg	render->fill = gen2_render_fill;
3510428d7b3dSmrg	render->fill_one = gen2_render_fill_one;
3511428d7b3dSmrg	render->copy = gen2_render_copy;
3512428d7b3dSmrg	render->copy_boxes = gen2_render_copy_boxes;
3513428d7b3dSmrg
3514428d7b3dSmrg	/* XXX YUV color space conversion for video? */
3515428d7b3dSmrg
3516428d7b3dSmrg	render->reset = gen2_render_reset;
3517428d7b3dSmrg	render->flush = gen2_render_flush;
3518428d7b3dSmrg
3519428d7b3dSmrg	render->max_3d_size = MAX_3D_SIZE;
3520428d7b3dSmrg	render->max_3d_pitch = MAX_3D_PITCH;
3521428d7b3dSmrg	return "Almador (gen2)";
3522428d7b3dSmrg}
3523