gen3_render.c revision 03b705cf
103b705cfSriastradh/*
203b705cfSriastradh * Copyright © 2010-2011 Intel Corporation
303b705cfSriastradh *
403b705cfSriastradh * Permission is hereby granted, free of charge, to any person obtaining a
503b705cfSriastradh * copy of this software and associated documentation files (the "Software"),
603b705cfSriastradh * to deal in the Software without restriction, including without limitation
703b705cfSriastradh * the rights to use, copy, modify, merge, publish, distribute, sublicense,
803b705cfSriastradh * and/or sell copies of the Software, and to permit persons to whom the
903b705cfSriastradh * Software is furnished to do so, subject to the following conditions:
1003b705cfSriastradh *
1103b705cfSriastradh * The above copyright notice and this permission notice (including the next
1203b705cfSriastradh * paragraph) shall be included in all copies or substantial portions of the
1303b705cfSriastradh * Software.
1403b705cfSriastradh *
1503b705cfSriastradh * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1603b705cfSriastradh * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1703b705cfSriastradh * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1803b705cfSriastradh * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1903b705cfSriastradh * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2003b705cfSriastradh * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2103b705cfSriastradh * SOFTWARE.
2203b705cfSriastradh *
2303b705cfSriastradh * Authors:
2403b705cfSriastradh *    Chris Wilson <chris@chris-wilson.co.uk>
2503b705cfSriastradh *
2603b705cfSriastradh */
2703b705cfSriastradh
2803b705cfSriastradh#ifdef HAVE_CONFIG_H
2903b705cfSriastradh#include "config.h"
3003b705cfSriastradh#endif
3103b705cfSriastradh
3203b705cfSriastradh#include "sna.h"
3303b705cfSriastradh#include "sna_render.h"
3403b705cfSriastradh#include "sna_render_inline.h"
3503b705cfSriastradh#include "sna_reg.h"
3603b705cfSriastradh#include "sna_video.h"
3703b705cfSriastradh
3803b705cfSriastradh#include "gen3_render.h"
3903b705cfSriastradh
/*
 * Compile-time switches for this backend.  Every NO_* knob is 0 here;
 * presumably a non-zero value disables the matching render path -- confirm
 * at the use sites, which are outside this part of the file.
 */
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0

/* NOTE(review): looks like a preference for the BLT engine when filling --
 * confirm against the fill paths. */
#define PREFER_BLT_FILL 1
4903b705cfSriastradh
/*
 * Shader kinds.  The values are consecutive, starting at SHADER_NONE == 0,
 * so the declaration order below is significant.
 */
enum {
	SHADER_NONE = 0,
	SHADER_ZERO,
	SHADER_BLACK,
	SHADER_WHITE,
	SHADER_CONSTANT,
	SHADER_LINEAR,
	SHADER_RADIAL,
	SHADER_TEXTURE,
	SHADER_OPACITY,
};
6103b705cfSriastradh
/* Upper bounds tested by too_large() and gen3_check_pitch_3d(). */
#define MAX_3D_SIZE 2048
#define MAX_3D_PITCH 8192

/*
 * Emission shorthands.  Each one expands to a call that names a variable
 * called `sna`, so they can only be used where such a variable is in scope.
 */
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_BATCH_F(v) batch_emit_float(sna, v)
#define OUT_VERTEX(v) vertex_emit(sna, v)
6803b705cfSriastradh
/* Radial gradient variants; RADIAL_ONE is 0 and RADIAL_TWO is 1. */
enum gen3_radial_mode {
	RADIAL_ONE,
	RADIAL_TWO
};
7303b705cfSriastradh
7403b705cfSriastradhstatic const struct blendinfo {
7503b705cfSriastradh	bool dst_alpha;
7603b705cfSriastradh	bool src_alpha;
7703b705cfSriastradh	uint32_t src_blend;
7803b705cfSriastradh	uint32_t dst_blend;
7903b705cfSriastradh} gen3_blend_op[] = {
8003b705cfSriastradh	/* Clear */	{0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO},
8103b705cfSriastradh	/* Src */	{0, 0, BLENDFACT_ONE, BLENDFACT_ZERO},
8203b705cfSriastradh	/* Dst */	{0, 0, BLENDFACT_ZERO, BLENDFACT_ONE},
8303b705cfSriastradh	/* Over */	{0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA},
8403b705cfSriastradh	/* OverReverse */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE},
8503b705cfSriastradh	/* In */	{1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO},
8603b705cfSriastradh	/* InReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA},
8703b705cfSriastradh	/* Out */	{1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO},
8803b705cfSriastradh	/* OutReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA},
8903b705cfSriastradh	/* Atop */	{1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
9003b705cfSriastradh	/* AtopReverse */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA},
9103b705cfSriastradh	/* Xor */	{1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
9203b705cfSriastradh	/* Add */	{0, 0, BLENDFACT_ONE, BLENDFACT_ONE},
9303b705cfSriastradh};
9403b705cfSriastradh
/*
 * S6 value with colour writes enabled and an ADD blend function using
 * factors ONE (source) and ZERO (destination); S6_CBUF_BLEND_ENABLE is not
 * set.  gen3_get_blend_cntl() returns this for Clear and Src.
 */
#define S6_COLOR_WRITE_ONLY \
	(S6_COLOR_WRITE_ENABLE | \
	 (BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) | \
	 (BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT) | \
	 (BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT))
10003b705cfSriastradh
10103b705cfSriastradhstatic const struct formatinfo {
10203b705cfSriastradh	unsigned int fmt, xfmt;
10303b705cfSriastradh	uint32_t card_fmt;
10403b705cfSriastradh	bool rb_reversed;
10503b705cfSriastradh} gen3_tex_formats[] = {
10603b705cfSriastradh	{PICT_a8, 0, MAPSURF_8BIT | MT_8BIT_A8, false},
10703b705cfSriastradh	{PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, false},
10803b705cfSriastradh	{PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, false},
10903b705cfSriastradh	{PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, false},
11003b705cfSriastradh	{PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false},
11103b705cfSriastradh	{PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, false},
11203b705cfSriastradh	{PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, false},
11303b705cfSriastradh	{PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, false},
11403b705cfSriastradh	{PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, true},
11503b705cfSriastradh	{PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, false},
11603b705cfSriastradh	{PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, true},
11703b705cfSriastradh	{PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, false},
11803b705cfSriastradh	{PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, true},
11903b705cfSriastradh};
12003b705cfSriastradh
/* Alias: xFixedToDouble(f) forwards straight to pixman_fixed_to_double(f). */
#define xFixedToDouble(f) pixman_fixed_to_double(f)
12203b705cfSriastradh
12303b705cfSriastradhstatic inline bool too_large(int width, int height)
12403b705cfSriastradh{
12503b705cfSriastradh	return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
12603b705cfSriastradh}
12703b705cfSriastradh
12803b705cfSriastradhstatic inline uint32_t gen3_buf_tiling(uint32_t tiling)
12903b705cfSriastradh{
13003b705cfSriastradh	uint32_t v = 0;
13103b705cfSriastradh	switch (tiling) {
13203b705cfSriastradh	case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y;
13303b705cfSriastradh	case I915_TILING_X: v |= BUF_3D_TILED_SURFACE;
13403b705cfSriastradh	case I915_TILING_NONE: break;
13503b705cfSriastradh	}
13603b705cfSriastradh	return v;
13703b705cfSriastradh}
13803b705cfSriastradh
13903b705cfSriastradhstatic inline bool
14003b705cfSriastradhgen3_check_pitch_3d(struct kgem_bo *bo)
14103b705cfSriastradh{
14203b705cfSriastradh	return bo->pitch <= MAX_3D_PITCH;
14303b705cfSriastradh}
14403b705cfSriastradh
14503b705cfSriastradhstatic uint32_t gen3_get_blend_cntl(int op,
14603b705cfSriastradh				    bool has_component_alpha,
14703b705cfSriastradh				    uint32_t dst_format)
14803b705cfSriastradh{
14903b705cfSriastradh	uint32_t sblend = gen3_blend_op[op].src_blend;
15003b705cfSriastradh	uint32_t dblend = gen3_blend_op[op].dst_blend;
15103b705cfSriastradh
15203b705cfSriastradh	if (op <= PictOpSrc) /* for clear and src disable blending */
15303b705cfSriastradh		return S6_COLOR_WRITE_ONLY;
15403b705cfSriastradh
15503b705cfSriastradh	/* If there's no dst alpha channel, adjust the blend op so that we'll
15603b705cfSriastradh	 * treat it as always 1.
15703b705cfSriastradh	 */
15803b705cfSriastradh	if (gen3_blend_op[op].dst_alpha) {
15903b705cfSriastradh		if (PICT_FORMAT_A(dst_format) == 0) {
16003b705cfSriastradh			if (sblend == BLENDFACT_DST_ALPHA)
16103b705cfSriastradh				sblend = BLENDFACT_ONE;
16203b705cfSriastradh			else if (sblend == BLENDFACT_INV_DST_ALPHA)
16303b705cfSriastradh				sblend = BLENDFACT_ZERO;
16403b705cfSriastradh		}
16503b705cfSriastradh
16603b705cfSriastradh		/* gen3 engine reads 8bit color buffer into green channel
16703b705cfSriastradh		 * in cases like color buffer blending etc., and also writes
16803b705cfSriastradh		 * back green channel.  So with dst_alpha blend we should use
16903b705cfSriastradh		 * color factor. See spec on "8-bit rendering".
17003b705cfSriastradh		 */
17103b705cfSriastradh		if (dst_format == PICT_a8) {
17203b705cfSriastradh			if (sblend == BLENDFACT_DST_ALPHA)
17303b705cfSriastradh				sblend = BLENDFACT_DST_COLR;
17403b705cfSriastradh			else if (sblend == BLENDFACT_INV_DST_ALPHA)
17503b705cfSriastradh				sblend = BLENDFACT_INV_DST_COLR;
17603b705cfSriastradh		}
17703b705cfSriastradh	}
17803b705cfSriastradh
17903b705cfSriastradh	/* If the source alpha is being used, then we should only be in a case
18003b705cfSriastradh	 * where the source blend factor is 0, and the source blend value is the
18103b705cfSriastradh	 * mask channels multiplied by the source picture's alpha.
18203b705cfSriastradh	 */
18303b705cfSriastradh	if (has_component_alpha && gen3_blend_op[op].src_alpha) {
18403b705cfSriastradh		if (dblend == BLENDFACT_SRC_ALPHA)
18503b705cfSriastradh			dblend = BLENDFACT_SRC_COLR;
18603b705cfSriastradh		else if (dblend == BLENDFACT_INV_SRC_ALPHA)
18703b705cfSriastradh			dblend = BLENDFACT_INV_SRC_COLR;
18803b705cfSriastradh	}
18903b705cfSriastradh
19003b705cfSriastradh	return (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
19103b705cfSriastradh		BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
19203b705cfSriastradh		sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT |
19303b705cfSriastradh		dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
19403b705cfSriastradh}
19503b705cfSriastradh
19603b705cfSriastradhstatic bool gen3_check_dst_format(uint32_t format)
19703b705cfSriastradh{
19803b705cfSriastradh	switch (format) {
19903b705cfSriastradh	case PICT_a8r8g8b8:
20003b705cfSriastradh	case PICT_x8r8g8b8:
20103b705cfSriastradh	case PICT_a8b8g8r8:
20203b705cfSriastradh	case PICT_x8b8g8r8:
20303b705cfSriastradh	case PICT_r5g6b5:
20403b705cfSriastradh	case PICT_b5g6r5:
20503b705cfSriastradh	case PICT_a1r5g5b5:
20603b705cfSriastradh	case PICT_x1r5g5b5:
20703b705cfSriastradh	case PICT_a1b5g5r5:
20803b705cfSriastradh	case PICT_x1b5g5r5:
20903b705cfSriastradh	case PICT_a2r10g10b10:
21003b705cfSriastradh	case PICT_x2r10g10b10:
21103b705cfSriastradh	case PICT_a2b10g10r10:
21203b705cfSriastradh	case PICT_x2b10g10r10:
21303b705cfSriastradh	case PICT_a8:
21403b705cfSriastradh	case PICT_a4r4g4b4:
21503b705cfSriastradh	case PICT_x4r4g4b4:
21603b705cfSriastradh	case PICT_a4b4g4r4:
21703b705cfSriastradh	case PICT_x4b4g4r4:
21803b705cfSriastradh		return true;
21903b705cfSriastradh	default:
22003b705cfSriastradh		return false;
22103b705cfSriastradh	}
22203b705cfSriastradh}
22303b705cfSriastradh
22403b705cfSriastradhstatic bool gen3_dst_rb_reversed(uint32_t format)
22503b705cfSriastradh{
22603b705cfSriastradh	switch (format) {
22703b705cfSriastradh	case PICT_a8r8g8b8:
22803b705cfSriastradh	case PICT_x8r8g8b8:
22903b705cfSriastradh	case PICT_r5g6b5:
23003b705cfSriastradh	case PICT_a1r5g5b5:
23103b705cfSriastradh	case PICT_x1r5g5b5:
23203b705cfSriastradh	case PICT_a2r10g10b10:
23303b705cfSriastradh	case PICT_x2r10g10b10:
23403b705cfSriastradh	case PICT_a8:
23503b705cfSriastradh	case PICT_a4r4g4b4:
23603b705cfSriastradh	case PICT_x4r4g4b4:
23703b705cfSriastradh		return false;
23803b705cfSriastradh	default:
23903b705cfSriastradh		return true;
24003b705cfSriastradh	}
24103b705cfSriastradh}
24203b705cfSriastradh
/* Shift a bias value into place: horizontal at bit 20, vertical at bit 16. */
#define DSTORG_HORT_BIAS(x)             ((x)<<20)
#define DSTORG_VERT_BIAS(x)             ((x)<<16)
24503b705cfSriastradh
24603b705cfSriastradhstatic uint32_t gen3_get_dst_format(uint32_t format)
24703b705cfSriastradh{
24803b705cfSriastradh#define BIAS (DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8))
24903b705cfSriastradh	switch (format) {
25003b705cfSriastradh	default:
25103b705cfSriastradh	case PICT_a8r8g8b8:
25203b705cfSriastradh	case PICT_x8r8g8b8:
25303b705cfSriastradh	case PICT_a8b8g8r8:
25403b705cfSriastradh	case PICT_x8b8g8r8:
25503b705cfSriastradh		return BIAS | COLR_BUF_ARGB8888;
25603b705cfSriastradh	case PICT_r5g6b5:
25703b705cfSriastradh	case PICT_b5g6r5:
25803b705cfSriastradh		return BIAS | COLR_BUF_RGB565;
25903b705cfSriastradh	case PICT_a1r5g5b5:
26003b705cfSriastradh	case PICT_x1r5g5b5:
26103b705cfSriastradh	case PICT_a1b5g5r5:
26203b705cfSriastradh	case PICT_x1b5g5r5:
26303b705cfSriastradh		return BIAS | COLR_BUF_ARGB1555;
26403b705cfSriastradh	case PICT_a2r10g10b10:
26503b705cfSriastradh	case PICT_x2r10g10b10:
26603b705cfSriastradh	case PICT_a2b10g10r10:
26703b705cfSriastradh	case PICT_x2b10g10r10:
26803b705cfSriastradh		return BIAS | COLR_BUF_ARGB2AAA;
26903b705cfSriastradh	case PICT_a8:
27003b705cfSriastradh		return BIAS | COLR_BUF_8BIT;
27103b705cfSriastradh	case PICT_a4r4g4b4:
27203b705cfSriastradh	case PICT_x4r4g4b4:
27303b705cfSriastradh	case PICT_a4b4g4r4:
27403b705cfSriastradh	case PICT_x4b4g4r4:
27503b705cfSriastradh		return BIAS | COLR_BUF_ARGB4444;
27603b705cfSriastradh	}
27703b705cfSriastradh#undef BIAS
27803b705cfSriastradh}
27903b705cfSriastradh
28003b705cfSriastradhstatic bool gen3_check_format(PicturePtr p)
28103b705cfSriastradh{
28203b705cfSriastradh	switch (p->format) {
28303b705cfSriastradh	case PICT_a8:
28403b705cfSriastradh	case PICT_a8r8g8b8:
28503b705cfSriastradh	case PICT_x8r8g8b8:
28603b705cfSriastradh	case PICT_a8b8g8r8:
28703b705cfSriastradh	case PICT_x8b8g8r8:
28803b705cfSriastradh	case PICT_a2r10g10b10:
28903b705cfSriastradh	case PICT_a2b10g10r10:
29003b705cfSriastradh	case PICT_r5g6b5:
29103b705cfSriastradh	case PICT_b5g6r5:
29203b705cfSriastradh	case PICT_a1r5g5b5:
29303b705cfSriastradh	case PICT_a1b5g5r5:
29403b705cfSriastradh	case PICT_a4r4g4b4:
29503b705cfSriastradh	case PICT_a4b4g4r4:
29603b705cfSriastradh		return true;
29703b705cfSriastradh	default:
29803b705cfSriastradh		return false;
29903b705cfSriastradh	}
30003b705cfSriastradh}
30103b705cfSriastradh
30203b705cfSriastradhstatic bool gen3_check_xformat(PicturePtr p)
30303b705cfSriastradh{
30403b705cfSriastradh	switch (p->format) {
30503b705cfSriastradh	case PICT_a8r8g8b8:
30603b705cfSriastradh	case PICT_x8r8g8b8:
30703b705cfSriastradh	case PICT_a8b8g8r8:
30803b705cfSriastradh	case PICT_x8b8g8r8:
30903b705cfSriastradh	case PICT_r5g6b5:
31003b705cfSriastradh	case PICT_b5g6r5:
31103b705cfSriastradh	case PICT_a1r5g5b5:
31203b705cfSriastradh	case PICT_x1r5g5b5:
31303b705cfSriastradh	case PICT_a1b5g5r5:
31403b705cfSriastradh	case PICT_x1b5g5r5:
31503b705cfSriastradh	case PICT_a2r10g10b10:
31603b705cfSriastradh	case PICT_x2r10g10b10:
31703b705cfSriastradh	case PICT_a2b10g10r10:
31803b705cfSriastradh	case PICT_x2b10g10r10:
31903b705cfSriastradh	case PICT_a8:
32003b705cfSriastradh	case PICT_a4r4g4b4:
32103b705cfSriastradh	case PICT_x4r4g4b4:
32203b705cfSriastradh	case PICT_a4b4g4r4:
32303b705cfSriastradh	case PICT_x4b4g4r4:
32403b705cfSriastradh		return true;
32503b705cfSriastradh	default:
32603b705cfSriastradh		return false;
32703b705cfSriastradh	}
32803b705cfSriastradh}
32903b705cfSriastradh
33003b705cfSriastradhstatic uint32_t gen3_texture_repeat(uint32_t repeat)
33103b705cfSriastradh{
33203b705cfSriastradh#define REPEAT(x) \
33303b705cfSriastradh	(SS3_NORMALIZED_COORDS | \
33403b705cfSriastradh	 TEXCOORDMODE_##x << SS3_TCX_ADDR_MODE_SHIFT | \
33503b705cfSriastradh	 TEXCOORDMODE_##x << SS3_TCY_ADDR_MODE_SHIFT)
33603b705cfSriastradh	switch (repeat) {
33703b705cfSriastradh	default:
33803b705cfSriastradh	case RepeatNone:
33903b705cfSriastradh		return REPEAT(CLAMP_BORDER);
34003b705cfSriastradh	case RepeatNormal:
34103b705cfSriastradh		return REPEAT(WRAP);
34203b705cfSriastradh	case RepeatPad:
34303b705cfSriastradh		return REPEAT(CLAMP_EDGE);
34403b705cfSriastradh	case RepeatReflect:
34503b705cfSriastradh		return REPEAT(MIRROR);
34603b705cfSriastradh	}
34703b705cfSriastradh#undef REPEAT
34803b705cfSriastradh}
34903b705cfSriastradh
35003b705cfSriastradhstatic uint32_t gen3_gradient_repeat(uint32_t repeat)
35103b705cfSriastradh{
35203b705cfSriastradh#define REPEAT(x) \
35303b705cfSriastradh	(SS3_NORMALIZED_COORDS | \
35403b705cfSriastradh	 TEXCOORDMODE_##x  << SS3_TCX_ADDR_MODE_SHIFT | \
35503b705cfSriastradh	 TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT)
35603b705cfSriastradh	switch (repeat) {
35703b705cfSriastradh	default:
35803b705cfSriastradh	case RepeatNone:
35903b705cfSriastradh		return REPEAT(CLAMP_BORDER);
36003b705cfSriastradh	case RepeatNormal:
36103b705cfSriastradh		return REPEAT(WRAP);
36203b705cfSriastradh	case RepeatPad:
36303b705cfSriastradh		return REPEAT(CLAMP_EDGE);
36403b705cfSriastradh	case RepeatReflect:
36503b705cfSriastradh		return REPEAT(MIRROR);
36603b705cfSriastradh	}
36703b705cfSriastradh#undef REPEAT
36803b705cfSriastradh}
36903b705cfSriastradh
37003b705cfSriastradhstatic bool gen3_check_repeat(PicturePtr p)
37103b705cfSriastradh{
37203b705cfSriastradh	if (!p->repeat)
37303b705cfSriastradh		return true;
37403b705cfSriastradh
37503b705cfSriastradh	switch (p->repeatType) {
37603b705cfSriastradh	case RepeatNone:
37703b705cfSriastradh	case RepeatNormal:
37803b705cfSriastradh	case RepeatPad:
37903b705cfSriastradh	case RepeatReflect:
38003b705cfSriastradh		return true;
38103b705cfSriastradh	default:
38203b705cfSriastradh		return false;
38303b705cfSriastradh	}
38403b705cfSriastradh}
38503b705cfSriastradh
38603b705cfSriastradhstatic uint32_t gen3_filter(uint32_t filter)
38703b705cfSriastradh{
38803b705cfSriastradh	switch (filter) {
38903b705cfSriastradh	default:
39003b705cfSriastradh		assert(0);
39103b705cfSriastradh	case PictFilterNearest:
39203b705cfSriastradh		return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
39303b705cfSriastradh			FILTER_NEAREST << SS2_MIN_FILTER_SHIFT |
39403b705cfSriastradh			MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
39503b705cfSriastradh	case PictFilterBilinear:
39603b705cfSriastradh		return (FILTER_LINEAR  << SS2_MAG_FILTER_SHIFT |
39703b705cfSriastradh			FILTER_LINEAR  << SS2_MIN_FILTER_SHIFT |
39803b705cfSriastradh			MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
39903b705cfSriastradh	}
40003b705cfSriastradh}
40103b705cfSriastradh
40203b705cfSriastradhstatic bool gen3_check_filter(PicturePtr p)
40303b705cfSriastradh{
40403b705cfSriastradh	switch (p->filter) {
40503b705cfSriastradh	case PictFilterNearest:
40603b705cfSriastradh	case PictFilterBilinear:
40703b705cfSriastradh		return true;
40803b705cfSriastradh	default:
40903b705cfSriastradh		return false;
41003b705cfSriastradh	}
41103b705cfSriastradh}
41203b705cfSriastradh
/*
 * Emit one destination coordinate pair through OUT_VERTEX, X first and then
 * Y.  Both values arrive as int16_t.
 */
static inline void
gen3_emit_composite_dstcoord(struct sna *sna, int16_t dstX, int16_t dstY)
{
	OUT_VERTEX(dstX);	/* x */
	OUT_VERTEX(dstY);	/* y */
}
41903b705cfSriastradh
42003b705cfSriastradhfastcall static void
42103b705cfSriastradhgen3_emit_composite_primitive_constant(struct sna *sna,
42203b705cfSriastradh				       const struct sna_composite_op *op,
42303b705cfSriastradh				       const struct sna_composite_rectangles *r)
42403b705cfSriastradh{
42503b705cfSriastradh	int16_t dst_x = r->dst.x + op->dst.x;
42603b705cfSriastradh	int16_t dst_y = r->dst.y + op->dst.y;
42703b705cfSriastradh
42803b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
42903b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
43003b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
43103b705cfSriastradh}
43203b705cfSriastradh
43303b705cfSriastradhfastcall static void
43403b705cfSriastradhgen3_emit_composite_boxes_constant(const struct sna_composite_op *op,
43503b705cfSriastradh				   const BoxRec *box, int nbox,
43603b705cfSriastradh				   float *v)
43703b705cfSriastradh{
43803b705cfSriastradh	do {
43903b705cfSriastradh		v[0] = box->x2;
44003b705cfSriastradh		v[1] = box->y2;
44103b705cfSriastradh
44203b705cfSriastradh		v[2] = box->x1;
44303b705cfSriastradh		v[3] = box->y2;
44403b705cfSriastradh
44503b705cfSriastradh		v[4] = box->x1;
44603b705cfSriastradh		v[5] = box->y1;
44703b705cfSriastradh
44803b705cfSriastradh		box++;
44903b705cfSriastradh		v += 6;
45003b705cfSriastradh	} while (--nbox);
45103b705cfSriastradh}
45203b705cfSriastradh
45303b705cfSriastradhfastcall static void
45403b705cfSriastradhgen3_emit_composite_primitive_identity_gradient(struct sna *sna,
45503b705cfSriastradh						const struct sna_composite_op *op,
45603b705cfSriastradh						const struct sna_composite_rectangles *r)
45703b705cfSriastradh{
45803b705cfSriastradh	int16_t dst_x, dst_y;
45903b705cfSriastradh	int16_t src_x, src_y;
46003b705cfSriastradh
46103b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
46203b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
46303b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
46403b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
46503b705cfSriastradh
46603b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
46703b705cfSriastradh	OUT_VERTEX(src_x + r->width);
46803b705cfSriastradh	OUT_VERTEX(src_y + r->height);
46903b705cfSriastradh
47003b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
47103b705cfSriastradh	OUT_VERTEX(src_x);
47203b705cfSriastradh	OUT_VERTEX(src_y + r->height);
47303b705cfSriastradh
47403b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
47503b705cfSriastradh	OUT_VERTEX(src_x);
47603b705cfSriastradh	OUT_VERTEX(src_y);
47703b705cfSriastradh}
47803b705cfSriastradh
47903b705cfSriastradhfastcall static void
48003b705cfSriastradhgen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op,
48103b705cfSriastradh					    const BoxRec *box, int nbox,
48203b705cfSriastradh					    float *v)
48303b705cfSriastradh{
48403b705cfSriastradh	do {
48503b705cfSriastradh		v[0] = box->x2;
48603b705cfSriastradh		v[1] = box->y2;
48703b705cfSriastradh		v[2] = box->x2 + op->src.offset[0];
48803b705cfSriastradh		v[3] = box->y2 + op->src.offset[1];
48903b705cfSriastradh
49003b705cfSriastradh		v[4] = box->x1;
49103b705cfSriastradh		v[5] = box->y2;
49203b705cfSriastradh		v[6] = box->x1 + op->src.offset[0];
49303b705cfSriastradh		v[7] = box->y2 + op->src.offset[1];
49403b705cfSriastradh
49503b705cfSriastradh		v[8] = box->x1;
49603b705cfSriastradh		v[9] = box->y1;
49703b705cfSriastradh		v[10] = box->x1 + op->src.offset[0];
49803b705cfSriastradh		v[11] = box->y1 + op->src.offset[1];
49903b705cfSriastradh
50003b705cfSriastradh		v += 12;
50103b705cfSriastradh		box++;
50203b705cfSriastradh	} while (--nbox);
50303b705cfSriastradh}
50403b705cfSriastradh
50503b705cfSriastradhfastcall static void
50603b705cfSriastradhgen3_emit_composite_primitive_affine_gradient(struct sna *sna,
50703b705cfSriastradh					      const struct sna_composite_op *op,
50803b705cfSriastradh					      const struct sna_composite_rectangles *r)
50903b705cfSriastradh{
51003b705cfSriastradh	PictTransform *transform = op->src.transform;
51103b705cfSriastradh	int16_t dst_x, dst_y;
51203b705cfSriastradh	int16_t src_x, src_y;
51303b705cfSriastradh	float *v;
51403b705cfSriastradh
51503b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
51603b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
51703b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
51803b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
51903b705cfSriastradh
52003b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
52103b705cfSriastradh	sna->render.vertex_used += 12;
52203b705cfSriastradh
52303b705cfSriastradh	v[0] = dst_x + r->width;
52403b705cfSriastradh	v[1] = dst_y + r->height;
52503b705cfSriastradh	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
52603b705cfSriastradh				    transform, op->src.scale,
52703b705cfSriastradh				    &v[2], &v[3]);
52803b705cfSriastradh
52903b705cfSriastradh	v[4] = dst_x;
53003b705cfSriastradh	v[5] = dst_y + r->height;
53103b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y + r->height,
53203b705cfSriastradh				    transform, op->src.scale,
53303b705cfSriastradh				    &v[6], &v[7]);
53403b705cfSriastradh
53503b705cfSriastradh	v[8] = dst_x;
53603b705cfSriastradh	v[9] = dst_y;
53703b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y,
53803b705cfSriastradh				    transform, op->src.scale,
53903b705cfSriastradh				    &v[10], &v[11]);
54003b705cfSriastradh}
54103b705cfSriastradh
54203b705cfSriastradhfastcall static void
54303b705cfSriastradhgen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op,
54403b705cfSriastradh					  const BoxRec *box, int nbox,
54503b705cfSriastradh					  float *v)
54603b705cfSriastradh{
54703b705cfSriastradh	const PictTransform *transform = op->src.transform;
54803b705cfSriastradh
54903b705cfSriastradh	do {
55003b705cfSriastradh		v[0] = box->x2;
55103b705cfSriastradh		v[1] = box->y2;
55203b705cfSriastradh		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
55303b705cfSriastradh					    box->y2 + op->src.offset[1],
55403b705cfSriastradh					    transform, op->src.scale,
55503b705cfSriastradh					    &v[2], &v[3]);
55603b705cfSriastradh
55703b705cfSriastradh		v[4] = box->x1;
55803b705cfSriastradh		v[5] = box->y2;
55903b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
56003b705cfSriastradh					    box->y2 + op->src.offset[1],
56103b705cfSriastradh					    transform, op->src.scale,
56203b705cfSriastradh					    &v[6], &v[7]);
56303b705cfSriastradh
56403b705cfSriastradh		v[8] = box->x1;
56503b705cfSriastradh		v[9] = box->y1;
56603b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
56703b705cfSriastradh					    box->y1 + op->src.offset[1],
56803b705cfSriastradh					    transform, op->src.scale,
56903b705cfSriastradh					    &v[10], &v[11]);
57003b705cfSriastradh
57103b705cfSriastradh		box++;
57203b705cfSriastradh		v += 12;
57303b705cfSriastradh	} while (--nbox);
57403b705cfSriastradh}
57503b705cfSriastradh
57603b705cfSriastradhfastcall static void
57703b705cfSriastradhgen3_emit_composite_primitive_identity_source(struct sna *sna,
57803b705cfSriastradh					      const struct sna_composite_op *op,
57903b705cfSriastradh					      const struct sna_composite_rectangles *r)
58003b705cfSriastradh{
58103b705cfSriastradh	float w = r->width;
58203b705cfSriastradh	float h = r->height;
58303b705cfSriastradh	float *v;
58403b705cfSriastradh
58503b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
58603b705cfSriastradh	sna->render.vertex_used += 12;
58703b705cfSriastradh
58803b705cfSriastradh	v[8] = v[4] = r->dst.x + op->dst.x;
58903b705cfSriastradh	v[0] = v[4] + w;
59003b705cfSriastradh
59103b705cfSriastradh	v[9] = r->dst.y + op->dst.y;
59203b705cfSriastradh	v[5] = v[1] = v[9] + h;
59303b705cfSriastradh
59403b705cfSriastradh	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
59503b705cfSriastradh	v[2] = v[6] + w * op->src.scale[0];
59603b705cfSriastradh
59703b705cfSriastradh	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
59803b705cfSriastradh	v[7] = v[3] = v[11] + h * op->src.scale[1];
59903b705cfSriastradh}
60003b705cfSriastradh
60103b705cfSriastradhfastcall static void
60203b705cfSriastradhgen3_emit_composite_boxes_identity_source(const struct sna_composite_op *op,
60303b705cfSriastradh					  const BoxRec *box, int nbox,
60403b705cfSriastradh					  float *v)
60503b705cfSriastradh{
60603b705cfSriastradh	do {
60703b705cfSriastradh		v[0] = box->x2 + op->dst.x;
60803b705cfSriastradh		v[8] = v[4] = box->x1 + op->dst.x;
60903b705cfSriastradh		v[5] = v[1] = box->y2 + op->dst.y;
61003b705cfSriastradh		v[9] = box->y1 + op->dst.y;
61103b705cfSriastradh
61203b705cfSriastradh		v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
61303b705cfSriastradh		v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
61403b705cfSriastradh
61503b705cfSriastradh		v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
61603b705cfSriastradh		v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
61703b705cfSriastradh
61803b705cfSriastradh		v += 12;
61903b705cfSriastradh		box++;
62003b705cfSriastradh	} while (--nbox);
62103b705cfSriastradh}
62203b705cfSriastradh
62303b705cfSriastradhfastcall static void
62403b705cfSriastradhgen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
62503b705cfSriastradh							const struct sna_composite_op *op,
62603b705cfSriastradh							const struct sna_composite_rectangles *r)
62703b705cfSriastradh{
62803b705cfSriastradh	float w = r->width;
62903b705cfSriastradh	float h = r->height;
63003b705cfSriastradh	float *v;
63103b705cfSriastradh
63203b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
63303b705cfSriastradh	sna->render.vertex_used += 12;
63403b705cfSriastradh
63503b705cfSriastradh	v[8] = v[4] = r->dst.x;
63603b705cfSriastradh	v[9] = r->dst.y;
63703b705cfSriastradh
63803b705cfSriastradh	v[0] = v[4] + w;
63903b705cfSriastradh	v[5] = v[1] = v[9] + h;
64003b705cfSriastradh
64103b705cfSriastradh	v[10] = v[6] = r->src.x * op->src.scale[0];
64203b705cfSriastradh	v[11] = r->src.y * op->src.scale[1];
64303b705cfSriastradh
64403b705cfSriastradh	v[2] = v[6] + w * op->src.scale[0];
64503b705cfSriastradh	v[7] = v[3] = v[11] + h * op->src.scale[1];
64603b705cfSriastradh}
64703b705cfSriastradh
64803b705cfSriastradhfastcall static void
64903b705cfSriastradhgen3_emit_composite_boxes_identity_source_no_offset(const struct sna_composite_op *op,
65003b705cfSriastradh						    const BoxRec *box, int nbox,
65103b705cfSriastradh						    float *v)
65203b705cfSriastradh{
65303b705cfSriastradh	do {
65403b705cfSriastradh		v[0] = box->x2;
65503b705cfSriastradh		v[8] = v[4] = box->x1;
65603b705cfSriastradh		v[5] = v[1] = box->y2;
65703b705cfSriastradh		v[9] = box->y1;
65803b705cfSriastradh
65903b705cfSriastradh		v[10] = v[6] = box->x1 * op->src.scale[0];
66003b705cfSriastradh		v[2] = box->x2 * op->src.scale[0];
66103b705cfSriastradh
66203b705cfSriastradh		v[11] = box->y1 * op->src.scale[1];
66303b705cfSriastradh		v[7] = v[3] = box->y2 * op->src.scale[1];
66403b705cfSriastradh
66503b705cfSriastradh		v += 12;
66603b705cfSriastradh		box++;
66703b705cfSriastradh	} while (--nbox);
66803b705cfSriastradh}
66903b705cfSriastradh
67003b705cfSriastradhfastcall static void
67103b705cfSriastradhgen3_emit_composite_primitive_affine_source(struct sna *sna,
67203b705cfSriastradh					    const struct sna_composite_op *op,
67303b705cfSriastradh					    const struct sna_composite_rectangles *r)
67403b705cfSriastradh{
67503b705cfSriastradh	PictTransform *transform = op->src.transform;
67603b705cfSriastradh	int16_t dst_x = r->dst.x + op->dst.x;
67703b705cfSriastradh	int16_t dst_y = r->dst.y + op->dst.y;
67803b705cfSriastradh	int src_x = r->src.x + (int)op->src.offset[0];
67903b705cfSriastradh	int src_y = r->src.y + (int)op->src.offset[1];
68003b705cfSriastradh	float *v;
68103b705cfSriastradh
68203b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
68303b705cfSriastradh	sna->render.vertex_used += 12;
68403b705cfSriastradh
68503b705cfSriastradh	v[0] = dst_x + r->width;
68603b705cfSriastradh	v[5] = v[1] = dst_y + r->height;
68703b705cfSriastradh	v[8] = v[4] = dst_x;
68803b705cfSriastradh	v[9] = dst_y;
68903b705cfSriastradh
69003b705cfSriastradh	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
69103b705cfSriastradh				    transform, op->src.scale,
69203b705cfSriastradh				    &v[2], &v[3]);
69303b705cfSriastradh
69403b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y + r->height,
69503b705cfSriastradh				    transform, op->src.scale,
69603b705cfSriastradh				    &v[6], &v[7]);
69703b705cfSriastradh
69803b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y,
69903b705cfSriastradh				    transform, op->src.scale,
70003b705cfSriastradh				    &v[10], &v[11]);
70103b705cfSriastradh}
70203b705cfSriastradh
70303b705cfSriastradhfastcall static void
70403b705cfSriastradhgen3_emit_composite_boxes_affine_source(const struct sna_composite_op *op,
70503b705cfSriastradh					const BoxRec *box, int nbox,
70603b705cfSriastradh					float *v)
70703b705cfSriastradh{
70803b705cfSriastradh	const PictTransform *transform = op->src.transform;
70903b705cfSriastradh
71003b705cfSriastradh	do {
71103b705cfSriastradh		v[0] = box->x2;
71203b705cfSriastradh		v[5] = v[1] = box->y2;
71303b705cfSriastradh		v[8] = v[4] = box->x1;
71403b705cfSriastradh		v[9] = box->y1;
71503b705cfSriastradh
71603b705cfSriastradh		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
71703b705cfSriastradh					    box->y2 + op->src.offset[1],
71803b705cfSriastradh					    transform, op->src.scale,
71903b705cfSriastradh					    &v[2], &v[3]);
72003b705cfSriastradh
72103b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
72203b705cfSriastradh					    box->y2 + op->src.offset[1],
72303b705cfSriastradh					    transform, op->src.scale,
72403b705cfSriastradh					    &v[6], &v[7]);
72503b705cfSriastradh
72603b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
72703b705cfSriastradh					    box->y1 + op->src.offset[1],
72803b705cfSriastradh					    transform, op->src.scale,
72903b705cfSriastradh					    &v[10], &v[11]);
73003b705cfSriastradh
73103b705cfSriastradh		v += 12;
73203b705cfSriastradh		box++;
73303b705cfSriastradh	} while (--nbox);
73403b705cfSriastradh}
73503b705cfSriastradh
73603b705cfSriastradhfastcall static void
73703b705cfSriastradhgen3_emit_composite_primitive_constant_identity_mask(struct sna *sna,
73803b705cfSriastradh						     const struct sna_composite_op *op,
73903b705cfSriastradh						     const struct sna_composite_rectangles *r)
74003b705cfSriastradh{
74103b705cfSriastradh	float w = r->width;
74203b705cfSriastradh	float h = r->height;
74303b705cfSriastradh	float *v;
74403b705cfSriastradh
74503b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
74603b705cfSriastradh	sna->render.vertex_used += 12;
74703b705cfSriastradh
74803b705cfSriastradh	v[8] = v[4] = r->dst.x + op->dst.x;
74903b705cfSriastradh	v[0] = v[4] + w;
75003b705cfSriastradh
75103b705cfSriastradh	v[9] = r->dst.y + op->dst.y;
75203b705cfSriastradh	v[5] = v[1] = v[9] + h;
75303b705cfSriastradh
75403b705cfSriastradh	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
75503b705cfSriastradh	v[2] = v[6] + w * op->mask.scale[0];
75603b705cfSriastradh
75703b705cfSriastradh	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
75803b705cfSriastradh	v[7] = v[3] = v[11] + h * op->mask.scale[1];
75903b705cfSriastradh}
76003b705cfSriastradh
76103b705cfSriastradhfastcall static void
76203b705cfSriastradhgen3_emit_composite_primitive_constant_identity_mask_no_offset(struct sna *sna,
76303b705cfSriastradh							       const struct sna_composite_op *op,
76403b705cfSriastradh							       const struct sna_composite_rectangles *r)
76503b705cfSriastradh{
76603b705cfSriastradh	float w = r->width;
76703b705cfSriastradh	float h = r->height;
76803b705cfSriastradh	float *v;
76903b705cfSriastradh
77003b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
77103b705cfSriastradh	sna->render.vertex_used += 12;
77203b705cfSriastradh
77303b705cfSriastradh	v[8] = v[4] = r->dst.x;
77403b705cfSriastradh	v[9] = r->dst.y;
77503b705cfSriastradh
77603b705cfSriastradh	v[0] = v[4] + w;
77703b705cfSriastradh	v[5] = v[1] = v[9] + h;
77803b705cfSriastradh
77903b705cfSriastradh	v[10] = v[6] = r->mask.x * op->mask.scale[0];
78003b705cfSriastradh	v[11] = r->mask.y * op->mask.scale[1];
78103b705cfSriastradh
78203b705cfSriastradh	v[2] = v[6] + w * op->mask.scale[0];
78303b705cfSriastradh	v[7] = v[3] = v[11] + h * op->mask.scale[1];
78403b705cfSriastradh}
78503b705cfSriastradh
78603b705cfSriastradhfastcall static void
78703b705cfSriastradhgen3_emit_composite_primitive_identity_source_mask(struct sna *sna,
78803b705cfSriastradh						   const struct sna_composite_op *op,
78903b705cfSriastradh						   const struct sna_composite_rectangles *r)
79003b705cfSriastradh{
79103b705cfSriastradh	float dst_x, dst_y;
79203b705cfSriastradh	float src_x, src_y;
79303b705cfSriastradh	float msk_x, msk_y;
79403b705cfSriastradh	float w, h;
79503b705cfSriastradh	float *v;
79603b705cfSriastradh
79703b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
79803b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
79903b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
80003b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
80103b705cfSriastradh	msk_x = r->mask.x + op->mask.offset[0];
80203b705cfSriastradh	msk_y = r->mask.y + op->mask.offset[1];
80303b705cfSriastradh	w = r->width;
80403b705cfSriastradh	h = r->height;
80503b705cfSriastradh
80603b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
80703b705cfSriastradh	sna->render.vertex_used += 18;
80803b705cfSriastradh
80903b705cfSriastradh	v[0] = dst_x + w;
81003b705cfSriastradh	v[1] = dst_y + h;
81103b705cfSriastradh	v[2] = (src_x + w) * op->src.scale[0];
81203b705cfSriastradh	v[3] = (src_y + h) * op->src.scale[1];
81303b705cfSriastradh	v[4] = (msk_x + w) * op->mask.scale[0];
81403b705cfSriastradh	v[5] = (msk_y + h) * op->mask.scale[1];
81503b705cfSriastradh
81603b705cfSriastradh	v[6] = dst_x;
81703b705cfSriastradh	v[7] = v[1];
81803b705cfSriastradh	v[8] = src_x * op->src.scale[0];
81903b705cfSriastradh	v[9] = v[3];
82003b705cfSriastradh	v[10] = msk_x * op->mask.scale[0];
82103b705cfSriastradh	v[11] =v[5];
82203b705cfSriastradh
82303b705cfSriastradh	v[12] = v[6];
82403b705cfSriastradh	v[13] = dst_y;
82503b705cfSriastradh	v[14] = v[8];
82603b705cfSriastradh	v[15] = src_y * op->src.scale[1];
82703b705cfSriastradh	v[16] = v[10];
82803b705cfSriastradh	v[17] = msk_y * op->mask.scale[1];
82903b705cfSriastradh}
83003b705cfSriastradh
83103b705cfSriastradhfastcall static void
83203b705cfSriastradhgen3_emit_composite_primitive_affine_source_mask(struct sna *sna,
83303b705cfSriastradh						 const struct sna_composite_op *op,
83403b705cfSriastradh						 const struct sna_composite_rectangles *r)
83503b705cfSriastradh{
83603b705cfSriastradh	int16_t src_x, src_y;
83703b705cfSriastradh	float dst_x, dst_y;
83803b705cfSriastradh	float msk_x, msk_y;
83903b705cfSriastradh	float w, h;
84003b705cfSriastradh	float *v;
84103b705cfSriastradh
84203b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
84303b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
84403b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
84503b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
84603b705cfSriastradh	msk_x = r->mask.x + op->mask.offset[0];
84703b705cfSriastradh	msk_y = r->mask.y + op->mask.offset[1];
84803b705cfSriastradh	w = r->width;
84903b705cfSriastradh	h = r->height;
85003b705cfSriastradh
85103b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
85203b705cfSriastradh	sna->render.vertex_used += 18;
85303b705cfSriastradh
85403b705cfSriastradh	v[0] = dst_x + w;
85503b705cfSriastradh	v[1] = dst_y + h;
85603b705cfSriastradh	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
85703b705cfSriastradh				    op->src.transform, op->src.scale,
85803b705cfSriastradh				    &v[2], &v[3]);
85903b705cfSriastradh	v[4] = (msk_x + w) * op->mask.scale[0];
86003b705cfSriastradh	v[5] = (msk_y + h) * op->mask.scale[1];
86103b705cfSriastradh
86203b705cfSriastradh	v[6] = dst_x;
86303b705cfSriastradh	v[7] = v[1];
86403b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y + r->height,
86503b705cfSriastradh				    op->src.transform, op->src.scale,
86603b705cfSriastradh				    &v[8], &v[9]);
86703b705cfSriastradh	v[10] = msk_x * op->mask.scale[0];
86803b705cfSriastradh	v[11] =v[5];
86903b705cfSriastradh
87003b705cfSriastradh	v[12] = v[6];
87103b705cfSriastradh	v[13] = dst_y;
87203b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y,
87303b705cfSriastradh				    op->src.transform, op->src.scale,
87403b705cfSriastradh				    &v[14], &v[15]);
87503b705cfSriastradh	v[16] = v[10];
87603b705cfSriastradh	v[17] = msk_y * op->mask.scale[1];
87703b705cfSriastradh}
87803b705cfSriastradh
87903b705cfSriastradhstatic void
88003b705cfSriastradhgen3_emit_composite_texcoord(struct sna *sna,
88103b705cfSriastradh			     const struct sna_composite_channel *channel,
88203b705cfSriastradh			     int16_t x, int16_t y)
88303b705cfSriastradh{
88403b705cfSriastradh	float s = 0, t = 0, w = 1;
88503b705cfSriastradh
88603b705cfSriastradh	switch (channel->u.gen3.type) {
88703b705cfSriastradh	case SHADER_OPACITY:
88803b705cfSriastradh	case SHADER_NONE:
88903b705cfSriastradh	case SHADER_ZERO:
89003b705cfSriastradh	case SHADER_BLACK:
89103b705cfSriastradh	case SHADER_WHITE:
89203b705cfSriastradh	case SHADER_CONSTANT:
89303b705cfSriastradh		break;
89403b705cfSriastradh
89503b705cfSriastradh	case SHADER_LINEAR:
89603b705cfSriastradh	case SHADER_RADIAL:
89703b705cfSriastradh	case SHADER_TEXTURE:
89803b705cfSriastradh		x += channel->offset[0];
89903b705cfSriastradh		y += channel->offset[1];
90003b705cfSriastradh		if (channel->is_affine) {
90103b705cfSriastradh			sna_get_transformed_coordinates(x, y,
90203b705cfSriastradh							channel->transform,
90303b705cfSriastradh							&s, &t);
90403b705cfSriastradh			OUT_VERTEX(s * channel->scale[0]);
90503b705cfSriastradh			OUT_VERTEX(t * channel->scale[1]);
90603b705cfSriastradh		} else {
90703b705cfSriastradh			sna_get_transformed_coordinates_3d(x, y,
90803b705cfSriastradh							   channel->transform,
90903b705cfSriastradh							   &s, &t, &w);
91003b705cfSriastradh			OUT_VERTEX(s * channel->scale[0]);
91103b705cfSriastradh			OUT_VERTEX(t * channel->scale[1]);
91203b705cfSriastradh			OUT_VERTEX(0);
91303b705cfSriastradh			OUT_VERTEX(w);
91403b705cfSriastradh		}
91503b705cfSriastradh		break;
91603b705cfSriastradh	}
91703b705cfSriastradh}
91803b705cfSriastradh
91903b705cfSriastradhstatic void
92003b705cfSriastradhgen3_emit_composite_vertex(struct sna *sna,
92103b705cfSriastradh			   const struct sna_composite_op *op,
92203b705cfSriastradh			   int16_t srcX, int16_t srcY,
92303b705cfSriastradh			   int16_t maskX, int16_t maskY,
92403b705cfSriastradh			   int16_t dstX, int16_t dstY)
92503b705cfSriastradh{
92603b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dstX, dstY);
92703b705cfSriastradh	gen3_emit_composite_texcoord(sna, &op->src, srcX, srcY);
92803b705cfSriastradh	gen3_emit_composite_texcoord(sna, &op->mask, maskX, maskY);
92903b705cfSriastradh}
93003b705cfSriastradh
93103b705cfSriastradhfastcall static void
93203b705cfSriastradhgen3_emit_composite_primitive(struct sna *sna,
93303b705cfSriastradh			      const struct sna_composite_op *op,
93403b705cfSriastradh			      const struct sna_composite_rectangles *r)
93503b705cfSriastradh{
93603b705cfSriastradh	gen3_emit_composite_vertex(sna, op,
93703b705cfSriastradh				   r->src.x + r->width,
93803b705cfSriastradh				   r->src.y + r->height,
93903b705cfSriastradh				   r->mask.x + r->width,
94003b705cfSriastradh				   r->mask.y + r->height,
94103b705cfSriastradh				   op->dst.x + r->dst.x + r->width,
94203b705cfSriastradh				   op->dst.y + r->dst.y + r->height);
94303b705cfSriastradh	gen3_emit_composite_vertex(sna, op,
94403b705cfSriastradh				   r->src.x,
94503b705cfSriastradh				   r->src.y + r->height,
94603b705cfSriastradh				   r->mask.x,
94703b705cfSriastradh				   r->mask.y + r->height,
94803b705cfSriastradh				   op->dst.x + r->dst.x,
94903b705cfSriastradh				   op->dst.y + r->dst.y + r->height);
95003b705cfSriastradh	gen3_emit_composite_vertex(sna, op,
95103b705cfSriastradh				   r->src.x,
95203b705cfSriastradh				   r->src.y,
95303b705cfSriastradh				   r->mask.x,
95403b705cfSriastradh				   r->mask.y,
95503b705cfSriastradh				   op->dst.x + r->dst.x,
95603b705cfSriastradh				   op->dst.y + r->dst.y);
95703b705cfSriastradh}
95803b705cfSriastradh
95903b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
96003b705cfSriastradhsse2 fastcall static void
96103b705cfSriastradhgen3_emit_composite_primitive_constant__sse2(struct sna *sna,
96203b705cfSriastradh					     const struct sna_composite_op *op,
96303b705cfSriastradh					     const struct sna_composite_rectangles *r)
96403b705cfSriastradh{
96503b705cfSriastradh	float *v;
96603b705cfSriastradh
96703b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
96803b705cfSriastradh	sna->render.vertex_used += 6;
96903b705cfSriastradh
97003b705cfSriastradh	v[4] = v[2] = r->dst.x + op->dst.x;
97103b705cfSriastradh	v[5] = r->dst.y + op->dst.y;
97203b705cfSriastradh
97303b705cfSriastradh	v[0] = v[2] + r->width;
97403b705cfSriastradh	v[3] = v[1] = v[5] + r->height;
97503b705cfSriastradh
97603b705cfSriastradh}
97703b705cfSriastradh
97803b705cfSriastradhsse2 fastcall static void
97903b705cfSriastradhgen3_emit_composite_boxes_constant__sse2(const struct sna_composite_op *op,
98003b705cfSriastradh					 const BoxRec *box, int nbox,
98103b705cfSriastradh					 float *v)
98203b705cfSriastradh{
98303b705cfSriastradh	do {
98403b705cfSriastradh		v[0] = box->x2;
98503b705cfSriastradh		v[3] = v[1] = box->y2;
98603b705cfSriastradh		v[4] = v[2] = box->x1;
98703b705cfSriastradh		v[5] = box->y1;
98803b705cfSriastradh
98903b705cfSriastradh		box++;
99003b705cfSriastradh		v += 6;
99103b705cfSriastradh	} while (--nbox);
99203b705cfSriastradh}
99303b705cfSriastradh
99403b705cfSriastradhsse2 fastcall static void
99503b705cfSriastradhgen3_emit_composite_primitive_identity_gradient__sse2(struct sna *sna,
99603b705cfSriastradh						      const struct sna_composite_op *op,
99703b705cfSriastradh						      const struct sna_composite_rectangles *r)
99803b705cfSriastradh{
99903b705cfSriastradh	int16_t x, y;
100003b705cfSriastradh	float *v;
100103b705cfSriastradh
100203b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
100303b705cfSriastradh	sna->render.vertex_used += 12;
100403b705cfSriastradh
100503b705cfSriastradh	x = r->dst.x + op->dst.x;
100603b705cfSriastradh	y = r->dst.y + op->dst.y;
100703b705cfSriastradh	v[0] = x + r->width;
100803b705cfSriastradh	v[5] = v[1] = y + r->height;
100903b705cfSriastradh	v[8] = v[4] = x;
101003b705cfSriastradh	v[9] = y;
101103b705cfSriastradh
101203b705cfSriastradh	x = r->src.x + op->src.offset[0];
101303b705cfSriastradh	y = r->src.y + op->src.offset[1];
101403b705cfSriastradh	v[2] = x + r->width;
101503b705cfSriastradh	v[7] = v[3] = y + r->height;
101603b705cfSriastradh	v[10] = v[6] = x;
101703b705cfSriastradh	v[11] = y;
101803b705cfSriastradh}
101903b705cfSriastradh
102003b705cfSriastradhsse2 fastcall static void
102103b705cfSriastradhgen3_emit_composite_boxes_identity_gradient__sse2(const struct sna_composite_op *op,
102203b705cfSriastradh						  const BoxRec *box, int nbox,
102303b705cfSriastradh						  float *v)
102403b705cfSriastradh{
102503b705cfSriastradh	do {
102603b705cfSriastradh		v[0] = box->x2;
102703b705cfSriastradh		v[5] = v[1] = box->y2;
102803b705cfSriastradh		v[8] = v[4] = box->x1;
102903b705cfSriastradh		v[9] = box->y1;
103003b705cfSriastradh
103103b705cfSriastradh		v[2] = box->x2 + op->src.offset[0];
103203b705cfSriastradh		v[7] = v[3] = box->y2 + op->src.offset[1];
103303b705cfSriastradh		v[10] = v[6] = box->x1 + op->src.offset[0];
103403b705cfSriastradh		v[11] = box->y1 + op->src.offset[1];
103503b705cfSriastradh
103603b705cfSriastradh		v += 12;
103703b705cfSriastradh		box++;
103803b705cfSriastradh	} while (--nbox);
103903b705cfSriastradh}
104003b705cfSriastradh
104103b705cfSriastradhsse2 fastcall static void
104203b705cfSriastradhgen3_emit_composite_primitive_affine_gradient__sse2(struct sna *sna,
104303b705cfSriastradh						    const struct sna_composite_op *op,
104403b705cfSriastradh						    const struct sna_composite_rectangles *r)
104503b705cfSriastradh{
104603b705cfSriastradh	PictTransform *transform = op->src.transform;
104703b705cfSriastradh	int16_t dst_x, dst_y;
104803b705cfSriastradh	int16_t src_x, src_y;
104903b705cfSriastradh	float *v;
105003b705cfSriastradh
105103b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
105203b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
105303b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
105403b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
105503b705cfSriastradh
105603b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
105703b705cfSriastradh	sna->render.vertex_used += 12;
105803b705cfSriastradh
105903b705cfSriastradh	v[0] = dst_x + r->width;
106003b705cfSriastradh	v[1] = dst_y + r->height;
106103b705cfSriastradh	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
106203b705cfSriastradh				    transform, op->src.scale,
106303b705cfSriastradh				    &v[2], &v[3]);
106403b705cfSriastradh
106503b705cfSriastradh	v[4] = dst_x;
106603b705cfSriastradh	v[5] = dst_y + r->height;
106703b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y + r->height,
106803b705cfSriastradh				    transform, op->src.scale,
106903b705cfSriastradh				    &v[6], &v[7]);
107003b705cfSriastradh
107103b705cfSriastradh	v[8] = dst_x;
107203b705cfSriastradh	v[9] = dst_y;
107303b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y,
107403b705cfSriastradh				    transform, op->src.scale,
107503b705cfSriastradh				    &v[10], &v[11]);
107603b705cfSriastradh}
107703b705cfSriastradh
107803b705cfSriastradhsse2 fastcall static void
107903b705cfSriastradhgen3_emit_composite_boxes_affine_gradient__sse2(const struct sna_composite_op *op,
108003b705cfSriastradh						const BoxRec *box, int nbox,
108103b705cfSriastradh						float *v)
108203b705cfSriastradh{
108303b705cfSriastradh	const PictTransform *transform = op->src.transform;
108403b705cfSriastradh
108503b705cfSriastradh	do {
108603b705cfSriastradh		v[0] = box->x2;
108703b705cfSriastradh		v[1] = box->y2;
108803b705cfSriastradh		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
108903b705cfSriastradh					    box->y2 + op->src.offset[1],
109003b705cfSriastradh					    transform, op->src.scale,
109103b705cfSriastradh					    &v[2], &v[3]);
109203b705cfSriastradh
109303b705cfSriastradh		v[4] = box->x1;
109403b705cfSriastradh		v[5] = box->y2;
109503b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
109603b705cfSriastradh					    box->y2 + op->src.offset[1],
109703b705cfSriastradh					    transform, op->src.scale,
109803b705cfSriastradh					    &v[6], &v[7]);
109903b705cfSriastradh
110003b705cfSriastradh		v[8] = box->x1;
110103b705cfSriastradh		v[9] = box->y1;
110203b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
110303b705cfSriastradh					    box->y1 + op->src.offset[1],
110403b705cfSriastradh					    transform, op->src.scale,
110503b705cfSriastradh					    &v[10], &v[11]);
110603b705cfSriastradh
110703b705cfSriastradh		box++;
110803b705cfSriastradh		v += 12;
110903b705cfSriastradh	} while (--nbox);
111003b705cfSriastradh}
111103b705cfSriastradh
111203b705cfSriastradhsse2 fastcall static void
111303b705cfSriastradhgen3_emit_composite_primitive_identity_source__sse2(struct sna *sna,
111403b705cfSriastradh						    const struct sna_composite_op *op,
111503b705cfSriastradh						    const struct sna_composite_rectangles *r)
111603b705cfSriastradh{
111703b705cfSriastradh	float w = r->width;
111803b705cfSriastradh	float h = r->height;
111903b705cfSriastradh	float *v;
112003b705cfSriastradh
112103b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
112203b705cfSriastradh	sna->render.vertex_used += 12;
112303b705cfSriastradh
112403b705cfSriastradh	v[8] = v[4] = r->dst.x + op->dst.x;
112503b705cfSriastradh	v[0] = v[4] + w;
112603b705cfSriastradh
112703b705cfSriastradh	v[9] = r->dst.y + op->dst.y;
112803b705cfSriastradh	v[5] = v[1] = v[9] + h;
112903b705cfSriastradh
113003b705cfSriastradh	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
113103b705cfSriastradh	v[2] = v[6] + w * op->src.scale[0];
113203b705cfSriastradh
113303b705cfSriastradh	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
113403b705cfSriastradh	v[7] = v[3] = v[11] + h * op->src.scale[1];
113503b705cfSriastradh}
113603b705cfSriastradh
113703b705cfSriastradhsse2 fastcall static void
113803b705cfSriastradhgen3_emit_composite_boxes_identity_source__sse2(const struct sna_composite_op *op,
113903b705cfSriastradh						const BoxRec *box, int nbox,
114003b705cfSriastradh						float *v)
114103b705cfSriastradh{
114203b705cfSriastradh	do {
114303b705cfSriastradh		v[0] = box->x2 + op->dst.x;
114403b705cfSriastradh		v[8] = v[4] = box->x1 + op->dst.x;
114503b705cfSriastradh		v[5] = v[1] = box->y2 + op->dst.y;
114603b705cfSriastradh		v[9] = box->y1 + op->dst.y;
114703b705cfSriastradh
114803b705cfSriastradh		v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
114903b705cfSriastradh		v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
115003b705cfSriastradh
115103b705cfSriastradh		v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
115203b705cfSriastradh		v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
115303b705cfSriastradh
115403b705cfSriastradh		v += 12;
115503b705cfSriastradh		box++;
115603b705cfSriastradh	} while (--nbox);
115703b705cfSriastradh}
115803b705cfSriastradh
115903b705cfSriastradhsse2 fastcall static void
116003b705cfSriastradhgen3_emit_composite_primitive_identity_source_no_offset__sse2(struct sna *sna,
116103b705cfSriastradh							      const struct sna_composite_op *op,
116203b705cfSriastradh							      const struct sna_composite_rectangles *r)
116303b705cfSriastradh{
116403b705cfSriastradh	float w = r->width;
116503b705cfSriastradh	float h = r->height;
116603b705cfSriastradh	float *v;
116703b705cfSriastradh
116803b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
116903b705cfSriastradh	sna->render.vertex_used += 12;
117003b705cfSriastradh
117103b705cfSriastradh	v[8] = v[4] = r->dst.x;
117203b705cfSriastradh	v[9] = r->dst.y;
117303b705cfSriastradh
117403b705cfSriastradh	v[0] = v[4] + w;
117503b705cfSriastradh	v[5] = v[1] = v[9] + h;
117603b705cfSriastradh
117703b705cfSriastradh	v[10] = v[6] = r->src.x * op->src.scale[0];
117803b705cfSriastradh	v[11] = r->src.y * op->src.scale[1];
117903b705cfSriastradh
118003b705cfSriastradh	v[2] = v[6] + w * op->src.scale[0];
118103b705cfSriastradh	v[7] = v[3] = v[11] + h * op->src.scale[1];
118203b705cfSriastradh}
118303b705cfSriastradh
118403b705cfSriastradhsse2 fastcall static void
118503b705cfSriastradhgen3_emit_composite_boxes_identity_source_no_offset__sse2(const struct sna_composite_op *op,
118603b705cfSriastradh							  const BoxRec *box, int nbox,
118703b705cfSriastradh							  float *v)
118803b705cfSriastradh{
118903b705cfSriastradh	do {
119003b705cfSriastradh		v[0] = box->x2;
119103b705cfSriastradh		v[8] = v[4] = box->x1;
119203b705cfSriastradh		v[5] = v[1] = box->y2;
119303b705cfSriastradh		v[9] = box->y1;
119403b705cfSriastradh
119503b705cfSriastradh		v[10] = v[6] = box->x1 * op->src.scale[0];
119603b705cfSriastradh		v[2] = box->x2 * op->src.scale[0];
119703b705cfSriastradh
119803b705cfSriastradh		v[11] = box->y1 * op->src.scale[1];
119903b705cfSriastradh		v[7] = v[3] = box->y2 * op->src.scale[1];
120003b705cfSriastradh
120103b705cfSriastradh		v += 12;
120203b705cfSriastradh		box++;
120303b705cfSriastradh	} while (--nbox);
120403b705cfSriastradh}
120503b705cfSriastradh
120603b705cfSriastradhsse2 fastcall static void
120703b705cfSriastradhgen3_emit_composite_primitive_affine_source__sse2(struct sna *sna,
120803b705cfSriastradh						  const struct sna_composite_op *op,
120903b705cfSriastradh						  const struct sna_composite_rectangles *r)
121003b705cfSriastradh{
121103b705cfSriastradh	PictTransform *transform = op->src.transform;
121203b705cfSriastradh	int16_t dst_x = r->dst.x + op->dst.x;
121303b705cfSriastradh	int16_t dst_y = r->dst.y + op->dst.y;
121403b705cfSriastradh	int src_x = r->src.x + (int)op->src.offset[0];
121503b705cfSriastradh	int src_y = r->src.y + (int)op->src.offset[1];
121603b705cfSriastradh	float *v;
121703b705cfSriastradh
121803b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
121903b705cfSriastradh	sna->render.vertex_used += 12;
122003b705cfSriastradh
122103b705cfSriastradh	v[0] = dst_x + r->width;
122203b705cfSriastradh	v[5] = v[1] = dst_y + r->height;
122303b705cfSriastradh	v[8] = v[4] = dst_x;
122403b705cfSriastradh	v[9] = dst_y;
122503b705cfSriastradh
122603b705cfSriastradh	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
122703b705cfSriastradh				    transform, op->src.scale,
122803b705cfSriastradh				    &v[2], &v[3]);
122903b705cfSriastradh
123003b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y + r->height,
123103b705cfSriastradh				    transform, op->src.scale,
123203b705cfSriastradh				    &v[6], &v[7]);
123303b705cfSriastradh
123403b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y,
123503b705cfSriastradh				    transform, op->src.scale,
123603b705cfSriastradh				    &v[10], &v[11]);
123703b705cfSriastradh}
123803b705cfSriastradh
123903b705cfSriastradhsse2 fastcall static void
124003b705cfSriastradhgen3_emit_composite_boxes_affine_source__sse2(const struct sna_composite_op *op,
124103b705cfSriastradh					      const BoxRec *box, int nbox,
124203b705cfSriastradh					      float *v)
124303b705cfSriastradh{
124403b705cfSriastradh	const PictTransform *transform = op->src.transform;
124503b705cfSriastradh
124603b705cfSriastradh	do {
124703b705cfSriastradh		v[0] = box->x2;
124803b705cfSriastradh		v[5] = v[1] = box->y2;
124903b705cfSriastradh		v[8] = v[4] = box->x1;
125003b705cfSriastradh		v[9] = box->y1;
125103b705cfSriastradh
125203b705cfSriastradh		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
125303b705cfSriastradh					    box->y2 + op->src.offset[1],
125403b705cfSriastradh					    transform, op->src.scale,
125503b705cfSriastradh					    &v[2], &v[3]);
125603b705cfSriastradh
125703b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
125803b705cfSriastradh					    box->y2 + op->src.offset[1],
125903b705cfSriastradh					    transform, op->src.scale,
126003b705cfSriastradh					    &v[6], &v[7]);
126103b705cfSriastradh
126203b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
126303b705cfSriastradh					    box->y1 + op->src.offset[1],
126403b705cfSriastradh					    transform, op->src.scale,
126503b705cfSriastradh					    &v[10], &v[11]);
126603b705cfSriastradh
126703b705cfSriastradh		v += 12;
126803b705cfSriastradh		box++;
126903b705cfSriastradh	} while (--nbox);
127003b705cfSriastradh}
127103b705cfSriastradh
127203b705cfSriastradhsse2 fastcall static void
127303b705cfSriastradhgen3_emit_composite_primitive_constant_identity_mask__sse2(struct sna *sna,
127403b705cfSriastradh							   const struct sna_composite_op *op,
127503b705cfSriastradh							   const struct sna_composite_rectangles *r)
127603b705cfSriastradh{
127703b705cfSriastradh	float w = r->width;
127803b705cfSriastradh	float h = r->height;
127903b705cfSriastradh	float *v;
128003b705cfSriastradh
128103b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
128203b705cfSriastradh	sna->render.vertex_used += 12;
128303b705cfSriastradh
128403b705cfSriastradh	v[8] = v[4] = r->dst.x + op->dst.x;
128503b705cfSriastradh	v[0] = v[4] + w;
128603b705cfSriastradh
128703b705cfSriastradh	v[9] = r->dst.y + op->dst.y;
128803b705cfSriastradh	v[5] = v[1] = v[9] + h;
128903b705cfSriastradh
129003b705cfSriastradh	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
129103b705cfSriastradh	v[2] = v[6] + w * op->mask.scale[0];
129203b705cfSriastradh
129303b705cfSriastradh	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
129403b705cfSriastradh	v[7] = v[3] = v[11] + h * op->mask.scale[1];
129503b705cfSriastradh}
129603b705cfSriastradh
129703b705cfSriastradhsse2 fastcall static void
129803b705cfSriastradhgen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2(struct sna *sna,
129903b705cfSriastradh								     const struct sna_composite_op *op,
130003b705cfSriastradh								     const struct sna_composite_rectangles *r)
130103b705cfSriastradh{
130203b705cfSriastradh	float w = r->width;
130303b705cfSriastradh	float h = r->height;
130403b705cfSriastradh	float *v;
130503b705cfSriastradh
130603b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
130703b705cfSriastradh	sna->render.vertex_used += 12;
130803b705cfSriastradh
130903b705cfSriastradh	v[8] = v[4] = r->dst.x;
131003b705cfSriastradh	v[9] = r->dst.y;
131103b705cfSriastradh
131203b705cfSriastradh	v[0] = v[4] + w;
131303b705cfSriastradh	v[5] = v[1] = v[9] + h;
131403b705cfSriastradh
131503b705cfSriastradh	v[10] = v[6] = r->mask.x * op->mask.scale[0];
131603b705cfSriastradh	v[11] = r->mask.y * op->mask.scale[1];
131703b705cfSriastradh
131803b705cfSriastradh	v[2] = v[6] + w * op->mask.scale[0];
131903b705cfSriastradh	v[7] = v[3] = v[11] + h * op->mask.scale[1];
132003b705cfSriastradh}
132103b705cfSriastradh
132203b705cfSriastradhsse2 fastcall static void
132303b705cfSriastradhgen3_emit_composite_primitive_identity_source_mask__sse2(struct sna *sna,
132403b705cfSriastradh							 const struct sna_composite_op *op,
132503b705cfSriastradh							 const struct sna_composite_rectangles *r)
132603b705cfSriastradh{
132703b705cfSriastradh	float dst_x, dst_y;
132803b705cfSriastradh	float src_x, src_y;
132903b705cfSriastradh	float msk_x, msk_y;
133003b705cfSriastradh	float w, h;
133103b705cfSriastradh	float *v;
133203b705cfSriastradh
133303b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
133403b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
133503b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
133603b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
133703b705cfSriastradh	msk_x = r->mask.x + op->mask.offset[0];
133803b705cfSriastradh	msk_y = r->mask.y + op->mask.offset[1];
133903b705cfSriastradh	w = r->width;
134003b705cfSriastradh	h = r->height;
134103b705cfSriastradh
134203b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
134303b705cfSriastradh	sna->render.vertex_used += 18;
134403b705cfSriastradh
134503b705cfSriastradh	v[0] = dst_x + w;
134603b705cfSriastradh	v[1] = dst_y + h;
134703b705cfSriastradh	v[2] = (src_x + w) * op->src.scale[0];
134803b705cfSriastradh	v[3] = (src_y + h) * op->src.scale[1];
134903b705cfSriastradh	v[4] = (msk_x + w) * op->mask.scale[0];
135003b705cfSriastradh	v[5] = (msk_y + h) * op->mask.scale[1];
135103b705cfSriastradh
135203b705cfSriastradh	v[6] = dst_x;
135303b705cfSriastradh	v[7] = v[1];
135403b705cfSriastradh	v[8] = src_x * op->src.scale[0];
135503b705cfSriastradh	v[9] = v[3];
135603b705cfSriastradh	v[10] = msk_x * op->mask.scale[0];
135703b705cfSriastradh	v[11] =v[5];
135803b705cfSriastradh
135903b705cfSriastradh	v[12] = v[6];
136003b705cfSriastradh	v[13] = dst_y;
136103b705cfSriastradh	v[14] = v[8];
136203b705cfSriastradh	v[15] = src_y * op->src.scale[1];
136303b705cfSriastradh	v[16] = v[10];
136403b705cfSriastradh	v[17] = msk_y * op->mask.scale[1];
136503b705cfSriastradh}
136603b705cfSriastradh
136703b705cfSriastradhsse2 fastcall static void
136803b705cfSriastradhgen3_emit_composite_primitive_affine_source_mask__sse2(struct sna *sna,
136903b705cfSriastradh						       const struct sna_composite_op *op,
137003b705cfSriastradh						       const struct sna_composite_rectangles *r)
137103b705cfSriastradh{
137203b705cfSriastradh	int16_t src_x, src_y;
137303b705cfSriastradh	float dst_x, dst_y;
137403b705cfSriastradh	float msk_x, msk_y;
137503b705cfSriastradh	float w, h;
137603b705cfSriastradh	float *v;
137703b705cfSriastradh
137803b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
137903b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
138003b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
138103b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
138203b705cfSriastradh	msk_x = r->mask.x + op->mask.offset[0];
138303b705cfSriastradh	msk_y = r->mask.y + op->mask.offset[1];
138403b705cfSriastradh	w = r->width;
138503b705cfSriastradh	h = r->height;
138603b705cfSriastradh
138703b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
138803b705cfSriastradh	sna->render.vertex_used += 18;
138903b705cfSriastradh
139003b705cfSriastradh	v[0] = dst_x + w;
139103b705cfSriastradh	v[1] = dst_y + h;
139203b705cfSriastradh	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
139303b705cfSriastradh				    op->src.transform, op->src.scale,
139403b705cfSriastradh				    &v[2], &v[3]);
139503b705cfSriastradh	v[4] = (msk_x + w) * op->mask.scale[0];
139603b705cfSriastradh	v[5] = (msk_y + h) * op->mask.scale[1];
139703b705cfSriastradh
139803b705cfSriastradh	v[6] = dst_x;
139903b705cfSriastradh	v[7] = v[1];
140003b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y + r->height,
140103b705cfSriastradh				    op->src.transform, op->src.scale,
140203b705cfSriastradh				    &v[8], &v[9]);
140303b705cfSriastradh	v[10] = msk_x * op->mask.scale[0];
140403b705cfSriastradh	v[11] =v[5];
140503b705cfSriastradh
140603b705cfSriastradh	v[12] = v[6];
140703b705cfSriastradh	v[13] = dst_y;
140803b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y,
140903b705cfSriastradh				    op->src.transform, op->src.scale,
141003b705cfSriastradh				    &v[14], &v[15]);
141103b705cfSriastradh	v[16] = v[10];
141203b705cfSriastradh	v[17] = msk_y * op->mask.scale[1];
141303b705cfSriastradh}
141403b705cfSriastradh#endif
141503b705cfSriastradh
141603b705cfSriastradhstatic inline void
141703b705cfSriastradhgen3_2d_perspective(struct sna *sna, int in, int out)
141803b705cfSriastradh{
141903b705cfSriastradh	gen3_fs_rcp(out, 0, gen3_fs_operand(in, W, W, W, W));
142003b705cfSriastradh	gen3_fs_mul(out,
142103b705cfSriastradh		    gen3_fs_operand(in, X, Y, ZERO, ONE),
142203b705cfSriastradh		    gen3_fs_operand_reg(out));
142303b705cfSriastradh}
142403b705cfSriastradh
142503b705cfSriastradhstatic inline void
142603b705cfSriastradhgen3_linear_coord(struct sna *sna,
142703b705cfSriastradh		  const struct sna_composite_channel *channel,
142803b705cfSriastradh		  int in, int out)
142903b705cfSriastradh{
143003b705cfSriastradh	int c = channel->u.gen3.constants;
143103b705cfSriastradh
143203b705cfSriastradh	if (!channel->is_affine) {
143303b705cfSriastradh		gen3_2d_perspective(sna, in, FS_U0);
143403b705cfSriastradh		in = FS_U0;
143503b705cfSriastradh	}
143603b705cfSriastradh
143703b705cfSriastradh	gen3_fs_mov(out, gen3_fs_operand_zero());
143803b705cfSriastradh	gen3_fs_dp3(out, MASK_X,
143903b705cfSriastradh		    gen3_fs_operand(in, X, Y, ONE, ZERO),
144003b705cfSriastradh		    gen3_fs_operand_reg(c));
144103b705cfSriastradh}
144203b705cfSriastradh
144303b705cfSriastradhstatic void
144403b705cfSriastradhgen3_radial_coord(struct sna *sna,
144503b705cfSriastradh		  const struct sna_composite_channel *channel,
144603b705cfSriastradh		  int in, int out)
144703b705cfSriastradh{
144803b705cfSriastradh	int c = channel->u.gen3.constants;
144903b705cfSriastradh
145003b705cfSriastradh	if (!channel->is_affine) {
145103b705cfSriastradh		gen3_2d_perspective(sna, in, FS_U0);
145203b705cfSriastradh		in = FS_U0;
145303b705cfSriastradh	}
145403b705cfSriastradh
145503b705cfSriastradh	switch (channel->u.gen3.mode) {
145603b705cfSriastradh	case RADIAL_ONE:
145703b705cfSriastradh		/*
145803b705cfSriastradh		   pdx = (x - c1x) / dr, pdy = (y - c1y) / dr;
145903b705cfSriastradh		   r² = pdx*pdx + pdy*pdy
146003b705cfSriastradh		   t = r²/sqrt(r²) - r1/dr;
146103b705cfSriastradh		   */
146203b705cfSriastradh		gen3_fs_mad(FS_U0, MASK_X | MASK_Y,
146303b705cfSriastradh			    gen3_fs_operand(in, X, Y, ZERO, ZERO),
146403b705cfSriastradh			    gen3_fs_operand(c, Z, Z, ZERO, ZERO),
146503b705cfSriastradh			    gen3_fs_operand(c, NEG_X, NEG_Y, ZERO, ZERO));
146603b705cfSriastradh		gen3_fs_dp2add(FS_U0, MASK_X,
146703b705cfSriastradh			       gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
146803b705cfSriastradh			       gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
146903b705cfSriastradh			       gen3_fs_operand_zero());
147003b705cfSriastradh		gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U0, X, X, X, X));
147103b705cfSriastradh		gen3_fs_mad(out, 0,
147203b705cfSriastradh			    gen3_fs_operand(FS_U0, X, ZERO, ZERO, ZERO),
147303b705cfSriastradh			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
147403b705cfSriastradh			    gen3_fs_operand(c, W, ZERO, ZERO, ZERO));
147503b705cfSriastradh		break;
147603b705cfSriastradh
147703b705cfSriastradh	case RADIAL_TWO:
147803b705cfSriastradh		/*
147903b705cfSriastradh		   pdx = x - c1x, pdy = y - c1y;
148003b705cfSriastradh		   A = dx² + dy² - dr²
148103b705cfSriastradh		   B = -2*(pdx*dx + pdy*dy + r1*dr);
148203b705cfSriastradh		   C = pdx² + pdy² - r1²;
148303b705cfSriastradh		   det = B*B - 4*A*C;
148403b705cfSriastradh		   t = (-B + sqrt (det)) / (2 * A)
148503b705cfSriastradh		   */
148603b705cfSriastradh
148703b705cfSriastradh		/* u0.x = pdx, u0.y = pdy, u[0].z = r1; */
148803b705cfSriastradh		gen3_fs_add(FS_U0,
148903b705cfSriastradh			    gen3_fs_operand(in, X, Y, ZERO, ZERO),
149003b705cfSriastradh			    gen3_fs_operand(c, X, Y, Z, ZERO));
149103b705cfSriastradh		/* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */
149203b705cfSriastradh		gen3_fs_dp3(FS_U0, MASK_W,
149303b705cfSriastradh			    gen3_fs_operand(FS_U0, X, Y, ONE, ZERO),
149403b705cfSriastradh			    gen3_fs_operand(c+1, X, Y, Z, ZERO));
149503b705cfSriastradh		/* u1.x = pdx² + pdy² - r1²; [C] */
149603b705cfSriastradh		gen3_fs_dp3(FS_U1, MASK_X,
149703b705cfSriastradh			    gen3_fs_operand(FS_U0, X, Y, Z, ZERO),
149803b705cfSriastradh			    gen3_fs_operand(FS_U0, X, Y, NEG_Z, ZERO));
149903b705cfSriastradh		/* u1.x = C, u1.y = B, u1.z=-4*A; */
150003b705cfSriastradh		gen3_fs_mov_masked(FS_U1, MASK_Y, gen3_fs_operand(FS_U0, W, W, W, W));
150103b705cfSriastradh		gen3_fs_mov_masked(FS_U1, MASK_Z, gen3_fs_operand(c, W, W, W, W));
150203b705cfSriastradh		/* u1.x = B² - 4*A*C */
150303b705cfSriastradh		gen3_fs_dp2add(FS_U1, MASK_X,
150403b705cfSriastradh			       gen3_fs_operand(FS_U1, X, Y, ZERO, ZERO),
150503b705cfSriastradh			       gen3_fs_operand(FS_U1, Z, Y, ZERO, ZERO),
150603b705cfSriastradh			       gen3_fs_operand_zero());
150703b705cfSriastradh		/* out.x = -B + sqrt (B² - 4*A*C), */
150803b705cfSriastradh		gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U1, X, X, X, X));
150903b705cfSriastradh		gen3_fs_mad(out, MASK_X,
151003b705cfSriastradh			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
151103b705cfSriastradh			    gen3_fs_operand(FS_U1, X, ZERO, ZERO, ZERO),
151203b705cfSriastradh			    gen3_fs_operand(FS_U0, NEG_W, ZERO, ZERO, ZERO));
151303b705cfSriastradh		/* out.x = (-B + sqrt (B² - 4*A*C)) / (2 * A), */
151403b705cfSriastradh		gen3_fs_mul(out,
151503b705cfSriastradh			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
151603b705cfSriastradh			    gen3_fs_operand(c+1, W, ZERO, ZERO, ZERO));
151703b705cfSriastradh		break;
151803b705cfSriastradh	}
151903b705cfSriastradh}
152003b705cfSriastradh
152103b705cfSriastradhstatic void
152203b705cfSriastradhgen3_composite_emit_shader(struct sna *sna,
152303b705cfSriastradh			   const struct sna_composite_op *op,
152403b705cfSriastradh			   uint8_t blend)
152503b705cfSriastradh{
152603b705cfSriastradh	bool dst_is_alpha = PIXMAN_FORMAT_RGB(op->dst.format) == 0;
152703b705cfSriastradh	const struct sna_composite_channel *src, *mask;
152803b705cfSriastradh	struct gen3_render_state *state = &sna->render_state.gen3;
152903b705cfSriastradh	uint32_t shader_offset, id;
153003b705cfSriastradh	int src_reg, mask_reg;
153103b705cfSriastradh	int t, length;
153203b705cfSriastradh
153303b705cfSriastradh	src = &op->src;
153403b705cfSriastradh	mask = &op->mask;
153503b705cfSriastradh	if (mask->u.gen3.type == SHADER_NONE)
153603b705cfSriastradh		mask = NULL;
153703b705cfSriastradh
153803b705cfSriastradh	id = (src->u.gen3.type |
153903b705cfSriastradh	      src->is_affine << 4 |
154003b705cfSriastradh	      src->alpha_fixup << 5 |
154103b705cfSriastradh	      src->rb_reversed << 6);
154203b705cfSriastradh	if (mask) {
154303b705cfSriastradh		id |= (mask->u.gen3.type << 8 |
154403b705cfSriastradh		       mask->is_affine << 12 |
154503b705cfSriastradh		       gen3_blend_op[blend].src_alpha << 13 |
154603b705cfSriastradh		       op->has_component_alpha << 14 |
154703b705cfSriastradh		       mask->alpha_fixup << 15 |
154803b705cfSriastradh		       mask->rb_reversed << 16);
154903b705cfSriastradh	}
155003b705cfSriastradh	id |= dst_is_alpha << 24;
155103b705cfSriastradh	id |= op->rb_reversed << 25;
155203b705cfSriastradh
155303b705cfSriastradh	if (id == state->last_shader)
155403b705cfSriastradh		return;
155503b705cfSriastradh
155603b705cfSriastradh	state->last_shader = id;
155703b705cfSriastradh
155803b705cfSriastradh	shader_offset = sna->kgem.nbatch++;
155903b705cfSriastradh	t = 0;
156003b705cfSriastradh	switch (src->u.gen3.type) {
156103b705cfSriastradh	case SHADER_NONE:
156203b705cfSriastradh	case SHADER_OPACITY:
156303b705cfSriastradh		assert(0);
156403b705cfSriastradh	case SHADER_ZERO:
156503b705cfSriastradh	case SHADER_BLACK:
156603b705cfSriastradh	case SHADER_WHITE:
156703b705cfSriastradh		break;
156803b705cfSriastradh	case SHADER_CONSTANT:
156903b705cfSriastradh		gen3_fs_dcl(FS_T8);
157003b705cfSriastradh		src_reg = FS_T8;
157103b705cfSriastradh		break;
157203b705cfSriastradh	case SHADER_TEXTURE:
157303b705cfSriastradh	case SHADER_RADIAL:
157403b705cfSriastradh	case SHADER_LINEAR:
157503b705cfSriastradh		gen3_fs_dcl(FS_S0);
157603b705cfSriastradh		gen3_fs_dcl(FS_T0);
157703b705cfSriastradh		t++;
157803b705cfSriastradh		break;
157903b705cfSriastradh	}
158003b705cfSriastradh
158103b705cfSriastradh	if (mask == NULL) {
158203b705cfSriastradh		switch (src->u.gen3.type) {
158303b705cfSriastradh		case SHADER_ZERO:
158403b705cfSriastradh			gen3_fs_mov(FS_OC, gen3_fs_operand_zero());
158503b705cfSriastradh			goto done;
158603b705cfSriastradh		case SHADER_BLACK:
158703b705cfSriastradh			if (dst_is_alpha)
158803b705cfSriastradh				gen3_fs_mov(FS_OC, gen3_fs_operand_one());
158903b705cfSriastradh			else
159003b705cfSriastradh				gen3_fs_mov(FS_OC, gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ONE));
159103b705cfSriastradh			goto done;
159203b705cfSriastradh		case SHADER_WHITE:
159303b705cfSriastradh			gen3_fs_mov(FS_OC, gen3_fs_operand_one());
159403b705cfSriastradh			goto done;
159503b705cfSriastradh		}
159603b705cfSriastradh		if (src->alpha_fixup && dst_is_alpha) {
159703b705cfSriastradh			gen3_fs_mov(FS_OC, gen3_fs_operand_one());
159803b705cfSriastradh			goto done;
159903b705cfSriastradh		}
160003b705cfSriastradh		/* No mask, so load directly to output color */
160103b705cfSriastradh		if (src->u.gen3.type != SHADER_CONSTANT) {
160203b705cfSriastradh			if (dst_is_alpha || src->rb_reversed ^ op->rb_reversed)
160303b705cfSriastradh				src_reg = FS_R0;
160403b705cfSriastradh			else
160503b705cfSriastradh				src_reg = FS_OC;
160603b705cfSriastradh		}
160703b705cfSriastradh		switch (src->u.gen3.type) {
160803b705cfSriastradh		case SHADER_LINEAR:
160903b705cfSriastradh			gen3_linear_coord(sna, src, FS_T0, FS_R0);
161003b705cfSriastradh			gen3_fs_texld(src_reg, FS_S0, FS_R0);
161103b705cfSriastradh			break;
161203b705cfSriastradh
161303b705cfSriastradh		case SHADER_RADIAL:
161403b705cfSriastradh			gen3_radial_coord(sna, src, FS_T0, FS_R0);
161503b705cfSriastradh			gen3_fs_texld(src_reg, FS_S0, FS_R0);
161603b705cfSriastradh			break;
161703b705cfSriastradh
161803b705cfSriastradh		case SHADER_TEXTURE:
161903b705cfSriastradh			if (src->is_affine)
162003b705cfSriastradh				gen3_fs_texld(src_reg, FS_S0, FS_T0);
162103b705cfSriastradh			else
162203b705cfSriastradh				gen3_fs_texldp(src_reg, FS_S0, FS_T0);
162303b705cfSriastradh			break;
162403b705cfSriastradh
162503b705cfSriastradh		case SHADER_NONE:
162603b705cfSriastradh		case SHADER_WHITE:
162703b705cfSriastradh		case SHADER_BLACK:
162803b705cfSriastradh		case SHADER_ZERO:
162903b705cfSriastradh			assert(0);
163003b705cfSriastradh		case SHADER_CONSTANT:
163103b705cfSriastradh			break;
163203b705cfSriastradh		}
163303b705cfSriastradh
163403b705cfSriastradh		if (src_reg != FS_OC) {
163503b705cfSriastradh			if (src->alpha_fixup)
163603b705cfSriastradh				gen3_fs_mov(FS_OC,
163703b705cfSriastradh					    src->rb_reversed ^ op->rb_reversed ?
163803b705cfSriastradh					    gen3_fs_operand(src_reg, Z, Y, X, ONE) :
163903b705cfSriastradh					    gen3_fs_operand(src_reg, X, Y, Z, ONE));
164003b705cfSriastradh			else if (dst_is_alpha)
164103b705cfSriastradh				gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, W, W, W, W));
164203b705cfSriastradh			else if (src->rb_reversed ^ op->rb_reversed)
164303b705cfSriastradh				gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, Z, Y, X, W));
164403b705cfSriastradh			else
164503b705cfSriastradh				gen3_fs_mov(FS_OC, gen3_fs_operand_reg(src_reg));
164603b705cfSriastradh		} else if (src->alpha_fixup)
164703b705cfSriastradh			gen3_fs_mov_masked(FS_OC, MASK_W, gen3_fs_operand_one());
164803b705cfSriastradh	} else {
164903b705cfSriastradh		int out_reg = FS_OC;
165003b705cfSriastradh		if (op->rb_reversed)
165103b705cfSriastradh			out_reg = FS_U0;
165203b705cfSriastradh
165303b705cfSriastradh		switch (mask->u.gen3.type) {
165403b705cfSriastradh		case SHADER_CONSTANT:
165503b705cfSriastradh			gen3_fs_dcl(FS_T9);
165603b705cfSriastradh			mask_reg = FS_T9;
165703b705cfSriastradh			break;
165803b705cfSriastradh		case SHADER_TEXTURE:
165903b705cfSriastradh		case SHADER_LINEAR:
166003b705cfSriastradh		case SHADER_RADIAL:
166103b705cfSriastradh			gen3_fs_dcl(FS_S0 + t);
166203b705cfSriastradh			/* fall through */
166303b705cfSriastradh		case SHADER_OPACITY:
166403b705cfSriastradh			gen3_fs_dcl(FS_T0 + t);
166503b705cfSriastradh			break;
166603b705cfSriastradh		case SHADER_ZERO:
166703b705cfSriastradh		case SHADER_BLACK:
166803b705cfSriastradh			assert(0);
166903b705cfSriastradh		case SHADER_NONE:
167003b705cfSriastradh		case SHADER_WHITE:
167103b705cfSriastradh			break;
167203b705cfSriastradh		}
167303b705cfSriastradh
167403b705cfSriastradh		t = 0;
167503b705cfSriastradh		switch (src->u.gen3.type) {
167603b705cfSriastradh		case SHADER_LINEAR:
167703b705cfSriastradh			gen3_linear_coord(sna, src, FS_T0, FS_R0);
167803b705cfSriastradh			gen3_fs_texld(FS_R0, FS_S0, FS_R0);
167903b705cfSriastradh			src_reg = FS_R0;
168003b705cfSriastradh			t++;
168103b705cfSriastradh			break;
168203b705cfSriastradh
168303b705cfSriastradh		case SHADER_RADIAL:
168403b705cfSriastradh			gen3_radial_coord(sna, src, FS_T0, FS_R0);
168503b705cfSriastradh			gen3_fs_texld(FS_R0, FS_S0, FS_R0);
168603b705cfSriastradh			src_reg = FS_R0;
168703b705cfSriastradh			t++;
168803b705cfSriastradh			break;
168903b705cfSriastradh
169003b705cfSriastradh		case SHADER_TEXTURE:
169103b705cfSriastradh			if (src->is_affine)
169203b705cfSriastradh				gen3_fs_texld(FS_R0, FS_S0, FS_T0);
169303b705cfSriastradh			else
169403b705cfSriastradh				gen3_fs_texldp(FS_R0, FS_S0, FS_T0);
169503b705cfSriastradh			src_reg = FS_R0;
169603b705cfSriastradh			t++;
169703b705cfSriastradh			break;
169803b705cfSriastradh
169903b705cfSriastradh		case SHADER_CONSTANT:
170003b705cfSriastradh		case SHADER_NONE:
170103b705cfSriastradh		case SHADER_ZERO:
170203b705cfSriastradh		case SHADER_BLACK:
170303b705cfSriastradh		case SHADER_WHITE:
170403b705cfSriastradh			break;
170503b705cfSriastradh		}
170603b705cfSriastradh		if (src->alpha_fixup)
170703b705cfSriastradh			gen3_fs_mov_masked(src_reg, MASK_W, gen3_fs_operand_one());
170803b705cfSriastradh		if (src->rb_reversed)
170903b705cfSriastradh			gen3_fs_mov(src_reg, gen3_fs_operand(src_reg, Z, Y, X, W));
171003b705cfSriastradh
171103b705cfSriastradh		switch (mask->u.gen3.type) {
171203b705cfSriastradh		case SHADER_LINEAR:
171303b705cfSriastradh			gen3_linear_coord(sna, mask, FS_T0 + t, FS_R1);
171403b705cfSriastradh			gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
171503b705cfSriastradh			mask_reg = FS_R1;
171603b705cfSriastradh			break;
171703b705cfSriastradh
171803b705cfSriastradh		case SHADER_RADIAL:
171903b705cfSriastradh			gen3_radial_coord(sna, mask, FS_T0 + t, FS_R1);
172003b705cfSriastradh			gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
172103b705cfSriastradh			mask_reg = FS_R1;
172203b705cfSriastradh			break;
172303b705cfSriastradh
172403b705cfSriastradh		case SHADER_TEXTURE:
172503b705cfSriastradh			if (mask->is_affine)
172603b705cfSriastradh				gen3_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
172703b705cfSriastradh			else
172803b705cfSriastradh				gen3_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
172903b705cfSriastradh			mask_reg = FS_R1;
173003b705cfSriastradh			break;
173103b705cfSriastradh
173203b705cfSriastradh		case SHADER_OPACITY:
173303b705cfSriastradh			switch (src->u.gen3.type) {
173403b705cfSriastradh			case SHADER_BLACK:
173503b705cfSriastradh			case SHADER_WHITE:
173603b705cfSriastradh				if (dst_is_alpha || src->u.gen3.type == SHADER_WHITE) {
173703b705cfSriastradh					gen3_fs_mov(out_reg,
173803b705cfSriastradh						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
173903b705cfSriastradh				} else {
174003b705cfSriastradh					gen3_fs_mov(out_reg,
174103b705cfSriastradh						    gen3_fs_operand(FS_T0 + t, ZERO, ZERO, ZERO, X));
174203b705cfSriastradh				}
174303b705cfSriastradh				break;
174403b705cfSriastradh			default:
174503b705cfSriastradh				if (dst_is_alpha) {
174603b705cfSriastradh					gen3_fs_mul(out_reg,
174703b705cfSriastradh						    gen3_fs_operand(src_reg, W, W, W, W),
174803b705cfSriastradh						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
174903b705cfSriastradh				} else {
175003b705cfSriastradh					gen3_fs_mul(out_reg,
175103b705cfSriastradh						    gen3_fs_operand(src_reg, X, Y, Z, W),
175203b705cfSriastradh						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
175303b705cfSriastradh				}
175403b705cfSriastradh			}
175503b705cfSriastradh			goto mask_done;
175603b705cfSriastradh
175703b705cfSriastradh		case SHADER_CONSTANT:
175803b705cfSriastradh		case SHADER_ZERO:
175903b705cfSriastradh		case SHADER_BLACK:
176003b705cfSriastradh		case SHADER_WHITE:
176103b705cfSriastradh		case SHADER_NONE:
176203b705cfSriastradh			break;
176303b705cfSriastradh		}
176403b705cfSriastradh		if (mask->alpha_fixup)
176503b705cfSriastradh			gen3_fs_mov_masked(mask_reg, MASK_W, gen3_fs_operand_one());
176603b705cfSriastradh		if (mask->rb_reversed)
176703b705cfSriastradh			gen3_fs_mov(mask_reg, gen3_fs_operand(mask_reg, Z, Y, X, W));
176803b705cfSriastradh
176903b705cfSriastradh		if (dst_is_alpha) {
177003b705cfSriastradh			switch (src->u.gen3.type) {
177103b705cfSriastradh			case SHADER_BLACK:
177203b705cfSriastradh			case SHADER_WHITE:
177303b705cfSriastradh				gen3_fs_mov(out_reg,
177403b705cfSriastradh					    gen3_fs_operand(mask_reg, W, W, W, W));
177503b705cfSriastradh				break;
177603b705cfSriastradh			default:
177703b705cfSriastradh				gen3_fs_mul(out_reg,
177803b705cfSriastradh					    gen3_fs_operand(src_reg, W, W, W, W),
177903b705cfSriastradh					    gen3_fs_operand(mask_reg, W, W, W, W));
178003b705cfSriastradh				break;
178103b705cfSriastradh			}
178203b705cfSriastradh		} else {
178303b705cfSriastradh			/* If component alpha is active in the mask and the blend
178403b705cfSriastradh			 * operation uses the source alpha, then we know we don't
178503b705cfSriastradh			 * need the source value (otherwise we would have hit a
178603b705cfSriastradh			 * fallback earlier), so we provide the source alpha (src.A *
178703b705cfSriastradh			 * mask.X) as output color.
178803b705cfSriastradh			 * Conversely, if CA is set and we don't need the source alpha,
178903b705cfSriastradh			 * then we produce the source value (src.X * mask.X) and the
179003b705cfSriastradh			 * source alpha is unused.  Otherwise, we provide the non-CA
179103b705cfSriastradh			 * source value (src.X * mask.A).
179203b705cfSriastradh			 */
179303b705cfSriastradh			if (op->has_component_alpha) {
179403b705cfSriastradh				switch (src->u.gen3.type) {
179503b705cfSriastradh				case SHADER_BLACK:
179603b705cfSriastradh					if (gen3_blend_op[blend].src_alpha)
179703b705cfSriastradh						gen3_fs_mov(out_reg,
179803b705cfSriastradh							    gen3_fs_operand_reg(mask_reg));
179903b705cfSriastradh					else
180003b705cfSriastradh						gen3_fs_mov(out_reg,
180103b705cfSriastradh							    gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
180203b705cfSriastradh					break;
180303b705cfSriastradh				case SHADER_WHITE:
180403b705cfSriastradh					gen3_fs_mov(out_reg,
180503b705cfSriastradh						    gen3_fs_operand_reg(mask_reg));
180603b705cfSriastradh					break;
180703b705cfSriastradh				default:
180803b705cfSriastradh					if (gen3_blend_op[blend].src_alpha)
180903b705cfSriastradh						gen3_fs_mul(out_reg,
181003b705cfSriastradh							    gen3_fs_operand(src_reg, W, W, W, W),
181103b705cfSriastradh							    gen3_fs_operand_reg(mask_reg));
181203b705cfSriastradh					else
181303b705cfSriastradh						gen3_fs_mul(out_reg,
181403b705cfSriastradh							    gen3_fs_operand_reg(src_reg),
181503b705cfSriastradh							    gen3_fs_operand_reg(mask_reg));
181603b705cfSriastradh					break;
181703b705cfSriastradh				}
181803b705cfSriastradh			} else {
181903b705cfSriastradh				switch (src->u.gen3.type) {
182003b705cfSriastradh				case SHADER_WHITE:
182103b705cfSriastradh					gen3_fs_mov(out_reg,
182203b705cfSriastradh						    gen3_fs_operand(mask_reg, W, W, W, W));
182303b705cfSriastradh					break;
182403b705cfSriastradh				case SHADER_BLACK:
182503b705cfSriastradh					gen3_fs_mov(out_reg,
182603b705cfSriastradh						    gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
182703b705cfSriastradh					break;
182803b705cfSriastradh				default:
182903b705cfSriastradh					gen3_fs_mul(out_reg,
183003b705cfSriastradh						    gen3_fs_operand_reg(src_reg),
183103b705cfSriastradh						    gen3_fs_operand(mask_reg, W, W, W, W));
183203b705cfSriastradh					break;
183303b705cfSriastradh				}
183403b705cfSriastradh			}
183503b705cfSriastradh		}
183603b705cfSriastradhmask_done:
183703b705cfSriastradh		if (op->rb_reversed)
183803b705cfSriastradh			gen3_fs_mov(FS_OC, gen3_fs_operand(FS_U0, Z, Y, X, W));
183903b705cfSriastradh	}
184003b705cfSriastradh
184103b705cfSriastradhdone:
184203b705cfSriastradh	length = sna->kgem.nbatch - shader_offset;
184303b705cfSriastradh	sna->kgem.batch[shader_offset] =
184403b705cfSriastradh		_3DSTATE_PIXEL_SHADER_PROGRAM | (length - 2);
184503b705cfSriastradh}
184603b705cfSriastradh
184703b705cfSriastradhstatic uint32_t gen3_ms_tiling(uint32_t tiling)
184803b705cfSriastradh{
184903b705cfSriastradh	uint32_t v = 0;
185003b705cfSriastradh	switch (tiling) {
185103b705cfSriastradh	case I915_TILING_Y: v |= MS3_TILE_WALK;
185203b705cfSriastradh	case I915_TILING_X: v |= MS3_TILED_SURFACE;
185303b705cfSriastradh	case I915_TILING_NONE: break;
185403b705cfSriastradh	}
185503b705cfSriastradh	return v;
185603b705cfSriastradh}
185703b705cfSriastradh
185803b705cfSriastradhstatic void gen3_emit_invariant(struct sna *sna)
185903b705cfSriastradh{
186003b705cfSriastradh	/* Disable independent alpha blend */
186103b705cfSriastradh	OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE |
186203b705cfSriastradh		  IAB_MODIFY_FUNC | BLENDFUNC_ADD << IAB_FUNC_SHIFT |
186303b705cfSriastradh		  IAB_MODIFY_SRC_FACTOR | BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT |
186403b705cfSriastradh		  IAB_MODIFY_DST_FACTOR | BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT);
186503b705cfSriastradh
186603b705cfSriastradh	OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
186703b705cfSriastradh		  CSB_TCB(0, 0) |
186803b705cfSriastradh		  CSB_TCB(1, 1) |
186903b705cfSriastradh		  CSB_TCB(2, 2) |
187003b705cfSriastradh		  CSB_TCB(3, 3) |
187103b705cfSriastradh		  CSB_TCB(4, 4) |
187203b705cfSriastradh		  CSB_TCB(5, 5) |
187303b705cfSriastradh		  CSB_TCB(6, 6) |
187403b705cfSriastradh		  CSB_TCB(7, 7));
187503b705cfSriastradh
187603b705cfSriastradh	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3);
187703b705cfSriastradh	OUT_BATCH(0); /* Disable texture coordinate wrap-shortest */
187803b705cfSriastradh	OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) |
187903b705cfSriastradh		  S4_LINE_WIDTH_ONE |
188003b705cfSriastradh		  S4_CULLMODE_NONE |
188103b705cfSriastradh		  S4_VFMT_XY);
188203b705cfSriastradh	OUT_BATCH(0); /* Disable fog/stencil. *Enable* write mask. */
188303b705cfSriastradh	OUT_BATCH(S6_COLOR_WRITE_ONLY); /* Disable blending, depth */
188403b705cfSriastradh
188503b705cfSriastradh	OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
188603b705cfSriastradh	OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
188703b705cfSriastradh
188803b705cfSriastradh	OUT_BATCH(_3DSTATE_LOAD_INDIRECT);
188903b705cfSriastradh	OUT_BATCH(0x00000000);
189003b705cfSriastradh
189103b705cfSriastradh	OUT_BATCH(_3DSTATE_STIPPLE);
189203b705cfSriastradh	OUT_BATCH(0x00000000);
189303b705cfSriastradh
189403b705cfSriastradh	sna->render_state.gen3.need_invariant = false;
189503b705cfSriastradh}
189603b705cfSriastradh
189703b705cfSriastradh#define MAX_OBJECTS 3 /* worst case: dst + src + mask  */
189803b705cfSriastradh
189903b705cfSriastradhstatic void
190003b705cfSriastradhgen3_get_batch(struct sna *sna, const struct sna_composite_op *op)
190103b705cfSriastradh{
190203b705cfSriastradh	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
190303b705cfSriastradh
190403b705cfSriastradh	if (!kgem_check_batch(&sna->kgem, 200)) {
190503b705cfSriastradh		DBG(("%s: flushing batch: size %d > %d\n",
190603b705cfSriastradh		     __FUNCTION__, 200,
190703b705cfSriastradh		     sna->kgem.surface-sna->kgem.nbatch));
190803b705cfSriastradh		kgem_submit(&sna->kgem);
190903b705cfSriastradh		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
191003b705cfSriastradh	}
191103b705cfSriastradh
191203b705cfSriastradh	if (!kgem_check_reloc(&sna->kgem, MAX_OBJECTS)) {
191303b705cfSriastradh		DBG(("%s: flushing batch: reloc %d >= %d\n",
191403b705cfSriastradh		     __FUNCTION__,
191503b705cfSriastradh		     sna->kgem.nreloc,
191603b705cfSriastradh		     (int)KGEM_RELOC_SIZE(&sna->kgem) - MAX_OBJECTS));
191703b705cfSriastradh		kgem_submit(&sna->kgem);
191803b705cfSriastradh		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
191903b705cfSriastradh	}
192003b705cfSriastradh
192103b705cfSriastradh	if (!kgem_check_exec(&sna->kgem, MAX_OBJECTS)) {
192203b705cfSriastradh		DBG(("%s: flushing batch: exec %d >= %d\n",
192303b705cfSriastradh		     __FUNCTION__,
192403b705cfSriastradh		     sna->kgem.nexec,
192503b705cfSriastradh		     (int)KGEM_EXEC_SIZE(&sna->kgem) - MAX_OBJECTS - 1));
192603b705cfSriastradh		kgem_submit(&sna->kgem);
192703b705cfSriastradh		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
192803b705cfSriastradh	}
192903b705cfSriastradh
193003b705cfSriastradh	if (sna->render_state.gen3.need_invariant)
193103b705cfSriastradh		gen3_emit_invariant(sna);
193203b705cfSriastradh#undef MAX_OBJECTS
193303b705cfSriastradh}
193403b705cfSriastradh
193503b705cfSriastradhstatic void gen3_emit_target(struct sna *sna,
193603b705cfSriastradh			     struct kgem_bo *bo,
193703b705cfSriastradh			     int width,
193803b705cfSriastradh			     int height,
193903b705cfSriastradh			     int format)
194003b705cfSriastradh{
194103b705cfSriastradh	struct gen3_render_state *state = &sna->render_state.gen3;
194203b705cfSriastradh
194303b705cfSriastradh	assert(!too_large(width, height));
194403b705cfSriastradh
194503b705cfSriastradh	/* BUF_INFO is an implicit flush, so skip if the target is unchanged. */
194603b705cfSriastradh	assert(bo->unique_id != 0);
194703b705cfSriastradh	if (bo->unique_id != state->current_dst) {
194803b705cfSriastradh		uint32_t v;
194903b705cfSriastradh
195003b705cfSriastradh		DBG(("%s: setting new target id=%d, handle=%d\n",
195103b705cfSriastradh		     __FUNCTION__, bo->unique_id, bo->handle));
195203b705cfSriastradh
195303b705cfSriastradh		OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
195403b705cfSriastradh		OUT_BATCH(BUF_3D_ID_COLOR_BACK |
195503b705cfSriastradh			  gen3_buf_tiling(bo->tiling) |
195603b705cfSriastradh			  bo->pitch);
195703b705cfSriastradh		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
195803b705cfSriastradh					 bo,
195903b705cfSriastradh					 I915_GEM_DOMAIN_RENDER << 16 |
196003b705cfSriastradh					 I915_GEM_DOMAIN_RENDER,
196103b705cfSriastradh					 0));
196203b705cfSriastradh
196303b705cfSriastradh		OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
196403b705cfSriastradh		OUT_BATCH(gen3_get_dst_format(format));
196503b705cfSriastradh
196603b705cfSriastradh		v = DRAW_YMAX(height - 1) | DRAW_XMAX(width - 1);
196703b705cfSriastradh		if (v != state->last_drawrect_limit) {
196803b705cfSriastradh			OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
196903b705cfSriastradh			OUT_BATCH(0); /* XXX dither origin? */
197003b705cfSriastradh			OUT_BATCH(0);
197103b705cfSriastradh			OUT_BATCH(v);
197203b705cfSriastradh			OUT_BATCH(0);
197303b705cfSriastradh			state->last_drawrect_limit = v;
197403b705cfSriastradh		}
197503b705cfSriastradh
197603b705cfSriastradh		state->current_dst = bo->unique_id;
197703b705cfSriastradh	}
197803b705cfSriastradh	assert(bo->exec);
197903b705cfSriastradh	kgem_bo_mark_dirty(bo);
198003b705cfSriastradh}
198103b705cfSriastradh
198203b705cfSriastradhstatic void gen3_emit_composite_state(struct sna *sna,
198303b705cfSriastradh				      const struct sna_composite_op *op)
198403b705cfSriastradh{
198503b705cfSriastradh	struct gen3_render_state *state = &sna->render_state.gen3;
198603b705cfSriastradh	uint32_t map[4];
198703b705cfSriastradh	uint32_t sampler[4];
198803b705cfSriastradh	struct kgem_bo *bo[2];
198903b705cfSriastradh	unsigned int tex_count, n;
199003b705cfSriastradh	uint32_t ss2;
199103b705cfSriastradh
199203b705cfSriastradh	gen3_get_batch(sna, op);
199303b705cfSriastradh
199403b705cfSriastradh	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
199503b705cfSriastradh		if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
199603b705cfSriastradh			OUT_BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
199703b705cfSriastradh		else
199803b705cfSriastradh			OUT_BATCH(_3DSTATE_MODES_5_CMD |
199903b705cfSriastradh				  PIPELINE_FLUSH_RENDER_CACHE |
200003b705cfSriastradh				  PIPELINE_FLUSH_TEXTURE_CACHE);
200103b705cfSriastradh		kgem_clear_dirty(&sna->kgem);
200203b705cfSriastradh	}
200303b705cfSriastradh
200403b705cfSriastradh	gen3_emit_target(sna,
200503b705cfSriastradh			 op->dst.bo,
200603b705cfSriastradh			 op->dst.width,
200703b705cfSriastradh			 op->dst.height,
200803b705cfSriastradh			 op->dst.format);
200903b705cfSriastradh
201003b705cfSriastradh	ss2 = ~0;
201103b705cfSriastradh	tex_count = 0;
201203b705cfSriastradh	switch (op->src.u.gen3.type) {
201303b705cfSriastradh	case SHADER_OPACITY:
201403b705cfSriastradh	case SHADER_NONE:
201503b705cfSriastradh		assert(0);
201603b705cfSriastradh	case SHADER_ZERO:
201703b705cfSriastradh	case SHADER_BLACK:
201803b705cfSriastradh	case SHADER_WHITE:
201903b705cfSriastradh		break;
202003b705cfSriastradh	case SHADER_CONSTANT:
202103b705cfSriastradh		if (op->src.u.gen3.mode != state->last_diffuse) {
202203b705cfSriastradh			OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
202303b705cfSriastradh			OUT_BATCH(op->src.u.gen3.mode);
202403b705cfSriastradh			state->last_diffuse = op->src.u.gen3.mode;
202503b705cfSriastradh		}
202603b705cfSriastradh		break;
202703b705cfSriastradh	case SHADER_LINEAR:
202803b705cfSriastradh	case SHADER_RADIAL:
202903b705cfSriastradh	case SHADER_TEXTURE:
203003b705cfSriastradh		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
203103b705cfSriastradh		ss2 |= S2_TEXCOORD_FMT(tex_count,
203203b705cfSriastradh				       op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
203303b705cfSriastradh		assert(op->src.card_format);
203403b705cfSriastradh		map[tex_count * 2 + 0] =
203503b705cfSriastradh			op->src.card_format |
203603b705cfSriastradh			gen3_ms_tiling(op->src.bo->tiling) |
203703b705cfSriastradh			(op->src.height - 1) << MS3_HEIGHT_SHIFT |
203803b705cfSriastradh			(op->src.width - 1) << MS3_WIDTH_SHIFT;
203903b705cfSriastradh		map[tex_count * 2 + 1] =
204003b705cfSriastradh			(op->src.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
204103b705cfSriastradh
204203b705cfSriastradh		sampler[tex_count * 2 + 0] = op->src.filter;
204303b705cfSriastradh		sampler[tex_count * 2 + 1] =
204403b705cfSriastradh			op->src.repeat |
204503b705cfSriastradh			tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
204603b705cfSriastradh		bo[tex_count] = op->src.bo;
204703b705cfSriastradh		tex_count++;
204803b705cfSriastradh		break;
204903b705cfSriastradh	}
205003b705cfSriastradh	switch (op->mask.u.gen3.type) {
205103b705cfSriastradh	case SHADER_NONE:
205203b705cfSriastradh	case SHADER_ZERO:
205303b705cfSriastradh	case SHADER_BLACK:
205403b705cfSriastradh	case SHADER_WHITE:
205503b705cfSriastradh		break;
205603b705cfSriastradh	case SHADER_CONSTANT:
205703b705cfSriastradh		if (op->mask.u.gen3.mode != state->last_specular) {
205803b705cfSriastradh			OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
205903b705cfSriastradh			OUT_BATCH(op->mask.u.gen3.mode);
206003b705cfSriastradh			state->last_specular = op->mask.u.gen3.mode;
206103b705cfSriastradh		}
206203b705cfSriastradh		break;
206303b705cfSriastradh	case SHADER_LINEAR:
206403b705cfSriastradh	case SHADER_RADIAL:
206503b705cfSriastradh	case SHADER_TEXTURE:
206603b705cfSriastradh		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
206703b705cfSriastradh		ss2 |= S2_TEXCOORD_FMT(tex_count,
206803b705cfSriastradh				       op->mask.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
206903b705cfSriastradh		assert(op->mask.card_format);
207003b705cfSriastradh		map[tex_count * 2 + 0] =
207103b705cfSriastradh			op->mask.card_format |
207203b705cfSriastradh			gen3_ms_tiling(op->mask.bo->tiling) |
207303b705cfSriastradh			(op->mask.height - 1) << MS3_HEIGHT_SHIFT |
207403b705cfSriastradh			(op->mask.width - 1) << MS3_WIDTH_SHIFT;
207503b705cfSriastradh		map[tex_count * 2 + 1] =
207603b705cfSriastradh			(op->mask.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
207703b705cfSriastradh
207803b705cfSriastradh		sampler[tex_count * 2 + 0] = op->mask.filter;
207903b705cfSriastradh		sampler[tex_count * 2 + 1] =
208003b705cfSriastradh			op->mask.repeat |
208103b705cfSriastradh			tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
208203b705cfSriastradh		bo[tex_count] = op->mask.bo;
208303b705cfSriastradh		tex_count++;
208403b705cfSriastradh		break;
208503b705cfSriastradh	case SHADER_OPACITY:
208603b705cfSriastradh		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
208703b705cfSriastradh		ss2 |= S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_1D);
208803b705cfSriastradh		break;
208903b705cfSriastradh	}
209003b705cfSriastradh
209103b705cfSriastradh	{
209203b705cfSriastradh		uint32_t blend_offset = sna->kgem.nbatch;
209303b705cfSriastradh
209403b705cfSriastradh		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
209503b705cfSriastradh		OUT_BATCH(ss2);
209603b705cfSriastradh		OUT_BATCH(gen3_get_blend_cntl(op->op,
209703b705cfSriastradh					      op->has_component_alpha,
209803b705cfSriastradh					      op->dst.format));
209903b705cfSriastradh
210003b705cfSriastradh		if (memcmp(sna->kgem.batch + state->last_blend + 1,
210103b705cfSriastradh			   sna->kgem.batch + blend_offset + 1,
210203b705cfSriastradh			   2 * 4) == 0)
210303b705cfSriastradh			sna->kgem.nbatch = blend_offset;
210403b705cfSriastradh		else
210503b705cfSriastradh			state->last_blend = blend_offset;
210603b705cfSriastradh	}
210703b705cfSriastradh
210803b705cfSriastradh	if (op->u.gen3.num_constants) {
210903b705cfSriastradh		int count = op->u.gen3.num_constants;
211003b705cfSriastradh		if (state->last_constants) {
211103b705cfSriastradh			int last = sna->kgem.batch[state->last_constants+1];
211203b705cfSriastradh			if (last == (1 << (count >> 2)) - 1 &&
211303b705cfSriastradh			    memcmp(&sna->kgem.batch[state->last_constants+2],
211403b705cfSriastradh				   op->u.gen3.constants,
211503b705cfSriastradh				   count * sizeof(uint32_t)) == 0)
211603b705cfSriastradh				count = 0;
211703b705cfSriastradh		}
211803b705cfSriastradh		if (count) {
211903b705cfSriastradh			state->last_constants = sna->kgem.nbatch;
212003b705cfSriastradh			OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | count);
212103b705cfSriastradh			OUT_BATCH((1 << (count >> 2)) - 1);
212203b705cfSriastradh
212303b705cfSriastradh			memcpy(sna->kgem.batch + sna->kgem.nbatch,
212403b705cfSriastradh			       op->u.gen3.constants,
212503b705cfSriastradh			       count * sizeof(uint32_t));
212603b705cfSriastradh			sna->kgem.nbatch += count;
212703b705cfSriastradh		}
212803b705cfSriastradh	}
212903b705cfSriastradh
213003b705cfSriastradh	if (tex_count != 0) {
213103b705cfSriastradh		uint32_t rewind;
213203b705cfSriastradh
213303b705cfSriastradh		n = 0;
213403b705cfSriastradh		if (tex_count == state->tex_count) {
213503b705cfSriastradh			for (; n < tex_count; n++) {
213603b705cfSriastradh				if (map[2*n+0] != state->tex_map[2*n+0] ||
213703b705cfSriastradh				    map[2*n+1] != state->tex_map[2*n+1] ||
213803b705cfSriastradh				    state->tex_handle[n] != bo[n]->handle ||
213903b705cfSriastradh				    state->tex_delta[n] != bo[n]->delta)
214003b705cfSriastradh					break;
214103b705cfSriastradh			}
214203b705cfSriastradh		}
214303b705cfSriastradh		if (n < tex_count) {
214403b705cfSriastradh			OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
214503b705cfSriastradh			OUT_BATCH((1 << tex_count) - 1);
214603b705cfSriastradh			for (n = 0; n < tex_count; n++) {
214703b705cfSriastradh				OUT_BATCH(kgem_add_reloc(&sna->kgem,
214803b705cfSriastradh							 sna->kgem.nbatch,
214903b705cfSriastradh							 bo[n],
215003b705cfSriastradh							 I915_GEM_DOMAIN_SAMPLER<< 16,
215103b705cfSriastradh							 0));
215203b705cfSriastradh				OUT_BATCH(map[2*n + 0]);
215303b705cfSriastradh				OUT_BATCH(map[2*n + 1]);
215403b705cfSriastradh
215503b705cfSriastradh				state->tex_map[2*n+0] = map[2*n+0];
215603b705cfSriastradh				state->tex_map[2*n+1] = map[2*n+1];
215703b705cfSriastradh				state->tex_handle[n] = bo[n]->handle;
215803b705cfSriastradh				state->tex_delta[n] = bo[n]->delta;
215903b705cfSriastradh			}
216003b705cfSriastradh			state->tex_count = n;
216103b705cfSriastradh		}
216203b705cfSriastradh
216303b705cfSriastradh		rewind = sna->kgem.nbatch;
216403b705cfSriastradh		OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count));
216503b705cfSriastradh		OUT_BATCH((1 << tex_count) - 1);
216603b705cfSriastradh		for (n = 0; n < tex_count; n++) {
216703b705cfSriastradh			OUT_BATCH(sampler[2*n + 0]);
216803b705cfSriastradh			OUT_BATCH(sampler[2*n + 1]);
216903b705cfSriastradh			OUT_BATCH(0);
217003b705cfSriastradh		}
217103b705cfSriastradh		if (state->last_sampler &&
217203b705cfSriastradh		    memcmp(&sna->kgem.batch[state->last_sampler+1],
217303b705cfSriastradh			   &sna->kgem.batch[rewind + 1],
217403b705cfSriastradh			   (3*tex_count + 1)*sizeof(uint32_t)) == 0)
217503b705cfSriastradh			sna->kgem.nbatch = rewind;
217603b705cfSriastradh		else
217703b705cfSriastradh			state->last_sampler = rewind;
217803b705cfSriastradh	}
217903b705cfSriastradh
218003b705cfSriastradh	gen3_composite_emit_shader(sna, op, op->op);
218103b705cfSriastradh}
218203b705cfSriastradh
218303b705cfSriastradhstatic bool gen3_magic_ca_pass(struct sna *sna,
218403b705cfSriastradh			       const struct sna_composite_op *op)
218503b705cfSriastradh{
218603b705cfSriastradh	if (!op->need_magic_ca_pass)
218703b705cfSriastradh		return false;
218803b705cfSriastradh
218903b705cfSriastradh	DBG(("%s(%d)\n", __FUNCTION__,
219003b705cfSriastradh	     sna->render.vertex_index - sna->render.vertex_start));
219103b705cfSriastradh
219203b705cfSriastradh	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
219303b705cfSriastradh	OUT_BATCH(gen3_get_blend_cntl(PictOpAdd, true, op->dst.format));
219403b705cfSriastradh	gen3_composite_emit_shader(sna, op, PictOpAdd);
219503b705cfSriastradh
219603b705cfSriastradh	OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
219703b705cfSriastradh		  (sna->render.vertex_index - sna->render.vertex_start));
219803b705cfSriastradh	OUT_BATCH(sna->render.vertex_start);
219903b705cfSriastradh
220003b705cfSriastradh	sna->render_state.gen3.last_blend = 0;
220103b705cfSriastradh	return true;
220203b705cfSriastradh}
220303b705cfSriastradh
220403b705cfSriastradhstatic void gen3_vertex_flush(struct sna *sna)
220503b705cfSriastradh{
220603b705cfSriastradh	assert(sna->render.vertex_offset);
220703b705cfSriastradh
220803b705cfSriastradh	DBG(("%s[%x] = %d\n", __FUNCTION__,
220903b705cfSriastradh	     4*sna->render.vertex_offset,
221003b705cfSriastradh	     sna->render.vertex_index - sna->render.vertex_start));
221103b705cfSriastradh
221203b705cfSriastradh	sna->kgem.batch[sna->render.vertex_offset] =
221303b705cfSriastradh		PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
221403b705cfSriastradh		(sna->render.vertex_index - sna->render.vertex_start);
221503b705cfSriastradh	sna->kgem.batch[sna->render.vertex_offset + 1] =
221603b705cfSriastradh		sna->render.vertex_start;
221703b705cfSriastradh
221803b705cfSriastradh	sna->render.vertex_offset = 0;
221903b705cfSriastradh}
222003b705cfSriastradh
222103b705cfSriastradhstatic int gen3_vertex_finish(struct sna *sna)
222203b705cfSriastradh{
222303b705cfSriastradh	struct kgem_bo *bo;
222403b705cfSriastradh
222503b705cfSriastradh	DBG(("%s: used=%d/%d, vbo active? %d\n",
222603b705cfSriastradh	     __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
222703b705cfSriastradh	     sna->render.vbo ? sna->render.vbo->handle : 0));
222803b705cfSriastradh	assert(sna->render.vertex_offset == 0);
222903b705cfSriastradh	assert(sna->render.vertex_used);
223003b705cfSriastradh	assert(sna->render.vertex_used <= sna->render.vertex_size);
223103b705cfSriastradh
223203b705cfSriastradh	sna_vertex_wait__locked(&sna->render);
223303b705cfSriastradh
223403b705cfSriastradh	bo = sna->render.vbo;
223503b705cfSriastradh	if (bo) {
223603b705cfSriastradh		DBG(("%s: reloc = %d\n", __FUNCTION__,
223703b705cfSriastradh		     sna->render.vertex_reloc[0]));
223803b705cfSriastradh
223903b705cfSriastradh		if (sna->render.vertex_reloc[0]) {
224003b705cfSriastradh			sna->kgem.batch[sna->render.vertex_reloc[0]] =
224103b705cfSriastradh				kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
224203b705cfSriastradh					       bo, I915_GEM_DOMAIN_VERTEX << 16, 0);
224303b705cfSriastradh
224403b705cfSriastradh			sna->render.vertex_reloc[0] = 0;
224503b705cfSriastradh		}
224603b705cfSriastradh		sna->render.vertex_used = 0;
224703b705cfSriastradh		sna->render.vertex_index = 0;
224803b705cfSriastradh		sna->render.vbo = NULL;
224903b705cfSriastradh
225003b705cfSriastradh		kgem_bo_destroy(&sna->kgem, bo);
225103b705cfSriastradh	}
225203b705cfSriastradh
225303b705cfSriastradh	sna->render.vertices = NULL;
225403b705cfSriastradh	sna->render.vbo = kgem_create_linear(&sna->kgem,
225503b705cfSriastradh					     256*1024, CREATE_GTT_MAP);
225603b705cfSriastradh	if (sna->render.vbo)
225703b705cfSriastradh		sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
225803b705cfSriastradh	if (sna->render.vertices == NULL) {
225903b705cfSriastradh		if (sna->render.vbo)
226003b705cfSriastradh			kgem_bo_destroy(&sna->kgem, sna->render.vbo);
226103b705cfSriastradh		sna->render.vbo = NULL;
226203b705cfSriastradh		return 0;
226303b705cfSriastradh	}
226403b705cfSriastradh	assert(sna->render.vbo->snoop == false);
226503b705cfSriastradh
226603b705cfSriastradh	if (sna->render.vertex_used) {
226703b705cfSriastradh		memcpy(sna->render.vertices,
226803b705cfSriastradh		       sna->render.vertex_data,
226903b705cfSriastradh		       sizeof(float)*sna->render.vertex_used);
227003b705cfSriastradh	}
227103b705cfSriastradh	sna->render.vertex_size = 64 * 1024 - 1;
227203b705cfSriastradh	return sna->render.vertex_size - sna->render.vertex_used;
227303b705cfSriastradh}
227403b705cfSriastradh
227503b705cfSriastradhstatic void gen3_vertex_close(struct sna *sna)
227603b705cfSriastradh{
227703b705cfSriastradh	struct kgem_bo *bo, *free_bo = NULL;
227803b705cfSriastradh	unsigned int delta = 0;
227903b705cfSriastradh
228003b705cfSriastradh	assert(sna->render.vertex_offset == 0);
228103b705cfSriastradh	if (sna->render.vertex_reloc[0] == 0)
228203b705cfSriastradh		return;
228303b705cfSriastradh
228403b705cfSriastradh	DBG(("%s: used=%d/%d, vbo active? %d\n",
228503b705cfSriastradh	     __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
228603b705cfSriastradh	     sna->render.vbo ? sna->render.vbo->handle : 0));
228703b705cfSriastradh
228803b705cfSriastradh	bo = sna->render.vbo;
228903b705cfSriastradh	if (bo) {
229003b705cfSriastradh		if (sna->render.vertex_size - sna->render.vertex_used < 64) {
229103b705cfSriastradh			DBG(("%s: discarding full vbo\n", __FUNCTION__));
229203b705cfSriastradh			sna->render.vbo = NULL;
229303b705cfSriastradh			sna->render.vertices = sna->render.vertex_data;
229403b705cfSriastradh			sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
229503b705cfSriastradh			free_bo = bo;
229603b705cfSriastradh		} else if (IS_CPU_MAP(bo->map)) {
229703b705cfSriastradh			DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
229803b705cfSriastradh			sna->render.vertices = kgem_bo_map__gtt(&sna->kgem, bo);
229903b705cfSriastradh			if (sna->render.vertices == NULL) {
230003b705cfSriastradh				DBG(("%s: discarding non-mappable vertices\n",__FUNCTION__));
230103b705cfSriastradh				sna->render.vbo = NULL;
230203b705cfSriastradh				sna->render.vertices = sna->render.vertex_data;
230303b705cfSriastradh				sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
230403b705cfSriastradh				free_bo = bo;
230503b705cfSriastradh			}
230603b705cfSriastradh		}
230703b705cfSriastradh	} else {
230803b705cfSriastradh		if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
230903b705cfSriastradh			DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
231003b705cfSriastradh			     sna->render.vertex_used, sna->kgem.nbatch));
231103b705cfSriastradh			memcpy(sna->kgem.batch + sna->kgem.nbatch,
231203b705cfSriastradh			       sna->render.vertex_data,
231303b705cfSriastradh			       sna->render.vertex_used * 4);
231403b705cfSriastradh			delta = sna->kgem.nbatch * 4;
231503b705cfSriastradh			bo = NULL;
231603b705cfSriastradh			sna->kgem.nbatch += sna->render.vertex_used;
231703b705cfSriastradh		} else {
231803b705cfSriastradh			DBG(("%s: new vbo: %d\n", __FUNCTION__,
231903b705cfSriastradh			     sna->render.vertex_used));
232003b705cfSriastradh			bo = kgem_create_linear(&sna->kgem,
232103b705cfSriastradh						4*sna->render.vertex_used,
232203b705cfSriastradh						CREATE_NO_THROTTLE);
232303b705cfSriastradh			if (bo) {
232403b705cfSriastradh				assert(bo->snoop == false);
232503b705cfSriastradh				kgem_bo_write(&sna->kgem, bo,
232603b705cfSriastradh					      sna->render.vertex_data,
232703b705cfSriastradh					      4*sna->render.vertex_used);
232803b705cfSriastradh			}
232903b705cfSriastradh			free_bo = bo;
233003b705cfSriastradh		}
233103b705cfSriastradh	}
233203b705cfSriastradh
233303b705cfSriastradh	DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0]));
233403b705cfSriastradh	sna->kgem.batch[sna->render.vertex_reloc[0]] =
233503b705cfSriastradh		kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
233603b705cfSriastradh			       bo, I915_GEM_DOMAIN_VERTEX << 16, delta);
233703b705cfSriastradh	sna->render.vertex_reloc[0] = 0;
233803b705cfSriastradh
233903b705cfSriastradh	if (sna->render.vbo == NULL) {
234003b705cfSriastradh		DBG(("%s: resetting vbo\n", __FUNCTION__));
234103b705cfSriastradh		sna->render.vertex_used = 0;
234203b705cfSriastradh		sna->render.vertex_index = 0;
234303b705cfSriastradh		assert(sna->render.vertices == sna->render.vertex_data);
234403b705cfSriastradh		assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
234503b705cfSriastradh	}
234603b705cfSriastradh
234703b705cfSriastradh	if (free_bo)
234803b705cfSriastradh		kgem_bo_destroy(&sna->kgem, free_bo);
234903b705cfSriastradh}
235003b705cfSriastradh
235103b705cfSriastradhstatic bool gen3_rectangle_begin(struct sna *sna,
235203b705cfSriastradh				 const struct sna_composite_op *op)
235303b705cfSriastradh{
235403b705cfSriastradh	struct gen3_render_state *state = &sna->render_state.gen3;
235503b705cfSriastradh	int ndwords, i1_cmd = 0, i1_len = 0;
235603b705cfSriastradh
235703b705cfSriastradh	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
235803b705cfSriastradh		return true;
235903b705cfSriastradh
236003b705cfSriastradh	ndwords = 2;
236103b705cfSriastradh	if (op->need_magic_ca_pass)
236203b705cfSriastradh		ndwords += 100;
236303b705cfSriastradh	if (sna->render.vertex_reloc[0] == 0)
236403b705cfSriastradh		i1_len++, i1_cmd |= I1_LOAD_S(0), ndwords++;
236503b705cfSriastradh	if (state->floats_per_vertex != op->floats_per_vertex)
236603b705cfSriastradh		i1_len++, i1_cmd |= I1_LOAD_S(1), ndwords++;
236703b705cfSriastradh
236803b705cfSriastradh	if (!kgem_check_batch(&sna->kgem, ndwords+1))
236903b705cfSriastradh		return false;
237003b705cfSriastradh
237103b705cfSriastradh	if (i1_cmd) {
237203b705cfSriastradh		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | i1_cmd | (i1_len - 1));
237303b705cfSriastradh		if (sna->render.vertex_reloc[0] == 0)
237403b705cfSriastradh			sna->render.vertex_reloc[0] = sna->kgem.nbatch++;
237503b705cfSriastradh		if (state->floats_per_vertex != op->floats_per_vertex) {
237603b705cfSriastradh			state->floats_per_vertex = op->floats_per_vertex;
237703b705cfSriastradh			OUT_BATCH(state->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT |
237803b705cfSriastradh				  state->floats_per_vertex << S1_VERTEX_PITCH_SHIFT);
237903b705cfSriastradh		}
238003b705cfSriastradh	}
238103b705cfSriastradh
238203b705cfSriastradh	if (sna->kgem.nbatch == 2 + state->last_vertex_offset &&
238303b705cfSriastradh	    !op->need_magic_ca_pass) {
238403b705cfSriastradh		sna->render.vertex_offset = state->last_vertex_offset;
238503b705cfSriastradh	} else {
238603b705cfSriastradh		sna->render.vertex_offset = sna->kgem.nbatch;
238703b705cfSriastradh		OUT_BATCH(MI_NOOP); /* to be filled later */
238803b705cfSriastradh		OUT_BATCH(MI_NOOP);
238903b705cfSriastradh		sna->render.vertex_start = sna->render.vertex_index;
239003b705cfSriastradh		state->last_vertex_offset = sna->render.vertex_offset;
239103b705cfSriastradh	}
239203b705cfSriastradh
239303b705cfSriastradh	return true;
239403b705cfSriastradh}
239503b705cfSriastradh
239603b705cfSriastradhstatic int gen3_get_rectangles__flush(struct sna *sna,
239703b705cfSriastradh				      const struct sna_composite_op *op)
239803b705cfSriastradh{
239903b705cfSriastradh	/* Preventing discarding new vbo after lock contention */
240003b705cfSriastradh	if (sna_vertex_wait__locked(&sna->render)) {
240103b705cfSriastradh		int rem = vertex_space(sna);
240203b705cfSriastradh		if (rem > op->floats_per_rect)
240303b705cfSriastradh			return rem;
240403b705cfSriastradh	}
240503b705cfSriastradh
240603b705cfSriastradh	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 105: 5))
240703b705cfSriastradh		return 0;
240803b705cfSriastradh	if (!kgem_check_reloc_and_exec(&sna->kgem, 1))
240903b705cfSriastradh		return 0;
241003b705cfSriastradh
241103b705cfSriastradh	if (sna->render.vertex_offset) {
241203b705cfSriastradh		gen3_vertex_flush(sna);
241303b705cfSriastradh		if (gen3_magic_ca_pass(sna, op)) {
241403b705cfSriastradh			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
241503b705cfSriastradh			OUT_BATCH(gen3_get_blend_cntl(op->op,
241603b705cfSriastradh						      op->has_component_alpha,
241703b705cfSriastradh						      op->dst.format));
241803b705cfSriastradh			gen3_composite_emit_shader(sna, op, op->op);
241903b705cfSriastradh		}
242003b705cfSriastradh	}
242103b705cfSriastradh
242203b705cfSriastradh	return gen3_vertex_finish(sna);
242303b705cfSriastradh}
242403b705cfSriastradh
242503b705cfSriastradhinline static int gen3_get_rectangles(struct sna *sna,
242603b705cfSriastradh				      const struct sna_composite_op *op,
242703b705cfSriastradh				      int want)
242803b705cfSriastradh{
242903b705cfSriastradh	int rem;
243003b705cfSriastradh
243103b705cfSriastradh	DBG(("%s: want=%d, rem=%d\n",
243203b705cfSriastradh	     __FUNCTION__, want*op->floats_per_rect, vertex_space(sna)));
243303b705cfSriastradh
243403b705cfSriastradh	assert(want);
243503b705cfSriastradh	assert(sna->render.vertex_index * op->floats_per_vertex == sna->render.vertex_used);
243603b705cfSriastradh
243703b705cfSriastradhstart:
243803b705cfSriastradh	rem = vertex_space(sna);
243903b705cfSriastradh	if (unlikely(op->floats_per_rect > rem)) {
244003b705cfSriastradh		DBG(("flushing vbo for %s: %d < %d\n",
244103b705cfSriastradh		     __FUNCTION__, rem, op->floats_per_rect));
244203b705cfSriastradh		rem = gen3_get_rectangles__flush(sna, op);
244303b705cfSriastradh		if (unlikely(rem == 0))
244403b705cfSriastradh			goto flush;
244503b705cfSriastradh	}
244603b705cfSriastradh
244703b705cfSriastradh	if (unlikely(sna->render.vertex_offset == 0)) {
244803b705cfSriastradh		if (!gen3_rectangle_begin(sna, op))
244903b705cfSriastradh			goto flush;
245003b705cfSriastradh		else
245103b705cfSriastradh			goto start;
245203b705cfSriastradh	}
245303b705cfSriastradh
245403b705cfSriastradh	assert(rem <= vertex_space(sna));
245503b705cfSriastradh	assert(op->floats_per_rect <= rem);
245603b705cfSriastradh	if (want > 1 && want * op->floats_per_rect > rem)
245703b705cfSriastradh		want = rem / op->floats_per_rect;
245803b705cfSriastradh	sna->render.vertex_index += 3*want;
245903b705cfSriastradh
246003b705cfSriastradh	assert(want);
246103b705cfSriastradh	assert(sna->render.vertex_index * op->floats_per_vertex <= sna->render.vertex_size);
246203b705cfSriastradh	return want;
246303b705cfSriastradh
246403b705cfSriastradhflush:
246503b705cfSriastradh	DBG(("%s: flushing batch\n", __FUNCTION__));
246603b705cfSriastradh	if (sna->render.vertex_offset) {
246703b705cfSriastradh		gen3_vertex_flush(sna);
246803b705cfSriastradh		gen3_magic_ca_pass(sna, op);
246903b705cfSriastradh	}
247003b705cfSriastradh	sna_vertex_wait__locked(&sna->render);
247103b705cfSriastradh	_kgem_submit(&sna->kgem);
247203b705cfSriastradh	gen3_emit_composite_state(sna, op);
247303b705cfSriastradh	assert(sna->render.vertex_offset == 0);
247403b705cfSriastradh	assert(sna->render.vertex_reloc[0] == 0);
247503b705cfSriastradh	goto start;
247603b705cfSriastradh}
247703b705cfSriastradh
247803b705cfSriastradhfastcall static void
247903b705cfSriastradhgen3_render_composite_blt(struct sna *sna,
248003b705cfSriastradh			  const struct sna_composite_op *op,
248103b705cfSriastradh			  const struct sna_composite_rectangles *r)
248203b705cfSriastradh{
248303b705cfSriastradh	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", __FUNCTION__,
248403b705cfSriastradh	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
248503b705cfSriastradh	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
248603b705cfSriastradh	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
248703b705cfSriastradh	     r->width, r->height));
248803b705cfSriastradh
248903b705cfSriastradh	gen3_get_rectangles(sna, op, 1);
249003b705cfSriastradh
249103b705cfSriastradh	op->prim_emit(sna, op, r);
249203b705cfSriastradh}
249303b705cfSriastradh
249403b705cfSriastradhfastcall static void
249503b705cfSriastradhgen3_render_composite_box(struct sna *sna,
249603b705cfSriastradh			  const struct sna_composite_op *op,
249703b705cfSriastradh			  const BoxRec *box)
249803b705cfSriastradh{
249903b705cfSriastradh	struct sna_composite_rectangles r;
250003b705cfSriastradh
250103b705cfSriastradh	DBG(("%s: src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
250203b705cfSriastradh	     __FUNCTION__,
250303b705cfSriastradh	     op->src.offset[0], op->src.offset[1],
250403b705cfSriastradh	     op->mask.offset[0], op->mask.offset[1],
250503b705cfSriastradh	     op->dst.x, op->dst.y));
250603b705cfSriastradh
250703b705cfSriastradh	gen3_get_rectangles(sna, op, 1);
250803b705cfSriastradh
250903b705cfSriastradh	r.dst.x  = box->x1;
251003b705cfSriastradh	r.dst.y  = box->y1;
251103b705cfSriastradh	r.width  = box->x2 - box->x1;
251203b705cfSriastradh	r.height = box->y2 - box->y1;
251303b705cfSriastradh	r.src = r.mask = r.dst;
251403b705cfSriastradh
251503b705cfSriastradh	op->prim_emit(sna, op, &r);
251603b705cfSriastradh}
251703b705cfSriastradh
251803b705cfSriastradhstatic void
251903b705cfSriastradhgen3_render_composite_boxes__blt(struct sna *sna,
252003b705cfSriastradh				 const struct sna_composite_op *op,
252103b705cfSriastradh				 const BoxRec *box, int nbox)
252203b705cfSriastradh{
252303b705cfSriastradh	DBG(("%s: nbox=%d, src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
252403b705cfSriastradh	     __FUNCTION__, nbox,
252503b705cfSriastradh	     op->src.offset[0], op->src.offset[1],
252603b705cfSriastradh	     op->mask.offset[0], op->mask.offset[1],
252703b705cfSriastradh	     op->dst.x, op->dst.y));
252803b705cfSriastradh
252903b705cfSriastradh	do {
253003b705cfSriastradh		int nbox_this_time;
253103b705cfSriastradh
253203b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, op, nbox);
253303b705cfSriastradh		nbox -= nbox_this_time;
253403b705cfSriastradh
253503b705cfSriastradh		do {
253603b705cfSriastradh			struct sna_composite_rectangles r;
253703b705cfSriastradh
253803b705cfSriastradh			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
253903b705cfSriastradh			     box->x1, box->y1,
254003b705cfSriastradh			     box->x2 - box->x1,
254103b705cfSriastradh			     box->y2 - box->y1));
254203b705cfSriastradh
254303b705cfSriastradh			r.dst.x  = box->x1; r.dst.y  = box->y1;
254403b705cfSriastradh			r.width = box->x2 - box->x1;
254503b705cfSriastradh			r.height = box->y2 - box->y1;
254603b705cfSriastradh			r.src = r.mask = r.dst;
254703b705cfSriastradh
254803b705cfSriastradh			op->prim_emit(sna, op, &r);
254903b705cfSriastradh			box++;
255003b705cfSriastradh		} while (--nbox_this_time);
255103b705cfSriastradh	} while (nbox);
255203b705cfSriastradh}
255303b705cfSriastradh
255403b705cfSriastradhstatic void
255503b705cfSriastradhgen3_render_composite_boxes(struct sna *sna,
255603b705cfSriastradh			    const struct sna_composite_op *op,
255703b705cfSriastradh			    const BoxRec *box, int nbox)
255803b705cfSriastradh{
255903b705cfSriastradh	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
256003b705cfSriastradh
256103b705cfSriastradh	do {
256203b705cfSriastradh		int nbox_this_time;
256303b705cfSriastradh		float *v;
256403b705cfSriastradh
256503b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, op, nbox);
256603b705cfSriastradh		assert(nbox_this_time);
256703b705cfSriastradh		nbox -= nbox_this_time;
256803b705cfSriastradh
256903b705cfSriastradh		v = sna->render.vertices + sna->render.vertex_used;
257003b705cfSriastradh		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
257103b705cfSriastradh
257203b705cfSriastradh		op->emit_boxes(op, box, nbox_this_time, v);
257303b705cfSriastradh		box += nbox_this_time;
257403b705cfSriastradh	} while (nbox);
257503b705cfSriastradh}
257603b705cfSriastradh
257703b705cfSriastradhstatic void
257803b705cfSriastradhgen3_render_composite_boxes__thread(struct sna *sna,
257903b705cfSriastradh				    const struct sna_composite_op *op,
258003b705cfSriastradh				    const BoxRec *box, int nbox)
258103b705cfSriastradh{
258203b705cfSriastradh	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
258303b705cfSriastradh
258403b705cfSriastradh	sna_vertex_lock(&sna->render);
258503b705cfSriastradh	do {
258603b705cfSriastradh		int nbox_this_time;
258703b705cfSriastradh		float *v;
258803b705cfSriastradh
258903b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, op, nbox);
259003b705cfSriastradh		assert(nbox_this_time);
259103b705cfSriastradh		nbox -= nbox_this_time;
259203b705cfSriastradh
259303b705cfSriastradh		v = sna->render.vertices + sna->render.vertex_used;
259403b705cfSriastradh		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
259503b705cfSriastradh
259603b705cfSriastradh		sna_vertex_acquire__locked(&sna->render);
259703b705cfSriastradh		sna_vertex_unlock(&sna->render);
259803b705cfSriastradh
259903b705cfSriastradh		op->emit_boxes(op, box, nbox_this_time, v);
260003b705cfSriastradh		box += nbox_this_time;
260103b705cfSriastradh
260203b705cfSriastradh		sna_vertex_lock(&sna->render);
260303b705cfSriastradh		sna_vertex_release__locked(&sna->render);
260403b705cfSriastradh	} while (nbox);
260503b705cfSriastradh	sna_vertex_unlock(&sna->render);
260603b705cfSriastradh}
260703b705cfSriastradh
260803b705cfSriastradhstatic void
260903b705cfSriastradhgen3_render_composite_done(struct sna *sna,
261003b705cfSriastradh			   const struct sna_composite_op *op)
261103b705cfSriastradh{
261203b705cfSriastradh	DBG(("%s()\n", __FUNCTION__));
261303b705cfSriastradh
261403b705cfSriastradh	if (sna->render.vertex_offset) {
261503b705cfSriastradh		gen3_vertex_flush(sna);
261603b705cfSriastradh		gen3_magic_ca_pass(sna, op);
261703b705cfSriastradh	}
261803b705cfSriastradh
261903b705cfSriastradh	if (op->mask.bo)
262003b705cfSriastradh		kgem_bo_destroy(&sna->kgem, op->mask.bo);
262103b705cfSriastradh	if (op->src.bo)
262203b705cfSriastradh		kgem_bo_destroy(&sna->kgem, op->src.bo);
262303b705cfSriastradh
262403b705cfSriastradh	sna_render_composite_redirect_done(sna, op);
262503b705cfSriastradh}
262603b705cfSriastradh
262703b705cfSriastradhstatic void
262803b705cfSriastradhdiscard_vbo(struct sna *sna)
262903b705cfSriastradh{
263003b705cfSriastradh	kgem_bo_destroy(&sna->kgem, sna->render.vbo);
263103b705cfSriastradh	sna->render.vbo = NULL;
263203b705cfSriastradh	sna->render.vertices = sna->render.vertex_data;
263303b705cfSriastradh	sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
263403b705cfSriastradh	sna->render.vertex_used = 0;
263503b705cfSriastradh	sna->render.vertex_index = 0;
263603b705cfSriastradh}
263703b705cfSriastradh
263803b705cfSriastradhstatic void
263903b705cfSriastradhgen3_render_reset(struct sna *sna)
264003b705cfSriastradh{
264103b705cfSriastradh	struct gen3_render_state *state = &sna->render_state.gen3;
264203b705cfSriastradh
264303b705cfSriastradh	state->need_invariant = true;
264403b705cfSriastradh	state->current_dst = 0;
264503b705cfSriastradh	state->tex_count = 0;
264603b705cfSriastradh	state->last_drawrect_limit = ~0U;
264703b705cfSriastradh	state->last_target = 0;
264803b705cfSriastradh	state->last_blend = 0;
264903b705cfSriastradh	state->last_constants = 0;
265003b705cfSriastradh	state->last_sampler = 0;
265103b705cfSriastradh	state->last_shader = 0x7fffffff;
265203b705cfSriastradh	state->last_diffuse = 0xcc00ffee;
265303b705cfSriastradh	state->last_specular = 0xcc00ffee;
265403b705cfSriastradh
265503b705cfSriastradh	state->floats_per_vertex = 0;
265603b705cfSriastradh	state->last_floats_per_vertex = 0;
265703b705cfSriastradh	state->last_vertex_offset = 0;
265803b705cfSriastradh
265903b705cfSriastradh	if (sna->render.vbo != NULL &&
266003b705cfSriastradh	    !kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
266103b705cfSriastradh		DBG(("%s: discarding vbo as next access will stall: %d\n",
266203b705cfSriastradh		     __FUNCTION__, sna->render.vbo->presumed_offset));
266303b705cfSriastradh		discard_vbo(sna);
266403b705cfSriastradh	}
266503b705cfSriastradh
266603b705cfSriastradh	sna->render.vertex_reloc[0] = 0;
266703b705cfSriastradh	sna->render.vertex_offset = 0;
266803b705cfSriastradh}
266903b705cfSriastradh
267003b705cfSriastradhstatic void
267103b705cfSriastradhgen3_render_retire(struct kgem *kgem)
267203b705cfSriastradh{
267303b705cfSriastradh	struct sna *sna;
267403b705cfSriastradh
267503b705cfSriastradh	sna = container_of(kgem, struct sna, kgem);
267603b705cfSriastradh	if (sna->render.vertex_reloc[0] == 0 &&
267703b705cfSriastradh	    sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
267803b705cfSriastradh		DBG(("%s: resetting idle vbo\n", __FUNCTION__));
267903b705cfSriastradh		sna->render.vertex_used = 0;
268003b705cfSriastradh		sna->render.vertex_index = 0;
268103b705cfSriastradh	}
268203b705cfSriastradh}
268303b705cfSriastradh
268403b705cfSriastradhstatic void
268503b705cfSriastradhgen3_render_expire(struct kgem *kgem)
268603b705cfSriastradh{
268703b705cfSriastradh	struct sna *sna;
268803b705cfSriastradh
268903b705cfSriastradh	sna = container_of(kgem, struct sna, kgem);
269003b705cfSriastradh	if (sna->render.vbo && !sna->render.vertex_used) {
269103b705cfSriastradh		DBG(("%s: discarding vbo\n", __FUNCTION__));
269203b705cfSriastradh		discard_vbo(sna);
269303b705cfSriastradh	}
269403b705cfSriastradh}
269503b705cfSriastradh
269603b705cfSriastradhstatic bool gen3_composite_channel_set_format(struct sna_composite_channel *channel,
269703b705cfSriastradh					      CARD32 format)
269803b705cfSriastradh{
269903b705cfSriastradh	unsigned int i;
270003b705cfSriastradh
270103b705cfSriastradh	for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
270203b705cfSriastradh		if (gen3_tex_formats[i].fmt == format) {
270303b705cfSriastradh			channel->card_format = gen3_tex_formats[i].card_fmt;
270403b705cfSriastradh			channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
270503b705cfSriastradh			return true;
270603b705cfSriastradh		}
270703b705cfSriastradh	}
270803b705cfSriastradh	return false;
270903b705cfSriastradh}
271003b705cfSriastradh
271103b705cfSriastradhstatic bool source_is_covered(PicturePtr picture,
271203b705cfSriastradh			      int x, int y,
271303b705cfSriastradh			      int width, int height)
271403b705cfSriastradh{
271503b705cfSriastradh	int x1, y1, x2, y2;
271603b705cfSriastradh
271703b705cfSriastradh	if (picture->repeat && picture->repeatType != RepeatNone)
271803b705cfSriastradh		return true;
271903b705cfSriastradh
272003b705cfSriastradh	if (picture->pDrawable == NULL)
272103b705cfSriastradh		return false;
272203b705cfSriastradh
272303b705cfSriastradh	if (picture->transform) {
272403b705cfSriastradh		pixman_box16_t sample;
272503b705cfSriastradh
272603b705cfSriastradh		sample.x1 = x;
272703b705cfSriastradh		sample.y1 = y;
272803b705cfSriastradh		sample.x2 = x + width;
272903b705cfSriastradh		sample.y2 = y + height;
273003b705cfSriastradh
273103b705cfSriastradh		pixman_transform_bounds(picture->transform, &sample);
273203b705cfSriastradh
273303b705cfSriastradh		x1 = sample.x1;
273403b705cfSriastradh		x2 = sample.x2;
273503b705cfSriastradh		y1 = sample.y1;
273603b705cfSriastradh		y2 = sample.y2;
273703b705cfSriastradh	} else {
273803b705cfSriastradh		x1 = x;
273903b705cfSriastradh		y1 = y;
274003b705cfSriastradh		x2 = x + width;
274103b705cfSriastradh		y2 = y + height;
274203b705cfSriastradh	}
274303b705cfSriastradh
274403b705cfSriastradh	return
274503b705cfSriastradh		x1 >= 0 && y1 >= 0 &&
274603b705cfSriastradh		x2 <= picture->pDrawable->width &&
274703b705cfSriastradh		y2 <= picture->pDrawable->height;
274803b705cfSriastradh}
274903b705cfSriastradh
275003b705cfSriastradhstatic bool gen3_composite_channel_set_xformat(PicturePtr picture,
275103b705cfSriastradh					       struct sna_composite_channel *channel,
275203b705cfSriastradh					       int x, int y,
275303b705cfSriastradh					       int width, int height)
275403b705cfSriastradh{
275503b705cfSriastradh	unsigned int i;
275603b705cfSriastradh
275703b705cfSriastradh	if (PICT_FORMAT_A(picture->format) != 0)
275803b705cfSriastradh		return false;
275903b705cfSriastradh
276003b705cfSriastradh	if (width == 0 || height == 0)
276103b705cfSriastradh		return false;
276203b705cfSriastradh
276303b705cfSriastradh	if (!source_is_covered(picture, x, y, width, height))
276403b705cfSriastradh		return false;
276503b705cfSriastradh
276603b705cfSriastradh	for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
276703b705cfSriastradh		if (gen3_tex_formats[i].xfmt == picture->format) {
276803b705cfSriastradh			channel->card_format = gen3_tex_formats[i].card_fmt;
276903b705cfSriastradh			channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
277003b705cfSriastradh			channel->alpha_fixup = true;
277103b705cfSriastradh			return true;
277203b705cfSriastradh		}
277303b705cfSriastradh	}
277403b705cfSriastradh
277503b705cfSriastradh	return false;
277603b705cfSriastradh}
277703b705cfSriastradh
277803b705cfSriastradhstatic int
277903b705cfSriastradhgen3_init_solid(struct sna_composite_channel *channel, uint32_t color)
278003b705cfSriastradh{
278103b705cfSriastradh	channel->u.gen3.mode = color;
278203b705cfSriastradh	channel->u.gen3.type = SHADER_CONSTANT;
278303b705cfSriastradh	if (color == 0)
278403b705cfSriastradh		channel->u.gen3.type = SHADER_ZERO;
278503b705cfSriastradh	else if (color == 0xff000000)
278603b705cfSriastradh		channel->u.gen3.type = SHADER_BLACK;
278703b705cfSriastradh	else if (color == 0xffffffff)
278803b705cfSriastradh		channel->u.gen3.type = SHADER_WHITE;
278903b705cfSriastradh
279003b705cfSriastradh	channel->bo = NULL;
279103b705cfSriastradh	channel->is_opaque = (color >> 24) == 0xff;
279203b705cfSriastradh	channel->is_affine = 1;
279303b705cfSriastradh	channel->alpha_fixup = 0;
279403b705cfSriastradh	channel->rb_reversed = 0;
279503b705cfSriastradh
279603b705cfSriastradh	DBG(("%s: color=%08x, is_opaque=%d, type=%d\n",
279703b705cfSriastradh	     __FUNCTION__, color, channel->is_opaque, channel->u.gen3.type));
279803b705cfSriastradh
279903b705cfSriastradh	/* for consistency */
280003b705cfSriastradh	channel->repeat = RepeatNormal;
280103b705cfSriastradh	channel->filter = PictFilterNearest;
280203b705cfSriastradh	channel->pict_format = PICT_a8r8g8b8;
280303b705cfSriastradh	channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
280403b705cfSriastradh
280503b705cfSriastradh	return 1;
280603b705cfSriastradh}
280703b705cfSriastradh
280803b705cfSriastradhstatic void gen3_composite_channel_convert(struct sna_composite_channel *channel)
280903b705cfSriastradh{
281003b705cfSriastradh	if (channel->u.gen3.type == SHADER_TEXTURE)
281103b705cfSriastradh		channel->repeat = gen3_texture_repeat(channel->repeat);
281203b705cfSriastradh	else
281303b705cfSriastradh		channel->repeat = gen3_gradient_repeat(channel->repeat);
281403b705cfSriastradh
281503b705cfSriastradh	channel->filter = gen3_filter(channel->filter);
281603b705cfSriastradh	if (channel->card_format == 0)
281703b705cfSriastradh		gen3_composite_channel_set_format(channel, channel->pict_format);
281803b705cfSriastradh	assert(channel->card_format);
281903b705cfSriastradh}
282003b705cfSriastradh
282103b705cfSriastradhstatic bool gen3_gradient_setup(struct sna *sna,
282203b705cfSriastradh				PicturePtr picture,
282303b705cfSriastradh				struct sna_composite_channel *channel,
282403b705cfSriastradh				int16_t ox, int16_t oy)
282503b705cfSriastradh{
282603b705cfSriastradh	int16_t dx, dy;
282703b705cfSriastradh
282803b705cfSriastradh	if (picture->repeat == 0) {
282903b705cfSriastradh		channel->repeat = RepeatNone;
283003b705cfSriastradh	} else switch (picture->repeatType) {
283103b705cfSriastradh	case RepeatNone:
283203b705cfSriastradh	case RepeatNormal:
283303b705cfSriastradh	case RepeatPad:
283403b705cfSriastradh	case RepeatReflect:
283503b705cfSriastradh		channel->repeat = picture->repeatType;
283603b705cfSriastradh		break;
283703b705cfSriastradh	default:
283803b705cfSriastradh		return false;
283903b705cfSriastradh	}
284003b705cfSriastradh
284103b705cfSriastradh	channel->bo =
284203b705cfSriastradh		sna_render_get_gradient(sna,
284303b705cfSriastradh					(PictGradient *)picture->pSourcePict);
284403b705cfSriastradh	if (channel->bo == NULL)
284503b705cfSriastradh		return false;
284603b705cfSriastradh
284703b705cfSriastradh	channel->pict_format = PICT_a8r8g8b8;
284803b705cfSriastradh	channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
284903b705cfSriastradh	channel->filter = PictFilterNearest;
285003b705cfSriastradh	channel->is_affine = sna_transform_is_affine(picture->transform);
285103b705cfSriastradh	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
285203b705cfSriastradh		DBG(("%s: integer translation (%d, %d), removing\n",
285303b705cfSriastradh		     __FUNCTION__, dx, dy));
285403b705cfSriastradh		ox += dx;
285503b705cfSriastradh		oy += dy;
285603b705cfSriastradh		channel->transform = NULL;
285703b705cfSriastradh	} else
285803b705cfSriastradh		channel->transform = picture->transform;
285903b705cfSriastradh	channel->width  = channel->bo->pitch / 4;
286003b705cfSriastradh	channel->height = 1;
286103b705cfSriastradh	channel->offset[0] = ox;
286203b705cfSriastradh	channel->offset[1] = oy;
286303b705cfSriastradh	channel->scale[0] = channel->scale[1] = 1;
286403b705cfSriastradh	return true;
286503b705cfSriastradh}
286603b705cfSriastradh
286703b705cfSriastradhstatic int
286803b705cfSriastradhgen3_init_linear(struct sna *sna,
286903b705cfSriastradh		 PicturePtr picture,
287003b705cfSriastradh		 struct sna_composite_op *op,
287103b705cfSriastradh		 struct sna_composite_channel *channel,
287203b705cfSriastradh		 int ox, int oy)
287303b705cfSriastradh{
287403b705cfSriastradh	PictLinearGradient *linear =
287503b705cfSriastradh		(PictLinearGradient *)picture->pSourcePict;
287603b705cfSriastradh	float x0, y0, sf;
287703b705cfSriastradh	float dx, dy, offset;
287803b705cfSriastradh	int n;
287903b705cfSriastradh
288003b705cfSriastradh	DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n",
288103b705cfSriastradh	     __FUNCTION__,
288203b705cfSriastradh	     xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y),
288303b705cfSriastradh	     xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y)));
288403b705cfSriastradh
288503b705cfSriastradh	if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
288603b705cfSriastradh		return 0;
288703b705cfSriastradh
288803b705cfSriastradh	dx = xFixedToDouble(linear->p2.x - linear->p1.x);
288903b705cfSriastradh	dy = xFixedToDouble(linear->p2.y - linear->p1.y);
289003b705cfSriastradh	sf = dx*dx + dy*dy;
289103b705cfSriastradh	dx /= sf;
289203b705cfSriastradh	dy /= sf;
289303b705cfSriastradh
289403b705cfSriastradh	x0 = xFixedToDouble(linear->p1.x);
289503b705cfSriastradh	y0 = xFixedToDouble(linear->p1.y);
289603b705cfSriastradh	offset = dx*x0 + dy*y0;
289703b705cfSriastradh
289803b705cfSriastradh	n = op->u.gen3.num_constants;
289903b705cfSriastradh	channel->u.gen3.constants = FS_C0 + n / 4;
290003b705cfSriastradh	op->u.gen3.constants[n++] = dx;
290103b705cfSriastradh	op->u.gen3.constants[n++] = dy;
290203b705cfSriastradh	op->u.gen3.constants[n++] = -offset;
290303b705cfSriastradh	op->u.gen3.constants[n++] = 0;
290403b705cfSriastradh
290503b705cfSriastradh	if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
290603b705cfSriastradh		return -1;
290703b705cfSriastradh
290803b705cfSriastradh	channel->u.gen3.type = SHADER_LINEAR;
290903b705cfSriastradh	op->u.gen3.num_constants = n;
291003b705cfSriastradh
291103b705cfSriastradh	DBG(("%s: dx=%f, dy=%f, offset=%f, constants=%d\n",
291203b705cfSriastradh	     __FUNCTION__, dx, dy, -offset, channel->u.gen3.constants - FS_C0));
291303b705cfSriastradh	return 1;
291403b705cfSriastradh}
291503b705cfSriastradh
291603b705cfSriastradhstatic int
291703b705cfSriastradhgen3_init_radial(struct sna *sna,
291803b705cfSriastradh		 PicturePtr picture,
291903b705cfSriastradh		 struct sna_composite_op *op,
292003b705cfSriastradh		 struct sna_composite_channel *channel,
292103b705cfSriastradh		 int ox, int oy)
292203b705cfSriastradh{
292303b705cfSriastradh	PictRadialGradient *radial = (PictRadialGradient *)picture->pSourcePict;
292403b705cfSriastradh	double dx, dy, dr, r1;
292503b705cfSriastradh	int n;
292603b705cfSriastradh
292703b705cfSriastradh	dx = xFixedToDouble(radial->c2.x - radial->c1.x);
292803b705cfSriastradh	dy = xFixedToDouble(radial->c2.y - radial->c1.y);
292903b705cfSriastradh	dr = xFixedToDouble(radial->c2.radius - radial->c1.radius);
293003b705cfSriastradh
293103b705cfSriastradh	r1 = xFixedToDouble(radial->c1.radius);
293203b705cfSriastradh
293303b705cfSriastradh	n = op->u.gen3.num_constants;
293403b705cfSriastradh	channel->u.gen3.constants = FS_C0 + n / 4;
293503b705cfSriastradh	if (radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) {
293603b705cfSriastradh		if (radial->c2.radius == radial->c1.radius) {
293703b705cfSriastradh			channel->u.gen3.type = SHADER_ZERO;
293803b705cfSriastradh			return 1;
293903b705cfSriastradh		}
294003b705cfSriastradh
294103b705cfSriastradh		op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.x) / dr;
294203b705cfSriastradh		op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.y) / dr;
294303b705cfSriastradh		op->u.gen3.constants[n++] = 1. / dr;
294403b705cfSriastradh		op->u.gen3.constants[n++] = -r1 / dr;
294503b705cfSriastradh
294603b705cfSriastradh		channel->u.gen3.mode = RADIAL_ONE;
294703b705cfSriastradh	} else {
294803b705cfSriastradh		op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.x);
294903b705cfSriastradh		op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.y);
295003b705cfSriastradh		op->u.gen3.constants[n++] = r1;
295103b705cfSriastradh		op->u.gen3.constants[n++] = -4 * (dx*dx + dy*dy - dr*dr);
295203b705cfSriastradh
295303b705cfSriastradh		op->u.gen3.constants[n++] = -2 * dx;
295403b705cfSriastradh		op->u.gen3.constants[n++] = -2 * dy;
295503b705cfSriastradh		op->u.gen3.constants[n++] = -2 * r1 * dr;
295603b705cfSriastradh		op->u.gen3.constants[n++] = 1 / (2 * (dx*dx + dy*dy - dr*dr));
295703b705cfSriastradh
295803b705cfSriastradh		channel->u.gen3.mode = RADIAL_TWO;
295903b705cfSriastradh	}
296003b705cfSriastradh
296103b705cfSriastradh	if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
296203b705cfSriastradh		return -1;
296303b705cfSriastradh
296403b705cfSriastradh	channel->u.gen3.type = SHADER_RADIAL;
296503b705cfSriastradh	op->u.gen3.num_constants = n;
296603b705cfSriastradh	return 1;
296703b705cfSriastradh}
296803b705cfSriastradh
296903b705cfSriastradhstatic bool
297003b705cfSriastradhsna_picture_is_clear(PicturePtr picture,
297103b705cfSriastradh		     int x, int y, int w, int h,
297203b705cfSriastradh		     uint32_t *color)
297303b705cfSriastradh{
297403b705cfSriastradh	struct sna_pixmap *priv;
297503b705cfSriastradh
297603b705cfSriastradh	if (!picture->pDrawable)
297703b705cfSriastradh		return false;
297803b705cfSriastradh
297903b705cfSriastradh	priv = sna_pixmap(get_drawable_pixmap(picture->pDrawable));
298003b705cfSriastradh	if (priv == NULL || !priv->clear)
298103b705cfSriastradh		return false;
298203b705cfSriastradh
298303b705cfSriastradh	if (!source_is_covered(picture, x, y, w, h))
298403b705cfSriastradh		return false;
298503b705cfSriastradh
298603b705cfSriastradh	*color = priv->clear_color;
298703b705cfSriastradh	return true;
298803b705cfSriastradh}
298903b705cfSriastradh
299003b705cfSriastradhstatic int
299103b705cfSriastradhgen3_composite_picture(struct sna *sna,
299203b705cfSriastradh		       PicturePtr picture,
299303b705cfSriastradh		       struct sna_composite_op *op,
299403b705cfSriastradh		       struct sna_composite_channel *channel,
299503b705cfSriastradh		       int16_t x, int16_t y,
299603b705cfSriastradh		       int16_t w, int16_t h,
299703b705cfSriastradh		       int16_t dst_x, int16_t dst_y,
299803b705cfSriastradh		       bool precise)
299903b705cfSriastradh{
300003b705cfSriastradh	PixmapPtr pixmap;
300103b705cfSriastradh	uint32_t color;
300203b705cfSriastradh	int16_t dx, dy;
300303b705cfSriastradh
300403b705cfSriastradh	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
300503b705cfSriastradh	     __FUNCTION__, x, y, w, h, dst_x, dst_y));
300603b705cfSriastradh
300703b705cfSriastradh	channel->card_format = 0;
300803b705cfSriastradh
300903b705cfSriastradh	if (picture->pDrawable == NULL) {
301003b705cfSriastradh		SourcePict *source = picture->pSourcePict;
301103b705cfSriastradh		int ret = -1;
301203b705cfSriastradh
301303b705cfSriastradh		switch (source->type) {
301403b705cfSriastradh		case SourcePictTypeSolidFill:
301503b705cfSriastradh			DBG(("%s: solid fill [%08x], format %08x\n",
301603b705cfSriastradh			     __FUNCTION__,
301703b705cfSriastradh			     (unsigned)source->solidFill.color,
301803b705cfSriastradh			     (unsigned)picture->format));
301903b705cfSriastradh			ret = gen3_init_solid(channel, source->solidFill.color);
302003b705cfSriastradh			break;
302103b705cfSriastradh
302203b705cfSriastradh		case SourcePictTypeLinear:
302303b705cfSriastradh			ret = gen3_init_linear(sna, picture, op, channel,
302403b705cfSriastradh					       x - dst_x, y - dst_y);
302503b705cfSriastradh			break;
302603b705cfSriastradh
302703b705cfSriastradh		case SourcePictTypeRadial:
302803b705cfSriastradh			ret = gen3_init_radial(sna, picture, op, channel,
302903b705cfSriastradh					       x - dst_x, y - dst_y);
303003b705cfSriastradh			break;
303103b705cfSriastradh		}
303203b705cfSriastradh
303303b705cfSriastradh		if (ret == -1) {
303403b705cfSriastradh			if (!precise)
303503b705cfSriastradh				ret = sna_render_picture_approximate_gradient(sna, picture, channel,
303603b705cfSriastradh									      x, y, w, h, dst_x, dst_y);
303703b705cfSriastradh			if (ret == -1)
303803b705cfSriastradh				ret = sna_render_picture_fixup(sna, picture, channel,
303903b705cfSriastradh							       x, y, w, h, dst_x, dst_y);
304003b705cfSriastradh		}
304103b705cfSriastradh		return ret;
304203b705cfSriastradh	}
304303b705cfSriastradh
304403b705cfSriastradh	if (picture->alphaMap) {
304503b705cfSriastradh		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
304603b705cfSriastradh		return sna_render_picture_fixup(sna, picture, channel,
304703b705cfSriastradh						x, y, w, h, dst_x, dst_y);
304803b705cfSriastradh	}
304903b705cfSriastradh
305003b705cfSriastradh	if (sna_picture_is_solid(picture, &color)) {
305103b705cfSriastradh		DBG(("%s: solid drawable [%08x]\n", __FUNCTION__, color));
305203b705cfSriastradh		return gen3_init_solid(channel, color);
305303b705cfSriastradh	}
305403b705cfSriastradh
305503b705cfSriastradh	if (sna_picture_is_clear(picture, x, y, w, h, &color)) {
305603b705cfSriastradh		DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color));
305703b705cfSriastradh		return gen3_init_solid(channel, color_convert(color, picture->format, PICT_a8r8g8b8));
305803b705cfSriastradh	}
305903b705cfSriastradh
306003b705cfSriastradh	if (!gen3_check_repeat(picture))
306103b705cfSriastradh		return sna_render_picture_fixup(sna, picture, channel,
306203b705cfSriastradh						x, y, w, h, dst_x, dst_y);
306303b705cfSriastradh
306403b705cfSriastradh	if (!gen3_check_filter(picture))
306503b705cfSriastradh		return sna_render_picture_fixup(sna, picture, channel,
306603b705cfSriastradh						x, y, w, h, dst_x, dst_y);
306703b705cfSriastradh
306803b705cfSriastradh	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
306903b705cfSriastradh	channel->filter = picture->filter;
307003b705cfSriastradh	channel->pict_format = picture->format;
307103b705cfSriastradh
307203b705cfSriastradh	pixmap = get_drawable_pixmap(picture->pDrawable);
307303b705cfSriastradh	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
307403b705cfSriastradh
307503b705cfSriastradh	x += dx + picture->pDrawable->x;
307603b705cfSriastradh	y += dy + picture->pDrawable->y;
307703b705cfSriastradh
307803b705cfSriastradh	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
307903b705cfSriastradh		DBG(("%s: integer translation (%d, %d), removing\n",
308003b705cfSriastradh		     __FUNCTION__, dx, dy));
308103b705cfSriastradh		x += dx;
308203b705cfSriastradh		y += dy;
308303b705cfSriastradh		channel->transform = NULL;
308403b705cfSriastradh		channel->filter = PictFilterNearest;
308503b705cfSriastradh	} else {
308603b705cfSriastradh		channel->transform = picture->transform;
308703b705cfSriastradh		channel->is_affine = sna_transform_is_affine(picture->transform);
308803b705cfSriastradh	}
308903b705cfSriastradh
309003b705cfSriastradh	if (!gen3_composite_channel_set_format(channel, picture->format) &&
309103b705cfSriastradh	    !gen3_composite_channel_set_xformat(picture, channel, x, y, w, h))
309203b705cfSriastradh		return sna_render_picture_convert(sna, picture, channel, pixmap,
309303b705cfSriastradh						  x, y, w, h, dst_x, dst_y,
309403b705cfSriastradh						  false);
309503b705cfSriastradh	assert(channel->card_format);
309603b705cfSriastradh
309703b705cfSriastradh	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
309803b705cfSriastradh		DBG(("%s: pixmap too large (%dx%d), extracting (%d, %d)x(%d,%d)\n",
309903b705cfSriastradh		     __FUNCTION__,
310003b705cfSriastradh		     pixmap->drawable.width, pixmap->drawable.height,
310103b705cfSriastradh		     x, y, w, h));
310203b705cfSriastradh		return sna_render_picture_extract(sna, picture, channel,
310303b705cfSriastradh						  x, y, w, h, dst_x, dst_y);
310403b705cfSriastradh	}
310503b705cfSriastradh
310603b705cfSriastradh	return sna_render_pixmap_bo(sna, channel, pixmap,
310703b705cfSriastradh				    x, y, w, h, dst_x, dst_y);
310803b705cfSriastradh}
310903b705cfSriastradh
311003b705cfSriastradhstatic inline bool
311103b705cfSriastradhsource_use_blt(struct sna *sna, PicturePtr picture)
311203b705cfSriastradh{
311303b705cfSriastradh	/* If it is a solid, try to use the BLT paths */
311403b705cfSriastradh	if (!picture->pDrawable)
311503b705cfSriastradh		return picture->pSourcePict->type == SourcePictTypeSolidFill;
311603b705cfSriastradh
311703b705cfSriastradh	if (picture->pDrawable->width  == 1 &&
311803b705cfSriastradh	    picture->pDrawable->height == 1 &&
311903b705cfSriastradh	    picture->repeat)
312003b705cfSriastradh		return true;
312103b705cfSriastradh
312203b705cfSriastradh	if (too_large(picture->pDrawable->width, picture->pDrawable->height))
312303b705cfSriastradh		return true;
312403b705cfSriastradh
312503b705cfSriastradh	return !is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER);
312603b705cfSriastradh}
312703b705cfSriastradh
312803b705cfSriastradhstatic bool
312903b705cfSriastradhtry_blt(struct sna *sna,
313003b705cfSriastradh	PicturePtr dst,
313103b705cfSriastradh	PicturePtr src,
313203b705cfSriastradh	int width, int height)
313303b705cfSriastradh{
313403b705cfSriastradh	if (sna->kgem.mode != KGEM_RENDER) {
313503b705cfSriastradh		DBG(("%s: already performing BLT\n", __FUNCTION__));
313603b705cfSriastradh		return true;
313703b705cfSriastradh	}
313803b705cfSriastradh
313903b705cfSriastradh	if (too_large(width, height)) {
314003b705cfSriastradh		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
314103b705cfSriastradh		     __FUNCTION__, width, height));
314203b705cfSriastradh		return true;
314303b705cfSriastradh	}
314403b705cfSriastradh
314503b705cfSriastradh	if (too_large(dst->pDrawable->width, dst->pDrawable->height)) {
314603b705cfSriastradh		DBG(("%s: target too large for 3D pipe (%d, %d)\n",
314703b705cfSriastradh		     __FUNCTION__,
314803b705cfSriastradh		     dst->pDrawable->width, dst->pDrawable->height));
314903b705cfSriastradh		return true;
315003b705cfSriastradh	}
315103b705cfSriastradh
315203b705cfSriastradh	/* is the source picture only in cpu memory e.g. a shm pixmap? */
315303b705cfSriastradh	return source_use_blt(sna, src);
315403b705cfSriastradh}
315503b705cfSriastradh
315603b705cfSriastradhstatic void
315703b705cfSriastradhgen3_align_vertex(struct sna *sna,
315803b705cfSriastradh		  const struct sna_composite_op *op)
315903b705cfSriastradh{
316003b705cfSriastradh	if (op->floats_per_vertex != sna->render_state.gen3.last_floats_per_vertex) {
316103b705cfSriastradh		if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
316203b705cfSriastradh			gen3_vertex_finish(sna);
316303b705cfSriastradh
316403b705cfSriastradh		DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
316503b705cfSriastradh		     sna->render_state.gen3.last_floats_per_vertex,
316603b705cfSriastradh		     op->floats_per_vertex,
316703b705cfSriastradh		     sna->render.vertex_index,
316803b705cfSriastradh		     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
316903b705cfSriastradh		sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
317003b705cfSriastradh		sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
317103b705cfSriastradh		assert(sna->render.vertex_used < sna->render.vertex_size - op->floats_per_rect);
317203b705cfSriastradh		sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex;
317303b705cfSriastradh	}
317403b705cfSriastradh}
317503b705cfSriastradh
317603b705cfSriastradhstatic bool
317703b705cfSriastradhgen3_composite_set_target(struct sna *sna,
317803b705cfSriastradh			  struct sna_composite_op *op,
317903b705cfSriastradh			  PicturePtr dst,
318003b705cfSriastradh			  int x, int y, int w, int h)
318103b705cfSriastradh{
318203b705cfSriastradh	BoxRec box;
318303b705cfSriastradh
318403b705cfSriastradh	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
318503b705cfSriastradh	op->dst.format = dst->format;
318603b705cfSriastradh	op->dst.width = op->dst.pixmap->drawable.width;
318703b705cfSriastradh	op->dst.height = op->dst.pixmap->drawable.height;
318803b705cfSriastradh
318903b705cfSriastradh	if (w && h) {
319003b705cfSriastradh		box.x1 = x;
319103b705cfSriastradh		box.y1 = y;
319203b705cfSriastradh		box.x2 = x + w;
319303b705cfSriastradh		box.y2 = y + h;
319403b705cfSriastradh	} else
319503b705cfSriastradh		sna_render_picture_extents(dst, &box);
319603b705cfSriastradh
319703b705cfSriastradh	op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
319803b705cfSriastradh					  PREFER_GPU | FORCE_GPU | RENDER_GPU,
319903b705cfSriastradh					  &box, &op->damage);
320003b705cfSriastradh	if (op->dst.bo == NULL)
320103b705cfSriastradh		return false;
320203b705cfSriastradh
320303b705cfSriastradh	assert(op->dst.bo->unique_id);
320403b705cfSriastradh
320503b705cfSriastradh	/* For single-stream mode there should be no minimum alignment
320603b705cfSriastradh	 * required, except that the width must be at least 2 elements.
320703b705cfSriastradh	 */
320803b705cfSriastradh	if (op->dst.bo->pitch < 2*op->dst.pixmap->drawable.bitsPerPixel) {
320903b705cfSriastradh		struct sna_pixmap *priv;
321003b705cfSriastradh
321103b705cfSriastradh		priv = sna_pixmap_move_to_gpu (op->dst.pixmap,
321203b705cfSriastradh					       MOVE_READ | MOVE_WRITE);
321303b705cfSriastradh		if (priv == NULL || priv->pinned)
321403b705cfSriastradh			return false;
321503b705cfSriastradh
321603b705cfSriastradh		if (priv->gpu_bo->pitch < 2*op->dst.pixmap->drawable.bitsPerPixel) {
321703b705cfSriastradh			struct kgem_bo *bo;
321803b705cfSriastradh
321903b705cfSriastradh			bo = kgem_replace_bo(&sna->kgem, priv->gpu_bo,
322003b705cfSriastradh					     op->dst.width, op->dst.height,
322103b705cfSriastradh					     2*op->dst.pixmap->drawable.bitsPerPixel,
322203b705cfSriastradh					     op->dst.pixmap->drawable.bitsPerPixel);
322303b705cfSriastradh			if (bo == NULL)
322403b705cfSriastradh				return false;
322503b705cfSriastradh
322603b705cfSriastradh			kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
322703b705cfSriastradh			priv->gpu_bo = bo;
322803b705cfSriastradh		}
322903b705cfSriastradh
323003b705cfSriastradh		op->dst.bo = priv->gpu_bo;
323103b705cfSriastradh		op->damage = &priv->gpu_damage;
323203b705cfSriastradh		if (sna_damage_is_all(op->damage,
323303b705cfSriastradh				      op->dst.width, op->dst.height))
323403b705cfSriastradh			op->damage = NULL;
323503b705cfSriastradh	}
323603b705cfSriastradh
323703b705cfSriastradh	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
323803b705cfSriastradh			    &op->dst.x, &op->dst.y);
323903b705cfSriastradh
324003b705cfSriastradh	DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
324103b705cfSriastradh	     __FUNCTION__,
324203b705cfSriastradh	     op->dst.pixmap, (int)op->dst.format,
324303b705cfSriastradh	     op->dst.width, op->dst.height,
324403b705cfSriastradh	     op->dst.bo->pitch,
324503b705cfSriastradh	     op->dst.x, op->dst.y,
324603b705cfSriastradh	     op->damage ? *op->damage : (void *)-1));
324703b705cfSriastradh
324803b705cfSriastradh	assert(op->dst.bo->proxy == NULL);
324903b705cfSriastradh	return true;
325003b705cfSriastradh}
325103b705cfSriastradh
/*
 * Multiply two 8-bit values treated as fractions of 255, returning an
 * 8-bit result: approximately a * b / 255, computed without a division
 * (bias 0x7f, then (t + (t >> 8)) >> 8).
 */
static inline uint8_t
mul_8_8(uint8_t a, uint8_t b)
{
    uint16_t prod = (uint16_t)b * a + 0x7f;

    return (prod + (prod >> 8)) >> 8;
}
325803b705cfSriastradh
/*
 * Scale one 8-bit channel of @s by the alpha of @m.
 *
 * The channel is the byte of @s starting at bit @shift; the alpha is the
 * top byte of @m.  The product (see mul_8_8()) is returned shifted back
 * into the channel's position, with all other bits zero.
 *
 * The product is widened to uint32_t before shifting: left-shifting the
 * promoted int by 24 would be undefined whenever the product has bit 7 set.
 */
static inline uint32_t multa(uint32_t s, uint32_t m, int shift)
{
	return (uint32_t)mul_8_8((s >> shift) & 0xff, m >> 24) << shift;
}
326303b705cfSriastradh
326403b705cfSriastradhstatic inline bool is_constant_ps(uint32_t type)
326503b705cfSriastradh{
326603b705cfSriastradh	switch (type) {
326703b705cfSriastradh	case SHADER_NONE: /* be warned! */
326803b705cfSriastradh	case SHADER_ZERO:
326903b705cfSriastradh	case SHADER_BLACK:
327003b705cfSriastradh	case SHADER_WHITE:
327103b705cfSriastradh	case SHADER_CONSTANT:
327203b705cfSriastradh		return true;
327303b705cfSriastradh	default:
327403b705cfSriastradh		return false;
327503b705cfSriastradh	}
327603b705cfSriastradh}
327703b705cfSriastradh
327803b705cfSriastradhstatic bool
327903b705cfSriastradhhas_alphamap(PicturePtr p)
328003b705cfSriastradh{
328103b705cfSriastradh	return p->alphaMap != NULL;
328203b705cfSriastradh}
328303b705cfSriastradh
328403b705cfSriastradhstatic bool
328503b705cfSriastradhneed_upload(PicturePtr p)
328603b705cfSriastradh{
328703b705cfSriastradh	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
328803b705cfSriastradh}
328903b705cfSriastradh
329003b705cfSriastradhstatic bool
329103b705cfSriastradhsource_is_busy(PixmapPtr pixmap)
329203b705cfSriastradh{
329303b705cfSriastradh	struct sna_pixmap *priv = sna_pixmap(pixmap);
329403b705cfSriastradh	if (priv == NULL)
329503b705cfSriastradh		return false;
329603b705cfSriastradh
329703b705cfSriastradh	if (priv->clear)
329803b705cfSriastradh		return false;
329903b705cfSriastradh
330003b705cfSriastradh	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
330103b705cfSriastradh		return true;
330203b705cfSriastradh
330303b705cfSriastradh	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
330403b705cfSriastradh		return true;
330503b705cfSriastradh
330603b705cfSriastradh	return priv->gpu_damage && !priv->cpu_damage;
330703b705cfSriastradh}
330803b705cfSriastradh
330903b705cfSriastradhstatic bool
331003b705cfSriastradhis_unhandled_gradient(PicturePtr picture, bool precise)
331103b705cfSriastradh{
331203b705cfSriastradh	if (picture->pDrawable)
331303b705cfSriastradh		return false;
331403b705cfSriastradh
331503b705cfSriastradh	switch (picture->pSourcePict->type) {
331603b705cfSriastradh	case SourcePictTypeSolidFill:
331703b705cfSriastradh	case SourcePictTypeLinear:
331803b705cfSriastradh	case SourcePictTypeRadial:
331903b705cfSriastradh		return false;
332003b705cfSriastradh	default:
332103b705cfSriastradh		return precise;
332203b705cfSriastradh	}
332303b705cfSriastradh}
332403b705cfSriastradh
332503b705cfSriastradhstatic bool
332603b705cfSriastradhsource_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
332703b705cfSriastradh{
332803b705cfSriastradh	if (sna_picture_is_solid(p, NULL))
332903b705cfSriastradh		return false;
333003b705cfSriastradh
333103b705cfSriastradh	if (is_unhandled_gradient(p, precise))
333203b705cfSriastradh		return true;
333303b705cfSriastradh
333403b705cfSriastradh	if (!gen3_check_xformat(p) || !gen3_check_repeat(p))
333503b705cfSriastradh		return true;
333603b705cfSriastradh
333703b705cfSriastradh	if (pixmap && source_is_busy(pixmap))
333803b705cfSriastradh		return false;
333903b705cfSriastradh
334003b705cfSriastradh	return has_alphamap(p) || !gen3_check_filter(p) || need_upload(p);
334103b705cfSriastradh}
334203b705cfSriastradh
334303b705cfSriastradhstatic bool
334403b705cfSriastradhgen3_composite_fallback(struct sna *sna,
334503b705cfSriastradh			uint8_t op,
334603b705cfSriastradh			PicturePtr src,
334703b705cfSriastradh			PicturePtr mask,
334803b705cfSriastradh			PicturePtr dst)
334903b705cfSriastradh{
335003b705cfSriastradh	PixmapPtr src_pixmap;
335103b705cfSriastradh	PixmapPtr mask_pixmap;
335203b705cfSriastradh	PixmapPtr dst_pixmap;
335303b705cfSriastradh	bool src_fallback, mask_fallback;
335403b705cfSriastradh
335503b705cfSriastradh	if (!gen3_check_dst_format(dst->format)) {
335603b705cfSriastradh		DBG(("%s: unknown destination format: %d\n",
335703b705cfSriastradh		     __FUNCTION__, dst->format));
335803b705cfSriastradh		return true;
335903b705cfSriastradh	}
336003b705cfSriastradh
336103b705cfSriastradh	dst_pixmap = get_drawable_pixmap(dst->pDrawable);
336203b705cfSriastradh
336303b705cfSriastradh	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
336403b705cfSriastradh	src_fallback = source_fallback(src, src_pixmap,
336503b705cfSriastradh				       dst->polyMode == PolyModePrecise);
336603b705cfSriastradh
336703b705cfSriastradh	if (mask) {
336803b705cfSriastradh		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
336903b705cfSriastradh		mask_fallback = source_fallback(mask, mask_pixmap,
337003b705cfSriastradh						dst->polyMode == PolyModePrecise);
337103b705cfSriastradh	} else {
337203b705cfSriastradh		mask_pixmap = NULL;
337303b705cfSriastradh		mask_fallback = false;
337403b705cfSriastradh	}
337503b705cfSriastradh
337603b705cfSriastradh	/* If we are using the destination as a source and need to
337703b705cfSriastradh	 * readback in order to upload the source, do it all
337803b705cfSriastradh	 * on the cpu.
337903b705cfSriastradh	 */
338003b705cfSriastradh	if (src_pixmap == dst_pixmap && src_fallback) {
338103b705cfSriastradh		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
338203b705cfSriastradh		return true;
338303b705cfSriastradh	}
338403b705cfSriastradh	if (mask_pixmap == dst_pixmap && mask_fallback) {
338503b705cfSriastradh		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
338603b705cfSriastradh		return true;
338703b705cfSriastradh	}
338803b705cfSriastradh
338903b705cfSriastradh	if (mask &&
339003b705cfSriastradh	    mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
339103b705cfSriastradh	    gen3_blend_op[op].src_alpha &&
339203b705cfSriastradh	    gen3_blend_op[op].src_blend != BLENDFACT_ZERO &&
339303b705cfSriastradh	    op != PictOpOver) {
339403b705cfSriastradh		DBG(("%s: component-alpha mask with op=%d, should fallback\n",
339503b705cfSriastradh		     __FUNCTION__, op));
339603b705cfSriastradh		return true;
339703b705cfSriastradh	}
339803b705cfSriastradh
339903b705cfSriastradh	/* If anything is on the GPU, push everything out to the GPU */
340003b705cfSriastradh	if (dst_use_gpu(dst_pixmap)) {
340103b705cfSriastradh		DBG(("%s: dst is already on the GPU, try to use GPU\n",
340203b705cfSriastradh		     __FUNCTION__));
340303b705cfSriastradh		return false;
340403b705cfSriastradh	}
340503b705cfSriastradh
340603b705cfSriastradh	if (src_pixmap && !src_fallback) {
340703b705cfSriastradh		DBG(("%s: src is already on the GPU, try to use GPU\n",
340803b705cfSriastradh		     __FUNCTION__));
340903b705cfSriastradh		return false;
341003b705cfSriastradh	}
341103b705cfSriastradh	if (mask_pixmap && !mask_fallback) {
341203b705cfSriastradh		DBG(("%s: mask is already on the GPU, try to use GPU\n",
341303b705cfSriastradh		     __FUNCTION__));
341403b705cfSriastradh		return false;
341503b705cfSriastradh	}
341603b705cfSriastradh
341703b705cfSriastradh	/* However if the dst is not on the GPU and we need to
341803b705cfSriastradh	 * render one of the sources using the CPU, we may
341903b705cfSriastradh	 * as well do the entire operation in place onthe CPU.
342003b705cfSriastradh	 */
342103b705cfSriastradh	if (src_fallback) {
342203b705cfSriastradh		DBG(("%s: dst is on the CPU and src will fallback\n",
342303b705cfSriastradh		     __FUNCTION__));
342403b705cfSriastradh		return true;
342503b705cfSriastradh	}
342603b705cfSriastradh
342703b705cfSriastradh	if (mask && mask_fallback) {
342803b705cfSriastradh		DBG(("%s: dst is on the CPU and mask will fallback\n",
342903b705cfSriastradh		     __FUNCTION__));
343003b705cfSriastradh		return true;
343103b705cfSriastradh	}
343203b705cfSriastradh
343303b705cfSriastradh	if (too_large(dst_pixmap->drawable.width,
343403b705cfSriastradh		      dst_pixmap->drawable.height) &&
343503b705cfSriastradh	    dst_is_cpu(dst_pixmap)) {
343603b705cfSriastradh		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
343703b705cfSriastradh		return true;
343803b705cfSriastradh	}
343903b705cfSriastradh
344003b705cfSriastradh	DBG(("%s: dst is not on the GPU and the operation should not fallback: use-cpu? %d\n",
344103b705cfSriastradh	     __FUNCTION__, dst_use_cpu(dst_pixmap)));
344203b705cfSriastradh	return dst_use_cpu(dst_pixmap);
344303b705cfSriastradh}
344403b705cfSriastradh
344503b705cfSriastradhstatic int
344603b705cfSriastradhreuse_source(struct sna *sna,
344703b705cfSriastradh	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
344803b705cfSriastradh	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
344903b705cfSriastradh{
345003b705cfSriastradh	if (src_x != msk_x || src_y != msk_y)
345103b705cfSriastradh		return false;
345203b705cfSriastradh
345303b705cfSriastradh	if (mask == src) {
345403b705cfSriastradh		*mc = *sc;
345503b705cfSriastradh		if (mc->bo)
345603b705cfSriastradh			kgem_bo_reference(mc->bo);
345703b705cfSriastradh		return true;
345803b705cfSriastradh	}
345903b705cfSriastradh
346003b705cfSriastradh	if ((src->pDrawable == NULL || mask->pDrawable != src->pDrawable))
346103b705cfSriastradh		return false;
346203b705cfSriastradh
346303b705cfSriastradh	if (sc->is_solid)
346403b705cfSriastradh		return false;
346503b705cfSriastradh
346603b705cfSriastradh	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));
346703b705cfSriastradh
346803b705cfSriastradh	if (!sna_transform_equal(src->transform, mask->transform))
346903b705cfSriastradh		return false;
347003b705cfSriastradh
347103b705cfSriastradh	if (!sna_picture_alphamap_equal(src, mask))
347203b705cfSriastradh		return false;
347303b705cfSriastradh
347403b705cfSriastradh	if (!gen3_check_repeat(mask))
347503b705cfSriastradh		return false;
347603b705cfSriastradh
347703b705cfSriastradh	if (!gen3_check_filter(mask))
347803b705cfSriastradh		return false;
347903b705cfSriastradh
348003b705cfSriastradh	if (!gen3_check_format(mask))
348103b705cfSriastradh		return false;
348203b705cfSriastradh
348303b705cfSriastradh	DBG(("%s: reusing source channel for mask with a twist\n",
348403b705cfSriastradh	     __FUNCTION__));
348503b705cfSriastradh
348603b705cfSriastradh	*mc = *sc;
348703b705cfSriastradh	mc->repeat = gen3_texture_repeat(mask->repeat ? mask->repeatType : RepeatNone);
348803b705cfSriastradh	mc->filter = gen3_filter(mask->filter);
348903b705cfSriastradh	mc->pict_format = mask->format;
349003b705cfSriastradh	gen3_composite_channel_set_format(mc, mask->format);
349103b705cfSriastradh	assert(mc->card_format);
349203b705cfSriastradh	if (mc->bo)
349303b705cfSriastradh		kgem_bo_reference(mc->bo);
349403b705cfSriastradh	return true;
349503b705cfSriastradh}
349603b705cfSriastradh
349703b705cfSriastradhstatic bool
349803b705cfSriastradhgen3_render_composite(struct sna *sna,
349903b705cfSriastradh		      uint8_t op,
350003b705cfSriastradh		      PicturePtr src,
350103b705cfSriastradh		      PicturePtr mask,
350203b705cfSriastradh		      PicturePtr dst,
350303b705cfSriastradh		      int16_t src_x,  int16_t src_y,
350403b705cfSriastradh		      int16_t mask_x, int16_t mask_y,
350503b705cfSriastradh		      int16_t dst_x,  int16_t dst_y,
350603b705cfSriastradh		      int16_t width,  int16_t height,
350703b705cfSriastradh		      struct sna_composite_op *tmp)
350803b705cfSriastradh{
350903b705cfSriastradh	DBG(("%s()\n", __FUNCTION__));
351003b705cfSriastradh
351103b705cfSriastradh	if (op >= ARRAY_SIZE(gen3_blend_op)) {
351203b705cfSriastradh		DBG(("%s: fallback due to unhandled blend op: %d\n",
351303b705cfSriastradh		     __FUNCTION__, op));
351403b705cfSriastradh		return false;
351503b705cfSriastradh	}
351603b705cfSriastradh
351703b705cfSriastradh	/* Try to use the BLT engine unless it implies a
351803b705cfSriastradh	 * 3D -> 2D context switch.
351903b705cfSriastradh	 */
352003b705cfSriastradh	if (mask == NULL &&
352103b705cfSriastradh	    try_blt(sna, dst, src, width, height) &&
352203b705cfSriastradh	    sna_blt_composite(sna,
352303b705cfSriastradh			      op, src, dst,
352403b705cfSriastradh			      src_x, src_y,
352503b705cfSriastradh			      dst_x, dst_y,
352603b705cfSriastradh			      width, height,
352703b705cfSriastradh			      tmp, false))
352803b705cfSriastradh		return true;
352903b705cfSriastradh
353003b705cfSriastradh	if (gen3_composite_fallback(sna, op, src, mask, dst))
353103b705cfSriastradh		return false;
353203b705cfSriastradh
353303b705cfSriastradh	if (need_tiling(sna, width, height))
353403b705cfSriastradh		return sna_tiling_composite(op, src, mask, dst,
353503b705cfSriastradh					    src_x,  src_y,
353603b705cfSriastradh					    mask_x, mask_y,
353703b705cfSriastradh					    dst_x,  dst_y,
353803b705cfSriastradh					    width,  height,
353903b705cfSriastradh					    tmp);
354003b705cfSriastradh
354103b705cfSriastradh	if (!gen3_composite_set_target(sna, tmp, dst,
354203b705cfSriastradh				       dst_x, dst_y, width, height)) {
354303b705cfSriastradh		DBG(("%s: unable to set render target\n",
354403b705cfSriastradh		     __FUNCTION__));
354503b705cfSriastradh		return false;
354603b705cfSriastradh	}
354703b705cfSriastradh
354803b705cfSriastradh	tmp->op = op;
354903b705cfSriastradh	tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);
355003b705cfSriastradh	if (too_large(tmp->dst.width, tmp->dst.height) ||
355103b705cfSriastradh	    !gen3_check_pitch_3d(tmp->dst.bo)) {
355203b705cfSriastradh		if (!sna_render_composite_redirect(sna, tmp,
355303b705cfSriastradh						   dst_x, dst_y, width, height,
355403b705cfSriastradh						   op > PictOpSrc || dst->pCompositeClip->data))
355503b705cfSriastradh			return false;
355603b705cfSriastradh	}
355703b705cfSriastradh
355803b705cfSriastradh	tmp->u.gen3.num_constants = 0;
355903b705cfSriastradh	tmp->src.u.gen3.type = SHADER_TEXTURE;
356003b705cfSriastradh	tmp->src.is_affine = true;
356103b705cfSriastradh	DBG(("%s: preparing source\n", __FUNCTION__));
356203b705cfSriastradh	switch (gen3_composite_picture(sna, src, tmp, &tmp->src,
356303b705cfSriastradh				       src_x, src_y,
356403b705cfSriastradh				       width, height,
356503b705cfSriastradh				       dst_x, dst_y,
356603b705cfSriastradh				       dst->polyMode == PolyModePrecise)) {
356703b705cfSriastradh	case -1:
356803b705cfSriastradh		goto cleanup_dst;
356903b705cfSriastradh	case 0:
357003b705cfSriastradh		tmp->src.u.gen3.type = SHADER_ZERO;
357103b705cfSriastradh		break;
357203b705cfSriastradh	case 1:
357303b705cfSriastradh		if (mask == NULL && tmp->src.bo &&
357403b705cfSriastradh		    sna_blt_composite__convert(sna,
357503b705cfSriastradh					       dst_x, dst_y, width, height,
357603b705cfSriastradh					       tmp))
357703b705cfSriastradh			return true;
357803b705cfSriastradh
357903b705cfSriastradh		gen3_composite_channel_convert(&tmp->src);
358003b705cfSriastradh		break;
358103b705cfSriastradh	}
358203b705cfSriastradh	DBG(("%s: source type=%d\n", __FUNCTION__, tmp->src.u.gen3.type));
358303b705cfSriastradh
358403b705cfSriastradh	tmp->mask.u.gen3.type = SHADER_NONE;
358503b705cfSriastradh	tmp->mask.is_affine = true;
358603b705cfSriastradh	tmp->need_magic_ca_pass = false;
358703b705cfSriastradh	tmp->has_component_alpha = false;
358803b705cfSriastradh	if (mask && tmp->src.u.gen3.type != SHADER_ZERO) {
358903b705cfSriastradh		if (!reuse_source(sna,
359003b705cfSriastradh				  src, &tmp->src, src_x, src_y,
359103b705cfSriastradh				  mask, &tmp->mask, mask_x, mask_y)) {
359203b705cfSriastradh			tmp->mask.u.gen3.type = SHADER_TEXTURE;
359303b705cfSriastradh			DBG(("%s: preparing mask\n", __FUNCTION__));
359403b705cfSriastradh			switch (gen3_composite_picture(sna, mask, tmp, &tmp->mask,
359503b705cfSriastradh						       mask_x, mask_y,
359603b705cfSriastradh						       width,  height,
359703b705cfSriastradh						       dst_x,  dst_y,
359803b705cfSriastradh						       dst->polyMode == PolyModePrecise)) {
359903b705cfSriastradh			case -1:
360003b705cfSriastradh				goto cleanup_src;
360103b705cfSriastradh			case 0:
360203b705cfSriastradh				tmp->mask.u.gen3.type = SHADER_ZERO;
360303b705cfSriastradh				break;
360403b705cfSriastradh			case 1:
360503b705cfSriastradh				gen3_composite_channel_convert(&tmp->mask);
360603b705cfSriastradh				break;
360703b705cfSriastradh			}
360803b705cfSriastradh		}
360903b705cfSriastradh		DBG(("%s: mask type=%d\n", __FUNCTION__, tmp->mask.u.gen3.type));
361003b705cfSriastradh		if (tmp->mask.u.gen3.type == SHADER_ZERO) {
361103b705cfSriastradh			if (tmp->src.bo) {
361203b705cfSriastradh				kgem_bo_destroy(&sna->kgem,
361303b705cfSriastradh						tmp->src.bo);
361403b705cfSriastradh				tmp->src.bo = NULL;
361503b705cfSriastradh			}
361603b705cfSriastradh			tmp->src.u.gen3.type = SHADER_ZERO;
361703b705cfSriastradh			tmp->mask.u.gen3.type = SHADER_NONE;
361803b705cfSriastradh		}
361903b705cfSriastradh
362003b705cfSriastradh		if (tmp->mask.u.gen3.type != SHADER_NONE) {
362103b705cfSriastradh			if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
362203b705cfSriastradh				/* Check if it's component alpha that relies on a source alpha
362303b705cfSriastradh				 * and on the source value.  We can only get one of those
362403b705cfSriastradh				 * into the single source value that we get to blend with.
362503b705cfSriastradh				 */
362603b705cfSriastradh				DBG(("%s: component-alpha mask: %d\n",
362703b705cfSriastradh				     __FUNCTION__, tmp->mask.u.gen3.type));
362803b705cfSriastradh				tmp->has_component_alpha = true;
362903b705cfSriastradh				if (tmp->mask.u.gen3.type == SHADER_WHITE) {
363003b705cfSriastradh					tmp->mask.u.gen3.type = SHADER_NONE;
363103b705cfSriastradh					tmp->has_component_alpha = false;
363203b705cfSriastradh				} else if (gen3_blend_op[op].src_alpha &&
363303b705cfSriastradh					   gen3_blend_op[op].src_blend != BLENDFACT_ZERO) {
363403b705cfSriastradh					if (op != PictOpOver)
363503b705cfSriastradh						goto cleanup_mask;
363603b705cfSriastradh
363703b705cfSriastradh					tmp->need_magic_ca_pass = true;
363803b705cfSriastradh					tmp->op = PictOpOutReverse;
363903b705cfSriastradh				}
364003b705cfSriastradh			} else {
364103b705cfSriastradh				if (tmp->mask.is_opaque) {
364203b705cfSriastradh					tmp->mask.u.gen3.type = SHADER_NONE;
364303b705cfSriastradh				} else if (is_constant_ps(tmp->src.u.gen3.type) &&
364403b705cfSriastradh					   is_constant_ps(tmp->mask.u.gen3.type)) {
364503b705cfSriastradh					uint32_t v;
364603b705cfSriastradh
364703b705cfSriastradh					v = multa(tmp->src.u.gen3.mode,
364803b705cfSriastradh						  tmp->mask.u.gen3.mode,
364903b705cfSriastradh						  24);
365003b705cfSriastradh					v |= multa(tmp->src.u.gen3.mode,
365103b705cfSriastradh						   tmp->mask.u.gen3.mode,
365203b705cfSriastradh						   16);
365303b705cfSriastradh					v |= multa(tmp->src.u.gen3.mode,
365403b705cfSriastradh						   tmp->mask.u.gen3.mode,
365503b705cfSriastradh						   8);
365603b705cfSriastradh					v |= multa(tmp->src.u.gen3.mode,
365703b705cfSriastradh						   tmp->mask.u.gen3.mode,
365803b705cfSriastradh						   0);
365903b705cfSriastradh
366003b705cfSriastradh					DBG(("%s: combining constant source/mask: %x x %x -> %x\n",
366103b705cfSriastradh					     __FUNCTION__,
366203b705cfSriastradh					     tmp->src.u.gen3.mode,
366303b705cfSriastradh					     tmp->mask.u.gen3.mode,
366403b705cfSriastradh					     v));
366503b705cfSriastradh
366603b705cfSriastradh					tmp->src.u.gen3.type = SHADER_CONSTANT;
366703b705cfSriastradh					tmp->src.u.gen3.mode = v;
366803b705cfSriastradh					tmp->src.is_opaque = false;
366903b705cfSriastradh
367003b705cfSriastradh					tmp->mask.u.gen3.type = SHADER_NONE;
367103b705cfSriastradh				}
367203b705cfSriastradh			}
367303b705cfSriastradh		}
367403b705cfSriastradh	}
367503b705cfSriastradh	DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__,
367603b705cfSriastradh	     tmp->src.u.gen3.type, tmp->mask.u.gen3.type,
367703b705cfSriastradh	     tmp->src.is_affine, tmp->mask.is_affine));
367803b705cfSriastradh
367903b705cfSriastradh	tmp->prim_emit = gen3_emit_composite_primitive;
368003b705cfSriastradh	if (is_constant_ps(tmp->mask.u.gen3.type)) {
368103b705cfSriastradh		switch (tmp->src.u.gen3.type) {
368203b705cfSriastradh		case SHADER_NONE:
368303b705cfSriastradh		case SHADER_ZERO:
368403b705cfSriastradh		case SHADER_BLACK:
368503b705cfSriastradh		case SHADER_WHITE:
368603b705cfSriastradh		case SHADER_CONSTANT:
368703b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
368803b705cfSriastradh			if (sna->cpu_features & SSE2) {
368903b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_primitive_constant__sse2;
369003b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_boxes_constant__sse2;
369103b705cfSriastradh			} else
369203b705cfSriastradh#endif
369303b705cfSriastradh			{
369403b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_primitive_constant;
369503b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_boxes_constant;
369603b705cfSriastradh			}
369703b705cfSriastradh
369803b705cfSriastradh			break;
369903b705cfSriastradh		case SHADER_LINEAR:
370003b705cfSriastradh		case SHADER_RADIAL:
370103b705cfSriastradh			if (tmp->src.transform == NULL) {
370203b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
370303b705cfSriastradh				if (sna->cpu_features & SSE2) {
370403b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient__sse2;
370503b705cfSriastradh					tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient__sse2;
370603b705cfSriastradh				} else
370703b705cfSriastradh#endif
370803b705cfSriastradh				{
370903b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
371003b705cfSriastradh					tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient;
371103b705cfSriastradh				}
371203b705cfSriastradh			} else if (tmp->src.is_affine) {
371303b705cfSriastradh				tmp->src.scale[1] = tmp->src.scale[0] = 1. / tmp->src.transform->matrix[2][2];
371403b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
371503b705cfSriastradh				if (sna->cpu_features & SSE2) {
371603b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient__sse2;
371703b705cfSriastradh					tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient__sse2;
371803b705cfSriastradh				} else
371903b705cfSriastradh#endif
372003b705cfSriastradh				{
372103b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
372203b705cfSriastradh					tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient;
372303b705cfSriastradh				}
372403b705cfSriastradh			}
372503b705cfSriastradh			break;
372603b705cfSriastradh		case SHADER_TEXTURE:
372703b705cfSriastradh			if (tmp->src.transform == NULL) {
372803b705cfSriastradh				if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
372903b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
373003b705cfSriastradh					if (sna->cpu_features & SSE2) {
373103b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset__sse2;
373203b705cfSriastradh						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset__sse2;
373303b705cfSriastradh					} else
373403b705cfSriastradh#endif
373503b705cfSriastradh					{
373603b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset;
373703b705cfSriastradh						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset;
373803b705cfSriastradh					}
373903b705cfSriastradh				} else {
374003b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
374103b705cfSriastradh					if (sna->cpu_features & SSE2) {
374203b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_identity_source__sse2;
374303b705cfSriastradh						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source__sse2;
374403b705cfSriastradh					} else
374503b705cfSriastradh#endif
374603b705cfSriastradh					{
374703b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
374803b705cfSriastradh						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source;
374903b705cfSriastradh					}
375003b705cfSriastradh				}
375103b705cfSriastradh			} else if (tmp->src.is_affine) {
375203b705cfSriastradh				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
375303b705cfSriastradh				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
375403b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
375503b705cfSriastradh				if (sna->cpu_features & SSE2) {
375603b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_affine_source__sse2;
375703b705cfSriastradh					tmp->emit_boxes = gen3_emit_composite_boxes_affine_source__sse2;
375803b705cfSriastradh				} else
375903b705cfSriastradh#endif
376003b705cfSriastradh				{
376103b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
376203b705cfSriastradh					tmp->emit_boxes = gen3_emit_composite_boxes_affine_source;
376303b705cfSriastradh				}
376403b705cfSriastradh			}
376503b705cfSriastradh			break;
376603b705cfSriastradh		}
376703b705cfSriastradh	} else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) {
376803b705cfSriastradh		if (tmp->mask.transform == NULL) {
376903b705cfSriastradh			if (is_constant_ps(tmp->src.u.gen3.type)) {
377003b705cfSriastradh				if ((tmp->mask.offset[0]|tmp->mask.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
377103b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
377203b705cfSriastradh					if (sna->cpu_features & SSE2) {
377303b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2;
377403b705cfSriastradh					} else
377503b705cfSriastradh#endif
377603b705cfSriastradh					{
377703b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset;
377803b705cfSriastradh					}
377903b705cfSriastradh				} else {
378003b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
378103b705cfSriastradh					if (sna->cpu_features & SSE2) {
378203b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask__sse2;
378303b705cfSriastradh					} else
378403b705cfSriastradh#endif
378503b705cfSriastradh					{
378603b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask;
378703b705cfSriastradh					}
378803b705cfSriastradh				}
378903b705cfSriastradh			} else if (tmp->src.transform == NULL) {
379003b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
379103b705cfSriastradh				if (sna->cpu_features & SSE2) {
379203b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask__sse2;
379303b705cfSriastradh				} else
379403b705cfSriastradh#endif
379503b705cfSriastradh				{
379603b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
379703b705cfSriastradh				}
379803b705cfSriastradh			} else if (tmp->src.is_affine) {
379903b705cfSriastradh				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
380003b705cfSriastradh				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
380103b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
380203b705cfSriastradh				if (sna->cpu_features & SSE2) {
380303b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask__sse2;
380403b705cfSriastradh				} else
380503b705cfSriastradh#endif
380603b705cfSriastradh				{
380703b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask;
380803b705cfSriastradh				}
380903b705cfSriastradh			}
381003b705cfSriastradh		}
381103b705cfSriastradh	}
381203b705cfSriastradh
381303b705cfSriastradh	tmp->floats_per_vertex = 2;
381403b705cfSriastradh	if (!is_constant_ps(tmp->src.u.gen3.type))
381503b705cfSriastradh		tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
381603b705cfSriastradh	if (!is_constant_ps(tmp->mask.u.gen3.type))
381703b705cfSriastradh		tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
381803b705cfSriastradh	DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__,
381903b705cfSriastradh	     !is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0,
382003b705cfSriastradh	     !is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0,
382103b705cfSriastradh	     tmp->floats_per_vertex,
382203b705cfSriastradh	     tmp->prim_emit != gen3_emit_composite_primitive));
382303b705cfSriastradh	tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
382403b705cfSriastradh
382503b705cfSriastradh	tmp->blt   = gen3_render_composite_blt;
382603b705cfSriastradh	tmp->box   = gen3_render_composite_box;
382703b705cfSriastradh	tmp->boxes = gen3_render_composite_boxes__blt;
382803b705cfSriastradh	if (tmp->emit_boxes) {
382903b705cfSriastradh		tmp->boxes = gen3_render_composite_boxes;
383003b705cfSriastradh		tmp->thread_boxes = gen3_render_composite_boxes__thread;
383103b705cfSriastradh	}
383203b705cfSriastradh	tmp->done  = gen3_render_composite_done;
383303b705cfSriastradh
383403b705cfSriastradh	if (!kgem_check_bo(&sna->kgem,
383503b705cfSriastradh			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
383603b705cfSriastradh			   NULL)) {
383703b705cfSriastradh		kgem_submit(&sna->kgem);
383803b705cfSriastradh		if (!kgem_check_bo(&sna->kgem,
383903b705cfSriastradh				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
384003b705cfSriastradh				   NULL))
384103b705cfSriastradh			goto cleanup_mask;
384203b705cfSriastradh	}
384303b705cfSriastradh
384403b705cfSriastradh	gen3_emit_composite_state(sna, tmp);
384503b705cfSriastradh	gen3_align_vertex(sna, tmp);
384603b705cfSriastradh	return true;
384703b705cfSriastradh
384803b705cfSriastradhcleanup_mask:
384903b705cfSriastradh	if (tmp->mask.bo)
385003b705cfSriastradh		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
385103b705cfSriastradhcleanup_src:
385203b705cfSriastradh	if (tmp->src.bo)
385303b705cfSriastradh		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
385403b705cfSriastradhcleanup_dst:
385503b705cfSriastradh	if (tmp->redirect.real_bo)
385603b705cfSriastradh		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
385703b705cfSriastradh	return false;
385803b705cfSriastradh}
385903b705cfSriastradh
386003b705cfSriastradhstatic void
386103b705cfSriastradhgen3_emit_composite_spans_vertex(struct sna *sna,
386203b705cfSriastradh				 const struct sna_composite_spans_op *op,
386303b705cfSriastradh				 int16_t x, int16_t y,
386403b705cfSriastradh				 float opacity)
386503b705cfSriastradh{
386603b705cfSriastradh	gen3_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y);
386703b705cfSriastradh	gen3_emit_composite_texcoord(sna, &op->base.src, x, y);
386803b705cfSriastradh	OUT_VERTEX(opacity);
386903b705cfSriastradh}
387003b705cfSriastradh
387103b705cfSriastradhfastcall static void
387203b705cfSriastradhgen3_emit_composite_spans_primitive_zero(struct sna *sna,
387303b705cfSriastradh					 const struct sna_composite_spans_op *op,
387403b705cfSriastradh					 const BoxRec *box,
387503b705cfSriastradh					 float opacity)
387603b705cfSriastradh{
387703b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
387803b705cfSriastradh	sna->render.vertex_used += 6;
387903b705cfSriastradh
388003b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
388103b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
388203b705cfSriastradh
388303b705cfSriastradh	v[2] = op->base.dst.x + box->x1;
388403b705cfSriastradh	v[3] = v[1];
388503b705cfSriastradh
388603b705cfSriastradh	v[4] = v[2];
388703b705cfSriastradh	v[5] = op->base.dst.x + box->y1;
388803b705cfSriastradh}
388903b705cfSriastradh
389003b705cfSriastradhfastcall static void
389103b705cfSriastradhgen3_emit_composite_spans_primitive_zero__boxes(const struct sna_composite_spans_op *op,
389203b705cfSriastradh						const struct sna_opacity_box *b,
389303b705cfSriastradh						int nbox, float *v)
389403b705cfSriastradh{
389503b705cfSriastradh	do {
389603b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
389703b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
389803b705cfSriastradh
389903b705cfSriastradh		v[2] = op->base.dst.x + b->box.x1;
390003b705cfSriastradh		v[3] = v[1];
390103b705cfSriastradh
390203b705cfSriastradh		v[4] = v[2];
390303b705cfSriastradh		v[5] = op->base.dst.x + b->box.y1;
390403b705cfSriastradh
390503b705cfSriastradh		v += 6;
390603b705cfSriastradh		b++;
390703b705cfSriastradh	} while (--nbox);
390803b705cfSriastradh}
390903b705cfSriastradh
391003b705cfSriastradhfastcall static void
391103b705cfSriastradhgen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna,
391203b705cfSriastradh						   const struct sna_composite_spans_op *op,
391303b705cfSriastradh						   const BoxRec *box,
391403b705cfSriastradh						   float opacity)
391503b705cfSriastradh{
391603b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
391703b705cfSriastradh	sna->render.vertex_used += 6;
391803b705cfSriastradh
391903b705cfSriastradh	v[0] = box->x2;
392003b705cfSriastradh	v[3] = v[1] = box->y2;
392103b705cfSriastradh	v[4] = v[2] = box->x1;
392203b705cfSriastradh	v[5] = box->y1;
392303b705cfSriastradh}
392403b705cfSriastradh
392503b705cfSriastradhfastcall static void
392603b705cfSriastradhgen3_emit_composite_spans_primitive_zero_no_offset__boxes(const struct sna_composite_spans_op *op,
392703b705cfSriastradh							  const struct sna_opacity_box *b,
392803b705cfSriastradh							  int nbox, float *v)
392903b705cfSriastradh{
393003b705cfSriastradh	do {
393103b705cfSriastradh		v[0] = b->box.x2;
393203b705cfSriastradh		v[3] = v[1] = b->box.y2;
393303b705cfSriastradh		v[4] = v[2] = b->box.x1;
393403b705cfSriastradh		v[5] = b->box.y1;
393503b705cfSriastradh
393603b705cfSriastradh		b++;
393703b705cfSriastradh		v += 6;
393803b705cfSriastradh	} while (--nbox);
393903b705cfSriastradh}
394003b705cfSriastradh
394103b705cfSriastradhfastcall static void
394203b705cfSriastradhgen3_emit_composite_spans_primitive_constant(struct sna *sna,
394303b705cfSriastradh					     const struct sna_composite_spans_op *op,
394403b705cfSriastradh					     const BoxRec *box,
394503b705cfSriastradh					     float opacity)
394603b705cfSriastradh{
394703b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
394803b705cfSriastradh	sna->render.vertex_used += 9;
394903b705cfSriastradh
395003b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
395103b705cfSriastradh	v[6] = v[3] = op->base.dst.x + box->x1;
395203b705cfSriastradh	v[4] = v[1] = op->base.dst.y + box->y2;
395303b705cfSriastradh	v[7] = op->base.dst.y + box->y1;
395403b705cfSriastradh	v[8] = v[5] = v[2] = opacity;
395503b705cfSriastradh}
395603b705cfSriastradh
395703b705cfSriastradhfastcall static void
395803b705cfSriastradhgen3_emit_composite_spans_primitive_constant__boxes(const struct sna_composite_spans_op *op,
395903b705cfSriastradh						    const struct sna_opacity_box *b,
396003b705cfSriastradh						    int nbox,
396103b705cfSriastradh						    float *v)
396203b705cfSriastradh{
396303b705cfSriastradh	do {
396403b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
396503b705cfSriastradh		v[6] = v[3] = op->base.dst.x + b->box.x1;
396603b705cfSriastradh		v[4] = v[1] = op->base.dst.y + b->box.y2;
396703b705cfSriastradh		v[7] = op->base.dst.y + b->box.y1;
396803b705cfSriastradh		v[8] = v[5] = v[2] = b->alpha;
396903b705cfSriastradh
397003b705cfSriastradh		v += 9;
397103b705cfSriastradh		b++;
397203b705cfSriastradh	} while (--nbox);
397303b705cfSriastradh}
397403b705cfSriastradh
397503b705cfSriastradhfastcall static void
397603b705cfSriastradhgen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna,
397703b705cfSriastradh						       const struct sna_composite_spans_op *op,
397803b705cfSriastradh						       const BoxRec *box,
397903b705cfSriastradh						       float opacity)
398003b705cfSriastradh{
398103b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
398203b705cfSriastradh	sna->render.vertex_used += 9;
398303b705cfSriastradh
398403b705cfSriastradh	v[0] = box->x2;
398503b705cfSriastradh	v[6] = v[3] = box->x1;
398603b705cfSriastradh	v[4] = v[1] = box->y2;
398703b705cfSriastradh	v[7] = box->y1;
398803b705cfSriastradh	v[8] = v[5] = v[2] = opacity;
398903b705cfSriastradh}
399003b705cfSriastradh
399103b705cfSriastradhfastcall static void
399203b705cfSriastradhgen3_emit_composite_spans_primitive_constant_no_offset__boxes(const struct sna_composite_spans_op *op,
399303b705cfSriastradh							      const struct sna_opacity_box *b,
399403b705cfSriastradh							      int nbox, float *v)
399503b705cfSriastradh{
399603b705cfSriastradh	do {
399703b705cfSriastradh		v[0] = b->box.x2;
399803b705cfSriastradh		v[6] = v[3] = b->box.x1;
399903b705cfSriastradh		v[4] = v[1] = b->box.y2;
400003b705cfSriastradh		v[7] = b->box.y1;
400103b705cfSriastradh		v[8] = v[5] = v[2] = b->alpha;
400203b705cfSriastradh
400303b705cfSriastradh		v += 9;
400403b705cfSriastradh		b++;
400503b705cfSriastradh	} while (--nbox);
400603b705cfSriastradh}
400703b705cfSriastradh
400803b705cfSriastradhfastcall static void
400903b705cfSriastradhgen3_emit_composite_spans_primitive_identity_source(struct sna *sna,
401003b705cfSriastradh						    const struct sna_composite_spans_op *op,
401103b705cfSriastradh						    const BoxRec *box,
401203b705cfSriastradh						    float opacity)
401303b705cfSriastradh{
401403b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
401503b705cfSriastradh	sna->render.vertex_used += 15;
401603b705cfSriastradh
401703b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
401803b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
401903b705cfSriastradh	v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
402003b705cfSriastradh	v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
402103b705cfSriastradh	v[4] = opacity;
402203b705cfSriastradh
402303b705cfSriastradh	v[5] = op->base.dst.x + box->x1;
402403b705cfSriastradh	v[6] = v[1];
402503b705cfSriastradh	v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
402603b705cfSriastradh	v[8] = v[3];
402703b705cfSriastradh	v[9] = opacity;
402803b705cfSriastradh
402903b705cfSriastradh	v[10] = v[5];
403003b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
403103b705cfSriastradh	v[12] = v[7];
403203b705cfSriastradh	v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
403303b705cfSriastradh	v[14] = opacity;
403403b705cfSriastradh}
403503b705cfSriastradh
403603b705cfSriastradhfastcall static void
403703b705cfSriastradhgen3_emit_composite_spans_primitive_identity_source__boxes(const struct sna_composite_spans_op *op,
403803b705cfSriastradh							   const struct sna_opacity_box *b,
403903b705cfSriastradh							   int nbox,
404003b705cfSriastradh							   float *v)
404103b705cfSriastradh{
404203b705cfSriastradh	do {
404303b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
404403b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
404503b705cfSriastradh		v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0];
404603b705cfSriastradh		v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1];
404703b705cfSriastradh		v[4] = b->alpha;
404803b705cfSriastradh
404903b705cfSriastradh		v[5] = op->base.dst.x + b->box.x1;
405003b705cfSriastradh		v[6] = v[1];
405103b705cfSriastradh		v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0];
405203b705cfSriastradh		v[8] = v[3];
405303b705cfSriastradh		v[9] = b->alpha;
405403b705cfSriastradh
405503b705cfSriastradh		v[10] = v[5];
405603b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
405703b705cfSriastradh		v[12] = v[7];
405803b705cfSriastradh		v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1];
405903b705cfSriastradh		v[14] = b->alpha;
406003b705cfSriastradh
406103b705cfSriastradh		v += 15;
406203b705cfSriastradh		b++;
406303b705cfSriastradh	} while (--nbox);
406403b705cfSriastradh}
406503b705cfSriastradh
406603b705cfSriastradhfastcall static void
406703b705cfSriastradhgen3_emit_composite_spans_primitive_affine_source(struct sna *sna,
406803b705cfSriastradh						  const struct sna_composite_spans_op *op,
406903b705cfSriastradh						  const BoxRec *box,
407003b705cfSriastradh						  float opacity)
407103b705cfSriastradh{
407203b705cfSriastradh	PictTransform *transform = op->base.src.transform;
407303b705cfSriastradh	float *v;
407403b705cfSriastradh
407503b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
407603b705cfSriastradh	sna->render.vertex_used += 15;
407703b705cfSriastradh
407803b705cfSriastradh	v[0]  = op->base.dst.x + box->x2;
407903b705cfSriastradh	v[6]  = v[1] = op->base.dst.y + box->y2;
408003b705cfSriastradh	v[10] = v[5] = op->base.dst.x + box->x1;
408103b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
408203b705cfSriastradh	v[14] = v[9] = v[4]  = opacity;
408303b705cfSriastradh
408403b705cfSriastradh	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
408503b705cfSriastradh				    (int)op->base.src.offset[1] + box->y2,
408603b705cfSriastradh				    transform, op->base.src.scale,
408703b705cfSriastradh				    &v[2], &v[3]);
408803b705cfSriastradh
408903b705cfSriastradh	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
409003b705cfSriastradh				    (int)op->base.src.offset[1] + box->y2,
409103b705cfSriastradh				    transform, op->base.src.scale,
409203b705cfSriastradh				    &v[7], &v[8]);
409303b705cfSriastradh
409403b705cfSriastradh	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
409503b705cfSriastradh				    (int)op->base.src.offset[1] + box->y1,
409603b705cfSriastradh				    transform, op->base.src.scale,
409703b705cfSriastradh				    &v[12], &v[13]);
409803b705cfSriastradh}
409903b705cfSriastradh
410003b705cfSriastradhfastcall static void
410103b705cfSriastradhgen3_emit_composite_spans_primitive_affine_source__boxes(const struct sna_composite_spans_op *op,
410203b705cfSriastradh							 const struct sna_opacity_box *b,
410303b705cfSriastradh							 int nbox,
410403b705cfSriastradh							 float *v)
410503b705cfSriastradh{
410603b705cfSriastradh	PictTransform *transform = op->base.src.transform;
410703b705cfSriastradh
410803b705cfSriastradh	do {
410903b705cfSriastradh		v[0]  = op->base.dst.x + b->box.x2;
411003b705cfSriastradh		v[6]  = v[1] = op->base.dst.y + b->box.y2;
411103b705cfSriastradh		v[10] = v[5] = op->base.dst.x + b->box.x1;
411203b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
411303b705cfSriastradh		v[14] = v[9] = v[4]  = b->alpha;
411403b705cfSriastradh
411503b705cfSriastradh		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2,
411603b705cfSriastradh					    (int)op->base.src.offset[1] + b->box.y2,
411703b705cfSriastradh					    transform, op->base.src.scale,
411803b705cfSriastradh					    &v[2], &v[3]);
411903b705cfSriastradh
412003b705cfSriastradh		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
412103b705cfSriastradh					    (int)op->base.src.offset[1] + b->box.y2,
412203b705cfSriastradh					    transform, op->base.src.scale,
412303b705cfSriastradh					    &v[7], &v[8]);
412403b705cfSriastradh
412503b705cfSriastradh		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
412603b705cfSriastradh					    (int)op->base.src.offset[1] + b->box.y1,
412703b705cfSriastradh					    transform, op->base.src.scale,
412803b705cfSriastradh					    &v[12], &v[13]);
412903b705cfSriastradh		v += 15;
413003b705cfSriastradh		b++;
413103b705cfSriastradh	} while (--nbox);
413203b705cfSriastradh}
413303b705cfSriastradh
413403b705cfSriastradhfastcall static void
413503b705cfSriastradhgen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna,
413603b705cfSriastradh						      const struct sna_composite_spans_op *op,
413703b705cfSriastradh						      const BoxRec *box,
413803b705cfSriastradh						      float opacity)
413903b705cfSriastradh{
414003b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
414103b705cfSriastradh	sna->render.vertex_used += 15;
414203b705cfSriastradh
414303b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
414403b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
414503b705cfSriastradh	v[2] = op->base.src.offset[0] + box->x2;
414603b705cfSriastradh	v[3] = op->base.src.offset[1] + box->y2;
414703b705cfSriastradh	v[4] = opacity;
414803b705cfSriastradh
414903b705cfSriastradh	v[5] = op->base.dst.x + box->x1;
415003b705cfSriastradh	v[6] = v[1];
415103b705cfSriastradh	v[7] = op->base.src.offset[0] + box->x1;
415203b705cfSriastradh	v[8] = v[3];
415303b705cfSriastradh	v[9] = opacity;
415403b705cfSriastradh
415503b705cfSriastradh	v[10] = v[5];
415603b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
415703b705cfSriastradh	v[12] = v[7];
415803b705cfSriastradh	v[13] = op->base.src.offset[1] + box->y1;
415903b705cfSriastradh	v[14] = opacity;
416003b705cfSriastradh}
416103b705cfSriastradh
416203b705cfSriastradhfastcall static void
416303b705cfSriastradhgen3_emit_composite_spans_primitive_identity_gradient__boxes(const struct sna_composite_spans_op *op,
416403b705cfSriastradh							     const struct sna_opacity_box *b,
416503b705cfSriastradh							     int nbox,
416603b705cfSriastradh							     float *v)
416703b705cfSriastradh{
416803b705cfSriastradh	do {
416903b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
417003b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
417103b705cfSriastradh		v[2] = op->base.src.offset[0] + b->box.x2;
417203b705cfSriastradh		v[3] = op->base.src.offset[1] + b->box.y2;
417303b705cfSriastradh		v[4] = b->alpha;
417403b705cfSriastradh
417503b705cfSriastradh		v[5] = op->base.dst.x + b->box.x1;
417603b705cfSriastradh		v[6] = v[1];
417703b705cfSriastradh		v[7] = op->base.src.offset[0] + b->box.x1;
417803b705cfSriastradh		v[8] = v[3];
417903b705cfSriastradh		v[9] = b->alpha;
418003b705cfSriastradh
418103b705cfSriastradh		v[10] = v[5];
418203b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
418303b705cfSriastradh		v[12] = v[7];
418403b705cfSriastradh		v[13] = op->base.src.offset[1] + b->box.y1;
418503b705cfSriastradh		v[14] = b->alpha;
418603b705cfSriastradh
418703b705cfSriastradh		v += 15;
418803b705cfSriastradh		b++;
418903b705cfSriastradh	} while (--nbox);
419003b705cfSriastradh}
419103b705cfSriastradh
419203b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
419303b705cfSriastradhsse2 fastcall static void
419403b705cfSriastradhgen3_emit_composite_spans_primitive_constant__sse2(struct sna *sna,
419503b705cfSriastradh						   const struct sna_composite_spans_op *op,
419603b705cfSriastradh						   const BoxRec *box,
419703b705cfSriastradh						   float opacity)
419803b705cfSriastradh{
419903b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
420003b705cfSriastradh	sna->render.vertex_used += 9;
420103b705cfSriastradh
420203b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
420303b705cfSriastradh	v[6] = v[3] = op->base.dst.x + box->x1;
420403b705cfSriastradh	v[4] = v[1] = op->base.dst.y + box->y2;
420503b705cfSriastradh	v[7] = op->base.dst.y + box->y1;
420603b705cfSriastradh	v[8] = v[5] = v[2] = opacity;
420703b705cfSriastradh}
420803b705cfSriastradh
420903b705cfSriastradhsse2 fastcall static void
421003b705cfSriastradhgen3_emit_composite_spans_primitive_constant__sse2__boxes(const struct sna_composite_spans_op *op,
421103b705cfSriastradh							  const struct sna_opacity_box *b,
421203b705cfSriastradh							  int nbox,
421303b705cfSriastradh							  float *v)
421403b705cfSriastradh{
421503b705cfSriastradh	do {
421603b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
421703b705cfSriastradh		v[6] = v[3] = op->base.dst.x + b->box.x1;
421803b705cfSriastradh		v[4] = v[1] = op->base.dst.y + b->box.y2;
421903b705cfSriastradh		v[7] = op->base.dst.y + b->box.y1;
422003b705cfSriastradh		v[8] = v[5] = v[2] = b->alpha;
422103b705cfSriastradh
422203b705cfSriastradh		v += 9;
422303b705cfSriastradh		b++;
422403b705cfSriastradh	} while (--nbox);
422503b705cfSriastradh}
422603b705cfSriastradh
422703b705cfSriastradhsse2 fastcall static void
422803b705cfSriastradhgen3_render_composite_spans_constant_box__sse2(struct sna *sna,
422903b705cfSriastradh					       const struct sna_composite_spans_op *op,
423003b705cfSriastradh					       const BoxRec *box, float opacity)
423103b705cfSriastradh{
423203b705cfSriastradh	float *v;
423303b705cfSriastradh	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
423403b705cfSriastradh	     __FUNCTION__,
423503b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
423603b705cfSriastradh	     opacity,
423703b705cfSriastradh	     op->base.dst.x, op->base.dst.y,
423803b705cfSriastradh	     box->x1, box->y1,
423903b705cfSriastradh	     box->x2 - box->x1,
424003b705cfSriastradh	     box->y2 - box->y1));
424103b705cfSriastradh
424203b705cfSriastradh	gen3_get_rectangles(sna, &op->base, 1);
424303b705cfSriastradh
424403b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
424503b705cfSriastradh	sna->render.vertex_used += 9;
424603b705cfSriastradh
424703b705cfSriastradh	v[0] = box->x2;
424803b705cfSriastradh	v[6] = v[3] = box->x1;
424903b705cfSriastradh	v[4] = v[1] = box->y2;
425003b705cfSriastradh	v[7] = box->y1;
425103b705cfSriastradh	v[8] = v[5] = v[2] = opacity;
425203b705cfSriastradh}
425303b705cfSriastradh
425403b705cfSriastradhsse2 fastcall static void
425503b705cfSriastradhgen3_render_composite_spans_constant_thread__sse2__boxes(struct sna *sna,
425603b705cfSriastradh							 const struct sna_composite_spans_op *op,
425703b705cfSriastradh							 const struct sna_opacity_box *box,
425803b705cfSriastradh							 int nbox)
425903b705cfSriastradh{
426003b705cfSriastradh	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
426103b705cfSriastradh	     __FUNCTION__, nbox,
426203b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
426303b705cfSriastradh	     op->base.dst.x, op->base.dst.y));
426403b705cfSriastradh
426503b705cfSriastradh	sna_vertex_lock(&sna->render);
426603b705cfSriastradh	do {
426703b705cfSriastradh		int nbox_this_time;
426803b705cfSriastradh		float *v;
426903b705cfSriastradh
427003b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
427103b705cfSriastradh		assert(nbox_this_time);
427203b705cfSriastradh		nbox -= nbox_this_time;
427303b705cfSriastradh
427403b705cfSriastradh		v = sna->render.vertices + sna->render.vertex_used;
427503b705cfSriastradh		sna->render.vertex_used += nbox_this_time * 9;
427603b705cfSriastradh
427703b705cfSriastradh		sna_vertex_acquire__locked(&sna->render);
427803b705cfSriastradh		sna_vertex_unlock(&sna->render);
427903b705cfSriastradh
428003b705cfSriastradh		do {
428103b705cfSriastradh			v[0] = box->box.x2;
428203b705cfSriastradh			v[6] = v[3] = box->box.x1;
428303b705cfSriastradh			v[4] = v[1] = box->box.y2;
428403b705cfSriastradh			v[7] = box->box.y1;
428503b705cfSriastradh			v[8] = v[5] = v[2] = box->alpha;
428603b705cfSriastradh			v += 9;
428703b705cfSriastradh			box++;
428803b705cfSriastradh		} while (--nbox_this_time);
428903b705cfSriastradh
429003b705cfSriastradh		sna_vertex_lock(&sna->render);
429103b705cfSriastradh		sna_vertex_release__locked(&sna->render);
429203b705cfSriastradh	} while (nbox);
429303b705cfSriastradh	sna_vertex_unlock(&sna->render);
429403b705cfSriastradh}
429503b705cfSriastradh
429603b705cfSriastradhsse2 fastcall static void
429703b705cfSriastradhgen3_emit_composite_spans_primitive_constant__sse2__no_offset(struct sna *sna,
429803b705cfSriastradh							      const struct sna_composite_spans_op *op,
429903b705cfSriastradh							      const BoxRec *box,
430003b705cfSriastradh							      float opacity)
430103b705cfSriastradh{
430203b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
430303b705cfSriastradh	sna->render.vertex_used += 9;
430403b705cfSriastradh
430503b705cfSriastradh	v[0] = box->x2;
430603b705cfSriastradh	v[6] = v[3] = box->x1;
430703b705cfSriastradh	v[4] = v[1] = box->y2;
430803b705cfSriastradh	v[7] = box->y1;
430903b705cfSriastradh	v[8] = v[5] = v[2] = opacity;
431003b705cfSriastradh}
431103b705cfSriastradh
431203b705cfSriastradhsse2 fastcall static void
431303b705cfSriastradhgen3_emit_composite_spans_primitive_constant__sse2__no_offset__boxes(const struct sna_composite_spans_op *op,
431403b705cfSriastradh								     const struct sna_opacity_box *b,
431503b705cfSriastradh								     int nbox, float *v)
431603b705cfSriastradh{
431703b705cfSriastradh	do {
431803b705cfSriastradh		v[0] = b->box.x2;
431903b705cfSriastradh		v[6] = v[3] = b->box.x1;
432003b705cfSriastradh		v[4] = v[1] = b->box.y2;
432103b705cfSriastradh		v[7] = b->box.y1;
432203b705cfSriastradh		v[8] = v[5] = v[2] = b->alpha;
432303b705cfSriastradh
432403b705cfSriastradh		v += 9;
432503b705cfSriastradh		b++;
432603b705cfSriastradh	} while (--nbox);
432703b705cfSriastradh}
432803b705cfSriastradh
432903b705cfSriastradhsse2 fastcall static void
433003b705cfSriastradhgen3_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna,
433103b705cfSriastradh							  const struct sna_composite_spans_op *op,
433203b705cfSriastradh							  const BoxRec *box,
433303b705cfSriastradh							  float opacity)
433403b705cfSriastradh{
433503b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
433603b705cfSriastradh	sna->render.vertex_used += 15;
433703b705cfSriastradh
433803b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
433903b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
434003b705cfSriastradh	v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
434103b705cfSriastradh	v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
434203b705cfSriastradh	v[4] = opacity;
434303b705cfSriastradh
434403b705cfSriastradh	v[5] = op->base.dst.x + box->x1;
434503b705cfSriastradh	v[6] = v[1];
434603b705cfSriastradh	v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
434703b705cfSriastradh	v[8] = v[3];
434803b705cfSriastradh	v[9] = opacity;
434903b705cfSriastradh
435003b705cfSriastradh	v[10] = v[5];
435103b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
435203b705cfSriastradh	v[12] = v[7];
435303b705cfSriastradh	v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
435403b705cfSriastradh	v[14] = opacity;
435503b705cfSriastradh}
435603b705cfSriastradh
435703b705cfSriastradhsse2 fastcall static void
435803b705cfSriastradhgen3_emit_composite_spans_primitive_identity_source__sse2__boxes(const struct sna_composite_spans_op *op,
435903b705cfSriastradh								 const struct sna_opacity_box *b,
436003b705cfSriastradh								 int nbox,
436103b705cfSriastradh								 float *v)
436203b705cfSriastradh{
436303b705cfSriastradh	do {
436403b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
436503b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
436603b705cfSriastradh		v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0];
436703b705cfSriastradh		v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1];
436803b705cfSriastradh		v[4] = b->alpha;
436903b705cfSriastradh
437003b705cfSriastradh		v[5] = op->base.dst.x + b->box.x1;
437103b705cfSriastradh		v[6] = v[1];
437203b705cfSriastradh		v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0];
437303b705cfSriastradh		v[8] = v[3];
437403b705cfSriastradh		v[9] = b->alpha;
437503b705cfSriastradh
437603b705cfSriastradh		v[10] = v[5];
437703b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
437803b705cfSriastradh		v[12] = v[7];
437903b705cfSriastradh		v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1];
438003b705cfSriastradh		v[14] = b->alpha;
438103b705cfSriastradh
438203b705cfSriastradh		v += 15;
438303b705cfSriastradh		b++;
438403b705cfSriastradh	} while (--nbox);
438503b705cfSriastradh}
438603b705cfSriastradhsse2 fastcall static void
438703b705cfSriastradhgen3_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna,
438803b705cfSriastradh							const struct sna_composite_spans_op *op,
438903b705cfSriastradh							const BoxRec *box,
439003b705cfSriastradh							float opacity)
439103b705cfSriastradh{
439203b705cfSriastradh	PictTransform *transform = op->base.src.transform;
439303b705cfSriastradh	float *v;
439403b705cfSriastradh
439503b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
439603b705cfSriastradh	sna->render.vertex_used += 15;
439703b705cfSriastradh
439803b705cfSriastradh	v[0]  = op->base.dst.x + box->x2;
439903b705cfSriastradh	v[6]  = v[1] = op->base.dst.y + box->y2;
440003b705cfSriastradh	v[10] = v[5] = op->base.dst.x + box->x1;
440103b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
440203b705cfSriastradh	v[14] = v[9] = v[4]  = opacity;
440303b705cfSriastradh
440403b705cfSriastradh	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
440503b705cfSriastradh				    (int)op->base.src.offset[1] + box->y2,
440603b705cfSriastradh				    transform, op->base.src.scale,
440703b705cfSriastradh				    &v[2], &v[3]);
440803b705cfSriastradh
440903b705cfSriastradh	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
441003b705cfSriastradh				    (int)op->base.src.offset[1] + box->y2,
441103b705cfSriastradh				    transform, op->base.src.scale,
441203b705cfSriastradh				    &v[7], &v[8]);
441303b705cfSriastradh
441403b705cfSriastradh	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
441503b705cfSriastradh				    (int)op->base.src.offset[1] + box->y1,
441603b705cfSriastradh				    transform, op->base.src.scale,
441703b705cfSriastradh				    &v[12], &v[13]);
441803b705cfSriastradh}
441903b705cfSriastradh
442003b705cfSriastradhsse2 fastcall static void
442103b705cfSriastradhgen3_emit_composite_spans_primitive_affine_source__sse2__boxes(const struct sna_composite_spans_op *op,
442203b705cfSriastradh							       const struct sna_opacity_box *b,
442303b705cfSriastradh							       int nbox,
442403b705cfSriastradh							       float *v)
442503b705cfSriastradh{
442603b705cfSriastradh	PictTransform *transform = op->base.src.transform;
442703b705cfSriastradh
442803b705cfSriastradh	do {
442903b705cfSriastradh		v[0]  = op->base.dst.x + b->box.x2;
443003b705cfSriastradh		v[6]  = v[1] = op->base.dst.y + b->box.y2;
443103b705cfSriastradh		v[10] = v[5] = op->base.dst.x + b->box.x1;
443203b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
443303b705cfSriastradh		v[14] = v[9] = v[4]  = b->alpha;
443403b705cfSriastradh
443503b705cfSriastradh		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2,
443603b705cfSriastradh					    (int)op->base.src.offset[1] + b->box.y2,
443703b705cfSriastradh					    transform, op->base.src.scale,
443803b705cfSriastradh					    &v[2], &v[3]);
443903b705cfSriastradh
444003b705cfSriastradh		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
444103b705cfSriastradh					    (int)op->base.src.offset[1] + b->box.y2,
444203b705cfSriastradh					    transform, op->base.src.scale,
444303b705cfSriastradh					    &v[7], &v[8]);
444403b705cfSriastradh
444503b705cfSriastradh		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
444603b705cfSriastradh					    (int)op->base.src.offset[1] + b->box.y1,
444703b705cfSriastradh					    transform, op->base.src.scale,
444803b705cfSriastradh					    &v[12], &v[13]);
444903b705cfSriastradh		v += 15;
445003b705cfSriastradh		b++;
445103b705cfSriastradh	} while (--nbox);
445203b705cfSriastradh}
445303b705cfSriastradh
445403b705cfSriastradhsse2 fastcall static void
445503b705cfSriastradhgen3_emit_composite_spans_primitive_identity_gradient__sse2(struct sna *sna,
445603b705cfSriastradh							    const struct sna_composite_spans_op *op,
445703b705cfSriastradh							    const BoxRec *box,
445803b705cfSriastradh							    float opacity)
445903b705cfSriastradh{
446003b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
446103b705cfSriastradh	sna->render.vertex_used += 15;
446203b705cfSriastradh
446303b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
446403b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
446503b705cfSriastradh	v[2] = op->base.src.offset[0] + box->x2;
446603b705cfSriastradh	v[3] = op->base.src.offset[1] + box->y2;
446703b705cfSriastradh	v[4] = opacity;
446803b705cfSriastradh
446903b705cfSriastradh	v[5] = op->base.dst.x + box->x1;
447003b705cfSriastradh	v[6] = v[1];
447103b705cfSriastradh	v[7] = op->base.src.offset[0] + box->x1;
447203b705cfSriastradh	v[8] = v[3];
447303b705cfSriastradh	v[9] = opacity;
447403b705cfSriastradh
447503b705cfSriastradh	v[10] = v[5];
447603b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
447703b705cfSriastradh	v[12] = v[7];
447803b705cfSriastradh	v[13] = op->base.src.offset[1] + box->y1;
447903b705cfSriastradh	v[14] = opacity;
448003b705cfSriastradh}
448103b705cfSriastradh
448203b705cfSriastradhsse2 fastcall static void
448303b705cfSriastradhgen3_emit_composite_spans_primitive_identity_gradient__sse2__boxes(const struct sna_composite_spans_op *op,
448403b705cfSriastradh								   const struct sna_opacity_box *b,
448503b705cfSriastradh								   int nbox,
448603b705cfSriastradh								   float *v)
448703b705cfSriastradh{
448803b705cfSriastradh	do {
448903b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
449003b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
449103b705cfSriastradh		v[2] = op->base.src.offset[0] + b->box.x2;
449203b705cfSriastradh		v[3] = op->base.src.offset[1] + b->box.y2;
449303b705cfSriastradh		v[4] = b->alpha;
449403b705cfSriastradh
449503b705cfSriastradh		v[5] = op->base.dst.x + b->box.x1;
449603b705cfSriastradh		v[6] = v[1];
449703b705cfSriastradh		v[7] = op->base.src.offset[0] + b->box.x1;
449803b705cfSriastradh		v[8] = v[3];
449903b705cfSriastradh		v[9] = b->alpha;
450003b705cfSriastradh
450103b705cfSriastradh		v[10] = v[5];
450203b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
450303b705cfSriastradh		v[12] = v[7];
450403b705cfSriastradh		v[13] = op->base.src.offset[1] + b->box.y1;
450503b705cfSriastradh		v[14] = b->alpha;
450603b705cfSriastradh
450703b705cfSriastradh		v += 15;
450803b705cfSriastradh		b++;
450903b705cfSriastradh	} while (--nbox);
451003b705cfSriastradh}
451103b705cfSriastradh
451203b705cfSriastradhsse2 fastcall static void
451303b705cfSriastradhgen3_emit_composite_spans_primitive_affine_gradient__sse2(struct sna *sna,
451403b705cfSriastradh							  const struct sna_composite_spans_op *op,
451503b705cfSriastradh							  const BoxRec *box,
451603b705cfSriastradh							  float opacity)
451703b705cfSriastradh{
451803b705cfSriastradh	PictTransform *transform = op->base.src.transform;
451903b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
452003b705cfSriastradh	sna->render.vertex_used += 15;
452103b705cfSriastradh
452203b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
452303b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
452403b705cfSriastradh	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x2,
452503b705cfSriastradh				    op->base.src.offset[1] + box->y2,
452603b705cfSriastradh				    transform, op->base.src.scale,
452703b705cfSriastradh				    &v[2], &v[3]);
452803b705cfSriastradh	v[4] = opacity;
452903b705cfSriastradh
453003b705cfSriastradh	v[5] = op->base.dst.x + box->x1;
453103b705cfSriastradh	v[6] = v[1];
453203b705cfSriastradh	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
453303b705cfSriastradh				    op->base.src.offset[1] + box->y2,
453403b705cfSriastradh				    transform, op->base.src.scale,
453503b705cfSriastradh				    &v[7], &v[8]);
453603b705cfSriastradh	v[9] = opacity;
453703b705cfSriastradh
453803b705cfSriastradh	v[10] = v[5];
453903b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
454003b705cfSriastradh	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
454103b705cfSriastradh				    op->base.src.offset[1] + box->y1,
454203b705cfSriastradh				    transform, op->base.src.scale,
454303b705cfSriastradh				    &v[12], &v[13]);
454403b705cfSriastradh	v[14] = opacity;
454503b705cfSriastradh}
454603b705cfSriastradh
454703b705cfSriastradhsse2 fastcall static void
454803b705cfSriastradhgen3_emit_composite_spans_primitive_affine_gradient__sse2__boxes(const struct sna_composite_spans_op *op,
454903b705cfSriastradh								 const struct sna_opacity_box *b,
455003b705cfSriastradh								 int nbox,
455103b705cfSriastradh								 float *v)
455203b705cfSriastradh{
455303b705cfSriastradh	PictTransform *transform = op->base.src.transform;
455403b705cfSriastradh
455503b705cfSriastradh	do {
455603b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
455703b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
455803b705cfSriastradh		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
455903b705cfSriastradh					    op->base.src.offset[1] + b->box.y2,
456003b705cfSriastradh					    transform, op->base.src.scale,
456103b705cfSriastradh					    &v[2], &v[3]);
456203b705cfSriastradh		v[4] = b->alpha;
456303b705cfSriastradh
456403b705cfSriastradh		v[5] = op->base.dst.x + b->box.x1;
456503b705cfSriastradh		v[6] = v[1];
456603b705cfSriastradh		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
456703b705cfSriastradh					    op->base.src.offset[1] + b->box.y2,
456803b705cfSriastradh					    transform, op->base.src.scale,
456903b705cfSriastradh					    &v[7], &v[8]);
457003b705cfSriastradh		v[9] = b->alpha;
457103b705cfSriastradh
457203b705cfSriastradh		v[10] = v[5];
457303b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
457403b705cfSriastradh		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
457503b705cfSriastradh					    op->base.src.offset[1] + b->box.y1,
457603b705cfSriastradh					    transform, op->base.src.scale,
457703b705cfSriastradh					    &v[12], &v[13]);
457803b705cfSriastradh		v[14] = b->alpha;
457903b705cfSriastradh		v += 15;
458003b705cfSriastradh		b++;
458103b705cfSriastradh	} while (--nbox);
458203b705cfSriastradh}
458303b705cfSriastradh#endif
458403b705cfSriastradh
458503b705cfSriastradhfastcall static void
458603b705cfSriastradhgen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna,
458703b705cfSriastradh						    const struct sna_composite_spans_op *op,
458803b705cfSriastradh						    const BoxRec *box,
458903b705cfSriastradh						    float opacity)
459003b705cfSriastradh{
459103b705cfSriastradh	PictTransform *transform = op->base.src.transform;
459203b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
459303b705cfSriastradh	sna->render.vertex_used += 15;
459403b705cfSriastradh
459503b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
459603b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
459703b705cfSriastradh	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x2,
459803b705cfSriastradh				    op->base.src.offset[1] + box->y2,
459903b705cfSriastradh				    transform, op->base.src.scale,
460003b705cfSriastradh				    &v[2], &v[3]);
460103b705cfSriastradh	v[4] = opacity;
460203b705cfSriastradh
460303b705cfSriastradh	v[5] = op->base.dst.x + box->x1;
460403b705cfSriastradh	v[6] = v[1];
460503b705cfSriastradh	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
460603b705cfSriastradh				    op->base.src.offset[1] + box->y2,
460703b705cfSriastradh				    transform, op->base.src.scale,
460803b705cfSriastradh				    &v[7], &v[8]);
460903b705cfSriastradh	v[9] = opacity;
461003b705cfSriastradh
461103b705cfSriastradh	v[10] = v[5];
461203b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
461303b705cfSriastradh	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
461403b705cfSriastradh				    op->base.src.offset[1] + box->y1,
461503b705cfSriastradh				    transform, op->base.src.scale,
461603b705cfSriastradh				    &v[12], &v[13]);
461703b705cfSriastradh	v[14] = opacity;
461803b705cfSriastradh}
461903b705cfSriastradh
462003b705cfSriastradhfastcall static void
462103b705cfSriastradhgen3_emit_composite_spans_primitive_affine_gradient__boxes(const struct sna_composite_spans_op *op,
462203b705cfSriastradh							   const struct sna_opacity_box *b,
462303b705cfSriastradh							   int nbox,
462403b705cfSriastradh							   float *v)
462503b705cfSriastradh{
462603b705cfSriastradh	PictTransform *transform = op->base.src.transform;
462703b705cfSriastradh
462803b705cfSriastradh	do {
462903b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
463003b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
463103b705cfSriastradh		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
463203b705cfSriastradh					    op->base.src.offset[1] + b->box.y2,
463303b705cfSriastradh					    transform, op->base.src.scale,
463403b705cfSriastradh					    &v[2], &v[3]);
463503b705cfSriastradh		v[4] = b->alpha;
463603b705cfSriastradh
463703b705cfSriastradh		v[5] = op->base.dst.x + b->box.x1;
463803b705cfSriastradh		v[6] = v[1];
463903b705cfSriastradh		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
464003b705cfSriastradh					    op->base.src.offset[1] + b->box.y2,
464103b705cfSriastradh					    transform, op->base.src.scale,
464203b705cfSriastradh					    &v[7], &v[8]);
464303b705cfSriastradh		v[9] = b->alpha;
464403b705cfSriastradh
464503b705cfSriastradh		v[10] = v[5];
464603b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
464703b705cfSriastradh		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
464803b705cfSriastradh					    op->base.src.offset[1] + b->box.y1,
464903b705cfSriastradh					    transform, op->base.src.scale,
465003b705cfSriastradh					    &v[12], &v[13]);
465103b705cfSriastradh		v[14] = b->alpha;
465203b705cfSriastradh		v += 15;
465303b705cfSriastradh		b++;
465403b705cfSriastradh	} while (--nbox);
465503b705cfSriastradh}
465603b705cfSriastradh
465703b705cfSriastradhfastcall static void
465803b705cfSriastradhgen3_emit_composite_spans_primitive(struct sna *sna,
465903b705cfSriastradh				    const struct sna_composite_spans_op *op,
466003b705cfSriastradh				    const BoxRec *box,
466103b705cfSriastradh				    float opacity)
466203b705cfSriastradh{
466303b705cfSriastradh	gen3_emit_composite_spans_vertex(sna, op,
466403b705cfSriastradh					 box->x2, box->y2,
466503b705cfSriastradh					 opacity);
466603b705cfSriastradh	gen3_emit_composite_spans_vertex(sna, op,
466703b705cfSriastradh					 box->x1, box->y2,
466803b705cfSriastradh					 opacity);
466903b705cfSriastradh	gen3_emit_composite_spans_vertex(sna, op,
467003b705cfSriastradh					 box->x1, box->y1,
467103b705cfSriastradh					 opacity);
467203b705cfSriastradh}
467303b705cfSriastradh
467403b705cfSriastradhfastcall static void
467503b705cfSriastradhgen3_render_composite_spans_constant_box(struct sna *sna,
467603b705cfSriastradh					 const struct sna_composite_spans_op *op,
467703b705cfSriastradh					 const BoxRec *box, float opacity)
467803b705cfSriastradh{
467903b705cfSriastradh	float *v;
468003b705cfSriastradh	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
468103b705cfSriastradh	     __FUNCTION__,
468203b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
468303b705cfSriastradh	     opacity,
468403b705cfSriastradh	     op->base.dst.x, op->base.dst.y,
468503b705cfSriastradh	     box->x1, box->y1,
468603b705cfSriastradh	     box->x2 - box->x1,
468703b705cfSriastradh	     box->y2 - box->y1));
468803b705cfSriastradh
468903b705cfSriastradh	gen3_get_rectangles(sna, &op->base, 1);
469003b705cfSriastradh
469103b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
469203b705cfSriastradh	sna->render.vertex_used += 9;
469303b705cfSriastradh
469403b705cfSriastradh	v[0] = box->x2;
469503b705cfSriastradh	v[6] = v[3] = box->x1;
469603b705cfSriastradh	v[4] = v[1] = box->y2;
469703b705cfSriastradh	v[7] = box->y1;
469803b705cfSriastradh	v[8] = v[5] = v[2] = opacity;
469903b705cfSriastradh}
470003b705cfSriastradh
470103b705cfSriastradhfastcall static void
470203b705cfSriastradhgen3_render_composite_spans_constant_thread_boxes(struct sna *sna,
470303b705cfSriastradh						  const struct sna_composite_spans_op *op,
470403b705cfSriastradh						  const struct sna_opacity_box *box,
470503b705cfSriastradh						  int nbox)
470603b705cfSriastradh{
470703b705cfSriastradh	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
470803b705cfSriastradh	     __FUNCTION__, nbox,
470903b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
471003b705cfSriastradh	     op->base.dst.x, op->base.dst.y));
471103b705cfSriastradh
471203b705cfSriastradh	sna_vertex_lock(&sna->render);
471303b705cfSriastradh	do {
471403b705cfSriastradh		int nbox_this_time;
471503b705cfSriastradh		float *v;
471603b705cfSriastradh
471703b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
471803b705cfSriastradh		assert(nbox_this_time);
471903b705cfSriastradh		nbox -= nbox_this_time;
472003b705cfSriastradh
472103b705cfSriastradh		v = sna->render.vertices + sna->render.vertex_used;
472203b705cfSriastradh		sna->render.vertex_used += nbox_this_time * 9;
472303b705cfSriastradh
472403b705cfSriastradh		sna_vertex_acquire__locked(&sna->render);
472503b705cfSriastradh		sna_vertex_unlock(&sna->render);
472603b705cfSriastradh
472703b705cfSriastradh		do {
472803b705cfSriastradh			v[0] = box->box.x2;
472903b705cfSriastradh			v[6] = v[3] = box->box.x1;
473003b705cfSriastradh			v[4] = v[1] = box->box.y2;
473103b705cfSriastradh			v[7] = box->box.y1;
473203b705cfSriastradh			v[8] = v[5] = v[2] = box->alpha;
473303b705cfSriastradh			v += 9;
473403b705cfSriastradh			box++;
473503b705cfSriastradh		} while (--nbox_this_time);
473603b705cfSriastradh
473703b705cfSriastradh		sna_vertex_lock(&sna->render);
473803b705cfSriastradh		sna_vertex_release__locked(&sna->render);
473903b705cfSriastradh	} while (nbox);
474003b705cfSriastradh	sna_vertex_unlock(&sna->render);
474103b705cfSriastradh}
474203b705cfSriastradh
474303b705cfSriastradhfastcall static void
474403b705cfSriastradhgen3_render_composite_spans_box(struct sna *sna,
474503b705cfSriastradh				const struct sna_composite_spans_op *op,
474603b705cfSriastradh				const BoxRec *box, float opacity)
474703b705cfSriastradh{
474803b705cfSriastradh	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
474903b705cfSriastradh	     __FUNCTION__,
475003b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
475103b705cfSriastradh	     opacity,
475203b705cfSriastradh	     op->base.dst.x, op->base.dst.y,
475303b705cfSriastradh	     box->x1, box->y1,
475403b705cfSriastradh	     box->x2 - box->x1,
475503b705cfSriastradh	     box->y2 - box->y1));
475603b705cfSriastradh
475703b705cfSriastradh	gen3_get_rectangles(sna, &op->base, 1);
475803b705cfSriastradh	op->prim_emit(sna, op, box, opacity);
475903b705cfSriastradh}
476003b705cfSriastradh
476103b705cfSriastradhstatic void
476203b705cfSriastradhgen3_render_composite_spans_boxes(struct sna *sna,
476303b705cfSriastradh				  const struct sna_composite_spans_op *op,
476403b705cfSriastradh				  const BoxRec *box, int nbox,
476503b705cfSriastradh				  float opacity)
476603b705cfSriastradh{
476703b705cfSriastradh	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
476803b705cfSriastradh	     __FUNCTION__, nbox,
476903b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
477003b705cfSriastradh	     opacity,
477103b705cfSriastradh	     op->base.dst.x, op->base.dst.y));
477203b705cfSriastradh
477303b705cfSriastradh	do {
477403b705cfSriastradh		int nbox_this_time;
477503b705cfSriastradh
477603b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
477703b705cfSriastradh		nbox -= nbox_this_time;
477803b705cfSriastradh
477903b705cfSriastradh		do {
478003b705cfSriastradh			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
478103b705cfSriastradh			     box->x1, box->y1,
478203b705cfSriastradh			     box->x2 - box->x1,
478303b705cfSriastradh			     box->y2 - box->y1));
478403b705cfSriastradh
478503b705cfSriastradh			op->prim_emit(sna, op, box++, opacity);
478603b705cfSriastradh		} while (--nbox_this_time);
478703b705cfSriastradh	} while (nbox);
478803b705cfSriastradh}
478903b705cfSriastradh
479003b705cfSriastradhfastcall static void
479103b705cfSriastradhgen3_render_composite_spans_boxes__thread(struct sna *sna,
479203b705cfSriastradh					  const struct sna_composite_spans_op *op,
479303b705cfSriastradh					  const struct sna_opacity_box *box,
479403b705cfSriastradh					  int nbox)
479503b705cfSriastradh{
479603b705cfSriastradh	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
479703b705cfSriastradh	     __FUNCTION__, nbox,
479803b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
479903b705cfSriastradh	     op->base.dst.x, op->base.dst.y));
480003b705cfSriastradh
480103b705cfSriastradh	sna_vertex_lock(&sna->render);
480203b705cfSriastradh	do {
480303b705cfSriastradh		int nbox_this_time;
480403b705cfSriastradh		float *v;
480503b705cfSriastradh
480603b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
480703b705cfSriastradh		assert(nbox_this_time);
480803b705cfSriastradh		nbox -= nbox_this_time;
480903b705cfSriastradh
481003b705cfSriastradh		v = sna->render.vertices + sna->render.vertex_used;
481103b705cfSriastradh		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
481203b705cfSriastradh
481303b705cfSriastradh		sna_vertex_acquire__locked(&sna->render);
481403b705cfSriastradh		sna_vertex_unlock(&sna->render);
481503b705cfSriastradh
481603b705cfSriastradh		op->emit_boxes(op, box, nbox_this_time, v);
481703b705cfSriastradh		box += nbox_this_time;
481803b705cfSriastradh
481903b705cfSriastradh		sna_vertex_lock(&sna->render);
482003b705cfSriastradh		sna_vertex_release__locked(&sna->render);
482103b705cfSriastradh	} while (nbox);
482203b705cfSriastradh	sna_vertex_unlock(&sna->render);
482303b705cfSriastradh}
482403b705cfSriastradh
482503b705cfSriastradhfastcall static void
482603b705cfSriastradhgen3_render_composite_spans_done(struct sna *sna,
482703b705cfSriastradh				 const struct sna_composite_spans_op *op)
482803b705cfSriastradh{
482903b705cfSriastradh	if (sna->render.vertex_offset)
483003b705cfSriastradh		gen3_vertex_flush(sna);
483103b705cfSriastradh
483203b705cfSriastradh	DBG(("%s()\n", __FUNCTION__));
483303b705cfSriastradh
483403b705cfSriastradh	if (op->base.src.bo)
483503b705cfSriastradh		kgem_bo_destroy(&sna->kgem, op->base.src.bo);
483603b705cfSriastradh
483703b705cfSriastradh	sna_render_composite_redirect_done(sna, &op->base);
483803b705cfSriastradh}
483903b705cfSriastradh
484003b705cfSriastradhstatic bool
484103b705cfSriastradhgen3_check_composite_spans(struct sna *sna,
484203b705cfSriastradh			   uint8_t op, PicturePtr src, PicturePtr dst,
484303b705cfSriastradh			   int16_t width, int16_t height, unsigned flags)
484403b705cfSriastradh{
484503b705cfSriastradh	if (op >= ARRAY_SIZE(gen3_blend_op))
484603b705cfSriastradh		return false;
484703b705cfSriastradh
484803b705cfSriastradh	if (gen3_composite_fallback(sna, op, src, NULL, dst))
484903b705cfSriastradh		return false;
485003b705cfSriastradh
485103b705cfSriastradh	if (need_tiling(sna, width, height) &&
485203b705cfSriastradh	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
485303b705cfSriastradh		DBG(("%s: fallback, tiled operation not on GPU\n",
485403b705cfSriastradh		     __FUNCTION__));
485503b705cfSriastradh		return false;
485603b705cfSriastradh	}
485703b705cfSriastradh
485803b705cfSriastradh	return true;
485903b705cfSriastradh}
486003b705cfSriastradh
486103b705cfSriastradhstatic bool
486203b705cfSriastradhgen3_render_composite_spans(struct sna *sna,
486303b705cfSriastradh			    uint8_t op,
486403b705cfSriastradh			    PicturePtr src,
486503b705cfSriastradh			    PicturePtr dst,
486603b705cfSriastradh			    int16_t src_x,  int16_t src_y,
486703b705cfSriastradh			    int16_t dst_x,  int16_t dst_y,
486803b705cfSriastradh			    int16_t width,  int16_t height,
486903b705cfSriastradh			    unsigned flags,
487003b705cfSriastradh			    struct sna_composite_spans_op *tmp)
487103b705cfSriastradh{
487203b705cfSriastradh	bool no_offset;
487303b705cfSriastradh
487403b705cfSriastradh	DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__,
487503b705cfSriastradh	     src_x, src_y, dst_x, dst_y, width, height));
487603b705cfSriastradh
487703b705cfSriastradh	assert(gen3_check_composite_spans(sna, op, src, dst, width, height, flags));
487803b705cfSriastradh
487903b705cfSriastradh	if (need_tiling(sna, width, height)) {
488003b705cfSriastradh		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
488103b705cfSriastradh		     __FUNCTION__, width, height));
488203b705cfSriastradh		return sna_tiling_composite_spans(op, src, dst,
488303b705cfSriastradh						  src_x, src_y, dst_x, dst_y,
488403b705cfSriastradh						  width, height, flags, tmp);
488503b705cfSriastradh	}
488603b705cfSriastradh
488703b705cfSriastradh	if (!gen3_composite_set_target(sna, &tmp->base, dst,
488803b705cfSriastradh				       dst_x, dst_y, width, height)) {
488903b705cfSriastradh		DBG(("%s: unable to set render target\n",
489003b705cfSriastradh		     __FUNCTION__));
489103b705cfSriastradh		return false;
489203b705cfSriastradh	}
489303b705cfSriastradh
489403b705cfSriastradh	tmp->base.op = op;
489503b705cfSriastradh	tmp->base.rb_reversed = gen3_dst_rb_reversed(tmp->base.dst.format);
489603b705cfSriastradh	if (too_large(tmp->base.dst.width, tmp->base.dst.height) ||
489703b705cfSriastradh	    !gen3_check_pitch_3d(tmp->base.dst.bo)) {
489803b705cfSriastradh		if (!sna_render_composite_redirect(sna, &tmp->base,
489903b705cfSriastradh						   dst_x, dst_y, width, height,
490003b705cfSriastradh						   true))
490103b705cfSriastradh			return false;
490203b705cfSriastradh	}
490303b705cfSriastradh
490403b705cfSriastradh	tmp->base.src.u.gen3.type = SHADER_TEXTURE;
490503b705cfSriastradh	tmp->base.src.is_affine = true;
490603b705cfSriastradh	DBG(("%s: preparing source\n", __FUNCTION__));
490703b705cfSriastradh	switch (gen3_composite_picture(sna, src, &tmp->base, &tmp->base.src,
490803b705cfSriastradh				       src_x, src_y,
490903b705cfSriastradh				       width, height,
491003b705cfSriastradh				       dst_x, dst_y,
491103b705cfSriastradh				       dst->polyMode == PolyModePrecise)) {
491203b705cfSriastradh	case -1:
491303b705cfSriastradh		goto cleanup_dst;
491403b705cfSriastradh	case 0:
491503b705cfSriastradh		tmp->base.src.u.gen3.type = SHADER_ZERO;
491603b705cfSriastradh		break;
491703b705cfSriastradh	case 1:
491803b705cfSriastradh		gen3_composite_channel_convert(&tmp->base.src);
491903b705cfSriastradh		break;
492003b705cfSriastradh	}
492103b705cfSriastradh	DBG(("%s: source type=%d\n", __FUNCTION__, tmp->base.src.u.gen3.type));
492203b705cfSriastradh
492303b705cfSriastradh	if (tmp->base.src.u.gen3.type != SHADER_ZERO)
492403b705cfSriastradh		tmp->base.mask.u.gen3.type = SHADER_OPACITY;
492503b705cfSriastradh
492603b705cfSriastradh	no_offset = tmp->base.dst.x == 0 && tmp->base.dst.y == 0;
492703b705cfSriastradh	tmp->box   = gen3_render_composite_spans_box;
492803b705cfSriastradh	tmp->boxes = gen3_render_composite_spans_boxes;
492903b705cfSriastradh	tmp->thread_boxes = gen3_render_composite_spans_boxes__thread;
493003b705cfSriastradh	tmp->done  = gen3_render_composite_spans_done;
493103b705cfSriastradh	tmp->prim_emit = gen3_emit_composite_spans_primitive;
493203b705cfSriastradh	switch (tmp->base.src.u.gen3.type) {
493303b705cfSriastradh	case SHADER_NONE:
493403b705cfSriastradh		assert(0);
493503b705cfSriastradh	case SHADER_ZERO:
493603b705cfSriastradh		if (no_offset) {
493703b705cfSriastradh			tmp->prim_emit = gen3_emit_composite_spans_primitive_zero_no_offset;
493803b705cfSriastradh			tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero_no_offset__boxes;
493903b705cfSriastradh		} else {
494003b705cfSriastradh			tmp->prim_emit = gen3_emit_composite_spans_primitive_zero;
494103b705cfSriastradh			tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero__boxes;
494203b705cfSriastradh		}
494303b705cfSriastradh		break;
494403b705cfSriastradh	case SHADER_BLACK:
494503b705cfSriastradh	case SHADER_WHITE:
494603b705cfSriastradh	case SHADER_CONSTANT:
494703b705cfSriastradh		if (no_offset) {
494803b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
494903b705cfSriastradh			if (sna->cpu_features & SSE2) {
495003b705cfSriastradh				tmp->box = gen3_render_composite_spans_constant_box__sse2;
495103b705cfSriastradh				tmp->thread_boxes = gen3_render_composite_spans_constant_thread__sse2__boxes;
495203b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant__sse2__no_offset;
495303b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__sse2__no_offset__boxes;
495403b705cfSriastradh			} else
495503b705cfSriastradh#endif
495603b705cfSriastradh			{
495703b705cfSriastradh				tmp->box = gen3_render_composite_spans_constant_box;
495803b705cfSriastradh				tmp->thread_boxes = gen3_render_composite_spans_constant_thread_boxes;
495903b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant_no_offset;
496003b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant_no_offset__boxes;
496103b705cfSriastradh			}
496203b705cfSriastradh		} else {
496303b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
496403b705cfSriastradh			if (sna->cpu_features & SSE2) {
496503b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant__sse2;
496603b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__sse2__boxes;
496703b705cfSriastradh			} else
496803b705cfSriastradh#endif
496903b705cfSriastradh			{
497003b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant;
497103b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__boxes;
497203b705cfSriastradh			}
497303b705cfSriastradh		}
497403b705cfSriastradh		break;
497503b705cfSriastradh	case SHADER_LINEAR:
497603b705cfSriastradh	case SHADER_RADIAL:
497703b705cfSriastradh		if (tmp->base.src.transform == NULL) {
497803b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
497903b705cfSriastradh			if (sna->cpu_features & SSE2) {
498003b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient__sse2;
498103b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__sse2__boxes;
498203b705cfSriastradh			} else
498303b705cfSriastradh#endif
498403b705cfSriastradh			{
498503b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient;
498603b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__boxes;
498703b705cfSriastradh			}
498803b705cfSriastradh		} else if (tmp->base.src.is_affine) {
498903b705cfSriastradh			tmp->base.src.scale[1] = tmp->base.src.scale[0] = 1. / tmp->base.src.transform->matrix[2][2];
499003b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
499103b705cfSriastradh			if (sna->cpu_features & SSE2) {
499203b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient__sse2;
499303b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__sse2__boxes;
499403b705cfSriastradh			} else
499503b705cfSriastradh#endif
499603b705cfSriastradh			{
499703b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient;
499803b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__boxes;
499903b705cfSriastradh			}
500003b705cfSriastradh		}
500103b705cfSriastradh		break;
500203b705cfSriastradh	case SHADER_TEXTURE:
500303b705cfSriastradh		if (tmp->base.src.transform == NULL) {
500403b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
500503b705cfSriastradh			if (sna->cpu_features & SSE2) {
500603b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source__sse2;
500703b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__sse2__boxes;
500803b705cfSriastradh			} else
500903b705cfSriastradh#endif
501003b705cfSriastradh			{
501103b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source;
501203b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__boxes;
501303b705cfSriastradh			}
501403b705cfSriastradh		} else if (tmp->base.src.is_affine) {
501503b705cfSriastradh			tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
501603b705cfSriastradh			tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
501703b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
501803b705cfSriastradh			if (sna->cpu_features & SSE2) {
501903b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source__sse2;
502003b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__sse2__boxes;
502103b705cfSriastradh			} else
502203b705cfSriastradh#endif
502303b705cfSriastradh			{
502403b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source;
502503b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__boxes;
502603b705cfSriastradh			}
502703b705cfSriastradh		}
502803b705cfSriastradh		break;
502903b705cfSriastradh	}
503003b705cfSriastradh	if (tmp->emit_boxes == NULL)
503103b705cfSriastradh		tmp->thread_boxes = NULL;
503203b705cfSriastradh
503303b705cfSriastradh	tmp->base.mask.bo = NULL;
503403b705cfSriastradh
503503b705cfSriastradh	tmp->base.floats_per_vertex = 2;
503603b705cfSriastradh	if (!is_constant_ps(tmp->base.src.u.gen3.type))
503703b705cfSriastradh		tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
503803b705cfSriastradh	tmp->base.floats_per_vertex +=
503903b705cfSriastradh		tmp->base.mask.u.gen3.type == SHADER_OPACITY;
504003b705cfSriastradh	tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;
504103b705cfSriastradh
504203b705cfSriastradh	if (!kgem_check_bo(&sna->kgem,
504303b705cfSriastradh			   tmp->base.dst.bo, tmp->base.src.bo,
504403b705cfSriastradh			   NULL)) {
504503b705cfSriastradh		kgem_submit(&sna->kgem);
504603b705cfSriastradh		if (!kgem_check_bo(&sna->kgem,
504703b705cfSriastradh				   tmp->base.dst.bo, tmp->base.src.bo,
504803b705cfSriastradh				   NULL))
504903b705cfSriastradh			goto cleanup_src;
505003b705cfSriastradh	}
505103b705cfSriastradh
505203b705cfSriastradh	gen3_emit_composite_state(sna, &tmp->base);
505303b705cfSriastradh	gen3_align_vertex(sna, &tmp->base);
505403b705cfSriastradh	return true;
505503b705cfSriastradh
505603b705cfSriastradhcleanup_src:
505703b705cfSriastradh	if (tmp->base.src.bo)
505803b705cfSriastradh		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
505903b705cfSriastradhcleanup_dst:
506003b705cfSriastradh	if (tmp->base.redirect.real_bo)
506103b705cfSriastradh		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
506203b705cfSriastradh	return false;
506303b705cfSriastradh}
506403b705cfSriastradh
506503b705cfSriastradhstatic void
506603b705cfSriastradhgen3_emit_video_state(struct sna *sna,
506703b705cfSriastradh		      struct sna_video *video,
506803b705cfSriastradh		      struct sna_video_frame *frame,
506903b705cfSriastradh		      PixmapPtr pixmap,
507003b705cfSriastradh		      struct kgem_bo *dst_bo,
507103b705cfSriastradh		      int width, int height,
507203b705cfSriastradh		      bool bilinear)
507303b705cfSriastradh{
507403b705cfSriastradh	struct gen3_render_state *state = &sna->render_state.gen3;
507503b705cfSriastradh	uint32_t id, ms3, rewind;
507603b705cfSriastradh
507703b705cfSriastradh	gen3_emit_target(sna, dst_bo, width, height,
507803b705cfSriastradh			 sna_format_for_depth(pixmap->drawable.depth));
507903b705cfSriastradh
508003b705cfSriastradh	/* XXX share with composite? Is it worth the effort? */
508103b705cfSriastradh	if ((state->last_shader & (1<<31)) == 0) {
508203b705cfSriastradh		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
508303b705cfSriastradh			  I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) |
508403b705cfSriastradh			  2);
508503b705cfSriastradh		OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
508603b705cfSriastradh		OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
508703b705cfSriastradh			  S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
508803b705cfSriastradh			  S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
508903b705cfSriastradh			  S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
509003b705cfSriastradh			  S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
509103b705cfSriastradh			  S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
509203b705cfSriastradh			  S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
509303b705cfSriastradh			  S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
509403b705cfSriastradh		OUT_BATCH((2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
509503b705cfSriastradh			  (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
509603b705cfSriastradh			  S6_COLOR_WRITE_ENABLE);
509703b705cfSriastradh
509803b705cfSriastradh		state->last_blend = 0;
509903b705cfSriastradh		state->floats_per_vertex = 4;
510003b705cfSriastradh	}
510103b705cfSriastradh
510203b705cfSriastradh	if (!is_planar_fourcc(frame->id)) {
510303b705cfSriastradh		rewind = sna->kgem.nbatch;
510403b705cfSriastradh		OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
510503b705cfSriastradh		OUT_BATCH(0x0000001);	/* constant 0 */
510603b705cfSriastradh		/* constant 0: brightness/contrast */
510703b705cfSriastradh		OUT_BATCH_F(video->brightness / 128.0);
510803b705cfSriastradh		OUT_BATCH_F(video->contrast / 255.0);
510903b705cfSriastradh		OUT_BATCH_F(0.0);
511003b705cfSriastradh		OUT_BATCH_F(0.0);
511103b705cfSriastradh		if (state->last_constants &&
511203b705cfSriastradh		    memcmp(&sna->kgem.batch[state->last_constants],
511303b705cfSriastradh			   &sna->kgem.batch[rewind],
511403b705cfSriastradh			   6*sizeof(uint32_t)) == 0)
511503b705cfSriastradh			sna->kgem.nbatch = rewind;
511603b705cfSriastradh		else
511703b705cfSriastradh			state->last_constants = rewind;
511803b705cfSriastradh
511903b705cfSriastradh		rewind = sna->kgem.nbatch;
512003b705cfSriastradh		OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3);
512103b705cfSriastradh		OUT_BATCH(0x00000001);
512203b705cfSriastradh		OUT_BATCH(SS2_COLORSPACE_CONVERSION |
512303b705cfSriastradh			  (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
512403b705cfSriastradh			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
512503b705cfSriastradh		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
512603b705cfSriastradh			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
512703b705cfSriastradh			  (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
512803b705cfSriastradh			  SS3_NORMALIZED_COORDS);
512903b705cfSriastradh		OUT_BATCH(0x00000000);
513003b705cfSriastradh		if (state->last_sampler &&
513103b705cfSriastradh		    memcmp(&sna->kgem.batch[state->last_sampler],
513203b705cfSriastradh			   &sna->kgem.batch[rewind],
513303b705cfSriastradh			   5*sizeof(uint32_t)) == 0)
513403b705cfSriastradh			sna->kgem.nbatch = rewind;
513503b705cfSriastradh		else
513603b705cfSriastradh			state->last_sampler = rewind;
513703b705cfSriastradh
513803b705cfSriastradh		OUT_BATCH(_3DSTATE_MAP_STATE | 3);
513903b705cfSriastradh		OUT_BATCH(0x00000001);	/* texture map #1 */
514003b705cfSriastradh		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
514103b705cfSriastradh					 frame->bo,
514203b705cfSriastradh					 I915_GEM_DOMAIN_SAMPLER << 16,
514303b705cfSriastradh					 0));
514403b705cfSriastradh
514503b705cfSriastradh		ms3 = MAPSURF_422;
514603b705cfSriastradh		switch (frame->id) {
514703b705cfSriastradh		case FOURCC_YUY2:
514803b705cfSriastradh			ms3 |= MT_422_YCRCB_NORMAL;
514903b705cfSriastradh			break;
515003b705cfSriastradh		case FOURCC_UYVY:
515103b705cfSriastradh			ms3 |= MT_422_YCRCB_SWAPY;
515203b705cfSriastradh			break;
515303b705cfSriastradh		}
515403b705cfSriastradh		ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
515503b705cfSriastradh		ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
515603b705cfSriastradh		OUT_BATCH(ms3);
515703b705cfSriastradh		OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
515803b705cfSriastradh
515903b705cfSriastradh		id = 1<<31 | 1<<1 | !!video->brightness;
516003b705cfSriastradh		if (state->last_shader != id) {
516103b705cfSriastradh			state->last_shader = id;
516203b705cfSriastradh			id = sna->kgem.nbatch++;
516303b705cfSriastradh
516403b705cfSriastradh			gen3_fs_dcl(FS_S0);
516503b705cfSriastradh			gen3_fs_dcl(FS_T0);
516603b705cfSriastradh			gen3_fs_texld(FS_OC, FS_S0, FS_T0);
516703b705cfSriastradh			if (video->brightness != 0) {
516803b705cfSriastradh				gen3_fs_add(FS_OC,
516903b705cfSriastradh					    gen3_fs_operand_reg(FS_OC),
517003b705cfSriastradh					    gen3_fs_operand(FS_C0, X, X, X, ZERO));
517103b705cfSriastradh			}
517203b705cfSriastradh
517303b705cfSriastradh			sna->kgem.batch[id] =
517403b705cfSriastradh				_3DSTATE_PIXEL_SHADER_PROGRAM |
517503b705cfSriastradh				(sna->kgem.nbatch - id - 2);
517603b705cfSriastradh		}
517703b705cfSriastradh	} else {
517803b705cfSriastradh		/* For the planar formats, we set up three samplers --
517903b705cfSriastradh		 * one for each plane, in a Y8 format.  Because I
518003b705cfSriastradh		 * couldn't get the special PLANAR_TO_PACKED
518103b705cfSriastradh		 * shader setup to work, I did the manual pixel shader:
518203b705cfSriastradh		 *
518303b705cfSriastradh		 * y' = y - .0625
518403b705cfSriastradh		 * u' = u - .5
518503b705cfSriastradh		 * v' = v - .5;
518603b705cfSriastradh		 *
518703b705cfSriastradh		 * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
518803b705cfSriastradh		 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
518903b705cfSriastradh		 * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
519003b705cfSriastradh		 *
519103b705cfSriastradh		 * register assignment:
519203b705cfSriastradh		 * r0 = (y',u',v',0)
519303b705cfSriastradh		 * r1 = (y,y,y,y)
519403b705cfSriastradh		 * r2 = (u,u,u,u)
519503b705cfSriastradh		 * r3 = (v,v,v,v)
519603b705cfSriastradh		 * OC = (r,g,b,1)
519703b705cfSriastradh		 */
519803b705cfSriastradh		rewind = sna->kgem.nbatch;
519903b705cfSriastradh		OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2));
520003b705cfSriastradh		OUT_BATCH(0x000001f);	/* constants 0-4 */
520103b705cfSriastradh		/* constant 0: normalization offsets */
520203b705cfSriastradh		OUT_BATCH_F(-0.0625);
520303b705cfSriastradh		OUT_BATCH_F(-0.5);
520403b705cfSriastradh		OUT_BATCH_F(-0.5);
520503b705cfSriastradh		OUT_BATCH_F(0.0);
520603b705cfSriastradh		/* constant 1: r coefficients */
520703b705cfSriastradh		OUT_BATCH_F(1.1643);
520803b705cfSriastradh		OUT_BATCH_F(0.0);
520903b705cfSriastradh		OUT_BATCH_F(1.5958);
521003b705cfSriastradh		OUT_BATCH_F(0.0);
521103b705cfSriastradh		/* constant 2: g coefficients */
521203b705cfSriastradh		OUT_BATCH_F(1.1643);
521303b705cfSriastradh		OUT_BATCH_F(-0.39173);
521403b705cfSriastradh		OUT_BATCH_F(-0.81290);
521503b705cfSriastradh		OUT_BATCH_F(0.0);
521603b705cfSriastradh		/* constant 3: b coefficients */
521703b705cfSriastradh		OUT_BATCH_F(1.1643);
521803b705cfSriastradh		OUT_BATCH_F(2.017);
521903b705cfSriastradh		OUT_BATCH_F(0.0);
522003b705cfSriastradh		OUT_BATCH_F(0.0);
522103b705cfSriastradh		/* constant 4: brightness/contrast */
522203b705cfSriastradh		OUT_BATCH_F(video->brightness / 128.0);
522303b705cfSriastradh		OUT_BATCH_F(video->contrast / 255.0);
522403b705cfSriastradh		OUT_BATCH_F(0.0);
522503b705cfSriastradh		OUT_BATCH_F(0.0);
522603b705cfSriastradh		if (state->last_constants &&
522703b705cfSriastradh		    memcmp(&sna->kgem.batch[state->last_constants],
522803b705cfSriastradh			   &sna->kgem.batch[rewind],
522903b705cfSriastradh			   22*sizeof(uint32_t)) == 0)
523003b705cfSriastradh			sna->kgem.nbatch = rewind;
523103b705cfSriastradh		else
523203b705cfSriastradh			state->last_constants = rewind;
523303b705cfSriastradh
523403b705cfSriastradh		rewind = sna->kgem.nbatch;
523503b705cfSriastradh		OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9);
523603b705cfSriastradh		OUT_BATCH(0x00000007);
523703b705cfSriastradh		/* sampler 0 */
523803b705cfSriastradh		OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
523903b705cfSriastradh			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
524003b705cfSriastradh		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
524103b705cfSriastradh			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
524203b705cfSriastradh			  (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
524303b705cfSriastradh			  SS3_NORMALIZED_COORDS);
524403b705cfSriastradh		OUT_BATCH(0x00000000);
524503b705cfSriastradh		/* sampler 1 */
524603b705cfSriastradh		OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
524703b705cfSriastradh			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
524803b705cfSriastradh		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
524903b705cfSriastradh			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
525003b705cfSriastradh			  (1 << SS3_TEXTUREMAP_INDEX_SHIFT) |
525103b705cfSriastradh			  SS3_NORMALIZED_COORDS);
525203b705cfSriastradh		OUT_BATCH(0x00000000);
525303b705cfSriastradh		/* sampler 2 */
525403b705cfSriastradh		OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
525503b705cfSriastradh			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
525603b705cfSriastradh		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
525703b705cfSriastradh			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
525803b705cfSriastradh			  (2 << SS3_TEXTUREMAP_INDEX_SHIFT) |
525903b705cfSriastradh			  SS3_NORMALIZED_COORDS);
526003b705cfSriastradh		OUT_BATCH(0x00000000);
526103b705cfSriastradh		if (state->last_sampler &&
526203b705cfSriastradh		    memcmp(&sna->kgem.batch[state->last_sampler],
526303b705cfSriastradh			   &sna->kgem.batch[rewind],
526403b705cfSriastradh			   11*sizeof(uint32_t)) == 0)
526503b705cfSriastradh			sna->kgem.nbatch = rewind;
526603b705cfSriastradh		else
526703b705cfSriastradh			state->last_sampler = rewind;
526803b705cfSriastradh
526903b705cfSriastradh		OUT_BATCH(_3DSTATE_MAP_STATE | 9);
527003b705cfSriastradh		OUT_BATCH(0x00000007);
527103b705cfSriastradh
527203b705cfSriastradh		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
527303b705cfSriastradh					 frame->bo,
527403b705cfSriastradh					 I915_GEM_DOMAIN_SAMPLER << 16,
527503b705cfSriastradh					 0));
527603b705cfSriastradh
527703b705cfSriastradh		ms3 = MAPSURF_8BIT | MT_8BIT_I8;
527803b705cfSriastradh		ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
527903b705cfSriastradh		ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
528003b705cfSriastradh		OUT_BATCH(ms3);
528103b705cfSriastradh		/* check to see if Y has special pitch than normal
528203b705cfSriastradh		 * double u/v pitch, e.g i915 XvMC hw requires at
528303b705cfSriastradh		 * least 1K alignment, so Y pitch might
528403b705cfSriastradh		 * be same as U/V's.*/
528503b705cfSriastradh		if (frame->pitch[1])
528603b705cfSriastradh			OUT_BATCH(((frame->pitch[1] / 4) - 1) << MS4_PITCH_SHIFT);
528703b705cfSriastradh		else
528803b705cfSriastradh			OUT_BATCH(((frame->pitch[0] * 2 / 4) - 1) << MS4_PITCH_SHIFT);
528903b705cfSriastradh
529003b705cfSriastradh		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
529103b705cfSriastradh					 frame->bo,
529203b705cfSriastradh					 I915_GEM_DOMAIN_SAMPLER << 16,
529303b705cfSriastradh					 frame->UBufOffset));
529403b705cfSriastradh
529503b705cfSriastradh		ms3 = MAPSURF_8BIT | MT_8BIT_I8;
529603b705cfSriastradh		ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
529703b705cfSriastradh		ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
529803b705cfSriastradh		OUT_BATCH(ms3);
529903b705cfSriastradh		OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
530003b705cfSriastradh
530103b705cfSriastradh		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
530203b705cfSriastradh					 frame->bo,
530303b705cfSriastradh					 I915_GEM_DOMAIN_SAMPLER << 16,
530403b705cfSriastradh					 frame->VBufOffset));
530503b705cfSriastradh
530603b705cfSriastradh		ms3 = MAPSURF_8BIT | MT_8BIT_I8;
530703b705cfSriastradh		ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
530803b705cfSriastradh		ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
530903b705cfSriastradh		OUT_BATCH(ms3);
531003b705cfSriastradh		OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
531103b705cfSriastradh
531203b705cfSriastradh		id = 1<<31 | 2<<1 | !!video->brightness;
531303b705cfSriastradh		if (state->last_shader != id) {
531403b705cfSriastradh			state->last_shader = id;
531503b705cfSriastradh			id = sna->kgem.nbatch++;
531603b705cfSriastradh
531703b705cfSriastradh			/* Declare samplers */
531803b705cfSriastradh			gen3_fs_dcl(FS_S0);	/* Y */
531903b705cfSriastradh			gen3_fs_dcl(FS_S1);	/* U */
532003b705cfSriastradh			gen3_fs_dcl(FS_S2);	/* V */
532103b705cfSriastradh			gen3_fs_dcl(FS_T0);	/* normalized coords */
532203b705cfSriastradh
532303b705cfSriastradh			/* Load samplers to temporaries. */
532403b705cfSriastradh			gen3_fs_texld(FS_R1, FS_S0, FS_T0);
532503b705cfSriastradh			gen3_fs_texld(FS_R2, FS_S1, FS_T0);
532603b705cfSriastradh			gen3_fs_texld(FS_R3, FS_S2, FS_T0);
532703b705cfSriastradh
532803b705cfSriastradh			/* Move the sampled YUV data in R[123] to the first
532903b705cfSriastradh			 * 3 channels of R0.
533003b705cfSriastradh			 */
533103b705cfSriastradh			gen3_fs_mov_masked(FS_R0, MASK_X,
533203b705cfSriastradh					   gen3_fs_operand_reg(FS_R1));
533303b705cfSriastradh			gen3_fs_mov_masked(FS_R0, MASK_Y,
533403b705cfSriastradh					   gen3_fs_operand_reg(FS_R2));
533503b705cfSriastradh			gen3_fs_mov_masked(FS_R0, MASK_Z,
533603b705cfSriastradh					   gen3_fs_operand_reg(FS_R3));
533703b705cfSriastradh
533803b705cfSriastradh			/* Normalize the YUV data */
533903b705cfSriastradh			gen3_fs_add(FS_R0, gen3_fs_operand_reg(FS_R0),
534003b705cfSriastradh				    gen3_fs_operand_reg(FS_C0));
534103b705cfSriastradh			/* dot-product the YUV data in R0 by the vectors of
534203b705cfSriastradh			 * coefficients for calculating R, G, and B, storing
534303b705cfSriastradh			 * the results in the R, G, or B channels of the output
534403b705cfSriastradh			 * color.  The OC results are implicitly clamped
534503b705cfSriastradh			 * at the end of the program.
534603b705cfSriastradh			 */
534703b705cfSriastradh			gen3_fs_dp3(FS_OC, MASK_X,
534803b705cfSriastradh				    gen3_fs_operand_reg(FS_R0),
534903b705cfSriastradh				    gen3_fs_operand_reg(FS_C1));
535003b705cfSriastradh			gen3_fs_dp3(FS_OC, MASK_Y,
535103b705cfSriastradh				    gen3_fs_operand_reg(FS_R0),
535203b705cfSriastradh				    gen3_fs_operand_reg(FS_C2));
535303b705cfSriastradh			gen3_fs_dp3(FS_OC, MASK_Z,
535403b705cfSriastradh				    gen3_fs_operand_reg(FS_R0),
535503b705cfSriastradh				    gen3_fs_operand_reg(FS_C3));
535603b705cfSriastradh			/* Set alpha of the output to 1.0, by wiring W to 1
535703b705cfSriastradh			 * and not actually using the source.
535803b705cfSriastradh			 */
535903b705cfSriastradh			gen3_fs_mov_masked(FS_OC, MASK_W,
536003b705cfSriastradh					   gen3_fs_operand_one());
536103b705cfSriastradh
536203b705cfSriastradh			if (video->brightness != 0) {
536303b705cfSriastradh				gen3_fs_add(FS_OC,
536403b705cfSriastradh					    gen3_fs_operand_reg(FS_OC),
536503b705cfSriastradh					    gen3_fs_operand(FS_C4, X, X, X, ZERO));
536603b705cfSriastradh			}
536703b705cfSriastradh
536803b705cfSriastradh			sna->kgem.batch[id] =
536903b705cfSriastradh				_3DSTATE_PIXEL_SHADER_PROGRAM |
537003b705cfSriastradh				(sna->kgem.nbatch - id - 2);
537103b705cfSriastradh		}
537203b705cfSriastradh	}
537303b705cfSriastradh}
537403b705cfSriastradh
537503b705cfSriastradhstatic void
537603b705cfSriastradhgen3_video_get_batch(struct sna *sna, struct kgem_bo *bo)
537703b705cfSriastradh{
537803b705cfSriastradh	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
537903b705cfSriastradh
538003b705cfSriastradh	if (!kgem_check_batch(&sna->kgem, 120) ||
538103b705cfSriastradh	    !kgem_check_reloc(&sna->kgem, 4) ||
538203b705cfSriastradh	    !kgem_check_exec(&sna->kgem, 2)) {
538303b705cfSriastradh		_kgem_submit(&sna->kgem);
538403b705cfSriastradh		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
538503b705cfSriastradh	}
538603b705cfSriastradh
538703b705cfSriastradh	if (sna->render_state.gen3.need_invariant)
538803b705cfSriastradh		gen3_emit_invariant(sna);
538903b705cfSriastradh}
539003b705cfSriastradh
/* Limit a request for rectangles to what still fits inline in the batch.
 *
 * A rectangle takes three vertices of floats_per_vertex entries each.  One
 * entry of batch_space() is held back (callers in this file emit a single
 * primitive header dword ahead of the vertices).
 *
 * Returns want unchanged when it fits, otherwise the number of whole
 * rectangles that do fit; that number may be 0.
 */
static int
gen3_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex)
{
	const int per_rect = 3 * floats_per_vertex;
	const int avail = batch_space(sna) - 1;

	return per_rect * want > avail ? avail / per_rect : want;
}
540203b705cfSriastradh
540303b705cfSriastradhstatic bool
540403b705cfSriastradhgen3_render_video(struct sna *sna,
540503b705cfSriastradh		  struct sna_video *video,
540603b705cfSriastradh		  struct sna_video_frame *frame,
540703b705cfSriastradh		  RegionPtr dstRegion,
540803b705cfSriastradh		  PixmapPtr pixmap)
540903b705cfSriastradh{
541003b705cfSriastradh	struct sna_pixmap *priv = sna_pixmap(pixmap);
541103b705cfSriastradh	BoxPtr pbox = REGION_RECTS(dstRegion);
541203b705cfSriastradh	int nbox = REGION_NUM_RECTS(dstRegion);
541303b705cfSriastradh	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
541403b705cfSriastradh	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
541503b705cfSriastradh	int src_width = frame->src.x2 - frame->src.x1;
541603b705cfSriastradh	int src_height = frame->src.y2 - frame->src.y1;
541703b705cfSriastradh	float src_offset_x, src_offset_y;
541803b705cfSriastradh	float src_scale_x, src_scale_y;
541903b705cfSriastradh	int pix_xoff, pix_yoff;
542003b705cfSriastradh	struct kgem_bo *dst_bo;
542103b705cfSriastradh	bool bilinear;
542203b705cfSriastradh	int copy = 0;
542303b705cfSriastradh
542403b705cfSriastradh	DBG(("%s: src:%dx%d (frame:%dx%d) -> dst:%dx%d\n", __FUNCTION__,
542503b705cfSriastradh	     src_width, src_height, frame->width, frame->height, dst_width, dst_height));
542603b705cfSriastradh
542703b705cfSriastradh	dst_bo = priv->gpu_bo;
542803b705cfSriastradh	if (dst_bo == NULL)
542903b705cfSriastradh		return false;
543003b705cfSriastradh
543103b705cfSriastradh	bilinear = src_width != dst_width || src_height != dst_height;
543203b705cfSriastradh
543303b705cfSriastradh	src_scale_x = (float)src_width / dst_width / frame->width;
543403b705cfSriastradh	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
543503b705cfSriastradh
543603b705cfSriastradh	src_scale_y = (float)src_height / dst_height / frame->height;
543703b705cfSriastradh	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
543803b705cfSriastradh	DBG(("%s: src offset (%f, %f), scale (%f, %f)\n",
543903b705cfSriastradh	     __FUNCTION__, src_offset_x, src_offset_y, src_scale_x, src_scale_y));
544003b705cfSriastradh
544103b705cfSriastradh	if (too_large(pixmap->drawable.width, pixmap->drawable.height) ||
544203b705cfSriastradh	    !gen3_check_pitch_3d(dst_bo)) {
544303b705cfSriastradh		int bpp = pixmap->drawable.bitsPerPixel;
544403b705cfSriastradh
544503b705cfSriastradh		if (too_large(dst_width, dst_height))
544603b705cfSriastradh			return false;
544703b705cfSriastradh
544803b705cfSriastradh		dst_bo = kgem_create_2d(&sna->kgem,
544903b705cfSriastradh					dst_width, dst_height, bpp,
545003b705cfSriastradh					kgem_choose_tiling(&sna->kgem,
545103b705cfSriastradh							   I915_TILING_X,
545203b705cfSriastradh							   dst_width, dst_height, bpp),
545303b705cfSriastradh					0);
545403b705cfSriastradh		if (!dst_bo)
545503b705cfSriastradh			return false;
545603b705cfSriastradh
545703b705cfSriastradh		pix_xoff = -dstRegion->extents.x1;
545803b705cfSriastradh		pix_yoff = -dstRegion->extents.y1;
545903b705cfSriastradh		copy = 1;
546003b705cfSriastradh	} else {
546103b705cfSriastradh		/* Set up the offset for translating from the given region
546203b705cfSriastradh		 * (in screen coordinates) to the backing pixmap.
546303b705cfSriastradh		 */
546403b705cfSriastradh#ifdef COMPOSITE
546503b705cfSriastradh		pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
546603b705cfSriastradh		pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
546703b705cfSriastradh#else
546803b705cfSriastradh		pix_xoff = 0;
546903b705cfSriastradh		pix_yoff = 0;
547003b705cfSriastradh#endif
547103b705cfSriastradh
547203b705cfSriastradh		dst_width  = pixmap->drawable.width;
547303b705cfSriastradh		dst_height = pixmap->drawable.height;
547403b705cfSriastradh	}
547503b705cfSriastradh
547603b705cfSriastradh	gen3_video_get_batch(sna, dst_bo);
547703b705cfSriastradh	gen3_emit_video_state(sna, video, frame, pixmap,
547803b705cfSriastradh			      dst_bo, dst_width, dst_height, bilinear);
547903b705cfSriastradh	do {
548003b705cfSriastradh		int nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
548103b705cfSriastradh		if (nbox_this_time == 0) {
548203b705cfSriastradh			gen3_video_get_batch(sna, dst_bo);
548303b705cfSriastradh			gen3_emit_video_state(sna, video, frame, pixmap,
548403b705cfSriastradh					      dst_bo, dst_width, dst_height, bilinear);
548503b705cfSriastradh			nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
548603b705cfSriastradh			assert(nbox_this_time);
548703b705cfSriastradh		}
548803b705cfSriastradh		nbox -= nbox_this_time;
548903b705cfSriastradh
549003b705cfSriastradh		OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1));
549103b705cfSriastradh		do {
549203b705cfSriastradh			int box_x1 = pbox->x1;
549303b705cfSriastradh			int box_y1 = pbox->y1;
549403b705cfSriastradh			int box_x2 = pbox->x2;
549503b705cfSriastradh			int box_y2 = pbox->y2;
549603b705cfSriastradh
549703b705cfSriastradh			pbox++;
549803b705cfSriastradh
549903b705cfSriastradh			DBG(("%s: dst (%d, %d), (%d, %d) + (%d, %d); src (%f, %f), (%f, %f)\n",
550003b705cfSriastradh			     __FUNCTION__, box_x1, box_y1, box_x2, box_y2, pix_xoff, pix_yoff,
550103b705cfSriastradh			     box_x1 * src_scale_x + src_offset_x,
550203b705cfSriastradh			     box_y1 * src_scale_y + src_offset_y,
550303b705cfSriastradh			     box_x2 * src_scale_x + src_offset_x,
550403b705cfSriastradh			     box_y2 * src_scale_y + src_offset_y));
550503b705cfSriastradh
550603b705cfSriastradh			/* bottom right */
550703b705cfSriastradh			OUT_BATCH_F(box_x2 + pix_xoff);
550803b705cfSriastradh			OUT_BATCH_F(box_y2 + pix_yoff);
550903b705cfSriastradh			OUT_BATCH_F(box_x2 * src_scale_x + src_offset_x);
551003b705cfSriastradh			OUT_BATCH_F(box_y2 * src_scale_y + src_offset_y);
551103b705cfSriastradh
551203b705cfSriastradh			/* bottom left */
551303b705cfSriastradh			OUT_BATCH_F(box_x1 + pix_xoff);
551403b705cfSriastradh			OUT_BATCH_F(box_y2 + pix_yoff);
551503b705cfSriastradh			OUT_BATCH_F(box_x1 * src_scale_x + src_offset_x);
551603b705cfSriastradh			OUT_BATCH_F(box_y2 * src_scale_y + src_offset_y);
551703b705cfSriastradh
551803b705cfSriastradh			/* top left */
551903b705cfSriastradh			OUT_BATCH_F(box_x1 + pix_xoff);
552003b705cfSriastradh			OUT_BATCH_F(box_y1 + pix_yoff);
552103b705cfSriastradh			OUT_BATCH_F(box_x1 * src_scale_x + src_offset_x);
552203b705cfSriastradh			OUT_BATCH_F(box_y1 * src_scale_y + src_offset_y);
552303b705cfSriastradh		} while (--nbox_this_time);
552403b705cfSriastradh	} while (nbox);
552503b705cfSriastradh
552603b705cfSriastradh	if (copy) {
552703b705cfSriastradh#ifdef COMPOSITE
552803b705cfSriastradh		pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
552903b705cfSriastradh		pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
553003b705cfSriastradh#else
553103b705cfSriastradh		pix_xoff = 0;
553203b705cfSriastradh		pix_yoff = 0;
553303b705cfSriastradh#endif
553403b705cfSriastradh		sna_blt_copy_boxes(sna, GXcopy,
553503b705cfSriastradh				   dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1,
553603b705cfSriastradh				   priv->gpu_bo, pix_xoff, pix_yoff,
553703b705cfSriastradh				   pixmap->drawable.bitsPerPixel,
553803b705cfSriastradh				   REGION_RECTS(dstRegion),
553903b705cfSriastradh				   REGION_NUM_RECTS(dstRegion));
554003b705cfSriastradh
554103b705cfSriastradh		kgem_bo_destroy(&sna->kgem, dst_bo);
554203b705cfSriastradh	}
554303b705cfSriastradh
554403b705cfSriastradh	if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
554503b705cfSriastradh		if ((pix_xoff | pix_yoff) == 0) {
554603b705cfSriastradh			sna_damage_add(&priv->gpu_damage, dstRegion);
554703b705cfSriastradh			sna_damage_subtract(&priv->cpu_damage, dstRegion);
554803b705cfSriastradh		} else {
554903b705cfSriastradh			sna_damage_add_boxes(&priv->gpu_damage,
555003b705cfSriastradh					     REGION_RECTS(dstRegion),
555103b705cfSriastradh					     REGION_NUM_RECTS(dstRegion),
555203b705cfSriastradh					     pix_xoff, pix_yoff);
555303b705cfSriastradh			sna_damage_subtract_boxes(&priv->cpu_damage,
555403b705cfSriastradh						  REGION_RECTS(dstRegion),
555503b705cfSriastradh						  REGION_NUM_RECTS(dstRegion),
555603b705cfSriastradh						  pix_xoff, pix_yoff);
555703b705cfSriastradh		}
555803b705cfSriastradh	}
555903b705cfSriastradh
556003b705cfSriastradh	return true;
556103b705cfSriastradh}
556203b705cfSriastradh
556303b705cfSriastradhstatic void
556403b705cfSriastradhgen3_render_copy_setup_source(struct sna_composite_channel *channel,
556503b705cfSriastradh			      PixmapPtr pixmap,
556603b705cfSriastradh			      struct kgem_bo *bo)
556703b705cfSriastradh{
556803b705cfSriastradh	int i;
556903b705cfSriastradh
557003b705cfSriastradh	channel->u.gen3.type = SHADER_TEXTURE;
557103b705cfSriastradh	channel->filter = gen3_filter(PictFilterNearest);
557203b705cfSriastradh	channel->repeat = gen3_texture_repeat(RepeatNone);
557303b705cfSriastradh	channel->width  = pixmap->drawable.width;
557403b705cfSriastradh	channel->height = pixmap->drawable.height;
557503b705cfSriastradh	channel->scale[0] = 1.f/pixmap->drawable.width;
557603b705cfSriastradh	channel->scale[1] = 1.f/pixmap->drawable.height;
557703b705cfSriastradh	channel->offset[0] = 0;
557803b705cfSriastradh	channel->offset[1] = 0;
557903b705cfSriastradh
558003b705cfSriastradh	channel->pict_format = sna_format_for_depth(pixmap->drawable.depth);
558103b705cfSriastradh	if (!gen3_composite_channel_set_format(channel, channel->pict_format)) {
558203b705cfSriastradh		for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
558303b705cfSriastradh			if (gen3_tex_formats[i].xfmt == channel->pict_format) {
558403b705cfSriastradh				channel->card_format = gen3_tex_formats[i].card_fmt;
558503b705cfSriastradh				channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
558603b705cfSriastradh				channel->alpha_fixup = true;
558703b705cfSriastradh				break;
558803b705cfSriastradh			}
558903b705cfSriastradh		}
559003b705cfSriastradh	}
559103b705cfSriastradh	assert(channel->card_format);
559203b705cfSriastradh
559303b705cfSriastradh	channel->bo = bo;
559403b705cfSriastradh	channel->is_affine = 1;
559503b705cfSriastradh}
559603b705cfSriastradh
559703b705cfSriastradhstatic bool
559803b705cfSriastradhgen3_render_copy_boxes(struct sna *sna, uint8_t alu,
559903b705cfSriastradh		       PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
560003b705cfSriastradh		       PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
560103b705cfSriastradh		       const BoxRec *box, int n, unsigned flags)
560203b705cfSriastradh{
560303b705cfSriastradh	struct sna_composite_op tmp;
560403b705cfSriastradh
560503b705cfSriastradh#if NO_COPY_BOXES
560603b705cfSriastradh	if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
560703b705cfSriastradh		return false;
560803b705cfSriastradh
560903b705cfSriastradh	return sna_blt_copy_boxes(sna, alu,
561003b705cfSriastradh				  src_bo, src_dx, src_dy,
561103b705cfSriastradh				  dst_bo, dst_dx, dst_dy,
561203b705cfSriastradh				  dst->drawable.bitsPerPixel,
561303b705cfSriastradh				  box, n);
561403b705cfSriastradh#endif
561503b705cfSriastradh
561603b705cfSriastradh	DBG(("%s (%d, %d)->(%d, %d) x %d\n",
561703b705cfSriastradh	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
561803b705cfSriastradh
561903b705cfSriastradh	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
562003b705cfSriastradh	    sna_blt_copy_boxes(sna, alu,
562103b705cfSriastradh			       src_bo, src_dx, src_dy,
562203b705cfSriastradh			       dst_bo, dst_dx, dst_dy,
562303b705cfSriastradh			       dst->drawable.bitsPerPixel,
562403b705cfSriastradh			       box, n))
562503b705cfSriastradh		return true;
562603b705cfSriastradh
562703b705cfSriastradh	if (!(alu == GXcopy || alu == GXclear) ||
562803b705cfSriastradh	    src_bo == dst_bo || /* XXX handle overlap using 3D ? */
562903b705cfSriastradh	    src_bo->pitch > MAX_3D_PITCH ||
563003b705cfSriastradh	    too_large(src->drawable.width, src->drawable.height)) {
563103b705cfSriastradhfallback_blt:
563203b705cfSriastradh		if (!kgem_bo_can_blt(&sna->kgem, src_bo) ||
563303b705cfSriastradh		    !kgem_bo_can_blt(&sna->kgem, dst_bo))
563403b705cfSriastradh			return false;
563503b705cfSriastradh
563603b705cfSriastradh		return sna_blt_copy_boxes_fallback(sna, alu,
563703b705cfSriastradh						   src, src_bo, src_dx, src_dy,
563803b705cfSriastradh						   dst, dst_bo, dst_dx, dst_dy,
563903b705cfSriastradh						   box, n);
564003b705cfSriastradh	}
564103b705cfSriastradh
564203b705cfSriastradh	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
564303b705cfSriastradh		kgem_submit(&sna->kgem);
564403b705cfSriastradh		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
564503b705cfSriastradh			goto fallback_blt;
564603b705cfSriastradh	}
564703b705cfSriastradh
564803b705cfSriastradh	memset(&tmp, 0, sizeof(tmp));
564903b705cfSriastradh	tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
565003b705cfSriastradh
565103b705cfSriastradh	tmp.dst.pixmap = dst;
565203b705cfSriastradh	tmp.dst.width = dst->drawable.width;
565303b705cfSriastradh	tmp.dst.height = dst->drawable.height;
565403b705cfSriastradh	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
565503b705cfSriastradh	tmp.dst.bo = dst_bo;
565603b705cfSriastradh	tmp.dst.x = tmp.dst.y = 0;
565703b705cfSriastradh	tmp.damage = NULL;
565803b705cfSriastradh
565903b705cfSriastradh	sna_render_composite_redirect_init(&tmp);
566003b705cfSriastradh	if (too_large(tmp.dst.width, tmp.dst.height) ||
566103b705cfSriastradh	    dst_bo->pitch > MAX_3D_PITCH) {
566203b705cfSriastradh		BoxRec extents = box[0];
566303b705cfSriastradh		int i;
566403b705cfSriastradh
566503b705cfSriastradh		for (i = 1; i < n; i++) {
566603b705cfSriastradh			if (box[i].x1 < extents.x1)
566703b705cfSriastradh				extents.x1 = box[i].x1;
566803b705cfSriastradh			if (box[i].y1 < extents.y1)
566903b705cfSriastradh				extents.y1 = box[i].y1;
567003b705cfSriastradh
567103b705cfSriastradh			if (box[i].x2 > extents.x2)
567203b705cfSriastradh				extents.x2 = box[i].x2;
567303b705cfSriastradh			if (box[i].y2 > extents.y2)
567403b705cfSriastradh				extents.y2 = box[i].y2;
567503b705cfSriastradh		}
567603b705cfSriastradh		if (!sna_render_composite_redirect(sna, &tmp,
567703b705cfSriastradh						   extents.x1 + dst_dx,
567803b705cfSriastradh						   extents.y1 + dst_dy,
567903b705cfSriastradh						   extents.x2 - extents.x1,
568003b705cfSriastradh						   extents.y2 - extents.y1,
568103b705cfSriastradh						   n > 1))
568203b705cfSriastradh			goto fallback_tiled;
568303b705cfSriastradh	}
568403b705cfSriastradh
568503b705cfSriastradh	gen3_render_copy_setup_source(&tmp.src, src, src_bo);
568603b705cfSriastradh
568703b705cfSriastradh	tmp.floats_per_vertex = 4;
568803b705cfSriastradh	tmp.floats_per_rect = 12;
568903b705cfSriastradh	tmp.mask.bo = NULL;
569003b705cfSriastradh	tmp.mask.u.gen3.type = SHADER_NONE;
569103b705cfSriastradh
569203b705cfSriastradh	dst_dx += tmp.dst.x;
569303b705cfSriastradh	dst_dy += tmp.dst.y;
569403b705cfSriastradh	tmp.dst.x = tmp.dst.y = 0;
569503b705cfSriastradh
569603b705cfSriastradh	gen3_emit_composite_state(sna, &tmp);
569703b705cfSriastradh	gen3_align_vertex(sna, &tmp);
569803b705cfSriastradh
569903b705cfSriastradh	do {
570003b705cfSriastradh		int n_this_time;
570103b705cfSriastradh
570203b705cfSriastradh		n_this_time = gen3_get_rectangles(sna, &tmp, n);
570303b705cfSriastradh		n -= n_this_time;
570403b705cfSriastradh
570503b705cfSriastradh		do {
570603b705cfSriastradh			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
570703b705cfSriastradh			     box->x1 + src_dx, box->y1 + src_dy,
570803b705cfSriastradh			     box->x1 + dst_dx, box->y1 + dst_dy,
570903b705cfSriastradh			     box->x2 - box->x1, box->y2 - box->y1));
571003b705cfSriastradh			OUT_VERTEX(box->x2 + dst_dx);
571103b705cfSriastradh			OUT_VERTEX(box->y2 + dst_dy);
571203b705cfSriastradh			OUT_VERTEX((box->x2 + src_dx) * tmp.src.scale[0]);
571303b705cfSriastradh			OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
571403b705cfSriastradh
571503b705cfSriastradh			OUT_VERTEX(box->x1 + dst_dx);
571603b705cfSriastradh			OUT_VERTEX(box->y2 + dst_dy);
571703b705cfSriastradh			OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
571803b705cfSriastradh			OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
571903b705cfSriastradh
572003b705cfSriastradh			OUT_VERTEX(box->x1 + dst_dx);
572103b705cfSriastradh			OUT_VERTEX(box->y1 + dst_dy);
572203b705cfSriastradh			OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
572303b705cfSriastradh			OUT_VERTEX((box->y1 + src_dy) * tmp.src.scale[1]);
572403b705cfSriastradh
572503b705cfSriastradh			box++;
572603b705cfSriastradh		} while (--n_this_time);
572703b705cfSriastradh	} while (n);
572803b705cfSriastradh
572903b705cfSriastradh	gen3_vertex_flush(sna);
573003b705cfSriastradh	sna_render_composite_redirect_done(sna, &tmp);
573103b705cfSriastradh	return true;
573203b705cfSriastradh
573303b705cfSriastradhfallback_tiled:
573403b705cfSriastradh	return sna_tiling_copy_boxes(sna, alu,
573503b705cfSriastradh				     src, src_bo, src_dx, src_dy,
573603b705cfSriastradh				     dst, dst_bo, dst_dx, dst_dy,
573703b705cfSriastradh				     box, n);
573803b705cfSriastradh}
573903b705cfSriastradh
574003b705cfSriastradhstatic void
574103b705cfSriastradhgen3_render_copy_blt(struct sna *sna,
574203b705cfSriastradh		     const struct sna_copy_op *op,
574303b705cfSriastradh		     int16_t sx, int16_t sy,
574403b705cfSriastradh		     int16_t w, int16_t h,
574503b705cfSriastradh		     int16_t dx, int16_t dy)
574603b705cfSriastradh{
574703b705cfSriastradh	gen3_get_rectangles(sna, &op->base, 1);
574803b705cfSriastradh
574903b705cfSriastradh	OUT_VERTEX(dx+w);
575003b705cfSriastradh	OUT_VERTEX(dy+h);
575103b705cfSriastradh	OUT_VERTEX((sx+w)*op->base.src.scale[0]);
575203b705cfSriastradh	OUT_VERTEX((sy+h)*op->base.src.scale[1]);
575303b705cfSriastradh
575403b705cfSriastradh	OUT_VERTEX(dx);
575503b705cfSriastradh	OUT_VERTEX(dy+h);
575603b705cfSriastradh	OUT_VERTEX(sx*op->base.src.scale[0]);
575703b705cfSriastradh	OUT_VERTEX((sy+h)*op->base.src.scale[1]);
575803b705cfSriastradh
575903b705cfSriastradh	OUT_VERTEX(dx);
576003b705cfSriastradh	OUT_VERTEX(dy);
576103b705cfSriastradh	OUT_VERTEX(sx*op->base.src.scale[0]);
576203b705cfSriastradh	OUT_VERTEX(sy*op->base.src.scale[1]);
576303b705cfSriastradh}
576403b705cfSriastradh
576503b705cfSriastradhstatic void
576603b705cfSriastradhgen3_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
576703b705cfSriastradh{
576803b705cfSriastradh	if (sna->render.vertex_offset)
576903b705cfSriastradh		gen3_vertex_flush(sna);
577003b705cfSriastradh}
577103b705cfSriastradh
577203b705cfSriastradhstatic bool
577303b705cfSriastradhgen3_render_copy(struct sna *sna, uint8_t alu,
577403b705cfSriastradh		 PixmapPtr src, struct kgem_bo *src_bo,
577503b705cfSriastradh		 PixmapPtr dst, struct kgem_bo *dst_bo,
577603b705cfSriastradh		 struct sna_copy_op *tmp)
577703b705cfSriastradh{
577803b705cfSriastradh#if NO_COPY
577903b705cfSriastradh	if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
578003b705cfSriastradh		return false;
578103b705cfSriastradh
578203b705cfSriastradh	return sna_blt_copy(sna, alu,
578303b705cfSriastradh			    src_bo, dst_bo,
578403b705cfSriastradh			    dst->drawable.bitsPerPixel,
578503b705cfSriastradh			    tmp);
578603b705cfSriastradh#endif
578703b705cfSriastradh
578803b705cfSriastradh	/* Prefer to use the BLT */
578903b705cfSriastradh	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
579003b705cfSriastradh	    sna_blt_copy(sna, alu,
579103b705cfSriastradh			 src_bo, dst_bo,
579203b705cfSriastradh			 dst->drawable.bitsPerPixel,
579303b705cfSriastradh			 tmp))
579403b705cfSriastradh		return true;
579503b705cfSriastradh
579603b705cfSriastradh	/* Must use the BLT if we can't RENDER... */
579703b705cfSriastradh	if (!(alu == GXcopy || alu == GXclear) ||
579803b705cfSriastradh	    too_large(src->drawable.width, src->drawable.height) ||
579903b705cfSriastradh	    too_large(dst->drawable.width, dst->drawable.height) ||
580003b705cfSriastradh	    src_bo->pitch > MAX_3D_PITCH || dst_bo->pitch > MAX_3D_PITCH) {
580103b705cfSriastradhfallback:
580203b705cfSriastradh		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
580303b705cfSriastradh			return false;
580403b705cfSriastradh
580503b705cfSriastradh		return sna_blt_copy(sna, alu, src_bo, dst_bo,
580603b705cfSriastradh				    dst->drawable.bitsPerPixel,
580703b705cfSriastradh				    tmp);
580803b705cfSriastradh	}
580903b705cfSriastradh
581003b705cfSriastradh	tmp->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;
581103b705cfSriastradh
581203b705cfSriastradh	tmp->base.dst.pixmap = dst;
581303b705cfSriastradh	tmp->base.dst.width = dst->drawable.width;
581403b705cfSriastradh	tmp->base.dst.height = dst->drawable.height;
581503b705cfSriastradh	tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
581603b705cfSriastradh	tmp->base.dst.bo = dst_bo;
581703b705cfSriastradh
581803b705cfSriastradh	gen3_render_copy_setup_source(&tmp->base.src, src, src_bo);
581903b705cfSriastradh
582003b705cfSriastradh	tmp->base.floats_per_vertex = 4;
582103b705cfSriastradh	tmp->base.floats_per_rect = 12;
582203b705cfSriastradh	tmp->base.mask.bo = NULL;
582303b705cfSriastradh	tmp->base.mask.u.gen3.type = SHADER_NONE;
582403b705cfSriastradh
582503b705cfSriastradh	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
582603b705cfSriastradh		kgem_submit(&sna->kgem);
582703b705cfSriastradh		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
582803b705cfSriastradh			goto fallback;
582903b705cfSriastradh	}
583003b705cfSriastradh
583103b705cfSriastradh	tmp->blt  = gen3_render_copy_blt;
583203b705cfSriastradh	tmp->done = gen3_render_copy_done;
583303b705cfSriastradh
583403b705cfSriastradh	gen3_emit_composite_state(sna, &tmp->base);
583503b705cfSriastradh	gen3_align_vertex(sna, &tmp->base);
583603b705cfSriastradh	return true;
583703b705cfSriastradh}
583803b705cfSriastradh
583903b705cfSriastradhstatic bool
584003b705cfSriastradhgen3_render_fill_boxes_try_blt(struct sna *sna,
584103b705cfSriastradh			       CARD8 op, PictFormat format,
584203b705cfSriastradh			       const xRenderColor *color,
584303b705cfSriastradh			       PixmapPtr dst, struct kgem_bo *dst_bo,
584403b705cfSriastradh			       const BoxRec *box, int n)
584503b705cfSriastradh{
584603b705cfSriastradh	uint8_t alu;
584703b705cfSriastradh	uint32_t pixel;
584803b705cfSriastradh
584903b705cfSriastradh	if (dst_bo->tiling == I915_TILING_Y) {
585003b705cfSriastradh		DBG(("%s: y-tiling, can't blit\n", __FUNCTION__));
585103b705cfSriastradh		assert(!too_large(dst->drawable.width, dst->drawable.height));
585203b705cfSriastradh		return false;
585303b705cfSriastradh	}
585403b705cfSriastradh
585503b705cfSriastradh	if (op > PictOpSrc)
585603b705cfSriastradh		return false;
585703b705cfSriastradh
585803b705cfSriastradh	if (op == PictOpClear) {
585903b705cfSriastradh		alu = GXclear;
586003b705cfSriastradh		pixel = 0;
586103b705cfSriastradh	} else if (!sna_get_pixel_from_rgba(&pixel,
586203b705cfSriastradh					    color->red,
586303b705cfSriastradh					    color->green,
586403b705cfSriastradh					    color->blue,
586503b705cfSriastradh					    color->alpha,
586603b705cfSriastradh					    format))
586703b705cfSriastradh		return false;
586803b705cfSriastradh	else
586903b705cfSriastradh		alu = GXcopy;
587003b705cfSriastradh
587103b705cfSriastradh	return sna_blt_fill_boxes(sna, alu,
587203b705cfSriastradh				  dst_bo, dst->drawable.bitsPerPixel,
587303b705cfSriastradh				  pixel, box, n);
587403b705cfSriastradh}
587503b705cfSriastradh
587603b705cfSriastradhstatic inline bool prefer_fill_blt(struct sna *sna)
587703b705cfSriastradh{
587803b705cfSriastradh#if PREFER_BLT_FILL
587903b705cfSriastradh	return true;
588003b705cfSriastradh#else
588103b705cfSriastradh	return sna->kgem.mode != KGEM_RENDER;
588203b705cfSriastradh#endif
588303b705cfSriastradh}
588403b705cfSriastradh
588503b705cfSriastradhstatic bool
588603b705cfSriastradhgen3_render_fill_boxes(struct sna *sna,
588703b705cfSriastradh		       CARD8 op,
588803b705cfSriastradh		       PictFormat format,
588903b705cfSriastradh		       const xRenderColor *color,
589003b705cfSriastradh		       PixmapPtr dst, struct kgem_bo *dst_bo,
589103b705cfSriastradh		       const BoxRec *box, int n)
589203b705cfSriastradh{
589303b705cfSriastradh	struct sna_composite_op tmp;
589403b705cfSriastradh	uint32_t pixel;
589503b705cfSriastradh
589603b705cfSriastradh	if (op >= ARRAY_SIZE(gen3_blend_op)) {
589703b705cfSriastradh		DBG(("%s: fallback due to unhandled blend op: %d\n",
589803b705cfSriastradh		     __FUNCTION__, op));
589903b705cfSriastradh		return false;
590003b705cfSriastradh	}
590103b705cfSriastradh
590203b705cfSriastradh#if NO_FILL_BOXES
590303b705cfSriastradh	return gen3_render_fill_boxes_try_blt(sna, op, format, color,
590403b705cfSriastradh					      dst, dst_bo,
590503b705cfSriastradh					      box, n);
590603b705cfSriastradh#endif
590703b705cfSriastradh
590803b705cfSriastradh	DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x))\n",
590903b705cfSriastradh	     __FUNCTION__, op, (int)format,
591003b705cfSriastradh	     color->red, color->green, color->blue, color->alpha));
591103b705cfSriastradh
591203b705cfSriastradh	if (too_large(dst->drawable.width, dst->drawable.height) ||
591303b705cfSriastradh	    dst_bo->pitch > MAX_3D_PITCH ||
591403b705cfSriastradh	    !gen3_check_dst_format(format)) {
591503b705cfSriastradh		DBG(("%s: try blt, too large or incompatible destination\n",
591603b705cfSriastradh		     __FUNCTION__));
591703b705cfSriastradh		if (gen3_render_fill_boxes_try_blt(sna, op, format, color,
591803b705cfSriastradh						   dst, dst_bo,
591903b705cfSriastradh						   box, n))
592003b705cfSriastradh			return true;
592103b705cfSriastradh
592203b705cfSriastradh		if (!gen3_check_dst_format(format))
592303b705cfSriastradh			return false;
592403b705cfSriastradh
592503b705cfSriastradh		return sna_tiling_fill_boxes(sna, op, format, color,
592603b705cfSriastradh					     dst, dst_bo, box, n);
592703b705cfSriastradh	}
592803b705cfSriastradh
592903b705cfSriastradh	if (prefer_fill_blt(sna) &&
593003b705cfSriastradh	    gen3_render_fill_boxes_try_blt(sna, op, format, color,
593103b705cfSriastradh					   dst, dst_bo,
593203b705cfSriastradh					   box, n))
593303b705cfSriastradh		return true;
593403b705cfSriastradh
593503b705cfSriastradh	if (op == PictOpClear) {
593603b705cfSriastradh		pixel = 0;
593703b705cfSriastradh	} else {
593803b705cfSriastradh		if (!sna_get_pixel_from_rgba(&pixel,
593903b705cfSriastradh					     color->red,
594003b705cfSriastradh					     color->green,
594103b705cfSriastradh					     color->blue,
594203b705cfSriastradh					     color->alpha,
594303b705cfSriastradh					     PICT_a8r8g8b8)) {
594403b705cfSriastradh			assert(0);
594503b705cfSriastradh			return false;
594603b705cfSriastradh		}
594703b705cfSriastradh	}
594803b705cfSriastradh	DBG(("%s: using shader for op=%d, format=%x, pixel=%x\n",
594903b705cfSriastradh	     __FUNCTION__, op, (int)format, pixel));
595003b705cfSriastradh
595103b705cfSriastradh	tmp.op = op;
595203b705cfSriastradh	tmp.dst.pixmap = dst;
595303b705cfSriastradh	tmp.dst.width = dst->drawable.width;
595403b705cfSriastradh	tmp.dst.height = dst->drawable.height;
595503b705cfSriastradh	tmp.dst.format = format;
595603b705cfSriastradh	tmp.dst.bo = dst_bo;
595703b705cfSriastradh	tmp.damage = NULL;
595803b705cfSriastradh	tmp.floats_per_vertex = 2;
595903b705cfSriastradh	tmp.floats_per_rect = 6;
596003b705cfSriastradh	tmp.rb_reversed = 0;
596103b705cfSriastradh	tmp.has_component_alpha = 0;
596203b705cfSriastradh	tmp.need_magic_ca_pass = false;
596303b705cfSriastradh
596403b705cfSriastradh	gen3_init_solid(&tmp.src, pixel);
596503b705cfSriastradh	tmp.mask.bo = NULL;
596603b705cfSriastradh	tmp.mask.u.gen3.type = SHADER_NONE;
596703b705cfSriastradh	tmp.u.gen3.num_constants = 0;
596803b705cfSriastradh
596903b705cfSriastradh	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
597003b705cfSriastradh		kgem_submit(&sna->kgem);
597103b705cfSriastradh		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
597203b705cfSriastradh	}
597303b705cfSriastradh
597403b705cfSriastradh	gen3_emit_composite_state(sna, &tmp);
597503b705cfSriastradh	gen3_align_vertex(sna, &tmp);
597603b705cfSriastradh
597703b705cfSriastradh	do {
597803b705cfSriastradh		int n_this_time;
597903b705cfSriastradh
598003b705cfSriastradh		n_this_time = gen3_get_rectangles(sna, &tmp, n);
598103b705cfSriastradh		n -= n_this_time;
598203b705cfSriastradh
598303b705cfSriastradh		do {
598403b705cfSriastradh			DBG(("	(%d, %d), (%d, %d): %x\n",
598503b705cfSriastradh			     box->x1, box->y1, box->x2, box->y2, pixel));
598603b705cfSriastradh			OUT_VERTEX(box->x2);
598703b705cfSriastradh			OUT_VERTEX(box->y2);
598803b705cfSriastradh			OUT_VERTEX(box->x1);
598903b705cfSriastradh			OUT_VERTEX(box->y2);
599003b705cfSriastradh			OUT_VERTEX(box->x1);
599103b705cfSriastradh			OUT_VERTEX(box->y1);
599203b705cfSriastradh			box++;
599303b705cfSriastradh		} while (--n_this_time);
599403b705cfSriastradh	} while (n);
599503b705cfSriastradh
599603b705cfSriastradh	gen3_vertex_flush(sna);
599703b705cfSriastradh	return true;
599803b705cfSriastradh}
599903b705cfSriastradh
600003b705cfSriastradhstatic void
600103b705cfSriastradhgen3_render_fill_op_blt(struct sna *sna,
600203b705cfSriastradh			const struct sna_fill_op *op,
600303b705cfSriastradh			int16_t x, int16_t y, int16_t w, int16_t h)
600403b705cfSriastradh{
600503b705cfSriastradh	gen3_get_rectangles(sna, &op->base, 1);
600603b705cfSriastradh
600703b705cfSriastradh	OUT_VERTEX(x+w);
600803b705cfSriastradh	OUT_VERTEX(y+h);
600903b705cfSriastradh	OUT_VERTEX(x);
601003b705cfSriastradh	OUT_VERTEX(y+h);
601103b705cfSriastradh	OUT_VERTEX(x);
601203b705cfSriastradh	OUT_VERTEX(y);
601303b705cfSriastradh}
601403b705cfSriastradh
601503b705cfSriastradhfastcall static void
601603b705cfSriastradhgen3_render_fill_op_box(struct sna *sna,
601703b705cfSriastradh			const struct sna_fill_op *op,
601803b705cfSriastradh			const BoxRec *box)
601903b705cfSriastradh{
602003b705cfSriastradh	gen3_get_rectangles(sna, &op->base, 1);
602103b705cfSriastradh
602203b705cfSriastradh	OUT_VERTEX(box->x2);
602303b705cfSriastradh	OUT_VERTEX(box->y2);
602403b705cfSriastradh	OUT_VERTEX(box->x1);
602503b705cfSriastradh	OUT_VERTEX(box->y2);
602603b705cfSriastradh	OUT_VERTEX(box->x1);
602703b705cfSriastradh	OUT_VERTEX(box->y1);
602803b705cfSriastradh}
602903b705cfSriastradh
603003b705cfSriastradhfastcall static void
603103b705cfSriastradhgen3_render_fill_op_boxes(struct sna *sna,
603203b705cfSriastradh			  const struct sna_fill_op *op,
603303b705cfSriastradh			  const BoxRec *box,
603403b705cfSriastradh			  int nbox)
603503b705cfSriastradh{
603603b705cfSriastradh	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
603703b705cfSriastradh	     box->x1, box->y1, box->x2, box->y2, nbox));
603803b705cfSriastradh
603903b705cfSriastradh	do {
604003b705cfSriastradh		int nbox_this_time;
604103b705cfSriastradh
604203b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
604303b705cfSriastradh		nbox -= nbox_this_time;
604403b705cfSriastradh
604503b705cfSriastradh		do {
604603b705cfSriastradh			OUT_VERTEX(box->x2);
604703b705cfSriastradh			OUT_VERTEX(box->y2);
604803b705cfSriastradh			OUT_VERTEX(box->x1);
604903b705cfSriastradh			OUT_VERTEX(box->y2);
605003b705cfSriastradh			OUT_VERTEX(box->x1);
605103b705cfSriastradh			OUT_VERTEX(box->y1);
605203b705cfSriastradh			box++;
605303b705cfSriastradh		} while (--nbox_this_time);
605403b705cfSriastradh	} while (nbox);
605503b705cfSriastradh}
605603b705cfSriastradh
605703b705cfSriastradhstatic void
605803b705cfSriastradhgen3_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
605903b705cfSriastradh{
606003b705cfSriastradh	if (sna->render.vertex_offset)
606103b705cfSriastradh		gen3_vertex_flush(sna);
606203b705cfSriastradh}
606303b705cfSriastradh
606403b705cfSriastradhstatic bool
606503b705cfSriastradhgen3_render_fill(struct sna *sna, uint8_t alu,
606603b705cfSriastradh		 PixmapPtr dst, struct kgem_bo *dst_bo,
606703b705cfSriastradh		 uint32_t color,
606803b705cfSriastradh		 struct sna_fill_op *tmp)
606903b705cfSriastradh{
607003b705cfSriastradh#if NO_FILL
607103b705cfSriastradh	return sna_blt_fill(sna, alu,
607203b705cfSriastradh			    dst_bo, dst->drawable.bitsPerPixel,
607303b705cfSriastradh			    color,
607403b705cfSriastradh			    tmp);
607503b705cfSriastradh#endif
607603b705cfSriastradh
607703b705cfSriastradh	/* Prefer to use the BLT if already engaged */
607803b705cfSriastradh	if (prefer_fill_blt(sna) &&
607903b705cfSriastradh	    sna_blt_fill(sna, alu,
608003b705cfSriastradh			 dst_bo, dst->drawable.bitsPerPixel,
608103b705cfSriastradh			 color,
608203b705cfSriastradh			 tmp))
608303b705cfSriastradh		return true;
608403b705cfSriastradh
608503b705cfSriastradh	/* Must use the BLT if we can't RENDER... */
608603b705cfSriastradh	if (!(alu == GXcopy || alu == GXclear) ||
608703b705cfSriastradh	    too_large(dst->drawable.width, dst->drawable.height) ||
608803b705cfSriastradh	    dst_bo->pitch > MAX_3D_PITCH)
608903b705cfSriastradh		return sna_blt_fill(sna, alu,
609003b705cfSriastradh				    dst_bo, dst->drawable.bitsPerPixel,
609103b705cfSriastradh				    color,
609203b705cfSriastradh				    tmp);
609303b705cfSriastradh
609403b705cfSriastradh	if (alu == GXclear)
609503b705cfSriastradh		color = 0;
609603b705cfSriastradh
609703b705cfSriastradh	tmp->base.op = color == 0 ? PictOpClear : PictOpSrc;
609803b705cfSriastradh	tmp->base.dst.pixmap = dst;
609903b705cfSriastradh	tmp->base.dst.width = dst->drawable.width;
610003b705cfSriastradh	tmp->base.dst.height = dst->drawable.height;
610103b705cfSriastradh	tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
610203b705cfSriastradh	tmp->base.dst.bo = dst_bo;
610303b705cfSriastradh	tmp->base.floats_per_vertex = 2;
610403b705cfSriastradh	tmp->base.floats_per_rect = 6;
610503b705cfSriastradh	tmp->base.need_magic_ca_pass = 0;
610603b705cfSriastradh	tmp->base.has_component_alpha = 0;
610703b705cfSriastradh	tmp->base.rb_reversed = 0;
610803b705cfSriastradh
610903b705cfSriastradh	gen3_init_solid(&tmp->base.src,
611003b705cfSriastradh			sna_rgba_for_color(color, dst->drawable.depth));
611103b705cfSriastradh	tmp->base.mask.bo = NULL;
611203b705cfSriastradh	tmp->base.mask.u.gen3.type = SHADER_NONE;
611303b705cfSriastradh	tmp->base.u.gen3.num_constants = 0;
611403b705cfSriastradh
611503b705cfSriastradh	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
611603b705cfSriastradh		kgem_submit(&sna->kgem);
611703b705cfSriastradh		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
611803b705cfSriastradh	}
611903b705cfSriastradh
612003b705cfSriastradh	tmp->blt   = gen3_render_fill_op_blt;
612103b705cfSriastradh	tmp->box   = gen3_render_fill_op_box;
612203b705cfSriastradh	tmp->boxes = gen3_render_fill_op_boxes;
612303b705cfSriastradh	tmp->done  = gen3_render_fill_op_done;
612403b705cfSriastradh
612503b705cfSriastradh	gen3_emit_composite_state(sna, &tmp->base);
612603b705cfSriastradh	gen3_align_vertex(sna, &tmp->base);
612703b705cfSriastradh	return true;
612803b705cfSriastradh}
612903b705cfSriastradh
613003b705cfSriastradhstatic bool
613103b705cfSriastradhgen3_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
613203b705cfSriastradh			     uint32_t color,
613303b705cfSriastradh			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
613403b705cfSriastradh			     uint8_t alu)
613503b705cfSriastradh{
613603b705cfSriastradh	BoxRec box;
613703b705cfSriastradh
613803b705cfSriastradh	box.x1 = x1;
613903b705cfSriastradh	box.y1 = y1;
614003b705cfSriastradh	box.x2 = x2;
614103b705cfSriastradh	box.y2 = y2;
614203b705cfSriastradh
614303b705cfSriastradh	return sna_blt_fill_boxes(sna, alu,
614403b705cfSriastradh				  bo, dst->drawable.bitsPerPixel,
614503b705cfSriastradh				  color, &box, 1);
614603b705cfSriastradh}
614703b705cfSriastradh
614803b705cfSriastradhstatic bool
614903b705cfSriastradhgen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
615003b705cfSriastradh		     uint32_t color,
615103b705cfSriastradh		     int16_t x1, int16_t y1,
615203b705cfSriastradh		     int16_t x2, int16_t y2,
615303b705cfSriastradh		     uint8_t alu)
615403b705cfSriastradh{
615503b705cfSriastradh	struct sna_composite_op tmp;
615603b705cfSriastradh
615703b705cfSriastradh#if NO_FILL_ONE
615803b705cfSriastradh	return gen3_render_fill_one_try_blt(sna, dst, bo, color,
615903b705cfSriastradh					    x1, y1, x2, y2, alu);
616003b705cfSriastradh#endif
616103b705cfSriastradh
616203b705cfSriastradh	/* Prefer to use the BLT if already engaged */
616303b705cfSriastradh	if (prefer_fill_blt(sna) &&
616403b705cfSriastradh	    gen3_render_fill_one_try_blt(sna, dst, bo, color,
616503b705cfSriastradh					 x1, y1, x2, y2, alu))
616603b705cfSriastradh		return true;
616703b705cfSriastradh
616803b705cfSriastradh	/* Must use the BLT if we can't RENDER... */
616903b705cfSriastradh	if (!(alu == GXcopy || alu == GXclear) ||
617003b705cfSriastradh	    too_large(dst->drawable.width, dst->drawable.height) ||
617103b705cfSriastradh	    bo->pitch > MAX_3D_PITCH)
617203b705cfSriastradh		return gen3_render_fill_one_try_blt(sna, dst, bo, color,
617303b705cfSriastradh						    x1, y1, x2, y2, alu);
617403b705cfSriastradh
617503b705cfSriastradh	if (alu == GXclear)
617603b705cfSriastradh		color = 0;
617703b705cfSriastradh
617803b705cfSriastradh	tmp.op = color == 0 ? PictOpClear : PictOpSrc;
617903b705cfSriastradh	tmp.dst.pixmap = dst;
618003b705cfSriastradh	tmp.dst.width = dst->drawable.width;
618103b705cfSriastradh	tmp.dst.height = dst->drawable.height;
618203b705cfSriastradh	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
618303b705cfSriastradh	tmp.dst.bo = bo;
618403b705cfSriastradh	tmp.floats_per_vertex = 2;
618503b705cfSriastradh	tmp.floats_per_rect = 6;
618603b705cfSriastradh	tmp.need_magic_ca_pass = 0;
618703b705cfSriastradh	tmp.has_component_alpha = 0;
618803b705cfSriastradh	tmp.rb_reversed = 0;
618903b705cfSriastradh
619003b705cfSriastradh	gen3_init_solid(&tmp.src,
619103b705cfSriastradh			sna_rgba_for_color(color, dst->drawable.depth));
619203b705cfSriastradh	tmp.mask.bo = NULL;
619303b705cfSriastradh	tmp.mask.u.gen3.type = SHADER_NONE;
619403b705cfSriastradh	tmp.u.gen3.num_constants = 0;
619503b705cfSriastradh
619603b705cfSriastradh	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
619703b705cfSriastradh		kgem_submit(&sna->kgem);
619803b705cfSriastradh		if (gen3_render_fill_one_try_blt(sna, dst, bo, color,
619903b705cfSriastradh						 x1, y1, x2, y2, alu))
620003b705cfSriastradh			return true;
620103b705cfSriastradh	}
620203b705cfSriastradh
620303b705cfSriastradh	gen3_emit_composite_state(sna, &tmp);
620403b705cfSriastradh	gen3_align_vertex(sna, &tmp);
620503b705cfSriastradh	gen3_get_rectangles(sna, &tmp, 1);
620603b705cfSriastradh	DBG(("	(%d, %d), (%d, %d): %x\n", x1, y1, x2, y2, color));
620703b705cfSriastradh	OUT_VERTEX(x2);
620803b705cfSriastradh	OUT_VERTEX(y2);
620903b705cfSriastradh	OUT_VERTEX(x1);
621003b705cfSriastradh	OUT_VERTEX(y2);
621103b705cfSriastradh	OUT_VERTEX(x1);
621203b705cfSriastradh	OUT_VERTEX(y1);
621303b705cfSriastradh	gen3_vertex_flush(sna);
621403b705cfSriastradh
621503b705cfSriastradh	return true;
621603b705cfSriastradh}
621703b705cfSriastradh
621803b705cfSriastradhstatic void gen3_render_flush(struct sna *sna)
621903b705cfSriastradh{
622003b705cfSriastradh	gen3_vertex_close(sna);
622103b705cfSriastradh
622203b705cfSriastradh	assert(sna->render.vertex_reloc[0] == 0);
622303b705cfSriastradh	assert(sna->render.vertex_offset == 0);
622403b705cfSriastradh}
622503b705cfSriastradh
/*
 * sna_render.fini hook: intentionally empty.
 */
static void
gen3_render_fini(struct sna *sna)
{
	(void)sna;
}
623003b705cfSriastradh
623103b705cfSriastradhconst char *gen3_render_init(struct sna *sna, const char *backend)
623203b705cfSriastradh{
623303b705cfSriastradh	struct sna_render *render = &sna->render;
623403b705cfSriastradh
623503b705cfSriastradh#if !NO_COMPOSITE
623603b705cfSriastradh	render->composite = gen3_render_composite;
623703b705cfSriastradh	render->prefer_gpu |= PREFER_GPU_RENDER;
623803b705cfSriastradh#endif
623903b705cfSriastradh#if !NO_COMPOSITE_SPANS
624003b705cfSriastradh	render->check_composite_spans = gen3_check_composite_spans;
624103b705cfSriastradh	render->composite_spans = gen3_render_composite_spans;
624203b705cfSriastradh	render->prefer_gpu |= PREFER_GPU_SPANS;
624303b705cfSriastradh#endif
624403b705cfSriastradh
624503b705cfSriastradh	render->video = gen3_render_video;
624603b705cfSriastradh
624703b705cfSriastradh	render->copy_boxes = gen3_render_copy_boxes;
624803b705cfSriastradh	render->copy = gen3_render_copy;
624903b705cfSriastradh
625003b705cfSriastradh	render->fill_boxes = gen3_render_fill_boxes;
625103b705cfSriastradh	render->fill = gen3_render_fill;
625203b705cfSriastradh	render->fill_one = gen3_render_fill_one;
625303b705cfSriastradh
625403b705cfSriastradh	render->reset = gen3_render_reset;
625503b705cfSriastradh	render->flush = gen3_render_flush;
625603b705cfSriastradh	render->fini = gen3_render_fini;
625703b705cfSriastradh
625803b705cfSriastradh	render->max_3d_size = MAX_3D_SIZE;
625903b705cfSriastradh	render->max_3d_pitch = MAX_3D_PITCH;
626003b705cfSriastradh
626103b705cfSriastradh	sna->kgem.retire = gen3_render_retire;
626203b705cfSriastradh	sna->kgem.expire = gen3_render_expire;
626303b705cfSriastradh	return "Alviso (gen3)";
626403b705cfSriastradh}
6265