/*
 * Copyright © 2010-2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */
2703b705cfSriastradh
2803b705cfSriastradh#ifdef HAVE_CONFIG_H
2903b705cfSriastradh#include "config.h"
3003b705cfSriastradh#endif
3103b705cfSriastradh
3203b705cfSriastradh#include "sna.h"
3303b705cfSriastradh#include "sna_render.h"
3403b705cfSriastradh#include "sna_render_inline.h"
3503b705cfSriastradh#include "sna_reg.h"
3603b705cfSriastradh#include "sna_video.h"
3703b705cfSriastradh
3803b705cfSriastradh#include "gen3_render.h"
3903b705cfSriastradh
/*
 * Build-time switches for this file.  Every NO_* switch is 0 and
 * PREFER_BLT_FILL is 1.
 *
 * NOTE(review): the names suggest each NO_* knob turns off one render
 * entry point (useful when debugging) and that PREFER_BLT_FILL biases
 * fills towards the blitter.  Their uses lie outside this part of the
 * file -- confirm there.
 */
#define NO_COMPOSITE		0
#define NO_COMPOSITE_SPANS	0
#define NO_COPY			0
#define NO_COPY_BOXES		0
#define NO_FILL			0
#define NO_FILL_ONE		0
#define NO_FILL_BOXES		0

#define PREFER_BLT_FILL		1
4903b705cfSriastradh
/*
 * Shader kinds, numbered consecutively from 0.
 *
 * NOTE(review): presumably identifies how a composite channel is
 * generated (constant colour, gradient, texture, ...); the users of
 * these values are outside this part of the file.
 */
enum {
	SHADER_NONE	= 0,
	SHADER_ZERO	= 1,
	SHADER_BLACK	= 2,
	SHADER_WHITE	= 3,
	SHADER_CONSTANT	= 4,
	SHADER_LINEAR	= 5,
	SHADER_RADIAL	= 6,
	SHADER_TEXTURE	= 7,
	SHADER_OPACITY	= 8,
};
6103b705cfSriastradh
/*
 * Limits checked before using the 3D pipeline: too_large() compares a
 * width/height against MAX_3D_SIZE and gen3_check_pitch_3d() compares a
 * buffer pitch against MAX_3D_PITCH.
 */
#define MAX_3D_SIZE	2048
#define MAX_3D_PITCH	8192

/*
 * Emission shorthands.  All three expand to a call that names `sna`, so
 * they can only be used where a variable of that name is in scope.
 */
#define OUT_BATCH(v)	batch_emit(sna, (v))
#define OUT_BATCH_F(v)	batch_emit_float(sna, (v))
#define OUT_VERTEX(v)	vertex_emit(sna, (v))
6803b705cfSriastradh
/*
 * Radial gradient variants.
 * NOTE(review): the names suggest a one-circle versus two-circle radial
 * gradient; the code that selects between them is not visible here.
 */
enum gen3_radial_mode {
	RADIAL_ONE = 0,
	RADIAL_TWO = 1
};
7303b705cfSriastradh
7403b705cfSriastradhstatic const struct blendinfo {
7503b705cfSriastradh	bool dst_alpha;
7603b705cfSriastradh	bool src_alpha;
7703b705cfSriastradh	uint32_t src_blend;
7803b705cfSriastradh	uint32_t dst_blend;
7903b705cfSriastradh} gen3_blend_op[] = {
8003b705cfSriastradh	/* Clear */	{0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO},
8103b705cfSriastradh	/* Src */	{0, 0, BLENDFACT_ONE, BLENDFACT_ZERO},
8203b705cfSriastradh	/* Dst */	{0, 0, BLENDFACT_ZERO, BLENDFACT_ONE},
8303b705cfSriastradh	/* Over */	{0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA},
8403b705cfSriastradh	/* OverReverse */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE},
8503b705cfSriastradh	/* In */	{1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO},
8603b705cfSriastradh	/* InReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA},
8703b705cfSriastradh	/* Out */	{1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO},
8803b705cfSriastradh	/* OutReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA},
8903b705cfSriastradh	/* Atop */	{1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
9003b705cfSriastradh	/* AtopReverse */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA},
9103b705cfSriastradh	/* Xor */	{1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
9203b705cfSriastradh	/* Add */	{0, 0, BLENDFACT_ONE, BLENDFACT_ONE},
9303b705cfSriastradh};
9403b705cfSriastradh
/*
 * S6 value used when no blending is wanted: colour writes enabled with
 * an ADD function and ONE/ZERO factors.  Unlike the value assembled at
 * the end of gen3_get_blend_cntl(), S6_CBUF_BLEND_ENABLE is not set.
 */
#define S6_COLOR_WRITE_ONLY \
	(S6_COLOR_WRITE_ENABLE | \
	 (BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) | \
	 (BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT) | \
	 (BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT))
10003b705cfSriastradh
10103b705cfSriastradhstatic const struct formatinfo {
10203b705cfSriastradh	unsigned int fmt, xfmt;
10303b705cfSriastradh	uint32_t card_fmt;
10403b705cfSriastradh	bool rb_reversed;
10503b705cfSriastradh} gen3_tex_formats[] = {
10603b705cfSriastradh	{PICT_a8, 0, MAPSURF_8BIT | MT_8BIT_A8, false},
10703b705cfSriastradh	{PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, false},
10803b705cfSriastradh	{PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, false},
10903b705cfSriastradh	{PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, false},
11003b705cfSriastradh	{PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false},
111fe8aea9eSmrg#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
11203b705cfSriastradh	{PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, false},
11303b705cfSriastradh	{PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, false},
11442542f5fSchristos#endif
11503b705cfSriastradh	{PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, false},
11603b705cfSriastradh	{PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, true},
11703b705cfSriastradh	{PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, false},
11803b705cfSriastradh	{PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, true},
11903b705cfSriastradh	{PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, false},
12003b705cfSriastradh	{PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, true},
12103b705cfSriastradh};
12203b705cfSriastradh
/* Alias: xFixedToDouble(f) forwards to pixman_fixed_to_double(f). */
#define xFixedToDouble(f) pixman_fixed_to_double(f)
12403b705cfSriastradh
12503b705cfSriastradhstatic inline bool too_large(int width, int height)
12603b705cfSriastradh{
12703b705cfSriastradh	return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
12803b705cfSriastradh}
12903b705cfSriastradh
13003b705cfSriastradhstatic inline uint32_t gen3_buf_tiling(uint32_t tiling)
13103b705cfSriastradh{
13203b705cfSriastradh	uint32_t v = 0;
13303b705cfSriastradh	switch (tiling) {
13403b705cfSriastradh	case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y;
13503b705cfSriastradh	case I915_TILING_X: v |= BUF_3D_TILED_SURFACE;
13603b705cfSriastradh	case I915_TILING_NONE: break;
13703b705cfSriastradh	}
13803b705cfSriastradh	return v;
13903b705cfSriastradh}
14003b705cfSriastradh
14103b705cfSriastradhstatic inline bool
14203b705cfSriastradhgen3_check_pitch_3d(struct kgem_bo *bo)
14303b705cfSriastradh{
14403b705cfSriastradh	return bo->pitch <= MAX_3D_PITCH;
14503b705cfSriastradh}
14603b705cfSriastradh
14703b705cfSriastradhstatic uint32_t gen3_get_blend_cntl(int op,
14803b705cfSriastradh				    bool has_component_alpha,
14903b705cfSriastradh				    uint32_t dst_format)
15003b705cfSriastradh{
15103b705cfSriastradh	uint32_t sblend = gen3_blend_op[op].src_blend;
15203b705cfSriastradh	uint32_t dblend = gen3_blend_op[op].dst_blend;
15303b705cfSriastradh
15403b705cfSriastradh	if (op <= PictOpSrc) /* for clear and src disable blending */
15503b705cfSriastradh		return S6_COLOR_WRITE_ONLY;
15603b705cfSriastradh
15703b705cfSriastradh	/* If there's no dst alpha channel, adjust the blend op so that we'll
15803b705cfSriastradh	 * treat it as always 1.
15903b705cfSriastradh	 */
16003b705cfSriastradh	if (gen3_blend_op[op].dst_alpha) {
16103b705cfSriastradh		if (PICT_FORMAT_A(dst_format) == 0) {
16203b705cfSriastradh			if (sblend == BLENDFACT_DST_ALPHA)
16303b705cfSriastradh				sblend = BLENDFACT_ONE;
16403b705cfSriastradh			else if (sblend == BLENDFACT_INV_DST_ALPHA)
16503b705cfSriastradh				sblend = BLENDFACT_ZERO;
16603b705cfSriastradh		}
16703b705cfSriastradh
16803b705cfSriastradh		/* gen3 engine reads 8bit color buffer into green channel
16903b705cfSriastradh		 * in cases like color buffer blending etc., and also writes
17003b705cfSriastradh		 * back green channel.  So with dst_alpha blend we should use
17103b705cfSriastradh		 * color factor. See spec on "8-bit rendering".
17203b705cfSriastradh		 */
17303b705cfSriastradh		if (dst_format == PICT_a8) {
17403b705cfSriastradh			if (sblend == BLENDFACT_DST_ALPHA)
17503b705cfSriastradh				sblend = BLENDFACT_DST_COLR;
17603b705cfSriastradh			else if (sblend == BLENDFACT_INV_DST_ALPHA)
17703b705cfSriastradh				sblend = BLENDFACT_INV_DST_COLR;
17803b705cfSriastradh		}
17903b705cfSriastradh	}
18003b705cfSriastradh
18103b705cfSriastradh	/* If the source alpha is being used, then we should only be in a case
18203b705cfSriastradh	 * where the source blend factor is 0, and the source blend value is the
18303b705cfSriastradh	 * mask channels multiplied by the source picture's alpha.
18403b705cfSriastradh	 */
18503b705cfSriastradh	if (has_component_alpha && gen3_blend_op[op].src_alpha) {
18603b705cfSriastradh		if (dblend == BLENDFACT_SRC_ALPHA)
18703b705cfSriastradh			dblend = BLENDFACT_SRC_COLR;
18803b705cfSriastradh		else if (dblend == BLENDFACT_INV_SRC_ALPHA)
18903b705cfSriastradh			dblend = BLENDFACT_INV_SRC_COLR;
19003b705cfSriastradh	}
19103b705cfSriastradh
19203b705cfSriastradh	return (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
19303b705cfSriastradh		BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
19403b705cfSriastradh		sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT |
19503b705cfSriastradh		dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
19603b705cfSriastradh}
19703b705cfSriastradh
19803b705cfSriastradhstatic bool gen3_check_dst_format(uint32_t format)
19903b705cfSriastradh{
20003b705cfSriastradh	switch (format) {
20103b705cfSriastradh	case PICT_a8r8g8b8:
20203b705cfSriastradh	case PICT_x8r8g8b8:
20303b705cfSriastradh	case PICT_a8b8g8r8:
20403b705cfSriastradh	case PICT_x8b8g8r8:
20503b705cfSriastradh	case PICT_r5g6b5:
20603b705cfSriastradh	case PICT_b5g6r5:
20703b705cfSriastradh	case PICT_a1r5g5b5:
20803b705cfSriastradh	case PICT_x1r5g5b5:
20903b705cfSriastradh	case PICT_a1b5g5r5:
21003b705cfSriastradh	case PICT_x1b5g5r5:
211fe8aea9eSmrg#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
21203b705cfSriastradh	case PICT_a2r10g10b10:
21303b705cfSriastradh	case PICT_x2r10g10b10:
21403b705cfSriastradh	case PICT_a2b10g10r10:
21503b705cfSriastradh	case PICT_x2b10g10r10:
21642542f5fSchristos#endif
21703b705cfSriastradh	case PICT_a8:
21803b705cfSriastradh	case PICT_a4r4g4b4:
21903b705cfSriastradh	case PICT_x4r4g4b4:
22003b705cfSriastradh	case PICT_a4b4g4r4:
22103b705cfSriastradh	case PICT_x4b4g4r4:
22203b705cfSriastradh		return true;
22303b705cfSriastradh	default:
22403b705cfSriastradh		return false;
22503b705cfSriastradh	}
22603b705cfSriastradh}
22703b705cfSriastradh
22803b705cfSriastradhstatic bool gen3_dst_rb_reversed(uint32_t format)
22903b705cfSriastradh{
23003b705cfSriastradh	switch (format) {
23103b705cfSriastradh	case PICT_a8r8g8b8:
23203b705cfSriastradh	case PICT_x8r8g8b8:
23303b705cfSriastradh	case PICT_r5g6b5:
23403b705cfSriastradh	case PICT_a1r5g5b5:
23503b705cfSriastradh	case PICT_x1r5g5b5:
236fe8aea9eSmrg#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
23703b705cfSriastradh	case PICT_a2r10g10b10:
23803b705cfSriastradh	case PICT_x2r10g10b10:
23942542f5fSchristos#endif
24003b705cfSriastradh	case PICT_a8:
24103b705cfSriastradh	case PICT_a4r4g4b4:
24203b705cfSriastradh	case PICT_x4r4g4b4:
24303b705cfSriastradh		return false;
24403b705cfSriastradh	default:
24503b705cfSriastradh		return true;
24603b705cfSriastradh	}
24703b705cfSriastradh}
24803b705cfSriastradh
/*
 * Position a bias value in its field: horizontal at bit 20, vertical at
 * bit 16.  gen3_get_dst_format() ORs both into the value it returns.
 */
#define DSTORG_HORT_BIAS(x)	((x) << 20)
#define DSTORG_VERT_BIAS(x)	((x) << 16)
25103b705cfSriastradh
25203b705cfSriastradhstatic uint32_t gen3_get_dst_format(uint32_t format)
25303b705cfSriastradh{
25403b705cfSriastradh#define BIAS (DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8))
25503b705cfSriastradh	switch (format) {
25603b705cfSriastradh	default:
25703b705cfSriastradh	case PICT_a8r8g8b8:
25803b705cfSriastradh	case PICT_x8r8g8b8:
25903b705cfSriastradh	case PICT_a8b8g8r8:
26003b705cfSriastradh	case PICT_x8b8g8r8:
26103b705cfSriastradh		return BIAS | COLR_BUF_ARGB8888;
26203b705cfSriastradh	case PICT_r5g6b5:
26303b705cfSriastradh	case PICT_b5g6r5:
26403b705cfSriastradh		return BIAS | COLR_BUF_RGB565;
26503b705cfSriastradh	case PICT_a1r5g5b5:
26603b705cfSriastradh	case PICT_x1r5g5b5:
26703b705cfSriastradh	case PICT_a1b5g5r5:
26803b705cfSriastradh	case PICT_x1b5g5r5:
26903b705cfSriastradh		return BIAS | COLR_BUF_ARGB1555;
270fe8aea9eSmrg#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
27103b705cfSriastradh	case PICT_a2r10g10b10:
27203b705cfSriastradh	case PICT_x2r10g10b10:
27303b705cfSriastradh	case PICT_a2b10g10r10:
27403b705cfSriastradh	case PICT_x2b10g10r10:
27503b705cfSriastradh		return BIAS | COLR_BUF_ARGB2AAA;
27642542f5fSchristos#endif
27703b705cfSriastradh	case PICT_a8:
27803b705cfSriastradh		return BIAS | COLR_BUF_8BIT;
27903b705cfSriastradh	case PICT_a4r4g4b4:
28003b705cfSriastradh	case PICT_x4r4g4b4:
28103b705cfSriastradh	case PICT_a4b4g4r4:
28203b705cfSriastradh	case PICT_x4b4g4r4:
28303b705cfSriastradh		return BIAS | COLR_BUF_ARGB4444;
28403b705cfSriastradh	}
28503b705cfSriastradh#undef BIAS
28603b705cfSriastradh}
28703b705cfSriastradh
28803b705cfSriastradhstatic bool gen3_check_format(PicturePtr p)
28903b705cfSriastradh{
29003b705cfSriastradh	switch (p->format) {
29103b705cfSriastradh	case PICT_a8:
29203b705cfSriastradh	case PICT_a8r8g8b8:
29303b705cfSriastradh	case PICT_x8r8g8b8:
29403b705cfSriastradh	case PICT_a8b8g8r8:
29503b705cfSriastradh	case PICT_x8b8g8r8:
29642542f5fSchristos#ifdef PICT_a2r10g10b10
29703b705cfSriastradh	case PICT_a2r10g10b10:
29803b705cfSriastradh	case PICT_a2b10g10r10:
29942542f5fSchristos#endif
30003b705cfSriastradh	case PICT_r5g6b5:
30103b705cfSriastradh	case PICT_b5g6r5:
30203b705cfSriastradh	case PICT_a1r5g5b5:
30303b705cfSriastradh	case PICT_a1b5g5r5:
30403b705cfSriastradh	case PICT_a4r4g4b4:
30503b705cfSriastradh	case PICT_a4b4g4r4:
30603b705cfSriastradh		return true;
30703b705cfSriastradh	default:
30803b705cfSriastradh		return false;
30903b705cfSriastradh	}
31003b705cfSriastradh}
31103b705cfSriastradh
31203b705cfSriastradhstatic bool gen3_check_xformat(PicturePtr p)
31303b705cfSriastradh{
31403b705cfSriastradh	switch (p->format) {
31503b705cfSriastradh	case PICT_a8r8g8b8:
31603b705cfSriastradh	case PICT_x8r8g8b8:
31703b705cfSriastradh	case PICT_a8b8g8r8:
31803b705cfSriastradh	case PICT_x8b8g8r8:
31903b705cfSriastradh	case PICT_r5g6b5:
32003b705cfSriastradh	case PICT_b5g6r5:
32103b705cfSriastradh	case PICT_a1r5g5b5:
32203b705cfSriastradh	case PICT_x1r5g5b5:
32303b705cfSriastradh	case PICT_a1b5g5r5:
32403b705cfSriastradh	case PICT_x1b5g5r5:
325fe8aea9eSmrg#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
32603b705cfSriastradh	case PICT_a2r10g10b10:
32703b705cfSriastradh	case PICT_x2r10g10b10:
32803b705cfSriastradh	case PICT_a2b10g10r10:
32903b705cfSriastradh	case PICT_x2b10g10r10:
33042542f5fSchristos#endif
33103b705cfSriastradh	case PICT_a8:
33203b705cfSriastradh	case PICT_a4r4g4b4:
33303b705cfSriastradh	case PICT_x4r4g4b4:
33403b705cfSriastradh	case PICT_a4b4g4r4:
33503b705cfSriastradh	case PICT_x4b4g4r4:
33603b705cfSriastradh		return true;
33703b705cfSriastradh	default:
33803b705cfSriastradh		return false;
33903b705cfSriastradh	}
34003b705cfSriastradh}
34103b705cfSriastradh
34203b705cfSriastradhstatic uint32_t gen3_texture_repeat(uint32_t repeat)
34303b705cfSriastradh{
34403b705cfSriastradh#define REPEAT(x) \
34503b705cfSriastradh	(SS3_NORMALIZED_COORDS | \
34603b705cfSriastradh	 TEXCOORDMODE_##x << SS3_TCX_ADDR_MODE_SHIFT | \
34703b705cfSriastradh	 TEXCOORDMODE_##x << SS3_TCY_ADDR_MODE_SHIFT)
34803b705cfSriastradh	switch (repeat) {
34903b705cfSriastradh	default:
35003b705cfSriastradh	case RepeatNone:
35103b705cfSriastradh		return REPEAT(CLAMP_BORDER);
35203b705cfSriastradh	case RepeatNormal:
35303b705cfSriastradh		return REPEAT(WRAP);
35403b705cfSriastradh	case RepeatPad:
35503b705cfSriastradh		return REPEAT(CLAMP_EDGE);
35603b705cfSriastradh	case RepeatReflect:
35703b705cfSriastradh		return REPEAT(MIRROR);
35803b705cfSriastradh	}
35903b705cfSriastradh#undef REPEAT
36003b705cfSriastradh}
36103b705cfSriastradh
36203b705cfSriastradhstatic uint32_t gen3_gradient_repeat(uint32_t repeat)
36303b705cfSriastradh{
36403b705cfSriastradh#define REPEAT(x) \
36503b705cfSriastradh	(SS3_NORMALIZED_COORDS | \
36603b705cfSriastradh	 TEXCOORDMODE_##x  << SS3_TCX_ADDR_MODE_SHIFT | \
36703b705cfSriastradh	 TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT)
36803b705cfSriastradh	switch (repeat) {
36903b705cfSriastradh	default:
37003b705cfSriastradh	case RepeatNone:
37103b705cfSriastradh		return REPEAT(CLAMP_BORDER);
37203b705cfSriastradh	case RepeatNormal:
37303b705cfSriastradh		return REPEAT(WRAP);
37403b705cfSriastradh	case RepeatPad:
37503b705cfSriastradh		return REPEAT(CLAMP_EDGE);
37603b705cfSriastradh	case RepeatReflect:
37703b705cfSriastradh		return REPEAT(MIRROR);
37803b705cfSriastradh	}
37903b705cfSriastradh#undef REPEAT
38003b705cfSriastradh}
38103b705cfSriastradh
38203b705cfSriastradhstatic bool gen3_check_repeat(PicturePtr p)
38303b705cfSriastradh{
38403b705cfSriastradh	if (!p->repeat)
38503b705cfSriastradh		return true;
38603b705cfSriastradh
38703b705cfSriastradh	switch (p->repeatType) {
38803b705cfSriastradh	case RepeatNone:
38903b705cfSriastradh	case RepeatNormal:
39003b705cfSriastradh	case RepeatPad:
39103b705cfSriastradh	case RepeatReflect:
39203b705cfSriastradh		return true;
39303b705cfSriastradh	default:
39403b705cfSriastradh		return false;
39503b705cfSriastradh	}
39603b705cfSriastradh}
39703b705cfSriastradh
39803b705cfSriastradhstatic uint32_t gen3_filter(uint32_t filter)
39903b705cfSriastradh{
40003b705cfSriastradh	switch (filter) {
40103b705cfSriastradh	default:
40203b705cfSriastradh		assert(0);
40303b705cfSriastradh	case PictFilterNearest:
40403b705cfSriastradh		return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
40503b705cfSriastradh			FILTER_NEAREST << SS2_MIN_FILTER_SHIFT |
40603b705cfSriastradh			MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
40703b705cfSriastradh	case PictFilterBilinear:
40803b705cfSriastradh		return (FILTER_LINEAR  << SS2_MAG_FILTER_SHIFT |
40903b705cfSriastradh			FILTER_LINEAR  << SS2_MIN_FILTER_SHIFT |
41003b705cfSriastradh			MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
41103b705cfSriastradh	}
41203b705cfSriastradh}
41303b705cfSriastradh
41403b705cfSriastradhstatic bool gen3_check_filter(PicturePtr p)
41503b705cfSriastradh{
41603b705cfSriastradh	switch (p->filter) {
41703b705cfSriastradh	case PictFilterNearest:
41803b705cfSriastradh	case PictFilterBilinear:
41903b705cfSriastradh		return true;
42003b705cfSriastradh	default:
42103b705cfSriastradh		return false;
42203b705cfSriastradh	}
42303b705cfSriastradh}
42403b705cfSriastradh
/* Emit one destination coordinate pair as two vertex elements: x, then y. */
static inline void
gen3_emit_composite_dstcoord(struct sna *sna, int16_t dstX, int16_t dstY)
{
	OUT_VERTEX(dstX);	/* x first ... */
	OUT_VERTEX(dstY);	/* ... then y */
}
43103b705cfSriastradh
43203b705cfSriastradhfastcall static void
43303b705cfSriastradhgen3_emit_composite_primitive_constant(struct sna *sna,
43403b705cfSriastradh				       const struct sna_composite_op *op,
43503b705cfSriastradh				       const struct sna_composite_rectangles *r)
43603b705cfSriastradh{
43703b705cfSriastradh	int16_t dst_x = r->dst.x + op->dst.x;
43803b705cfSriastradh	int16_t dst_y = r->dst.y + op->dst.y;
43903b705cfSriastradh
44003b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
44103b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
44203b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
44303b705cfSriastradh}
44403b705cfSriastradh
44503b705cfSriastradhfastcall static void
44603b705cfSriastradhgen3_emit_composite_boxes_constant(const struct sna_composite_op *op,
44703b705cfSriastradh				   const BoxRec *box, int nbox,
44803b705cfSriastradh				   float *v)
44903b705cfSriastradh{
45003b705cfSriastradh	do {
451fe8aea9eSmrg		v[0] = box->x2 + op->dst.x;
452fe8aea9eSmrg		v[1] = box->y2 + op->dst.y;
45303b705cfSriastradh
454fe8aea9eSmrg		v[2] = box->x1 + op->dst.x;
455fe8aea9eSmrg		v[3] = box->y2 + op->dst.y;
45603b705cfSriastradh
457fe8aea9eSmrg		v[4] = box->x1 + op->dst.x;
458fe8aea9eSmrg		v[5] = box->y1 + op->dst.y;
45903b705cfSriastradh
46003b705cfSriastradh		box++;
46103b705cfSriastradh		v += 6;
46203b705cfSriastradh	} while (--nbox);
46303b705cfSriastradh}
46403b705cfSriastradh
46503b705cfSriastradhfastcall static void
46603b705cfSriastradhgen3_emit_composite_primitive_identity_gradient(struct sna *sna,
46703b705cfSriastradh						const struct sna_composite_op *op,
46803b705cfSriastradh						const struct sna_composite_rectangles *r)
46903b705cfSriastradh{
47003b705cfSriastradh	int16_t dst_x, dst_y;
47103b705cfSriastradh	int16_t src_x, src_y;
47203b705cfSriastradh
47303b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
47403b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
47503b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
47603b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
47703b705cfSriastradh
47803b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
47903b705cfSriastradh	OUT_VERTEX(src_x + r->width);
48003b705cfSriastradh	OUT_VERTEX(src_y + r->height);
48103b705cfSriastradh
48203b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
48303b705cfSriastradh	OUT_VERTEX(src_x);
48403b705cfSriastradh	OUT_VERTEX(src_y + r->height);
48503b705cfSriastradh
48603b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
48703b705cfSriastradh	OUT_VERTEX(src_x);
48803b705cfSriastradh	OUT_VERTEX(src_y);
48903b705cfSriastradh}
49003b705cfSriastradh
49103b705cfSriastradhfastcall static void
49203b705cfSriastradhgen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op,
49303b705cfSriastradh					    const BoxRec *box, int nbox,
49403b705cfSriastradh					    float *v)
49503b705cfSriastradh{
49603b705cfSriastradh	do {
497fe8aea9eSmrg		v[0] = box->x2 + op->dst.x;
498fe8aea9eSmrg		v[1] = box->y2 + op->dst.y;
49903b705cfSriastradh		v[2] = box->x2 + op->src.offset[0];
50003b705cfSriastradh		v[3] = box->y2 + op->src.offset[1];
50103b705cfSriastradh
502fe8aea9eSmrg		v[4] = box->x1 + op->dst.x;
503fe8aea9eSmrg		v[5] = box->y2 + op->dst.y;
50403b705cfSriastradh		v[6] = box->x1 + op->src.offset[0];
50503b705cfSriastradh		v[7] = box->y2 + op->src.offset[1];
50603b705cfSriastradh
507fe8aea9eSmrg		v[8] = box->x1 + op->dst.x;
508fe8aea9eSmrg		v[9] = box->y1 + op->dst.y;
50903b705cfSriastradh		v[10] = box->x1 + op->src.offset[0];
51003b705cfSriastradh		v[11] = box->y1 + op->src.offset[1];
51103b705cfSriastradh
51203b705cfSriastradh		v += 12;
51303b705cfSriastradh		box++;
51403b705cfSriastradh	} while (--nbox);
51503b705cfSriastradh}
51603b705cfSriastradh
51703b705cfSriastradhfastcall static void
51803b705cfSriastradhgen3_emit_composite_primitive_affine_gradient(struct sna *sna,
51903b705cfSriastradh					      const struct sna_composite_op *op,
52003b705cfSriastradh					      const struct sna_composite_rectangles *r)
52103b705cfSriastradh{
52203b705cfSriastradh	PictTransform *transform = op->src.transform;
52303b705cfSriastradh	int16_t dst_x, dst_y;
52403b705cfSriastradh	int16_t src_x, src_y;
52503b705cfSriastradh	float *v;
52603b705cfSriastradh
52703b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
52803b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
52903b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
53003b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
53103b705cfSriastradh
53203b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
53303b705cfSriastradh	sna->render.vertex_used += 12;
534fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
53503b705cfSriastradh
53603b705cfSriastradh	v[0] = dst_x + r->width;
53703b705cfSriastradh	v[1] = dst_y + r->height;
53803b705cfSriastradh	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
53903b705cfSriastradh				    transform, op->src.scale,
54003b705cfSriastradh				    &v[2], &v[3]);
54103b705cfSriastradh
54203b705cfSriastradh	v[4] = dst_x;
54303b705cfSriastradh	v[5] = dst_y + r->height;
54403b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y + r->height,
54503b705cfSriastradh				    transform, op->src.scale,
54603b705cfSriastradh				    &v[6], &v[7]);
54703b705cfSriastradh
54803b705cfSriastradh	v[8] = dst_x;
54903b705cfSriastradh	v[9] = dst_y;
55003b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y,
55103b705cfSriastradh				    transform, op->src.scale,
55203b705cfSriastradh				    &v[10], &v[11]);
55303b705cfSriastradh}
55403b705cfSriastradh
55503b705cfSriastradhfastcall static void
55603b705cfSriastradhgen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op,
55703b705cfSriastradh					  const BoxRec *box, int nbox,
55803b705cfSriastradh					  float *v)
55903b705cfSriastradh{
56003b705cfSriastradh	const PictTransform *transform = op->src.transform;
56103b705cfSriastradh
56203b705cfSriastradh	do {
563fe8aea9eSmrg		v[0] = box->x2 + op->dst.x;
564fe8aea9eSmrg		v[1] = box->y2 + op->dst.y;
56503b705cfSriastradh		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
56603b705cfSriastradh					    box->y2 + op->src.offset[1],
56703b705cfSriastradh					    transform, op->src.scale,
56803b705cfSriastradh					    &v[2], &v[3]);
56903b705cfSriastradh
570fe8aea9eSmrg		v[4] = box->x1 + op->dst.x;
571fe8aea9eSmrg		v[5] = box->y2 + op->dst.y;
57203b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
57303b705cfSriastradh					    box->y2 + op->src.offset[1],
57403b705cfSriastradh					    transform, op->src.scale,
57503b705cfSriastradh					    &v[6], &v[7]);
57603b705cfSriastradh
577fe8aea9eSmrg		v[8] = box->x1 + op->dst.x;
578fe8aea9eSmrg		v[9] = box->y1 + op->dst.y;
57903b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
58003b705cfSriastradh					    box->y1 + op->src.offset[1],
58103b705cfSriastradh					    transform, op->src.scale,
58203b705cfSriastradh					    &v[10], &v[11]);
58303b705cfSriastradh
58403b705cfSriastradh		box++;
58503b705cfSriastradh		v += 12;
58603b705cfSriastradh	} while (--nbox);
58703b705cfSriastradh}
58803b705cfSriastradh
58903b705cfSriastradhfastcall static void
59003b705cfSriastradhgen3_emit_composite_primitive_identity_source(struct sna *sna,
59103b705cfSriastradh					      const struct sna_composite_op *op,
59203b705cfSriastradh					      const struct sna_composite_rectangles *r)
59303b705cfSriastradh{
59403b705cfSriastradh	float w = r->width;
59503b705cfSriastradh	float h = r->height;
59603b705cfSriastradh	float *v;
59703b705cfSriastradh
59803b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
59903b705cfSriastradh	sna->render.vertex_used += 12;
600fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
60103b705cfSriastradh
60203b705cfSriastradh	v[8] = v[4] = r->dst.x + op->dst.x;
60303b705cfSriastradh	v[0] = v[4] + w;
60403b705cfSriastradh
60503b705cfSriastradh	v[9] = r->dst.y + op->dst.y;
60603b705cfSriastradh	v[5] = v[1] = v[9] + h;
60703b705cfSriastradh
60803b705cfSriastradh	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
60903b705cfSriastradh	v[2] = v[6] + w * op->src.scale[0];
61003b705cfSriastradh
61103b705cfSriastradh	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
61203b705cfSriastradh	v[7] = v[3] = v[11] + h * op->src.scale[1];
61303b705cfSriastradh}
61403b705cfSriastradh
61503b705cfSriastradhfastcall static void
61603b705cfSriastradhgen3_emit_composite_boxes_identity_source(const struct sna_composite_op *op,
61703b705cfSriastradh					  const BoxRec *box, int nbox,
61803b705cfSriastradh					  float *v)
61903b705cfSriastradh{
62003b705cfSriastradh	do {
62103b705cfSriastradh		v[0] = box->x2 + op->dst.x;
62203b705cfSriastradh		v[8] = v[4] = box->x1 + op->dst.x;
62303b705cfSriastradh		v[5] = v[1] = box->y2 + op->dst.y;
62403b705cfSriastradh		v[9] = box->y1 + op->dst.y;
62503b705cfSriastradh
62603b705cfSriastradh		v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
62703b705cfSriastradh		v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
62803b705cfSriastradh
62903b705cfSriastradh		v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
63003b705cfSriastradh		v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
63103b705cfSriastradh
63203b705cfSriastradh		v += 12;
63303b705cfSriastradh		box++;
63403b705cfSriastradh	} while (--nbox);
63503b705cfSriastradh}
63603b705cfSriastradh
63703b705cfSriastradhfastcall static void
63803b705cfSriastradhgen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
63903b705cfSriastradh							const struct sna_composite_op *op,
64003b705cfSriastradh							const struct sna_composite_rectangles *r)
64103b705cfSriastradh{
64203b705cfSriastradh	float w = r->width;
64303b705cfSriastradh	float h = r->height;
64403b705cfSriastradh	float *v;
64503b705cfSriastradh
64603b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
64703b705cfSriastradh	sna->render.vertex_used += 12;
648fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
64903b705cfSriastradh
65003b705cfSriastradh	v[8] = v[4] = r->dst.x;
65103b705cfSriastradh	v[9] = r->dst.y;
65203b705cfSriastradh
65303b705cfSriastradh	v[0] = v[4] + w;
65403b705cfSriastradh	v[5] = v[1] = v[9] + h;
65503b705cfSriastradh
65603b705cfSriastradh	v[10] = v[6] = r->src.x * op->src.scale[0];
65703b705cfSriastradh	v[11] = r->src.y * op->src.scale[1];
65803b705cfSriastradh
65903b705cfSriastradh	v[2] = v[6] + w * op->src.scale[0];
66003b705cfSriastradh	v[7] = v[3] = v[11] + h * op->src.scale[1];
66103b705cfSriastradh}
66203b705cfSriastradh
66303b705cfSriastradhfastcall static void
66403b705cfSriastradhgen3_emit_composite_boxes_identity_source_no_offset(const struct sna_composite_op *op,
66503b705cfSriastradh						    const BoxRec *box, int nbox,
66603b705cfSriastradh						    float *v)
66703b705cfSriastradh{
66803b705cfSriastradh	do {
66903b705cfSriastradh		v[0] = box->x2;
67003b705cfSriastradh		v[8] = v[4] = box->x1;
67103b705cfSriastradh		v[5] = v[1] = box->y2;
67203b705cfSriastradh		v[9] = box->y1;
67303b705cfSriastradh
67403b705cfSriastradh		v[10] = v[6] = box->x1 * op->src.scale[0];
67503b705cfSriastradh		v[2] = box->x2 * op->src.scale[0];
67603b705cfSriastradh
67703b705cfSriastradh		v[11] = box->y1 * op->src.scale[1];
67803b705cfSriastradh		v[7] = v[3] = box->y2 * op->src.scale[1];
67903b705cfSriastradh
68003b705cfSriastradh		v += 12;
68103b705cfSriastradh		box++;
68203b705cfSriastradh	} while (--nbox);
68303b705cfSriastradh}
68403b705cfSriastradh
68503b705cfSriastradhfastcall static void
68603b705cfSriastradhgen3_emit_composite_primitive_affine_source(struct sna *sna,
68703b705cfSriastradh					    const struct sna_composite_op *op,
68803b705cfSriastradh					    const struct sna_composite_rectangles *r)
68903b705cfSriastradh{
69003b705cfSriastradh	PictTransform *transform = op->src.transform;
69103b705cfSriastradh	int16_t dst_x = r->dst.x + op->dst.x;
69203b705cfSriastradh	int16_t dst_y = r->dst.y + op->dst.y;
69303b705cfSriastradh	int src_x = r->src.x + (int)op->src.offset[0];
69403b705cfSriastradh	int src_y = r->src.y + (int)op->src.offset[1];
69503b705cfSriastradh	float *v;
69603b705cfSriastradh
69703b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
69803b705cfSriastradh	sna->render.vertex_used += 12;
699fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
70003b705cfSriastradh
70103b705cfSriastradh	v[0] = dst_x + r->width;
70203b705cfSriastradh	v[5] = v[1] = dst_y + r->height;
70303b705cfSriastradh	v[8] = v[4] = dst_x;
70403b705cfSriastradh	v[9] = dst_y;
70503b705cfSriastradh
70603b705cfSriastradh	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
70703b705cfSriastradh				    transform, op->src.scale,
70803b705cfSriastradh				    &v[2], &v[3]);
70903b705cfSriastradh
71003b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y + r->height,
71103b705cfSriastradh				    transform, op->src.scale,
71203b705cfSriastradh				    &v[6], &v[7]);
71303b705cfSriastradh
71403b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y,
71503b705cfSriastradh				    transform, op->src.scale,
71603b705cfSriastradh				    &v[10], &v[11]);
71703b705cfSriastradh}
71803b705cfSriastradh
71903b705cfSriastradhfastcall static void
72003b705cfSriastradhgen3_emit_composite_boxes_affine_source(const struct sna_composite_op *op,
72103b705cfSriastradh					const BoxRec *box, int nbox,
72203b705cfSriastradh					float *v)
72303b705cfSriastradh{
72403b705cfSriastradh	const PictTransform *transform = op->src.transform;
72503b705cfSriastradh
72603b705cfSriastradh	do {
727fe8aea9eSmrg		v[0] = box->x2 + op->dst.x;
728fe8aea9eSmrg		v[5] = v[1] = box->y2 + op->dst.y;
729fe8aea9eSmrg		v[8] = v[4] = box->x1 + op->dst.x;
730fe8aea9eSmrg		v[9] = box->y1 + op->dst.y;
73103b705cfSriastradh
73203b705cfSriastradh		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
73303b705cfSriastradh					    box->y2 + op->src.offset[1],
73403b705cfSriastradh					    transform, op->src.scale,
73503b705cfSriastradh					    &v[2], &v[3]);
73603b705cfSriastradh
73703b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
73803b705cfSriastradh					    box->y2 + op->src.offset[1],
73903b705cfSriastradh					    transform, op->src.scale,
74003b705cfSriastradh					    &v[6], &v[7]);
74103b705cfSriastradh
74203b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
74303b705cfSriastradh					    box->y1 + op->src.offset[1],
74403b705cfSriastradh					    transform, op->src.scale,
74503b705cfSriastradh					    &v[10], &v[11]);
74603b705cfSriastradh
74703b705cfSriastradh		v += 12;
74803b705cfSriastradh		box++;
74903b705cfSriastradh	} while (--nbox);
75003b705cfSriastradh}
75103b705cfSriastradh
75203b705cfSriastradhfastcall static void
75303b705cfSriastradhgen3_emit_composite_primitive_constant_identity_mask(struct sna *sna,
75403b705cfSriastradh						     const struct sna_composite_op *op,
75503b705cfSriastradh						     const struct sna_composite_rectangles *r)
75603b705cfSriastradh{
75703b705cfSriastradh	float w = r->width;
75803b705cfSriastradh	float h = r->height;
75903b705cfSriastradh	float *v;
76003b705cfSriastradh
76103b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
76203b705cfSriastradh	sna->render.vertex_used += 12;
763fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
76403b705cfSriastradh
76503b705cfSriastradh	v[8] = v[4] = r->dst.x + op->dst.x;
76603b705cfSriastradh	v[0] = v[4] + w;
76703b705cfSriastradh
76803b705cfSriastradh	v[9] = r->dst.y + op->dst.y;
76903b705cfSriastradh	v[5] = v[1] = v[9] + h;
77003b705cfSriastradh
77103b705cfSriastradh	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
77203b705cfSriastradh	v[2] = v[6] + w * op->mask.scale[0];
77303b705cfSriastradh
77403b705cfSriastradh	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
77503b705cfSriastradh	v[7] = v[3] = v[11] + h * op->mask.scale[1];
77603b705cfSriastradh}
77703b705cfSriastradh
77803b705cfSriastradhfastcall static void
77903b705cfSriastradhgen3_emit_composite_primitive_constant_identity_mask_no_offset(struct sna *sna,
78003b705cfSriastradh							       const struct sna_composite_op *op,
78103b705cfSriastradh							       const struct sna_composite_rectangles *r)
78203b705cfSriastradh{
78303b705cfSriastradh	float w = r->width;
78403b705cfSriastradh	float h = r->height;
78503b705cfSriastradh	float *v;
78603b705cfSriastradh
78703b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
78803b705cfSriastradh	sna->render.vertex_used += 12;
789fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
79003b705cfSriastradh
79103b705cfSriastradh	v[8] = v[4] = r->dst.x;
79203b705cfSriastradh	v[9] = r->dst.y;
79303b705cfSriastradh
79403b705cfSriastradh	v[0] = v[4] + w;
79503b705cfSriastradh	v[5] = v[1] = v[9] + h;
79603b705cfSriastradh
79703b705cfSriastradh	v[10] = v[6] = r->mask.x * op->mask.scale[0];
79803b705cfSriastradh	v[11] = r->mask.y * op->mask.scale[1];
79903b705cfSriastradh
80003b705cfSriastradh	v[2] = v[6] + w * op->mask.scale[0];
80103b705cfSriastradh	v[7] = v[3] = v[11] + h * op->mask.scale[1];
80203b705cfSriastradh}
80303b705cfSriastradh
80403b705cfSriastradhfastcall static void
80503b705cfSriastradhgen3_emit_composite_primitive_identity_source_mask(struct sna *sna,
80603b705cfSriastradh						   const struct sna_composite_op *op,
80703b705cfSriastradh						   const struct sna_composite_rectangles *r)
80803b705cfSriastradh{
80903b705cfSriastradh	float dst_x, dst_y;
81003b705cfSriastradh	float src_x, src_y;
81103b705cfSriastradh	float msk_x, msk_y;
81203b705cfSriastradh	float w, h;
81303b705cfSriastradh	float *v;
81403b705cfSriastradh
81503b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
81603b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
81703b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
81803b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
81903b705cfSriastradh	msk_x = r->mask.x + op->mask.offset[0];
82003b705cfSriastradh	msk_y = r->mask.y + op->mask.offset[1];
82103b705cfSriastradh	w = r->width;
82203b705cfSriastradh	h = r->height;
82303b705cfSriastradh
82403b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
82503b705cfSriastradh	sna->render.vertex_used += 18;
826fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
82703b705cfSriastradh
82803b705cfSriastradh	v[0] = dst_x + w;
82903b705cfSriastradh	v[1] = dst_y + h;
83003b705cfSriastradh	v[2] = (src_x + w) * op->src.scale[0];
83103b705cfSriastradh	v[3] = (src_y + h) * op->src.scale[1];
83203b705cfSriastradh	v[4] = (msk_x + w) * op->mask.scale[0];
83303b705cfSriastradh	v[5] = (msk_y + h) * op->mask.scale[1];
83403b705cfSriastradh
83503b705cfSriastradh	v[6] = dst_x;
83603b705cfSriastradh	v[7] = v[1];
83703b705cfSriastradh	v[8] = src_x * op->src.scale[0];
83803b705cfSriastradh	v[9] = v[3];
83903b705cfSriastradh	v[10] = msk_x * op->mask.scale[0];
84003b705cfSriastradh	v[11] =v[5];
84103b705cfSriastradh
84203b705cfSriastradh	v[12] = v[6];
84303b705cfSriastradh	v[13] = dst_y;
84403b705cfSriastradh	v[14] = v[8];
84503b705cfSriastradh	v[15] = src_y * op->src.scale[1];
84603b705cfSriastradh	v[16] = v[10];
84703b705cfSriastradh	v[17] = msk_y * op->mask.scale[1];
84803b705cfSriastradh}
84903b705cfSriastradh
85003b705cfSriastradhfastcall static void
85103b705cfSriastradhgen3_emit_composite_primitive_affine_source_mask(struct sna *sna,
85203b705cfSriastradh						 const struct sna_composite_op *op,
85303b705cfSriastradh						 const struct sna_composite_rectangles *r)
85403b705cfSriastradh{
85503b705cfSriastradh	int16_t src_x, src_y;
85603b705cfSriastradh	float dst_x, dst_y;
85703b705cfSriastradh	float msk_x, msk_y;
85803b705cfSriastradh	float w, h;
85903b705cfSriastradh	float *v;
86003b705cfSriastradh
86103b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
86203b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
86303b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
86403b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
86503b705cfSriastradh	msk_x = r->mask.x + op->mask.offset[0];
86603b705cfSriastradh	msk_y = r->mask.y + op->mask.offset[1];
86703b705cfSriastradh	w = r->width;
86803b705cfSriastradh	h = r->height;
86903b705cfSriastradh
87003b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
87103b705cfSriastradh	sna->render.vertex_used += 18;
872fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
87303b705cfSriastradh
87403b705cfSriastradh	v[0] = dst_x + w;
87503b705cfSriastradh	v[1] = dst_y + h;
87603b705cfSriastradh	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
87703b705cfSriastradh				    op->src.transform, op->src.scale,
87803b705cfSriastradh				    &v[2], &v[3]);
87903b705cfSriastradh	v[4] = (msk_x + w) * op->mask.scale[0];
88003b705cfSriastradh	v[5] = (msk_y + h) * op->mask.scale[1];
88103b705cfSriastradh
88203b705cfSriastradh	v[6] = dst_x;
88303b705cfSriastradh	v[7] = v[1];
88403b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y + r->height,
88503b705cfSriastradh				    op->src.transform, op->src.scale,
88603b705cfSriastradh				    &v[8], &v[9]);
88703b705cfSriastradh	v[10] = msk_x * op->mask.scale[0];
88803b705cfSriastradh	v[11] =v[5];
88903b705cfSriastradh
89003b705cfSriastradh	v[12] = v[6];
89103b705cfSriastradh	v[13] = dst_y;
89203b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y,
89303b705cfSriastradh				    op->src.transform, op->src.scale,
89403b705cfSriastradh				    &v[14], &v[15]);
89503b705cfSriastradh	v[16] = v[10];
89603b705cfSriastradh	v[17] = msk_y * op->mask.scale[1];
89703b705cfSriastradh}
89803b705cfSriastradh
89903b705cfSriastradhstatic void
90003b705cfSriastradhgen3_emit_composite_texcoord(struct sna *sna,
90103b705cfSriastradh			     const struct sna_composite_channel *channel,
90203b705cfSriastradh			     int16_t x, int16_t y)
90303b705cfSriastradh{
90403b705cfSriastradh	float s = 0, t = 0, w = 1;
90503b705cfSriastradh
90603b705cfSriastradh	switch (channel->u.gen3.type) {
90703b705cfSriastradh	case SHADER_OPACITY:
90803b705cfSriastradh	case SHADER_NONE:
90903b705cfSriastradh	case SHADER_ZERO:
91003b705cfSriastradh	case SHADER_BLACK:
91103b705cfSriastradh	case SHADER_WHITE:
91203b705cfSriastradh	case SHADER_CONSTANT:
91303b705cfSriastradh		break;
91403b705cfSriastradh
91503b705cfSriastradh	case SHADER_LINEAR:
91603b705cfSriastradh	case SHADER_RADIAL:
91703b705cfSriastradh	case SHADER_TEXTURE:
91803b705cfSriastradh		x += channel->offset[0];
91903b705cfSriastradh		y += channel->offset[1];
92003b705cfSriastradh		if (channel->is_affine) {
92103b705cfSriastradh			sna_get_transformed_coordinates(x, y,
92203b705cfSriastradh							channel->transform,
92303b705cfSriastradh							&s, &t);
92403b705cfSriastradh			OUT_VERTEX(s * channel->scale[0]);
92503b705cfSriastradh			OUT_VERTEX(t * channel->scale[1]);
92603b705cfSriastradh		} else {
92703b705cfSriastradh			sna_get_transformed_coordinates_3d(x, y,
92803b705cfSriastradh							   channel->transform,
92903b705cfSriastradh							   &s, &t, &w);
93003b705cfSriastradh			OUT_VERTEX(s * channel->scale[0]);
93103b705cfSriastradh			OUT_VERTEX(t * channel->scale[1]);
93203b705cfSriastradh			OUT_VERTEX(0);
93303b705cfSriastradh			OUT_VERTEX(w);
93403b705cfSriastradh		}
93503b705cfSriastradh		break;
93603b705cfSriastradh	}
93703b705cfSriastradh}
93803b705cfSriastradh
93903b705cfSriastradhstatic void
94003b705cfSriastradhgen3_emit_composite_vertex(struct sna *sna,
94103b705cfSriastradh			   const struct sna_composite_op *op,
94203b705cfSriastradh			   int16_t srcX, int16_t srcY,
94303b705cfSriastradh			   int16_t maskX, int16_t maskY,
94403b705cfSriastradh			   int16_t dstX, int16_t dstY)
94503b705cfSriastradh{
94603b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dstX, dstY);
94703b705cfSriastradh	gen3_emit_composite_texcoord(sna, &op->src, srcX, srcY);
94803b705cfSriastradh	gen3_emit_composite_texcoord(sna, &op->mask, maskX, maskY);
94903b705cfSriastradh}
95003b705cfSriastradh
95103b705cfSriastradhfastcall static void
95203b705cfSriastradhgen3_emit_composite_primitive(struct sna *sna,
95303b705cfSriastradh			      const struct sna_composite_op *op,
95403b705cfSriastradh			      const struct sna_composite_rectangles *r)
95503b705cfSriastradh{
95603b705cfSriastradh	gen3_emit_composite_vertex(sna, op,
95703b705cfSriastradh				   r->src.x + r->width,
95803b705cfSriastradh				   r->src.y + r->height,
95903b705cfSriastradh				   r->mask.x + r->width,
96003b705cfSriastradh				   r->mask.y + r->height,
96103b705cfSriastradh				   op->dst.x + r->dst.x + r->width,
96203b705cfSriastradh				   op->dst.y + r->dst.y + r->height);
96303b705cfSriastradh	gen3_emit_composite_vertex(sna, op,
96403b705cfSriastradh				   r->src.x,
96503b705cfSriastradh				   r->src.y + r->height,
96603b705cfSriastradh				   r->mask.x,
96703b705cfSriastradh				   r->mask.y + r->height,
96803b705cfSriastradh				   op->dst.x + r->dst.x,
96903b705cfSriastradh				   op->dst.y + r->dst.y + r->height);
97003b705cfSriastradh	gen3_emit_composite_vertex(sna, op,
97103b705cfSriastradh				   r->src.x,
97203b705cfSriastradh				   r->src.y,
97303b705cfSriastradh				   r->mask.x,
97403b705cfSriastradh				   r->mask.y,
97503b705cfSriastradh				   op->dst.x + r->dst.x,
97603b705cfSriastradh				   op->dst.y + r->dst.y);
97703b705cfSriastradh}
97803b705cfSriastradh
97903b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
98003b705cfSriastradhsse2 fastcall static void
98103b705cfSriastradhgen3_emit_composite_primitive_constant__sse2(struct sna *sna,
98203b705cfSriastradh					     const struct sna_composite_op *op,
98303b705cfSriastradh					     const struct sna_composite_rectangles *r)
98403b705cfSriastradh{
98503b705cfSriastradh	float *v;
98603b705cfSriastradh
98703b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
98803b705cfSriastradh	sna->render.vertex_used += 6;
989fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
99003b705cfSriastradh
99103b705cfSriastradh	v[4] = v[2] = r->dst.x + op->dst.x;
99203b705cfSriastradh	v[5] = r->dst.y + op->dst.y;
99303b705cfSriastradh
99403b705cfSriastradh	v[0] = v[2] + r->width;
99503b705cfSriastradh	v[3] = v[1] = v[5] + r->height;
99603b705cfSriastradh
99703b705cfSriastradh}
99803b705cfSriastradh
99903b705cfSriastradhsse2 fastcall static void
100003b705cfSriastradhgen3_emit_composite_boxes_constant__sse2(const struct sna_composite_op *op,
100103b705cfSriastradh					 const BoxRec *box, int nbox,
100203b705cfSriastradh					 float *v)
100303b705cfSriastradh{
100403b705cfSriastradh	do {
1005fe8aea9eSmrg		v[0] = box->x2 + op->dst.x;
1006fe8aea9eSmrg		v[3] = v[1] = box->y2 + op->dst.y;
1007fe8aea9eSmrg		v[4] = v[2] = box->x1 + op->dst.x;
1008fe8aea9eSmrg		v[5] = box->y1 + op->dst.y;
100903b705cfSriastradh
101003b705cfSriastradh		box++;
101103b705cfSriastradh		v += 6;
101203b705cfSriastradh	} while (--nbox);
101303b705cfSriastradh}
101403b705cfSriastradh
101503b705cfSriastradhsse2 fastcall static void
101603b705cfSriastradhgen3_emit_composite_primitive_identity_gradient__sse2(struct sna *sna,
101703b705cfSriastradh						      const struct sna_composite_op *op,
101803b705cfSriastradh						      const struct sna_composite_rectangles *r)
101903b705cfSriastradh{
102003b705cfSriastradh	int16_t x, y;
102103b705cfSriastradh	float *v;
102203b705cfSriastradh
102303b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
102403b705cfSriastradh	sna->render.vertex_used += 12;
1025fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
102603b705cfSriastradh
102703b705cfSriastradh	x = r->dst.x + op->dst.x;
102803b705cfSriastradh	y = r->dst.y + op->dst.y;
102903b705cfSriastradh	v[0] = x + r->width;
103003b705cfSriastradh	v[5] = v[1] = y + r->height;
103103b705cfSriastradh	v[8] = v[4] = x;
103203b705cfSriastradh	v[9] = y;
103303b705cfSriastradh
103403b705cfSriastradh	x = r->src.x + op->src.offset[0];
103503b705cfSriastradh	y = r->src.y + op->src.offset[1];
103603b705cfSriastradh	v[2] = x + r->width;
103703b705cfSriastradh	v[7] = v[3] = y + r->height;
103803b705cfSriastradh	v[10] = v[6] = x;
103903b705cfSriastradh	v[11] = y;
104003b705cfSriastradh}
104103b705cfSriastradh
104203b705cfSriastradhsse2 fastcall static void
104303b705cfSriastradhgen3_emit_composite_boxes_identity_gradient__sse2(const struct sna_composite_op *op,
104403b705cfSriastradh						  const BoxRec *box, int nbox,
104503b705cfSriastradh						  float *v)
104603b705cfSriastradh{
104703b705cfSriastradh	do {
1048fe8aea9eSmrg		v[0] = box->x2 + op->dst.x;
1049fe8aea9eSmrg		v[5] = v[1] = box->y2 + op->dst.y;
1050fe8aea9eSmrg		v[8] = v[4] = box->x1 + op->dst.x;
1051fe8aea9eSmrg		v[9] = box->y1 + op->dst.y;
105203b705cfSriastradh
105303b705cfSriastradh		v[2] = box->x2 + op->src.offset[0];
105403b705cfSriastradh		v[7] = v[3] = box->y2 + op->src.offset[1];
105503b705cfSriastradh		v[10] = v[6] = box->x1 + op->src.offset[0];
105603b705cfSriastradh		v[11] = box->y1 + op->src.offset[1];
105703b705cfSriastradh
105803b705cfSriastradh		v += 12;
105903b705cfSriastradh		box++;
106003b705cfSriastradh	} while (--nbox);
106103b705cfSriastradh}
106203b705cfSriastradh
106303b705cfSriastradhsse2 fastcall static void
106403b705cfSriastradhgen3_emit_composite_primitive_affine_gradient__sse2(struct sna *sna,
106503b705cfSriastradh						    const struct sna_composite_op *op,
106603b705cfSriastradh						    const struct sna_composite_rectangles *r)
106703b705cfSriastradh{
106803b705cfSriastradh	PictTransform *transform = op->src.transform;
106903b705cfSriastradh	int16_t dst_x, dst_y;
107003b705cfSriastradh	int16_t src_x, src_y;
107103b705cfSriastradh	float *v;
107203b705cfSriastradh
107303b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
107403b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
107503b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
107603b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
107703b705cfSriastradh
107803b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
107903b705cfSriastradh	sna->render.vertex_used += 12;
1080fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
108103b705cfSriastradh
108203b705cfSriastradh	v[0] = dst_x + r->width;
108303b705cfSriastradh	v[1] = dst_y + r->height;
108403b705cfSriastradh	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
108503b705cfSriastradh				    transform, op->src.scale,
108603b705cfSriastradh				    &v[2], &v[3]);
108703b705cfSriastradh
108803b705cfSriastradh	v[4] = dst_x;
108903b705cfSriastradh	v[5] = dst_y + r->height;
109003b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y + r->height,
109103b705cfSriastradh				    transform, op->src.scale,
109203b705cfSriastradh				    &v[6], &v[7]);
109303b705cfSriastradh
109403b705cfSriastradh	v[8] = dst_x;
109503b705cfSriastradh	v[9] = dst_y;
109603b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y,
109703b705cfSriastradh				    transform, op->src.scale,
109803b705cfSriastradh				    &v[10], &v[11]);
109903b705cfSriastradh}
110003b705cfSriastradh
110103b705cfSriastradhsse2 fastcall static void
110203b705cfSriastradhgen3_emit_composite_boxes_affine_gradient__sse2(const struct sna_composite_op *op,
110303b705cfSriastradh						const BoxRec *box, int nbox,
110403b705cfSriastradh						float *v)
110503b705cfSriastradh{
110603b705cfSriastradh	const PictTransform *transform = op->src.transform;
110703b705cfSriastradh
110803b705cfSriastradh	do {
1109fe8aea9eSmrg		v[0] = box->x2 + op->dst.x;
1110fe8aea9eSmrg		v[1] = box->y2 + op->dst.y;
111103b705cfSriastradh		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
111203b705cfSriastradh					    box->y2 + op->src.offset[1],
111303b705cfSriastradh					    transform, op->src.scale,
111403b705cfSriastradh					    &v[2], &v[3]);
111503b705cfSriastradh
1116fe8aea9eSmrg		v[4] = box->x1 + op->dst.x;
1117fe8aea9eSmrg		v[5] = box->y2 + op->dst.y;
111803b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
111903b705cfSriastradh					    box->y2 + op->src.offset[1],
112003b705cfSriastradh					    transform, op->src.scale,
112103b705cfSriastradh					    &v[6], &v[7]);
112203b705cfSriastradh
1123fe8aea9eSmrg		v[8] = box->x1 + op->dst.x;
1124fe8aea9eSmrg		v[9] = box->y1 + op->dst.y;
112503b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
112603b705cfSriastradh					    box->y1 + op->src.offset[1],
112703b705cfSriastradh					    transform, op->src.scale,
112803b705cfSriastradh					    &v[10], &v[11]);
112903b705cfSriastradh
113003b705cfSriastradh		box++;
113103b705cfSriastradh		v += 12;
113203b705cfSriastradh	} while (--nbox);
113303b705cfSriastradh}
113403b705cfSriastradh
113503b705cfSriastradhsse2 fastcall static void
113603b705cfSriastradhgen3_emit_composite_primitive_identity_source__sse2(struct sna *sna,
113703b705cfSriastradh						    const struct sna_composite_op *op,
113803b705cfSriastradh						    const struct sna_composite_rectangles *r)
113903b705cfSriastradh{
114003b705cfSriastradh	float w = r->width;
114103b705cfSriastradh	float h = r->height;
114203b705cfSriastradh	float *v;
114303b705cfSriastradh
114403b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
114503b705cfSriastradh	sna->render.vertex_used += 12;
1146fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
114703b705cfSriastradh
114803b705cfSriastradh	v[8] = v[4] = r->dst.x + op->dst.x;
114903b705cfSriastradh	v[0] = v[4] + w;
115003b705cfSriastradh
115103b705cfSriastradh	v[9] = r->dst.y + op->dst.y;
115203b705cfSriastradh	v[5] = v[1] = v[9] + h;
115303b705cfSriastradh
115403b705cfSriastradh	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
115503b705cfSriastradh	v[2] = v[6] + w * op->src.scale[0];
115603b705cfSriastradh
115703b705cfSriastradh	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
115803b705cfSriastradh	v[7] = v[3] = v[11] + h * op->src.scale[1];
115903b705cfSriastradh}
116003b705cfSriastradh
116103b705cfSriastradhsse2 fastcall static void
116203b705cfSriastradhgen3_emit_composite_boxes_identity_source__sse2(const struct sna_composite_op *op,
116303b705cfSriastradh						const BoxRec *box, int nbox,
116403b705cfSriastradh						float *v)
116503b705cfSriastradh{
116603b705cfSriastradh	do {
116703b705cfSriastradh		v[0] = box->x2 + op->dst.x;
116803b705cfSriastradh		v[8] = v[4] = box->x1 + op->dst.x;
116903b705cfSriastradh		v[5] = v[1] = box->y2 + op->dst.y;
117003b705cfSriastradh		v[9] = box->y1 + op->dst.y;
117103b705cfSriastradh
117203b705cfSriastradh		v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
117303b705cfSriastradh		v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
117403b705cfSriastradh
117503b705cfSriastradh		v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
117603b705cfSriastradh		v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
117703b705cfSriastradh
117803b705cfSriastradh		v += 12;
117903b705cfSriastradh		box++;
118003b705cfSriastradh	} while (--nbox);
118103b705cfSriastradh}
118203b705cfSriastradh
118303b705cfSriastradhsse2 fastcall static void
118403b705cfSriastradhgen3_emit_composite_primitive_identity_source_no_offset__sse2(struct sna *sna,
118503b705cfSriastradh							      const struct sna_composite_op *op,
118603b705cfSriastradh							      const struct sna_composite_rectangles *r)
118703b705cfSriastradh{
118803b705cfSriastradh	float w = r->width;
118903b705cfSriastradh	float h = r->height;
119003b705cfSriastradh	float *v;
119103b705cfSriastradh
119203b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
119303b705cfSriastradh	sna->render.vertex_used += 12;
1194fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
119503b705cfSriastradh
119603b705cfSriastradh	v[8] = v[4] = r->dst.x;
119703b705cfSriastradh	v[9] = r->dst.y;
119803b705cfSriastradh
119903b705cfSriastradh	v[0] = v[4] + w;
120003b705cfSriastradh	v[5] = v[1] = v[9] + h;
120103b705cfSriastradh
120203b705cfSriastradh	v[10] = v[6] = r->src.x * op->src.scale[0];
120303b705cfSriastradh	v[11] = r->src.y * op->src.scale[1];
120403b705cfSriastradh
120503b705cfSriastradh	v[2] = v[6] + w * op->src.scale[0];
120603b705cfSriastradh	v[7] = v[3] = v[11] + h * op->src.scale[1];
120703b705cfSriastradh}
120803b705cfSriastradh
120903b705cfSriastradhsse2 fastcall static void
121003b705cfSriastradhgen3_emit_composite_boxes_identity_source_no_offset__sse2(const struct sna_composite_op *op,
121103b705cfSriastradh							  const BoxRec *box, int nbox,
121203b705cfSriastradh							  float *v)
121303b705cfSriastradh{
121403b705cfSriastradh	do {
121503b705cfSriastradh		v[0] = box->x2;
121603b705cfSriastradh		v[8] = v[4] = box->x1;
121703b705cfSriastradh		v[5] = v[1] = box->y2;
121803b705cfSriastradh		v[9] = box->y1;
121903b705cfSriastradh
122003b705cfSriastradh		v[10] = v[6] = box->x1 * op->src.scale[0];
122103b705cfSriastradh		v[2] = box->x2 * op->src.scale[0];
122203b705cfSriastradh
122303b705cfSriastradh		v[11] = box->y1 * op->src.scale[1];
122403b705cfSriastradh		v[7] = v[3] = box->y2 * op->src.scale[1];
122503b705cfSriastradh
122603b705cfSriastradh		v += 12;
122703b705cfSriastradh		box++;
122803b705cfSriastradh	} while (--nbox);
122903b705cfSriastradh}
123003b705cfSriastradh
123103b705cfSriastradhsse2 fastcall static void
123203b705cfSriastradhgen3_emit_composite_primitive_affine_source__sse2(struct sna *sna,
123303b705cfSriastradh						  const struct sna_composite_op *op,
123403b705cfSriastradh						  const struct sna_composite_rectangles *r)
123503b705cfSriastradh{
123603b705cfSriastradh	PictTransform *transform = op->src.transform;
123703b705cfSriastradh	int16_t dst_x = r->dst.x + op->dst.x;
123803b705cfSriastradh	int16_t dst_y = r->dst.y + op->dst.y;
123903b705cfSriastradh	int src_x = r->src.x + (int)op->src.offset[0];
124003b705cfSriastradh	int src_y = r->src.y + (int)op->src.offset[1];
124103b705cfSriastradh	float *v;
124203b705cfSriastradh
1243fe8aea9eSmrg	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=%dx%d\n",
1244fe8aea9eSmrg	     __FUNCTION__, src_x, src_y, dst_x, dst_y, r->width, r->height));
1245fe8aea9eSmrg
124603b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
124703b705cfSriastradh	sna->render.vertex_used += 12;
1248fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
124903b705cfSriastradh
125003b705cfSriastradh	v[0] = dst_x + r->width;
125103b705cfSriastradh	v[5] = v[1] = dst_y + r->height;
125203b705cfSriastradh	v[8] = v[4] = dst_x;
125303b705cfSriastradh	v[9] = dst_y;
125403b705cfSriastradh
125503b705cfSriastradh	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
125603b705cfSriastradh				    transform, op->src.scale,
125703b705cfSriastradh				    &v[2], &v[3]);
125803b705cfSriastradh
125903b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y + r->height,
126003b705cfSriastradh				    transform, op->src.scale,
126103b705cfSriastradh				    &v[6], &v[7]);
126203b705cfSriastradh
126303b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y,
126403b705cfSriastradh				    transform, op->src.scale,
126503b705cfSriastradh				    &v[10], &v[11]);
126603b705cfSriastradh}
126703b705cfSriastradh
126803b705cfSriastradhsse2 fastcall static void
126903b705cfSriastradhgen3_emit_composite_boxes_affine_source__sse2(const struct sna_composite_op *op,
127003b705cfSriastradh					      const BoxRec *box, int nbox,
127103b705cfSriastradh					      float *v)
127203b705cfSriastradh{
127303b705cfSriastradh	const PictTransform *transform = op->src.transform;
127403b705cfSriastradh
127503b705cfSriastradh	do {
1276fe8aea9eSmrg		DBG(("%s: box=(%d, %d), (%d, %d), src.offset=(%d, %d)\n",
1277fe8aea9eSmrg		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2, op->src.offset[0], op->src.offset[1]));
1278fe8aea9eSmrg
1279fe8aea9eSmrg		v[0] = box->x2 + op->dst.x;
1280fe8aea9eSmrg		v[5] = v[1] = box->y2 + op->dst.y;
1281fe8aea9eSmrg		v[8] = v[4] = box->x1 + op->dst.x;
1282fe8aea9eSmrg		v[9] = box->y1 + op->dst.y;
128303b705cfSriastradh
128403b705cfSriastradh		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
128503b705cfSriastradh					    box->y2 + op->src.offset[1],
128603b705cfSriastradh					    transform, op->src.scale,
128703b705cfSriastradh					    &v[2], &v[3]);
128803b705cfSriastradh
128903b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
129003b705cfSriastradh					    box->y2 + op->src.offset[1],
129103b705cfSriastradh					    transform, op->src.scale,
129203b705cfSriastradh					    &v[6], &v[7]);
129303b705cfSriastradh
129403b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
129503b705cfSriastradh					    box->y1 + op->src.offset[1],
129603b705cfSriastradh					    transform, op->src.scale,
129703b705cfSriastradh					    &v[10], &v[11]);
129803b705cfSriastradh
129903b705cfSriastradh		v += 12;
130003b705cfSriastradh		box++;
130103b705cfSriastradh	} while (--nbox);
130203b705cfSriastradh}
130303b705cfSriastradh
130403b705cfSriastradhsse2 fastcall static void
130503b705cfSriastradhgen3_emit_composite_primitive_constant_identity_mask__sse2(struct sna *sna,
130603b705cfSriastradh							   const struct sna_composite_op *op,
130703b705cfSriastradh							   const struct sna_composite_rectangles *r)
130803b705cfSriastradh{
130903b705cfSriastradh	float w = r->width;
131003b705cfSriastradh	float h = r->height;
131103b705cfSriastradh	float *v;
131203b705cfSriastradh
131303b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
131403b705cfSriastradh	sna->render.vertex_used += 12;
1315fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
131603b705cfSriastradh
131703b705cfSriastradh	v[8] = v[4] = r->dst.x + op->dst.x;
131803b705cfSriastradh	v[0] = v[4] + w;
131903b705cfSriastradh
132003b705cfSriastradh	v[9] = r->dst.y + op->dst.y;
132103b705cfSriastradh	v[5] = v[1] = v[9] + h;
132203b705cfSriastradh
132303b705cfSriastradh	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
132403b705cfSriastradh	v[2] = v[6] + w * op->mask.scale[0];
132503b705cfSriastradh
132603b705cfSriastradh	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
132703b705cfSriastradh	v[7] = v[3] = v[11] + h * op->mask.scale[1];
132803b705cfSriastradh}
132903b705cfSriastradh
133003b705cfSriastradhsse2 fastcall static void
133103b705cfSriastradhgen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2(struct sna *sna,
133203b705cfSriastradh								     const struct sna_composite_op *op,
133303b705cfSriastradh								     const struct sna_composite_rectangles *r)
133403b705cfSriastradh{
133503b705cfSriastradh	float w = r->width;
133603b705cfSriastradh	float h = r->height;
133703b705cfSriastradh	float *v;
133803b705cfSriastradh
133903b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
134003b705cfSriastradh	sna->render.vertex_used += 12;
1341fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
134203b705cfSriastradh
134303b705cfSriastradh	v[8] = v[4] = r->dst.x;
134403b705cfSriastradh	v[9] = r->dst.y;
134503b705cfSriastradh
134603b705cfSriastradh	v[0] = v[4] + w;
134703b705cfSriastradh	v[5] = v[1] = v[9] + h;
134803b705cfSriastradh
134903b705cfSriastradh	v[10] = v[6] = r->mask.x * op->mask.scale[0];
135003b705cfSriastradh	v[11] = r->mask.y * op->mask.scale[1];
135103b705cfSriastradh
135203b705cfSriastradh	v[2] = v[6] + w * op->mask.scale[0];
135303b705cfSriastradh	v[7] = v[3] = v[11] + h * op->mask.scale[1];
135403b705cfSriastradh}
135503b705cfSriastradh
135603b705cfSriastradhsse2 fastcall static void
135703b705cfSriastradhgen3_emit_composite_primitive_identity_source_mask__sse2(struct sna *sna,
135803b705cfSriastradh							 const struct sna_composite_op *op,
135903b705cfSriastradh							 const struct sna_composite_rectangles *r)
136003b705cfSriastradh{
136103b705cfSriastradh	float dst_x, dst_y;
136203b705cfSriastradh	float src_x, src_y;
136303b705cfSriastradh	float msk_x, msk_y;
136403b705cfSriastradh	float w, h;
136503b705cfSriastradh	float *v;
136603b705cfSriastradh
136703b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
136803b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
136903b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
137003b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
137103b705cfSriastradh	msk_x = r->mask.x + op->mask.offset[0];
137203b705cfSriastradh	msk_y = r->mask.y + op->mask.offset[1];
137303b705cfSriastradh	w = r->width;
137403b705cfSriastradh	h = r->height;
137503b705cfSriastradh
137603b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
137703b705cfSriastradh	sna->render.vertex_used += 18;
1378fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
137903b705cfSriastradh
138003b705cfSriastradh	v[0] = dst_x + w;
138103b705cfSriastradh	v[1] = dst_y + h;
138203b705cfSriastradh	v[2] = (src_x + w) * op->src.scale[0];
138303b705cfSriastradh	v[3] = (src_y + h) * op->src.scale[1];
138403b705cfSriastradh	v[4] = (msk_x + w) * op->mask.scale[0];
138503b705cfSriastradh	v[5] = (msk_y + h) * op->mask.scale[1];
138603b705cfSriastradh
138703b705cfSriastradh	v[6] = dst_x;
138803b705cfSriastradh	v[7] = v[1];
138903b705cfSriastradh	v[8] = src_x * op->src.scale[0];
139003b705cfSriastradh	v[9] = v[3];
139103b705cfSriastradh	v[10] = msk_x * op->mask.scale[0];
139203b705cfSriastradh	v[11] =v[5];
139303b705cfSriastradh
139403b705cfSriastradh	v[12] = v[6];
139503b705cfSriastradh	v[13] = dst_y;
139603b705cfSriastradh	v[14] = v[8];
139703b705cfSriastradh	v[15] = src_y * op->src.scale[1];
139803b705cfSriastradh	v[16] = v[10];
139903b705cfSriastradh	v[17] = msk_y * op->mask.scale[1];
140003b705cfSriastradh}
140103b705cfSriastradh
140203b705cfSriastradhsse2 fastcall static void
140303b705cfSriastradhgen3_emit_composite_primitive_affine_source_mask__sse2(struct sna *sna,
140403b705cfSriastradh						       const struct sna_composite_op *op,
140503b705cfSriastradh						       const struct sna_composite_rectangles *r)
140603b705cfSriastradh{
140703b705cfSriastradh	int16_t src_x, src_y;
140803b705cfSriastradh	float dst_x, dst_y;
140903b705cfSriastradh	float msk_x, msk_y;
141003b705cfSriastradh	float w, h;
141103b705cfSriastradh	float *v;
141203b705cfSriastradh
141303b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
141403b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
141503b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
141603b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
141703b705cfSriastradh	msk_x = r->mask.x + op->mask.offset[0];
141803b705cfSriastradh	msk_y = r->mask.y + op->mask.offset[1];
141903b705cfSriastradh	w = r->width;
142003b705cfSriastradh	h = r->height;
142103b705cfSriastradh
142203b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
142303b705cfSriastradh	sna->render.vertex_used += 18;
1424fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
142503b705cfSriastradh
142603b705cfSriastradh	v[0] = dst_x + w;
142703b705cfSriastradh	v[1] = dst_y + h;
142803b705cfSriastradh	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
142903b705cfSriastradh				    op->src.transform, op->src.scale,
143003b705cfSriastradh				    &v[2], &v[3]);
143103b705cfSriastradh	v[4] = (msk_x + w) * op->mask.scale[0];
143203b705cfSriastradh	v[5] = (msk_y + h) * op->mask.scale[1];
143303b705cfSriastradh
143403b705cfSriastradh	v[6] = dst_x;
143503b705cfSriastradh	v[7] = v[1];
143603b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y + r->height,
143703b705cfSriastradh				    op->src.transform, op->src.scale,
143803b705cfSriastradh				    &v[8], &v[9]);
143903b705cfSriastradh	v[10] = msk_x * op->mask.scale[0];
144003b705cfSriastradh	v[11] =v[5];
144103b705cfSriastradh
144203b705cfSriastradh	v[12] = v[6];
144303b705cfSriastradh	v[13] = dst_y;
144403b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y,
144503b705cfSriastradh				    op->src.transform, op->src.scale,
144603b705cfSriastradh				    &v[14], &v[15]);
144703b705cfSriastradh	v[16] = v[10];
144803b705cfSriastradh	v[17] = msk_y * op->mask.scale[1];
144903b705cfSriastradh}
145003b705cfSriastradh#endif
145103b705cfSriastradh
145203b705cfSriastradhstatic inline void
145303b705cfSriastradhgen3_2d_perspective(struct sna *sna, int in, int out)
145403b705cfSriastradh{
145503b705cfSriastradh	gen3_fs_rcp(out, 0, gen3_fs_operand(in, W, W, W, W));
145603b705cfSriastradh	gen3_fs_mul(out,
145703b705cfSriastradh		    gen3_fs_operand(in, X, Y, ZERO, ONE),
145803b705cfSriastradh		    gen3_fs_operand_reg(out));
145903b705cfSriastradh}
146003b705cfSriastradh
146103b705cfSriastradhstatic inline void
146203b705cfSriastradhgen3_linear_coord(struct sna *sna,
146303b705cfSriastradh		  const struct sna_composite_channel *channel,
146403b705cfSriastradh		  int in, int out)
146503b705cfSriastradh{
146603b705cfSriastradh	int c = channel->u.gen3.constants;
146703b705cfSriastradh
146803b705cfSriastradh	if (!channel->is_affine) {
146903b705cfSriastradh		gen3_2d_perspective(sna, in, FS_U0);
147003b705cfSriastradh		in = FS_U0;
147103b705cfSriastradh	}
147203b705cfSriastradh
147303b705cfSriastradh	gen3_fs_mov(out, gen3_fs_operand_zero());
147403b705cfSriastradh	gen3_fs_dp3(out, MASK_X,
147503b705cfSriastradh		    gen3_fs_operand(in, X, Y, ONE, ZERO),
147603b705cfSriastradh		    gen3_fs_operand_reg(c));
147703b705cfSriastradh}
147803b705cfSriastradh
147903b705cfSriastradhstatic void
148003b705cfSriastradhgen3_radial_coord(struct sna *sna,
148103b705cfSriastradh		  const struct sna_composite_channel *channel,
148203b705cfSriastradh		  int in, int out)
148303b705cfSriastradh{
148403b705cfSriastradh	int c = channel->u.gen3.constants;
148503b705cfSriastradh
148603b705cfSriastradh	if (!channel->is_affine) {
148703b705cfSriastradh		gen3_2d_perspective(sna, in, FS_U0);
148803b705cfSriastradh		in = FS_U0;
148903b705cfSriastradh	}
149003b705cfSriastradh
149103b705cfSriastradh	switch (channel->u.gen3.mode) {
149203b705cfSriastradh	case RADIAL_ONE:
149303b705cfSriastradh		/*
149403b705cfSriastradh		   pdx = (x - c1x) / dr, pdy = (y - c1y) / dr;
149503b705cfSriastradh		   r² = pdx*pdx + pdy*pdy
149603b705cfSriastradh		   t = r²/sqrt(r²) - r1/dr;
149703b705cfSriastradh		   */
149803b705cfSriastradh		gen3_fs_mad(FS_U0, MASK_X | MASK_Y,
149903b705cfSriastradh			    gen3_fs_operand(in, X, Y, ZERO, ZERO),
150003b705cfSriastradh			    gen3_fs_operand(c, Z, Z, ZERO, ZERO),
150103b705cfSriastradh			    gen3_fs_operand(c, NEG_X, NEG_Y, ZERO, ZERO));
150203b705cfSriastradh		gen3_fs_dp2add(FS_U0, MASK_X,
150303b705cfSriastradh			       gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
150403b705cfSriastradh			       gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
150503b705cfSriastradh			       gen3_fs_operand_zero());
150603b705cfSriastradh		gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U0, X, X, X, X));
150703b705cfSriastradh		gen3_fs_mad(out, 0,
150803b705cfSriastradh			    gen3_fs_operand(FS_U0, X, ZERO, ZERO, ZERO),
150903b705cfSriastradh			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
151003b705cfSriastradh			    gen3_fs_operand(c, W, ZERO, ZERO, ZERO));
151103b705cfSriastradh		break;
151203b705cfSriastradh
151303b705cfSriastradh	case RADIAL_TWO:
151403b705cfSriastradh		/*
151503b705cfSriastradh		   pdx = x - c1x, pdy = y - c1y;
151603b705cfSriastradh		   A = dx² + dy² - dr²
151703b705cfSriastradh		   B = -2*(pdx*dx + pdy*dy + r1*dr);
151803b705cfSriastradh		   C = pdx² + pdy² - r1²;
151903b705cfSriastradh		   det = B*B - 4*A*C;
152003b705cfSriastradh		   t = (-B + sqrt (det)) / (2 * A)
152103b705cfSriastradh		   */
152203b705cfSriastradh
152303b705cfSriastradh		/* u0.x = pdx, u0.y = pdy, u[0].z = r1; */
152403b705cfSriastradh		gen3_fs_add(FS_U0,
152503b705cfSriastradh			    gen3_fs_operand(in, X, Y, ZERO, ZERO),
152603b705cfSriastradh			    gen3_fs_operand(c, X, Y, Z, ZERO));
152703b705cfSriastradh		/* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */
152803b705cfSriastradh		gen3_fs_dp3(FS_U0, MASK_W,
152903b705cfSriastradh			    gen3_fs_operand(FS_U0, X, Y, ONE, ZERO),
153003b705cfSriastradh			    gen3_fs_operand(c+1, X, Y, Z, ZERO));
153103b705cfSriastradh		/* u1.x = pdx² + pdy² - r1²; [C] */
153203b705cfSriastradh		gen3_fs_dp3(FS_U1, MASK_X,
153303b705cfSriastradh			    gen3_fs_operand(FS_U0, X, Y, Z, ZERO),
153403b705cfSriastradh			    gen3_fs_operand(FS_U0, X, Y, NEG_Z, ZERO));
153503b705cfSriastradh		/* u1.x = C, u1.y = B, u1.z=-4*A; */
153603b705cfSriastradh		gen3_fs_mov_masked(FS_U1, MASK_Y, gen3_fs_operand(FS_U0, W, W, W, W));
153703b705cfSriastradh		gen3_fs_mov_masked(FS_U1, MASK_Z, gen3_fs_operand(c, W, W, W, W));
153803b705cfSriastradh		/* u1.x = B² - 4*A*C */
153903b705cfSriastradh		gen3_fs_dp2add(FS_U1, MASK_X,
154003b705cfSriastradh			       gen3_fs_operand(FS_U1, X, Y, ZERO, ZERO),
154103b705cfSriastradh			       gen3_fs_operand(FS_U1, Z, Y, ZERO, ZERO),
154203b705cfSriastradh			       gen3_fs_operand_zero());
154303b705cfSriastradh		/* out.x = -B + sqrt (B² - 4*A*C), */
154403b705cfSriastradh		gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U1, X, X, X, X));
154503b705cfSriastradh		gen3_fs_mad(out, MASK_X,
154603b705cfSriastradh			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
154703b705cfSriastradh			    gen3_fs_operand(FS_U1, X, ZERO, ZERO, ZERO),
154803b705cfSriastradh			    gen3_fs_operand(FS_U0, NEG_W, ZERO, ZERO, ZERO));
154903b705cfSriastradh		/* out.x = (-B + sqrt (B² - 4*A*C)) / (2 * A), */
155003b705cfSriastradh		gen3_fs_mul(out,
155103b705cfSriastradh			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
155203b705cfSriastradh			    gen3_fs_operand(c+1, W, ZERO, ZERO, ZERO));
155303b705cfSriastradh		break;
155403b705cfSriastradh	}
155503b705cfSriastradh}
155603b705cfSriastradh
155703b705cfSriastradhstatic void
155803b705cfSriastradhgen3_composite_emit_shader(struct sna *sna,
155903b705cfSriastradh			   const struct sna_composite_op *op,
156003b705cfSriastradh			   uint8_t blend)
156103b705cfSriastradh{
156203b705cfSriastradh	bool dst_is_alpha = PIXMAN_FORMAT_RGB(op->dst.format) == 0;
156303b705cfSriastradh	const struct sna_composite_channel *src, *mask;
156403b705cfSriastradh	struct gen3_render_state *state = &sna->render_state.gen3;
156503b705cfSriastradh	uint32_t shader_offset, id;
156603b705cfSriastradh	int src_reg, mask_reg;
156703b705cfSriastradh	int t, length;
156803b705cfSriastradh
156903b705cfSriastradh	src = &op->src;
157003b705cfSriastradh	mask = &op->mask;
157103b705cfSriastradh	if (mask->u.gen3.type == SHADER_NONE)
157203b705cfSriastradh		mask = NULL;
157303b705cfSriastradh
157403b705cfSriastradh	id = (src->u.gen3.type |
157503b705cfSriastradh	      src->is_affine << 4 |
157603b705cfSriastradh	      src->alpha_fixup << 5 |
157703b705cfSriastradh	      src->rb_reversed << 6);
157803b705cfSriastradh	if (mask) {
157903b705cfSriastradh		id |= (mask->u.gen3.type << 8 |
158003b705cfSriastradh		       mask->is_affine << 12 |
158103b705cfSriastradh		       gen3_blend_op[blend].src_alpha << 13 |
158203b705cfSriastradh		       op->has_component_alpha << 14 |
158303b705cfSriastradh		       mask->alpha_fixup << 15 |
158403b705cfSriastradh		       mask->rb_reversed << 16);
158503b705cfSriastradh	}
158603b705cfSriastradh	id |= dst_is_alpha << 24;
158703b705cfSriastradh	id |= op->rb_reversed << 25;
158803b705cfSriastradh
158903b705cfSriastradh	if (id == state->last_shader)
159003b705cfSriastradh		return;
159103b705cfSriastradh
159203b705cfSriastradh	state->last_shader = id;
159303b705cfSriastradh
159403b705cfSriastradh	shader_offset = sna->kgem.nbatch++;
159503b705cfSriastradh	t = 0;
159603b705cfSriastradh	switch (src->u.gen3.type) {
159703b705cfSriastradh	case SHADER_NONE:
159803b705cfSriastradh	case SHADER_OPACITY:
159903b705cfSriastradh		assert(0);
160003b705cfSriastradh	case SHADER_ZERO:
160103b705cfSriastradh	case SHADER_BLACK:
160203b705cfSriastradh	case SHADER_WHITE:
160303b705cfSriastradh		break;
160403b705cfSriastradh	case SHADER_CONSTANT:
160503b705cfSriastradh		gen3_fs_dcl(FS_T8);
160603b705cfSriastradh		src_reg = FS_T8;
160703b705cfSriastradh		break;
160803b705cfSriastradh	case SHADER_TEXTURE:
160903b705cfSriastradh	case SHADER_RADIAL:
161003b705cfSriastradh	case SHADER_LINEAR:
161103b705cfSriastradh		gen3_fs_dcl(FS_S0);
161203b705cfSriastradh		gen3_fs_dcl(FS_T0);
161303b705cfSriastradh		t++;
161403b705cfSriastradh		break;
161503b705cfSriastradh	}
161603b705cfSriastradh
161703b705cfSriastradh	if (mask == NULL) {
161803b705cfSriastradh		switch (src->u.gen3.type) {
161903b705cfSriastradh		case SHADER_ZERO:
162003b705cfSriastradh			gen3_fs_mov(FS_OC, gen3_fs_operand_zero());
162103b705cfSriastradh			goto done;
162203b705cfSriastradh		case SHADER_BLACK:
162303b705cfSriastradh			if (dst_is_alpha)
162403b705cfSriastradh				gen3_fs_mov(FS_OC, gen3_fs_operand_one());
162503b705cfSriastradh			else
162603b705cfSriastradh				gen3_fs_mov(FS_OC, gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ONE));
162703b705cfSriastradh			goto done;
162803b705cfSriastradh		case SHADER_WHITE:
162903b705cfSriastradh			gen3_fs_mov(FS_OC, gen3_fs_operand_one());
163003b705cfSriastradh			goto done;
163103b705cfSriastradh		}
163203b705cfSriastradh		if (src->alpha_fixup && dst_is_alpha) {
163303b705cfSriastradh			gen3_fs_mov(FS_OC, gen3_fs_operand_one());
163403b705cfSriastradh			goto done;
163503b705cfSriastradh		}
163603b705cfSriastradh		/* No mask, so load directly to output color */
163703b705cfSriastradh		if (src->u.gen3.type != SHADER_CONSTANT) {
163803b705cfSriastradh			if (dst_is_alpha || src->rb_reversed ^ op->rb_reversed)
163903b705cfSriastradh				src_reg = FS_R0;
164003b705cfSriastradh			else
164103b705cfSriastradh				src_reg = FS_OC;
164203b705cfSriastradh		}
164303b705cfSriastradh		switch (src->u.gen3.type) {
164403b705cfSriastradh		case SHADER_LINEAR:
164503b705cfSriastradh			gen3_linear_coord(sna, src, FS_T0, FS_R0);
164603b705cfSriastradh			gen3_fs_texld(src_reg, FS_S0, FS_R0);
164703b705cfSriastradh			break;
164803b705cfSriastradh
164903b705cfSriastradh		case SHADER_RADIAL:
165003b705cfSriastradh			gen3_radial_coord(sna, src, FS_T0, FS_R0);
165103b705cfSriastradh			gen3_fs_texld(src_reg, FS_S0, FS_R0);
165203b705cfSriastradh			break;
165303b705cfSriastradh
165403b705cfSriastradh		case SHADER_TEXTURE:
165503b705cfSriastradh			if (src->is_affine)
165603b705cfSriastradh				gen3_fs_texld(src_reg, FS_S0, FS_T0);
165703b705cfSriastradh			else
165803b705cfSriastradh				gen3_fs_texldp(src_reg, FS_S0, FS_T0);
165903b705cfSriastradh			break;
166003b705cfSriastradh
166103b705cfSriastradh		case SHADER_NONE:
166203b705cfSriastradh		case SHADER_WHITE:
166303b705cfSriastradh		case SHADER_BLACK:
166403b705cfSriastradh		case SHADER_ZERO:
166503b705cfSriastradh			assert(0);
166603b705cfSriastradh		case SHADER_CONSTANT:
166703b705cfSriastradh			break;
166803b705cfSriastradh		}
166903b705cfSriastradh
167003b705cfSriastradh		if (src_reg != FS_OC) {
167103b705cfSriastradh			if (src->alpha_fixup)
167203b705cfSriastradh				gen3_fs_mov(FS_OC,
167303b705cfSriastradh					    src->rb_reversed ^ op->rb_reversed ?
167403b705cfSriastradh					    gen3_fs_operand(src_reg, Z, Y, X, ONE) :
167503b705cfSriastradh					    gen3_fs_operand(src_reg, X, Y, Z, ONE));
167603b705cfSriastradh			else if (dst_is_alpha)
167703b705cfSriastradh				gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, W, W, W, W));
167803b705cfSriastradh			else if (src->rb_reversed ^ op->rb_reversed)
167903b705cfSriastradh				gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, Z, Y, X, W));
168003b705cfSriastradh			else
168103b705cfSriastradh				gen3_fs_mov(FS_OC, gen3_fs_operand_reg(src_reg));
168203b705cfSriastradh		} else if (src->alpha_fixup)
168303b705cfSriastradh			gen3_fs_mov_masked(FS_OC, MASK_W, gen3_fs_operand_one());
168403b705cfSriastradh	} else {
168503b705cfSriastradh		int out_reg = FS_OC;
168603b705cfSriastradh		if (op->rb_reversed)
168703b705cfSriastradh			out_reg = FS_U0;
168803b705cfSriastradh
168903b705cfSriastradh		switch (mask->u.gen3.type) {
169003b705cfSriastradh		case SHADER_CONSTANT:
169103b705cfSriastradh			gen3_fs_dcl(FS_T9);
169203b705cfSriastradh			mask_reg = FS_T9;
169303b705cfSriastradh			break;
169403b705cfSriastradh		case SHADER_TEXTURE:
169503b705cfSriastradh		case SHADER_LINEAR:
169603b705cfSriastradh		case SHADER_RADIAL:
169703b705cfSriastradh			gen3_fs_dcl(FS_S0 + t);
169803b705cfSriastradh			/* fall through */
169903b705cfSriastradh		case SHADER_OPACITY:
170003b705cfSriastradh			gen3_fs_dcl(FS_T0 + t);
170103b705cfSriastradh			break;
170203b705cfSriastradh		case SHADER_ZERO:
170303b705cfSriastradh		case SHADER_BLACK:
170403b705cfSriastradh			assert(0);
170503b705cfSriastradh		case SHADER_NONE:
170603b705cfSriastradh		case SHADER_WHITE:
170703b705cfSriastradh			break;
170803b705cfSriastradh		}
170903b705cfSriastradh
171003b705cfSriastradh		t = 0;
171103b705cfSriastradh		switch (src->u.gen3.type) {
171203b705cfSriastradh		case SHADER_LINEAR:
171303b705cfSriastradh			gen3_linear_coord(sna, src, FS_T0, FS_R0);
171403b705cfSriastradh			gen3_fs_texld(FS_R0, FS_S0, FS_R0);
171503b705cfSriastradh			src_reg = FS_R0;
171603b705cfSriastradh			t++;
171703b705cfSriastradh			break;
171803b705cfSriastradh
171903b705cfSriastradh		case SHADER_RADIAL:
172003b705cfSriastradh			gen3_radial_coord(sna, src, FS_T0, FS_R0);
172103b705cfSriastradh			gen3_fs_texld(FS_R0, FS_S0, FS_R0);
172203b705cfSriastradh			src_reg = FS_R0;
172303b705cfSriastradh			t++;
172403b705cfSriastradh			break;
172503b705cfSriastradh
172603b705cfSriastradh		case SHADER_TEXTURE:
172703b705cfSriastradh			if (src->is_affine)
172803b705cfSriastradh				gen3_fs_texld(FS_R0, FS_S0, FS_T0);
172903b705cfSriastradh			else
173003b705cfSriastradh				gen3_fs_texldp(FS_R0, FS_S0, FS_T0);
173103b705cfSriastradh			src_reg = FS_R0;
173203b705cfSriastradh			t++;
173303b705cfSriastradh			break;
173403b705cfSriastradh
173503b705cfSriastradh		case SHADER_CONSTANT:
173603b705cfSriastradh		case SHADER_NONE:
173703b705cfSriastradh		case SHADER_ZERO:
173803b705cfSriastradh		case SHADER_BLACK:
173903b705cfSriastradh		case SHADER_WHITE:
174003b705cfSriastradh			break;
174103b705cfSriastradh		}
174203b705cfSriastradh		if (src->alpha_fixup)
174303b705cfSriastradh			gen3_fs_mov_masked(src_reg, MASK_W, gen3_fs_operand_one());
174403b705cfSriastradh		if (src->rb_reversed)
174503b705cfSriastradh			gen3_fs_mov(src_reg, gen3_fs_operand(src_reg, Z, Y, X, W));
174603b705cfSriastradh
174703b705cfSriastradh		switch (mask->u.gen3.type) {
174803b705cfSriastradh		case SHADER_LINEAR:
174903b705cfSriastradh			gen3_linear_coord(sna, mask, FS_T0 + t, FS_R1);
175003b705cfSriastradh			gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
175103b705cfSriastradh			mask_reg = FS_R1;
175203b705cfSriastradh			break;
175303b705cfSriastradh
175403b705cfSriastradh		case SHADER_RADIAL:
175503b705cfSriastradh			gen3_radial_coord(sna, mask, FS_T0 + t, FS_R1);
175603b705cfSriastradh			gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
175703b705cfSriastradh			mask_reg = FS_R1;
175803b705cfSriastradh			break;
175903b705cfSriastradh
176003b705cfSriastradh		case SHADER_TEXTURE:
176103b705cfSriastradh			if (mask->is_affine)
176203b705cfSriastradh				gen3_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
176303b705cfSriastradh			else
176403b705cfSriastradh				gen3_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
176503b705cfSriastradh			mask_reg = FS_R1;
176603b705cfSriastradh			break;
176703b705cfSriastradh
176803b705cfSriastradh		case SHADER_OPACITY:
176903b705cfSriastradh			switch (src->u.gen3.type) {
177003b705cfSriastradh			case SHADER_BLACK:
177103b705cfSriastradh			case SHADER_WHITE:
177203b705cfSriastradh				if (dst_is_alpha || src->u.gen3.type == SHADER_WHITE) {
177303b705cfSriastradh					gen3_fs_mov(out_reg,
177403b705cfSriastradh						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
177503b705cfSriastradh				} else {
177603b705cfSriastradh					gen3_fs_mov(out_reg,
177703b705cfSriastradh						    gen3_fs_operand(FS_T0 + t, ZERO, ZERO, ZERO, X));
177803b705cfSriastradh				}
177903b705cfSriastradh				break;
178003b705cfSriastradh			default:
178103b705cfSriastradh				if (dst_is_alpha) {
178203b705cfSriastradh					gen3_fs_mul(out_reg,
178303b705cfSriastradh						    gen3_fs_operand(src_reg, W, W, W, W),
178403b705cfSriastradh						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
178503b705cfSriastradh				} else {
178603b705cfSriastradh					gen3_fs_mul(out_reg,
178703b705cfSriastradh						    gen3_fs_operand(src_reg, X, Y, Z, W),
178803b705cfSriastradh						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
178903b705cfSriastradh				}
179003b705cfSriastradh			}
179103b705cfSriastradh			goto mask_done;
179203b705cfSriastradh
179303b705cfSriastradh		case SHADER_CONSTANT:
179403b705cfSriastradh		case SHADER_ZERO:
179503b705cfSriastradh		case SHADER_BLACK:
179603b705cfSriastradh		case SHADER_WHITE:
179703b705cfSriastradh		case SHADER_NONE:
179803b705cfSriastradh			break;
179903b705cfSriastradh		}
180003b705cfSriastradh		if (mask->alpha_fixup)
180103b705cfSriastradh			gen3_fs_mov_masked(mask_reg, MASK_W, gen3_fs_operand_one());
180203b705cfSriastradh		if (mask->rb_reversed)
180303b705cfSriastradh			gen3_fs_mov(mask_reg, gen3_fs_operand(mask_reg, Z, Y, X, W));
180403b705cfSriastradh
180503b705cfSriastradh		if (dst_is_alpha) {
180603b705cfSriastradh			switch (src->u.gen3.type) {
180703b705cfSriastradh			case SHADER_BLACK:
180803b705cfSriastradh			case SHADER_WHITE:
180903b705cfSriastradh				gen3_fs_mov(out_reg,
181003b705cfSriastradh					    gen3_fs_operand(mask_reg, W, W, W, W));
181103b705cfSriastradh				break;
181203b705cfSriastradh			default:
181303b705cfSriastradh				gen3_fs_mul(out_reg,
181403b705cfSriastradh					    gen3_fs_operand(src_reg, W, W, W, W),
181503b705cfSriastradh					    gen3_fs_operand(mask_reg, W, W, W, W));
181603b705cfSriastradh				break;
181703b705cfSriastradh			}
181803b705cfSriastradh		} else {
181903b705cfSriastradh			/* If component alpha is active in the mask and the blend
182003b705cfSriastradh			 * operation uses the source alpha, then we know we don't
182103b705cfSriastradh			 * need the source value (otherwise we would have hit a
182203b705cfSriastradh			 * fallback earlier), so we provide the source alpha (src.A *
182303b705cfSriastradh			 * mask.X) as output color.
182403b705cfSriastradh			 * Conversely, if CA is set and we don't need the source alpha,
182503b705cfSriastradh			 * then we produce the source value (src.X * mask.X) and the
182603b705cfSriastradh			 * source alpha is unused.  Otherwise, we provide the non-CA
182703b705cfSriastradh			 * source value (src.X * mask.A).
182803b705cfSriastradh			 */
182903b705cfSriastradh			if (op->has_component_alpha) {
183003b705cfSriastradh				switch (src->u.gen3.type) {
183103b705cfSriastradh				case SHADER_BLACK:
183203b705cfSriastradh					if (gen3_blend_op[blend].src_alpha)
183303b705cfSriastradh						gen3_fs_mov(out_reg,
183403b705cfSriastradh							    gen3_fs_operand_reg(mask_reg));
183503b705cfSriastradh					else
183603b705cfSriastradh						gen3_fs_mov(out_reg,
183703b705cfSriastradh							    gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
183803b705cfSriastradh					break;
183903b705cfSriastradh				case SHADER_WHITE:
184003b705cfSriastradh					gen3_fs_mov(out_reg,
184103b705cfSriastradh						    gen3_fs_operand_reg(mask_reg));
184203b705cfSriastradh					break;
184303b705cfSriastradh				default:
184403b705cfSriastradh					if (gen3_blend_op[blend].src_alpha)
184503b705cfSriastradh						gen3_fs_mul(out_reg,
184603b705cfSriastradh							    gen3_fs_operand(src_reg, W, W, W, W),
184703b705cfSriastradh							    gen3_fs_operand_reg(mask_reg));
184803b705cfSriastradh					else
184903b705cfSriastradh						gen3_fs_mul(out_reg,
185003b705cfSriastradh							    gen3_fs_operand_reg(src_reg),
185103b705cfSriastradh							    gen3_fs_operand_reg(mask_reg));
185203b705cfSriastradh					break;
185303b705cfSriastradh				}
185403b705cfSriastradh			} else {
185503b705cfSriastradh				switch (src->u.gen3.type) {
185603b705cfSriastradh				case SHADER_WHITE:
185703b705cfSriastradh					gen3_fs_mov(out_reg,
185803b705cfSriastradh						    gen3_fs_operand(mask_reg, W, W, W, W));
185903b705cfSriastradh					break;
186003b705cfSriastradh				case SHADER_BLACK:
186103b705cfSriastradh					gen3_fs_mov(out_reg,
186203b705cfSriastradh						    gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
186303b705cfSriastradh					break;
186403b705cfSriastradh				default:
186503b705cfSriastradh					gen3_fs_mul(out_reg,
186603b705cfSriastradh						    gen3_fs_operand_reg(src_reg),
186703b705cfSriastradh						    gen3_fs_operand(mask_reg, W, W, W, W));
186803b705cfSriastradh					break;
186903b705cfSriastradh				}
187003b705cfSriastradh			}
187103b705cfSriastradh		}
187203b705cfSriastradhmask_done:
187303b705cfSriastradh		if (op->rb_reversed)
187403b705cfSriastradh			gen3_fs_mov(FS_OC, gen3_fs_operand(FS_U0, Z, Y, X, W));
187503b705cfSriastradh	}
187603b705cfSriastradh
187703b705cfSriastradhdone:
187803b705cfSriastradh	length = sna->kgem.nbatch - shader_offset;
187903b705cfSriastradh	sna->kgem.batch[shader_offset] =
188003b705cfSriastradh		_3DSTATE_PIXEL_SHADER_PROGRAM | (length - 2);
188103b705cfSriastradh}
188203b705cfSriastradh
188303b705cfSriastradhstatic uint32_t gen3_ms_tiling(uint32_t tiling)
188403b705cfSriastradh{
188503b705cfSriastradh	uint32_t v = 0;
188603b705cfSriastradh	switch (tiling) {
188703b705cfSriastradh	case I915_TILING_Y: v |= MS3_TILE_WALK;
188803b705cfSriastradh	case I915_TILING_X: v |= MS3_TILED_SURFACE;
188903b705cfSriastradh	case I915_TILING_NONE: break;
189003b705cfSriastradh	}
189103b705cfSriastradh	return v;
189203b705cfSriastradh}
189303b705cfSriastradh
189403b705cfSriastradhstatic void gen3_emit_invariant(struct sna *sna)
189503b705cfSriastradh{
189603b705cfSriastradh	/* Disable independent alpha blend */
189703b705cfSriastradh	OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE |
189803b705cfSriastradh		  IAB_MODIFY_FUNC | BLENDFUNC_ADD << IAB_FUNC_SHIFT |
189903b705cfSriastradh		  IAB_MODIFY_SRC_FACTOR | BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT |
190003b705cfSriastradh		  IAB_MODIFY_DST_FACTOR | BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT);
190103b705cfSriastradh
190203b705cfSriastradh	OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
190303b705cfSriastradh		  CSB_TCB(0, 0) |
190403b705cfSriastradh		  CSB_TCB(1, 1) |
190503b705cfSriastradh		  CSB_TCB(2, 2) |
190603b705cfSriastradh		  CSB_TCB(3, 3) |
190703b705cfSriastradh		  CSB_TCB(4, 4) |
190803b705cfSriastradh		  CSB_TCB(5, 5) |
190903b705cfSriastradh		  CSB_TCB(6, 6) |
191003b705cfSriastradh		  CSB_TCB(7, 7));
191103b705cfSriastradh
191203b705cfSriastradh	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3);
191303b705cfSriastradh	OUT_BATCH(0); /* Disable texture coordinate wrap-shortest */
191403b705cfSriastradh	OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) |
191503b705cfSriastradh		  S4_LINE_WIDTH_ONE |
191603b705cfSriastradh		  S4_CULLMODE_NONE |
191703b705cfSriastradh		  S4_VFMT_XY);
191803b705cfSriastradh	OUT_BATCH(0); /* Disable fog/stencil. *Enable* write mask. */
191903b705cfSriastradh	OUT_BATCH(S6_COLOR_WRITE_ONLY); /* Disable blending, depth */
192003b705cfSriastradh
192103b705cfSriastradh	OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
192203b705cfSriastradh	OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
192303b705cfSriastradh
192403b705cfSriastradh	OUT_BATCH(_3DSTATE_LOAD_INDIRECT);
192503b705cfSriastradh	OUT_BATCH(0x00000000);
192603b705cfSriastradh
192703b705cfSriastradh	OUT_BATCH(_3DSTATE_STIPPLE);
192803b705cfSriastradh	OUT_BATCH(0x00000000);
192903b705cfSriastradh
193003b705cfSriastradh	sna->render_state.gen3.need_invariant = false;
193103b705cfSriastradh}
193203b705cfSriastradh
193303b705cfSriastradh#define MAX_OBJECTS 3 /* worst case: dst + src + mask  */
193403b705cfSriastradh
193503b705cfSriastradhstatic void
193603b705cfSriastradhgen3_get_batch(struct sna *sna, const struct sna_composite_op *op)
193703b705cfSriastradh{
193803b705cfSriastradh	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
193903b705cfSriastradh
194003b705cfSriastradh	if (!kgem_check_batch(&sna->kgem, 200)) {
194103b705cfSriastradh		DBG(("%s: flushing batch: size %d > %d\n",
194203b705cfSriastradh		     __FUNCTION__, 200,
194303b705cfSriastradh		     sna->kgem.surface-sna->kgem.nbatch));
194403b705cfSriastradh		kgem_submit(&sna->kgem);
194503b705cfSriastradh		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
194603b705cfSriastradh	}
194703b705cfSriastradh
194803b705cfSriastradh	if (!kgem_check_reloc(&sna->kgem, MAX_OBJECTS)) {
194903b705cfSriastradh		DBG(("%s: flushing batch: reloc %d >= %d\n",
195003b705cfSriastradh		     __FUNCTION__,
195103b705cfSriastradh		     sna->kgem.nreloc,
195203b705cfSriastradh		     (int)KGEM_RELOC_SIZE(&sna->kgem) - MAX_OBJECTS));
195303b705cfSriastradh		kgem_submit(&sna->kgem);
195403b705cfSriastradh		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
195503b705cfSriastradh	}
195603b705cfSriastradh
195703b705cfSriastradh	if (!kgem_check_exec(&sna->kgem, MAX_OBJECTS)) {
195803b705cfSriastradh		DBG(("%s: flushing batch: exec %d >= %d\n",
195903b705cfSriastradh		     __FUNCTION__,
196003b705cfSriastradh		     sna->kgem.nexec,
196103b705cfSriastradh		     (int)KGEM_EXEC_SIZE(&sna->kgem) - MAX_OBJECTS - 1));
196203b705cfSriastradh		kgem_submit(&sna->kgem);
196303b705cfSriastradh		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
196403b705cfSriastradh	}
196503b705cfSriastradh
196603b705cfSriastradh	if (sna->render_state.gen3.need_invariant)
196703b705cfSriastradh		gen3_emit_invariant(sna);
196803b705cfSriastradh#undef MAX_OBJECTS
196903b705cfSriastradh}
197003b705cfSriastradh
197103b705cfSriastradhstatic void gen3_emit_target(struct sna *sna,
197203b705cfSriastradh			     struct kgem_bo *bo,
197303b705cfSriastradh			     int width,
197403b705cfSriastradh			     int height,
197503b705cfSriastradh			     int format)
197603b705cfSriastradh{
197703b705cfSriastradh	struct gen3_render_state *state = &sna->render_state.gen3;
197803b705cfSriastradh
197903b705cfSriastradh	assert(!too_large(width, height));
198003b705cfSriastradh
198103b705cfSriastradh	/* BUF_INFO is an implicit flush, so skip if the target is unchanged. */
198203b705cfSriastradh	assert(bo->unique_id != 0);
198303b705cfSriastradh	if (bo->unique_id != state->current_dst) {
198403b705cfSriastradh		uint32_t v;
198503b705cfSriastradh
198603b705cfSriastradh		DBG(("%s: setting new target id=%d, handle=%d\n",
198703b705cfSriastradh		     __FUNCTION__, bo->unique_id, bo->handle));
198803b705cfSriastradh
198903b705cfSriastradh		OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
199003b705cfSriastradh		OUT_BATCH(BUF_3D_ID_COLOR_BACK |
199103b705cfSriastradh			  gen3_buf_tiling(bo->tiling) |
199203b705cfSriastradh			  bo->pitch);
199303b705cfSriastradh		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
199403b705cfSriastradh					 bo,
199503b705cfSriastradh					 I915_GEM_DOMAIN_RENDER << 16 |
199603b705cfSriastradh					 I915_GEM_DOMAIN_RENDER,
199703b705cfSriastradh					 0));
199803b705cfSriastradh
199903b705cfSriastradh		OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
200003b705cfSriastradh		OUT_BATCH(gen3_get_dst_format(format));
200103b705cfSriastradh
200203b705cfSriastradh		v = DRAW_YMAX(height - 1) | DRAW_XMAX(width - 1);
200303b705cfSriastradh		if (v != state->last_drawrect_limit) {
200403b705cfSriastradh			OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
200503b705cfSriastradh			OUT_BATCH(0); /* XXX dither origin? */
200603b705cfSriastradh			OUT_BATCH(0);
200703b705cfSriastradh			OUT_BATCH(v);
200803b705cfSriastradh			OUT_BATCH(0);
200903b705cfSriastradh			state->last_drawrect_limit = v;
201003b705cfSriastradh		}
201103b705cfSriastradh
201203b705cfSriastradh		state->current_dst = bo->unique_id;
201303b705cfSriastradh	}
201403b705cfSriastradh	assert(bo->exec);
201503b705cfSriastradh	kgem_bo_mark_dirty(bo);
201603b705cfSriastradh}
201703b705cfSriastradh
201803b705cfSriastradhstatic void gen3_emit_composite_state(struct sna *sna,
201903b705cfSriastradh				      const struct sna_composite_op *op)
202003b705cfSriastradh{
202103b705cfSriastradh	struct gen3_render_state *state = &sna->render_state.gen3;
202203b705cfSriastradh	uint32_t map[4];
202303b705cfSriastradh	uint32_t sampler[4];
202403b705cfSriastradh	struct kgem_bo *bo[2];
202503b705cfSriastradh	unsigned int tex_count, n;
202603b705cfSriastradh	uint32_t ss2;
202703b705cfSriastradh
202803b705cfSriastradh	gen3_get_batch(sna, op);
202903b705cfSriastradh
203003b705cfSriastradh	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
203103b705cfSriastradh		if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
203203b705cfSriastradh			OUT_BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
203303b705cfSriastradh		else
203403b705cfSriastradh			OUT_BATCH(_3DSTATE_MODES_5_CMD |
203503b705cfSriastradh				  PIPELINE_FLUSH_RENDER_CACHE |
203603b705cfSriastradh				  PIPELINE_FLUSH_TEXTURE_CACHE);
203703b705cfSriastradh		kgem_clear_dirty(&sna->kgem);
203803b705cfSriastradh	}
203903b705cfSriastradh
204003b705cfSriastradh	gen3_emit_target(sna,
204103b705cfSriastradh			 op->dst.bo,
204203b705cfSriastradh			 op->dst.width,
204303b705cfSriastradh			 op->dst.height,
204403b705cfSriastradh			 op->dst.format);
204503b705cfSriastradh
204603b705cfSriastradh	ss2 = ~0;
204703b705cfSriastradh	tex_count = 0;
204803b705cfSriastradh	switch (op->src.u.gen3.type) {
204903b705cfSriastradh	case SHADER_OPACITY:
205003b705cfSriastradh	case SHADER_NONE:
205103b705cfSriastradh		assert(0);
205203b705cfSriastradh	case SHADER_ZERO:
205303b705cfSriastradh	case SHADER_BLACK:
205403b705cfSriastradh	case SHADER_WHITE:
205503b705cfSriastradh		break;
205603b705cfSriastradh	case SHADER_CONSTANT:
205703b705cfSriastradh		if (op->src.u.gen3.mode != state->last_diffuse) {
205803b705cfSriastradh			OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
205903b705cfSriastradh			OUT_BATCH(op->src.u.gen3.mode);
206003b705cfSriastradh			state->last_diffuse = op->src.u.gen3.mode;
206103b705cfSriastradh		}
206203b705cfSriastradh		break;
206303b705cfSriastradh	case SHADER_LINEAR:
206403b705cfSriastradh	case SHADER_RADIAL:
206503b705cfSriastradh	case SHADER_TEXTURE:
206603b705cfSriastradh		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
206703b705cfSriastradh		ss2 |= S2_TEXCOORD_FMT(tex_count,
206803b705cfSriastradh				       op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
206903b705cfSriastradh		assert(op->src.card_format);
207003b705cfSriastradh		map[tex_count * 2 + 0] =
207103b705cfSriastradh			op->src.card_format |
207203b705cfSriastradh			gen3_ms_tiling(op->src.bo->tiling) |
207303b705cfSriastradh			(op->src.height - 1) << MS3_HEIGHT_SHIFT |
207403b705cfSriastradh			(op->src.width - 1) << MS3_WIDTH_SHIFT;
207503b705cfSriastradh		map[tex_count * 2 + 1] =
207603b705cfSriastradh			(op->src.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
207703b705cfSriastradh
207803b705cfSriastradh		sampler[tex_count * 2 + 0] = op->src.filter;
207903b705cfSriastradh		sampler[tex_count * 2 + 1] =
208003b705cfSriastradh			op->src.repeat |
208103b705cfSriastradh			tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
208203b705cfSriastradh		bo[tex_count] = op->src.bo;
208303b705cfSriastradh		tex_count++;
208403b705cfSriastradh		break;
208503b705cfSriastradh	}
208603b705cfSriastradh	switch (op->mask.u.gen3.type) {
208703b705cfSriastradh	case SHADER_NONE:
208803b705cfSriastradh	case SHADER_ZERO:
208903b705cfSriastradh	case SHADER_BLACK:
209003b705cfSriastradh	case SHADER_WHITE:
209103b705cfSriastradh		break;
209203b705cfSriastradh	case SHADER_CONSTANT:
209303b705cfSriastradh		if (op->mask.u.gen3.mode != state->last_specular) {
209403b705cfSriastradh			OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
209503b705cfSriastradh			OUT_BATCH(op->mask.u.gen3.mode);
209603b705cfSriastradh			state->last_specular = op->mask.u.gen3.mode;
209703b705cfSriastradh		}
209803b705cfSriastradh		break;
209903b705cfSriastradh	case SHADER_LINEAR:
210003b705cfSriastradh	case SHADER_RADIAL:
210103b705cfSriastradh	case SHADER_TEXTURE:
210203b705cfSriastradh		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
210303b705cfSriastradh		ss2 |= S2_TEXCOORD_FMT(tex_count,
210403b705cfSriastradh				       op->mask.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
210503b705cfSriastradh		assert(op->mask.card_format);
210603b705cfSriastradh		map[tex_count * 2 + 0] =
210703b705cfSriastradh			op->mask.card_format |
210803b705cfSriastradh			gen3_ms_tiling(op->mask.bo->tiling) |
210903b705cfSriastradh			(op->mask.height - 1) << MS3_HEIGHT_SHIFT |
211003b705cfSriastradh			(op->mask.width - 1) << MS3_WIDTH_SHIFT;
211103b705cfSriastradh		map[tex_count * 2 + 1] =
211203b705cfSriastradh			(op->mask.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
211303b705cfSriastradh
211403b705cfSriastradh		sampler[tex_count * 2 + 0] = op->mask.filter;
211503b705cfSriastradh		sampler[tex_count * 2 + 1] =
211603b705cfSriastradh			op->mask.repeat |
211703b705cfSriastradh			tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
211803b705cfSriastradh		bo[tex_count] = op->mask.bo;
211903b705cfSriastradh		tex_count++;
212003b705cfSriastradh		break;
212103b705cfSriastradh	case SHADER_OPACITY:
212203b705cfSriastradh		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
212303b705cfSriastradh		ss2 |= S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_1D);
212403b705cfSriastradh		break;
212503b705cfSriastradh	}
212603b705cfSriastradh
212703b705cfSriastradh	{
212803b705cfSriastradh		uint32_t blend_offset = sna->kgem.nbatch;
212903b705cfSriastradh
213003b705cfSriastradh		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
213103b705cfSriastradh		OUT_BATCH(ss2);
213203b705cfSriastradh		OUT_BATCH(gen3_get_blend_cntl(op->op,
213303b705cfSriastradh					      op->has_component_alpha,
213403b705cfSriastradh					      op->dst.format));
213503b705cfSriastradh
213603b705cfSriastradh		if (memcmp(sna->kgem.batch + state->last_blend + 1,
213703b705cfSriastradh			   sna->kgem.batch + blend_offset + 1,
213803b705cfSriastradh			   2 * 4) == 0)
213903b705cfSriastradh			sna->kgem.nbatch = blend_offset;
214003b705cfSriastradh		else
214103b705cfSriastradh			state->last_blend = blend_offset;
214203b705cfSriastradh	}
214303b705cfSriastradh
214403b705cfSriastradh	if (op->u.gen3.num_constants) {
214503b705cfSriastradh		int count = op->u.gen3.num_constants;
214603b705cfSriastradh		if (state->last_constants) {
214703b705cfSriastradh			int last = sna->kgem.batch[state->last_constants+1];
214803b705cfSriastradh			if (last == (1 << (count >> 2)) - 1 &&
214903b705cfSriastradh			    memcmp(&sna->kgem.batch[state->last_constants+2],
215003b705cfSriastradh				   op->u.gen3.constants,
215103b705cfSriastradh				   count * sizeof(uint32_t)) == 0)
215203b705cfSriastradh				count = 0;
215303b705cfSriastradh		}
215403b705cfSriastradh		if (count) {
215503b705cfSriastradh			state->last_constants = sna->kgem.nbatch;
215603b705cfSriastradh			OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | count);
215703b705cfSriastradh			OUT_BATCH((1 << (count >> 2)) - 1);
215803b705cfSriastradh
215903b705cfSriastradh			memcpy(sna->kgem.batch + sna->kgem.nbatch,
216003b705cfSriastradh			       op->u.gen3.constants,
216103b705cfSriastradh			       count * sizeof(uint32_t));
216203b705cfSriastradh			sna->kgem.nbatch += count;
216303b705cfSriastradh		}
216403b705cfSriastradh	}
216503b705cfSriastradh
216603b705cfSriastradh	if (tex_count != 0) {
216703b705cfSriastradh		uint32_t rewind;
216803b705cfSriastradh
216903b705cfSriastradh		n = 0;
217003b705cfSriastradh		if (tex_count == state->tex_count) {
217103b705cfSriastradh			for (; n < tex_count; n++) {
217203b705cfSriastradh				if (map[2*n+0] != state->tex_map[2*n+0] ||
217303b705cfSriastradh				    map[2*n+1] != state->tex_map[2*n+1] ||
217403b705cfSriastradh				    state->tex_handle[n] != bo[n]->handle ||
217503b705cfSriastradh				    state->tex_delta[n] != bo[n]->delta)
217603b705cfSriastradh					break;
217703b705cfSriastradh			}
217803b705cfSriastradh		}
217903b705cfSriastradh		if (n < tex_count) {
218003b705cfSriastradh			OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
218103b705cfSriastradh			OUT_BATCH((1 << tex_count) - 1);
218203b705cfSriastradh			for (n = 0; n < tex_count; n++) {
218303b705cfSriastradh				OUT_BATCH(kgem_add_reloc(&sna->kgem,
218403b705cfSriastradh							 sna->kgem.nbatch,
218503b705cfSriastradh							 bo[n],
218603b705cfSriastradh							 I915_GEM_DOMAIN_SAMPLER<< 16,
218703b705cfSriastradh							 0));
218803b705cfSriastradh				OUT_BATCH(map[2*n + 0]);
218903b705cfSriastradh				OUT_BATCH(map[2*n + 1]);
219003b705cfSriastradh
219103b705cfSriastradh				state->tex_map[2*n+0] = map[2*n+0];
219203b705cfSriastradh				state->tex_map[2*n+1] = map[2*n+1];
219303b705cfSriastradh				state->tex_handle[n] = bo[n]->handle;
219403b705cfSriastradh				state->tex_delta[n] = bo[n]->delta;
219503b705cfSriastradh			}
219603b705cfSriastradh			state->tex_count = n;
219703b705cfSriastradh		}
219803b705cfSriastradh
219903b705cfSriastradh		rewind = sna->kgem.nbatch;
220003b705cfSriastradh		OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count));
220103b705cfSriastradh		OUT_BATCH((1 << tex_count) - 1);
220203b705cfSriastradh		for (n = 0; n < tex_count; n++) {
220303b705cfSriastradh			OUT_BATCH(sampler[2*n + 0]);
220403b705cfSriastradh			OUT_BATCH(sampler[2*n + 1]);
220503b705cfSriastradh			OUT_BATCH(0);
220603b705cfSriastradh		}
220703b705cfSriastradh		if (state->last_sampler &&
220803b705cfSriastradh		    memcmp(&sna->kgem.batch[state->last_sampler+1],
220903b705cfSriastradh			   &sna->kgem.batch[rewind + 1],
221003b705cfSriastradh			   (3*tex_count + 1)*sizeof(uint32_t)) == 0)
221103b705cfSriastradh			sna->kgem.nbatch = rewind;
221203b705cfSriastradh		else
221303b705cfSriastradh			state->last_sampler = rewind;
221403b705cfSriastradh	}
221503b705cfSriastradh
221603b705cfSriastradh	gen3_composite_emit_shader(sna, op, op->op);
221703b705cfSriastradh}
221803b705cfSriastradh
221903b705cfSriastradhstatic bool gen3_magic_ca_pass(struct sna *sna,
222003b705cfSriastradh			       const struct sna_composite_op *op)
222103b705cfSriastradh{
222203b705cfSriastradh	if (!op->need_magic_ca_pass)
222303b705cfSriastradh		return false;
222403b705cfSriastradh
222503b705cfSriastradh	DBG(("%s(%d)\n", __FUNCTION__,
222603b705cfSriastradh	     sna->render.vertex_index - sna->render.vertex_start));
222703b705cfSriastradh
222803b705cfSriastradh	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
222903b705cfSriastradh	OUT_BATCH(gen3_get_blend_cntl(PictOpAdd, true, op->dst.format));
223003b705cfSriastradh	gen3_composite_emit_shader(sna, op, PictOpAdd);
223103b705cfSriastradh
223203b705cfSriastradh	OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
223303b705cfSriastradh		  (sna->render.vertex_index - sna->render.vertex_start));
223403b705cfSriastradh	OUT_BATCH(sna->render.vertex_start);
223503b705cfSriastradh
223603b705cfSriastradh	sna->render_state.gen3.last_blend = 0;
223703b705cfSriastradh	return true;
223803b705cfSriastradh}
223903b705cfSriastradh
224003b705cfSriastradhstatic void gen3_vertex_flush(struct sna *sna)
224103b705cfSriastradh{
224203b705cfSriastradh	assert(sna->render.vertex_offset);
224303b705cfSriastradh
224403b705cfSriastradh	DBG(("%s[%x] = %d\n", __FUNCTION__,
224503b705cfSriastradh	     4*sna->render.vertex_offset,
224603b705cfSriastradh	     sna->render.vertex_index - sna->render.vertex_start));
224703b705cfSriastradh
224803b705cfSriastradh	sna->kgem.batch[sna->render.vertex_offset] =
224903b705cfSriastradh		PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
225003b705cfSriastradh		(sna->render.vertex_index - sna->render.vertex_start);
225103b705cfSriastradh	sna->kgem.batch[sna->render.vertex_offset + 1] =
225203b705cfSriastradh		sna->render.vertex_start;
225303b705cfSriastradh
225403b705cfSriastradh	sna->render.vertex_offset = 0;
225503b705cfSriastradh}
225603b705cfSriastradh
225703b705cfSriastradhstatic int gen3_vertex_finish(struct sna *sna)
225803b705cfSriastradh{
225903b705cfSriastradh	struct kgem_bo *bo;
2260fe8aea9eSmrg	unsigned hint, size;
226103b705cfSriastradh
226203b705cfSriastradh	DBG(("%s: used=%d/%d, vbo active? %d\n",
226303b705cfSriastradh	     __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
226403b705cfSriastradh	     sna->render.vbo ? sna->render.vbo->handle : 0));
226503b705cfSriastradh	assert(sna->render.vertex_offset == 0);
226603b705cfSriastradh	assert(sna->render.vertex_used);
226703b705cfSriastradh	assert(sna->render.vertex_used <= sna->render.vertex_size);
226803b705cfSriastradh
226903b705cfSriastradh	sna_vertex_wait__locked(&sna->render);
227003b705cfSriastradh
2271fe8aea9eSmrg	hint = CREATE_GTT_MAP;
227203b705cfSriastradh	bo = sna->render.vbo;
227303b705cfSriastradh	if (bo) {
227403b705cfSriastradh		DBG(("%s: reloc = %d\n", __FUNCTION__,
227503b705cfSriastradh		     sna->render.vertex_reloc[0]));
227603b705cfSriastradh
227703b705cfSriastradh		if (sna->render.vertex_reloc[0]) {
227803b705cfSriastradh			sna->kgem.batch[sna->render.vertex_reloc[0]] =
227903b705cfSriastradh				kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
2280fe8aea9eSmrg					       bo, I915_GEM_DOMAIN_VERTEX << 16 | KGEM_RELOC_FENCED, 0);
228103b705cfSriastradh
228203b705cfSriastradh			sna->render.vertex_reloc[0] = 0;
228303b705cfSriastradh		}
228403b705cfSriastradh		sna->render.vertex_used = 0;
228503b705cfSriastradh		sna->render.vertex_index = 0;
228603b705cfSriastradh		sna->render.vbo = NULL;
228703b705cfSriastradh
228803b705cfSriastradh		kgem_bo_destroy(&sna->kgem, bo);
2289fe8aea9eSmrg		hint |= CREATE_CACHED | CREATE_NO_THROTTLE;
229003b705cfSriastradh	}
229103b705cfSriastradh
2292fe8aea9eSmrg	size = 256*1024;
229303b705cfSriastradh	sna->render.vertices = NULL;
2294fe8aea9eSmrg	sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
2295fe8aea9eSmrg	while (sna->render.vbo == NULL && size > sizeof(sna->render.vertex_data)) {
2296fe8aea9eSmrg		size /= 2;
2297fe8aea9eSmrg		sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
2298fe8aea9eSmrg	}
2299fe8aea9eSmrg	if (sna->render.vbo == NULL)
2300fe8aea9eSmrg		sna->render.vbo = kgem_create_linear(&sna->kgem,
2301fe8aea9eSmrg						     256*1024, CREATE_GTT_MAP);
2302fe8aea9eSmrg	if (sna->render.vbo &&
2303fe8aea9eSmrg	    kgem_check_bo(&sna->kgem, sna->render.vbo, NULL))
230403b705cfSriastradh		sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
230503b705cfSriastradh	if (sna->render.vertices == NULL) {
2306fe8aea9eSmrg		if (sna->render.vbo) {
230703b705cfSriastradh			kgem_bo_destroy(&sna->kgem, sna->render.vbo);
2308fe8aea9eSmrg			sna->render.vbo = NULL;
2309fe8aea9eSmrg		}
2310fe8aea9eSmrg		sna->render.vertices = sna->render.vertex_data;
2311fe8aea9eSmrg		sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
231203b705cfSriastradh		return 0;
231303b705cfSriastradh	}
231403b705cfSriastradh	assert(sna->render.vbo->snoop == false);
231503b705cfSriastradh
231603b705cfSriastradh	if (sna->render.vertex_used) {
231703b705cfSriastradh		memcpy(sna->render.vertices,
231803b705cfSriastradh		       sna->render.vertex_data,
231903b705cfSriastradh		       sizeof(float)*sna->render.vertex_used);
232003b705cfSriastradh	}
2321fe8aea9eSmrg
2322fe8aea9eSmrg	size = __kgem_bo_size(sna->render.vbo)/4;
2323fe8aea9eSmrg	if (size >= UINT16_MAX)
2324fe8aea9eSmrg		size = UINT16_MAX - 1;
2325fe8aea9eSmrg	assert(size > sna->render.vertex_used);
2326fe8aea9eSmrg
2327fe8aea9eSmrg	sna->render.vertex_size = size;
2328fe8aea9eSmrg	return size - sna->render.vertex_used;
232903b705cfSriastradh}
233003b705cfSriastradh
233103b705cfSriastradhstatic void gen3_vertex_close(struct sna *sna)
233203b705cfSriastradh{
233303b705cfSriastradh	struct kgem_bo *bo, *free_bo = NULL;
233403b705cfSriastradh	unsigned int delta = 0;
233503b705cfSriastradh
233603b705cfSriastradh	assert(sna->render.vertex_offset == 0);
233703b705cfSriastradh	if (sna->render.vertex_reloc[0] == 0)
233803b705cfSriastradh		return;
233903b705cfSriastradh
234003b705cfSriastradh	DBG(("%s: used=%d/%d, vbo active? %d\n",
234103b705cfSriastradh	     __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
234203b705cfSriastradh	     sna->render.vbo ? sna->render.vbo->handle : 0));
234303b705cfSriastradh
234403b705cfSriastradh	bo = sna->render.vbo;
234503b705cfSriastradh	if (bo) {
234603b705cfSriastradh		if (sna->render.vertex_size - sna->render.vertex_used < 64) {
234703b705cfSriastradh			DBG(("%s: discarding full vbo\n", __FUNCTION__));
234803b705cfSriastradh			sna->render.vbo = NULL;
234903b705cfSriastradh			sna->render.vertices = sna->render.vertex_data;
235003b705cfSriastradh			sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
235103b705cfSriastradh			free_bo = bo;
235242542f5fSchristos		} else if (sna->render.vertices == MAP(bo->map__cpu)) {
235303b705cfSriastradh			DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
235403b705cfSriastradh			sna->render.vertices = kgem_bo_map__gtt(&sna->kgem, bo);
235503b705cfSriastradh			if (sna->render.vertices == NULL) {
235603b705cfSriastradh				DBG(("%s: discarding non-mappable vertices\n",__FUNCTION__));
235703b705cfSriastradh				sna->render.vbo = NULL;
235803b705cfSriastradh				sna->render.vertices = sna->render.vertex_data;
235903b705cfSriastradh				sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
236003b705cfSriastradh				free_bo = bo;
236103b705cfSriastradh			}
236203b705cfSriastradh		}
236303b705cfSriastradh	} else {
236403b705cfSriastradh		if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
236503b705cfSriastradh			DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
236603b705cfSriastradh			     sna->render.vertex_used, sna->kgem.nbatch));
236703b705cfSriastradh			memcpy(sna->kgem.batch + sna->kgem.nbatch,
236803b705cfSriastradh			       sna->render.vertex_data,
236903b705cfSriastradh			       sna->render.vertex_used * 4);
237003b705cfSriastradh			delta = sna->kgem.nbatch * 4;
237103b705cfSriastradh			bo = NULL;
237203b705cfSriastradh			sna->kgem.nbatch += sna->render.vertex_used;
237303b705cfSriastradh		} else {
237403b705cfSriastradh			DBG(("%s: new vbo: %d\n", __FUNCTION__,
237503b705cfSriastradh			     sna->render.vertex_used));
237603b705cfSriastradh			bo = kgem_create_linear(&sna->kgem,
237703b705cfSriastradh						4*sna->render.vertex_used,
237803b705cfSriastradh						CREATE_NO_THROTTLE);
237903b705cfSriastradh			if (bo) {
238003b705cfSriastradh				assert(bo->snoop == false);
238103b705cfSriastradh				kgem_bo_write(&sna->kgem, bo,
238203b705cfSriastradh					      sna->render.vertex_data,
238303b705cfSriastradh					      4*sna->render.vertex_used);
238403b705cfSriastradh			}
238503b705cfSriastradh			free_bo = bo;
238603b705cfSriastradh		}
238703b705cfSriastradh	}
238803b705cfSriastradh
238903b705cfSriastradh	DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0]));
239003b705cfSriastradh	sna->kgem.batch[sna->render.vertex_reloc[0]] =
239103b705cfSriastradh		kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
2392fe8aea9eSmrg			       bo, I915_GEM_DOMAIN_VERTEX << 16 | KGEM_RELOC_FENCED, delta);
239303b705cfSriastradh	sna->render.vertex_reloc[0] = 0;
239403b705cfSriastradh
239503b705cfSriastradh	if (sna->render.vbo == NULL) {
239603b705cfSriastradh		DBG(("%s: resetting vbo\n", __FUNCTION__));
239703b705cfSriastradh		sna->render.vertex_used = 0;
239803b705cfSriastradh		sna->render.vertex_index = 0;
239903b705cfSriastradh		assert(sna->render.vertices == sna->render.vertex_data);
240003b705cfSriastradh		assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
240103b705cfSriastradh	}
240203b705cfSriastradh
240303b705cfSriastradh	if (free_bo)
240403b705cfSriastradh		kgem_bo_destroy(&sna->kgem, free_bo);
240503b705cfSriastradh}
240603b705cfSriastradh
240703b705cfSriastradhstatic bool gen3_rectangle_begin(struct sna *sna,
240803b705cfSriastradh				 const struct sna_composite_op *op)
240903b705cfSriastradh{
241003b705cfSriastradh	struct gen3_render_state *state = &sna->render_state.gen3;
241103b705cfSriastradh	int ndwords, i1_cmd = 0, i1_len = 0;
241203b705cfSriastradh
241303b705cfSriastradh	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
241403b705cfSriastradh		return true;
241503b705cfSriastradh
241603b705cfSriastradh	ndwords = 2;
241703b705cfSriastradh	if (op->need_magic_ca_pass)
241803b705cfSriastradh		ndwords += 100;
241903b705cfSriastradh	if (sna->render.vertex_reloc[0] == 0)
242003b705cfSriastradh		i1_len++, i1_cmd |= I1_LOAD_S(0), ndwords++;
242103b705cfSriastradh	if (state->floats_per_vertex != op->floats_per_vertex)
242203b705cfSriastradh		i1_len++, i1_cmd |= I1_LOAD_S(1), ndwords++;
242303b705cfSriastradh
242403b705cfSriastradh	if (!kgem_check_batch(&sna->kgem, ndwords+1))
242503b705cfSriastradh		return false;
242603b705cfSriastradh
242703b705cfSriastradh	if (i1_cmd) {
242803b705cfSriastradh		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | i1_cmd | (i1_len - 1));
242903b705cfSriastradh		if (sna->render.vertex_reloc[0] == 0)
243003b705cfSriastradh			sna->render.vertex_reloc[0] = sna->kgem.nbatch++;
243103b705cfSriastradh		if (state->floats_per_vertex != op->floats_per_vertex) {
243203b705cfSriastradh			state->floats_per_vertex = op->floats_per_vertex;
243303b705cfSriastradh			OUT_BATCH(state->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT |
243403b705cfSriastradh				  state->floats_per_vertex << S1_VERTEX_PITCH_SHIFT);
243503b705cfSriastradh		}
243603b705cfSriastradh	}
243703b705cfSriastradh
243803b705cfSriastradh	if (sna->kgem.nbatch == 2 + state->last_vertex_offset &&
243903b705cfSriastradh	    !op->need_magic_ca_pass) {
244003b705cfSriastradh		sna->render.vertex_offset = state->last_vertex_offset;
244103b705cfSriastradh	} else {
244203b705cfSriastradh		sna->render.vertex_offset = sna->kgem.nbatch;
244303b705cfSriastradh		OUT_BATCH(MI_NOOP); /* to be filled later */
244403b705cfSriastradh		OUT_BATCH(MI_NOOP);
244503b705cfSriastradh		sna->render.vertex_start = sna->render.vertex_index;
244603b705cfSriastradh		state->last_vertex_offset = sna->render.vertex_offset;
244703b705cfSriastradh	}
244803b705cfSriastradh
244903b705cfSriastradh	return true;
245003b705cfSriastradh}
245103b705cfSriastradh
245203b705cfSriastradhstatic int gen3_get_rectangles__flush(struct sna *sna,
245303b705cfSriastradh				      const struct sna_composite_op *op)
245403b705cfSriastradh{
245503b705cfSriastradh	/* Preventing discarding new vbo after lock contention */
245603b705cfSriastradh	if (sna_vertex_wait__locked(&sna->render)) {
245703b705cfSriastradh		int rem = vertex_space(sna);
245803b705cfSriastradh		if (rem > op->floats_per_rect)
245903b705cfSriastradh			return rem;
246003b705cfSriastradh	}
246103b705cfSriastradh
246203b705cfSriastradh	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 105: 5))
246303b705cfSriastradh		return 0;
246403b705cfSriastradh	if (!kgem_check_reloc_and_exec(&sna->kgem, 1))
246503b705cfSriastradh		return 0;
246603b705cfSriastradh
246703b705cfSriastradh	if (sna->render.vertex_offset) {
246803b705cfSriastradh		gen3_vertex_flush(sna);
246903b705cfSriastradh		if (gen3_magic_ca_pass(sna, op)) {
247003b705cfSriastradh			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
247103b705cfSriastradh			OUT_BATCH(gen3_get_blend_cntl(op->op,
247203b705cfSriastradh						      op->has_component_alpha,
247303b705cfSriastradh						      op->dst.format));
247403b705cfSriastradh			gen3_composite_emit_shader(sna, op, op->op);
247503b705cfSriastradh		}
247603b705cfSriastradh	}
247703b705cfSriastradh
247803b705cfSriastradh	return gen3_vertex_finish(sna);
247903b705cfSriastradh}
248003b705cfSriastradh
248103b705cfSriastradhinline static int gen3_get_rectangles(struct sna *sna,
248203b705cfSriastradh				      const struct sna_composite_op *op,
248303b705cfSriastradh				      int want)
248403b705cfSriastradh{
248503b705cfSriastradh	int rem;
248603b705cfSriastradh
248703b705cfSriastradh	DBG(("%s: want=%d, rem=%d\n",
248803b705cfSriastradh	     __FUNCTION__, want*op->floats_per_rect, vertex_space(sna)));
248903b705cfSriastradh
249003b705cfSriastradh	assert(want);
249103b705cfSriastradh	assert(sna->render.vertex_index * op->floats_per_vertex == sna->render.vertex_used);
249203b705cfSriastradh
249303b705cfSriastradhstart:
249403b705cfSriastradh	rem = vertex_space(sna);
249503b705cfSriastradh	if (unlikely(op->floats_per_rect > rem)) {
249603b705cfSriastradh		DBG(("flushing vbo for %s: %d < %d\n",
249703b705cfSriastradh		     __FUNCTION__, rem, op->floats_per_rect));
249803b705cfSriastradh		rem = gen3_get_rectangles__flush(sna, op);
249903b705cfSriastradh		if (unlikely(rem == 0))
250003b705cfSriastradh			goto flush;
250103b705cfSriastradh	}
250203b705cfSriastradh
250303b705cfSriastradh	if (unlikely(sna->render.vertex_offset == 0)) {
250403b705cfSriastradh		if (!gen3_rectangle_begin(sna, op))
250503b705cfSriastradh			goto flush;
250603b705cfSriastradh		else
250703b705cfSriastradh			goto start;
250803b705cfSriastradh	}
250903b705cfSriastradh
251003b705cfSriastradh	assert(rem <= vertex_space(sna));
251103b705cfSriastradh	assert(op->floats_per_rect <= rem);
251203b705cfSriastradh	if (want > 1 && want * op->floats_per_rect > rem)
251303b705cfSriastradh		want = rem / op->floats_per_rect;
251403b705cfSriastradh	sna->render.vertex_index += 3*want;
251503b705cfSriastradh
251603b705cfSriastradh	assert(want);
251703b705cfSriastradh	assert(sna->render.vertex_index * op->floats_per_vertex <= sna->render.vertex_size);
251803b705cfSriastradh	return want;
251903b705cfSriastradh
252003b705cfSriastradhflush:
252103b705cfSriastradh	DBG(("%s: flushing batch\n", __FUNCTION__));
252203b705cfSriastradh	if (sna->render.vertex_offset) {
252303b705cfSriastradh		gen3_vertex_flush(sna);
252403b705cfSriastradh		gen3_magic_ca_pass(sna, op);
252503b705cfSriastradh	}
252603b705cfSriastradh	sna_vertex_wait__locked(&sna->render);
252703b705cfSriastradh	_kgem_submit(&sna->kgem);
252803b705cfSriastradh	gen3_emit_composite_state(sna, op);
252903b705cfSriastradh	assert(sna->render.vertex_offset == 0);
253003b705cfSriastradh	assert(sna->render.vertex_reloc[0] == 0);
253103b705cfSriastradh	goto start;
253203b705cfSriastradh}
253303b705cfSriastradh
253403b705cfSriastradhfastcall static void
253503b705cfSriastradhgen3_render_composite_blt(struct sna *sna,
253603b705cfSriastradh			  const struct sna_composite_op *op,
253703b705cfSriastradh			  const struct sna_composite_rectangles *r)
253803b705cfSriastradh{
253903b705cfSriastradh	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", __FUNCTION__,
254003b705cfSriastradh	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
254103b705cfSriastradh	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
254203b705cfSriastradh	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
254303b705cfSriastradh	     r->width, r->height));
254403b705cfSriastradh
254503b705cfSriastradh	gen3_get_rectangles(sna, op, 1);
254603b705cfSriastradh
254703b705cfSriastradh	op->prim_emit(sna, op, r);
254803b705cfSriastradh}
254903b705cfSriastradh
255003b705cfSriastradhfastcall static void
255103b705cfSriastradhgen3_render_composite_box(struct sna *sna,
255203b705cfSriastradh			  const struct sna_composite_op *op,
255303b705cfSriastradh			  const BoxRec *box)
255403b705cfSriastradh{
255503b705cfSriastradh	struct sna_composite_rectangles r;
255603b705cfSriastradh
255703b705cfSriastradh	DBG(("%s: src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
255803b705cfSriastradh	     __FUNCTION__,
255903b705cfSriastradh	     op->src.offset[0], op->src.offset[1],
256003b705cfSriastradh	     op->mask.offset[0], op->mask.offset[1],
256103b705cfSriastradh	     op->dst.x, op->dst.y));
256203b705cfSriastradh
256303b705cfSriastradh	gen3_get_rectangles(sna, op, 1);
256403b705cfSriastradh
256503b705cfSriastradh	r.dst.x  = box->x1;
256603b705cfSriastradh	r.dst.y  = box->y1;
256703b705cfSriastradh	r.width  = box->x2 - box->x1;
256803b705cfSriastradh	r.height = box->y2 - box->y1;
256903b705cfSriastradh	r.src = r.mask = r.dst;
257003b705cfSriastradh
257103b705cfSriastradh	op->prim_emit(sna, op, &r);
257203b705cfSriastradh}
257303b705cfSriastradh
257403b705cfSriastradhstatic void
257503b705cfSriastradhgen3_render_composite_boxes__blt(struct sna *sna,
257603b705cfSriastradh				 const struct sna_composite_op *op,
257703b705cfSriastradh				 const BoxRec *box, int nbox)
257803b705cfSriastradh{
257903b705cfSriastradh	DBG(("%s: nbox=%d, src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
258003b705cfSriastradh	     __FUNCTION__, nbox,
258103b705cfSriastradh	     op->src.offset[0], op->src.offset[1],
258203b705cfSriastradh	     op->mask.offset[0], op->mask.offset[1],
258303b705cfSriastradh	     op->dst.x, op->dst.y));
258403b705cfSriastradh
258503b705cfSriastradh	do {
258603b705cfSriastradh		int nbox_this_time;
258703b705cfSriastradh
258803b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, op, nbox);
258903b705cfSriastradh		nbox -= nbox_this_time;
259003b705cfSriastradh
259103b705cfSriastradh		do {
259203b705cfSriastradh			struct sna_composite_rectangles r;
259303b705cfSriastradh
259403b705cfSriastradh			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
259503b705cfSriastradh			     box->x1, box->y1,
259603b705cfSriastradh			     box->x2 - box->x1,
259703b705cfSriastradh			     box->y2 - box->y1));
259803b705cfSriastradh
259903b705cfSriastradh			r.dst.x  = box->x1; r.dst.y  = box->y1;
260003b705cfSriastradh			r.width = box->x2 - box->x1;
260103b705cfSriastradh			r.height = box->y2 - box->y1;
260203b705cfSriastradh			r.src = r.mask = r.dst;
260303b705cfSriastradh
260403b705cfSriastradh			op->prim_emit(sna, op, &r);
260503b705cfSriastradh			box++;
260603b705cfSriastradh		} while (--nbox_this_time);
260703b705cfSriastradh	} while (nbox);
260803b705cfSriastradh}
260903b705cfSriastradh
261003b705cfSriastradhstatic void
261103b705cfSriastradhgen3_render_composite_boxes(struct sna *sna,
261203b705cfSriastradh			    const struct sna_composite_op *op,
261303b705cfSriastradh			    const BoxRec *box, int nbox)
261403b705cfSriastradh{
261503b705cfSriastradh	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
261603b705cfSriastradh
261703b705cfSriastradh	do {
261803b705cfSriastradh		int nbox_this_time;
261903b705cfSriastradh		float *v;
262003b705cfSriastradh
262103b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, op, nbox);
262203b705cfSriastradh		assert(nbox_this_time);
262303b705cfSriastradh		nbox -= nbox_this_time;
262403b705cfSriastradh
262503b705cfSriastradh		v = sna->render.vertices + sna->render.vertex_used;
262603b705cfSriastradh		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
2627fe8aea9eSmrg		assert(sna->render.vertex_used <= sna->render.vertex_size);
262803b705cfSriastradh
262903b705cfSriastradh		op->emit_boxes(op, box, nbox_this_time, v);
263003b705cfSriastradh		box += nbox_this_time;
263103b705cfSriastradh	} while (nbox);
263203b705cfSriastradh}
263303b705cfSriastradh
263403b705cfSriastradhstatic void
263503b705cfSriastradhgen3_render_composite_boxes__thread(struct sna *sna,
263603b705cfSriastradh				    const struct sna_composite_op *op,
263703b705cfSriastradh				    const BoxRec *box, int nbox)
263803b705cfSriastradh{
263903b705cfSriastradh	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
264003b705cfSriastradh
264103b705cfSriastradh	sna_vertex_lock(&sna->render);
264203b705cfSriastradh	do {
264303b705cfSriastradh		int nbox_this_time;
264403b705cfSriastradh		float *v;
264503b705cfSriastradh
264603b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, op, nbox);
264703b705cfSriastradh		assert(nbox_this_time);
264803b705cfSriastradh		nbox -= nbox_this_time;
264903b705cfSriastradh
265003b705cfSriastradh		v = sna->render.vertices + sna->render.vertex_used;
265103b705cfSriastradh		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
2652fe8aea9eSmrg		assert(sna->render.vertex_used <= sna->render.vertex_size);
265303b705cfSriastradh
265403b705cfSriastradh		sna_vertex_acquire__locked(&sna->render);
265503b705cfSriastradh		sna_vertex_unlock(&sna->render);
265603b705cfSriastradh
265703b705cfSriastradh		op->emit_boxes(op, box, nbox_this_time, v);
265803b705cfSriastradh		box += nbox_this_time;
265903b705cfSriastradh
266003b705cfSriastradh		sna_vertex_lock(&sna->render);
266103b705cfSriastradh		sna_vertex_release__locked(&sna->render);
266203b705cfSriastradh	} while (nbox);
266303b705cfSriastradh	sna_vertex_unlock(&sna->render);
266403b705cfSriastradh}
266503b705cfSriastradh
266603b705cfSriastradhstatic void
266703b705cfSriastradhgen3_render_composite_done(struct sna *sna,
266803b705cfSriastradh			   const struct sna_composite_op *op)
266903b705cfSriastradh{
267003b705cfSriastradh	DBG(("%s()\n", __FUNCTION__));
267103b705cfSriastradh
267203b705cfSriastradh	if (sna->render.vertex_offset) {
267303b705cfSriastradh		gen3_vertex_flush(sna);
267403b705cfSriastradh		gen3_magic_ca_pass(sna, op);
267503b705cfSriastradh	}
267603b705cfSriastradh
267703b705cfSriastradh	if (op->mask.bo)
267803b705cfSriastradh		kgem_bo_destroy(&sna->kgem, op->mask.bo);
267903b705cfSriastradh	if (op->src.bo)
268003b705cfSriastradh		kgem_bo_destroy(&sna->kgem, op->src.bo);
268103b705cfSriastradh
268203b705cfSriastradh	sna_render_composite_redirect_done(sna, op);
268303b705cfSriastradh}
268403b705cfSriastradh
268503b705cfSriastradhstatic void
268603b705cfSriastradhdiscard_vbo(struct sna *sna)
268703b705cfSriastradh{
268803b705cfSriastradh	kgem_bo_destroy(&sna->kgem, sna->render.vbo);
268903b705cfSriastradh	sna->render.vbo = NULL;
269003b705cfSriastradh	sna->render.vertices = sna->render.vertex_data;
269103b705cfSriastradh	sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
269203b705cfSriastradh	sna->render.vertex_used = 0;
269303b705cfSriastradh	sna->render.vertex_index = 0;
269403b705cfSriastradh}
269503b705cfSriastradh
269603b705cfSriastradhstatic void
269703b705cfSriastradhgen3_render_reset(struct sna *sna)
269803b705cfSriastradh{
269903b705cfSriastradh	struct gen3_render_state *state = &sna->render_state.gen3;
270003b705cfSriastradh
270103b705cfSriastradh	state->need_invariant = true;
270203b705cfSriastradh	state->current_dst = 0;
270303b705cfSriastradh	state->tex_count = 0;
270403b705cfSriastradh	state->last_drawrect_limit = ~0U;
270503b705cfSriastradh	state->last_target = 0;
270603b705cfSriastradh	state->last_blend = 0;
270703b705cfSriastradh	state->last_constants = 0;
270803b705cfSriastradh	state->last_sampler = 0;
270903b705cfSriastradh	state->last_shader = 0x7fffffff;
271003b705cfSriastradh	state->last_diffuse = 0xcc00ffee;
271103b705cfSriastradh	state->last_specular = 0xcc00ffee;
271203b705cfSriastradh
271303b705cfSriastradh	state->floats_per_vertex = 0;
271403b705cfSriastradh	state->last_floats_per_vertex = 0;
271503b705cfSriastradh	state->last_vertex_offset = 0;
271603b705cfSriastradh
271742542f5fSchristos	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
271842542f5fSchristos		DBG(("%s: discarding vbo as next access will stall: %lx\n",
271942542f5fSchristos		     __FUNCTION__, (long)sna->render.vbo->presumed_offset));
272003b705cfSriastradh		discard_vbo(sna);
272103b705cfSriastradh	}
272203b705cfSriastradh
272303b705cfSriastradh	sna->render.vertex_reloc[0] = 0;
272403b705cfSriastradh	sna->render.vertex_offset = 0;
272503b705cfSriastradh}
272603b705cfSriastradh
272703b705cfSriastradhstatic void
272803b705cfSriastradhgen3_render_retire(struct kgem *kgem)
272903b705cfSriastradh{
273003b705cfSriastradh	struct sna *sna;
273103b705cfSriastradh
273203b705cfSriastradh	sna = container_of(kgem, struct sna, kgem);
273303b705cfSriastradh	if (sna->render.vertex_reloc[0] == 0 &&
273403b705cfSriastradh	    sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
273503b705cfSriastradh		DBG(("%s: resetting idle vbo\n", __FUNCTION__));
273603b705cfSriastradh		sna->render.vertex_used = 0;
273703b705cfSriastradh		sna->render.vertex_index = 0;
273803b705cfSriastradh	}
273903b705cfSriastradh}
274003b705cfSriastradh
274103b705cfSriastradhstatic void
274203b705cfSriastradhgen3_render_expire(struct kgem *kgem)
274303b705cfSriastradh{
274403b705cfSriastradh	struct sna *sna;
274503b705cfSriastradh
274603b705cfSriastradh	sna = container_of(kgem, struct sna, kgem);
274703b705cfSriastradh	if (sna->render.vbo && !sna->render.vertex_used) {
274803b705cfSriastradh		DBG(("%s: discarding vbo\n", __FUNCTION__));
274903b705cfSriastradh		discard_vbo(sna);
275003b705cfSriastradh	}
275103b705cfSriastradh}
275203b705cfSriastradh
275303b705cfSriastradhstatic bool gen3_composite_channel_set_format(struct sna_composite_channel *channel,
275403b705cfSriastradh					      CARD32 format)
275503b705cfSriastradh{
275603b705cfSriastradh	unsigned int i;
275703b705cfSriastradh
275803b705cfSriastradh	for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
275903b705cfSriastradh		if (gen3_tex_formats[i].fmt == format) {
276003b705cfSriastradh			channel->card_format = gen3_tex_formats[i].card_fmt;
276103b705cfSriastradh			channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
276203b705cfSriastradh			return true;
276303b705cfSriastradh		}
276403b705cfSriastradh	}
276503b705cfSriastradh	return false;
276603b705cfSriastradh}
276703b705cfSriastradh
276803b705cfSriastradhstatic bool source_is_covered(PicturePtr picture,
276903b705cfSriastradh			      int x, int y,
277003b705cfSriastradh			      int width, int height)
277103b705cfSriastradh{
277203b705cfSriastradh	int x1, y1, x2, y2;
277303b705cfSriastradh
277403b705cfSriastradh	if (picture->repeat && picture->repeatType != RepeatNone)
277503b705cfSriastradh		return true;
277603b705cfSriastradh
277703b705cfSriastradh	if (picture->pDrawable == NULL)
277803b705cfSriastradh		return false;
277903b705cfSriastradh
278003b705cfSriastradh	if (picture->transform) {
278103b705cfSriastradh		pixman_box16_t sample;
278203b705cfSriastradh
278303b705cfSriastradh		sample.x1 = x;
278403b705cfSriastradh		sample.y1 = y;
278503b705cfSriastradh		sample.x2 = x + width;
278603b705cfSriastradh		sample.y2 = y + height;
278703b705cfSriastradh
278803b705cfSriastradh		pixman_transform_bounds(picture->transform, &sample);
278903b705cfSriastradh
279003b705cfSriastradh		x1 = sample.x1;
279103b705cfSriastradh		x2 = sample.x2;
279203b705cfSriastradh		y1 = sample.y1;
279303b705cfSriastradh		y2 = sample.y2;
279403b705cfSriastradh	} else {
279503b705cfSriastradh		x1 = x;
279603b705cfSriastradh		y1 = y;
279703b705cfSriastradh		x2 = x + width;
279803b705cfSriastradh		y2 = y + height;
279903b705cfSriastradh	}
280003b705cfSriastradh
280103b705cfSriastradh	return
280203b705cfSriastradh		x1 >= 0 && y1 >= 0 &&
280303b705cfSriastradh		x2 <= picture->pDrawable->width &&
280403b705cfSriastradh		y2 <= picture->pDrawable->height;
280503b705cfSriastradh}
280603b705cfSriastradh
280703b705cfSriastradhstatic bool gen3_composite_channel_set_xformat(PicturePtr picture,
280803b705cfSriastradh					       struct sna_composite_channel *channel,
280903b705cfSriastradh					       int x, int y,
281003b705cfSriastradh					       int width, int height)
281103b705cfSriastradh{
281203b705cfSriastradh	unsigned int i;
281303b705cfSriastradh
281403b705cfSriastradh	if (PICT_FORMAT_A(picture->format) != 0)
281503b705cfSriastradh		return false;
281603b705cfSriastradh
281703b705cfSriastradh	if (width == 0 || height == 0)
281803b705cfSriastradh		return false;
281903b705cfSriastradh
282003b705cfSriastradh	if (!source_is_covered(picture, x, y, width, height))
282103b705cfSriastradh		return false;
282203b705cfSriastradh
282303b705cfSriastradh	for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
282403b705cfSriastradh		if (gen3_tex_formats[i].xfmt == picture->format) {
282503b705cfSriastradh			channel->card_format = gen3_tex_formats[i].card_fmt;
282603b705cfSriastradh			channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
282703b705cfSriastradh			channel->alpha_fixup = true;
282803b705cfSriastradh			return true;
282903b705cfSriastradh		}
283003b705cfSriastradh	}
283103b705cfSriastradh
283203b705cfSriastradh	return false;
283303b705cfSriastradh}
283403b705cfSriastradh
283503b705cfSriastradhstatic int
283603b705cfSriastradhgen3_init_solid(struct sna_composite_channel *channel, uint32_t color)
283703b705cfSriastradh{
283803b705cfSriastradh	channel->u.gen3.mode = color;
283903b705cfSriastradh	channel->u.gen3.type = SHADER_CONSTANT;
284003b705cfSriastradh	if (color == 0)
284103b705cfSriastradh		channel->u.gen3.type = SHADER_ZERO;
284203b705cfSriastradh	else if (color == 0xff000000)
284303b705cfSriastradh		channel->u.gen3.type = SHADER_BLACK;
284403b705cfSriastradh	else if (color == 0xffffffff)
284503b705cfSriastradh		channel->u.gen3.type = SHADER_WHITE;
284603b705cfSriastradh
284703b705cfSriastradh	channel->bo = NULL;
284803b705cfSriastradh	channel->is_opaque = (color >> 24) == 0xff;
284903b705cfSriastradh	channel->is_affine = 1;
285003b705cfSriastradh	channel->alpha_fixup = 0;
285103b705cfSriastradh	channel->rb_reversed = 0;
285203b705cfSriastradh
285303b705cfSriastradh	DBG(("%s: color=%08x, is_opaque=%d, type=%d\n",
285403b705cfSriastradh	     __FUNCTION__, color, channel->is_opaque, channel->u.gen3.type));
285503b705cfSriastradh
285603b705cfSriastradh	/* for consistency */
285703b705cfSriastradh	channel->repeat = RepeatNormal;
285803b705cfSriastradh	channel->filter = PictFilterNearest;
285903b705cfSriastradh	channel->pict_format = PICT_a8r8g8b8;
286003b705cfSriastradh	channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
286103b705cfSriastradh
286203b705cfSriastradh	return 1;
286303b705cfSriastradh}
286403b705cfSriastradh
286503b705cfSriastradhstatic void gen3_composite_channel_convert(struct sna_composite_channel *channel)
286603b705cfSriastradh{
286703b705cfSriastradh	if (channel->u.gen3.type == SHADER_TEXTURE)
286803b705cfSriastradh		channel->repeat = gen3_texture_repeat(channel->repeat);
286903b705cfSriastradh	else
287003b705cfSriastradh		channel->repeat = gen3_gradient_repeat(channel->repeat);
287103b705cfSriastradh
287203b705cfSriastradh	channel->filter = gen3_filter(channel->filter);
287303b705cfSriastradh	if (channel->card_format == 0)
287403b705cfSriastradh		gen3_composite_channel_set_format(channel, channel->pict_format);
287503b705cfSriastradh	assert(channel->card_format);
287603b705cfSriastradh}
287703b705cfSriastradh
287803b705cfSriastradhstatic bool gen3_gradient_setup(struct sna *sna,
287903b705cfSriastradh				PicturePtr picture,
288003b705cfSriastradh				struct sna_composite_channel *channel,
288103b705cfSriastradh				int16_t ox, int16_t oy)
288203b705cfSriastradh{
288303b705cfSriastradh	int16_t dx, dy;
288403b705cfSriastradh
288503b705cfSriastradh	if (picture->repeat == 0) {
288603b705cfSriastradh		channel->repeat = RepeatNone;
288703b705cfSriastradh	} else switch (picture->repeatType) {
288803b705cfSriastradh	case RepeatNone:
288903b705cfSriastradh	case RepeatNormal:
289003b705cfSriastradh	case RepeatPad:
289103b705cfSriastradh	case RepeatReflect:
289203b705cfSriastradh		channel->repeat = picture->repeatType;
289303b705cfSriastradh		break;
289403b705cfSriastradh	default:
289503b705cfSriastradh		return false;
289603b705cfSriastradh	}
289703b705cfSriastradh
289803b705cfSriastradh	channel->bo =
289903b705cfSriastradh		sna_render_get_gradient(sna,
290003b705cfSriastradh					(PictGradient *)picture->pSourcePict);
290103b705cfSriastradh	if (channel->bo == NULL)
290203b705cfSriastradh		return false;
290303b705cfSriastradh
290403b705cfSriastradh	channel->pict_format = PICT_a8r8g8b8;
290503b705cfSriastradh	channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
290603b705cfSriastradh	channel->filter = PictFilterNearest;
290703b705cfSriastradh	channel->is_affine = sna_transform_is_affine(picture->transform);
290842542f5fSchristos	if (sna_transform_is_imprecise_integer_translation(picture->transform, PictFilterNearest, false, &dx, &dy)) {
290903b705cfSriastradh		DBG(("%s: integer translation (%d, %d), removing\n",
291003b705cfSriastradh		     __FUNCTION__, dx, dy));
291103b705cfSriastradh		ox += dx;
291203b705cfSriastradh		oy += dy;
291303b705cfSriastradh		channel->transform = NULL;
291403b705cfSriastradh	} else
291503b705cfSriastradh		channel->transform = picture->transform;
291603b705cfSriastradh	channel->width  = channel->bo->pitch / 4;
291703b705cfSriastradh	channel->height = 1;
291803b705cfSriastradh	channel->offset[0] = ox;
291903b705cfSriastradh	channel->offset[1] = oy;
292003b705cfSriastradh	channel->scale[0] = channel->scale[1] = 1;
292103b705cfSriastradh	return true;
292203b705cfSriastradh}
292303b705cfSriastradh
292403b705cfSriastradhstatic int
292503b705cfSriastradhgen3_init_linear(struct sna *sna,
292603b705cfSriastradh		 PicturePtr picture,
292703b705cfSriastradh		 struct sna_composite_op *op,
292803b705cfSriastradh		 struct sna_composite_channel *channel,
292903b705cfSriastradh		 int ox, int oy)
293003b705cfSriastradh{
293103b705cfSriastradh	PictLinearGradient *linear =
293203b705cfSriastradh		(PictLinearGradient *)picture->pSourcePict;
293303b705cfSriastradh	float x0, y0, sf;
293403b705cfSriastradh	float dx, dy, offset;
293503b705cfSriastradh	int n;
293603b705cfSriastradh
293703b705cfSriastradh	DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n",
293803b705cfSriastradh	     __FUNCTION__,
293903b705cfSriastradh	     xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y),
294003b705cfSriastradh	     xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y)));
294103b705cfSriastradh
294203b705cfSriastradh	if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
294303b705cfSriastradh		return 0;
294403b705cfSriastradh
294503b705cfSriastradh	dx = xFixedToDouble(linear->p2.x - linear->p1.x);
294603b705cfSriastradh	dy = xFixedToDouble(linear->p2.y - linear->p1.y);
294703b705cfSriastradh	sf = dx*dx + dy*dy;
294803b705cfSriastradh	dx /= sf;
294903b705cfSriastradh	dy /= sf;
295003b705cfSriastradh
295103b705cfSriastradh	x0 = xFixedToDouble(linear->p1.x);
295203b705cfSriastradh	y0 = xFixedToDouble(linear->p1.y);
295303b705cfSriastradh	offset = dx*x0 + dy*y0;
295403b705cfSriastradh
295503b705cfSriastradh	n = op->u.gen3.num_constants;
295603b705cfSriastradh	channel->u.gen3.constants = FS_C0 + n / 4;
295703b705cfSriastradh	op->u.gen3.constants[n++] = dx;
295803b705cfSriastradh	op->u.gen3.constants[n++] = dy;
295903b705cfSriastradh	op->u.gen3.constants[n++] = -offset;
296003b705cfSriastradh	op->u.gen3.constants[n++] = 0;
296103b705cfSriastradh
296203b705cfSriastradh	if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
296303b705cfSriastradh		return -1;
296403b705cfSriastradh
296503b705cfSriastradh	channel->u.gen3.type = SHADER_LINEAR;
296603b705cfSriastradh	op->u.gen3.num_constants = n;
296703b705cfSriastradh
296803b705cfSriastradh	DBG(("%s: dx=%f, dy=%f, offset=%f, constants=%d\n",
296903b705cfSriastradh	     __FUNCTION__, dx, dy, -offset, channel->u.gen3.constants - FS_C0));
297003b705cfSriastradh	return 1;
297103b705cfSriastradh}
297203b705cfSriastradh
297303b705cfSriastradhstatic int
297403b705cfSriastradhgen3_init_radial(struct sna *sna,
297503b705cfSriastradh		 PicturePtr picture,
297603b705cfSriastradh		 struct sna_composite_op *op,
297703b705cfSriastradh		 struct sna_composite_channel *channel,
297803b705cfSriastradh		 int ox, int oy)
297903b705cfSriastradh{
298003b705cfSriastradh	PictRadialGradient *radial = (PictRadialGradient *)picture->pSourcePict;
298103b705cfSriastradh	double dx, dy, dr, r1;
298203b705cfSriastradh	int n;
298303b705cfSriastradh
298403b705cfSriastradh	dx = xFixedToDouble(radial->c2.x - radial->c1.x);
298503b705cfSriastradh	dy = xFixedToDouble(radial->c2.y - radial->c1.y);
298603b705cfSriastradh	dr = xFixedToDouble(radial->c2.radius - radial->c1.radius);
298703b705cfSriastradh
298803b705cfSriastradh	r1 = xFixedToDouble(radial->c1.radius);
298903b705cfSriastradh
299003b705cfSriastradh	n = op->u.gen3.num_constants;
299103b705cfSriastradh	channel->u.gen3.constants = FS_C0 + n / 4;
299203b705cfSriastradh	if (radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) {
299303b705cfSriastradh		if (radial->c2.radius == radial->c1.radius) {
299403b705cfSriastradh			channel->u.gen3.type = SHADER_ZERO;
299503b705cfSriastradh			return 1;
299603b705cfSriastradh		}
299703b705cfSriastradh
299803b705cfSriastradh		op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.x) / dr;
299903b705cfSriastradh		op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.y) / dr;
300003b705cfSriastradh		op->u.gen3.constants[n++] = 1. / dr;
300103b705cfSriastradh		op->u.gen3.constants[n++] = -r1 / dr;
300203b705cfSriastradh
300303b705cfSriastradh		channel->u.gen3.mode = RADIAL_ONE;
300403b705cfSriastradh	} else {
300503b705cfSriastradh		op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.x);
300603b705cfSriastradh		op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.y);
300703b705cfSriastradh		op->u.gen3.constants[n++] = r1;
300803b705cfSriastradh		op->u.gen3.constants[n++] = -4 * (dx*dx + dy*dy - dr*dr);
300903b705cfSriastradh
301003b705cfSriastradh		op->u.gen3.constants[n++] = -2 * dx;
301103b705cfSriastradh		op->u.gen3.constants[n++] = -2 * dy;
301203b705cfSriastradh		op->u.gen3.constants[n++] = -2 * r1 * dr;
301303b705cfSriastradh		op->u.gen3.constants[n++] = 1 / (2 * (dx*dx + dy*dy - dr*dr));
301403b705cfSriastradh
301503b705cfSriastradh		channel->u.gen3.mode = RADIAL_TWO;
301603b705cfSriastradh	}
301703b705cfSriastradh
301803b705cfSriastradh	if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
301903b705cfSriastradh		return -1;
302003b705cfSriastradh
302103b705cfSriastradh	channel->u.gen3.type = SHADER_RADIAL;
302203b705cfSriastradh	op->u.gen3.num_constants = n;
302303b705cfSriastradh	return 1;
302403b705cfSriastradh}
302503b705cfSriastradh
302603b705cfSriastradhstatic bool
302703b705cfSriastradhsna_picture_is_clear(PicturePtr picture,
302803b705cfSriastradh		     int x, int y, int w, int h,
302903b705cfSriastradh		     uint32_t *color)
303003b705cfSriastradh{
303103b705cfSriastradh	struct sna_pixmap *priv;
303203b705cfSriastradh
303303b705cfSriastradh	if (!picture->pDrawable)
303403b705cfSriastradh		return false;
303503b705cfSriastradh
303603b705cfSriastradh	priv = sna_pixmap(get_drawable_pixmap(picture->pDrawable));
303703b705cfSriastradh	if (priv == NULL || !priv->clear)
303803b705cfSriastradh		return false;
303903b705cfSriastradh
304003b705cfSriastradh	if (!source_is_covered(picture, x, y, w, h))
304103b705cfSriastradh		return false;
304203b705cfSriastradh
304303b705cfSriastradh	*color = priv->clear_color;
304403b705cfSriastradh	return true;
304503b705cfSriastradh}
304603b705cfSriastradh
304703b705cfSriastradhstatic int
304803b705cfSriastradhgen3_composite_picture(struct sna *sna,
304903b705cfSriastradh		       PicturePtr picture,
305003b705cfSriastradh		       struct sna_composite_op *op,
305103b705cfSriastradh		       struct sna_composite_channel *channel,
305203b705cfSriastradh		       int16_t x, int16_t y,
305303b705cfSriastradh		       int16_t w, int16_t h,
305403b705cfSriastradh		       int16_t dst_x, int16_t dst_y,
305503b705cfSriastradh		       bool precise)
305603b705cfSriastradh{
305703b705cfSriastradh	PixmapPtr pixmap;
305803b705cfSriastradh	uint32_t color;
305903b705cfSriastradh	int16_t dx, dy;
306003b705cfSriastradh
306103b705cfSriastradh	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
306203b705cfSriastradh	     __FUNCTION__, x, y, w, h, dst_x, dst_y));
306303b705cfSriastradh
306403b705cfSriastradh	channel->card_format = 0;
306503b705cfSriastradh
306603b705cfSriastradh	if (picture->pDrawable == NULL) {
306703b705cfSriastradh		SourcePict *source = picture->pSourcePict;
306803b705cfSriastradh		int ret = -1;
306903b705cfSriastradh
307003b705cfSriastradh		switch (source->type) {
307103b705cfSriastradh		case SourcePictTypeSolidFill:
307203b705cfSriastradh			DBG(("%s: solid fill [%08x], format %08x\n",
307303b705cfSriastradh			     __FUNCTION__,
307403b705cfSriastradh			     (unsigned)source->solidFill.color,
307503b705cfSriastradh			     (unsigned)picture->format));
307603b705cfSriastradh			ret = gen3_init_solid(channel, source->solidFill.color);
307703b705cfSriastradh			break;
307803b705cfSriastradh
307903b705cfSriastradh		case SourcePictTypeLinear:
308003b705cfSriastradh			ret = gen3_init_linear(sna, picture, op, channel,
308103b705cfSriastradh					       x - dst_x, y - dst_y);
308203b705cfSriastradh			break;
308303b705cfSriastradh
308403b705cfSriastradh		case SourcePictTypeRadial:
308503b705cfSriastradh			ret = gen3_init_radial(sna, picture, op, channel,
308603b705cfSriastradh					       x - dst_x, y - dst_y);
308703b705cfSriastradh			break;
308803b705cfSriastradh		}
308903b705cfSriastradh
309003b705cfSriastradh		if (ret == -1) {
309103b705cfSriastradh			if (!precise)
309203b705cfSriastradh				ret = sna_render_picture_approximate_gradient(sna, picture, channel,
309303b705cfSriastradh									      x, y, w, h, dst_x, dst_y);
309403b705cfSriastradh			if (ret == -1)
309503b705cfSriastradh				ret = sna_render_picture_fixup(sna, picture, channel,
309603b705cfSriastradh							       x, y, w, h, dst_x, dst_y);
309703b705cfSriastradh		}
309803b705cfSriastradh		return ret;
309903b705cfSriastradh	}
310003b705cfSriastradh
310103b705cfSriastradh	if (picture->alphaMap) {
310203b705cfSriastradh		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
310303b705cfSriastradh		return sna_render_picture_fixup(sna, picture, channel,
310403b705cfSriastradh						x, y, w, h, dst_x, dst_y);
310503b705cfSriastradh	}
310603b705cfSriastradh
310703b705cfSriastradh	if (sna_picture_is_solid(picture, &color)) {
310803b705cfSriastradh		DBG(("%s: solid drawable [%08x]\n", __FUNCTION__, color));
310903b705cfSriastradh		return gen3_init_solid(channel, color);
311003b705cfSriastradh	}
311103b705cfSriastradh
311203b705cfSriastradh	if (sna_picture_is_clear(picture, x, y, w, h, &color)) {
311303b705cfSriastradh		DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color));
3114fe8aea9eSmrg		return gen3_init_solid(channel, solid_color(picture->format, color));
311503b705cfSriastradh	}
311603b705cfSriastradh
311703b705cfSriastradh	if (!gen3_check_repeat(picture))
311803b705cfSriastradh		return sna_render_picture_fixup(sna, picture, channel,
311903b705cfSriastradh						x, y, w, h, dst_x, dst_y);
312003b705cfSriastradh
312103b705cfSriastradh	if (!gen3_check_filter(picture))
312203b705cfSriastradh		return sna_render_picture_fixup(sna, picture, channel,
312303b705cfSriastradh						x, y, w, h, dst_x, dst_y);
312403b705cfSriastradh
312503b705cfSriastradh	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
312603b705cfSriastradh	channel->filter = picture->filter;
312703b705cfSriastradh	channel->pict_format = picture->format;
312803b705cfSriastradh
312903b705cfSriastradh	pixmap = get_drawable_pixmap(picture->pDrawable);
313003b705cfSriastradh	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
313103b705cfSriastradh
313203b705cfSriastradh	x += dx + picture->pDrawable->x;
313303b705cfSriastradh	y += dy + picture->pDrawable->y;
313403b705cfSriastradh
313542542f5fSchristos	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
313603b705cfSriastradh		DBG(("%s: integer translation (%d, %d), removing\n",
313703b705cfSriastradh		     __FUNCTION__, dx, dy));
313803b705cfSriastradh		x += dx;
313903b705cfSriastradh		y += dy;
314003b705cfSriastradh		channel->transform = NULL;
314103b705cfSriastradh		channel->filter = PictFilterNearest;
314242542f5fSchristos
314342542f5fSchristos		if (channel->repeat ||
314442542f5fSchristos		    (x >= 0 &&
314542542f5fSchristos		     y >= 0 &&
3146fe8aea9eSmrg		     x + w <= pixmap->drawable.width &&
3147fe8aea9eSmrg		     y + h <= pixmap->drawable.height)) {
314842542f5fSchristos			struct sna_pixmap *priv = sna_pixmap(pixmap);
314942542f5fSchristos			if (priv && priv->clear) {
315042542f5fSchristos				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
3151fe8aea9eSmrg				return gen3_init_solid(channel, solid_color(picture->format, priv->clear_color));
315242542f5fSchristos			}
315342542f5fSchristos		}
315403b705cfSriastradh	} else {
315503b705cfSriastradh		channel->transform = picture->transform;
315603b705cfSriastradh		channel->is_affine = sna_transform_is_affine(picture->transform);
315703b705cfSriastradh	}
315803b705cfSriastradh
315903b705cfSriastradh	if (!gen3_composite_channel_set_format(channel, picture->format) &&
316003b705cfSriastradh	    !gen3_composite_channel_set_xformat(picture, channel, x, y, w, h))
316103b705cfSriastradh		return sna_render_picture_convert(sna, picture, channel, pixmap,
316203b705cfSriastradh						  x, y, w, h, dst_x, dst_y,
316303b705cfSriastradh						  false);
316403b705cfSriastradh	assert(channel->card_format);
316503b705cfSriastradh
316603b705cfSriastradh	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
316703b705cfSriastradh		DBG(("%s: pixmap too large (%dx%d), extracting (%d, %d)x(%d,%d)\n",
316803b705cfSriastradh		     __FUNCTION__,
316903b705cfSriastradh		     pixmap->drawable.width, pixmap->drawable.height,
317003b705cfSriastradh		     x, y, w, h));
317103b705cfSriastradh		return sna_render_picture_extract(sna, picture, channel,
317203b705cfSriastradh						  x, y, w, h, dst_x, dst_y);
317303b705cfSriastradh	}
317403b705cfSriastradh
317503b705cfSriastradh	return sna_render_pixmap_bo(sna, channel, pixmap,
317603b705cfSriastradh				    x, y, w, h, dst_x, dst_y);
317703b705cfSriastradh}
317803b705cfSriastradh
317942542f5fSchristosstatic void
318042542f5fSchristosgen3_align_vertex(struct sna *sna,
318142542f5fSchristos		  const struct sna_composite_op *op)
318203b705cfSriastradh{
318342542f5fSchristos	int vertex_index;
318403b705cfSriastradh
318542542f5fSchristos	if (op->floats_per_vertex == sna->render_state.gen3.last_floats_per_vertex)
318642542f5fSchristos		return;
318703b705cfSriastradh
318842542f5fSchristos	DBG(("aligning vertex: was %d, now %d floats per vertex\n",
318942542f5fSchristos	     sna->render_state.gen3.last_floats_per_vertex,
319042542f5fSchristos	     op->floats_per_vertex));
319103b705cfSriastradh
319242542f5fSchristos	assert(op->floats_per_rect == 3*op->floats_per_vertex);
319303b705cfSriastradh
319442542f5fSchristos	vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
319542542f5fSchristos	if ((int)sna->render.vertex_size - vertex_index * op->floats_per_vertex < 2*op->floats_per_rect) {
319642542f5fSchristos		DBG(("%s: flushing vertex buffer: new index=%d, max=%d\n",
319742542f5fSchristos		     __FUNCTION__, vertex_index, sna->render.vertex_size / op->floats_per_vertex));
319842542f5fSchristos		if (gen3_vertex_finish(sna) < 2*op->floats_per_vertex)
319942542f5fSchristos			kgem_submit(&sna->kgem);
320003b705cfSriastradh
320142542f5fSchristos		vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
320203b705cfSriastradh	}
320303b705cfSriastradh
320442542f5fSchristos	sna->render.vertex_index = vertex_index;
320542542f5fSchristos	sna->render.vertex_used = vertex_index * op->floats_per_vertex;
320603b705cfSriastradh}
320703b705cfSriastradh
320803b705cfSriastradhstatic bool
320903b705cfSriastradhgen3_composite_set_target(struct sna *sna,
321003b705cfSriastradh			  struct sna_composite_op *op,
321103b705cfSriastradh			  PicturePtr dst,
321242542f5fSchristos			  int x, int y, int w, int h,
321342542f5fSchristos			  bool partial)
321403b705cfSriastradh{
321503b705cfSriastradh	BoxRec box;
321642542f5fSchristos	unsigned hint;
321703b705cfSriastradh
321803b705cfSriastradh	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
321903b705cfSriastradh	op->dst.format = dst->format;
322003b705cfSriastradh	op->dst.width = op->dst.pixmap->drawable.width;
322103b705cfSriastradh	op->dst.height = op->dst.pixmap->drawable.height;
322203b705cfSriastradh
322303b705cfSriastradh	if (w && h) {
322403b705cfSriastradh		box.x1 = x;
322503b705cfSriastradh		box.y1 = y;
322603b705cfSriastradh		box.x2 = x + w;
322703b705cfSriastradh		box.y2 = y + h;
322803b705cfSriastradh	} else
322903b705cfSriastradh		sna_render_picture_extents(dst, &box);
323003b705cfSriastradh
3231fe8aea9eSmrg	hint = PREFER_GPU | RENDER_GPU;
3232fe8aea9eSmrg	if (!need_tiling(sna, op->dst.width, op->dst.height))
3233fe8aea9eSmrg		hint |= FORCE_GPU;
323442542f5fSchristos	if (!partial) {
323542542f5fSchristos		hint |= IGNORE_DAMAGE;
323642542f5fSchristos		if (w == op->dst.width && h == op->dst.height)
323742542f5fSchristos			hint |= REPLACES;
323842542f5fSchristos	}
323942542f5fSchristos
324042542f5fSchristos	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
324103b705cfSriastradh	if (op->dst.bo == NULL)
324203b705cfSriastradh		return false;
324303b705cfSriastradh
324442542f5fSchristos	if (hint & REPLACES) {
324542542f5fSchristos		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
324642542f5fSchristos		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
324742542f5fSchristos	}
324842542f5fSchristos
324903b705cfSriastradh	assert(op->dst.bo->unique_id);
325003b705cfSriastradh
325103b705cfSriastradh	/* For single-stream mode there should be no minimum alignment
325203b705cfSriastradh	 * required, except that the width must be at least 2 elements.
325342542f5fSchristos	 * Furthermore, it appears that the pitch must be a multiple of
325442542f5fSchristos	 * 2 elements.
325503b705cfSriastradh	 */
325642542f5fSchristos	if (op->dst.bo->pitch & ((2*op->dst.pixmap->drawable.bitsPerPixel >> 3) - 1))
325742542f5fSchristos		return false;
325803b705cfSriastradh
325903b705cfSriastradh	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
326003b705cfSriastradh			    &op->dst.x, &op->dst.y);
326103b705cfSriastradh
326242542f5fSchristos	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
326303b705cfSriastradh	     __FUNCTION__,
326442542f5fSchristos	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
326503b705cfSriastradh	     op->dst.width, op->dst.height,
326603b705cfSriastradh	     op->dst.bo->pitch,
326703b705cfSriastradh	     op->dst.x, op->dst.y,
326803b705cfSriastradh	     op->damage ? *op->damage : (void *)-1));
326903b705cfSriastradh
327003b705cfSriastradh	assert(op->dst.bo->proxy == NULL);
327142542f5fSchristos
327242542f5fSchristos	if ((too_large(op->dst.width, op->dst.height) ||
327342542f5fSchristos	     !gen3_check_pitch_3d(op->dst.bo)) &&
327442542f5fSchristos	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
327542542f5fSchristos		return false;
327642542f5fSchristos
327703b705cfSriastradh	return true;
327803b705cfSriastradh}
327903b705cfSriastradh
/*
 * Multiply two 8-bit values treated as fractions of 255, i.e. compute
 * approximately a * b / 255 without a division.
 *
 * The product is biased by 0x7f before the usual (t + (t >> 8)) >> 8
 * reduction.  The largest intermediate, 255 * 255 + 0x7f = 65152, still
 * fits in 16 bits.
 */
static inline uint8_t
mul_8_8(uint8_t a, uint8_t b)
{
	const unsigned int biased = (unsigned int)a * b + 0x7f;

	return (uint8_t)((biased + (biased >> 8)) >> 8);
}
328603b705cfSriastradh
/*
 * Scale one 8-bit channel of @s by the alpha of @m.
 *
 * The channel is the byte of @s found at bit offset @shift; the alpha is
 * the top byte of @m.  The scaled byte is returned at the same bit
 * offset, with all other bits zero.  Presumably @shift is one of 0, 8,
 * 16 or 24.
 */
static inline uint32_t multa(uint32_t s, uint32_t m, int shift)
{
	/*
	 * Widen to uint32_t before shifting: the uint8_t result would
	 * otherwise be promoted to int, and shifting a value >= 0x80 left
	 * by 24 overflows a signed int, which is undefined behaviour.
	 */
	return (uint32_t)mul_8_8((s >> shift) & 0xff, m >> 24) << shift;
}
329103b705cfSriastradh
329203b705cfSriastradhstatic inline bool is_constant_ps(uint32_t type)
329303b705cfSriastradh{
329403b705cfSriastradh	switch (type) {
329503b705cfSriastradh	case SHADER_NONE: /* be warned! */
329603b705cfSriastradh	case SHADER_ZERO:
329703b705cfSriastradh	case SHADER_BLACK:
329803b705cfSriastradh	case SHADER_WHITE:
329903b705cfSriastradh	case SHADER_CONSTANT:
330003b705cfSriastradh		return true;
330103b705cfSriastradh	default:
330203b705cfSriastradh		return false;
330303b705cfSriastradh	}
330403b705cfSriastradh}
330503b705cfSriastradh
330603b705cfSriastradhstatic bool
330703b705cfSriastradhhas_alphamap(PicturePtr p)
330803b705cfSriastradh{
330903b705cfSriastradh	return p->alphaMap != NULL;
331003b705cfSriastradh}
331103b705cfSriastradh
331203b705cfSriastradhstatic bool
331303b705cfSriastradhneed_upload(PicturePtr p)
331403b705cfSriastradh{
331503b705cfSriastradh	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
331603b705cfSriastradh}
331703b705cfSriastradh
331803b705cfSriastradhstatic bool
331903b705cfSriastradhsource_is_busy(PixmapPtr pixmap)
332003b705cfSriastradh{
332103b705cfSriastradh	struct sna_pixmap *priv = sna_pixmap(pixmap);
332203b705cfSriastradh	if (priv == NULL)
332303b705cfSriastradh		return false;
332403b705cfSriastradh
332503b705cfSriastradh	if (priv->clear)
332603b705cfSriastradh		return false;
332703b705cfSriastradh
332803b705cfSriastradh	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
332903b705cfSriastradh		return true;
333003b705cfSriastradh
333103b705cfSriastradh	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
333203b705cfSriastradh		return true;
333303b705cfSriastradh
333403b705cfSriastradh	return priv->gpu_damage && !priv->cpu_damage;
333503b705cfSriastradh}
333603b705cfSriastradh
333703b705cfSriastradhstatic bool
333803b705cfSriastradhis_unhandled_gradient(PicturePtr picture, bool precise)
333903b705cfSriastradh{
334003b705cfSriastradh	if (picture->pDrawable)
334103b705cfSriastradh		return false;
334203b705cfSriastradh
334303b705cfSriastradh	switch (picture->pSourcePict->type) {
334403b705cfSriastradh	case SourcePictTypeSolidFill:
334503b705cfSriastradh	case SourcePictTypeLinear:
334603b705cfSriastradh	case SourcePictTypeRadial:
334703b705cfSriastradh		return false;
334803b705cfSriastradh	default:
334903b705cfSriastradh		return precise;
335003b705cfSriastradh	}
335103b705cfSriastradh}
335203b705cfSriastradh
335303b705cfSriastradhstatic bool
335403b705cfSriastradhsource_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
335503b705cfSriastradh{
335603b705cfSriastradh	if (sna_picture_is_solid(p, NULL))
335703b705cfSriastradh		return false;
335803b705cfSriastradh
335903b705cfSriastradh	if (is_unhandled_gradient(p, precise))
336003b705cfSriastradh		return true;
336103b705cfSriastradh
336203b705cfSriastradh	if (!gen3_check_xformat(p) || !gen3_check_repeat(p))
336303b705cfSriastradh		return true;
336403b705cfSriastradh
336503b705cfSriastradh	if (pixmap && source_is_busy(pixmap))
336603b705cfSriastradh		return false;
336703b705cfSriastradh
336803b705cfSriastradh	return has_alphamap(p) || !gen3_check_filter(p) || need_upload(p);
336903b705cfSriastradh}
337003b705cfSriastradh
337103b705cfSriastradhstatic bool
337203b705cfSriastradhgen3_composite_fallback(struct sna *sna,
337303b705cfSriastradh			uint8_t op,
337403b705cfSriastradh			PicturePtr src,
337503b705cfSriastradh			PicturePtr mask,
337603b705cfSriastradh			PicturePtr dst)
337703b705cfSriastradh{
337803b705cfSriastradh	PixmapPtr src_pixmap;
337903b705cfSriastradh	PixmapPtr mask_pixmap;
338003b705cfSriastradh	PixmapPtr dst_pixmap;
338103b705cfSriastradh	bool src_fallback, mask_fallback;
338203b705cfSriastradh
338303b705cfSriastradh	if (!gen3_check_dst_format(dst->format)) {
338403b705cfSriastradh		DBG(("%s: unknown destination format: %d\n",
338503b705cfSriastradh		     __FUNCTION__, dst->format));
338603b705cfSriastradh		return true;
338703b705cfSriastradh	}
338803b705cfSriastradh
338903b705cfSriastradh	dst_pixmap = get_drawable_pixmap(dst->pDrawable);
339003b705cfSriastradh
339103b705cfSriastradh	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
339203b705cfSriastradh	src_fallback = source_fallback(src, src_pixmap,
339303b705cfSriastradh				       dst->polyMode == PolyModePrecise);
339403b705cfSriastradh
339503b705cfSriastradh	if (mask) {
339603b705cfSriastradh		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
339703b705cfSriastradh		mask_fallback = source_fallback(mask, mask_pixmap,
339803b705cfSriastradh						dst->polyMode == PolyModePrecise);
339903b705cfSriastradh	} else {
340003b705cfSriastradh		mask_pixmap = NULL;
340103b705cfSriastradh		mask_fallback = false;
340203b705cfSriastradh	}
340303b705cfSriastradh
340403b705cfSriastradh	/* If we are using the destination as a source and need to
340503b705cfSriastradh	 * readback in order to upload the source, do it all
340603b705cfSriastradh	 * on the cpu.
340703b705cfSriastradh	 */
340803b705cfSriastradh	if (src_pixmap == dst_pixmap && src_fallback) {
340903b705cfSriastradh		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
341003b705cfSriastradh		return true;
341103b705cfSriastradh	}
341203b705cfSriastradh	if (mask_pixmap == dst_pixmap && mask_fallback) {
341303b705cfSriastradh		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
341403b705cfSriastradh		return true;
341503b705cfSriastradh	}
341603b705cfSriastradh
341703b705cfSriastradh	if (mask &&
341803b705cfSriastradh	    mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
341903b705cfSriastradh	    gen3_blend_op[op].src_alpha &&
342003b705cfSriastradh	    gen3_blend_op[op].src_blend != BLENDFACT_ZERO &&
342103b705cfSriastradh	    op != PictOpOver) {
342203b705cfSriastradh		DBG(("%s: component-alpha mask with op=%d, should fallback\n",
342303b705cfSriastradh		     __FUNCTION__, op));
342403b705cfSriastradh		return true;
342503b705cfSriastradh	}
342603b705cfSriastradh
342703b705cfSriastradh	/* If anything is on the GPU, push everything out to the GPU */
342803b705cfSriastradh	if (dst_use_gpu(dst_pixmap)) {
342903b705cfSriastradh		DBG(("%s: dst is already on the GPU, try to use GPU\n",
343003b705cfSriastradh		     __FUNCTION__));
343103b705cfSriastradh		return false;
343203b705cfSriastradh	}
343303b705cfSriastradh
343403b705cfSriastradh	if (src_pixmap && !src_fallback) {
343503b705cfSriastradh		DBG(("%s: src is already on the GPU, try to use GPU\n",
343603b705cfSriastradh		     __FUNCTION__));
343703b705cfSriastradh		return false;
343803b705cfSriastradh	}
343903b705cfSriastradh	if (mask_pixmap && !mask_fallback) {
344003b705cfSriastradh		DBG(("%s: mask is already on the GPU, try to use GPU\n",
344103b705cfSriastradh		     __FUNCTION__));
344203b705cfSriastradh		return false;
344303b705cfSriastradh	}
344403b705cfSriastradh
344503b705cfSriastradh	/* However if the dst is not on the GPU and we need to
344603b705cfSriastradh	 * render one of the sources using the CPU, we may
344703b705cfSriastradh	 * as well do the entire operation in place onthe CPU.
344803b705cfSriastradh	 */
344903b705cfSriastradh	if (src_fallback) {
345003b705cfSriastradh		DBG(("%s: dst is on the CPU and src will fallback\n",
345103b705cfSriastradh		     __FUNCTION__));
345203b705cfSriastradh		return true;
345303b705cfSriastradh	}
345403b705cfSriastradh
345503b705cfSriastradh	if (mask && mask_fallback) {
345603b705cfSriastradh		DBG(("%s: dst is on the CPU and mask will fallback\n",
345703b705cfSriastradh		     __FUNCTION__));
345803b705cfSriastradh		return true;
345903b705cfSriastradh	}
346003b705cfSriastradh
346103b705cfSriastradh	if (too_large(dst_pixmap->drawable.width,
346203b705cfSriastradh		      dst_pixmap->drawable.height) &&
346303b705cfSriastradh	    dst_is_cpu(dst_pixmap)) {
346403b705cfSriastradh		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
346503b705cfSriastradh		return true;
346603b705cfSriastradh	}
346703b705cfSriastradh
346803b705cfSriastradh	DBG(("%s: dst is not on the GPU and the operation should not fallback: use-cpu? %d\n",
346903b705cfSriastradh	     __FUNCTION__, dst_use_cpu(dst_pixmap)));
347003b705cfSriastradh	return dst_use_cpu(dst_pixmap);
347103b705cfSriastradh}
347203b705cfSriastradh
347303b705cfSriastradhstatic int
347403b705cfSriastradhreuse_source(struct sna *sna,
347503b705cfSriastradh	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
347603b705cfSriastradh	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
347703b705cfSriastradh{
347803b705cfSriastradh	if (src_x != msk_x || src_y != msk_y)
347903b705cfSriastradh		return false;
348003b705cfSriastradh
348103b705cfSriastradh	if (mask == src) {
348203b705cfSriastradh		*mc = *sc;
348303b705cfSriastradh		if (mc->bo)
348403b705cfSriastradh			kgem_bo_reference(mc->bo);
348503b705cfSriastradh		return true;
348603b705cfSriastradh	}
348703b705cfSriastradh
348803b705cfSriastradh	if ((src->pDrawable == NULL || mask->pDrawable != src->pDrawable))
348903b705cfSriastradh		return false;
349003b705cfSriastradh
349103b705cfSriastradh	if (sc->is_solid)
349203b705cfSriastradh		return false;
349303b705cfSriastradh
349403b705cfSriastradh	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));
349503b705cfSriastradh
349603b705cfSriastradh	if (!sna_transform_equal(src->transform, mask->transform))
349703b705cfSriastradh		return false;
349803b705cfSriastradh
349903b705cfSriastradh	if (!sna_picture_alphamap_equal(src, mask))
350003b705cfSriastradh		return false;
350103b705cfSriastradh
350203b705cfSriastradh	if (!gen3_check_repeat(mask))
350303b705cfSriastradh		return false;
350403b705cfSriastradh
350503b705cfSriastradh	if (!gen3_check_filter(mask))
350603b705cfSriastradh		return false;
350703b705cfSriastradh
350803b705cfSriastradh	if (!gen3_check_format(mask))
350903b705cfSriastradh		return false;
351003b705cfSriastradh
351103b705cfSriastradh	DBG(("%s: reusing source channel for mask with a twist\n",
351203b705cfSriastradh	     __FUNCTION__));
351303b705cfSriastradh
351403b705cfSriastradh	*mc = *sc;
351503b705cfSriastradh	mc->repeat = gen3_texture_repeat(mask->repeat ? mask->repeatType : RepeatNone);
351603b705cfSriastradh	mc->filter = gen3_filter(mask->filter);
351703b705cfSriastradh	mc->pict_format = mask->format;
351803b705cfSriastradh	gen3_composite_channel_set_format(mc, mask->format);
351903b705cfSriastradh	assert(mc->card_format);
352003b705cfSriastradh	if (mc->bo)
352103b705cfSriastradh		kgem_bo_reference(mc->bo);
352203b705cfSriastradh	return true;
352303b705cfSriastradh}
352403b705cfSriastradh
352503b705cfSriastradhstatic bool
352603b705cfSriastradhgen3_render_composite(struct sna *sna,
352703b705cfSriastradh		      uint8_t op,
352803b705cfSriastradh		      PicturePtr src,
352903b705cfSriastradh		      PicturePtr mask,
353003b705cfSriastradh		      PicturePtr dst,
353103b705cfSriastradh		      int16_t src_x,  int16_t src_y,
353203b705cfSriastradh		      int16_t mask_x, int16_t mask_y,
353303b705cfSriastradh		      int16_t dst_x,  int16_t dst_y,
353403b705cfSriastradh		      int16_t width,  int16_t height,
353542542f5fSchristos		      unsigned flags,
353603b705cfSriastradh		      struct sna_composite_op *tmp)
353703b705cfSriastradh{
353803b705cfSriastradh	DBG(("%s()\n", __FUNCTION__));
353903b705cfSriastradh
354003b705cfSriastradh	if (op >= ARRAY_SIZE(gen3_blend_op)) {
354103b705cfSriastradh		DBG(("%s: fallback due to unhandled blend op: %d\n",
354203b705cfSriastradh		     __FUNCTION__, op));
354303b705cfSriastradh		return false;
354403b705cfSriastradh	}
354503b705cfSriastradh
354603b705cfSriastradh	/* Try to use the BLT engine unless it implies a
354703b705cfSriastradh	 * 3D -> 2D context switch.
354803b705cfSriastradh	 */
354903b705cfSriastradh	if (mask == NULL &&
355003b705cfSriastradh	    sna_blt_composite(sna,
355103b705cfSriastradh			      op, src, dst,
355203b705cfSriastradh			      src_x, src_y,
355303b705cfSriastradh			      dst_x, dst_y,
355403b705cfSriastradh			      width, height,
355542542f5fSchristos			      flags, tmp))
355603b705cfSriastradh		return true;
355703b705cfSriastradh
355803b705cfSriastradh	if (gen3_composite_fallback(sna, op, src, mask, dst))
355942542f5fSchristos		goto fallback;
356003b705cfSriastradh
356103b705cfSriastradh	if (need_tiling(sna, width, height))
356203b705cfSriastradh		return sna_tiling_composite(op, src, mask, dst,
356303b705cfSriastradh					    src_x,  src_y,
356403b705cfSriastradh					    mask_x, mask_y,
356503b705cfSriastradh					    dst_x,  dst_y,
356603b705cfSriastradh					    width,  height,
356703b705cfSriastradh					    tmp);
356803b705cfSriastradh
356903b705cfSriastradh	if (!gen3_composite_set_target(sna, tmp, dst,
357042542f5fSchristos				       dst_x, dst_y, width, height,
357142542f5fSchristos				       flags & COMPOSITE_PARTIAL || op > PictOpSrc)) {
357203b705cfSriastradh		DBG(("%s: unable to set render target\n",
357303b705cfSriastradh		     __FUNCTION__));
357442542f5fSchristos		goto fallback;
357503b705cfSriastradh	}
357603b705cfSriastradh
357703b705cfSriastradh	tmp->op = op;
357803b705cfSriastradh	tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);
357903b705cfSriastradh	tmp->u.gen3.num_constants = 0;
358003b705cfSriastradh	tmp->src.u.gen3.type = SHADER_TEXTURE;
358103b705cfSriastradh	tmp->src.is_affine = true;
358203b705cfSriastradh	DBG(("%s: preparing source\n", __FUNCTION__));
358303b705cfSriastradh	switch (gen3_composite_picture(sna, src, tmp, &tmp->src,
358403b705cfSriastradh				       src_x, src_y,
358503b705cfSriastradh				       width, height,
358603b705cfSriastradh				       dst_x, dst_y,
358703b705cfSriastradh				       dst->polyMode == PolyModePrecise)) {
358803b705cfSriastradh	case -1:
358903b705cfSriastradh		goto cleanup_dst;
359003b705cfSriastradh	case 0:
359103b705cfSriastradh		tmp->src.u.gen3.type = SHADER_ZERO;
359203b705cfSriastradh		break;
359303b705cfSriastradh	case 1:
359403b705cfSriastradh		if (mask == NULL && tmp->src.bo &&
359503b705cfSriastradh		    sna_blt_composite__convert(sna,
359603b705cfSriastradh					       dst_x, dst_y, width, height,
359703b705cfSriastradh					       tmp))
359803b705cfSriastradh			return true;
359903b705cfSriastradh
360003b705cfSriastradh		gen3_composite_channel_convert(&tmp->src);
360103b705cfSriastradh		break;
360203b705cfSriastradh	}
360303b705cfSriastradh	DBG(("%s: source type=%d\n", __FUNCTION__, tmp->src.u.gen3.type));
360403b705cfSriastradh
360503b705cfSriastradh	tmp->mask.u.gen3.type = SHADER_NONE;
360603b705cfSriastradh	tmp->mask.is_affine = true;
360703b705cfSriastradh	tmp->need_magic_ca_pass = false;
360803b705cfSriastradh	tmp->has_component_alpha = false;
360903b705cfSriastradh	if (mask && tmp->src.u.gen3.type != SHADER_ZERO) {
361003b705cfSriastradh		if (!reuse_source(sna,
361103b705cfSriastradh				  src, &tmp->src, src_x, src_y,
361203b705cfSriastradh				  mask, &tmp->mask, mask_x, mask_y)) {
361303b705cfSriastradh			tmp->mask.u.gen3.type = SHADER_TEXTURE;
361403b705cfSriastradh			DBG(("%s: preparing mask\n", __FUNCTION__));
361503b705cfSriastradh			switch (gen3_composite_picture(sna, mask, tmp, &tmp->mask,
361603b705cfSriastradh						       mask_x, mask_y,
361703b705cfSriastradh						       width,  height,
361803b705cfSriastradh						       dst_x,  dst_y,
361903b705cfSriastradh						       dst->polyMode == PolyModePrecise)) {
362003b705cfSriastradh			case -1:
362103b705cfSriastradh				goto cleanup_src;
362203b705cfSriastradh			case 0:
362303b705cfSriastradh				tmp->mask.u.gen3.type = SHADER_ZERO;
362403b705cfSriastradh				break;
362503b705cfSriastradh			case 1:
362603b705cfSriastradh				gen3_composite_channel_convert(&tmp->mask);
362703b705cfSriastradh				break;
362803b705cfSriastradh			}
362903b705cfSriastradh		}
363003b705cfSriastradh		DBG(("%s: mask type=%d\n", __FUNCTION__, tmp->mask.u.gen3.type));
363103b705cfSriastradh		if (tmp->mask.u.gen3.type == SHADER_ZERO) {
363203b705cfSriastradh			if (tmp->src.bo) {
363303b705cfSriastradh				kgem_bo_destroy(&sna->kgem,
363403b705cfSriastradh						tmp->src.bo);
363503b705cfSriastradh				tmp->src.bo = NULL;
363603b705cfSriastradh			}
363703b705cfSriastradh			tmp->src.u.gen3.type = SHADER_ZERO;
363803b705cfSriastradh			tmp->mask.u.gen3.type = SHADER_NONE;
363903b705cfSriastradh		}
364003b705cfSriastradh
364103b705cfSriastradh		if (tmp->mask.u.gen3.type != SHADER_NONE) {
364203b705cfSriastradh			if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
364303b705cfSriastradh				/* Check if it's component alpha that relies on a source alpha
364403b705cfSriastradh				 * and on the source value.  We can only get one of those
364503b705cfSriastradh				 * into the single source value that we get to blend with.
364603b705cfSriastradh				 */
364703b705cfSriastradh				DBG(("%s: component-alpha mask: %d\n",
364803b705cfSriastradh				     __FUNCTION__, tmp->mask.u.gen3.type));
364903b705cfSriastradh				tmp->has_component_alpha = true;
365003b705cfSriastradh				if (tmp->mask.u.gen3.type == SHADER_WHITE) {
365103b705cfSriastradh					tmp->mask.u.gen3.type = SHADER_NONE;
365203b705cfSriastradh					tmp->has_component_alpha = false;
365303b705cfSriastradh				} else if (gen3_blend_op[op].src_alpha &&
365403b705cfSriastradh					   gen3_blend_op[op].src_blend != BLENDFACT_ZERO) {
365503b705cfSriastradh					if (op != PictOpOver)
365603b705cfSriastradh						goto cleanup_mask;
365703b705cfSriastradh
365803b705cfSriastradh					tmp->need_magic_ca_pass = true;
365903b705cfSriastradh					tmp->op = PictOpOutReverse;
366003b705cfSriastradh				}
366103b705cfSriastradh			} else {
366203b705cfSriastradh				if (tmp->mask.is_opaque) {
366303b705cfSriastradh					tmp->mask.u.gen3.type = SHADER_NONE;
366403b705cfSriastradh				} else if (is_constant_ps(tmp->src.u.gen3.type) &&
366503b705cfSriastradh					   is_constant_ps(tmp->mask.u.gen3.type)) {
366603b705cfSriastradh					uint32_t v;
366703b705cfSriastradh
366803b705cfSriastradh					v = multa(tmp->src.u.gen3.mode,
366903b705cfSriastradh						  tmp->mask.u.gen3.mode,
367003b705cfSriastradh						  24);
367103b705cfSriastradh					v |= multa(tmp->src.u.gen3.mode,
367203b705cfSriastradh						   tmp->mask.u.gen3.mode,
367303b705cfSriastradh						   16);
367403b705cfSriastradh					v |= multa(tmp->src.u.gen3.mode,
367503b705cfSriastradh						   tmp->mask.u.gen3.mode,
367603b705cfSriastradh						   8);
367703b705cfSriastradh					v |= multa(tmp->src.u.gen3.mode,
367803b705cfSriastradh						   tmp->mask.u.gen3.mode,
367903b705cfSriastradh						   0);
368003b705cfSriastradh
368103b705cfSriastradh					DBG(("%s: combining constant source/mask: %x x %x -> %x\n",
368203b705cfSriastradh					     __FUNCTION__,
368303b705cfSriastradh					     tmp->src.u.gen3.mode,
368403b705cfSriastradh					     tmp->mask.u.gen3.mode,
368503b705cfSriastradh					     v));
368603b705cfSriastradh
368703b705cfSriastradh					tmp->src.u.gen3.type = SHADER_CONSTANT;
368803b705cfSriastradh					tmp->src.u.gen3.mode = v;
368903b705cfSriastradh					tmp->src.is_opaque = false;
369003b705cfSriastradh
369103b705cfSriastradh					tmp->mask.u.gen3.type = SHADER_NONE;
369203b705cfSriastradh				}
369303b705cfSriastradh			}
369403b705cfSriastradh		}
369503b705cfSriastradh	}
3696fe8aea9eSmrg	DBG(("%s: final src/mask type=%d/%d [constant? %d/%d], transform? %d/%d, affine=%d/%d\n", __FUNCTION__,
369703b705cfSriastradh	     tmp->src.u.gen3.type, tmp->mask.u.gen3.type,
3698fe8aea9eSmrg	     is_constant_ps(tmp->src.u.gen3.type),
3699fe8aea9eSmrg	     is_constant_ps(tmp->mask.u.gen3.type),
3700fe8aea9eSmrg	     !!tmp->src.transform, !!tmp->mask.transform,
370103b705cfSriastradh	     tmp->src.is_affine, tmp->mask.is_affine));
370203b705cfSriastradh
370303b705cfSriastradh	tmp->prim_emit = gen3_emit_composite_primitive;
370403b705cfSriastradh	if (is_constant_ps(tmp->mask.u.gen3.type)) {
370503b705cfSriastradh		switch (tmp->src.u.gen3.type) {
370603b705cfSriastradh		case SHADER_NONE:
370703b705cfSriastradh		case SHADER_ZERO:
370803b705cfSriastradh		case SHADER_BLACK:
370903b705cfSriastradh		case SHADER_WHITE:
371003b705cfSriastradh		case SHADER_CONSTANT:
371103b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
371203b705cfSriastradh			if (sna->cpu_features & SSE2) {
371303b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_primitive_constant__sse2;
371403b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_boxes_constant__sse2;
371503b705cfSriastradh			} else
371603b705cfSriastradh#endif
371703b705cfSriastradh			{
371803b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_primitive_constant;
371903b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_boxes_constant;
372003b705cfSriastradh			}
372103b705cfSriastradh
372203b705cfSriastradh			break;
372303b705cfSriastradh		case SHADER_LINEAR:
372403b705cfSriastradh		case SHADER_RADIAL:
372503b705cfSriastradh			if (tmp->src.transform == NULL) {
372603b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
372703b705cfSriastradh				if (sna->cpu_features & SSE2) {
372803b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient__sse2;
372903b705cfSriastradh					tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient__sse2;
373003b705cfSriastradh				} else
373103b705cfSriastradh#endif
373203b705cfSriastradh				{
373303b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
373403b705cfSriastradh					tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient;
373503b705cfSriastradh				}
373603b705cfSriastradh			} else if (tmp->src.is_affine) {
373703b705cfSriastradh				tmp->src.scale[1] = tmp->src.scale[0] = 1. / tmp->src.transform->matrix[2][2];
373803b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
373903b705cfSriastradh				if (sna->cpu_features & SSE2) {
374003b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient__sse2;
374103b705cfSriastradh					tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient__sse2;
374203b705cfSriastradh				} else
374303b705cfSriastradh#endif
374403b705cfSriastradh				{
374503b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
374603b705cfSriastradh					tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient;
374703b705cfSriastradh				}
374803b705cfSriastradh			}
374903b705cfSriastradh			break;
375003b705cfSriastradh		case SHADER_TEXTURE:
375103b705cfSriastradh			if (tmp->src.transform == NULL) {
375203b705cfSriastradh				if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
375303b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
375403b705cfSriastradh					if (sna->cpu_features & SSE2) {
375503b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset__sse2;
375603b705cfSriastradh						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset__sse2;
375703b705cfSriastradh					} else
375803b705cfSriastradh#endif
375903b705cfSriastradh					{
376003b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset;
376103b705cfSriastradh						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset;
376203b705cfSriastradh					}
376303b705cfSriastradh				} else {
376403b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
376503b705cfSriastradh					if (sna->cpu_features & SSE2) {
376603b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_identity_source__sse2;
376703b705cfSriastradh						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source__sse2;
376803b705cfSriastradh					} else
376903b705cfSriastradh#endif
377003b705cfSriastradh					{
377103b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
377203b705cfSriastradh						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source;
377303b705cfSriastradh					}
377403b705cfSriastradh				}
377503b705cfSriastradh			} else if (tmp->src.is_affine) {
377603b705cfSriastradh				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
377703b705cfSriastradh				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
377803b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
377903b705cfSriastradh				if (sna->cpu_features & SSE2) {
378003b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_affine_source__sse2;
378103b705cfSriastradh					tmp->emit_boxes = gen3_emit_composite_boxes_affine_source__sse2;
378203b705cfSriastradh				} else
378303b705cfSriastradh#endif
378403b705cfSriastradh				{
378503b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
378603b705cfSriastradh					tmp->emit_boxes = gen3_emit_composite_boxes_affine_source;
378703b705cfSriastradh				}
378803b705cfSriastradh			}
378903b705cfSriastradh			break;
379003b705cfSriastradh		}
379103b705cfSriastradh	} else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) {
379203b705cfSriastradh		if (tmp->mask.transform == NULL) {
379303b705cfSriastradh			if (is_constant_ps(tmp->src.u.gen3.type)) {
379403b705cfSriastradh				if ((tmp->mask.offset[0]|tmp->mask.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
379503b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
379603b705cfSriastradh					if (sna->cpu_features & SSE2) {
379703b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2;
379803b705cfSriastradh					} else
379903b705cfSriastradh#endif
380003b705cfSriastradh					{
380103b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset;
380203b705cfSriastradh					}
380303b705cfSriastradh				} else {
380403b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
380503b705cfSriastradh					if (sna->cpu_features & SSE2) {
380603b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask__sse2;
380703b705cfSriastradh					} else
380803b705cfSriastradh#endif
380903b705cfSriastradh					{
381003b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask;
381103b705cfSriastradh					}
381203b705cfSriastradh				}
381303b705cfSriastradh			} else if (tmp->src.transform == NULL) {
381403b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
381503b705cfSriastradh				if (sna->cpu_features & SSE2) {
381603b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask__sse2;
381703b705cfSriastradh				} else
381803b705cfSriastradh#endif
381903b705cfSriastradh				{
382003b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
382103b705cfSriastradh				}
382203b705cfSriastradh			} else if (tmp->src.is_affine) {
382303b705cfSriastradh				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
382403b705cfSriastradh				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
382503b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
382603b705cfSriastradh				if (sna->cpu_features & SSE2) {
382703b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask__sse2;
382803b705cfSriastradh				} else
382903b705cfSriastradh#endif
383003b705cfSriastradh				{
383103b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask;
383203b705cfSriastradh				}
383303b705cfSriastradh			}
383403b705cfSriastradh		}
383503b705cfSriastradh	}
383603b705cfSriastradh
383703b705cfSriastradh	tmp->floats_per_vertex = 2;
383803b705cfSriastradh	if (!is_constant_ps(tmp->src.u.gen3.type))
383903b705cfSriastradh		tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
384003b705cfSriastradh	if (!is_constant_ps(tmp->mask.u.gen3.type))
384103b705cfSriastradh		tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
384203b705cfSriastradh	DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__,
384303b705cfSriastradh	     !is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0,
384403b705cfSriastradh	     !is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0,
384503b705cfSriastradh	     tmp->floats_per_vertex,
384603b705cfSriastradh	     tmp->prim_emit != gen3_emit_composite_primitive));
384703b705cfSriastradh	tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
384803b705cfSriastradh
384903b705cfSriastradh	tmp->blt   = gen3_render_composite_blt;
385003b705cfSriastradh	tmp->box   = gen3_render_composite_box;
385103b705cfSriastradh	tmp->boxes = gen3_render_composite_boxes__blt;
385203b705cfSriastradh	if (tmp->emit_boxes) {
385303b705cfSriastradh		tmp->boxes = gen3_render_composite_boxes;
385403b705cfSriastradh		tmp->thread_boxes = gen3_render_composite_boxes__thread;
385503b705cfSriastradh	}
385603b705cfSriastradh	tmp->done  = gen3_render_composite_done;
385703b705cfSriastradh
385803b705cfSriastradh	if (!kgem_check_bo(&sna->kgem,
385903b705cfSriastradh			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
386003b705cfSriastradh			   NULL)) {
386103b705cfSriastradh		kgem_submit(&sna->kgem);
386203b705cfSriastradh		if (!kgem_check_bo(&sna->kgem,
386303b705cfSriastradh				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
386403b705cfSriastradh				   NULL))
386503b705cfSriastradh			goto cleanup_mask;
386603b705cfSriastradh	}
386703b705cfSriastradh
386803b705cfSriastradh	gen3_align_vertex(sna, tmp);
386942542f5fSchristos	gen3_emit_composite_state(sna, tmp);
387003b705cfSriastradh	return true;
387103b705cfSriastradh
387203b705cfSriastradhcleanup_mask:
387342542f5fSchristos	if (tmp->mask.bo) {
387403b705cfSriastradh		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
387542542f5fSchristos		tmp->mask.bo = NULL;
387642542f5fSchristos	}
387703b705cfSriastradhcleanup_src:
387842542f5fSchristos	if (tmp->src.bo) {
387903b705cfSriastradh		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
388042542f5fSchristos		tmp->src.bo = NULL;
388142542f5fSchristos	}
388203b705cfSriastradhcleanup_dst:
388342542f5fSchristos	if (tmp->redirect.real_bo) {
388403b705cfSriastradh		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
388542542f5fSchristos		tmp->redirect.real_bo = NULL;
388642542f5fSchristos	}
388742542f5fSchristosfallback:
388842542f5fSchristos	return (mask == NULL &&
388942542f5fSchristos		sna_blt_composite(sna,
389042542f5fSchristos				  op, src, dst,
389142542f5fSchristos				  src_x, src_y,
389242542f5fSchristos				  dst_x, dst_y,
389342542f5fSchristos				  width, height,
389442542f5fSchristos				  flags | COMPOSITE_FALLBACK, tmp));
389503b705cfSriastradh}
389603b705cfSriastradh
389703b705cfSriastradhstatic void
389803b705cfSriastradhgen3_emit_composite_spans_vertex(struct sna *sna,
389903b705cfSriastradh				 const struct sna_composite_spans_op *op,
390003b705cfSriastradh				 int16_t x, int16_t y,
390103b705cfSriastradh				 float opacity)
390203b705cfSriastradh{
390303b705cfSriastradh	gen3_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y);
390403b705cfSriastradh	gen3_emit_composite_texcoord(sna, &op->base.src, x, y);
390503b705cfSriastradh	OUT_VERTEX(opacity);
390603b705cfSriastradh}
390703b705cfSriastradh
390803b705cfSriastradhfastcall static void
390903b705cfSriastradhgen3_emit_composite_spans_primitive_zero(struct sna *sna,
391003b705cfSriastradh					 const struct sna_composite_spans_op *op,
391103b705cfSriastradh					 const BoxRec *box,
391203b705cfSriastradh					 float opacity)
391303b705cfSriastradh{
391403b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
391503b705cfSriastradh	sna->render.vertex_used += 6;
3916fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
391703b705cfSriastradh
391803b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
391903b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
392003b705cfSriastradh
392103b705cfSriastradh	v[2] = op->base.dst.x + box->x1;
392203b705cfSriastradh	v[3] = v[1];
392303b705cfSriastradh
392403b705cfSriastradh	v[4] = v[2];
392503b705cfSriastradh	v[5] = op->base.dst.x + box->y1;
392603b705cfSriastradh}
392703b705cfSriastradh
392803b705cfSriastradhfastcall static void
392903b705cfSriastradhgen3_emit_composite_spans_primitive_zero__boxes(const struct sna_composite_spans_op *op,
393003b705cfSriastradh						const struct sna_opacity_box *b,
393103b705cfSriastradh						int nbox, float *v)
393203b705cfSriastradh{
393303b705cfSriastradh	do {
393403b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
393503b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
393603b705cfSriastradh
393703b705cfSriastradh		v[2] = op->base.dst.x + b->box.x1;
393803b705cfSriastradh		v[3] = v[1];
393903b705cfSriastradh
394003b705cfSriastradh		v[4] = v[2];
394103b705cfSriastradh		v[5] = op->base.dst.x + b->box.y1;
394203b705cfSriastradh
394303b705cfSriastradh		v += 6;
394403b705cfSriastradh		b++;
394503b705cfSriastradh	} while (--nbox);
394603b705cfSriastradh}
394703b705cfSriastradh
394803b705cfSriastradhfastcall static void
394903b705cfSriastradhgen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna,
395003b705cfSriastradh						   const struct sna_composite_spans_op *op,
395103b705cfSriastradh						   const BoxRec *box,
395203b705cfSriastradh						   float opacity)
395303b705cfSriastradh{
395403b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
395503b705cfSriastradh	sna->render.vertex_used += 6;
3956fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
395703b705cfSriastradh
395803b705cfSriastradh	v[0] = box->x2;
395903b705cfSriastradh	v[3] = v[1] = box->y2;
396003b705cfSriastradh	v[4] = v[2] = box->x1;
396103b705cfSriastradh	v[5] = box->y1;
396203b705cfSriastradh}
396303b705cfSriastradh
396403b705cfSriastradhfastcall static void
396503b705cfSriastradhgen3_emit_composite_spans_primitive_zero_no_offset__boxes(const struct sna_composite_spans_op *op,
396603b705cfSriastradh							  const struct sna_opacity_box *b,
396703b705cfSriastradh							  int nbox, float *v)
396803b705cfSriastradh{
396903b705cfSriastradh	do {
397003b705cfSriastradh		v[0] = b->box.x2;
397103b705cfSriastradh		v[3] = v[1] = b->box.y2;
397203b705cfSriastradh		v[4] = v[2] = b->box.x1;
397303b705cfSriastradh		v[5] = b->box.y1;
397403b705cfSriastradh
397503b705cfSriastradh		b++;
397603b705cfSriastradh		v += 6;
397703b705cfSriastradh	} while (--nbox);
397803b705cfSriastradh}
397903b705cfSriastradh
398003b705cfSriastradhfastcall static void
398103b705cfSriastradhgen3_emit_composite_spans_primitive_constant(struct sna *sna,
398203b705cfSriastradh					     const struct sna_composite_spans_op *op,
398303b705cfSriastradh					     const BoxRec *box,
398403b705cfSriastradh					     float opacity)
398503b705cfSriastradh{
398603b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
398703b705cfSriastradh	sna->render.vertex_used += 9;
3988fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
398903b705cfSriastradh
399003b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
399103b705cfSriastradh	v[6] = v[3] = op->base.dst.x + box->x1;
399203b705cfSriastradh	v[4] = v[1] = op->base.dst.y + box->y2;
399303b705cfSriastradh	v[7] = op->base.dst.y + box->y1;
399403b705cfSriastradh	v[8] = v[5] = v[2] = opacity;
399503b705cfSriastradh}
399603b705cfSriastradh
399703b705cfSriastradhfastcall static void
399803b705cfSriastradhgen3_emit_composite_spans_primitive_constant__boxes(const struct sna_composite_spans_op *op,
399903b705cfSriastradh						    const struct sna_opacity_box *b,
400003b705cfSriastradh						    int nbox,
400103b705cfSriastradh						    float *v)
400203b705cfSriastradh{
400303b705cfSriastradh	do {
400403b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
400503b705cfSriastradh		v[6] = v[3] = op->base.dst.x + b->box.x1;
400603b705cfSriastradh		v[4] = v[1] = op->base.dst.y + b->box.y2;
400703b705cfSriastradh		v[7] = op->base.dst.y + b->box.y1;
400803b705cfSriastradh		v[8] = v[5] = v[2] = b->alpha;
400903b705cfSriastradh
401003b705cfSriastradh		v += 9;
401103b705cfSriastradh		b++;
401203b705cfSriastradh	} while (--nbox);
401303b705cfSriastradh}
401403b705cfSriastradh
401503b705cfSriastradhfastcall static void
401603b705cfSriastradhgen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna,
401703b705cfSriastradh						       const struct sna_composite_spans_op *op,
401803b705cfSriastradh						       const BoxRec *box,
401903b705cfSriastradh						       float opacity)
402003b705cfSriastradh{
402103b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
402203b705cfSriastradh	sna->render.vertex_used += 9;
4023fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
402403b705cfSriastradh
402503b705cfSriastradh	v[0] = box->x2;
402603b705cfSriastradh	v[6] = v[3] = box->x1;
402703b705cfSriastradh	v[4] = v[1] = box->y2;
402803b705cfSriastradh	v[7] = box->y1;
402903b705cfSriastradh	v[8] = v[5] = v[2] = opacity;
403003b705cfSriastradh}
403103b705cfSriastradh
403203b705cfSriastradhfastcall static void
403303b705cfSriastradhgen3_emit_composite_spans_primitive_constant_no_offset__boxes(const struct sna_composite_spans_op *op,
403403b705cfSriastradh							      const struct sna_opacity_box *b,
403503b705cfSriastradh							      int nbox, float *v)
403603b705cfSriastradh{
403703b705cfSriastradh	do {
403803b705cfSriastradh		v[0] = b->box.x2;
403903b705cfSriastradh		v[6] = v[3] = b->box.x1;
404003b705cfSriastradh		v[4] = v[1] = b->box.y2;
404103b705cfSriastradh		v[7] = b->box.y1;
404203b705cfSriastradh		v[8] = v[5] = v[2] = b->alpha;
404303b705cfSriastradh
404403b705cfSriastradh		v += 9;
404503b705cfSriastradh		b++;
404603b705cfSriastradh	} while (--nbox);
404703b705cfSriastradh}
404803b705cfSriastradh
404903b705cfSriastradhfastcall static void
405003b705cfSriastradhgen3_emit_composite_spans_primitive_identity_source(struct sna *sna,
405103b705cfSriastradh						    const struct sna_composite_spans_op *op,
405203b705cfSriastradh						    const BoxRec *box,
405303b705cfSriastradh						    float opacity)
405403b705cfSriastradh{
405503b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
405603b705cfSriastradh	sna->render.vertex_used += 15;
4057fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
405803b705cfSriastradh
405903b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
406003b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
406103b705cfSriastradh	v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
406203b705cfSriastradh	v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
406303b705cfSriastradh	v[4] = opacity;
406403b705cfSriastradh
406503b705cfSriastradh	v[5] = op->base.dst.x + box->x1;
406603b705cfSriastradh	v[6] = v[1];
406703b705cfSriastradh	v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
406803b705cfSriastradh	v[8] = v[3];
406903b705cfSriastradh	v[9] = opacity;
407003b705cfSriastradh
407103b705cfSriastradh	v[10] = v[5];
407203b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
407303b705cfSriastradh	v[12] = v[7];
407403b705cfSriastradh	v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
407503b705cfSriastradh	v[14] = opacity;
407603b705cfSriastradh}
407703b705cfSriastradh
407803b705cfSriastradhfastcall static void
407903b705cfSriastradhgen3_emit_composite_spans_primitive_identity_source__boxes(const struct sna_composite_spans_op *op,
408003b705cfSriastradh							   const struct sna_opacity_box *b,
408103b705cfSriastradh							   int nbox,
408203b705cfSriastradh							   float *v)
408303b705cfSriastradh{
408403b705cfSriastradh	do {
408503b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
408603b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
408703b705cfSriastradh		v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0];
408803b705cfSriastradh		v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1];
408903b705cfSriastradh		v[4] = b->alpha;
409003b705cfSriastradh
409103b705cfSriastradh		v[5] = op->base.dst.x + b->box.x1;
409203b705cfSriastradh		v[6] = v[1];
409303b705cfSriastradh		v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0];
409403b705cfSriastradh		v[8] = v[3];
409503b705cfSriastradh		v[9] = b->alpha;
409603b705cfSriastradh
409703b705cfSriastradh		v[10] = v[5];
409803b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
409903b705cfSriastradh		v[12] = v[7];
410003b705cfSriastradh		v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1];
410103b705cfSriastradh		v[14] = b->alpha;
410203b705cfSriastradh
410303b705cfSriastradh		v += 15;
410403b705cfSriastradh		b++;
410503b705cfSriastradh	} while (--nbox);
410603b705cfSriastradh}
410703b705cfSriastradh
410803b705cfSriastradhfastcall static void
410903b705cfSriastradhgen3_emit_composite_spans_primitive_affine_source(struct sna *sna,
411003b705cfSriastradh						  const struct sna_composite_spans_op *op,
411103b705cfSriastradh						  const BoxRec *box,
411203b705cfSriastradh						  float opacity)
411303b705cfSriastradh{
411403b705cfSriastradh	PictTransform *transform = op->base.src.transform;
411503b705cfSriastradh	float *v;
411603b705cfSriastradh
411703b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
411803b705cfSriastradh	sna->render.vertex_used += 15;
4119fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
412003b705cfSriastradh
412103b705cfSriastradh	v[0]  = op->base.dst.x + box->x2;
412203b705cfSriastradh	v[6]  = v[1] = op->base.dst.y + box->y2;
412303b705cfSriastradh	v[10] = v[5] = op->base.dst.x + box->x1;
412403b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
412503b705cfSriastradh	v[14] = v[9] = v[4]  = opacity;
412603b705cfSriastradh
412703b705cfSriastradh	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
412803b705cfSriastradh				    (int)op->base.src.offset[1] + box->y2,
412903b705cfSriastradh				    transform, op->base.src.scale,
413003b705cfSriastradh				    &v[2], &v[3]);
413103b705cfSriastradh
413203b705cfSriastradh	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
413303b705cfSriastradh				    (int)op->base.src.offset[1] + box->y2,
413403b705cfSriastradh				    transform, op->base.src.scale,
413503b705cfSriastradh				    &v[7], &v[8]);
413603b705cfSriastradh
413703b705cfSriastradh	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
413803b705cfSriastradh				    (int)op->base.src.offset[1] + box->y1,
413903b705cfSriastradh				    transform, op->base.src.scale,
414003b705cfSriastradh				    &v[12], &v[13]);
414103b705cfSriastradh}
414203b705cfSriastradh
414303b705cfSriastradhfastcall static void
414403b705cfSriastradhgen3_emit_composite_spans_primitive_affine_source__boxes(const struct sna_composite_spans_op *op,
414503b705cfSriastradh							 const struct sna_opacity_box *b,
414603b705cfSriastradh							 int nbox,
414703b705cfSriastradh							 float *v)
414803b705cfSriastradh{
414903b705cfSriastradh	PictTransform *transform = op->base.src.transform;
415003b705cfSriastradh
415103b705cfSriastradh	do {
415203b705cfSriastradh		v[0]  = op->base.dst.x + b->box.x2;
415303b705cfSriastradh		v[6]  = v[1] = op->base.dst.y + b->box.y2;
415403b705cfSriastradh		v[10] = v[5] = op->base.dst.x + b->box.x1;
415503b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
415603b705cfSriastradh		v[14] = v[9] = v[4]  = b->alpha;
415703b705cfSriastradh
415803b705cfSriastradh		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2,
415903b705cfSriastradh					    (int)op->base.src.offset[1] + b->box.y2,
416003b705cfSriastradh					    transform, op->base.src.scale,
416103b705cfSriastradh					    &v[2], &v[3]);
416203b705cfSriastradh
416303b705cfSriastradh		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
416403b705cfSriastradh					    (int)op->base.src.offset[1] + b->box.y2,
416503b705cfSriastradh					    transform, op->base.src.scale,
416603b705cfSriastradh					    &v[7], &v[8]);
416703b705cfSriastradh
416803b705cfSriastradh		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
416903b705cfSriastradh					    (int)op->base.src.offset[1] + b->box.y1,
417003b705cfSriastradh					    transform, op->base.src.scale,
417103b705cfSriastradh					    &v[12], &v[13]);
417203b705cfSriastradh		v += 15;
417303b705cfSriastradh		b++;
417403b705cfSriastradh	} while (--nbox);
417503b705cfSriastradh}
417603b705cfSriastradh
417703b705cfSriastradhfastcall static void
417803b705cfSriastradhgen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna,
417903b705cfSriastradh						      const struct sna_composite_spans_op *op,
418003b705cfSriastradh						      const BoxRec *box,
418103b705cfSriastradh						      float opacity)
418203b705cfSriastradh{
418303b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
418403b705cfSriastradh	sna->render.vertex_used += 15;
4185fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
418603b705cfSriastradh
418703b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
418803b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
418903b705cfSriastradh	v[2] = op->base.src.offset[0] + box->x2;
419003b705cfSriastradh	v[3] = op->base.src.offset[1] + box->y2;
419103b705cfSriastradh	v[4] = opacity;
419203b705cfSriastradh
419303b705cfSriastradh	v[5] = op->base.dst.x + box->x1;
419403b705cfSriastradh	v[6] = v[1];
419503b705cfSriastradh	v[7] = op->base.src.offset[0] + box->x1;
419603b705cfSriastradh	v[8] = v[3];
419703b705cfSriastradh	v[9] = opacity;
419803b705cfSriastradh
419903b705cfSriastradh	v[10] = v[5];
420003b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
420103b705cfSriastradh	v[12] = v[7];
420203b705cfSriastradh	v[13] = op->base.src.offset[1] + box->y1;
420303b705cfSriastradh	v[14] = opacity;
420403b705cfSriastradh}
420503b705cfSriastradh
420603b705cfSriastradhfastcall static void
420703b705cfSriastradhgen3_emit_composite_spans_primitive_identity_gradient__boxes(const struct sna_composite_spans_op *op,
420803b705cfSriastradh							     const struct sna_opacity_box *b,
420903b705cfSriastradh							     int nbox,
421003b705cfSriastradh							     float *v)
421103b705cfSriastradh{
421203b705cfSriastradh	do {
421303b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
421403b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
421503b705cfSriastradh		v[2] = op->base.src.offset[0] + b->box.x2;
421603b705cfSriastradh		v[3] = op->base.src.offset[1] + b->box.y2;
421703b705cfSriastradh		v[4] = b->alpha;
421803b705cfSriastradh
421903b705cfSriastradh		v[5] = op->base.dst.x + b->box.x1;
422003b705cfSriastradh		v[6] = v[1];
422103b705cfSriastradh		v[7] = op->base.src.offset[0] + b->box.x1;
422203b705cfSriastradh		v[8] = v[3];
422303b705cfSriastradh		v[9] = b->alpha;
422403b705cfSriastradh
422503b705cfSriastradh		v[10] = v[5];
422603b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
422703b705cfSriastradh		v[12] = v[7];
422803b705cfSriastradh		v[13] = op->base.src.offset[1] + b->box.y1;
422903b705cfSriastradh		v[14] = b->alpha;
423003b705cfSriastradh
423103b705cfSriastradh		v += 15;
423203b705cfSriastradh		b++;
423303b705cfSriastradh	} while (--nbox);
423403b705cfSriastradh}
423503b705cfSriastradh
423603b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
423703b705cfSriastradhsse2 fastcall static void
423803b705cfSriastradhgen3_emit_composite_spans_primitive_constant__sse2(struct sna *sna,
423903b705cfSriastradh						   const struct sna_composite_spans_op *op,
424003b705cfSriastradh						   const BoxRec *box,
424103b705cfSriastradh						   float opacity)
424203b705cfSriastradh{
424303b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
424403b705cfSriastradh	sna->render.vertex_used += 9;
4245fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
424603b705cfSriastradh
424703b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
424803b705cfSriastradh	v[6] = v[3] = op->base.dst.x + box->x1;
424903b705cfSriastradh	v[4] = v[1] = op->base.dst.y + box->y2;
425003b705cfSriastradh	v[7] = op->base.dst.y + box->y1;
425103b705cfSriastradh	v[8] = v[5] = v[2] = opacity;
425203b705cfSriastradh}
425303b705cfSriastradh
425403b705cfSriastradhsse2 fastcall static void
425503b705cfSriastradhgen3_emit_composite_spans_primitive_constant__sse2__boxes(const struct sna_composite_spans_op *op,
425603b705cfSriastradh							  const struct sna_opacity_box *b,
425703b705cfSriastradh							  int nbox,
425803b705cfSriastradh							  float *v)
425903b705cfSriastradh{
426003b705cfSriastradh	do {
426103b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
426203b705cfSriastradh		v[6] = v[3] = op->base.dst.x + b->box.x1;
426303b705cfSriastradh		v[4] = v[1] = op->base.dst.y + b->box.y2;
426403b705cfSriastradh		v[7] = op->base.dst.y + b->box.y1;
426503b705cfSriastradh		v[8] = v[5] = v[2] = b->alpha;
426603b705cfSriastradh
426703b705cfSriastradh		v += 9;
426803b705cfSriastradh		b++;
426903b705cfSriastradh	} while (--nbox);
427003b705cfSriastradh}
427103b705cfSriastradh
427203b705cfSriastradhsse2 fastcall static void
427303b705cfSriastradhgen3_render_composite_spans_constant_box__sse2(struct sna *sna,
427403b705cfSriastradh					       const struct sna_composite_spans_op *op,
427503b705cfSriastradh					       const BoxRec *box, float opacity)
427603b705cfSriastradh{
427703b705cfSriastradh	float *v;
427803b705cfSriastradh	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
427903b705cfSriastradh	     __FUNCTION__,
428003b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
428103b705cfSriastradh	     opacity,
428203b705cfSriastradh	     op->base.dst.x, op->base.dst.y,
428303b705cfSriastradh	     box->x1, box->y1,
428403b705cfSriastradh	     box->x2 - box->x1,
428503b705cfSriastradh	     box->y2 - box->y1));
428603b705cfSriastradh
428703b705cfSriastradh	gen3_get_rectangles(sna, &op->base, 1);
428803b705cfSriastradh
428903b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
429003b705cfSriastradh	sna->render.vertex_used += 9;
4291fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
429203b705cfSriastradh
429303b705cfSriastradh	v[0] = box->x2;
429403b705cfSriastradh	v[6] = v[3] = box->x1;
429503b705cfSriastradh	v[4] = v[1] = box->y2;
429603b705cfSriastradh	v[7] = box->y1;
429703b705cfSriastradh	v[8] = v[5] = v[2] = opacity;
429803b705cfSriastradh}
429903b705cfSriastradh
430003b705cfSriastradhsse2 fastcall static void
430103b705cfSriastradhgen3_render_composite_spans_constant_thread__sse2__boxes(struct sna *sna,
430203b705cfSriastradh							 const struct sna_composite_spans_op *op,
430303b705cfSriastradh							 const struct sna_opacity_box *box,
430403b705cfSriastradh							 int nbox)
430503b705cfSriastradh{
430603b705cfSriastradh	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
430703b705cfSriastradh	     __FUNCTION__, nbox,
430803b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
430903b705cfSriastradh	     op->base.dst.x, op->base.dst.y));
431003b705cfSriastradh
431103b705cfSriastradh	sna_vertex_lock(&sna->render);
431203b705cfSriastradh	do {
431303b705cfSriastradh		int nbox_this_time;
431403b705cfSriastradh		float *v;
431503b705cfSriastradh
431603b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
431703b705cfSriastradh		assert(nbox_this_time);
431803b705cfSriastradh		nbox -= nbox_this_time;
431903b705cfSriastradh
432003b705cfSriastradh		v = sna->render.vertices + sna->render.vertex_used;
432103b705cfSriastradh		sna->render.vertex_used += nbox_this_time * 9;
4322fe8aea9eSmrg		assert(sna->render.vertex_used <= sna->render.vertex_size);
432303b705cfSriastradh
432403b705cfSriastradh		sna_vertex_acquire__locked(&sna->render);
432503b705cfSriastradh		sna_vertex_unlock(&sna->render);
432603b705cfSriastradh
432703b705cfSriastradh		do {
432803b705cfSriastradh			v[0] = box->box.x2;
432903b705cfSriastradh			v[6] = v[3] = box->box.x1;
433003b705cfSriastradh			v[4] = v[1] = box->box.y2;
433103b705cfSriastradh			v[7] = box->box.y1;
433203b705cfSriastradh			v[8] = v[5] = v[2] = box->alpha;
433303b705cfSriastradh			v += 9;
433403b705cfSriastradh			box++;
433503b705cfSriastradh		} while (--nbox_this_time);
433603b705cfSriastradh
433703b705cfSriastradh		sna_vertex_lock(&sna->render);
433803b705cfSriastradh		sna_vertex_release__locked(&sna->render);
433903b705cfSriastradh	} while (nbox);
434003b705cfSriastradh	sna_vertex_unlock(&sna->render);
434103b705cfSriastradh}
434203b705cfSriastradh
434303b705cfSriastradhsse2 fastcall static void
434403b705cfSriastradhgen3_emit_composite_spans_primitive_constant__sse2__no_offset(struct sna *sna,
434503b705cfSriastradh							      const struct sna_composite_spans_op *op,
434603b705cfSriastradh							      const BoxRec *box,
434703b705cfSriastradh							      float opacity)
434803b705cfSriastradh{
434903b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
435003b705cfSriastradh	sna->render.vertex_used += 9;
4351fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
435203b705cfSriastradh
435303b705cfSriastradh	v[0] = box->x2;
435403b705cfSriastradh	v[6] = v[3] = box->x1;
435503b705cfSriastradh	v[4] = v[1] = box->y2;
435603b705cfSriastradh	v[7] = box->y1;
435703b705cfSriastradh	v[8] = v[5] = v[2] = opacity;
435803b705cfSriastradh}
435903b705cfSriastradh
436003b705cfSriastradhsse2 fastcall static void
436103b705cfSriastradhgen3_emit_composite_spans_primitive_constant__sse2__no_offset__boxes(const struct sna_composite_spans_op *op,
436203b705cfSriastradh								     const struct sna_opacity_box *b,
436303b705cfSriastradh								     int nbox, float *v)
436403b705cfSriastradh{
436503b705cfSriastradh	do {
436603b705cfSriastradh		v[0] = b->box.x2;
436703b705cfSriastradh		v[6] = v[3] = b->box.x1;
436803b705cfSriastradh		v[4] = v[1] = b->box.y2;
436903b705cfSriastradh		v[7] = b->box.y1;
437003b705cfSriastradh		v[8] = v[5] = v[2] = b->alpha;
437103b705cfSriastradh
437203b705cfSriastradh		v += 9;
437303b705cfSriastradh		b++;
437403b705cfSriastradh	} while (--nbox);
437503b705cfSriastradh}
437603b705cfSriastradh
437703b705cfSriastradhsse2 fastcall static void
437803b705cfSriastradhgen3_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna,
437903b705cfSriastradh							  const struct sna_composite_spans_op *op,
438003b705cfSriastradh							  const BoxRec *box,
438103b705cfSriastradh							  float opacity)
438203b705cfSriastradh{
438303b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
438403b705cfSriastradh	sna->render.vertex_used += 15;
4385fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
438603b705cfSriastradh
438703b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
438803b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
438903b705cfSriastradh	v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
439003b705cfSriastradh	v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
439103b705cfSriastradh	v[4] = opacity;
439203b705cfSriastradh
439303b705cfSriastradh	v[5] = op->base.dst.x + box->x1;
439403b705cfSriastradh	v[6] = v[1];
439503b705cfSriastradh	v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
439603b705cfSriastradh	v[8] = v[3];
439703b705cfSriastradh	v[9] = opacity;
439803b705cfSriastradh
439903b705cfSriastradh	v[10] = v[5];
440003b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
440103b705cfSriastradh	v[12] = v[7];
440203b705cfSriastradh	v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
440303b705cfSriastradh	v[14] = opacity;
440403b705cfSriastradh}
440503b705cfSriastradh
440603b705cfSriastradhsse2 fastcall static void
440703b705cfSriastradhgen3_emit_composite_spans_primitive_identity_source__sse2__boxes(const struct sna_composite_spans_op *op,
440803b705cfSriastradh								 const struct sna_opacity_box *b,
440903b705cfSriastradh								 int nbox,
441003b705cfSriastradh								 float *v)
441103b705cfSriastradh{
441203b705cfSriastradh	do {
441303b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
441403b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
441503b705cfSriastradh		v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0];
441603b705cfSriastradh		v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1];
441703b705cfSriastradh		v[4] = b->alpha;
441803b705cfSriastradh
441903b705cfSriastradh		v[5] = op->base.dst.x + b->box.x1;
442003b705cfSriastradh		v[6] = v[1];
442103b705cfSriastradh		v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0];
442203b705cfSriastradh		v[8] = v[3];
442303b705cfSriastradh		v[9] = b->alpha;
442403b705cfSriastradh
442503b705cfSriastradh		v[10] = v[5];
442603b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
442703b705cfSriastradh		v[12] = v[7];
442803b705cfSriastradh		v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1];
442903b705cfSriastradh		v[14] = b->alpha;
443003b705cfSriastradh
443103b705cfSriastradh		v += 15;
443203b705cfSriastradh		b++;
443303b705cfSriastradh	} while (--nbox);
443403b705cfSriastradh}
443503b705cfSriastradhsse2 fastcall static void
443603b705cfSriastradhgen3_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna,
443703b705cfSriastradh							const struct sna_composite_spans_op *op,
443803b705cfSriastradh							const BoxRec *box,
443903b705cfSriastradh							float opacity)
444003b705cfSriastradh{
444103b705cfSriastradh	PictTransform *transform = op->base.src.transform;
444203b705cfSriastradh	float *v;
444303b705cfSriastradh
444403b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
444503b705cfSriastradh	sna->render.vertex_used += 15;
4446fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
444703b705cfSriastradh
444803b705cfSriastradh	v[0]  = op->base.dst.x + box->x2;
444903b705cfSriastradh	v[6]  = v[1] = op->base.dst.y + box->y2;
445003b705cfSriastradh	v[10] = v[5] = op->base.dst.x + box->x1;
445103b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
445203b705cfSriastradh	v[14] = v[9] = v[4]  = opacity;
445303b705cfSriastradh
445403b705cfSriastradh	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
445503b705cfSriastradh				    (int)op->base.src.offset[1] + box->y2,
445603b705cfSriastradh				    transform, op->base.src.scale,
445703b705cfSriastradh				    &v[2], &v[3]);
445803b705cfSriastradh
445903b705cfSriastradh	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
446003b705cfSriastradh				    (int)op->base.src.offset[1] + box->y2,
446103b705cfSriastradh				    transform, op->base.src.scale,
446203b705cfSriastradh				    &v[7], &v[8]);
446303b705cfSriastradh
446403b705cfSriastradh	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
446503b705cfSriastradh				    (int)op->base.src.offset[1] + box->y1,
446603b705cfSriastradh				    transform, op->base.src.scale,
446703b705cfSriastradh				    &v[12], &v[13]);
446803b705cfSriastradh}
446903b705cfSriastradh
447003b705cfSriastradhsse2 fastcall static void
447103b705cfSriastradhgen3_emit_composite_spans_primitive_affine_source__sse2__boxes(const struct sna_composite_spans_op *op,
447203b705cfSriastradh							       const struct sna_opacity_box *b,
447303b705cfSriastradh							       int nbox,
447403b705cfSriastradh							       float *v)
447503b705cfSriastradh{
447603b705cfSriastradh	PictTransform *transform = op->base.src.transform;
447703b705cfSriastradh
447803b705cfSriastradh	do {
447903b705cfSriastradh		v[0]  = op->base.dst.x + b->box.x2;
448003b705cfSriastradh		v[6]  = v[1] = op->base.dst.y + b->box.y2;
448103b705cfSriastradh		v[10] = v[5] = op->base.dst.x + b->box.x1;
448203b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
448303b705cfSriastradh		v[14] = v[9] = v[4]  = b->alpha;
448403b705cfSriastradh
448503b705cfSriastradh		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2,
448603b705cfSriastradh					    (int)op->base.src.offset[1] + b->box.y2,
448703b705cfSriastradh					    transform, op->base.src.scale,
448803b705cfSriastradh					    &v[2], &v[3]);
448903b705cfSriastradh
449003b705cfSriastradh		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
449103b705cfSriastradh					    (int)op->base.src.offset[1] + b->box.y2,
449203b705cfSriastradh					    transform, op->base.src.scale,
449303b705cfSriastradh					    &v[7], &v[8]);
449403b705cfSriastradh
449503b705cfSriastradh		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
449603b705cfSriastradh					    (int)op->base.src.offset[1] + b->box.y1,
449703b705cfSriastradh					    transform, op->base.src.scale,
449803b705cfSriastradh					    &v[12], &v[13]);
449903b705cfSriastradh		v += 15;
450003b705cfSriastradh		b++;
450103b705cfSriastradh	} while (--nbox);
450203b705cfSriastradh}
450303b705cfSriastradh
450403b705cfSriastradhsse2 fastcall static void
450503b705cfSriastradhgen3_emit_composite_spans_primitive_identity_gradient__sse2(struct sna *sna,
450603b705cfSriastradh							    const struct sna_composite_spans_op *op,
450703b705cfSriastradh							    const BoxRec *box,
450803b705cfSriastradh							    float opacity)
450903b705cfSriastradh{
451003b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
451103b705cfSriastradh	sna->render.vertex_used += 15;
4512fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
451303b705cfSriastradh
451403b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
451503b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
451603b705cfSriastradh	v[2] = op->base.src.offset[0] + box->x2;
451703b705cfSriastradh	v[3] = op->base.src.offset[1] + box->y2;
451803b705cfSriastradh	v[4] = opacity;
451903b705cfSriastradh
452003b705cfSriastradh	v[5] = op->base.dst.x + box->x1;
452103b705cfSriastradh	v[6] = v[1];
452203b705cfSriastradh	v[7] = op->base.src.offset[0] + box->x1;
452303b705cfSriastradh	v[8] = v[3];
452403b705cfSriastradh	v[9] = opacity;
452503b705cfSriastradh
452603b705cfSriastradh	v[10] = v[5];
452703b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
452803b705cfSriastradh	v[12] = v[7];
452903b705cfSriastradh	v[13] = op->base.src.offset[1] + box->y1;
453003b705cfSriastradh	v[14] = opacity;
453103b705cfSriastradh}
453203b705cfSriastradh
453303b705cfSriastradhsse2 fastcall static void
453403b705cfSriastradhgen3_emit_composite_spans_primitive_identity_gradient__sse2__boxes(const struct sna_composite_spans_op *op,
453503b705cfSriastradh								   const struct sna_opacity_box *b,
453603b705cfSriastradh								   int nbox,
453703b705cfSriastradh								   float *v)
453803b705cfSriastradh{
453903b705cfSriastradh	do {
454003b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
454103b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
454203b705cfSriastradh		v[2] = op->base.src.offset[0] + b->box.x2;
454303b705cfSriastradh		v[3] = op->base.src.offset[1] + b->box.y2;
454403b705cfSriastradh		v[4] = b->alpha;
454503b705cfSriastradh
454603b705cfSriastradh		v[5] = op->base.dst.x + b->box.x1;
454703b705cfSriastradh		v[6] = v[1];
454803b705cfSriastradh		v[7] = op->base.src.offset[0] + b->box.x1;
454903b705cfSriastradh		v[8] = v[3];
455003b705cfSriastradh		v[9] = b->alpha;
455103b705cfSriastradh
455203b705cfSriastradh		v[10] = v[5];
455303b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
455403b705cfSriastradh		v[12] = v[7];
455503b705cfSriastradh		v[13] = op->base.src.offset[1] + b->box.y1;
455603b705cfSriastradh		v[14] = b->alpha;
455703b705cfSriastradh
455803b705cfSriastradh		v += 15;
455903b705cfSriastradh		b++;
456003b705cfSriastradh	} while (--nbox);
456103b705cfSriastradh}
456203b705cfSriastradh
456303b705cfSriastradhsse2 fastcall static void
456403b705cfSriastradhgen3_emit_composite_spans_primitive_affine_gradient__sse2(struct sna *sna,
456503b705cfSriastradh							  const struct sna_composite_spans_op *op,
456603b705cfSriastradh							  const BoxRec *box,
456703b705cfSriastradh							  float opacity)
456803b705cfSriastradh{
456903b705cfSriastradh	PictTransform *transform = op->base.src.transform;
457003b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
457103b705cfSriastradh	sna->render.vertex_used += 15;
4572fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
457303b705cfSriastradh
457403b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
457503b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
457603b705cfSriastradh	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x2,
457703b705cfSriastradh				    op->base.src.offset[1] + box->y2,
457803b705cfSriastradh				    transform, op->base.src.scale,
457903b705cfSriastradh				    &v[2], &v[3]);
458003b705cfSriastradh	v[4] = opacity;
458103b705cfSriastradh
458203b705cfSriastradh	v[5] = op->base.dst.x + box->x1;
458303b705cfSriastradh	v[6] = v[1];
458403b705cfSriastradh	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
458503b705cfSriastradh				    op->base.src.offset[1] + box->y2,
458603b705cfSriastradh				    transform, op->base.src.scale,
458703b705cfSriastradh				    &v[7], &v[8]);
458803b705cfSriastradh	v[9] = opacity;
458903b705cfSriastradh
459003b705cfSriastradh	v[10] = v[5];
459103b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
459203b705cfSriastradh	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
459303b705cfSriastradh				    op->base.src.offset[1] + box->y1,
459403b705cfSriastradh				    transform, op->base.src.scale,
459503b705cfSriastradh				    &v[12], &v[13]);
459603b705cfSriastradh	v[14] = opacity;
459703b705cfSriastradh}
459803b705cfSriastradh
459903b705cfSriastradhsse2 fastcall static void
460003b705cfSriastradhgen3_emit_composite_spans_primitive_affine_gradient__sse2__boxes(const struct sna_composite_spans_op *op,
460103b705cfSriastradh								 const struct sna_opacity_box *b,
460203b705cfSriastradh								 int nbox,
460303b705cfSriastradh								 float *v)
460403b705cfSriastradh{
460503b705cfSriastradh	PictTransform *transform = op->base.src.transform;
460603b705cfSriastradh
460703b705cfSriastradh	do {
460803b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
460903b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
461003b705cfSriastradh		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
461103b705cfSriastradh					    op->base.src.offset[1] + b->box.y2,
461203b705cfSriastradh					    transform, op->base.src.scale,
461303b705cfSriastradh					    &v[2], &v[3]);
461403b705cfSriastradh		v[4] = b->alpha;
461503b705cfSriastradh
461603b705cfSriastradh		v[5] = op->base.dst.x + b->box.x1;
461703b705cfSriastradh		v[6] = v[1];
461803b705cfSriastradh		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
461903b705cfSriastradh					    op->base.src.offset[1] + b->box.y2,
462003b705cfSriastradh					    transform, op->base.src.scale,
462103b705cfSriastradh					    &v[7], &v[8]);
462203b705cfSriastradh		v[9] = b->alpha;
462303b705cfSriastradh
462403b705cfSriastradh		v[10] = v[5];
462503b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
462603b705cfSriastradh		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
462703b705cfSriastradh					    op->base.src.offset[1] + b->box.y1,
462803b705cfSriastradh					    transform, op->base.src.scale,
462903b705cfSriastradh					    &v[12], &v[13]);
463003b705cfSriastradh		v[14] = b->alpha;
463103b705cfSriastradh		v += 15;
463203b705cfSriastradh		b++;
463303b705cfSriastradh	} while (--nbox);
463403b705cfSriastradh}
463503b705cfSriastradh#endif
463603b705cfSriastradh
463703b705cfSriastradhfastcall static void
463803b705cfSriastradhgen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna,
463903b705cfSriastradh						    const struct sna_composite_spans_op *op,
464003b705cfSriastradh						    const BoxRec *box,
464103b705cfSriastradh						    float opacity)
464203b705cfSriastradh{
464303b705cfSriastradh	PictTransform *transform = op->base.src.transform;
464403b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
464503b705cfSriastradh	sna->render.vertex_used += 15;
4646fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
464703b705cfSriastradh
464803b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
464903b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
465003b705cfSriastradh	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x2,
465103b705cfSriastradh				    op->base.src.offset[1] + box->y2,
465203b705cfSriastradh				    transform, op->base.src.scale,
465303b705cfSriastradh				    &v[2], &v[3]);
465403b705cfSriastradh	v[4] = opacity;
465503b705cfSriastradh
465603b705cfSriastradh	v[5] = op->base.dst.x + box->x1;
465703b705cfSriastradh	v[6] = v[1];
465803b705cfSriastradh	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
465903b705cfSriastradh				    op->base.src.offset[1] + box->y2,
466003b705cfSriastradh				    transform, op->base.src.scale,
466103b705cfSriastradh				    &v[7], &v[8]);
466203b705cfSriastradh	v[9] = opacity;
466303b705cfSriastradh
466403b705cfSriastradh	v[10] = v[5];
466503b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
466603b705cfSriastradh	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
466703b705cfSriastradh				    op->base.src.offset[1] + box->y1,
466803b705cfSriastradh				    transform, op->base.src.scale,
466903b705cfSriastradh				    &v[12], &v[13]);
467003b705cfSriastradh	v[14] = opacity;
467103b705cfSriastradh}
467203b705cfSriastradh
467303b705cfSriastradhfastcall static void
467403b705cfSriastradhgen3_emit_composite_spans_primitive_affine_gradient__boxes(const struct sna_composite_spans_op *op,
467503b705cfSriastradh							   const struct sna_opacity_box *b,
467603b705cfSriastradh							   int nbox,
467703b705cfSriastradh							   float *v)
467803b705cfSriastradh{
467903b705cfSriastradh	PictTransform *transform = op->base.src.transform;
468003b705cfSriastradh
468103b705cfSriastradh	do {
468203b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
468303b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
468403b705cfSriastradh		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
468503b705cfSriastradh					    op->base.src.offset[1] + b->box.y2,
468603b705cfSriastradh					    transform, op->base.src.scale,
468703b705cfSriastradh					    &v[2], &v[3]);
468803b705cfSriastradh		v[4] = b->alpha;
468903b705cfSriastradh
469003b705cfSriastradh		v[5] = op->base.dst.x + b->box.x1;
469103b705cfSriastradh		v[6] = v[1];
469203b705cfSriastradh		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
469303b705cfSriastradh					    op->base.src.offset[1] + b->box.y2,
469403b705cfSriastradh					    transform, op->base.src.scale,
469503b705cfSriastradh					    &v[7], &v[8]);
469603b705cfSriastradh		v[9] = b->alpha;
469703b705cfSriastradh
469803b705cfSriastradh		v[10] = v[5];
469903b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
470003b705cfSriastradh		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
470103b705cfSriastradh					    op->base.src.offset[1] + b->box.y1,
470203b705cfSriastradh					    transform, op->base.src.scale,
470303b705cfSriastradh					    &v[12], &v[13]);
470403b705cfSriastradh		v[14] = b->alpha;
470503b705cfSriastradh		v += 15;
470603b705cfSriastradh		b++;
470703b705cfSriastradh	} while (--nbox);
470803b705cfSriastradh}
470903b705cfSriastradh
471003b705cfSriastradhfastcall static void
471103b705cfSriastradhgen3_emit_composite_spans_primitive(struct sna *sna,
471203b705cfSriastradh				    const struct sna_composite_spans_op *op,
471303b705cfSriastradh				    const BoxRec *box,
471403b705cfSriastradh				    float opacity)
471503b705cfSriastradh{
471603b705cfSriastradh	gen3_emit_composite_spans_vertex(sna, op,
471703b705cfSriastradh					 box->x2, box->y2,
471803b705cfSriastradh					 opacity);
471903b705cfSriastradh	gen3_emit_composite_spans_vertex(sna, op,
472003b705cfSriastradh					 box->x1, box->y2,
472103b705cfSriastradh					 opacity);
472203b705cfSriastradh	gen3_emit_composite_spans_vertex(sna, op,
472303b705cfSriastradh					 box->x1, box->y1,
472403b705cfSriastradh					 opacity);
472503b705cfSriastradh}
472603b705cfSriastradh
472703b705cfSriastradhfastcall static void
472803b705cfSriastradhgen3_render_composite_spans_constant_box(struct sna *sna,
472903b705cfSriastradh					 const struct sna_composite_spans_op *op,
473003b705cfSriastradh					 const BoxRec *box, float opacity)
473103b705cfSriastradh{
473203b705cfSriastradh	float *v;
473303b705cfSriastradh	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
473403b705cfSriastradh	     __FUNCTION__,
473503b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
473603b705cfSriastradh	     opacity,
473703b705cfSriastradh	     op->base.dst.x, op->base.dst.y,
473803b705cfSriastradh	     box->x1, box->y1,
473903b705cfSriastradh	     box->x2 - box->x1,
474003b705cfSriastradh	     box->y2 - box->y1));
474103b705cfSriastradh
474203b705cfSriastradh	gen3_get_rectangles(sna, &op->base, 1);
474303b705cfSriastradh
474403b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
474503b705cfSriastradh	sna->render.vertex_used += 9;
4746fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
474703b705cfSriastradh
474803b705cfSriastradh	v[0] = box->x2;
474903b705cfSriastradh	v[6] = v[3] = box->x1;
475003b705cfSriastradh	v[4] = v[1] = box->y2;
475103b705cfSriastradh	v[7] = box->y1;
475203b705cfSriastradh	v[8] = v[5] = v[2] = opacity;
475303b705cfSriastradh}
475403b705cfSriastradh
475503b705cfSriastradhfastcall static void
475603b705cfSriastradhgen3_render_composite_spans_constant_thread_boxes(struct sna *sna,
475703b705cfSriastradh						  const struct sna_composite_spans_op *op,
475803b705cfSriastradh						  const struct sna_opacity_box *box,
475903b705cfSriastradh						  int nbox)
476003b705cfSriastradh{
476103b705cfSriastradh	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
476203b705cfSriastradh	     __FUNCTION__, nbox,
476303b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
476403b705cfSriastradh	     op->base.dst.x, op->base.dst.y));
476503b705cfSriastradh
476603b705cfSriastradh	sna_vertex_lock(&sna->render);
476703b705cfSriastradh	do {
476803b705cfSriastradh		int nbox_this_time;
476903b705cfSriastradh		float *v;
477003b705cfSriastradh
477103b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
477203b705cfSriastradh		assert(nbox_this_time);
477303b705cfSriastradh		nbox -= nbox_this_time;
477403b705cfSriastradh
477503b705cfSriastradh		v = sna->render.vertices + sna->render.vertex_used;
477603b705cfSriastradh		sna->render.vertex_used += nbox_this_time * 9;
4777fe8aea9eSmrg		assert(sna->render.vertex_used <= sna->render.vertex_size);
477803b705cfSriastradh
477903b705cfSriastradh		sna_vertex_acquire__locked(&sna->render);
478003b705cfSriastradh		sna_vertex_unlock(&sna->render);
478103b705cfSriastradh
478203b705cfSriastradh		do {
478303b705cfSriastradh			v[0] = box->box.x2;
478403b705cfSriastradh			v[6] = v[3] = box->box.x1;
478503b705cfSriastradh			v[4] = v[1] = box->box.y2;
478603b705cfSriastradh			v[7] = box->box.y1;
478703b705cfSriastradh			v[8] = v[5] = v[2] = box->alpha;
478803b705cfSriastradh			v += 9;
478903b705cfSriastradh			box++;
479003b705cfSriastradh		} while (--nbox_this_time);
479103b705cfSriastradh
479203b705cfSriastradh		sna_vertex_lock(&sna->render);
479303b705cfSriastradh		sna_vertex_release__locked(&sna->render);
479403b705cfSriastradh	} while (nbox);
479503b705cfSriastradh	sna_vertex_unlock(&sna->render);
479603b705cfSriastradh}
479703b705cfSriastradh
479803b705cfSriastradhfastcall static void
479903b705cfSriastradhgen3_render_composite_spans_box(struct sna *sna,
480003b705cfSriastradh				const struct sna_composite_spans_op *op,
480103b705cfSriastradh				const BoxRec *box, float opacity)
480203b705cfSriastradh{
480303b705cfSriastradh	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
480403b705cfSriastradh	     __FUNCTION__,
480503b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
480603b705cfSriastradh	     opacity,
480703b705cfSriastradh	     op->base.dst.x, op->base.dst.y,
480803b705cfSriastradh	     box->x1, box->y1,
480903b705cfSriastradh	     box->x2 - box->x1,
481003b705cfSriastradh	     box->y2 - box->y1));
481103b705cfSriastradh
481203b705cfSriastradh	gen3_get_rectangles(sna, &op->base, 1);
481303b705cfSriastradh	op->prim_emit(sna, op, box, opacity);
481403b705cfSriastradh}
481503b705cfSriastradh
481603b705cfSriastradhstatic void
481703b705cfSriastradhgen3_render_composite_spans_boxes(struct sna *sna,
481803b705cfSriastradh				  const struct sna_composite_spans_op *op,
481903b705cfSriastradh				  const BoxRec *box, int nbox,
482003b705cfSriastradh				  float opacity)
482103b705cfSriastradh{
482203b705cfSriastradh	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
482303b705cfSriastradh	     __FUNCTION__, nbox,
482403b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
482503b705cfSriastradh	     opacity,
482603b705cfSriastradh	     op->base.dst.x, op->base.dst.y));
482703b705cfSriastradh
482803b705cfSriastradh	do {
482903b705cfSriastradh		int nbox_this_time;
483003b705cfSriastradh
483103b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
483203b705cfSriastradh		nbox -= nbox_this_time;
483303b705cfSriastradh
483403b705cfSriastradh		do {
483503b705cfSriastradh			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
483603b705cfSriastradh			     box->x1, box->y1,
483703b705cfSriastradh			     box->x2 - box->x1,
483803b705cfSriastradh			     box->y2 - box->y1));
483903b705cfSriastradh
484003b705cfSriastradh			op->prim_emit(sna, op, box++, opacity);
484103b705cfSriastradh		} while (--nbox_this_time);
484203b705cfSriastradh	} while (nbox);
484303b705cfSriastradh}
484403b705cfSriastradh
484503b705cfSriastradhfastcall static void
484603b705cfSriastradhgen3_render_composite_spans_boxes__thread(struct sna *sna,
484703b705cfSriastradh					  const struct sna_composite_spans_op *op,
484803b705cfSriastradh					  const struct sna_opacity_box *box,
484903b705cfSriastradh					  int nbox)
485003b705cfSriastradh{
485103b705cfSriastradh	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
485203b705cfSriastradh	     __FUNCTION__, nbox,
485303b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
485403b705cfSriastradh	     op->base.dst.x, op->base.dst.y));
485503b705cfSriastradh
485603b705cfSriastradh	sna_vertex_lock(&sna->render);
485703b705cfSriastradh	do {
485803b705cfSriastradh		int nbox_this_time;
485903b705cfSriastradh		float *v;
486003b705cfSriastradh
486103b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
486203b705cfSriastradh		assert(nbox_this_time);
486303b705cfSriastradh		nbox -= nbox_this_time;
486403b705cfSriastradh
486503b705cfSriastradh		v = sna->render.vertices + sna->render.vertex_used;
486603b705cfSriastradh		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
4867fe8aea9eSmrg		assert(sna->render.vertex_used <= sna->render.vertex_size);
486803b705cfSriastradh
486903b705cfSriastradh		sna_vertex_acquire__locked(&sna->render);
487003b705cfSriastradh		sna_vertex_unlock(&sna->render);
487103b705cfSriastradh
487203b705cfSriastradh		op->emit_boxes(op, box, nbox_this_time, v);
487303b705cfSriastradh		box += nbox_this_time;
487403b705cfSriastradh
487503b705cfSriastradh		sna_vertex_lock(&sna->render);
487603b705cfSriastradh		sna_vertex_release__locked(&sna->render);
487703b705cfSriastradh	} while (nbox);
487803b705cfSriastradh	sna_vertex_unlock(&sna->render);
487903b705cfSriastradh}
488003b705cfSriastradh
488103b705cfSriastradhfastcall static void
488203b705cfSriastradhgen3_render_composite_spans_done(struct sna *sna,
488303b705cfSriastradh				 const struct sna_composite_spans_op *op)
488403b705cfSriastradh{
488503b705cfSriastradh	if (sna->render.vertex_offset)
488603b705cfSriastradh		gen3_vertex_flush(sna);
488703b705cfSriastradh
488803b705cfSriastradh	DBG(("%s()\n", __FUNCTION__));
488903b705cfSriastradh
489003b705cfSriastradh	if (op->base.src.bo)
489103b705cfSriastradh		kgem_bo_destroy(&sna->kgem, op->base.src.bo);
489203b705cfSriastradh
489303b705cfSriastradh	sna_render_composite_redirect_done(sna, &op->base);
489403b705cfSriastradh}
489503b705cfSriastradh
489603b705cfSriastradhstatic bool
489703b705cfSriastradhgen3_check_composite_spans(struct sna *sna,
489803b705cfSriastradh			   uint8_t op, PicturePtr src, PicturePtr dst,
489903b705cfSriastradh			   int16_t width, int16_t height, unsigned flags)
490003b705cfSriastradh{
490103b705cfSriastradh	if (op >= ARRAY_SIZE(gen3_blend_op))
490203b705cfSriastradh		return false;
490303b705cfSriastradh
490403b705cfSriastradh	if (gen3_composite_fallback(sna, op, src, NULL, dst))
490503b705cfSriastradh		return false;
490603b705cfSriastradh
490703b705cfSriastradh	if (need_tiling(sna, width, height) &&
490803b705cfSriastradh	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
490903b705cfSriastradh		DBG(("%s: fallback, tiled operation not on GPU\n",
491003b705cfSriastradh		     __FUNCTION__));
491103b705cfSriastradh		return false;
491203b705cfSriastradh	}
491303b705cfSriastradh
491403b705cfSriastradh	return true;
491503b705cfSriastradh}
491603b705cfSriastradh
491703b705cfSriastradhstatic bool
491803b705cfSriastradhgen3_render_composite_spans(struct sna *sna,
491903b705cfSriastradh			    uint8_t op,
492003b705cfSriastradh			    PicturePtr src,
492103b705cfSriastradh			    PicturePtr dst,
492203b705cfSriastradh			    int16_t src_x,  int16_t src_y,
492303b705cfSriastradh			    int16_t dst_x,  int16_t dst_y,
492403b705cfSriastradh			    int16_t width,  int16_t height,
492503b705cfSriastradh			    unsigned flags,
492603b705cfSriastradh			    struct sna_composite_spans_op *tmp)
492703b705cfSriastradh{
492803b705cfSriastradh	bool no_offset;
492903b705cfSriastradh
493003b705cfSriastradh	DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__,
493103b705cfSriastradh	     src_x, src_y, dst_x, dst_y, width, height));
493203b705cfSriastradh
493303b705cfSriastradh	assert(gen3_check_composite_spans(sna, op, src, dst, width, height, flags));
493403b705cfSriastradh
493503b705cfSriastradh	if (need_tiling(sna, width, height)) {
493603b705cfSriastradh		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
493703b705cfSriastradh		     __FUNCTION__, width, height));
493803b705cfSriastradh		return sna_tiling_composite_spans(op, src, dst,
493903b705cfSriastradh						  src_x, src_y, dst_x, dst_y,
494003b705cfSriastradh						  width, height, flags, tmp);
494103b705cfSriastradh	}
494203b705cfSriastradh
494303b705cfSriastradh	if (!gen3_composite_set_target(sna, &tmp->base, dst,
494442542f5fSchristos				       dst_x, dst_y, width, height,
494542542f5fSchristos				       true)) {
494603b705cfSriastradh		DBG(("%s: unable to set render target\n",
494703b705cfSriastradh		     __FUNCTION__));
494803b705cfSriastradh		return false;
494903b705cfSriastradh	}
495003b705cfSriastradh
495103b705cfSriastradh	tmp->base.op = op;
495203b705cfSriastradh	tmp->base.rb_reversed = gen3_dst_rb_reversed(tmp->base.dst.format);
495303b705cfSriastradh	tmp->base.src.u.gen3.type = SHADER_TEXTURE;
495403b705cfSriastradh	tmp->base.src.is_affine = true;
495503b705cfSriastradh	DBG(("%s: preparing source\n", __FUNCTION__));
495603b705cfSriastradh	switch (gen3_composite_picture(sna, src, &tmp->base, &tmp->base.src,
495703b705cfSriastradh				       src_x, src_y,
495803b705cfSriastradh				       width, height,
495903b705cfSriastradh				       dst_x, dst_y,
496003b705cfSriastradh				       dst->polyMode == PolyModePrecise)) {
496103b705cfSriastradh	case -1:
496203b705cfSriastradh		goto cleanup_dst;
496303b705cfSriastradh	case 0:
496403b705cfSriastradh		tmp->base.src.u.gen3.type = SHADER_ZERO;
496503b705cfSriastradh		break;
496603b705cfSriastradh	case 1:
496703b705cfSriastradh		gen3_composite_channel_convert(&tmp->base.src);
496803b705cfSriastradh		break;
496903b705cfSriastradh	}
497003b705cfSriastradh	DBG(("%s: source type=%d\n", __FUNCTION__, tmp->base.src.u.gen3.type));
497103b705cfSriastradh
497203b705cfSriastradh	if (tmp->base.src.u.gen3.type != SHADER_ZERO)
497303b705cfSriastradh		tmp->base.mask.u.gen3.type = SHADER_OPACITY;
497403b705cfSriastradh
497503b705cfSriastradh	no_offset = tmp->base.dst.x == 0 && tmp->base.dst.y == 0;
497603b705cfSriastradh	tmp->box   = gen3_render_composite_spans_box;
497703b705cfSriastradh	tmp->boxes = gen3_render_composite_spans_boxes;
497803b705cfSriastradh	tmp->thread_boxes = gen3_render_composite_spans_boxes__thread;
497903b705cfSriastradh	tmp->done  = gen3_render_composite_spans_done;
498003b705cfSriastradh	tmp->prim_emit = gen3_emit_composite_spans_primitive;
498103b705cfSriastradh	switch (tmp->base.src.u.gen3.type) {
498203b705cfSriastradh	case SHADER_NONE:
498303b705cfSriastradh		assert(0);
498403b705cfSriastradh	case SHADER_ZERO:
498503b705cfSriastradh		if (no_offset) {
498603b705cfSriastradh			tmp->prim_emit = gen3_emit_composite_spans_primitive_zero_no_offset;
498703b705cfSriastradh			tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero_no_offset__boxes;
498803b705cfSriastradh		} else {
498903b705cfSriastradh			tmp->prim_emit = gen3_emit_composite_spans_primitive_zero;
499003b705cfSriastradh			tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero__boxes;
499103b705cfSriastradh		}
499203b705cfSriastradh		break;
499303b705cfSriastradh	case SHADER_BLACK:
499403b705cfSriastradh	case SHADER_WHITE:
499503b705cfSriastradh	case SHADER_CONSTANT:
499603b705cfSriastradh		if (no_offset) {
499703b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
499803b705cfSriastradh			if (sna->cpu_features & SSE2) {
499903b705cfSriastradh				tmp->box = gen3_render_composite_spans_constant_box__sse2;
500003b705cfSriastradh				tmp->thread_boxes = gen3_render_composite_spans_constant_thread__sse2__boxes;
500103b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant__sse2__no_offset;
500203b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__sse2__no_offset__boxes;
500303b705cfSriastradh			} else
500403b705cfSriastradh#endif
500503b705cfSriastradh			{
500603b705cfSriastradh				tmp->box = gen3_render_composite_spans_constant_box;
500703b705cfSriastradh				tmp->thread_boxes = gen3_render_composite_spans_constant_thread_boxes;
500803b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant_no_offset;
500903b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant_no_offset__boxes;
501003b705cfSriastradh			}
501103b705cfSriastradh		} else {
501203b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
501303b705cfSriastradh			if (sna->cpu_features & SSE2) {
501403b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant__sse2;
501503b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__sse2__boxes;
501603b705cfSriastradh			} else
501703b705cfSriastradh#endif
501803b705cfSriastradh			{
501903b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant;
502003b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__boxes;
502103b705cfSriastradh			}
502203b705cfSriastradh		}
502303b705cfSriastradh		break;
502403b705cfSriastradh	case SHADER_LINEAR:
502503b705cfSriastradh	case SHADER_RADIAL:
502603b705cfSriastradh		if (tmp->base.src.transform == NULL) {
502703b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
502803b705cfSriastradh			if (sna->cpu_features & SSE2) {
502903b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient__sse2;
503003b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__sse2__boxes;
503103b705cfSriastradh			} else
503203b705cfSriastradh#endif
503303b705cfSriastradh			{
503403b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient;
503503b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__boxes;
503603b705cfSriastradh			}
503703b705cfSriastradh		} else if (tmp->base.src.is_affine) {
503803b705cfSriastradh			tmp->base.src.scale[1] = tmp->base.src.scale[0] = 1. / tmp->base.src.transform->matrix[2][2];
503903b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
504003b705cfSriastradh			if (sna->cpu_features & SSE2) {
504103b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient__sse2;
504203b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__sse2__boxes;
504303b705cfSriastradh			} else
504403b705cfSriastradh#endif
504503b705cfSriastradh			{
504603b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient;
504703b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__boxes;
504803b705cfSriastradh			}
504903b705cfSriastradh		}
505003b705cfSriastradh		break;
505103b705cfSriastradh	case SHADER_TEXTURE:
505203b705cfSriastradh		if (tmp->base.src.transform == NULL) {
505303b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
505403b705cfSriastradh			if (sna->cpu_features & SSE2) {
505503b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source__sse2;
505603b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__sse2__boxes;
505703b705cfSriastradh			} else
505803b705cfSriastradh#endif
505903b705cfSriastradh			{
506003b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source;
506103b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__boxes;
506203b705cfSriastradh			}
506303b705cfSriastradh		} else if (tmp->base.src.is_affine) {
506403b705cfSriastradh			tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
506503b705cfSriastradh			tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
506603b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
506703b705cfSriastradh			if (sna->cpu_features & SSE2) {
506803b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source__sse2;
506903b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__sse2__boxes;
507003b705cfSriastradh			} else
507103b705cfSriastradh#endif
507203b705cfSriastradh			{
507303b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source;
507403b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__boxes;
507503b705cfSriastradh			}
507603b705cfSriastradh		}
507703b705cfSriastradh		break;
507803b705cfSriastradh	}
507903b705cfSriastradh	if (tmp->emit_boxes == NULL)
508003b705cfSriastradh		tmp->thread_boxes = NULL;
508103b705cfSriastradh
508203b705cfSriastradh	tmp->base.mask.bo = NULL;
508303b705cfSriastradh
508403b705cfSriastradh	tmp->base.floats_per_vertex = 2;
508503b705cfSriastradh	if (!is_constant_ps(tmp->base.src.u.gen3.type))
508603b705cfSriastradh		tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
508703b705cfSriastradh	tmp->base.floats_per_vertex +=
508803b705cfSriastradh		tmp->base.mask.u.gen3.type == SHADER_OPACITY;
508903b705cfSriastradh	tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;
509003b705cfSriastradh
509103b705cfSriastradh	if (!kgem_check_bo(&sna->kgem,
509203b705cfSriastradh			   tmp->base.dst.bo, tmp->base.src.bo,
509303b705cfSriastradh			   NULL)) {
509403b705cfSriastradh		kgem_submit(&sna->kgem);
509503b705cfSriastradh		if (!kgem_check_bo(&sna->kgem,
509603b705cfSriastradh				   tmp->base.dst.bo, tmp->base.src.bo,
509703b705cfSriastradh				   NULL))
509803b705cfSriastradh			goto cleanup_src;
509903b705cfSriastradh	}
510003b705cfSriastradh
510103b705cfSriastradh	gen3_align_vertex(sna, &tmp->base);
510242542f5fSchristos	gen3_emit_composite_state(sna, &tmp->base);
510303b705cfSriastradh	return true;
510403b705cfSriastradh
510503b705cfSriastradhcleanup_src:
510603b705cfSriastradh	if (tmp->base.src.bo)
510703b705cfSriastradh		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
510803b705cfSriastradhcleanup_dst:
510903b705cfSriastradh	if (tmp->base.redirect.real_bo)
511003b705cfSriastradh		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
511103b705cfSriastradh	return false;
511203b705cfSriastradh}
511303b705cfSriastradh
511403b705cfSriastradhstatic void
511503b705cfSriastradhgen3_emit_video_state(struct sna *sna,
511603b705cfSriastradh		      struct sna_video *video,
511703b705cfSriastradh		      struct sna_video_frame *frame,
511803b705cfSriastradh		      PixmapPtr pixmap,
511903b705cfSriastradh		      struct kgem_bo *dst_bo,
512003b705cfSriastradh		      int width, int height,
512103b705cfSriastradh		      bool bilinear)
512203b705cfSriastradh{
512303b705cfSriastradh	struct gen3_render_state *state = &sna->render_state.gen3;
512403b705cfSriastradh	uint32_t id, ms3, rewind;
512503b705cfSriastradh
512603b705cfSriastradh	gen3_emit_target(sna, dst_bo, width, height,
512703b705cfSriastradh			 sna_format_for_depth(pixmap->drawable.depth));
512803b705cfSriastradh
512903b705cfSriastradh	/* XXX share with composite? Is it worth the effort? */
513003b705cfSriastradh	if ((state->last_shader & (1<<31)) == 0) {
513103b705cfSriastradh		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
513203b705cfSriastradh			  I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) |
513303b705cfSriastradh			  2);
513403b705cfSriastradh		OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
513503b705cfSriastradh		OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
513603b705cfSriastradh			  S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
513703b705cfSriastradh			  S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
513803b705cfSriastradh			  S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
513903b705cfSriastradh			  S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
514003b705cfSriastradh			  S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
514103b705cfSriastradh			  S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
514203b705cfSriastradh			  S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
514303b705cfSriastradh		OUT_BATCH((2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
514403b705cfSriastradh			  (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
514503b705cfSriastradh			  S6_COLOR_WRITE_ENABLE);
514603b705cfSriastradh
514703b705cfSriastradh		state->last_blend = 0;
514803b705cfSriastradh		state->floats_per_vertex = 4;
514903b705cfSriastradh	}
515003b705cfSriastradh
515103b705cfSriastradh	if (!is_planar_fourcc(frame->id)) {
515203b705cfSriastradh		rewind = sna->kgem.nbatch;
515303b705cfSriastradh		OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
515403b705cfSriastradh		OUT_BATCH(0x0000001);	/* constant 0 */
515503b705cfSriastradh		/* constant 0: brightness/contrast */
515603b705cfSriastradh		OUT_BATCH_F(video->brightness / 128.0);
515703b705cfSriastradh		OUT_BATCH_F(video->contrast / 255.0);
515803b705cfSriastradh		OUT_BATCH_F(0.0);
515903b705cfSriastradh		OUT_BATCH_F(0.0);
516003b705cfSriastradh		if (state->last_constants &&
516103b705cfSriastradh		    memcmp(&sna->kgem.batch[state->last_constants],
516203b705cfSriastradh			   &sna->kgem.batch[rewind],
516303b705cfSriastradh			   6*sizeof(uint32_t)) == 0)
516403b705cfSriastradh			sna->kgem.nbatch = rewind;
516503b705cfSriastradh		else
516603b705cfSriastradh			state->last_constants = rewind;
516703b705cfSriastradh
516803b705cfSriastradh		rewind = sna->kgem.nbatch;
516903b705cfSriastradh		OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3);
517003b705cfSriastradh		OUT_BATCH(0x00000001);
517103b705cfSriastradh		OUT_BATCH(SS2_COLORSPACE_CONVERSION |
517203b705cfSriastradh			  (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
517303b705cfSriastradh			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
517403b705cfSriastradh		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
517503b705cfSriastradh			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
517603b705cfSriastradh			  (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
517703b705cfSriastradh			  SS3_NORMALIZED_COORDS);
517803b705cfSriastradh		OUT_BATCH(0x00000000);
517903b705cfSriastradh		if (state->last_sampler &&
518003b705cfSriastradh		    memcmp(&sna->kgem.batch[state->last_sampler],
518103b705cfSriastradh			   &sna->kgem.batch[rewind],
518203b705cfSriastradh			   5*sizeof(uint32_t)) == 0)
518303b705cfSriastradh			sna->kgem.nbatch = rewind;
518403b705cfSriastradh		else
518503b705cfSriastradh			state->last_sampler = rewind;
518603b705cfSriastradh
518703b705cfSriastradh		OUT_BATCH(_3DSTATE_MAP_STATE | 3);
518803b705cfSriastradh		OUT_BATCH(0x00000001);	/* texture map #1 */
518903b705cfSriastradh		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
519003b705cfSriastradh					 frame->bo,
519103b705cfSriastradh					 I915_GEM_DOMAIN_SAMPLER << 16,
519203b705cfSriastradh					 0));
519303b705cfSriastradh
519403b705cfSriastradh		ms3 = MAPSURF_422;
519503b705cfSriastradh		switch (frame->id) {
519603b705cfSriastradh		case FOURCC_YUY2:
519703b705cfSriastradh			ms3 |= MT_422_YCRCB_NORMAL;
519803b705cfSriastradh			break;
519903b705cfSriastradh		case FOURCC_UYVY:
520003b705cfSriastradh			ms3 |= MT_422_YCRCB_SWAPY;
520103b705cfSriastradh			break;
520203b705cfSriastradh		}
520303b705cfSriastradh		ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
520403b705cfSriastradh		ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
520503b705cfSriastradh		OUT_BATCH(ms3);
520603b705cfSriastradh		OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
520703b705cfSriastradh
520803b705cfSriastradh		id = 1<<31 | 1<<1 | !!video->brightness;
520903b705cfSriastradh		if (state->last_shader != id) {
521003b705cfSriastradh			state->last_shader = id;
521103b705cfSriastradh			id = sna->kgem.nbatch++;
521203b705cfSriastradh
521303b705cfSriastradh			gen3_fs_dcl(FS_S0);
521403b705cfSriastradh			gen3_fs_dcl(FS_T0);
521503b705cfSriastradh			gen3_fs_texld(FS_OC, FS_S0, FS_T0);
521603b705cfSriastradh			if (video->brightness != 0) {
521703b705cfSriastradh				gen3_fs_add(FS_OC,
521803b705cfSriastradh					    gen3_fs_operand_reg(FS_OC),
521903b705cfSriastradh					    gen3_fs_operand(FS_C0, X, X, X, ZERO));
522003b705cfSriastradh			}
522103b705cfSriastradh
522203b705cfSriastradh			sna->kgem.batch[id] =
522303b705cfSriastradh				_3DSTATE_PIXEL_SHADER_PROGRAM |
522403b705cfSriastradh				(sna->kgem.nbatch - id - 2);
522503b705cfSriastradh		}
522603b705cfSriastradh	} else {
522703b705cfSriastradh		/* For the planar formats, we set up three samplers --
522803b705cfSriastradh		 * one for each plane, in a Y8 format.  Because I
522903b705cfSriastradh		 * couldn't get the special PLANAR_TO_PACKED
523003b705cfSriastradh		 * shader setup to work, I did the manual pixel shader:
523103b705cfSriastradh		 *
523203b705cfSriastradh		 * y' = y - .0625
523303b705cfSriastradh		 * u' = u - .5
523403b705cfSriastradh		 * v' = v - .5;
523503b705cfSriastradh		 *
523603b705cfSriastradh		 * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
523703b705cfSriastradh		 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
523803b705cfSriastradh		 * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
523903b705cfSriastradh		 *
524003b705cfSriastradh		 * register assignment:
524103b705cfSriastradh		 * r0 = (y',u',v',0)
524203b705cfSriastradh		 * r1 = (y,y,y,y)
524303b705cfSriastradh		 * r2 = (u,u,u,u)
524403b705cfSriastradh		 * r3 = (v,v,v,v)
524503b705cfSriastradh		 * OC = (r,g,b,1)
524603b705cfSriastradh		 */
524703b705cfSriastradh		rewind = sna->kgem.nbatch;
524803b705cfSriastradh		OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2));
524903b705cfSriastradh		OUT_BATCH(0x000001f);	/* constants 0-4 */
525003b705cfSriastradh		/* constant 0: normalization offsets */
525103b705cfSriastradh		OUT_BATCH_F(-0.0625);
525203b705cfSriastradh		OUT_BATCH_F(-0.5);
525303b705cfSriastradh		OUT_BATCH_F(-0.5);
525403b705cfSriastradh		OUT_BATCH_F(0.0);
525503b705cfSriastradh		/* constant 1: r coefficients */
525603b705cfSriastradh		OUT_BATCH_F(1.1643);
525703b705cfSriastradh		OUT_BATCH_F(0.0);
525803b705cfSriastradh		OUT_BATCH_F(1.5958);
525903b705cfSriastradh		OUT_BATCH_F(0.0);
526003b705cfSriastradh		/* constant 2: g coefficients */
526103b705cfSriastradh		OUT_BATCH_F(1.1643);
526203b705cfSriastradh		OUT_BATCH_F(-0.39173);
526303b705cfSriastradh		OUT_BATCH_F(-0.81290);
526403b705cfSriastradh		OUT_BATCH_F(0.0);
526503b705cfSriastradh		/* constant 3: b coefficients */
526603b705cfSriastradh		OUT_BATCH_F(1.1643);
526703b705cfSriastradh		OUT_BATCH_F(2.017);
526803b705cfSriastradh		OUT_BATCH_F(0.0);
526903b705cfSriastradh		OUT_BATCH_F(0.0);
527003b705cfSriastradh		/* constant 4: brightness/contrast */
527103b705cfSriastradh		OUT_BATCH_F(video->brightness / 128.0);
527203b705cfSriastradh		OUT_BATCH_F(video->contrast / 255.0);
527303b705cfSriastradh		OUT_BATCH_F(0.0);
527403b705cfSriastradh		OUT_BATCH_F(0.0);
527503b705cfSriastradh		if (state->last_constants &&
527603b705cfSriastradh		    memcmp(&sna->kgem.batch[state->last_constants],
527703b705cfSriastradh			   &sna->kgem.batch[rewind],
527803b705cfSriastradh			   22*sizeof(uint32_t)) == 0)
527903b705cfSriastradh			sna->kgem.nbatch = rewind;
528003b705cfSriastradh		else
528103b705cfSriastradh			state->last_constants = rewind;
528203b705cfSriastradh
528303b705cfSriastradh		rewind = sna->kgem.nbatch;
528403b705cfSriastradh		OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9);
528503b705cfSriastradh		OUT_BATCH(0x00000007);
528603b705cfSriastradh		/* sampler 0 */
528703b705cfSriastradh		OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
528803b705cfSriastradh			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
528903b705cfSriastradh		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
529003b705cfSriastradh			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
529103b705cfSriastradh			  (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
529203b705cfSriastradh			  SS3_NORMALIZED_COORDS);
529303b705cfSriastradh		OUT_BATCH(0x00000000);
529403b705cfSriastradh		/* sampler 1 */
529503b705cfSriastradh		OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
529603b705cfSriastradh			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
529703b705cfSriastradh		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
529803b705cfSriastradh			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
529903b705cfSriastradh			  (1 << SS3_TEXTUREMAP_INDEX_SHIFT) |
530003b705cfSriastradh			  SS3_NORMALIZED_COORDS);
530103b705cfSriastradh		OUT_BATCH(0x00000000);
530203b705cfSriastradh		/* sampler 2 */
530303b705cfSriastradh		OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
530403b705cfSriastradh			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
530503b705cfSriastradh		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
530603b705cfSriastradh			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
530703b705cfSriastradh			  (2 << SS3_TEXTUREMAP_INDEX_SHIFT) |
530803b705cfSriastradh			  SS3_NORMALIZED_COORDS);
530903b705cfSriastradh		OUT_BATCH(0x00000000);
531003b705cfSriastradh		if (state->last_sampler &&
531103b705cfSriastradh		    memcmp(&sna->kgem.batch[state->last_sampler],
531203b705cfSriastradh			   &sna->kgem.batch[rewind],
531303b705cfSriastradh			   11*sizeof(uint32_t)) == 0)
531403b705cfSriastradh			sna->kgem.nbatch = rewind;
531503b705cfSriastradh		else
531603b705cfSriastradh			state->last_sampler = rewind;
531703b705cfSriastradh
531803b705cfSriastradh		OUT_BATCH(_3DSTATE_MAP_STATE | 9);
531903b705cfSriastradh		OUT_BATCH(0x00000007);
532003b705cfSriastradh
532103b705cfSriastradh		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
532203b705cfSriastradh					 frame->bo,
532303b705cfSriastradh					 I915_GEM_DOMAIN_SAMPLER << 16,
532403b705cfSriastradh					 0));
532503b705cfSriastradh
532603b705cfSriastradh		ms3 = MAPSURF_8BIT | MT_8BIT_I8;
532703b705cfSriastradh		ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
532803b705cfSriastradh		ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
532903b705cfSriastradh		OUT_BATCH(ms3);
533003b705cfSriastradh		/* check to see if Y has special pitch than normal
533103b705cfSriastradh		 * double u/v pitch, e.g i915 XvMC hw requires at
533203b705cfSriastradh		 * least 1K alignment, so Y pitch might
533303b705cfSriastradh		 * be same as U/V's.*/
533403b705cfSriastradh		if (frame->pitch[1])
533503b705cfSriastradh			OUT_BATCH(((frame->pitch[1] / 4) - 1) << MS4_PITCH_SHIFT);
533603b705cfSriastradh		else
533703b705cfSriastradh			OUT_BATCH(((frame->pitch[0] * 2 / 4) - 1) << MS4_PITCH_SHIFT);
533803b705cfSriastradh
533903b705cfSriastradh		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
534003b705cfSriastradh					 frame->bo,
534103b705cfSriastradh					 I915_GEM_DOMAIN_SAMPLER << 16,
534203b705cfSriastradh					 frame->UBufOffset));
534303b705cfSriastradh
534403b705cfSriastradh		ms3 = MAPSURF_8BIT | MT_8BIT_I8;
534503b705cfSriastradh		ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
534603b705cfSriastradh		ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
534703b705cfSriastradh		OUT_BATCH(ms3);
534803b705cfSriastradh		OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
534903b705cfSriastradh
535003b705cfSriastradh		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
535103b705cfSriastradh					 frame->bo,
535203b705cfSriastradh					 I915_GEM_DOMAIN_SAMPLER << 16,
535303b705cfSriastradh					 frame->VBufOffset));
535403b705cfSriastradh
535503b705cfSriastradh		ms3 = MAPSURF_8BIT | MT_8BIT_I8;
535603b705cfSriastradh		ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
535703b705cfSriastradh		ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
535803b705cfSriastradh		OUT_BATCH(ms3);
535903b705cfSriastradh		OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
536003b705cfSriastradh
536103b705cfSriastradh		id = 1<<31 | 2<<1 | !!video->brightness;
536203b705cfSriastradh		if (state->last_shader != id) {
536303b705cfSriastradh			state->last_shader = id;
536403b705cfSriastradh			id = sna->kgem.nbatch++;
536503b705cfSriastradh
536603b705cfSriastradh			/* Declare samplers */
536703b705cfSriastradh			gen3_fs_dcl(FS_S0);	/* Y */
536803b705cfSriastradh			gen3_fs_dcl(FS_S1);	/* U */
536903b705cfSriastradh			gen3_fs_dcl(FS_S2);	/* V */
537003b705cfSriastradh			gen3_fs_dcl(FS_T0);	/* normalized coords */
537103b705cfSriastradh
537203b705cfSriastradh			/* Load samplers to temporaries. */
537303b705cfSriastradh			gen3_fs_texld(FS_R1, FS_S0, FS_T0);
537403b705cfSriastradh			gen3_fs_texld(FS_R2, FS_S1, FS_T0);
537503b705cfSriastradh			gen3_fs_texld(FS_R3, FS_S2, FS_T0);
537603b705cfSriastradh
537703b705cfSriastradh			/* Move the sampled YUV data in R[123] to the first
537803b705cfSriastradh			 * 3 channels of R0.
537903b705cfSriastradh			 */
538003b705cfSriastradh			gen3_fs_mov_masked(FS_R0, MASK_X,
538103b705cfSriastradh					   gen3_fs_operand_reg(FS_R1));
538203b705cfSriastradh			gen3_fs_mov_masked(FS_R0, MASK_Y,
538303b705cfSriastradh					   gen3_fs_operand_reg(FS_R2));
538403b705cfSriastradh			gen3_fs_mov_masked(FS_R0, MASK_Z,
538503b705cfSriastradh					   gen3_fs_operand_reg(FS_R3));
538603b705cfSriastradh
538703b705cfSriastradh			/* Normalize the YUV data */
538803b705cfSriastradh			gen3_fs_add(FS_R0, gen3_fs_operand_reg(FS_R0),
538903b705cfSriastradh				    gen3_fs_operand_reg(FS_C0));
539003b705cfSriastradh			/* dot-product the YUV data in R0 by the vectors of
539103b705cfSriastradh			 * coefficients for calculating R, G, and B, storing
539203b705cfSriastradh			 * the results in the R, G, or B channels of the output
539303b705cfSriastradh			 * color.  The OC results are implicitly clamped
539403b705cfSriastradh			 * at the end of the program.
539503b705cfSriastradh			 */
539603b705cfSriastradh			gen3_fs_dp3(FS_OC, MASK_X,
539703b705cfSriastradh				    gen3_fs_operand_reg(FS_R0),
539803b705cfSriastradh				    gen3_fs_operand_reg(FS_C1));
539903b705cfSriastradh			gen3_fs_dp3(FS_OC, MASK_Y,
540003b705cfSriastradh				    gen3_fs_operand_reg(FS_R0),
540103b705cfSriastradh				    gen3_fs_operand_reg(FS_C2));
540203b705cfSriastradh			gen3_fs_dp3(FS_OC, MASK_Z,
540303b705cfSriastradh				    gen3_fs_operand_reg(FS_R0),
540403b705cfSriastradh				    gen3_fs_operand_reg(FS_C3));
540503b705cfSriastradh			/* Set alpha of the output to 1.0, by wiring W to 1
540603b705cfSriastradh			 * and not actually using the source.
540703b705cfSriastradh			 */
540803b705cfSriastradh			gen3_fs_mov_masked(FS_OC, MASK_W,
540903b705cfSriastradh					   gen3_fs_operand_one());
541003b705cfSriastradh
541103b705cfSriastradh			if (video->brightness != 0) {
541203b705cfSriastradh				gen3_fs_add(FS_OC,
541303b705cfSriastradh					    gen3_fs_operand_reg(FS_OC),
541403b705cfSriastradh					    gen3_fs_operand(FS_C4, X, X, X, ZERO));
541503b705cfSriastradh			}
541603b705cfSriastradh
541703b705cfSriastradh			sna->kgem.batch[id] =
541803b705cfSriastradh				_3DSTATE_PIXEL_SHADER_PROGRAM |
541903b705cfSriastradh				(sna->kgem.nbatch - id - 2);
542003b705cfSriastradh		}
542103b705cfSriastradh	}
542203b705cfSriastradh}
542303b705cfSriastradh
542403b705cfSriastradhstatic void
542503b705cfSriastradhgen3_video_get_batch(struct sna *sna, struct kgem_bo *bo)
542603b705cfSriastradh{
542703b705cfSriastradh	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
542803b705cfSriastradh
542903b705cfSriastradh	if (!kgem_check_batch(&sna->kgem, 120) ||
543003b705cfSriastradh	    !kgem_check_reloc(&sna->kgem, 4) ||
543103b705cfSriastradh	    !kgem_check_exec(&sna->kgem, 2)) {
543203b705cfSriastradh		_kgem_submit(&sna->kgem);
543303b705cfSriastradh		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
543403b705cfSriastradh	}
543503b705cfSriastradh
543603b705cfSriastradh	if (sna->render_state.gen3.need_invariant)
543703b705cfSriastradh		gen3_emit_invariant(sna);
543803b705cfSriastradh}
543903b705cfSriastradh
/*
 * Clamp the number of rectangles the caller would like to emit inline to
 * what the batch can currently hold.
 *
 * Each rectangle takes three vertices of @floats_per_vertex entries; one
 * batch entry is held back from the available space (the callers in this
 * file emit a single primitive header before the vertices).
 *
 * Returns @want if everything fits, otherwise the number of whole
 * rectangles that fit (possibly 0).
 */
static int
gen3_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex)
{
	const int per_rect = floats_per_vertex * 3;
	const int avail = batch_space(sna) - 1;

	return per_rect * want > avail ? avail / per_rect : want;
}
545103b705cfSriastradh
545203b705cfSriastradhstatic bool
545303b705cfSriastradhgen3_render_video(struct sna *sna,
545403b705cfSriastradh		  struct sna_video *video,
545503b705cfSriastradh		  struct sna_video_frame *frame,
545603b705cfSriastradh		  RegionPtr dstRegion,
545703b705cfSriastradh		  PixmapPtr pixmap)
545803b705cfSriastradh{
545903b705cfSriastradh	struct sna_pixmap *priv = sna_pixmap(pixmap);
546042542f5fSchristos	const BoxRec *pbox = region_rects(dstRegion);
546142542f5fSchristos	int nbox = region_num_rects(dstRegion);
546203b705cfSriastradh	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
546303b705cfSriastradh	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
546403b705cfSriastradh	int src_width = frame->src.x2 - frame->src.x1;
546503b705cfSriastradh	int src_height = frame->src.y2 - frame->src.y1;
546603b705cfSriastradh	float src_offset_x, src_offset_y;
546703b705cfSriastradh	float src_scale_x, src_scale_y;
546803b705cfSriastradh	int pix_xoff, pix_yoff;
546903b705cfSriastradh	struct kgem_bo *dst_bo;
547003b705cfSriastradh	bool bilinear;
547103b705cfSriastradh	int copy = 0;
547203b705cfSriastradh
547303b705cfSriastradh	DBG(("%s: src:%dx%d (frame:%dx%d) -> dst:%dx%d\n", __FUNCTION__,
547403b705cfSriastradh	     src_width, src_height, frame->width, frame->height, dst_width, dst_height));
547503b705cfSriastradh
547642542f5fSchristos	assert(priv->gpu_bo);
547703b705cfSriastradh	dst_bo = priv->gpu_bo;
547803b705cfSriastradh
547903b705cfSriastradh	bilinear = src_width != dst_width || src_height != dst_height;
548003b705cfSriastradh
548103b705cfSriastradh	src_scale_x = (float)src_width / dst_width / frame->width;
548203b705cfSriastradh	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
548303b705cfSriastradh
548403b705cfSriastradh	src_scale_y = (float)src_height / dst_height / frame->height;
548503b705cfSriastradh	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
548603b705cfSriastradh	DBG(("%s: src offset (%f, %f), scale (%f, %f)\n",
548703b705cfSriastradh	     __FUNCTION__, src_offset_x, src_offset_y, src_scale_x, src_scale_y));
548803b705cfSriastradh
548903b705cfSriastradh	if (too_large(pixmap->drawable.width, pixmap->drawable.height) ||
549003b705cfSriastradh	    !gen3_check_pitch_3d(dst_bo)) {
549103b705cfSriastradh		int bpp = pixmap->drawable.bitsPerPixel;
549203b705cfSriastradh
549303b705cfSriastradh		if (too_large(dst_width, dst_height))
549403b705cfSriastradh			return false;
549503b705cfSriastradh
549603b705cfSriastradh		dst_bo = kgem_create_2d(&sna->kgem,
549703b705cfSriastradh					dst_width, dst_height, bpp,
549803b705cfSriastradh					kgem_choose_tiling(&sna->kgem,
549903b705cfSriastradh							   I915_TILING_X,
550003b705cfSriastradh							   dst_width, dst_height, bpp),
550103b705cfSriastradh					0);
550203b705cfSriastradh		if (!dst_bo)
550303b705cfSriastradh			return false;
550403b705cfSriastradh
550503b705cfSriastradh		pix_xoff = -dstRegion->extents.x1;
550603b705cfSriastradh		pix_yoff = -dstRegion->extents.y1;
550703b705cfSriastradh		copy = 1;
550803b705cfSriastradh	} else {
5509fe8aea9eSmrg		pix_xoff = pix_yoff = 0;
551003b705cfSriastradh		dst_width  = pixmap->drawable.width;
551103b705cfSriastradh		dst_height = pixmap->drawable.height;
551203b705cfSriastradh	}
551303b705cfSriastradh
551403b705cfSriastradh	gen3_video_get_batch(sna, dst_bo);
551503b705cfSriastradh	gen3_emit_video_state(sna, video, frame, pixmap,
551603b705cfSriastradh			      dst_bo, dst_width, dst_height, bilinear);
551703b705cfSriastradh	do {
551803b705cfSriastradh		int nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
551903b705cfSriastradh		if (nbox_this_time == 0) {
552003b705cfSriastradh			gen3_video_get_batch(sna, dst_bo);
552103b705cfSriastradh			gen3_emit_video_state(sna, video, frame, pixmap,
552203b705cfSriastradh					      dst_bo, dst_width, dst_height, bilinear);
552303b705cfSriastradh			nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
552403b705cfSriastradh			assert(nbox_this_time);
552503b705cfSriastradh		}
552603b705cfSriastradh		nbox -= nbox_this_time;
552703b705cfSriastradh
552803b705cfSriastradh		OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1));
552903b705cfSriastradh		do {
553003b705cfSriastradh			int box_x1 = pbox->x1;
553103b705cfSriastradh			int box_y1 = pbox->y1;
553203b705cfSriastradh			int box_x2 = pbox->x2;
553303b705cfSriastradh			int box_y2 = pbox->y2;
553403b705cfSriastradh
553503b705cfSriastradh			pbox++;
553603b705cfSriastradh
553703b705cfSriastradh			DBG(("%s: dst (%d, %d), (%d, %d) + (%d, %d); src (%f, %f), (%f, %f)\n",
553803b705cfSriastradh			     __FUNCTION__, box_x1, box_y1, box_x2, box_y2, pix_xoff, pix_yoff,
553903b705cfSriastradh			     box_x1 * src_scale_x + src_offset_x,
554003b705cfSriastradh			     box_y1 * src_scale_y + src_offset_y,
554103b705cfSriastradh			     box_x2 * src_scale_x + src_offset_x,
554203b705cfSriastradh			     box_y2 * src_scale_y + src_offset_y));
554303b705cfSriastradh
554403b705cfSriastradh			/* bottom right */
554503b705cfSriastradh			OUT_BATCH_F(box_x2 + pix_xoff);
554603b705cfSriastradh			OUT_BATCH_F(box_y2 + pix_yoff);
554703b705cfSriastradh			OUT_BATCH_F(box_x2 * src_scale_x + src_offset_x);
554803b705cfSriastradh			OUT_BATCH_F(box_y2 * src_scale_y + src_offset_y);
554903b705cfSriastradh
555003b705cfSriastradh			/* bottom left */
555103b705cfSriastradh			OUT_BATCH_F(box_x1 + pix_xoff);
555203b705cfSriastradh			OUT_BATCH_F(box_y2 + pix_yoff);
555303b705cfSriastradh			OUT_BATCH_F(box_x1 * src_scale_x + src_offset_x);
555403b705cfSriastradh			OUT_BATCH_F(box_y2 * src_scale_y + src_offset_y);
555503b705cfSriastradh
555603b705cfSriastradh			/* top left */
555703b705cfSriastradh			OUT_BATCH_F(box_x1 + pix_xoff);
555803b705cfSriastradh			OUT_BATCH_F(box_y1 + pix_yoff);
555903b705cfSriastradh			OUT_BATCH_F(box_x1 * src_scale_x + src_offset_x);
556003b705cfSriastradh			OUT_BATCH_F(box_y1 * src_scale_y + src_offset_y);
556103b705cfSriastradh		} while (--nbox_this_time);
556203b705cfSriastradh	} while (nbox);
556303b705cfSriastradh
556403b705cfSriastradh	if (copy) {
556503b705cfSriastradh		sna_blt_copy_boxes(sna, GXcopy,
556603b705cfSriastradh				   dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1,
5567fe8aea9eSmrg				   priv->gpu_bo, 0, 0,
556803b705cfSriastradh				   pixmap->drawable.bitsPerPixel,
556942542f5fSchristos				   region_rects(dstRegion),
557042542f5fSchristos				   region_num_rects(dstRegion));
557103b705cfSriastradh
557203b705cfSriastradh		kgem_bo_destroy(&sna->kgem, dst_bo);
557303b705cfSriastradh	}
557403b705cfSriastradh
5575fe8aea9eSmrg	if (!DAMAGE_IS_ALL(priv->gpu_damage))
5576fe8aea9eSmrg		sna_damage_add(&priv->gpu_damage, dstRegion);
557703b705cfSriastradh
557803b705cfSriastradh	return true;
557903b705cfSriastradh}
558003b705cfSriastradh
558103b705cfSriastradhstatic void
558203b705cfSriastradhgen3_render_copy_setup_source(struct sna_composite_channel *channel,
558342542f5fSchristos			      const DrawableRec *draw,
558403b705cfSriastradh			      struct kgem_bo *bo)
558503b705cfSriastradh{
558603b705cfSriastradh	int i;
558703b705cfSriastradh
558803b705cfSriastradh	channel->u.gen3.type = SHADER_TEXTURE;
558903b705cfSriastradh	channel->filter = gen3_filter(PictFilterNearest);
559003b705cfSriastradh	channel->repeat = gen3_texture_repeat(RepeatNone);
559142542f5fSchristos	channel->width  = draw->width;
559242542f5fSchristos	channel->height = draw->height;
559342542f5fSchristos	channel->scale[0] = 1.f/draw->width;
559442542f5fSchristos	channel->scale[1] = 1.f/draw->height;
559503b705cfSriastradh	channel->offset[0] = 0;
559603b705cfSriastradh	channel->offset[1] = 0;
559703b705cfSriastradh
559842542f5fSchristos	channel->pict_format = sna_format_for_depth(draw->depth);
559903b705cfSriastradh	if (!gen3_composite_channel_set_format(channel, channel->pict_format)) {
560003b705cfSriastradh		for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
560103b705cfSriastradh			if (gen3_tex_formats[i].xfmt == channel->pict_format) {
560203b705cfSriastradh				channel->card_format = gen3_tex_formats[i].card_fmt;
560303b705cfSriastradh				channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
560403b705cfSriastradh				channel->alpha_fixup = true;
560503b705cfSriastradh				break;
560603b705cfSriastradh			}
560703b705cfSriastradh		}
560803b705cfSriastradh	}
560903b705cfSriastradh	assert(channel->card_format);
561003b705cfSriastradh
561103b705cfSriastradh	channel->bo = bo;
561203b705cfSriastradh	channel->is_affine = 1;
561303b705cfSriastradh}
561403b705cfSriastradh
561503b705cfSriastradhstatic bool
561603b705cfSriastradhgen3_render_copy_boxes(struct sna *sna, uint8_t alu,
561742542f5fSchristos		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
561842542f5fSchristos		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
561903b705cfSriastradh		       const BoxRec *box, int n, unsigned flags)
562003b705cfSriastradh{
562103b705cfSriastradh	struct sna_composite_op tmp;
562203b705cfSriastradh
562303b705cfSriastradh#if NO_COPY_BOXES
562442542f5fSchristos	if (!sna_blt_compare_depth(src, dst))
562503b705cfSriastradh		return false;
562603b705cfSriastradh
562703b705cfSriastradh	return sna_blt_copy_boxes(sna, alu,
562803b705cfSriastradh				  src_bo, src_dx, src_dy,
562903b705cfSriastradh				  dst_bo, dst_dx, dst_dy,
563042542f5fSchristos				  dst->bitsPerPixel,
563103b705cfSriastradh				  box, n);
563203b705cfSriastradh#endif
563303b705cfSriastradh
563403b705cfSriastradh	DBG(("%s (%d, %d)->(%d, %d) x %d\n",
563503b705cfSriastradh	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
563603b705cfSriastradh
563742542f5fSchristos	if (sna_blt_compare_depth(src, dst) &&
563803b705cfSriastradh	    sna_blt_copy_boxes(sna, alu,
563903b705cfSriastradh			       src_bo, src_dx, src_dy,
564003b705cfSriastradh			       dst_bo, dst_dx, dst_dy,
564142542f5fSchristos			       dst->bitsPerPixel,
564203b705cfSriastradh			       box, n))
564303b705cfSriastradh		return true;
564403b705cfSriastradh
564503b705cfSriastradh	if (!(alu == GXcopy || alu == GXclear) ||
564603b705cfSriastradh	    src_bo == dst_bo || /* XXX handle overlap using 3D ? */
564703b705cfSriastradh	    src_bo->pitch > MAX_3D_PITCH ||
564842542f5fSchristos	    too_large(src->width, src->height)) {
564903b705cfSriastradhfallback_blt:
565003b705cfSriastradh		if (!kgem_bo_can_blt(&sna->kgem, src_bo) ||
565103b705cfSriastradh		    !kgem_bo_can_blt(&sna->kgem, dst_bo))
565203b705cfSriastradh			return false;
565303b705cfSriastradh
565403b705cfSriastradh		return sna_blt_copy_boxes_fallback(sna, alu,
565503b705cfSriastradh						   src, src_bo, src_dx, src_dy,
565603b705cfSriastradh						   dst, dst_bo, dst_dx, dst_dy,
565703b705cfSriastradh						   box, n);
565803b705cfSriastradh	}
565903b705cfSriastradh
566003b705cfSriastradh	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
566103b705cfSriastradh		kgem_submit(&sna->kgem);
566203b705cfSriastradh		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
566303b705cfSriastradh			goto fallback_blt;
566403b705cfSriastradh	}
566503b705cfSriastradh
566603b705cfSriastradh	memset(&tmp, 0, sizeof(tmp));
566703b705cfSriastradh	tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
566803b705cfSriastradh
566942542f5fSchristos	tmp.dst.pixmap = (PixmapPtr)dst;
567042542f5fSchristos	tmp.dst.width = dst->width;
567142542f5fSchristos	tmp.dst.height = dst->height;
567242542f5fSchristos	tmp.dst.format = sna_format_for_depth(dst->depth);
567303b705cfSriastradh	tmp.dst.bo = dst_bo;
567403b705cfSriastradh	tmp.dst.x = tmp.dst.y = 0;
567503b705cfSriastradh	tmp.damage = NULL;
567603b705cfSriastradh
567703b705cfSriastradh	sna_render_composite_redirect_init(&tmp);
567803b705cfSriastradh	if (too_large(tmp.dst.width, tmp.dst.height) ||
567903b705cfSriastradh	    dst_bo->pitch > MAX_3D_PITCH) {
568003b705cfSriastradh		BoxRec extents = box[0];
568103b705cfSriastradh		int i;
568203b705cfSriastradh
568303b705cfSriastradh		for (i = 1; i < n; i++) {
568403b705cfSriastradh			if (box[i].x1 < extents.x1)
568503b705cfSriastradh				extents.x1 = box[i].x1;
568603b705cfSriastradh			if (box[i].y1 < extents.y1)
568703b705cfSriastradh				extents.y1 = box[i].y1;
568803b705cfSriastradh
568903b705cfSriastradh			if (box[i].x2 > extents.x2)
569003b705cfSriastradh				extents.x2 = box[i].x2;
569103b705cfSriastradh			if (box[i].y2 > extents.y2)
569203b705cfSriastradh				extents.y2 = box[i].y2;
569303b705cfSriastradh		}
569403b705cfSriastradh		if (!sna_render_composite_redirect(sna, &tmp,
569503b705cfSriastradh						   extents.x1 + dst_dx,
569603b705cfSriastradh						   extents.y1 + dst_dy,
569703b705cfSriastradh						   extents.x2 - extents.x1,
569803b705cfSriastradh						   extents.y2 - extents.y1,
569903b705cfSriastradh						   n > 1))
570003b705cfSriastradh			goto fallback_tiled;
570103b705cfSriastradh	}
570203b705cfSriastradh
570303b705cfSriastradh	gen3_render_copy_setup_source(&tmp.src, src, src_bo);
570403b705cfSriastradh
570503b705cfSriastradh	tmp.floats_per_vertex = 4;
570603b705cfSriastradh	tmp.floats_per_rect = 12;
570703b705cfSriastradh	tmp.mask.bo = NULL;
570803b705cfSriastradh	tmp.mask.u.gen3.type = SHADER_NONE;
570903b705cfSriastradh
571003b705cfSriastradh	dst_dx += tmp.dst.x;
571103b705cfSriastradh	dst_dy += tmp.dst.y;
571203b705cfSriastradh	tmp.dst.x = tmp.dst.y = 0;
571303b705cfSriastradh
571403b705cfSriastradh	gen3_align_vertex(sna, &tmp);
571542542f5fSchristos	gen3_emit_composite_state(sna, &tmp);
571603b705cfSriastradh
571703b705cfSriastradh	do {
571803b705cfSriastradh		int n_this_time;
571903b705cfSriastradh
572003b705cfSriastradh		n_this_time = gen3_get_rectangles(sna, &tmp, n);
572103b705cfSriastradh		n -= n_this_time;
572203b705cfSriastradh
572303b705cfSriastradh		do {
572403b705cfSriastradh			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
572503b705cfSriastradh			     box->x1 + src_dx, box->y1 + src_dy,
572603b705cfSriastradh			     box->x1 + dst_dx, box->y1 + dst_dy,
572703b705cfSriastradh			     box->x2 - box->x1, box->y2 - box->y1));
572803b705cfSriastradh			OUT_VERTEX(box->x2 + dst_dx);
572903b705cfSriastradh			OUT_VERTEX(box->y2 + dst_dy);
573003b705cfSriastradh			OUT_VERTEX((box->x2 + src_dx) * tmp.src.scale[0]);
573103b705cfSriastradh			OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
573203b705cfSriastradh
573303b705cfSriastradh			OUT_VERTEX(box->x1 + dst_dx);
573403b705cfSriastradh			OUT_VERTEX(box->y2 + dst_dy);
573503b705cfSriastradh			OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
573603b705cfSriastradh			OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
573703b705cfSriastradh
573803b705cfSriastradh			OUT_VERTEX(box->x1 + dst_dx);
573903b705cfSriastradh			OUT_VERTEX(box->y1 + dst_dy);
574003b705cfSriastradh			OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
574103b705cfSriastradh			OUT_VERTEX((box->y1 + src_dy) * tmp.src.scale[1]);
574203b705cfSriastradh
574303b705cfSriastradh			box++;
574403b705cfSriastradh		} while (--n_this_time);
574503b705cfSriastradh	} while (n);
574603b705cfSriastradh
574703b705cfSriastradh	gen3_vertex_flush(sna);
574803b705cfSriastradh	sna_render_composite_redirect_done(sna, &tmp);
574903b705cfSriastradh	return true;
575003b705cfSriastradh
575103b705cfSriastradhfallback_tiled:
575203b705cfSriastradh	return sna_tiling_copy_boxes(sna, alu,
575303b705cfSriastradh				     src, src_bo, src_dx, src_dy,
575403b705cfSriastradh				     dst, dst_bo, dst_dx, dst_dy,
575503b705cfSriastradh				     box, n);
575603b705cfSriastradh}
575703b705cfSriastradh
575803b705cfSriastradhstatic void
575903b705cfSriastradhgen3_render_copy_blt(struct sna *sna,
576003b705cfSriastradh		     const struct sna_copy_op *op,
576103b705cfSriastradh		     int16_t sx, int16_t sy,
576203b705cfSriastradh		     int16_t w, int16_t h,
576303b705cfSriastradh		     int16_t dx, int16_t dy)
576403b705cfSriastradh{
576503b705cfSriastradh	gen3_get_rectangles(sna, &op->base, 1);
576603b705cfSriastradh
576703b705cfSriastradh	OUT_VERTEX(dx+w);
576803b705cfSriastradh	OUT_VERTEX(dy+h);
576903b705cfSriastradh	OUT_VERTEX((sx+w)*op->base.src.scale[0]);
577003b705cfSriastradh	OUT_VERTEX((sy+h)*op->base.src.scale[1]);
577103b705cfSriastradh
577203b705cfSriastradh	OUT_VERTEX(dx);
577303b705cfSriastradh	OUT_VERTEX(dy+h);
577403b705cfSriastradh	OUT_VERTEX(sx*op->base.src.scale[0]);
577503b705cfSriastradh	OUT_VERTEX((sy+h)*op->base.src.scale[1]);
577603b705cfSriastradh
577703b705cfSriastradh	OUT_VERTEX(dx);
577803b705cfSriastradh	OUT_VERTEX(dy);
577903b705cfSriastradh	OUT_VERTEX(sx*op->base.src.scale[0]);
578003b705cfSriastradh	OUT_VERTEX(sy*op->base.src.scale[1]);
578103b705cfSriastradh}
578203b705cfSriastradh
578303b705cfSriastradhstatic void
578403b705cfSriastradhgen3_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
578503b705cfSriastradh{
578603b705cfSriastradh	if (sna->render.vertex_offset)
578703b705cfSriastradh		gen3_vertex_flush(sna);
578803b705cfSriastradh}
578903b705cfSriastradh
579003b705cfSriastradhstatic bool
579103b705cfSriastradhgen3_render_copy(struct sna *sna, uint8_t alu,
579203b705cfSriastradh		 PixmapPtr src, struct kgem_bo *src_bo,
579303b705cfSriastradh		 PixmapPtr dst, struct kgem_bo *dst_bo,
579403b705cfSriastradh		 struct sna_copy_op *tmp)
579503b705cfSriastradh{
579603b705cfSriastradh#if NO_COPY
579703b705cfSriastradh	if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
579803b705cfSriastradh		return false;
579903b705cfSriastradh
580003b705cfSriastradh	return sna_blt_copy(sna, alu,
580103b705cfSriastradh			    src_bo, dst_bo,
580203b705cfSriastradh			    dst->drawable.bitsPerPixel,
580303b705cfSriastradh			    tmp);
580403b705cfSriastradh#endif
580503b705cfSriastradh
580603b705cfSriastradh	/* Prefer to use the BLT */
580703b705cfSriastradh	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
580803b705cfSriastradh	    sna_blt_copy(sna, alu,
580903b705cfSriastradh			 src_bo, dst_bo,
581003b705cfSriastradh			 dst->drawable.bitsPerPixel,
581103b705cfSriastradh			 tmp))
581203b705cfSriastradh		return true;
581303b705cfSriastradh
581403b705cfSriastradh	/* Must use the BLT if we can't RENDER... */
581503b705cfSriastradh	if (!(alu == GXcopy || alu == GXclear) ||
581603b705cfSriastradh	    too_large(src->drawable.width, src->drawable.height) ||
581703b705cfSriastradh	    too_large(dst->drawable.width, dst->drawable.height) ||
581803b705cfSriastradh	    src_bo->pitch > MAX_3D_PITCH || dst_bo->pitch > MAX_3D_PITCH) {
581903b705cfSriastradhfallback:
582003b705cfSriastradh		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
582103b705cfSriastradh			return false;
582203b705cfSriastradh
582303b705cfSriastradh		return sna_blt_copy(sna, alu, src_bo, dst_bo,
582403b705cfSriastradh				    dst->drawable.bitsPerPixel,
582503b705cfSriastradh				    tmp);
582603b705cfSriastradh	}
582703b705cfSriastradh
582803b705cfSriastradh	tmp->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;
582903b705cfSriastradh
583003b705cfSriastradh	tmp->base.dst.pixmap = dst;
583103b705cfSriastradh	tmp->base.dst.width = dst->drawable.width;
583203b705cfSriastradh	tmp->base.dst.height = dst->drawable.height;
583303b705cfSriastradh	tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
583403b705cfSriastradh	tmp->base.dst.bo = dst_bo;
583503b705cfSriastradh
583642542f5fSchristos	gen3_render_copy_setup_source(&tmp->base.src, &src->drawable, src_bo);
583703b705cfSriastradh
583803b705cfSriastradh	tmp->base.floats_per_vertex = 4;
583903b705cfSriastradh	tmp->base.floats_per_rect = 12;
584003b705cfSriastradh	tmp->base.mask.bo = NULL;
584103b705cfSriastradh	tmp->base.mask.u.gen3.type = SHADER_NONE;
584203b705cfSriastradh
584303b705cfSriastradh	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
584403b705cfSriastradh		kgem_submit(&sna->kgem);
584503b705cfSriastradh		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
584603b705cfSriastradh			goto fallback;
584703b705cfSriastradh	}
584803b705cfSriastradh
584903b705cfSriastradh	tmp->blt  = gen3_render_copy_blt;
585003b705cfSriastradh	tmp->done = gen3_render_copy_done;
585103b705cfSriastradh
585203b705cfSriastradh	gen3_align_vertex(sna, &tmp->base);
585342542f5fSchristos	gen3_emit_composite_state(sna, &tmp->base);
585403b705cfSriastradh	return true;
585503b705cfSriastradh}
585603b705cfSriastradh
585703b705cfSriastradhstatic bool
585803b705cfSriastradhgen3_render_fill_boxes_try_blt(struct sna *sna,
585903b705cfSriastradh			       CARD8 op, PictFormat format,
586003b705cfSriastradh			       const xRenderColor *color,
586142542f5fSchristos			       const DrawableRec *dst, struct kgem_bo *dst_bo,
586203b705cfSriastradh			       const BoxRec *box, int n)
586303b705cfSriastradh{
586403b705cfSriastradh	uint8_t alu;
586503b705cfSriastradh	uint32_t pixel;
586603b705cfSriastradh
586703b705cfSriastradh	if (dst_bo->tiling == I915_TILING_Y) {
586803b705cfSriastradh		DBG(("%s: y-tiling, can't blit\n", __FUNCTION__));
586942542f5fSchristos		assert(!too_large(dst->width, dst->height));
587003b705cfSriastradh		return false;
587103b705cfSriastradh	}
587203b705cfSriastradh
587303b705cfSriastradh	if (op > PictOpSrc)
587403b705cfSriastradh		return false;
587503b705cfSriastradh
587603b705cfSriastradh	if (op == PictOpClear) {
587703b705cfSriastradh		alu = GXclear;
587803b705cfSriastradh		pixel = 0;
587903b705cfSriastradh	} else if (!sna_get_pixel_from_rgba(&pixel,
588003b705cfSriastradh					    color->red,
588103b705cfSriastradh					    color->green,
588203b705cfSriastradh					    color->blue,
588303b705cfSriastradh					    color->alpha,
588403b705cfSriastradh					    format))
588503b705cfSriastradh		return false;
588603b705cfSriastradh	else
588703b705cfSriastradh		alu = GXcopy;
588803b705cfSriastradh
588903b705cfSriastradh	return sna_blt_fill_boxes(sna, alu,
589042542f5fSchristos				  dst_bo, dst->bitsPerPixel,
589103b705cfSriastradh				  pixel, box, n);
589203b705cfSriastradh}
589303b705cfSriastradh
589403b705cfSriastradhstatic inline bool prefer_fill_blt(struct sna *sna)
589503b705cfSriastradh{
589603b705cfSriastradh#if PREFER_BLT_FILL
589703b705cfSriastradh	return true;
589803b705cfSriastradh#else
589903b705cfSriastradh	return sna->kgem.mode != KGEM_RENDER;
590003b705cfSriastradh#endif
590103b705cfSriastradh}
590203b705cfSriastradh
590303b705cfSriastradhstatic bool
590403b705cfSriastradhgen3_render_fill_boxes(struct sna *sna,
590503b705cfSriastradh		       CARD8 op,
590603b705cfSriastradh		       PictFormat format,
590703b705cfSriastradh		       const xRenderColor *color,
590842542f5fSchristos		       const DrawableRec *dst, struct kgem_bo *dst_bo,
590903b705cfSriastradh		       const BoxRec *box, int n)
591003b705cfSriastradh{
591103b705cfSriastradh	struct sna_composite_op tmp;
591203b705cfSriastradh	uint32_t pixel;
591303b705cfSriastradh
591403b705cfSriastradh	if (op >= ARRAY_SIZE(gen3_blend_op)) {
591503b705cfSriastradh		DBG(("%s: fallback due to unhandled blend op: %d\n",
591603b705cfSriastradh		     __FUNCTION__, op));
591703b705cfSriastradh		return false;
591803b705cfSriastradh	}
591903b705cfSriastradh
592003b705cfSriastradh#if NO_FILL_BOXES
592103b705cfSriastradh	return gen3_render_fill_boxes_try_blt(sna, op, format, color,
592203b705cfSriastradh					      dst, dst_bo,
592303b705cfSriastradh					      box, n);
592403b705cfSriastradh#endif
592503b705cfSriastradh
592603b705cfSriastradh	DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x))\n",
592703b705cfSriastradh	     __FUNCTION__, op, (int)format,
592803b705cfSriastradh	     color->red, color->green, color->blue, color->alpha));
592903b705cfSriastradh
593042542f5fSchristos	if (too_large(dst->width, dst->height) ||
593103b705cfSriastradh	    dst_bo->pitch > MAX_3D_PITCH ||
593203b705cfSriastradh	    !gen3_check_dst_format(format)) {
593303b705cfSriastradh		DBG(("%s: try blt, too large or incompatible destination\n",
593403b705cfSriastradh		     __FUNCTION__));
593503b705cfSriastradh		if (gen3_render_fill_boxes_try_blt(sna, op, format, color,
593603b705cfSriastradh						   dst, dst_bo,
593703b705cfSriastradh						   box, n))
593803b705cfSriastradh			return true;
593903b705cfSriastradh
594003b705cfSriastradh		if (!gen3_check_dst_format(format))
594103b705cfSriastradh			return false;
594203b705cfSriastradh
594303b705cfSriastradh		return sna_tiling_fill_boxes(sna, op, format, color,
594403b705cfSriastradh					     dst, dst_bo, box, n);
594503b705cfSriastradh	}
594603b705cfSriastradh
594703b705cfSriastradh	if (prefer_fill_blt(sna) &&
594803b705cfSriastradh	    gen3_render_fill_boxes_try_blt(sna, op, format, color,
594903b705cfSriastradh					   dst, dst_bo,
595003b705cfSriastradh					   box, n))
595103b705cfSriastradh		return true;
595203b705cfSriastradh
595303b705cfSriastradh	if (op == PictOpClear) {
595403b705cfSriastradh		pixel = 0;
595503b705cfSriastradh	} else {
595603b705cfSriastradh		if (!sna_get_pixel_from_rgba(&pixel,
595703b705cfSriastradh					     color->red,
595803b705cfSriastradh					     color->green,
595903b705cfSriastradh					     color->blue,
596003b705cfSriastradh					     color->alpha,
596103b705cfSriastradh					     PICT_a8r8g8b8)) {
596203b705cfSriastradh			assert(0);
596303b705cfSriastradh			return false;
596403b705cfSriastradh		}
596503b705cfSriastradh	}
596642542f5fSchristos	DBG(("%s: using shader for op=%d, format=%08x, pixel=%08x\n",
596703b705cfSriastradh	     __FUNCTION__, op, (int)format, pixel));
596803b705cfSriastradh
596903b705cfSriastradh	tmp.op = op;
597042542f5fSchristos	tmp.dst.pixmap = (PixmapPtr)dst;
597142542f5fSchristos	tmp.dst.width = dst->width;
597242542f5fSchristos	tmp.dst.height = dst->height;
597303b705cfSriastradh	tmp.dst.format = format;
597403b705cfSriastradh	tmp.dst.bo = dst_bo;
597503b705cfSriastradh	tmp.damage = NULL;
597603b705cfSriastradh	tmp.floats_per_vertex = 2;
597703b705cfSriastradh	tmp.floats_per_rect = 6;
597803b705cfSriastradh	tmp.rb_reversed = 0;
597903b705cfSriastradh	tmp.has_component_alpha = 0;
598003b705cfSriastradh	tmp.need_magic_ca_pass = false;
598103b705cfSriastradh
598203b705cfSriastradh	gen3_init_solid(&tmp.src, pixel);
598303b705cfSriastradh	tmp.mask.bo = NULL;
598403b705cfSriastradh	tmp.mask.u.gen3.type = SHADER_NONE;
598503b705cfSriastradh	tmp.u.gen3.num_constants = 0;
598603b705cfSriastradh
598703b705cfSriastradh	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
598803b705cfSriastradh		kgem_submit(&sna->kgem);
598942542f5fSchristos		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL))
599042542f5fSchristos			return false;
599103b705cfSriastradh	}
599203b705cfSriastradh
599303b705cfSriastradh	gen3_align_vertex(sna, &tmp);
599442542f5fSchristos	gen3_emit_composite_state(sna, &tmp);
599503b705cfSriastradh
599603b705cfSriastradh	do {
599703b705cfSriastradh		int n_this_time;
599803b705cfSriastradh
599903b705cfSriastradh		n_this_time = gen3_get_rectangles(sna, &tmp, n);
600003b705cfSriastradh		n -= n_this_time;
600103b705cfSriastradh
600203b705cfSriastradh		do {
600303b705cfSriastradh			DBG(("	(%d, %d), (%d, %d): %x\n",
600403b705cfSriastradh			     box->x1, box->y1, box->x2, box->y2, pixel));
600503b705cfSriastradh			OUT_VERTEX(box->x2);
600603b705cfSriastradh			OUT_VERTEX(box->y2);
600703b705cfSriastradh			OUT_VERTEX(box->x1);
600803b705cfSriastradh			OUT_VERTEX(box->y2);
600903b705cfSriastradh			OUT_VERTEX(box->x1);
601003b705cfSriastradh			OUT_VERTEX(box->y1);
601103b705cfSriastradh			box++;
601203b705cfSriastradh		} while (--n_this_time);
601303b705cfSriastradh	} while (n);
601403b705cfSriastradh
601503b705cfSriastradh	gen3_vertex_flush(sna);
601603b705cfSriastradh	return true;
601703b705cfSriastradh}
601803b705cfSriastradh
601903b705cfSriastradhstatic void
602003b705cfSriastradhgen3_render_fill_op_blt(struct sna *sna,
602103b705cfSriastradh			const struct sna_fill_op *op,
602203b705cfSriastradh			int16_t x, int16_t y, int16_t w, int16_t h)
602303b705cfSriastradh{
602403b705cfSriastradh	gen3_get_rectangles(sna, &op->base, 1);
602503b705cfSriastradh
602603b705cfSriastradh	OUT_VERTEX(x+w);
602703b705cfSriastradh	OUT_VERTEX(y+h);
602803b705cfSriastradh	OUT_VERTEX(x);
602903b705cfSriastradh	OUT_VERTEX(y+h);
603003b705cfSriastradh	OUT_VERTEX(x);
603103b705cfSriastradh	OUT_VERTEX(y);
603203b705cfSriastradh}
603303b705cfSriastradh
603403b705cfSriastradhfastcall static void
603503b705cfSriastradhgen3_render_fill_op_box(struct sna *sna,
603603b705cfSriastradh			const struct sna_fill_op *op,
603703b705cfSriastradh			const BoxRec *box)
603803b705cfSriastradh{
603903b705cfSriastradh	gen3_get_rectangles(sna, &op->base, 1);
604003b705cfSriastradh
604103b705cfSriastradh	OUT_VERTEX(box->x2);
604203b705cfSriastradh	OUT_VERTEX(box->y2);
604303b705cfSriastradh	OUT_VERTEX(box->x1);
604403b705cfSriastradh	OUT_VERTEX(box->y2);
604503b705cfSriastradh	OUT_VERTEX(box->x1);
604603b705cfSriastradh	OUT_VERTEX(box->y1);
604703b705cfSriastradh}
604803b705cfSriastradh
604903b705cfSriastradhfastcall static void
605003b705cfSriastradhgen3_render_fill_op_boxes(struct sna *sna,
605103b705cfSriastradh			  const struct sna_fill_op *op,
605203b705cfSriastradh			  const BoxRec *box,
605303b705cfSriastradh			  int nbox)
605403b705cfSriastradh{
605503b705cfSriastradh	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
605603b705cfSriastradh	     box->x1, box->y1, box->x2, box->y2, nbox));
605703b705cfSriastradh
605803b705cfSriastradh	do {
605903b705cfSriastradh		int nbox_this_time;
606003b705cfSriastradh
606103b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
606203b705cfSriastradh		nbox -= nbox_this_time;
606303b705cfSriastradh
606403b705cfSriastradh		do {
606503b705cfSriastradh			OUT_VERTEX(box->x2);
606603b705cfSriastradh			OUT_VERTEX(box->y2);
606703b705cfSriastradh			OUT_VERTEX(box->x1);
606803b705cfSriastradh			OUT_VERTEX(box->y2);
606903b705cfSriastradh			OUT_VERTEX(box->x1);
607003b705cfSriastradh			OUT_VERTEX(box->y1);
607103b705cfSriastradh			box++;
607203b705cfSriastradh		} while (--nbox_this_time);
607303b705cfSriastradh	} while (nbox);
607403b705cfSriastradh}
607503b705cfSriastradh
607603b705cfSriastradhstatic void
607703b705cfSriastradhgen3_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
607803b705cfSriastradh{
607903b705cfSriastradh	if (sna->render.vertex_offset)
608003b705cfSriastradh		gen3_vertex_flush(sna);
608103b705cfSriastradh}
608203b705cfSriastradh
608303b705cfSriastradhstatic bool
608403b705cfSriastradhgen3_render_fill(struct sna *sna, uint8_t alu,
608503b705cfSriastradh		 PixmapPtr dst, struct kgem_bo *dst_bo,
608642542f5fSchristos		 uint32_t color, unsigned flags,
608703b705cfSriastradh		 struct sna_fill_op *tmp)
608803b705cfSriastradh{
608903b705cfSriastradh#if NO_FILL
609003b705cfSriastradh	return sna_blt_fill(sna, alu,
609103b705cfSriastradh			    dst_bo, dst->drawable.bitsPerPixel,
609203b705cfSriastradh			    color,
609303b705cfSriastradh			    tmp);
609403b705cfSriastradh#endif
609503b705cfSriastradh
609603b705cfSriastradh	/* Prefer to use the BLT if already engaged */
609703b705cfSriastradh	if (prefer_fill_blt(sna) &&
609803b705cfSriastradh	    sna_blt_fill(sna, alu,
609903b705cfSriastradh			 dst_bo, dst->drawable.bitsPerPixel,
610003b705cfSriastradh			 color,
610103b705cfSriastradh			 tmp))
610203b705cfSriastradh		return true;
610303b705cfSriastradh
610403b705cfSriastradh	/* Must use the BLT if we can't RENDER... */
610503b705cfSriastradh	if (!(alu == GXcopy || alu == GXclear) ||
610603b705cfSriastradh	    too_large(dst->drawable.width, dst->drawable.height) ||
610703b705cfSriastradh	    dst_bo->pitch > MAX_3D_PITCH)
610803b705cfSriastradh		return sna_blt_fill(sna, alu,
610903b705cfSriastradh				    dst_bo, dst->drawable.bitsPerPixel,
611003b705cfSriastradh				    color,
611103b705cfSriastradh				    tmp);
611203b705cfSriastradh
611303b705cfSriastradh	if (alu == GXclear)
611403b705cfSriastradh		color = 0;
611503b705cfSriastradh
611603b705cfSriastradh	tmp->base.op = color == 0 ? PictOpClear : PictOpSrc;
611703b705cfSriastradh	tmp->base.dst.pixmap = dst;
611803b705cfSriastradh	tmp->base.dst.width = dst->drawable.width;
611903b705cfSriastradh	tmp->base.dst.height = dst->drawable.height;
612003b705cfSriastradh	tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
612103b705cfSriastradh	tmp->base.dst.bo = dst_bo;
612203b705cfSriastradh	tmp->base.floats_per_vertex = 2;
612303b705cfSriastradh	tmp->base.floats_per_rect = 6;
612403b705cfSriastradh	tmp->base.need_magic_ca_pass = 0;
612503b705cfSriastradh	tmp->base.has_component_alpha = 0;
612603b705cfSriastradh	tmp->base.rb_reversed = 0;
612703b705cfSriastradh
612803b705cfSriastradh	gen3_init_solid(&tmp->base.src,
612903b705cfSriastradh			sna_rgba_for_color(color, dst->drawable.depth));
613003b705cfSriastradh	tmp->base.mask.bo = NULL;
613103b705cfSriastradh	tmp->base.mask.u.gen3.type = SHADER_NONE;
613203b705cfSriastradh	tmp->base.u.gen3.num_constants = 0;
613303b705cfSriastradh
613403b705cfSriastradh	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
613503b705cfSriastradh		kgem_submit(&sna->kgem);
613642542f5fSchristos		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL))
613742542f5fSchristos			return false;
613803b705cfSriastradh	}
613903b705cfSriastradh
614003b705cfSriastradh	tmp->blt   = gen3_render_fill_op_blt;
614103b705cfSriastradh	tmp->box   = gen3_render_fill_op_box;
614203b705cfSriastradh	tmp->boxes = gen3_render_fill_op_boxes;
614342542f5fSchristos	tmp->points = NULL;
614403b705cfSriastradh	tmp->done  = gen3_render_fill_op_done;
614503b705cfSriastradh
614603b705cfSriastradh	gen3_align_vertex(sna, &tmp->base);
614742542f5fSchristos	gen3_emit_composite_state(sna, &tmp->base);
614803b705cfSriastradh	return true;
614903b705cfSriastradh}
615003b705cfSriastradh
615103b705cfSriastradhstatic bool
615203b705cfSriastradhgen3_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
615303b705cfSriastradh			     uint32_t color,
615403b705cfSriastradh			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
615503b705cfSriastradh			     uint8_t alu)
615603b705cfSriastradh{
615703b705cfSriastradh	BoxRec box;
615803b705cfSriastradh
615903b705cfSriastradh	box.x1 = x1;
616003b705cfSriastradh	box.y1 = y1;
616103b705cfSriastradh	box.x2 = x2;
616203b705cfSriastradh	box.y2 = y2;
616303b705cfSriastradh
616403b705cfSriastradh	return sna_blt_fill_boxes(sna, alu,
616503b705cfSriastradh				  bo, dst->drawable.bitsPerPixel,
616603b705cfSriastradh				  color, &box, 1);
616703b705cfSriastradh}
616803b705cfSriastradh
616903b705cfSriastradhstatic bool
617003b705cfSriastradhgen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
617103b705cfSriastradh		     uint32_t color,
617203b705cfSriastradh		     int16_t x1, int16_t y1,
617303b705cfSriastradh		     int16_t x2, int16_t y2,
617403b705cfSriastradh		     uint8_t alu)
617503b705cfSriastradh{
617603b705cfSriastradh	struct sna_composite_op tmp;
617703b705cfSriastradh
617803b705cfSriastradh#if NO_FILL_ONE
617903b705cfSriastradh	return gen3_render_fill_one_try_blt(sna, dst, bo, color,
618003b705cfSriastradh					    x1, y1, x2, y2, alu);
618103b705cfSriastradh#endif
618203b705cfSriastradh
618303b705cfSriastradh	/* Prefer to use the BLT if already engaged */
618403b705cfSriastradh	if (prefer_fill_blt(sna) &&
618503b705cfSriastradh	    gen3_render_fill_one_try_blt(sna, dst, bo, color,
618603b705cfSriastradh					 x1, y1, x2, y2, alu))
618703b705cfSriastradh		return true;
618803b705cfSriastradh
618903b705cfSriastradh	/* Must use the BLT if we can't RENDER... */
619003b705cfSriastradh	if (!(alu == GXcopy || alu == GXclear) ||
619103b705cfSriastradh	    too_large(dst->drawable.width, dst->drawable.height) ||
619203b705cfSriastradh	    bo->pitch > MAX_3D_PITCH)
619303b705cfSriastradh		return gen3_render_fill_one_try_blt(sna, dst, bo, color,
619403b705cfSriastradh						    x1, y1, x2, y2, alu);
619503b705cfSriastradh
619603b705cfSriastradh	if (alu == GXclear)
619703b705cfSriastradh		color = 0;
619803b705cfSriastradh
619903b705cfSriastradh	tmp.op = color == 0 ? PictOpClear : PictOpSrc;
620003b705cfSriastradh	tmp.dst.pixmap = dst;
620103b705cfSriastradh	tmp.dst.width = dst->drawable.width;
620203b705cfSriastradh	tmp.dst.height = dst->drawable.height;
620303b705cfSriastradh	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
620403b705cfSriastradh	tmp.dst.bo = bo;
620503b705cfSriastradh	tmp.floats_per_vertex = 2;
620603b705cfSriastradh	tmp.floats_per_rect = 6;
620703b705cfSriastradh	tmp.need_magic_ca_pass = 0;
620803b705cfSriastradh	tmp.has_component_alpha = 0;
620903b705cfSriastradh	tmp.rb_reversed = 0;
621003b705cfSriastradh
621103b705cfSriastradh	gen3_init_solid(&tmp.src,
621203b705cfSriastradh			sna_rgba_for_color(color, dst->drawable.depth));
621303b705cfSriastradh	tmp.mask.bo = NULL;
621403b705cfSriastradh	tmp.mask.u.gen3.type = SHADER_NONE;
621503b705cfSriastradh	tmp.u.gen3.num_constants = 0;
621603b705cfSriastradh
621703b705cfSriastradh	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
621803b705cfSriastradh		kgem_submit(&sna->kgem);
621942542f5fSchristos
622003b705cfSriastradh		if (gen3_render_fill_one_try_blt(sna, dst, bo, color,
622103b705cfSriastradh						 x1, y1, x2, y2, alu))
622203b705cfSriastradh			return true;
622342542f5fSchristos
622442542f5fSchristos		if (!kgem_check_bo(&sna->kgem, bo, NULL))
622542542f5fSchristos			return false;
622603b705cfSriastradh	}
622703b705cfSriastradh
622803b705cfSriastradh	gen3_align_vertex(sna, &tmp);
622942542f5fSchristos	gen3_emit_composite_state(sna, &tmp);
623003b705cfSriastradh	gen3_get_rectangles(sna, &tmp, 1);
623103b705cfSriastradh	DBG(("	(%d, %d), (%d, %d): %x\n", x1, y1, x2, y2, color));
623203b705cfSriastradh	OUT_VERTEX(x2);
623303b705cfSriastradh	OUT_VERTEX(y2);
623403b705cfSriastradh	OUT_VERTEX(x1);
623503b705cfSriastradh	OUT_VERTEX(y2);
623603b705cfSriastradh	OUT_VERTEX(x1);
623703b705cfSriastradh	OUT_VERTEX(y1);
623803b705cfSriastradh	gen3_vertex_flush(sna);
623903b705cfSriastradh
624003b705cfSriastradh	return true;
624103b705cfSriastradh}
624203b705cfSriastradh
624303b705cfSriastradhstatic void gen3_render_flush(struct sna *sna)
624403b705cfSriastradh{
624503b705cfSriastradh	gen3_vertex_close(sna);
624603b705cfSriastradh
624703b705cfSriastradh	assert(sna->render.vertex_reloc[0] == 0);
624803b705cfSriastradh	assert(sna->render.vertex_offset == 0);
624903b705cfSriastradh}
625003b705cfSriastradh
/*
 * Render teardown hook; intentionally a no-op.
 */
static void
gen3_render_fini(struct sna *sna)
{
	/* nothing to do */
}
625503b705cfSriastradh
625603b705cfSriastradhconst char *gen3_render_init(struct sna *sna, const char *backend)
625703b705cfSriastradh{
625803b705cfSriastradh	struct sna_render *render = &sna->render;
625903b705cfSriastradh
626003b705cfSriastradh#if !NO_COMPOSITE
626103b705cfSriastradh	render->composite = gen3_render_composite;
626203b705cfSriastradh	render->prefer_gpu |= PREFER_GPU_RENDER;
626303b705cfSriastradh#endif
626403b705cfSriastradh#if !NO_COMPOSITE_SPANS
626503b705cfSriastradh	render->check_composite_spans = gen3_check_composite_spans;
626603b705cfSriastradh	render->composite_spans = gen3_render_composite_spans;
626703b705cfSriastradh	render->prefer_gpu |= PREFER_GPU_SPANS;
626803b705cfSriastradh#endif
626903b705cfSriastradh
627003b705cfSriastradh	render->video = gen3_render_video;
627103b705cfSriastradh
627203b705cfSriastradh	render->copy_boxes = gen3_render_copy_boxes;
627303b705cfSriastradh	render->copy = gen3_render_copy;
627403b705cfSriastradh
627503b705cfSriastradh	render->fill_boxes = gen3_render_fill_boxes;
627603b705cfSriastradh	render->fill = gen3_render_fill;
627703b705cfSriastradh	render->fill_one = gen3_render_fill_one;
627803b705cfSriastradh
627903b705cfSriastradh	render->reset = gen3_render_reset;
628003b705cfSriastradh	render->flush = gen3_render_flush;
628103b705cfSriastradh	render->fini = gen3_render_fini;
628203b705cfSriastradh
628303b705cfSriastradh	render->max_3d_size = MAX_3D_SIZE;
628403b705cfSriastradh	render->max_3d_pitch = MAX_3D_PITCH;
628503b705cfSriastradh
628603b705cfSriastradh	sna->kgem.retire = gen3_render_retire;
628703b705cfSriastradh	sna->kgem.expire = gen3_render_expire;
628803b705cfSriastradh	return "Alviso (gen3)";
628903b705cfSriastradh}
6290