gen3_render.c revision 42542f5f
103b705cfSriastradh/*
203b705cfSriastradh * Copyright © 2010-2011 Intel Corporation
303b705cfSriastradh *
403b705cfSriastradh * Permission is hereby granted, free of charge, to any person obtaining a
503b705cfSriastradh * copy of this software and associated documentation files (the "Software"),
603b705cfSriastradh * to deal in the Software without restriction, including without limitation
703b705cfSriastradh * the rights to use, copy, modify, merge, publish, distribute, sublicense,
803b705cfSriastradh * and/or sell copies of the Software, and to permit persons to whom the
903b705cfSriastradh * Software is furnished to do so, subject to the following conditions:
1003b705cfSriastradh *
1103b705cfSriastradh * The above copyright notice and this permission notice (including the next
1203b705cfSriastradh * paragraph) shall be included in all copies or substantial portions of the
1303b705cfSriastradh * Software.
1403b705cfSriastradh *
1503b705cfSriastradh * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1603b705cfSriastradh * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1703b705cfSriastradh * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1803b705cfSriastradh * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1903b705cfSriastradh * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2003b705cfSriastradh * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2103b705cfSriastradh * SOFTWARE.
2203b705cfSriastradh *
2303b705cfSriastradh * Authors:
2403b705cfSriastradh *    Chris Wilson <chris@chris-wilson.co.uk>
2503b705cfSriastradh *
2603b705cfSriastradh */
2703b705cfSriastradh
2803b705cfSriastradh#ifdef HAVE_CONFIG_H
2903b705cfSriastradh#include "config.h"
3003b705cfSriastradh#endif
3103b705cfSriastradh
3203b705cfSriastradh#include "sna.h"
3303b705cfSriastradh#include "sna_render.h"
3403b705cfSriastradh#include "sna_render_inline.h"
3503b705cfSriastradh#include "sna_reg.h"
3603b705cfSriastradh#include "sna_video.h"
3703b705cfSriastradh
3803b705cfSriastradh#include "gen3_render.h"
3903b705cfSriastradh
/*
 * Build-time switches, all 0 here.
 * NOTE(review): presumably a non-zero value disables the matching render
 * path; the code that tests these macros is outside this excerpt -- confirm
 * against their users before relying on that.
 */
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0

/* NOTE(review): presumably prefers the BLT engine for fills -- confirm. */
#define PREFER_BLT_FILL 1
4903b705cfSriastradh
/*
 * Shader kinds.  The numeric values are spelled out; they are exactly the
 * ones the implicit numbering from zero would give.
 */
enum {
	SHADER_NONE     = 0,
	SHADER_ZERO     = 1,
	SHADER_BLACK    = 2,
	SHADER_WHITE    = 3,
	SHADER_CONSTANT = 4,
	SHADER_LINEAR   = 5,
	SHADER_RADIAL   = 6,
	SHADER_TEXTURE  = 7,
	SHADER_OPACITY  = 8,
};
6103b705cfSriastradh
/* Limits tested by too_large() and gen3_check_pitch_3d() respectively. */
#define MAX_3D_SIZE 2048
#define MAX_3D_PITCH 8192

/*
 * Emission shorthands.  Each one expands to a call that names `sna`, so a
 * variable of that name must be visible wherever they are used.
 */
#define OUT_BATCH(v) batch_emit(sna, (v))
#define OUT_BATCH_F(v) batch_emit_float(sna, (v))
#define OUT_VERTEX(v) vertex_emit(sna, (v))
6803b705cfSriastradh
/* Radial mode selector; values match the implicit numbering from zero. */
enum gen3_radial_mode {
	RADIAL_ONE = 0,
	RADIAL_TWO = 1,
};
7303b705cfSriastradh
7403b705cfSriastradhstatic const struct blendinfo {
7503b705cfSriastradh	bool dst_alpha;
7603b705cfSriastradh	bool src_alpha;
7703b705cfSriastradh	uint32_t src_blend;
7803b705cfSriastradh	uint32_t dst_blend;
7903b705cfSriastradh} gen3_blend_op[] = {
8003b705cfSriastradh	/* Clear */	{0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO},
8103b705cfSriastradh	/* Src */	{0, 0, BLENDFACT_ONE, BLENDFACT_ZERO},
8203b705cfSriastradh	/* Dst */	{0, 0, BLENDFACT_ZERO, BLENDFACT_ONE},
8303b705cfSriastradh	/* Over */	{0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA},
8403b705cfSriastradh	/* OverReverse */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE},
8503b705cfSriastradh	/* In */	{1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO},
8603b705cfSriastradh	/* InReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA},
8703b705cfSriastradh	/* Out */	{1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO},
8803b705cfSriastradh	/* OutReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA},
8903b705cfSriastradh	/* Atop */	{1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
9003b705cfSriastradh	/* AtopReverse */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA},
9103b705cfSriastradh	/* Xor */	{1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
9203b705cfSriastradh	/* Add */	{0, 0, BLENDFACT_ONE, BLENDFACT_ONE},
9303b705cfSriastradh};
9403b705cfSriastradh
/*
 * S6 value with colour writes enabled and the blend fields set to
 * ADD / ONE / ZERO; S6_CBUF_BLEND_ENABLE is deliberately not included.
 */
#define S6_COLOR_WRITE_ONLY \
	(S6_COLOR_WRITE_ENABLE | \
	 (BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) | \
	 (BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT) | \
	 (BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT))
10003b705cfSriastradh
10103b705cfSriastradhstatic const struct formatinfo {
10203b705cfSriastradh	unsigned int fmt, xfmt;
10303b705cfSriastradh	uint32_t card_fmt;
10403b705cfSriastradh	bool rb_reversed;
10503b705cfSriastradh} gen3_tex_formats[] = {
10603b705cfSriastradh	{PICT_a8, 0, MAPSURF_8BIT | MT_8BIT_A8, false},
10703b705cfSriastradh	{PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, false},
10803b705cfSriastradh	{PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, false},
10903b705cfSriastradh	{PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, false},
11003b705cfSriastradh	{PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false},
11142542f5fSchristos#ifdef PICT_a2r10g10b10
11203b705cfSriastradh	{PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, false},
11303b705cfSriastradh	{PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, false},
11442542f5fSchristos#endif
11503b705cfSriastradh	{PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, false},
11603b705cfSriastradh	{PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, true},
11703b705cfSriastradh	{PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, false},
11803b705cfSriastradh	{PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, true},
11903b705cfSriastradh	{PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, false},
12003b705cfSriastradh	{PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, true},
12103b705cfSriastradh};
12203b705cfSriastradh
/* Local alias: forwards its argument to pixman_fixed_to_double(). */
#define xFixedToDouble(f) pixman_fixed_to_double(f)
12403b705cfSriastradh
12503b705cfSriastradhstatic inline bool too_large(int width, int height)
12603b705cfSriastradh{
12703b705cfSriastradh	return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
12803b705cfSriastradh}
12903b705cfSriastradh
13003b705cfSriastradhstatic inline uint32_t gen3_buf_tiling(uint32_t tiling)
13103b705cfSriastradh{
13203b705cfSriastradh	uint32_t v = 0;
13303b705cfSriastradh	switch (tiling) {
13403b705cfSriastradh	case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y;
13503b705cfSriastradh	case I915_TILING_X: v |= BUF_3D_TILED_SURFACE;
13603b705cfSriastradh	case I915_TILING_NONE: break;
13703b705cfSriastradh	}
13803b705cfSriastradh	return v;
13903b705cfSriastradh}
14003b705cfSriastradh
14103b705cfSriastradhstatic inline bool
14203b705cfSriastradhgen3_check_pitch_3d(struct kgem_bo *bo)
14303b705cfSriastradh{
14403b705cfSriastradh	return bo->pitch <= MAX_3D_PITCH;
14503b705cfSriastradh}
14603b705cfSriastradh
14703b705cfSriastradhstatic uint32_t gen3_get_blend_cntl(int op,
14803b705cfSriastradh				    bool has_component_alpha,
14903b705cfSriastradh				    uint32_t dst_format)
15003b705cfSriastradh{
15103b705cfSriastradh	uint32_t sblend = gen3_blend_op[op].src_blend;
15203b705cfSriastradh	uint32_t dblend = gen3_blend_op[op].dst_blend;
15303b705cfSriastradh
15403b705cfSriastradh	if (op <= PictOpSrc) /* for clear and src disable blending */
15503b705cfSriastradh		return S6_COLOR_WRITE_ONLY;
15603b705cfSriastradh
15703b705cfSriastradh	/* If there's no dst alpha channel, adjust the blend op so that we'll
15803b705cfSriastradh	 * treat it as always 1.
15903b705cfSriastradh	 */
16003b705cfSriastradh	if (gen3_blend_op[op].dst_alpha) {
16103b705cfSriastradh		if (PICT_FORMAT_A(dst_format) == 0) {
16203b705cfSriastradh			if (sblend == BLENDFACT_DST_ALPHA)
16303b705cfSriastradh				sblend = BLENDFACT_ONE;
16403b705cfSriastradh			else if (sblend == BLENDFACT_INV_DST_ALPHA)
16503b705cfSriastradh				sblend = BLENDFACT_ZERO;
16603b705cfSriastradh		}
16703b705cfSriastradh
16803b705cfSriastradh		/* gen3 engine reads 8bit color buffer into green channel
16903b705cfSriastradh		 * in cases like color buffer blending etc., and also writes
17003b705cfSriastradh		 * back green channel.  So with dst_alpha blend we should use
17103b705cfSriastradh		 * color factor. See spec on "8-bit rendering".
17203b705cfSriastradh		 */
17303b705cfSriastradh		if (dst_format == PICT_a8) {
17403b705cfSriastradh			if (sblend == BLENDFACT_DST_ALPHA)
17503b705cfSriastradh				sblend = BLENDFACT_DST_COLR;
17603b705cfSriastradh			else if (sblend == BLENDFACT_INV_DST_ALPHA)
17703b705cfSriastradh				sblend = BLENDFACT_INV_DST_COLR;
17803b705cfSriastradh		}
17903b705cfSriastradh	}
18003b705cfSriastradh
18103b705cfSriastradh	/* If the source alpha is being used, then we should only be in a case
18203b705cfSriastradh	 * where the source blend factor is 0, and the source blend value is the
18303b705cfSriastradh	 * mask channels multiplied by the source picture's alpha.
18403b705cfSriastradh	 */
18503b705cfSriastradh	if (has_component_alpha && gen3_blend_op[op].src_alpha) {
18603b705cfSriastradh		if (dblend == BLENDFACT_SRC_ALPHA)
18703b705cfSriastradh			dblend = BLENDFACT_SRC_COLR;
18803b705cfSriastradh		else if (dblend == BLENDFACT_INV_SRC_ALPHA)
18903b705cfSriastradh			dblend = BLENDFACT_INV_SRC_COLR;
19003b705cfSriastradh	}
19103b705cfSriastradh
19203b705cfSriastradh	return (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
19303b705cfSriastradh		BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
19403b705cfSriastradh		sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT |
19503b705cfSriastradh		dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
19603b705cfSriastradh}
19703b705cfSriastradh
19803b705cfSriastradhstatic bool gen3_check_dst_format(uint32_t format)
19903b705cfSriastradh{
20003b705cfSriastradh	switch (format) {
20103b705cfSriastradh	case PICT_a8r8g8b8:
20203b705cfSriastradh	case PICT_x8r8g8b8:
20303b705cfSriastradh	case PICT_a8b8g8r8:
20403b705cfSriastradh	case PICT_x8b8g8r8:
20503b705cfSriastradh	case PICT_r5g6b5:
20603b705cfSriastradh	case PICT_b5g6r5:
20703b705cfSriastradh	case PICT_a1r5g5b5:
20803b705cfSriastradh	case PICT_x1r5g5b5:
20903b705cfSriastradh	case PICT_a1b5g5r5:
21003b705cfSriastradh	case PICT_x1b5g5r5:
21142542f5fSchristos#ifdef PICT_a2r10g10b10
21203b705cfSriastradh	case PICT_a2r10g10b10:
21303b705cfSriastradh	case PICT_x2r10g10b10:
21403b705cfSriastradh	case PICT_a2b10g10r10:
21503b705cfSriastradh	case PICT_x2b10g10r10:
21642542f5fSchristos#endif
21703b705cfSriastradh	case PICT_a8:
21803b705cfSriastradh	case PICT_a4r4g4b4:
21903b705cfSriastradh	case PICT_x4r4g4b4:
22003b705cfSriastradh	case PICT_a4b4g4r4:
22103b705cfSriastradh	case PICT_x4b4g4r4:
22203b705cfSriastradh		return true;
22303b705cfSriastradh	default:
22403b705cfSriastradh		return false;
22503b705cfSriastradh	}
22603b705cfSriastradh}
22703b705cfSriastradh
22803b705cfSriastradhstatic bool gen3_dst_rb_reversed(uint32_t format)
22903b705cfSriastradh{
23003b705cfSriastradh	switch (format) {
23103b705cfSriastradh	case PICT_a8r8g8b8:
23203b705cfSriastradh	case PICT_x8r8g8b8:
23303b705cfSriastradh	case PICT_r5g6b5:
23403b705cfSriastradh	case PICT_a1r5g5b5:
23503b705cfSriastradh	case PICT_x1r5g5b5:
23642542f5fSchristos#ifdef PICT_a2r10g10b10
23703b705cfSriastradh	case PICT_a2r10g10b10:
23803b705cfSriastradh	case PICT_x2r10g10b10:
23942542f5fSchristos#endif
24003b705cfSriastradh	case PICT_a8:
24103b705cfSriastradh	case PICT_a4r4g4b4:
24203b705cfSriastradh	case PICT_x4r4g4b4:
24303b705cfSriastradh		return false;
24403b705cfSriastradh	default:
24503b705cfSriastradh		return true;
24603b705cfSriastradh	}
24703b705cfSriastradh}
24803b705cfSriastradh
/* Bias fields: horizontal starts at bit 20, vertical at bit 16. */
#define DSTORG_HORT_BIAS(x)             ((x) << 20)
#define DSTORG_VERT_BIAS(x)             ((x) << 16)
25103b705cfSriastradh
25203b705cfSriastradhstatic uint32_t gen3_get_dst_format(uint32_t format)
25303b705cfSriastradh{
25403b705cfSriastradh#define BIAS (DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8))
25503b705cfSriastradh	switch (format) {
25603b705cfSriastradh	default:
25703b705cfSriastradh	case PICT_a8r8g8b8:
25803b705cfSriastradh	case PICT_x8r8g8b8:
25903b705cfSriastradh	case PICT_a8b8g8r8:
26003b705cfSriastradh	case PICT_x8b8g8r8:
26103b705cfSriastradh		return BIAS | COLR_BUF_ARGB8888;
26203b705cfSriastradh	case PICT_r5g6b5:
26303b705cfSriastradh	case PICT_b5g6r5:
26403b705cfSriastradh		return BIAS | COLR_BUF_RGB565;
26503b705cfSriastradh	case PICT_a1r5g5b5:
26603b705cfSriastradh	case PICT_x1r5g5b5:
26703b705cfSriastradh	case PICT_a1b5g5r5:
26803b705cfSriastradh	case PICT_x1b5g5r5:
26903b705cfSriastradh		return BIAS | COLR_BUF_ARGB1555;
27042542f5fSchristos#ifdef PICT_a2r10g10b10
27103b705cfSriastradh	case PICT_a2r10g10b10:
27203b705cfSriastradh	case PICT_x2r10g10b10:
27303b705cfSriastradh	case PICT_a2b10g10r10:
27403b705cfSriastradh	case PICT_x2b10g10r10:
27503b705cfSriastradh		return BIAS | COLR_BUF_ARGB2AAA;
27642542f5fSchristos#endif
27703b705cfSriastradh	case PICT_a8:
27803b705cfSriastradh		return BIAS | COLR_BUF_8BIT;
27903b705cfSriastradh	case PICT_a4r4g4b4:
28003b705cfSriastradh	case PICT_x4r4g4b4:
28103b705cfSriastradh	case PICT_a4b4g4r4:
28203b705cfSriastradh	case PICT_x4b4g4r4:
28303b705cfSriastradh		return BIAS | COLR_BUF_ARGB4444;
28403b705cfSriastradh	}
28503b705cfSriastradh#undef BIAS
28603b705cfSriastradh}
28703b705cfSriastradh
28803b705cfSriastradhstatic bool gen3_check_format(PicturePtr p)
28903b705cfSriastradh{
29003b705cfSriastradh	switch (p->format) {
29103b705cfSriastradh	case PICT_a8:
29203b705cfSriastradh	case PICT_a8r8g8b8:
29303b705cfSriastradh	case PICT_x8r8g8b8:
29403b705cfSriastradh	case PICT_a8b8g8r8:
29503b705cfSriastradh	case PICT_x8b8g8r8:
29642542f5fSchristos#ifdef PICT_a2r10g10b10
29703b705cfSriastradh	case PICT_a2r10g10b10:
29803b705cfSriastradh	case PICT_a2b10g10r10:
29942542f5fSchristos#endif
30003b705cfSriastradh	case PICT_r5g6b5:
30103b705cfSriastradh	case PICT_b5g6r5:
30203b705cfSriastradh	case PICT_a1r5g5b5:
30303b705cfSriastradh	case PICT_a1b5g5r5:
30403b705cfSriastradh	case PICT_a4r4g4b4:
30503b705cfSriastradh	case PICT_a4b4g4r4:
30603b705cfSriastradh		return true;
30703b705cfSriastradh	default:
30803b705cfSriastradh		return false;
30903b705cfSriastradh	}
31003b705cfSriastradh}
31103b705cfSriastradh
31203b705cfSriastradhstatic bool gen3_check_xformat(PicturePtr p)
31303b705cfSriastradh{
31403b705cfSriastradh	switch (p->format) {
31503b705cfSriastradh	case PICT_a8r8g8b8:
31603b705cfSriastradh	case PICT_x8r8g8b8:
31703b705cfSriastradh	case PICT_a8b8g8r8:
31803b705cfSriastradh	case PICT_x8b8g8r8:
31903b705cfSriastradh	case PICT_r5g6b5:
32003b705cfSriastradh	case PICT_b5g6r5:
32103b705cfSriastradh	case PICT_a1r5g5b5:
32203b705cfSriastradh	case PICT_x1r5g5b5:
32303b705cfSriastradh	case PICT_a1b5g5r5:
32403b705cfSriastradh	case PICT_x1b5g5r5:
32542542f5fSchristos#ifdef PICT_a2r10g10b10
32603b705cfSriastradh	case PICT_a2r10g10b10:
32703b705cfSriastradh	case PICT_x2r10g10b10:
32803b705cfSriastradh	case PICT_a2b10g10r10:
32903b705cfSriastradh	case PICT_x2b10g10r10:
33042542f5fSchristos#endif
33103b705cfSriastradh	case PICT_a8:
33203b705cfSriastradh	case PICT_a4r4g4b4:
33303b705cfSriastradh	case PICT_x4r4g4b4:
33403b705cfSriastradh	case PICT_a4b4g4r4:
33503b705cfSriastradh	case PICT_x4b4g4r4:
33603b705cfSriastradh		return true;
33703b705cfSriastradh	default:
33803b705cfSriastradh		return false;
33903b705cfSriastradh	}
34003b705cfSriastradh}
34103b705cfSriastradh
34203b705cfSriastradhstatic uint32_t gen3_texture_repeat(uint32_t repeat)
34303b705cfSriastradh{
34403b705cfSriastradh#define REPEAT(x) \
34503b705cfSriastradh	(SS3_NORMALIZED_COORDS | \
34603b705cfSriastradh	 TEXCOORDMODE_##x << SS3_TCX_ADDR_MODE_SHIFT | \
34703b705cfSriastradh	 TEXCOORDMODE_##x << SS3_TCY_ADDR_MODE_SHIFT)
34803b705cfSriastradh	switch (repeat) {
34903b705cfSriastradh	default:
35003b705cfSriastradh	case RepeatNone:
35103b705cfSriastradh		return REPEAT(CLAMP_BORDER);
35203b705cfSriastradh	case RepeatNormal:
35303b705cfSriastradh		return REPEAT(WRAP);
35403b705cfSriastradh	case RepeatPad:
35503b705cfSriastradh		return REPEAT(CLAMP_EDGE);
35603b705cfSriastradh	case RepeatReflect:
35703b705cfSriastradh		return REPEAT(MIRROR);
35803b705cfSriastradh	}
35903b705cfSriastradh#undef REPEAT
36003b705cfSriastradh}
36103b705cfSriastradh
36203b705cfSriastradhstatic uint32_t gen3_gradient_repeat(uint32_t repeat)
36303b705cfSriastradh{
36403b705cfSriastradh#define REPEAT(x) \
36503b705cfSriastradh	(SS3_NORMALIZED_COORDS | \
36603b705cfSriastradh	 TEXCOORDMODE_##x  << SS3_TCX_ADDR_MODE_SHIFT | \
36703b705cfSriastradh	 TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT)
36803b705cfSriastradh	switch (repeat) {
36903b705cfSriastradh	default:
37003b705cfSriastradh	case RepeatNone:
37103b705cfSriastradh		return REPEAT(CLAMP_BORDER);
37203b705cfSriastradh	case RepeatNormal:
37303b705cfSriastradh		return REPEAT(WRAP);
37403b705cfSriastradh	case RepeatPad:
37503b705cfSriastradh		return REPEAT(CLAMP_EDGE);
37603b705cfSriastradh	case RepeatReflect:
37703b705cfSriastradh		return REPEAT(MIRROR);
37803b705cfSriastradh	}
37903b705cfSriastradh#undef REPEAT
38003b705cfSriastradh}
38103b705cfSriastradh
38203b705cfSriastradhstatic bool gen3_check_repeat(PicturePtr p)
38303b705cfSriastradh{
38403b705cfSriastradh	if (!p->repeat)
38503b705cfSriastradh		return true;
38603b705cfSriastradh
38703b705cfSriastradh	switch (p->repeatType) {
38803b705cfSriastradh	case RepeatNone:
38903b705cfSriastradh	case RepeatNormal:
39003b705cfSriastradh	case RepeatPad:
39103b705cfSriastradh	case RepeatReflect:
39203b705cfSriastradh		return true;
39303b705cfSriastradh	default:
39403b705cfSriastradh		return false;
39503b705cfSriastradh	}
39603b705cfSriastradh}
39703b705cfSriastradh
39803b705cfSriastradhstatic uint32_t gen3_filter(uint32_t filter)
39903b705cfSriastradh{
40003b705cfSriastradh	switch (filter) {
40103b705cfSriastradh	default:
40203b705cfSriastradh		assert(0);
40303b705cfSriastradh	case PictFilterNearest:
40403b705cfSriastradh		return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
40503b705cfSriastradh			FILTER_NEAREST << SS2_MIN_FILTER_SHIFT |
40603b705cfSriastradh			MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
40703b705cfSriastradh	case PictFilterBilinear:
40803b705cfSriastradh		return (FILTER_LINEAR  << SS2_MAG_FILTER_SHIFT |
40903b705cfSriastradh			FILTER_LINEAR  << SS2_MIN_FILTER_SHIFT |
41003b705cfSriastradh			MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
41103b705cfSriastradh	}
41203b705cfSriastradh}
41303b705cfSriastradh
41403b705cfSriastradhstatic bool gen3_check_filter(PicturePtr p)
41503b705cfSriastradh{
41603b705cfSriastradh	switch (p->filter) {
41703b705cfSriastradh	case PictFilterNearest:
41803b705cfSriastradh	case PictFilterBilinear:
41903b705cfSriastradh		return true;
42003b705cfSriastradh	default:
42103b705cfSriastradh		return false;
42203b705cfSriastradh	}
42303b705cfSriastradh}
42403b705cfSriastradh
/* Emits one destination coordinate pair (x first, then y) as vertex data. */
static inline void
gen3_emit_composite_dstcoord(struct sna *sna, int16_t dstX, int16_t dstY)
{
	vertex_emit(sna, dstX);
	vertex_emit(sna, dstY);
}
43103b705cfSriastradh
43203b705cfSriastradhfastcall static void
43303b705cfSriastradhgen3_emit_composite_primitive_constant(struct sna *sna,
43403b705cfSriastradh				       const struct sna_composite_op *op,
43503b705cfSriastradh				       const struct sna_composite_rectangles *r)
43603b705cfSriastradh{
43703b705cfSriastradh	int16_t dst_x = r->dst.x + op->dst.x;
43803b705cfSriastradh	int16_t dst_y = r->dst.y + op->dst.y;
43903b705cfSriastradh
44003b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
44103b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
44203b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
44303b705cfSriastradh}
44403b705cfSriastradh
44503b705cfSriastradhfastcall static void
44603b705cfSriastradhgen3_emit_composite_boxes_constant(const struct sna_composite_op *op,
44703b705cfSriastradh				   const BoxRec *box, int nbox,
44803b705cfSriastradh				   float *v)
44903b705cfSriastradh{
45003b705cfSriastradh	do {
45103b705cfSriastradh		v[0] = box->x2;
45203b705cfSriastradh		v[1] = box->y2;
45303b705cfSriastradh
45403b705cfSriastradh		v[2] = box->x1;
45503b705cfSriastradh		v[3] = box->y2;
45603b705cfSriastradh
45703b705cfSriastradh		v[4] = box->x1;
45803b705cfSriastradh		v[5] = box->y1;
45903b705cfSriastradh
46003b705cfSriastradh		box++;
46103b705cfSriastradh		v += 6;
46203b705cfSriastradh	} while (--nbox);
46303b705cfSriastradh}
46403b705cfSriastradh
46503b705cfSriastradhfastcall static void
46603b705cfSriastradhgen3_emit_composite_primitive_identity_gradient(struct sna *sna,
46703b705cfSriastradh						const struct sna_composite_op *op,
46803b705cfSriastradh						const struct sna_composite_rectangles *r)
46903b705cfSriastradh{
47003b705cfSriastradh	int16_t dst_x, dst_y;
47103b705cfSriastradh	int16_t src_x, src_y;
47203b705cfSriastradh
47303b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
47403b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
47503b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
47603b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
47703b705cfSriastradh
47803b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
47903b705cfSriastradh	OUT_VERTEX(src_x + r->width);
48003b705cfSriastradh	OUT_VERTEX(src_y + r->height);
48103b705cfSriastradh
48203b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
48303b705cfSriastradh	OUT_VERTEX(src_x);
48403b705cfSriastradh	OUT_VERTEX(src_y + r->height);
48503b705cfSriastradh
48603b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
48703b705cfSriastradh	OUT_VERTEX(src_x);
48803b705cfSriastradh	OUT_VERTEX(src_y);
48903b705cfSriastradh}
49003b705cfSriastradh
49103b705cfSriastradhfastcall static void
49203b705cfSriastradhgen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op,
49303b705cfSriastradh					    const BoxRec *box, int nbox,
49403b705cfSriastradh					    float *v)
49503b705cfSriastradh{
49603b705cfSriastradh	do {
49703b705cfSriastradh		v[0] = box->x2;
49803b705cfSriastradh		v[1] = box->y2;
49903b705cfSriastradh		v[2] = box->x2 + op->src.offset[0];
50003b705cfSriastradh		v[3] = box->y2 + op->src.offset[1];
50103b705cfSriastradh
50203b705cfSriastradh		v[4] = box->x1;
50303b705cfSriastradh		v[5] = box->y2;
50403b705cfSriastradh		v[6] = box->x1 + op->src.offset[0];
50503b705cfSriastradh		v[7] = box->y2 + op->src.offset[1];
50603b705cfSriastradh
50703b705cfSriastradh		v[8] = box->x1;
50803b705cfSriastradh		v[9] = box->y1;
50903b705cfSriastradh		v[10] = box->x1 + op->src.offset[0];
51003b705cfSriastradh		v[11] = box->y1 + op->src.offset[1];
51103b705cfSriastradh
51203b705cfSriastradh		v += 12;
51303b705cfSriastradh		box++;
51403b705cfSriastradh	} while (--nbox);
51503b705cfSriastradh}
51603b705cfSriastradh
51703b705cfSriastradhfastcall static void
51803b705cfSriastradhgen3_emit_composite_primitive_affine_gradient(struct sna *sna,
51903b705cfSriastradh					      const struct sna_composite_op *op,
52003b705cfSriastradh					      const struct sna_composite_rectangles *r)
52103b705cfSriastradh{
52203b705cfSriastradh	PictTransform *transform = op->src.transform;
52303b705cfSriastradh	int16_t dst_x, dst_y;
52403b705cfSriastradh	int16_t src_x, src_y;
52503b705cfSriastradh	float *v;
52603b705cfSriastradh
52703b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
52803b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
52903b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
53003b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
53103b705cfSriastradh
53203b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
53303b705cfSriastradh	sna->render.vertex_used += 12;
53403b705cfSriastradh
53503b705cfSriastradh	v[0] = dst_x + r->width;
53603b705cfSriastradh	v[1] = dst_y + r->height;
53703b705cfSriastradh	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
53803b705cfSriastradh				    transform, op->src.scale,
53903b705cfSriastradh				    &v[2], &v[3]);
54003b705cfSriastradh
54103b705cfSriastradh	v[4] = dst_x;
54203b705cfSriastradh	v[5] = dst_y + r->height;
54303b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y + r->height,
54403b705cfSriastradh				    transform, op->src.scale,
54503b705cfSriastradh				    &v[6], &v[7]);
54603b705cfSriastradh
54703b705cfSriastradh	v[8] = dst_x;
54803b705cfSriastradh	v[9] = dst_y;
54903b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y,
55003b705cfSriastradh				    transform, op->src.scale,
55103b705cfSriastradh				    &v[10], &v[11]);
55203b705cfSriastradh}
55303b705cfSriastradh
55403b705cfSriastradhfastcall static void
55503b705cfSriastradhgen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op,
55603b705cfSriastradh					  const BoxRec *box, int nbox,
55703b705cfSriastradh					  float *v)
55803b705cfSriastradh{
55903b705cfSriastradh	const PictTransform *transform = op->src.transform;
56003b705cfSriastradh
56103b705cfSriastradh	do {
56203b705cfSriastradh		v[0] = box->x2;
56303b705cfSriastradh		v[1] = box->y2;
56403b705cfSriastradh		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
56503b705cfSriastradh					    box->y2 + op->src.offset[1],
56603b705cfSriastradh					    transform, op->src.scale,
56703b705cfSriastradh					    &v[2], &v[3]);
56803b705cfSriastradh
56903b705cfSriastradh		v[4] = box->x1;
57003b705cfSriastradh		v[5] = box->y2;
57103b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
57203b705cfSriastradh					    box->y2 + op->src.offset[1],
57303b705cfSriastradh					    transform, op->src.scale,
57403b705cfSriastradh					    &v[6], &v[7]);
57503b705cfSriastradh
57603b705cfSriastradh		v[8] = box->x1;
57703b705cfSriastradh		v[9] = box->y1;
57803b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
57903b705cfSriastradh					    box->y1 + op->src.offset[1],
58003b705cfSriastradh					    transform, op->src.scale,
58103b705cfSriastradh					    &v[10], &v[11]);
58203b705cfSriastradh
58303b705cfSriastradh		box++;
58403b705cfSriastradh		v += 12;
58503b705cfSriastradh	} while (--nbox);
58603b705cfSriastradh}
58703b705cfSriastradh
58803b705cfSriastradhfastcall static void
58903b705cfSriastradhgen3_emit_composite_primitive_identity_source(struct sna *sna,
59003b705cfSriastradh					      const struct sna_composite_op *op,
59103b705cfSriastradh					      const struct sna_composite_rectangles *r)
59203b705cfSriastradh{
59303b705cfSriastradh	float w = r->width;
59403b705cfSriastradh	float h = r->height;
59503b705cfSriastradh	float *v;
59603b705cfSriastradh
59703b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
59803b705cfSriastradh	sna->render.vertex_used += 12;
59903b705cfSriastradh
60003b705cfSriastradh	v[8] = v[4] = r->dst.x + op->dst.x;
60103b705cfSriastradh	v[0] = v[4] + w;
60203b705cfSriastradh
60303b705cfSriastradh	v[9] = r->dst.y + op->dst.y;
60403b705cfSriastradh	v[5] = v[1] = v[9] + h;
60503b705cfSriastradh
60603b705cfSriastradh	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
60703b705cfSriastradh	v[2] = v[6] + w * op->src.scale[0];
60803b705cfSriastradh
60903b705cfSriastradh	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
61003b705cfSriastradh	v[7] = v[3] = v[11] + h * op->src.scale[1];
61103b705cfSriastradh}
61203b705cfSriastradh
61303b705cfSriastradhfastcall static void
61403b705cfSriastradhgen3_emit_composite_boxes_identity_source(const struct sna_composite_op *op,
61503b705cfSriastradh					  const BoxRec *box, int nbox,
61603b705cfSriastradh					  float *v)
61703b705cfSriastradh{
61803b705cfSriastradh	do {
61903b705cfSriastradh		v[0] = box->x2 + op->dst.x;
62003b705cfSriastradh		v[8] = v[4] = box->x1 + op->dst.x;
62103b705cfSriastradh		v[5] = v[1] = box->y2 + op->dst.y;
62203b705cfSriastradh		v[9] = box->y1 + op->dst.y;
62303b705cfSriastradh
62403b705cfSriastradh		v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
62503b705cfSriastradh		v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
62603b705cfSriastradh
62703b705cfSriastradh		v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
62803b705cfSriastradh		v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
62903b705cfSriastradh
63003b705cfSriastradh		v += 12;
63103b705cfSriastradh		box++;
63203b705cfSriastradh	} while (--nbox);
63303b705cfSriastradh}
63403b705cfSriastradh
63503b705cfSriastradhfastcall static void
63603b705cfSriastradhgen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
63703b705cfSriastradh							const struct sna_composite_op *op,
63803b705cfSriastradh							const struct sna_composite_rectangles *r)
63903b705cfSriastradh{
64003b705cfSriastradh	float w = r->width;
64103b705cfSriastradh	float h = r->height;
64203b705cfSriastradh	float *v;
64303b705cfSriastradh
64403b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
64503b705cfSriastradh	sna->render.vertex_used += 12;
64603b705cfSriastradh
64703b705cfSriastradh	v[8] = v[4] = r->dst.x;
64803b705cfSriastradh	v[9] = r->dst.y;
64903b705cfSriastradh
65003b705cfSriastradh	v[0] = v[4] + w;
65103b705cfSriastradh	v[5] = v[1] = v[9] + h;
65203b705cfSriastradh
65303b705cfSriastradh	v[10] = v[6] = r->src.x * op->src.scale[0];
65403b705cfSriastradh	v[11] = r->src.y * op->src.scale[1];
65503b705cfSriastradh
65603b705cfSriastradh	v[2] = v[6] + w * op->src.scale[0];
65703b705cfSriastradh	v[7] = v[3] = v[11] + h * op->src.scale[1];
65803b705cfSriastradh}
65903b705cfSriastradh
66003b705cfSriastradhfastcall static void
66103b705cfSriastradhgen3_emit_composite_boxes_identity_source_no_offset(const struct sna_composite_op *op,
66203b705cfSriastradh						    const BoxRec *box, int nbox,
66303b705cfSriastradh						    float *v)
66403b705cfSriastradh{
66503b705cfSriastradh	do {
66603b705cfSriastradh		v[0] = box->x2;
66703b705cfSriastradh		v[8] = v[4] = box->x1;
66803b705cfSriastradh		v[5] = v[1] = box->y2;
66903b705cfSriastradh		v[9] = box->y1;
67003b705cfSriastradh
67103b705cfSriastradh		v[10] = v[6] = box->x1 * op->src.scale[0];
67203b705cfSriastradh		v[2] = box->x2 * op->src.scale[0];
67303b705cfSriastradh
67403b705cfSriastradh		v[11] = box->y1 * op->src.scale[1];
67503b705cfSriastradh		v[7] = v[3] = box->y2 * op->src.scale[1];
67603b705cfSriastradh
67703b705cfSriastradh		v += 12;
67803b705cfSriastradh		box++;
67903b705cfSriastradh	} while (--nbox);
68003b705cfSriastradh}
68103b705cfSriastradh
68203b705cfSriastradhfastcall static void
68303b705cfSriastradhgen3_emit_composite_primitive_affine_source(struct sna *sna,
68403b705cfSriastradh					    const struct sna_composite_op *op,
68503b705cfSriastradh					    const struct sna_composite_rectangles *r)
68603b705cfSriastradh{
68703b705cfSriastradh	PictTransform *transform = op->src.transform;
68803b705cfSriastradh	int16_t dst_x = r->dst.x + op->dst.x;
68903b705cfSriastradh	int16_t dst_y = r->dst.y + op->dst.y;
69003b705cfSriastradh	int src_x = r->src.x + (int)op->src.offset[0];
69103b705cfSriastradh	int src_y = r->src.y + (int)op->src.offset[1];
69203b705cfSriastradh	float *v;
69303b705cfSriastradh
69403b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
69503b705cfSriastradh	sna->render.vertex_used += 12;
69603b705cfSriastradh
69703b705cfSriastradh	v[0] = dst_x + r->width;
69803b705cfSriastradh	v[5] = v[1] = dst_y + r->height;
69903b705cfSriastradh	v[8] = v[4] = dst_x;
70003b705cfSriastradh	v[9] = dst_y;
70103b705cfSriastradh
70203b705cfSriastradh	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
70303b705cfSriastradh				    transform, op->src.scale,
70403b705cfSriastradh				    &v[2], &v[3]);
70503b705cfSriastradh
70603b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y + r->height,
70703b705cfSriastradh				    transform, op->src.scale,
70803b705cfSriastradh				    &v[6], &v[7]);
70903b705cfSriastradh
71003b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y,
71103b705cfSriastradh				    transform, op->src.scale,
71203b705cfSriastradh				    &v[10], &v[11]);
71303b705cfSriastradh}
71403b705cfSriastradh
71503b705cfSriastradhfastcall static void
71603b705cfSriastradhgen3_emit_composite_boxes_affine_source(const struct sna_composite_op *op,
71703b705cfSriastradh					const BoxRec *box, int nbox,
71803b705cfSriastradh					float *v)
71903b705cfSriastradh{
72003b705cfSriastradh	const PictTransform *transform = op->src.transform;
72103b705cfSriastradh
72203b705cfSriastradh	do {
72303b705cfSriastradh		v[0] = box->x2;
72403b705cfSriastradh		v[5] = v[1] = box->y2;
72503b705cfSriastradh		v[8] = v[4] = box->x1;
72603b705cfSriastradh		v[9] = box->y1;
72703b705cfSriastradh
72803b705cfSriastradh		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
72903b705cfSriastradh					    box->y2 + op->src.offset[1],
73003b705cfSriastradh					    transform, op->src.scale,
73103b705cfSriastradh					    &v[2], &v[3]);
73203b705cfSriastradh
73303b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
73403b705cfSriastradh					    box->y2 + op->src.offset[1],
73503b705cfSriastradh					    transform, op->src.scale,
73603b705cfSriastradh					    &v[6], &v[7]);
73703b705cfSriastradh
73803b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
73903b705cfSriastradh					    box->y1 + op->src.offset[1],
74003b705cfSriastradh					    transform, op->src.scale,
74103b705cfSriastradh					    &v[10], &v[11]);
74203b705cfSriastradh
74303b705cfSriastradh		v += 12;
74403b705cfSriastradh		box++;
74503b705cfSriastradh	} while (--nbox);
74603b705cfSriastradh}
74703b705cfSriastradh
74803b705cfSriastradhfastcall static void
74903b705cfSriastradhgen3_emit_composite_primitive_constant_identity_mask(struct sna *sna,
75003b705cfSriastradh						     const struct sna_composite_op *op,
75103b705cfSriastradh						     const struct sna_composite_rectangles *r)
75203b705cfSriastradh{
75303b705cfSriastradh	float w = r->width;
75403b705cfSriastradh	float h = r->height;
75503b705cfSriastradh	float *v;
75603b705cfSriastradh
75703b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
75803b705cfSriastradh	sna->render.vertex_used += 12;
75903b705cfSriastradh
76003b705cfSriastradh	v[8] = v[4] = r->dst.x + op->dst.x;
76103b705cfSriastradh	v[0] = v[4] + w;
76203b705cfSriastradh
76303b705cfSriastradh	v[9] = r->dst.y + op->dst.y;
76403b705cfSriastradh	v[5] = v[1] = v[9] + h;
76503b705cfSriastradh
76603b705cfSriastradh	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
76703b705cfSriastradh	v[2] = v[6] + w * op->mask.scale[0];
76803b705cfSriastradh
76903b705cfSriastradh	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
77003b705cfSriastradh	v[7] = v[3] = v[11] + h * op->mask.scale[1];
77103b705cfSriastradh}
77203b705cfSriastradh
77303b705cfSriastradhfastcall static void
77403b705cfSriastradhgen3_emit_composite_primitive_constant_identity_mask_no_offset(struct sna *sna,
77503b705cfSriastradh							       const struct sna_composite_op *op,
77603b705cfSriastradh							       const struct sna_composite_rectangles *r)
77703b705cfSriastradh{
77803b705cfSriastradh	float w = r->width;
77903b705cfSriastradh	float h = r->height;
78003b705cfSriastradh	float *v;
78103b705cfSriastradh
78203b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
78303b705cfSriastradh	sna->render.vertex_used += 12;
78403b705cfSriastradh
78503b705cfSriastradh	v[8] = v[4] = r->dst.x;
78603b705cfSriastradh	v[9] = r->dst.y;
78703b705cfSriastradh
78803b705cfSriastradh	v[0] = v[4] + w;
78903b705cfSriastradh	v[5] = v[1] = v[9] + h;
79003b705cfSriastradh
79103b705cfSriastradh	v[10] = v[6] = r->mask.x * op->mask.scale[0];
79203b705cfSriastradh	v[11] = r->mask.y * op->mask.scale[1];
79303b705cfSriastradh
79403b705cfSriastradh	v[2] = v[6] + w * op->mask.scale[0];
79503b705cfSriastradh	v[7] = v[3] = v[11] + h * op->mask.scale[1];
79603b705cfSriastradh}
79703b705cfSriastradh
79803b705cfSriastradhfastcall static void
79903b705cfSriastradhgen3_emit_composite_primitive_identity_source_mask(struct sna *sna,
80003b705cfSriastradh						   const struct sna_composite_op *op,
80103b705cfSriastradh						   const struct sna_composite_rectangles *r)
80203b705cfSriastradh{
80303b705cfSriastradh	float dst_x, dst_y;
80403b705cfSriastradh	float src_x, src_y;
80503b705cfSriastradh	float msk_x, msk_y;
80603b705cfSriastradh	float w, h;
80703b705cfSriastradh	float *v;
80803b705cfSriastradh
80903b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
81003b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
81103b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
81203b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
81303b705cfSriastradh	msk_x = r->mask.x + op->mask.offset[0];
81403b705cfSriastradh	msk_y = r->mask.y + op->mask.offset[1];
81503b705cfSriastradh	w = r->width;
81603b705cfSriastradh	h = r->height;
81703b705cfSriastradh
81803b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
81903b705cfSriastradh	sna->render.vertex_used += 18;
82003b705cfSriastradh
82103b705cfSriastradh	v[0] = dst_x + w;
82203b705cfSriastradh	v[1] = dst_y + h;
82303b705cfSriastradh	v[2] = (src_x + w) * op->src.scale[0];
82403b705cfSriastradh	v[3] = (src_y + h) * op->src.scale[1];
82503b705cfSriastradh	v[4] = (msk_x + w) * op->mask.scale[0];
82603b705cfSriastradh	v[5] = (msk_y + h) * op->mask.scale[1];
82703b705cfSriastradh
82803b705cfSriastradh	v[6] = dst_x;
82903b705cfSriastradh	v[7] = v[1];
83003b705cfSriastradh	v[8] = src_x * op->src.scale[0];
83103b705cfSriastradh	v[9] = v[3];
83203b705cfSriastradh	v[10] = msk_x * op->mask.scale[0];
83303b705cfSriastradh	v[11] =v[5];
83403b705cfSriastradh
83503b705cfSriastradh	v[12] = v[6];
83603b705cfSriastradh	v[13] = dst_y;
83703b705cfSriastradh	v[14] = v[8];
83803b705cfSriastradh	v[15] = src_y * op->src.scale[1];
83903b705cfSriastradh	v[16] = v[10];
84003b705cfSriastradh	v[17] = msk_y * op->mask.scale[1];
84103b705cfSriastradh}
84203b705cfSriastradh
84303b705cfSriastradhfastcall static void
84403b705cfSriastradhgen3_emit_composite_primitive_affine_source_mask(struct sna *sna,
84503b705cfSriastradh						 const struct sna_composite_op *op,
84603b705cfSriastradh						 const struct sna_composite_rectangles *r)
84703b705cfSriastradh{
84803b705cfSriastradh	int16_t src_x, src_y;
84903b705cfSriastradh	float dst_x, dst_y;
85003b705cfSriastradh	float msk_x, msk_y;
85103b705cfSriastradh	float w, h;
85203b705cfSriastradh	float *v;
85303b705cfSriastradh
85403b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
85503b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
85603b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
85703b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
85803b705cfSriastradh	msk_x = r->mask.x + op->mask.offset[0];
85903b705cfSriastradh	msk_y = r->mask.y + op->mask.offset[1];
86003b705cfSriastradh	w = r->width;
86103b705cfSriastradh	h = r->height;
86203b705cfSriastradh
86303b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
86403b705cfSriastradh	sna->render.vertex_used += 18;
86503b705cfSriastradh
86603b705cfSriastradh	v[0] = dst_x + w;
86703b705cfSriastradh	v[1] = dst_y + h;
86803b705cfSriastradh	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
86903b705cfSriastradh				    op->src.transform, op->src.scale,
87003b705cfSriastradh				    &v[2], &v[3]);
87103b705cfSriastradh	v[4] = (msk_x + w) * op->mask.scale[0];
87203b705cfSriastradh	v[5] = (msk_y + h) * op->mask.scale[1];
87303b705cfSriastradh
87403b705cfSriastradh	v[6] = dst_x;
87503b705cfSriastradh	v[7] = v[1];
87603b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y + r->height,
87703b705cfSriastradh				    op->src.transform, op->src.scale,
87803b705cfSriastradh				    &v[8], &v[9]);
87903b705cfSriastradh	v[10] = msk_x * op->mask.scale[0];
88003b705cfSriastradh	v[11] =v[5];
88103b705cfSriastradh
88203b705cfSriastradh	v[12] = v[6];
88303b705cfSriastradh	v[13] = dst_y;
88403b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y,
88503b705cfSriastradh				    op->src.transform, op->src.scale,
88603b705cfSriastradh				    &v[14], &v[15]);
88703b705cfSriastradh	v[16] = v[10];
88803b705cfSriastradh	v[17] = msk_y * op->mask.scale[1];
88903b705cfSriastradh}
89003b705cfSriastradh
89103b705cfSriastradhstatic void
89203b705cfSriastradhgen3_emit_composite_texcoord(struct sna *sna,
89303b705cfSriastradh			     const struct sna_composite_channel *channel,
89403b705cfSriastradh			     int16_t x, int16_t y)
89503b705cfSriastradh{
89603b705cfSriastradh	float s = 0, t = 0, w = 1;
89703b705cfSriastradh
89803b705cfSriastradh	switch (channel->u.gen3.type) {
89903b705cfSriastradh	case SHADER_OPACITY:
90003b705cfSriastradh	case SHADER_NONE:
90103b705cfSriastradh	case SHADER_ZERO:
90203b705cfSriastradh	case SHADER_BLACK:
90303b705cfSriastradh	case SHADER_WHITE:
90403b705cfSriastradh	case SHADER_CONSTANT:
90503b705cfSriastradh		break;
90603b705cfSriastradh
90703b705cfSriastradh	case SHADER_LINEAR:
90803b705cfSriastradh	case SHADER_RADIAL:
90903b705cfSriastradh	case SHADER_TEXTURE:
91003b705cfSriastradh		x += channel->offset[0];
91103b705cfSriastradh		y += channel->offset[1];
91203b705cfSriastradh		if (channel->is_affine) {
91303b705cfSriastradh			sna_get_transformed_coordinates(x, y,
91403b705cfSriastradh							channel->transform,
91503b705cfSriastradh							&s, &t);
91603b705cfSriastradh			OUT_VERTEX(s * channel->scale[0]);
91703b705cfSriastradh			OUT_VERTEX(t * channel->scale[1]);
91803b705cfSriastradh		} else {
91903b705cfSriastradh			sna_get_transformed_coordinates_3d(x, y,
92003b705cfSriastradh							   channel->transform,
92103b705cfSriastradh							   &s, &t, &w);
92203b705cfSriastradh			OUT_VERTEX(s * channel->scale[0]);
92303b705cfSriastradh			OUT_VERTEX(t * channel->scale[1]);
92403b705cfSriastradh			OUT_VERTEX(0);
92503b705cfSriastradh			OUT_VERTEX(w);
92603b705cfSriastradh		}
92703b705cfSriastradh		break;
92803b705cfSriastradh	}
92903b705cfSriastradh}
93003b705cfSriastradh
93103b705cfSriastradhstatic void
93203b705cfSriastradhgen3_emit_composite_vertex(struct sna *sna,
93303b705cfSriastradh			   const struct sna_composite_op *op,
93403b705cfSriastradh			   int16_t srcX, int16_t srcY,
93503b705cfSriastradh			   int16_t maskX, int16_t maskY,
93603b705cfSriastradh			   int16_t dstX, int16_t dstY)
93703b705cfSriastradh{
93803b705cfSriastradh	gen3_emit_composite_dstcoord(sna, dstX, dstY);
93903b705cfSriastradh	gen3_emit_composite_texcoord(sna, &op->src, srcX, srcY);
94003b705cfSriastradh	gen3_emit_composite_texcoord(sna, &op->mask, maskX, maskY);
94103b705cfSriastradh}
94203b705cfSriastradh
94303b705cfSriastradhfastcall static void
94403b705cfSriastradhgen3_emit_composite_primitive(struct sna *sna,
94503b705cfSriastradh			      const struct sna_composite_op *op,
94603b705cfSriastradh			      const struct sna_composite_rectangles *r)
94703b705cfSriastradh{
94803b705cfSriastradh	gen3_emit_composite_vertex(sna, op,
94903b705cfSriastradh				   r->src.x + r->width,
95003b705cfSriastradh				   r->src.y + r->height,
95103b705cfSriastradh				   r->mask.x + r->width,
95203b705cfSriastradh				   r->mask.y + r->height,
95303b705cfSriastradh				   op->dst.x + r->dst.x + r->width,
95403b705cfSriastradh				   op->dst.y + r->dst.y + r->height);
95503b705cfSriastradh	gen3_emit_composite_vertex(sna, op,
95603b705cfSriastradh				   r->src.x,
95703b705cfSriastradh				   r->src.y + r->height,
95803b705cfSriastradh				   r->mask.x,
95903b705cfSriastradh				   r->mask.y + r->height,
96003b705cfSriastradh				   op->dst.x + r->dst.x,
96103b705cfSriastradh				   op->dst.y + r->dst.y + r->height);
96203b705cfSriastradh	gen3_emit_composite_vertex(sna, op,
96303b705cfSriastradh				   r->src.x,
96403b705cfSriastradh				   r->src.y,
96503b705cfSriastradh				   r->mask.x,
96603b705cfSriastradh				   r->mask.y,
96703b705cfSriastradh				   op->dst.x + r->dst.x,
96803b705cfSriastradh				   op->dst.y + r->dst.y);
96903b705cfSriastradh}
97003b705cfSriastradh
97103b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
97203b705cfSriastradhsse2 fastcall static void
97303b705cfSriastradhgen3_emit_composite_primitive_constant__sse2(struct sna *sna,
97403b705cfSriastradh					     const struct sna_composite_op *op,
97503b705cfSriastradh					     const struct sna_composite_rectangles *r)
97603b705cfSriastradh{
97703b705cfSriastradh	float *v;
97803b705cfSriastradh
97903b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
98003b705cfSriastradh	sna->render.vertex_used += 6;
98103b705cfSriastradh
98203b705cfSriastradh	v[4] = v[2] = r->dst.x + op->dst.x;
98303b705cfSriastradh	v[5] = r->dst.y + op->dst.y;
98403b705cfSriastradh
98503b705cfSriastradh	v[0] = v[2] + r->width;
98603b705cfSriastradh	v[3] = v[1] = v[5] + r->height;
98703b705cfSriastradh
98803b705cfSriastradh}
98903b705cfSriastradh
99003b705cfSriastradhsse2 fastcall static void
99103b705cfSriastradhgen3_emit_composite_boxes_constant__sse2(const struct sna_composite_op *op,
99203b705cfSriastradh					 const BoxRec *box, int nbox,
99303b705cfSriastradh					 float *v)
99403b705cfSriastradh{
99503b705cfSriastradh	do {
99603b705cfSriastradh		v[0] = box->x2;
99703b705cfSriastradh		v[3] = v[1] = box->y2;
99803b705cfSriastradh		v[4] = v[2] = box->x1;
99903b705cfSriastradh		v[5] = box->y1;
100003b705cfSriastradh
100103b705cfSriastradh		box++;
100203b705cfSriastradh		v += 6;
100303b705cfSriastradh	} while (--nbox);
100403b705cfSriastradh}
100503b705cfSriastradh
100603b705cfSriastradhsse2 fastcall static void
100703b705cfSriastradhgen3_emit_composite_primitive_identity_gradient__sse2(struct sna *sna,
100803b705cfSriastradh						      const struct sna_composite_op *op,
100903b705cfSriastradh						      const struct sna_composite_rectangles *r)
101003b705cfSriastradh{
101103b705cfSriastradh	int16_t x, y;
101203b705cfSriastradh	float *v;
101303b705cfSriastradh
101403b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
101503b705cfSriastradh	sna->render.vertex_used += 12;
101603b705cfSriastradh
101703b705cfSriastradh	x = r->dst.x + op->dst.x;
101803b705cfSriastradh	y = r->dst.y + op->dst.y;
101903b705cfSriastradh	v[0] = x + r->width;
102003b705cfSriastradh	v[5] = v[1] = y + r->height;
102103b705cfSriastradh	v[8] = v[4] = x;
102203b705cfSriastradh	v[9] = y;
102303b705cfSriastradh
102403b705cfSriastradh	x = r->src.x + op->src.offset[0];
102503b705cfSriastradh	y = r->src.y + op->src.offset[1];
102603b705cfSriastradh	v[2] = x + r->width;
102703b705cfSriastradh	v[7] = v[3] = y + r->height;
102803b705cfSriastradh	v[10] = v[6] = x;
102903b705cfSriastradh	v[11] = y;
103003b705cfSriastradh}
103103b705cfSriastradh
103203b705cfSriastradhsse2 fastcall static void
103303b705cfSriastradhgen3_emit_composite_boxes_identity_gradient__sse2(const struct sna_composite_op *op,
103403b705cfSriastradh						  const BoxRec *box, int nbox,
103503b705cfSriastradh						  float *v)
103603b705cfSriastradh{
103703b705cfSriastradh	do {
103803b705cfSriastradh		v[0] = box->x2;
103903b705cfSriastradh		v[5] = v[1] = box->y2;
104003b705cfSriastradh		v[8] = v[4] = box->x1;
104103b705cfSriastradh		v[9] = box->y1;
104203b705cfSriastradh
104303b705cfSriastradh		v[2] = box->x2 + op->src.offset[0];
104403b705cfSriastradh		v[7] = v[3] = box->y2 + op->src.offset[1];
104503b705cfSriastradh		v[10] = v[6] = box->x1 + op->src.offset[0];
104603b705cfSriastradh		v[11] = box->y1 + op->src.offset[1];
104703b705cfSriastradh
104803b705cfSriastradh		v += 12;
104903b705cfSriastradh		box++;
105003b705cfSriastradh	} while (--nbox);
105103b705cfSriastradh}
105203b705cfSriastradh
105303b705cfSriastradhsse2 fastcall static void
105403b705cfSriastradhgen3_emit_composite_primitive_affine_gradient__sse2(struct sna *sna,
105503b705cfSriastradh						    const struct sna_composite_op *op,
105603b705cfSriastradh						    const struct sna_composite_rectangles *r)
105703b705cfSriastradh{
105803b705cfSriastradh	PictTransform *transform = op->src.transform;
105903b705cfSriastradh	int16_t dst_x, dst_y;
106003b705cfSriastradh	int16_t src_x, src_y;
106103b705cfSriastradh	float *v;
106203b705cfSriastradh
106303b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
106403b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
106503b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
106603b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
106703b705cfSriastradh
106803b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
106903b705cfSriastradh	sna->render.vertex_used += 12;
107003b705cfSriastradh
107103b705cfSriastradh	v[0] = dst_x + r->width;
107203b705cfSriastradh	v[1] = dst_y + r->height;
107303b705cfSriastradh	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
107403b705cfSriastradh				    transform, op->src.scale,
107503b705cfSriastradh				    &v[2], &v[3]);
107603b705cfSriastradh
107703b705cfSriastradh	v[4] = dst_x;
107803b705cfSriastradh	v[5] = dst_y + r->height;
107903b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y + r->height,
108003b705cfSriastradh				    transform, op->src.scale,
108103b705cfSriastradh				    &v[6], &v[7]);
108203b705cfSriastradh
108303b705cfSriastradh	v[8] = dst_x;
108403b705cfSriastradh	v[9] = dst_y;
108503b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y,
108603b705cfSriastradh				    transform, op->src.scale,
108703b705cfSriastradh				    &v[10], &v[11]);
108803b705cfSriastradh}
108903b705cfSriastradh
109003b705cfSriastradhsse2 fastcall static void
109103b705cfSriastradhgen3_emit_composite_boxes_affine_gradient__sse2(const struct sna_composite_op *op,
109203b705cfSriastradh						const BoxRec *box, int nbox,
109303b705cfSriastradh						float *v)
109403b705cfSriastradh{
109503b705cfSriastradh	const PictTransform *transform = op->src.transform;
109603b705cfSriastradh
109703b705cfSriastradh	do {
109803b705cfSriastradh		v[0] = box->x2;
109903b705cfSriastradh		v[1] = box->y2;
110003b705cfSriastradh		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
110103b705cfSriastradh					    box->y2 + op->src.offset[1],
110203b705cfSriastradh					    transform, op->src.scale,
110303b705cfSriastradh					    &v[2], &v[3]);
110403b705cfSriastradh
110503b705cfSriastradh		v[4] = box->x1;
110603b705cfSriastradh		v[5] = box->y2;
110703b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
110803b705cfSriastradh					    box->y2 + op->src.offset[1],
110903b705cfSriastradh					    transform, op->src.scale,
111003b705cfSriastradh					    &v[6], &v[7]);
111103b705cfSriastradh
111203b705cfSriastradh		v[8] = box->x1;
111303b705cfSriastradh		v[9] = box->y1;
111403b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
111503b705cfSriastradh					    box->y1 + op->src.offset[1],
111603b705cfSriastradh					    transform, op->src.scale,
111703b705cfSriastradh					    &v[10], &v[11]);
111803b705cfSriastradh
111903b705cfSriastradh		box++;
112003b705cfSriastradh		v += 12;
112103b705cfSriastradh	} while (--nbox);
112203b705cfSriastradh}
112303b705cfSriastradh
112403b705cfSriastradhsse2 fastcall static void
112503b705cfSriastradhgen3_emit_composite_primitive_identity_source__sse2(struct sna *sna,
112603b705cfSriastradh						    const struct sna_composite_op *op,
112703b705cfSriastradh						    const struct sna_composite_rectangles *r)
112803b705cfSriastradh{
112903b705cfSriastradh	float w = r->width;
113003b705cfSriastradh	float h = r->height;
113103b705cfSriastradh	float *v;
113203b705cfSriastradh
113303b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
113403b705cfSriastradh	sna->render.vertex_used += 12;
113503b705cfSriastradh
113603b705cfSriastradh	v[8] = v[4] = r->dst.x + op->dst.x;
113703b705cfSriastradh	v[0] = v[4] + w;
113803b705cfSriastradh
113903b705cfSriastradh	v[9] = r->dst.y + op->dst.y;
114003b705cfSriastradh	v[5] = v[1] = v[9] + h;
114103b705cfSriastradh
114203b705cfSriastradh	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
114303b705cfSriastradh	v[2] = v[6] + w * op->src.scale[0];
114403b705cfSriastradh
114503b705cfSriastradh	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
114603b705cfSriastradh	v[7] = v[3] = v[11] + h * op->src.scale[1];
114703b705cfSriastradh}
114803b705cfSriastradh
114903b705cfSriastradhsse2 fastcall static void
115003b705cfSriastradhgen3_emit_composite_boxes_identity_source__sse2(const struct sna_composite_op *op,
115103b705cfSriastradh						const BoxRec *box, int nbox,
115203b705cfSriastradh						float *v)
115303b705cfSriastradh{
115403b705cfSriastradh	do {
115503b705cfSriastradh		v[0] = box->x2 + op->dst.x;
115603b705cfSriastradh		v[8] = v[4] = box->x1 + op->dst.x;
115703b705cfSriastradh		v[5] = v[1] = box->y2 + op->dst.y;
115803b705cfSriastradh		v[9] = box->y1 + op->dst.y;
115903b705cfSriastradh
116003b705cfSriastradh		v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
116103b705cfSriastradh		v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
116203b705cfSriastradh
116303b705cfSriastradh		v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
116403b705cfSriastradh		v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
116503b705cfSriastradh
116603b705cfSriastradh		v += 12;
116703b705cfSriastradh		box++;
116803b705cfSriastradh	} while (--nbox);
116903b705cfSriastradh}
117003b705cfSriastradh
117103b705cfSriastradhsse2 fastcall static void
117203b705cfSriastradhgen3_emit_composite_primitive_identity_source_no_offset__sse2(struct sna *sna,
117303b705cfSriastradh							      const struct sna_composite_op *op,
117403b705cfSriastradh							      const struct sna_composite_rectangles *r)
117503b705cfSriastradh{
117603b705cfSriastradh	float w = r->width;
117703b705cfSriastradh	float h = r->height;
117803b705cfSriastradh	float *v;
117903b705cfSriastradh
118003b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
118103b705cfSriastradh	sna->render.vertex_used += 12;
118203b705cfSriastradh
118303b705cfSriastradh	v[8] = v[4] = r->dst.x;
118403b705cfSriastradh	v[9] = r->dst.y;
118503b705cfSriastradh
118603b705cfSriastradh	v[0] = v[4] + w;
118703b705cfSriastradh	v[5] = v[1] = v[9] + h;
118803b705cfSriastradh
118903b705cfSriastradh	v[10] = v[6] = r->src.x * op->src.scale[0];
119003b705cfSriastradh	v[11] = r->src.y * op->src.scale[1];
119103b705cfSriastradh
119203b705cfSriastradh	v[2] = v[6] + w * op->src.scale[0];
119303b705cfSriastradh	v[7] = v[3] = v[11] + h * op->src.scale[1];
119403b705cfSriastradh}
119503b705cfSriastradh
119603b705cfSriastradhsse2 fastcall static void
119703b705cfSriastradhgen3_emit_composite_boxes_identity_source_no_offset__sse2(const struct sna_composite_op *op,
119803b705cfSriastradh							  const BoxRec *box, int nbox,
119903b705cfSriastradh							  float *v)
120003b705cfSriastradh{
120103b705cfSriastradh	do {
120203b705cfSriastradh		v[0] = box->x2;
120303b705cfSriastradh		v[8] = v[4] = box->x1;
120403b705cfSriastradh		v[5] = v[1] = box->y2;
120503b705cfSriastradh		v[9] = box->y1;
120603b705cfSriastradh
120703b705cfSriastradh		v[10] = v[6] = box->x1 * op->src.scale[0];
120803b705cfSriastradh		v[2] = box->x2 * op->src.scale[0];
120903b705cfSriastradh
121003b705cfSriastradh		v[11] = box->y1 * op->src.scale[1];
121103b705cfSriastradh		v[7] = v[3] = box->y2 * op->src.scale[1];
121203b705cfSriastradh
121303b705cfSriastradh		v += 12;
121403b705cfSriastradh		box++;
121503b705cfSriastradh	} while (--nbox);
121603b705cfSriastradh}
121703b705cfSriastradh
121803b705cfSriastradhsse2 fastcall static void
121903b705cfSriastradhgen3_emit_composite_primitive_affine_source__sse2(struct sna *sna,
122003b705cfSriastradh						  const struct sna_composite_op *op,
122103b705cfSriastradh						  const struct sna_composite_rectangles *r)
122203b705cfSriastradh{
122303b705cfSriastradh	PictTransform *transform = op->src.transform;
122403b705cfSriastradh	int16_t dst_x = r->dst.x + op->dst.x;
122503b705cfSriastradh	int16_t dst_y = r->dst.y + op->dst.y;
122603b705cfSriastradh	int src_x = r->src.x + (int)op->src.offset[0];
122703b705cfSriastradh	int src_y = r->src.y + (int)op->src.offset[1];
122803b705cfSriastradh	float *v;
122903b705cfSriastradh
123003b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
123103b705cfSriastradh	sna->render.vertex_used += 12;
123203b705cfSriastradh
123303b705cfSriastradh	v[0] = dst_x + r->width;
123403b705cfSriastradh	v[5] = v[1] = dst_y + r->height;
123503b705cfSriastradh	v[8] = v[4] = dst_x;
123603b705cfSriastradh	v[9] = dst_y;
123703b705cfSriastradh
123803b705cfSriastradh	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
123903b705cfSriastradh				    transform, op->src.scale,
124003b705cfSriastradh				    &v[2], &v[3]);
124103b705cfSriastradh
124203b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y + r->height,
124303b705cfSriastradh				    transform, op->src.scale,
124403b705cfSriastradh				    &v[6], &v[7]);
124503b705cfSriastradh
124603b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y,
124703b705cfSriastradh				    transform, op->src.scale,
124803b705cfSriastradh				    &v[10], &v[11]);
124903b705cfSriastradh}
125003b705cfSriastradh
125103b705cfSriastradhsse2 fastcall static void
125203b705cfSriastradhgen3_emit_composite_boxes_affine_source__sse2(const struct sna_composite_op *op,
125303b705cfSriastradh					      const BoxRec *box, int nbox,
125403b705cfSriastradh					      float *v)
125503b705cfSriastradh{
125603b705cfSriastradh	const PictTransform *transform = op->src.transform;
125703b705cfSriastradh
125803b705cfSriastradh	do {
125903b705cfSriastradh		v[0] = box->x2;
126003b705cfSriastradh		v[5] = v[1] = box->y2;
126103b705cfSriastradh		v[8] = v[4] = box->x1;
126203b705cfSriastradh		v[9] = box->y1;
126303b705cfSriastradh
126403b705cfSriastradh		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
126503b705cfSriastradh					    box->y2 + op->src.offset[1],
126603b705cfSriastradh					    transform, op->src.scale,
126703b705cfSriastradh					    &v[2], &v[3]);
126803b705cfSriastradh
126903b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
127003b705cfSriastradh					    box->y2 + op->src.offset[1],
127103b705cfSriastradh					    transform, op->src.scale,
127203b705cfSriastradh					    &v[6], &v[7]);
127303b705cfSriastradh
127403b705cfSriastradh		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
127503b705cfSriastradh					    box->y1 + op->src.offset[1],
127603b705cfSriastradh					    transform, op->src.scale,
127703b705cfSriastradh					    &v[10], &v[11]);
127803b705cfSriastradh
127903b705cfSriastradh		v += 12;
128003b705cfSriastradh		box++;
128103b705cfSriastradh	} while (--nbox);
128203b705cfSriastradh}
128303b705cfSriastradh
128403b705cfSriastradhsse2 fastcall static void
128503b705cfSriastradhgen3_emit_composite_primitive_constant_identity_mask__sse2(struct sna *sna,
128603b705cfSriastradh							   const struct sna_composite_op *op,
128703b705cfSriastradh							   const struct sna_composite_rectangles *r)
128803b705cfSriastradh{
128903b705cfSriastradh	float w = r->width;
129003b705cfSriastradh	float h = r->height;
129103b705cfSriastradh	float *v;
129203b705cfSriastradh
129303b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
129403b705cfSriastradh	sna->render.vertex_used += 12;
129503b705cfSriastradh
129603b705cfSriastradh	v[8] = v[4] = r->dst.x + op->dst.x;
129703b705cfSriastradh	v[0] = v[4] + w;
129803b705cfSriastradh
129903b705cfSriastradh	v[9] = r->dst.y + op->dst.y;
130003b705cfSriastradh	v[5] = v[1] = v[9] + h;
130103b705cfSriastradh
130203b705cfSriastradh	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
130303b705cfSriastradh	v[2] = v[6] + w * op->mask.scale[0];
130403b705cfSriastradh
130503b705cfSriastradh	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
130603b705cfSriastradh	v[7] = v[3] = v[11] + h * op->mask.scale[1];
130703b705cfSriastradh}
130803b705cfSriastradh
130903b705cfSriastradhsse2 fastcall static void
131003b705cfSriastradhgen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2(struct sna *sna,
131103b705cfSriastradh								     const struct sna_composite_op *op,
131203b705cfSriastradh								     const struct sna_composite_rectangles *r)
131303b705cfSriastradh{
131403b705cfSriastradh	float w = r->width;
131503b705cfSriastradh	float h = r->height;
131603b705cfSriastradh	float *v;
131703b705cfSriastradh
131803b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
131903b705cfSriastradh	sna->render.vertex_used += 12;
132003b705cfSriastradh
132103b705cfSriastradh	v[8] = v[4] = r->dst.x;
132203b705cfSriastradh	v[9] = r->dst.y;
132303b705cfSriastradh
132403b705cfSriastradh	v[0] = v[4] + w;
132503b705cfSriastradh	v[5] = v[1] = v[9] + h;
132603b705cfSriastradh
132703b705cfSriastradh	v[10] = v[6] = r->mask.x * op->mask.scale[0];
132803b705cfSriastradh	v[11] = r->mask.y * op->mask.scale[1];
132903b705cfSriastradh
133003b705cfSriastradh	v[2] = v[6] + w * op->mask.scale[0];
133103b705cfSriastradh	v[7] = v[3] = v[11] + h * op->mask.scale[1];
133203b705cfSriastradh}
133303b705cfSriastradh
133403b705cfSriastradhsse2 fastcall static void
133503b705cfSriastradhgen3_emit_composite_primitive_identity_source_mask__sse2(struct sna *sna,
133603b705cfSriastradh							 const struct sna_composite_op *op,
133703b705cfSriastradh							 const struct sna_composite_rectangles *r)
133803b705cfSriastradh{
133903b705cfSriastradh	float dst_x, dst_y;
134003b705cfSriastradh	float src_x, src_y;
134103b705cfSriastradh	float msk_x, msk_y;
134203b705cfSriastradh	float w, h;
134303b705cfSriastradh	float *v;
134403b705cfSriastradh
134503b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
134603b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
134703b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
134803b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
134903b705cfSriastradh	msk_x = r->mask.x + op->mask.offset[0];
135003b705cfSriastradh	msk_y = r->mask.y + op->mask.offset[1];
135103b705cfSriastradh	w = r->width;
135203b705cfSriastradh	h = r->height;
135303b705cfSriastradh
135403b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
135503b705cfSriastradh	sna->render.vertex_used += 18;
135603b705cfSriastradh
135703b705cfSriastradh	v[0] = dst_x + w;
135803b705cfSriastradh	v[1] = dst_y + h;
135903b705cfSriastradh	v[2] = (src_x + w) * op->src.scale[0];
136003b705cfSriastradh	v[3] = (src_y + h) * op->src.scale[1];
136103b705cfSriastradh	v[4] = (msk_x + w) * op->mask.scale[0];
136203b705cfSriastradh	v[5] = (msk_y + h) * op->mask.scale[1];
136303b705cfSriastradh
136403b705cfSriastradh	v[6] = dst_x;
136503b705cfSriastradh	v[7] = v[1];
136603b705cfSriastradh	v[8] = src_x * op->src.scale[0];
136703b705cfSriastradh	v[9] = v[3];
136803b705cfSriastradh	v[10] = msk_x * op->mask.scale[0];
136903b705cfSriastradh	v[11] =v[5];
137003b705cfSriastradh
137103b705cfSriastradh	v[12] = v[6];
137203b705cfSriastradh	v[13] = dst_y;
137303b705cfSriastradh	v[14] = v[8];
137403b705cfSriastradh	v[15] = src_y * op->src.scale[1];
137503b705cfSriastradh	v[16] = v[10];
137603b705cfSriastradh	v[17] = msk_y * op->mask.scale[1];
137703b705cfSriastradh}
137803b705cfSriastradh
137903b705cfSriastradhsse2 fastcall static void
138003b705cfSriastradhgen3_emit_composite_primitive_affine_source_mask__sse2(struct sna *sna,
138103b705cfSriastradh						       const struct sna_composite_op *op,
138203b705cfSriastradh						       const struct sna_composite_rectangles *r)
138303b705cfSriastradh{
138403b705cfSriastradh	int16_t src_x, src_y;
138503b705cfSriastradh	float dst_x, dst_y;
138603b705cfSriastradh	float msk_x, msk_y;
138703b705cfSriastradh	float w, h;
138803b705cfSriastradh	float *v;
138903b705cfSriastradh
139003b705cfSriastradh	dst_x = r->dst.x + op->dst.x;
139103b705cfSriastradh	dst_y = r->dst.y + op->dst.y;
139203b705cfSriastradh	src_x = r->src.x + op->src.offset[0];
139303b705cfSriastradh	src_y = r->src.y + op->src.offset[1];
139403b705cfSriastradh	msk_x = r->mask.x + op->mask.offset[0];
139503b705cfSriastradh	msk_y = r->mask.y + op->mask.offset[1];
139603b705cfSriastradh	w = r->width;
139703b705cfSriastradh	h = r->height;
139803b705cfSriastradh
139903b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
140003b705cfSriastradh	sna->render.vertex_used += 18;
140103b705cfSriastradh
140203b705cfSriastradh	v[0] = dst_x + w;
140303b705cfSriastradh	v[1] = dst_y + h;
140403b705cfSriastradh	_sna_get_transformed_scaled(src_x + r->width, src_y + r->height,
140503b705cfSriastradh				    op->src.transform, op->src.scale,
140603b705cfSriastradh				    &v[2], &v[3]);
140703b705cfSriastradh	v[4] = (msk_x + w) * op->mask.scale[0];
140803b705cfSriastradh	v[5] = (msk_y + h) * op->mask.scale[1];
140903b705cfSriastradh
141003b705cfSriastradh	v[6] = dst_x;
141103b705cfSriastradh	v[7] = v[1];
141203b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y + r->height,
141303b705cfSriastradh				    op->src.transform, op->src.scale,
141403b705cfSriastradh				    &v[8], &v[9]);
141503b705cfSriastradh	v[10] = msk_x * op->mask.scale[0];
141603b705cfSriastradh	v[11] =v[5];
141703b705cfSriastradh
141803b705cfSriastradh	v[12] = v[6];
141903b705cfSriastradh	v[13] = dst_y;
142003b705cfSriastradh	_sna_get_transformed_scaled(src_x, src_y,
142103b705cfSriastradh				    op->src.transform, op->src.scale,
142203b705cfSriastradh				    &v[14], &v[15]);
142303b705cfSriastradh	v[16] = v[10];
142403b705cfSriastradh	v[17] = msk_y * op->mask.scale[1];
142503b705cfSriastradh}
142603b705cfSriastradh#endif
142703b705cfSriastradh
/*
 * Emit the two shader instructions of a perspective divide: an rcp of the W
 * channel of register `in` written to `out`, followed by a mul of
 * (in.X, in.Y, ZERO, ONE) with `out`, again written to `out`.
 *
 * NOTE(review): the literal 0 passed to gen3_fs_rcp sits where other
 * gen3_fs_* calls in this file pass a MASK_* value; presumably it means "no
 * channel restriction" -- confirm against the macro definition.
 * NOTE(review): `sna` is not referenced directly; presumably the gen3_fs_*
 * macros pick it up implicitly to write into the batch -- confirm.
 */
142803b705cfSriastradhstatic inline void
142903b705cfSriastradhgen3_2d_perspective(struct sna *sna, int in, int out)
143003b705cfSriastradh{
	/* out = rcp(in.W) */
143103b705cfSriastradh	gen3_fs_rcp(out, 0, gen3_fs_operand(in, W, W, W, W));
	/* out = (in.X, in.Y, 0, 1) * out */
143203b705cfSriastradh	gen3_fs_mul(out,
143303b705cfSriastradh		    gen3_fs_operand(in, X, Y, ZERO, ONE),
143403b705cfSriastradh		    gen3_fs_operand_reg(out));
143503b705cfSriastradh}
143603b705cfSriastradh
/*
 * Emit the shader instructions that turn the coordinate in register `in`
 * into the lookup coordinate for a linear gradient, written to `out`.
 *
 * channel: supplies is_affine and the register number of the gradient
 *          constants (channel->u.gen3.constants).
 * in:      input coordinate register.
 * out:     destination register.
 *
 * For a non-affine channel the input is first run through
 * gen3_2d_perspective() into FS_U0, and FS_U0 is used as the input instead.
 */
143703b705cfSriastradhstatic inline void
143803b705cfSriastradhgen3_linear_coord(struct sna *sna,
143903b705cfSriastradh		  const struct sna_composite_channel *channel,
144003b705cfSriastradh		  int in, int out)
144103b705cfSriastradh{
144203b705cfSriastradh	int c = channel->u.gen3.constants;
144303b705cfSriastradh
144403b705cfSriastradh	if (!channel->is_affine) {
144503b705cfSriastradh		gen3_2d_perspective(sna, in, FS_U0);
144603b705cfSriastradh		in = FS_U0;
144703b705cfSriastradh	}
144803b705cfSriastradh
	/* Clear out, then write only its X channel: dp3((in.X, in.Y, 1, 0), c) */
144903b705cfSriastradh	gen3_fs_mov(out, gen3_fs_operand_zero());
145003b705cfSriastradh	gen3_fs_dp3(out, MASK_X,
145103b705cfSriastradh		    gen3_fs_operand(in, X, Y, ONE, ZERO),
145203b705cfSriastradh		    gen3_fs_operand_reg(c));
145303b705cfSriastradh}
145403b705cfSriastradh
/*
 * Emit the shader instructions that turn the coordinate in register `in`
 * into the lookup coordinate for a radial gradient, written to `out`.
 *
 * channel: supplies is_affine, the gradient mode (channel->u.gen3.mode) and
 *          the register number of the first gradient constant
 *          (channel->u.gen3.constants).  RADIAL_TWO also reads the register
 *          that follows it (c+1).
 * in:      input coordinate register.
 * out:     destination register.
 *
 * FS_U0 (and FS_U1 for RADIAL_TWO) are used as scratch registers.  For a
 * non-affine channel the input is first run through gen3_2d_perspective()
 * into FS_U0.  Modes other than RADIAL_ONE and RADIAL_TWO emit nothing
 * beyond that optional perspective step.
 *
 * NOTE(review): the per-instruction notes below assume mad(dst, mask, a, b, c)
 * computes a*b + c and dp2add(dst, mask, a, b, c) computes a.xy . b.xy + c --
 * confirm against the macro definitions.
 */
145503b705cfSriastradhstatic void
145603b705cfSriastradhgen3_radial_coord(struct sna *sna,
145703b705cfSriastradh		  const struct sna_composite_channel *channel,
145803b705cfSriastradh		  int in, int out)
145903b705cfSriastradh{
146003b705cfSriastradh	int c = channel->u.gen3.constants;
146103b705cfSriastradh
146203b705cfSriastradh	if (!channel->is_affine) {
146303b705cfSriastradh		gen3_2d_perspective(sna, in, FS_U0);
146403b705cfSriastradh		in = FS_U0;
146503b705cfSriastradh	}
146603b705cfSriastradh
146703b705cfSriastradh	switch (channel->u.gen3.mode) {
146803b705cfSriastradh	case RADIAL_ONE:
146903b705cfSriastradh		/*
147003b705cfSriastradh		   pdx = (x - c1x) / dr, pdy = (y - c1y) / dr;
147103b705cfSriastradh		   r² = pdx*pdx + pdy*pdy
147203b705cfSriastradh		   t = r²/sqrt(r²) - r1/dr;
147303b705cfSriastradh		   */
		/* u0.xy = in.xy * c.z - c.xy */
147403b705cfSriastradh		gen3_fs_mad(FS_U0, MASK_X | MASK_Y,
147503b705cfSriastradh			    gen3_fs_operand(in, X, Y, ZERO, ZERO),
147603b705cfSriastradh			    gen3_fs_operand(c, Z, Z, ZERO, ZERO),
147703b705cfSriastradh			    gen3_fs_operand(c, NEG_X, NEG_Y, ZERO, ZERO));
		/* u0.x = u0.x*u0.x + u0.y*u0.y  [r²] */
147803b705cfSriastradh		gen3_fs_dp2add(FS_U0, MASK_X,
147903b705cfSriastradh			       gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
148003b705cfSriastradh			       gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
148103b705cfSriastradh			       gen3_fs_operand_zero());
		/* out.x = rsq(r²), then out = r² * out.x + c.w */
148203b705cfSriastradh		gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U0, X, X, X, X));
148303b705cfSriastradh		gen3_fs_mad(out, 0,
148403b705cfSriastradh			    gen3_fs_operand(FS_U0, X, ZERO, ZERO, ZERO),
148503b705cfSriastradh			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
148603b705cfSriastradh			    gen3_fs_operand(c, W, ZERO, ZERO, ZERO));
148703b705cfSriastradh		break;
148803b705cfSriastradh
148903b705cfSriastradh	case RADIAL_TWO:
149003b705cfSriastradh		/*
149103b705cfSriastradh		   pdx = x - c1x, pdy = y - c1y;
149203b705cfSriastradh		   A = dx² + dy² - dr²
149303b705cfSriastradh		   B = -2*(pdx*dx + pdy*dy + r1*dr);
149403b705cfSriastradh		   C = pdx² + pdy² - r1²;
149503b705cfSriastradh		   det = B*B - 4*A*C;
149603b705cfSriastradh		   t = (-B + sqrt (det)) / (2 * A)
149703b705cfSriastradh		   */
149803b705cfSriastradh
149903b705cfSriastradh		/* u0.x = pdx, u0.y = pdy, u[0].z = r1; */
150003b705cfSriastradh		gen3_fs_add(FS_U0,
150103b705cfSriastradh			    gen3_fs_operand(in, X, Y, ZERO, ZERO),
150203b705cfSriastradh			    gen3_fs_operand(c, X, Y, Z, ZERO));
150303b705cfSriastradh		/* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */
150403b705cfSriastradh		gen3_fs_dp3(FS_U0, MASK_W,
150503b705cfSriastradh			    gen3_fs_operand(FS_U0, X, Y, ONE, ZERO),
150603b705cfSriastradh			    gen3_fs_operand(c+1, X, Y, Z, ZERO));
150703b705cfSriastradh		/* u1.x = pdx² + pdy² - r1²; [C] */
150803b705cfSriastradh		gen3_fs_dp3(FS_U1, MASK_X,
150903b705cfSriastradh			    gen3_fs_operand(FS_U0, X, Y, Z, ZERO),
151003b705cfSriastradh			    gen3_fs_operand(FS_U0, X, Y, NEG_Z, ZERO));
151103b705cfSriastradh		/* u1.x = C, u1.y = B, u1.z=-4*A; */
151203b705cfSriastradh		gen3_fs_mov_masked(FS_U1, MASK_Y, gen3_fs_operand(FS_U0, W, W, W, W));
151303b705cfSriastradh		gen3_fs_mov_masked(FS_U1, MASK_Z, gen3_fs_operand(c, W, W, W, W));
151403b705cfSriastradh		/* u1.x = B² - 4*A*C */
151503b705cfSriastradh		gen3_fs_dp2add(FS_U1, MASK_X,
151603b705cfSriastradh			       gen3_fs_operand(FS_U1, X, Y, ZERO, ZERO),
151703b705cfSriastradh			       gen3_fs_operand(FS_U1, Z, Y, ZERO, ZERO),
151803b705cfSriastradh			       gen3_fs_operand_zero());
151903b705cfSriastradh		/* out.x = -B + sqrt (B² - 4*A*C), */
		/* the square root is formed as rsq(det) * det */
152003b705cfSriastradh		gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U1, X, X, X, X));
152103b705cfSriastradh		gen3_fs_mad(out, MASK_X,
152203b705cfSriastradh			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
152303b705cfSriastradh			    gen3_fs_operand(FS_U1, X, ZERO, ZERO, ZERO),
152403b705cfSriastradh			    gen3_fs_operand(FS_U0, NEG_W, ZERO, ZERO, ZERO));
152503b705cfSriastradh		/* out.x = (-B + sqrt (B² - 4*A*C)) / (2 * A), */
		/* NOTE(review): presumably (c+1).w holds 1 / (2 * A) -- confirm with the code that uploads the constants */
152603b705cfSriastradh		gen3_fs_mul(out,
152703b705cfSriastradh			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
152803b705cfSriastradh			    gen3_fs_operand(c+1, W, ZERO, ZERO, ZERO));
152903b705cfSriastradh		break;
153003b705cfSriastradh	}
153103b705cfSriastradh}
153203b705cfSriastradh
/*
 * Emit the pixel-shader program for a composite operation, unless the
 * program it would select is the one already recorded in state->last_shader.
 *
 * op:    the composite operation; its src and mask channels, dst.format,
 *        has_component_alpha and rb_reversed select the program.
 * blend: index into gen3_blend_op[]; only its src_alpha flag is used here.
 *
 * The selection id packs: src type (low bits), src is_affine (bit 4), src
 * alpha_fixup (bit 5), src rb_reversed (bit 6); when a mask is present, mask
 * type (from bit 8), mask is_affine (bit 12), the blend op's src_alpha
 * (bit 13), has_component_alpha (bit 14), mask alpha_fixup (bit 15), mask
 * rb_reversed (bit 16); and finally dst_is_alpha (bit 24) and
 * op->rb_reversed (bit 25).
 *
 * One batch dword is reserved before the program and filled in at `done:`
 * with _3DSTATE_PIXEL_SHADER_PROGRAM | (length - 2), where length is the
 * number of dwords from the reserved slot to the end of the program.
 *
 * NOTE(review): src_reg is only assigned for CONSTANT, TEXTURE, LINEAR and
 * RADIAL sources, and mask_reg only for CONSTANT, TEXTURE, LINEAR and RADIAL
 * masks.  Several paths below read them without checking the type (e.g. the
 * alpha_fixup / rb_reversed fixups, or a WHITE mask reaching the final
 * combine); presumably callers never produce those combinations -- confirm.
 */
153303b705cfSriastradhstatic void
153403b705cfSriastradhgen3_composite_emit_shader(struct sna *sna,
153503b705cfSriastradh			   const struct sna_composite_op *op,
153603b705cfSriastradh			   uint8_t blend)
153703b705cfSriastradh{
	/* Set when PIXMAN_FORMAT_RGB() of the destination format is 0. */
153803b705cfSriastradh	bool dst_is_alpha = PIXMAN_FORMAT_RGB(op->dst.format) == 0;
153903b705cfSriastradh	const struct sna_composite_channel *src, *mask;
154003b705cfSriastradh	struct gen3_render_state *state = &sna->render_state.gen3;
154103b705cfSriastradh	uint32_t shader_offset, id;
154203b705cfSriastradh	int src_reg, mask_reg;
154303b705cfSriastradh	int t, length;
154403b705cfSriastradh
154503b705cfSriastradh	src = &op->src;
154603b705cfSriastradh	mask = &op->mask;
	/* A mask of type SHADER_NONE is treated as "no mask" from here on. */
154703b705cfSriastradh	if (mask->u.gen3.type == SHADER_NONE)
154803b705cfSriastradh		mask = NULL;
154903b705cfSriastradh
	/* Pack everything that selects the program into a single id. */
155003b705cfSriastradh	id = (src->u.gen3.type |
155103b705cfSriastradh	      src->is_affine << 4 |
155203b705cfSriastradh	      src->alpha_fixup << 5 |
155303b705cfSriastradh	      src->rb_reversed << 6);
155403b705cfSriastradh	if (mask) {
155503b705cfSriastradh		id |= (mask->u.gen3.type << 8 |
155603b705cfSriastradh		       mask->is_affine << 12 |
155703b705cfSriastradh		       gen3_blend_op[blend].src_alpha << 13 |
155803b705cfSriastradh		       op->has_component_alpha << 14 |
155903b705cfSriastradh		       mask->alpha_fixup << 15 |
156003b705cfSriastradh		       mask->rb_reversed << 16);
156103b705cfSriastradh	}
156203b705cfSriastradh	id |= dst_is_alpha << 24;
156303b705cfSriastradh	id |= op->rb_reversed << 25;
156403b705cfSriastradh
	/* Same program as the last one emitted: nothing to do. */
156503b705cfSriastradh	if (id == state->last_shader)
156603b705cfSriastradh		return;
156703b705cfSriastradh
156803b705cfSriastradh	state->last_shader = id;
156903b705cfSriastradh
	/* Reserve the header dword; it is written at done: once the length is known. */
157003b705cfSriastradh	shader_offset = sna->kgem.nbatch++;
	/* Declare the source inputs; t counts the FS_S0/FS_T0 pair taken by the source. */
157103b705cfSriastradh	t = 0;
157203b705cfSriastradh	switch (src->u.gen3.type) {
157303b705cfSriastradh	case SHADER_NONE:
157403b705cfSriastradh	case SHADER_OPACITY:
		/* Not valid as a source; with assertions compiled out this falls through to the break below. */
157503b705cfSriastradh		assert(0);
157603b705cfSriastradh	case SHADER_ZERO:
157703b705cfSriastradh	case SHADER_BLACK:
157803b705cfSriastradh	case SHADER_WHITE:
157903b705cfSriastradh		break;
158003b705cfSriastradh	case SHADER_CONSTANT:
158103b705cfSriastradh		gen3_fs_dcl(FS_T8);
158203b705cfSriastradh		src_reg = FS_T8;
158303b705cfSriastradh		break;
158403b705cfSriastradh	case SHADER_TEXTURE:
158503b705cfSriastradh	case SHADER_RADIAL:
158603b705cfSriastradh	case SHADER_LINEAR:
158703b705cfSriastradh		gen3_fs_dcl(FS_S0);
158803b705cfSriastradh		gen3_fs_dcl(FS_T0);
158903b705cfSriastradh		t++;
159003b705cfSriastradh		break;
159103b705cfSriastradh	}
159203b705cfSriastradh
159303b705cfSriastradh	if (mask == NULL) {
		/* Solid sources need a single mov of a fixed operand into the output colour. */
159403b705cfSriastradh		switch (src->u.gen3.type) {
159503b705cfSriastradh		case SHADER_ZERO:
159603b705cfSriastradh			gen3_fs_mov(FS_OC, gen3_fs_operand_zero());
159703b705cfSriastradh			goto done;
159803b705cfSriastradh		case SHADER_BLACK:
159903b705cfSriastradh			if (dst_is_alpha)
160003b705cfSriastradh				gen3_fs_mov(FS_OC, gen3_fs_operand_one());
160103b705cfSriastradh			else
160203b705cfSriastradh				gen3_fs_mov(FS_OC, gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ONE));
160303b705cfSriastradh			goto done;
160403b705cfSriastradh		case SHADER_WHITE:
160503b705cfSriastradh			gen3_fs_mov(FS_OC, gen3_fs_operand_one());
160603b705cfSriastradh			goto done;
160703b705cfSriastradh		}
		/* Alpha forced to one and only alpha is written: the result is the constant one. */
160803b705cfSriastradh		if (src->alpha_fixup && dst_is_alpha) {
160903b705cfSriastradh			gen3_fs_mov(FS_OC, gen3_fs_operand_one());
161003b705cfSriastradh			goto done;
161103b705cfSriastradh		}
161203b705cfSriastradh		/* No mask, so load directly to output color */
		/* ...unless a swizzle is needed afterwards, in which case stage the load in FS_R0. */
161303b705cfSriastradh		if (src->u.gen3.type != SHADER_CONSTANT) {
161403b705cfSriastradh			if (dst_is_alpha || src->rb_reversed ^ op->rb_reversed)
161503b705cfSriastradh				src_reg = FS_R0;
161603b705cfSriastradh			else
161703b705cfSriastradh				src_reg = FS_OC;
161803b705cfSriastradh		}
161903b705cfSriastradh		switch (src->u.gen3.type) {
162003b705cfSriastradh		case SHADER_LINEAR:
162103b705cfSriastradh			gen3_linear_coord(sna, src, FS_T0, FS_R0);
162203b705cfSriastradh			gen3_fs_texld(src_reg, FS_S0, FS_R0);
162303b705cfSriastradh			break;
162403b705cfSriastradh
162503b705cfSriastradh		case SHADER_RADIAL:
162603b705cfSriastradh			gen3_radial_coord(sna, src, FS_T0, FS_R0);
162703b705cfSriastradh			gen3_fs_texld(src_reg, FS_S0, FS_R0);
162803b705cfSriastradh			break;
162903b705cfSriastradh
163003b705cfSriastradh		case SHADER_TEXTURE:
163103b705cfSriastradh			if (src->is_affine)
163203b705cfSriastradh				gen3_fs_texld(src_reg, FS_S0, FS_T0);
163303b705cfSriastradh			else
163403b705cfSriastradh				gen3_fs_texldp(src_reg, FS_S0, FS_T0);
163503b705cfSriastradh			break;
163603b705cfSriastradh
163703b705cfSriastradh		case SHADER_NONE:
163803b705cfSriastradh		case SHADER_WHITE:
163903b705cfSriastradh		case SHADER_BLACK:
164003b705cfSriastradh		case SHADER_ZERO:
			/* Already handled (or rejected) above. */
164103b705cfSriastradh			assert(0);
164203b705cfSriastradh		case SHADER_CONSTANT:
164303b705cfSriastradh			break;
164403b705cfSriastradh		}
164503b705cfSriastradh
		/* Move the staged value to FS_OC, applying alpha fixup / alpha replication / R-B swap as one swizzle. */
164603b705cfSriastradh		if (src_reg != FS_OC) {
164703b705cfSriastradh			if (src->alpha_fixup)
164803b705cfSriastradh				gen3_fs_mov(FS_OC,
164903b705cfSriastradh					    src->rb_reversed ^ op->rb_reversed ?
165003b705cfSriastradh					    gen3_fs_operand(src_reg, Z, Y, X, ONE) :
165103b705cfSriastradh					    gen3_fs_operand(src_reg, X, Y, Z, ONE));
165203b705cfSriastradh			else if (dst_is_alpha)
165303b705cfSriastradh				gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, W, W, W, W));
165403b705cfSriastradh			else if (src->rb_reversed ^ op->rb_reversed)
165503b705cfSriastradh				gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, Z, Y, X, W));
165603b705cfSriastradh			else
165703b705cfSriastradh				gen3_fs_mov(FS_OC, gen3_fs_operand_reg(src_reg));
165803b705cfSriastradh		} else if (src->alpha_fixup)
165903b705cfSriastradh			gen3_fs_mov_masked(FS_OC, MASK_W, gen3_fs_operand_one());
166003b705cfSriastradh	} else {
		/* With a reversed destination the result is built in FS_U0 and swizzled into FS_OC at mask_done. */
166103b705cfSriastradh		int out_reg = FS_OC;
166203b705cfSriastradh		if (op->rb_reversed)
166303b705cfSriastradh			out_reg = FS_U0;
166403b705cfSriastradh
		/* Declare the mask inputs, placed after any sampler/texcoord pair the source declared (t). */
166503b705cfSriastradh		switch (mask->u.gen3.type) {
166603b705cfSriastradh		case SHADER_CONSTANT:
166703b705cfSriastradh			gen3_fs_dcl(FS_T9);
166803b705cfSriastradh			mask_reg = FS_T9;
166903b705cfSriastradh			break;
167003b705cfSriastradh		case SHADER_TEXTURE:
167103b705cfSriastradh		case SHADER_LINEAR:
167203b705cfSriastradh		case SHADER_RADIAL:
167303b705cfSriastradh			gen3_fs_dcl(FS_S0 + t);
167403b705cfSriastradh			/* fall through */
167503b705cfSriastradh		case SHADER_OPACITY:
167603b705cfSriastradh			gen3_fs_dcl(FS_T0 + t);
167703b705cfSriastradh			break;
167803b705cfSriastradh		case SHADER_ZERO:
167903b705cfSriastradh		case SHADER_BLACK:
168003b705cfSriastradh			assert(0);
168103b705cfSriastradh		case SHADER_NONE:
168203b705cfSriastradh		case SHADER_WHITE:
168303b705cfSriastradh			break;
168403b705cfSriastradh		}
168503b705cfSriastradh
		/* Load the source into FS_R0; t is recounted so the mask loads below use the same FS_S0 + t / FS_T0 + t. */
168603b705cfSriastradh		t = 0;
168703b705cfSriastradh		switch (src->u.gen3.type) {
168803b705cfSriastradh		case SHADER_LINEAR:
168903b705cfSriastradh			gen3_linear_coord(sna, src, FS_T0, FS_R0);
169003b705cfSriastradh			gen3_fs_texld(FS_R0, FS_S0, FS_R0);
169103b705cfSriastradh			src_reg = FS_R0;
169203b705cfSriastradh			t++;
169303b705cfSriastradh			break;
169403b705cfSriastradh
169503b705cfSriastradh		case SHADER_RADIAL:
169603b705cfSriastradh			gen3_radial_coord(sna, src, FS_T0, FS_R0);
169703b705cfSriastradh			gen3_fs_texld(FS_R0, FS_S0, FS_R0);
169803b705cfSriastradh			src_reg = FS_R0;
169903b705cfSriastradh			t++;
170003b705cfSriastradh			break;
170103b705cfSriastradh
170203b705cfSriastradh		case SHADER_TEXTURE:
170303b705cfSriastradh			if (src->is_affine)
170403b705cfSriastradh				gen3_fs_texld(FS_R0, FS_S0, FS_T0);
170503b705cfSriastradh			else
170603b705cfSriastradh				gen3_fs_texldp(FS_R0, FS_S0, FS_T0);
170703b705cfSriastradh			src_reg = FS_R0;
170803b705cfSriastradh			t++;
170903b705cfSriastradh			break;
171003b705cfSriastradh
171103b705cfSriastradh		case SHADER_CONSTANT:
171203b705cfSriastradh		case SHADER_NONE:
171303b705cfSriastradh		case SHADER_ZERO:
171403b705cfSriastradh		case SHADER_BLACK:
171503b705cfSriastradh		case SHADER_WHITE:
171603b705cfSriastradh			break;
171703b705cfSriastradh		}
		/* Source fixups: force alpha to one and/or swap the X and Z channels in place. */
171803b705cfSriastradh		if (src->alpha_fixup)
171903b705cfSriastradh			gen3_fs_mov_masked(src_reg, MASK_W, gen3_fs_operand_one());
172003b705cfSriastradh		if (src->rb_reversed)
172103b705cfSriastradh			gen3_fs_mov(src_reg, gen3_fs_operand(src_reg, Z, Y, X, W));
172203b705cfSriastradh
		/* Load the mask into FS_R1; an OPACITY mask is combined here directly and skips the generic combine. */
172303b705cfSriastradh		switch (mask->u.gen3.type) {
172403b705cfSriastradh		case SHADER_LINEAR:
172503b705cfSriastradh			gen3_linear_coord(sna, mask, FS_T0 + t, FS_R1);
172603b705cfSriastradh			gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
172703b705cfSriastradh			mask_reg = FS_R1;
172803b705cfSriastradh			break;
172903b705cfSriastradh
173003b705cfSriastradh		case SHADER_RADIAL:
173103b705cfSriastradh			gen3_radial_coord(sna, mask, FS_T0 + t, FS_R1);
173203b705cfSriastradh			gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
173303b705cfSriastradh			mask_reg = FS_R1;
173403b705cfSriastradh			break;
173503b705cfSriastradh
173603b705cfSriastradh		case SHADER_TEXTURE:
173703b705cfSriastradh			if (mask->is_affine)
173803b705cfSriastradh				gen3_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
173903b705cfSriastradh			else
174003b705cfSriastradh				gen3_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
174103b705cfSriastradh			mask_reg = FS_R1;
174203b705cfSriastradh			break;
174303b705cfSriastradh
174403b705cfSriastradh		case SHADER_OPACITY:
			/* The opacity value is read from the X channel of texcoord register FS_T0 + t. */
174503b705cfSriastradh			switch (src->u.gen3.type) {
174603b705cfSriastradh			case SHADER_BLACK:
174703b705cfSriastradh			case SHADER_WHITE:
174803b705cfSriastradh				if (dst_is_alpha || src->u.gen3.type == SHADER_WHITE) {
174903b705cfSriastradh					gen3_fs_mov(out_reg,
175003b705cfSriastradh						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
175103b705cfSriastradh				} else {
175203b705cfSriastradh					gen3_fs_mov(out_reg,
175303b705cfSriastradh						    gen3_fs_operand(FS_T0 + t, ZERO, ZERO, ZERO, X));
175403b705cfSriastradh				}
175503b705cfSriastradh				break;
175603b705cfSriastradh			default:
175703b705cfSriastradh				if (dst_is_alpha) {
175803b705cfSriastradh					gen3_fs_mul(out_reg,
175903b705cfSriastradh						    gen3_fs_operand(src_reg, W, W, W, W),
176003b705cfSriastradh						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
176103b705cfSriastradh				} else {
176203b705cfSriastradh					gen3_fs_mul(out_reg,
176303b705cfSriastradh						    gen3_fs_operand(src_reg, X, Y, Z, W),
176403b705cfSriastradh						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
176503b705cfSriastradh				}
176603b705cfSriastradh			}
176703b705cfSriastradh			goto mask_done;
176803b705cfSriastradh
176903b705cfSriastradh		case SHADER_CONSTANT:
177003b705cfSriastradh		case SHADER_ZERO:
177103b705cfSriastradh		case SHADER_BLACK:
177203b705cfSriastradh		case SHADER_WHITE:
177303b705cfSriastradh		case SHADER_NONE:
177403b705cfSriastradh			break;
177503b705cfSriastradh		}
		/* Mask fixups, mirroring the source fixups above. */
177603b705cfSriastradh		if (mask->alpha_fixup)
177703b705cfSriastradh			gen3_fs_mov_masked(mask_reg, MASK_W, gen3_fs_operand_one());
177803b705cfSriastradh		if (mask->rb_reversed)
177903b705cfSriastradh			gen3_fs_mov(mask_reg, gen3_fs_operand(mask_reg, Z, Y, X, W));
178003b705cfSriastradh
		/* Combine source and mask into out_reg. */
178103b705cfSriastradh		if (dst_is_alpha) {
			/* Only alpha matters: BLACK/WHITE sources pass mask.W through, others multiply by src.W. */
178203b705cfSriastradh			switch (src->u.gen3.type) {
178303b705cfSriastradh			case SHADER_BLACK:
178403b705cfSriastradh			case SHADER_WHITE:
178503b705cfSriastradh				gen3_fs_mov(out_reg,
178603b705cfSriastradh					    gen3_fs_operand(mask_reg, W, W, W, W));
178703b705cfSriastradh				break;
178803b705cfSriastradh			default:
178903b705cfSriastradh				gen3_fs_mul(out_reg,
179003b705cfSriastradh					    gen3_fs_operand(src_reg, W, W, W, W),
179103b705cfSriastradh					    gen3_fs_operand(mask_reg, W, W, W, W));
179203b705cfSriastradh				break;
179303b705cfSriastradh			}
179403b705cfSriastradh		} else {
179503b705cfSriastradh			/* If component alpha is active in the mask and the blend
179603b705cfSriastradh			 * operation uses the source alpha, then we know we don't
179703b705cfSriastradh			 * need the source value (otherwise we would have hit a
179803b705cfSriastradh			 * fallback earlier), so we provide the source alpha (src.A *
179903b705cfSriastradh			 * mask.X) as output color.
180003b705cfSriastradh			 * Conversely, if CA is set and we don't need the source alpha,
180103b705cfSriastradh			 * then we produce the source value (src.X * mask.X) and the
180203b705cfSriastradh			 * source alpha is unused.  Otherwise, we provide the non-CA
180303b705cfSriastradh			 * source value (src.X * mask.A).
180403b705cfSriastradh			 */
180503b705cfSriastradh			if (op->has_component_alpha) {
180603b705cfSriastradh				switch (src->u.gen3.type) {
180703b705cfSriastradh				case SHADER_BLACK:
180803b705cfSriastradh					if (gen3_blend_op[blend].src_alpha)
180903b705cfSriastradh						gen3_fs_mov(out_reg,
181003b705cfSriastradh							    gen3_fs_operand_reg(mask_reg));
181103b705cfSriastradh					else
181203b705cfSriastradh						gen3_fs_mov(out_reg,
181303b705cfSriastradh							    gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
181403b705cfSriastradh					break;
181503b705cfSriastradh				case SHADER_WHITE:
181603b705cfSriastradh					gen3_fs_mov(out_reg,
181703b705cfSriastradh						    gen3_fs_operand_reg(mask_reg));
181803b705cfSriastradh					break;
181903b705cfSriastradh				default:
182003b705cfSriastradh					if (gen3_blend_op[blend].src_alpha)
182103b705cfSriastradh						gen3_fs_mul(out_reg,
182203b705cfSriastradh							    gen3_fs_operand(src_reg, W, W, W, W),
182303b705cfSriastradh							    gen3_fs_operand_reg(mask_reg));
182403b705cfSriastradh					else
182503b705cfSriastradh						gen3_fs_mul(out_reg,
182603b705cfSriastradh							    gen3_fs_operand_reg(src_reg),
182703b705cfSriastradh							    gen3_fs_operand_reg(mask_reg));
182803b705cfSriastradh					break;
182903b705cfSriastradh				}
183003b705cfSriastradh			} else {
183103b705cfSriastradh				switch (src->u.gen3.type) {
183203b705cfSriastradh				case SHADER_WHITE:
183303b705cfSriastradh					gen3_fs_mov(out_reg,
183403b705cfSriastradh						    gen3_fs_operand(mask_reg, W, W, W, W));
183503b705cfSriastradh					break;
183603b705cfSriastradh				case SHADER_BLACK:
183703b705cfSriastradh					gen3_fs_mov(out_reg,
183803b705cfSriastradh						    gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
183903b705cfSriastradh					break;
184003b705cfSriastradh				default:
184103b705cfSriastradh					gen3_fs_mul(out_reg,
184203b705cfSriastradh						    gen3_fs_operand_reg(src_reg),
184303b705cfSriastradh						    gen3_fs_operand(mask_reg, W, W, W, W));
184403b705cfSriastradh					break;
184503b705cfSriastradh				}
184603b705cfSriastradh			}
184703b705cfSriastradh		}
184803b705cfSriastradhmask_done:
		/* Reversed destination: the result was built in FS_U0; swap X and Z on the way to FS_OC. */
184903b705cfSriastradh		if (op->rb_reversed)
185003b705cfSriastradh			gen3_fs_mov(FS_OC, gen3_fs_operand(FS_U0, Z, Y, X, W));
185103b705cfSriastradh	}
185203b705cfSriastradh
185303b705cfSriastradhdone:
	/* Back-fill the reserved header dword now that the program length is known. */
185403b705cfSriastradh	length = sna->kgem.nbatch - shader_offset;
185503b705cfSriastradh	sna->kgem.batch[shader_offset] =
185603b705cfSriastradh		_3DSTATE_PIXEL_SHADER_PROGRAM | (length - 2);
185703b705cfSriastradh}
185803b705cfSriastradh
185903b705cfSriastradhstatic uint32_t gen3_ms_tiling(uint32_t tiling)
186003b705cfSriastradh{
186103b705cfSriastradh	uint32_t v = 0;
186203b705cfSriastradh	switch (tiling) {
186303b705cfSriastradh	case I915_TILING_Y: v |= MS3_TILE_WALK;
186403b705cfSriastradh	case I915_TILING_X: v |= MS3_TILED_SURFACE;
186503b705cfSriastradh	case I915_TILING_NONE: break;
186603b705cfSriastradh	}
186703b705cfSriastradh	return v;
186803b705cfSriastradh}
186903b705cfSriastradh
187003b705cfSriastradhstatic void gen3_emit_invariant(struct sna *sna)
187103b705cfSriastradh{
187203b705cfSriastradh	/* Disable independent alpha blend */
187303b705cfSriastradh	OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE |
187403b705cfSriastradh		  IAB_MODIFY_FUNC | BLENDFUNC_ADD << IAB_FUNC_SHIFT |
187503b705cfSriastradh		  IAB_MODIFY_SRC_FACTOR | BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT |
187603b705cfSriastradh		  IAB_MODIFY_DST_FACTOR | BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT);
187703b705cfSriastradh
187803b705cfSriastradh	OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
187903b705cfSriastradh		  CSB_TCB(0, 0) |
188003b705cfSriastradh		  CSB_TCB(1, 1) |
188103b705cfSriastradh		  CSB_TCB(2, 2) |
188203b705cfSriastradh		  CSB_TCB(3, 3) |
188303b705cfSriastradh		  CSB_TCB(4, 4) |
188403b705cfSriastradh		  CSB_TCB(5, 5) |
188503b705cfSriastradh		  CSB_TCB(6, 6) |
188603b705cfSriastradh		  CSB_TCB(7, 7));
188703b705cfSriastradh
188803b705cfSriastradh	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3);
188903b705cfSriastradh	OUT_BATCH(0); /* Disable texture coordinate wrap-shortest */
189003b705cfSriastradh	OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) |
189103b705cfSriastradh		  S4_LINE_WIDTH_ONE |
189203b705cfSriastradh		  S4_CULLMODE_NONE |
189303b705cfSriastradh		  S4_VFMT_XY);
189403b705cfSriastradh	OUT_BATCH(0); /* Disable fog/stencil. *Enable* write mask. */
189503b705cfSriastradh	OUT_BATCH(S6_COLOR_WRITE_ONLY); /* Disable blending, depth */
189603b705cfSriastradh
189703b705cfSriastradh	OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
189803b705cfSriastradh	OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
189903b705cfSriastradh
190003b705cfSriastradh	OUT_BATCH(_3DSTATE_LOAD_INDIRECT);
190103b705cfSriastradh	OUT_BATCH(0x00000000);
190203b705cfSriastradh
190303b705cfSriastradh	OUT_BATCH(_3DSTATE_STIPPLE);
190403b705cfSriastradh	OUT_BATCH(0x00000000);
190503b705cfSriastradh
190603b705cfSriastradh	sna->render_state.gen3.need_invariant = false;
190703b705cfSriastradh}
190803b705cfSriastradh
190903b705cfSriastradh#define MAX_OBJECTS 3 /* worst case: dst + src + mask  */
191003b705cfSriastradh
191103b705cfSriastradhstatic void
191203b705cfSriastradhgen3_get_batch(struct sna *sna, const struct sna_composite_op *op)
191303b705cfSriastradh{
191403b705cfSriastradh	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
191503b705cfSriastradh
191603b705cfSriastradh	if (!kgem_check_batch(&sna->kgem, 200)) {
191703b705cfSriastradh		DBG(("%s: flushing batch: size %d > %d\n",
191803b705cfSriastradh		     __FUNCTION__, 200,
191903b705cfSriastradh		     sna->kgem.surface-sna->kgem.nbatch));
192003b705cfSriastradh		kgem_submit(&sna->kgem);
192103b705cfSriastradh		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
192203b705cfSriastradh	}
192303b705cfSriastradh
192403b705cfSriastradh	if (!kgem_check_reloc(&sna->kgem, MAX_OBJECTS)) {
192503b705cfSriastradh		DBG(("%s: flushing batch: reloc %d >= %d\n",
192603b705cfSriastradh		     __FUNCTION__,
192703b705cfSriastradh		     sna->kgem.nreloc,
192803b705cfSriastradh		     (int)KGEM_RELOC_SIZE(&sna->kgem) - MAX_OBJECTS));
192903b705cfSriastradh		kgem_submit(&sna->kgem);
193003b705cfSriastradh		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
193103b705cfSriastradh	}
193203b705cfSriastradh
193303b705cfSriastradh	if (!kgem_check_exec(&sna->kgem, MAX_OBJECTS)) {
193403b705cfSriastradh		DBG(("%s: flushing batch: exec %d >= %d\n",
193503b705cfSriastradh		     __FUNCTION__,
193603b705cfSriastradh		     sna->kgem.nexec,
193703b705cfSriastradh		     (int)KGEM_EXEC_SIZE(&sna->kgem) - MAX_OBJECTS - 1));
193803b705cfSriastradh		kgem_submit(&sna->kgem);
193903b705cfSriastradh		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
194003b705cfSriastradh	}
194103b705cfSriastradh
194203b705cfSriastradh	if (sna->render_state.gen3.need_invariant)
194303b705cfSriastradh		gen3_emit_invariant(sna);
194403b705cfSriastradh#undef MAX_OBJECTS
194503b705cfSriastradh}
194603b705cfSriastradh
194703b705cfSriastradhstatic void gen3_emit_target(struct sna *sna,
194803b705cfSriastradh			     struct kgem_bo *bo,
194903b705cfSriastradh			     int width,
195003b705cfSriastradh			     int height,
195103b705cfSriastradh			     int format)
195203b705cfSriastradh{
195303b705cfSriastradh	struct gen3_render_state *state = &sna->render_state.gen3;
195403b705cfSriastradh
195503b705cfSriastradh	assert(!too_large(width, height));
195603b705cfSriastradh
195703b705cfSriastradh	/* BUF_INFO is an implicit flush, so skip if the target is unchanged. */
195803b705cfSriastradh	assert(bo->unique_id != 0);
195903b705cfSriastradh	if (bo->unique_id != state->current_dst) {
196003b705cfSriastradh		uint32_t v;
196103b705cfSriastradh
196203b705cfSriastradh		DBG(("%s: setting new target id=%d, handle=%d\n",
196303b705cfSriastradh		     __FUNCTION__, bo->unique_id, bo->handle));
196403b705cfSriastradh
196503b705cfSriastradh		OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
196603b705cfSriastradh		OUT_BATCH(BUF_3D_ID_COLOR_BACK |
196703b705cfSriastradh			  gen3_buf_tiling(bo->tiling) |
196803b705cfSriastradh			  bo->pitch);
196903b705cfSriastradh		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
197003b705cfSriastradh					 bo,
197103b705cfSriastradh					 I915_GEM_DOMAIN_RENDER << 16 |
197203b705cfSriastradh					 I915_GEM_DOMAIN_RENDER,
197303b705cfSriastradh					 0));
197403b705cfSriastradh
197503b705cfSriastradh		OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
197603b705cfSriastradh		OUT_BATCH(gen3_get_dst_format(format));
197703b705cfSriastradh
197803b705cfSriastradh		v = DRAW_YMAX(height - 1) | DRAW_XMAX(width - 1);
197903b705cfSriastradh		if (v != state->last_drawrect_limit) {
198003b705cfSriastradh			OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
198103b705cfSriastradh			OUT_BATCH(0); /* XXX dither origin? */
198203b705cfSriastradh			OUT_BATCH(0);
198303b705cfSriastradh			OUT_BATCH(v);
198403b705cfSriastradh			OUT_BATCH(0);
198503b705cfSriastradh			state->last_drawrect_limit = v;
198603b705cfSriastradh		}
198703b705cfSriastradh
198803b705cfSriastradh		state->current_dst = bo->unique_id;
198903b705cfSriastradh	}
199003b705cfSriastradh	assert(bo->exec);
199103b705cfSriastradh	kgem_bo_mark_dirty(bo);
199203b705cfSriastradh}
199303b705cfSriastradh
199403b705cfSriastradhstatic void gen3_emit_composite_state(struct sna *sna,
199503b705cfSriastradh				      const struct sna_composite_op *op)
199603b705cfSriastradh{
199703b705cfSriastradh	struct gen3_render_state *state = &sna->render_state.gen3;
199803b705cfSriastradh	uint32_t map[4];
199903b705cfSriastradh	uint32_t sampler[4];
200003b705cfSriastradh	struct kgem_bo *bo[2];
200103b705cfSriastradh	unsigned int tex_count, n;
200203b705cfSriastradh	uint32_t ss2;
200303b705cfSriastradh
200403b705cfSriastradh	gen3_get_batch(sna, op);
200503b705cfSriastradh
200603b705cfSriastradh	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
200703b705cfSriastradh		if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
200803b705cfSriastradh			OUT_BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
200903b705cfSriastradh		else
201003b705cfSriastradh			OUT_BATCH(_3DSTATE_MODES_5_CMD |
201103b705cfSriastradh				  PIPELINE_FLUSH_RENDER_CACHE |
201203b705cfSriastradh				  PIPELINE_FLUSH_TEXTURE_CACHE);
201303b705cfSriastradh		kgem_clear_dirty(&sna->kgem);
201403b705cfSriastradh	}
201503b705cfSriastradh
201603b705cfSriastradh	gen3_emit_target(sna,
201703b705cfSriastradh			 op->dst.bo,
201803b705cfSriastradh			 op->dst.width,
201903b705cfSriastradh			 op->dst.height,
202003b705cfSriastradh			 op->dst.format);
202103b705cfSriastradh
202203b705cfSriastradh	ss2 = ~0;
202303b705cfSriastradh	tex_count = 0;
202403b705cfSriastradh	switch (op->src.u.gen3.type) {
202503b705cfSriastradh	case SHADER_OPACITY:
202603b705cfSriastradh	case SHADER_NONE:
202703b705cfSriastradh		assert(0);
202803b705cfSriastradh	case SHADER_ZERO:
202903b705cfSriastradh	case SHADER_BLACK:
203003b705cfSriastradh	case SHADER_WHITE:
203103b705cfSriastradh		break;
203203b705cfSriastradh	case SHADER_CONSTANT:
203303b705cfSriastradh		if (op->src.u.gen3.mode != state->last_diffuse) {
203403b705cfSriastradh			OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
203503b705cfSriastradh			OUT_BATCH(op->src.u.gen3.mode);
203603b705cfSriastradh			state->last_diffuse = op->src.u.gen3.mode;
203703b705cfSriastradh		}
203803b705cfSriastradh		break;
203903b705cfSriastradh	case SHADER_LINEAR:
204003b705cfSriastradh	case SHADER_RADIAL:
204103b705cfSriastradh	case SHADER_TEXTURE:
204203b705cfSriastradh		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
204303b705cfSriastradh		ss2 |= S2_TEXCOORD_FMT(tex_count,
204403b705cfSriastradh				       op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
204503b705cfSriastradh		assert(op->src.card_format);
204603b705cfSriastradh		map[tex_count * 2 + 0] =
204703b705cfSriastradh			op->src.card_format |
204803b705cfSriastradh			gen3_ms_tiling(op->src.bo->tiling) |
204903b705cfSriastradh			(op->src.height - 1) << MS3_HEIGHT_SHIFT |
205003b705cfSriastradh			(op->src.width - 1) << MS3_WIDTH_SHIFT;
205103b705cfSriastradh		map[tex_count * 2 + 1] =
205203b705cfSriastradh			(op->src.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
205303b705cfSriastradh
205403b705cfSriastradh		sampler[tex_count * 2 + 0] = op->src.filter;
205503b705cfSriastradh		sampler[tex_count * 2 + 1] =
205603b705cfSriastradh			op->src.repeat |
205703b705cfSriastradh			tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
205803b705cfSriastradh		bo[tex_count] = op->src.bo;
205903b705cfSriastradh		tex_count++;
206003b705cfSriastradh		break;
206103b705cfSriastradh	}
206203b705cfSriastradh	switch (op->mask.u.gen3.type) {
206303b705cfSriastradh	case SHADER_NONE:
206403b705cfSriastradh	case SHADER_ZERO:
206503b705cfSriastradh	case SHADER_BLACK:
206603b705cfSriastradh	case SHADER_WHITE:
206703b705cfSriastradh		break;
206803b705cfSriastradh	case SHADER_CONSTANT:
206903b705cfSriastradh		if (op->mask.u.gen3.mode != state->last_specular) {
207003b705cfSriastradh			OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
207103b705cfSriastradh			OUT_BATCH(op->mask.u.gen3.mode);
207203b705cfSriastradh			state->last_specular = op->mask.u.gen3.mode;
207303b705cfSriastradh		}
207403b705cfSriastradh		break;
207503b705cfSriastradh	case SHADER_LINEAR:
207603b705cfSriastradh	case SHADER_RADIAL:
207703b705cfSriastradh	case SHADER_TEXTURE:
207803b705cfSriastradh		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
207903b705cfSriastradh		ss2 |= S2_TEXCOORD_FMT(tex_count,
208003b705cfSriastradh				       op->mask.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
208103b705cfSriastradh		assert(op->mask.card_format);
208203b705cfSriastradh		map[tex_count * 2 + 0] =
208303b705cfSriastradh			op->mask.card_format |
208403b705cfSriastradh			gen3_ms_tiling(op->mask.bo->tiling) |
208503b705cfSriastradh			(op->mask.height - 1) << MS3_HEIGHT_SHIFT |
208603b705cfSriastradh			(op->mask.width - 1) << MS3_WIDTH_SHIFT;
208703b705cfSriastradh		map[tex_count * 2 + 1] =
208803b705cfSriastradh			(op->mask.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
208903b705cfSriastradh
209003b705cfSriastradh		sampler[tex_count * 2 + 0] = op->mask.filter;
209103b705cfSriastradh		sampler[tex_count * 2 + 1] =
209203b705cfSriastradh			op->mask.repeat |
209303b705cfSriastradh			tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
209403b705cfSriastradh		bo[tex_count] = op->mask.bo;
209503b705cfSriastradh		tex_count++;
209603b705cfSriastradh		break;
209703b705cfSriastradh	case SHADER_OPACITY:
209803b705cfSriastradh		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
209903b705cfSriastradh		ss2 |= S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_1D);
210003b705cfSriastradh		break;
210103b705cfSriastradh	}
210203b705cfSriastradh
210303b705cfSriastradh	{
210403b705cfSriastradh		uint32_t blend_offset = sna->kgem.nbatch;
210503b705cfSriastradh
210603b705cfSriastradh		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
210703b705cfSriastradh		OUT_BATCH(ss2);
210803b705cfSriastradh		OUT_BATCH(gen3_get_blend_cntl(op->op,
210903b705cfSriastradh					      op->has_component_alpha,
211003b705cfSriastradh					      op->dst.format));
211103b705cfSriastradh
211203b705cfSriastradh		if (memcmp(sna->kgem.batch + state->last_blend + 1,
211303b705cfSriastradh			   sna->kgem.batch + blend_offset + 1,
211403b705cfSriastradh			   2 * 4) == 0)
211503b705cfSriastradh			sna->kgem.nbatch = blend_offset;
211603b705cfSriastradh		else
211703b705cfSriastradh			state->last_blend = blend_offset;
211803b705cfSriastradh	}
211903b705cfSriastradh
212003b705cfSriastradh	if (op->u.gen3.num_constants) {
212103b705cfSriastradh		int count = op->u.gen3.num_constants;
212203b705cfSriastradh		if (state->last_constants) {
212303b705cfSriastradh			int last = sna->kgem.batch[state->last_constants+1];
212403b705cfSriastradh			if (last == (1 << (count >> 2)) - 1 &&
212503b705cfSriastradh			    memcmp(&sna->kgem.batch[state->last_constants+2],
212603b705cfSriastradh				   op->u.gen3.constants,
212703b705cfSriastradh				   count * sizeof(uint32_t)) == 0)
212803b705cfSriastradh				count = 0;
212903b705cfSriastradh		}
213003b705cfSriastradh		if (count) {
213103b705cfSriastradh			state->last_constants = sna->kgem.nbatch;
213203b705cfSriastradh			OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | count);
213303b705cfSriastradh			OUT_BATCH((1 << (count >> 2)) - 1);
213403b705cfSriastradh
213503b705cfSriastradh			memcpy(sna->kgem.batch + sna->kgem.nbatch,
213603b705cfSriastradh			       op->u.gen3.constants,
213703b705cfSriastradh			       count * sizeof(uint32_t));
213803b705cfSriastradh			sna->kgem.nbatch += count;
213903b705cfSriastradh		}
214003b705cfSriastradh	}
214103b705cfSriastradh
214203b705cfSriastradh	if (tex_count != 0) {
214303b705cfSriastradh		uint32_t rewind;
214403b705cfSriastradh
214503b705cfSriastradh		n = 0;
214603b705cfSriastradh		if (tex_count == state->tex_count) {
214703b705cfSriastradh			for (; n < tex_count; n++) {
214803b705cfSriastradh				if (map[2*n+0] != state->tex_map[2*n+0] ||
214903b705cfSriastradh				    map[2*n+1] != state->tex_map[2*n+1] ||
215003b705cfSriastradh				    state->tex_handle[n] != bo[n]->handle ||
215103b705cfSriastradh				    state->tex_delta[n] != bo[n]->delta)
215203b705cfSriastradh					break;
215303b705cfSriastradh			}
215403b705cfSriastradh		}
215503b705cfSriastradh		if (n < tex_count) {
215603b705cfSriastradh			OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
215703b705cfSriastradh			OUT_BATCH((1 << tex_count) - 1);
215803b705cfSriastradh			for (n = 0; n < tex_count; n++) {
215903b705cfSriastradh				OUT_BATCH(kgem_add_reloc(&sna->kgem,
216003b705cfSriastradh							 sna->kgem.nbatch,
216103b705cfSriastradh							 bo[n],
216203b705cfSriastradh							 I915_GEM_DOMAIN_SAMPLER<< 16,
216303b705cfSriastradh							 0));
216403b705cfSriastradh				OUT_BATCH(map[2*n + 0]);
216503b705cfSriastradh				OUT_BATCH(map[2*n + 1]);
216603b705cfSriastradh
216703b705cfSriastradh				state->tex_map[2*n+0] = map[2*n+0];
216803b705cfSriastradh				state->tex_map[2*n+1] = map[2*n+1];
216903b705cfSriastradh				state->tex_handle[n] = bo[n]->handle;
217003b705cfSriastradh				state->tex_delta[n] = bo[n]->delta;
217103b705cfSriastradh			}
217203b705cfSriastradh			state->tex_count = n;
217303b705cfSriastradh		}
217403b705cfSriastradh
217503b705cfSriastradh		rewind = sna->kgem.nbatch;
217603b705cfSriastradh		OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count));
217703b705cfSriastradh		OUT_BATCH((1 << tex_count) - 1);
217803b705cfSriastradh		for (n = 0; n < tex_count; n++) {
217903b705cfSriastradh			OUT_BATCH(sampler[2*n + 0]);
218003b705cfSriastradh			OUT_BATCH(sampler[2*n + 1]);
218103b705cfSriastradh			OUT_BATCH(0);
218203b705cfSriastradh		}
218303b705cfSriastradh		if (state->last_sampler &&
218403b705cfSriastradh		    memcmp(&sna->kgem.batch[state->last_sampler+1],
218503b705cfSriastradh			   &sna->kgem.batch[rewind + 1],
218603b705cfSriastradh			   (3*tex_count + 1)*sizeof(uint32_t)) == 0)
218703b705cfSriastradh			sna->kgem.nbatch = rewind;
218803b705cfSriastradh		else
218903b705cfSriastradh			state->last_sampler = rewind;
219003b705cfSriastradh	}
219103b705cfSriastradh
219203b705cfSriastradh	gen3_composite_emit_shader(sna, op, op->op);
219303b705cfSriastradh}
219403b705cfSriastradh
219503b705cfSriastradhstatic bool gen3_magic_ca_pass(struct sna *sna,
219603b705cfSriastradh			       const struct sna_composite_op *op)
219703b705cfSriastradh{
219803b705cfSriastradh	if (!op->need_magic_ca_pass)
219903b705cfSriastradh		return false;
220003b705cfSriastradh
220103b705cfSriastradh	DBG(("%s(%d)\n", __FUNCTION__,
220203b705cfSriastradh	     sna->render.vertex_index - sna->render.vertex_start));
220303b705cfSriastradh
220403b705cfSriastradh	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
220503b705cfSriastradh	OUT_BATCH(gen3_get_blend_cntl(PictOpAdd, true, op->dst.format));
220603b705cfSriastradh	gen3_composite_emit_shader(sna, op, PictOpAdd);
220703b705cfSriastradh
220803b705cfSriastradh	OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
220903b705cfSriastradh		  (sna->render.vertex_index - sna->render.vertex_start));
221003b705cfSriastradh	OUT_BATCH(sna->render.vertex_start);
221103b705cfSriastradh
221203b705cfSriastradh	sna->render_state.gen3.last_blend = 0;
221303b705cfSriastradh	return true;
221403b705cfSriastradh}
221503b705cfSriastradh
221603b705cfSriastradhstatic void gen3_vertex_flush(struct sna *sna)
221703b705cfSriastradh{
221803b705cfSriastradh	assert(sna->render.vertex_offset);
221903b705cfSriastradh
222003b705cfSriastradh	DBG(("%s[%x] = %d\n", __FUNCTION__,
222103b705cfSriastradh	     4*sna->render.vertex_offset,
222203b705cfSriastradh	     sna->render.vertex_index - sna->render.vertex_start));
222303b705cfSriastradh
222403b705cfSriastradh	sna->kgem.batch[sna->render.vertex_offset] =
222503b705cfSriastradh		PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
222603b705cfSriastradh		(sna->render.vertex_index - sna->render.vertex_start);
222703b705cfSriastradh	sna->kgem.batch[sna->render.vertex_offset + 1] =
222803b705cfSriastradh		sna->render.vertex_start;
222903b705cfSriastradh
223003b705cfSriastradh	sna->render.vertex_offset = 0;
223103b705cfSriastradh}
223203b705cfSriastradh
223303b705cfSriastradhstatic int gen3_vertex_finish(struct sna *sna)
223403b705cfSriastradh{
223503b705cfSriastradh	struct kgem_bo *bo;
223603b705cfSriastradh
223703b705cfSriastradh	DBG(("%s: used=%d/%d, vbo active? %d\n",
223803b705cfSriastradh	     __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
223903b705cfSriastradh	     sna->render.vbo ? sna->render.vbo->handle : 0));
224003b705cfSriastradh	assert(sna->render.vertex_offset == 0);
224103b705cfSriastradh	assert(sna->render.vertex_used);
224203b705cfSriastradh	assert(sna->render.vertex_used <= sna->render.vertex_size);
224303b705cfSriastradh
224403b705cfSriastradh	sna_vertex_wait__locked(&sna->render);
224503b705cfSriastradh
224603b705cfSriastradh	bo = sna->render.vbo;
224703b705cfSriastradh	if (bo) {
224803b705cfSriastradh		DBG(("%s: reloc = %d\n", __FUNCTION__,
224903b705cfSriastradh		     sna->render.vertex_reloc[0]));
225003b705cfSriastradh
225103b705cfSriastradh		if (sna->render.vertex_reloc[0]) {
225203b705cfSriastradh			sna->kgem.batch[sna->render.vertex_reloc[0]] =
225303b705cfSriastradh				kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
225403b705cfSriastradh					       bo, I915_GEM_DOMAIN_VERTEX << 16, 0);
225503b705cfSriastradh
225603b705cfSriastradh			sna->render.vertex_reloc[0] = 0;
225703b705cfSriastradh		}
225803b705cfSriastradh		sna->render.vertex_used = 0;
225903b705cfSriastradh		sna->render.vertex_index = 0;
226003b705cfSriastradh		sna->render.vbo = NULL;
226103b705cfSriastradh
226203b705cfSriastradh		kgem_bo_destroy(&sna->kgem, bo);
226303b705cfSriastradh	}
226403b705cfSriastradh
226503b705cfSriastradh	sna->render.vertices = NULL;
226603b705cfSriastradh	sna->render.vbo = kgem_create_linear(&sna->kgem,
226703b705cfSriastradh					     256*1024, CREATE_GTT_MAP);
226803b705cfSriastradh	if (sna->render.vbo)
226903b705cfSriastradh		sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
227003b705cfSriastradh	if (sna->render.vertices == NULL) {
227103b705cfSriastradh		if (sna->render.vbo)
227203b705cfSriastradh			kgem_bo_destroy(&sna->kgem, sna->render.vbo);
227303b705cfSriastradh		sna->render.vbo = NULL;
227403b705cfSriastradh		return 0;
227503b705cfSriastradh	}
227603b705cfSriastradh	assert(sna->render.vbo->snoop == false);
227703b705cfSriastradh
227803b705cfSriastradh	if (sna->render.vertex_used) {
227903b705cfSriastradh		memcpy(sna->render.vertices,
228003b705cfSriastradh		       sna->render.vertex_data,
228103b705cfSriastradh		       sizeof(float)*sna->render.vertex_used);
228203b705cfSriastradh	}
228303b705cfSriastradh	sna->render.vertex_size = 64 * 1024 - 1;
228403b705cfSriastradh	return sna->render.vertex_size - sna->render.vertex_used;
228503b705cfSriastradh}
228603b705cfSriastradh
228703b705cfSriastradhstatic void gen3_vertex_close(struct sna *sna)
228803b705cfSriastradh{
228903b705cfSriastradh	struct kgem_bo *bo, *free_bo = NULL;
229003b705cfSriastradh	unsigned int delta = 0;
229103b705cfSriastradh
229203b705cfSriastradh	assert(sna->render.vertex_offset == 0);
229303b705cfSriastradh	if (sna->render.vertex_reloc[0] == 0)
229403b705cfSriastradh		return;
229503b705cfSriastradh
229603b705cfSriastradh	DBG(("%s: used=%d/%d, vbo active? %d\n",
229703b705cfSriastradh	     __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
229803b705cfSriastradh	     sna->render.vbo ? sna->render.vbo->handle : 0));
229903b705cfSriastradh
230003b705cfSriastradh	bo = sna->render.vbo;
230103b705cfSriastradh	if (bo) {
230203b705cfSriastradh		if (sna->render.vertex_size - sna->render.vertex_used < 64) {
230303b705cfSriastradh			DBG(("%s: discarding full vbo\n", __FUNCTION__));
230403b705cfSriastradh			sna->render.vbo = NULL;
230503b705cfSriastradh			sna->render.vertices = sna->render.vertex_data;
230603b705cfSriastradh			sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
230703b705cfSriastradh			free_bo = bo;
230842542f5fSchristos		} else if (sna->render.vertices == MAP(bo->map__cpu)) {
230903b705cfSriastradh			DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
231003b705cfSriastradh			sna->render.vertices = kgem_bo_map__gtt(&sna->kgem, bo);
231103b705cfSriastradh			if (sna->render.vertices == NULL) {
231203b705cfSriastradh				DBG(("%s: discarding non-mappable vertices\n",__FUNCTION__));
231303b705cfSriastradh				sna->render.vbo = NULL;
231403b705cfSriastradh				sna->render.vertices = sna->render.vertex_data;
231503b705cfSriastradh				sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
231603b705cfSriastradh				free_bo = bo;
231703b705cfSriastradh			}
231803b705cfSriastradh		}
231903b705cfSriastradh	} else {
232003b705cfSriastradh		if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
232103b705cfSriastradh			DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
232203b705cfSriastradh			     sna->render.vertex_used, sna->kgem.nbatch));
232303b705cfSriastradh			memcpy(sna->kgem.batch + sna->kgem.nbatch,
232403b705cfSriastradh			       sna->render.vertex_data,
232503b705cfSriastradh			       sna->render.vertex_used * 4);
232603b705cfSriastradh			delta = sna->kgem.nbatch * 4;
232703b705cfSriastradh			bo = NULL;
232803b705cfSriastradh			sna->kgem.nbatch += sna->render.vertex_used;
232903b705cfSriastradh		} else {
233003b705cfSriastradh			DBG(("%s: new vbo: %d\n", __FUNCTION__,
233103b705cfSriastradh			     sna->render.vertex_used));
233203b705cfSriastradh			bo = kgem_create_linear(&sna->kgem,
233303b705cfSriastradh						4*sna->render.vertex_used,
233403b705cfSriastradh						CREATE_NO_THROTTLE);
233503b705cfSriastradh			if (bo) {
233603b705cfSriastradh				assert(bo->snoop == false);
233703b705cfSriastradh				kgem_bo_write(&sna->kgem, bo,
233803b705cfSriastradh					      sna->render.vertex_data,
233903b705cfSriastradh					      4*sna->render.vertex_used);
234003b705cfSriastradh			}
234103b705cfSriastradh			free_bo = bo;
234203b705cfSriastradh		}
234303b705cfSriastradh	}
234403b705cfSriastradh
234503b705cfSriastradh	DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0]));
234603b705cfSriastradh	sna->kgem.batch[sna->render.vertex_reloc[0]] =
234703b705cfSriastradh		kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
234803b705cfSriastradh			       bo, I915_GEM_DOMAIN_VERTEX << 16, delta);
234903b705cfSriastradh	sna->render.vertex_reloc[0] = 0;
235003b705cfSriastradh
235103b705cfSriastradh	if (sna->render.vbo == NULL) {
235203b705cfSriastradh		DBG(("%s: resetting vbo\n", __FUNCTION__));
235303b705cfSriastradh		sna->render.vertex_used = 0;
235403b705cfSriastradh		sna->render.vertex_index = 0;
235503b705cfSriastradh		assert(sna->render.vertices == sna->render.vertex_data);
235603b705cfSriastradh		assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
235703b705cfSriastradh	}
235803b705cfSriastradh
235903b705cfSriastradh	if (free_bo)
236003b705cfSriastradh		kgem_bo_destroy(&sna->kgem, free_bo);
236103b705cfSriastradh}
236203b705cfSriastradh
236303b705cfSriastradhstatic bool gen3_rectangle_begin(struct sna *sna,
236403b705cfSriastradh				 const struct sna_composite_op *op)
236503b705cfSriastradh{
236603b705cfSriastradh	struct gen3_render_state *state = &sna->render_state.gen3;
236703b705cfSriastradh	int ndwords, i1_cmd = 0, i1_len = 0;
236803b705cfSriastradh
236903b705cfSriastradh	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
237003b705cfSriastradh		return true;
237103b705cfSriastradh
237203b705cfSriastradh	ndwords = 2;
237303b705cfSriastradh	if (op->need_magic_ca_pass)
237403b705cfSriastradh		ndwords += 100;
237503b705cfSriastradh	if (sna->render.vertex_reloc[0] == 0)
237603b705cfSriastradh		i1_len++, i1_cmd |= I1_LOAD_S(0), ndwords++;
237703b705cfSriastradh	if (state->floats_per_vertex != op->floats_per_vertex)
237803b705cfSriastradh		i1_len++, i1_cmd |= I1_LOAD_S(1), ndwords++;
237903b705cfSriastradh
238003b705cfSriastradh	if (!kgem_check_batch(&sna->kgem, ndwords+1))
238103b705cfSriastradh		return false;
238203b705cfSriastradh
238303b705cfSriastradh	if (i1_cmd) {
238403b705cfSriastradh		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | i1_cmd | (i1_len - 1));
238503b705cfSriastradh		if (sna->render.vertex_reloc[0] == 0)
238603b705cfSriastradh			sna->render.vertex_reloc[0] = sna->kgem.nbatch++;
238703b705cfSriastradh		if (state->floats_per_vertex != op->floats_per_vertex) {
238803b705cfSriastradh			state->floats_per_vertex = op->floats_per_vertex;
238903b705cfSriastradh			OUT_BATCH(state->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT |
239003b705cfSriastradh				  state->floats_per_vertex << S1_VERTEX_PITCH_SHIFT);
239103b705cfSriastradh		}
239203b705cfSriastradh	}
239303b705cfSriastradh
239403b705cfSriastradh	if (sna->kgem.nbatch == 2 + state->last_vertex_offset &&
239503b705cfSriastradh	    !op->need_magic_ca_pass) {
239603b705cfSriastradh		sna->render.vertex_offset = state->last_vertex_offset;
239703b705cfSriastradh	} else {
239803b705cfSriastradh		sna->render.vertex_offset = sna->kgem.nbatch;
239903b705cfSriastradh		OUT_BATCH(MI_NOOP); /* to be filled later */
240003b705cfSriastradh		OUT_BATCH(MI_NOOP);
240103b705cfSriastradh		sna->render.vertex_start = sna->render.vertex_index;
240203b705cfSriastradh		state->last_vertex_offset = sna->render.vertex_offset;
240303b705cfSriastradh	}
240403b705cfSriastradh
240503b705cfSriastradh	return true;
240603b705cfSriastradh}
240703b705cfSriastradh
240803b705cfSriastradhstatic int gen3_get_rectangles__flush(struct sna *sna,
240903b705cfSriastradh				      const struct sna_composite_op *op)
241003b705cfSriastradh{
241103b705cfSriastradh	/* Preventing discarding new vbo after lock contention */
241203b705cfSriastradh	if (sna_vertex_wait__locked(&sna->render)) {
241303b705cfSriastradh		int rem = vertex_space(sna);
241403b705cfSriastradh		if (rem > op->floats_per_rect)
241503b705cfSriastradh			return rem;
241603b705cfSriastradh	}
241703b705cfSriastradh
241803b705cfSriastradh	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 105: 5))
241903b705cfSriastradh		return 0;
242003b705cfSriastradh	if (!kgem_check_reloc_and_exec(&sna->kgem, 1))
242103b705cfSriastradh		return 0;
242203b705cfSriastradh
242303b705cfSriastradh	if (sna->render.vertex_offset) {
242403b705cfSriastradh		gen3_vertex_flush(sna);
242503b705cfSriastradh		if (gen3_magic_ca_pass(sna, op)) {
242603b705cfSriastradh			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
242703b705cfSriastradh			OUT_BATCH(gen3_get_blend_cntl(op->op,
242803b705cfSriastradh						      op->has_component_alpha,
242903b705cfSriastradh						      op->dst.format));
243003b705cfSriastradh			gen3_composite_emit_shader(sna, op, op->op);
243103b705cfSriastradh		}
243203b705cfSriastradh	}
243303b705cfSriastradh
243403b705cfSriastradh	return gen3_vertex_finish(sna);
243503b705cfSriastradh}
243603b705cfSriastradh
243703b705cfSriastradhinline static int gen3_get_rectangles(struct sna *sna,
243803b705cfSriastradh				      const struct sna_composite_op *op,
243903b705cfSriastradh				      int want)
244003b705cfSriastradh{
244103b705cfSriastradh	int rem;
244203b705cfSriastradh
244303b705cfSriastradh	DBG(("%s: want=%d, rem=%d\n",
244403b705cfSriastradh	     __FUNCTION__, want*op->floats_per_rect, vertex_space(sna)));
244503b705cfSriastradh
244603b705cfSriastradh	assert(want);
244703b705cfSriastradh	assert(sna->render.vertex_index * op->floats_per_vertex == sna->render.vertex_used);
244803b705cfSriastradh
244903b705cfSriastradhstart:
245003b705cfSriastradh	rem = vertex_space(sna);
245103b705cfSriastradh	if (unlikely(op->floats_per_rect > rem)) {
245203b705cfSriastradh		DBG(("flushing vbo for %s: %d < %d\n",
245303b705cfSriastradh		     __FUNCTION__, rem, op->floats_per_rect));
245403b705cfSriastradh		rem = gen3_get_rectangles__flush(sna, op);
245503b705cfSriastradh		if (unlikely(rem == 0))
245603b705cfSriastradh			goto flush;
245703b705cfSriastradh	}
245803b705cfSriastradh
245903b705cfSriastradh	if (unlikely(sna->render.vertex_offset == 0)) {
246003b705cfSriastradh		if (!gen3_rectangle_begin(sna, op))
246103b705cfSriastradh			goto flush;
246203b705cfSriastradh		else
246303b705cfSriastradh			goto start;
246403b705cfSriastradh	}
246503b705cfSriastradh
246603b705cfSriastradh	assert(rem <= vertex_space(sna));
246703b705cfSriastradh	assert(op->floats_per_rect <= rem);
246803b705cfSriastradh	if (want > 1 && want * op->floats_per_rect > rem)
246903b705cfSriastradh		want = rem / op->floats_per_rect;
247003b705cfSriastradh	sna->render.vertex_index += 3*want;
247103b705cfSriastradh
247203b705cfSriastradh	assert(want);
247303b705cfSriastradh	assert(sna->render.vertex_index * op->floats_per_vertex <= sna->render.vertex_size);
247403b705cfSriastradh	return want;
247503b705cfSriastradh
247603b705cfSriastradhflush:
247703b705cfSriastradh	DBG(("%s: flushing batch\n", __FUNCTION__));
247803b705cfSriastradh	if (sna->render.vertex_offset) {
247903b705cfSriastradh		gen3_vertex_flush(sna);
248003b705cfSriastradh		gen3_magic_ca_pass(sna, op);
248103b705cfSriastradh	}
248203b705cfSriastradh	sna_vertex_wait__locked(&sna->render);
248303b705cfSriastradh	_kgem_submit(&sna->kgem);
248403b705cfSriastradh	gen3_emit_composite_state(sna, op);
248503b705cfSriastradh	assert(sna->render.vertex_offset == 0);
248603b705cfSriastradh	assert(sna->render.vertex_reloc[0] == 0);
248703b705cfSriastradh	goto start;
248803b705cfSriastradh}
248903b705cfSriastradh
249003b705cfSriastradhfastcall static void
249103b705cfSriastradhgen3_render_composite_blt(struct sna *sna,
249203b705cfSriastradh			  const struct sna_composite_op *op,
249303b705cfSriastradh			  const struct sna_composite_rectangles *r)
249403b705cfSriastradh{
249503b705cfSriastradh	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", __FUNCTION__,
249603b705cfSriastradh	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
249703b705cfSriastradh	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
249803b705cfSriastradh	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
249903b705cfSriastradh	     r->width, r->height));
250003b705cfSriastradh
250103b705cfSriastradh	gen3_get_rectangles(sna, op, 1);
250203b705cfSriastradh
250303b705cfSriastradh	op->prim_emit(sna, op, r);
250403b705cfSriastradh}
250503b705cfSriastradh
250603b705cfSriastradhfastcall static void
250703b705cfSriastradhgen3_render_composite_box(struct sna *sna,
250803b705cfSriastradh			  const struct sna_composite_op *op,
250903b705cfSriastradh			  const BoxRec *box)
251003b705cfSriastradh{
251103b705cfSriastradh	struct sna_composite_rectangles r;
251203b705cfSriastradh
251303b705cfSriastradh	DBG(("%s: src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
251403b705cfSriastradh	     __FUNCTION__,
251503b705cfSriastradh	     op->src.offset[0], op->src.offset[1],
251603b705cfSriastradh	     op->mask.offset[0], op->mask.offset[1],
251703b705cfSriastradh	     op->dst.x, op->dst.y));
251803b705cfSriastradh
251903b705cfSriastradh	gen3_get_rectangles(sna, op, 1);
252003b705cfSriastradh
252103b705cfSriastradh	r.dst.x  = box->x1;
252203b705cfSriastradh	r.dst.y  = box->y1;
252303b705cfSriastradh	r.width  = box->x2 - box->x1;
252403b705cfSriastradh	r.height = box->y2 - box->y1;
252503b705cfSriastradh	r.src = r.mask = r.dst;
252603b705cfSriastradh
252703b705cfSriastradh	op->prim_emit(sna, op, &r);
252803b705cfSriastradh}
252903b705cfSriastradh
253003b705cfSriastradhstatic void
253103b705cfSriastradhgen3_render_composite_boxes__blt(struct sna *sna,
253203b705cfSriastradh				 const struct sna_composite_op *op,
253303b705cfSriastradh				 const BoxRec *box, int nbox)
253403b705cfSriastradh{
253503b705cfSriastradh	DBG(("%s: nbox=%d, src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
253603b705cfSriastradh	     __FUNCTION__, nbox,
253703b705cfSriastradh	     op->src.offset[0], op->src.offset[1],
253803b705cfSriastradh	     op->mask.offset[0], op->mask.offset[1],
253903b705cfSriastradh	     op->dst.x, op->dst.y));
254003b705cfSriastradh
254103b705cfSriastradh	do {
254203b705cfSriastradh		int nbox_this_time;
254303b705cfSriastradh
254403b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, op, nbox);
254503b705cfSriastradh		nbox -= nbox_this_time;
254603b705cfSriastradh
254703b705cfSriastradh		do {
254803b705cfSriastradh			struct sna_composite_rectangles r;
254903b705cfSriastradh
255003b705cfSriastradh			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
255103b705cfSriastradh			     box->x1, box->y1,
255203b705cfSriastradh			     box->x2 - box->x1,
255303b705cfSriastradh			     box->y2 - box->y1));
255403b705cfSriastradh
255503b705cfSriastradh			r.dst.x  = box->x1; r.dst.y  = box->y1;
255603b705cfSriastradh			r.width = box->x2 - box->x1;
255703b705cfSriastradh			r.height = box->y2 - box->y1;
255803b705cfSriastradh			r.src = r.mask = r.dst;
255903b705cfSriastradh
256003b705cfSriastradh			op->prim_emit(sna, op, &r);
256103b705cfSriastradh			box++;
256203b705cfSriastradh		} while (--nbox_this_time);
256303b705cfSriastradh	} while (nbox);
256403b705cfSriastradh}
256503b705cfSriastradh
256603b705cfSriastradhstatic void
256703b705cfSriastradhgen3_render_composite_boxes(struct sna *sna,
256803b705cfSriastradh			    const struct sna_composite_op *op,
256903b705cfSriastradh			    const BoxRec *box, int nbox)
257003b705cfSriastradh{
257103b705cfSriastradh	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
257203b705cfSriastradh
257303b705cfSriastradh	do {
257403b705cfSriastradh		int nbox_this_time;
257503b705cfSriastradh		float *v;
257603b705cfSriastradh
257703b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, op, nbox);
257803b705cfSriastradh		assert(nbox_this_time);
257903b705cfSriastradh		nbox -= nbox_this_time;
258003b705cfSriastradh
258103b705cfSriastradh		v = sna->render.vertices + sna->render.vertex_used;
258203b705cfSriastradh		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
258303b705cfSriastradh
258403b705cfSriastradh		op->emit_boxes(op, box, nbox_this_time, v);
258503b705cfSriastradh		box += nbox_this_time;
258603b705cfSriastradh	} while (nbox);
258703b705cfSriastradh}
258803b705cfSriastradh
258903b705cfSriastradhstatic void
259003b705cfSriastradhgen3_render_composite_boxes__thread(struct sna *sna,
259103b705cfSriastradh				    const struct sna_composite_op *op,
259203b705cfSriastradh				    const BoxRec *box, int nbox)
259303b705cfSriastradh{
259403b705cfSriastradh	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
259503b705cfSriastradh
259603b705cfSriastradh	sna_vertex_lock(&sna->render);
259703b705cfSriastradh	do {
259803b705cfSriastradh		int nbox_this_time;
259903b705cfSriastradh		float *v;
260003b705cfSriastradh
260103b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, op, nbox);
260203b705cfSriastradh		assert(nbox_this_time);
260303b705cfSriastradh		nbox -= nbox_this_time;
260403b705cfSriastradh
260503b705cfSriastradh		v = sna->render.vertices + sna->render.vertex_used;
260603b705cfSriastradh		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
260703b705cfSriastradh
260803b705cfSriastradh		sna_vertex_acquire__locked(&sna->render);
260903b705cfSriastradh		sna_vertex_unlock(&sna->render);
261003b705cfSriastradh
261103b705cfSriastradh		op->emit_boxes(op, box, nbox_this_time, v);
261203b705cfSriastradh		box += nbox_this_time;
261303b705cfSriastradh
261403b705cfSriastradh		sna_vertex_lock(&sna->render);
261503b705cfSriastradh		sna_vertex_release__locked(&sna->render);
261603b705cfSriastradh	} while (nbox);
261703b705cfSriastradh	sna_vertex_unlock(&sna->render);
261803b705cfSriastradh}
261903b705cfSriastradh
262003b705cfSriastradhstatic void
262103b705cfSriastradhgen3_render_composite_done(struct sna *sna,
262203b705cfSriastradh			   const struct sna_composite_op *op)
262303b705cfSriastradh{
262403b705cfSriastradh	DBG(("%s()\n", __FUNCTION__));
262503b705cfSriastradh
262603b705cfSriastradh	if (sna->render.vertex_offset) {
262703b705cfSriastradh		gen3_vertex_flush(sna);
262803b705cfSriastradh		gen3_magic_ca_pass(sna, op);
262903b705cfSriastradh	}
263003b705cfSriastradh
263103b705cfSriastradh	if (op->mask.bo)
263203b705cfSriastradh		kgem_bo_destroy(&sna->kgem, op->mask.bo);
263303b705cfSriastradh	if (op->src.bo)
263403b705cfSriastradh		kgem_bo_destroy(&sna->kgem, op->src.bo);
263503b705cfSriastradh
263603b705cfSriastradh	sna_render_composite_redirect_done(sna, op);
263703b705cfSriastradh}
263803b705cfSriastradh
263903b705cfSriastradhstatic void
264003b705cfSriastradhdiscard_vbo(struct sna *sna)
264103b705cfSriastradh{
264203b705cfSriastradh	kgem_bo_destroy(&sna->kgem, sna->render.vbo);
264303b705cfSriastradh	sna->render.vbo = NULL;
264403b705cfSriastradh	sna->render.vertices = sna->render.vertex_data;
264503b705cfSriastradh	sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
264603b705cfSriastradh	sna->render.vertex_used = 0;
264703b705cfSriastradh	sna->render.vertex_index = 0;
264803b705cfSriastradh}
264903b705cfSriastradh
265003b705cfSriastradhstatic void
265103b705cfSriastradhgen3_render_reset(struct sna *sna)
265203b705cfSriastradh{
265303b705cfSriastradh	struct gen3_render_state *state = &sna->render_state.gen3;
265403b705cfSriastradh
265503b705cfSriastradh	state->need_invariant = true;
265603b705cfSriastradh	state->current_dst = 0;
265703b705cfSriastradh	state->tex_count = 0;
265803b705cfSriastradh	state->last_drawrect_limit = ~0U;
265903b705cfSriastradh	state->last_target = 0;
266003b705cfSriastradh	state->last_blend = 0;
266103b705cfSriastradh	state->last_constants = 0;
266203b705cfSriastradh	state->last_sampler = 0;
266303b705cfSriastradh	state->last_shader = 0x7fffffff;
266403b705cfSriastradh	state->last_diffuse = 0xcc00ffee;
266503b705cfSriastradh	state->last_specular = 0xcc00ffee;
266603b705cfSriastradh
266703b705cfSriastradh	state->floats_per_vertex = 0;
266803b705cfSriastradh	state->last_floats_per_vertex = 0;
266903b705cfSriastradh	state->last_vertex_offset = 0;
267003b705cfSriastradh
267142542f5fSchristos	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
267242542f5fSchristos		DBG(("%s: discarding vbo as next access will stall: %lx\n",
267342542f5fSchristos		     __FUNCTION__, (long)sna->render.vbo->presumed_offset));
267403b705cfSriastradh		discard_vbo(sna);
267503b705cfSriastradh	}
267603b705cfSriastradh
267703b705cfSriastradh	sna->render.vertex_reloc[0] = 0;
267803b705cfSriastradh	sna->render.vertex_offset = 0;
267903b705cfSriastradh}
268003b705cfSriastradh
268103b705cfSriastradhstatic void
268203b705cfSriastradhgen3_render_retire(struct kgem *kgem)
268303b705cfSriastradh{
268403b705cfSriastradh	struct sna *sna;
268503b705cfSriastradh
268603b705cfSriastradh	sna = container_of(kgem, struct sna, kgem);
268703b705cfSriastradh	if (sna->render.vertex_reloc[0] == 0 &&
268803b705cfSriastradh	    sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
268903b705cfSriastradh		DBG(("%s: resetting idle vbo\n", __FUNCTION__));
269003b705cfSriastradh		sna->render.vertex_used = 0;
269103b705cfSriastradh		sna->render.vertex_index = 0;
269203b705cfSriastradh	}
269303b705cfSriastradh}
269403b705cfSriastradh
269503b705cfSriastradhstatic void
269603b705cfSriastradhgen3_render_expire(struct kgem *kgem)
269703b705cfSriastradh{
269803b705cfSriastradh	struct sna *sna;
269903b705cfSriastradh
270003b705cfSriastradh	sna = container_of(kgem, struct sna, kgem);
270103b705cfSriastradh	if (sna->render.vbo && !sna->render.vertex_used) {
270203b705cfSriastradh		DBG(("%s: discarding vbo\n", __FUNCTION__));
270303b705cfSriastradh		discard_vbo(sna);
270403b705cfSriastradh	}
270503b705cfSriastradh}
270603b705cfSriastradh
270703b705cfSriastradhstatic bool gen3_composite_channel_set_format(struct sna_composite_channel *channel,
270803b705cfSriastradh					      CARD32 format)
270903b705cfSriastradh{
271003b705cfSriastradh	unsigned int i;
271103b705cfSriastradh
271203b705cfSriastradh	for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
271303b705cfSriastradh		if (gen3_tex_formats[i].fmt == format) {
271403b705cfSriastradh			channel->card_format = gen3_tex_formats[i].card_fmt;
271503b705cfSriastradh			channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
271603b705cfSriastradh			return true;
271703b705cfSriastradh		}
271803b705cfSriastradh	}
271903b705cfSriastradh	return false;
272003b705cfSriastradh}
272103b705cfSriastradh
272203b705cfSriastradhstatic bool source_is_covered(PicturePtr picture,
272303b705cfSriastradh			      int x, int y,
272403b705cfSriastradh			      int width, int height)
272503b705cfSriastradh{
272603b705cfSriastradh	int x1, y1, x2, y2;
272703b705cfSriastradh
272803b705cfSriastradh	if (picture->repeat && picture->repeatType != RepeatNone)
272903b705cfSriastradh		return true;
273003b705cfSriastradh
273103b705cfSriastradh	if (picture->pDrawable == NULL)
273203b705cfSriastradh		return false;
273303b705cfSriastradh
273403b705cfSriastradh	if (picture->transform) {
273503b705cfSriastradh		pixman_box16_t sample;
273603b705cfSriastradh
273703b705cfSriastradh		sample.x1 = x;
273803b705cfSriastradh		sample.y1 = y;
273903b705cfSriastradh		sample.x2 = x + width;
274003b705cfSriastradh		sample.y2 = y + height;
274103b705cfSriastradh
274203b705cfSriastradh		pixman_transform_bounds(picture->transform, &sample);
274303b705cfSriastradh
274403b705cfSriastradh		x1 = sample.x1;
274503b705cfSriastradh		x2 = sample.x2;
274603b705cfSriastradh		y1 = sample.y1;
274703b705cfSriastradh		y2 = sample.y2;
274803b705cfSriastradh	} else {
274903b705cfSriastradh		x1 = x;
275003b705cfSriastradh		y1 = y;
275103b705cfSriastradh		x2 = x + width;
275203b705cfSriastradh		y2 = y + height;
275303b705cfSriastradh	}
275403b705cfSriastradh
275503b705cfSriastradh	return
275603b705cfSriastradh		x1 >= 0 && y1 >= 0 &&
275703b705cfSriastradh		x2 <= picture->pDrawable->width &&
275803b705cfSriastradh		y2 <= picture->pDrawable->height;
275903b705cfSriastradh}
276003b705cfSriastradh
276103b705cfSriastradhstatic bool gen3_composite_channel_set_xformat(PicturePtr picture,
276203b705cfSriastradh					       struct sna_composite_channel *channel,
276303b705cfSriastradh					       int x, int y,
276403b705cfSriastradh					       int width, int height)
276503b705cfSriastradh{
276603b705cfSriastradh	unsigned int i;
276703b705cfSriastradh
276803b705cfSriastradh	if (PICT_FORMAT_A(picture->format) != 0)
276903b705cfSriastradh		return false;
277003b705cfSriastradh
277103b705cfSriastradh	if (width == 0 || height == 0)
277203b705cfSriastradh		return false;
277303b705cfSriastradh
277403b705cfSriastradh	if (!source_is_covered(picture, x, y, width, height))
277503b705cfSriastradh		return false;
277603b705cfSriastradh
277703b705cfSriastradh	for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
277803b705cfSriastradh		if (gen3_tex_formats[i].xfmt == picture->format) {
277903b705cfSriastradh			channel->card_format = gen3_tex_formats[i].card_fmt;
278003b705cfSriastradh			channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
278103b705cfSriastradh			channel->alpha_fixup = true;
278203b705cfSriastradh			return true;
278303b705cfSriastradh		}
278403b705cfSriastradh	}
278503b705cfSriastradh
278603b705cfSriastradh	return false;
278703b705cfSriastradh}
278803b705cfSriastradh
278903b705cfSriastradhstatic int
279003b705cfSriastradhgen3_init_solid(struct sna_composite_channel *channel, uint32_t color)
279103b705cfSriastradh{
279203b705cfSriastradh	channel->u.gen3.mode = color;
279303b705cfSriastradh	channel->u.gen3.type = SHADER_CONSTANT;
279403b705cfSriastradh	if (color == 0)
279503b705cfSriastradh		channel->u.gen3.type = SHADER_ZERO;
279603b705cfSriastradh	else if (color == 0xff000000)
279703b705cfSriastradh		channel->u.gen3.type = SHADER_BLACK;
279803b705cfSriastradh	else if (color == 0xffffffff)
279903b705cfSriastradh		channel->u.gen3.type = SHADER_WHITE;
280003b705cfSriastradh
280103b705cfSriastradh	channel->bo = NULL;
280203b705cfSriastradh	channel->is_opaque = (color >> 24) == 0xff;
280303b705cfSriastradh	channel->is_affine = 1;
280403b705cfSriastradh	channel->alpha_fixup = 0;
280503b705cfSriastradh	channel->rb_reversed = 0;
280603b705cfSriastradh
280703b705cfSriastradh	DBG(("%s: color=%08x, is_opaque=%d, type=%d\n",
280803b705cfSriastradh	     __FUNCTION__, color, channel->is_opaque, channel->u.gen3.type));
280903b705cfSriastradh
281003b705cfSriastradh	/* for consistency */
281103b705cfSriastradh	channel->repeat = RepeatNormal;
281203b705cfSriastradh	channel->filter = PictFilterNearest;
281303b705cfSriastradh	channel->pict_format = PICT_a8r8g8b8;
281403b705cfSriastradh	channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
281503b705cfSriastradh
281603b705cfSriastradh	return 1;
281703b705cfSriastradh}
281803b705cfSriastradh
281903b705cfSriastradhstatic void gen3_composite_channel_convert(struct sna_composite_channel *channel)
282003b705cfSriastradh{
282103b705cfSriastradh	if (channel->u.gen3.type == SHADER_TEXTURE)
282203b705cfSriastradh		channel->repeat = gen3_texture_repeat(channel->repeat);
282303b705cfSriastradh	else
282403b705cfSriastradh		channel->repeat = gen3_gradient_repeat(channel->repeat);
282503b705cfSriastradh
282603b705cfSriastradh	channel->filter = gen3_filter(channel->filter);
282703b705cfSriastradh	if (channel->card_format == 0)
282803b705cfSriastradh		gen3_composite_channel_set_format(channel, channel->pict_format);
282903b705cfSriastradh	assert(channel->card_format);
283003b705cfSriastradh}
283103b705cfSriastradh
283203b705cfSriastradhstatic bool gen3_gradient_setup(struct sna *sna,
283303b705cfSriastradh				PicturePtr picture,
283403b705cfSriastradh				struct sna_composite_channel *channel,
283503b705cfSriastradh				int16_t ox, int16_t oy)
283603b705cfSriastradh{
283703b705cfSriastradh	int16_t dx, dy;
283803b705cfSriastradh
283903b705cfSriastradh	if (picture->repeat == 0) {
284003b705cfSriastradh		channel->repeat = RepeatNone;
284103b705cfSriastradh	} else switch (picture->repeatType) {
284203b705cfSriastradh	case RepeatNone:
284303b705cfSriastradh	case RepeatNormal:
284403b705cfSriastradh	case RepeatPad:
284503b705cfSriastradh	case RepeatReflect:
284603b705cfSriastradh		channel->repeat = picture->repeatType;
284703b705cfSriastradh		break;
284803b705cfSriastradh	default:
284903b705cfSriastradh		return false;
285003b705cfSriastradh	}
285103b705cfSriastradh
285203b705cfSriastradh	channel->bo =
285303b705cfSriastradh		sna_render_get_gradient(sna,
285403b705cfSriastradh					(PictGradient *)picture->pSourcePict);
285503b705cfSriastradh	if (channel->bo == NULL)
285603b705cfSriastradh		return false;
285703b705cfSriastradh
285803b705cfSriastradh	channel->pict_format = PICT_a8r8g8b8;
285903b705cfSriastradh	channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
286003b705cfSriastradh	channel->filter = PictFilterNearest;
286103b705cfSriastradh	channel->is_affine = sna_transform_is_affine(picture->transform);
286242542f5fSchristos	if (sna_transform_is_imprecise_integer_translation(picture->transform, PictFilterNearest, false, &dx, &dy)) {
286303b705cfSriastradh		DBG(("%s: integer translation (%d, %d), removing\n",
286403b705cfSriastradh		     __FUNCTION__, dx, dy));
286503b705cfSriastradh		ox += dx;
286603b705cfSriastradh		oy += dy;
286703b705cfSriastradh		channel->transform = NULL;
286803b705cfSriastradh	} else
286903b705cfSriastradh		channel->transform = picture->transform;
287003b705cfSriastradh	channel->width  = channel->bo->pitch / 4;
287103b705cfSriastradh	channel->height = 1;
287203b705cfSriastradh	channel->offset[0] = ox;
287303b705cfSriastradh	channel->offset[1] = oy;
287403b705cfSriastradh	channel->scale[0] = channel->scale[1] = 1;
287503b705cfSriastradh	return true;
287603b705cfSriastradh}
287703b705cfSriastradh
287803b705cfSriastradhstatic int
287903b705cfSriastradhgen3_init_linear(struct sna *sna,
288003b705cfSriastradh		 PicturePtr picture,
288103b705cfSriastradh		 struct sna_composite_op *op,
288203b705cfSriastradh		 struct sna_composite_channel *channel,
288303b705cfSriastradh		 int ox, int oy)
288403b705cfSriastradh{
288503b705cfSriastradh	PictLinearGradient *linear =
288603b705cfSriastradh		(PictLinearGradient *)picture->pSourcePict;
288703b705cfSriastradh	float x0, y0, sf;
288803b705cfSriastradh	float dx, dy, offset;
288903b705cfSriastradh	int n;
289003b705cfSriastradh
289103b705cfSriastradh	DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n",
289203b705cfSriastradh	     __FUNCTION__,
289303b705cfSriastradh	     xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y),
289403b705cfSriastradh	     xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y)));
289503b705cfSriastradh
289603b705cfSriastradh	if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
289703b705cfSriastradh		return 0;
289803b705cfSriastradh
289903b705cfSriastradh	dx = xFixedToDouble(linear->p2.x - linear->p1.x);
290003b705cfSriastradh	dy = xFixedToDouble(linear->p2.y - linear->p1.y);
290103b705cfSriastradh	sf = dx*dx + dy*dy;
290203b705cfSriastradh	dx /= sf;
290303b705cfSriastradh	dy /= sf;
290403b705cfSriastradh
290503b705cfSriastradh	x0 = xFixedToDouble(linear->p1.x);
290603b705cfSriastradh	y0 = xFixedToDouble(linear->p1.y);
290703b705cfSriastradh	offset = dx*x0 + dy*y0;
290803b705cfSriastradh
290903b705cfSriastradh	n = op->u.gen3.num_constants;
291003b705cfSriastradh	channel->u.gen3.constants = FS_C0 + n / 4;
291103b705cfSriastradh	op->u.gen3.constants[n++] = dx;
291203b705cfSriastradh	op->u.gen3.constants[n++] = dy;
291303b705cfSriastradh	op->u.gen3.constants[n++] = -offset;
291403b705cfSriastradh	op->u.gen3.constants[n++] = 0;
291503b705cfSriastradh
291603b705cfSriastradh	if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
291703b705cfSriastradh		return -1;
291803b705cfSriastradh
291903b705cfSriastradh	channel->u.gen3.type = SHADER_LINEAR;
292003b705cfSriastradh	op->u.gen3.num_constants = n;
292103b705cfSriastradh
292203b705cfSriastradh	DBG(("%s: dx=%f, dy=%f, offset=%f, constants=%d\n",
292303b705cfSriastradh	     __FUNCTION__, dx, dy, -offset, channel->u.gen3.constants - FS_C0));
292403b705cfSriastradh	return 1;
292503b705cfSriastradh}
292603b705cfSriastradh
292703b705cfSriastradhstatic int
292803b705cfSriastradhgen3_init_radial(struct sna *sna,
292903b705cfSriastradh		 PicturePtr picture,
293003b705cfSriastradh		 struct sna_composite_op *op,
293103b705cfSriastradh		 struct sna_composite_channel *channel,
293203b705cfSriastradh		 int ox, int oy)
293303b705cfSriastradh{
293403b705cfSriastradh	PictRadialGradient *radial = (PictRadialGradient *)picture->pSourcePict;
293503b705cfSriastradh	double dx, dy, dr, r1;
293603b705cfSriastradh	int n;
293703b705cfSriastradh
293803b705cfSriastradh	dx = xFixedToDouble(radial->c2.x - radial->c1.x);
293903b705cfSriastradh	dy = xFixedToDouble(radial->c2.y - radial->c1.y);
294003b705cfSriastradh	dr = xFixedToDouble(radial->c2.radius - radial->c1.radius);
294103b705cfSriastradh
294203b705cfSriastradh	r1 = xFixedToDouble(radial->c1.radius);
294303b705cfSriastradh
294403b705cfSriastradh	n = op->u.gen3.num_constants;
294503b705cfSriastradh	channel->u.gen3.constants = FS_C0 + n / 4;
294603b705cfSriastradh	if (radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) {
294703b705cfSriastradh		if (radial->c2.radius == radial->c1.radius) {
294803b705cfSriastradh			channel->u.gen3.type = SHADER_ZERO;
294903b705cfSriastradh			return 1;
295003b705cfSriastradh		}
295103b705cfSriastradh
295203b705cfSriastradh		op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.x) / dr;
295303b705cfSriastradh		op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.y) / dr;
295403b705cfSriastradh		op->u.gen3.constants[n++] = 1. / dr;
295503b705cfSriastradh		op->u.gen3.constants[n++] = -r1 / dr;
295603b705cfSriastradh
295703b705cfSriastradh		channel->u.gen3.mode = RADIAL_ONE;
295803b705cfSriastradh	} else {
295903b705cfSriastradh		op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.x);
296003b705cfSriastradh		op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.y);
296103b705cfSriastradh		op->u.gen3.constants[n++] = r1;
296203b705cfSriastradh		op->u.gen3.constants[n++] = -4 * (dx*dx + dy*dy - dr*dr);
296303b705cfSriastradh
296403b705cfSriastradh		op->u.gen3.constants[n++] = -2 * dx;
296503b705cfSriastradh		op->u.gen3.constants[n++] = -2 * dy;
296603b705cfSriastradh		op->u.gen3.constants[n++] = -2 * r1 * dr;
296703b705cfSriastradh		op->u.gen3.constants[n++] = 1 / (2 * (dx*dx + dy*dy - dr*dr));
296803b705cfSriastradh
296903b705cfSriastradh		channel->u.gen3.mode = RADIAL_TWO;
297003b705cfSriastradh	}
297103b705cfSriastradh
297203b705cfSriastradh	if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
297303b705cfSriastradh		return -1;
297403b705cfSriastradh
297503b705cfSriastradh	channel->u.gen3.type = SHADER_RADIAL;
297603b705cfSriastradh	op->u.gen3.num_constants = n;
297703b705cfSriastradh	return 1;
297803b705cfSriastradh}
297903b705cfSriastradh
298003b705cfSriastradhstatic bool
298103b705cfSriastradhsna_picture_is_clear(PicturePtr picture,
298203b705cfSriastradh		     int x, int y, int w, int h,
298303b705cfSriastradh		     uint32_t *color)
298403b705cfSriastradh{
298503b705cfSriastradh	struct sna_pixmap *priv;
298603b705cfSriastradh
298703b705cfSriastradh	if (!picture->pDrawable)
298803b705cfSriastradh		return false;
298903b705cfSriastradh
299003b705cfSriastradh	priv = sna_pixmap(get_drawable_pixmap(picture->pDrawable));
299103b705cfSriastradh	if (priv == NULL || !priv->clear)
299203b705cfSriastradh		return false;
299303b705cfSriastradh
299403b705cfSriastradh	if (!source_is_covered(picture, x, y, w, h))
299503b705cfSriastradh		return false;
299603b705cfSriastradh
299703b705cfSriastradh	*color = priv->clear_color;
299803b705cfSriastradh	return true;
299903b705cfSriastradh}
300003b705cfSriastradh
300103b705cfSriastradhstatic int
300203b705cfSriastradhgen3_composite_picture(struct sna *sna,
300303b705cfSriastradh		       PicturePtr picture,
300403b705cfSriastradh		       struct sna_composite_op *op,
300503b705cfSriastradh		       struct sna_composite_channel *channel,
300603b705cfSriastradh		       int16_t x, int16_t y,
300703b705cfSriastradh		       int16_t w, int16_t h,
300803b705cfSriastradh		       int16_t dst_x, int16_t dst_y,
300903b705cfSriastradh		       bool precise)
301003b705cfSriastradh{
301103b705cfSriastradh	PixmapPtr pixmap;
301203b705cfSriastradh	uint32_t color;
301303b705cfSriastradh	int16_t dx, dy;
301403b705cfSriastradh
301503b705cfSriastradh	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
301603b705cfSriastradh	     __FUNCTION__, x, y, w, h, dst_x, dst_y));
301703b705cfSriastradh
301803b705cfSriastradh	channel->card_format = 0;
301903b705cfSriastradh
302003b705cfSriastradh	if (picture->pDrawable == NULL) {
302103b705cfSriastradh		SourcePict *source = picture->pSourcePict;
302203b705cfSriastradh		int ret = -1;
302303b705cfSriastradh
302403b705cfSriastradh		switch (source->type) {
302503b705cfSriastradh		case SourcePictTypeSolidFill:
302603b705cfSriastradh			DBG(("%s: solid fill [%08x], format %08x\n",
302703b705cfSriastradh			     __FUNCTION__,
302803b705cfSriastradh			     (unsigned)source->solidFill.color,
302903b705cfSriastradh			     (unsigned)picture->format));
303003b705cfSriastradh			ret = gen3_init_solid(channel, source->solidFill.color);
303103b705cfSriastradh			break;
303203b705cfSriastradh
303303b705cfSriastradh		case SourcePictTypeLinear:
303403b705cfSriastradh			ret = gen3_init_linear(sna, picture, op, channel,
303503b705cfSriastradh					       x - dst_x, y - dst_y);
303603b705cfSriastradh			break;
303703b705cfSriastradh
303803b705cfSriastradh		case SourcePictTypeRadial:
303903b705cfSriastradh			ret = gen3_init_radial(sna, picture, op, channel,
304003b705cfSriastradh					       x - dst_x, y - dst_y);
304103b705cfSriastradh			break;
304203b705cfSriastradh		}
304303b705cfSriastradh
304403b705cfSriastradh		if (ret == -1) {
304503b705cfSriastradh			if (!precise)
304603b705cfSriastradh				ret = sna_render_picture_approximate_gradient(sna, picture, channel,
304703b705cfSriastradh									      x, y, w, h, dst_x, dst_y);
304803b705cfSriastradh			if (ret == -1)
304903b705cfSriastradh				ret = sna_render_picture_fixup(sna, picture, channel,
305003b705cfSriastradh							       x, y, w, h, dst_x, dst_y);
305103b705cfSriastradh		}
305203b705cfSriastradh		return ret;
305303b705cfSriastradh	}
305403b705cfSriastradh
305503b705cfSriastradh	if (picture->alphaMap) {
305603b705cfSriastradh		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
305703b705cfSriastradh		return sna_render_picture_fixup(sna, picture, channel,
305803b705cfSriastradh						x, y, w, h, dst_x, dst_y);
305903b705cfSriastradh	}
306003b705cfSriastradh
306103b705cfSriastradh	if (sna_picture_is_solid(picture, &color)) {
306203b705cfSriastradh		DBG(("%s: solid drawable [%08x]\n", __FUNCTION__, color));
306303b705cfSriastradh		return gen3_init_solid(channel, color);
306403b705cfSriastradh	}
306503b705cfSriastradh
306603b705cfSriastradh	if (sna_picture_is_clear(picture, x, y, w, h, &color)) {
306703b705cfSriastradh		DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color));
306803b705cfSriastradh		return gen3_init_solid(channel, color_convert(color, picture->format, PICT_a8r8g8b8));
306903b705cfSriastradh	}
307003b705cfSriastradh
307103b705cfSriastradh	if (!gen3_check_repeat(picture))
307203b705cfSriastradh		return sna_render_picture_fixup(sna, picture, channel,
307303b705cfSriastradh						x, y, w, h, dst_x, dst_y);
307403b705cfSriastradh
307503b705cfSriastradh	if (!gen3_check_filter(picture))
307603b705cfSriastradh		return sna_render_picture_fixup(sna, picture, channel,
307703b705cfSriastradh						x, y, w, h, dst_x, dst_y);
307803b705cfSriastradh
307903b705cfSriastradh	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
308003b705cfSriastradh	channel->filter = picture->filter;
308103b705cfSriastradh	channel->pict_format = picture->format;
308203b705cfSriastradh
308303b705cfSriastradh	pixmap = get_drawable_pixmap(picture->pDrawable);
308403b705cfSriastradh	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
308503b705cfSriastradh
308603b705cfSriastradh	x += dx + picture->pDrawable->x;
308703b705cfSriastradh	y += dy + picture->pDrawable->y;
308803b705cfSriastradh
308942542f5fSchristos	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
309003b705cfSriastradh		DBG(("%s: integer translation (%d, %d), removing\n",
309103b705cfSriastradh		     __FUNCTION__, dx, dy));
309203b705cfSriastradh		x += dx;
309303b705cfSriastradh		y += dy;
309403b705cfSriastradh		channel->transform = NULL;
309503b705cfSriastradh		channel->filter = PictFilterNearest;
309642542f5fSchristos
309742542f5fSchristos		if (channel->repeat ||
309842542f5fSchristos		    (x >= 0 &&
309942542f5fSchristos		     y >= 0 &&
310042542f5fSchristos		     x + w < pixmap->drawable.width &&
310142542f5fSchristos		     y + h < pixmap->drawable.height)) {
310242542f5fSchristos			struct sna_pixmap *priv = sna_pixmap(pixmap);
310342542f5fSchristos			if (priv && priv->clear) {
310442542f5fSchristos				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
310542542f5fSchristos				return gen3_init_solid(channel, priv->clear_color);
310642542f5fSchristos			}
310742542f5fSchristos		}
310803b705cfSriastradh	} else {
310903b705cfSriastradh		channel->transform = picture->transform;
311003b705cfSriastradh		channel->is_affine = sna_transform_is_affine(picture->transform);
311103b705cfSriastradh	}
311203b705cfSriastradh
311303b705cfSriastradh	if (!gen3_composite_channel_set_format(channel, picture->format) &&
311403b705cfSriastradh	    !gen3_composite_channel_set_xformat(picture, channel, x, y, w, h))
311503b705cfSriastradh		return sna_render_picture_convert(sna, picture, channel, pixmap,
311603b705cfSriastradh						  x, y, w, h, dst_x, dst_y,
311703b705cfSriastradh						  false);
311803b705cfSriastradh	assert(channel->card_format);
311903b705cfSriastradh
312003b705cfSriastradh	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
312103b705cfSriastradh		DBG(("%s: pixmap too large (%dx%d), extracting (%d, %d)x(%d,%d)\n",
312203b705cfSriastradh		     __FUNCTION__,
312303b705cfSriastradh		     pixmap->drawable.width, pixmap->drawable.height,
312403b705cfSriastradh		     x, y, w, h));
312503b705cfSriastradh		return sna_render_picture_extract(sna, picture, channel,
312603b705cfSriastradh						  x, y, w, h, dst_x, dst_y);
312703b705cfSriastradh	}
312803b705cfSriastradh
312903b705cfSriastradh	return sna_render_pixmap_bo(sna, channel, pixmap,
313003b705cfSriastradh				    x, y, w, h, dst_x, dst_y);
313103b705cfSriastradh}
313203b705cfSriastradh
313342542f5fSchristosstatic void
313442542f5fSchristosgen3_align_vertex(struct sna *sna,
313542542f5fSchristos		  const struct sna_composite_op *op)
313603b705cfSriastradh{
313742542f5fSchristos	int vertex_index;
313803b705cfSriastradh
313942542f5fSchristos	if (op->floats_per_vertex == sna->render_state.gen3.last_floats_per_vertex)
314042542f5fSchristos		return;
314103b705cfSriastradh
314242542f5fSchristos	DBG(("aligning vertex: was %d, now %d floats per vertex\n",
314342542f5fSchristos	     sna->render_state.gen3.last_floats_per_vertex,
314442542f5fSchristos	     op->floats_per_vertex));
314503b705cfSriastradh
314642542f5fSchristos	assert(op->floats_per_rect == 3*op->floats_per_vertex);
314703b705cfSriastradh
314842542f5fSchristos	vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
314942542f5fSchristos	if ((int)sna->render.vertex_size - vertex_index * op->floats_per_vertex < 2*op->floats_per_rect) {
315042542f5fSchristos		DBG(("%s: flushing vertex buffer: new index=%d, max=%d\n",
315142542f5fSchristos		     __FUNCTION__, vertex_index, sna->render.vertex_size / op->floats_per_vertex));
315242542f5fSchristos		if (gen3_vertex_finish(sna) < 2*op->floats_per_vertex)
315342542f5fSchristos			kgem_submit(&sna->kgem);
315403b705cfSriastradh
315542542f5fSchristos		vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
315603b705cfSriastradh	}
315703b705cfSriastradh
315842542f5fSchristos	sna->render.vertex_index = vertex_index;
315942542f5fSchristos	sna->render.vertex_used = vertex_index * op->floats_per_vertex;
316003b705cfSriastradh}
316103b705cfSriastradh
316203b705cfSriastradhstatic bool
316303b705cfSriastradhgen3_composite_set_target(struct sna *sna,
316403b705cfSriastradh			  struct sna_composite_op *op,
316503b705cfSriastradh			  PicturePtr dst,
316642542f5fSchristos			  int x, int y, int w, int h,
316742542f5fSchristos			  bool partial)
316803b705cfSriastradh{
316903b705cfSriastradh	BoxRec box;
317042542f5fSchristos	unsigned hint;
317103b705cfSriastradh
317203b705cfSriastradh	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
317303b705cfSriastradh	op->dst.format = dst->format;
317403b705cfSriastradh	op->dst.width = op->dst.pixmap->drawable.width;
317503b705cfSriastradh	op->dst.height = op->dst.pixmap->drawable.height;
317603b705cfSriastradh
317703b705cfSriastradh	if (w && h) {
317803b705cfSriastradh		box.x1 = x;
317903b705cfSriastradh		box.y1 = y;
318003b705cfSriastradh		box.x2 = x + w;
318103b705cfSriastradh		box.y2 = y + h;
318203b705cfSriastradh	} else
318303b705cfSriastradh		sna_render_picture_extents(dst, &box);
318403b705cfSriastradh
318542542f5fSchristos	hint = PREFER_GPU | FORCE_GPU | RENDER_GPU;
318642542f5fSchristos	if (!partial) {
318742542f5fSchristos		hint |= IGNORE_DAMAGE;
318842542f5fSchristos		if (w == op->dst.width && h == op->dst.height)
318942542f5fSchristos			hint |= REPLACES;
319042542f5fSchristos	}
319142542f5fSchristos
319242542f5fSchristos	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
319303b705cfSriastradh	if (op->dst.bo == NULL)
319403b705cfSriastradh		return false;
319503b705cfSriastradh
319642542f5fSchristos	if (hint & REPLACES) {
319742542f5fSchristos		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
319842542f5fSchristos		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
319942542f5fSchristos	}
320042542f5fSchristos
320103b705cfSriastradh	assert(op->dst.bo->unique_id);
320203b705cfSriastradh
320303b705cfSriastradh	/* For single-stream mode there should be no minimum alignment
320403b705cfSriastradh	 * required, except that the width must be at least 2 elements.
320542542f5fSchristos	 * Furthermore, it appears that the pitch must be a multiple of
320642542f5fSchristos	 * 2 elements.
320703b705cfSriastradh	 */
320842542f5fSchristos	if (op->dst.bo->pitch & ((2*op->dst.pixmap->drawable.bitsPerPixel >> 3) - 1))
320942542f5fSchristos		return false;
321003b705cfSriastradh
321103b705cfSriastradh	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
321203b705cfSriastradh			    &op->dst.x, &op->dst.y);
321303b705cfSriastradh
321442542f5fSchristos	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
321503b705cfSriastradh	     __FUNCTION__,
321642542f5fSchristos	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
321703b705cfSriastradh	     op->dst.width, op->dst.height,
321803b705cfSriastradh	     op->dst.bo->pitch,
321903b705cfSriastradh	     op->dst.x, op->dst.y,
322003b705cfSriastradh	     op->damage ? *op->damage : (void *)-1));
322103b705cfSriastradh
322203b705cfSriastradh	assert(op->dst.bo->proxy == NULL);
322342542f5fSchristos
322442542f5fSchristos	if ((too_large(op->dst.width, op->dst.height) ||
322542542f5fSchristos	     !gen3_check_pitch_3d(op->dst.bo)) &&
322642542f5fSchristos	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
322742542f5fSchristos		return false;
322842542f5fSchristos
322903b705cfSriastradh	return true;
323003b705cfSriastradh}
323103b705cfSriastradh
/*
 * Multiply two 8-bit values treated as fractions of 255 and return the
 * 8-bit result, i.e. approximately a * b / 255.
 *
 * The division is done without a divide: with t = a*b + 0x7f the result
 * is (t + (t >> 8)) >> 8.
 */
static inline uint8_t
mul_8_8(uint8_t a, uint8_t b)
{
	const uint16_t biased = a * (uint16_t)b + 0x7f;
	const uint16_t high = biased >> 8;

	return (high + biased) >> 8;
}
323803b705cfSriastradh
/*
 * Scale one 8-bit channel of @s by the alpha of @m.
 *
 * The channel is taken from bits [shift, shift + 8) of @s, multiplied by
 * the top byte of @m with mul_8_8(), and returned in the same bit
 * position.
 *
 * The product is widened to uint32_t before being shifted back.  As a
 * uint8_t it would be promoted to (signed) int, and shifting a value
 * >= 0x80 left by 24 would overflow it, which is undefined behaviour.
 */
static inline uint32_t multa(uint32_t s, uint32_t m, int shift)
{
	return (uint32_t)mul_8_8((s >> shift) & 0xff, m >> 24) << shift;
}
324303b705cfSriastradh
324403b705cfSriastradhstatic inline bool is_constant_ps(uint32_t type)
324503b705cfSriastradh{
324603b705cfSriastradh	switch (type) {
324703b705cfSriastradh	case SHADER_NONE: /* be warned! */
324803b705cfSriastradh	case SHADER_ZERO:
324903b705cfSriastradh	case SHADER_BLACK:
325003b705cfSriastradh	case SHADER_WHITE:
325103b705cfSriastradh	case SHADER_CONSTANT:
325203b705cfSriastradh		return true;
325303b705cfSriastradh	default:
325403b705cfSriastradh		return false;
325503b705cfSriastradh	}
325603b705cfSriastradh}
325703b705cfSriastradh
325803b705cfSriastradhstatic bool
325903b705cfSriastradhhas_alphamap(PicturePtr p)
326003b705cfSriastradh{
326103b705cfSriastradh	return p->alphaMap != NULL;
326203b705cfSriastradh}
326303b705cfSriastradh
326403b705cfSriastradhstatic bool
326503b705cfSriastradhneed_upload(PicturePtr p)
326603b705cfSriastradh{
326703b705cfSriastradh	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
326803b705cfSriastradh}
326903b705cfSriastradh
327003b705cfSriastradhstatic bool
327103b705cfSriastradhsource_is_busy(PixmapPtr pixmap)
327203b705cfSriastradh{
327303b705cfSriastradh	struct sna_pixmap *priv = sna_pixmap(pixmap);
327403b705cfSriastradh	if (priv == NULL)
327503b705cfSriastradh		return false;
327603b705cfSriastradh
327703b705cfSriastradh	if (priv->clear)
327803b705cfSriastradh		return false;
327903b705cfSriastradh
328003b705cfSriastradh	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
328103b705cfSriastradh		return true;
328203b705cfSriastradh
328303b705cfSriastradh	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
328403b705cfSriastradh		return true;
328503b705cfSriastradh
328603b705cfSriastradh	return priv->gpu_damage && !priv->cpu_damage;
328703b705cfSriastradh}
328803b705cfSriastradh
328903b705cfSriastradhstatic bool
329003b705cfSriastradhis_unhandled_gradient(PicturePtr picture, bool precise)
329103b705cfSriastradh{
329203b705cfSriastradh	if (picture->pDrawable)
329303b705cfSriastradh		return false;
329403b705cfSriastradh
329503b705cfSriastradh	switch (picture->pSourcePict->type) {
329603b705cfSriastradh	case SourcePictTypeSolidFill:
329703b705cfSriastradh	case SourcePictTypeLinear:
329803b705cfSriastradh	case SourcePictTypeRadial:
329903b705cfSriastradh		return false;
330003b705cfSriastradh	default:
330103b705cfSriastradh		return precise;
330203b705cfSriastradh	}
330303b705cfSriastradh}
330403b705cfSriastradh
330503b705cfSriastradhstatic bool
330603b705cfSriastradhsource_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
330703b705cfSriastradh{
330803b705cfSriastradh	if (sna_picture_is_solid(p, NULL))
330903b705cfSriastradh		return false;
331003b705cfSriastradh
331103b705cfSriastradh	if (is_unhandled_gradient(p, precise))
331203b705cfSriastradh		return true;
331303b705cfSriastradh
331403b705cfSriastradh	if (!gen3_check_xformat(p) || !gen3_check_repeat(p))
331503b705cfSriastradh		return true;
331603b705cfSriastradh
331703b705cfSriastradh	if (pixmap && source_is_busy(pixmap))
331803b705cfSriastradh		return false;
331903b705cfSriastradh
332003b705cfSriastradh	return has_alphamap(p) || !gen3_check_filter(p) || need_upload(p);
332103b705cfSriastradh}
332203b705cfSriastradh
332303b705cfSriastradhstatic bool
332403b705cfSriastradhgen3_composite_fallback(struct sna *sna,
332503b705cfSriastradh			uint8_t op,
332603b705cfSriastradh			PicturePtr src,
332703b705cfSriastradh			PicturePtr mask,
332803b705cfSriastradh			PicturePtr dst)
332903b705cfSriastradh{
333003b705cfSriastradh	PixmapPtr src_pixmap;
333103b705cfSriastradh	PixmapPtr mask_pixmap;
333203b705cfSriastradh	PixmapPtr dst_pixmap;
333303b705cfSriastradh	bool src_fallback, mask_fallback;
333403b705cfSriastradh
333503b705cfSriastradh	if (!gen3_check_dst_format(dst->format)) {
333603b705cfSriastradh		DBG(("%s: unknown destination format: %d\n",
333703b705cfSriastradh		     __FUNCTION__, dst->format));
333803b705cfSriastradh		return true;
333903b705cfSriastradh	}
334003b705cfSriastradh
334103b705cfSriastradh	dst_pixmap = get_drawable_pixmap(dst->pDrawable);
334203b705cfSriastradh
334303b705cfSriastradh	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
334403b705cfSriastradh	src_fallback = source_fallback(src, src_pixmap,
334503b705cfSriastradh				       dst->polyMode == PolyModePrecise);
334603b705cfSriastradh
334703b705cfSriastradh	if (mask) {
334803b705cfSriastradh		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
334903b705cfSriastradh		mask_fallback = source_fallback(mask, mask_pixmap,
335003b705cfSriastradh						dst->polyMode == PolyModePrecise);
335103b705cfSriastradh	} else {
335203b705cfSriastradh		mask_pixmap = NULL;
335303b705cfSriastradh		mask_fallback = false;
335403b705cfSriastradh	}
335503b705cfSriastradh
335603b705cfSriastradh	/* If we are using the destination as a source and need to
335703b705cfSriastradh	 * readback in order to upload the source, do it all
335803b705cfSriastradh	 * on the cpu.
335903b705cfSriastradh	 */
336003b705cfSriastradh	if (src_pixmap == dst_pixmap && src_fallback) {
336103b705cfSriastradh		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
336203b705cfSriastradh		return true;
336303b705cfSriastradh	}
336403b705cfSriastradh	if (mask_pixmap == dst_pixmap && mask_fallback) {
336503b705cfSriastradh		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
336603b705cfSriastradh		return true;
336703b705cfSriastradh	}
336803b705cfSriastradh
336903b705cfSriastradh	if (mask &&
337003b705cfSriastradh	    mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
337103b705cfSriastradh	    gen3_blend_op[op].src_alpha &&
337203b705cfSriastradh	    gen3_blend_op[op].src_blend != BLENDFACT_ZERO &&
337303b705cfSriastradh	    op != PictOpOver) {
337403b705cfSriastradh		DBG(("%s: component-alpha mask with op=%d, should fallback\n",
337503b705cfSriastradh		     __FUNCTION__, op));
337603b705cfSriastradh		return true;
337703b705cfSriastradh	}
337803b705cfSriastradh
337903b705cfSriastradh	/* If anything is on the GPU, push everything out to the GPU */
338003b705cfSriastradh	if (dst_use_gpu(dst_pixmap)) {
338103b705cfSriastradh		DBG(("%s: dst is already on the GPU, try to use GPU\n",
338203b705cfSriastradh		     __FUNCTION__));
338303b705cfSriastradh		return false;
338403b705cfSriastradh	}
338503b705cfSriastradh
338603b705cfSriastradh	if (src_pixmap && !src_fallback) {
338703b705cfSriastradh		DBG(("%s: src is already on the GPU, try to use GPU\n",
338803b705cfSriastradh		     __FUNCTION__));
338903b705cfSriastradh		return false;
339003b705cfSriastradh	}
339103b705cfSriastradh	if (mask_pixmap && !mask_fallback) {
339203b705cfSriastradh		DBG(("%s: mask is already on the GPU, try to use GPU\n",
339303b705cfSriastradh		     __FUNCTION__));
339403b705cfSriastradh		return false;
339503b705cfSriastradh	}
339603b705cfSriastradh
339703b705cfSriastradh	/* However if the dst is not on the GPU and we need to
339803b705cfSriastradh	 * render one of the sources using the CPU, we may
339903b705cfSriastradh	 * as well do the entire operation in place onthe CPU.
340003b705cfSriastradh	 */
340103b705cfSriastradh	if (src_fallback) {
340203b705cfSriastradh		DBG(("%s: dst is on the CPU and src will fallback\n",
340303b705cfSriastradh		     __FUNCTION__));
340403b705cfSriastradh		return true;
340503b705cfSriastradh	}
340603b705cfSriastradh
340703b705cfSriastradh	if (mask && mask_fallback) {
340803b705cfSriastradh		DBG(("%s: dst is on the CPU and mask will fallback\n",
340903b705cfSriastradh		     __FUNCTION__));
341003b705cfSriastradh		return true;
341103b705cfSriastradh	}
341203b705cfSriastradh
341303b705cfSriastradh	if (too_large(dst_pixmap->drawable.width,
341403b705cfSriastradh		      dst_pixmap->drawable.height) &&
341503b705cfSriastradh	    dst_is_cpu(dst_pixmap)) {
341603b705cfSriastradh		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
341703b705cfSriastradh		return true;
341803b705cfSriastradh	}
341903b705cfSriastradh
342003b705cfSriastradh	DBG(("%s: dst is not on the GPU and the operation should not fallback: use-cpu? %d\n",
342103b705cfSriastradh	     __FUNCTION__, dst_use_cpu(dst_pixmap)));
342203b705cfSriastradh	return dst_use_cpu(dst_pixmap);
342303b705cfSriastradh}
342403b705cfSriastradh
342503b705cfSriastradhstatic int
342603b705cfSriastradhreuse_source(struct sna *sna,
342703b705cfSriastradh	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
342803b705cfSriastradh	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
342903b705cfSriastradh{
343003b705cfSriastradh	if (src_x != msk_x || src_y != msk_y)
343103b705cfSriastradh		return false;
343203b705cfSriastradh
343303b705cfSriastradh	if (mask == src) {
343403b705cfSriastradh		*mc = *sc;
343503b705cfSriastradh		if (mc->bo)
343603b705cfSriastradh			kgem_bo_reference(mc->bo);
343703b705cfSriastradh		return true;
343803b705cfSriastradh	}
343903b705cfSriastradh
344003b705cfSriastradh	if ((src->pDrawable == NULL || mask->pDrawable != src->pDrawable))
344103b705cfSriastradh		return false;
344203b705cfSriastradh
344303b705cfSriastradh	if (sc->is_solid)
344403b705cfSriastradh		return false;
344503b705cfSriastradh
344603b705cfSriastradh	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));
344703b705cfSriastradh
344803b705cfSriastradh	if (!sna_transform_equal(src->transform, mask->transform))
344903b705cfSriastradh		return false;
345003b705cfSriastradh
345103b705cfSriastradh	if (!sna_picture_alphamap_equal(src, mask))
345203b705cfSriastradh		return false;
345303b705cfSriastradh
345403b705cfSriastradh	if (!gen3_check_repeat(mask))
345503b705cfSriastradh		return false;
345603b705cfSriastradh
345703b705cfSriastradh	if (!gen3_check_filter(mask))
345803b705cfSriastradh		return false;
345903b705cfSriastradh
346003b705cfSriastradh	if (!gen3_check_format(mask))
346103b705cfSriastradh		return false;
346203b705cfSriastradh
346303b705cfSriastradh	DBG(("%s: reusing source channel for mask with a twist\n",
346403b705cfSriastradh	     __FUNCTION__));
346503b705cfSriastradh
346603b705cfSriastradh	*mc = *sc;
346703b705cfSriastradh	mc->repeat = gen3_texture_repeat(mask->repeat ? mask->repeatType : RepeatNone);
346803b705cfSriastradh	mc->filter = gen3_filter(mask->filter);
346903b705cfSriastradh	mc->pict_format = mask->format;
347003b705cfSriastradh	gen3_composite_channel_set_format(mc, mask->format);
347103b705cfSriastradh	assert(mc->card_format);
347203b705cfSriastradh	if (mc->bo)
347303b705cfSriastradh		kgem_bo_reference(mc->bo);
347403b705cfSriastradh	return true;
347503b705cfSriastradh}
347603b705cfSriastradh
/*
 * Set up *tmp for compositing src (optionally combined with mask) onto dst.
 *
 * Order of attempts, as written below:
 *   1. reject ops outside gen3_blend_op[] (returns false directly);
 *   2. with no mask, let sna_blt_composite() take the operation;
 *   3. honour gen3_composite_fallback();
 *   4. return sna_tiling_composite()'s result when need_tiling() is true;
 *   5. otherwise prepare the render target, the source and mask channels,
 *      pick the vertex emitters and emit the composite state.
 *
 * Returns true once tmp is ready.  Failures after step 2 jump to the
 * cleanup labels at the end, which release the bo references held in tmp
 * and then, when there is no mask, retry sna_blt_composite() with
 * COMPOSITE_FALLBACK added to flags; that call's result is returned
 * (false if a mask is present).
 */
347703b705cfSriastradhstatic bool
347803b705cfSriastradhgen3_render_composite(struct sna *sna,
347903b705cfSriastradh		      uint8_t op,
348003b705cfSriastradh		      PicturePtr src,
348103b705cfSriastradh		      PicturePtr mask,
348203b705cfSriastradh		      PicturePtr dst,
348303b705cfSriastradh		      int16_t src_x,  int16_t src_y,
348403b705cfSriastradh		      int16_t mask_x, int16_t mask_y,
348503b705cfSriastradh		      int16_t dst_x,  int16_t dst_y,
348603b705cfSriastradh		      int16_t width,  int16_t height,
348742542f5fSchristos		      unsigned flags,
348803b705cfSriastradh		      struct sna_composite_op *tmp)
348903b705cfSriastradh{
349003b705cfSriastradh	DBG(("%s()\n", __FUNCTION__));
349103b705cfSriastradh
349203b705cfSriastradh	if (op >= ARRAY_SIZE(gen3_blend_op)) {
349303b705cfSriastradh		DBG(("%s: fallback due to unhandled blend op: %d\n",
349403b705cfSriastradh		     __FUNCTION__, op));
349503b705cfSriastradh		return false;
349603b705cfSriastradh	}
349703b705cfSriastradh
349803b705cfSriastradh	/* Try to use the BLT engine unless it implies a
349903b705cfSriastradh	 * 3D -> 2D context switch.
350003b705cfSriastradh	 */
350103b705cfSriastradh	if (mask == NULL &&
350203b705cfSriastradh	    sna_blt_composite(sna,
350303b705cfSriastradh			      op, src, dst,
350403b705cfSriastradh			      src_x, src_y,
350503b705cfSriastradh			      dst_x, dst_y,
350603b705cfSriastradh			      width, height,
350742542f5fSchristos			      flags, tmp))
350803b705cfSriastradh		return true;
350903b705cfSriastradh
351003b705cfSriastradh	if (gen3_composite_fallback(sna, op, src, mask, dst))
351142542f5fSchristos		goto fallback;
351203b705cfSriastradh
351303b705cfSriastradh	if (need_tiling(sna, width, height))
351403b705cfSriastradh		return sna_tiling_composite(op, src, mask, dst,
351503b705cfSriastradh					    src_x,  src_y,
351603b705cfSriastradh					    mask_x, mask_y,
351703b705cfSriastradh					    dst_x,  dst_y,
351803b705cfSriastradh					    width,  height,
351903b705cfSriastradh					    tmp);
352003b705cfSriastradh
	/* The last argument is true for partial updates and for every op
	 * above PictOpSrc.
	 */
352103b705cfSriastradh	if (!gen3_composite_set_target(sna, tmp, dst,
352242542f5fSchristos				       dst_x, dst_y, width, height,
352342542f5fSchristos				       flags & COMPOSITE_PARTIAL || op > PictOpSrc)) {
352403b705cfSriastradh		DBG(("%s: unable to set render target\n",
352503b705cfSriastradh		     __FUNCTION__));
352642542f5fSchristos		goto fallback;
352703b705cfSriastradh	}
352803b705cfSriastradh
352903b705cfSriastradh	tmp->op = op;
353003b705cfSriastradh	tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);
353103b705cfSriastradh	tmp->u.gen3.num_constants = 0;
353203b705cfSriastradh	tmp->src.u.gen3.type = SHADER_TEXTURE;
353303b705cfSriastradh	tmp->src.is_affine = true;
	/* gen3_composite_picture() result: -1 aborts, 0 selects SHADER_ZERO,
	 * 1 keeps the prepared channel (after first trying a BLT conversion
	 * when there is no mask and the source has a bo).
	 */
353403b705cfSriastradh	DBG(("%s: preparing source\n", __FUNCTION__));
353503b705cfSriastradh	switch (gen3_composite_picture(sna, src, tmp, &tmp->src,
353603b705cfSriastradh				       src_x, src_y,
353703b705cfSriastradh				       width, height,
353803b705cfSriastradh				       dst_x, dst_y,
353903b705cfSriastradh				       dst->polyMode == PolyModePrecise)) {
354003b705cfSriastradh	case -1:
354103b705cfSriastradh		goto cleanup_dst;
354203b705cfSriastradh	case 0:
354303b705cfSriastradh		tmp->src.u.gen3.type = SHADER_ZERO;
354403b705cfSriastradh		break;
354503b705cfSriastradh	case 1:
354603b705cfSriastradh		if (mask == NULL && tmp->src.bo &&
354703b705cfSriastradh		    sna_blt_composite__convert(sna,
354803b705cfSriastradh					       dst_x, dst_y, width, height,
354903b705cfSriastradh					       tmp))
355003b705cfSriastradh			return true;
355103b705cfSriastradh
355203b705cfSriastradh		gen3_composite_channel_convert(&tmp->src);
355303b705cfSriastradh		break;
355403b705cfSriastradh	}
355503b705cfSriastradh	DBG(("%s: source type=%d\n", __FUNCTION__, tmp->src.u.gen3.type));
355603b705cfSriastradh
355703b705cfSriastradh	tmp->mask.u.gen3.type = SHADER_NONE;
355803b705cfSriastradh	tmp->mask.is_affine = true;
355903b705cfSriastradh	tmp->need_magic_ca_pass = false;
356003b705cfSriastradh	tmp->has_component_alpha = false;
	/* The mask is only prepared when the source is not SHADER_ZERO. */
356103b705cfSriastradh	if (mask && tmp->src.u.gen3.type != SHADER_ZERO) {
356203b705cfSriastradh		if (!reuse_source(sna,
356303b705cfSriastradh				  src, &tmp->src, src_x, src_y,
356403b705cfSriastradh				  mask, &tmp->mask, mask_x, mask_y)) {
356503b705cfSriastradh			tmp->mask.u.gen3.type = SHADER_TEXTURE;
356603b705cfSriastradh			DBG(("%s: preparing mask\n", __FUNCTION__));
356703b705cfSriastradh			switch (gen3_composite_picture(sna, mask, tmp, &tmp->mask,
356803b705cfSriastradh						       mask_x, mask_y,
356903b705cfSriastradh						       width,  height,
357003b705cfSriastradh						       dst_x,  dst_y,
357103b705cfSriastradh						       dst->polyMode == PolyModePrecise)) {
357203b705cfSriastradh			case -1:
357303b705cfSriastradh				goto cleanup_src;
357403b705cfSriastradh			case 0:
357503b705cfSriastradh				tmp->mask.u.gen3.type = SHADER_ZERO;
357603b705cfSriastradh				break;
357703b705cfSriastradh			case 1:
357803b705cfSriastradh				gen3_composite_channel_convert(&tmp->mask);
357903b705cfSriastradh				break;
358003b705cfSriastradh			}
358103b705cfSriastradh		}
358203b705cfSriastradh		DBG(("%s: mask type=%d\n", __FUNCTION__, tmp->mask.u.gen3.type));
		/* A SHADER_ZERO mask collapses the operation: release the
		 * source bo and continue with a SHADER_ZERO source and no mask.
		 */
358303b705cfSriastradh		if (tmp->mask.u.gen3.type == SHADER_ZERO) {
358403b705cfSriastradh			if (tmp->src.bo) {
358503b705cfSriastradh				kgem_bo_destroy(&sna->kgem,
358603b705cfSriastradh						tmp->src.bo);
358703b705cfSriastradh				tmp->src.bo = NULL;
358803b705cfSriastradh			}
358903b705cfSriastradh			tmp->src.u.gen3.type = SHADER_ZERO;
359003b705cfSriastradh			tmp->mask.u.gen3.type = SHADER_NONE;
359103b705cfSriastradh		}
359203b705cfSriastradh
359303b705cfSriastradh		if (tmp->mask.u.gen3.type != SHADER_NONE) {
359403b705cfSriastradh			if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
359503b705cfSriastradh				/* Check if it's component alpha that relies on a source alpha
359603b705cfSriastradh				 * and on the source value.  We can only get one of those
359703b705cfSriastradh				 * into the single source value that we get to blend with.
359803b705cfSriastradh				 */
359903b705cfSriastradh				DBG(("%s: component-alpha mask: %d\n",
360003b705cfSriastradh				     __FUNCTION__, tmp->mask.u.gen3.type));
360103b705cfSriastradh				tmp->has_component_alpha = true;
360203b705cfSriastradh				if (tmp->mask.u.gen3.type == SHADER_WHITE) {
360303b705cfSriastradh					tmp->mask.u.gen3.type = SHADER_NONE;
360403b705cfSriastradh					tmp->has_component_alpha = false;
360503b705cfSriastradh				} else if (gen3_blend_op[op].src_alpha &&
360603b705cfSriastradh					   gen3_blend_op[op].src_blend != BLENDFACT_ZERO) {
360703b705cfSriastradh					if (op != PictOpOver)
360803b705cfSriastradh						goto cleanup_mask;
360903b705cfSriastradh
					/* PictOpOver: render as PictOpOutReverse and
					 * flag need_magic_ca_pass.
					 */
361003b705cfSriastradh					tmp->need_magic_ca_pass = true;
361103b705cfSriastradh					tmp->op = PictOpOutReverse;
361203b705cfSriastradh				}
361303b705cfSriastradh			} else {
361403b705cfSriastradh				if (tmp->mask.is_opaque) {
361503b705cfSriastradh					tmp->mask.u.gen3.type = SHADER_NONE;
361603b705cfSriastradh				} else if (is_constant_ps(tmp->src.u.gen3.type) &&
361703b705cfSriastradh					   is_constant_ps(tmp->mask.u.gen3.type)) {
					/* Constant source and constant mask: fold both
					 * into a single SHADER_CONSTANT source, combining
					 * the two modes with multa() at shifts 24, 16, 8
					 * and 0, then drop the mask.
					 */
361803b705cfSriastradh					uint32_t v;
361903b705cfSriastradh
362003b705cfSriastradh					v = multa(tmp->src.u.gen3.mode,
362103b705cfSriastradh						  tmp->mask.u.gen3.mode,
362203b705cfSriastradh						  24);
362303b705cfSriastradh					v |= multa(tmp->src.u.gen3.mode,
362403b705cfSriastradh						   tmp->mask.u.gen3.mode,
362503b705cfSriastradh						   16);
362603b705cfSriastradh					v |= multa(tmp->src.u.gen3.mode,
362703b705cfSriastradh						   tmp->mask.u.gen3.mode,
362803b705cfSriastradh						   8);
362903b705cfSriastradh					v |= multa(tmp->src.u.gen3.mode,
363003b705cfSriastradh						   tmp->mask.u.gen3.mode,
363103b705cfSriastradh						   0);
363203b705cfSriastradh
363303b705cfSriastradh					DBG(("%s: combining constant source/mask: %x x %x -> %x\n",
363403b705cfSriastradh					     __FUNCTION__,
363503b705cfSriastradh					     tmp->src.u.gen3.mode,
363603b705cfSriastradh					     tmp->mask.u.gen3.mode,
363703b705cfSriastradh					     v));
363803b705cfSriastradh
363903b705cfSriastradh					tmp->src.u.gen3.type = SHADER_CONSTANT;
364003b705cfSriastradh					tmp->src.u.gen3.mode = v;
364103b705cfSriastradh					tmp->src.is_opaque = false;
364203b705cfSriastradh
364303b705cfSriastradh					tmp->mask.u.gen3.type = SHADER_NONE;
364403b705cfSriastradh				}
364503b705cfSriastradh			}
364603b705cfSriastradh		}
364703b705cfSriastradh	}
364803b705cfSriastradh	DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__,
364903b705cfSriastradh	     tmp->src.u.gen3.type, tmp->mask.u.gen3.type,
365003b705cfSriastradh	     tmp->src.is_affine, tmp->mask.is_affine));
365103b705cfSriastradh
	/* Start from the generic emitter; the cases below install
	 * specialised prim_emit/emit_boxes variants where one matches the
	 * final source/mask configuration.
	 */
365203b705cfSriastradh	tmp->prim_emit = gen3_emit_composite_primitive;
365303b705cfSriastradh	if (is_constant_ps(tmp->mask.u.gen3.type)) {
365403b705cfSriastradh		switch (tmp->src.u.gen3.type) {
365503b705cfSriastradh		case SHADER_NONE:
365603b705cfSriastradh		case SHADER_ZERO:
365703b705cfSriastradh		case SHADER_BLACK:
365803b705cfSriastradh		case SHADER_WHITE:
365903b705cfSriastradh		case SHADER_CONSTANT:
366003b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
366103b705cfSriastradh			if (sna->cpu_features & SSE2) {
366203b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_primitive_constant__sse2;
366303b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_boxes_constant__sse2;
366403b705cfSriastradh			} else
366503b705cfSriastradh#endif
366603b705cfSriastradh			{
366703b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_primitive_constant;
366803b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_boxes_constant;
366903b705cfSriastradh			}
367003b705cfSriastradh
367103b705cfSriastradh			break;
367203b705cfSriastradh		case SHADER_LINEAR:
367303b705cfSriastradh		case SHADER_RADIAL:
367403b705cfSriastradh			if (tmp->src.transform == NULL) {
367503b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
367603b705cfSriastradh				if (sna->cpu_features & SSE2) {
367703b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient__sse2;
367803b705cfSriastradh					tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient__sse2;
367903b705cfSriastradh				} else
368003b705cfSriastradh#endif
368103b705cfSriastradh				{
368203b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
368303b705cfSriastradh					tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient;
368403b705cfSriastradh				}
368503b705cfSriastradh			} else if (tmp->src.is_affine) {
368603b705cfSriastradh				tmp->src.scale[1] = tmp->src.scale[0] = 1. / tmp->src.transform->matrix[2][2];
368703b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
368803b705cfSriastradh				if (sna->cpu_features & SSE2) {
368903b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient__sse2;
369003b705cfSriastradh					tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient__sse2;
369103b705cfSriastradh				} else
369203b705cfSriastradh#endif
369303b705cfSriastradh				{
369403b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
369503b705cfSriastradh					tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient;
369603b705cfSriastradh				}
369703b705cfSriastradh			}
369803b705cfSriastradh			break;
369903b705cfSriastradh		case SHADER_TEXTURE:
370003b705cfSriastradh			if (tmp->src.transform == NULL) {
370103b705cfSriastradh				if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
370203b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
370303b705cfSriastradh					if (sna->cpu_features & SSE2) {
370403b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset__sse2;
370503b705cfSriastradh						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset__sse2;
370603b705cfSriastradh					} else
370703b705cfSriastradh#endif
370803b705cfSriastradh					{
370903b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset;
371003b705cfSriastradh						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset;
371103b705cfSriastradh					}
371203b705cfSriastradh				} else {
371303b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
371403b705cfSriastradh					if (sna->cpu_features & SSE2) {
371503b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_identity_source__sse2;
371603b705cfSriastradh						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source__sse2;
371703b705cfSriastradh					} else
371803b705cfSriastradh#endif
371903b705cfSriastradh					{
372003b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
372103b705cfSriastradh						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source;
372203b705cfSriastradh					}
372303b705cfSriastradh				}
372403b705cfSriastradh			} else if (tmp->src.is_affine) {
372503b705cfSriastradh				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
372603b705cfSriastradh				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
372703b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
372803b705cfSriastradh				if (sna->cpu_features & SSE2) {
372903b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_affine_source__sse2;
373003b705cfSriastradh					tmp->emit_boxes = gen3_emit_composite_boxes_affine_source__sse2;
373103b705cfSriastradh				} else
373203b705cfSriastradh#endif
373303b705cfSriastradh				{
373403b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
373503b705cfSriastradh					tmp->emit_boxes = gen3_emit_composite_boxes_affine_source;
373603b705cfSriastradh				}
373703b705cfSriastradh			}
373803b705cfSriastradh			break;
373903b705cfSriastradh		}
374003b705cfSriastradh	} else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) {
		/* Textured, untransformed mask: only prim_emit is specialised
		 * in this branch; emit_boxes is left untouched.
		 */
374103b705cfSriastradh		if (tmp->mask.transform == NULL) {
374203b705cfSriastradh			if (is_constant_ps(tmp->src.u.gen3.type)) {
374303b705cfSriastradh				if ((tmp->mask.offset[0]|tmp->mask.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
374403b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
374503b705cfSriastradh					if (sna->cpu_features & SSE2) {
374603b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2;
374703b705cfSriastradh					} else
374803b705cfSriastradh#endif
374903b705cfSriastradh					{
375003b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset;
375103b705cfSriastradh					}
375203b705cfSriastradh				} else {
375303b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
375403b705cfSriastradh					if (sna->cpu_features & SSE2) {
375503b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask__sse2;
375603b705cfSriastradh					} else
375703b705cfSriastradh#endif
375803b705cfSriastradh					{
375903b705cfSriastradh						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask;
376003b705cfSriastradh					}
376103b705cfSriastradh				}
376203b705cfSriastradh			} else if (tmp->src.transform == NULL) {
376303b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
376403b705cfSriastradh				if (sna->cpu_features & SSE2) {
376503b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask__sse2;
376603b705cfSriastradh				} else
376703b705cfSriastradh#endif
376803b705cfSriastradh				{
376903b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
377003b705cfSriastradh				}
377103b705cfSriastradh			} else if (tmp->src.is_affine) {
377203b705cfSriastradh				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
377303b705cfSriastradh				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
377403b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
377503b705cfSriastradh				if (sna->cpu_features & SSE2) {
377603b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask__sse2;
377703b705cfSriastradh				} else
377803b705cfSriastradh#endif
377903b705cfSriastradh				{
378003b705cfSriastradh					tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask;
378103b705cfSriastradh				}
378203b705cfSriastradh			}
378303b705cfSriastradh		}
378403b705cfSriastradh	}
378503b705cfSriastradh
	/* Vertex size: a base of 2 floats, plus 2 (affine) or 4 floats for
	 * each channel whose shader type is not constant.
	 */
378603b705cfSriastradh	tmp->floats_per_vertex = 2;
378703b705cfSriastradh	if (!is_constant_ps(tmp->src.u.gen3.type))
378803b705cfSriastradh		tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
378903b705cfSriastradh	if (!is_constant_ps(tmp->mask.u.gen3.type))
379003b705cfSriastradh		tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
379103b705cfSriastradh	DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__,
379203b705cfSriastradh	     !is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0,
379303b705cfSriastradh	     !is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0,
379403b705cfSriastradh	     tmp->floats_per_vertex,
379503b705cfSriastradh	     tmp->prim_emit != gen3_emit_composite_primitive));
379603b705cfSriastradh	tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
379703b705cfSriastradh
379803b705cfSriastradh	tmp->blt   = gen3_render_composite_blt;
379903b705cfSriastradh	tmp->box   = gen3_render_composite_box;
380003b705cfSriastradh	tmp->boxes = gen3_render_composite_boxes__blt;
	/* NOTE(review): emit_boxes is only assigned in some branches above,
	 * so this test presumably relies on the caller having zeroed *tmp —
	 * confirm against the callers.
	 */
380103b705cfSriastradh	if (tmp->emit_boxes) {
380203b705cfSriastradh		tmp->boxes = gen3_render_composite_boxes;
380303b705cfSriastradh		tmp->thread_boxes = gen3_render_composite_boxes__thread;
380403b705cfSriastradh	}
380503b705cfSriastradh	tmp->done  = gen3_render_composite_done;
380603b705cfSriastradh
	/* If kgem_check_bo() rejects the bos, call kgem_submit() and check
	 * once more before giving up.
	 */
380703b705cfSriastradh	if (!kgem_check_bo(&sna->kgem,
380803b705cfSriastradh			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
380903b705cfSriastradh			   NULL)) {
381003b705cfSriastradh		kgem_submit(&sna->kgem);
381103b705cfSriastradh		if (!kgem_check_bo(&sna->kgem,
381203b705cfSriastradh				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
381303b705cfSriastradh				   NULL))
381403b705cfSriastradh			goto cleanup_mask;
381503b705cfSriastradh	}
381603b705cfSriastradh
381703b705cfSriastradh	gen3_align_vertex(sna, tmp);
381842542f5fSchristos	gen3_emit_composite_state(sna, tmp);
381903b705cfSriastradh	return true;
382003b705cfSriastradh
	/* Error unwinding: each label releases one bo and falls through to
	 * the next, ending in the BLT retry at "fallback".
	 */
382103b705cfSriastradhcleanup_mask:
382242542f5fSchristos	if (tmp->mask.bo) {
382303b705cfSriastradh		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
382442542f5fSchristos		tmp->mask.bo = NULL;
382542542f5fSchristos	}
382603b705cfSriastradhcleanup_src:
382742542f5fSchristos	if (tmp->src.bo) {
382803b705cfSriastradh		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
382942542f5fSchristos		tmp->src.bo = NULL;
383042542f5fSchristos	}
383103b705cfSriastradhcleanup_dst:
	/* NOTE(review): dst.bo is destroyed only when redirect.real_bo is
	 * set — presumably dst.bo is then a temporary redirection target;
	 * confirm against gen3_composite_set_target().
	 */
383242542f5fSchristos	if (tmp->redirect.real_bo) {
383303b705cfSriastradh		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
383442542f5fSchristos		tmp->redirect.real_bo = NULL;
383542542f5fSchristos	}
383642542f5fSchristosfallback:
	/* Last resort: without a mask, retry sna_blt_composite() with
	 * COMPOSITE_FALLBACK added to flags.
	 */
383742542f5fSchristos	return (mask == NULL &&
383842542f5fSchristos		sna_blt_composite(sna,
383942542f5fSchristos				  op, src, dst,
384042542f5fSchristos				  src_x, src_y,
384142542f5fSchristos				  dst_x, dst_y,
384242542f5fSchristos				  width, height,
384342542f5fSchristos				  flags | COMPOSITE_FALLBACK, tmp));
384403b705cfSriastradh}
384503b705cfSriastradh
/*
 * Emit a single span vertex, in this order: the destination coordinate
 * (x, y shifted by op->base.dst.x / op->base.dst.y), the source texture
 * coordinate for (x, y) via gen3_emit_composite_texcoord(), and finally
 * the opacity value through OUT_VERTEX().
 */
384603b705cfSriastradhstatic void
384703b705cfSriastradhgen3_emit_composite_spans_vertex(struct sna *sna,
384803b705cfSriastradh				 const struct sna_composite_spans_op *op,
384903b705cfSriastradh				 int16_t x, int16_t y,
385003b705cfSriastradh				 float opacity)
385103b705cfSriastradh{
385203b705cfSriastradh	gen3_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y);
385303b705cfSriastradh	gen3_emit_composite_texcoord(sna, &op->base.src, x, y);
385403b705cfSriastradh	OUT_VERTEX(opacity);
385503b705cfSriastradh}
385603b705cfSriastradh
385703b705cfSriastradhfastcall static void
385803b705cfSriastradhgen3_emit_composite_spans_primitive_zero(struct sna *sna,
385903b705cfSriastradh					 const struct sna_composite_spans_op *op,
386003b705cfSriastradh					 const BoxRec *box,
386103b705cfSriastradh					 float opacity)
386203b705cfSriastradh{
386303b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
386403b705cfSriastradh	sna->render.vertex_used += 6;
386503b705cfSriastradh
386603b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
386703b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
386803b705cfSriastradh
386903b705cfSriastradh	v[2] = op->base.dst.x + box->x1;
387003b705cfSriastradh	v[3] = v[1];
387103b705cfSriastradh
387203b705cfSriastradh	v[4] = v[2];
387303b705cfSriastradh	v[5] = op->base.dst.x + box->y1;
387403b705cfSriastradh}
387503b705cfSriastradh
387603b705cfSriastradhfastcall static void
387703b705cfSriastradhgen3_emit_composite_spans_primitive_zero__boxes(const struct sna_composite_spans_op *op,
387803b705cfSriastradh						const struct sna_opacity_box *b,
387903b705cfSriastradh						int nbox, float *v)
388003b705cfSriastradh{
388103b705cfSriastradh	do {
388203b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
388303b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
388403b705cfSriastradh
388503b705cfSriastradh		v[2] = op->base.dst.x + b->box.x1;
388603b705cfSriastradh		v[3] = v[1];
388703b705cfSriastradh
388803b705cfSriastradh		v[4] = v[2];
388903b705cfSriastradh		v[5] = op->base.dst.x + b->box.y1;
389003b705cfSriastradh
389103b705cfSriastradh		v += 6;
389203b705cfSriastradh		b++;
389303b705cfSriastradh	} while (--nbox);
389403b705cfSriastradh}
389503b705cfSriastradh
389603b705cfSriastradhfastcall static void
389703b705cfSriastradhgen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna,
389803b705cfSriastradh						   const struct sna_composite_spans_op *op,
389903b705cfSriastradh						   const BoxRec *box,
390003b705cfSriastradh						   float opacity)
390103b705cfSriastradh{
390203b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
390303b705cfSriastradh	sna->render.vertex_used += 6;
390403b705cfSriastradh
390503b705cfSriastradh	v[0] = box->x2;
390603b705cfSriastradh	v[3] = v[1] = box->y2;
390703b705cfSriastradh	v[4] = v[2] = box->x1;
390803b705cfSriastradh	v[5] = box->y1;
390903b705cfSriastradh}
391003b705cfSriastradh
391103b705cfSriastradhfastcall static void
391203b705cfSriastradhgen3_emit_composite_spans_primitive_zero_no_offset__boxes(const struct sna_composite_spans_op *op,
391303b705cfSriastradh							  const struct sna_opacity_box *b,
391403b705cfSriastradh							  int nbox, float *v)
391503b705cfSriastradh{
391603b705cfSriastradh	do {
391703b705cfSriastradh		v[0] = b->box.x2;
391803b705cfSriastradh		v[3] = v[1] = b->box.y2;
391903b705cfSriastradh		v[4] = v[2] = b->box.x1;
392003b705cfSriastradh		v[5] = b->box.y1;
392103b705cfSriastradh
392203b705cfSriastradh		b++;
392303b705cfSriastradh		v += 6;
392403b705cfSriastradh	} while (--nbox);
392503b705cfSriastradh}
392603b705cfSriastradh
392703b705cfSriastradhfastcall static void
392803b705cfSriastradhgen3_emit_composite_spans_primitive_constant(struct sna *sna,
392903b705cfSriastradh					     const struct sna_composite_spans_op *op,
393003b705cfSriastradh					     const BoxRec *box,
393103b705cfSriastradh					     float opacity)
393203b705cfSriastradh{
393303b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
393403b705cfSriastradh	sna->render.vertex_used += 9;
393503b705cfSriastradh
393603b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
393703b705cfSriastradh	v[6] = v[3] = op->base.dst.x + box->x1;
393803b705cfSriastradh	v[4] = v[1] = op->base.dst.y + box->y2;
393903b705cfSriastradh	v[7] = op->base.dst.y + box->y1;
394003b705cfSriastradh	v[8] = v[5] = v[2] = opacity;
394103b705cfSriastradh}
394203b705cfSriastradh
394303b705cfSriastradhfastcall static void
394403b705cfSriastradhgen3_emit_composite_spans_primitive_constant__boxes(const struct sna_composite_spans_op *op,
394503b705cfSriastradh						    const struct sna_opacity_box *b,
394603b705cfSriastradh						    int nbox,
394703b705cfSriastradh						    float *v)
394803b705cfSriastradh{
394903b705cfSriastradh	do {
395003b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
395103b705cfSriastradh		v[6] = v[3] = op->base.dst.x + b->box.x1;
395203b705cfSriastradh		v[4] = v[1] = op->base.dst.y + b->box.y2;
395303b705cfSriastradh		v[7] = op->base.dst.y + b->box.y1;
395403b705cfSriastradh		v[8] = v[5] = v[2] = b->alpha;
395503b705cfSriastradh
395603b705cfSriastradh		v += 9;
395703b705cfSriastradh		b++;
395803b705cfSriastradh	} while (--nbox);
395903b705cfSriastradh}
396003b705cfSriastradh
396103b705cfSriastradhfastcall static void
396203b705cfSriastradhgen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna,
396303b705cfSriastradh						       const struct sna_composite_spans_op *op,
396403b705cfSriastradh						       const BoxRec *box,
396503b705cfSriastradh						       float opacity)
396603b705cfSriastradh{
396703b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
396803b705cfSriastradh	sna->render.vertex_used += 9;
396903b705cfSriastradh
397003b705cfSriastradh	v[0] = box->x2;
397103b705cfSriastradh	v[6] = v[3] = box->x1;
397203b705cfSriastradh	v[4] = v[1] = box->y2;
397303b705cfSriastradh	v[7] = box->y1;
397403b705cfSriastradh	v[8] = v[5] = v[2] = opacity;
397503b705cfSriastradh}
397603b705cfSriastradh
397703b705cfSriastradhfastcall static void
397803b705cfSriastradhgen3_emit_composite_spans_primitive_constant_no_offset__boxes(const struct sna_composite_spans_op *op,
397903b705cfSriastradh							      const struct sna_opacity_box *b,
398003b705cfSriastradh							      int nbox, float *v)
398103b705cfSriastradh{
398203b705cfSriastradh	do {
398303b705cfSriastradh		v[0] = b->box.x2;
398403b705cfSriastradh		v[6] = v[3] = b->box.x1;
398503b705cfSriastradh		v[4] = v[1] = b->box.y2;
398603b705cfSriastradh		v[7] = b->box.y1;
398703b705cfSriastradh		v[8] = v[5] = v[2] = b->alpha;
398803b705cfSriastradh
398903b705cfSriastradh		v += 9;
399003b705cfSriastradh		b++;
399103b705cfSriastradh	} while (--nbox);
399203b705cfSriastradh}
399303b705cfSriastradh
399403b705cfSriastradhfastcall static void
399503b705cfSriastradhgen3_emit_composite_spans_primitive_identity_source(struct sna *sna,
399603b705cfSriastradh						    const struct sna_composite_spans_op *op,
399703b705cfSriastradh						    const BoxRec *box,
399803b705cfSriastradh						    float opacity)
399903b705cfSriastradh{
400003b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
400103b705cfSriastradh	sna->render.vertex_used += 15;
400203b705cfSriastradh
400303b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
400403b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
400503b705cfSriastradh	v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
400603b705cfSriastradh	v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
400703b705cfSriastradh	v[4] = opacity;
400803b705cfSriastradh
400903b705cfSriastradh	v[5] = op->base.dst.x + box->x1;
401003b705cfSriastradh	v[6] = v[1];
401103b705cfSriastradh	v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
401203b705cfSriastradh	v[8] = v[3];
401303b705cfSriastradh	v[9] = opacity;
401403b705cfSriastradh
401503b705cfSriastradh	v[10] = v[5];
401603b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
401703b705cfSriastradh	v[12] = v[7];
401803b705cfSriastradh	v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
401903b705cfSriastradh	v[14] = opacity;
402003b705cfSriastradh}
402103b705cfSriastradh
402203b705cfSriastradhfastcall static void
402303b705cfSriastradhgen3_emit_composite_spans_primitive_identity_source__boxes(const struct sna_composite_spans_op *op,
402403b705cfSriastradh							   const struct sna_opacity_box *b,
402503b705cfSriastradh							   int nbox,
402603b705cfSriastradh							   float *v)
402703b705cfSriastradh{
402803b705cfSriastradh	do {
402903b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
403003b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
403103b705cfSriastradh		v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0];
403203b705cfSriastradh		v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1];
403303b705cfSriastradh		v[4] = b->alpha;
403403b705cfSriastradh
403503b705cfSriastradh		v[5] = op->base.dst.x + b->box.x1;
403603b705cfSriastradh		v[6] = v[1];
403703b705cfSriastradh		v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0];
403803b705cfSriastradh		v[8] = v[3];
403903b705cfSriastradh		v[9] = b->alpha;
404003b705cfSriastradh
404103b705cfSriastradh		v[10] = v[5];
404203b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
404303b705cfSriastradh		v[12] = v[7];
404403b705cfSriastradh		v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1];
404503b705cfSriastradh		v[14] = b->alpha;
404603b705cfSriastradh
404703b705cfSriastradh		v += 15;
404803b705cfSriastradh		b++;
404903b705cfSriastradh	} while (--nbox);
405003b705cfSriastradh}
405103b705cfSriastradh
405203b705cfSriastradhfastcall static void
405303b705cfSriastradhgen3_emit_composite_spans_primitive_affine_source(struct sna *sna,
405403b705cfSriastradh						  const struct sna_composite_spans_op *op,
405503b705cfSriastradh						  const BoxRec *box,
405603b705cfSriastradh						  float opacity)
405703b705cfSriastradh{
405803b705cfSriastradh	PictTransform *transform = op->base.src.transform;
405903b705cfSriastradh	float *v;
406003b705cfSriastradh
406103b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
406203b705cfSriastradh	sna->render.vertex_used += 15;
406303b705cfSriastradh
406403b705cfSriastradh	v[0]  = op->base.dst.x + box->x2;
406503b705cfSriastradh	v[6]  = v[1] = op->base.dst.y + box->y2;
406603b705cfSriastradh	v[10] = v[5] = op->base.dst.x + box->x1;
406703b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
406803b705cfSriastradh	v[14] = v[9] = v[4]  = opacity;
406903b705cfSriastradh
407003b705cfSriastradh	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
407103b705cfSriastradh				    (int)op->base.src.offset[1] + box->y2,
407203b705cfSriastradh				    transform, op->base.src.scale,
407303b705cfSriastradh				    &v[2], &v[3]);
407403b705cfSriastradh
407503b705cfSriastradh	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
407603b705cfSriastradh				    (int)op->base.src.offset[1] + box->y2,
407703b705cfSriastradh				    transform, op->base.src.scale,
407803b705cfSriastradh				    &v[7], &v[8]);
407903b705cfSriastradh
408003b705cfSriastradh	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
408103b705cfSriastradh				    (int)op->base.src.offset[1] + box->y1,
408203b705cfSriastradh				    transform, op->base.src.scale,
408303b705cfSriastradh				    &v[12], &v[13]);
408403b705cfSriastradh}
408503b705cfSriastradh
408603b705cfSriastradhfastcall static void
408703b705cfSriastradhgen3_emit_composite_spans_primitive_affine_source__boxes(const struct sna_composite_spans_op *op,
408803b705cfSriastradh							 const struct sna_opacity_box *b,
408903b705cfSriastradh							 int nbox,
409003b705cfSriastradh							 float *v)
409103b705cfSriastradh{
409203b705cfSriastradh	PictTransform *transform = op->base.src.transform;
409303b705cfSriastradh
409403b705cfSriastradh	do {
409503b705cfSriastradh		v[0]  = op->base.dst.x + b->box.x2;
409603b705cfSriastradh		v[6]  = v[1] = op->base.dst.y + b->box.y2;
409703b705cfSriastradh		v[10] = v[5] = op->base.dst.x + b->box.x1;
409803b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
409903b705cfSriastradh		v[14] = v[9] = v[4]  = b->alpha;
410003b705cfSriastradh
410103b705cfSriastradh		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2,
410203b705cfSriastradh					    (int)op->base.src.offset[1] + b->box.y2,
410303b705cfSriastradh					    transform, op->base.src.scale,
410403b705cfSriastradh					    &v[2], &v[3]);
410503b705cfSriastradh
410603b705cfSriastradh		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
410703b705cfSriastradh					    (int)op->base.src.offset[1] + b->box.y2,
410803b705cfSriastradh					    transform, op->base.src.scale,
410903b705cfSriastradh					    &v[7], &v[8]);
411003b705cfSriastradh
411103b705cfSriastradh		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
411203b705cfSriastradh					    (int)op->base.src.offset[1] + b->box.y1,
411303b705cfSriastradh					    transform, op->base.src.scale,
411403b705cfSriastradh					    &v[12], &v[13]);
411503b705cfSriastradh		v += 15;
411603b705cfSriastradh		b++;
411703b705cfSriastradh	} while (--nbox);
411803b705cfSriastradh}
411903b705cfSriastradh
412003b705cfSriastradhfastcall static void
412103b705cfSriastradhgen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna,
412203b705cfSriastradh						      const struct sna_composite_spans_op *op,
412303b705cfSriastradh						      const BoxRec *box,
412403b705cfSriastradh						      float opacity)
412503b705cfSriastradh{
412603b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
412703b705cfSriastradh	sna->render.vertex_used += 15;
412803b705cfSriastradh
412903b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
413003b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
413103b705cfSriastradh	v[2] = op->base.src.offset[0] + box->x2;
413203b705cfSriastradh	v[3] = op->base.src.offset[1] + box->y2;
413303b705cfSriastradh	v[4] = opacity;
413403b705cfSriastradh
413503b705cfSriastradh	v[5] = op->base.dst.x + box->x1;
413603b705cfSriastradh	v[6] = v[1];
413703b705cfSriastradh	v[7] = op->base.src.offset[0] + box->x1;
413803b705cfSriastradh	v[8] = v[3];
413903b705cfSriastradh	v[9] = opacity;
414003b705cfSriastradh
414103b705cfSriastradh	v[10] = v[5];
414203b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
414303b705cfSriastradh	v[12] = v[7];
414403b705cfSriastradh	v[13] = op->base.src.offset[1] + box->y1;
414503b705cfSriastradh	v[14] = opacity;
414603b705cfSriastradh}
414703b705cfSriastradh
414803b705cfSriastradhfastcall static void
414903b705cfSriastradhgen3_emit_composite_spans_primitive_identity_gradient__boxes(const struct sna_composite_spans_op *op,
415003b705cfSriastradh							     const struct sna_opacity_box *b,
415103b705cfSriastradh							     int nbox,
415203b705cfSriastradh							     float *v)
415303b705cfSriastradh{
415403b705cfSriastradh	do {
415503b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
415603b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
415703b705cfSriastradh		v[2] = op->base.src.offset[0] + b->box.x2;
415803b705cfSriastradh		v[3] = op->base.src.offset[1] + b->box.y2;
415903b705cfSriastradh		v[4] = b->alpha;
416003b705cfSriastradh
416103b705cfSriastradh		v[5] = op->base.dst.x + b->box.x1;
416203b705cfSriastradh		v[6] = v[1];
416303b705cfSriastradh		v[7] = op->base.src.offset[0] + b->box.x1;
416403b705cfSriastradh		v[8] = v[3];
416503b705cfSriastradh		v[9] = b->alpha;
416603b705cfSriastradh
416703b705cfSriastradh		v[10] = v[5];
416803b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
416903b705cfSriastradh		v[12] = v[7];
417003b705cfSriastradh		v[13] = op->base.src.offset[1] + b->box.y1;
417103b705cfSriastradh		v[14] = b->alpha;
417203b705cfSriastradh
417303b705cfSriastradh		v += 15;
417403b705cfSriastradh		b++;
417503b705cfSriastradh	} while (--nbox);
417603b705cfSriastradh}
417703b705cfSriastradh
417803b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
417903b705cfSriastradhsse2 fastcall static void
418003b705cfSriastradhgen3_emit_composite_spans_primitive_constant__sse2(struct sna *sna,
418103b705cfSriastradh						   const struct sna_composite_spans_op *op,
418203b705cfSriastradh						   const BoxRec *box,
418303b705cfSriastradh						   float opacity)
418403b705cfSriastradh{
418503b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
418603b705cfSriastradh	sna->render.vertex_used += 9;
418703b705cfSriastradh
418803b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
418903b705cfSriastradh	v[6] = v[3] = op->base.dst.x + box->x1;
419003b705cfSriastradh	v[4] = v[1] = op->base.dst.y + box->y2;
419103b705cfSriastradh	v[7] = op->base.dst.y + box->y1;
419203b705cfSriastradh	v[8] = v[5] = v[2] = opacity;
419303b705cfSriastradh}
419403b705cfSriastradh
419503b705cfSriastradhsse2 fastcall static void
419603b705cfSriastradhgen3_emit_composite_spans_primitive_constant__sse2__boxes(const struct sna_composite_spans_op *op,
419703b705cfSriastradh							  const struct sna_opacity_box *b,
419803b705cfSriastradh							  int nbox,
419903b705cfSriastradh							  float *v)
420003b705cfSriastradh{
420103b705cfSriastradh	do {
420203b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
420303b705cfSriastradh		v[6] = v[3] = op->base.dst.x + b->box.x1;
420403b705cfSriastradh		v[4] = v[1] = op->base.dst.y + b->box.y2;
420503b705cfSriastradh		v[7] = op->base.dst.y + b->box.y1;
420603b705cfSriastradh		v[8] = v[5] = v[2] = b->alpha;
420703b705cfSriastradh
420803b705cfSriastradh		v += 9;
420903b705cfSriastradh		b++;
421003b705cfSriastradh	} while (--nbox);
421103b705cfSriastradh}
421203b705cfSriastradh
421303b705cfSriastradhsse2 fastcall static void
421403b705cfSriastradhgen3_render_composite_spans_constant_box__sse2(struct sna *sna,
421503b705cfSriastradh					       const struct sna_composite_spans_op *op,
421603b705cfSriastradh					       const BoxRec *box, float opacity)
421703b705cfSriastradh{
421803b705cfSriastradh	float *v;
421903b705cfSriastradh	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
422003b705cfSriastradh	     __FUNCTION__,
422103b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
422203b705cfSriastradh	     opacity,
422303b705cfSriastradh	     op->base.dst.x, op->base.dst.y,
422403b705cfSriastradh	     box->x1, box->y1,
422503b705cfSriastradh	     box->x2 - box->x1,
422603b705cfSriastradh	     box->y2 - box->y1));
422703b705cfSriastradh
422803b705cfSriastradh	gen3_get_rectangles(sna, &op->base, 1);
422903b705cfSriastradh
423003b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
423103b705cfSriastradh	sna->render.vertex_used += 9;
423203b705cfSriastradh
423303b705cfSriastradh	v[0] = box->x2;
423403b705cfSriastradh	v[6] = v[3] = box->x1;
423503b705cfSriastradh	v[4] = v[1] = box->y2;
423603b705cfSriastradh	v[7] = box->y1;
423703b705cfSriastradh	v[8] = v[5] = v[2] = opacity;
423803b705cfSriastradh}
423903b705cfSriastradh
424003b705cfSriastradhsse2 fastcall static void
424103b705cfSriastradhgen3_render_composite_spans_constant_thread__sse2__boxes(struct sna *sna,
424203b705cfSriastradh							 const struct sna_composite_spans_op *op,
424303b705cfSriastradh							 const struct sna_opacity_box *box,
424403b705cfSriastradh							 int nbox)
424503b705cfSriastradh{
424603b705cfSriastradh	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
424703b705cfSriastradh	     __FUNCTION__, nbox,
424803b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
424903b705cfSriastradh	     op->base.dst.x, op->base.dst.y));
425003b705cfSriastradh
425103b705cfSriastradh	sna_vertex_lock(&sna->render);
425203b705cfSriastradh	do {
425303b705cfSriastradh		int nbox_this_time;
425403b705cfSriastradh		float *v;
425503b705cfSriastradh
425603b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
425703b705cfSriastradh		assert(nbox_this_time);
425803b705cfSriastradh		nbox -= nbox_this_time;
425903b705cfSriastradh
426003b705cfSriastradh		v = sna->render.vertices + sna->render.vertex_used;
426103b705cfSriastradh		sna->render.vertex_used += nbox_this_time * 9;
426203b705cfSriastradh
426303b705cfSriastradh		sna_vertex_acquire__locked(&sna->render);
426403b705cfSriastradh		sna_vertex_unlock(&sna->render);
426503b705cfSriastradh
426603b705cfSriastradh		do {
426703b705cfSriastradh			v[0] = box->box.x2;
426803b705cfSriastradh			v[6] = v[3] = box->box.x1;
426903b705cfSriastradh			v[4] = v[1] = box->box.y2;
427003b705cfSriastradh			v[7] = box->box.y1;
427103b705cfSriastradh			v[8] = v[5] = v[2] = box->alpha;
427203b705cfSriastradh			v += 9;
427303b705cfSriastradh			box++;
427403b705cfSriastradh		} while (--nbox_this_time);
427503b705cfSriastradh
427603b705cfSriastradh		sna_vertex_lock(&sna->render);
427703b705cfSriastradh		sna_vertex_release__locked(&sna->render);
427803b705cfSriastradh	} while (nbox);
427903b705cfSriastradh	sna_vertex_unlock(&sna->render);
428003b705cfSriastradh}
428103b705cfSriastradh
428203b705cfSriastradhsse2 fastcall static void
428303b705cfSriastradhgen3_emit_composite_spans_primitive_constant__sse2__no_offset(struct sna *sna,
428403b705cfSriastradh							      const struct sna_composite_spans_op *op,
428503b705cfSriastradh							      const BoxRec *box,
428603b705cfSriastradh							      float opacity)
428703b705cfSriastradh{
428803b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
428903b705cfSriastradh	sna->render.vertex_used += 9;
429003b705cfSriastradh
429103b705cfSriastradh	v[0] = box->x2;
429203b705cfSriastradh	v[6] = v[3] = box->x1;
429303b705cfSriastradh	v[4] = v[1] = box->y2;
429403b705cfSriastradh	v[7] = box->y1;
429503b705cfSriastradh	v[8] = v[5] = v[2] = opacity;
429603b705cfSriastradh}
429703b705cfSriastradh
429803b705cfSriastradhsse2 fastcall static void
429903b705cfSriastradhgen3_emit_composite_spans_primitive_constant__sse2__no_offset__boxes(const struct sna_composite_spans_op *op,
430003b705cfSriastradh								     const struct sna_opacity_box *b,
430103b705cfSriastradh								     int nbox, float *v)
430203b705cfSriastradh{
430303b705cfSriastradh	do {
430403b705cfSriastradh		v[0] = b->box.x2;
430503b705cfSriastradh		v[6] = v[3] = b->box.x1;
430603b705cfSriastradh		v[4] = v[1] = b->box.y2;
430703b705cfSriastradh		v[7] = b->box.y1;
430803b705cfSriastradh		v[8] = v[5] = v[2] = b->alpha;
430903b705cfSriastradh
431003b705cfSriastradh		v += 9;
431103b705cfSriastradh		b++;
431203b705cfSriastradh	} while (--nbox);
431303b705cfSriastradh}
431403b705cfSriastradh
431503b705cfSriastradhsse2 fastcall static void
431603b705cfSriastradhgen3_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna,
431703b705cfSriastradh							  const struct sna_composite_spans_op *op,
431803b705cfSriastradh							  const BoxRec *box,
431903b705cfSriastradh							  float opacity)
432003b705cfSriastradh{
432103b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
432203b705cfSriastradh	sna->render.vertex_used += 15;
432303b705cfSriastradh
432403b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
432503b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
432603b705cfSriastradh	v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
432703b705cfSriastradh	v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
432803b705cfSriastradh	v[4] = opacity;
432903b705cfSriastradh
433003b705cfSriastradh	v[5] = op->base.dst.x + box->x1;
433103b705cfSriastradh	v[6] = v[1];
433203b705cfSriastradh	v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
433303b705cfSriastradh	v[8] = v[3];
433403b705cfSriastradh	v[9] = opacity;
433503b705cfSriastradh
433603b705cfSriastradh	v[10] = v[5];
433703b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
433803b705cfSriastradh	v[12] = v[7];
433903b705cfSriastradh	v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
434003b705cfSriastradh	v[14] = opacity;
434103b705cfSriastradh}
434203b705cfSriastradh
434303b705cfSriastradhsse2 fastcall static void
434403b705cfSriastradhgen3_emit_composite_spans_primitive_identity_source__sse2__boxes(const struct sna_composite_spans_op *op,
434503b705cfSriastradh								 const struct sna_opacity_box *b,
434603b705cfSriastradh								 int nbox,
434703b705cfSriastradh								 float *v)
434803b705cfSriastradh{
434903b705cfSriastradh	do {
435003b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
435103b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
435203b705cfSriastradh		v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0];
435303b705cfSriastradh		v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1];
435403b705cfSriastradh		v[4] = b->alpha;
435503b705cfSriastradh
435603b705cfSriastradh		v[5] = op->base.dst.x + b->box.x1;
435703b705cfSriastradh		v[6] = v[1];
435803b705cfSriastradh		v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0];
435903b705cfSriastradh		v[8] = v[3];
436003b705cfSriastradh		v[9] = b->alpha;
436103b705cfSriastradh
436203b705cfSriastradh		v[10] = v[5];
436303b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
436403b705cfSriastradh		v[12] = v[7];
436503b705cfSriastradh		v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1];
436603b705cfSriastradh		v[14] = b->alpha;
436703b705cfSriastradh
436803b705cfSriastradh		v += 15;
436903b705cfSriastradh		b++;
437003b705cfSriastradh	} while (--nbox);
437103b705cfSriastradh}
437203b705cfSriastradhsse2 fastcall static void
437303b705cfSriastradhgen3_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna,
437403b705cfSriastradh							const struct sna_composite_spans_op *op,
437503b705cfSriastradh							const BoxRec *box,
437603b705cfSriastradh							float opacity)
437703b705cfSriastradh{
437803b705cfSriastradh	PictTransform *transform = op->base.src.transform;
437903b705cfSriastradh	float *v;
438003b705cfSriastradh
438103b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
438203b705cfSriastradh	sna->render.vertex_used += 15;
438303b705cfSriastradh
438403b705cfSriastradh	v[0]  = op->base.dst.x + box->x2;
438503b705cfSriastradh	v[6]  = v[1] = op->base.dst.y + box->y2;
438603b705cfSriastradh	v[10] = v[5] = op->base.dst.x + box->x1;
438703b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
438803b705cfSriastradh	v[14] = v[9] = v[4]  = opacity;
438903b705cfSriastradh
439003b705cfSriastradh	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
439103b705cfSriastradh				    (int)op->base.src.offset[1] + box->y2,
439203b705cfSriastradh				    transform, op->base.src.scale,
439303b705cfSriastradh				    &v[2], &v[3]);
439403b705cfSriastradh
439503b705cfSriastradh	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
439603b705cfSriastradh				    (int)op->base.src.offset[1] + box->y2,
439703b705cfSriastradh				    transform, op->base.src.scale,
439803b705cfSriastradh				    &v[7], &v[8]);
439903b705cfSriastradh
440003b705cfSriastradh	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
440103b705cfSriastradh				    (int)op->base.src.offset[1] + box->y1,
440203b705cfSriastradh				    transform, op->base.src.scale,
440303b705cfSriastradh				    &v[12], &v[13]);
440403b705cfSriastradh}
440503b705cfSriastradh
440603b705cfSriastradhsse2 fastcall static void
440703b705cfSriastradhgen3_emit_composite_spans_primitive_affine_source__sse2__boxes(const struct sna_composite_spans_op *op,
440803b705cfSriastradh							       const struct sna_opacity_box *b,
440903b705cfSriastradh							       int nbox,
441003b705cfSriastradh							       float *v)
441103b705cfSriastradh{
441203b705cfSriastradh	PictTransform *transform = op->base.src.transform;
441303b705cfSriastradh
441403b705cfSriastradh	do {
441503b705cfSriastradh		v[0]  = op->base.dst.x + b->box.x2;
441603b705cfSriastradh		v[6]  = v[1] = op->base.dst.y + b->box.y2;
441703b705cfSriastradh		v[10] = v[5] = op->base.dst.x + b->box.x1;
441803b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
441903b705cfSriastradh		v[14] = v[9] = v[4]  = b->alpha;
442003b705cfSriastradh
442103b705cfSriastradh		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2,
442203b705cfSriastradh					    (int)op->base.src.offset[1] + b->box.y2,
442303b705cfSriastradh					    transform, op->base.src.scale,
442403b705cfSriastradh					    &v[2], &v[3]);
442503b705cfSriastradh
442603b705cfSriastradh		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
442703b705cfSriastradh					    (int)op->base.src.offset[1] + b->box.y2,
442803b705cfSriastradh					    transform, op->base.src.scale,
442903b705cfSriastradh					    &v[7], &v[8]);
443003b705cfSriastradh
443103b705cfSriastradh		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
443203b705cfSriastradh					    (int)op->base.src.offset[1] + b->box.y1,
443303b705cfSriastradh					    transform, op->base.src.scale,
443403b705cfSriastradh					    &v[12], &v[13]);
443503b705cfSriastradh		v += 15;
443603b705cfSriastradh		b++;
443703b705cfSriastradh	} while (--nbox);
443803b705cfSriastradh}
443903b705cfSriastradh
444003b705cfSriastradhsse2 fastcall static void
444103b705cfSriastradhgen3_emit_composite_spans_primitive_identity_gradient__sse2(struct sna *sna,
444203b705cfSriastradh							    const struct sna_composite_spans_op *op,
444303b705cfSriastradh							    const BoxRec *box,
444403b705cfSriastradh							    float opacity)
444503b705cfSriastradh{
444603b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
444703b705cfSriastradh	sna->render.vertex_used += 15;
444803b705cfSriastradh
444903b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
445003b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
445103b705cfSriastradh	v[2] = op->base.src.offset[0] + box->x2;
445203b705cfSriastradh	v[3] = op->base.src.offset[1] + box->y2;
445303b705cfSriastradh	v[4] = opacity;
445403b705cfSriastradh
445503b705cfSriastradh	v[5] = op->base.dst.x + box->x1;
445603b705cfSriastradh	v[6] = v[1];
445703b705cfSriastradh	v[7] = op->base.src.offset[0] + box->x1;
445803b705cfSriastradh	v[8] = v[3];
445903b705cfSriastradh	v[9] = opacity;
446003b705cfSriastradh
446103b705cfSriastradh	v[10] = v[5];
446203b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
446303b705cfSriastradh	v[12] = v[7];
446403b705cfSriastradh	v[13] = op->base.src.offset[1] + box->y1;
446503b705cfSriastradh	v[14] = opacity;
446603b705cfSriastradh}
446703b705cfSriastradh
446803b705cfSriastradhsse2 fastcall static void
446903b705cfSriastradhgen3_emit_composite_spans_primitive_identity_gradient__sse2__boxes(const struct sna_composite_spans_op *op,
447003b705cfSriastradh								   const struct sna_opacity_box *b,
447103b705cfSriastradh								   int nbox,
447203b705cfSriastradh								   float *v)
447303b705cfSriastradh{
447403b705cfSriastradh	do {
447503b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
447603b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
447703b705cfSriastradh		v[2] = op->base.src.offset[0] + b->box.x2;
447803b705cfSriastradh		v[3] = op->base.src.offset[1] + b->box.y2;
447903b705cfSriastradh		v[4] = b->alpha;
448003b705cfSriastradh
448103b705cfSriastradh		v[5] = op->base.dst.x + b->box.x1;
448203b705cfSriastradh		v[6] = v[1];
448303b705cfSriastradh		v[7] = op->base.src.offset[0] + b->box.x1;
448403b705cfSriastradh		v[8] = v[3];
448503b705cfSriastradh		v[9] = b->alpha;
448603b705cfSriastradh
448703b705cfSriastradh		v[10] = v[5];
448803b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
448903b705cfSriastradh		v[12] = v[7];
449003b705cfSriastradh		v[13] = op->base.src.offset[1] + b->box.y1;
449103b705cfSriastradh		v[14] = b->alpha;
449203b705cfSriastradh
449303b705cfSriastradh		v += 15;
449403b705cfSriastradh		b++;
449503b705cfSriastradh	} while (--nbox);
449603b705cfSriastradh}
449703b705cfSriastradh
449803b705cfSriastradhsse2 fastcall static void
449903b705cfSriastradhgen3_emit_composite_spans_primitive_affine_gradient__sse2(struct sna *sna,
450003b705cfSriastradh							  const struct sna_composite_spans_op *op,
450103b705cfSriastradh							  const BoxRec *box,
450203b705cfSriastradh							  float opacity)
450303b705cfSriastradh{
450403b705cfSriastradh	PictTransform *transform = op->base.src.transform;
450503b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
450603b705cfSriastradh	sna->render.vertex_used += 15;
450703b705cfSriastradh
450803b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
450903b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
451003b705cfSriastradh	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x2,
451103b705cfSriastradh				    op->base.src.offset[1] + box->y2,
451203b705cfSriastradh				    transform, op->base.src.scale,
451303b705cfSriastradh				    &v[2], &v[3]);
451403b705cfSriastradh	v[4] = opacity;
451503b705cfSriastradh
451603b705cfSriastradh	v[5] = op->base.dst.x + box->x1;
451703b705cfSriastradh	v[6] = v[1];
451803b705cfSriastradh	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
451903b705cfSriastradh				    op->base.src.offset[1] + box->y2,
452003b705cfSriastradh				    transform, op->base.src.scale,
452103b705cfSriastradh				    &v[7], &v[8]);
452203b705cfSriastradh	v[9] = opacity;
452303b705cfSriastradh
452403b705cfSriastradh	v[10] = v[5];
452503b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
452603b705cfSriastradh	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
452703b705cfSriastradh				    op->base.src.offset[1] + box->y1,
452803b705cfSriastradh				    transform, op->base.src.scale,
452903b705cfSriastradh				    &v[12], &v[13]);
453003b705cfSriastradh	v[14] = opacity;
453103b705cfSriastradh}
453203b705cfSriastradh
453303b705cfSriastradhsse2 fastcall static void
453403b705cfSriastradhgen3_emit_composite_spans_primitive_affine_gradient__sse2__boxes(const struct sna_composite_spans_op *op,
453503b705cfSriastradh								 const struct sna_opacity_box *b,
453603b705cfSriastradh								 int nbox,
453703b705cfSriastradh								 float *v)
453803b705cfSriastradh{
453903b705cfSriastradh	PictTransform *transform = op->base.src.transform;
454003b705cfSriastradh
454103b705cfSriastradh	do {
454203b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
454303b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
454403b705cfSriastradh		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
454503b705cfSriastradh					    op->base.src.offset[1] + b->box.y2,
454603b705cfSriastradh					    transform, op->base.src.scale,
454703b705cfSriastradh					    &v[2], &v[3]);
454803b705cfSriastradh		v[4] = b->alpha;
454903b705cfSriastradh
455003b705cfSriastradh		v[5] = op->base.dst.x + b->box.x1;
455103b705cfSriastradh		v[6] = v[1];
455203b705cfSriastradh		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
455303b705cfSriastradh					    op->base.src.offset[1] + b->box.y2,
455403b705cfSriastradh					    transform, op->base.src.scale,
455503b705cfSriastradh					    &v[7], &v[8]);
455603b705cfSriastradh		v[9] = b->alpha;
455703b705cfSriastradh
455803b705cfSriastradh		v[10] = v[5];
455903b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
456003b705cfSriastradh		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
456103b705cfSriastradh					    op->base.src.offset[1] + b->box.y1,
456203b705cfSriastradh					    transform, op->base.src.scale,
456303b705cfSriastradh					    &v[12], &v[13]);
456403b705cfSriastradh		v[14] = b->alpha;
456503b705cfSriastradh		v += 15;
456603b705cfSriastradh		b++;
456703b705cfSriastradh	} while (--nbox);
456803b705cfSriastradh}
456903b705cfSriastradh#endif
457003b705cfSriastradh
457103b705cfSriastradhfastcall static void
457203b705cfSriastradhgen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna,
457303b705cfSriastradh						    const struct sna_composite_spans_op *op,
457403b705cfSriastradh						    const BoxRec *box,
457503b705cfSriastradh						    float opacity)
457603b705cfSriastradh{
457703b705cfSriastradh	PictTransform *transform = op->base.src.transform;
457803b705cfSriastradh	float *v = sna->render.vertices + sna->render.vertex_used;
457903b705cfSriastradh	sna->render.vertex_used += 15;
458003b705cfSriastradh
458103b705cfSriastradh	v[0] = op->base.dst.x + box->x2;
458203b705cfSriastradh	v[1] = op->base.dst.y + box->y2;
458303b705cfSriastradh	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x2,
458403b705cfSriastradh				    op->base.src.offset[1] + box->y2,
458503b705cfSriastradh				    transform, op->base.src.scale,
458603b705cfSriastradh				    &v[2], &v[3]);
458703b705cfSriastradh	v[4] = opacity;
458803b705cfSriastradh
458903b705cfSriastradh	v[5] = op->base.dst.x + box->x1;
459003b705cfSriastradh	v[6] = v[1];
459103b705cfSriastradh	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
459203b705cfSriastradh				    op->base.src.offset[1] + box->y2,
459303b705cfSriastradh				    transform, op->base.src.scale,
459403b705cfSriastradh				    &v[7], &v[8]);
459503b705cfSriastradh	v[9] = opacity;
459603b705cfSriastradh
459703b705cfSriastradh	v[10] = v[5];
459803b705cfSriastradh	v[11] = op->base.dst.y + box->y1;
459903b705cfSriastradh	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
460003b705cfSriastradh				    op->base.src.offset[1] + box->y1,
460103b705cfSriastradh				    transform, op->base.src.scale,
460203b705cfSriastradh				    &v[12], &v[13]);
460303b705cfSriastradh	v[14] = opacity;
460403b705cfSriastradh}
460503b705cfSriastradh
460603b705cfSriastradhfastcall static void
460703b705cfSriastradhgen3_emit_composite_spans_primitive_affine_gradient__boxes(const struct sna_composite_spans_op *op,
460803b705cfSriastradh							   const struct sna_opacity_box *b,
460903b705cfSriastradh							   int nbox,
461003b705cfSriastradh							   float *v)
461103b705cfSriastradh{
461203b705cfSriastradh	PictTransform *transform = op->base.src.transform;
461303b705cfSriastradh
461403b705cfSriastradh	do {
461503b705cfSriastradh		v[0] = op->base.dst.x + b->box.x2;
461603b705cfSriastradh		v[1] = op->base.dst.y + b->box.y2;
461703b705cfSriastradh		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
461803b705cfSriastradh					    op->base.src.offset[1] + b->box.y2,
461903b705cfSriastradh					    transform, op->base.src.scale,
462003b705cfSriastradh					    &v[2], &v[3]);
462103b705cfSriastradh		v[4] = b->alpha;
462203b705cfSriastradh
462303b705cfSriastradh		v[5] = op->base.dst.x + b->box.x1;
462403b705cfSriastradh		v[6] = v[1];
462503b705cfSriastradh		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
462603b705cfSriastradh					    op->base.src.offset[1] + b->box.y2,
462703b705cfSriastradh					    transform, op->base.src.scale,
462803b705cfSriastradh					    &v[7], &v[8]);
462903b705cfSriastradh		v[9] = b->alpha;
463003b705cfSriastradh
463103b705cfSriastradh		v[10] = v[5];
463203b705cfSriastradh		v[11] = op->base.dst.y + b->box.y1;
463303b705cfSriastradh		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
463403b705cfSriastradh					    op->base.src.offset[1] + b->box.y1,
463503b705cfSriastradh					    transform, op->base.src.scale,
463603b705cfSriastradh					    &v[12], &v[13]);
463703b705cfSriastradh		v[14] = b->alpha;
463803b705cfSriastradh		v += 15;
463903b705cfSriastradh		b++;
464003b705cfSriastradh	} while (--nbox);
464103b705cfSriastradh}
464203b705cfSriastradh
464303b705cfSriastradhfastcall static void
464403b705cfSriastradhgen3_emit_composite_spans_primitive(struct sna *sna,
464503b705cfSriastradh				    const struct sna_composite_spans_op *op,
464603b705cfSriastradh				    const BoxRec *box,
464703b705cfSriastradh				    float opacity)
464803b705cfSriastradh{
464903b705cfSriastradh	gen3_emit_composite_spans_vertex(sna, op,
465003b705cfSriastradh					 box->x2, box->y2,
465103b705cfSriastradh					 opacity);
465203b705cfSriastradh	gen3_emit_composite_spans_vertex(sna, op,
465303b705cfSriastradh					 box->x1, box->y2,
465403b705cfSriastradh					 opacity);
465503b705cfSriastradh	gen3_emit_composite_spans_vertex(sna, op,
465603b705cfSriastradh					 box->x1, box->y1,
465703b705cfSriastradh					 opacity);
465803b705cfSriastradh}
465903b705cfSriastradh
466003b705cfSriastradhfastcall static void
466103b705cfSriastradhgen3_render_composite_spans_constant_box(struct sna *sna,
466203b705cfSriastradh					 const struct sna_composite_spans_op *op,
466303b705cfSriastradh					 const BoxRec *box, float opacity)
466403b705cfSriastradh{
466503b705cfSriastradh	float *v;
466603b705cfSriastradh	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
466703b705cfSriastradh	     __FUNCTION__,
466803b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
466903b705cfSriastradh	     opacity,
467003b705cfSriastradh	     op->base.dst.x, op->base.dst.y,
467103b705cfSriastradh	     box->x1, box->y1,
467203b705cfSriastradh	     box->x2 - box->x1,
467303b705cfSriastradh	     box->y2 - box->y1));
467403b705cfSriastradh
467503b705cfSriastradh	gen3_get_rectangles(sna, &op->base, 1);
467603b705cfSriastradh
467703b705cfSriastradh	v = sna->render.vertices + sna->render.vertex_used;
467803b705cfSriastradh	sna->render.vertex_used += 9;
467903b705cfSriastradh
468003b705cfSriastradh	v[0] = box->x2;
468103b705cfSriastradh	v[6] = v[3] = box->x1;
468203b705cfSriastradh	v[4] = v[1] = box->y2;
468303b705cfSriastradh	v[7] = box->y1;
468403b705cfSriastradh	v[8] = v[5] = v[2] = opacity;
468503b705cfSriastradh}
468603b705cfSriastradh
468703b705cfSriastradhfastcall static void
468803b705cfSriastradhgen3_render_composite_spans_constant_thread_boxes(struct sna *sna,
468903b705cfSriastradh						  const struct sna_composite_spans_op *op,
469003b705cfSriastradh						  const struct sna_opacity_box *box,
469103b705cfSriastradh						  int nbox)
469203b705cfSriastradh{
469303b705cfSriastradh	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
469403b705cfSriastradh	     __FUNCTION__, nbox,
469503b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
469603b705cfSriastradh	     op->base.dst.x, op->base.dst.y));
469703b705cfSriastradh
469803b705cfSriastradh	sna_vertex_lock(&sna->render);
469903b705cfSriastradh	do {
470003b705cfSriastradh		int nbox_this_time;
470103b705cfSriastradh		float *v;
470203b705cfSriastradh
470303b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
470403b705cfSriastradh		assert(nbox_this_time);
470503b705cfSriastradh		nbox -= nbox_this_time;
470603b705cfSriastradh
470703b705cfSriastradh		v = sna->render.vertices + sna->render.vertex_used;
470803b705cfSriastradh		sna->render.vertex_used += nbox_this_time * 9;
470903b705cfSriastradh
471003b705cfSriastradh		sna_vertex_acquire__locked(&sna->render);
471103b705cfSriastradh		sna_vertex_unlock(&sna->render);
471203b705cfSriastradh
471303b705cfSriastradh		do {
471403b705cfSriastradh			v[0] = box->box.x2;
471503b705cfSriastradh			v[6] = v[3] = box->box.x1;
471603b705cfSriastradh			v[4] = v[1] = box->box.y2;
471703b705cfSriastradh			v[7] = box->box.y1;
471803b705cfSriastradh			v[8] = v[5] = v[2] = box->alpha;
471903b705cfSriastradh			v += 9;
472003b705cfSriastradh			box++;
472103b705cfSriastradh		} while (--nbox_this_time);
472203b705cfSriastradh
472303b705cfSriastradh		sna_vertex_lock(&sna->render);
472403b705cfSriastradh		sna_vertex_release__locked(&sna->render);
472503b705cfSriastradh	} while (nbox);
472603b705cfSriastradh	sna_vertex_unlock(&sna->render);
472703b705cfSriastradh}
472803b705cfSriastradh
472903b705cfSriastradhfastcall static void
473003b705cfSriastradhgen3_render_composite_spans_box(struct sna *sna,
473103b705cfSriastradh				const struct sna_composite_spans_op *op,
473203b705cfSriastradh				const BoxRec *box, float opacity)
473303b705cfSriastradh{
473403b705cfSriastradh	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
473503b705cfSriastradh	     __FUNCTION__,
473603b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
473703b705cfSriastradh	     opacity,
473803b705cfSriastradh	     op->base.dst.x, op->base.dst.y,
473903b705cfSriastradh	     box->x1, box->y1,
474003b705cfSriastradh	     box->x2 - box->x1,
474103b705cfSriastradh	     box->y2 - box->y1));
474203b705cfSriastradh
474303b705cfSriastradh	gen3_get_rectangles(sna, &op->base, 1);
474403b705cfSriastradh	op->prim_emit(sna, op, box, opacity);
474503b705cfSriastradh}
474603b705cfSriastradh
474703b705cfSriastradhstatic void
474803b705cfSriastradhgen3_render_composite_spans_boxes(struct sna *sna,
474903b705cfSriastradh				  const struct sna_composite_spans_op *op,
475003b705cfSriastradh				  const BoxRec *box, int nbox,
475103b705cfSriastradh				  float opacity)
475203b705cfSriastradh{
475303b705cfSriastradh	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
475403b705cfSriastradh	     __FUNCTION__, nbox,
475503b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
475603b705cfSriastradh	     opacity,
475703b705cfSriastradh	     op->base.dst.x, op->base.dst.y));
475803b705cfSriastradh
475903b705cfSriastradh	do {
476003b705cfSriastradh		int nbox_this_time;
476103b705cfSriastradh
476203b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
476303b705cfSriastradh		nbox -= nbox_this_time;
476403b705cfSriastradh
476503b705cfSriastradh		do {
476603b705cfSriastradh			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
476703b705cfSriastradh			     box->x1, box->y1,
476803b705cfSriastradh			     box->x2 - box->x1,
476903b705cfSriastradh			     box->y2 - box->y1));
477003b705cfSriastradh
477103b705cfSriastradh			op->prim_emit(sna, op, box++, opacity);
477203b705cfSriastradh		} while (--nbox_this_time);
477303b705cfSriastradh	} while (nbox);
477403b705cfSriastradh}
477503b705cfSriastradh
477603b705cfSriastradhfastcall static void
477703b705cfSriastradhgen3_render_composite_spans_boxes__thread(struct sna *sna,
477803b705cfSriastradh					  const struct sna_composite_spans_op *op,
477903b705cfSriastradh					  const struct sna_opacity_box *box,
478003b705cfSriastradh					  int nbox)
478103b705cfSriastradh{
478203b705cfSriastradh	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
478303b705cfSriastradh	     __FUNCTION__, nbox,
478403b705cfSriastradh	     op->base.src.offset[0], op->base.src.offset[1],
478503b705cfSriastradh	     op->base.dst.x, op->base.dst.y));
478603b705cfSriastradh
478703b705cfSriastradh	sna_vertex_lock(&sna->render);
478803b705cfSriastradh	do {
478903b705cfSriastradh		int nbox_this_time;
479003b705cfSriastradh		float *v;
479103b705cfSriastradh
479203b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
479303b705cfSriastradh		assert(nbox_this_time);
479403b705cfSriastradh		nbox -= nbox_this_time;
479503b705cfSriastradh
479603b705cfSriastradh		v = sna->render.vertices + sna->render.vertex_used;
479703b705cfSriastradh		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
479803b705cfSriastradh
479903b705cfSriastradh		sna_vertex_acquire__locked(&sna->render);
480003b705cfSriastradh		sna_vertex_unlock(&sna->render);
480103b705cfSriastradh
480203b705cfSriastradh		op->emit_boxes(op, box, nbox_this_time, v);
480303b705cfSriastradh		box += nbox_this_time;
480403b705cfSriastradh
480503b705cfSriastradh		sna_vertex_lock(&sna->render);
480603b705cfSriastradh		sna_vertex_release__locked(&sna->render);
480703b705cfSriastradh	} while (nbox);
480803b705cfSriastradh	sna_vertex_unlock(&sna->render);
480903b705cfSriastradh}
481003b705cfSriastradh
481103b705cfSriastradhfastcall static void
481203b705cfSriastradhgen3_render_composite_spans_done(struct sna *sna,
481303b705cfSriastradh				 const struct sna_composite_spans_op *op)
481403b705cfSriastradh{
481503b705cfSriastradh	if (sna->render.vertex_offset)
481603b705cfSriastradh		gen3_vertex_flush(sna);
481703b705cfSriastradh
481803b705cfSriastradh	DBG(("%s()\n", __FUNCTION__));
481903b705cfSriastradh
482003b705cfSriastradh	if (op->base.src.bo)
482103b705cfSriastradh		kgem_bo_destroy(&sna->kgem, op->base.src.bo);
482203b705cfSriastradh
482303b705cfSriastradh	sna_render_composite_redirect_done(sna, &op->base);
482403b705cfSriastradh}
482503b705cfSriastradh
482603b705cfSriastradhstatic bool
482703b705cfSriastradhgen3_check_composite_spans(struct sna *sna,
482803b705cfSriastradh			   uint8_t op, PicturePtr src, PicturePtr dst,
482903b705cfSriastradh			   int16_t width, int16_t height, unsigned flags)
483003b705cfSriastradh{
483103b705cfSriastradh	if (op >= ARRAY_SIZE(gen3_blend_op))
483203b705cfSriastradh		return false;
483303b705cfSriastradh
483403b705cfSriastradh	if (gen3_composite_fallback(sna, op, src, NULL, dst))
483503b705cfSriastradh		return false;
483603b705cfSriastradh
483703b705cfSriastradh	if (need_tiling(sna, width, height) &&
483803b705cfSriastradh	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
483903b705cfSriastradh		DBG(("%s: fallback, tiled operation not on GPU\n",
484003b705cfSriastradh		     __FUNCTION__));
484103b705cfSriastradh		return false;
484203b705cfSriastradh	}
484303b705cfSriastradh
484403b705cfSriastradh	return true;
484503b705cfSriastradh}
484603b705cfSriastradh
484703b705cfSriastradhstatic bool
484803b705cfSriastradhgen3_render_composite_spans(struct sna *sna,
484903b705cfSriastradh			    uint8_t op,
485003b705cfSriastradh			    PicturePtr src,
485103b705cfSriastradh			    PicturePtr dst,
485203b705cfSriastradh			    int16_t src_x,  int16_t src_y,
485303b705cfSriastradh			    int16_t dst_x,  int16_t dst_y,
485403b705cfSriastradh			    int16_t width,  int16_t height,
485503b705cfSriastradh			    unsigned flags,
485603b705cfSriastradh			    struct sna_composite_spans_op *tmp)
485703b705cfSriastradh{
485803b705cfSriastradh	bool no_offset;
485903b705cfSriastradh
486003b705cfSriastradh	DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__,
486103b705cfSriastradh	     src_x, src_y, dst_x, dst_y, width, height));
486203b705cfSriastradh
486303b705cfSriastradh	assert(gen3_check_composite_spans(sna, op, src, dst, width, height, flags));
486403b705cfSriastradh
486503b705cfSriastradh	if (need_tiling(sna, width, height)) {
486603b705cfSriastradh		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
486703b705cfSriastradh		     __FUNCTION__, width, height));
486803b705cfSriastradh		return sna_tiling_composite_spans(op, src, dst,
486903b705cfSriastradh						  src_x, src_y, dst_x, dst_y,
487003b705cfSriastradh						  width, height, flags, tmp);
487103b705cfSriastradh	}
487203b705cfSriastradh
487303b705cfSriastradh	if (!gen3_composite_set_target(sna, &tmp->base, dst,
487442542f5fSchristos				       dst_x, dst_y, width, height,
487542542f5fSchristos				       true)) {
487603b705cfSriastradh		DBG(("%s: unable to set render target\n",
487703b705cfSriastradh		     __FUNCTION__));
487803b705cfSriastradh		return false;
487903b705cfSriastradh	}
488003b705cfSriastradh
488103b705cfSriastradh	tmp->base.op = op;
488203b705cfSriastradh	tmp->base.rb_reversed = gen3_dst_rb_reversed(tmp->base.dst.format);
488303b705cfSriastradh	tmp->base.src.u.gen3.type = SHADER_TEXTURE;
488403b705cfSriastradh	tmp->base.src.is_affine = true;
488503b705cfSriastradh	DBG(("%s: preparing source\n", __FUNCTION__));
488603b705cfSriastradh	switch (gen3_composite_picture(sna, src, &tmp->base, &tmp->base.src,
488703b705cfSriastradh				       src_x, src_y,
488803b705cfSriastradh				       width, height,
488903b705cfSriastradh				       dst_x, dst_y,
489003b705cfSriastradh				       dst->polyMode == PolyModePrecise)) {
489103b705cfSriastradh	case -1:
489203b705cfSriastradh		goto cleanup_dst;
489303b705cfSriastradh	case 0:
489403b705cfSriastradh		tmp->base.src.u.gen3.type = SHADER_ZERO;
489503b705cfSriastradh		break;
489603b705cfSriastradh	case 1:
489703b705cfSriastradh		gen3_composite_channel_convert(&tmp->base.src);
489803b705cfSriastradh		break;
489903b705cfSriastradh	}
490003b705cfSriastradh	DBG(("%s: source type=%d\n", __FUNCTION__, tmp->base.src.u.gen3.type));
490103b705cfSriastradh
490203b705cfSriastradh	if (tmp->base.src.u.gen3.type != SHADER_ZERO)
490303b705cfSriastradh		tmp->base.mask.u.gen3.type = SHADER_OPACITY;
490403b705cfSriastradh
490503b705cfSriastradh	no_offset = tmp->base.dst.x == 0 && tmp->base.dst.y == 0;
490603b705cfSriastradh	tmp->box   = gen3_render_composite_spans_box;
490703b705cfSriastradh	tmp->boxes = gen3_render_composite_spans_boxes;
490803b705cfSriastradh	tmp->thread_boxes = gen3_render_composite_spans_boxes__thread;
490903b705cfSriastradh	tmp->done  = gen3_render_composite_spans_done;
491003b705cfSriastradh	tmp->prim_emit = gen3_emit_composite_spans_primitive;
491103b705cfSriastradh	switch (tmp->base.src.u.gen3.type) {
491203b705cfSriastradh	case SHADER_NONE:
491303b705cfSriastradh		assert(0);
491403b705cfSriastradh	case SHADER_ZERO:
491503b705cfSriastradh		if (no_offset) {
491603b705cfSriastradh			tmp->prim_emit = gen3_emit_composite_spans_primitive_zero_no_offset;
491703b705cfSriastradh			tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero_no_offset__boxes;
491803b705cfSriastradh		} else {
491903b705cfSriastradh			tmp->prim_emit = gen3_emit_composite_spans_primitive_zero;
492003b705cfSriastradh			tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero__boxes;
492103b705cfSriastradh		}
492203b705cfSriastradh		break;
492303b705cfSriastradh	case SHADER_BLACK:
492403b705cfSriastradh	case SHADER_WHITE:
492503b705cfSriastradh	case SHADER_CONSTANT:
492603b705cfSriastradh		if (no_offset) {
492703b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
492803b705cfSriastradh			if (sna->cpu_features & SSE2) {
492903b705cfSriastradh				tmp->box = gen3_render_composite_spans_constant_box__sse2;
493003b705cfSriastradh				tmp->thread_boxes = gen3_render_composite_spans_constant_thread__sse2__boxes;
493103b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant__sse2__no_offset;
493203b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__sse2__no_offset__boxes;
493303b705cfSriastradh			} else
493403b705cfSriastradh#endif
493503b705cfSriastradh			{
493603b705cfSriastradh				tmp->box = gen3_render_composite_spans_constant_box;
493703b705cfSriastradh				tmp->thread_boxes = gen3_render_composite_spans_constant_thread_boxes;
493803b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant_no_offset;
493903b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant_no_offset__boxes;
494003b705cfSriastradh			}
494103b705cfSriastradh		} else {
494203b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
494303b705cfSriastradh			if (sna->cpu_features & SSE2) {
494403b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant__sse2;
494503b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__sse2__boxes;
494603b705cfSriastradh			} else
494703b705cfSriastradh#endif
494803b705cfSriastradh			{
494903b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant;
495003b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__boxes;
495103b705cfSriastradh			}
495203b705cfSriastradh		}
495303b705cfSriastradh		break;
495403b705cfSriastradh	case SHADER_LINEAR:
495503b705cfSriastradh	case SHADER_RADIAL:
495603b705cfSriastradh		if (tmp->base.src.transform == NULL) {
495703b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
495803b705cfSriastradh			if (sna->cpu_features & SSE2) {
495903b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient__sse2;
496003b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__sse2__boxes;
496103b705cfSriastradh			} else
496203b705cfSriastradh#endif
496303b705cfSriastradh			{
496403b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient;
496503b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__boxes;
496603b705cfSriastradh			}
496703b705cfSriastradh		} else if (tmp->base.src.is_affine) {
496803b705cfSriastradh			tmp->base.src.scale[1] = tmp->base.src.scale[0] = 1. / tmp->base.src.transform->matrix[2][2];
496903b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
497003b705cfSriastradh			if (sna->cpu_features & SSE2) {
497103b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient__sse2;
497203b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__sse2__boxes;
497303b705cfSriastradh			} else
497403b705cfSriastradh#endif
497503b705cfSriastradh			{
497603b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient;
497703b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__boxes;
497803b705cfSriastradh			}
497903b705cfSriastradh		}
498003b705cfSriastradh		break;
498103b705cfSriastradh	case SHADER_TEXTURE:
498203b705cfSriastradh		if (tmp->base.src.transform == NULL) {
498303b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
498403b705cfSriastradh			if (sna->cpu_features & SSE2) {
498503b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source__sse2;
498603b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__sse2__boxes;
498703b705cfSriastradh			} else
498803b705cfSriastradh#endif
498903b705cfSriastradh			{
499003b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source;
499103b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__boxes;
499203b705cfSriastradh			}
499303b705cfSriastradh		} else if (tmp->base.src.is_affine) {
499403b705cfSriastradh			tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
499503b705cfSriastradh			tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
499603b705cfSriastradh#if defined(sse2) && !defined(__x86_64__)
499703b705cfSriastradh			if (sna->cpu_features & SSE2) {
499803b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source__sse2;
499903b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__sse2__boxes;
500003b705cfSriastradh			} else
500103b705cfSriastradh#endif
500203b705cfSriastradh			{
500303b705cfSriastradh				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source;
500403b705cfSriastradh				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__boxes;
500503b705cfSriastradh			}
500603b705cfSriastradh		}
500703b705cfSriastradh		break;
500803b705cfSriastradh	}
500903b705cfSriastradh	if (tmp->emit_boxes == NULL)
501003b705cfSriastradh		tmp->thread_boxes = NULL;
501103b705cfSriastradh
501203b705cfSriastradh	tmp->base.mask.bo = NULL;
501303b705cfSriastradh
501403b705cfSriastradh	tmp->base.floats_per_vertex = 2;
501503b705cfSriastradh	if (!is_constant_ps(tmp->base.src.u.gen3.type))
501603b705cfSriastradh		tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
501703b705cfSriastradh	tmp->base.floats_per_vertex +=
501803b705cfSriastradh		tmp->base.mask.u.gen3.type == SHADER_OPACITY;
501903b705cfSriastradh	tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;
502003b705cfSriastradh
502103b705cfSriastradh	if (!kgem_check_bo(&sna->kgem,
502203b705cfSriastradh			   tmp->base.dst.bo, tmp->base.src.bo,
502303b705cfSriastradh			   NULL)) {
502403b705cfSriastradh		kgem_submit(&sna->kgem);
502503b705cfSriastradh		if (!kgem_check_bo(&sna->kgem,
502603b705cfSriastradh				   tmp->base.dst.bo, tmp->base.src.bo,
502703b705cfSriastradh				   NULL))
502803b705cfSriastradh			goto cleanup_src;
502903b705cfSriastradh	}
503003b705cfSriastradh
503103b705cfSriastradh	gen3_align_vertex(sna, &tmp->base);
503242542f5fSchristos	gen3_emit_composite_state(sna, &tmp->base);
503303b705cfSriastradh	return true;
503403b705cfSriastradh
503503b705cfSriastradhcleanup_src:
503603b705cfSriastradh	if (tmp->base.src.bo)
503703b705cfSriastradh		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
503803b705cfSriastradhcleanup_dst:
503903b705cfSriastradh	if (tmp->base.redirect.real_bo)
504003b705cfSriastradh		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
504103b705cfSriastradh	return false;
504203b705cfSriastradh}
504303b705cfSriastradh
504403b705cfSriastradhstatic void
504503b705cfSriastradhgen3_emit_video_state(struct sna *sna,
504603b705cfSriastradh		      struct sna_video *video,
504703b705cfSriastradh		      struct sna_video_frame *frame,
504803b705cfSriastradh		      PixmapPtr pixmap,
504903b705cfSriastradh		      struct kgem_bo *dst_bo,
505003b705cfSriastradh		      int width, int height,
505103b705cfSriastradh		      bool bilinear)
505203b705cfSriastradh{
505303b705cfSriastradh	struct gen3_render_state *state = &sna->render_state.gen3;
505403b705cfSriastradh	uint32_t id, ms3, rewind;
505503b705cfSriastradh
505603b705cfSriastradh	gen3_emit_target(sna, dst_bo, width, height,
505703b705cfSriastradh			 sna_format_for_depth(pixmap->drawable.depth));
505803b705cfSriastradh
505903b705cfSriastradh	/* XXX share with composite? Is it worth the effort? */
506003b705cfSriastradh	if ((state->last_shader & (1<<31)) == 0) {
506103b705cfSriastradh		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
506203b705cfSriastradh			  I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) |
506303b705cfSriastradh			  2);
506403b705cfSriastradh		OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
506503b705cfSriastradh		OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
506603b705cfSriastradh			  S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
506703b705cfSriastradh			  S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
506803b705cfSriastradh			  S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
506903b705cfSriastradh			  S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
507003b705cfSriastradh			  S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
507103b705cfSriastradh			  S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
507203b705cfSriastradh			  S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
507303b705cfSriastradh		OUT_BATCH((2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
507403b705cfSriastradh			  (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
507503b705cfSriastradh			  S6_COLOR_WRITE_ENABLE);
507603b705cfSriastradh
507703b705cfSriastradh		state->last_blend = 0;
507803b705cfSriastradh		state->floats_per_vertex = 4;
507903b705cfSriastradh	}
508003b705cfSriastradh
508103b705cfSriastradh	if (!is_planar_fourcc(frame->id)) {
508203b705cfSriastradh		rewind = sna->kgem.nbatch;
508303b705cfSriastradh		OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
508403b705cfSriastradh		OUT_BATCH(0x0000001);	/* constant 0 */
508503b705cfSriastradh		/* constant 0: brightness/contrast */
508603b705cfSriastradh		OUT_BATCH_F(video->brightness / 128.0);
508703b705cfSriastradh		OUT_BATCH_F(video->contrast / 255.0);
508803b705cfSriastradh		OUT_BATCH_F(0.0);
508903b705cfSriastradh		OUT_BATCH_F(0.0);
509003b705cfSriastradh		if (state->last_constants &&
509103b705cfSriastradh		    memcmp(&sna->kgem.batch[state->last_constants],
509203b705cfSriastradh			   &sna->kgem.batch[rewind],
509303b705cfSriastradh			   6*sizeof(uint32_t)) == 0)
509403b705cfSriastradh			sna->kgem.nbatch = rewind;
509503b705cfSriastradh		else
509603b705cfSriastradh			state->last_constants = rewind;
509703b705cfSriastradh
509803b705cfSriastradh		rewind = sna->kgem.nbatch;
509903b705cfSriastradh		OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3);
510003b705cfSriastradh		OUT_BATCH(0x00000001);
510103b705cfSriastradh		OUT_BATCH(SS2_COLORSPACE_CONVERSION |
510203b705cfSriastradh			  (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
510303b705cfSriastradh			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
510403b705cfSriastradh		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
510503b705cfSriastradh			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
510603b705cfSriastradh			  (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
510703b705cfSriastradh			  SS3_NORMALIZED_COORDS);
510803b705cfSriastradh		OUT_BATCH(0x00000000);
510903b705cfSriastradh		if (state->last_sampler &&
511003b705cfSriastradh		    memcmp(&sna->kgem.batch[state->last_sampler],
511103b705cfSriastradh			   &sna->kgem.batch[rewind],
511203b705cfSriastradh			   5*sizeof(uint32_t)) == 0)
511303b705cfSriastradh			sna->kgem.nbatch = rewind;
511403b705cfSriastradh		else
511503b705cfSriastradh			state->last_sampler = rewind;
511603b705cfSriastradh
511703b705cfSriastradh		OUT_BATCH(_3DSTATE_MAP_STATE | 3);
511803b705cfSriastradh		OUT_BATCH(0x00000001);	/* texture map #1 */
511903b705cfSriastradh		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
512003b705cfSriastradh					 frame->bo,
512103b705cfSriastradh					 I915_GEM_DOMAIN_SAMPLER << 16,
512203b705cfSriastradh					 0));
512303b705cfSriastradh
512403b705cfSriastradh		ms3 = MAPSURF_422;
512503b705cfSriastradh		switch (frame->id) {
512603b705cfSriastradh		case FOURCC_YUY2:
512703b705cfSriastradh			ms3 |= MT_422_YCRCB_NORMAL;
512803b705cfSriastradh			break;
512903b705cfSriastradh		case FOURCC_UYVY:
513003b705cfSriastradh			ms3 |= MT_422_YCRCB_SWAPY;
513103b705cfSriastradh			break;
513203b705cfSriastradh		}
513303b705cfSriastradh		ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
513403b705cfSriastradh		ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
513503b705cfSriastradh		OUT_BATCH(ms3);
513603b705cfSriastradh		OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
513703b705cfSriastradh
513803b705cfSriastradh		id = 1<<31 | 1<<1 | !!video->brightness;
513903b705cfSriastradh		if (state->last_shader != id) {
514003b705cfSriastradh			state->last_shader = id;
514103b705cfSriastradh			id = sna->kgem.nbatch++;
514203b705cfSriastradh
514303b705cfSriastradh			gen3_fs_dcl(FS_S0);
514403b705cfSriastradh			gen3_fs_dcl(FS_T0);
514503b705cfSriastradh			gen3_fs_texld(FS_OC, FS_S0, FS_T0);
514603b705cfSriastradh			if (video->brightness != 0) {
514703b705cfSriastradh				gen3_fs_add(FS_OC,
514803b705cfSriastradh					    gen3_fs_operand_reg(FS_OC),
514903b705cfSriastradh					    gen3_fs_operand(FS_C0, X, X, X, ZERO));
515003b705cfSriastradh			}
515103b705cfSriastradh
515203b705cfSriastradh			sna->kgem.batch[id] =
515303b705cfSriastradh				_3DSTATE_PIXEL_SHADER_PROGRAM |
515403b705cfSriastradh				(sna->kgem.nbatch - id - 2);
515503b705cfSriastradh		}
515603b705cfSriastradh	} else {
515703b705cfSriastradh		/* For the planar formats, we set up three samplers --
515803b705cfSriastradh		 * one for each plane, in a Y8 format.  Because I
515903b705cfSriastradh		 * couldn't get the special PLANAR_TO_PACKED
516003b705cfSriastradh		 * shader setup to work, I did the manual pixel shader:
516103b705cfSriastradh		 *
516203b705cfSriastradh		 * y' = y - .0625
516303b705cfSriastradh		 * u' = u - .5
516403b705cfSriastradh		 * v' = v - .5;
516503b705cfSriastradh		 *
516603b705cfSriastradh		 * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
516703b705cfSriastradh		 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
516803b705cfSriastradh		 * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
516903b705cfSriastradh		 *
517003b705cfSriastradh		 * register assignment:
517103b705cfSriastradh		 * r0 = (y',u',v',0)
517203b705cfSriastradh		 * r1 = (y,y,y,y)
517303b705cfSriastradh		 * r2 = (u,u,u,u)
517403b705cfSriastradh		 * r3 = (v,v,v,v)
517503b705cfSriastradh		 * OC = (r,g,b,1)
517603b705cfSriastradh		 */
517703b705cfSriastradh		rewind = sna->kgem.nbatch;
517803b705cfSriastradh		OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2));
517903b705cfSriastradh		OUT_BATCH(0x000001f);	/* constants 0-4 */
518003b705cfSriastradh		/* constant 0: normalization offsets */
518103b705cfSriastradh		OUT_BATCH_F(-0.0625);
518203b705cfSriastradh		OUT_BATCH_F(-0.5);
518303b705cfSriastradh		OUT_BATCH_F(-0.5);
518403b705cfSriastradh		OUT_BATCH_F(0.0);
518503b705cfSriastradh		/* constant 1: r coefficients */
518603b705cfSriastradh		OUT_BATCH_F(1.1643);
518703b705cfSriastradh		OUT_BATCH_F(0.0);
518803b705cfSriastradh		OUT_BATCH_F(1.5958);
518903b705cfSriastradh		OUT_BATCH_F(0.0);
519003b705cfSriastradh		/* constant 2: g coefficients */
519103b705cfSriastradh		OUT_BATCH_F(1.1643);
519203b705cfSriastradh		OUT_BATCH_F(-0.39173);
519303b705cfSriastradh		OUT_BATCH_F(-0.81290);
519403b705cfSriastradh		OUT_BATCH_F(0.0);
519503b705cfSriastradh		/* constant 3: b coefficients */
519603b705cfSriastradh		OUT_BATCH_F(1.1643);
519703b705cfSriastradh		OUT_BATCH_F(2.017);
519803b705cfSriastradh		OUT_BATCH_F(0.0);
519903b705cfSriastradh		OUT_BATCH_F(0.0);
520003b705cfSriastradh		/* constant 4: brightness/contrast */
520103b705cfSriastradh		OUT_BATCH_F(video->brightness / 128.0);
520203b705cfSriastradh		OUT_BATCH_F(video->contrast / 255.0);
520303b705cfSriastradh		OUT_BATCH_F(0.0);
520403b705cfSriastradh		OUT_BATCH_F(0.0);
520503b705cfSriastradh		if (state->last_constants &&
520603b705cfSriastradh		    memcmp(&sna->kgem.batch[state->last_constants],
520703b705cfSriastradh			   &sna->kgem.batch[rewind],
520803b705cfSriastradh			   22*sizeof(uint32_t)) == 0)
520903b705cfSriastradh			sna->kgem.nbatch = rewind;
521003b705cfSriastradh		else
521103b705cfSriastradh			state->last_constants = rewind;
521203b705cfSriastradh
521303b705cfSriastradh		rewind = sna->kgem.nbatch;
521403b705cfSriastradh		OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9);
521503b705cfSriastradh		OUT_BATCH(0x00000007);
521603b705cfSriastradh		/* sampler 0 */
521703b705cfSriastradh		OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
521803b705cfSriastradh			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
521903b705cfSriastradh		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
522003b705cfSriastradh			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
522103b705cfSriastradh			  (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
522203b705cfSriastradh			  SS3_NORMALIZED_COORDS);
522303b705cfSriastradh		OUT_BATCH(0x00000000);
522403b705cfSriastradh		/* sampler 1 */
522503b705cfSriastradh		OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
522603b705cfSriastradh			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
522703b705cfSriastradh		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
522803b705cfSriastradh			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
522903b705cfSriastradh			  (1 << SS3_TEXTUREMAP_INDEX_SHIFT) |
523003b705cfSriastradh			  SS3_NORMALIZED_COORDS);
523103b705cfSriastradh		OUT_BATCH(0x00000000);
523203b705cfSriastradh		/* sampler 2 */
523303b705cfSriastradh		OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
523403b705cfSriastradh			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
523503b705cfSriastradh		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
523603b705cfSriastradh			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
523703b705cfSriastradh			  (2 << SS3_TEXTUREMAP_INDEX_SHIFT) |
523803b705cfSriastradh			  SS3_NORMALIZED_COORDS);
523903b705cfSriastradh		OUT_BATCH(0x00000000);
524003b705cfSriastradh		if (state->last_sampler &&
524103b705cfSriastradh		    memcmp(&sna->kgem.batch[state->last_sampler],
524203b705cfSriastradh			   &sna->kgem.batch[rewind],
524303b705cfSriastradh			   11*sizeof(uint32_t)) == 0)
524403b705cfSriastradh			sna->kgem.nbatch = rewind;
524503b705cfSriastradh		else
524603b705cfSriastradh			state->last_sampler = rewind;
524703b705cfSriastradh
524803b705cfSriastradh		OUT_BATCH(_3DSTATE_MAP_STATE | 9);
524903b705cfSriastradh		OUT_BATCH(0x00000007);
525003b705cfSriastradh
525103b705cfSriastradh		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
525203b705cfSriastradh					 frame->bo,
525303b705cfSriastradh					 I915_GEM_DOMAIN_SAMPLER << 16,
525403b705cfSriastradh					 0));
525503b705cfSriastradh
525603b705cfSriastradh		ms3 = MAPSURF_8BIT | MT_8BIT_I8;
525703b705cfSriastradh		ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
525803b705cfSriastradh		ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
525903b705cfSriastradh		OUT_BATCH(ms3);
526003b705cfSriastradh		/* check to see if Y has special pitch than normal
526103b705cfSriastradh		 * double u/v pitch, e.g i915 XvMC hw requires at
526203b705cfSriastradh		 * least 1K alignment, so Y pitch might
526303b705cfSriastradh		 * be same as U/V's.*/
526403b705cfSriastradh		if (frame->pitch[1])
526503b705cfSriastradh			OUT_BATCH(((frame->pitch[1] / 4) - 1) << MS4_PITCH_SHIFT);
526603b705cfSriastradh		else
526703b705cfSriastradh			OUT_BATCH(((frame->pitch[0] * 2 / 4) - 1) << MS4_PITCH_SHIFT);
526803b705cfSriastradh
526903b705cfSriastradh		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
527003b705cfSriastradh					 frame->bo,
527103b705cfSriastradh					 I915_GEM_DOMAIN_SAMPLER << 16,
527203b705cfSriastradh					 frame->UBufOffset));
527303b705cfSriastradh
527403b705cfSriastradh		ms3 = MAPSURF_8BIT | MT_8BIT_I8;
527503b705cfSriastradh		ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
527603b705cfSriastradh		ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
527703b705cfSriastradh		OUT_BATCH(ms3);
527803b705cfSriastradh		OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
527903b705cfSriastradh
528003b705cfSriastradh		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
528103b705cfSriastradh					 frame->bo,
528203b705cfSriastradh					 I915_GEM_DOMAIN_SAMPLER << 16,
528303b705cfSriastradh					 frame->VBufOffset));
528403b705cfSriastradh
528503b705cfSriastradh		ms3 = MAPSURF_8BIT | MT_8BIT_I8;
528603b705cfSriastradh		ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
528703b705cfSriastradh		ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
528803b705cfSriastradh		OUT_BATCH(ms3);
528903b705cfSriastradh		OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
529003b705cfSriastradh
529103b705cfSriastradh		id = 1<<31 | 2<<1 | !!video->brightness;
529203b705cfSriastradh		if (state->last_shader != id) {
529303b705cfSriastradh			state->last_shader = id;
529403b705cfSriastradh			id = sna->kgem.nbatch++;
529503b705cfSriastradh
529603b705cfSriastradh			/* Declare samplers */
529703b705cfSriastradh			gen3_fs_dcl(FS_S0);	/* Y */
529803b705cfSriastradh			gen3_fs_dcl(FS_S1);	/* U */
529903b705cfSriastradh			gen3_fs_dcl(FS_S2);	/* V */
530003b705cfSriastradh			gen3_fs_dcl(FS_T0);	/* normalized coords */
530103b705cfSriastradh
530203b705cfSriastradh			/* Load samplers to temporaries. */
530303b705cfSriastradh			gen3_fs_texld(FS_R1, FS_S0, FS_T0);
530403b705cfSriastradh			gen3_fs_texld(FS_R2, FS_S1, FS_T0);
530503b705cfSriastradh			gen3_fs_texld(FS_R3, FS_S2, FS_T0);
530603b705cfSriastradh
530703b705cfSriastradh			/* Move the sampled YUV data in R[123] to the first
530803b705cfSriastradh			 * 3 channels of R0.
530903b705cfSriastradh			 */
531003b705cfSriastradh			gen3_fs_mov_masked(FS_R0, MASK_X,
531103b705cfSriastradh					   gen3_fs_operand_reg(FS_R1));
531203b705cfSriastradh			gen3_fs_mov_masked(FS_R0, MASK_Y,
531303b705cfSriastradh					   gen3_fs_operand_reg(FS_R2));
531403b705cfSriastradh			gen3_fs_mov_masked(FS_R0, MASK_Z,
531503b705cfSriastradh					   gen3_fs_operand_reg(FS_R3));
531603b705cfSriastradh
531703b705cfSriastradh			/* Normalize the YUV data */
531803b705cfSriastradh			gen3_fs_add(FS_R0, gen3_fs_operand_reg(FS_R0),
531903b705cfSriastradh				    gen3_fs_operand_reg(FS_C0));
532003b705cfSriastradh			/* dot-product the YUV data in R0 by the vectors of
532103b705cfSriastradh			 * coefficients for calculating R, G, and B, storing
532203b705cfSriastradh			 * the results in the R, G, or B channels of the output
532303b705cfSriastradh			 * color.  The OC results are implicitly clamped
532403b705cfSriastradh			 * at the end of the program.
532503b705cfSriastradh			 */
532603b705cfSriastradh			gen3_fs_dp3(FS_OC, MASK_X,
532703b705cfSriastradh				    gen3_fs_operand_reg(FS_R0),
532803b705cfSriastradh				    gen3_fs_operand_reg(FS_C1));
532903b705cfSriastradh			gen3_fs_dp3(FS_OC, MASK_Y,
533003b705cfSriastradh				    gen3_fs_operand_reg(FS_R0),
533103b705cfSriastradh				    gen3_fs_operand_reg(FS_C2));
533203b705cfSriastradh			gen3_fs_dp3(FS_OC, MASK_Z,
533303b705cfSriastradh				    gen3_fs_operand_reg(FS_R0),
533403b705cfSriastradh				    gen3_fs_operand_reg(FS_C3));
533503b705cfSriastradh			/* Set alpha of the output to 1.0, by wiring W to 1
533603b705cfSriastradh			 * and not actually using the source.
533703b705cfSriastradh			 */
533803b705cfSriastradh			gen3_fs_mov_masked(FS_OC, MASK_W,
533903b705cfSriastradh					   gen3_fs_operand_one());
534003b705cfSriastradh
534103b705cfSriastradh			if (video->brightness != 0) {
534203b705cfSriastradh				gen3_fs_add(FS_OC,
534303b705cfSriastradh					    gen3_fs_operand_reg(FS_OC),
534403b705cfSriastradh					    gen3_fs_operand(FS_C4, X, X, X, ZERO));
534503b705cfSriastradh			}
534603b705cfSriastradh
534703b705cfSriastradh			sna->kgem.batch[id] =
534803b705cfSriastradh				_3DSTATE_PIXEL_SHADER_PROGRAM |
534903b705cfSriastradh				(sna->kgem.nbatch - id - 2);
535003b705cfSriastradh		}
535103b705cfSriastradh	}
535203b705cfSriastradh}
535303b705cfSriastradh
535403b705cfSriastradhstatic void
535503b705cfSriastradhgen3_video_get_batch(struct sna *sna, struct kgem_bo *bo)
535603b705cfSriastradh{
535703b705cfSriastradh	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
535803b705cfSriastradh
535903b705cfSriastradh	if (!kgem_check_batch(&sna->kgem, 120) ||
536003b705cfSriastradh	    !kgem_check_reloc(&sna->kgem, 4) ||
536103b705cfSriastradh	    !kgem_check_exec(&sna->kgem, 2)) {
536203b705cfSriastradh		_kgem_submit(&sna->kgem);
536303b705cfSriastradh		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
536403b705cfSriastradh	}
536503b705cfSriastradh
536603b705cfSriastradh	if (sna->render_state.gen3.need_invariant)
536703b705cfSriastradh		gen3_emit_invariant(sna);
536803b705cfSriastradh}
536903b705cfSriastradh
/*
 * Clamp the number of rectangles the caller wants to emit inline to what
 * fits in the remaining batch space.  A rectangle is three vertices of
 * @floats_per_vertex entries each; one batch entry is held back (the
 * caller emits a primitive header before the vertices).
 *
 * Returns @want if everything fits, otherwise the largest count that does.
 */
static int
gen3_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex)
{
	const int per_rect = 3 * floats_per_vertex;
	const int avail = batch_space(sna) - 1;

	return per_rect * want > avail ? avail / per_rect : want;
}
538103b705cfSriastradh
538203b705cfSriastradhstatic bool
538303b705cfSriastradhgen3_render_video(struct sna *sna,
538403b705cfSriastradh		  struct sna_video *video,
538503b705cfSriastradh		  struct sna_video_frame *frame,
538603b705cfSriastradh		  RegionPtr dstRegion,
538703b705cfSriastradh		  PixmapPtr pixmap)
538803b705cfSriastradh{
538903b705cfSriastradh	struct sna_pixmap *priv = sna_pixmap(pixmap);
539042542f5fSchristos	const BoxRec *pbox = region_rects(dstRegion);
539142542f5fSchristos	int nbox = region_num_rects(dstRegion);
539203b705cfSriastradh	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
539303b705cfSriastradh	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
539403b705cfSriastradh	int src_width = frame->src.x2 - frame->src.x1;
539503b705cfSriastradh	int src_height = frame->src.y2 - frame->src.y1;
539603b705cfSriastradh	float src_offset_x, src_offset_y;
539703b705cfSriastradh	float src_scale_x, src_scale_y;
539803b705cfSriastradh	int pix_xoff, pix_yoff;
539903b705cfSriastradh	struct kgem_bo *dst_bo;
540003b705cfSriastradh	bool bilinear;
540103b705cfSriastradh	int copy = 0;
540203b705cfSriastradh
540303b705cfSriastradh	DBG(("%s: src:%dx%d (frame:%dx%d) -> dst:%dx%d\n", __FUNCTION__,
540403b705cfSriastradh	     src_width, src_height, frame->width, frame->height, dst_width, dst_height));
540503b705cfSriastradh
540642542f5fSchristos	assert(priv->gpu_bo);
540703b705cfSriastradh	dst_bo = priv->gpu_bo;
540803b705cfSriastradh
540903b705cfSriastradh	bilinear = src_width != dst_width || src_height != dst_height;
541003b705cfSriastradh
541103b705cfSriastradh	src_scale_x = (float)src_width / dst_width / frame->width;
541203b705cfSriastradh	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
541303b705cfSriastradh
541403b705cfSriastradh	src_scale_y = (float)src_height / dst_height / frame->height;
541503b705cfSriastradh	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
541603b705cfSriastradh	DBG(("%s: src offset (%f, %f), scale (%f, %f)\n",
541703b705cfSriastradh	     __FUNCTION__, src_offset_x, src_offset_y, src_scale_x, src_scale_y));
541803b705cfSriastradh
541903b705cfSriastradh	if (too_large(pixmap->drawable.width, pixmap->drawable.height) ||
542003b705cfSriastradh	    !gen3_check_pitch_3d(dst_bo)) {
542103b705cfSriastradh		int bpp = pixmap->drawable.bitsPerPixel;
542203b705cfSriastradh
542303b705cfSriastradh		if (too_large(dst_width, dst_height))
542403b705cfSriastradh			return false;
542503b705cfSriastradh
542603b705cfSriastradh		dst_bo = kgem_create_2d(&sna->kgem,
542703b705cfSriastradh					dst_width, dst_height, bpp,
542803b705cfSriastradh					kgem_choose_tiling(&sna->kgem,
542903b705cfSriastradh							   I915_TILING_X,
543003b705cfSriastradh							   dst_width, dst_height, bpp),
543103b705cfSriastradh					0);
543203b705cfSriastradh		if (!dst_bo)
543303b705cfSriastradh			return false;
543403b705cfSriastradh
543503b705cfSriastradh		pix_xoff = -dstRegion->extents.x1;
543603b705cfSriastradh		pix_yoff = -dstRegion->extents.y1;
543703b705cfSriastradh		copy = 1;
543803b705cfSriastradh	} else {
543903b705cfSriastradh		/* Set up the offset for translating from the given region
544003b705cfSriastradh		 * (in screen coordinates) to the backing pixmap.
544103b705cfSriastradh		 */
544203b705cfSriastradh#ifdef COMPOSITE
544303b705cfSriastradh		pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
544403b705cfSriastradh		pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
544503b705cfSriastradh#else
544603b705cfSriastradh		pix_xoff = 0;
544703b705cfSriastradh		pix_yoff = 0;
544803b705cfSriastradh#endif
544903b705cfSriastradh
545003b705cfSriastradh		dst_width  = pixmap->drawable.width;
545103b705cfSriastradh		dst_height = pixmap->drawable.height;
545203b705cfSriastradh	}
545303b705cfSriastradh
545403b705cfSriastradh	gen3_video_get_batch(sna, dst_bo);
545503b705cfSriastradh	gen3_emit_video_state(sna, video, frame, pixmap,
545603b705cfSriastradh			      dst_bo, dst_width, dst_height, bilinear);
545703b705cfSriastradh	do {
545803b705cfSriastradh		int nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
545903b705cfSriastradh		if (nbox_this_time == 0) {
546003b705cfSriastradh			gen3_video_get_batch(sna, dst_bo);
546103b705cfSriastradh			gen3_emit_video_state(sna, video, frame, pixmap,
546203b705cfSriastradh					      dst_bo, dst_width, dst_height, bilinear);
546303b705cfSriastradh			nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
546403b705cfSriastradh			assert(nbox_this_time);
546503b705cfSriastradh		}
546603b705cfSriastradh		nbox -= nbox_this_time;
546703b705cfSriastradh
546803b705cfSriastradh		OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1));
546903b705cfSriastradh		do {
547003b705cfSriastradh			int box_x1 = pbox->x1;
547103b705cfSriastradh			int box_y1 = pbox->y1;
547203b705cfSriastradh			int box_x2 = pbox->x2;
547303b705cfSriastradh			int box_y2 = pbox->y2;
547403b705cfSriastradh
547503b705cfSriastradh			pbox++;
547603b705cfSriastradh
547703b705cfSriastradh			DBG(("%s: dst (%d, %d), (%d, %d) + (%d, %d); src (%f, %f), (%f, %f)\n",
547803b705cfSriastradh			     __FUNCTION__, box_x1, box_y1, box_x2, box_y2, pix_xoff, pix_yoff,
547903b705cfSriastradh			     box_x1 * src_scale_x + src_offset_x,
548003b705cfSriastradh			     box_y1 * src_scale_y + src_offset_y,
548103b705cfSriastradh			     box_x2 * src_scale_x + src_offset_x,
548203b705cfSriastradh			     box_y2 * src_scale_y + src_offset_y));
548303b705cfSriastradh
548403b705cfSriastradh			/* bottom right */
548503b705cfSriastradh			OUT_BATCH_F(box_x2 + pix_xoff);
548603b705cfSriastradh			OUT_BATCH_F(box_y2 + pix_yoff);
548703b705cfSriastradh			OUT_BATCH_F(box_x2 * src_scale_x + src_offset_x);
548803b705cfSriastradh			OUT_BATCH_F(box_y2 * src_scale_y + src_offset_y);
548903b705cfSriastradh
549003b705cfSriastradh			/* bottom left */
549103b705cfSriastradh			OUT_BATCH_F(box_x1 + pix_xoff);
549203b705cfSriastradh			OUT_BATCH_F(box_y2 + pix_yoff);
549303b705cfSriastradh			OUT_BATCH_F(box_x1 * src_scale_x + src_offset_x);
549403b705cfSriastradh			OUT_BATCH_F(box_y2 * src_scale_y + src_offset_y);
549503b705cfSriastradh
549603b705cfSriastradh			/* top left */
549703b705cfSriastradh			OUT_BATCH_F(box_x1 + pix_xoff);
549803b705cfSriastradh			OUT_BATCH_F(box_y1 + pix_yoff);
549903b705cfSriastradh			OUT_BATCH_F(box_x1 * src_scale_x + src_offset_x);
550003b705cfSriastradh			OUT_BATCH_F(box_y1 * src_scale_y + src_offset_y);
550103b705cfSriastradh		} while (--nbox_this_time);
550203b705cfSriastradh	} while (nbox);
550303b705cfSriastradh
550403b705cfSriastradh	if (copy) {
550503b705cfSriastradh#ifdef COMPOSITE
550603b705cfSriastradh		pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
550703b705cfSriastradh		pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
550803b705cfSriastradh#else
550903b705cfSriastradh		pix_xoff = 0;
551003b705cfSriastradh		pix_yoff = 0;
551103b705cfSriastradh#endif
551203b705cfSriastradh		sna_blt_copy_boxes(sna, GXcopy,
551303b705cfSriastradh				   dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1,
551403b705cfSriastradh				   priv->gpu_bo, pix_xoff, pix_yoff,
551503b705cfSriastradh				   pixmap->drawable.bitsPerPixel,
551642542f5fSchristos				   region_rects(dstRegion),
551742542f5fSchristos				   region_num_rects(dstRegion));
551803b705cfSriastradh
551903b705cfSriastradh		kgem_bo_destroy(&sna->kgem, dst_bo);
552003b705cfSriastradh	}
552103b705cfSriastradh
552203b705cfSriastradh	if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
552303b705cfSriastradh		if ((pix_xoff | pix_yoff) == 0) {
552403b705cfSriastradh			sna_damage_add(&priv->gpu_damage, dstRegion);
552503b705cfSriastradh			sna_damage_subtract(&priv->cpu_damage, dstRegion);
552603b705cfSriastradh		} else {
552703b705cfSriastradh			sna_damage_add_boxes(&priv->gpu_damage,
552842542f5fSchristos					     region_rects(dstRegion),
552942542f5fSchristos					     region_num_rects(dstRegion),
553003b705cfSriastradh					     pix_xoff, pix_yoff);
553103b705cfSriastradh			sna_damage_subtract_boxes(&priv->cpu_damage,
553242542f5fSchristos						  region_rects(dstRegion),
553342542f5fSchristos						  region_num_rects(dstRegion),
553403b705cfSriastradh						  pix_xoff, pix_yoff);
553503b705cfSriastradh		}
553603b705cfSriastradh	}
553703b705cfSriastradh
553803b705cfSriastradh	return true;
553903b705cfSriastradh}
554003b705cfSriastradh
554103b705cfSriastradhstatic void
554203b705cfSriastradhgen3_render_copy_setup_source(struct sna_composite_channel *channel,
554342542f5fSchristos			      const DrawableRec *draw,
554403b705cfSriastradh			      struct kgem_bo *bo)
554503b705cfSriastradh{
554603b705cfSriastradh	int i;
554703b705cfSriastradh
554803b705cfSriastradh	channel->u.gen3.type = SHADER_TEXTURE;
554903b705cfSriastradh	channel->filter = gen3_filter(PictFilterNearest);
555003b705cfSriastradh	channel->repeat = gen3_texture_repeat(RepeatNone);
555142542f5fSchristos	channel->width  = draw->width;
555242542f5fSchristos	channel->height = draw->height;
555342542f5fSchristos	channel->scale[0] = 1.f/draw->width;
555442542f5fSchristos	channel->scale[1] = 1.f/draw->height;
555503b705cfSriastradh	channel->offset[0] = 0;
555603b705cfSriastradh	channel->offset[1] = 0;
555703b705cfSriastradh
555842542f5fSchristos	channel->pict_format = sna_format_for_depth(draw->depth);
555903b705cfSriastradh	if (!gen3_composite_channel_set_format(channel, channel->pict_format)) {
556003b705cfSriastradh		for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
556103b705cfSriastradh			if (gen3_tex_formats[i].xfmt == channel->pict_format) {
556203b705cfSriastradh				channel->card_format = gen3_tex_formats[i].card_fmt;
556303b705cfSriastradh				channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
556403b705cfSriastradh				channel->alpha_fixup = true;
556503b705cfSriastradh				break;
556603b705cfSriastradh			}
556703b705cfSriastradh		}
556803b705cfSriastradh	}
556903b705cfSriastradh	assert(channel->card_format);
557003b705cfSriastradh
557103b705cfSriastradh	channel->bo = bo;
557203b705cfSriastradh	channel->is_affine = 1;
557303b705cfSriastradh}
557403b705cfSriastradh
557503b705cfSriastradhstatic bool
557603b705cfSriastradhgen3_render_copy_boxes(struct sna *sna, uint8_t alu,
557742542f5fSchristos		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
557842542f5fSchristos		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
557903b705cfSriastradh		       const BoxRec *box, int n, unsigned flags)
558003b705cfSriastradh{
558103b705cfSriastradh	struct sna_composite_op tmp;
558203b705cfSriastradh
558303b705cfSriastradh#if NO_COPY_BOXES
558442542f5fSchristos	if (!sna_blt_compare_depth(src, dst))
558503b705cfSriastradh		return false;
558603b705cfSriastradh
558703b705cfSriastradh	return sna_blt_copy_boxes(sna, alu,
558803b705cfSriastradh				  src_bo, src_dx, src_dy,
558903b705cfSriastradh				  dst_bo, dst_dx, dst_dy,
559042542f5fSchristos				  dst->bitsPerPixel,
559103b705cfSriastradh				  box, n);
559203b705cfSriastradh#endif
559303b705cfSriastradh
559403b705cfSriastradh	DBG(("%s (%d, %d)->(%d, %d) x %d\n",
559503b705cfSriastradh	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
559603b705cfSriastradh
559742542f5fSchristos	if (sna_blt_compare_depth(src, dst) &&
559803b705cfSriastradh	    sna_blt_copy_boxes(sna, alu,
559903b705cfSriastradh			       src_bo, src_dx, src_dy,
560003b705cfSriastradh			       dst_bo, dst_dx, dst_dy,
560142542f5fSchristos			       dst->bitsPerPixel,
560203b705cfSriastradh			       box, n))
560303b705cfSriastradh		return true;
560403b705cfSriastradh
560503b705cfSriastradh	if (!(alu == GXcopy || alu == GXclear) ||
560603b705cfSriastradh	    src_bo == dst_bo || /* XXX handle overlap using 3D ? */
560703b705cfSriastradh	    src_bo->pitch > MAX_3D_PITCH ||
560842542f5fSchristos	    too_large(src->width, src->height)) {
560903b705cfSriastradhfallback_blt:
561003b705cfSriastradh		if (!kgem_bo_can_blt(&sna->kgem, src_bo) ||
561103b705cfSriastradh		    !kgem_bo_can_blt(&sna->kgem, dst_bo))
561203b705cfSriastradh			return false;
561303b705cfSriastradh
561403b705cfSriastradh		return sna_blt_copy_boxes_fallback(sna, alu,
561503b705cfSriastradh						   src, src_bo, src_dx, src_dy,
561603b705cfSriastradh						   dst, dst_bo, dst_dx, dst_dy,
561703b705cfSriastradh						   box, n);
561803b705cfSriastradh	}
561903b705cfSriastradh
562003b705cfSriastradh	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
562103b705cfSriastradh		kgem_submit(&sna->kgem);
562203b705cfSriastradh		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
562303b705cfSriastradh			goto fallback_blt;
562403b705cfSriastradh	}
562503b705cfSriastradh
562603b705cfSriastradh	memset(&tmp, 0, sizeof(tmp));
562703b705cfSriastradh	tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
562803b705cfSriastradh
562942542f5fSchristos	tmp.dst.pixmap = (PixmapPtr)dst;
563042542f5fSchristos	tmp.dst.width = dst->width;
563142542f5fSchristos	tmp.dst.height = dst->height;
563242542f5fSchristos	tmp.dst.format = sna_format_for_depth(dst->depth);
563303b705cfSriastradh	tmp.dst.bo = dst_bo;
563403b705cfSriastradh	tmp.dst.x = tmp.dst.y = 0;
563503b705cfSriastradh	tmp.damage = NULL;
563603b705cfSriastradh
563703b705cfSriastradh	sna_render_composite_redirect_init(&tmp);
563803b705cfSriastradh	if (too_large(tmp.dst.width, tmp.dst.height) ||
563903b705cfSriastradh	    dst_bo->pitch > MAX_3D_PITCH) {
564003b705cfSriastradh		BoxRec extents = box[0];
564103b705cfSriastradh		int i;
564203b705cfSriastradh
564303b705cfSriastradh		for (i = 1; i < n; i++) {
564403b705cfSriastradh			if (box[i].x1 < extents.x1)
564503b705cfSriastradh				extents.x1 = box[i].x1;
564603b705cfSriastradh			if (box[i].y1 < extents.y1)
564703b705cfSriastradh				extents.y1 = box[i].y1;
564803b705cfSriastradh
564903b705cfSriastradh			if (box[i].x2 > extents.x2)
565003b705cfSriastradh				extents.x2 = box[i].x2;
565103b705cfSriastradh			if (box[i].y2 > extents.y2)
565203b705cfSriastradh				extents.y2 = box[i].y2;
565303b705cfSriastradh		}
565403b705cfSriastradh		if (!sna_render_composite_redirect(sna, &tmp,
565503b705cfSriastradh						   extents.x1 + dst_dx,
565603b705cfSriastradh						   extents.y1 + dst_dy,
565703b705cfSriastradh						   extents.x2 - extents.x1,
565803b705cfSriastradh						   extents.y2 - extents.y1,
565903b705cfSriastradh						   n > 1))
566003b705cfSriastradh			goto fallback_tiled;
566103b705cfSriastradh	}
566203b705cfSriastradh
566303b705cfSriastradh	gen3_render_copy_setup_source(&tmp.src, src, src_bo);
566403b705cfSriastradh
566503b705cfSriastradh	tmp.floats_per_vertex = 4;
566603b705cfSriastradh	tmp.floats_per_rect = 12;
566703b705cfSriastradh	tmp.mask.bo = NULL;
566803b705cfSriastradh	tmp.mask.u.gen3.type = SHADER_NONE;
566903b705cfSriastradh
567003b705cfSriastradh	dst_dx += tmp.dst.x;
567103b705cfSriastradh	dst_dy += tmp.dst.y;
567203b705cfSriastradh	tmp.dst.x = tmp.dst.y = 0;
567303b705cfSriastradh
567403b705cfSriastradh	gen3_align_vertex(sna, &tmp);
567542542f5fSchristos	gen3_emit_composite_state(sna, &tmp);
567603b705cfSriastradh
567703b705cfSriastradh	do {
567803b705cfSriastradh		int n_this_time;
567903b705cfSriastradh
568003b705cfSriastradh		n_this_time = gen3_get_rectangles(sna, &tmp, n);
568103b705cfSriastradh		n -= n_this_time;
568203b705cfSriastradh
568303b705cfSriastradh		do {
568403b705cfSriastradh			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
568503b705cfSriastradh			     box->x1 + src_dx, box->y1 + src_dy,
568603b705cfSriastradh			     box->x1 + dst_dx, box->y1 + dst_dy,
568703b705cfSriastradh			     box->x2 - box->x1, box->y2 - box->y1));
568803b705cfSriastradh			OUT_VERTEX(box->x2 + dst_dx);
568903b705cfSriastradh			OUT_VERTEX(box->y2 + dst_dy);
569003b705cfSriastradh			OUT_VERTEX((box->x2 + src_dx) * tmp.src.scale[0]);
569103b705cfSriastradh			OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
569203b705cfSriastradh
569303b705cfSriastradh			OUT_VERTEX(box->x1 + dst_dx);
569403b705cfSriastradh			OUT_VERTEX(box->y2 + dst_dy);
569503b705cfSriastradh			OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
569603b705cfSriastradh			OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
569703b705cfSriastradh
569803b705cfSriastradh			OUT_VERTEX(box->x1 + dst_dx);
569903b705cfSriastradh			OUT_VERTEX(box->y1 + dst_dy);
570003b705cfSriastradh			OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
570103b705cfSriastradh			OUT_VERTEX((box->y1 + src_dy) * tmp.src.scale[1]);
570203b705cfSriastradh
570303b705cfSriastradh			box++;
570403b705cfSriastradh		} while (--n_this_time);
570503b705cfSriastradh	} while (n);
570603b705cfSriastradh
570703b705cfSriastradh	gen3_vertex_flush(sna);
570803b705cfSriastradh	sna_render_composite_redirect_done(sna, &tmp);
570903b705cfSriastradh	return true;
571003b705cfSriastradh
571103b705cfSriastradhfallback_tiled:
571203b705cfSriastradh	return sna_tiling_copy_boxes(sna, alu,
571303b705cfSriastradh				     src, src_bo, src_dx, src_dy,
571403b705cfSriastradh				     dst, dst_bo, dst_dx, dst_dy,
571503b705cfSriastradh				     box, n);
571603b705cfSriastradh}
571703b705cfSriastradh
571803b705cfSriastradhstatic void
571903b705cfSriastradhgen3_render_copy_blt(struct sna *sna,
572003b705cfSriastradh		     const struct sna_copy_op *op,
572103b705cfSriastradh		     int16_t sx, int16_t sy,
572203b705cfSriastradh		     int16_t w, int16_t h,
572303b705cfSriastradh		     int16_t dx, int16_t dy)
572403b705cfSriastradh{
572503b705cfSriastradh	gen3_get_rectangles(sna, &op->base, 1);
572603b705cfSriastradh
572703b705cfSriastradh	OUT_VERTEX(dx+w);
572803b705cfSriastradh	OUT_VERTEX(dy+h);
572903b705cfSriastradh	OUT_VERTEX((sx+w)*op->base.src.scale[0]);
573003b705cfSriastradh	OUT_VERTEX((sy+h)*op->base.src.scale[1]);
573103b705cfSriastradh
573203b705cfSriastradh	OUT_VERTEX(dx);
573303b705cfSriastradh	OUT_VERTEX(dy+h);
573403b705cfSriastradh	OUT_VERTEX(sx*op->base.src.scale[0]);
573503b705cfSriastradh	OUT_VERTEX((sy+h)*op->base.src.scale[1]);
573603b705cfSriastradh
573703b705cfSriastradh	OUT_VERTEX(dx);
573803b705cfSriastradh	OUT_VERTEX(dy);
573903b705cfSriastradh	OUT_VERTEX(sx*op->base.src.scale[0]);
574003b705cfSriastradh	OUT_VERTEX(sy*op->base.src.scale[1]);
574103b705cfSriastradh}
574203b705cfSriastradh
574303b705cfSriastradhstatic void
574403b705cfSriastradhgen3_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
574503b705cfSriastradh{
574603b705cfSriastradh	if (sna->render.vertex_offset)
574703b705cfSriastradh		gen3_vertex_flush(sna);
574803b705cfSriastradh}
574903b705cfSriastradh
575003b705cfSriastradhstatic bool
575103b705cfSriastradhgen3_render_copy(struct sna *sna, uint8_t alu,
575203b705cfSriastradh		 PixmapPtr src, struct kgem_bo *src_bo,
575303b705cfSriastradh		 PixmapPtr dst, struct kgem_bo *dst_bo,
575403b705cfSriastradh		 struct sna_copy_op *tmp)
575503b705cfSriastradh{
575603b705cfSriastradh#if NO_COPY
575703b705cfSriastradh	if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
575803b705cfSriastradh		return false;
575903b705cfSriastradh
576003b705cfSriastradh	return sna_blt_copy(sna, alu,
576103b705cfSriastradh			    src_bo, dst_bo,
576203b705cfSriastradh			    dst->drawable.bitsPerPixel,
576303b705cfSriastradh			    tmp);
576403b705cfSriastradh#endif
576503b705cfSriastradh
576603b705cfSriastradh	/* Prefer to use the BLT */
576703b705cfSriastradh	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
576803b705cfSriastradh	    sna_blt_copy(sna, alu,
576903b705cfSriastradh			 src_bo, dst_bo,
577003b705cfSriastradh			 dst->drawable.bitsPerPixel,
577103b705cfSriastradh			 tmp))
577203b705cfSriastradh		return true;
577303b705cfSriastradh
577403b705cfSriastradh	/* Must use the BLT if we can't RENDER... */
577503b705cfSriastradh	if (!(alu == GXcopy || alu == GXclear) ||
577603b705cfSriastradh	    too_large(src->drawable.width, src->drawable.height) ||
577703b705cfSriastradh	    too_large(dst->drawable.width, dst->drawable.height) ||
577803b705cfSriastradh	    src_bo->pitch > MAX_3D_PITCH || dst_bo->pitch > MAX_3D_PITCH) {
577903b705cfSriastradhfallback:
578003b705cfSriastradh		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
578103b705cfSriastradh			return false;
578203b705cfSriastradh
578303b705cfSriastradh		return sna_blt_copy(sna, alu, src_bo, dst_bo,
578403b705cfSriastradh				    dst->drawable.bitsPerPixel,
578503b705cfSriastradh				    tmp);
578603b705cfSriastradh	}
578703b705cfSriastradh
578803b705cfSriastradh	tmp->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;
578903b705cfSriastradh
579003b705cfSriastradh	tmp->base.dst.pixmap = dst;
579103b705cfSriastradh	tmp->base.dst.width = dst->drawable.width;
579203b705cfSriastradh	tmp->base.dst.height = dst->drawable.height;
579303b705cfSriastradh	tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
579403b705cfSriastradh	tmp->base.dst.bo = dst_bo;
579503b705cfSriastradh
579642542f5fSchristos	gen3_render_copy_setup_source(&tmp->base.src, &src->drawable, src_bo);
579703b705cfSriastradh
579803b705cfSriastradh	tmp->base.floats_per_vertex = 4;
579903b705cfSriastradh	tmp->base.floats_per_rect = 12;
580003b705cfSriastradh	tmp->base.mask.bo = NULL;
580103b705cfSriastradh	tmp->base.mask.u.gen3.type = SHADER_NONE;
580203b705cfSriastradh
580303b705cfSriastradh	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
580403b705cfSriastradh		kgem_submit(&sna->kgem);
580503b705cfSriastradh		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
580603b705cfSriastradh			goto fallback;
580703b705cfSriastradh	}
580803b705cfSriastradh
580903b705cfSriastradh	tmp->blt  = gen3_render_copy_blt;
581003b705cfSriastradh	tmp->done = gen3_render_copy_done;
581103b705cfSriastradh
581203b705cfSriastradh	gen3_align_vertex(sna, &tmp->base);
581342542f5fSchristos	gen3_emit_composite_state(sna, &tmp->base);
581403b705cfSriastradh	return true;
581503b705cfSriastradh}
581603b705cfSriastradh
581703b705cfSriastradhstatic bool
581803b705cfSriastradhgen3_render_fill_boxes_try_blt(struct sna *sna,
581903b705cfSriastradh			       CARD8 op, PictFormat format,
582003b705cfSriastradh			       const xRenderColor *color,
582142542f5fSchristos			       const DrawableRec *dst, struct kgem_bo *dst_bo,
582203b705cfSriastradh			       const BoxRec *box, int n)
582303b705cfSriastradh{
582403b705cfSriastradh	uint8_t alu;
582503b705cfSriastradh	uint32_t pixel;
582603b705cfSriastradh
582703b705cfSriastradh	if (dst_bo->tiling == I915_TILING_Y) {
582803b705cfSriastradh		DBG(("%s: y-tiling, can't blit\n", __FUNCTION__));
582942542f5fSchristos		assert(!too_large(dst->width, dst->height));
583003b705cfSriastradh		return false;
583103b705cfSriastradh	}
583203b705cfSriastradh
583303b705cfSriastradh	if (op > PictOpSrc)
583403b705cfSriastradh		return false;
583503b705cfSriastradh
583603b705cfSriastradh	if (op == PictOpClear) {
583703b705cfSriastradh		alu = GXclear;
583803b705cfSriastradh		pixel = 0;
583903b705cfSriastradh	} else if (!sna_get_pixel_from_rgba(&pixel,
584003b705cfSriastradh					    color->red,
584103b705cfSriastradh					    color->green,
584203b705cfSriastradh					    color->blue,
584303b705cfSriastradh					    color->alpha,
584403b705cfSriastradh					    format))
584503b705cfSriastradh		return false;
584603b705cfSriastradh	else
584703b705cfSriastradh		alu = GXcopy;
584803b705cfSriastradh
584903b705cfSriastradh	return sna_blt_fill_boxes(sna, alu,
585042542f5fSchristos				  dst_bo, dst->bitsPerPixel,
585103b705cfSriastradh				  pixel, box, n);
585203b705cfSriastradh}
585303b705cfSriastradh
585403b705cfSriastradhstatic inline bool prefer_fill_blt(struct sna *sna)
585503b705cfSriastradh{
585603b705cfSriastradh#if PREFER_BLT_FILL
585703b705cfSriastradh	return true;
585803b705cfSriastradh#else
585903b705cfSriastradh	return sna->kgem.mode != KGEM_RENDER;
586003b705cfSriastradh#endif
586103b705cfSriastradh}
586203b705cfSriastradh
586303b705cfSriastradhstatic bool
586403b705cfSriastradhgen3_render_fill_boxes(struct sna *sna,
586503b705cfSriastradh		       CARD8 op,
586603b705cfSriastradh		       PictFormat format,
586703b705cfSriastradh		       const xRenderColor *color,
586842542f5fSchristos		       const DrawableRec *dst, struct kgem_bo *dst_bo,
586903b705cfSriastradh		       const BoxRec *box, int n)
587003b705cfSriastradh{
587103b705cfSriastradh	struct sna_composite_op tmp;
587203b705cfSriastradh	uint32_t pixel;
587303b705cfSriastradh
587403b705cfSriastradh	if (op >= ARRAY_SIZE(gen3_blend_op)) {
587503b705cfSriastradh		DBG(("%s: fallback due to unhandled blend op: %d\n",
587603b705cfSriastradh		     __FUNCTION__, op));
587703b705cfSriastradh		return false;
587803b705cfSriastradh	}
587903b705cfSriastradh
588003b705cfSriastradh#if NO_FILL_BOXES
588103b705cfSriastradh	return gen3_render_fill_boxes_try_blt(sna, op, format, color,
588203b705cfSriastradh					      dst, dst_bo,
588303b705cfSriastradh					      box, n);
588403b705cfSriastradh#endif
588503b705cfSriastradh
588603b705cfSriastradh	DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x))\n",
588703b705cfSriastradh	     __FUNCTION__, op, (int)format,
588803b705cfSriastradh	     color->red, color->green, color->blue, color->alpha));
588903b705cfSriastradh
589042542f5fSchristos	if (too_large(dst->width, dst->height) ||
589103b705cfSriastradh	    dst_bo->pitch > MAX_3D_PITCH ||
589203b705cfSriastradh	    !gen3_check_dst_format(format)) {
589303b705cfSriastradh		DBG(("%s: try blt, too large or incompatible destination\n",
589403b705cfSriastradh		     __FUNCTION__));
589503b705cfSriastradh		if (gen3_render_fill_boxes_try_blt(sna, op, format, color,
589603b705cfSriastradh						   dst, dst_bo,
589703b705cfSriastradh						   box, n))
589803b705cfSriastradh			return true;
589903b705cfSriastradh
590003b705cfSriastradh		if (!gen3_check_dst_format(format))
590103b705cfSriastradh			return false;
590203b705cfSriastradh
590303b705cfSriastradh		return sna_tiling_fill_boxes(sna, op, format, color,
590403b705cfSriastradh					     dst, dst_bo, box, n);
590503b705cfSriastradh	}
590603b705cfSriastradh
590703b705cfSriastradh	if (prefer_fill_blt(sna) &&
590803b705cfSriastradh	    gen3_render_fill_boxes_try_blt(sna, op, format, color,
590903b705cfSriastradh					   dst, dst_bo,
591003b705cfSriastradh					   box, n))
591103b705cfSriastradh		return true;
591203b705cfSriastradh
591303b705cfSriastradh	if (op == PictOpClear) {
591403b705cfSriastradh		pixel = 0;
591503b705cfSriastradh	} else {
591603b705cfSriastradh		if (!sna_get_pixel_from_rgba(&pixel,
591703b705cfSriastradh					     color->red,
591803b705cfSriastradh					     color->green,
591903b705cfSriastradh					     color->blue,
592003b705cfSriastradh					     color->alpha,
592103b705cfSriastradh					     PICT_a8r8g8b8)) {
592203b705cfSriastradh			assert(0);
592303b705cfSriastradh			return false;
592403b705cfSriastradh		}
592503b705cfSriastradh	}
592642542f5fSchristos	DBG(("%s: using shader for op=%d, format=%08x, pixel=%08x\n",
592703b705cfSriastradh	     __FUNCTION__, op, (int)format, pixel));
592803b705cfSriastradh
592903b705cfSriastradh	tmp.op = op;
593042542f5fSchristos	tmp.dst.pixmap = (PixmapPtr)dst;
593142542f5fSchristos	tmp.dst.width = dst->width;
593242542f5fSchristos	tmp.dst.height = dst->height;
593303b705cfSriastradh	tmp.dst.format = format;
593403b705cfSriastradh	tmp.dst.bo = dst_bo;
593503b705cfSriastradh	tmp.damage = NULL;
593603b705cfSriastradh	tmp.floats_per_vertex = 2;
593703b705cfSriastradh	tmp.floats_per_rect = 6;
593803b705cfSriastradh	tmp.rb_reversed = 0;
593903b705cfSriastradh	tmp.has_component_alpha = 0;
594003b705cfSriastradh	tmp.need_magic_ca_pass = false;
594103b705cfSriastradh
594203b705cfSriastradh	gen3_init_solid(&tmp.src, pixel);
594303b705cfSriastradh	tmp.mask.bo = NULL;
594403b705cfSriastradh	tmp.mask.u.gen3.type = SHADER_NONE;
594503b705cfSriastradh	tmp.u.gen3.num_constants = 0;
594603b705cfSriastradh
594703b705cfSriastradh	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
594803b705cfSriastradh		kgem_submit(&sna->kgem);
594942542f5fSchristos		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL))
595042542f5fSchristos			return false;
595103b705cfSriastradh	}
595203b705cfSriastradh
595303b705cfSriastradh	gen3_align_vertex(sna, &tmp);
595442542f5fSchristos	gen3_emit_composite_state(sna, &tmp);
595503b705cfSriastradh
595603b705cfSriastradh	do {
595703b705cfSriastradh		int n_this_time;
595803b705cfSriastradh
595903b705cfSriastradh		n_this_time = gen3_get_rectangles(sna, &tmp, n);
596003b705cfSriastradh		n -= n_this_time;
596103b705cfSriastradh
596203b705cfSriastradh		do {
596303b705cfSriastradh			DBG(("	(%d, %d), (%d, %d): %x\n",
596403b705cfSriastradh			     box->x1, box->y1, box->x2, box->y2, pixel));
596503b705cfSriastradh			OUT_VERTEX(box->x2);
596603b705cfSriastradh			OUT_VERTEX(box->y2);
596703b705cfSriastradh			OUT_VERTEX(box->x1);
596803b705cfSriastradh			OUT_VERTEX(box->y2);
596903b705cfSriastradh			OUT_VERTEX(box->x1);
597003b705cfSriastradh			OUT_VERTEX(box->y1);
597103b705cfSriastradh			box++;
597203b705cfSriastradh		} while (--n_this_time);
597303b705cfSriastradh	} while (n);
597403b705cfSriastradh
597503b705cfSriastradh	gen3_vertex_flush(sna);
597603b705cfSriastradh	return true;
597703b705cfSriastradh}
597803b705cfSriastradh
597903b705cfSriastradhstatic void
598003b705cfSriastradhgen3_render_fill_op_blt(struct sna *sna,
598103b705cfSriastradh			const struct sna_fill_op *op,
598203b705cfSriastradh			int16_t x, int16_t y, int16_t w, int16_t h)
598303b705cfSriastradh{
598403b705cfSriastradh	gen3_get_rectangles(sna, &op->base, 1);
598503b705cfSriastradh
598603b705cfSriastradh	OUT_VERTEX(x+w);
598703b705cfSriastradh	OUT_VERTEX(y+h);
598803b705cfSriastradh	OUT_VERTEX(x);
598903b705cfSriastradh	OUT_VERTEX(y+h);
599003b705cfSriastradh	OUT_VERTEX(x);
599103b705cfSriastradh	OUT_VERTEX(y);
599203b705cfSriastradh}
599303b705cfSriastradh
599403b705cfSriastradhfastcall static void
599503b705cfSriastradhgen3_render_fill_op_box(struct sna *sna,
599603b705cfSriastradh			const struct sna_fill_op *op,
599703b705cfSriastradh			const BoxRec *box)
599803b705cfSriastradh{
599903b705cfSriastradh	gen3_get_rectangles(sna, &op->base, 1);
600003b705cfSriastradh
600103b705cfSriastradh	OUT_VERTEX(box->x2);
600203b705cfSriastradh	OUT_VERTEX(box->y2);
600303b705cfSriastradh	OUT_VERTEX(box->x1);
600403b705cfSriastradh	OUT_VERTEX(box->y2);
600503b705cfSriastradh	OUT_VERTEX(box->x1);
600603b705cfSriastradh	OUT_VERTEX(box->y1);
600703b705cfSriastradh}
600803b705cfSriastradh
600903b705cfSriastradhfastcall static void
601003b705cfSriastradhgen3_render_fill_op_boxes(struct sna *sna,
601103b705cfSriastradh			  const struct sna_fill_op *op,
601203b705cfSriastradh			  const BoxRec *box,
601303b705cfSriastradh			  int nbox)
601403b705cfSriastradh{
601503b705cfSriastradh	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
601603b705cfSriastradh	     box->x1, box->y1, box->x2, box->y2, nbox));
601703b705cfSriastradh
601803b705cfSriastradh	do {
601903b705cfSriastradh		int nbox_this_time;
602003b705cfSriastradh
602103b705cfSriastradh		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
602203b705cfSriastradh		nbox -= nbox_this_time;
602303b705cfSriastradh
602403b705cfSriastradh		do {
602503b705cfSriastradh			OUT_VERTEX(box->x2);
602603b705cfSriastradh			OUT_VERTEX(box->y2);
602703b705cfSriastradh			OUT_VERTEX(box->x1);
602803b705cfSriastradh			OUT_VERTEX(box->y2);
602903b705cfSriastradh			OUT_VERTEX(box->x1);
603003b705cfSriastradh			OUT_VERTEX(box->y1);
603103b705cfSriastradh			box++;
603203b705cfSriastradh		} while (--nbox_this_time);
603303b705cfSriastradh	} while (nbox);
603403b705cfSriastradh}
603503b705cfSriastradh
603603b705cfSriastradhstatic void
603703b705cfSriastradhgen3_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
603803b705cfSriastradh{
603903b705cfSriastradh	if (sna->render.vertex_offset)
604003b705cfSriastradh		gen3_vertex_flush(sna);
604103b705cfSriastradh}
604203b705cfSriastradh
604303b705cfSriastradhstatic bool
604403b705cfSriastradhgen3_render_fill(struct sna *sna, uint8_t alu,
604503b705cfSriastradh		 PixmapPtr dst, struct kgem_bo *dst_bo,
604642542f5fSchristos		 uint32_t color, unsigned flags,
604703b705cfSriastradh		 struct sna_fill_op *tmp)
604803b705cfSriastradh{
604903b705cfSriastradh#if NO_FILL
605003b705cfSriastradh	return sna_blt_fill(sna, alu,
605103b705cfSriastradh			    dst_bo, dst->drawable.bitsPerPixel,
605203b705cfSriastradh			    color,
605303b705cfSriastradh			    tmp);
605403b705cfSriastradh#endif
605503b705cfSriastradh
605603b705cfSriastradh	/* Prefer to use the BLT if already engaged */
605703b705cfSriastradh	if (prefer_fill_blt(sna) &&
605803b705cfSriastradh	    sna_blt_fill(sna, alu,
605903b705cfSriastradh			 dst_bo, dst->drawable.bitsPerPixel,
606003b705cfSriastradh			 color,
606103b705cfSriastradh			 tmp))
606203b705cfSriastradh		return true;
606303b705cfSriastradh
606403b705cfSriastradh	/* Must use the BLT if we can't RENDER... */
606503b705cfSriastradh	if (!(alu == GXcopy || alu == GXclear) ||
606603b705cfSriastradh	    too_large(dst->drawable.width, dst->drawable.height) ||
606703b705cfSriastradh	    dst_bo->pitch > MAX_3D_PITCH)
606803b705cfSriastradh		return sna_blt_fill(sna, alu,
606903b705cfSriastradh				    dst_bo, dst->drawable.bitsPerPixel,
607003b705cfSriastradh				    color,
607103b705cfSriastradh				    tmp);
607203b705cfSriastradh
607303b705cfSriastradh	if (alu == GXclear)
607403b705cfSriastradh		color = 0;
607503b705cfSriastradh
607603b705cfSriastradh	tmp->base.op = color == 0 ? PictOpClear : PictOpSrc;
607703b705cfSriastradh	tmp->base.dst.pixmap = dst;
607803b705cfSriastradh	tmp->base.dst.width = dst->drawable.width;
607903b705cfSriastradh	tmp->base.dst.height = dst->drawable.height;
608003b705cfSriastradh	tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
608103b705cfSriastradh	tmp->base.dst.bo = dst_bo;
608203b705cfSriastradh	tmp->base.floats_per_vertex = 2;
608303b705cfSriastradh	tmp->base.floats_per_rect = 6;
608403b705cfSriastradh	tmp->base.need_magic_ca_pass = 0;
608503b705cfSriastradh	tmp->base.has_component_alpha = 0;
608603b705cfSriastradh	tmp->base.rb_reversed = 0;
608703b705cfSriastradh
608803b705cfSriastradh	gen3_init_solid(&tmp->base.src,
608903b705cfSriastradh			sna_rgba_for_color(color, dst->drawable.depth));
609003b705cfSriastradh	tmp->base.mask.bo = NULL;
609103b705cfSriastradh	tmp->base.mask.u.gen3.type = SHADER_NONE;
609203b705cfSriastradh	tmp->base.u.gen3.num_constants = 0;
609303b705cfSriastradh
609403b705cfSriastradh	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
609503b705cfSriastradh		kgem_submit(&sna->kgem);
609642542f5fSchristos		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL))
609742542f5fSchristos			return false;
609803b705cfSriastradh	}
609903b705cfSriastradh
610003b705cfSriastradh	tmp->blt   = gen3_render_fill_op_blt;
610103b705cfSriastradh	tmp->box   = gen3_render_fill_op_box;
610203b705cfSriastradh	tmp->boxes = gen3_render_fill_op_boxes;
610342542f5fSchristos	tmp->points = NULL;
610403b705cfSriastradh	tmp->done  = gen3_render_fill_op_done;
610503b705cfSriastradh
610603b705cfSriastradh	gen3_align_vertex(sna, &tmp->base);
610742542f5fSchristos	gen3_emit_composite_state(sna, &tmp->base);
610803b705cfSriastradh	return true;
610903b705cfSriastradh}
611003b705cfSriastradh
611103b705cfSriastradhstatic bool
611203b705cfSriastradhgen3_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
611303b705cfSriastradh			     uint32_t color,
611403b705cfSriastradh			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
611503b705cfSriastradh			     uint8_t alu)
611603b705cfSriastradh{
611703b705cfSriastradh	BoxRec box;
611803b705cfSriastradh
611903b705cfSriastradh	box.x1 = x1;
612003b705cfSriastradh	box.y1 = y1;
612103b705cfSriastradh	box.x2 = x2;
612203b705cfSriastradh	box.y2 = y2;
612303b705cfSriastradh
612403b705cfSriastradh	return sna_blt_fill_boxes(sna, alu,
612503b705cfSriastradh				  bo, dst->drawable.bitsPerPixel,
612603b705cfSriastradh				  color, &box, 1);
612703b705cfSriastradh}
612803b705cfSriastradh
612903b705cfSriastradhstatic bool
613003b705cfSriastradhgen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
613103b705cfSriastradh		     uint32_t color,
613203b705cfSriastradh		     int16_t x1, int16_t y1,
613303b705cfSriastradh		     int16_t x2, int16_t y2,
613403b705cfSriastradh		     uint8_t alu)
613503b705cfSriastradh{
613603b705cfSriastradh	struct sna_composite_op tmp;
613703b705cfSriastradh
613803b705cfSriastradh#if NO_FILL_ONE
613903b705cfSriastradh	return gen3_render_fill_one_try_blt(sna, dst, bo, color,
614003b705cfSriastradh					    x1, y1, x2, y2, alu);
614103b705cfSriastradh#endif
614203b705cfSriastradh
614303b705cfSriastradh	/* Prefer to use the BLT if already engaged */
614403b705cfSriastradh	if (prefer_fill_blt(sna) &&
614503b705cfSriastradh	    gen3_render_fill_one_try_blt(sna, dst, bo, color,
614603b705cfSriastradh					 x1, y1, x2, y2, alu))
614703b705cfSriastradh		return true;
614803b705cfSriastradh
614903b705cfSriastradh	/* Must use the BLT if we can't RENDER... */
615003b705cfSriastradh	if (!(alu == GXcopy || alu == GXclear) ||
615103b705cfSriastradh	    too_large(dst->drawable.width, dst->drawable.height) ||
615203b705cfSriastradh	    bo->pitch > MAX_3D_PITCH)
615303b705cfSriastradh		return gen3_render_fill_one_try_blt(sna, dst, bo, color,
615403b705cfSriastradh						    x1, y1, x2, y2, alu);
615503b705cfSriastradh
615603b705cfSriastradh	if (alu == GXclear)
615703b705cfSriastradh		color = 0;
615803b705cfSriastradh
615903b705cfSriastradh	tmp.op = color == 0 ? PictOpClear : PictOpSrc;
616003b705cfSriastradh	tmp.dst.pixmap = dst;
616103b705cfSriastradh	tmp.dst.width = dst->drawable.width;
616203b705cfSriastradh	tmp.dst.height = dst->drawable.height;
616303b705cfSriastradh	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
616403b705cfSriastradh	tmp.dst.bo = bo;
616503b705cfSriastradh	tmp.floats_per_vertex = 2;
616603b705cfSriastradh	tmp.floats_per_rect = 6;
616703b705cfSriastradh	tmp.need_magic_ca_pass = 0;
616803b705cfSriastradh	tmp.has_component_alpha = 0;
616903b705cfSriastradh	tmp.rb_reversed = 0;
617003b705cfSriastradh
617103b705cfSriastradh	gen3_init_solid(&tmp.src,
617203b705cfSriastradh			sna_rgba_for_color(color, dst->drawable.depth));
617303b705cfSriastradh	tmp.mask.bo = NULL;
617403b705cfSriastradh	tmp.mask.u.gen3.type = SHADER_NONE;
617503b705cfSriastradh	tmp.u.gen3.num_constants = 0;
617603b705cfSriastradh
617703b705cfSriastradh	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
617803b705cfSriastradh		kgem_submit(&sna->kgem);
617942542f5fSchristos
618003b705cfSriastradh		if (gen3_render_fill_one_try_blt(sna, dst, bo, color,
618103b705cfSriastradh						 x1, y1, x2, y2, alu))
618203b705cfSriastradh			return true;
618342542f5fSchristos
618442542f5fSchristos		if (!kgem_check_bo(&sna->kgem, bo, NULL))
618542542f5fSchristos			return false;
618603b705cfSriastradh	}
618703b705cfSriastradh
618803b705cfSriastradh	gen3_align_vertex(sna, &tmp);
618942542f5fSchristos	gen3_emit_composite_state(sna, &tmp);
619003b705cfSriastradh	gen3_get_rectangles(sna, &tmp, 1);
619103b705cfSriastradh	DBG(("	(%d, %d), (%d, %d): %x\n", x1, y1, x2, y2, color));
619203b705cfSriastradh	OUT_VERTEX(x2);
619303b705cfSriastradh	OUT_VERTEX(y2);
619403b705cfSriastradh	OUT_VERTEX(x1);
619503b705cfSriastradh	OUT_VERTEX(y2);
619603b705cfSriastradh	OUT_VERTEX(x1);
619703b705cfSriastradh	OUT_VERTEX(y1);
619803b705cfSriastradh	gen3_vertex_flush(sna);
619903b705cfSriastradh
620003b705cfSriastradh	return true;
620103b705cfSriastradh}
620203b705cfSriastradh
620303b705cfSriastradhstatic void gen3_render_flush(struct sna *sna)
620403b705cfSriastradh{
620503b705cfSriastradh	gen3_vertex_close(sna);
620603b705cfSriastradh
620703b705cfSriastradh	assert(sna->render.vertex_reloc[0] == 0);
620803b705cfSriastradh	assert(sna->render.vertex_offset == 0);
620903b705cfSriastradh}
621003b705cfSriastradh
/* Render teardown hook; intentionally a no-op in this backend. */
static void
gen3_render_fini(struct sna *sna)
{
	(void)sna;
}
621503b705cfSriastradh
621603b705cfSriastradhconst char *gen3_render_init(struct sna *sna, const char *backend)
621703b705cfSriastradh{
621803b705cfSriastradh	struct sna_render *render = &sna->render;
621903b705cfSriastradh
622003b705cfSriastradh#if !NO_COMPOSITE
622103b705cfSriastradh	render->composite = gen3_render_composite;
622203b705cfSriastradh	render->prefer_gpu |= PREFER_GPU_RENDER;
622303b705cfSriastradh#endif
622403b705cfSriastradh#if !NO_COMPOSITE_SPANS
622503b705cfSriastradh	render->check_composite_spans = gen3_check_composite_spans;
622603b705cfSriastradh	render->composite_spans = gen3_render_composite_spans;
622703b705cfSriastradh	render->prefer_gpu |= PREFER_GPU_SPANS;
622803b705cfSriastradh#endif
622903b705cfSriastradh
623003b705cfSriastradh	render->video = gen3_render_video;
623103b705cfSriastradh
623203b705cfSriastradh	render->copy_boxes = gen3_render_copy_boxes;
623303b705cfSriastradh	render->copy = gen3_render_copy;
623403b705cfSriastradh
623503b705cfSriastradh	render->fill_boxes = gen3_render_fill_boxes;
623603b705cfSriastradh	render->fill = gen3_render_fill;
623703b705cfSriastradh	render->fill_one = gen3_render_fill_one;
623803b705cfSriastradh
623903b705cfSriastradh	render->reset = gen3_render_reset;
624003b705cfSriastradh	render->flush = gen3_render_flush;
624103b705cfSriastradh	render->fini = gen3_render_fini;
624203b705cfSriastradh
624303b705cfSriastradh	render->max_3d_size = MAX_3D_SIZE;
624403b705cfSriastradh	render->max_3d_pitch = MAX_3D_PITCH;
624503b705cfSriastradh
624603b705cfSriastradh	sna->kgem.retire = gen3_render_retire;
624703b705cfSriastradh	sna->kgem.expire = gen3_render_expire;
624803b705cfSriastradh	return "Alviso (gen3)";
624903b705cfSriastradh}
6250