/*
 * Based on code from intel_uxa.c and i830_xaa.c
 * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
 * Copyright (c) 2005 Jesse Barnes <jbarnes@virtuousgeek.org>
 * Copyright (c) 2009-2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */
3003b705cfSriastradh
3103b705cfSriastradh#ifdef HAVE_CONFIG_H
3203b705cfSriastradh#include "config.h"
3303b705cfSriastradh#endif
3403b705cfSriastradh
3503b705cfSriastradh#include "sna.h"
3603b705cfSriastradh#include "sna_render.h"
3703b705cfSriastradh#include "sna_render_inline.h"
3803b705cfSriastradh#include "sna_reg.h"
3903b705cfSriastradh#include "rop.h"
4003b705cfSriastradh
4103b705cfSriastradh#define NO_BLT_COMPOSITE 0
4203b705cfSriastradh#define NO_BLT_COPY 0
4303b705cfSriastradh#define NO_BLT_COPY_BOXES 0
4403b705cfSriastradh#define NO_BLT_FILL 0
4503b705cfSriastradh#define NO_BLT_FILL_BOXES 0
4603b705cfSriastradh
4742542f5fSchristos#ifndef PICT_TYPE_BGRA
4842542f5fSchristos#define PICT_TYPE_BGRA 8
4942542f5fSchristos#endif
5042542f5fSchristos
5103b705cfSriastradhstatic const uint8_t copy_ROP[] = {
5203b705cfSriastradh	ROP_0,                  /* GXclear */
5303b705cfSriastradh	ROP_DSa,                /* GXand */
5403b705cfSriastradh	ROP_SDna,               /* GXandReverse */
5503b705cfSriastradh	ROP_S,                  /* GXcopy */
5603b705cfSriastradh	ROP_DSna,               /* GXandInverted */
5703b705cfSriastradh	ROP_D,                  /* GXnoop */
5803b705cfSriastradh	ROP_DSx,                /* GXxor */
5903b705cfSriastradh	ROP_DSo,                /* GXor */
6003b705cfSriastradh	ROP_DSon,               /* GXnor */
6103b705cfSriastradh	ROP_DSxn,               /* GXequiv */
6203b705cfSriastradh	ROP_Dn,                 /* GXinvert */
6303b705cfSriastradh	ROP_SDno,               /* GXorReverse */
6403b705cfSriastradh	ROP_Sn,                 /* GXcopyInverted */
6503b705cfSriastradh	ROP_DSno,               /* GXorInverted */
6603b705cfSriastradh	ROP_DSan,               /* GXnand */
6703b705cfSriastradh	ROP_1                   /* GXset */
6803b705cfSriastradh};
6903b705cfSriastradh
7003b705cfSriastradhstatic const uint8_t fill_ROP[] = {
7103b705cfSriastradh	ROP_0,
7203b705cfSriastradh	ROP_DPa,
7303b705cfSriastradh	ROP_PDna,
7403b705cfSriastradh	ROP_P,
7503b705cfSriastradh	ROP_DPna,
7603b705cfSriastradh	ROP_D,
7703b705cfSriastradh	ROP_DPx,
7803b705cfSriastradh	ROP_DPo,
7903b705cfSriastradh	ROP_DPon,
8003b705cfSriastradh	ROP_PDxn,
8103b705cfSriastradh	ROP_Dn,
8203b705cfSriastradh	ROP_PDno,
8303b705cfSriastradh	ROP_Pn,
8403b705cfSriastradh	ROP_DPno,
8503b705cfSriastradh	ROP_DPan,
8603b705cfSriastradh	ROP_1
8703b705cfSriastradh};
8803b705cfSriastradh
/*
 * Completion hook that only calls sigtrap_put(); neither argument is
 * consulted.
 */
static void sig_done(struct sna *sna, const struct sna_composite_op *op)
{
	(void)sna;
	(void)op;

	sigtrap_put();
}
93fe8aea9eSmrg
9403b705cfSriastradhstatic void nop_done(struct sna *sna, const struct sna_composite_op *op)
9503b705cfSriastradh{
9603b705cfSriastradh	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
9742542f5fSchristos	if (sna->kgem.nexec > 1 && __kgem_ring_empty(&sna->kgem)) {
9842542f5fSchristos		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
9903b705cfSriastradh		_kgem_submit(&sna->kgem);
10042542f5fSchristos	}
10103b705cfSriastradh	(void)op;
10203b705cfSriastradh}
10303b705cfSriastradh
10403b705cfSriastradhstatic void gen6_blt_copy_done(struct sna *sna, const struct sna_composite_op *op)
10503b705cfSriastradh{
10603b705cfSriastradh	struct kgem *kgem = &sna->kgem;
10703b705cfSriastradh
10803b705cfSriastradh	assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
10903b705cfSriastradh	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
11042542f5fSchristos		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
11103b705cfSriastradh		_kgem_submit(kgem);
11203b705cfSriastradh		return;
11303b705cfSriastradh	}
11403b705cfSriastradh
11503b705cfSriastradh	if (kgem_check_batch(kgem, 3)) {
11603b705cfSriastradh		uint32_t *b = kgem->batch + kgem->nbatch;
11742542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
11803b705cfSriastradh		b[0] = XY_SETUP_CLIP;
11903b705cfSriastradh		b[1] = b[2] = 0;
12003b705cfSriastradh		kgem->nbatch += 3;
12103b705cfSriastradh		assert(kgem->nbatch < kgem->surface);
12203b705cfSriastradh	}
12303b705cfSriastradh	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
12403b705cfSriastradh	(void)op;
12503b705cfSriastradh}
12603b705cfSriastradh
12703b705cfSriastradhstatic bool sna_blt_fill_init(struct sna *sna,
12803b705cfSriastradh			      struct sna_blt_state *blt,
12903b705cfSriastradh			      struct kgem_bo *bo,
13003b705cfSriastradh			      int bpp,
13103b705cfSriastradh			      uint8_t alu,
13203b705cfSriastradh			      uint32_t pixel)
13303b705cfSriastradh{
13403b705cfSriastradh	struct kgem *kgem = &sna->kgem;
13503b705cfSriastradh
13603b705cfSriastradh	assert(kgem_bo_can_blt (kgem, bo));
13703b705cfSriastradh	blt->bo[0] = bo;
13803b705cfSriastradh
13903b705cfSriastradh	blt->br13 = bo->pitch;
14003b705cfSriastradh	blt->cmd = XY_SCANLINE_BLT;
14103b705cfSriastradh	if (kgem->gen >= 040 && bo->tiling) {
14203b705cfSriastradh		blt->cmd |= BLT_DST_TILED;
14303b705cfSriastradh		blt->br13 >>= 2;
14403b705cfSriastradh	}
14503b705cfSriastradh	assert(blt->br13 <= MAXSHORT);
14603b705cfSriastradh
14703b705cfSriastradh	if (alu == GXclear)
14803b705cfSriastradh		pixel = 0;
14903b705cfSriastradh	else if (alu == GXcopy) {
15003b705cfSriastradh		if (pixel == 0)
15103b705cfSriastradh			alu = GXclear;
15203b705cfSriastradh		else if (pixel == -1)
15303b705cfSriastradh			alu = GXset;
15403b705cfSriastradh	}
15503b705cfSriastradh
15603b705cfSriastradh	blt->br13 |= 1<<31 | (fill_ROP[alu] << 16);
15703b705cfSriastradh	switch (bpp) {
15803b705cfSriastradh	default: assert(0);
15903b705cfSriastradh	case 32: blt->br13 |= 1 << 25; /* RGB8888 */
16003b705cfSriastradh	case 16: blt->br13 |= 1 << 24; /* RGB565 */
16103b705cfSriastradh	case 8: break;
16203b705cfSriastradh	}
16303b705cfSriastradh
16403b705cfSriastradh	blt->pixel = pixel;
16503b705cfSriastradh	blt->bpp = bpp;
16642542f5fSchristos	blt->alu = alu;
16703b705cfSriastradh
16803b705cfSriastradh	kgem_set_mode(kgem, KGEM_BLT, bo);
16942542f5fSchristos	if (!kgem_check_batch(kgem, 14) ||
17003b705cfSriastradh	    !kgem_check_bo_fenced(kgem, bo)) {
17103b705cfSriastradh		kgem_submit(kgem);
17203b705cfSriastradh		if (!kgem_check_bo_fenced(kgem, bo))
17303b705cfSriastradh			return false;
17403b705cfSriastradh		_kgem_set_mode(kgem, KGEM_BLT);
17503b705cfSriastradh	}
17603b705cfSriastradh
17703b705cfSriastradh	if (sna->blt_state.fill_bo != bo->unique_id ||
17803b705cfSriastradh	    sna->blt_state.fill_pixel != pixel ||
17903b705cfSriastradh	    sna->blt_state.fill_alu != alu)
18003b705cfSriastradh	{
18103b705cfSriastradh		uint32_t *b;
18203b705cfSriastradh
18313496ba1Ssnj		if (!kgem_check_batch(kgem, 24) ||
18413496ba1Ssnj		    !kgem_check_reloc(kgem, 1)) {
18503b705cfSriastradh			_kgem_submit(kgem);
18642542f5fSchristos			if (!kgem_check_bo_fenced(kgem, bo))
18742542f5fSchristos				return false;
18803b705cfSriastradh			_kgem_set_mode(kgem, KGEM_BLT);
18903b705cfSriastradh		}
190fe8aea9eSmrg		kgem_bcs_set_tiling(kgem, NULL, bo);
19103b705cfSriastradh
19242542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
19303b705cfSriastradh		b = kgem->batch + kgem->nbatch;
19442542f5fSchristos		if (sna->kgem.gen >= 0100) {
19542542f5fSchristos			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
19642542f5fSchristos			if (bpp == 32)
19742542f5fSchristos				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
19842542f5fSchristos			if (bo->tiling)
19942542f5fSchristos				b[0] |= BLT_DST_TILED;
20042542f5fSchristos			b[1] = blt->br13;
20142542f5fSchristos			b[2] = 0;
20242542f5fSchristos			b[3] = 0;
20342542f5fSchristos			*(uint64_t *)(b+4) =
20442542f5fSchristos				kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
20542542f5fSchristos						 I915_GEM_DOMAIN_RENDER << 16 |
20642542f5fSchristos						 I915_GEM_DOMAIN_RENDER |
20742542f5fSchristos						 KGEM_RELOC_FENCED,
20842542f5fSchristos						 0);
20942542f5fSchristos			b[6] = pixel;
21042542f5fSchristos			b[7] = pixel;
21142542f5fSchristos			b[8] = 0;
21242542f5fSchristos			b[9] = 0;
21342542f5fSchristos			kgem->nbatch += 10;
21442542f5fSchristos		} else {
21542542f5fSchristos			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
21642542f5fSchristos			if (bpp == 32)
21742542f5fSchristos				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
21842542f5fSchristos			if (bo->tiling && kgem->gen >= 040)
21942542f5fSchristos				b[0] |= BLT_DST_TILED;
22042542f5fSchristos			b[1] = blt->br13;
22142542f5fSchristos			b[2] = 0;
22242542f5fSchristos			b[3] = 0;
22342542f5fSchristos			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
22442542f5fSchristos					      I915_GEM_DOMAIN_RENDER << 16 |
22542542f5fSchristos					      I915_GEM_DOMAIN_RENDER |
22642542f5fSchristos					      KGEM_RELOC_FENCED,
22742542f5fSchristos					      0);
22842542f5fSchristos			b[5] = pixel;
22942542f5fSchristos			b[6] = pixel;
23042542f5fSchristos			b[7] = 0;
23142542f5fSchristos			b[8] = 0;
23242542f5fSchristos			kgem->nbatch += 9;
23342542f5fSchristos		}
23403b705cfSriastradh		assert(kgem->nbatch < kgem->surface);
23503b705cfSriastradh
23603b705cfSriastradh		sna->blt_state.fill_bo = bo->unique_id;
23703b705cfSriastradh		sna->blt_state.fill_pixel = pixel;
23803b705cfSriastradh		sna->blt_state.fill_alu = alu;
23903b705cfSriastradh	}
24003b705cfSriastradh
24113496ba1Ssnj	assert(sna->kgem.mode == KGEM_BLT);
24203b705cfSriastradh	return true;
24303b705cfSriastradh}
24403b705cfSriastradh
245fe8aea9eSmrgnoinline static void __sna_blt_fill_begin(struct sna *sna,
246fe8aea9eSmrg					  const struct sna_blt_state *blt)
24703b705cfSriastradh{
24803b705cfSriastradh	struct kgem *kgem = &sna->kgem;
24903b705cfSriastradh	uint32_t *b;
25003b705cfSriastradh
251fe8aea9eSmrg	kgem_bcs_set_tiling(&sna->kgem, NULL, blt->bo[0]);
25203b705cfSriastradh
25342542f5fSchristos	assert(kgem->mode == KGEM_BLT);
25442542f5fSchristos	b = kgem->batch + kgem->nbatch;
25542542f5fSchristos	if (sna->kgem.gen >= 0100) {
25642542f5fSchristos		b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
25742542f5fSchristos		if (blt->bpp == 32)
25842542f5fSchristos			b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
25942542f5fSchristos		if (blt->bo[0]->tiling)
26042542f5fSchristos			b[0] |= BLT_DST_TILED;
26142542f5fSchristos		b[1] = blt->br13;
26242542f5fSchristos		b[2] = 0;
26342542f5fSchristos		b[3] = 0;
264fe8aea9eSmrg		*(uint64_t *)(b+4) =
26542542f5fSchristos			kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[0],
26642542f5fSchristos					 I915_GEM_DOMAIN_RENDER << 16 |
26742542f5fSchristos					 I915_GEM_DOMAIN_RENDER |
26842542f5fSchristos					 KGEM_RELOC_FENCED,
26942542f5fSchristos					 0);
27042542f5fSchristos		b[6] = blt->pixel;
27142542f5fSchristos		b[7] = blt->pixel;
27242542f5fSchristos		b[8] = 0;
27342542f5fSchristos		b[9] = 0;
27442542f5fSchristos		kgem->nbatch += 10;
27542542f5fSchristos	} else {
27642542f5fSchristos		b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
27742542f5fSchristos		if (blt->bpp == 32)
27842542f5fSchristos			b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
27942542f5fSchristos		if (blt->bo[0]->tiling && kgem->gen >= 040)
28042542f5fSchristos			b[0] |= BLT_DST_TILED;
28142542f5fSchristos		b[1] = blt->br13;
28242542f5fSchristos		b[2] = 0;
28342542f5fSchristos		b[3] = 0;
28442542f5fSchristos		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[0],
28542542f5fSchristos				      I915_GEM_DOMAIN_RENDER << 16 |
28642542f5fSchristos				      I915_GEM_DOMAIN_RENDER |
28742542f5fSchristos				      KGEM_RELOC_FENCED,
28842542f5fSchristos				      0);
28942542f5fSchristos		b[5] = blt->pixel;
29042542f5fSchristos		b[6] = blt->pixel;
29142542f5fSchristos		b[7] = 0;
29242542f5fSchristos		b[8] = 0;
29342542f5fSchristos		kgem->nbatch += 9;
29442542f5fSchristos	}
29503b705cfSriastradh}
29603b705cfSriastradh
297fe8aea9eSmrginline static void sna_blt_fill_begin(struct sna *sna,
298fe8aea9eSmrg				      const struct sna_blt_state *blt)
299fe8aea9eSmrg{
300fe8aea9eSmrg	struct kgem *kgem = &sna->kgem;
301fe8aea9eSmrg
302fe8aea9eSmrg	if (kgem->nreloc) {
303fe8aea9eSmrg		_kgem_submit(kgem);
304fe8aea9eSmrg		_kgem_set_mode(kgem, KGEM_BLT);
305fe8aea9eSmrg		kgem_bcs_set_tiling(kgem, NULL, blt->bo[0]);
306fe8aea9eSmrg		assert(kgem->nbatch == 0);
307fe8aea9eSmrg	}
308fe8aea9eSmrg
309fe8aea9eSmrg	__sna_blt_fill_begin(sna, blt);
310fe8aea9eSmrg}
311fe8aea9eSmrg
31203b705cfSriastradhinline static void sna_blt_fill_one(struct sna *sna,
31303b705cfSriastradh				    const struct sna_blt_state *blt,
31403b705cfSriastradh				    int16_t x, int16_t y,
31503b705cfSriastradh				    int16_t width, int16_t height)
31603b705cfSriastradh{
31703b705cfSriastradh	struct kgem *kgem = &sna->kgem;
31803b705cfSriastradh	uint32_t *b;
31903b705cfSriastradh
32003b705cfSriastradh	DBG(("%s: (%d, %d) x (%d, %d): %08x\n",
32103b705cfSriastradh	     __FUNCTION__, x, y, width, height, blt->pixel));
32203b705cfSriastradh
32303b705cfSriastradh	assert(x >= 0);
32403b705cfSriastradh	assert(y >= 0);
32503b705cfSriastradh	assert((y+height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
32603b705cfSriastradh
32703b705cfSriastradh	if (!kgem_check_batch(kgem, 3))
32803b705cfSriastradh		sna_blt_fill_begin(sna, blt);
32903b705cfSriastradh
33042542f5fSchristos	assert(sna->kgem.mode == KGEM_BLT);
33103b705cfSriastradh	b = kgem->batch + kgem->nbatch;
33203b705cfSriastradh	kgem->nbatch += 3;
33303b705cfSriastradh	assert(kgem->nbatch < kgem->surface);
33403b705cfSriastradh
33503b705cfSriastradh	b[0] = blt->cmd;
33603b705cfSriastradh	b[1] = y << 16 | x;
33703b705cfSriastradh	b[2] = b[1] + (height << 16 | width);
33803b705cfSriastradh}
33903b705cfSriastradh
34003b705cfSriastradhstatic bool sna_blt_copy_init(struct sna *sna,
34103b705cfSriastradh			      struct sna_blt_state *blt,
34203b705cfSriastradh			      struct kgem_bo *src,
34303b705cfSriastradh			      struct kgem_bo *dst,
34403b705cfSriastradh			      int bpp,
34503b705cfSriastradh			      uint8_t alu)
34603b705cfSriastradh{
34703b705cfSriastradh	struct kgem *kgem = &sna->kgem;
34803b705cfSriastradh
349fe8aea9eSmrg	assert(kgem_bo_can_blt(kgem, src));
350fe8aea9eSmrg	assert(kgem_bo_can_blt(kgem, dst));
35103b705cfSriastradh
35203b705cfSriastradh	blt->bo[0] = src;
35303b705cfSriastradh	blt->bo[1] = dst;
35403b705cfSriastradh
35542542f5fSchristos	blt->cmd = XY_SRC_COPY_BLT_CMD | (kgem->gen >= 0100 ? 8 : 6);
35603b705cfSriastradh	if (bpp == 32)
35703b705cfSriastradh		blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
35803b705cfSriastradh
35903b705cfSriastradh	blt->pitch[0] = src->pitch;
36003b705cfSriastradh	if (kgem->gen >= 040 && src->tiling) {
36103b705cfSriastradh		blt->cmd |= BLT_SRC_TILED;
36203b705cfSriastradh		blt->pitch[0] >>= 2;
36303b705cfSriastradh	}
36403b705cfSriastradh	assert(blt->pitch[0] <= MAXSHORT);
36503b705cfSriastradh
36603b705cfSriastradh	blt->pitch[1] = dst->pitch;
36703b705cfSriastradh	if (kgem->gen >= 040 && dst->tiling) {
36803b705cfSriastradh		blt->cmd |= BLT_DST_TILED;
36903b705cfSriastradh		blt->pitch[1] >>= 2;
37003b705cfSriastradh	}
37103b705cfSriastradh	assert(blt->pitch[1] <= MAXSHORT);
37203b705cfSriastradh
37303b705cfSriastradh	blt->overwrites = alu == GXcopy || alu == GXclear || alu == GXset;
37403b705cfSriastradh	blt->br13 = (copy_ROP[alu] << 16) | blt->pitch[1];
37503b705cfSriastradh	switch (bpp) {
37603b705cfSriastradh	default: assert(0);
37703b705cfSriastradh	case 32: blt->br13 |= 1 << 25; /* RGB8888 */
37803b705cfSriastradh	case 16: blt->br13 |= 1 << 24; /* RGB565 */
37903b705cfSriastradh	case 8: break;
38003b705cfSriastradh	}
38103b705cfSriastradh
38203b705cfSriastradh	kgem_set_mode(kgem, KGEM_BLT, dst);
38303b705cfSriastradh	if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
38403b705cfSriastradh		kgem_submit(kgem);
38503b705cfSriastradh		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
38603b705cfSriastradh			return false;
38703b705cfSriastradh		_kgem_set_mode(kgem, KGEM_BLT);
38803b705cfSriastradh	}
389fe8aea9eSmrg	kgem_bcs_set_tiling(&sna->kgem, src, dst);
39003b705cfSriastradh
39103b705cfSriastradh	sna->blt_state.fill_bo = 0;
39203b705cfSriastradh	return true;
39303b705cfSriastradh}
39403b705cfSriastradh
39503b705cfSriastradhstatic bool sna_blt_alpha_fixup_init(struct sna *sna,
39603b705cfSriastradh				     struct sna_blt_state *blt,
39703b705cfSriastradh				     struct kgem_bo *src,
39803b705cfSriastradh				     struct kgem_bo *dst,
39903b705cfSriastradh				     int bpp, uint32_t alpha)
40003b705cfSriastradh{
40103b705cfSriastradh	struct kgem *kgem = &sna->kgem;
40203b705cfSriastradh
40342542f5fSchristos	DBG(("%s: dst handle=%d, src handle=%d, bpp=%d, fixup=%08x\n",
40442542f5fSchristos	     __FUNCTION__, dst->handle, src->handle, bpp, alpha));
40542542f5fSchristos	assert(kgem_bo_can_blt(kgem, src));
40642542f5fSchristos	assert(kgem_bo_can_blt(kgem, dst));
40703b705cfSriastradh
40803b705cfSriastradh	blt->bo[0] = src;
40903b705cfSriastradh	blt->bo[1] = dst;
41003b705cfSriastradh
41142542f5fSchristos	blt->cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 12 : 10);
41203b705cfSriastradh	blt->pitch[0] = src->pitch;
41303b705cfSriastradh	if (kgem->gen >= 040 && src->tiling) {
41403b705cfSriastradh		blt->cmd |= BLT_SRC_TILED;
41503b705cfSriastradh		blt->pitch[0] >>= 2;
41603b705cfSriastradh	}
41703b705cfSriastradh	assert(blt->pitch[0] <= MAXSHORT);
41803b705cfSriastradh
41903b705cfSriastradh	blt->pitch[1] = dst->pitch;
42003b705cfSriastradh	if (kgem->gen >= 040 && dst->tiling) {
42103b705cfSriastradh		blt->cmd |= BLT_DST_TILED;
42203b705cfSriastradh		blt->pitch[1] >>= 2;
42303b705cfSriastradh	}
42403b705cfSriastradh	assert(blt->pitch[1] <= MAXSHORT);
42503b705cfSriastradh
42603b705cfSriastradh	blt->overwrites = 1;
42703b705cfSriastradh	blt->br13 = (0xfc << 16) | blt->pitch[1];
42803b705cfSriastradh	switch (bpp) {
42903b705cfSriastradh	default: assert(0);
43003b705cfSriastradh	case 32: blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
43103b705cfSriastradh		 blt->br13 |= 1 << 25; /* RGB8888 */
43203b705cfSriastradh	case 16: blt->br13 |= 1 << 24; /* RGB565 */
43303b705cfSriastradh	case 8: break;
43403b705cfSriastradh	}
43503b705cfSriastradh	blt->pixel = alpha;
43603b705cfSriastradh
43703b705cfSriastradh	kgem_set_mode(kgem, KGEM_BLT, dst);
43803b705cfSriastradh	if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
43903b705cfSriastradh		kgem_submit(kgem);
44003b705cfSriastradh		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
44103b705cfSriastradh			return false;
44203b705cfSriastradh		_kgem_set_mode(kgem, KGEM_BLT);
44303b705cfSriastradh	}
444fe8aea9eSmrg	kgem_bcs_set_tiling(&sna->kgem, src, dst);
44503b705cfSriastradh
44603b705cfSriastradh	sna->blt_state.fill_bo = 0;
44703b705cfSriastradh	return true;
44803b705cfSriastradh}
44903b705cfSriastradh
45003b705cfSriastradhstatic void sna_blt_alpha_fixup_one(struct sna *sna,
45103b705cfSriastradh				    const struct sna_blt_state *blt,
45203b705cfSriastradh				    int src_x, int src_y,
45303b705cfSriastradh				    int width, int height,
45403b705cfSriastradh				    int dst_x, int dst_y)
45503b705cfSriastradh{
45603b705cfSriastradh	struct kgem *kgem = &sna->kgem;
45703b705cfSriastradh	uint32_t *b;
45803b705cfSriastradh
45903b705cfSriastradh	DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n",
46003b705cfSriastradh	     __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height));
46103b705cfSriastradh
46203b705cfSriastradh	assert(src_x >= 0);
46303b705cfSriastradh	assert(src_y >= 0);
46403b705cfSriastradh	assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
46503b705cfSriastradh	assert(dst_x >= 0);
46603b705cfSriastradh	assert(dst_y >= 0);
46703b705cfSriastradh	assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1]));
46803b705cfSriastradh	assert(width > 0);
46903b705cfSriastradh	assert(height > 0);
47003b705cfSriastradh
47142542f5fSchristos	if (!kgem_check_batch(kgem, 14) ||
47203b705cfSriastradh	    !kgem_check_reloc(kgem, 2)) {
47303b705cfSriastradh		_kgem_submit(kgem);
47403b705cfSriastradh		_kgem_set_mode(kgem, KGEM_BLT);
475fe8aea9eSmrg		kgem_bcs_set_tiling(&sna->kgem, blt->bo[0], blt->bo[1]);
47603b705cfSriastradh	}
47703b705cfSriastradh
47842542f5fSchristos	assert(sna->kgem.mode == KGEM_BLT);
47903b705cfSriastradh	b = kgem->batch + kgem->nbatch;
48003b705cfSriastradh	b[0] = blt->cmd;
48103b705cfSriastradh	b[1] = blt->br13;
48203b705cfSriastradh	b[2] = (dst_y << 16) | dst_x;
48303b705cfSriastradh	b[3] = ((dst_y + height) << 16) | (dst_x + width);
48442542f5fSchristos	if (sna->kgem.gen >= 0100) {
48542542f5fSchristos		*(uint64_t *)(b+4) =
48642542f5fSchristos			kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1],
48742542f5fSchristos					 I915_GEM_DOMAIN_RENDER << 16 |
48842542f5fSchristos					 I915_GEM_DOMAIN_RENDER |
48942542f5fSchristos					 KGEM_RELOC_FENCED,
49042542f5fSchristos					 0);
49142542f5fSchristos		b[6] = blt->pitch[0];
49242542f5fSchristos		b[7] = (src_y << 16) | src_x;
49342542f5fSchristos		*(uint64_t *)(b+8) =
49442542f5fSchristos			kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0],
49542542f5fSchristos					 I915_GEM_DOMAIN_RENDER << 16 |
49642542f5fSchristos					 KGEM_RELOC_FENCED,
49742542f5fSchristos					 0);
49842542f5fSchristos		b[10] = blt->pixel;
49942542f5fSchristos		b[11] = blt->pixel;
50042542f5fSchristos		b[12] = 0;
50142542f5fSchristos		b[13] = 0;
50242542f5fSchristos		kgem->nbatch += 14;
50342542f5fSchristos	} else {
50442542f5fSchristos		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1],
50542542f5fSchristos				      I915_GEM_DOMAIN_RENDER << 16 |
50642542f5fSchristos				      I915_GEM_DOMAIN_RENDER |
50742542f5fSchristos				      KGEM_RELOC_FENCED,
50842542f5fSchristos				      0);
50942542f5fSchristos		b[5] = blt->pitch[0];
51042542f5fSchristos		b[6] = (src_y << 16) | src_x;
51142542f5fSchristos		b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0],
51242542f5fSchristos				      I915_GEM_DOMAIN_RENDER << 16 |
51342542f5fSchristos				      KGEM_RELOC_FENCED,
51442542f5fSchristos				      0);
51542542f5fSchristos		b[8] = blt->pixel;
51642542f5fSchristos		b[9] = blt->pixel;
51742542f5fSchristos		b[10] = 0;
51842542f5fSchristos		b[11] = 0;
51942542f5fSchristos		kgem->nbatch += 12;
52042542f5fSchristos	}
52103b705cfSriastradh	assert(kgem->nbatch < kgem->surface);
52203b705cfSriastradh}
52303b705cfSriastradh
52403b705cfSriastradhstatic void sna_blt_copy_one(struct sna *sna,
52503b705cfSriastradh			     const struct sna_blt_state *blt,
52603b705cfSriastradh			     int src_x, int src_y,
52703b705cfSriastradh			     int width, int height,
52803b705cfSriastradh			     int dst_x, int dst_y)
52903b705cfSriastradh{
53003b705cfSriastradh	struct kgem *kgem = &sna->kgem;
53103b705cfSriastradh	uint32_t *b;
53203b705cfSriastradh
53303b705cfSriastradh	DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n",
53403b705cfSriastradh	     __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height));
53503b705cfSriastradh
53603b705cfSriastradh	assert(src_x >= 0);
53703b705cfSriastradh	assert(src_y >= 0);
53803b705cfSriastradh	assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
53903b705cfSriastradh	assert(dst_x >= 0);
54003b705cfSriastradh	assert(dst_y >= 0);
54103b705cfSriastradh	assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1]));
54203b705cfSriastradh	assert(width > 0);
54303b705cfSriastradh	assert(height > 0);
54403b705cfSriastradh
54503b705cfSriastradh	/* Compare against a previous fill */
54642542f5fSchristos	if (blt->overwrites &&
54703b705cfSriastradh	    kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[1]->target_handle) {
54842542f5fSchristos		if (sna->kgem.gen >= 0100) {
54942542f5fSchristos			if (kgem->nbatch >= 7 &&
55042542f5fSchristos			    kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (blt->cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
55142542f5fSchristos			    kgem->batch[kgem->nbatch-5] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) &&
55242542f5fSchristos			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) {
55342542f5fSchristos				DBG(("%s: replacing last fill\n", __FUNCTION__));
55442542f5fSchristos				if (kgem_check_batch(kgem, 3)) {
55542542f5fSchristos					assert(kgem->mode == KGEM_BLT);
55642542f5fSchristos					b = kgem->batch + kgem->nbatch - 7;
55742542f5fSchristos					b[0] = blt->cmd;
55842542f5fSchristos					b[1] = blt->br13;
55942542f5fSchristos					b[6] = (src_y << 16) | src_x;
56042542f5fSchristos					b[7] = blt->pitch[0];
56142542f5fSchristos					*(uint64_t *)(b+8) =
56242542f5fSchristos						kgem_add_reloc64(kgem, kgem->nbatch + 8 - 7, blt->bo[0],
56342542f5fSchristos								 I915_GEM_DOMAIN_RENDER << 16 |
56442542f5fSchristos								 KGEM_RELOC_FENCED,
56542542f5fSchristos								 0);
56642542f5fSchristos					kgem->nbatch += 3;
56742542f5fSchristos					assert(kgem->nbatch < kgem->surface);
56842542f5fSchristos					return;
56942542f5fSchristos				}
57042542f5fSchristos				kgem->nbatch -= 7;
57142542f5fSchristos				kgem->nreloc--;
57242542f5fSchristos			}
57342542f5fSchristos		} else {
57442542f5fSchristos			if (kgem->nbatch >= 6 &&
57542542f5fSchristos			    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (blt->cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
57642542f5fSchristos			    kgem->batch[kgem->nbatch-4] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) &&
57742542f5fSchristos			    kgem->batch[kgem->nbatch-3] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) {
57842542f5fSchristos				DBG(("%s: replacing last fill\n", __FUNCTION__));
57942542f5fSchristos				if (kgem_check_batch(kgem, 8-6)) {
58042542f5fSchristos					assert(kgem->mode == KGEM_BLT);
58142542f5fSchristos					b = kgem->batch + kgem->nbatch - 6;
58242542f5fSchristos					b[0] = blt->cmd;
58342542f5fSchristos					b[1] = blt->br13;
58442542f5fSchristos					b[5] = (src_y << 16) | src_x;
58542542f5fSchristos					b[6] = blt->pitch[0];
58642542f5fSchristos					b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7 - 6, blt->bo[0],
58742542f5fSchristos							      I915_GEM_DOMAIN_RENDER << 16 |
58842542f5fSchristos							      KGEM_RELOC_FENCED,
58942542f5fSchristos							      0);
59042542f5fSchristos					kgem->nbatch += 8 - 6;
59142542f5fSchristos					assert(kgem->nbatch < kgem->surface);
59242542f5fSchristos					return;
59342542f5fSchristos				}
59442542f5fSchristos				kgem->nbatch -= 6;
59542542f5fSchristos				kgem->nreloc--;
59642542f5fSchristos			}
59703b705cfSriastradh		}
59803b705cfSriastradh	}
59903b705cfSriastradh
60042542f5fSchristos	if (!kgem_check_batch(kgem, 10) ||
60103b705cfSriastradh	    !kgem_check_reloc(kgem, 2)) {
60203b705cfSriastradh		_kgem_submit(kgem);
60303b705cfSriastradh		_kgem_set_mode(kgem, KGEM_BLT);
604fe8aea9eSmrg		kgem_bcs_set_tiling(&sna->kgem, blt->bo[0], blt->bo[1]);
60503b705cfSriastradh	}
60603b705cfSriastradh
60742542f5fSchristos	assert(sna->kgem.mode == KGEM_BLT);
60803b705cfSriastradh	b = kgem->batch + kgem->nbatch;
60903b705cfSriastradh	b[0] = blt->cmd;
61003b705cfSriastradh	b[1] = blt->br13;
61103b705cfSriastradh	b[2] = (dst_y << 16) | dst_x;
61203b705cfSriastradh	b[3] = ((dst_y + height) << 16) | (dst_x + width);
61342542f5fSchristos	if (kgem->gen >= 0100) {
61442542f5fSchristos		*(uint64_t *)(b+4) =
61542542f5fSchristos			kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1],
61642542f5fSchristos					 I915_GEM_DOMAIN_RENDER << 16 |
61742542f5fSchristos					 I915_GEM_DOMAIN_RENDER |
61842542f5fSchristos					 KGEM_RELOC_FENCED,
61942542f5fSchristos					 0);
62042542f5fSchristos		b[6] = (src_y << 16) | src_x;
62142542f5fSchristos		b[7] = blt->pitch[0];
62242542f5fSchristos		*(uint64_t *)(b+8) =
62342542f5fSchristos			kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0],
62442542f5fSchristos					 I915_GEM_DOMAIN_RENDER << 16 |
62542542f5fSchristos					 KGEM_RELOC_FENCED,
62642542f5fSchristos					 0);
62742542f5fSchristos		kgem->nbatch += 10;
62842542f5fSchristos	} else {
62942542f5fSchristos		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1],
63042542f5fSchristos				      I915_GEM_DOMAIN_RENDER << 16 |
63142542f5fSchristos				      I915_GEM_DOMAIN_RENDER |
63242542f5fSchristos				      KGEM_RELOC_FENCED,
63342542f5fSchristos				      0);
63442542f5fSchristos		b[5] = (src_y << 16) | src_x;
63542542f5fSchristos		b[6] = blt->pitch[0];
63642542f5fSchristos		b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0],
63742542f5fSchristos				      I915_GEM_DOMAIN_RENDER << 16 |
63842542f5fSchristos				      KGEM_RELOC_FENCED,
63942542f5fSchristos				      0);
64042542f5fSchristos		kgem->nbatch += 8;
64142542f5fSchristos	}
64203b705cfSriastradh	assert(kgem->nbatch < kgem->surface);
64303b705cfSriastradh}
64403b705cfSriastradh
64503b705cfSriastradhbool
64603b705cfSriastradhsna_get_rgba_from_pixel(uint32_t pixel,
64703b705cfSriastradh			uint16_t *red,
64803b705cfSriastradh			uint16_t *green,
64903b705cfSriastradh			uint16_t *blue,
65003b705cfSriastradh			uint16_t *alpha,
65103b705cfSriastradh			uint32_t format)
65203b705cfSriastradh{
65303b705cfSriastradh	int rbits, bbits, gbits, abits;
65403b705cfSriastradh	int rshift, bshift, gshift, ashift;
65503b705cfSriastradh
65603b705cfSriastradh	rbits = PICT_FORMAT_R(format);
65703b705cfSriastradh	gbits = PICT_FORMAT_G(format);
65803b705cfSriastradh	bbits = PICT_FORMAT_B(format);
65903b705cfSriastradh	abits = PICT_FORMAT_A(format);
66003b705cfSriastradh
66103b705cfSriastradh	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) {
66203b705cfSriastradh		rshift = gshift = bshift = ashift = 0;
66303b705cfSriastradh	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
66403b705cfSriastradh		bshift = 0;
66503b705cfSriastradh		gshift = bbits;
66603b705cfSriastradh		rshift = gshift + gbits;
66703b705cfSriastradh		ashift = rshift + rbits;
66803b705cfSriastradh	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
66903b705cfSriastradh		rshift = 0;
67003b705cfSriastradh		gshift = rbits;
67103b705cfSriastradh		bshift = gshift + gbits;
67203b705cfSriastradh		ashift = bshift + bbits;
67303b705cfSriastradh	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
67403b705cfSriastradh		ashift = 0;
67503b705cfSriastradh		rshift = abits;
67603b705cfSriastradh		if (abits == 0)
67703b705cfSriastradh			rshift = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits);
67803b705cfSriastradh		gshift = rshift + rbits;
67903b705cfSriastradh		bshift = gshift + gbits;
68003b705cfSriastradh	} else {
68103b705cfSriastradh		return false;
68203b705cfSriastradh	}
68303b705cfSriastradh
68403b705cfSriastradh	if (rbits) {
68503b705cfSriastradh		*red = ((pixel >> rshift) & ((1 << rbits) - 1)) << (16 - rbits);
68603b705cfSriastradh		while (rbits < 16) {
68703b705cfSriastradh			*red |= *red >> rbits;
68803b705cfSriastradh			rbits <<= 1;
68903b705cfSriastradh		}
69003b705cfSriastradh	} else
69103b705cfSriastradh		*red = 0;
69203b705cfSriastradh
69303b705cfSriastradh	if (gbits) {
69403b705cfSriastradh		*green = ((pixel >> gshift) & ((1 << gbits) - 1)) << (16 - gbits);
69503b705cfSriastradh		while (gbits < 16) {
69603b705cfSriastradh			*green |= *green >> gbits;
69703b705cfSriastradh			gbits <<= 1;
69803b705cfSriastradh		}
69903b705cfSriastradh	} else
70003b705cfSriastradh		*green = 0;
70103b705cfSriastradh
70203b705cfSriastradh	if (bbits) {
70303b705cfSriastradh		*blue = ((pixel >> bshift) & ((1 << bbits) - 1)) << (16 - bbits);
70403b705cfSriastradh		while (bbits < 16) {
70503b705cfSriastradh			*blue |= *blue >> bbits;
70603b705cfSriastradh			bbits <<= 1;
70703b705cfSriastradh		}
70803b705cfSriastradh	} else
70903b705cfSriastradh		*blue = 0;
71003b705cfSriastradh
71103b705cfSriastradh	if (abits) {
71203b705cfSriastradh		*alpha = ((pixel >> ashift) & ((1 << abits) - 1)) << (16 - abits);
71303b705cfSriastradh		while (abits < 16) {
71403b705cfSriastradh			*alpha |= *alpha >> abits;
71503b705cfSriastradh			abits <<= 1;
71603b705cfSriastradh		}
71703b705cfSriastradh	} else
71803b705cfSriastradh		*alpha = 0xffff;
71903b705cfSriastradh
72003b705cfSriastradh	return true;
72103b705cfSriastradh}
72203b705cfSriastradh
72303b705cfSriastradhbool
72403b705cfSriastradh_sna_get_pixel_from_rgba(uint32_t * pixel,
72503b705cfSriastradh			uint16_t red,
72603b705cfSriastradh			uint16_t green,
72703b705cfSriastradh			uint16_t blue,
72803b705cfSriastradh			uint16_t alpha,
72903b705cfSriastradh			uint32_t format)
73003b705cfSriastradh{
73103b705cfSriastradh	int rbits, bbits, gbits, abits;
73203b705cfSriastradh	int rshift, bshift, gshift, ashift;
73303b705cfSriastradh
73403b705cfSriastradh	rbits = PICT_FORMAT_R(format);
73503b705cfSriastradh	gbits = PICT_FORMAT_G(format);
73603b705cfSriastradh	bbits = PICT_FORMAT_B(format);
73703b705cfSriastradh	abits = PICT_FORMAT_A(format);
73803b705cfSriastradh	if (abits == 0)
73903b705cfSriastradh	    abits = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits);
74003b705cfSriastradh
74103b705cfSriastradh	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) {
74203b705cfSriastradh		*pixel = alpha >> (16 - abits);
74303b705cfSriastradh		return true;
74403b705cfSriastradh	}
74503b705cfSriastradh
74603b705cfSriastradh	if (!PICT_FORMAT_COLOR(format))
74703b705cfSriastradh		return false;
74803b705cfSriastradh
74903b705cfSriastradh	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
75003b705cfSriastradh		bshift = 0;
75103b705cfSriastradh		gshift = bbits;
75203b705cfSriastradh		rshift = gshift + gbits;
75303b705cfSriastradh		ashift = rshift + rbits;
75403b705cfSriastradh	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
75503b705cfSriastradh		rshift = 0;
75603b705cfSriastradh		gshift = rbits;
75703b705cfSriastradh		bshift = gshift + gbits;
75803b705cfSriastradh		ashift = bshift + bbits;
75903b705cfSriastradh	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
76003b705cfSriastradh		ashift = 0;
76103b705cfSriastradh		rshift = abits;
76203b705cfSriastradh		gshift = rshift + rbits;
76303b705cfSriastradh		bshift = gshift + gbits;
76403b705cfSriastradh	} else
76503b705cfSriastradh		return false;
76603b705cfSriastradh
76703b705cfSriastradh	*pixel = 0;
76803b705cfSriastradh	*pixel |= (blue  >> (16 - bbits)) << bshift;
76903b705cfSriastradh	*pixel |= (green >> (16 - gbits)) << gshift;
77003b705cfSriastradh	*pixel |= (red   >> (16 - rbits)) << rshift;
77103b705cfSriastradh	*pixel |= (alpha >> (16 - abits)) << ashift;
77203b705cfSriastradh
77303b705cfSriastradh	return true;
77403b705cfSriastradh}
77503b705cfSriastradh
77603b705cfSriastradhuint32_t
77703b705cfSriastradhsna_rgba_for_color(uint32_t color, int depth)
77803b705cfSriastradh{
77903b705cfSriastradh	return color_convert(color, sna_format_for_depth(depth), PICT_a8r8g8b8);
78003b705cfSriastradh}
78103b705cfSriastradh
78203b705cfSriastradhuint32_t
78303b705cfSriastradhsna_rgba_to_color(uint32_t rgba, uint32_t format)
78403b705cfSriastradh{
78503b705cfSriastradh	return color_convert(rgba, PICT_a8r8g8b8, format);
78603b705cfSriastradh}
78703b705cfSriastradh
78803b705cfSriastradhstatic uint32_t
78903b705cfSriastradhget_pixel(PicturePtr picture)
79003b705cfSriastradh{
79103b705cfSriastradh	PixmapPtr pixmap = get_drawable_pixmap(picture->pDrawable);
79203b705cfSriastradh
79303b705cfSriastradh	DBG(("%s: %p\n", __FUNCTION__, pixmap));
79403b705cfSriastradh
79503b705cfSriastradh	if (!sna_pixmap_move_to_cpu(pixmap, MOVE_READ))
79603b705cfSriastradh		return 0;
79703b705cfSriastradh
79803b705cfSriastradh	switch (pixmap->drawable.bitsPerPixel) {
79903b705cfSriastradh	case 32: return *(uint32_t *)pixmap->devPrivate.ptr;
80003b705cfSriastradh	case 16: return *(uint16_t *)pixmap->devPrivate.ptr;
80103b705cfSriastradh	default: return *(uint8_t *)pixmap->devPrivate.ptr;
80203b705cfSriastradh	}
80303b705cfSriastradh}
80403b705cfSriastradh
80503b705cfSriastradhstatic uint32_t
80603b705cfSriastradhget_solid_color(PicturePtr picture, uint32_t format)
80703b705cfSriastradh{
80803b705cfSriastradh	if (picture->pSourcePict) {
80903b705cfSriastradh		PictSolidFill *fill = (PictSolidFill *)picture->pSourcePict;
81003b705cfSriastradh		return color_convert(fill->color, PICT_a8r8g8b8, format);
81103b705cfSriastradh	} else
81203b705cfSriastradh		return color_convert(get_pixel(picture), picture->format, format);
81303b705cfSriastradh}
81403b705cfSriastradh
81503b705cfSriastradhstatic bool
81603b705cfSriastradhis_solid(PicturePtr picture)
81703b705cfSriastradh{
81803b705cfSriastradh	if (picture->pSourcePict) {
81903b705cfSriastradh		if (picture->pSourcePict->type == SourcePictTypeSolidFill)
82003b705cfSriastradh			return true;
82103b705cfSriastradh	}
82203b705cfSriastradh
82303b705cfSriastradh	if (picture->pDrawable) {
82403b705cfSriastradh		if (picture->pDrawable->width  == 1 &&
82503b705cfSriastradh		    picture->pDrawable->height == 1 &&
82603b705cfSriastradh		    picture->repeat)
82703b705cfSriastradh			return true;
82803b705cfSriastradh	}
82903b705cfSriastradh
83003b705cfSriastradh	return false;
83103b705cfSriastradh}
83203b705cfSriastradh
83303b705cfSriastradhbool
83403b705cfSriastradhsna_picture_is_solid(PicturePtr picture, uint32_t *color)
83503b705cfSriastradh{
83603b705cfSriastradh	if (!is_solid(picture))
83703b705cfSriastradh		return false;
83803b705cfSriastradh
83903b705cfSriastradh	if (color)
84003b705cfSriastradh		*color = get_solid_color(picture, PICT_a8r8g8b8);
84103b705cfSriastradh	return true;
84203b705cfSriastradh}
84303b705cfSriastradh
84442542f5fSchristosstatic bool
84542542f5fSchristospixel_is_transparent(uint32_t pixel, uint32_t format)
84642542f5fSchristos{
84742542f5fSchristos	unsigned int abits;
84842542f5fSchristos
84942542f5fSchristos	abits = PICT_FORMAT_A(format);
85042542f5fSchristos	if (!abits)
85142542f5fSchristos		return false;
85242542f5fSchristos
85342542f5fSchristos	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A ||
85442542f5fSchristos	    PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
85542542f5fSchristos		return (pixel & ((1 << abits) - 1)) == 0;
85642542f5fSchristos	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB ||
85742542f5fSchristos		   PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
85842542f5fSchristos		unsigned int ashift = PICT_FORMAT_BPP(format) - abits;
85942542f5fSchristos		return (pixel >> ashift) == 0;
86042542f5fSchristos	} else
86142542f5fSchristos		return false;
86242542f5fSchristos}
86342542f5fSchristos
86403b705cfSriastradhstatic bool
86503b705cfSriastradhpixel_is_opaque(uint32_t pixel, uint32_t format)
86603b705cfSriastradh{
86703b705cfSriastradh	unsigned int abits;
86803b705cfSriastradh
86903b705cfSriastradh	abits = PICT_FORMAT_A(format);
87003b705cfSriastradh	if (!abits)
87103b705cfSriastradh		return true;
87203b705cfSriastradh
87303b705cfSriastradh	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A ||
87403b705cfSriastradh	    PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
87503b705cfSriastradh		return (pixel & ((1 << abits) - 1)) == (unsigned)((1 << abits) - 1);
87603b705cfSriastradh	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB ||
87703b705cfSriastradh		   PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
87803b705cfSriastradh		unsigned int ashift = PICT_FORMAT_BPP(format) - abits;
87903b705cfSriastradh		return (pixel >> ashift) == (unsigned)((1 << abits) - 1);
88003b705cfSriastradh	} else
88103b705cfSriastradh		return false;
88203b705cfSriastradh}
88303b705cfSriastradh
88403b705cfSriastradhstatic bool
88503b705cfSriastradhpixel_is_white(uint32_t pixel, uint32_t format)
88603b705cfSriastradh{
88703b705cfSriastradh	switch (PICT_FORMAT_TYPE(format)) {
88803b705cfSriastradh	case PICT_TYPE_A:
88903b705cfSriastradh	case PICT_TYPE_ARGB:
89003b705cfSriastradh	case PICT_TYPE_ABGR:
89103b705cfSriastradh	case PICT_TYPE_BGRA:
89203b705cfSriastradh		return pixel == ((1U << PICT_FORMAT_BPP(format)) - 1);
89303b705cfSriastradh	default:
89403b705cfSriastradh		return false;
89503b705cfSriastradh	}
89603b705cfSriastradh}
89703b705cfSriastradh
89803b705cfSriastradhstatic bool
89903b705cfSriastradhis_opaque_solid(PicturePtr picture)
90003b705cfSriastradh{
90103b705cfSriastradh	if (picture->pSourcePict) {
90203b705cfSriastradh		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
90303b705cfSriastradh		return (fill->color >> 24) == 0xff;
90403b705cfSriastradh	} else
90503b705cfSriastradh		return pixel_is_opaque(get_pixel(picture), picture->format);
90603b705cfSriastradh}
90703b705cfSriastradh
90803b705cfSriastradhstatic bool
90903b705cfSriastradhis_white(PicturePtr picture)
91003b705cfSriastradh{
91103b705cfSriastradh	if (picture->pSourcePict) {
91203b705cfSriastradh		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
91303b705cfSriastradh		return fill->color == 0xffffffff;
91403b705cfSriastradh	} else
91503b705cfSriastradh		return pixel_is_white(get_pixel(picture), picture->format);
91603b705cfSriastradh}
91703b705cfSriastradh
91842542f5fSchristosstatic bool
91942542f5fSchristosis_transparent(PicturePtr picture)
92042542f5fSchristos{
92142542f5fSchristos	if (picture->pSourcePict) {
92242542f5fSchristos		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
92342542f5fSchristos		return fill->color == 0;
92442542f5fSchristos	} else
92542542f5fSchristos		return pixel_is_transparent(get_pixel(picture), picture->format);
92642542f5fSchristos}
92742542f5fSchristos
92803b705cfSriastradhbool
92903b705cfSriastradhsna_composite_mask_is_opaque(PicturePtr mask)
93003b705cfSriastradh{
93103b705cfSriastradh	if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format))
93203b705cfSriastradh		return is_solid(mask) && is_white(mask);
93303b705cfSriastradh	else if (!PICT_FORMAT_A(mask->format))
93403b705cfSriastradh		return true;
935fe8aea9eSmrg	else if (mask->pSourcePict) {
936fe8aea9eSmrg		PictSolidFill *fill = (PictSolidFill *) mask->pSourcePict;
937fe8aea9eSmrg		return (fill->color >> 24) == 0xff;
938fe8aea9eSmrg	} else {
939fe8aea9eSmrg		struct sna_pixmap *priv;
940fe8aea9eSmrg		assert(mask->pDrawable);
941fe8aea9eSmrg
942fe8aea9eSmrg		if (mask->pDrawable->width  == 1 &&
943fe8aea9eSmrg		    mask->pDrawable->height == 1 &&
944fe8aea9eSmrg		    mask->repeat)
945fe8aea9eSmrg			return pixel_is_opaque(get_pixel(mask), mask->format);
946fe8aea9eSmrg
947fe8aea9eSmrg		if (mask->transform)
948fe8aea9eSmrg			return false;
949fe8aea9eSmrg
950fe8aea9eSmrg		priv = sna_pixmap_from_drawable(mask->pDrawable);
951fe8aea9eSmrg		if (priv == NULL || !priv->clear)
952fe8aea9eSmrg			return false;
953fe8aea9eSmrg
954fe8aea9eSmrg		return pixel_is_opaque(priv->clear_color, mask->format);
955fe8aea9eSmrg	}
95603b705cfSriastradh}
95703b705cfSriastradh
95803b705cfSriastradhfastcall
95903b705cfSriastradhstatic void blt_composite_fill(struct sna *sna,
96003b705cfSriastradh			       const struct sna_composite_op *op,
96103b705cfSriastradh			       const struct sna_composite_rectangles *r)
96203b705cfSriastradh{
96303b705cfSriastradh	int x1, x2, y1, y2;
96403b705cfSriastradh
96503b705cfSriastradh	x1 = r->dst.x + op->dst.x;
96603b705cfSriastradh	y1 = r->dst.y + op->dst.y;
96703b705cfSriastradh	x2 = x1 + r->width;
96803b705cfSriastradh	y2 = y1 + r->height;
96903b705cfSriastradh
97003b705cfSriastradh	if (x1 < 0)
97103b705cfSriastradh		x1 = 0;
97203b705cfSriastradh	if (y1 < 0)
97303b705cfSriastradh		y1 = 0;
97403b705cfSriastradh
97503b705cfSriastradh	if (x2 > op->dst.width)
97603b705cfSriastradh		x2 = op->dst.width;
97703b705cfSriastradh	if (y2 > op->dst.height)
97803b705cfSriastradh		y2 = op->dst.height;
97903b705cfSriastradh
98003b705cfSriastradh	if (x2 <= x1 || y2 <= y1)
98103b705cfSriastradh		return;
98203b705cfSriastradh
98303b705cfSriastradh	sna_blt_fill_one(sna, &op->u.blt, x1, y1, x2-x1, y2-y1);
98403b705cfSriastradh}
98503b705cfSriastradh
98603b705cfSriastradhfastcall
98703b705cfSriastradhstatic void blt_composite_fill__cpu(struct sna *sna,
98803b705cfSriastradh				    const struct sna_composite_op *op,
98903b705cfSriastradh				    const struct sna_composite_rectangles *r)
99003b705cfSriastradh{
99103b705cfSriastradh	int x1, x2, y1, y2;
99203b705cfSriastradh
99303b705cfSriastradh	x1 = r->dst.x + op->dst.x;
99403b705cfSriastradh	y1 = r->dst.y + op->dst.y;
99503b705cfSriastradh	x2 = x1 + r->width;
99603b705cfSriastradh	y2 = y1 + r->height;
99703b705cfSriastradh
99803b705cfSriastradh	if (x1 < 0)
99903b705cfSriastradh		x1 = 0;
100003b705cfSriastradh	if (y1 < 0)
100103b705cfSriastradh		y1 = 0;
100203b705cfSriastradh
100303b705cfSriastradh	if (x2 > op->dst.width)
100403b705cfSriastradh		x2 = op->dst.width;
100503b705cfSriastradh	if (y2 > op->dst.height)
100603b705cfSriastradh		y2 = op->dst.height;
100703b705cfSriastradh
100803b705cfSriastradh	if (x2 <= x1 || y2 <= y1)
100903b705cfSriastradh		return;
101003b705cfSriastradh
101142542f5fSchristos	assert(op->dst.pixmap->devPrivate.ptr);
101242542f5fSchristos	assert(op->dst.pixmap->devKind);
1013fe8aea9eSmrg	sigtrap_assert_active();
101403b705cfSriastradh	pixman_fill(op->dst.pixmap->devPrivate.ptr,
101503b705cfSriastradh		    op->dst.pixmap->devKind / sizeof(uint32_t),
101603b705cfSriastradh		    op->dst.pixmap->drawable.bitsPerPixel,
101703b705cfSriastradh		    x1, y1, x2-x1, y2-y1,
101803b705cfSriastradh		    op->u.blt.pixel);
101903b705cfSriastradh}
102003b705cfSriastradh
102142542f5fSchristosfastcall static void
102242542f5fSchristosblt_composite_fill_box_no_offset__cpu(struct sna *sna,
102342542f5fSchristos				      const struct sna_composite_op *op,
102442542f5fSchristos				      const BoxRec *box)
102542542f5fSchristos{
102642542f5fSchristos	assert(box->x1 >= 0);
102742542f5fSchristos	assert(box->y1 >= 0);
102842542f5fSchristos	assert(box->x2 <= op->dst.pixmap->drawable.width);
102942542f5fSchristos	assert(box->y2 <= op->dst.pixmap->drawable.height);
103042542f5fSchristos
103142542f5fSchristos	assert(op->dst.pixmap->devPrivate.ptr);
103242542f5fSchristos	assert(op->dst.pixmap->devKind);
1033fe8aea9eSmrg	sigtrap_assert_active();
103442542f5fSchristos	pixman_fill(op->dst.pixmap->devPrivate.ptr,
103542542f5fSchristos		    op->dst.pixmap->devKind / sizeof(uint32_t),
103642542f5fSchristos		    op->dst.pixmap->drawable.bitsPerPixel,
103742542f5fSchristos		    box->x1, box->y1, box->x2-box->x1, box->y2-box->y1,
103842542f5fSchristos		    op->u.blt.pixel);
103942542f5fSchristos}
104042542f5fSchristos
104142542f5fSchristosstatic void
104242542f5fSchristosblt_composite_fill_boxes_no_offset__cpu(struct sna *sna,
104342542f5fSchristos					const struct sna_composite_op *op,
104442542f5fSchristos					const BoxRec *box, int n)
104542542f5fSchristos{
104642542f5fSchristos	do {
104742542f5fSchristos		assert(box->x1 >= 0);
104842542f5fSchristos		assert(box->y1 >= 0);
104942542f5fSchristos		assert(box->x2 <= op->dst.pixmap->drawable.width);
105042542f5fSchristos		assert(box->y2 <= op->dst.pixmap->drawable.height);
105142542f5fSchristos
105242542f5fSchristos		assert(op->dst.pixmap->devPrivate.ptr);
105342542f5fSchristos		assert(op->dst.pixmap->devKind);
1054fe8aea9eSmrg		sigtrap_assert_active();
105542542f5fSchristos		pixman_fill(op->dst.pixmap->devPrivate.ptr,
105642542f5fSchristos			    op->dst.pixmap->devKind / sizeof(uint32_t),
105742542f5fSchristos			    op->dst.pixmap->drawable.bitsPerPixel,
105842542f5fSchristos			    box->x1, box->y1, box->x2-box->x1, box->y2-box->y1,
105942542f5fSchristos			    op->u.blt.pixel);
106042542f5fSchristos		box++;
106142542f5fSchristos	} while (--n);
106242542f5fSchristos}
106342542f5fSchristos
106403b705cfSriastradhfastcall static void
106503b705cfSriastradhblt_composite_fill_box__cpu(struct sna *sna,
106603b705cfSriastradh			    const struct sna_composite_op *op,
106703b705cfSriastradh			    const BoxRec *box)
106803b705cfSriastradh{
106942542f5fSchristos	assert(box->x1 + op->dst.x >= 0);
107042542f5fSchristos	assert(box->y1 + op->dst.y >= 0);
107142542f5fSchristos	assert(box->x2 + op->dst.x <= op->dst.pixmap->drawable.width);
107242542f5fSchristos	assert(box->y2 + op->dst.y <= op->dst.pixmap->drawable.height);
107342542f5fSchristos
107442542f5fSchristos	assert(op->dst.pixmap->devPrivate.ptr);
107542542f5fSchristos	assert(op->dst.pixmap->devKind);
1076fe8aea9eSmrg	sigtrap_assert_active();
107703b705cfSriastradh	pixman_fill(op->dst.pixmap->devPrivate.ptr,
107803b705cfSriastradh		    op->dst.pixmap->devKind / sizeof(uint32_t),
107903b705cfSriastradh		    op->dst.pixmap->drawable.bitsPerPixel,
108042542f5fSchristos		    box->x1 + op->dst.x, box->y1 + op->dst.y,
108142542f5fSchristos		    box->x2 - box->x1, box->y2 - box->y1,
108203b705cfSriastradh		    op->u.blt.pixel);
108303b705cfSriastradh}
108403b705cfSriastradh
108503b705cfSriastradhstatic void
108603b705cfSriastradhblt_composite_fill_boxes__cpu(struct sna *sna,
108703b705cfSriastradh			      const struct sna_composite_op *op,
108803b705cfSriastradh			      const BoxRec *box, int n)
108903b705cfSriastradh{
109003b705cfSriastradh	do {
109142542f5fSchristos		assert(box->x1 + op->dst.x >= 0);
109242542f5fSchristos		assert(box->y1 + op->dst.y >= 0);
109342542f5fSchristos		assert(box->x2 + op->dst.x <= op->dst.pixmap->drawable.width);
109442542f5fSchristos		assert(box->y2 + op->dst.y <= op->dst.pixmap->drawable.height);
109542542f5fSchristos
109642542f5fSchristos		assert(op->dst.pixmap->devPrivate.ptr);
109742542f5fSchristos		assert(op->dst.pixmap->devKind);
1098fe8aea9eSmrg		sigtrap_assert_active();
109903b705cfSriastradh		pixman_fill(op->dst.pixmap->devPrivate.ptr,
110003b705cfSriastradh			    op->dst.pixmap->devKind / sizeof(uint32_t),
110103b705cfSriastradh			    op->dst.pixmap->drawable.bitsPerPixel,
110242542f5fSchristos			    box->x1 + op->dst.x, box->y1 + op->dst.y,
110342542f5fSchristos			    box->x2 - box->x1, box->y2 - box->y1,
110403b705cfSriastradh			    op->u.blt.pixel);
110503b705cfSriastradh		box++;
110603b705cfSriastradh	} while (--n);
110703b705cfSriastradh}
110803b705cfSriastradh
110903b705cfSriastradhinline static void _sna_blt_fill_box(struct sna *sna,
111003b705cfSriastradh				     const struct sna_blt_state *blt,
111103b705cfSriastradh				     const BoxRec *box)
111203b705cfSriastradh{
111303b705cfSriastradh	struct kgem *kgem = &sna->kgem;
111403b705cfSriastradh	uint32_t *b;
111503b705cfSriastradh
111603b705cfSriastradh	DBG(("%s: (%d, %d), (%d, %d): %08x\n", __FUNCTION__,
111703b705cfSriastradh	     box->x1, box->y1, box->x2, box->y2,
111803b705cfSriastradh	     blt->pixel));
111903b705cfSriastradh
112003b705cfSriastradh	assert(box->x1 >= 0);
112103b705cfSriastradh	assert(box->y1 >= 0);
112203b705cfSriastradh	assert(box->y2 * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
112303b705cfSriastradh
112403b705cfSriastradh	if (!kgem_check_batch(kgem, 3))
112503b705cfSriastradh		sna_blt_fill_begin(sna, blt);
112603b705cfSriastradh
112742542f5fSchristos	assert(sna->kgem.mode == KGEM_BLT);
112803b705cfSriastradh	b = kgem->batch + kgem->nbatch;
112903b705cfSriastradh	kgem->nbatch += 3;
113003b705cfSriastradh	assert(kgem->nbatch < kgem->surface);
113103b705cfSriastradh
113203b705cfSriastradh	b[0] = blt->cmd;
113303b705cfSriastradh	*(uint64_t *)(b+1) = *(const uint64_t *)box;
113403b705cfSriastradh}
113503b705cfSriastradh
113603b705cfSriastradhinline static void _sna_blt_fill_boxes(struct sna *sna,
113703b705cfSriastradh				       const struct sna_blt_state *blt,
113803b705cfSriastradh				       const BoxRec *box,
113903b705cfSriastradh				       int nbox)
114003b705cfSriastradh{
114103b705cfSriastradh	struct kgem *kgem = &sna->kgem;
114203b705cfSriastradh	uint32_t cmd = blt->cmd;
114303b705cfSriastradh
114403b705cfSriastradh	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));
114503b705cfSriastradh
114603b705cfSriastradh	if (!kgem_check_batch(kgem, 3))
114703b705cfSriastradh		sna_blt_fill_begin(sna, blt);
114803b705cfSriastradh
114903b705cfSriastradh	do {
115003b705cfSriastradh		uint32_t *b = kgem->batch + kgem->nbatch;
115113496ba1Ssnj		int nbox_this_time, rem;
115203b705cfSriastradh
115342542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
115403b705cfSriastradh		nbox_this_time = nbox;
115513496ba1Ssnj		rem = kgem_batch_space(kgem);
115613496ba1Ssnj		if (3*nbox_this_time > rem)
115713496ba1Ssnj			nbox_this_time = rem / 3;
115813496ba1Ssnj		DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
115913496ba1Ssnj		     __FUNCTION__, nbox_this_time, nbox, rem));
116013496ba1Ssnj		assert(nbox_this_time > 0);
116103b705cfSriastradh		nbox -= nbox_this_time;
116203b705cfSriastradh
116303b705cfSriastradh		kgem->nbatch += 3 * nbox_this_time;
116403b705cfSriastradh		assert(kgem->nbatch < kgem->surface);
116503b705cfSriastradh		while (nbox_this_time >= 8) {
116603b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
116703b705cfSriastradh			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
116803b705cfSriastradh			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
116903b705cfSriastradh			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
117003b705cfSriastradh			b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++;
117103b705cfSriastradh			b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++;
117203b705cfSriastradh			b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++;
117303b705cfSriastradh			b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++;
117403b705cfSriastradh			b += 24;
117503b705cfSriastradh			nbox_this_time -= 8;
117603b705cfSriastradh		}
117703b705cfSriastradh		if (nbox_this_time & 4) {
117803b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
117903b705cfSriastradh			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
118003b705cfSriastradh			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
118103b705cfSriastradh			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
118203b705cfSriastradh			b += 12;
118303b705cfSriastradh		}
118403b705cfSriastradh		if (nbox_this_time & 2) {
118503b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
118603b705cfSriastradh			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
118703b705cfSriastradh			b += 6;
118803b705cfSriastradh		}
118903b705cfSriastradh		if (nbox_this_time & 1) {
119003b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
119103b705cfSriastradh		}
119203b705cfSriastradh
119303b705cfSriastradh		if (!nbox)
119403b705cfSriastradh			return;
119503b705cfSriastradh
119603b705cfSriastradh		sna_blt_fill_begin(sna, blt);
119703b705cfSriastradh	} while (1);
119803b705cfSriastradh}
119903b705cfSriastradh
120042542f5fSchristosstatic inline void _sna_blt_maybe_clear(const struct sna_composite_op *op, const BoxRec *box)
120142542f5fSchristos{
120242542f5fSchristos	if (box->x2 - box->x1 >= op->dst.width &&
120342542f5fSchristos	    box->y2 - box->y1 >= op->dst.height) {
120442542f5fSchristos		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
120542542f5fSchristos		if (op->dst.bo == priv->gpu_bo) {
1206fe8aea9eSmrg			sna_damage_all(&priv->gpu_damage, op->dst.pixmap);
1207fe8aea9eSmrg			sna_damage_destroy(&priv->cpu_damage);
120842542f5fSchristos			priv->clear = true;
120942542f5fSchristos			priv->clear_color = op->u.blt.pixel;
121042542f5fSchristos			DBG(("%s: pixmap=%ld marking clear [%08x]\n",
121142542f5fSchristos			     __FUNCTION__,
121242542f5fSchristos			     op->dst.pixmap->drawable.serialNumber,
121342542f5fSchristos			     op->u.blt.pixel));
1214fe8aea9eSmrg			((struct sna_composite_op *)op)->damage = NULL;
121542542f5fSchristos		}
121642542f5fSchristos	}
121742542f5fSchristos}
121842542f5fSchristos
121903b705cfSriastradhfastcall static void blt_composite_fill_box_no_offset(struct sna *sna,
122003b705cfSriastradh						      const struct sna_composite_op *op,
122103b705cfSriastradh						      const BoxRec *box)
122203b705cfSriastradh{
122303b705cfSriastradh	_sna_blt_fill_box(sna, &op->u.blt, box);
122442542f5fSchristos	_sna_blt_maybe_clear(op, box);
122503b705cfSriastradh}
122603b705cfSriastradh
122703b705cfSriastradhstatic void blt_composite_fill_boxes_no_offset(struct sna *sna,
122803b705cfSriastradh					       const struct sna_composite_op *op,
122903b705cfSriastradh					       const BoxRec *box, int n)
123003b705cfSriastradh{
123103b705cfSriastradh	_sna_blt_fill_boxes(sna, &op->u.blt, box, n);
123203b705cfSriastradh}
123303b705cfSriastradh
123403b705cfSriastradhstatic void blt_composite_fill_boxes_no_offset__thread(struct sna *sna,
123503b705cfSriastradh						       const struct sna_composite_op *op,
123603b705cfSriastradh						       const BoxRec *box, int nbox)
123703b705cfSriastradh{
123803b705cfSriastradh	struct kgem *kgem = &sna->kgem;
123903b705cfSriastradh	const struct sna_blt_state *blt = &op->u.blt;
124003b705cfSriastradh	uint32_t cmd = blt->cmd;
124103b705cfSriastradh
124203b705cfSriastradh	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));
124303b705cfSriastradh
124403b705cfSriastradh	sna_vertex_lock(&sna->render);
124542542f5fSchristos	assert(kgem->mode == KGEM_BLT);
124603b705cfSriastradh	if (!kgem_check_batch(kgem, 3)) {
124703b705cfSriastradh		sna_vertex_wait__locked(&sna->render);
124803b705cfSriastradh		sna_blt_fill_begin(sna, blt);
124903b705cfSriastradh	}
125003b705cfSriastradh
125103b705cfSriastradh	do {
125203b705cfSriastradh		uint32_t *b = kgem->batch + kgem->nbatch;
125313496ba1Ssnj		int nbox_this_time, rem;
125403b705cfSriastradh
125542542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
125603b705cfSriastradh		nbox_this_time = nbox;
125713496ba1Ssnj		rem = kgem_batch_space(kgem);
125813496ba1Ssnj		if (3*nbox_this_time > rem)
125913496ba1Ssnj			nbox_this_time = rem / 3;
126013496ba1Ssnj		DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
126113496ba1Ssnj		     __FUNCTION__, nbox_this_time, nbox, rem));
126213496ba1Ssnj		assert(nbox_this_time > 0);
126303b705cfSriastradh		nbox -= nbox_this_time;
126403b705cfSriastradh
126503b705cfSriastradh		kgem->nbatch += 3 * nbox_this_time;
126603b705cfSriastradh		assert(kgem->nbatch < kgem->surface);
126703b705cfSriastradh		sna_vertex_acquire__locked(&sna->render);
126803b705cfSriastradh		sna_vertex_unlock(&sna->render);
126903b705cfSriastradh
127003b705cfSriastradh		while (nbox_this_time >= 8) {
127103b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
127203b705cfSriastradh			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
127303b705cfSriastradh			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
127403b705cfSriastradh			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
127503b705cfSriastradh			b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++;
127603b705cfSriastradh			b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++;
127703b705cfSriastradh			b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++;
127803b705cfSriastradh			b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++;
127903b705cfSriastradh			b += 24;
128003b705cfSriastradh			nbox_this_time -= 8;
128103b705cfSriastradh		}
128203b705cfSriastradh		if (nbox_this_time & 4) {
128303b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
128403b705cfSriastradh			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
128503b705cfSriastradh			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
128603b705cfSriastradh			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
128703b705cfSriastradh			b += 12;
128803b705cfSriastradh		}
128903b705cfSriastradh		if (nbox_this_time & 2) {
129003b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
129103b705cfSriastradh			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
129203b705cfSriastradh			b += 6;
129303b705cfSriastradh		}
129403b705cfSriastradh		if (nbox_this_time & 1) {
129503b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
129603b705cfSriastradh		}
129703b705cfSriastradh
129803b705cfSriastradh		sna_vertex_lock(&sna->render);
129903b705cfSriastradh		sna_vertex_release__locked(&sna->render);
130003b705cfSriastradh		if (!nbox)
130103b705cfSriastradh			break;
130203b705cfSriastradh
130303b705cfSriastradh		sna_vertex_wait__locked(&sna->render);
130403b705cfSriastradh		sna_blt_fill_begin(sna, blt);
130503b705cfSriastradh	} while (1);
130603b705cfSriastradh	sna_vertex_unlock(&sna->render);
130703b705cfSriastradh}
130803b705cfSriastradh
130903b705cfSriastradhfastcall static void blt_composite_fill_box(struct sna *sna,
131003b705cfSriastradh					    const struct sna_composite_op *op,
131103b705cfSriastradh					    const BoxRec *box)
131203b705cfSriastradh{
131303b705cfSriastradh	sna_blt_fill_one(sna, &op->u.blt,
131403b705cfSriastradh			 box->x1 + op->dst.x,
131503b705cfSriastradh			 box->y1 + op->dst.y,
131603b705cfSriastradh			 box->x2 - box->x1,
131703b705cfSriastradh			 box->y2 - box->y1);
131842542f5fSchristos	_sna_blt_maybe_clear(op, box);
131903b705cfSriastradh}
132003b705cfSriastradh
132103b705cfSriastradhstatic void blt_composite_fill_boxes(struct sna *sna,
132203b705cfSriastradh				     const struct sna_composite_op *op,
132303b705cfSriastradh				     const BoxRec *box, int n)
132403b705cfSriastradh{
132503b705cfSriastradh	do {
132603b705cfSriastradh		sna_blt_fill_one(sna, &op->u.blt,
132703b705cfSriastradh				 box->x1 + op->dst.x, box->y1 + op->dst.y,
132803b705cfSriastradh				 box->x2 - box->x1, box->y2 - box->y1);
132903b705cfSriastradh		box++;
133003b705cfSriastradh	} while (--n);
133103b705cfSriastradh}
133203b705cfSriastradh
133303b705cfSriastradhstatic inline uint64_t add4(const BoxRec *b, int16_t x, int16_t y)
133403b705cfSriastradh{
133503b705cfSriastradh	union {
133603b705cfSriastradh		uint64_t v;
133703b705cfSriastradh		int16_t i[4];
133803b705cfSriastradh	} vi;
133903b705cfSriastradh	vi.v = *(uint64_t *)b;
134003b705cfSriastradh	vi.i[0] += x;
134103b705cfSriastradh	vi.i[1] += y;
134203b705cfSriastradh	vi.i[2] += x;
134303b705cfSriastradh	vi.i[3] += y;
134403b705cfSriastradh	return vi.v;
134503b705cfSriastradh}
134603b705cfSriastradh
/*
 * Box-fill variant that writes its packets directly into the batch buffer:
 * one 3-dword packet per box (the command dword followed by the 64-bit
 * translated box returned by add4()).
 *
 * Batch space for a chunk of boxes is claimed while the render vertex lock
 * is held (by advancing kgem->nbatch); the packets are then written after
 * the lock has been dropped, between sna_vertex_acquire__locked() and
 * sna_vertex_release__locked().
 * NOTE(review): presumably acquire/release keep the batch from being
 * submitted while it is still being filled in -- confirm against the
 * sna_vertex_* helpers.
 *
 * Each box is translated by (op->dst.x, op->dst.y).  The chunk size is
 * asserted to be positive, so callers must pass nbox >= 1.
 */
134703b705cfSriastradhstatic void blt_composite_fill_boxes__thread(struct sna *sna,
134803b705cfSriastradh					     const struct sna_composite_op *op,
134903b705cfSriastradh					     const BoxRec *box, int nbox)
135003b705cfSriastradh{
135103b705cfSriastradh	struct kgem *kgem = &sna->kgem;
135203b705cfSriastradh	const struct sna_blt_state *blt = &op->u.blt;
135303b705cfSriastradh	uint32_t cmd = blt->cmd;
135403b705cfSriastradh	int16_t dx = op->dst.x;
135503b705cfSriastradh	int16_t dy = op->dst.y;
135603b705cfSriastradh
135703b705cfSriastradh	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));
135803b705cfSriastradh
135903b705cfSriastradh	sna_vertex_lock(&sna->render);
136042542f5fSchristos	assert(kgem->mode == KGEM_BLT);
	/* Need room for at least one 3-dword packet before starting. */
136103b705cfSriastradh	if (!kgem_check_batch(kgem, 3)) {
136203b705cfSriastradh		sna_vertex_wait__locked(&sna->render);
136303b705cfSriastradh		sna_blt_fill_begin(sna, blt);
136403b705cfSriastradh	}
136503b705cfSriastradh
136603b705cfSriastradh	do {
		/* b marks the start of the dwords claimed for this chunk. */
136703b705cfSriastradh		uint32_t *b = kgem->batch + kgem->nbatch;
136813496ba1Ssnj		int nbox_this_time, rem;
136903b705cfSriastradh
137042542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
		/* Three dwords per box: clamp the chunk to the space left. */
137103b705cfSriastradh		nbox_this_time = nbox;
137213496ba1Ssnj		rem = kgem_batch_space(kgem);
137313496ba1Ssnj		if (3*nbox_this_time > rem)
137413496ba1Ssnj			nbox_this_time = rem / 3;
137513496ba1Ssnj		DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
137613496ba1Ssnj		     __FUNCTION__, nbox_this_time, nbox, rem));
137713496ba1Ssnj		assert(nbox_this_time > 0);
137803b705cfSriastradh		nbox -= nbox_this_time;
137903b705cfSriastradh
		/* Claim the space under the lock, then write it unlocked. */
138003b705cfSriastradh		kgem->nbatch += 3 * nbox_this_time;
138103b705cfSriastradh		assert(kgem->nbatch < kgem->surface);
138203b705cfSriastradh		sna_vertex_acquire__locked(&sna->render);
138303b705cfSriastradh		sna_vertex_unlock(&sna->render);
138403b705cfSriastradh
		/* Unrolled emission: groups of 8 first ... */
138503b705cfSriastradh		while (nbox_this_time >= 8) {
138603b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
138703b705cfSriastradh			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
138803b705cfSriastradh			b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
138903b705cfSriastradh			b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
139003b705cfSriastradh			b[12] = cmd; *(uint64_t *)(b+13) = add4(box++, dx, dy);
139103b705cfSriastradh			b[15] = cmd; *(uint64_t *)(b+16) = add4(box++, dx, dy);
139203b705cfSriastradh			b[18] = cmd; *(uint64_t *)(b+19) = add4(box++, dx, dy);
139303b705cfSriastradh			b[21] = cmd; *(uint64_t *)(b+22) = add4(box++, dx, dy);
139403b705cfSriastradh			b += 24;
139503b705cfSriastradh			nbox_this_time -= 8;
139603b705cfSriastradh		}
		/* ... then the remainder (< 8) via its 4, 2 and 1 bits. */
139703b705cfSriastradh		if (nbox_this_time & 4) {
139803b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
139903b705cfSriastradh			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
140003b705cfSriastradh			b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
140103b705cfSriastradh			b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
140203b705cfSriastradh			b += 12;
140303b705cfSriastradh		}
140403b705cfSriastradh		if (nbox_this_time & 2) {
140503b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
140603b705cfSriastradh			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
140703b705cfSriastradh			b += 6;
140803b705cfSriastradh		}
140903b705cfSriastradh		if (nbox_this_time & 1) {
141003b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
141103b705cfSriastradh		}
141203b705cfSriastradh
		/* Chunk written: retake the lock before touching shared state. */
141303b705cfSriastradh		sna_vertex_lock(&sna->render);
141403b705cfSriastradh		sna_vertex_release__locked(&sna->render);
141503b705cfSriastradh		if (!nbox)
141603b705cfSriastradh			break;
141703b705cfSriastradh
		/*
		 * Boxes remain but the batch is full.
		 * NOTE(review): presumably sna_blt_fill_begin() restarts the
		 * fill in a fresh batch -- confirm.
		 */
141803b705cfSriastradh		sna_vertex_wait__locked(&sna->render);
141903b705cfSriastradh		sna_blt_fill_begin(sna, blt);
142003b705cfSriastradh	} while (1);
142103b705cfSriastradh	sna_vertex_unlock(&sna->render);
142203b705cfSriastradh}
142303b705cfSriastradh
142403b705cfSriastradhfastcall
142503b705cfSriastradhstatic void blt_composite_nop(struct sna *sna,
142603b705cfSriastradh			       const struct sna_composite_op *op,
142703b705cfSriastradh			       const struct sna_composite_rectangles *r)
142803b705cfSriastradh{
142903b705cfSriastradh}
143003b705cfSriastradh
143103b705cfSriastradhfastcall static void blt_composite_nop_box(struct sna *sna,
143203b705cfSriastradh					   const struct sna_composite_op *op,
143303b705cfSriastradh					   const BoxRec *box)
143403b705cfSriastradh{
143503b705cfSriastradh}
143603b705cfSriastradh
143703b705cfSriastradhstatic void blt_composite_nop_boxes(struct sna *sna,
143803b705cfSriastradh				    const struct sna_composite_op *op,
143903b705cfSriastradh				    const BoxRec *box, int n)
144003b705cfSriastradh{
144103b705cfSriastradh}
144203b705cfSriastradh
144303b705cfSriastradhstatic bool
144403b705cfSriastradhbegin_blt(struct sna *sna,
144503b705cfSriastradh	  struct sna_composite_op *op)
144603b705cfSriastradh{
144713496ba1Ssnj	assert(sna->kgem.mode == KGEM_BLT);
144803b705cfSriastradh	if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo)) {
144903b705cfSriastradh		kgem_submit(&sna->kgem);
145003b705cfSriastradh		if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo))
145103b705cfSriastradh			return false;
145203b705cfSriastradh
145303b705cfSriastradh		_kgem_set_mode(&sna->kgem, KGEM_BLT);
1454fe8aea9eSmrg		kgem_bcs_set_tiling(&sna->kgem, NULL, op->dst.bo);
145503b705cfSriastradh	}
145603b705cfSriastradh
145703b705cfSriastradh	return true;
145803b705cfSriastradh}
145903b705cfSriastradh
146003b705cfSriastradhstatic bool
146103b705cfSriastradhprepare_blt_nop(struct sna *sna,
146203b705cfSriastradh		struct sna_composite_op *op)
146303b705cfSriastradh{
146403b705cfSriastradh	DBG(("%s\n", __FUNCTION__));
146503b705cfSriastradh
146603b705cfSriastradh	op->blt   = blt_composite_nop;
146703b705cfSriastradh	op->box   = blt_composite_nop_box;
146803b705cfSriastradh	op->boxes = blt_composite_nop_boxes;
146903b705cfSriastradh	op->done  = nop_done;
147003b705cfSriastradh	return true;
147103b705cfSriastradh}
147203b705cfSriastradh
147303b705cfSriastradhstatic bool
147403b705cfSriastradhprepare_blt_clear(struct sna *sna,
147503b705cfSriastradh		  struct sna_composite_op *op)
147603b705cfSriastradh{
147703b705cfSriastradh	DBG(("%s\n", __FUNCTION__));
147803b705cfSriastradh
147903b705cfSriastradh	if (op->dst.bo == NULL) {
1480fe8aea9eSmrg		op->u.blt.pixel = 0;
148103b705cfSriastradh		op->blt   = blt_composite_fill__cpu;
148242542f5fSchristos		if (op->dst.x|op->dst.y) {
148342542f5fSchristos			op->box   = blt_composite_fill_box__cpu;
148442542f5fSchristos			op->boxes = blt_composite_fill_boxes__cpu;
148542542f5fSchristos			op->thread_boxes = blt_composite_fill_boxes__cpu;
148642542f5fSchristos		} else {
148742542f5fSchristos			op->box   = blt_composite_fill_box_no_offset__cpu;
148842542f5fSchristos			op->boxes = blt_composite_fill_boxes_no_offset__cpu;
148942542f5fSchristos			op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu;
149042542f5fSchristos		}
1491fe8aea9eSmrg		op->done = sig_done;
1492fe8aea9eSmrg		return sigtrap_get() == 0;
149303b705cfSriastradh	}
149403b705cfSriastradh
149503b705cfSriastradh	op->blt = blt_composite_fill;
149603b705cfSriastradh	if (op->dst.x|op->dst.y) {
149703b705cfSriastradh		op->box   = blt_composite_fill_box;
149803b705cfSriastradh		op->boxes = blt_composite_fill_boxes;
149903b705cfSriastradh		op->thread_boxes = blt_composite_fill_boxes__thread;
150003b705cfSriastradh	} else {
150103b705cfSriastradh		op->box   = blt_composite_fill_box_no_offset;
150203b705cfSriastradh		op->boxes = blt_composite_fill_boxes_no_offset;
150303b705cfSriastradh		op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
150403b705cfSriastradh	}
150503b705cfSriastradh	op->done = nop_done;
150603b705cfSriastradh
150703b705cfSriastradh	if (!sna_blt_fill_init(sna, &op->u.blt,
150842542f5fSchristos			       op->dst.bo,
150942542f5fSchristos			       op->dst.pixmap->drawable.bitsPerPixel,
151042542f5fSchristos			       GXclear, 0))
151103b705cfSriastradh		return false;
151203b705cfSriastradh
151303b705cfSriastradh	return begin_blt(sna, op);
151403b705cfSriastradh}
151503b705cfSriastradh
151603b705cfSriastradhstatic bool
151703b705cfSriastradhprepare_blt_fill(struct sna *sna,
151803b705cfSriastradh		 struct sna_composite_op *op,
151903b705cfSriastradh		 uint32_t pixel)
152003b705cfSriastradh{
152103b705cfSriastradh	DBG(("%s\n", __FUNCTION__));
152203b705cfSriastradh
152303b705cfSriastradh	if (op->dst.bo == NULL) {
152403b705cfSriastradh		op->u.blt.pixel = pixel;
152503b705cfSriastradh		op->blt = blt_composite_fill__cpu;
152642542f5fSchristos		if (op->dst.x|op->dst.y) {
152742542f5fSchristos			op->box   = blt_composite_fill_box__cpu;
152842542f5fSchristos			op->boxes = blt_composite_fill_boxes__cpu;
152942542f5fSchristos			op->thread_boxes = blt_composite_fill_boxes__cpu;
153042542f5fSchristos		} else {
153142542f5fSchristos			op->box   = blt_composite_fill_box_no_offset__cpu;
153242542f5fSchristos			op->boxes = blt_composite_fill_boxes_no_offset__cpu;
153342542f5fSchristos			op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu;
153442542f5fSchristos		}
1535fe8aea9eSmrg		op->done = sig_done;
1536fe8aea9eSmrg		return sigtrap_get() == 0;
153703b705cfSriastradh	}
153803b705cfSriastradh
153903b705cfSriastradh	op->blt = blt_composite_fill;
154003b705cfSriastradh	if (op->dst.x|op->dst.y) {
154103b705cfSriastradh		op->box   = blt_composite_fill_box;
154203b705cfSriastradh		op->boxes = blt_composite_fill_boxes;
154303b705cfSriastradh		op->thread_boxes = blt_composite_fill_boxes__thread;
154403b705cfSriastradh	} else {
154503b705cfSriastradh		op->box   = blt_composite_fill_box_no_offset;
154603b705cfSriastradh		op->boxes = blt_composite_fill_boxes_no_offset;
154703b705cfSriastradh		op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
154803b705cfSriastradh	}
154903b705cfSriastradh	op->done = nop_done;
155003b705cfSriastradh
155103b705cfSriastradh	if (!sna_blt_fill_init(sna, &op->u.blt, op->dst.bo,
155203b705cfSriastradh			       op->dst.pixmap->drawable.bitsPerPixel,
155303b705cfSriastradh			       GXcopy, pixel))
155403b705cfSriastradh		return false;
155503b705cfSriastradh
155603b705cfSriastradh	return begin_blt(sna, op);
155703b705cfSriastradh}
155803b705cfSriastradh
155903b705cfSriastradhfastcall static void
156003b705cfSriastradhblt_composite_copy(struct sna *sna,
156103b705cfSriastradh		   const struct sna_composite_op *op,
156203b705cfSriastradh		   const struct sna_composite_rectangles *r)
156303b705cfSriastradh{
156403b705cfSriastradh	int x1, x2, y1, y2;
156503b705cfSriastradh	int src_x, src_y;
156603b705cfSriastradh
156703b705cfSriastradh	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
156803b705cfSriastradh	     __FUNCTION__,
156903b705cfSriastradh	     r->src.x, r->src.y,
157003b705cfSriastradh	     r->dst.x, r->dst.y,
157103b705cfSriastradh	     r->width, r->height));
157203b705cfSriastradh
157303b705cfSriastradh	/* XXX higher layer should have clipped? */
157403b705cfSriastradh
157503b705cfSriastradh	x1 = r->dst.x + op->dst.x;
157603b705cfSriastradh	y1 = r->dst.y + op->dst.y;
157703b705cfSriastradh	x2 = x1 + r->width;
157803b705cfSriastradh	y2 = y1 + r->height;
157903b705cfSriastradh
158042542f5fSchristos	src_x = r->src.x - x1 + op->u.blt.sx;
158142542f5fSchristos	src_y = r->src.y - y1 + op->u.blt.sy;
158203b705cfSriastradh
158303b705cfSriastradh	/* clip against dst */
158403b705cfSriastradh	if (x1 < 0)
158503b705cfSriastradh		x1 = 0;
158603b705cfSriastradh	if (y1 < 0)
158703b705cfSriastradh		y1 = 0;
158803b705cfSriastradh
158903b705cfSriastradh	if (x2 > op->dst.width)
159003b705cfSriastradh		x2 = op->dst.width;
159103b705cfSriastradh
159203b705cfSriastradh	if (y2 > op->dst.height)
159303b705cfSriastradh		y2 = op->dst.height;
159403b705cfSriastradh
159503b705cfSriastradh	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));
159603b705cfSriastradh
159703b705cfSriastradh	if (x2 <= x1 || y2 <= y1)
159803b705cfSriastradh		return;
159903b705cfSriastradh
160003b705cfSriastradh	sna_blt_copy_one(sna, &op->u.blt,
160103b705cfSriastradh			 x1 + src_x, y1 + src_y,
160203b705cfSriastradh			 x2 - x1, y2 - y1,
160303b705cfSriastradh			 x1, y1);
160403b705cfSriastradh}
160503b705cfSriastradh
160603b705cfSriastradhfastcall static void blt_composite_copy_box(struct sna *sna,
160703b705cfSriastradh					    const struct sna_composite_op *op,
160803b705cfSriastradh					    const BoxRec *box)
160903b705cfSriastradh{
161003b705cfSriastradh	DBG(("%s: box (%d, %d), (%d, %d)\n",
161103b705cfSriastradh	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
161203b705cfSriastradh	sna_blt_copy_one(sna, &op->u.blt,
161303b705cfSriastradh			 box->x1 + op->u.blt.sx,
161403b705cfSriastradh			 box->y1 + op->u.blt.sy,
161503b705cfSriastradh			 box->x2 - box->x1,
161603b705cfSriastradh			 box->y2 - box->y1,
161703b705cfSriastradh			 box->x1 + op->dst.x,
161803b705cfSriastradh			 box->y1 + op->dst.y);
161903b705cfSriastradh}
162003b705cfSriastradh
162103b705cfSriastradhstatic void blt_composite_copy_boxes(struct sna *sna,
162203b705cfSriastradh				     const struct sna_composite_op *op,
162303b705cfSriastradh				     const BoxRec *box, int nbox)
162403b705cfSriastradh{
162503b705cfSriastradh	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
162603b705cfSriastradh	do {
162703b705cfSriastradh		DBG(("%s: box (%d, %d), (%d, %d)\n",
162803b705cfSriastradh		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
162903b705cfSriastradh		sna_blt_copy_one(sna, &op->u.blt,
163003b705cfSriastradh				 box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
163103b705cfSriastradh				 box->x2 - box->x1, box->y2 - box->y1,
163203b705cfSriastradh				 box->x1 + op->dst.x, box->y1 + op->dst.y);
163303b705cfSriastradh		box++;
163403b705cfSriastradh	} while(--nbox);
163503b705cfSriastradh}
163603b705cfSriastradh
/*
 * Translate a packed coordinate pair by (x, y).
 *
 * v holds an x coordinate in its low 16 bits and a y coordinate in its
 * high 16 bits.  The return value uses the same packing, with x added to
 * the low half and y to the high half; each half wraps modulo 2^16
 * independently.
 *
 * Both halves are handled as unsigned 16-bit values so that a negative x
 * result cannot sign-extend into (and overwrite) the y half, and so that
 * the high half is shifted as a uint32_t rather than as a promoted int.
 */
static inline uint32_t add2(uint32_t v, int16_t x, int16_t y)
{
	uint16_t lo = (uint16_t)((v & 0xffff) + (uint16_t)x);
	uint16_t hi = (uint16_t)((v >> 16) + (uint16_t)y);

	return (uint32_t)hi << 16 | lo;
}
164303b705cfSriastradh
/*
 * Emit one copy packet per box straight into the batch buffer, holding the
 * render vertex lock for the whole call.
 *
 * Every packet occupies 8 dwords and adds two relocations (destination bo
 * at dword 4, source bo at dword 7), so each chunk of boxes is limited both
 * by the remaining batch space and by the remaining relocation slots.  When
 * a chunk has been written and boxes remain, the batch is submitted and BLT
 * mode and tiling are set up again before continuing.
 *
 * Source coordinates are the box origin translated by op->src.offset[];
 * destination coordinates are the box translated by (op->dst.x, op->dst.y).
 *
 * The chunk size is asserted to be positive and the inner loops emit a box
 * before testing the count, so callers must pass nbox >= 1.
 */
164403b705cfSriastradhstatic void blt_composite_copy_boxes__thread(struct sna *sna,
164503b705cfSriastradh					     const struct sna_composite_op *op,
164603b705cfSriastradh					     const BoxRec *box, int nbox)
164703b705cfSriastradh{
164803b705cfSriastradh	struct kgem *kgem = &sna->kgem;
164903b705cfSriastradh	int dst_dx = op->dst.x;
165003b705cfSriastradh	int dst_dy = op->dst.y;
165103b705cfSriastradh	int src_dx = op->src.offset[0];
165203b705cfSriastradh	int src_dy = op->src.offset[1];
165303b705cfSriastradh	uint32_t cmd = op->u.blt.cmd;
165403b705cfSriastradh	uint32_t br13 = op->u.blt.br13;
165503b705cfSriastradh	struct kgem_bo *src_bo = op->u.blt.bo[0];
165603b705cfSriastradh	struct kgem_bo *dst_bo = op->u.blt.bo[1];
165703b705cfSriastradh	int src_pitch = op->u.blt.pitch[0];
165803b705cfSriastradh
165903b705cfSriastradh	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
166003b705cfSriastradh
166103b705cfSriastradh	sna_vertex_lock(&sna->render);
166203b705cfSriastradh
	/*
	 * No destination offset: the two header dwords and the box itself
	 * can each be stored as a single 64-bit word.
	 */
166303b705cfSriastradh	if ((dst_dx | dst_dy) == 0) {
166403b705cfSriastradh		uint64_t hdr = (uint64_t)br13 << 32 | cmd;
166503b705cfSriastradh		do {
166613496ba1Ssnj			int nbox_this_time, rem;
166703b705cfSriastradh
			/* Clamp to batch space (8 dwords/box) and relocs (2/box). */
166803b705cfSriastradh			nbox_this_time = nbox;
166913496ba1Ssnj			rem = kgem_batch_space(kgem);
167013496ba1Ssnj			if (8*nbox_this_time > rem)
167113496ba1Ssnj				nbox_this_time = rem / 8;
167203b705cfSriastradh			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
167303b705cfSriastradh				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
167413496ba1Ssnj			DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
167513496ba1Ssnj			     __FUNCTION__, nbox_this_time, nbox, rem));
167613496ba1Ssnj			assert(nbox_this_time > 0);
167703b705cfSriastradh			nbox -= nbox_this_time;
167803b705cfSriastradh
167942542f5fSchristos			assert(sna->kgem.mode == KGEM_BLT);
168003b705cfSriastradh			do {
168103b705cfSriastradh				uint32_t *b = kgem->batch + kgem->nbatch;
168203b705cfSriastradh
168303b705cfSriastradh				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
168403b705cfSriastradh				     __FUNCTION__,
168503b705cfSriastradh				     box->x1, box->y1,
168603b705cfSriastradh				     box->x2 - box->x1, box->y2 - box->y1));
168703b705cfSriastradh
168803b705cfSriastradh				assert(box->x1 + src_dx >= 0);
168903b705cfSriastradh				assert(box->y1 + src_dy >= 0);
169003b705cfSriastradh				assert(box->x1 + src_dx <= INT16_MAX);
169103b705cfSriastradh				assert(box->y1 + src_dy <= INT16_MAX);
169203b705cfSriastradh
169303b705cfSriastradh				assert(box->x1 >= 0);
169403b705cfSriastradh				assert(box->y1 >= 0);
169503b705cfSriastradh
169603b705cfSriastradh				*(uint64_t *)&b[0] = hdr;
169703b705cfSriastradh				*(uint64_t *)&b[2] = *(const uint64_t *)box;
169803b705cfSriastradh				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
169903b705cfSriastradh						      I915_GEM_DOMAIN_RENDER << 16 |
170003b705cfSriastradh						      I915_GEM_DOMAIN_RENDER |
170103b705cfSriastradh						      KGEM_RELOC_FENCED,
170203b705cfSriastradh						      0);
				/* Source origin: the x1/y1 just stored in b[2], plus the source offset. */
170303b705cfSriastradh				b[5] = add2(b[2], src_dx, src_dy);
170403b705cfSriastradh				b[6] = src_pitch;
170503b705cfSriastradh				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
170603b705cfSriastradh						      I915_GEM_DOMAIN_RENDER << 16 |
170703b705cfSriastradh						      KGEM_RELOC_FENCED,
170803b705cfSriastradh						      0);
170903b705cfSriastradh				kgem->nbatch += 8;
171003b705cfSriastradh				assert(kgem->nbatch < kgem->surface);
171103b705cfSriastradh				box++;
171203b705cfSriastradh			} while (--nbox_this_time);
171303b705cfSriastradh
171403b705cfSriastradh			if (!nbox)
171503b705cfSriastradh				break;
171603b705cfSriastradh
			/* Boxes remain: submit and set BLT mode/tiling up again. */
171703b705cfSriastradh			_kgem_submit(kgem);
171803b705cfSriastradh			_kgem_set_mode(kgem, KGEM_BLT);
1719fe8aea9eSmrg			kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
172003b705cfSriastradh		} while (1);
172103b705cfSriastradh	} else {
		/* Destination is offset: build every coordinate dword by hand. */
172203b705cfSriastradh		do {
172313496ba1Ssnj			int nbox_this_time, rem;
172403b705cfSriastradh
			/* Clamp to batch space (8 dwords/box) and relocs (2/box). */
172503b705cfSriastradh			nbox_this_time = nbox;
172613496ba1Ssnj			rem = kgem_batch_space(kgem);
172713496ba1Ssnj			if (8*nbox_this_time > rem)
172813496ba1Ssnj				nbox_this_time = rem / 8;
172903b705cfSriastradh			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
173003b705cfSriastradh				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
173113496ba1Ssnj			DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
173213496ba1Ssnj			     __FUNCTION__, nbox_this_time, nbox, rem));
173313496ba1Ssnj			assert(nbox_this_time > 0);
173403b705cfSriastradh			nbox -= nbox_this_time;
173503b705cfSriastradh
173642542f5fSchristos			assert(sna->kgem.mode == KGEM_BLT);
173703b705cfSriastradh			do {
173803b705cfSriastradh				uint32_t *b = kgem->batch + kgem->nbatch;
173903b705cfSriastradh
174003b705cfSriastradh				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
174103b705cfSriastradh				     __FUNCTION__,
174203b705cfSriastradh				     box->x1, box->y1,
174303b705cfSriastradh				     box->x2 - box->x1, box->y2 - box->y1));
174403b705cfSriastradh
174503b705cfSriastradh				assert(box->x1 + src_dx >= 0);
174603b705cfSriastradh				assert(box->y1 + src_dy >= 0);
174703b705cfSriastradh
174803b705cfSriastradh				assert(box->x1 + dst_dx >= 0);
174903b705cfSriastradh				assert(box->y1 + dst_dy >= 0);
175003b705cfSriastradh
175103b705cfSriastradh				b[0] = cmd;
175203b705cfSriastradh				b[1] = br13;
				/* y in the high 16 bits, x in the low 16 bits. */
175303b705cfSriastradh				b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
175403b705cfSriastradh				b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
175503b705cfSriastradh				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
175603b705cfSriastradh						      I915_GEM_DOMAIN_RENDER << 16 |
175703b705cfSriastradh						      I915_GEM_DOMAIN_RENDER |
175803b705cfSriastradh						      KGEM_RELOC_FENCED,
175903b705cfSriastradh						      0);
176003b705cfSriastradh				b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
176103b705cfSriastradh				b[6] = src_pitch;
176203b705cfSriastradh				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
176303b705cfSriastradh						      I915_GEM_DOMAIN_RENDER << 16 |
176403b705cfSriastradh						      KGEM_RELOC_FENCED,
176503b705cfSriastradh						      0);
176603b705cfSriastradh				kgem->nbatch += 8;
176703b705cfSriastradh				assert(kgem->nbatch < kgem->surface);
176803b705cfSriastradh				box++;
176903b705cfSriastradh			} while (--nbox_this_time);
177003b705cfSriastradh
177103b705cfSriastradh			if (!nbox)
177203b705cfSriastradh				break;
177303b705cfSriastradh
			/* Boxes remain: submit and set BLT mode/tiling up again. */
177403b705cfSriastradh			_kgem_submit(kgem);
177503b705cfSriastradh			_kgem_set_mode(kgem, KGEM_BLT);
1776fe8aea9eSmrg			kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
177703b705cfSriastradh		} while (1);
177803b705cfSriastradh	}
177903b705cfSriastradh	sna_vertex_unlock(&sna->render);
178003b705cfSriastradh}
178103b705cfSriastradh
/*
 * Variant of blt_composite_copy_boxes__thread() that uses 64-bit
 * relocations: kgem_add_reloc64() results are stored as 64-bit words at
 * dwords 4-5 (destination bo) and 8-9 (source bo), so every packet occupies
 * 10 dwords instead of 8.
 *
 * As in the 32-bit variant, the render vertex lock is held for the whole
 * call, each chunk is limited by the remaining batch space and relocation
 * slots, and the batch is submitted (with BLT mode and tiling set up again)
 * whenever boxes remain after a chunk.
 *
 * Callers must pass nbox >= 1 (the chunk size is asserted to be positive
 * and the inner loops emit a box before testing the count).
 */
178242542f5fSchristosstatic void blt_composite_copy_boxes__thread64(struct sna *sna,
178342542f5fSchristos					       const struct sna_composite_op *op,
178442542f5fSchristos					       const BoxRec *box, int nbox)
178503b705cfSriastradh{
178642542f5fSchristos	struct kgem *kgem = &sna->kgem;
178742542f5fSchristos	int dst_dx = op->dst.x;
178842542f5fSchristos	int dst_dy = op->dst.y;
178942542f5fSchristos	int src_dx = op->src.offset[0];
179042542f5fSchristos	int src_dy = op->src.offset[1];
179142542f5fSchristos	uint32_t cmd = op->u.blt.cmd;
179242542f5fSchristos	uint32_t br13 = op->u.blt.br13;
179342542f5fSchristos	struct kgem_bo *src_bo = op->u.blt.bo[0];
179442542f5fSchristos	struct kgem_bo *dst_bo = op->u.blt.bo[1];
179542542f5fSchristos	int src_pitch = op->u.blt.pitch[0];
179603b705cfSriastradh
179742542f5fSchristos	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
179803b705cfSriastradh
179942542f5fSchristos	sna_vertex_lock(&sna->render);
180003b705cfSriastradh
	/*
	 * No destination offset: the two header dwords and the box itself
	 * can each be stored as a single 64-bit word.
	 */
180142542f5fSchristos	if ((dst_dx | dst_dy) == 0) {
180242542f5fSchristos		uint64_t hdr = (uint64_t)br13 << 32 | cmd;
180342542f5fSchristos		do {
180413496ba1Ssnj			int nbox_this_time, rem;
180503b705cfSriastradh
			/* Clamp to batch space (10 dwords/box) and relocs (2/box). */
180642542f5fSchristos			nbox_this_time = nbox;
180713496ba1Ssnj			rem = kgem_batch_space(kgem);
180813496ba1Ssnj			if (10*nbox_this_time > rem)
180913496ba1Ssnj				nbox_this_time = rem / 10;
181042542f5fSchristos			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
181142542f5fSchristos				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
181213496ba1Ssnj			DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
181313496ba1Ssnj			     __FUNCTION__, nbox_this_time, nbox, rem));
181413496ba1Ssnj			assert(nbox_this_time > 0);
181542542f5fSchristos			nbox -= nbox_this_time;
181603b705cfSriastradh
181742542f5fSchristos			assert(kgem->mode == KGEM_BLT);
181842542f5fSchristos			do {
181942542f5fSchristos				uint32_t *b = kgem->batch + kgem->nbatch;
182003b705cfSriastradh
182142542f5fSchristos				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
182242542f5fSchristos				     __FUNCTION__,
182342542f5fSchristos				     box->x1, box->y1,
182442542f5fSchristos				     box->x2 - box->x1, box->y2 - box->y1));
182503b705cfSriastradh
182642542f5fSchristos				assert(box->x1 + src_dx >= 0);
182742542f5fSchristos				assert(box->y1 + src_dy >= 0);
182842542f5fSchristos				assert(box->x1 + src_dx <= INT16_MAX);
182942542f5fSchristos				assert(box->y1 + src_dy <= INT16_MAX);
183003b705cfSriastradh
183142542f5fSchristos				assert(box->x1 >= 0);
183242542f5fSchristos				assert(box->y1 >= 0);
183303b705cfSriastradh
183442542f5fSchristos				*(uint64_t *)&b[0] = hdr;
183542542f5fSchristos				*(uint64_t *)&b[2] = *(const uint64_t *)box;
				/* 64-bit destination relocation fills dwords 4 and 5. */
183642542f5fSchristos				*(uint64_t *)(b+4) =
183742542f5fSchristos					kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
183842542f5fSchristos							 I915_GEM_DOMAIN_RENDER << 16 |
183942542f5fSchristos							 I915_GEM_DOMAIN_RENDER |
184042542f5fSchristos							 KGEM_RELOC_FENCED,
184142542f5fSchristos							 0);
				/* Source origin: the x1/y1 just stored in b[2], plus the source offset. */
184242542f5fSchristos				b[6] = add2(b[2], src_dx, src_dy);
184342542f5fSchristos				b[7] = src_pitch;
				/* 64-bit source relocation fills dwords 8 and 9. */
184442542f5fSchristos				*(uint64_t *)(b+8) =
184542542f5fSchristos					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
184642542f5fSchristos							 I915_GEM_DOMAIN_RENDER << 16 |
184742542f5fSchristos							 KGEM_RELOC_FENCED,
184842542f5fSchristos							 0);
184942542f5fSchristos				kgem->nbatch += 10;
185042542f5fSchristos				assert(kgem->nbatch < kgem->surface);
185142542f5fSchristos				box++;
185242542f5fSchristos			} while (--nbox_this_time);
185303b705cfSriastradh
185442542f5fSchristos			if (!nbox)
185542542f5fSchristos				break;
185603b705cfSriastradh
			/* Boxes remain: submit and set BLT mode/tiling up again. */
185742542f5fSchristos			_kgem_submit(kgem);
185842542f5fSchristos			_kgem_set_mode(kgem, KGEM_BLT);
1859fe8aea9eSmrg			kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
186042542f5fSchristos		} while (1);
186142542f5fSchristos	} else {
		/* Destination is offset: build every coordinate dword by hand. */
186242542f5fSchristos		do {
186313496ba1Ssnj			int nbox_this_time, rem;
186442542f5fSchristos
			/* Clamp to batch space (10 dwords/box) and relocs (2/box). */
186542542f5fSchristos			nbox_this_time = nbox;
186613496ba1Ssnj			rem = kgem_batch_space(kgem);
186713496ba1Ssnj			if (10*nbox_this_time > rem)
186813496ba1Ssnj				nbox_this_time = rem / 10;
186942542f5fSchristos			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
187042542f5fSchristos				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
187113496ba1Ssnj			DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
187213496ba1Ssnj			     __FUNCTION__, nbox_this_time, nbox, rem));
187313496ba1Ssnj			assert(nbox_this_time > 0);
187442542f5fSchristos			nbox -= nbox_this_time;
187542542f5fSchristos
187642542f5fSchristos			assert(kgem->mode == KGEM_BLT);
187742542f5fSchristos			do {
187842542f5fSchristos				uint32_t *b = kgem->batch + kgem->nbatch;
187942542f5fSchristos
188042542f5fSchristos				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
188142542f5fSchristos				     __FUNCTION__,
188242542f5fSchristos				     box->x1, box->y1,
188342542f5fSchristos				     box->x2 - box->x1, box->y2 - box->y1));
188442542f5fSchristos
188542542f5fSchristos				assert(box->x1 + src_dx >= 0);
188642542f5fSchristos				assert(box->y1 + src_dy >= 0);
188742542f5fSchristos
188842542f5fSchristos				assert(box->x1 + dst_dx >= 0);
188942542f5fSchristos				assert(box->y1 + dst_dy >= 0);
189042542f5fSchristos
189142542f5fSchristos				b[0] = cmd;
189242542f5fSchristos				b[1] = br13;
				/* y in the high 16 bits, x in the low 16 bits. */
189342542f5fSchristos				b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
189442542f5fSchristos				b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
				/* 64-bit destination relocation fills dwords 4 and 5. */
189542542f5fSchristos				*(uint64_t *)(b+4) =
189642542f5fSchristos					kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
189742542f5fSchristos							 I915_GEM_DOMAIN_RENDER << 16 |
189842542f5fSchristos							 I915_GEM_DOMAIN_RENDER |
189942542f5fSchristos							 KGEM_RELOC_FENCED,
190042542f5fSchristos							 0);
190142542f5fSchristos				b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
190242542f5fSchristos				b[7] = src_pitch;
				/* 64-bit source relocation fills dwords 8 and 9. */
190342542f5fSchristos				*(uint64_t *)(b+8) =
190442542f5fSchristos					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
190542542f5fSchristos							 I915_GEM_DOMAIN_RENDER << 16 |
190642542f5fSchristos							 KGEM_RELOC_FENCED,
190742542f5fSchristos							 0);
190842542f5fSchristos				kgem->nbatch += 10;
190942542f5fSchristos				assert(kgem->nbatch < kgem->surface);
191042542f5fSchristos				box++;
191142542f5fSchristos			} while (--nbox_this_time);
191242542f5fSchristos
191342542f5fSchristos			if (!nbox)
191442542f5fSchristos				break;
191542542f5fSchristos
			/* Boxes remain: submit and set BLT mode/tiling up again. */
191642542f5fSchristos			_kgem_submit(kgem);
191742542f5fSchristos			_kgem_set_mode(kgem, KGEM_BLT);
1918fe8aea9eSmrg			kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
191942542f5fSchristos		} while (1);
192042542f5fSchristos	}
192142542f5fSchristos	sna_vertex_unlock(&sna->render);
192242542f5fSchristos}
192342542f5fSchristos
192442542f5fSchristosfastcall static void
192542542f5fSchristosblt_composite_copy_with_alpha(struct sna *sna,
192642542f5fSchristos			      const struct sna_composite_op *op,
192742542f5fSchristos			      const struct sna_composite_rectangles *r)
192842542f5fSchristos{
192942542f5fSchristos	int x1, x2, y1, y2;
193042542f5fSchristos	int src_x, src_y;
193142542f5fSchristos
193242542f5fSchristos	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
193342542f5fSchristos	     __FUNCTION__,
193442542f5fSchristos	     r->src.x, r->src.y,
193542542f5fSchristos	     r->dst.x, r->dst.y,
193642542f5fSchristos	     r->width, r->height));
193742542f5fSchristos
193842542f5fSchristos	/* XXX higher layer should have clipped? */
193942542f5fSchristos
194042542f5fSchristos	x1 = r->dst.x + op->dst.x;
194142542f5fSchristos	y1 = r->dst.y + op->dst.y;
194242542f5fSchristos	x2 = x1 + r->width;
194342542f5fSchristos	y2 = y1 + r->height;
194442542f5fSchristos
194542542f5fSchristos	src_x = r->src.x - x1 + op->u.blt.sx;
194642542f5fSchristos	src_y = r->src.y - y1 + op->u.blt.sy;
194742542f5fSchristos
194842542f5fSchristos	/* clip against dst */
194942542f5fSchristos	if (x1 < 0)
195042542f5fSchristos		x1 = 0;
195142542f5fSchristos	if (y1 < 0)
195242542f5fSchristos		y1 = 0;
195342542f5fSchristos
195442542f5fSchristos	if (x2 > op->dst.width)
195542542f5fSchristos		x2 = op->dst.width;
195642542f5fSchristos
195742542f5fSchristos	if (y2 > op->dst.height)
195842542f5fSchristos		y2 = op->dst.height;
195942542f5fSchristos
196042542f5fSchristos	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));
196142542f5fSchristos
196242542f5fSchristos	if (x2 <= x1 || y2 <= y1)
196342542f5fSchristos		return;
196442542f5fSchristos
196542542f5fSchristos	sna_blt_alpha_fixup_one(sna, &op->u.blt,
196642542f5fSchristos				x1 + src_x, y1 + src_y,
196742542f5fSchristos				x2 - x1, y2 - y1,
196842542f5fSchristos				x1, y1);
196942542f5fSchristos}
197042542f5fSchristos
197142542f5fSchristosfastcall static void
197242542f5fSchristosblt_composite_copy_box_with_alpha(struct sna *sna,
197342542f5fSchristos				  const struct sna_composite_op *op,
197442542f5fSchristos				  const BoxRec *box)
197542542f5fSchristos{
197642542f5fSchristos	DBG(("%s: box (%d, %d), (%d, %d)\n",
197742542f5fSchristos	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
197842542f5fSchristos	sna_blt_alpha_fixup_one(sna, &op->u.blt,
197942542f5fSchristos				box->x1 + op->u.blt.sx,
198042542f5fSchristos				box->y1 + op->u.blt.sy,
198142542f5fSchristos				box->x2 - box->x1,
198242542f5fSchristos				box->y2 - box->y1,
198342542f5fSchristos				box->x1 + op->dst.x,
198442542f5fSchristos				box->y1 + op->dst.y);
198542542f5fSchristos}
198642542f5fSchristos
198742542f5fSchristosstatic void
198842542f5fSchristosblt_composite_copy_boxes_with_alpha(struct sna *sna,
198942542f5fSchristos				    const struct sna_composite_op *op,
199042542f5fSchristos				    const BoxRec *box, int nbox)
199103b705cfSriastradh{
199203b705cfSriastradh	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
199303b705cfSriastradh	do {
199403b705cfSriastradh		DBG(("%s: box (%d, %d), (%d, %d)\n",
199503b705cfSriastradh		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
199603b705cfSriastradh		sna_blt_alpha_fixup_one(sna, &op->u.blt,
199703b705cfSriastradh					box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
199803b705cfSriastradh					box->x2 - box->x1, box->y2 - box->y1,
199903b705cfSriastradh					box->x1 + op->dst.x, box->y1 + op->dst.y);
200003b705cfSriastradh		box++;
200103b705cfSriastradh	} while(--nbox);
200203b705cfSriastradh}
200303b705cfSriastradh
200403b705cfSriastradhstatic bool
200503b705cfSriastradhprepare_blt_copy(struct sna *sna,
200603b705cfSriastradh		 struct sna_composite_op *op,
200703b705cfSriastradh		 struct kgem_bo *bo,
200803b705cfSriastradh		 uint32_t alpha_fixup)
200903b705cfSriastradh{
201003b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
201103b705cfSriastradh
201203b705cfSriastradh	assert(op->dst.bo);
201303b705cfSriastradh	assert(kgem_bo_can_blt(&sna->kgem, op->dst.bo));
201403b705cfSriastradh	assert(kgem_bo_can_blt(&sna->kgem, bo));
201503b705cfSriastradh
201642542f5fSchristos	kgem_set_mode(&sna->kgem, KGEM_BLT, op->dst.bo);
201703b705cfSriastradh	if (!kgem_check_many_bo_fenced(&sna->kgem, op->dst.bo, bo, NULL)) {
201803b705cfSriastradh		kgem_submit(&sna->kgem);
201903b705cfSriastradh		if (!kgem_check_many_bo_fenced(&sna->kgem,
202003b705cfSriastradh					       op->dst.bo, bo, NULL)) {
202103b705cfSriastradh			DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
202242542f5fSchristos			return sna_tiling_blt_composite(sna, op, bo,
202342542f5fSchristos							src->drawable.bitsPerPixel,
202442542f5fSchristos							alpha_fixup);
202503b705cfSriastradh		}
202603b705cfSriastradh		_kgem_set_mode(&sna->kgem, KGEM_BLT);
202703b705cfSriastradh	}
2028fe8aea9eSmrg	kgem_bcs_set_tiling(&sna->kgem, bo, op->dst.bo);
202903b705cfSriastradh
203003b705cfSriastradh	DBG(("%s\n", __FUNCTION__));
203103b705cfSriastradh
203203b705cfSriastradh	if (sna->kgem.gen >= 060 && op->dst.bo == bo)
203303b705cfSriastradh		op->done = gen6_blt_copy_done;
203403b705cfSriastradh	else
203503b705cfSriastradh		op->done = nop_done;
203603b705cfSriastradh
203703b705cfSriastradh	if (alpha_fixup) {
203803b705cfSriastradh		op->blt   = blt_composite_copy_with_alpha;
203903b705cfSriastradh		op->box   = blt_composite_copy_box_with_alpha;
204003b705cfSriastradh		op->boxes = blt_composite_copy_boxes_with_alpha;
204103b705cfSriastradh
204203b705cfSriastradh		if (!sna_blt_alpha_fixup_init(sna, &op->u.blt, bo, op->dst.bo,
204303b705cfSriastradh					      src->drawable.bitsPerPixel,
204403b705cfSriastradh					      alpha_fixup))
204503b705cfSriastradh			return false;
204603b705cfSriastradh	} else {
204703b705cfSriastradh		op->blt   = blt_composite_copy;
204803b705cfSriastradh		op->box   = blt_composite_copy_box;
204903b705cfSriastradh		op->boxes = blt_composite_copy_boxes;
205042542f5fSchristos		if (sna->kgem.gen >= 0100)
205142542f5fSchristos			op->thread_boxes = blt_composite_copy_boxes__thread64;
205242542f5fSchristos		else
205342542f5fSchristos			op->thread_boxes = blt_composite_copy_boxes__thread;
205403b705cfSriastradh
205503b705cfSriastradh		if (!sna_blt_copy_init(sna, &op->u.blt, bo, op->dst.bo,
205603b705cfSriastradh				       src->drawable.bitsPerPixel,
205703b705cfSriastradh				       GXcopy))
205803b705cfSriastradh			return false;
205903b705cfSriastradh	}
206003b705cfSriastradh
206103b705cfSriastradh	return true;
206203b705cfSriastradh}
206303b705cfSriastradh
206403b705cfSriastradhfastcall static void
206503b705cfSriastradhblt_put_composite__cpu(struct sna *sna,
206603b705cfSriastradh		       const struct sna_composite_op *op,
206703b705cfSriastradh		       const struct sna_composite_rectangles *r)
206803b705cfSriastradh{
206903b705cfSriastradh	PixmapPtr dst = op->dst.pixmap;
207003b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
207142542f5fSchristos	assert(src->devPrivate.ptr);
207242542f5fSchristos	assert(src->devKind);
207342542f5fSchristos	assert(dst->devPrivate.ptr);
207442542f5fSchristos	assert(dst->devKind);
207503b705cfSriastradh	memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
207603b705cfSriastradh		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
207703b705cfSriastradh		   r->src.x + op->u.blt.sx, r->src.y + op->u.blt.sy,
207803b705cfSriastradh		   r->dst.x + op->dst.x, r->dst.y + op->dst.y,
207903b705cfSriastradh		   r->width, r->height);
208003b705cfSriastradh}
208103b705cfSriastradh
208203b705cfSriastradhfastcall static void
208303b705cfSriastradhblt_put_composite_box__cpu(struct sna *sna,
208403b705cfSriastradh			   const struct sna_composite_op *op,
208503b705cfSriastradh			   const BoxRec *box)
208603b705cfSriastradh{
208703b705cfSriastradh	PixmapPtr dst = op->dst.pixmap;
208803b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
208942542f5fSchristos	assert(src->devPrivate.ptr);
209042542f5fSchristos	assert(src->devKind);
209142542f5fSchristos	assert(dst->devPrivate.ptr);
209242542f5fSchristos	assert(dst->devKind);
209303b705cfSriastradh	memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
209403b705cfSriastradh		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
209503b705cfSriastradh		   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
209603b705cfSriastradh		   box->x1 + op->dst.x, box->y1 + op->dst.y,
209703b705cfSriastradh		   box->x2-box->x1, box->y2-box->y1);
209803b705cfSriastradh}
209903b705cfSriastradh
210003b705cfSriastradhstatic void
210103b705cfSriastradhblt_put_composite_boxes__cpu(struct sna *sna,
210203b705cfSriastradh			     const struct sna_composite_op *op,
210303b705cfSriastradh			     const BoxRec *box, int n)
210403b705cfSriastradh{
210503b705cfSriastradh	PixmapPtr dst = op->dst.pixmap;
210603b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
210742542f5fSchristos	assert(src->devPrivate.ptr);
210842542f5fSchristos	assert(src->devKind);
210942542f5fSchristos	assert(dst->devPrivate.ptr);
211042542f5fSchristos	assert(dst->devKind);
211103b705cfSriastradh	do {
211203b705cfSriastradh		memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
211303b705cfSriastradh			   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
211403b705cfSriastradh			   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
211503b705cfSriastradh			   box->x1 + op->dst.x, box->y1 + op->dst.y,
211603b705cfSriastradh			   box->x2-box->x1, box->y2-box->y1);
211703b705cfSriastradh		box++;
211803b705cfSriastradh	} while (--n);
211903b705cfSriastradh}
212003b705cfSriastradh
212103b705cfSriastradhfastcall static void
212203b705cfSriastradhblt_put_composite_with_alpha__cpu(struct sna *sna,
212303b705cfSriastradh				  const struct sna_composite_op *op,
212403b705cfSriastradh				  const struct sna_composite_rectangles *r)
212503b705cfSriastradh{
212603b705cfSriastradh	PixmapPtr dst = op->dst.pixmap;
212703b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
212842542f5fSchristos	assert(src->devPrivate.ptr);
212942542f5fSchristos	assert(src->devKind);
213042542f5fSchristos	assert(dst->devPrivate.ptr);
213142542f5fSchristos	assert(dst->devKind);
213203b705cfSriastradh	memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
213303b705cfSriastradh		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
213403b705cfSriastradh		   r->src.x + op->u.blt.sx, r->src.y + op->u.blt.sy,
213503b705cfSriastradh		   r->dst.x + op->dst.x, r->dst.y + op->dst.y,
213603b705cfSriastradh		   r->width, r->height,
213703b705cfSriastradh		   0xffffffff, op->u.blt.pixel);
213803b705cfSriastradh
213903b705cfSriastradh}
214003b705cfSriastradh
214103b705cfSriastradhfastcall static void
214203b705cfSriastradhblt_put_composite_box_with_alpha__cpu(struct sna *sna,
214303b705cfSriastradh				      const struct sna_composite_op *op,
214403b705cfSriastradh				      const BoxRec *box)
214503b705cfSriastradh{
214603b705cfSriastradh	PixmapPtr dst = op->dst.pixmap;
214703b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
214842542f5fSchristos	assert(src->devPrivate.ptr);
214942542f5fSchristos	assert(src->devKind);
215042542f5fSchristos	assert(dst->devPrivate.ptr);
215142542f5fSchristos	assert(dst->devKind);
215203b705cfSriastradh	memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
215303b705cfSriastradh		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
215403b705cfSriastradh		   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
215503b705cfSriastradh		   box->x1 + op->dst.x, box->y1 + op->dst.y,
215603b705cfSriastradh		   box->x2-box->x1, box->y2-box->y1,
215703b705cfSriastradh		   0xffffffff, op->u.blt.pixel);
215803b705cfSriastradh}
215903b705cfSriastradh
216003b705cfSriastradhstatic void
216103b705cfSriastradhblt_put_composite_boxes_with_alpha__cpu(struct sna *sna,
216203b705cfSriastradh					const struct sna_composite_op *op,
216303b705cfSriastradh					const BoxRec *box, int n)
216403b705cfSriastradh{
216503b705cfSriastradh	PixmapPtr dst = op->dst.pixmap;
216603b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
216742542f5fSchristos	assert(src->devPrivate.ptr);
216842542f5fSchristos	assert(src->devKind);
216942542f5fSchristos	assert(dst->devPrivate.ptr);
217042542f5fSchristos	assert(dst->devKind);
217103b705cfSriastradh	do {
217203b705cfSriastradh		memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
217303b705cfSriastradh			   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
217403b705cfSriastradh			   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
217503b705cfSriastradh			   box->x1 + op->dst.x, box->y1 + op->dst.y,
217603b705cfSriastradh			   box->x2-box->x1, box->y2-box->y1,
217703b705cfSriastradh			   0xffffffff, op->u.blt.pixel);
217803b705cfSriastradh		box++;
217903b705cfSriastradh	} while (--n);
218003b705cfSriastradh}
218103b705cfSriastradh
218203b705cfSriastradhfastcall static void
218303b705cfSriastradhblt_put_composite(struct sna *sna,
218403b705cfSriastradh		  const struct sna_composite_op *op,
218503b705cfSriastradh		  const struct sna_composite_rectangles *r)
218603b705cfSriastradh{
218703b705cfSriastradh	PixmapPtr dst = op->dst.pixmap;
218803b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
218903b705cfSriastradh	struct sna_pixmap *dst_priv = sna_pixmap(dst);
219003b705cfSriastradh	int pitch = src->devKind;
219103b705cfSriastradh	char *data = src->devPrivate.ptr;
219203b705cfSriastradh	int bpp = src->drawable.bitsPerPixel;
219303b705cfSriastradh
219403b705cfSriastradh	int16_t dst_x = r->dst.x + op->dst.x;
219503b705cfSriastradh	int16_t dst_y = r->dst.y + op->dst.y;
219603b705cfSriastradh	int16_t src_x = r->src.x + op->u.blt.sx;
219703b705cfSriastradh	int16_t src_y = r->src.y + op->u.blt.sy;
219803b705cfSriastradh
219903b705cfSriastradh	if (!dst_priv->pinned &&
220003b705cfSriastradh	    dst_x <= 0 && dst_y <= 0 &&
220103b705cfSriastradh	    dst_x + r->width >= op->dst.width &&
220203b705cfSriastradh	    dst_y + r->height >= op->dst.height) {
220303b705cfSriastradh		data += (src_x - dst_x) * bpp / 8;
220403b705cfSriastradh		data += (src_y - dst_y) * pitch;
220503b705cfSriastradh
220642542f5fSchristos		assert(op->dst.bo == dst_priv->gpu_bo);
220742542f5fSchristos		sna_replace(sna, op->dst.pixmap, data, pitch);
220803b705cfSriastradh	} else {
220903b705cfSriastradh		BoxRec box;
221003b705cfSriastradh		bool ok;
221103b705cfSriastradh
221203b705cfSriastradh		box.x1 = dst_x;
221303b705cfSriastradh		box.y1 = dst_y;
221403b705cfSriastradh		box.x2 = dst_x + r->width;
221503b705cfSriastradh		box.y2 = dst_y + r->height;
221603b705cfSriastradh
221703b705cfSriastradh		ok = sna_write_boxes(sna, dst,
221803b705cfSriastradh				     dst_priv->gpu_bo, 0, 0,
221903b705cfSriastradh				     data, pitch, src_x, src_y,
222003b705cfSriastradh				     &box, 1);
222103b705cfSriastradh		assert(ok);
222203b705cfSriastradh		(void)ok;
222303b705cfSriastradh	}
222403b705cfSriastradh}
222503b705cfSriastradh
222603b705cfSriastradhfastcall static void blt_put_composite_box(struct sna *sna,
222703b705cfSriastradh					   const struct sna_composite_op *op,
222803b705cfSriastradh					   const BoxRec *box)
222903b705cfSriastradh{
223003b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
223103b705cfSriastradh	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
223203b705cfSriastradh
223303b705cfSriastradh	DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
223403b705cfSriastradh	     op->u.blt.sx, op->u.blt.sy,
223503b705cfSriastradh	     op->dst.x, op->dst.y));
223603b705cfSriastradh
223742542f5fSchristos	assert(src->devPrivate.ptr);
223842542f5fSchristos	assert(src->devKind);
223903b705cfSriastradh	if (!dst_priv->pinned &&
224003b705cfSriastradh	    box->x2 - box->x1 == op->dst.width &&
224103b705cfSriastradh	    box->y2 - box->y1 == op->dst.height) {
224203b705cfSriastradh		int pitch = src->devKind;
224303b705cfSriastradh		int bpp = src->drawable.bitsPerPixel / 8;
224403b705cfSriastradh		char *data = src->devPrivate.ptr;
224503b705cfSriastradh
224603b705cfSriastradh		data += (box->y1 + op->u.blt.sy) * pitch;
224703b705cfSriastradh		data += (box->x1 + op->u.blt.sx) * bpp;
224803b705cfSriastradh
224942542f5fSchristos		assert(op->dst.bo == dst_priv->gpu_bo);
225042542f5fSchristos		sna_replace(sna, op->dst.pixmap, data, pitch);
225103b705cfSriastradh	} else {
225203b705cfSriastradh		bool ok;
225303b705cfSriastradh
225403b705cfSriastradh		ok = sna_write_boxes(sna, op->dst.pixmap,
225503b705cfSriastradh				     op->dst.bo, op->dst.x, op->dst.y,
225603b705cfSriastradh				     src->devPrivate.ptr,
225703b705cfSriastradh				     src->devKind,
225803b705cfSriastradh				     op->u.blt.sx, op->u.blt.sy,
225903b705cfSriastradh				     box, 1);
226003b705cfSriastradh		assert(ok);
226103b705cfSriastradh		(void)ok;
226203b705cfSriastradh	}
226303b705cfSriastradh}
226403b705cfSriastradh
226503b705cfSriastradhstatic void blt_put_composite_boxes(struct sna *sna,
226603b705cfSriastradh				    const struct sna_composite_op *op,
226703b705cfSriastradh				    const BoxRec *box, int n)
226803b705cfSriastradh{
226903b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
227003b705cfSriastradh	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
227103b705cfSriastradh
227203b705cfSriastradh	DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__,
227303b705cfSriastradh	     op->u.blt.sx, op->u.blt.sy,
227403b705cfSriastradh	     op->dst.x, op->dst.y,
227503b705cfSriastradh	     box->x1, box->y1, box->x2, box->y2, n));
227603b705cfSriastradh
227742542f5fSchristos	assert(src->devPrivate.ptr);
227842542f5fSchristos	assert(src->devKind);
227903b705cfSriastradh	if (n == 1 && !dst_priv->pinned &&
228003b705cfSriastradh	    box->x2 - box->x1 == op->dst.width &&
228103b705cfSriastradh	    box->y2 - box->y1 == op->dst.height) {
228203b705cfSriastradh		int pitch = src->devKind;
228303b705cfSriastradh		int bpp = src->drawable.bitsPerPixel / 8;
228403b705cfSriastradh		char *data = src->devPrivate.ptr;
228503b705cfSriastradh
228603b705cfSriastradh		data += (box->y1 + op->u.blt.sy) * pitch;
228703b705cfSriastradh		data += (box->x1 + op->u.blt.sx) * bpp;
228803b705cfSriastradh
228942542f5fSchristos		assert(op->dst.bo == dst_priv->gpu_bo);
229042542f5fSchristos		sna_replace(sna, op->dst.pixmap, data, pitch);
229103b705cfSriastradh	} else {
229203b705cfSriastradh		bool ok;
229303b705cfSriastradh
229403b705cfSriastradh		ok = sna_write_boxes(sna, op->dst.pixmap,
229503b705cfSriastradh				     op->dst.bo, op->dst.x, op->dst.y,
229603b705cfSriastradh				     src->devPrivate.ptr,
229703b705cfSriastradh				     src->devKind,
229803b705cfSriastradh				     op->u.blt.sx, op->u.blt.sy,
229903b705cfSriastradh				     box, n);
230003b705cfSriastradh		assert(ok);
230103b705cfSriastradh		(void)ok;
230203b705cfSriastradh	}
230303b705cfSriastradh}
230403b705cfSriastradh
230503b705cfSriastradhfastcall static void
230603b705cfSriastradhblt_put_composite_with_alpha(struct sna *sna,
230703b705cfSriastradh			     const struct sna_composite_op *op,
230803b705cfSriastradh			     const struct sna_composite_rectangles *r)
230903b705cfSriastradh{
231003b705cfSriastradh	PixmapPtr dst = op->dst.pixmap;
231103b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
231203b705cfSriastradh	struct sna_pixmap *dst_priv = sna_pixmap(dst);
231303b705cfSriastradh	int pitch = src->devKind;
231403b705cfSriastradh	char *data = src->devPrivate.ptr;
231503b705cfSriastradh
231603b705cfSriastradh	int16_t dst_x = r->dst.x + op->dst.x;
231703b705cfSriastradh	int16_t dst_y = r->dst.y + op->dst.y;
231803b705cfSriastradh	int16_t src_x = r->src.x + op->u.blt.sx;
231903b705cfSriastradh	int16_t src_y = r->src.y + op->u.blt.sy;
232003b705cfSriastradh
232142542f5fSchristos	assert(src->devPrivate.ptr);
232242542f5fSchristos	assert(src->devKind);
232342542f5fSchristos
232403b705cfSriastradh	if (!dst_priv->pinned &&
232503b705cfSriastradh	    dst_x <= 0 && dst_y <= 0 &&
232603b705cfSriastradh	    dst_x + r->width >= op->dst.width &&
232703b705cfSriastradh	    dst_y + r->height >= op->dst.height) {
232803b705cfSriastradh		int bpp = dst->drawable.bitsPerPixel / 8;
232903b705cfSriastradh
233003b705cfSriastradh		data += (src_x - dst_x) * bpp;
233103b705cfSriastradh		data += (src_y - dst_y) * pitch;
233203b705cfSriastradh
233342542f5fSchristos		assert(op->dst.bo == dst_priv->gpu_bo);
233442542f5fSchristos		sna_replace__xor(sna, op->dst.pixmap, data, pitch,
233542542f5fSchristos				 0xffffffff, op->u.blt.pixel);
233603b705cfSriastradh	} else {
233703b705cfSriastradh		BoxRec box;
233803b705cfSriastradh
233903b705cfSriastradh		box.x1 = dst_x;
234003b705cfSriastradh		box.y1 = dst_y;
234103b705cfSriastradh		box.x2 = dst_x + r->width;
234203b705cfSriastradh		box.y2 = dst_y + r->height;
234303b705cfSriastradh
234403b705cfSriastradh		sna_write_boxes__xor(sna, dst,
234503b705cfSriastradh				     dst_priv->gpu_bo, 0, 0,
234603b705cfSriastradh				     data, pitch, src_x, src_y,
234703b705cfSriastradh				     &box, 1,
234803b705cfSriastradh				     0xffffffff, op->u.blt.pixel);
234903b705cfSriastradh	}
235003b705cfSriastradh}
235103b705cfSriastradh
235203b705cfSriastradhfastcall static void
235303b705cfSriastradhblt_put_composite_box_with_alpha(struct sna *sna,
235403b705cfSriastradh				 const struct sna_composite_op *op,
235503b705cfSriastradh				 const BoxRec *box)
235603b705cfSriastradh{
235703b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
235803b705cfSriastradh	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
235903b705cfSriastradh
236003b705cfSriastradh	DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
236103b705cfSriastradh	     op->u.blt.sx, op->u.blt.sy,
236203b705cfSriastradh	     op->dst.x, op->dst.y));
236303b705cfSriastradh
236442542f5fSchristos	assert(src->devPrivate.ptr);
236542542f5fSchristos	assert(src->devKind);
236642542f5fSchristos
236703b705cfSriastradh	if (!dst_priv->pinned &&
236803b705cfSriastradh	    box->x2 - box->x1 == op->dst.width &&
236903b705cfSriastradh	    box->y2 - box->y1 == op->dst.height) {
237003b705cfSriastradh		int pitch = src->devKind;
237103b705cfSriastradh		int bpp = src->drawable.bitsPerPixel / 8;
237203b705cfSriastradh		char *data = src->devPrivate.ptr;
237303b705cfSriastradh
237403b705cfSriastradh		data += (box->y1 + op->u.blt.sy) * pitch;
237503b705cfSriastradh		data += (box->x1 + op->u.blt.sx) * bpp;
237603b705cfSriastradh
237742542f5fSchristos		assert(op->dst.bo == dst_priv->gpu_bo);
237842542f5fSchristos		sna_replace__xor(sna, op->dst.pixmap, data, pitch,
237942542f5fSchristos				 0xffffffff, op->u.blt.pixel);
238003b705cfSriastradh	} else {
238103b705cfSriastradh		sna_write_boxes__xor(sna, op->dst.pixmap,
238203b705cfSriastradh				     op->dst.bo, op->dst.x, op->dst.y,
238303b705cfSriastradh				     src->devPrivate.ptr,
238403b705cfSriastradh				     src->devKind,
238503b705cfSriastradh				     op->u.blt.sx, op->u.blt.sy,
238603b705cfSriastradh				     box, 1,
238703b705cfSriastradh				     0xffffffff, op->u.blt.pixel);
238803b705cfSriastradh	}
238903b705cfSriastradh}
239003b705cfSriastradh
239103b705cfSriastradhstatic void
239203b705cfSriastradhblt_put_composite_boxes_with_alpha(struct sna *sna,
239303b705cfSriastradh				   const struct sna_composite_op *op,
239403b705cfSriastradh				   const BoxRec *box, int n)
239503b705cfSriastradh{
239603b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
239703b705cfSriastradh	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
239803b705cfSriastradh
239903b705cfSriastradh	DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__,
240003b705cfSriastradh	     op->u.blt.sx, op->u.blt.sy,
240103b705cfSriastradh	     op->dst.x, op->dst.y,
240203b705cfSriastradh	     box->x1, box->y1, box->x2, box->y2, n));
240303b705cfSriastradh
240442542f5fSchristos	assert(src->devPrivate.ptr);
240542542f5fSchristos	assert(src->devKind);
240642542f5fSchristos
240703b705cfSriastradh	if (n == 1 && !dst_priv->pinned &&
240803b705cfSriastradh	    box->x2 - box->x1 == op->dst.width &&
240903b705cfSriastradh	    box->y2 - box->y1 == op->dst.height) {
241003b705cfSriastradh		int pitch = src->devKind;
241103b705cfSriastradh		int bpp = src->drawable.bitsPerPixel / 8;
241203b705cfSriastradh		char *data = src->devPrivate.ptr;
241303b705cfSriastradh
241403b705cfSriastradh		data += (box->y1 + op->u.blt.sy) * pitch;
241503b705cfSriastradh		data += (box->x1 + op->u.blt.sx) * bpp;
241603b705cfSriastradh
241742542f5fSchristos		assert(dst_priv->gpu_bo == op->dst.bo);
241842542f5fSchristos		sna_replace__xor(sna, op->dst.pixmap, data, pitch,
241942542f5fSchristos				 0xffffffff, op->u.blt.pixel);
242003b705cfSriastradh	} else {
242103b705cfSriastradh		sna_write_boxes__xor(sna, op->dst.pixmap,
242203b705cfSriastradh				     op->dst.bo, op->dst.x, op->dst.y,
242303b705cfSriastradh				     src->devPrivate.ptr,
242403b705cfSriastradh				     src->devKind,
242503b705cfSriastradh				     op->u.blt.sx, op->u.blt.sy,
242603b705cfSriastradh				     box, n,
242703b705cfSriastradh				     0xffffffff, op->u.blt.pixel);
242803b705cfSriastradh	}
242903b705cfSriastradh}
243003b705cfSriastradh
243103b705cfSriastradhstatic bool
243203b705cfSriastradhprepare_blt_put(struct sna *sna,
243303b705cfSriastradh		struct sna_composite_op *op,
243403b705cfSriastradh		uint32_t alpha_fixup)
243503b705cfSriastradh{
243603b705cfSriastradh	DBG(("%s\n", __FUNCTION__));
243703b705cfSriastradh
243842542f5fSchristos	assert(!sna_pixmap(op->dst.pixmap)->clear);
243942542f5fSchristos
244003b705cfSriastradh	if (op->dst.bo) {
244103b705cfSriastradh		assert(op->dst.bo == sna_pixmap(op->dst.pixmap)->gpu_bo);
244203b705cfSriastradh		if (alpha_fixup) {
244303b705cfSriastradh			op->u.blt.pixel = alpha_fixup;
244403b705cfSriastradh			op->blt   = blt_put_composite_with_alpha;
244503b705cfSriastradh			op->box   = blt_put_composite_box_with_alpha;
244603b705cfSriastradh			op->boxes = blt_put_composite_boxes_with_alpha;
244703b705cfSriastradh		} else {
244803b705cfSriastradh			op->blt   = blt_put_composite;
244903b705cfSriastradh			op->box   = blt_put_composite_box;
245003b705cfSriastradh			op->boxes = blt_put_composite_boxes;
245103b705cfSriastradh		}
2452fe8aea9eSmrg
2453fe8aea9eSmrg		op->done = nop_done;
2454fe8aea9eSmrg		return true;
245503b705cfSriastradh	} else {
245603b705cfSriastradh		if (alpha_fixup) {
245703b705cfSriastradh			op->u.blt.pixel = alpha_fixup;
245803b705cfSriastradh			op->blt   = blt_put_composite_with_alpha__cpu;
245903b705cfSriastradh			op->box   = blt_put_composite_box_with_alpha__cpu;
246003b705cfSriastradh			op->boxes = blt_put_composite_boxes_with_alpha__cpu;
246103b705cfSriastradh		} else {
246203b705cfSriastradh			op->blt   = blt_put_composite__cpu;
246303b705cfSriastradh			op->box   = blt_put_composite_box__cpu;
246403b705cfSriastradh			op->boxes = blt_put_composite_boxes__cpu;
246503b705cfSriastradh		}
246603b705cfSriastradh
2467fe8aea9eSmrg		op->done = sig_done;
2468fe8aea9eSmrg		return sigtrap_get() == 0;
2469fe8aea9eSmrg	}
247003b705cfSriastradh}
247103b705cfSriastradh
247203b705cfSriastradhstatic bool
247303b705cfSriastradhis_clear(PixmapPtr pixmap)
247403b705cfSriastradh{
247503b705cfSriastradh	struct sna_pixmap *priv = sna_pixmap(pixmap);
247603b705cfSriastradh	return priv && priv->clear;
247703b705cfSriastradh}
247803b705cfSriastradh
/*
 * Helpers for arithmetic on 32-bit pixels holding four 8-bit components.
 * Components are processed two at a time: the pair in bits 0-7 and 16-23
 * (selected by RB_MASK), and, after shifting right by G_SHIFT, the pair in
 * bits 8-15 and 24-31.
 */
#define G_SHIFT 8
#define RB_MASK 0xff00ff
#define RB_ONE_HALF 0x800080
#define RB_MASK_PLUS_ONE 0x10000100

/* x = (RB_MASK components of x) * a / 255, rounded; t is scratch. */
#define UN8_rb_MUL_UN8(x, a, t) do {				\
	t  = ((x) & RB_MASK) * (a);				\
	t += RB_ONE_HALF;					\
	x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;	\
	x &= RB_MASK;						\
} while (0)

/* x = x + y per component, each sum clamped to 0xff; t is scratch. */
#define UN8_rb_ADD_UN8_rb(x, y, t) do {				\
	t = ((x) + (y));					\
	t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);	\
	x = (t & RB_MASK);					\
} while (0)

/* x = x * a / 255 + y for all four components, with saturation. */
#define UN8x4_MUL_UN8_ADD_UN8x4(x, a, y) do {			\
	uint32_t r1__, r2__, r3__, t__;				\
	\
	r1__ = (x);						\
	r2__ = (y) & RB_MASK;					\
	UN8_rb_MUL_UN8(r1__, (a), t__);				\
	UN8_rb_ADD_UN8_rb(r1__, r2__, t__);			\
	\
	r2__ = (x) >> G_SHIFT;					\
	r3__ = ((y) >> G_SHIFT) & RB_MASK;			\
	UN8_rb_MUL_UN8(r2__, (a), t__);				\
	UN8_rb_ADD_UN8_rb(r2__, r3__, t__);			\
	\
	(x) = r1__ | (r2__ << G_SHIFT);				\
} while (0)

/*
 * Combine src over dst: every component of the result is
 * src + dst * (255 - src_alpha) / 255, clamped to 0xff, where src_alpha is
 * the top byte of src.
 */
static inline uint32_t
over(uint32_t src, uint32_t dst)
{
	/* Inverting src before the shift yields 255 - alpha in the low byte. */
	const uint32_t inv_alpha = (~src) >> 24;
	uint32_t result = dst;

	UN8x4_MUL_UN8_ADD_UN8x4(result, inv_alpha, src);

	return result;
}
252242542f5fSchristos
252342542f5fSchristosstatic inline uint32_t
252442542f5fSchristosadd(uint32_t src, uint32_t dst)
252542542f5fSchristos{
252642542f5fSchristos#define UN8x4_ADD_UN8x4(x, y) do {				\
252742542f5fSchristos	uint32_t r1__, r2__, r3__, t__;				\
252842542f5fSchristos	\
252942542f5fSchristos	r1__ = (x) & RB_MASK;					\
253042542f5fSchristos	r2__ = (y) & RB_MASK;					\
253142542f5fSchristos	UN8_rb_ADD_UN8_rb(r1__, r2__, t__);			\
253242542f5fSchristos	\
253342542f5fSchristos	r2__ = ((x) >> G_SHIFT) & RB_MASK;			\
253442542f5fSchristos	r3__ = ((y) >> G_SHIFT) & RB_MASK;			\
253542542f5fSchristos	UN8_rb_ADD_UN8_rb(r2__, r3__, t__);			\
253642542f5fSchristos	\
253742542f5fSchristos	x = r1__ | (r2__ << G_SHIFT);				\
253842542f5fSchristos} while (0)
253942542f5fSchristos
254042542f5fSchristos	UN8x4_ADD_UN8x4(src, dst);
254142542f5fSchristos	return src;
254242542f5fSchristos}
254342542f5fSchristos
254403b705cfSriastradhbool
254503b705cfSriastradhsna_blt_composite(struct sna *sna,
254603b705cfSriastradh		  uint32_t op,
254703b705cfSriastradh		  PicturePtr src,
254803b705cfSriastradh		  PicturePtr dst,
254903b705cfSriastradh		  int16_t x, int16_t y,
255003b705cfSriastradh		  int16_t dst_x, int16_t dst_y,
255103b705cfSriastradh		  int16_t width, int16_t height,
255242542f5fSchristos		  unsigned flags,
255342542f5fSchristos		  struct sna_composite_op *tmp)
255403b705cfSriastradh{
255503b705cfSriastradh	PictFormat src_format = src->format;
255603b705cfSriastradh	PixmapPtr src_pixmap;
255703b705cfSriastradh	struct kgem_bo *bo;
255803b705cfSriastradh	int16_t tx, ty;
255903b705cfSriastradh	BoxRec dst_box, src_box;
256003b705cfSriastradh	uint32_t alpha_fixup;
256103b705cfSriastradh	uint32_t color, hint;
256203b705cfSriastradh	bool was_clear;
256303b705cfSriastradh	bool ret;
256403b705cfSriastradh
256503b705cfSriastradh#if DEBUG_NO_BLT || NO_BLT_COMPOSITE
256603b705cfSriastradh	return false;
256703b705cfSriastradh#endif
256803b705cfSriastradh	DBG(("%s (%d, %d), (%d, %d), %dx%d\n",
256903b705cfSriastradh	     __FUNCTION__, x, y, dst_x, dst_y, width, height));
257003b705cfSriastradh
257103b705cfSriastradh	switch (dst->pDrawable->bitsPerPixel) {
257203b705cfSriastradh	case 8:
257303b705cfSriastradh	case 16:
257403b705cfSriastradh	case 32:
257503b705cfSriastradh		break;
257603b705cfSriastradh	default:
257703b705cfSriastradh		DBG(("%s: unhandled bpp: %d\n", __FUNCTION__,
257803b705cfSriastradh		     dst->pDrawable->bitsPerPixel));
257903b705cfSriastradh		return false;
258003b705cfSriastradh	}
258103b705cfSriastradh
258203b705cfSriastradh	tmp->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
258342542f5fSchristos	was_clear = is_clear(tmp->dst.pixmap);
258403b705cfSriastradh
258503b705cfSriastradh	if (width | height) {
258603b705cfSriastradh		dst_box.x1 = dst_x;
258703b705cfSriastradh		dst_box.x2 = bound(dst_x, width);
258803b705cfSriastradh		dst_box.y1 = dst_y;
258903b705cfSriastradh		dst_box.y2 = bound(dst_y, height);
259003b705cfSriastradh	} else
259103b705cfSriastradh		sna_render_picture_extents(dst, &dst_box);
259203b705cfSriastradh
259303b705cfSriastradh	tmp->dst.format = dst->format;
259403b705cfSriastradh	tmp->dst.width = tmp->dst.pixmap->drawable.width;
259503b705cfSriastradh	tmp->dst.height = tmp->dst.pixmap->drawable.height;
259603b705cfSriastradh	get_drawable_deltas(dst->pDrawable, tmp->dst.pixmap,
259703b705cfSriastradh			    &tmp->dst.x, &tmp->dst.y);
259803b705cfSriastradh
259903b705cfSriastradh	if (op == PictOpClear) {
260003b705cfSriastradhclear:
260142542f5fSchristos		if (was_clear && sna_pixmap(tmp->dst.pixmap)->clear_color == 0) {
260242542f5fSchristos			sna_pixmap(tmp->dst.pixmap)->clear = true;
2603fe8aea9eSmrgnop:
260403b705cfSriastradh			return prepare_blt_nop(sna, tmp);
260542542f5fSchristos		}
260603b705cfSriastradh
260703b705cfSriastradh		hint = 0;
260803b705cfSriastradh		if (can_render(sna)) {
260903b705cfSriastradh			hint |= PREFER_GPU;
261042542f5fSchristos			if ((flags & COMPOSITE_PARTIAL) == 0) {
261142542f5fSchristos				hint |= IGNORE_DAMAGE;
261242542f5fSchristos				if (width  == tmp->dst.pixmap->drawable.width &&
261303b705cfSriastradh				    height == tmp->dst.pixmap->drawable.height)
261403b705cfSriastradh					hint |= REPLACES;
261503b705cfSriastradh			}
261603b705cfSriastradh		}
261703b705cfSriastradh		tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
261803b705cfSriastradh						  &dst_box, &tmp->damage);
2619fe8aea9eSmrg		assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage));
262042542f5fSchristos		if (tmp->dst.bo) {
262142542f5fSchristos			if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
262242542f5fSchristos				DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n",
262342542f5fSchristos				     __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch));
262442542f5fSchristos				return false;
262542542f5fSchristos			}
262642542f5fSchristos			if (hint & REPLACES)
262742542f5fSchristos				kgem_bo_undo(&sna->kgem, tmp->dst.bo);
2628fe8aea9eSmrg			if (flags & COMPOSITE_UPLOAD)
2629fe8aea9eSmrg				return false;
263042542f5fSchristos		} else {
263103b705cfSriastradh			RegionRec region;
263203b705cfSriastradh
263303b705cfSriastradh			region.extents = dst_box;
263403b705cfSriastradh			region.data = NULL;
263503b705cfSriastradh
263642542f5fSchristos			hint = MOVE_WRITE | MOVE_INPLACE_HINT;
263742542f5fSchristos			if (flags & COMPOSITE_PARTIAL)
263842542f5fSchristos				hint |= MOVE_READ;
263942542f5fSchristos			if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region, hint))
264003b705cfSriastradh				return false;
264142542f5fSchristos		}
264203b705cfSriastradh
264303b705cfSriastradh		return prepare_blt_clear(sna, tmp);
264403b705cfSriastradh	}
264503b705cfSriastradh
264603b705cfSriastradh	if (is_solid(src)) {
264742542f5fSchristos		if ((op == PictOpOver || op == PictOpAdd) && is_transparent(src)) {
264842542f5fSchristos			sna_pixmap(tmp->dst.pixmap)->clear = was_clear;
264942542f5fSchristos			return prepare_blt_nop(sna, tmp);
265042542f5fSchristos		}
265103b705cfSriastradh		if (op == PictOpOver && is_opaque_solid(src))
265203b705cfSriastradh			op = PictOpSrc;
2653fe8aea9eSmrg		if (op == PictOpAdd &&
2654fe8aea9eSmrg		    PICT_FORMAT_RGB(src->format) == PICT_FORMAT_RGB(dst->format) &&
2655fe8aea9eSmrg		    is_white(src))
265603b705cfSriastradh			op = PictOpSrc;
265742542f5fSchristos		if (was_clear && (op == PictOpAdd || op == PictOpOver)) {
265842542f5fSchristos			if (sna_pixmap(tmp->dst.pixmap)->clear_color == 0)
265942542f5fSchristos				op = PictOpSrc;
266042542f5fSchristos			if (op == PictOpOver) {
2661fe8aea9eSmrg				unsigned dst_color = solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color);
266242542f5fSchristos				color = over(get_solid_color(src, PICT_a8r8g8b8),
2663fe8aea9eSmrg					     dst_color);
266442542f5fSchristos				op = PictOpSrc;
266542542f5fSchristos				DBG(("%s: precomputing solid OVER (%08x, %08x) -> %08x\n",
266642542f5fSchristos				     __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8),
2667fe8aea9eSmrg				     solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color),
266842542f5fSchristos				     color));
2669fe8aea9eSmrg				if (color == dst_color)
2670fe8aea9eSmrg					goto nop;
2671fe8aea9eSmrg				else
2672fe8aea9eSmrg					goto fill;
267342542f5fSchristos			}
267442542f5fSchristos			if (op == PictOpAdd) {
2675fe8aea9eSmrg				unsigned dst_color = solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color);
267642542f5fSchristos				color = add(get_solid_color(src, PICT_a8r8g8b8),
2677fe8aea9eSmrg					    dst_color);
267842542f5fSchristos				op = PictOpSrc;
267942542f5fSchristos				DBG(("%s: precomputing solid ADD (%08x, %08x) -> %08x\n",
268042542f5fSchristos				     __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8),
2681fe8aea9eSmrg				     solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color),
268242542f5fSchristos				     color));
2683fe8aea9eSmrg				if (color == dst_color)
2684fe8aea9eSmrg					goto nop;
2685fe8aea9eSmrg				else
2686fe8aea9eSmrg					goto fill;
268742542f5fSchristos			}
268842542f5fSchristos		}
268903b705cfSriastradh		if (op == PictOpOutReverse && is_opaque_solid(src))
269003b705cfSriastradh			goto clear;
269103b705cfSriastradh
269203b705cfSriastradh		if (op != PictOpSrc) {
269303b705cfSriastradh			DBG(("%s: unsupported op [%d] for blitting\n",
269403b705cfSriastradh			     __FUNCTION__, op));
269503b705cfSriastradh			return false;
269603b705cfSriastradh		}
269703b705cfSriastradh
269803b705cfSriastradh		color = get_solid_color(src, tmp->dst.format);
269903b705cfSriastradhfill:
270003b705cfSriastradh		if (color == 0)
270103b705cfSriastradh			goto clear;
270203b705cfSriastradh
270342542f5fSchristos		if (was_clear && sna_pixmap(tmp->dst.pixmap)->clear_color == color) {
270442542f5fSchristos			sna_pixmap(tmp->dst.pixmap)->clear = true;
270542542f5fSchristos			return prepare_blt_nop(sna, tmp);
270642542f5fSchristos		}
270742542f5fSchristos
270803b705cfSriastradh		hint = 0;
270903b705cfSriastradh		if (can_render(sna)) {
271003b705cfSriastradh			hint |= PREFER_GPU;
271142542f5fSchristos			if ((flags & COMPOSITE_PARTIAL) == 0) {
271242542f5fSchristos				hint |= IGNORE_DAMAGE;
271342542f5fSchristos				if (width  == tmp->dst.pixmap->drawable.width &&
271403b705cfSriastradh				    height == tmp->dst.pixmap->drawable.height)
271503b705cfSriastradh					hint |= REPLACES;
271642542f5fSchristos			}
271703b705cfSriastradh		}
271803b705cfSriastradh		tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
271903b705cfSriastradh						  &dst_box, &tmp->damage);
2720fe8aea9eSmrg		assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage));
272142542f5fSchristos		if (tmp->dst.bo) {
272242542f5fSchristos			if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
272342542f5fSchristos				DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n",
272442542f5fSchristos				     __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch));
272542542f5fSchristos				return false;
272642542f5fSchristos			}
272742542f5fSchristos			if (hint & REPLACES)
272842542f5fSchristos				kgem_bo_undo(&sna->kgem, tmp->dst.bo);
2729fe8aea9eSmrg			if (flags & COMPOSITE_UPLOAD)
2730fe8aea9eSmrg				return false;
273142542f5fSchristos		} else {
273203b705cfSriastradh			RegionRec region;
273303b705cfSriastradh
273403b705cfSriastradh			region.extents = dst_box;
273503b705cfSriastradh			region.data = NULL;
273603b705cfSriastradh
273742542f5fSchristos			hint = MOVE_WRITE | MOVE_INPLACE_HINT;
273842542f5fSchristos			if (flags & COMPOSITE_PARTIAL)
273942542f5fSchristos				hint |= MOVE_READ;
274042542f5fSchristos			if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region, hint))
274103b705cfSriastradh				return false;
274242542f5fSchristos		}
274303b705cfSriastradh
274403b705cfSriastradh		return prepare_blt_fill(sna, tmp, color);
274503b705cfSriastradh	}
274603b705cfSriastradh
274703b705cfSriastradh	if (!src->pDrawable) {
274803b705cfSriastradh		DBG(("%s: unsupported procedural source\n",
274903b705cfSriastradh		     __FUNCTION__));
275003b705cfSriastradh		return false;
275103b705cfSriastradh	}
275203b705cfSriastradh
275303b705cfSriastradh	if (src->filter == PictFilterConvolution) {
275403b705cfSriastradh		DBG(("%s: convolutions filters not handled\n",
275503b705cfSriastradh		     __FUNCTION__));
275603b705cfSriastradh		return false;
275703b705cfSriastradh	}
275803b705cfSriastradh
275903b705cfSriastradh	if (op == PictOpOver && PICT_FORMAT_A(src_format) == 0)
276003b705cfSriastradh		op = PictOpSrc;
276103b705cfSriastradh
276203b705cfSriastradh	if (op != PictOpSrc) {
276303b705cfSriastradh		DBG(("%s: unsupported op [%d] for blitting\n",
276403b705cfSriastradh		     __FUNCTION__, op));
276503b705cfSriastradh		return false;
276603b705cfSriastradh	}
276703b705cfSriastradh
276842542f5fSchristos	if (!sna_transform_is_imprecise_integer_translation(src->transform, src->filter,
276942542f5fSchristos							    dst->polyMode == PolyModePrecise,
277042542f5fSchristos							    &tx, &ty)) {
277103b705cfSriastradh		DBG(("%s: source transform is not an integer translation\n",
277203b705cfSriastradh		     __FUNCTION__));
277303b705cfSriastradh		return false;
277403b705cfSriastradh	}
277542542f5fSchristos	DBG(("%s: converting transform to integer translation? (%d, %d)\n",
277642542f5fSchristos	     __FUNCTION__, src->transform != NULL, tx, ty));
277703b705cfSriastradh	x += tx;
277803b705cfSriastradh	y += ty;
277903b705cfSriastradh
278003b705cfSriastradh	if ((x >= src->pDrawable->width ||
278103b705cfSriastradh	     y >= src->pDrawable->height ||
278203b705cfSriastradh	     x + width  <= 0 ||
278303b705cfSriastradh	     y + height <= 0) &&
278403b705cfSriastradh	    (!src->repeat || src->repeatType == RepeatNone)) {
278503b705cfSriastradh		DBG(("%s: source is outside of valid area, converting to clear\n",
278603b705cfSriastradh		     __FUNCTION__));
278703b705cfSriastradh		goto clear;
278803b705cfSriastradh	}
278903b705cfSriastradh
279003b705cfSriastradh	src_pixmap = get_drawable_pixmap(src->pDrawable);
279103b705cfSriastradh	if (is_clear(src_pixmap)) {
279242542f5fSchristos		if (src->repeat ||
279342542f5fSchristos		    (x >= 0 && y >= 0 &&
2794fe8aea9eSmrg		     x + width  <= src_pixmap->drawable.width &&
2795fe8aea9eSmrg		     y + height <= src_pixmap->drawable.height)) {
279642542f5fSchristos			color = color_convert(sna_pixmap(src_pixmap)->clear_color,
279742542f5fSchristos					      src->format, tmp->dst.format);
279842542f5fSchristos			goto fill;
279942542f5fSchristos		}
280003b705cfSriastradh	}
280103b705cfSriastradh
280203b705cfSriastradh	alpha_fixup = 0;
280303b705cfSriastradh	if (!(dst->format == src_format ||
280403b705cfSriastradh	      dst->format == alphaless(src_format) ||
280503b705cfSriastradh	      (alphaless(dst->format) == alphaless(src_format) &&
280603b705cfSriastradh	       sna_get_pixel_from_rgba(&alpha_fixup,
280703b705cfSriastradh				       0, 0, 0, 0xffff,
280803b705cfSriastradh				       dst->format)))) {
280903b705cfSriastradh		DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n",
281003b705cfSriastradh		     __FUNCTION__, (unsigned)src_format, dst->format));
281103b705cfSriastradh		return false;
281203b705cfSriastradh	}
281303b705cfSriastradh
281403b705cfSriastradh	/* XXX tiling? fixup extend none? */
281503b705cfSriastradh	if (x < 0 || y < 0 ||
281603b705cfSriastradh	    x + width  > src->pDrawable->width ||
281703b705cfSriastradh	    y + height > src->pDrawable->height) {
281803b705cfSriastradh		DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d, repeat=%d\n",
281903b705cfSriastradh		     __FUNCTION__,
282003b705cfSriastradh		     x, y, x+width, y+width, src->pDrawable->width, src->pDrawable->height, src->repeatType));
282103b705cfSriastradh		if (src->repeat && src->repeatType == RepeatNormal) {
282203b705cfSriastradh			x = x % src->pDrawable->width;
282303b705cfSriastradh			y = y % src->pDrawable->height;
282403b705cfSriastradh			if (x < 0)
282503b705cfSriastradh				x += src->pDrawable->width;
282603b705cfSriastradh			if (y < 0)
282703b705cfSriastradh				y += src->pDrawable->height;
282803b705cfSriastradh			if (x + width  > src->pDrawable->width ||
282903b705cfSriastradh			    y + height > src->pDrawable->height)
283003b705cfSriastradh				return false;
283103b705cfSriastradh		} else
283203b705cfSriastradh			return false;
283303b705cfSriastradh	}
283403b705cfSriastradh
283503b705cfSriastradh	get_drawable_deltas(src->pDrawable, src_pixmap, &tx, &ty);
283603b705cfSriastradh	x += tx + src->pDrawable->x;
283703b705cfSriastradh	y += ty + src->pDrawable->y;
283803b705cfSriastradh	if (x < 0 || y < 0 ||
283903b705cfSriastradh	    x + width  > src_pixmap->drawable.width ||
284003b705cfSriastradh	    y + height > src_pixmap->drawable.height) {
284103b705cfSriastradh		DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid pixmap %dx%d\n",
284203b705cfSriastradh		     __FUNCTION__,
284303b705cfSriastradh		     x, y, x+width, y+width, src_pixmap->drawable.width, src_pixmap->drawable.height));
284403b705cfSriastradh		return false;
284503b705cfSriastradh	}
284603b705cfSriastradh
284703b705cfSriastradh	tmp->u.blt.src_pixmap = src_pixmap;
284803b705cfSriastradh	tmp->u.blt.sx = x - dst_x;
284903b705cfSriastradh	tmp->u.blt.sy = y - dst_y;
285003b705cfSriastradh	DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
285103b705cfSriastradh	     __FUNCTION__,
285203b705cfSriastradh	     tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy, alpha_fixup));
285303b705cfSriastradh
285403b705cfSriastradh	src_box.x1 = x;
285503b705cfSriastradh	src_box.y1 = y;
285603b705cfSriastradh	src_box.x2 = x + width;
285703b705cfSriastradh	src_box.y2 = y + height;
285803b705cfSriastradh	bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true);
285903b705cfSriastradh	if (bo && !kgem_bo_can_blt(&sna->kgem, bo)) {
286003b705cfSriastradh		DBG(("%s: can not blit from src size=%dx%d, tiling? %d, pitch? %d\n",
286103b705cfSriastradh		     __FUNCTION__,
286203b705cfSriastradh		     src_pixmap->drawable.width  < sna->render.max_3d_size,
286303b705cfSriastradh		     src_pixmap->drawable.height < sna->render.max_3d_size,
286403b705cfSriastradh		     bo->tiling, bo->pitch));
286503b705cfSriastradh
286603b705cfSriastradh		if (src_pixmap->drawable.width  <= sna->render.max_3d_size &&
286703b705cfSriastradh		    src_pixmap->drawable.height <= sna->render.max_3d_size &&
286803b705cfSriastradh		    bo->pitch <= sna->render.max_3d_pitch &&
2869fe8aea9eSmrg		    (flags & (COMPOSITE_UPLOAD | COMPOSITE_FALLBACK)) == 0)
287003b705cfSriastradh		{
287103b705cfSriastradh			return false;
287203b705cfSriastradh		}
287303b705cfSriastradh
287403b705cfSriastradh		bo = NULL;
287503b705cfSriastradh	}
287603b705cfSriastradh
287703b705cfSriastradh	hint = 0;
287803b705cfSriastradh	if (bo || can_render(sna)) {
287903b705cfSriastradh		hint |= PREFER_GPU;
288042542f5fSchristos		if ((flags & COMPOSITE_PARTIAL) == 0) {
288142542f5fSchristos			hint |= IGNORE_DAMAGE;
288242542f5fSchristos			if (width  == tmp->dst.pixmap->drawable.width &&
288303b705cfSriastradh			    height == tmp->dst.pixmap->drawable.height)
288403b705cfSriastradh				hint |= REPLACES;
288503b705cfSriastradh		}
288603b705cfSriastradh		if (bo)
288703b705cfSriastradh			hint |= FORCE_GPU;
288803b705cfSriastradh	}
288903b705cfSriastradh	tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
289003b705cfSriastradh					  &dst_box, &tmp->damage);
2891fe8aea9eSmrg	assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage));
289203b705cfSriastradh
289342542f5fSchristos	if (tmp->dst.bo && hint & REPLACES) {
289442542f5fSchristos		struct sna_pixmap *priv = sna_pixmap(tmp->dst.pixmap);
289542542f5fSchristos		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
289642542f5fSchristos	}
289742542f5fSchristos
289842542f5fSchristos	if (tmp->dst.pixmap == src_pixmap)
289942542f5fSchristos		bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true);
290003b705cfSriastradh
290103b705cfSriastradh	ret = false;
290203b705cfSriastradh	if (bo) {
290303b705cfSriastradh		if (!tmp->dst.bo) {
290403b705cfSriastradh			DBG(("%s: fallback -- unaccelerated read back\n",
290503b705cfSriastradh			     __FUNCTION__));
290642542f5fSchristosfallback:
290742542f5fSchristos			if (flags & COMPOSITE_FALLBACK || !kgem_bo_is_busy(bo))
290803b705cfSriastradh				goto put;
290942542f5fSchristos		} else if (!kgem_bo_can_blt(&sna->kgem, bo)) {
291042542f5fSchristos			DBG(("%s: fallback -- cannot blit from source\n",
291142542f5fSchristos			     __FUNCTION__));
291242542f5fSchristos			goto fallback;
291303b705cfSriastradh		} else if (bo->snoop && tmp->dst.bo->snoop) {
291403b705cfSriastradh			DBG(("%s: fallback -- can not copy between snooped bo\n",
291503b705cfSriastradh			     __FUNCTION__));
291603b705cfSriastradh			goto put;
291703b705cfSriastradh		} else if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
291803b705cfSriastradh			DBG(("%s: fallback -- unaccelerated upload\n",
291903b705cfSriastradh			     __FUNCTION__));
292042542f5fSchristos			goto fallback;
2921fe8aea9eSmrg		} else if ((flags & COMPOSITE_UPLOAD) == 0) {
292203b705cfSriastradh			ret = prepare_blt_copy(sna, tmp, bo, alpha_fixup);
292342542f5fSchristos			if (!ret)
292442542f5fSchristos				goto fallback;
292503b705cfSriastradh		}
292603b705cfSriastradh	} else {
292703b705cfSriastradh		RegionRec region;
292803b705cfSriastradh
292903b705cfSriastradhput:
293003b705cfSriastradh		if (tmp->dst.bo == sna_pixmap(tmp->dst.pixmap)->cpu_bo) {
293142542f5fSchristos			DBG(("%s: dropping upload into CPU bo\n", __FUNCTION__));
293203b705cfSriastradh			tmp->dst.bo = NULL;
293303b705cfSriastradh			tmp->damage = NULL;
293403b705cfSriastradh		}
293503b705cfSriastradh
293603b705cfSriastradh		if (tmp->dst.bo == NULL) {
293703b705cfSriastradh			hint = MOVE_INPLACE_HINT | MOVE_WRITE;
293842542f5fSchristos			if (flags & COMPOSITE_PARTIAL)
293903b705cfSriastradh				hint |= MOVE_READ;
294003b705cfSriastradh
294103b705cfSriastradh			region.extents = dst_box;
294203b705cfSriastradh			region.data = NULL;
294303b705cfSriastradh			if (!sna_drawable_move_region_to_cpu(dst->pDrawable,
294403b705cfSriastradh							     &region, hint))
294503b705cfSriastradh				return false;
294603b705cfSriastradh
294703b705cfSriastradh			assert(tmp->damage == NULL);
294803b705cfSriastradh		}
294903b705cfSriastradh
295003b705cfSriastradh		region.extents = src_box;
295103b705cfSriastradh		region.data = NULL;
295203b705cfSriastradh		if (!sna_drawable_move_region_to_cpu(&src_pixmap->drawable,
295303b705cfSriastradh						     &region, MOVE_READ))
295403b705cfSriastradh			return false;
295503b705cfSriastradh
295603b705cfSriastradh		ret = prepare_blt_put(sna, tmp, alpha_fixup);
295703b705cfSriastradh	}
295803b705cfSriastradh
295903b705cfSriastradh	return ret;
296003b705cfSriastradh}
296103b705cfSriastradh
296203b705cfSriastradhstatic void convert_done(struct sna *sna, const struct sna_composite_op *op)
296303b705cfSriastradh{
296403b705cfSriastradh	struct kgem *kgem = &sna->kgem;
296503b705cfSriastradh
296603b705cfSriastradh	assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
296742542f5fSchristos	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
296842542f5fSchristos		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
296903b705cfSriastradh		_kgem_submit(kgem);
297042542f5fSchristos	}
297103b705cfSriastradh
297203b705cfSriastradh	kgem_bo_destroy(kgem, op->src.bo);
297303b705cfSriastradh	sna_render_composite_redirect_done(sna, op);
297403b705cfSriastradh}
297503b705cfSriastradh
297603b705cfSriastradhstatic void gen6_convert_done(struct sna *sna, const struct sna_composite_op *op)
297703b705cfSriastradh{
297803b705cfSriastradh	struct kgem *kgem = &sna->kgem;
297903b705cfSriastradh
298003b705cfSriastradh	if (kgem_check_batch(kgem, 3)) {
298103b705cfSriastradh		uint32_t *b = kgem->batch + kgem->nbatch;
298242542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
298303b705cfSriastradh		b[0] = XY_SETUP_CLIP;
298403b705cfSriastradh		b[1] = b[2] = 0;
298503b705cfSriastradh		kgem->nbatch += 3;
298603b705cfSriastradh		assert(kgem->nbatch < kgem->surface);
298703b705cfSriastradh	}
298803b705cfSriastradh
298903b705cfSriastradh	convert_done(sna, op);
299003b705cfSriastradh}
299103b705cfSriastradh
299203b705cfSriastradhbool
299303b705cfSriastradhsna_blt_composite__convert(struct sna *sna,
299403b705cfSriastradh			   int x, int y,
299503b705cfSriastradh			   int width, int height,
299603b705cfSriastradh			   struct sna_composite_op *tmp)
299703b705cfSriastradh{
299803b705cfSriastradh	uint32_t alpha_fixup;
299903b705cfSriastradh	int sx, sy;
300003b705cfSriastradh	uint8_t op;
300103b705cfSriastradh
300203b705cfSriastradh#if DEBUG_NO_BLT || NO_BLT_COMPOSITE
300303b705cfSriastradh	return false;
300403b705cfSriastradh#endif
300503b705cfSriastradh
300603b705cfSriastradh	DBG(("%s src=%d, dst=%d (redirect? %d)\n", __FUNCTION__,
300703b705cfSriastradh	     tmp->src.bo->handle, tmp->dst.bo->handle,
300803b705cfSriastradh	     tmp->redirect.real_bo ? tmp->redirect.real_bo->handle : 0));
300903b705cfSriastradh
301003b705cfSriastradh	if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo) ||
301103b705cfSriastradh	    !kgem_bo_can_blt(&sna->kgem, tmp->src.bo)) {
301203b705cfSriastradh		DBG(("%s: cannot blt from src or to dst\n", __FUNCTION__));
301303b705cfSriastradh		return false;
301403b705cfSriastradh	}
301503b705cfSriastradh
301603b705cfSriastradh	if (tmp->src.transform) {
301703b705cfSriastradh		DBG(("%s: transforms not handled by the BLT\n", __FUNCTION__));
301803b705cfSriastradh		return false;
301903b705cfSriastradh	}
302003b705cfSriastradh
302103b705cfSriastradh	if (tmp->src.filter == PictFilterConvolution) {
302203b705cfSriastradh		DBG(("%s: convolutions filters not handled\n",
302303b705cfSriastradh		     __FUNCTION__));
302403b705cfSriastradh		return false;
302503b705cfSriastradh	}
302603b705cfSriastradh
302703b705cfSriastradh	op = tmp->op;
302803b705cfSriastradh	if (op == PictOpOver && PICT_FORMAT_A(tmp->src.pict_format) == 0)
302903b705cfSriastradh		op = PictOpSrc;
303003b705cfSriastradh	if (op != PictOpSrc) {
303103b705cfSriastradh		DBG(("%s: unsupported op [%d] for blitting\n",
303203b705cfSriastradh		     __FUNCTION__, op));
303303b705cfSriastradh		return false;
303403b705cfSriastradh	}
303503b705cfSriastradh
303603b705cfSriastradh	alpha_fixup = 0;
303703b705cfSriastradh	if (!(tmp->dst.format == tmp->src.pict_format ||
303803b705cfSriastradh	      tmp->dst.format == alphaless(tmp->src.pict_format) ||
303903b705cfSriastradh	      (alphaless(tmp->dst.format) == alphaless(tmp->src.pict_format) &&
304003b705cfSriastradh	       sna_get_pixel_from_rgba(&alpha_fixup,
304103b705cfSriastradh				       0, 0, 0, 0xffff,
304203b705cfSriastradh				       tmp->dst.format)))) {
304303b705cfSriastradh		DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n",
304403b705cfSriastradh		     __FUNCTION__,
304503b705cfSriastradh		     (unsigned)tmp->src.pict_format,
304603b705cfSriastradh		     (unsigned)tmp->dst.format));
304703b705cfSriastradh		return false;
304803b705cfSriastradh	}
304903b705cfSriastradh
305003b705cfSriastradh	sx = tmp->src.offset[0];
305103b705cfSriastradh	sy = tmp->src.offset[1];
305203b705cfSriastradh
305303b705cfSriastradh	x += sx;
305403b705cfSriastradh	y += sy;
305503b705cfSriastradh	if (x < 0 || y < 0 ||
305603b705cfSriastradh	    x + width  > tmp->src.width ||
305703b705cfSriastradh	    y + height > tmp->src.height) {
305803b705cfSriastradh		DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d\n",
305903b705cfSriastradh		     __FUNCTION__,
306003b705cfSriastradh		     x, y, x+width, y+width, tmp->src.width, tmp->src.height));
306103b705cfSriastradh		if (tmp->src.repeat == RepeatNormal) {
306203b705cfSriastradh			int xx = x % tmp->src.width;
306303b705cfSriastradh			int yy = y % tmp->src.height;
306403b705cfSriastradh			if (xx < 0)
306503b705cfSriastradh				xx += tmp->src.width;
306603b705cfSriastradh			if (yy < 0)
306703b705cfSriastradh				yy += tmp->src.height;
306803b705cfSriastradh			if (xx + width  > tmp->src.width ||
306903b705cfSriastradh			    yy + height > tmp->src.height)
307003b705cfSriastradh				return false;
307103b705cfSriastradh
307203b705cfSriastradh			sx += xx - x;
307303b705cfSriastradh			sy += yy - y;
307403b705cfSriastradh		} else
307503b705cfSriastradh			return false;
307603b705cfSriastradh	}
307703b705cfSriastradh
307842542f5fSchristos	DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
307942542f5fSchristos	     __FUNCTION__,
308042542f5fSchristos	     tmp->dst.x, tmp->dst.y, sx, sy, alpha_fixup));
308142542f5fSchristos
308242542f5fSchristos	tmp->u.blt.src_pixmap = NULL;
308342542f5fSchristos	tmp->u.blt.sx = sx;
308442542f5fSchristos	tmp->u.blt.sy = sy;
308542542f5fSchristos
308642542f5fSchristos	kgem_set_mode(&sna->kgem, KGEM_BLT, tmp->dst.bo);
308703b705cfSriastradh	if (!kgem_check_many_bo_fenced(&sna->kgem, tmp->dst.bo, tmp->src.bo, NULL)) {
308803b705cfSriastradh		kgem_submit(&sna->kgem);
308903b705cfSriastradh		if (!kgem_check_many_bo_fenced(&sna->kgem,
309003b705cfSriastradh					       tmp->dst.bo, tmp->src.bo, NULL)) {
309103b705cfSriastradh			DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
309242542f5fSchristos			return sna_tiling_blt_composite(sna, tmp, tmp->src.bo,
309342542f5fSchristos							PICT_FORMAT_BPP(tmp->src.pict_format),
309442542f5fSchristos							alpha_fixup);
309503b705cfSriastradh		}
309603b705cfSriastradh		_kgem_set_mode(&sna->kgem, KGEM_BLT);
309703b705cfSriastradh	}
3098fe8aea9eSmrg	kgem_bcs_set_tiling(&sna->kgem, tmp->src.bo, tmp->dst.bo);
309903b705cfSriastradh
310003b705cfSriastradh	if (alpha_fixup) {
310103b705cfSriastradh		tmp->blt   = blt_composite_copy_with_alpha;
310203b705cfSriastradh		tmp->box   = blt_composite_copy_box_with_alpha;
310303b705cfSriastradh		tmp->boxes = blt_composite_copy_boxes_with_alpha;
310403b705cfSriastradh
310503b705cfSriastradh		if (!sna_blt_alpha_fixup_init(sna, &tmp->u.blt,
310603b705cfSriastradh					      tmp->src.bo, tmp->dst.bo,
310703b705cfSriastradh					      PICT_FORMAT_BPP(tmp->src.pict_format),
310803b705cfSriastradh					      alpha_fixup))
310903b705cfSriastradh			return false;
311003b705cfSriastradh	} else {
311103b705cfSriastradh		tmp->blt   = blt_composite_copy;
311203b705cfSriastradh		tmp->box   = blt_composite_copy_box;
311303b705cfSriastradh		tmp->boxes = blt_composite_copy_boxes;
3114fe8aea9eSmrg		if (sna->kgem.gen >= 0100)
3115fe8aea9eSmrg			tmp->thread_boxes = blt_composite_copy_boxes__thread64;
3116fe8aea9eSmrg		else
3117fe8aea9eSmrg			tmp->thread_boxes = blt_composite_copy_boxes__thread;
311803b705cfSriastradh
311903b705cfSriastradh		if (!sna_blt_copy_init(sna, &tmp->u.blt,
312003b705cfSriastradh				       tmp->src.bo, tmp->dst.bo,
312103b705cfSriastradh				       PICT_FORMAT_BPP(tmp->src.pict_format),
312203b705cfSriastradh				       GXcopy))
312303b705cfSriastradh			return false;
312403b705cfSriastradh	}
312503b705cfSriastradh
312603b705cfSriastradh	tmp->done = convert_done;
312703b705cfSriastradh	if (sna->kgem.gen >= 060 && tmp->src.bo == tmp->dst.bo)
312803b705cfSriastradh		tmp->done = gen6_convert_done;
312903b705cfSriastradh
313003b705cfSriastradh	return true;
313103b705cfSriastradh}
313203b705cfSriastradh
313303b705cfSriastradhstatic void sna_blt_fill_op_blt(struct sna *sna,
313403b705cfSriastradh				const struct sna_fill_op *op,
313503b705cfSriastradh				int16_t x, int16_t y,
313603b705cfSriastradh				int16_t width, int16_t height)
313703b705cfSriastradh{
313842542f5fSchristos	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
313942542f5fSchristos		const struct sna_blt_state *blt = &op->base.u.blt;
314042542f5fSchristos
3141fe8aea9eSmrg		__sna_blt_fill_begin(sna, blt);
314242542f5fSchristos
314342542f5fSchristos		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
314442542f5fSchristos		sna->blt_state.fill_pixel = blt->pixel;
314542542f5fSchristos		sna->blt_state.fill_alu = blt->alu;
314642542f5fSchristos	}
314742542f5fSchristos
314803b705cfSriastradh	sna_blt_fill_one(sna, &op->base.u.blt, x, y, width, height);
314903b705cfSriastradh}
315003b705cfSriastradh
315103b705cfSriastradhfastcall static void sna_blt_fill_op_box(struct sna *sna,
315203b705cfSriastradh					 const struct sna_fill_op *op,
315303b705cfSriastradh					 const BoxRec *box)
315403b705cfSriastradh{
315542542f5fSchristos	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
315642542f5fSchristos		const struct sna_blt_state *blt = &op->base.u.blt;
315742542f5fSchristos
3158fe8aea9eSmrg		__sna_blt_fill_begin(sna, blt);
315942542f5fSchristos
316042542f5fSchristos		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
316142542f5fSchristos		sna->blt_state.fill_pixel = blt->pixel;
316242542f5fSchristos		sna->blt_state.fill_alu = blt->alu;
316342542f5fSchristos	}
316442542f5fSchristos
316503b705cfSriastradh	_sna_blt_fill_box(sna, &op->base.u.blt, box);
316603b705cfSriastradh}
316703b705cfSriastradh
316803b705cfSriastradhfastcall static void sna_blt_fill_op_boxes(struct sna *sna,
316903b705cfSriastradh					   const struct sna_fill_op *op,
317003b705cfSriastradh					   const BoxRec *box,
317103b705cfSriastradh					   int nbox)
317203b705cfSriastradh{
317342542f5fSchristos	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
317442542f5fSchristos		const struct sna_blt_state *blt = &op->base.u.blt;
317542542f5fSchristos
3176fe8aea9eSmrg		__sna_blt_fill_begin(sna, blt);
317742542f5fSchristos
317842542f5fSchristos		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
317942542f5fSchristos		sna->blt_state.fill_pixel = blt->pixel;
318042542f5fSchristos		sna->blt_state.fill_alu = blt->alu;
318142542f5fSchristos	}
318242542f5fSchristos
318303b705cfSriastradh	_sna_blt_fill_boxes(sna, &op->base.u.blt, box, nbox);
318403b705cfSriastradh}
318503b705cfSriastradh
318642542f5fSchristosstatic inline uint64_t pt_add(uint32_t cmd, const DDXPointRec *pt, int16_t dx, int16_t dy)
318742542f5fSchristos{
318842542f5fSchristos	union {
318942542f5fSchristos		DDXPointRec pt;
319042542f5fSchristos		uint32_t i;
319142542f5fSchristos	} u;
319242542f5fSchristos
319342542f5fSchristos	u.pt.x = pt->x + dx;
319442542f5fSchristos	u.pt.y = pt->y + dy;
319542542f5fSchristos
319642542f5fSchristos	return cmd | (uint64_t)u.i<<32;
319742542f5fSchristos}
319842542f5fSchristos
319942542f5fSchristosfastcall static void sna_blt_fill_op_points(struct sna *sna,
320042542f5fSchristos					    const struct sna_fill_op *op,
320142542f5fSchristos					    int16_t dx, int16_t dy,
320242542f5fSchristos					    const DDXPointRec *p, int n)
320342542f5fSchristos{
320442542f5fSchristos	const struct sna_blt_state *blt = &op->base.u.blt;
320542542f5fSchristos	struct kgem *kgem = &sna->kgem;
320642542f5fSchristos	uint32_t cmd;
320742542f5fSchristos
320842542f5fSchristos	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, n));
320942542f5fSchristos
321042542f5fSchristos	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
3211fe8aea9eSmrg		__sna_blt_fill_begin(sna, blt);
321242542f5fSchristos
321342542f5fSchristos		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
321442542f5fSchristos		sna->blt_state.fill_pixel = blt->pixel;
321542542f5fSchristos		sna->blt_state.fill_alu = blt->alu;
321642542f5fSchristos	}
321742542f5fSchristos
321842542f5fSchristos	if (!kgem_check_batch(kgem, 2))
321942542f5fSchristos		sna_blt_fill_begin(sna, blt);
322042542f5fSchristos
322142542f5fSchristos	cmd = XY_PIXEL_BLT;
322242542f5fSchristos	if (kgem->gen >= 040 && op->base.u.blt.bo[0]->tiling)
322342542f5fSchristos		cmd |= BLT_DST_TILED;
322442542f5fSchristos
322542542f5fSchristos	do {
322642542f5fSchristos		uint32_t *b = kgem->batch + kgem->nbatch;
322713496ba1Ssnj		int n_this_time, rem;
322842542f5fSchristos
322942542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
323042542f5fSchristos		n_this_time = n;
323113496ba1Ssnj		rem = kgem_batch_space(kgem);
323213496ba1Ssnj		if (2*n_this_time > rem)
323313496ba1Ssnj			n_this_time = rem / 2;
323442542f5fSchristos		assert(n_this_time);
323542542f5fSchristos		n -= n_this_time;
323642542f5fSchristos
323742542f5fSchristos		kgem->nbatch += 2 * n_this_time;
323842542f5fSchristos		assert(kgem->nbatch < kgem->surface);
323942542f5fSchristos
324042542f5fSchristos		if ((dx|dy) == 0) {
3241fe8aea9eSmrg			do {
3242fe8aea9eSmrg				*(uint64_t *)b = pt_add(cmd, p++, 0, 0);
3243fe8aea9eSmrg				b += 2;
3244fe8aea9eSmrg			} while (--n_this_time);
324542542f5fSchristos		} else {
3246fe8aea9eSmrg			do {
3247fe8aea9eSmrg				*(uint64_t *)b = pt_add(cmd, p++, dx, dy);
3248fe8aea9eSmrg				b += 2;
3249fe8aea9eSmrg			} while (--n_this_time);
325042542f5fSchristos		}
325142542f5fSchristos
325242542f5fSchristos		if (!n)
325342542f5fSchristos			return;
325442542f5fSchristos
325542542f5fSchristos		sna_blt_fill_begin(sna, blt);
325642542f5fSchristos	} while (1);
325742542f5fSchristos}
325842542f5fSchristos
325903b705cfSriastradhbool sna_blt_fill(struct sna *sna, uint8_t alu,
326003b705cfSriastradh		  struct kgem_bo *bo, int bpp,
326103b705cfSriastradh		  uint32_t pixel,
326203b705cfSriastradh		  struct sna_fill_op *fill)
326303b705cfSriastradh{
326403b705cfSriastradh#if DEBUG_NO_BLT || NO_BLT_FILL
326503b705cfSriastradh	return false;
326603b705cfSriastradh#endif
326703b705cfSriastradh
326803b705cfSriastradh	DBG(("%s(alu=%d, pixel=%x, bpp=%d)\n", __FUNCTION__, alu, pixel, bpp));
326903b705cfSriastradh
327003b705cfSriastradh	if (!kgem_bo_can_blt(&sna->kgem, bo)) {
327103b705cfSriastradh		DBG(("%s: rejected due to incompatible Y-tiling\n",
327203b705cfSriastradh		     __FUNCTION__));
327303b705cfSriastradh		return false;
327403b705cfSriastradh	}
327503b705cfSriastradh
327603b705cfSriastradh	if (!sna_blt_fill_init(sna, &fill->base.u.blt,
327703b705cfSriastradh			       bo, bpp, alu, pixel))
327803b705cfSriastradh		return false;
327903b705cfSriastradh
328013496ba1Ssnj	assert(sna->kgem.mode == KGEM_BLT);
328103b705cfSriastradh	fill->blt   = sna_blt_fill_op_blt;
328203b705cfSriastradh	fill->box   = sna_blt_fill_op_box;
328303b705cfSriastradh	fill->boxes = sna_blt_fill_op_boxes;
328442542f5fSchristos	fill->points = sna_blt_fill_op_points;
328503b705cfSriastradh	fill->done  =
328603b705cfSriastradh		(void (*)(struct sna *, const struct sna_fill_op *))nop_done;
328703b705cfSriastradh	return true;
328803b705cfSriastradh}
328903b705cfSriastradh
329003b705cfSriastradhstatic void sna_blt_copy_op_blt(struct sna *sna,
329103b705cfSriastradh				const struct sna_copy_op *op,
329203b705cfSriastradh				int16_t src_x, int16_t src_y,
329303b705cfSriastradh				int16_t width, int16_t height,
329403b705cfSriastradh				int16_t dst_x, int16_t dst_y)
329503b705cfSriastradh{
329603b705cfSriastradh	sna_blt_copy_one(sna, &op->base.u.blt,
329703b705cfSriastradh			 src_x, src_y,
329803b705cfSriastradh			 width, height,
329903b705cfSriastradh			 dst_x, dst_y);
330003b705cfSriastradh}
330103b705cfSriastradh
330203b705cfSriastradhbool sna_blt_copy(struct sna *sna, uint8_t alu,
330303b705cfSriastradh		  struct kgem_bo *src,
330403b705cfSriastradh		  struct kgem_bo *dst,
330503b705cfSriastradh		  int bpp,
330603b705cfSriastradh		  struct sna_copy_op *op)
330703b705cfSriastradh{
330803b705cfSriastradh#if DEBUG_NO_BLT || NO_BLT_COPY
330903b705cfSriastradh	return false;
331003b705cfSriastradh#endif
331103b705cfSriastradh
331203b705cfSriastradh	if (!kgem_bo_can_blt(&sna->kgem, src))
331303b705cfSriastradh		return false;
331403b705cfSriastradh
331503b705cfSriastradh	if (!kgem_bo_can_blt(&sna->kgem, dst))
331603b705cfSriastradh		return false;
331703b705cfSriastradh
331803b705cfSriastradh	if (!sna_blt_copy_init(sna, &op->base.u.blt,
331903b705cfSriastradh			       src, dst,
332003b705cfSriastradh			       bpp, alu))
332103b705cfSriastradh		return false;
332203b705cfSriastradh
332303b705cfSriastradh	op->blt  = sna_blt_copy_op_blt;
332403b705cfSriastradh	if (sna->kgem.gen >= 060 && src == dst)
332503b705cfSriastradh		op->done = (void (*)(struct sna *, const struct sna_copy_op *))
332603b705cfSriastradh			    gen6_blt_copy_done;
332703b705cfSriastradh	else
332803b705cfSriastradh		op->done = (void (*)(struct sna *, const struct sna_copy_op *))
332903b705cfSriastradh			    nop_done;
333003b705cfSriastradh	return true;
333103b705cfSriastradh}
333203b705cfSriastradh
333303b705cfSriastradhstatic bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
333403b705cfSriastradh			     struct kgem_bo *bo, int bpp,
333503b705cfSriastradh			     uint32_t color,
333603b705cfSriastradh			     const BoxRec *box)
333703b705cfSriastradh{
333803b705cfSriastradh	struct kgem *kgem = &sna->kgem;
333903b705cfSriastradh	uint32_t br13, cmd, *b;
334003b705cfSriastradh	bool overwrites;
334103b705cfSriastradh
334203b705cfSriastradh	assert(kgem_bo_can_blt (kgem, bo));
334303b705cfSriastradh
334403b705cfSriastradh	DBG(("%s: box=((%d, %d), (%d, %d))\n", __FUNCTION__,
334503b705cfSriastradh	     box->x1, box->y1, box->x2, box->y2));
334603b705cfSriastradh
334703b705cfSriastradh	assert(box->x1 >= 0);
334803b705cfSriastradh	assert(box->y1 >= 0);
334903b705cfSriastradh
335042542f5fSchristos	cmd = XY_COLOR_BLT | (kgem->gen >= 0100 ? 5 : 4);
335103b705cfSriastradh	br13 = bo->pitch;
335203b705cfSriastradh	if (kgem->gen >= 040 && bo->tiling) {
335303b705cfSriastradh		cmd |= BLT_DST_TILED;
335403b705cfSriastradh		br13 >>= 2;
335503b705cfSriastradh	}
335603b705cfSriastradh	assert(br13 <= MAXSHORT);
335703b705cfSriastradh
335803b705cfSriastradh	br13 |= fill_ROP[alu] << 16;
335903b705cfSriastradh	switch (bpp) {
336003b705cfSriastradh	default: assert(0);
336103b705cfSriastradh	case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
336203b705cfSriastradh		 br13 |= 1 << 25; /* RGB8888 */
336303b705cfSriastradh	case 16: br13 |= 1 << 24; /* RGB565 */
336403b705cfSriastradh	case 8: break;
336503b705cfSriastradh	}
336603b705cfSriastradh
336703b705cfSriastradh	/* All too frequently one blt completely overwrites the previous */
336803b705cfSriastradh	overwrites = alu == GXcopy || alu == GXclear || alu == GXset;
336942542f5fSchristos	if (overwrites) {
337042542f5fSchristos		if (sna->kgem.gen >= 0100) {
337142542f5fSchristos			if (kgem->nbatch >= 7 &&
337242542f5fSchristos			    kgem->batch[kgem->nbatch-7] == cmd &&
337342542f5fSchristos			    *(uint64_t *)&kgem->batch[kgem->nbatch-5] == *(const uint64_t *)box &&
337442542f5fSchristos			    kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) {
337542542f5fSchristos				DBG(("%s: replacing last fill\n", __FUNCTION__));
337642542f5fSchristos				kgem->batch[kgem->nbatch-6] = br13;
337742542f5fSchristos				kgem->batch[kgem->nbatch-1] = color;
337842542f5fSchristos				return true;
337942542f5fSchristos			}
338042542f5fSchristos			if (kgem->nbatch >= 10 &&
338142542f5fSchristos			    (kgem->batch[kgem->nbatch-10] & 0xffc00000) == XY_SRC_COPY_BLT_CMD &&
338242542f5fSchristos			    *(uint64_t *)&kgem->batch[kgem->nbatch-8] == *(const uint64_t *)box &&
338342542f5fSchristos			    kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) {
338442542f5fSchristos				DBG(("%s: replacing last copy\n", __FUNCTION__));
338542542f5fSchristos				kgem->batch[kgem->nbatch-10] = cmd;
338642542f5fSchristos				kgem->batch[kgem->nbatch-8] = br13;
338742542f5fSchristos				kgem->batch[kgem->nbatch-4] = color;
338842542f5fSchristos				/* Keep the src bo as part of the execlist, just remove
338942542f5fSchristos				 * its relocation entry.
339042542f5fSchristos				 */
339142542f5fSchristos				kgem->nreloc--;
339242542f5fSchristos				kgem->nbatch -= 3;
339342542f5fSchristos				return true;
339442542f5fSchristos			}
339542542f5fSchristos		} else {
339642542f5fSchristos			if (kgem->nbatch >= 6 &&
339742542f5fSchristos			    kgem->batch[kgem->nbatch-6] == cmd &&
339842542f5fSchristos			    *(uint64_t *)&kgem->batch[kgem->nbatch-4] == *(const uint64_t *)box &&
339942542f5fSchristos			    kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) {
340042542f5fSchristos				DBG(("%s: replacing last fill\n", __FUNCTION__));
340142542f5fSchristos				kgem->batch[kgem->nbatch-5] = br13;
340242542f5fSchristos				kgem->batch[kgem->nbatch-1] = color;
340342542f5fSchristos				return true;
340442542f5fSchristos			}
340542542f5fSchristos			if (kgem->nbatch >= 8 &&
340642542f5fSchristos			    (kgem->batch[kgem->nbatch-8] & 0xffc00000) == XY_SRC_COPY_BLT_CMD &&
340742542f5fSchristos			    *(uint64_t *)&kgem->batch[kgem->nbatch-6] == *(const uint64_t *)box &&
340842542f5fSchristos			    kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) {
340942542f5fSchristos				DBG(("%s: replacing last copy\n", __FUNCTION__));
341042542f5fSchristos				kgem->batch[kgem->nbatch-8] = cmd;
341142542f5fSchristos				kgem->batch[kgem->nbatch-7] = br13;
341242542f5fSchristos				kgem->batch[kgem->nbatch-3] = color;
341342542f5fSchristos				/* Keep the src bo as part of the execlist, just remove
341442542f5fSchristos				 * its relocation entry.
341542542f5fSchristos				 */
341642542f5fSchristos				kgem->nreloc--;
341742542f5fSchristos				kgem->nbatch -= 2;
341842542f5fSchristos				return true;
341942542f5fSchristos			}
342042542f5fSchristos		}
342103b705cfSriastradh	}
342203b705cfSriastradh
342303b705cfSriastradh	/* If we are currently emitting SCANLINES, keep doing so */
342403b705cfSriastradh	if (sna->blt_state.fill_bo == bo->unique_id &&
342503b705cfSriastradh	    sna->blt_state.fill_pixel == color &&
342603b705cfSriastradh	    (sna->blt_state.fill_alu == alu ||
342703b705cfSriastradh	     sna->blt_state.fill_alu == ~alu)) {
342803b705cfSriastradh		DBG(("%s: matching last fill, converting to scanlines\n",
342903b705cfSriastradh		     __FUNCTION__));
343003b705cfSriastradh		return false;
343103b705cfSriastradh	}
343203b705cfSriastradh
343303b705cfSriastradh	kgem_set_mode(kgem, KGEM_BLT, bo);
343442542f5fSchristos	if (!kgem_check_batch(kgem, 7) ||
343503b705cfSriastradh	    !kgem_check_reloc(kgem, 1) ||
343603b705cfSriastradh	    !kgem_check_bo_fenced(kgem, bo)) {
343703b705cfSriastradh		kgem_submit(kgem);
343842542f5fSchristos		if (!kgem_check_bo_fenced(&sna->kgem, bo))
343942542f5fSchristos			return false;
344042542f5fSchristos
344103b705cfSriastradh		_kgem_set_mode(kgem, KGEM_BLT);
344203b705cfSriastradh	}
3443fe8aea9eSmrg	kgem_bcs_set_tiling(&sna->kgem, NULL, bo);
344403b705cfSriastradh
344542542f5fSchristos	assert(kgem_check_batch(kgem, 6));
344642542f5fSchristos	assert(kgem_check_reloc(kgem, 1));
344742542f5fSchristos
344842542f5fSchristos	assert(sna->kgem.mode == KGEM_BLT);
344903b705cfSriastradh	b = kgem->batch + kgem->nbatch;
345003b705cfSriastradh	b[0] = cmd;
345103b705cfSriastradh	b[1] = br13;
345203b705cfSriastradh	*(uint64_t *)(b+2) = *(const uint64_t *)box;
345342542f5fSchristos	if (kgem->gen >= 0100) {
345442542f5fSchristos		*(uint64_t *)(b+4) =
345542542f5fSchristos			kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
345642542f5fSchristos					 I915_GEM_DOMAIN_RENDER << 16 |
345742542f5fSchristos					 I915_GEM_DOMAIN_RENDER |
345842542f5fSchristos					 KGEM_RELOC_FENCED,
345942542f5fSchristos					 0);
346042542f5fSchristos		b[6] = color;
346142542f5fSchristos		kgem->nbatch += 7;
346242542f5fSchristos	} else {
346342542f5fSchristos		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
346442542f5fSchristos				      I915_GEM_DOMAIN_RENDER << 16 |
346542542f5fSchristos				      I915_GEM_DOMAIN_RENDER |
346642542f5fSchristos				      KGEM_RELOC_FENCED,
346742542f5fSchristos				      0);
346842542f5fSchristos		b[5] = color;
346942542f5fSchristos		kgem->nbatch += 6;
347042542f5fSchristos	}
347103b705cfSriastradh	assert(kgem->nbatch < kgem->surface);
347203b705cfSriastradh
347303b705cfSriastradh	sna->blt_state.fill_bo = bo->unique_id;
347403b705cfSriastradh	sna->blt_state.fill_pixel = color;
347503b705cfSriastradh	sna->blt_state.fill_alu = ~alu;
347603b705cfSriastradh	return true;
347703b705cfSriastradh}
347803b705cfSriastradh
347903b705cfSriastradhbool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
348003b705cfSriastradh			struct kgem_bo *bo, int bpp,
348103b705cfSriastradh			uint32_t pixel,
348203b705cfSriastradh			const BoxRec *box, int nbox)
348303b705cfSriastradh{
348403b705cfSriastradh	struct kgem *kgem = &sna->kgem;
348503b705cfSriastradh	uint32_t br13, cmd;
348603b705cfSriastradh
348703b705cfSriastradh#if DEBUG_NO_BLT || NO_BLT_FILL_BOXES
348803b705cfSriastradh	return false;
348903b705cfSriastradh#endif
349003b705cfSriastradh
349103b705cfSriastradh	DBG(("%s (%d, %08x, %d) x %d\n",
349203b705cfSriastradh	     __FUNCTION__, bpp, pixel, alu, nbox));
349303b705cfSriastradh
349403b705cfSriastradh	if (!kgem_bo_can_blt(kgem, bo)) {
349503b705cfSriastradh		DBG(("%s: fallback -- cannot blt to dst\n", __FUNCTION__));
349603b705cfSriastradh		return false;
349703b705cfSriastradh	}
349803b705cfSriastradh
349903b705cfSriastradh	if (alu == GXclear)
350003b705cfSriastradh		pixel = 0;
350103b705cfSriastradh	else if (alu == GXcopy) {
350203b705cfSriastradh		if (pixel == 0)
350303b705cfSriastradh			alu = GXclear;
350403b705cfSriastradh		else if (pixel == -1)
350503b705cfSriastradh			alu = GXset;
350603b705cfSriastradh	}
350703b705cfSriastradh
350803b705cfSriastradh	if (nbox == 1 && sna_blt_fill_box(sna, alu, bo, bpp, pixel, box))
350903b705cfSriastradh		return true;
351003b705cfSriastradh
351103b705cfSriastradh	br13 = bo->pitch;
351203b705cfSriastradh	cmd = XY_SCANLINE_BLT;
351303b705cfSriastradh	if (kgem->gen >= 040 && bo->tiling) {
351403b705cfSriastradh		cmd |= 1 << 11;
351503b705cfSriastradh		br13 >>= 2;
351603b705cfSriastradh	}
351703b705cfSriastradh	assert(br13 <= MAXSHORT);
351803b705cfSriastradh
351903b705cfSriastradh	br13 |= 1<<31 | fill_ROP[alu] << 16;
352003b705cfSriastradh	switch (bpp) {
352103b705cfSriastradh	default: assert(0);
352203b705cfSriastradh	case 32: br13 |= 1 << 25; /* RGB8888 */
352303b705cfSriastradh	case 16: br13 |= 1 << 24; /* RGB565 */
352403b705cfSriastradh	case 8: break;
352503b705cfSriastradh	}
352603b705cfSriastradh
352703b705cfSriastradh	kgem_set_mode(kgem, KGEM_BLT, bo);
352842542f5fSchristos	if (!kgem_check_batch(kgem, 14) ||
352903b705cfSriastradh	    !kgem_check_bo_fenced(kgem, bo)) {
353003b705cfSriastradh		kgem_submit(kgem);
353103b705cfSriastradh		if (!kgem_check_bo_fenced(&sna->kgem, bo))
353203b705cfSriastradh			return false;
353303b705cfSriastradh		_kgem_set_mode(kgem, KGEM_BLT);
353403b705cfSriastradh	}
353503b705cfSriastradh
353603b705cfSriastradh	if (sna->blt_state.fill_bo != bo->unique_id ||
353703b705cfSriastradh	    sna->blt_state.fill_pixel != pixel ||
353803b705cfSriastradh	    sna->blt_state.fill_alu != alu)
353903b705cfSriastradh	{
354003b705cfSriastradh		uint32_t *b;
354103b705cfSriastradh
354213496ba1Ssnj		if (!kgem_check_batch(kgem, 24) ||
354313496ba1Ssnj		    !kgem_check_reloc(kgem, 1)) {
354403b705cfSriastradh			_kgem_submit(kgem);
354542542f5fSchristos			if (!kgem_check_bo_fenced(&sna->kgem, bo))
354642542f5fSchristos				return false;
354703b705cfSriastradh			_kgem_set_mode(kgem, KGEM_BLT);
354803b705cfSriastradh		}
354903b705cfSriastradh
3550fe8aea9eSmrg		kgem_bcs_set_tiling(&sna->kgem, NULL, bo);
3551fe8aea9eSmrg
355242542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
355303b705cfSriastradh		b = kgem->batch + kgem->nbatch;
355442542f5fSchristos		if (kgem->gen >= 0100) {
355542542f5fSchristos			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
355642542f5fSchristos			if (bpp == 32)
355742542f5fSchristos				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
355842542f5fSchristos			if (bo->tiling)
355942542f5fSchristos				b[0] |= BLT_DST_TILED;
356042542f5fSchristos			b[1] = br13;
356142542f5fSchristos			b[2] = 0;
356242542f5fSchristos			b[3] = 0;
356342542f5fSchristos			*(uint64_t *)(b+4) =
356442542f5fSchristos				kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
356542542f5fSchristos						 I915_GEM_DOMAIN_RENDER << 16 |
356642542f5fSchristos						 I915_GEM_DOMAIN_RENDER |
356742542f5fSchristos						 KGEM_RELOC_FENCED,
356842542f5fSchristos						 0);
356942542f5fSchristos			b[6] = pixel;
357042542f5fSchristos			b[7] = pixel;
357142542f5fSchristos			b[8] = 0;
357242542f5fSchristos			b[9] = 0;
357342542f5fSchristos			kgem->nbatch += 10;
357442542f5fSchristos		} else {
357542542f5fSchristos			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
357642542f5fSchristos			if (bpp == 32)
357742542f5fSchristos				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
357842542f5fSchristos			if (bo->tiling && kgem->gen >= 040)
357942542f5fSchristos				b[0] |= BLT_DST_TILED;
358042542f5fSchristos			b[1] = br13;
358142542f5fSchristos			b[2] = 0;
358242542f5fSchristos			b[3] = 0;
358342542f5fSchristos			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
358442542f5fSchristos					      I915_GEM_DOMAIN_RENDER << 16 |
358542542f5fSchristos					      I915_GEM_DOMAIN_RENDER |
358642542f5fSchristos					      KGEM_RELOC_FENCED,
358742542f5fSchristos					      0);
358842542f5fSchristos			b[5] = pixel;
358942542f5fSchristos			b[6] = pixel;
359042542f5fSchristos			b[7] = 0;
359142542f5fSchristos			b[8] = 0;
359242542f5fSchristos			kgem->nbatch += 9;
359342542f5fSchristos		}
359403b705cfSriastradh		assert(kgem->nbatch < kgem->surface);
359503b705cfSriastradh
359603b705cfSriastradh		sna->blt_state.fill_bo = bo->unique_id;
359703b705cfSriastradh		sna->blt_state.fill_pixel = pixel;
359803b705cfSriastradh		sna->blt_state.fill_alu = alu;
359903b705cfSriastradh	}
360003b705cfSriastradh
360103b705cfSriastradh	do {
360213496ba1Ssnj		int nbox_this_time, rem;
360303b705cfSriastradh
360403b705cfSriastradh		nbox_this_time = nbox;
360513496ba1Ssnj		rem = kgem_batch_space(kgem);
360613496ba1Ssnj		if (3*nbox_this_time > rem)
360713496ba1Ssnj			nbox_this_time = rem / 3;
360813496ba1Ssnj		DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
360913496ba1Ssnj		     __FUNCTION__, nbox_this_time, nbox, rem));
361013496ba1Ssnj		assert(nbox_this_time > 0);
361103b705cfSriastradh		nbox -= nbox_this_time;
361203b705cfSriastradh
361342542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
361403b705cfSriastradh		do {
361503b705cfSriastradh			uint32_t *b;
361603b705cfSriastradh
361703b705cfSriastradh			DBG(("%s: (%d, %d), (%d, %d): %08x\n",
361803b705cfSriastradh			     __FUNCTION__,
361903b705cfSriastradh			     box->x1, box->y1,
362003b705cfSriastradh			     box->x2, box->y2,
362103b705cfSriastradh			     pixel));
362203b705cfSriastradh
362303b705cfSriastradh			assert(box->x1 >= 0);
362403b705cfSriastradh			assert(box->y1 >= 0);
362503b705cfSriastradh			assert(box->y2 * bo->pitch <= kgem_bo_size(bo));
362603b705cfSriastradh
362703b705cfSriastradh			b = kgem->batch + kgem->nbatch;
362803b705cfSriastradh			kgem->nbatch += 3;
362903b705cfSriastradh			assert(kgem->nbatch < kgem->surface);
363003b705cfSriastradh			b[0] = cmd;
363103b705cfSriastradh			*(uint64_t *)(b+1) = *(const uint64_t *)box;
363203b705cfSriastradh			box++;
363303b705cfSriastradh		} while (--nbox_this_time);
363403b705cfSriastradh
363503b705cfSriastradh		if (nbox) {
363603b705cfSriastradh			uint32_t *b;
363703b705cfSriastradh
363803b705cfSriastradh			_kgem_submit(kgem);
363903b705cfSriastradh			_kgem_set_mode(kgem, KGEM_BLT);
3640fe8aea9eSmrg			kgem_bcs_set_tiling(&sna->kgem, NULL, bo);
364103b705cfSriastradh
364242542f5fSchristos			assert(sna->kgem.mode == KGEM_BLT);
364303b705cfSriastradh			b = kgem->batch + kgem->nbatch;
364442542f5fSchristos			if (kgem->gen >= 0100) {
364542542f5fSchristos				b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
364642542f5fSchristos				if (bpp == 32)
364742542f5fSchristos					b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
364842542f5fSchristos				if (bo->tiling)
364942542f5fSchristos					b[0] |= BLT_DST_TILED;
365042542f5fSchristos				b[1] = br13;
365142542f5fSchristos				b[2] = 0;
365242542f5fSchristos				b[3] = 0;
365342542f5fSchristos				*(uint64_t *)(b+4) =
365442542f5fSchristos					kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
365542542f5fSchristos							 I915_GEM_DOMAIN_RENDER << 16 |
365642542f5fSchristos							 I915_GEM_DOMAIN_RENDER |
365742542f5fSchristos							 KGEM_RELOC_FENCED,
365842542f5fSchristos							 0);
365942542f5fSchristos				b[6] = pixel;
366042542f5fSchristos				b[7] = pixel;
366142542f5fSchristos				b[8] = 0;
366242542f5fSchristos				b[9] = 0;
366342542f5fSchristos				kgem->nbatch += 10;
366442542f5fSchristos			} else {
366542542f5fSchristos				b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
366642542f5fSchristos				if (bpp == 32)
366742542f5fSchristos					b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
366842542f5fSchristos				if (bo->tiling && kgem->gen >= 040)
366942542f5fSchristos					b[0] |= BLT_DST_TILED;
367042542f5fSchristos				b[1] = br13;
367142542f5fSchristos				b[2] = 0;
367242542f5fSchristos				b[3] = 0;
367342542f5fSchristos				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
367442542f5fSchristos						      I915_GEM_DOMAIN_RENDER << 16 |
367542542f5fSchristos						      I915_GEM_DOMAIN_RENDER |
367642542f5fSchristos						      KGEM_RELOC_FENCED,
367742542f5fSchristos						      0);
367842542f5fSchristos				b[5] = pixel;
367942542f5fSchristos				b[6] = pixel;
368042542f5fSchristos				b[7] = 0;
368142542f5fSchristos				b[8] = 0;
368242542f5fSchristos				kgem->nbatch += 9;
368342542f5fSchristos			}
368403b705cfSriastradh			assert(kgem->nbatch < kgem->surface);
368513496ba1Ssnj			assert(kgem_check_batch(kgem, 3));
368603b705cfSriastradh		}
368703b705cfSriastradh	} while (nbox);
368803b705cfSriastradh
368942542f5fSchristos	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
369042542f5fSchristos		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
369103b705cfSriastradh		_kgem_submit(kgem);
369242542f5fSchristos	}
369303b705cfSriastradh
369403b705cfSriastradh	return true;
369503b705cfSriastradh}
369603b705cfSriastradh
369703b705cfSriastradhbool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
369803b705cfSriastradh			struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
369903b705cfSriastradh			struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
370003b705cfSriastradh			int bpp, const BoxRec *box, int nbox)
370103b705cfSriastradh{
370203b705cfSriastradh	struct kgem *kgem = &sna->kgem;
370303b705cfSriastradh	unsigned src_pitch, br13, cmd;
370403b705cfSriastradh
370503b705cfSriastradh#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES
370603b705cfSriastradh	return false;
370703b705cfSriastradh#endif
370803b705cfSriastradh
370903b705cfSriastradh	DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n",
371003b705cfSriastradh	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox,
371103b705cfSriastradh	    src_bo->tiling, dst_bo->tiling,
371203b705cfSriastradh	    src_bo->pitch, dst_bo->pitch));
371342542f5fSchristos	assert(nbox);
371403b705cfSriastradh
371542542f5fSchristos	if (wedged(sna) || !kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) {
371603b705cfSriastradh		DBG(("%s: cannot blt to src? %d or dst? %d\n",
371703b705cfSriastradh		     __FUNCTION__,
371803b705cfSriastradh		     kgem_bo_can_blt(kgem, src_bo),
371903b705cfSriastradh		     kgem_bo_can_blt(kgem, dst_bo)));
372003b705cfSriastradh		return false;
372103b705cfSriastradh	}
372203b705cfSriastradh
372303b705cfSriastradh	cmd = XY_SRC_COPY_BLT_CMD;
372403b705cfSriastradh	if (bpp == 32)
372503b705cfSriastradh		cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
372603b705cfSriastradh
372703b705cfSriastradh	src_pitch = src_bo->pitch;
372803b705cfSriastradh	if (kgem->gen >= 040 && src_bo->tiling) {
372903b705cfSriastradh		cmd |= BLT_SRC_TILED;
373003b705cfSriastradh		src_pitch >>= 2;
373103b705cfSriastradh	}
373203b705cfSriastradh	assert(src_pitch <= MAXSHORT);
373303b705cfSriastradh
373403b705cfSriastradh	br13 = dst_bo->pitch;
373503b705cfSriastradh	if (kgem->gen >= 040 && dst_bo->tiling) {
373603b705cfSriastradh		cmd |= BLT_DST_TILED;
373703b705cfSriastradh		br13 >>= 2;
373803b705cfSriastradh	}
373903b705cfSriastradh	assert(br13 <= MAXSHORT);
374003b705cfSriastradh
374103b705cfSriastradh	br13 |= copy_ROP[alu] << 16;
374203b705cfSriastradh	switch (bpp) {
374303b705cfSriastradh	default: assert(0);
374403b705cfSriastradh	case 32: br13 |= 1 << 25; /* RGB8888 */
374503b705cfSriastradh	case 16: br13 |= 1 << 24; /* RGB565 */
374603b705cfSriastradh	case 8: break;
374703b705cfSriastradh	}
374803b705cfSriastradh
374903b705cfSriastradh	/* Compare first box against a previous fill */
375042542f5fSchristos	if ((alu == GXcopy || alu == GXclear || alu == GXset) &&
375142542f5fSchristos	    kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) {
375242542f5fSchristos		if (kgem->gen >= 0100) {
375342542f5fSchristos			if (kgem->nbatch >= 7 &&
375442542f5fSchristos			    kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
375542542f5fSchristos			    kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
375642542f5fSchristos			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
375742542f5fSchristos				DBG(("%s: deleting last fill\n", __FUNCTION__));
375842542f5fSchristos				kgem->nbatch -= 7;
375942542f5fSchristos				kgem->nreloc--;
376042542f5fSchristos			}
376142542f5fSchristos		} else {
376242542f5fSchristos			if (kgem->nbatch >= 6 &&
376342542f5fSchristos			    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
376442542f5fSchristos			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
376542542f5fSchristos			    kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
376642542f5fSchristos				DBG(("%s: deleting last fill\n", __FUNCTION__));
376742542f5fSchristos				kgem->nbatch -= 6;
376842542f5fSchristos				kgem->nreloc--;
376942542f5fSchristos			}
377042542f5fSchristos		}
377103b705cfSriastradh	}
377203b705cfSriastradh
377303b705cfSriastradh	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
377442542f5fSchristos	if (!kgem_check_batch(kgem, 10) ||
377503b705cfSriastradh	    !kgem_check_reloc(kgem, 2) ||
377603b705cfSriastradh	    !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
377703b705cfSriastradh		kgem_submit(kgem);
377842542f5fSchristos		if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
377942542f5fSchristos			DBG(("%s: not enough room in aperture, fallback to tiling copy\n", __FUNCTION__));
378003b705cfSriastradh			return sna_tiling_blt_copy_boxes(sna, alu,
378103b705cfSriastradh							 src_bo, src_dx, src_dy,
378203b705cfSriastradh							 dst_bo, dst_dx, dst_dy,
378303b705cfSriastradh							 bpp, box, nbox);
378442542f5fSchristos		}
378503b705cfSriastradh		_kgem_set_mode(kgem, KGEM_BLT);
378603b705cfSriastradh	}
3787fe8aea9eSmrg	kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
378803b705cfSriastradh
378903b705cfSriastradh	if ((dst_dx | dst_dy) == 0) {
379042542f5fSchristos		if (kgem->gen >= 0100) {
379142542f5fSchristos			uint64_t hdr = (uint64_t)br13 << 32 | cmd | 8;
379203b705cfSriastradh			do {
379313496ba1Ssnj				int nbox_this_time, rem;
379442542f5fSchristos
379542542f5fSchristos				nbox_this_time = nbox;
379613496ba1Ssnj				rem = kgem_batch_space(kgem);
379713496ba1Ssnj				if (10*nbox_this_time > rem)
379813496ba1Ssnj					nbox_this_time = rem / 10;
379942542f5fSchristos				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
380042542f5fSchristos					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
380113496ba1Ssnj				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
380213496ba1Ssnj				     __FUNCTION__, nbox_this_time, nbox, rem));
380313496ba1Ssnj				assert(nbox_this_time > 0);
380442542f5fSchristos				nbox -= nbox_this_time;
380542542f5fSchristos
380642542f5fSchristos				assert(sna->kgem.mode == KGEM_BLT);
380742542f5fSchristos				do {
380842542f5fSchristos					uint32_t *b = kgem->batch + kgem->nbatch;
380942542f5fSchristos
381042542f5fSchristos					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
381142542f5fSchristos					     __FUNCTION__,
381242542f5fSchristos					     box->x1, box->y1,
381342542f5fSchristos					     box->x2 - box->x1, box->y2 - box->y1));
381442542f5fSchristos
381542542f5fSchristos					assert(box->x1 + src_dx >= 0);
381642542f5fSchristos					assert(box->y1 + src_dy >= 0);
381742542f5fSchristos					assert(box->x1 + src_dx <= INT16_MAX);
381842542f5fSchristos					assert(box->y1 + src_dy <= INT16_MAX);
381942542f5fSchristos
382042542f5fSchristos					assert(box->x1 >= 0);
382142542f5fSchristos					assert(box->y1 >= 0);
382242542f5fSchristos
382342542f5fSchristos					*(uint64_t *)&b[0] = hdr;
382442542f5fSchristos					*(uint64_t *)&b[2] = *(const uint64_t *)box;
382542542f5fSchristos					*(uint64_t *)(b+4) =
382642542f5fSchristos						kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
382742542f5fSchristos								 I915_GEM_DOMAIN_RENDER << 16 |
382842542f5fSchristos								 I915_GEM_DOMAIN_RENDER |
382942542f5fSchristos								 KGEM_RELOC_FENCED,
383042542f5fSchristos								 0);
383142542f5fSchristos					b[6] = add2(b[2], src_dx, src_dy);
383242542f5fSchristos					b[7] = src_pitch;
383342542f5fSchristos					*(uint64_t *)(b+8) =
383442542f5fSchristos						kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
383542542f5fSchristos								 I915_GEM_DOMAIN_RENDER << 16 |
383642542f5fSchristos								 KGEM_RELOC_FENCED,
383742542f5fSchristos								 0);
383842542f5fSchristos					kgem->nbatch += 10;
383942542f5fSchristos					assert(kgem->nbatch < kgem->surface);
384042542f5fSchristos					box++;
384142542f5fSchristos				} while (--nbox_this_time);
384242542f5fSchristos
384342542f5fSchristos				if (!nbox)
384442542f5fSchristos					break;
384542542f5fSchristos
384642542f5fSchristos				_kgem_submit(kgem);
384742542f5fSchristos				_kgem_set_mode(kgem, KGEM_BLT);
3848fe8aea9eSmrg				kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
384942542f5fSchristos			} while (1);
385042542f5fSchristos		} else {
385142542f5fSchristos			uint64_t hdr = (uint64_t)br13 << 32 | cmd | 6;
385242542f5fSchristos			do {
385313496ba1Ssnj				int nbox_this_time, rem;
385442542f5fSchristos
385542542f5fSchristos				nbox_this_time = nbox;
385613496ba1Ssnj				rem = kgem_batch_space(kgem);
385713496ba1Ssnj				if (8*nbox_this_time > rem)
385813496ba1Ssnj					nbox_this_time = rem / 8;
385942542f5fSchristos				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
386042542f5fSchristos					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
386113496ba1Ssnj				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
386213496ba1Ssnj				     __FUNCTION__, nbox_this_time, nbox, rem));
386313496ba1Ssnj				assert(nbox_this_time > 0);
386442542f5fSchristos				nbox -= nbox_this_time;
386542542f5fSchristos
386642542f5fSchristos				assert(sna->kgem.mode == KGEM_BLT);
386742542f5fSchristos				do {
386842542f5fSchristos					uint32_t *b = kgem->batch + kgem->nbatch;
386942542f5fSchristos
387042542f5fSchristos					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
387142542f5fSchristos					     __FUNCTION__,
387242542f5fSchristos					     box->x1, box->y1,
387342542f5fSchristos					     box->x2 - box->x1, box->y2 - box->y1));
387442542f5fSchristos
387542542f5fSchristos					assert(box->x1 + src_dx >= 0);
387642542f5fSchristos					assert(box->y1 + src_dy >= 0);
387742542f5fSchristos					assert(box->x1 + src_dx <= INT16_MAX);
387842542f5fSchristos					assert(box->y1 + src_dy <= INT16_MAX);
387942542f5fSchristos
388042542f5fSchristos					assert(box->x1 >= 0);
388142542f5fSchristos					assert(box->y1 >= 0);
388242542f5fSchristos
388342542f5fSchristos					*(uint64_t *)&b[0] = hdr;
388442542f5fSchristos					*(uint64_t *)&b[2] = *(const uint64_t *)box;
388542542f5fSchristos					b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
388642542f5fSchristos							      I915_GEM_DOMAIN_RENDER << 16 |
388742542f5fSchristos							      I915_GEM_DOMAIN_RENDER |
388842542f5fSchristos							      KGEM_RELOC_FENCED,
388942542f5fSchristos							      0);
389042542f5fSchristos					b[5] = add2(b[2], src_dx, src_dy);
389142542f5fSchristos					b[6] = src_pitch;
389242542f5fSchristos					b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
389342542f5fSchristos							      I915_GEM_DOMAIN_RENDER << 16 |
389442542f5fSchristos							      KGEM_RELOC_FENCED,
389542542f5fSchristos							      0);
389642542f5fSchristos					kgem->nbatch += 8;
389742542f5fSchristos					assert(kgem->nbatch < kgem->surface);
389842542f5fSchristos					box++;
389942542f5fSchristos				} while (--nbox_this_time);
390042542f5fSchristos
390142542f5fSchristos				if (!nbox)
390242542f5fSchristos					break;
390342542f5fSchristos
390442542f5fSchristos				_kgem_submit(kgem);
390542542f5fSchristos				_kgem_set_mode(kgem, KGEM_BLT);
3906fe8aea9eSmrg				kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
390742542f5fSchristos			} while (1);
390842542f5fSchristos		}
390942542f5fSchristos	} else {
391042542f5fSchristos		if (kgem->gen >= 0100) {
391142542f5fSchristos			cmd |= 8;
391242542f5fSchristos			do {
391313496ba1Ssnj				int nbox_this_time, rem;
391442542f5fSchristos
391542542f5fSchristos				nbox_this_time = nbox;
391613496ba1Ssnj				rem = kgem_batch_space(kgem);
391713496ba1Ssnj				if (10*nbox_this_time > rem)
391813496ba1Ssnj					nbox_this_time = rem / 10;
391942542f5fSchristos				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
392042542f5fSchristos					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
392113496ba1Ssnj				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
392213496ba1Ssnj				     __FUNCTION__, nbox_this_time, nbox, rem));
392313496ba1Ssnj				assert(nbox_this_time > 0);
392442542f5fSchristos				nbox -= nbox_this_time;
392542542f5fSchristos
392642542f5fSchristos				assert(sna->kgem.mode == KGEM_BLT);
392742542f5fSchristos				do {
392842542f5fSchristos					uint32_t *b = kgem->batch + kgem->nbatch;
392942542f5fSchristos
393042542f5fSchristos					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
393142542f5fSchristos					     __FUNCTION__,
393242542f5fSchristos					     box->x1, box->y1,
393342542f5fSchristos					     box->x2 - box->x1, box->y2 - box->y1));
393442542f5fSchristos
393542542f5fSchristos					assert(box->x1 + src_dx >= 0);
393642542f5fSchristos					assert(box->y1 + src_dy >= 0);
393742542f5fSchristos
393842542f5fSchristos					assert(box->x1 + dst_dx >= 0);
393942542f5fSchristos					assert(box->y1 + dst_dy >= 0);
394042542f5fSchristos
394142542f5fSchristos					b[0] = cmd;
394242542f5fSchristos					b[1] = br13;
394342542f5fSchristos					b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
394442542f5fSchristos					b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
394542542f5fSchristos					*(uint64_t *)(b+4) =
394642542f5fSchristos						kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
394742542f5fSchristos								 I915_GEM_DOMAIN_RENDER << 16 |
394842542f5fSchristos								 I915_GEM_DOMAIN_RENDER |
394942542f5fSchristos								 KGEM_RELOC_FENCED,
395042542f5fSchristos								 0);
395142542f5fSchristos					b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
395242542f5fSchristos					b[7] = src_pitch;
395342542f5fSchristos					*(uint64_t *)(b+8) =
395442542f5fSchristos						kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
395542542f5fSchristos								 I915_GEM_DOMAIN_RENDER << 16 |
395642542f5fSchristos								 KGEM_RELOC_FENCED,
395742542f5fSchristos								 0);
395842542f5fSchristos					kgem->nbatch += 10;
395942542f5fSchristos					assert(kgem->nbatch < kgem->surface);
396042542f5fSchristos					box++;
396142542f5fSchristos				} while (--nbox_this_time);
396242542f5fSchristos
396342542f5fSchristos				if (!nbox)
396442542f5fSchristos					break;
396542542f5fSchristos
396642542f5fSchristos				_kgem_submit(kgem);
396742542f5fSchristos				_kgem_set_mode(kgem, KGEM_BLT);
3968fe8aea9eSmrg				kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
396942542f5fSchristos			} while (1);
397042542f5fSchristos		} else {
397142542f5fSchristos			cmd |= 6;
397242542f5fSchristos			do {
397313496ba1Ssnj				int nbox_this_time, rem;
397442542f5fSchristos
397542542f5fSchristos				nbox_this_time = nbox;
397613496ba1Ssnj				rem = kgem_batch_space(kgem);
397713496ba1Ssnj				if (8*nbox_this_time > rem)
397813496ba1Ssnj					nbox_this_time = rem / 8;
397942542f5fSchristos				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
398042542f5fSchristos					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
398113496ba1Ssnj				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
398213496ba1Ssnj				     __FUNCTION__, nbox_this_time, nbox, rem));
398313496ba1Ssnj				assert(nbox_this_time > 0);
398442542f5fSchristos				nbox -= nbox_this_time;
398542542f5fSchristos
398642542f5fSchristos				assert(sna->kgem.mode == KGEM_BLT);
398742542f5fSchristos				do {
398842542f5fSchristos					uint32_t *b = kgem->batch + kgem->nbatch;
398942542f5fSchristos
399042542f5fSchristos					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
399142542f5fSchristos					     __FUNCTION__,
399242542f5fSchristos					     box->x1, box->y1,
399342542f5fSchristos					     box->x2 - box->x1, box->y2 - box->y1));
399442542f5fSchristos
399542542f5fSchristos					assert(box->x1 + src_dx >= 0);
399642542f5fSchristos					assert(box->y1 + src_dy >= 0);
399742542f5fSchristos
399842542f5fSchristos					assert(box->x1 + dst_dx >= 0);
399942542f5fSchristos					assert(box->y1 + dst_dy >= 0);
400042542f5fSchristos
400142542f5fSchristos					b[0] = cmd;
400242542f5fSchristos					b[1] = br13;
400342542f5fSchristos					b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
400442542f5fSchristos					b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
400542542f5fSchristos					b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
400642542f5fSchristos							      I915_GEM_DOMAIN_RENDER << 16 |
400742542f5fSchristos							      I915_GEM_DOMAIN_RENDER |
400842542f5fSchristos							      KGEM_RELOC_FENCED,
400942542f5fSchristos							      0);
401042542f5fSchristos					b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
401142542f5fSchristos					b[6] = src_pitch;
401242542f5fSchristos					b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
401342542f5fSchristos							      I915_GEM_DOMAIN_RENDER << 16 |
401442542f5fSchristos							      KGEM_RELOC_FENCED,
401542542f5fSchristos							      0);
401642542f5fSchristos					kgem->nbatch += 8;
401742542f5fSchristos					assert(kgem->nbatch < kgem->surface);
401842542f5fSchristos					box++;
401942542f5fSchristos				} while (--nbox_this_time);
402042542f5fSchristos
402142542f5fSchristos				if (!nbox)
402242542f5fSchristos					break;
402342542f5fSchristos
402442542f5fSchristos				_kgem_submit(kgem);
402542542f5fSchristos				_kgem_set_mode(kgem, KGEM_BLT);
4026fe8aea9eSmrg				kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
402742542f5fSchristos			} while (1);
402842542f5fSchristos		}
402942542f5fSchristos	}
403003b705cfSriastradh
403142542f5fSchristos	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
403242542f5fSchristos		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
403342542f5fSchristos		_kgem_submit(kgem);
403442542f5fSchristos	} else if (kgem->gen >= 060 && src_bo == dst_bo && kgem_check_batch(kgem, 3)) {
403542542f5fSchristos		uint32_t *b = kgem->batch + kgem->nbatch;
403642542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
403742542f5fSchristos		b[0] = XY_SETUP_CLIP;
403842542f5fSchristos		b[1] = b[2] = 0;
403942542f5fSchristos		kgem->nbatch += 3;
404042542f5fSchristos		assert(kgem->nbatch < kgem->surface);
404142542f5fSchristos	}
404203b705cfSriastradh
404342542f5fSchristos	sna->blt_state.fill_bo = 0;
404442542f5fSchristos	return true;
404542542f5fSchristos}
404603b705cfSriastradh
404742542f5fSchristosbool sna_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu,
404842542f5fSchristos				    struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
404942542f5fSchristos				    struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
405042542f5fSchristos				    int bpp, int alpha_fixup,
405142542f5fSchristos				    const BoxRec *box, int nbox)
405242542f5fSchristos{
405342542f5fSchristos	struct kgem *kgem = &sna->kgem;
405442542f5fSchristos	unsigned src_pitch, br13, cmd;
405503b705cfSriastradh
405642542f5fSchristos#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES
405742542f5fSchristos	return false;
405842542f5fSchristos#endif
405903b705cfSriastradh
406042542f5fSchristos	DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n",
406142542f5fSchristos	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox,
406242542f5fSchristos	    src_bo->tiling, dst_bo->tiling,
406342542f5fSchristos	    src_bo->pitch, dst_bo->pitch));
406403b705cfSriastradh
406542542f5fSchristos	if (wedged(sna) || !kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) {
406642542f5fSchristos		DBG(("%s: cannot blt to src? %d or dst? %d\n",
406742542f5fSchristos		     __FUNCTION__,
406842542f5fSchristos		     kgem_bo_can_blt(kgem, src_bo),
406942542f5fSchristos		     kgem_bo_can_blt(kgem, dst_bo)));
407042542f5fSchristos		return false;
407142542f5fSchristos	}
407203b705cfSriastradh
407342542f5fSchristos	cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 12 : 10);
407442542f5fSchristos	src_pitch = src_bo->pitch;
407542542f5fSchristos	if (kgem->gen >= 040 && src_bo->tiling) {
407642542f5fSchristos		cmd |= BLT_SRC_TILED;
407742542f5fSchristos		src_pitch >>= 2;
407842542f5fSchristos	}
407942542f5fSchristos	assert(src_pitch <= MAXSHORT);
408003b705cfSriastradh
408142542f5fSchristos	br13 = dst_bo->pitch;
408242542f5fSchristos	if (kgem->gen >= 040 && dst_bo->tiling) {
408342542f5fSchristos		cmd |= BLT_DST_TILED;
408442542f5fSchristos		br13 >>= 2;
408542542f5fSchristos	}
408642542f5fSchristos	assert(br13 <= MAXSHORT);
408703b705cfSriastradh
408842542f5fSchristos	br13 |= copy_ROP[alu] << 16;
408942542f5fSchristos	switch (bpp) {
409042542f5fSchristos	default: assert(0);
409142542f5fSchristos	case 32: br13 |= 1 << 25; /* RGB8888 */
409242542f5fSchristos	case 16: br13 |= 1 << 24; /* RGB565 */
409342542f5fSchristos	case 8: break;
409442542f5fSchristos	}
409503b705cfSriastradh
409642542f5fSchristos	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
409742542f5fSchristos	if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
409842542f5fSchristos		DBG(("%s: cannot fit src+dst into aperture\n", __FUNCTION__));
409942542f5fSchristos		return false;
410042542f5fSchristos	}
410103b705cfSriastradh
410242542f5fSchristos	/* Compare first box against a previous fill */
410342542f5fSchristos	if ((alu == GXcopy || alu == GXclear || alu == GXset) &&
410442542f5fSchristos	    kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) {
410542542f5fSchristos		if (kgem->gen >= 0100) {
410642542f5fSchristos			if (kgem->nbatch >= 7 &&
410742542f5fSchristos			    kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
410842542f5fSchristos			    kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
410942542f5fSchristos			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
411042542f5fSchristos				DBG(("%s: deleting last fill\n", __FUNCTION__));
411142542f5fSchristos				kgem->nbatch -= 7;
411242542f5fSchristos				kgem->nreloc--;
411342542f5fSchristos			}
411442542f5fSchristos		} else {
411542542f5fSchristos			if (kgem->nbatch >= 6 &&
411642542f5fSchristos			    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
411742542f5fSchristos			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
411842542f5fSchristos			    kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
411942542f5fSchristos				DBG(("%s: deleting last fill\n", __FUNCTION__));
412042542f5fSchristos				kgem->nbatch -= 6;
412142542f5fSchristos				kgem->nreloc--;
412242542f5fSchristos			}
412342542f5fSchristos		}
412442542f5fSchristos	}
412503b705cfSriastradh
412642542f5fSchristos	while (nbox--) {
412742542f5fSchristos		uint32_t *b;
412803b705cfSriastradh
412942542f5fSchristos		if (!kgem_check_batch(kgem, 14) ||
413042542f5fSchristos		    !kgem_check_reloc(kgem, 2)) {
413103b705cfSriastradh			_kgem_submit(kgem);
413203b705cfSriastradh			_kgem_set_mode(kgem, KGEM_BLT);
4133fe8aea9eSmrg			kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
413442542f5fSchristos		}
413542542f5fSchristos
413642542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
413742542f5fSchristos		b = kgem->batch + kgem->nbatch;
413842542f5fSchristos		b[0] = cmd;
413942542f5fSchristos		b[1] = br13;
414042542f5fSchristos		b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
414142542f5fSchristos		b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
414242542f5fSchristos		if (sna->kgem.gen >= 0100) {
414342542f5fSchristos			*(uint64_t *)(b+4) =
414442542f5fSchristos				kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
414542542f5fSchristos						 I915_GEM_DOMAIN_RENDER << 16 |
414642542f5fSchristos						 I915_GEM_DOMAIN_RENDER |
414742542f5fSchristos						 KGEM_RELOC_FENCED,
414842542f5fSchristos						 0);
414942542f5fSchristos			b[6] = src_pitch;
415042542f5fSchristos			b[7] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx);
415142542f5fSchristos			*(uint64_t *)(b+8) =
415242542f5fSchristos				kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
415342542f5fSchristos						 I915_GEM_DOMAIN_RENDER << 16 |
415442542f5fSchristos						 KGEM_RELOC_FENCED,
415542542f5fSchristos						 0);
415642542f5fSchristos			b[10] = alpha_fixup;
415742542f5fSchristos			b[11] = alpha_fixup;
415842542f5fSchristos			b[12] = 0;
415942542f5fSchristos			b[13] = 0;
416042542f5fSchristos			kgem->nbatch += 14;
416142542f5fSchristos		} else {
416242542f5fSchristos			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
416342542f5fSchristos					      I915_GEM_DOMAIN_RENDER << 16 |
416442542f5fSchristos					      I915_GEM_DOMAIN_RENDER |
416542542f5fSchristos					      KGEM_RELOC_FENCED,
416642542f5fSchristos					      0);
416742542f5fSchristos			b[5] = src_pitch;
416842542f5fSchristos			b[6] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx);
416942542f5fSchristos			b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
417042542f5fSchristos					      I915_GEM_DOMAIN_RENDER << 16 |
417142542f5fSchristos					      KGEM_RELOC_FENCED,
417242542f5fSchristos					      0);
417342542f5fSchristos			b[8] = alpha_fixup;
417442542f5fSchristos			b[9] = alpha_fixup;
417542542f5fSchristos			b[10] = 0;
417642542f5fSchristos			b[11] = 0;
417742542f5fSchristos			kgem->nbatch += 12;
417842542f5fSchristos		}
417942542f5fSchristos		assert(kgem->nbatch < kgem->surface);
418042542f5fSchristos		box++;
418103b705cfSriastradh	}
418203b705cfSriastradh
418303b705cfSriastradh	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
418442542f5fSchristos		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
418503b705cfSriastradh		_kgem_submit(kgem);
418603b705cfSriastradh	}
418703b705cfSriastradh
418803b705cfSriastradh	sna->blt_state.fill_bo = 0;
418903b705cfSriastradh	return true;
419003b705cfSriastradh}
419103b705cfSriastradh
419203b705cfSriastradhstatic void box_extents(const BoxRec *box, int n, BoxRec *extents)
419303b705cfSriastradh{
419403b705cfSriastradh	*extents = *box;
419503b705cfSriastradh	while (--n) {
419603b705cfSriastradh		box++;
419703b705cfSriastradh		if (box->x1 < extents->x1)
419803b705cfSriastradh			extents->x1 = box->x1;
419903b705cfSriastradh		if (box->y1 < extents->y1)
420003b705cfSriastradh			extents->y1 = box->y1;
420103b705cfSriastradh
420203b705cfSriastradh		if (box->x2 > extents->x2)
420303b705cfSriastradh			extents->x2 = box->x2;
420403b705cfSriastradh		if (box->y2 > extents->y2)
420503b705cfSriastradh			extents->y2 = box->y2;
420603b705cfSriastradh	}
420703b705cfSriastradh}
420803b705cfSriastradh
420903b705cfSriastradhbool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu,
421042542f5fSchristos				 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
421142542f5fSchristos				 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
421203b705cfSriastradh				 const BoxRec *box, int nbox)
421303b705cfSriastradh{
421403b705cfSriastradh	struct kgem_bo *free_bo = NULL;
421503b705cfSriastradh	bool ret;
421603b705cfSriastradh
421703b705cfSriastradh	DBG(("%s: alu=%d, n=%d\n", __FUNCTION__, alu, nbox));
421803b705cfSriastradh
421942542f5fSchristos	if (!sna_blt_compare_depth(src, dst)) {
422003b705cfSriastradh		DBG(("%s: mismatching depths %d -> %d\n",
422142542f5fSchristos		     __FUNCTION__, src->depth, dst->depth));
422203b705cfSriastradh		return false;
422303b705cfSriastradh	}
422403b705cfSriastradh
422503b705cfSriastradh	if (src_bo == dst_bo) {
422603b705cfSriastradh		DBG(("%s: dst == src\n", __FUNCTION__));
422703b705cfSriastradh
422803b705cfSriastradh		if (src_bo->tiling == I915_TILING_Y &&
4229fe8aea9eSmrg		    !sna->kgem.can_blt_y &&
423003b705cfSriastradh		    kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) {
423103b705cfSriastradh			struct kgem_bo *bo;
423203b705cfSriastradh
423303b705cfSriastradh			DBG(("%s: src is Y-tiled\n", __FUNCTION__));
423403b705cfSriastradh
423542542f5fSchristos			if (src->type != DRAWABLE_PIXMAP)
423642542f5fSchristos				return false;
423742542f5fSchristos
423842542f5fSchristos			assert(sna_pixmap((PixmapPtr)src)->gpu_bo == src_bo);
423942542f5fSchristos			bo = sna_pixmap_change_tiling((PixmapPtr)src, I915_TILING_X);
424003b705cfSriastradh			if (bo == NULL) {
424103b705cfSriastradh				BoxRec extents;
424203b705cfSriastradh
424303b705cfSriastradh				DBG(("%s: y-tiling conversion failed\n",
424403b705cfSriastradh				     __FUNCTION__));
424503b705cfSriastradh
424603b705cfSriastradh				box_extents(box, nbox, &extents);
424703b705cfSriastradh				free_bo = kgem_create_2d(&sna->kgem,
424803b705cfSriastradh							 extents.x2 - extents.x1,
424903b705cfSriastradh							 extents.y2 - extents.y1,
425042542f5fSchristos							 src->bitsPerPixel,
425103b705cfSriastradh							 I915_TILING_X, 0);
425203b705cfSriastradh				if (free_bo == NULL) {
425303b705cfSriastradh					DBG(("%s: fallback -- temp allocation failed\n",
425403b705cfSriastradh					     __FUNCTION__));
425503b705cfSriastradh					return false;
425603b705cfSriastradh				}
425703b705cfSriastradh
425803b705cfSriastradh				if (!sna_blt_copy_boxes(sna, GXcopy,
425903b705cfSriastradh							src_bo, src_dx, src_dy,
426003b705cfSriastradh							free_bo, -extents.x1, -extents.y1,
426142542f5fSchristos							src->bitsPerPixel,
426203b705cfSriastradh							box, nbox)) {
426303b705cfSriastradh					DBG(("%s: fallback -- temp copy failed\n",
426403b705cfSriastradh					     __FUNCTION__));
426503b705cfSriastradh					kgem_bo_destroy(&sna->kgem, free_bo);
426603b705cfSriastradh					return false;
426703b705cfSriastradh				}
426803b705cfSriastradh
426903b705cfSriastradh				src_dx = -extents.x1;
427003b705cfSriastradh				src_dy = -extents.y1;
427103b705cfSriastradh				src_bo = free_bo;
427203b705cfSriastradh			} else
427303b705cfSriastradh				dst_bo = src_bo = bo;
427403b705cfSriastradh		}
427503b705cfSriastradh	} else {
427603b705cfSriastradh		if (src_bo->tiling == I915_TILING_Y &&
4277fe8aea9eSmrg		    !sna->kgem.can_blt_y &&
427803b705cfSriastradh		    kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) {
427903b705cfSriastradh			DBG(("%s: src is y-tiled\n", __FUNCTION__));
428042542f5fSchristos			if (src->type != DRAWABLE_PIXMAP)
428142542f5fSchristos				return false;
428242542f5fSchristos			assert(sna_pixmap((PixmapPtr)src)->gpu_bo == src_bo);
428342542f5fSchristos			src_bo = sna_pixmap_change_tiling((PixmapPtr)src, I915_TILING_X);
428403b705cfSriastradh			if (src_bo == NULL) {
428503b705cfSriastradh				DBG(("%s: fallback -- src y-tiling conversion failed\n",
428603b705cfSriastradh				     __FUNCTION__));
428703b705cfSriastradh				return false;
428803b705cfSriastradh			}
428903b705cfSriastradh		}
429003b705cfSriastradh
429103b705cfSriastradh		if (dst_bo->tiling == I915_TILING_Y &&
4292fe8aea9eSmrg		    !sna->kgem.can_blt_y &&
429303b705cfSriastradh		    kgem_bo_blt_pitch_is_ok(&sna->kgem, dst_bo)) {
429403b705cfSriastradh			DBG(("%s: dst is y-tiled\n", __FUNCTION__));
429542542f5fSchristos			if (dst->type != DRAWABLE_PIXMAP)
429642542f5fSchristos				return false;
429742542f5fSchristos			assert(sna_pixmap((PixmapPtr)dst)->gpu_bo == dst_bo);
429842542f5fSchristos			dst_bo = sna_pixmap_change_tiling((PixmapPtr)dst, I915_TILING_X);
429903b705cfSriastradh			if (dst_bo == NULL) {
430003b705cfSriastradh				DBG(("%s: fallback -- dst y-tiling conversion failed\n",
430103b705cfSriastradh				     __FUNCTION__));
430203b705cfSriastradh				return false;
430303b705cfSriastradh			}
430403b705cfSriastradh		}
430503b705cfSriastradh	}
430603b705cfSriastradh
430703b705cfSriastradh	ret =  sna_blt_copy_boxes(sna, alu,
430803b705cfSriastradh				  src_bo, src_dx, src_dy,
430903b705cfSriastradh				  dst_bo, dst_dx, dst_dy,
431042542f5fSchristos				  dst->bitsPerPixel,
431103b705cfSriastradh				  box, nbox);
431203b705cfSriastradh
431303b705cfSriastradh	if (free_bo)
431403b705cfSriastradh		kgem_bo_destroy(&sna->kgem, free_bo);
431503b705cfSriastradh
431603b705cfSriastradh	return ret;
431703b705cfSriastradh}
4318