sna_blt.c revision 13496ba1
103b705cfSriastradh/*
203b705cfSriastradh * Based on code from intel_uxa.c and i830_xaa.c
303b705cfSriastradh * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
403b705cfSriastradh * Copyright (c) 2005 Jesse Barnes <jbarnes@virtuousgeek.org>
503b705cfSriastradh * Copyright (c) 2009-2011 Intel Corporation
603b705cfSriastradh *
703b705cfSriastradh * Permission is hereby granted, free of charge, to any person obtaining a
803b705cfSriastradh * copy of this software and associated documentation files (the "Software"),
903b705cfSriastradh * to deal in the Software without restriction, including without limitation
1003b705cfSriastradh * the rights to use, copy, modify, merge, publish, distribute, sublicense,
1103b705cfSriastradh * and/or sell copies of the Software, and to permit persons to whom the
1203b705cfSriastradh * Software is furnished to do so, subject to the following conditions:
1303b705cfSriastradh *
1403b705cfSriastradh * The above copyright notice and this permission notice (including the next
1503b705cfSriastradh * paragraph) shall be included in all copies or substantial portions of the
1603b705cfSriastradh * Software.
1703b705cfSriastradh *
1803b705cfSriastradh * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1903b705cfSriastradh * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
2003b705cfSriastradh * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
2103b705cfSriastradh * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2203b705cfSriastradh * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2303b705cfSriastradh * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2403b705cfSriastradh * SOFTWARE.
2503b705cfSriastradh *
2603b705cfSriastradh * Authors:
2703b705cfSriastradh *    Chris Wilson <chris@chris-wilson.co.uk>
2803b705cfSriastradh *
2903b705cfSriastradh */
3003b705cfSriastradh
3103b705cfSriastradh#ifdef HAVE_CONFIG_H
3203b705cfSriastradh#include "config.h"
3303b705cfSriastradh#endif
3403b705cfSriastradh
3503b705cfSriastradh#include "sna.h"
3603b705cfSriastradh#include "sna_render.h"
3703b705cfSriastradh#include "sna_render_inline.h"
3803b705cfSriastradh#include "sna_reg.h"
3903b705cfSriastradh#include "rop.h"
4003b705cfSriastradh
/*
 * Compile-time switches, all left at 0 here.
 * NOTE(review): presumably each one disables the matching BLT entry point
 * further down the file when set to 1 - confirm against their users.
 */
#define NO_BLT_COMPOSITE	0
#define NO_BLT_COPY		0
#define NO_BLT_COPY_BOXES	0
#define NO_BLT_FILL		0
#define NO_BLT_FILL_BOXES	0

/*
 * Fallback for headers that do not provide the BGRA picture type; the value
 * is tested by sna_get_rgba_from_pixel().
 */
#ifndef PICT_TYPE_BGRA
#define PICT_TYPE_BGRA 8
#endif
5042542f5fSchristos
5103b705cfSriastradhstatic const uint8_t copy_ROP[] = {
5203b705cfSriastradh	ROP_0,                  /* GXclear */
5303b705cfSriastradh	ROP_DSa,                /* GXand */
5403b705cfSriastradh	ROP_SDna,               /* GXandReverse */
5503b705cfSriastradh	ROP_S,                  /* GXcopy */
5603b705cfSriastradh	ROP_DSna,               /* GXandInverted */
5703b705cfSriastradh	ROP_D,                  /* GXnoop */
5803b705cfSriastradh	ROP_DSx,                /* GXxor */
5903b705cfSriastradh	ROP_DSo,                /* GXor */
6003b705cfSriastradh	ROP_DSon,               /* GXnor */
6103b705cfSriastradh	ROP_DSxn,               /* GXequiv */
6203b705cfSriastradh	ROP_Dn,                 /* GXinvert */
6303b705cfSriastradh	ROP_SDno,               /* GXorReverse */
6403b705cfSriastradh	ROP_Sn,                 /* GXcopyInverted */
6503b705cfSriastradh	ROP_DSno,               /* GXorInverted */
6603b705cfSriastradh	ROP_DSan,               /* GXnand */
6703b705cfSriastradh	ROP_1                   /* GXset */
6803b705cfSriastradh};
6903b705cfSriastradh
7003b705cfSriastradhstatic const uint8_t fill_ROP[] = {
7103b705cfSriastradh	ROP_0,
7203b705cfSriastradh	ROP_DPa,
7303b705cfSriastradh	ROP_PDna,
7403b705cfSriastradh	ROP_P,
7503b705cfSriastradh	ROP_DPna,
7603b705cfSriastradh	ROP_D,
7703b705cfSriastradh	ROP_DPx,
7803b705cfSriastradh	ROP_DPo,
7903b705cfSriastradh	ROP_DPon,
8003b705cfSriastradh	ROP_PDxn,
8103b705cfSriastradh	ROP_Dn,
8203b705cfSriastradh	ROP_PDno,
8303b705cfSriastradh	ROP_Pn,
8403b705cfSriastradh	ROP_DPno,
8503b705cfSriastradh	ROP_DPan,
8603b705cfSriastradh	ROP_1
8703b705cfSriastradh};
8803b705cfSriastradh
8903b705cfSriastradhstatic void nop_done(struct sna *sna, const struct sna_composite_op *op)
9003b705cfSriastradh{
9103b705cfSriastradh	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
9242542f5fSchristos	if (sna->kgem.nexec > 1 && __kgem_ring_empty(&sna->kgem)) {
9342542f5fSchristos		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
9403b705cfSriastradh		_kgem_submit(&sna->kgem);
9542542f5fSchristos	}
9603b705cfSriastradh	(void)op;
9703b705cfSriastradh}
9803b705cfSriastradh
9903b705cfSriastradhstatic void gen6_blt_copy_done(struct sna *sna, const struct sna_composite_op *op)
10003b705cfSriastradh{
10103b705cfSriastradh	struct kgem *kgem = &sna->kgem;
10203b705cfSriastradh
10303b705cfSriastradh	assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
10403b705cfSriastradh	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
10542542f5fSchristos		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
10603b705cfSriastradh		_kgem_submit(kgem);
10703b705cfSriastradh		return;
10803b705cfSriastradh	}
10903b705cfSriastradh
11003b705cfSriastradh	if (kgem_check_batch(kgem, 3)) {
11103b705cfSriastradh		uint32_t *b = kgem->batch + kgem->nbatch;
11242542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
11303b705cfSriastradh		b[0] = XY_SETUP_CLIP;
11403b705cfSriastradh		b[1] = b[2] = 0;
11503b705cfSriastradh		kgem->nbatch += 3;
11603b705cfSriastradh		assert(kgem->nbatch < kgem->surface);
11703b705cfSriastradh	}
11803b705cfSriastradh	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
11903b705cfSriastradh	(void)op;
12003b705cfSriastradh}
12103b705cfSriastradh
12203b705cfSriastradhstatic bool sna_blt_fill_init(struct sna *sna,
12303b705cfSriastradh			      struct sna_blt_state *blt,
12403b705cfSriastradh			      struct kgem_bo *bo,
12503b705cfSriastradh			      int bpp,
12603b705cfSriastradh			      uint8_t alu,
12703b705cfSriastradh			      uint32_t pixel)
12803b705cfSriastradh{
12903b705cfSriastradh	struct kgem *kgem = &sna->kgem;
13003b705cfSriastradh
13103b705cfSriastradh	assert(kgem_bo_can_blt (kgem, bo));
13203b705cfSriastradh	assert(bo->tiling != I915_TILING_Y);
13303b705cfSriastradh	blt->bo[0] = bo;
13403b705cfSriastradh
13503b705cfSriastradh	blt->br13 = bo->pitch;
13603b705cfSriastradh	blt->cmd = XY_SCANLINE_BLT;
13703b705cfSriastradh	if (kgem->gen >= 040 && bo->tiling) {
13803b705cfSriastradh		blt->cmd |= BLT_DST_TILED;
13903b705cfSriastradh		blt->br13 >>= 2;
14003b705cfSriastradh	}
14103b705cfSriastradh	assert(blt->br13 <= MAXSHORT);
14203b705cfSriastradh
14303b705cfSriastradh	if (alu == GXclear)
14403b705cfSriastradh		pixel = 0;
14503b705cfSriastradh	else if (alu == GXcopy) {
14603b705cfSriastradh		if (pixel == 0)
14703b705cfSriastradh			alu = GXclear;
14803b705cfSriastradh		else if (pixel == -1)
14903b705cfSriastradh			alu = GXset;
15003b705cfSriastradh	}
15103b705cfSriastradh
15203b705cfSriastradh	blt->br13 |= 1<<31 | (fill_ROP[alu] << 16);
15303b705cfSriastradh	switch (bpp) {
15403b705cfSriastradh	default: assert(0);
15503b705cfSriastradh	case 32: blt->br13 |= 1 << 25; /* RGB8888 */
15603b705cfSriastradh	case 16: blt->br13 |= 1 << 24; /* RGB565 */
15703b705cfSriastradh	case 8: break;
15803b705cfSriastradh	}
15903b705cfSriastradh
16003b705cfSriastradh	blt->pixel = pixel;
16103b705cfSriastradh	blt->bpp = bpp;
16242542f5fSchristos	blt->alu = alu;
16303b705cfSriastradh
16403b705cfSriastradh	kgem_set_mode(kgem, KGEM_BLT, bo);
16542542f5fSchristos	if (!kgem_check_batch(kgem, 14) ||
16603b705cfSriastradh	    !kgem_check_bo_fenced(kgem, bo)) {
16703b705cfSriastradh		kgem_submit(kgem);
16803b705cfSriastradh		if (!kgem_check_bo_fenced(kgem, bo))
16903b705cfSriastradh			return false;
17003b705cfSriastradh		_kgem_set_mode(kgem, KGEM_BLT);
17103b705cfSriastradh	}
17203b705cfSriastradh
17303b705cfSriastradh	if (sna->blt_state.fill_bo != bo->unique_id ||
17403b705cfSriastradh	    sna->blt_state.fill_pixel != pixel ||
17503b705cfSriastradh	    sna->blt_state.fill_alu != alu)
17603b705cfSriastradh	{
17703b705cfSriastradh		uint32_t *b;
17803b705cfSriastradh
17913496ba1Ssnj		if (!kgem_check_batch(kgem, 24) ||
18013496ba1Ssnj		    !kgem_check_reloc(kgem, 1)) {
18103b705cfSriastradh			_kgem_submit(kgem);
18242542f5fSchristos			if (!kgem_check_bo_fenced(kgem, bo))
18342542f5fSchristos				return false;
18403b705cfSriastradh			_kgem_set_mode(kgem, KGEM_BLT);
18503b705cfSriastradh		}
18603b705cfSriastradh
18742542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
18803b705cfSriastradh		b = kgem->batch + kgem->nbatch;
18942542f5fSchristos		if (sna->kgem.gen >= 0100) {
19042542f5fSchristos			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
19142542f5fSchristos			if (bpp == 32)
19242542f5fSchristos				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
19342542f5fSchristos			if (bo->tiling)
19442542f5fSchristos				b[0] |= BLT_DST_TILED;
19542542f5fSchristos			b[1] = blt->br13;
19642542f5fSchristos			b[2] = 0;
19742542f5fSchristos			b[3] = 0;
19842542f5fSchristos			*(uint64_t *)(b+4) =
19942542f5fSchristos				kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
20042542f5fSchristos						 I915_GEM_DOMAIN_RENDER << 16 |
20142542f5fSchristos						 I915_GEM_DOMAIN_RENDER |
20242542f5fSchristos						 KGEM_RELOC_FENCED,
20342542f5fSchristos						 0);
20442542f5fSchristos			b[6] = pixel;
20542542f5fSchristos			b[7] = pixel;
20642542f5fSchristos			b[8] = 0;
20742542f5fSchristos			b[9] = 0;
20842542f5fSchristos			kgem->nbatch += 10;
20942542f5fSchristos		} else {
21042542f5fSchristos			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
21142542f5fSchristos			if (bpp == 32)
21242542f5fSchristos				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
21342542f5fSchristos			if (bo->tiling && kgem->gen >= 040)
21442542f5fSchristos				b[0] |= BLT_DST_TILED;
21542542f5fSchristos			b[1] = blt->br13;
21642542f5fSchristos			b[2] = 0;
21742542f5fSchristos			b[3] = 0;
21842542f5fSchristos			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
21942542f5fSchristos					      I915_GEM_DOMAIN_RENDER << 16 |
22042542f5fSchristos					      I915_GEM_DOMAIN_RENDER |
22142542f5fSchristos					      KGEM_RELOC_FENCED,
22242542f5fSchristos					      0);
22342542f5fSchristos			b[5] = pixel;
22442542f5fSchristos			b[6] = pixel;
22542542f5fSchristos			b[7] = 0;
22642542f5fSchristos			b[8] = 0;
22742542f5fSchristos			kgem->nbatch += 9;
22842542f5fSchristos		}
22903b705cfSriastradh		assert(kgem->nbatch < kgem->surface);
23003b705cfSriastradh
23103b705cfSriastradh		sna->blt_state.fill_bo = bo->unique_id;
23203b705cfSriastradh		sna->blt_state.fill_pixel = pixel;
23303b705cfSriastradh		sna->blt_state.fill_alu = alu;
23403b705cfSriastradh	}
23503b705cfSriastradh
23613496ba1Ssnj	assert(sna->kgem.mode == KGEM_BLT);
23703b705cfSriastradh	return true;
23803b705cfSriastradh}
23903b705cfSriastradh
24003b705cfSriastradhnoinline static void sna_blt_fill_begin(struct sna *sna,
24103b705cfSriastradh					const struct sna_blt_state *blt)
24203b705cfSriastradh{
24303b705cfSriastradh	struct kgem *kgem = &sna->kgem;
24403b705cfSriastradh	uint32_t *b;
24503b705cfSriastradh
24642542f5fSchristos	if (kgem->nreloc) {
24742542f5fSchristos		_kgem_submit(kgem);
24842542f5fSchristos		_kgem_set_mode(kgem, KGEM_BLT);
24942542f5fSchristos		assert(kgem->nbatch == 0);
25042542f5fSchristos	}
25103b705cfSriastradh
25242542f5fSchristos	assert(kgem->mode == KGEM_BLT);
25342542f5fSchristos	b = kgem->batch + kgem->nbatch;
25442542f5fSchristos	if (sna->kgem.gen >= 0100) {
25542542f5fSchristos		b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
25642542f5fSchristos		if (blt->bpp == 32)
25742542f5fSchristos			b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
25842542f5fSchristos		if (blt->bo[0]->tiling)
25942542f5fSchristos			b[0] |= BLT_DST_TILED;
26042542f5fSchristos		b[1] = blt->br13;
26142542f5fSchristos		b[2] = 0;
26242542f5fSchristos		b[3] = 0;
26342542f5fSchristos		*(uint32_t *)(b+4) =
26442542f5fSchristos			kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[0],
26542542f5fSchristos					 I915_GEM_DOMAIN_RENDER << 16 |
26642542f5fSchristos					 I915_GEM_DOMAIN_RENDER |
26742542f5fSchristos					 KGEM_RELOC_FENCED,
26842542f5fSchristos					 0);
26942542f5fSchristos		b[6] = blt->pixel;
27042542f5fSchristos		b[7] = blt->pixel;
27142542f5fSchristos		b[8] = 0;
27242542f5fSchristos		b[9] = 0;
27342542f5fSchristos		kgem->nbatch += 10;
27442542f5fSchristos	} else {
27542542f5fSchristos		b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
27642542f5fSchristos		if (blt->bpp == 32)
27742542f5fSchristos			b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
27842542f5fSchristos		if (blt->bo[0]->tiling && kgem->gen >= 040)
27942542f5fSchristos			b[0] |= BLT_DST_TILED;
28042542f5fSchristos		b[1] = blt->br13;
28142542f5fSchristos		b[2] = 0;
28242542f5fSchristos		b[3] = 0;
28342542f5fSchristos		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[0],
28442542f5fSchristos				      I915_GEM_DOMAIN_RENDER << 16 |
28542542f5fSchristos				      I915_GEM_DOMAIN_RENDER |
28642542f5fSchristos				      KGEM_RELOC_FENCED,
28742542f5fSchristos				      0);
28842542f5fSchristos		b[5] = blt->pixel;
28942542f5fSchristos		b[6] = blt->pixel;
29042542f5fSchristos		b[7] = 0;
29142542f5fSchristos		b[8] = 0;
29242542f5fSchristos		kgem->nbatch += 9;
29342542f5fSchristos	}
29403b705cfSriastradh}
29503b705cfSriastradh
29603b705cfSriastradhinline static void sna_blt_fill_one(struct sna *sna,
29703b705cfSriastradh				    const struct sna_blt_state *blt,
29803b705cfSriastradh				    int16_t x, int16_t y,
29903b705cfSriastradh				    int16_t width, int16_t height)
30003b705cfSriastradh{
30103b705cfSriastradh	struct kgem *kgem = &sna->kgem;
30203b705cfSriastradh	uint32_t *b;
30303b705cfSriastradh
30403b705cfSriastradh	DBG(("%s: (%d, %d) x (%d, %d): %08x\n",
30503b705cfSriastradh	     __FUNCTION__, x, y, width, height, blt->pixel));
30603b705cfSriastradh
30703b705cfSriastradh	assert(x >= 0);
30803b705cfSriastradh	assert(y >= 0);
30903b705cfSriastradh	assert((y+height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
31003b705cfSriastradh
31103b705cfSriastradh	if (!kgem_check_batch(kgem, 3))
31203b705cfSriastradh		sna_blt_fill_begin(sna, blt);
31303b705cfSriastradh
31442542f5fSchristos	assert(sna->kgem.mode == KGEM_BLT);
31503b705cfSriastradh	b = kgem->batch + kgem->nbatch;
31603b705cfSriastradh	kgem->nbatch += 3;
31703b705cfSriastradh	assert(kgem->nbatch < kgem->surface);
31803b705cfSriastradh
31903b705cfSriastradh	b[0] = blt->cmd;
32003b705cfSriastradh	b[1] = y << 16 | x;
32103b705cfSriastradh	b[2] = b[1] + (height << 16 | width);
32203b705cfSriastradh}
32303b705cfSriastradh
32403b705cfSriastradhstatic bool sna_blt_copy_init(struct sna *sna,
32503b705cfSriastradh			      struct sna_blt_state *blt,
32603b705cfSriastradh			      struct kgem_bo *src,
32703b705cfSriastradh			      struct kgem_bo *dst,
32803b705cfSriastradh			      int bpp,
32903b705cfSriastradh			      uint8_t alu)
33003b705cfSriastradh{
33103b705cfSriastradh	struct kgem *kgem = &sna->kgem;
33203b705cfSriastradh
33303b705cfSriastradh	assert(kgem_bo_can_blt (kgem, src));
33403b705cfSriastradh	assert(kgem_bo_can_blt (kgem, dst));
33503b705cfSriastradh
33603b705cfSriastradh	blt->bo[0] = src;
33703b705cfSriastradh	blt->bo[1] = dst;
33803b705cfSriastradh
33942542f5fSchristos	blt->cmd = XY_SRC_COPY_BLT_CMD | (kgem->gen >= 0100 ? 8 : 6);
34003b705cfSriastradh	if (bpp == 32)
34103b705cfSriastradh		blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
34203b705cfSriastradh
34303b705cfSriastradh	blt->pitch[0] = src->pitch;
34403b705cfSriastradh	if (kgem->gen >= 040 && src->tiling) {
34503b705cfSriastradh		blt->cmd |= BLT_SRC_TILED;
34603b705cfSriastradh		blt->pitch[0] >>= 2;
34703b705cfSriastradh	}
34803b705cfSriastradh	assert(blt->pitch[0] <= MAXSHORT);
34903b705cfSriastradh
35003b705cfSriastradh	blt->pitch[1] = dst->pitch;
35103b705cfSriastradh	if (kgem->gen >= 040 && dst->tiling) {
35203b705cfSriastradh		blt->cmd |= BLT_DST_TILED;
35303b705cfSriastradh		blt->pitch[1] >>= 2;
35403b705cfSriastradh	}
35503b705cfSriastradh	assert(blt->pitch[1] <= MAXSHORT);
35603b705cfSriastradh
35703b705cfSriastradh	blt->overwrites = alu == GXcopy || alu == GXclear || alu == GXset;
35803b705cfSriastradh	blt->br13 = (copy_ROP[alu] << 16) | blt->pitch[1];
35903b705cfSriastradh	switch (bpp) {
36003b705cfSriastradh	default: assert(0);
36103b705cfSriastradh	case 32: blt->br13 |= 1 << 25; /* RGB8888 */
36203b705cfSriastradh	case 16: blt->br13 |= 1 << 24; /* RGB565 */
36303b705cfSriastradh	case 8: break;
36403b705cfSriastradh	}
36503b705cfSriastradh
36603b705cfSriastradh	kgem_set_mode(kgem, KGEM_BLT, dst);
36703b705cfSriastradh	if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
36803b705cfSriastradh		kgem_submit(kgem);
36903b705cfSriastradh		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
37003b705cfSriastradh			return false;
37103b705cfSriastradh		_kgem_set_mode(kgem, KGEM_BLT);
37203b705cfSriastradh	}
37303b705cfSriastradh
37403b705cfSriastradh	sna->blt_state.fill_bo = 0;
37503b705cfSriastradh	return true;
37603b705cfSriastradh}
37703b705cfSriastradh
37803b705cfSriastradhstatic bool sna_blt_alpha_fixup_init(struct sna *sna,
37903b705cfSriastradh				     struct sna_blt_state *blt,
38003b705cfSriastradh				     struct kgem_bo *src,
38103b705cfSriastradh				     struct kgem_bo *dst,
38203b705cfSriastradh				     int bpp, uint32_t alpha)
38303b705cfSriastradh{
38403b705cfSriastradh	struct kgem *kgem = &sna->kgem;
38503b705cfSriastradh
38642542f5fSchristos	DBG(("%s: dst handle=%d, src handle=%d, bpp=%d, fixup=%08x\n",
38742542f5fSchristos	     __FUNCTION__, dst->handle, src->handle, bpp, alpha));
38842542f5fSchristos	assert(kgem_bo_can_blt(kgem, src));
38942542f5fSchristos	assert(kgem_bo_can_blt(kgem, dst));
39003b705cfSriastradh
39103b705cfSriastradh	blt->bo[0] = src;
39203b705cfSriastradh	blt->bo[1] = dst;
39303b705cfSriastradh
39442542f5fSchristos	blt->cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 12 : 10);
39503b705cfSriastradh	blt->pitch[0] = src->pitch;
39603b705cfSriastradh	if (kgem->gen >= 040 && src->tiling) {
39703b705cfSriastradh		blt->cmd |= BLT_SRC_TILED;
39803b705cfSriastradh		blt->pitch[0] >>= 2;
39903b705cfSriastradh	}
40003b705cfSriastradh	assert(blt->pitch[0] <= MAXSHORT);
40103b705cfSriastradh
40203b705cfSriastradh	blt->pitch[1] = dst->pitch;
40303b705cfSriastradh	if (kgem->gen >= 040 && dst->tiling) {
40403b705cfSriastradh		blt->cmd |= BLT_DST_TILED;
40503b705cfSriastradh		blt->pitch[1] >>= 2;
40603b705cfSriastradh	}
40703b705cfSriastradh	assert(blt->pitch[1] <= MAXSHORT);
40803b705cfSriastradh
40903b705cfSriastradh	blt->overwrites = 1;
41003b705cfSriastradh	blt->br13 = (0xfc << 16) | blt->pitch[1];
41103b705cfSriastradh	switch (bpp) {
41203b705cfSriastradh	default: assert(0);
41303b705cfSriastradh	case 32: blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
41403b705cfSriastradh		 blt->br13 |= 1 << 25; /* RGB8888 */
41503b705cfSriastradh	case 16: blt->br13 |= 1 << 24; /* RGB565 */
41603b705cfSriastradh	case 8: break;
41703b705cfSriastradh	}
41803b705cfSriastradh	blt->pixel = alpha;
41903b705cfSriastradh
42003b705cfSriastradh	kgem_set_mode(kgem, KGEM_BLT, dst);
42103b705cfSriastradh	if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
42203b705cfSriastradh		kgem_submit(kgem);
42303b705cfSriastradh		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
42403b705cfSriastradh			return false;
42503b705cfSriastradh		_kgem_set_mode(kgem, KGEM_BLT);
42603b705cfSriastradh	}
42703b705cfSriastradh
42803b705cfSriastradh	sna->blt_state.fill_bo = 0;
42903b705cfSriastradh	return true;
43003b705cfSriastradh}
43103b705cfSriastradh
43203b705cfSriastradhstatic void sna_blt_alpha_fixup_one(struct sna *sna,
43303b705cfSriastradh				    const struct sna_blt_state *blt,
43403b705cfSriastradh				    int src_x, int src_y,
43503b705cfSriastradh				    int width, int height,
43603b705cfSriastradh				    int dst_x, int dst_y)
43703b705cfSriastradh{
43803b705cfSriastradh	struct kgem *kgem = &sna->kgem;
43903b705cfSriastradh	uint32_t *b;
44003b705cfSriastradh
44103b705cfSriastradh	DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n",
44203b705cfSriastradh	     __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height));
44303b705cfSriastradh
44403b705cfSriastradh	assert(src_x >= 0);
44503b705cfSriastradh	assert(src_y >= 0);
44603b705cfSriastradh	assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
44703b705cfSriastradh	assert(dst_x >= 0);
44803b705cfSriastradh	assert(dst_y >= 0);
44903b705cfSriastradh	assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1]));
45003b705cfSriastradh	assert(width > 0);
45103b705cfSriastradh	assert(height > 0);
45203b705cfSriastradh
45342542f5fSchristos	if (!kgem_check_batch(kgem, 14) ||
45403b705cfSriastradh	    !kgem_check_reloc(kgem, 2)) {
45503b705cfSriastradh		_kgem_submit(kgem);
45603b705cfSriastradh		_kgem_set_mode(kgem, KGEM_BLT);
45703b705cfSriastradh	}
45803b705cfSriastradh
45942542f5fSchristos	assert(sna->kgem.mode == KGEM_BLT);
46003b705cfSriastradh	b = kgem->batch + kgem->nbatch;
46103b705cfSriastradh	b[0] = blt->cmd;
46203b705cfSriastradh	b[1] = blt->br13;
46303b705cfSriastradh	b[2] = (dst_y << 16) | dst_x;
46403b705cfSriastradh	b[3] = ((dst_y + height) << 16) | (dst_x + width);
46542542f5fSchristos	if (sna->kgem.gen >= 0100) {
46642542f5fSchristos		*(uint64_t *)(b+4) =
46742542f5fSchristos			kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1],
46842542f5fSchristos					 I915_GEM_DOMAIN_RENDER << 16 |
46942542f5fSchristos					 I915_GEM_DOMAIN_RENDER |
47042542f5fSchristos					 KGEM_RELOC_FENCED,
47142542f5fSchristos					 0);
47242542f5fSchristos		b[6] = blt->pitch[0];
47342542f5fSchristos		b[7] = (src_y << 16) | src_x;
47442542f5fSchristos		*(uint64_t *)(b+8) =
47542542f5fSchristos			kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0],
47642542f5fSchristos					 I915_GEM_DOMAIN_RENDER << 16 |
47742542f5fSchristos					 KGEM_RELOC_FENCED,
47842542f5fSchristos					 0);
47942542f5fSchristos		b[10] = blt->pixel;
48042542f5fSchristos		b[11] = blt->pixel;
48142542f5fSchristos		b[12] = 0;
48242542f5fSchristos		b[13] = 0;
48342542f5fSchristos		kgem->nbatch += 14;
48442542f5fSchristos	} else {
48542542f5fSchristos		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1],
48642542f5fSchristos				      I915_GEM_DOMAIN_RENDER << 16 |
48742542f5fSchristos				      I915_GEM_DOMAIN_RENDER |
48842542f5fSchristos				      KGEM_RELOC_FENCED,
48942542f5fSchristos				      0);
49042542f5fSchristos		b[5] = blt->pitch[0];
49142542f5fSchristos		b[6] = (src_y << 16) | src_x;
49242542f5fSchristos		b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0],
49342542f5fSchristos				      I915_GEM_DOMAIN_RENDER << 16 |
49442542f5fSchristos				      KGEM_RELOC_FENCED,
49542542f5fSchristos				      0);
49642542f5fSchristos		b[8] = blt->pixel;
49742542f5fSchristos		b[9] = blt->pixel;
49842542f5fSchristos		b[10] = 0;
49942542f5fSchristos		b[11] = 0;
50042542f5fSchristos		kgem->nbatch += 12;
50142542f5fSchristos	}
50203b705cfSriastradh	assert(kgem->nbatch < kgem->surface);
50303b705cfSriastradh}
50403b705cfSriastradh
50503b705cfSriastradhstatic void sna_blt_copy_one(struct sna *sna,
50603b705cfSriastradh			     const struct sna_blt_state *blt,
50703b705cfSriastradh			     int src_x, int src_y,
50803b705cfSriastradh			     int width, int height,
50903b705cfSriastradh			     int dst_x, int dst_y)
51003b705cfSriastradh{
51103b705cfSriastradh	struct kgem *kgem = &sna->kgem;
51203b705cfSriastradh	uint32_t *b;
51303b705cfSriastradh
51403b705cfSriastradh	DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n",
51503b705cfSriastradh	     __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height));
51603b705cfSriastradh
51703b705cfSriastradh	assert(src_x >= 0);
51803b705cfSriastradh	assert(src_y >= 0);
51903b705cfSriastradh	assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
52003b705cfSriastradh	assert(dst_x >= 0);
52103b705cfSriastradh	assert(dst_y >= 0);
52203b705cfSriastradh	assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1]));
52303b705cfSriastradh	assert(width > 0);
52403b705cfSriastradh	assert(height > 0);
52503b705cfSriastradh
52603b705cfSriastradh	/* Compare against a previous fill */
52742542f5fSchristos	if (blt->overwrites &&
52803b705cfSriastradh	    kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[1]->target_handle) {
52942542f5fSchristos		if (sna->kgem.gen >= 0100) {
53042542f5fSchristos			if (kgem->nbatch >= 7 &&
53142542f5fSchristos			    kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (blt->cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
53242542f5fSchristos			    kgem->batch[kgem->nbatch-5] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) &&
53342542f5fSchristos			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) {
53442542f5fSchristos				DBG(("%s: replacing last fill\n", __FUNCTION__));
53542542f5fSchristos				if (kgem_check_batch(kgem, 3)) {
53642542f5fSchristos					assert(kgem->mode == KGEM_BLT);
53742542f5fSchristos					b = kgem->batch + kgem->nbatch - 7;
53842542f5fSchristos					b[0] = blt->cmd;
53942542f5fSchristos					b[1] = blt->br13;
54042542f5fSchristos					b[6] = (src_y << 16) | src_x;
54142542f5fSchristos					b[7] = blt->pitch[0];
54242542f5fSchristos					*(uint64_t *)(b+8) =
54342542f5fSchristos						kgem_add_reloc64(kgem, kgem->nbatch + 8 - 7, blt->bo[0],
54442542f5fSchristos								 I915_GEM_DOMAIN_RENDER << 16 |
54542542f5fSchristos								 KGEM_RELOC_FENCED,
54642542f5fSchristos								 0);
54742542f5fSchristos					kgem->nbatch += 3;
54842542f5fSchristos					assert(kgem->nbatch < kgem->surface);
54942542f5fSchristos					return;
55042542f5fSchristos				}
55142542f5fSchristos				kgem->nbatch -= 7;
55242542f5fSchristos				kgem->nreloc--;
55342542f5fSchristos			}
55442542f5fSchristos		} else {
55542542f5fSchristos			if (kgem->nbatch >= 6 &&
55642542f5fSchristos			    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (blt->cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
55742542f5fSchristos			    kgem->batch[kgem->nbatch-4] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) &&
55842542f5fSchristos			    kgem->batch[kgem->nbatch-3] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) {
55942542f5fSchristos				DBG(("%s: replacing last fill\n", __FUNCTION__));
56042542f5fSchristos				if (kgem_check_batch(kgem, 8-6)) {
56142542f5fSchristos					assert(kgem->mode == KGEM_BLT);
56242542f5fSchristos					b = kgem->batch + kgem->nbatch - 6;
56342542f5fSchristos					b[0] = blt->cmd;
56442542f5fSchristos					b[1] = blt->br13;
56542542f5fSchristos					b[5] = (src_y << 16) | src_x;
56642542f5fSchristos					b[6] = blt->pitch[0];
56742542f5fSchristos					b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7 - 6, blt->bo[0],
56842542f5fSchristos							      I915_GEM_DOMAIN_RENDER << 16 |
56942542f5fSchristos							      KGEM_RELOC_FENCED,
57042542f5fSchristos							      0);
57142542f5fSchristos					kgem->nbatch += 8 - 6;
57242542f5fSchristos					assert(kgem->nbatch < kgem->surface);
57342542f5fSchristos					return;
57442542f5fSchristos				}
57542542f5fSchristos				kgem->nbatch -= 6;
57642542f5fSchristos				kgem->nreloc--;
57742542f5fSchristos			}
57803b705cfSriastradh		}
57903b705cfSriastradh	}
58003b705cfSriastradh
58142542f5fSchristos	if (!kgem_check_batch(kgem, 10) ||
58203b705cfSriastradh	    !kgem_check_reloc(kgem, 2)) {
58303b705cfSriastradh		_kgem_submit(kgem);
58403b705cfSriastradh		_kgem_set_mode(kgem, KGEM_BLT);
58503b705cfSriastradh	}
58603b705cfSriastradh
58742542f5fSchristos	assert(sna->kgem.mode == KGEM_BLT);
58803b705cfSriastradh	b = kgem->batch + kgem->nbatch;
58903b705cfSriastradh	b[0] = blt->cmd;
59003b705cfSriastradh	b[1] = blt->br13;
59103b705cfSriastradh	b[2] = (dst_y << 16) | dst_x;
59203b705cfSriastradh	b[3] = ((dst_y + height) << 16) | (dst_x + width);
59342542f5fSchristos	if (kgem->gen >= 0100) {
59442542f5fSchristos		*(uint64_t *)(b+4) =
59542542f5fSchristos			kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1],
59642542f5fSchristos					 I915_GEM_DOMAIN_RENDER << 16 |
59742542f5fSchristos					 I915_GEM_DOMAIN_RENDER |
59842542f5fSchristos					 KGEM_RELOC_FENCED,
59942542f5fSchristos					 0);
60042542f5fSchristos		b[6] = (src_y << 16) | src_x;
60142542f5fSchristos		b[7] = blt->pitch[0];
60242542f5fSchristos		*(uint64_t *)(b+8) =
60342542f5fSchristos			kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0],
60442542f5fSchristos					 I915_GEM_DOMAIN_RENDER << 16 |
60542542f5fSchristos					 KGEM_RELOC_FENCED,
60642542f5fSchristos					 0);
60742542f5fSchristos		kgem->nbatch += 10;
60842542f5fSchristos	} else {
60942542f5fSchristos		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1],
61042542f5fSchristos				      I915_GEM_DOMAIN_RENDER << 16 |
61142542f5fSchristos				      I915_GEM_DOMAIN_RENDER |
61242542f5fSchristos				      KGEM_RELOC_FENCED,
61342542f5fSchristos				      0);
61442542f5fSchristos		b[5] = (src_y << 16) | src_x;
61542542f5fSchristos		b[6] = blt->pitch[0];
61642542f5fSchristos		b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0],
61742542f5fSchristos				      I915_GEM_DOMAIN_RENDER << 16 |
61842542f5fSchristos				      KGEM_RELOC_FENCED,
61942542f5fSchristos				      0);
62042542f5fSchristos		kgem->nbatch += 8;
62142542f5fSchristos	}
62203b705cfSriastradh	assert(kgem->nbatch < kgem->surface);
62303b705cfSriastradh}
62403b705cfSriastradh
62503b705cfSriastradhbool
62603b705cfSriastradhsna_get_rgba_from_pixel(uint32_t pixel,
62703b705cfSriastradh			uint16_t *red,
62803b705cfSriastradh			uint16_t *green,
62903b705cfSriastradh			uint16_t *blue,
63003b705cfSriastradh			uint16_t *alpha,
63103b705cfSriastradh			uint32_t format)
63203b705cfSriastradh{
63303b705cfSriastradh	int rbits, bbits, gbits, abits;
63403b705cfSriastradh	int rshift, bshift, gshift, ashift;
63503b705cfSriastradh
63603b705cfSriastradh	rbits = PICT_FORMAT_R(format);
63703b705cfSriastradh	gbits = PICT_FORMAT_G(format);
63803b705cfSriastradh	bbits = PICT_FORMAT_B(format);
63903b705cfSriastradh	abits = PICT_FORMAT_A(format);
64003b705cfSriastradh
64103b705cfSriastradh	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) {
64203b705cfSriastradh		rshift = gshift = bshift = ashift = 0;
64303b705cfSriastradh	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
64403b705cfSriastradh		bshift = 0;
64503b705cfSriastradh		gshift = bbits;
64603b705cfSriastradh		rshift = gshift + gbits;
64703b705cfSriastradh		ashift = rshift + rbits;
64803b705cfSriastradh	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
64903b705cfSriastradh		rshift = 0;
65003b705cfSriastradh		gshift = rbits;
65103b705cfSriastradh		bshift = gshift + gbits;
65203b705cfSriastradh		ashift = bshift + bbits;
65303b705cfSriastradh	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
65403b705cfSriastradh		ashift = 0;
65503b705cfSriastradh		rshift = abits;
65603b705cfSriastradh		if (abits == 0)
65703b705cfSriastradh			rshift = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits);
65803b705cfSriastradh		gshift = rshift + rbits;
65903b705cfSriastradh		bshift = gshift + gbits;
66003b705cfSriastradh	} else {
66103b705cfSriastradh		return false;
66203b705cfSriastradh	}
66303b705cfSriastradh
66403b705cfSriastradh	if (rbits) {
66503b705cfSriastradh		*red = ((pixel >> rshift) & ((1 << rbits) - 1)) << (16 - rbits);
66603b705cfSriastradh		while (rbits < 16) {
66703b705cfSriastradh			*red |= *red >> rbits;
66803b705cfSriastradh			rbits <<= 1;
66903b705cfSriastradh		}
67003b705cfSriastradh	} else
67103b705cfSriastradh		*red = 0;
67203b705cfSriastradh
67303b705cfSriastradh	if (gbits) {
67403b705cfSriastradh		*green = ((pixel >> gshift) & ((1 << gbits) - 1)) << (16 - gbits);
67503b705cfSriastradh		while (gbits < 16) {
67603b705cfSriastradh			*green |= *green >> gbits;
67703b705cfSriastradh			gbits <<= 1;
67803b705cfSriastradh		}
67903b705cfSriastradh	} else
68003b705cfSriastradh		*green = 0;
68103b705cfSriastradh
68203b705cfSriastradh	if (bbits) {
68303b705cfSriastradh		*blue = ((pixel >> bshift) & ((1 << bbits) - 1)) << (16 - bbits);
68403b705cfSriastradh		while (bbits < 16) {
68503b705cfSriastradh			*blue |= *blue >> bbits;
68603b705cfSriastradh			bbits <<= 1;
68703b705cfSriastradh		}
68803b705cfSriastradh	} else
68903b705cfSriastradh		*blue = 0;
69003b705cfSriastradh
69103b705cfSriastradh	if (abits) {
69203b705cfSriastradh		*alpha = ((pixel >> ashift) & ((1 << abits) - 1)) << (16 - abits);
69303b705cfSriastradh		while (abits < 16) {
69403b705cfSriastradh			*alpha |= *alpha >> abits;
69503b705cfSriastradh			abits <<= 1;
69603b705cfSriastradh		}
69703b705cfSriastradh	} else
69803b705cfSriastradh		*alpha = 0xffff;
69903b705cfSriastradh
70003b705cfSriastradh	return true;
70103b705cfSriastradh}
70203b705cfSriastradh
70303b705cfSriastradhbool
70403b705cfSriastradh_sna_get_pixel_from_rgba(uint32_t * pixel,
70503b705cfSriastradh			uint16_t red,
70603b705cfSriastradh			uint16_t green,
70703b705cfSriastradh			uint16_t blue,
70803b705cfSriastradh			uint16_t alpha,
70903b705cfSriastradh			uint32_t format)
71003b705cfSriastradh{
71103b705cfSriastradh	int rbits, bbits, gbits, abits;
71203b705cfSriastradh	int rshift, bshift, gshift, ashift;
71303b705cfSriastradh
71403b705cfSriastradh	rbits = PICT_FORMAT_R(format);
71503b705cfSriastradh	gbits = PICT_FORMAT_G(format);
71603b705cfSriastradh	bbits = PICT_FORMAT_B(format);
71703b705cfSriastradh	abits = PICT_FORMAT_A(format);
71803b705cfSriastradh	if (abits == 0)
71903b705cfSriastradh	    abits = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits);
72003b705cfSriastradh
72103b705cfSriastradh	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) {
72203b705cfSriastradh		*pixel = alpha >> (16 - abits);
72303b705cfSriastradh		return true;
72403b705cfSriastradh	}
72503b705cfSriastradh
72603b705cfSriastradh	if (!PICT_FORMAT_COLOR(format))
72703b705cfSriastradh		return false;
72803b705cfSriastradh
72903b705cfSriastradh	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
73003b705cfSriastradh		bshift = 0;
73103b705cfSriastradh		gshift = bbits;
73203b705cfSriastradh		rshift = gshift + gbits;
73303b705cfSriastradh		ashift = rshift + rbits;
73403b705cfSriastradh	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
73503b705cfSriastradh		rshift = 0;
73603b705cfSriastradh		gshift = rbits;
73703b705cfSriastradh		bshift = gshift + gbits;
73803b705cfSriastradh		ashift = bshift + bbits;
73903b705cfSriastradh	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
74003b705cfSriastradh		ashift = 0;
74103b705cfSriastradh		rshift = abits;
74203b705cfSriastradh		gshift = rshift + rbits;
74303b705cfSriastradh		bshift = gshift + gbits;
74403b705cfSriastradh	} else
74503b705cfSriastradh		return false;
74603b705cfSriastradh
74703b705cfSriastradh	*pixel = 0;
74803b705cfSriastradh	*pixel |= (blue  >> (16 - bbits)) << bshift;
74903b705cfSriastradh	*pixel |= (green >> (16 - gbits)) << gshift;
75003b705cfSriastradh	*pixel |= (red   >> (16 - rbits)) << rshift;
75103b705cfSriastradh	*pixel |= (alpha >> (16 - abits)) << ashift;
75203b705cfSriastradh
75303b705cfSriastradh	return true;
75403b705cfSriastradh}
75503b705cfSriastradh
75603b705cfSriastradhuint32_t
75703b705cfSriastradhsna_rgba_for_color(uint32_t color, int depth)
75803b705cfSriastradh{
75903b705cfSriastradh	return color_convert(color, sna_format_for_depth(depth), PICT_a8r8g8b8);
76003b705cfSriastradh}
76103b705cfSriastradh
76203b705cfSriastradhuint32_t
76303b705cfSriastradhsna_rgba_to_color(uint32_t rgba, uint32_t format)
76403b705cfSriastradh{
76503b705cfSriastradh	return color_convert(rgba, PICT_a8r8g8b8, format);
76603b705cfSriastradh}
76703b705cfSriastradh
76803b705cfSriastradhstatic uint32_t
76903b705cfSriastradhget_pixel(PicturePtr picture)
77003b705cfSriastradh{
77103b705cfSriastradh	PixmapPtr pixmap = get_drawable_pixmap(picture->pDrawable);
77203b705cfSriastradh
77303b705cfSriastradh	DBG(("%s: %p\n", __FUNCTION__, pixmap));
77403b705cfSriastradh
77503b705cfSriastradh	if (!sna_pixmap_move_to_cpu(pixmap, MOVE_READ))
77603b705cfSriastradh		return 0;
77703b705cfSriastradh
77803b705cfSriastradh	switch (pixmap->drawable.bitsPerPixel) {
77903b705cfSriastradh	case 32: return *(uint32_t *)pixmap->devPrivate.ptr;
78003b705cfSriastradh	case 16: return *(uint16_t *)pixmap->devPrivate.ptr;
78103b705cfSriastradh	default: return *(uint8_t *)pixmap->devPrivate.ptr;
78203b705cfSriastradh	}
78303b705cfSriastradh}
78403b705cfSriastradh
78503b705cfSriastradhstatic uint32_t
78603b705cfSriastradhget_solid_color(PicturePtr picture, uint32_t format)
78703b705cfSriastradh{
78803b705cfSriastradh	if (picture->pSourcePict) {
78903b705cfSriastradh		PictSolidFill *fill = (PictSolidFill *)picture->pSourcePict;
79003b705cfSriastradh		return color_convert(fill->color, PICT_a8r8g8b8, format);
79103b705cfSriastradh	} else
79203b705cfSriastradh		return color_convert(get_pixel(picture), picture->format, format);
79303b705cfSriastradh}
79403b705cfSriastradh
79503b705cfSriastradhstatic bool
79603b705cfSriastradhis_solid(PicturePtr picture)
79703b705cfSriastradh{
79803b705cfSriastradh	if (picture->pSourcePict) {
79903b705cfSriastradh		if (picture->pSourcePict->type == SourcePictTypeSolidFill)
80003b705cfSriastradh			return true;
80103b705cfSriastradh	}
80203b705cfSriastradh
80303b705cfSriastradh	if (picture->pDrawable) {
80403b705cfSriastradh		if (picture->pDrawable->width  == 1 &&
80503b705cfSriastradh		    picture->pDrawable->height == 1 &&
80603b705cfSriastradh		    picture->repeat)
80703b705cfSriastradh			return true;
80803b705cfSriastradh	}
80903b705cfSriastradh
81003b705cfSriastradh	return false;
81103b705cfSriastradh}
81203b705cfSriastradh
81303b705cfSriastradhbool
81403b705cfSriastradhsna_picture_is_solid(PicturePtr picture, uint32_t *color)
81503b705cfSriastradh{
81603b705cfSriastradh	if (!is_solid(picture))
81703b705cfSriastradh		return false;
81803b705cfSriastradh
81903b705cfSriastradh	if (color)
82003b705cfSriastradh		*color = get_solid_color(picture, PICT_a8r8g8b8);
82103b705cfSriastradh	return true;
82203b705cfSriastradh}
82303b705cfSriastradh
82442542f5fSchristosstatic bool
82542542f5fSchristospixel_is_transparent(uint32_t pixel, uint32_t format)
82642542f5fSchristos{
82742542f5fSchristos	unsigned int abits;
82842542f5fSchristos
82942542f5fSchristos	abits = PICT_FORMAT_A(format);
83042542f5fSchristos	if (!abits)
83142542f5fSchristos		return false;
83242542f5fSchristos
83342542f5fSchristos	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A ||
83442542f5fSchristos	    PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
83542542f5fSchristos		return (pixel & ((1 << abits) - 1)) == 0;
83642542f5fSchristos	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB ||
83742542f5fSchristos		   PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
83842542f5fSchristos		unsigned int ashift = PICT_FORMAT_BPP(format) - abits;
83942542f5fSchristos		return (pixel >> ashift) == 0;
84042542f5fSchristos	} else
84142542f5fSchristos		return false;
84242542f5fSchristos}
84342542f5fSchristos
84403b705cfSriastradhstatic bool
84503b705cfSriastradhpixel_is_opaque(uint32_t pixel, uint32_t format)
84603b705cfSriastradh{
84703b705cfSriastradh	unsigned int abits;
84803b705cfSriastradh
84903b705cfSriastradh	abits = PICT_FORMAT_A(format);
85003b705cfSriastradh	if (!abits)
85103b705cfSriastradh		return true;
85203b705cfSriastradh
85303b705cfSriastradh	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A ||
85403b705cfSriastradh	    PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
85503b705cfSriastradh		return (pixel & ((1 << abits) - 1)) == (unsigned)((1 << abits) - 1);
85603b705cfSriastradh	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB ||
85703b705cfSriastradh		   PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
85803b705cfSriastradh		unsigned int ashift = PICT_FORMAT_BPP(format) - abits;
85903b705cfSriastradh		return (pixel >> ashift) == (unsigned)((1 << abits) - 1);
86003b705cfSriastradh	} else
86103b705cfSriastradh		return false;
86203b705cfSriastradh}
86303b705cfSriastradh
86403b705cfSriastradhstatic bool
86503b705cfSriastradhpixel_is_white(uint32_t pixel, uint32_t format)
86603b705cfSriastradh{
86703b705cfSriastradh	switch (PICT_FORMAT_TYPE(format)) {
86803b705cfSriastradh	case PICT_TYPE_A:
86903b705cfSriastradh	case PICT_TYPE_ARGB:
87003b705cfSriastradh	case PICT_TYPE_ABGR:
87103b705cfSriastradh	case PICT_TYPE_BGRA:
87203b705cfSriastradh		return pixel == ((1U << PICT_FORMAT_BPP(format)) - 1);
87303b705cfSriastradh	default:
87403b705cfSriastradh		return false;
87503b705cfSriastradh	}
87603b705cfSriastradh}
87703b705cfSriastradh
87803b705cfSriastradhstatic bool
87903b705cfSriastradhis_opaque_solid(PicturePtr picture)
88003b705cfSriastradh{
88103b705cfSriastradh	if (picture->pSourcePict) {
88203b705cfSriastradh		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
88303b705cfSriastradh		return (fill->color >> 24) == 0xff;
88403b705cfSriastradh	} else
88503b705cfSriastradh		return pixel_is_opaque(get_pixel(picture), picture->format);
88603b705cfSriastradh}
88703b705cfSriastradh
88803b705cfSriastradhstatic bool
88903b705cfSriastradhis_white(PicturePtr picture)
89003b705cfSriastradh{
89103b705cfSriastradh	if (picture->pSourcePict) {
89203b705cfSriastradh		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
89303b705cfSriastradh		return fill->color == 0xffffffff;
89403b705cfSriastradh	} else
89503b705cfSriastradh		return pixel_is_white(get_pixel(picture), picture->format);
89603b705cfSriastradh}
89703b705cfSriastradh
89842542f5fSchristosstatic bool
89942542f5fSchristosis_transparent(PicturePtr picture)
90042542f5fSchristos{
90142542f5fSchristos	if (picture->pSourcePict) {
90242542f5fSchristos		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
90342542f5fSchristos		return fill->color == 0;
90442542f5fSchristos	} else
90542542f5fSchristos		return pixel_is_transparent(get_pixel(picture), picture->format);
90642542f5fSchristos}
90742542f5fSchristos
90803b705cfSriastradhbool
90903b705cfSriastradhsna_composite_mask_is_opaque(PicturePtr mask)
91003b705cfSriastradh{
91103b705cfSriastradh	if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format))
91203b705cfSriastradh		return is_solid(mask) && is_white(mask);
91303b705cfSriastradh	else if (!PICT_FORMAT_A(mask->format))
91403b705cfSriastradh		return true;
91503b705cfSriastradh	else
91603b705cfSriastradh		return is_solid(mask) && is_opaque_solid(mask);
91703b705cfSriastradh}
91803b705cfSriastradh
91903b705cfSriastradhfastcall
92003b705cfSriastradhstatic void blt_composite_fill(struct sna *sna,
92103b705cfSriastradh			       const struct sna_composite_op *op,
92203b705cfSriastradh			       const struct sna_composite_rectangles *r)
92303b705cfSriastradh{
92403b705cfSriastradh	int x1, x2, y1, y2;
92503b705cfSriastradh
92603b705cfSriastradh	x1 = r->dst.x + op->dst.x;
92703b705cfSriastradh	y1 = r->dst.y + op->dst.y;
92803b705cfSriastradh	x2 = x1 + r->width;
92903b705cfSriastradh	y2 = y1 + r->height;
93003b705cfSriastradh
93103b705cfSriastradh	if (x1 < 0)
93203b705cfSriastradh		x1 = 0;
93303b705cfSriastradh	if (y1 < 0)
93403b705cfSriastradh		y1 = 0;
93503b705cfSriastradh
93603b705cfSriastradh	if (x2 > op->dst.width)
93703b705cfSriastradh		x2 = op->dst.width;
93803b705cfSriastradh	if (y2 > op->dst.height)
93903b705cfSriastradh		y2 = op->dst.height;
94003b705cfSriastradh
94103b705cfSriastradh	if (x2 <= x1 || y2 <= y1)
94203b705cfSriastradh		return;
94303b705cfSriastradh
94403b705cfSriastradh	sna_blt_fill_one(sna, &op->u.blt, x1, y1, x2-x1, y2-y1);
94503b705cfSriastradh}
94603b705cfSriastradh
94703b705cfSriastradhfastcall
94803b705cfSriastradhstatic void blt_composite_fill__cpu(struct sna *sna,
94903b705cfSriastradh				    const struct sna_composite_op *op,
95003b705cfSriastradh				    const struct sna_composite_rectangles *r)
95103b705cfSriastradh{
95203b705cfSriastradh	int x1, x2, y1, y2;
95303b705cfSriastradh
95403b705cfSriastradh	x1 = r->dst.x + op->dst.x;
95503b705cfSriastradh	y1 = r->dst.y + op->dst.y;
95603b705cfSriastradh	x2 = x1 + r->width;
95703b705cfSriastradh	y2 = y1 + r->height;
95803b705cfSriastradh
95903b705cfSriastradh	if (x1 < 0)
96003b705cfSriastradh		x1 = 0;
96103b705cfSriastradh	if (y1 < 0)
96203b705cfSriastradh		y1 = 0;
96303b705cfSriastradh
96403b705cfSriastradh	if (x2 > op->dst.width)
96503b705cfSriastradh		x2 = op->dst.width;
96603b705cfSriastradh	if (y2 > op->dst.height)
96703b705cfSriastradh		y2 = op->dst.height;
96803b705cfSriastradh
96903b705cfSriastradh	if (x2 <= x1 || y2 <= y1)
97003b705cfSriastradh		return;
97103b705cfSriastradh
97242542f5fSchristos	assert(op->dst.pixmap->devPrivate.ptr);
97342542f5fSchristos	assert(op->dst.pixmap->devKind);
97403b705cfSriastradh	pixman_fill(op->dst.pixmap->devPrivate.ptr,
97503b705cfSriastradh		    op->dst.pixmap->devKind / sizeof(uint32_t),
97603b705cfSriastradh		    op->dst.pixmap->drawable.bitsPerPixel,
97703b705cfSriastradh		    x1, y1, x2-x1, y2-y1,
97803b705cfSriastradh		    op->u.blt.pixel);
97903b705cfSriastradh}
98003b705cfSriastradh
98142542f5fSchristosfastcall static void
98242542f5fSchristosblt_composite_fill_box_no_offset__cpu(struct sna *sna,
98342542f5fSchristos				      const struct sna_composite_op *op,
98442542f5fSchristos				      const BoxRec *box)
98542542f5fSchristos{
98642542f5fSchristos	assert(box->x1 >= 0);
98742542f5fSchristos	assert(box->y1 >= 0);
98842542f5fSchristos	assert(box->x2 <= op->dst.pixmap->drawable.width);
98942542f5fSchristos	assert(box->y2 <= op->dst.pixmap->drawable.height);
99042542f5fSchristos
99142542f5fSchristos	assert(op->dst.pixmap->devPrivate.ptr);
99242542f5fSchristos	assert(op->dst.pixmap->devKind);
99342542f5fSchristos	pixman_fill(op->dst.pixmap->devPrivate.ptr,
99442542f5fSchristos		    op->dst.pixmap->devKind / sizeof(uint32_t),
99542542f5fSchristos		    op->dst.pixmap->drawable.bitsPerPixel,
99642542f5fSchristos		    box->x1, box->y1, box->x2-box->x1, box->y2-box->y1,
99742542f5fSchristos		    op->u.blt.pixel);
99842542f5fSchristos}
99942542f5fSchristos
100042542f5fSchristosstatic void
100142542f5fSchristosblt_composite_fill_boxes_no_offset__cpu(struct sna *sna,
100242542f5fSchristos					const struct sna_composite_op *op,
100342542f5fSchristos					const BoxRec *box, int n)
100442542f5fSchristos{
100542542f5fSchristos	do {
100642542f5fSchristos		assert(box->x1 >= 0);
100742542f5fSchristos		assert(box->y1 >= 0);
100842542f5fSchristos		assert(box->x2 <= op->dst.pixmap->drawable.width);
100942542f5fSchristos		assert(box->y2 <= op->dst.pixmap->drawable.height);
101042542f5fSchristos
101142542f5fSchristos		assert(op->dst.pixmap->devPrivate.ptr);
101242542f5fSchristos		assert(op->dst.pixmap->devKind);
101342542f5fSchristos		pixman_fill(op->dst.pixmap->devPrivate.ptr,
101442542f5fSchristos			    op->dst.pixmap->devKind / sizeof(uint32_t),
101542542f5fSchristos			    op->dst.pixmap->drawable.bitsPerPixel,
101642542f5fSchristos			    box->x1, box->y1, box->x2-box->x1, box->y2-box->y1,
101742542f5fSchristos			    op->u.blt.pixel);
101842542f5fSchristos		box++;
101942542f5fSchristos	} while (--n);
102042542f5fSchristos}
102142542f5fSchristos
102203b705cfSriastradhfastcall static void
102303b705cfSriastradhblt_composite_fill_box__cpu(struct sna *sna,
102403b705cfSriastradh			    const struct sna_composite_op *op,
102503b705cfSriastradh			    const BoxRec *box)
102603b705cfSriastradh{
102742542f5fSchristos	assert(box->x1 + op->dst.x >= 0);
102842542f5fSchristos	assert(box->y1 + op->dst.y >= 0);
102942542f5fSchristos	assert(box->x2 + op->dst.x <= op->dst.pixmap->drawable.width);
103042542f5fSchristos	assert(box->y2 + op->dst.y <= op->dst.pixmap->drawable.height);
103142542f5fSchristos
103242542f5fSchristos	assert(op->dst.pixmap->devPrivate.ptr);
103342542f5fSchristos	assert(op->dst.pixmap->devKind);
103403b705cfSriastradh	pixman_fill(op->dst.pixmap->devPrivate.ptr,
103503b705cfSriastradh		    op->dst.pixmap->devKind / sizeof(uint32_t),
103603b705cfSriastradh		    op->dst.pixmap->drawable.bitsPerPixel,
103742542f5fSchristos		    box->x1 + op->dst.x, box->y1 + op->dst.y,
103842542f5fSchristos		    box->x2 - box->x1, box->y2 - box->y1,
103903b705cfSriastradh		    op->u.blt.pixel);
104003b705cfSriastradh}
104103b705cfSriastradh
104203b705cfSriastradhstatic void
104303b705cfSriastradhblt_composite_fill_boxes__cpu(struct sna *sna,
104403b705cfSriastradh			      const struct sna_composite_op *op,
104503b705cfSriastradh			      const BoxRec *box, int n)
104603b705cfSriastradh{
104703b705cfSriastradh	do {
104842542f5fSchristos		assert(box->x1 + op->dst.x >= 0);
104942542f5fSchristos		assert(box->y1 + op->dst.y >= 0);
105042542f5fSchristos		assert(box->x2 + op->dst.x <= op->dst.pixmap->drawable.width);
105142542f5fSchristos		assert(box->y2 + op->dst.y <= op->dst.pixmap->drawable.height);
105242542f5fSchristos
105342542f5fSchristos		assert(op->dst.pixmap->devPrivate.ptr);
105442542f5fSchristos		assert(op->dst.pixmap->devKind);
105503b705cfSriastradh		pixman_fill(op->dst.pixmap->devPrivate.ptr,
105603b705cfSriastradh			    op->dst.pixmap->devKind / sizeof(uint32_t),
105703b705cfSriastradh			    op->dst.pixmap->drawable.bitsPerPixel,
105842542f5fSchristos			    box->x1 + op->dst.x, box->y1 + op->dst.y,
105942542f5fSchristos			    box->x2 - box->x1, box->y2 - box->y1,
106003b705cfSriastradh			    op->u.blt.pixel);
106103b705cfSriastradh		box++;
106203b705cfSriastradh	} while (--n);
106303b705cfSriastradh}
106403b705cfSriastradh
106503b705cfSriastradhinline static void _sna_blt_fill_box(struct sna *sna,
106603b705cfSriastradh				     const struct sna_blt_state *blt,
106703b705cfSriastradh				     const BoxRec *box)
106803b705cfSriastradh{
106903b705cfSriastradh	struct kgem *kgem = &sna->kgem;
107003b705cfSriastradh	uint32_t *b;
107103b705cfSriastradh
107203b705cfSriastradh	DBG(("%s: (%d, %d), (%d, %d): %08x\n", __FUNCTION__,
107303b705cfSriastradh	     box->x1, box->y1, box->x2, box->y2,
107403b705cfSriastradh	     blt->pixel));
107503b705cfSriastradh
107603b705cfSriastradh	assert(box->x1 >= 0);
107703b705cfSriastradh	assert(box->y1 >= 0);
107803b705cfSriastradh	assert(box->y2 * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
107903b705cfSriastradh
108003b705cfSriastradh	if (!kgem_check_batch(kgem, 3))
108103b705cfSriastradh		sna_blt_fill_begin(sna, blt);
108203b705cfSriastradh
108342542f5fSchristos	assert(sna->kgem.mode == KGEM_BLT);
108403b705cfSriastradh	b = kgem->batch + kgem->nbatch;
108503b705cfSriastradh	kgem->nbatch += 3;
108603b705cfSriastradh	assert(kgem->nbatch < kgem->surface);
108703b705cfSriastradh
108803b705cfSriastradh	b[0] = blt->cmd;
108903b705cfSriastradh	*(uint64_t *)(b+1) = *(const uint64_t *)box;
109003b705cfSriastradh}
109103b705cfSriastradh
109203b705cfSriastradhinline static void _sna_blt_fill_boxes(struct sna *sna,
109303b705cfSriastradh				       const struct sna_blt_state *blt,
109403b705cfSriastradh				       const BoxRec *box,
109503b705cfSriastradh				       int nbox)
109603b705cfSriastradh{
109703b705cfSriastradh	struct kgem *kgem = &sna->kgem;
109803b705cfSriastradh	uint32_t cmd = blt->cmd;
109903b705cfSriastradh
110003b705cfSriastradh	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));
110103b705cfSriastradh
110203b705cfSriastradh	if (!kgem_check_batch(kgem, 3))
110303b705cfSriastradh		sna_blt_fill_begin(sna, blt);
110403b705cfSriastradh
110503b705cfSriastradh	do {
110603b705cfSriastradh		uint32_t *b = kgem->batch + kgem->nbatch;
110713496ba1Ssnj		int nbox_this_time, rem;
110803b705cfSriastradh
110942542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
111003b705cfSriastradh		nbox_this_time = nbox;
111113496ba1Ssnj		rem = kgem_batch_space(kgem);
111213496ba1Ssnj		if (3*nbox_this_time > rem)
111313496ba1Ssnj			nbox_this_time = rem / 3;
111413496ba1Ssnj		DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
111513496ba1Ssnj		     __FUNCTION__, nbox_this_time, nbox, rem));
111613496ba1Ssnj		assert(nbox_this_time > 0);
111703b705cfSriastradh		nbox -= nbox_this_time;
111803b705cfSriastradh
111903b705cfSriastradh		kgem->nbatch += 3 * nbox_this_time;
112003b705cfSriastradh		assert(kgem->nbatch < kgem->surface);
112103b705cfSriastradh		while (nbox_this_time >= 8) {
112203b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
112303b705cfSriastradh			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
112403b705cfSriastradh			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
112503b705cfSriastradh			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
112603b705cfSriastradh			b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++;
112703b705cfSriastradh			b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++;
112803b705cfSriastradh			b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++;
112903b705cfSriastradh			b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++;
113003b705cfSriastradh			b += 24;
113103b705cfSriastradh			nbox_this_time -= 8;
113203b705cfSriastradh		}
113303b705cfSriastradh		if (nbox_this_time & 4) {
113403b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
113503b705cfSriastradh			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
113603b705cfSriastradh			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
113703b705cfSriastradh			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
113803b705cfSriastradh			b += 12;
113903b705cfSriastradh		}
114003b705cfSriastradh		if (nbox_this_time & 2) {
114103b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
114203b705cfSriastradh			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
114303b705cfSriastradh			b += 6;
114403b705cfSriastradh		}
114503b705cfSriastradh		if (nbox_this_time & 1) {
114603b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
114703b705cfSriastradh		}
114803b705cfSriastradh
114903b705cfSriastradh		if (!nbox)
115003b705cfSriastradh			return;
115103b705cfSriastradh
115203b705cfSriastradh		sna_blt_fill_begin(sna, blt);
115303b705cfSriastradh	} while (1);
115403b705cfSriastradh}
115503b705cfSriastradh
115642542f5fSchristosstatic inline void _sna_blt_maybe_clear(const struct sna_composite_op *op, const BoxRec *box)
115742542f5fSchristos{
115842542f5fSchristos	if (box->x2 - box->x1 >= op->dst.width &&
115942542f5fSchristos	    box->y2 - box->y1 >= op->dst.height) {
116042542f5fSchristos		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
116142542f5fSchristos		if (op->dst.bo == priv->gpu_bo) {
116242542f5fSchristos			priv->clear = true;
116342542f5fSchristos			priv->clear_color = op->u.blt.pixel;
116442542f5fSchristos			DBG(("%s: pixmap=%ld marking clear [%08x]\n",
116542542f5fSchristos			     __FUNCTION__,
116642542f5fSchristos			     op->dst.pixmap->drawable.serialNumber,
116742542f5fSchristos			     op->u.blt.pixel));
116842542f5fSchristos		}
116942542f5fSchristos	}
117042542f5fSchristos}
117142542f5fSchristos
117203b705cfSriastradhfastcall static void blt_composite_fill_box_no_offset(struct sna *sna,
117303b705cfSriastradh						      const struct sna_composite_op *op,
117403b705cfSriastradh						      const BoxRec *box)
117503b705cfSriastradh{
117603b705cfSriastradh	_sna_blt_fill_box(sna, &op->u.blt, box);
117742542f5fSchristos	_sna_blt_maybe_clear(op, box);
117803b705cfSriastradh}
117903b705cfSriastradh
118003b705cfSriastradhstatic void blt_composite_fill_boxes_no_offset(struct sna *sna,
118103b705cfSriastradh					       const struct sna_composite_op *op,
118203b705cfSriastradh					       const BoxRec *box, int n)
118303b705cfSriastradh{
118403b705cfSriastradh	_sna_blt_fill_boxes(sna, &op->u.blt, box, n);
118503b705cfSriastradh}
118603b705cfSriastradh
118703b705cfSriastradhstatic void blt_composite_fill_boxes_no_offset__thread(struct sna *sna,
118803b705cfSriastradh						       const struct sna_composite_op *op,
118903b705cfSriastradh						       const BoxRec *box, int nbox)
119003b705cfSriastradh{
119103b705cfSriastradh	struct kgem *kgem = &sna->kgem;
119203b705cfSriastradh	const struct sna_blt_state *blt = &op->u.blt;
119303b705cfSriastradh	uint32_t cmd = blt->cmd;
119403b705cfSriastradh
119503b705cfSriastradh	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));
119603b705cfSriastradh
119703b705cfSriastradh	sna_vertex_lock(&sna->render);
119842542f5fSchristos	assert(kgem->mode == KGEM_BLT);
119903b705cfSriastradh	if (!kgem_check_batch(kgem, 3)) {
120003b705cfSriastradh		sna_vertex_wait__locked(&sna->render);
120103b705cfSriastradh		sna_blt_fill_begin(sna, blt);
120203b705cfSriastradh	}
120303b705cfSriastradh
120403b705cfSriastradh	do {
120503b705cfSriastradh		uint32_t *b = kgem->batch + kgem->nbatch;
120613496ba1Ssnj		int nbox_this_time, rem;
120703b705cfSriastradh
120842542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
120903b705cfSriastradh		nbox_this_time = nbox;
121013496ba1Ssnj		rem = kgem_batch_space(kgem);
121113496ba1Ssnj		if (3*nbox_this_time > rem)
121213496ba1Ssnj			nbox_this_time = rem / 3;
121313496ba1Ssnj		DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
121413496ba1Ssnj		     __FUNCTION__, nbox_this_time, nbox, rem));
121513496ba1Ssnj		assert(nbox_this_time > 0);
121603b705cfSriastradh		nbox -= nbox_this_time;
121703b705cfSriastradh
121803b705cfSriastradh		kgem->nbatch += 3 * nbox_this_time;
121903b705cfSriastradh		assert(kgem->nbatch < kgem->surface);
122003b705cfSriastradh		sna_vertex_acquire__locked(&sna->render);
122103b705cfSriastradh		sna_vertex_unlock(&sna->render);
122203b705cfSriastradh
122303b705cfSriastradh		while (nbox_this_time >= 8) {
122403b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
122503b705cfSriastradh			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
122603b705cfSriastradh			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
122703b705cfSriastradh			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
122803b705cfSriastradh			b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++;
122903b705cfSriastradh			b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++;
123003b705cfSriastradh			b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++;
123103b705cfSriastradh			b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++;
123203b705cfSriastradh			b += 24;
123303b705cfSriastradh			nbox_this_time -= 8;
123403b705cfSriastradh		}
123503b705cfSriastradh		if (nbox_this_time & 4) {
123603b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
123703b705cfSriastradh			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
123803b705cfSriastradh			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
123903b705cfSriastradh			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
124003b705cfSriastradh			b += 12;
124103b705cfSriastradh		}
124203b705cfSriastradh		if (nbox_this_time & 2) {
124303b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
124403b705cfSriastradh			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
124503b705cfSriastradh			b += 6;
124603b705cfSriastradh		}
124703b705cfSriastradh		if (nbox_this_time & 1) {
124803b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
124903b705cfSriastradh		}
125003b705cfSriastradh
125103b705cfSriastradh		sna_vertex_lock(&sna->render);
125203b705cfSriastradh		sna_vertex_release__locked(&sna->render);
125303b705cfSriastradh		if (!nbox)
125403b705cfSriastradh			break;
125503b705cfSriastradh
125603b705cfSriastradh		sna_vertex_wait__locked(&sna->render);
125703b705cfSriastradh		sna_blt_fill_begin(sna, blt);
125803b705cfSriastradh	} while (1);
125903b705cfSriastradh	sna_vertex_unlock(&sna->render);
126003b705cfSriastradh}
126103b705cfSriastradh
126203b705cfSriastradhfastcall static void blt_composite_fill_box(struct sna *sna,
126303b705cfSriastradh					    const struct sna_composite_op *op,
126403b705cfSriastradh					    const BoxRec *box)
126503b705cfSriastradh{
126603b705cfSriastradh	sna_blt_fill_one(sna, &op->u.blt,
126703b705cfSriastradh			 box->x1 + op->dst.x,
126803b705cfSriastradh			 box->y1 + op->dst.y,
126903b705cfSriastradh			 box->x2 - box->x1,
127003b705cfSriastradh			 box->y2 - box->y1);
127142542f5fSchristos	_sna_blt_maybe_clear(op, box);
127203b705cfSriastradh}
127303b705cfSriastradh
127403b705cfSriastradhstatic void blt_composite_fill_boxes(struct sna *sna,
127503b705cfSriastradh				     const struct sna_composite_op *op,
127603b705cfSriastradh				     const BoxRec *box, int n)
127703b705cfSriastradh{
127803b705cfSriastradh	do {
127903b705cfSriastradh		sna_blt_fill_one(sna, &op->u.blt,
128003b705cfSriastradh				 box->x1 + op->dst.x, box->y1 + op->dst.y,
128103b705cfSriastradh				 box->x2 - box->x1, box->y2 - box->y1);
128203b705cfSriastradh		box++;
128303b705cfSriastradh	} while (--n);
128403b705cfSriastradh}
128503b705cfSriastradh
128603b705cfSriastradhstatic inline uint64_t add4(const BoxRec *b, int16_t x, int16_t y)
128703b705cfSriastradh{
128803b705cfSriastradh	union {
128903b705cfSriastradh		uint64_t v;
129003b705cfSriastradh		int16_t i[4];
129103b705cfSriastradh	} vi;
129203b705cfSriastradh	vi.v = *(uint64_t *)b;
129303b705cfSriastradh	vi.i[0] += x;
129403b705cfSriastradh	vi.i[1] += y;
129503b705cfSriastradh	vi.i[2] += x;
129603b705cfSriastradh	vi.i[3] += y;
129703b705cfSriastradh	return vi.v;
129803b705cfSriastradh}
129903b705cfSriastradh
130003b705cfSriastradhstatic void blt_composite_fill_boxes__thread(struct sna *sna,
130103b705cfSriastradh					     const struct sna_composite_op *op,
130203b705cfSriastradh					     const BoxRec *box, int nbox)
130303b705cfSriastradh{
130403b705cfSriastradh	struct kgem *kgem = &sna->kgem;
130503b705cfSriastradh	const struct sna_blt_state *blt = &op->u.blt;
130603b705cfSriastradh	uint32_t cmd = blt->cmd;
130703b705cfSriastradh	int16_t dx = op->dst.x;
130803b705cfSriastradh	int16_t dy = op->dst.y;
130903b705cfSriastradh
131003b705cfSriastradh	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));
131103b705cfSriastradh
131203b705cfSriastradh	sna_vertex_lock(&sna->render);
131342542f5fSchristos	assert(kgem->mode == KGEM_BLT);
131403b705cfSriastradh	if (!kgem_check_batch(kgem, 3)) {
131503b705cfSriastradh		sna_vertex_wait__locked(&sna->render);
131603b705cfSriastradh		sna_blt_fill_begin(sna, blt);
131703b705cfSriastradh	}
131803b705cfSriastradh
131903b705cfSriastradh	do {
132003b705cfSriastradh		uint32_t *b = kgem->batch + kgem->nbatch;
132113496ba1Ssnj		int nbox_this_time, rem;
132203b705cfSriastradh
132342542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
132403b705cfSriastradh		nbox_this_time = nbox;
132513496ba1Ssnj		rem = kgem_batch_space(kgem);
132613496ba1Ssnj		if (3*nbox_this_time > rem)
132713496ba1Ssnj			nbox_this_time = rem / 3;
132813496ba1Ssnj		DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
132913496ba1Ssnj		     __FUNCTION__, nbox_this_time, nbox, rem));
133013496ba1Ssnj		assert(nbox_this_time > 0);
133103b705cfSriastradh		nbox -= nbox_this_time;
133203b705cfSriastradh
133303b705cfSriastradh		kgem->nbatch += 3 * nbox_this_time;
133403b705cfSriastradh		assert(kgem->nbatch < kgem->surface);
133503b705cfSriastradh		sna_vertex_acquire__locked(&sna->render);
133603b705cfSriastradh		sna_vertex_unlock(&sna->render);
133703b705cfSriastradh
133803b705cfSriastradh		while (nbox_this_time >= 8) {
133903b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
134003b705cfSriastradh			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
134103b705cfSriastradh			b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
134203b705cfSriastradh			b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
134303b705cfSriastradh			b[12] = cmd; *(uint64_t *)(b+13) = add4(box++, dx, dy);
134403b705cfSriastradh			b[15] = cmd; *(uint64_t *)(b+16) = add4(box++, dx, dy);
134503b705cfSriastradh			b[18] = cmd; *(uint64_t *)(b+19) = add4(box++, dx, dy);
134603b705cfSriastradh			b[21] = cmd; *(uint64_t *)(b+22) = add4(box++, dx, dy);
134703b705cfSriastradh			b += 24;
134803b705cfSriastradh			nbox_this_time -= 8;
134903b705cfSriastradh		}
135003b705cfSriastradh		if (nbox_this_time & 4) {
135103b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
135203b705cfSriastradh			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
135303b705cfSriastradh			b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
135403b705cfSriastradh			b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
135503b705cfSriastradh			b += 12;
135603b705cfSriastradh		}
135703b705cfSriastradh		if (nbox_this_time & 2) {
135803b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
135903b705cfSriastradh			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
136003b705cfSriastradh			b += 6;
136103b705cfSriastradh		}
136203b705cfSriastradh		if (nbox_this_time & 1) {
136303b705cfSriastradh			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
136403b705cfSriastradh		}
136503b705cfSriastradh
136603b705cfSriastradh		sna_vertex_lock(&sna->render);
136703b705cfSriastradh		sna_vertex_release__locked(&sna->render);
136803b705cfSriastradh		if (!nbox)
136903b705cfSriastradh			break;
137003b705cfSriastradh
137103b705cfSriastradh		sna_vertex_wait__locked(&sna->render);
137203b705cfSriastradh		sna_blt_fill_begin(sna, blt);
137303b705cfSriastradh	} while (1);
137403b705cfSriastradh	sna_vertex_unlock(&sna->render);
137503b705cfSriastradh}
137603b705cfSriastradh
137703b705cfSriastradhfastcall
137803b705cfSriastradhstatic void blt_composite_nop(struct sna *sna,
137903b705cfSriastradh			       const struct sna_composite_op *op,
138003b705cfSriastradh			       const struct sna_composite_rectangles *r)
138103b705cfSriastradh{
138203b705cfSriastradh}
138303b705cfSriastradh
138403b705cfSriastradhfastcall static void blt_composite_nop_box(struct sna *sna,
138503b705cfSriastradh					   const struct sna_composite_op *op,
138603b705cfSriastradh					   const BoxRec *box)
138703b705cfSriastradh{
138803b705cfSriastradh}
138903b705cfSriastradh
139003b705cfSriastradhstatic void blt_composite_nop_boxes(struct sna *sna,
139103b705cfSriastradh				    const struct sna_composite_op *op,
139203b705cfSriastradh				    const BoxRec *box, int n)
139303b705cfSriastradh{
139403b705cfSriastradh}
139503b705cfSriastradh
139603b705cfSriastradhstatic bool
139703b705cfSriastradhbegin_blt(struct sna *sna,
139803b705cfSriastradh	  struct sna_composite_op *op)
139903b705cfSriastradh{
140013496ba1Ssnj	assert(sna->kgem.mode == KGEM_BLT);
140103b705cfSriastradh	if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo)) {
140203b705cfSriastradh		kgem_submit(&sna->kgem);
140303b705cfSriastradh		if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo))
140403b705cfSriastradh			return false;
140503b705cfSriastradh
140603b705cfSriastradh		_kgem_set_mode(&sna->kgem, KGEM_BLT);
140703b705cfSriastradh	}
140803b705cfSriastradh
140903b705cfSriastradh	return true;
141003b705cfSriastradh}
141103b705cfSriastradh
141203b705cfSriastradhstatic bool
141303b705cfSriastradhprepare_blt_nop(struct sna *sna,
141403b705cfSriastradh		struct sna_composite_op *op)
141503b705cfSriastradh{
141603b705cfSriastradh	DBG(("%s\n", __FUNCTION__));
141703b705cfSriastradh
141803b705cfSriastradh	op->blt   = blt_composite_nop;
141903b705cfSriastradh	op->box   = blt_composite_nop_box;
142003b705cfSriastradh	op->boxes = blt_composite_nop_boxes;
142103b705cfSriastradh	op->done  = nop_done;
142203b705cfSriastradh	return true;
142303b705cfSriastradh}
142403b705cfSriastradh
142503b705cfSriastradhstatic bool
142603b705cfSriastradhprepare_blt_clear(struct sna *sna,
142703b705cfSriastradh		  struct sna_composite_op *op)
142803b705cfSriastradh{
142903b705cfSriastradh	DBG(("%s\n", __FUNCTION__));
143003b705cfSriastradh
143103b705cfSriastradh	if (op->dst.bo == NULL) {
143203b705cfSriastradh		op->blt   = blt_composite_fill__cpu;
143342542f5fSchristos		if (op->dst.x|op->dst.y) {
143442542f5fSchristos			op->box   = blt_composite_fill_box__cpu;
143542542f5fSchristos			op->boxes = blt_composite_fill_boxes__cpu;
143642542f5fSchristos			op->thread_boxes = blt_composite_fill_boxes__cpu;
143742542f5fSchristos		} else {
143842542f5fSchristos			op->box   = blt_composite_fill_box_no_offset__cpu;
143942542f5fSchristos			op->boxes = blt_composite_fill_boxes_no_offset__cpu;
144042542f5fSchristos			op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu;
144142542f5fSchristos		}
144203b705cfSriastradh		op->done  = nop_done;
144303b705cfSriastradh		op->u.blt.pixel = 0;
144403b705cfSriastradh		return true;
144503b705cfSriastradh	}
144603b705cfSriastradh
144703b705cfSriastradh	op->blt = blt_composite_fill;
144803b705cfSriastradh	if (op->dst.x|op->dst.y) {
144903b705cfSriastradh		op->box   = blt_composite_fill_box;
145003b705cfSriastradh		op->boxes = blt_composite_fill_boxes;
145103b705cfSriastradh		op->thread_boxes = blt_composite_fill_boxes__thread;
145203b705cfSriastradh	} else {
145303b705cfSriastradh		op->box   = blt_composite_fill_box_no_offset;
145403b705cfSriastradh		op->boxes = blt_composite_fill_boxes_no_offset;
145503b705cfSriastradh		op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
145603b705cfSriastradh	}
145703b705cfSriastradh	op->done = nop_done;
145803b705cfSriastradh
145903b705cfSriastradh	if (!sna_blt_fill_init(sna, &op->u.blt,
146042542f5fSchristos			       op->dst.bo,
146142542f5fSchristos			       op->dst.pixmap->drawable.bitsPerPixel,
146242542f5fSchristos			       GXclear, 0))
146303b705cfSriastradh		return false;
146403b705cfSriastradh
146503b705cfSriastradh	return begin_blt(sna, op);
146603b705cfSriastradh}
146703b705cfSriastradh
146803b705cfSriastradhstatic bool
146903b705cfSriastradhprepare_blt_fill(struct sna *sna,
147003b705cfSriastradh		 struct sna_composite_op *op,
147103b705cfSriastradh		 uint32_t pixel)
147203b705cfSriastradh{
147303b705cfSriastradh	DBG(("%s\n", __FUNCTION__));
147403b705cfSriastradh
147503b705cfSriastradh	if (op->dst.bo == NULL) {
147603b705cfSriastradh		op->u.blt.pixel = pixel;
147703b705cfSriastradh		op->blt = blt_composite_fill__cpu;
147842542f5fSchristos		if (op->dst.x|op->dst.y) {
147942542f5fSchristos			op->box   = blt_composite_fill_box__cpu;
148042542f5fSchristos			op->boxes = blt_composite_fill_boxes__cpu;
148142542f5fSchristos			op->thread_boxes = blt_composite_fill_boxes__cpu;
148242542f5fSchristos		} else {
148342542f5fSchristos			op->box   = blt_composite_fill_box_no_offset__cpu;
148442542f5fSchristos			op->boxes = blt_composite_fill_boxes_no_offset__cpu;
148542542f5fSchristos			op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu;
148642542f5fSchristos		}
148703b705cfSriastradh		op->done = nop_done;
148803b705cfSriastradh		return true;
148903b705cfSriastradh	}
149003b705cfSriastradh
149103b705cfSriastradh	op->blt = blt_composite_fill;
149203b705cfSriastradh	if (op->dst.x|op->dst.y) {
149303b705cfSriastradh		op->box   = blt_composite_fill_box;
149403b705cfSriastradh		op->boxes = blt_composite_fill_boxes;
149503b705cfSriastradh		op->thread_boxes = blt_composite_fill_boxes__thread;
149603b705cfSriastradh	} else {
149703b705cfSriastradh		op->box   = blt_composite_fill_box_no_offset;
149803b705cfSriastradh		op->boxes = blt_composite_fill_boxes_no_offset;
149903b705cfSriastradh		op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
150003b705cfSriastradh	}
150103b705cfSriastradh	op->done = nop_done;
150203b705cfSriastradh
150303b705cfSriastradh	if (!sna_blt_fill_init(sna, &op->u.blt, op->dst.bo,
150403b705cfSriastradh			       op->dst.pixmap->drawable.bitsPerPixel,
150503b705cfSriastradh			       GXcopy, pixel))
150603b705cfSriastradh		return false;
150703b705cfSriastradh
150803b705cfSriastradh	return begin_blt(sna, op);
150903b705cfSriastradh}
151003b705cfSriastradh
151103b705cfSriastradhfastcall static void
151203b705cfSriastradhblt_composite_copy(struct sna *sna,
151303b705cfSriastradh		   const struct sna_composite_op *op,
151403b705cfSriastradh		   const struct sna_composite_rectangles *r)
151503b705cfSriastradh{
151603b705cfSriastradh	int x1, x2, y1, y2;
151703b705cfSriastradh	int src_x, src_y;
151803b705cfSriastradh
151903b705cfSriastradh	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
152003b705cfSriastradh	     __FUNCTION__,
152103b705cfSriastradh	     r->src.x, r->src.y,
152203b705cfSriastradh	     r->dst.x, r->dst.y,
152303b705cfSriastradh	     r->width, r->height));
152403b705cfSriastradh
152503b705cfSriastradh	/* XXX higher layer should have clipped? */
152603b705cfSriastradh
152703b705cfSriastradh	x1 = r->dst.x + op->dst.x;
152803b705cfSriastradh	y1 = r->dst.y + op->dst.y;
152903b705cfSriastradh	x2 = x1 + r->width;
153003b705cfSriastradh	y2 = y1 + r->height;
153103b705cfSriastradh
153242542f5fSchristos	src_x = r->src.x - x1 + op->u.blt.sx;
153342542f5fSchristos	src_y = r->src.y - y1 + op->u.blt.sy;
153403b705cfSriastradh
153503b705cfSriastradh	/* clip against dst */
153603b705cfSriastradh	if (x1 < 0)
153703b705cfSriastradh		x1 = 0;
153803b705cfSriastradh	if (y1 < 0)
153903b705cfSriastradh		y1 = 0;
154003b705cfSriastradh
154103b705cfSriastradh	if (x2 > op->dst.width)
154203b705cfSriastradh		x2 = op->dst.width;
154303b705cfSriastradh
154403b705cfSriastradh	if (y2 > op->dst.height)
154503b705cfSriastradh		y2 = op->dst.height;
154603b705cfSriastradh
154703b705cfSriastradh	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));
154803b705cfSriastradh
154903b705cfSriastradh	if (x2 <= x1 || y2 <= y1)
155003b705cfSriastradh		return;
155103b705cfSriastradh
155203b705cfSriastradh	sna_blt_copy_one(sna, &op->u.blt,
155303b705cfSriastradh			 x1 + src_x, y1 + src_y,
155403b705cfSriastradh			 x2 - x1, y2 - y1,
155503b705cfSriastradh			 x1, y1);
155603b705cfSriastradh}
155703b705cfSriastradh
155803b705cfSriastradhfastcall static void blt_composite_copy_box(struct sna *sna,
155903b705cfSriastradh					    const struct sna_composite_op *op,
156003b705cfSriastradh					    const BoxRec *box)
156103b705cfSriastradh{
156203b705cfSriastradh	DBG(("%s: box (%d, %d), (%d, %d)\n",
156303b705cfSriastradh	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
156403b705cfSriastradh	sna_blt_copy_one(sna, &op->u.blt,
156503b705cfSriastradh			 box->x1 + op->u.blt.sx,
156603b705cfSriastradh			 box->y1 + op->u.blt.sy,
156703b705cfSriastradh			 box->x2 - box->x1,
156803b705cfSriastradh			 box->y2 - box->y1,
156903b705cfSriastradh			 box->x1 + op->dst.x,
157003b705cfSriastradh			 box->y1 + op->dst.y);
157103b705cfSriastradh}
157203b705cfSriastradh
157303b705cfSriastradhstatic void blt_composite_copy_boxes(struct sna *sna,
157403b705cfSriastradh				     const struct sna_composite_op *op,
157503b705cfSriastradh				     const BoxRec *box, int nbox)
157603b705cfSriastradh{
157703b705cfSriastradh	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
157803b705cfSriastradh	do {
157903b705cfSriastradh		DBG(("%s: box (%d, %d), (%d, %d)\n",
158003b705cfSriastradh		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
158103b705cfSriastradh		sna_blt_copy_one(sna, &op->u.blt,
158203b705cfSriastradh				 box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
158303b705cfSriastradh				 box->x2 - box->x1, box->y2 - box->y1,
158403b705cfSriastradh				 box->x1 + op->dst.x, box->y1 + op->dst.y);
158503b705cfSriastradh		box++;
158603b705cfSriastradh	} while(--nbox);
158703b705cfSriastradh}
158803b705cfSriastradh
/*
 * Add a signed (x, y) displacement to a packed coordinate pair.
 *
 * @v holds two 16-bit fields: x in bits 0-15 and y in bits 16-31.
 * Each field is advanced independently, modulo 2^16, and the pair is
 * repacked in the same layout.
 *
 * The arithmetic is done in uint32_t and masked per field.  A negative
 * x sum therefore stays inside the low half-word instead of being
 * sign-extended over the y field, and no signed shift can overflow.
 */
static inline uint32_t add2(uint32_t v, int16_t x, int16_t y)
{
	uint32_t lo = ((v & 0xffff) + (uint16_t)x) & 0xffff;
	uint32_t hi = ((v >> 16) + (uint16_t)y) & 0xffff;

	return hi << 16 | lo;
}
159503b705cfSriastradh
159603b705cfSriastradhstatic void blt_composite_copy_boxes__thread(struct sna *sna,
159703b705cfSriastradh					     const struct sna_composite_op *op,
159803b705cfSriastradh					     const BoxRec *box, int nbox)
159903b705cfSriastradh{
160003b705cfSriastradh	struct kgem *kgem = &sna->kgem;
160103b705cfSriastradh	int dst_dx = op->dst.x;
160203b705cfSriastradh	int dst_dy = op->dst.y;
160303b705cfSriastradh	int src_dx = op->src.offset[0];
160403b705cfSriastradh	int src_dy = op->src.offset[1];
160503b705cfSriastradh	uint32_t cmd = op->u.blt.cmd;
160603b705cfSriastradh	uint32_t br13 = op->u.blt.br13;
160703b705cfSriastradh	struct kgem_bo *src_bo = op->u.blt.bo[0];
160803b705cfSriastradh	struct kgem_bo *dst_bo = op->u.blt.bo[1];
160903b705cfSriastradh	int src_pitch = op->u.blt.pitch[0];
161003b705cfSriastradh
161103b705cfSriastradh	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
161203b705cfSriastradh
161303b705cfSriastradh	sna_vertex_lock(&sna->render);
161403b705cfSriastradh
161503b705cfSriastradh	if ((dst_dx | dst_dy) == 0) {
161603b705cfSriastradh		uint64_t hdr = (uint64_t)br13 << 32 | cmd;
161703b705cfSriastradh		do {
161813496ba1Ssnj			int nbox_this_time, rem;
161903b705cfSriastradh
162003b705cfSriastradh			nbox_this_time = nbox;
162113496ba1Ssnj			rem = kgem_batch_space(kgem);
162213496ba1Ssnj			if (8*nbox_this_time > rem)
162313496ba1Ssnj				nbox_this_time = rem / 8;
162403b705cfSriastradh			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
162503b705cfSriastradh				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
162613496ba1Ssnj			DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
162713496ba1Ssnj			     __FUNCTION__, nbox_this_time, nbox, rem));
162813496ba1Ssnj			assert(nbox_this_time > 0);
162903b705cfSriastradh			nbox -= nbox_this_time;
163003b705cfSriastradh
163142542f5fSchristos			assert(sna->kgem.mode == KGEM_BLT);
163203b705cfSriastradh			do {
163303b705cfSriastradh				uint32_t *b = kgem->batch + kgem->nbatch;
163403b705cfSriastradh
163503b705cfSriastradh				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
163603b705cfSriastradh				     __FUNCTION__,
163703b705cfSriastradh				     box->x1, box->y1,
163803b705cfSriastradh				     box->x2 - box->x1, box->y2 - box->y1));
163903b705cfSriastradh
164003b705cfSriastradh				assert(box->x1 + src_dx >= 0);
164103b705cfSriastradh				assert(box->y1 + src_dy >= 0);
164203b705cfSriastradh				assert(box->x1 + src_dx <= INT16_MAX);
164303b705cfSriastradh				assert(box->y1 + src_dy <= INT16_MAX);
164403b705cfSriastradh
164503b705cfSriastradh				assert(box->x1 >= 0);
164603b705cfSriastradh				assert(box->y1 >= 0);
164703b705cfSriastradh
164803b705cfSriastradh				*(uint64_t *)&b[0] = hdr;
164903b705cfSriastradh				*(uint64_t *)&b[2] = *(const uint64_t *)box;
165003b705cfSriastradh				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
165103b705cfSriastradh						      I915_GEM_DOMAIN_RENDER << 16 |
165203b705cfSriastradh						      I915_GEM_DOMAIN_RENDER |
165303b705cfSriastradh						      KGEM_RELOC_FENCED,
165403b705cfSriastradh						      0);
165503b705cfSriastradh				b[5] = add2(b[2], src_dx, src_dy);
165603b705cfSriastradh				b[6] = src_pitch;
165703b705cfSriastradh				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
165803b705cfSriastradh						      I915_GEM_DOMAIN_RENDER << 16 |
165903b705cfSriastradh						      KGEM_RELOC_FENCED,
166003b705cfSriastradh						      0);
166103b705cfSriastradh				kgem->nbatch += 8;
166203b705cfSriastradh				assert(kgem->nbatch < kgem->surface);
166303b705cfSriastradh				box++;
166403b705cfSriastradh			} while (--nbox_this_time);
166503b705cfSriastradh
166603b705cfSriastradh			if (!nbox)
166703b705cfSriastradh				break;
166803b705cfSriastradh
166903b705cfSriastradh			_kgem_submit(kgem);
167003b705cfSriastradh			_kgem_set_mode(kgem, KGEM_BLT);
167103b705cfSriastradh		} while (1);
167203b705cfSriastradh	} else {
167303b705cfSriastradh		do {
167413496ba1Ssnj			int nbox_this_time, rem;
167503b705cfSriastradh
167603b705cfSriastradh			nbox_this_time = nbox;
167713496ba1Ssnj			rem = kgem_batch_space(kgem);
167813496ba1Ssnj			if (8*nbox_this_time > rem)
167913496ba1Ssnj				nbox_this_time = rem / 8;
168003b705cfSriastradh			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
168103b705cfSriastradh				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
168213496ba1Ssnj			DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
168313496ba1Ssnj			     __FUNCTION__, nbox_this_time, nbox, rem));
168413496ba1Ssnj			assert(nbox_this_time > 0);
168503b705cfSriastradh			nbox -= nbox_this_time;
168603b705cfSriastradh
168742542f5fSchristos			assert(sna->kgem.mode == KGEM_BLT);
168803b705cfSriastradh			do {
168903b705cfSriastradh				uint32_t *b = kgem->batch + kgem->nbatch;
169003b705cfSriastradh
169103b705cfSriastradh				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
169203b705cfSriastradh				     __FUNCTION__,
169303b705cfSriastradh				     box->x1, box->y1,
169403b705cfSriastradh				     box->x2 - box->x1, box->y2 - box->y1));
169503b705cfSriastradh
169603b705cfSriastradh				assert(box->x1 + src_dx >= 0);
169703b705cfSriastradh				assert(box->y1 + src_dy >= 0);
169803b705cfSriastradh
169903b705cfSriastradh				assert(box->x1 + dst_dx >= 0);
170003b705cfSriastradh				assert(box->y1 + dst_dy >= 0);
170103b705cfSriastradh
170203b705cfSriastradh				b[0] = cmd;
170303b705cfSriastradh				b[1] = br13;
170403b705cfSriastradh				b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
170503b705cfSriastradh				b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
170603b705cfSriastradh				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
170703b705cfSriastradh						      I915_GEM_DOMAIN_RENDER << 16 |
170803b705cfSriastradh						      I915_GEM_DOMAIN_RENDER |
170903b705cfSriastradh						      KGEM_RELOC_FENCED,
171003b705cfSriastradh						      0);
171103b705cfSriastradh				b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
171203b705cfSriastradh				b[6] = src_pitch;
171303b705cfSriastradh				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
171403b705cfSriastradh						      I915_GEM_DOMAIN_RENDER << 16 |
171503b705cfSriastradh						      KGEM_RELOC_FENCED,
171603b705cfSriastradh						      0);
171703b705cfSriastradh				kgem->nbatch += 8;
171803b705cfSriastradh				assert(kgem->nbatch < kgem->surface);
171903b705cfSriastradh				box++;
172003b705cfSriastradh			} while (--nbox_this_time);
172103b705cfSriastradh
172203b705cfSriastradh			if (!nbox)
172303b705cfSriastradh				break;
172403b705cfSriastradh
172503b705cfSriastradh			_kgem_submit(kgem);
172603b705cfSriastradh			_kgem_set_mode(kgem, KGEM_BLT);
172703b705cfSriastradh		} while (1);
172803b705cfSriastradh	}
172903b705cfSriastradh	sna_vertex_unlock(&sna->render);
173003b705cfSriastradh}
173103b705cfSriastradh
173242542f5fSchristosstatic void blt_composite_copy_boxes__thread64(struct sna *sna,
173342542f5fSchristos					       const struct sna_composite_op *op,
173442542f5fSchristos					       const BoxRec *box, int nbox)
173503b705cfSriastradh{
173642542f5fSchristos	struct kgem *kgem = &sna->kgem;
173742542f5fSchristos	int dst_dx = op->dst.x;
173842542f5fSchristos	int dst_dy = op->dst.y;
173942542f5fSchristos	int src_dx = op->src.offset[0];
174042542f5fSchristos	int src_dy = op->src.offset[1];
174142542f5fSchristos	uint32_t cmd = op->u.blt.cmd;
174242542f5fSchristos	uint32_t br13 = op->u.blt.br13;
174342542f5fSchristos	struct kgem_bo *src_bo = op->u.blt.bo[0];
174442542f5fSchristos	struct kgem_bo *dst_bo = op->u.blt.bo[1];
174542542f5fSchristos	int src_pitch = op->u.blt.pitch[0];
174603b705cfSriastradh
174742542f5fSchristos	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
174803b705cfSriastradh
174942542f5fSchristos	sna_vertex_lock(&sna->render);
175003b705cfSriastradh
175142542f5fSchristos	if ((dst_dx | dst_dy) == 0) {
175242542f5fSchristos		uint64_t hdr = (uint64_t)br13 << 32 | cmd;
175342542f5fSchristos		do {
175413496ba1Ssnj			int nbox_this_time, rem;
175503b705cfSriastradh
175642542f5fSchristos			nbox_this_time = nbox;
175713496ba1Ssnj			rem = kgem_batch_space(kgem);
175813496ba1Ssnj			if (10*nbox_this_time > rem)
175913496ba1Ssnj				nbox_this_time = rem / 10;
176042542f5fSchristos			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
176142542f5fSchristos				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
176213496ba1Ssnj			DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
176313496ba1Ssnj			     __FUNCTION__, nbox_this_time, nbox, rem));
176413496ba1Ssnj			assert(nbox_this_time > 0);
176542542f5fSchristos			nbox -= nbox_this_time;
176603b705cfSriastradh
176742542f5fSchristos			assert(kgem->mode == KGEM_BLT);
176842542f5fSchristos			do {
176942542f5fSchristos				uint32_t *b = kgem->batch + kgem->nbatch;
177003b705cfSriastradh
177142542f5fSchristos				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
177242542f5fSchristos				     __FUNCTION__,
177342542f5fSchristos				     box->x1, box->y1,
177442542f5fSchristos				     box->x2 - box->x1, box->y2 - box->y1));
177503b705cfSriastradh
177642542f5fSchristos				assert(box->x1 + src_dx >= 0);
177742542f5fSchristos				assert(box->y1 + src_dy >= 0);
177842542f5fSchristos				assert(box->x1 + src_dx <= INT16_MAX);
177942542f5fSchristos				assert(box->y1 + src_dy <= INT16_MAX);
178003b705cfSriastradh
178142542f5fSchristos				assert(box->x1 >= 0);
178242542f5fSchristos				assert(box->y1 >= 0);
178303b705cfSriastradh
178442542f5fSchristos				*(uint64_t *)&b[0] = hdr;
178542542f5fSchristos				*(uint64_t *)&b[2] = *(const uint64_t *)box;
178642542f5fSchristos				*(uint64_t *)(b+4) =
178742542f5fSchristos					kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
178842542f5fSchristos							 I915_GEM_DOMAIN_RENDER << 16 |
178942542f5fSchristos							 I915_GEM_DOMAIN_RENDER |
179042542f5fSchristos							 KGEM_RELOC_FENCED,
179142542f5fSchristos							 0);
179242542f5fSchristos				b[6] = add2(b[2], src_dx, src_dy);
179342542f5fSchristos				b[7] = src_pitch;
179442542f5fSchristos				*(uint64_t *)(b+8) =
179542542f5fSchristos					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
179642542f5fSchristos							 I915_GEM_DOMAIN_RENDER << 16 |
179742542f5fSchristos							 KGEM_RELOC_FENCED,
179842542f5fSchristos							 0);
179942542f5fSchristos				kgem->nbatch += 10;
180042542f5fSchristos				assert(kgem->nbatch < kgem->surface);
180142542f5fSchristos				box++;
180242542f5fSchristos			} while (--nbox_this_time);
180303b705cfSriastradh
180442542f5fSchristos			if (!nbox)
180542542f5fSchristos				break;
180603b705cfSriastradh
180742542f5fSchristos			_kgem_submit(kgem);
180842542f5fSchristos			_kgem_set_mode(kgem, KGEM_BLT);
180942542f5fSchristos		} while (1);
181042542f5fSchristos	} else {
181142542f5fSchristos		do {
181213496ba1Ssnj			int nbox_this_time, rem;
181342542f5fSchristos
181442542f5fSchristos			nbox_this_time = nbox;
181513496ba1Ssnj			rem = kgem_batch_space(kgem);
181613496ba1Ssnj			if (10*nbox_this_time > rem)
181713496ba1Ssnj				nbox_this_time = rem / 10;
181842542f5fSchristos			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
181942542f5fSchristos				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
182013496ba1Ssnj			DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
182113496ba1Ssnj			     __FUNCTION__, nbox_this_time, nbox, rem));
182213496ba1Ssnj			assert(nbox_this_time > 0);
182342542f5fSchristos			nbox -= nbox_this_time;
182442542f5fSchristos
182542542f5fSchristos			assert(kgem->mode == KGEM_BLT);
182642542f5fSchristos			do {
182742542f5fSchristos				uint32_t *b = kgem->batch + kgem->nbatch;
182842542f5fSchristos
182942542f5fSchristos				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
183042542f5fSchristos				     __FUNCTION__,
183142542f5fSchristos				     box->x1, box->y1,
183242542f5fSchristos				     box->x2 - box->x1, box->y2 - box->y1));
183342542f5fSchristos
183442542f5fSchristos				assert(box->x1 + src_dx >= 0);
183542542f5fSchristos				assert(box->y1 + src_dy >= 0);
183642542f5fSchristos
183742542f5fSchristos				assert(box->x1 + dst_dx >= 0);
183842542f5fSchristos				assert(box->y1 + dst_dy >= 0);
183942542f5fSchristos
184042542f5fSchristos				b[0] = cmd;
184142542f5fSchristos				b[1] = br13;
184242542f5fSchristos				b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
184342542f5fSchristos				b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
184442542f5fSchristos				*(uint64_t *)(b+4) =
184542542f5fSchristos					kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
184642542f5fSchristos							 I915_GEM_DOMAIN_RENDER << 16 |
184742542f5fSchristos							 I915_GEM_DOMAIN_RENDER |
184842542f5fSchristos							 KGEM_RELOC_FENCED,
184942542f5fSchristos							 0);
185042542f5fSchristos				b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
185142542f5fSchristos				b[7] = src_pitch;
185242542f5fSchristos				*(uint64_t *)(b+8) =
185342542f5fSchristos					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
185442542f5fSchristos							 I915_GEM_DOMAIN_RENDER << 16 |
185542542f5fSchristos							 KGEM_RELOC_FENCED,
185642542f5fSchristos							 0);
185742542f5fSchristos				kgem->nbatch += 10;
185842542f5fSchristos				assert(kgem->nbatch < kgem->surface);
185942542f5fSchristos				box++;
186042542f5fSchristos			} while (--nbox_this_time);
186142542f5fSchristos
186242542f5fSchristos			if (!nbox)
186342542f5fSchristos				break;
186442542f5fSchristos
186542542f5fSchristos			_kgem_submit(kgem);
186642542f5fSchristos			_kgem_set_mode(kgem, KGEM_BLT);
186742542f5fSchristos		} while (1);
186842542f5fSchristos	}
186942542f5fSchristos	sna_vertex_unlock(&sna->render);
187042542f5fSchristos}
187142542f5fSchristos
187242542f5fSchristosfastcall static void
187342542f5fSchristosblt_composite_copy_with_alpha(struct sna *sna,
187442542f5fSchristos			      const struct sna_composite_op *op,
187542542f5fSchristos			      const struct sna_composite_rectangles *r)
187642542f5fSchristos{
187742542f5fSchristos	int x1, x2, y1, y2;
187842542f5fSchristos	int src_x, src_y;
187942542f5fSchristos
188042542f5fSchristos	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
188142542f5fSchristos	     __FUNCTION__,
188242542f5fSchristos	     r->src.x, r->src.y,
188342542f5fSchristos	     r->dst.x, r->dst.y,
188442542f5fSchristos	     r->width, r->height));
188542542f5fSchristos
188642542f5fSchristos	/* XXX higher layer should have clipped? */
188742542f5fSchristos
188842542f5fSchristos	x1 = r->dst.x + op->dst.x;
188942542f5fSchristos	y1 = r->dst.y + op->dst.y;
189042542f5fSchristos	x2 = x1 + r->width;
189142542f5fSchristos	y2 = y1 + r->height;
189242542f5fSchristos
189342542f5fSchristos	src_x = r->src.x - x1 + op->u.blt.sx;
189442542f5fSchristos	src_y = r->src.y - y1 + op->u.blt.sy;
189542542f5fSchristos
189642542f5fSchristos	/* clip against dst */
189742542f5fSchristos	if (x1 < 0)
189842542f5fSchristos		x1 = 0;
189942542f5fSchristos	if (y1 < 0)
190042542f5fSchristos		y1 = 0;
190142542f5fSchristos
190242542f5fSchristos	if (x2 > op->dst.width)
190342542f5fSchristos		x2 = op->dst.width;
190442542f5fSchristos
190542542f5fSchristos	if (y2 > op->dst.height)
190642542f5fSchristos		y2 = op->dst.height;
190742542f5fSchristos
190842542f5fSchristos	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));
190942542f5fSchristos
191042542f5fSchristos	if (x2 <= x1 || y2 <= y1)
191142542f5fSchristos		return;
191242542f5fSchristos
191342542f5fSchristos	sna_blt_alpha_fixup_one(sna, &op->u.blt,
191442542f5fSchristos				x1 + src_x, y1 + src_y,
191542542f5fSchristos				x2 - x1, y2 - y1,
191642542f5fSchristos				x1, y1);
191742542f5fSchristos}
191842542f5fSchristos
191942542f5fSchristosfastcall static void
192042542f5fSchristosblt_composite_copy_box_with_alpha(struct sna *sna,
192142542f5fSchristos				  const struct sna_composite_op *op,
192242542f5fSchristos				  const BoxRec *box)
192342542f5fSchristos{
192442542f5fSchristos	DBG(("%s: box (%d, %d), (%d, %d)\n",
192542542f5fSchristos	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
192642542f5fSchristos	sna_blt_alpha_fixup_one(sna, &op->u.blt,
192742542f5fSchristos				box->x1 + op->u.blt.sx,
192842542f5fSchristos				box->y1 + op->u.blt.sy,
192942542f5fSchristos				box->x2 - box->x1,
193042542f5fSchristos				box->y2 - box->y1,
193142542f5fSchristos				box->x1 + op->dst.x,
193242542f5fSchristos				box->y1 + op->dst.y);
193342542f5fSchristos}
193442542f5fSchristos
193542542f5fSchristosstatic void
193642542f5fSchristosblt_composite_copy_boxes_with_alpha(struct sna *sna,
193742542f5fSchristos				    const struct sna_composite_op *op,
193842542f5fSchristos				    const BoxRec *box, int nbox)
193903b705cfSriastradh{
194003b705cfSriastradh	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
194103b705cfSriastradh	do {
194203b705cfSriastradh		DBG(("%s: box (%d, %d), (%d, %d)\n",
194303b705cfSriastradh		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
194403b705cfSriastradh		sna_blt_alpha_fixup_one(sna, &op->u.blt,
194503b705cfSriastradh					box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
194603b705cfSriastradh					box->x2 - box->x1, box->y2 - box->y1,
194703b705cfSriastradh					box->x1 + op->dst.x, box->y1 + op->dst.y);
194803b705cfSriastradh		box++;
194903b705cfSriastradh	} while(--nbox);
195003b705cfSriastradh}
195103b705cfSriastradh
195203b705cfSriastradhstatic bool
195303b705cfSriastradhprepare_blt_copy(struct sna *sna,
195403b705cfSriastradh		 struct sna_composite_op *op,
195503b705cfSriastradh		 struct kgem_bo *bo,
195603b705cfSriastradh		 uint32_t alpha_fixup)
195703b705cfSriastradh{
195803b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
195903b705cfSriastradh
196003b705cfSriastradh	assert(op->dst.bo);
196103b705cfSriastradh	assert(kgem_bo_can_blt(&sna->kgem, op->dst.bo));
196203b705cfSriastradh	assert(kgem_bo_can_blt(&sna->kgem, bo));
196303b705cfSriastradh
196442542f5fSchristos	kgem_set_mode(&sna->kgem, KGEM_BLT, op->dst.bo);
196503b705cfSriastradh	if (!kgem_check_many_bo_fenced(&sna->kgem, op->dst.bo, bo, NULL)) {
196603b705cfSriastradh		kgem_submit(&sna->kgem);
196703b705cfSriastradh		if (!kgem_check_many_bo_fenced(&sna->kgem,
196803b705cfSriastradh					       op->dst.bo, bo, NULL)) {
196903b705cfSriastradh			DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
197042542f5fSchristos			return sna_tiling_blt_composite(sna, op, bo,
197142542f5fSchristos							src->drawable.bitsPerPixel,
197242542f5fSchristos							alpha_fixup);
197303b705cfSriastradh		}
197403b705cfSriastradh		_kgem_set_mode(&sna->kgem, KGEM_BLT);
197503b705cfSriastradh	}
197603b705cfSriastradh
197703b705cfSriastradh	DBG(("%s\n", __FUNCTION__));
197803b705cfSriastradh
197903b705cfSriastradh	if (sna->kgem.gen >= 060 && op->dst.bo == bo)
198003b705cfSriastradh		op->done = gen6_blt_copy_done;
198103b705cfSriastradh	else
198203b705cfSriastradh		op->done = nop_done;
198303b705cfSriastradh
198403b705cfSriastradh	if (alpha_fixup) {
198503b705cfSriastradh		op->blt   = blt_composite_copy_with_alpha;
198603b705cfSriastradh		op->box   = blt_composite_copy_box_with_alpha;
198703b705cfSriastradh		op->boxes = blt_composite_copy_boxes_with_alpha;
198803b705cfSriastradh
198903b705cfSriastradh		if (!sna_blt_alpha_fixup_init(sna, &op->u.blt, bo, op->dst.bo,
199003b705cfSriastradh					      src->drawable.bitsPerPixel,
199103b705cfSriastradh					      alpha_fixup))
199203b705cfSriastradh			return false;
199303b705cfSriastradh	} else {
199403b705cfSriastradh		op->blt   = blt_composite_copy;
199503b705cfSriastradh		op->box   = blt_composite_copy_box;
199603b705cfSriastradh		op->boxes = blt_composite_copy_boxes;
199742542f5fSchristos		if (sna->kgem.gen >= 0100)
199842542f5fSchristos			op->thread_boxes = blt_composite_copy_boxes__thread64;
199942542f5fSchristos		else
200042542f5fSchristos			op->thread_boxes = blt_composite_copy_boxes__thread;
200103b705cfSriastradh
200203b705cfSriastradh		if (!sna_blt_copy_init(sna, &op->u.blt, bo, op->dst.bo,
200303b705cfSriastradh				       src->drawable.bitsPerPixel,
200403b705cfSriastradh				       GXcopy))
200503b705cfSriastradh			return false;
200603b705cfSriastradh	}
200703b705cfSriastradh
200803b705cfSriastradh	return true;
200903b705cfSriastradh}
201003b705cfSriastradh
201103b705cfSriastradhfastcall static void
201203b705cfSriastradhblt_put_composite__cpu(struct sna *sna,
201303b705cfSriastradh		       const struct sna_composite_op *op,
201403b705cfSriastradh		       const struct sna_composite_rectangles *r)
201503b705cfSriastradh{
201603b705cfSriastradh	PixmapPtr dst = op->dst.pixmap;
201703b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
201842542f5fSchristos	assert(src->devPrivate.ptr);
201942542f5fSchristos	assert(src->devKind);
202042542f5fSchristos	assert(dst->devPrivate.ptr);
202142542f5fSchristos	assert(dst->devKind);
202203b705cfSriastradh	memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
202303b705cfSriastradh		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
202403b705cfSriastradh		   r->src.x + op->u.blt.sx, r->src.y + op->u.blt.sy,
202503b705cfSriastradh		   r->dst.x + op->dst.x, r->dst.y + op->dst.y,
202603b705cfSriastradh		   r->width, r->height);
202703b705cfSriastradh}
202803b705cfSriastradh
202903b705cfSriastradhfastcall static void
203003b705cfSriastradhblt_put_composite_box__cpu(struct sna *sna,
203103b705cfSriastradh			   const struct sna_composite_op *op,
203203b705cfSriastradh			   const BoxRec *box)
203303b705cfSriastradh{
203403b705cfSriastradh	PixmapPtr dst = op->dst.pixmap;
203503b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
203642542f5fSchristos	assert(src->devPrivate.ptr);
203742542f5fSchristos	assert(src->devKind);
203842542f5fSchristos	assert(dst->devPrivate.ptr);
203942542f5fSchristos	assert(dst->devKind);
204003b705cfSriastradh	memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
204103b705cfSriastradh		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
204203b705cfSriastradh		   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
204303b705cfSriastradh		   box->x1 + op->dst.x, box->y1 + op->dst.y,
204403b705cfSriastradh		   box->x2-box->x1, box->y2-box->y1);
204503b705cfSriastradh}
204603b705cfSriastradh
204703b705cfSriastradhstatic void
204803b705cfSriastradhblt_put_composite_boxes__cpu(struct sna *sna,
204903b705cfSriastradh			     const struct sna_composite_op *op,
205003b705cfSriastradh			     const BoxRec *box, int n)
205103b705cfSriastradh{
205203b705cfSriastradh	PixmapPtr dst = op->dst.pixmap;
205303b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
205442542f5fSchristos	assert(src->devPrivate.ptr);
205542542f5fSchristos	assert(src->devKind);
205642542f5fSchristos	assert(dst->devPrivate.ptr);
205742542f5fSchristos	assert(dst->devKind);
205803b705cfSriastradh	do {
205903b705cfSriastradh		memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
206003b705cfSriastradh			   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
206103b705cfSriastradh			   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
206203b705cfSriastradh			   box->x1 + op->dst.x, box->y1 + op->dst.y,
206303b705cfSriastradh			   box->x2-box->x1, box->y2-box->y1);
206403b705cfSriastradh		box++;
206503b705cfSriastradh	} while (--n);
206603b705cfSriastradh}
206703b705cfSriastradh
206803b705cfSriastradhfastcall static void
206903b705cfSriastradhblt_put_composite_with_alpha__cpu(struct sna *sna,
207003b705cfSriastradh				  const struct sna_composite_op *op,
207103b705cfSriastradh				  const struct sna_composite_rectangles *r)
207203b705cfSriastradh{
207303b705cfSriastradh	PixmapPtr dst = op->dst.pixmap;
207403b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
207542542f5fSchristos	assert(src->devPrivate.ptr);
207642542f5fSchristos	assert(src->devKind);
207742542f5fSchristos	assert(dst->devPrivate.ptr);
207842542f5fSchristos	assert(dst->devKind);
207903b705cfSriastradh	memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
208003b705cfSriastradh		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
208103b705cfSriastradh		   r->src.x + op->u.blt.sx, r->src.y + op->u.blt.sy,
208203b705cfSriastradh		   r->dst.x + op->dst.x, r->dst.y + op->dst.y,
208303b705cfSriastradh		   r->width, r->height,
208403b705cfSriastradh		   0xffffffff, op->u.blt.pixel);
208503b705cfSriastradh
208603b705cfSriastradh}
208703b705cfSriastradh
208803b705cfSriastradhfastcall static void
208903b705cfSriastradhblt_put_composite_box_with_alpha__cpu(struct sna *sna,
209003b705cfSriastradh				      const struct sna_composite_op *op,
209103b705cfSriastradh				      const BoxRec *box)
209203b705cfSriastradh{
209303b705cfSriastradh	PixmapPtr dst = op->dst.pixmap;
209403b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
209542542f5fSchristos	assert(src->devPrivate.ptr);
209642542f5fSchristos	assert(src->devKind);
209742542f5fSchristos	assert(dst->devPrivate.ptr);
209842542f5fSchristos	assert(dst->devKind);
209903b705cfSriastradh	memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
210003b705cfSriastradh		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
210103b705cfSriastradh		   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
210203b705cfSriastradh		   box->x1 + op->dst.x, box->y1 + op->dst.y,
210303b705cfSriastradh		   box->x2-box->x1, box->y2-box->y1,
210403b705cfSriastradh		   0xffffffff, op->u.blt.pixel);
210503b705cfSriastradh}
210603b705cfSriastradh
210703b705cfSriastradhstatic void
210803b705cfSriastradhblt_put_composite_boxes_with_alpha__cpu(struct sna *sna,
210903b705cfSriastradh					const struct sna_composite_op *op,
211003b705cfSriastradh					const BoxRec *box, int n)
211103b705cfSriastradh{
211203b705cfSriastradh	PixmapPtr dst = op->dst.pixmap;
211303b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
211442542f5fSchristos	assert(src->devPrivate.ptr);
211542542f5fSchristos	assert(src->devKind);
211642542f5fSchristos	assert(dst->devPrivate.ptr);
211742542f5fSchristos	assert(dst->devKind);
211803b705cfSriastradh	do {
211903b705cfSriastradh		memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
212003b705cfSriastradh			   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
212103b705cfSriastradh			   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
212203b705cfSriastradh			   box->x1 + op->dst.x, box->y1 + op->dst.y,
212303b705cfSriastradh			   box->x2-box->x1, box->y2-box->y1,
212403b705cfSriastradh			   0xffffffff, op->u.blt.pixel);
212503b705cfSriastradh		box++;
212603b705cfSriastradh	} while (--n);
212703b705cfSriastradh}
212803b705cfSriastradh
212903b705cfSriastradhfastcall static void
213003b705cfSriastradhblt_put_composite(struct sna *sna,
213103b705cfSriastradh		  const struct sna_composite_op *op,
213203b705cfSriastradh		  const struct sna_composite_rectangles *r)
213303b705cfSriastradh{
213403b705cfSriastradh	PixmapPtr dst = op->dst.pixmap;
213503b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
213603b705cfSriastradh	struct sna_pixmap *dst_priv = sna_pixmap(dst);
213703b705cfSriastradh	int pitch = src->devKind;
213803b705cfSriastradh	char *data = src->devPrivate.ptr;
213903b705cfSriastradh	int bpp = src->drawable.bitsPerPixel;
214003b705cfSriastradh
214103b705cfSriastradh	int16_t dst_x = r->dst.x + op->dst.x;
214203b705cfSriastradh	int16_t dst_y = r->dst.y + op->dst.y;
214303b705cfSriastradh	int16_t src_x = r->src.x + op->u.blt.sx;
214403b705cfSriastradh	int16_t src_y = r->src.y + op->u.blt.sy;
214503b705cfSriastradh
214603b705cfSriastradh	if (!dst_priv->pinned &&
214703b705cfSriastradh	    dst_x <= 0 && dst_y <= 0 &&
214803b705cfSriastradh	    dst_x + r->width >= op->dst.width &&
214903b705cfSriastradh	    dst_y + r->height >= op->dst.height) {
215003b705cfSriastradh		data += (src_x - dst_x) * bpp / 8;
215103b705cfSriastradh		data += (src_y - dst_y) * pitch;
215203b705cfSriastradh
215342542f5fSchristos		assert(op->dst.bo == dst_priv->gpu_bo);
215442542f5fSchristos		sna_replace(sna, op->dst.pixmap, data, pitch);
215503b705cfSriastradh	} else {
215603b705cfSriastradh		BoxRec box;
215703b705cfSriastradh		bool ok;
215803b705cfSriastradh
215903b705cfSriastradh		box.x1 = dst_x;
216003b705cfSriastradh		box.y1 = dst_y;
216103b705cfSriastradh		box.x2 = dst_x + r->width;
216203b705cfSriastradh		box.y2 = dst_y + r->height;
216303b705cfSriastradh
216403b705cfSriastradh		ok = sna_write_boxes(sna, dst,
216503b705cfSriastradh				     dst_priv->gpu_bo, 0, 0,
216603b705cfSriastradh				     data, pitch, src_x, src_y,
216703b705cfSriastradh				     &box, 1);
216803b705cfSriastradh		assert(ok);
216903b705cfSriastradh		(void)ok;
217003b705cfSriastradh	}
217103b705cfSriastradh}
217203b705cfSriastradh
217303b705cfSriastradhfastcall static void blt_put_composite_box(struct sna *sna,
217403b705cfSriastradh					   const struct sna_composite_op *op,
217503b705cfSriastradh					   const BoxRec *box)
217603b705cfSriastradh{
217703b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
217803b705cfSriastradh	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
217903b705cfSriastradh
218003b705cfSriastradh	DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
218103b705cfSriastradh	     op->u.blt.sx, op->u.blt.sy,
218203b705cfSriastradh	     op->dst.x, op->dst.y));
218303b705cfSriastradh
218442542f5fSchristos	assert(src->devPrivate.ptr);
218542542f5fSchristos	assert(src->devKind);
218603b705cfSriastradh	if (!dst_priv->pinned &&
218703b705cfSriastradh	    box->x2 - box->x1 == op->dst.width &&
218803b705cfSriastradh	    box->y2 - box->y1 == op->dst.height) {
218903b705cfSriastradh		int pitch = src->devKind;
219003b705cfSriastradh		int bpp = src->drawable.bitsPerPixel / 8;
219103b705cfSriastradh		char *data = src->devPrivate.ptr;
219203b705cfSriastradh
219303b705cfSriastradh		data += (box->y1 + op->u.blt.sy) * pitch;
219403b705cfSriastradh		data += (box->x1 + op->u.blt.sx) * bpp;
219503b705cfSriastradh
219642542f5fSchristos		assert(op->dst.bo == dst_priv->gpu_bo);
219742542f5fSchristos		sna_replace(sna, op->dst.pixmap, data, pitch);
219803b705cfSriastradh	} else {
219903b705cfSriastradh		bool ok;
220003b705cfSriastradh
220103b705cfSriastradh		ok = sna_write_boxes(sna, op->dst.pixmap,
220203b705cfSriastradh				     op->dst.bo, op->dst.x, op->dst.y,
220303b705cfSriastradh				     src->devPrivate.ptr,
220403b705cfSriastradh				     src->devKind,
220503b705cfSriastradh				     op->u.blt.sx, op->u.blt.sy,
220603b705cfSriastradh				     box, 1);
220703b705cfSriastradh		assert(ok);
220803b705cfSriastradh		(void)ok;
220903b705cfSriastradh	}
221003b705cfSriastradh}
221103b705cfSriastradh
221203b705cfSriastradhstatic void blt_put_composite_boxes(struct sna *sna,
221303b705cfSriastradh				    const struct sna_composite_op *op,
221403b705cfSriastradh				    const BoxRec *box, int n)
221503b705cfSriastradh{
221603b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
221703b705cfSriastradh	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
221803b705cfSriastradh
221903b705cfSriastradh	DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__,
222003b705cfSriastradh	     op->u.blt.sx, op->u.blt.sy,
222103b705cfSriastradh	     op->dst.x, op->dst.y,
222203b705cfSriastradh	     box->x1, box->y1, box->x2, box->y2, n));
222303b705cfSriastradh
222442542f5fSchristos	assert(src->devPrivate.ptr);
222542542f5fSchristos	assert(src->devKind);
222603b705cfSriastradh	if (n == 1 && !dst_priv->pinned &&
222703b705cfSriastradh	    box->x2 - box->x1 == op->dst.width &&
222803b705cfSriastradh	    box->y2 - box->y1 == op->dst.height) {
222903b705cfSriastradh		int pitch = src->devKind;
223003b705cfSriastradh		int bpp = src->drawable.bitsPerPixel / 8;
223103b705cfSriastradh		char *data = src->devPrivate.ptr;
223203b705cfSriastradh
223303b705cfSriastradh		data += (box->y1 + op->u.blt.sy) * pitch;
223403b705cfSriastradh		data += (box->x1 + op->u.blt.sx) * bpp;
223503b705cfSriastradh
223642542f5fSchristos		assert(op->dst.bo == dst_priv->gpu_bo);
223742542f5fSchristos		sna_replace(sna, op->dst.pixmap, data, pitch);
223803b705cfSriastradh	} else {
223903b705cfSriastradh		bool ok;
224003b705cfSriastradh
224103b705cfSriastradh		ok = sna_write_boxes(sna, op->dst.pixmap,
224203b705cfSriastradh				     op->dst.bo, op->dst.x, op->dst.y,
224303b705cfSriastradh				     src->devPrivate.ptr,
224403b705cfSriastradh				     src->devKind,
224503b705cfSriastradh				     op->u.blt.sx, op->u.blt.sy,
224603b705cfSriastradh				     box, n);
224703b705cfSriastradh		assert(ok);
224803b705cfSriastradh		(void)ok;
224903b705cfSriastradh	}
225003b705cfSriastradh}
225103b705cfSriastradh
225203b705cfSriastradhfastcall static void
225303b705cfSriastradhblt_put_composite_with_alpha(struct sna *sna,
225403b705cfSriastradh			     const struct sna_composite_op *op,
225503b705cfSriastradh			     const struct sna_composite_rectangles *r)
225603b705cfSriastradh{
225703b705cfSriastradh	PixmapPtr dst = op->dst.pixmap;
225803b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
225903b705cfSriastradh	struct sna_pixmap *dst_priv = sna_pixmap(dst);
226003b705cfSriastradh	int pitch = src->devKind;
226103b705cfSriastradh	char *data = src->devPrivate.ptr;
226203b705cfSriastradh
226303b705cfSriastradh	int16_t dst_x = r->dst.x + op->dst.x;
226403b705cfSriastradh	int16_t dst_y = r->dst.y + op->dst.y;
226503b705cfSriastradh	int16_t src_x = r->src.x + op->u.blt.sx;
226603b705cfSriastradh	int16_t src_y = r->src.y + op->u.blt.sy;
226703b705cfSriastradh
226842542f5fSchristos	assert(src->devPrivate.ptr);
226942542f5fSchristos	assert(src->devKind);
227042542f5fSchristos
227103b705cfSriastradh	if (!dst_priv->pinned &&
227203b705cfSriastradh	    dst_x <= 0 && dst_y <= 0 &&
227303b705cfSriastradh	    dst_x + r->width >= op->dst.width &&
227403b705cfSriastradh	    dst_y + r->height >= op->dst.height) {
227503b705cfSriastradh		int bpp = dst->drawable.bitsPerPixel / 8;
227603b705cfSriastradh
227703b705cfSriastradh		data += (src_x - dst_x) * bpp;
227803b705cfSriastradh		data += (src_y - dst_y) * pitch;
227903b705cfSriastradh
228042542f5fSchristos		assert(op->dst.bo == dst_priv->gpu_bo);
228142542f5fSchristos		sna_replace__xor(sna, op->dst.pixmap, data, pitch,
228242542f5fSchristos				 0xffffffff, op->u.blt.pixel);
228303b705cfSriastradh	} else {
228403b705cfSriastradh		BoxRec box;
228503b705cfSriastradh
228603b705cfSriastradh		box.x1 = dst_x;
228703b705cfSriastradh		box.y1 = dst_y;
228803b705cfSriastradh		box.x2 = dst_x + r->width;
228903b705cfSriastradh		box.y2 = dst_y + r->height;
229003b705cfSriastradh
229103b705cfSriastradh		sna_write_boxes__xor(sna, dst,
229203b705cfSriastradh				     dst_priv->gpu_bo, 0, 0,
229303b705cfSriastradh				     data, pitch, src_x, src_y,
229403b705cfSriastradh				     &box, 1,
229503b705cfSriastradh				     0xffffffff, op->u.blt.pixel);
229603b705cfSriastradh	}
229703b705cfSriastradh}
229803b705cfSriastradh
229903b705cfSriastradhfastcall static void
230003b705cfSriastradhblt_put_composite_box_with_alpha(struct sna *sna,
230103b705cfSriastradh				 const struct sna_composite_op *op,
230203b705cfSriastradh				 const BoxRec *box)
230303b705cfSriastradh{
230403b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
230503b705cfSriastradh	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
230603b705cfSriastradh
230703b705cfSriastradh	DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
230803b705cfSriastradh	     op->u.blt.sx, op->u.blt.sy,
230903b705cfSriastradh	     op->dst.x, op->dst.y));
231003b705cfSriastradh
231142542f5fSchristos	assert(src->devPrivate.ptr);
231242542f5fSchristos	assert(src->devKind);
231342542f5fSchristos
231403b705cfSriastradh	if (!dst_priv->pinned &&
231503b705cfSriastradh	    box->x2 - box->x1 == op->dst.width &&
231603b705cfSriastradh	    box->y2 - box->y1 == op->dst.height) {
231703b705cfSriastradh		int pitch = src->devKind;
231803b705cfSriastradh		int bpp = src->drawable.bitsPerPixel / 8;
231903b705cfSriastradh		char *data = src->devPrivate.ptr;
232003b705cfSriastradh
232103b705cfSriastradh		data += (box->y1 + op->u.blt.sy) * pitch;
232203b705cfSriastradh		data += (box->x1 + op->u.blt.sx) * bpp;
232303b705cfSriastradh
232442542f5fSchristos		assert(op->dst.bo == dst_priv->gpu_bo);
232542542f5fSchristos		sna_replace__xor(sna, op->dst.pixmap, data, pitch,
232642542f5fSchristos				 0xffffffff, op->u.blt.pixel);
232703b705cfSriastradh	} else {
232803b705cfSriastradh		sna_write_boxes__xor(sna, op->dst.pixmap,
232903b705cfSriastradh				     op->dst.bo, op->dst.x, op->dst.y,
233003b705cfSriastradh				     src->devPrivate.ptr,
233103b705cfSriastradh				     src->devKind,
233203b705cfSriastradh				     op->u.blt.sx, op->u.blt.sy,
233303b705cfSriastradh				     box, 1,
233403b705cfSriastradh				     0xffffffff, op->u.blt.pixel);
233503b705cfSriastradh	}
233603b705cfSriastradh}
233703b705cfSriastradh
233803b705cfSriastradhstatic void
233903b705cfSriastradhblt_put_composite_boxes_with_alpha(struct sna *sna,
234003b705cfSriastradh				   const struct sna_composite_op *op,
234103b705cfSriastradh				   const BoxRec *box, int n)
234203b705cfSriastradh{
234303b705cfSriastradh	PixmapPtr src = op->u.blt.src_pixmap;
234403b705cfSriastradh	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
234503b705cfSriastradh
234603b705cfSriastradh	DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__,
234703b705cfSriastradh	     op->u.blt.sx, op->u.blt.sy,
234803b705cfSriastradh	     op->dst.x, op->dst.y,
234903b705cfSriastradh	     box->x1, box->y1, box->x2, box->y2, n));
235003b705cfSriastradh
235142542f5fSchristos	assert(src->devPrivate.ptr);
235242542f5fSchristos	assert(src->devKind);
235342542f5fSchristos
235403b705cfSriastradh	if (n == 1 && !dst_priv->pinned &&
235503b705cfSriastradh	    box->x2 - box->x1 == op->dst.width &&
235603b705cfSriastradh	    box->y2 - box->y1 == op->dst.height) {
235703b705cfSriastradh		int pitch = src->devKind;
235803b705cfSriastradh		int bpp = src->drawable.bitsPerPixel / 8;
235903b705cfSriastradh		char *data = src->devPrivate.ptr;
236003b705cfSriastradh
236103b705cfSriastradh		data += (box->y1 + op->u.blt.sy) * pitch;
236203b705cfSriastradh		data += (box->x1 + op->u.blt.sx) * bpp;
236303b705cfSriastradh
236442542f5fSchristos		assert(dst_priv->gpu_bo == op->dst.bo);
236542542f5fSchristos		sna_replace__xor(sna, op->dst.pixmap, data, pitch,
236642542f5fSchristos				 0xffffffff, op->u.blt.pixel);
236703b705cfSriastradh	} else {
236803b705cfSriastradh		sna_write_boxes__xor(sna, op->dst.pixmap,
236903b705cfSriastradh				     op->dst.bo, op->dst.x, op->dst.y,
237003b705cfSriastradh				     src->devPrivate.ptr,
237103b705cfSriastradh				     src->devKind,
237203b705cfSriastradh				     op->u.blt.sx, op->u.blt.sy,
237303b705cfSriastradh				     box, n,
237403b705cfSriastradh				     0xffffffff, op->u.blt.pixel);
237503b705cfSriastradh	}
237603b705cfSriastradh}
237703b705cfSriastradh
237803b705cfSriastradhstatic bool
237903b705cfSriastradhprepare_blt_put(struct sna *sna,
238003b705cfSriastradh		struct sna_composite_op *op,
238103b705cfSriastradh		uint32_t alpha_fixup)
238203b705cfSriastradh{
238303b705cfSriastradh	DBG(("%s\n", __FUNCTION__));
238403b705cfSriastradh
238542542f5fSchristos	assert(!sna_pixmap(op->dst.pixmap)->clear);
238642542f5fSchristos
238703b705cfSriastradh	if (op->dst.bo) {
238803b705cfSriastradh		assert(op->dst.bo == sna_pixmap(op->dst.pixmap)->gpu_bo);
238903b705cfSriastradh		if (alpha_fixup) {
239003b705cfSriastradh			op->u.blt.pixel = alpha_fixup;
239103b705cfSriastradh			op->blt   = blt_put_composite_with_alpha;
239203b705cfSriastradh			op->box   = blt_put_composite_box_with_alpha;
239303b705cfSriastradh			op->boxes = blt_put_composite_boxes_with_alpha;
239403b705cfSriastradh		} else {
239503b705cfSriastradh			op->blt   = blt_put_composite;
239603b705cfSriastradh			op->box   = blt_put_composite_box;
239703b705cfSriastradh			op->boxes = blt_put_composite_boxes;
239803b705cfSriastradh		}
239903b705cfSriastradh	} else {
240003b705cfSriastradh		if (alpha_fixup) {
240103b705cfSriastradh			op->u.blt.pixel = alpha_fixup;
240203b705cfSriastradh			op->blt   = blt_put_composite_with_alpha__cpu;
240303b705cfSriastradh			op->box   = blt_put_composite_box_with_alpha__cpu;
240403b705cfSriastradh			op->boxes = blt_put_composite_boxes_with_alpha__cpu;
240503b705cfSriastradh		} else {
240603b705cfSriastradh			op->blt   = blt_put_composite__cpu;
240703b705cfSriastradh			op->box   = blt_put_composite_box__cpu;
240803b705cfSriastradh			op->boxes = blt_put_composite_boxes__cpu;
240903b705cfSriastradh		}
241003b705cfSriastradh	}
241103b705cfSriastradh	op->done = nop_done;
241203b705cfSriastradh
241303b705cfSriastradh	return true;
241403b705cfSriastradh}
241503b705cfSriastradh
241603b705cfSriastradhstatic bool
241703b705cfSriastradhis_clear(PixmapPtr pixmap)
241803b705cfSriastradh{
241903b705cfSriastradh	struct sna_pixmap *priv = sna_pixmap(pixmap);
242003b705cfSriastradh	return priv && priv->clear;
242103b705cfSriastradh}
242203b705cfSriastradh
/*
 * Packed 8-bit-per-channel arithmetic on 32-bit pixels.  Two channels
 * (bits 0-7 and 16-23) are processed at once in a single 32-bit word.
 */
#define G_SHIFT 8
#define RB_MASK 0xff00ff
#define RB_ONE_HALF 0x800080
#define RB_MASK_PLUS_ONE 0x10000100

/* x_rb = (x_rb * a) / 255, rounded, for both packed channels; t is scratch. */
#define UN8_rb_MUL_UN8(x, a, t) do {				\
	t  = ((x) & RB_MASK) * (a);				\
	t += RB_ONE_HALF;					\
	x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;	\
	x &= RB_MASK;						\
} while (0)

/* x_rb = min(x_rb + y_rb, 255) for both packed channels; t is scratch. */
#define UN8_rb_ADD_UN8_rb(x, y, t) do {				\
	t = ((x) + (y));					\
	t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);	\
	x = (t & RB_MASK);					\
} while (0)

/* x = min(x * a / 255 + y, 255) on all four channels; x is overwritten. */
#define UN8x4_MUL_UN8_ADD_UN8x4(x, a, y) do {			\
	uint32_t r1__, r2__, r3__, t__;				\
	\
	r1__ = (x);						\
	r2__ = (y) & RB_MASK;					\
	UN8_rb_MUL_UN8(r1__, (a), t__);				\
	UN8_rb_ADD_UN8_rb(r1__, r2__, t__);			\
	\
	r2__ = (x) >> G_SHIFT;					\
	r3__ = ((y) >> G_SHIFT) & RB_MASK;			\
	UN8_rb_MUL_UN8(r2__, (a), t__);				\
	UN8_rb_ADD_UN8_rb(r2__, r3__, t__);			\
	\
	(x) = r1__ | (r2__ << G_SHIFT);				\
} while (0)

/*
 * Blend src over dst: every dst channel is scaled by (255 - src alpha),
 * where src alpha is the top byte of src, and the matching src channel is
 * then added with saturation.  Returns the blended pixel.
 */
static inline uint32_t
over(uint32_t src, uint32_t dst)
{
	/* Top byte of ~src, i.e. 255 minus the source alpha. */
	const uint32_t inv_alpha = (~src) >> 24;
	uint32_t result = dst;

	UN8x4_MUL_UN8_ADD_UN8x4(result, inv_alpha, src);

	return result;
}
246642542f5fSchristos
246742542f5fSchristosstatic inline uint32_t
246842542f5fSchristosadd(uint32_t src, uint32_t dst)
246942542f5fSchristos{
247042542f5fSchristos#define UN8x4_ADD_UN8x4(x, y) do {				\
247142542f5fSchristos	uint32_t r1__, r2__, r3__, t__;				\
247242542f5fSchristos	\
247342542f5fSchristos	r1__ = (x) & RB_MASK;					\
247442542f5fSchristos	r2__ = (y) & RB_MASK;					\
247542542f5fSchristos	UN8_rb_ADD_UN8_rb(r1__, r2__, t__);			\
247642542f5fSchristos	\
247742542f5fSchristos	r2__ = ((x) >> G_SHIFT) & RB_MASK;			\
247842542f5fSchristos	r3__ = ((y) >> G_SHIFT) & RB_MASK;			\
247942542f5fSchristos	UN8_rb_ADD_UN8_rb(r2__, r3__, t__);			\
248042542f5fSchristos	\
248142542f5fSchristos	x = r1__ | (r2__ << G_SHIFT);				\
248242542f5fSchristos} while (0)
248342542f5fSchristos
248442542f5fSchristos	UN8x4_ADD_UN8x4(src, dst);
248542542f5fSchristos	return src;
248642542f5fSchristos}
248742542f5fSchristos
248803b705cfSriastradhbool
248903b705cfSriastradhsna_blt_composite(struct sna *sna,
249003b705cfSriastradh		  uint32_t op,
249103b705cfSriastradh		  PicturePtr src,
249203b705cfSriastradh		  PicturePtr dst,
249303b705cfSriastradh		  int16_t x, int16_t y,
249403b705cfSriastradh		  int16_t dst_x, int16_t dst_y,
249503b705cfSriastradh		  int16_t width, int16_t height,
249642542f5fSchristos		  unsigned flags,
249742542f5fSchristos		  struct sna_composite_op *tmp)
249803b705cfSriastradh{
249903b705cfSriastradh	PictFormat src_format = src->format;
250003b705cfSriastradh	PixmapPtr src_pixmap;
250103b705cfSriastradh	struct kgem_bo *bo;
250203b705cfSriastradh	int16_t tx, ty;
250303b705cfSriastradh	BoxRec dst_box, src_box;
250403b705cfSriastradh	uint32_t alpha_fixup;
250503b705cfSriastradh	uint32_t color, hint;
250603b705cfSriastradh	bool was_clear;
250703b705cfSriastradh	bool ret;
250803b705cfSriastradh
250903b705cfSriastradh#if DEBUG_NO_BLT || NO_BLT_COMPOSITE
251003b705cfSriastradh	return false;
251103b705cfSriastradh#endif
251203b705cfSriastradh	DBG(("%s (%d, %d), (%d, %d), %dx%d\n",
251303b705cfSriastradh	     __FUNCTION__, x, y, dst_x, dst_y, width, height));
251403b705cfSriastradh
251503b705cfSriastradh	switch (dst->pDrawable->bitsPerPixel) {
251603b705cfSriastradh	case 8:
251703b705cfSriastradh	case 16:
251803b705cfSriastradh	case 32:
251903b705cfSriastradh		break;
252003b705cfSriastradh	default:
252103b705cfSriastradh		DBG(("%s: unhandled bpp: %d\n", __FUNCTION__,
252203b705cfSriastradh		     dst->pDrawable->bitsPerPixel));
252303b705cfSriastradh		return false;
252403b705cfSriastradh	}
252503b705cfSriastradh
252603b705cfSriastradh	tmp->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
252742542f5fSchristos	was_clear = is_clear(tmp->dst.pixmap);
252803b705cfSriastradh
252903b705cfSriastradh	if (width | height) {
253003b705cfSriastradh		dst_box.x1 = dst_x;
253103b705cfSriastradh		dst_box.x2 = bound(dst_x, width);
253203b705cfSriastradh		dst_box.y1 = dst_y;
253303b705cfSriastradh		dst_box.y2 = bound(dst_y, height);
253403b705cfSriastradh	} else
253503b705cfSriastradh		sna_render_picture_extents(dst, &dst_box);
253603b705cfSriastradh
253703b705cfSriastradh	tmp->dst.format = dst->format;
253803b705cfSriastradh	tmp->dst.width = tmp->dst.pixmap->drawable.width;
253903b705cfSriastradh	tmp->dst.height = tmp->dst.pixmap->drawable.height;
254003b705cfSriastradh	get_drawable_deltas(dst->pDrawable, tmp->dst.pixmap,
254103b705cfSriastradh			    &tmp->dst.x, &tmp->dst.y);
254203b705cfSriastradh
254303b705cfSriastradh	if (op == PictOpClear) {
254403b705cfSriastradhclear:
254542542f5fSchristos		if (was_clear && sna_pixmap(tmp->dst.pixmap)->clear_color == 0) {
254642542f5fSchristos			sna_pixmap(tmp->dst.pixmap)->clear = true;
254703b705cfSriastradh			return prepare_blt_nop(sna, tmp);
254842542f5fSchristos		}
254903b705cfSriastradh
255003b705cfSriastradh		hint = 0;
255103b705cfSriastradh		if (can_render(sna)) {
255203b705cfSriastradh			hint |= PREFER_GPU;
255342542f5fSchristos			if ((flags & COMPOSITE_PARTIAL) == 0) {
255442542f5fSchristos				hint |= IGNORE_DAMAGE;
255542542f5fSchristos				if (width  == tmp->dst.pixmap->drawable.width &&
255603b705cfSriastradh				    height == tmp->dst.pixmap->drawable.height)
255703b705cfSriastradh					hint |= REPLACES;
255803b705cfSriastradh			}
255903b705cfSriastradh		}
256003b705cfSriastradh		tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
256103b705cfSriastradh						  &dst_box, &tmp->damage);
256242542f5fSchristos		if (tmp->dst.bo) {
256342542f5fSchristos			if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
256442542f5fSchristos				DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n",
256542542f5fSchristos				     __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch));
256642542f5fSchristos				return false;
256742542f5fSchristos			}
256842542f5fSchristos			if (hint & REPLACES)
256942542f5fSchristos				kgem_bo_undo(&sna->kgem, tmp->dst.bo);
257042542f5fSchristos		} else {
257103b705cfSriastradh			RegionRec region;
257203b705cfSriastradh
257303b705cfSriastradh			region.extents = dst_box;
257403b705cfSriastradh			region.data = NULL;
257503b705cfSriastradh
257642542f5fSchristos			hint = MOVE_WRITE | MOVE_INPLACE_HINT;
257742542f5fSchristos			if (flags & COMPOSITE_PARTIAL)
257842542f5fSchristos				hint |= MOVE_READ;
257942542f5fSchristos			if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region, hint))
258003b705cfSriastradh				return false;
258142542f5fSchristos		}
258203b705cfSriastradh
258303b705cfSriastradh		return prepare_blt_clear(sna, tmp);
258403b705cfSriastradh	}
258503b705cfSriastradh
258603b705cfSriastradh	if (is_solid(src)) {
258742542f5fSchristos		if ((op == PictOpOver || op == PictOpAdd) && is_transparent(src)) {
258842542f5fSchristos			sna_pixmap(tmp->dst.pixmap)->clear = was_clear;
258942542f5fSchristos			return prepare_blt_nop(sna, tmp);
259042542f5fSchristos		}
259103b705cfSriastradh		if (op == PictOpOver && is_opaque_solid(src))
259203b705cfSriastradh			op = PictOpSrc;
259303b705cfSriastradh		if (op == PictOpAdd && is_white(src))
259403b705cfSriastradh			op = PictOpSrc;
259542542f5fSchristos		if (was_clear && (op == PictOpAdd || op == PictOpOver)) {
259642542f5fSchristos			if (sna_pixmap(tmp->dst.pixmap)->clear_color == 0)
259742542f5fSchristos				op = PictOpSrc;
259842542f5fSchristos			if (op == PictOpOver) {
259942542f5fSchristos				color = over(get_solid_color(src, PICT_a8r8g8b8),
260042542f5fSchristos					     color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color,
260142542f5fSchristos							   dst->format, PICT_a8r8g8b8));
260242542f5fSchristos				op = PictOpSrc;
260342542f5fSchristos				DBG(("%s: precomputing solid OVER (%08x, %08x) -> %08x\n",
260442542f5fSchristos				     __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8),
260542542f5fSchristos				     color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color,
260642542f5fSchristos						   dst->format, PICT_a8r8g8b8),
260742542f5fSchristos				     color));
260842542f5fSchristos			}
260942542f5fSchristos			if (op == PictOpAdd) {
261042542f5fSchristos				color = add(get_solid_color(src, PICT_a8r8g8b8),
261142542f5fSchristos					    color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color,
261242542f5fSchristos							  dst->format, PICT_a8r8g8b8));
261342542f5fSchristos				op = PictOpSrc;
261442542f5fSchristos				DBG(("%s: precomputing solid ADD (%08x, %08x) -> %08x\n",
261542542f5fSchristos				     __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8),
261642542f5fSchristos				     color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color,
261742542f5fSchristos						   dst->format, PICT_a8r8g8b8),
261842542f5fSchristos				     color));
261942542f5fSchristos			}
262042542f5fSchristos		}
262103b705cfSriastradh		if (op == PictOpOutReverse && is_opaque_solid(src))
262203b705cfSriastradh			goto clear;
262303b705cfSriastradh
262403b705cfSriastradh		if (op != PictOpSrc) {
262503b705cfSriastradh			DBG(("%s: unsupported op [%d] for blitting\n",
262603b705cfSriastradh			     __FUNCTION__, op));
262703b705cfSriastradh			return false;
262803b705cfSriastradh		}
262903b705cfSriastradh
263003b705cfSriastradh		color = get_solid_color(src, tmp->dst.format);
263103b705cfSriastradhfill:
263203b705cfSriastradh		if (color == 0)
263303b705cfSriastradh			goto clear;
263403b705cfSriastradh
263542542f5fSchristos		if (was_clear && sna_pixmap(tmp->dst.pixmap)->clear_color == color) {
263642542f5fSchristos			sna_pixmap(tmp->dst.pixmap)->clear = true;
263742542f5fSchristos			return prepare_blt_nop(sna, tmp);
263842542f5fSchristos		}
263942542f5fSchristos
264003b705cfSriastradh		hint = 0;
264103b705cfSriastradh		if (can_render(sna)) {
264203b705cfSriastradh			hint |= PREFER_GPU;
264342542f5fSchristos			if ((flags & COMPOSITE_PARTIAL) == 0) {
264442542f5fSchristos				hint |= IGNORE_DAMAGE;
264542542f5fSchristos				if (width  == tmp->dst.pixmap->drawable.width &&
264603b705cfSriastradh				    height == tmp->dst.pixmap->drawable.height)
264703b705cfSriastradh					hint |= REPLACES;
264842542f5fSchristos			}
264903b705cfSriastradh		}
265003b705cfSriastradh		tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
265103b705cfSriastradh						  &dst_box, &tmp->damage);
265242542f5fSchristos		if (tmp->dst.bo) {
265342542f5fSchristos			if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
265442542f5fSchristos				DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n",
265542542f5fSchristos				     __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch));
265642542f5fSchristos				return false;
265742542f5fSchristos			}
265842542f5fSchristos			if (hint & REPLACES)
265942542f5fSchristos				kgem_bo_undo(&sna->kgem, tmp->dst.bo);
266042542f5fSchristos		} else {
266103b705cfSriastradh			RegionRec region;
266203b705cfSriastradh
266303b705cfSriastradh			region.extents = dst_box;
266403b705cfSriastradh			region.data = NULL;
266503b705cfSriastradh
266642542f5fSchristos			hint = MOVE_WRITE | MOVE_INPLACE_HINT;
266742542f5fSchristos			if (flags & COMPOSITE_PARTIAL)
266842542f5fSchristos				hint |= MOVE_READ;
266942542f5fSchristos			if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region, hint))
267003b705cfSriastradh				return false;
267142542f5fSchristos		}
267203b705cfSriastradh
267303b705cfSriastradh		return prepare_blt_fill(sna, tmp, color);
267403b705cfSriastradh	}
267503b705cfSriastradh
267603b705cfSriastradh	if (!src->pDrawable) {
267703b705cfSriastradh		DBG(("%s: unsupported procedural source\n",
267803b705cfSriastradh		     __FUNCTION__));
267903b705cfSriastradh		return false;
268003b705cfSriastradh	}
268103b705cfSriastradh
268203b705cfSriastradh	if (src->filter == PictFilterConvolution) {
268303b705cfSriastradh		DBG(("%s: convolutions filters not handled\n",
268403b705cfSriastradh		     __FUNCTION__));
268503b705cfSriastradh		return false;
268603b705cfSriastradh	}
268703b705cfSriastradh
268803b705cfSriastradh	if (op == PictOpOver && PICT_FORMAT_A(src_format) == 0)
268903b705cfSriastradh		op = PictOpSrc;
269003b705cfSriastradh
269103b705cfSriastradh	if (op != PictOpSrc) {
269203b705cfSriastradh		DBG(("%s: unsupported op [%d] for blitting\n",
269303b705cfSriastradh		     __FUNCTION__, op));
269403b705cfSriastradh		return false;
269503b705cfSriastradh	}
269603b705cfSriastradh
269742542f5fSchristos	if (!sna_transform_is_imprecise_integer_translation(src->transform, src->filter,
269842542f5fSchristos							    dst->polyMode == PolyModePrecise,
269942542f5fSchristos							    &tx, &ty)) {
270003b705cfSriastradh		DBG(("%s: source transform is not an integer translation\n",
270103b705cfSriastradh		     __FUNCTION__));
270203b705cfSriastradh		return false;
270303b705cfSriastradh	}
270442542f5fSchristos	DBG(("%s: converting transform to integer translation? (%d, %d)\n",
270542542f5fSchristos	     __FUNCTION__, src->transform != NULL, tx, ty));
270603b705cfSriastradh	x += tx;
270703b705cfSriastradh	y += ty;
270803b705cfSriastradh
270903b705cfSriastradh	if ((x >= src->pDrawable->width ||
271003b705cfSriastradh	     y >= src->pDrawable->height ||
271103b705cfSriastradh	     x + width  <= 0 ||
271203b705cfSriastradh	     y + height <= 0) &&
271303b705cfSriastradh	    (!src->repeat || src->repeatType == RepeatNone)) {
271403b705cfSriastradh		DBG(("%s: source is outside of valid area, converting to clear\n",
271503b705cfSriastradh		     __FUNCTION__));
271603b705cfSriastradh		goto clear;
271703b705cfSriastradh	}
271803b705cfSriastradh
271903b705cfSriastradh	src_pixmap = get_drawable_pixmap(src->pDrawable);
272003b705cfSriastradh	if (is_clear(src_pixmap)) {
272142542f5fSchristos		if (src->repeat ||
272242542f5fSchristos		    (x >= 0 && y >= 0 &&
272342542f5fSchristos		     x + width  < src_pixmap->drawable.width &&
272442542f5fSchristos		     y + height < src_pixmap->drawable.height)) {
272542542f5fSchristos			color = color_convert(sna_pixmap(src_pixmap)->clear_color,
272642542f5fSchristos					      src->format, tmp->dst.format);
272742542f5fSchristos			goto fill;
272842542f5fSchristos		}
272903b705cfSriastradh	}
273003b705cfSriastradh
273103b705cfSriastradh	alpha_fixup = 0;
273203b705cfSriastradh	if (!(dst->format == src_format ||
273303b705cfSriastradh	      dst->format == alphaless(src_format) ||
273403b705cfSriastradh	      (alphaless(dst->format) == alphaless(src_format) &&
273503b705cfSriastradh	       sna_get_pixel_from_rgba(&alpha_fixup,
273603b705cfSriastradh				       0, 0, 0, 0xffff,
273703b705cfSriastradh				       dst->format)))) {
273803b705cfSriastradh		DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n",
273903b705cfSriastradh		     __FUNCTION__, (unsigned)src_format, dst->format));
274003b705cfSriastradh		return false;
274103b705cfSriastradh	}
274203b705cfSriastradh
274303b705cfSriastradh	/* XXX tiling? fixup extend none? */
274403b705cfSriastradh	if (x < 0 || y < 0 ||
274503b705cfSriastradh	    x + width  > src->pDrawable->width ||
274603b705cfSriastradh	    y + height > src->pDrawable->height) {
274703b705cfSriastradh		DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d, repeat=%d\n",
274803b705cfSriastradh		     __FUNCTION__,
274903b705cfSriastradh		     x, y, x+width, y+width, src->pDrawable->width, src->pDrawable->height, src->repeatType));
275003b705cfSriastradh		if (src->repeat && src->repeatType == RepeatNormal) {
275103b705cfSriastradh			x = x % src->pDrawable->width;
275203b705cfSriastradh			y = y % src->pDrawable->height;
275303b705cfSriastradh			if (x < 0)
275403b705cfSriastradh				x += src->pDrawable->width;
275503b705cfSriastradh			if (y < 0)
275603b705cfSriastradh				y += src->pDrawable->height;
275703b705cfSriastradh			if (x + width  > src->pDrawable->width ||
275803b705cfSriastradh			    y + height > src->pDrawable->height)
275903b705cfSriastradh				return false;
276003b705cfSriastradh		} else
276103b705cfSriastradh			return false;
276203b705cfSriastradh	}
276303b705cfSriastradh
276403b705cfSriastradh	get_drawable_deltas(src->pDrawable, src_pixmap, &tx, &ty);
276503b705cfSriastradh	x += tx + src->pDrawable->x;
276603b705cfSriastradh	y += ty + src->pDrawable->y;
276703b705cfSriastradh	if (x < 0 || y < 0 ||
276803b705cfSriastradh	    x + width  > src_pixmap->drawable.width ||
276903b705cfSriastradh	    y + height > src_pixmap->drawable.height) {
277003b705cfSriastradh		DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid pixmap %dx%d\n",
277103b705cfSriastradh		     __FUNCTION__,
277203b705cfSriastradh		     x, y, x+width, y+width, src_pixmap->drawable.width, src_pixmap->drawable.height));
277303b705cfSriastradh		return false;
277403b705cfSriastradh	}
277503b705cfSriastradh
277603b705cfSriastradh	tmp->u.blt.src_pixmap = src_pixmap;
277703b705cfSriastradh	tmp->u.blt.sx = x - dst_x;
277803b705cfSriastradh	tmp->u.blt.sy = y - dst_y;
277903b705cfSriastradh	DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
278003b705cfSriastradh	     __FUNCTION__,
278103b705cfSriastradh	     tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy, alpha_fixup));
278203b705cfSriastradh
278303b705cfSriastradh	src_box.x1 = x;
278403b705cfSriastradh	src_box.y1 = y;
278503b705cfSriastradh	src_box.x2 = x + width;
278603b705cfSriastradh	src_box.y2 = y + height;
278703b705cfSriastradh	bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true);
278803b705cfSriastradh	if (bo && !kgem_bo_can_blt(&sna->kgem, bo)) {
278903b705cfSriastradh		DBG(("%s: can not blit from src size=%dx%d, tiling? %d, pitch? %d\n",
279003b705cfSriastradh		     __FUNCTION__,
279103b705cfSriastradh		     src_pixmap->drawable.width  < sna->render.max_3d_size,
279203b705cfSriastradh		     src_pixmap->drawable.height < sna->render.max_3d_size,
279303b705cfSriastradh		     bo->tiling, bo->pitch));
279403b705cfSriastradh
279503b705cfSriastradh		if (src_pixmap->drawable.width  <= sna->render.max_3d_size &&
279603b705cfSriastradh		    src_pixmap->drawable.height <= sna->render.max_3d_size &&
279703b705cfSriastradh		    bo->pitch <= sna->render.max_3d_pitch &&
279842542f5fSchristos		    (flags & COMPOSITE_FALLBACK) == 0)
279903b705cfSriastradh		{
280003b705cfSriastradh			return false;
280103b705cfSriastradh		}
280203b705cfSriastradh
280303b705cfSriastradh		bo = NULL;
280403b705cfSriastradh	}
280503b705cfSriastradh
280603b705cfSriastradh	hint = 0;
280703b705cfSriastradh	if (bo || can_render(sna)) {
280803b705cfSriastradh		hint |= PREFER_GPU;
280942542f5fSchristos		if ((flags & COMPOSITE_PARTIAL) == 0) {
281042542f5fSchristos			hint |= IGNORE_DAMAGE;
281142542f5fSchristos			if (width  == tmp->dst.pixmap->drawable.width &&
281203b705cfSriastradh			    height == tmp->dst.pixmap->drawable.height)
281303b705cfSriastradh				hint |= REPLACES;
281403b705cfSriastradh		}
281503b705cfSriastradh		if (bo)
281603b705cfSriastradh			hint |= FORCE_GPU;
281703b705cfSriastradh	}
281803b705cfSriastradh	tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
281903b705cfSriastradh					  &dst_box, &tmp->damage);
282003b705cfSriastradh
282142542f5fSchristos	if (tmp->dst.bo && hint & REPLACES) {
282242542f5fSchristos		struct sna_pixmap *priv = sna_pixmap(tmp->dst.pixmap);
282342542f5fSchristos		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
282442542f5fSchristos	}
282542542f5fSchristos
282642542f5fSchristos	if (tmp->dst.pixmap == src_pixmap)
282742542f5fSchristos		bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true);
282803b705cfSriastradh
282903b705cfSriastradh	ret = false;
283003b705cfSriastradh	if (bo) {
283103b705cfSriastradh		if (!tmp->dst.bo) {
283203b705cfSriastradh			DBG(("%s: fallback -- unaccelerated read back\n",
283303b705cfSriastradh			     __FUNCTION__));
283442542f5fSchristosfallback:
283542542f5fSchristos			if (flags & COMPOSITE_FALLBACK || !kgem_bo_is_busy(bo))
283603b705cfSriastradh				goto put;
283742542f5fSchristos		} else if (!kgem_bo_can_blt(&sna->kgem, bo)) {
283842542f5fSchristos			DBG(("%s: fallback -- cannot blit from source\n",
283942542f5fSchristos			     __FUNCTION__));
284042542f5fSchristos			goto fallback;
284103b705cfSriastradh		} else if (bo->snoop && tmp->dst.bo->snoop) {
284203b705cfSriastradh			DBG(("%s: fallback -- can not copy between snooped bo\n",
284303b705cfSriastradh			     __FUNCTION__));
284403b705cfSriastradh			goto put;
284503b705cfSriastradh		} else if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
284603b705cfSriastradh			DBG(("%s: fallback -- unaccelerated upload\n",
284703b705cfSriastradh			     __FUNCTION__));
284842542f5fSchristos			goto fallback;
284903b705cfSriastradh		} else {
285003b705cfSriastradh			ret = prepare_blt_copy(sna, tmp, bo, alpha_fixup);
285142542f5fSchristos			if (!ret)
285242542f5fSchristos				goto fallback;
285303b705cfSriastradh		}
285403b705cfSriastradh	} else {
285503b705cfSriastradh		RegionRec region;
285603b705cfSriastradh
285703b705cfSriastradhput:
285803b705cfSriastradh		if (tmp->dst.bo == sna_pixmap(tmp->dst.pixmap)->cpu_bo) {
285942542f5fSchristos			DBG(("%s: dropping upload into CPU bo\n", __FUNCTION__));
286003b705cfSriastradh			tmp->dst.bo = NULL;
286103b705cfSriastradh			tmp->damage = NULL;
286203b705cfSriastradh		}
286303b705cfSriastradh
286403b705cfSriastradh		if (tmp->dst.bo == NULL) {
286503b705cfSriastradh			hint = MOVE_INPLACE_HINT | MOVE_WRITE;
286642542f5fSchristos			if (flags & COMPOSITE_PARTIAL)
286703b705cfSriastradh				hint |= MOVE_READ;
286803b705cfSriastradh
286903b705cfSriastradh			region.extents = dst_box;
287003b705cfSriastradh			region.data = NULL;
287103b705cfSriastradh			if (!sna_drawable_move_region_to_cpu(dst->pDrawable,
287203b705cfSriastradh							     &region, hint))
287303b705cfSriastradh				return false;
287403b705cfSriastradh
287503b705cfSriastradh			assert(tmp->damage == NULL);
287603b705cfSriastradh		}
287703b705cfSriastradh
287803b705cfSriastradh		region.extents = src_box;
287903b705cfSriastradh		region.data = NULL;
288003b705cfSriastradh		if (!sna_drawable_move_region_to_cpu(&src_pixmap->drawable,
288103b705cfSriastradh						     &region, MOVE_READ))
288203b705cfSriastradh			return false;
288303b705cfSriastradh
288403b705cfSriastradh		ret = prepare_blt_put(sna, tmp, alpha_fixup);
288503b705cfSriastradh	}
288603b705cfSriastradh
288703b705cfSriastradh	return ret;
288803b705cfSriastradh}
288903b705cfSriastradh
289003b705cfSriastradhstatic void convert_done(struct sna *sna, const struct sna_composite_op *op)
289103b705cfSriastradh{
289203b705cfSriastradh	struct kgem *kgem = &sna->kgem;
289303b705cfSriastradh
289403b705cfSriastradh	assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
289542542f5fSchristos	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
289642542f5fSchristos		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
289703b705cfSriastradh		_kgem_submit(kgem);
289842542f5fSchristos	}
289903b705cfSriastradh
290003b705cfSriastradh	kgem_bo_destroy(kgem, op->src.bo);
290103b705cfSriastradh	sna_render_composite_redirect_done(sna, op);
290203b705cfSriastradh}
290303b705cfSriastradh
290403b705cfSriastradhstatic void gen6_convert_done(struct sna *sna, const struct sna_composite_op *op)
290503b705cfSriastradh{
290603b705cfSriastradh	struct kgem *kgem = &sna->kgem;
290703b705cfSriastradh
290803b705cfSriastradh	if (kgem_check_batch(kgem, 3)) {
290903b705cfSriastradh		uint32_t *b = kgem->batch + kgem->nbatch;
291042542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
291103b705cfSriastradh		b[0] = XY_SETUP_CLIP;
291203b705cfSriastradh		b[1] = b[2] = 0;
291303b705cfSriastradh		kgem->nbatch += 3;
291403b705cfSriastradh		assert(kgem->nbatch < kgem->surface);
291503b705cfSriastradh	}
291603b705cfSriastradh
291703b705cfSriastradh	convert_done(sna, op);
291803b705cfSriastradh}
291903b705cfSriastradh
292003b705cfSriastradhbool
292103b705cfSriastradhsna_blt_composite__convert(struct sna *sna,
292203b705cfSriastradh			   int x, int y,
292303b705cfSriastradh			   int width, int height,
292403b705cfSriastradh			   struct sna_composite_op *tmp)
292503b705cfSriastradh{
292603b705cfSriastradh	uint32_t alpha_fixup;
292703b705cfSriastradh	int sx, sy;
292803b705cfSriastradh	uint8_t op;
292903b705cfSriastradh
293003b705cfSriastradh#if DEBUG_NO_BLT || NO_BLT_COMPOSITE
293103b705cfSriastradh	return false;
293203b705cfSriastradh#endif
293303b705cfSriastradh
293403b705cfSriastradh	DBG(("%s src=%d, dst=%d (redirect? %d)\n", __FUNCTION__,
293503b705cfSriastradh	     tmp->src.bo->handle, tmp->dst.bo->handle,
293603b705cfSriastradh	     tmp->redirect.real_bo ? tmp->redirect.real_bo->handle : 0));
293703b705cfSriastradh
293803b705cfSriastradh	if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo) ||
293903b705cfSriastradh	    !kgem_bo_can_blt(&sna->kgem, tmp->src.bo)) {
294003b705cfSriastradh		DBG(("%s: cannot blt from src or to dst\n", __FUNCTION__));
294103b705cfSriastradh		return false;
294203b705cfSriastradh	}
294303b705cfSriastradh
294403b705cfSriastradh	if (tmp->src.transform) {
294503b705cfSriastradh		DBG(("%s: transforms not handled by the BLT\n", __FUNCTION__));
294603b705cfSriastradh		return false;
294703b705cfSriastradh	}
294803b705cfSriastradh
294903b705cfSriastradh	if (tmp->src.filter == PictFilterConvolution) {
295003b705cfSriastradh		DBG(("%s: convolutions filters not handled\n",
295103b705cfSriastradh		     __FUNCTION__));
295203b705cfSriastradh		return false;
295303b705cfSriastradh	}
295403b705cfSriastradh
295503b705cfSriastradh	op = tmp->op;
295603b705cfSriastradh	if (op == PictOpOver && PICT_FORMAT_A(tmp->src.pict_format) == 0)
295703b705cfSriastradh		op = PictOpSrc;
295803b705cfSriastradh	if (op != PictOpSrc) {
295903b705cfSriastradh		DBG(("%s: unsupported op [%d] for blitting\n",
296003b705cfSriastradh		     __FUNCTION__, op));
296103b705cfSriastradh		return false;
296203b705cfSriastradh	}
296303b705cfSriastradh
296403b705cfSriastradh	alpha_fixup = 0;
296503b705cfSriastradh	if (!(tmp->dst.format == tmp->src.pict_format ||
296603b705cfSriastradh	      tmp->dst.format == alphaless(tmp->src.pict_format) ||
296703b705cfSriastradh	      (alphaless(tmp->dst.format) == alphaless(tmp->src.pict_format) &&
296803b705cfSriastradh	       sna_get_pixel_from_rgba(&alpha_fixup,
296903b705cfSriastradh				       0, 0, 0, 0xffff,
297003b705cfSriastradh				       tmp->dst.format)))) {
297103b705cfSriastradh		DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n",
297203b705cfSriastradh		     __FUNCTION__,
297303b705cfSriastradh		     (unsigned)tmp->src.pict_format,
297403b705cfSriastradh		     (unsigned)tmp->dst.format));
297503b705cfSriastradh		return false;
297603b705cfSriastradh	}
297703b705cfSriastradh
297803b705cfSriastradh	sx = tmp->src.offset[0];
297903b705cfSriastradh	sy = tmp->src.offset[1];
298003b705cfSriastradh
298103b705cfSriastradh	x += sx;
298203b705cfSriastradh	y += sy;
298303b705cfSriastradh	if (x < 0 || y < 0 ||
298403b705cfSriastradh	    x + width  > tmp->src.width ||
298503b705cfSriastradh	    y + height > tmp->src.height) {
298603b705cfSriastradh		DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d\n",
298703b705cfSriastradh		     __FUNCTION__,
298803b705cfSriastradh		     x, y, x+width, y+width, tmp->src.width, tmp->src.height));
298903b705cfSriastradh		if (tmp->src.repeat == RepeatNormal) {
299003b705cfSriastradh			int xx = x % tmp->src.width;
299103b705cfSriastradh			int yy = y % tmp->src.height;
299203b705cfSriastradh			if (xx < 0)
299303b705cfSriastradh				xx += tmp->src.width;
299403b705cfSriastradh			if (yy < 0)
299503b705cfSriastradh				yy += tmp->src.height;
299603b705cfSriastradh			if (xx + width  > tmp->src.width ||
299703b705cfSriastradh			    yy + height > tmp->src.height)
299803b705cfSriastradh				return false;
299903b705cfSriastradh
300003b705cfSriastradh			sx += xx - x;
300103b705cfSriastradh			sy += yy - y;
300203b705cfSriastradh		} else
300303b705cfSriastradh			return false;
300403b705cfSriastradh	}
300503b705cfSriastradh
300642542f5fSchristos	DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
300742542f5fSchristos	     __FUNCTION__,
300842542f5fSchristos	     tmp->dst.x, tmp->dst.y, sx, sy, alpha_fixup));
300942542f5fSchristos
301042542f5fSchristos	tmp->u.blt.src_pixmap = NULL;
301142542f5fSchristos	tmp->u.blt.sx = sx;
301242542f5fSchristos	tmp->u.blt.sy = sy;
301342542f5fSchristos
301442542f5fSchristos	kgem_set_mode(&sna->kgem, KGEM_BLT, tmp->dst.bo);
301503b705cfSriastradh	if (!kgem_check_many_bo_fenced(&sna->kgem, tmp->dst.bo, tmp->src.bo, NULL)) {
301603b705cfSriastradh		kgem_submit(&sna->kgem);
301703b705cfSriastradh		if (!kgem_check_many_bo_fenced(&sna->kgem,
301803b705cfSriastradh					       tmp->dst.bo, tmp->src.bo, NULL)) {
301903b705cfSriastradh			DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
302042542f5fSchristos			return sna_tiling_blt_composite(sna, tmp, tmp->src.bo,
302142542f5fSchristos							PICT_FORMAT_BPP(tmp->src.pict_format),
302242542f5fSchristos							alpha_fixup);
302303b705cfSriastradh		}
302403b705cfSriastradh		_kgem_set_mode(&sna->kgem, KGEM_BLT);
302503b705cfSriastradh	}
302603b705cfSriastradh
302703b705cfSriastradh	if (alpha_fixup) {
302803b705cfSriastradh		tmp->blt   = blt_composite_copy_with_alpha;
302903b705cfSriastradh		tmp->box   = blt_composite_copy_box_with_alpha;
303003b705cfSriastradh		tmp->boxes = blt_composite_copy_boxes_with_alpha;
303103b705cfSriastradh
303203b705cfSriastradh		if (!sna_blt_alpha_fixup_init(sna, &tmp->u.blt,
303303b705cfSriastradh					      tmp->src.bo, tmp->dst.bo,
303403b705cfSriastradh					      PICT_FORMAT_BPP(tmp->src.pict_format),
303503b705cfSriastradh					      alpha_fixup))
303603b705cfSriastradh			return false;
303703b705cfSriastradh	} else {
303803b705cfSriastradh		tmp->blt   = blt_composite_copy;
303903b705cfSriastradh		tmp->box   = blt_composite_copy_box;
304003b705cfSriastradh		tmp->boxes = blt_composite_copy_boxes;
304103b705cfSriastradh		tmp->thread_boxes = blt_composite_copy_boxes__thread;
304203b705cfSriastradh
304303b705cfSriastradh		if (!sna_blt_copy_init(sna, &tmp->u.blt,
304403b705cfSriastradh				       tmp->src.bo, tmp->dst.bo,
304503b705cfSriastradh				       PICT_FORMAT_BPP(tmp->src.pict_format),
304603b705cfSriastradh				       GXcopy))
304703b705cfSriastradh			return false;
304803b705cfSriastradh	}
304903b705cfSriastradh
305003b705cfSriastradh	tmp->done = convert_done;
305103b705cfSriastradh	if (sna->kgem.gen >= 060 && tmp->src.bo == tmp->dst.bo)
305203b705cfSriastradh		tmp->done = gen6_convert_done;
305303b705cfSriastradh
305403b705cfSriastradh	return true;
305503b705cfSriastradh}
305603b705cfSriastradh
305703b705cfSriastradhstatic void sna_blt_fill_op_blt(struct sna *sna,
305803b705cfSriastradh				const struct sna_fill_op *op,
305903b705cfSriastradh				int16_t x, int16_t y,
306003b705cfSriastradh				int16_t width, int16_t height)
306103b705cfSriastradh{
306242542f5fSchristos	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
306342542f5fSchristos		const struct sna_blt_state *blt = &op->base.u.blt;
306442542f5fSchristos
306542542f5fSchristos		sna_blt_fill_begin(sna, blt);
306642542f5fSchristos
306742542f5fSchristos		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
306842542f5fSchristos		sna->blt_state.fill_pixel = blt->pixel;
306942542f5fSchristos		sna->blt_state.fill_alu = blt->alu;
307042542f5fSchristos	}
307142542f5fSchristos
307203b705cfSriastradh	sna_blt_fill_one(sna, &op->base.u.blt, x, y, width, height);
307303b705cfSriastradh}
307403b705cfSriastradh
307503b705cfSriastradhfastcall static void sna_blt_fill_op_box(struct sna *sna,
307603b705cfSriastradh					 const struct sna_fill_op *op,
307703b705cfSriastradh					 const BoxRec *box)
307803b705cfSriastradh{
307942542f5fSchristos	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
308042542f5fSchristos		const struct sna_blt_state *blt = &op->base.u.blt;
308142542f5fSchristos
308242542f5fSchristos		sna_blt_fill_begin(sna, blt);
308342542f5fSchristos
308442542f5fSchristos		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
308542542f5fSchristos		sna->blt_state.fill_pixel = blt->pixel;
308642542f5fSchristos		sna->blt_state.fill_alu = blt->alu;
308742542f5fSchristos	}
308842542f5fSchristos
308903b705cfSriastradh	_sna_blt_fill_box(sna, &op->base.u.blt, box);
309003b705cfSriastradh}
309103b705cfSriastradh
309203b705cfSriastradhfastcall static void sna_blt_fill_op_boxes(struct sna *sna,
309303b705cfSriastradh					   const struct sna_fill_op *op,
309403b705cfSriastradh					   const BoxRec *box,
309503b705cfSriastradh					   int nbox)
309603b705cfSriastradh{
309742542f5fSchristos	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
309842542f5fSchristos		const struct sna_blt_state *blt = &op->base.u.blt;
309942542f5fSchristos
310042542f5fSchristos		sna_blt_fill_begin(sna, blt);
310142542f5fSchristos
310242542f5fSchristos		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
310342542f5fSchristos		sna->blt_state.fill_pixel = blt->pixel;
310442542f5fSchristos		sna->blt_state.fill_alu = blt->alu;
310542542f5fSchristos	}
310642542f5fSchristos
310703b705cfSriastradh	_sna_blt_fill_boxes(sna, &op->base.u.blt, box, nbox);
310803b705cfSriastradh}
310903b705cfSriastradh
311042542f5fSchristosstatic inline uint64_t pt_add(uint32_t cmd, const DDXPointRec *pt, int16_t dx, int16_t dy)
311142542f5fSchristos{
311242542f5fSchristos	union {
311342542f5fSchristos		DDXPointRec pt;
311442542f5fSchristos		uint32_t i;
311542542f5fSchristos	} u;
311642542f5fSchristos
311742542f5fSchristos	u.pt.x = pt->x + dx;
311842542f5fSchristos	u.pt.y = pt->y + dy;
311942542f5fSchristos
312042542f5fSchristos	return cmd | (uint64_t)u.i<<32;
312142542f5fSchristos}
312242542f5fSchristos
312342542f5fSchristosfastcall static void sna_blt_fill_op_points(struct sna *sna,
312442542f5fSchristos					    const struct sna_fill_op *op,
312542542f5fSchristos					    int16_t dx, int16_t dy,
312642542f5fSchristos					    const DDXPointRec *p, int n)
312742542f5fSchristos{
312842542f5fSchristos	const struct sna_blt_state *blt = &op->base.u.blt;
312942542f5fSchristos	struct kgem *kgem = &sna->kgem;
313042542f5fSchristos	uint32_t cmd;
313142542f5fSchristos
313242542f5fSchristos	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, n));
313342542f5fSchristos
313442542f5fSchristos	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
313542542f5fSchristos		sna_blt_fill_begin(sna, blt);
313642542f5fSchristos
313742542f5fSchristos		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
313842542f5fSchristos		sna->blt_state.fill_pixel = blt->pixel;
313942542f5fSchristos		sna->blt_state.fill_alu = blt->alu;
314042542f5fSchristos	}
314142542f5fSchristos
314242542f5fSchristos	if (!kgem_check_batch(kgem, 2))
314342542f5fSchristos		sna_blt_fill_begin(sna, blt);
314442542f5fSchristos
314542542f5fSchristos	cmd = XY_PIXEL_BLT;
314642542f5fSchristos	if (kgem->gen >= 040 && op->base.u.blt.bo[0]->tiling)
314742542f5fSchristos		cmd |= BLT_DST_TILED;
314842542f5fSchristos
314942542f5fSchristos	do {
315042542f5fSchristos		uint32_t *b = kgem->batch + kgem->nbatch;
315113496ba1Ssnj		int n_this_time, rem;
315242542f5fSchristos
315342542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
315442542f5fSchristos		n_this_time = n;
315513496ba1Ssnj		rem = kgem_batch_space(kgem);
315613496ba1Ssnj		if (2*n_this_time > rem)
315713496ba1Ssnj			n_this_time = rem / 2;
315842542f5fSchristos		assert(n_this_time);
315942542f5fSchristos		n -= n_this_time;
316042542f5fSchristos
316142542f5fSchristos		kgem->nbatch += 2 * n_this_time;
316242542f5fSchristos		assert(kgem->nbatch < kgem->surface);
316342542f5fSchristos
316442542f5fSchristos		if ((dx|dy) == 0) {
316542542f5fSchristos			while (n_this_time >= 8) {
316642542f5fSchristos				*((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0);
316742542f5fSchristos				*((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0);
316842542f5fSchristos				*((uint64_t *)b + 2) = pt_add(cmd, p+2, 0, 0);
316942542f5fSchristos				*((uint64_t *)b + 3) = pt_add(cmd, p+3, 0, 0);
317042542f5fSchristos				*((uint64_t *)b + 4) = pt_add(cmd, p+4, 0, 0);
317142542f5fSchristos				*((uint64_t *)b + 5) = pt_add(cmd, p+5, 0, 0);
317242542f5fSchristos				*((uint64_t *)b + 6) = pt_add(cmd, p+6, 0, 0);
317342542f5fSchristos				*((uint64_t *)b + 7) = pt_add(cmd, p+7, 0, 0);
317442542f5fSchristos				b += 16;
317542542f5fSchristos				n_this_time -= 8;
317642542f5fSchristos				p += 8;
317742542f5fSchristos			}
317842542f5fSchristos			if (n_this_time & 4) {
317942542f5fSchristos				*((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0);
318042542f5fSchristos				*((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0);
318142542f5fSchristos				*((uint64_t *)b + 2) = pt_add(cmd, p+2, 0, 0);
318242542f5fSchristos				*((uint64_t *)b + 3) = pt_add(cmd, p+3, 0, 0);
318342542f5fSchristos				b += 8;
318442542f5fSchristos				p += 4;
318542542f5fSchristos			}
318642542f5fSchristos			if (n_this_time & 2) {
318742542f5fSchristos				*((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0);
318842542f5fSchristos				*((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0);
318942542f5fSchristos				b += 4;
319042542f5fSchristos				p += 2;
319142542f5fSchristos			}
319242542f5fSchristos			if (n_this_time & 1)
319342542f5fSchristos				*((uint64_t *)b + 0) = pt_add(cmd, p++, 0, 0);
319442542f5fSchristos		} else {
319542542f5fSchristos			while (n_this_time >= 8) {
319642542f5fSchristos				*((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy);
319742542f5fSchristos				*((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy);
319842542f5fSchristos				*((uint64_t *)b + 2) = pt_add(cmd, p+2, dx, dy);
319942542f5fSchristos				*((uint64_t *)b + 3) = pt_add(cmd, p+3, dx, dy);
320042542f5fSchristos				*((uint64_t *)b + 4) = pt_add(cmd, p+4, dx, dy);
320142542f5fSchristos				*((uint64_t *)b + 5) = pt_add(cmd, p+5, dx, dy);
320242542f5fSchristos				*((uint64_t *)b + 6) = pt_add(cmd, p+6, dx, dy);
320342542f5fSchristos				*((uint64_t *)b + 7) = pt_add(cmd, p+7, dx, dy);
320442542f5fSchristos				b += 16;
320542542f5fSchristos				n_this_time -= 8;
320642542f5fSchristos				p += 8;
320742542f5fSchristos			}
320842542f5fSchristos			if (n_this_time & 4) {
320942542f5fSchristos				*((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy);
321042542f5fSchristos				*((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy);
321142542f5fSchristos				*((uint64_t *)b + 2) = pt_add(cmd, p+2, dx, dy);
321242542f5fSchristos				*((uint64_t *)b + 3) = pt_add(cmd, p+3, dx, dy);
321342542f5fSchristos				b += 8;
321442542f5fSchristos				p += 8;
321542542f5fSchristos			}
321642542f5fSchristos			if (n_this_time & 2) {
321742542f5fSchristos				*((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy);
321842542f5fSchristos				*((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy);
321942542f5fSchristos				b += 4;
322042542f5fSchristos				p += 2;
322142542f5fSchristos			}
322242542f5fSchristos			if (n_this_time & 1)
322342542f5fSchristos				*((uint64_t *)b + 0) = pt_add(cmd, p++, dx, dy);
322442542f5fSchristos		}
322542542f5fSchristos
322642542f5fSchristos		if (!n)
322742542f5fSchristos			return;
322842542f5fSchristos
322942542f5fSchristos		sna_blt_fill_begin(sna, blt);
323042542f5fSchristos	} while (1);
323142542f5fSchristos}
323242542f5fSchristos
323303b705cfSriastradhbool sna_blt_fill(struct sna *sna, uint8_t alu,
323403b705cfSriastradh		  struct kgem_bo *bo, int bpp,
323503b705cfSriastradh		  uint32_t pixel,
323603b705cfSriastradh		  struct sna_fill_op *fill)
323703b705cfSriastradh{
323803b705cfSriastradh#if DEBUG_NO_BLT || NO_BLT_FILL
323903b705cfSriastradh	return false;
324003b705cfSriastradh#endif
324103b705cfSriastradh
324203b705cfSriastradh	DBG(("%s(alu=%d, pixel=%x, bpp=%d)\n", __FUNCTION__, alu, pixel, bpp));
324303b705cfSriastradh
324403b705cfSriastradh	if (!kgem_bo_can_blt(&sna->kgem, bo)) {
324503b705cfSriastradh		DBG(("%s: rejected due to incompatible Y-tiling\n",
324603b705cfSriastradh		     __FUNCTION__));
324703b705cfSriastradh		return false;
324803b705cfSriastradh	}
324903b705cfSriastradh
325003b705cfSriastradh	if (!sna_blt_fill_init(sna, &fill->base.u.blt,
325103b705cfSriastradh			       bo, bpp, alu, pixel))
325203b705cfSriastradh		return false;
325303b705cfSriastradh
325413496ba1Ssnj	assert(sna->kgem.mode == KGEM_BLT);
325503b705cfSriastradh	fill->blt   = sna_blt_fill_op_blt;
325603b705cfSriastradh	fill->box   = sna_blt_fill_op_box;
325703b705cfSriastradh	fill->boxes = sna_blt_fill_op_boxes;
325842542f5fSchristos	fill->points = sna_blt_fill_op_points;
325903b705cfSriastradh	fill->done  =
326003b705cfSriastradh		(void (*)(struct sna *, const struct sna_fill_op *))nop_done;
326103b705cfSriastradh	return true;
326203b705cfSriastradh}
326303b705cfSriastradh
326403b705cfSriastradhstatic void sna_blt_copy_op_blt(struct sna *sna,
326503b705cfSriastradh				const struct sna_copy_op *op,
326603b705cfSriastradh				int16_t src_x, int16_t src_y,
326703b705cfSriastradh				int16_t width, int16_t height,
326803b705cfSriastradh				int16_t dst_x, int16_t dst_y)
326903b705cfSriastradh{
327003b705cfSriastradh	sna_blt_copy_one(sna, &op->base.u.blt,
327103b705cfSriastradh			 src_x, src_y,
327203b705cfSriastradh			 width, height,
327303b705cfSriastradh			 dst_x, dst_y);
327403b705cfSriastradh}
327503b705cfSriastradh
327603b705cfSriastradhbool sna_blt_copy(struct sna *sna, uint8_t alu,
327703b705cfSriastradh		  struct kgem_bo *src,
327803b705cfSriastradh		  struct kgem_bo *dst,
327903b705cfSriastradh		  int bpp,
328003b705cfSriastradh		  struct sna_copy_op *op)
328103b705cfSriastradh{
328203b705cfSriastradh#if DEBUG_NO_BLT || NO_BLT_COPY
328303b705cfSriastradh	return false;
328403b705cfSriastradh#endif
328503b705cfSriastradh
328603b705cfSriastradh	if (!kgem_bo_can_blt(&sna->kgem, src))
328703b705cfSriastradh		return false;
328803b705cfSriastradh
328903b705cfSriastradh	if (!kgem_bo_can_blt(&sna->kgem, dst))
329003b705cfSriastradh		return false;
329103b705cfSriastradh
329203b705cfSriastradh	if (!sna_blt_copy_init(sna, &op->base.u.blt,
329303b705cfSriastradh			       src, dst,
329403b705cfSriastradh			       bpp, alu))
329503b705cfSriastradh		return false;
329603b705cfSriastradh
329703b705cfSriastradh	op->blt  = sna_blt_copy_op_blt;
329803b705cfSriastradh	if (sna->kgem.gen >= 060 && src == dst)
329903b705cfSriastradh		op->done = (void (*)(struct sna *, const struct sna_copy_op *))
330003b705cfSriastradh			    gen6_blt_copy_done;
330103b705cfSriastradh	else
330203b705cfSriastradh		op->done = (void (*)(struct sna *, const struct sna_copy_op *))
330303b705cfSriastradh			    nop_done;
330403b705cfSriastradh	return true;
330503b705cfSriastradh}
330603b705cfSriastradh
330703b705cfSriastradhstatic bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
330803b705cfSriastradh			     struct kgem_bo *bo, int bpp,
330903b705cfSriastradh			     uint32_t color,
331003b705cfSriastradh			     const BoxRec *box)
331103b705cfSriastradh{
331203b705cfSriastradh	struct kgem *kgem = &sna->kgem;
331303b705cfSriastradh	uint32_t br13, cmd, *b;
331403b705cfSriastradh	bool overwrites;
331503b705cfSriastradh
331603b705cfSriastradh	assert(kgem_bo_can_blt (kgem, bo));
331703b705cfSriastradh
331803b705cfSriastradh	DBG(("%s: box=((%d, %d), (%d, %d))\n", __FUNCTION__,
331903b705cfSriastradh	     box->x1, box->y1, box->x2, box->y2));
332003b705cfSriastradh
332103b705cfSriastradh	assert(box->x1 >= 0);
332203b705cfSriastradh	assert(box->y1 >= 0);
332303b705cfSriastradh
332442542f5fSchristos	cmd = XY_COLOR_BLT | (kgem->gen >= 0100 ? 5 : 4);
332503b705cfSriastradh	br13 = bo->pitch;
332603b705cfSriastradh	if (kgem->gen >= 040 && bo->tiling) {
332703b705cfSriastradh		cmd |= BLT_DST_TILED;
332803b705cfSriastradh		br13 >>= 2;
332903b705cfSriastradh	}
333003b705cfSriastradh	assert(br13 <= MAXSHORT);
333103b705cfSriastradh
333203b705cfSriastradh	br13 |= fill_ROP[alu] << 16;
333303b705cfSriastradh	switch (bpp) {
333403b705cfSriastradh	default: assert(0);
333503b705cfSriastradh	case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
333603b705cfSriastradh		 br13 |= 1 << 25; /* RGB8888 */
333703b705cfSriastradh	case 16: br13 |= 1 << 24; /* RGB565 */
333803b705cfSriastradh	case 8: break;
333903b705cfSriastradh	}
334003b705cfSriastradh
334103b705cfSriastradh	/* All too frequently one blt completely overwrites the previous */
334203b705cfSriastradh	overwrites = alu == GXcopy || alu == GXclear || alu == GXset;
334342542f5fSchristos	if (overwrites) {
334442542f5fSchristos		if (sna->kgem.gen >= 0100) {
334542542f5fSchristos			if (kgem->nbatch >= 7 &&
334642542f5fSchristos			    kgem->batch[kgem->nbatch-7] == cmd &&
334742542f5fSchristos			    *(uint64_t *)&kgem->batch[kgem->nbatch-5] == *(const uint64_t *)box &&
334842542f5fSchristos			    kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) {
334942542f5fSchristos				DBG(("%s: replacing last fill\n", __FUNCTION__));
335042542f5fSchristos				kgem->batch[kgem->nbatch-6] = br13;
335142542f5fSchristos				kgem->batch[kgem->nbatch-1] = color;
335242542f5fSchristos				return true;
335342542f5fSchristos			}
335442542f5fSchristos			if (kgem->nbatch >= 10 &&
335542542f5fSchristos			    (kgem->batch[kgem->nbatch-10] & 0xffc00000) == XY_SRC_COPY_BLT_CMD &&
335642542f5fSchristos			    *(uint64_t *)&kgem->batch[kgem->nbatch-8] == *(const uint64_t *)box &&
335742542f5fSchristos			    kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) {
335842542f5fSchristos				DBG(("%s: replacing last copy\n", __FUNCTION__));
335942542f5fSchristos				kgem->batch[kgem->nbatch-10] = cmd;
336042542f5fSchristos				kgem->batch[kgem->nbatch-8] = br13;
336142542f5fSchristos				kgem->batch[kgem->nbatch-4] = color;
336242542f5fSchristos				/* Keep the src bo as part of the execlist, just remove
336342542f5fSchristos				 * its relocation entry.
336442542f5fSchristos				 */
336542542f5fSchristos				kgem->nreloc--;
336642542f5fSchristos				kgem->nbatch -= 3;
336742542f5fSchristos				return true;
336842542f5fSchristos			}
336942542f5fSchristos		} else {
337042542f5fSchristos			if (kgem->nbatch >= 6 &&
337142542f5fSchristos			    kgem->batch[kgem->nbatch-6] == cmd &&
337242542f5fSchristos			    *(uint64_t *)&kgem->batch[kgem->nbatch-4] == *(const uint64_t *)box &&
337342542f5fSchristos			    kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) {
337442542f5fSchristos				DBG(("%s: replacing last fill\n", __FUNCTION__));
337542542f5fSchristos				kgem->batch[kgem->nbatch-5] = br13;
337642542f5fSchristos				kgem->batch[kgem->nbatch-1] = color;
337742542f5fSchristos				return true;
337842542f5fSchristos			}
337942542f5fSchristos			if (kgem->nbatch >= 8 &&
338042542f5fSchristos			    (kgem->batch[kgem->nbatch-8] & 0xffc00000) == XY_SRC_COPY_BLT_CMD &&
338142542f5fSchristos			    *(uint64_t *)&kgem->batch[kgem->nbatch-6] == *(const uint64_t *)box &&
338242542f5fSchristos			    kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) {
338342542f5fSchristos				DBG(("%s: replacing last copy\n", __FUNCTION__));
338442542f5fSchristos				kgem->batch[kgem->nbatch-8] = cmd;
338542542f5fSchristos				kgem->batch[kgem->nbatch-7] = br13;
338642542f5fSchristos				kgem->batch[kgem->nbatch-3] = color;
338742542f5fSchristos				/* Keep the src bo as part of the execlist, just remove
338842542f5fSchristos				 * its relocation entry.
338942542f5fSchristos				 */
339042542f5fSchristos				kgem->nreloc--;
339142542f5fSchristos				kgem->nbatch -= 2;
339242542f5fSchristos				return true;
339342542f5fSchristos			}
339442542f5fSchristos		}
339503b705cfSriastradh	}
339603b705cfSriastradh
339703b705cfSriastradh	/* If we are currently emitting SCANLINES, keep doing so */
339803b705cfSriastradh	if (sna->blt_state.fill_bo == bo->unique_id &&
339903b705cfSriastradh	    sna->blt_state.fill_pixel == color &&
340003b705cfSriastradh	    (sna->blt_state.fill_alu == alu ||
340103b705cfSriastradh	     sna->blt_state.fill_alu == ~alu)) {
340203b705cfSriastradh		DBG(("%s: matching last fill, converting to scanlines\n",
340303b705cfSriastradh		     __FUNCTION__));
340403b705cfSriastradh		return false;
340503b705cfSriastradh	}
340603b705cfSriastradh
340703b705cfSriastradh	kgem_set_mode(kgem, KGEM_BLT, bo);
340842542f5fSchristos	if (!kgem_check_batch(kgem, 7) ||
340903b705cfSriastradh	    !kgem_check_reloc(kgem, 1) ||
341003b705cfSriastradh	    !kgem_check_bo_fenced(kgem, bo)) {
341103b705cfSriastradh		kgem_submit(kgem);
341242542f5fSchristos		if (!kgem_check_bo_fenced(&sna->kgem, bo))
341342542f5fSchristos			return false;
341442542f5fSchristos
341503b705cfSriastradh		_kgem_set_mode(kgem, KGEM_BLT);
341603b705cfSriastradh	}
341703b705cfSriastradh
341842542f5fSchristos	assert(kgem_check_batch(kgem, 6));
341942542f5fSchristos	assert(kgem_check_reloc(kgem, 1));
342042542f5fSchristos
342142542f5fSchristos	assert(sna->kgem.mode == KGEM_BLT);
342203b705cfSriastradh	b = kgem->batch + kgem->nbatch;
342303b705cfSriastradh	b[0] = cmd;
342403b705cfSriastradh	b[1] = br13;
342503b705cfSriastradh	*(uint64_t *)(b+2) = *(const uint64_t *)box;
342642542f5fSchristos	if (kgem->gen >= 0100) {
342742542f5fSchristos		*(uint64_t *)(b+4) =
342842542f5fSchristos			kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
342942542f5fSchristos					 I915_GEM_DOMAIN_RENDER << 16 |
343042542f5fSchristos					 I915_GEM_DOMAIN_RENDER |
343142542f5fSchristos					 KGEM_RELOC_FENCED,
343242542f5fSchristos					 0);
343342542f5fSchristos		b[6] = color;
343442542f5fSchristos		kgem->nbatch += 7;
343542542f5fSchristos	} else {
343642542f5fSchristos		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
343742542f5fSchristos				      I915_GEM_DOMAIN_RENDER << 16 |
343842542f5fSchristos				      I915_GEM_DOMAIN_RENDER |
343942542f5fSchristos				      KGEM_RELOC_FENCED,
344042542f5fSchristos				      0);
344142542f5fSchristos		b[5] = color;
344242542f5fSchristos		kgem->nbatch += 6;
344342542f5fSchristos	}
344403b705cfSriastradh	assert(kgem->nbatch < kgem->surface);
344503b705cfSriastradh
344603b705cfSriastradh	sna->blt_state.fill_bo = bo->unique_id;
344703b705cfSriastradh	sna->blt_state.fill_pixel = color;
344803b705cfSriastradh	sna->blt_state.fill_alu = ~alu;
344903b705cfSriastradh	return true;
345003b705cfSriastradh}
345103b705cfSriastradh
345203b705cfSriastradhbool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
345303b705cfSriastradh			struct kgem_bo *bo, int bpp,
345403b705cfSriastradh			uint32_t pixel,
345503b705cfSriastradh			const BoxRec *box, int nbox)
345603b705cfSriastradh{
345703b705cfSriastradh	struct kgem *kgem = &sna->kgem;
345803b705cfSriastradh	uint32_t br13, cmd;
345903b705cfSriastradh
346003b705cfSriastradh#if DEBUG_NO_BLT || NO_BLT_FILL_BOXES
346103b705cfSriastradh	return false;
346203b705cfSriastradh#endif
346303b705cfSriastradh
346403b705cfSriastradh	DBG(("%s (%d, %08x, %d) x %d\n",
346503b705cfSriastradh	     __FUNCTION__, bpp, pixel, alu, nbox));
346603b705cfSriastradh
346703b705cfSriastradh	if (!kgem_bo_can_blt(kgem, bo)) {
346803b705cfSriastradh		DBG(("%s: fallback -- cannot blt to dst\n", __FUNCTION__));
346903b705cfSriastradh		return false;
347003b705cfSriastradh	}
347103b705cfSriastradh
347203b705cfSriastradh	if (alu == GXclear)
347303b705cfSriastradh		pixel = 0;
347403b705cfSriastradh	else if (alu == GXcopy) {
347503b705cfSriastradh		if (pixel == 0)
347603b705cfSriastradh			alu = GXclear;
347703b705cfSriastradh		else if (pixel == -1)
347803b705cfSriastradh			alu = GXset;
347903b705cfSriastradh	}
348003b705cfSriastradh
348103b705cfSriastradh	if (nbox == 1 && sna_blt_fill_box(sna, alu, bo, bpp, pixel, box))
348203b705cfSriastradh		return true;
348303b705cfSriastradh
348403b705cfSriastradh	br13 = bo->pitch;
348503b705cfSriastradh	cmd = XY_SCANLINE_BLT;
348603b705cfSriastradh	if (kgem->gen >= 040 && bo->tiling) {
348703b705cfSriastradh		cmd |= 1 << 11;
348803b705cfSriastradh		br13 >>= 2;
348903b705cfSriastradh	}
349003b705cfSriastradh	assert(br13 <= MAXSHORT);
349103b705cfSriastradh
349203b705cfSriastradh	br13 |= 1<<31 | fill_ROP[alu] << 16;
349303b705cfSriastradh	switch (bpp) {
349403b705cfSriastradh	default: assert(0);
349503b705cfSriastradh	case 32: br13 |= 1 << 25; /* RGB8888 */
349603b705cfSriastradh	case 16: br13 |= 1 << 24; /* RGB565 */
349703b705cfSriastradh	case 8: break;
349803b705cfSriastradh	}
349903b705cfSriastradh
350003b705cfSriastradh	kgem_set_mode(kgem, KGEM_BLT, bo);
350142542f5fSchristos	if (!kgem_check_batch(kgem, 14) ||
350203b705cfSriastradh	    !kgem_check_bo_fenced(kgem, bo)) {
350303b705cfSriastradh		kgem_submit(kgem);
350403b705cfSriastradh		if (!kgem_check_bo_fenced(&sna->kgem, bo))
350503b705cfSriastradh			return false;
350603b705cfSriastradh		_kgem_set_mode(kgem, KGEM_BLT);
350703b705cfSriastradh	}
350803b705cfSriastradh
350903b705cfSriastradh	if (sna->blt_state.fill_bo != bo->unique_id ||
351003b705cfSriastradh	    sna->blt_state.fill_pixel != pixel ||
351103b705cfSriastradh	    sna->blt_state.fill_alu != alu)
351203b705cfSriastradh	{
351303b705cfSriastradh		uint32_t *b;
351403b705cfSriastradh
351513496ba1Ssnj		if (!kgem_check_batch(kgem, 24) ||
351613496ba1Ssnj		    !kgem_check_reloc(kgem, 1)) {
351703b705cfSriastradh			_kgem_submit(kgem);
351842542f5fSchristos			if (!kgem_check_bo_fenced(&sna->kgem, bo))
351942542f5fSchristos				return false;
352003b705cfSriastradh			_kgem_set_mode(kgem, KGEM_BLT);
352103b705cfSriastradh		}
352203b705cfSriastradh
352342542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
352403b705cfSriastradh		b = kgem->batch + kgem->nbatch;
352542542f5fSchristos		if (kgem->gen >= 0100) {
352642542f5fSchristos			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
352742542f5fSchristos			if (bpp == 32)
352842542f5fSchristos				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
352942542f5fSchristos			if (bo->tiling)
353042542f5fSchristos				b[0] |= BLT_DST_TILED;
353142542f5fSchristos			b[1] = br13;
353242542f5fSchristos			b[2] = 0;
353342542f5fSchristos			b[3] = 0;
353442542f5fSchristos			*(uint64_t *)(b+4) =
353542542f5fSchristos				kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
353642542f5fSchristos						 I915_GEM_DOMAIN_RENDER << 16 |
353742542f5fSchristos						 I915_GEM_DOMAIN_RENDER |
353842542f5fSchristos						 KGEM_RELOC_FENCED,
353942542f5fSchristos						 0);
354042542f5fSchristos			b[6] = pixel;
354142542f5fSchristos			b[7] = pixel;
354242542f5fSchristos			b[8] = 0;
354342542f5fSchristos			b[9] = 0;
354442542f5fSchristos			kgem->nbatch += 10;
354542542f5fSchristos		} else {
354642542f5fSchristos			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
354742542f5fSchristos			if (bpp == 32)
354842542f5fSchristos				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
354942542f5fSchristos			if (bo->tiling && kgem->gen >= 040)
355042542f5fSchristos				b[0] |= BLT_DST_TILED;
355142542f5fSchristos			b[1] = br13;
355242542f5fSchristos			b[2] = 0;
355342542f5fSchristos			b[3] = 0;
355442542f5fSchristos			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
355542542f5fSchristos					      I915_GEM_DOMAIN_RENDER << 16 |
355642542f5fSchristos					      I915_GEM_DOMAIN_RENDER |
355742542f5fSchristos					      KGEM_RELOC_FENCED,
355842542f5fSchristos					      0);
355942542f5fSchristos			b[5] = pixel;
356042542f5fSchristos			b[6] = pixel;
356142542f5fSchristos			b[7] = 0;
356242542f5fSchristos			b[8] = 0;
356342542f5fSchristos			kgem->nbatch += 9;
356442542f5fSchristos		}
356503b705cfSriastradh		assert(kgem->nbatch < kgem->surface);
356603b705cfSriastradh
356703b705cfSriastradh		sna->blt_state.fill_bo = bo->unique_id;
356803b705cfSriastradh		sna->blt_state.fill_pixel = pixel;
356903b705cfSriastradh		sna->blt_state.fill_alu = alu;
357003b705cfSriastradh	}
357103b705cfSriastradh
357203b705cfSriastradh	do {
357313496ba1Ssnj		int nbox_this_time, rem;
357403b705cfSriastradh
357503b705cfSriastradh		nbox_this_time = nbox;
357613496ba1Ssnj		rem = kgem_batch_space(kgem);
357713496ba1Ssnj		if (3*nbox_this_time > rem)
357813496ba1Ssnj			nbox_this_time = rem / 3;
357913496ba1Ssnj		DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
358013496ba1Ssnj		     __FUNCTION__, nbox_this_time, nbox, rem));
358113496ba1Ssnj		assert(nbox_this_time > 0);
358203b705cfSriastradh		nbox -= nbox_this_time;
358303b705cfSriastradh
358442542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
358503b705cfSriastradh		do {
358603b705cfSriastradh			uint32_t *b;
358703b705cfSriastradh
358803b705cfSriastradh			DBG(("%s: (%d, %d), (%d, %d): %08x\n",
358903b705cfSriastradh			     __FUNCTION__,
359003b705cfSriastradh			     box->x1, box->y1,
359103b705cfSriastradh			     box->x2, box->y2,
359203b705cfSriastradh			     pixel));
359303b705cfSriastradh
359403b705cfSriastradh			assert(box->x1 >= 0);
359503b705cfSriastradh			assert(box->y1 >= 0);
359603b705cfSriastradh			assert(box->y2 * bo->pitch <= kgem_bo_size(bo));
359703b705cfSriastradh
359803b705cfSriastradh			b = kgem->batch + kgem->nbatch;
359903b705cfSriastradh			kgem->nbatch += 3;
360003b705cfSriastradh			assert(kgem->nbatch < kgem->surface);
360103b705cfSriastradh			b[0] = cmd;
360203b705cfSriastradh			*(uint64_t *)(b+1) = *(const uint64_t *)box;
360303b705cfSriastradh			box++;
360403b705cfSriastradh		} while (--nbox_this_time);
360503b705cfSriastradh
360603b705cfSriastradh		if (nbox) {
360703b705cfSriastradh			uint32_t *b;
360803b705cfSriastradh
360903b705cfSriastradh			_kgem_submit(kgem);
361003b705cfSriastradh			_kgem_set_mode(kgem, KGEM_BLT);
361103b705cfSriastradh
361242542f5fSchristos			assert(sna->kgem.mode == KGEM_BLT);
361303b705cfSriastradh			b = kgem->batch + kgem->nbatch;
361442542f5fSchristos			if (kgem->gen >= 0100) {
361542542f5fSchristos				b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
361642542f5fSchristos				if (bpp == 32)
361742542f5fSchristos					b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
361842542f5fSchristos				if (bo->tiling)
361942542f5fSchristos					b[0] |= BLT_DST_TILED;
362042542f5fSchristos				b[1] = br13;
362142542f5fSchristos				b[2] = 0;
362242542f5fSchristos				b[3] = 0;
362342542f5fSchristos				*(uint64_t *)(b+4) =
362442542f5fSchristos					kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
362542542f5fSchristos							 I915_GEM_DOMAIN_RENDER << 16 |
362642542f5fSchristos							 I915_GEM_DOMAIN_RENDER |
362742542f5fSchristos							 KGEM_RELOC_FENCED,
362842542f5fSchristos							 0);
362942542f5fSchristos				b[6] = pixel;
363042542f5fSchristos				b[7] = pixel;
363142542f5fSchristos				b[8] = 0;
363242542f5fSchristos				b[9] = 0;
363342542f5fSchristos				kgem->nbatch += 10;
363442542f5fSchristos			} else {
363542542f5fSchristos				b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
363642542f5fSchristos				if (bpp == 32)
363742542f5fSchristos					b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
363842542f5fSchristos				if (bo->tiling && kgem->gen >= 040)
363942542f5fSchristos					b[0] |= BLT_DST_TILED;
364042542f5fSchristos				b[1] = br13;
364142542f5fSchristos				b[2] = 0;
364242542f5fSchristos				b[3] = 0;
364342542f5fSchristos				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
364442542f5fSchristos						      I915_GEM_DOMAIN_RENDER << 16 |
364542542f5fSchristos						      I915_GEM_DOMAIN_RENDER |
364642542f5fSchristos						      KGEM_RELOC_FENCED,
364742542f5fSchristos						      0);
364842542f5fSchristos				b[5] = pixel;
364942542f5fSchristos				b[6] = pixel;
365042542f5fSchristos				b[7] = 0;
365142542f5fSchristos				b[8] = 0;
365242542f5fSchristos				kgem->nbatch += 9;
365342542f5fSchristos			}
365403b705cfSriastradh			assert(kgem->nbatch < kgem->surface);
365513496ba1Ssnj			assert(kgem_check_batch(kgem, 3));
365603b705cfSriastradh		}
365703b705cfSriastradh	} while (nbox);
365803b705cfSriastradh
365942542f5fSchristos	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
366042542f5fSchristos		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
366103b705cfSriastradh		_kgem_submit(kgem);
366242542f5fSchristos	}
366303b705cfSriastradh
366403b705cfSriastradh	return true;
366503b705cfSriastradh}
366603b705cfSriastradh
366703b705cfSriastradhbool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
366803b705cfSriastradh			struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
366903b705cfSriastradh			struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
367003b705cfSriastradh			int bpp, const BoxRec *box, int nbox)
367103b705cfSriastradh{
367203b705cfSriastradh	struct kgem *kgem = &sna->kgem;
367303b705cfSriastradh	unsigned src_pitch, br13, cmd;
367403b705cfSriastradh
367503b705cfSriastradh#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES
367603b705cfSriastradh	return false;
367703b705cfSriastradh#endif
367803b705cfSriastradh
367903b705cfSriastradh	DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n",
368003b705cfSriastradh	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox,
368103b705cfSriastradh	    src_bo->tiling, dst_bo->tiling,
368203b705cfSriastradh	    src_bo->pitch, dst_bo->pitch));
368342542f5fSchristos	assert(nbox);
368403b705cfSriastradh
368542542f5fSchristos	if (wedged(sna) || !kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) {
368603b705cfSriastradh		DBG(("%s: cannot blt to src? %d or dst? %d\n",
368703b705cfSriastradh		     __FUNCTION__,
368803b705cfSriastradh		     kgem_bo_can_blt(kgem, src_bo),
368903b705cfSriastradh		     kgem_bo_can_blt(kgem, dst_bo)));
369003b705cfSriastradh		return false;
369103b705cfSriastradh	}
369203b705cfSriastradh
369303b705cfSriastradh	cmd = XY_SRC_COPY_BLT_CMD;
369403b705cfSriastradh	if (bpp == 32)
369503b705cfSriastradh		cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
369603b705cfSriastradh
369703b705cfSriastradh	src_pitch = src_bo->pitch;
369803b705cfSriastradh	if (kgem->gen >= 040 && src_bo->tiling) {
369903b705cfSriastradh		cmd |= BLT_SRC_TILED;
370003b705cfSriastradh		src_pitch >>= 2;
370103b705cfSriastradh	}
370203b705cfSriastradh	assert(src_pitch <= MAXSHORT);
370303b705cfSriastradh
370403b705cfSriastradh	br13 = dst_bo->pitch;
370503b705cfSriastradh	if (kgem->gen >= 040 && dst_bo->tiling) {
370603b705cfSriastradh		cmd |= BLT_DST_TILED;
370703b705cfSriastradh		br13 >>= 2;
370803b705cfSriastradh	}
370903b705cfSriastradh	assert(br13 <= MAXSHORT);
371003b705cfSriastradh
371103b705cfSriastradh	br13 |= copy_ROP[alu] << 16;
371203b705cfSriastradh	switch (bpp) {
371303b705cfSriastradh	default: assert(0);
371403b705cfSriastradh	case 32: br13 |= 1 << 25; /* RGB8888 */
371503b705cfSriastradh	case 16: br13 |= 1 << 24; /* RGB565 */
371603b705cfSriastradh	case 8: break;
371703b705cfSriastradh	}
371803b705cfSriastradh
371903b705cfSriastradh	/* Compare first box against a previous fill */
372042542f5fSchristos	if ((alu == GXcopy || alu == GXclear || alu == GXset) &&
372142542f5fSchristos	    kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) {
372242542f5fSchristos		if (kgem->gen >= 0100) {
372342542f5fSchristos			if (kgem->nbatch >= 7 &&
372442542f5fSchristos			    kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
372542542f5fSchristos			    kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
372642542f5fSchristos			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
372742542f5fSchristos				DBG(("%s: deleting last fill\n", __FUNCTION__));
372842542f5fSchristos				kgem->nbatch -= 7;
372942542f5fSchristos				kgem->nreloc--;
373042542f5fSchristos			}
373142542f5fSchristos		} else {
373242542f5fSchristos			if (kgem->nbatch >= 6 &&
373342542f5fSchristos			    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
373442542f5fSchristos			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
373542542f5fSchristos			    kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
373642542f5fSchristos				DBG(("%s: deleting last fill\n", __FUNCTION__));
373742542f5fSchristos				kgem->nbatch -= 6;
373842542f5fSchristos				kgem->nreloc--;
373942542f5fSchristos			}
374042542f5fSchristos		}
374103b705cfSriastradh	}
374203b705cfSriastradh
374303b705cfSriastradh	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
374442542f5fSchristos	if (!kgem_check_batch(kgem, 10) ||
374503b705cfSriastradh	    !kgem_check_reloc(kgem, 2) ||
374603b705cfSriastradh	    !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
374703b705cfSriastradh		kgem_submit(kgem);
374842542f5fSchristos		if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
374942542f5fSchristos			DBG(("%s: not enough room in aperture, fallback to tiling copy\n", __FUNCTION__));
375003b705cfSriastradh			return sna_tiling_blt_copy_boxes(sna, alu,
375103b705cfSriastradh							 src_bo, src_dx, src_dy,
375203b705cfSriastradh							 dst_bo, dst_dx, dst_dy,
375303b705cfSriastradh							 bpp, box, nbox);
375442542f5fSchristos		}
375503b705cfSriastradh		_kgem_set_mode(kgem, KGEM_BLT);
375603b705cfSriastradh	}
375703b705cfSriastradh
375803b705cfSriastradh	if ((dst_dx | dst_dy) == 0) {
375942542f5fSchristos		if (kgem->gen >= 0100) {
376042542f5fSchristos			uint64_t hdr = (uint64_t)br13 << 32 | cmd | 8;
376103b705cfSriastradh			do {
376213496ba1Ssnj				int nbox_this_time, rem;
376342542f5fSchristos
376442542f5fSchristos				nbox_this_time = nbox;
376513496ba1Ssnj				rem = kgem_batch_space(kgem);
376613496ba1Ssnj				if (10*nbox_this_time > rem)
376713496ba1Ssnj					nbox_this_time = rem / 10;
376842542f5fSchristos				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
376942542f5fSchristos					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
377013496ba1Ssnj				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
377113496ba1Ssnj				     __FUNCTION__, nbox_this_time, nbox, rem));
377213496ba1Ssnj				assert(nbox_this_time > 0);
377342542f5fSchristos				nbox -= nbox_this_time;
377442542f5fSchristos
377542542f5fSchristos				assert(sna->kgem.mode == KGEM_BLT);
377642542f5fSchristos				do {
377742542f5fSchristos					uint32_t *b = kgem->batch + kgem->nbatch;
377842542f5fSchristos
377942542f5fSchristos					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
378042542f5fSchristos					     __FUNCTION__,
378142542f5fSchristos					     box->x1, box->y1,
378242542f5fSchristos					     box->x2 - box->x1, box->y2 - box->y1));
378342542f5fSchristos
378442542f5fSchristos					assert(box->x1 + src_dx >= 0);
378542542f5fSchristos					assert(box->y1 + src_dy >= 0);
378642542f5fSchristos					assert(box->x1 + src_dx <= INT16_MAX);
378742542f5fSchristos					assert(box->y1 + src_dy <= INT16_MAX);
378842542f5fSchristos
378942542f5fSchristos					assert(box->x1 >= 0);
379042542f5fSchristos					assert(box->y1 >= 0);
379142542f5fSchristos
379242542f5fSchristos					*(uint64_t *)&b[0] = hdr;
379342542f5fSchristos					*(uint64_t *)&b[2] = *(const uint64_t *)box;
379442542f5fSchristos					*(uint64_t *)(b+4) =
379542542f5fSchristos						kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
379642542f5fSchristos								 I915_GEM_DOMAIN_RENDER << 16 |
379742542f5fSchristos								 I915_GEM_DOMAIN_RENDER |
379842542f5fSchristos								 KGEM_RELOC_FENCED,
379942542f5fSchristos								 0);
380042542f5fSchristos					b[6] = add2(b[2], src_dx, src_dy);
380142542f5fSchristos					b[7] = src_pitch;
380242542f5fSchristos					*(uint64_t *)(b+8) =
380342542f5fSchristos						kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
380442542f5fSchristos								 I915_GEM_DOMAIN_RENDER << 16 |
380542542f5fSchristos								 KGEM_RELOC_FENCED,
380642542f5fSchristos								 0);
380742542f5fSchristos					kgem->nbatch += 10;
380842542f5fSchristos					assert(kgem->nbatch < kgem->surface);
380942542f5fSchristos					box++;
381042542f5fSchristos				} while (--nbox_this_time);
381142542f5fSchristos
381242542f5fSchristos				if (!nbox)
381342542f5fSchristos					break;
381442542f5fSchristos
381542542f5fSchristos				_kgem_submit(kgem);
381642542f5fSchristos				_kgem_set_mode(kgem, KGEM_BLT);
381742542f5fSchristos			} while (1);
381842542f5fSchristos		} else {
381942542f5fSchristos			uint64_t hdr = (uint64_t)br13 << 32 | cmd | 6;
382042542f5fSchristos			do {
382113496ba1Ssnj				int nbox_this_time, rem;
382242542f5fSchristos
382342542f5fSchristos				nbox_this_time = nbox;
382413496ba1Ssnj				rem = kgem_batch_space(kgem);
382513496ba1Ssnj				if (8*nbox_this_time > rem)
382613496ba1Ssnj					nbox_this_time = rem / 8;
382742542f5fSchristos				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
382842542f5fSchristos					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
382913496ba1Ssnj				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
383013496ba1Ssnj				     __FUNCTION__, nbox_this_time, nbox, rem));
383113496ba1Ssnj				assert(nbox_this_time > 0);
383242542f5fSchristos				nbox -= nbox_this_time;
383342542f5fSchristos
383442542f5fSchristos				assert(sna->kgem.mode == KGEM_BLT);
383542542f5fSchristos				do {
383642542f5fSchristos					uint32_t *b = kgem->batch + kgem->nbatch;
383742542f5fSchristos
383842542f5fSchristos					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
383942542f5fSchristos					     __FUNCTION__,
384042542f5fSchristos					     box->x1, box->y1,
384142542f5fSchristos					     box->x2 - box->x1, box->y2 - box->y1));
384242542f5fSchristos
384342542f5fSchristos					assert(box->x1 + src_dx >= 0);
384442542f5fSchristos					assert(box->y1 + src_dy >= 0);
384542542f5fSchristos					assert(box->x1 + src_dx <= INT16_MAX);
384642542f5fSchristos					assert(box->y1 + src_dy <= INT16_MAX);
384742542f5fSchristos
384842542f5fSchristos					assert(box->x1 >= 0);
384942542f5fSchristos					assert(box->y1 >= 0);
385042542f5fSchristos
385142542f5fSchristos					*(uint64_t *)&b[0] = hdr;
385242542f5fSchristos					*(uint64_t *)&b[2] = *(const uint64_t *)box;
385342542f5fSchristos					b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
385442542f5fSchristos							      I915_GEM_DOMAIN_RENDER << 16 |
385542542f5fSchristos							      I915_GEM_DOMAIN_RENDER |
385642542f5fSchristos							      KGEM_RELOC_FENCED,
385742542f5fSchristos							      0);
385842542f5fSchristos					b[5] = add2(b[2], src_dx, src_dy);
385942542f5fSchristos					b[6] = src_pitch;
386042542f5fSchristos					b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
386142542f5fSchristos							      I915_GEM_DOMAIN_RENDER << 16 |
386242542f5fSchristos							      KGEM_RELOC_FENCED,
386342542f5fSchristos							      0);
386442542f5fSchristos					kgem->nbatch += 8;
386542542f5fSchristos					assert(kgem->nbatch < kgem->surface);
386642542f5fSchristos					box++;
386742542f5fSchristos				} while (--nbox_this_time);
386842542f5fSchristos
386942542f5fSchristos				if (!nbox)
387042542f5fSchristos					break;
387142542f5fSchristos
387242542f5fSchristos				_kgem_submit(kgem);
387342542f5fSchristos				_kgem_set_mode(kgem, KGEM_BLT);
387442542f5fSchristos			} while (1);
387542542f5fSchristos		}
387642542f5fSchristos	} else {
387742542f5fSchristos		if (kgem->gen >= 0100) {
387842542f5fSchristos			cmd |= 8;
387942542f5fSchristos			do {
388013496ba1Ssnj				int nbox_this_time, rem;
388142542f5fSchristos
388242542f5fSchristos				nbox_this_time = nbox;
388313496ba1Ssnj				rem = kgem_batch_space(kgem);
388413496ba1Ssnj				if (10*nbox_this_time > rem)
388513496ba1Ssnj					nbox_this_time = rem / 10;
388642542f5fSchristos				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
388742542f5fSchristos					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
388813496ba1Ssnj				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
388913496ba1Ssnj				     __FUNCTION__, nbox_this_time, nbox, rem));
389013496ba1Ssnj				assert(nbox_this_time > 0);
389142542f5fSchristos				nbox -= nbox_this_time;
389242542f5fSchristos
389342542f5fSchristos				assert(sna->kgem.mode == KGEM_BLT);
389442542f5fSchristos				do {
389542542f5fSchristos					uint32_t *b = kgem->batch + kgem->nbatch;
389642542f5fSchristos
389742542f5fSchristos					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
389842542f5fSchristos					     __FUNCTION__,
389942542f5fSchristos					     box->x1, box->y1,
390042542f5fSchristos					     box->x2 - box->x1, box->y2 - box->y1));
390142542f5fSchristos
390242542f5fSchristos					assert(box->x1 + src_dx >= 0);
390342542f5fSchristos					assert(box->y1 + src_dy >= 0);
390442542f5fSchristos
390542542f5fSchristos					assert(box->x1 + dst_dx >= 0);
390642542f5fSchristos					assert(box->y1 + dst_dy >= 0);
390742542f5fSchristos
390842542f5fSchristos					b[0] = cmd;
390942542f5fSchristos					b[1] = br13;
391042542f5fSchristos					b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
391142542f5fSchristos					b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
391242542f5fSchristos					*(uint64_t *)(b+4) =
391342542f5fSchristos						kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
391442542f5fSchristos								 I915_GEM_DOMAIN_RENDER << 16 |
391542542f5fSchristos								 I915_GEM_DOMAIN_RENDER |
391642542f5fSchristos								 KGEM_RELOC_FENCED,
391742542f5fSchristos								 0);
391842542f5fSchristos					b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
391942542f5fSchristos					b[7] = src_pitch;
392042542f5fSchristos					*(uint64_t *)(b+8) =
392142542f5fSchristos						kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
392242542f5fSchristos								 I915_GEM_DOMAIN_RENDER << 16 |
392342542f5fSchristos								 KGEM_RELOC_FENCED,
392442542f5fSchristos								 0);
392542542f5fSchristos					kgem->nbatch += 10;
392642542f5fSchristos					assert(kgem->nbatch < kgem->surface);
392742542f5fSchristos					box++;
392842542f5fSchristos				} while (--nbox_this_time);
392942542f5fSchristos
393042542f5fSchristos				if (!nbox)
393142542f5fSchristos					break;
393242542f5fSchristos
393342542f5fSchristos				_kgem_submit(kgem);
393442542f5fSchristos				_kgem_set_mode(kgem, KGEM_BLT);
393542542f5fSchristos			} while (1);
393642542f5fSchristos		} else {
393742542f5fSchristos			cmd |= 6;
393842542f5fSchristos			do {
393913496ba1Ssnj				int nbox_this_time, rem;
394042542f5fSchristos
394142542f5fSchristos				nbox_this_time = nbox;
394213496ba1Ssnj				rem = kgem_batch_space(kgem);
394313496ba1Ssnj				if (8*nbox_this_time > rem)
394413496ba1Ssnj					nbox_this_time = rem / 8;
394542542f5fSchristos				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
394642542f5fSchristos					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
394713496ba1Ssnj				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
394813496ba1Ssnj				     __FUNCTION__, nbox_this_time, nbox, rem));
394913496ba1Ssnj				assert(nbox_this_time > 0);
395042542f5fSchristos				nbox -= nbox_this_time;
395142542f5fSchristos
395242542f5fSchristos				assert(sna->kgem.mode == KGEM_BLT);
395342542f5fSchristos				do {
395442542f5fSchristos					uint32_t *b = kgem->batch + kgem->nbatch;
395542542f5fSchristos
395642542f5fSchristos					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
395742542f5fSchristos					     __FUNCTION__,
395842542f5fSchristos					     box->x1, box->y1,
395942542f5fSchristos					     box->x2 - box->x1, box->y2 - box->y1));
396042542f5fSchristos
396142542f5fSchristos					assert(box->x1 + src_dx >= 0);
396242542f5fSchristos					assert(box->y1 + src_dy >= 0);
396342542f5fSchristos
396442542f5fSchristos					assert(box->x1 + dst_dx >= 0);
396542542f5fSchristos					assert(box->y1 + dst_dy >= 0);
396642542f5fSchristos
396742542f5fSchristos					b[0] = cmd;
396842542f5fSchristos					b[1] = br13;
396942542f5fSchristos					b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
397042542f5fSchristos					b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
397142542f5fSchristos					b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
397242542f5fSchristos							      I915_GEM_DOMAIN_RENDER << 16 |
397342542f5fSchristos							      I915_GEM_DOMAIN_RENDER |
397442542f5fSchristos							      KGEM_RELOC_FENCED,
397542542f5fSchristos							      0);
397642542f5fSchristos					b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
397742542f5fSchristos					b[6] = src_pitch;
397842542f5fSchristos					b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
397942542f5fSchristos							      I915_GEM_DOMAIN_RENDER << 16 |
398042542f5fSchristos							      KGEM_RELOC_FENCED,
398142542f5fSchristos							      0);
398242542f5fSchristos					kgem->nbatch += 8;
398342542f5fSchristos					assert(kgem->nbatch < kgem->surface);
398442542f5fSchristos					box++;
398542542f5fSchristos				} while (--nbox_this_time);
398642542f5fSchristos
398742542f5fSchristos				if (!nbox)
398842542f5fSchristos					break;
398942542f5fSchristos
399042542f5fSchristos				_kgem_submit(kgem);
399142542f5fSchristos				_kgem_set_mode(kgem, KGEM_BLT);
399242542f5fSchristos			} while (1);
399342542f5fSchristos		}
399442542f5fSchristos	}
399503b705cfSriastradh
399642542f5fSchristos	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
399742542f5fSchristos		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
399842542f5fSchristos		_kgem_submit(kgem);
399942542f5fSchristos	} else if (kgem->gen >= 060 && src_bo == dst_bo && kgem_check_batch(kgem, 3)) {
400042542f5fSchristos		uint32_t *b = kgem->batch + kgem->nbatch;
400142542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
400242542f5fSchristos		b[0] = XY_SETUP_CLIP;
400342542f5fSchristos		b[1] = b[2] = 0;
400442542f5fSchristos		kgem->nbatch += 3;
400542542f5fSchristos		assert(kgem->nbatch < kgem->surface);
400642542f5fSchristos	}
400703b705cfSriastradh
400842542f5fSchristos	sna->blt_state.fill_bo = 0;
400942542f5fSchristos	return true;
401042542f5fSchristos}
401103b705cfSriastradh
401242542f5fSchristosbool sna_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu,
401342542f5fSchristos				    struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
401442542f5fSchristos				    struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
401542542f5fSchristos				    int bpp, int alpha_fixup,
401642542f5fSchristos				    const BoxRec *box, int nbox)
401742542f5fSchristos{
401842542f5fSchristos	struct kgem *kgem = &sna->kgem;
401942542f5fSchristos	unsigned src_pitch, br13, cmd;
402003b705cfSriastradh
402142542f5fSchristos#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES
402242542f5fSchristos	return false;
402342542f5fSchristos#endif
402403b705cfSriastradh
402542542f5fSchristos	DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n",
402642542f5fSchristos	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox,
402742542f5fSchristos	    src_bo->tiling, dst_bo->tiling,
402842542f5fSchristos	    src_bo->pitch, dst_bo->pitch));
402903b705cfSriastradh
403042542f5fSchristos	if (wedged(sna) || !kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) {
403142542f5fSchristos		DBG(("%s: cannot blt to src? %d or dst? %d\n",
403242542f5fSchristos		     __FUNCTION__,
403342542f5fSchristos		     kgem_bo_can_blt(kgem, src_bo),
403442542f5fSchristos		     kgem_bo_can_blt(kgem, dst_bo)));
403542542f5fSchristos		return false;
403642542f5fSchristos	}
403703b705cfSriastradh
403842542f5fSchristos	cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 12 : 10);
403942542f5fSchristos	src_pitch = src_bo->pitch;
404042542f5fSchristos	if (kgem->gen >= 040 && src_bo->tiling) {
404142542f5fSchristos		cmd |= BLT_SRC_TILED;
404242542f5fSchristos		src_pitch >>= 2;
404342542f5fSchristos	}
404442542f5fSchristos	assert(src_pitch <= MAXSHORT);
404503b705cfSriastradh
404642542f5fSchristos	br13 = dst_bo->pitch;
404742542f5fSchristos	if (kgem->gen >= 040 && dst_bo->tiling) {
404842542f5fSchristos		cmd |= BLT_DST_TILED;
404942542f5fSchristos		br13 >>= 2;
405042542f5fSchristos	}
405142542f5fSchristos	assert(br13 <= MAXSHORT);
405203b705cfSriastradh
405342542f5fSchristos	br13 |= copy_ROP[alu] << 16;
405442542f5fSchristos	switch (bpp) {
405542542f5fSchristos	default: assert(0);
405642542f5fSchristos	case 32: br13 |= 1 << 25; /* RGB8888 */
405742542f5fSchristos	case 16: br13 |= 1 << 24; /* RGB565 */
405842542f5fSchristos	case 8: break;
405942542f5fSchristos	}
406003b705cfSriastradh
406142542f5fSchristos	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
406242542f5fSchristos	if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
406342542f5fSchristos		DBG(("%s: cannot fit src+dst into aperture\n", __FUNCTION__));
406442542f5fSchristos		return false;
406542542f5fSchristos	}
406603b705cfSriastradh
406742542f5fSchristos	/* Compare first box against a previous fill */
406842542f5fSchristos	if ((alu == GXcopy || alu == GXclear || alu == GXset) &&
406942542f5fSchristos	    kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) {
407042542f5fSchristos		if (kgem->gen >= 0100) {
407142542f5fSchristos			if (kgem->nbatch >= 7 &&
407242542f5fSchristos			    kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
407342542f5fSchristos			    kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
407442542f5fSchristos			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
407542542f5fSchristos				DBG(("%s: deleting last fill\n", __FUNCTION__));
407642542f5fSchristos				kgem->nbatch -= 7;
407742542f5fSchristos				kgem->nreloc--;
407842542f5fSchristos			}
407942542f5fSchristos		} else {
408042542f5fSchristos			if (kgem->nbatch >= 6 &&
408142542f5fSchristos			    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
408242542f5fSchristos			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
408342542f5fSchristos			    kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
408442542f5fSchristos				DBG(("%s: deleting last fill\n", __FUNCTION__));
408542542f5fSchristos				kgem->nbatch -= 6;
408642542f5fSchristos				kgem->nreloc--;
408742542f5fSchristos			}
408842542f5fSchristos		}
408942542f5fSchristos	}
409003b705cfSriastradh
409142542f5fSchristos	while (nbox--) {
409242542f5fSchristos		uint32_t *b;
409303b705cfSriastradh
409442542f5fSchristos		if (!kgem_check_batch(kgem, 14) ||
409542542f5fSchristos		    !kgem_check_reloc(kgem, 2)) {
409603b705cfSriastradh			_kgem_submit(kgem);
409703b705cfSriastradh			_kgem_set_mode(kgem, KGEM_BLT);
409842542f5fSchristos		}
409942542f5fSchristos
410042542f5fSchristos		assert(sna->kgem.mode == KGEM_BLT);
410142542f5fSchristos		b = kgem->batch + kgem->nbatch;
410242542f5fSchristos		b[0] = cmd;
410342542f5fSchristos		b[1] = br13;
410442542f5fSchristos		b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
410542542f5fSchristos		b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
410642542f5fSchristos		if (sna->kgem.gen >= 0100) {
410742542f5fSchristos			*(uint64_t *)(b+4) =
410842542f5fSchristos				kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
410942542f5fSchristos						 I915_GEM_DOMAIN_RENDER << 16 |
411042542f5fSchristos						 I915_GEM_DOMAIN_RENDER |
411142542f5fSchristos						 KGEM_RELOC_FENCED,
411242542f5fSchristos						 0);
411342542f5fSchristos			b[6] = src_pitch;
411442542f5fSchristos			b[7] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx);
411542542f5fSchristos			*(uint64_t *)(b+8) =
411642542f5fSchristos				kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
411742542f5fSchristos						 I915_GEM_DOMAIN_RENDER << 16 |
411842542f5fSchristos						 KGEM_RELOC_FENCED,
411942542f5fSchristos						 0);
412042542f5fSchristos			b[10] = alpha_fixup;
412142542f5fSchristos			b[11] = alpha_fixup;
412242542f5fSchristos			b[12] = 0;
412342542f5fSchristos			b[13] = 0;
412442542f5fSchristos			kgem->nbatch += 14;
412542542f5fSchristos		} else {
412642542f5fSchristos			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
412742542f5fSchristos					      I915_GEM_DOMAIN_RENDER << 16 |
412842542f5fSchristos					      I915_GEM_DOMAIN_RENDER |
412942542f5fSchristos					      KGEM_RELOC_FENCED,
413042542f5fSchristos					      0);
413142542f5fSchristos			b[5] = src_pitch;
413242542f5fSchristos			b[6] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx);
413342542f5fSchristos			b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
413442542f5fSchristos					      I915_GEM_DOMAIN_RENDER << 16 |
413542542f5fSchristos					      KGEM_RELOC_FENCED,
413642542f5fSchristos					      0);
413742542f5fSchristos			b[8] = alpha_fixup;
413842542f5fSchristos			b[9] = alpha_fixup;
413942542f5fSchristos			b[10] = 0;
414042542f5fSchristos			b[11] = 0;
414142542f5fSchristos			kgem->nbatch += 12;
414242542f5fSchristos		}
414342542f5fSchristos		assert(kgem->nbatch < kgem->surface);
414442542f5fSchristos		box++;
414503b705cfSriastradh	}
414603b705cfSriastradh
414703b705cfSriastradh	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
414842542f5fSchristos		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
414903b705cfSriastradh		_kgem_submit(kgem);
415003b705cfSriastradh	}
415103b705cfSriastradh
415203b705cfSriastradh	sna->blt_state.fill_bo = 0;
415303b705cfSriastradh	return true;
415403b705cfSriastradh}
415503b705cfSriastradh
415603b705cfSriastradhstatic void box_extents(const BoxRec *box, int n, BoxRec *extents)
415703b705cfSriastradh{
415803b705cfSriastradh	*extents = *box;
415903b705cfSriastradh	while (--n) {
416003b705cfSriastradh		box++;
416103b705cfSriastradh		if (box->x1 < extents->x1)
416203b705cfSriastradh			extents->x1 = box->x1;
416303b705cfSriastradh		if (box->y1 < extents->y1)
416403b705cfSriastradh			extents->y1 = box->y1;
416503b705cfSriastradh
416603b705cfSriastradh		if (box->x2 > extents->x2)
416703b705cfSriastradh			extents->x2 = box->x2;
416803b705cfSriastradh		if (box->y2 > extents->y2)
416903b705cfSriastradh			extents->y2 = box->y2;
417003b705cfSriastradh	}
417103b705cfSriastradh}
417203b705cfSriastradh
417303b705cfSriastradhbool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu,
417442542f5fSchristos				 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
417542542f5fSchristos				 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
417603b705cfSriastradh				 const BoxRec *box, int nbox)
417703b705cfSriastradh{
417803b705cfSriastradh	struct kgem_bo *free_bo = NULL;
417903b705cfSriastradh	bool ret;
418003b705cfSriastradh
418103b705cfSriastradh	DBG(("%s: alu=%d, n=%d\n", __FUNCTION__, alu, nbox));
418203b705cfSriastradh
418342542f5fSchristos	if (!sna_blt_compare_depth(src, dst)) {
418403b705cfSriastradh		DBG(("%s: mismatching depths %d -> %d\n",
418542542f5fSchristos		     __FUNCTION__, src->depth, dst->depth));
418603b705cfSriastradh		return false;
418703b705cfSriastradh	}
418803b705cfSriastradh
418903b705cfSriastradh	if (src_bo == dst_bo) {
419003b705cfSriastradh		DBG(("%s: dst == src\n", __FUNCTION__));
419103b705cfSriastradh
419203b705cfSriastradh		if (src_bo->tiling == I915_TILING_Y &&
419303b705cfSriastradh		    kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) {
419403b705cfSriastradh			struct kgem_bo *bo;
419503b705cfSriastradh
419603b705cfSriastradh			DBG(("%s: src is Y-tiled\n", __FUNCTION__));
419703b705cfSriastradh
419842542f5fSchristos			if (src->type != DRAWABLE_PIXMAP)
419942542f5fSchristos				return false;
420042542f5fSchristos
420142542f5fSchristos			assert(sna_pixmap((PixmapPtr)src)->gpu_bo == src_bo);
420242542f5fSchristos			bo = sna_pixmap_change_tiling((PixmapPtr)src, I915_TILING_X);
420303b705cfSriastradh			if (bo == NULL) {
420403b705cfSriastradh				BoxRec extents;
420503b705cfSriastradh
420603b705cfSriastradh				DBG(("%s: y-tiling conversion failed\n",
420703b705cfSriastradh				     __FUNCTION__));
420803b705cfSriastradh
420903b705cfSriastradh				box_extents(box, nbox, &extents);
421003b705cfSriastradh				free_bo = kgem_create_2d(&sna->kgem,
421103b705cfSriastradh							 extents.x2 - extents.x1,
421203b705cfSriastradh							 extents.y2 - extents.y1,
421342542f5fSchristos							 src->bitsPerPixel,
421403b705cfSriastradh							 I915_TILING_X, 0);
421503b705cfSriastradh				if (free_bo == NULL) {
421603b705cfSriastradh					DBG(("%s: fallback -- temp allocation failed\n",
421703b705cfSriastradh					     __FUNCTION__));
421803b705cfSriastradh					return false;
421903b705cfSriastradh				}
422003b705cfSriastradh
422103b705cfSriastradh				if (!sna_blt_copy_boxes(sna, GXcopy,
422203b705cfSriastradh							src_bo, src_dx, src_dy,
422303b705cfSriastradh							free_bo, -extents.x1, -extents.y1,
422442542f5fSchristos							src->bitsPerPixel,
422503b705cfSriastradh							box, nbox)) {
422603b705cfSriastradh					DBG(("%s: fallback -- temp copy failed\n",
422703b705cfSriastradh					     __FUNCTION__));
422803b705cfSriastradh					kgem_bo_destroy(&sna->kgem, free_bo);
422903b705cfSriastradh					return false;
423003b705cfSriastradh				}
423103b705cfSriastradh
423203b705cfSriastradh				src_dx = -extents.x1;
423303b705cfSriastradh				src_dy = -extents.y1;
423403b705cfSriastradh				src_bo = free_bo;
423503b705cfSriastradh			} else
423603b705cfSriastradh				dst_bo = src_bo = bo;
423703b705cfSriastradh		}
423803b705cfSriastradh	} else {
423903b705cfSriastradh		if (src_bo->tiling == I915_TILING_Y &&
424003b705cfSriastradh		    kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) {
424103b705cfSriastradh			DBG(("%s: src is y-tiled\n", __FUNCTION__));
424242542f5fSchristos			if (src->type != DRAWABLE_PIXMAP)
424342542f5fSchristos				return false;
424442542f5fSchristos			assert(sna_pixmap((PixmapPtr)src)->gpu_bo == src_bo);
424542542f5fSchristos			src_bo = sna_pixmap_change_tiling((PixmapPtr)src, I915_TILING_X);
424603b705cfSriastradh			if (src_bo == NULL) {
424703b705cfSriastradh				DBG(("%s: fallback -- src y-tiling conversion failed\n",
424803b705cfSriastradh				     __FUNCTION__));
424903b705cfSriastradh				return false;
425003b705cfSriastradh			}
425103b705cfSriastradh		}
425203b705cfSriastradh
425303b705cfSriastradh		if (dst_bo->tiling == I915_TILING_Y &&
425403b705cfSriastradh		    kgem_bo_blt_pitch_is_ok(&sna->kgem, dst_bo)) {
425503b705cfSriastradh			DBG(("%s: dst is y-tiled\n", __FUNCTION__));
425642542f5fSchristos			if (dst->type != DRAWABLE_PIXMAP)
425742542f5fSchristos				return false;
425842542f5fSchristos			assert(sna_pixmap((PixmapPtr)dst)->gpu_bo == dst_bo);
425942542f5fSchristos			dst_bo = sna_pixmap_change_tiling((PixmapPtr)dst, I915_TILING_X);
426003b705cfSriastradh			if (dst_bo == NULL) {
426103b705cfSriastradh				DBG(("%s: fallback -- dst y-tiling conversion failed\n",
426203b705cfSriastradh				     __FUNCTION__));
426303b705cfSriastradh				return false;
426403b705cfSriastradh			}
426503b705cfSriastradh		}
426603b705cfSriastradh	}
426703b705cfSriastradh
426803b705cfSriastradh	ret =  sna_blt_copy_boxes(sna, alu,
426903b705cfSriastradh				  src_bo, src_dx, src_dy,
427003b705cfSriastradh				  dst_bo, dst_dx, dst_dy,
427142542f5fSchristos				  dst->bitsPerPixel,
427203b705cfSriastradh				  box, nbox);
427303b705cfSriastradh
427403b705cfSriastradh	if (free_bo)
427503b705cfSriastradh		kgem_bo_destroy(&sna->kgem, free_bo);
427603b705cfSriastradh
427703b705cfSriastradh	return ret;
427803b705cfSriastradh}
4279