sna_blt.c revision 42542f5f
/*
 * Based on code from intel_uxa.c and i830_xaa.c
 * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
 * Copyright (c) 2005 Jesse Barnes <jbarnes@virtuousgeek.org>
 * Copyright (c) 2009-2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_render.h"
#include "sna_render_inline.h"
#include "sna_reg.h"
#include "rop.h"

#define NO_BLT_COMPOSITE 0
#define NO_BLT_COPY 0
#define NO_BLT_COPY_BOXES 0
#define NO_BLT_FILL 0
#define NO_BLT_FILL_BOXES 0

#ifndef PICT_TYPE_BGRA
#define PICT_TYPE_BGRA 8
#endif

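/*
 * Lookup tables mapping the 16 X11 raster operations (GXclear .. GXset)
 * to BLT ROP codes, indexed directly by the alu value.  copy_ROP
 * combines destination with source (e.g. GXxor -> ROP_DSx), fill_ROP
 * combines destination with the solid pattern colour (GXxor -> ROP_DPx).
 */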
static const uint8_t copy_ROP[] = {
	ROP_0,                  /* GXclear */
	ROP_DSa,                /* GXand */
	ROP_SDna,               /* GXandReverse */
	ROP_S,                  /* GXcopy */
	ROP_DSna,               /* GXandInverted */
	ROP_D,                  /* GXnoop */
	ROP_DSx,                /* GXxor */
	ROP_DSo,                /* GXor */
	ROP_DSon,               /* GXnor */
	ROP_DSxn,               /* GXequiv */
	ROP_Dn,                 /* GXinvert */
	ROP_SDno,               /* GXorReverse */
	ROP_Sn,                 /* GXcopyInverted */
	ROP_DSno,               /* GXorInverted */
	ROP_DSan,               /* GXnand */
	ROP_1                   /* GXset */
};

static const uint8_t fill_ROP[] = {
	ROP_0,
	ROP_DPa,
	ROP_PDna,
	ROP_P,
	ROP_DPna,
	ROP_D,
	ROP_DPx,
	ROP_DPo,
	ROP_DPon,
	ROP_PDxn,
	ROP_Dn,
	ROP_PDno,
	ROP_Pn,
	ROP_DPno,
	ROP_DPan,
	ROP_1
};

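/*
 * nop_done() is the cheapest completion callback: nothing to clean up,
 * but if the batch already references more than one buffer and the ring
 * has gone idle, submit immediately rather than waiting for the next
 * natural flush point, so the GPU is kept busy.
 */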
static void nop_done(struct sna *sna, const struct sna_composite_op *op)
{
	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
	if (sna->kgem.nexec > 1 && __kgem_ring_empty(&sna->kgem)) {
		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
		_kgem_submit(&sna->kgem);
	}
	(void)op;
}

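/*
 * On gen6+ the blitter runs on its own ring.  After an overlapping copy
 * to the same bo we also append an XY_SETUP_CLIP packet with an empty
 * rectangle (b[1] = b[2] = 0); presumably this resets the clip state so
 * that following setup-less commands are not clipped by whatever the
 * copy left behind.
 */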
static void gen6_blt_copy_done(struct sna *sna, const struct sna_composite_op *op)
{
	struct kgem *kgem = &sna->kgem;

	assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
		_kgem_submit(kgem);
		return;
	}

	if (kgem_check_batch(kgem, 3)) {
		uint32_t *b = kgem->batch + kgem->nbatch;
		assert(sna->kgem.mode == KGEM_BLT);
		b[0] = XY_SETUP_CLIP;
		b[1] = b[2] = 0;
		kgem->nbatch += 3;
		assert(kgem->nbatch < kgem->surface);
	}
	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
	(void)op;
}

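/*
 * BR13 layout, as programmed below: destination pitch in the low 16
 * bits (pre-shifted to dword units for tiled targets on gen4+), ROP in
 * bits 16-23, and colour depth in bits 24-25 -- 0b00 for 8bpp, 0b01 for
 * 16bpp and 0b11 for 32bpp, which is why the bpp switch deliberately
 * falls through from the 32bpp case into the 16bpp case.  A worked
 * example: a linear 32bpp fill with GXcopy and pitch 4096 yields
 * br13 = 1<<31 | 3<<24 | ROP_P<<16 | 4096.
 */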
static bool sna_blt_fill_init(struct sna *sna,
			      struct sna_blt_state *blt,
			      struct kgem_bo *bo,
			      int bpp,
			      uint8_t alu,
			      uint32_t pixel)
{
	struct kgem *kgem = &sna->kgem;

	assert(kgem_bo_can_blt(kgem, bo));
	assert(bo->tiling != I915_TILING_Y);
	blt->bo[0] = bo;

	blt->br13 = bo->pitch;
	blt->cmd = XY_SCANLINE_BLT;
	if (kgem->gen >= 040 && bo->tiling) {
		blt->cmd |= BLT_DST_TILED;
		blt->br13 >>= 2;
	}
	assert(blt->br13 <= MAXSHORT);

	if (alu == GXclear)
		pixel = 0;
	else if (alu == GXcopy) {
		if (pixel == 0)
			alu = GXclear;
		else if (pixel == -1)
			alu = GXset;
	}

	blt->br13 |= 1<<31 | (fill_ROP[alu] << 16);
	switch (bpp) {
	default: assert(0);
	case 32: blt->br13 |= 1 << 25; /* RGB8888 */
		 /* fall through */
	case 16: blt->br13 |= 1 << 24; /* RGB565 */
		 /* fall through */
	case 8: break;
	}

	blt->pixel = pixel;
	blt->bpp = bpp;
	blt->alu = alu;

	kgem_set_mode(kgem, KGEM_BLT, bo);
	if (!kgem_check_batch(kgem, 14) ||
	    !kgem_check_bo_fenced(kgem, bo)) {
		kgem_submit(kgem);
		if (!kgem_check_bo_fenced(kgem, bo))
			return false;
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	if (sna->blt_state.fill_bo != bo->unique_id ||
	    sna->blt_state.fill_pixel != pixel ||
	    sna->blt_state.fill_alu != alu)
	{
		uint32_t *b;

		if (!kgem_check_reloc(kgem, 1)) {
			_kgem_submit(kgem);
			if (!kgem_check_bo_fenced(kgem, bo))
				return false;
			_kgem_set_mode(kgem, KGEM_BLT);
		}

		assert(sna->kgem.mode == KGEM_BLT);
		b = kgem->batch + kgem->nbatch;
		if (sna->kgem.gen >= 0100) {
			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
			if (bpp == 32)
				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
			if (bo->tiling)
				b[0] |= BLT_DST_TILED;
			b[1] = blt->br13;
			b[2] = 0;
			b[3] = 0;
			*(uint64_t *)(b+4) =
				kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
						 I915_GEM_DOMAIN_RENDER << 16 |
						 I915_GEM_DOMAIN_RENDER |
						 KGEM_RELOC_FENCED,
						 0);
			b[6] = pixel;
			b[7] = pixel;
			b[8] = 0;
			b[9] = 0;
			kgem->nbatch += 10;
		} else {
			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
			if (bpp == 32)
				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
			if (bo->tiling && kgem->gen >= 040)
				b[0] |= BLT_DST_TILED;
			b[1] = blt->br13;
			b[2] = 0;
			b[3] = 0;
			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
					      I915_GEM_DOMAIN_RENDER << 16 |
					      I915_GEM_DOMAIN_RENDER |
					      KGEM_RELOC_FENCED,
					      0);
			b[5] = pixel;
			b[6] = pixel;
			b[7] = 0;
			b[8] = 0;
			kgem->nbatch += 9;
		}
		assert(kgem->nbatch < kgem->surface);

		sna->blt_state.fill_bo = bo->unique_id;
		sna->blt_state.fill_pixel = pixel;
		sna->blt_state.fill_alu = alu;
	}

	return true;
}

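/*
 * Restart a fill in a fresh batch: sna_blt_fill_one() only reserves
 * three dwords per rectangle, so when the batch runs out this slow path
 * submits it (any pending relocations force that) and re-emits the
 * XY_SETUP_MONO_PATTERN_SL_BLT setup packet from the cached blt state
 * before the caller continues with XY_SCANLINE_BLT packets.
 */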
noinline static void sna_blt_fill_begin(struct sna *sna,
					const struct sna_blt_state *blt)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	if (kgem->nreloc) {
		_kgem_submit(kgem);
		_kgem_set_mode(kgem, KGEM_BLT);
		assert(kgem->nbatch == 0);
	}

	assert(kgem->mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	if (sna->kgem.gen >= 0100) {
		b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
		if (blt->bpp == 32)
			b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		if (blt->bo[0]->tiling)
			b[0] |= BLT_DST_TILED;
		b[1] = blt->br13;
		b[2] = 0;
		b[3] = 0;
		*(uint64_t *)(b+4) =
			kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[0],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 I915_GEM_DOMAIN_RENDER |
					 KGEM_RELOC_FENCED,
					 0);
		b[6] = blt->pixel;
		b[7] = blt->pixel;
		b[8] = 0;
		b[9] = 0;
		kgem->nbatch += 10;
	} else {
		b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
		if (blt->bpp == 32)
			b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		if (blt->bo[0]->tiling && kgem->gen >= 040)
			b[0] |= BLT_DST_TILED;
		b[1] = blt->br13;
		b[2] = 0;
		b[3] = 0;
		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[0],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      I915_GEM_DOMAIN_RENDER |
				      KGEM_RELOC_FENCED,
				      0);
		b[5] = blt->pixel;
		b[6] = blt->pixel;
		b[7] = 0;
		b[8] = 0;
		kgem->nbatch += 9;
	}
}

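/*
 * With the setup packet in place each rectangle costs three dwords:
 * XY_SCANLINE_BLT plus two corners packed as y << 16 | x.  For example,
 * filling (x, y, w, h) = (10, 20, 30, 40) emits b[1] = 20 << 16 | 10
 * and b[2] = b[1] + (40 << 16 | 30), i.e. the exclusive corner
 * (x2, y2) = (40, 60).
 */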
inline static void sna_blt_fill_one(struct sna *sna,
				    const struct sna_blt_state *blt,
				    int16_t x, int16_t y,
				    int16_t width, int16_t height)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	DBG(("%s: (%d, %d) x (%d, %d): %08x\n",
	     __FUNCTION__, x, y, width, height, blt->pixel));

	assert(x >= 0);
	assert(y >= 0);
	assert((y+height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));

	if (!kgem_check_batch(kgem, 3))
		sna_blt_fill_begin(sna, blt);

	assert(sna->kgem.mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	kgem->nbatch += 3;
	assert(kgem->nbatch < kgem->surface);

	b[0] = blt->cmd;
	b[1] = y << 16 | x;
	b[2] = b[1] + (height << 16 | width);
}

static bool sna_blt_copy_init(struct sna *sna,
			      struct sna_blt_state *blt,
			      struct kgem_bo *src,
			      struct kgem_bo *dst,
			      int bpp,
			      uint8_t alu)
{
	struct kgem *kgem = &sna->kgem;

	assert(kgem_bo_can_blt(kgem, src));
	assert(kgem_bo_can_blt(kgem, dst));

	blt->bo[0] = src;
	blt->bo[1] = dst;

	blt->cmd = XY_SRC_COPY_BLT_CMD | (kgem->gen >= 0100 ? 8 : 6);
	if (bpp == 32)
		blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;

	blt->pitch[0] = src->pitch;
	if (kgem->gen >= 040 && src->tiling) {
		blt->cmd |= BLT_SRC_TILED;
		blt->pitch[0] >>= 2;
	}
	assert(blt->pitch[0] <= MAXSHORT);

	blt->pitch[1] = dst->pitch;
	if (kgem->gen >= 040 && dst->tiling) {
		blt->cmd |= BLT_DST_TILED;
		blt->pitch[1] >>= 2;
	}
	assert(blt->pitch[1] <= MAXSHORT);

	blt->overwrites = alu == GXcopy || alu == GXclear || alu == GXset;
	blt->br13 = (copy_ROP[alu] << 16) | blt->pitch[1];
	switch (bpp) {
	default: assert(0);
	case 32: blt->br13 |= 1 << 25; /* RGB8888 */
		 /* fall through */
	case 16: blt->br13 |= 1 << 24; /* RGB565 */
		 /* fall through */
	case 8: break;
	}

	kgem_set_mode(kgem, KGEM_BLT, dst);
	if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
			return false;
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	sna->blt_state.fill_bo = 0;
	return true;
}

static bool sna_blt_alpha_fixup_init(struct sna *sna,
				     struct sna_blt_state *blt,
				     struct kgem_bo *src,
				     struct kgem_bo *dst,
				     int bpp, uint32_t alpha)
{
	struct kgem *kgem = &sna->kgem;

	DBG(("%s: dst handle=%d, src handle=%d, bpp=%d, fixup=%08x\n",
	     __FUNCTION__, dst->handle, src->handle, bpp, alpha));
	assert(kgem_bo_can_blt(kgem, src));
	assert(kgem_bo_can_blt(kgem, dst));

	blt->bo[0] = src;
	blt->bo[1] = dst;

	blt->cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 12 : 10);
	blt->pitch[0] = src->pitch;
	if (kgem->gen >= 040 && src->tiling) {
		blt->cmd |= BLT_SRC_TILED;
		blt->pitch[0] >>= 2;
	}
	assert(blt->pitch[0] <= MAXSHORT);

	blt->pitch[1] = dst->pitch;
	if (kgem->gen >= 040 && dst->tiling) {
		blt->cmd |= BLT_DST_TILED;
		blt->pitch[1] >>= 2;
	}
	assert(blt->pitch[1] <= MAXSHORT);

	blt->overwrites = 1;
	blt->br13 = (0xfc << 16) | blt->pitch[1];
	switch (bpp) {
	default: assert(0);
	case 32: blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		 blt->br13 |= 1 << 25; /* RGB8888 */
		 /* fall through */
	case 16: blt->br13 |= 1 << 24; /* RGB565 */
		 /* fall through */
	case 8: break;
	}
	blt->pixel = alpha;

	kgem_set_mode(kgem, KGEM_BLT, dst);
	if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
			return false;
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	sna->blt_state.fill_bo = 0;
	return true;
}

static void sna_blt_alpha_fixup_one(struct sna *sna,
				    const struct sna_blt_state *blt,
				    int src_x, int src_y,
				    int width, int height,
				    int dst_x, int dst_y)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n",
	     __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height));

	assert(src_x >= 0);
	assert(src_y >= 0);
	assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
	assert(dst_x >= 0);
	assert(dst_y >= 0);
	assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1]));
	assert(width > 0);
	assert(height > 0);

	if (!kgem_check_batch(kgem, 14) ||
	    !kgem_check_reloc(kgem, 2)) {
		_kgem_submit(kgem);
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	assert(sna->kgem.mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	b[0] = blt->cmd;
	b[1] = blt->br13;
	b[2] = (dst_y << 16) | dst_x;
	b[3] = ((dst_y + height) << 16) | (dst_x + width);
	if (sna->kgem.gen >= 0100) {
		*(uint64_t *)(b+4) =
			kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 I915_GEM_DOMAIN_RENDER |
					 KGEM_RELOC_FENCED,
					 0);
		b[6] = blt->pitch[0];
		b[7] = (src_y << 16) | src_x;
		*(uint64_t *)(b+8) =
			kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 KGEM_RELOC_FENCED,
					 0);
		b[10] = blt->pixel;
		b[11] = blt->pixel;
		b[12] = 0;
		b[13] = 0;
		kgem->nbatch += 14;
	} else {
		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      I915_GEM_DOMAIN_RENDER |
				      KGEM_RELOC_FENCED,
				      0);
		b[5] = blt->pitch[0];
		b[6] = (src_y << 16) | src_x;
		b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      KGEM_RELOC_FENCED,
				      0);
		b[8] = blt->pixel;
		b[9] = blt->pixel;
		b[10] = 0;
		b[11] = 0;
		kgem->nbatch += 12;
	}
	assert(kgem->nbatch < kgem->surface);
}

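/*
 * sna_blt_copy_one() does more than emit an XY_SRC_COPY packet: when
 * the alu overwrites the destination (GXcopy/GXclear/GXset) it peeks at
 * the batch tail, and if the previous packet was an XY_COLOR_BLT fill
 * of exactly the same rectangle, that fill is dead code -- it is either
 * rewritten in place into this copy or popped along with its
 * relocation, saving batch space and a wasted fill pass.
 */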
static void sna_blt_copy_one(struct sna *sna,
			     const struct sna_blt_state *blt,
			     int src_x, int src_y,
			     int width, int height,
			     int dst_x, int dst_y)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n",
	     __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height));

	assert(src_x >= 0);
	assert(src_y >= 0);
	assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
	assert(dst_x >= 0);
	assert(dst_y >= 0);
	assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1]));
	assert(width > 0);
	assert(height > 0);

	/* Compare against a previous fill */
	if (blt->overwrites &&
	    kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[1]->target_handle) {
		if (sna->kgem.gen >= 0100) {
			if (kgem->nbatch >= 7 &&
			    kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (blt->cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
			    kgem->batch[kgem->nbatch-5] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) {
				DBG(("%s: replacing last fill\n", __FUNCTION__));
				if (kgem_check_batch(kgem, 3)) {
					assert(kgem->mode == KGEM_BLT);
					b = kgem->batch + kgem->nbatch - 7;
					b[0] = blt->cmd;
					b[1] = blt->br13;
					b[6] = (src_y << 16) | src_x;
					b[7] = blt->pitch[0];
					*(uint64_t *)(b+8) =
						kgem_add_reloc64(kgem, kgem->nbatch + 8 - 7, blt->bo[0],
								 I915_GEM_DOMAIN_RENDER << 16 |
								 KGEM_RELOC_FENCED,
								 0);
					kgem->nbatch += 3;
					assert(kgem->nbatch < kgem->surface);
					return;
				}
				kgem->nbatch -= 7;
				kgem->nreloc--;
			}
		} else {
			if (kgem->nbatch >= 6 &&
			    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (blt->cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) &&
			    kgem->batch[kgem->nbatch-3] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) {
				DBG(("%s: replacing last fill\n", __FUNCTION__));
				if (kgem_check_batch(kgem, 8-6)) {
					assert(kgem->mode == KGEM_BLT);
					b = kgem->batch + kgem->nbatch - 6;
					b[0] = blt->cmd;
					b[1] = blt->br13;
					b[5] = (src_y << 16) | src_x;
					b[6] = blt->pitch[0];
					b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7 - 6, blt->bo[0],
							      I915_GEM_DOMAIN_RENDER << 16 |
							      KGEM_RELOC_FENCED,
							      0);
					kgem->nbatch += 8 - 6;
					assert(kgem->nbatch < kgem->surface);
					return;
				}
				kgem->nbatch -= 6;
				kgem->nreloc--;
			}
		}
	}

	if (!kgem_check_batch(kgem, 10) ||
	    !kgem_check_reloc(kgem, 2)) {
		_kgem_submit(kgem);
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	assert(sna->kgem.mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	b[0] = blt->cmd;
	b[1] = blt->br13;
	b[2] = (dst_y << 16) | dst_x;
	b[3] = ((dst_y + height) << 16) | (dst_x + width);
	if (kgem->gen >= 0100) {
		*(uint64_t *)(b+4) =
			kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 I915_GEM_DOMAIN_RENDER |
					 KGEM_RELOC_FENCED,
					 0);
		b[6] = (src_y << 16) | src_x;
		b[7] = blt->pitch[0];
		*(uint64_t *)(b+8) =
			kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 KGEM_RELOC_FENCED,
					 0);
		kgem->nbatch += 10;
	} else {
		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      I915_GEM_DOMAIN_RENDER |
				      KGEM_RELOC_FENCED,
				      0);
		b[5] = (src_y << 16) | src_x;
		b[6] = blt->pitch[0];
		b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      KGEM_RELOC_FENCED,
				      0);
		kgem->nbatch += 8;
	}
	assert(kgem->nbatch < kgem->surface);
}

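/*
 * Expand an n-bit channel to 16 bits by left-aligning the value and
 * replicating its bit pattern downwards; this maps 0 to 0x0000 and the
 * channel maximum to exactly 0xffff.  For a 5-bit red of 0x1f:
 * 0x1f << 11 = 0xf800, then 0xf800 | 0xf800 >> 5 = 0xffc0, then
 * 0xffc0 | 0xffc0 >> 10 = 0xffff.
 */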
bool
sna_get_rgba_from_pixel(uint32_t pixel,
			uint16_t *red,
			uint16_t *green,
			uint16_t *blue,
			uint16_t *alpha,
			uint32_t format)
{
	int rbits, bbits, gbits, abits;
	int rshift, bshift, gshift, ashift;

	rbits = PICT_FORMAT_R(format);
	gbits = PICT_FORMAT_G(format);
	bbits = PICT_FORMAT_B(format);
	abits = PICT_FORMAT_A(format);

	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) {
		rshift = gshift = bshift = ashift = 0;
	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
		bshift = 0;
		gshift = bbits;
		rshift = gshift + gbits;
		ashift = rshift + rbits;
	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
		rshift = 0;
		gshift = rbits;
		bshift = gshift + gbits;
		ashift = bshift + bbits;
	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
		ashift = 0;
		rshift = abits;
		if (abits == 0)
			rshift = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits);
		gshift = rshift + rbits;
		bshift = gshift + gbits;
	} else {
		return false;
	}

	if (rbits) {
		*red = ((pixel >> rshift) & ((1 << rbits) - 1)) << (16 - rbits);
		while (rbits < 16) {
			*red |= *red >> rbits;
			rbits <<= 1;
		}
	} else
		*red = 0;

	if (gbits) {
		*green = ((pixel >> gshift) & ((1 << gbits) - 1)) << (16 - gbits);
		while (gbits < 16) {
			*green |= *green >> gbits;
			gbits <<= 1;
		}
	} else
		*green = 0;

	if (bbits) {
		*blue = ((pixel >> bshift) & ((1 << bbits) - 1)) << (16 - bbits);
		while (bbits < 16) {
			*blue |= *blue >> bbits;
			bbits <<= 1;
		}
	} else
		*blue = 0;

	if (abits) {
		*alpha = ((pixel >> ashift) & ((1 << abits) - 1)) << (16 - abits);
		while (abits < 16) {
			*alpha |= *alpha >> abits;
			abits <<= 1;
		}
	} else
		*alpha = 0xffff;

	return true;
}

bool
_sna_get_pixel_from_rgba(uint32_t *pixel,
			uint16_t red,
			uint16_t green,
			uint16_t blue,
			uint16_t alpha,
			uint32_t format)
{
	int rbits, bbits, gbits, abits;
	int rshift, bshift, gshift, ashift;

	rbits = PICT_FORMAT_R(format);
	gbits = PICT_FORMAT_G(format);
	bbits = PICT_FORMAT_B(format);
	abits = PICT_FORMAT_A(format);
	if (abits == 0)
	    abits = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits);

	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) {
		*pixel = alpha >> (16 - abits);
		return true;
	}

	if (!PICT_FORMAT_COLOR(format))
		return false;

	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
		bshift = 0;
		gshift = bbits;
		rshift = gshift + gbits;
		ashift = rshift + rbits;
	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
		rshift = 0;
		gshift = rbits;
		bshift = gshift + gbits;
		ashift = bshift + bbits;
	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
		ashift = 0;
		rshift = abits;
		gshift = rshift + rbits;
		bshift = gshift + gbits;
	} else
		return false;

	*pixel = 0;
	*pixel |= (blue  >> (16 - bbits)) << bshift;
	*pixel |= (green >> (16 - gbits)) << gshift;
	*pixel |= (red   >> (16 - rbits)) << rshift;
	*pixel |= (alpha >> (16 - abits)) << ashift;

	return true;
}

uint32_t
sna_rgba_for_color(uint32_t color, int depth)
{
	return color_convert(color, sna_format_for_depth(depth), PICT_a8r8g8b8);
}

uint32_t
sna_rgba_to_color(uint32_t rgba, uint32_t format)
{
	return color_convert(rgba, PICT_a8r8g8b8, format);
}

static uint32_t
get_pixel(PicturePtr picture)
{
	PixmapPtr pixmap = get_drawable_pixmap(picture->pDrawable);

	DBG(("%s: %p\n", __FUNCTION__, pixmap));

	if (!sna_pixmap_move_to_cpu(pixmap, MOVE_READ))
		return 0;

	switch (pixmap->drawable.bitsPerPixel) {
	case 32: return *(uint32_t *)pixmap->devPrivate.ptr;
	case 16: return *(uint16_t *)pixmap->devPrivate.ptr;
	default: return *(uint8_t *)pixmap->devPrivate.ptr;
	}
}

static uint32_t
get_solid_color(PicturePtr picture, uint32_t format)
{
	if (picture->pSourcePict) {
		PictSolidFill *fill = (PictSolidFill *)picture->pSourcePict;
		return color_convert(fill->color, PICT_a8r8g8b8, format);
	} else
		return color_convert(get_pixel(picture), picture->format, format);
}

static bool
is_solid(PicturePtr picture)
{
	if (picture->pSourcePict) {
		if (picture->pSourcePict->type == SourcePictTypeSolidFill)
			return true;
	}

	if (picture->pDrawable) {
		if (picture->pDrawable->width  == 1 &&
		    picture->pDrawable->height == 1 &&
		    picture->repeat)
			return true;
	}

	return false;
}

bool
sna_picture_is_solid(PicturePtr picture, uint32_t *color)
{
	if (!is_solid(picture))
		return false;

	if (color)
		*color = get_solid_color(picture, PICT_a8r8g8b8);
	return true;
}

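/*
 * The position of the alpha bits depends on the PICT_TYPE: for
 * PICT_TYPE_A and PICT_TYPE_BGRA they sit in the low bits of the pixel,
 * for PICT_TYPE_ARGB and PICT_TYPE_ABGR in the high bits.  Hence the
 * helpers below either mask from the bottom or shift down by
 * bpp - abits before comparing.
 */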
static bool
pixel_is_transparent(uint32_t pixel, uint32_t format)
{
	unsigned int abits;

	abits = PICT_FORMAT_A(format);
	if (!abits)
		return false;

	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A ||
	    PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
		return (pixel & ((1 << abits) - 1)) == 0;
	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB ||
		   PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
		unsigned int ashift = PICT_FORMAT_BPP(format) - abits;
		return (pixel >> ashift) == 0;
	} else
		return false;
}

static bool
pixel_is_opaque(uint32_t pixel, uint32_t format)
{
	unsigned int abits;

	abits = PICT_FORMAT_A(format);
	if (!abits)
		return true;

	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A ||
	    PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
		return (pixel & ((1 << abits) - 1)) == (unsigned)((1 << abits) - 1);
	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB ||
		   PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
		unsigned int ashift = PICT_FORMAT_BPP(format) - abits;
		return (pixel >> ashift) == (unsigned)((1 << abits) - 1);
	} else
		return false;
}

static bool
pixel_is_white(uint32_t pixel, uint32_t format)
{
	switch (PICT_FORMAT_TYPE(format)) {
	case PICT_TYPE_A:
	case PICT_TYPE_ARGB:
	case PICT_TYPE_ABGR:
	case PICT_TYPE_BGRA:
		return pixel == ((1U << PICT_FORMAT_BPP(format)) - 1);
	default:
		return false;
	}
}

static bool
is_opaque_solid(PicturePtr picture)
{
	if (picture->pSourcePict) {
		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
		return (fill->color >> 24) == 0xff;
	} else
		return pixel_is_opaque(get_pixel(picture), picture->format);
}

static bool
is_white(PicturePtr picture)
{
	if (picture->pSourcePict) {
		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
		return fill->color == 0xffffffff;
	} else
		return pixel_is_white(get_pixel(picture), picture->format);
}

static bool
is_transparent(PicturePtr picture)
{
	if (picture->pSourcePict) {
		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
		return fill->color == 0;
	} else
		return pixel_is_transparent(get_pixel(picture), picture->format);
}

bool
sna_composite_mask_is_opaque(PicturePtr mask)
{
	if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format))
		return is_solid(mask) && is_white(mask);
	else if (!PICT_FORMAT_A(mask->format))
		return true;
	else
		return is_solid(mask) && is_opaque_solid(mask);
}

fastcall
static void blt_composite_fill(struct sna *sna,
			       const struct sna_composite_op *op,
			       const struct sna_composite_rectangles *r)
{
	int x1, x2, y1, y2;

	x1 = r->dst.x + op->dst.x;
	y1 = r->dst.y + op->dst.y;
	x2 = x1 + r->width;
	y2 = y1 + r->height;

	if (x1 < 0)
		x1 = 0;
	if (y1 < 0)
		y1 = 0;

	if (x2 > op->dst.width)
		x2 = op->dst.width;
	if (y2 > op->dst.height)
		y2 = op->dst.height;

	if (x2 <= x1 || y2 <= y1)
		return;

	sna_blt_fill_one(sna, &op->u.blt, x1, y1, x2-x1, y2-y1);
}

fastcall
static void blt_composite_fill__cpu(struct sna *sna,
				    const struct sna_composite_op *op,
				    const struct sna_composite_rectangles *r)
{
	int x1, x2, y1, y2;

	x1 = r->dst.x + op->dst.x;
	y1 = r->dst.y + op->dst.y;
	x2 = x1 + r->width;
	y2 = y1 + r->height;

	if (x1 < 0)
		x1 = 0;
	if (y1 < 0)
		y1 = 0;

	if (x2 > op->dst.width)
		x2 = op->dst.width;
	if (y2 > op->dst.height)
		y2 = op->dst.height;

	if (x2 <= x1 || y2 <= y1)
		return;

	assert(op->dst.pixmap->devPrivate.ptr);
	assert(op->dst.pixmap->devKind);
	pixman_fill(op->dst.pixmap->devPrivate.ptr,
		    op->dst.pixmap->devKind / sizeof(uint32_t),
		    op->dst.pixmap->drawable.bitsPerPixel,
		    x1, y1, x2-x1, y2-y1,
		    op->u.blt.pixel);
}

fastcall static void
blt_composite_fill_box_no_offset__cpu(struct sna *sna,
				      const struct sna_composite_op *op,
				      const BoxRec *box)
{
	assert(box->x1 >= 0);
	assert(box->y1 >= 0);
	assert(box->x2 <= op->dst.pixmap->drawable.width);
	assert(box->y2 <= op->dst.pixmap->drawable.height);

	assert(op->dst.pixmap->devPrivate.ptr);
	assert(op->dst.pixmap->devKind);
	pixman_fill(op->dst.pixmap->devPrivate.ptr,
		    op->dst.pixmap->devKind / sizeof(uint32_t),
		    op->dst.pixmap->drawable.bitsPerPixel,
		    box->x1, box->y1, box->x2-box->x1, box->y2-box->y1,
		    op->u.blt.pixel);
}

static void
blt_composite_fill_boxes_no_offset__cpu(struct sna *sna,
					const struct sna_composite_op *op,
					const BoxRec *box, int n)
{
	do {
		assert(box->x1 >= 0);
		assert(box->y1 >= 0);
		assert(box->x2 <= op->dst.pixmap->drawable.width);
		assert(box->y2 <= op->dst.pixmap->drawable.height);

		assert(op->dst.pixmap->devPrivate.ptr);
		assert(op->dst.pixmap->devKind);
		pixman_fill(op->dst.pixmap->devPrivate.ptr,
			    op->dst.pixmap->devKind / sizeof(uint32_t),
			    op->dst.pixmap->drawable.bitsPerPixel,
			    box->x1, box->y1, box->x2-box->x1, box->y2-box->y1,
			    op->u.blt.pixel);
		box++;
	} while (--n);
}

fastcall static void
blt_composite_fill_box__cpu(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box)
{
	assert(box->x1 + op->dst.x >= 0);
	assert(box->y1 + op->dst.y >= 0);
	assert(box->x2 + op->dst.x <= op->dst.pixmap->drawable.width);
	assert(box->y2 + op->dst.y <= op->dst.pixmap->drawable.height);

	assert(op->dst.pixmap->devPrivate.ptr);
	assert(op->dst.pixmap->devKind);
	pixman_fill(op->dst.pixmap->devPrivate.ptr,
		    op->dst.pixmap->devKind / sizeof(uint32_t),
		    op->dst.pixmap->drawable.bitsPerPixel,
		    box->x1 + op->dst.x, box->y1 + op->dst.y,
		    box->x2 - box->x1, box->y2 - box->y1,
		    op->u.blt.pixel);
}

static void
blt_composite_fill_boxes__cpu(struct sna *sna,
			      const struct sna_composite_op *op,
			      const BoxRec *box, int n)
{
	do {
		assert(box->x1 + op->dst.x >= 0);
		assert(box->y1 + op->dst.y >= 0);
		assert(box->x2 + op->dst.x <= op->dst.pixmap->drawable.width);
		assert(box->y2 + op->dst.y <= op->dst.pixmap->drawable.height);

		assert(op->dst.pixmap->devPrivate.ptr);
		assert(op->dst.pixmap->devKind);
		pixman_fill(op->dst.pixmap->devPrivate.ptr,
			    op->dst.pixmap->devKind / sizeof(uint32_t),
			    op->dst.pixmap->drawable.bitsPerPixel,
			    box->x1 + op->dst.x, box->y1 + op->dst.y,
			    box->x2 - box->x1, box->y2 - box->y1,
			    op->u.blt.pixel);
		box++;
	} while (--n);
}

inline static void _sna_blt_fill_box(struct sna *sna,
				     const struct sna_blt_state *blt,
				     const BoxRec *box)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	DBG(("%s: (%d, %d), (%d, %d): %08x\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2,
	     blt->pixel));

	assert(box->x1 >= 0);
	assert(box->y1 >= 0);
	assert(box->y2 * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));

	if (!kgem_check_batch(kgem, 3))
		sna_blt_fill_begin(sna, blt);

	assert(sna->kgem.mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	kgem->nbatch += 3;
	assert(kgem->nbatch < kgem->surface);

	b[0] = blt->cmd;
	*(uint64_t *)(b+1) = *(const uint64_t *)box;
}

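/*
 * Bulk emission: work out how many 3-dword packets still fit below the
 * surface/reserved high-water mark, advance nbatch once for the whole
 * chunk, then write the packets unrolled 8/4/2/1 at a time.  Each
 * BoxRec is four int16s in exactly the y16|x16 order the hardware
 * expects, so a box is copied with a single 64-bit store.
 */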
inline static void _sna_blt_fill_boxes(struct sna *sna,
				       const struct sna_blt_state *blt,
				       const BoxRec *box,
				       int nbox)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t cmd = blt->cmd;

	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));

	if (!kgem_check_batch(kgem, 3))
		sna_blt_fill_begin(sna, blt);

	do {
		uint32_t *b = kgem->batch + kgem->nbatch;
		int nbox_this_time;

		assert(sna->kgem.mode == KGEM_BLT);
		nbox_this_time = nbox;
		if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
			nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		kgem->nbatch += 3 * nbox_this_time;
		assert(kgem->nbatch < kgem->surface);
		while (nbox_this_time >= 8) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
			b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++;
			b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++;
			b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++;
			b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++;
			b += 24;
			nbox_this_time -= 8;
		}
		if (nbox_this_time & 4) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
			b += 12;
		}
		if (nbox_this_time & 2) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b += 6;
		}
		if (nbox_this_time & 1) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
		}

		if (!nbox)
			return;

		sna_blt_fill_begin(sna, blt);
	} while (1);
}

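/*
 * Track full-surface solid fills: if one box covers the entire
 * destination and we are filling the pixmap's gpu_bo, remember the
 * colour on the sna_pixmap so later consumers can treat the bo as
 * uniformly clear and elide reads or copies.
 */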
static inline void _sna_blt_maybe_clear(const struct sna_composite_op *op, const BoxRec *box)
{
	if (box->x2 - box->x1 >= op->dst.width &&
	    box->y2 - box->y1 >= op->dst.height) {
		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
		if (op->dst.bo == priv->gpu_bo) {
			priv->clear = true;
			priv->clear_color = op->u.blt.pixel;
			DBG(("%s: pixmap=%ld marking clear [%08x]\n",
			     __FUNCTION__,
			     op->dst.pixmap->drawable.serialNumber,
			     op->u.blt.pixel));
		}
	}
}

fastcall static void blt_composite_fill_box_no_offset(struct sna *sna,
						      const struct sna_composite_op *op,
						      const BoxRec *box)
{
	_sna_blt_fill_box(sna, &op->u.blt, box);
	_sna_blt_maybe_clear(op, box);
}

static void blt_composite_fill_boxes_no_offset(struct sna *sna,
					       const struct sna_composite_op *op,
					       const BoxRec *box, int n)
{
	_sna_blt_fill_boxes(sna, &op->u.blt, box, n);
}

static void blt_composite_fill_boxes_no_offset__thread(struct sna *sna,
						       const struct sna_composite_op *op,
						       const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	const struct sna_blt_state *blt = &op->u.blt;
	uint32_t cmd = blt->cmd;

	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));

	sna_vertex_lock(&sna->render);
	assert(kgem->mode == KGEM_BLT);
	if (!kgem_check_batch(kgem, 3)) {
		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	}

	do {
		uint32_t *b = kgem->batch + kgem->nbatch;
		int nbox_this_time;

		assert(sna->kgem.mode == KGEM_BLT);
		nbox_this_time = nbox;
		if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
			nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		kgem->nbatch += 3 * nbox_this_time;
		assert(kgem->nbatch < kgem->surface);
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		while (nbox_this_time >= 8) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
			b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++;
			b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++;
			b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++;
			b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++;
			b += 24;
			nbox_this_time -= 8;
		}
		if (nbox_this_time & 4) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
			b += 12;
		}
		if (nbox_this_time & 2) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b += 6;
		}
		if (nbox_this_time & 1) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
		}

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
		if (!nbox)
			break;

		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	} while (1);
	sna_vertex_unlock(&sna->render);
}

fastcall static void blt_composite_fill_box(struct sna *sna,
					    const struct sna_composite_op *op,
					    const BoxRec *box)
{
	sna_blt_fill_one(sna, &op->u.blt,
			 box->x1 + op->dst.x,
			 box->y1 + op->dst.y,
			 box->x2 - box->x1,
			 box->y2 - box->y1);
	_sna_blt_maybe_clear(op, box);
}

static void blt_composite_fill_boxes(struct sna *sna,
				     const struct sna_composite_op *op,
				     const BoxRec *box, int n)
{
	do {
		sna_blt_fill_one(sna, &op->u.blt,
				 box->x1 + op->dst.x, box->y1 + op->dst.y,
				 box->x2 - box->x1, box->y2 - box->y1);
		box++;
	} while (--n);
}

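/*
 * add4() translates a whole BoxRec at once: the four int16 coordinates
 * are viewed through a union as one uint64_t, each 16-bit lane is
 * offset by the destination origin, and the packed result can be
 * stored straight into the batch -- the same y16|x16 layout the
 * scanline packets use, so no repacking is needed.
 */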
static inline uint64_t add4(const BoxRec *b, int16_t x, int16_t y)
{
	union {
		uint64_t v;
		int16_t i[4];
	} vi;
	vi.v = *(uint64_t *)b;
	vi.i[0] += x;
	vi.i[1] += y;
	vi.i[2] += x;
	vi.i[3] += y;
	return vi.v;
}

static void blt_composite_fill_boxes__thread(struct sna *sna,
					     const struct sna_composite_op *op,
					     const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	const struct sna_blt_state *blt = &op->u.blt;
	uint32_t cmd = blt->cmd;
	int16_t dx = op->dst.x;
	int16_t dy = op->dst.y;

	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));

	sna_vertex_lock(&sna->render);
	assert(kgem->mode == KGEM_BLT);
	if (!kgem_check_batch(kgem, 3)) {
		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	}

	do {
		uint32_t *b = kgem->batch + kgem->nbatch;
		int nbox_this_time;

		assert(sna->kgem.mode == KGEM_BLT);
		nbox_this_time = nbox;
		if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
			nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		kgem->nbatch += 3 * nbox_this_time;
		assert(kgem->nbatch < kgem->surface);
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		while (nbox_this_time >= 8) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
			b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
			b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
			b[12] = cmd; *(uint64_t *)(b+13) = add4(box++, dx, dy);
			b[15] = cmd; *(uint64_t *)(b+16) = add4(box++, dx, dy);
			b[18] = cmd; *(uint64_t *)(b+19) = add4(box++, dx, dy);
			b[21] = cmd; *(uint64_t *)(b+22) = add4(box++, dx, dy);
			b += 24;
			nbox_this_time -= 8;
		}
		if (nbox_this_time & 4) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
			b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
			b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
			b += 12;
		}
		if (nbox_this_time & 2) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
			b += 6;
		}
		if (nbox_this_time & 1) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
		}

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
		if (!nbox)
			break;

		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	} while (1);
	sna_vertex_unlock(&sna->render);
}

fastcall
static void blt_composite_nop(struct sna *sna,
			       const struct sna_composite_op *op,
			       const struct sna_composite_rectangles *r)
{
}

fastcall static void blt_composite_nop_box(struct sna *sna,
					   const struct sna_composite_op *op,
					   const BoxRec *box)
{
}

static void blt_composite_nop_boxes(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int n)
{
}

static bool
begin_blt(struct sna *sna,
	  struct sna_composite_op *op)
{
	if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo))
			return false;

		_kgem_set_mode(&sna->kgem, KGEM_BLT);
	}

	return true;
}

static bool
prepare_blt_nop(struct sna *sna,
		struct sna_composite_op *op)
{
	DBG(("%s\n", __FUNCTION__));

	op->blt   = blt_composite_nop;
	op->box   = blt_composite_nop_box;
	op->boxes = blt_composite_nop_boxes;
	op->done  = nop_done;
	return true;
}

static bool
prepare_blt_clear(struct sna *sna,
		  struct sna_composite_op *op)
{
	DBG(("%s\n", __FUNCTION__));

	if (op->dst.bo == NULL) {
		op->blt   = blt_composite_fill__cpu;
		if (op->dst.x|op->dst.y) {
			op->box   = blt_composite_fill_box__cpu;
			op->boxes = blt_composite_fill_boxes__cpu;
			op->thread_boxes = blt_composite_fill_boxes__cpu;
		} else {
			op->box   = blt_composite_fill_box_no_offset__cpu;
			op->boxes = blt_composite_fill_boxes_no_offset__cpu;
			op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu;
		}
		op->done  = nop_done;
		op->u.blt.pixel = 0;
		return true;
	}

	op->blt = blt_composite_fill;
	if (op->dst.x|op->dst.y) {
		op->box   = blt_composite_fill_box;
		op->boxes = blt_composite_fill_boxes;
		op->thread_boxes = blt_composite_fill_boxes__thread;
	} else {
		op->box   = blt_composite_fill_box_no_offset;
		op->boxes = blt_composite_fill_boxes_no_offset;
		op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
	}
	op->done = nop_done;

	if (!sna_blt_fill_init(sna, &op->u.blt,
			       op->dst.bo,
			       op->dst.pixmap->drawable.bitsPerPixel,
			       GXclear, 0))
		return false;

	return begin_blt(sna, op);
}

static bool
prepare_blt_fill(struct sna *sna,
		 struct sna_composite_op *op,
		 uint32_t pixel)
{
	DBG(("%s\n", __FUNCTION__));

	if (op->dst.bo == NULL) {
		op->u.blt.pixel = pixel;
		op->blt = blt_composite_fill__cpu;
		if (op->dst.x|op->dst.y) {
			op->box   = blt_composite_fill_box__cpu;
			op->boxes = blt_composite_fill_boxes__cpu;
			op->thread_boxes = blt_composite_fill_boxes__cpu;
		} else {
			op->box   = blt_composite_fill_box_no_offset__cpu;
			op->boxes = blt_composite_fill_boxes_no_offset__cpu;
			op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu;
		}
		op->done = nop_done;
		return true;
	}

	op->blt = blt_composite_fill;
	if (op->dst.x|op->dst.y) {
		op->box   = blt_composite_fill_box;
		op->boxes = blt_composite_fill_boxes;
		op->thread_boxes = blt_composite_fill_boxes__thread;
	} else {
		op->box   = blt_composite_fill_box_no_offset;
		op->boxes = blt_composite_fill_boxes_no_offset;
		op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
	}
	op->done = nop_done;

	if (!sna_blt_fill_init(sna, &op->u.blt, op->dst.bo,
			       op->dst.pixmap->drawable.bitsPerPixel,
			       GXcopy, pixel))
		return false;

	return begin_blt(sna, op);
}

fastcall static void
blt_composite_copy(struct sna *sna,
		   const struct sna_composite_op *op,
		   const struct sna_composite_rectangles *r)
{
	int x1, x2, y1, y2;
	int src_x, src_y;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
	     __FUNCTION__,
	     r->src.x, r->src.y,
	     r->dst.x, r->dst.y,
	     r->width, r->height));

	/* XXX higher layer should have clipped? */

	x1 = r->dst.x + op->dst.x;
	y1 = r->dst.y + op->dst.y;
	x2 = x1 + r->width;
	y2 = y1 + r->height;

	src_x = r->src.x - x1 + op->u.blt.sx;
	src_y = r->src.y - y1 + op->u.blt.sy;

	/* clip against dst */
	if (x1 < 0)
		x1 = 0;
	if (y1 < 0)
		y1 = 0;

	if (x2 > op->dst.width)
		x2 = op->dst.width;

	if (y2 > op->dst.height)
		y2 = op->dst.height;

	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));

	if (x2 <= x1 || y2 <= y1)
		return;

	sna_blt_copy_one(sna, &op->u.blt,
			 x1 + src_x, y1 + src_y,
			 x2 - x1, y2 - y1,
			 x1, y1);
}

fastcall static void blt_composite_copy_box(struct sna *sna,
					    const struct sna_composite_op *op,
					    const BoxRec *box)
{
	DBG(("%s: box (%d, %d), (%d, %d)\n",
	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
	sna_blt_copy_one(sna, &op->u.blt,
			 box->x1 + op->u.blt.sx,
			 box->y1 + op->u.blt.sy,
			 box->x2 - box->x1,
			 box->y2 - box->y1,
			 box->x1 + op->dst.x,
			 box->y1 + op->dst.y);
}

static void blt_composite_copy_boxes(struct sna *sna,
				     const struct sna_composite_op *op,
				     const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
	do {
		DBG(("%s: box (%d, %d), (%d, %d)\n",
		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
		sna_blt_copy_one(sna, &op->u.blt,
				 box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
				 box->x2 - box->x1, box->y2 - box->y1,
				 box->x1 + op->dst.x, box->y1 + op->dst.y);
		box++;
	} while(--nbox);
}

static inline uint32_t add2(uint32_t v, int16_t x, int16_t y)
{
	x += v & 0xffff;
	y += v >> 16;
	/* cast x as well, lest a negative value sign-extend over the y lane */
	return (uint16_t)y << 16 | (uint16_t)x;
}

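/*
 * Threaded copy emission: when the destination needs no offset, the
 * first two dwords of every XY_SRC_COPY packet are pre-baked into one
 * 64-bit header (br13 << 32 | cmd) and the box itself is copied with a
 * 64-bit store.  The box list is chunked against both remaining batch
 * space (8 dwords per blit here, 10 in the 64-bit variant below) and
 * the relocation table (2 entries per blit), submitting between chunks.
 */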
1584static void blt_composite_copy_boxes__thread(struct sna *sna,
1585					     const struct sna_composite_op *op,
1586					     const BoxRec *box, int nbox)
1587{
1588	struct kgem *kgem = &sna->kgem;
1589	int dst_dx = op->dst.x;
1590	int dst_dy = op->dst.y;
1591	int src_dx = op->src.offset[0];
1592	int src_dy = op->src.offset[1];
1593	uint32_t cmd = op->u.blt.cmd;
1594	uint32_t br13 = op->u.blt.br13;
1595	struct kgem_bo *src_bo = op->u.blt.bo[0];
1596	struct kgem_bo *dst_bo = op->u.blt.bo[1];
1597	int src_pitch = op->u.blt.pitch[0];
1598
1599	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1600
1601	sna_vertex_lock(&sna->render);
1602
1603	if ((dst_dx | dst_dy) == 0) {
1604		uint64_t hdr = (uint64_t)br13 << 32 | cmd;
1605		do {
1606			int nbox_this_time;
1607
1608			nbox_this_time = nbox;
1609			if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
1610				nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
1611			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1612				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
1613			assert(nbox_this_time);
1614			nbox -= nbox_this_time;
1615
1616			assert(sna->kgem.mode == KGEM_BLT);
1617			do {
1618				uint32_t *b = kgem->batch + kgem->nbatch;
1619
1620				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
1621				     __FUNCTION__,
1622				     box->x1, box->y1,
1623				     box->x2 - box->x1, box->y2 - box->y1));
1624
1625				assert(box->x1 + src_dx >= 0);
1626				assert(box->y1 + src_dy >= 0);
1627				assert(box->x1 + src_dx <= INT16_MAX);
1628				assert(box->y1 + src_dy <= INT16_MAX);
1629
1630				assert(box->x1 >= 0);
1631				assert(box->y1 >= 0);
1632
1633				*(uint64_t *)&b[0] = hdr;
1634				*(uint64_t *)&b[2] = *(const uint64_t *)box;
1635				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
1636						      I915_GEM_DOMAIN_RENDER << 16 |
1637						      I915_GEM_DOMAIN_RENDER |
1638						      KGEM_RELOC_FENCED,
1639						      0);
1640				b[5] = add2(b[2], src_dx, src_dy);
1641				b[6] = src_pitch;
1642				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
1643						      I915_GEM_DOMAIN_RENDER << 16 |
1644						      KGEM_RELOC_FENCED,
1645						      0);
1646				kgem->nbatch += 8;
1647				assert(kgem->nbatch < kgem->surface);
1648				box++;
1649			} while (--nbox_this_time);
1650
1651			if (!nbox)
1652				break;
1653
1654			_kgem_submit(kgem);
1655			_kgem_set_mode(kgem, KGEM_BLT);
1656		} while (1);
1657	} else {
1658		do {
1659			int nbox_this_time;
1660
1661			nbox_this_time = nbox;
1662			if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
1663				nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
1664			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1665				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
1666			assert(nbox_this_time);
1667			nbox -= nbox_this_time;
1668
1669			assert(sna->kgem.mode == KGEM_BLT);
1670			do {
1671				uint32_t *b = kgem->batch + kgem->nbatch;
1672
1673				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
1674				     __FUNCTION__,
1675				     box->x1, box->y1,
1676				     box->x2 - box->x1, box->y2 - box->y1));
1677
1678				assert(box->x1 + src_dx >= 0);
1679				assert(box->y1 + src_dy >= 0);
1680
1681				assert(box->x1 + dst_dx >= 0);
1682				assert(box->y1 + dst_dy >= 0);
1683
1684				b[0] = cmd;
1685				b[1] = br13;
1686				b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
1687				b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
1688				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
1689						      I915_GEM_DOMAIN_RENDER << 16 |
1690						      I915_GEM_DOMAIN_RENDER |
1691						      KGEM_RELOC_FENCED,
1692						      0);
1693				b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
1694				b[6] = src_pitch;
1695				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
1696						      I915_GEM_DOMAIN_RENDER << 16 |
1697						      KGEM_RELOC_FENCED,
1698						      0);
1699				kgem->nbatch += 8;
1700				assert(kgem->nbatch < kgem->surface);
1701				box++;
1702			} while (--nbox_this_time);
1703
1704			if (!nbox)
1705				break;
1706
1707			_kgem_submit(kgem);
1708			_kgem_set_mode(kgem, KGEM_BLT);
1709		} while (1);
1710	}
1711	sna_vertex_unlock(&sna->render);
1712}
1713
1714static void blt_composite_copy_boxes__thread64(struct sna *sna,
1715					       const struct sna_composite_op *op,
1716					       const BoxRec *box, int nbox)
1717{
1718	struct kgem *kgem = &sna->kgem;
1719	int dst_dx = op->dst.x;
1720	int dst_dy = op->dst.y;
1721	int src_dx = op->src.offset[0];
1722	int src_dy = op->src.offset[1];
1723	uint32_t cmd = op->u.blt.cmd;
1724	uint32_t br13 = op->u.blt.br13;
1725	struct kgem_bo *src_bo = op->u.blt.bo[0];
1726	struct kgem_bo *dst_bo = op->u.blt.bo[1];
1727	int src_pitch = op->u.blt.pitch[0];
1728
1729	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1730
1731	sna_vertex_lock(&sna->render);
1732
1733	if ((dst_dx | dst_dy) == 0) {
1734		uint64_t hdr = (uint64_t)br13 << 32 | cmd;
1735		do {
1736			int nbox_this_time;
1737
1738			nbox_this_time = nbox;
1739			if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
1740				nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 10;
1741			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1742				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
1743			assert(nbox_this_time);
1744			nbox -= nbox_this_time;
1745
1746			assert(kgem->mode == KGEM_BLT);
1747			do {
1748				uint32_t *b = kgem->batch + kgem->nbatch;
1749
1750				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
1751				     __FUNCTION__,
1752				     box->x1, box->y1,
1753				     box->x2 - box->x1, box->y2 - box->y1));
1754
1755				assert(box->x1 + src_dx >= 0);
1756				assert(box->y1 + src_dy >= 0);
1757				assert(box->x1 + src_dx <= INT16_MAX);
1758				assert(box->y1 + src_dy <= INT16_MAX);
1759
1760				assert(box->x1 >= 0);
1761				assert(box->y1 >= 0);
1762
1763				*(uint64_t *)&b[0] = hdr;
1764				*(uint64_t *)&b[2] = *(const uint64_t *)box;
1765				*(uint64_t *)(b+4) =
1766					kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
1767							 I915_GEM_DOMAIN_RENDER << 16 |
1768							 I915_GEM_DOMAIN_RENDER |
1769							 KGEM_RELOC_FENCED,
1770							 0);
1771				b[6] = add2(b[2], src_dx, src_dy);
1772				b[7] = src_pitch;
1773				*(uint64_t *)(b+8) =
1774					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
1775							 I915_GEM_DOMAIN_RENDER << 16 |
1776							 KGEM_RELOC_FENCED,
1777							 0);
1778				kgem->nbatch += 10;
1779				assert(kgem->nbatch < kgem->surface);
1780				box++;
1781			} while (--nbox_this_time);
1782
1783			if (!nbox)
1784				break;
1785
1786			_kgem_submit(kgem);
1787			_kgem_set_mode(kgem, KGEM_BLT);
1788		} while (1);
1789	} else {
1790		do {
1791			int nbox_this_time;
1792
1793			nbox_this_time = nbox;
1794			if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
1795				nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 10;
1796			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1797				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
1798			assert(nbox_this_time);
1799			nbox -= nbox_this_time;
1800
1801			assert(kgem->mode == KGEM_BLT);
1802			do {
1803				uint32_t *b = kgem->batch + kgem->nbatch;
1804
1805				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
1806				     __FUNCTION__,
1807				     box->x1, box->y1,
1808				     box->x2 - box->x1, box->y2 - box->y1));
1809
1810				assert(box->x1 + src_dx >= 0);
1811				assert(box->y1 + src_dy >= 0);
1812
1813				assert(box->x1 + dst_dx >= 0);
1814				assert(box->y1 + dst_dy >= 0);
1815
1816				b[0] = cmd;
1817				b[1] = br13;
1818				b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
1819				b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
1820				*(uint64_t *)(b+4) =
1821					kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
1822							 I915_GEM_DOMAIN_RENDER << 16 |
1823							 I915_GEM_DOMAIN_RENDER |
1824							 KGEM_RELOC_FENCED,
1825							 0);
1826				b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
1827				b[7] = src_pitch;
1828				*(uint64_t *)(b+8) =
1829					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
1830							 I915_GEM_DOMAIN_RENDER << 16 |
1831							 KGEM_RELOC_FENCED,
1832							 0);
1833				kgem->nbatch += 10;
1834				assert(kgem->nbatch < kgem->surface);
1835				box++;
1836			} while (--nbox_this_time);
1837
1838			if (!nbox)
1839				break;
1840
1841			_kgem_submit(kgem);
1842			_kgem_set_mode(kgem, KGEM_BLT);
1843		} while (1);
1844	}
1845	sna_vertex_unlock(&sna->render);
1846}
1847
1848fastcall static void
1849blt_composite_copy_with_alpha(struct sna *sna,
1850			      const struct sna_composite_op *op,
1851			      const struct sna_composite_rectangles *r)
1852{
1853	int x1, x2, y1, y2;
1854	int src_x, src_y;
1855
1856	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
1857	     __FUNCTION__,
1858	     r->src.x, r->src.y,
1859	     r->dst.x, r->dst.y,
1860	     r->width, r->height));
1861
1862	/* XXX higher layer should have clipped? */
1863
1864	x1 = r->dst.x + op->dst.x;
1865	y1 = r->dst.y + op->dst.y;
1866	x2 = x1 + r->width;
1867	y2 = y1 + r->height;
1868
1869	src_x = r->src.x - x1 + op->u.blt.sx;
1870	src_y = r->src.y - y1 + op->u.blt.sy;
1871
1872	/* clip against dst */
1873	if (x1 < 0)
1874		x1 = 0;
1875	if (y1 < 0)
1876		y1 = 0;
1877
1878	if (x2 > op->dst.width)
1879		x2 = op->dst.width;
1880
1881	if (y2 > op->dst.height)
1882		y2 = op->dst.height;
1883
1884	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));
1885
1886	if (x2 <= x1 || y2 <= y1)
1887		return;
1888
1889	sna_blt_alpha_fixup_one(sna, &op->u.blt,
1890				x1 + src_x, y1 + src_y,
1891				x2 - x1, y2 - y1,
1892				x1, y1);
1893}
1894
1895fastcall static void
1896blt_composite_copy_box_with_alpha(struct sna *sna,
1897				  const struct sna_composite_op *op,
1898				  const BoxRec *box)
1899{
1900	DBG(("%s: box (%d, %d), (%d, %d)\n",
1901	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
1902	sna_blt_alpha_fixup_one(sna, &op->u.blt,
1903				box->x1 + op->u.blt.sx,
1904				box->y1 + op->u.blt.sy,
1905				box->x2 - box->x1,
1906				box->y2 - box->y1,
1907				box->x1 + op->dst.x,
1908				box->y1 + op->dst.y);
1909}
1910
1911static void
1912blt_composite_copy_boxes_with_alpha(struct sna *sna,
1913				    const struct sna_composite_op *op,
1914				    const BoxRec *box, int nbox)
1915{
1916	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1917	do {
1918		DBG(("%s: box (%d, %d), (%d, %d)\n",
1919		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
1920		sna_blt_alpha_fixup_one(sna, &op->u.blt,
1921					box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
1922					box->x2 - box->x1, box->y2 - box->y1,
1923					box->x1 + op->dst.x, box->y1 + op->dst.y);
1924		box++;
	} while (--nbox);
1926}
1927
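/*
 * Wire a composite up as a straight BLT copy, optionally routed through
 * the alpha-fixup path.  If the source and destination cannot both be
 * fenced into the aperture, fall back to the tiling copy.
 */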
1928static bool
1929prepare_blt_copy(struct sna *sna,
1930		 struct sna_composite_op *op,
1931		 struct kgem_bo *bo,
1932		 uint32_t alpha_fixup)
1933{
1934	PixmapPtr src = op->u.blt.src_pixmap;
1935
1936	assert(op->dst.bo);
1937	assert(kgem_bo_can_blt(&sna->kgem, op->dst.bo));
1938	assert(kgem_bo_can_blt(&sna->kgem, bo));
1939
1940	kgem_set_mode(&sna->kgem, KGEM_BLT, op->dst.bo);
1941	if (!kgem_check_many_bo_fenced(&sna->kgem, op->dst.bo, bo, NULL)) {
1942		kgem_submit(&sna->kgem);
1943		if (!kgem_check_many_bo_fenced(&sna->kgem,
1944					       op->dst.bo, bo, NULL)) {
1945			DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
1946			return sna_tiling_blt_composite(sna, op, bo,
1947							src->drawable.bitsPerPixel,
1948							alpha_fixup);
1949		}
1950		_kgem_set_mode(&sna->kgem, KGEM_BLT);
1951	}
1952
1953	DBG(("%s\n", __FUNCTION__));
1954
1955	if (sna->kgem.gen >= 060 && op->dst.bo == bo)
1956		op->done = gen6_blt_copy_done;
1957	else
1958		op->done = nop_done;
1959
1960	if (alpha_fixup) {
1961		op->blt   = blt_composite_copy_with_alpha;
1962		op->box   = blt_composite_copy_box_with_alpha;
1963		op->boxes = blt_composite_copy_boxes_with_alpha;
1964
1965		if (!sna_blt_alpha_fixup_init(sna, &op->u.blt, bo, op->dst.bo,
1966					      src->drawable.bitsPerPixel,
1967					      alpha_fixup))
1968			return false;
1969	} else {
1970		op->blt   = blt_composite_copy;
1971		op->box   = blt_composite_copy_box;
1972		op->boxes = blt_composite_copy_boxes;
1973		if (sna->kgem.gen >= 0100)
1974			op->thread_boxes = blt_composite_copy_boxes__thread64;
1975		else
1976			op->thread_boxes = blt_composite_copy_boxes__thread;
1977
1978		if (!sna_blt_copy_init(sna, &op->u.blt, bo, op->dst.bo,
1979				       src->drawable.bitsPerPixel,
1980				       GXcopy))
1981			return false;
1982	}
1983
1984	return true;
1985}
1986
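/*
 * The *__cpu flavours below run entirely on the processor: both pixmaps
 * are already mapped, so the copy (or memcpy_xor, which masks and ORs in
 * the alpha-fixup pixel while copying) is a plain memory-to-memory
 * operation.
 */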
1987fastcall static void
1988blt_put_composite__cpu(struct sna *sna,
1989		       const struct sna_composite_op *op,
1990		       const struct sna_composite_rectangles *r)
1991{
1992	PixmapPtr dst = op->dst.pixmap;
1993	PixmapPtr src = op->u.blt.src_pixmap;
1994	assert(src->devPrivate.ptr);
1995	assert(src->devKind);
1996	assert(dst->devPrivate.ptr);
1997	assert(dst->devKind);
1998	memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
1999		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
2000		   r->src.x + op->u.blt.sx, r->src.y + op->u.blt.sy,
2001		   r->dst.x + op->dst.x, r->dst.y + op->dst.y,
2002		   r->width, r->height);
2003}
2004
2005fastcall static void
2006blt_put_composite_box__cpu(struct sna *sna,
2007			   const struct sna_composite_op *op,
2008			   const BoxRec *box)
2009{
2010	PixmapPtr dst = op->dst.pixmap;
2011	PixmapPtr src = op->u.blt.src_pixmap;
2012	assert(src->devPrivate.ptr);
2013	assert(src->devKind);
2014	assert(dst->devPrivate.ptr);
2015	assert(dst->devKind);
2016	memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
2017		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
2018		   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
2019		   box->x1 + op->dst.x, box->y1 + op->dst.y,
2020		   box->x2-box->x1, box->y2-box->y1);
2021}
2022
2023static void
2024blt_put_composite_boxes__cpu(struct sna *sna,
2025			     const struct sna_composite_op *op,
2026			     const BoxRec *box, int n)
2027{
2028	PixmapPtr dst = op->dst.pixmap;
2029	PixmapPtr src = op->u.blt.src_pixmap;
2030	assert(src->devPrivate.ptr);
2031	assert(src->devKind);
2032	assert(dst->devPrivate.ptr);
2033	assert(dst->devKind);
2034	do {
2035		memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
2036			   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
2037			   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
2038			   box->x1 + op->dst.x, box->y1 + op->dst.y,
2039			   box->x2-box->x1, box->y2-box->y1);
2040		box++;
2041	} while (--n);
2042}
2043
2044fastcall static void
2045blt_put_composite_with_alpha__cpu(struct sna *sna,
2046				  const struct sna_composite_op *op,
2047				  const struct sna_composite_rectangles *r)
2048{
2049	PixmapPtr dst = op->dst.pixmap;
2050	PixmapPtr src = op->u.blt.src_pixmap;
2051	assert(src->devPrivate.ptr);
2052	assert(src->devKind);
2053	assert(dst->devPrivate.ptr);
2054	assert(dst->devKind);
2055	memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
2056		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
2057		   r->src.x + op->u.blt.sx, r->src.y + op->u.blt.sy,
2058		   r->dst.x + op->dst.x, r->dst.y + op->dst.y,
2059		   r->width, r->height,
		   0xffffffff, op->u.blt.pixel);
}
2063
2064fastcall static void
2065blt_put_composite_box_with_alpha__cpu(struct sna *sna,
2066				      const struct sna_composite_op *op,
2067				      const BoxRec *box)
2068{
2069	PixmapPtr dst = op->dst.pixmap;
2070	PixmapPtr src = op->u.blt.src_pixmap;
2071	assert(src->devPrivate.ptr);
2072	assert(src->devKind);
2073	assert(dst->devPrivate.ptr);
2074	assert(dst->devKind);
2075	memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
2076		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
2077		   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
2078		   box->x1 + op->dst.x, box->y1 + op->dst.y,
2079		   box->x2-box->x1, box->y2-box->y1,
2080		   0xffffffff, op->u.blt.pixel);
2081}
2082
2083static void
2084blt_put_composite_boxes_with_alpha__cpu(struct sna *sna,
2085					const struct sna_composite_op *op,
2086					const BoxRec *box, int n)
2087{
2088	PixmapPtr dst = op->dst.pixmap;
2089	PixmapPtr src = op->u.blt.src_pixmap;
2090	assert(src->devPrivate.ptr);
2091	assert(src->devKind);
2092	assert(dst->devPrivate.ptr);
2093	assert(dst->devKind);
2094	do {
2095		memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
2096			   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
2097			   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
2098			   box->x1 + op->dst.x, box->y1 + op->dst.y,
2099			   box->x2-box->x1, box->y2-box->y1,
2100			   0xffffffff, op->u.blt.pixel);
2101		box++;
2102	} while (--n);
2103}
2104
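/*
 * Upload paths for a GPU destination: if an unpinned pixmap is wholly
 * overwritten, swap in fresh pages with sna_replace(); otherwise write
 * just the affected boxes with sna_write_boxes().
 */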
2105fastcall static void
2106blt_put_composite(struct sna *sna,
2107		  const struct sna_composite_op *op,
2108		  const struct sna_composite_rectangles *r)
2109{
2110	PixmapPtr dst = op->dst.pixmap;
2111	PixmapPtr src = op->u.blt.src_pixmap;
2112	struct sna_pixmap *dst_priv = sna_pixmap(dst);
2113	int pitch = src->devKind;
2114	char *data = src->devPrivate.ptr;
2115	int bpp = src->drawable.bitsPerPixel;
2116
2117	int16_t dst_x = r->dst.x + op->dst.x;
2118	int16_t dst_y = r->dst.y + op->dst.y;
2119	int16_t src_x = r->src.x + op->u.blt.sx;
2120	int16_t src_y = r->src.y + op->u.blt.sy;
2121
2122	if (!dst_priv->pinned &&
2123	    dst_x <= 0 && dst_y <= 0 &&
2124	    dst_x + r->width >= op->dst.width &&
2125	    dst_y + r->height >= op->dst.height) {
2126		data += (src_x - dst_x) * bpp / 8;
2127		data += (src_y - dst_y) * pitch;
2128
2129		assert(op->dst.bo == dst_priv->gpu_bo);
2130		sna_replace(sna, op->dst.pixmap, data, pitch);
2131	} else {
2132		BoxRec box;
2133		bool ok;
2134
2135		box.x1 = dst_x;
2136		box.y1 = dst_y;
2137		box.x2 = dst_x + r->width;
2138		box.y2 = dst_y + r->height;
2139
2140		ok = sna_write_boxes(sna, dst,
2141				     dst_priv->gpu_bo, 0, 0,
2142				     data, pitch, src_x, src_y,
2143				     &box, 1);
2144		assert(ok);
2145		(void)ok;
2146	}
2147}
2148
2149fastcall static void blt_put_composite_box(struct sna *sna,
2150					   const struct sna_composite_op *op,
2151					   const BoxRec *box)
2152{
2153	PixmapPtr src = op->u.blt.src_pixmap;
2154	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
2155
2156	DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
2157	     op->u.blt.sx, op->u.blt.sy,
2158	     op->dst.x, op->dst.y));
2159
2160	assert(src->devPrivate.ptr);
2161	assert(src->devKind);
2162	if (!dst_priv->pinned &&
2163	    box->x2 - box->x1 == op->dst.width &&
2164	    box->y2 - box->y1 == op->dst.height) {
2165		int pitch = src->devKind;
2166		int bpp = src->drawable.bitsPerPixel / 8;
2167		char *data = src->devPrivate.ptr;
2168
2169		data += (box->y1 + op->u.blt.sy) * pitch;
2170		data += (box->x1 + op->u.blt.sx) * bpp;
2171
2172		assert(op->dst.bo == dst_priv->gpu_bo);
2173		sna_replace(sna, op->dst.pixmap, data, pitch);
2174	} else {
2175		bool ok;
2176
2177		ok = sna_write_boxes(sna, op->dst.pixmap,
2178				     op->dst.bo, op->dst.x, op->dst.y,
2179				     src->devPrivate.ptr,
2180				     src->devKind,
2181				     op->u.blt.sx, op->u.blt.sy,
2182				     box, 1);
2183		assert(ok);
2184		(void)ok;
2185	}
2186}
2187
2188static void blt_put_composite_boxes(struct sna *sna,
2189				    const struct sna_composite_op *op,
2190				    const BoxRec *box, int n)
2191{
2192	PixmapPtr src = op->u.blt.src_pixmap;
2193	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
2194
2195	DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__,
2196	     op->u.blt.sx, op->u.blt.sy,
2197	     op->dst.x, op->dst.y,
2198	     box->x1, box->y1, box->x2, box->y2, n));
2199
2200	assert(src->devPrivate.ptr);
2201	assert(src->devKind);
2202	if (n == 1 && !dst_priv->pinned &&
2203	    box->x2 - box->x1 == op->dst.width &&
2204	    box->y2 - box->y1 == op->dst.height) {
2205		int pitch = src->devKind;
2206		int bpp = src->drawable.bitsPerPixel / 8;
2207		char *data = src->devPrivate.ptr;
2208
2209		data += (box->y1 + op->u.blt.sy) * pitch;
2210		data += (box->x1 + op->u.blt.sx) * bpp;
2211
2212		assert(op->dst.bo == dst_priv->gpu_bo);
2213		sna_replace(sna, op->dst.pixmap, data, pitch);
2214	} else {
2215		bool ok;
2216
2217		ok = sna_write_boxes(sna, op->dst.pixmap,
2218				     op->dst.bo, op->dst.x, op->dst.y,
2219				     src->devPrivate.ptr,
2220				     src->devKind,
2221				     op->u.blt.sx, op->u.blt.sy,
2222				     box, n);
2223		assert(ok);
2224		(void)ok;
2225	}
2226}
2227
2228fastcall static void
2229blt_put_composite_with_alpha(struct sna *sna,
2230			     const struct sna_composite_op *op,
2231			     const struct sna_composite_rectangles *r)
2232{
2233	PixmapPtr dst = op->dst.pixmap;
2234	PixmapPtr src = op->u.blt.src_pixmap;
2235	struct sna_pixmap *dst_priv = sna_pixmap(dst);
2236	int pitch = src->devKind;
2237	char *data = src->devPrivate.ptr;
2238
2239	int16_t dst_x = r->dst.x + op->dst.x;
2240	int16_t dst_y = r->dst.y + op->dst.y;
2241	int16_t src_x = r->src.x + op->u.blt.sx;
2242	int16_t src_y = r->src.y + op->u.blt.sy;
2243
2244	assert(src->devPrivate.ptr);
2245	assert(src->devKind);
2246
2247	if (!dst_priv->pinned &&
2248	    dst_x <= 0 && dst_y <= 0 &&
2249	    dst_x + r->width >= op->dst.width &&
2250	    dst_y + r->height >= op->dst.height) {
2251		int bpp = dst->drawable.bitsPerPixel / 8;
2252
2253		data += (src_x - dst_x) * bpp;
2254		data += (src_y - dst_y) * pitch;
2255
2256		assert(op->dst.bo == dst_priv->gpu_bo);
2257		sna_replace__xor(sna, op->dst.pixmap, data, pitch,
2258				 0xffffffff, op->u.blt.pixel);
2259	} else {
2260		BoxRec box;
2261
2262		box.x1 = dst_x;
2263		box.y1 = dst_y;
2264		box.x2 = dst_x + r->width;
2265		box.y2 = dst_y + r->height;
2266
2267		sna_write_boxes__xor(sna, dst,
2268				     dst_priv->gpu_bo, 0, 0,
2269				     data, pitch, src_x, src_y,
2270				     &box, 1,
2271				     0xffffffff, op->u.blt.pixel);
2272	}
2273}
2274
2275fastcall static void
2276blt_put_composite_box_with_alpha(struct sna *sna,
2277				 const struct sna_composite_op *op,
2278				 const BoxRec *box)
2279{
2280	PixmapPtr src = op->u.blt.src_pixmap;
2281	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
2282
2283	DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
2284	     op->u.blt.sx, op->u.blt.sy,
2285	     op->dst.x, op->dst.y));
2286
2287	assert(src->devPrivate.ptr);
2288	assert(src->devKind);
2289
2290	if (!dst_priv->pinned &&
2291	    box->x2 - box->x1 == op->dst.width &&
2292	    box->y2 - box->y1 == op->dst.height) {
2293		int pitch = src->devKind;
2294		int bpp = src->drawable.bitsPerPixel / 8;
2295		char *data = src->devPrivate.ptr;
2296
2297		data += (box->y1 + op->u.blt.sy) * pitch;
2298		data += (box->x1 + op->u.blt.sx) * bpp;
2299
2300		assert(op->dst.bo == dst_priv->gpu_bo);
2301		sna_replace__xor(sna, op->dst.pixmap, data, pitch,
2302				 0xffffffff, op->u.blt.pixel);
2303	} else {
2304		sna_write_boxes__xor(sna, op->dst.pixmap,
2305				     op->dst.bo, op->dst.x, op->dst.y,
2306				     src->devPrivate.ptr,
2307				     src->devKind,
2308				     op->u.blt.sx, op->u.blt.sy,
2309				     box, 1,
2310				     0xffffffff, op->u.blt.pixel);
2311	}
2312}
2313
2314static void
2315blt_put_composite_boxes_with_alpha(struct sna *sna,
2316				   const struct sna_composite_op *op,
2317				   const BoxRec *box, int n)
2318{
2319	PixmapPtr src = op->u.blt.src_pixmap;
2320	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
2321
2322	DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__,
2323	     op->u.blt.sx, op->u.blt.sy,
2324	     op->dst.x, op->dst.y,
2325	     box->x1, box->y1, box->x2, box->y2, n));
2326
2327	assert(src->devPrivate.ptr);
2328	assert(src->devKind);
2329
2330	if (n == 1 && !dst_priv->pinned &&
2331	    box->x2 - box->x1 == op->dst.width &&
2332	    box->y2 - box->y1 == op->dst.height) {
2333		int pitch = src->devKind;
2334		int bpp = src->drawable.bitsPerPixel / 8;
2335		char *data = src->devPrivate.ptr;
2336
2337		data += (box->y1 + op->u.blt.sy) * pitch;
2338		data += (box->x1 + op->u.blt.sx) * bpp;
2339
2340		assert(dst_priv->gpu_bo == op->dst.bo);
2341		sna_replace__xor(sna, op->dst.pixmap, data, pitch,
2342				 0xffffffff, op->u.blt.pixel);
2343	} else {
2344		sna_write_boxes__xor(sna, op->dst.pixmap,
2345				     op->dst.bo, op->dst.x, op->dst.y,
2346				     src->devPrivate.ptr,
2347				     src->devKind,
2348				     op->u.blt.sx, op->u.blt.sy,
2349				     box, n,
2350				     0xffffffff, op->u.blt.pixel);
2351	}
2352}
2353
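/*
 * Select the put (upload) handlers: the sna_replace/sna_write_boxes
 * routes when the destination lives on the GPU, or the pure-CPU copies
 * otherwise, each in a plain and an alpha-fixup variant.
 */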
2354static bool
2355prepare_blt_put(struct sna *sna,
2356		struct sna_composite_op *op,
2357		uint32_t alpha_fixup)
2358{
2359	DBG(("%s\n", __FUNCTION__));
2360
2361	assert(!sna_pixmap(op->dst.pixmap)->clear);
2362
2363	if (op->dst.bo) {
2364		assert(op->dst.bo == sna_pixmap(op->dst.pixmap)->gpu_bo);
2365		if (alpha_fixup) {
2366			op->u.blt.pixel = alpha_fixup;
2367			op->blt   = blt_put_composite_with_alpha;
2368			op->box   = blt_put_composite_box_with_alpha;
2369			op->boxes = blt_put_composite_boxes_with_alpha;
2370		} else {
2371			op->blt   = blt_put_composite;
2372			op->box   = blt_put_composite_box;
2373			op->boxes = blt_put_composite_boxes;
2374		}
2375	} else {
2376		if (alpha_fixup) {
2377			op->u.blt.pixel = alpha_fixup;
2378			op->blt   = blt_put_composite_with_alpha__cpu;
2379			op->box   = blt_put_composite_box_with_alpha__cpu;
2380			op->boxes = blt_put_composite_boxes_with_alpha__cpu;
2381		} else {
2382			op->blt   = blt_put_composite__cpu;
2383			op->box   = blt_put_composite_box__cpu;
2384			op->boxes = blt_put_composite_boxes__cpu;
2385		}
2386	}
2387	op->done = nop_done;
2388
2389	return true;
2390}
2391
2392static bool
2393is_clear(PixmapPtr pixmap)
2394{
2395	struct sna_pixmap *priv = sna_pixmap(pixmap);
2396	return priv && priv->clear;
2397}
2398
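/*
 * over() is Porter-Duff OVER on premultiplied a8r8g8b8 pixels:
 *
 *	dst = src + (255 - src.alpha) * dst / 255	(per channel)
 *
 * computed two channels at a time: the red/blue lanes within the
 * 0x00ff00ff mask, then the alpha/green lanes after a shift.
 * UN8_rb_MUL_UN8 uses the usual rounding trick
 * t/255 ~= (t + 0x80 + ((t + 0x80) >> 8)) >> 8, and UN8_rb_ADD_UN8_rb
 * saturates each 8-bit lane.  (These match the pixman helpers of the
 * same names.)
 */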
2399static inline uint32_t
2400over(uint32_t src, uint32_t dst)
2401{
2402	uint32_t a = ~src >> 24;
2403
2404#define G_SHIFT 8
2405#define RB_MASK 0xff00ff
2406#define RB_ONE_HALF 0x800080
2407#define RB_MASK_PLUS_ONE 0x10000100
2408
2409#define UN8_rb_MUL_UN8(x, a, t) do {				\
2410	t  = ((x) & RB_MASK) * (a);				\
2411	t += RB_ONE_HALF;					\
2412	x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;	\
2413	x &= RB_MASK;						\
2414} while (0)
2415
2416#define UN8_rb_ADD_UN8_rb(x, y, t) do {				\
2417	t = ((x) + (y));					\
2418	t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);	\
2419	x = (t & RB_MASK);					\
2420} while (0)
2421
2422#define UN8x4_MUL_UN8_ADD_UN8x4(x, a, y) do {			\
2423	uint32_t r1__, r2__, r3__, t__;				\
2424	\
2425	r1__ = (x);						\
2426	r2__ = (y) & RB_MASK;					\
2427	UN8_rb_MUL_UN8(r1__, (a), t__);				\
2428	UN8_rb_ADD_UN8_rb(r1__, r2__, t__);			\
2429	\
2430	r2__ = (x) >> G_SHIFT;					\
2431	r3__ = ((y) >> G_SHIFT) & RB_MASK;			\
2432	UN8_rb_MUL_UN8(r2__, (a), t__);				\
2433	UN8_rb_ADD_UN8_rb(r2__, r3__, t__);			\
2434	\
2435	(x) = r1__ | (r2__ << G_SHIFT);				\
2436} while (0)
2437
2438	UN8x4_MUL_UN8_ADD_UN8x4(dst, a, src);
2439
2440	return dst;
2441}
2442
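/*
 * add() is PictOpAdd on packed 8888 pixels: a saturating per-channel
 * addition, using the same two-lanes-at-a-time arithmetic as over().
 */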
2443static inline uint32_t
2444add(uint32_t src, uint32_t dst)
2445{
2446#define UN8x4_ADD_UN8x4(x, y) do {				\
2447	uint32_t r1__, r2__, r3__, t__;				\
2448	\
2449	r1__ = (x) & RB_MASK;					\
2450	r2__ = (y) & RB_MASK;					\
2451	UN8_rb_ADD_UN8_rb(r1__, r2__, t__);			\
2452	\
2453	r2__ = ((x) >> G_SHIFT) & RB_MASK;			\
2454	r3__ = ((y) >> G_SHIFT) & RB_MASK;			\
2455	UN8_rb_ADD_UN8_rb(r2__, r3__, t__);			\
2456	\
2457	x = r1__ | (r2__ << G_SHIFT);				\
2458} while (0)
2459
2460	UN8x4_ADD_UN8x4(src, dst);
2461	return src;
2462}
2463
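/*
 * Entry point for running a composite through the BLT engine.  Only
 * operations reducible to PictOpSrc can be handled: solid sources are
 * lowered to fills (OVER/ADD against a known clear colour are
 * precomputed into a single fill colour), compatible-format sources
 * become copies, and sources without a usable bo are uploaded via the
 * put paths.  Everything else returns false for the caller to try the
 * render paths instead.
 */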
2464bool
2465sna_blt_composite(struct sna *sna,
2466		  uint32_t op,
2467		  PicturePtr src,
2468		  PicturePtr dst,
2469		  int16_t x, int16_t y,
2470		  int16_t dst_x, int16_t dst_y,
2471		  int16_t width, int16_t height,
2472		  unsigned flags,
2473		  struct sna_composite_op *tmp)
2474{
2475	PictFormat src_format = src->format;
2476	PixmapPtr src_pixmap;
2477	struct kgem_bo *bo;
2478	int16_t tx, ty;
2479	BoxRec dst_box, src_box;
2480	uint32_t alpha_fixup;
2481	uint32_t color, hint;
2482	bool was_clear;
2483	bool ret;
2484
2485#if DEBUG_NO_BLT || NO_BLT_COMPOSITE
2486	return false;
2487#endif
2488	DBG(("%s (%d, %d), (%d, %d), %dx%d\n",
2489	     __FUNCTION__, x, y, dst_x, dst_y, width, height));
2490
2491	switch (dst->pDrawable->bitsPerPixel) {
2492	case 8:
2493	case 16:
2494	case 32:
2495		break;
2496	default:
2497		DBG(("%s: unhandled bpp: %d\n", __FUNCTION__,
2498		     dst->pDrawable->bitsPerPixel));
2499		return false;
2500	}
2501
2502	tmp->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
2503	was_clear = is_clear(tmp->dst.pixmap);
2504
2505	if (width | height) {
2506		dst_box.x1 = dst_x;
2507		dst_box.x2 = bound(dst_x, width);
2508		dst_box.y1 = dst_y;
2509		dst_box.y2 = bound(dst_y, height);
2510	} else
2511		sna_render_picture_extents(dst, &dst_box);
2512
2513	tmp->dst.format = dst->format;
2514	tmp->dst.width = tmp->dst.pixmap->drawable.width;
2515	tmp->dst.height = tmp->dst.pixmap->drawable.height;
2516	get_drawable_deltas(dst->pDrawable, tmp->dst.pixmap,
2517			    &tmp->dst.x, &tmp->dst.y);
2518
2519	if (op == PictOpClear) {
2520clear:
2521		if (was_clear && sna_pixmap(tmp->dst.pixmap)->clear_color == 0) {
2522			sna_pixmap(tmp->dst.pixmap)->clear = true;
2523			return prepare_blt_nop(sna, tmp);
2524		}
2525
2526		hint = 0;
2527		if (can_render(sna)) {
2528			hint |= PREFER_GPU;
2529			if ((flags & COMPOSITE_PARTIAL) == 0) {
2530				hint |= IGNORE_DAMAGE;
2531				if (width  == tmp->dst.pixmap->drawable.width &&
2532				    height == tmp->dst.pixmap->drawable.height)
2533					hint |= REPLACES;
2534			}
2535		}
2536		tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
2537						  &dst_box, &tmp->damage);
2538		if (tmp->dst.bo) {
2539			if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
				DBG(("%s: cannot blit to dst, tiling? %d, pitch? %d\n",
				     __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch));
2542				return false;
2543			}
2544			if (hint & REPLACES)
2545				kgem_bo_undo(&sna->kgem, tmp->dst.bo);
2546		} else {
2547			RegionRec region;
2548
2549			region.extents = dst_box;
2550			region.data = NULL;
2551
2552			hint = MOVE_WRITE | MOVE_INPLACE_HINT;
2553			if (flags & COMPOSITE_PARTIAL)
2554				hint |= MOVE_READ;
2555			if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region, hint))
2556				return false;
2557		}
2558
2559		return prepare_blt_clear(sna, tmp);
2560	}
2561
2562	if (is_solid(src)) {
2563		if ((op == PictOpOver || op == PictOpAdd) && is_transparent(src)) {
2564			sna_pixmap(tmp->dst.pixmap)->clear = was_clear;
2565			return prepare_blt_nop(sna, tmp);
2566		}
2567		if (op == PictOpOver && is_opaque_solid(src))
2568			op = PictOpSrc;
2569		if (op == PictOpAdd && is_white(src))
2570			op = PictOpSrc;
2571		if (was_clear && (op == PictOpAdd || op == PictOpOver)) {
2572			if (sna_pixmap(tmp->dst.pixmap)->clear_color == 0)
2573				op = PictOpSrc;
2574			if (op == PictOpOver) {
2575				color = over(get_solid_color(src, PICT_a8r8g8b8),
2576					     color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color,
2577							   dst->format, PICT_a8r8g8b8));
2578				op = PictOpSrc;
2579				DBG(("%s: precomputing solid OVER (%08x, %08x) -> %08x\n",
2580				     __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8),
2581				     color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color,
2582						   dst->format, PICT_a8r8g8b8),
2583				     color));
2584			}
2585			if (op == PictOpAdd) {
2586				color = add(get_solid_color(src, PICT_a8r8g8b8),
2587					    color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color,
2588							  dst->format, PICT_a8r8g8b8));
2589				op = PictOpSrc;
2590				DBG(("%s: precomputing solid ADD (%08x, %08x) -> %08x\n",
2591				     __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8),
2592				     color_convert(sna_pixmap(tmp->dst.pixmap)->clear_color,
2593						   dst->format, PICT_a8r8g8b8),
2594				     color));
2595			}
2596		}
2597		if (op == PictOpOutReverse && is_opaque_solid(src))
2598			goto clear;
2599
2600		if (op != PictOpSrc) {
2601			DBG(("%s: unsupported op [%d] for blitting\n",
2602			     __FUNCTION__, op));
2603			return false;
2604		}
2605
2606		color = get_solid_color(src, tmp->dst.format);
2607fill:
2608		if (color == 0)
2609			goto clear;
2610
2611		if (was_clear && sna_pixmap(tmp->dst.pixmap)->clear_color == color) {
2612			sna_pixmap(tmp->dst.pixmap)->clear = true;
2613			return prepare_blt_nop(sna, tmp);
2614		}
2615
2616		hint = 0;
2617		if (can_render(sna)) {
2618			hint |= PREFER_GPU;
2619			if ((flags & COMPOSITE_PARTIAL) == 0) {
2620				hint |= IGNORE_DAMAGE;
2621				if (width  == tmp->dst.pixmap->drawable.width &&
2622				    height == tmp->dst.pixmap->drawable.height)
2623					hint |= REPLACES;
2624			}
2625		}
2626		tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
2627						  &dst_box, &tmp->damage);
2628		if (tmp->dst.bo) {
2629			if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
				DBG(("%s: cannot blit to dst, tiling? %d, pitch? %d\n",
				     __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch));
2632				return false;
2633			}
2634			if (hint & REPLACES)
2635				kgem_bo_undo(&sna->kgem, tmp->dst.bo);
2636		} else {
2637			RegionRec region;
2638
2639			region.extents = dst_box;
2640			region.data = NULL;
2641
2642			hint = MOVE_WRITE | MOVE_INPLACE_HINT;
2643			if (flags & COMPOSITE_PARTIAL)
2644				hint |= MOVE_READ;
2645			if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region, hint))
2646				return false;
2647		}
2648
2649		return prepare_blt_fill(sna, tmp, color);
2650	}
2651
2652	if (!src->pDrawable) {
2653		DBG(("%s: unsupported procedural source\n",
2654		     __FUNCTION__));
2655		return false;
2656	}
2657
2658	if (src->filter == PictFilterConvolution) {
		DBG(("%s: convolution filters not handled\n",
		     __FUNCTION__));
2661		return false;
2662	}
2663
2664	if (op == PictOpOver && PICT_FORMAT_A(src_format) == 0)
2665		op = PictOpSrc;
2666
2667	if (op != PictOpSrc) {
2668		DBG(("%s: unsupported op [%d] for blitting\n",
2669		     __FUNCTION__, op));
2670		return false;
2671	}
2672
2673	if (!sna_transform_is_imprecise_integer_translation(src->transform, src->filter,
2674							    dst->polyMode == PolyModePrecise,
2675							    &tx, &ty)) {
2676		DBG(("%s: source transform is not an integer translation\n",
2677		     __FUNCTION__));
2678		return false;
2679	}
	DBG(("%s: converting transform to integer translation? %d (%d, %d)\n",
	     __FUNCTION__, src->transform != NULL, tx, ty));
2682	x += tx;
2683	y += ty;
2684
2685	if ((x >= src->pDrawable->width ||
2686	     y >= src->pDrawable->height ||
2687	     x + width  <= 0 ||
2688	     y + height <= 0) &&
2689	    (!src->repeat || src->repeatType == RepeatNone)) {
2690		DBG(("%s: source is outside of valid area, converting to clear\n",
2691		     __FUNCTION__));
2692		goto clear;
2693	}
2694
2695	src_pixmap = get_drawable_pixmap(src->pDrawable);
2696	if (is_clear(src_pixmap)) {
2697		if (src->repeat ||
2698		    (x >= 0 && y >= 0 &&
2699		     x + width  < src_pixmap->drawable.width &&
2700		     y + height < src_pixmap->drawable.height)) {
2701			color = color_convert(sna_pixmap(src_pixmap)->clear_color,
2702					      src->format, tmp->dst.format);
2703			goto fill;
2704		}
2705	}
2706
2707	alpha_fixup = 0;
2708	if (!(dst->format == src_format ||
2709	      dst->format == alphaless(src_format) ||
2710	      (alphaless(dst->format) == alphaless(src_format) &&
2711	       sna_get_pixel_from_rgba(&alpha_fixup,
2712				       0, 0, 0, 0xffff,
2713				       dst->format)))) {
2714		DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n",
2715		     __FUNCTION__, (unsigned)src_format, dst->format));
2716		return false;
2717	}
2718
2719	/* XXX tiling? fixup extend none? */
2720	if (x < 0 || y < 0 ||
2721	    x + width  > src->pDrawable->width ||
2722	    y + height > src->pDrawable->height) {
2723		DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d, repeat=%d\n",
2724		     __FUNCTION__,
		     x, y, x+width, y+height, src->pDrawable->width, src->pDrawable->height, src->repeatType));
2726		if (src->repeat && src->repeatType == RepeatNormal) {
2727			x = x % src->pDrawable->width;
2728			y = y % src->pDrawable->height;
2729			if (x < 0)
2730				x += src->pDrawable->width;
2731			if (y < 0)
2732				y += src->pDrawable->height;
2733			if (x + width  > src->pDrawable->width ||
2734			    y + height > src->pDrawable->height)
2735				return false;
2736		} else
2737			return false;
2738	}
2739
2740	get_drawable_deltas(src->pDrawable, src_pixmap, &tx, &ty);
2741	x += tx + src->pDrawable->x;
2742	y += ty + src->pDrawable->y;
2743	if (x < 0 || y < 0 ||
2744	    x + width  > src_pixmap->drawable.width ||
2745	    y + height > src_pixmap->drawable.height) {
2746		DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid pixmap %dx%d\n",
2747		     __FUNCTION__,
		     x, y, x+width, y+height, src_pixmap->drawable.width, src_pixmap->drawable.height));
2749		return false;
2750	}
2751
2752	tmp->u.blt.src_pixmap = src_pixmap;
2753	tmp->u.blt.sx = x - dst_x;
2754	tmp->u.blt.sy = y - dst_y;
2755	DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
2756	     __FUNCTION__,
2757	     tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy, alpha_fixup));
2758
2759	src_box.x1 = x;
2760	src_box.y1 = y;
2761	src_box.x2 = x + width;
2762	src_box.y2 = y + height;
2763	bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true);
2764	if (bo && !kgem_bo_can_blt(&sna->kgem, bo)) {
2765		DBG(("%s: can not blit from src size=%dx%d, tiling? %d, pitch? %d\n",
2766		     __FUNCTION__,
2767		     src_pixmap->drawable.width  < sna->render.max_3d_size,
2768		     src_pixmap->drawable.height < sna->render.max_3d_size,
2769		     bo->tiling, bo->pitch));
2770
2771		if (src_pixmap->drawable.width  <= sna->render.max_3d_size &&
2772		    src_pixmap->drawable.height <= sna->render.max_3d_size &&
2773		    bo->pitch <= sna->render.max_3d_pitch &&
2774		    (flags & COMPOSITE_FALLBACK) == 0)
2775		{
2776			return false;
2777		}
2778
2779		bo = NULL;
2780	}
2781
2782	hint = 0;
2783	if (bo || can_render(sna)) {
2784		hint |= PREFER_GPU;
2785		if ((flags & COMPOSITE_PARTIAL) == 0) {
2786			hint |= IGNORE_DAMAGE;
2787			if (width  == tmp->dst.pixmap->drawable.width &&
2788			    height == tmp->dst.pixmap->drawable.height)
2789				hint |= REPLACES;
2790		}
2791		if (bo)
2792			hint |= FORCE_GPU;
2793	}
2794	tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
2795					  &dst_box, &tmp->damage);
2796
2797	if (tmp->dst.bo && hint & REPLACES) {
2798		struct sna_pixmap *priv = sna_pixmap(tmp->dst.pixmap);
2799		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
2800	}
2801
2802	if (tmp->dst.pixmap == src_pixmap)
2803		bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true);
2804
2805	ret = false;
2806	if (bo) {
2807		if (!tmp->dst.bo) {
2808			DBG(("%s: fallback -- unaccelerated read back\n",
2809			     __FUNCTION__));
2810fallback:
2811			if (flags & COMPOSITE_FALLBACK || !kgem_bo_is_busy(bo))
2812				goto put;
2813		} else if (!kgem_bo_can_blt(&sna->kgem, bo)) {
2814			DBG(("%s: fallback -- cannot blit from source\n",
2815			     __FUNCTION__));
2816			goto fallback;
2817		} else if (bo->snoop && tmp->dst.bo->snoop) {
			DBG(("%s: fallback -- cannot copy between snooped bos\n",
			     __FUNCTION__));
2820			goto put;
2821		} else if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
2822			DBG(("%s: fallback -- unaccelerated upload\n",
2823			     __FUNCTION__));
2824			goto fallback;
2825		} else {
2826			ret = prepare_blt_copy(sna, tmp, bo, alpha_fixup);
2827			if (!ret)
2828				goto fallback;
2829		}
2830	} else {
2831		RegionRec region;
2832
2833put:
2834		if (tmp->dst.bo == sna_pixmap(tmp->dst.pixmap)->cpu_bo) {
2835			DBG(("%s: dropping upload into CPU bo\n", __FUNCTION__));
2836			tmp->dst.bo = NULL;
2837			tmp->damage = NULL;
2838		}
2839
2840		if (tmp->dst.bo == NULL) {
2841			hint = MOVE_INPLACE_HINT | MOVE_WRITE;
2842			if (flags & COMPOSITE_PARTIAL)
2843				hint |= MOVE_READ;
2844
2845			region.extents = dst_box;
2846			region.data = NULL;
2847			if (!sna_drawable_move_region_to_cpu(dst->pDrawable,
2848							     &region, hint))
2849				return false;
2850
2851			assert(tmp->damage == NULL);
2852		}
2853
2854		region.extents = src_box;
2855		region.data = NULL;
2856		if (!sna_drawable_move_region_to_cpu(&src_pixmap->drawable,
2857						     &region, MOVE_READ))
2858			return false;
2859
2860		ret = prepare_blt_put(sna, tmp, alpha_fixup);
2861	}
2862
2863	return ret;
2864}
2865
2866static void convert_done(struct sna *sna, const struct sna_composite_op *op)
2867{
2868	struct kgem *kgem = &sna->kgem;
2869
2870	assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
2871	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
2872		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
2873		_kgem_submit(kgem);
2874	}
2875
2876	kgem_bo_destroy(kgem, op->src.bo);
2877	sna_render_composite_redirect_done(sna, op);
2878}
2879
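/*
 * As with gen6_blt_copy_done, a copy whose source and destination are
 * the same bo on gen6+ is chased with a dummy XY_SETUP_CLIP (a zeroed
 * clip rectangle); presumably this acts as a cheap spacer between
 * overlapping blits.
 */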
2880static void gen6_convert_done(struct sna *sna, const struct sna_composite_op *op)
2881{
2882	struct kgem *kgem = &sna->kgem;
2883
2884	if (kgem_check_batch(kgem, 3)) {
2885		uint32_t *b = kgem->batch + kgem->nbatch;
2886		assert(sna->kgem.mode == KGEM_BLT);
2887		b[0] = XY_SETUP_CLIP;
2888		b[1] = b[2] = 0;
2889		kgem->nbatch += 3;
2890		assert(kgem->nbatch < kgem->surface);
2891	}
2892
2893	convert_done(sna, op);
2894}
2895
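/*
 * Convert an already-prepared render composite into a plain blit, for
 * the case where the render path has a source bo in hand and the
 * operation reduces to a copy (with optional alpha fixup).
 */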
2896bool
2897sna_blt_composite__convert(struct sna *sna,
2898			   int x, int y,
2899			   int width, int height,
2900			   struct sna_composite_op *tmp)
2901{
2902	uint32_t alpha_fixup;
2903	int sx, sy;
2904	uint8_t op;
2905
2906#if DEBUG_NO_BLT || NO_BLT_COMPOSITE
2907	return false;
2908#endif
2909
2910	DBG(("%s src=%d, dst=%d (redirect? %d)\n", __FUNCTION__,
2911	     tmp->src.bo->handle, tmp->dst.bo->handle,
2912	     tmp->redirect.real_bo ? tmp->redirect.real_bo->handle : 0));
2913
2914	if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo) ||
2915	    !kgem_bo_can_blt(&sna->kgem, tmp->src.bo)) {
2916		DBG(("%s: cannot blt from src or to dst\n", __FUNCTION__));
2917		return false;
2918	}
2919
2920	if (tmp->src.transform) {
2921		DBG(("%s: transforms not handled by the BLT\n", __FUNCTION__));
2922		return false;
2923	}
2924
2925	if (tmp->src.filter == PictFilterConvolution) {
		DBG(("%s: convolution filters not handled\n",
2927		     __FUNCTION__));
2928		return false;
2929	}
2930
2931	op = tmp->op;
2932	if (op == PictOpOver && PICT_FORMAT_A(tmp->src.pict_format) == 0)
2933		op = PictOpSrc;
2934	if (op != PictOpSrc) {
2935		DBG(("%s: unsupported op [%d] for blitting\n",
2936		     __FUNCTION__, op));
2937		return false;
2938	}
2939
2940	alpha_fixup = 0;
2941	if (!(tmp->dst.format == tmp->src.pict_format ||
2942	      tmp->dst.format == alphaless(tmp->src.pict_format) ||
2943	      (alphaless(tmp->dst.format) == alphaless(tmp->src.pict_format) &&
2944	       sna_get_pixel_from_rgba(&alpha_fixup,
2945				       0, 0, 0, 0xffff,
2946				       tmp->dst.format)))) {
2947		DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n",
2948		     __FUNCTION__,
2949		     (unsigned)tmp->src.pict_format,
2950		     (unsigned)tmp->dst.format));
2951		return false;
2952	}
2953
2954	sx = tmp->src.offset[0];
2955	sy = tmp->src.offset[1];
2956
2957	x += sx;
2958	y += sy;
2959	if (x < 0 || y < 0 ||
2960	    x + width  > tmp->src.width ||
2961	    y + height > tmp->src.height) {
2962		DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d\n",
2963		     __FUNCTION__,
		     x, y, x+width, y+height, tmp->src.width, tmp->src.height));
2965		if (tmp->src.repeat == RepeatNormal) {
2966			int xx = x % tmp->src.width;
2967			int yy = y % tmp->src.height;
2968			if (xx < 0)
2969				xx += tmp->src.width;
2970			if (yy < 0)
2971				yy += tmp->src.height;
2972			if (xx + width  > tmp->src.width ||
2973			    yy + height > tmp->src.height)
2974				return false;
2975
2976			sx += xx - x;
2977			sy += yy - y;
2978		} else
2979			return false;
2980	}
2981
2982	DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
2983	     __FUNCTION__,
2984	     tmp->dst.x, tmp->dst.y, sx, sy, alpha_fixup));
2985
2986	tmp->u.blt.src_pixmap = NULL;
2987	tmp->u.blt.sx = sx;
2988	tmp->u.blt.sy = sy;
2989
2990	kgem_set_mode(&sna->kgem, KGEM_BLT, tmp->dst.bo);
2991	if (!kgem_check_many_bo_fenced(&sna->kgem, tmp->dst.bo, tmp->src.bo, NULL)) {
2992		kgem_submit(&sna->kgem);
2993		if (!kgem_check_many_bo_fenced(&sna->kgem,
2994					       tmp->dst.bo, tmp->src.bo, NULL)) {
2995			DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
2996			return sna_tiling_blt_composite(sna, tmp, tmp->src.bo,
2997							PICT_FORMAT_BPP(tmp->src.pict_format),
2998							alpha_fixup);
2999		}
3000		_kgem_set_mode(&sna->kgem, KGEM_BLT);
3001	}
3002
3003	if (alpha_fixup) {
3004		tmp->blt   = blt_composite_copy_with_alpha;
3005		tmp->box   = blt_composite_copy_box_with_alpha;
3006		tmp->boxes = blt_composite_copy_boxes_with_alpha;
3007
3008		if (!sna_blt_alpha_fixup_init(sna, &tmp->u.blt,
3009					      tmp->src.bo, tmp->dst.bo,
3010					      PICT_FORMAT_BPP(tmp->src.pict_format),
3011					      alpha_fixup))
3012			return false;
3013	} else {
3014		tmp->blt   = blt_composite_copy;
3015		tmp->box   = blt_composite_copy_box;
3016		tmp->boxes = blt_composite_copy_boxes;
3017		tmp->thread_boxes = blt_composite_copy_boxes__thread;
3018
3019		if (!sna_blt_copy_init(sna, &tmp->u.blt,
3020				       tmp->src.bo, tmp->dst.bo,
3021				       PICT_FORMAT_BPP(tmp->src.pict_format),
3022				       GXcopy))
3023			return false;
3024	}
3025
3026	tmp->done = convert_done;
3027	if (sna->kgem.gen >= 060 && tmp->src.bo == tmp->dst.bo)
3028		tmp->done = gen6_convert_done;
3029
3030	return true;
3031}
3032
3033static void sna_blt_fill_op_blt(struct sna *sna,
3034				const struct sna_fill_op *op,
3035				int16_t x, int16_t y,
3036				int16_t width, int16_t height)
3037{
3038	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
3039		const struct sna_blt_state *blt = &op->base.u.blt;
3040
3041		sna_blt_fill_begin(sna, blt);
3042
3043		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
3044		sna->blt_state.fill_pixel = blt->pixel;
3045		sna->blt_state.fill_alu = blt->alu;
3046	}
3047
3048	sna_blt_fill_one(sna, &op->base.u.blt, x, y, width, height);
3049}
3050
3051fastcall static void sna_blt_fill_op_box(struct sna *sna,
3052					 const struct sna_fill_op *op,
3053					 const BoxRec *box)
3054{
3055	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
3056		const struct sna_blt_state *blt = &op->base.u.blt;
3057
3058		sna_blt_fill_begin(sna, blt);
3059
3060		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
3061		sna->blt_state.fill_pixel = blt->pixel;
3062		sna->blt_state.fill_alu = blt->alu;
3063	}
3064
3065	_sna_blt_fill_box(sna, &op->base.u.blt, box);
3066}
3067
3068fastcall static void sna_blt_fill_op_boxes(struct sna *sna,
3069					   const struct sna_fill_op *op,
3070					   const BoxRec *box,
3071					   int nbox)
3072{
3073	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
3074		const struct sna_blt_state *blt = &op->base.u.blt;
3075
3076		sna_blt_fill_begin(sna, blt);
3077
3078		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
3079		sna->blt_state.fill_pixel = blt->pixel;
3080		sna->blt_state.fill_alu = blt->alu;
3081	}
3082
3083	_sna_blt_fill_boxes(sna, &op->base.u.blt, box, nbox);
3084}
3085
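/*
 * XY_PIXEL_BLT paints a single pixel and needs only two dwords: the
 * command and a packed (y << 16 | x) coordinate.  pt_add() fuses the
 * pair into one 64-bit value so the unrolled loops below can emit each
 * point with a single store.
 */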
3086static inline uint64_t pt_add(uint32_t cmd, const DDXPointRec *pt, int16_t dx, int16_t dy)
3087{
3088	union {
3089		DDXPointRec pt;
3090		uint32_t i;
3091	} u;
3092
3093	u.pt.x = pt->x + dx;
3094	u.pt.y = pt->y + dy;
3095
3096	return cmd | (uint64_t)u.i<<32;
3097}
3098
3099fastcall static void sna_blt_fill_op_points(struct sna *sna,
3100					    const struct sna_fill_op *op,
3101					    int16_t dx, int16_t dy,
3102					    const DDXPointRec *p, int n)
3103{
3104	const struct sna_blt_state *blt = &op->base.u.blt;
3105	struct kgem *kgem = &sna->kgem;
3106	uint32_t cmd;
3107
3108	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, n));
3109
3110	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
3111		sna_blt_fill_begin(sna, blt);
3112
3113		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
3114		sna->blt_state.fill_pixel = blt->pixel;
3115		sna->blt_state.fill_alu = blt->alu;
3116	}
3117
3118	if (!kgem_check_batch(kgem, 2))
3119		sna_blt_fill_begin(sna, blt);
3120
3121	cmd = XY_PIXEL_BLT;
3122	if (kgem->gen >= 040 && op->base.u.blt.bo[0]->tiling)
3123		cmd |= BLT_DST_TILED;
3124
3125	do {
3126		uint32_t *b = kgem->batch + kgem->nbatch;
3127		int n_this_time;
3128
3129		assert(sna->kgem.mode == KGEM_BLT);
3130		n_this_time = n;
3131		if (2*n_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
3132			n_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 2;
3133		assert(n_this_time);
3134		n -= n_this_time;
3135
3136		kgem->nbatch += 2 * n_this_time;
3137		assert(kgem->nbatch < kgem->surface);
3138
3139		if ((dx|dy) == 0) {
3140			while (n_this_time >= 8) {
3141				*((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0);
3142				*((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0);
3143				*((uint64_t *)b + 2) = pt_add(cmd, p+2, 0, 0);
3144				*((uint64_t *)b + 3) = pt_add(cmd, p+3, 0, 0);
3145				*((uint64_t *)b + 4) = pt_add(cmd, p+4, 0, 0);
3146				*((uint64_t *)b + 5) = pt_add(cmd, p+5, 0, 0);
3147				*((uint64_t *)b + 6) = pt_add(cmd, p+6, 0, 0);
3148				*((uint64_t *)b + 7) = pt_add(cmd, p+7, 0, 0);
3149				b += 16;
3150				n_this_time -= 8;
3151				p += 8;
3152			}
3153			if (n_this_time & 4) {
3154				*((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0);
3155				*((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0);
3156				*((uint64_t *)b + 2) = pt_add(cmd, p+2, 0, 0);
3157				*((uint64_t *)b + 3) = pt_add(cmd, p+3, 0, 0);
3158				b += 8;
3159				p += 4;
3160			}
3161			if (n_this_time & 2) {
3162				*((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0);
3163				*((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0);
3164				b += 4;
3165				p += 2;
3166			}
3167			if (n_this_time & 1)
3168				*((uint64_t *)b + 0) = pt_add(cmd, p++, 0, 0);
3169		} else {
3170			while (n_this_time >= 8) {
3171				*((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy);
3172				*((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy);
3173				*((uint64_t *)b + 2) = pt_add(cmd, p+2, dx, dy);
3174				*((uint64_t *)b + 3) = pt_add(cmd, p+3, dx, dy);
3175				*((uint64_t *)b + 4) = pt_add(cmd, p+4, dx, dy);
3176				*((uint64_t *)b + 5) = pt_add(cmd, p+5, dx, dy);
3177				*((uint64_t *)b + 6) = pt_add(cmd, p+6, dx, dy);
3178				*((uint64_t *)b + 7) = pt_add(cmd, p+7, dx, dy);
3179				b += 16;
3180				n_this_time -= 8;
3181				p += 8;
3182			}
3183			if (n_this_time & 4) {
3184				*((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy);
3185				*((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy);
3186				*((uint64_t *)b + 2) = pt_add(cmd, p+2, dx, dy);
3187				*((uint64_t *)b + 3) = pt_add(cmd, p+3, dx, dy);
				b += 8;
				p += 4; /* 4 points emitted */
3190			}
3191			if (n_this_time & 2) {
3192				*((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy);
3193				*((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy);
3194				b += 4;
3195				p += 2;
3196			}
3197			if (n_this_time & 1)
3198				*((uint64_t *)b + 0) = pt_add(cmd, p++, dx, dy);
3199		}
3200
3201		if (!n)
3202			return;
3203
3204		sna_blt_fill_begin(sna, blt);
3205	} while (1);
3206}
3207
3208bool sna_blt_fill(struct sna *sna, uint8_t alu,
3209		  struct kgem_bo *bo, int bpp,
3210		  uint32_t pixel,
3211		  struct sna_fill_op *fill)
3212{
3213#if DEBUG_NO_BLT || NO_BLT_FILL
3214	return false;
3215#endif
3216
3217	DBG(("%s(alu=%d, pixel=%x, bpp=%d)\n", __FUNCTION__, alu, pixel, bpp));
3218
3219	if (!kgem_bo_can_blt(&sna->kgem, bo)) {
3220		DBG(("%s: rejected due to incompatible Y-tiling\n",
3221		     __FUNCTION__));
3222		return false;
3223	}
3224
3225	if (!sna_blt_fill_init(sna, &fill->base.u.blt,
3226			       bo, bpp, alu, pixel))
3227		return false;
3228
3229	fill->blt   = sna_blt_fill_op_blt;
3230	fill->box   = sna_blt_fill_op_box;
3231	fill->boxes = sna_blt_fill_op_boxes;
3232	fill->points = sna_blt_fill_op_points;
3233	fill->done  =
3234		(void (*)(struct sna *, const struct sna_fill_op *))nop_done;
3235	return true;
3236}
3237
3238static void sna_blt_copy_op_blt(struct sna *sna,
3239				const struct sna_copy_op *op,
3240				int16_t src_x, int16_t src_y,
3241				int16_t width, int16_t height,
3242				int16_t dst_x, int16_t dst_y)
3243{
3244	sna_blt_copy_one(sna, &op->base.u.blt,
3245			 src_x, src_y,
3246			 width, height,
3247			 dst_x, dst_y);
3248}
3249
3250bool sna_blt_copy(struct sna *sna, uint8_t alu,
3251		  struct kgem_bo *src,
3252		  struct kgem_bo *dst,
3253		  int bpp,
3254		  struct sna_copy_op *op)
3255{
3256#if DEBUG_NO_BLT || NO_BLT_COPY
3257	return false;
3258#endif
3259
3260	if (!kgem_bo_can_blt(&sna->kgem, src))
3261		return false;
3262
3263	if (!kgem_bo_can_blt(&sna->kgem, dst))
3264		return false;
3265
3266	if (!sna_blt_copy_init(sna, &op->base.u.blt,
3267			       src, dst,
3268			       bpp, alu))
3269		return false;
3270
3271	op->blt  = sna_blt_copy_op_blt;
3272	if (sna->kgem.gen >= 060 && src == dst)
3273		op->done = (void (*)(struct sna *, const struct sna_copy_op *))
3274			    gen6_blt_copy_done;
3275	else
3276		op->done = (void (*)(struct sna *, const struct sna_copy_op *))
3277			    nop_done;
3278	return true;
3279}
3280
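/*
 * Emit a single XY_COLOR_BLT fill.  As the batch layout is fixed (7
 * dwords on gen8+, 6 before), a preceding fill or copy of exactly the
 * same box can be recognised and patched in place, since this fill
 * would completely overwrite its result anyway.
 */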
3281static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
3282			     struct kgem_bo *bo, int bpp,
3283			     uint32_t color,
3284			     const BoxRec *box)
3285{
3286	struct kgem *kgem = &sna->kgem;
3287	uint32_t br13, cmd, *b;
3288	bool overwrites;
3289
3290	assert(kgem_bo_can_blt (kgem, bo));
3291
3292	DBG(("%s: box=((%d, %d), (%d, %d))\n", __FUNCTION__,
3293	     box->x1, box->y1, box->x2, box->y2));
3294
3295	assert(box->x1 >= 0);
3296	assert(box->y1 >= 0);
3297
3298	cmd = XY_COLOR_BLT | (kgem->gen >= 0100 ? 5 : 4);
3299	br13 = bo->pitch;
3300	if (kgem->gen >= 040 && bo->tiling) {
3301		cmd |= BLT_DST_TILED;
3302		br13 >>= 2;
3303	}
3304	assert(br13 <= MAXSHORT);
3305
3306	br13 |= fill_ROP[alu] << 16;
	switch (bpp) {
	default: assert(0);
	case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		 br13 |= 1 << 25; /* RGB8888 */
		 /* fall through: 32bpp sets both depth bits */
	case 16: br13 |= 1 << 24; /* RGB565 */
		 /* fall through */
	case 8: break;
	}
3314
3315	/* All too frequently one blt completely overwrites the previous */
3316	overwrites = alu == GXcopy || alu == GXclear || alu == GXset;
3317	if (overwrites) {
3318		if (sna->kgem.gen >= 0100) {
3319			if (kgem->nbatch >= 7 &&
3320			    kgem->batch[kgem->nbatch-7] == cmd &&
3321			    *(uint64_t *)&kgem->batch[kgem->nbatch-5] == *(const uint64_t *)box &&
3322			    kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) {
3323				DBG(("%s: replacing last fill\n", __FUNCTION__));
3324				kgem->batch[kgem->nbatch-6] = br13;
3325				kgem->batch[kgem->nbatch-1] = color;
3326				return true;
3327			}
3328			if (kgem->nbatch >= 10 &&
3329			    (kgem->batch[kgem->nbatch-10] & 0xffc00000) == XY_SRC_COPY_BLT_CMD &&
3330			    *(uint64_t *)&kgem->batch[kgem->nbatch-8] == *(const uint64_t *)box &&
3331			    kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) {
3332				DBG(("%s: replacing last copy\n", __FUNCTION__));
3333				kgem->batch[kgem->nbatch-10] = cmd;
3334				kgem->batch[kgem->nbatch-8] = br13;
3335				kgem->batch[kgem->nbatch-4] = color;
3336				/* Keep the src bo as part of the execlist, just remove
3337				 * its relocation entry.
3338				 */
3339				kgem->nreloc--;
3340				kgem->nbatch -= 3;
3341				return true;
3342			}
3343		} else {
3344			if (kgem->nbatch >= 6 &&
3345			    kgem->batch[kgem->nbatch-6] == cmd &&
3346			    *(uint64_t *)&kgem->batch[kgem->nbatch-4] == *(const uint64_t *)box &&
3347			    kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) {
3348				DBG(("%s: replacing last fill\n", __FUNCTION__));
3349				kgem->batch[kgem->nbatch-5] = br13;
3350				kgem->batch[kgem->nbatch-1] = color;
3351				return true;
3352			}
3353			if (kgem->nbatch >= 8 &&
3354			    (kgem->batch[kgem->nbatch-8] & 0xffc00000) == XY_SRC_COPY_BLT_CMD &&
3355			    *(uint64_t *)&kgem->batch[kgem->nbatch-6] == *(const uint64_t *)box &&
3356			    kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) {
3357				DBG(("%s: replacing last copy\n", __FUNCTION__));
3358				kgem->batch[kgem->nbatch-8] = cmd;
3359				kgem->batch[kgem->nbatch-7] = br13;
3360				kgem->batch[kgem->nbatch-3] = color;
3361				/* Keep the src bo as part of the execlist, just remove
3362				 * its relocation entry.
3363				 */
3364				kgem->nreloc--;
3365				kgem->nbatch -= 2;
3366				return true;
3367			}
3368		}
3369	}
3370
3371	/* If we are currently emitting SCANLINES, keep doing so */
3372	if (sna->blt_state.fill_bo == bo->unique_id &&
3373	    sna->blt_state.fill_pixel == color &&
3374	    (sna->blt_state.fill_alu == alu ||
3375	     sna->blt_state.fill_alu == ~alu)) {
3376		DBG(("%s: matching last fill, converting to scanlines\n",
3377		     __FUNCTION__));
3378		return false;
3379	}
3380
3381	kgem_set_mode(kgem, KGEM_BLT, bo);
3382	if (!kgem_check_batch(kgem, 7) ||
3383	    !kgem_check_reloc(kgem, 1) ||
3384	    !kgem_check_bo_fenced(kgem, bo)) {
3385		kgem_submit(kgem);
3386		if (!kgem_check_bo_fenced(&sna->kgem, bo))
3387			return false;
3388
3389		_kgem_set_mode(kgem, KGEM_BLT);
3390	}
3391
3392	assert(kgem_check_batch(kgem, 6));
3393	assert(kgem_check_reloc(kgem, 1));
3394
3395	assert(sna->kgem.mode == KGEM_BLT);
3396	b = kgem->batch + kgem->nbatch;
3397	b[0] = cmd;
3398	b[1] = br13;
3399	*(uint64_t *)(b+2) = *(const uint64_t *)box;
3400	if (kgem->gen >= 0100) {
3401		*(uint64_t *)(b+4) =
3402			kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
3403					 I915_GEM_DOMAIN_RENDER << 16 |
3404					 I915_GEM_DOMAIN_RENDER |
3405					 KGEM_RELOC_FENCED,
3406					 0);
3407		b[6] = color;
3408		kgem->nbatch += 7;
3409	} else {
3410		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
3411				      I915_GEM_DOMAIN_RENDER << 16 |
3412				      I915_GEM_DOMAIN_RENDER |
3413				      KGEM_RELOC_FENCED,
3414				      0);
3415		b[5] = color;
3416		kgem->nbatch += 6;
3417	}
3418	assert(kgem->nbatch < kgem->surface);
3419
3420	sna->blt_state.fill_bo = bo->unique_id;
3421	sna->blt_state.fill_pixel = color;
3422	sna->blt_state.fill_alu = ~alu;
3423	return true;
3424}
3425
3426bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
3427			struct kgem_bo *bo, int bpp,
3428			uint32_t pixel,
3429			const BoxRec *box, int nbox)
3430{
3431	struct kgem *kgem = &sna->kgem;
3432	uint32_t br13, cmd;
3433
3434#if DEBUG_NO_BLT || NO_BLT_FILL_BOXES
3435	return false;
3436#endif
3437
3438	DBG(("%s (%d, %08x, %d) x %d\n",
3439	     __FUNCTION__, bpp, pixel, alu, nbox));
3440
3441	if (!kgem_bo_can_blt(kgem, bo)) {
3442		DBG(("%s: fallback -- cannot blt to dst\n", __FUNCTION__));
3443		return false;
3444	}
3445
3446	if (alu == GXclear)
3447		pixel = 0;
3448	else if (alu == GXcopy) {
3449		if (pixel == 0)
3450			alu = GXclear;
3451		else if (pixel == -1)
3452			alu = GXset;
3453	}
3454
3455	if (nbox == 1 && sna_blt_fill_box(sna, alu, bo, bpp, pixel, box))
3456		return true;
3457
	/* With several boxes, switch to the setup/scanline protocol: one
	 * XY_SETUP_MONO_PATTERN_SL_BLT carries pitch, ROP and colour, after
	 * which every box costs only a 3-dword XY_SCANLINE_BLT.
	 */
	br13 = bo->pitch;
	cmd = XY_SCANLINE_BLT;
	if (kgem->gen >= 040 && bo->tiling) {
		cmd |= 1 << 11; /* dst tiled, the XY_SCANLINE_BLT tiling bit */
		br13 >>= 2;
	}
3464	assert(br13 <= MAXSHORT);
3465
3466	br13 |= 1<<31 | fill_ROP[alu] << 16;
	switch (bpp) {
	default: assert(0);
	case 32: br13 |= 1 << 25; /* RGB8888 */
		 /* fall through: 32bpp sets both depth bits */
	case 16: br13 |= 1 << 24; /* RGB565 */
		 /* fall through */
	case 8: break;
	}
3473
3474	kgem_set_mode(kgem, KGEM_BLT, bo);
3475	if (!kgem_check_batch(kgem, 14) ||
3476	    !kgem_check_bo_fenced(kgem, bo)) {
3477		kgem_submit(kgem);
3478		if (!kgem_check_bo_fenced(&sna->kgem, bo))
3479			return false;
3480		_kgem_set_mode(kgem, KGEM_BLT);
3481	}
3482
3483	if (sna->blt_state.fill_bo != bo->unique_id ||
3484	    sna->blt_state.fill_pixel != pixel ||
3485	    sna->blt_state.fill_alu != alu)
3486	{
3487		uint32_t *b;
3488
3489		if (!kgem_check_reloc(kgem, 1)) {
3490			_kgem_submit(kgem);
3491			if (!kgem_check_bo_fenced(&sna->kgem, bo))
3492				return false;
3493			_kgem_set_mode(kgem, KGEM_BLT);
3494		}
3495
3496		assert(sna->kgem.mode == KGEM_BLT);
3497		b = kgem->batch + kgem->nbatch;
3498		if (kgem->gen >= 0100) {
3499			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
3500			if (bpp == 32)
3501				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
3502			if (bo->tiling)
3503				b[0] |= BLT_DST_TILED;
3504			b[1] = br13;
3505			b[2] = 0;
3506			b[3] = 0;
3507			*(uint64_t *)(b+4) =
3508				kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
3509						 I915_GEM_DOMAIN_RENDER << 16 |
3510						 I915_GEM_DOMAIN_RENDER |
3511						 KGEM_RELOC_FENCED,
3512						 0);
3513			b[6] = pixel;
3514			b[7] = pixel;
3515			b[8] = 0;
3516			b[9] = 0;
3517			kgem->nbatch += 10;
3518		} else {
3519			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
3520			if (bpp == 32)
3521				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
3522			if (bo->tiling && kgem->gen >= 040)
3523				b[0] |= BLT_DST_TILED;
3524			b[1] = br13;
3525			b[2] = 0;
3526			b[3] = 0;
3527			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
3528					      I915_GEM_DOMAIN_RENDER << 16 |
3529					      I915_GEM_DOMAIN_RENDER |
3530					      KGEM_RELOC_FENCED,
3531					      0);
3532			b[5] = pixel;
3533			b[6] = pixel;
3534			b[7] = 0;
3535			b[8] = 0;
3536			kgem->nbatch += 9;
3537		}
3538		assert(kgem->nbatch < kgem->surface);
3539
3540		sna->blt_state.fill_bo = bo->unique_id;
3541		sna->blt_state.fill_pixel = pixel;
3542		sna->blt_state.fill_alu = alu;
3543	}
3544
3545	do {
3546		int nbox_this_time;
3547
3548		nbox_this_time = nbox;
3549		if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
3550			nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
3551		assert(nbox_this_time);
3552		nbox -= nbox_this_time;
3553
3554		assert(sna->kgem.mode == KGEM_BLT);
3555		do {
3556			uint32_t *b;
3557
3558			DBG(("%s: (%d, %d), (%d, %d): %08x\n",
3559			     __FUNCTION__,
3560			     box->x1, box->y1,
3561			     box->x2, box->y2,
3562			     pixel));
3563
3564			assert(box->x1 >= 0);
3565			assert(box->y1 >= 0);
3566			assert(box->y2 * bo->pitch <= kgem_bo_size(bo));
3567
3568			b = kgem->batch + kgem->nbatch;
3569			kgem->nbatch += 3;
3570			assert(kgem->nbatch < kgem->surface);
3571			b[0] = cmd;
3572			*(uint64_t *)(b+1) = *(const uint64_t *)box;
3573			box++;
3574		} while (--nbox_this_time);
3575
3576		if (nbox) {
3577			uint32_t *b;
3578
3579			_kgem_submit(kgem);
3580			_kgem_set_mode(kgem, KGEM_BLT);
3581
3582			assert(sna->kgem.mode == KGEM_BLT);
3583			b = kgem->batch + kgem->nbatch;
3584			if (kgem->gen >= 0100) {
3585				b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
3586				if (bpp == 32)
3587					b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
3588				if (bo->tiling)
3589					b[0] |= BLT_DST_TILED;
3590				b[1] = br13;
3591				b[2] = 0;
3592				b[3] = 0;
3593				*(uint64_t *)(b+4) =
3594					kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
3595							 I915_GEM_DOMAIN_RENDER << 16 |
3596							 I915_GEM_DOMAIN_RENDER |
3597							 KGEM_RELOC_FENCED,
3598							 0);
3599				b[6] = pixel;
3600				b[7] = pixel;
3601				b[8] = 0;
3602				b[9] = 0;
3603				kgem->nbatch += 10;
3604			} else {
3605				b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
3606				if (bpp == 32)
3607					b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
3608				if (bo->tiling && kgem->gen >= 040)
3609					b[0] |= BLT_DST_TILED;
3610				b[1] = br13;
3611				b[2] = 0;
3612				b[3] = 0;
3613				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
3614						      I915_GEM_DOMAIN_RENDER << 16 |
3615						      I915_GEM_DOMAIN_RENDER |
3616						      KGEM_RELOC_FENCED,
3617						      0);
3618				b[5] = pixel;
3619				b[6] = pixel;
3620				b[7] = 0;
3621				b[8] = 0;
3622				kgem->nbatch += 9;
3623			}
3624			assert(kgem->nbatch < kgem->surface);
3625		}
3626	} while (nbox);
3627
3628	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
3629		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
3630		_kgem_submit(kgem);
3631	}
3632
3633	return true;
3634}
3635
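/*
 * Copy a run of boxes with XY_SRC_COPY_BLT, chunked against the batch
 * and relocation limits.  With no destination offset the box rectangle
 * can be streamed into the batch verbatim as a pair of 64-bit stores
 * per blit.
 */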
3636bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
3637			struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
3638			struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
3639			int bpp, const BoxRec *box, int nbox)
3640{
3641	struct kgem *kgem = &sna->kgem;
3642	unsigned src_pitch, br13, cmd;
3643
3644#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES
3645	return false;
3646#endif
3647
3648	DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n",
3649	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox,
3650	    src_bo->tiling, dst_bo->tiling,
3651	    src_bo->pitch, dst_bo->pitch));
3652	assert(nbox);
3653
3654	if (wedged(sna) || !kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) {
		DBG(("%s: cannot blt from src? %d or to dst? %d\n",
3656		     __FUNCTION__,
3657		     kgem_bo_can_blt(kgem, src_bo),
3658		     kgem_bo_can_blt(kgem, dst_bo)));
3659		return false;
3660	}
3661
3662	cmd = XY_SRC_COPY_BLT_CMD;
3663	if (bpp == 32)
3664		cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
3665
3666	src_pitch = src_bo->pitch;
3667	if (kgem->gen >= 040 && src_bo->tiling) {
3668		cmd |= BLT_SRC_TILED;
3669		src_pitch >>= 2;
3670	}
3671	assert(src_pitch <= MAXSHORT);
3672
3673	br13 = dst_bo->pitch;
3674	if (kgem->gen >= 040 && dst_bo->tiling) {
3675		cmd |= BLT_DST_TILED;
3676		br13 >>= 2;
3677	}
3678	assert(br13 <= MAXSHORT);
3679
3680	br13 |= copy_ROP[alu] << 16;
	switch (bpp) {
	default: assert(0);
	case 32: br13 |= 1 << 25; /* RGB8888 */
		 /* fall through: 32bpp sets both depth bits */
	case 16: br13 |= 1 << 24; /* RGB565 */
		 /* fall through */
	case 8: break;
	}

	/* Compare the first box against a previous fill: if the most recent
	 * command in the batch is a solid fill of exactly the rectangle we
	 * are about to copy over, that fill is dead -- unwind it (and its
	 * relocation) from the batch rather than executing both.
	 */
	if ((alu == GXcopy || alu == GXclear || alu == GXset) &&
	    kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) {
		if (kgem->gen >= 0100) {
			if (kgem->nbatch >= 7 &&
			    kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
			    kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
				DBG(("%s: deleting last fill\n", __FUNCTION__));
				kgem->nbatch -= 7;
				kgem->nreloc--;
			}
		} else {
			if (kgem->nbatch >= 6 &&
			    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
			    kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
				DBG(("%s: deleting last fill\n", __FUNCTION__));
				kgem->nbatch -= 6;
				kgem->nreloc--;
			}
		}
	}

	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
	if (!kgem_check_batch(kgem, 10) ||
	    !kgem_check_reloc(kgem, 2) ||
	    !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
			DBG(("%s: not enough room in aperture, fallback to tiling copy\n", __FUNCTION__));
			return sna_tiling_blt_copy_boxes(sna, alu,
							 src_bo, src_dx, src_dy,
							 dst_bo, dst_dx, dst_dy,
							 bpp, box, nbox);
		}
		_kgem_set_mode(kgem, KGEM_BLT);
	}

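	/* Fast path for a zero destination offset: the two coordinate dwords
	 * of each command are then bit-identical to the BoxRec itself (four
	 * int16s, little-endian), so each box is written into the batch as a
	 * single 64-bit store and only the source offset needs applying, via
	 * add2().
	 */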
	if ((dst_dx | dst_dy) == 0) {
		if (kgem->gen >= 0100) {
			uint64_t hdr = (uint64_t)br13 << 32 | cmd | 8;
			do {
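				/* Each command in this loop occupies 10
				 * dwords and 2 relocations: clamp the number
				 * of boxes emitted per batch to the space
				 * remaining, then submit and restart for the
				 * remainder.
				 */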
				int nbox_this_time;

				nbox_this_time = nbox;
				if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
					nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 10;
				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
				assert(nbox_this_time);
				nbox -= nbox_this_time;

				assert(sna->kgem.mode == KGEM_BLT);
				do {
					uint32_t *b = kgem->batch + kgem->nbatch;

					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
					     __FUNCTION__,
					     box->x1, box->y1,
					     box->x2 - box->x1, box->y2 - box->y1));

					assert(box->x1 + src_dx >= 0);
					assert(box->y1 + src_dy >= 0);
					assert(box->x1 + src_dx <= INT16_MAX);
					assert(box->y1 + src_dy <= INT16_MAX);

					assert(box->x1 >= 0);
					assert(box->y1 >= 0);

					*(uint64_t *)&b[0] = hdr;
					*(uint64_t *)&b[2] = *(const uint64_t *)box;
					*(uint64_t *)(b+4) =
						kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
								 I915_GEM_DOMAIN_RENDER << 16 |
								 I915_GEM_DOMAIN_RENDER |
								 KGEM_RELOC_FENCED,
								 0);
					b[6] = add2(b[2], src_dx, src_dy);
					b[7] = src_pitch;
					*(uint64_t *)(b+8) =
						kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
								 I915_GEM_DOMAIN_RENDER << 16 |
								 KGEM_RELOC_FENCED,
								 0);
					kgem->nbatch += 10;
					assert(kgem->nbatch < kgem->surface);
					box++;
				} while (--nbox_this_time);

				if (!nbox)
					break;

				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
			} while (1);
		} else {
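			/* Pre-gen8: the same loop with 32-bit relocations,
			 * 8 dwords per command.
			 */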
			uint64_t hdr = (uint64_t)br13 << 32 | cmd | 6;
			do {
				int nbox_this_time;

				nbox_this_time = nbox;
				if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
					nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
				assert(nbox_this_time);
				nbox -= nbox_this_time;

				assert(sna->kgem.mode == KGEM_BLT);
				do {
					uint32_t *b = kgem->batch + kgem->nbatch;

					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
					     __FUNCTION__,
					     box->x1, box->y1,
					     box->x2 - box->x1, box->y2 - box->y1));

					assert(box->x1 + src_dx >= 0);
					assert(box->y1 + src_dy >= 0);
					assert(box->x1 + src_dx <= INT16_MAX);
					assert(box->y1 + src_dy <= INT16_MAX);

					assert(box->x1 >= 0);
					assert(box->y1 >= 0);

					*(uint64_t *)&b[0] = hdr;
					*(uint64_t *)&b[2] = *(const uint64_t *)box;
					b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
							      I915_GEM_DOMAIN_RENDER << 16 |
							      I915_GEM_DOMAIN_RENDER |
							      KGEM_RELOC_FENCED,
							      0);
					b[5] = add2(b[2], src_dx, src_dy);
					b[6] = src_pitch;
					b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
							      I915_GEM_DOMAIN_RENDER << 16 |
							      KGEM_RELOC_FENCED,
							      0);
					kgem->nbatch += 8;
					assert(kgem->nbatch < kgem->surface);
					box++;
				} while (--nbox_this_time);

				if (!nbox)
					break;

				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
			} while (1);
		}
	} else {
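		/* General path: fold the destination offset into every box
		 * as the coordinates are written out.
		 */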
		if (kgem->gen >= 0100) {
			cmd |= 8;
			do {
				int nbox_this_time;

				nbox_this_time = nbox;
				if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
					nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 10;
				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
				assert(nbox_this_time);
				nbox -= nbox_this_time;

				assert(sna->kgem.mode == KGEM_BLT);
				do {
					uint32_t *b = kgem->batch + kgem->nbatch;

					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
					     __FUNCTION__,
					     box->x1, box->y1,
					     box->x2 - box->x1, box->y2 - box->y1));

					assert(box->x1 + src_dx >= 0);
					assert(box->y1 + src_dy >= 0);

					assert(box->x1 + dst_dx >= 0);
					assert(box->y1 + dst_dy >= 0);

					b[0] = cmd;
					b[1] = br13;
					b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
					b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
					*(uint64_t *)(b+4) =
						kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
								 I915_GEM_DOMAIN_RENDER << 16 |
								 I915_GEM_DOMAIN_RENDER |
								 KGEM_RELOC_FENCED,
								 0);
					b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
					b[7] = src_pitch;
					*(uint64_t *)(b+8) =
						kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
								 I915_GEM_DOMAIN_RENDER << 16 |
								 KGEM_RELOC_FENCED,
								 0);
					kgem->nbatch += 10;
					assert(kgem->nbatch < kgem->surface);
					box++;
				} while (--nbox_this_time);

				if (!nbox)
					break;

				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
			} while (1);
		} else {
			cmd |= 6;
			do {
				int nbox_this_time;

				nbox_this_time = nbox;
				if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
					nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
				assert(nbox_this_time);
				nbox -= nbox_this_time;

				assert(sna->kgem.mode == KGEM_BLT);
				do {
					uint32_t *b = kgem->batch + kgem->nbatch;

					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
					     __FUNCTION__,
					     box->x1, box->y1,
					     box->x2 - box->x1, box->y2 - box->y1));

					assert(box->x1 + src_dx >= 0);
					assert(box->y1 + src_dy >= 0);

					assert(box->x1 + dst_dx >= 0);
					assert(box->y1 + dst_dy >= 0);

					b[0] = cmd;
					b[1] = br13;
					b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
					b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
					b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
							      I915_GEM_DOMAIN_RENDER << 16 |
							      I915_GEM_DOMAIN_RENDER |
							      KGEM_RELOC_FENCED,
							      0);
					b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
					b[6] = src_pitch;
					b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
							      I915_GEM_DOMAIN_RENDER << 16 |
							      KGEM_RELOC_FENCED,
							      0);
					kgem->nbatch += 8;
					assert(kgem->nbatch < kgem->surface);
					box++;
				} while (--nbox_this_time);

				if (!nbox)
					break;

				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
			} while (1);
		}
	}

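	/* As above, flush early if the ring has gone idle. Otherwise, after
	 * a copy within a single bo on gen6+, append a degenerate
	 * XY_SETUP_CLIP; this reads as a barrier between potentially
	 * overlapping self-copies, though the hardware rationale is not
	 * recorded here.
	 */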
	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
		_kgem_submit(kgem);
	} else if (kgem->gen >= 060 && src_bo == dst_bo && kgem_check_batch(kgem, 3)) {
		uint32_t *b = kgem->batch + kgem->nbatch;
		assert(sna->kgem.mode == KGEM_BLT);
		b[0] = XY_SETUP_CLIP;
		b[1] = b[2] = 0;
		kgem->nbatch += 3;
		assert(kgem->nbatch < kgem->surface);
	}

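	/* The batch no longer ends with the cached fill setup, so force the
	 * next BLT fill to re-emit its state.
	 */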
	sna->blt_state.fill_bo = 0;
	return true;
}

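/* Copy boxes while merging in a constant value: each copy is emitted as an
 * XY_FULL_MONO_PATTERN_BLT with both pattern colours set to alpha_fixup, so
 * a solid pattern is available to the ROP alongside source and destination.
 * The intended use is to supply a defined alpha channel when blitting
 * between formats that disagree about alpha.
 */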
bool sna_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu,
				    struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
				    struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
				    int bpp, int alpha_fixup,
				    const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	unsigned src_pitch, br13, cmd;

#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES
	return false;
#endif

	DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n",
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox,
	     src_bo->tiling, dst_bo->tiling,
	     src_bo->pitch, dst_bo->pitch));

	if (wedged(sna) || !kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) {
		DBG(("%s: cannot blt to src? %d or dst? %d\n",
		     __FUNCTION__,
		     kgem_bo_can_blt(kgem, src_bo),
		     kgem_bo_can_blt(kgem, dst_bo)));
		return false;
	}

	cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 12 : 10);
	src_pitch = src_bo->pitch;
	if (kgem->gen >= 040 && src_bo->tiling) {
		cmd |= BLT_SRC_TILED;
		src_pitch >>= 2;
	}
	assert(src_pitch <= MAXSHORT);

	br13 = dst_bo->pitch;
	if (kgem->gen >= 040 && dst_bo->tiling) {
		cmd |= BLT_DST_TILED;
		br13 >>= 2;
	}
	assert(br13 <= MAXSHORT);

	br13 |= copy_ROP[alu] << 16;
	switch (bpp) {
	default: assert(0);
	case 32: br13 |= 1 << 25; /* RGB8888: depth bits 25:24 = 11 */
		 /* fall through */
	case 16: br13 |= 1 << 24; /* RGB565: depth bits 25:24 = 01 */
		 /* fall through */
	case 8:  break;
	}

	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
	if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
		DBG(("%s: cannot fit src+dst into aperture\n", __FUNCTION__));
		return false;
	}

	/* Compare first box against a previous fill */
	if ((alu == GXcopy || alu == GXclear || alu == GXset) &&
	    kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) {
		if (kgem->gen >= 0100) {
			if (kgem->nbatch >= 7 &&
			    kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
			    kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
				DBG(("%s: deleting last fill\n", __FUNCTION__));
				kgem->nbatch -= 7;
				kgem->nreloc--;
			}
		} else {
			if (kgem->nbatch >= 6 &&
			    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
			    kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
				DBG(("%s: deleting last fill\n", __FUNCTION__));
				kgem->nbatch -= 6;
				kgem->nreloc--;
			}
		}
	}

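	/* Each command needs 14 dwords on gen8+ (12 before) and 2
	 * relocations; checking for the larger size covers both layouts.
	 */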
	while (nbox--) {
		uint32_t *b;

		if (!kgem_check_batch(kgem, 14) ||
		    !kgem_check_reloc(kgem, 2)) {
			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
		}

		assert(sna->kgem.mode == KGEM_BLT);
		b = kgem->batch + kgem->nbatch;
		b[0] = cmd;
		b[1] = br13;
		b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
		b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
		if (kgem->gen >= 0100) {
			*(uint64_t *)(b+4) =
				kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
						 I915_GEM_DOMAIN_RENDER << 16 |
						 I915_GEM_DOMAIN_RENDER |
						 KGEM_RELOC_FENCED,
						 0);
			b[6] = src_pitch;
			b[7] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx);
			*(uint64_t *)(b+8) =
				kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
						 I915_GEM_DOMAIN_RENDER << 16 |
						 KGEM_RELOC_FENCED,
						 0);
			b[10] = alpha_fixup;
			b[11] = alpha_fixup;
			b[12] = 0;
			b[13] = 0;
			kgem->nbatch += 14;
		} else {
			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
					      I915_GEM_DOMAIN_RENDER << 16 |
					      I915_GEM_DOMAIN_RENDER |
					      KGEM_RELOC_FENCED,
					      0);
			b[5] = src_pitch;
			b[6] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx);
			b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
					      I915_GEM_DOMAIN_RENDER << 16 |
					      KGEM_RELOC_FENCED,
					      0);
			b[8] = alpha_fixup;
			b[9] = alpha_fixup;
			b[10] = 0;
			b[11] = 0;
			kgem->nbatch += 12;
		}
		assert(kgem->nbatch < kgem->surface);
		box++;
	}

	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
		_kgem_submit(kgem);
	}

	sna->blt_state.fill_bo = 0;
	return true;
}

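/* Compute the bounding box of n boxes, n >= 1. */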
static void box_extents(const BoxRec *box, int n, BoxRec *extents)
{
	*extents = *box;
	while (--n) {
		box++;
		if (box->x1 < extents->x1)
			extents->x1 = box->x1;
		if (box->y1 < extents->y1)
			extents->y1 = box->y1;

		if (box->x2 > extents->x2)
			extents->x2 = box->x2;
		if (box->y2 > extents->y2)
			extents->y2 = box->y2;
	}
}

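/* Wrapper around sna_blt_copy_boxes() that first eliminates Y-tiling where
 * it would defeat the blitter: Y-tiled pixmaps with an otherwise
 * blt-compatible pitch are switched to X-tiling in place, and a self-copy
 * that cannot be re-tiled is staged through a temporary X-tiled bo covering
 * the extents of the boxes.
 */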
bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu,
				 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
				 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
				 const BoxRec *box, int nbox)
{
	struct kgem_bo *free_bo = NULL;
	bool ret;

	DBG(("%s: alu=%d, n=%d\n", __FUNCTION__, alu, nbox));

	if (!sna_blt_compare_depth(src, dst)) {
		DBG(("%s: mismatching depths %d -> %d\n",
		     __FUNCTION__, src->depth, dst->depth));
		return false;
	}

	if (src_bo == dst_bo) {
		DBG(("%s: dst == src\n", __FUNCTION__));

		if (src_bo->tiling == I915_TILING_Y &&
		    kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) {
			struct kgem_bo *bo;

			DBG(("%s: src is Y-tiled\n", __FUNCTION__));

			if (src->type != DRAWABLE_PIXMAP)
				return false;

			assert(sna_pixmap((PixmapPtr)src)->gpu_bo == src_bo);
			bo = sna_pixmap_change_tiling((PixmapPtr)src, I915_TILING_X);
			if (bo == NULL) {
				BoxRec extents;

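				/* Cannot re-tile in place: bounce the boxes
				 * through a temporary X-tiled bo covering
				 * their extents and copy from that instead.
				 */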
				DBG(("%s: y-tiling conversion failed\n",
				     __FUNCTION__));

				box_extents(box, nbox, &extents);
				free_bo = kgem_create_2d(&sna->kgem,
							 extents.x2 - extents.x1,
							 extents.y2 - extents.y1,
							 src->bitsPerPixel,
							 I915_TILING_X, 0);
				if (free_bo == NULL) {
					DBG(("%s: fallback -- temp allocation failed\n",
					     __FUNCTION__));
					return false;
				}

				if (!sna_blt_copy_boxes(sna, GXcopy,
							src_bo, src_dx, src_dy,
							free_bo, -extents.x1, -extents.y1,
							src->bitsPerPixel,
							box, nbox)) {
					DBG(("%s: fallback -- temp copy failed\n",
					     __FUNCTION__));
					kgem_bo_destroy(&sna->kgem, free_bo);
					return false;
				}

				src_dx = -extents.x1;
				src_dy = -extents.y1;
				src_bo = free_bo;
			} else
				dst_bo = src_bo = bo;
		}
	} else {
		if (src_bo->tiling == I915_TILING_Y &&
		    kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) {
			DBG(("%s: src is y-tiled\n", __FUNCTION__));
			if (src->type != DRAWABLE_PIXMAP)
				return false;
			assert(sna_pixmap((PixmapPtr)src)->gpu_bo == src_bo);
			src_bo = sna_pixmap_change_tiling((PixmapPtr)src, I915_TILING_X);
			if (src_bo == NULL) {
				DBG(("%s: fallback -- src y-tiling conversion failed\n",
				     __FUNCTION__));
				return false;
			}
		}

		if (dst_bo->tiling == I915_TILING_Y &&
		    kgem_bo_blt_pitch_is_ok(&sna->kgem, dst_bo)) {
			DBG(("%s: dst is y-tiled\n", __FUNCTION__));
			if (dst->type != DRAWABLE_PIXMAP)
				return false;
			assert(sna_pixmap((PixmapPtr)dst)->gpu_bo == dst_bo);
			dst_bo = sna_pixmap_change_tiling((PixmapPtr)dst, I915_TILING_X);
			if (dst_bo == NULL) {
				DBG(("%s: fallback -- dst y-tiling conversion failed\n",
				     __FUNCTION__));
				return false;
			}
		}
	}

	ret = sna_blt_copy_boxes(sna, alu,
				 src_bo, src_dx, src_dy,
				 dst_bo, dst_dx, dst_dy,
				 dst->bitsPerPixel,
				 box, nbox);

	if (free_bo)
		kgem_bo_destroy(&sna->kgem, free_bo);

	return ret;
}
