1/*
2 * Based on code from intel_uxa.c and i830_xaa.c
3 * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
4 * Copyright (c) 2005 Jesse Barnes <jbarnes@virtuousgeek.org>
5 * Copyright (c) 2009-2011 Intel Corporation
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * SOFTWARE.
25 *
26 * Authors:
27 *    Chris Wilson <chris@chris-wilson.co.uk>
28 *
29 */
30
31#ifdef HAVE_CONFIG_H
32#include "config.h"
33#endif
34
35#include "sna.h"
36#include "sna_render.h"
37#include "sna_render_inline.h"
38#include "sna_reg.h"
39#include "rop.h"
40
41#define NO_BLT_COMPOSITE 0
42#define NO_BLT_COPY 0
43#define NO_BLT_COPY_BOXES 0
44#define NO_BLT_FILL 0
45#define NO_BLT_FILL_BOXES 0
46
47#ifndef PICT_TYPE_BGRA
48#define PICT_TYPE_BGRA 8
49#endif
50
/* Map an X11 GC alu code (GXclear..GXset) to the BLT engine raster
 * operation for source->destination copies (operands S and D).
 * Indexed directly by the alu value. */
static const uint8_t copy_ROP[] = {
	ROP_0,                  /* GXclear */
	ROP_DSa,                /* GXand */
	ROP_SDna,               /* GXandReverse */
	ROP_S,                  /* GXcopy */
	ROP_DSna,               /* GXandInverted */
	ROP_D,                  /* GXnoop */
	ROP_DSx,                /* GXxor */
	ROP_DSo,                /* GXor */
	ROP_DSon,               /* GXnor */
	ROP_DSxn,               /* GXequiv */
	ROP_Dn,                 /* GXinvert */
	ROP_SDno,               /* GXorReverse */
	ROP_Sn,                 /* GXcopyInverted */
	ROP_DSno,               /* GXorInverted */
	ROP_DSan,               /* GXnand */
	ROP_1                   /* GXset */
};
69
/* Map an X11 GC alu code (GXclear..GXset) to the BLT engine raster
 * operation for solid fills (operands are the pattern P and the
 * destination D). Indexed directly by the alu value, like copy_ROP. */
static const uint8_t fill_ROP[] = {
	ROP_0,                  /* GXclear */
	ROP_DPa,                /* GXand */
	ROP_PDna,               /* GXandReverse */
	ROP_P,                  /* GXcopy */
	ROP_DPna,               /* GXandInverted */
	ROP_D,                  /* GXnoop */
	ROP_DPx,                /* GXxor */
	ROP_DPo,                /* GXor */
	ROP_DPon,               /* GXnor */
	ROP_PDxn,               /* GXequiv */
	ROP_Dn,                 /* GXinvert */
	ROP_PDno,               /* GXorReverse */
	ROP_Pn,                 /* GXcopyInverted */
	ROP_DPno,               /* GXorInverted */
	ROP_DPan,               /* GXnand */
	ROP_1                   /* GXset */
};
88
/* Composite done() callback for operations run under sigtrap protection:
 * drop the sigtrap reference taken when the operation began. */
static void sig_done(struct sna *sna, const struct sna_composite_op *op)
{
	sigtrap_put();
}
93
/* Default composite done() callback: no per-op teardown needed, but if the
 * ring has gone idle while we hold queued work, submit the batch now rather
 * than letting it sit. */
static void nop_done(struct sna *sna, const struct sna_composite_op *op)
{
	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
	if (sna->kgem.nexec > 1 && __kgem_ring_empty(&sna->kgem)) {
		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
		_kgem_submit(&sna->kgem);
	}
	(void)op;
}
103
/* Composite done() callback for gen6 BLT copies. If the ring is idle,
 * flush immediately. Otherwise append an XY_SETUP_CLIP command with a
 * zeroed rectangle (presumably to reset the clip state after the copy —
 * NOTE(review): confirm against the gen6 BLT workaround this pairs with),
 * but only when there is batch space; it is skipped otherwise. */
static void gen6_blt_copy_done(struct sna *sna, const struct sna_composite_op *op)
{
	struct kgem *kgem = &sna->kgem;

	assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
		_kgem_submit(kgem);
		return;
	}

	if (kgem_check_batch(kgem, 3)) {
		uint32_t *b = kgem->batch + kgem->nbatch;
		assert(sna->kgem.mode == KGEM_BLT);
		b[0] = XY_SETUP_CLIP;
		b[1] = b[2] = 0;	/* empty clip rectangle */
		kgem->nbatch += 3;
		assert(kgem->nbatch < kgem->surface);
	}
	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
	(void)op;
}
126
127static bool sna_blt_fill_init(struct sna *sna,
128			      struct sna_blt_state *blt,
129			      struct kgem_bo *bo,
130			      int bpp,
131			      uint8_t alu,
132			      uint32_t pixel)
133{
134	struct kgem *kgem = &sna->kgem;
135
136	assert(kgem_bo_can_blt (kgem, bo));
137	blt->bo[0] = bo;
138
139	blt->br13 = bo->pitch;
140	blt->cmd = XY_SCANLINE_BLT;
141	if (kgem->gen >= 040 && bo->tiling) {
142		blt->cmd |= BLT_DST_TILED;
143		blt->br13 >>= 2;
144	}
145	assert(blt->br13 <= MAXSHORT);
146
147	if (alu == GXclear)
148		pixel = 0;
149	else if (alu == GXcopy) {
150		if (pixel == 0)
151			alu = GXclear;
152		else if (pixel == -1)
153			alu = GXset;
154	}
155
156	blt->br13 |= 1<<31 | (fill_ROP[alu] << 16);
157	switch (bpp) {
158	default: assert(0);
159	case 32: blt->br13 |= 1 << 25; /* RGB8888 */
160	case 16: blt->br13 |= 1 << 24; /* RGB565 */
161	case 8: break;
162	}
163
164	blt->pixel = pixel;
165	blt->bpp = bpp;
166	blt->alu = alu;
167
168	kgem_set_mode(kgem, KGEM_BLT, bo);
169	if (!kgem_check_batch(kgem, 14) ||
170	    !kgem_check_bo_fenced(kgem, bo)) {
171		kgem_submit(kgem);
172		if (!kgem_check_bo_fenced(kgem, bo))
173			return false;
174		_kgem_set_mode(kgem, KGEM_BLT);
175	}
176
177	if (sna->blt_state.fill_bo != bo->unique_id ||
178	    sna->blt_state.fill_pixel != pixel ||
179	    sna->blt_state.fill_alu != alu)
180	{
181		uint32_t *b;
182
183		if (!kgem_check_batch(kgem, 24) ||
184		    !kgem_check_reloc(kgem, 1)) {
185			_kgem_submit(kgem);
186			if (!kgem_check_bo_fenced(kgem, bo))
187				return false;
188			_kgem_set_mode(kgem, KGEM_BLT);
189		}
190		kgem_bcs_set_tiling(kgem, NULL, bo);
191
192		assert(sna->kgem.mode == KGEM_BLT);
193		b = kgem->batch + kgem->nbatch;
194		if (sna->kgem.gen >= 0100) {
195			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
196			if (bpp == 32)
197				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
198			if (bo->tiling)
199				b[0] |= BLT_DST_TILED;
200			b[1] = blt->br13;
201			b[2] = 0;
202			b[3] = 0;
203			*(uint64_t *)(b+4) =
204				kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
205						 I915_GEM_DOMAIN_RENDER << 16 |
206						 I915_GEM_DOMAIN_RENDER |
207						 KGEM_RELOC_FENCED,
208						 0);
209			b[6] = pixel;
210			b[7] = pixel;
211			b[8] = 0;
212			b[9] = 0;
213			kgem->nbatch += 10;
214		} else {
215			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
216			if (bpp == 32)
217				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
218			if (bo->tiling && kgem->gen >= 040)
219				b[0] |= BLT_DST_TILED;
220			b[1] = blt->br13;
221			b[2] = 0;
222			b[3] = 0;
223			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
224					      I915_GEM_DOMAIN_RENDER << 16 |
225					      I915_GEM_DOMAIN_RENDER |
226					      KGEM_RELOC_FENCED,
227					      0);
228			b[5] = pixel;
229			b[6] = pixel;
230			b[7] = 0;
231			b[8] = 0;
232			kgem->nbatch += 9;
233		}
234		assert(kgem->nbatch < kgem->surface);
235
236		sna->blt_state.fill_bo = bo->unique_id;
237		sna->blt_state.fill_pixel = pixel;
238		sna->blt_state.fill_alu = alu;
239	}
240
241	assert(sna->kgem.mode == KGEM_BLT);
242	return true;
243}
244
/* Re-emit the XY_SETUP_MONO_PATTERN_SL_BLT preamble for @blt into the
 * current batch. Callers must have ensured sufficient batch space; this
 * mirrors the setup emitted by sna_blt_fill_init(). */
noinline static void __sna_blt_fill_begin(struct sna *sna,
					  const struct sna_blt_state *blt)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	kgem_bcs_set_tiling(&sna->kgem, NULL, blt->bo[0]);

	assert(kgem->mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	if (sna->kgem.gen >= 0100) {
		/* gen8+: 64-bit relocation, 10-dword command */
		b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
		if (blt->bpp == 32)
			b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		if (blt->bo[0]->tiling)
			b[0] |= BLT_DST_TILED;
		b[1] = blt->br13;
		b[2] = 0;
		b[3] = 0;
		*(uint64_t *)(b+4) =
			kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[0],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 I915_GEM_DOMAIN_RENDER |
					 KGEM_RELOC_FENCED,
					 0);
		b[6] = blt->pixel;	/* solid pattern fg/bg */
		b[7] = blt->pixel;
		b[8] = 0;
		b[9] = 0;
		kgem->nbatch += 10;
	} else {
		/* pre-gen8: 32-bit relocation, 9-dword command */
		b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
		if (blt->bpp == 32)
			b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		if (blt->bo[0]->tiling && kgem->gen >= 040)
			b[0] |= BLT_DST_TILED;
		b[1] = blt->br13;
		b[2] = 0;
		b[3] = 0;
		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[0],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      I915_GEM_DOMAIN_RENDER |
				      KGEM_RELOC_FENCED,
				      0);
		b[5] = blt->pixel;	/* solid pattern fg/bg */
		b[6] = blt->pixel;
		b[7] = 0;
		b[8] = 0;
		kgem->nbatch += 9;
	}
}
296
/* Restart a fill sequence when the batch ran out of space: submit any
 * pending work (only if relocations exist — an empty batch can be reused
 * in place), switch back to BLT mode, and re-emit the fill setup. */
inline static void sna_blt_fill_begin(struct sna *sna,
				      const struct sna_blt_state *blt)
{
	struct kgem *kgem = &sna->kgem;

	if (kgem->nreloc) {
		_kgem_submit(kgem);
		_kgem_set_mode(kgem, KGEM_BLT);
		kgem_bcs_set_tiling(kgem, NULL, blt->bo[0]);
		assert(kgem->nbatch == 0);
	}

	__sna_blt_fill_begin(sna, blt);
}
311
/* Emit one 3-dword XY_SCANLINE_BLT span filling the rectangle
 * (x, y) to (x+width, y+height) with the state prepared in @blt.
 * Restarts the batch (re-emitting the setup) if out of space. */
inline static void sna_blt_fill_one(struct sna *sna,
				    const struct sna_blt_state *blt,
				    int16_t x, int16_t y,
				    int16_t width, int16_t height)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	DBG(("%s: (%d, %d) x (%d, %d): %08x\n",
	     __FUNCTION__, x, y, width, height, blt->pixel));

	assert(x >= 0);
	assert(y >= 0);
	assert((y+height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));

	if (!kgem_check_batch(kgem, 3))
		sna_blt_fill_begin(sna, blt);

	assert(sna->kgem.mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	kgem->nbatch += 3;
	assert(kgem->nbatch < kgem->surface);

	b[0] = blt->cmd;
	b[1] = y << 16 | x;			/* top-left (y16:x16) */
	b[2] = b[1] + (height << 16 | width);	/* bottom-right */
}
339
/* Prepare @blt for XY_SRC_COPY_BLT copies from @src to @dst with the
 * given @alu and @bpp. Switches the batch into BLT mode and ensures both
 * buffers can be fenced; returns false if they cannot fit even into a
 * fresh batch. Invalidates the cached fill state. */
static bool sna_blt_copy_init(struct sna *sna,
			      struct sna_blt_state *blt,
			      struct kgem_bo *src,
			      struct kgem_bo *dst,
			      int bpp,
			      uint8_t alu)
{
	struct kgem *kgem = &sna->kgem;

	assert(kgem_bo_can_blt(kgem, src));
	assert(kgem_bo_can_blt(kgem, dst));

	blt->bo[0] = src;
	blt->bo[1] = dst;

	/* gen8+ uses 64-bit relocations, so the command is 2 dwords longer */
	blt->cmd = XY_SRC_COPY_BLT_CMD | (kgem->gen >= 0100 ? 8 : 6);
	if (bpp == 32)
		blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;

	blt->pitch[0] = src->pitch;
	if (kgem->gen >= 040 && src->tiling) {
		/* gen4+ tiled surfaces specify the pitch in dwords */
		blt->cmd |= BLT_SRC_TILED;
		blt->pitch[0] >>= 2;
	}
	assert(blt->pitch[0] <= MAXSHORT);

	blt->pitch[1] = dst->pitch;
	if (kgem->gen >= 040 && dst->tiling) {
		blt->cmd |= BLT_DST_TILED;
		blt->pitch[1] >>= 2;
	}
	assert(blt->pitch[1] <= MAXSHORT);

	/* These alus ignore the prior destination contents entirely */
	blt->overwrites = alu == GXcopy || alu == GXclear || alu == GXset;
	blt->br13 = (copy_ROP[alu] << 16) | blt->pitch[1];
	switch (bpp) {
	default: assert(0);
	case 32: blt->br13 |= 1 << 25; /* RGB8888; fallthrough sets bit 24 too */
	case 16: blt->br13 |= 1 << 24; /* RGB565 */
	case 8: break;
	}

	kgem_set_mode(kgem, KGEM_BLT, dst);
	if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
			return false;
		_kgem_set_mode(kgem, KGEM_BLT);
	}
	kgem_bcs_set_tiling(&sna->kgem, src, dst);

	sna->blt_state.fill_bo = 0;
	return true;
}
394
/* Prepare @blt for copies that force the alpha channel using
 * XY_FULL_MONO_PATTERN_BLT: the solid pattern is set to @alpha and the
 * raster op 0xfc combines it with the source (presumably src OR pattern —
 * NOTE(review): confirm 0xfc against the BLT ROP table). Returns false if
 * the buffers cannot fit even into a fresh batch. */
static bool sna_blt_alpha_fixup_init(struct sna *sna,
				     struct sna_blt_state *blt,
				     struct kgem_bo *src,
				     struct kgem_bo *dst,
				     int bpp, uint32_t alpha)
{
	struct kgem *kgem = &sna->kgem;

	DBG(("%s: dst handle=%d, src handle=%d, bpp=%d, fixup=%08x\n",
	     __FUNCTION__, dst->handle, src->handle, bpp, alpha));
	assert(kgem_bo_can_blt(kgem, src));
	assert(kgem_bo_can_blt(kgem, dst));

	blt->bo[0] = src;
	blt->bo[1] = dst;

	/* gen8+ uses 64-bit relocations, so the command is 2 dwords longer */
	blt->cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 12 : 10);
	blt->pitch[0] = src->pitch;
	if (kgem->gen >= 040 && src->tiling) {
		/* gen4+ tiled surfaces specify the pitch in dwords */
		blt->cmd |= BLT_SRC_TILED;
		blt->pitch[0] >>= 2;
	}
	assert(blt->pitch[0] <= MAXSHORT);

	blt->pitch[1] = dst->pitch;
	if (kgem->gen >= 040 && dst->tiling) {
		blt->cmd |= BLT_DST_TILED;
		blt->pitch[1] >>= 2;
	}
	assert(blt->pitch[1] <= MAXSHORT);

	blt->overwrites = 1;
	blt->br13 = (0xfc << 16) | blt->pitch[1];
	switch (bpp) {
	default: assert(0);
	case 32: blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		 blt->br13 |= 1 << 25; /* RGB8888; fallthrough sets bit 24 too */
	case 16: blt->br13 |= 1 << 24; /* RGB565 */
	case 8: break;
	}
	blt->pixel = alpha;	/* becomes the solid pattern colour */

	kgem_set_mode(kgem, KGEM_BLT, dst);
	if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
			return false;
		_kgem_set_mode(kgem, KGEM_BLT);
	}
	kgem_bcs_set_tiling(&sna->kgem, src, dst);

	sna->blt_state.fill_bo = 0;
	return true;
}
449
/* Emit one XY_FULL_MONO_PATTERN_BLT rectangle copy using the state in
 * @blt: copies width x height from (src_x, src_y) to (dst_x, dst_y),
 * combining the source with the solid pattern (blt->pixel) via the ROP
 * set up in sna_blt_alpha_fixup_init(). Submits and restarts the batch
 * if there is insufficient space for the command and two relocations. */
static void sna_blt_alpha_fixup_one(struct sna *sna,
				    const struct sna_blt_state *blt,
				    int src_x, int src_y,
				    int width, int height,
				    int dst_x, int dst_y)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n",
	     __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height));

	assert(src_x >= 0);
	assert(src_y >= 0);
	assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
	assert(dst_x >= 0);
	assert(dst_y >= 0);
	assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1]));
	assert(width > 0);
	assert(height > 0);

	if (!kgem_check_batch(kgem, 14) ||
	    !kgem_check_reloc(kgem, 2)) {
		_kgem_submit(kgem);
		_kgem_set_mode(kgem, KGEM_BLT);
		kgem_bcs_set_tiling(&sna->kgem, blt->bo[0], blt->bo[1]);
	}

	assert(sna->kgem.mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	b[0] = blt->cmd;
	b[1] = blt->br13;
	b[2] = (dst_y << 16) | dst_x;			/* dst top-left */
	b[3] = ((dst_y + height) << 16) | (dst_x + width); /* dst bottom-right */
	if (sna->kgem.gen >= 0100) {
		/* gen8+: 64-bit relocations; dst is written, src only read */
		*(uint64_t *)(b+4) =
			kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 I915_GEM_DOMAIN_RENDER |
					 KGEM_RELOC_FENCED,
					 0);
		b[6] = blt->pitch[0];
		b[7] = (src_y << 16) | src_x;
		*(uint64_t *)(b+8) =
			kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 KGEM_RELOC_FENCED,
					 0);
		b[10] = blt->pixel;	/* solid pattern fg/bg */
		b[11] = blt->pixel;
		b[12] = 0;
		b[13] = 0;
		kgem->nbatch += 14;
	} else {
		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      I915_GEM_DOMAIN_RENDER |
				      KGEM_RELOC_FENCED,
				      0);
		b[5] = blt->pitch[0];
		b[6] = (src_y << 16) | src_x;
		b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      KGEM_RELOC_FENCED,
				      0);
		b[8] = blt->pixel;	/* solid pattern fg/bg */
		b[9] = blt->pixel;
		b[10] = 0;
		b[11] = 0;
		kgem->nbatch += 12;
	}
	assert(kgem->nbatch < kgem->surface);
}
523
/* Emit one XY_SRC_COPY_BLT rectangle copy using the state in @blt.
 *
 * As an optimisation, if this copy fully overwrites its destination
 * (blt->overwrites) and the immediately preceding command in the batch
 * was an XY_COLOR_BLT to the exact same rectangle of the same target,
 * that fill is rewritten in place into this copy (the fill's result
 * could never be observed). The rewrite reuses the fill's existing
 * destination relocation and only appends the source dwords. */
static void sna_blt_copy_one(struct sna *sna,
			     const struct sna_blt_state *blt,
			     int src_x, int src_y,
			     int width, int height,
			     int dst_x, int dst_y)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n",
	     __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height));

	assert(src_x >= 0);
	assert(src_y >= 0);
	assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
	assert(dst_x >= 0);
	assert(dst_y >= 0);
	assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1]));
	assert(width > 0);
	assert(height > 0);

	/* Compare against a previous fill */
	if (blt->overwrites &&
	    kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[1]->target_handle) {
		if (sna->kgem.gen >= 0100) {
			/* gen8+ fills are 7 dwords: cmd, br13, tl, br, reloc64, colour */
			if (kgem->nbatch >= 7 &&
			    kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (blt->cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
			    kgem->batch[kgem->nbatch-5] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) {
				DBG(("%s: replacing last fill\n", __FUNCTION__));
				if (kgem_check_batch(kgem, 3)) {
					assert(kgem->mode == KGEM_BLT);
					b = kgem->batch + kgem->nbatch - 7;
					b[0] = blt->cmd;
					b[1] = blt->br13;
					b[6] = (src_y << 16) | src_x;
					b[7] = blt->pitch[0];
					*(uint64_t *)(b+8) =
						kgem_add_reloc64(kgem, kgem->nbatch + 8 - 7, blt->bo[0],
								 I915_GEM_DOMAIN_RENDER << 16 |
								 KGEM_RELOC_FENCED,
								 0);
					kgem->nbatch += 3;
					assert(kgem->nbatch < kgem->surface);
					return;
				}
				/* No room to extend in place: drop the fill
				 * (and its reloc) and emit a full copy below. */
				kgem->nbatch -= 7;
				kgem->nreloc--;
			}
		} else {
			/* pre-gen8 fills are 6 dwords: cmd, br13, tl, br, reloc, colour */
			if (kgem->nbatch >= 6 &&
			    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (blt->cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) &&
			    kgem->batch[kgem->nbatch-3] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) {
				DBG(("%s: replacing last fill\n", __FUNCTION__));
				if (kgem_check_batch(kgem, 8-6)) {
					assert(kgem->mode == KGEM_BLT);
					b = kgem->batch + kgem->nbatch - 6;
					b[0] = blt->cmd;
					b[1] = blt->br13;
					b[5] = (src_y << 16) | src_x;
					b[6] = blt->pitch[0];
					b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7 - 6, blt->bo[0],
							      I915_GEM_DOMAIN_RENDER << 16 |
							      KGEM_RELOC_FENCED,
							      0);
					kgem->nbatch += 8 - 6;
					assert(kgem->nbatch < kgem->surface);
					return;
				}
				/* No room to extend in place: drop the fill
				 * (and its reloc) and emit a full copy below. */
				kgem->nbatch -= 6;
				kgem->nreloc--;
			}
		}
	}

	if (!kgem_check_batch(kgem, 10) ||
	    !kgem_check_reloc(kgem, 2)) {
		_kgem_submit(kgem);
		_kgem_set_mode(kgem, KGEM_BLT);
		kgem_bcs_set_tiling(&sna->kgem, blt->bo[0], blt->bo[1]);
	}

	assert(sna->kgem.mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	b[0] = blt->cmd;
	b[1] = blt->br13;
	b[2] = (dst_y << 16) | dst_x;			/* dst top-left */
	b[3] = ((dst_y + height) << 16) | (dst_x + width); /* dst bottom-right */
	if (kgem->gen >= 0100) {
		/* gen8+: 64-bit relocations; dst is written, src only read */
		*(uint64_t *)(b+4) =
			kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 I915_GEM_DOMAIN_RENDER |
					 KGEM_RELOC_FENCED,
					 0);
		b[6] = (src_y << 16) | src_x;
		b[7] = blt->pitch[0];
		*(uint64_t *)(b+8) =
			kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 KGEM_RELOC_FENCED,
					 0);
		kgem->nbatch += 10;
	} else {
		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      I915_GEM_DOMAIN_RENDER |
				      KGEM_RELOC_FENCED,
				      0);
		b[5] = (src_y << 16) | src_x;
		b[6] = blt->pitch[0];
		b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      KGEM_RELOC_FENCED,
				      0);
		kgem->nbatch += 8;
	}
	assert(kgem->nbatch < kgem->surface);
}
644
645bool
646sna_get_rgba_from_pixel(uint32_t pixel,
647			uint16_t *red,
648			uint16_t *green,
649			uint16_t *blue,
650			uint16_t *alpha,
651			uint32_t format)
652{
653	int rbits, bbits, gbits, abits;
654	int rshift, bshift, gshift, ashift;
655
656	rbits = PICT_FORMAT_R(format);
657	gbits = PICT_FORMAT_G(format);
658	bbits = PICT_FORMAT_B(format);
659	abits = PICT_FORMAT_A(format);
660
661	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) {
662		rshift = gshift = bshift = ashift = 0;
663	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
664		bshift = 0;
665		gshift = bbits;
666		rshift = gshift + gbits;
667		ashift = rshift + rbits;
668	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
669		rshift = 0;
670		gshift = rbits;
671		bshift = gshift + gbits;
672		ashift = bshift + bbits;
673	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
674		ashift = 0;
675		rshift = abits;
676		if (abits == 0)
677			rshift = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits);
678		gshift = rshift + rbits;
679		bshift = gshift + gbits;
680	} else {
681		return false;
682	}
683
684	if (rbits) {
685		*red = ((pixel >> rshift) & ((1 << rbits) - 1)) << (16 - rbits);
686		while (rbits < 16) {
687			*red |= *red >> rbits;
688			rbits <<= 1;
689		}
690	} else
691		*red = 0;
692
693	if (gbits) {
694		*green = ((pixel >> gshift) & ((1 << gbits) - 1)) << (16 - gbits);
695		while (gbits < 16) {
696			*green |= *green >> gbits;
697			gbits <<= 1;
698		}
699	} else
700		*green = 0;
701
702	if (bbits) {
703		*blue = ((pixel >> bshift) & ((1 << bbits) - 1)) << (16 - bbits);
704		while (bbits < 16) {
705			*blue |= *blue >> bbits;
706			bbits <<= 1;
707		}
708	} else
709		*blue = 0;
710
711	if (abits) {
712		*alpha = ((pixel >> ashift) & ((1 << abits) - 1)) << (16 - abits);
713		while (abits < 16) {
714			*alpha |= *alpha >> abits;
715			abits <<= 1;
716		}
717	} else
718		*alpha = 0xffff;
719
720	return true;
721}
722
723bool
724_sna_get_pixel_from_rgba(uint32_t * pixel,
725			uint16_t red,
726			uint16_t green,
727			uint16_t blue,
728			uint16_t alpha,
729			uint32_t format)
730{
731	int rbits, bbits, gbits, abits;
732	int rshift, bshift, gshift, ashift;
733
734	rbits = PICT_FORMAT_R(format);
735	gbits = PICT_FORMAT_G(format);
736	bbits = PICT_FORMAT_B(format);
737	abits = PICT_FORMAT_A(format);
738	if (abits == 0)
739	    abits = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits);
740
741	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) {
742		*pixel = alpha >> (16 - abits);
743		return true;
744	}
745
746	if (!PICT_FORMAT_COLOR(format))
747		return false;
748
749	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
750		bshift = 0;
751		gshift = bbits;
752		rshift = gshift + gbits;
753		ashift = rshift + rbits;
754	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
755		rshift = 0;
756		gshift = rbits;
757		bshift = gshift + gbits;
758		ashift = bshift + bbits;
759	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
760		ashift = 0;
761		rshift = abits;
762		gshift = rshift + rbits;
763		bshift = gshift + gbits;
764	} else
765		return false;
766
767	*pixel = 0;
768	*pixel |= (blue  >> (16 - bbits)) << bshift;
769	*pixel |= (green >> (16 - gbits)) << gshift;
770	*pixel |= (red   >> (16 - rbits)) << rshift;
771	*pixel |= (alpha >> (16 - abits)) << ashift;
772
773	return true;
774}
775
/* Convert a framebuffer pixel value @color at @depth into an
 * a8r8g8b8 (premultiplied ARGB) value. */
uint32_t
sna_rgba_for_color(uint32_t color, int depth)
{
	return color_convert(color, sna_format_for_depth(depth), PICT_a8r8g8b8);
}
781
/* Convert an a8r8g8b8 value @rgba into a pixel in the given Render
 * picture @format — the inverse of sna_rgba_for_color(). */
uint32_t
sna_rgba_to_color(uint32_t rgba, uint32_t format)
{
	return color_convert(rgba, PICT_a8r8g8b8, format);
}
787
/* Read the top-left pixel of @picture's backing pixmap (used for 1x1
 * repeating "solid" pictures). Forces a migration of the pixmap to the
 * CPU for reading; returns 0 if that fails. */
static uint32_t
get_pixel(PicturePtr picture)
{
	PixmapPtr pixmap = get_drawable_pixmap(picture->pDrawable);

	DBG(("%s: %p\n", __FUNCTION__, pixmap));

	if (!sna_pixmap_move_to_cpu(pixmap, MOVE_READ))
		return 0;

	/* Only the first pixel is needed, read it at the native width */
	switch (pixmap->drawable.bitsPerPixel) {
	case 32: return *(uint32_t *)pixmap->devPrivate.ptr;
	case 16: return *(uint16_t *)pixmap->devPrivate.ptr;
	default: return *(uint8_t *)pixmap->devPrivate.ptr;
	}
}
804
805static uint32_t
806get_solid_color(PicturePtr picture, uint32_t format)
807{
808	if (picture->pSourcePict) {
809		PictSolidFill *fill = (PictSolidFill *)picture->pSourcePict;
810		return color_convert(fill->color, PICT_a8r8g8b8, format);
811	} else
812		return color_convert(get_pixel(picture), picture->format, format);
813}
814
815static bool
816is_solid(PicturePtr picture)
817{
818	if (picture->pSourcePict) {
819		if (picture->pSourcePict->type == SourcePictTypeSolidFill)
820			return true;
821	}
822
823	if (picture->pDrawable) {
824		if (picture->pDrawable->width  == 1 &&
825		    picture->pDrawable->height == 1 &&
826		    picture->repeat)
827			return true;
828	}
829
830	return false;
831}
832
833bool
834sna_picture_is_solid(PicturePtr picture, uint32_t *color)
835{
836	if (!is_solid(picture))
837		return false;
838
839	if (color)
840		*color = get_solid_color(picture, PICT_a8r8g8b8);
841	return true;
842}
843
844static bool
845pixel_is_transparent(uint32_t pixel, uint32_t format)
846{
847	unsigned int abits;
848
849	abits = PICT_FORMAT_A(format);
850	if (!abits)
851		return false;
852
853	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A ||
854	    PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
855		return (pixel & ((1 << abits) - 1)) == 0;
856	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB ||
857		   PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
858		unsigned int ashift = PICT_FORMAT_BPP(format) - abits;
859		return (pixel >> ashift) == 0;
860	} else
861		return false;
862}
863
864static bool
865pixel_is_opaque(uint32_t pixel, uint32_t format)
866{
867	unsigned int abits;
868
869	abits = PICT_FORMAT_A(format);
870	if (!abits)
871		return true;
872
873	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A ||
874	    PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
875		return (pixel & ((1 << abits) - 1)) == (unsigned)((1 << abits) - 1);
876	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB ||
877		   PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
878		unsigned int ashift = PICT_FORMAT_BPP(format) - abits;
879		return (pixel >> ashift) == (unsigned)((1 << abits) - 1);
880	} else
881		return false;
882}
883
884static bool
885pixel_is_white(uint32_t pixel, uint32_t format)
886{
887	switch (PICT_FORMAT_TYPE(format)) {
888	case PICT_TYPE_A:
889	case PICT_TYPE_ARGB:
890	case PICT_TYPE_ABGR:
891	case PICT_TYPE_BGRA:
892		return pixel == ((1U << PICT_FORMAT_BPP(format)) - 1);
893	default:
894		return false;
895	}
896}
897
898static bool
899is_opaque_solid(PicturePtr picture)
900{
901	if (picture->pSourcePict) {
902		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
903		return (fill->color >> 24) == 0xff;
904	} else
905		return pixel_is_opaque(get_pixel(picture), picture->format);
906}
907
908static bool
909is_white(PicturePtr picture)
910{
911	if (picture->pSourcePict) {
912		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
913		return fill->color == 0xffffffff;
914	} else
915		return pixel_is_white(get_pixel(picture), picture->format);
916}
917
918static bool
919is_transparent(PicturePtr picture)
920{
921	if (picture->pSourcePict) {
922		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
923		return fill->color == 0;
924	} else
925		return pixel_is_transparent(get_pixel(picture), picture->format);
926}
927
/* Determine whether @mask cannot affect a composite, i.e. it behaves as
 * a fully-opaque constant. Checks, in order: component-alpha masks must
 * be solid white; masks without an alpha channel are trivially opaque;
 * SolidFill masks need full alpha; 1x1 repeating drawables are sampled;
 * finally an untransformed cleared pixmap is checked via its clear
 * colour. Anything else is conservatively reported as not opaque. */
bool
sna_composite_mask_is_opaque(PicturePtr mask)
{
	if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format))
		return is_solid(mask) && is_white(mask);
	else if (!PICT_FORMAT_A(mask->format))
		return true;
	else if (mask->pSourcePict) {
		PictSolidFill *fill = (PictSolidFill *) mask->pSourcePict;
		return (fill->color >> 24) == 0xff;
	} else {
		struct sna_pixmap *priv;
		assert(mask->pDrawable);

		if (mask->pDrawable->width  == 1 &&
		    mask->pDrawable->height == 1 &&
		    mask->repeat)
			return pixel_is_opaque(get_pixel(mask), mask->format);

		/* A transform may sample outside the cleared region */
		if (mask->transform)
			return false;

		priv = sna_pixmap_from_drawable(mask->pDrawable);
		if (priv == NULL || !priv->clear)
			return false;

		return pixel_is_opaque(priv->clear_color, mask->format);
	}
}
957
958fastcall
959static void blt_composite_fill(struct sna *sna,
960			       const struct sna_composite_op *op,
961			       const struct sna_composite_rectangles *r)
962{
963	int x1, x2, y1, y2;
964
965	x1 = r->dst.x + op->dst.x;
966	y1 = r->dst.y + op->dst.y;
967	x2 = x1 + r->width;
968	y2 = y1 + r->height;
969
970	if (x1 < 0)
971		x1 = 0;
972	if (y1 < 0)
973		y1 = 0;
974
975	if (x2 > op->dst.width)
976		x2 = op->dst.width;
977	if (y2 > op->dst.height)
978		y2 = op->dst.height;
979
980	if (x2 <= x1 || y2 <= y1)
981		return;
982
983	sna_blt_fill_one(sna, &op->u.blt, x1, y1, x2-x1, y2-y1);
984}
985
/* CPU fallback for blt_composite_fill(): clip the rectangle to the
 * destination and fill it in the mapped pixmap with pixman_fill(). */
fastcall
static void blt_composite_fill__cpu(struct sna *sna,
				    const struct sna_composite_op *op,
				    const struct sna_composite_rectangles *r)
{
	int x1, x2, y1, y2;

	x1 = r->dst.x + op->dst.x;
	y1 = r->dst.y + op->dst.y;
	x2 = x1 + r->width;
	y2 = y1 + r->height;

	if (x1 < 0)
		x1 = 0;
	if (y1 < 0)
		y1 = 0;

	if (x2 > op->dst.width)
		x2 = op->dst.width;
	if (y2 > op->dst.height)
		y2 = op->dst.height;

	if (x2 <= x1 || y2 <= y1)
		return;

	assert(op->dst.pixmap->devPrivate.ptr);
	assert(op->dst.pixmap->devKind);
	sigtrap_assert_active();
	/* pixman_fill takes the stride in 32-bit units; devKind is in
	 * bytes (assumed to be a multiple of 4 — TODO confirm). */
	pixman_fill(op->dst.pixmap->devPrivate.ptr,
		    op->dst.pixmap->devKind / sizeof(uint32_t),
		    op->dst.pixmap->drawable.bitsPerPixel,
		    x1, y1, x2-x1, y2-y1,
		    op->u.blt.pixel);
}
1020
/* CPU fill of a single box already expressed in pixmap coordinates
 * (no dst offset applied); the box must lie within the drawable. */
fastcall static void
blt_composite_fill_box_no_offset__cpu(struct sna *sna,
				      const struct sna_composite_op *op,
				      const BoxRec *box)
{
	assert(box->x1 >= 0);
	assert(box->y1 >= 0);
	assert(box->x2 <= op->dst.pixmap->drawable.width);
	assert(box->y2 <= op->dst.pixmap->drawable.height);

	assert(op->dst.pixmap->devPrivate.ptr);
	assert(op->dst.pixmap->devKind);
	sigtrap_assert_active();
	/* pixman_fill stride is in 32-bit units; devKind is bytes */
	pixman_fill(op->dst.pixmap->devPrivate.ptr,
		    op->dst.pixmap->devKind / sizeof(uint32_t),
		    op->dst.pixmap->drawable.bitsPerPixel,
		    box->x1, box->y1, box->x2-box->x1, box->y2-box->y1,
		    op->u.blt.pixel);
}
1040
/* CPU fill of @n boxes already in pixmap coordinates (no dst offset).
 * @n must be >= 1 (do/while executes at least once). */
static void
blt_composite_fill_boxes_no_offset__cpu(struct sna *sna,
					const struct sna_composite_op *op,
					const BoxRec *box, int n)
{
	do {
		assert(box->x1 >= 0);
		assert(box->y1 >= 0);
		assert(box->x2 <= op->dst.pixmap->drawable.width);
		assert(box->y2 <= op->dst.pixmap->drawable.height);

		assert(op->dst.pixmap->devPrivate.ptr);
		assert(op->dst.pixmap->devKind);
		sigtrap_assert_active();
		/* pixman_fill stride is in 32-bit units; devKind is bytes */
		pixman_fill(op->dst.pixmap->devPrivate.ptr,
			    op->dst.pixmap->devKind / sizeof(uint32_t),
			    op->dst.pixmap->drawable.bitsPerPixel,
			    box->x1, box->y1, box->x2-box->x1, box->y2-box->y1,
			    op->u.blt.pixel);
		box++;
	} while (--n);
}
1063
/* CPU fill of a single box given in destination coordinates; the dst
 * offset is applied before filling. The offset box must lie within
 * the drawable. */
fastcall static void
blt_composite_fill_box__cpu(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box)
{
	assert(box->x1 + op->dst.x >= 0);
	assert(box->y1 + op->dst.y >= 0);
	assert(box->x2 + op->dst.x <= op->dst.pixmap->drawable.width);
	assert(box->y2 + op->dst.y <= op->dst.pixmap->drawable.height);

	assert(op->dst.pixmap->devPrivate.ptr);
	assert(op->dst.pixmap->devKind);
	sigtrap_assert_active();
	/* pixman_fill stride is in 32-bit units; devKind is bytes */
	pixman_fill(op->dst.pixmap->devPrivate.ptr,
		    op->dst.pixmap->devKind / sizeof(uint32_t),
		    op->dst.pixmap->drawable.bitsPerPixel,
		    box->x1 + op->dst.x, box->y1 + op->dst.y,
		    box->x2 - box->x1, box->y2 - box->y1,
		    op->u.blt.pixel);
}
1084
/* CPU fill of @n boxes in destination coordinates, applying the dst
 * offset to each. @n must be >= 1 (do/while executes at least once). */
static void
blt_composite_fill_boxes__cpu(struct sna *sna,
			      const struct sna_composite_op *op,
			      const BoxRec *box, int n)
{
	do {
		assert(box->x1 + op->dst.x >= 0);
		assert(box->y1 + op->dst.y >= 0);
		assert(box->x2 + op->dst.x <= op->dst.pixmap->drawable.width);
		assert(box->y2 + op->dst.y <= op->dst.pixmap->drawable.height);

		assert(op->dst.pixmap->devPrivate.ptr);
		assert(op->dst.pixmap->devKind);
		sigtrap_assert_active();
		/* pixman_fill stride is in 32-bit units; devKind is bytes */
		pixman_fill(op->dst.pixmap->devPrivate.ptr,
			    op->dst.pixmap->devKind / sizeof(uint32_t),
			    op->dst.pixmap->drawable.bitsPerPixel,
			    box->x1 + op->dst.x, box->y1 + op->dst.y,
			    box->x2 - box->x1, box->y2 - box->y1,
			    op->u.blt.pixel);
		box++;
	} while (--n);
}
1108
/* Emit a single 3-dword fill command into the BLT batch: the command dword
 * (blt->cmd, set up by the earlier sna_blt_fill_init/begin) followed by the
 * box packed as four int16s.  Flushes and restarts the fill if the batch is
 * full. */
inline static void _sna_blt_fill_box(struct sna *sna,
				     const struct sna_blt_state *blt,
				     const BoxRec *box)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	DBG(("%s: (%d, %d), (%d, %d): %08x\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2,
	     blt->pixel));

	assert(box->x1 >= 0);
	assert(box->y1 >= 0);
	assert(box->y2 * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));

	/* Not enough room for 3 more dwords: submit and re-emit fill state. */
	if (!kgem_check_batch(kgem, 3))
		sna_blt_fill_begin(sna, blt);

	assert(sna->kgem.mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	kgem->nbatch += 3;
	assert(kgem->nbatch < kgem->surface);

	b[0] = blt->cmd;
	/* Copy (x1, y1, x2, y2) with one 64-bit store.
	 * NOTE(review): relies on BoxRec being four packed int16s. */
	*(uint64_t *)(b+1) = *(const uint64_t *)box;
}
1135
/* Emit a run of solid-fill commands, chunked to the space remaining in the
 * current batch; sna_blt_fill_begin() flushes and restarts whenever the
 * batch fills up.  Each box costs 3 dwords: the command dword plus the box
 * coordinates packed as four int16s. */
inline static void _sna_blt_fill_boxes(struct sna *sna,
				       const struct sna_blt_state *blt,
				       const BoxRec *box,
				       int nbox)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t cmd = blt->cmd;

	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));

	if (!kgem_check_batch(kgem, 3))
		sna_blt_fill_begin(sna, blt);

	do {
		uint32_t *b = kgem->batch + kgem->nbatch;
		int nbox_this_time, rem;

		assert(sna->kgem.mode == KGEM_BLT);
		/* Emit only as many boxes as fit in the remaining space. */
		nbox_this_time = nbox;
		rem = kgem_batch_space(kgem);
		if (3*nbox_this_time > rem)
			nbox_this_time = rem / 3;
		DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
		     __FUNCTION__, nbox_this_time, nbox, rem));
		assert(nbox_this_time > 0);
		nbox -= nbox_this_time;

		/* Reserve the batch space up front, then write the commands. */
		kgem->nbatch += 3 * nbox_this_time;
		assert(kgem->nbatch < kgem->surface);
		/* Manually unrolled by 8/4/2/1 boxes per pass. */
		while (nbox_this_time >= 8) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
			b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++;
			b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++;
			b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++;
			b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++;
			b += 24;
			nbox_this_time -= 8;
		}
		if (nbox_this_time & 4) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
			b += 12;
		}
		if (nbox_this_time & 2) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b += 6;
		}
		if (nbox_this_time & 1) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
		}

		if (!nbox)
			return;

		/* Batch exhausted with boxes remaining: flush and restart. */
		sna_blt_fill_begin(sna, blt);
	} while (1);
}
1199
/* If this fill covered the entire destination, record the pixmap as being
 * cleared to a solid colour: all damage is moved onto the GPU bo, CPU damage
 * is dropped, and further damage tracking for this op is disabled.  Only
 * applies when the op draws directly to the pixmap's gpu_bo. */
static inline void _sna_blt_maybe_clear(const struct sna_composite_op *op, const BoxRec *box)
{
	if (box->x2 - box->x1 >= op->dst.width &&
	    box->y2 - box->y1 >= op->dst.height) {
		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
		if (op->dst.bo == priv->gpu_bo) {
			sna_damage_all(&priv->gpu_damage, op->dst.pixmap);
			sna_damage_destroy(&priv->cpu_damage);
			priv->clear = true;
			priv->clear_color = op->u.blt.pixel;
			DBG(("%s: pixmap=%ld marking clear [%08x]\n",
			     __FUNCTION__,
			     op->dst.pixmap->drawable.serialNumber,
			     op->u.blt.pixel));
			/* op is logically const to callers; drop the damage
			 * pointer as the whole surface is now tracked. */
			((struct sna_composite_op *)op)->damage = NULL;
		}
	}
}
1218
/* Fill one box (already in surface coordinates, no dst offset needed) and
 * then check whether the fill covered the whole surface. */
fastcall static void blt_composite_fill_box_no_offset(struct sna *sna,
						      const struct sna_composite_op *op,
						      const BoxRec *box)
{
	_sna_blt_fill_box(sna, &op->u.blt, box);
	_sna_blt_maybe_clear(op, box);
}
1226
/* Fill n boxes that are already in surface coordinates (dst offset is 0). */
static void blt_composite_fill_boxes_no_offset(struct sna *sna,
					       const struct sna_composite_op *op,
					       const BoxRec *box, int n)
{
	_sna_blt_fill_boxes(sna, &op->u.blt, box, n);
}
1233
/* Threaded variant of blt_composite_fill_boxes_no_offset(): batch space is
 * reserved while holding the vertex lock, then the commands are written
 * outside the lock so other render threads can proceed; submissions are
 * serialised via sna_vertex_wait__locked(). */
static void blt_composite_fill_boxes_no_offset__thread(struct sna *sna,
						       const struct sna_composite_op *op,
						       const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	const struct sna_blt_state *blt = &op->u.blt;
	uint32_t cmd = blt->cmd;

	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));

	sna_vertex_lock(&sna->render);
	assert(kgem->mode == KGEM_BLT);
	if (!kgem_check_batch(kgem, 3)) {
		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	}

	do {
		uint32_t *b = kgem->batch + kgem->nbatch;
		int nbox_this_time, rem;

		assert(sna->kgem.mode == KGEM_BLT);
		/* Chunk by remaining batch space; 3 dwords per box. */
		nbox_this_time = nbox;
		rem = kgem_batch_space(kgem);
		if (3*nbox_this_time > rem)
			nbox_this_time = rem / 3;
		DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
		     __FUNCTION__, nbox_this_time, nbox, rem));
		assert(nbox_this_time > 0);
		nbox -= nbox_this_time;

		/* Reserve the space under the lock, fill it in unlocked. */
		kgem->nbatch += 3 * nbox_this_time;
		assert(kgem->nbatch < kgem->surface);
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* Manually unrolled by 8/4/2/1 boxes per pass. */
		while (nbox_this_time >= 8) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
			b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++;
			b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++;
			b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++;
			b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++;
			b += 24;
			nbox_this_time -= 8;
		}
		if (nbox_this_time & 4) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
			b += 12;
		}
		if (nbox_this_time & 2) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b += 6;
		}
		if (nbox_this_time & 1) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
		}

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
		if (!nbox)
			break;

		/* More boxes remain: wait for other threads, then flush. */
		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	} while (1);
	sna_vertex_unlock(&sna->render);
}
1308
1309fastcall static void blt_composite_fill_box(struct sna *sna,
1310					    const struct sna_composite_op *op,
1311					    const BoxRec *box)
1312{
1313	sna_blt_fill_one(sna, &op->u.blt,
1314			 box->x1 + op->dst.x,
1315			 box->y1 + op->dst.y,
1316			 box->x2 - box->x1,
1317			 box->y2 - box->y1);
1318	_sna_blt_maybe_clear(op, box);
1319}
1320
1321static void blt_composite_fill_boxes(struct sna *sna,
1322				     const struct sna_composite_op *op,
1323				     const BoxRec *box, int n)
1324{
1325	do {
1326		sna_blt_fill_one(sna, &op->u.blt,
1327				 box->x1 + op->dst.x, box->y1 + op->dst.y,
1328				 box->x2 - box->x1, box->y2 - box->y1);
1329		box++;
1330	} while (--n);
1331}
1332
1333static inline uint64_t add4(const BoxRec *b, int16_t x, int16_t y)
1334{
1335	union {
1336		uint64_t v;
1337		int16_t i[4];
1338	} vi;
1339	vi.v = *(uint64_t *)b;
1340	vi.i[0] += x;
1341	vi.i[1] += y;
1342	vi.i[2] += x;
1343	vi.i[3] += y;
1344	return vi.v;
1345}
1346
/* Threaded fill for boxes that still need the destination offset applied;
 * identical batch/lock structure to the no-offset thread variant, but each
 * box is translated by (dst.x, dst.y) via add4() as it is written. */
static void blt_composite_fill_boxes__thread(struct sna *sna,
					     const struct sna_composite_op *op,
					     const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	const struct sna_blt_state *blt = &op->u.blt;
	uint32_t cmd = blt->cmd;
	int16_t dx = op->dst.x;
	int16_t dy = op->dst.y;

	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));

	sna_vertex_lock(&sna->render);
	assert(kgem->mode == KGEM_BLT);
	if (!kgem_check_batch(kgem, 3)) {
		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	}

	do {
		uint32_t *b = kgem->batch + kgem->nbatch;
		int nbox_this_time, rem;

		assert(sna->kgem.mode == KGEM_BLT);
		/* Chunk by remaining batch space; 3 dwords per box. */
		nbox_this_time = nbox;
		rem = kgem_batch_space(kgem);
		if (3*nbox_this_time > rem)
			nbox_this_time = rem / 3;
		DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
		     __FUNCTION__, nbox_this_time, nbox, rem));
		assert(nbox_this_time > 0);
		nbox -= nbox_this_time;

		/* Reserve the space under the lock, fill it in unlocked. */
		kgem->nbatch += 3 * nbox_this_time;
		assert(kgem->nbatch < kgem->surface);
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* Manually unrolled by 8/4/2/1 boxes per pass. */
		while (nbox_this_time >= 8) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
			b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
			b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
			b[12] = cmd; *(uint64_t *)(b+13) = add4(box++, dx, dy);
			b[15] = cmd; *(uint64_t *)(b+16) = add4(box++, dx, dy);
			b[18] = cmd; *(uint64_t *)(b+19) = add4(box++, dx, dy);
			b[21] = cmd; *(uint64_t *)(b+22) = add4(box++, dx, dy);
			b += 24;
			nbox_this_time -= 8;
		}
		if (nbox_this_time & 4) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
			b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
			b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
			b += 12;
		}
		if (nbox_this_time & 2) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
			b += 6;
		}
		if (nbox_this_time & 1) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
		}

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
		if (!nbox)
			break;

		/* More boxes remain: wait for other threads, then flush. */
		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	} while (1);
	sna_vertex_unlock(&sna->render);
}
1423
/* Intentionally empty: installed by prepare_blt_nop() when the composite
 * reduces to doing nothing. */
fastcall
static void blt_composite_nop(struct sna *sna,
			       const struct sna_composite_op *op,
			       const struct sna_composite_rectangles *r)
{
}
1430
/* Intentionally empty box callback for the no-op composite. */
fastcall static void blt_composite_nop_box(struct sna *sna,
					   const struct sna_composite_op *op,
					   const BoxRec *box)
{
}
1436
/* Intentionally empty boxes callback for the no-op composite. */
static void blt_composite_nop_boxes(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int n)
{
}
1442
/* Ensure the destination bo can be fenced for the BLT ring: if it does not
 * fit, submit the pending batch and retry once.  Returns false if the bo
 * still cannot be accommodated (caller must fall back). */
static bool
begin_blt(struct sna *sna,
	  struct sna_composite_op *op)
{
	assert(sna->kgem.mode == KGEM_BLT);
	if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo))
			return false;

		/* Fresh batch: re-enter BLT mode and reset tiling state. */
		_kgem_set_mode(&sna->kgem, KGEM_BLT);
		kgem_bcs_set_tiling(&sna->kgem, NULL, op->dst.bo);
	}

	return true;
}
1459
1460static bool
1461prepare_blt_nop(struct sna *sna,
1462		struct sna_composite_op *op)
1463{
1464	DBG(("%s\n", __FUNCTION__));
1465
1466	op->blt   = blt_composite_nop;
1467	op->box   = blt_composite_nop_box;
1468	op->boxes = blt_composite_nop_boxes;
1469	op->done  = nop_done;
1470	return true;
1471}
1472
/* Prepare a composite that clears the destination to zero.  Without a GPU
 * bo the fill runs on the CPU under sigtrap protection; otherwise the BLT
 * fill state is initialised with GXclear and the batch primed. */
static bool
prepare_blt_clear(struct sna *sna,
		  struct sna_composite_op *op)
{
	DBG(("%s\n", __FUNCTION__));

	if (op->dst.bo == NULL) {
		op->u.blt.pixel = 0;
		op->blt   = blt_composite_fill__cpu;
		/* The no-offset variants skip the dst.x/y translation. */
		if (op->dst.x|op->dst.y) {
			op->box   = blt_composite_fill_box__cpu;
			op->boxes = blt_composite_fill_boxes__cpu;
			op->thread_boxes = blt_composite_fill_boxes__cpu;
		} else {
			op->box   = blt_composite_fill_box_no_offset__cpu;
			op->boxes = blt_composite_fill_boxes_no_offset__cpu;
			op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu;
		}
		/* CPU writes are guarded by the sigtrap handler. */
		op->done = sig_done;
		return sigtrap_get() == 0;
	}

	op->blt = blt_composite_fill;
	if (op->dst.x|op->dst.y) {
		op->box   = blt_composite_fill_box;
		op->boxes = blt_composite_fill_boxes;
		op->thread_boxes = blt_composite_fill_boxes__thread;
	} else {
		op->box   = blt_composite_fill_box_no_offset;
		op->boxes = blt_composite_fill_boxes_no_offset;
		op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
	}
	op->done = nop_done;

	if (!sna_blt_fill_init(sna, &op->u.blt,
			       op->dst.bo,
			       op->dst.pixmap->drawable.bitsPerPixel,
			       GXclear, 0))
		return false;

	return begin_blt(sna, op);
}
1515
/* Prepare a composite that fills the destination with a solid pixel value.
 * Structure mirrors prepare_blt_clear(), but using GXcopy with the given
 * pixel instead of GXclear. */
static bool
prepare_blt_fill(struct sna *sna,
		 struct sna_composite_op *op,
		 uint32_t pixel)
{
	DBG(("%s\n", __FUNCTION__));

	if (op->dst.bo == NULL) {
		op->u.blt.pixel = pixel;
		op->blt = blt_composite_fill__cpu;
		/* The no-offset variants skip the dst.x/y translation. */
		if (op->dst.x|op->dst.y) {
			op->box   = blt_composite_fill_box__cpu;
			op->boxes = blt_composite_fill_boxes__cpu;
			op->thread_boxes = blt_composite_fill_boxes__cpu;
		} else {
			op->box   = blt_composite_fill_box_no_offset__cpu;
			op->boxes = blt_composite_fill_boxes_no_offset__cpu;
			op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu;
		}
		/* CPU writes are guarded by the sigtrap handler. */
		op->done = sig_done;
		return sigtrap_get() == 0;
	}

	op->blt = blt_composite_fill;
	if (op->dst.x|op->dst.y) {
		op->box   = blt_composite_fill_box;
		op->boxes = blt_composite_fill_boxes;
		op->thread_boxes = blt_composite_fill_boxes__thread;
	} else {
		op->box   = blt_composite_fill_box_no_offset;
		op->boxes = blt_composite_fill_boxes_no_offset;
		op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
	}
	op->done = nop_done;

	if (!sna_blt_fill_init(sna, &op->u.blt, op->dst.bo,
			       op->dst.pixmap->drawable.bitsPerPixel,
			       GXcopy, pixel))
		return false;

	return begin_blt(sna, op);
}
1558
1559fastcall static void
1560blt_composite_copy(struct sna *sna,
1561		   const struct sna_composite_op *op,
1562		   const struct sna_composite_rectangles *r)
1563{
1564	int x1, x2, y1, y2;
1565	int src_x, src_y;
1566
1567	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
1568	     __FUNCTION__,
1569	     r->src.x, r->src.y,
1570	     r->dst.x, r->dst.y,
1571	     r->width, r->height));
1572
1573	/* XXX higher layer should have clipped? */
1574
1575	x1 = r->dst.x + op->dst.x;
1576	y1 = r->dst.y + op->dst.y;
1577	x2 = x1 + r->width;
1578	y2 = y1 + r->height;
1579
1580	src_x = r->src.x - x1 + op->u.blt.sx;
1581	src_y = r->src.y - y1 + op->u.blt.sy;
1582
1583	/* clip against dst */
1584	if (x1 < 0)
1585		x1 = 0;
1586	if (y1 < 0)
1587		y1 = 0;
1588
1589	if (x2 > op->dst.width)
1590		x2 = op->dst.width;
1591
1592	if (y2 > op->dst.height)
1593		y2 = op->dst.height;
1594
1595	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));
1596
1597	if (x2 <= x1 || y2 <= y1)
1598		return;
1599
1600	sna_blt_copy_one(sna, &op->u.blt,
1601			 x1 + src_x, y1 + src_y,
1602			 x2 - x1, y2 - y1,
1603			 x1, y1);
1604}
1605
1606fastcall static void blt_composite_copy_box(struct sna *sna,
1607					    const struct sna_composite_op *op,
1608					    const BoxRec *box)
1609{
1610	DBG(("%s: box (%d, %d), (%d, %d)\n",
1611	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
1612	sna_blt_copy_one(sna, &op->u.blt,
1613			 box->x1 + op->u.blt.sx,
1614			 box->y1 + op->u.blt.sy,
1615			 box->x2 - box->x1,
1616			 box->y2 - box->y1,
1617			 box->x1 + op->dst.x,
1618			 box->y1 + op->dst.y);
1619}
1620
1621static void blt_composite_copy_boxes(struct sna *sna,
1622				     const struct sna_composite_op *op,
1623				     const BoxRec *box, int nbox)
1624{
1625	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1626	do {
1627		DBG(("%s: box (%d, %d), (%d, %d)\n",
1628		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
1629		sna_blt_copy_one(sna, &op->u.blt,
1630				 box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
1631				 box->x2 - box->x1, box->y2 - box->y1,
1632				 box->x1 + op->dst.x, box->y1 + op->dst.y);
1633		box++;
1634	} while(--nbox);
1635}
1636
/* Add (x, y) to a pair of int16 coordinates packed into v: the low 16 bits
 * hold the x coordinate, the high 16 bits the y coordinate.
 *
 * Both halves are masked back to 16 bits before re-packing so that a
 * negative intermediate cannot sign-extend across the halves (the old
 * `| x` promoted the int16_t with sign extension, which would smear 1s
 * into the y field), and the y half is widened to uint32_t before the
 * shift so that a set bit 15 cannot shift into the sign bit of int (UB).
 */
static inline uint32_t add2(uint32_t v, int16_t x, int16_t y)
{
	x += v & 0xffff;
	y += v >> 16;
	return (uint32_t)(uint16_t)y << 16 | (uint16_t)x;
}
1643
/* Threaded emission of copy blits using 32-bit relocations; each box costs
 * 8 dwords (cmd, br13, dst corners, dst reloc, src origin, src pitch,
 * src reloc) and 2 relocation entries.  Boxes are chunked by both the
 * remaining batch space and the remaining relocation slots.  When the
 * destination offset is zero, the box coordinates are stored directly with
 * 64-bit writes and the header (cmd+br13) as a single 64-bit store. */
static void blt_composite_copy_boxes__thread(struct sna *sna,
					     const struct sna_composite_op *op,
					     const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	int dst_dx = op->dst.x;
	int dst_dy = op->dst.y;
	int src_dx = op->src.offset[0];
	int src_dy = op->src.offset[1];
	uint32_t cmd = op->u.blt.cmd;
	uint32_t br13 = op->u.blt.br13;
	struct kgem_bo *src_bo = op->u.blt.bo[0];
	struct kgem_bo *dst_bo = op->u.blt.bo[1];
	int src_pitch = op->u.blt.pitch[0];

	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);

	if ((dst_dx | dst_dy) == 0) {
		/* Fast path: no dst translation, write boxes verbatim. */
		uint64_t hdr = (uint64_t)br13 << 32 | cmd;
		do {
			int nbox_this_time, rem;

			/* Limit by batch space (8 dwords/box) and by
			 * relocation entries (2 relocs/box). */
			nbox_this_time = nbox;
			rem = kgem_batch_space(kgem);
			if (8*nbox_this_time > rem)
				nbox_this_time = rem / 8;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
			DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
			     __FUNCTION__, nbox_this_time, nbox, rem));
			assert(nbox_this_time > 0);
			nbox -= nbox_this_time;

			assert(sna->kgem.mode == KGEM_BLT);
			do {
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
				     __FUNCTION__,
				     box->x1, box->y1,
				     box->x2 - box->x1, box->y2 - box->y1));

				assert(box->x1 + src_dx >= 0);
				assert(box->y1 + src_dy >= 0);
				assert(box->x1 + src_dx <= INT16_MAX);
				assert(box->y1 + src_dy <= INT16_MAX);

				assert(box->x1 >= 0);
				assert(box->y1 >= 0);

				*(uint64_t *)&b[0] = hdr;
				*(uint64_t *)&b[2] = *(const uint64_t *)box;
				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      I915_GEM_DOMAIN_RENDER |
						      KGEM_RELOC_FENCED,
						      0);
				/* src origin = dst origin + src delta. */
				b[5] = add2(b[2], src_dx, src_dy);
				b[6] = src_pitch;
				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      KGEM_RELOC_FENCED,
						      0);
				kgem->nbatch += 8;
				assert(kgem->nbatch < kgem->surface);
				box++;
			} while (--nbox_this_time);

			if (!nbox)
				break;

			/* Batch exhausted: submit and start a new one. */
			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
			kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
		} while (1);
	} else {
		/* Slow path: apply the dst translation per box. */
		do {
			int nbox_this_time, rem;

			nbox_this_time = nbox;
			rem = kgem_batch_space(kgem);
			if (8*nbox_this_time > rem)
				nbox_this_time = rem / 8;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
			DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
			     __FUNCTION__, nbox_this_time, nbox, rem));
			assert(nbox_this_time > 0);
			nbox -= nbox_this_time;

			assert(sna->kgem.mode == KGEM_BLT);
			do {
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
				     __FUNCTION__,
				     box->x1, box->y1,
				     box->x2 - box->x1, box->y2 - box->y1));

				assert(box->x1 + src_dx >= 0);
				assert(box->y1 + src_dy >= 0);

				assert(box->x1 + dst_dx >= 0);
				assert(box->y1 + dst_dy >= 0);

				b[0] = cmd;
				b[1] = br13;
				b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
				b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      I915_GEM_DOMAIN_RENDER |
						      KGEM_RELOC_FENCED,
						      0);
				b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
				b[6] = src_pitch;
				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      KGEM_RELOC_FENCED,
						      0);
				kgem->nbatch += 8;
				assert(kgem->nbatch < kgem->surface);
				box++;
			} while (--nbox_this_time);

			if (!nbox)
				break;

			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
			kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
		} while (1);
	}
	sna_vertex_unlock(&sna->render);
}
1781
/* 64-bit relocation variant of blt_composite_copy_boxes__thread(): each box
 * costs 10 dwords because kgem_add_reloc64() writes a two-dword address.
 * Selected by prepare_blt_copy() for gen >= 0100; structure is otherwise
 * identical to the 32-bit version. */
static void blt_composite_copy_boxes__thread64(struct sna *sna,
					       const struct sna_composite_op *op,
					       const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	int dst_dx = op->dst.x;
	int dst_dy = op->dst.y;
	int src_dx = op->src.offset[0];
	int src_dy = op->src.offset[1];
	uint32_t cmd = op->u.blt.cmd;
	uint32_t br13 = op->u.blt.br13;
	struct kgem_bo *src_bo = op->u.blt.bo[0];
	struct kgem_bo *dst_bo = op->u.blt.bo[1];
	int src_pitch = op->u.blt.pitch[0];

	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);

	if ((dst_dx | dst_dy) == 0) {
		/* Fast path: no dst translation, write boxes verbatim. */
		uint64_t hdr = (uint64_t)br13 << 32 | cmd;
		do {
			int nbox_this_time, rem;

			/* Limit by batch space (10 dwords/box) and by
			 * relocation entries (2 relocs/box). */
			nbox_this_time = nbox;
			rem = kgem_batch_space(kgem);
			if (10*nbox_this_time > rem)
				nbox_this_time = rem / 10;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
			DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
			     __FUNCTION__, nbox_this_time, nbox, rem));
			assert(nbox_this_time > 0);
			nbox -= nbox_this_time;

			assert(kgem->mode == KGEM_BLT);
			do {
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
				     __FUNCTION__,
				     box->x1, box->y1,
				     box->x2 - box->x1, box->y2 - box->y1));

				assert(box->x1 + src_dx >= 0);
				assert(box->y1 + src_dy >= 0);
				assert(box->x1 + src_dx <= INT16_MAX);
				assert(box->y1 + src_dy <= INT16_MAX);

				assert(box->x1 >= 0);
				assert(box->y1 >= 0);

				*(uint64_t *)&b[0] = hdr;
				*(uint64_t *)&b[2] = *(const uint64_t *)box;
				*(uint64_t *)(b+4) =
					kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 I915_GEM_DOMAIN_RENDER |
							 KGEM_RELOC_FENCED,
							 0);
				/* src origin = dst origin + src delta. */
				b[6] = add2(b[2], src_dx, src_dy);
				b[7] = src_pitch;
				*(uint64_t *)(b+8) =
					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 KGEM_RELOC_FENCED,
							 0);
				kgem->nbatch += 10;
				assert(kgem->nbatch < kgem->surface);
				box++;
			} while (--nbox_this_time);

			if (!nbox)
				break;

			/* Batch exhausted: submit and start a new one. */
			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
			kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
		} while (1);
	} else {
		/* Slow path: apply the dst translation per box. */
		do {
			int nbox_this_time, rem;

			nbox_this_time = nbox;
			rem = kgem_batch_space(kgem);
			if (10*nbox_this_time > rem)
				nbox_this_time = rem / 10;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
			DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
			     __FUNCTION__, nbox_this_time, nbox, rem));
			assert(nbox_this_time > 0);
			nbox -= nbox_this_time;

			assert(kgem->mode == KGEM_BLT);
			do {
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
				     __FUNCTION__,
				     box->x1, box->y1,
				     box->x2 - box->x1, box->y2 - box->y1));

				assert(box->x1 + src_dx >= 0);
				assert(box->y1 + src_dy >= 0);

				assert(box->x1 + dst_dx >= 0);
				assert(box->y1 + dst_dy >= 0);

				b[0] = cmd;
				b[1] = br13;
				b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
				b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
				*(uint64_t *)(b+4) =
					kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 I915_GEM_DOMAIN_RENDER |
							 KGEM_RELOC_FENCED,
							 0);
				b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
				b[7] = src_pitch;
				*(uint64_t *)(b+8) =
					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 KGEM_RELOC_FENCED,
							 0);
				kgem->nbatch += 10;
				assert(kgem->nbatch < kgem->surface);
				box++;
			} while (--nbox_this_time);

			if (!nbox)
				break;

			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
			kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
		} while (1);
	}
	sna_vertex_unlock(&sna->render);
}
1923
1924fastcall static void
1925blt_composite_copy_with_alpha(struct sna *sna,
1926			      const struct sna_composite_op *op,
1927			      const struct sna_composite_rectangles *r)
1928{
1929	int x1, x2, y1, y2;
1930	int src_x, src_y;
1931
1932	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
1933	     __FUNCTION__,
1934	     r->src.x, r->src.y,
1935	     r->dst.x, r->dst.y,
1936	     r->width, r->height));
1937
1938	/* XXX higher layer should have clipped? */
1939
1940	x1 = r->dst.x + op->dst.x;
1941	y1 = r->dst.y + op->dst.y;
1942	x2 = x1 + r->width;
1943	y2 = y1 + r->height;
1944
1945	src_x = r->src.x - x1 + op->u.blt.sx;
1946	src_y = r->src.y - y1 + op->u.blt.sy;
1947
1948	/* clip against dst */
1949	if (x1 < 0)
1950		x1 = 0;
1951	if (y1 < 0)
1952		y1 = 0;
1953
1954	if (x2 > op->dst.width)
1955		x2 = op->dst.width;
1956
1957	if (y2 > op->dst.height)
1958		y2 = op->dst.height;
1959
1960	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));
1961
1962	if (x2 <= x1 || y2 <= y1)
1963		return;
1964
1965	sna_blt_alpha_fixup_one(sna, &op->u.blt,
1966				x1 + src_x, y1 + src_y,
1967				x2 - x1, y2 - y1,
1968				x1, y1);
1969}
1970
1971fastcall static void
1972blt_composite_copy_box_with_alpha(struct sna *sna,
1973				  const struct sna_composite_op *op,
1974				  const BoxRec *box)
1975{
1976	DBG(("%s: box (%d, %d), (%d, %d)\n",
1977	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
1978	sna_blt_alpha_fixup_one(sna, &op->u.blt,
1979				box->x1 + op->u.blt.sx,
1980				box->y1 + op->u.blt.sy,
1981				box->x2 - box->x1,
1982				box->y2 - box->y1,
1983				box->x1 + op->dst.x,
1984				box->y1 + op->dst.y);
1985}
1986
1987static void
1988blt_composite_copy_boxes_with_alpha(struct sna *sna,
1989				    const struct sna_composite_op *op,
1990				    const BoxRec *box, int nbox)
1991{
1992	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1993	do {
1994		DBG(("%s: box (%d, %d), (%d, %d)\n",
1995		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
1996		sna_blt_alpha_fixup_one(sna, &op->u.blt,
1997					box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
1998					box->x2 - box->x1, box->y2 - box->y1,
1999					box->x1 + op->dst.x, box->y1 + op->dst.y);
2000		box++;
2001	} while(--nbox);
2002}
2003
/* Prepare a composite that is a straight copy via the BLT engine, with an
 * optional alpha-channel fixup.  Falls back to the tiled path when the two
 * bos cannot both be fitted into the aperture even after a submit. */
static bool
prepare_blt_copy(struct sna *sna,
		 struct sna_composite_op *op,
		 struct kgem_bo *bo,
		 uint32_t alpha_fixup)
{
	PixmapPtr src = op->u.blt.src_pixmap;

	assert(op->dst.bo);
	assert(kgem_bo_can_blt(&sna->kgem, op->dst.bo));
	assert(kgem_bo_can_blt(&sna->kgem, bo));

	kgem_set_mode(&sna->kgem, KGEM_BLT, op->dst.bo);
	if (!kgem_check_many_bo_fenced(&sna->kgem, op->dst.bo, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_many_bo_fenced(&sna->kgem,
					       op->dst.bo, bo, NULL)) {
			DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
			return sna_tiling_blt_composite(sna, op, bo,
							src->drawable.bitsPerPixel,
							alpha_fixup);
		}
		_kgem_set_mode(&sna->kgem, KGEM_BLT);
	}
	kgem_bcs_set_tiling(&sna->kgem, bo, op->dst.bo);

	DBG(("%s\n", __FUNCTION__));

	/* Blitting to self on gen6+ needs the special done hook. */
	if (sna->kgem.gen >= 060 && op->dst.bo == bo)
		op->done = gen6_blt_copy_done;
	else
		op->done = nop_done;

	if (alpha_fixup) {
		op->blt   = blt_composite_copy_with_alpha;
		op->box   = blt_composite_copy_box_with_alpha;
		op->boxes = blt_composite_copy_boxes_with_alpha;

		if (!sna_blt_alpha_fixup_init(sna, &op->u.blt, bo, op->dst.bo,
					      src->drawable.bitsPerPixel,
					      alpha_fixup))
			return false;
	} else {
		op->blt   = blt_composite_copy;
		op->box   = blt_composite_copy_box;
		op->boxes = blt_composite_copy_boxes;
		/* gen >= 0100 uses two-dword (64-bit) relocations. */
		if (sna->kgem.gen >= 0100)
			op->thread_boxes = blt_composite_copy_boxes__thread64;
		else
			op->thread_boxes = blt_composite_copy_boxes__thread;

		if (!sna_blt_copy_init(sna, &op->u.blt, bo, op->dst.bo,
				       src->drawable.bitsPerPixel,
				       GXcopy))
			return false;
	}

	return true;
}
2063
/* CPU fallback: copy rectangle r from the source pixmap to the destination
 * pixmap with memcpy_blt, applying the source (u.blt.sx/sy) and destination
 * (dst.x/y) offsets. */
fastcall static void
blt_put_composite__cpu(struct sna *sna,
		       const struct sna_composite_op *op,
		       const struct sna_composite_rectangles *r)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	assert(src->devPrivate.ptr);
	assert(src->devKind);
	assert(dst->devPrivate.ptr);
	assert(dst->devKind);
	memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
		   r->src.x + op->u.blt.sx, r->src.y + op->u.blt.sy,
		   r->dst.x + op->dst.x, r->dst.y + op->dst.y,
		   r->width, r->height);
}
2081
/* CPU fallback: copy a single box from the source pixmap to the destination
 * pixmap with memcpy_blt, applying both composite offsets. */
fastcall static void
blt_put_composite_box__cpu(struct sna *sna,
			   const struct sna_composite_op *op,
			   const BoxRec *box)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	assert(src->devPrivate.ptr);
	assert(src->devKind);
	assert(dst->devPrivate.ptr);
	assert(dst->devKind);
	memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
		   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
		   box->x1 + op->dst.x, box->y1 + op->dst.y,
		   box->x2-box->x1, box->y2-box->y1);
}
2099
2100static void
2101blt_put_composite_boxes__cpu(struct sna *sna,
2102			     const struct sna_composite_op *op,
2103			     const BoxRec *box, int n)
2104{
2105	PixmapPtr dst = op->dst.pixmap;
2106	PixmapPtr src = op->u.blt.src_pixmap;
2107	assert(src->devPrivate.ptr);
2108	assert(src->devKind);
2109	assert(dst->devPrivate.ptr);
2110	assert(dst->devKind);
2111	do {
2112		memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
2113			   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
2114			   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
2115			   box->x1 + op->dst.x, box->y1 + op->dst.y,
2116			   box->x2-box->x1, box->y2-box->y1);
2117		box++;
2118	} while (--n);
2119}
2120
2121fastcall static void
2122blt_put_composite_with_alpha__cpu(struct sna *sna,
2123				  const struct sna_composite_op *op,
2124				  const struct sna_composite_rectangles *r)
2125{
2126	PixmapPtr dst = op->dst.pixmap;
2127	PixmapPtr src = op->u.blt.src_pixmap;
2128	assert(src->devPrivate.ptr);
2129	assert(src->devKind);
2130	assert(dst->devPrivate.ptr);
2131	assert(dst->devKind);
2132	memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
2133		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
2134		   r->src.x + op->u.blt.sx, r->src.y + op->u.blt.sy,
2135		   r->dst.x + op->dst.x, r->dst.y + op->dst.y,
2136		   r->width, r->height,
2137		   0xffffffff, op->u.blt.pixel);
2138
2139}
2140
2141fastcall static void
2142blt_put_composite_box_with_alpha__cpu(struct sna *sna,
2143				      const struct sna_composite_op *op,
2144				      const BoxRec *box)
2145{
2146	PixmapPtr dst = op->dst.pixmap;
2147	PixmapPtr src = op->u.blt.src_pixmap;
2148	assert(src->devPrivate.ptr);
2149	assert(src->devKind);
2150	assert(dst->devPrivate.ptr);
2151	assert(dst->devKind);
2152	memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
2153		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
2154		   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
2155		   box->x1 + op->dst.x, box->y1 + op->dst.y,
2156		   box->x2-box->x1, box->y2-box->y1,
2157		   0xffffffff, op->u.blt.pixel);
2158}
2159
2160static void
2161blt_put_composite_boxes_with_alpha__cpu(struct sna *sna,
2162					const struct sna_composite_op *op,
2163					const BoxRec *box, int n)
2164{
2165	PixmapPtr dst = op->dst.pixmap;
2166	PixmapPtr src = op->u.blt.src_pixmap;
2167	assert(src->devPrivate.ptr);
2168	assert(src->devKind);
2169	assert(dst->devPrivate.ptr);
2170	assert(dst->devKind);
2171	do {
2172		memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
2173			   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
2174			   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
2175			   box->x1 + op->dst.x, box->y1 + op->dst.y,
2176			   box->x2-box->x1, box->y2-box->y1,
2177			   0xffffffff, op->u.blt.pixel);
2178		box++;
2179	} while (--n);
2180}
2181
2182fastcall static void
2183blt_put_composite(struct sna *sna,
2184		  const struct sna_composite_op *op,
2185		  const struct sna_composite_rectangles *r)
2186{
2187	PixmapPtr dst = op->dst.pixmap;
2188	PixmapPtr src = op->u.blt.src_pixmap;
2189	struct sna_pixmap *dst_priv = sna_pixmap(dst);
2190	int pitch = src->devKind;
2191	char *data = src->devPrivate.ptr;
2192	int bpp = src->drawable.bitsPerPixel;
2193
2194	int16_t dst_x = r->dst.x + op->dst.x;
2195	int16_t dst_y = r->dst.y + op->dst.y;
2196	int16_t src_x = r->src.x + op->u.blt.sx;
2197	int16_t src_y = r->src.y + op->u.blt.sy;
2198
2199	if (!dst_priv->pinned &&
2200	    dst_x <= 0 && dst_y <= 0 &&
2201	    dst_x + r->width >= op->dst.width &&
2202	    dst_y + r->height >= op->dst.height) {
2203		data += (src_x - dst_x) * bpp / 8;
2204		data += (src_y - dst_y) * pitch;
2205
2206		assert(op->dst.bo == dst_priv->gpu_bo);
2207		sna_replace(sna, op->dst.pixmap, data, pitch);
2208	} else {
2209		BoxRec box;
2210		bool ok;
2211
2212		box.x1 = dst_x;
2213		box.y1 = dst_y;
2214		box.x2 = dst_x + r->width;
2215		box.y2 = dst_y + r->height;
2216
2217		ok = sna_write_boxes(sna, dst,
2218				     dst_priv->gpu_bo, 0, 0,
2219				     data, pitch, src_x, src_y,
2220				     &box, 1);
2221		assert(ok);
2222		(void)ok;
2223	}
2224}
2225
2226fastcall static void blt_put_composite_box(struct sna *sna,
2227					   const struct sna_composite_op *op,
2228					   const BoxRec *box)
2229{
2230	PixmapPtr src = op->u.blt.src_pixmap;
2231	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
2232
2233	DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
2234	     op->u.blt.sx, op->u.blt.sy,
2235	     op->dst.x, op->dst.y));
2236
2237	assert(src->devPrivate.ptr);
2238	assert(src->devKind);
2239	if (!dst_priv->pinned &&
2240	    box->x2 - box->x1 == op->dst.width &&
2241	    box->y2 - box->y1 == op->dst.height) {
2242		int pitch = src->devKind;
2243		int bpp = src->drawable.bitsPerPixel / 8;
2244		char *data = src->devPrivate.ptr;
2245
2246		data += (box->y1 + op->u.blt.sy) * pitch;
2247		data += (box->x1 + op->u.blt.sx) * bpp;
2248
2249		assert(op->dst.bo == dst_priv->gpu_bo);
2250		sna_replace(sna, op->dst.pixmap, data, pitch);
2251	} else {
2252		bool ok;
2253
2254		ok = sna_write_boxes(sna, op->dst.pixmap,
2255				     op->dst.bo, op->dst.x, op->dst.y,
2256				     src->devPrivate.ptr,
2257				     src->devKind,
2258				     op->u.blt.sx, op->u.blt.sy,
2259				     box, 1);
2260		assert(ok);
2261		(void)ok;
2262	}
2263}
2264
2265static void blt_put_composite_boxes(struct sna *sna,
2266				    const struct sna_composite_op *op,
2267				    const BoxRec *box, int n)
2268{
2269	PixmapPtr src = op->u.blt.src_pixmap;
2270	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
2271
2272	DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__,
2273	     op->u.blt.sx, op->u.blt.sy,
2274	     op->dst.x, op->dst.y,
2275	     box->x1, box->y1, box->x2, box->y2, n));
2276
2277	assert(src->devPrivate.ptr);
2278	assert(src->devKind);
2279	if (n == 1 && !dst_priv->pinned &&
2280	    box->x2 - box->x1 == op->dst.width &&
2281	    box->y2 - box->y1 == op->dst.height) {
2282		int pitch = src->devKind;
2283		int bpp = src->drawable.bitsPerPixel / 8;
2284		char *data = src->devPrivate.ptr;
2285
2286		data += (box->y1 + op->u.blt.sy) * pitch;
2287		data += (box->x1 + op->u.blt.sx) * bpp;
2288
2289		assert(op->dst.bo == dst_priv->gpu_bo);
2290		sna_replace(sna, op->dst.pixmap, data, pitch);
2291	} else {
2292		bool ok;
2293
2294		ok = sna_write_boxes(sna, op->dst.pixmap,
2295				     op->dst.bo, op->dst.x, op->dst.y,
2296				     src->devPrivate.ptr,
2297				     src->devKind,
2298				     op->u.blt.sx, op->u.blt.sy,
2299				     box, n);
2300		assert(ok);
2301		(void)ok;
2302	}
2303}
2304
2305fastcall static void
2306blt_put_composite_with_alpha(struct sna *sna,
2307			     const struct sna_composite_op *op,
2308			     const struct sna_composite_rectangles *r)
2309{
2310	PixmapPtr dst = op->dst.pixmap;
2311	PixmapPtr src = op->u.blt.src_pixmap;
2312	struct sna_pixmap *dst_priv = sna_pixmap(dst);
2313	int pitch = src->devKind;
2314	char *data = src->devPrivate.ptr;
2315
2316	int16_t dst_x = r->dst.x + op->dst.x;
2317	int16_t dst_y = r->dst.y + op->dst.y;
2318	int16_t src_x = r->src.x + op->u.blt.sx;
2319	int16_t src_y = r->src.y + op->u.blt.sy;
2320
2321	assert(src->devPrivate.ptr);
2322	assert(src->devKind);
2323
2324	if (!dst_priv->pinned &&
2325	    dst_x <= 0 && dst_y <= 0 &&
2326	    dst_x + r->width >= op->dst.width &&
2327	    dst_y + r->height >= op->dst.height) {
2328		int bpp = dst->drawable.bitsPerPixel / 8;
2329
2330		data += (src_x - dst_x) * bpp;
2331		data += (src_y - dst_y) * pitch;
2332
2333		assert(op->dst.bo == dst_priv->gpu_bo);
2334		sna_replace__xor(sna, op->dst.pixmap, data, pitch,
2335				 0xffffffff, op->u.blt.pixel);
2336	} else {
2337		BoxRec box;
2338
2339		box.x1 = dst_x;
2340		box.y1 = dst_y;
2341		box.x2 = dst_x + r->width;
2342		box.y2 = dst_y + r->height;
2343
2344		sna_write_boxes__xor(sna, dst,
2345				     dst_priv->gpu_bo, 0, 0,
2346				     data, pitch, src_x, src_y,
2347				     &box, 1,
2348				     0xffffffff, op->u.blt.pixel);
2349	}
2350}
2351
2352fastcall static void
2353blt_put_composite_box_with_alpha(struct sna *sna,
2354				 const struct sna_composite_op *op,
2355				 const BoxRec *box)
2356{
2357	PixmapPtr src = op->u.blt.src_pixmap;
2358	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
2359
2360	DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
2361	     op->u.blt.sx, op->u.blt.sy,
2362	     op->dst.x, op->dst.y));
2363
2364	assert(src->devPrivate.ptr);
2365	assert(src->devKind);
2366
2367	if (!dst_priv->pinned &&
2368	    box->x2 - box->x1 == op->dst.width &&
2369	    box->y2 - box->y1 == op->dst.height) {
2370		int pitch = src->devKind;
2371		int bpp = src->drawable.bitsPerPixel / 8;
2372		char *data = src->devPrivate.ptr;
2373
2374		data += (box->y1 + op->u.blt.sy) * pitch;
2375		data += (box->x1 + op->u.blt.sx) * bpp;
2376
2377		assert(op->dst.bo == dst_priv->gpu_bo);
2378		sna_replace__xor(sna, op->dst.pixmap, data, pitch,
2379				 0xffffffff, op->u.blt.pixel);
2380	} else {
2381		sna_write_boxes__xor(sna, op->dst.pixmap,
2382				     op->dst.bo, op->dst.x, op->dst.y,
2383				     src->devPrivate.ptr,
2384				     src->devKind,
2385				     op->u.blt.sx, op->u.blt.sy,
2386				     box, 1,
2387				     0xffffffff, op->u.blt.pixel);
2388	}
2389}
2390
2391static void
2392blt_put_composite_boxes_with_alpha(struct sna *sna,
2393				   const struct sna_composite_op *op,
2394				   const BoxRec *box, int n)
2395{
2396	PixmapPtr src = op->u.blt.src_pixmap;
2397	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
2398
2399	DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__,
2400	     op->u.blt.sx, op->u.blt.sy,
2401	     op->dst.x, op->dst.y,
2402	     box->x1, box->y1, box->x2, box->y2, n));
2403
2404	assert(src->devPrivate.ptr);
2405	assert(src->devKind);
2406
2407	if (n == 1 && !dst_priv->pinned &&
2408	    box->x2 - box->x1 == op->dst.width &&
2409	    box->y2 - box->y1 == op->dst.height) {
2410		int pitch = src->devKind;
2411		int bpp = src->drawable.bitsPerPixel / 8;
2412		char *data = src->devPrivate.ptr;
2413
2414		data += (box->y1 + op->u.blt.sy) * pitch;
2415		data += (box->x1 + op->u.blt.sx) * bpp;
2416
2417		assert(dst_priv->gpu_bo == op->dst.bo);
2418		sna_replace__xor(sna, op->dst.pixmap, data, pitch,
2419				 0xffffffff, op->u.blt.pixel);
2420	} else {
2421		sna_write_boxes__xor(sna, op->dst.pixmap,
2422				     op->dst.bo, op->dst.x, op->dst.y,
2423				     src->devPrivate.ptr,
2424				     src->devKind,
2425				     op->u.blt.sx, op->u.blt.sy,
2426				     box, n,
2427				     0xffffffff, op->u.blt.pixel);
2428	}
2429}
2430
2431static bool
2432prepare_blt_put(struct sna *sna,
2433		struct sna_composite_op *op,
2434		uint32_t alpha_fixup)
2435{
2436	DBG(("%s\n", __FUNCTION__));
2437
2438	assert(!sna_pixmap(op->dst.pixmap)->clear);
2439
2440	if (op->dst.bo) {
2441		assert(op->dst.bo == sna_pixmap(op->dst.pixmap)->gpu_bo);
2442		if (alpha_fixup) {
2443			op->u.blt.pixel = alpha_fixup;
2444			op->blt   = blt_put_composite_with_alpha;
2445			op->box   = blt_put_composite_box_with_alpha;
2446			op->boxes = blt_put_composite_boxes_with_alpha;
2447		} else {
2448			op->blt   = blt_put_composite;
2449			op->box   = blt_put_composite_box;
2450			op->boxes = blt_put_composite_boxes;
2451		}
2452
2453		op->done = nop_done;
2454		return true;
2455	} else {
2456		if (alpha_fixup) {
2457			op->u.blt.pixel = alpha_fixup;
2458			op->blt   = blt_put_composite_with_alpha__cpu;
2459			op->box   = blt_put_composite_box_with_alpha__cpu;
2460			op->boxes = blt_put_composite_boxes_with_alpha__cpu;
2461		} else {
2462			op->blt   = blt_put_composite__cpu;
2463			op->box   = blt_put_composite_box__cpu;
2464			op->boxes = blt_put_composite_boxes__cpu;
2465		}
2466
2467		op->done = sig_done;
2468		return sigtrap_get() == 0;
2469	}
2470}
2471
2472static bool
2473is_clear(PixmapPtr pixmap)
2474{
2475	struct sna_pixmap *priv = sna_pixmap(pixmap);
2476	return priv && priv->clear;
2477}
2478
/*
 * Porter-Duff OVER for two premultiplied ARGB32 pixels:
 * result = src + dst * (255 - alpha(src)) / 255, per 8-bit channel.
 * Used to precompute the result of compositing a solid source over a
 * solid (cleared) destination.
 *
 * The helper macros process two channels at a time: the red and blue
 * bytes of a pixel selected by RB_MASK, then the alpha and green bytes
 * after shifting right by G_SHIFT.  (Same scheme as pixman's UN8x4
 * macros.)
 */
static inline uint32_t
over(uint32_t src, uint32_t dst)
{
	uint32_t a = ~src >> 24;	/* 255 - src alpha */

#define G_SHIFT 8
#define RB_MASK 0xff00ff
#define RB_ONE_HALF 0x800080
#define RB_MASK_PLUS_ONE 0x10000100

/* x = x * a / 255 for both channels in x, with rounding:
 * adds one half then folds the high byte back in to approximate the
 * divide by 255.
 */
#define UN8_rb_MUL_UN8(x, a, t) do {				\
	t  = ((x) & RB_MASK) * (a);				\
	t += RB_ONE_HALF;					\
	x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;	\
	x &= RB_MASK;						\
} while (0)

/* x = saturate(x + y) for both channels: any channel sum that carries
 * into the next byte is forced to 0xff via the borrow from
 * RB_MASK_PLUS_ONE.
 */
#define UN8_rb_ADD_UN8_rb(x, y, t) do {				\
	t = ((x) + (y));					\
	t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);	\
	x = (t & RB_MASK);					\
} while (0)

/* x = x * a + y across all four channels, split into the rb and ag
 * channel pairs.
 */
#define UN8x4_MUL_UN8_ADD_UN8x4(x, a, y) do {			\
	uint32_t r1__, r2__, r3__, t__;				\
	\
	r1__ = (x);						\
	r2__ = (y) & RB_MASK;					\
	UN8_rb_MUL_UN8(r1__, (a), t__);				\
	UN8_rb_ADD_UN8_rb(r1__, r2__, t__);			\
	\
	r2__ = (x) >> G_SHIFT;					\
	r3__ = ((y) >> G_SHIFT) & RB_MASK;			\
	UN8_rb_MUL_UN8(r2__, (a), t__);				\
	UN8_rb_ADD_UN8_rb(r2__, r3__, t__);			\
	\
	(x) = r1__ | (r2__ << G_SHIFT);				\
} while (0)

	/* dst = dst * (255 - src.alpha) + src */
	UN8x4_MUL_UN8_ADD_UN8x4(dst, a, src);

	return dst;
}
2522
/*
 * Porter-Duff ADD for two premultiplied ARGB32 pixels: a per-channel
 * saturating add, using the RB_MASK/UN8_rb_ADD_UN8_rb machinery defined
 * in over() above.  Used to precompute the result of adding a solid
 * source onto a solid (cleared) destination.
 */
static inline uint32_t
add(uint32_t src, uint32_t dst)
{
/* x = saturate(x + y) across all four channels, processed as the rb and
 * ag channel pairs.
 */
#define UN8x4_ADD_UN8x4(x, y) do {				\
	uint32_t r1__, r2__, r3__, t__;				\
	\
	r1__ = (x) & RB_MASK;					\
	r2__ = (y) & RB_MASK;					\
	UN8_rb_ADD_UN8_rb(r1__, r2__, t__);			\
	\
	r2__ = ((x) >> G_SHIFT) & RB_MASK;			\
	r3__ = ((y) >> G_SHIFT) & RB_MASK;			\
	UN8_rb_ADD_UN8_rb(r2__, r3__, t__);			\
	\
	x = r1__ | (r2__ << G_SHIFT);				\
} while (0)

	UN8x4_ADD_UN8x4(src, dst);
	return src;
}
2543
2544bool
2545sna_blt_composite(struct sna *sna,
2546		  uint32_t op,
2547		  PicturePtr src,
2548		  PicturePtr dst,
2549		  int16_t x, int16_t y,
2550		  int16_t dst_x, int16_t dst_y,
2551		  int16_t width, int16_t height,
2552		  unsigned flags,
2553		  struct sna_composite_op *tmp)
2554{
2555	PictFormat src_format = src->format;
2556	PixmapPtr src_pixmap;
2557	struct kgem_bo *bo;
2558	int16_t tx, ty;
2559	BoxRec dst_box, src_box;
2560	uint32_t alpha_fixup;
2561	uint32_t color, hint;
2562	bool was_clear;
2563	bool ret;
2564
2565#if DEBUG_NO_BLT || NO_BLT_COMPOSITE
2566	return false;
2567#endif
2568	DBG(("%s (%d, %d), (%d, %d), %dx%d\n",
2569	     __FUNCTION__, x, y, dst_x, dst_y, width, height));
2570
2571	switch (dst->pDrawable->bitsPerPixel) {
2572	case 8:
2573	case 16:
2574	case 32:
2575		break;
2576	default:
2577		DBG(("%s: unhandled bpp: %d\n", __FUNCTION__,
2578		     dst->pDrawable->bitsPerPixel));
2579		return false;
2580	}
2581
2582	tmp->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
2583	was_clear = is_clear(tmp->dst.pixmap);
2584
2585	if (width | height) {
2586		dst_box.x1 = dst_x;
2587		dst_box.x2 = bound(dst_x, width);
2588		dst_box.y1 = dst_y;
2589		dst_box.y2 = bound(dst_y, height);
2590	} else
2591		sna_render_picture_extents(dst, &dst_box);
2592
2593	tmp->dst.format = dst->format;
2594	tmp->dst.width = tmp->dst.pixmap->drawable.width;
2595	tmp->dst.height = tmp->dst.pixmap->drawable.height;
2596	get_drawable_deltas(dst->pDrawable, tmp->dst.pixmap,
2597			    &tmp->dst.x, &tmp->dst.y);
2598
2599	if (op == PictOpClear) {
2600clear:
2601		if (was_clear && sna_pixmap(tmp->dst.pixmap)->clear_color == 0) {
2602			sna_pixmap(tmp->dst.pixmap)->clear = true;
2603nop:
2604			return prepare_blt_nop(sna, tmp);
2605		}
2606
2607		hint = 0;
2608		if (can_render(sna)) {
2609			hint |= PREFER_GPU;
2610			if ((flags & COMPOSITE_PARTIAL) == 0) {
2611				hint |= IGNORE_DAMAGE;
2612				if (width  == tmp->dst.pixmap->drawable.width &&
2613				    height == tmp->dst.pixmap->drawable.height)
2614					hint |= REPLACES;
2615			}
2616		}
2617		tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
2618						  &dst_box, &tmp->damage);
2619		assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage));
2620		if (tmp->dst.bo) {
2621			if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
2622				DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n",
2623				     __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch));
2624				return false;
2625			}
2626			if (hint & REPLACES)
2627				kgem_bo_undo(&sna->kgem, tmp->dst.bo);
2628			if (flags & COMPOSITE_UPLOAD)
2629				return false;
2630		} else {
2631			RegionRec region;
2632
2633			region.extents = dst_box;
2634			region.data = NULL;
2635
2636			hint = MOVE_WRITE | MOVE_INPLACE_HINT;
2637			if (flags & COMPOSITE_PARTIAL)
2638				hint |= MOVE_READ;
2639			if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region, hint))
2640				return false;
2641		}
2642
2643		return prepare_blt_clear(sna, tmp);
2644	}
2645
2646	if (is_solid(src)) {
2647		if ((op == PictOpOver || op == PictOpAdd) && is_transparent(src)) {
2648			sna_pixmap(tmp->dst.pixmap)->clear = was_clear;
2649			return prepare_blt_nop(sna, tmp);
2650		}
2651		if (op == PictOpOver && is_opaque_solid(src))
2652			op = PictOpSrc;
2653		if (op == PictOpAdd &&
2654		    PICT_FORMAT_RGB(src->format) == PICT_FORMAT_RGB(dst->format) &&
2655		    is_white(src))
2656			op = PictOpSrc;
2657		if (was_clear && (op == PictOpAdd || op == PictOpOver)) {
2658			if (sna_pixmap(tmp->dst.pixmap)->clear_color == 0)
2659				op = PictOpSrc;
2660			if (op == PictOpOver) {
2661				unsigned dst_color = solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color);
2662				color = over(get_solid_color(src, PICT_a8r8g8b8),
2663					     dst_color);
2664				op = PictOpSrc;
2665				DBG(("%s: precomputing solid OVER (%08x, %08x) -> %08x\n",
2666				     __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8),
2667				     solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color),
2668				     color));
2669				if (color == dst_color)
2670					goto nop;
2671				else
2672					goto fill;
2673			}
2674			if (op == PictOpAdd) {
2675				unsigned dst_color = solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color);
2676				color = add(get_solid_color(src, PICT_a8r8g8b8),
2677					    dst_color);
2678				op = PictOpSrc;
2679				DBG(("%s: precomputing solid ADD (%08x, %08x) -> %08x\n",
2680				     __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8),
2681				     solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color),
2682				     color));
2683				if (color == dst_color)
2684					goto nop;
2685				else
2686					goto fill;
2687			}
2688		}
2689		if (op == PictOpOutReverse && is_opaque_solid(src))
2690			goto clear;
2691
2692		if (op != PictOpSrc) {
2693			DBG(("%s: unsupported op [%d] for blitting\n",
2694			     __FUNCTION__, op));
2695			return false;
2696		}
2697
2698		color = get_solid_color(src, tmp->dst.format);
2699fill:
2700		if (color == 0)
2701			goto clear;
2702
2703		if (was_clear && sna_pixmap(tmp->dst.pixmap)->clear_color == color) {
2704			sna_pixmap(tmp->dst.pixmap)->clear = true;
2705			return prepare_blt_nop(sna, tmp);
2706		}
2707
2708		hint = 0;
2709		if (can_render(sna)) {
2710			hint |= PREFER_GPU;
2711			if ((flags & COMPOSITE_PARTIAL) == 0) {
2712				hint |= IGNORE_DAMAGE;
2713				if (width  == tmp->dst.pixmap->drawable.width &&
2714				    height == tmp->dst.pixmap->drawable.height)
2715					hint |= REPLACES;
2716			}
2717		}
2718		tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
2719						  &dst_box, &tmp->damage);
2720		assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage));
2721		if (tmp->dst.bo) {
2722			if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
2723				DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n",
2724				     __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch));
2725				return false;
2726			}
2727			if (hint & REPLACES)
2728				kgem_bo_undo(&sna->kgem, tmp->dst.bo);
2729			if (flags & COMPOSITE_UPLOAD)
2730				return false;
2731		} else {
2732			RegionRec region;
2733
2734			region.extents = dst_box;
2735			region.data = NULL;
2736
2737			hint = MOVE_WRITE | MOVE_INPLACE_HINT;
2738			if (flags & COMPOSITE_PARTIAL)
2739				hint |= MOVE_READ;
2740			if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region, hint))
2741				return false;
2742		}
2743
2744		return prepare_blt_fill(sna, tmp, color);
2745	}
2746
2747	if (!src->pDrawable) {
2748		DBG(("%s: unsupported procedural source\n",
2749		     __FUNCTION__));
2750		return false;
2751	}
2752
2753	if (src->filter == PictFilterConvolution) {
2754		DBG(("%s: convolutions filters not handled\n",
2755		     __FUNCTION__));
2756		return false;
2757	}
2758
2759	if (op == PictOpOver && PICT_FORMAT_A(src_format) == 0)
2760		op = PictOpSrc;
2761
2762	if (op != PictOpSrc) {
2763		DBG(("%s: unsupported op [%d] for blitting\n",
2764		     __FUNCTION__, op));
2765		return false;
2766	}
2767
2768	if (!sna_transform_is_imprecise_integer_translation(src->transform, src->filter,
2769							    dst->polyMode == PolyModePrecise,
2770							    &tx, &ty)) {
2771		DBG(("%s: source transform is not an integer translation\n",
2772		     __FUNCTION__));
2773		return false;
2774	}
2775	DBG(("%s: converting transform to integer translation? (%d, %d)\n",
2776	     __FUNCTION__, src->transform != NULL, tx, ty));
2777	x += tx;
2778	y += ty;
2779
2780	if ((x >= src->pDrawable->width ||
2781	     y >= src->pDrawable->height ||
2782	     x + width  <= 0 ||
2783	     y + height <= 0) &&
2784	    (!src->repeat || src->repeatType == RepeatNone)) {
2785		DBG(("%s: source is outside of valid area, converting to clear\n",
2786		     __FUNCTION__));
2787		goto clear;
2788	}
2789
2790	src_pixmap = get_drawable_pixmap(src->pDrawable);
2791	if (is_clear(src_pixmap)) {
2792		if (src->repeat ||
2793		    (x >= 0 && y >= 0 &&
2794		     x + width  <= src_pixmap->drawable.width &&
2795		     y + height <= src_pixmap->drawable.height)) {
2796			color = color_convert(sna_pixmap(src_pixmap)->clear_color,
2797					      src->format, tmp->dst.format);
2798			goto fill;
2799		}
2800	}
2801
2802	alpha_fixup = 0;
2803	if (!(dst->format == src_format ||
2804	      dst->format == alphaless(src_format) ||
2805	      (alphaless(dst->format) == alphaless(src_format) &&
2806	       sna_get_pixel_from_rgba(&alpha_fixup,
2807				       0, 0, 0, 0xffff,
2808				       dst->format)))) {
2809		DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n",
2810		     __FUNCTION__, (unsigned)src_format, dst->format));
2811		return false;
2812	}
2813
2814	/* XXX tiling? fixup extend none? */
2815	if (x < 0 || y < 0 ||
2816	    x + width  > src->pDrawable->width ||
2817	    y + height > src->pDrawable->height) {
2818		DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d, repeat=%d\n",
2819		     __FUNCTION__,
2820		     x, y, x+width, y+width, src->pDrawable->width, src->pDrawable->height, src->repeatType));
2821		if (src->repeat && src->repeatType == RepeatNormal) {
2822			x = x % src->pDrawable->width;
2823			y = y % src->pDrawable->height;
2824			if (x < 0)
2825				x += src->pDrawable->width;
2826			if (y < 0)
2827				y += src->pDrawable->height;
2828			if (x + width  > src->pDrawable->width ||
2829			    y + height > src->pDrawable->height)
2830				return false;
2831		} else
2832			return false;
2833	}
2834
2835	get_drawable_deltas(src->pDrawable, src_pixmap, &tx, &ty);
2836	x += tx + src->pDrawable->x;
2837	y += ty + src->pDrawable->y;
2838	if (x < 0 || y < 0 ||
2839	    x + width  > src_pixmap->drawable.width ||
2840	    y + height > src_pixmap->drawable.height) {
2841		DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid pixmap %dx%d\n",
2842		     __FUNCTION__,
2843		     x, y, x+width, y+width, src_pixmap->drawable.width, src_pixmap->drawable.height));
2844		return false;
2845	}
2846
2847	tmp->u.blt.src_pixmap = src_pixmap;
2848	tmp->u.blt.sx = x - dst_x;
2849	tmp->u.blt.sy = y - dst_y;
2850	DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
2851	     __FUNCTION__,
2852	     tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy, alpha_fixup));
2853
2854	src_box.x1 = x;
2855	src_box.y1 = y;
2856	src_box.x2 = x + width;
2857	src_box.y2 = y + height;
2858	bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true);
2859	if (bo && !kgem_bo_can_blt(&sna->kgem, bo)) {
2860		DBG(("%s: can not blit from src size=%dx%d, tiling? %d, pitch? %d\n",
2861		     __FUNCTION__,
2862		     src_pixmap->drawable.width  < sna->render.max_3d_size,
2863		     src_pixmap->drawable.height < sna->render.max_3d_size,
2864		     bo->tiling, bo->pitch));
2865
2866		if (src_pixmap->drawable.width  <= sna->render.max_3d_size &&
2867		    src_pixmap->drawable.height <= sna->render.max_3d_size &&
2868		    bo->pitch <= sna->render.max_3d_pitch &&
2869		    (flags & (COMPOSITE_UPLOAD | COMPOSITE_FALLBACK)) == 0)
2870		{
2871			return false;
2872		}
2873
2874		bo = NULL;
2875	}
2876
2877	hint = 0;
2878	if (bo || can_render(sna)) {
2879		hint |= PREFER_GPU;
2880		if ((flags & COMPOSITE_PARTIAL) == 0) {
2881			hint |= IGNORE_DAMAGE;
2882			if (width  == tmp->dst.pixmap->drawable.width &&
2883			    height == tmp->dst.pixmap->drawable.height)
2884				hint |= REPLACES;
2885		}
2886		if (bo)
2887			hint |= FORCE_GPU;
2888	}
2889	tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
2890					  &dst_box, &tmp->damage);
2891	assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage));
2892
2893	if (tmp->dst.bo && hint & REPLACES) {
2894		struct sna_pixmap *priv = sna_pixmap(tmp->dst.pixmap);
2895		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
2896	}
2897
2898	if (tmp->dst.pixmap == src_pixmap)
2899		bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true);
2900
2901	ret = false;
2902	if (bo) {
2903		if (!tmp->dst.bo) {
2904			DBG(("%s: fallback -- unaccelerated read back\n",
2905			     __FUNCTION__));
2906fallback:
2907			if (flags & COMPOSITE_FALLBACK || !kgem_bo_is_busy(bo))
2908				goto put;
2909		} else if (!kgem_bo_can_blt(&sna->kgem, bo)) {
2910			DBG(("%s: fallback -- cannot blit from source\n",
2911			     __FUNCTION__));
2912			goto fallback;
2913		} else if (bo->snoop && tmp->dst.bo->snoop) {
2914			DBG(("%s: fallback -- can not copy between snooped bo\n",
2915			     __FUNCTION__));
2916			goto put;
2917		} else if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
2918			DBG(("%s: fallback -- unaccelerated upload\n",
2919			     __FUNCTION__));
2920			goto fallback;
2921		} else if ((flags & COMPOSITE_UPLOAD) == 0) {
2922			ret = prepare_blt_copy(sna, tmp, bo, alpha_fixup);
2923			if (!ret)
2924				goto fallback;
2925		}
2926	} else {
2927		RegionRec region;
2928
2929put:
2930		if (tmp->dst.bo == sna_pixmap(tmp->dst.pixmap)->cpu_bo) {
2931			DBG(("%s: dropping upload into CPU bo\n", __FUNCTION__));
2932			tmp->dst.bo = NULL;
2933			tmp->damage = NULL;
2934		}
2935
2936		if (tmp->dst.bo == NULL) {
2937			hint = MOVE_INPLACE_HINT | MOVE_WRITE;
2938			if (flags & COMPOSITE_PARTIAL)
2939				hint |= MOVE_READ;
2940
2941			region.extents = dst_box;
2942			region.data = NULL;
2943			if (!sna_drawable_move_region_to_cpu(dst->pDrawable,
2944							     &region, hint))
2945				return false;
2946
2947			assert(tmp->damage == NULL);
2948		}
2949
2950		region.extents = src_box;
2951		region.data = NULL;
2952		if (!sna_drawable_move_region_to_cpu(&src_pixmap->drawable,
2953						     &region, MOVE_READ))
2954			return false;
2955
2956		ret = prepare_blt_put(sna, tmp, alpha_fixup);
2957	}
2958
2959	return ret;
2960}
2961
2962static void convert_done(struct sna *sna, const struct sna_composite_op *op)
2963{
2964	struct kgem *kgem = &sna->kgem;
2965
2966	assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
2967	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
2968		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
2969		_kgem_submit(kgem);
2970	}
2971
2972	kgem_bo_destroy(kgem, op->src.bo);
2973	sna_render_composite_redirect_done(sna, op);
2974}
2975
2976static void gen6_convert_done(struct sna *sna, const struct sna_composite_op *op)
2977{
2978	struct kgem *kgem = &sna->kgem;
2979
2980	if (kgem_check_batch(kgem, 3)) {
2981		uint32_t *b = kgem->batch + kgem->nbatch;
2982		assert(sna->kgem.mode == KGEM_BLT);
2983		b[0] = XY_SETUP_CLIP;
2984		b[1] = b[2] = 0;
2985		kgem->nbatch += 3;
2986		assert(kgem->nbatch < kgem->surface);
2987	}
2988
2989	convert_done(sna, op);
2990}
2991
2992bool
2993sna_blt_composite__convert(struct sna *sna,
2994			   int x, int y,
2995			   int width, int height,
2996			   struct sna_composite_op *tmp)
2997{
2998	uint32_t alpha_fixup;
2999	int sx, sy;
3000	uint8_t op;
3001
3002#if DEBUG_NO_BLT || NO_BLT_COMPOSITE
3003	return false;
3004#endif
3005
3006	DBG(("%s src=%d, dst=%d (redirect? %d)\n", __FUNCTION__,
3007	     tmp->src.bo->handle, tmp->dst.bo->handle,
3008	     tmp->redirect.real_bo ? tmp->redirect.real_bo->handle : 0));
3009
3010	if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo) ||
3011	    !kgem_bo_can_blt(&sna->kgem, tmp->src.bo)) {
3012		DBG(("%s: cannot blt from src or to dst\n", __FUNCTION__));
3013		return false;
3014	}
3015
3016	if (tmp->src.transform) {
3017		DBG(("%s: transforms not handled by the BLT\n", __FUNCTION__));
3018		return false;
3019	}
3020
3021	if (tmp->src.filter == PictFilterConvolution) {
3022		DBG(("%s: convolutions filters not handled\n",
3023		     __FUNCTION__));
3024		return false;
3025	}
3026
3027	op = tmp->op;
3028	if (op == PictOpOver && PICT_FORMAT_A(tmp->src.pict_format) == 0)
3029		op = PictOpSrc;
3030	if (op != PictOpSrc) {
3031		DBG(("%s: unsupported op [%d] for blitting\n",
3032		     __FUNCTION__, op));
3033		return false;
3034	}
3035
3036	alpha_fixup = 0;
3037	if (!(tmp->dst.format == tmp->src.pict_format ||
3038	      tmp->dst.format == alphaless(tmp->src.pict_format) ||
3039	      (alphaless(tmp->dst.format) == alphaless(tmp->src.pict_format) &&
3040	       sna_get_pixel_from_rgba(&alpha_fixup,
3041				       0, 0, 0, 0xffff,
3042				       tmp->dst.format)))) {
3043		DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n",
3044		     __FUNCTION__,
3045		     (unsigned)tmp->src.pict_format,
3046		     (unsigned)tmp->dst.format));
3047		return false;
3048	}
3049
3050	sx = tmp->src.offset[0];
3051	sy = tmp->src.offset[1];
3052
3053	x += sx;
3054	y += sy;
3055	if (x < 0 || y < 0 ||
3056	    x + width  > tmp->src.width ||
3057	    y + height > tmp->src.height) {
3058		DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d\n",
3059		     __FUNCTION__,
3060		     x, y, x+width, y+width, tmp->src.width, tmp->src.height));
3061		if (tmp->src.repeat == RepeatNormal) {
3062			int xx = x % tmp->src.width;
3063			int yy = y % tmp->src.height;
3064			if (xx < 0)
3065				xx += tmp->src.width;
3066			if (yy < 0)
3067				yy += tmp->src.height;
3068			if (xx + width  > tmp->src.width ||
3069			    yy + height > tmp->src.height)
3070				return false;
3071
3072			sx += xx - x;
3073			sy += yy - y;
3074		} else
3075			return false;
3076	}
3077
3078	DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
3079	     __FUNCTION__,
3080	     tmp->dst.x, tmp->dst.y, sx, sy, alpha_fixup));
3081
3082	tmp->u.blt.src_pixmap = NULL;
3083	tmp->u.blt.sx = sx;
3084	tmp->u.blt.sy = sy;
3085
3086	kgem_set_mode(&sna->kgem, KGEM_BLT, tmp->dst.bo);
3087	if (!kgem_check_many_bo_fenced(&sna->kgem, tmp->dst.bo, tmp->src.bo, NULL)) {
3088		kgem_submit(&sna->kgem);
3089		if (!kgem_check_many_bo_fenced(&sna->kgem,
3090					       tmp->dst.bo, tmp->src.bo, NULL)) {
3091			DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
3092			return sna_tiling_blt_composite(sna, tmp, tmp->src.bo,
3093							PICT_FORMAT_BPP(tmp->src.pict_format),
3094							alpha_fixup);
3095		}
3096		_kgem_set_mode(&sna->kgem, KGEM_BLT);
3097	}
3098	kgem_bcs_set_tiling(&sna->kgem, tmp->src.bo, tmp->dst.bo);
3099
3100	if (alpha_fixup) {
3101		tmp->blt   = blt_composite_copy_with_alpha;
3102		tmp->box   = blt_composite_copy_box_with_alpha;
3103		tmp->boxes = blt_composite_copy_boxes_with_alpha;
3104
3105		if (!sna_blt_alpha_fixup_init(sna, &tmp->u.blt,
3106					      tmp->src.bo, tmp->dst.bo,
3107					      PICT_FORMAT_BPP(tmp->src.pict_format),
3108					      alpha_fixup))
3109			return false;
3110	} else {
3111		tmp->blt   = blt_composite_copy;
3112		tmp->box   = blt_composite_copy_box;
3113		tmp->boxes = blt_composite_copy_boxes;
3114		if (sna->kgem.gen >= 0100)
3115			tmp->thread_boxes = blt_composite_copy_boxes__thread64;
3116		else
3117			tmp->thread_boxes = blt_composite_copy_boxes__thread;
3118
3119		if (!sna_blt_copy_init(sna, &tmp->u.blt,
3120				       tmp->src.bo, tmp->dst.bo,
3121				       PICT_FORMAT_BPP(tmp->src.pict_format),
3122				       GXcopy))
3123			return false;
3124	}
3125
3126	tmp->done = convert_done;
3127	if (sna->kgem.gen >= 060 && tmp->src.bo == tmp->dst.bo)
3128		tmp->done = gen6_convert_done;
3129
3130	return true;
3131}
3132
3133static void sna_blt_fill_op_blt(struct sna *sna,
3134				const struct sna_fill_op *op,
3135				int16_t x, int16_t y,
3136				int16_t width, int16_t height)
3137{
3138	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
3139		const struct sna_blt_state *blt = &op->base.u.blt;
3140
3141		__sna_blt_fill_begin(sna, blt);
3142
3143		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
3144		sna->blt_state.fill_pixel = blt->pixel;
3145		sna->blt_state.fill_alu = blt->alu;
3146	}
3147
3148	sna_blt_fill_one(sna, &op->base.u.blt, x, y, width, height);
3149}
3150
3151fastcall static void sna_blt_fill_op_box(struct sna *sna,
3152					 const struct sna_fill_op *op,
3153					 const BoxRec *box)
3154{
3155	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
3156		const struct sna_blt_state *blt = &op->base.u.blt;
3157
3158		__sna_blt_fill_begin(sna, blt);
3159
3160		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
3161		sna->blt_state.fill_pixel = blt->pixel;
3162		sna->blt_state.fill_alu = blt->alu;
3163	}
3164
3165	_sna_blt_fill_box(sna, &op->base.u.blt, box);
3166}
3167
3168fastcall static void sna_blt_fill_op_boxes(struct sna *sna,
3169					   const struct sna_fill_op *op,
3170					   const BoxRec *box,
3171					   int nbox)
3172{
3173	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
3174		const struct sna_blt_state *blt = &op->base.u.blt;
3175
3176		__sna_blt_fill_begin(sna, blt);
3177
3178		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
3179		sna->blt_state.fill_pixel = blt->pixel;
3180		sna->blt_state.fill_alu = blt->alu;
3181	}
3182
3183	_sna_blt_fill_boxes(sna, &op->base.u.blt, box, nbox);
3184}
3185
3186static inline uint64_t pt_add(uint32_t cmd, const DDXPointRec *pt, int16_t dx, int16_t dy)
3187{
3188	union {
3189		DDXPointRec pt;
3190		uint32_t i;
3191	} u;
3192
3193	u.pt.x = pt->x + dx;
3194	u.pt.y = pt->y + dy;
3195
3196	return cmd | (uint64_t)u.i<<32;
3197}
3198
/* Plot n individual pixels with the fill colour, one 2-dword
 * XY_PIXEL_BLT command per point, chunking the emission to fit the
 * remaining batch space and restarting the batch between chunks.
 */
fastcall static void sna_blt_fill_op_points(struct sna *sna,
					    const struct sna_fill_op *op,
					    int16_t dx, int16_t dy,
					    const DDXPointRec *p, int n)
{
	const struct sna_blt_state *blt = &op->base.u.blt;
	struct kgem *kgem = &sna->kgem;
	uint32_t cmd;

	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, n));

	/* Re-emit the fill setup if another bo owns the cached state. */
	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
		__sna_blt_fill_begin(sna, blt);

		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
		sna->blt_state.fill_pixel = blt->pixel;
		sna->blt_state.fill_alu = blt->alu;
	}

	/* Ensure room for at least one 2-dword command before the loop. */
	if (!kgem_check_batch(kgem, 2))
		sna_blt_fill_begin(sna, blt);

	cmd = XY_PIXEL_BLT;
	if (kgem->gen >= 040 && op->base.u.blt.bo[0]->tiling)
		cmd |= BLT_DST_TILED;

	do {
		uint32_t *b = kgem->batch + kgem->nbatch;
		int n_this_time, rem;

		assert(sna->kgem.mode == KGEM_BLT);
		/* Emit as many points as fit in the remaining batch space;
		 * each point costs 2 dwords.
		 */
		n_this_time = n;
		rem = kgem_batch_space(kgem);
		if (2*n_this_time > rem)
			n_this_time = rem / 2;
		assert(n_this_time);
		n -= n_this_time;

		/* Reserve the space up front, then write through b. */
		kgem->nbatch += 2 * n_this_time;
		assert(kgem->nbatch < kgem->surface);

		/* Hoist the common no-translation case out of the loop. */
		if ((dx|dy) == 0) {
			do {
				*(uint64_t *)b = pt_add(cmd, p++, 0, 0);
				b += 2;
			} while (--n_this_time);
		} else {
			do {
				*(uint64_t *)b = pt_add(cmd, p++, dx, dy);
				b += 2;
			} while (--n_this_time);
		}

		if (!n)
			return;

		/* Batch exhausted: flush and re-emit the fill setup. */
		sna_blt_fill_begin(sna, blt);
	} while (1);
}
3258
3259bool sna_blt_fill(struct sna *sna, uint8_t alu,
3260		  struct kgem_bo *bo, int bpp,
3261		  uint32_t pixel,
3262		  struct sna_fill_op *fill)
3263{
3264#if DEBUG_NO_BLT || NO_BLT_FILL
3265	return false;
3266#endif
3267
3268	DBG(("%s(alu=%d, pixel=%x, bpp=%d)\n", __FUNCTION__, alu, pixel, bpp));
3269
3270	if (!kgem_bo_can_blt(&sna->kgem, bo)) {
3271		DBG(("%s: rejected due to incompatible Y-tiling\n",
3272		     __FUNCTION__));
3273		return false;
3274	}
3275
3276	if (!sna_blt_fill_init(sna, &fill->base.u.blt,
3277			       bo, bpp, alu, pixel))
3278		return false;
3279
3280	assert(sna->kgem.mode == KGEM_BLT);
3281	fill->blt   = sna_blt_fill_op_blt;
3282	fill->box   = sna_blt_fill_op_box;
3283	fill->boxes = sna_blt_fill_op_boxes;
3284	fill->points = sna_blt_fill_op_points;
3285	fill->done  =
3286		(void (*)(struct sna *, const struct sna_fill_op *))nop_done;
3287	return true;
3288}
3289
3290static void sna_blt_copy_op_blt(struct sna *sna,
3291				const struct sna_copy_op *op,
3292				int16_t src_x, int16_t src_y,
3293				int16_t width, int16_t height,
3294				int16_t dst_x, int16_t dst_y)
3295{
3296	sna_blt_copy_one(sna, &op->base.u.blt,
3297			 src_x, src_y,
3298			 width, height,
3299			 dst_x, dst_y);
3300}
3301
3302bool sna_blt_copy(struct sna *sna, uint8_t alu,
3303		  struct kgem_bo *src,
3304		  struct kgem_bo *dst,
3305		  int bpp,
3306		  struct sna_copy_op *op)
3307{
3308#if DEBUG_NO_BLT || NO_BLT_COPY
3309	return false;
3310#endif
3311
3312	if (!kgem_bo_can_blt(&sna->kgem, src))
3313		return false;
3314
3315	if (!kgem_bo_can_blt(&sna->kgem, dst))
3316		return false;
3317
3318	if (!sna_blt_copy_init(sna, &op->base.u.blt,
3319			       src, dst,
3320			       bpp, alu))
3321		return false;
3322
3323	op->blt  = sna_blt_copy_op_blt;
3324	if (sna->kgem.gen >= 060 && src == dst)
3325		op->done = (void (*)(struct sna *, const struct sna_copy_op *))
3326			    gen6_blt_copy_done;
3327	else
3328		op->done = (void (*)(struct sna *, const struct sna_copy_op *))
3329			    nop_done;
3330	return true;
3331}
3332
/* Emit a single XY_COLOR_BLT solid fill for one box, first trying to
 * coalesce with the command already at the tail of the batch: a
 * previous fill or copy of exactly the same box is rewritten in place
 * rather than emitting a new command.  Returns false when the caller
 * should instead use the scanline-based fill path (matching cached
 * state, or no room after a flush).
 */
static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
			     struct kgem_bo *bo, int bpp,
			     uint32_t color,
			     const BoxRec *box)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t br13, cmd, *b;
	bool overwrites;

	assert(kgem_bo_can_blt (kgem, bo));

	DBG(("%s: box=((%d, %d), (%d, %d))\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	assert(box->x1 >= 0);
	assert(box->y1 >= 0);

	/* Command length differs: gen8+ carries a 64-bit relocation. */
	cmd = XY_COLOR_BLT | (kgem->gen >= 0100 ? 5 : 4);
	br13 = bo->pitch;
	if (kgem->gen >= 040 && bo->tiling) {
		cmd |= BLT_DST_TILED;
		/* Tiled pitch is programmed in dwords, not bytes. */
		br13 >>= 2;
	}
	assert(br13 <= MAXSHORT);

	br13 |= fill_ROP[alu] << 16;
	/* Deliberate fallthrough: 32bpp sets both colour-depth bits. */
	switch (bpp) {
	default: assert(0);
	case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		 br13 |= 1 << 25; /* RGB8888 */
		 /* fallthrough */
	case 16: br13 |= 1 << 24; /* RGB565 */
		 /* fallthrough */
	case 8: break;
	}

	/* All too frequently one blt completely overwrites the previous */
	overwrites = alu == GXcopy || alu == GXclear || alu == GXset;
	if (overwrites) {
		/* Offsets below are keyed to the exact command layouts
		 * (7/10 dwords on gen8+, 6/8 dwords earlier); the box in
		 * the batch is compared as a single 64-bit word.
		 */
		if (sna->kgem.gen >= 0100) {
			if (kgem->nbatch >= 7 &&
			    kgem->batch[kgem->nbatch-7] == cmd &&
			    *(uint64_t *)&kgem->batch[kgem->nbatch-5] == *(const uint64_t *)box &&
			    kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) {
				DBG(("%s: replacing last fill\n", __FUNCTION__));
				kgem->batch[kgem->nbatch-6] = br13;
				kgem->batch[kgem->nbatch-1] = color;
				return true;
			}
			if (kgem->nbatch >= 10 &&
			    (kgem->batch[kgem->nbatch-10] & 0xffc00000) == XY_SRC_COPY_BLT_CMD &&
			    *(uint64_t *)&kgem->batch[kgem->nbatch-8] == *(const uint64_t *)box &&
			    kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) {
				DBG(("%s: replacing last copy\n", __FUNCTION__));
				kgem->batch[kgem->nbatch-10] = cmd;
				kgem->batch[kgem->nbatch-8] = br13;
				kgem->batch[kgem->nbatch-4] = color;
				/* Keep the src bo as part of the execlist, just remove
				 * its relocation entry.
				 */
				kgem->nreloc--;
				kgem->nbatch -= 3;
				return true;
			}
		} else {
			if (kgem->nbatch >= 6 &&
			    kgem->batch[kgem->nbatch-6] == cmd &&
			    *(uint64_t *)&kgem->batch[kgem->nbatch-4] == *(const uint64_t *)box &&
			    kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) {
				DBG(("%s: replacing last fill\n", __FUNCTION__));
				kgem->batch[kgem->nbatch-5] = br13;
				kgem->batch[kgem->nbatch-1] = color;
				return true;
			}
			if (kgem->nbatch >= 8 &&
			    (kgem->batch[kgem->nbatch-8] & 0xffc00000) == XY_SRC_COPY_BLT_CMD &&
			    *(uint64_t *)&kgem->batch[kgem->nbatch-6] == *(const uint64_t *)box &&
			    kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) {
				DBG(("%s: replacing last copy\n", __FUNCTION__));
				kgem->batch[kgem->nbatch-8] = cmd;
				kgem->batch[kgem->nbatch-7] = br13;
				kgem->batch[kgem->nbatch-3] = color;
				/* Keep the src bo as part of the execlist, just remove
				 * its relocation entry.
				 */
				kgem->nreloc--;
				kgem->nbatch -= 2;
				return true;
			}
		}
	}

	/* If we are currently emitting SCANLINES, keep doing so */
	if (sna->blt_state.fill_bo == bo->unique_id &&
	    sna->blt_state.fill_pixel == color &&
	    (sna->blt_state.fill_alu == alu ||
	     sna->blt_state.fill_alu == ~alu)) {
		DBG(("%s: matching last fill, converting to scanlines\n",
		     __FUNCTION__));
		return false;
	}

	kgem_set_mode(kgem, KGEM_BLT, bo);
	if (!kgem_check_batch(kgem, 7) ||
	    !kgem_check_reloc(kgem, 1) ||
	    !kgem_check_bo_fenced(kgem, bo)) {
		kgem_submit(kgem);
		if (!kgem_check_bo_fenced(&sna->kgem, bo))
			return false;

		_kgem_set_mode(kgem, KGEM_BLT);
	}
	kgem_bcs_set_tiling(&sna->kgem, NULL, bo);

	assert(kgem_check_batch(kgem, 6));
	assert(kgem_check_reloc(kgem, 1));

	assert(sna->kgem.mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	b[0] = cmd;
	b[1] = br13;
	/* Both box corners written as one 64-bit store. */
	*(uint64_t *)(b+2) = *(const uint64_t *)box;
	if (kgem->gen >= 0100) {
		*(uint64_t *)(b+4) =
			kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
					 I915_GEM_DOMAIN_RENDER << 16 |
					 I915_GEM_DOMAIN_RENDER |
					 KGEM_RELOC_FENCED,
					 0);
		b[6] = color;
		kgem->nbatch += 7;
	} else {
		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
				      I915_GEM_DOMAIN_RENDER << 16 |
				      I915_GEM_DOMAIN_RENDER |
				      KGEM_RELOC_FENCED,
				      0);
		b[5] = color;
		kgem->nbatch += 6;
	}
	assert(kgem->nbatch < kgem->surface);

	/* NOTE(review): the alu is recorded inverted — apparently to
	 * distinguish "last op was a one-shot XY_COLOR_BLT" from full
	 * scanline fill state (cf. the ~alu match above) — confirm.
	 */
	sna->blt_state.fill_bo = bo->unique_id;
	sna->blt_state.fill_pixel = color;
	sna->blt_state.fill_alu = ~alu;
	return true;
}
3478
/* Fill an array of boxes with a solid colour on the BLT ring.  A single
 * box is first attempted as a one-shot XY_COLOR_BLT; otherwise a
 * XY_SETUP_MONO_PATTERN_SL_BLT establishes the fill state (cached in
 * sna->blt_state) and each box costs a 3-dword XY_SCANLINE_BLT.
 * Returns false when the bo cannot be blitted or does not fit, so the
 * caller can fall back.
 */
bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
			struct kgem_bo *bo, int bpp,
			uint32_t pixel,
			const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t br13, cmd;

#if DEBUG_NO_BLT || NO_BLT_FILL_BOXES
	return false;
#endif

	DBG(("%s (%d, %08x, %d) x %d\n",
	     __FUNCTION__, bpp, pixel, alu, nbox));

	if (!kgem_bo_can_blt(kgem, bo)) {
		DBG(("%s: fallback -- cannot blt to dst\n", __FUNCTION__));
		return false;
	}

	/* Canonicalise alu/pixel so equivalent fills share cached state. */
	if (alu == GXclear)
		pixel = 0;
	else if (alu == GXcopy) {
		if (pixel == 0)
			alu = GXclear;
		else if (pixel == -1)
			alu = GXset;
	}

	if (nbox == 1 && sna_blt_fill_box(sna, alu, bo, bpp, pixel, box))
		return true;

	br13 = bo->pitch;
	cmd = XY_SCANLINE_BLT;
	if (kgem->gen >= 040 && bo->tiling) {
		/* NOTE(review): bit 11 looks like the tiled-destination
		 * flag for XY_SCANLINE_BLT — confirm against the PRM.
		 */
		cmd |= 1 << 11;
		br13 >>= 2;
	}
	assert(br13 <= MAXSHORT);

	br13 |= 1<<31 | fill_ROP[alu] << 16;
	/* Deliberate fallthrough: 32bpp sets both colour-depth bits. */
	switch (bpp) {
	default: assert(0);
	case 32: br13 |= 1 << 25; /* RGB8888 */
		 /* fallthrough */
	case 16: br13 |= 1 << 24; /* RGB565 */
		 /* fallthrough */
	case 8: break;
	}

	kgem_set_mode(kgem, KGEM_BLT, bo);
	if (!kgem_check_batch(kgem, 14) ||
	    !kgem_check_bo_fenced(kgem, bo)) {
		kgem_submit(kgem);
		if (!kgem_check_bo_fenced(&sna->kgem, bo))
			return false;
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	/* Emit the scanline fill setup only if the cached state differs. */
	if (sna->blt_state.fill_bo != bo->unique_id ||
	    sna->blt_state.fill_pixel != pixel ||
	    sna->blt_state.fill_alu != alu)
	{
		uint32_t *b;

		if (!kgem_check_batch(kgem, 24) ||
		    !kgem_check_reloc(kgem, 1)) {
			_kgem_submit(kgem);
			if (!kgem_check_bo_fenced(&sna->kgem, bo))
				return false;
			_kgem_set_mode(kgem, KGEM_BLT);
		}

		kgem_bcs_set_tiling(&sna->kgem, NULL, bo);

		assert(sna->kgem.mode == KGEM_BLT);
		b = kgem->batch + kgem->nbatch;
		if (kgem->gen >= 0100) {
			/* gen8+: 10-dword setup with 64-bit relocation. */
			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
			if (bpp == 32)
				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
			if (bo->tiling)
				b[0] |= BLT_DST_TILED;
			b[1] = br13;
			b[2] = 0;
			b[3] = 0;
			*(uint64_t *)(b+4) =
				kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
						 I915_GEM_DOMAIN_RENDER << 16 |
						 I915_GEM_DOMAIN_RENDER |
						 KGEM_RELOC_FENCED,
						 0);
			b[6] = pixel;
			b[7] = pixel;
			b[8] = 0;
			b[9] = 0;
			kgem->nbatch += 10;
		} else {
			/* pre-gen8: 9-dword setup with 32-bit relocation. */
			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
			if (bpp == 32)
				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
			if (bo->tiling && kgem->gen >= 040)
				b[0] |= BLT_DST_TILED;
			b[1] = br13;
			b[2] = 0;
			b[3] = 0;
			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
					      I915_GEM_DOMAIN_RENDER << 16 |
					      I915_GEM_DOMAIN_RENDER |
					      KGEM_RELOC_FENCED,
					      0);
			b[5] = pixel;
			b[6] = pixel;
			b[7] = 0;
			b[8] = 0;
			kgem->nbatch += 9;
		}
		assert(kgem->nbatch < kgem->surface);

		sna->blt_state.fill_bo = bo->unique_id;
		sna->blt_state.fill_pixel = pixel;
		sna->blt_state.fill_alu = alu;
	}

	/* Emit the boxes in chunks bounded by batch space (3 dwords per
	 * box), flushing and re-emitting the fill setup between chunks.
	 */
	do {
		int nbox_this_time, rem;

		nbox_this_time = nbox;
		rem = kgem_batch_space(kgem);
		if (3*nbox_this_time > rem)
			nbox_this_time = rem / 3;
		DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
		     __FUNCTION__, nbox_this_time, nbox, rem));
		assert(nbox_this_time > 0);
		nbox -= nbox_this_time;

		assert(sna->kgem.mode == KGEM_BLT);
		do {
			uint32_t *b;

			DBG(("%s: (%d, %d), (%d, %d): %08x\n",
			     __FUNCTION__,
			     box->x1, box->y1,
			     box->x2, box->y2,
			     pixel));

			assert(box->x1 >= 0);
			assert(box->y1 >= 0);
			assert(box->y2 * bo->pitch <= kgem_bo_size(bo));

			b = kgem->batch + kgem->nbatch;
			kgem->nbatch += 3;
			assert(kgem->nbatch < kgem->surface);
			b[0] = cmd;
			/* Both box corners written as one 64-bit store. */
			*(uint64_t *)(b+1) = *(const uint64_t *)box;
			box++;
		} while (--nbox_this_time);

		if (nbox) {
			uint32_t *b;

			/* Batch full: flush and re-emit the fill setup for
			 * the next chunk (new batch loses all state).
			 */
			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
			kgem_bcs_set_tiling(&sna->kgem, NULL, bo);

			assert(sna->kgem.mode == KGEM_BLT);
			b = kgem->batch + kgem->nbatch;
			if (kgem->gen >= 0100) {
				b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
				if (bpp == 32)
					b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
				if (bo->tiling)
					b[0] |= BLT_DST_TILED;
				b[1] = br13;
				b[2] = 0;
				b[3] = 0;
				*(uint64_t *)(b+4) =
					kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 I915_GEM_DOMAIN_RENDER |
							 KGEM_RELOC_FENCED,
							 0);
				b[6] = pixel;
				b[7] = pixel;
				b[8] = 0;
				b[9] = 0;
				kgem->nbatch += 10;
			} else {
				b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
				if (bpp == 32)
					b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
				if (bo->tiling && kgem->gen >= 040)
					b[0] |= BLT_DST_TILED;
				b[1] = br13;
				b[2] = 0;
				b[3] = 0;
				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      I915_GEM_DOMAIN_RENDER |
						      KGEM_RELOC_FENCED,
						      0);
				b[5] = pixel;
				b[6] = pixel;
				b[7] = 0;
				b[8] = 0;
				kgem->nbatch += 9;
			}
			assert(kgem->nbatch < kgem->surface);
			assert(kgem_check_batch(kgem, 3));
		}
	} while (nbox);

	/* Submit immediately if the GPU is idle to reduce latency. */
	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
		_kgem_submit(kgem);
	}

	return true;
}
3696
/* Copy an array of boxes between two bos with XY_SRC_COPY_BLT.  The
 * boxes are given in a shared space, with (src_dx,src_dy) and
 * (dst_dx,dst_dy) translating into the source and destination.  A
 * previous solid fill that the first box completely overwrites is
 * deleted from the batch.  Falls back to the tiled copy path when both
 * bos do not fit in the aperture, and returns false when the bos
 * cannot be blitted at all.  Four specialised emission loops cover
 * {zero, non-zero} destination offset x {gen8+, earlier} (8/10 dwords
 * and one/two-dword relocations per command respectively).
 */
bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
			struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
			struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
			int bpp, const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	unsigned src_pitch, br13, cmd;

#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES
	return false;
#endif

	DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n",
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox,
	    src_bo->tiling, dst_bo->tiling,
	    src_bo->pitch, dst_bo->pitch));
	assert(nbox);

	if (wedged(sna) || !kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) {
		DBG(("%s: cannot blt to src? %d or dst? %d\n",
		     __FUNCTION__,
		     kgem_bo_can_blt(kgem, src_bo),
		     kgem_bo_can_blt(kgem, dst_bo)));
		return false;
	}

	cmd = XY_SRC_COPY_BLT_CMD;
	if (bpp == 32)
		cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;

	src_pitch = src_bo->pitch;
	if (kgem->gen >= 040 && src_bo->tiling) {
		cmd |= BLT_SRC_TILED;
		/* Tiled pitch is programmed in dwords, not bytes. */
		src_pitch >>= 2;
	}
	assert(src_pitch <= MAXSHORT);

	br13 = dst_bo->pitch;
	if (kgem->gen >= 040 && dst_bo->tiling) {
		cmd |= BLT_DST_TILED;
		br13 >>= 2;
	}
	assert(br13 <= MAXSHORT);

	br13 |= copy_ROP[alu] << 16;
	/* Deliberate fallthrough: 32bpp sets both colour-depth bits. */
	switch (bpp) {
	default: assert(0);
	case 32: br13 |= 1 << 25; /* RGB8888 */
		 /* fallthrough */
	case 16: br13 |= 1 << 24; /* RGB565 */
		 /* fallthrough */
	case 8: break;
	}

	/* Compare first box against a previous fill */
	if ((alu == GXcopy || alu == GXclear || alu == GXset) &&
	    kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) {
		/* Offsets keyed to the XY_COLOR_BLT layouts: 7 dwords on
		 * gen8+, 6 earlier.  If the tail of the batch is a fill of
		 * exactly the first box, the copy will overwrite it, so
		 * drop the fill and its relocation.
		 */
		if (kgem->gen >= 0100) {
			if (kgem->nbatch >= 7 &&
			    kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
			    kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
				DBG(("%s: deleting last fill\n", __FUNCTION__));
				kgem->nbatch -= 7;
				kgem->nreloc--;
			}
		} else {
			if (kgem->nbatch >= 6 &&
			    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
			    kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
				DBG(("%s: deleting last fill\n", __FUNCTION__));
				kgem->nbatch -= 6;
				kgem->nreloc--;
			}
		}
	}

	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
	if (!kgem_check_batch(kgem, 10) ||
	    !kgem_check_reloc(kgem, 2) ||
	    !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
			DBG(("%s: not enough room in aperture, fallback to tiling copy\n", __FUNCTION__));
			return sna_tiling_blt_copy_boxes(sna, alu,
							 src_bo, src_dx, src_dy,
							 dst_bo, dst_dx, dst_dy,
							 bpp, box, nbox);
		}
		_kgem_set_mode(kgem, KGEM_BLT);
	}
	kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);

	if ((dst_dx | dst_dy) == 0) {
		/* Fast path: destination coordinates equal the box, so the
		 * first two command dwords are precomputed and each box is
		 * written with 64-bit stores.
		 */
		if (kgem->gen >= 0100) {
			uint64_t hdr = (uint64_t)br13 << 32 | cmd | 8;
			do {
				int nbox_this_time, rem;

				/* Chunk by both batch space (10 dwords per
				 * box) and remaining relocation entries
				 * (2 per box).
				 */
				nbox_this_time = nbox;
				rem = kgem_batch_space(kgem);
				if (10*nbox_this_time > rem)
					nbox_this_time = rem / 10;
				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
				     __FUNCTION__, nbox_this_time, nbox, rem));
				assert(nbox_this_time > 0);
				nbox -= nbox_this_time;

				assert(sna->kgem.mode == KGEM_BLT);
				do {
					uint32_t *b = kgem->batch + kgem->nbatch;

					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
					     __FUNCTION__,
					     box->x1, box->y1,
					     box->x2 - box->x1, box->y2 - box->y1));

					assert(box->x1 + src_dx >= 0);
					assert(box->y1 + src_dy >= 0);
					assert(box->x1 + src_dx <= INT16_MAX);
					assert(box->y1 + src_dy <= INT16_MAX);

					assert(box->x1 >= 0);
					assert(box->y1 >= 0);

					*(uint64_t *)&b[0] = hdr;
					*(uint64_t *)&b[2] = *(const uint64_t *)box;
					*(uint64_t *)(b+4) =
						kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
								 I915_GEM_DOMAIN_RENDER << 16 |
								 I915_GEM_DOMAIN_RENDER |
								 KGEM_RELOC_FENCED,
								 0);
					b[6] = add2(b[2], src_dx, src_dy);
					b[7] = src_pitch;
					*(uint64_t *)(b+8) =
						kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
								 I915_GEM_DOMAIN_RENDER << 16 |
								 KGEM_RELOC_FENCED,
								 0);
					kgem->nbatch += 10;
					assert(kgem->nbatch < kgem->surface);
					box++;
				} while (--nbox_this_time);

				if (!nbox)
					break;

				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
				kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
			} while (1);
		} else {
			uint64_t hdr = (uint64_t)br13 << 32 | cmd | 6;
			do {
				int nbox_this_time, rem;

				nbox_this_time = nbox;
				rem = kgem_batch_space(kgem);
				if (8*nbox_this_time > rem)
					nbox_this_time = rem / 8;
				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
				     __FUNCTION__, nbox_this_time, nbox, rem));
				assert(nbox_this_time > 0);
				nbox -= nbox_this_time;

				assert(sna->kgem.mode == KGEM_BLT);
				do {
					uint32_t *b = kgem->batch + kgem->nbatch;

					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
					     __FUNCTION__,
					     box->x1, box->y1,
					     box->x2 - box->x1, box->y2 - box->y1));

					assert(box->x1 + src_dx >= 0);
					assert(box->y1 + src_dy >= 0);
					assert(box->x1 + src_dx <= INT16_MAX);
					assert(box->y1 + src_dy <= INT16_MAX);

					assert(box->x1 >= 0);
					assert(box->y1 >= 0);

					*(uint64_t *)&b[0] = hdr;
					*(uint64_t *)&b[2] = *(const uint64_t *)box;
					b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
							      I915_GEM_DOMAIN_RENDER << 16 |
							      I915_GEM_DOMAIN_RENDER |
							      KGEM_RELOC_FENCED,
							      0);
					b[5] = add2(b[2], src_dx, src_dy);
					b[6] = src_pitch;
					b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
							      I915_GEM_DOMAIN_RENDER << 16 |
							      KGEM_RELOC_FENCED,
							      0);
					kgem->nbatch += 8;
					assert(kgem->nbatch < kgem->surface);
					box++;
				} while (--nbox_this_time);

				if (!nbox)
					break;

				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
				kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
			} while (1);
		}
	} else {
		/* General path: translate each box into destination space
		 * dword by dword.
		 */
		if (kgem->gen >= 0100) {
			cmd |= 8;
			do {
				int nbox_this_time, rem;

				nbox_this_time = nbox;
				rem = kgem_batch_space(kgem);
				if (10*nbox_this_time > rem)
					nbox_this_time = rem / 10;
				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
				     __FUNCTION__, nbox_this_time, nbox, rem));
				assert(nbox_this_time > 0);
				nbox -= nbox_this_time;

				assert(sna->kgem.mode == KGEM_BLT);
				do {
					uint32_t *b = kgem->batch + kgem->nbatch;

					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
					     __FUNCTION__,
					     box->x1, box->y1,
					     box->x2 - box->x1, box->y2 - box->y1));

					assert(box->x1 + src_dx >= 0);
					assert(box->y1 + src_dy >= 0);

					assert(box->x1 + dst_dx >= 0);
					assert(box->y1 + dst_dy >= 0);

					b[0] = cmd;
					b[1] = br13;
					b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
					b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
					*(uint64_t *)(b+4) =
						kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
								 I915_GEM_DOMAIN_RENDER << 16 |
								 I915_GEM_DOMAIN_RENDER |
								 KGEM_RELOC_FENCED,
								 0);
					b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
					b[7] = src_pitch;
					*(uint64_t *)(b+8) =
						kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
								 I915_GEM_DOMAIN_RENDER << 16 |
								 KGEM_RELOC_FENCED,
								 0);
					kgem->nbatch += 10;
					assert(kgem->nbatch < kgem->surface);
					box++;
				} while (--nbox_this_time);

				if (!nbox)
					break;

				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
				kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
			} while (1);
		} else {
			cmd |= 6;
			do {
				int nbox_this_time, rem;

				nbox_this_time = nbox;
				rem = kgem_batch_space(kgem);
				if (8*nbox_this_time > rem)
					nbox_this_time = rem / 8;
				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
				     __FUNCTION__, nbox_this_time, nbox, rem));
				assert(nbox_this_time > 0);
				nbox -= nbox_this_time;

				assert(sna->kgem.mode == KGEM_BLT);
				do {
					uint32_t *b = kgem->batch + kgem->nbatch;

					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
					     __FUNCTION__,
					     box->x1, box->y1,
					     box->x2 - box->x1, box->y2 - box->y1));

					assert(box->x1 + src_dx >= 0);
					assert(box->y1 + src_dy >= 0);

					assert(box->x1 + dst_dx >= 0);
					assert(box->y1 + dst_dy >= 0);

					b[0] = cmd;
					b[1] = br13;
					b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
					b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
					b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
							      I915_GEM_DOMAIN_RENDER << 16 |
							      I915_GEM_DOMAIN_RENDER |
							      KGEM_RELOC_FENCED,
							      0);
					b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
					b[6] = src_pitch;
					b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
							      I915_GEM_DOMAIN_RENDER << 16 |
							      KGEM_RELOC_FENCED,
							      0);
					kgem->nbatch += 8;
					assert(kgem->nbatch < kgem->surface);
					box++;
				} while (--nbox_this_time);

				if (!nbox)
					break;

				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
				kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
			} while (1);
		}
	}

	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
		_kgem_submit(kgem);
	} else if (kgem->gen >= 060 && src_bo == dst_bo && kgem_check_batch(kgem, 3)) {
		/* NOTE(review): presumably XY_SETUP_CLIP here serialises
		 * gen6+ self-copies within the bo — confirm against the
		 * matching workaround in gen6_blt_copy_done.
		 */
		uint32_t *b = kgem->batch + kgem->nbatch;
		assert(sna->kgem.mode == KGEM_BLT);
		b[0] = XY_SETUP_CLIP;
		b[1] = b[2] = 0;
		kgem->nbatch += 3;
		assert(kgem->nbatch < kgem->surface);
	}

	/* Invalidate the cached fill state; the batch tail is no longer
	 * a fill command.
	 */
	sna->blt_state.fill_bo = 0;
	return true;
}
4046
4047bool sna_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu,
4048				    struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
4049				    struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
4050				    int bpp, int alpha_fixup,
4051				    const BoxRec *box, int nbox)
4052{
4053	struct kgem *kgem = &sna->kgem;
4054	unsigned src_pitch, br13, cmd;
4055
4056#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES
4057	return false;
4058#endif
4059
4060	DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n",
4061	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox,
4062	    src_bo->tiling, dst_bo->tiling,
4063	    src_bo->pitch, dst_bo->pitch));
4064
4065	if (wedged(sna) || !kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) {
4066		DBG(("%s: cannot blt to src? %d or dst? %d\n",
4067		     __FUNCTION__,
4068		     kgem_bo_can_blt(kgem, src_bo),
4069		     kgem_bo_can_blt(kgem, dst_bo)));
4070		return false;
4071	}
4072
4073	cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 12 : 10);
4074	src_pitch = src_bo->pitch;
4075	if (kgem->gen >= 040 && src_bo->tiling) {
4076		cmd |= BLT_SRC_TILED;
4077		src_pitch >>= 2;
4078	}
4079	assert(src_pitch <= MAXSHORT);
4080
4081	br13 = dst_bo->pitch;
4082	if (kgem->gen >= 040 && dst_bo->tiling) {
4083		cmd |= BLT_DST_TILED;
4084		br13 >>= 2;
4085	}
4086	assert(br13 <= MAXSHORT);
4087
4088	br13 |= copy_ROP[alu] << 16;
4089	switch (bpp) {
4090	default: assert(0);
4091	case 32: br13 |= 1 << 25; /* RGB8888 */
4092	case 16: br13 |= 1 << 24; /* RGB565 */
4093	case 8: break;
4094	}
4095
4096	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
4097	if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
4098		DBG(("%s: cannot fit src+dst into aperture\n", __FUNCTION__));
4099		return false;
4100	}
4101
4102	/* Compare first box against a previous fill */
4103	if ((alu == GXcopy || alu == GXclear || alu == GXset) &&
4104	    kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) {
4105		if (kgem->gen >= 0100) {
4106			if (kgem->nbatch >= 7 &&
4107			    kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
4108			    kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
4109			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
4110				DBG(("%s: deleting last fill\n", __FUNCTION__));
4111				kgem->nbatch -= 7;
4112				kgem->nreloc--;
4113			}
4114		} else {
4115			if (kgem->nbatch >= 6 &&
4116			    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
4117			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
4118			    kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
4119				DBG(("%s: deleting last fill\n", __FUNCTION__));
4120				kgem->nbatch -= 6;
4121				kgem->nreloc--;
4122			}
4123		}
4124	}
4125
4126	while (nbox--) {
4127		uint32_t *b;
4128
4129		if (!kgem_check_batch(kgem, 14) ||
4130		    !kgem_check_reloc(kgem, 2)) {
4131			_kgem_submit(kgem);
4132			_kgem_set_mode(kgem, KGEM_BLT);
4133			kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
4134		}
4135
4136		assert(sna->kgem.mode == KGEM_BLT);
4137		b = kgem->batch + kgem->nbatch;
4138		b[0] = cmd;
4139		b[1] = br13;
4140		b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
4141		b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
4142		if (sna->kgem.gen >= 0100) {
4143			*(uint64_t *)(b+4) =
4144				kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
4145						 I915_GEM_DOMAIN_RENDER << 16 |
4146						 I915_GEM_DOMAIN_RENDER |
4147						 KGEM_RELOC_FENCED,
4148						 0);
4149			b[6] = src_pitch;
4150			b[7] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx);
4151			*(uint64_t *)(b+8) =
4152				kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
4153						 I915_GEM_DOMAIN_RENDER << 16 |
4154						 KGEM_RELOC_FENCED,
4155						 0);
4156			b[10] = alpha_fixup;
4157			b[11] = alpha_fixup;
4158			b[12] = 0;
4159			b[13] = 0;
4160			kgem->nbatch += 14;
4161		} else {
4162			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
4163					      I915_GEM_DOMAIN_RENDER << 16 |
4164					      I915_GEM_DOMAIN_RENDER |
4165					      KGEM_RELOC_FENCED,
4166					      0);
4167			b[5] = src_pitch;
4168			b[6] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx);
4169			b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
4170					      I915_GEM_DOMAIN_RENDER << 16 |
4171					      KGEM_RELOC_FENCED,
4172					      0);
4173			b[8] = alpha_fixup;
4174			b[9] = alpha_fixup;
4175			b[10] = 0;
4176			b[11] = 0;
4177			kgem->nbatch += 12;
4178		}
4179		assert(kgem->nbatch < kgem->surface);
4180		box++;
4181	}
4182
4183	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
4184		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
4185		_kgem_submit(kgem);
4186	}
4187
4188	sna->blt_state.fill_bo = 0;
4189	return true;
4190}
4191
4192static void box_extents(const BoxRec *box, int n, BoxRec *extents)
4193{
4194	*extents = *box;
4195	while (--n) {
4196		box++;
4197		if (box->x1 < extents->x1)
4198			extents->x1 = box->x1;
4199		if (box->y1 < extents->y1)
4200			extents->y1 = box->y1;
4201
4202		if (box->x2 > extents->x2)
4203			extents->x2 = box->x2;
4204		if (box->y2 > extents->y2)
4205			extents->y2 = box->y2;
4206	}
4207}
4208
/* Copy nbox boxes from src_bo to dst_bo via the BLT engine, working
 * around Y-tiled surfaces the blitter cannot handle: first by asking
 * the pixmap layer to convert the surface to X-tiling in place, and,
 * for the src==dst case only, by falling back to staging the source
 * region through a temporary X-tiled bo.  Returns false if the depths
 * mismatch, a required conversion fails, or the final copy fails.
 */
bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu,
				 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
				 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
				 const BoxRec *box, int nbox)
{
	struct kgem_bo *free_bo = NULL;	/* temporary staging bo, destroyed before return */
	bool ret;

	DBG(("%s: alu=%d, n=%d\n", __FUNCTION__, alu, nbox));

	/* The BLT copy below assumes identical pixel layout on both sides. */
	if (!sna_blt_compare_depth(src, dst)) {
		DBG(("%s: mismatching depths %d -> %d\n",
		     __FUNCTION__, src->depth, dst->depth));
		return false;
	}

	if (src_bo == dst_bo) {
		DBG(("%s: dst == src\n", __FUNCTION__));

		/* Self-copy on a Y-tiled bo the blitter cannot address.
		 * NOTE(review): the kgem_bo_blt_pitch_is_ok() term presumably
		 * leaves the !pitch_is_ok case to another path — confirm.
		 */
		if (src_bo->tiling == I915_TILING_Y &&
		    !sna->kgem.can_blt_y &&
		    kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) {
			struct kgem_bo *bo;

			DBG(("%s: src is Y-tiled\n", __FUNCTION__));

			/* Only pixmaps can be re-tiled in place. */
			if (src->type != DRAWABLE_PIXMAP)
				return false;

			assert(sna_pixmap((PixmapPtr)src)->gpu_bo == src_bo);
			bo = sna_pixmap_change_tiling((PixmapPtr)src, I915_TILING_X);
			if (bo == NULL) {
				BoxRec extents;

				DBG(("%s: y-tiling conversion failed\n",
				     __FUNCTION__));

				/* In-place conversion failed: stage the source
				 * region through a temporary X-tiled bo sized
				 * to the bounding box of all the copies.
				 */
				box_extents(box, nbox, &extents);
				free_bo = kgem_create_2d(&sna->kgem,
							 extents.x2 - extents.x1,
							 extents.y2 - extents.y1,
							 src->bitsPerPixel,
							 I915_TILING_X, 0);
				if (free_bo == NULL) {
					DBG(("%s: fallback -- temp allocation failed\n",
					     __FUNCTION__));
					return false;
				}

				/* Copy the region into the staging bo, rebased
				 * so that (extents.x1, extents.y1) maps to its
				 * origin.
				 */
				if (!sna_blt_copy_boxes(sna, GXcopy,
							src_bo, src_dx, src_dy,
							free_bo, -extents.x1, -extents.y1,
							src->bitsPerPixel,
							box, nbox)) {
					DBG(("%s: fallback -- temp copy failed\n",
					     __FUNCTION__));
					kgem_bo_destroy(&sna->kgem, free_bo);
					return false;
				}

				/* Redirect the source to the staging bo and
				 * adjust the offsets to its coordinate space.
				 */
				src_dx = -extents.x1;
				src_dy = -extents.y1;
				src_bo = free_bo;
			} else
				/* Conversion succeeded: the pixmap's bo was
				 * replaced, so use it for both sides.
				 */
				dst_bo = src_bo = bo;
		}
	} else {
		/* Distinct bos: convert each Y-tiled side independently. */
		if (src_bo->tiling == I915_TILING_Y &&
		    !sna->kgem.can_blt_y &&
		    kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) {
			DBG(("%s: src is y-tiled\n", __FUNCTION__));
			if (src->type != DRAWABLE_PIXMAP)
				return false;
			assert(sna_pixmap((PixmapPtr)src)->gpu_bo == src_bo);
			src_bo = sna_pixmap_change_tiling((PixmapPtr)src, I915_TILING_X);
			if (src_bo == NULL) {
				DBG(("%s: fallback -- src y-tiling conversion failed\n",
				     __FUNCTION__));
				return false;
			}
		}

		if (dst_bo->tiling == I915_TILING_Y &&
		    !sna->kgem.can_blt_y &&
		    kgem_bo_blt_pitch_is_ok(&sna->kgem, dst_bo)) {
			DBG(("%s: dst is y-tiled\n", __FUNCTION__));
			if (dst->type != DRAWABLE_PIXMAP)
				return false;
			assert(sna_pixmap((PixmapPtr)dst)->gpu_bo == dst_bo);
			dst_bo = sna_pixmap_change_tiling((PixmapPtr)dst, I915_TILING_X);
			if (dst_bo == NULL) {
				DBG(("%s: fallback -- dst y-tiling conversion failed\n",
				     __FUNCTION__));
				return false;
			}
		}
	}

	/* All surfaces are now blitter-compatible; do the actual copy. */
	ret =  sna_blt_copy_boxes(sna, alu,
				  src_bo, src_dx, src_dy,
				  dst_bo, dst_dx, dst_dy,
				  dst->bitsPerPixel,
				  box, nbox);

	if (free_bo)
		kgem_bo_destroy(&sna->kgem, free_bo);

	return ret;
}
4318