sna_blt.c revision 03b705cf
1/*
2 * Based on code from intel_uxa.c and i830_xaa.c
3 * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
4 * Copyright (c) 2005 Jesse Barnes <jbarnes@virtuousgeek.org>
5 * Copyright (c) 2009-2011 Intel Corporation
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * SOFTWARE.
25 *
26 * Authors:
27 *    Chris Wilson <chris@chris-wilson.co.uk>
28 *
29 */
30
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <string.h>

#include "sna.h"
#include "sna_render.h"
#include "sna_render_inline.h"
#include "sna_reg.h"
#include "rop.h"
40
41#define NO_BLT_COMPOSITE 0
42#define NO_BLT_COPY 0
43#define NO_BLT_COPY_BOXES 0
44#define NO_BLT_FILL 0
45#define NO_BLT_FILL_BOXES 0
46
/* X11 alu -> BLT engine raster-op byte for source (S) to destination
 * (D) copies; indexed by the GX* alu value. */
static const uint8_t copy_ROP[] = {
	ROP_0,                  /* GXclear */
	ROP_DSa,                /* GXand */
	ROP_SDna,               /* GXandReverse */
	ROP_S,                  /* GXcopy */
	ROP_DSna,               /* GXandInverted */
	ROP_D,                  /* GXnoop */
	ROP_DSx,                /* GXxor */
	ROP_DSo,                /* GXor */
	ROP_DSon,               /* GXnor */
	ROP_DSxn,               /* GXequiv */
	ROP_Dn,                 /* GXinvert */
	ROP_SDno,               /* GXorReverse */
	ROP_Sn,                 /* GXcopyInverted */
	ROP_DSno,               /* GXorInverted */
	ROP_DSan,               /* GXnand */
	ROP_1                   /* GXset */
};
65
/* X11 alu -> BLT engine raster-op byte for pattern (P) to destination
 * (D) solid fills; indexed by the GX* alu value. */
static const uint8_t fill_ROP[] = {
	ROP_0,                  /* GXclear */
	ROP_DPa,                /* GXand */
	ROP_PDna,               /* GXandReverse */
	ROP_P,                  /* GXcopy */
	ROP_DPna,               /* GXandInverted */
	ROP_D,                  /* GXnoop */
	ROP_DPx,                /* GXxor */
	ROP_DPo,                /* GXor */
	ROP_DPon,               /* GXnor */
	ROP_PDxn,               /* GXequiv */
	ROP_Dn,                 /* GXinvert */
	ROP_PDno,               /* GXorReverse */
	ROP_Pn,                 /* GXcopyInverted */
	ROP_DPno,               /* GXorInverted */
	ROP_DPan,               /* GXnand */
	ROP_1                   /* GXset */
};
84
/* Completion callback for ops needing no epilogue: just submit the
 * accumulated batch early if the ring already appears idle. */
static void nop_done(struct sna *sna, const struct sna_composite_op *op)
{
	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
	if (sna->kgem.nexec > 1 && __kgem_ring_empty(&sna->kgem))
		_kgem_submit(&sna->kgem);
	(void)op;
}
92
/* Completion callback for gen6 BLT copies: submit early if the ring
 * appears idle, otherwise append an XY_SETUP_CLIP with a zeroed clip
 * rectangle (presumably to reset clip state for later BLT commands --
 * confirm against the gen6 emitters). */
static void gen6_blt_copy_done(struct sna *sna, const struct sna_composite_op *op)
{
	struct kgem *kgem = &sna->kgem;

	assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
		_kgem_submit(kgem);
		return;
	}

	/* Emit only if there is room; if the batch is full it is skipped
	 * (NOTE(review): appears to rely on the next submit resetting
	 * state -- confirm). */
	if (kgem_check_batch(kgem, 3)) {
		uint32_t *b = kgem->batch + kgem->nbatch;
		b[0] = XY_SETUP_CLIP;
		b[1] = b[2] = 0;
		kgem->nbatch += 3;
		assert(kgem->nbatch < kgem->surface);
	}
	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
	(void)op;
}
113
/* Prepare @blt for solid fills of @bo with @pixel under raster-op @alu,
 * emitting the XY_SETUP_MONO_PATTERN_SL_BLT preamble unless the cached
 * fill state already matches.  Subsequent rectangles are then emitted
 * as 3-dword XY_SCANLINE_BLTs (see sna_blt_fill_one).
 * Returns false if the target cannot be fitted into a batch. */
static bool sna_blt_fill_init(struct sna *sna,
			      struct sna_blt_state *blt,
			      struct kgem_bo *bo,
			      int bpp,
			      uint8_t alu,
			      uint32_t pixel)
{
	struct kgem *kgem = &sna->kgem;

	assert(kgem_bo_can_blt (kgem, bo));
	assert(bo->tiling != I915_TILING_Y);
	blt->bo[0] = bo;

	blt->br13 = bo->pitch;
	blt->cmd = XY_SCANLINE_BLT;
	if (kgem->gen >= 040 && bo->tiling) {
		/* On gen4+ tiled surfaces specify the pitch in dwords. */
		blt->cmd |= BLT_DST_TILED;
		blt->br13 >>= 2;
	}
	assert(blt->br13 <= MAXSHORT);

	/* Canonicalise degenerate GXcopy fills so that equivalent fills
	 * share the same (pixel, alu) cache key below. */
	if (alu == GXclear)
		pixel = 0;
	else if (alu == GXcopy) {
		if (pixel == 0)
			alu = GXclear;
		else if (pixel == -1)
			alu = GXset;
	}

	blt->br13 |= 1<<31 | (fill_ROP[alu] << 16);
	switch (bpp) {
	default: assert(0);
	case 32: blt->br13 |= 1 << 25; /* RGB8888 */
		 /* fall through: 32bpp sets both depth bits */
	case 16: blt->br13 |= 1 << 24; /* RGB565 */
		 /* fall through */
	case 8: break;
	}

	blt->pixel = pixel;
	blt->bpp = bpp;

	kgem_set_mode(kgem, KGEM_BLT, bo);
	if (!kgem_check_batch(kgem, 12) ||
	    !kgem_check_bo_fenced(kgem, bo)) {
		kgem_submit(kgem);
		if (!kgem_check_bo_fenced(kgem, bo))
			return false;
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	/* Skip the setup blt if the last one emitted matches exactly. */
	if (sna->blt_state.fill_bo != bo->unique_id ||
	    sna->blt_state.fill_pixel != pixel ||
	    sna->blt_state.fill_alu != alu)
	{
		uint32_t *b;

		if (!kgem_check_reloc(kgem, 1)) {
			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
		}

		b = kgem->batch + kgem->nbatch;
		b[0] = XY_SETUP_MONO_PATTERN_SL_BLT;
		if (bpp == 32)
			b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		b[1] = blt->br13;
		b[2] = 0;
		b[3] = 0;
		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
				      I915_GEM_DOMAIN_RENDER << 16 |
				      I915_GEM_DOMAIN_RENDER |
				      KGEM_RELOC_FENCED,
				      0);
		b[5] = pixel;
		b[6] = pixel;
		b[7] = 0;
		b[8] = 0;
		kgem->nbatch += 9;
		assert(kgem->nbatch < kgem->surface);

		sna->blt_state.fill_bo = bo->unique_id;
		sna->blt_state.fill_pixel = pixel;
		sna->blt_state.fill_alu = alu;
	}

	return true;
}
201
/* Flush the current batch and re-emit the fill setup blt at the start
 * of the fresh batch, so following 3-dword XY_SCANLINE_BLTs stay valid. */
noinline static void sna_blt_fill_begin(struct sna *sna,
					const struct sna_blt_state *blt)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	_kgem_submit(kgem);
	_kgem_set_mode(kgem, KGEM_BLT);

	assert(kgem->nbatch == 0);
	b = kgem->batch;
	b[0] = XY_SETUP_MONO_PATTERN_SL_BLT;
	if (blt->bpp == 32)
		b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
	b[1] = blt->br13;
	b[2] = 0;
	b[3] = 0;
	b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[0],
			      I915_GEM_DOMAIN_RENDER << 16 |
			      I915_GEM_DOMAIN_RENDER |
			      KGEM_RELOC_FENCED,
			      0);
	b[5] = blt->pixel;
	b[6] = blt->pixel;
	b[7] = 0;
	b[8] = 0;
	kgem->nbatch = 9;
}
230
/* Emit one fill rectangle as a 3-dword scanline blt; the colour, rop
 * and target were programmed by the preceding setup blt
 * (sna_blt_fill_init / sna_blt_fill_begin). */
inline static void sna_blt_fill_one(struct sna *sna,
				    const struct sna_blt_state *blt,
				    int16_t x, int16_t y,
				    int16_t width, int16_t height)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	DBG(("%s: (%d, %d) x (%d, %d): %08x\n",
	     __FUNCTION__, x, y, width, height, blt->pixel));

	assert(x >= 0);
	assert(y >= 0);
	assert((y+height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));

	/* Restarting the batch re-emits the setup blt for us. */
	if (!kgem_check_batch(kgem, 3))
		sna_blt_fill_begin(sna, blt);

	b = kgem->batch + kgem->nbatch;
	kgem->nbatch += 3;
	assert(kgem->nbatch < kgem->surface);

	b[0] = blt->cmd;
	b[1] = y << 16 | x;
	b[2] = b[1] + (height << 16 | width);
}
257
/* Prepare @blt for XY_SRC_COPY blits from @src to @dst under raster-op
 * @alu.  Returns false if both buffers cannot be fitted into a batch. */
static bool sna_blt_copy_init(struct sna *sna,
			      struct sna_blt_state *blt,
			      struct kgem_bo *src,
			      struct kgem_bo *dst,
			      int bpp,
			      uint8_t alu)
{
	struct kgem *kgem = &sna->kgem;

	assert(kgem_bo_can_blt (kgem, src));
	assert(kgem_bo_can_blt (kgem, dst));

	blt->bo[0] = src;
	blt->bo[1] = dst;

	blt->cmd = XY_SRC_COPY_BLT_CMD;
	if (bpp == 32)
		blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;

	blt->pitch[0] = src->pitch;
	if (kgem->gen >= 040 && src->tiling) {
		/* On gen4+ tiled surfaces specify the pitch in dwords. */
		blt->cmd |= BLT_SRC_TILED;
		blt->pitch[0] >>= 2;
	}
	assert(blt->pitch[0] <= MAXSHORT);

	blt->pitch[1] = dst->pitch;
	if (kgem->gen >= 040 && dst->tiling) {
		blt->cmd |= BLT_DST_TILED;
		blt->pitch[1] >>= 2;
	}
	assert(blt->pitch[1] <= MAXSHORT);

	/* These alus ignore the previous destination contents, which lets
	 * sna_blt_copy_one replace an identical preceding fill in place. */
	blt->overwrites = alu == GXcopy || alu == GXclear || alu == GXset;
	blt->br13 = (copy_ROP[alu] << 16) | blt->pitch[1];
	switch (bpp) {
	default: assert(0);
	case 32: blt->br13 |= 1 << 25; /* RGB8888 */
		 /* fall through: 32bpp sets both depth bits */
	case 16: blt->br13 |= 1 << 24; /* RGB565 */
		 /* fall through */
	case 8: break;
	}

	kgem_set_mode(kgem, KGEM_BLT, dst);
	if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
			return false;
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	/* Invalidate the cached fill setup; it is no longer in the batch. */
	sna->blt_state.fill_bo = 0;
	return true;
}
311
/* Prepare @blt for copies that force the alpha channel to a constant:
 * uses XY_FULL_MONO_PATTERN_BLT with ROP 0xfc (P | S) so the solid
 * "pattern" value @alpha is OR'd over every copied pixel.
 * Returns false if both buffers cannot be fitted into a batch. */
static bool sna_blt_alpha_fixup_init(struct sna *sna,
				     struct sna_blt_state *blt,
				     struct kgem_bo *src,
				     struct kgem_bo *dst,
				     int bpp, uint32_t alpha)
{
	struct kgem *kgem = &sna->kgem;

	assert(kgem_bo_can_blt (kgem, src));
	assert(kgem_bo_can_blt (kgem, dst));

	blt->bo[0] = src;
	blt->bo[1] = dst;

	blt->cmd = XY_FULL_MONO_PATTERN_BLT;
	blt->pitch[0] = src->pitch;
	if (kgem->gen >= 040 && src->tiling) {
		/* On gen4+ tiled surfaces specify the pitch in dwords. */
		blt->cmd |= BLT_SRC_TILED;
		blt->pitch[0] >>= 2;
	}
	assert(blt->pitch[0] <= MAXSHORT);

	blt->pitch[1] = dst->pitch;
	if (kgem->gen >= 040 && dst->tiling) {
		blt->cmd |= BLT_DST_TILED;
		blt->pitch[1] >>= 2;
	}
	assert(blt->pitch[1] <= MAXSHORT);

	blt->overwrites = 1;
	blt->br13 = (0xfc << 16) | blt->pitch[1]; /* rop: P | S */
	switch (bpp) {
	default: assert(0);
	case 32: blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		 blt->br13 |= 1 << 25; /* RGB8888 */
		 /* fall through: 32bpp sets both depth bits */
	case 16: blt->br13 |= 1 << 24; /* RGB565 */
		 /* fall through */
	case 8: break;
	}
	blt->pixel = alpha;

	kgem_set_mode(kgem, KGEM_BLT, dst);
	if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
			return false;
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	/* Invalidate the cached fill setup; it is no longer in the batch. */
	sna->blt_state.fill_bo = 0;
	return true;
}
363
/* Emit one alpha-fixup copy rectangle (12-dword full mono pattern blt);
 * the constant pattern dwords carry blt->pixel, OR'd in by the rop
 * programmed in sna_blt_alpha_fixup_init. */
static void sna_blt_alpha_fixup_one(struct sna *sna,
				    const struct sna_blt_state *blt,
				    int src_x, int src_y,
				    int width, int height,
				    int dst_x, int dst_y)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n",
	     __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height));

	assert(src_x >= 0);
	assert(src_y >= 0);
	assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
	assert(dst_x >= 0);
	assert(dst_y >= 0);
	assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1]));
	assert(width > 0);
	assert(height > 0);

	if (!kgem_check_batch(kgem, 12) ||
	    !kgem_check_reloc(kgem, 2)) {
		_kgem_submit(kgem);
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	b = kgem->batch + kgem->nbatch;
	b[0] = blt->cmd;
	b[1] = blt->br13;
	b[2] = (dst_y << 16) | dst_x;
	b[3] = ((dst_y + height) << 16) | (dst_x + width);
	b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4,
			      blt->bo[1],
			      I915_GEM_DOMAIN_RENDER << 16 |
			      I915_GEM_DOMAIN_RENDER |
			      KGEM_RELOC_FENCED,
			      0);
	b[5] = blt->pitch[0];
	b[6] = (src_y << 16) | src_x;
	b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7,
			      blt->bo[0],
			      I915_GEM_DOMAIN_RENDER << 16 |
			      KGEM_RELOC_FENCED,
			      0);
	b[8] = blt->pixel;
	b[9] = blt->pixel;
	b[10] = 0;
	b[11] = 0;
	kgem->nbatch += 12;
	assert(kgem->nbatch < kgem->surface);
}
416
/* Emit one copy rectangle (8-dword XY_SRC_COPY).  If the last command
 * in the batch is a solid fill of exactly this rectangle on the same
 * target and our alu overwrites the destination, that fill is dead and
 * is rewritten in place into this copy. */
static void sna_blt_copy_one(struct sna *sna,
			     const struct sna_blt_state *blt,
			     int src_x, int src_y,
			     int width, int height,
			     int dst_x, int dst_y)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n",
	     __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height));

	assert(src_x >= 0);
	assert(src_y >= 0);
	assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
	assert(dst_x >= 0);
	assert(dst_y >= 0);
	assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1]));
	assert(width > 0);
	assert(height > 0);

	/* Compare against a previous fill */
	if (kgem->nbatch >= 6 &&
	    blt->overwrites &&
	    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (blt->cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB))) &&
	    kgem->batch[kgem->nbatch-4] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) &&
	    kgem->batch[kgem->nbatch-3] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width)) &&
	    kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[1]->target_handle) {
		DBG(("%s: replacing last fill\n", __FUNCTION__));
		/* XY_COLOR_BLT is 6 dwords, XY_SRC_COPY is 8: extend the
		 * existing packet by 2 dwords if there is room. */
		if (kgem_check_batch(kgem, 8-6)) {
			b = kgem->batch + kgem->nbatch - 6;
			b[0] = blt->cmd;
			b[1] = blt->br13;
			b[5] = (src_y << 16) | src_x;
			b[6] = blt->pitch[0];
			b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7 - 6,
					      blt->bo[0],
					      I915_GEM_DOMAIN_RENDER << 16 |
					      KGEM_RELOC_FENCED,
					      0);
			kgem->nbatch += 8 - 6;
			assert(kgem->nbatch < kgem->surface);
			return;
		}
		/* No room to extend: drop the dead fill and its reloc and
		 * emit the copy through the normal path below. */
		kgem->nbatch -= 6;
		kgem->nreloc--;
	}

	if (!kgem_check_batch(kgem, 8) ||
	    !kgem_check_reloc(kgem, 2)) {
		_kgem_submit(kgem);
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	b = kgem->batch + kgem->nbatch;
	b[0] = blt->cmd;
	b[1] = blt->br13;
	b[2] = (dst_y << 16) | dst_x;
	b[3] = ((dst_y + height) << 16) | (dst_x + width);
	b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4,
			      blt->bo[1],
			      I915_GEM_DOMAIN_RENDER << 16 |
			      I915_GEM_DOMAIN_RENDER |
			      KGEM_RELOC_FENCED,
			      0);
	b[5] = (src_y << 16) | src_x;
	b[6] = blt->pitch[0];
	b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7,
			      blt->bo[0],
			      I915_GEM_DOMAIN_RENDER << 16 |
			      KGEM_RELOC_FENCED,
			      0);
	kgem->nbatch += 8;
	assert(kgem->nbatch < kgem->surface);
}
492
493bool
494sna_get_rgba_from_pixel(uint32_t pixel,
495			uint16_t *red,
496			uint16_t *green,
497			uint16_t *blue,
498			uint16_t *alpha,
499			uint32_t format)
500{
501	int rbits, bbits, gbits, abits;
502	int rshift, bshift, gshift, ashift;
503
504	rbits = PICT_FORMAT_R(format);
505	gbits = PICT_FORMAT_G(format);
506	bbits = PICT_FORMAT_B(format);
507	abits = PICT_FORMAT_A(format);
508
509	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) {
510		rshift = gshift = bshift = ashift = 0;
511	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
512		bshift = 0;
513		gshift = bbits;
514		rshift = gshift + gbits;
515		ashift = rshift + rbits;
516	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
517		rshift = 0;
518		gshift = rbits;
519		bshift = gshift + gbits;
520		ashift = bshift + bbits;
521	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
522		ashift = 0;
523		rshift = abits;
524		if (abits == 0)
525			rshift = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits);
526		gshift = rshift + rbits;
527		bshift = gshift + gbits;
528	} else {
529		return false;
530	}
531
532	if (rbits) {
533		*red = ((pixel >> rshift) & ((1 << rbits) - 1)) << (16 - rbits);
534		while (rbits < 16) {
535			*red |= *red >> rbits;
536			rbits <<= 1;
537		}
538	} else
539		*red = 0;
540
541	if (gbits) {
542		*green = ((pixel >> gshift) & ((1 << gbits) - 1)) << (16 - gbits);
543		while (gbits < 16) {
544			*green |= *green >> gbits;
545			gbits <<= 1;
546		}
547	} else
548		*green = 0;
549
550	if (bbits) {
551		*blue = ((pixel >> bshift) & ((1 << bbits) - 1)) << (16 - bbits);
552		while (bbits < 16) {
553			*blue |= *blue >> bbits;
554			bbits <<= 1;
555		}
556	} else
557		*blue = 0;
558
559	if (abits) {
560		*alpha = ((pixel >> ashift) & ((1 << abits) - 1)) << (16 - abits);
561		while (abits < 16) {
562			*alpha |= *alpha >> abits;
563			abits <<= 1;
564		}
565	} else
566		*alpha = 0xffff;
567
568	return true;
569}
570
571bool
572_sna_get_pixel_from_rgba(uint32_t * pixel,
573			uint16_t red,
574			uint16_t green,
575			uint16_t blue,
576			uint16_t alpha,
577			uint32_t format)
578{
579	int rbits, bbits, gbits, abits;
580	int rshift, bshift, gshift, ashift;
581
582	rbits = PICT_FORMAT_R(format);
583	gbits = PICT_FORMAT_G(format);
584	bbits = PICT_FORMAT_B(format);
585	abits = PICT_FORMAT_A(format);
586	if (abits == 0)
587	    abits = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits);
588
589	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) {
590		*pixel = alpha >> (16 - abits);
591		return true;
592	}
593
594	if (!PICT_FORMAT_COLOR(format))
595		return false;
596
597	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
598		bshift = 0;
599		gshift = bbits;
600		rshift = gshift + gbits;
601		ashift = rshift + rbits;
602	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
603		rshift = 0;
604		gshift = rbits;
605		bshift = gshift + gbits;
606		ashift = bshift + bbits;
607	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
608		ashift = 0;
609		rshift = abits;
610		gshift = rshift + rbits;
611		bshift = gshift + gbits;
612	} else
613		return false;
614
615	*pixel = 0;
616	*pixel |= (blue  >> (16 - bbits)) << bshift;
617	*pixel |= (green >> (16 - gbits)) << gshift;
618	*pixel |= (red   >> (16 - rbits)) << rshift;
619	*pixel |= (alpha >> (16 - abits)) << ashift;
620
621	return true;
622}
623
/* Convert @color from the channel layout implied by @depth into the
 * canonical PICT_a8r8g8b8 representation. */
uint32_t
sna_rgba_for_color(uint32_t color, int depth)
{
	return color_convert(color, sna_format_for_depth(depth), PICT_a8r8g8b8);
}
629
/* Convert a canonical PICT_a8r8g8b8 value into the given render
 * @format (inverse of sna_rgba_for_color). */
uint32_t
sna_rgba_to_color(uint32_t rgba, uint32_t format)
{
	return color_convert(rgba, PICT_a8r8g8b8, format);
}
635
/* Read the first pixel of the picture's backing pixmap (used for 1x1
 * repeating "solid" pictures).  Returns 0 if the pixmap cannot be
 * brought to the CPU for reading. */
static uint32_t
get_pixel(PicturePtr picture)
{
	PixmapPtr pixmap = get_drawable_pixmap(picture->pDrawable);

	DBG(("%s: %p\n", __FUNCTION__, pixmap));

	if (!sna_pixmap_move_to_cpu(pixmap, MOVE_READ))
		return 0;

	switch (pixmap->drawable.bitsPerPixel) {
	case 32: return *(uint32_t *)pixmap->devPrivate.ptr;
	case 16: return *(uint16_t *)pixmap->devPrivate.ptr;
	default: return *(uint8_t *)pixmap->devPrivate.ptr;
	}
}
652
653static uint32_t
654get_solid_color(PicturePtr picture, uint32_t format)
655{
656	if (picture->pSourcePict) {
657		PictSolidFill *fill = (PictSolidFill *)picture->pSourcePict;
658		return color_convert(fill->color, PICT_a8r8g8b8, format);
659	} else
660		return color_convert(get_pixel(picture), picture->format, format);
661}
662
663static bool
664is_solid(PicturePtr picture)
665{
666	if (picture->pSourcePict) {
667		if (picture->pSourcePict->type == SourcePictTypeSolidFill)
668			return true;
669	}
670
671	if (picture->pDrawable) {
672		if (picture->pDrawable->width  == 1 &&
673		    picture->pDrawable->height == 1 &&
674		    picture->repeat)
675			return true;
676	}
677
678	return false;
679}
680
681bool
682sna_picture_is_solid(PicturePtr picture, uint32_t *color)
683{
684	if (!is_solid(picture))
685		return false;
686
687	if (color)
688		*color = get_solid_color(picture, PICT_a8r8g8b8);
689	return true;
690}
691
692static bool
693pixel_is_opaque(uint32_t pixel, uint32_t format)
694{
695	unsigned int abits;
696
697	abits = PICT_FORMAT_A(format);
698	if (!abits)
699		return true;
700
701	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A ||
702	    PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
703		return (pixel & ((1 << abits) - 1)) == (unsigned)((1 << abits) - 1);
704	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB ||
705		   PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
706		unsigned int ashift = PICT_FORMAT_BPP(format) - abits;
707		return (pixel >> ashift) == (unsigned)((1 << abits) - 1);
708	} else
709		return false;
710}
711
712static bool
713pixel_is_white(uint32_t pixel, uint32_t format)
714{
715	switch (PICT_FORMAT_TYPE(format)) {
716	case PICT_TYPE_A:
717	case PICT_TYPE_ARGB:
718	case PICT_TYPE_ABGR:
719	case PICT_TYPE_BGRA:
720		return pixel == ((1U << PICT_FORMAT_BPP(format)) - 1);
721	default:
722		return false;
723	}
724}
725
726static bool
727is_opaque_solid(PicturePtr picture)
728{
729	if (picture->pSourcePict) {
730		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
731		return (fill->color >> 24) == 0xff;
732	} else
733		return pixel_is_opaque(get_pixel(picture), picture->format);
734}
735
736static bool
737is_white(PicturePtr picture)
738{
739	if (picture->pSourcePict) {
740		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
741		return fill->color == 0xffffffff;
742	} else
743		return pixel_is_white(get_pixel(picture), picture->format);
744}
745
746bool
747sna_composite_mask_is_opaque(PicturePtr mask)
748{
749	if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format))
750		return is_solid(mask) && is_white(mask);
751	else if (!PICT_FORMAT_A(mask->format))
752		return true;
753	else
754		return is_solid(mask) && is_opaque_solid(mask);
755}
756
757fastcall
758static void blt_composite_fill(struct sna *sna,
759			       const struct sna_composite_op *op,
760			       const struct sna_composite_rectangles *r)
761{
762	int x1, x2, y1, y2;
763
764	x1 = r->dst.x + op->dst.x;
765	y1 = r->dst.y + op->dst.y;
766	x2 = x1 + r->width;
767	y2 = y1 + r->height;
768
769	if (x1 < 0)
770		x1 = 0;
771	if (y1 < 0)
772		y1 = 0;
773
774	if (x2 > op->dst.width)
775		x2 = op->dst.width;
776	if (y2 > op->dst.height)
777		y2 = op->dst.height;
778
779	if (x2 <= x1 || y2 <= y1)
780		return;
781
782	sna_blt_fill_one(sna, &op->u.blt, x1, y1, x2-x1, y2-y1);
783}
784
/* CPU fallback: fill one composite rectangle in the mapped destination
 * via pixman, clipped to the destination bounds.  pixman_fill takes
 * the stride in 32-bit units, hence devKind / sizeof(uint32_t). */
fastcall
static void blt_composite_fill__cpu(struct sna *sna,
				    const struct sna_composite_op *op,
				    const struct sna_composite_rectangles *r)
{
	int x1, x2, y1, y2;

	x1 = r->dst.x + op->dst.x;
	y1 = r->dst.y + op->dst.y;
	x2 = x1 + r->width;
	y2 = y1 + r->height;

	if (x1 < 0)
		x1 = 0;
	if (y1 < 0)
		y1 = 0;

	if (x2 > op->dst.width)
		x2 = op->dst.width;
	if (y2 > op->dst.height)
		y2 = op->dst.height;

	if (x2 <= x1 || y2 <= y1)
		return;

	pixman_fill(op->dst.pixmap->devPrivate.ptr,
		    op->dst.pixmap->devKind / sizeof(uint32_t),
		    op->dst.pixmap->drawable.bitsPerPixel,
		    x1, y1, x2-x1, y2-y1,
		    op->u.blt.pixel);
}
816
/* CPU fallback: fill a single pre-clipped box via pixman (stride in
 * 32-bit units, see blt_composite_fill__cpu). */
fastcall static void
blt_composite_fill_box__cpu(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box)
{
	pixman_fill(op->dst.pixmap->devPrivate.ptr,
		    op->dst.pixmap->devKind / sizeof(uint32_t),
		    op->dst.pixmap->drawable.bitsPerPixel,
		    box->x1, box->y1, box->x2-box->x1, box->y2-box->y1,
		    op->u.blt.pixel);
}
828
/* CPU fallback: fill @n pre-clipped boxes via pixman.  The do/while
 * assumes the caller passes n >= 1. */
static void
blt_composite_fill_boxes__cpu(struct sna *sna,
			      const struct sna_composite_op *op,
			      const BoxRec *box, int n)
{
	do {
		pixman_fill(op->dst.pixmap->devPrivate.ptr,
			    op->dst.pixmap->devKind / sizeof(uint32_t),
			    op->dst.pixmap->drawable.bitsPerPixel,
			    box->x1, box->y1, box->x2-box->x1, box->y2-box->y1,
			    op->u.blt.pixel);
		box++;
	} while (--n);
}
843
/* Emit one 3-dword scanline fill for a pre-clipped box.  The two
 * coordinate dwords are written with a single 64-bit copy of the
 * BoxRec (relies on BoxRec being four packed int16s: x1,y1,x2,y2). */
inline static void _sna_blt_fill_box(struct sna *sna,
				     const struct sna_blt_state *blt,
				     const BoxRec *box)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	DBG(("%s: (%d, %d), (%d, %d): %08x\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2,
	     blt->pixel));

	assert(box->x1 >= 0);
	assert(box->y1 >= 0);
	assert(box->y2 * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));

	/* Restarting the batch re-emits the setup blt for us. */
	if (!kgem_check_batch(kgem, 3))
		sna_blt_fill_begin(sna, blt);

	b = kgem->batch + kgem->nbatch;
	kgem->nbatch += 3;
	assert(kgem->nbatch < kgem->surface);

	b[0] = blt->cmd;
	*(uint64_t *)(b+1) = *(const uint64_t *)box;
}
869
/* Emit a run of 3-dword scanline fills for pre-clipped boxes, unrolled
 * eight at a time, restarting the batch (which re-emits the fill
 * setup) whenever the reserved space runs out.  Assumes nbox >= 1. */
inline static void _sna_blt_fill_boxes(struct sna *sna,
				       const struct sna_blt_state *blt,
				       const BoxRec *box,
				       int nbox)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t cmd = blt->cmd;

	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));

	if (!kgem_check_batch(kgem, 3))
		sna_blt_fill_begin(sna, blt);

	do {
		uint32_t *b = kgem->batch + kgem->nbatch;
		int nbox_this_time;

		/* Emit as many boxes as fit in the remaining batch space. */
		nbox_this_time = nbox;
		if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
			nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		kgem->nbatch += 3 * nbox_this_time;
		assert(kgem->nbatch < kgem->surface);
		/* Each box is copied as one 64-bit store (x1,y1,x2,y2). */
		while (nbox_this_time >= 8) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
			b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++;
			b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++;
			b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++;
			b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++;
			b += 24;
			nbox_this_time -= 8;
		}
		if (nbox_this_time & 4) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
			b += 12;
		}
		if (nbox_this_time & 2) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b += 6;
		}
		if (nbox_this_time & 1) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
		}

		if (!nbox)
			return;

		/* Batch exhausted: flush and re-emit the fill setup. */
		sna_blt_fill_begin(sna, blt);
	} while (1);
}
929
/* Composite box handler for destinations with no (x, y) offset:
 * forward the box directly to the fill emitter. */
fastcall static void blt_composite_fill_box_no_offset(struct sna *sna,
						      const struct sna_composite_op *op,
						      const BoxRec *box)
{
	_sna_blt_fill_box(sna, &op->u.blt, box);
}
936
/* Composite boxes handler for destinations with no (x, y) offset:
 * forward the boxes directly to the batched fill emitter. */
static void blt_composite_fill_boxes_no_offset(struct sna *sna,
					       const struct sna_composite_op *op,
					       const BoxRec *box, int n)
{
	_sna_blt_fill_boxes(sna, &op->u.blt, box, n);
}
943
/* Threaded variant of blt_composite_fill_boxes_no_offset: batch space
 * is reserved under the render vertex lock, then the dwords are
 * written into the reserved span outside the lock.  Assumes nbox >= 1. */
static void blt_composite_fill_boxes_no_offset__thread(struct sna *sna,
						       const struct sna_composite_op *op,
						       const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	const struct sna_blt_state *blt = &op->u.blt;
	uint32_t cmd = blt->cmd;

	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));

	sna_vertex_lock(&sna->render);
	if (!kgem_check_batch(kgem, 3)) {
		/* Wait for other writers before flushing the batch. */
		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	}

	do {
		uint32_t *b = kgem->batch + kgem->nbatch;
		int nbox_this_time;

		/* Reserve as many 3-dword fills as fit in the batch. */
		nbox_this_time = nbox;
		if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
			nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		kgem->nbatch += 3 * nbox_this_time;
		assert(kgem->nbatch < kgem->surface);
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* Fill the reserved span without holding the lock. */
		while (nbox_this_time >= 8) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
			b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++;
			b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++;
			b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++;
			b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++;
			b += 24;
			nbox_this_time -= 8;
		}
		if (nbox_this_time & 4) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
			b += 12;
		}
		if (nbox_this_time & 2) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b += 6;
		}
		if (nbox_this_time & 1) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
		}

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
		if (!nbox)
			break;

		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	} while (1);
	sna_vertex_unlock(&sna->render);
}
1013
1014fastcall static void blt_composite_fill_box(struct sna *sna,
1015					    const struct sna_composite_op *op,
1016					    const BoxRec *box)
1017{
1018	sna_blt_fill_one(sna, &op->u.blt,
1019			 box->x1 + op->dst.x,
1020			 box->y1 + op->dst.y,
1021			 box->x2 - box->x1,
1022			 box->y2 - box->y1);
1023}
1024
1025static void blt_composite_fill_boxes(struct sna *sna,
1026				     const struct sna_composite_op *op,
1027				     const BoxRec *box, int n)
1028{
1029	do {
1030		sna_blt_fill_one(sna, &op->u.blt,
1031				 box->x1 + op->dst.x, box->y1 + op->dst.y,
1032				 box->x2 - box->x1, box->y2 - box->y1);
1033		box++;
1034	} while (--n);
1035}
1036
1037static inline uint64_t add4(const BoxRec *b, int16_t x, int16_t y)
1038{
1039	union {
1040		uint64_t v;
1041		int16_t i[4];
1042	} vi;
1043	vi.v = *(uint64_t *)b;
1044	vi.i[0] += x;
1045	vi.i[1] += y;
1046	vi.i[2] += x;
1047	vi.i[3] += y;
1048	return vi.v;
1049}
1050
/* Threaded variant of blt_composite_fill_boxes: batch space is
 * reserved under the render vertex lock, then the dwords are written
 * into the reserved span outside the lock, translating each box by
 * the destination offset via add4.  Assumes nbox >= 1. */
static void blt_composite_fill_boxes__thread(struct sna *sna,
					     const struct sna_composite_op *op,
					     const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	const struct sna_blt_state *blt = &op->u.blt;
	uint32_t cmd = blt->cmd;
	int16_t dx = op->dst.x;
	int16_t dy = op->dst.y;

	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));

	sna_vertex_lock(&sna->render);
	if (!kgem_check_batch(kgem, 3)) {
		/* Wait for other writers before flushing the batch. */
		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	}

	do {
		uint32_t *b = kgem->batch + kgem->nbatch;
		int nbox_this_time;

		/* Reserve as many 3-dword fills as fit in the batch. */
		nbox_this_time = nbox;
		if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
			nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		kgem->nbatch += 3 * nbox_this_time;
		assert(kgem->nbatch < kgem->surface);
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* Fill the reserved span without holding the lock. */
		while (nbox_this_time >= 8) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
			b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
			b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
			b[12] = cmd; *(uint64_t *)(b+13) = add4(box++, dx, dy);
			b[15] = cmd; *(uint64_t *)(b+16) = add4(box++, dx, dy);
			b[18] = cmd; *(uint64_t *)(b+19) = add4(box++, dx, dy);
			b[21] = cmd; *(uint64_t *)(b+22) = add4(box++, dx, dy);
			b += 24;
			nbox_this_time -= 8;
		}
		if (nbox_this_time & 4) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
			b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
			b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
			b += 12;
		}
		if (nbox_this_time & 2) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
			b += 6;
		}
		if (nbox_this_time & 1) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
		}

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
		if (!nbox)
			break;

		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	} while (1);
	sna_vertex_unlock(&sna->render);
}
1122
fastcall
static void blt_composite_nop(struct sna *sna,
			       const struct sna_composite_op *op,
			       const struct sna_composite_rectangles *r)
{
	/* Intentionally empty: installed by prepare_blt_nop() when the
	 * composite reduces to a no-op and all rectangles are discarded. */
}
1129
fastcall static void blt_composite_nop_box(struct sna *sna,
					   const struct sna_composite_op *op,
					   const BoxRec *box)
{
	/* Intentionally empty: per-box variant of the no-op composite. */
}
1135
static void blt_composite_nop_boxes(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int n)
{
	/* Intentionally empty: multi-box variant of the no-op composite. */
}
1141
1142static bool
1143begin_blt(struct sna *sna,
1144	  struct sna_composite_op *op)
1145{
1146	if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo)) {
1147		kgem_submit(&sna->kgem);
1148		if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo))
1149			return false;
1150
1151		_kgem_set_mode(&sna->kgem, KGEM_BLT);
1152	}
1153
1154	return true;
1155}
1156
1157static bool
1158prepare_blt_nop(struct sna *sna,
1159		struct sna_composite_op *op)
1160{
1161	DBG(("%s\n", __FUNCTION__));
1162
1163	op->blt   = blt_composite_nop;
1164	op->box   = blt_composite_nop_box;
1165	op->boxes = blt_composite_nop_boxes;
1166	op->done  = nop_done;
1167	return true;
1168}
1169
1170static bool
1171prepare_blt_clear(struct sna *sna,
1172		  struct sna_composite_op *op)
1173{
1174	DBG(("%s\n", __FUNCTION__));
1175
1176	if (op->dst.bo == NULL) {
1177		op->blt   = blt_composite_fill__cpu;
1178		op->box   = blt_composite_fill_box__cpu;
1179		op->boxes = blt_composite_fill_boxes__cpu;
1180		op->thread_boxes = blt_composite_fill_boxes__cpu;
1181		op->done  = nop_done;
1182		op->u.blt.pixel = 0;
1183		return true;
1184	}
1185
1186	op->blt = blt_composite_fill;
1187	if (op->dst.x|op->dst.y) {
1188		op->box   = blt_composite_fill_box;
1189		op->boxes = blt_composite_fill_boxes;
1190		op->thread_boxes = blt_composite_fill_boxes__thread;
1191	} else {
1192		op->box   = blt_composite_fill_box_no_offset;
1193		op->boxes = blt_composite_fill_boxes_no_offset;
1194		op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
1195	}
1196	op->done = nop_done;
1197
1198	if (!sna_blt_fill_init(sna, &op->u.blt,
1199				 op->dst.bo,
1200				 op->dst.pixmap->drawable.bitsPerPixel,
1201				 GXclear, 0))
1202		return false;
1203
1204	return begin_blt(sna, op);
1205}
1206
1207static bool
1208prepare_blt_fill(struct sna *sna,
1209		 struct sna_composite_op *op,
1210		 uint32_t pixel)
1211{
1212	DBG(("%s\n", __FUNCTION__));
1213
1214	if (op->dst.bo == NULL) {
1215		op->u.blt.pixel = pixel;
1216		op->blt = blt_composite_fill__cpu;
1217		op->box   = blt_composite_fill_box__cpu;
1218		op->boxes = blt_composite_fill_boxes__cpu;
1219		op->thread_boxes = blt_composite_fill_boxes__cpu;
1220		op->done = nop_done;
1221		return true;
1222	}
1223
1224	op->blt = blt_composite_fill;
1225	if (op->dst.x|op->dst.y) {
1226		op->box   = blt_composite_fill_box;
1227		op->boxes = blt_composite_fill_boxes;
1228		op->thread_boxes = blt_composite_fill_boxes__thread;
1229	} else {
1230		op->box   = blt_composite_fill_box_no_offset;
1231		op->boxes = blt_composite_fill_boxes_no_offset;
1232		op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
1233	}
1234	op->done = nop_done;
1235
1236	if (!sna_blt_fill_init(sna, &op->u.blt, op->dst.bo,
1237			       op->dst.pixmap->drawable.bitsPerPixel,
1238			       GXcopy, pixel))
1239		return false;
1240
1241	return begin_blt(sna, op);
1242}
1243
1244fastcall static void
1245blt_composite_copy(struct sna *sna,
1246		   const struct sna_composite_op *op,
1247		   const struct sna_composite_rectangles *r)
1248{
1249	int x1, x2, y1, y2;
1250	int src_x, src_y;
1251
1252	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
1253	     __FUNCTION__,
1254	     r->src.x, r->src.y,
1255	     r->dst.x, r->dst.y,
1256	     r->width, r->height));
1257
1258	/* XXX higher layer should have clipped? */
1259
1260	x1 = r->dst.x + op->dst.x;
1261	y1 = r->dst.y + op->dst.y;
1262	x2 = x1 + r->width;
1263	y2 = y1 + r->height;
1264
1265	src_x = r->src.x - x1;
1266	src_y = r->src.y - y1;
1267
1268	/* clip against dst */
1269	if (x1 < 0)
1270		x1 = 0;
1271	if (y1 < 0)
1272		y1 = 0;
1273
1274	if (x2 > op->dst.width)
1275		x2 = op->dst.width;
1276
1277	if (y2 > op->dst.height)
1278		y2 = op->dst.height;
1279
1280	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));
1281
1282	if (x2 <= x1 || y2 <= y1)
1283		return;
1284
1285	sna_blt_copy_one(sna, &op->u.blt,
1286			 x1 + src_x, y1 + src_y,
1287			 x2 - x1, y2 - y1,
1288			 x1, y1);
1289}
1290
1291fastcall static void blt_composite_copy_box(struct sna *sna,
1292					    const struct sna_composite_op *op,
1293					    const BoxRec *box)
1294{
1295	DBG(("%s: box (%d, %d), (%d, %d)\n",
1296	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
1297	sna_blt_copy_one(sna, &op->u.blt,
1298			 box->x1 + op->u.blt.sx,
1299			 box->y1 + op->u.blt.sy,
1300			 box->x2 - box->x1,
1301			 box->y2 - box->y1,
1302			 box->x1 + op->dst.x,
1303			 box->y1 + op->dst.y);
1304}
1305
1306static void blt_composite_copy_boxes(struct sna *sna,
1307				     const struct sna_composite_op *op,
1308				     const BoxRec *box, int nbox)
1309{
1310	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1311	do {
1312		DBG(("%s: box (%d, %d), (%d, %d)\n",
1313		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
1314		sna_blt_copy_one(sna, &op->u.blt,
1315				 box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
1316				 box->x2 - box->x1, box->y2 - box->y1,
1317				 box->x1 + op->dst.x, box->y1 + op->dst.y);
1318		box++;
1319	} while(--nbox);
1320}
1321
/* Add the (x, y) offset to a packed (x:16 | y:16 << 16) coordinate pair.
 * Returns the repacked pair. */
static inline uint32_t add2(uint32_t v, int16_t x, int16_t y)
{
	x += v & 0xffff;
	y += v >> 16;
	/* Cast x to uint16_t before widening: a negative x would otherwise
	 * sign-extend through integer promotion and corrupt the upper
	 * (y) half of the packed result. */
	return (uint16_t)y << 16 | (uint16_t)x;
}
1328
/* Emit a stream of 8-dword BLT copies for nbox boxes, cooperating with the
 * render threads: the batch is only extended while holding the vertex lock,
 * and is submitted whenever it can no longer hold a whole blit (batch space
 * or relocation entries). */
static void blt_composite_copy_boxes__thread(struct sna *sna,
					     const struct sna_composite_op *op,
					     const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	int dst_dx = op->dst.x;
	int dst_dy = op->dst.y;
	int src_dx = op->src.offset[0];
	int src_dy = op->src.offset[1];
	uint32_t cmd = op->u.blt.cmd;
	uint32_t br13 = op->u.blt.br13;
	struct kgem_bo *src_bo = op->u.blt.bo[0];
	struct kgem_bo *dst_bo = op->u.blt.bo[1];
	int src_pitch = op->u.blt.pitch[0];

	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);

	/* Fast path: no destination offset, so each box is already the
	 * destination rectangle and its two corners can be copied into the
	 * batch as a single 64-bit store. */
	if ((dst_dx | dst_dy) == 0) {
		/* cmd and br13 are identical for every blit: pack once. */
		uint64_t hdr = (uint64_t)br13 << 32 | cmd;
		do {
			int nbox_this_time;

			/* Each blit consumes 8 dwords of batch and 2
			 * relocation entries; clamp to whatever still fits. */
			nbox_this_time = nbox;
			if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
				nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
			assert(nbox_this_time);
			nbox -= nbox_this_time;

			do {
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
				     __FUNCTION__,
				     box->x1, box->y1,
				     box->x2 - box->x1, box->y2 - box->y1));

				assert(box->x1 + src_dx >= 0);
				assert(box->y1 + src_dy >= 0);
				assert(box->x1 + src_dx <= INT16_MAX);
				assert(box->y1 + src_dy <= INT16_MAX);

				assert(box->x1 >= 0);
				assert(box->y1 >= 0);

				/* b[2..3] <- (x1,y1),(x2,y2) copied raw from
				 * the BoxRec (four int16 fields). */
				*(uint64_t *)&b[0] = hdr;
				*(uint64_t *)&b[2] = *(const uint64_t *)box;
				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      I915_GEM_DOMAIN_RENDER |
						      KGEM_RELOC_FENCED,
						      0);
				/* Source origin = destination corner plus the
				 * source offset, done on the packed pair. */
				b[5] = add2(b[2], src_dx, src_dy);
				b[6] = src_pitch;
				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      KGEM_RELOC_FENCED,
						      0);
				kgem->nbatch += 8;
				assert(kgem->nbatch < kgem->surface);
				box++;
			} while (--nbox_this_time);

			if (!nbox)
				break;

			/* Out of batch/reloc space: flush and keep going. */
			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
		} while (1);
	} else {
		/* General path: translate each box by the destination offset
		 * while emitting the blit. */
		do {
			int nbox_this_time;

			nbox_this_time = nbox;
			if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
				nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
			assert(nbox_this_time);
			nbox -= nbox_this_time;

			do {
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
				     __FUNCTION__,
				     box->x1, box->y1,
				     box->x2 - box->x1, box->y2 - box->y1));

				assert(box->x1 + src_dx >= 0);
				assert(box->y1 + src_dy >= 0);

				assert(box->x1 + dst_dx >= 0);
				assert(box->y1 + dst_dy >= 0);

				b[0] = cmd;
				b[1] = br13;
				b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
				b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      I915_GEM_DOMAIN_RENDER |
						      KGEM_RELOC_FENCED,
						      0);
				b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
				b[6] = src_pitch;
				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      KGEM_RELOC_FENCED,
						      0);
				kgem->nbatch += 8;
				assert(kgem->nbatch < kgem->surface);
				box++;
			} while (--nbox_this_time);

			if (!nbox)
				break;

			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
		} while (1);
	}
	sna_vertex_unlock(&sna->render);
}
1456
1457fastcall static void
1458blt_composite_copy_with_alpha(struct sna *sna,
1459			      const struct sna_composite_op *op,
1460			      const struct sna_composite_rectangles *r)
1461{
1462	int x1, x2, y1, y2;
1463	int src_x, src_y;
1464
1465	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
1466	     __FUNCTION__,
1467	     r->src.x, r->src.y,
1468	     r->dst.x, r->dst.y,
1469	     r->width, r->height));
1470
1471	/* XXX higher layer should have clipped? */
1472
1473	x1 = r->dst.x + op->dst.x;
1474	y1 = r->dst.y + op->dst.y;
1475	x2 = x1 + r->width;
1476	y2 = y1 + r->height;
1477
1478	src_x = r->src.x - x1;
1479	src_y = r->src.y - y1;
1480
1481	/* clip against dst */
1482	if (x1 < 0)
1483		x1 = 0;
1484	if (y1 < 0)
1485		y1 = 0;
1486
1487	if (x2 > op->dst.width)
1488		x2 = op->dst.width;
1489
1490	if (y2 > op->dst.height)
1491		y2 = op->dst.height;
1492
1493	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));
1494
1495	if (x2 <= x1 || y2 <= y1)
1496		return;
1497
1498	sna_blt_alpha_fixup_one(sna, &op->u.blt,
1499				x1 + src_x, y1 + src_y,
1500				x2 - x1, y2 - y1,
1501				x1, y1);
1502}
1503
1504fastcall static void
1505blt_composite_copy_box_with_alpha(struct sna *sna,
1506				  const struct sna_composite_op *op,
1507				  const BoxRec *box)
1508{
1509	DBG(("%s: box (%d, %d), (%d, %d)\n",
1510	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
1511	sna_blt_alpha_fixup_one(sna, &op->u.blt,
1512				box->x1 + op->u.blt.sx,
1513				box->y1 + op->u.blt.sy,
1514				box->x2 - box->x1,
1515				box->y2 - box->y1,
1516				box->x1 + op->dst.x,
1517				box->y1 + op->dst.y);
1518}
1519
1520static void
1521blt_composite_copy_boxes_with_alpha(struct sna *sna,
1522				    const struct sna_composite_op *op,
1523				    const BoxRec *box, int nbox)
1524{
1525	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1526	do {
1527		DBG(("%s: box (%d, %d), (%d, %d)\n",
1528		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
1529		sna_blt_alpha_fixup_one(sna, &op->u.blt,
1530					box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
1531					box->x2 - box->x1, box->y2 - box->y1,
1532					box->x1 + op->dst.x, box->y1 + op->dst.y);
1533		box++;
1534	} while(--nbox);
1535}
1536
1537static bool
1538prepare_blt_copy(struct sna *sna,
1539		 struct sna_composite_op *op,
1540		 struct kgem_bo *bo,
1541		 uint32_t alpha_fixup)
1542{
1543	PixmapPtr src = op->u.blt.src_pixmap;
1544
1545	assert(op->dst.bo);
1546	assert(kgem_bo_can_blt(&sna->kgem, op->dst.bo));
1547	assert(kgem_bo_can_blt(&sna->kgem, bo));
1548
1549	if (!kgem_check_many_bo_fenced(&sna->kgem, op->dst.bo, bo, NULL)) {
1550		kgem_submit(&sna->kgem);
1551		if (!kgem_check_many_bo_fenced(&sna->kgem,
1552					       op->dst.bo, bo, NULL)) {
1553			DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
1554			return false;
1555		}
1556		_kgem_set_mode(&sna->kgem, KGEM_BLT);
1557	}
1558
1559	DBG(("%s\n", __FUNCTION__));
1560
1561	if (sna->kgem.gen >= 060 && op->dst.bo == bo)
1562		op->done = gen6_blt_copy_done;
1563	else
1564		op->done = nop_done;
1565
1566	if (alpha_fixup) {
1567		op->blt   = blt_composite_copy_with_alpha;
1568		op->box   = blt_composite_copy_box_with_alpha;
1569		op->boxes = blt_composite_copy_boxes_with_alpha;
1570
1571		if (!sna_blt_alpha_fixup_init(sna, &op->u.blt, bo, op->dst.bo,
1572					      src->drawable.bitsPerPixel,
1573					      alpha_fixup))
1574			return false;
1575	} else {
1576		op->blt   = blt_composite_copy;
1577		op->box   = blt_composite_copy_box;
1578		op->boxes = blt_composite_copy_boxes;
1579		op->thread_boxes = blt_composite_copy_boxes__thread;
1580
1581		if (!sna_blt_copy_init(sna, &op->u.blt, bo, op->dst.bo,
1582				       src->drawable.bitsPerPixel,
1583				       GXcopy))
1584			return false;
1585	}
1586
1587	return true;
1588}
1589
1590fastcall static void
1591blt_put_composite__cpu(struct sna *sna,
1592		       const struct sna_composite_op *op,
1593		       const struct sna_composite_rectangles *r)
1594{
1595	PixmapPtr dst = op->dst.pixmap;
1596	PixmapPtr src = op->u.blt.src_pixmap;
1597	memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
1598		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
1599		   r->src.x + op->u.blt.sx, r->src.y + op->u.blt.sy,
1600		   r->dst.x + op->dst.x, r->dst.y + op->dst.y,
1601		   r->width, r->height);
1602}
1603
1604fastcall static void
1605blt_put_composite_box__cpu(struct sna *sna,
1606			   const struct sna_composite_op *op,
1607			   const BoxRec *box)
1608{
1609	PixmapPtr dst = op->dst.pixmap;
1610	PixmapPtr src = op->u.blt.src_pixmap;
1611	memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
1612		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
1613		   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
1614		   box->x1 + op->dst.x, box->y1 + op->dst.y,
1615		   box->x2-box->x1, box->y2-box->y1);
1616}
1617
1618static void
1619blt_put_composite_boxes__cpu(struct sna *sna,
1620			     const struct sna_composite_op *op,
1621			     const BoxRec *box, int n)
1622{
1623	PixmapPtr dst = op->dst.pixmap;
1624	PixmapPtr src = op->u.blt.src_pixmap;
1625	do {
1626		memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
1627			   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
1628			   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
1629			   box->x1 + op->dst.x, box->y1 + op->dst.y,
1630			   box->x2-box->x1, box->y2-box->y1);
1631		box++;
1632	} while (--n);
1633}
1634
1635fastcall static void
1636blt_put_composite_with_alpha__cpu(struct sna *sna,
1637				  const struct sna_composite_op *op,
1638				  const struct sna_composite_rectangles *r)
1639{
1640	PixmapPtr dst = op->dst.pixmap;
1641	PixmapPtr src = op->u.blt.src_pixmap;
1642	memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
1643		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
1644		   r->src.x + op->u.blt.sx, r->src.y + op->u.blt.sy,
1645		   r->dst.x + op->dst.x, r->dst.y + op->dst.y,
1646		   r->width, r->height,
1647		   0xffffffff, op->u.blt.pixel);
1648
1649}
1650
1651fastcall static void
1652blt_put_composite_box_with_alpha__cpu(struct sna *sna,
1653				      const struct sna_composite_op *op,
1654				      const BoxRec *box)
1655{
1656	PixmapPtr dst = op->dst.pixmap;
1657	PixmapPtr src = op->u.blt.src_pixmap;
1658	memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
1659		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
1660		   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
1661		   box->x1 + op->dst.x, box->y1 + op->dst.y,
1662		   box->x2-box->x1, box->y2-box->y1,
1663		   0xffffffff, op->u.blt.pixel);
1664}
1665
1666static void
1667blt_put_composite_boxes_with_alpha__cpu(struct sna *sna,
1668					const struct sna_composite_op *op,
1669					const BoxRec *box, int n)
1670{
1671	PixmapPtr dst = op->dst.pixmap;
1672	PixmapPtr src = op->u.blt.src_pixmap;
1673	do {
1674		memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
1675			   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
1676			   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
1677			   box->x1 + op->dst.x, box->y1 + op->dst.y,
1678			   box->x2-box->x1, box->y2-box->y1,
1679			   0xffffffff, op->u.blt.pixel);
1680		box++;
1681	} while (--n);
1682}
1683
/* Upload a source rectangle into the GPU destination pixmap.  When the
 * rectangle covers the entire (unpinned) destination, the whole bo is
 * replaced via sna_replace(); otherwise only the affected box is written
 * with sna_write_boxes(). */
fastcall static void
blt_put_composite(struct sna *sna,
		  const struct sna_composite_op *op,
		  const struct sna_composite_rectangles *r)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	struct sna_pixmap *dst_priv = sna_pixmap(dst);
	int pitch = src->devKind;
	char *data = src->devPrivate.ptr;
	int bpp = src->drawable.bitsPerPixel;

	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;
	int16_t src_x = r->src.x + op->u.blt.sx;
	int16_t src_y = r->src.y + op->u.blt.sy;

	if (!dst_priv->pinned &&
	    dst_x <= 0 && dst_y <= 0 &&
	    dst_x + r->width >= op->dst.width &&
	    dst_y + r->height >= op->dst.height) {
		/* Advance to the source pixel mapping onto dst (0, 0). */
		data += (src_x - dst_x) * bpp / 8;
		data += (src_y - dst_y) * pitch;

		/* &gpu_bo: sna_replace() takes the bo by reference —
		 * presumably so it may install a replacement bo; confirm
		 * against its definition. */
		sna_replace(sna, op->dst.pixmap, &dst_priv->gpu_bo,
			    data, pitch);
	} else {
		BoxRec box;
		bool ok;

		box.x1 = dst_x;
		box.y1 = dst_y;
		box.x2 = dst_x + r->width;
		box.y2 = dst_y + r->height;

		ok = sna_write_boxes(sna, dst,
				     dst_priv->gpu_bo, 0, 0,
				     data, pitch, src_x, src_y,
				     &box, 1);
		assert(ok);
		(void)ok;
	}
}
1727
1728fastcall static void blt_put_composite_box(struct sna *sna,
1729					   const struct sna_composite_op *op,
1730					   const BoxRec *box)
1731{
1732	PixmapPtr src = op->u.blt.src_pixmap;
1733	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
1734
1735	DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
1736	     op->u.blt.sx, op->u.blt.sy,
1737	     op->dst.x, op->dst.y));
1738
1739	if (!dst_priv->pinned &&
1740	    box->x2 - box->x1 == op->dst.width &&
1741	    box->y2 - box->y1 == op->dst.height) {
1742		int pitch = src->devKind;
1743		int bpp = src->drawable.bitsPerPixel / 8;
1744		char *data = src->devPrivate.ptr;
1745
1746		data += (box->y1 + op->u.blt.sy) * pitch;
1747		data += (box->x1 + op->u.blt.sx) * bpp;
1748
1749		sna_replace(sna, op->dst.pixmap, &dst_priv->gpu_bo,
1750			    data, pitch);
1751	} else {
1752		bool ok;
1753
1754		ok = sna_write_boxes(sna, op->dst.pixmap,
1755				     op->dst.bo, op->dst.x, op->dst.y,
1756				     src->devPrivate.ptr,
1757				     src->devKind,
1758				     op->u.blt.sx, op->u.blt.sy,
1759				     box, 1);
1760		assert(ok);
1761		(void)ok;
1762	}
1763}
1764
1765static void blt_put_composite_boxes(struct sna *sna,
1766				    const struct sna_composite_op *op,
1767				    const BoxRec *box, int n)
1768{
1769	PixmapPtr src = op->u.blt.src_pixmap;
1770	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
1771
1772	DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__,
1773	     op->u.blt.sx, op->u.blt.sy,
1774	     op->dst.x, op->dst.y,
1775	     box->x1, box->y1, box->x2, box->y2, n));
1776
1777	if (n == 1 && !dst_priv->pinned &&
1778	    box->x2 - box->x1 == op->dst.width &&
1779	    box->y2 - box->y1 == op->dst.height) {
1780		int pitch = src->devKind;
1781		int bpp = src->drawable.bitsPerPixel / 8;
1782		char *data = src->devPrivate.ptr;
1783
1784		data += (box->y1 + op->u.blt.sy) * pitch;
1785		data += (box->x1 + op->u.blt.sx) * bpp;
1786
1787		sna_replace(sna, op->dst.pixmap, &dst_priv->gpu_bo,
1788			    data, pitch);
1789	} else {
1790		bool ok;
1791
1792		ok = sna_write_boxes(sna, op->dst.pixmap,
1793				     op->dst.bo, op->dst.x, op->dst.y,
1794				     src->devPrivate.ptr,
1795				     src->devKind,
1796				     op->u.blt.sx, op->u.blt.sy,
1797				     box, n);
1798		assert(ok);
1799		(void)ok;
1800	}
1801}
1802
/* Upload a source rectangle into the GPU destination while applying the
 * constant pixel fixup (sna_replace__xor / sna_write_boxes__xor).  Mirrors
 * blt_put_composite() but for the alpha-fixup path. */
fastcall static void
blt_put_composite_with_alpha(struct sna *sna,
			     const struct sna_composite_op *op,
			     const struct sna_composite_rectangles *r)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	struct sna_pixmap *dst_priv = sna_pixmap(dst);
	int pitch = src->devKind;
	char *data = src->devPrivate.ptr;

	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;
	int16_t src_x = r->src.x + op->u.blt.sx;
	int16_t src_y = r->src.y + op->u.blt.sy;

	/* Full coverage of an unpinned destination: replace the bo wholesale. */
	if (!dst_priv->pinned &&
	    dst_x <= 0 && dst_y <= 0 &&
	    dst_x + r->width >= op->dst.width &&
	    dst_y + r->height >= op->dst.height) {
		int bpp = dst->drawable.bitsPerPixel / 8;

		/* Advance to the source pixel mapping onto dst (0, 0). */
		data += (src_x - dst_x) * bpp;
		data += (src_y - dst_y) * pitch;

		/* sna_replace__xor returns the bo now backing the pixmap;
		 * store it back so gpu_bo stays current. */
		dst_priv->gpu_bo =
			sna_replace__xor(sna, op->dst.pixmap, dst_priv->gpu_bo,
					 data, pitch,
					 0xffffffff, op->u.blt.pixel);
	} else {
		BoxRec box;

		box.x1 = dst_x;
		box.y1 = dst_y;
		box.x2 = dst_x + r->width;
		box.y2 = dst_y + r->height;

		sna_write_boxes__xor(sna, dst,
				     dst_priv->gpu_bo, 0, 0,
				     data, pitch, src_x, src_y,
				     &box, 1,
				     0xffffffff, op->u.blt.pixel);
	}
}
1847
/* Upload one box into the GPU destination with the constant pixel fixup.
 * NOTE(review): the replace path passes op->dst.bo to sna_replace__xor but
 * stores the result into dst_priv->gpu_bo; prepare_blt_put() asserts that
 * op->dst.bo == sna_pixmap(...)->gpu_bo, so the two alias here — confirm
 * that invariant still holds for all callers. */
fastcall static void
blt_put_composite_box_with_alpha(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box)
{
	PixmapPtr src = op->u.blt.src_pixmap;
	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);

	DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
	     op->u.blt.sx, op->u.blt.sy,
	     op->dst.x, op->dst.y));

	/* Full coverage of an unpinned destination: replace the bo wholesale. */
	if (!dst_priv->pinned &&
	    box->x2 - box->x1 == op->dst.width &&
	    box->y2 - box->y1 == op->dst.height) {
		int pitch = src->devKind;
		int bpp = src->drawable.bitsPerPixel / 8;
		char *data = src->devPrivate.ptr;

		data += (box->y1 + op->u.blt.sy) * pitch;
		data += (box->x1 + op->u.blt.sx) * bpp;

		dst_priv->gpu_bo =
			sna_replace__xor(sna, op->dst.pixmap, op->dst.bo,
					 data, pitch,
					 0xffffffff, op->u.blt.pixel);
	} else {
		sna_write_boxes__xor(sna, op->dst.pixmap,
				     op->dst.bo, op->dst.x, op->dst.y,
				     src->devPrivate.ptr,
				     src->devKind,
				     op->u.blt.sx, op->u.blt.sy,
				     box, 1,
				     0xffffffff, op->u.blt.pixel);
	}
}
1884
/* Upload n boxes into the GPU destination with the constant pixel fixup;
 * a single full-coverage box on an unpinned pixmap replaces the bo
 * wholesale.  NOTE(review): like blt_put_composite_box_with_alpha(), the
 * replace path relies on op->dst.bo aliasing dst_priv->gpu_bo (asserted
 * in prepare_blt_put()). */
static void
blt_put_composite_boxes_with_alpha(struct sna *sna,
				   const struct sna_composite_op *op,
				   const BoxRec *box, int n)
{
	PixmapPtr src = op->u.blt.src_pixmap;
	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);

	DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__,
	     op->u.blt.sx, op->u.blt.sy,
	     op->dst.x, op->dst.y,
	     box->x1, box->y1, box->x2, box->y2, n));

	if (n == 1 && !dst_priv->pinned &&
	    box->x2 - box->x1 == op->dst.width &&
	    box->y2 - box->y1 == op->dst.height) {
		int pitch = src->devKind;
		int bpp = src->drawable.bitsPerPixel / 8;
		char *data = src->devPrivate.ptr;

		data += (box->y1 + op->u.blt.sy) * pitch;
		data += (box->x1 + op->u.blt.sx) * bpp;

		dst_priv->gpu_bo =
			sna_replace__xor(sna, op->dst.pixmap, op->dst.bo,
					 data, pitch,
					 0xffffffff, op->u.blt.pixel);
	} else {
		sna_write_boxes__xor(sna, op->dst.pixmap,
				     op->dst.bo, op->dst.x, op->dst.y,
				     src->devPrivate.ptr,
				     src->devKind,
				     op->u.blt.sx, op->u.blt.sy,
				     box, n,
				     0xffffffff, op->u.blt.pixel);
	}
}
1922
1923static bool
1924prepare_blt_put(struct sna *sna,
1925		struct sna_composite_op *op,
1926		uint32_t alpha_fixup)
1927{
1928	DBG(("%s\n", __FUNCTION__));
1929
1930	if (op->dst.bo) {
1931		assert(op->dst.bo == sna_pixmap(op->dst.pixmap)->gpu_bo);
1932		if (alpha_fixup) {
1933			op->u.blt.pixel = alpha_fixup;
1934			op->blt   = blt_put_composite_with_alpha;
1935			op->box   = blt_put_composite_box_with_alpha;
1936			op->boxes = blt_put_composite_boxes_with_alpha;
1937		} else {
1938			op->blt   = blt_put_composite;
1939			op->box   = blt_put_composite_box;
1940			op->boxes = blt_put_composite_boxes;
1941		}
1942	} else {
1943		if (alpha_fixup) {
1944			op->u.blt.pixel = alpha_fixup;
1945			op->blt   = blt_put_composite_with_alpha__cpu;
1946			op->box   = blt_put_composite_box_with_alpha__cpu;
1947			op->boxes = blt_put_composite_boxes_with_alpha__cpu;
1948		} else {
1949			op->blt   = blt_put_composite__cpu;
1950			op->box   = blt_put_composite_box__cpu;
1951			op->boxes = blt_put_composite_boxes__cpu;
1952		}
1953	}
1954	op->done = nop_done;
1955
1956	return true;
1957}
1958
1959static bool
1960is_clear(PixmapPtr pixmap)
1961{
1962	struct sna_pixmap *priv = sna_pixmap(pixmap);
1963	return priv && priv->clear;
1964}
1965
1966bool
1967sna_blt_composite(struct sna *sna,
1968		  uint32_t op,
1969		  PicturePtr src,
1970		  PicturePtr dst,
1971		  int16_t x, int16_t y,
1972		  int16_t dst_x, int16_t dst_y,
1973		  int16_t width, int16_t height,
1974		  struct sna_composite_op *tmp,
1975		  bool fallback)
1976{
1977	PictFormat src_format = src->format;
1978	PixmapPtr src_pixmap;
1979	struct kgem_bo *bo;
1980	int16_t tx, ty;
1981	BoxRec dst_box, src_box;
1982	uint32_t alpha_fixup;
1983	uint32_t color, hint;
1984	bool was_clear;
1985	bool ret;
1986
1987#if DEBUG_NO_BLT || NO_BLT_COMPOSITE
1988	return false;
1989#endif
1990
1991	DBG(("%s (%d, %d), (%d, %d), %dx%d\n",
1992	     __FUNCTION__, x, y, dst_x, dst_y, width, height));
1993
1994	switch (dst->pDrawable->bitsPerPixel) {
1995	case 8:
1996	case 16:
1997	case 32:
1998		break;
1999	default:
2000		DBG(("%s: unhandled bpp: %d\n", __FUNCTION__,
2001		     dst->pDrawable->bitsPerPixel));
2002		return false;
2003	}
2004
2005	was_clear = sna_drawable_is_clear(dst->pDrawable);
2006	tmp->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
2007
2008	if (width | height) {
2009		dst_box.x1 = dst_x;
2010		dst_box.x2 = bound(dst_x, width);
2011		dst_box.y1 = dst_y;
2012		dst_box.y2 = bound(dst_y, height);
2013	} else
2014		sna_render_picture_extents(dst, &dst_box);
2015
2016	tmp->dst.format = dst->format;
2017	tmp->dst.width = tmp->dst.pixmap->drawable.width;
2018	tmp->dst.height = tmp->dst.pixmap->drawable.height;
2019	get_drawable_deltas(dst->pDrawable, tmp->dst.pixmap,
2020			    &tmp->dst.x, &tmp->dst.y);
2021
2022	if (op == PictOpClear) {
2023clear:
2024		if (was_clear)
2025			return prepare_blt_nop(sna, tmp);
2026
2027		hint = 0;
2028		if (can_render(sna)) {
2029			hint |= PREFER_GPU;
2030			if (dst->pCompositeClip->data == NULL && (width | height)) {
2031				hint |= IGNORE_CPU;
2032				if (width == tmp->dst.pixmap->drawable.width &&
2033				    height == tmp->dst.pixmap->drawable.height)
2034					hint |= REPLACES;
2035			}
2036		}
2037		tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
2038						  &dst_box, &tmp->damage);
2039		if (tmp->dst.bo && !kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
2040			DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n",
2041			     __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch));
2042			return false;
2043		}
2044
2045		if (!tmp->dst.bo) {
2046			RegionRec region;
2047
2048			region.extents = dst_box;
2049			region.data = NULL;
2050
2051			if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
2052							     MOVE_INPLACE_HINT | MOVE_WRITE))
2053				return false;
2054		} else if (hint & REPLACES)
2055			kgem_bo_undo(&sna->kgem, tmp->dst.bo);
2056
2057		return prepare_blt_clear(sna, tmp);
2058	}
2059
2060	if (is_solid(src)) {
2061		if (op == PictOpOver && is_opaque_solid(src))
2062			op = PictOpSrc;
2063		if (op == PictOpAdd && is_white(src))
2064			op = PictOpSrc;
2065		if (was_clear && (op == PictOpAdd || op == PictOpOver))
2066			op = PictOpSrc;
2067		if (op == PictOpOutReverse && is_opaque_solid(src))
2068			goto clear;
2069
2070		if (op != PictOpSrc) {
2071			DBG(("%s: unsupported op [%d] for blitting\n",
2072			     __FUNCTION__, op));
2073			return false;
2074		}
2075
2076		color = get_solid_color(src, tmp->dst.format);
2077fill:
2078		if (color == 0)
2079			goto clear;
2080
2081		hint = 0;
2082		if (can_render(sna)) {
2083			hint |= PREFER_GPU;
2084			if (dst->pCompositeClip->data == NULL && (width | height))
2085				hint |= IGNORE_CPU;
2086				if (width == tmp->dst.pixmap->drawable.width &&
2087				    height == tmp->dst.pixmap->drawable.height)
2088					hint |= REPLACES;
2089		}
2090		tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
2091						  &dst_box, &tmp->damage);
2092		if (tmp->dst.bo && !kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
2093			DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n",
2094			     __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch));
2095			return false;
2096		}
2097
2098		if (!tmp->dst.bo) {
2099			RegionRec region;
2100
2101			region.extents = dst_box;
2102			region.data = NULL;
2103
2104			if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
2105							MOVE_INPLACE_HINT | MOVE_WRITE))
2106				return false;
2107		} else if (hint & REPLACES)
2108			kgem_bo_undo(&sna->kgem, tmp->dst.bo);
2109
2110		return prepare_blt_fill(sna, tmp, color);
2111	}
2112
2113	if (!src->pDrawable) {
2114		DBG(("%s: unsupported procedural source\n",
2115		     __FUNCTION__));
2116		return false;
2117	}
2118
2119	if (src->filter == PictFilterConvolution) {
2120		DBG(("%s: convolutions filters not handled\n",
2121		     __FUNCTION__));
2122		return false;
2123	}
2124
2125	if (op == PictOpOver && PICT_FORMAT_A(src_format) == 0)
2126		op = PictOpSrc;
2127
2128	if (op != PictOpSrc) {
2129		DBG(("%s: unsupported op [%d] for blitting\n",
2130		     __FUNCTION__, op));
2131		return false;
2132	}
2133
2134	if (!sna_transform_is_integer_translation(src->transform, &tx, &ty)) {
2135		DBG(("%s: source transform is not an integer translation\n",
2136		     __FUNCTION__));
2137		return false;
2138	}
2139	x += tx;
2140	y += ty;
2141
2142	if ((x >= src->pDrawable->width ||
2143	     y >= src->pDrawable->height ||
2144	     x + width  <= 0 ||
2145	     y + height <= 0) &&
2146	    (!src->repeat || src->repeatType == RepeatNone)) {
2147		DBG(("%s: source is outside of valid area, converting to clear\n",
2148		     __FUNCTION__));
2149		goto clear;
2150	}
2151
2152	src_pixmap = get_drawable_pixmap(src->pDrawable);
2153	if (is_clear(src_pixmap)) {
2154		color = color_convert(sna_pixmap(src_pixmap)->clear_color,
2155				      src->format, tmp->dst.format);
2156		goto fill;
2157	}
2158
2159	alpha_fixup = 0;
2160	if (!(dst->format == src_format ||
2161	      dst->format == alphaless(src_format) ||
2162	      (alphaless(dst->format) == alphaless(src_format) &&
2163	       sna_get_pixel_from_rgba(&alpha_fixup,
2164				       0, 0, 0, 0xffff,
2165				       dst->format)))) {
2166		DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n",
2167		     __FUNCTION__, (unsigned)src_format, dst->format));
2168		return false;
2169	}
2170
2171	/* XXX tiling? fixup extend none? */
2172	if (x < 0 || y < 0 ||
2173	    x + width  > src->pDrawable->width ||
2174	    y + height > src->pDrawable->height) {
2175		DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d, repeat=%d\n",
2176		     __FUNCTION__,
2177		     x, y, x+width, y+width, src->pDrawable->width, src->pDrawable->height, src->repeatType));
2178		if (src->repeat && src->repeatType == RepeatNormal) {
2179			x = x % src->pDrawable->width;
2180			y = y % src->pDrawable->height;
2181			if (x < 0)
2182				x += src->pDrawable->width;
2183			if (y < 0)
2184				y += src->pDrawable->height;
2185			if (x + width  > src->pDrawable->width ||
2186			    y + height > src->pDrawable->height)
2187				return false;
2188		} else
2189			return false;
2190	}
2191
2192	get_drawable_deltas(src->pDrawable, src_pixmap, &tx, &ty);
2193	x += tx + src->pDrawable->x;
2194	y += ty + src->pDrawable->y;
2195	if (x < 0 || y < 0 ||
2196	    x + width  > src_pixmap->drawable.width ||
2197	    y + height > src_pixmap->drawable.height) {
2198		DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid pixmap %dx%d\n",
2199		     __FUNCTION__,
2200		     x, y, x+width, y+width, src_pixmap->drawable.width, src_pixmap->drawable.height));
2201		return false;
2202	}
2203
2204	tmp->u.blt.src_pixmap = src_pixmap;
2205	tmp->u.blt.sx = x - dst_x;
2206	tmp->u.blt.sy = y - dst_y;
2207	DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
2208	     __FUNCTION__,
2209	     tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy, alpha_fixup));
2210
2211	src_box.x1 = x;
2212	src_box.y1 = y;
2213	src_box.x2 = x + width;
2214	src_box.y2 = y + height;
2215	bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true);
2216	if (bo && !kgem_bo_can_blt(&sna->kgem, bo)) {
2217		DBG(("%s: can not blit from src size=%dx%d, tiling? %d, pitch? %d\n",
2218		     __FUNCTION__,
2219		     src_pixmap->drawable.width  < sna->render.max_3d_size,
2220		     src_pixmap->drawable.height < sna->render.max_3d_size,
2221		     bo->tiling, bo->pitch));
2222
2223		if (src_pixmap->drawable.width  <= sna->render.max_3d_size &&
2224		    src_pixmap->drawable.height <= sna->render.max_3d_size &&
2225		    bo->pitch <= sna->render.max_3d_pitch &&
2226		    !fallback)
2227		{
2228			return false;
2229		}
2230
2231		bo = NULL;
2232	}
2233
2234	hint = 0;
2235	if (bo || can_render(sna)) {
2236		hint |= PREFER_GPU;
2237		if (dst->pCompositeClip->data == NULL && (width | height)) {
2238			hint |= IGNORE_CPU;
2239			if (width == tmp->dst.pixmap->drawable.width &&
2240			    height == tmp->dst.pixmap->drawable.height)
2241				hint |= REPLACES;
2242		}
2243		if (bo)
2244			hint |= FORCE_GPU;
2245	}
2246	tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
2247					  &dst_box, &tmp->damage);
2248
2249	if (hint & REPLACES)
2250		kgem_bo_undo(&sna->kgem, tmp->dst.bo);
2251
2252	ret = false;
2253	if (bo) {
2254		if (!tmp->dst.bo) {
2255			DBG(("%s: fallback -- unaccelerated read back\n",
2256			     __FUNCTION__));
2257			if (fallback || !kgem_bo_is_busy(bo))
2258				goto put;
2259		} else if (bo->snoop && tmp->dst.bo->snoop) {
2260			DBG(("%s: fallback -- can not copy between snooped bo\n",
2261			     __FUNCTION__));
2262			goto put;
2263		} else if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
2264			DBG(("%s: fallback -- unaccelerated upload\n",
2265			     __FUNCTION__));
2266			if (fallback || !kgem_bo_is_busy(bo))
2267				goto put;
2268		} else {
2269			ret = prepare_blt_copy(sna, tmp, bo, alpha_fixup);
2270			if (fallback && !ret)
2271				goto put;
2272		}
2273	} else {
2274		RegionRec region;
2275
2276put:
2277		if (tmp->dst.bo == sna_pixmap(tmp->dst.pixmap)->cpu_bo) {
2278			tmp->dst.bo = NULL;
2279			tmp->damage = NULL;
2280		}
2281
2282		if (tmp->dst.bo == NULL) {
2283			hint = MOVE_INPLACE_HINT | MOVE_WRITE;
2284			if (dst->pCompositeClip->data)
2285				hint |= MOVE_READ;
2286
2287			region.extents = dst_box;
2288			region.data = NULL;
2289			if (!sna_drawable_move_region_to_cpu(dst->pDrawable,
2290							     &region, hint))
2291				return false;
2292
2293			assert(tmp->damage == NULL);
2294		}
2295
2296		region.extents = src_box;
2297		region.data = NULL;
2298		if (!sna_drawable_move_region_to_cpu(&src_pixmap->drawable,
2299						     &region, MOVE_READ))
2300			return false;
2301
2302		ret = prepare_blt_put(sna, tmp, alpha_fixup);
2303	}
2304
2305	return ret;
2306}
2307
/* Completion hook for composite operations whose source was converted
 * into a temporary bo: releases that source bo and unwinds any
 * destination redirection set up by the render core. */
static void convert_done(struct sna *sna, const struct sna_composite_op *op)
{
	struct kgem *kgem = &sna->kgem;

	assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
	/* If the ring has drained and we have work queued, submit the
	 * batch now so the GPU can start on it immediately. */
	if (kgem->nexec > 1 && __kgem_ring_empty(kgem))
		_kgem_submit(kgem);

	/* Drop the reference on the converted (temporary) source bo. */
	kgem_bo_destroy(kgem, op->src.bo);
	sna_render_composite_redirect_done(sna, op);
}
2319
/* gen6+ variant of convert_done, installed when source and destination
 * share the same bo: emits a zeroed XY_SETUP_CLIP packet before the
 * common teardown.  NOTE(review): presumably this resets the blitter
 * clip state after an overlapping copy — confirm against the gen6 BLT
 * errata/workarounds. */
static void gen6_convert_done(struct sna *sna, const struct sna_composite_op *op)
{
	struct kgem *kgem = &sna->kgem;

	/* Only emit the packet if 3 dwords still fit in the batch. */
	if (kgem_check_batch(kgem, 3)) {
		uint32_t *b = kgem->batch + kgem->nbatch;
		b[0] = XY_SETUP_CLIP;
		b[1] = b[2] = 0;
		kgem->nbatch += 3;
		assert(kgem->nbatch < kgem->surface);
	}

	convert_done(sna, op);
}
2334
2335bool
2336sna_blt_composite__convert(struct sna *sna,
2337			   int x, int y,
2338			   int width, int height,
2339			   struct sna_composite_op *tmp)
2340{
2341	uint32_t alpha_fixup;
2342	int sx, sy;
2343	uint8_t op;
2344
2345#if DEBUG_NO_BLT || NO_BLT_COMPOSITE
2346	return false;
2347#endif
2348
2349	DBG(("%s src=%d, dst=%d (redirect? %d)\n", __FUNCTION__,
2350	     tmp->src.bo->handle, tmp->dst.bo->handle,
2351	     tmp->redirect.real_bo ? tmp->redirect.real_bo->handle : 0));
2352
2353	if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo) ||
2354	    !kgem_bo_can_blt(&sna->kgem, tmp->src.bo)) {
2355		DBG(("%s: cannot blt from src or to dst\n", __FUNCTION__));
2356		return false;
2357	}
2358
2359	if (tmp->src.transform) {
2360		DBG(("%s: transforms not handled by the BLT\n", __FUNCTION__));
2361		return false;
2362	}
2363
2364	if (tmp->src.filter == PictFilterConvolution) {
2365		DBG(("%s: convolutions filters not handled\n",
2366		     __FUNCTION__));
2367		return false;
2368	}
2369
2370	op = tmp->op;
2371	if (op == PictOpOver && PICT_FORMAT_A(tmp->src.pict_format) == 0)
2372		op = PictOpSrc;
2373	if (op != PictOpSrc) {
2374		DBG(("%s: unsupported op [%d] for blitting\n",
2375		     __FUNCTION__, op));
2376		return false;
2377	}
2378
2379	alpha_fixup = 0;
2380	if (!(tmp->dst.format == tmp->src.pict_format ||
2381	      tmp->dst.format == alphaless(tmp->src.pict_format) ||
2382	      (alphaless(tmp->dst.format) == alphaless(tmp->src.pict_format) &&
2383	       sna_get_pixel_from_rgba(&alpha_fixup,
2384				       0, 0, 0, 0xffff,
2385				       tmp->dst.format)))) {
2386		DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n",
2387		     __FUNCTION__,
2388		     (unsigned)tmp->src.pict_format,
2389		     (unsigned)tmp->dst.format));
2390		return false;
2391	}
2392
2393	sx = tmp->src.offset[0];
2394	sy = tmp->src.offset[1];
2395
2396	x += sx;
2397	y += sy;
2398	if (x < 0 || y < 0 ||
2399	    x + width  > tmp->src.width ||
2400	    y + height > tmp->src.height) {
2401		DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d\n",
2402		     __FUNCTION__,
2403		     x, y, x+width, y+width, tmp->src.width, tmp->src.height));
2404		if (tmp->src.repeat == RepeatNormal) {
2405			int xx = x % tmp->src.width;
2406			int yy = y % tmp->src.height;
2407			if (xx < 0)
2408				xx += tmp->src.width;
2409			if (yy < 0)
2410				yy += tmp->src.height;
2411			if (xx + width  > tmp->src.width ||
2412			    yy + height > tmp->src.height)
2413				return false;
2414
2415			sx += xx - x;
2416			sy += yy - y;
2417		} else
2418			return false;
2419	}
2420
2421	if (!kgem_check_many_bo_fenced(&sna->kgem, tmp->dst.bo, tmp->src.bo, NULL)) {
2422		kgem_submit(&sna->kgem);
2423		if (!kgem_check_many_bo_fenced(&sna->kgem,
2424					       tmp->dst.bo, tmp->src.bo, NULL)) {
2425			DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
2426			return false;
2427		}
2428		_kgem_set_mode(&sna->kgem, KGEM_BLT);
2429	}
2430
2431	DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
2432	     __FUNCTION__,
2433	     tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy, alpha_fixup));
2434
2435	tmp->u.blt.src_pixmap = NULL;
2436	tmp->u.blt.sx = sx;
2437	tmp->u.blt.sy = sy;
2438
2439	if (alpha_fixup) {
2440		tmp->blt   = blt_composite_copy_with_alpha;
2441		tmp->box   = blt_composite_copy_box_with_alpha;
2442		tmp->boxes = blt_composite_copy_boxes_with_alpha;
2443
2444		if (!sna_blt_alpha_fixup_init(sna, &tmp->u.blt,
2445					      tmp->src.bo, tmp->dst.bo,
2446					      PICT_FORMAT_BPP(tmp->src.pict_format),
2447					      alpha_fixup))
2448			return false;
2449	} else {
2450		tmp->blt   = blt_composite_copy;
2451		tmp->box   = blt_composite_copy_box;
2452		tmp->boxes = blt_composite_copy_boxes;
2453		tmp->thread_boxes = blt_composite_copy_boxes__thread;
2454
2455		if (!sna_blt_copy_init(sna, &tmp->u.blt,
2456				       tmp->src.bo, tmp->dst.bo,
2457				       PICT_FORMAT_BPP(tmp->src.pict_format),
2458				       GXcopy))
2459			return false;
2460	}
2461
2462	tmp->done = convert_done;
2463	if (sna->kgem.gen >= 060 && tmp->src.bo == tmp->dst.bo)
2464		tmp->done = gen6_convert_done;
2465
2466	return true;
2467}
2468
2469static void sna_blt_fill_op_blt(struct sna *sna,
2470				const struct sna_fill_op *op,
2471				int16_t x, int16_t y,
2472				int16_t width, int16_t height)
2473{
2474	sna_blt_fill_one(sna, &op->base.u.blt, x, y, width, height);
2475}
2476
2477fastcall static void sna_blt_fill_op_box(struct sna *sna,
2478					 const struct sna_fill_op *op,
2479					 const BoxRec *box)
2480{
2481	_sna_blt_fill_box(sna, &op->base.u.blt, box);
2482}
2483
2484fastcall static void sna_blt_fill_op_boxes(struct sna *sna,
2485					   const struct sna_fill_op *op,
2486					   const BoxRec *box,
2487					   int nbox)
2488{
2489	_sna_blt_fill_boxes(sna, &op->base.u.blt, box, nbox);
2490}
2491
2492bool sna_blt_fill(struct sna *sna, uint8_t alu,
2493		  struct kgem_bo *bo, int bpp,
2494		  uint32_t pixel,
2495		  struct sna_fill_op *fill)
2496{
2497#if DEBUG_NO_BLT || NO_BLT_FILL
2498	return false;
2499#endif
2500
2501	DBG(("%s(alu=%d, pixel=%x, bpp=%d)\n", __FUNCTION__, alu, pixel, bpp));
2502
2503	if (!kgem_bo_can_blt(&sna->kgem, bo)) {
2504		DBG(("%s: rejected due to incompatible Y-tiling\n",
2505		     __FUNCTION__));
2506		return false;
2507	}
2508
2509	if (!sna_blt_fill_init(sna, &fill->base.u.blt,
2510			       bo, bpp, alu, pixel))
2511		return false;
2512
2513	fill->blt   = sna_blt_fill_op_blt;
2514	fill->box   = sna_blt_fill_op_box;
2515	fill->boxes = sna_blt_fill_op_boxes;
2516	fill->done  =
2517		(void (*)(struct sna *, const struct sna_fill_op *))nop_done;
2518	return true;
2519}
2520
2521static void sna_blt_copy_op_blt(struct sna *sna,
2522				const struct sna_copy_op *op,
2523				int16_t src_x, int16_t src_y,
2524				int16_t width, int16_t height,
2525				int16_t dst_x, int16_t dst_y)
2526{
2527	sna_blt_copy_one(sna, &op->base.u.blt,
2528			 src_x, src_y,
2529			 width, height,
2530			 dst_x, dst_y);
2531}
2532
2533bool sna_blt_copy(struct sna *sna, uint8_t alu,
2534		  struct kgem_bo *src,
2535		  struct kgem_bo *dst,
2536		  int bpp,
2537		  struct sna_copy_op *op)
2538{
2539#if DEBUG_NO_BLT || NO_BLT_COPY
2540	return false;
2541#endif
2542
2543	if (!kgem_bo_can_blt(&sna->kgem, src))
2544		return false;
2545
2546	if (!kgem_bo_can_blt(&sna->kgem, dst))
2547		return false;
2548
2549	if (!sna_blt_copy_init(sna, &op->base.u.blt,
2550			       src, dst,
2551			       bpp, alu))
2552		return false;
2553
2554	op->blt  = sna_blt_copy_op_blt;
2555	if (sna->kgem.gen >= 060 && src == dst)
2556		op->done = (void (*)(struct sna *, const struct sna_copy_op *))
2557			    gen6_blt_copy_done;
2558	else
2559		op->done = (void (*)(struct sna *, const struct sna_copy_op *))
2560			    nop_done;
2561	return true;
2562}
2563
/* Emit a single solid-fill rectangle as a 6-dword XY_COLOR_BLT, with two
 * peephole optimisations that rewrite the previous batch packet in place
 * when it covered exactly the same box.  Returns false to ask the caller
 * to use the scanline fill path instead. */
static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
			     struct kgem_bo *bo, int bpp,
			     uint32_t color,
			     const BoxRec *box)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t br13, cmd, *b;
	bool overwrites;

	assert(kgem_bo_can_blt (kgem, bo));

	DBG(("%s: box=((%d, %d), (%d, %d))\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	assert(box->x1 >= 0);
	assert(box->y1 >= 0);

	cmd = XY_COLOR_BLT;
	br13 = bo->pitch;
	/* Tiled destinations on gen4+ program their pitch in dwords. */
	if (kgem->gen >= 040 && bo->tiling) {
		cmd |= BLT_DST_TILED;
		br13 >>= 2;
	}
	assert(br13 <= MAXSHORT);

	br13 |= fill_ROP[alu] << 16;
	/* Colour-depth field of BR13; the cases fall through deliberately
	 * (32bpp sets both depth bits). */
	switch (bpp) {
	default: assert(0);
	case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		 br13 |= 1 << 25; /* RGB8888 */
		 /* fallthrough */
	case 16: br13 |= 1 << 24; /* RGB565 */
		 /* fallthrough */
	case 8: break;
	}

	/* All too frequently one blt completely overwrites the previous */
	overwrites = alu == GXcopy || alu == GXclear || alu == GXset;
	/* If the last packet is a fill of the very same box on the same
	 * target, just rewrite its ROP and colour in place. */
	if (overwrites && kgem->nbatch >= 6 &&
	    kgem->batch[kgem->nbatch-6] == cmd &&
	    *(uint64_t *)&kgem->batch[kgem->nbatch-4] == *(const uint64_t *)box &&
	    kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) {
		DBG(("%s: replacing last fill\n", __FUNCTION__));
		kgem->batch[kgem->nbatch-5] = br13;
		kgem->batch[kgem->nbatch-1] = color;
		return true;
	}
	/* Likewise turn a just-emitted 8-dword copy of the same box into
	 * this 6-dword fill. */
	if (overwrites && kgem->nbatch >= 8 &&
	    (kgem->batch[kgem->nbatch-8] & 0xffc0000f) == XY_SRC_COPY_BLT_CMD &&
	    *(uint64_t *)&kgem->batch[kgem->nbatch-6] == *(const uint64_t *)box &&
	    kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) {
		DBG(("%s: replacing last copy\n", __FUNCTION__));
		kgem->batch[kgem->nbatch-8] = cmd;
		kgem->batch[kgem->nbatch-7] = br13;
		kgem->batch[kgem->nbatch-3] = color;
		/* Keep the src bo as part of the execlist, just remove
		 * its relocation entry.
		 */
		kgem->nreloc--;
		kgem->nbatch -= 2;
		return true;
	}

	/* If we are currently emitting SCANLINES, keep doing so */
	if (sna->blt_state.fill_bo == bo->unique_id &&
	    sna->blt_state.fill_pixel == color &&
	    (sna->blt_state.fill_alu == alu ||
	     sna->blt_state.fill_alu == ~alu)) {
		DBG(("%s: matching last fill, converting to scanlines\n",
		     __FUNCTION__));
		return false;
	}

	kgem_set_mode(kgem, KGEM_BLT, bo);
	/* Need room for a 6-dword packet plus one relocation; otherwise
	 * flush and start a fresh BLT batch. */
	if (!kgem_check_batch(kgem, 6) ||
	    !kgem_check_reloc(kgem, 1) ||
	    !kgem_check_bo_fenced(kgem, bo)) {
		kgem_submit(kgem);
		assert(kgem_check_bo_fenced(&sna->kgem, bo));
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	b = kgem->batch + kgem->nbatch;
	b[0] = cmd;
	b[1] = br13;
	/* Copy both box corners with a single 64-bit store. */
	*(uint64_t *)(b+2) = *(const uint64_t *)box;
	b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
			      I915_GEM_DOMAIN_RENDER << 16 |
			      I915_GEM_DOMAIN_RENDER |
			      KGEM_RELOC_FENCED,
			      0);
	b[5] = color;
	kgem->nbatch += 6;
	assert(kgem->nbatch < kgem->surface);

	/* Record the fill with an inverted alu so a repeat of this exact
	 * fill is detected above and rerouted to the scanline path. */
	sna->blt_state.fill_bo = bo->unique_id;
	sna->blt_state.fill_pixel = color;
	sna->blt_state.fill_alu = ~alu;
	return true;
}
2662
/* Fill an array of boxes with a solid colour.  A single box first tries
 * sna_blt_fill_box(); otherwise one 9-dword setup packet
 * (XY_SETUP_MONO_PATTERN_SL_BLT) is emitted and every box becomes a
 * 3-dword XY_SCANLINE_BLT, re-emitting the setup whenever the batch must
 * be flushed mid-run. */
bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
			struct kgem_bo *bo, int bpp,
			uint32_t pixel,
			const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t br13, cmd;

#if DEBUG_NO_BLT || NO_BLT_FILL_BOXES
	return false;
#endif

	DBG(("%s (%d, %08x, %d) x %d\n",
	     __FUNCTION__, bpp, pixel, alu, nbox));

	if (!kgem_bo_can_blt(kgem, bo)) {
		DBG(("%s: fallback -- cannot blt to dst\n", __FUNCTION__));
		return false;
	}

	/* Canonicalise trivial fills so equivalent requests share the
	 * cached fill state (GXclear/GXset ignore the pixel value). */
	if (alu == GXclear)
		pixel = 0;
	else if (alu == GXcopy) {
		if (pixel == 0)
			alu = GXclear;
		else if (pixel == -1)
			alu = GXset;
	}

	if (nbox == 1 && sna_blt_fill_box(sna, alu, bo, bpp, pixel, box))
		return true;

	br13 = bo->pitch;
	cmd = XY_SCANLINE_BLT;
	/* Tiled destinations on gen4+ use a dword pitch and a tiling bit. */
	if (kgem->gen >= 040 && bo->tiling) {
		cmd |= 1 << 11;
		br13 >>= 2;
	}
	assert(br13 <= MAXSHORT);

	br13 |= 1<<31 | fill_ROP[alu] << 16;
	/* Colour-depth bits; the cases fall through (32bpp sets both). */
	switch (bpp) {
	default: assert(0);
	case 32: br13 |= 1 << 25; /* RGB8888 */
		 /* fallthrough */
	case 16: br13 |= 1 << 24; /* RGB565 */
		 /* fallthrough */
	case 8: break;
	}

	kgem_set_mode(kgem, KGEM_BLT, bo);
	if (!kgem_check_batch(kgem, 12) ||
	    !kgem_check_bo_fenced(kgem, bo)) {
		kgem_submit(kgem);
		if (!kgem_check_bo_fenced(&sna->kgem, bo))
			return false;
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	/* Emit the setup packet only if the cached fill state does not
	 * already match this bo/pixel/alu combination. */
	if (sna->blt_state.fill_bo != bo->unique_id ||
	    sna->blt_state.fill_pixel != pixel ||
	    sna->blt_state.fill_alu != alu)
	{
		uint32_t *b;

		if (!kgem_check_reloc(kgem, 1)) {
			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
		}

		b = kgem->batch + kgem->nbatch;
		b[0] = XY_SETUP_MONO_PATTERN_SL_BLT;
		if (bpp == 32)
			b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		b[1] = br13;
		b[2] = 0;
		b[3] = 0;
		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
				      I915_GEM_DOMAIN_RENDER << 16 |
				      I915_GEM_DOMAIN_RENDER |
				      KGEM_RELOC_FENCED,
				      0);
		/* Solid fill: both pattern colours are the fill pixel. */
		b[5] = pixel;
		b[6] = pixel;
		b[7] = 0;
		b[8] = 0;
		kgem->nbatch += 9;
		assert(kgem->nbatch < kgem->surface);

		sna->blt_state.fill_bo = bo->unique_id;
		sna->blt_state.fill_pixel = pixel;
		sna->blt_state.fill_alu = alu;
	}

	do {
		int nbox_this_time;

		/* Clamp to however many 3-dword packets still fit. */
		nbox_this_time = nbox;
		if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
			nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		do {
			uint32_t *b;

			DBG(("%s: (%d, %d), (%d, %d): %08x\n",
			     __FUNCTION__,
			     box->x1, box->y1,
			     box->x2, box->y2,
			     pixel));

			assert(box->x1 >= 0);
			assert(box->y1 >= 0);
			assert(box->y2 * bo->pitch <= kgem_bo_size(bo));

			b = kgem->batch + kgem->nbatch;
			kgem->nbatch += 3;
			assert(kgem->nbatch < kgem->surface);
			b[0] = cmd;
			/* Copy both box corners in one 64-bit store. */
			*(uint64_t *)(b+1) = *(const uint64_t *)box;
			box++;
		} while (--nbox_this_time);

		/* More boxes remain: flush, then re-emit the setup packet
		 * in the fresh batch before continuing. */
		if (nbox) {
			uint32_t *b;

			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);

			b = kgem->batch + kgem->nbatch;
			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT;
			if (bpp == 32)
				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
			b[1] = br13;
			b[2] = 0;
			b[3] = 0;
			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
					      I915_GEM_DOMAIN_RENDER << 16 |
					      I915_GEM_DOMAIN_RENDER |
					      KGEM_RELOC_FENCED,
					      0);
			b[5] = pixel;
			b[6] = pixel;
			b[7] = 0;
			b[8] = 0;
			kgem->nbatch += 9;
			assert(kgem->nbatch < kgem->surface);
		}
	} while (nbox);

	/* Submit early if the GPU has drained and batches are queued. */
	if (kgem->nexec > 1 && __kgem_ring_empty(kgem))
		_kgem_submit(kgem);

	return true;
}
2817
/* Copy an array of boxes between two bos using 8-dword XY_SRC_COPY_BLT
 * packets.  When both bos cannot fit into the aperture together the
 * copy is delegated to sna_tiling_blt_copy_boxes().  The
 * (dst_dx|dst_dy)==0 path writes header and coordinates with 64-bit
 * stores as a fast path. */
bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
			struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
			struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
			int bpp, const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	unsigned src_pitch, br13, cmd;

#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES
	return false;
#endif

	DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n",
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox,
	    src_bo->tiling, dst_bo->tiling,
	    src_bo->pitch, dst_bo->pitch));

	if (!kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) {
		DBG(("%s: cannot blt to src? %d or dst? %d\n",
		     __FUNCTION__,
		     kgem_bo_can_blt(kgem, src_bo),
		     kgem_bo_can_blt(kgem, dst_bo)));
		return false;
	}

	cmd = XY_SRC_COPY_BLT_CMD;
	if (bpp == 32)
		cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;

	/* Tiled surfaces on gen4+ program their pitch in dwords. */
	src_pitch = src_bo->pitch;
	if (kgem->gen >= 040 && src_bo->tiling) {
		cmd |= BLT_SRC_TILED;
		src_pitch >>= 2;
	}
	assert(src_pitch <= MAXSHORT);

	br13 = dst_bo->pitch;
	if (kgem->gen >= 040 && dst_bo->tiling) {
		cmd |= BLT_DST_TILED;
		br13 >>= 2;
	}
	assert(br13 <= MAXSHORT);

	br13 |= copy_ROP[alu] << 16;
	/* Colour-depth bits; the cases fall through (32bpp sets both). */
	switch (bpp) {
	default: assert(0);
	case 32: br13 |= 1 << 25; /* RGB8888 */
		 /* fallthrough */
	case 16: br13 |= 1 << 24; /* RGB565 */
		 /* fallthrough */
	case 8: break;
	}

	/* Compare first box against a previous fill */
	/* If the last batch packet is a solid fill of exactly the
	 * destination rectangle of the first box, this copy will overwrite
	 * it completely, so delete the fill from the batch. */
	if (kgem->nbatch >= 6 &&
	    (alu == GXcopy || alu == GXclear || alu == GXset) &&
	    kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle &&
	    kgem->batch[kgem->nbatch-6] == ((cmd & ~XY_SRC_COPY_BLT_CMD) | XY_COLOR_BLT) &&
	    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
	    kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
		DBG(("%s: deleting last fill\n", __FUNCTION__));
		kgem->nbatch -= 6;
		kgem->nreloc--;
	}

	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
	/* Room for one 8-dword packet and two relocations, and both bos
	 * in the aperture; else flush, and if they still do not fit
	 * together, fall back to the tiled copy helper. */
	if (!kgem_check_batch(kgem, 8) ||
	    !kgem_check_reloc(kgem, 2) ||
	    !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL))
			return sna_tiling_blt_copy_boxes(sna, alu,
							 src_bo, src_dx, src_dy,
							 dst_bo, dst_dx, dst_dy,
							 bpp, box, nbox);
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	/* Fast path: no destination offset, so the box coordinates can be
	 * copied straight into the packet with 64-bit stores. */
	if ((dst_dx | dst_dy) == 0) {
		uint64_t hdr = (uint64_t)br13 << 32 | cmd;
		do {
			int nbox_this_time;

			/* Clamp to the batch space and relocation entries
			 * still available. */
			nbox_this_time = nbox;
			if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
				nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
			assert(nbox_this_time);
			nbox -= nbox_this_time;

			do {
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
				     __FUNCTION__,
				     box->x1, box->y1,
				     box->x2 - box->x1, box->y2 - box->y1));

				assert(box->x1 + src_dx >= 0);
				assert(box->y1 + src_dy >= 0);
				assert(box->x1 + src_dx <= INT16_MAX);
				assert(box->y1 + src_dy <= INT16_MAX);

				assert(box->x1 >= 0);
				assert(box->y1 >= 0);

				*(uint64_t *)&b[0] = hdr;
				*(uint64_t *)&b[2] = *(const uint64_t *)box;
				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      I915_GEM_DOMAIN_RENDER |
						      KGEM_RELOC_FENCED,
						      0);
				/* Source origin = dst origin + src delta. */
				b[5] = add2(b[2], src_dx, src_dy);
				b[6] = src_pitch;
				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      KGEM_RELOC_FENCED,
						      0);
				kgem->nbatch += 8;
				assert(kgem->nbatch < kgem->surface);
				box++;
			} while (--nbox_this_time);

			if (!nbox)
				break;

			/* Batch exhausted mid-run: flush and continue. */
			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
		} while (1);
	} else {
		/* General path: apply the destination offset per box. */
		do {
			int nbox_this_time;

			nbox_this_time = nbox;
			if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
				nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
			assert(nbox_this_time);
			nbox -= nbox_this_time;

			do {
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
				     __FUNCTION__,
				     box->x1, box->y1,
				     box->x2 - box->x1, box->y2 - box->y1));

				assert(box->x1 + src_dx >= 0);
				assert(box->y1 + src_dy >= 0);

				assert(box->x1 + dst_dx >= 0);
				assert(box->y1 + dst_dy >= 0);

				b[0] = cmd;
				b[1] = br13;
				b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
				b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      I915_GEM_DOMAIN_RENDER |
						      KGEM_RELOC_FENCED,
						      0);
				b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
				b[6] = src_pitch;
				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      KGEM_RELOC_FENCED,
						      0);
				kgem->nbatch += 8;
				assert(kgem->nbatch < kgem->surface);
				box++;
			} while (--nbox_this_time);

			if (!nbox)
				break;

			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
		} while (1);
	}

	/* Submit early if the GPU is idle; otherwise on gen6+ append an
	 * XY_SETUP_CLIP packet.  NOTE(review): presumably this resets the
	 * blitter clip state — confirm against gen6 BLT workarounds. */
	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
		_kgem_submit(kgem);
	} else if (kgem->gen >= 060 && kgem_check_batch(kgem, 3)) {
		uint32_t *b = kgem->batch + kgem->nbatch;
		b[0] = XY_SETUP_CLIP;
		b[1] = b[2] = 0;
		kgem->nbatch += 3;
		assert(kgem->nbatch < kgem->surface);
	}

	/* Invalidate the cached fill state; the batch contents changed. */
	sna->blt_state.fill_bo = 0;
	return true;
}
3014
3015static void box_extents(const BoxRec *box, int n, BoxRec *extents)
3016{
3017	*extents = *box;
3018	while (--n) {
3019		box++;
3020		if (box->x1 < extents->x1)
3021			extents->x1 = box->x1;
3022		if (box->y1 < extents->y1)
3023			extents->y1 = box->y1;
3024
3025		if (box->x2 > extents->x2)
3026			extents->x2 = box->x2;
3027		if (box->y2 > extents->y2)
3028			extents->y2 = box->y2;
3029	}
3030}
3031
/* Copy boxes between two pixmaps when the straightforward blit may be
 * blocked by Y-tiling: try to retile the offending bo(s) to X, or for a
 * self-copy stage the source region through a freshly allocated X-tiled
 * temporary (free_bo, destroyed before returning). */
bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu,
				 PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
				 PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
				 const BoxRec *box, int nbox)
{
	struct kgem_bo *free_bo = NULL;
	bool ret;

	DBG(("%s: alu=%d, n=%d\n", __FUNCTION__, alu, nbox));

	if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) {
		DBG(("%s: mismatching depths %d -> %d\n",
		     __FUNCTION__, src->drawable.depth, dst->drawable.depth));
		return false;
	}

	if (src_bo == dst_bo) {
		DBG(("%s: dst == src\n", __FUNCTION__));

		/* Self-copy on a Y-tiled bo: try to retile it to X. */
		if (src_bo->tiling == I915_TILING_Y &&
		    kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) {
			struct kgem_bo *bo;

			DBG(("%s: src is Y-tiled\n", __FUNCTION__));

			assert(src_bo == sna_pixmap(src)->gpu_bo);
			bo = sna_pixmap_change_tiling(src, I915_TILING_X);
			if (bo == NULL) {
				BoxRec extents;

				DBG(("%s: y-tiling conversion failed\n",
				     __FUNCTION__));

				/* Retiling failed: allocate an X-tiled
				 * temporary covering the extents of all
				 * boxes and stage the source through it. */
				box_extents(box, nbox, &extents);
				free_bo = kgem_create_2d(&sna->kgem,
							 extents.x2 - extents.x1,
							 extents.y2 - extents.y1,
							 src->drawable.bitsPerPixel,
							 I915_TILING_X, 0);
				if (free_bo == NULL) {
					DBG(("%s: fallback -- temp allocation failed\n",
					     __FUNCTION__));
					return false;
				}

				if (!sna_blt_copy_boxes(sna, GXcopy,
							src_bo, src_dx, src_dy,
							free_bo, -extents.x1, -extents.y1,
							src->drawable.bitsPerPixel,
							box, nbox)) {
					DBG(("%s: fallback -- temp copy failed\n",
					     __FUNCTION__));
					kgem_bo_destroy(&sna->kgem, free_bo);
					return false;
				}

				/* Read from the temporary instead; its
				 * origin is the extents' top-left corner. */
				src_dx = -extents.x1;
				src_dy = -extents.y1;
				src_bo = free_bo;
			} else
				dst_bo = src_bo = bo;
		}
	} else {
		/* Independent bos: retile each Y-tiled side to X. */
		if (src_bo->tiling == I915_TILING_Y &&
		    kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) {
			DBG(("%s: src is y-tiled\n", __FUNCTION__));
			assert(src_bo == sna_pixmap(src)->gpu_bo);
			src_bo = sna_pixmap_change_tiling(src, I915_TILING_X);
			if (src_bo == NULL) {
				DBG(("%s: fallback -- src y-tiling conversion failed\n",
				     __FUNCTION__));
				return false;
			}
		}

		if (dst_bo->tiling == I915_TILING_Y &&
		    kgem_bo_blt_pitch_is_ok(&sna->kgem, dst_bo)) {
			DBG(("%s: dst is y-tiled\n", __FUNCTION__));
			assert(dst_bo == sna_pixmap(dst)->gpu_bo);
			dst_bo = sna_pixmap_change_tiling(dst, I915_TILING_X);
			if (dst_bo == NULL) {
				DBG(("%s: fallback -- dst y-tiling conversion failed\n",
				     __FUNCTION__));
				return false;
			}
		}
	}

	/* Perform the actual copy with the (possibly substituted) bos. */
	ret =  sna_blt_copy_boxes(sna, alu,
				  src_bo, src_dx, src_dy,
				  dst_bo, dst_dx, dst_dy,
				  dst->drawable.bitsPerPixel,
				  box, nbox);

	if (free_bo)
		kgem_bo_destroy(&sna->kgem, free_bo);

	return ret;
}
3131