1/*
2 * Copyright (c) 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Chris Wilson <chris@chris-wilson.co.uk>
25 *
26 */
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include "sna.h"
33#include "sna_render.h"
34#include "sna_reg.h"
35
36#include <sys/mman.h>
37
38#define PITCH(x, y) ALIGN((x)*(y), 4)
39
40#define FORCE_INPLACE 0 /* 1 upload directly, -1 force indirect */
41
42/* XXX Need to avoid using GTT fenced access for I915_TILING_Y on 855GM */
43
44static inline bool upload_too_large(struct sna *sna, int width, int height)
45{
46	return width * height * 4 > sna->kgem.max_upload_tile_size;
47}
48
49static inline bool must_tile(struct sna *sna, int width, int height)
50{
51	return (width  > sna->render.max_3d_size ||
52		height > sna->render.max_3d_size ||
53		upload_too_large(sna, width, height));
54}
55
/* Is it worthwhile to read the boxes back from bo through a direct CPU
 * mapping?  Only linear and X-tiled bos qualify (X-tiled additionally
 * requires the CPU detiling memcpy).  With LLC the CPU map is cache
 * coherent and always cheap; otherwise an uncached read is only worth
 * it when we transfer the whole pixmap or the bo fits in a single page.
 */
static bool download_inplace__cpu(struct kgem *kgem,
				  PixmapPtr p, struct kgem_bo *bo,
				  const BoxRec *box, int nbox)
{
	BoxRec extents;

	switch (bo->tiling) {
	case I915_TILING_X:
		/* X-tiled readback needs the software detiler */
		if (!kgem->memcpy_from_tiled_x)
			return false;
		/* fall through */
	case I915_TILING_NONE:
		break;
	default:
		/* no CPU detiling for Y (or unknown) tiling modes */
		return false;
	}

	if (!kgem_bo_can_map__cpu(kgem, bo, false))
		return false;

	/* LLC keeps the CPU mapping coherent with the GPU caches. */
	if (kgem->has_llc)
		return true;

	/* Accumulate the bounds of all boxes.  NOTE(review): y1 is kept
	 * from the first box and y2 overwritten with each later box's y2,
	 * i.e. this assumes the boxes are sorted by y — confirm callers.
	 */
	extents = *box;
	while (--nbox) {
		++box;
		if (box->x1 < extents.x1)
			extents.x1 = box->x1;
		if (box->x2 > extents.x2)
			extents.x2 = box->x2;
		extents.y2 = box->y2;
	}

	/* Reading back the entire pixmap: the copy is unavoidable anyway. */
	if (extents.x2 - extents.x1 == p->drawable.width &&
	    extents.y2 - extents.y1 == p->drawable.height)
		return true;

	/* Otherwise only bother for tiny (single page) buffers. */
	return __kgem_bo_size(bo) <= PAGE_SIZE;
}
94
95static bool
96read_boxes_inplace__cpu(struct kgem *kgem,
97			PixmapPtr pixmap, struct kgem_bo *bo,
98			const BoxRec *box, int n)
99{
100	int bpp = pixmap->drawable.bitsPerPixel;
101	void *src, *dst = pixmap->devPrivate.ptr;
102	int src_pitch = bo->pitch;
103	int dst_pitch = pixmap->devKind;
104
105	if (!download_inplace__cpu(kgem, dst, bo, box, n))
106		return false;
107
108	if (bo->tiling == I915_TILING_Y)
109		return false;
110
111	assert(kgem_bo_can_map__cpu(kgem, bo, false));
112
113	src = kgem_bo_map__cpu(kgem, bo);
114	if (src == NULL)
115		return false;
116
117	kgem_bo_sync__cpu_full(kgem, bo, 0);
118
119	if (sigtrap_get())
120		return false;
121
122	DBG(("%s x %d\n", __FUNCTION__, n));
123
124	if (bo->tiling == I915_TILING_X) {
125		do {
126			memcpy_from_tiled_x(kgem, src, dst, bpp, src_pitch, dst_pitch,
127					    box->x1, box->y1,
128					    box->x1, box->y1,
129					    box->x2 - box->x1, box->y2 - box->y1);
130			box++;
131		} while (--n);
132	} else {
133		do {
134			memcpy_blt(src, dst, bpp, src_pitch, dst_pitch,
135				   box->x1, box->y1,
136				   box->x1, box->y1,
137				   box->x2 - box->x1, box->y2 - box->y1);
138			box++;
139		} while (--n);
140	}
141
142	sigtrap_put();
143	return true;
144}
145
146static void read_boxes_inplace(struct kgem *kgem,
147			       PixmapPtr pixmap, struct kgem_bo *bo,
148			       const BoxRec *box, int n)
149{
150	int bpp = pixmap->drawable.bitsPerPixel;
151	void *src, *dst = pixmap->devPrivate.ptr;
152	int src_pitch = bo->pitch;
153	int dst_pitch = pixmap->devKind;
154
155	if (read_boxes_inplace__cpu(kgem, pixmap, bo, box, n))
156		return;
157
158	DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling));
159
160	if (!kgem_bo_can_map(kgem, bo))
161		return;
162
163	kgem_bo_submit(kgem, bo);
164
165	src = kgem_bo_map(kgem, bo);
166	if (src == NULL)
167		return;
168
169	if (sigtrap_get())
170		return;
171
172	assert(src != dst);
173	do {
174		DBG(("%s: copying box (%d, %d), (%d, %d)\n",
175		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
176
177		assert(box->x2 > box->x1);
178		assert(box->y2 > box->y1);
179
180		assert(box->x1 >= 0);
181		assert(box->y1 >= 0);
182		assert(box->x2 <= pixmap->drawable.width);
183		assert(box->y2 <= pixmap->drawable.height);
184
185		assert(box->x1 >= 0);
186		assert(box->y1 >= 0);
187		assert(box->x2 <= pixmap->drawable.width);
188		assert(box->y2 <= pixmap->drawable.height);
189
190		memcpy_blt(src, dst, bpp,
191			   src_pitch, dst_pitch,
192			   box->x1, box->y1,
193			   box->x1, box->y1,
194			   box->x2 - box->x1, box->y2 - box->y1);
195		box++;
196	} while (--n);
197
198	sigtrap_put();
199}
200
201static bool download_inplace(struct kgem *kgem,
202			     PixmapPtr p, struct kgem_bo *bo,
203			     const BoxRec *box, int nbox)
204{
205	bool cpu;
206
207	if (unlikely(kgem->wedged))
208		return true;
209
210	cpu = download_inplace__cpu(kgem, p, bo, box, nbox);
211	if (!cpu && !kgem_bo_can_map(kgem, bo))
212		return false;
213
214	if (FORCE_INPLACE)
215		return FORCE_INPLACE > 0;
216
217	if (cpu)
218		return true;
219
220	if (kgem->can_blt_cpu && kgem->max_cpu_size)
221		return false;
222
223	return !__kgem_bo_is_busy(kgem, bo);
224}
225
226void sna_read_boxes(struct sna *sna, PixmapPtr dst, struct kgem_bo *src_bo,
227		    const BoxRec *box, int nbox)
228{
229	struct kgem *kgem = &sna->kgem;
230	struct kgem_bo *dst_bo;
231	BoxRec extents;
232	const BoxRec *tmp_box;
233	int tmp_nbox;
234	void *ptr;
235	int src_pitch, cpp, offset;
236	int n, cmd, br13;
237	bool can_blt;
238
239	DBG(("%s x %d, src=(handle=%d), dst=(size=(%d, %d)\n",
240	     __FUNCTION__, nbox, src_bo->handle,
241	     dst->drawable.width, dst->drawable.height));
242
243#ifndef NDEBUG
244	for (n = 0; n < nbox; n++) {
245		if (box[n].x1 < 0 || box[n].y1 < 0 ||
246		    box[n].x2 * dst->drawable.bitsPerPixel/8 > src_bo->pitch ||
247		    box[n].y2 * src_bo->pitch > kgem_bo_size(src_bo))
248		{
249			FatalError("source out-of-bounds box[%d]=(%d, %d), (%d, %d), pitch=%d, size=%d\n", n,
250				   box[n].x1, box[n].y1,
251				   box[n].x2, box[n].y2,
252				   src_bo->pitch, kgem_bo_size(src_bo));
253		}
254	}
255#endif
256
257	/* XXX The gpu is faster to perform detiling in bulk, but takes
258	 * longer to setup and retrieve the results, with an additional
259	 * copy. The long term solution is to use snoopable bo and avoid
260	 * this path.
261	 */
262
263	if (download_inplace(kgem, dst, src_bo, box, nbox)) {
264fallback:
265		read_boxes_inplace(kgem, dst, src_bo, box, nbox);
266		return;
267	}
268
269	can_blt = kgem_bo_can_blt(kgem, src_bo) &&
270		(box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
271	extents = box[0];
272	for (n = 1; n < nbox; n++) {
273		if (box[n].x1 < extents.x1)
274			extents.x1 = box[n].x1;
275		if (box[n].x2 > extents.x2)
276			extents.x2 = box[n].x2;
277
278		if (can_blt)
279			can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
280
281		if (box[n].y1 < extents.y1)
282			extents.y1 = box[n].y1;
283		if (box[n].y2 > extents.y2)
284			extents.y2 = box[n].y2;
285	}
286	if (!can_blt && sna->render.max_3d_size == 0)
287		goto fallback;
288
289	if (kgem_bo_can_map(kgem, src_bo)) {
290		/* Is it worth detiling? */
291		if ((extents.y2 - extents.y1 - 1) * src_bo->pitch < 4096)
292			goto fallback;
293	}
294
295	/* Try to avoid switching rings... */
296	if (!can_blt || kgem->ring == KGEM_RENDER ||
297	    upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) {
298		DrawableRec tmp;
299
300		tmp.width  = extents.x2 - extents.x1;
301		tmp.height = extents.y2 - extents.y1;
302		tmp.depth  = dst->drawable.depth;
303		tmp.bitsPerPixel = dst->drawable.bitsPerPixel;
304
305		assert(tmp.width);
306		assert(tmp.height);
307
308		if (must_tile(sna, tmp.width, tmp.height)) {
309			BoxRec tile, stack[64], *clipped, *c;
310			int step;
311
312			if (n > ARRAY_SIZE(stack)) {
313				clipped = malloc(sizeof(BoxRec) * n);
314				if (clipped == NULL)
315					goto fallback;
316			} else
317				clipped = stack;
318
319			step = MIN(sna->render.max_3d_size,
320				   8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel);
321			while (step * step * 4 > sna->kgem.max_upload_tile_size)
322				step /= 2;
323
324			DBG(("%s: tiling download, using %dx%d tiles\n",
325			     __FUNCTION__, step, step));
326			assert(step);
327
328			for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
329				int y2 = tile.y1 + step;
330				if (y2 > extents.y2)
331					y2 = extents.y2;
332				tile.y2 = y2;
333
334				for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
335					int x2 = tile.x1 + step;
336					if (x2 > extents.x2)
337						x2 = extents.x2;
338					tile.x2 = x2;
339
340					tmp.width  = tile.x2 - tile.x1;
341					tmp.height = tile.y2 - tile.y1;
342
343					c = clipped;
344					for (n = 0; n < nbox; n++) {
345						*c = box[n];
346						if (!box_intersect(c, &tile))
347							continue;
348
349						DBG(("%s: box(%d, %d), (%d, %d),, dst=(%d, %d)\n",
350						     __FUNCTION__,
351						     c->x1, c->y1,
352						     c->x2, c->y2,
353						     c->x1 - tile.x1,
354						     c->y1 - tile.y1));
355						c++;
356					}
357					if (c == clipped)
358						continue;
359
360					dst_bo = kgem_create_buffer_2d(kgem,
361								       tmp.width,
362								       tmp.height,
363								       tmp.bitsPerPixel,
364								       KGEM_BUFFER_LAST,
365								       &ptr);
366					if (!dst_bo) {
367						if (clipped != stack)
368							free(clipped);
369						goto fallback;
370					}
371
372					if (!sna->render.copy_boxes(sna, GXcopy,
373								    &dst->drawable, src_bo, 0, 0,
374								    &tmp, dst_bo, -tile.x1, -tile.y1,
375								    clipped, c-clipped, COPY_LAST)) {
376						kgem_bo_destroy(&sna->kgem, dst_bo);
377						if (clipped != stack)
378							free(clipped);
379						goto fallback;
380					}
381
382					kgem_bo_submit(&sna->kgem, dst_bo);
383					kgem_buffer_read_sync(kgem, dst_bo);
384
385					if (sigtrap_get() == 0) {
386						while (c-- != clipped) {
387							memcpy_blt(ptr, dst->devPrivate.ptr, tmp.bitsPerPixel,
388								   dst_bo->pitch, dst->devKind,
389								   c->x1 - tile.x1,
390								   c->y1 - tile.y1,
391								   c->x1, c->y1,
392								   c->x2 - c->x1,
393								   c->y2 - c->y1);
394						}
395						sigtrap_put();
396					}
397
398					kgem_bo_destroy(&sna->kgem, dst_bo);
399				}
400			}
401
402			if (clipped != stack)
403				free(clipped);
404		} else {
405			dst_bo = kgem_create_buffer_2d(kgem,
406						       tmp.width,
407						       tmp.height,
408						       tmp.bitsPerPixel,
409						       KGEM_BUFFER_LAST,
410						       &ptr);
411			if (!dst_bo)
412				goto fallback;
413
414			if (!sna->render.copy_boxes(sna, GXcopy,
415						    &dst->drawable, src_bo, 0, 0,
416						    &tmp, dst_bo, -extents.x1, -extents.y1,
417						    box, nbox, COPY_LAST)) {
418				kgem_bo_destroy(&sna->kgem, dst_bo);
419				goto fallback;
420			}
421
422			kgem_bo_submit(&sna->kgem, dst_bo);
423			kgem_buffer_read_sync(kgem, dst_bo);
424
425			if (sigtrap_get() == 0) {
426				for (n = 0; n < nbox; n++) {
427					memcpy_blt(ptr, dst->devPrivate.ptr, tmp.bitsPerPixel,
428						   dst_bo->pitch, dst->devKind,
429						   box[n].x1 - extents.x1,
430						   box[n].y1 - extents.y1,
431						   box[n].x1, box[n].y1,
432						   box[n].x2 - box[n].x1,
433						   box[n].y2 - box[n].y1);
434				}
435				sigtrap_put();
436			}
437
438			kgem_bo_destroy(&sna->kgem, dst_bo);
439		}
440		return;
441	}
442
443	/* count the total number of bytes to be read and allocate a bo */
444	cpp = dst->drawable.bitsPerPixel / 8;
445	offset = 0;
446	for (n = 0; n < nbox; n++) {
447		int height = box[n].y2 - box[n].y1;
448		int width = box[n].x2 - box[n].x1;
449		offset += PITCH(width, cpp) * height;
450	}
451
452	DBG(("    read buffer size=%d\n", offset));
453
454	dst_bo = kgem_create_buffer(kgem, offset, KGEM_BUFFER_LAST, &ptr);
455	if (!dst_bo) {
456		read_boxes_inplace(kgem, dst, src_bo, box, nbox);
457		return;
458	}
459
460	cmd = XY_SRC_COPY_BLT_CMD;
461	src_pitch = src_bo->pitch;
462	if (kgem->gen >= 040 && src_bo->tiling) {
463		cmd |= BLT_SRC_TILED;
464		src_pitch >>= 2;
465	}
466
467	br13 = 0xcc << 16;
468	switch (cpp) {
469	default:
470	case 4: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
471		br13 |= 1 << 25; /* RGB8888 */
472	case 2: br13 |= 1 << 24; /* RGB565 */
473	case 1: break;
474	}
475
476	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
477	if (!kgem_check_batch(kgem, 10) ||
478	    !kgem_check_reloc_and_exec(kgem, 2) ||
479	    !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
480		kgem_submit(kgem);
481		if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL))
482			goto fallback;
483		_kgem_set_mode(kgem, KGEM_BLT);
484	}
485	kgem_bcs_set_tiling(&sna->kgem, src_bo, NULL);
486
487	tmp_nbox = nbox;
488	tmp_box = box;
489	offset = 0;
490	if (sna->kgem.gen >= 0100) {
491		cmd |= 8;
492		do {
493			int nbox_this_time, rem;
494
495			nbox_this_time = tmp_nbox;
496			rem = kgem_batch_space(kgem);
497			if (10*nbox_this_time > rem)
498				nbox_this_time = rem / 8;
499			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
500				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
501			assert(nbox_this_time);
502			tmp_nbox -= nbox_this_time;
503
504			assert(kgem->mode == KGEM_BLT);
505			for (n = 0; n < nbox_this_time; n++) {
506				int height = tmp_box[n].y2 - tmp_box[n].y1;
507				int width = tmp_box[n].x2 - tmp_box[n].x1;
508				int pitch = PITCH(width, cpp);
509				uint32_t *b = kgem->batch + kgem->nbatch;
510
511				DBG(("    blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n",
512				     offset,
513				     tmp_box[n].x1, tmp_box[n].y1,
514				     width, height, pitch));
515
516				assert(tmp_box[n].x1 >= 0);
517				assert(tmp_box[n].x2 * dst->drawable.bitsPerPixel/8 <= src_bo->pitch);
518				assert(tmp_box[n].y1 >= 0);
519				assert(tmp_box[n].y2 * src_bo->pitch <= kgem_bo_size(src_bo));
520
521				b[0] = cmd;
522				b[1] = br13 | pitch;
523				b[2] = 0;
524				b[3] = height << 16 | width;
525				*(uint64_t *)(b+4) =
526					kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
527							 I915_GEM_DOMAIN_RENDER << 16 |
528							 I915_GEM_DOMAIN_RENDER |
529							 KGEM_RELOC_FENCED,
530							 offset);
531				b[6] = tmp_box[n].y1 << 16 | tmp_box[n].x1;
532				b[7] = src_pitch;
533				*(uint64_t *)(b+8) =
534					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
535							 I915_GEM_DOMAIN_RENDER << 16 |
536							 KGEM_RELOC_FENCED,
537							 0);
538				kgem->nbatch += 10;
539
540				offset += pitch * height;
541			}
542
543			_kgem_submit(kgem);
544			if (!tmp_nbox)
545				break;
546
547			_kgem_set_mode(kgem, KGEM_BLT);
548			kgem_bcs_set_tiling(&sna->kgem, src_bo, NULL);
549			tmp_box += nbox_this_time;
550		} while (1);
551	} else {
552		cmd |= 6;
553		do {
554			int nbox_this_time, rem;
555
556			nbox_this_time = tmp_nbox;
557			rem = kgem_batch_space(kgem);
558			if (8*nbox_this_time > rem)
559				nbox_this_time = rem / 8;
560			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
561				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
562			assert(nbox_this_time);
563			tmp_nbox -= nbox_this_time;
564
565			assert(kgem->mode == KGEM_BLT);
566			for (n = 0; n < nbox_this_time; n++) {
567				int height = tmp_box[n].y2 - tmp_box[n].y1;
568				int width = tmp_box[n].x2 - tmp_box[n].x1;
569				int pitch = PITCH(width, cpp);
570				uint32_t *b = kgem->batch + kgem->nbatch;
571
572				DBG(("    blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n",
573				     offset,
574				     tmp_box[n].x1, tmp_box[n].y1,
575				     width, height, pitch));
576
577				assert(tmp_box[n].x1 >= 0);
578				assert(tmp_box[n].x2 * dst->drawable.bitsPerPixel/8 <= src_bo->pitch);
579				assert(tmp_box[n].y1 >= 0);
580				assert(tmp_box[n].y2 * src_bo->pitch <= kgem_bo_size(src_bo));
581
582				b[0] = cmd;
583				b[1] = br13 | pitch;
584				b[2] = 0;
585				b[3] = height << 16 | width;
586				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
587						      I915_GEM_DOMAIN_RENDER << 16 |
588						      I915_GEM_DOMAIN_RENDER |
589						      KGEM_RELOC_FENCED,
590						      offset);
591				b[5] = tmp_box[n].y1 << 16 | tmp_box[n].x1;
592				b[6] = src_pitch;
593				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
594						      I915_GEM_DOMAIN_RENDER << 16 |
595						      KGEM_RELOC_FENCED,
596						      0);
597				kgem->nbatch += 8;
598
599				offset += pitch * height;
600			}
601
602			_kgem_submit(kgem);
603			if (!tmp_nbox)
604				break;
605
606			_kgem_set_mode(kgem, KGEM_BLT);
607			kgem_bcs_set_tiling(&sna->kgem, src_bo, NULL);
608			tmp_box += nbox_this_time;
609		} while (1);
610	}
611	assert(offset == __kgem_buffer_size(dst_bo));
612
613	kgem_buffer_read_sync(kgem, dst_bo);
614
615	if (sigtrap_get() == 0) {
616		char *src = ptr;
617		do {
618			int height = box->y2 - box->y1;
619			int width  = box->x2 - box->x1;
620			int pitch = PITCH(width, cpp);
621
622			DBG(("    copy offset %lx [%08x...%08x...%08x]: (%d, %d) x (%d, %d), src pitch=%d, dst pitch=%d, bpp=%d\n",
623			     (long)((char *)src - (char *)ptr),
624			     *(uint32_t*)src, *(uint32_t*)(src+pitch*height/2 + pitch/2 - 4), *(uint32_t*)(src+pitch*height - 4),
625			     box->x1, box->y1,
626			     width, height,
627			     pitch, dst->devKind, cpp*8));
628
629			assert(box->x1 >= 0);
630			assert(box->x2 <= dst->drawable.width);
631			assert(box->y1 >= 0);
632			assert(box->y2 <= dst->drawable.height);
633
634			memcpy_blt(src, dst->devPrivate.ptr, cpp*8,
635				   pitch, dst->devKind,
636				   0, 0,
637				   box->x1, box->y1,
638				   width, height);
639			box++;
640
641			src += pitch * height;
642		} while (--nbox);
643		assert(src - (char *)ptr == __kgem_buffer_size(dst_bo));
644		sigtrap_put();
645	}
646	kgem_bo_destroy(kgem, dst_bo);
647	sna->blt_state.fill_bo = 0;
648}
649
/* Can we write directly into bo, detiling on the CPU if necessary?
 * Y-tiling is never writable inplace; X-tiling additionally needs the
 * software tiling memcpy.  We also need some mapping to write through:
 * either a write-combining mmap or a CPU map.
 */
static bool upload_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: tiling=%d\n", __FUNCTION__, bo->tiling));
	switch (bo->tiling) {
	case I915_TILING_Y:
		return false;
	case I915_TILING_X:
		if (!kgem->memcpy_to_tiled_x)
			return false;
		/* fall through */
	default:
		/* linear (and X-tiled with the detiler) are acceptable */
		break;
	}

	if (kgem->has_wc_mmap)
		return true;

	return kgem_bo_can_map__cpu(kgem, bo, true);
}
668
/* Write the boxes from src into a linear or X-tiled bo through a direct
 * mapping, performing any tiling transformation on the CPU.  Returns
 * false when the bo cannot be mapped (or on a fault during the copy),
 * in which case the caller falls back to the GTT path.
 */
static bool
write_boxes_inplace__tiled(struct kgem *kgem,
                           const uint8_t *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
                           struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
                           const BoxRec *box, int n)
{
	uint8_t *dst;

	/* Y-tiling has no CPU swizzle path here. */
	if (bo->tiling == I915_TILING_Y)
		return false;

	assert(kgem->has_wc_mmap || kgem_bo_can_map__cpu(kgem, bo, true));

	/* Prefer a cacheable CPU map; otherwise fall back to the
	 * write-combining mmap with a GTT-domain sync. */
	if (kgem_bo_can_map__cpu(kgem, bo, true)) {
		dst = kgem_bo_map__cpu(kgem, bo);
		if (dst == NULL)
			return false;

		kgem_bo_sync__cpu(kgem, bo);
	} else {
		dst = kgem_bo_map__wc(kgem, bo);
		if (dst == NULL)
			return false;

		kgem_bo_sync__gtt(kgem, bo);
	}

	/* Guard the copies against faults on the mapping. */
	if (sigtrap_get())
		return false;

	if (bo->tiling) {
		/* I915_TILING_X: swizzle each box into the tiled layout */
		do {
			memcpy_to_tiled_x(kgem, src, dst, bpp, stride, bo->pitch,
					  box->x1 + src_dx, box->y1 + src_dy,
					  box->x1 + dst_dx, box->y1 + dst_dy,
					  box->x2 - box->x1, box->y2 - box->y1);
			box++;
		} while (--n);
	} else {
		do {
			memcpy_blt(src, dst, bpp, stride, bo->pitch,
				   box->x1 + src_dx, box->y1 + src_dy,
				   box->x1 + dst_dx, box->y1 + dst_dy,
				   box->x2 - box->x1, box->y2 - box->y1);
			box++;
		} while (--n);
	}

	sigtrap_put();
	return true;
}
720
/* Write the boxes from src into bo via a direct mapping, trying the
 * CPU-detiling path first and then a GTT map (which detiles on access).
 * Returns false when no mapping is available so the caller can stage
 * the upload through a proxy buffer instead.
 */
static bool write_boxes_inplace(struct kgem *kgem,
				const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
				struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
				const BoxRec *box, int n)
{
	void *dst;

	DBG(("%s x %d, handle=%d, tiling=%d\n",
	     __FUNCTION__, n, bo->handle, bo->tiling));

	if (upload_inplace__tiled(kgem, bo) &&
	    write_boxes_inplace__tiled(kgem, src, stride, bpp, src_dx, src_dy,
				       bo, dst_dx, dst_dy, box, n))
		return true;

	if (!kgem_bo_can_map(kgem, bo))
		return false;

	/* Flush any pending batch referencing bo before writing. */
	kgem_bo_submit(kgem, bo);

	dst = kgem_bo_map(kgem, bo);
	if (dst == NULL)
		return false;

	assert(dst != src);

	/* Guard the copies against faults on the mapping. */
	if (sigtrap_get())
		return false;

	do {
		DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__,
		     box->x1 + src_dx, box->y1 + src_dy,
		     box->x1 + dst_dx, box->y1 + dst_dy,
		     box->x2 - box->x1, box->y2 - box->y1,
		     bpp, stride, bo->pitch));

		assert(box->x2 > box->x1);
		assert(box->y2 > box->y1);

		/* translated box must lie within the bo ... */
		assert(box->x1 + dst_dx >= 0);
		assert((box->x2 + dst_dx)*bpp <= 8*bo->pitch);
		assert(box->y1 + dst_dy >= 0);
		assert((box->y2 + dst_dy)*bo->pitch <= kgem_bo_size(bo));

		/* ... and within the source image. */
		assert(box->x1 + src_dx >= 0);
		assert((box->x2 + src_dx)*bpp <= 8*stride);
		assert(box->y1 + src_dy >= 0);

		memcpy_blt(src, dst, bpp,
			   stride, bo->pitch,
			   box->x1 + src_dx, box->y1 + src_dy,
			   box->x1 + dst_dx, box->y1 + dst_dy,
			   box->x2 - box->x1, box->y2 - box->y1);
		box++;
	} while (--n);

	sigtrap_put();
	return true;
}
780
/* Heuristic: should the boxes be written directly into bo (inplace)
 * rather than staged through a temporary buffer and blitted?
 */
static bool __upload_inplace(struct kgem *kgem,
			     struct kgem_bo *bo,
			     const BoxRec *box,
			     int n, int bpp)
{
	unsigned int bytes;

	if (FORCE_INPLACE)
		return FORCE_INPLACE > 0;

	/* Already referenced by the batch under construction: a direct
	 * write would be misordered with the queued rendering. */
	if (bo->exec)
		return false;

	/* NOTE(review): flush bos always take the inplace path —
	 * presumably externally shared buffers that must not be
	 * replaced/proxied; confirm against kgem. */
	if (bo->flush)
		return true;

	if (kgem_bo_can_map__cpu(kgem, bo, true))
		return true;

	/* If we are writing through the GTT, check first if we might be
	 * able to amalgamate a series of small writes into a single
	 * operation.
	 */
	bytes = 0;
	while (n--) {
		/* accumulated in pixels; scaled by bpp below */
		bytes += (box->x2 - box->x1) * (box->y2 - box->y1);
		box++;
	}
	/* bytes*bpp is the transfer size in bits; >>12 coarsens it.
	 * NOTE(review): compared against half_cpu_cache_pages although
	 * bits>>12 is 512-byte units, not pages — deliberate bias or
	 * off-by-8? confirm intent. */
	if (__kgem_bo_is_busy(kgem, bo))
		return bytes * bpp >> 12 >= kgem->half_cpu_cache_pages;
	else
		/* idle: write inplace for anything non-trivial in size */
		return bytes * bpp >> 12;
}
814
815static bool upload_inplace(struct kgem *kgem,
816			   struct kgem_bo *bo,
817			   const BoxRec *box,
818			   int n, int bpp)
819{
820	if (unlikely(kgem->wedged))
821		return true;
822
823	if (!kgem_bo_can_map(kgem, bo) && !upload_inplace__tiled(kgem, bo))
824		return false;
825
826	return __upload_inplace(kgem, bo, box, n,bpp);
827}
828
829bool sna_write_boxes(struct sna *sna, PixmapPtr dst,
830		     struct kgem_bo * const dst_bo, int16_t const dst_dx, int16_t const dst_dy,
831		     const void * const src, int const stride, int16_t const src_dx, int16_t const src_dy,
832		     const BoxRec *box, int nbox)
833{
834	struct kgem *kgem = &sna->kgem;
835	struct kgem_bo *src_bo;
836	BoxRec extents;
837	void *ptr;
838	int offset;
839	int n, cmd, br13;
840	bool can_blt;
841
842	DBG(("%s x %d, src stride=%d,  src dx=(%d, %d)\n", __FUNCTION__, nbox, stride, src_dx, src_dy));
843
844	if (upload_inplace(kgem, dst_bo, box, nbox, dst->drawable.bitsPerPixel) &&
845	    write_boxes_inplace(kgem,
846				src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
847				dst_bo, dst_dx, dst_dy,
848				box, nbox))
849		return true;
850
851	if (wedged(sna))
852		return false;
853
854	can_blt = kgem_bo_can_blt(kgem, dst_bo) &&
855		(box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
856	extents = box[0];
857	for (n = 1; n < nbox; n++) {
858		if (box[n].x1 < extents.x1)
859			extents.x1 = box[n].x1;
860		if (box[n].x2 > extents.x2)
861			extents.x2 = box[n].x2;
862
863		if (can_blt)
864			can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
865
866		if (box[n].y1 < extents.y1)
867			extents.y1 = box[n].y1;
868		if (box[n].y2 > extents.y2)
869			extents.y2 = box[n].y2;
870	}
871	if (!can_blt && sna->render.max_3d_size == 0)
872		goto fallback;
873
874	/* Try to avoid switching rings... */
875	if (!can_blt || kgem->ring == KGEM_RENDER ||
876	    upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) {
877		DrawableRec tmp;
878
879		tmp.width  = extents.x2 - extents.x1;
880		tmp.height = extents.y2 - extents.y1;
881		tmp.depth  = dst->drawable.depth;
882		tmp.bitsPerPixel = dst->drawable.bitsPerPixel;
883
884		assert(tmp.width);
885		assert(tmp.height);
886
887		DBG(("%s: upload (%d, %d)x(%d, %d), max %dx%d\n",
888		     __FUNCTION__,
889		     extents.x1, extents.y1,
890		     tmp.width, tmp.height,
891		     sna->render.max_3d_size, sna->render.max_3d_size));
892		if (must_tile(sna, tmp.width, tmp.height)) {
893			BoxRec tile, stack[64], *clipped;
894			int cpp, step;
895
896tile:
897			cpp = dst->drawable.bitsPerPixel / 8;
898			step = MIN(sna->render.max_3d_size,
899				   (MAXSHORT&~63) / cpp);
900			while (step * step * cpp > sna->kgem.max_upload_tile_size)
901				step /= 2;
902
903			if (step * cpp > 4096)
904				step = 4096 / cpp;
905			assert(step);
906
907			DBG(("%s: tiling upload, using %dx%d tiles\n",
908			     __FUNCTION__, step, step));
909
910			if (n > ARRAY_SIZE(stack)) {
911				clipped = malloc(sizeof(BoxRec) * n);
912				if (clipped == NULL)
913					goto fallback;
914			} else
915				clipped = stack;
916
917			for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
918				int y2 = tile.y1 + step;
919				if (y2 > extents.y2)
920					y2 = extents.y2;
921				tile.y2 = y2;
922
923				for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
924					int x2 = tile.x1 + step;
925					if (x2 > extents.x2)
926						x2 = extents.x2;
927					tile.x2 = x2;
928
929					tmp.width  = tile.x2 - tile.x1;
930					tmp.height = tile.y2 - tile.y1;
931
932					src_bo = kgem_create_buffer_2d(kgem,
933								       tmp.width,
934								       tmp.height,
935								       tmp.bitsPerPixel,
936								       KGEM_BUFFER_WRITE_INPLACE,
937								       &ptr);
938					if (!src_bo) {
939						if (clipped != stack)
940							free(clipped);
941						goto fallback;
942					}
943
944					if (sigtrap_get() == 0) {
945						BoxRec *c = clipped;
946						for (n = 0; n < nbox; n++) {
947							*c = box[n];
948							if (!box_intersect(c, &tile))
949								continue;
950
951							DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
952							     __FUNCTION__,
953							     c->x1, c->y1,
954							     c->x2, c->y2,
955							     src_dx, src_dy,
956							     c->x1 - tile.x1,
957							     c->y1 - tile.y1));
958							memcpy_blt(src, ptr, tmp.bitsPerPixel,
959								   stride, src_bo->pitch,
960								   c->x1 + src_dx,
961								   c->y1 + src_dy,
962								   c->x1 - tile.x1,
963								   c->y1 - tile.y1,
964								   c->x2 - c->x1,
965								   c->y2 - c->y1);
966							c++;
967						}
968
969						if (c != clipped)
970							n = sna->render.copy_boxes(sna, GXcopy,
971										   &tmp, src_bo, -tile.x1, -tile.y1,
972										   &dst->drawable, dst_bo, dst_dx, dst_dy,
973										   clipped, c - clipped, 0);
974						else
975							n = 1;
976						sigtrap_put();
977					} else
978						n = 0;
979
980					kgem_bo_destroy(&sna->kgem, src_bo);
981
982					if (!n) {
983						if (clipped != stack)
984							free(clipped);
985						goto fallback;
986					}
987				}
988			}
989
990			if (clipped != stack)
991				free(clipped);
992		} else {
993			src_bo = kgem_create_buffer_2d(kgem,
994						       tmp.width,
995						       tmp.height,
996						       tmp.bitsPerPixel,
997						       KGEM_BUFFER_WRITE_INPLACE,
998						       &ptr);
999			if (!src_bo)
1000				goto fallback;
1001
1002			if (sigtrap_get() == 0) {
1003				for (n = 0; n < nbox; n++) {
1004					DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
1005					     __FUNCTION__,
1006					     box[n].x1, box[n].y1,
1007					     box[n].x2, box[n].y2,
1008					     src_dx, src_dy,
1009					     box[n].x1 - extents.x1,
1010					     box[n].y1 - extents.y1));
1011					memcpy_blt(src, ptr, tmp.bitsPerPixel,
1012						   stride, src_bo->pitch,
1013						   box[n].x1 + src_dx,
1014						   box[n].y1 + src_dy,
1015						   box[n].x1 - extents.x1,
1016						   box[n].y1 - extents.y1,
1017						   box[n].x2 - box[n].x1,
1018						   box[n].y2 - box[n].y1);
1019				}
1020
1021				n = sna->render.copy_boxes(sna, GXcopy,
1022							   &tmp, src_bo, -extents.x1, -extents.y1,
1023							   &dst->drawable, dst_bo, dst_dx, dst_dy,
1024							   box, nbox, 0);
1025				sigtrap_put();
1026			} else
1027				n = 0;
1028
1029			kgem_bo_destroy(&sna->kgem, src_bo);
1030
1031			if (!n)
1032				goto tile;
1033		}
1034
1035		return true;
1036	}
1037
1038	cmd = XY_SRC_COPY_BLT_CMD;
1039	br13 = dst_bo->pitch;
1040	if (kgem->gen >= 040 && dst_bo->tiling) {
1041		cmd |= BLT_DST_TILED;
1042		br13 >>= 2;
1043	}
1044	br13 |= 0xcc << 16;
1045	switch (dst->drawable.bitsPerPixel) {
1046	default:
1047	case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
1048		 br13 |= 1 << 25; /* RGB8888 */
1049	case 16: br13 |= 1 << 24; /* RGB565 */
1050	case 8: break;
1051	}
1052
1053	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
1054	if (!kgem_check_batch(kgem, 10) ||
1055	    !kgem_check_reloc_and_exec(kgem, 2) ||
1056	    !kgem_check_bo_fenced(kgem, dst_bo)) {
1057		kgem_submit(kgem);
1058		if (!kgem_check_bo_fenced(kgem, dst_bo))
1059			goto fallback;
1060		_kgem_set_mode(kgem, KGEM_BLT);
1061	}
1062	kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo);
1063
1064	if (kgem->gen >= 0100) {
1065		cmd |= 8;
1066		do {
1067			int nbox_this_time, rem;
1068
1069			nbox_this_time = nbox;
1070			rem = kgem_batch_space(kgem);
1071			if (10*nbox_this_time > rem)
1072				nbox_this_time = rem / 8;
1073			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1074				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
1075			assert(nbox_this_time);
1076			nbox -= nbox_this_time;
1077
1078			/* Count the total number of bytes to be read and allocate a
1079			 * single buffer large enough. Or if it is very small, combine
1080			 * with other allocations. */
1081			offset = 0;
1082			for (n = 0; n < nbox_this_time; n++) {
1083				int height = box[n].y2 - box[n].y1;
1084				int width = box[n].x2 - box[n].x1;
1085				offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
1086			}
1087
1088			src_bo = kgem_create_buffer(kgem, offset,
1089						    KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
1090						    &ptr);
1091			if (!src_bo)
1092				break;
1093
1094			if (sigtrap_get() == 0) {
1095				offset = 0;
1096				do {
1097					int height = box->y2 - box->y1;
1098					int width = box->x2 - box->x1;
1099					int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
1100					uint32_t *b;
1101
1102					DBG(("  %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
1103					     __FUNCTION__,
1104					     box->x1 + src_dx, box->y1 + src_dy,
1105					     box->x1 + dst_dx, box->y1 + dst_dy,
1106					     width, height,
1107					     offset, pitch));
1108
1109					assert(box->x1 + src_dx >= 0);
1110					assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
1111					assert(box->y1 + src_dy >= 0);
1112
1113					assert(box->x1 + dst_dx >= 0);
1114					assert(box->y1 + dst_dy >= 0);
1115
1116					memcpy_blt(src, (char *)ptr + offset,
1117						   dst->drawable.bitsPerPixel,
1118						   stride, pitch,
1119						   box->x1 + src_dx, box->y1 + src_dy,
1120						   0, 0,
1121						   width, height);
1122
1123					assert(kgem->mode == KGEM_BLT);
1124					b = kgem->batch + kgem->nbatch;
1125					b[0] = cmd;
1126					b[1] = br13;
1127					b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
1128					b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
1129					*(uint64_t *)(b+4) =
1130						kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
1131								 I915_GEM_DOMAIN_RENDER << 16 |
1132								 I915_GEM_DOMAIN_RENDER |
1133								 KGEM_RELOC_FENCED,
1134								 0);
1135					b[6] = 0;
1136					b[7] = pitch;
1137					*(uint64_t *)(b+8) =
1138						kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
1139								 I915_GEM_DOMAIN_RENDER << 16 |
1140								 KGEM_RELOC_FENCED,
1141								 offset);
1142					kgem->nbatch += 10;
1143
1144					box++;
1145					offset += pitch * height;
1146				} while (--nbox_this_time);
1147				assert(offset == __kgem_buffer_size(src_bo));
1148				sigtrap_put();
1149			}
1150
1151			if (nbox) {
1152				_kgem_submit(kgem);
1153				_kgem_set_mode(kgem, KGEM_BLT);
1154				kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo);
1155			}
1156
1157			kgem_bo_destroy(kgem, src_bo);
1158		} while (nbox);
1159	} else {
1160		cmd |= 6;
1161		do {
1162			int nbox_this_time, rem;
1163
1164			nbox_this_time = nbox;
1165			rem = kgem_batch_space(kgem);
1166			if (8*nbox_this_time > rem)
1167				nbox_this_time = rem / 8;
1168			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1169				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
1170			assert(nbox_this_time);
1171			nbox -= nbox_this_time;
1172
1173			/* Count the total number of bytes to be read and allocate a
1174			 * single buffer large enough. Or if it is very small, combine
1175			 * with other allocations. */
1176			offset = 0;
1177			for (n = 0; n < nbox_this_time; n++) {
1178				int height = box[n].y2 - box[n].y1;
1179				int width = box[n].x2 - box[n].x1;
1180				offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
1181			}
1182
1183			src_bo = kgem_create_buffer(kgem, offset,
1184						    KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
1185						    &ptr);
1186			if (!src_bo)
1187				break;
1188
1189			if (sigtrap_get()) {
1190				kgem_bo_destroy(kgem, src_bo);
1191				goto fallback;
1192			}
1193
1194			offset = 0;
1195			do {
1196				int height = box->y2 - box->y1;
1197				int width = box->x2 - box->x1;
1198				int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
1199				uint32_t *b;
1200
1201				DBG(("  %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
1202				     __FUNCTION__,
1203				     box->x1 + src_dx, box->y1 + src_dy,
1204				     box->x1 + dst_dx, box->y1 + dst_dy,
1205				     width, height,
1206				     offset, pitch));
1207
1208				assert(box->x1 + src_dx >= 0);
1209				assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
1210				assert(box->y1 + src_dy >= 0);
1211
1212				assert(box->x1 + dst_dx >= 0);
1213				assert(box->y1 + dst_dy >= 0);
1214
1215				memcpy_blt(src, (char *)ptr + offset,
1216					   dst->drawable.bitsPerPixel,
1217					   stride, pitch,
1218					   box->x1 + src_dx, box->y1 + src_dy,
1219					   0, 0,
1220					   width, height);
1221
1222				assert(kgem->mode == KGEM_BLT);
1223				b = kgem->batch + kgem->nbatch;
1224				b[0] = cmd;
1225				b[1] = br13;
1226				b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
1227				b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
1228				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
1229						      I915_GEM_DOMAIN_RENDER << 16 |
1230						      I915_GEM_DOMAIN_RENDER |
1231						      KGEM_RELOC_FENCED,
1232						      0);
1233				b[5] = 0;
1234				b[6] = pitch;
1235				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
1236						      I915_GEM_DOMAIN_RENDER << 16 |
1237						      KGEM_RELOC_FENCED,
1238						      offset);
1239				kgem->nbatch += 8;
1240
1241				box++;
1242				offset += pitch * height;
1243			} while (--nbox_this_time);
1244			assert(offset == __kgem_buffer_size(src_bo));
1245			sigtrap_put();
1246
1247			if (nbox) {
1248				_kgem_submit(kgem);
1249				_kgem_set_mode(kgem, KGEM_BLT);
1250				kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo);
1251			}
1252
1253			kgem_bo_destroy(kgem, src_bo);
1254		} while (nbox);
1255	}
1256
1257	sna->blt_state.fill_bo = 0;
1258	return true;
1259
1260fallback:
1261	return write_boxes_inplace(kgem,
1262				   src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
1263				   dst_bo, dst_dx, dst_dy,
1264				   box, nbox);
1265}
1266
/* Write each box from src into a CPU/GTT mapping of bo, combining the
 * pixels via memcpy_xor (i.e. dst = (src & and) | or).  The bo must be
 * mappable; the copy itself is guarded by the sigtrap helpers so that a
 * fault during the write is reported as failure rather than crashing.
 * Returns false if the bo cannot be mapped or the copy faults, leaving
 * the caller to fall back to an indirect upload.
 */
static bool
write_boxes_inplace__xor(struct kgem *kgem,
			 const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
			 struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
			 const BoxRec *box, int n,
			 uint32_t and, uint32_t or)
{
	void *dst;

	DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling));

	if (!kgem_bo_can_map(kgem, bo))
		return false;

	/* Flush any batch still referencing bo before we scribble on it
	 * directly through the mapping. */
	kgem_bo_submit(kgem, bo);

	dst = kgem_bo_map(kgem, bo);
	if (dst == NULL)
		return false;

	/* Arm fault protection for the copies below; non-zero means the
	 * guard could not be established, so bail out. */
	if (sigtrap_get())
		return false;

	/* Caller guarantees n >= 1 (do/while reads box[0] unconditionally). */
	do {
		DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__,
		     box->x1 + src_dx, box->y1 + src_dy,
		     box->x1 + dst_dx, box->y1 + dst_dy,
		     box->x2 - box->x1, box->y2 - box->y1,
		     bpp, stride, bo->pitch));

		assert(box->x2 > box->x1);
		assert(box->y2 > box->y1);

		assert(box->x1 + dst_dx >= 0);
		assert((box->x2 + dst_dx)*bpp <= 8*bo->pitch);
		assert(box->y1 + dst_dy >= 0);
		assert((box->y2 + dst_dy)*bo->pitch <= kgem_bo_size(bo));

		assert(box->x1 + src_dx >= 0);
		assert((box->x2 + src_dx)*bpp <= 8*stride);
		assert(box->y1 + src_dy >= 0);

		memcpy_xor(src, dst, bpp,
			   stride, bo->pitch,
			   box->x1 + src_dx, box->y1 + src_dy,
			   box->x1 + dst_dx, box->y1 + dst_dy,
			   box->x2 - box->x1, box->y2 - box->y1,
			   and, or);
		box++;
	} while (--n);

	sigtrap_put();
	return true;
}
1321
1322static bool upload_inplace__xor(struct kgem *kgem,
1323				struct kgem_bo *bo,
1324				const BoxRec *box,
1325				int n, int bpp)
1326{
1327	if (unlikely(kgem->wedged))
1328		return true;
1329
1330	if (!kgem_bo_can_map(kgem, bo))
1331		return false;
1332
1333	return __upload_inplace(kgem, bo, box, n, bpp);
1334}
1335
1336bool sna_write_boxes__xor(struct sna *sna, PixmapPtr dst,
1337			  struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
1338			  const void *src, int stride, int16_t src_dx, int16_t src_dy,
1339			  const BoxRec *box, int nbox,
1340			  uint32_t and, uint32_t or)
1341{
1342	struct kgem *kgem = &sna->kgem;
1343	struct kgem_bo *src_bo;
1344	BoxRec extents;
1345	bool can_blt;
1346	void *ptr;
1347	int offset;
1348	int n, cmd, br13;
1349
1350	DBG(("%s x %d\n", __FUNCTION__, nbox));
1351
1352	if (upload_inplace__xor(kgem, dst_bo, box, nbox, dst->drawable.bitsPerPixel) &&
1353	    write_boxes_inplace__xor(kgem,
1354				     src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
1355				     dst_bo, dst_dx, dst_dy,
1356				     box, nbox,
1357				     and, or))
1358		return true;
1359
1360	if (wedged(sna))
1361		return false;
1362
1363	can_blt = kgem_bo_can_blt(kgem, dst_bo) &&
1364		(box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
1365	extents = box[0];
1366	for (n = 1; n < nbox; n++) {
1367		if (box[n].x1 < extents.x1)
1368			extents.x1 = box[n].x1;
1369		if (box[n].x2 > extents.x2)
1370			extents.x2 = box[n].x2;
1371
1372		if (can_blt)
1373			can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
1374
1375		if (box[n].y1 < extents.y1)
1376			extents.y1 = box[n].y1;
1377		if (box[n].y2 > extents.y2)
1378			extents.y2 = box[n].y2;
1379	}
1380
1381	/* Try to avoid switching rings... */
1382	if (!can_blt || kgem->ring == KGEM_RENDER ||
1383	    upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) {
1384		DrawableRec tmp;
1385
1386		tmp.width  = extents.x2 - extents.x1;
1387		tmp.height = extents.y2 - extents.y1;
1388		tmp.depth  = dst->drawable.depth;
1389		tmp.bitsPerPixel = dst->drawable.bitsPerPixel;
1390
1391		assert(tmp.width);
1392		assert(tmp.height);
1393
1394		DBG(("%s: upload (%d, %d)x(%d, %d), max %dx%d\n",
1395		     __FUNCTION__,
1396		     extents.x1, extents.y1,
1397		     tmp.width, tmp.height,
1398		     sna->render.max_3d_size, sna->render.max_3d_size));
1399		if (must_tile(sna, tmp.width, tmp.height)) {
1400			BoxRec tile, stack[64], *clipped;
1401			int step;
1402
1403tile:
1404			step = MIN(sna->render.max_3d_size - 4096 / dst->drawable.bitsPerPixel,
1405				   8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel);
1406			while (step * step * 4 > sna->kgem.max_upload_tile_size)
1407				step /= 2;
1408
1409			DBG(("%s: tiling upload, using %dx%d tiles\n",
1410			     __FUNCTION__, step, step));
1411			assert(step);
1412
1413			if (n > ARRAY_SIZE(stack)) {
1414				clipped = malloc(sizeof(BoxRec) * n);
1415				if (clipped == NULL)
1416					goto fallback;
1417			} else
1418				clipped = stack;
1419
1420			for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
1421				int y2 = tile.y1 + step;
1422				if (y2 > extents.y2)
1423					y2 = extents.y2;
1424				tile.y2 = y2;
1425
1426				for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
1427					int x2 = tile.x1 + step;
1428					if (x2 > extents.x2)
1429						x2 = extents.x2;
1430					tile.x2 = x2;
1431
1432					tmp.width  = tile.x2 - tile.x1;
1433					tmp.height = tile.y2 - tile.y1;
1434
1435					src_bo = kgem_create_buffer_2d(kgem,
1436								       tmp.width,
1437								       tmp.height,
1438								       tmp.bitsPerPixel,
1439								       KGEM_BUFFER_WRITE_INPLACE,
1440								       &ptr);
1441					if (!src_bo) {
1442						if (clipped != stack)
1443							free(clipped);
1444						goto fallback;
1445					}
1446
1447					if (sigtrap_get() == 0) {
1448						BoxRec *c = clipped;
1449						for (n = 0; n < nbox; n++) {
1450							*c = box[n];
1451							if (!box_intersect(c, &tile))
1452								continue;
1453
1454							DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
1455							     __FUNCTION__,
1456							     c->x1, c->y1,
1457							     c->x2, c->y2,
1458							     src_dx, src_dy,
1459							     c->x1 - tile.x1,
1460							     c->y1 - tile.y1));
1461							memcpy_xor(src, ptr, tmp.bitsPerPixel,
1462								   stride, src_bo->pitch,
1463								   c->x1 + src_dx,
1464								   c->y1 + src_dy,
1465								   c->x1 - tile.x1,
1466								   c->y1 - tile.y1,
1467								   c->x2 - c->x1,
1468								   c->y2 - c->y1,
1469								   and, or);
1470							c++;
1471						}
1472
1473						if (c != clipped)
1474							n = sna->render.copy_boxes(sna, GXcopy,
1475										   &tmp, src_bo, -tile.x1, -tile.y1,
1476										   &dst->drawable, dst_bo, dst_dx, dst_dy,
1477										   clipped, c - clipped, 0);
1478						else
1479							n = 1;
1480
1481						sigtrap_put();
1482					} else
1483						n = 0;
1484
1485					kgem_bo_destroy(&sna->kgem, src_bo);
1486
1487					if (!n) {
1488						if (clipped != stack)
1489							free(clipped);
1490						goto fallback;
1491					}
1492				}
1493			}
1494
1495			if (clipped != stack)
1496				free(clipped);
1497		} else {
1498			src_bo = kgem_create_buffer_2d(kgem,
1499						       tmp.width,
1500						       tmp.height,
1501						       tmp.bitsPerPixel,
1502						       KGEM_BUFFER_WRITE_INPLACE,
1503						       &ptr);
1504			if (!src_bo)
1505				goto fallback;
1506
1507			if (sigtrap_get() == 0) {
1508				for (n = 0; n < nbox; n++) {
1509					DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
1510					     __FUNCTION__,
1511					     box[n].x1, box[n].y1,
1512					     box[n].x2, box[n].y2,
1513					     src_dx, src_dy,
1514					     box[n].x1 - extents.x1,
1515					     box[n].y1 - extents.y1));
1516					memcpy_xor(src, ptr, tmp.bitsPerPixel,
1517						   stride, src_bo->pitch,
1518						   box[n].x1 + src_dx,
1519						   box[n].y1 + src_dy,
1520						   box[n].x1 - extents.x1,
1521						   box[n].y1 - extents.y1,
1522						   box[n].x2 - box[n].x1,
1523						   box[n].y2 - box[n].y1,
1524						   and, or);
1525				}
1526
1527				n = sna->render.copy_boxes(sna, GXcopy,
1528							   &tmp, src_bo, -extents.x1, -extents.y1,
1529							   &dst->drawable, dst_bo, dst_dx, dst_dy,
1530							   box, nbox, 0);
1531				sigtrap_put();
1532			} else
1533				n = 0;
1534
1535			kgem_bo_destroy(&sna->kgem, src_bo);
1536
1537			if (!n)
1538				goto tile;
1539		}
1540
1541		return true;
1542	}
1543
1544	cmd = XY_SRC_COPY_BLT_CMD;
1545	br13 = dst_bo->pitch;
1546	if (kgem->gen >= 040 && dst_bo->tiling) {
1547		cmd |= BLT_DST_TILED;
1548		br13 >>= 2;
1549	}
1550	br13 |= 0xcc << 16;
1551	switch (dst->drawable.bitsPerPixel) {
1552	default:
1553	case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
1554		 br13 |= 1 << 25; /* RGB8888 */
1555	case 16: br13 |= 1 << 24; /* RGB565 */
1556	case 8: break;
1557	}
1558
1559	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
1560	if (!kgem_check_batch(kgem, 10) ||
1561	    !kgem_check_reloc_and_exec(kgem, 2) ||
1562	    !kgem_check_bo_fenced(kgem, dst_bo)) {
1563		kgem_submit(kgem);
1564		if (!kgem_check_bo_fenced(kgem, dst_bo))
1565			goto fallback;
1566		_kgem_set_mode(kgem, KGEM_BLT);
1567	}
1568	kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo);
1569
1570	if (sna->kgem.gen >= 0100) {
1571		cmd |= 8;
1572		do {
1573			int nbox_this_time, rem;
1574
1575			nbox_this_time = nbox;
1576			rem = kgem_batch_space(kgem);
1577			if (10*nbox_this_time > rem)
1578				nbox_this_time = rem / 8;
1579			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1580				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
1581			assert(nbox_this_time);
1582			nbox -= nbox_this_time;
1583
1584			/* Count the total number of bytes to be read and allocate a
1585			 * single buffer large enough. Or if it is very small, combine
1586			 * with other allocations. */
1587			offset = 0;
1588			for (n = 0; n < nbox_this_time; n++) {
1589				int height = box[n].y2 - box[n].y1;
1590				int width = box[n].x2 - box[n].x1;
1591				offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
1592			}
1593
1594			src_bo = kgem_create_buffer(kgem, offset,
1595						    KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
1596						    &ptr);
1597			if (!src_bo)
1598				goto fallback;
1599
1600			if (sigtrap_get()) {
1601				kgem_bo_destroy(kgem, src_bo);
1602				goto fallback;
1603			}
1604
1605			offset = 0;
1606			do {
1607				int height = box->y2 - box->y1;
1608				int width = box->x2 - box->x1;
1609				int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
1610				uint32_t *b;
1611
1612				DBG(("  %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
1613				     __FUNCTION__,
1614				     box->x1 + src_dx, box->y1 + src_dy,
1615				     box->x1 + dst_dx, box->y1 + dst_dy,
1616				     width, height,
1617				     offset, pitch));
1618
1619				assert(box->x1 + src_dx >= 0);
1620				assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
1621				assert(box->y1 + src_dy >= 0);
1622
1623				assert(box->x1 + dst_dx >= 0);
1624				assert(box->y1 + dst_dy >= 0);
1625
1626				memcpy_xor(src, (char *)ptr + offset,
1627					   dst->drawable.bitsPerPixel,
1628					   stride, pitch,
1629					   box->x1 + src_dx, box->y1 + src_dy,
1630					   0, 0,
1631					   width, height,
1632					   and, or);
1633
1634				assert(kgem->mode == KGEM_BLT);
1635				b = kgem->batch + kgem->nbatch;
1636				b[0] = cmd;
1637				b[1] = br13;
1638				b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
1639				b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
1640				*(uint64_t *)(b+4) =
1641					kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
1642							 I915_GEM_DOMAIN_RENDER << 16 |
1643							 I915_GEM_DOMAIN_RENDER |
1644							 KGEM_RELOC_FENCED,
1645							 0);
1646				b[6] = 0;
1647				b[7] = pitch;
1648				*(uint64_t *)(b+8) =
1649					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
1650							 I915_GEM_DOMAIN_RENDER << 16 |
1651							 KGEM_RELOC_FENCED,
1652							 offset);
1653				kgem->nbatch += 10;
1654
1655				box++;
1656				offset += pitch * height;
1657			} while (--nbox_this_time);
1658			assert(offset == __kgem_buffer_size(src_bo));
1659			sigtrap_put();
1660
1661			if (nbox) {
1662				_kgem_submit(kgem);
1663				_kgem_set_mode(kgem, KGEM_BLT);
1664				kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo);
1665			}
1666
1667			kgem_bo_destroy(kgem, src_bo);
1668		} while (nbox);
1669	} else {
1670		cmd |= 6;
1671		do {
1672			int nbox_this_time, rem;
1673
1674			nbox_this_time = nbox;
1675			rem = kgem_batch_space(kgem);
1676			if (8*nbox_this_time > rem)
1677				nbox_this_time = rem / 8;
1678			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1679				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
1680			assert(nbox_this_time);
1681			nbox -= nbox_this_time;
1682
1683			/* Count the total number of bytes to be read and allocate a
1684			 * single buffer large enough. Or if it is very small, combine
1685			 * with other allocations. */
1686			offset = 0;
1687			for (n = 0; n < nbox_this_time; n++) {
1688				int height = box[n].y2 - box[n].y1;
1689				int width = box[n].x2 - box[n].x1;
1690				offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
1691			}
1692
1693			src_bo = kgem_create_buffer(kgem, offset,
1694						    KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
1695						    &ptr);
1696			if (!src_bo)
1697				goto fallback;
1698
1699			if (sigtrap_get()) {
1700				kgem_bo_destroy(kgem, src_bo);
1701				goto fallback;
1702			}
1703
1704			offset = 0;
1705			do {
1706				int height = box->y2 - box->y1;
1707				int width = box->x2 - box->x1;
1708				int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
1709				uint32_t *b;
1710
1711				DBG(("  %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
1712				     __FUNCTION__,
1713				     box->x1 + src_dx, box->y1 + src_dy,
1714				     box->x1 + dst_dx, box->y1 + dst_dy,
1715				     width, height,
1716				     offset, pitch));
1717
1718				assert(box->x1 + src_dx >= 0);
1719				assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
1720				assert(box->y1 + src_dy >= 0);
1721
1722				assert(box->x1 + dst_dx >= 0);
1723				assert(box->y1 + dst_dy >= 0);
1724
1725				memcpy_xor(src, (char *)ptr + offset,
1726					   dst->drawable.bitsPerPixel,
1727					   stride, pitch,
1728					   box->x1 + src_dx, box->y1 + src_dy,
1729					   0, 0,
1730					   width, height,
1731					   and, or);
1732
1733				assert(kgem->mode == KGEM_BLT);
1734				b = kgem->batch + kgem->nbatch;
1735				b[0] = cmd;
1736				b[1] = br13;
1737				b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
1738				b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
1739				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
1740						      I915_GEM_DOMAIN_RENDER << 16 |
1741						      I915_GEM_DOMAIN_RENDER |
1742						      KGEM_RELOC_FENCED,
1743						      0);
1744				b[5] = 0;
1745				b[6] = pitch;
1746				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
1747						      I915_GEM_DOMAIN_RENDER << 16 |
1748						      KGEM_RELOC_FENCED,
1749						      offset);
1750				kgem->nbatch += 8;
1751
1752				box++;
1753				offset += pitch * height;
1754			} while (--nbox_this_time);
1755			assert(offset == __kgem_buffer_size(src_bo));
1756			sigtrap_put();
1757
1758			if (nbox) {
1759				_kgem_submit(kgem);
1760				_kgem_set_mode(kgem, KGEM_BLT);
1761				kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo);
1762			}
1763
1764			kgem_bo_destroy(kgem, src_bo);
1765		} while (nbox);
1766	}
1767
1768	sna->blt_state.fill_bo = 0;
1769	return true;
1770
1771fallback:
1772	return write_boxes_inplace__xor(kgem,
1773					src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
1774					dst_bo, dst_dx, dst_dy,
1775					box, nbox,
1776					and, or);
1777}
1778
1779static bool
1780indirect_replace(struct sna *sna,
1781		 PixmapPtr pixmap,
1782		 struct kgem_bo *bo,
1783		 const void *src, int stride)
1784{
1785	struct kgem *kgem = &sna->kgem;
1786	struct kgem_bo *src_bo;
1787	BoxRec box;
1788	void *ptr;
1789	bool ret;
1790
1791	DBG(("%s: size=%d vs %d\n",
1792	     __FUNCTION__,
1793	     stride * pixmap->drawable.height >> 12,
1794	     kgem->half_cpu_cache_pages));
1795
1796	if (stride * pixmap->drawable.height >> 12 > kgem->half_cpu_cache_pages)
1797		return false;
1798
1799	if (!kgem_bo_can_blt(kgem, bo) &&
1800	    must_tile(sna, pixmap->drawable.width, pixmap->drawable.height))
1801		return false;
1802
1803	src_bo = kgem_create_buffer_2d(kgem,
1804				       pixmap->drawable.width,
1805				       pixmap->drawable.height,
1806				       pixmap->drawable.bitsPerPixel,
1807				       KGEM_BUFFER_WRITE_INPLACE,
1808				       &ptr);
1809	if (!src_bo)
1810		return false;
1811
1812	ret = false;
1813	if (sigtrap_get() == 0) {
1814		memcpy_blt(src, ptr, pixmap->drawable.bitsPerPixel,
1815			   stride, src_bo->pitch,
1816			   0, 0,
1817			   0, 0,
1818			   pixmap->drawable.width,
1819			   pixmap->drawable.height);
1820
1821		box.x1 = box.y1 = 0;
1822		box.x2 = pixmap->drawable.width;
1823		box.y2 = pixmap->drawable.height;
1824
1825		ret = sna->render.copy_boxes(sna, GXcopy,
1826					     &pixmap->drawable, src_bo, 0, 0,
1827					     &pixmap->drawable, bo, 0, 0,
1828					     &box, 1, 0);
1829		sigtrap_put();
1830	}
1831
1832	kgem_bo_destroy(kgem, src_bo);
1833
1834	return ret;
1835}
1836
1837bool sna_replace(struct sna *sna, PixmapPtr pixmap,
1838		 const void *src, int stride)
1839{
1840	struct sna_pixmap *priv = sna_pixmap(pixmap);
1841	struct kgem_bo *bo = priv->gpu_bo;
1842	void *dst;
1843
1844	assert(bo);
1845	DBG(("%s(handle=%d, %dx%d, bpp=%d, tiling=%d) busy?=%d\n",
1846	     __FUNCTION__, bo->handle,
1847	     pixmap->drawable.width,
1848	     pixmap->drawable.height,
1849	     pixmap->drawable.bitsPerPixel,
1850	     bo->tiling,
1851	     __kgem_bo_is_busy(&sna->kgem, bo)));
1852
1853	assert(!priv->pinned);
1854
1855	kgem_bo_undo(&sna->kgem, bo);
1856
1857	if (__kgem_bo_is_busy(&sna->kgem, bo)) {
1858		struct kgem_bo *new_bo;
1859
1860		if (indirect_replace(sna, pixmap, bo, src, stride))
1861			return true;
1862
1863		new_bo = kgem_create_2d(&sna->kgem,
1864					pixmap->drawable.width,
1865					pixmap->drawable.height,
1866					pixmap->drawable.bitsPerPixel,
1867					bo->tiling,
1868					CREATE_GTT_MAP | CREATE_INACTIVE);
1869		if (new_bo)
1870			bo = new_bo;
1871	}
1872
1873	if (bo->tiling == I915_TILING_NONE && bo->pitch == stride &&
1874	    kgem_bo_write(&sna->kgem, bo, src,
1875			  (pixmap->drawable.height-1)*stride + pixmap->drawable.width*pixmap->drawable.bitsPerPixel/8))
1876			goto done;
1877
1878	if (upload_inplace__tiled(&sna->kgem, bo)) {
1879		BoxRec box;
1880
1881		box.x1 = box.y1 = 0;
1882		box.x2 = pixmap->drawable.width;
1883		box.y2 = pixmap->drawable.height;
1884
1885		if (write_boxes_inplace__tiled(&sna->kgem, src,
1886					       stride, pixmap->drawable.bitsPerPixel, 0, 0,
1887					       bo, 0, 0, &box, 1))
1888			goto done;
1889	}
1890
1891	if (kgem_bo_can_map(&sna->kgem, bo) &&
1892	    (dst = kgem_bo_map(&sna->kgem, bo)) != NULL &&
1893	    sigtrap_get() == 0) {
1894		memcpy_blt(src, dst, pixmap->drawable.bitsPerPixel,
1895			   stride, bo->pitch,
1896			   0, 0,
1897			   0, 0,
1898			   pixmap->drawable.width,
1899			   pixmap->drawable.height);
1900		sigtrap_put();
1901	} else {
1902		BoxRec box;
1903
1904		if (bo != priv->gpu_bo) {
1905			kgem_bo_destroy(&sna->kgem, bo);
1906			bo = priv->gpu_bo;
1907		}
1908
1909		box.x1 = box.y1 = 0;
1910		box.x2 = pixmap->drawable.width;
1911		box.y2 = pixmap->drawable.height;
1912
1913		if (!sna_write_boxes(sna, pixmap,
1914				     bo, 0, 0,
1915				     src, stride, 0, 0,
1916				     &box, 1))
1917			return false;
1918	}
1919
1920done:
1921	if (bo != priv->gpu_bo) {
1922		sna_pixmap_unmap(pixmap, priv);
1923		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
1924		priv->gpu_bo = bo;
1925	}
1926
1927	return true;
1928}
1929
/* As sna_replace(), but every pixel is combined via
 * dst = (src & and) | or (memcpy_xor).  If the current bo is busy or
 * unmappable, a fresh bo is allocated and swapped in on success;
 * otherwise the write is staged through sna_write_boxes__xor().
 */
bool
sna_replace__xor(struct sna *sna, PixmapPtr pixmap,
		 const void *src, int stride,
		 uint32_t and, uint32_t or)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	struct kgem_bo *bo = priv->gpu_bo;
	void *dst;

	DBG(("%s(handle=%d, %dx%d, bpp=%d, tiling=%d)\n",
	     __FUNCTION__, bo->handle,
	     pixmap->drawable.width,
	     pixmap->drawable.height,
	     pixmap->drawable.bitsPerPixel,
	     bo->tiling));

	assert(!priv->pinned);

	/* NOTE(review): presumably rolls back pending batch state that
	 * only touches bo — confirm in kgem. */
	kgem_bo_undo(&sna->kgem, bo);

	/* Unmappable or busy: try to write into a fresh bo instead and
	 * swap it in below; on allocation failure keep the old bo. */
	if (!kgem_bo_can_map(&sna->kgem, bo) ||
	    __kgem_bo_is_busy(&sna->kgem, bo)) {
		struct kgem_bo *new_bo;

		new_bo = kgem_create_2d(&sna->kgem,
					pixmap->drawable.width,
					pixmap->drawable.height,
					pixmap->drawable.bitsPerPixel,
					bo->tiling,
					CREATE_GTT_MAP | CREATE_INACTIVE);
		if (new_bo)
			bo = new_bo;
	}

	/* Direct masked copy through a mapping, guarded against faults. */
	if (kgem_bo_can_map(&sna->kgem, bo) &&
	    (dst = kgem_bo_map(&sna->kgem, bo)) != NULL &&
	    sigtrap_get() == 0) {
		memcpy_xor(src, dst, pixmap->drawable.bitsPerPixel,
			   stride, bo->pitch,
			   0, 0,
			   0, 0,
			   pixmap->drawable.width,
			   pixmap->drawable.height,
			   and, or);
		sigtrap_put();
	} else {
		BoxRec box;

		/* Give up on the replacement bo and stage the write into
		 * the original instead. */
		if (bo != priv->gpu_bo) {
			kgem_bo_destroy(&sna->kgem, bo);
			bo = priv->gpu_bo;
		}

		box.x1 = box.y1 = 0;
		box.x2 = pixmap->drawable.width;
		box.y2 = pixmap->drawable.height;

		if (!sna_write_boxes__xor(sna, pixmap,
					  bo, 0, 0,
					  src, stride, 0, 0,
					  &box, 1,
					  and, or))
			return false;
	}

	/* Hand ownership of the replacement bo to the pixmap, releasing
	 * the old gpu_bo. */
	if (bo != priv->gpu_bo) {
		sna_pixmap_unmap(pixmap, priv);
		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
		priv->gpu_bo = bo;
	}

	return true;
}
2003