sna_io.c revision 03b705cf
/*
 * Copyright (c) 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_render.h"
#include "sna_reg.h"

#include <sys/mman.h>

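/*
 * Byte pitch of a row of x pixels at y bytes per pixel, rounded up to a
 * 4-byte boundary.  For example, PITCH(5, 2) = ALIGN(10, 4) = 12, while
 * PITCH(13, 4) = ALIGN(52, 4) = 52 since 52 is already aligned.
 */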
#define PITCH(x, y) ALIGN((x)*(y), 4)

#define FORCE_INPLACE 0 /* 1 upload directly, -1 force indirect */

/* XXX Need to avoid using GTT fenced access for I915_TILING_Y on 855GM */

static inline bool upload_too_large(struct sna *sna, int width, int height)
{
	return width * height * 4 > sna->kgem.max_upload_tile_size;
}

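/*
 * A transfer must be split into tiles when either dimension exceeds what
 * the 3D pipeline can address, or when a single staging buffer would blow
 * the upload size limit (upload_too_large() assumes the 32bpp worst case).
 * For example, with max_3d_size = 2048 a 4096x16 copy is tiled even though
 * the buffer itself is small.
 */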
static inline bool must_tile(struct sna *sna, int width, int height)
{
	return (width  > sna->render.max_3d_size ||
		height > sna->render.max_3d_size ||
		upload_too_large(sna, width, height));
}

static bool download_inplace__cpu(struct kgem *kgem,
				  PixmapPtr p, struct kgem_bo *bo,
				  const BoxRec *box, int nbox)
{
	BoxRec extents;

	switch (bo->tiling) {
	case I915_TILING_X:
		if (!kgem->memcpy_from_tiled_x)
			return false;
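		/* fall through */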
	case I915_TILING_NONE:
		break;
	default:
		return false;
	}

	if (!kgem_bo_can_map__cpu(kgem, bo, false))
		return false;

	if (kgem->has_llc)
		return true;

	extents = *box;
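	/*
	 * Note: only x1/x2 are accumulated below; the code appears to rely
	 * on the callers passing boxes in y-sorted order, so the last box
	 * carries the maximal y2 (an assumption, not checked here).
	 */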
	while (--nbox) {
		++box;
		if (box->x1 < extents.x1)
			extents.x1 = box->x1;
		if (box->x2 > extents.x2)
			extents.x2 = box->x2;
		extents.y2 = box->y2;
	}

	if (extents.x2 - extents.x1 == p->drawable.width &&
	    extents.y2 - extents.y1 == p->drawable.height)
		return true;

	return __kgem_bo_size(bo) <= PAGE_SIZE;
}

static bool
read_boxes_inplace__cpu(struct kgem *kgem,
			PixmapPtr pixmap, struct kgem_bo *bo,
			const BoxRec *box, int n)
{
	int bpp = pixmap->drawable.bitsPerPixel;
	void *src, *dst = pixmap->devPrivate.ptr;
	int src_pitch = bo->pitch;
	int dst_pitch = pixmap->devKind;

	if (!download_inplace__cpu(kgem, pixmap, bo, box, n))
		return false;

	assert(kgem_bo_can_map__cpu(kgem, bo, false));
	assert(bo->tiling != I915_TILING_Y);

	src = __kgem_bo_map__cpu(kgem, bo);
	if (src == NULL)
		return false;

	kgem_bo_sync__cpu_full(kgem, bo, 0);
	if (bo->tiling == I915_TILING_X) {
		assert(kgem->memcpy_from_tiled_x);
		do {
			memcpy_from_tiled_x(kgem, src, dst, bpp, src_pitch, dst_pitch,
					    box->x1, box->y1,
					    box->x1, box->y1,
					    box->x2 - box->x1, box->y2 - box->y1);
			box++;
		} while (--n);
	} else {
		do {
			memcpy_blt(src, dst, bpp, src_pitch, dst_pitch,
				   box->x1, box->y1,
				   box->x1, box->y1,
				   box->x2 - box->x1, box->y2 - box->y1);
			box++;
		} while (--n);
	}
	__kgem_bo_unmap__cpu(kgem, bo, src);

	return true;
}

static void read_boxes_inplace(struct kgem *kgem,
			       PixmapPtr pixmap, struct kgem_bo *bo,
			       const BoxRec *box, int n)
{
	int bpp = pixmap->drawable.bitsPerPixel;
	void *src, *dst = pixmap->devPrivate.ptr;
	int src_pitch = bo->pitch;
	int dst_pitch = pixmap->devKind;

	if (read_boxes_inplace__cpu(kgem, pixmap, bo, box, n))
		return;

	DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling));

	if (!kgem_bo_can_map(kgem, bo))
		return;

	kgem_bo_submit(kgem, bo);

	src = kgem_bo_map(kgem, bo);
	if (src == NULL)
		return;

	assert(src != dst);
	do {
		DBG(("%s: copying box (%d, %d), (%d, %d)\n",
		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));

		assert(box->x2 > box->x1);
		assert(box->y2 > box->y1);

		assert(box->x1 >= 0);
		assert(box->y1 >= 0);
		assert(box->x2 <= pixmap->drawable.width);
		assert(box->y2 <= pixmap->drawable.height);

		memcpy_blt(src, dst, bpp,
			   src_pitch, dst_pitch,
			   box->x1, box->y1,
			   box->x1, box->y1,
			   box->x2 - box->x1, box->y2 - box->y1);
		box++;
	} while (--n);
}

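/*
 * Decide whether a download should be read back directly by the CPU
 * rather than staged through a blit.  A rough summary of the policy
 * below: a wedged GPU forces the inplace path, FORCE_INPLACE overrides
 * either way for debugging, a device that can blt to CPU memory prefers
 * the indirect copy, and otherwise we read inplace when the bo is idle
 * or a CPU mapping is available.
 */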
static bool download_inplace(struct kgem *kgem,
			     PixmapPtr p, struct kgem_bo *bo,
			     const BoxRec *box, int nbox)
{
	bool cpu;

	if (unlikely(kgem->wedged))
		return true;

	cpu = download_inplace__cpu(kgem, p, bo, box, nbox);
	if (!cpu && !kgem_bo_can_map(kgem, bo))
		return false;

	if (FORCE_INPLACE)
		return FORCE_INPLACE > 0;

	if (kgem->can_blt_cpu && kgem->max_cpu_size)
		return false;

	return !__kgem_bo_is_busy(kgem, bo) || cpu;
}

void sna_read_boxes(struct sna *sna, PixmapPtr dst, struct kgem_bo *src_bo,
		    const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	struct kgem_bo *dst_bo;
	BoxRec extents;
	const BoxRec *tmp_box;
	int tmp_nbox;
	char *src;
	void *ptr;
	int src_pitch, cpp, offset;
	int n, cmd, br13;
	bool can_blt;

	DBG(("%s x %d, src=(handle=%d), dst=(size=(%d, %d))\n",
	     __FUNCTION__, nbox, src_bo->handle,
	     dst->drawable.width, dst->drawable.height));

#ifndef NDEBUG
	for (n = 0; n < nbox; n++) {
		if (box[n].x1 < 0 || box[n].y1 < 0 ||
		    box[n].x2 * dst->drawable.bitsPerPixel/8 > src_bo->pitch ||
		    box[n].y2 * src_bo->pitch > kgem_bo_size(src_bo))
		{
			FatalError("source out-of-bounds box[%d]=(%d, %d), (%d, %d), pitch=%d, size=%d\n", n,
				   box[n].x1, box[n].y1,
				   box[n].x2, box[n].y2,
				   src_bo->pitch, kgem_bo_size(src_bo));
		}
	}
#endif

	/* XXX The gpu is faster to perform detiling in bulk, but takes
	 * longer to setup and retrieve the results, with an additional
	 * copy. The long term solution is to use snoopable bo and avoid
	 * this path.
	 */

	if (download_inplace(kgem, dst, src_bo, box, nbox)) {
fallback:
		read_boxes_inplace(kgem, dst, src_bo, box, nbox);
		return;
	}

	can_blt = kgem_bo_can_blt(kgem, src_bo) &&
		(box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
	extents = box[0];
	for (n = 1; n < nbox; n++) {
		if (box[n].x1 < extents.x1)
			extents.x1 = box[n].x1;
		if (box[n].x2 > extents.x2)
			extents.x2 = box[n].x2;

		if (can_blt)
			can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);

		if (box[n].y1 < extents.y1)
			extents.y1 = box[n].y1;
		if (box[n].y2 > extents.y2)
			extents.y2 = box[n].y2;
	}
	if (kgem_bo_is_mappable(kgem, src_bo)) {
		/* Is it worth detiling? */
		if ((extents.y2 - extents.y1 - 1) * src_bo->pitch < 4096)
			goto fallback;
	}

	/* Try to avoid switching rings... */
	if (!can_blt || kgem->ring == KGEM_RENDER ||
	    upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) {
		PixmapRec tmp;

		tmp.drawable.width  = extents.x2 - extents.x1;
		tmp.drawable.height = extents.y2 - extents.y1;
		tmp.drawable.depth  = dst->drawable.depth;
		tmp.drawable.bitsPerPixel = dst->drawable.bitsPerPixel;
		tmp.devPrivate.ptr = NULL;

		assert(tmp.drawable.width);
		assert(tmp.drawable.height);

		if (must_tile(sna, tmp.drawable.width, tmp.drawable.height)) {
			BoxRec tile, stack[64], *clipped, *c;
			int step;

			if (n > ARRAY_SIZE(stack)) {
				clipped = malloc(sizeof(BoxRec) * n);
				if (clipped == NULL)
					goto fallback;
			} else
				clipped = stack;

			step = MIN(sna->render.max_3d_size,
				   8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel);
			while (step * step * 4 > sna->kgem.max_upload_tile_size)
				step /= 2;
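			/*
			 * Worked example (illustrative limits only): at 32bpp
			 * the blt ceiling gives 8*(32767&~63)/32 = 8176; with,
			 * say, max_3d_size = 8192 and a 16MiB
			 * max_upload_tile_size, the loop halves 8176 to 4088
			 * and then to 2044, since 2044*2044*4 fits in 16MiB.
			 */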

			DBG(("%s: tiling download, using %dx%d tiles\n",
			     __FUNCTION__, step, step));
			assert(step);

			for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
				int y2 = tile.y1 + step;
				if (y2 > extents.y2)
					y2 = extents.y2;
				tile.y2 = y2;

				for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
					int x2 = tile.x1 + step;
					if (x2 > extents.x2)
						x2 = extents.x2;
					tile.x2 = x2;

					tmp.drawable.width  = tile.x2 - tile.x1;
					tmp.drawable.height = tile.y2 - tile.y1;

					c = clipped;
					for (n = 0; n < nbox; n++) {
						*c = box[n];
						if (!box_intersect(c, &tile))
							continue;

						DBG(("%s: box(%d, %d), (%d, %d), dst=(%d, %d)\n",
						     __FUNCTION__,
						     c->x1, c->y1,
						     c->x2, c->y2,
						     c->x1 - tile.x1,
						     c->y1 - tile.y1));
						c++;
					}
					if (c == clipped)
						continue;

					dst_bo = kgem_create_buffer_2d(kgem,
								       tmp.drawable.width,
								       tmp.drawable.height,
								       tmp.drawable.bitsPerPixel,
								       KGEM_BUFFER_LAST,
								       &ptr);
					if (!dst_bo) {
						if (clipped != stack)
							free(clipped);
						goto fallback;
					}

					if (!sna->render.copy_boxes(sna, GXcopy,
								    dst, src_bo, 0, 0,
								    &tmp, dst_bo, -tile.x1, -tile.y1,
								    clipped, c-clipped, COPY_LAST)) {
						kgem_bo_destroy(&sna->kgem, dst_bo);
						if (clipped != stack)
							free(clipped);
						goto fallback;
					}

					kgem_bo_submit(&sna->kgem, dst_bo);
					kgem_buffer_read_sync(kgem, dst_bo);

					while (c-- != clipped) {
						memcpy_blt(ptr, dst->devPrivate.ptr, tmp.drawable.bitsPerPixel,
							   dst_bo->pitch, dst->devKind,
							   c->x1 - tile.x1,
							   c->y1 - tile.y1,
							   c->x1, c->y1,
							   c->x2 - c->x1,
							   c->y2 - c->y1);
					}

					kgem_bo_destroy(&sna->kgem, dst_bo);
				}
			}

			if (clipped != stack)
				free(clipped);
		} else {
			dst_bo = kgem_create_buffer_2d(kgem,
						       tmp.drawable.width,
						       tmp.drawable.height,
						       tmp.drawable.bitsPerPixel,
						       KGEM_BUFFER_LAST,
						       &ptr);
			if (!dst_bo)
				goto fallback;

			if (!sna->render.copy_boxes(sna, GXcopy,
						    dst, src_bo, 0, 0,
						    &tmp, dst_bo, -extents.x1, -extents.y1,
						    box, nbox, COPY_LAST)) {
				kgem_bo_destroy(&sna->kgem, dst_bo);
				goto fallback;
			}

			kgem_bo_submit(&sna->kgem, dst_bo);
			kgem_buffer_read_sync(kgem, dst_bo);

			for (n = 0; n < nbox; n++) {
				memcpy_blt(ptr, dst->devPrivate.ptr, tmp.drawable.bitsPerPixel,
					   dst_bo->pitch, dst->devKind,
					   box[n].x1 - extents.x1,
					   box[n].y1 - extents.y1,
					   box[n].x1, box[n].y1,
					   box[n].x2 - box[n].x1,
					   box[n].y2 - box[n].y1);
			}

			kgem_bo_destroy(&sna->kgem, dst_bo);
		}
		return;
	}

	/* count the total number of bytes to be read and allocate a bo */
	cpp = dst->drawable.bitsPerPixel / 8;
	offset = 0;
	for (n = 0; n < nbox; n++) {
		int height = box[n].y2 - box[n].y1;
		int width = box[n].x2 - box[n].x1;
		offset += PITCH(width, cpp) * height;
	}

	DBG(("    read buffer size=%d\n", offset));

	dst_bo = kgem_create_buffer(kgem, offset, KGEM_BUFFER_LAST, &ptr);
	if (!dst_bo) {
		read_boxes_inplace(kgem, dst, src_bo, box, nbox);
		return;
	}

	cmd = XY_SRC_COPY_BLT_CMD;
	src_pitch = src_bo->pitch;
	if (kgem->gen >= 040 && src_bo->tiling) {
		cmd |= BLT_SRC_TILED;
		src_pitch >>= 2;
	}

	br13 = 0xcc << 16;
	switch (cpp) {
	default:
	case 4: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		br13 |= 1 << 25; /* RGB8888 */
	case 2: br13 |= 1 << 24; /* RGB565 */
	case 1: break;
	}
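	/*
	 * The case fall-through above is deliberate: BR13 bits 25:24 encode
	 * the colour depth cumulatively, so 32bpp (cpp 4) sets both bits,
	 * 16bpp sets only bit 24, and 8bpp sets neither.  The 0xcc ROP in
	 * bits 23:16 selects GXcopy.
	 */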

	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
	if (!kgem_check_batch(kgem, 8) ||
	    !kgem_check_reloc_and_exec(kgem, 2) ||
	    !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL))
			goto fallback;
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	tmp_nbox = nbox;
	tmp_box = box;
	offset = 0;
	do {
		int nbox_this_time;

		nbox_this_time = tmp_nbox;
		if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
			nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
		if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
			nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
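		/*
		 * Each XY_SRC_COPY_BLT consumes 8 dwords of batch space and
		 * 2 relocations, so the boxes emitted per pass are clamped
		 * to whichever budget runs out first; the remainder is
		 * picked up after the batch is submitted.
		 */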
		assert(nbox_this_time);
		tmp_nbox -= nbox_this_time;

		for (n = 0; n < nbox_this_time; n++) {
			int height = tmp_box[n].y2 - tmp_box[n].y1;
			int width = tmp_box[n].x2 - tmp_box[n].x1;
			int pitch = PITCH(width, cpp);
			uint32_t *b = kgem->batch + kgem->nbatch;

			DBG(("    blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n",
			     offset, tmp_box[n].x1, tmp_box[n].y1,
			     width, height, pitch));

			assert(tmp_box[n].x1 >= 0);
			assert(tmp_box[n].x2 * dst->drawable.bitsPerPixel/8 <= src_bo->pitch);
			assert(tmp_box[n].y1 >= 0);
			assert(tmp_box[n].y2 * src_bo->pitch <= kgem_bo_size(src_bo));

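			/*
			 * XY_SRC_COPY_BLT layout as emitted below: b[1]
			 * carries the ROP/depth flags plus the destination
			 * pitch, b[2]/b[3] place the copy at (0, 0) x
			 * (width, height) in the staging buffer, b[4]/b[7]
			 * are the fenced destination and source relocations,
			 * and b[5]/b[6] give the source origin and pitch.
			 */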
			b[0] = cmd;
			b[1] = br13 | pitch;
			b[2] = 0;
			b[3] = height << 16 | width;
			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
					      I915_GEM_DOMAIN_RENDER << 16 |
					      I915_GEM_DOMAIN_RENDER |
					      KGEM_RELOC_FENCED,
					      offset);
			b[5] = tmp_box[n].y1 << 16 | tmp_box[n].x1;
			b[6] = src_pitch;
			b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
					      I915_GEM_DOMAIN_RENDER << 16 |
					      KGEM_RELOC_FENCED,
					      0);
			kgem->nbatch += 8;

			offset += pitch * height;
		}

		_kgem_submit(kgem);
		if (!tmp_nbox)
			break;

		_kgem_set_mode(kgem, KGEM_BLT);
		tmp_box += nbox_this_time;
	} while (1);
	assert(offset == __kgem_buffer_size(dst_bo));

	kgem_buffer_read_sync(kgem, dst_bo);

	src = ptr;
	do {
		int height = box->y2 - box->y1;
		int width  = box->x2 - box->x1;
		int pitch = PITCH(width, cpp);

		DBG(("    copy offset %lx [%08x...%08x...%08x]: (%d, %d) x (%d, %d), src pitch=%d, dst pitch=%d, bpp=%d\n",
		     (long)((char *)src - (char *)ptr),
		     *(uint32_t*)src, *(uint32_t*)(src+pitch*height/2 + pitch/2 - 4), *(uint32_t*)(src+pitch*height - 4),
		     box->x1, box->y1,
		     width, height,
		     pitch, dst->devKind, cpp*8));

		assert(box->x1 >= 0);
		assert(box->x2 <= dst->drawable.width);
		assert(box->y1 >= 0);
		assert(box->y2 <= dst->drawable.height);

		memcpy_blt(src, dst->devPrivate.ptr, cpp*8,
			   pitch, dst->devKind,
			   0, 0,
			   box->x1, box->y1,
			   width, height);
		box++;

		src += pitch * height;
	} while (--nbox);
	assert(src - (char *)ptr == __kgem_buffer_size(dst_bo));
	kgem_bo_destroy(kgem, dst_bo);
	sna->blt_state.fill_bo = 0;
}

static bool upload_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo)
{
	if (!kgem->memcpy_to_tiled_x)
		return false;

	if (bo->tiling != I915_TILING_X)
		return false;

	return kgem_bo_can_map__cpu(kgem, bo, true);
}

static bool
write_boxes_inplace__tiled(struct kgem *kgem,
                           const uint8_t *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
                           struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
                           const BoxRec *box, int n)
{
	uint8_t *dst;

	assert(bo->tiling == I915_TILING_X);

	dst = __kgem_bo_map__cpu(kgem, bo);
	if (dst == NULL)
		return false;

	kgem_bo_sync__cpu(kgem, bo);
	do {
		memcpy_to_tiled_x(kgem, src, dst, bpp, stride, bo->pitch,
				  box->x1 + src_dx, box->y1 + src_dy,
				  box->x1 + dst_dx, box->y1 + dst_dy,
				  box->x2 - box->x1, box->y2 - box->y1);
		box++;
	} while (--n);
	__kgem_bo_unmap__cpu(kgem, bo, dst);

	return true;
}

static bool write_boxes_inplace(struct kgem *kgem,
				const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
				struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
				const BoxRec *box, int n)
{
	void *dst;

	DBG(("%s x %d, handle=%d, tiling=%d\n",
	     __FUNCTION__, n, bo->handle, bo->tiling));

	if (upload_inplace__tiled(kgem, bo) &&
	    write_boxes_inplace__tiled(kgem, src, stride, bpp, src_dx, src_dy,
				       bo, dst_dx, dst_dy, box, n))
		return true;

	if (!kgem_bo_can_map(kgem, bo))
		return false;

	kgem_bo_submit(kgem, bo);

	dst = kgem_bo_map(kgem, bo);
	if (dst == NULL)
		return false;

	assert(dst != src);

	do {
		DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__,
		     box->x1 + src_dx, box->y1 + src_dy,
		     box->x1 + dst_dx, box->y1 + dst_dy,
		     box->x2 - box->x1, box->y2 - box->y1,
		     bpp, stride, bo->pitch));

		assert(box->x2 > box->x1);
		assert(box->y2 > box->y1);

		assert(box->x1 + dst_dx >= 0);
		assert((box->x2 + dst_dx)*bpp <= 8*bo->pitch);
		assert(box->y1 + dst_dy >= 0);
		assert((box->y2 + dst_dy)*bo->pitch <= kgem_bo_size(bo));

		assert(box->x1 + src_dx >= 0);
		assert((box->x2 + src_dx)*bpp <= 8*stride);
		assert(box->y1 + src_dy >= 0);

		memcpy_blt(src, dst, bpp,
			   stride, bo->pitch,
			   box->x1 + src_dx, box->y1 + src_dy,
			   box->x1 + dst_dx, box->y1 + dst_dy,
			   box->x2 - box->x1, box->y2 - box->y1);
		box++;
	} while (--n);
	return true;
}

static bool __upload_inplace(struct kgem *kgem,
			     struct kgem_bo *bo,
			     const BoxRec *box,
			     int n, int bpp)
{
	unsigned int bytes;

	if (FORCE_INPLACE)
		return FORCE_INPLACE > 0;

	/* If we are writing through the GTT, check first if we might be
	 * able to amalgamate a series of small writes into a single
	 * operation.
	 */
	bytes = 0;
	while (n--) {
		bytes += (box->x2 - box->x1) * (box->y2 - box->y1);
		box++;
	}
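	/*
	 * "bytes" is really a pixel count; scaling by bpp and shifting by
	 * the page shift gives a rough page-order estimate of the write
	 * (the arithmetic is deliberately loose, it is only a heuristic).
	 * A busy bo would stall an inplace write, so that path is only
	 * taken for transfers of at least half the CPU cache; an idle bo
	 * writes inplace whenever the estimate is non-zero.
	 */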
	if (__kgem_bo_is_busy(kgem, bo))
		return bytes * bpp >> 12 >= kgem->half_cpu_cache_pages;
	else
		return bytes * bpp >> 12;
}

static bool upload_inplace(struct kgem *kgem,
			   struct kgem_bo *bo,
			   const BoxRec *box,
			   int n, int bpp)
{
	if (unlikely(kgem->wedged))
		return true;

	if (!kgem_bo_can_map(kgem, bo) && !upload_inplace__tiled(kgem, bo))
		return false;

	return __upload_inplace(kgem, bo, box, n, bpp);
}

bool sna_write_boxes(struct sna *sna, PixmapPtr dst,
		     struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
		     const void *src, int stride, int16_t src_dx, int16_t src_dy,
		     const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	struct kgem_bo *src_bo;
	BoxRec extents;
	void *ptr;
	int offset;
	int n, cmd, br13;
	bool can_blt;

	DBG(("%s x %d, src stride=%d, src dx=(%d, %d)\n", __FUNCTION__, nbox, stride, src_dx, src_dy));

	if (upload_inplace(kgem, dst_bo, box, nbox, dst->drawable.bitsPerPixel) &&
	    write_boxes_inplace(kgem,
				src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
				dst_bo, dst_dx, dst_dy,
				box, nbox))
		return true;

	can_blt = kgem_bo_can_blt(kgem, dst_bo) &&
		(box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
	extents = box[0];
	for (n = 1; n < nbox; n++) {
		if (box[n].x1 < extents.x1)
			extents.x1 = box[n].x1;
		if (box[n].x2 > extents.x2)
			extents.x2 = box[n].x2;

		if (can_blt)
			can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);

		if (box[n].y1 < extents.y1)
			extents.y1 = box[n].y1;
		if (box[n].y2 > extents.y2)
			extents.y2 = box[n].y2;
	}

	/* Try to avoid switching rings... */
	if (!can_blt || kgem->ring == KGEM_RENDER ||
	    upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) {
		PixmapRec tmp;

		tmp.drawable.width  = extents.x2 - extents.x1;
		tmp.drawable.height = extents.y2 - extents.y1;
		tmp.drawable.depth  = dst->drawable.depth;
		tmp.drawable.bitsPerPixel = dst->drawable.bitsPerPixel;
		tmp.devPrivate.ptr = NULL;

		assert(tmp.drawable.width);
		assert(tmp.drawable.height);

		DBG(("%s: upload (%d, %d)x(%d, %d), max %dx%d\n",
		     __FUNCTION__,
		     extents.x1, extents.y1,
		     tmp.drawable.width, tmp.drawable.height,
		     sna->render.max_3d_size, sna->render.max_3d_size));
		if (must_tile(sna, tmp.drawable.width, tmp.drawable.height)) {
			BoxRec tile, stack[64], *clipped, *c;
			int cpp, step;

tile:
			cpp = dst->drawable.bitsPerPixel / 8;
			step = MIN(sna->render.max_3d_size,
				   (MAXSHORT&~63) / cpp);
			while (step * step * cpp > sna->kgem.max_upload_tile_size)
				step /= 2;

			if (step * cpp > 4096)
				step = 4096 / cpp;
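			/*
			 * Keep the staging tile's pitch within 4096 bytes;
			 * this looks chosen to keep the temporary buffer
			 * page/fence friendly (e.g. at 32bpp the tile width
			 * is clamped to 1024 pixels).
			 */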
			assert(step);

			DBG(("%s: tiling upload, using %dx%d tiles\n",
			     __FUNCTION__, step, step));

			if (n > ARRAY_SIZE(stack)) {
				clipped = malloc(sizeof(BoxRec) * n);
				if (clipped == NULL)
					goto fallback;
			} else
				clipped = stack;

			for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
				int y2 = tile.y1 + step;
				if (y2 > extents.y2)
					y2 = extents.y2;
				tile.y2 = y2;

				for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
					int x2 = tile.x1 + step;
					if (x2 > extents.x2)
						x2 = extents.x2;
					tile.x2 = x2;

					tmp.drawable.width  = tile.x2 - tile.x1;
					tmp.drawable.height = tile.y2 - tile.y1;

					src_bo = kgem_create_buffer_2d(kgem,
								       tmp.drawable.width,
								       tmp.drawable.height,
								       tmp.drawable.bitsPerPixel,
								       KGEM_BUFFER_WRITE_INPLACE,
								       &ptr);
					if (!src_bo) {
						if (clipped != stack)
							free(clipped);
						goto fallback;
					}

					c = clipped;
					for (n = 0; n < nbox; n++) {
						*c = box[n];
						if (!box_intersect(c, &tile))
							continue;

						DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
						     __FUNCTION__,
						     c->x1, c->y1,
						     c->x2, c->y2,
						     src_dx, src_dy,
						     c->x1 - tile.x1,
						     c->y1 - tile.y1));
						memcpy_blt(src, ptr, tmp.drawable.bitsPerPixel,
							   stride, src_bo->pitch,
							   c->x1 + src_dx,
							   c->y1 + src_dy,
							   c->x1 - tile.x1,
							   c->y1 - tile.y1,
							   c->x2 - c->x1,
							   c->y2 - c->y1);
						c++;
					}

					if (c != clipped)
						n = sna->render.copy_boxes(sna, GXcopy,
									   &tmp, src_bo, -tile.x1, -tile.y1,
									   dst, dst_bo, dst_dx, dst_dy,
									   clipped, c - clipped, 0);
					else
						n = 1;

					kgem_bo_destroy(&sna->kgem, src_bo);

					if (!n) {
						if (clipped != stack)
							free(clipped);
						goto fallback;
					}
				}
			}

			if (clipped != stack)
				free(clipped);
		} else {
			src_bo = kgem_create_buffer_2d(kgem,
						       tmp.drawable.width,
						       tmp.drawable.height,
						       tmp.drawable.bitsPerPixel,
						       KGEM_BUFFER_WRITE_INPLACE,
						       &ptr);
			if (!src_bo)
				goto fallback;

			for (n = 0; n < nbox; n++) {
				DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
				     __FUNCTION__,
				     box[n].x1, box[n].y1,
				     box[n].x2, box[n].y2,
				     src_dx, src_dy,
				     box[n].x1 - extents.x1,
				     box[n].y1 - extents.y1));
				memcpy_blt(src, ptr, tmp.drawable.bitsPerPixel,
					   stride, src_bo->pitch,
					   box[n].x1 + src_dx,
					   box[n].y1 + src_dy,
					   box[n].x1 - extents.x1,
					   box[n].y1 - extents.y1,
					   box[n].x2 - box[n].x1,
					   box[n].y2 - box[n].y1);
			}

			n = sna->render.copy_boxes(sna, GXcopy,
						   &tmp, src_bo, -extents.x1, -extents.y1,
						   dst, dst_bo, dst_dx, dst_dy,
						   box, nbox, 0);

			kgem_bo_destroy(&sna->kgem, src_bo);

			if (!n)
				goto tile;
		}

		return true;
	}

	cmd = XY_SRC_COPY_BLT_CMD;
	br13 = dst_bo->pitch;
	if (kgem->gen >= 040 && dst_bo->tiling) {
		cmd |= BLT_DST_TILED;
		br13 >>= 2;
	}
	br13 |= 0xcc << 16;
	switch (dst->drawable.bitsPerPixel) {
	default:
	case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		 br13 |= 1 << 25; /* RGB8888 */
	case 16: br13 |= 1 << 24; /* RGB565 */
	case 8: break;
	}

	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
	if (!kgem_check_batch(kgem, 8) ||
	    !kgem_check_reloc_and_exec(kgem, 2) ||
	    !kgem_check_bo_fenced(kgem, dst_bo)) {
		kgem_submit(kgem);
		if (!kgem_check_bo_fenced(kgem, dst_bo))
			goto fallback;
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	do {
		int nbox_this_time;

		nbox_this_time = nbox;
		if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
			nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
		if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
			nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Count the total number of bytes to be read and allocate a
		 * single buffer large enough. Or if it is very small, combine
		 * with other allocations. */
		offset = 0;
		for (n = 0; n < nbox_this_time; n++) {
			int height = box[n].y2 - box[n].y1;
			int width = box[n].x2 - box[n].x1;
			offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
		}

		src_bo = kgem_create_buffer(kgem, offset,
					    KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
					    &ptr);
		if (!src_bo)
			break;

		offset = 0;
		do {
			int height = box->y2 - box->y1;
			int width = box->x2 - box->x1;
			int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
			uint32_t *b;

			DBG(("  %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
			     __FUNCTION__,
			     box->x1 + src_dx, box->y1 + src_dy,
			     box->x1 + dst_dx, box->y1 + dst_dy,
			     width, height,
			     offset, pitch));

			assert(box->x1 + src_dx >= 0);
			assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
			assert(box->y1 + src_dy >= 0);

			assert(box->x1 + dst_dx >= 0);
			assert(box->y1 + dst_dy >= 0);

			memcpy_blt(src, (char *)ptr + offset,
				   dst->drawable.bitsPerPixel,
				   stride, pitch,
				   box->x1 + src_dx, box->y1 + src_dy,
				   0, 0,
				   width, height);

			b = kgem->batch + kgem->nbatch;
			b[0] = cmd;
			b[1] = br13;
			b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
			b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
					      I915_GEM_DOMAIN_RENDER << 16 |
					      I915_GEM_DOMAIN_RENDER |
					      KGEM_RELOC_FENCED,
					      0);
			b[5] = 0;
			b[6] = pitch;
			b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
					      I915_GEM_DOMAIN_RENDER << 16 |
					      KGEM_RELOC_FENCED,
					      offset);
			kgem->nbatch += 8;

			box++;
			offset += pitch * height;
		} while (--nbox_this_time);
		assert(offset == __kgem_buffer_size(src_bo));

		if (nbox) {
			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
		}

		kgem_bo_destroy(kgem, src_bo);
	} while (nbox);

	sna->blt_state.fill_bo = 0;
	return true;

fallback:
	return write_boxes_inplace(kgem,
				   src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
				   dst_bo, dst_dx, dst_dy,
				   box, nbox);
}

static void
write_boxes_inplace__xor(struct kgem *kgem,
			 const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
			 struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
			 const BoxRec *box, int n,
			 uint32_t and, uint32_t or)
{
	void *dst;

	DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling));

	kgem_bo_submit(kgem, bo);

	dst = kgem_bo_map(kgem, bo);
	if (dst == NULL)
		return;

	do {
		DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__,
		     box->x1 + src_dx, box->y1 + src_dy,
		     box->x1 + dst_dx, box->y1 + dst_dy,
		     box->x2 - box->x1, box->y2 - box->y1,
		     bpp, stride, bo->pitch));

		assert(box->x2 > box->x1);
		assert(box->y2 > box->y1);

		assert(box->x1 + dst_dx >= 0);
		assert((box->x2 + dst_dx)*bpp <= 8*bo->pitch);
		assert(box->y1 + dst_dy >= 0);
		assert((box->y2 + dst_dy)*bo->pitch <= kgem_bo_size(bo));

		assert(box->x1 + src_dx >= 0);
		assert((box->x2 + src_dx)*bpp <= 8*stride);
		assert(box->y1 + src_dy >= 0);

		memcpy_xor(src, dst, bpp,
			   stride, bo->pitch,
			   box->x1 + src_dx, box->y1 + src_dy,
			   box->x1 + dst_dx, box->y1 + dst_dy,
			   box->x2 - box->x1, box->y2 - box->y1,
			   and, or);
		box++;
	} while (--n);
}

static bool upload_inplace__xor(struct kgem *kgem,
				struct kgem_bo *bo,
				const BoxRec *box,
				int n, int bpp)
{
	if (unlikely(kgem->wedged))
		return true;

	if (!kgem_bo_can_map(kgem, bo))
		return false;

	return __upload_inplace(kgem, bo, box, n, bpp);
}

void sna_write_boxes__xor(struct sna *sna, PixmapPtr dst,
			  struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
			  const void *src, int stride, int16_t src_dx, int16_t src_dy,
			  const BoxRec *box, int nbox,
			  uint32_t and, uint32_t or)
{
	struct kgem *kgem = &sna->kgem;
	struct kgem_bo *src_bo;
	BoxRec extents;
	bool can_blt;
	void *ptr;
	int offset;
	int n, cmd, br13;

	DBG(("%s x %d\n", __FUNCTION__, nbox));

	if (upload_inplace__xor(kgem, dst_bo, box, nbox, dst->drawable.bitsPerPixel)) {
fallback:
		write_boxes_inplace__xor(kgem,
					 src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
					 dst_bo, dst_dx, dst_dy,
					 box, nbox,
					 and, or);
		return;
	}

	can_blt = kgem_bo_can_blt(kgem, dst_bo) &&
		(box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
	extents = box[0];
	for (n = 1; n < nbox; n++) {
		if (box[n].x1 < extents.x1)
			extents.x1 = box[n].x1;
		if (box[n].x2 > extents.x2)
			extents.x2 = box[n].x2;

		if (can_blt)
			can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);

		if (box[n].y1 < extents.y1)
			extents.y1 = box[n].y1;
		if (box[n].y2 > extents.y2)
			extents.y2 = box[n].y2;
	}

	/* Try to avoid switching rings... */
	if (!can_blt || kgem->ring == KGEM_RENDER ||
	    upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) {
		PixmapRec tmp;

		tmp.drawable.width  = extents.x2 - extents.x1;
		tmp.drawable.height = extents.y2 - extents.y1;
		tmp.drawable.depth  = dst->drawable.depth;
		tmp.drawable.bitsPerPixel = dst->drawable.bitsPerPixel;
		tmp.devPrivate.ptr = NULL;

		assert(tmp.drawable.width);
		assert(tmp.drawable.height);

		DBG(("%s: upload (%d, %d)x(%d, %d), max %dx%d\n",
		     __FUNCTION__,
		     extents.x1, extents.y1,
		     tmp.drawable.width, tmp.drawable.height,
		     sna->render.max_3d_size, sna->render.max_3d_size));
		if (must_tile(sna, tmp.drawable.width, tmp.drawable.height)) {
			BoxRec tile, stack[64], *clipped, *c;
			int step;

tile:
			step = MIN(sna->render.max_3d_size - 4096 / dst->drawable.bitsPerPixel,
				   8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel);
			while (step * step * 4 > sna->kgem.max_upload_tile_size)
				step /= 2;

			DBG(("%s: tiling upload, using %dx%d tiles\n",
			     __FUNCTION__, step, step));
			assert(step);

			if (n > ARRAY_SIZE(stack)) {
				clipped = malloc(sizeof(BoxRec) * n);
				if (clipped == NULL)
					goto fallback;
			} else
				clipped = stack;

			for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
				int y2 = tile.y1 + step;
				if (y2 > extents.y2)
					y2 = extents.y2;
				tile.y2 = y2;

				for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
					int x2 = tile.x1 + step;
					if (x2 > extents.x2)
						x2 = extents.x2;
					tile.x2 = x2;

					tmp.drawable.width  = tile.x2 - tile.x1;
					tmp.drawable.height = tile.y2 - tile.y1;

					src_bo = kgem_create_buffer_2d(kgem,
								       tmp.drawable.width,
								       tmp.drawable.height,
								       tmp.drawable.bitsPerPixel,
								       KGEM_BUFFER_WRITE_INPLACE,
								       &ptr);
					if (!src_bo) {
						if (clipped != stack)
							free(clipped);
						goto fallback;
					}

					c = clipped;
					for (n = 0; n < nbox; n++) {
						*c = box[n];
						if (!box_intersect(c, &tile))
							continue;

						DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
						     __FUNCTION__,
						     c->x1, c->y1,
						     c->x2, c->y2,
						     src_dx, src_dy,
						     c->x1 - tile.x1,
						     c->y1 - tile.y1));
						memcpy_xor(src, ptr, tmp.drawable.bitsPerPixel,
							   stride, src_bo->pitch,
							   c->x1 + src_dx,
							   c->y1 + src_dy,
							   c->x1 - tile.x1,
							   c->y1 - tile.y1,
							   c->x2 - c->x1,
							   c->y2 - c->y1,
							   and, or);
						c++;
					}

					if (c != clipped)
						n = sna->render.copy_boxes(sna, GXcopy,
									   &tmp, src_bo, -tile.x1, -tile.y1,
									   dst, dst_bo, dst_dx, dst_dy,
									   clipped, c - clipped, 0);
					else
						n = 1;

					kgem_bo_destroy(&sna->kgem, src_bo);

					if (!n) {
						if (clipped != stack)
							free(clipped);
						goto fallback;
					}
				}
			}

			if (clipped != stack)
				free(clipped);
		} else {
			src_bo = kgem_create_buffer_2d(kgem,
						       tmp.drawable.width,
						       tmp.drawable.height,
						       tmp.drawable.bitsPerPixel,
						       KGEM_BUFFER_WRITE_INPLACE,
						       &ptr);
			if (!src_bo)
				goto fallback;

			for (n = 0; n < nbox; n++) {
				DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
				     __FUNCTION__,
				     box[n].x1, box[n].y1,
				     box[n].x2, box[n].y2,
				     src_dx, src_dy,
				     box[n].x1 - extents.x1,
				     box[n].y1 - extents.y1));
				memcpy_xor(src, ptr, tmp.drawable.bitsPerPixel,
					   stride, src_bo->pitch,
					   box[n].x1 + src_dx,
					   box[n].y1 + src_dy,
					   box[n].x1 - extents.x1,
					   box[n].y1 - extents.y1,
					   box[n].x2 - box[n].x1,
					   box[n].y2 - box[n].y1,
					   and, or);
			}

			n = sna->render.copy_boxes(sna, GXcopy,
						   &tmp, src_bo, -extents.x1, -extents.y1,
						   dst, dst_bo, dst_dx, dst_dy,
						   box, nbox, 0);

			kgem_bo_destroy(&sna->kgem, src_bo);

			if (!n)
				goto tile;
		}

		return;
	}

	cmd = XY_SRC_COPY_BLT_CMD;
	br13 = dst_bo->pitch;
	if (kgem->gen >= 040 && dst_bo->tiling) {
		cmd |= BLT_DST_TILED;
		br13 >>= 2;
	}
	br13 |= 0xcc << 16;
	switch (dst->drawable.bitsPerPixel) {
	default:
	case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		 br13 |= 1 << 25; /* RGB8888 */
	case 16: br13 |= 1 << 24; /* RGB565 */
	case 8: break;
	}

	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
	if (!kgem_check_batch(kgem, 8) ||
	    !kgem_check_reloc_and_exec(kgem, 2) ||
	    !kgem_check_bo_fenced(kgem, dst_bo)) {
		kgem_submit(kgem);
		if (!kgem_check_bo_fenced(kgem, dst_bo))
			goto fallback;
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	do {
		int nbox_this_time;

		nbox_this_time = nbox;
		if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
			nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
		if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
			nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Count the total number of bytes to be read and allocate a
		 * single buffer large enough. Or if it is very small, combine
		 * with other allocations. */
		offset = 0;
		for (n = 0; n < nbox_this_time; n++) {
			int height = box[n].y2 - box[n].y1;
			int width = box[n].x2 - box[n].x1;
			offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
		}

		src_bo = kgem_create_buffer(kgem, offset,
					    KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
					    &ptr);
		if (!src_bo)
			break;

		offset = 0;
		do {
			int height = box->y2 - box->y1;
			int width = box->x2 - box->x1;
			int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
			uint32_t *b;

			DBG(("  %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
			     __FUNCTION__,
			     box->x1 + src_dx, box->y1 + src_dy,
			     box->x1 + dst_dx, box->y1 + dst_dy,
			     width, height,
			     offset, pitch));

			assert(box->x1 + src_dx >= 0);
			assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
			assert(box->y1 + src_dy >= 0);

			assert(box->x1 + dst_dx >= 0);
			assert(box->y1 + dst_dy >= 0);

			memcpy_xor(src, (char *)ptr + offset,
				   dst->drawable.bitsPerPixel,
				   stride, pitch,
				   box->x1 + src_dx, box->y1 + src_dy,
				   0, 0,
				   width, height,
				   and, or);

			b = kgem->batch + kgem->nbatch;
			b[0] = cmd;
			b[1] = br13;
			b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
			b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
					      I915_GEM_DOMAIN_RENDER << 16 |
					      I915_GEM_DOMAIN_RENDER |
					      KGEM_RELOC_FENCED,
					      0);
			b[5] = 0;
			b[6] = pitch;
			b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
					      I915_GEM_DOMAIN_RENDER << 16 |
					      KGEM_RELOC_FENCED,
					      offset);
			kgem->nbatch += 8;

			box++;
			offset += pitch * height;
		} while (--nbox_this_time);
		assert(offset == __kgem_buffer_size(src_bo));

		if (nbox) {
			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
		}

		kgem_bo_destroy(kgem, src_bo);
	} while (nbox);

	sna->blt_state.fill_bo = 0;
}

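/*
 * Replace the contents of a (possibly busy) bo without stalling: stage
 * the pixels in a fresh upload buffer and let the GPU copy them across,
 * so the CPU never waits on the target.  Only worthwhile for pixmaps
 * small enough to stay cache-friendly, hence the half_cpu_cache_pages
 * check below.
 */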
static bool
indirect_replace(struct sna *sna,
		 PixmapPtr pixmap,
		 struct kgem_bo *bo,
		 const void *src, int stride)
{
	struct kgem *kgem = &sna->kgem;
	struct kgem_bo *src_bo;
	BoxRec box;
	void *ptr;
	bool ret;

	DBG(("%s: size=%d vs %d\n",
	     __FUNCTION__,
	     (int)pixmap->devKind * pixmap->drawable.height >> 12,
	     kgem->half_cpu_cache_pages));

	if ((int)pixmap->devKind * pixmap->drawable.height >> 12 > kgem->half_cpu_cache_pages)
		return false;

	if (!kgem_bo_can_blt(kgem, bo) &&
	    must_tile(sna, pixmap->drawable.width, pixmap->drawable.height))
		return false;

	src_bo = kgem_create_buffer_2d(kgem,
				       pixmap->drawable.width,
				       pixmap->drawable.height,
				       pixmap->drawable.bitsPerPixel,
				       KGEM_BUFFER_WRITE_INPLACE,
				       &ptr);
	if (!src_bo)
		return false;

	memcpy_blt(src, ptr, pixmap->drawable.bitsPerPixel,
		   stride, src_bo->pitch,
		   0, 0,
		   0, 0,
		   pixmap->drawable.width,
		   pixmap->drawable.height);

	box.x1 = box.y1 = 0;
	box.x2 = pixmap->drawable.width;
	box.y2 = pixmap->drawable.height;

	ret = sna->render.copy_boxes(sna, GXcopy,
				     pixmap, src_bo, 0, 0,
				     pixmap, bo, 0, 0,
				     &box, 1, 0);

	kgem_bo_destroy(kgem, src_bo);

	return ret;
}

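/*
 * sna_replace() tries progressively cheaper strategies: reuse the bo if
 * it is idle, stage through indirect_replace() or a fresh bo when busy,
 * then pick the fastest write path available (linear kgem_bo_write(),
 * inplace tiled copy, GTT mapping, and finally sna_write_boxes()).
 */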
bool sna_replace(struct sna *sna,
		 PixmapPtr pixmap,
		 struct kgem_bo **_bo,
		 const void *src, int stride)
{
	struct kgem_bo *bo = *_bo;
	struct kgem *kgem = &sna->kgem;
	void *dst;

	DBG(("%s(handle=%d, %dx%d, bpp=%d, tiling=%d) busy?=%d\n",
	     __FUNCTION__, bo->handle,
	     pixmap->drawable.width,
	     pixmap->drawable.height,
	     pixmap->drawable.bitsPerPixel,
	     bo->tiling,
	     __kgem_bo_is_busy(kgem, bo)));

	assert(!sna_pixmap(pixmap)->pinned);

	kgem_bo_undo(kgem, bo);

	if (__kgem_bo_is_busy(kgem, bo)) {
		struct kgem_bo *new_bo;

		if (indirect_replace(sna, pixmap, bo, src, stride))
			return true;

		new_bo = kgem_create_2d(kgem,
					pixmap->drawable.width,
					pixmap->drawable.height,
					pixmap->drawable.bitsPerPixel,
					bo->tiling,
					CREATE_GTT_MAP | CREATE_INACTIVE);
		if (new_bo)
			bo = new_bo;
	}

	if (bo->tiling == I915_TILING_NONE && bo->pitch == stride &&
	    kgem_bo_write(kgem, bo, src,
			  (pixmap->drawable.height-1)*stride + pixmap->drawable.width*pixmap->drawable.bitsPerPixel/8))
		goto done;

	if (upload_inplace__tiled(kgem, bo)) {
		BoxRec box;

		box.x1 = box.y1 = 0;
		box.x2 = pixmap->drawable.width;
		box.y2 = pixmap->drawable.height;

		if (write_boxes_inplace__tiled(kgem, src,
					       stride, pixmap->drawable.bitsPerPixel, 0, 0,
					       bo, 0, 0, &box, 1))
			goto done;
	}

	if (kgem_bo_is_mappable(kgem, bo) &&
	    (dst = kgem_bo_map(kgem, bo)) != NULL) {
		memcpy_blt(src, dst, pixmap->drawable.bitsPerPixel,
			   stride, bo->pitch,
			   0, 0,
			   0, 0,
			   pixmap->drawable.width,
			   pixmap->drawable.height);
	} else {
		BoxRec box;

		box.x1 = box.y1 = 0;
		box.x2 = pixmap->drawable.width;
		box.y2 = pixmap->drawable.height;

		if (!sna_write_boxes(sna, pixmap,
				     bo, 0, 0,
				     src, stride, 0, 0,
				     &box, 1))
			goto err;
	}

done:
	if (bo != *_bo)
		kgem_bo_destroy(kgem, *_bo);
	*_bo = bo;
	return true;

err:
	if (bo != *_bo)
		kgem_bo_destroy(kgem, bo);
	return false;
}

struct kgem_bo *sna_replace__xor(struct sna *sna,
				 PixmapPtr pixmap,
				 struct kgem_bo *bo,
				 const void *src, int stride,
				 uint32_t and, uint32_t or)
{
	struct kgem *kgem = &sna->kgem;
	void *dst;

	DBG(("%s(handle=%d, %dx%d, bpp=%d, tiling=%d)\n",
	     __FUNCTION__, bo->handle,
	     pixmap->drawable.width,
	     pixmap->drawable.height,
	     pixmap->drawable.bitsPerPixel,
	     bo->tiling));

	assert(!sna_pixmap(pixmap)->pinned);

	if (kgem_bo_is_busy(bo)) {
		struct kgem_bo *new_bo;

		new_bo = kgem_create_2d(kgem,
					pixmap->drawable.width,
					pixmap->drawable.height,
					pixmap->drawable.bitsPerPixel,
					bo->tiling,
					CREATE_GTT_MAP | CREATE_INACTIVE);
		if (new_bo) {
			kgem_bo_destroy(kgem, bo);
			bo = new_bo;
		}
	}

	if (kgem_bo_is_mappable(kgem, bo)) {
		dst = kgem_bo_map(kgem, bo);
		if (dst) {
			memcpy_xor(src, dst, pixmap->drawable.bitsPerPixel,
				   stride, bo->pitch,
				   0, 0,
				   0, 0,
				   pixmap->drawable.width,
				   pixmap->drawable.height,
				   and, or);
		}
	} else {
		BoxRec box;

		box.x1 = box.y1 = 0;
		box.x2 = pixmap->drawable.width;
		box.y2 = pixmap->drawable.height;

		sna_write_boxes__xor(sna, pixmap,
				     bo, 0, 0,
				     src, stride, 0, 0,
				     &box, 1,
				     and, or);
	}

	return bo;
}
