1428d7b3dSmrg/*
2428d7b3dSmrg * Copyright (c) 2011 Intel Corporation
3428d7b3dSmrg *
4428d7b3dSmrg * Permission is hereby granted, free of charge, to any person obtaining a
5428d7b3dSmrg * copy of this software and associated documentation files (the "Software"),
6428d7b3dSmrg * to deal in the Software without restriction, including without limitation
7428d7b3dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8428d7b3dSmrg * and/or sell copies of the Software, and to permit persons to whom the
9428d7b3dSmrg * Software is furnished to do so, subject to the following conditions:
10428d7b3dSmrg *
11428d7b3dSmrg * The above copyright notice and this permission notice (including the next
12428d7b3dSmrg * paragraph) shall be included in all copies or substantial portions of the
13428d7b3dSmrg * Software.
14428d7b3dSmrg *
15428d7b3dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16428d7b3dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17428d7b3dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18428d7b3dSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19428d7b3dSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20428d7b3dSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21428d7b3dSmrg * SOFTWARE.
22428d7b3dSmrg *
23428d7b3dSmrg * Authors:
24428d7b3dSmrg *    Chris Wilson <chris@chris-wilson.co.uk>
25428d7b3dSmrg *
26428d7b3dSmrg */
27428d7b3dSmrg
28428d7b3dSmrg#ifdef HAVE_CONFIG_H
29428d7b3dSmrg#include "config.h"
30428d7b3dSmrg#endif
31428d7b3dSmrg
32428d7b3dSmrg#include "sna.h"
33428d7b3dSmrg#include "sna_render.h"
34428d7b3dSmrg#include "sna_reg.h"
35428d7b3dSmrg
36428d7b3dSmrg#include <sys/mman.h>
37428d7b3dSmrg
/* Byte length of a row of x elements of y bytes each, passed through ALIGN(..., 4). */
#define PITCH(x, y) ALIGN((x)*(y), 4)

/* Compile-time override of the inplace heuristics; 0 leaves the heuristics in charge. */
#define FORCE_INPLACE 0 /* 1 upload directly, -1 force indirect */
41428d7b3dSmrg
42428d7b3dSmrg/* XXX Need to avoid using GTT fenced access for I915_TILING_Y on 855GM */
43428d7b3dSmrg
44428d7b3dSmrgstatic inline bool upload_too_large(struct sna *sna, int width, int height)
45428d7b3dSmrg{
46428d7b3dSmrg	return width * height * 4 > sna->kgem.max_upload_tile_size;
47428d7b3dSmrg}
48428d7b3dSmrg
49428d7b3dSmrgstatic inline bool must_tile(struct sna *sna, int width, int height)
50428d7b3dSmrg{
51428d7b3dSmrg	return (width  > sna->render.max_3d_size ||
52428d7b3dSmrg		height > sna->render.max_3d_size ||
53428d7b3dSmrg		upload_too_large(sna, width, height));
54428d7b3dSmrg}
55428d7b3dSmrg
56428d7b3dSmrgstatic bool download_inplace__cpu(struct kgem *kgem,
57428d7b3dSmrg				  PixmapPtr p, struct kgem_bo *bo,
58428d7b3dSmrg				  const BoxRec *box, int nbox)
59428d7b3dSmrg{
60428d7b3dSmrg	BoxRec extents;
61428d7b3dSmrg
62428d7b3dSmrg	switch (bo->tiling) {
63428d7b3dSmrg	case I915_TILING_X:
64428d7b3dSmrg		if (!kgem->memcpy_from_tiled_x)
65428d7b3dSmrg			return false;
66428d7b3dSmrg	case I915_TILING_NONE:
67428d7b3dSmrg		break;
68428d7b3dSmrg	default:
69428d7b3dSmrg		return false;
70428d7b3dSmrg	}
71428d7b3dSmrg
72428d7b3dSmrg	if (!kgem_bo_can_map__cpu(kgem, bo, false))
73428d7b3dSmrg		return false;
74428d7b3dSmrg
75428d7b3dSmrg	if (kgem->has_llc)
76428d7b3dSmrg		return true;
77428d7b3dSmrg
78428d7b3dSmrg	extents = *box;
79428d7b3dSmrg	while (--nbox) {
80428d7b3dSmrg		++box;
81428d7b3dSmrg		if (box->x1 < extents.x1)
82428d7b3dSmrg			extents.x1 = box->x1;
83428d7b3dSmrg		if (box->x2 > extents.x2)
84428d7b3dSmrg			extents.x2 = box->x2;
85428d7b3dSmrg		extents.y2 = box->y2;
86428d7b3dSmrg	}
87428d7b3dSmrg
88428d7b3dSmrg	if (extents.x2 - extents.x1 == p->drawable.width &&
89428d7b3dSmrg	    extents.y2 - extents.y1 == p->drawable.height)
90428d7b3dSmrg		return true;
91428d7b3dSmrg
92428d7b3dSmrg	return __kgem_bo_size(bo) <= PAGE_SIZE;
93428d7b3dSmrg}
94428d7b3dSmrg
95428d7b3dSmrgstatic bool
96428d7b3dSmrgread_boxes_inplace__cpu(struct kgem *kgem,
97428d7b3dSmrg			PixmapPtr pixmap, struct kgem_bo *bo,
98428d7b3dSmrg			const BoxRec *box, int n)
99428d7b3dSmrg{
100428d7b3dSmrg	int bpp = pixmap->drawable.bitsPerPixel;
101428d7b3dSmrg	void *src, *dst = pixmap->devPrivate.ptr;
102428d7b3dSmrg	int src_pitch = bo->pitch;
103428d7b3dSmrg	int dst_pitch = pixmap->devKind;
104428d7b3dSmrg
105428d7b3dSmrg	if (!download_inplace__cpu(kgem, dst, bo, box, n))
106428d7b3dSmrg		return false;
107428d7b3dSmrg
108428d7b3dSmrg	assert(kgem_bo_can_map__cpu(kgem, bo, false));
109428d7b3dSmrg	assert(bo->tiling != I915_TILING_Y);
110428d7b3dSmrg
111428d7b3dSmrg	src = kgem_bo_map__cpu(kgem, bo);
112428d7b3dSmrg	if (src == NULL)
113428d7b3dSmrg		return false;
114428d7b3dSmrg
115428d7b3dSmrg	kgem_bo_sync__cpu_full(kgem, bo, 0);
116428d7b3dSmrg
117428d7b3dSmrg	if (sigtrap_get())
118428d7b3dSmrg		return false;
119428d7b3dSmrg
120428d7b3dSmrg	DBG(("%s x %d\n", __FUNCTION__, n));
121428d7b3dSmrg
122428d7b3dSmrg	if (bo->tiling == I915_TILING_X) {
123428d7b3dSmrg		do {
124428d7b3dSmrg			memcpy_from_tiled_x(kgem, src, dst, bpp, src_pitch, dst_pitch,
125428d7b3dSmrg					    box->x1, box->y1,
126428d7b3dSmrg					    box->x1, box->y1,
127428d7b3dSmrg					    box->x2 - box->x1, box->y2 - box->y1);
128428d7b3dSmrg			box++;
129428d7b3dSmrg		} while (--n);
130428d7b3dSmrg	} else {
131428d7b3dSmrg		do {
132428d7b3dSmrg			memcpy_blt(src, dst, bpp, src_pitch, dst_pitch,
133428d7b3dSmrg				   box->x1, box->y1,
134428d7b3dSmrg				   box->x1, box->y1,
135428d7b3dSmrg				   box->x2 - box->x1, box->y2 - box->y1);
136428d7b3dSmrg			box++;
137428d7b3dSmrg		} while (--n);
138428d7b3dSmrg	}
139428d7b3dSmrg
140428d7b3dSmrg	sigtrap_put();
141428d7b3dSmrg	return true;
142428d7b3dSmrg}
143428d7b3dSmrg
144428d7b3dSmrgstatic void read_boxes_inplace(struct kgem *kgem,
145428d7b3dSmrg			       PixmapPtr pixmap, struct kgem_bo *bo,
146428d7b3dSmrg			       const BoxRec *box, int n)
147428d7b3dSmrg{
148428d7b3dSmrg	int bpp = pixmap->drawable.bitsPerPixel;
149428d7b3dSmrg	void *src, *dst = pixmap->devPrivate.ptr;
150428d7b3dSmrg	int src_pitch = bo->pitch;
151428d7b3dSmrg	int dst_pitch = pixmap->devKind;
152428d7b3dSmrg
153428d7b3dSmrg	if (read_boxes_inplace__cpu(kgem, pixmap, bo, box, n))
154428d7b3dSmrg		return;
155428d7b3dSmrg
156428d7b3dSmrg	DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling));
157428d7b3dSmrg
158428d7b3dSmrg	if (!kgem_bo_can_map(kgem, bo))
159428d7b3dSmrg		return;
160428d7b3dSmrg
161428d7b3dSmrg	kgem_bo_submit(kgem, bo);
162428d7b3dSmrg
163428d7b3dSmrg	src = kgem_bo_map(kgem, bo);
164428d7b3dSmrg	if (src == NULL)
165428d7b3dSmrg		return;
166428d7b3dSmrg
167428d7b3dSmrg	if (sigtrap_get())
168428d7b3dSmrg		return;
169428d7b3dSmrg
170428d7b3dSmrg	assert(src != dst);
171428d7b3dSmrg	do {
172428d7b3dSmrg		DBG(("%s: copying box (%d, %d), (%d, %d)\n",
173428d7b3dSmrg		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
174428d7b3dSmrg
175428d7b3dSmrg		assert(box->x2 > box->x1);
176428d7b3dSmrg		assert(box->y2 > box->y1);
177428d7b3dSmrg
178428d7b3dSmrg		assert(box->x1 >= 0);
179428d7b3dSmrg		assert(box->y1 >= 0);
180428d7b3dSmrg		assert(box->x2 <= pixmap->drawable.width);
181428d7b3dSmrg		assert(box->y2 <= pixmap->drawable.height);
182428d7b3dSmrg
183428d7b3dSmrg		assert(box->x1 >= 0);
184428d7b3dSmrg		assert(box->y1 >= 0);
185428d7b3dSmrg		assert(box->x2 <= pixmap->drawable.width);
186428d7b3dSmrg		assert(box->y2 <= pixmap->drawable.height);
187428d7b3dSmrg
188428d7b3dSmrg		memcpy_blt(src, dst, bpp,
189428d7b3dSmrg			   src_pitch, dst_pitch,
190428d7b3dSmrg			   box->x1, box->y1,
191428d7b3dSmrg			   box->x1, box->y1,
192428d7b3dSmrg			   box->x2 - box->x1, box->y2 - box->y1);
193428d7b3dSmrg		box++;
194428d7b3dSmrg	} while (--n);
195428d7b3dSmrg
196428d7b3dSmrg	sigtrap_put();
197428d7b3dSmrg}
198428d7b3dSmrg
199428d7b3dSmrgstatic bool download_inplace(struct kgem *kgem,
200428d7b3dSmrg			     PixmapPtr p, struct kgem_bo *bo,
201428d7b3dSmrg			     const BoxRec *box, int nbox)
202428d7b3dSmrg{
203428d7b3dSmrg	bool cpu;
204428d7b3dSmrg
205428d7b3dSmrg	if (unlikely(kgem->wedged))
206428d7b3dSmrg		return true;
207428d7b3dSmrg
208428d7b3dSmrg	cpu = download_inplace__cpu(kgem, p, bo, box, nbox);
209428d7b3dSmrg	if (!cpu && !kgem_bo_can_map(kgem, bo))
210428d7b3dSmrg		return false;
211428d7b3dSmrg
212428d7b3dSmrg	if (FORCE_INPLACE)
213428d7b3dSmrg		return FORCE_INPLACE > 0;
214428d7b3dSmrg
215428d7b3dSmrg	if (cpu)
216428d7b3dSmrg		return true;
217428d7b3dSmrg
218428d7b3dSmrg	if (kgem->can_blt_cpu && kgem->max_cpu_size)
219428d7b3dSmrg		return false;
220428d7b3dSmrg
221428d7b3dSmrg	return !__kgem_bo_is_busy(kgem, bo);
222428d7b3dSmrg}
223428d7b3dSmrg
224428d7b3dSmrgvoid sna_read_boxes(struct sna *sna, PixmapPtr dst, struct kgem_bo *src_bo,
225428d7b3dSmrg		    const BoxRec *box, int nbox)
226428d7b3dSmrg{
227428d7b3dSmrg	struct kgem *kgem = &sna->kgem;
228428d7b3dSmrg	struct kgem_bo *dst_bo;
229428d7b3dSmrg	BoxRec extents;
230428d7b3dSmrg	const BoxRec *tmp_box;
231428d7b3dSmrg	int tmp_nbox;
232428d7b3dSmrg	void *ptr;
233428d7b3dSmrg	int src_pitch, cpp, offset;
234428d7b3dSmrg	int n, cmd, br13;
235428d7b3dSmrg	bool can_blt;
236428d7b3dSmrg
237428d7b3dSmrg	DBG(("%s x %d, src=(handle=%d), dst=(size=(%d, %d)\n",
238428d7b3dSmrg	     __FUNCTION__, nbox, src_bo->handle,
239428d7b3dSmrg	     dst->drawable.width, dst->drawable.height));
240428d7b3dSmrg
241428d7b3dSmrg#ifndef NDEBUG
242428d7b3dSmrg	for (n = 0; n < nbox; n++) {
243428d7b3dSmrg		if (box[n].x1 < 0 || box[n].y1 < 0 ||
244428d7b3dSmrg		    box[n].x2 * dst->drawable.bitsPerPixel/8 > src_bo->pitch ||
245428d7b3dSmrg		    box[n].y2 * src_bo->pitch > kgem_bo_size(src_bo))
246428d7b3dSmrg		{
247428d7b3dSmrg			FatalError("source out-of-bounds box[%d]=(%d, %d), (%d, %d), pitch=%d, size=%d\n", n,
248428d7b3dSmrg				   box[n].x1, box[n].y1,
249428d7b3dSmrg				   box[n].x2, box[n].y2,
250428d7b3dSmrg				   src_bo->pitch, kgem_bo_size(src_bo));
251428d7b3dSmrg		}
252428d7b3dSmrg	}
253428d7b3dSmrg#endif
254428d7b3dSmrg
255428d7b3dSmrg	/* XXX The gpu is faster to perform detiling in bulk, but takes
256428d7b3dSmrg	 * longer to setup and retrieve the results, with an additional
257428d7b3dSmrg	 * copy. The long term solution is to use snoopable bo and avoid
258428d7b3dSmrg	 * this path.
259428d7b3dSmrg	 */
260428d7b3dSmrg
261428d7b3dSmrg	if (download_inplace(kgem, dst, src_bo, box, nbox)) {
262428d7b3dSmrgfallback:
263428d7b3dSmrg		read_boxes_inplace(kgem, dst, src_bo, box, nbox);
264428d7b3dSmrg		return;
265428d7b3dSmrg	}
266428d7b3dSmrg
267428d7b3dSmrg	can_blt = kgem_bo_can_blt(kgem, src_bo) &&
268428d7b3dSmrg		(box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
269428d7b3dSmrg	extents = box[0];
270428d7b3dSmrg	for (n = 1; n < nbox; n++) {
271428d7b3dSmrg		if (box[n].x1 < extents.x1)
272428d7b3dSmrg			extents.x1 = box[n].x1;
273428d7b3dSmrg		if (box[n].x2 > extents.x2)
274428d7b3dSmrg			extents.x2 = box[n].x2;
275428d7b3dSmrg
276428d7b3dSmrg		if (can_blt)
277428d7b3dSmrg			can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
278428d7b3dSmrg
279428d7b3dSmrg		if (box[n].y1 < extents.y1)
280428d7b3dSmrg			extents.y1 = box[n].y1;
281428d7b3dSmrg		if (box[n].y2 > extents.y2)
282428d7b3dSmrg			extents.y2 = box[n].y2;
283428d7b3dSmrg	}
284428d7b3dSmrg	if (kgem_bo_can_map(kgem, src_bo)) {
285428d7b3dSmrg		/* Is it worth detiling? */
286428d7b3dSmrg		if ((extents.y2 - extents.y1 - 1) * src_bo->pitch < 4096)
287428d7b3dSmrg			goto fallback;
288428d7b3dSmrg	}
289428d7b3dSmrg
290428d7b3dSmrg	/* Try to avoid switching rings... */
291428d7b3dSmrg	if (!can_blt || kgem->ring == KGEM_RENDER ||
292428d7b3dSmrg	    upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) {
293428d7b3dSmrg		DrawableRec tmp;
294428d7b3dSmrg
295428d7b3dSmrg		tmp.width  = extents.x2 - extents.x1;
296428d7b3dSmrg		tmp.height = extents.y2 - extents.y1;
297428d7b3dSmrg		tmp.depth  = dst->drawable.depth;
298428d7b3dSmrg		tmp.bitsPerPixel = dst->drawable.bitsPerPixel;
299428d7b3dSmrg
300428d7b3dSmrg		assert(tmp.width);
301428d7b3dSmrg		assert(tmp.height);
302428d7b3dSmrg
303428d7b3dSmrg		if (must_tile(sna, tmp.width, tmp.height)) {
304428d7b3dSmrg			BoxRec tile, stack[64], *clipped, *c;
305428d7b3dSmrg			int step;
306428d7b3dSmrg
307428d7b3dSmrg			if (n > ARRAY_SIZE(stack)) {
308428d7b3dSmrg				clipped = malloc(sizeof(BoxRec) * n);
309428d7b3dSmrg				if (clipped == NULL)
310428d7b3dSmrg					goto fallback;
311428d7b3dSmrg			} else
312428d7b3dSmrg				clipped = stack;
313428d7b3dSmrg
314428d7b3dSmrg			step = MIN(sna->render.max_3d_size,
315428d7b3dSmrg				   8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel);
316428d7b3dSmrg			while (step * step * 4 > sna->kgem.max_upload_tile_size)
317428d7b3dSmrg				step /= 2;
318428d7b3dSmrg
319428d7b3dSmrg			DBG(("%s: tiling download, using %dx%d tiles\n",
320428d7b3dSmrg			     __FUNCTION__, step, step));
321428d7b3dSmrg			assert(step);
322428d7b3dSmrg
323428d7b3dSmrg			for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
324428d7b3dSmrg				int y2 = tile.y1 + step;
325428d7b3dSmrg				if (y2 > extents.y2)
326428d7b3dSmrg					y2 = extents.y2;
327428d7b3dSmrg				tile.y2 = y2;
328428d7b3dSmrg
329428d7b3dSmrg				for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
330428d7b3dSmrg					int x2 = tile.x1 + step;
331428d7b3dSmrg					if (x2 > extents.x2)
332428d7b3dSmrg						x2 = extents.x2;
333428d7b3dSmrg					tile.x2 = x2;
334428d7b3dSmrg
335428d7b3dSmrg					tmp.width  = tile.x2 - tile.x1;
336428d7b3dSmrg					tmp.height = tile.y2 - tile.y1;
337428d7b3dSmrg
338428d7b3dSmrg					c = clipped;
339428d7b3dSmrg					for (n = 0; n < nbox; n++) {
340428d7b3dSmrg						*c = box[n];
341428d7b3dSmrg						if (!box_intersect(c, &tile))
342428d7b3dSmrg							continue;
343428d7b3dSmrg
344428d7b3dSmrg						DBG(("%s: box(%d, %d), (%d, %d),, dst=(%d, %d)\n",
345428d7b3dSmrg						     __FUNCTION__,
346428d7b3dSmrg						     c->x1, c->y1,
347428d7b3dSmrg						     c->x2, c->y2,
348428d7b3dSmrg						     c->x1 - tile.x1,
349428d7b3dSmrg						     c->y1 - tile.y1));
350428d7b3dSmrg						c++;
351428d7b3dSmrg					}
352428d7b3dSmrg					if (c == clipped)
353428d7b3dSmrg						continue;
354428d7b3dSmrg
355428d7b3dSmrg					dst_bo = kgem_create_buffer_2d(kgem,
356428d7b3dSmrg								       tmp.width,
357428d7b3dSmrg								       tmp.height,
358428d7b3dSmrg								       tmp.bitsPerPixel,
359428d7b3dSmrg								       KGEM_BUFFER_LAST,
360428d7b3dSmrg								       &ptr);
361428d7b3dSmrg					if (!dst_bo) {
362428d7b3dSmrg						if (clipped != stack)
363428d7b3dSmrg							free(clipped);
364428d7b3dSmrg						goto fallback;
365428d7b3dSmrg					}
366428d7b3dSmrg
367428d7b3dSmrg					if (!sna->render.copy_boxes(sna, GXcopy,
368428d7b3dSmrg								    &dst->drawable, src_bo, 0, 0,
369428d7b3dSmrg								    &tmp, dst_bo, -tile.x1, -tile.y1,
370428d7b3dSmrg								    clipped, c-clipped, COPY_LAST)) {
371428d7b3dSmrg						kgem_bo_destroy(&sna->kgem, dst_bo);
372428d7b3dSmrg						if (clipped != stack)
373428d7b3dSmrg							free(clipped);
374428d7b3dSmrg						goto fallback;
375428d7b3dSmrg					}
376428d7b3dSmrg
377428d7b3dSmrg					kgem_bo_submit(&sna->kgem, dst_bo);
378428d7b3dSmrg					kgem_buffer_read_sync(kgem, dst_bo);
379428d7b3dSmrg
380428d7b3dSmrg					if (sigtrap_get() == 0) {
381428d7b3dSmrg						while (c-- != clipped) {
382428d7b3dSmrg							memcpy_blt(ptr, dst->devPrivate.ptr, tmp.bitsPerPixel,
383428d7b3dSmrg								   dst_bo->pitch, dst->devKind,
384428d7b3dSmrg								   c->x1 - tile.x1,
385428d7b3dSmrg								   c->y1 - tile.y1,
386428d7b3dSmrg								   c->x1, c->y1,
387428d7b3dSmrg								   c->x2 - c->x1,
388428d7b3dSmrg								   c->y2 - c->y1);
389428d7b3dSmrg						}
390428d7b3dSmrg						sigtrap_put();
391428d7b3dSmrg					}
392428d7b3dSmrg
393428d7b3dSmrg					kgem_bo_destroy(&sna->kgem, dst_bo);
394428d7b3dSmrg				}
395428d7b3dSmrg			}
396428d7b3dSmrg
397428d7b3dSmrg			if (clipped != stack)
398428d7b3dSmrg				free(clipped);
399428d7b3dSmrg		} else {
400428d7b3dSmrg			dst_bo = kgem_create_buffer_2d(kgem,
401428d7b3dSmrg						       tmp.width,
402428d7b3dSmrg						       tmp.height,
403428d7b3dSmrg						       tmp.bitsPerPixel,
404428d7b3dSmrg						       KGEM_BUFFER_LAST,
405428d7b3dSmrg						       &ptr);
406428d7b3dSmrg			if (!dst_bo)
407428d7b3dSmrg				goto fallback;
408428d7b3dSmrg
409428d7b3dSmrg			if (!sna->render.copy_boxes(sna, GXcopy,
410428d7b3dSmrg						    &dst->drawable, src_bo, 0, 0,
411428d7b3dSmrg						    &tmp, dst_bo, -extents.x1, -extents.y1,
412428d7b3dSmrg						    box, nbox, COPY_LAST)) {
413428d7b3dSmrg				kgem_bo_destroy(&sna->kgem, dst_bo);
414428d7b3dSmrg				goto fallback;
415428d7b3dSmrg			}
416428d7b3dSmrg
417428d7b3dSmrg			kgem_bo_submit(&sna->kgem, dst_bo);
418428d7b3dSmrg			kgem_buffer_read_sync(kgem, dst_bo);
419428d7b3dSmrg
420428d7b3dSmrg			if (sigtrap_get() == 0) {
421428d7b3dSmrg				for (n = 0; n < nbox; n++) {
422428d7b3dSmrg					memcpy_blt(ptr, dst->devPrivate.ptr, tmp.bitsPerPixel,
423428d7b3dSmrg						   dst_bo->pitch, dst->devKind,
424428d7b3dSmrg						   box[n].x1 - extents.x1,
425428d7b3dSmrg						   box[n].y1 - extents.y1,
426428d7b3dSmrg						   box[n].x1, box[n].y1,
427428d7b3dSmrg						   box[n].x2 - box[n].x1,
428428d7b3dSmrg						   box[n].y2 - box[n].y1);
429428d7b3dSmrg				}
430428d7b3dSmrg				sigtrap_put();
431428d7b3dSmrg			}
432428d7b3dSmrg
433428d7b3dSmrg			kgem_bo_destroy(&sna->kgem, dst_bo);
434428d7b3dSmrg		}
435428d7b3dSmrg		return;
436428d7b3dSmrg	}
437428d7b3dSmrg
438428d7b3dSmrg	/* count the total number of bytes to be read and allocate a bo */
439428d7b3dSmrg	cpp = dst->drawable.bitsPerPixel / 8;
440428d7b3dSmrg	offset = 0;
441428d7b3dSmrg	for (n = 0; n < nbox; n++) {
442428d7b3dSmrg		int height = box[n].y2 - box[n].y1;
443428d7b3dSmrg		int width = box[n].x2 - box[n].x1;
444428d7b3dSmrg		offset += PITCH(width, cpp) * height;
445428d7b3dSmrg	}
446428d7b3dSmrg
447428d7b3dSmrg	DBG(("    read buffer size=%d\n", offset));
448428d7b3dSmrg
449428d7b3dSmrg	dst_bo = kgem_create_buffer(kgem, offset, KGEM_BUFFER_LAST, &ptr);
450428d7b3dSmrg	if (!dst_bo) {
451428d7b3dSmrg		read_boxes_inplace(kgem, dst, src_bo, box, nbox);
452428d7b3dSmrg		return;
453428d7b3dSmrg	}
454428d7b3dSmrg
455428d7b3dSmrg	cmd = XY_SRC_COPY_BLT_CMD;
456428d7b3dSmrg	src_pitch = src_bo->pitch;
457428d7b3dSmrg	if (kgem->gen >= 040 && src_bo->tiling) {
458428d7b3dSmrg		cmd |= BLT_SRC_TILED;
459428d7b3dSmrg		src_pitch >>= 2;
460428d7b3dSmrg	}
461428d7b3dSmrg
462428d7b3dSmrg	br13 = 0xcc << 16;
463428d7b3dSmrg	switch (cpp) {
464428d7b3dSmrg	default:
465428d7b3dSmrg	case 4: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
466428d7b3dSmrg		br13 |= 1 << 25; /* RGB8888 */
467428d7b3dSmrg	case 2: br13 |= 1 << 24; /* RGB565 */
468428d7b3dSmrg	case 1: break;
469428d7b3dSmrg	}
470428d7b3dSmrg
471428d7b3dSmrg	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
472428d7b3dSmrg	if (!kgem_check_batch(kgem, 10) ||
473428d7b3dSmrg	    !kgem_check_reloc_and_exec(kgem, 2) ||
474428d7b3dSmrg	    !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
475428d7b3dSmrg		kgem_submit(kgem);
476428d7b3dSmrg		if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL))
477428d7b3dSmrg			goto fallback;
478428d7b3dSmrg		_kgem_set_mode(kgem, KGEM_BLT);
479428d7b3dSmrg	}
480428d7b3dSmrg
481428d7b3dSmrg	tmp_nbox = nbox;
482428d7b3dSmrg	tmp_box = box;
483428d7b3dSmrg	offset = 0;
484428d7b3dSmrg	if (sna->kgem.gen >= 0100) {
485428d7b3dSmrg		cmd |= 8;
486428d7b3dSmrg		do {
487428d7b3dSmrg			int nbox_this_time, rem;
488428d7b3dSmrg
489428d7b3dSmrg			nbox_this_time = tmp_nbox;
490428d7b3dSmrg			rem = kgem_batch_space(kgem);
491428d7b3dSmrg			if (10*nbox_this_time > rem)
492428d7b3dSmrg				nbox_this_time = rem / 8;
493428d7b3dSmrg			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
494428d7b3dSmrg				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
495428d7b3dSmrg			assert(nbox_this_time);
496428d7b3dSmrg			tmp_nbox -= nbox_this_time;
497428d7b3dSmrg
498428d7b3dSmrg			assert(kgem->mode == KGEM_BLT);
499428d7b3dSmrg			for (n = 0; n < nbox_this_time; n++) {
500428d7b3dSmrg				int height = tmp_box[n].y2 - tmp_box[n].y1;
501428d7b3dSmrg				int width = tmp_box[n].x2 - tmp_box[n].x1;
502428d7b3dSmrg				int pitch = PITCH(width, cpp);
503428d7b3dSmrg				uint32_t *b = kgem->batch + kgem->nbatch;
504428d7b3dSmrg
505428d7b3dSmrg				DBG(("    blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n",
506428d7b3dSmrg				     offset,
507428d7b3dSmrg				     tmp_box[n].x1, tmp_box[n].y1,
508428d7b3dSmrg				     width, height, pitch));
509428d7b3dSmrg
510428d7b3dSmrg				assert(tmp_box[n].x1 >= 0);
511428d7b3dSmrg				assert(tmp_box[n].x2 * dst->drawable.bitsPerPixel/8 <= src_bo->pitch);
512428d7b3dSmrg				assert(tmp_box[n].y1 >= 0);
513428d7b3dSmrg				assert(tmp_box[n].y2 * src_bo->pitch <= kgem_bo_size(src_bo));
514428d7b3dSmrg
515428d7b3dSmrg				b[0] = cmd;
516428d7b3dSmrg				b[1] = br13 | pitch;
517428d7b3dSmrg				b[2] = 0;
518428d7b3dSmrg				b[3] = height << 16 | width;
519428d7b3dSmrg				*(uint64_t *)(b+4) =
520428d7b3dSmrg					kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
521428d7b3dSmrg							 I915_GEM_DOMAIN_RENDER << 16 |
522428d7b3dSmrg							 I915_GEM_DOMAIN_RENDER |
523428d7b3dSmrg							 KGEM_RELOC_FENCED,
524428d7b3dSmrg							 offset);
525428d7b3dSmrg				b[6] = tmp_box[n].y1 << 16 | tmp_box[n].x1;
526428d7b3dSmrg				b[7] = src_pitch;
527428d7b3dSmrg				*(uint64_t *)(b+8) =
528428d7b3dSmrg					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
529428d7b3dSmrg							 I915_GEM_DOMAIN_RENDER << 16 |
530428d7b3dSmrg							 KGEM_RELOC_FENCED,
531428d7b3dSmrg							 0);
532428d7b3dSmrg				kgem->nbatch += 10;
533428d7b3dSmrg
534428d7b3dSmrg				offset += pitch * height;
535428d7b3dSmrg			}
536428d7b3dSmrg
537428d7b3dSmrg			_kgem_submit(kgem);
538428d7b3dSmrg			if (!tmp_nbox)
539428d7b3dSmrg				break;
540428d7b3dSmrg
541428d7b3dSmrg			_kgem_set_mode(kgem, KGEM_BLT);
542428d7b3dSmrg			tmp_box += nbox_this_time;
543428d7b3dSmrg		} while (1);
544428d7b3dSmrg	} else {
545428d7b3dSmrg		cmd |= 6;
546428d7b3dSmrg		do {
547428d7b3dSmrg			int nbox_this_time, rem;
548428d7b3dSmrg
549428d7b3dSmrg			nbox_this_time = tmp_nbox;
550428d7b3dSmrg			rem = kgem_batch_space(kgem);
551428d7b3dSmrg			if (8*nbox_this_time > rem)
552428d7b3dSmrg				nbox_this_time = rem / 8;
553428d7b3dSmrg			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
554428d7b3dSmrg				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
555428d7b3dSmrg			assert(nbox_this_time);
556428d7b3dSmrg			tmp_nbox -= nbox_this_time;
557428d7b3dSmrg
558428d7b3dSmrg			assert(kgem->mode == KGEM_BLT);
559428d7b3dSmrg			for (n = 0; n < nbox_this_time; n++) {
560428d7b3dSmrg				int height = tmp_box[n].y2 - tmp_box[n].y1;
561428d7b3dSmrg				int width = tmp_box[n].x2 - tmp_box[n].x1;
562428d7b3dSmrg				int pitch = PITCH(width, cpp);
563428d7b3dSmrg				uint32_t *b = kgem->batch + kgem->nbatch;
564428d7b3dSmrg
565428d7b3dSmrg				DBG(("    blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n",
566428d7b3dSmrg				     offset,
567428d7b3dSmrg				     tmp_box[n].x1, tmp_box[n].y1,
568428d7b3dSmrg				     width, height, pitch));
569428d7b3dSmrg
570428d7b3dSmrg				assert(tmp_box[n].x1 >= 0);
571428d7b3dSmrg				assert(tmp_box[n].x2 * dst->drawable.bitsPerPixel/8 <= src_bo->pitch);
572428d7b3dSmrg				assert(tmp_box[n].y1 >= 0);
573428d7b3dSmrg				assert(tmp_box[n].y2 * src_bo->pitch <= kgem_bo_size(src_bo));
574428d7b3dSmrg
575428d7b3dSmrg				b[0] = cmd;
576428d7b3dSmrg				b[1] = br13 | pitch;
577428d7b3dSmrg				b[2] = 0;
578428d7b3dSmrg				b[3] = height << 16 | width;
579428d7b3dSmrg				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
580428d7b3dSmrg						      I915_GEM_DOMAIN_RENDER << 16 |
581428d7b3dSmrg						      I915_GEM_DOMAIN_RENDER |
582428d7b3dSmrg						      KGEM_RELOC_FENCED,
583428d7b3dSmrg						      offset);
584428d7b3dSmrg				b[5] = tmp_box[n].y1 << 16 | tmp_box[n].x1;
585428d7b3dSmrg				b[6] = src_pitch;
586428d7b3dSmrg				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
587428d7b3dSmrg						      I915_GEM_DOMAIN_RENDER << 16 |
588428d7b3dSmrg						      KGEM_RELOC_FENCED,
589428d7b3dSmrg						      0);
590428d7b3dSmrg				kgem->nbatch += 8;
591428d7b3dSmrg
592428d7b3dSmrg				offset += pitch * height;
593428d7b3dSmrg			}
594428d7b3dSmrg
595428d7b3dSmrg			_kgem_submit(kgem);
596428d7b3dSmrg			if (!tmp_nbox)
597428d7b3dSmrg				break;
598428d7b3dSmrg
599428d7b3dSmrg			_kgem_set_mode(kgem, KGEM_BLT);
600428d7b3dSmrg			tmp_box += nbox_this_time;
601428d7b3dSmrg		} while (1);
602428d7b3dSmrg	}
603428d7b3dSmrg	assert(offset == __kgem_buffer_size(dst_bo));
604428d7b3dSmrg
605428d7b3dSmrg	kgem_buffer_read_sync(kgem, dst_bo);
606428d7b3dSmrg
607428d7b3dSmrg	if (sigtrap_get() == 0) {
608428d7b3dSmrg		char *src = ptr;
609428d7b3dSmrg		do {
610428d7b3dSmrg			int height = box->y2 - box->y1;
611428d7b3dSmrg			int width  = box->x2 - box->x1;
612428d7b3dSmrg			int pitch = PITCH(width, cpp);
613428d7b3dSmrg
614428d7b3dSmrg			DBG(("    copy offset %lx [%08x...%08x...%08x]: (%d, %d) x (%d, %d), src pitch=%d, dst pitch=%d, bpp=%d\n",
615428d7b3dSmrg			     (long)((char *)src - (char *)ptr),
616428d7b3dSmrg			     *(uint32_t*)src, *(uint32_t*)(src+pitch*height/2 + pitch/2 - 4), *(uint32_t*)(src+pitch*height - 4),
617428d7b3dSmrg			     box->x1, box->y1,
618428d7b3dSmrg			     width, height,
619428d7b3dSmrg			     pitch, dst->devKind, cpp*8));
620428d7b3dSmrg
621428d7b3dSmrg			assert(box->x1 >= 0);
622428d7b3dSmrg			assert(box->x2 <= dst->drawable.width);
623428d7b3dSmrg			assert(box->y1 >= 0);
624428d7b3dSmrg			assert(box->y2 <= dst->drawable.height);
625428d7b3dSmrg
626428d7b3dSmrg			memcpy_blt(src, dst->devPrivate.ptr, cpp*8,
627428d7b3dSmrg				   pitch, dst->devKind,
628428d7b3dSmrg				   0, 0,
629428d7b3dSmrg				   box->x1, box->y1,
630428d7b3dSmrg				   width, height);
631428d7b3dSmrg			box++;
632428d7b3dSmrg
633428d7b3dSmrg			src += pitch * height;
634428d7b3dSmrg		} while (--nbox);
635428d7b3dSmrg		assert(src - (char *)ptr == __kgem_buffer_size(dst_bo));
636428d7b3dSmrg		sigtrap_put();
637428d7b3dSmrg	}
638428d7b3dSmrg	kgem_bo_destroy(kgem, dst_bo);
639428d7b3dSmrg	sna->blt_state.fill_bo = 0;
640428d7b3dSmrg}
641428d7b3dSmrg
642428d7b3dSmrgstatic bool upload_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo)
643428d7b3dSmrg{
644428d7b3dSmrg	DBG(("%s: tiling=%d\n", __FUNCTION__, bo->tiling));
645428d7b3dSmrg	switch (bo->tiling) {
646428d7b3dSmrg	case I915_TILING_Y:
647428d7b3dSmrg		return false;
648428d7b3dSmrg	case I915_TILING_X:
649428d7b3dSmrg		if (!kgem->memcpy_to_tiled_x)
650428d7b3dSmrg			return false;
651428d7b3dSmrg	default:
652428d7b3dSmrg		break;
653428d7b3dSmrg	}
654428d7b3dSmrg
655428d7b3dSmrg	if (kgem->has_wc_mmap)
656428d7b3dSmrg		return true;
657428d7b3dSmrg
658428d7b3dSmrg	return kgem_bo_can_map__cpu(kgem, bo, true);
659428d7b3dSmrg}
660428d7b3dSmrg
661428d7b3dSmrgstatic bool
662428d7b3dSmrgwrite_boxes_inplace__tiled(struct kgem *kgem,
663428d7b3dSmrg                           const uint8_t *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
664428d7b3dSmrg                           struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
665428d7b3dSmrg                           const BoxRec *box, int n)
666428d7b3dSmrg{
667428d7b3dSmrg	uint8_t *dst;
668428d7b3dSmrg
669428d7b3dSmrg	assert(kgem->has_wc_mmap || kgem_bo_can_map__cpu(kgem, bo, true));
670428d7b3dSmrg	assert(bo->tiling != I915_TILING_Y);
671428d7b3dSmrg
672428d7b3dSmrg	if (kgem_bo_can_map__cpu(kgem, bo, true)) {
673428d7b3dSmrg		dst = kgem_bo_map__cpu(kgem, bo);
674428d7b3dSmrg		if (dst == NULL)
675428d7b3dSmrg			return false;
676428d7b3dSmrg
677428d7b3dSmrg		kgem_bo_sync__cpu(kgem, bo);
678428d7b3dSmrg	} else {
679428d7b3dSmrg		dst = kgem_bo_map__wc(kgem, bo);
680428d7b3dSmrg		if (dst == NULL)
681428d7b3dSmrg			return false;
682428d7b3dSmrg
683428d7b3dSmrg		kgem_bo_sync__gtt(kgem, bo);
684428d7b3dSmrg	}
685428d7b3dSmrg
686428d7b3dSmrg	if (sigtrap_get())
687428d7b3dSmrg		return false;
688428d7b3dSmrg
689428d7b3dSmrg	if (bo->tiling) {
690428d7b3dSmrg		do {
691428d7b3dSmrg			memcpy_to_tiled_x(kgem, src, dst, bpp, stride, bo->pitch,
692428d7b3dSmrg					  box->x1 + src_dx, box->y1 + src_dy,
693428d7b3dSmrg					  box->x1 + dst_dx, box->y1 + dst_dy,
694428d7b3dSmrg					  box->x2 - box->x1, box->y2 - box->y1);
695428d7b3dSmrg			box++;
696428d7b3dSmrg		} while (--n);
697428d7b3dSmrg	} else {
698428d7b3dSmrg		do {
699428d7b3dSmrg			memcpy_blt(src, dst, bpp, stride, bo->pitch,
700428d7b3dSmrg				   box->x1 + src_dx, box->y1 + src_dy,
701428d7b3dSmrg				   box->x1 + dst_dx, box->y1 + dst_dy,
702428d7b3dSmrg				   box->x2 - box->x1, box->y2 - box->y1);
703428d7b3dSmrg			box++;
704428d7b3dSmrg		} while (--n);
705428d7b3dSmrg	}
706428d7b3dSmrg
707428d7b3dSmrg	sigtrap_put();
708428d7b3dSmrg	return true;
709428d7b3dSmrg}
710428d7b3dSmrg
711428d7b3dSmrgstatic bool write_boxes_inplace(struct kgem *kgem,
712428d7b3dSmrg				const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
713428d7b3dSmrg				struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
714428d7b3dSmrg				const BoxRec *box, int n)
715428d7b3dSmrg{
716428d7b3dSmrg	void *dst;
717428d7b3dSmrg
718428d7b3dSmrg	DBG(("%s x %d, handle=%d, tiling=%d\n",
719428d7b3dSmrg	     __FUNCTION__, n, bo->handle, bo->tiling));
720428d7b3dSmrg
721428d7b3dSmrg	if (upload_inplace__tiled(kgem, bo) &&
722428d7b3dSmrg	    write_boxes_inplace__tiled(kgem, src, stride, bpp, src_dx, src_dy,
723428d7b3dSmrg				       bo, dst_dx, dst_dy, box, n))
724428d7b3dSmrg		return true;
725428d7b3dSmrg
726428d7b3dSmrg	if (!kgem_bo_can_map(kgem, bo))
727428d7b3dSmrg		return false;
728428d7b3dSmrg
729428d7b3dSmrg	kgem_bo_submit(kgem, bo);
730428d7b3dSmrg
731428d7b3dSmrg	dst = kgem_bo_map(kgem, bo);
732428d7b3dSmrg	if (dst == NULL)
733428d7b3dSmrg		return false;
734428d7b3dSmrg
735428d7b3dSmrg	assert(dst != src);
736428d7b3dSmrg
737428d7b3dSmrg	if (sigtrap_get())
738428d7b3dSmrg		return false;
739428d7b3dSmrg
740428d7b3dSmrg	do {
741428d7b3dSmrg		DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__,
742428d7b3dSmrg		     box->x1 + src_dx, box->y1 + src_dy,
743428d7b3dSmrg		     box->x1 + dst_dx, box->y1 + dst_dy,
744428d7b3dSmrg		     box->x2 - box->x1, box->y2 - box->y1,
745428d7b3dSmrg		     bpp, stride, bo->pitch));
746428d7b3dSmrg
747428d7b3dSmrg		assert(box->x2 > box->x1);
748428d7b3dSmrg		assert(box->y2 > box->y1);
749428d7b3dSmrg
750428d7b3dSmrg		assert(box->x1 + dst_dx >= 0);
751428d7b3dSmrg		assert((box->x2 + dst_dx)*bpp <= 8*bo->pitch);
752428d7b3dSmrg		assert(box->y1 + dst_dy >= 0);
753428d7b3dSmrg		assert((box->y2 + dst_dy)*bo->pitch <= kgem_bo_size(bo));
754428d7b3dSmrg
755428d7b3dSmrg		assert(box->x1 + src_dx >= 0);
756428d7b3dSmrg		assert((box->x2 + src_dx)*bpp <= 8*stride);
757428d7b3dSmrg		assert(box->y1 + src_dy >= 0);
758428d7b3dSmrg
759428d7b3dSmrg		memcpy_blt(src, dst, bpp,
760428d7b3dSmrg			   stride, bo->pitch,
761428d7b3dSmrg			   box->x1 + src_dx, box->y1 + src_dy,
762428d7b3dSmrg			   box->x1 + dst_dx, box->y1 + dst_dy,
763428d7b3dSmrg			   box->x2 - box->x1, box->y2 - box->y1);
764428d7b3dSmrg		box++;
765428d7b3dSmrg	} while (--n);
766428d7b3dSmrg
767428d7b3dSmrg	sigtrap_put();
768428d7b3dSmrg	return true;
769428d7b3dSmrg}
770428d7b3dSmrg
771428d7b3dSmrgstatic bool __upload_inplace(struct kgem *kgem,
772428d7b3dSmrg			     struct kgem_bo *bo,
773428d7b3dSmrg			     const BoxRec *box,
774428d7b3dSmrg			     int n, int bpp)
775428d7b3dSmrg{
776428d7b3dSmrg	unsigned int bytes;
777428d7b3dSmrg
778428d7b3dSmrg	if (FORCE_INPLACE)
779428d7b3dSmrg		return FORCE_INPLACE > 0;
780428d7b3dSmrg
781428d7b3dSmrg	/* If we are writing through the GTT, check first if we might be
782428d7b3dSmrg	 * able to almagamate a series of small writes into a single
783428d7b3dSmrg	 * operation.
784428d7b3dSmrg	 */
785428d7b3dSmrg	bytes = 0;
786428d7b3dSmrg	while (n--) {
787428d7b3dSmrg		bytes += (box->x2 - box->x1) * (box->y2 - box->y1);
788428d7b3dSmrg		box++;
789428d7b3dSmrg	}
790428d7b3dSmrg	if (__kgem_bo_is_busy(kgem, bo))
791428d7b3dSmrg		return bytes * bpp >> 12 >= kgem->half_cpu_cache_pages;
792428d7b3dSmrg	else
793428d7b3dSmrg		return bytes * bpp >> 12;
794428d7b3dSmrg}
795428d7b3dSmrg
796428d7b3dSmrgstatic bool upload_inplace(struct kgem *kgem,
797428d7b3dSmrg			   struct kgem_bo *bo,
798428d7b3dSmrg			   const BoxRec *box,
799428d7b3dSmrg			   int n, int bpp)
800428d7b3dSmrg{
801428d7b3dSmrg	if (unlikely(kgem->wedged))
802428d7b3dSmrg		return true;
803428d7b3dSmrg
804428d7b3dSmrg	if (!kgem_bo_can_map(kgem, bo) && !upload_inplace__tiled(kgem, bo))
805428d7b3dSmrg		return false;
806428d7b3dSmrg
807428d7b3dSmrg	return __upload_inplace(kgem, bo, box, n,bpp);
808428d7b3dSmrg}
809428d7b3dSmrg
810428d7b3dSmrgbool sna_write_boxes(struct sna *sna, PixmapPtr dst,
811428d7b3dSmrg		     struct kgem_bo * const dst_bo, int16_t const dst_dx, int16_t const dst_dy,
812428d7b3dSmrg		     const void * const src, int const stride, int16_t const src_dx, int16_t const src_dy,
813428d7b3dSmrg		     const BoxRec *box, int nbox)
814428d7b3dSmrg{
815428d7b3dSmrg	struct kgem *kgem = &sna->kgem;
816428d7b3dSmrg	struct kgem_bo *src_bo;
817428d7b3dSmrg	BoxRec extents;
818428d7b3dSmrg	void *ptr;
819428d7b3dSmrg	int offset;
820428d7b3dSmrg	int n, cmd, br13;
821428d7b3dSmrg	bool can_blt;
822428d7b3dSmrg
823428d7b3dSmrg	DBG(("%s x %d, src stride=%d,  src dx=(%d, %d)\n", __FUNCTION__, nbox, stride, src_dx, src_dy));
824428d7b3dSmrg
825428d7b3dSmrg	if (upload_inplace(kgem, dst_bo, box, nbox, dst->drawable.bitsPerPixel) &&
826428d7b3dSmrg	    write_boxes_inplace(kgem,
827428d7b3dSmrg				src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
828428d7b3dSmrg				dst_bo, dst_dx, dst_dy,
829428d7b3dSmrg				box, nbox))
830428d7b3dSmrg		return true;
831428d7b3dSmrg
832428d7b3dSmrg	if (wedged(sna))
833428d7b3dSmrg		return false;
834428d7b3dSmrg
835428d7b3dSmrg	can_blt = kgem_bo_can_blt(kgem, dst_bo) &&
836428d7b3dSmrg		(box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
837428d7b3dSmrg	extents = box[0];
838428d7b3dSmrg	for (n = 1; n < nbox; n++) {
839428d7b3dSmrg		if (box[n].x1 < extents.x1)
840428d7b3dSmrg			extents.x1 = box[n].x1;
841428d7b3dSmrg		if (box[n].x2 > extents.x2)
842428d7b3dSmrg			extents.x2 = box[n].x2;
843428d7b3dSmrg
844428d7b3dSmrg		if (can_blt)
845428d7b3dSmrg			can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
846428d7b3dSmrg
847428d7b3dSmrg		if (box[n].y1 < extents.y1)
848428d7b3dSmrg			extents.y1 = box[n].y1;
849428d7b3dSmrg		if (box[n].y2 > extents.y2)
850428d7b3dSmrg			extents.y2 = box[n].y2;
851428d7b3dSmrg	}
852428d7b3dSmrg
853428d7b3dSmrg	/* Try to avoid switching rings... */
854428d7b3dSmrg	if (!can_blt || kgem->ring == KGEM_RENDER ||
855428d7b3dSmrg	    upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) {
856428d7b3dSmrg		DrawableRec tmp;
857428d7b3dSmrg
858428d7b3dSmrg		tmp.width  = extents.x2 - extents.x1;
859428d7b3dSmrg		tmp.height = extents.y2 - extents.y1;
860428d7b3dSmrg		tmp.depth  = dst->drawable.depth;
861428d7b3dSmrg		tmp.bitsPerPixel = dst->drawable.bitsPerPixel;
862428d7b3dSmrg
863428d7b3dSmrg		assert(tmp.width);
864428d7b3dSmrg		assert(tmp.height);
865428d7b3dSmrg
866428d7b3dSmrg		DBG(("%s: upload (%d, %d)x(%d, %d), max %dx%d\n",
867428d7b3dSmrg		     __FUNCTION__,
868428d7b3dSmrg		     extents.x1, extents.y1,
869428d7b3dSmrg		     tmp.width, tmp.height,
870428d7b3dSmrg		     sna->render.max_3d_size, sna->render.max_3d_size));
871428d7b3dSmrg		if (must_tile(sna, tmp.width, tmp.height)) {
872428d7b3dSmrg			BoxRec tile, stack[64], *clipped;
873428d7b3dSmrg			int cpp, step;
874428d7b3dSmrg
875428d7b3dSmrgtile:
876428d7b3dSmrg			cpp = dst->drawable.bitsPerPixel / 8;
877428d7b3dSmrg			step = MIN(sna->render.max_3d_size,
878428d7b3dSmrg				   (MAXSHORT&~63) / cpp);
879428d7b3dSmrg			while (step * step * cpp > sna->kgem.max_upload_tile_size)
880428d7b3dSmrg				step /= 2;
881428d7b3dSmrg
882428d7b3dSmrg			if (step * cpp > 4096)
883428d7b3dSmrg				step = 4096 / cpp;
884428d7b3dSmrg			assert(step);
885428d7b3dSmrg
886428d7b3dSmrg			DBG(("%s: tiling upload, using %dx%d tiles\n",
887428d7b3dSmrg			     __FUNCTION__, step, step));
888428d7b3dSmrg
889428d7b3dSmrg			if (n > ARRAY_SIZE(stack)) {
890428d7b3dSmrg				clipped = malloc(sizeof(BoxRec) * n);
891428d7b3dSmrg				if (clipped == NULL)
892428d7b3dSmrg					goto fallback;
893428d7b3dSmrg			} else
894428d7b3dSmrg				clipped = stack;
895428d7b3dSmrg
896428d7b3dSmrg			for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
897428d7b3dSmrg				int y2 = tile.y1 + step;
898428d7b3dSmrg				if (y2 > extents.y2)
899428d7b3dSmrg					y2 = extents.y2;
900428d7b3dSmrg				tile.y2 = y2;
901428d7b3dSmrg
902428d7b3dSmrg				for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
903428d7b3dSmrg					int x2 = tile.x1 + step;
904428d7b3dSmrg					if (x2 > extents.x2)
905428d7b3dSmrg						x2 = extents.x2;
906428d7b3dSmrg					tile.x2 = x2;
907428d7b3dSmrg
908428d7b3dSmrg					tmp.width  = tile.x2 - tile.x1;
909428d7b3dSmrg					tmp.height = tile.y2 - tile.y1;
910428d7b3dSmrg
911428d7b3dSmrg					src_bo = kgem_create_buffer_2d(kgem,
912428d7b3dSmrg								       tmp.width,
913428d7b3dSmrg								       tmp.height,
914428d7b3dSmrg								       tmp.bitsPerPixel,
915428d7b3dSmrg								       KGEM_BUFFER_WRITE_INPLACE,
916428d7b3dSmrg								       &ptr);
917428d7b3dSmrg					if (!src_bo) {
918428d7b3dSmrg						if (clipped != stack)
919428d7b3dSmrg							free(clipped);
920428d7b3dSmrg						goto fallback;
921428d7b3dSmrg					}
922428d7b3dSmrg
923428d7b3dSmrg					if (sigtrap_get() == 0) {
924428d7b3dSmrg						BoxRec *c = clipped;
925428d7b3dSmrg						for (n = 0; n < nbox; n++) {
926428d7b3dSmrg							*c = box[n];
927428d7b3dSmrg							if (!box_intersect(c, &tile))
928428d7b3dSmrg								continue;
929428d7b3dSmrg
930428d7b3dSmrg							DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
931428d7b3dSmrg							     __FUNCTION__,
932428d7b3dSmrg							     c->x1, c->y1,
933428d7b3dSmrg							     c->x2, c->y2,
934428d7b3dSmrg							     src_dx, src_dy,
935428d7b3dSmrg							     c->x1 - tile.x1,
936428d7b3dSmrg							     c->y1 - tile.y1));
937428d7b3dSmrg							memcpy_blt(src, ptr, tmp.bitsPerPixel,
938428d7b3dSmrg								   stride, src_bo->pitch,
939428d7b3dSmrg								   c->x1 + src_dx,
940428d7b3dSmrg								   c->y1 + src_dy,
941428d7b3dSmrg								   c->x1 - tile.x1,
942428d7b3dSmrg								   c->y1 - tile.y1,
943428d7b3dSmrg								   c->x2 - c->x1,
944428d7b3dSmrg								   c->y2 - c->y1);
945428d7b3dSmrg							c++;
946428d7b3dSmrg						}
947428d7b3dSmrg
948428d7b3dSmrg						if (c != clipped)
949428d7b3dSmrg							n = sna->render.copy_boxes(sna, GXcopy,
950428d7b3dSmrg										   &tmp, src_bo, -tile.x1, -tile.y1,
951428d7b3dSmrg										   &dst->drawable, dst_bo, dst_dx, dst_dy,
952428d7b3dSmrg										   clipped, c - clipped, 0);
953428d7b3dSmrg						else
954428d7b3dSmrg							n = 1;
955428d7b3dSmrg						sigtrap_put();
956428d7b3dSmrg					} else
957428d7b3dSmrg						n = 0;
958428d7b3dSmrg
959428d7b3dSmrg					kgem_bo_destroy(&sna->kgem, src_bo);
960428d7b3dSmrg
961428d7b3dSmrg					if (!n) {
962428d7b3dSmrg						if (clipped != stack)
963428d7b3dSmrg							free(clipped);
964428d7b3dSmrg						goto fallback;
965428d7b3dSmrg					}
966428d7b3dSmrg				}
967428d7b3dSmrg			}
968428d7b3dSmrg
969428d7b3dSmrg			if (clipped != stack)
970428d7b3dSmrg				free(clipped);
971428d7b3dSmrg		} else {
972428d7b3dSmrg			src_bo = kgem_create_buffer_2d(kgem,
973428d7b3dSmrg						       tmp.width,
974428d7b3dSmrg						       tmp.height,
975428d7b3dSmrg						       tmp.bitsPerPixel,
976428d7b3dSmrg						       KGEM_BUFFER_WRITE_INPLACE,
977428d7b3dSmrg						       &ptr);
978428d7b3dSmrg			if (!src_bo)
979428d7b3dSmrg				goto fallback;
980428d7b3dSmrg
981428d7b3dSmrg			if (sigtrap_get() == 0) {
982428d7b3dSmrg				for (n = 0; n < nbox; n++) {
983428d7b3dSmrg					DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
984428d7b3dSmrg					     __FUNCTION__,
985428d7b3dSmrg					     box[n].x1, box[n].y1,
986428d7b3dSmrg					     box[n].x2, box[n].y2,
987428d7b3dSmrg					     src_dx, src_dy,
988428d7b3dSmrg					     box[n].x1 - extents.x1,
989428d7b3dSmrg					     box[n].y1 - extents.y1));
990428d7b3dSmrg					memcpy_blt(src, ptr, tmp.bitsPerPixel,
991428d7b3dSmrg						   stride, src_bo->pitch,
992428d7b3dSmrg						   box[n].x1 + src_dx,
993428d7b3dSmrg						   box[n].y1 + src_dy,
994428d7b3dSmrg						   box[n].x1 - extents.x1,
995428d7b3dSmrg						   box[n].y1 - extents.y1,
996428d7b3dSmrg						   box[n].x2 - box[n].x1,
997428d7b3dSmrg						   box[n].y2 - box[n].y1);
998428d7b3dSmrg				}
999428d7b3dSmrg
1000428d7b3dSmrg				n = sna->render.copy_boxes(sna, GXcopy,
1001428d7b3dSmrg							   &tmp, src_bo, -extents.x1, -extents.y1,
1002428d7b3dSmrg							   &dst->drawable, dst_bo, dst_dx, dst_dy,
1003428d7b3dSmrg							   box, nbox, 0);
1004428d7b3dSmrg				sigtrap_put();
1005428d7b3dSmrg			} else
1006428d7b3dSmrg				n = 0;
1007428d7b3dSmrg
1008428d7b3dSmrg			kgem_bo_destroy(&sna->kgem, src_bo);
1009428d7b3dSmrg
1010428d7b3dSmrg			if (!n)
1011428d7b3dSmrg				goto tile;
1012428d7b3dSmrg		}
1013428d7b3dSmrg
1014428d7b3dSmrg		return true;
1015428d7b3dSmrg	}
1016428d7b3dSmrg
1017428d7b3dSmrg	cmd = XY_SRC_COPY_BLT_CMD;
1018428d7b3dSmrg	br13 = dst_bo->pitch;
1019428d7b3dSmrg	if (kgem->gen >= 040 && dst_bo->tiling) {
1020428d7b3dSmrg		cmd |= BLT_DST_TILED;
1021428d7b3dSmrg		br13 >>= 2;
1022428d7b3dSmrg	}
1023428d7b3dSmrg	br13 |= 0xcc << 16;
1024428d7b3dSmrg	switch (dst->drawable.bitsPerPixel) {
1025428d7b3dSmrg	default:
1026428d7b3dSmrg	case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
1027428d7b3dSmrg		 br13 |= 1 << 25; /* RGB8888 */
1028428d7b3dSmrg	case 16: br13 |= 1 << 24; /* RGB565 */
1029428d7b3dSmrg	case 8: break;
1030428d7b3dSmrg	}
1031428d7b3dSmrg
1032428d7b3dSmrg	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
1033428d7b3dSmrg	if (!kgem_check_batch(kgem, 10) ||
1034428d7b3dSmrg	    !kgem_check_reloc_and_exec(kgem, 2) ||
1035428d7b3dSmrg	    !kgem_check_bo_fenced(kgem, dst_bo)) {
1036428d7b3dSmrg		kgem_submit(kgem);
1037428d7b3dSmrg		if (!kgem_check_bo_fenced(kgem, dst_bo))
1038428d7b3dSmrg			goto fallback;
1039428d7b3dSmrg		_kgem_set_mode(kgem, KGEM_BLT);
1040428d7b3dSmrg	}
1041428d7b3dSmrg
1042428d7b3dSmrg	if (kgem->gen >= 0100) {
1043428d7b3dSmrg		cmd |= 8;
1044428d7b3dSmrg		do {
1045428d7b3dSmrg			int nbox_this_time, rem;
1046428d7b3dSmrg
1047428d7b3dSmrg			nbox_this_time = nbox;
1048428d7b3dSmrg			rem = kgem_batch_space(kgem);
1049428d7b3dSmrg			if (10*nbox_this_time > rem)
1050428d7b3dSmrg				nbox_this_time = rem / 8;
1051428d7b3dSmrg			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1052428d7b3dSmrg				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
1053428d7b3dSmrg			assert(nbox_this_time);
1054428d7b3dSmrg			nbox -= nbox_this_time;
1055428d7b3dSmrg
1056428d7b3dSmrg			/* Count the total number of bytes to be read and allocate a
1057428d7b3dSmrg			 * single buffer large enough. Or if it is very small, combine
1058428d7b3dSmrg			 * with other allocations. */
1059428d7b3dSmrg			offset = 0;
1060428d7b3dSmrg			for (n = 0; n < nbox_this_time; n++) {
1061428d7b3dSmrg				int height = box[n].y2 - box[n].y1;
1062428d7b3dSmrg				int width = box[n].x2 - box[n].x1;
1063428d7b3dSmrg				offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
1064428d7b3dSmrg			}
1065428d7b3dSmrg
1066428d7b3dSmrg			src_bo = kgem_create_buffer(kgem, offset,
1067428d7b3dSmrg						    KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
1068428d7b3dSmrg						    &ptr);
1069428d7b3dSmrg			if (!src_bo)
1070428d7b3dSmrg				break;
1071428d7b3dSmrg
1072428d7b3dSmrg			if (sigtrap_get() == 0) {
1073428d7b3dSmrg				offset = 0;
1074428d7b3dSmrg				do {
1075428d7b3dSmrg					int height = box->y2 - box->y1;
1076428d7b3dSmrg					int width = box->x2 - box->x1;
1077428d7b3dSmrg					int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
1078428d7b3dSmrg					uint32_t *b;
1079428d7b3dSmrg
1080428d7b3dSmrg					DBG(("  %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
1081428d7b3dSmrg					     __FUNCTION__,
1082428d7b3dSmrg					     box->x1 + src_dx, box->y1 + src_dy,
1083428d7b3dSmrg					     box->x1 + dst_dx, box->y1 + dst_dy,
1084428d7b3dSmrg					     width, height,
1085428d7b3dSmrg					     offset, pitch));
1086428d7b3dSmrg
1087428d7b3dSmrg					assert(box->x1 + src_dx >= 0);
1088428d7b3dSmrg					assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
1089428d7b3dSmrg					assert(box->y1 + src_dy >= 0);
1090428d7b3dSmrg
1091428d7b3dSmrg					assert(box->x1 + dst_dx >= 0);
1092428d7b3dSmrg					assert(box->y1 + dst_dy >= 0);
1093428d7b3dSmrg
1094428d7b3dSmrg					memcpy_blt(src, (char *)ptr + offset,
1095428d7b3dSmrg						   dst->drawable.bitsPerPixel,
1096428d7b3dSmrg						   stride, pitch,
1097428d7b3dSmrg						   box->x1 + src_dx, box->y1 + src_dy,
1098428d7b3dSmrg						   0, 0,
1099428d7b3dSmrg						   width, height);
1100428d7b3dSmrg
1101428d7b3dSmrg					assert(kgem->mode == KGEM_BLT);
1102428d7b3dSmrg					b = kgem->batch + kgem->nbatch;
1103428d7b3dSmrg					b[0] = cmd;
1104428d7b3dSmrg					b[1] = br13;
1105428d7b3dSmrg					b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
1106428d7b3dSmrg					b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
1107428d7b3dSmrg					*(uint64_t *)(b+4) =
1108428d7b3dSmrg						kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
1109428d7b3dSmrg								 I915_GEM_DOMAIN_RENDER << 16 |
1110428d7b3dSmrg								 I915_GEM_DOMAIN_RENDER |
1111428d7b3dSmrg								 KGEM_RELOC_FENCED,
1112428d7b3dSmrg								 0);
1113428d7b3dSmrg					b[6] = 0;
1114428d7b3dSmrg					b[7] = pitch;
1115428d7b3dSmrg					*(uint64_t *)(b+8) =
1116428d7b3dSmrg						kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
1117428d7b3dSmrg								 I915_GEM_DOMAIN_RENDER << 16 |
1118428d7b3dSmrg								 KGEM_RELOC_FENCED,
1119428d7b3dSmrg								 offset);
1120428d7b3dSmrg					kgem->nbatch += 10;
1121428d7b3dSmrg
1122428d7b3dSmrg					box++;
1123428d7b3dSmrg					offset += pitch * height;
1124428d7b3dSmrg				} while (--nbox_this_time);
1125428d7b3dSmrg				assert(offset == __kgem_buffer_size(src_bo));
1126428d7b3dSmrg				sigtrap_put();
1127428d7b3dSmrg			}
1128428d7b3dSmrg
1129428d7b3dSmrg			if (nbox) {
1130428d7b3dSmrg				_kgem_submit(kgem);
1131428d7b3dSmrg				_kgem_set_mode(kgem, KGEM_BLT);
1132428d7b3dSmrg			}
1133428d7b3dSmrg
1134428d7b3dSmrg			kgem_bo_destroy(kgem, src_bo);
1135428d7b3dSmrg		} while (nbox);
1136428d7b3dSmrg	} else {
1137428d7b3dSmrg		cmd |= 6;
1138428d7b3dSmrg		do {
1139428d7b3dSmrg			int nbox_this_time, rem;
1140428d7b3dSmrg
1141428d7b3dSmrg			nbox_this_time = nbox;
1142428d7b3dSmrg			rem = kgem_batch_space(kgem);
1143428d7b3dSmrg			if (8*nbox_this_time > rem)
1144428d7b3dSmrg				nbox_this_time = rem / 8;
1145428d7b3dSmrg			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1146428d7b3dSmrg				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
1147428d7b3dSmrg			assert(nbox_this_time);
1148428d7b3dSmrg			nbox -= nbox_this_time;
1149428d7b3dSmrg
1150428d7b3dSmrg			/* Count the total number of bytes to be read and allocate a
1151428d7b3dSmrg			 * single buffer large enough. Or if it is very small, combine
1152428d7b3dSmrg			 * with other allocations. */
1153428d7b3dSmrg			offset = 0;
1154428d7b3dSmrg			for (n = 0; n < nbox_this_time; n++) {
1155428d7b3dSmrg				int height = box[n].y2 - box[n].y1;
1156428d7b3dSmrg				int width = box[n].x2 - box[n].x1;
1157428d7b3dSmrg				offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
1158428d7b3dSmrg			}
1159428d7b3dSmrg
1160428d7b3dSmrg			src_bo = kgem_create_buffer(kgem, offset,
1161428d7b3dSmrg						    KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
1162428d7b3dSmrg						    &ptr);
1163428d7b3dSmrg			if (!src_bo)
1164428d7b3dSmrg				break;
1165428d7b3dSmrg
1166428d7b3dSmrg			if (sigtrap_get()) {
1167428d7b3dSmrg				kgem_bo_destroy(kgem, src_bo);
1168428d7b3dSmrg				goto fallback;
1169428d7b3dSmrg			}
1170428d7b3dSmrg
1171428d7b3dSmrg			offset = 0;
1172428d7b3dSmrg			do {
1173428d7b3dSmrg				int height = box->y2 - box->y1;
1174428d7b3dSmrg				int width = box->x2 - box->x1;
1175428d7b3dSmrg				int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
1176428d7b3dSmrg				uint32_t *b;
1177428d7b3dSmrg
1178428d7b3dSmrg				DBG(("  %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
1179428d7b3dSmrg				     __FUNCTION__,
1180428d7b3dSmrg				     box->x1 + src_dx, box->y1 + src_dy,
1181428d7b3dSmrg				     box->x1 + dst_dx, box->y1 + dst_dy,
1182428d7b3dSmrg				     width, height,
1183428d7b3dSmrg				     offset, pitch));
1184428d7b3dSmrg
1185428d7b3dSmrg				assert(box->x1 + src_dx >= 0);
1186428d7b3dSmrg				assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
1187428d7b3dSmrg				assert(box->y1 + src_dy >= 0);
1188428d7b3dSmrg
1189428d7b3dSmrg				assert(box->x1 + dst_dx >= 0);
1190428d7b3dSmrg				assert(box->y1 + dst_dy >= 0);
1191428d7b3dSmrg
1192428d7b3dSmrg				memcpy_blt(src, (char *)ptr + offset,
1193428d7b3dSmrg					   dst->drawable.bitsPerPixel,
1194428d7b3dSmrg					   stride, pitch,
1195428d7b3dSmrg					   box->x1 + src_dx, box->y1 + src_dy,
1196428d7b3dSmrg					   0, 0,
1197428d7b3dSmrg					   width, height);
1198428d7b3dSmrg
1199428d7b3dSmrg				assert(kgem->mode == KGEM_BLT);
1200428d7b3dSmrg				b = kgem->batch + kgem->nbatch;
1201428d7b3dSmrg				b[0] = cmd;
1202428d7b3dSmrg				b[1] = br13;
1203428d7b3dSmrg				b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
1204428d7b3dSmrg				b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
1205428d7b3dSmrg				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
1206428d7b3dSmrg						      I915_GEM_DOMAIN_RENDER << 16 |
1207428d7b3dSmrg						      I915_GEM_DOMAIN_RENDER |
1208428d7b3dSmrg						      KGEM_RELOC_FENCED,
1209428d7b3dSmrg						      0);
1210428d7b3dSmrg				b[5] = 0;
1211428d7b3dSmrg				b[6] = pitch;
1212428d7b3dSmrg				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
1213428d7b3dSmrg						      I915_GEM_DOMAIN_RENDER << 16 |
1214428d7b3dSmrg						      KGEM_RELOC_FENCED,
1215428d7b3dSmrg						      offset);
1216428d7b3dSmrg				kgem->nbatch += 8;
1217428d7b3dSmrg
1218428d7b3dSmrg				box++;
1219428d7b3dSmrg				offset += pitch * height;
1220428d7b3dSmrg			} while (--nbox_this_time);
1221428d7b3dSmrg			assert(offset == __kgem_buffer_size(src_bo));
1222428d7b3dSmrg			sigtrap_put();
1223428d7b3dSmrg
1224428d7b3dSmrg			if (nbox) {
1225428d7b3dSmrg				_kgem_submit(kgem);
1226428d7b3dSmrg				_kgem_set_mode(kgem, KGEM_BLT);
1227428d7b3dSmrg			}
1228428d7b3dSmrg
1229428d7b3dSmrg			kgem_bo_destroy(kgem, src_bo);
1230428d7b3dSmrg		} while (nbox);
1231428d7b3dSmrg	}
1232428d7b3dSmrg
1233428d7b3dSmrg	sna->blt_state.fill_bo = 0;
1234428d7b3dSmrg	return true;
1235428d7b3dSmrg
1236428d7b3dSmrgfallback:
1237428d7b3dSmrg	return write_boxes_inplace(kgem,
1238428d7b3dSmrg				   src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
1239428d7b3dSmrg				   dst_bo, dst_dx, dst_dy,
1240428d7b3dSmrg				   box, nbox);
1241428d7b3dSmrg}
1242428d7b3dSmrg
1243428d7b3dSmrgstatic bool
1244428d7b3dSmrgwrite_boxes_inplace__xor(struct kgem *kgem,
1245428d7b3dSmrg			 const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
1246428d7b3dSmrg			 struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
1247428d7b3dSmrg			 const BoxRec *box, int n,
1248428d7b3dSmrg			 uint32_t and, uint32_t or)
1249428d7b3dSmrg{
1250428d7b3dSmrg	void *dst;
1251428d7b3dSmrg
1252428d7b3dSmrg	DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling));
1253428d7b3dSmrg
1254428d7b3dSmrg	if (!kgem_bo_can_map(kgem, bo))
1255428d7b3dSmrg		return false;
1256428d7b3dSmrg
1257428d7b3dSmrg	kgem_bo_submit(kgem, bo);
1258428d7b3dSmrg
1259428d7b3dSmrg	dst = kgem_bo_map(kgem, bo);
1260428d7b3dSmrg	if (dst == NULL)
1261428d7b3dSmrg		return false;
1262428d7b3dSmrg
1263428d7b3dSmrg	if (sigtrap_get())
1264428d7b3dSmrg		return false;
1265428d7b3dSmrg
1266428d7b3dSmrg	do {
1267428d7b3dSmrg		DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__,
1268428d7b3dSmrg		     box->x1 + src_dx, box->y1 + src_dy,
1269428d7b3dSmrg		     box->x1 + dst_dx, box->y1 + dst_dy,
1270428d7b3dSmrg		     box->x2 - box->x1, box->y2 - box->y1,
1271428d7b3dSmrg		     bpp, stride, bo->pitch));
1272428d7b3dSmrg
1273428d7b3dSmrg		assert(box->x2 > box->x1);
1274428d7b3dSmrg		assert(box->y2 > box->y1);
1275428d7b3dSmrg
1276428d7b3dSmrg		assert(box->x1 + dst_dx >= 0);
1277428d7b3dSmrg		assert((box->x2 + dst_dx)*bpp <= 8*bo->pitch);
1278428d7b3dSmrg		assert(box->y1 + dst_dy >= 0);
1279428d7b3dSmrg		assert((box->y2 + dst_dy)*bo->pitch <= kgem_bo_size(bo));
1280428d7b3dSmrg
1281428d7b3dSmrg		assert(box->x1 + src_dx >= 0);
1282428d7b3dSmrg		assert((box->x2 + src_dx)*bpp <= 8*stride);
1283428d7b3dSmrg		assert(box->y1 + src_dy >= 0);
1284428d7b3dSmrg
1285428d7b3dSmrg		memcpy_xor(src, dst, bpp,
1286428d7b3dSmrg			   stride, bo->pitch,
1287428d7b3dSmrg			   box->x1 + src_dx, box->y1 + src_dy,
1288428d7b3dSmrg			   box->x1 + dst_dx, box->y1 + dst_dy,
1289428d7b3dSmrg			   box->x2 - box->x1, box->y2 - box->y1,
1290428d7b3dSmrg			   and, or);
1291428d7b3dSmrg		box++;
1292428d7b3dSmrg	} while (--n);
1293428d7b3dSmrg
1294428d7b3dSmrg	sigtrap_put();
1295428d7b3dSmrg	return true;
1296428d7b3dSmrg}
1297428d7b3dSmrg
1298428d7b3dSmrgstatic bool upload_inplace__xor(struct kgem *kgem,
1299428d7b3dSmrg				struct kgem_bo *bo,
1300428d7b3dSmrg				const BoxRec *box,
1301428d7b3dSmrg				int n, int bpp)
1302428d7b3dSmrg{
1303428d7b3dSmrg	if (unlikely(kgem->wedged))
1304428d7b3dSmrg		return true;
1305428d7b3dSmrg
1306428d7b3dSmrg	if (!kgem_bo_can_map(kgem, bo))
1307428d7b3dSmrg		return false;
1308428d7b3dSmrg
1309428d7b3dSmrg	return __upload_inplace(kgem, bo, box, n, bpp);
1310428d7b3dSmrg}
1311428d7b3dSmrg
1312428d7b3dSmrgbool sna_write_boxes__xor(struct sna *sna, PixmapPtr dst,
1313428d7b3dSmrg			  struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
1314428d7b3dSmrg			  const void *src, int stride, int16_t src_dx, int16_t src_dy,
1315428d7b3dSmrg			  const BoxRec *box, int nbox,
1316428d7b3dSmrg			  uint32_t and, uint32_t or)
1317428d7b3dSmrg{
1318428d7b3dSmrg	struct kgem *kgem = &sna->kgem;
1319428d7b3dSmrg	struct kgem_bo *src_bo;
1320428d7b3dSmrg	BoxRec extents;
1321428d7b3dSmrg	bool can_blt;
1322428d7b3dSmrg	void *ptr;
1323428d7b3dSmrg	int offset;
1324428d7b3dSmrg	int n, cmd, br13;
1325428d7b3dSmrg
1326428d7b3dSmrg	DBG(("%s x %d\n", __FUNCTION__, nbox));
1327428d7b3dSmrg
1328428d7b3dSmrg	if (upload_inplace__xor(kgem, dst_bo, box, nbox, dst->drawable.bitsPerPixel) &&
1329428d7b3dSmrg	    write_boxes_inplace__xor(kgem,
1330428d7b3dSmrg				     src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
1331428d7b3dSmrg				     dst_bo, dst_dx, dst_dy,
1332428d7b3dSmrg				     box, nbox,
1333428d7b3dSmrg				     and, or))
1334428d7b3dSmrg		return true;
1335428d7b3dSmrg
1336428d7b3dSmrg	if (wedged(sna))
1337428d7b3dSmrg		return false;
1338428d7b3dSmrg
1339428d7b3dSmrg	can_blt = kgem_bo_can_blt(kgem, dst_bo) &&
1340428d7b3dSmrg		(box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
1341428d7b3dSmrg	extents = box[0];
1342428d7b3dSmrg	for (n = 1; n < nbox; n++) {
1343428d7b3dSmrg		if (box[n].x1 < extents.x1)
1344428d7b3dSmrg			extents.x1 = box[n].x1;
1345428d7b3dSmrg		if (box[n].x2 > extents.x2)
1346428d7b3dSmrg			extents.x2 = box[n].x2;
1347428d7b3dSmrg
1348428d7b3dSmrg		if (can_blt)
1349428d7b3dSmrg			can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
1350428d7b3dSmrg
1351428d7b3dSmrg		if (box[n].y1 < extents.y1)
1352428d7b3dSmrg			extents.y1 = box[n].y1;
1353428d7b3dSmrg		if (box[n].y2 > extents.y2)
1354428d7b3dSmrg			extents.y2 = box[n].y2;
1355428d7b3dSmrg	}
1356428d7b3dSmrg
1357428d7b3dSmrg	/* Try to avoid switching rings... */
1358428d7b3dSmrg	if (!can_blt || kgem->ring == KGEM_RENDER ||
1359428d7b3dSmrg	    upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) {
1360428d7b3dSmrg		DrawableRec tmp;
1361428d7b3dSmrg
1362428d7b3dSmrg		tmp.width  = extents.x2 - extents.x1;
1363428d7b3dSmrg		tmp.height = extents.y2 - extents.y1;
1364428d7b3dSmrg		tmp.depth  = dst->drawable.depth;
1365428d7b3dSmrg		tmp.bitsPerPixel = dst->drawable.bitsPerPixel;
1366428d7b3dSmrg
1367428d7b3dSmrg		assert(tmp.width);
1368428d7b3dSmrg		assert(tmp.height);
1369428d7b3dSmrg
1370428d7b3dSmrg		DBG(("%s: upload (%d, %d)x(%d, %d), max %dx%d\n",
1371428d7b3dSmrg		     __FUNCTION__,
1372428d7b3dSmrg		     extents.x1, extents.y1,
1373428d7b3dSmrg		     tmp.width, tmp.height,
1374428d7b3dSmrg		     sna->render.max_3d_size, sna->render.max_3d_size));
1375428d7b3dSmrg		if (must_tile(sna, tmp.width, tmp.height)) {
1376428d7b3dSmrg			BoxRec tile, stack[64], *clipped;
1377428d7b3dSmrg			int step;
1378428d7b3dSmrg
1379428d7b3dSmrgtile:
1380428d7b3dSmrg			step = MIN(sna->render.max_3d_size - 4096 / dst->drawable.bitsPerPixel,
1381428d7b3dSmrg				   8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel);
1382428d7b3dSmrg			while (step * step * 4 > sna->kgem.max_upload_tile_size)
1383428d7b3dSmrg				step /= 2;
1384428d7b3dSmrg
1385428d7b3dSmrg			DBG(("%s: tiling upload, using %dx%d tiles\n",
1386428d7b3dSmrg			     __FUNCTION__, step, step));
1387428d7b3dSmrg			assert(step);
1388428d7b3dSmrg
1389428d7b3dSmrg			if (n > ARRAY_SIZE(stack)) {
1390428d7b3dSmrg				clipped = malloc(sizeof(BoxRec) * n);
1391428d7b3dSmrg				if (clipped == NULL)
1392428d7b3dSmrg					goto fallback;
1393428d7b3dSmrg			} else
1394428d7b3dSmrg				clipped = stack;
1395428d7b3dSmrg
1396428d7b3dSmrg			for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
1397428d7b3dSmrg				int y2 = tile.y1 + step;
1398428d7b3dSmrg				if (y2 > extents.y2)
1399428d7b3dSmrg					y2 = extents.y2;
1400428d7b3dSmrg				tile.y2 = y2;
1401428d7b3dSmrg
1402428d7b3dSmrg				for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
1403428d7b3dSmrg					int x2 = tile.x1 + step;
1404428d7b3dSmrg					if (x2 > extents.x2)
1405428d7b3dSmrg						x2 = extents.x2;
1406428d7b3dSmrg					tile.x2 = x2;
1407428d7b3dSmrg
1408428d7b3dSmrg					tmp.width  = tile.x2 - tile.x1;
1409428d7b3dSmrg					tmp.height = tile.y2 - tile.y1;
1410428d7b3dSmrg
1411428d7b3dSmrg					src_bo = kgem_create_buffer_2d(kgem,
1412428d7b3dSmrg								       tmp.width,
1413428d7b3dSmrg								       tmp.height,
1414428d7b3dSmrg								       tmp.bitsPerPixel,
1415428d7b3dSmrg								       KGEM_BUFFER_WRITE_INPLACE,
1416428d7b3dSmrg								       &ptr);
1417428d7b3dSmrg					if (!src_bo) {
1418428d7b3dSmrg						if (clipped != stack)
1419428d7b3dSmrg							free(clipped);
1420428d7b3dSmrg						goto fallback;
1421428d7b3dSmrg					}
1422428d7b3dSmrg
1423428d7b3dSmrg					if (sigtrap_get() == 0) {
1424428d7b3dSmrg						BoxRec *c = clipped;
1425428d7b3dSmrg						for (n = 0; n < nbox; n++) {
1426428d7b3dSmrg							*c = box[n];
1427428d7b3dSmrg							if (!box_intersect(c, &tile))
1428428d7b3dSmrg								continue;
1429428d7b3dSmrg
1430428d7b3dSmrg							DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
1431428d7b3dSmrg							     __FUNCTION__,
1432428d7b3dSmrg							     c->x1, c->y1,
1433428d7b3dSmrg							     c->x2, c->y2,
1434428d7b3dSmrg							     src_dx, src_dy,
1435428d7b3dSmrg							     c->x1 - tile.x1,
1436428d7b3dSmrg							     c->y1 - tile.y1));
1437428d7b3dSmrg							memcpy_xor(src, ptr, tmp.bitsPerPixel,
1438428d7b3dSmrg								   stride, src_bo->pitch,
1439428d7b3dSmrg								   c->x1 + src_dx,
1440428d7b3dSmrg								   c->y1 + src_dy,
1441428d7b3dSmrg								   c->x1 - tile.x1,
1442428d7b3dSmrg								   c->y1 - tile.y1,
1443428d7b3dSmrg								   c->x2 - c->x1,
1444428d7b3dSmrg								   c->y2 - c->y1,
1445428d7b3dSmrg								   and, or);
1446428d7b3dSmrg							c++;
1447428d7b3dSmrg						}
1448428d7b3dSmrg
1449428d7b3dSmrg						if (c != clipped)
1450428d7b3dSmrg							n = sna->render.copy_boxes(sna, GXcopy,
1451428d7b3dSmrg										   &tmp, src_bo, -tile.x1, -tile.y1,
1452428d7b3dSmrg										   &dst->drawable, dst_bo, dst_dx, dst_dy,
1453428d7b3dSmrg										   clipped, c - clipped, 0);
1454428d7b3dSmrg						else
1455428d7b3dSmrg							n = 1;
1456428d7b3dSmrg
1457428d7b3dSmrg						sigtrap_put();
1458428d7b3dSmrg					} else
1459428d7b3dSmrg						n = 0;
1460428d7b3dSmrg
1461428d7b3dSmrg					kgem_bo_destroy(&sna->kgem, src_bo);
1462428d7b3dSmrg
1463428d7b3dSmrg					if (!n) {
1464428d7b3dSmrg						if (clipped != stack)
1465428d7b3dSmrg							free(clipped);
1466428d7b3dSmrg						goto fallback;
1467428d7b3dSmrg					}
1468428d7b3dSmrg				}
1469428d7b3dSmrg			}
1470428d7b3dSmrg
1471428d7b3dSmrg			if (clipped != stack)
1472428d7b3dSmrg				free(clipped);
1473428d7b3dSmrg		} else {
1474428d7b3dSmrg			src_bo = kgem_create_buffer_2d(kgem,
1475428d7b3dSmrg						       tmp.width,
1476428d7b3dSmrg						       tmp.height,
1477428d7b3dSmrg						       tmp.bitsPerPixel,
1478428d7b3dSmrg						       KGEM_BUFFER_WRITE_INPLACE,
1479428d7b3dSmrg						       &ptr);
1480428d7b3dSmrg			if (!src_bo)
1481428d7b3dSmrg				goto fallback;
1482428d7b3dSmrg
1483428d7b3dSmrg			if (sigtrap_get() == 0) {
1484428d7b3dSmrg				for (n = 0; n < nbox; n++) {
1485428d7b3dSmrg					DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
1486428d7b3dSmrg					     __FUNCTION__,
1487428d7b3dSmrg					     box[n].x1, box[n].y1,
1488428d7b3dSmrg					     box[n].x2, box[n].y2,
1489428d7b3dSmrg					     src_dx, src_dy,
1490428d7b3dSmrg					     box[n].x1 - extents.x1,
1491428d7b3dSmrg					     box[n].y1 - extents.y1));
1492428d7b3dSmrg					memcpy_xor(src, ptr, tmp.bitsPerPixel,
1493428d7b3dSmrg						   stride, src_bo->pitch,
1494428d7b3dSmrg						   box[n].x1 + src_dx,
1495428d7b3dSmrg						   box[n].y1 + src_dy,
1496428d7b3dSmrg						   box[n].x1 - extents.x1,
1497428d7b3dSmrg						   box[n].y1 - extents.y1,
1498428d7b3dSmrg						   box[n].x2 - box[n].x1,
1499428d7b3dSmrg						   box[n].y2 - box[n].y1,
1500428d7b3dSmrg						   and, or);
1501428d7b3dSmrg				}
1502428d7b3dSmrg
1503428d7b3dSmrg				n = sna->render.copy_boxes(sna, GXcopy,
1504428d7b3dSmrg							   &tmp, src_bo, -extents.x1, -extents.y1,
1505428d7b3dSmrg							   &dst->drawable, dst_bo, dst_dx, dst_dy,
1506428d7b3dSmrg							   box, nbox, 0);
1507428d7b3dSmrg				sigtrap_put();
1508428d7b3dSmrg			} else
1509428d7b3dSmrg				n = 0;
1510428d7b3dSmrg
1511428d7b3dSmrg			kgem_bo_destroy(&sna->kgem, src_bo);
1512428d7b3dSmrg
1513428d7b3dSmrg			if (!n)
1514428d7b3dSmrg				goto tile;
1515428d7b3dSmrg		}
1516428d7b3dSmrg
1517428d7b3dSmrg		return true;
1518428d7b3dSmrg	}
1519428d7b3dSmrg
1520428d7b3dSmrg	cmd = XY_SRC_COPY_BLT_CMD;
1521428d7b3dSmrg	br13 = dst_bo->pitch;
1522428d7b3dSmrg	if (kgem->gen >= 040 && dst_bo->tiling) {
1523428d7b3dSmrg		cmd |= BLT_DST_TILED;
1524428d7b3dSmrg		br13 >>= 2;
1525428d7b3dSmrg	}
1526428d7b3dSmrg	br13 |= 0xcc << 16;
1527428d7b3dSmrg	switch (dst->drawable.bitsPerPixel) {
1528428d7b3dSmrg	default:
1529428d7b3dSmrg	case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
1530428d7b3dSmrg		 br13 |= 1 << 25; /* RGB8888 */
1531428d7b3dSmrg	case 16: br13 |= 1 << 24; /* RGB565 */
1532428d7b3dSmrg	case 8: break;
1533428d7b3dSmrg	}
1534428d7b3dSmrg
1535428d7b3dSmrg	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
1536428d7b3dSmrg	if (!kgem_check_batch(kgem, 10) ||
1537428d7b3dSmrg	    !kgem_check_reloc_and_exec(kgem, 2) ||
1538428d7b3dSmrg	    !kgem_check_bo_fenced(kgem, dst_bo)) {
1539428d7b3dSmrg		kgem_submit(kgem);
1540428d7b3dSmrg		if (!kgem_check_bo_fenced(kgem, dst_bo))
1541428d7b3dSmrg			goto fallback;
1542428d7b3dSmrg		_kgem_set_mode(kgem, KGEM_BLT);
1543428d7b3dSmrg	}
1544428d7b3dSmrg
1545428d7b3dSmrg	if (sna->kgem.gen >= 0100) {
1546428d7b3dSmrg		cmd |= 8;
1547428d7b3dSmrg		do {
1548428d7b3dSmrg			int nbox_this_time, rem;
1549428d7b3dSmrg
1550428d7b3dSmrg			nbox_this_time = nbox;
1551428d7b3dSmrg			rem = kgem_batch_space(kgem);
1552428d7b3dSmrg			if (10*nbox_this_time > rem)
1553428d7b3dSmrg				nbox_this_time = rem / 8;
1554428d7b3dSmrg			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1555428d7b3dSmrg				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
1556428d7b3dSmrg			assert(nbox_this_time);
1557428d7b3dSmrg			nbox -= nbox_this_time;
1558428d7b3dSmrg
1559428d7b3dSmrg			/* Count the total number of bytes to be read and allocate a
1560428d7b3dSmrg			 * single buffer large enough. Or if it is very small, combine
1561428d7b3dSmrg			 * with other allocations. */
1562428d7b3dSmrg			offset = 0;
1563428d7b3dSmrg			for (n = 0; n < nbox_this_time; n++) {
1564428d7b3dSmrg				int height = box[n].y2 - box[n].y1;
1565428d7b3dSmrg				int width = box[n].x2 - box[n].x1;
1566428d7b3dSmrg				offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
1567428d7b3dSmrg			}
1568428d7b3dSmrg
1569428d7b3dSmrg			src_bo = kgem_create_buffer(kgem, offset,
1570428d7b3dSmrg						    KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
1571428d7b3dSmrg						    &ptr);
1572428d7b3dSmrg			if (!src_bo)
1573428d7b3dSmrg				goto fallback;
1574428d7b3dSmrg
1575428d7b3dSmrg			if (sigtrap_get()) {
1576428d7b3dSmrg				kgem_bo_destroy(kgem, src_bo);
1577428d7b3dSmrg				goto fallback;
1578428d7b3dSmrg			}
1579428d7b3dSmrg
1580428d7b3dSmrg			offset = 0;
1581428d7b3dSmrg			do {
1582428d7b3dSmrg				int height = box->y2 - box->y1;
1583428d7b3dSmrg				int width = box->x2 - box->x1;
1584428d7b3dSmrg				int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
1585428d7b3dSmrg				uint32_t *b;
1586428d7b3dSmrg
1587428d7b3dSmrg				DBG(("  %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
1588428d7b3dSmrg				     __FUNCTION__,
1589428d7b3dSmrg				     box->x1 + src_dx, box->y1 + src_dy,
1590428d7b3dSmrg				     box->x1 + dst_dx, box->y1 + dst_dy,
1591428d7b3dSmrg				     width, height,
1592428d7b3dSmrg				     offset, pitch));
1593428d7b3dSmrg
1594428d7b3dSmrg				assert(box->x1 + src_dx >= 0);
1595428d7b3dSmrg				assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
1596428d7b3dSmrg				assert(box->y1 + src_dy >= 0);
1597428d7b3dSmrg
1598428d7b3dSmrg				assert(box->x1 + dst_dx >= 0);
1599428d7b3dSmrg				assert(box->y1 + dst_dy >= 0);
1600428d7b3dSmrg
1601428d7b3dSmrg				memcpy_xor(src, (char *)ptr + offset,
1602428d7b3dSmrg					   dst->drawable.bitsPerPixel,
1603428d7b3dSmrg					   stride, pitch,
1604428d7b3dSmrg					   box->x1 + src_dx, box->y1 + src_dy,
1605428d7b3dSmrg					   0, 0,
1606428d7b3dSmrg					   width, height,
1607428d7b3dSmrg					   and, or);
1608428d7b3dSmrg
1609428d7b3dSmrg				assert(kgem->mode == KGEM_BLT);
1610428d7b3dSmrg				b = kgem->batch + kgem->nbatch;
1611428d7b3dSmrg				b[0] = cmd;
1612428d7b3dSmrg				b[1] = br13;
1613428d7b3dSmrg				b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
1614428d7b3dSmrg				b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
1615428d7b3dSmrg				*(uint64_t *)(b+4) =
1616428d7b3dSmrg					kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
1617428d7b3dSmrg							 I915_GEM_DOMAIN_RENDER << 16 |
1618428d7b3dSmrg							 I915_GEM_DOMAIN_RENDER |
1619428d7b3dSmrg							 KGEM_RELOC_FENCED,
1620428d7b3dSmrg							 0);
1621428d7b3dSmrg				b[6] = 0;
1622428d7b3dSmrg				b[7] = pitch;
1623428d7b3dSmrg				*(uint64_t *)(b+8) =
1624428d7b3dSmrg					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
1625428d7b3dSmrg							 I915_GEM_DOMAIN_RENDER << 16 |
1626428d7b3dSmrg							 KGEM_RELOC_FENCED,
1627428d7b3dSmrg							 offset);
1628428d7b3dSmrg				kgem->nbatch += 10;
1629428d7b3dSmrg
1630428d7b3dSmrg				box++;
1631428d7b3dSmrg				offset += pitch * height;
1632428d7b3dSmrg			} while (--nbox_this_time);
1633428d7b3dSmrg			assert(offset == __kgem_buffer_size(src_bo));
1634428d7b3dSmrg			sigtrap_put();
1635428d7b3dSmrg
1636428d7b3dSmrg			if (nbox) {
1637428d7b3dSmrg				_kgem_submit(kgem);
1638428d7b3dSmrg				_kgem_set_mode(kgem, KGEM_BLT);
1639428d7b3dSmrg			}
1640428d7b3dSmrg
1641428d7b3dSmrg			kgem_bo_destroy(kgem, src_bo);
1642428d7b3dSmrg		} while (nbox);
1643428d7b3dSmrg	} else {
1644428d7b3dSmrg		cmd |= 6;
1645428d7b3dSmrg		do {
1646428d7b3dSmrg			int nbox_this_time, rem;
1647428d7b3dSmrg
1648428d7b3dSmrg			nbox_this_time = nbox;
1649428d7b3dSmrg			rem = kgem_batch_space(kgem);
1650428d7b3dSmrg			if (8*nbox_this_time > rem)
1651428d7b3dSmrg				nbox_this_time = rem / 8;
1652428d7b3dSmrg			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1653428d7b3dSmrg				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
1654428d7b3dSmrg			assert(nbox_this_time);
1655428d7b3dSmrg			nbox -= nbox_this_time;
1656428d7b3dSmrg
1657428d7b3dSmrg			/* Count the total number of bytes to be read and allocate a
1658428d7b3dSmrg			 * single buffer large enough. Or if it is very small, combine
1659428d7b3dSmrg			 * with other allocations. */
1660428d7b3dSmrg			offset = 0;
1661428d7b3dSmrg			for (n = 0; n < nbox_this_time; n++) {
1662428d7b3dSmrg				int height = box[n].y2 - box[n].y1;
1663428d7b3dSmrg				int width = box[n].x2 - box[n].x1;
1664428d7b3dSmrg				offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
1665428d7b3dSmrg			}
1666428d7b3dSmrg
1667428d7b3dSmrg			src_bo = kgem_create_buffer(kgem, offset,
1668428d7b3dSmrg						    KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
1669428d7b3dSmrg						    &ptr);
1670428d7b3dSmrg			if (!src_bo)
1671428d7b3dSmrg				goto fallback;
1672428d7b3dSmrg
1673428d7b3dSmrg			if (sigtrap_get()) {
1674428d7b3dSmrg				kgem_bo_destroy(kgem, src_bo);
1675428d7b3dSmrg				goto fallback;
1676428d7b3dSmrg			}
1677428d7b3dSmrg
1678428d7b3dSmrg			offset = 0;
1679428d7b3dSmrg			do {
1680428d7b3dSmrg				int height = box->y2 - box->y1;
1681428d7b3dSmrg				int width = box->x2 - box->x1;
1682428d7b3dSmrg				int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
1683428d7b3dSmrg				uint32_t *b;
1684428d7b3dSmrg
1685428d7b3dSmrg				DBG(("  %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
1686428d7b3dSmrg				     __FUNCTION__,
1687428d7b3dSmrg				     box->x1 + src_dx, box->y1 + src_dy,
1688428d7b3dSmrg				     box->x1 + dst_dx, box->y1 + dst_dy,
1689428d7b3dSmrg				     width, height,
1690428d7b3dSmrg				     offset, pitch));
1691428d7b3dSmrg
1692428d7b3dSmrg				assert(box->x1 + src_dx >= 0);
1693428d7b3dSmrg				assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
1694428d7b3dSmrg				assert(box->y1 + src_dy >= 0);
1695428d7b3dSmrg
1696428d7b3dSmrg				assert(box->x1 + dst_dx >= 0);
1697428d7b3dSmrg				assert(box->y1 + dst_dy >= 0);
1698428d7b3dSmrg
1699428d7b3dSmrg				memcpy_xor(src, (char *)ptr + offset,
1700428d7b3dSmrg					   dst->drawable.bitsPerPixel,
1701428d7b3dSmrg					   stride, pitch,
1702428d7b3dSmrg					   box->x1 + src_dx, box->y1 + src_dy,
1703428d7b3dSmrg					   0, 0,
1704428d7b3dSmrg					   width, height,
1705428d7b3dSmrg					   and, or);
1706428d7b3dSmrg
1707428d7b3dSmrg				assert(kgem->mode == KGEM_BLT);
1708428d7b3dSmrg				b = kgem->batch + kgem->nbatch;
1709428d7b3dSmrg				b[0] = cmd;
1710428d7b3dSmrg				b[1] = br13;
1711428d7b3dSmrg				b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
1712428d7b3dSmrg				b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
1713428d7b3dSmrg				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
1714428d7b3dSmrg						      I915_GEM_DOMAIN_RENDER << 16 |
1715428d7b3dSmrg						      I915_GEM_DOMAIN_RENDER |
1716428d7b3dSmrg						      KGEM_RELOC_FENCED,
1717428d7b3dSmrg						      0);
1718428d7b3dSmrg				b[5] = 0;
1719428d7b3dSmrg				b[6] = pitch;
1720428d7b3dSmrg				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
1721428d7b3dSmrg						      I915_GEM_DOMAIN_RENDER << 16 |
1722428d7b3dSmrg						      KGEM_RELOC_FENCED,
1723428d7b3dSmrg						      offset);
1724428d7b3dSmrg				kgem->nbatch += 8;
1725428d7b3dSmrg
1726428d7b3dSmrg				box++;
1727428d7b3dSmrg				offset += pitch * height;
1728428d7b3dSmrg			} while (--nbox_this_time);
1729428d7b3dSmrg			assert(offset == __kgem_buffer_size(src_bo));
1730428d7b3dSmrg			sigtrap_put();
1731428d7b3dSmrg
1732428d7b3dSmrg			if (nbox) {
1733428d7b3dSmrg				_kgem_submit(kgem);
1734428d7b3dSmrg				_kgem_set_mode(kgem, KGEM_BLT);
1735428d7b3dSmrg			}
1736428d7b3dSmrg
1737428d7b3dSmrg			kgem_bo_destroy(kgem, src_bo);
1738428d7b3dSmrg		} while (nbox);
1739428d7b3dSmrg	}
1740428d7b3dSmrg
1741428d7b3dSmrg	sna->blt_state.fill_bo = 0;
1742428d7b3dSmrg	return true;
1743428d7b3dSmrg
1744428d7b3dSmrgfallback:
1745428d7b3dSmrg	return write_boxes_inplace__xor(kgem,
1746428d7b3dSmrg					src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
1747428d7b3dSmrg					dst_bo, dst_dx, dst_dy,
1748428d7b3dSmrg					box, nbox,
1749428d7b3dSmrg					and, or);
1750428d7b3dSmrg}
1751428d7b3dSmrg
1752428d7b3dSmrgstatic bool
1753428d7b3dSmrgindirect_replace(struct sna *sna,
1754428d7b3dSmrg		 PixmapPtr pixmap,
1755428d7b3dSmrg		 struct kgem_bo *bo,
1756428d7b3dSmrg		 const void *src, int stride)
1757428d7b3dSmrg{
1758428d7b3dSmrg	struct kgem *kgem = &sna->kgem;
1759428d7b3dSmrg	struct kgem_bo *src_bo;
1760428d7b3dSmrg	BoxRec box;
1761428d7b3dSmrg	void *ptr;
1762428d7b3dSmrg	bool ret;
1763428d7b3dSmrg
1764428d7b3dSmrg	DBG(("%s: size=%d vs %d\n",
1765428d7b3dSmrg	     __FUNCTION__,
1766428d7b3dSmrg	     stride * pixmap->drawable.height >> 12,
1767428d7b3dSmrg	     kgem->half_cpu_cache_pages));
1768428d7b3dSmrg
1769428d7b3dSmrg	if (stride * pixmap->drawable.height >> 12 > kgem->half_cpu_cache_pages)
1770428d7b3dSmrg		return false;
1771428d7b3dSmrg
1772428d7b3dSmrg	if (!kgem_bo_can_blt(kgem, bo) &&
1773428d7b3dSmrg	    must_tile(sna, pixmap->drawable.width, pixmap->drawable.height))
1774428d7b3dSmrg		return false;
1775428d7b3dSmrg
1776428d7b3dSmrg	src_bo = kgem_create_buffer_2d(kgem,
1777428d7b3dSmrg				       pixmap->drawable.width,
1778428d7b3dSmrg				       pixmap->drawable.height,
1779428d7b3dSmrg				       pixmap->drawable.bitsPerPixel,
1780428d7b3dSmrg				       KGEM_BUFFER_WRITE_INPLACE,
1781428d7b3dSmrg				       &ptr);
1782428d7b3dSmrg	if (!src_bo)
1783428d7b3dSmrg		return false;
1784428d7b3dSmrg
1785428d7b3dSmrg	ret = false;
1786428d7b3dSmrg	if (sigtrap_get() == 0) {
1787428d7b3dSmrg		memcpy_blt(src, ptr, pixmap->drawable.bitsPerPixel,
1788428d7b3dSmrg			   stride, src_bo->pitch,
1789428d7b3dSmrg			   0, 0,
1790428d7b3dSmrg			   0, 0,
1791428d7b3dSmrg			   pixmap->drawable.width,
1792428d7b3dSmrg			   pixmap->drawable.height);
1793428d7b3dSmrg
1794428d7b3dSmrg		box.x1 = box.y1 = 0;
1795428d7b3dSmrg		box.x2 = pixmap->drawable.width;
1796428d7b3dSmrg		box.y2 = pixmap->drawable.height;
1797428d7b3dSmrg
1798428d7b3dSmrg		ret = sna->render.copy_boxes(sna, GXcopy,
1799428d7b3dSmrg					     &pixmap->drawable, src_bo, 0, 0,
1800428d7b3dSmrg					     &pixmap->drawable, bo, 0, 0,
1801428d7b3dSmrg					     &box, 1, 0);
1802428d7b3dSmrg		sigtrap_put();
1803428d7b3dSmrg	}
1804428d7b3dSmrg
1805428d7b3dSmrg	kgem_bo_destroy(kgem, src_bo);
1806428d7b3dSmrg
1807428d7b3dSmrg	return ret;
1808428d7b3dSmrg}
1809428d7b3dSmrg
1810428d7b3dSmrgbool sna_replace(struct sna *sna, PixmapPtr pixmap,
1811428d7b3dSmrg		 const void *src, int stride)
1812428d7b3dSmrg{
1813428d7b3dSmrg	struct sna_pixmap *priv = sna_pixmap(pixmap);
1814428d7b3dSmrg	struct kgem_bo *bo = priv->gpu_bo;
1815428d7b3dSmrg	void *dst;
1816428d7b3dSmrg
1817428d7b3dSmrg	assert(bo);
1818428d7b3dSmrg	DBG(("%s(handle=%d, %dx%d, bpp=%d, tiling=%d) busy?=%d\n",
1819428d7b3dSmrg	     __FUNCTION__, bo->handle,
1820428d7b3dSmrg	     pixmap->drawable.width,
1821428d7b3dSmrg	     pixmap->drawable.height,
1822428d7b3dSmrg	     pixmap->drawable.bitsPerPixel,
1823428d7b3dSmrg	     bo->tiling,
1824428d7b3dSmrg	     __kgem_bo_is_busy(&sna->kgem, bo)));
1825428d7b3dSmrg
1826428d7b3dSmrg	assert(!priv->pinned);
1827428d7b3dSmrg
1828428d7b3dSmrg	kgem_bo_undo(&sna->kgem, bo);
1829428d7b3dSmrg
1830428d7b3dSmrg	if (__kgem_bo_is_busy(&sna->kgem, bo)) {
1831428d7b3dSmrg		struct kgem_bo *new_bo;
1832428d7b3dSmrg
1833428d7b3dSmrg		if (indirect_replace(sna, pixmap, bo, src, stride))
1834428d7b3dSmrg			return true;
1835428d7b3dSmrg
1836428d7b3dSmrg		new_bo = kgem_create_2d(&sna->kgem,
1837428d7b3dSmrg					pixmap->drawable.width,
1838428d7b3dSmrg					pixmap->drawable.height,
1839428d7b3dSmrg					pixmap->drawable.bitsPerPixel,
1840428d7b3dSmrg					bo->tiling,
1841428d7b3dSmrg					CREATE_GTT_MAP | CREATE_INACTIVE);
1842428d7b3dSmrg		if (new_bo)
1843428d7b3dSmrg			bo = new_bo;
1844428d7b3dSmrg	}
1845428d7b3dSmrg
1846428d7b3dSmrg	if (bo->tiling == I915_TILING_NONE && bo->pitch == stride &&
1847428d7b3dSmrg	    kgem_bo_write(&sna->kgem, bo, src,
1848428d7b3dSmrg			  (pixmap->drawable.height-1)*stride + pixmap->drawable.width*pixmap->drawable.bitsPerPixel/8))
1849428d7b3dSmrg			goto done;
1850428d7b3dSmrg
1851428d7b3dSmrg	if (upload_inplace__tiled(&sna->kgem, bo)) {
1852428d7b3dSmrg		BoxRec box;
1853428d7b3dSmrg
1854428d7b3dSmrg		box.x1 = box.y1 = 0;
1855428d7b3dSmrg		box.x2 = pixmap->drawable.width;
1856428d7b3dSmrg		box.y2 = pixmap->drawable.height;
1857428d7b3dSmrg
1858428d7b3dSmrg		if (write_boxes_inplace__tiled(&sna->kgem, src,
1859428d7b3dSmrg					       stride, pixmap->drawable.bitsPerPixel, 0, 0,
1860428d7b3dSmrg					       bo, 0, 0, &box, 1))
1861428d7b3dSmrg			goto done;
1862428d7b3dSmrg	}
1863428d7b3dSmrg
1864428d7b3dSmrg	if (kgem_bo_can_map(&sna->kgem, bo) &&
1865428d7b3dSmrg	    (dst = kgem_bo_map(&sna->kgem, bo)) != NULL &&
1866428d7b3dSmrg	    sigtrap_get() == 0) {
1867428d7b3dSmrg		memcpy_blt(src, dst, pixmap->drawable.bitsPerPixel,
1868428d7b3dSmrg			   stride, bo->pitch,
1869428d7b3dSmrg			   0, 0,
1870428d7b3dSmrg			   0, 0,
1871428d7b3dSmrg			   pixmap->drawable.width,
1872428d7b3dSmrg			   pixmap->drawable.height);
1873428d7b3dSmrg		sigtrap_put();
1874428d7b3dSmrg	} else {
1875428d7b3dSmrg		BoxRec box;
1876428d7b3dSmrg
1877428d7b3dSmrg		if (bo != priv->gpu_bo) {
1878428d7b3dSmrg			kgem_bo_destroy(&sna->kgem, bo);
1879428d7b3dSmrg			bo = priv->gpu_bo;
1880428d7b3dSmrg		}
1881428d7b3dSmrg
1882428d7b3dSmrg		box.x1 = box.y1 = 0;
1883428d7b3dSmrg		box.x2 = pixmap->drawable.width;
1884428d7b3dSmrg		box.y2 = pixmap->drawable.height;
1885428d7b3dSmrg
1886428d7b3dSmrg		if (!sna_write_boxes(sna, pixmap,
1887428d7b3dSmrg				     bo, 0, 0,
1888428d7b3dSmrg				     src, stride, 0, 0,
1889428d7b3dSmrg				     &box, 1))
1890428d7b3dSmrg			return false;
1891428d7b3dSmrg	}
1892428d7b3dSmrg
1893428d7b3dSmrgdone:
1894428d7b3dSmrg	if (bo != priv->gpu_bo) {
1895428d7b3dSmrg		sna_pixmap_unmap(pixmap, priv);
1896428d7b3dSmrg		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
1897428d7b3dSmrg		priv->gpu_bo = bo;
1898428d7b3dSmrg	}
1899428d7b3dSmrg
1900428d7b3dSmrg	return true;
1901428d7b3dSmrg}
1902428d7b3dSmrg
1903428d7b3dSmrgbool
1904428d7b3dSmrgsna_replace__xor(struct sna *sna, PixmapPtr pixmap,
1905428d7b3dSmrg		 const void *src, int stride,
1906428d7b3dSmrg		 uint32_t and, uint32_t or)
1907428d7b3dSmrg{
1908428d7b3dSmrg	struct sna_pixmap *priv = sna_pixmap(pixmap);
1909428d7b3dSmrg	struct kgem_bo *bo = priv->gpu_bo;
1910428d7b3dSmrg	void *dst;
1911428d7b3dSmrg
1912428d7b3dSmrg	DBG(("%s(handle=%d, %dx%d, bpp=%d, tiling=%d)\n",
1913428d7b3dSmrg	     __FUNCTION__, bo->handle,
1914428d7b3dSmrg	     pixmap->drawable.width,
1915428d7b3dSmrg	     pixmap->drawable.height,
1916428d7b3dSmrg	     pixmap->drawable.bitsPerPixel,
1917428d7b3dSmrg	     bo->tiling));
1918428d7b3dSmrg
1919428d7b3dSmrg	assert(!priv->pinned);
1920428d7b3dSmrg
1921428d7b3dSmrg	kgem_bo_undo(&sna->kgem, bo);
1922428d7b3dSmrg
1923428d7b3dSmrg	if (!kgem_bo_can_map(&sna->kgem, bo) ||
1924428d7b3dSmrg	    __kgem_bo_is_busy(&sna->kgem, bo)) {
1925428d7b3dSmrg		struct kgem_bo *new_bo;
1926428d7b3dSmrg
1927428d7b3dSmrg		new_bo = kgem_create_2d(&sna->kgem,
1928428d7b3dSmrg					pixmap->drawable.width,
1929428d7b3dSmrg					pixmap->drawable.height,
1930428d7b3dSmrg					pixmap->drawable.bitsPerPixel,
1931428d7b3dSmrg					bo->tiling,
1932428d7b3dSmrg					CREATE_GTT_MAP | CREATE_INACTIVE);
1933428d7b3dSmrg		if (new_bo)
1934428d7b3dSmrg			bo = new_bo;
1935428d7b3dSmrg	}
1936428d7b3dSmrg
1937428d7b3dSmrg	if (kgem_bo_can_map(&sna->kgem, bo) &&
1938428d7b3dSmrg	    (dst = kgem_bo_map(&sna->kgem, bo)) != NULL &&
1939428d7b3dSmrg	    sigtrap_get() == 0) {
1940428d7b3dSmrg		memcpy_xor(src, dst, pixmap->drawable.bitsPerPixel,
1941428d7b3dSmrg			   stride, bo->pitch,
1942428d7b3dSmrg			   0, 0,
1943428d7b3dSmrg			   0, 0,
1944428d7b3dSmrg			   pixmap->drawable.width,
1945428d7b3dSmrg			   pixmap->drawable.height,
1946428d7b3dSmrg			   and, or);
1947428d7b3dSmrg		sigtrap_put();
1948428d7b3dSmrg	} else {
1949428d7b3dSmrg		BoxRec box;
1950428d7b3dSmrg
1951428d7b3dSmrg		if (bo != priv->gpu_bo) {
1952428d7b3dSmrg			kgem_bo_destroy(&sna->kgem, bo);
1953428d7b3dSmrg			bo = priv->gpu_bo;
1954428d7b3dSmrg		}
1955428d7b3dSmrg
1956428d7b3dSmrg		box.x1 = box.y1 = 0;
1957428d7b3dSmrg		box.x2 = pixmap->drawable.width;
1958428d7b3dSmrg		box.y2 = pixmap->drawable.height;
1959428d7b3dSmrg
1960428d7b3dSmrg		if (!sna_write_boxes__xor(sna, pixmap,
1961428d7b3dSmrg					  bo, 0, 0,
1962428d7b3dSmrg					  src, stride, 0, 0,
1963428d7b3dSmrg					  &box, 1,
1964428d7b3dSmrg					  and, or))
1965428d7b3dSmrg			return false;
1966428d7b3dSmrg	}
1967428d7b3dSmrg
1968428d7b3dSmrg	if (bo != priv->gpu_bo) {
1969428d7b3dSmrg		sna_pixmap_unmap(pixmap, priv);
1970428d7b3dSmrg		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
1971428d7b3dSmrg		priv->gpu_bo = bo;
1972428d7b3dSmrg	}
1973428d7b3dSmrg
1974428d7b3dSmrg	return true;
1975428d7b3dSmrg}
1976