1428d7b3dSmrg/* 2428d7b3dSmrg * Copyright (c) 2011 Intel Corporation 3428d7b3dSmrg * 4428d7b3dSmrg * Permission is hereby granted, free of charge, to any person obtaining a 5428d7b3dSmrg * copy of this software and associated documentation files (the "Software"), 6428d7b3dSmrg * to deal in the Software without restriction, including without limitation 7428d7b3dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8428d7b3dSmrg * and/or sell copies of the Software, and to permit persons to whom the 9428d7b3dSmrg * Software is furnished to do so, subject to the following conditions: 10428d7b3dSmrg * 11428d7b3dSmrg * The above copyright notice and this permission notice (including the next 12428d7b3dSmrg * paragraph) shall be included in all copies or substantial portions of the 13428d7b3dSmrg * Software. 14428d7b3dSmrg * 15428d7b3dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16428d7b3dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17428d7b3dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18428d7b3dSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19428d7b3dSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20428d7b3dSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21428d7b3dSmrg * SOFTWARE. 
22428d7b3dSmrg * 23428d7b3dSmrg * Authors: 24428d7b3dSmrg * Chris Wilson <chris@chris-wilson.co.uk> 25428d7b3dSmrg * 26428d7b3dSmrg */ 27428d7b3dSmrg 28428d7b3dSmrg#ifdef HAVE_CONFIG_H 29428d7b3dSmrg#include "config.h" 30428d7b3dSmrg#endif 31428d7b3dSmrg 32428d7b3dSmrg#include "sna.h" 33428d7b3dSmrg#include "sna_render.h" 34428d7b3dSmrg#include "sna_reg.h" 35428d7b3dSmrg 36428d7b3dSmrg#include <sys/mman.h> 37428d7b3dSmrg 38428d7b3dSmrg#define PITCH(x, y) ALIGN((x)*(y), 4) 39428d7b3dSmrg 40428d7b3dSmrg#define FORCE_INPLACE 0 /* 1 upload directly, -1 force indirect */ 41428d7b3dSmrg 42428d7b3dSmrg/* XXX Need to avoid using GTT fenced access for I915_TILING_Y on 855GM */ 43428d7b3dSmrg 44428d7b3dSmrgstatic inline bool upload_too_large(struct sna *sna, int width, int height) 45428d7b3dSmrg{ 46428d7b3dSmrg return width * height * 4 > sna->kgem.max_upload_tile_size; 47428d7b3dSmrg} 48428d7b3dSmrg 49428d7b3dSmrgstatic inline bool must_tile(struct sna *sna, int width, int height) 50428d7b3dSmrg{ 51428d7b3dSmrg return (width > sna->render.max_3d_size || 52428d7b3dSmrg height > sna->render.max_3d_size || 53428d7b3dSmrg upload_too_large(sna, width, height)); 54428d7b3dSmrg} 55428d7b3dSmrg 56428d7b3dSmrgstatic bool download_inplace__cpu(struct kgem *kgem, 57428d7b3dSmrg PixmapPtr p, struct kgem_bo *bo, 58428d7b3dSmrg const BoxRec *box, int nbox) 59428d7b3dSmrg{ 60428d7b3dSmrg BoxRec extents; 61428d7b3dSmrg 62428d7b3dSmrg switch (bo->tiling) { 63428d7b3dSmrg case I915_TILING_X: 64428d7b3dSmrg if (!kgem->memcpy_from_tiled_x) 65428d7b3dSmrg return false; 66428d7b3dSmrg case I915_TILING_NONE: 67428d7b3dSmrg break; 68428d7b3dSmrg default: 69428d7b3dSmrg return false; 70428d7b3dSmrg } 71428d7b3dSmrg 72428d7b3dSmrg if (!kgem_bo_can_map__cpu(kgem, bo, false)) 73428d7b3dSmrg return false; 74428d7b3dSmrg 75428d7b3dSmrg if (kgem->has_llc) 76428d7b3dSmrg return true; 77428d7b3dSmrg 78428d7b3dSmrg extents = *box; 79428d7b3dSmrg while (--nbox) { 80428d7b3dSmrg ++box; 81428d7b3dSmrg if 
(box->x1 < extents.x1) 82428d7b3dSmrg extents.x1 = box->x1; 83428d7b3dSmrg if (box->x2 > extents.x2) 84428d7b3dSmrg extents.x2 = box->x2; 85428d7b3dSmrg extents.y2 = box->y2; 86428d7b3dSmrg } 87428d7b3dSmrg 88428d7b3dSmrg if (extents.x2 - extents.x1 == p->drawable.width && 89428d7b3dSmrg extents.y2 - extents.y1 == p->drawable.height) 90428d7b3dSmrg return true; 91428d7b3dSmrg 92428d7b3dSmrg return __kgem_bo_size(bo) <= PAGE_SIZE; 93428d7b3dSmrg} 94428d7b3dSmrg 95428d7b3dSmrgstatic bool 96428d7b3dSmrgread_boxes_inplace__cpu(struct kgem *kgem, 97428d7b3dSmrg PixmapPtr pixmap, struct kgem_bo *bo, 98428d7b3dSmrg const BoxRec *box, int n) 99428d7b3dSmrg{ 100428d7b3dSmrg int bpp = pixmap->drawable.bitsPerPixel; 101428d7b3dSmrg void *src, *dst = pixmap->devPrivate.ptr; 102428d7b3dSmrg int src_pitch = bo->pitch; 103428d7b3dSmrg int dst_pitch = pixmap->devKind; 104428d7b3dSmrg 105428d7b3dSmrg if (!download_inplace__cpu(kgem, dst, bo, box, n)) 106428d7b3dSmrg return false; 107428d7b3dSmrg 108428d7b3dSmrg assert(kgem_bo_can_map__cpu(kgem, bo, false)); 109428d7b3dSmrg assert(bo->tiling != I915_TILING_Y); 110428d7b3dSmrg 111428d7b3dSmrg src = kgem_bo_map__cpu(kgem, bo); 112428d7b3dSmrg if (src == NULL) 113428d7b3dSmrg return false; 114428d7b3dSmrg 115428d7b3dSmrg kgem_bo_sync__cpu_full(kgem, bo, 0); 116428d7b3dSmrg 117428d7b3dSmrg if (sigtrap_get()) 118428d7b3dSmrg return false; 119428d7b3dSmrg 120428d7b3dSmrg DBG(("%s x %d\n", __FUNCTION__, n)); 121428d7b3dSmrg 122428d7b3dSmrg if (bo->tiling == I915_TILING_X) { 123428d7b3dSmrg do { 124428d7b3dSmrg memcpy_from_tiled_x(kgem, src, dst, bpp, src_pitch, dst_pitch, 125428d7b3dSmrg box->x1, box->y1, 126428d7b3dSmrg box->x1, box->y1, 127428d7b3dSmrg box->x2 - box->x1, box->y2 - box->y1); 128428d7b3dSmrg box++; 129428d7b3dSmrg } while (--n); 130428d7b3dSmrg } else { 131428d7b3dSmrg do { 132428d7b3dSmrg memcpy_blt(src, dst, bpp, src_pitch, dst_pitch, 133428d7b3dSmrg box->x1, box->y1, 134428d7b3dSmrg box->x1, box->y1, 135428d7b3dSmrg 
box->x2 - box->x1, box->y2 - box->y1); 136428d7b3dSmrg box++; 137428d7b3dSmrg } while (--n); 138428d7b3dSmrg } 139428d7b3dSmrg 140428d7b3dSmrg sigtrap_put(); 141428d7b3dSmrg return true; 142428d7b3dSmrg} 143428d7b3dSmrg 144428d7b3dSmrgstatic void read_boxes_inplace(struct kgem *kgem, 145428d7b3dSmrg PixmapPtr pixmap, struct kgem_bo *bo, 146428d7b3dSmrg const BoxRec *box, int n) 147428d7b3dSmrg{ 148428d7b3dSmrg int bpp = pixmap->drawable.bitsPerPixel; 149428d7b3dSmrg void *src, *dst = pixmap->devPrivate.ptr; 150428d7b3dSmrg int src_pitch = bo->pitch; 151428d7b3dSmrg int dst_pitch = pixmap->devKind; 152428d7b3dSmrg 153428d7b3dSmrg if (read_boxes_inplace__cpu(kgem, pixmap, bo, box, n)) 154428d7b3dSmrg return; 155428d7b3dSmrg 156428d7b3dSmrg DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling)); 157428d7b3dSmrg 158428d7b3dSmrg if (!kgem_bo_can_map(kgem, bo)) 159428d7b3dSmrg return; 160428d7b3dSmrg 161428d7b3dSmrg kgem_bo_submit(kgem, bo); 162428d7b3dSmrg 163428d7b3dSmrg src = kgem_bo_map(kgem, bo); 164428d7b3dSmrg if (src == NULL) 165428d7b3dSmrg return; 166428d7b3dSmrg 167428d7b3dSmrg if (sigtrap_get()) 168428d7b3dSmrg return; 169428d7b3dSmrg 170428d7b3dSmrg assert(src != dst); 171428d7b3dSmrg do { 172428d7b3dSmrg DBG(("%s: copying box (%d, %d), (%d, %d)\n", 173428d7b3dSmrg __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); 174428d7b3dSmrg 175428d7b3dSmrg assert(box->x2 > box->x1); 176428d7b3dSmrg assert(box->y2 > box->y1); 177428d7b3dSmrg 178428d7b3dSmrg assert(box->x1 >= 0); 179428d7b3dSmrg assert(box->y1 >= 0); 180428d7b3dSmrg assert(box->x2 <= pixmap->drawable.width); 181428d7b3dSmrg assert(box->y2 <= pixmap->drawable.height); 182428d7b3dSmrg 183428d7b3dSmrg assert(box->x1 >= 0); 184428d7b3dSmrg assert(box->y1 >= 0); 185428d7b3dSmrg assert(box->x2 <= pixmap->drawable.width); 186428d7b3dSmrg assert(box->y2 <= pixmap->drawable.height); 187428d7b3dSmrg 188428d7b3dSmrg memcpy_blt(src, dst, bpp, 189428d7b3dSmrg src_pitch, dst_pitch, 190428d7b3dSmrg box->x1, 
box->y1, 191428d7b3dSmrg box->x1, box->y1, 192428d7b3dSmrg box->x2 - box->x1, box->y2 - box->y1); 193428d7b3dSmrg box++; 194428d7b3dSmrg } while (--n); 195428d7b3dSmrg 196428d7b3dSmrg sigtrap_put(); 197428d7b3dSmrg} 198428d7b3dSmrg 199428d7b3dSmrgstatic bool download_inplace(struct kgem *kgem, 200428d7b3dSmrg PixmapPtr p, struct kgem_bo *bo, 201428d7b3dSmrg const BoxRec *box, int nbox) 202428d7b3dSmrg{ 203428d7b3dSmrg bool cpu; 204428d7b3dSmrg 205428d7b3dSmrg if (unlikely(kgem->wedged)) 206428d7b3dSmrg return true; 207428d7b3dSmrg 208428d7b3dSmrg cpu = download_inplace__cpu(kgem, p, bo, box, nbox); 209428d7b3dSmrg if (!cpu && !kgem_bo_can_map(kgem, bo)) 210428d7b3dSmrg return false; 211428d7b3dSmrg 212428d7b3dSmrg if (FORCE_INPLACE) 213428d7b3dSmrg return FORCE_INPLACE > 0; 214428d7b3dSmrg 215428d7b3dSmrg if (cpu) 216428d7b3dSmrg return true; 217428d7b3dSmrg 218428d7b3dSmrg if (kgem->can_blt_cpu && kgem->max_cpu_size) 219428d7b3dSmrg return false; 220428d7b3dSmrg 221428d7b3dSmrg return !__kgem_bo_is_busy(kgem, bo); 222428d7b3dSmrg} 223428d7b3dSmrg 224428d7b3dSmrgvoid sna_read_boxes(struct sna *sna, PixmapPtr dst, struct kgem_bo *src_bo, 225428d7b3dSmrg const BoxRec *box, int nbox) 226428d7b3dSmrg{ 227428d7b3dSmrg struct kgem *kgem = &sna->kgem; 228428d7b3dSmrg struct kgem_bo *dst_bo; 229428d7b3dSmrg BoxRec extents; 230428d7b3dSmrg const BoxRec *tmp_box; 231428d7b3dSmrg int tmp_nbox; 232428d7b3dSmrg void *ptr; 233428d7b3dSmrg int src_pitch, cpp, offset; 234428d7b3dSmrg int n, cmd, br13; 235428d7b3dSmrg bool can_blt; 236428d7b3dSmrg 237428d7b3dSmrg DBG(("%s x %d, src=(handle=%d), dst=(size=(%d, %d)\n", 238428d7b3dSmrg __FUNCTION__, nbox, src_bo->handle, 239428d7b3dSmrg dst->drawable.width, dst->drawable.height)); 240428d7b3dSmrg 241428d7b3dSmrg#ifndef NDEBUG 242428d7b3dSmrg for (n = 0; n < nbox; n++) { 243428d7b3dSmrg if (box[n].x1 < 0 || box[n].y1 < 0 || 244428d7b3dSmrg box[n].x2 * dst->drawable.bitsPerPixel/8 > src_bo->pitch || 245428d7b3dSmrg box[n].y2 * 
src_bo->pitch > kgem_bo_size(src_bo)) 246428d7b3dSmrg { 247428d7b3dSmrg FatalError("source out-of-bounds box[%d]=(%d, %d), (%d, %d), pitch=%d, size=%d\n", n, 248428d7b3dSmrg box[n].x1, box[n].y1, 249428d7b3dSmrg box[n].x2, box[n].y2, 250428d7b3dSmrg src_bo->pitch, kgem_bo_size(src_bo)); 251428d7b3dSmrg } 252428d7b3dSmrg } 253428d7b3dSmrg#endif 254428d7b3dSmrg 255428d7b3dSmrg /* XXX The gpu is faster to perform detiling in bulk, but takes 256428d7b3dSmrg * longer to setup and retrieve the results, with an additional 257428d7b3dSmrg * copy. The long term solution is to use snoopable bo and avoid 258428d7b3dSmrg * this path. 259428d7b3dSmrg */ 260428d7b3dSmrg 261428d7b3dSmrg if (download_inplace(kgem, dst, src_bo, box, nbox)) { 262428d7b3dSmrgfallback: 263428d7b3dSmrg read_boxes_inplace(kgem, dst, src_bo, box, nbox); 264428d7b3dSmrg return; 265428d7b3dSmrg } 266428d7b3dSmrg 267428d7b3dSmrg can_blt = kgem_bo_can_blt(kgem, src_bo) && 268428d7b3dSmrg (box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4); 269428d7b3dSmrg extents = box[0]; 270428d7b3dSmrg for (n = 1; n < nbox; n++) { 271428d7b3dSmrg if (box[n].x1 < extents.x1) 272428d7b3dSmrg extents.x1 = box[n].x1; 273428d7b3dSmrg if (box[n].x2 > extents.x2) 274428d7b3dSmrg extents.x2 = box[n].x2; 275428d7b3dSmrg 276428d7b3dSmrg if (can_blt) 277428d7b3dSmrg can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4); 278428d7b3dSmrg 279428d7b3dSmrg if (box[n].y1 < extents.y1) 280428d7b3dSmrg extents.y1 = box[n].y1; 281428d7b3dSmrg if (box[n].y2 > extents.y2) 282428d7b3dSmrg extents.y2 = box[n].y2; 283428d7b3dSmrg } 284428d7b3dSmrg if (kgem_bo_can_map(kgem, src_bo)) { 285428d7b3dSmrg /* Is it worth detiling? */ 286428d7b3dSmrg if ((extents.y2 - extents.y1 - 1) * src_bo->pitch < 4096) 287428d7b3dSmrg goto fallback; 288428d7b3dSmrg } 289428d7b3dSmrg 290428d7b3dSmrg /* Try to avoid switching rings... 
*/ 291428d7b3dSmrg if (!can_blt || kgem->ring == KGEM_RENDER || 292428d7b3dSmrg upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) { 293428d7b3dSmrg DrawableRec tmp; 294428d7b3dSmrg 295428d7b3dSmrg tmp.width = extents.x2 - extents.x1; 296428d7b3dSmrg tmp.height = extents.y2 - extents.y1; 297428d7b3dSmrg tmp.depth = dst->drawable.depth; 298428d7b3dSmrg tmp.bitsPerPixel = dst->drawable.bitsPerPixel; 299428d7b3dSmrg 300428d7b3dSmrg assert(tmp.width); 301428d7b3dSmrg assert(tmp.height); 302428d7b3dSmrg 303428d7b3dSmrg if (must_tile(sna, tmp.width, tmp.height)) { 304428d7b3dSmrg BoxRec tile, stack[64], *clipped, *c; 305428d7b3dSmrg int step; 306428d7b3dSmrg 307428d7b3dSmrg if (n > ARRAY_SIZE(stack)) { 308428d7b3dSmrg clipped = malloc(sizeof(BoxRec) * n); 309428d7b3dSmrg if (clipped == NULL) 310428d7b3dSmrg goto fallback; 311428d7b3dSmrg } else 312428d7b3dSmrg clipped = stack; 313428d7b3dSmrg 314428d7b3dSmrg step = MIN(sna->render.max_3d_size, 315428d7b3dSmrg 8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel); 316428d7b3dSmrg while (step * step * 4 > sna->kgem.max_upload_tile_size) 317428d7b3dSmrg step /= 2; 318428d7b3dSmrg 319428d7b3dSmrg DBG(("%s: tiling download, using %dx%d tiles\n", 320428d7b3dSmrg __FUNCTION__, step, step)); 321428d7b3dSmrg assert(step); 322428d7b3dSmrg 323428d7b3dSmrg for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) { 324428d7b3dSmrg int y2 = tile.y1 + step; 325428d7b3dSmrg if (y2 > extents.y2) 326428d7b3dSmrg y2 = extents.y2; 327428d7b3dSmrg tile.y2 = y2; 328428d7b3dSmrg 329428d7b3dSmrg for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) { 330428d7b3dSmrg int x2 = tile.x1 + step; 331428d7b3dSmrg if (x2 > extents.x2) 332428d7b3dSmrg x2 = extents.x2; 333428d7b3dSmrg tile.x2 = x2; 334428d7b3dSmrg 335428d7b3dSmrg tmp.width = tile.x2 - tile.x1; 336428d7b3dSmrg tmp.height = tile.y2 - tile.y1; 337428d7b3dSmrg 338428d7b3dSmrg c = clipped; 339428d7b3dSmrg for (n = 0; n < nbox; n++) { 340428d7b3dSmrg *c 
= box[n]; 341428d7b3dSmrg if (!box_intersect(c, &tile)) 342428d7b3dSmrg continue; 343428d7b3dSmrg 344428d7b3dSmrg DBG(("%s: box(%d, %d), (%d, %d),, dst=(%d, %d)\n", 345428d7b3dSmrg __FUNCTION__, 346428d7b3dSmrg c->x1, c->y1, 347428d7b3dSmrg c->x2, c->y2, 348428d7b3dSmrg c->x1 - tile.x1, 349428d7b3dSmrg c->y1 - tile.y1)); 350428d7b3dSmrg c++; 351428d7b3dSmrg } 352428d7b3dSmrg if (c == clipped) 353428d7b3dSmrg continue; 354428d7b3dSmrg 355428d7b3dSmrg dst_bo = kgem_create_buffer_2d(kgem, 356428d7b3dSmrg tmp.width, 357428d7b3dSmrg tmp.height, 358428d7b3dSmrg tmp.bitsPerPixel, 359428d7b3dSmrg KGEM_BUFFER_LAST, 360428d7b3dSmrg &ptr); 361428d7b3dSmrg if (!dst_bo) { 362428d7b3dSmrg if (clipped != stack) 363428d7b3dSmrg free(clipped); 364428d7b3dSmrg goto fallback; 365428d7b3dSmrg } 366428d7b3dSmrg 367428d7b3dSmrg if (!sna->render.copy_boxes(sna, GXcopy, 368428d7b3dSmrg &dst->drawable, src_bo, 0, 0, 369428d7b3dSmrg &tmp, dst_bo, -tile.x1, -tile.y1, 370428d7b3dSmrg clipped, c-clipped, COPY_LAST)) { 371428d7b3dSmrg kgem_bo_destroy(&sna->kgem, dst_bo); 372428d7b3dSmrg if (clipped != stack) 373428d7b3dSmrg free(clipped); 374428d7b3dSmrg goto fallback; 375428d7b3dSmrg } 376428d7b3dSmrg 377428d7b3dSmrg kgem_bo_submit(&sna->kgem, dst_bo); 378428d7b3dSmrg kgem_buffer_read_sync(kgem, dst_bo); 379428d7b3dSmrg 380428d7b3dSmrg if (sigtrap_get() == 0) { 381428d7b3dSmrg while (c-- != clipped) { 382428d7b3dSmrg memcpy_blt(ptr, dst->devPrivate.ptr, tmp.bitsPerPixel, 383428d7b3dSmrg dst_bo->pitch, dst->devKind, 384428d7b3dSmrg c->x1 - tile.x1, 385428d7b3dSmrg c->y1 - tile.y1, 386428d7b3dSmrg c->x1, c->y1, 387428d7b3dSmrg c->x2 - c->x1, 388428d7b3dSmrg c->y2 - c->y1); 389428d7b3dSmrg } 390428d7b3dSmrg sigtrap_put(); 391428d7b3dSmrg } 392428d7b3dSmrg 393428d7b3dSmrg kgem_bo_destroy(&sna->kgem, dst_bo); 394428d7b3dSmrg } 395428d7b3dSmrg } 396428d7b3dSmrg 397428d7b3dSmrg if (clipped != stack) 398428d7b3dSmrg free(clipped); 399428d7b3dSmrg } else { 400428d7b3dSmrg dst_bo = 
kgem_create_buffer_2d(kgem, 401428d7b3dSmrg tmp.width, 402428d7b3dSmrg tmp.height, 403428d7b3dSmrg tmp.bitsPerPixel, 404428d7b3dSmrg KGEM_BUFFER_LAST, 405428d7b3dSmrg &ptr); 406428d7b3dSmrg if (!dst_bo) 407428d7b3dSmrg goto fallback; 408428d7b3dSmrg 409428d7b3dSmrg if (!sna->render.copy_boxes(sna, GXcopy, 410428d7b3dSmrg &dst->drawable, src_bo, 0, 0, 411428d7b3dSmrg &tmp, dst_bo, -extents.x1, -extents.y1, 412428d7b3dSmrg box, nbox, COPY_LAST)) { 413428d7b3dSmrg kgem_bo_destroy(&sna->kgem, dst_bo); 414428d7b3dSmrg goto fallback; 415428d7b3dSmrg } 416428d7b3dSmrg 417428d7b3dSmrg kgem_bo_submit(&sna->kgem, dst_bo); 418428d7b3dSmrg kgem_buffer_read_sync(kgem, dst_bo); 419428d7b3dSmrg 420428d7b3dSmrg if (sigtrap_get() == 0) { 421428d7b3dSmrg for (n = 0; n < nbox; n++) { 422428d7b3dSmrg memcpy_blt(ptr, dst->devPrivate.ptr, tmp.bitsPerPixel, 423428d7b3dSmrg dst_bo->pitch, dst->devKind, 424428d7b3dSmrg box[n].x1 - extents.x1, 425428d7b3dSmrg box[n].y1 - extents.y1, 426428d7b3dSmrg box[n].x1, box[n].y1, 427428d7b3dSmrg box[n].x2 - box[n].x1, 428428d7b3dSmrg box[n].y2 - box[n].y1); 429428d7b3dSmrg } 430428d7b3dSmrg sigtrap_put(); 431428d7b3dSmrg } 432428d7b3dSmrg 433428d7b3dSmrg kgem_bo_destroy(&sna->kgem, dst_bo); 434428d7b3dSmrg } 435428d7b3dSmrg return; 436428d7b3dSmrg } 437428d7b3dSmrg 438428d7b3dSmrg /* count the total number of bytes to be read and allocate a bo */ 439428d7b3dSmrg cpp = dst->drawable.bitsPerPixel / 8; 440428d7b3dSmrg offset = 0; 441428d7b3dSmrg for (n = 0; n < nbox; n++) { 442428d7b3dSmrg int height = box[n].y2 - box[n].y1; 443428d7b3dSmrg int width = box[n].x2 - box[n].x1; 444428d7b3dSmrg offset += PITCH(width, cpp) * height; 445428d7b3dSmrg } 446428d7b3dSmrg 447428d7b3dSmrg DBG((" read buffer size=%d\n", offset)); 448428d7b3dSmrg 449428d7b3dSmrg dst_bo = kgem_create_buffer(kgem, offset, KGEM_BUFFER_LAST, &ptr); 450428d7b3dSmrg if (!dst_bo) { 451428d7b3dSmrg read_boxes_inplace(kgem, dst, src_bo, box, nbox); 452428d7b3dSmrg return; 453428d7b3dSmrg } 
454428d7b3dSmrg 455428d7b3dSmrg cmd = XY_SRC_COPY_BLT_CMD; 456428d7b3dSmrg src_pitch = src_bo->pitch; 457428d7b3dSmrg if (kgem->gen >= 040 && src_bo->tiling) { 458428d7b3dSmrg cmd |= BLT_SRC_TILED; 459428d7b3dSmrg src_pitch >>= 2; 460428d7b3dSmrg } 461428d7b3dSmrg 462428d7b3dSmrg br13 = 0xcc << 16; 463428d7b3dSmrg switch (cpp) { 464428d7b3dSmrg default: 465428d7b3dSmrg case 4: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 466428d7b3dSmrg br13 |= 1 << 25; /* RGB8888 */ 467428d7b3dSmrg case 2: br13 |= 1 << 24; /* RGB565 */ 468428d7b3dSmrg case 1: break; 469428d7b3dSmrg } 470428d7b3dSmrg 471428d7b3dSmrg kgem_set_mode(kgem, KGEM_BLT, dst_bo); 472428d7b3dSmrg if (!kgem_check_batch(kgem, 10) || 473428d7b3dSmrg !kgem_check_reloc_and_exec(kgem, 2) || 474428d7b3dSmrg !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) { 475428d7b3dSmrg kgem_submit(kgem); 476428d7b3dSmrg if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) 477428d7b3dSmrg goto fallback; 478428d7b3dSmrg _kgem_set_mode(kgem, KGEM_BLT); 479428d7b3dSmrg } 480428d7b3dSmrg 481428d7b3dSmrg tmp_nbox = nbox; 482428d7b3dSmrg tmp_box = box; 483428d7b3dSmrg offset = 0; 484428d7b3dSmrg if (sna->kgem.gen >= 0100) { 485428d7b3dSmrg cmd |= 8; 486428d7b3dSmrg do { 487428d7b3dSmrg int nbox_this_time, rem; 488428d7b3dSmrg 489428d7b3dSmrg nbox_this_time = tmp_nbox; 490428d7b3dSmrg rem = kgem_batch_space(kgem); 491428d7b3dSmrg if (10*nbox_this_time > rem) 492428d7b3dSmrg nbox_this_time = rem / 8; 493428d7b3dSmrg if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 494428d7b3dSmrg nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; 495428d7b3dSmrg assert(nbox_this_time); 496428d7b3dSmrg tmp_nbox -= nbox_this_time; 497428d7b3dSmrg 498428d7b3dSmrg assert(kgem->mode == KGEM_BLT); 499428d7b3dSmrg for (n = 0; n < nbox_this_time; n++) { 500428d7b3dSmrg int height = tmp_box[n].y2 - tmp_box[n].y1; 501428d7b3dSmrg int width = tmp_box[n].x2 - tmp_box[n].x1; 502428d7b3dSmrg int pitch = PITCH(width, cpp); 
503428d7b3dSmrg uint32_t *b = kgem->batch + kgem->nbatch; 504428d7b3dSmrg 505428d7b3dSmrg DBG((" blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n", 506428d7b3dSmrg offset, 507428d7b3dSmrg tmp_box[n].x1, tmp_box[n].y1, 508428d7b3dSmrg width, height, pitch)); 509428d7b3dSmrg 510428d7b3dSmrg assert(tmp_box[n].x1 >= 0); 511428d7b3dSmrg assert(tmp_box[n].x2 * dst->drawable.bitsPerPixel/8 <= src_bo->pitch); 512428d7b3dSmrg assert(tmp_box[n].y1 >= 0); 513428d7b3dSmrg assert(tmp_box[n].y2 * src_bo->pitch <= kgem_bo_size(src_bo)); 514428d7b3dSmrg 515428d7b3dSmrg b[0] = cmd; 516428d7b3dSmrg b[1] = br13 | pitch; 517428d7b3dSmrg b[2] = 0; 518428d7b3dSmrg b[3] = height << 16 | width; 519428d7b3dSmrg *(uint64_t *)(b+4) = 520428d7b3dSmrg kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo, 521428d7b3dSmrg I915_GEM_DOMAIN_RENDER << 16 | 522428d7b3dSmrg I915_GEM_DOMAIN_RENDER | 523428d7b3dSmrg KGEM_RELOC_FENCED, 524428d7b3dSmrg offset); 525428d7b3dSmrg b[6] = tmp_box[n].y1 << 16 | tmp_box[n].x1; 526428d7b3dSmrg b[7] = src_pitch; 527428d7b3dSmrg *(uint64_t *)(b+8) = 528428d7b3dSmrg kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo, 529428d7b3dSmrg I915_GEM_DOMAIN_RENDER << 16 | 530428d7b3dSmrg KGEM_RELOC_FENCED, 531428d7b3dSmrg 0); 532428d7b3dSmrg kgem->nbatch += 10; 533428d7b3dSmrg 534428d7b3dSmrg offset += pitch * height; 535428d7b3dSmrg } 536428d7b3dSmrg 537428d7b3dSmrg _kgem_submit(kgem); 538428d7b3dSmrg if (!tmp_nbox) 539428d7b3dSmrg break; 540428d7b3dSmrg 541428d7b3dSmrg _kgem_set_mode(kgem, KGEM_BLT); 542428d7b3dSmrg tmp_box += nbox_this_time; 543428d7b3dSmrg } while (1); 544428d7b3dSmrg } else { 545428d7b3dSmrg cmd |= 6; 546428d7b3dSmrg do { 547428d7b3dSmrg int nbox_this_time, rem; 548428d7b3dSmrg 549428d7b3dSmrg nbox_this_time = tmp_nbox; 550428d7b3dSmrg rem = kgem_batch_space(kgem); 551428d7b3dSmrg if (8*nbox_this_time > rem) 552428d7b3dSmrg nbox_this_time = rem / 8; 553428d7b3dSmrg if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 554428d7b3dSmrg nbox_this_time = 
(KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; 555428d7b3dSmrg assert(nbox_this_time); 556428d7b3dSmrg tmp_nbox -= nbox_this_time; 557428d7b3dSmrg 558428d7b3dSmrg assert(kgem->mode == KGEM_BLT); 559428d7b3dSmrg for (n = 0; n < nbox_this_time; n++) { 560428d7b3dSmrg int height = tmp_box[n].y2 - tmp_box[n].y1; 561428d7b3dSmrg int width = tmp_box[n].x2 - tmp_box[n].x1; 562428d7b3dSmrg int pitch = PITCH(width, cpp); 563428d7b3dSmrg uint32_t *b = kgem->batch + kgem->nbatch; 564428d7b3dSmrg 565428d7b3dSmrg DBG((" blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n", 566428d7b3dSmrg offset, 567428d7b3dSmrg tmp_box[n].x1, tmp_box[n].y1, 568428d7b3dSmrg width, height, pitch)); 569428d7b3dSmrg 570428d7b3dSmrg assert(tmp_box[n].x1 >= 0); 571428d7b3dSmrg assert(tmp_box[n].x2 * dst->drawable.bitsPerPixel/8 <= src_bo->pitch); 572428d7b3dSmrg assert(tmp_box[n].y1 >= 0); 573428d7b3dSmrg assert(tmp_box[n].y2 * src_bo->pitch <= kgem_bo_size(src_bo)); 574428d7b3dSmrg 575428d7b3dSmrg b[0] = cmd; 576428d7b3dSmrg b[1] = br13 | pitch; 577428d7b3dSmrg b[2] = 0; 578428d7b3dSmrg b[3] = height << 16 | width; 579428d7b3dSmrg b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, 580428d7b3dSmrg I915_GEM_DOMAIN_RENDER << 16 | 581428d7b3dSmrg I915_GEM_DOMAIN_RENDER | 582428d7b3dSmrg KGEM_RELOC_FENCED, 583428d7b3dSmrg offset); 584428d7b3dSmrg b[5] = tmp_box[n].y1 << 16 | tmp_box[n].x1; 585428d7b3dSmrg b[6] = src_pitch; 586428d7b3dSmrg b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, 587428d7b3dSmrg I915_GEM_DOMAIN_RENDER << 16 | 588428d7b3dSmrg KGEM_RELOC_FENCED, 589428d7b3dSmrg 0); 590428d7b3dSmrg kgem->nbatch += 8; 591428d7b3dSmrg 592428d7b3dSmrg offset += pitch * height; 593428d7b3dSmrg } 594428d7b3dSmrg 595428d7b3dSmrg _kgem_submit(kgem); 596428d7b3dSmrg if (!tmp_nbox) 597428d7b3dSmrg break; 598428d7b3dSmrg 599428d7b3dSmrg _kgem_set_mode(kgem, KGEM_BLT); 600428d7b3dSmrg tmp_box += nbox_this_time; 601428d7b3dSmrg } while (1); 602428d7b3dSmrg } 603428d7b3dSmrg assert(offset == 
__kgem_buffer_size(dst_bo)); 604428d7b3dSmrg 605428d7b3dSmrg kgem_buffer_read_sync(kgem, dst_bo); 606428d7b3dSmrg 607428d7b3dSmrg if (sigtrap_get() == 0) { 608428d7b3dSmrg char *src = ptr; 609428d7b3dSmrg do { 610428d7b3dSmrg int height = box->y2 - box->y1; 611428d7b3dSmrg int width = box->x2 - box->x1; 612428d7b3dSmrg int pitch = PITCH(width, cpp); 613428d7b3dSmrg 614428d7b3dSmrg DBG((" copy offset %lx [%08x...%08x...%08x]: (%d, %d) x (%d, %d), src pitch=%d, dst pitch=%d, bpp=%d\n", 615428d7b3dSmrg (long)((char *)src - (char *)ptr), 616428d7b3dSmrg *(uint32_t*)src, *(uint32_t*)(src+pitch*height/2 + pitch/2 - 4), *(uint32_t*)(src+pitch*height - 4), 617428d7b3dSmrg box->x1, box->y1, 618428d7b3dSmrg width, height, 619428d7b3dSmrg pitch, dst->devKind, cpp*8)); 620428d7b3dSmrg 621428d7b3dSmrg assert(box->x1 >= 0); 622428d7b3dSmrg assert(box->x2 <= dst->drawable.width); 623428d7b3dSmrg assert(box->y1 >= 0); 624428d7b3dSmrg assert(box->y2 <= dst->drawable.height); 625428d7b3dSmrg 626428d7b3dSmrg memcpy_blt(src, dst->devPrivate.ptr, cpp*8, 627428d7b3dSmrg pitch, dst->devKind, 628428d7b3dSmrg 0, 0, 629428d7b3dSmrg box->x1, box->y1, 630428d7b3dSmrg width, height); 631428d7b3dSmrg box++; 632428d7b3dSmrg 633428d7b3dSmrg src += pitch * height; 634428d7b3dSmrg } while (--nbox); 635428d7b3dSmrg assert(src - (char *)ptr == __kgem_buffer_size(dst_bo)); 636428d7b3dSmrg sigtrap_put(); 637428d7b3dSmrg } 638428d7b3dSmrg kgem_bo_destroy(kgem, dst_bo); 639428d7b3dSmrg sna->blt_state.fill_bo = 0; 640428d7b3dSmrg} 641428d7b3dSmrg 642428d7b3dSmrgstatic bool upload_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo) 643428d7b3dSmrg{ 644428d7b3dSmrg DBG(("%s: tiling=%d\n", __FUNCTION__, bo->tiling)); 645428d7b3dSmrg switch (bo->tiling) { 646428d7b3dSmrg case I915_TILING_Y: 647428d7b3dSmrg return false; 648428d7b3dSmrg case I915_TILING_X: 649428d7b3dSmrg if (!kgem->memcpy_to_tiled_x) 650428d7b3dSmrg return false; 651428d7b3dSmrg default: 652428d7b3dSmrg break; 653428d7b3dSmrg } 
654428d7b3dSmrg 655428d7b3dSmrg if (kgem->has_wc_mmap) 656428d7b3dSmrg return true; 657428d7b3dSmrg 658428d7b3dSmrg return kgem_bo_can_map__cpu(kgem, bo, true); 659428d7b3dSmrg} 660428d7b3dSmrg 661428d7b3dSmrgstatic bool 662428d7b3dSmrgwrite_boxes_inplace__tiled(struct kgem *kgem, 663428d7b3dSmrg const uint8_t *src, int stride, int bpp, int16_t src_dx, int16_t src_dy, 664428d7b3dSmrg struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy, 665428d7b3dSmrg const BoxRec *box, int n) 666428d7b3dSmrg{ 667428d7b3dSmrg uint8_t *dst; 668428d7b3dSmrg 669428d7b3dSmrg assert(kgem->has_wc_mmap || kgem_bo_can_map__cpu(kgem, bo, true)); 670428d7b3dSmrg assert(bo->tiling != I915_TILING_Y); 671428d7b3dSmrg 672428d7b3dSmrg if (kgem_bo_can_map__cpu(kgem, bo, true)) { 673428d7b3dSmrg dst = kgem_bo_map__cpu(kgem, bo); 674428d7b3dSmrg if (dst == NULL) 675428d7b3dSmrg return false; 676428d7b3dSmrg 677428d7b3dSmrg kgem_bo_sync__cpu(kgem, bo); 678428d7b3dSmrg } else { 679428d7b3dSmrg dst = kgem_bo_map__wc(kgem, bo); 680428d7b3dSmrg if (dst == NULL) 681428d7b3dSmrg return false; 682428d7b3dSmrg 683428d7b3dSmrg kgem_bo_sync__gtt(kgem, bo); 684428d7b3dSmrg } 685428d7b3dSmrg 686428d7b3dSmrg if (sigtrap_get()) 687428d7b3dSmrg return false; 688428d7b3dSmrg 689428d7b3dSmrg if (bo->tiling) { 690428d7b3dSmrg do { 691428d7b3dSmrg memcpy_to_tiled_x(kgem, src, dst, bpp, stride, bo->pitch, 692428d7b3dSmrg box->x1 + src_dx, box->y1 + src_dy, 693428d7b3dSmrg box->x1 + dst_dx, box->y1 + dst_dy, 694428d7b3dSmrg box->x2 - box->x1, box->y2 - box->y1); 695428d7b3dSmrg box++; 696428d7b3dSmrg } while (--n); 697428d7b3dSmrg } else { 698428d7b3dSmrg do { 699428d7b3dSmrg memcpy_blt(src, dst, bpp, stride, bo->pitch, 700428d7b3dSmrg box->x1 + src_dx, box->y1 + src_dy, 701428d7b3dSmrg box->x1 + dst_dx, box->y1 + dst_dy, 702428d7b3dSmrg box->x2 - box->x1, box->y2 - box->y1); 703428d7b3dSmrg box++; 704428d7b3dSmrg } while (--n); 705428d7b3dSmrg } 706428d7b3dSmrg 707428d7b3dSmrg sigtrap_put(); 708428d7b3dSmrg return true; 
709428d7b3dSmrg} 710428d7b3dSmrg 711428d7b3dSmrgstatic bool write_boxes_inplace(struct kgem *kgem, 712428d7b3dSmrg const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy, 713428d7b3dSmrg struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy, 714428d7b3dSmrg const BoxRec *box, int n) 715428d7b3dSmrg{ 716428d7b3dSmrg void *dst; 717428d7b3dSmrg 718428d7b3dSmrg DBG(("%s x %d, handle=%d, tiling=%d\n", 719428d7b3dSmrg __FUNCTION__, n, bo->handle, bo->tiling)); 720428d7b3dSmrg 721428d7b3dSmrg if (upload_inplace__tiled(kgem, bo) && 722428d7b3dSmrg write_boxes_inplace__tiled(kgem, src, stride, bpp, src_dx, src_dy, 723428d7b3dSmrg bo, dst_dx, dst_dy, box, n)) 724428d7b3dSmrg return true; 725428d7b3dSmrg 726428d7b3dSmrg if (!kgem_bo_can_map(kgem, bo)) 727428d7b3dSmrg return false; 728428d7b3dSmrg 729428d7b3dSmrg kgem_bo_submit(kgem, bo); 730428d7b3dSmrg 731428d7b3dSmrg dst = kgem_bo_map(kgem, bo); 732428d7b3dSmrg if (dst == NULL) 733428d7b3dSmrg return false; 734428d7b3dSmrg 735428d7b3dSmrg assert(dst != src); 736428d7b3dSmrg 737428d7b3dSmrg if (sigtrap_get()) 738428d7b3dSmrg return false; 739428d7b3dSmrg 740428d7b3dSmrg do { 741428d7b3dSmrg DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__, 742428d7b3dSmrg box->x1 + src_dx, box->y1 + src_dy, 743428d7b3dSmrg box->x1 + dst_dx, box->y1 + dst_dy, 744428d7b3dSmrg box->x2 - box->x1, box->y2 - box->y1, 745428d7b3dSmrg bpp, stride, bo->pitch)); 746428d7b3dSmrg 747428d7b3dSmrg assert(box->x2 > box->x1); 748428d7b3dSmrg assert(box->y2 > box->y1); 749428d7b3dSmrg 750428d7b3dSmrg assert(box->x1 + dst_dx >= 0); 751428d7b3dSmrg assert((box->x2 + dst_dx)*bpp <= 8*bo->pitch); 752428d7b3dSmrg assert(box->y1 + dst_dy >= 0); 753428d7b3dSmrg assert((box->y2 + dst_dy)*bo->pitch <= kgem_bo_size(bo)); 754428d7b3dSmrg 755428d7b3dSmrg assert(box->x1 + src_dx >= 0); 756428d7b3dSmrg assert((box->x2 + src_dx)*bpp <= 8*stride); 757428d7b3dSmrg assert(box->y1 + src_dy >= 0); 758428d7b3dSmrg 
759428d7b3dSmrg memcpy_blt(src, dst, bpp, 760428d7b3dSmrg stride, bo->pitch, 761428d7b3dSmrg box->x1 + src_dx, box->y1 + src_dy, 762428d7b3dSmrg box->x1 + dst_dx, box->y1 + dst_dy, 763428d7b3dSmrg box->x2 - box->x1, box->y2 - box->y1); 764428d7b3dSmrg box++; 765428d7b3dSmrg } while (--n); 766428d7b3dSmrg 767428d7b3dSmrg sigtrap_put(); 768428d7b3dSmrg return true; 769428d7b3dSmrg} 770428d7b3dSmrg 771428d7b3dSmrgstatic bool __upload_inplace(struct kgem *kgem, 772428d7b3dSmrg struct kgem_bo *bo, 773428d7b3dSmrg const BoxRec *box, 774428d7b3dSmrg int n, int bpp) 775428d7b3dSmrg{ 776428d7b3dSmrg unsigned int bytes; 777428d7b3dSmrg 778428d7b3dSmrg if (FORCE_INPLACE) 779428d7b3dSmrg return FORCE_INPLACE > 0; 780428d7b3dSmrg 781428d7b3dSmrg /* If we are writing through the GTT, check first if we might be 782428d7b3dSmrg * able to almagamate a series of small writes into a single 783428d7b3dSmrg * operation. 784428d7b3dSmrg */ 785428d7b3dSmrg bytes = 0; 786428d7b3dSmrg while (n--) { 787428d7b3dSmrg bytes += (box->x2 - box->x1) * (box->y2 - box->y1); 788428d7b3dSmrg box++; 789428d7b3dSmrg } 790428d7b3dSmrg if (__kgem_bo_is_busy(kgem, bo)) 791428d7b3dSmrg return bytes * bpp >> 12 >= kgem->half_cpu_cache_pages; 792428d7b3dSmrg else 793428d7b3dSmrg return bytes * bpp >> 12; 794428d7b3dSmrg} 795428d7b3dSmrg 796428d7b3dSmrgstatic bool upload_inplace(struct kgem *kgem, 797428d7b3dSmrg struct kgem_bo *bo, 798428d7b3dSmrg const BoxRec *box, 799428d7b3dSmrg int n, int bpp) 800428d7b3dSmrg{ 801428d7b3dSmrg if (unlikely(kgem->wedged)) 802428d7b3dSmrg return true; 803428d7b3dSmrg 804428d7b3dSmrg if (!kgem_bo_can_map(kgem, bo) && !upload_inplace__tiled(kgem, bo)) 805428d7b3dSmrg return false; 806428d7b3dSmrg 807428d7b3dSmrg return __upload_inplace(kgem, bo, box, n,bpp); 808428d7b3dSmrg} 809428d7b3dSmrg 810428d7b3dSmrgbool sna_write_boxes(struct sna *sna, PixmapPtr dst, 811428d7b3dSmrg struct kgem_bo * const dst_bo, int16_t const dst_dx, int16_t const dst_dy, 812428d7b3dSmrg const void * 
const src, int const stride, int16_t const src_dx, int16_t const src_dy, 813428d7b3dSmrg const BoxRec *box, int nbox) 814428d7b3dSmrg{ 815428d7b3dSmrg struct kgem *kgem = &sna->kgem; 816428d7b3dSmrg struct kgem_bo *src_bo; 817428d7b3dSmrg BoxRec extents; 818428d7b3dSmrg void *ptr; 819428d7b3dSmrg int offset; 820428d7b3dSmrg int n, cmd, br13; 821428d7b3dSmrg bool can_blt; 822428d7b3dSmrg 823428d7b3dSmrg DBG(("%s x %d, src stride=%d, src dx=(%d, %d)\n", __FUNCTION__, nbox, stride, src_dx, src_dy)); 824428d7b3dSmrg 825428d7b3dSmrg if (upload_inplace(kgem, dst_bo, box, nbox, dst->drawable.bitsPerPixel) && 826428d7b3dSmrg write_boxes_inplace(kgem, 827428d7b3dSmrg src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy, 828428d7b3dSmrg dst_bo, dst_dx, dst_dy, 829428d7b3dSmrg box, nbox)) 830428d7b3dSmrg return true; 831428d7b3dSmrg 832428d7b3dSmrg if (wedged(sna)) 833428d7b3dSmrg return false; 834428d7b3dSmrg 835428d7b3dSmrg can_blt = kgem_bo_can_blt(kgem, dst_bo) && 836428d7b3dSmrg (box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4); 837428d7b3dSmrg extents = box[0]; 838428d7b3dSmrg for (n = 1; n < nbox; n++) { 839428d7b3dSmrg if (box[n].x1 < extents.x1) 840428d7b3dSmrg extents.x1 = box[n].x1; 841428d7b3dSmrg if (box[n].x2 > extents.x2) 842428d7b3dSmrg extents.x2 = box[n].x2; 843428d7b3dSmrg 844428d7b3dSmrg if (can_blt) 845428d7b3dSmrg can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4); 846428d7b3dSmrg 847428d7b3dSmrg if (box[n].y1 < extents.y1) 848428d7b3dSmrg extents.y1 = box[n].y1; 849428d7b3dSmrg if (box[n].y2 > extents.y2) 850428d7b3dSmrg extents.y2 = box[n].y2; 851428d7b3dSmrg } 852428d7b3dSmrg 853428d7b3dSmrg /* Try to avoid switching rings... 
*/ 854428d7b3dSmrg if (!can_blt || kgem->ring == KGEM_RENDER || 855428d7b3dSmrg upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) { 856428d7b3dSmrg DrawableRec tmp; 857428d7b3dSmrg 858428d7b3dSmrg tmp.width = extents.x2 - extents.x1; 859428d7b3dSmrg tmp.height = extents.y2 - extents.y1; 860428d7b3dSmrg tmp.depth = dst->drawable.depth; 861428d7b3dSmrg tmp.bitsPerPixel = dst->drawable.bitsPerPixel; 862428d7b3dSmrg 863428d7b3dSmrg assert(tmp.width); 864428d7b3dSmrg assert(tmp.height); 865428d7b3dSmrg 866428d7b3dSmrg DBG(("%s: upload (%d, %d)x(%d, %d), max %dx%d\n", 867428d7b3dSmrg __FUNCTION__, 868428d7b3dSmrg extents.x1, extents.y1, 869428d7b3dSmrg tmp.width, tmp.height, 870428d7b3dSmrg sna->render.max_3d_size, sna->render.max_3d_size)); 871428d7b3dSmrg if (must_tile(sna, tmp.width, tmp.height)) { 872428d7b3dSmrg BoxRec tile, stack[64], *clipped; 873428d7b3dSmrg int cpp, step; 874428d7b3dSmrg 875428d7b3dSmrgtile: 876428d7b3dSmrg cpp = dst->drawable.bitsPerPixel / 8; 877428d7b3dSmrg step = MIN(sna->render.max_3d_size, 878428d7b3dSmrg (MAXSHORT&~63) / cpp); 879428d7b3dSmrg while (step * step * cpp > sna->kgem.max_upload_tile_size) 880428d7b3dSmrg step /= 2; 881428d7b3dSmrg 882428d7b3dSmrg if (step * cpp > 4096) 883428d7b3dSmrg step = 4096 / cpp; 884428d7b3dSmrg assert(step); 885428d7b3dSmrg 886428d7b3dSmrg DBG(("%s: tiling upload, using %dx%d tiles\n", 887428d7b3dSmrg __FUNCTION__, step, step)); 888428d7b3dSmrg 889428d7b3dSmrg if (n > ARRAY_SIZE(stack)) { 890428d7b3dSmrg clipped = malloc(sizeof(BoxRec) * n); 891428d7b3dSmrg if (clipped == NULL) 892428d7b3dSmrg goto fallback; 893428d7b3dSmrg } else 894428d7b3dSmrg clipped = stack; 895428d7b3dSmrg 896428d7b3dSmrg for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) { 897428d7b3dSmrg int y2 = tile.y1 + step; 898428d7b3dSmrg if (y2 > extents.y2) 899428d7b3dSmrg y2 = extents.y2; 900428d7b3dSmrg tile.y2 = y2; 901428d7b3dSmrg 902428d7b3dSmrg for (tile.x1 = extents.x1; tile.x1 < 
extents.x2; tile.x1 = tile.x2) { 903428d7b3dSmrg int x2 = tile.x1 + step; 904428d7b3dSmrg if (x2 > extents.x2) 905428d7b3dSmrg x2 = extents.x2; 906428d7b3dSmrg tile.x2 = x2; 907428d7b3dSmrg 908428d7b3dSmrg tmp.width = tile.x2 - tile.x1; 909428d7b3dSmrg tmp.height = tile.y2 - tile.y1; 910428d7b3dSmrg 911428d7b3dSmrg src_bo = kgem_create_buffer_2d(kgem, 912428d7b3dSmrg tmp.width, 913428d7b3dSmrg tmp.height, 914428d7b3dSmrg tmp.bitsPerPixel, 915428d7b3dSmrg KGEM_BUFFER_WRITE_INPLACE, 916428d7b3dSmrg &ptr); 917428d7b3dSmrg if (!src_bo) { 918428d7b3dSmrg if (clipped != stack) 919428d7b3dSmrg free(clipped); 920428d7b3dSmrg goto fallback; 921428d7b3dSmrg } 922428d7b3dSmrg 923428d7b3dSmrg if (sigtrap_get() == 0) { 924428d7b3dSmrg BoxRec *c = clipped; 925428d7b3dSmrg for (n = 0; n < nbox; n++) { 926428d7b3dSmrg *c = box[n]; 927428d7b3dSmrg if (!box_intersect(c, &tile)) 928428d7b3dSmrg continue; 929428d7b3dSmrg 930428d7b3dSmrg DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n", 931428d7b3dSmrg __FUNCTION__, 932428d7b3dSmrg c->x1, c->y1, 933428d7b3dSmrg c->x2, c->y2, 934428d7b3dSmrg src_dx, src_dy, 935428d7b3dSmrg c->x1 - tile.x1, 936428d7b3dSmrg c->y1 - tile.y1)); 937428d7b3dSmrg memcpy_blt(src, ptr, tmp.bitsPerPixel, 938428d7b3dSmrg stride, src_bo->pitch, 939428d7b3dSmrg c->x1 + src_dx, 940428d7b3dSmrg c->y1 + src_dy, 941428d7b3dSmrg c->x1 - tile.x1, 942428d7b3dSmrg c->y1 - tile.y1, 943428d7b3dSmrg c->x2 - c->x1, 944428d7b3dSmrg c->y2 - c->y1); 945428d7b3dSmrg c++; 946428d7b3dSmrg } 947428d7b3dSmrg 948428d7b3dSmrg if (c != clipped) 949428d7b3dSmrg n = sna->render.copy_boxes(sna, GXcopy, 950428d7b3dSmrg &tmp, src_bo, -tile.x1, -tile.y1, 951428d7b3dSmrg &dst->drawable, dst_bo, dst_dx, dst_dy, 952428d7b3dSmrg clipped, c - clipped, 0); 953428d7b3dSmrg else 954428d7b3dSmrg n = 1; 955428d7b3dSmrg sigtrap_put(); 956428d7b3dSmrg } else 957428d7b3dSmrg n = 0; 958428d7b3dSmrg 959428d7b3dSmrg kgem_bo_destroy(&sna->kgem, src_bo); 960428d7b3dSmrg 961428d7b3dSmrg if (!n) { 
962428d7b3dSmrg if (clipped != stack) 963428d7b3dSmrg free(clipped); 964428d7b3dSmrg goto fallback; 965428d7b3dSmrg } 966428d7b3dSmrg } 967428d7b3dSmrg } 968428d7b3dSmrg 969428d7b3dSmrg if (clipped != stack) 970428d7b3dSmrg free(clipped); 971428d7b3dSmrg } else { 972428d7b3dSmrg src_bo = kgem_create_buffer_2d(kgem, 973428d7b3dSmrg tmp.width, 974428d7b3dSmrg tmp.height, 975428d7b3dSmrg tmp.bitsPerPixel, 976428d7b3dSmrg KGEM_BUFFER_WRITE_INPLACE, 977428d7b3dSmrg &ptr); 978428d7b3dSmrg if (!src_bo) 979428d7b3dSmrg goto fallback; 980428d7b3dSmrg 981428d7b3dSmrg if (sigtrap_get() == 0) { 982428d7b3dSmrg for (n = 0; n < nbox; n++) { 983428d7b3dSmrg DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n", 984428d7b3dSmrg __FUNCTION__, 985428d7b3dSmrg box[n].x1, box[n].y1, 986428d7b3dSmrg box[n].x2, box[n].y2, 987428d7b3dSmrg src_dx, src_dy, 988428d7b3dSmrg box[n].x1 - extents.x1, 989428d7b3dSmrg box[n].y1 - extents.y1)); 990428d7b3dSmrg memcpy_blt(src, ptr, tmp.bitsPerPixel, 991428d7b3dSmrg stride, src_bo->pitch, 992428d7b3dSmrg box[n].x1 + src_dx, 993428d7b3dSmrg box[n].y1 + src_dy, 994428d7b3dSmrg box[n].x1 - extents.x1, 995428d7b3dSmrg box[n].y1 - extents.y1, 996428d7b3dSmrg box[n].x2 - box[n].x1, 997428d7b3dSmrg box[n].y2 - box[n].y1); 998428d7b3dSmrg } 999428d7b3dSmrg 1000428d7b3dSmrg n = sna->render.copy_boxes(sna, GXcopy, 1001428d7b3dSmrg &tmp, src_bo, -extents.x1, -extents.y1, 1002428d7b3dSmrg &dst->drawable, dst_bo, dst_dx, dst_dy, 1003428d7b3dSmrg box, nbox, 0); 1004428d7b3dSmrg sigtrap_put(); 1005428d7b3dSmrg } else 1006428d7b3dSmrg n = 0; 1007428d7b3dSmrg 1008428d7b3dSmrg kgem_bo_destroy(&sna->kgem, src_bo); 1009428d7b3dSmrg 1010428d7b3dSmrg if (!n) 1011428d7b3dSmrg goto tile; 1012428d7b3dSmrg } 1013428d7b3dSmrg 1014428d7b3dSmrg return true; 1015428d7b3dSmrg } 1016428d7b3dSmrg 1017428d7b3dSmrg cmd = XY_SRC_COPY_BLT_CMD; 1018428d7b3dSmrg br13 = dst_bo->pitch; 1019428d7b3dSmrg if (kgem->gen >= 040 && dst_bo->tiling) { 1020428d7b3dSmrg cmd |= 
BLT_DST_TILED; 1021428d7b3dSmrg br13 >>= 2; 1022428d7b3dSmrg } 1023428d7b3dSmrg br13 |= 0xcc << 16; 1024428d7b3dSmrg switch (dst->drawable.bitsPerPixel) { 1025428d7b3dSmrg default: 1026428d7b3dSmrg case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 1027428d7b3dSmrg br13 |= 1 << 25; /* RGB8888 */ 1028428d7b3dSmrg case 16: br13 |= 1 << 24; /* RGB565 */ 1029428d7b3dSmrg case 8: break; 1030428d7b3dSmrg } 1031428d7b3dSmrg 1032428d7b3dSmrg kgem_set_mode(kgem, KGEM_BLT, dst_bo); 1033428d7b3dSmrg if (!kgem_check_batch(kgem, 10) || 1034428d7b3dSmrg !kgem_check_reloc_and_exec(kgem, 2) || 1035428d7b3dSmrg !kgem_check_bo_fenced(kgem, dst_bo)) { 1036428d7b3dSmrg kgem_submit(kgem); 1037428d7b3dSmrg if (!kgem_check_bo_fenced(kgem, dst_bo)) 1038428d7b3dSmrg goto fallback; 1039428d7b3dSmrg _kgem_set_mode(kgem, KGEM_BLT); 1040428d7b3dSmrg } 1041428d7b3dSmrg 1042428d7b3dSmrg if (kgem->gen >= 0100) { 1043428d7b3dSmrg cmd |= 8; 1044428d7b3dSmrg do { 1045428d7b3dSmrg int nbox_this_time, rem; 1046428d7b3dSmrg 1047428d7b3dSmrg nbox_this_time = nbox; 1048428d7b3dSmrg rem = kgem_batch_space(kgem); 1049428d7b3dSmrg if (10*nbox_this_time > rem) 1050428d7b3dSmrg nbox_this_time = rem / 8; 1051428d7b3dSmrg if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 1052428d7b3dSmrg nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; 1053428d7b3dSmrg assert(nbox_this_time); 1054428d7b3dSmrg nbox -= nbox_this_time; 1055428d7b3dSmrg 1056428d7b3dSmrg /* Count the total number of bytes to be read and allocate a 1057428d7b3dSmrg * single buffer large enough. Or if it is very small, combine 1058428d7b3dSmrg * with other allocations. 
*/ 1059428d7b3dSmrg offset = 0; 1060428d7b3dSmrg for (n = 0; n < nbox_this_time; n++) { 1061428d7b3dSmrg int height = box[n].y2 - box[n].y1; 1062428d7b3dSmrg int width = box[n].x2 - box[n].x1; 1063428d7b3dSmrg offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height; 1064428d7b3dSmrg } 1065428d7b3dSmrg 1066428d7b3dSmrg src_bo = kgem_create_buffer(kgem, offset, 1067428d7b3dSmrg KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0), 1068428d7b3dSmrg &ptr); 1069428d7b3dSmrg if (!src_bo) 1070428d7b3dSmrg break; 1071428d7b3dSmrg 1072428d7b3dSmrg if (sigtrap_get() == 0) { 1073428d7b3dSmrg offset = 0; 1074428d7b3dSmrg do { 1075428d7b3dSmrg int height = box->y2 - box->y1; 1076428d7b3dSmrg int width = box->x2 - box->x1; 1077428d7b3dSmrg int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3); 1078428d7b3dSmrg uint32_t *b; 1079428d7b3dSmrg 1080428d7b3dSmrg DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n", 1081428d7b3dSmrg __FUNCTION__, 1082428d7b3dSmrg box->x1 + src_dx, box->y1 + src_dy, 1083428d7b3dSmrg box->x1 + dst_dx, box->y1 + dst_dy, 1084428d7b3dSmrg width, height, 1085428d7b3dSmrg offset, pitch)); 1086428d7b3dSmrg 1087428d7b3dSmrg assert(box->x1 + src_dx >= 0); 1088428d7b3dSmrg assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride); 1089428d7b3dSmrg assert(box->y1 + src_dy >= 0); 1090428d7b3dSmrg 1091428d7b3dSmrg assert(box->x1 + dst_dx >= 0); 1092428d7b3dSmrg assert(box->y1 + dst_dy >= 0); 1093428d7b3dSmrg 1094428d7b3dSmrg memcpy_blt(src, (char *)ptr + offset, 1095428d7b3dSmrg dst->drawable.bitsPerPixel, 1096428d7b3dSmrg stride, pitch, 1097428d7b3dSmrg box->x1 + src_dx, box->y1 + src_dy, 1098428d7b3dSmrg 0, 0, 1099428d7b3dSmrg width, height); 1100428d7b3dSmrg 1101428d7b3dSmrg assert(kgem->mode == KGEM_BLT); 1102428d7b3dSmrg b = kgem->batch + kgem->nbatch; 1103428d7b3dSmrg b[0] = cmd; 1104428d7b3dSmrg b[1] = br13; 1105428d7b3dSmrg b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx); 1106428d7b3dSmrg b[3] 
= (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx); 1107428d7b3dSmrg *(uint64_t *)(b+4) = 1108428d7b3dSmrg kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo, 1109428d7b3dSmrg I915_GEM_DOMAIN_RENDER << 16 | 1110428d7b3dSmrg I915_GEM_DOMAIN_RENDER | 1111428d7b3dSmrg KGEM_RELOC_FENCED, 1112428d7b3dSmrg 0); 1113428d7b3dSmrg b[6] = 0; 1114428d7b3dSmrg b[7] = pitch; 1115428d7b3dSmrg *(uint64_t *)(b+8) = 1116428d7b3dSmrg kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo, 1117428d7b3dSmrg I915_GEM_DOMAIN_RENDER << 16 | 1118428d7b3dSmrg KGEM_RELOC_FENCED, 1119428d7b3dSmrg offset); 1120428d7b3dSmrg kgem->nbatch += 10; 1121428d7b3dSmrg 1122428d7b3dSmrg box++; 1123428d7b3dSmrg offset += pitch * height; 1124428d7b3dSmrg } while (--nbox_this_time); 1125428d7b3dSmrg assert(offset == __kgem_buffer_size(src_bo)); 1126428d7b3dSmrg sigtrap_put(); 1127428d7b3dSmrg } 1128428d7b3dSmrg 1129428d7b3dSmrg if (nbox) { 1130428d7b3dSmrg _kgem_submit(kgem); 1131428d7b3dSmrg _kgem_set_mode(kgem, KGEM_BLT); 1132428d7b3dSmrg } 1133428d7b3dSmrg 1134428d7b3dSmrg kgem_bo_destroy(kgem, src_bo); 1135428d7b3dSmrg } while (nbox); 1136428d7b3dSmrg } else { 1137428d7b3dSmrg cmd |= 6; 1138428d7b3dSmrg do { 1139428d7b3dSmrg int nbox_this_time, rem; 1140428d7b3dSmrg 1141428d7b3dSmrg nbox_this_time = nbox; 1142428d7b3dSmrg rem = kgem_batch_space(kgem); 1143428d7b3dSmrg if (8*nbox_this_time > rem) 1144428d7b3dSmrg nbox_this_time = rem / 8; 1145428d7b3dSmrg if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 1146428d7b3dSmrg nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; 1147428d7b3dSmrg assert(nbox_this_time); 1148428d7b3dSmrg nbox -= nbox_this_time; 1149428d7b3dSmrg 1150428d7b3dSmrg /* Count the total number of bytes to be read and allocate a 1151428d7b3dSmrg * single buffer large enough. Or if it is very small, combine 1152428d7b3dSmrg * with other allocations. 
*/ 1153428d7b3dSmrg offset = 0; 1154428d7b3dSmrg for (n = 0; n < nbox_this_time; n++) { 1155428d7b3dSmrg int height = box[n].y2 - box[n].y1; 1156428d7b3dSmrg int width = box[n].x2 - box[n].x1; 1157428d7b3dSmrg offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height; 1158428d7b3dSmrg } 1159428d7b3dSmrg 1160428d7b3dSmrg src_bo = kgem_create_buffer(kgem, offset, 1161428d7b3dSmrg KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0), 1162428d7b3dSmrg &ptr); 1163428d7b3dSmrg if (!src_bo) 1164428d7b3dSmrg break; 1165428d7b3dSmrg 1166428d7b3dSmrg if (sigtrap_get()) { 1167428d7b3dSmrg kgem_bo_destroy(kgem, src_bo); 1168428d7b3dSmrg goto fallback; 1169428d7b3dSmrg } 1170428d7b3dSmrg 1171428d7b3dSmrg offset = 0; 1172428d7b3dSmrg do { 1173428d7b3dSmrg int height = box->y2 - box->y1; 1174428d7b3dSmrg int width = box->x2 - box->x1; 1175428d7b3dSmrg int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3); 1176428d7b3dSmrg uint32_t *b; 1177428d7b3dSmrg 1178428d7b3dSmrg DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n", 1179428d7b3dSmrg __FUNCTION__, 1180428d7b3dSmrg box->x1 + src_dx, box->y1 + src_dy, 1181428d7b3dSmrg box->x1 + dst_dx, box->y1 + dst_dy, 1182428d7b3dSmrg width, height, 1183428d7b3dSmrg offset, pitch)); 1184428d7b3dSmrg 1185428d7b3dSmrg assert(box->x1 + src_dx >= 0); 1186428d7b3dSmrg assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride); 1187428d7b3dSmrg assert(box->y1 + src_dy >= 0); 1188428d7b3dSmrg 1189428d7b3dSmrg assert(box->x1 + dst_dx >= 0); 1190428d7b3dSmrg assert(box->y1 + dst_dy >= 0); 1191428d7b3dSmrg 1192428d7b3dSmrg memcpy_blt(src, (char *)ptr + offset, 1193428d7b3dSmrg dst->drawable.bitsPerPixel, 1194428d7b3dSmrg stride, pitch, 1195428d7b3dSmrg box->x1 + src_dx, box->y1 + src_dy, 1196428d7b3dSmrg 0, 0, 1197428d7b3dSmrg width, height); 1198428d7b3dSmrg 1199428d7b3dSmrg assert(kgem->mode == KGEM_BLT); 1200428d7b3dSmrg b = kgem->batch + kgem->nbatch; 1201428d7b3dSmrg b[0] = cmd; 
1202428d7b3dSmrg b[1] = br13; 1203428d7b3dSmrg b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx); 1204428d7b3dSmrg b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx); 1205428d7b3dSmrg b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, 1206428d7b3dSmrg I915_GEM_DOMAIN_RENDER << 16 | 1207428d7b3dSmrg I915_GEM_DOMAIN_RENDER | 1208428d7b3dSmrg KGEM_RELOC_FENCED, 1209428d7b3dSmrg 0); 1210428d7b3dSmrg b[5] = 0; 1211428d7b3dSmrg b[6] = pitch; 1212428d7b3dSmrg b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, 1213428d7b3dSmrg I915_GEM_DOMAIN_RENDER << 16 | 1214428d7b3dSmrg KGEM_RELOC_FENCED, 1215428d7b3dSmrg offset); 1216428d7b3dSmrg kgem->nbatch += 8; 1217428d7b3dSmrg 1218428d7b3dSmrg box++; 1219428d7b3dSmrg offset += pitch * height; 1220428d7b3dSmrg } while (--nbox_this_time); 1221428d7b3dSmrg assert(offset == __kgem_buffer_size(src_bo)); 1222428d7b3dSmrg sigtrap_put(); 1223428d7b3dSmrg 1224428d7b3dSmrg if (nbox) { 1225428d7b3dSmrg _kgem_submit(kgem); 1226428d7b3dSmrg _kgem_set_mode(kgem, KGEM_BLT); 1227428d7b3dSmrg } 1228428d7b3dSmrg 1229428d7b3dSmrg kgem_bo_destroy(kgem, src_bo); 1230428d7b3dSmrg } while (nbox); 1231428d7b3dSmrg } 1232428d7b3dSmrg 1233428d7b3dSmrg sna->blt_state.fill_bo = 0; 1234428d7b3dSmrg return true; 1235428d7b3dSmrg 1236428d7b3dSmrgfallback: 1237428d7b3dSmrg return write_boxes_inplace(kgem, 1238428d7b3dSmrg src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy, 1239428d7b3dSmrg dst_bo, dst_dx, dst_dy, 1240428d7b3dSmrg box, nbox); 1241428d7b3dSmrg} 1242428d7b3dSmrg 1243428d7b3dSmrgstatic bool 1244428d7b3dSmrgwrite_boxes_inplace__xor(struct kgem *kgem, 1245428d7b3dSmrg const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy, 1246428d7b3dSmrg struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy, 1247428d7b3dSmrg const BoxRec *box, int n, 1248428d7b3dSmrg uint32_t and, uint32_t or) 1249428d7b3dSmrg{ 1250428d7b3dSmrg void *dst; 1251428d7b3dSmrg 1252428d7b3dSmrg DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling)); 
1253428d7b3dSmrg 1254428d7b3dSmrg if (!kgem_bo_can_map(kgem, bo)) 1255428d7b3dSmrg return false; 1256428d7b3dSmrg 1257428d7b3dSmrg kgem_bo_submit(kgem, bo); 1258428d7b3dSmrg 1259428d7b3dSmrg dst = kgem_bo_map(kgem, bo); 1260428d7b3dSmrg if (dst == NULL) 1261428d7b3dSmrg return false; 1262428d7b3dSmrg 1263428d7b3dSmrg if (sigtrap_get()) 1264428d7b3dSmrg return false; 1265428d7b3dSmrg 1266428d7b3dSmrg do { 1267428d7b3dSmrg DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__, 1268428d7b3dSmrg box->x1 + src_dx, box->y1 + src_dy, 1269428d7b3dSmrg box->x1 + dst_dx, box->y1 + dst_dy, 1270428d7b3dSmrg box->x2 - box->x1, box->y2 - box->y1, 1271428d7b3dSmrg bpp, stride, bo->pitch)); 1272428d7b3dSmrg 1273428d7b3dSmrg assert(box->x2 > box->x1); 1274428d7b3dSmrg assert(box->y2 > box->y1); 1275428d7b3dSmrg 1276428d7b3dSmrg assert(box->x1 + dst_dx >= 0); 1277428d7b3dSmrg assert((box->x2 + dst_dx)*bpp <= 8*bo->pitch); 1278428d7b3dSmrg assert(box->y1 + dst_dy >= 0); 1279428d7b3dSmrg assert((box->y2 + dst_dy)*bo->pitch <= kgem_bo_size(bo)); 1280428d7b3dSmrg 1281428d7b3dSmrg assert(box->x1 + src_dx >= 0); 1282428d7b3dSmrg assert((box->x2 + src_dx)*bpp <= 8*stride); 1283428d7b3dSmrg assert(box->y1 + src_dy >= 0); 1284428d7b3dSmrg 1285428d7b3dSmrg memcpy_xor(src, dst, bpp, 1286428d7b3dSmrg stride, bo->pitch, 1287428d7b3dSmrg box->x1 + src_dx, box->y1 + src_dy, 1288428d7b3dSmrg box->x1 + dst_dx, box->y1 + dst_dy, 1289428d7b3dSmrg box->x2 - box->x1, box->y2 - box->y1, 1290428d7b3dSmrg and, or); 1291428d7b3dSmrg box++; 1292428d7b3dSmrg } while (--n); 1293428d7b3dSmrg 1294428d7b3dSmrg sigtrap_put(); 1295428d7b3dSmrg return true; 1296428d7b3dSmrg} 1297428d7b3dSmrg 1298428d7b3dSmrgstatic bool upload_inplace__xor(struct kgem *kgem, 1299428d7b3dSmrg struct kgem_bo *bo, 1300428d7b3dSmrg const BoxRec *box, 1301428d7b3dSmrg int n, int bpp) 1302428d7b3dSmrg{ 1303428d7b3dSmrg if (unlikely(kgem->wedged)) 1304428d7b3dSmrg return true; 1305428d7b3dSmrg 
1306428d7b3dSmrg if (!kgem_bo_can_map(kgem, bo)) 1307428d7b3dSmrg return false; 1308428d7b3dSmrg 1309428d7b3dSmrg return __upload_inplace(kgem, bo, box, n, bpp); 1310428d7b3dSmrg} 1311428d7b3dSmrg 1312428d7b3dSmrgbool sna_write_boxes__xor(struct sna *sna, PixmapPtr dst, 1313428d7b3dSmrg struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 1314428d7b3dSmrg const void *src, int stride, int16_t src_dx, int16_t src_dy, 1315428d7b3dSmrg const BoxRec *box, int nbox, 1316428d7b3dSmrg uint32_t and, uint32_t or) 1317428d7b3dSmrg{ 1318428d7b3dSmrg struct kgem *kgem = &sna->kgem; 1319428d7b3dSmrg struct kgem_bo *src_bo; 1320428d7b3dSmrg BoxRec extents; 1321428d7b3dSmrg bool can_blt; 1322428d7b3dSmrg void *ptr; 1323428d7b3dSmrg int offset; 1324428d7b3dSmrg int n, cmd, br13; 1325428d7b3dSmrg 1326428d7b3dSmrg DBG(("%s x %d\n", __FUNCTION__, nbox)); 1327428d7b3dSmrg 1328428d7b3dSmrg if (upload_inplace__xor(kgem, dst_bo, box, nbox, dst->drawable.bitsPerPixel) && 1329428d7b3dSmrg write_boxes_inplace__xor(kgem, 1330428d7b3dSmrg src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy, 1331428d7b3dSmrg dst_bo, dst_dx, dst_dy, 1332428d7b3dSmrg box, nbox, 1333428d7b3dSmrg and, or)) 1334428d7b3dSmrg return true; 1335428d7b3dSmrg 1336428d7b3dSmrg if (wedged(sna)) 1337428d7b3dSmrg return false; 1338428d7b3dSmrg 1339428d7b3dSmrg can_blt = kgem_bo_can_blt(kgem, dst_bo) && 1340428d7b3dSmrg (box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4); 1341428d7b3dSmrg extents = box[0]; 1342428d7b3dSmrg for (n = 1; n < nbox; n++) { 1343428d7b3dSmrg if (box[n].x1 < extents.x1) 1344428d7b3dSmrg extents.x1 = box[n].x1; 1345428d7b3dSmrg if (box[n].x2 > extents.x2) 1346428d7b3dSmrg extents.x2 = box[n].x2; 1347428d7b3dSmrg 1348428d7b3dSmrg if (can_blt) 1349428d7b3dSmrg can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4); 1350428d7b3dSmrg 1351428d7b3dSmrg if (box[n].y1 < extents.y1) 1352428d7b3dSmrg extents.y1 = box[n].y1; 1353428d7b3dSmrg if (box[n].y2 
> extents.y2) 1354428d7b3dSmrg extents.y2 = box[n].y2; 1355428d7b3dSmrg } 1356428d7b3dSmrg 1357428d7b3dSmrg /* Try to avoid switching rings... */ 1358428d7b3dSmrg if (!can_blt || kgem->ring == KGEM_RENDER || 1359428d7b3dSmrg upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) { 1360428d7b3dSmrg DrawableRec tmp; 1361428d7b3dSmrg 1362428d7b3dSmrg tmp.width = extents.x2 - extents.x1; 1363428d7b3dSmrg tmp.height = extents.y2 - extents.y1; 1364428d7b3dSmrg tmp.depth = dst->drawable.depth; 1365428d7b3dSmrg tmp.bitsPerPixel = dst->drawable.bitsPerPixel; 1366428d7b3dSmrg 1367428d7b3dSmrg assert(tmp.width); 1368428d7b3dSmrg assert(tmp.height); 1369428d7b3dSmrg 1370428d7b3dSmrg DBG(("%s: upload (%d, %d)x(%d, %d), max %dx%d\n", 1371428d7b3dSmrg __FUNCTION__, 1372428d7b3dSmrg extents.x1, extents.y1, 1373428d7b3dSmrg tmp.width, tmp.height, 1374428d7b3dSmrg sna->render.max_3d_size, sna->render.max_3d_size)); 1375428d7b3dSmrg if (must_tile(sna, tmp.width, tmp.height)) { 1376428d7b3dSmrg BoxRec tile, stack[64], *clipped; 1377428d7b3dSmrg int step; 1378428d7b3dSmrg 1379428d7b3dSmrgtile: 1380428d7b3dSmrg step = MIN(sna->render.max_3d_size - 4096 / dst->drawable.bitsPerPixel, 1381428d7b3dSmrg 8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel); 1382428d7b3dSmrg while (step * step * 4 > sna->kgem.max_upload_tile_size) 1383428d7b3dSmrg step /= 2; 1384428d7b3dSmrg 1385428d7b3dSmrg DBG(("%s: tiling upload, using %dx%d tiles\n", 1386428d7b3dSmrg __FUNCTION__, step, step)); 1387428d7b3dSmrg assert(step); 1388428d7b3dSmrg 1389428d7b3dSmrg if (n > ARRAY_SIZE(stack)) { 1390428d7b3dSmrg clipped = malloc(sizeof(BoxRec) * n); 1391428d7b3dSmrg if (clipped == NULL) 1392428d7b3dSmrg goto fallback; 1393428d7b3dSmrg } else 1394428d7b3dSmrg clipped = stack; 1395428d7b3dSmrg 1396428d7b3dSmrg for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) { 1397428d7b3dSmrg int y2 = tile.y1 + step; 1398428d7b3dSmrg if (y2 > extents.y2) 1399428d7b3dSmrg y2 = extents.y2; 
1400428d7b3dSmrg tile.y2 = y2; 1401428d7b3dSmrg 1402428d7b3dSmrg for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) { 1403428d7b3dSmrg int x2 = tile.x1 + step; 1404428d7b3dSmrg if (x2 > extents.x2) 1405428d7b3dSmrg x2 = extents.x2; 1406428d7b3dSmrg tile.x2 = x2; 1407428d7b3dSmrg 1408428d7b3dSmrg tmp.width = tile.x2 - tile.x1; 1409428d7b3dSmrg tmp.height = tile.y2 - tile.y1; 1410428d7b3dSmrg 1411428d7b3dSmrg src_bo = kgem_create_buffer_2d(kgem, 1412428d7b3dSmrg tmp.width, 1413428d7b3dSmrg tmp.height, 1414428d7b3dSmrg tmp.bitsPerPixel, 1415428d7b3dSmrg KGEM_BUFFER_WRITE_INPLACE, 1416428d7b3dSmrg &ptr); 1417428d7b3dSmrg if (!src_bo) { 1418428d7b3dSmrg if (clipped != stack) 1419428d7b3dSmrg free(clipped); 1420428d7b3dSmrg goto fallback; 1421428d7b3dSmrg } 1422428d7b3dSmrg 1423428d7b3dSmrg if (sigtrap_get() == 0) { 1424428d7b3dSmrg BoxRec *c = clipped; 1425428d7b3dSmrg for (n = 0; n < nbox; n++) { 1426428d7b3dSmrg *c = box[n]; 1427428d7b3dSmrg if (!box_intersect(c, &tile)) 1428428d7b3dSmrg continue; 1429428d7b3dSmrg 1430428d7b3dSmrg DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n", 1431428d7b3dSmrg __FUNCTION__, 1432428d7b3dSmrg c->x1, c->y1, 1433428d7b3dSmrg c->x2, c->y2, 1434428d7b3dSmrg src_dx, src_dy, 1435428d7b3dSmrg c->x1 - tile.x1, 1436428d7b3dSmrg c->y1 - tile.y1)); 1437428d7b3dSmrg memcpy_xor(src, ptr, tmp.bitsPerPixel, 1438428d7b3dSmrg stride, src_bo->pitch, 1439428d7b3dSmrg c->x1 + src_dx, 1440428d7b3dSmrg c->y1 + src_dy, 1441428d7b3dSmrg c->x1 - tile.x1, 1442428d7b3dSmrg c->y1 - tile.y1, 1443428d7b3dSmrg c->x2 - c->x1, 1444428d7b3dSmrg c->y2 - c->y1, 1445428d7b3dSmrg and, or); 1446428d7b3dSmrg c++; 1447428d7b3dSmrg } 1448428d7b3dSmrg 1449428d7b3dSmrg if (c != clipped) 1450428d7b3dSmrg n = sna->render.copy_boxes(sna, GXcopy, 1451428d7b3dSmrg &tmp, src_bo, -tile.x1, -tile.y1, 1452428d7b3dSmrg &dst->drawable, dst_bo, dst_dx, dst_dy, 1453428d7b3dSmrg clipped, c - clipped, 0); 1454428d7b3dSmrg else 1455428d7b3dSmrg n = 1; 
1456428d7b3dSmrg 1457428d7b3dSmrg sigtrap_put(); 1458428d7b3dSmrg } else 1459428d7b3dSmrg n = 0; 1460428d7b3dSmrg 1461428d7b3dSmrg kgem_bo_destroy(&sna->kgem, src_bo); 1462428d7b3dSmrg 1463428d7b3dSmrg if (!n) { 1464428d7b3dSmrg if (clipped != stack) 1465428d7b3dSmrg free(clipped); 1466428d7b3dSmrg goto fallback; 1467428d7b3dSmrg } 1468428d7b3dSmrg } 1469428d7b3dSmrg } 1470428d7b3dSmrg 1471428d7b3dSmrg if (clipped != stack) 1472428d7b3dSmrg free(clipped); 1473428d7b3dSmrg } else { 1474428d7b3dSmrg src_bo = kgem_create_buffer_2d(kgem, 1475428d7b3dSmrg tmp.width, 1476428d7b3dSmrg tmp.height, 1477428d7b3dSmrg tmp.bitsPerPixel, 1478428d7b3dSmrg KGEM_BUFFER_WRITE_INPLACE, 1479428d7b3dSmrg &ptr); 1480428d7b3dSmrg if (!src_bo) 1481428d7b3dSmrg goto fallback; 1482428d7b3dSmrg 1483428d7b3dSmrg if (sigtrap_get() == 0) { 1484428d7b3dSmrg for (n = 0; n < nbox; n++) { 1485428d7b3dSmrg DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n", 1486428d7b3dSmrg __FUNCTION__, 1487428d7b3dSmrg box[n].x1, box[n].y1, 1488428d7b3dSmrg box[n].x2, box[n].y2, 1489428d7b3dSmrg src_dx, src_dy, 1490428d7b3dSmrg box[n].x1 - extents.x1, 1491428d7b3dSmrg box[n].y1 - extents.y1)); 1492428d7b3dSmrg memcpy_xor(src, ptr, tmp.bitsPerPixel, 1493428d7b3dSmrg stride, src_bo->pitch, 1494428d7b3dSmrg box[n].x1 + src_dx, 1495428d7b3dSmrg box[n].y1 + src_dy, 1496428d7b3dSmrg box[n].x1 - extents.x1, 1497428d7b3dSmrg box[n].y1 - extents.y1, 1498428d7b3dSmrg box[n].x2 - box[n].x1, 1499428d7b3dSmrg box[n].y2 - box[n].y1, 1500428d7b3dSmrg and, or); 1501428d7b3dSmrg } 1502428d7b3dSmrg 1503428d7b3dSmrg n = sna->render.copy_boxes(sna, GXcopy, 1504428d7b3dSmrg &tmp, src_bo, -extents.x1, -extents.y1, 1505428d7b3dSmrg &dst->drawable, dst_bo, dst_dx, dst_dy, 1506428d7b3dSmrg box, nbox, 0); 1507428d7b3dSmrg sigtrap_put(); 1508428d7b3dSmrg } else 1509428d7b3dSmrg n = 0; 1510428d7b3dSmrg 1511428d7b3dSmrg kgem_bo_destroy(&sna->kgem, src_bo); 1512428d7b3dSmrg 1513428d7b3dSmrg if (!n) 1514428d7b3dSmrg goto tile; 
1515428d7b3dSmrg } 1516428d7b3dSmrg 1517428d7b3dSmrg return true; 1518428d7b3dSmrg } 1519428d7b3dSmrg 1520428d7b3dSmrg cmd = XY_SRC_COPY_BLT_CMD; 1521428d7b3dSmrg br13 = dst_bo->pitch; 1522428d7b3dSmrg if (kgem->gen >= 040 && dst_bo->tiling) { 1523428d7b3dSmrg cmd |= BLT_DST_TILED; 1524428d7b3dSmrg br13 >>= 2; 1525428d7b3dSmrg } 1526428d7b3dSmrg br13 |= 0xcc << 16; 1527428d7b3dSmrg switch (dst->drawable.bitsPerPixel) { 1528428d7b3dSmrg default: 1529428d7b3dSmrg case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; 1530428d7b3dSmrg br13 |= 1 << 25; /* RGB8888 */ 1531428d7b3dSmrg case 16: br13 |= 1 << 24; /* RGB565 */ 1532428d7b3dSmrg case 8: break; 1533428d7b3dSmrg } 1534428d7b3dSmrg 1535428d7b3dSmrg kgem_set_mode(kgem, KGEM_BLT, dst_bo); 1536428d7b3dSmrg if (!kgem_check_batch(kgem, 10) || 1537428d7b3dSmrg !kgem_check_reloc_and_exec(kgem, 2) || 1538428d7b3dSmrg !kgem_check_bo_fenced(kgem, dst_bo)) { 1539428d7b3dSmrg kgem_submit(kgem); 1540428d7b3dSmrg if (!kgem_check_bo_fenced(kgem, dst_bo)) 1541428d7b3dSmrg goto fallback; 1542428d7b3dSmrg _kgem_set_mode(kgem, KGEM_BLT); 1543428d7b3dSmrg } 1544428d7b3dSmrg 1545428d7b3dSmrg if (sna->kgem.gen >= 0100) { 1546428d7b3dSmrg cmd |= 8; 1547428d7b3dSmrg do { 1548428d7b3dSmrg int nbox_this_time, rem; 1549428d7b3dSmrg 1550428d7b3dSmrg nbox_this_time = nbox; 1551428d7b3dSmrg rem = kgem_batch_space(kgem); 1552428d7b3dSmrg if (10*nbox_this_time > rem) 1553428d7b3dSmrg nbox_this_time = rem / 8; 1554428d7b3dSmrg if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 1555428d7b3dSmrg nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; 1556428d7b3dSmrg assert(nbox_this_time); 1557428d7b3dSmrg nbox -= nbox_this_time; 1558428d7b3dSmrg 1559428d7b3dSmrg /* Count the total number of bytes to be read and allocate a 1560428d7b3dSmrg * single buffer large enough. Or if it is very small, combine 1561428d7b3dSmrg * with other allocations. 
*/ 1562428d7b3dSmrg offset = 0; 1563428d7b3dSmrg for (n = 0; n < nbox_this_time; n++) { 1564428d7b3dSmrg int height = box[n].y2 - box[n].y1; 1565428d7b3dSmrg int width = box[n].x2 - box[n].x1; 1566428d7b3dSmrg offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height; 1567428d7b3dSmrg } 1568428d7b3dSmrg 1569428d7b3dSmrg src_bo = kgem_create_buffer(kgem, offset, 1570428d7b3dSmrg KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0), 1571428d7b3dSmrg &ptr); 1572428d7b3dSmrg if (!src_bo) 1573428d7b3dSmrg goto fallback; 1574428d7b3dSmrg 1575428d7b3dSmrg if (sigtrap_get()) { 1576428d7b3dSmrg kgem_bo_destroy(kgem, src_bo); 1577428d7b3dSmrg goto fallback; 1578428d7b3dSmrg } 1579428d7b3dSmrg 1580428d7b3dSmrg offset = 0; 1581428d7b3dSmrg do { 1582428d7b3dSmrg int height = box->y2 - box->y1; 1583428d7b3dSmrg int width = box->x2 - box->x1; 1584428d7b3dSmrg int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3); 1585428d7b3dSmrg uint32_t *b; 1586428d7b3dSmrg 1587428d7b3dSmrg DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n", 1588428d7b3dSmrg __FUNCTION__, 1589428d7b3dSmrg box->x1 + src_dx, box->y1 + src_dy, 1590428d7b3dSmrg box->x1 + dst_dx, box->y1 + dst_dy, 1591428d7b3dSmrg width, height, 1592428d7b3dSmrg offset, pitch)); 1593428d7b3dSmrg 1594428d7b3dSmrg assert(box->x1 + src_dx >= 0); 1595428d7b3dSmrg assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride); 1596428d7b3dSmrg assert(box->y1 + src_dy >= 0); 1597428d7b3dSmrg 1598428d7b3dSmrg assert(box->x1 + dst_dx >= 0); 1599428d7b3dSmrg assert(box->y1 + dst_dy >= 0); 1600428d7b3dSmrg 1601428d7b3dSmrg memcpy_xor(src, (char *)ptr + offset, 1602428d7b3dSmrg dst->drawable.bitsPerPixel, 1603428d7b3dSmrg stride, pitch, 1604428d7b3dSmrg box->x1 + src_dx, box->y1 + src_dy, 1605428d7b3dSmrg 0, 0, 1606428d7b3dSmrg width, height, 1607428d7b3dSmrg and, or); 1608428d7b3dSmrg 1609428d7b3dSmrg assert(kgem->mode == KGEM_BLT); 1610428d7b3dSmrg b = kgem->batch + kgem->nbatch; 
1611428d7b3dSmrg b[0] = cmd; 1612428d7b3dSmrg b[1] = br13; 1613428d7b3dSmrg b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx); 1614428d7b3dSmrg b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx); 1615428d7b3dSmrg *(uint64_t *)(b+4) = 1616428d7b3dSmrg kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo, 1617428d7b3dSmrg I915_GEM_DOMAIN_RENDER << 16 | 1618428d7b3dSmrg I915_GEM_DOMAIN_RENDER | 1619428d7b3dSmrg KGEM_RELOC_FENCED, 1620428d7b3dSmrg 0); 1621428d7b3dSmrg b[6] = 0; 1622428d7b3dSmrg b[7] = pitch; 1623428d7b3dSmrg *(uint64_t *)(b+8) = 1624428d7b3dSmrg kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo, 1625428d7b3dSmrg I915_GEM_DOMAIN_RENDER << 16 | 1626428d7b3dSmrg KGEM_RELOC_FENCED, 1627428d7b3dSmrg offset); 1628428d7b3dSmrg kgem->nbatch += 10; 1629428d7b3dSmrg 1630428d7b3dSmrg box++; 1631428d7b3dSmrg offset += pitch * height; 1632428d7b3dSmrg } while (--nbox_this_time); 1633428d7b3dSmrg assert(offset == __kgem_buffer_size(src_bo)); 1634428d7b3dSmrg sigtrap_put(); 1635428d7b3dSmrg 1636428d7b3dSmrg if (nbox) { 1637428d7b3dSmrg _kgem_submit(kgem); 1638428d7b3dSmrg _kgem_set_mode(kgem, KGEM_BLT); 1639428d7b3dSmrg } 1640428d7b3dSmrg 1641428d7b3dSmrg kgem_bo_destroy(kgem, src_bo); 1642428d7b3dSmrg } while (nbox); 1643428d7b3dSmrg } else { 1644428d7b3dSmrg cmd |= 6; 1645428d7b3dSmrg do { 1646428d7b3dSmrg int nbox_this_time, rem; 1647428d7b3dSmrg 1648428d7b3dSmrg nbox_this_time = nbox; 1649428d7b3dSmrg rem = kgem_batch_space(kgem); 1650428d7b3dSmrg if (8*nbox_this_time > rem) 1651428d7b3dSmrg nbox_this_time = rem / 8; 1652428d7b3dSmrg if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) 1653428d7b3dSmrg nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; 1654428d7b3dSmrg assert(nbox_this_time); 1655428d7b3dSmrg nbox -= nbox_this_time; 1656428d7b3dSmrg 1657428d7b3dSmrg /* Count the total number of bytes to be read and allocate a 1658428d7b3dSmrg * single buffer large enough. Or if it is very small, combine 1659428d7b3dSmrg * with other allocations. 
*/ 1660428d7b3dSmrg offset = 0; 1661428d7b3dSmrg for (n = 0; n < nbox_this_time; n++) { 1662428d7b3dSmrg int height = box[n].y2 - box[n].y1; 1663428d7b3dSmrg int width = box[n].x2 - box[n].x1; 1664428d7b3dSmrg offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height; 1665428d7b3dSmrg } 1666428d7b3dSmrg 1667428d7b3dSmrg src_bo = kgem_create_buffer(kgem, offset, 1668428d7b3dSmrg KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0), 1669428d7b3dSmrg &ptr); 1670428d7b3dSmrg if (!src_bo) 1671428d7b3dSmrg goto fallback; 1672428d7b3dSmrg 1673428d7b3dSmrg if (sigtrap_get()) { 1674428d7b3dSmrg kgem_bo_destroy(kgem, src_bo); 1675428d7b3dSmrg goto fallback; 1676428d7b3dSmrg } 1677428d7b3dSmrg 1678428d7b3dSmrg offset = 0; 1679428d7b3dSmrg do { 1680428d7b3dSmrg int height = box->y2 - box->y1; 1681428d7b3dSmrg int width = box->x2 - box->x1; 1682428d7b3dSmrg int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3); 1683428d7b3dSmrg uint32_t *b; 1684428d7b3dSmrg 1685428d7b3dSmrg DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n", 1686428d7b3dSmrg __FUNCTION__, 1687428d7b3dSmrg box->x1 + src_dx, box->y1 + src_dy, 1688428d7b3dSmrg box->x1 + dst_dx, box->y1 + dst_dy, 1689428d7b3dSmrg width, height, 1690428d7b3dSmrg offset, pitch)); 1691428d7b3dSmrg 1692428d7b3dSmrg assert(box->x1 + src_dx >= 0); 1693428d7b3dSmrg assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride); 1694428d7b3dSmrg assert(box->y1 + src_dy >= 0); 1695428d7b3dSmrg 1696428d7b3dSmrg assert(box->x1 + dst_dx >= 0); 1697428d7b3dSmrg assert(box->y1 + dst_dy >= 0); 1698428d7b3dSmrg 1699428d7b3dSmrg memcpy_xor(src, (char *)ptr + offset, 1700428d7b3dSmrg dst->drawable.bitsPerPixel, 1701428d7b3dSmrg stride, pitch, 1702428d7b3dSmrg box->x1 + src_dx, box->y1 + src_dy, 1703428d7b3dSmrg 0, 0, 1704428d7b3dSmrg width, height, 1705428d7b3dSmrg and, or); 1706428d7b3dSmrg 1707428d7b3dSmrg assert(kgem->mode == KGEM_BLT); 1708428d7b3dSmrg b = kgem->batch + kgem->nbatch; 
1709428d7b3dSmrg b[0] = cmd; 1710428d7b3dSmrg b[1] = br13; 1711428d7b3dSmrg b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx); 1712428d7b3dSmrg b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx); 1713428d7b3dSmrg b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, 1714428d7b3dSmrg I915_GEM_DOMAIN_RENDER << 16 | 1715428d7b3dSmrg I915_GEM_DOMAIN_RENDER | 1716428d7b3dSmrg KGEM_RELOC_FENCED, 1717428d7b3dSmrg 0); 1718428d7b3dSmrg b[5] = 0; 1719428d7b3dSmrg b[6] = pitch; 1720428d7b3dSmrg b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, 1721428d7b3dSmrg I915_GEM_DOMAIN_RENDER << 16 | 1722428d7b3dSmrg KGEM_RELOC_FENCED, 1723428d7b3dSmrg offset); 1724428d7b3dSmrg kgem->nbatch += 8; 1725428d7b3dSmrg 1726428d7b3dSmrg box++; 1727428d7b3dSmrg offset += pitch * height; 1728428d7b3dSmrg } while (--nbox_this_time); 1729428d7b3dSmrg assert(offset == __kgem_buffer_size(src_bo)); 1730428d7b3dSmrg sigtrap_put(); 1731428d7b3dSmrg 1732428d7b3dSmrg if (nbox) { 1733428d7b3dSmrg _kgem_submit(kgem); 1734428d7b3dSmrg _kgem_set_mode(kgem, KGEM_BLT); 1735428d7b3dSmrg } 1736428d7b3dSmrg 1737428d7b3dSmrg kgem_bo_destroy(kgem, src_bo); 1738428d7b3dSmrg } while (nbox); 1739428d7b3dSmrg } 1740428d7b3dSmrg 1741428d7b3dSmrg sna->blt_state.fill_bo = 0; 1742428d7b3dSmrg return true; 1743428d7b3dSmrg 1744428d7b3dSmrgfallback: 1745428d7b3dSmrg return write_boxes_inplace__xor(kgem, 1746428d7b3dSmrg src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy, 1747428d7b3dSmrg dst_bo, dst_dx, dst_dy, 1748428d7b3dSmrg box, nbox, 1749428d7b3dSmrg and, or); 1750428d7b3dSmrg} 1751428d7b3dSmrg

/*
 * Replace the full contents of bo by staging the image in a temporary
 * upload buffer and handing the copy to sna->render.copy_boxes().
 *
 * sna:    driver state (supplies the kgem instance and render backend)
 * pixmap: pixmap whose width/height/bitsPerPixel describe the image
 * bo:     destination buffer object
 * src:    source pixels
 * stride: byte pitch of src
 *
 * Returns the result of copy_boxes(), or false when:
 *   - stride * height, counted in 4096-byte units, exceeds
 *     kgem->half_cpu_cache_pages;
 *   - bo cannot be blitted to and must_tile() holds for the pixmap size;
 *   - the staging buffer could not be created;
 *   - sigtrap_get() returned non-zero.
 *
 * The staging buffer is always released before returning.
 */
static bool
indirect_replace(struct sna *sna,
		 PixmapPtr pixmap,
		 struct kgem_bo *bo,
		 const void *src, int stride)
{
	struct kgem *kgem = &sna->kgem;
	struct kgem_bo *upload;
	void *staging;
	bool copied = false;

	DBG(("%s: size=%d vs %d\n",
	     __FUNCTION__,
	     stride * pixmap->drawable.height >> 12,
	     kgem->half_cpu_cache_pages));

	/* Too large relative to the cache budget: let the caller pick
	 * another upload strategy.
	 */
	if (stride * pixmap->drawable.height >> 12 > kgem->half_cpu_cache_pages)
		return false;

	if (!kgem_bo_can_blt(kgem, bo)) {
		if (must_tile(sna,
			      pixmap->drawable.width,
			      pixmap->drawable.height))
			return false;
	}

	upload = kgem_create_buffer_2d(kgem,
				       pixmap->drawable.width,
				       pixmap->drawable.height,
				       pixmap->drawable.bitsPerPixel,
				       KGEM_BUFFER_WRITE_INPLACE,
				       &staging);
	if (upload == NULL)
		return false;

	/* NOTE(review): sigtrap_get()/sigtrap_put() bracket the CPU copy;
	 * presumably they trap faults raised while touching the mapping --
	 * confirm against their definition.
	 */
	if (sigtrap_get() == 0) {
		BoxRec whole;

		memcpy_blt(src, staging, pixmap->drawable.bitsPerPixel,
			   stride, upload->pitch,
			   0, 0,
			   0, 0,
			   pixmap->drawable.width,
			   pixmap->drawable.height);

		whole.x1 = 0;
		whole.y1 = 0;
		whole.x2 = pixmap->drawable.width;
		whole.y2 = pixmap->drawable.height;

		copied = sna->render.copy_boxes(sna, GXcopy,
						&pixmap->drawable, upload, 0, 0,
						&pixmap->drawable, bo, 0, 0,
						&whole, 1, 0);
		sigtrap_put();
	}

	kgem_bo_destroy(kgem, upload);

	return copied;
}

/*
 * Overwrite the whole GPU buffer of pixmap with the image at src.
 *
 * Strategies are attempted in this order:
 *   1. If the current bo is busy: indirect_replace(); failing that, try to
 *      allocate a replacement bo with the same tiling and continue with it
 *      (the original bo is kept if allocation fails).
 *   2. kgem_bo_write() when the bo is untiled and its pitch equals stride.
 *   3. write_boxes_inplace__tiled() when upload_inplace__tiled() allows it.
 *   4. Map the bo and memcpy_blt() into the mapping.
 *   5. sna_write_boxes() on the pixmap's original bo; any replacement bo
 *      is destroyed first.
 *
 * When a replacement bo ends up holding the data it is installed as
 * priv->gpu_bo and the previous one is destroyed.
 *
 * Returns false only when the final sna_write_boxes() fallback fails.
 */
bool sna_replace(struct sna *sna, PixmapPtr pixmap,
		 const void *src, int stride)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	struct kgem *kgem = &sna->kgem;
	struct kgem_bo *bo = priv->gpu_bo;
	const int width = pixmap->drawable.width;
	const int height = pixmap->drawable.height;
	const int bpp = pixmap->drawable.bitsPerPixel;
	BoxRec whole;
	void *dst;

	assert(bo);
	DBG(("%s(handle=%d, %dx%d, bpp=%d, tiling=%d) busy?=%d\n",
	     __FUNCTION__, bo->handle,
	     width,
	     height,
	     bpp,
	     bo->tiling,
	     __kgem_bo_is_busy(kgem, bo)));

	assert(!priv->pinned);

	kgem_bo_undo(kgem, bo);

	if (__kgem_bo_is_busy(kgem, bo)) {
		struct kgem_bo *replacement;

		if (indirect_replace(sna, pixmap, bo, src, stride))
			return true;

		replacement = kgem_create_2d(kgem,
					     width,
					     height,
					     bpp,
					     bo->tiling,
					     CREATE_GTT_MAP | CREATE_INACTIVE);
		if (replacement != NULL)
			bo = replacement;
	}

	/* Every remaining path that needs a box writes the full pixmap. */
	whole.x1 = 0;
	whole.y1 = 0;
	whole.x2 = width;
	whole.y2 = height;

	if (bo->tiling == I915_TILING_NONE && bo->pitch == stride) {
		/* Full rows for all but the last line, which only needs
		 * the pixel data itself.
		 */
		if (kgem_bo_write(kgem, bo, src,
				  (height - 1) * stride + width * bpp / 8))
			goto done;
	}

	if (upload_inplace__tiled(kgem, bo)) {
		if (write_boxes_inplace__tiled(kgem, src,
					       stride, bpp, 0, 0,
					       bo, 0, 0, &whole, 1))
			goto done;
	}

	dst = NULL;
	if (kgem_bo_can_map(kgem, bo))
		dst = kgem_bo_map(kgem, bo);

	if (dst != NULL && sigtrap_get() == 0) {
		memcpy_blt(src, dst, bpp,
			   stride, bo->pitch,
			   0, 0,
			   0, 0,
			   width,
			   height);
		sigtrap_put();
	} else {
		/* Give up on any replacement bo and write through the
		 * generic path into the pixmap's own bo.
		 */
		if (bo != priv->gpu_bo) {
			kgem_bo_destroy(kgem, bo);
			bo = priv->gpu_bo;
		}

		if (!sna_write_boxes(sna, pixmap,
				     bo, 0, 0,
				     src, stride, 0, 0,
				     &whole, 1))
			return false;
	}

done:
	if (bo != priv->gpu_bo) {
		sna_pixmap_unmap(pixmap, priv);
		kgem_bo_destroy(kgem, priv->gpu_bo);
		priv->gpu_bo = bo;
	}

	return true;
}

/*
 * Overwrite the whole GPU buffer of pixmap with the image at src, passing
 * every pixel through memcpy_xor() with the given and/or masks.
 *
 * If the current bo cannot be mapped or is busy, a replacement bo with the
 * same tiling is requested (the original is kept if that fails). The data
 * is then written through a mapping when possible; otherwise any
 * replacement is destroyed and sna_write_boxes__xor() is used on the
 * pixmap's original bo.
 *
 * When a replacement bo ends up holding the data it is installed as
 * priv->gpu_bo and the previous one is destroyed.
 *
 * Returns false only when the sna_write_boxes__xor() fallback fails.
 */
bool
sna_replace__xor(struct sna *sna, PixmapPtr pixmap,
		 const void *src, int stride,
		 uint32_t and, uint32_t or)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	struct kgem *kgem = &sna->kgem;
	struct kgem_bo *bo = priv->gpu_bo;
	const int width = pixmap->drawable.width;
	const int height = pixmap->drawable.height;
	const int bpp = pixmap->drawable.bitsPerPixel;
	void *dst;

	DBG(("%s(handle=%d, %dx%d, bpp=%d, tiling=%d)\n",
	     __FUNCTION__, bo->handle,
	     width,
	     height,
	     bpp,
	     bo->tiling));

	assert(!priv->pinned);

	kgem_bo_undo(kgem, bo);

	if (!kgem_bo_can_map(kgem, bo) || __kgem_bo_is_busy(kgem, bo)) {
		struct kgem_bo *replacement;

		replacement = kgem_create_2d(kgem,
					     width,
					     height,
					     bpp,
					     bo->tiling,
					     CREATE_GTT_MAP | CREATE_INACTIVE);
		if (replacement != NULL)
			bo = replacement;
	}

	dst = NULL;
	if (kgem_bo_can_map(kgem, bo))
		dst = kgem_bo_map(kgem, bo);

	if (dst != NULL && sigtrap_get() == 0) {
		memcpy_xor(src, dst, bpp,
			   stride, bo->pitch,
			   0, 0,
			   0, 0,
			   width,
			   height,
			   and, or);
		sigtrap_put();
	} else {
		BoxRec whole;

		/* Drop any replacement bo; the generic path targets the
		 * pixmap's own bo.
		 */
		if (bo != priv->gpu_bo) {
			kgem_bo_destroy(kgem, bo);
			bo = priv->gpu_bo;
		}

		whole.x1 = 0;
		whole.y1 = 0;
		whole.x2 = width;
		whole.y2 = height;

		if (!sna_write_boxes__xor(sna, pixmap,
					  bo, 0, 0,
					  src, stride, 0, 0,
					  &whole, 1,
					  and, or))
			return false;
	}

	if (bo != priv->gpu_bo) {
		sna_pixmap_unmap(pixmap, priv);
		kgem_bo_destroy(kgem, priv->gpu_bo);
		priv->gpu_bo = bo;
	}

	return true;
}