/*	$NetBSD: i915_gem_object_blt.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $	*/

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_gem_object_blt.c,v 1.2 2021/12/18 23:45:30 riastradh Exp $");

#include "i915_drv.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_pool.h"
#include "gt/intel_gt.h"
#include "gt/intel_ring.h"
#include "i915_gem_clflush.h"
#include "i915_gem_object_blt.h"

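/*
 * intel_emit_vma_fill_blt - build a blitter batch that fills @vma with @value.
 *
 * The fill is emitted as one XY_COLOR_BLT per block of at most SZ_8M, with
 * an MI_ARB_CHECK between blocks so the request remains preemptible.  The
 * batch buffer comes from the engine's pool; the pool node is stashed in
 * batch->private and an engine wakeref is held, so the returned (pinned)
 * vma must be handed back via intel_emit_vma_release() once it has been
 * attached to a request.  Returns an ERR_PTR on failure.
 */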
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
					 struct i915_vma *vma,
					 u32 value)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
	struct intel_engine_pool_node *pool;
	struct i915_vma *batch;
	u64 offset;
	u64 count;
	u64 rem;
	u32 size;
	u32 *cmd;
	int err;

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

	count = div_u64(round_up(vma->size, block_size), block_size);
	size = (1 + 8 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_engine_get_pool(ce->engine, size);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_put;
	}

	rem = vma->size;
	offset = vma->node.start;

	do {
		u32 size = min_t(u64, rem, block_size);

		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (INTEL_GEN(i915) >= 8) {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(offset);
			*cmd++ = upper_32_bits(offset);
			*cmd++ = value;
		} else {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = offset;
			*cmd++ = value;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(ce->vm->gt);

	i915_gem_object_unpin_map(pool->obj);

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	batch->private = pool;
	return batch;

out_put:
	intel_engine_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}

/*
 * Record @vma (and the pool node stashed in vma->private) as busy in @rq
 * so that neither is reused before the request completes.
 */
int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);
	if (unlikely(err))
		return err;

	return intel_engine_pool_mark_active(vma->private, rq);
}

/*
 * Undo intel_emit_vma_*_blt(): unpin the batch, return its pool node and
 * drop the engine wakeref.
 */
void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
{
	i915_vma_unpin(vma);
	intel_engine_pool_put(vma->private);
	intel_engine_pm_put(ce->engine);
}

/*
 * Fill the whole of @obj with @value using the blitter on @ce, as a single
 * request.  Dirty, non-coherent CPU cachelines are flushed first so that a
 * later writeback cannot overwrite the blitter's data.
 */
int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
			     struct intel_context *ce,
			     u32 value)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	struct i915_vma *vma;
	int err;

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (unlikely(err))
		return err;

	if (obj->cache_dirty & ~obj->cache_coherent) {
		i915_gem_object_lock(obj);
		i915_gem_clflush_object(obj, 0);
		i915_gem_object_unlock(obj);
	}

	batch = intel_emit_vma_fill_blt(ce, vma, value);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

	err = i915_request_await_object(rq, obj, true);
	if (unlikely(err))
		goto out_request;

	if (ce->engine->emit_init_breadcrumb) {
		err = ce->engine->emit_init_breadcrumb(rq);
		if (unlikely(err))
			goto out_request;
	}

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (unlikely(err))
		goto out_request;

	err = ce->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
out_request:
	if (unlikely(err))
		i915_request_skip(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_unpin:
	i915_vma_unpin(vma);
	return err;
}

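/*
 * intel_emit_vma_copy_blt - build a blitter batch that copies @src to @dst.
 *
 * Mirrors intel_emit_vma_fill_blt(): the copy is split into blocks of at
 * most SZ_8M separated by MI_ARB_CHECK, using GEN9_XY_FAST_COPY_BLT_CMD on
 * gen9+, XY_SRC_COPY_BLT_CMD on gen8 and SRC_COPY_BLT_CMD on older parts.
 * Returns a pinned batch vma (pool node in ->private, engine wakeref held)
 * to be released with intel_emit_vma_release(), or an ERR_PTR on failure.
 */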
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
					 struct i915_vma *src,
					 struct i915_vma *dst)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
	struct intel_engine_pool_node *pool;
	struct i915_vma *batch;
	u64 src_offset, dst_offset;
	u64 count, rem;
	u32 size, *cmd;
	int err;

	GEM_BUG_ON(src->size != dst->size);

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

	count = div_u64(round_up(dst->size, block_size), block_size);
	size = (1 + 11 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_engine_get_pool(ce->engine, size);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_put;
	}

	rem = src->size;
	src_offset = src->node.start;
	dst_offset = dst->node.start;

	do {
		size = min_t(u64, rem, block_size);
		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (INTEL_GEN(i915) >= 9) {
			*cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else if (INTEL_GEN(i915) >= 8) {
			*cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else {
			*cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
			*cmd++ = dst_offset;
			*cmd++ = PAGE_SIZE;
			*cmd++ = src_offset;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		src_offset += size;
		dst_offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(ce->vm->gt);

	i915_gem_object_unpin_map(pool->obj);

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	batch->private = pool;
	return batch;

out_put:
	intel_engine_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}

/* Flush any dirty CPU caches and serialise @rq against prior users of @vma. */
static int move_to_gpu(struct i915_vma *vma, struct i915_request *rq, bool write)
{
	struct drm_i915_gem_object *obj = vma->obj;

	if (obj->cache_dirty & ~obj->cache_coherent)
		i915_gem_clflush_object(obj, 0);

	return i915_request_await_object(rq, obj, write);
}

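/*
 * i915_gem_object_copy_blt - copy the contents of @src into @dst using @ce.
 *
 * Both objects are pinned into the context's address space, their
 * reservations are locked together, and a single request is built that
 * waits for prior users (move_to_gpu()) before running the copy batch.
 * If setup fails after the request has been allocated, the request is
 * skipped rather than dropped.  Returns 0 or a negative error code.
 */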
int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
			     struct drm_i915_gem_object *dst,
			     struct intel_context *ce)
{
	struct drm_gem_object *objs[] = { &src->base, &dst->base };
	struct i915_address_space *vm = ce->vm;
	struct i915_vma *vma[2], *batch;
	struct ww_acquire_ctx acquire;
	struct i915_request *rq;
	int err, i;

	vma[0] = i915_vma_instance(src, vm, NULL);
	if (IS_ERR(vma[0]))
		return PTR_ERR(vma[0]);

	err = i915_vma_pin(vma[0], 0, 0, PIN_USER);
	if (unlikely(err))
		return err;

	vma[1] = i915_vma_instance(dst, vm, NULL);
	if (IS_ERR(vma[1])) {
		err = PTR_ERR(vma[1]);
		goto out_unpin_src;
	}

	err = i915_vma_pin(vma[1], 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_unpin_src;

	batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin_dst;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

	err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
	if (unlikely(err))
		goto out_request;

	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		err = move_to_gpu(vma[i], rq, i);
		if (unlikely(err))
			goto out_unlock;
	}

	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		unsigned int flags = i ? EXEC_OBJECT_WRITE : 0;

		err = i915_vma_move_to_active(vma[i], rq, flags);
		if (unlikely(err))
			goto out_unlock;
	}

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (unlikely(err))
			goto out_unlock;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
out_unlock:
	drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
out_request:
	if (unlikely(err))
		i915_request_skip(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_unpin_dst:
	i915_vma_unpin(vma[1]);
out_unpin_src:
	i915_vma_unpin(vma[0]);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_object_blt.c"
#endif