/*	$NetBSD: amdgpu_test.c,v 1.1 2018/08/27 01:34:44 riastradh Exp $	*/

/*
 * Copyright 2009 VMware, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Michel Dänzer
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_test.c,v 1.1 2018/08/27 01:34:44 riastradh Exp $");

#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"

/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */
static void amdgpu_do_test_moves(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	struct amdgpu_bo *vram_obj = NULL;
	struct amdgpu_bo **gtt_obj = NULL;
	uint64_t gtt_addr, vram_addr;
	unsigned n, size;
	int i, r;

	size = 1024 * 1024;

	/* Number of tests =
	 * (Total GTT - IB pool - writeback page - ring buffers) / test size
	 */
	n = adev->mc.gtt_size - AMDGPU_IB_POOL_SIZE*64*1024;
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
		if (adev->rings[i])
			n -= adev->rings[i]->ring_size;
	if (adev->wb.wb_obj)
		n -= AMDGPU_GPU_PAGE_SIZE;
	if (adev->irq.ih.ring_obj)
		n -= adev->irq.ih.ring_size;
	n /= size;

	gtt_obj = kzalloc(n * sizeof(*gtt_obj), GFP_KERNEL);
	if (!gtt_obj) {
		DRM_ERROR("Failed to allocate %d pointers\n", n);
		r = 1;
		goto out_cleanup;
	}

	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
			     AMDGPU_GEM_DOMAIN_VRAM, 0,
			     NULL, NULL, &vram_obj);
	if (r) {
		DRM_ERROR("Failed to create VRAM object\n");
		goto out_cleanup;
	}
	r = amdgpu_bo_reserve(vram_obj, false);
	if (unlikely(r != 0))
		goto out_unref;
	r = amdgpu_bo_pin(vram_obj, AMDGPU_GEM_DOMAIN_VRAM, &vram_addr);
	if (r) {
		DRM_ERROR("Failed to pin VRAM object\n");
		goto out_unres;
	}
	for (i = 0; i < n; i++) {
		void *gtt_map, *vram_map;
		void **gtt_start, **gtt_end;
		void **vram_start, **vram_end;
		struct fence *fence = NULL;

		r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
				     NULL, gtt_obj + i);
		if (r) {
			DRM_ERROR("Failed to create GTT object %d\n", i);
			goto out_lclean;
		}

		r = amdgpu_bo_reserve(gtt_obj[i], false);
		if (unlikely(r != 0))
			goto out_lclean_unref;
		r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT, &gtt_addr);
		if (r) {
			DRM_ERROR("Failed to pin GTT object %d\n", i);
			goto out_lclean_unres;
		}

		r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
		if (r) {
			DRM_ERROR("Failed to map GTT object %d\n", i);
			goto out_lclean_unpin;
		}

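		/* Fill the GTT BO with its own CPU pointers as a recognizable
		 * test pattern before copying it to VRAM. */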
		for (gtt_start = gtt_map, gtt_end = gtt_map + size;
		     gtt_start < gtt_end;
		     gtt_start++)
			*gtt_start = gtt_start;

		amdgpu_bo_kunmap(gtt_obj[i]);

		r = amdgpu_copy_buffer(ring, gtt_addr, vram_addr,
				       size, NULL, &fence);

		if (r) {
			DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
			goto out_lclean_unpin;
		}

		r = fence_wait(fence, false);
		if (r) {
			DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
			goto out_lclean_unpin;
		}

		fence_put(fence);

		r = amdgpu_bo_kmap(vram_obj, &vram_map);
		if (r) {
			DRM_ERROR("Failed to map VRAM object after copy %d\n", i);
			goto out_lclean_unpin;
		}

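		/* Verify that VRAM now holds the GTT pattern, then overwrite
		 * it with the VRAM-side pointers for the copy back. */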
		for (gtt_start = gtt_map, gtt_end = gtt_map + size,
		     vram_start = vram_map, vram_end = vram_map + size;
		     vram_start < vram_end;
		     gtt_start++, vram_start++) {
			if (*vram_start != gtt_start) {
				DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, "
					  "expected 0x%p (GTT/VRAM offset "
					  "0x%16llx/0x%16llx)\n",
					  i, *vram_start, gtt_start,
					  (unsigned long long)
					  (gtt_addr - adev->mc.gtt_start +
					   (void*)gtt_start - gtt_map),
					  (unsigned long long)
					  (vram_addr - adev->mc.vram_start +
					   (void*)gtt_start - gtt_map));
				amdgpu_bo_kunmap(vram_obj);
				goto out_lclean_unpin;
			}
			*vram_start = vram_start;
		}

		amdgpu_bo_kunmap(vram_obj);

		r = amdgpu_copy_buffer(ring, vram_addr, gtt_addr,
				       size, NULL, &fence);

		if (r) {
			DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
			goto out_lclean_unpin;
		}

		r = fence_wait(fence, false);
		if (r) {
			DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
			goto out_lclean_unpin;
		}

		fence_put(fence);

		r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
		if (r) {
			DRM_ERROR("Failed to map GTT object after copy %d\n", i);
			goto out_lclean_unpin;
		}

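		/* Verify that the GTT BO now holds the VRAM-side pattern. */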
		for (gtt_start = gtt_map, gtt_end = gtt_map + size,
		     vram_start = vram_map, vram_end = vram_map + size;
		     gtt_start < gtt_end;
		     gtt_start++, vram_start++) {
			if (*gtt_start != vram_start) {
				DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, "
					  "expected 0x%p (VRAM/GTT offset "
					  "0x%16llx/0x%16llx)\n",
					  i, *gtt_start, vram_start,
					  (unsigned long long)
					  (vram_addr - adev->mc.vram_start +
					   (void*)vram_start - vram_map),
					  (unsigned long long)
					  (gtt_addr - adev->mc.gtt_start +
					   (void*)vram_start - vram_map));
				amdgpu_bo_kunmap(gtt_obj[i]);
				goto out_lclean_unpin;
			}
		}

		amdgpu_bo_kunmap(gtt_obj[i]);

		DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n",
			 gtt_addr - adev->mc.gtt_start);
		continue;

out_lclean_unpin:
		amdgpu_bo_unpin(gtt_obj[i]);
out_lclean_unres:
		amdgpu_bo_unreserve(gtt_obj[i]);
out_lclean_unref:
		amdgpu_bo_unref(&gtt_obj[i]);
out_lclean:
		for (--i; i >= 0; --i) {
			amdgpu_bo_unpin(gtt_obj[i]);
			amdgpu_bo_unreserve(gtt_obj[i]);
			amdgpu_bo_unref(&gtt_obj[i]);
		}
		if (fence)
			fence_put(fence);
		break;
	}

	amdgpu_bo_unpin(vram_obj);
out_unres:
	amdgpu_bo_unreserve(vram_obj);
out_unref:
	amdgpu_bo_unref(&vram_obj);
out_cleanup:
	kfree(gtt_obj);
	if (r) {
		printk(KERN_WARNING "Error while testing BO move.\n");
	}
}

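/* Entry point: only run the BO move test if a buffer copy engine is
 * available. */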
void amdgpu_test_moves(struct amdgpu_device *adev)
{
	if (adev->mman.buffer_funcs)
		amdgpu_do_test_moves(adev);
}

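/* Emit a dummy fence on the given ring.  UVD and VCE rings need a
 * create/destroy message pair; other rings just emit a plain fence. */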
static int amdgpu_test_create_and_emit_fence(struct amdgpu_device *adev,
					     struct amdgpu_ring *ring,
					     struct fence **fence)
{
	uint32_t handle = ring->idx ^ 0xdeafbeef;
	int r;

	if (ring == &adev->uvd.ring) {
		r = amdgpu_uvd_get_create_msg(ring, handle, NULL);
		if (r) {
			DRM_ERROR("Failed to get dummy create msg\n");
			return r;
		}

		r = amdgpu_uvd_get_destroy_msg(ring, handle, fence);
		if (r) {
			DRM_ERROR("Failed to get dummy destroy msg\n");
			return r;
		}

	} else if (ring == &adev->vce.ring[0] ||
		   ring == &adev->vce.ring[1]) {
		r = amdgpu_vce_get_create_msg(ring, handle, NULL);
		if (r) {
			DRM_ERROR("Failed to get dummy create msg\n");
			return r;
		}

		r = amdgpu_vce_get_destroy_msg(ring, handle, fence);
		if (r) {
			DRM_ERROR("Failed to get dummy destroy msg\n");
			return r;
		}
	} else {
		struct amdgpu_fence *a_fence = NULL;
		r = amdgpu_ring_lock(ring, 64);
		if (r) {
			DRM_ERROR("Failed to lock ring %d\n", ring->idx);
			return r;
		}
		amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_UNDEFINED, &a_fence);
		amdgpu_ring_unlock_commit(ring);
		*fence = &a_fence->base;
	}
	return 0;
}

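/* Make ringA wait twice on a semaphore and fence each wait, then signal the
 * semaphore twice from ringB, checking that each fence completes only after
 * the corresponding signal. */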
void amdgpu_test_ring_sync(struct amdgpu_device *adev,
			   struct amdgpu_ring *ringA,
			   struct amdgpu_ring *ringB)
{
	struct fence *fence1 = NULL, *fence2 = NULL;
	struct amdgpu_semaphore *semaphore = NULL;
	int r;

	r = amdgpu_semaphore_create(adev, &semaphore);
	if (r) {
		DRM_ERROR("Failed to create semaphore\n");
		goto out_cleanup;
	}

	r = amdgpu_ring_lock(ringA, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_wait(ringA, semaphore);
	amdgpu_ring_unlock_commit(ringA);

	r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence1);
	if (r)
		goto out_cleanup;

	r = amdgpu_ring_lock(ringA, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_wait(ringA, semaphore);
	amdgpu_ring_unlock_commit(ringA);

	r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence2);
	if (r)
		goto out_cleanup;

	mdelay(1000);

	if (fence_is_signaled(fence1)) {
		DRM_ERROR("Fence 1 signaled without waiting for semaphore.\n");
		goto out_cleanup;
	}

	r = amdgpu_ring_lock(ringB, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring B %p\n", ringB);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_signal(ringB, semaphore);
	amdgpu_ring_unlock_commit(ringB);

	r = fence_wait(fence1, false);
	if (r) {
		DRM_ERROR("Failed to wait for sync fence 1\n");
		goto out_cleanup;
	}

	mdelay(1000);

	if (fence_is_signaled(fence2)) {
		DRM_ERROR("Fence 2 signaled without waiting for semaphore.\n");
		goto out_cleanup;
	}

	r = amdgpu_ring_lock(ringB, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring B %p\n", ringB);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_signal(ringB, semaphore);
	amdgpu_ring_unlock_commit(ringB);

	r = fence_wait(fence2, false);
	if (r) {
		DRM_ERROR("Failed to wait for sync fence 2\n");
		goto out_cleanup;
	}

out_cleanup:
	amdgpu_semaphore_free(adev, &semaphore, NULL);

	if (fence1)
		fence_put(fence1);

	if (fence2)
		fence_put(fence2);

	if (r)
		printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
}

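/* Make ringA and ringB each wait on the same semaphore, then signal it twice
 * from ringC and check that exactly one of the two fences completes per
 * signal, in either order. */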
static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
				   struct amdgpu_ring *ringA,
				   struct amdgpu_ring *ringB,
				   struct amdgpu_ring *ringC)
{
	struct fence *fenceA = NULL, *fenceB = NULL;
	struct amdgpu_semaphore *semaphore = NULL;
	bool sigA, sigB;
	int i, r;

	r = amdgpu_semaphore_create(adev, &semaphore);
	if (r) {
		DRM_ERROR("Failed to create semaphore\n");
		goto out_cleanup;
	}

	r = amdgpu_ring_lock(ringA, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_wait(ringA, semaphore);
	amdgpu_ring_unlock_commit(ringA);

	r = amdgpu_test_create_and_emit_fence(adev, ringA, &fenceA);
	if (r)
		goto out_cleanup;

	r = amdgpu_ring_lock(ringB, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_wait(ringB, semaphore);
	amdgpu_ring_unlock_commit(ringB);
	r = amdgpu_test_create_and_emit_fence(adev, ringB, &fenceB);
	if (r)
		goto out_cleanup;

	mdelay(1000);

	if (fence_is_signaled(fenceA)) {
		DRM_ERROR("Fence A signaled without waiting for semaphore.\n");
		goto out_cleanup;
	}
	if (fence_is_signaled(fenceB)) {
		DRM_ERROR("Fence B signaled without waiting for semaphore.\n");
		goto out_cleanup;
	}

	r = amdgpu_ring_lock(ringC, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring C %p\n", ringC);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_signal(ringC, semaphore);
	amdgpu_ring_unlock_commit(ringC);

	for (i = 0; i < 30; ++i) {
		mdelay(100);
		sigA = fence_is_signaled(fenceA);
		sigB = fence_is_signaled(fenceB);
		if (sigA || sigB)
			break;
	}

	if (!sigA && !sigB) {
		DRM_ERROR("Neither fence A nor B has been signaled\n");
		goto out_cleanup;
	} else if (sigA && sigB) {
		DRM_ERROR("Both fence A and B have been signaled\n");
		goto out_cleanup;
	}

	DRM_INFO("Fence %c was first signaled\n", sigA ? 'A' : 'B');

	r = amdgpu_ring_lock(ringC, 64);
	if (r) {
		DRM_ERROR("Failed to lock ring C %p\n", ringC);
		goto out_cleanup;
	}
	amdgpu_semaphore_emit_signal(ringC, semaphore);
	amdgpu_ring_unlock_commit(ringC);

	mdelay(1000);

	r = fence_wait(fenceA, false);
	if (r) {
		DRM_ERROR("Failed to wait for sync fence A\n");
		goto out_cleanup;
	}
	r = fence_wait(fenceB, false);
	if (r) {
		DRM_ERROR("Failed to wait for sync fence B\n");
		goto out_cleanup;
	}

out_cleanup:
	amdgpu_semaphore_free(adev, &semaphore, NULL);

	if (fenceA)
		fence_put(fenceA);

	if (fenceB)
		fence_put(fenceB);

	if (r)
		printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
}

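/* Report whether the given pair of rings can be used for the semaphore sync
 * tests; the two VCE rings are excluded from syncing against each other. */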
static bool amdgpu_test_sync_possible(struct amdgpu_ring *ringA,
				      struct amdgpu_ring *ringB)
{
	if (ringA == &ringA->adev->vce.ring[0] &&
	    ringB == &ringB->adev->vce.ring[1])
		return false;

	return true;
}

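/* Run the semaphore sync tests on every pair and triple of rings that are
 * ready and can be synchronized. */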
void amdgpu_test_syncing(struct amdgpu_device *adev)
{
	int i, j, k;

	for (i = 1; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ringA = adev->rings[i];
		if (!ringA || !ringA->ready)
			continue;

		for (j = 0; j < i; ++j) {
			struct amdgpu_ring *ringB = adev->rings[j];
			if (!ringB || !ringB->ready)
				continue;

			if (!amdgpu_test_sync_possible(ringA, ringB))
				continue;

			DRM_INFO("Testing syncing between rings %d and %d...\n", i, j);
			amdgpu_test_ring_sync(adev, ringA, ringB);

			DRM_INFO("Testing syncing between rings %d and %d...\n", j, i);
			amdgpu_test_ring_sync(adev, ringB, ringA);

			for (k = 0; k < j; ++k) {
				struct amdgpu_ring *ringC = adev->rings[k];
				if (!ringC || !ringC->ready)
					continue;

				if (!amdgpu_test_sync_possible(ringA, ringC))
					continue;

				if (!amdgpu_test_sync_possible(ringB, ringC))
					continue;

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, j, k);
				amdgpu_test_ring_sync2(adev, ringA, ringB, ringC);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, k, j);
				amdgpu_test_ring_sync2(adev, ringA, ringC, ringB);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, i, k);
				amdgpu_test_ring_sync2(adev, ringB, ringA, ringC);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, k, i);
				amdgpu_test_ring_sync2(adev, ringB, ringC, ringA);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, i, j);
				amdgpu_test_ring_sync2(adev, ringC, ringA, ringB);

				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, j, i);
				amdgpu_test_ring_sync2(adev, ringC, ringB, ringA);
			}
		}
	}
}