1/* 2 * Copyright 2012 Nouveau Project 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
 *
 * Authors: Christoph Bumiller
 */

#include "nvc0/nvc0_context.h"
#include "nvc0/nve4_compute.h"

#include "codegen/nv50_ir_driver.h"

#ifdef DEBUG
static void nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *);
static void gp100_compute_dump_launch_desc(const struct gp100_cp_launch_desc *);
#endif


/* One-time channel setup for the compute (CP) subchannel on Kepler and newer:
 * allocates the compute class object and programs TEMP (scratch), code, TIC
 * and TSC base addresses shared with the screen-wide buffers.
 * Returns 0 on success, a negative value on failure.
 */
int
nve4_screen_compute_setup(struct nvc0_screen *screen,
                          struct nouveau_pushbuf *push)
{
   struct nouveau_device *dev = screen->base.device;
   struct nouveau_object *chan = screen->base.channel;
   int i;
   int ret;
   uint32_t obj_class;
   uint64_t address;

   /* Pick the compute class matching the chipset generation. */
   switch (dev->chipset & ~0xf) {
   case 0x100:
   case 0xf0:
      obj_class = NVF0_COMPUTE_CLASS; /* GK110 */
      break;
   case 0xe0:
      obj_class = NVE4_COMPUTE_CLASS; /* GK104 */
      break;
   case 0x110:
      obj_class = GM107_COMPUTE_CLASS;
      break;
   case 0x120:
      obj_class = GM200_COMPUTE_CLASS;
      break;
   case 0x130:
      obj_class = (dev->chipset == 0x130 || dev->chipset == 0x13b) ?
                      GP100_COMPUTE_CLASS : GP104_COMPUTE_CLASS;
      break;
   default:
      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
      return -1;
   }

   ret = nouveau_object_new(chan, 0xbeef00c0, obj_class, NULL, 0,
                            &screen->compute);
   if (ret) {
      NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
      return ret;
   }

   /* Bind the compute object to the CP subchannel. */
   BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->compute->oclass);

   BEGIN_NVC0(push, NVE4_CP(TEMP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->tls->offset);
   PUSH_DATA (push, screen->tls->offset);
   /* No idea why there are 2. Divide size by 2 to be safe.
    * Actually this might be per-MP TEMP size and looks like I'm only using
    * 2 MPs instead of all 8.
    */
   BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(0)), 3);
   PUSH_DATAh(push, screen->tls->size / screen->mp_count);
   PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
   PUSH_DATA (push, 0xff);
   BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3);
   PUSH_DATAh(push, screen->tls->size / screen->mp_count);
   PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
   PUSH_DATA (push, 0xff);

   /* Unified address space ? Who needs that ? Certainly not OpenCL.
    *
    * FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be
    *  accessible. We cannot prevent that at the moment, so expect failure.
    */
   BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1);
   PUSH_DATA (push, 0xff << 24);
   BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1);
   PUSH_DATA (push, 0xfe << 24);

   BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->text->offset);
   PUSH_DATA (push, screen->text->offset);

   /* Unknown method; value differs for GK110+ classes. */
   BEGIN_NVC0(push, SUBC_CP(0x0310), 1);
   PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300);

   /* NOTE: these do not affect the state used by the 3D object */
   BEGIN_NVC0(push, NVE4_CP(TIC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset);
   PUSH_DATA (push, screen->txc->offset);
   PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
   BEGIN_NVC0(push, NVE4_CP(TSC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset + 65536);
   PUSH_DATA (push, screen->txc->offset + 65536);
   PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);

   if (obj_class >= NVF0_COMPUTE_CLASS) {
      /* The blob calls GK110_COMPUTE.FIRMWARE[0x6], along with the args (0x1)
       * passed with GK110_COMPUTE.GRAPH.SCRATCH[0x2]. This is currently
       * disabled because our firmware doesn't support these commands and the
       * GPU hangs if they are used.
       */
      BEGIN_NIC0(push, SUBC_CP(0x0248), 64);
      for (i = 63; i >= 0; i--)
         PUSH_DATA(push, 0x38000 | i);
      IMMED_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 0);
   }

   BEGIN_NVC0(push, NVE4_CP(TEX_CB_INDEX), 1);
   PUSH_DATA (push, 7); /* does not interfere with 3D */

   /* Disabling this UNK command avoids a read fault when using texelFetch()
    * from a compute shader for weird reasons.
   if (obj_class == NVF0_COMPUTE_CLASS)
      IMMED_NVC0(push, SUBC_CP(0x02c4), 1);
   */

   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);

   /* MS sample coordinate offsets: these do not work with _ALT modes ! */
   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, address + NVC0_CB_AUX_MS_INFO);
   PUSH_DATA (push, address + NVC0_CB_AUX_MS_INFO);
   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
   PUSH_DATA (push, 64);
   PUSH_DATA (push, 1);
   BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 17);
   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
   /* (x, y) pairs for the 8 samples. */
   PUSH_DATA (push, 0); /* 0 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 1); /* 1 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0); /* 2 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 1); /* 3 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 2); /* 4 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 3); /* 5 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 2); /* 6 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 3); /* 7 */
   PUSH_DATA (push, 1);

#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
   /* Upload the trap-handler info block read back by nve4_compute_trap_info(). */
   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
   PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
   PUSH_DATA (push, 28);
   PUSH_DATA (push, 1);
   BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 8);
   PUSH_DATA (push, 1);
   PUSH_DATA (push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO);
   PUSH_DATAh(push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO);
   PUSH_DATA (push, screen->tls->offset);
   PUSH_DATAh(push, screen->tls->offset);
   PUSH_DATA (push, screen->tls->size / 2); /* MP TEMP block size */
   PUSH_DATA (push, screen->tls->size / 2 / 64); /* warp TEMP block size */
   PUSH_DATA (push, 0); /* warp cfstack size */
#endif

   BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);

   return 0;
}

/* Maxwell+ path: validate one image view's TIC entry and upload its handle
 * into the auxiliary constant buffer (slot offset by 32, after textures).
 */
static void
gm107_compute_validate_surfaces(struct nvc0_context *nvc0,
                                struct pipe_image_view *view, int slot)
{
   struct nv04_resource *res = nv04_resource(view->resource);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   struct nouveau_bo *txc = nvc0->screen->txc;
   struct nv50_tic_entry *tic;
   uint64_t address;
   const int s = 5; /* compute shader stage */

   tic = nv50_tic_entry(nvc0->images_tic[s][slot]);

   /* NOTE: res is re-fetched from the TIC's texture, not the view. */
   res = nv04_resource(tic->pipe.texture);
   nvc0_update_tic(nvc0, tic, res);

   if (tic->id < 0) {
      tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);

      /* upload the texture view */
      PUSH_SPACE(push, 16);
      BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
      PUSH_DATAh(push, txc->offset + (tic->id * 32));
      PUSH_DATA (push, txc->offset + (tic->id * 32));
      BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
      PUSH_DATA (push, 32);
      PUSH_DATA (push, 1);
      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 9);
      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
      PUSH_DATAp(push, &tic->tic[0], 8);

      BEGIN_NIC0(push, NVE4_CP(TIC_FLUSH), 1);
      PUSH_DATA (push, (tic->id << 4) | 1);
   } else
   if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
      /* Entry unchanged but data may be stale: flush the texture cache. */
      BEGIN_NIC0(push, NVE4_CP(TEX_CACHE_CTL), 1);
      PUSH_DATA (push, (tic->id << 4) | 1);
   }
   nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

   res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
   res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;

   BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);

   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);

   /* upload the texture handle */
   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, address + NVC0_CB_AUX_TEX_INFO(slot + 32));
   PUSH_DATA (push, address + NVC0_CB_AUX_TEX_INFO(slot + 32));
   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
   PUSH_DATA (push, 4);
   PUSH_DATA (push, 0x1);
   BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 2);
   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
   PUSH_DATA (push, tic->id);

   BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
}

/* Upload surface (image) info for every compute image slot into the
 * auxiliary constant buffer; zero-fills unbound slots.
 */
static void
nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   uint64_t address;
   const int s = 5; /* compute shader stage */
   int i, j;

   if (!nvc0->images_dirty[s])
      return;

   address = nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);

   for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
      struct pipe_image_view *view = &nvc0->images[s][i];

      /* 16 dwords of surface info per image slot. */
      BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
      PUSH_DATAh(push, address + NVC0_CB_AUX_SU_INFO(i));
      PUSH_DATA (push, address + NVC0_CB_AUX_SU_INFO(i));
      BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
      PUSH_DATA (push, 16 * 4);
      PUSH_DATA (push, 0x1);
      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 16);
      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));

      if (view->resource) {
         struct nv04_resource *res = nv04_resource(view->resource);

         if (res->base.target == PIPE_BUFFER) {
            if (view->access & PIPE_IMAGE_ACCESS_WRITE)
               nvc0_mark_image_range_valid(view);
         }

         nve4_set_surface_info(push, view, nvc0);
         BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);

         if (nvc0->screen->base.class_3d >= GM107_3D_CLASS)
            gm107_compute_validate_surfaces(nvc0, view, i);
      } else {
         for (j = 0; j < 16; j++)
            PUSH_DATA(push, 0);
      }
   }
}

/* Thankfully, textures with samplers follow the normal rules. */
static void
nve4_compute_validate_samplers(struct nvc0_context *nvc0)
{
   bool need_flush = nve4_validate_tsc(nvc0, 5);
   if (need_flush) {
      BEGIN_NVC0(nvc0->base.pushbuf, NVE4_CP(TSC_FLUSH), 1);
      PUSH_DATA (nvc0->base.pushbuf, 0);
   }

   /* Invalidate all 3D samplers because they are aliased. */
   for (int s = 0; s < 5; s++)
      nvc0->samplers_dirty[s] = ~0;
   nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS;
}

/* (Code duplicated at bottom for various non-convincing reasons.
 * E.g. we might want to use the COMPUTE subchannel to upload TIC/TSC
 * entries to avoid a subchannel switch.
 * Same for texture cache flushes.
 * Also, the bufctx differs, and more IFs in the 3D version looks ugly.)
 */
static void nve4_compute_validate_textures(struct nvc0_context *);

/* Upload the dirty range of bindless texture/sampler handles into the
 * auxiliary constant buffer for the compute stage.
 */
static void
nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   uint64_t address;
   const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE);
   unsigned i, n;
   uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];

   if (!dirty)
      return;
   /* Contiguous span [i, i+n) covering all dirty bits. */
   i = ffs(dirty) - 1;
   n = util_logbase2(dirty) + 1 - i;
   assert(n);

   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);

   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, address + NVC0_CB_AUX_TEX_INFO(i));
   PUSH_DATA (push, address + NVC0_CB_AUX_TEX_INFO(i));
   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
   PUSH_DATA (push, n * 4);
   PUSH_DATA (push, 0x1);
   BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + n);
   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
   PUSH_DATAp(push, &nvc0->tex_handles[s][i], n);

   BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);

   nvc0->textures_dirty[s] = 0;
   nvc0->samplers_dirty[s] = 0;
}

/* Validate compute constant buffers: user uniforms (slot 0) are copied into
 * the driver's uniform_bo; real UBOs (slot > 0) only get their address/size
 * recorded in the auxiliary buffer for shader-side fetching.
 */
static void
nve4_compute_validate_constbufs(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const int s = 5; /* compute shader stage */

   while (nvc0->constbuf_dirty[s]) {
      int i = ffs(nvc0->constbuf_dirty[s]) - 1;
      nvc0->constbuf_dirty[s] &= ~(1 << i);

      if (nvc0->constbuf[s][i].user) {
         struct nouveau_bo *bo = nvc0->screen->uniform_bo;
         const unsigned base = NVC0_CB_USR_INFO(s);
         const unsigned size = nvc0->constbuf[s][0].size;
         assert(i == 0); /* we really only want OpenGL uniforms here */
         assert(nvc0->constbuf[s][0].u.data);

         BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
         PUSH_DATAh(push, bo->offset + base);
         PUSH_DATA (push, bo->offset + base);
         BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
         PUSH_DATA (push, size);
         PUSH_DATA (push, 0x1);
         BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (size / 4));
         PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
         PUSH_DATAp(push, nvc0->constbuf[s][0].u.data, size / 4);
      }
      else {
         struct nv04_resource *res =
            nv04_resource(nvc0->constbuf[s][i].u.buf);
         if (res) {
            uint64_t address
               = nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);

            /* constbufs above 0 are fetched via ubo info in the shader */
            if (i > 0) {
               BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
               PUSH_DATAh(push, address + NVC0_CB_AUX_UBO_INFO(i - 1));
               PUSH_DATA (push, address + NVC0_CB_AUX_UBO_INFO(i - 1));
               BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
               PUSH_DATA (push, 4 * 4);
               PUSH_DATA (push, 0x1);
               BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4);
               PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));

               /* { address_lo, address_hi, size, 0 } */
               PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
               PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
               PUSH_DATA (push, nvc0->constbuf[s][i].size);
               PUSH_DATA (push, 0);
            }

            BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);
            res->cb_bindings[s] |= 1 << i;
         }
      }
   }

   BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
}

/* Upload SSBO address/size records for all compute buffer slots into the
 * auxiliary constant buffer; unbound slots get four zero dwords.
 */
static void
nve4_compute_validate_buffers(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   uint64_t address;
   const int s = 5; /* compute shader stage */
   int i;

   address = nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);

   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, address + NVC0_CB_AUX_BUF_INFO(0));
   PUSH_DATA (push, address + NVC0_CB_AUX_BUF_INFO(0));
   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
   PUSH_DATA (push, 4 * NVC0_MAX_BUFFERS * 4);
   PUSH_DATA (push, 0x1);
   BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4 * NVC0_MAX_BUFFERS);
   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));

   for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
      if (nvc0->buffers[s][i].buffer) {
         struct nv04_resource *res =
            nv04_resource(nvc0->buffers[s][i].buffer);
         PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
         PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
         PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
         PUSH_DATA (push, 0);
         BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
         /* SSBOs are writable: mark the whole bound range as valid. */
         util_range_add(&res->valid_buffer_range,
                        nvc0->buffers[s][i].buffer_offset,
                        nvc0->buffers[s][i].buffer_offset +
                        nvc0->buffers[s][i].buffer_size);
      } else {
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
      }
   }
}

/* Dispatch table of compute state validation functions, keyed by dirty bits. */
static struct nvc0_state_validate
validate_list_cp[] = {
   { nvc0_compprog_validate,              NVC0_NEW_CP_PROGRAM     },
   { nve4_compute_validate_textures,      NVC0_NEW_CP_TEXTURES    },
   { nve4_compute_validate_samplers,      NVC0_NEW_CP_SAMPLERS    },
   { nve4_compute_set_tex_handles,        NVC0_NEW_CP_TEXTURES |
                                          NVC0_NEW_CP_SAMPLERS    },
   { nve4_compute_validate_surfaces,      NVC0_NEW_CP_SURFACES    },
   { nvc0_compute_validate_globals,       NVC0_NEW_CP_GLOBALS     },
   { nve4_compute_validate_buffers,       NVC0_NEW_CP_BUFFERS     },
   { nve4_compute_validate_constbufs,     NVC0_NEW_CP_CONSTBUF    },
};

/* Run all compute validation callbacks whose dirty bits intersect mask.
 * Returns true on success; re-fences the bufctx if the push buffer flushed.
 */
static bool
nve4_state_validate_cp(struct nvc0_context *nvc0, uint32_t mask)
{
   bool ret;

   ret = nvc0_state_validate(nvc0, mask, validate_list_cp,
                             ARRAY_SIZE(validate_list_cp), &nvc0->dirty_cp,
                             nvc0->bufctx_cp);

   if (unlikely(nvc0->state.flushed))
      nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);
   return ret;
}

/* Upload kernel input parameters and the 8-dword grid info (block dims,
 * grid dims, 0, work_dim) into the driver constant buffers. For indirect
 * launches the grid dims are copied from the indirect buffer on the GPU.
 */
static void
nve4_compute_upload_input(struct nvc0_context *nvc0,
                          const struct pipe_grid_info *info)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *cp = nvc0->compprog;
   uint64_t address;

   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);

   if (cp->parm_size) {
      BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_USR_INFO(5));
      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_USR_INFO(5));
      BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
      PUSH_DATA (push, cp->parm_size);
      PUSH_DATA (push, 0x1);
      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (cp->parm_size / 4));
      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
      PUSH_DATAp(push, info->input, cp->parm_size / 4);
   }
   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO(0));
   PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO(0));
   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
   PUSH_DATA (push, 8 * 4);
   PUSH_DATA (push, 0x1);

   if (unlikely(info->indirect)) {
      struct nv04_resource *res = nv04_resource(info->indirect);
      uint32_t offset = res->offset + info->indirect_offset;

      nouveau_pushbuf_space(push, 32, 0, 1);
      PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);

      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8);
      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
      PUSH_DATAp(push, info->block, 3);
      /* Grid dims come from the indirect buffer, inlined into the push. */
      nouveau_pushbuf_data(push, res->bo, offset,
                           NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
   } else {
      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8);
      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
      PUSH_DATAp(push, info->block, 3);
      PUSH_DATAp(push, info->grid, 3);
   }
   PUSH_DATA (push, 0);
   PUSH_DATA (push, info->work_dim);

   BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
   PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
}

/* Pick the smallest shared-memory/L1 cache split that fits shared_size. */
static inline uint8_t
nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size)
{
   if (shared_size > (32 << 10))
      return NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1;
   if (shared_size > (16 << 10))
      return NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1;
   return NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1;
}

/* Record non-user constant buffers 1..6 in the launch descriptor.
 * gp100 selects between the GP100 and NVE4 descriptor layouts.
 */
static void
nve4_compute_setup_buf_cb(struct nvc0_context *nvc0, bool gp100, void *desc)
{
   // only user constant buffers 0-6 can be put in the descriptor, the rest are
   // loaded through global memory
   for (int i = 0; i <= 6; i++) {
      if (nvc0->constbuf[5][i].user || !nvc0->constbuf[5][i].u.buf)
         continue;

      struct nv04_resource *res =
         nv04_resource(nvc0->constbuf[5][i].u.buf);

      uint32_t base = res->offset + nvc0->constbuf[5][i].offset;
      uint32_t size = nvc0->constbuf[5][i].size;
      if (gp100)
         gp100_cp_launch_desc_set_cb(desc, i, res->bo, base, size);
      else
         nve4_cp_launch_desc_set_cb(desc, i, res->bo, base, size);
   }

   // there is no need to do FLUSH(NVE4_COMPUTE_FLUSH_CB) because
   // nve4_compute_upload_input() does it later
}

/* Fill a Kepler/Maxwell launch descriptor from the current program and
 * grid info: entry point, dims, memory sizes, register/barrier counts, CBs.
 */
static void
nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
                               struct nve4_cp_launch_desc *desc,
                               const struct pipe_grid_info *info)
{
   const struct nvc0_screen *screen = nvc0->screen;
   const struct nvc0_program *cp = nvc0->compprog;

   nve4_cp_launch_desc_init_default(desc);

   desc->entry = nvc0_program_symbol_offset(cp, info->pc);

   desc->griddim_x = info->grid[0];
   desc->griddim_y = info->grid[1];
   desc->griddim_z = info->grid[2];
   desc->blockdim_x = info->block[0];
   desc->blockdim_y = info->block[1];
   desc->blockdim_z = info->block[2];

   desc->shared_size = align(cp->cp.smem_size, 0x100);
   desc->local_size_p = (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10);
   desc->local_size_n = 0;
   desc->cstack_size = 0x800;
   desc->cache_split = nve4_compute_derive_cache_split(nvc0, cp->cp.smem_size);

   desc->gpr_alloc = cp->num_gprs;
   desc->bar_alloc = cp->num_barriers;

   // Only bind user uniforms and the driver constant buffer through the
   // launch descriptor because UBOs are stuck to the driver cb to avoid the
   // limitation of 8 CBs.
   if (nvc0->constbuf[5][0].user || cp->parm_size) {
      nve4_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo,
                                 NVC0_CB_USR_INFO(5), 1 << 16);

      // Later logic will attempt to bind a real buffer at position 0. That
      // should not happen if we've bound a user buffer.
      assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);
   }
   nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
                              NVC0_CB_AUX_INFO(5), 1 << 11);

   nve4_compute_setup_buf_cb(nvc0, false, desc);
}

/* Pascal variant of nve4_compute_setup_launch_desc; same logic but with
 * the GP100 descriptor layout (no cache_split field).
 */
static void
gp100_compute_setup_launch_desc(struct nvc0_context *nvc0,
                                struct gp100_cp_launch_desc *desc,
                                const struct pipe_grid_info *info)
{
   const struct nvc0_screen *screen = nvc0->screen;
   const struct nvc0_program *cp = nvc0->compprog;

   gp100_cp_launch_desc_init_default(desc);

   desc->entry = nvc0_program_symbol_offset(cp, info->pc);

   desc->griddim_x = info->grid[0];
   desc->griddim_y = info->grid[1];
   desc->griddim_z = info->grid[2];
   desc->blockdim_x = info->block[0];
   desc->blockdim_y = info->block[1];
   desc->blockdim_z = info->block[2];

   desc->shared_size = align(cp->cp.smem_size, 0x100);
   desc->local_size_p = (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10);
   desc->local_size_n = 0;
   desc->cstack_size = 0x800;

   desc->gpr_alloc = cp->num_gprs;
   desc->bar_alloc = cp->num_barriers;

   // Only bind user uniforms and the driver constant buffer through the
   // launch descriptor because UBOs are stuck to the driver cb to avoid the
   // limitation of 8 CBs.
   if (nvc0->constbuf[5][0].user || cp->parm_size) {
      gp100_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo,
                                  NVC0_CB_USR_INFO(5), 1 << 16);

      // Later logic will attempt to bind a real buffer at position 0. That
      // should not happen if we've bound a user buffer.
      assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);
   }
   gp100_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
                               NVC0_CB_AUX_INFO(5), 1 << 11);

   nve4_compute_setup_buf_cb(nvc0, true, desc);
}

/* Grab a 256-byte-aligned launch descriptor from the scratch allocator.
 * Returns a CPU pointer (or NULL) and the matching GPU address/bo.
 */
static inline void *
nve4_compute_alloc_launch_desc(struct nouveau_context *nv,
                               struct nouveau_bo **pbo, uint64_t *pgpuaddr)
{
   /* Over-allocate 512 bytes so we can round up to 256-byte alignment. */
   uint8_t *ptr = nouveau_scratch_get(nv, 512, pgpuaddr, pbo);
   if (!ptr)
      return NULL;
   if (*pgpuaddr & 255) {
      unsigned adj = 256 - (*pgpuaddr & 255);
      ptr += adj;
      *pgpuaddr += adj;
   }
   return ptr;
}

/* GPU-side copy of `length` bytes from an indirect-args buffer into the
 * launch descriptor at gpuaddr, via the COMPUTE upload path.
 */
static void
nve4_upload_indirect_desc(struct nouveau_pushbuf *push,
                          struct nv04_resource *res,  uint64_t gpuaddr,
                          uint32_t length, uint32_t bo_offset)
{
   BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, gpuaddr);
   PUSH_DATA (push, gpuaddr);
   BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
   PUSH_DATA (push, length);
   PUSH_DATA (push, 1);

   nouveau_pushbuf_space(push, 32, 0, 1);
   PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);

   BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (length / 4));
   PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1));
   nouveau_pushbuf_data(push, res->bo, bo_offset,
                        NVC0_IB_ENTRY_1_NO_PREFETCH | length);
}

/* pipe_context::launch_grid for Kepler and newer: validates compute state,
 * builds and uploads the launch descriptor (patching grid dims from the
 * indirect buffer if needed), then fires the LAUNCH method.
 */
void
nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_screen *screen = nvc0->screen;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   void *desc;
   uint64_t desc_gpuaddr;
   struct nouveau_bo *desc_bo;
   int ret;

   desc = nve4_compute_alloc_launch_desc(&nvc0->base, &desc_bo, &desc_gpuaddr);
   if (!desc) {
      ret = -1;
      goto out;
   }
   BCTX_REFN_bo(nvc0->bufctx_cp, CP_DESC, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
                desc_bo);

   /* Keep resident (bindless) textures and images referenced. */
   list_for_each_entry(struct nvc0_resident, resident, &nvc0->tex_head, list) {
      nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_BINDLESS, resident->buf,
                        resident->flags);
   }

   list_for_each_entry(struct nvc0_resident, resident, &nvc0->img_head, list) {
      nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_BINDLESS, resident->buf,
                        resident->flags);
   }

   ret = !nve4_state_validate_cp(nvc0, ~0);
   if (ret)
      goto out;

   if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS)
      gp100_compute_setup_launch_desc(nvc0, desc, info);
   else
      nve4_compute_setup_launch_desc(nvc0, desc, info);

   nve4_compute_upload_input(nvc0, info);

#ifdef DEBUG
   if (debug_get_num_option("NV50_PROG_DEBUG", 0)) {
      if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS)
         gp100_compute_dump_launch_desc(desc);
      else
         nve4_compute_dump_launch_desc(desc);
   }
#endif

   if (unlikely(info->indirect)) {
      struct nv04_resource *res = nv04_resource(info->indirect);
      uint32_t offset = res->offset + info->indirect_offset;

      /* upload the descriptor */
      BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
      PUSH_DATAh(push, desc_gpuaddr);
      PUSH_DATA (push, desc_gpuaddr);
      BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
      PUSH_DATA (push, 256);
      PUSH_DATA (push, 1);
      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (256 / 4));
      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1));
      PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4);

      if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS) {
         /* Grid dims live at byte offset 48 in the GP100 descriptor. */
         nve4_upload_indirect_desc(push, res, desc_gpuaddr + 48, 12, offset);
      } else {
         /* overwrite griddim_x and griddim_y as two 32-bits integers even
          * if griddim_y must be a 16-bits integer */
         nve4_upload_indirect_desc(push, res, desc_gpuaddr + 48, 8, offset);

         /* overwrite the 16 high bits of griddim_y with griddim_z because
          * we need (z << 16) | x */
         nve4_upload_indirect_desc(push, res, desc_gpuaddr + 54, 4, offset + 8);
      }
   }

   /* upload descriptor and flush */
   nouveau_pushbuf_space(push, 32, 1, 0);
   PUSH_REFN(push, screen->text, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD);
   BEGIN_NVC0(push, NVE4_CP(LAUNCH_DESC_ADDRESS), 1);
   PUSH_DATA (push, desc_gpuaddr >> 8);
   BEGIN_NVC0(push, NVE4_CP(LAUNCH), 1);
   PUSH_DATA (push, 0x3);
   BEGIN_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
   PUSH_DATA (push, 0);

   nvc0_update_compute_invocations_counter(nvc0, info);

out:
   if (ret)
      NOUVEAU_ERR("Failed to launch grid !\n");
   nouveau_scratch_done(&nvc0->base);
   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_DESC);
   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_BINDLESS);
}


#define NVE4_TIC_ENTRY_INVALID 0x000fffff

/* Validate compute-stage textures: allocate/upload TIC entries as needed,
 * collect TIC_FLUSH / TEX_CACHE_CTL commands, and record TIC ids in the
 * per-stage tex_handles. Also invalidates all 3D textures (aliased).
 */
static void
nve4_compute_validate_textures(struct nvc0_context *nvc0)
{
   struct nouveau_bo *txc = nvc0->screen->txc;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const unsigned s = 5; /* compute shader stage */
   unsigned i;
   uint32_t commands[2][32];
   unsigned n[2] = { 0, 0 };

   for (i = 0; i < nvc0->num_textures[s]; ++i) {
      struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
      struct nv04_resource *res;
      const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));

      if (!tic) {
         nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
         continue;
      }
      res = nv04_resource(tic->pipe.texture);
      nvc0_update_tic(nvc0, tic, res);

      if (tic->id < 0) {
         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);

         /* Upload the new TIC entry through the COMPUTE upload path. */
         PUSH_SPACE(push, 16);
         BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
         PUSH_DATAh(push, txc->offset + (tic->id * 32));
         PUSH_DATA (push, txc->offset + (tic->id * 32));
         BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
         PUSH_DATA (push, 32);
         PUSH_DATA (push, 1);
         BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 9);
         PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
         PUSH_DATAp(push, &tic->tic[0], 8);

         commands[0][n[0]++] = (tic->id << 4) | 1;
      } else
      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
         commands[1][n[1]++] = (tic->id << 4) | 1;
      }
      nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;

      nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
      nvc0->tex_handles[s][i] |= tic->id;
      if (dirty)
         BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
   }
   /* Invalidate handles of slots that were bound before but aren't now. */
   for (; i < nvc0->state.num_textures[s]; ++i) {
      nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
      nvc0->textures_dirty[s] |= 1 << i;
   }

   if (n[0]) {
      BEGIN_NIC0(push, NVE4_CP(TIC_FLUSH), n[0]);
      PUSH_DATAp(push, commands[0], n[0]);
   }
   if (n[1]) {
      BEGIN_NIC0(push, NVE4_CP(TEX_CACHE_CTL), n[1]);
      PUSH_DATAp(push, commands[1], n[1]);
   }

   nvc0->state.num_textures[s] = nvc0->num_textures[s];

   /* Invalidate all 3D textures because they are aliased. */
   for (int s = 0; s < 5; s++) {
      for (int i = 0; i < nvc0->num_textures[s]; i++)
         nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
      nvc0->textures_dirty[s] = ~0;
   }
   nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
}


#ifdef DEBUG
/* Human-readable name for a cache_split descriptor value. */
static const char *nve4_cache_split_name(unsigned value)
{
   switch (value) {
   case NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1: return "16K_SHARED_48K_L1";
   case NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1: return "32K_SHARED_32K_L1";
   case NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1: return "48K_SHARED_16K_L1";
   default:
      return "(invalid)";
   }
}

/* Debug dump of a Kepler/Maxwell launch descriptor: raw non-zero dwords
 * followed by the decoded fields.
 */
static void
nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *desc)
{
   const uint32_t *data = (const uint32_t *)desc;
   unsigned i;
   bool zero = false;

   debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n");

   for (i = 0; i < sizeof(*desc); i += 4) {
      if (data[i / 4]) {
         debug_printf("[%x]: 0x%08x\n", i, data[i / 4]);
         zero = false;
      } else
      if (!zero) {
         debug_printf("...\n");
         zero = true;
      }
   }

   debug_printf("entry = 0x%x\n", desc->entry);
   debug_printf("grid dimensions = %ux%ux%u\n",
                desc->griddim_x, desc->griddim_y, desc->griddim_z);
   debug_printf("block dimensions = %ux%ux%u\n",
                desc->blockdim_x, desc->blockdim_y, desc->blockdim_z);
   debug_printf("s[] size: 0x%x\n", desc->shared_size);
   debug_printf("l[] size: -0x%x / +0x%x\n",
                desc->local_size_n, desc->local_size_p);
   debug_printf("stack size: 0x%x\n", desc->cstack_size);
   debug_printf("barrier count: %u\n", desc->bar_alloc);
   debug_printf("$r count: %u\n", desc->gpr_alloc);
   debug_printf("cache split: %s\n", nve4_cache_split_name(desc->cache_split));
   debug_printf("linked tsc: %d\n", desc->linked_tsc);

   for (i = 0; i < 8; ++i) {
      uint64_t address;
      uint32_t size = desc->cb[i].size;
      bool valid = !!(desc->cb_mask & (1 << i));

      address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l;

      if (!valid && !address && !size)
         continue;
      debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n",
                   i, address, size, valid ? "" : "  (invalid)");
   }
}

/* Debug dump of a Pascal launch descriptor (GP100 layout: shifted CB size,
 * no cache_split field).
 */
static void
gp100_compute_dump_launch_desc(const struct gp100_cp_launch_desc *desc)
{
   const uint32_t *data = (const uint32_t *)desc;
   unsigned i;
   bool zero = false;

   debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n");

   for (i = 0; i < sizeof(*desc); i += 4) {
      if (data[i / 4]) {
         debug_printf("[%x]: 0x%08x\n", i, data[i / 4]);
         zero = false;
      } else
      if (!zero) {
         debug_printf("...\n");
         zero = true;
      }
   }

   debug_printf("entry = 0x%x\n", desc->entry);
   debug_printf("grid dimensions = %ux%ux%u\n",
                desc->griddim_x, desc->griddim_y, desc->griddim_z);
   debug_printf("block dimensions = %ux%ux%u\n",
                desc->blockdim_x, desc->blockdim_y, desc->blockdim_z);
   debug_printf("s[] size: 0x%x\n", desc->shared_size);
   debug_printf("l[] size: -0x%x / +0x%x\n",
                desc->local_size_n, desc->local_size_p);
   debug_printf("stack size: 0x%x\n", desc->cstack_size);
   debug_printf("barrier count: %u\n", desc->bar_alloc);
   debug_printf("$r count: %u\n", desc->gpr_alloc);
   debug_printf("linked tsc: %d\n", desc->linked_tsc);

   for (i = 0; i < 8; ++i) {
      uint64_t address;
      uint32_t size = desc->cb[i].size_sh4 << 4;
      bool valid = !!(desc->cb_mask & (1 << i));

      address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l;

      if (!valid && !address && !size)
         continue;
      debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n",
                   i, address, size, valid ? "" : "  (invalid)");
   }
}
#endif

#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
/* Read back and print the trap info block written by the MP trap handler
 * (registers, predicates, thread/CTA ids), then release its lock.
 */
static void
nve4_compute_trap_info(struct nvc0_context *nvc0)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct nouveau_bo *bo = screen->parm;
   int ret, i;
   volatile struct nve4_mp_trap_info *info;
   uint8_t *map;

   ret = nouveau_bo_map(bo, NOUVEAU_BO_RDWR, nvc0->base.client);
   if (ret)
      return;
   map = (uint8_t *)bo->map;
   info = (volatile struct nve4_mp_trap_info *)(map + NVE4_CP_PARAM_TRAP_INFO);

   if (info->lock) {
      debug_printf("trapstat = %08x\n", info->trapstat);
      debug_printf("warperr = %08x\n", info->warperr);
      debug_printf("PC = %x\n", info->pc);
      debug_printf("tid = %u %u %u\n",
                   info->tid[0], info->tid[1], info->tid[2]);
      debug_printf("ctaid = %u %u %u\n",
                   info->ctaid[0], info->ctaid[1], info->ctaid[2]);
      for (i = 0; i <= 63; ++i)
         debug_printf("$r%i = %08x\n", i, info->r[i]);
      for (i = 0; i <= 6; ++i)
         debug_printf("$p%i = %i\n", i, (info->flags >> i) & 1);
      debug_printf("$c = %x\n", info->flags >> 12);
   }
   info->lock = 0;
}
#endif