/*
 * Copyright 2011 Christoph Bumiller
 * Copyright 2015 Samuel Pitoiset
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#define NVC0_PUSH_EXPLICIT_SPACE_CHECKING

#include "nvc0/nvc0_context.h"
#include "nvc0/nvc0_query_hw.h"
#include "nvc0/nvc0_query_hw_metric.h"
#include "nvc0/nvc0_query_hw_sm.h"

#define NVC0_HW_QUERY_ALLOC_SPACE 256

bool
nvc0_hw_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q,
                       int size)
{
   struct nvc0_hw_query *hq = nvc0_hw_query(q);
   struct nvc0_screen *screen = nvc0->screen;
   int ret;

   if (hq->bo) {
      nouveau_bo_ref(NULL, &hq->bo);
      if (hq->mm) {
         if (hq->state == NVC0_HW_QUERY_STATE_READY)
            nouveau_mm_free(hq->mm);
         else
            nouveau_fence_work(screen->base.fence.current,
                               nouveau_mm_free_work, hq->mm);
      }
   }
   if (size) {
      hq->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &hq->bo,
                                   &hq->base_offset);
      if (!hq->bo)
         return false;
      hq->offset = hq->base_offset;

      ret = nouveau_bo_map(hq->bo, 0, screen->base.client);
      if (ret) {
         nvc0_hw_query_allocate(nvc0, q, 0);
         return false;
      }
      hq->data = (uint32_t *)((uint8_t *)hq->bo->map + hq->base_offset);
   }
   return true;
}

static void
nvc0_hw_query_get(struct nouveau_pushbuf *push, struct nvc0_query *q,
                  unsigned offset, uint32_t get)
{
   struct nvc0_hw_query *hq = nvc0_hw_query(q);

   offset += hq->offset;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
   BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, hq->bo->offset + offset);
   PUSH_DATA (push, hq->bo->offset + offset);
   PUSH_DATA (push, hq->sequence);
   PUSH_DATA (push, get);
}

static void
nvc0_hw_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nvc0_hw_query *hq = nvc0_hw_query(q);

   hq->offset += hq->rotate;
   hq->data += hq->rotate / sizeof(*hq->data);
   if (hq->offset - hq->base_offset == NVC0_HW_QUERY_ALLOC_SPACE)
      nvc0_hw_query_allocate(nvc0, q, NVC0_HW_QUERY_ALLOC_SPACE);
}

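/* A query is considered ready either when the GPU has written the expected
 * sequence number back into data[0] (32-bit queries), or when the fence
 * referenced at end_query time has signalled (64-bit queries, whose reports
 * carry no sequence payload).
 */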
static inline void
nvc0_hw_query_update(struct nouveau_client *cli, struct nvc0_query *q)
{
   struct nvc0_hw_query *hq = nvc0_hw_query(q);

   if (hq->is64bit) {
      if (nouveau_fence_signalled(hq->fence))
         hq->state = NVC0_HW_QUERY_STATE_READY;
   } else {
      if (hq->data[0] == hq->sequence)
         hq->state = NVC0_HW_QUERY_STATE_READY;
   }
}

static void
nvc0_hw_destroy_query(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nvc0_hw_query *hq = nvc0_hw_query(q);

   if (hq->funcs && hq->funcs->destroy_query) {
      hq->funcs->destroy_query(nvc0, hq);
      return;
   }

   nvc0_hw_query_allocate(nvc0, q, 0);
   nouveau_fence_ref(NULL, &hq->fence);
   FREE(hq);
}

static void
nvc0_hw_query_write_compute_invocations(struct nvc0_context *nvc0,
                                        struct nvc0_hw_query *hq,
                                        uint32_t offset)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   nouveau_pushbuf_space(push, 16, 0, 8);
   PUSH_REFN(push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
   BEGIN_1IC0(push, NVC0_3D(MACRO_COMPUTE_COUNTER_TO_QUERY), 4);
   PUSH_DATA (push, nvc0->compute_invocations);
   PUSH_DATAh(push, nvc0->compute_invocations);
   PUSH_DATAh(push, hq->bo->offset + hq->offset + offset);
   PUSH_DATA (push, hq->bo->offset + hq->offset + offset);
}

static bool
nvc0_hw_begin_query(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_hw_query *hq = nvc0_hw_query(q);
   bool ret = true;

   if (hq->funcs && hq->funcs->begin_query)
      return hq->funcs->begin_query(nvc0, hq);

   /* For occlusion queries we have to change the storage, because a previous
    * query might set the initial render condition to false even *after* we re-
    * initialized it to true.
    */
   if (hq->rotate) {
      nvc0_hw_query_rotate(nvc0, q);

      /* XXX: can we do this with the GPU, and sync with respect to a previous
       * query ?
       */
      hq->data[0] = hq->sequence; /* initialize sequence */
      hq->data[1] = 1; /* initial render condition = true */
      hq->data[4] = hq->sequence + 1; /* for comparison COND_MODE */
      hq->data[5] = 0;
   }
   hq->sequence++;

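   /* Each nvc0_hw_query_get() below passes an opaque 32-bit "get" word that
    * selects which hardware counter to report and in which format; the
    * (q->index << 5) term appears to fold the vertex stream index into that
    * selector for per-stream queries. The encodings come from nouveau
    * reverse engineering and are kept verbatim.
    */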
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      if (nvc0->screen->num_occlusion_queries_active++) {
         nvc0_hw_query_get(push, q, 0x10, 0x0100f002);
      } else {
         PUSH_SPACE(push, 3);
         BEGIN_NVC0(push, NVC0_3D(COUNTER_RESET), 1);
         PUSH_DATA (push, NVC0_3D_COUNTER_RESET_SAMPLECNT);
         IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
         /* Given that the counter is reset, the contents at 0x10 are
          * equivalent to doing the query -- we would get hq->sequence as the
          * payload and 0 as the reported value. This is already set up above
          * as in the hq->rotate case.
          */
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nvc0_hw_query_get(push, q, 0x10, 0x09005002 | (q->index << 5));
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nvc0_hw_query_get(push, q, 0x10, 0x05805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nvc0_hw_query_get(push, q, 0x20, 0x05805002 | (q->index << 5));
      nvc0_hw_query_get(push, q, 0x30, 0x06805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      nvc0_hw_query_get(push, q, 0x10, 0x03005002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      /* XXX: This get actually writes the number of overflowed streams */
      nvc0_hw_query_get(push, q, 0x10, 0x0f005002);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      nvc0_hw_query_get(push, q, 0x10, 0x00005002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nvc0_hw_query_get(push, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
      nvc0_hw_query_get(push, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
      nvc0_hw_query_get(push, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
      nvc0_hw_query_get(push, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
      nvc0_hw_query_get(push, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nvc0_hw_query_get(push, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nvc0_hw_query_get(push, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nvc0_hw_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
      nvc0_hw_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
      nvc0_hw_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
      nvc0_hw_query_write_compute_invocations(nvc0, hq, 0xc0 + 0xa0);
      break;
   default:
      break;
   }
   hq->state = NVC0_HW_QUERY_STATE_ACTIVE;
   return ret;
}

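/* Note that begin_query stored its initial counter snapshots at an offset
 * into the query slot (0x10 and up); end_query below writes the final
 * snapshots at the base, and get_query_result reports the difference
 * between the two.
 */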
static void
nvc0_hw_end_query(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_hw_query *hq = nvc0_hw_query(q);

   if (hq->funcs && hq->funcs->end_query) {
      hq->funcs->end_query(nvc0, hq);
      return;
   }

   if (hq->state != NVC0_HW_QUERY_STATE_ACTIVE) {
      /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
      if (hq->rotate)
         nvc0_hw_query_rotate(nvc0, q);
      hq->sequence++;
   }
   hq->state = NVC0_HW_QUERY_STATE_ENDED;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      nvc0_hw_query_get(push, q, 0, 0x0100f002);
      if (--nvc0->screen->num_occlusion_queries_active == 0) {
         PUSH_SPACE(push, 1);
         IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nvc0_hw_query_get(push, q, 0, 0x09005002 | (q->index << 5));
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nvc0_hw_query_get(push, q, 0, 0x05805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nvc0_hw_query_get(push, q, 0x00, 0x05805002 | (q->index << 5));
      nvc0_hw_query_get(push, q, 0x10, 0x06805002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      nvc0_hw_query_get(push, q, 0x00, 0x03005002 | (q->index << 5));
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      /* XXX: This get actually writes the number of overflowed streams */
      nvc0_hw_query_get(push, q, 0x00, 0x0f005002);
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIME_ELAPSED:
      nvc0_hw_query_get(push, q, 0, 0x00005002);
      break;
   case PIPE_QUERY_GPU_FINISHED:
      nvc0_hw_query_get(push, q, 0, 0x1000f010);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nvc0_hw_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
      nvc0_hw_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
      nvc0_hw_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
      nvc0_hw_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
      nvc0_hw_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nvc0_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nvc0_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nvc0_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
      nvc0_hw_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
      nvc0_hw_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
      nvc0_hw_query_write_compute_invocations(nvc0, hq, 0xa0);
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* This query is not issued on GPU because disjoint is forced to false */
      hq->state = NVC0_HW_QUERY_STATE_READY;
      break;
   case NVC0_HW_QUERY_TFB_BUFFER_OFFSET:
      /* indexed by TFB buffer instead of by vertex stream */
      nvc0_hw_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5));
      break;
   default:
      break;
   }
   if (hq->is64bit)
      nouveau_fence_ref(nvc0->screen->base.fence.current, &hq->fence);
}

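/* Judging by the inline comments and offsets, each 16-byte report is either
 * { u32 sequence, u32 count, u64 timestamp } for 32-bit queries or
 * { u64 count, u64 timestamp } for 64-bit ones; the begin snapshots follow
 * the block of end snapshots, hence the end-minus-begin subtractions below.
 */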
static bool
nvc0_hw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
                         bool wait, union pipe_query_result *result)
{
   struct nvc0_hw_query *hq = nvc0_hw_query(q);
   uint64_t *res64 = (uint64_t*)result;
   uint32_t *res32 = (uint32_t*)result;
   uint8_t *res8 = (uint8_t*)result;
   uint64_t *data64 = (uint64_t *)hq->data;
   unsigned i;

   if (hq->funcs && hq->funcs->get_query_result)
      return hq->funcs->get_query_result(nvc0, hq, wait, result);

   if (hq->state != NVC0_HW_QUERY_STATE_READY)
      nvc0_hw_query_update(nvc0->screen->base.client, q);

   if (hq->state != NVC0_HW_QUERY_STATE_READY) {
      if (!wait) {
         if (hq->state != NVC0_HW_QUERY_STATE_FLUSHED) {
            hq->state = NVC0_HW_QUERY_STATE_FLUSHED;
            /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
            PUSH_KICK(nvc0->base.pushbuf);
         }
         return false;
      }
      if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
         return false;
      NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
   }
   hq->state = NVC0_HW_QUERY_STATE_READY;

   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res8[0] = true;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      res64[0] = hq->data[1] - hq->data[5];
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      res8[0] = hq->data[1] != hq->data[5];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0] - data64[2];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0] - data64[4];
      res64[1] = data64[2] - data64[6];
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      res8[0] = data64[0] != data64[2];
      break;
   case PIPE_QUERY_TIMESTAMP:
      res64[0] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      res64[0] = 1000000000;
      res8[8] = false;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      for (i = 0; i < 11; ++i)
         res64[i] = data64[i * 2] - data64[24 + i * 2];
      break;
   case NVC0_HW_QUERY_TFB_BUFFER_OFFSET:
      res32[0] = hq->data[1];
      break;
   default:
      assert(0); /* can't happen, we don't create queries with invalid type */
      return false;
   }

   return true;
}

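/* GPU-side result write: the MACRO_QUERY_BUFFER_WRITE macro takes 9
 * parameters -- a clamp selector (i32/u32/none), two 64-bit operands to
 * subtract, an (expected sequence, current report) pair used to test
 * availability, and the destination address.
 */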
static void
nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
                                  struct nvc0_query *q,
                                  bool wait,
                                  enum pipe_query_value_type result_type,
                                  int index,
                                  struct pipe_resource *resource,
                                  unsigned offset)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_hw_query *hq = nvc0_hw_query(q);
   struct nv04_resource *buf = nv04_resource(resource);
   unsigned qoffset = 0, stride;

   assert(!hq->funcs || !hq->funcs->get_query_result);

   if (index == -1) {
      /* TODO: Use a macro to write the availability of the query */
      if (hq->state != NVC0_HW_QUERY_STATE_READY)
         nvc0_hw_query_update(nvc0->screen->base.client, q);
      uint32_t ready[2] = {hq->state == NVC0_HW_QUERY_STATE_READY};
      nvc0->base.push_cb(&nvc0->base, buf, offset,
                         result_type >= PIPE_QUERY_TYPE_I64 ? 2 : 1,
                         ready);

      util_range_add(&buf->base, &buf->valid_buffer_range, offset,
                     offset + (result_type >= PIPE_QUERY_TYPE_I64 ? 8 : 4));

      nvc0_resource_validate(buf, NOUVEAU_BO_WR);

      return;
   }

   /* If the fence guarding this query has not been emitted, that makes a lot
    * of the following logic more complicated.
    */
   if (hq->is64bit && hq->fence->state < NOUVEAU_FENCE_STATE_EMITTED)
      nouveau_fence_emit(hq->fence);

   /* We either need to compute a 32- or 64-bit difference between 2 values,
    * and then store the result as either a 32- or 64-bit value. As such let's
    * treat all inputs as 64-bit (and just push an extra 0 for the 32-bit
    * ones), and have one macro that clamps result to i32, u32, or just
    * outputs the difference (no need to worry about 64-bit clamping).
    */
   if (hq->state != NVC0_HW_QUERY_STATE_READY)
      nvc0_hw_query_update(nvc0->screen->base.client, q);

   if (wait && hq->state != NVC0_HW_QUERY_STATE_READY)
      nvc0_hw_query_fifo_wait(nvc0, q);

   nouveau_pushbuf_space(push, 32, 2, 3);
   PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
   BEGIN_1IC0(push, NVC0_3D(MACRO_QUERY_BUFFER_WRITE), 9);
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: /* XXX what if 64-bit? */
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      PUSH_DATA(push, 0x00000001);
      break;
   default:
      if (result_type == PIPE_QUERY_TYPE_I32)
         PUSH_DATA(push, 0x7fffffff);
      else if (result_type == PIPE_QUERY_TYPE_U32)
         PUSH_DATA(push, 0xffffffff);
      else
         PUSH_DATA(push, 0x00000000);
      break;
   }

   switch (q->type) {
   case PIPE_QUERY_SO_STATISTICS:
      stride = 2;
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      stride = 12;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
      qoffset = 8;
      FALLTHROUGH;
   default:
      assert(index == 0);
      stride = 1;
      break;
   }

   if (hq->is64bit || qoffset) {
      nouveau_pushbuf_data(push, hq->bo, hq->offset + qoffset + 16 * index,
                           8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
      if (q->type == PIPE_QUERY_TIMESTAMP) {
         PUSH_DATA(push, 0);
         PUSH_DATA(push, 0);
      } else {
         nouveau_pushbuf_data(push, hq->bo, hq->offset + qoffset +
                              16 * (index + stride),
                              8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
      }
   } else {
      nouveau_pushbuf_data(push, hq->bo, hq->offset + 4,
                           4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
      PUSH_DATA(push, 0);
      nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 + 4,
                           4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
      PUSH_DATA(push, 0);
   }

   if (wait || hq->state == NVC0_HW_QUERY_STATE_READY) {
      PUSH_DATA(push, 0);
      PUSH_DATA(push, 0);
   } else if (hq->is64bit) {
      PUSH_DATA(push, hq->fence->sequence);
      nouveau_pushbuf_data(push, nvc0->screen->fence.bo, 0,
                           4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
   } else {
      PUSH_DATA(push, hq->sequence);
      nouveau_pushbuf_data(push, hq->bo, hq->offset,
                           4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
   }
   PUSH_DATAh(push, buf->address + offset);
   PUSH_DATA (push, buf->address + offset);

   util_range_add(&buf->base, &buf->valid_buffer_range, offset,
                  offset + (result_type >= PIPE_QUERY_TYPE_I64 ? 8 : 4));

   nvc0_resource_validate(buf, NOUVEAU_BO_WR);
}

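/* Pipe-level vtable shared by all HW queries. SM counter and metric queries
 * are routed through these entry points as well, but override the actual
 * work via the hq->funcs hooks checked at the top of each function.
 */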
static const struct nvc0_query_funcs hw_query_funcs = {
   .destroy_query = nvc0_hw_destroy_query,
   .begin_query = nvc0_hw_begin_query,
   .end_query = nvc0_hw_end_query,
   .get_query_result = nvc0_hw_get_query_result,
   .get_query_result_resource = nvc0_hw_get_query_result_resource,
};

struct nvc0_query *
nvc0_hw_create_query(struct nvc0_context *nvc0, unsigned type, unsigned index)
{
   struct nvc0_hw_query *hq;
   struct nvc0_query *q;
   unsigned space = NVC0_HW_QUERY_ALLOC_SPACE;

   hq = nvc0_hw_sm_create_query(nvc0, type);
   if (hq) {
      hq->base.funcs = &hw_query_funcs;
      return (struct nvc0_query *)hq;
   }

   hq = nvc0_hw_metric_create_query(nvc0, type);
   if (hq) {
      hq->base.funcs = &hw_query_funcs;
      return (struct nvc0_query *)hq;
   }

   hq = CALLOC_STRUCT(nvc0_hw_query);
   if (!hq)
      return NULL;

   q = &hq->base;
   q->funcs = &hw_query_funcs;
   q->type = type;
   q->index = index;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      hq->rotate = 32;
      space = NVC0_HW_QUERY_ALLOC_SPACE;
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      hq->is64bit = true;
      space = 512;
      break;
   case PIPE_QUERY_SO_STATISTICS:
      hq->is64bit = true;
      space = 64;
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      hq->is64bit = true;
      space = 32;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_GPU_FINISHED:
      space = 32;
      break;
   case NVC0_HW_QUERY_TFB_BUFFER_OFFSET:
      space = 16;
      break;
   default:
      debug_printf("invalid query type: %u\n", type);
      FREE(q);
      return NULL;
   }

   if (!nvc0_hw_query_allocate(nvc0, q, space)) {
      FREE(hq);
      return NULL;
   }

   if (hq->rotate) {
      /* we advance before query_begin ! */
      hq->offset -= hq->rotate;
      hq->data -= hq->rotate / sizeof(*hq->data);
   } else
   if (!hq->is64bit)
      hq->data[0] = 0; /* initialize sequence */

   return q;
}

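/* Driver queries are enumerated with the SM counters first, followed by the
 * derived metrics; the id is rebased before being handed to the metric
 * backend.
 */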
int
nvc0_hw_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
                              struct pipe_driver_query_info *info)
{
   int num_hw_sm_queries = 0, num_hw_metric_queries = 0;

   num_hw_sm_queries = nvc0_hw_sm_get_driver_query_info(screen, 0, NULL);
   num_hw_metric_queries =
      nvc0_hw_metric_get_driver_query_info(screen, 0, NULL);

   if (!info)
      return num_hw_sm_queries + num_hw_metric_queries;

   if (id < num_hw_sm_queries)
      return nvc0_hw_sm_get_driver_query_info(screen, id, info);

   return nvc0_hw_metric_get_driver_query_info(screen,
                                               id - num_hw_sm_queries, info);
}

void
nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *push,
                             struct nvc0_query *q, unsigned result_offset)
{
   struct nvc0_hw_query *hq = nvc0_hw_query(q);

   PUSH_REFN(push, hq->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART);
   nouveau_pushbuf_data(push, hq->bo, hq->offset + result_offset, 4 |
                        NVC0_IB_ENTRY_1_NO_PREFETCH);
}

void
nvc0_hw_query_fifo_wait(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_hw_query *hq = nvc0_hw_query(q);
   unsigned offset = hq->offset;

   /* ensure the query's fence has been emitted */
   if (hq->is64bit && hq->fence->state < NOUVEAU_FENCE_STATE_EMITTED)
      nouveau_fence_emit(hq->fence);

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
   if (hq->is64bit) {
      PUSH_DATAh(push, nvc0->screen->fence.bo->offset);
      PUSH_DATA (push, nvc0->screen->fence.bo->offset);
      PUSH_DATA (push, hq->fence->sequence);
   } else {
      PUSH_DATAh(push, hq->bo->offset + offset);
      PUSH_DATA (push, hq->bo->offset + offset);
      PUSH_DATA (push, hq->sequence);
   }
   PUSH_DATA (push, (1 << 12) |
              NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL);
}