/*
 * Copyright 2011 Christoph Bumiller
 * Copyright 2015 Samuel Pitoiset
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#define NV50_PUSH_EXPLICIT_SPACE_CHECKING

#include "nv50/nv50_context.h"
#include "nv50/nv50_query_hw.h"
#include "nv50/nv50_query_hw_metric.h"
#include "nv50/nv50_query_hw_sm.h"
#include "nv_object.xml.h"

/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
 * (since we use only a single GPU channel per screen) will not work properly.
 *
 * The first is not that big of an issue because OpenGL does not allow nested
 * queries anyway.
 */

#define NV50_HW_QUERY_ALLOC_SPACE 256

bool
nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q,
                       int size)
{
   struct nv50_screen *screen = nv50->screen;
   struct nv50_hw_query *hq = nv50_hw_query(q);
   int ret;

   if (hq->bo) {
      nouveau_bo_ref(NULL, &hq->bo);
      if (hq->mm) {
         if (hq->state == NV50_HW_QUERY_STATE_READY)
            nouveau_mm_free(hq->mm);
         else
            nouveau_fence_work(screen->base.fence.current,
                               nouveau_mm_free_work, hq->mm);
      }
   }
   if (size) {
      hq->mm = nouveau_mm_allocate(screen->base.mm_GART, size,
                                   &hq->bo, &hq->base_offset);
      if (!hq->bo)
         return false;
      hq->offset = hq->base_offset;

      ret = nouveau_bo_map(hq->bo, 0, screen->base.client);
      if (ret) {
         nv50_hw_query_allocate(nv50, q, 0);
         return false;
      }
      hq->data = (uint32_t *)((uint8_t *)hq->bo->map + hq->base_offset);
   }
   return true;
}

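/* Emit a QUERY_GET command: the GPU writes a report for the counter/type
 * encoded in 'get' into the query buffer at the given offset. The report
 * layout depends on the type; see the layout comments in
 * nv50_hw_get_query_result below.
 */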
static void
nv50_hw_query_get(struct nouveau_pushbuf *push, struct nv50_query *q,
                  unsigned offset, uint32_t get)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);

   offset += hq->offset;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
   BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, hq->bo->offset + offset);
   PUSH_DATA (push, hq->bo->offset + offset);
   PUSH_DATA (push, hq->sequence);
   PUSH_DATA (push, get);
}

static inline void
nv50_hw_query_update(struct nv50_query *q)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);

   if (hq->is64bit) {
      if (nouveau_fence_signalled(hq->fence))
         hq->state = NV50_HW_QUERY_STATE_READY;
   } else {
      if (hq->data[0] == hq->sequence)
         hq->state = NV50_HW_QUERY_STATE_READY;
   }
}

static void
nv50_hw_destroy_query(struct nv50_context *nv50, struct nv50_query *q)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);

   if (hq->funcs && hq->funcs->destroy_query) {
      hq->funcs->destroy_query(nv50, hq);
      return;
   }

   nv50_hw_query_allocate(nv50, q, 0);
   nouveau_fence_ref(NULL, &hq->fence);
   FREE(hq);
}

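/* Begin a query. The first active occlusion query resets and enables the
 * hardware sample counter; any other query type (and overlapping occlusion
 * queries) just records a starting report, which nv50_hw_get_query_result
 * later subtracts from the ending one.
 */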
static boolean
nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_hw_query *hq = nv50_hw_query(q);

   if (hq->funcs && hq->funcs->begin_query)
      return hq->funcs->begin_query(nv50, hq);

   /* For occlusion queries we have to change the storage, because a previous
    * query might set the initial render condition to false even *after* we
    * re-initialized it to true.
    */
   if (hq->rotate) {
      hq->offset += hq->rotate;
      hq->data += hq->rotate / sizeof(*hq->data);
      if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE)
         nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE);

      /* XXX: can we do this with the GPU, and sync with respect to a previous
       * query?
       */
      hq->data[0] = hq->sequence; /* initialize sequence */
      hq->data[1] = 1; /* initial render condition = true */
      hq->data[4] = hq->sequence + 1; /* for comparison COND_MODE */
      hq->data[5] = 0;
   }
   if (!hq->is64bit)
      hq->data[0] = hq->sequence++; /* the previously used one */

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      if (nv50->screen->num_occlusion_queries_active++) {
         nv50_hw_query_get(push, q, 0x10, 0x0100f002);
      } else {
         PUSH_SPACE(push, 4);
         BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
         PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
         BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
         PUSH_DATA (push, 1);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nv50_hw_query_get(push, q, 0x10, 0x06805002);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nv50_hw_query_get(push, q, 0x10, 0x05805002);
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nv50_hw_query_get(push, q, 0x20, 0x05805002);
      nv50_hw_query_get(push, q, 0x30, 0x06805002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nv50_hw_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */
      nv50_hw_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */
      nv50_hw_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */
      nv50_hw_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */
      nv50_hw_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */
      nv50_hw_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      nv50_hw_query_get(push, q, 0x10, 0x00005002);
      break;
   default:
      assert(0);
      return false;
   }
   hq->state = NV50_HW_QUERY_STATE_ACTIVE;
   return true;
}

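/* End a query: write the final report(s) at the beginning of the query's
 * slot, in front of the values saved by begin_query. For 64-bit queries,
 * also attach the current fence so nv50_hw_query_update can tell when the
 * reports have landed.
 */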
static void
nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_hw_query *hq = nv50_hw_query(q);

   if (hq->funcs && hq->funcs->end_query) {
      hq->funcs->end_query(nv50, hq);
      return;
   }

   hq->state = NV50_HW_QUERY_STATE_ENDED;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      nv50_hw_query_get(push, q, 0, 0x0100f002);
      if (--nv50->screen->num_occlusion_queries_active == 0) {
         PUSH_SPACE(push, 2);
         BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
         PUSH_DATA (push, 0);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nv50_hw_query_get(push, q, 0, 0x06805002);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nv50_hw_query_get(push, q, 0, 0x05805002);
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nv50_hw_query_get(push, q, 0x00, 0x05805002);
      nv50_hw_query_get(push, q, 0x10, 0x06805002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nv50_hw_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
      nv50_hw_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
      nv50_hw_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
      nv50_hw_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
      nv50_hw_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nv50_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
      break;
   case PIPE_QUERY_TIMESTAMP:
      hq->sequence++;
      /* fall through */
   case PIPE_QUERY_TIME_ELAPSED:
      nv50_hw_query_get(push, q, 0, 0x00005002);
      break;
   case PIPE_QUERY_GPU_FINISHED:
      hq->sequence++;
      nv50_hw_query_get(push, q, 0, 0x1000f010);
      break;
   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
      hq->sequence++;
      nv50_hw_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* This query is not issued on the GPU because disjoint is forced to
       * false */
      hq->state = NV50_HW_QUERY_STATE_READY;
      break;
   default:
      assert(0);
      break;
   }
   if (hq->is64bit)
      nouveau_fence_ref(nv50->screen->base.fence.current, &hq->fence);
}

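/* Fetch the result of a query. If the report has not landed yet and wait is
 * false, flush the pushbuf once and report the result as unavailable;
 * otherwise block on the backing BO until the GPU has written it.
 */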
static boolean
nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
                         boolean wait, union pipe_query_result *result)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);
   uint64_t *res64 = (uint64_t *)result;
   uint32_t *res32 = (uint32_t *)result;
   uint8_t *res8 = (uint8_t *)result;
   uint64_t *data64 = (uint64_t *)hq->data;
   int i;

   if (hq->funcs && hq->funcs->get_query_result)
      return hq->funcs->get_query_result(nv50, hq, wait, result);

   if (hq->state != NV50_HW_QUERY_STATE_READY)
      nv50_hw_query_update(q);

   if (hq->state != NV50_HW_QUERY_STATE_READY) {
      if (!wait) {
         /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
         if (hq->state != NV50_HW_QUERY_STATE_FLUSHED) {
            hq->state = NV50_HW_QUERY_STATE_FLUSHED;
            PUSH_KICK(nv50->base.pushbuf);
         }
         return false;
      }
      if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
         return false;
   }
   hq->state = NV50_HW_QUERY_STATE_READY;

   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res8[0] = true;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      res64[0] = hq->data[1] - hq->data[5];
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      res8[0] = hq->data[1] != hq->data[5];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0] - data64[2];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0] - data64[4];
      res64[1] = data64[2] - data64[6];
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      for (i = 0; i < 8; ++i)
         res64[i] = data64[i * 2] - data64[16 + i * 2];
      break;
   case PIPE_QUERY_TIMESTAMP:
      res64[0] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      res64[0] = 1000000000;
      res8[8] = false;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
      res32[0] = hq->data[1];
      break;
   default:
      assert(0);
      return false;
   }

   return true;
}

static const struct nv50_query_funcs hw_query_funcs = {
   .destroy_query = nv50_hw_destroy_query,
   .begin_query = nv50_hw_begin_query,
   .end_query = nv50_hw_end_query,
   .get_query_result = nv50_hw_get_query_result,
};

/* Create a query object. SM performance counters and driver metrics are
 * delegated to their own implementations; everything else is a plain
 * hardware query backed by a slice of GART memory.
 */
struct nv50_query *
nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
{
   struct nv50_hw_query *hq;
   struct nv50_query *q;

   hq = nv50_hw_sm_create_query(nv50, type);
   if (hq) {
      hq->base.funcs = &hw_query_funcs;
      return (struct nv50_query *)hq;
   }

   hq = nv50_hw_metric_create_query(nv50, type);
   if (hq) {
      hq->base.funcs = &hw_query_funcs;
      return (struct nv50_query *)hq;
   }

   hq = CALLOC_STRUCT(nv50_hw_query);
   if (!hq)
      return NULL;

   q = &hq->base;
   q->funcs = &hw_query_funcs;
   q->type = type;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      hq->rotate = 32;
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   case PIPE_QUERY_SO_STATISTICS:
   case PIPE_QUERY_PIPELINE_STATISTICS:
      hq->is64bit = true;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_GPU_FINISHED:
   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
      break;
   default:
      debug_printf("invalid query type: %u\n", type);
      FREE(q);
      return NULL;
   }

   if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) {
      FREE(hq);
      return NULL;
   }

   if (hq->rotate) {
      /* we advance before query_begin! */
      hq->offset -= hq->rotate;
      hq->data -= hq->rotate / sizeof(*hq->data);
   }

   return q;
}

int
nv50_hw_get_driver_query_info(struct nv50_screen *screen, unsigned id,
                              struct pipe_driver_query_info *info)
{
   int num_hw_sm_queries = 0, num_hw_metric_queries = 0;

   num_hw_sm_queries = nv50_hw_sm_get_driver_query_info(screen, 0, NULL);
   num_hw_metric_queries =
      nv50_hw_metric_get_driver_query_info(screen, 0, NULL);

   if (!info)
      return num_hw_sm_queries + num_hw_metric_queries;

   if (id < num_hw_sm_queries)
      return nv50_hw_sm_get_driver_query_info(screen, id, info);

   return nv50_hw_metric_get_driver_query_info(screen,
                                               id - num_hw_sm_queries, info);
}

/* Emit 'method' with one 32-bit word of the query result as its argument,
 * waiting for the report on the CPU if it has not landed yet.
 */
void
nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
                             struct nv50_query *q, unsigned result_offset)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);

   nv50_hw_query_update(q);
   if (hq->state != NV50_HW_QUERY_STATE_READY)
      nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, push->client);
   hq->state = NV50_HW_QUERY_STATE_READY;

   BEGIN_NV04(push, SUBC_3D(method), 1);
   PUSH_DATA (push, hq->data[result_offset / 4]);
}

/* Make the channel wait (semaphore acquire) until the query's sequence
 * number has been written, i.e. until the report is complete (used when the
 * GPU itself consumes the result, e.g. for conditional rendering).
 */
void
nv84_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);
   unsigned offset = hq->offset;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, hq->bo->offset + offset);
   PUSH_DATA (push, hq->bo->offset + offset);
   PUSH_DATA (push, hq->sequence);
   PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
}