1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include <limits.h> 29#include "util/u_memory.h" 30#include "util/u_math.h" 31#include "util/u_rect.h" 32#include "util/u_surface.h" 33#include "util/u_pack_color.h" 34#include "util/u_string.h" 35#include "util/u_thread.h" 36#include "util/u_memset.h" 37#include "util/os_time.h" 38 39#include "lp_scene_queue.h" 40#include "lp_context.h" 41#include "lp_debug.h" 42#include "lp_fence.h" 43#include "lp_perf.h" 44#include "lp_query.h" 45#include "lp_rast.h" 46#include "lp_rast_priv.h" 47#include "gallivm/lp_bld_format.h" 48#include "gallivm/lp_bld_debug.h" 49#include "lp_scene.h" 50#include "lp_tex_sample.h" 51 52 53#ifdef DEBUG 54int jit_line = 0; 55const struct lp_rast_state *jit_state = NULL; 56const struct lp_rasterizer_task *jit_task = NULL; 57#endif 58 59const float lp_sample_pos_4x[4][2] = { { 0.375, 0.125 }, 60 { 0.875, 0.375 }, 61 { 0.125, 0.625 }, 62 { 0.625, 0.875 } }; 63 64/** 65 * Begin rasterizing a scene. 66 * Called once per scene by one thread. 67 */ 68static void 69lp_rast_begin( struct lp_rasterizer *rast, 70 struct lp_scene *scene ) 71{ 72 rast->curr_scene = scene; 73 74 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 75 76 lp_scene_begin_rasterization( scene ); 77 lp_scene_bin_iter_begin( scene ); 78} 79 80 81static void 82lp_rast_end( struct lp_rasterizer *rast ) 83{ 84 lp_scene_end_rasterization( rast->curr_scene ); 85 86 rast->curr_scene = NULL; 87} 88 89 90/** 91 * Beginning rasterization of a tile. 92 * \param x window X position of the tile, in pixels 93 * \param y window Y position of the tile, in pixels 94 */ 95static void 96lp_rast_tile_begin(struct lp_rasterizer_task *task, 97 const struct cmd_bin *bin, 98 int x, int y) 99{ 100 unsigned i; 101 struct lp_scene *scene = task->scene; 102 103 LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); 104 105 task->bin = bin; 106 task->x = x * TILE_SIZE; 107 task->y = y * TILE_SIZE; 108 task->width = TILE_SIZE + x * TILE_SIZE > task->scene->fb.width ? 109 task->scene->fb.width - x * TILE_SIZE : TILE_SIZE; 110 task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ? 111 task->scene->fb.height - y * TILE_SIZE : TILE_SIZE; 112 113 task->thread_data.vis_counter = 0; 114 task->thread_data.ps_invocations = 0; 115 116 for (i = 0; i < task->scene->fb.nr_cbufs; i++) { 117 if (task->scene->fb.cbufs[i]) { 118 task->color_tiles[i] = scene->cbufs[i].map + 119 scene->cbufs[i].stride * task->y + 120 scene->cbufs[i].format_bytes * task->x; 121 } 122 } 123 if (task->scene->fb.zsbuf) { 124 task->depth_tile = scene->zsbuf.map + 125 scene->zsbuf.stride * task->y + 126 scene->zsbuf.format_bytes * task->x; 127 } 128} 129 130 131/** 132 * Clear the rasterizer's current color tile. 133 * This is a bin command called during bin processing. 134 * Clear commands always clear all bound layers. 135 */ 136static void 137lp_rast_clear_color(struct lp_rasterizer_task *task, 138 const union lp_rast_cmd_arg arg) 139{ 140 const struct lp_scene *scene = task->scene; 141 unsigned cbuf = arg.clear_rb->cbuf; 142 union util_color uc; 143 enum pipe_format format; 144 145 /* we never bin clear commands for non-existing buffers */ 146 assert(cbuf < scene->fb.nr_cbufs); 147 assert(scene->fb.cbufs[cbuf]); 148 149 format = scene->fb.cbufs[cbuf]->format; 150 uc = arg.clear_rb->color_val; 151 152 /* 153 * this is pretty rough since we have target format (bunch of bytes...) here. 154 * dump it as raw 4 dwords. 155 */ 156 LP_DBG(DEBUG_RAST, "%s clear value (target format %d) raw 0x%x,0x%x,0x%x,0x%x\n", 157 __FUNCTION__, format, uc.ui[0], uc.ui[1], uc.ui[2], uc.ui[3]); 158 159 for (unsigned s = 0; s < scene->cbufs[cbuf].nr_samples; s++) { 160 void *map = (char *)scene->cbufs[cbuf].map + scene->cbufs[cbuf].sample_stride * s; 161 util_fill_box(map, 162 format, 163 scene->cbufs[cbuf].stride, 164 scene->cbufs[cbuf].layer_stride, 165 task->x, 166 task->y, 167 0, 168 task->width, 169 task->height, 170 scene->fb_max_layer + 1, 171 &uc); 172 } 173 174 /* this will increase for each rb which probably doesn't mean much */ 175 LP_COUNT(nr_color_tile_clear); 176} 177 178 179/** 180 * Clear the rasterizer's current z/stencil tile. 181 * This is a bin command called during bin processing. 182 * Clear commands always clear all bound layers. 183 */ 184static void 185lp_rast_clear_zstencil(struct lp_rasterizer_task *task, 186 const union lp_rast_cmd_arg arg) 187{ 188 const struct lp_scene *scene = task->scene; 189 uint64_t clear_value64 = arg.clear_zstencil.value; 190 uint64_t clear_mask64 = arg.clear_zstencil.mask; 191 uint32_t clear_value = (uint32_t) clear_value64; 192 uint32_t clear_mask = (uint32_t) clear_mask64; 193 const unsigned height = task->height; 194 const unsigned width = task->width; 195 const unsigned dst_stride = scene->zsbuf.stride; 196 uint8_t *dst; 197 unsigned i, j; 198 unsigned block_size; 199 200 LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n", 201 __FUNCTION__, clear_value, clear_mask); 202 203 /* 204 * Clear the area of the depth/depth buffer matching this tile. 205 */ 206 207 if (scene->fb.zsbuf) { 208 unsigned layer; 209 210 for (unsigned s = 0; s < scene->zsbuf.nr_samples; s++) { 211 uint8_t *dst_layer = task->depth_tile + (s * scene->zsbuf.sample_stride); 212 block_size = util_format_get_blocksize(scene->fb.zsbuf->format); 213 214 clear_value &= clear_mask; 215 216 for (layer = 0; layer <= scene->fb_max_layer; layer++) { 217 dst = dst_layer; 218 219 switch (block_size) { 220 case 1: 221 assert(clear_mask == 0xff); 222 for (i = 0; i < height; i++) { 223 uint8_t *row = (uint8_t *)dst; 224 memset(row, (uint8_t) clear_value, width); 225 dst += dst_stride; 226 } 227 break; 228 case 2: 229 if (clear_mask == 0xffff) { 230 for (i = 0; i < height; i++) { 231 uint16_t *row = (uint16_t *)dst; 232 for (j = 0; j < width; j++) 233 *row++ = (uint16_t) clear_value; 234 dst += dst_stride; 235 } 236 } 237 else { 238 for (i = 0; i < height; i++) { 239 uint16_t *row = (uint16_t *)dst; 240 for (j = 0; j < width; j++) { 241 uint16_t tmp = ~clear_mask & *row; 242 *row++ = clear_value | tmp; 243 } 244 dst += dst_stride; 245 } 246 } 247 break; 248 case 4: 249 if (clear_mask == 0xffffffff) { 250 for (i = 0; i < height; i++) { 251 util_memset32(dst, clear_value, width); 252 dst += dst_stride; 253 } 254 } 255 else { 256 for (i = 0; i < height; i++) { 257 uint32_t *row = (uint32_t *)dst; 258 for (j = 0; j < width; j++) { 259 uint32_t tmp = ~clear_mask & *row; 260 *row++ = clear_value | tmp; 261 } 262 dst += dst_stride; 263 } 264 } 265 break; 266 case 8: 267 clear_value64 &= clear_mask64; 268 if (clear_mask64 == 0xffffffffffULL) { 269 for (i = 0; i < height; i++) { 270 util_memset64(dst, clear_value64, width); 271 dst += dst_stride; 272 } 273 } 274 else { 275 for (i = 0; i < height; i++) { 276 uint64_t *row = (uint64_t *)dst; 277 for (j = 0; j < width; j++) { 278 uint64_t tmp = ~clear_mask64 & *row; 279 *row++ = clear_value64 | tmp; 280 } 281 dst += dst_stride; 282 } 283 } 284 break; 285 286 default: 287 assert(0); 288 break; 289 } 290 dst_layer += scene->zsbuf.layer_stride; 291 } 292 } 293 } 294} 295 296 297 298/** 299 * Run the shader on all blocks in a tile. This is used when a tile is 300 * completely contained inside a triangle. 301 * This is a bin command called during bin processing. 302 */ 303static void 304lp_rast_shade_tile(struct lp_rasterizer_task *task, 305 const union lp_rast_cmd_arg arg) 306{ 307 const struct lp_scene *scene = task->scene; 308 const struct lp_rast_shader_inputs *inputs = arg.shade_tile; 309 const struct lp_rast_state *state; 310 struct lp_fragment_shader_variant *variant; 311 const unsigned tile_x = task->x, tile_y = task->y; 312 unsigned x, y; 313 314 if (inputs->disable) { 315 /* This command was partially binned and has been disabled */ 316 return; 317 } 318 319 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 320 321 state = task->state; 322 assert(state); 323 if (!state) { 324 return; 325 } 326 variant = state->variant; 327 328 /* render the whole 64x64 tile in 4x4 chunks */ 329 for (y = 0; y < task->height; y += 4){ 330 for (x = 0; x < task->width; x += 4) { 331 uint8_t *color[PIPE_MAX_COLOR_BUFS]; 332 unsigned stride[PIPE_MAX_COLOR_BUFS]; 333 unsigned sample_stride[PIPE_MAX_COLOR_BUFS]; 334 uint8_t *depth = NULL; 335 unsigned depth_stride = 0; 336 unsigned depth_sample_stride = 0; 337 unsigned i; 338 339 /* color buffer */ 340 for (i = 0; i < scene->fb.nr_cbufs; i++){ 341 if (scene->fb.cbufs[i]) { 342 stride[i] = scene->cbufs[i].stride; 343 sample_stride[i] = scene->cbufs[i].sample_stride; 344 color[i] = lp_rast_get_color_block_pointer(task, i, tile_x + x, 345 tile_y + y, inputs->layer + inputs->view_index); 346 } 347 else { 348 stride[i] = 0; 349 sample_stride[i] = 0; 350 color[i] = NULL; 351 } 352 } 353 354 /* depth buffer */ 355 if (scene->zsbuf.map) { 356 depth = lp_rast_get_depth_block_pointer(task, tile_x + x, 357 tile_y + y, inputs->layer + inputs->view_index); 358 depth_stride = scene->zsbuf.stride; 359 depth_sample_stride = scene->zsbuf.sample_stride; 360 } 361 362 uint64_t mask = 0; 363 for (unsigned i = 0; i < scene->fb_max_samples; i++) 364 mask |= (uint64_t)(0xffff) << (16 * i); 365 366 /* Propagate non-interpolated raster state. */ 367 task->thread_data.raster_state.viewport_index = inputs->viewport_index; 368 task->thread_data.raster_state.view_index = inputs->view_index; 369 370 /* run shader on 4x4 block */ 371 BEGIN_JIT_CALL(state, task); 372 variant->jit_function[RAST_WHOLE]( &state->jit_context, 373 tile_x + x, tile_y + y, 374 inputs->frontfacing, 375 GET_A0(inputs), 376 GET_DADX(inputs), 377 GET_DADY(inputs), 378 color, 379 depth, 380 mask, 381 &task->thread_data, 382 stride, 383 depth_stride, 384 sample_stride, 385 depth_sample_stride); 386 END_JIT_CALL(); 387 } 388 } 389} 390 391 392/** 393 * Run the shader on all blocks in a tile. This is used when a tile is 394 * completely contained inside a triangle, and the shader is opaque. 395 * This is a bin command called during bin processing. 396 */ 397static void 398lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task, 399 const union lp_rast_cmd_arg arg) 400{ 401 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 402 403 assert(task->state); 404 if (!task->state) { 405 return; 406 } 407 408 lp_rast_shade_tile(task, arg); 409} 410 411 412/** 413 * Compute shading for a 4x4 block of pixels inside a triangle. 414 * This is a bin command called during bin processing. 415 * \param x X position of quad in window coords 416 * \param y Y position of quad in window coords 417 */ 418void 419lp_rast_shade_quads_mask_sample(struct lp_rasterizer_task *task, 420 const struct lp_rast_shader_inputs *inputs, 421 unsigned x, unsigned y, 422 uint64_t mask) 423{ 424 const struct lp_rast_state *state = task->state; 425 struct lp_fragment_shader_variant *variant = state->variant; 426 const struct lp_scene *scene = task->scene; 427 uint8_t *color[PIPE_MAX_COLOR_BUFS]; 428 unsigned stride[PIPE_MAX_COLOR_BUFS]; 429 unsigned sample_stride[PIPE_MAX_COLOR_BUFS]; 430 uint8_t *depth = NULL; 431 unsigned depth_stride = 0; 432 unsigned depth_sample_stride = 0; 433 unsigned i; 434 435 assert(state); 436 437 /* Sanity checks */ 438 assert(x < scene->tiles_x * TILE_SIZE); 439 assert(y < scene->tiles_y * TILE_SIZE); 440 assert(x % TILE_VECTOR_WIDTH == 0); 441 assert(y % TILE_VECTOR_HEIGHT == 0); 442 443 assert((x % 4) == 0); 444 assert((y % 4) == 0); 445 446 /* color buffer */ 447 for (i = 0; i < scene->fb.nr_cbufs; i++) { 448 if (scene->fb.cbufs[i]) { 449 stride[i] = scene->cbufs[i].stride; 450 sample_stride[i] = scene->cbufs[i].sample_stride; 451 color[i] = lp_rast_get_color_block_pointer(task, i, x, y, 452 inputs->layer + inputs->view_index); 453 } 454 else { 455 stride[i] = 0; 456 sample_stride[i] = 0; 457 color[i] = NULL; 458 } 459 } 460 461 /* depth buffer */ 462 if (scene->zsbuf.map) { 463 depth_stride = scene->zsbuf.stride; 464 depth_sample_stride = scene->zsbuf.sample_stride; 465 depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer + inputs->view_index); 466 } 467 468 assert(lp_check_alignment(state->jit_context.u8_blend_color, 16)); 469 470 /* 471 * The rasterizer may produce fragments outside our 472 * allocated 4x4 blocks hence need to filter them out here. 473 */ 474 if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { 475 /* Propagate non-interpolated raster state. */ 476 task->thread_data.raster_state.viewport_index = inputs->viewport_index; 477 task->thread_data.raster_state.view_index = inputs->view_index; 478 479 /* run shader on 4x4 block */ 480 BEGIN_JIT_CALL(state, task); 481 variant->jit_function[RAST_EDGE_TEST](&state->jit_context, 482 x, y, 483 inputs->frontfacing, 484 GET_A0(inputs), 485 GET_DADX(inputs), 486 GET_DADY(inputs), 487 color, 488 depth, 489 mask, 490 &task->thread_data, 491 stride, 492 depth_stride, 493 sample_stride, 494 depth_sample_stride); 495 END_JIT_CALL(); 496 } 497} 498 499void 500lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, 501 const struct lp_rast_shader_inputs *inputs, 502 unsigned x, unsigned y, 503 unsigned mask) 504{ 505 uint64_t new_mask = 0; 506 for (unsigned i = 0; i < task->scene->fb_max_samples; i++) 507 new_mask |= ((uint64_t)mask) << (16 * i); 508 lp_rast_shade_quads_mask_sample(task, inputs, x, y, new_mask); 509} 510 511/** 512 * Directly copy pixels from a texture to the destination color buffer. 513 * This is a bin command called during bin processing. 514 */ 515static void 516lp_rast_blit_tile_to_dest(struct lp_rasterizer_task *task, 517 const union lp_rast_cmd_arg arg) 518{ 519 const struct lp_scene *scene = task->scene; 520 const struct lp_rast_shader_inputs *inputs = arg.shade_tile; 521 const struct lp_rast_state *state = task->state; 522 struct lp_fragment_shader_variant *variant = state->variant; 523 const struct lp_jit_texture *texture = &state->jit_context.textures[0]; 524 const uint8_t *src; 525 uint8_t *dst; 526 unsigned src_stride; 527 unsigned dst_stride; 528 struct pipe_surface *cbuf = scene->fb.cbufs[0]; 529 const unsigned face_slice = cbuf->u.tex.first_layer; 530 const unsigned level = cbuf->u.tex.level; 531 struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture); 532 int src_x, src_y; 533 534 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 535 536 if (inputs->disable) { 537 /* This command was partially binned and has been disabled */ 538 return; 539 } 540 541 dst = llvmpipe_get_texture_image_address(lpt, face_slice, level); 542 543 if (!dst) 544 return; 545 546 dst_stride = lpt->row_stride[level]; 547 548 src = texture->base; 549 src_stride = texture->row_stride[0]; 550 551 src_x = util_iround(GET_A0(inputs)[1][0]*texture->width - 0.5f); 552 src_y = util_iround(GET_A0(inputs)[1][1]*texture->height - 0.5f); 553 554 src_x = src_x + task->x; 555 src_y = src_y + task->y; 556 557 if (0) { 558 union util_color uc; 559 uc.ui[0] = 0xff0000ff; 560 util_fill_rect(dst, 561 cbuf->format, 562 dst_stride, 563 task->x, 564 task->y, 565 task->width, 566 task->height, 567 &uc); 568 return; 569 } 570 571 if (src_x >= 0 && 572 src_y >= 0 && 573 src_x + task->width <= texture->width && 574 src_y + task->height <= texture->height) { 575 576 if (variant->shader->kind == LP_FS_KIND_BLIT_RGBA || 577 (variant->shader->kind == LP_FS_KIND_BLIT_RGB1 && 578 cbuf->format == PIPE_FORMAT_B8G8R8X8_UNORM)) { 579 util_copy_rect(dst, 580 cbuf->format, 581 dst_stride, 582 task->x, task->y, 583 task->width, task->height, 584 src, src_stride, 585 src_x, src_y); 586 return; 587 } 588 589 if (variant->shader->kind == LP_FS_KIND_BLIT_RGB1) { 590 if (cbuf->format == PIPE_FORMAT_B8G8R8A8_UNORM) { 591 int x, y; 592 593 dst += task->x * 4; 594 src += src_x * 4; 595 dst += task->y * dst_stride; 596 src += src_y * src_stride; 597 598 for (y = 0; y < task->height; ++y) { 599 const uint32_t *src_row = (const uint32_t *)src; 600 uint32_t *dst_row = (uint32_t *)dst; 601 602 for (x = 0; x < task->width; ++x) { 603 *dst_row++ = *src_row++ | 0xff000000; 604 } 605 dst += dst_stride; 606 src += src_stride; 607 } 608 609 return; 610 } 611 } 612 613 } 614 615 /* 616 * Fall back to the jit shaders. 617 */ 618 619 lp_rast_shade_tile_opaque(task, arg); 620} 621 622static void 623lp_rast_blit_tile(struct lp_rasterizer_task *task, 624 const union lp_rast_cmd_arg arg) 625{ 626 /* This kindof just works, but isn't efficient: 627 */ 628 lp_rast_blit_tile_to_dest(task, arg); 629} 630 631/** 632 * Begin a new occlusion query. 633 * This is a bin command put in all bins. 634 * Called per thread. 635 */ 636static void 637lp_rast_begin_query(struct lp_rasterizer_task *task, 638 const union lp_rast_cmd_arg arg) 639{ 640 struct llvmpipe_query *pq = arg.query_obj; 641 642 switch (pq->type) { 643 case PIPE_QUERY_OCCLUSION_COUNTER: 644 case PIPE_QUERY_OCCLUSION_PREDICATE: 645 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 646 pq->start[task->thread_index] = task->thread_data.vis_counter; 647 break; 648 case PIPE_QUERY_PIPELINE_STATISTICS: 649 pq->start[task->thread_index] = task->thread_data.ps_invocations; 650 break; 651 case PIPE_QUERY_TIME_ELAPSED: 652 pq->start[task->thread_index] = os_time_get_nano(); 653 break; 654 default: 655 assert(0); 656 break; 657 } 658} 659 660 661/** 662 * End the current occlusion query. 663 * This is a bin command put in all bins. 664 * Called per thread. 665 */ 666static void 667lp_rast_end_query(struct lp_rasterizer_task *task, 668 const union lp_rast_cmd_arg arg) 669{ 670 struct llvmpipe_query *pq = arg.query_obj; 671 672 switch (pq->type) { 673 case PIPE_QUERY_OCCLUSION_COUNTER: 674 case PIPE_QUERY_OCCLUSION_PREDICATE: 675 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 676 pq->end[task->thread_index] += 677 task->thread_data.vis_counter - pq->start[task->thread_index]; 678 pq->start[task->thread_index] = 0; 679 break; 680 case PIPE_QUERY_TIMESTAMP: 681 case PIPE_QUERY_TIME_ELAPSED: 682 pq->end[task->thread_index] = os_time_get_nano(); 683 break; 684 case PIPE_QUERY_PIPELINE_STATISTICS: 685 pq->end[task->thread_index] += 686 task->thread_data.ps_invocations - pq->start[task->thread_index]; 687 pq->start[task->thread_index] = 0; 688 break; 689 default: 690 assert(0); 691 break; 692 } 693} 694 695 696void 697lp_rast_set_state(struct lp_rasterizer_task *task, 698 const union lp_rast_cmd_arg arg) 699{ 700 task->state = arg.state; 701} 702 703 704 705/** 706 * Called when we're done writing to a color tile. 707 */ 708static void 709lp_rast_tile_end(struct lp_rasterizer_task *task) 710{ 711 unsigned i; 712 713 for (i = 0; i < task->scene->num_active_queries; ++i) { 714 lp_rast_end_query(task, lp_rast_arg_query(task->scene->active_queries[i])); 715 } 716 717 /* debug */ 718 memset(task->color_tiles, 0, sizeof(task->color_tiles)); 719 task->depth_tile = NULL; 720 721 task->bin = NULL; 722} 723 724 725 726 727 728 729/* Currently have two rendering paths only - the general case triangle 730 * path and the super-specialized blit/clear path. 731 */ 732#define TRI ((LP_RAST_FLAGS_TRI <<1)-1) /* general case */ 733#define RECT ((LP_RAST_FLAGS_RECT<<1)-1) /* direct rectangle rasterizer */ 734#define BLIT ((LP_RAST_FLAGS_BLIT<<1)-1) /* write direct-to-dest */ 735 736static const unsigned 737rast_flags[] = { 738 BLIT, /* clear color */ 739 TRI, /* clear zstencil */ 740 TRI, /* triangle_1 */ 741 TRI, /* triangle_2 */ 742 TRI, /* triangle_3 */ 743 TRI, /* triangle_4 */ 744 TRI, /* triangle_5 */ 745 TRI, /* triangle_6 */ 746 TRI, /* triangle_7 */ 747 TRI, /* triangle_8 */ 748 TRI, /* triangle_3_4 */ 749 TRI, /* triangle_3_16 */ 750 TRI, /* triangle_4_16 */ 751 RECT, /* shade_tile */ 752 RECT, /* shade_tile_opaque */ 753 TRI, /* begin_query */ 754 TRI, /* end_query */ 755 BLIT, /* set_state, */ 756 TRI, /* lp_rast_triangle_32_1 */ 757 TRI, /* lp_rast_triangle_32_2 */ 758 TRI, /* lp_rast_triangle_32_3 */ 759 TRI, /* lp_rast_triangle_32_4 */ 760 TRI, /* lp_rast_triangle_32_5 */ 761 TRI, /* lp_rast_triangle_32_6 */ 762 TRI, /* lp_rast_triangle_32_7 */ 763 TRI, /* lp_rast_triangle_32_8 */ 764 TRI, /* lp_rast_triangle_32_3_4 */ 765 TRI, /* lp_rast_triangle_32_3_16 */ 766 TRI, /* lp_rast_triangle_32_4_16 */ 767 TRI, /* lp_rast_triangle_ms_1 */ 768 TRI, /* lp_rast_triangle_ms_2 */ 769 TRI, /* lp_rast_triangle_ms_3 */ 770 TRI, /* lp_rast_triangle_ms_4 */ 771 TRI, /* lp_rast_triangle_ms_5 */ 772 TRI, /* lp_rast_triangle_ms_6 */ 773 TRI, /* lp_rast_triangle_ms_7 */ 774 TRI, /* lp_rast_triangle_ms_8 */ 775 TRI, /* lp_rast_triangle_ms_3_4 */ 776 TRI, /* lp_rast_triangle_ms_3_16 */ 777 TRI, /* lp_rast_triangle_ms_4_16 */ 778 779 RECT, /* rectangle */ 780 BLIT, /* blit */ 781}; 782 783/* 784 */ 785static const lp_rast_cmd_func 786dispatch_blit[] = { 787 lp_rast_clear_color, 788 NULL, /* clear_zstencil */ 789 NULL, /* triangle_1 */ 790 NULL, /* triangle_2 */ 791 NULL, /* triangle_3 */ 792 NULL, /* triangle_4 */ 793 NULL, /* triangle_5 */ 794 NULL, /* triangle_6 */ 795 NULL, /* triangle_7 */ 796 NULL, /* triangle_8 */ 797 NULL, /* triangle_3_4 */ 798 NULL, /* triangle_3_16 */ 799 NULL, /* triangle_4_16 */ 800 NULL, /* shade_tile */ 801 NULL, /* shade_tile_opaque */ 802 NULL, /* begin_query */ 803 NULL, /* end_query */ 804 lp_rast_set_state, /* set_state */ 805 NULL, /* lp_rast_triangle_32_1 */ 806 NULL, /* lp_rast_triangle_32_2 */ 807 NULL, /* lp_rast_triangle_32_3 */ 808 NULL, /* lp_rast_triangle_32_4 */ 809 NULL, /* lp_rast_triangle_32_5 */ 810 NULL, /* lp_rast_triangle_32_6 */ 811 NULL, /* lp_rast_triangle_32_7 */ 812 NULL, /* lp_rast_triangle_32_8 */ 813 NULL, /* lp_rast_triangle_32_3_4 */ 814 NULL, /* lp_rast_triangle_32_3_16 */ 815 NULL, /* lp_rast_triangle_32_4_16 */ 816 NULL, /* lp_rast_triangle_ms_1 */ 817 NULL, /* lp_rast_triangle_ms_2 */ 818 NULL, /* lp_rast_triangle_ms_3 */ 819 NULL, /* lp_rast_triangle_ms_4 */ 820 NULL, /* lp_rast_triangle_ms_5 */ 821 NULL, /* lp_rast_triangle_ms_6 */ 822 NULL, /* lp_rast_triangle_ms_7 */ 823 NULL, /* lp_rast_triangle_ms_8 */ 824 NULL, /* lp_rast_triangle_ms_3_4 */ 825 NULL, /* lp_rast_triangle_ms_3_16 */ 826 NULL, /* lp_rast_triangle_ms_4_16 */ 827 828 NULL, /* rectangle */ 829 lp_rast_blit_tile_to_dest, 830}; 831 832 833 834/* Triangle and general case rasterization: Use the SOA llvm shdaers, 835 * an active swizzled tile for each color buf, etc. Don't blit/clear 836 * directly to destination surface as we know there are swizzled 837 * operations coming. 838 */ 839static const lp_rast_cmd_func 840dispatch_tri[] = { 841 lp_rast_clear_color, 842 lp_rast_clear_zstencil, 843 lp_rast_triangle_1, 844 lp_rast_triangle_2, 845 lp_rast_triangle_3, 846 lp_rast_triangle_4, 847 lp_rast_triangle_5, 848 lp_rast_triangle_6, 849 lp_rast_triangle_7, 850 lp_rast_triangle_8, 851 lp_rast_triangle_3_4, 852 lp_rast_triangle_3_16, 853 lp_rast_triangle_4_16, 854 lp_rast_shade_tile, 855 lp_rast_shade_tile_opaque, 856 lp_rast_begin_query, 857 lp_rast_end_query, 858 lp_rast_set_state, 859 lp_rast_triangle_32_1, 860 lp_rast_triangle_32_2, 861 lp_rast_triangle_32_3, 862 lp_rast_triangle_32_4, 863 lp_rast_triangle_32_5, 864 lp_rast_triangle_32_6, 865 lp_rast_triangle_32_7, 866 lp_rast_triangle_32_8, 867 lp_rast_triangle_32_3_4, 868 lp_rast_triangle_32_3_16, 869 lp_rast_triangle_32_4_16, 870 lp_rast_triangle_ms_1, 871 lp_rast_triangle_ms_2, 872 lp_rast_triangle_ms_3, 873 lp_rast_triangle_ms_4, 874 lp_rast_triangle_ms_5, 875 lp_rast_triangle_ms_6, 876 lp_rast_triangle_ms_7, 877 lp_rast_triangle_ms_8, 878 lp_rast_triangle_ms_3_4, 879 lp_rast_triangle_ms_3_16, 880 lp_rast_triangle_ms_4_16, 881 lp_rast_rectangle, 882 lp_rast_blit_tile, 883}; 884 885 886/* Debug rasterization with most fastpaths disabled. 887 */ 888static const lp_rast_cmd_func 889dispatch_tri_debug[] = 890{ 891 lp_rast_clear_color, 892 lp_rast_clear_zstencil, 893 lp_rast_triangle_1, 894 lp_rast_triangle_2, 895 lp_rast_triangle_3, 896 lp_rast_triangle_4, 897 lp_rast_triangle_5, 898 lp_rast_triangle_6, 899 lp_rast_triangle_7, 900 lp_rast_triangle_8, 901 lp_rast_triangle_3_4, 902 lp_rast_triangle_3_16, 903 lp_rast_triangle_4_16, 904 lp_rast_shade_tile, 905 lp_rast_shade_tile, 906 lp_rast_begin_query, 907 lp_rast_end_query, 908 lp_rast_set_state, 909 lp_rast_triangle_32_1, 910 lp_rast_triangle_32_2, 911 lp_rast_triangle_32_3, 912 lp_rast_triangle_32_4, 913 lp_rast_triangle_32_5, 914 lp_rast_triangle_32_6, 915 lp_rast_triangle_32_7, 916 lp_rast_triangle_32_8, 917 lp_rast_triangle_32_3_4, 918 lp_rast_triangle_32_3_16, 919 lp_rast_triangle_32_4_16, 920 lp_rast_triangle_ms_1, 921 lp_rast_triangle_ms_2, 922 lp_rast_triangle_ms_3, 923 lp_rast_triangle_ms_4, 924 lp_rast_triangle_ms_5, 925 lp_rast_triangle_ms_6, 926 lp_rast_triangle_ms_7, 927 lp_rast_triangle_ms_8, 928 lp_rast_triangle_ms_3_4, 929 lp_rast_triangle_ms_3_16, 930 lp_rast_triangle_ms_4_16, 931 932 lp_rast_rectangle, 933 lp_rast_shade_tile, 934}; 935 936struct lp_bin_info 937lp_characterize_bin(const struct cmd_bin *bin) 938{ 939 struct cmd_block *block; 940 struct lp_bin_info info; 941 unsigned andflags = ~0; 942 unsigned k, j = 0; 943 944 STATIC_ASSERT(ARRAY_SIZE(rast_flags) == LP_RAST_OP_MAX); 945 946 for (block = bin->head; block; block = block->next) { 947 for (k = 0; k < block->count; k++, j++) { 948 andflags &= rast_flags[block->cmd[k]]; 949 } 950 } 951 952 info.type = andflags; 953 info.count = j; 954 955 return info; 956} 957 958 959static void 960blit_rasterize_bin(struct lp_rasterizer_task *task, 961 const struct cmd_bin *bin) 962{ 963 const struct cmd_block *block; 964 unsigned k; 965 966 STATIC_ASSERT(ARRAY_SIZE(dispatch_blit) == LP_RAST_OP_MAX); 967 968 if (0) debug_printf("%s\n", __FUNCTION__); 969 for (block = bin->head; block; block = block->next) { 970 for (k = 0; k < block->count; k++) { 971 dispatch_blit[block->cmd[k]]( task, block->arg[k] ); 972 } 973 } 974} 975 976static void 977tri_rasterize_bin(struct lp_rasterizer_task *task, 978 const struct cmd_bin *bin, 979 int x, int y) 980{ 981 const struct cmd_block *block; 982 unsigned k; 983 984 STATIC_ASSERT(ARRAY_SIZE(dispatch_tri) == LP_RAST_OP_MAX); 985 986 for (block = bin->head; block; block = block->next) { 987 for (k = 0; k < block->count; k++) { 988 dispatch_tri[block->cmd[k]]( task, block->arg[k] ); 989 } 990 } 991} 992 993static void 994debug_rasterize_bin(struct lp_rasterizer_task *task, 995 const struct cmd_bin *bin) 996{ 997 const struct cmd_block *block; 998 unsigned k; 999 1000 STATIC_ASSERT(ARRAY_SIZE(dispatch_tri_debug) == LP_RAST_OP_MAX); 1001 1002 for (block = bin->head; block; block = block->next) { 1003 for (k = 0; k < block->count; k++) { 1004 dispatch_tri_debug[block->cmd[k]]( task, block->arg[k] ); 1005 } 1006 } 1007} 1008 1009 1010/** 1011 * Rasterize commands for a single bin. 1012 * \param x, y position of the bin's tile in the framebuffer 1013 * Must be called between lp_rast_begin() and lp_rast_end(). 1014 * Called per thread. 1015 */ 1016static void 1017rasterize_bin(struct lp_rasterizer_task *task, 1018 const struct cmd_bin *bin, int x, int y ) 1019{ 1020 struct lp_bin_info info = lp_characterize_bin(bin); 1021 1022 lp_rast_tile_begin( task, bin, x, y ); 1023 1024 if (LP_DEBUG & DEBUG_NO_FASTPATH) 1025 debug_rasterize_bin(task, bin); 1026 else if (info.type & LP_RAST_FLAGS_BLIT) 1027 blit_rasterize_bin(task, bin); 1028 else if (task->scene->permit_linear_rasterizer && 1029 !(LP_PERF & PERF_NO_RAST_LINEAR) && 1030 (info.type & LP_RAST_FLAGS_RECT)) 1031 lp_linear_rasterize_bin(task, bin); 1032 else 1033 tri_rasterize_bin(task, bin, x, y); 1034 1035 lp_rast_tile_end(task); 1036 1037#ifdef DEBUG 1038 /* Debug/Perf flags: 1039 */ 1040 if (bin->head->count == 1) { 1041 if (bin->head->cmd[0] == LP_RAST_OP_BLIT) 1042 LP_COUNT(nr_pure_blit_64); 1043 else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE) 1044 LP_COUNT(nr_pure_shade_opaque_64); 1045 else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE) 1046 LP_COUNT(nr_pure_shade_64); 1047 } 1048#endif 1049} 1050 1051 1052/* An empty bin is one that just loads the contents of the tile and 1053 * stores them again unchanged. This typically happens when bins have 1054 * been flushed for some reason in the middle of a frame, or when 1055 * incremental updates are being made to a render target. 1056 * 1057 * Try to avoid doing pointless work in this case. 1058 */ 1059static boolean 1060is_empty_bin( const struct cmd_bin *bin ) 1061{ 1062 return bin->head == NULL; 1063} 1064 1065 1066/** 1067 * Rasterize/execute all bins within a scene. 1068 * Called per thread. 1069 */ 1070static void 1071rasterize_scene(struct lp_rasterizer_task *task, 1072 struct lp_scene *scene) 1073{ 1074 task->scene = scene; 1075 1076 /* Clear the cache tags. This should not always be necessary but 1077 simpler for now. */ 1078#if LP_USE_TEXTURE_CACHE 1079 memset(task->thread_data.cache->cache_tags, 0, 1080 sizeof(task->thread_data.cache->cache_tags)); 1081#if LP_BUILD_FORMAT_CACHE_DEBUG 1082 task->thread_data.cache->cache_access_total = 0; 1083 task->thread_data.cache->cache_access_miss = 0; 1084#endif 1085#endif 1086 1087 if (!task->rast->no_rast) { 1088 /* loop over scene bins, rasterize each */ 1089 { 1090 struct cmd_bin *bin; 1091 int i, j; 1092 1093 assert(scene); 1094 while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) { 1095 if (!is_empty_bin( bin )) 1096 rasterize_bin(task, bin, i, j); 1097 } 1098 } 1099 } 1100 1101 1102#if LP_BUILD_FORMAT_CACHE_DEBUG 1103 { 1104 uint64_t total, miss; 1105 total = task->thread_data.cache->cache_access_total; 1106 miss = task->thread_data.cache->cache_access_miss; 1107 if (total) { 1108 debug_printf("thread %d cache access %llu miss %llu hit rate %f\n", 1109 task->thread_index, (long long unsigned)total, 1110 (long long unsigned)miss, 1111 (float)(total - miss)/(float)total); 1112 } 1113 } 1114#endif 1115 1116 if (scene->fence) { 1117 lp_fence_signal(scene->fence); 1118 } 1119 1120 task->scene = NULL; 1121} 1122 1123 1124/** 1125 * Called by setup module when it has something for us to render. 1126 */ 1127void 1128lp_rast_queue_scene( struct lp_rasterizer *rast, 1129 struct lp_scene *scene) 1130{ 1131 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); 1132 1133 if (rast->num_threads == 0) { 1134 /* no threading */ 1135 unsigned fpstate = util_fpstate_get(); 1136 1137 /* Make sure that denorms are treated like zeros. This is 1138 * the behavior required by D3D10. OpenGL doesn't care. 1139 */ 1140 util_fpstate_set_denorms_to_zero(fpstate); 1141 1142 lp_rast_begin( rast, scene ); 1143 1144 rasterize_scene( &rast->tasks[0], scene ); 1145 1146 lp_rast_end( rast ); 1147 1148 util_fpstate_set(fpstate); 1149 1150 rast->curr_scene = NULL; 1151 } 1152 else { 1153 /* threaded rendering! */ 1154 unsigned i; 1155 1156 lp_scene_enqueue( rast->full_scenes, scene ); 1157 1158 /* signal the threads that there's work to do */ 1159 for (i = 0; i < rast->num_threads; i++) { 1160 pipe_semaphore_signal(&rast->tasks[i].work_ready); 1161 } 1162 } 1163 1164 LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); 1165} 1166 1167 1168void 1169lp_rast_finish( struct lp_rasterizer *rast ) 1170{ 1171 if (rast->num_threads == 0) { 1172 /* nothing to do */ 1173 } 1174 else { 1175 int i; 1176 1177 /* wait for work to complete */ 1178 for (i = 0; i < rast->num_threads; i++) { 1179 pipe_semaphore_wait(&rast->tasks[i].work_done); 1180 } 1181 } 1182} 1183 1184 1185/** 1186 * This is the thread's main entrypoint. 1187 * It's a simple loop: 1188 * 1. wait for work 1189 * 2. do work 1190 * 3. signal that we're done 1191 */ 1192static int 1193thread_function(void *init_data) 1194{ 1195 struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data; 1196 struct lp_rasterizer *rast = task->rast; 1197 boolean debug = false; 1198 char thread_name[16]; 1199 unsigned fpstate; 1200 1201 snprintf(thread_name, sizeof thread_name, "llvmpipe-%u", task->thread_index); 1202 u_thread_setname(thread_name); 1203 1204 /* Make sure that denorms are treated like zeros. This is 1205 * the behavior required by D3D10. OpenGL doesn't care. 1206 */ 1207 fpstate = util_fpstate_get(); 1208 util_fpstate_set_denorms_to_zero(fpstate); 1209 1210 while (1) { 1211 /* wait for work */ 1212 if (debug) 1213 debug_printf("thread %d waiting for work\n", task->thread_index); 1214 pipe_semaphore_wait(&task->work_ready); 1215 1216 if (rast->exit_flag) 1217 break; 1218 1219 if (task->thread_index == 0) { 1220 /* thread[0]: 1221 * - get next scene to rasterize 1222 * - map the framebuffer surfaces 1223 */ 1224 lp_rast_begin( rast, 1225 lp_scene_dequeue( rast->full_scenes, TRUE ) ); 1226 } 1227 1228 /* Wait for all threads to get here so that threads[1+] don't 1229 * get a null rast->curr_scene pointer. 1230 */ 1231 util_barrier_wait( &rast->barrier ); 1232 1233 /* do work */ 1234 if (debug) 1235 debug_printf("thread %d doing work\n", task->thread_index); 1236 1237 rasterize_scene(task, 1238 rast->curr_scene); 1239 1240 /* wait for all threads to finish with this scene */ 1241 util_barrier_wait( &rast->barrier ); 1242 1243 /* XXX: shouldn't be necessary: 1244 */ 1245 if (task->thread_index == 0) { 1246 lp_rast_end( rast ); 1247 } 1248 1249 /* signal done with work */ 1250 if (debug) 1251 debug_printf("thread %d done working\n", task->thread_index); 1252 1253 pipe_semaphore_signal(&task->work_done); 1254 } 1255 1256#ifdef _WIN32 1257 pipe_semaphore_signal(&task->work_done); 1258#endif 1259 1260 return 0; 1261} 1262 1263 1264/** 1265 * Initialize semaphores and spawn the threads. 1266 */ 1267static void 1268create_rast_threads(struct lp_rasterizer *rast) 1269{ 1270 unsigned i; 1271 1272 /* NOTE: if num_threads is zero, we won't use any threads */ 1273 for (i = 0; i < rast->num_threads; i++) { 1274 pipe_semaphore_init(&rast->tasks[i].work_ready, 0); 1275 pipe_semaphore_init(&rast->tasks[i].work_done, 0); 1276 rast->threads[i] = u_thread_create(thread_function, 1277 (void *) &rast->tasks[i]); 1278 if (!rast->threads[i]) { 1279 rast->num_threads = i; /* previous thread is max */ 1280 break; 1281 } 1282 } 1283} 1284 1285 1286 1287/** 1288 * Create new lp_rasterizer. If num_threads is zero, don't create any 1289 * new threads, do rendering synchronously. 1290 * \param num_threads number of rasterizer threads to create 1291 */ 1292struct lp_rasterizer * 1293lp_rast_create( unsigned num_threads ) 1294{ 1295 struct lp_rasterizer *rast; 1296 unsigned i; 1297 1298 rast = CALLOC_STRUCT(lp_rasterizer); 1299 if (!rast) { 1300 goto no_rast; 1301 } 1302 1303 rast->full_scenes = lp_scene_queue_create(); 1304 if (!rast->full_scenes) { 1305 goto no_full_scenes; 1306 } 1307 1308 for (i = 0; i < MAX2(1, num_threads); i++) { 1309 struct lp_rasterizer_task *task = &rast->tasks[i]; 1310 task->rast = rast; 1311 task->thread_index = i; 1312 task->thread_data.cache = align_malloc(sizeof(struct lp_build_format_cache), 1313 16); 1314 if (!task->thread_data.cache) { 1315 goto no_thread_data_cache; 1316 } 1317 } 1318 1319 rast->num_threads = num_threads; 1320 1321 rast->no_rast = debug_get_bool_option("LP_NO_RAST", FALSE); 1322 1323 create_rast_threads(rast); 1324 1325 /* for synchronizing rasterization threads */ 1326 if (rast->num_threads > 0) { 1327 util_barrier_init( &rast->barrier, rast->num_threads ); 1328 } 1329 1330 memset(lp_dummy_tile, 0, sizeof lp_dummy_tile); 1331 1332 return rast; 1333 1334no_thread_data_cache: 1335 for (i = 0; i < MAX2(1, rast->num_threads); i++) { 1336 if (rast->tasks[i].thread_data.cache) { 1337 align_free(rast->tasks[i].thread_data.cache); 1338 } 1339 } 1340 1341 lp_scene_queue_destroy(rast->full_scenes); 1342no_full_scenes: 1343 FREE(rast); 1344no_rast: 1345 return NULL; 1346} 1347 1348 1349/* Shutdown: 1350 */ 1351void lp_rast_destroy( struct lp_rasterizer *rast ) 1352{ 1353 unsigned i; 1354 1355 /* Set exit_flag and signal each thread's work_ready semaphore. 1356 * Each thread will be woken up, notice that the exit_flag is set and 1357 * break out of its main loop. The thread will then exit. 1358 */ 1359 rast->exit_flag = TRUE; 1360 for (i = 0; i < rast->num_threads; i++) { 1361 pipe_semaphore_signal(&rast->tasks[i].work_ready); 1362 } 1363 1364 /* Wait for threads to terminate before cleaning up per-thread data. 1365 * We don't actually call pipe_thread_wait to avoid dead lock on Windows 1366 * per https://bugs.freedesktop.org/show_bug.cgi?id=76252 */ 1367 for (i = 0; i < rast->num_threads; i++) { 1368#ifdef _WIN32 1369 /* Threads might already be dead - Windows apparently terminates other threads when 1370 * returning from main. 1371 */ 1372 DWORD exit_code = STILL_ACTIVE; 1373 if (GetExitCodeThread(rast->threads[i], &exit_code) && exit_code == STILL_ACTIVE) 1374 pipe_semaphore_wait(&rast->tasks[i].work_done); 1375#else 1376 thrd_join(rast->threads[i], NULL); 1377#endif 1378 } 1379 1380 /* Clean up per-thread data */ 1381 for (i = 0; i < rast->num_threads; i++) { 1382 pipe_semaphore_destroy(&rast->tasks[i].work_ready); 1383 pipe_semaphore_destroy(&rast->tasks[i].work_done); 1384 } 1385 for (i = 0; i < MAX2(1, rast->num_threads); i++) { 1386 align_free(rast->tasks[i].thread_data.cache); 1387 } 1388 1389 /* for synchronizing rasterization threads */ 1390 if (rast->num_threads > 0) { 1391 util_barrier_destroy( &rast->barrier ); 1392 } 1393 1394 lp_scene_queue_destroy(rast->full_scenes); 1395 1396 FREE(rast); 1397} 1398 1399 1400