1848b8605Smrg/************************************************************************** 2848b8605Smrg * 3848b8605Smrg * Copyright 2009 VMware, Inc. 4848b8605Smrg * All Rights Reserved. 5848b8605Smrg * 6848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a 7848b8605Smrg * copy of this software and associated documentation files (the 8848b8605Smrg * "Software"), to deal in the Software without restriction, including 9848b8605Smrg * without limitation the rights to use, copy, modify, merge, publish, 10848b8605Smrg * distribute, sub license, and/or sell copies of the Software, and to 11848b8605Smrg * permit persons to whom the Software is furnished to do so, subject to 12848b8605Smrg * the following conditions: 13848b8605Smrg * 14848b8605Smrg * The above copyright notice and this permission notice (including the 15848b8605Smrg * next paragraph) shall be included in all copies or substantial portions 16848b8605Smrg * of the Software. 17848b8605Smrg * 18848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19848b8605Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20848b8605Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21848b8605Smrg * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22848b8605Smrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23848b8605Smrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24848b8605Smrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25848b8605Smrg * 26848b8605Smrg **************************************************************************/ 27848b8605Smrg 28848b8605Smrg#include <limits.h> 29848b8605Smrg#include "util/u_memory.h" 30848b8605Smrg#include "util/u_math.h" 31848b8605Smrg#include "util/u_rect.h" 32848b8605Smrg#include "util/u_surface.h" 33848b8605Smrg#include "util/u_pack_color.h" 34b8e80941Smrg#include "util/u_string.h" 35b8e80941Smrg#include "util/u_thread.h" 36848b8605Smrg 37b8e80941Smrg#include "util/os_time.h" 38848b8605Smrg 39848b8605Smrg#include "lp_scene_queue.h" 40848b8605Smrg#include "lp_context.h" 41848b8605Smrg#include "lp_debug.h" 42848b8605Smrg#include "lp_fence.h" 43848b8605Smrg#include "lp_perf.h" 44848b8605Smrg#include "lp_query.h" 45848b8605Smrg#include "lp_rast.h" 46848b8605Smrg#include "lp_rast_priv.h" 47b8e80941Smrg#include "gallivm/lp_bld_format.h" 48848b8605Smrg#include "gallivm/lp_bld_debug.h" 49848b8605Smrg#include "lp_scene.h" 50848b8605Smrg#include "lp_tex_sample.h" 51848b8605Smrg 52848b8605Smrg 53848b8605Smrg#ifdef DEBUG 54848b8605Smrgint jit_line = 0; 55848b8605Smrgconst struct lp_rast_state *jit_state = NULL; 56848b8605Smrgconst struct lp_rasterizer_task *jit_task = NULL; 57848b8605Smrg#endif 58848b8605Smrg 59848b8605Smrg 60848b8605Smrg/** 61848b8605Smrg * Begin rasterizing a scene. 62848b8605Smrg * Called once per scene by one thread. 63848b8605Smrg */ 64848b8605Smrgstatic void 65848b8605Smrglp_rast_begin( struct lp_rasterizer *rast, 66848b8605Smrg struct lp_scene *scene ) 67848b8605Smrg{ 68848b8605Smrg rast->curr_scene = scene; 69848b8605Smrg 70848b8605Smrg LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 71848b8605Smrg 72848b8605Smrg lp_scene_begin_rasterization( scene ); 73848b8605Smrg lp_scene_bin_iter_begin( scene ); 74848b8605Smrg} 75848b8605Smrg 76848b8605Smrg 77848b8605Smrgstatic void 78848b8605Smrglp_rast_end( struct lp_rasterizer *rast ) 79848b8605Smrg{ 80848b8605Smrg lp_scene_end_rasterization( rast->curr_scene ); 81848b8605Smrg 82848b8605Smrg rast->curr_scene = NULL; 83848b8605Smrg} 84848b8605Smrg 85848b8605Smrg 86848b8605Smrg/** 87b8e80941Smrg * Beginning rasterization of a tile. 88848b8605Smrg * \param x window X position of the tile, in pixels 89848b8605Smrg * \param y window Y position of the tile, in pixels 90848b8605Smrg */ 91848b8605Smrgstatic void 92848b8605Smrglp_rast_tile_begin(struct lp_rasterizer_task *task, 93848b8605Smrg const struct cmd_bin *bin, 94848b8605Smrg int x, int y) 95848b8605Smrg{ 96b8e80941Smrg unsigned i; 97b8e80941Smrg struct lp_scene *scene = task->scene; 98b8e80941Smrg 99848b8605Smrg LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); 100848b8605Smrg 101848b8605Smrg task->bin = bin; 102848b8605Smrg task->x = x * TILE_SIZE; 103848b8605Smrg task->y = y * TILE_SIZE; 104848b8605Smrg task->width = TILE_SIZE + x * TILE_SIZE > task->scene->fb.width ? 105848b8605Smrg task->scene->fb.width - x * TILE_SIZE : TILE_SIZE; 106848b8605Smrg task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ? 107848b8605Smrg task->scene->fb.height - y * TILE_SIZE : TILE_SIZE; 108848b8605Smrg 109848b8605Smrg task->thread_data.vis_counter = 0; 110b8e80941Smrg task->thread_data.ps_invocations = 0; 111848b8605Smrg 112b8e80941Smrg for (i = 0; i < task->scene->fb.nr_cbufs; i++) { 113b8e80941Smrg if (task->scene->fb.cbufs[i]) { 114b8e80941Smrg task->color_tiles[i] = scene->cbufs[i].map + 115b8e80941Smrg scene->cbufs[i].stride * task->y + 116b8e80941Smrg scene->cbufs[i].format_bytes * task->x; 117b8e80941Smrg } 118b8e80941Smrg } 119b8e80941Smrg if (task->scene->fb.zsbuf) { 120b8e80941Smrg task->depth_tile = scene->zsbuf.map + 121b8e80941Smrg scene->zsbuf.stride * task->y + 122b8e80941Smrg scene->zsbuf.format_bytes * task->x; 123b8e80941Smrg } 124848b8605Smrg} 125848b8605Smrg 126848b8605Smrg 127848b8605Smrg/** 128848b8605Smrg * Clear the rasterizer's current color tile. 129848b8605Smrg * This is a bin command called during bin processing. 130848b8605Smrg * Clear commands always clear all bound layers. 131848b8605Smrg */ 132848b8605Smrgstatic void 133848b8605Smrglp_rast_clear_color(struct lp_rasterizer_task *task, 134848b8605Smrg const union lp_rast_cmd_arg arg) 135848b8605Smrg{ 136848b8605Smrg const struct lp_scene *scene = task->scene; 137848b8605Smrg unsigned cbuf = arg.clear_rb->cbuf; 138848b8605Smrg union util_color uc; 139848b8605Smrg enum pipe_format format; 140848b8605Smrg 141848b8605Smrg /* we never bin clear commands for non-existing buffers */ 142848b8605Smrg assert(cbuf < scene->fb.nr_cbufs); 143848b8605Smrg assert(scene->fb.cbufs[cbuf]); 144848b8605Smrg 145848b8605Smrg format = scene->fb.cbufs[cbuf]->format; 146848b8605Smrg uc = arg.clear_rb->color_val; 147848b8605Smrg 148848b8605Smrg /* 149848b8605Smrg * this is pretty rough since we have target format (bunch of bytes...) here. 150848b8605Smrg * dump it as raw 4 dwords. 151848b8605Smrg */ 152848b8605Smrg LP_DBG(DEBUG_RAST, "%s clear value (target format %d) raw 0x%x,0x%x,0x%x,0x%x\n", 153848b8605Smrg __FUNCTION__, format, uc.ui[0], uc.ui[1], uc.ui[2], uc.ui[3]); 154848b8605Smrg 155848b8605Smrg 156848b8605Smrg util_fill_box(scene->cbufs[cbuf].map, 157848b8605Smrg format, 158848b8605Smrg scene->cbufs[cbuf].stride, 159848b8605Smrg scene->cbufs[cbuf].layer_stride, 160848b8605Smrg task->x, 161848b8605Smrg task->y, 162848b8605Smrg 0, 163848b8605Smrg task->width, 164848b8605Smrg task->height, 165848b8605Smrg scene->fb_max_layer + 1, 166848b8605Smrg &uc); 167848b8605Smrg 168848b8605Smrg /* this will increase for each rb which probably doesn't mean much */ 169848b8605Smrg LP_COUNT(nr_color_tile_clear); 170848b8605Smrg} 171848b8605Smrg 172848b8605Smrg 173848b8605Smrg/** 174848b8605Smrg * Clear the rasterizer's current z/stencil tile. 175848b8605Smrg * This is a bin command called during bin processing. 176848b8605Smrg * Clear commands always clear all bound layers. 177848b8605Smrg */ 178848b8605Smrgstatic void 179848b8605Smrglp_rast_clear_zstencil(struct lp_rasterizer_task *task, 180848b8605Smrg const union lp_rast_cmd_arg arg) 181848b8605Smrg{ 182848b8605Smrg const struct lp_scene *scene = task->scene; 183848b8605Smrg uint64_t clear_value64 = arg.clear_zstencil.value; 184848b8605Smrg uint64_t clear_mask64 = arg.clear_zstencil.mask; 185848b8605Smrg uint32_t clear_value = (uint32_t) clear_value64; 186848b8605Smrg uint32_t clear_mask = (uint32_t) clear_mask64; 187848b8605Smrg const unsigned height = task->height; 188848b8605Smrg const unsigned width = task->width; 189848b8605Smrg const unsigned dst_stride = scene->zsbuf.stride; 190848b8605Smrg uint8_t *dst; 191848b8605Smrg unsigned i, j; 192848b8605Smrg unsigned block_size; 193848b8605Smrg 194848b8605Smrg LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n", 195848b8605Smrg __FUNCTION__, clear_value, clear_mask); 196848b8605Smrg 197848b8605Smrg /* 198848b8605Smrg * Clear the area of the depth/depth buffer matching this tile. 199848b8605Smrg */ 200848b8605Smrg 201848b8605Smrg if (scene->fb.zsbuf) { 202848b8605Smrg unsigned layer; 203b8e80941Smrg uint8_t *dst_layer = task->depth_tile; 204848b8605Smrg block_size = util_format_get_blocksize(scene->fb.zsbuf->format); 205848b8605Smrg 206848b8605Smrg clear_value &= clear_mask; 207848b8605Smrg 208848b8605Smrg for (layer = 0; layer <= scene->fb_max_layer; layer++) { 209848b8605Smrg dst = dst_layer; 210848b8605Smrg 211848b8605Smrg switch (block_size) { 212848b8605Smrg case 1: 213848b8605Smrg assert(clear_mask == 0xff); 214848b8605Smrg memset(dst, (uint8_t) clear_value, height * width); 215848b8605Smrg break; 216848b8605Smrg case 2: 217848b8605Smrg if (clear_mask == 0xffff) { 218848b8605Smrg for (i = 0; i < height; i++) { 219848b8605Smrg uint16_t *row = (uint16_t *)dst; 220848b8605Smrg for (j = 0; j < width; j++) 221848b8605Smrg *row++ = (uint16_t) clear_value; 222848b8605Smrg dst += dst_stride; 223848b8605Smrg } 224848b8605Smrg } 225848b8605Smrg else { 226848b8605Smrg for (i = 0; i < height; i++) { 227848b8605Smrg uint16_t *row = (uint16_t *)dst; 228848b8605Smrg for (j = 0; j < width; j++) { 229848b8605Smrg uint16_t tmp = ~clear_mask & *row; 230848b8605Smrg *row++ = clear_value | tmp; 231848b8605Smrg } 232848b8605Smrg dst += dst_stride; 233848b8605Smrg } 234848b8605Smrg } 235848b8605Smrg break; 236848b8605Smrg case 4: 237848b8605Smrg if (clear_mask == 0xffffffff) { 238848b8605Smrg for (i = 0; i < height; i++) { 239848b8605Smrg uint32_t *row = (uint32_t *)dst; 240848b8605Smrg for (j = 0; j < width; j++) 241848b8605Smrg *row++ = clear_value; 242848b8605Smrg dst += dst_stride; 243848b8605Smrg } 244848b8605Smrg } 245848b8605Smrg else { 246848b8605Smrg for (i = 0; i < height; i++) { 247848b8605Smrg uint32_t *row = (uint32_t *)dst; 248848b8605Smrg for (j = 0; j < width; j++) { 249848b8605Smrg uint32_t tmp = ~clear_mask & *row; 250848b8605Smrg *row++ = clear_value | tmp; 251848b8605Smrg } 252848b8605Smrg dst += dst_stride; 253848b8605Smrg } 254848b8605Smrg } 255848b8605Smrg break; 256848b8605Smrg case 8: 257848b8605Smrg clear_value64 &= clear_mask64; 258848b8605Smrg if (clear_mask64 == 0xffffffffffULL) { 259848b8605Smrg for (i = 0; i < height; i++) { 260848b8605Smrg uint64_t *row = (uint64_t *)dst; 261848b8605Smrg for (j = 0; j < width; j++) 262848b8605Smrg *row++ = clear_value64; 263848b8605Smrg dst += dst_stride; 264848b8605Smrg } 265848b8605Smrg } 266848b8605Smrg else { 267848b8605Smrg for (i = 0; i < height; i++) { 268848b8605Smrg uint64_t *row = (uint64_t *)dst; 269848b8605Smrg for (j = 0; j < width; j++) { 270848b8605Smrg uint64_t tmp = ~clear_mask64 & *row; 271848b8605Smrg *row++ = clear_value64 | tmp; 272848b8605Smrg } 273848b8605Smrg dst += dst_stride; 274848b8605Smrg } 275848b8605Smrg } 276848b8605Smrg break; 277848b8605Smrg 278848b8605Smrg default: 279848b8605Smrg assert(0); 280848b8605Smrg break; 281848b8605Smrg } 282848b8605Smrg dst_layer += scene->zsbuf.layer_stride; 283848b8605Smrg } 284848b8605Smrg } 285848b8605Smrg} 286848b8605Smrg 287848b8605Smrg 288848b8605Smrg 289848b8605Smrg/** 290848b8605Smrg * Run the shader on all blocks in a tile. This is used when a tile is 291848b8605Smrg * completely contained inside a triangle. 292848b8605Smrg * This is a bin command called during bin processing. 293848b8605Smrg */ 294848b8605Smrgstatic void 295848b8605Smrglp_rast_shade_tile(struct lp_rasterizer_task *task, 296848b8605Smrg const union lp_rast_cmd_arg arg) 297848b8605Smrg{ 298848b8605Smrg const struct lp_scene *scene = task->scene; 299848b8605Smrg const struct lp_rast_shader_inputs *inputs = arg.shade_tile; 300848b8605Smrg const struct lp_rast_state *state; 301848b8605Smrg struct lp_fragment_shader_variant *variant; 302848b8605Smrg const unsigned tile_x = task->x, tile_y = task->y; 303848b8605Smrg unsigned x, y; 304848b8605Smrg 305848b8605Smrg if (inputs->disable) { 306848b8605Smrg /* This command was partially binned and has been disabled */ 307848b8605Smrg return; 308848b8605Smrg } 309848b8605Smrg 310848b8605Smrg LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 311848b8605Smrg 312848b8605Smrg state = task->state; 313848b8605Smrg assert(state); 314848b8605Smrg if (!state) { 315848b8605Smrg return; 316848b8605Smrg } 317848b8605Smrg variant = state->variant; 318848b8605Smrg 319848b8605Smrg /* render the whole 64x64 tile in 4x4 chunks */ 320848b8605Smrg for (y = 0; y < task->height; y += 4){ 321848b8605Smrg for (x = 0; x < task->width; x += 4) { 322848b8605Smrg uint8_t *color[PIPE_MAX_COLOR_BUFS]; 323848b8605Smrg unsigned stride[PIPE_MAX_COLOR_BUFS]; 324848b8605Smrg uint8_t *depth = NULL; 325848b8605Smrg unsigned depth_stride = 0; 326848b8605Smrg unsigned i; 327848b8605Smrg 328848b8605Smrg /* color buffer */ 329848b8605Smrg for (i = 0; i < scene->fb.nr_cbufs; i++){ 330848b8605Smrg if (scene->fb.cbufs[i]) { 331848b8605Smrg stride[i] = scene->cbufs[i].stride; 332848b8605Smrg color[i] = lp_rast_get_color_block_pointer(task, i, tile_x + x, 333848b8605Smrg tile_y + y, inputs->layer); 334848b8605Smrg } 335848b8605Smrg else { 336848b8605Smrg stride[i] = 0; 337848b8605Smrg color[i] = NULL; 338848b8605Smrg } 339848b8605Smrg } 340848b8605Smrg 341848b8605Smrg /* depth buffer */ 342848b8605Smrg if (scene->zsbuf.map) { 343848b8605Smrg depth = lp_rast_get_depth_block_pointer(task, tile_x + x, 344848b8605Smrg tile_y + y, inputs->layer); 345848b8605Smrg depth_stride = scene->zsbuf.stride; 346848b8605Smrg } 347848b8605Smrg 348848b8605Smrg /* Propagate non-interpolated raster state. */ 349848b8605Smrg task->thread_data.raster_state.viewport_index = inputs->viewport_index; 350848b8605Smrg 351848b8605Smrg /* run shader on 4x4 block */ 352848b8605Smrg BEGIN_JIT_CALL(state, task); 353848b8605Smrg variant->jit_function[RAST_WHOLE]( &state->jit_context, 354848b8605Smrg tile_x + x, tile_y + y, 355848b8605Smrg inputs->frontfacing, 356848b8605Smrg GET_A0(inputs), 357848b8605Smrg GET_DADX(inputs), 358848b8605Smrg GET_DADY(inputs), 359848b8605Smrg color, 360848b8605Smrg depth, 361848b8605Smrg 0xffff, 362848b8605Smrg &task->thread_data, 363848b8605Smrg stride, 364848b8605Smrg depth_stride); 365848b8605Smrg END_JIT_CALL(); 366848b8605Smrg } 367848b8605Smrg } 368848b8605Smrg} 369848b8605Smrg 370848b8605Smrg 371848b8605Smrg/** 372848b8605Smrg * Run the shader on all blocks in a tile. This is used when a tile is 373848b8605Smrg * completely contained inside a triangle, and the shader is opaque. 374848b8605Smrg * This is a bin command called during bin processing. 375848b8605Smrg */ 376848b8605Smrgstatic void 377848b8605Smrglp_rast_shade_tile_opaque(struct lp_rasterizer_task *task, 378848b8605Smrg const union lp_rast_cmd_arg arg) 379848b8605Smrg{ 380848b8605Smrg LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 381848b8605Smrg 382848b8605Smrg assert(task->state); 383848b8605Smrg if (!task->state) { 384848b8605Smrg return; 385848b8605Smrg } 386848b8605Smrg 387848b8605Smrg lp_rast_shade_tile(task, arg); 388848b8605Smrg} 389848b8605Smrg 390848b8605Smrg 391848b8605Smrg/** 392848b8605Smrg * Compute shading for a 4x4 block of pixels inside a triangle. 393848b8605Smrg * This is a bin command called during bin processing. 394848b8605Smrg * \param x X position of quad in window coords 395848b8605Smrg * \param y Y position of quad in window coords 396848b8605Smrg */ 397848b8605Smrgvoid 398848b8605Smrglp_rast_shade_quads_mask(struct lp_rasterizer_task *task, 399848b8605Smrg const struct lp_rast_shader_inputs *inputs, 400848b8605Smrg unsigned x, unsigned y, 401848b8605Smrg unsigned mask) 402848b8605Smrg{ 403848b8605Smrg const struct lp_rast_state *state = task->state; 404848b8605Smrg struct lp_fragment_shader_variant *variant = state->variant; 405848b8605Smrg const struct lp_scene *scene = task->scene; 406848b8605Smrg uint8_t *color[PIPE_MAX_COLOR_BUFS]; 407848b8605Smrg unsigned stride[PIPE_MAX_COLOR_BUFS]; 408848b8605Smrg uint8_t *depth = NULL; 409848b8605Smrg unsigned depth_stride = 0; 410848b8605Smrg unsigned i; 411848b8605Smrg 412848b8605Smrg assert(state); 413848b8605Smrg 414848b8605Smrg /* Sanity checks */ 415848b8605Smrg assert(x < scene->tiles_x * TILE_SIZE); 416848b8605Smrg assert(y < scene->tiles_y * TILE_SIZE); 417848b8605Smrg assert(x % TILE_VECTOR_WIDTH == 0); 418848b8605Smrg assert(y % TILE_VECTOR_HEIGHT == 0); 419848b8605Smrg 420848b8605Smrg assert((x % 4) == 0); 421848b8605Smrg assert((y % 4) == 0); 422848b8605Smrg 423848b8605Smrg /* color buffer */ 424848b8605Smrg for (i = 0; i < scene->fb.nr_cbufs; i++) { 425848b8605Smrg if (scene->fb.cbufs[i]) { 426848b8605Smrg stride[i] = scene->cbufs[i].stride; 427848b8605Smrg color[i] = lp_rast_get_color_block_pointer(task, i, x, y, 428848b8605Smrg inputs->layer); 429848b8605Smrg } 430848b8605Smrg else { 431848b8605Smrg stride[i] = 0; 432848b8605Smrg color[i] = NULL; 433848b8605Smrg } 434848b8605Smrg } 435848b8605Smrg 436848b8605Smrg /* depth buffer */ 437848b8605Smrg if (scene->zsbuf.map) { 438848b8605Smrg depth_stride = scene->zsbuf.stride; 439848b8605Smrg depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer); 440848b8605Smrg } 441848b8605Smrg 442848b8605Smrg assert(lp_check_alignment(state->jit_context.u8_blend_color, 16)); 443848b8605Smrg 444848b8605Smrg /* 445848b8605Smrg * The rasterizer may produce fragments outside our 446848b8605Smrg * allocated 4x4 blocks hence need to filter them out here. 447848b8605Smrg */ 448848b8605Smrg if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { 449848b8605Smrg /* Propagate non-interpolated raster state. */ 450848b8605Smrg task->thread_data.raster_state.viewport_index = inputs->viewport_index; 451848b8605Smrg 452848b8605Smrg /* run shader on 4x4 block */ 453848b8605Smrg BEGIN_JIT_CALL(state, task); 454848b8605Smrg variant->jit_function[RAST_EDGE_TEST](&state->jit_context, 455848b8605Smrg x, y, 456848b8605Smrg inputs->frontfacing, 457848b8605Smrg GET_A0(inputs), 458848b8605Smrg GET_DADX(inputs), 459848b8605Smrg GET_DADY(inputs), 460848b8605Smrg color, 461848b8605Smrg depth, 462848b8605Smrg mask, 463848b8605Smrg &task->thread_data, 464848b8605Smrg stride, 465848b8605Smrg depth_stride); 466848b8605Smrg END_JIT_CALL(); 467848b8605Smrg } 468848b8605Smrg} 469848b8605Smrg 470848b8605Smrg 471848b8605Smrg 472848b8605Smrg/** 473848b8605Smrg * Begin a new occlusion query. 474848b8605Smrg * This is a bin command put in all bins. 475848b8605Smrg * Called per thread. 476848b8605Smrg */ 477848b8605Smrgstatic void 478848b8605Smrglp_rast_begin_query(struct lp_rasterizer_task *task, 479848b8605Smrg const union lp_rast_cmd_arg arg) 480848b8605Smrg{ 481848b8605Smrg struct llvmpipe_query *pq = arg.query_obj; 482848b8605Smrg 483848b8605Smrg switch (pq->type) { 484848b8605Smrg case PIPE_QUERY_OCCLUSION_COUNTER: 485848b8605Smrg case PIPE_QUERY_OCCLUSION_PREDICATE: 486b8e80941Smrg case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 487848b8605Smrg pq->start[task->thread_index] = task->thread_data.vis_counter; 488848b8605Smrg break; 489848b8605Smrg case PIPE_QUERY_PIPELINE_STATISTICS: 490b8e80941Smrg pq->start[task->thread_index] = task->thread_data.ps_invocations; 491848b8605Smrg break; 492848b8605Smrg default: 493848b8605Smrg assert(0); 494848b8605Smrg break; 495848b8605Smrg } 496848b8605Smrg} 497848b8605Smrg 498848b8605Smrg 499848b8605Smrg/** 500848b8605Smrg * End the current occlusion query. 501848b8605Smrg * This is a bin command put in all bins. 502848b8605Smrg * Called per thread. 503848b8605Smrg */ 504848b8605Smrgstatic void 505848b8605Smrglp_rast_end_query(struct lp_rasterizer_task *task, 506848b8605Smrg const union lp_rast_cmd_arg arg) 507848b8605Smrg{ 508848b8605Smrg struct llvmpipe_query *pq = arg.query_obj; 509848b8605Smrg 510848b8605Smrg switch (pq->type) { 511848b8605Smrg case PIPE_QUERY_OCCLUSION_COUNTER: 512848b8605Smrg case PIPE_QUERY_OCCLUSION_PREDICATE: 513b8e80941Smrg case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 514848b8605Smrg pq->end[task->thread_index] += 515848b8605Smrg task->thread_data.vis_counter - pq->start[task->thread_index]; 516848b8605Smrg pq->start[task->thread_index] = 0; 517848b8605Smrg break; 518848b8605Smrg case PIPE_QUERY_TIMESTAMP: 519848b8605Smrg pq->end[task->thread_index] = os_time_get_nano(); 520848b8605Smrg break; 521848b8605Smrg case PIPE_QUERY_PIPELINE_STATISTICS: 522848b8605Smrg pq->end[task->thread_index] += 523b8e80941Smrg task->thread_data.ps_invocations - pq->start[task->thread_index]; 524848b8605Smrg pq->start[task->thread_index] = 0; 525848b8605Smrg break; 526848b8605Smrg default: 527848b8605Smrg assert(0); 528848b8605Smrg break; 529848b8605Smrg } 530848b8605Smrg} 531848b8605Smrg 532848b8605Smrg 533848b8605Smrgvoid 534848b8605Smrglp_rast_set_state(struct lp_rasterizer_task *task, 535848b8605Smrg const union lp_rast_cmd_arg arg) 536848b8605Smrg{ 537848b8605Smrg task->state = arg.state; 538848b8605Smrg} 539848b8605Smrg 540848b8605Smrg 541848b8605Smrg 542848b8605Smrg/** 543848b8605Smrg * Called when we're done writing to a color tile. 544848b8605Smrg */ 545848b8605Smrgstatic void 546848b8605Smrglp_rast_tile_end(struct lp_rasterizer_task *task) 547848b8605Smrg{ 548848b8605Smrg unsigned i; 549848b8605Smrg 550848b8605Smrg for (i = 0; i < task->scene->num_active_queries; ++i) { 551848b8605Smrg lp_rast_end_query(task, lp_rast_arg_query(task->scene->active_queries[i])); 552848b8605Smrg } 553848b8605Smrg 554848b8605Smrg /* debug */ 555848b8605Smrg memset(task->color_tiles, 0, sizeof(task->color_tiles)); 556848b8605Smrg task->depth_tile = NULL; 557848b8605Smrg 558848b8605Smrg task->bin = NULL; 559848b8605Smrg} 560848b8605Smrg 561848b8605Smrgstatic lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] = 562848b8605Smrg{ 563848b8605Smrg lp_rast_clear_color, 564848b8605Smrg lp_rast_clear_zstencil, 565848b8605Smrg lp_rast_triangle_1, 566848b8605Smrg lp_rast_triangle_2, 567848b8605Smrg lp_rast_triangle_3, 568848b8605Smrg lp_rast_triangle_4, 569848b8605Smrg lp_rast_triangle_5, 570848b8605Smrg lp_rast_triangle_6, 571848b8605Smrg lp_rast_triangle_7, 572848b8605Smrg lp_rast_triangle_8, 573848b8605Smrg lp_rast_triangle_3_4, 574848b8605Smrg lp_rast_triangle_3_16, 575848b8605Smrg lp_rast_triangle_4_16, 576848b8605Smrg lp_rast_shade_tile, 577848b8605Smrg lp_rast_shade_tile_opaque, 578848b8605Smrg lp_rast_begin_query, 579848b8605Smrg lp_rast_end_query, 580848b8605Smrg lp_rast_set_state, 581848b8605Smrg lp_rast_triangle_32_1, 582848b8605Smrg lp_rast_triangle_32_2, 583848b8605Smrg lp_rast_triangle_32_3, 584848b8605Smrg lp_rast_triangle_32_4, 585848b8605Smrg lp_rast_triangle_32_5, 586848b8605Smrg lp_rast_triangle_32_6, 587848b8605Smrg lp_rast_triangle_32_7, 588848b8605Smrg lp_rast_triangle_32_8, 589848b8605Smrg lp_rast_triangle_32_3_4, 590848b8605Smrg lp_rast_triangle_32_3_16, 591848b8605Smrg lp_rast_triangle_32_4_16 592848b8605Smrg}; 593848b8605Smrg 594848b8605Smrg 595848b8605Smrgstatic void 596848b8605Smrgdo_rasterize_bin(struct lp_rasterizer_task *task, 597848b8605Smrg const struct cmd_bin *bin, 598848b8605Smrg int x, int y) 599848b8605Smrg{ 600848b8605Smrg const struct cmd_block *block; 601848b8605Smrg unsigned k; 602848b8605Smrg 603848b8605Smrg if (0) 604848b8605Smrg lp_debug_bin(bin, x, y); 605848b8605Smrg 606848b8605Smrg for (block = bin->head; block; block = block->next) { 607848b8605Smrg for (k = 0; k < block->count; k++) { 608848b8605Smrg dispatch[block->cmd[k]]( task, block->arg[k] ); 609848b8605Smrg } 610848b8605Smrg } 611848b8605Smrg} 612848b8605Smrg 613848b8605Smrg 614848b8605Smrg 615848b8605Smrg/** 616848b8605Smrg * Rasterize commands for a single bin. 617848b8605Smrg * \param x, y position of the bin's tile in the framebuffer 618848b8605Smrg * Must be called between lp_rast_begin() and lp_rast_end(). 619848b8605Smrg * Called per thread. 620848b8605Smrg */ 621848b8605Smrgstatic void 622848b8605Smrgrasterize_bin(struct lp_rasterizer_task *task, 623848b8605Smrg const struct cmd_bin *bin, int x, int y ) 624848b8605Smrg{ 625848b8605Smrg lp_rast_tile_begin( task, bin, x, y ); 626848b8605Smrg 627848b8605Smrg do_rasterize_bin(task, bin, x, y); 628848b8605Smrg 629848b8605Smrg lp_rast_tile_end(task); 630848b8605Smrg 631848b8605Smrg 632848b8605Smrg /* Debug/Perf flags: 633848b8605Smrg */ 634848b8605Smrg if (bin->head->count == 1) { 635848b8605Smrg if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE) 636848b8605Smrg LP_COUNT(nr_pure_shade_opaque_64); 637848b8605Smrg else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE) 638848b8605Smrg LP_COUNT(nr_pure_shade_64); 639848b8605Smrg } 640848b8605Smrg} 641848b8605Smrg 642848b8605Smrg 643848b8605Smrg/* An empty bin is one that just loads the contents of the tile and 644848b8605Smrg * stores them again unchanged. This typically happens when bins have 645848b8605Smrg * been flushed for some reason in the middle of a frame, or when 646848b8605Smrg * incremental updates are being made to a render target. 647848b8605Smrg * 648848b8605Smrg * Try to avoid doing pointless work in this case. 649848b8605Smrg */ 650848b8605Smrgstatic boolean 651848b8605Smrgis_empty_bin( const struct cmd_bin *bin ) 652848b8605Smrg{ 653848b8605Smrg return bin->head == NULL; 654848b8605Smrg} 655848b8605Smrg 656848b8605Smrg 657848b8605Smrg/** 658848b8605Smrg * Rasterize/execute all bins within a scene. 659848b8605Smrg * Called per thread. 660848b8605Smrg */ 661848b8605Smrgstatic void 662848b8605Smrgrasterize_scene(struct lp_rasterizer_task *task, 663848b8605Smrg struct lp_scene *scene) 664848b8605Smrg{ 665848b8605Smrg task->scene = scene; 666848b8605Smrg 667b8e80941Smrg /* Clear the cache tags. This should not always be necessary but 668b8e80941Smrg simpler for now. */ 669b8e80941Smrg#if LP_USE_TEXTURE_CACHE 670b8e80941Smrg memset(task->thread_data.cache->cache_tags, 0, 671b8e80941Smrg sizeof(task->thread_data.cache->cache_tags)); 672b8e80941Smrg#if LP_BUILD_FORMAT_CACHE_DEBUG 673b8e80941Smrg task->thread_data.cache->cache_access_total = 0; 674b8e80941Smrg task->thread_data.cache->cache_access_miss = 0; 675b8e80941Smrg#endif 676b8e80941Smrg#endif 677b8e80941Smrg 678b8e80941Smrg if (!task->rast->no_rast) { 679848b8605Smrg /* loop over scene bins, rasterize each */ 680848b8605Smrg { 681848b8605Smrg struct cmd_bin *bin; 682848b8605Smrg int i, j; 683848b8605Smrg 684848b8605Smrg assert(scene); 685848b8605Smrg while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) { 686848b8605Smrg if (!is_empty_bin( bin )) 687848b8605Smrg rasterize_bin(task, bin, i, j); 688848b8605Smrg } 689848b8605Smrg } 690848b8605Smrg } 691848b8605Smrg 692848b8605Smrg 693b8e80941Smrg#if LP_BUILD_FORMAT_CACHE_DEBUG 694b8e80941Smrg { 695b8e80941Smrg uint64_t total, miss; 696b8e80941Smrg total = task->thread_data.cache->cache_access_total; 697b8e80941Smrg miss = task->thread_data.cache->cache_access_miss; 698b8e80941Smrg if (total) { 699b8e80941Smrg debug_printf("thread %d cache access %llu miss %llu hit rate %f\n", 700b8e80941Smrg task->thread_index, (long long unsigned)total, 701b8e80941Smrg (long long unsigned)miss, 702b8e80941Smrg (float)(total - miss)/(float)total); 703b8e80941Smrg } 704b8e80941Smrg } 705b8e80941Smrg#endif 706b8e80941Smrg 707848b8605Smrg if (scene->fence) { 708848b8605Smrg lp_fence_signal(scene->fence); 709848b8605Smrg } 710848b8605Smrg 711848b8605Smrg task->scene = NULL; 712848b8605Smrg} 713848b8605Smrg 714848b8605Smrg 715848b8605Smrg/** 716848b8605Smrg * Called by setup module when it has something for us to render. 717848b8605Smrg */ 718848b8605Smrgvoid 719848b8605Smrglp_rast_queue_scene( struct lp_rasterizer *rast, 720848b8605Smrg struct lp_scene *scene) 721848b8605Smrg{ 722848b8605Smrg LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); 723848b8605Smrg 724848b8605Smrg if (rast->num_threads == 0) { 725848b8605Smrg /* no threading */ 726848b8605Smrg unsigned fpstate = util_fpstate_get(); 727848b8605Smrg 728848b8605Smrg /* Make sure that denorms are treated like zeros. This is 729848b8605Smrg * the behavior required by D3D10. OpenGL doesn't care. 730848b8605Smrg */ 731848b8605Smrg util_fpstate_set_denorms_to_zero(fpstate); 732848b8605Smrg 733848b8605Smrg lp_rast_begin( rast, scene ); 734848b8605Smrg 735848b8605Smrg rasterize_scene( &rast->tasks[0], scene ); 736848b8605Smrg 737848b8605Smrg lp_rast_end( rast ); 738848b8605Smrg 739848b8605Smrg util_fpstate_set(fpstate); 740848b8605Smrg 741848b8605Smrg rast->curr_scene = NULL; 742848b8605Smrg } 743848b8605Smrg else { 744848b8605Smrg /* threaded rendering! */ 745848b8605Smrg unsigned i; 746848b8605Smrg 747848b8605Smrg lp_scene_enqueue( rast->full_scenes, scene ); 748848b8605Smrg 749848b8605Smrg /* signal the threads that there's work to do */ 750848b8605Smrg for (i = 0; i < rast->num_threads; i++) { 751848b8605Smrg pipe_semaphore_signal(&rast->tasks[i].work_ready); 752848b8605Smrg } 753848b8605Smrg } 754848b8605Smrg 755848b8605Smrg LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); 756848b8605Smrg} 757848b8605Smrg 758848b8605Smrg 759848b8605Smrgvoid 760848b8605Smrglp_rast_finish( struct lp_rasterizer *rast ) 761848b8605Smrg{ 762848b8605Smrg if (rast->num_threads == 0) { 763848b8605Smrg /* nothing to do */ 764848b8605Smrg } 765848b8605Smrg else { 766848b8605Smrg int i; 767848b8605Smrg 768848b8605Smrg /* wait for work to complete */ 769848b8605Smrg for (i = 0; i < rast->num_threads; i++) { 770848b8605Smrg pipe_semaphore_wait(&rast->tasks[i].work_done); 771848b8605Smrg } 772848b8605Smrg } 773848b8605Smrg} 774848b8605Smrg 775848b8605Smrg 776848b8605Smrg/** 777848b8605Smrg * This is the thread's main entrypoint. 778848b8605Smrg * It's a simple loop: 779848b8605Smrg * 1. wait for work 780848b8605Smrg * 2. do work 781848b8605Smrg * 3. signal that we're done 782848b8605Smrg */ 783b8e80941Smrgstatic int 784b8e80941Smrgthread_function(void *init_data) 785848b8605Smrg{ 786848b8605Smrg struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data; 787848b8605Smrg struct lp_rasterizer *rast = task->rast; 788848b8605Smrg boolean debug = false; 789b8e80941Smrg char thread_name[16]; 790b8e80941Smrg unsigned fpstate; 791b8e80941Smrg 792b8e80941Smrg util_snprintf(thread_name, sizeof thread_name, "llvmpipe-%u", task->thread_index); 793b8e80941Smrg u_thread_setname(thread_name); 794848b8605Smrg 795848b8605Smrg /* Make sure that denorms are treated like zeros. This is 796848b8605Smrg * the behavior required by D3D10. OpenGL doesn't care. 797848b8605Smrg */ 798b8e80941Smrg fpstate = util_fpstate_get(); 799848b8605Smrg util_fpstate_set_denorms_to_zero(fpstate); 800848b8605Smrg 801848b8605Smrg while (1) { 802848b8605Smrg /* wait for work */ 803848b8605Smrg if (debug) 804848b8605Smrg debug_printf("thread %d waiting for work\n", task->thread_index); 805848b8605Smrg pipe_semaphore_wait(&task->work_ready); 806848b8605Smrg 807848b8605Smrg if (rast->exit_flag) 808848b8605Smrg break; 809848b8605Smrg 810848b8605Smrg if (task->thread_index == 0) { 811848b8605Smrg /* thread[0]: 812848b8605Smrg * - get next scene to rasterize 813848b8605Smrg * - map the framebuffer surfaces 814848b8605Smrg */ 815848b8605Smrg lp_rast_begin( rast, 816848b8605Smrg lp_scene_dequeue( rast->full_scenes, TRUE ) ); 817848b8605Smrg } 818848b8605Smrg 819848b8605Smrg /* Wait for all threads to get here so that threads[1+] don't 820848b8605Smrg * get a null rast->curr_scene pointer. 821848b8605Smrg */ 822b8e80941Smrg util_barrier_wait( &rast->barrier ); 823848b8605Smrg 824848b8605Smrg /* do work */ 825848b8605Smrg if (debug) 826848b8605Smrg debug_printf("thread %d doing work\n", task->thread_index); 827848b8605Smrg 828848b8605Smrg rasterize_scene(task, 829848b8605Smrg rast->curr_scene); 830848b8605Smrg 831848b8605Smrg /* wait for all threads to finish with this scene */ 832b8e80941Smrg util_barrier_wait( &rast->barrier ); 833848b8605Smrg 834848b8605Smrg /* XXX: shouldn't be necessary: 835848b8605Smrg */ 836848b8605Smrg if (task->thread_index == 0) { 837848b8605Smrg lp_rast_end( rast ); 838848b8605Smrg } 839848b8605Smrg 840848b8605Smrg /* signal done with work */ 841848b8605Smrg if (debug) 842848b8605Smrg debug_printf("thread %d done working\n", task->thread_index); 843848b8605Smrg 844848b8605Smrg pipe_semaphore_signal(&task->work_done); 845848b8605Smrg } 846848b8605Smrg 847848b8605Smrg#ifdef _WIN32 848848b8605Smrg pipe_semaphore_signal(&task->work_done); 849848b8605Smrg#endif 850848b8605Smrg 851848b8605Smrg return 0; 852848b8605Smrg} 853848b8605Smrg 854848b8605Smrg 855848b8605Smrg/** 856848b8605Smrg * Initialize semaphores and spawn the threads. 857848b8605Smrg */ 858848b8605Smrgstatic void 859848b8605Smrgcreate_rast_threads(struct lp_rasterizer *rast) 860848b8605Smrg{ 861848b8605Smrg unsigned i; 862848b8605Smrg 863848b8605Smrg /* NOTE: if num_threads is zero, we won't use any threads */ 864848b8605Smrg for (i = 0; i < rast->num_threads; i++) { 865848b8605Smrg pipe_semaphore_init(&rast->tasks[i].work_ready, 0); 866848b8605Smrg pipe_semaphore_init(&rast->tasks[i].work_done, 0); 867b8e80941Smrg rast->threads[i] = u_thread_create(thread_function, 868848b8605Smrg (void *) &rast->tasks[i]); 869848b8605Smrg } 870848b8605Smrg} 871848b8605Smrg 872848b8605Smrg 873848b8605Smrg 874848b8605Smrg/** 875848b8605Smrg * Create new lp_rasterizer. If num_threads is zero, don't create any 876848b8605Smrg * new threads, do rendering synchronously. 877848b8605Smrg * \param num_threads number of rasterizer threads to create 878848b8605Smrg */ 879848b8605Smrgstruct lp_rasterizer * 880848b8605Smrglp_rast_create( unsigned num_threads ) 881848b8605Smrg{ 882848b8605Smrg struct lp_rasterizer *rast; 883848b8605Smrg unsigned i; 884848b8605Smrg 885848b8605Smrg rast = CALLOC_STRUCT(lp_rasterizer); 886848b8605Smrg if (!rast) { 887848b8605Smrg goto no_rast; 888848b8605Smrg } 889848b8605Smrg 890848b8605Smrg rast->full_scenes = lp_scene_queue_create(); 891848b8605Smrg if (!rast->full_scenes) { 892848b8605Smrg goto no_full_scenes; 893848b8605Smrg } 894848b8605Smrg 895b8e80941Smrg for (i = 0; i < MAX2(1, num_threads); i++) { 896848b8605Smrg struct lp_rasterizer_task *task = &rast->tasks[i]; 897848b8605Smrg task->rast = rast; 898848b8605Smrg task->thread_index = i; 899b8e80941Smrg task->thread_data.cache = align_malloc(sizeof(struct lp_build_format_cache), 900b8e80941Smrg 16); 901b8e80941Smrg if (!task->thread_data.cache) { 902b8e80941Smrg goto no_thread_data_cache; 903b8e80941Smrg } 904848b8605Smrg } 905848b8605Smrg 906848b8605Smrg rast->num_threads = num_threads; 907848b8605Smrg 908848b8605Smrg rast->no_rast = debug_get_bool_option("LP_NO_RAST", FALSE); 909848b8605Smrg 910848b8605Smrg create_rast_threads(rast); 911848b8605Smrg 912848b8605Smrg /* for synchronizing rasterization threads */ 913b8e80941Smrg if (rast->num_threads > 0) { 914b8e80941Smrg util_barrier_init( &rast->barrier, rast->num_threads ); 915b8e80941Smrg } 916848b8605Smrg 917848b8605Smrg memset(lp_dummy_tile, 0, sizeof lp_dummy_tile); 918848b8605Smrg 919848b8605Smrg return rast; 920848b8605Smrg 921b8e80941Smrgno_thread_data_cache: 922b8e80941Smrg for (i = 0; i < MAX2(1, rast->num_threads); i++) { 923b8e80941Smrg if (rast->tasks[i].thread_data.cache) { 924b8e80941Smrg align_free(rast->tasks[i].thread_data.cache); 925b8e80941Smrg } 926b8e80941Smrg } 927b8e80941Smrg 928b8e80941Smrg lp_scene_queue_destroy(rast->full_scenes); 929848b8605Smrgno_full_scenes: 930848b8605Smrg FREE(rast); 931848b8605Smrgno_rast: 932848b8605Smrg return NULL; 933848b8605Smrg} 934848b8605Smrg 935848b8605Smrg 936848b8605Smrg/* Shutdown: 937848b8605Smrg */ 938848b8605Smrgvoid lp_rast_destroy( struct lp_rasterizer *rast ) 939848b8605Smrg{ 940848b8605Smrg unsigned i; 941848b8605Smrg 942848b8605Smrg /* Set exit_flag and signal each thread's work_ready semaphore. 943848b8605Smrg * Each thread will be woken up, notice that the exit_flag is set and 944848b8605Smrg * break out of its main loop. The thread will then exit. 945848b8605Smrg */ 946848b8605Smrg rast->exit_flag = TRUE; 947848b8605Smrg for (i = 0; i < rast->num_threads; i++) { 948848b8605Smrg pipe_semaphore_signal(&rast->tasks[i].work_ready); 949848b8605Smrg } 950848b8605Smrg 951848b8605Smrg /* Wait for threads to terminate before cleaning up per-thread data. 952848b8605Smrg * We don't actually call pipe_thread_wait to avoid dead lock on Windows 953848b8605Smrg * per https://bugs.freedesktop.org/show_bug.cgi?id=76252 */ 954848b8605Smrg for (i = 0; i < rast->num_threads; i++) { 955848b8605Smrg#ifdef _WIN32 956848b8605Smrg pipe_semaphore_wait(&rast->tasks[i].work_done); 957848b8605Smrg#else 958b8e80941Smrg thrd_join(rast->threads[i], NULL); 959848b8605Smrg#endif 960848b8605Smrg } 961848b8605Smrg 962848b8605Smrg /* Clean up per-thread data */ 963848b8605Smrg for (i = 0; i < rast->num_threads; i++) { 964848b8605Smrg pipe_semaphore_destroy(&rast->tasks[i].work_ready); 965848b8605Smrg pipe_semaphore_destroy(&rast->tasks[i].work_done); 966848b8605Smrg } 967b8e80941Smrg for (i = 0; i < MAX2(1, rast->num_threads); i++) { 968b8e80941Smrg align_free(rast->tasks[i].thread_data.cache); 969b8e80941Smrg } 970848b8605Smrg 971848b8605Smrg /* for synchronizing rasterization threads */ 972b8e80941Smrg if (rast->num_threads > 0) { 973b8e80941Smrg util_barrier_destroy( &rast->barrier ); 974b8e80941Smrg } 975848b8605Smrg 976848b8605Smrg lp_scene_queue_destroy(rast->full_scenes); 977848b8605Smrg 978848b8605Smrg FREE(rast); 979848b8605Smrg} 980848b8605Smrg 981848b8605Smrg 982