1cdc920a0Smrg/************************************************************************** 2cdc920a0Smrg * 3cdc920a0Smrg * Copyright 2009 VMware, Inc. 4cdc920a0Smrg * All Rights Reserved. 5cdc920a0Smrg * 6cdc920a0Smrg * Permission is hereby granted, free of charge, to any person obtaining a 7cdc920a0Smrg * copy of this software and associated documentation files (the 8cdc920a0Smrg * "Software"), to deal in the Software without restriction, including 9cdc920a0Smrg * without limitation the rights to use, copy, modify, merge, publish, 10cdc920a0Smrg * distribute, sub license, and/or sell copies of the Software, and to 11cdc920a0Smrg * permit persons to whom the Software is furnished to do so, subject to 12cdc920a0Smrg * the following conditions: 13cdc920a0Smrg * 14cdc920a0Smrg * The above copyright notice and this permission notice (including the 15cdc920a0Smrg * next paragraph) shall be included in all copies or substantial portions 16cdc920a0Smrg * of the Software. 17cdc920a0Smrg * 18cdc920a0Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19cdc920a0Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20cdc920a0Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21cdc920a0Smrg * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22cdc920a0Smrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23cdc920a0Smrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24cdc920a0Smrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25cdc920a0Smrg * 26cdc920a0Smrg **************************************************************************/ 27cdc920a0Smrg 28cdc920a0Smrg#include <limits.h> 29cdc920a0Smrg#include "util/u_memory.h" 30cdc920a0Smrg#include "util/u_math.h" 313464ebd5Sriastradh#include "util/u_rect.h" 32cdc920a0Smrg#include "util/u_surface.h" 333464ebd5Sriastradh#include "util/u_pack_color.h" 3401e04c3fSmrg#include "util/u_string.h" 3501e04c3fSmrg#include "util/u_thread.h" 367ec681f3Smrg#include "util/u_memset.h" 3701e04c3fSmrg#include "util/os_time.h" 38af69d88dSmrg 39cdc920a0Smrg#include "lp_scene_queue.h" 40af69d88dSmrg#include "lp_context.h" 41cdc920a0Smrg#include "lp_debug.h" 42cdc920a0Smrg#include "lp_fence.h" 43cdc920a0Smrg#include "lp_perf.h" 443464ebd5Sriastradh#include "lp_query.h" 45cdc920a0Smrg#include "lp_rast.h" 46cdc920a0Smrg#include "lp_rast_priv.h" 4701e04c3fSmrg#include "gallivm/lp_bld_format.h" 48cdc920a0Smrg#include "gallivm/lp_bld_debug.h" 49cdc920a0Smrg#include "lp_scene.h" 50af69d88dSmrg#include "lp_tex_sample.h" 51cdc920a0Smrg 52cdc920a0Smrg 533464ebd5Sriastradh#ifdef DEBUG 543464ebd5Sriastradhint jit_line = 0; 553464ebd5Sriastradhconst struct lp_rast_state *jit_state = NULL; 563464ebd5Sriastradhconst struct lp_rasterizer_task *jit_task = NULL; 573464ebd5Sriastradh#endif 583464ebd5Sriastradh 597ec681f3Smrgconst float lp_sample_pos_4x[4][2] = { { 0.375, 0.125 }, 607ec681f3Smrg { 0.875, 0.375 }, 617ec681f3Smrg { 0.125, 0.625 }, 627ec681f3Smrg { 0.625, 0.875 } }; 633464ebd5Sriastradh 643464ebd5Sriastradh/** 653464ebd5Sriastradh * Begin rasterizing a scene. 663464ebd5Sriastradh * Called once per scene by one thread. 67cdc920a0Smrg */ 683464ebd5Sriastradhstatic void 69cdc920a0Smrglp_rast_begin( struct lp_rasterizer *rast, 70cdc920a0Smrg struct lp_scene *scene ) 71cdc920a0Smrg{ 72cdc920a0Smrg rast->curr_scene = scene; 73cdc920a0Smrg 74cdc920a0Smrg LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 75cdc920a0Smrg 763464ebd5Sriastradh lp_scene_begin_rasterization( scene ); 77cdc920a0Smrg lp_scene_bin_iter_begin( scene ); 78cdc920a0Smrg} 79cdc920a0Smrg 80cdc920a0Smrg 81cdc920a0Smrgstatic void 82cdc920a0Smrglp_rast_end( struct lp_rasterizer *rast ) 83cdc920a0Smrg{ 843464ebd5Sriastradh lp_scene_end_rasterization( rast->curr_scene ); 85cdc920a0Smrg 86cdc920a0Smrg rast->curr_scene = NULL; 87cdc920a0Smrg} 88cdc920a0Smrg 893464ebd5Sriastradh 90cdc920a0Smrg/** 9101e04c3fSmrg * Beginning rasterization of a tile. 92cdc920a0Smrg * \param x window X position of the tile, in pixels 93cdc920a0Smrg * \param y window Y position of the tile, in pixels 94cdc920a0Smrg */ 95cdc920a0Smrgstatic void 963464ebd5Sriastradhlp_rast_tile_begin(struct lp_rasterizer_task *task, 97af69d88dSmrg const struct cmd_bin *bin, 98af69d88dSmrg int x, int y) 99cdc920a0Smrg{ 10001e04c3fSmrg unsigned i; 10101e04c3fSmrg struct lp_scene *scene = task->scene; 10201e04c3fSmrg 103af69d88dSmrg LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); 1043464ebd5Sriastradh 1053464ebd5Sriastradh task->bin = bin; 106af69d88dSmrg task->x = x * TILE_SIZE; 107af69d88dSmrg task->y = y * TILE_SIZE; 108af69d88dSmrg task->width = TILE_SIZE + x * TILE_SIZE > task->scene->fb.width ? 109af69d88dSmrg task->scene->fb.width - x * TILE_SIZE : TILE_SIZE; 110af69d88dSmrg task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ? 111af69d88dSmrg task->scene->fb.height - y * TILE_SIZE : TILE_SIZE; 1123464ebd5Sriastradh 113af69d88dSmrg task->thread_data.vis_counter = 0; 11401e04c3fSmrg task->thread_data.ps_invocations = 0; 1153464ebd5Sriastradh 11601e04c3fSmrg for (i = 0; i < task->scene->fb.nr_cbufs; i++) { 11701e04c3fSmrg if (task->scene->fb.cbufs[i]) { 11801e04c3fSmrg task->color_tiles[i] = scene->cbufs[i].map + 11901e04c3fSmrg scene->cbufs[i].stride * task->y + 12001e04c3fSmrg scene->cbufs[i].format_bytes * task->x; 12101e04c3fSmrg } 12201e04c3fSmrg } 12301e04c3fSmrg if (task->scene->fb.zsbuf) { 12401e04c3fSmrg task->depth_tile = scene->zsbuf.map + 12501e04c3fSmrg scene->zsbuf.stride * task->y + 12601e04c3fSmrg scene->zsbuf.format_bytes * task->x; 12701e04c3fSmrg } 128cdc920a0Smrg} 129cdc920a0Smrg 130cdc920a0Smrg 131cdc920a0Smrg/** 132cdc920a0Smrg * Clear the rasterizer's current color tile. 133cdc920a0Smrg * This is a bin command called during bin processing. 134af69d88dSmrg * Clear commands always clear all bound layers. 135cdc920a0Smrg */ 1363464ebd5Sriastradhstatic void 137cdc920a0Smrglp_rast_clear_color(struct lp_rasterizer_task *task, 138cdc920a0Smrg const union lp_rast_cmd_arg arg) 139cdc920a0Smrg{ 1403464ebd5Sriastradh const struct lp_scene *scene = task->scene; 141af69d88dSmrg unsigned cbuf = arg.clear_rb->cbuf; 142af69d88dSmrg union util_color uc; 143af69d88dSmrg enum pipe_format format; 1443464ebd5Sriastradh 145af69d88dSmrg /* we never bin clear commands for non-existing buffers */ 146af69d88dSmrg assert(cbuf < scene->fb.nr_cbufs); 147af69d88dSmrg assert(scene->fb.cbufs[cbuf]); 148cdc920a0Smrg 149af69d88dSmrg format = scene->fb.cbufs[cbuf]->format; 150af69d88dSmrg uc = arg.clear_rb->color_val; 151cdc920a0Smrg 152af69d88dSmrg /* 153af69d88dSmrg * this is pretty rough since we have target format (bunch of bytes...) here. 154af69d88dSmrg * dump it as raw 4 dwords. 155af69d88dSmrg */ 156af69d88dSmrg LP_DBG(DEBUG_RAST, "%s clear value (target format %d) raw 0x%x,0x%x,0x%x,0x%x\n", 157af69d88dSmrg __FUNCTION__, format, uc.ui[0], uc.ui[1], uc.ui[2], uc.ui[3]); 158af69d88dSmrg 1597ec681f3Smrg for (unsigned s = 0; s < scene->cbufs[cbuf].nr_samples; s++) { 1607ec681f3Smrg void *map = (char *)scene->cbufs[cbuf].map + scene->cbufs[cbuf].sample_stride * s; 1617ec681f3Smrg util_fill_box(map, 1627ec681f3Smrg format, 1637ec681f3Smrg scene->cbufs[cbuf].stride, 1647ec681f3Smrg scene->cbufs[cbuf].layer_stride, 1657ec681f3Smrg task->x, 1667ec681f3Smrg task->y, 1677ec681f3Smrg 0, 1687ec681f3Smrg task->width, 1697ec681f3Smrg task->height, 1707ec681f3Smrg scene->fb_max_layer + 1, 1717ec681f3Smrg &uc); 1727ec681f3Smrg } 173af69d88dSmrg 174af69d88dSmrg /* this will increase for each rb which probably doesn't mean much */ 175cdc920a0Smrg LP_COUNT(nr_color_tile_clear); 176cdc920a0Smrg} 177cdc920a0Smrg 178cdc920a0Smrg 179cdc920a0Smrg/** 180cdc920a0Smrg * Clear the rasterizer's current z/stencil tile. 181cdc920a0Smrg * This is a bin command called during bin processing. 182af69d88dSmrg * Clear commands always clear all bound layers. 183cdc920a0Smrg */ 1843464ebd5Sriastradhstatic void 185cdc920a0Smrglp_rast_clear_zstencil(struct lp_rasterizer_task *task, 186cdc920a0Smrg const union lp_rast_cmd_arg arg) 187cdc920a0Smrg{ 1883464ebd5Sriastradh const struct lp_scene *scene = task->scene; 189af69d88dSmrg uint64_t clear_value64 = arg.clear_zstencil.value; 190af69d88dSmrg uint64_t clear_mask64 = arg.clear_zstencil.mask; 191af69d88dSmrg uint32_t clear_value = (uint32_t) clear_value64; 192af69d88dSmrg uint32_t clear_mask = (uint32_t) clear_mask64; 193af69d88dSmrg const unsigned height = task->height; 194af69d88dSmrg const unsigned width = task->width; 195af69d88dSmrg const unsigned dst_stride = scene->zsbuf.stride; 196cdc920a0Smrg uint8_t *dst; 197cdc920a0Smrg unsigned i, j; 198af69d88dSmrg unsigned block_size; 199cdc920a0Smrg 2003464ebd5Sriastradh LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n", 2013464ebd5Sriastradh __FUNCTION__, clear_value, clear_mask); 202cdc920a0Smrg 203cdc920a0Smrg /* 204af69d88dSmrg * Clear the area of the depth/depth buffer matching this tile. 205cdc920a0Smrg */ 206cdc920a0Smrg 207af69d88dSmrg if (scene->fb.zsbuf) { 208af69d88dSmrg unsigned layer; 209af69d88dSmrg 2107ec681f3Smrg for (unsigned s = 0; s < scene->zsbuf.nr_samples; s++) { 2117ec681f3Smrg uint8_t *dst_layer = task->depth_tile + (s * scene->zsbuf.sample_stride); 2127ec681f3Smrg block_size = util_format_get_blocksize(scene->fb.zsbuf->format); 213af69d88dSmrg 2147ec681f3Smrg clear_value &= clear_mask; 215af69d88dSmrg 2167ec681f3Smrg for (layer = 0; layer <= scene->fb_max_layer; layer++) { 2177ec681f3Smrg dst = dst_layer; 2187ec681f3Smrg 2197ec681f3Smrg switch (block_size) { 2207ec681f3Smrg case 1: 2217ec681f3Smrg assert(clear_mask == 0xff); 222af69d88dSmrg for (i = 0; i < height; i++) { 2237ec681f3Smrg uint8_t *row = (uint8_t *)dst; 2247ec681f3Smrg memset(row, (uint8_t) clear_value, width); 225af69d88dSmrg dst += dst_stride; 226af69d88dSmrg } 2277ec681f3Smrg break; 2287ec681f3Smrg case 2: 2297ec681f3Smrg if (clear_mask == 0xffff) { 2307ec681f3Smrg for (i = 0; i < height; i++) { 2317ec681f3Smrg uint16_t *row = (uint16_t *)dst; 2327ec681f3Smrg for (j = 0; j < width; j++) 2337ec681f3Smrg *row++ = (uint16_t) clear_value; 2347ec681f3Smrg dst += dst_stride; 235af69d88dSmrg } 236af69d88dSmrg } 2377ec681f3Smrg else { 2387ec681f3Smrg for (i = 0; i < height; i++) { 2397ec681f3Smrg uint16_t *row = (uint16_t *)dst; 2407ec681f3Smrg for (j = 0; j < width; j++) { 2417ec681f3Smrg uint16_t tmp = ~clear_mask & *row; 2427ec681f3Smrg *row++ = clear_value | tmp; 2437ec681f3Smrg } 2447ec681f3Smrg dst += dst_stride; 2457ec681f3Smrg } 246af69d88dSmrg } 2477ec681f3Smrg break; 2487ec681f3Smrg case 4: 2497ec681f3Smrg if (clear_mask == 0xffffffff) { 2507ec681f3Smrg for (i = 0; i < height; i++) { 2517ec681f3Smrg util_memset32(dst, clear_value, width); 2527ec681f3Smrg dst += dst_stride; 253af69d88dSmrg } 254af69d88dSmrg } 2557ec681f3Smrg else { 2567ec681f3Smrg for (i = 0; i < height; i++) { 2577ec681f3Smrg uint32_t *row = (uint32_t *)dst; 2587ec681f3Smrg for (j = 0; j < width; j++) { 2597ec681f3Smrg uint32_t tmp = ~clear_mask & *row; 2607ec681f3Smrg *row++ = clear_value | tmp; 2617ec681f3Smrg } 2627ec681f3Smrg dst += dst_stride; 2637ec681f3Smrg } 264af69d88dSmrg } 2657ec681f3Smrg break; 2667ec681f3Smrg case 8: 2677ec681f3Smrg clear_value64 &= clear_mask64; 2687ec681f3Smrg if (clear_mask64 == 0xffffffffffULL) { 2697ec681f3Smrg for (i = 0; i < height; i++) { 2707ec681f3Smrg util_memset64(dst, clear_value64, width); 2717ec681f3Smrg dst += dst_stride; 272af69d88dSmrg } 273af69d88dSmrg } 2747ec681f3Smrg else { 2757ec681f3Smrg for (i = 0; i < height; i++) { 2767ec681f3Smrg uint64_t *row = (uint64_t *)dst; 2777ec681f3Smrg for (j = 0; j < width; j++) { 2787ec681f3Smrg uint64_t tmp = ~clear_mask64 & *row; 2797ec681f3Smrg *row++ = clear_value64 | tmp; 2807ec681f3Smrg } 2817ec681f3Smrg dst += dst_stride; 2827ec681f3Smrg } 2837ec681f3Smrg } 2847ec681f3Smrg break; 285af69d88dSmrg 2867ec681f3Smrg default: 2877ec681f3Smrg assert(0); 2887ec681f3Smrg break; 2897ec681f3Smrg } 2907ec681f3Smrg dst_layer += scene->zsbuf.layer_stride; 2913464ebd5Sriastradh } 292cdc920a0Smrg } 293cdc920a0Smrg } 294cdc920a0Smrg} 295cdc920a0Smrg 296cdc920a0Smrg 297cdc920a0Smrg 298cdc920a0Smrg/** 299cdc920a0Smrg * Run the shader on all blocks in a tile. This is used when a tile is 300cdc920a0Smrg * completely contained inside a triangle. 301cdc920a0Smrg * This is a bin command called during bin processing. 302cdc920a0Smrg */ 3033464ebd5Sriastradhstatic void 304cdc920a0Smrglp_rast_shade_tile(struct lp_rasterizer_task *task, 305cdc920a0Smrg const union lp_rast_cmd_arg arg) 306cdc920a0Smrg{ 3073464ebd5Sriastradh const struct lp_scene *scene = task->scene; 308cdc920a0Smrg const struct lp_rast_shader_inputs *inputs = arg.shade_tile; 309af69d88dSmrg const struct lp_rast_state *state; 310af69d88dSmrg struct lp_fragment_shader_variant *variant; 311cdc920a0Smrg const unsigned tile_x = task->x, tile_y = task->y; 312cdc920a0Smrg unsigned x, y; 313cdc920a0Smrg 3143464ebd5Sriastradh if (inputs->disable) { 3153464ebd5Sriastradh /* This command was partially binned and has been disabled */ 3163464ebd5Sriastradh return; 3173464ebd5Sriastradh } 3183464ebd5Sriastradh 319cdc920a0Smrg LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 320cdc920a0Smrg 321af69d88dSmrg state = task->state; 322af69d88dSmrg assert(state); 323af69d88dSmrg if (!state) { 324af69d88dSmrg return; 325af69d88dSmrg } 326af69d88dSmrg variant = state->variant; 327af69d88dSmrg 328cdc920a0Smrg /* render the whole 64x64 tile in 4x4 chunks */ 329af69d88dSmrg for (y = 0; y < task->height; y += 4){ 330af69d88dSmrg for (x = 0; x < task->width; x += 4) { 331cdc920a0Smrg uint8_t *color[PIPE_MAX_COLOR_BUFS]; 332af69d88dSmrg unsigned stride[PIPE_MAX_COLOR_BUFS]; 3337ec681f3Smrg unsigned sample_stride[PIPE_MAX_COLOR_BUFS]; 334af69d88dSmrg uint8_t *depth = NULL; 335af69d88dSmrg unsigned depth_stride = 0; 3367ec681f3Smrg unsigned depth_sample_stride = 0; 3373464ebd5Sriastradh unsigned i; 338cdc920a0Smrg 339cdc920a0Smrg /* color buffer */ 340af69d88dSmrg for (i = 0; i < scene->fb.nr_cbufs; i++){ 341af69d88dSmrg if (scene->fb.cbufs[i]) { 342af69d88dSmrg stride[i] = scene->cbufs[i].stride; 3437ec681f3Smrg sample_stride[i] = scene->cbufs[i].sample_stride; 344af69d88dSmrg color[i] = lp_rast_get_color_block_pointer(task, i, tile_x + x, 3457ec681f3Smrg tile_y + y, inputs->layer + inputs->view_index); 346af69d88dSmrg } 347af69d88dSmrg else { 348af69d88dSmrg stride[i] = 0; 3497ec681f3Smrg sample_stride[i] = 0; 350af69d88dSmrg color[i] = NULL; 351af69d88dSmrg } 352af69d88dSmrg } 353cdc920a0Smrg 354cdc920a0Smrg /* depth buffer */ 355af69d88dSmrg if (scene->zsbuf.map) { 356af69d88dSmrg depth = lp_rast_get_depth_block_pointer(task, tile_x + x, 3577ec681f3Smrg tile_y + y, inputs->layer + inputs->view_index); 358af69d88dSmrg depth_stride = scene->zsbuf.stride; 3597ec681f3Smrg depth_sample_stride = scene->zsbuf.sample_stride; 360af69d88dSmrg } 361af69d88dSmrg 3627ec681f3Smrg uint64_t mask = 0; 3637ec681f3Smrg for (unsigned i = 0; i < scene->fb_max_samples; i++) 3647ec681f3Smrg mask |= (uint64_t)(0xffff) << (16 * i); 3657ec681f3Smrg 366af69d88dSmrg /* Propagate non-interpolated raster state. */ 367af69d88dSmrg task->thread_data.raster_state.viewport_index = inputs->viewport_index; 3687ec681f3Smrg task->thread_data.raster_state.view_index = inputs->view_index; 3693464ebd5Sriastradh 3703464ebd5Sriastradh /* run shader on 4x4 block */ 3713464ebd5Sriastradh BEGIN_JIT_CALL(state, task); 3723464ebd5Sriastradh variant->jit_function[RAST_WHOLE]( &state->jit_context, 3733464ebd5Sriastradh tile_x + x, tile_y + y, 3743464ebd5Sriastradh inputs->frontfacing, 3753464ebd5Sriastradh GET_A0(inputs), 3763464ebd5Sriastradh GET_DADX(inputs), 3773464ebd5Sriastradh GET_DADY(inputs), 3783464ebd5Sriastradh color, 3793464ebd5Sriastradh depth, 3807ec681f3Smrg mask, 381af69d88dSmrg &task->thread_data, 382af69d88dSmrg stride, 3837ec681f3Smrg depth_stride, 3847ec681f3Smrg sample_stride, 3857ec681f3Smrg depth_sample_stride); 3863464ebd5Sriastradh END_JIT_CALL(); 387cdc920a0Smrg } 388cdc920a0Smrg } 389cdc920a0Smrg} 390cdc920a0Smrg 391cdc920a0Smrg 392cdc920a0Smrg/** 3933464ebd5Sriastradh * Run the shader on all blocks in a tile. This is used when a tile is 3943464ebd5Sriastradh * completely contained inside a triangle, and the shader is opaque. 395cdc920a0Smrg * This is a bin command called during bin processing. 396cdc920a0Smrg */ 3973464ebd5Sriastradhstatic void 3983464ebd5Sriastradhlp_rast_shade_tile_opaque(struct lp_rasterizer_task *task, 3993464ebd5Sriastradh const union lp_rast_cmd_arg arg) 400cdc920a0Smrg{ 4013464ebd5Sriastradh LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 4023464ebd5Sriastradh 403af69d88dSmrg assert(task->state); 404af69d88dSmrg if (!task->state) { 405af69d88dSmrg return; 4063464ebd5Sriastradh } 4073464ebd5Sriastradh 4083464ebd5Sriastradh lp_rast_shade_tile(task, arg); 4093464ebd5Sriastradh} 4103464ebd5Sriastradh 4113464ebd5Sriastradh 4123464ebd5Sriastradh/** 4133464ebd5Sriastradh * Compute shading for a 4x4 block of pixels inside a triangle. 4143464ebd5Sriastradh * This is a bin command called during bin processing. 4153464ebd5Sriastradh * \param x X position of quad in window coords 4163464ebd5Sriastradh * \param y Y position of quad in window coords 4173464ebd5Sriastradh */ 4183464ebd5Sriastradhvoid 4197ec681f3Smrglp_rast_shade_quads_mask_sample(struct lp_rasterizer_task *task, 4207ec681f3Smrg const struct lp_rast_shader_inputs *inputs, 4217ec681f3Smrg unsigned x, unsigned y, 4227ec681f3Smrg uint64_t mask) 4233464ebd5Sriastradh{ 4243464ebd5Sriastradh const struct lp_rast_state *state = task->state; 4253464ebd5Sriastradh struct lp_fragment_shader_variant *variant = state->variant; 4263464ebd5Sriastradh const struct lp_scene *scene = task->scene; 427cdc920a0Smrg uint8_t *color[PIPE_MAX_COLOR_BUFS]; 428af69d88dSmrg unsigned stride[PIPE_MAX_COLOR_BUFS]; 4297ec681f3Smrg unsigned sample_stride[PIPE_MAX_COLOR_BUFS]; 430af69d88dSmrg uint8_t *depth = NULL; 431af69d88dSmrg unsigned depth_stride = 0; 4327ec681f3Smrg unsigned depth_sample_stride = 0; 433cdc920a0Smrg unsigned i; 434cdc920a0Smrg 435cdc920a0Smrg assert(state); 436cdc920a0Smrg 437cdc920a0Smrg /* Sanity checks */ 438af69d88dSmrg assert(x < scene->tiles_x * TILE_SIZE); 439af69d88dSmrg assert(y < scene->tiles_y * TILE_SIZE); 440cdc920a0Smrg assert(x % TILE_VECTOR_WIDTH == 0); 441cdc920a0Smrg assert(y % TILE_VECTOR_HEIGHT == 0); 442cdc920a0Smrg 443cdc920a0Smrg assert((x % 4) == 0); 444cdc920a0Smrg assert((y % 4) == 0); 445cdc920a0Smrg 446cdc920a0Smrg /* color buffer */ 4473464ebd5Sriastradh for (i = 0; i < scene->fb.nr_cbufs; i++) { 448af69d88dSmrg if (scene->fb.cbufs[i]) { 449af69d88dSmrg stride[i] = scene->cbufs[i].stride; 4507ec681f3Smrg sample_stride[i] = scene->cbufs[i].sample_stride; 451af69d88dSmrg color[i] = lp_rast_get_color_block_pointer(task, i, x, y, 4527ec681f3Smrg inputs->layer + inputs->view_index); 453af69d88dSmrg } 454af69d88dSmrg else { 455af69d88dSmrg stride[i] = 0; 4567ec681f3Smrg sample_stride[i] = 0; 457af69d88dSmrg color[i] = NULL; 458af69d88dSmrg } 4593464ebd5Sriastradh } 460cdc920a0Smrg 461cdc920a0Smrg /* depth buffer */ 462af69d88dSmrg if (scene->zsbuf.map) { 463af69d88dSmrg depth_stride = scene->zsbuf.stride; 4647ec681f3Smrg depth_sample_stride = scene->zsbuf.sample_stride; 4657ec681f3Smrg depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer + inputs->view_index); 466af69d88dSmrg } 467af69d88dSmrg 468af69d88dSmrg assert(lp_check_alignment(state->jit_context.u8_blend_color, 16)); 469af69d88dSmrg 470af69d88dSmrg /* 471af69d88dSmrg * The rasterizer may produce fragments outside our 472af69d88dSmrg * allocated 4x4 blocks hence need to filter them out here. 473af69d88dSmrg */ 474af69d88dSmrg if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { 475af69d88dSmrg /* Propagate non-interpolated raster state. */ 476af69d88dSmrg task->thread_data.raster_state.viewport_index = inputs->viewport_index; 4777ec681f3Smrg task->thread_data.raster_state.view_index = inputs->view_index; 478af69d88dSmrg 479af69d88dSmrg /* run shader on 4x4 block */ 480af69d88dSmrg BEGIN_JIT_CALL(state, task); 481af69d88dSmrg variant->jit_function[RAST_EDGE_TEST](&state->jit_context, 482af69d88dSmrg x, y, 483af69d88dSmrg inputs->frontfacing, 484af69d88dSmrg GET_A0(inputs), 485af69d88dSmrg GET_DADX(inputs), 486af69d88dSmrg GET_DADY(inputs), 487af69d88dSmrg color, 488af69d88dSmrg depth, 489af69d88dSmrg mask, 490af69d88dSmrg &task->thread_data, 491af69d88dSmrg stride, 4927ec681f3Smrg depth_stride, 4937ec681f3Smrg sample_stride, 4947ec681f3Smrg depth_sample_stride); 495af69d88dSmrg END_JIT_CALL(); 496af69d88dSmrg } 497cdc920a0Smrg} 498cdc920a0Smrg 4997ec681f3Smrgvoid 5007ec681f3Smrglp_rast_shade_quads_mask(struct lp_rasterizer_task *task, 5017ec681f3Smrg const struct lp_rast_shader_inputs *inputs, 5027ec681f3Smrg unsigned x, unsigned y, 5037ec681f3Smrg unsigned mask) 5047ec681f3Smrg{ 5057ec681f3Smrg uint64_t new_mask = 0; 5067ec681f3Smrg for (unsigned i = 0; i < task->scene->fb_max_samples; i++) 5077ec681f3Smrg new_mask |= ((uint64_t)mask) << (16 * i); 5087ec681f3Smrg lp_rast_shade_quads_mask_sample(task, inputs, x, y, new_mask); 5097ec681f3Smrg} 5107ec681f3Smrg 5117ec681f3Smrg/** 5127ec681f3Smrg * Directly copy pixels from a texture to the destination color buffer. 5137ec681f3Smrg * This is a bin command called during bin processing. 5147ec681f3Smrg */ 5157ec681f3Smrgstatic void 5167ec681f3Smrglp_rast_blit_tile_to_dest(struct lp_rasterizer_task *task, 5177ec681f3Smrg const union lp_rast_cmd_arg arg) 5187ec681f3Smrg{ 5197ec681f3Smrg const struct lp_scene *scene = task->scene; 5207ec681f3Smrg const struct lp_rast_shader_inputs *inputs = arg.shade_tile; 5217ec681f3Smrg const struct lp_rast_state *state = task->state; 5227ec681f3Smrg struct lp_fragment_shader_variant *variant = state->variant; 5237ec681f3Smrg const struct lp_jit_texture *texture = &state->jit_context.textures[0]; 5247ec681f3Smrg const uint8_t *src; 5257ec681f3Smrg uint8_t *dst; 5267ec681f3Smrg unsigned src_stride; 5277ec681f3Smrg unsigned dst_stride; 5287ec681f3Smrg struct pipe_surface *cbuf = scene->fb.cbufs[0]; 5297ec681f3Smrg const unsigned face_slice = cbuf->u.tex.first_layer; 5307ec681f3Smrg const unsigned level = cbuf->u.tex.level; 5317ec681f3Smrg struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture); 5327ec681f3Smrg int src_x, src_y; 5337ec681f3Smrg 5347ec681f3Smrg LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 5357ec681f3Smrg 5367ec681f3Smrg if (inputs->disable) { 5377ec681f3Smrg /* This command was partially binned and has been disabled */ 5387ec681f3Smrg return; 5397ec681f3Smrg } 5407ec681f3Smrg 5417ec681f3Smrg dst = llvmpipe_get_texture_image_address(lpt, face_slice, level); 5427ec681f3Smrg 5437ec681f3Smrg if (!dst) 5447ec681f3Smrg return; 5457ec681f3Smrg 5467ec681f3Smrg dst_stride = lpt->row_stride[level]; 5477ec681f3Smrg 5487ec681f3Smrg src = texture->base; 5497ec681f3Smrg src_stride = texture->row_stride[0]; 5507ec681f3Smrg 5517ec681f3Smrg src_x = util_iround(GET_A0(inputs)[1][0]*texture->width - 0.5f); 5527ec681f3Smrg src_y = util_iround(GET_A0(inputs)[1][1]*texture->height - 0.5f); 5537ec681f3Smrg 5547ec681f3Smrg src_x = src_x + task->x; 5557ec681f3Smrg src_y = src_y + task->y; 5567ec681f3Smrg 5577ec681f3Smrg if (0) { 5587ec681f3Smrg union util_color uc; 5597ec681f3Smrg uc.ui[0] = 0xff0000ff; 5607ec681f3Smrg util_fill_rect(dst, 5617ec681f3Smrg cbuf->format, 5627ec681f3Smrg dst_stride, 5637ec681f3Smrg task->x, 5647ec681f3Smrg task->y, 5657ec681f3Smrg task->width, 5667ec681f3Smrg task->height, 5677ec681f3Smrg &uc); 5687ec681f3Smrg return; 5697ec681f3Smrg } 5707ec681f3Smrg 5717ec681f3Smrg if (src_x >= 0 && 5727ec681f3Smrg src_y >= 0 && 5737ec681f3Smrg src_x + task->width <= texture->width && 5747ec681f3Smrg src_y + task->height <= texture->height) { 5757ec681f3Smrg 5767ec681f3Smrg if (variant->shader->kind == LP_FS_KIND_BLIT_RGBA || 5777ec681f3Smrg (variant->shader->kind == LP_FS_KIND_BLIT_RGB1 && 5787ec681f3Smrg cbuf->format == PIPE_FORMAT_B8G8R8X8_UNORM)) { 5797ec681f3Smrg util_copy_rect(dst, 5807ec681f3Smrg cbuf->format, 5817ec681f3Smrg dst_stride, 5827ec681f3Smrg task->x, task->y, 5837ec681f3Smrg task->width, task->height, 5847ec681f3Smrg src, src_stride, 5857ec681f3Smrg src_x, src_y); 5867ec681f3Smrg return; 5877ec681f3Smrg } 5887ec681f3Smrg 5897ec681f3Smrg if (variant->shader->kind == LP_FS_KIND_BLIT_RGB1) { 5907ec681f3Smrg if (cbuf->format == PIPE_FORMAT_B8G8R8A8_UNORM) { 5917ec681f3Smrg int x, y; 5927ec681f3Smrg 5937ec681f3Smrg dst += task->x * 4; 5947ec681f3Smrg src += src_x * 4; 5957ec681f3Smrg dst += task->y * dst_stride; 5967ec681f3Smrg src += src_y * src_stride; 5977ec681f3Smrg 5987ec681f3Smrg for (y = 0; y < task->height; ++y) { 5997ec681f3Smrg const uint32_t *src_row = (const uint32_t *)src; 6007ec681f3Smrg uint32_t *dst_row = (uint32_t *)dst; 601cdc920a0Smrg 6027ec681f3Smrg for (x = 0; x < task->width; ++x) { 6037ec681f3Smrg *dst_row++ = *src_row++ | 0xff000000; 6047ec681f3Smrg } 6057ec681f3Smrg dst += dst_stride; 6067ec681f3Smrg src += src_stride; 6077ec681f3Smrg } 6087ec681f3Smrg 6097ec681f3Smrg return; 6107ec681f3Smrg } 6117ec681f3Smrg } 6127ec681f3Smrg 6137ec681f3Smrg } 6147ec681f3Smrg 6157ec681f3Smrg /* 6167ec681f3Smrg * Fall back to the jit shaders. 6177ec681f3Smrg */ 6187ec681f3Smrg 6197ec681f3Smrg lp_rast_shade_tile_opaque(task, arg); 6207ec681f3Smrg} 6217ec681f3Smrg 6227ec681f3Smrgstatic void 6237ec681f3Smrglp_rast_blit_tile(struct lp_rasterizer_task *task, 6247ec681f3Smrg const union lp_rast_cmd_arg arg) 6257ec681f3Smrg{ 6267ec681f3Smrg /* This kindof just works, but isn't efficient: 6277ec681f3Smrg */ 6287ec681f3Smrg lp_rast_blit_tile_to_dest(task, arg); 6297ec681f3Smrg} 6303464ebd5Sriastradh 6313464ebd5Sriastradh/** 6323464ebd5Sriastradh * Begin a new occlusion query. 6333464ebd5Sriastradh * This is a bin command put in all bins. 6343464ebd5Sriastradh * Called per thread. 6353464ebd5Sriastradh */ 6363464ebd5Sriastradhstatic void 6373464ebd5Sriastradhlp_rast_begin_query(struct lp_rasterizer_task *task, 6383464ebd5Sriastradh const union lp_rast_cmd_arg arg) 6393464ebd5Sriastradh{ 6403464ebd5Sriastradh struct llvmpipe_query *pq = arg.query_obj; 6413464ebd5Sriastradh 642af69d88dSmrg switch (pq->type) { 643af69d88dSmrg case PIPE_QUERY_OCCLUSION_COUNTER: 644af69d88dSmrg case PIPE_QUERY_OCCLUSION_PREDICATE: 64501e04c3fSmrg case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 646af69d88dSmrg pq->start[task->thread_index] = task->thread_data.vis_counter; 647af69d88dSmrg break; 648af69d88dSmrg case PIPE_QUERY_PIPELINE_STATISTICS: 64901e04c3fSmrg pq->start[task->thread_index] = task->thread_data.ps_invocations; 650af69d88dSmrg break; 6517ec681f3Smrg case PIPE_QUERY_TIME_ELAPSED: 6527ec681f3Smrg pq->start[task->thread_index] = os_time_get_nano(); 6537ec681f3Smrg break; 654af69d88dSmrg default: 655af69d88dSmrg assert(0); 656af69d88dSmrg break; 657af69d88dSmrg } 6583464ebd5Sriastradh} 6593464ebd5Sriastradh 6603464ebd5Sriastradh 6613464ebd5Sriastradh/** 6623464ebd5Sriastradh * End the current occlusion query. 6633464ebd5Sriastradh * This is a bin command put in all bins. 6643464ebd5Sriastradh * Called per thread. 6653464ebd5Sriastradh */ 6663464ebd5Sriastradhstatic void 6673464ebd5Sriastradhlp_rast_end_query(struct lp_rasterizer_task *task, 6683464ebd5Sriastradh const union lp_rast_cmd_arg arg) 6693464ebd5Sriastradh{ 670af69d88dSmrg struct llvmpipe_query *pq = arg.query_obj; 671af69d88dSmrg 672af69d88dSmrg switch (pq->type) { 673af69d88dSmrg case PIPE_QUERY_OCCLUSION_COUNTER: 674af69d88dSmrg case PIPE_QUERY_OCCLUSION_PREDICATE: 67501e04c3fSmrg case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 676af69d88dSmrg pq->end[task->thread_index] += 677af69d88dSmrg task->thread_data.vis_counter - pq->start[task->thread_index]; 678af69d88dSmrg pq->start[task->thread_index] = 0; 679af69d88dSmrg break; 680af69d88dSmrg case PIPE_QUERY_TIMESTAMP: 6817ec681f3Smrg case PIPE_QUERY_TIME_ELAPSED: 682af69d88dSmrg pq->end[task->thread_index] = os_time_get_nano(); 683af69d88dSmrg break; 684af69d88dSmrg case PIPE_QUERY_PIPELINE_STATISTICS: 685af69d88dSmrg pq->end[task->thread_index] += 68601e04c3fSmrg task->thread_data.ps_invocations - pq->start[task->thread_index]; 687af69d88dSmrg pq->start[task->thread_index] = 0; 688af69d88dSmrg break; 689af69d88dSmrg default: 690af69d88dSmrg assert(0); 691af69d88dSmrg break; 6923464ebd5Sriastradh } 6933464ebd5Sriastradh} 6943464ebd5Sriastradh 6953464ebd5Sriastradh 6963464ebd5Sriastradhvoid 6973464ebd5Sriastradhlp_rast_set_state(struct lp_rasterizer_task *task, 6983464ebd5Sriastradh const union lp_rast_cmd_arg arg) 6993464ebd5Sriastradh{ 7003464ebd5Sriastradh task->state = arg.state; 7013464ebd5Sriastradh} 7023464ebd5Sriastradh 7033464ebd5Sriastradh 7043464ebd5Sriastradh 705cdc920a0Smrg/** 7063464ebd5Sriastradh * Called when we're done writing to a color tile. 707cdc920a0Smrg */ 708cdc920a0Smrgstatic void 7093464ebd5Sriastradhlp_rast_tile_end(struct lp_rasterizer_task *task) 710cdc920a0Smrg{ 711af69d88dSmrg unsigned i; 712cdc920a0Smrg 713af69d88dSmrg for (i = 0; i < task->scene->num_active_queries; ++i) { 714af69d88dSmrg lp_rast_end_query(task, lp_rast_arg_query(task->scene->active_queries[i])); 715cdc920a0Smrg } 716cdc920a0Smrg 7173464ebd5Sriastradh /* debug */ 7183464ebd5Sriastradh memset(task->color_tiles, 0, sizeof(task->color_tiles)); 7193464ebd5Sriastradh task->depth_tile = NULL; 720cdc920a0Smrg 7213464ebd5Sriastradh task->bin = NULL; 7223464ebd5Sriastradh} 723cdc920a0Smrg 7247ec681f3Smrg 7257ec681f3Smrg 7267ec681f3Smrg 7277ec681f3Smrg 7287ec681f3Smrg 7297ec681f3Smrg/* Currently have two rendering paths only - the general case triangle 7307ec681f3Smrg * path and the super-specialized blit/clear path. 7317ec681f3Smrg */ 7327ec681f3Smrg#define TRI ((LP_RAST_FLAGS_TRI <<1)-1) /* general case */ 7337ec681f3Smrg#define RECT ((LP_RAST_FLAGS_RECT<<1)-1) /* direct rectangle rasterizer */ 7347ec681f3Smrg#define BLIT ((LP_RAST_FLAGS_BLIT<<1)-1) /* write direct-to-dest */ 7357ec681f3Smrg 7367ec681f3Smrgstatic const unsigned 7377ec681f3Smrgrast_flags[] = { 7387ec681f3Smrg BLIT, /* clear color */ 7397ec681f3Smrg TRI, /* clear zstencil */ 7407ec681f3Smrg TRI, /* triangle_1 */ 7417ec681f3Smrg TRI, /* triangle_2 */ 7427ec681f3Smrg TRI, /* triangle_3 */ 7437ec681f3Smrg TRI, /* triangle_4 */ 7447ec681f3Smrg TRI, /* triangle_5 */ 7457ec681f3Smrg TRI, /* triangle_6 */ 7467ec681f3Smrg TRI, /* triangle_7 */ 7477ec681f3Smrg TRI, /* triangle_8 */ 7487ec681f3Smrg TRI, /* triangle_3_4 */ 7497ec681f3Smrg TRI, /* triangle_3_16 */ 7507ec681f3Smrg TRI, /* triangle_4_16 */ 7517ec681f3Smrg RECT, /* shade_tile */ 7527ec681f3Smrg RECT, /* shade_tile_opaque */ 7537ec681f3Smrg TRI, /* begin_query */ 7547ec681f3Smrg TRI, /* end_query */ 7557ec681f3Smrg BLIT, /* set_state, */ 7567ec681f3Smrg TRI, /* lp_rast_triangle_32_1 */ 7577ec681f3Smrg TRI, /* lp_rast_triangle_32_2 */ 7587ec681f3Smrg TRI, /* lp_rast_triangle_32_3 */ 7597ec681f3Smrg TRI, /* lp_rast_triangle_32_4 */ 7607ec681f3Smrg TRI, /* lp_rast_triangle_32_5 */ 7617ec681f3Smrg TRI, /* lp_rast_triangle_32_6 */ 7627ec681f3Smrg TRI, /* lp_rast_triangle_32_7 */ 7637ec681f3Smrg TRI, /* lp_rast_triangle_32_8 */ 7647ec681f3Smrg TRI, /* lp_rast_triangle_32_3_4 */ 7657ec681f3Smrg TRI, /* lp_rast_triangle_32_3_16 */ 7667ec681f3Smrg TRI, /* lp_rast_triangle_32_4_16 */ 7677ec681f3Smrg TRI, /* lp_rast_triangle_ms_1 */ 7687ec681f3Smrg TRI, /* lp_rast_triangle_ms_2 */ 7697ec681f3Smrg TRI, /* lp_rast_triangle_ms_3 */ 7707ec681f3Smrg TRI, /* lp_rast_triangle_ms_4 */ 7717ec681f3Smrg TRI, /* lp_rast_triangle_ms_5 */ 7727ec681f3Smrg TRI, /* lp_rast_triangle_ms_6 */ 7737ec681f3Smrg TRI, /* lp_rast_triangle_ms_7 */ 7747ec681f3Smrg TRI, /* lp_rast_triangle_ms_8 */ 7757ec681f3Smrg TRI, /* lp_rast_triangle_ms_3_4 */ 7767ec681f3Smrg TRI, /* lp_rast_triangle_ms_3_16 */ 7777ec681f3Smrg TRI, /* lp_rast_triangle_ms_4_16 */ 7787ec681f3Smrg 7797ec681f3Smrg RECT, /* rectangle */ 7807ec681f3Smrg BLIT, /* blit */ 7817ec681f3Smrg}; 7827ec681f3Smrg 7837ec681f3Smrg/* 7847ec681f3Smrg */ 7857ec681f3Smrgstatic const lp_rast_cmd_func 7867ec681f3Smrgdispatch_blit[] = { 7877ec681f3Smrg lp_rast_clear_color, 7887ec681f3Smrg NULL, /* clear_zstencil */ 7897ec681f3Smrg NULL, /* triangle_1 */ 7907ec681f3Smrg NULL, /* triangle_2 */ 7917ec681f3Smrg NULL, /* triangle_3 */ 7927ec681f3Smrg NULL, /* triangle_4 */ 7937ec681f3Smrg NULL, /* triangle_5 */ 7947ec681f3Smrg NULL, /* triangle_6 */ 7957ec681f3Smrg NULL, /* triangle_7 */ 7967ec681f3Smrg NULL, /* triangle_8 */ 7977ec681f3Smrg NULL, /* triangle_3_4 */ 7987ec681f3Smrg NULL, /* triangle_3_16 */ 7997ec681f3Smrg NULL, /* triangle_4_16 */ 8007ec681f3Smrg NULL, /* shade_tile */ 8017ec681f3Smrg NULL, /* shade_tile_opaque */ 8027ec681f3Smrg NULL, /* begin_query */ 8037ec681f3Smrg NULL, /* end_query */ 8047ec681f3Smrg lp_rast_set_state, /* set_state */ 8057ec681f3Smrg NULL, /* lp_rast_triangle_32_1 */ 8067ec681f3Smrg NULL, /* lp_rast_triangle_32_2 */ 8077ec681f3Smrg NULL, /* lp_rast_triangle_32_3 */ 8087ec681f3Smrg NULL, /* lp_rast_triangle_32_4 */ 8097ec681f3Smrg NULL, /* lp_rast_triangle_32_5 */ 8107ec681f3Smrg NULL, /* lp_rast_triangle_32_6 */ 8117ec681f3Smrg NULL, /* lp_rast_triangle_32_7 */ 8127ec681f3Smrg NULL, /* lp_rast_triangle_32_8 */ 8137ec681f3Smrg NULL, /* lp_rast_triangle_32_3_4 */ 8147ec681f3Smrg NULL, /* lp_rast_triangle_32_3_16 */ 8157ec681f3Smrg NULL, /* lp_rast_triangle_32_4_16 */ 8167ec681f3Smrg NULL, /* lp_rast_triangle_ms_1 */ 8177ec681f3Smrg NULL, /* lp_rast_triangle_ms_2 */ 8187ec681f3Smrg NULL, /* lp_rast_triangle_ms_3 */ 8197ec681f3Smrg NULL, /* lp_rast_triangle_ms_4 */ 8207ec681f3Smrg NULL, /* lp_rast_triangle_ms_5 */ 8217ec681f3Smrg NULL, /* lp_rast_triangle_ms_6 */ 8227ec681f3Smrg NULL, /* lp_rast_triangle_ms_7 */ 8237ec681f3Smrg NULL, /* lp_rast_triangle_ms_8 */ 8247ec681f3Smrg NULL, /* lp_rast_triangle_ms_3_4 */ 8257ec681f3Smrg NULL, /* lp_rast_triangle_ms_3_16 */ 8267ec681f3Smrg NULL, /* lp_rast_triangle_ms_4_16 */ 8277ec681f3Smrg 8287ec681f3Smrg NULL, /* rectangle */ 8297ec681f3Smrg lp_rast_blit_tile_to_dest, 8307ec681f3Smrg}; 8317ec681f3Smrg 8327ec681f3Smrg 8337ec681f3Smrg 8347ec681f3Smrg/* Triangle and general case rasterization: Use the SOA llvm shdaers, 8357ec681f3Smrg * an active swizzled tile for each color buf, etc. Don't blit/clear 8367ec681f3Smrg * directly to destination surface as we know there are swizzled 8377ec681f3Smrg * operations coming. 8387ec681f3Smrg */ 8397ec681f3Smrgstatic const lp_rast_cmd_func 8407ec681f3Smrgdispatch_tri[] = { 8413464ebd5Sriastradh lp_rast_clear_color, 8423464ebd5Sriastradh lp_rast_clear_zstencil, 8433464ebd5Sriastradh lp_rast_triangle_1, 8443464ebd5Sriastradh lp_rast_triangle_2, 8453464ebd5Sriastradh lp_rast_triangle_3, 8463464ebd5Sriastradh lp_rast_triangle_4, 8473464ebd5Sriastradh lp_rast_triangle_5, 8483464ebd5Sriastradh lp_rast_triangle_6, 8493464ebd5Sriastradh lp_rast_triangle_7, 8503464ebd5Sriastradh lp_rast_triangle_8, 8513464ebd5Sriastradh lp_rast_triangle_3_4, 8523464ebd5Sriastradh lp_rast_triangle_3_16, 8533464ebd5Sriastradh lp_rast_triangle_4_16, 8543464ebd5Sriastradh lp_rast_shade_tile, 8553464ebd5Sriastradh lp_rast_shade_tile_opaque, 8563464ebd5Sriastradh lp_rast_begin_query, 8573464ebd5Sriastradh lp_rast_end_query, 8583464ebd5Sriastradh lp_rast_set_state, 859af69d88dSmrg lp_rast_triangle_32_1, 860af69d88dSmrg lp_rast_triangle_32_2, 861af69d88dSmrg lp_rast_triangle_32_3, 862af69d88dSmrg lp_rast_triangle_32_4, 863af69d88dSmrg lp_rast_triangle_32_5, 864af69d88dSmrg lp_rast_triangle_32_6, 865af69d88dSmrg lp_rast_triangle_32_7, 866af69d88dSmrg lp_rast_triangle_32_8, 867af69d88dSmrg lp_rast_triangle_32_3_4, 868af69d88dSmrg lp_rast_triangle_32_3_16, 8697ec681f3Smrg lp_rast_triangle_32_4_16, 8707ec681f3Smrg lp_rast_triangle_ms_1, 8717ec681f3Smrg lp_rast_triangle_ms_2, 8727ec681f3Smrg lp_rast_triangle_ms_3, 8737ec681f3Smrg lp_rast_triangle_ms_4, 8747ec681f3Smrg lp_rast_triangle_ms_5, 8757ec681f3Smrg lp_rast_triangle_ms_6, 8767ec681f3Smrg lp_rast_triangle_ms_7, 8777ec681f3Smrg lp_rast_triangle_ms_8, 8787ec681f3Smrg lp_rast_triangle_ms_3_4, 8797ec681f3Smrg lp_rast_triangle_ms_3_16, 8807ec681f3Smrg lp_rast_triangle_ms_4_16, 8817ec681f3Smrg lp_rast_rectangle, 8827ec681f3Smrg lp_rast_blit_tile, 8833464ebd5Sriastradh}; 884cdc920a0Smrg 885cdc920a0Smrg 8867ec681f3Smrg/* Debug rasterization with most fastpaths disabled. 8877ec681f3Smrg */ 8887ec681f3Smrgstatic const lp_rast_cmd_func 8897ec681f3Smrgdispatch_tri_debug[] = 8907ec681f3Smrg{ 8917ec681f3Smrg lp_rast_clear_color, 8927ec681f3Smrg lp_rast_clear_zstencil, 8937ec681f3Smrg lp_rast_triangle_1, 8947ec681f3Smrg lp_rast_triangle_2, 8957ec681f3Smrg lp_rast_triangle_3, 8967ec681f3Smrg lp_rast_triangle_4, 8977ec681f3Smrg lp_rast_triangle_5, 8987ec681f3Smrg lp_rast_triangle_6, 8997ec681f3Smrg lp_rast_triangle_7, 9007ec681f3Smrg lp_rast_triangle_8, 9017ec681f3Smrg lp_rast_triangle_3_4, 9027ec681f3Smrg lp_rast_triangle_3_16, 9037ec681f3Smrg lp_rast_triangle_4_16, 9047ec681f3Smrg lp_rast_shade_tile, 9057ec681f3Smrg lp_rast_shade_tile, 9067ec681f3Smrg lp_rast_begin_query, 9077ec681f3Smrg lp_rast_end_query, 9087ec681f3Smrg lp_rast_set_state, 9097ec681f3Smrg lp_rast_triangle_32_1, 9107ec681f3Smrg lp_rast_triangle_32_2, 9117ec681f3Smrg lp_rast_triangle_32_3, 9127ec681f3Smrg lp_rast_triangle_32_4, 9137ec681f3Smrg lp_rast_triangle_32_5, 9147ec681f3Smrg lp_rast_triangle_32_6, 9157ec681f3Smrg lp_rast_triangle_32_7, 9167ec681f3Smrg lp_rast_triangle_32_8, 9177ec681f3Smrg lp_rast_triangle_32_3_4, 9187ec681f3Smrg lp_rast_triangle_32_3_16, 9197ec681f3Smrg lp_rast_triangle_32_4_16, 9207ec681f3Smrg lp_rast_triangle_ms_1, 9217ec681f3Smrg lp_rast_triangle_ms_2, 9227ec681f3Smrg lp_rast_triangle_ms_3, 9237ec681f3Smrg lp_rast_triangle_ms_4, 9247ec681f3Smrg lp_rast_triangle_ms_5, 9257ec681f3Smrg lp_rast_triangle_ms_6, 9267ec681f3Smrg lp_rast_triangle_ms_7, 9277ec681f3Smrg lp_rast_triangle_ms_8, 9287ec681f3Smrg lp_rast_triangle_ms_3_4, 9297ec681f3Smrg lp_rast_triangle_ms_3_16, 9307ec681f3Smrg lp_rast_triangle_ms_4_16, 9317ec681f3Smrg 9327ec681f3Smrg lp_rast_rectangle, 9337ec681f3Smrg lp_rast_shade_tile, 9347ec681f3Smrg}; 9357ec681f3Smrg 9367ec681f3Smrgstruct lp_bin_info 9377ec681f3Smrglp_characterize_bin(const struct cmd_bin *bin) 9387ec681f3Smrg{ 9397ec681f3Smrg struct cmd_block *block; 9407ec681f3Smrg struct lp_bin_info info; 9417ec681f3Smrg unsigned andflags = ~0; 9427ec681f3Smrg unsigned k, j = 0; 9437ec681f3Smrg 9447ec681f3Smrg STATIC_ASSERT(ARRAY_SIZE(rast_flags) == LP_RAST_OP_MAX); 9457ec681f3Smrg 9467ec681f3Smrg for (block = bin->head; block; block = block->next) { 9477ec681f3Smrg for (k = 0; k < block->count; k++, j++) { 9487ec681f3Smrg andflags &= rast_flags[block->cmd[k]]; 9497ec681f3Smrg } 9507ec681f3Smrg } 9517ec681f3Smrg 9527ec681f3Smrg info.type = andflags; 9537ec681f3Smrg info.count = j; 9547ec681f3Smrg 9557ec681f3Smrg return info; 9567ec681f3Smrg} 9577ec681f3Smrg 9587ec681f3Smrg 9593464ebd5Sriastradhstatic void 9607ec681f3Smrgblit_rasterize_bin(struct lp_rasterizer_task *task, 9617ec681f3Smrg const struct cmd_bin *bin) 9623464ebd5Sriastradh{ 9633464ebd5Sriastradh const struct cmd_block *block; 9643464ebd5Sriastradh unsigned k; 965cdc920a0Smrg 9667ec681f3Smrg STATIC_ASSERT(ARRAY_SIZE(dispatch_blit) == LP_RAST_OP_MAX); 967cdc920a0Smrg 9687ec681f3Smrg if (0) debug_printf("%s\n", __FUNCTION__); 9693464ebd5Sriastradh for (block = bin->head; block; block = block->next) { 9703464ebd5Sriastradh for (k = 0; k < block->count; k++) { 9717ec681f3Smrg dispatch_blit[block->cmd[k]]( task, block->arg[k] ); 9723464ebd5Sriastradh } 9733464ebd5Sriastradh } 974cdc920a0Smrg} 975cdc920a0Smrg 9767ec681f3Smrgstatic void 9777ec681f3Smrgtri_rasterize_bin(struct lp_rasterizer_task *task, 9787ec681f3Smrg const struct cmd_bin *bin, 9797ec681f3Smrg int x, int y) 9807ec681f3Smrg{ 9817ec681f3Smrg const struct cmd_block *block; 9827ec681f3Smrg unsigned k; 9837ec681f3Smrg 9847ec681f3Smrg STATIC_ASSERT(ARRAY_SIZE(dispatch_tri) == LP_RAST_OP_MAX); 9857ec681f3Smrg 9867ec681f3Smrg for (block = bin->head; block; block = block->next) { 9877ec681f3Smrg for (k = 0; k < block->count; k++) { 9887ec681f3Smrg dispatch_tri[block->cmd[k]]( task, block->arg[k] ); 9897ec681f3Smrg } 9907ec681f3Smrg } 9917ec681f3Smrg} 9927ec681f3Smrg 9937ec681f3Smrgstatic void 9947ec681f3Smrgdebug_rasterize_bin(struct lp_rasterizer_task *task, 9957ec681f3Smrg const struct cmd_bin *bin) 9967ec681f3Smrg{ 9977ec681f3Smrg const struct cmd_block *block; 9987ec681f3Smrg unsigned k; 9997ec681f3Smrg 10007ec681f3Smrg STATIC_ASSERT(ARRAY_SIZE(dispatch_tri_debug) == LP_RAST_OP_MAX); 10017ec681f3Smrg 10027ec681f3Smrg for (block = bin->head; block; block = block->next) { 10037ec681f3Smrg for (k = 0; k < block->count; k++) { 10047ec681f3Smrg dispatch_tri_debug[block->cmd[k]]( task, block->arg[k] ); 10057ec681f3Smrg } 10067ec681f3Smrg } 10077ec681f3Smrg} 1008cdc920a0Smrg 1009cdc920a0Smrg 1010cdc920a0Smrg/** 1011cdc920a0Smrg * Rasterize commands for a single bin. 1012cdc920a0Smrg * \param x, y position of the bin's tile in the framebuffer 1013cdc920a0Smrg * Must be called between lp_rast_begin() and lp_rast_end(). 1014cdc920a0Smrg * Called per thread. 1015cdc920a0Smrg */ 1016cdc920a0Smrgstatic void 1017cdc920a0Smrgrasterize_bin(struct lp_rasterizer_task *task, 1018af69d88dSmrg const struct cmd_bin *bin, int x, int y ) 1019cdc920a0Smrg{ 10207ec681f3Smrg struct lp_bin_info info = lp_characterize_bin(bin); 10217ec681f3Smrg 1022af69d88dSmrg lp_rast_tile_begin( task, bin, x, y ); 1023cdc920a0Smrg 10247ec681f3Smrg if (LP_DEBUG & DEBUG_NO_FASTPATH) 10257ec681f3Smrg debug_rasterize_bin(task, bin); 10267ec681f3Smrg else if (info.type & LP_RAST_FLAGS_BLIT) 10277ec681f3Smrg blit_rasterize_bin(task, bin); 10287ec681f3Smrg else if (task->scene->permit_linear_rasterizer && 10297ec681f3Smrg !(LP_PERF & PERF_NO_RAST_LINEAR) && 10307ec681f3Smrg (info.type & LP_RAST_FLAGS_RECT)) 10317ec681f3Smrg lp_linear_rasterize_bin(task, bin); 10327ec681f3Smrg else 10337ec681f3Smrg tri_rasterize_bin(task, bin, x, y); 1034cdc920a0Smrg 10353464ebd5Sriastradh lp_rast_tile_end(task); 1036cdc920a0Smrg 10377ec681f3Smrg#ifdef DEBUG 10383464ebd5Sriastradh /* Debug/Perf flags: 1039cdc920a0Smrg */ 10403464ebd5Sriastradh if (bin->head->count == 1) { 10417ec681f3Smrg if (bin->head->cmd[0] == LP_RAST_OP_BLIT) 10427ec681f3Smrg LP_COUNT(nr_pure_blit_64); 10437ec681f3Smrg else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE) 10443464ebd5Sriastradh LP_COUNT(nr_pure_shade_opaque_64); 10453464ebd5Sriastradh else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE) 10463464ebd5Sriastradh LP_COUNT(nr_pure_shade_64); 1047cdc920a0Smrg } 10487ec681f3Smrg#endif 1049cdc920a0Smrg} 1050cdc920a0Smrg 10513464ebd5Sriastradh 1052cdc920a0Smrg/* An empty bin is one that just loads the contents of the tile and 1053cdc920a0Smrg * stores them again unchanged. This typically happens when bins have 1054cdc920a0Smrg * been flushed for some reason in the middle of a frame, or when 1055cdc920a0Smrg * incremental updates are being made to a render target. 1056cdc920a0Smrg * 1057cdc920a0Smrg * Try to avoid doing pointless work in this case. 1058cdc920a0Smrg */ 1059cdc920a0Smrgstatic boolean 1060cdc920a0Smrgis_empty_bin( const struct cmd_bin *bin ) 1061cdc920a0Smrg{ 10623464ebd5Sriastradh return bin->head == NULL; 1063cdc920a0Smrg} 1064cdc920a0Smrg 1065cdc920a0Smrg 1066cdc920a0Smrg/** 1067cdc920a0Smrg * Rasterize/execute all bins within a scene. 1068cdc920a0Smrg * Called per thread. 1069cdc920a0Smrg */ 1070cdc920a0Smrgstatic void 1071cdc920a0Smrgrasterize_scene(struct lp_rasterizer_task *task, 1072cdc920a0Smrg struct lp_scene *scene) 1073cdc920a0Smrg{ 10743464ebd5Sriastradh task->scene = scene; 1075af69d88dSmrg 107601e04c3fSmrg /* Clear the cache tags. This should not always be necessary but 107701e04c3fSmrg simpler for now. */ 107801e04c3fSmrg#if LP_USE_TEXTURE_CACHE 107901e04c3fSmrg memset(task->thread_data.cache->cache_tags, 0, 108001e04c3fSmrg sizeof(task->thread_data.cache->cache_tags)); 108101e04c3fSmrg#if LP_BUILD_FORMAT_CACHE_DEBUG 108201e04c3fSmrg task->thread_data.cache->cache_access_total = 0; 108301e04c3fSmrg task->thread_data.cache->cache_access_miss = 0; 108401e04c3fSmrg#endif 108501e04c3fSmrg#endif 108601e04c3fSmrg 108701e04c3fSmrg if (!task->rast->no_rast) { 1088af69d88dSmrg /* loop over scene bins, rasterize each */ 1089af69d88dSmrg { 1090af69d88dSmrg struct cmd_bin *bin; 1091af69d88dSmrg int i, j; 1092af69d88dSmrg 1093af69d88dSmrg assert(scene); 1094af69d88dSmrg while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) { 1095af69d88dSmrg if (!is_empty_bin( bin )) 1096af69d88dSmrg rasterize_bin(task, bin, i, j); 1097cdc920a0Smrg } 1098cdc920a0Smrg } 1099cdc920a0Smrg } 1100cdc920a0Smrg 11013464ebd5Sriastradh 110201e04c3fSmrg#if LP_BUILD_FORMAT_CACHE_DEBUG 110301e04c3fSmrg { 110401e04c3fSmrg uint64_t total, miss; 110501e04c3fSmrg total = task->thread_data.cache->cache_access_total; 110601e04c3fSmrg miss = task->thread_data.cache->cache_access_miss; 110701e04c3fSmrg if (total) { 110801e04c3fSmrg debug_printf("thread %d cache access %llu miss %llu hit rate %f\n", 110901e04c3fSmrg task->thread_index, (long long unsigned)total, 111001e04c3fSmrg (long long unsigned)miss, 111101e04c3fSmrg (float)(total - miss)/(float)total); 111201e04c3fSmrg } 111301e04c3fSmrg } 111401e04c3fSmrg#endif 111501e04c3fSmrg 11163464ebd5Sriastradh if (scene->fence) { 11173464ebd5Sriastradh lp_fence_signal(scene->fence); 11183464ebd5Sriastradh } 11193464ebd5Sriastradh 11203464ebd5Sriastradh task->scene = NULL; 1121cdc920a0Smrg} 1122cdc920a0Smrg 1123cdc920a0Smrg 1124cdc920a0Smrg/** 1125cdc920a0Smrg * Called by setup module when it has something for us to render. 1126cdc920a0Smrg */ 1127cdc920a0Smrgvoid 1128cdc920a0Smrglp_rast_queue_scene( struct lp_rasterizer *rast, 1129cdc920a0Smrg struct lp_scene *scene) 1130cdc920a0Smrg{ 1131cdc920a0Smrg LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); 1132cdc920a0Smrg 1133cdc920a0Smrg if (rast->num_threads == 0) { 1134cdc920a0Smrg /* no threading */ 1135af69d88dSmrg unsigned fpstate = util_fpstate_get(); 1136af69d88dSmrg 1137af69d88dSmrg /* Make sure that denorms are treated like zeros. This is 1138af69d88dSmrg * the behavior required by D3D10. OpenGL doesn't care. 1139af69d88dSmrg */ 1140af69d88dSmrg util_fpstate_set_denorms_to_zero(fpstate); 1141cdc920a0Smrg 1142cdc920a0Smrg lp_rast_begin( rast, scene ); 1143cdc920a0Smrg 1144cdc920a0Smrg rasterize_scene( &rast->tasks[0], scene ); 1145cdc920a0Smrg 11463464ebd5Sriastradh lp_rast_end( rast ); 11473464ebd5Sriastradh 1148af69d88dSmrg util_fpstate_set(fpstate); 1149af69d88dSmrg 1150cdc920a0Smrg rast->curr_scene = NULL; 1151cdc920a0Smrg } 1152cdc920a0Smrg else { 1153cdc920a0Smrg /* threaded rendering! */ 1154cdc920a0Smrg unsigned i; 1155cdc920a0Smrg 1156cdc920a0Smrg lp_scene_enqueue( rast->full_scenes, scene ); 1157cdc920a0Smrg 1158cdc920a0Smrg /* signal the threads that there's work to do */ 1159cdc920a0Smrg for (i = 0; i < rast->num_threads; i++) { 1160cdc920a0Smrg pipe_semaphore_signal(&rast->tasks[i].work_ready); 1161cdc920a0Smrg } 1162cdc920a0Smrg } 1163cdc920a0Smrg 1164cdc920a0Smrg LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); 1165cdc920a0Smrg} 1166cdc920a0Smrg 1167cdc920a0Smrg 1168cdc920a0Smrgvoid 1169cdc920a0Smrglp_rast_finish( struct lp_rasterizer *rast ) 1170cdc920a0Smrg{ 1171cdc920a0Smrg if (rast->num_threads == 0) { 1172cdc920a0Smrg /* nothing to do */ 1173cdc920a0Smrg } 1174cdc920a0Smrg else { 1175cdc920a0Smrg int i; 1176cdc920a0Smrg 1177cdc920a0Smrg /* wait for work to complete */ 1178cdc920a0Smrg for (i = 0; i < rast->num_threads; i++) { 1179cdc920a0Smrg pipe_semaphore_wait(&rast->tasks[i].work_done); 1180cdc920a0Smrg } 1181cdc920a0Smrg } 1182cdc920a0Smrg} 1183cdc920a0Smrg 1184cdc920a0Smrg 1185cdc920a0Smrg/** 1186cdc920a0Smrg * This is the thread's main entrypoint. 1187cdc920a0Smrg * It's a simple loop: 1188cdc920a0Smrg * 1. wait for work 1189cdc920a0Smrg * 2. do work 1190cdc920a0Smrg * 3. signal that we're done 1191cdc920a0Smrg */ 119201e04c3fSmrgstatic int 119301e04c3fSmrgthread_function(void *init_data) 1194cdc920a0Smrg{ 1195cdc920a0Smrg struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data; 1196cdc920a0Smrg struct lp_rasterizer *rast = task->rast; 1197cdc920a0Smrg boolean debug = false; 119801e04c3fSmrg char thread_name[16]; 119901e04c3fSmrg unsigned fpstate; 120001e04c3fSmrg 12017ec681f3Smrg snprintf(thread_name, sizeof thread_name, "llvmpipe-%u", task->thread_index); 120201e04c3fSmrg u_thread_setname(thread_name); 1203af69d88dSmrg 1204af69d88dSmrg /* Make sure that denorms are treated like zeros. This is 1205af69d88dSmrg * the behavior required by D3D10. OpenGL doesn't care. 1206af69d88dSmrg */ 120701e04c3fSmrg fpstate = util_fpstate_get(); 1208af69d88dSmrg util_fpstate_set_denorms_to_zero(fpstate); 1209cdc920a0Smrg 1210cdc920a0Smrg while (1) { 1211cdc920a0Smrg /* wait for work */ 1212cdc920a0Smrg if (debug) 1213cdc920a0Smrg debug_printf("thread %d waiting for work\n", task->thread_index); 1214cdc920a0Smrg pipe_semaphore_wait(&task->work_ready); 1215cdc920a0Smrg 1216cdc920a0Smrg if (rast->exit_flag) 1217cdc920a0Smrg break; 1218cdc920a0Smrg 1219cdc920a0Smrg if (task->thread_index == 0) { 1220cdc920a0Smrg /* thread[0]: 1221cdc920a0Smrg * - get next scene to rasterize 1222cdc920a0Smrg * - map the framebuffer surfaces 1223cdc920a0Smrg */ 1224cdc920a0Smrg lp_rast_begin( rast, 1225cdc920a0Smrg lp_scene_dequeue( rast->full_scenes, TRUE ) ); 1226cdc920a0Smrg } 1227cdc920a0Smrg 1228cdc920a0Smrg /* Wait for all threads to get here so that threads[1+] don't 1229cdc920a0Smrg * get a null rast->curr_scene pointer. 1230cdc920a0Smrg */ 123101e04c3fSmrg util_barrier_wait( &rast->barrier ); 1232cdc920a0Smrg 1233cdc920a0Smrg /* do work */ 1234cdc920a0Smrg if (debug) 1235cdc920a0Smrg debug_printf("thread %d doing work\n", task->thread_index); 1236cdc920a0Smrg 1237cdc920a0Smrg rasterize_scene(task, 1238cdc920a0Smrg rast->curr_scene); 1239cdc920a0Smrg 1240cdc920a0Smrg /* wait for all threads to finish with this scene */ 124101e04c3fSmrg util_barrier_wait( &rast->barrier ); 1242cdc920a0Smrg 1243cdc920a0Smrg /* XXX: shouldn't be necessary: 1244cdc920a0Smrg */ 1245cdc920a0Smrg if (task->thread_index == 0) { 1246cdc920a0Smrg lp_rast_end( rast ); 1247cdc920a0Smrg } 1248cdc920a0Smrg 1249cdc920a0Smrg /* signal done with work */ 1250cdc920a0Smrg if (debug) 1251cdc920a0Smrg debug_printf("thread %d done working\n", task->thread_index); 1252cdc920a0Smrg 1253cdc920a0Smrg pipe_semaphore_signal(&task->work_done); 1254cdc920a0Smrg } 1255cdc920a0Smrg 1256af69d88dSmrg#ifdef _WIN32 1257af69d88dSmrg pipe_semaphore_signal(&task->work_done); 1258af69d88dSmrg#endif 1259af69d88dSmrg 1260af69d88dSmrg return 0; 1261cdc920a0Smrg} 1262cdc920a0Smrg 1263cdc920a0Smrg 1264cdc920a0Smrg/** 1265cdc920a0Smrg * Initialize semaphores and spawn the threads. 1266cdc920a0Smrg */ 1267cdc920a0Smrgstatic void 1268cdc920a0Smrgcreate_rast_threads(struct lp_rasterizer *rast) 1269cdc920a0Smrg{ 1270cdc920a0Smrg unsigned i; 1271cdc920a0Smrg 1272cdc920a0Smrg /* NOTE: if num_threads is zero, we won't use any threads */ 1273cdc920a0Smrg for (i = 0; i < rast->num_threads; i++) { 1274cdc920a0Smrg pipe_semaphore_init(&rast->tasks[i].work_ready, 0); 1275cdc920a0Smrg pipe_semaphore_init(&rast->tasks[i].work_done, 0); 127601e04c3fSmrg rast->threads[i] = u_thread_create(thread_function, 1277cdc920a0Smrg (void *) &rast->tasks[i]); 12787ec681f3Smrg if (!rast->threads[i]) { 12797ec681f3Smrg rast->num_threads = i; /* previous thread is max */ 12807ec681f3Smrg break; 12817ec681f3Smrg } 1282cdc920a0Smrg } 1283cdc920a0Smrg} 1284cdc920a0Smrg 1285cdc920a0Smrg 1286cdc920a0Smrg 1287cdc920a0Smrg/** 12883464ebd5Sriastradh * Create new lp_rasterizer. If num_threads is zero, don't create any 12893464ebd5Sriastradh * new threads, do rendering synchronously. 12903464ebd5Sriastradh * \param num_threads number of rasterizer threads to create 1291cdc920a0Smrg */ 1292cdc920a0Smrgstruct lp_rasterizer * 12933464ebd5Sriastradhlp_rast_create( unsigned num_threads ) 1294cdc920a0Smrg{ 1295cdc920a0Smrg struct lp_rasterizer *rast; 12963464ebd5Sriastradh unsigned i; 1297cdc920a0Smrg 1298cdc920a0Smrg rast = CALLOC_STRUCT(lp_rasterizer); 1299af69d88dSmrg if (!rast) { 1300af69d88dSmrg goto no_rast; 1301af69d88dSmrg } 1302cdc920a0Smrg 1303cdc920a0Smrg rast->full_scenes = lp_scene_queue_create(); 1304af69d88dSmrg if (!rast->full_scenes) { 1305af69d88dSmrg goto no_full_scenes; 1306af69d88dSmrg } 1307cdc920a0Smrg 130801e04c3fSmrg for (i = 0; i < MAX2(1, num_threads); i++) { 1309cdc920a0Smrg struct lp_rasterizer_task *task = &rast->tasks[i]; 1310cdc920a0Smrg task->rast = rast; 1311cdc920a0Smrg task->thread_index = i; 131201e04c3fSmrg task->thread_data.cache = align_malloc(sizeof(struct lp_build_format_cache), 131301e04c3fSmrg 16); 131401e04c3fSmrg if (!task->thread_data.cache) { 131501e04c3fSmrg goto no_thread_data_cache; 131601e04c3fSmrg } 1317cdc920a0Smrg } 1318cdc920a0Smrg 13193464ebd5Sriastradh rast->num_threads = num_threads; 13203464ebd5Sriastradh 1321af69d88dSmrg rast->no_rast = debug_get_bool_option("LP_NO_RAST", FALSE); 1322af69d88dSmrg 1323cdc920a0Smrg create_rast_threads(rast); 1324cdc920a0Smrg 1325cdc920a0Smrg /* for synchronizing rasterization threads */ 132601e04c3fSmrg if (rast->num_threads > 0) { 132701e04c3fSmrg util_barrier_init( &rast->barrier, rast->num_threads ); 132801e04c3fSmrg } 1329cdc920a0Smrg 13303464ebd5Sriastradh memset(lp_dummy_tile, 0, sizeof lp_dummy_tile); 13313464ebd5Sriastradh 1332cdc920a0Smrg return rast; 1333af69d88dSmrg 133401e04c3fSmrgno_thread_data_cache: 133501e04c3fSmrg for (i = 0; i < MAX2(1, rast->num_threads); i++) { 133601e04c3fSmrg if (rast->tasks[i].thread_data.cache) { 133701e04c3fSmrg align_free(rast->tasks[i].thread_data.cache); 133801e04c3fSmrg } 133901e04c3fSmrg } 134001e04c3fSmrg 134101e04c3fSmrg lp_scene_queue_destroy(rast->full_scenes); 1342af69d88dSmrgno_full_scenes: 1343af69d88dSmrg FREE(rast); 1344af69d88dSmrgno_rast: 1345af69d88dSmrg return NULL; 1346cdc920a0Smrg} 1347cdc920a0Smrg 1348cdc920a0Smrg 1349cdc920a0Smrg/* Shutdown: 1350cdc920a0Smrg */ 1351cdc920a0Smrgvoid lp_rast_destroy( struct lp_rasterizer *rast ) 1352cdc920a0Smrg{ 13533464ebd5Sriastradh unsigned i; 1354cdc920a0Smrg 1355cdc920a0Smrg /* Set exit_flag and signal each thread's work_ready semaphore. 1356cdc920a0Smrg * Each thread will be woken up, notice that the exit_flag is set and 1357cdc920a0Smrg * break out of its main loop. The thread will then exit. 1358cdc920a0Smrg */ 1359cdc920a0Smrg rast->exit_flag = TRUE; 1360cdc920a0Smrg for (i = 0; i < rast->num_threads; i++) { 1361cdc920a0Smrg pipe_semaphore_signal(&rast->tasks[i].work_ready); 1362cdc920a0Smrg } 1363cdc920a0Smrg 1364af69d88dSmrg /* Wait for threads to terminate before cleaning up per-thread data. 1365af69d88dSmrg * We don't actually call pipe_thread_wait to avoid dead lock on Windows 1366af69d88dSmrg * per https://bugs.freedesktop.org/show_bug.cgi?id=76252 */ 1367cdc920a0Smrg for (i = 0; i < rast->num_threads; i++) { 1368af69d88dSmrg#ifdef _WIN32 13697ec681f3Smrg /* Threads might already be dead - Windows apparently terminates other threads when 13707ec681f3Smrg * returning from main. 13717ec681f3Smrg */ 13727ec681f3Smrg DWORD exit_code = STILL_ACTIVE; 13737ec681f3Smrg if (GetExitCodeThread(rast->threads[i], &exit_code) && exit_code == STILL_ACTIVE) 13747ec681f3Smrg pipe_semaphore_wait(&rast->tasks[i].work_done); 1375af69d88dSmrg#else 137601e04c3fSmrg thrd_join(rast->threads[i], NULL); 1377af69d88dSmrg#endif 1378cdc920a0Smrg } 1379cdc920a0Smrg 1380cdc920a0Smrg /* Clean up per-thread data */ 1381cdc920a0Smrg for (i = 0; i < rast->num_threads; i++) { 1382cdc920a0Smrg pipe_semaphore_destroy(&rast->tasks[i].work_ready); 1383cdc920a0Smrg pipe_semaphore_destroy(&rast->tasks[i].work_done); 1384cdc920a0Smrg } 138501e04c3fSmrg for (i = 0; i < MAX2(1, rast->num_threads); i++) { 138601e04c3fSmrg align_free(rast->tasks[i].thread_data.cache); 138701e04c3fSmrg } 1388cdc920a0Smrg 1389cdc920a0Smrg /* for synchronizing rasterization threads */ 139001e04c3fSmrg if (rast->num_threads > 0) { 139101e04c3fSmrg util_barrier_destroy( &rast->barrier ); 139201e04c3fSmrg } 1393cdc920a0Smrg 13943464ebd5Sriastradh lp_scene_queue_destroy(rast->full_scenes); 13953464ebd5Sriastradh 1396cdc920a0Smrg FREE(rast); 1397cdc920a0Smrg} 1398cdc920a0Smrg 1399cdc920a0Smrg 1400