1cdc920a0Smrg/**************************************************************************
2cdc920a0Smrg *
3cdc920a0Smrg * Copyright 2009 VMware, Inc.
4cdc920a0Smrg * All Rights Reserved.
5cdc920a0Smrg *
6cdc920a0Smrg * Permission is hereby granted, free of charge, to any person obtaining a
7cdc920a0Smrg * copy of this software and associated documentation files (the
8cdc920a0Smrg * "Software"), to deal in the Software without restriction, including
9cdc920a0Smrg * without limitation the rights to use, copy, modify, merge, publish,
10cdc920a0Smrg * distribute, sub license, and/or sell copies of the Software, and to
11cdc920a0Smrg * permit persons to whom the Software is furnished to do so, subject to
12cdc920a0Smrg * the following conditions:
13cdc920a0Smrg *
14cdc920a0Smrg * The above copyright notice and this permission notice (including the
15cdc920a0Smrg * next paragraph) shall be included in all copies or substantial portions
16cdc920a0Smrg * of the Software.
17cdc920a0Smrg *
18cdc920a0Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19cdc920a0Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20cdc920a0Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21cdc920a0Smrg * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22cdc920a0Smrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23cdc920a0Smrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24cdc920a0Smrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25cdc920a0Smrg *
26cdc920a0Smrg **************************************************************************/
27cdc920a0Smrg
28cdc920a0Smrg#include <limits.h>
29cdc920a0Smrg#include "util/u_memory.h"
30cdc920a0Smrg#include "util/u_math.h"
313464ebd5Sriastradh#include "util/u_rect.h"
32cdc920a0Smrg#include "util/u_surface.h"
333464ebd5Sriastradh#include "util/u_pack_color.h"
3401e04c3fSmrg#include "util/u_string.h"
3501e04c3fSmrg#include "util/u_thread.h"
367ec681f3Smrg#include "util/u_memset.h"
3701e04c3fSmrg#include "util/os_time.h"
38af69d88dSmrg
39cdc920a0Smrg#include "lp_scene_queue.h"
40af69d88dSmrg#include "lp_context.h"
41cdc920a0Smrg#include "lp_debug.h"
42cdc920a0Smrg#include "lp_fence.h"
43cdc920a0Smrg#include "lp_perf.h"
443464ebd5Sriastradh#include "lp_query.h"
45cdc920a0Smrg#include "lp_rast.h"
46cdc920a0Smrg#include "lp_rast_priv.h"
4701e04c3fSmrg#include "gallivm/lp_bld_format.h"
48cdc920a0Smrg#include "gallivm/lp_bld_debug.h"
49cdc920a0Smrg#include "lp_scene.h"
50af69d88dSmrg#include "lp_tex_sample.h"
51cdc920a0Smrg
52cdc920a0Smrg
#ifdef DEBUG
/*
 * Debug-only globals, each starting out cleared.
 * NOTE(review): presumably maintained by the BEGIN_JIT_CALL/END_JIT_CALL
 * macros used around the JIT invocations below -- confirm in lp_rast_priv.h.
 */
int jit_line = 0;
const struct lp_rast_state *jit_state = NULL;
const struct lp_rasterizer_task *jit_task = NULL;
#endif

/**
 * Sample position table for 4x multisampling: four entries of two
 * coordinates each (presumably x followed by y, as fractions of a pixel).
 */
const float lp_sample_pos_4x[4][2] = {
   { 0.375f, 0.125f },
   { 0.875f, 0.375f },
   { 0.125f, 0.625f },
   { 0.625f, 0.875f },
};
633464ebd5Sriastradh
643464ebd5Sriastradh/**
653464ebd5Sriastradh * Begin rasterizing a scene.
663464ebd5Sriastradh * Called once per scene by one thread.
67cdc920a0Smrg */
683464ebd5Sriastradhstatic void
69cdc920a0Smrglp_rast_begin( struct lp_rasterizer *rast,
70cdc920a0Smrg               struct lp_scene *scene )
71cdc920a0Smrg{
72cdc920a0Smrg   rast->curr_scene = scene;
73cdc920a0Smrg
74cdc920a0Smrg   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
75cdc920a0Smrg
763464ebd5Sriastradh   lp_scene_begin_rasterization( scene );
77cdc920a0Smrg   lp_scene_bin_iter_begin( scene );
78cdc920a0Smrg}
79cdc920a0Smrg
80cdc920a0Smrg
81cdc920a0Smrgstatic void
82cdc920a0Smrglp_rast_end( struct lp_rasterizer *rast )
83cdc920a0Smrg{
843464ebd5Sriastradh   lp_scene_end_rasterization( rast->curr_scene );
85cdc920a0Smrg
86cdc920a0Smrg   rast->curr_scene = NULL;
87cdc920a0Smrg}
88cdc920a0Smrg
893464ebd5Sriastradh
90cdc920a0Smrg/**
9101e04c3fSmrg * Beginning rasterization of a tile.
92cdc920a0Smrg * \param x  window X position of the tile, in pixels
93cdc920a0Smrg * \param y  window Y position of the tile, in pixels
94cdc920a0Smrg */
95cdc920a0Smrgstatic void
963464ebd5Sriastradhlp_rast_tile_begin(struct lp_rasterizer_task *task,
97af69d88dSmrg                   const struct cmd_bin *bin,
98af69d88dSmrg                   int x, int y)
99cdc920a0Smrg{
10001e04c3fSmrg   unsigned i;
10101e04c3fSmrg   struct lp_scene *scene = task->scene;
10201e04c3fSmrg
103af69d88dSmrg   LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
1043464ebd5Sriastradh
1053464ebd5Sriastradh   task->bin = bin;
106af69d88dSmrg   task->x = x * TILE_SIZE;
107af69d88dSmrg   task->y = y * TILE_SIZE;
108af69d88dSmrg   task->width = TILE_SIZE + x * TILE_SIZE > task->scene->fb.width ?
109af69d88dSmrg                    task->scene->fb.width - x * TILE_SIZE : TILE_SIZE;
110af69d88dSmrg   task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ?
111af69d88dSmrg                    task->scene->fb.height - y * TILE_SIZE : TILE_SIZE;
1123464ebd5Sriastradh
113af69d88dSmrg   task->thread_data.vis_counter = 0;
11401e04c3fSmrg   task->thread_data.ps_invocations = 0;
1153464ebd5Sriastradh
11601e04c3fSmrg   for (i = 0; i < task->scene->fb.nr_cbufs; i++) {
11701e04c3fSmrg      if (task->scene->fb.cbufs[i]) {
11801e04c3fSmrg         task->color_tiles[i] = scene->cbufs[i].map +
11901e04c3fSmrg                                scene->cbufs[i].stride * task->y +
12001e04c3fSmrg                                scene->cbufs[i].format_bytes * task->x;
12101e04c3fSmrg      }
12201e04c3fSmrg   }
12301e04c3fSmrg   if (task->scene->fb.zsbuf) {
12401e04c3fSmrg      task->depth_tile = scene->zsbuf.map +
12501e04c3fSmrg                         scene->zsbuf.stride * task->y +
12601e04c3fSmrg                         scene->zsbuf.format_bytes * task->x;
12701e04c3fSmrg   }
128cdc920a0Smrg}
129cdc920a0Smrg
130cdc920a0Smrg
131cdc920a0Smrg/**
132cdc920a0Smrg * Clear the rasterizer's current color tile.
133cdc920a0Smrg * This is a bin command called during bin processing.
134af69d88dSmrg * Clear commands always clear all bound layers.
135cdc920a0Smrg */
1363464ebd5Sriastradhstatic void
137cdc920a0Smrglp_rast_clear_color(struct lp_rasterizer_task *task,
138cdc920a0Smrg                    const union lp_rast_cmd_arg arg)
139cdc920a0Smrg{
1403464ebd5Sriastradh   const struct lp_scene *scene = task->scene;
141af69d88dSmrg   unsigned cbuf = arg.clear_rb->cbuf;
142af69d88dSmrg   union util_color uc;
143af69d88dSmrg   enum pipe_format format;
1443464ebd5Sriastradh
145af69d88dSmrg   /* we never bin clear commands for non-existing buffers */
146af69d88dSmrg   assert(cbuf < scene->fb.nr_cbufs);
147af69d88dSmrg   assert(scene->fb.cbufs[cbuf]);
148cdc920a0Smrg
149af69d88dSmrg   format = scene->fb.cbufs[cbuf]->format;
150af69d88dSmrg   uc = arg.clear_rb->color_val;
151cdc920a0Smrg
152af69d88dSmrg   /*
153af69d88dSmrg    * this is pretty rough since we have target format (bunch of bytes...) here.
154af69d88dSmrg    * dump it as raw 4 dwords.
155af69d88dSmrg    */
156af69d88dSmrg   LP_DBG(DEBUG_RAST, "%s clear value (target format %d) raw 0x%x,0x%x,0x%x,0x%x\n",
157af69d88dSmrg          __FUNCTION__, format, uc.ui[0], uc.ui[1], uc.ui[2], uc.ui[3]);
158af69d88dSmrg
1597ec681f3Smrg   for (unsigned s = 0; s < scene->cbufs[cbuf].nr_samples; s++) {
1607ec681f3Smrg      void *map = (char *)scene->cbufs[cbuf].map + scene->cbufs[cbuf].sample_stride * s;
1617ec681f3Smrg      util_fill_box(map,
1627ec681f3Smrg                    format,
1637ec681f3Smrg                    scene->cbufs[cbuf].stride,
1647ec681f3Smrg                    scene->cbufs[cbuf].layer_stride,
1657ec681f3Smrg                    task->x,
1667ec681f3Smrg                    task->y,
1677ec681f3Smrg                    0,
1687ec681f3Smrg                    task->width,
1697ec681f3Smrg                    task->height,
1707ec681f3Smrg                    scene->fb_max_layer + 1,
1717ec681f3Smrg                    &uc);
1727ec681f3Smrg   }
173af69d88dSmrg
174af69d88dSmrg   /* this will increase for each rb which probably doesn't mean much */
175cdc920a0Smrg   LP_COUNT(nr_color_tile_clear);
176cdc920a0Smrg}
177cdc920a0Smrg
178cdc920a0Smrg
179cdc920a0Smrg/**
180cdc920a0Smrg * Clear the rasterizer's current z/stencil tile.
181cdc920a0Smrg * This is a bin command called during bin processing.
182af69d88dSmrg * Clear commands always clear all bound layers.
183cdc920a0Smrg */
1843464ebd5Sriastradhstatic void
185cdc920a0Smrglp_rast_clear_zstencil(struct lp_rasterizer_task *task,
186cdc920a0Smrg                       const union lp_rast_cmd_arg arg)
187cdc920a0Smrg{
1883464ebd5Sriastradh   const struct lp_scene *scene = task->scene;
189af69d88dSmrg   uint64_t clear_value64 = arg.clear_zstencil.value;
190af69d88dSmrg   uint64_t clear_mask64 = arg.clear_zstencil.mask;
191af69d88dSmrg   uint32_t clear_value = (uint32_t) clear_value64;
192af69d88dSmrg   uint32_t clear_mask = (uint32_t) clear_mask64;
193af69d88dSmrg   const unsigned height = task->height;
194af69d88dSmrg   const unsigned width = task->width;
195af69d88dSmrg   const unsigned dst_stride = scene->zsbuf.stride;
196cdc920a0Smrg   uint8_t *dst;
197cdc920a0Smrg   unsigned i, j;
198af69d88dSmrg   unsigned block_size;
199cdc920a0Smrg
2003464ebd5Sriastradh   LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n",
2013464ebd5Sriastradh           __FUNCTION__, clear_value, clear_mask);
202cdc920a0Smrg
203cdc920a0Smrg   /*
204af69d88dSmrg    * Clear the area of the depth/depth buffer matching this tile.
205cdc920a0Smrg    */
206cdc920a0Smrg
207af69d88dSmrg   if (scene->fb.zsbuf) {
208af69d88dSmrg      unsigned layer;
209af69d88dSmrg
2107ec681f3Smrg      for (unsigned s = 0; s < scene->zsbuf.nr_samples; s++) {
2117ec681f3Smrg         uint8_t *dst_layer = task->depth_tile + (s * scene->zsbuf.sample_stride);
2127ec681f3Smrg         block_size = util_format_get_blocksize(scene->fb.zsbuf->format);
213af69d88dSmrg
2147ec681f3Smrg         clear_value &= clear_mask;
215af69d88dSmrg
2167ec681f3Smrg         for (layer = 0; layer <= scene->fb_max_layer; layer++) {
2177ec681f3Smrg            dst = dst_layer;
2187ec681f3Smrg
2197ec681f3Smrg            switch (block_size) {
2207ec681f3Smrg            case 1:
2217ec681f3Smrg               assert(clear_mask == 0xff);
222af69d88dSmrg               for (i = 0; i < height; i++) {
2237ec681f3Smrg                  uint8_t *row = (uint8_t *)dst;
2247ec681f3Smrg                  memset(row, (uint8_t) clear_value, width);
225af69d88dSmrg                  dst += dst_stride;
226af69d88dSmrg               }
2277ec681f3Smrg               break;
2287ec681f3Smrg            case 2:
2297ec681f3Smrg               if (clear_mask == 0xffff) {
2307ec681f3Smrg                  for (i = 0; i < height; i++) {
2317ec681f3Smrg                     uint16_t *row = (uint16_t *)dst;
2327ec681f3Smrg                     for (j = 0; j < width; j++)
2337ec681f3Smrg                        *row++ = (uint16_t) clear_value;
2347ec681f3Smrg                     dst += dst_stride;
235af69d88dSmrg                  }
236af69d88dSmrg               }
2377ec681f3Smrg               else {
2387ec681f3Smrg                  for (i = 0; i < height; i++) {
2397ec681f3Smrg                     uint16_t *row = (uint16_t *)dst;
2407ec681f3Smrg                     for (j = 0; j < width; j++) {
2417ec681f3Smrg                        uint16_t tmp = ~clear_mask & *row;
2427ec681f3Smrg                        *row++ = clear_value | tmp;
2437ec681f3Smrg                     }
2447ec681f3Smrg                     dst += dst_stride;
2457ec681f3Smrg                  }
246af69d88dSmrg               }
2477ec681f3Smrg               break;
2487ec681f3Smrg            case 4:
2497ec681f3Smrg               if (clear_mask == 0xffffffff) {
2507ec681f3Smrg                  for (i = 0; i < height; i++) {
2517ec681f3Smrg                     util_memset32(dst, clear_value, width);
2527ec681f3Smrg                     dst += dst_stride;
253af69d88dSmrg                  }
254af69d88dSmrg               }
2557ec681f3Smrg               else {
2567ec681f3Smrg                  for (i = 0; i < height; i++) {
2577ec681f3Smrg                     uint32_t *row = (uint32_t *)dst;
2587ec681f3Smrg                     for (j = 0; j < width; j++) {
2597ec681f3Smrg                        uint32_t tmp = ~clear_mask & *row;
2607ec681f3Smrg                        *row++ = clear_value | tmp;
2617ec681f3Smrg                     }
2627ec681f3Smrg                     dst += dst_stride;
2637ec681f3Smrg                  }
264af69d88dSmrg               }
2657ec681f3Smrg               break;
2667ec681f3Smrg            case 8:
2677ec681f3Smrg               clear_value64 &= clear_mask64;
2687ec681f3Smrg               if (clear_mask64 == 0xffffffffffULL) {
2697ec681f3Smrg                  for (i = 0; i < height; i++) {
2707ec681f3Smrg                     util_memset64(dst, clear_value64, width);
2717ec681f3Smrg                     dst += dst_stride;
272af69d88dSmrg                  }
273af69d88dSmrg               }
2747ec681f3Smrg               else {
2757ec681f3Smrg                  for (i = 0; i < height; i++) {
2767ec681f3Smrg                     uint64_t *row = (uint64_t *)dst;
2777ec681f3Smrg                     for (j = 0; j < width; j++) {
2787ec681f3Smrg                        uint64_t tmp = ~clear_mask64 & *row;
2797ec681f3Smrg                        *row++ = clear_value64 | tmp;
2807ec681f3Smrg                     }
2817ec681f3Smrg                     dst += dst_stride;
2827ec681f3Smrg                  }
2837ec681f3Smrg               }
2847ec681f3Smrg               break;
285af69d88dSmrg
2867ec681f3Smrg            default:
2877ec681f3Smrg               assert(0);
2887ec681f3Smrg               break;
2897ec681f3Smrg            }
2907ec681f3Smrg            dst_layer += scene->zsbuf.layer_stride;
2913464ebd5Sriastradh         }
292cdc920a0Smrg      }
293cdc920a0Smrg   }
294cdc920a0Smrg}
295cdc920a0Smrg
296cdc920a0Smrg
297cdc920a0Smrg
298cdc920a0Smrg/**
299cdc920a0Smrg * Run the shader on all blocks in a tile.  This is used when a tile is
300cdc920a0Smrg * completely contained inside a triangle.
301cdc920a0Smrg * This is a bin command called during bin processing.
302cdc920a0Smrg */
3033464ebd5Sriastradhstatic void
304cdc920a0Smrglp_rast_shade_tile(struct lp_rasterizer_task *task,
305cdc920a0Smrg                   const union lp_rast_cmd_arg arg)
306cdc920a0Smrg{
3073464ebd5Sriastradh   const struct lp_scene *scene = task->scene;
308cdc920a0Smrg   const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
309af69d88dSmrg   const struct lp_rast_state *state;
310af69d88dSmrg   struct lp_fragment_shader_variant *variant;
311cdc920a0Smrg   const unsigned tile_x = task->x, tile_y = task->y;
312cdc920a0Smrg   unsigned x, y;
313cdc920a0Smrg
3143464ebd5Sriastradh   if (inputs->disable) {
3153464ebd5Sriastradh      /* This command was partially binned and has been disabled */
3163464ebd5Sriastradh      return;
3173464ebd5Sriastradh   }
3183464ebd5Sriastradh
319cdc920a0Smrg   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
320cdc920a0Smrg
321af69d88dSmrg   state = task->state;
322af69d88dSmrg   assert(state);
323af69d88dSmrg   if (!state) {
324af69d88dSmrg      return;
325af69d88dSmrg   }
326af69d88dSmrg   variant = state->variant;
327af69d88dSmrg
328cdc920a0Smrg   /* render the whole 64x64 tile in 4x4 chunks */
329af69d88dSmrg   for (y = 0; y < task->height; y += 4){
330af69d88dSmrg      for (x = 0; x < task->width; x += 4) {
331cdc920a0Smrg         uint8_t *color[PIPE_MAX_COLOR_BUFS];
332af69d88dSmrg         unsigned stride[PIPE_MAX_COLOR_BUFS];
3337ec681f3Smrg         unsigned sample_stride[PIPE_MAX_COLOR_BUFS];
334af69d88dSmrg         uint8_t *depth = NULL;
335af69d88dSmrg         unsigned depth_stride = 0;
3367ec681f3Smrg         unsigned depth_sample_stride = 0;
3373464ebd5Sriastradh         unsigned i;
338cdc920a0Smrg
339cdc920a0Smrg         /* color buffer */
340af69d88dSmrg         for (i = 0; i < scene->fb.nr_cbufs; i++){
341af69d88dSmrg            if (scene->fb.cbufs[i]) {
342af69d88dSmrg               stride[i] = scene->cbufs[i].stride;
3437ec681f3Smrg               sample_stride[i] = scene->cbufs[i].sample_stride;
344af69d88dSmrg               color[i] = lp_rast_get_color_block_pointer(task, i, tile_x + x,
3457ec681f3Smrg                                                          tile_y + y, inputs->layer + inputs->view_index);
346af69d88dSmrg            }
347af69d88dSmrg            else {
348af69d88dSmrg               stride[i] = 0;
3497ec681f3Smrg               sample_stride[i] = 0;
350af69d88dSmrg               color[i] = NULL;
351af69d88dSmrg            }
352af69d88dSmrg         }
353cdc920a0Smrg
354cdc920a0Smrg         /* depth buffer */
355af69d88dSmrg         if (scene->zsbuf.map) {
356af69d88dSmrg            depth = lp_rast_get_depth_block_pointer(task, tile_x + x,
3577ec681f3Smrg                                                    tile_y + y, inputs->layer + inputs->view_index);
358af69d88dSmrg            depth_stride = scene->zsbuf.stride;
3597ec681f3Smrg            depth_sample_stride = scene->zsbuf.sample_stride;
360af69d88dSmrg         }
361af69d88dSmrg
3627ec681f3Smrg         uint64_t mask = 0;
3637ec681f3Smrg         for (unsigned i = 0; i < scene->fb_max_samples; i++)
3647ec681f3Smrg            mask |= (uint64_t)(0xffff) << (16 * i);
3657ec681f3Smrg
366af69d88dSmrg         /* Propagate non-interpolated raster state. */
367af69d88dSmrg         task->thread_data.raster_state.viewport_index = inputs->viewport_index;
3687ec681f3Smrg         task->thread_data.raster_state.view_index = inputs->view_index;
3693464ebd5Sriastradh
3703464ebd5Sriastradh         /* run shader on 4x4 block */
3713464ebd5Sriastradh         BEGIN_JIT_CALL(state, task);
3723464ebd5Sriastradh         variant->jit_function[RAST_WHOLE]( &state->jit_context,
3733464ebd5Sriastradh                                            tile_x + x, tile_y + y,
3743464ebd5Sriastradh                                            inputs->frontfacing,
3753464ebd5Sriastradh                                            GET_A0(inputs),
3763464ebd5Sriastradh                                            GET_DADX(inputs),
3773464ebd5Sriastradh                                            GET_DADY(inputs),
3783464ebd5Sriastradh                                            color,
3793464ebd5Sriastradh                                            depth,
3807ec681f3Smrg                                            mask,
381af69d88dSmrg                                            &task->thread_data,
382af69d88dSmrg                                            stride,
3837ec681f3Smrg                                            depth_stride,
3847ec681f3Smrg                                            sample_stride,
3857ec681f3Smrg                                            depth_sample_stride);
3863464ebd5Sriastradh         END_JIT_CALL();
387cdc920a0Smrg      }
388cdc920a0Smrg   }
389cdc920a0Smrg}
390cdc920a0Smrg
391cdc920a0Smrg
392cdc920a0Smrg/**
3933464ebd5Sriastradh * Run the shader on all blocks in a tile.  This is used when a tile is
3943464ebd5Sriastradh * completely contained inside a triangle, and the shader is opaque.
395cdc920a0Smrg * This is a bin command called during bin processing.
396cdc920a0Smrg */
3973464ebd5Sriastradhstatic void
3983464ebd5Sriastradhlp_rast_shade_tile_opaque(struct lp_rasterizer_task *task,
3993464ebd5Sriastradh                          const union lp_rast_cmd_arg arg)
400cdc920a0Smrg{
4013464ebd5Sriastradh   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
4023464ebd5Sriastradh
403af69d88dSmrg   assert(task->state);
404af69d88dSmrg   if (!task->state) {
405af69d88dSmrg      return;
4063464ebd5Sriastradh   }
4073464ebd5Sriastradh
4083464ebd5Sriastradh   lp_rast_shade_tile(task, arg);
4093464ebd5Sriastradh}
4103464ebd5Sriastradh
4113464ebd5Sriastradh
4123464ebd5Sriastradh/**
4133464ebd5Sriastradh * Compute shading for a 4x4 block of pixels inside a triangle.
4143464ebd5Sriastradh * This is a bin command called during bin processing.
4153464ebd5Sriastradh * \param x  X position of quad in window coords
4163464ebd5Sriastradh * \param y  Y position of quad in window coords
4173464ebd5Sriastradh */
4183464ebd5Sriastradhvoid
4197ec681f3Smrglp_rast_shade_quads_mask_sample(struct lp_rasterizer_task *task,
4207ec681f3Smrg                                const struct lp_rast_shader_inputs *inputs,
4217ec681f3Smrg                                unsigned x, unsigned y,
4227ec681f3Smrg                                uint64_t mask)
4233464ebd5Sriastradh{
4243464ebd5Sriastradh   const struct lp_rast_state *state = task->state;
4253464ebd5Sriastradh   struct lp_fragment_shader_variant *variant = state->variant;
4263464ebd5Sriastradh   const struct lp_scene *scene = task->scene;
427cdc920a0Smrg   uint8_t *color[PIPE_MAX_COLOR_BUFS];
428af69d88dSmrg   unsigned stride[PIPE_MAX_COLOR_BUFS];
4297ec681f3Smrg   unsigned sample_stride[PIPE_MAX_COLOR_BUFS];
430af69d88dSmrg   uint8_t *depth = NULL;
431af69d88dSmrg   unsigned depth_stride = 0;
4327ec681f3Smrg   unsigned depth_sample_stride = 0;
433cdc920a0Smrg   unsigned i;
434cdc920a0Smrg
435cdc920a0Smrg   assert(state);
436cdc920a0Smrg
437cdc920a0Smrg   /* Sanity checks */
438af69d88dSmrg   assert(x < scene->tiles_x * TILE_SIZE);
439af69d88dSmrg   assert(y < scene->tiles_y * TILE_SIZE);
440cdc920a0Smrg   assert(x % TILE_VECTOR_WIDTH == 0);
441cdc920a0Smrg   assert(y % TILE_VECTOR_HEIGHT == 0);
442cdc920a0Smrg
443cdc920a0Smrg   assert((x % 4) == 0);
444cdc920a0Smrg   assert((y % 4) == 0);
445cdc920a0Smrg
446cdc920a0Smrg   /* color buffer */
4473464ebd5Sriastradh   for (i = 0; i < scene->fb.nr_cbufs; i++) {
448af69d88dSmrg      if (scene->fb.cbufs[i]) {
449af69d88dSmrg         stride[i] = scene->cbufs[i].stride;
4507ec681f3Smrg         sample_stride[i] = scene->cbufs[i].sample_stride;
451af69d88dSmrg         color[i] = lp_rast_get_color_block_pointer(task, i, x, y,
4527ec681f3Smrg                                                    inputs->layer + inputs->view_index);
453af69d88dSmrg      }
454af69d88dSmrg      else {
455af69d88dSmrg         stride[i] = 0;
4567ec681f3Smrg         sample_stride[i] = 0;
457af69d88dSmrg         color[i] = NULL;
458af69d88dSmrg      }
4593464ebd5Sriastradh   }
460cdc920a0Smrg
461cdc920a0Smrg   /* depth buffer */
462af69d88dSmrg   if (scene->zsbuf.map) {
463af69d88dSmrg      depth_stride = scene->zsbuf.stride;
4647ec681f3Smrg      depth_sample_stride = scene->zsbuf.sample_stride;
4657ec681f3Smrg      depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer + inputs->view_index);
466af69d88dSmrg   }
467af69d88dSmrg
468af69d88dSmrg   assert(lp_check_alignment(state->jit_context.u8_blend_color, 16));
469af69d88dSmrg
470af69d88dSmrg   /*
471af69d88dSmrg    * The rasterizer may produce fragments outside our
472af69d88dSmrg    * allocated 4x4 blocks hence need to filter them out here.
473af69d88dSmrg    */
474af69d88dSmrg   if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
475af69d88dSmrg      /* Propagate non-interpolated raster state. */
476af69d88dSmrg      task->thread_data.raster_state.viewport_index = inputs->viewport_index;
4777ec681f3Smrg      task->thread_data.raster_state.view_index = inputs->view_index;
478af69d88dSmrg
479af69d88dSmrg      /* run shader on 4x4 block */
480af69d88dSmrg      BEGIN_JIT_CALL(state, task);
481af69d88dSmrg      variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
482af69d88dSmrg                                            x, y,
483af69d88dSmrg                                            inputs->frontfacing,
484af69d88dSmrg                                            GET_A0(inputs),
485af69d88dSmrg                                            GET_DADX(inputs),
486af69d88dSmrg                                            GET_DADY(inputs),
487af69d88dSmrg                                            color,
488af69d88dSmrg                                            depth,
489af69d88dSmrg                                            mask,
490af69d88dSmrg                                            &task->thread_data,
491af69d88dSmrg                                            stride,
4927ec681f3Smrg                                            depth_stride,
4937ec681f3Smrg                                            sample_stride,
4947ec681f3Smrg                                            depth_sample_stride);
495af69d88dSmrg      END_JIT_CALL();
496af69d88dSmrg   }
497cdc920a0Smrg}
498cdc920a0Smrg
4997ec681f3Smrgvoid
5007ec681f3Smrglp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
5017ec681f3Smrg                         const struct lp_rast_shader_inputs *inputs,
5027ec681f3Smrg                         unsigned x, unsigned y,
5037ec681f3Smrg                         unsigned mask)
5047ec681f3Smrg{
5057ec681f3Smrg   uint64_t new_mask = 0;
5067ec681f3Smrg   for (unsigned i = 0; i < task->scene->fb_max_samples; i++)
5077ec681f3Smrg      new_mask |= ((uint64_t)mask) << (16 * i);
5087ec681f3Smrg   lp_rast_shade_quads_mask_sample(task, inputs, x, y, new_mask);
5097ec681f3Smrg}
5107ec681f3Smrg
5117ec681f3Smrg/**
5127ec681f3Smrg * Directly copy pixels from a texture to the destination color buffer.
5137ec681f3Smrg * This is a bin command called during bin processing.
5147ec681f3Smrg */
5157ec681f3Smrgstatic void
5167ec681f3Smrglp_rast_blit_tile_to_dest(struct lp_rasterizer_task *task,
5177ec681f3Smrg                          const union lp_rast_cmd_arg arg)
5187ec681f3Smrg{
5197ec681f3Smrg   const struct lp_scene *scene = task->scene;
5207ec681f3Smrg   const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
5217ec681f3Smrg   const struct lp_rast_state *state = task->state;
5227ec681f3Smrg   struct lp_fragment_shader_variant *variant = state->variant;
5237ec681f3Smrg   const struct lp_jit_texture *texture = &state->jit_context.textures[0];
5247ec681f3Smrg   const uint8_t *src;
5257ec681f3Smrg   uint8_t *dst;
5267ec681f3Smrg   unsigned src_stride;
5277ec681f3Smrg   unsigned dst_stride;
5287ec681f3Smrg   struct pipe_surface *cbuf = scene->fb.cbufs[0];
5297ec681f3Smrg   const unsigned face_slice = cbuf->u.tex.first_layer;
5307ec681f3Smrg   const unsigned level = cbuf->u.tex.level;
5317ec681f3Smrg   struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture);
5327ec681f3Smrg   int src_x, src_y;
5337ec681f3Smrg
5347ec681f3Smrg   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
5357ec681f3Smrg
5367ec681f3Smrg   if (inputs->disable) {
5377ec681f3Smrg      /* This command was partially binned and has been disabled */
5387ec681f3Smrg      return;
5397ec681f3Smrg   }
5407ec681f3Smrg
5417ec681f3Smrg   dst = llvmpipe_get_texture_image_address(lpt, face_slice, level);
5427ec681f3Smrg
5437ec681f3Smrg   if (!dst)
5447ec681f3Smrg      return;
5457ec681f3Smrg
5467ec681f3Smrg   dst_stride = lpt->row_stride[level];
5477ec681f3Smrg
5487ec681f3Smrg   src = texture->base;
5497ec681f3Smrg   src_stride = texture->row_stride[0];
5507ec681f3Smrg
5517ec681f3Smrg   src_x = util_iround(GET_A0(inputs)[1][0]*texture->width - 0.5f);
5527ec681f3Smrg   src_y = util_iround(GET_A0(inputs)[1][1]*texture->height - 0.5f);
5537ec681f3Smrg
5547ec681f3Smrg   src_x = src_x + task->x;
5557ec681f3Smrg   src_y = src_y + task->y;
5567ec681f3Smrg
5577ec681f3Smrg   if (0) {
5587ec681f3Smrg      union util_color uc;
5597ec681f3Smrg      uc.ui[0] = 0xff0000ff;
5607ec681f3Smrg      util_fill_rect(dst,
5617ec681f3Smrg                     cbuf->format,
5627ec681f3Smrg                     dst_stride,
5637ec681f3Smrg                     task->x,
5647ec681f3Smrg                     task->y,
5657ec681f3Smrg                     task->width,
5667ec681f3Smrg                     task->height,
5677ec681f3Smrg                     &uc);
5687ec681f3Smrg      return;
5697ec681f3Smrg   }
5707ec681f3Smrg
5717ec681f3Smrg   if (src_x >= 0 &&
5727ec681f3Smrg       src_y >= 0 &&
5737ec681f3Smrg       src_x + task->width <= texture->width &&
5747ec681f3Smrg       src_y + task->height <= texture->height) {
5757ec681f3Smrg
5767ec681f3Smrg      if (variant->shader->kind == LP_FS_KIND_BLIT_RGBA ||
5777ec681f3Smrg          (variant->shader->kind == LP_FS_KIND_BLIT_RGB1 &&
5787ec681f3Smrg           cbuf->format == PIPE_FORMAT_B8G8R8X8_UNORM)) {
5797ec681f3Smrg         util_copy_rect(dst,
5807ec681f3Smrg                        cbuf->format,
5817ec681f3Smrg                        dst_stride,
5827ec681f3Smrg                        task->x, task->y,
5837ec681f3Smrg                        task->width, task->height,
5847ec681f3Smrg                        src, src_stride,
5857ec681f3Smrg                        src_x, src_y);
5867ec681f3Smrg         return;
5877ec681f3Smrg      }
5887ec681f3Smrg
5897ec681f3Smrg      if (variant->shader->kind == LP_FS_KIND_BLIT_RGB1) {
5907ec681f3Smrg         if (cbuf->format == PIPE_FORMAT_B8G8R8A8_UNORM) {
5917ec681f3Smrg            int x, y;
5927ec681f3Smrg
5937ec681f3Smrg            dst += task->x * 4;
5947ec681f3Smrg            src += src_x * 4;
5957ec681f3Smrg            dst += task->y * dst_stride;
5967ec681f3Smrg            src += src_y * src_stride;
5977ec681f3Smrg
5987ec681f3Smrg            for (y = 0; y < task->height; ++y) {
5997ec681f3Smrg               const uint32_t *src_row = (const uint32_t *)src;
6007ec681f3Smrg               uint32_t *dst_row = (uint32_t *)dst;
601cdc920a0Smrg
6027ec681f3Smrg               for (x = 0; x < task->width; ++x) {
6037ec681f3Smrg                  *dst_row++ = *src_row++ | 0xff000000;
6047ec681f3Smrg               }
6057ec681f3Smrg               dst += dst_stride;
6067ec681f3Smrg               src += src_stride;
6077ec681f3Smrg            }
6087ec681f3Smrg
6097ec681f3Smrg            return;
6107ec681f3Smrg         }
6117ec681f3Smrg      }
6127ec681f3Smrg
6137ec681f3Smrg   }
6147ec681f3Smrg
6157ec681f3Smrg   /*
6167ec681f3Smrg    * Fall back to the jit shaders.
6177ec681f3Smrg    */
6187ec681f3Smrg
6197ec681f3Smrg   lp_rast_shade_tile_opaque(task, arg);
6207ec681f3Smrg}
6217ec681f3Smrg
6227ec681f3Smrgstatic void
6237ec681f3Smrglp_rast_blit_tile(struct lp_rasterizer_task *task,
6247ec681f3Smrg                  const union lp_rast_cmd_arg arg)
6257ec681f3Smrg{
6267ec681f3Smrg   /* This kindof just works, but isn't efficient:
6277ec681f3Smrg    */
6287ec681f3Smrg   lp_rast_blit_tile_to_dest(task, arg);
6297ec681f3Smrg}
6303464ebd5Sriastradh
6313464ebd5Sriastradh/**
6323464ebd5Sriastradh * Begin a new occlusion query.
6333464ebd5Sriastradh * This is a bin command put in all bins.
6343464ebd5Sriastradh * Called per thread.
6353464ebd5Sriastradh */
6363464ebd5Sriastradhstatic void
6373464ebd5Sriastradhlp_rast_begin_query(struct lp_rasterizer_task *task,
6383464ebd5Sriastradh                    const union lp_rast_cmd_arg arg)
6393464ebd5Sriastradh{
6403464ebd5Sriastradh   struct llvmpipe_query *pq = arg.query_obj;
6413464ebd5Sriastradh
642af69d88dSmrg   switch (pq->type) {
643af69d88dSmrg   case PIPE_QUERY_OCCLUSION_COUNTER:
644af69d88dSmrg   case PIPE_QUERY_OCCLUSION_PREDICATE:
64501e04c3fSmrg   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
646af69d88dSmrg      pq->start[task->thread_index] = task->thread_data.vis_counter;
647af69d88dSmrg      break;
648af69d88dSmrg   case PIPE_QUERY_PIPELINE_STATISTICS:
64901e04c3fSmrg      pq->start[task->thread_index] = task->thread_data.ps_invocations;
650af69d88dSmrg      break;
6517ec681f3Smrg   case PIPE_QUERY_TIME_ELAPSED:
6527ec681f3Smrg      pq->start[task->thread_index] = os_time_get_nano();
6537ec681f3Smrg      break;
654af69d88dSmrg   default:
655af69d88dSmrg      assert(0);
656af69d88dSmrg      break;
657af69d88dSmrg   }
6583464ebd5Sriastradh}
6593464ebd5Sriastradh
6603464ebd5Sriastradh
6613464ebd5Sriastradh/**
6623464ebd5Sriastradh * End the current occlusion query.
6633464ebd5Sriastradh * This is a bin command put in all bins.
6643464ebd5Sriastradh * Called per thread.
6653464ebd5Sriastradh */
6663464ebd5Sriastradhstatic void
6673464ebd5Sriastradhlp_rast_end_query(struct lp_rasterizer_task *task,
6683464ebd5Sriastradh                  const union lp_rast_cmd_arg arg)
6693464ebd5Sriastradh{
670af69d88dSmrg   struct llvmpipe_query *pq = arg.query_obj;
671af69d88dSmrg
672af69d88dSmrg   switch (pq->type) {
673af69d88dSmrg   case PIPE_QUERY_OCCLUSION_COUNTER:
674af69d88dSmrg   case PIPE_QUERY_OCCLUSION_PREDICATE:
67501e04c3fSmrg   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
676af69d88dSmrg      pq->end[task->thread_index] +=
677af69d88dSmrg         task->thread_data.vis_counter - pq->start[task->thread_index];
678af69d88dSmrg      pq->start[task->thread_index] = 0;
679af69d88dSmrg      break;
680af69d88dSmrg   case PIPE_QUERY_TIMESTAMP:
6817ec681f3Smrg   case PIPE_QUERY_TIME_ELAPSED:
682af69d88dSmrg      pq->end[task->thread_index] = os_time_get_nano();
683af69d88dSmrg      break;
684af69d88dSmrg   case PIPE_QUERY_PIPELINE_STATISTICS:
685af69d88dSmrg      pq->end[task->thread_index] +=
68601e04c3fSmrg         task->thread_data.ps_invocations - pq->start[task->thread_index];
687af69d88dSmrg      pq->start[task->thread_index] = 0;
688af69d88dSmrg      break;
689af69d88dSmrg   default:
690af69d88dSmrg      assert(0);
691af69d88dSmrg      break;
6923464ebd5Sriastradh   }
6933464ebd5Sriastradh}
6943464ebd5Sriastradh
6953464ebd5Sriastradh
/**
 * Bin command: make arg.state the task's current state.
 *
 * Only stores the pointer in task->state; nothing is copied.
 */
void
lp_rast_set_state(struct lp_rasterizer_task *task,
                  const union lp_rast_cmd_arg arg)
{
   task->state = arg.state;
}
7023464ebd5Sriastradh
7033464ebd5Sriastradh
7043464ebd5Sriastradh
705cdc920a0Smrg/**
7063464ebd5Sriastradh * Called when we're done writing to a color tile.
707cdc920a0Smrg */
708cdc920a0Smrgstatic void
7093464ebd5Sriastradhlp_rast_tile_end(struct lp_rasterizer_task *task)
710cdc920a0Smrg{
711af69d88dSmrg   unsigned i;
712cdc920a0Smrg
713af69d88dSmrg   for (i = 0; i < task->scene->num_active_queries; ++i) {
714af69d88dSmrg      lp_rast_end_query(task, lp_rast_arg_query(task->scene->active_queries[i]));
715cdc920a0Smrg   }
716cdc920a0Smrg
7173464ebd5Sriastradh   /* debug */
7183464ebd5Sriastradh   memset(task->color_tiles, 0, sizeof(task->color_tiles));
7193464ebd5Sriastradh   task->depth_tile = NULL;
720cdc920a0Smrg
7213464ebd5Sriastradh   task->bin = NULL;
7223464ebd5Sriastradh}
723cdc920a0Smrg
7247ec681f3Smrg
7257ec681f3Smrg
7267ec681f3Smrg
7277ec681f3Smrg
7287ec681f3Smrg
/* Rendering paths a bin command can be executed on: the general case
 * triangle path, the direct rectangle path and the super-specialized
 * blit/clear path.
 *
 * Each mask is ((flag << 1) - 1), i.e. the flag's bit plus every lower
 * bit (assuming each LP_RAST_FLAGS_* value is a single bit - confirm
 * against its definition).
 */
#define TRI   ((LP_RAST_FLAGS_TRI <<1)-1)        /* general case */
#define RECT  ((LP_RAST_FLAGS_RECT<<1)-1)        /* direct rectangle rasterizer */
#define BLIT  ((LP_RAST_FLAGS_BLIT<<1)-1)        /* write direct-to-dest */

/* Path mask for each bin command, indexed by command opcode.
 * lp_characterize_bin() ANDs together the entries of every command in a
 * bin.  The array length is checked against LP_RAST_OP_MAX there, so a
 * new opcode needs a new entry here in the matching position.
 */
static const unsigned
rast_flags[] = {
   BLIT,                        /* clear color */
   TRI,                         /* clear zstencil */
   TRI,                         /* triangle_1 */
   TRI,                         /* triangle_2 */
   TRI,                         /* triangle_3 */
   TRI,                         /* triangle_4 */
   TRI,                         /* triangle_5 */
   TRI,                         /* triangle_6 */
   TRI,                         /* triangle_7 */
   TRI,                         /* triangle_8 */
   TRI,                         /* triangle_3_4 */
   TRI,                         /* triangle_3_16 */
   TRI,                         /* triangle_4_16 */
   RECT,                        /* shade_tile */
   RECT,                        /* shade_tile_opaque */
   TRI,                         /* begin_query */
   TRI,                         /* end_query */
   BLIT,                        /* set_state, */
   TRI,                         /* lp_rast_triangle_32_1 */
   TRI,                         /* lp_rast_triangle_32_2 */
   TRI,                         /* lp_rast_triangle_32_3 */
   TRI,                         /* lp_rast_triangle_32_4 */
   TRI,                         /* lp_rast_triangle_32_5 */
   TRI,                         /* lp_rast_triangle_32_6 */
   TRI,                         /* lp_rast_triangle_32_7 */
   TRI,                         /* lp_rast_triangle_32_8 */
   TRI,                         /* lp_rast_triangle_32_3_4 */
   TRI,                         /* lp_rast_triangle_32_3_16 */
   TRI,                         /* lp_rast_triangle_32_4_16 */
   TRI,                         /* lp_rast_triangle_ms_1 */
   TRI,                         /* lp_rast_triangle_ms_2 */
   TRI,                         /* lp_rast_triangle_ms_3 */
   TRI,                         /* lp_rast_triangle_ms_4 */
   TRI,                         /* lp_rast_triangle_ms_5 */
   TRI,                         /* lp_rast_triangle_ms_6 */
   TRI,                         /* lp_rast_triangle_ms_7 */
   TRI,                         /* lp_rast_triangle_ms_8 */
   TRI,                         /* lp_rast_triangle_ms_3_4 */
   TRI,                         /* lp_rast_triangle_ms_3_16 */
   TRI,                         /* lp_rast_triangle_ms_4_16 */

   RECT,                        /* rectangle */
   BLIT,                        /* blit */
};
7827ec681f3Smrg
/* Blit/clear path dispatch table, indexed by command opcode and used by
 * blit_rasterize_bin().
 *
 * Only the opcodes marked BLIT in rast_flags[] have a handler here (clear
 * color, set_state, blit); every other slot is NULL.  rasterize_bin() only
 * takes this path when the bin's combined flags contain
 * LP_RAST_FLAGS_BLIT.  The array length is checked against
 * LP_RAST_OP_MAX in blit_rasterize_bin().
 */
static const lp_rast_cmd_func
dispatch_blit[] = {
   lp_rast_clear_color,
   NULL,                        /* clear_zstencil */
   NULL,                        /* triangle_1 */
   NULL,                        /* triangle_2 */
   NULL,                        /* triangle_3 */
   NULL,                        /* triangle_4 */
   NULL,                        /* triangle_5 */
   NULL,                        /* triangle_6 */
   NULL,                        /* triangle_7 */
   NULL,                        /* triangle_8 */
   NULL,                        /* triangle_3_4 */
   NULL,                        /* triangle_3_16 */
   NULL,                        /* triangle_4_16 */
   NULL,                        /* shade_tile */
   NULL,                        /* shade_tile_opaque */
   NULL,                        /* begin_query */
   NULL,                        /* end_query */
   lp_rast_set_state,           /* set_state */
   NULL,                        /* lp_rast_triangle_32_1 */
   NULL,                        /* lp_rast_triangle_32_2 */
   NULL,                        /* lp_rast_triangle_32_3 */
   NULL,                        /* lp_rast_triangle_32_4 */
   NULL,                        /* lp_rast_triangle_32_5 */
   NULL,                        /* lp_rast_triangle_32_6 */
   NULL,                        /* lp_rast_triangle_32_7 */
   NULL,                        /* lp_rast_triangle_32_8 */
   NULL,                        /* lp_rast_triangle_32_3_4 */
   NULL,                        /* lp_rast_triangle_32_3_16 */
   NULL,                        /* lp_rast_triangle_32_4_16 */
   NULL,                        /* lp_rast_triangle_ms_1 */
   NULL,                        /* lp_rast_triangle_ms_2 */
   NULL,                        /* lp_rast_triangle_ms_3 */
   NULL,                        /* lp_rast_triangle_ms_4 */
   NULL,                        /* lp_rast_triangle_ms_5 */
   NULL,                        /* lp_rast_triangle_ms_6 */
   NULL,                        /* lp_rast_triangle_ms_7 */
   NULL,                        /* lp_rast_triangle_ms_8 */
   NULL,                        /* lp_rast_triangle_ms_3_4 */
   NULL,                        /* lp_rast_triangle_ms_3_16 */
   NULL,                        /* lp_rast_triangle_ms_4_16 */

   NULL,                        /* rectangle */
   lp_rast_blit_tile_to_dest,
};
8317ec681f3Smrg
8327ec681f3Smrg
8337ec681f3Smrg
/* Triangle and general case rasterization: Use the SOA llvm shaders,
 * an active swizzled tile for each color buf, etc.  Don't blit/clear
 * directly to destination surface as we know there are swizzled
 * operations coming.
 *
 * Indexed by command opcode and used by tri_rasterize_bin(), which also
 * checks the array length against LP_RAST_OP_MAX.  Every opcode has a
 * handler in this table.
 */
static const lp_rast_cmd_func
dispatch_tri[] = {
   lp_rast_clear_color,
   lp_rast_clear_zstencil,
   lp_rast_triangle_1,
   lp_rast_triangle_2,
   lp_rast_triangle_3,
   lp_rast_triangle_4,
   lp_rast_triangle_5,
   lp_rast_triangle_6,
   lp_rast_triangle_7,
   lp_rast_triangle_8,
   lp_rast_triangle_3_4,
   lp_rast_triangle_3_16,
   lp_rast_triangle_4_16,
   lp_rast_shade_tile,
   lp_rast_shade_tile_opaque,
   lp_rast_begin_query,
   lp_rast_end_query,
   lp_rast_set_state,
   lp_rast_triangle_32_1,
   lp_rast_triangle_32_2,
   lp_rast_triangle_32_3,
   lp_rast_triangle_32_4,
   lp_rast_triangle_32_5,
   lp_rast_triangle_32_6,
   lp_rast_triangle_32_7,
   lp_rast_triangle_32_8,
   lp_rast_triangle_32_3_4,
   lp_rast_triangle_32_3_16,
   lp_rast_triangle_32_4_16,
   lp_rast_triangle_ms_1,
   lp_rast_triangle_ms_2,
   lp_rast_triangle_ms_3,
   lp_rast_triangle_ms_4,
   lp_rast_triangle_ms_5,
   lp_rast_triangle_ms_6,
   lp_rast_triangle_ms_7,
   lp_rast_triangle_ms_8,
   lp_rast_triangle_ms_3_4,
   lp_rast_triangle_ms_3_16,
   lp_rast_triangle_ms_4_16,
   lp_rast_rectangle,
   lp_rast_blit_tile,
};
884cdc920a0Smrg
885cdc920a0Smrg
/* Debug rasterization with most fastpaths disabled.
 *
 * Same layout as dispatch_tri[], except that the shade_tile_opaque and
 * blit slots both call the general lp_rast_shade_tile.  Used by
 * debug_rasterize_bin(), which rasterize_bin() selects when
 * DEBUG_NO_FASTPATH is set in LP_DEBUG.
 */
static const lp_rast_cmd_func
dispatch_tri_debug[] =
{
   lp_rast_clear_color,
   lp_rast_clear_zstencil,
   lp_rast_triangle_1,
   lp_rast_triangle_2,
   lp_rast_triangle_3,
   lp_rast_triangle_4,
   lp_rast_triangle_5,
   lp_rast_triangle_6,
   lp_rast_triangle_7,
   lp_rast_triangle_8,
   lp_rast_triangle_3_4,
   lp_rast_triangle_3_16,
   lp_rast_triangle_4_16,
   lp_rast_shade_tile,
   lp_rast_shade_tile,          /* shade_tile_opaque slot */
   lp_rast_begin_query,
   lp_rast_end_query,
   lp_rast_set_state,
   lp_rast_triangle_32_1,
   lp_rast_triangle_32_2,
   lp_rast_triangle_32_3,
   lp_rast_triangle_32_4,
   lp_rast_triangle_32_5,
   lp_rast_triangle_32_6,
   lp_rast_triangle_32_7,
   lp_rast_triangle_32_8,
   lp_rast_triangle_32_3_4,
   lp_rast_triangle_32_3_16,
   lp_rast_triangle_32_4_16,
   lp_rast_triangle_ms_1,
   lp_rast_triangle_ms_2,
   lp_rast_triangle_ms_3,
   lp_rast_triangle_ms_4,
   lp_rast_triangle_ms_5,
   lp_rast_triangle_ms_6,
   lp_rast_triangle_ms_7,
   lp_rast_triangle_ms_8,
   lp_rast_triangle_ms_3_4,
   lp_rast_triangle_ms_3_16,
   lp_rast_triangle_ms_4_16,

   lp_rast_rectangle,
   lp_rast_shade_tile,          /* blit slot */
};
9357ec681f3Smrg
9367ec681f3Smrgstruct lp_bin_info
9377ec681f3Smrglp_characterize_bin(const struct cmd_bin *bin)
9387ec681f3Smrg{
9397ec681f3Smrg   struct cmd_block *block;
9407ec681f3Smrg   struct lp_bin_info info;
9417ec681f3Smrg   unsigned andflags = ~0;
9427ec681f3Smrg   unsigned k, j = 0;
9437ec681f3Smrg
9447ec681f3Smrg   STATIC_ASSERT(ARRAY_SIZE(rast_flags) == LP_RAST_OP_MAX);
9457ec681f3Smrg
9467ec681f3Smrg   for (block = bin->head; block; block = block->next) {
9477ec681f3Smrg      for (k = 0; k < block->count; k++, j++) {
9487ec681f3Smrg         andflags &= rast_flags[block->cmd[k]];
9497ec681f3Smrg      }
9507ec681f3Smrg   }
9517ec681f3Smrg
9527ec681f3Smrg   info.type = andflags;
9537ec681f3Smrg   info.count = j;
9547ec681f3Smrg
9557ec681f3Smrg   return info;
9567ec681f3Smrg}
9577ec681f3Smrg
9587ec681f3Smrg
9593464ebd5Sriastradhstatic void
9607ec681f3Smrgblit_rasterize_bin(struct lp_rasterizer_task *task,
9617ec681f3Smrg                   const struct cmd_bin *bin)
9623464ebd5Sriastradh{
9633464ebd5Sriastradh   const struct cmd_block *block;
9643464ebd5Sriastradh   unsigned k;
965cdc920a0Smrg
9667ec681f3Smrg   STATIC_ASSERT(ARRAY_SIZE(dispatch_blit) == LP_RAST_OP_MAX);
967cdc920a0Smrg
9687ec681f3Smrg   if (0) debug_printf("%s\n", __FUNCTION__);
9693464ebd5Sriastradh   for (block = bin->head; block; block = block->next) {
9703464ebd5Sriastradh      for (k = 0; k < block->count; k++) {
9717ec681f3Smrg         dispatch_blit[block->cmd[k]]( task, block->arg[k] );
9723464ebd5Sriastradh      }
9733464ebd5Sriastradh   }
974cdc920a0Smrg}
975cdc920a0Smrg
9767ec681f3Smrgstatic void
9777ec681f3Smrgtri_rasterize_bin(struct lp_rasterizer_task *task,
9787ec681f3Smrg                  const struct cmd_bin *bin,
9797ec681f3Smrg                  int x, int y)
9807ec681f3Smrg{
9817ec681f3Smrg   const struct cmd_block *block;
9827ec681f3Smrg   unsigned k;
9837ec681f3Smrg
9847ec681f3Smrg   STATIC_ASSERT(ARRAY_SIZE(dispatch_tri) == LP_RAST_OP_MAX);
9857ec681f3Smrg
9867ec681f3Smrg   for (block = bin->head; block; block = block->next) {
9877ec681f3Smrg      for (k = 0; k < block->count; k++) {
9887ec681f3Smrg         dispatch_tri[block->cmd[k]]( task, block->arg[k] );
9897ec681f3Smrg      }
9907ec681f3Smrg   }
9917ec681f3Smrg}
9927ec681f3Smrg
9937ec681f3Smrgstatic void
9947ec681f3Smrgdebug_rasterize_bin(struct lp_rasterizer_task *task,
9957ec681f3Smrg                  const struct cmd_bin *bin)
9967ec681f3Smrg{
9977ec681f3Smrg   const struct cmd_block *block;
9987ec681f3Smrg   unsigned k;
9997ec681f3Smrg
10007ec681f3Smrg   STATIC_ASSERT(ARRAY_SIZE(dispatch_tri_debug) == LP_RAST_OP_MAX);
10017ec681f3Smrg
10027ec681f3Smrg   for (block = bin->head; block; block = block->next) {
10037ec681f3Smrg      for (k = 0; k < block->count; k++) {
10047ec681f3Smrg         dispatch_tri_debug[block->cmd[k]]( task, block->arg[k] );
10057ec681f3Smrg      }
10067ec681f3Smrg   }
10077ec681f3Smrg}
1008cdc920a0Smrg
1009cdc920a0Smrg
1010cdc920a0Smrg/**
1011cdc920a0Smrg * Rasterize commands for a single bin.
1012cdc920a0Smrg * \param x, y  position of the bin's tile in the framebuffer
1013cdc920a0Smrg * Must be called between lp_rast_begin() and lp_rast_end().
1014cdc920a0Smrg * Called per thread.
1015cdc920a0Smrg */
1016cdc920a0Smrgstatic void
1017cdc920a0Smrgrasterize_bin(struct lp_rasterizer_task *task,
1018af69d88dSmrg              const struct cmd_bin *bin, int x, int y )
1019cdc920a0Smrg{
10207ec681f3Smrg   struct lp_bin_info info = lp_characterize_bin(bin);
10217ec681f3Smrg
1022af69d88dSmrg   lp_rast_tile_begin( task, bin, x, y );
1023cdc920a0Smrg
10247ec681f3Smrg   if (LP_DEBUG & DEBUG_NO_FASTPATH)
10257ec681f3Smrg      debug_rasterize_bin(task, bin);
10267ec681f3Smrg   else if (info.type & LP_RAST_FLAGS_BLIT)
10277ec681f3Smrg      blit_rasterize_bin(task, bin);
10287ec681f3Smrg   else if (task->scene->permit_linear_rasterizer &&
10297ec681f3Smrg            !(LP_PERF & PERF_NO_RAST_LINEAR) &&
10307ec681f3Smrg            (info.type & LP_RAST_FLAGS_RECT))
10317ec681f3Smrg      lp_linear_rasterize_bin(task, bin);
10327ec681f3Smrg   else
10337ec681f3Smrg      tri_rasterize_bin(task, bin, x, y);
1034cdc920a0Smrg
10353464ebd5Sriastradh   lp_rast_tile_end(task);
1036cdc920a0Smrg
10377ec681f3Smrg#ifdef DEBUG
10383464ebd5Sriastradh   /* Debug/Perf flags:
1039cdc920a0Smrg    */
10403464ebd5Sriastradh   if (bin->head->count == 1) {
10417ec681f3Smrg      if (bin->head->cmd[0] == LP_RAST_OP_BLIT)
10427ec681f3Smrg         LP_COUNT(nr_pure_blit_64);
10437ec681f3Smrg      else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE)
10443464ebd5Sriastradh         LP_COUNT(nr_pure_shade_opaque_64);
10453464ebd5Sriastradh      else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE)
10463464ebd5Sriastradh         LP_COUNT(nr_pure_shade_64);
1047cdc920a0Smrg   }
10487ec681f3Smrg#endif
1049cdc920a0Smrg}
1050cdc920a0Smrg
10513464ebd5Sriastradh
1052cdc920a0Smrg/* An empty bin is one that just loads the contents of the tile and
1053cdc920a0Smrg * stores them again unchanged.  This typically happens when bins have
1054cdc920a0Smrg * been flushed for some reason in the middle of a frame, or when
1055cdc920a0Smrg * incremental updates are being made to a render target.
1056cdc920a0Smrg *
1057cdc920a0Smrg * Try to avoid doing pointless work in this case.
1058cdc920a0Smrg */
1059cdc920a0Smrgstatic boolean
1060cdc920a0Smrgis_empty_bin( const struct cmd_bin *bin )
1061cdc920a0Smrg{
10623464ebd5Sriastradh   return bin->head == NULL;
1063cdc920a0Smrg}
1064cdc920a0Smrg
1065cdc920a0Smrg
/**
 * Rasterize/execute all bins within a scene.
 * Called per thread.
 *
 * Records the scene on the task, optionally resets the per-thread cache
 * tags, executes every non-empty bin returned by
 * lp_scene_bin_iter_next(), signals the scene's fence if it has one and
 * finally clears task->scene again.
 *
 * NOTE(review): scene is dereferenced unconditionally (scene->fence), so
 * the assert(scene) inside the loop block cannot protect against a NULL
 * scene in release builds.
 */
static void
rasterize_scene(struct lp_rasterizer_task *task,
                struct lp_scene *scene)
{
   task->scene = scene;

   /* Clear the cache tags. This should not always be necessary but
      simpler for now. */
#if LP_USE_TEXTURE_CACHE
   memset(task->thread_data.cache->cache_tags, 0,
          sizeof(task->thread_data.cache->cache_tags));
#if LP_BUILD_FORMAT_CACHE_DEBUG
   task->thread_data.cache->cache_access_total = 0;
   task->thread_data.cache->cache_access_miss = 0;
#endif
#endif

   /* rast->no_rast (set from the LP_NO_RAST option in lp_rast_create())
    * skips all bin execution; the fence below is still signalled.
    */
   if (!task->rast->no_rast) {
      /* loop over scene bins, rasterize each */
      {
         struct cmd_bin *bin;
         /* filled in by lp_scene_bin_iter_next() and passed on as
          * rasterize_bin()'s x, y
          */
         int i, j;

         assert(scene);
         while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) {
            if (!is_empty_bin( bin ))
               rasterize_bin(task, bin, i, j);
         }
      }
   }


#if LP_BUILD_FORMAT_CACHE_DEBUG
   {
      /* report this thread's cache hit rate for the scene */
      uint64_t total, miss;
      total = task->thread_data.cache->cache_access_total;
      miss = task->thread_data.cache->cache_access_miss;
      if (total) {
         debug_printf("thread %d cache access %llu miss %llu hit rate %f\n",
                 task->thread_index, (long long unsigned)total,
                 (long long unsigned)miss,
                 (float)(total - miss)/(float)total);
      }
   }
#endif

   if (scene->fence) {
      lp_fence_signal(scene->fence);
   }

   task->scene = NULL;
}
1122cdc920a0Smrg
1123cdc920a0Smrg
1124cdc920a0Smrg/**
1125cdc920a0Smrg * Called by setup module when it has something for us to render.
1126cdc920a0Smrg */
1127cdc920a0Smrgvoid
1128cdc920a0Smrglp_rast_queue_scene( struct lp_rasterizer *rast,
1129cdc920a0Smrg                     struct lp_scene *scene)
1130cdc920a0Smrg{
1131cdc920a0Smrg   LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
1132cdc920a0Smrg
1133cdc920a0Smrg   if (rast->num_threads == 0) {
1134cdc920a0Smrg      /* no threading */
1135af69d88dSmrg      unsigned fpstate = util_fpstate_get();
1136af69d88dSmrg
1137af69d88dSmrg      /* Make sure that denorms are treated like zeros. This is
1138af69d88dSmrg       * the behavior required by D3D10. OpenGL doesn't care.
1139af69d88dSmrg       */
1140af69d88dSmrg      util_fpstate_set_denorms_to_zero(fpstate);
1141cdc920a0Smrg
1142cdc920a0Smrg      lp_rast_begin( rast, scene );
1143cdc920a0Smrg
1144cdc920a0Smrg      rasterize_scene( &rast->tasks[0], scene );
1145cdc920a0Smrg
11463464ebd5Sriastradh      lp_rast_end( rast );
11473464ebd5Sriastradh
1148af69d88dSmrg      util_fpstate_set(fpstate);
1149af69d88dSmrg
1150cdc920a0Smrg      rast->curr_scene = NULL;
1151cdc920a0Smrg   }
1152cdc920a0Smrg   else {
1153cdc920a0Smrg      /* threaded rendering! */
1154cdc920a0Smrg      unsigned i;
1155cdc920a0Smrg
1156cdc920a0Smrg      lp_scene_enqueue( rast->full_scenes, scene );
1157cdc920a0Smrg
1158cdc920a0Smrg      /* signal the threads that there's work to do */
1159cdc920a0Smrg      for (i = 0; i < rast->num_threads; i++) {
1160cdc920a0Smrg         pipe_semaphore_signal(&rast->tasks[i].work_ready);
1161cdc920a0Smrg      }
1162cdc920a0Smrg   }
1163cdc920a0Smrg
1164cdc920a0Smrg   LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
1165cdc920a0Smrg}
1166cdc920a0Smrg
1167cdc920a0Smrg
1168cdc920a0Smrgvoid
1169cdc920a0Smrglp_rast_finish( struct lp_rasterizer *rast )
1170cdc920a0Smrg{
1171cdc920a0Smrg   if (rast->num_threads == 0) {
1172cdc920a0Smrg      /* nothing to do */
1173cdc920a0Smrg   }
1174cdc920a0Smrg   else {
1175cdc920a0Smrg      int i;
1176cdc920a0Smrg
1177cdc920a0Smrg      /* wait for work to complete */
1178cdc920a0Smrg      for (i = 0; i < rast->num_threads; i++) {
1179cdc920a0Smrg         pipe_semaphore_wait(&rast->tasks[i].work_done);
1180cdc920a0Smrg      }
1181cdc920a0Smrg   }
1182cdc920a0Smrg}
1183cdc920a0Smrg
1184cdc920a0Smrg
/**
 * This is the thread's main entrypoint.
 * It's a simple loop:
 *   1. wait for work
 *   2. do work
 *   3. signal that we're done
 *
 * \param init_data  the struct lp_rasterizer_task owned by this thread
 * \return always 0
 *
 * The loop ends when the thread is woken up with rast->exit_flag set
 * (see lp_rast_destroy()).
 */
static int
thread_function(void *init_data)
{
   struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
   struct lp_rasterizer *rast = task->rast;
   /* flip to true by hand to get per-thread trace output */
   boolean debug = false;
   char thread_name[16];
   unsigned fpstate;

   snprintf(thread_name, sizeof thread_name, "llvmpipe-%u", task->thread_index);
   u_thread_setname(thread_name);

   /* Make sure that denorms are treated like zeros. This is
    * the behavior required by D3D10. OpenGL doesn't care.
    */
   fpstate = util_fpstate_get();
   util_fpstate_set_denorms_to_zero(fpstate);

   while (1) {
      /* wait for work */
      if (debug)
         debug_printf("thread %d waiting for work\n", task->thread_index);
      pipe_semaphore_wait(&task->work_ready);

      if (rast->exit_flag)
         break;

      if (task->thread_index == 0) {
         /* thread[0]:
          *  - get next scene to rasterize
          *  - map the framebuffer surfaces
          */
         lp_rast_begin( rast,
                        lp_scene_dequeue( rast->full_scenes, TRUE ) );
      }

      /* Wait for all threads to get here so that threads[1+] don't
       * get a null rast->curr_scene pointer.
       */
      util_barrier_wait( &rast->barrier );

      /* do work */
      if (debug)
         debug_printf("thread %d doing work\n", task->thread_index);

      rasterize_scene(task,
                      rast->curr_scene);

      /* wait for all threads to finish with this scene */
      util_barrier_wait( &rast->barrier );

      /* XXX: shouldn't be necessary:
       */
      if (task->thread_index == 0) {
         lp_rast_end( rast );
      }

      /* signal done with work */
      if (debug)
         debug_printf("thread %d done working\n", task->thread_index);

      pipe_semaphore_signal(&task->work_done);
   }

#ifdef _WIN32
   /* On Windows lp_rast_destroy() waits on work_done instead of joining
    * the thread, so signal it one last time on the way out.
    */
   pipe_semaphore_signal(&task->work_done);
#endif

   return 0;
}
1262cdc920a0Smrg
1263cdc920a0Smrg
1264cdc920a0Smrg/**
1265cdc920a0Smrg * Initialize semaphores and spawn the threads.
1266cdc920a0Smrg */
1267cdc920a0Smrgstatic void
1268cdc920a0Smrgcreate_rast_threads(struct lp_rasterizer *rast)
1269cdc920a0Smrg{
1270cdc920a0Smrg   unsigned i;
1271cdc920a0Smrg
1272cdc920a0Smrg   /* NOTE: if num_threads is zero, we won't use any threads */
1273cdc920a0Smrg   for (i = 0; i < rast->num_threads; i++) {
1274cdc920a0Smrg      pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
1275cdc920a0Smrg      pipe_semaphore_init(&rast->tasks[i].work_done, 0);
127601e04c3fSmrg      rast->threads[i] = u_thread_create(thread_function,
1277cdc920a0Smrg                                            (void *) &rast->tasks[i]);
12787ec681f3Smrg      if (!rast->threads[i]) {
12797ec681f3Smrg         rast->num_threads = i; /* previous thread is max */
12807ec681f3Smrg         break;
12817ec681f3Smrg      }
1282cdc920a0Smrg   }
1283cdc920a0Smrg}
1284cdc920a0Smrg
1285cdc920a0Smrg
1286cdc920a0Smrg
1287cdc920a0Smrg/**
12883464ebd5Sriastradh * Create new lp_rasterizer.  If num_threads is zero, don't create any
12893464ebd5Sriastradh * new threads, do rendering synchronously.
12903464ebd5Sriastradh * \param num_threads  number of rasterizer threads to create
1291cdc920a0Smrg */
1292cdc920a0Smrgstruct lp_rasterizer *
12933464ebd5Sriastradhlp_rast_create( unsigned num_threads )
1294cdc920a0Smrg{
1295cdc920a0Smrg   struct lp_rasterizer *rast;
12963464ebd5Sriastradh   unsigned i;
1297cdc920a0Smrg
1298cdc920a0Smrg   rast = CALLOC_STRUCT(lp_rasterizer);
1299af69d88dSmrg   if (!rast) {
1300af69d88dSmrg      goto no_rast;
1301af69d88dSmrg   }
1302cdc920a0Smrg
1303cdc920a0Smrg   rast->full_scenes = lp_scene_queue_create();
1304af69d88dSmrg   if (!rast->full_scenes) {
1305af69d88dSmrg      goto no_full_scenes;
1306af69d88dSmrg   }
1307cdc920a0Smrg
130801e04c3fSmrg   for (i = 0; i < MAX2(1, num_threads); i++) {
1309cdc920a0Smrg      struct lp_rasterizer_task *task = &rast->tasks[i];
1310cdc920a0Smrg      task->rast = rast;
1311cdc920a0Smrg      task->thread_index = i;
131201e04c3fSmrg      task->thread_data.cache = align_malloc(sizeof(struct lp_build_format_cache),
131301e04c3fSmrg                                             16);
131401e04c3fSmrg      if (!task->thread_data.cache) {
131501e04c3fSmrg         goto no_thread_data_cache;
131601e04c3fSmrg      }
1317cdc920a0Smrg   }
1318cdc920a0Smrg
13193464ebd5Sriastradh   rast->num_threads = num_threads;
13203464ebd5Sriastradh
1321af69d88dSmrg   rast->no_rast = debug_get_bool_option("LP_NO_RAST", FALSE);
1322af69d88dSmrg
1323cdc920a0Smrg   create_rast_threads(rast);
1324cdc920a0Smrg
1325cdc920a0Smrg   /* for synchronizing rasterization threads */
132601e04c3fSmrg   if (rast->num_threads > 0) {
132701e04c3fSmrg      util_barrier_init( &rast->barrier, rast->num_threads );
132801e04c3fSmrg   }
1329cdc920a0Smrg
13303464ebd5Sriastradh   memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
13313464ebd5Sriastradh
1332cdc920a0Smrg   return rast;
1333af69d88dSmrg
133401e04c3fSmrgno_thread_data_cache:
133501e04c3fSmrg   for (i = 0; i < MAX2(1, rast->num_threads); i++) {
133601e04c3fSmrg      if (rast->tasks[i].thread_data.cache) {
133701e04c3fSmrg         align_free(rast->tasks[i].thread_data.cache);
133801e04c3fSmrg      }
133901e04c3fSmrg   }
134001e04c3fSmrg
134101e04c3fSmrg   lp_scene_queue_destroy(rast->full_scenes);
1342af69d88dSmrgno_full_scenes:
1343af69d88dSmrg   FREE(rast);
1344af69d88dSmrgno_rast:
1345af69d88dSmrg   return NULL;
1346cdc920a0Smrg}
1347cdc920a0Smrg
1348cdc920a0Smrg
/* Shutdown:
 *
 * Asks every rasterizer thread to exit, waits for them, then releases the
 * per-task semaphores and caches, the barrier, the scene queue and the
 * rasterizer itself.  rast must not be used after this call.
 */
void lp_rast_destroy( struct lp_rasterizer *rast )
{
   unsigned i;

   /* Set exit_flag and signal each thread's work_ready semaphore.
    * Each thread will be woken up, notice that the exit_flag is set and
    * break out of its main loop.  The thread will then exit.
    */
   rast->exit_flag = TRUE;
   for (i = 0; i < rast->num_threads; i++) {
      pipe_semaphore_signal(&rast->tasks[i].work_ready);
   }

   /* Wait for threads to terminate before cleaning up per-thread data.
    * On Windows we don't join the thread, to avoid a deadlock
    * per https://bugs.freedesktop.org/show_bug.cgi?id=76252; we wait for
    * the final work_done signal the thread sends on exit instead. */
   for (i = 0; i < rast->num_threads; i++) {
#ifdef _WIN32
      /* Threads might already be dead - Windows apparently terminates other threads when
       * returning from main.
       */
      DWORD exit_code = STILL_ACTIVE;
      if (GetExitCodeThread(rast->threads[i], &exit_code) && exit_code == STILL_ACTIVE)
         pipe_semaphore_wait(&rast->tasks[i].work_done);
#else
      thrd_join(rast->threads[i], NULL);
#endif
   }

   /* Clean up per-thread data */
   for (i = 0; i < rast->num_threads; i++) {
      pipe_semaphore_destroy(&rast->tasks[i].work_ready);
      pipe_semaphore_destroy(&rast->tasks[i].work_done);
   }
   /* tasks[0] owns a cache even when no threads are used */
   for (i = 0; i < MAX2(1, rast->num_threads); i++) {
      align_free(rast->tasks[i].thread_data.cache);
   }

   /* for synchronizing rasterization threads */
   if (rast->num_threads > 0) {
      util_barrier_destroy( &rast->barrier );
   }

   lp_scene_queue_destroy(rast->full_scenes);

   FREE(rast);
}
1398cdc920a0Smrg
1399cdc920a0Smrg
1400