1848b8605Smrg/**************************************************************************
2848b8605Smrg *
3848b8605Smrg * Copyright 2009 VMware, Inc.
4848b8605Smrg * All Rights Reserved.
5848b8605Smrg *
6848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a
7848b8605Smrg * copy of this software and associated documentation files (the
8848b8605Smrg * "Software"), to deal in the Software without restriction, including
9848b8605Smrg * without limitation the rights to use, copy, modify, merge, publish,
10848b8605Smrg * distribute, sub license, and/or sell copies of the Software, and to
11848b8605Smrg * permit persons to whom the Software is furnished to do so, subject to
12848b8605Smrg * the following conditions:
13848b8605Smrg *
14848b8605Smrg * The above copyright notice and this permission notice (including the
15848b8605Smrg * next paragraph) shall be included in all copies or substantial portions
16848b8605Smrg * of the Software.
17848b8605Smrg *
18848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19848b8605Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20848b8605Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21848b8605Smrg * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22848b8605Smrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23848b8605Smrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24848b8605Smrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25848b8605Smrg *
26848b8605Smrg **************************************************************************/
27848b8605Smrg
28848b8605Smrg#include <limits.h>
29848b8605Smrg#include "util/u_memory.h"
30848b8605Smrg#include "util/u_math.h"
31848b8605Smrg#include "util/u_rect.h"
32848b8605Smrg#include "util/u_surface.h"
33848b8605Smrg#include "util/u_pack_color.h"
34b8e80941Smrg#include "util/u_string.h"
35b8e80941Smrg#include "util/u_thread.h"
36848b8605Smrg
37b8e80941Smrg#include "util/os_time.h"
38848b8605Smrg
39848b8605Smrg#include "lp_scene_queue.h"
40848b8605Smrg#include "lp_context.h"
41848b8605Smrg#include "lp_debug.h"
42848b8605Smrg#include "lp_fence.h"
43848b8605Smrg#include "lp_perf.h"
44848b8605Smrg#include "lp_query.h"
45848b8605Smrg#include "lp_rast.h"
46848b8605Smrg#include "lp_rast_priv.h"
47b8e80941Smrg#include "gallivm/lp_bld_format.h"
48848b8605Smrg#include "gallivm/lp_bld_debug.h"
49848b8605Smrg#include "lp_scene.h"
50848b8605Smrg#include "lp_tex_sample.h"
51848b8605Smrg
52848b8605Smrg
#ifdef DEBUG
/*
 * Debug-only globals.  Presumably they record the JIT shader call that is
 * currently in flight (see BEGIN_JIT_CALL/END_JIT_CALL users below) --
 * TODO confirm against the macro definitions.
 */
const struct lp_rast_state *jit_state = NULL;
const struct lp_rasterizer_task *jit_task = NULL;
int jit_line = 0;
#endif
58848b8605Smrg
59848b8605Smrg
60848b8605Smrg/**
61848b8605Smrg * Begin rasterizing a scene.
62848b8605Smrg * Called once per scene by one thread.
63848b8605Smrg */
64848b8605Smrgstatic void
65848b8605Smrglp_rast_begin( struct lp_rasterizer *rast,
66848b8605Smrg               struct lp_scene *scene )
67848b8605Smrg{
68848b8605Smrg   rast->curr_scene = scene;
69848b8605Smrg
70848b8605Smrg   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
71848b8605Smrg
72848b8605Smrg   lp_scene_begin_rasterization( scene );
73848b8605Smrg   lp_scene_bin_iter_begin( scene );
74848b8605Smrg}
75848b8605Smrg
76848b8605Smrg
77848b8605Smrgstatic void
78848b8605Smrglp_rast_end( struct lp_rasterizer *rast )
79848b8605Smrg{
80848b8605Smrg   lp_scene_end_rasterization( rast->curr_scene );
81848b8605Smrg
82848b8605Smrg   rast->curr_scene = NULL;
83848b8605Smrg}
84848b8605Smrg
85848b8605Smrg
86848b8605Smrg/**
87b8e80941Smrg * Beginning rasterization of a tile.
88848b8605Smrg * \param x  window X position of the tile, in pixels
89848b8605Smrg * \param y  window Y position of the tile, in pixels
90848b8605Smrg */
91848b8605Smrgstatic void
92848b8605Smrglp_rast_tile_begin(struct lp_rasterizer_task *task,
93848b8605Smrg                   const struct cmd_bin *bin,
94848b8605Smrg                   int x, int y)
95848b8605Smrg{
96b8e80941Smrg   unsigned i;
97b8e80941Smrg   struct lp_scene *scene = task->scene;
98b8e80941Smrg
99848b8605Smrg   LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
100848b8605Smrg
101848b8605Smrg   task->bin = bin;
102848b8605Smrg   task->x = x * TILE_SIZE;
103848b8605Smrg   task->y = y * TILE_SIZE;
104848b8605Smrg   task->width = TILE_SIZE + x * TILE_SIZE > task->scene->fb.width ?
105848b8605Smrg                    task->scene->fb.width - x * TILE_SIZE : TILE_SIZE;
106848b8605Smrg   task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ?
107848b8605Smrg                    task->scene->fb.height - y * TILE_SIZE : TILE_SIZE;
108848b8605Smrg
109848b8605Smrg   task->thread_data.vis_counter = 0;
110b8e80941Smrg   task->thread_data.ps_invocations = 0;
111848b8605Smrg
112b8e80941Smrg   for (i = 0; i < task->scene->fb.nr_cbufs; i++) {
113b8e80941Smrg      if (task->scene->fb.cbufs[i]) {
114b8e80941Smrg         task->color_tiles[i] = scene->cbufs[i].map +
115b8e80941Smrg                                scene->cbufs[i].stride * task->y +
116b8e80941Smrg                                scene->cbufs[i].format_bytes * task->x;
117b8e80941Smrg      }
118b8e80941Smrg   }
119b8e80941Smrg   if (task->scene->fb.zsbuf) {
120b8e80941Smrg      task->depth_tile = scene->zsbuf.map +
121b8e80941Smrg                         scene->zsbuf.stride * task->y +
122b8e80941Smrg                         scene->zsbuf.format_bytes * task->x;
123b8e80941Smrg   }
124848b8605Smrg}
125848b8605Smrg
126848b8605Smrg
127848b8605Smrg/**
128848b8605Smrg * Clear the rasterizer's current color tile.
129848b8605Smrg * This is a bin command called during bin processing.
130848b8605Smrg * Clear commands always clear all bound layers.
131848b8605Smrg */
132848b8605Smrgstatic void
133848b8605Smrglp_rast_clear_color(struct lp_rasterizer_task *task,
134848b8605Smrg                    const union lp_rast_cmd_arg arg)
135848b8605Smrg{
136848b8605Smrg   const struct lp_scene *scene = task->scene;
137848b8605Smrg   unsigned cbuf = arg.clear_rb->cbuf;
138848b8605Smrg   union util_color uc;
139848b8605Smrg   enum pipe_format format;
140848b8605Smrg
141848b8605Smrg   /* we never bin clear commands for non-existing buffers */
142848b8605Smrg   assert(cbuf < scene->fb.nr_cbufs);
143848b8605Smrg   assert(scene->fb.cbufs[cbuf]);
144848b8605Smrg
145848b8605Smrg   format = scene->fb.cbufs[cbuf]->format;
146848b8605Smrg   uc = arg.clear_rb->color_val;
147848b8605Smrg
148848b8605Smrg   /*
149848b8605Smrg    * this is pretty rough since we have target format (bunch of bytes...) here.
150848b8605Smrg    * dump it as raw 4 dwords.
151848b8605Smrg    */
152848b8605Smrg   LP_DBG(DEBUG_RAST, "%s clear value (target format %d) raw 0x%x,0x%x,0x%x,0x%x\n",
153848b8605Smrg          __FUNCTION__, format, uc.ui[0], uc.ui[1], uc.ui[2], uc.ui[3]);
154848b8605Smrg
155848b8605Smrg
156848b8605Smrg   util_fill_box(scene->cbufs[cbuf].map,
157848b8605Smrg                 format,
158848b8605Smrg                 scene->cbufs[cbuf].stride,
159848b8605Smrg                 scene->cbufs[cbuf].layer_stride,
160848b8605Smrg                 task->x,
161848b8605Smrg                 task->y,
162848b8605Smrg                 0,
163848b8605Smrg                 task->width,
164848b8605Smrg                 task->height,
165848b8605Smrg                 scene->fb_max_layer + 1,
166848b8605Smrg                 &uc);
167848b8605Smrg
168848b8605Smrg   /* this will increase for each rb which probably doesn't mean much */
169848b8605Smrg   LP_COUNT(nr_color_tile_clear);
170848b8605Smrg}
171848b8605Smrg
172848b8605Smrg
173848b8605Smrg/**
174848b8605Smrg * Clear the rasterizer's current z/stencil tile.
175848b8605Smrg * This is a bin command called during bin processing.
176848b8605Smrg * Clear commands always clear all bound layers.
177848b8605Smrg */
178848b8605Smrgstatic void
179848b8605Smrglp_rast_clear_zstencil(struct lp_rasterizer_task *task,
180848b8605Smrg                       const union lp_rast_cmd_arg arg)
181848b8605Smrg{
182848b8605Smrg   const struct lp_scene *scene = task->scene;
183848b8605Smrg   uint64_t clear_value64 = arg.clear_zstencil.value;
184848b8605Smrg   uint64_t clear_mask64 = arg.clear_zstencil.mask;
185848b8605Smrg   uint32_t clear_value = (uint32_t) clear_value64;
186848b8605Smrg   uint32_t clear_mask = (uint32_t) clear_mask64;
187848b8605Smrg   const unsigned height = task->height;
188848b8605Smrg   const unsigned width = task->width;
189848b8605Smrg   const unsigned dst_stride = scene->zsbuf.stride;
190848b8605Smrg   uint8_t *dst;
191848b8605Smrg   unsigned i, j;
192848b8605Smrg   unsigned block_size;
193848b8605Smrg
194848b8605Smrg   LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n",
195848b8605Smrg           __FUNCTION__, clear_value, clear_mask);
196848b8605Smrg
197848b8605Smrg   /*
198848b8605Smrg    * Clear the area of the depth/depth buffer matching this tile.
199848b8605Smrg    */
200848b8605Smrg
201848b8605Smrg   if (scene->fb.zsbuf) {
202848b8605Smrg      unsigned layer;
203b8e80941Smrg      uint8_t *dst_layer = task->depth_tile;
204848b8605Smrg      block_size = util_format_get_blocksize(scene->fb.zsbuf->format);
205848b8605Smrg
206848b8605Smrg      clear_value &= clear_mask;
207848b8605Smrg
208848b8605Smrg      for (layer = 0; layer <= scene->fb_max_layer; layer++) {
209848b8605Smrg         dst = dst_layer;
210848b8605Smrg
211848b8605Smrg         switch (block_size) {
212848b8605Smrg         case 1:
213848b8605Smrg            assert(clear_mask == 0xff);
214848b8605Smrg            memset(dst, (uint8_t) clear_value, height * width);
215848b8605Smrg            break;
216848b8605Smrg         case 2:
217848b8605Smrg            if (clear_mask == 0xffff) {
218848b8605Smrg               for (i = 0; i < height; i++) {
219848b8605Smrg                  uint16_t *row = (uint16_t *)dst;
220848b8605Smrg                  for (j = 0; j < width; j++)
221848b8605Smrg                     *row++ = (uint16_t) clear_value;
222848b8605Smrg                  dst += dst_stride;
223848b8605Smrg               }
224848b8605Smrg            }
225848b8605Smrg            else {
226848b8605Smrg               for (i = 0; i < height; i++) {
227848b8605Smrg                  uint16_t *row = (uint16_t *)dst;
228848b8605Smrg                  for (j = 0; j < width; j++) {
229848b8605Smrg                     uint16_t tmp = ~clear_mask & *row;
230848b8605Smrg                     *row++ = clear_value | tmp;
231848b8605Smrg                  }
232848b8605Smrg                  dst += dst_stride;
233848b8605Smrg               }
234848b8605Smrg            }
235848b8605Smrg            break;
236848b8605Smrg         case 4:
237848b8605Smrg            if (clear_mask == 0xffffffff) {
238848b8605Smrg               for (i = 0; i < height; i++) {
239848b8605Smrg                  uint32_t *row = (uint32_t *)dst;
240848b8605Smrg                  for (j = 0; j < width; j++)
241848b8605Smrg                     *row++ = clear_value;
242848b8605Smrg                  dst += dst_stride;
243848b8605Smrg               }
244848b8605Smrg            }
245848b8605Smrg            else {
246848b8605Smrg               for (i = 0; i < height; i++) {
247848b8605Smrg                  uint32_t *row = (uint32_t *)dst;
248848b8605Smrg                  for (j = 0; j < width; j++) {
249848b8605Smrg                     uint32_t tmp = ~clear_mask & *row;
250848b8605Smrg                     *row++ = clear_value | tmp;
251848b8605Smrg                  }
252848b8605Smrg                  dst += dst_stride;
253848b8605Smrg               }
254848b8605Smrg            }
255848b8605Smrg            break;
256848b8605Smrg         case 8:
257848b8605Smrg            clear_value64 &= clear_mask64;
258848b8605Smrg            if (clear_mask64 == 0xffffffffffULL) {
259848b8605Smrg               for (i = 0; i < height; i++) {
260848b8605Smrg                  uint64_t *row = (uint64_t *)dst;
261848b8605Smrg                  for (j = 0; j < width; j++)
262848b8605Smrg                     *row++ = clear_value64;
263848b8605Smrg                  dst += dst_stride;
264848b8605Smrg               }
265848b8605Smrg            }
266848b8605Smrg            else {
267848b8605Smrg               for (i = 0; i < height; i++) {
268848b8605Smrg                  uint64_t *row = (uint64_t *)dst;
269848b8605Smrg                  for (j = 0; j < width; j++) {
270848b8605Smrg                     uint64_t tmp = ~clear_mask64 & *row;
271848b8605Smrg                     *row++ = clear_value64 | tmp;
272848b8605Smrg                  }
273848b8605Smrg                  dst += dst_stride;
274848b8605Smrg               }
275848b8605Smrg            }
276848b8605Smrg            break;
277848b8605Smrg
278848b8605Smrg         default:
279848b8605Smrg            assert(0);
280848b8605Smrg            break;
281848b8605Smrg         }
282848b8605Smrg         dst_layer += scene->zsbuf.layer_stride;
283848b8605Smrg      }
284848b8605Smrg   }
285848b8605Smrg}
286848b8605Smrg
287848b8605Smrg
288848b8605Smrg
289848b8605Smrg/**
290848b8605Smrg * Run the shader on all blocks in a tile.  This is used when a tile is
291848b8605Smrg * completely contained inside a triangle.
292848b8605Smrg * This is a bin command called during bin processing.
293848b8605Smrg */
294848b8605Smrgstatic void
295848b8605Smrglp_rast_shade_tile(struct lp_rasterizer_task *task,
296848b8605Smrg                   const union lp_rast_cmd_arg arg)
297848b8605Smrg{
298848b8605Smrg   const struct lp_scene *scene = task->scene;
299848b8605Smrg   const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
300848b8605Smrg   const struct lp_rast_state *state;
301848b8605Smrg   struct lp_fragment_shader_variant *variant;
302848b8605Smrg   const unsigned tile_x = task->x, tile_y = task->y;
303848b8605Smrg   unsigned x, y;
304848b8605Smrg
305848b8605Smrg   if (inputs->disable) {
306848b8605Smrg      /* This command was partially binned and has been disabled */
307848b8605Smrg      return;
308848b8605Smrg   }
309848b8605Smrg
310848b8605Smrg   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
311848b8605Smrg
312848b8605Smrg   state = task->state;
313848b8605Smrg   assert(state);
314848b8605Smrg   if (!state) {
315848b8605Smrg      return;
316848b8605Smrg   }
317848b8605Smrg   variant = state->variant;
318848b8605Smrg
319848b8605Smrg   /* render the whole 64x64 tile in 4x4 chunks */
320848b8605Smrg   for (y = 0; y < task->height; y += 4){
321848b8605Smrg      for (x = 0; x < task->width; x += 4) {
322848b8605Smrg         uint8_t *color[PIPE_MAX_COLOR_BUFS];
323848b8605Smrg         unsigned stride[PIPE_MAX_COLOR_BUFS];
324848b8605Smrg         uint8_t *depth = NULL;
325848b8605Smrg         unsigned depth_stride = 0;
326848b8605Smrg         unsigned i;
327848b8605Smrg
328848b8605Smrg         /* color buffer */
329848b8605Smrg         for (i = 0; i < scene->fb.nr_cbufs; i++){
330848b8605Smrg            if (scene->fb.cbufs[i]) {
331848b8605Smrg               stride[i] = scene->cbufs[i].stride;
332848b8605Smrg               color[i] = lp_rast_get_color_block_pointer(task, i, tile_x + x,
333848b8605Smrg                                                          tile_y + y, inputs->layer);
334848b8605Smrg            }
335848b8605Smrg            else {
336848b8605Smrg               stride[i] = 0;
337848b8605Smrg               color[i] = NULL;
338848b8605Smrg            }
339848b8605Smrg         }
340848b8605Smrg
341848b8605Smrg         /* depth buffer */
342848b8605Smrg         if (scene->zsbuf.map) {
343848b8605Smrg            depth = lp_rast_get_depth_block_pointer(task, tile_x + x,
344848b8605Smrg                                                    tile_y + y, inputs->layer);
345848b8605Smrg            depth_stride = scene->zsbuf.stride;
346848b8605Smrg         }
347848b8605Smrg
348848b8605Smrg         /* Propagate non-interpolated raster state. */
349848b8605Smrg         task->thread_data.raster_state.viewport_index = inputs->viewport_index;
350848b8605Smrg
351848b8605Smrg         /* run shader on 4x4 block */
352848b8605Smrg         BEGIN_JIT_CALL(state, task);
353848b8605Smrg         variant->jit_function[RAST_WHOLE]( &state->jit_context,
354848b8605Smrg                                            tile_x + x, tile_y + y,
355848b8605Smrg                                            inputs->frontfacing,
356848b8605Smrg                                            GET_A0(inputs),
357848b8605Smrg                                            GET_DADX(inputs),
358848b8605Smrg                                            GET_DADY(inputs),
359848b8605Smrg                                            color,
360848b8605Smrg                                            depth,
361848b8605Smrg                                            0xffff,
362848b8605Smrg                                            &task->thread_data,
363848b8605Smrg                                            stride,
364848b8605Smrg                                            depth_stride);
365848b8605Smrg         END_JIT_CALL();
366848b8605Smrg      }
367848b8605Smrg   }
368848b8605Smrg}
369848b8605Smrg
370848b8605Smrg
371848b8605Smrg/**
372848b8605Smrg * Run the shader on all blocks in a tile.  This is used when a tile is
373848b8605Smrg * completely contained inside a triangle, and the shader is opaque.
374848b8605Smrg * This is a bin command called during bin processing.
375848b8605Smrg */
376848b8605Smrgstatic void
377848b8605Smrglp_rast_shade_tile_opaque(struct lp_rasterizer_task *task,
378848b8605Smrg                          const union lp_rast_cmd_arg arg)
379848b8605Smrg{
380848b8605Smrg   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
381848b8605Smrg
382848b8605Smrg   assert(task->state);
383848b8605Smrg   if (!task->state) {
384848b8605Smrg      return;
385848b8605Smrg   }
386848b8605Smrg
387848b8605Smrg   lp_rast_shade_tile(task, arg);
388848b8605Smrg}
389848b8605Smrg
390848b8605Smrg
391848b8605Smrg/**
392848b8605Smrg * Compute shading for a 4x4 block of pixels inside a triangle.
393848b8605Smrg * This is a bin command called during bin processing.
394848b8605Smrg * \param x  X position of quad in window coords
395848b8605Smrg * \param y  Y position of quad in window coords
396848b8605Smrg */
397848b8605Smrgvoid
398848b8605Smrglp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
399848b8605Smrg                         const struct lp_rast_shader_inputs *inputs,
400848b8605Smrg                         unsigned x, unsigned y,
401848b8605Smrg                         unsigned mask)
402848b8605Smrg{
403848b8605Smrg   const struct lp_rast_state *state = task->state;
404848b8605Smrg   struct lp_fragment_shader_variant *variant = state->variant;
405848b8605Smrg   const struct lp_scene *scene = task->scene;
406848b8605Smrg   uint8_t *color[PIPE_MAX_COLOR_BUFS];
407848b8605Smrg   unsigned stride[PIPE_MAX_COLOR_BUFS];
408848b8605Smrg   uint8_t *depth = NULL;
409848b8605Smrg   unsigned depth_stride = 0;
410848b8605Smrg   unsigned i;
411848b8605Smrg
412848b8605Smrg   assert(state);
413848b8605Smrg
414848b8605Smrg   /* Sanity checks */
415848b8605Smrg   assert(x < scene->tiles_x * TILE_SIZE);
416848b8605Smrg   assert(y < scene->tiles_y * TILE_SIZE);
417848b8605Smrg   assert(x % TILE_VECTOR_WIDTH == 0);
418848b8605Smrg   assert(y % TILE_VECTOR_HEIGHT == 0);
419848b8605Smrg
420848b8605Smrg   assert((x % 4) == 0);
421848b8605Smrg   assert((y % 4) == 0);
422848b8605Smrg
423848b8605Smrg   /* color buffer */
424848b8605Smrg   for (i = 0; i < scene->fb.nr_cbufs; i++) {
425848b8605Smrg      if (scene->fb.cbufs[i]) {
426848b8605Smrg         stride[i] = scene->cbufs[i].stride;
427848b8605Smrg         color[i] = lp_rast_get_color_block_pointer(task, i, x, y,
428848b8605Smrg                                                    inputs->layer);
429848b8605Smrg      }
430848b8605Smrg      else {
431848b8605Smrg         stride[i] = 0;
432848b8605Smrg         color[i] = NULL;
433848b8605Smrg      }
434848b8605Smrg   }
435848b8605Smrg
436848b8605Smrg   /* depth buffer */
437848b8605Smrg   if (scene->zsbuf.map) {
438848b8605Smrg      depth_stride = scene->zsbuf.stride;
439848b8605Smrg      depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer);
440848b8605Smrg   }
441848b8605Smrg
442848b8605Smrg   assert(lp_check_alignment(state->jit_context.u8_blend_color, 16));
443848b8605Smrg
444848b8605Smrg   /*
445848b8605Smrg    * The rasterizer may produce fragments outside our
446848b8605Smrg    * allocated 4x4 blocks hence need to filter them out here.
447848b8605Smrg    */
448848b8605Smrg   if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
449848b8605Smrg      /* Propagate non-interpolated raster state. */
450848b8605Smrg      task->thread_data.raster_state.viewport_index = inputs->viewport_index;
451848b8605Smrg
452848b8605Smrg      /* run shader on 4x4 block */
453848b8605Smrg      BEGIN_JIT_CALL(state, task);
454848b8605Smrg      variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
455848b8605Smrg                                            x, y,
456848b8605Smrg                                            inputs->frontfacing,
457848b8605Smrg                                            GET_A0(inputs),
458848b8605Smrg                                            GET_DADX(inputs),
459848b8605Smrg                                            GET_DADY(inputs),
460848b8605Smrg                                            color,
461848b8605Smrg                                            depth,
462848b8605Smrg                                            mask,
463848b8605Smrg                                            &task->thread_data,
464848b8605Smrg                                            stride,
465848b8605Smrg                                            depth_stride);
466848b8605Smrg      END_JIT_CALL();
467848b8605Smrg   }
468848b8605Smrg}
469848b8605Smrg
470848b8605Smrg
471848b8605Smrg
472848b8605Smrg/**
473848b8605Smrg * Begin a new occlusion query.
474848b8605Smrg * This is a bin command put in all bins.
475848b8605Smrg * Called per thread.
476848b8605Smrg */
477848b8605Smrgstatic void
478848b8605Smrglp_rast_begin_query(struct lp_rasterizer_task *task,
479848b8605Smrg                    const union lp_rast_cmd_arg arg)
480848b8605Smrg{
481848b8605Smrg   struct llvmpipe_query *pq = arg.query_obj;
482848b8605Smrg
483848b8605Smrg   switch (pq->type) {
484848b8605Smrg   case PIPE_QUERY_OCCLUSION_COUNTER:
485848b8605Smrg   case PIPE_QUERY_OCCLUSION_PREDICATE:
486b8e80941Smrg   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
487848b8605Smrg      pq->start[task->thread_index] = task->thread_data.vis_counter;
488848b8605Smrg      break;
489848b8605Smrg   case PIPE_QUERY_PIPELINE_STATISTICS:
490b8e80941Smrg      pq->start[task->thread_index] = task->thread_data.ps_invocations;
491848b8605Smrg      break;
492848b8605Smrg   default:
493848b8605Smrg      assert(0);
494848b8605Smrg      break;
495848b8605Smrg   }
496848b8605Smrg}
497848b8605Smrg
498848b8605Smrg
499848b8605Smrg/**
500848b8605Smrg * End the current occlusion query.
501848b8605Smrg * This is a bin command put in all bins.
502848b8605Smrg * Called per thread.
503848b8605Smrg */
504848b8605Smrgstatic void
505848b8605Smrglp_rast_end_query(struct lp_rasterizer_task *task,
506848b8605Smrg                  const union lp_rast_cmd_arg arg)
507848b8605Smrg{
508848b8605Smrg   struct llvmpipe_query *pq = arg.query_obj;
509848b8605Smrg
510848b8605Smrg   switch (pq->type) {
511848b8605Smrg   case PIPE_QUERY_OCCLUSION_COUNTER:
512848b8605Smrg   case PIPE_QUERY_OCCLUSION_PREDICATE:
513b8e80941Smrg   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
514848b8605Smrg      pq->end[task->thread_index] +=
515848b8605Smrg         task->thread_data.vis_counter - pq->start[task->thread_index];
516848b8605Smrg      pq->start[task->thread_index] = 0;
517848b8605Smrg      break;
518848b8605Smrg   case PIPE_QUERY_TIMESTAMP:
519848b8605Smrg      pq->end[task->thread_index] = os_time_get_nano();
520848b8605Smrg      break;
521848b8605Smrg   case PIPE_QUERY_PIPELINE_STATISTICS:
522848b8605Smrg      pq->end[task->thread_index] +=
523b8e80941Smrg         task->thread_data.ps_invocations - pq->start[task->thread_index];
524848b8605Smrg      pq->start[task->thread_index] = 0;
525848b8605Smrg      break;
526848b8605Smrg   default:
527848b8605Smrg      assert(0);
528848b8605Smrg      break;
529848b8605Smrg   }
530848b8605Smrg}
531848b8605Smrg
532848b8605Smrg
533848b8605Smrgvoid
534848b8605Smrglp_rast_set_state(struct lp_rasterizer_task *task,
535848b8605Smrg                  const union lp_rast_cmd_arg arg)
536848b8605Smrg{
537848b8605Smrg   task->state = arg.state;
538848b8605Smrg}
539848b8605Smrg
540848b8605Smrg
541848b8605Smrg
542848b8605Smrg/**
543848b8605Smrg * Called when we're done writing to a color tile.
544848b8605Smrg */
545848b8605Smrgstatic void
546848b8605Smrglp_rast_tile_end(struct lp_rasterizer_task *task)
547848b8605Smrg{
548848b8605Smrg   unsigned i;
549848b8605Smrg
550848b8605Smrg   for (i = 0; i < task->scene->num_active_queries; ++i) {
551848b8605Smrg      lp_rast_end_query(task, lp_rast_arg_query(task->scene->active_queries[i]));
552848b8605Smrg   }
553848b8605Smrg
554848b8605Smrg   /* debug */
555848b8605Smrg   memset(task->color_tiles, 0, sizeof(task->color_tiles));
556848b8605Smrg   task->depth_tile = NULL;
557848b8605Smrg
558848b8605Smrg   task->bin = NULL;
559848b8605Smrg}
560848b8605Smrg
561848b8605Smrgstatic lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
562848b8605Smrg{
563848b8605Smrg   lp_rast_clear_color,
564848b8605Smrg   lp_rast_clear_zstencil,
565848b8605Smrg   lp_rast_triangle_1,
566848b8605Smrg   lp_rast_triangle_2,
567848b8605Smrg   lp_rast_triangle_3,
568848b8605Smrg   lp_rast_triangle_4,
569848b8605Smrg   lp_rast_triangle_5,
570848b8605Smrg   lp_rast_triangle_6,
571848b8605Smrg   lp_rast_triangle_7,
572848b8605Smrg   lp_rast_triangle_8,
573848b8605Smrg   lp_rast_triangle_3_4,
574848b8605Smrg   lp_rast_triangle_3_16,
575848b8605Smrg   lp_rast_triangle_4_16,
576848b8605Smrg   lp_rast_shade_tile,
577848b8605Smrg   lp_rast_shade_tile_opaque,
578848b8605Smrg   lp_rast_begin_query,
579848b8605Smrg   lp_rast_end_query,
580848b8605Smrg   lp_rast_set_state,
581848b8605Smrg   lp_rast_triangle_32_1,
582848b8605Smrg   lp_rast_triangle_32_2,
583848b8605Smrg   lp_rast_triangle_32_3,
584848b8605Smrg   lp_rast_triangle_32_4,
585848b8605Smrg   lp_rast_triangle_32_5,
586848b8605Smrg   lp_rast_triangle_32_6,
587848b8605Smrg   lp_rast_triangle_32_7,
588848b8605Smrg   lp_rast_triangle_32_8,
589848b8605Smrg   lp_rast_triangle_32_3_4,
590848b8605Smrg   lp_rast_triangle_32_3_16,
591848b8605Smrg   lp_rast_triangle_32_4_16
592848b8605Smrg};
593848b8605Smrg
594848b8605Smrg
595848b8605Smrgstatic void
596848b8605Smrgdo_rasterize_bin(struct lp_rasterizer_task *task,
597848b8605Smrg                 const struct cmd_bin *bin,
598848b8605Smrg                 int x, int y)
599848b8605Smrg{
600848b8605Smrg   const struct cmd_block *block;
601848b8605Smrg   unsigned k;
602848b8605Smrg
603848b8605Smrg   if (0)
604848b8605Smrg      lp_debug_bin(bin, x, y);
605848b8605Smrg
606848b8605Smrg   for (block = bin->head; block; block = block->next) {
607848b8605Smrg      for (k = 0; k < block->count; k++) {
608848b8605Smrg         dispatch[block->cmd[k]]( task, block->arg[k] );
609848b8605Smrg      }
610848b8605Smrg   }
611848b8605Smrg}
612848b8605Smrg
613848b8605Smrg
614848b8605Smrg
615848b8605Smrg/**
616848b8605Smrg * Rasterize commands for a single bin.
617848b8605Smrg * \param x, y  position of the bin's tile in the framebuffer
618848b8605Smrg * Must be called between lp_rast_begin() and lp_rast_end().
619848b8605Smrg * Called per thread.
620848b8605Smrg */
621848b8605Smrgstatic void
622848b8605Smrgrasterize_bin(struct lp_rasterizer_task *task,
623848b8605Smrg              const struct cmd_bin *bin, int x, int y )
624848b8605Smrg{
625848b8605Smrg   lp_rast_tile_begin( task, bin, x, y );
626848b8605Smrg
627848b8605Smrg   do_rasterize_bin(task, bin, x, y);
628848b8605Smrg
629848b8605Smrg   lp_rast_tile_end(task);
630848b8605Smrg
631848b8605Smrg
632848b8605Smrg   /* Debug/Perf flags:
633848b8605Smrg    */
634848b8605Smrg   if (bin->head->count == 1) {
635848b8605Smrg      if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE)
636848b8605Smrg         LP_COUNT(nr_pure_shade_opaque_64);
637848b8605Smrg      else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE)
638848b8605Smrg         LP_COUNT(nr_pure_shade_64);
639848b8605Smrg   }
640848b8605Smrg}
641848b8605Smrg
642848b8605Smrg
643848b8605Smrg/* An empty bin is one that just loads the contents of the tile and
644848b8605Smrg * stores them again unchanged.  This typically happens when bins have
645848b8605Smrg * been flushed for some reason in the middle of a frame, or when
646848b8605Smrg * incremental updates are being made to a render target.
647848b8605Smrg *
648848b8605Smrg * Try to avoid doing pointless work in this case.
649848b8605Smrg */
650848b8605Smrgstatic boolean
651848b8605Smrgis_empty_bin( const struct cmd_bin *bin )
652848b8605Smrg{
653848b8605Smrg   return bin->head == NULL;
654848b8605Smrg}
655848b8605Smrg
656848b8605Smrg
657848b8605Smrg/**
658848b8605Smrg * Rasterize/execute all bins within a scene.
659848b8605Smrg * Called per thread.
660848b8605Smrg */
661848b8605Smrgstatic void
662848b8605Smrgrasterize_scene(struct lp_rasterizer_task *task,
663848b8605Smrg                struct lp_scene *scene)
664848b8605Smrg{
665848b8605Smrg   task->scene = scene;
666848b8605Smrg
667b8e80941Smrg   /* Clear the cache tags. This should not always be necessary but
668b8e80941Smrg      simpler for now. */
669b8e80941Smrg#if LP_USE_TEXTURE_CACHE
670b8e80941Smrg   memset(task->thread_data.cache->cache_tags, 0,
671b8e80941Smrg          sizeof(task->thread_data.cache->cache_tags));
672b8e80941Smrg#if LP_BUILD_FORMAT_CACHE_DEBUG
673b8e80941Smrg   task->thread_data.cache->cache_access_total = 0;
674b8e80941Smrg   task->thread_data.cache->cache_access_miss = 0;
675b8e80941Smrg#endif
676b8e80941Smrg#endif
677b8e80941Smrg
678b8e80941Smrg   if (!task->rast->no_rast) {
679848b8605Smrg      /* loop over scene bins, rasterize each */
680848b8605Smrg      {
681848b8605Smrg         struct cmd_bin *bin;
682848b8605Smrg         int i, j;
683848b8605Smrg
684848b8605Smrg         assert(scene);
685848b8605Smrg         while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) {
686848b8605Smrg            if (!is_empty_bin( bin ))
687848b8605Smrg               rasterize_bin(task, bin, i, j);
688848b8605Smrg         }
689848b8605Smrg      }
690848b8605Smrg   }
691848b8605Smrg
692848b8605Smrg
693b8e80941Smrg#if LP_BUILD_FORMAT_CACHE_DEBUG
694b8e80941Smrg   {
695b8e80941Smrg      uint64_t total, miss;
696b8e80941Smrg      total = task->thread_data.cache->cache_access_total;
697b8e80941Smrg      miss = task->thread_data.cache->cache_access_miss;
698b8e80941Smrg      if (total) {
699b8e80941Smrg         debug_printf("thread %d cache access %llu miss %llu hit rate %f\n",
700b8e80941Smrg                 task->thread_index, (long long unsigned)total,
701b8e80941Smrg                 (long long unsigned)miss,
702b8e80941Smrg                 (float)(total - miss)/(float)total);
703b8e80941Smrg      }
704b8e80941Smrg   }
705b8e80941Smrg#endif
706b8e80941Smrg
707848b8605Smrg   if (scene->fence) {
708848b8605Smrg      lp_fence_signal(scene->fence);
709848b8605Smrg   }
710848b8605Smrg
711848b8605Smrg   task->scene = NULL;
712848b8605Smrg}
713848b8605Smrg
714848b8605Smrg
715848b8605Smrg/**
716848b8605Smrg * Called by setup module when it has something for us to render.
717848b8605Smrg */
718848b8605Smrgvoid
719848b8605Smrglp_rast_queue_scene( struct lp_rasterizer *rast,
720848b8605Smrg                     struct lp_scene *scene)
721848b8605Smrg{
722848b8605Smrg   LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
723848b8605Smrg
724848b8605Smrg   if (rast->num_threads == 0) {
725848b8605Smrg      /* no threading */
726848b8605Smrg      unsigned fpstate = util_fpstate_get();
727848b8605Smrg
728848b8605Smrg      /* Make sure that denorms are treated like zeros. This is
729848b8605Smrg       * the behavior required by D3D10. OpenGL doesn't care.
730848b8605Smrg       */
731848b8605Smrg      util_fpstate_set_denorms_to_zero(fpstate);
732848b8605Smrg
733848b8605Smrg      lp_rast_begin( rast, scene );
734848b8605Smrg
735848b8605Smrg      rasterize_scene( &rast->tasks[0], scene );
736848b8605Smrg
737848b8605Smrg      lp_rast_end( rast );
738848b8605Smrg
739848b8605Smrg      util_fpstate_set(fpstate);
740848b8605Smrg
741848b8605Smrg      rast->curr_scene = NULL;
742848b8605Smrg   }
743848b8605Smrg   else {
744848b8605Smrg      /* threaded rendering! */
745848b8605Smrg      unsigned i;
746848b8605Smrg
747848b8605Smrg      lp_scene_enqueue( rast->full_scenes, scene );
748848b8605Smrg
749848b8605Smrg      /* signal the threads that there's work to do */
750848b8605Smrg      for (i = 0; i < rast->num_threads; i++) {
751848b8605Smrg         pipe_semaphore_signal(&rast->tasks[i].work_ready);
752848b8605Smrg      }
753848b8605Smrg   }
754848b8605Smrg
755848b8605Smrg   LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
756848b8605Smrg}
757848b8605Smrg
758848b8605Smrg
759848b8605Smrgvoid
760848b8605Smrglp_rast_finish( struct lp_rasterizer *rast )
761848b8605Smrg{
762848b8605Smrg   if (rast->num_threads == 0) {
763848b8605Smrg      /* nothing to do */
764848b8605Smrg   }
765848b8605Smrg   else {
766848b8605Smrg      int i;
767848b8605Smrg
768848b8605Smrg      /* wait for work to complete */
769848b8605Smrg      for (i = 0; i < rast->num_threads; i++) {
770848b8605Smrg         pipe_semaphore_wait(&rast->tasks[i].work_done);
771848b8605Smrg      }
772848b8605Smrg   }
773848b8605Smrg}
774848b8605Smrg
775848b8605Smrg
776848b8605Smrg/**
777848b8605Smrg * This is the thread's main entrypoint.
778848b8605Smrg * It's a simple loop:
779848b8605Smrg *   1. wait for work
780848b8605Smrg *   2. do work
781848b8605Smrg *   3. signal that we're done
782848b8605Smrg */
783b8e80941Smrgstatic int
784b8e80941Smrgthread_function(void *init_data)
785848b8605Smrg{
786848b8605Smrg   struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
787848b8605Smrg   struct lp_rasterizer *rast = task->rast;
788848b8605Smrg   boolean debug = false;
789b8e80941Smrg   char thread_name[16];
790b8e80941Smrg   unsigned fpstate;
791b8e80941Smrg
792b8e80941Smrg   util_snprintf(thread_name, sizeof thread_name, "llvmpipe-%u", task->thread_index);
793b8e80941Smrg   u_thread_setname(thread_name);
794848b8605Smrg
795848b8605Smrg   /* Make sure that denorms are treated like zeros. This is
796848b8605Smrg    * the behavior required by D3D10. OpenGL doesn't care.
797848b8605Smrg    */
798b8e80941Smrg   fpstate = util_fpstate_get();
799848b8605Smrg   util_fpstate_set_denorms_to_zero(fpstate);
800848b8605Smrg
801848b8605Smrg   while (1) {
802848b8605Smrg      /* wait for work */
803848b8605Smrg      if (debug)
804848b8605Smrg         debug_printf("thread %d waiting for work\n", task->thread_index);
805848b8605Smrg      pipe_semaphore_wait(&task->work_ready);
806848b8605Smrg
807848b8605Smrg      if (rast->exit_flag)
808848b8605Smrg         break;
809848b8605Smrg
810848b8605Smrg      if (task->thread_index == 0) {
811848b8605Smrg         /* thread[0]:
812848b8605Smrg          *  - get next scene to rasterize
813848b8605Smrg          *  - map the framebuffer surfaces
814848b8605Smrg          */
815848b8605Smrg         lp_rast_begin( rast,
816848b8605Smrg                        lp_scene_dequeue( rast->full_scenes, TRUE ) );
817848b8605Smrg      }
818848b8605Smrg
819848b8605Smrg      /* Wait for all threads to get here so that threads[1+] don't
820848b8605Smrg       * get a null rast->curr_scene pointer.
821848b8605Smrg       */
822b8e80941Smrg      util_barrier_wait( &rast->barrier );
823848b8605Smrg
824848b8605Smrg      /* do work */
825848b8605Smrg      if (debug)
826848b8605Smrg         debug_printf("thread %d doing work\n", task->thread_index);
827848b8605Smrg
828848b8605Smrg      rasterize_scene(task,
829848b8605Smrg                      rast->curr_scene);
830848b8605Smrg
831848b8605Smrg      /* wait for all threads to finish with this scene */
832b8e80941Smrg      util_barrier_wait( &rast->barrier );
833848b8605Smrg
834848b8605Smrg      /* XXX: shouldn't be necessary:
835848b8605Smrg       */
836848b8605Smrg      if (task->thread_index == 0) {
837848b8605Smrg         lp_rast_end( rast );
838848b8605Smrg      }
839848b8605Smrg
840848b8605Smrg      /* signal done with work */
841848b8605Smrg      if (debug)
842848b8605Smrg         debug_printf("thread %d done working\n", task->thread_index);
843848b8605Smrg
844848b8605Smrg      pipe_semaphore_signal(&task->work_done);
845848b8605Smrg   }
846848b8605Smrg
847848b8605Smrg#ifdef _WIN32
848848b8605Smrg   pipe_semaphore_signal(&task->work_done);
849848b8605Smrg#endif
850848b8605Smrg
851848b8605Smrg   return 0;
852848b8605Smrg}
853848b8605Smrg
854848b8605Smrg
855848b8605Smrg/**
856848b8605Smrg * Initialize semaphores and spawn the threads.
857848b8605Smrg */
858848b8605Smrgstatic void
859848b8605Smrgcreate_rast_threads(struct lp_rasterizer *rast)
860848b8605Smrg{
861848b8605Smrg   unsigned i;
862848b8605Smrg
863848b8605Smrg   /* NOTE: if num_threads is zero, we won't use any threads */
864848b8605Smrg   for (i = 0; i < rast->num_threads; i++) {
865848b8605Smrg      pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
866848b8605Smrg      pipe_semaphore_init(&rast->tasks[i].work_done, 0);
867b8e80941Smrg      rast->threads[i] = u_thread_create(thread_function,
868848b8605Smrg                                            (void *) &rast->tasks[i]);
869848b8605Smrg   }
870848b8605Smrg}
871848b8605Smrg
872848b8605Smrg
873848b8605Smrg
874848b8605Smrg/**
875848b8605Smrg * Create new lp_rasterizer.  If num_threads is zero, don't create any
876848b8605Smrg * new threads, do rendering synchronously.
877848b8605Smrg * \param num_threads  number of rasterizer threads to create
878848b8605Smrg */
879848b8605Smrgstruct lp_rasterizer *
880848b8605Smrglp_rast_create( unsigned num_threads )
881848b8605Smrg{
882848b8605Smrg   struct lp_rasterizer *rast;
883848b8605Smrg   unsigned i;
884848b8605Smrg
885848b8605Smrg   rast = CALLOC_STRUCT(lp_rasterizer);
886848b8605Smrg   if (!rast) {
887848b8605Smrg      goto no_rast;
888848b8605Smrg   }
889848b8605Smrg
890848b8605Smrg   rast->full_scenes = lp_scene_queue_create();
891848b8605Smrg   if (!rast->full_scenes) {
892848b8605Smrg      goto no_full_scenes;
893848b8605Smrg   }
894848b8605Smrg
895b8e80941Smrg   for (i = 0; i < MAX2(1, num_threads); i++) {
896848b8605Smrg      struct lp_rasterizer_task *task = &rast->tasks[i];
897848b8605Smrg      task->rast = rast;
898848b8605Smrg      task->thread_index = i;
899b8e80941Smrg      task->thread_data.cache = align_malloc(sizeof(struct lp_build_format_cache),
900b8e80941Smrg                                             16);
901b8e80941Smrg      if (!task->thread_data.cache) {
902b8e80941Smrg         goto no_thread_data_cache;
903b8e80941Smrg      }
904848b8605Smrg   }
905848b8605Smrg
906848b8605Smrg   rast->num_threads = num_threads;
907848b8605Smrg
908848b8605Smrg   rast->no_rast = debug_get_bool_option("LP_NO_RAST", FALSE);
909848b8605Smrg
910848b8605Smrg   create_rast_threads(rast);
911848b8605Smrg
912848b8605Smrg   /* for synchronizing rasterization threads */
913b8e80941Smrg   if (rast->num_threads > 0) {
914b8e80941Smrg      util_barrier_init( &rast->barrier, rast->num_threads );
915b8e80941Smrg   }
916848b8605Smrg
917848b8605Smrg   memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
918848b8605Smrg
919848b8605Smrg   return rast;
920848b8605Smrg
921b8e80941Smrgno_thread_data_cache:
922b8e80941Smrg   for (i = 0; i < MAX2(1, rast->num_threads); i++) {
923b8e80941Smrg      if (rast->tasks[i].thread_data.cache) {
924b8e80941Smrg         align_free(rast->tasks[i].thread_data.cache);
925b8e80941Smrg      }
926b8e80941Smrg   }
927b8e80941Smrg
928b8e80941Smrg   lp_scene_queue_destroy(rast->full_scenes);
929848b8605Smrgno_full_scenes:
930848b8605Smrg   FREE(rast);
931848b8605Smrgno_rast:
932848b8605Smrg   return NULL;
933848b8605Smrg}
934848b8605Smrg
935848b8605Smrg
936848b8605Smrg/* Shutdown:
937848b8605Smrg */
938848b8605Smrgvoid lp_rast_destroy( struct lp_rasterizer *rast )
939848b8605Smrg{
940848b8605Smrg   unsigned i;
941848b8605Smrg
942848b8605Smrg   /* Set exit_flag and signal each thread's work_ready semaphore.
943848b8605Smrg    * Each thread will be woken up, notice that the exit_flag is set and
944848b8605Smrg    * break out of its main loop.  The thread will then exit.
945848b8605Smrg    */
946848b8605Smrg   rast->exit_flag = TRUE;
947848b8605Smrg   for (i = 0; i < rast->num_threads; i++) {
948848b8605Smrg      pipe_semaphore_signal(&rast->tasks[i].work_ready);
949848b8605Smrg   }
950848b8605Smrg
951848b8605Smrg   /* Wait for threads to terminate before cleaning up per-thread data.
952848b8605Smrg    * We don't actually call pipe_thread_wait to avoid dead lock on Windows
953848b8605Smrg    * per https://bugs.freedesktop.org/show_bug.cgi?id=76252 */
954848b8605Smrg   for (i = 0; i < rast->num_threads; i++) {
955848b8605Smrg#ifdef _WIN32
956848b8605Smrg      pipe_semaphore_wait(&rast->tasks[i].work_done);
957848b8605Smrg#else
958b8e80941Smrg      thrd_join(rast->threads[i], NULL);
959848b8605Smrg#endif
960848b8605Smrg   }
961848b8605Smrg
962848b8605Smrg   /* Clean up per-thread data */
963848b8605Smrg   for (i = 0; i < rast->num_threads; i++) {
964848b8605Smrg      pipe_semaphore_destroy(&rast->tasks[i].work_ready);
965848b8605Smrg      pipe_semaphore_destroy(&rast->tasks[i].work_done);
966848b8605Smrg   }
967b8e80941Smrg   for (i = 0; i < MAX2(1, rast->num_threads); i++) {
968b8e80941Smrg      align_free(rast->tasks[i].thread_data.cache);
969b8e80941Smrg   }
970848b8605Smrg
971848b8605Smrg   /* for synchronizing rasterization threads */
972b8e80941Smrg   if (rast->num_threads > 0) {
973b8e80941Smrg      util_barrier_destroy( &rast->barrier );
974b8e80941Smrg   }
975848b8605Smrg
976848b8605Smrg   lp_scene_queue_destroy(rast->full_scenes);
977848b8605Smrg
978848b8605Smrg   FREE(rast);
979848b8605Smrg}
980848b8605Smrg
981848b8605Smrg
982