/* draw_gs.c revision b8e80941 */
/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
27
28#include "draw_gs.h"
29
30#include "draw_private.h"
31#include "draw_context.h"
32#ifdef HAVE_LLVM
33#include "draw_llvm.h"
34#endif
35
36#include "tgsi/tgsi_parse.h"
37#include "tgsi/tgsi_exec.h"
38
39#include "pipe/p_shader_tokens.h"
40
41#include "util/u_math.h"
42#include "util/u_memory.h"
43#include "util/u_prim.h"
44
/* fixme: move it from here */
/* Number of entries reserved for each per-stream Primitives[] and
 * PrimitiveOffsets[] table that draw_gs_init() allocates on the TGSI
 * interpreter machine.
 */
#define MAX_PRIMITIVES 64
47
48static inline int
49draw_gs_get_input_index(int semantic, int index,
50                        const struct tgsi_shader_info *input_info)
51{
52   int i;
53   const ubyte *input_semantic_names = input_info->output_semantic_name;
54   const ubyte *input_semantic_indices = input_info->output_semantic_index;
55   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
56      if (input_semantic_names[i] == semantic &&
57          input_semantic_indices[i] == index)
58         return i;
59   }
60   return -1;
61}
62
63/**
64 * We execute geometry shaders in the SOA mode, so ideally we want to
65 * flush when the number of currently fetched primitives is equal to
66 * the number of elements in the SOA vector. This ensures that the
67 * throughput is optimized for the given vector instruction set.
68 */
69static inline boolean
70draw_gs_should_flush(struct draw_geometry_shader *shader)
71{
72   return (shader->fetched_prim_count == shader->vector_length);
73}
74
75/*#define DEBUG_OUTPUTS 1*/
76static void
77tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader,
78                      unsigned stream,
79                      unsigned num_primitives,
80                      float (**p_output)[4])
81{
82   struct tgsi_exec_machine *machine = shader->machine;
83   unsigned prim_idx, j, slot;
84   float (*output)[4];
85
86   output = *p_output;
87
88   /* Unswizzle all output results.
89    */
90
91   for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) {
92      unsigned num_verts_per_prim = machine->Primitives[stream][prim_idx];
93      unsigned prim_offset = machine->PrimitiveOffsets[stream][prim_idx];
94      shader->stream[stream].primitive_lengths[prim_idx + shader->stream[stream].emitted_primitives] =
95         machine->Primitives[stream][prim_idx];
96      shader->stream[stream].emitted_vertices += num_verts_per_prim;
97
98      for (j = 0; j < num_verts_per_prim; j++) {
99         int idx = prim_offset + j * shader->info.num_outputs;
100#ifdef DEBUG_OUTPUTS
101         debug_printf("%d/%d) Output vert:\n", stream, idx / shader->info.num_outputs);
102#endif
103         for (slot = 0; slot < shader->info.num_outputs; slot++) {
104            output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[0];
105            output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[0];
106            output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[0];
107            output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[0];
108#ifdef DEBUG_OUTPUTS
109            debug_printf("\t%d: %f %f %f %f\n", slot,
110                         output[slot][0],
111                         output[slot][1],
112                         output[slot][2],
113                         output[slot][3]);
114#endif
115         }
116         output = (float (*)[4])((char *)output + shader->vertex_size);
117      }
118   }
119   *p_output = output;
120   shader->stream[stream].emitted_primitives += num_primitives;
121}
122
123/*#define DEBUG_INPUTS 1*/
124static void tgsi_fetch_gs_input(struct draw_geometry_shader *shader,
125                                unsigned *indices,
126                                unsigned num_vertices,
127                                unsigned prim_idx)
128{
129   struct tgsi_exec_machine *machine = shader->machine;
130   unsigned slot, i;
131   int vs_slot;
132   unsigned input_vertex_stride = shader->input_vertex_stride;
133   const float (*input_ptr)[4];
134
135   input_ptr = shader->input;
136
137   for (i = 0; i < num_vertices; ++i) {
138      const float (*input)[4];
139#if DEBUG_INPUTS
140      debug_printf("%d) vertex index = %d (prim idx = %d)\n",
141                   i, indices[i], prim_idx);
142#endif
143      input = (const float (*)[4])(
144         (const char *)input_ptr + (indices[i] * input_vertex_stride));
145      for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) {
146         unsigned idx = i * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot;
147         if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) {
148            machine->Inputs[idx].xyzw[0].u[prim_idx] = shader->in_prim_idx;
149            machine->Inputs[idx].xyzw[1].u[prim_idx] = shader->in_prim_idx;
150            machine->Inputs[idx].xyzw[2].u[prim_idx] = shader->in_prim_idx;
151            machine->Inputs[idx].xyzw[3].u[prim_idx] = shader->in_prim_idx;
152         } else {
153            vs_slot = draw_gs_get_input_index(
154               shader->info.input_semantic_name[slot],
155               shader->info.input_semantic_index[slot],
156               shader->input_info);
157            if (vs_slot < 0) {
158               debug_printf("VS/GS signature mismatch!\n");
159               machine->Inputs[idx].xyzw[0].f[prim_idx] = 0;
160               machine->Inputs[idx].xyzw[1].f[prim_idx] = 0;
161               machine->Inputs[idx].xyzw[2].f[prim_idx] = 0;
162               machine->Inputs[idx].xyzw[3].f[prim_idx] = 0;
163            } else {
164#if DEBUG_INPUTS
165               debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n",
166                            slot, vs_slot, idx);
167               assert(!util_is_inf_or_nan(input[vs_slot][0]));
168               assert(!util_is_inf_or_nan(input[vs_slot][1]));
169               assert(!util_is_inf_or_nan(input[vs_slot][2]));
170               assert(!util_is_inf_or_nan(input[vs_slot][3]));
171#endif
172               machine->Inputs[idx].xyzw[0].f[prim_idx] = input[vs_slot][0];
173               machine->Inputs[idx].xyzw[1].f[prim_idx] = input[vs_slot][1];
174               machine->Inputs[idx].xyzw[2].f[prim_idx] = input[vs_slot][2];
175               machine->Inputs[idx].xyzw[3].f[prim_idx] = input[vs_slot][3];
176#if DEBUG_INPUTS
177               debug_printf("\t\t%f %f %f %f\n",
178                            machine->Inputs[idx].xyzw[0].f[prim_idx],
179                            machine->Inputs[idx].xyzw[1].f[prim_idx],
180                            machine->Inputs[idx].xyzw[2].f[prim_idx],
181                            machine->Inputs[idx].xyzw[3].f[prim_idx]);
182#endif
183               ++vs_slot;
184            }
185         }
186      }
187   }
188}
189
190static void tgsi_gs_prepare(struct draw_geometry_shader *shader,
191                            const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
192                            const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS])
193{
194   struct tgsi_exec_machine *machine = shader->machine;
195   int j;
196   tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS,
197                                  constants, constants_size);
198
199   if (shader->info.uses_invocationid) {
200      unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_INVOCATIONID];
201      for (j = 0; j < TGSI_QUAD_SIZE; j++)
202         machine->SystemValue[i].xyzw[0].i[j] = shader->invocation_id;
203   }
204}
205
206static void tgsi_gs_run(struct draw_geometry_shader *shader,
207                            unsigned input_primitives,
208                            unsigned *out_prims)
209{
210   struct tgsi_exec_machine *machine = shader->machine;
211   int i;
212
213   /* run interpreter */
214   tgsi_exec_machine_run(machine, 0);
215
216   for (i = 0; i < 4; i++) {
217      int prim_i;
218      int prim_c;
219      switch (i) {
220      case 0:
221         prim_i = TGSI_EXEC_TEMP_PRIMITIVE_I;
222         prim_c = TGSI_EXEC_TEMP_PRIMITIVE_C;
223         break;
224      case 1:
225         prim_i = TGSI_EXEC_TEMP_PRIMITIVE_S1_I;
226         prim_c = TGSI_EXEC_TEMP_PRIMITIVE_S1_C;
227         break;
228      case 2:
229         prim_i = TGSI_EXEC_TEMP_PRIMITIVE_S2_I;
230         prim_c = TGSI_EXEC_TEMP_PRIMITIVE_S2_C;
231         break;
232      case 3:
233         prim_i = TGSI_EXEC_TEMP_PRIMITIVE_S3_I;
234         prim_c = TGSI_EXEC_TEMP_PRIMITIVE_S3_C;
235         break;
236      };
237
238      out_prims[i] = machine->Temps[prim_i].xyzw[prim_c].u[0];
239   }
240}
241
242#ifdef HAVE_LLVM
243
244static void
245llvm_fetch_gs_input(struct draw_geometry_shader *shader,
246                    unsigned *indices,
247                    unsigned num_vertices,
248                    unsigned prim_idx)
249{
250   unsigned slot, i;
251   int vs_slot;
252   unsigned input_vertex_stride = shader->input_vertex_stride;
253   const float (*input_ptr)[4];
254   float (*input_data)[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS] = &shader->gs_input->data;
255
256   shader->llvm_prim_ids[shader->fetched_prim_count] = shader->in_prim_idx;
257
258   input_ptr = shader->input;
259
260   for (i = 0; i < num_vertices; ++i) {
261      const float (*input)[4];
262#if DEBUG_INPUTS
263      debug_printf("%d) vertex index = %d (prim idx = %d)\n",
264                   i, indices[i], prim_idx);
265#endif
266      input = (const float (*)[4])(
267         (const char *)input_ptr + (indices[i] * input_vertex_stride));
268      for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) {
269         if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) {
270            /* skip. we handle system values through gallivm */
271            /* NOTE: If we hit this case here it's an ordinary input not a sv,
272             * even though it probably should be a sv.
273             * Not sure how to set it up as regular input however if that even,
274             * would make sense so hack around this later in gallivm.
275             */
276         } else {
277            vs_slot = draw_gs_get_input_index(
278               shader->info.input_semantic_name[slot],
279               shader->info.input_semantic_index[slot],
280               shader->input_info);
281            if (vs_slot < 0) {
282               debug_printf("VS/GS signature mismatch!\n");
283               (*input_data)[i][slot][0][prim_idx] = 0;
284               (*input_data)[i][slot][1][prim_idx] = 0;
285               (*input_data)[i][slot][2][prim_idx] = 0;
286               (*input_data)[i][slot][3][prim_idx] = 0;
287            } else {
288#if DEBUG_INPUTS
289               debug_printf("\tSlot = %d, vs_slot = %d, i = %d:\n",
290                            slot, vs_slot, i);
291               assert(!util_is_inf_or_nan(input[vs_slot][0]));
292               assert(!util_is_inf_or_nan(input[vs_slot][1]));
293               assert(!util_is_inf_or_nan(input[vs_slot][2]));
294               assert(!util_is_inf_or_nan(input[vs_slot][3]));
295#endif
296               (*input_data)[i][slot][0][prim_idx] = input[vs_slot][0];
297               (*input_data)[i][slot][1][prim_idx] = input[vs_slot][1];
298               (*input_data)[i][slot][2][prim_idx] = input[vs_slot][2];
299               (*input_data)[i][slot][3][prim_idx] = input[vs_slot][3];
300#if DEBUG_INPUTS
301               debug_printf("\t\t%f %f %f %f\n",
302                            (*input_data)[i][slot][0][prim_idx],
303                            (*input_data)[i][slot][1][prim_idx],
304                            (*input_data)[i][slot][2][prim_idx],
305                            (*input_data)[i][slot][3][prim_idx]);
306#endif
307               ++vs_slot;
308            }
309         }
310      }
311   }
312}
313
314static void
315llvm_fetch_gs_outputs(struct draw_geometry_shader *shader,
316                      unsigned stream,
317                      unsigned num_primitives,
318                      float (**p_output)[4])
319{
320   int total_verts = 0;
321   int vertex_count = 0;
322   int total_prims = 0;
323   int max_prims_per_invocation = 0;
324   char *output_ptr = (char*)shader->gs_output;
325   int i, j, prim_idx;
326   unsigned next_prim_boundary = shader->primitive_boundary;
327
328   for (i = 0; i < shader->vector_length; ++i) {
329      int prims = shader->llvm_emitted_primitives[i];
330      total_prims += prims;
331      max_prims_per_invocation = MAX2(max_prims_per_invocation, prims);
332   }
333   for (i = 0; i < shader->vector_length; ++i) {
334      total_verts += shader->llvm_emitted_vertices[i];
335   }
336
337   output_ptr += shader->stream[0].emitted_vertices * shader->vertex_size;
338   for (i = 0; i < shader->vector_length - 1; ++i) {
339      int current_verts = shader->llvm_emitted_vertices[i];
340      int next_verts = shader->llvm_emitted_vertices[i + 1];
341#if 0
342      int j;
343      for (j = 0; j < current_verts; ++j) {
344         struct vertex_header *vh = (struct vertex_header *)
345            (output_ptr + shader->vertex_size * (i * next_prim_boundary + j));
346         debug_printf("--- %d) [%f, %f, %f, %f]\n", j + vertex_count,
347                      vh->data[0][0], vh->data[0][1], vh->data[0][2], vh->data[0][3]);
348
349      }
350#endif
351      debug_assert(current_verts <= shader->max_output_vertices);
352      debug_assert(next_verts <= shader->max_output_vertices);
353      if (next_verts) {
354         memmove(output_ptr + (vertex_count + current_verts) * shader->vertex_size,
355                 output_ptr + ((i + 1) * next_prim_boundary) * shader->vertex_size,
356                 shader->vertex_size * next_verts);
357      }
358      vertex_count += current_verts;
359   }
360
361#if 0
362   {
363      int i;
364      for (i = 0; i < total_verts; ++i) {
365         struct vertex_header *vh = (struct vertex_header *)(output_ptr + shader->vertex_size * i);
366         debug_printf("%d) Vertex:\n", i);
367         for (j = 0; j < shader->info.num_outputs; ++j) {
368            unsigned *udata = (unsigned*)vh->data[j];
369            debug_printf("    %d) [%f, %f, %f, %f] [%d, %d, %d, %d]\n", j,
370                         vh->data[j][0], vh->data[j][1], vh->data[j][2], vh->data[j][3],
371                         udata[0], udata[1], udata[2], udata[3]);
372         }
373
374      }
375   }
376#endif
377
378   prim_idx = 0;
379   for (i = 0; i < shader->vector_length; ++i) {
380      int num_prims = shader->llvm_emitted_primitives[i];
381      for (j = 0; j < num_prims; ++j) {
382         int prim_length =
383            shader->llvm_prim_lengths[j][i];
384         shader->stream[0].primitive_lengths[shader->stream[0].emitted_primitives + prim_idx] =
385            prim_length;
386         ++prim_idx;
387      }
388   }
389
390   shader->stream[0].emitted_primitives += total_prims;
391   shader->stream[0].emitted_vertices += total_verts;
392}
393
394static void
395llvm_gs_prepare(struct draw_geometry_shader *shader,
396                const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
397                const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS])
398{
399}
400
401static void
402llvm_gs_run(struct draw_geometry_shader *shader,
403            unsigned input_primitives, unsigned *out_prims)
404{
405   unsigned ret;
406   char *input = (char*)shader->gs_output;
407
408   input += (shader->stream[0].emitted_vertices * shader->vertex_size);
409
410   ret = shader->current_variant->jit_func(
411      shader->jit_context, shader->gs_input->data,
412      (struct vertex_header*)input,
413      input_primitives,
414      shader->draw->instance_id,
415      shader->llvm_prim_ids,
416      shader->invocation_id);
417
418   *out_prims = ret;
419}
420
421#endif
422
423static void gs_flush(struct draw_geometry_shader *shader)
424{
425   unsigned out_prim_count[TGSI_MAX_VERTEX_STREAMS];
426   unsigned i;
427   unsigned input_primitives = shader->fetched_prim_count;
428
429   if (shader->draw->collect_statistics) {
430      shader->draw->statistics.gs_invocations += input_primitives;
431   }
432
433   debug_assert(input_primitives > 0 &&
434                input_primitives <= 4);
435
436   shader->run(shader, input_primitives, out_prim_count);
437   for (i = 0; i < shader->num_vertex_streams; i++) {
438      shader->fetch_outputs(shader, i, out_prim_count[i],
439                            &shader->stream[i].tmp_output);
440   }
441
442#if 0
443   for (i = 0; i < shader->num_vertex_streams; i++) {
444      debug_printf("stream %d: PRIM emitted prims = %d (verts=%d), cur prim count = %d\n",
445                   i,
446                   shader->stream[i].emitted_primitives, shader->stream[i].emitted_vertices,
447                   out_prim_count[i]);
448   }
449#endif
450
451   shader->fetched_prim_count = 0;
452}
453
454static void gs_point(struct draw_geometry_shader *shader,
455                     int idx)
456{
457   unsigned indices[1];
458
459   indices[0] = idx;
460
461   shader->fetch_inputs(shader, indices, 1,
462                        shader->fetched_prim_count);
463   ++shader->in_prim_idx;
464   ++shader->fetched_prim_count;
465
466   if (draw_gs_should_flush(shader))
467      gs_flush(shader);
468}
469
470static void gs_line(struct draw_geometry_shader *shader,
471                    int i0, int i1)
472{
473   unsigned indices[2];
474
475   indices[0] = i0;
476   indices[1] = i1;
477
478   shader->fetch_inputs(shader, indices, 2,
479                        shader->fetched_prim_count);
480   ++shader->in_prim_idx;
481   ++shader->fetched_prim_count;
482
483   if (draw_gs_should_flush(shader))
484      gs_flush(shader);
485}
486
487static void gs_line_adj(struct draw_geometry_shader *shader,
488                        int i0, int i1, int i2, int i3)
489{
490   unsigned indices[4];
491
492   indices[0] = i0;
493   indices[1] = i1;
494   indices[2] = i2;
495   indices[3] = i3;
496
497   shader->fetch_inputs(shader, indices, 4,
498                        shader->fetched_prim_count);
499   ++shader->in_prim_idx;
500   ++shader->fetched_prim_count;
501
502   if (draw_gs_should_flush(shader))
503      gs_flush(shader);
504}
505
506static void gs_tri(struct draw_geometry_shader *shader,
507                   int i0, int i1, int i2)
508{
509   unsigned indices[3];
510
511   indices[0] = i0;
512   indices[1] = i1;
513   indices[2] = i2;
514
515   shader->fetch_inputs(shader, indices, 3,
516                        shader->fetched_prim_count);
517   ++shader->in_prim_idx;
518   ++shader->fetched_prim_count;
519
520   if (draw_gs_should_flush(shader))
521      gs_flush(shader);
522}
523
524static void gs_tri_adj(struct draw_geometry_shader *shader,
525                       int i0, int i1, int i2,
526                       int i3, int i4, int i5)
527{
528   unsigned indices[6];
529
530   indices[0] = i0;
531   indices[1] = i1;
532   indices[2] = i2;
533   indices[3] = i3;
534   indices[4] = i4;
535   indices[5] = i5;
536
537   shader->fetch_inputs(shader, indices, 6,
538                        shader->fetched_prim_count);
539   ++shader->in_prim_idx;
540   ++shader->fetched_prim_count;
541
542   if (draw_gs_should_flush(shader))
543      gs_flush(shader);
544}
545
/*
 * draw_gs_tmp.h is included twice, parameterised through FUNC, GET_ELT and
 * (optionally) LOCAL_VARS, to generate the two entry points that
 * draw_geometry_shader_run() calls: gs_run for linear input and
 * gs_run_elts for indexed input.
 *
 * NOTE(review): the header is not visible here; presumably it decomposes
 * the input into gs_point/gs_line/gs_tri/... calls and #undefs the macros
 * at its end so they can be redefined for the second inclusion - confirm.
 */

/* Linear variant: a vertex's element index is its position. */
#define FUNC         gs_run
#define GET_ELT(idx) (idx)
#include "draw_gs_tmp.h"


/* Indexed variant: vertex positions are looked up in input_prims->elts. */
#define FUNC         gs_run_elts
#define LOCAL_VARS   const ushort *elts = input_prims->elts;
#define GET_ELT(idx) (elts[idx])
#include "draw_gs_tmp.h"
555
556
557/**
558 * Execute geometry shader.
559 */
560int draw_geometry_shader_run(struct draw_geometry_shader *shader,
561                             const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
562                             const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
563                             const struct draw_vertex_info *input_verts,
564                             const struct draw_prim_info *input_prim,
565                             const struct tgsi_shader_info *input_info,
566                             struct draw_vertex_info *output_verts,
567                             struct draw_prim_info *output_prims )
568{
569   const float (*input)[4] = (const float (*)[4])input_verts->verts->data;
570   unsigned input_stride = input_verts->vertex_size;
571   unsigned num_outputs = draw_total_gs_outputs(shader->draw);
572   unsigned vertex_size = sizeof(struct vertex_header) + num_outputs * 4 * sizeof(float);
573   unsigned num_input_verts = input_prim->linear ?
574      input_verts->count :
575      input_prim->count;
576   unsigned num_in_primitives =
577      align(
578         MAX2(u_decomposed_prims_for_vertices(input_prim->prim,
579                                              num_input_verts),
580              u_decomposed_prims_for_vertices(shader->input_primitive,
581                                              num_input_verts)),
582         shader->vector_length);
583   unsigned max_out_prims =
584      u_decomposed_prims_for_vertices(shader->output_primitive,
585                                      shader->max_output_vertices)
586      * num_in_primitives;
587   /* we allocate exactly one extra vertex per primitive to allow the GS to emit
588    * overflown vertices into some area where they won't harm anyone */
589   unsigned total_verts_per_buffer = shader->primitive_boundary *
590      num_in_primitives;
591   unsigned invocation;
592   int i;
593   //Assume at least one primitive
594   max_out_prims = MAX2(max_out_prims, 1);
595
596   for (i = 0; i < shader->num_vertex_streams; i++) {
597      /* write all the vertex data into all the streams */
598      output_verts[i].vertex_size = vertex_size;
599      output_verts[i].stride = output_verts[i].vertex_size;
600      output_verts[i].verts =
601         (struct vertex_header *)MALLOC(output_verts[i].vertex_size *
602                                        total_verts_per_buffer * shader->num_invocations);
603      debug_assert(output_verts[i].verts);
604   }
605
606#if 0
607   debug_printf("%s count = %d (in prims # = %d)\n",
608                __FUNCTION__, num_input_verts, num_in_primitives);
609   debug_printf("\tlinear = %d, prim_info->count = %d\n",
610                input_prim->linear, input_prim->count);
611   debug_printf("\tprim pipe = %s, shader in = %s, shader out = %s\n"
612                u_prim_name(input_prim->prim),
613                u_prim_name(shader->input_primitive),
614                u_prim_name(shader->output_primitive));
615   debug_printf("\tmaxv  = %d, maxp = %d, primitive_boundary = %d, "
616                "vertex_size = %d, tverts = %d\n",
617                shader->max_output_vertices, max_out_prims,
618                shader->primitive_boundary, output_verts->vertex_size,
619                total_verts_per_buffer);
620#endif
621
622   for (i = 0; i < shader->num_vertex_streams; i++) {
623      shader->stream[i].emitted_vertices = 0;
624      shader->stream[i].emitted_primitives = 0;
625      FREE(shader->stream[i].primitive_lengths);
626      shader->stream[i].primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned) * shader->num_invocations);
627      shader->stream[i].tmp_output = (float (*)[4])output_verts[i].verts->data;
628   }
629   shader->vertex_size = vertex_size;
630   shader->fetched_prim_count = 0;
631   shader->input_vertex_stride = input_stride;
632   shader->input = input;
633   shader->input_info = input_info;
634
635#ifdef HAVE_LLVM
636   if (shader->draw->llvm) {
637      shader->gs_output = output_verts[0].verts;
638      if (max_out_prims > shader->max_out_prims) {
639         unsigned i;
640         if (shader->llvm_prim_lengths) {
641            for (i = 0; i < shader->max_out_prims; ++i) {
642               align_free(shader->llvm_prim_lengths[i]);
643            }
644            FREE(shader->llvm_prim_lengths);
645         }
646
647         shader->llvm_prim_lengths = MALLOC(max_out_prims * sizeof(unsigned*));
648         for (i = 0; i < max_out_prims; ++i) {
649            int vector_size = shader->vector_length * sizeof(unsigned);
650            shader->llvm_prim_lengths[i] =
651               align_malloc(vector_size, vector_size);
652         }
653
654         shader->max_out_prims = max_out_prims;
655      }
656      shader->jit_context->prim_lengths = shader->llvm_prim_lengths;
657      shader->jit_context->emitted_vertices = shader->llvm_emitted_vertices;
658      shader->jit_context->emitted_prims = shader->llvm_emitted_primitives;
659   }
660#endif
661
662   for (invocation = 0; invocation < shader->num_invocations; invocation++) {
663      shader->invocation_id = invocation;
664
665      shader->prepare(shader, constants, constants_size);
666
667      if (input_prim->linear)
668         gs_run(shader, input_prim, input_verts,
669                output_prims, output_verts);
670      else
671         gs_run_elts(shader, input_prim, input_verts,
672                     output_prims, output_verts);
673
674      /* Flush the remaining primitives. Will happen if
675       * num_input_primitives % 4 != 0
676       */
677      if (shader->fetched_prim_count > 0) {
678         gs_flush(shader);
679      }
680      debug_assert(shader->fetched_prim_count == 0);
681   }
682
683   /* Update prim_info:
684    */
685   for (i = 0; i < shader->num_vertex_streams; i++) {
686      output_prims[i].linear = TRUE;
687      output_prims[i].elts = NULL;
688      output_prims[i].start = 0;
689      output_prims[i].count = shader->stream[i].emitted_vertices;
690      output_prims[i].prim = shader->output_primitive;
691      output_prims[i].flags = 0x0;
692      output_prims[i].primitive_lengths = shader->stream[i].primitive_lengths;
693      output_prims[i].primitive_count = shader->stream[i].emitted_primitives;
694      output_verts[i].count = shader->stream[i].emitted_vertices;
695
696      if (shader->draw->collect_statistics) {
697         unsigned j;
698         for (j = 0; j < shader->stream[i].emitted_primitives; ++j) {
699            shader->draw->statistics.gs_primitives +=
700               u_decomposed_prims_for_vertices(shader->output_primitive,
701                                               shader->stream[i].primitive_lengths[j]);
702         }
703      }
704   }
705
706#if 0
707   debug_printf("GS finished\n");
708   for (i = 0; i < 4; i++)
709      debug_printf("stream %d: prims = %d verts = %d\n", i, output_prims[i].primitive_count, output_verts[i].count);
710#endif
711
712   return 0;
713}
714
715void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
716                                  struct draw_context *draw)
717{
718   boolean use_llvm = draw->llvm != NULL;
719   if (!use_llvm && shader && shader->machine->Tokens != shader->state.tokens) {
720      tgsi_exec_machine_bind_shader(shader->machine,
721                                    shader->state.tokens,
722                                    draw->gs.tgsi.sampler,
723                                    draw->gs.tgsi.image,
724                                    draw->gs.tgsi.buffer);
725   }
726}
727
728
729boolean
730draw_gs_init( struct draw_context *draw )
731{
732   if (!draw->llvm) {
733      draw->gs.tgsi.machine = tgsi_exec_machine_create(PIPE_SHADER_GEOMETRY);
734
735      for (unsigned i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) {
736         draw->gs.tgsi.machine->Primitives[i] = align_malloc(
737            MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16);
738         draw->gs.tgsi.machine->PrimitiveOffsets[i] = align_malloc(
739            MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16);
740         if (!draw->gs.tgsi.machine->Primitives[i] || !draw->gs.tgsi.machine->PrimitiveOffsets[i])
741            return FALSE;
742         memset(draw->gs.tgsi.machine->Primitives[i], 0,
743                MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector));
744         memset(draw->gs.tgsi.machine->PrimitiveOffsets[i], 0,
745                MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector));
746      }
747   }
748
749   return TRUE;
750}
751
752void draw_gs_destroy( struct draw_context *draw )
753{
754   int i;
755   if (draw->gs.tgsi.machine) {
756      for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) {
757         align_free(draw->gs.tgsi.machine->Primitives[i]);
758         align_free(draw->gs.tgsi.machine->PrimitiveOffsets[i]);
759      }
760      tgsi_exec_machine_destroy(draw->gs.tgsi.machine);
761   }
762}
763
764struct draw_geometry_shader *
765draw_create_geometry_shader(struct draw_context *draw,
766                            const struct pipe_shader_state *state)
767{
768#ifdef HAVE_LLVM
769   boolean use_llvm = draw->llvm != NULL;
770   struct llvm_geometry_shader *llvm_gs = NULL;
771#endif
772   struct draw_geometry_shader *gs;
773   unsigned i;
774
775#ifdef HAVE_LLVM
776   if (use_llvm) {
777      llvm_gs = CALLOC_STRUCT(llvm_geometry_shader);
778
779      if (!llvm_gs)
780         return NULL;
781
782      gs = &llvm_gs->base;
783
784      make_empty_list(&llvm_gs->variants);
785   } else
786#endif
787   {
788      gs = CALLOC_STRUCT(draw_geometry_shader);
789   }
790
791   if (!gs)
792      return NULL;
793
794   gs->draw = draw;
795   gs->state = *state;
796   gs->state.tokens = tgsi_dup_tokens(state->tokens);
797   if (!gs->state.tokens) {
798      FREE(gs);
799      return NULL;
800   }
801
802   tgsi_scan_shader(state->tokens, &gs->info);
803
804   /* setup the defaults */
805   gs->max_out_prims = 0;
806
807#ifdef HAVE_LLVM
808   if (use_llvm) {
809      /* TODO: change the input array to handle the following
810         vector length, instead of the currently hardcoded
811         TGSI_NUM_CHANNELS
812      gs->vector_length = lp_native_vector_width / 32;*/
813      gs->vector_length = TGSI_NUM_CHANNELS;
814   } else
815#endif
816   {
817      gs->vector_length = 1;
818   }
819
820   gs->input_primitive =
821         gs->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM];
822   gs->output_primitive =
823         gs->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
824   gs->max_output_vertices =
825         gs->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
826   gs->num_invocations =
827      gs->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
828   if (!gs->max_output_vertices)
829      gs->max_output_vertices = 32;
830
831   /* Primitive boundary is bigger than max_output_vertices by one, because
832    * the specification says that the geometry shader should exit if the
833    * number of emitted vertices is bigger or equal to max_output_vertices and
834    * we can't do that because we're running in the SoA mode, which means that
835    * our storing routines will keep getting called on channels that have
836    * overflown.
837    * So we need some scratch area where we can keep writing the overflown
838    * vertices without overwriting anything important or crashing.
839    */
840   gs->primitive_boundary = gs->max_output_vertices + 1;
841
842   gs->position_output = -1;
843   for (i = 0; i < gs->info.num_outputs; i++) {
844      if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
845          gs->info.output_semantic_index[i] == 0)
846         gs->position_output = i;
847      if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX)
848         gs->viewport_index_output = i;
849      if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
850         debug_assert(gs->info.output_semantic_index[i] <
851                      PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT);
852         gs->ccdistance_output[gs->info.output_semantic_index[i]] = i;
853      }
854   }
855
856   gs->machine = draw->gs.tgsi.machine;
857
858   gs->num_vertex_streams = 1;
859   for (i = 0; i < gs->state.stream_output.num_outputs; i++) {
860      if (gs->state.stream_output.output[i].stream >= gs->num_vertex_streams)
861         gs->num_vertex_streams = gs->state.stream_output.output[i].stream + 1;
862   }
863
864#ifdef HAVE_LLVM
865   if (use_llvm) {
866      int vector_size = gs->vector_length * sizeof(float);
867      gs->gs_input = align_malloc(sizeof(struct draw_gs_inputs), 16);
868      memset(gs->gs_input, 0, sizeof(struct draw_gs_inputs));
869      gs->llvm_prim_lengths = 0;
870
871      gs->llvm_emitted_primitives = align_malloc(vector_size, vector_size);
872      gs->llvm_emitted_vertices = align_malloc(vector_size, vector_size);
873      gs->llvm_prim_ids = align_malloc(vector_size, vector_size);
874
875      gs->fetch_outputs = llvm_fetch_gs_outputs;
876      gs->fetch_inputs = llvm_fetch_gs_input;
877      gs->prepare = llvm_gs_prepare;
878      gs->run = llvm_gs_run;
879
880      gs->jit_context = &draw->llvm->gs_jit_context;
881
882
883      llvm_gs->variant_key_size =
884         draw_gs_llvm_variant_key_size(
885            MAX2(gs->info.file_max[TGSI_FILE_SAMPLER]+1,
886                 gs->info.file_max[TGSI_FILE_SAMPLER_VIEW]+1));
887   } else
888#endif
889   {
890      gs->fetch_outputs = tgsi_fetch_gs_outputs;
891      gs->fetch_inputs = tgsi_fetch_gs_input;
892      gs->prepare = tgsi_gs_prepare;
893      gs->run = tgsi_gs_run;
894   }
895
896   return gs;
897}
898
899void draw_bind_geometry_shader(struct draw_context *draw,
900                               struct draw_geometry_shader *dgs)
901{
902   draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
903
904   if (dgs) {
905      draw->gs.geometry_shader = dgs;
906      draw->gs.num_gs_outputs = dgs->info.num_outputs;
907      draw->gs.position_output = dgs->position_output;
908      draw_geometry_shader_prepare(dgs, draw);
909   }
910   else {
911      draw->gs.geometry_shader = NULL;
912      draw->gs.num_gs_outputs = 0;
913   }
914}
915
916void draw_delete_geometry_shader(struct draw_context *draw,
917                                 struct draw_geometry_shader *dgs)
918{
919   int i;
920   if (!dgs) {
921      return;
922   }
923#ifdef HAVE_LLVM
924   if (draw->llvm) {
925      struct llvm_geometry_shader *shader = llvm_geometry_shader(dgs);
926      struct draw_gs_llvm_variant_list_item *li;
927
928      li = first_elem(&shader->variants);
929      while(!at_end(&shader->variants, li)) {
930         struct draw_gs_llvm_variant_list_item *next = next_elem(li);
931         draw_gs_llvm_destroy_variant(li->base);
932         li = next;
933      }
934
935      assert(shader->variants_cached == 0);
936
937      if (dgs->llvm_prim_lengths) {
938         unsigned i;
939         for (i = 0; i < dgs->max_out_prims; ++i) {
940            align_free(dgs->llvm_prim_lengths[i]);
941         }
942         FREE(dgs->llvm_prim_lengths);
943      }
944      align_free(dgs->llvm_emitted_primitives);
945      align_free(dgs->llvm_emitted_vertices);
946      align_free(dgs->llvm_prim_ids);
947
948      align_free(dgs->gs_input);
949   }
950#endif
951
952   for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++)
953      FREE(dgs->stream[i].primitive_lengths);
954   FREE((void*) dgs->state.tokens);
955   FREE(dgs);
956}
957
958
959#ifdef HAVE_LLVM
960void draw_gs_set_current_variant(struct draw_geometry_shader *shader,
961                                 struct draw_gs_llvm_variant *variant)
962{
963   shader->current_variant = variant;
964}
965#endif
966
967/*
968 * Called at the very begin of the draw call with a new instance
969 * Used to reset state that should persist between primitive restart.
970 */
971void
972draw_geometry_shader_new_instance(struct draw_geometry_shader *gs)
973{
974   if (!gs)
975      return;
976
977   gs->in_prim_idx = 0;
978}
979