1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keithw@vmware.com>
30  */
31
32#include "brw_compiler.h"
33#include "brw_eu.h"
34
35#include "dev/intel_debug.h"
36
/* Number of entries in brw_ff_gs_compile::reg.vertex[]. */
#define MAX_GS_VERTS (4)

/**
 * State for compiling one fixed-function GS program.
 */
struct brw_ff_gs_compile {
   /* Code generator that the program's instructions are emitted into. */
   struct brw_codegen func;
   /* Copy of the program key the compile was started with. */
   struct brw_ff_gs_prog_key key;
   /* Caller-owned output; register allocation fills in urb_read_length and
    * total_grf, and the SOL program sets svbi_postincrement_value.
    */
   struct brw_ff_gs_prog_data *prog_data;

   /* Registers assigned by brw_ff_gs_alloc_regs(). */
   struct {
      /* GRF 0, viewed as UD. */
      struct brw_reg R0;

      /**
       * Register holding streamed vertex buffer pointers -- see the Sandy
       * Bridge PRM, volume 2 part 1, section 4.4.2 (GS Thread Payload
       * [DevSNB]).  These pointers are delivered in GRF 1.
       */
      struct brw_reg SVBI;

      /* First register of each vertex; consecutive vertices are nr_regs
       * registers apart.
       */
      struct brw_reg vertex[MAX_GS_VERTS];
      /* Message header used for the URB_WRITE, FF_SYNC and SVB write
       * messages.
       */
      struct brw_reg header;
      /* Scratch register; also receives the responses of FF_SYNC and of
       * allocating URB_WRITE messages.
       */
      struct brw_reg temp;

      /**
       * Register holding destination indices for streamed buffer writes.
       * Only used for SOL programs.
       */
      struct brw_reg destination_indices;
   } reg;

   /* Number of registers used to store vertex data */
   GLuint nr_regs;

   /* Copy of the caller's VUE map; used to look up the slot of each
    * transform feedback varying.
    */
   struct brw_vue_map vue_map;
};
70
71/**
72 * Allocate registers for GS.
73 *
74 * If sol_program is true, then:
75 *
76 * - The thread will be spawned with the "SVBI Payload Enable" bit set, so GRF
77 *   1 needs to be set aside to hold the streamed vertex buffer indices.
78 *
79 * - The thread will need to use the destination_indices register.
80 */
81static void brw_ff_gs_alloc_regs(struct brw_ff_gs_compile *c,
82                                 GLuint nr_verts,
83                                 bool sol_program)
84{
85   GLuint i = 0,j;
86
87   /* Register usage is static, precompute here:
88    */
89   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
90
91   /* Streamed vertex buffer indices */
92   if (sol_program)
93      c->reg.SVBI = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
94
95   /* Payload vertices plus space for more generated vertices:
96    */
97   for (j = 0; j < nr_verts; j++) {
98      c->reg.vertex[j] = brw_vec4_grf(i, 0);
99      i += c->nr_regs;
100   }
101
102   c->reg.header = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
103   c->reg.temp = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
104
105   if (sol_program) {
106      c->reg.destination_indices =
107         retype(brw_vec4_grf(i++, 0), BRW_REGISTER_TYPE_UD);
108   }
109
110   c->prog_data->urb_read_length = c->nr_regs;
111   c->prog_data->total_grf = i;
112}
113
114
115/**
116 * Set up the initial value of c->reg.header register based on c->reg.R0.
117 *
118 * The following information is passed to the GS thread in R0, and needs to be
119 * included in the first URB_WRITE or FF_SYNC message sent by the GS:
120 *
121 * - DWORD 0 [31:0] handle info (Gen4 only)
122 * - DWORD 5 [7:0] FFTID
123 * - DWORD 6 [31:0] Debug info
124 * - DWORD 7 [31:0] Debug info
125 *
126 * This function sets up the above data by copying by copying the contents of
127 * R0 to the header register.
128 */
129static void brw_ff_gs_initialize_header(struct brw_ff_gs_compile *c)
130{
131   struct brw_codegen *p = &c->func;
132   brw_MOV(p, c->reg.header, c->reg.R0);
133}
134
135/**
136 * Overwrite DWORD 2 of c->reg.header with the given immediate unsigned value.
137 *
138 * In URB_WRITE messages, DWORD 2 contains the fields PrimType, PrimStart,
139 * PrimEnd, Increment CL_INVOCATIONS, and SONumPrimsWritten, many of which we
140 * need to be able to update on a per-vertex basis.
141 */
142static void brw_ff_gs_overwrite_header_dw2(struct brw_ff_gs_compile *c,
143                                           unsigned dw2)
144{
145   struct brw_codegen *p = &c->func;
146   brw_MOV(p, get_element_ud(c->reg.header, 2), brw_imm_ud(dw2));
147}
148
149/**
150 * Overwrite DWORD 2 of c->reg.header with the primitive type from c->reg.R0.
151 *
152 * When the thread is spawned, GRF 0 contains the primitive type in bits 4:0
153 * of DWORD 2.  URB_WRITE messages need the primitive type in bits 6:2 of
154 * DWORD 2.  So this function extracts the primitive type field, bitshifts it
155 * appropriately, and stores it in c->reg.header.
156 */
157static void brw_ff_gs_overwrite_header_dw2_from_r0(struct brw_ff_gs_compile *c)
158{
159   struct brw_codegen *p = &c->func;
160   brw_AND(p, get_element_ud(c->reg.header, 2), get_element_ud(c->reg.R0, 2),
161           brw_imm_ud(0x1f));
162   brw_SHL(p, get_element_ud(c->reg.header, 2),
163           get_element_ud(c->reg.header, 2), brw_imm_ud(2));
164}
165
166/**
167 * Apply an additive offset to DWORD 2 of c->reg.header.
168 *
169 * This is used to set/unset the "PrimStart" and "PrimEnd" flags appropriately
170 * for each vertex.
171 */
172static void brw_ff_gs_offset_header_dw2(struct brw_ff_gs_compile *c,
173                                        int offset)
174{
175   struct brw_codegen *p = &c->func;
176   brw_ADD(p, get_element_d(c->reg.header, 2), get_element_d(c->reg.header, 2),
177           brw_imm_d(offset));
178}
179
180
181/**
182 * Emit a vertex using the URB_WRITE message.  Use the contents of
183 * c->reg.header for the message header, and the registers starting at \c vert
184 * for the vertex data.
185 *
186 * If \c last is true, then this is the last vertex, so no further URB space
187 * should be allocated, and this message should end the thread.
188 *
189 * If \c last is false, then a new URB entry will be allocated, and its handle
190 * will be stored in DWORD 0 of c->reg.header for use in the next URB_WRITE
191 * message.
192 */
193static void brw_ff_gs_emit_vue(struct brw_ff_gs_compile *c,
194                               struct brw_reg vert,
195                               bool last)
196{
197   struct brw_codegen *p = &c->func;
198   int write_offset = 0;
199   bool complete = false;
200
201   do {
202      /* We can't write more than 14 registers at a time to the URB */
203      int write_len = MIN2(c->nr_regs - write_offset, 14);
204      if (write_len == c->nr_regs - write_offset)
205         complete = true;
206
207      /* Copy the vertex from vertn into m1..mN+1:
208       */
209      brw_copy8(p, brw_message_reg(1), offset(vert, write_offset), write_len);
210
211      /* Send the vertex data to the URB.  If this is the last write for this
212       * vertex, then we mark it as complete, and either end the thread or
213       * allocate another vertex URB entry (depending whether this is the last
214       * vertex).
215       */
216      enum brw_urb_write_flags flags;
217      if (!complete)
218         flags = BRW_URB_WRITE_NO_FLAGS;
219      else if (last)
220         flags = BRW_URB_WRITE_EOT_COMPLETE;
221      else
222         flags = BRW_URB_WRITE_ALLOCATE_COMPLETE;
223      brw_urb_WRITE(p,
224                    (flags & BRW_URB_WRITE_ALLOCATE) ? c->reg.temp
225                    : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
226                    0,
227                    c->reg.header,
228                    flags,
229                    write_len + 1, /* msg length */
230                    (flags & BRW_URB_WRITE_ALLOCATE) ? 1
231                    : 0, /* response length */
232                    write_offset,  /* urb offset */
233                    BRW_URB_SWIZZLE_NONE);
234      write_offset += write_len;
235   } while (!complete);
236
237   if (!last) {
238      brw_MOV(p, get_element_ud(c->reg.header, 0),
239              get_element_ud(c->reg.temp, 0));
240   }
241}
242
243/**
244 * Send an FF_SYNC message to ensure that all previously spawned GS threads
245 * have finished sending primitives down the pipeline, and to allocate a URB
246 * entry for the first output vertex.  Only needed on Ironlake+.
247 *
248 * This function modifies c->reg.header: in DWORD 1, it stores num_prim (which
249 * is needed by the FF_SYNC message), and in DWORD 0, it stores the handle to
250 * the allocated URB entry (which will be needed by the URB_WRITE meesage that
251 * follows).
252 */
253static void brw_ff_gs_ff_sync(struct brw_ff_gs_compile *c, int num_prim)
254{
255   struct brw_codegen *p = &c->func;
256
257   brw_MOV(p, get_element_ud(c->reg.header, 1), brw_imm_ud(num_prim));
258   brw_ff_sync(p,
259               c->reg.temp,
260               0,
261               c->reg.header,
262               1, /* allocate */
263               1, /* response length */
264               0 /* eot */);
265   brw_MOV(p, get_element_ud(c->reg.header, 0),
266           get_element_ud(c->reg.temp, 0));
267}
268
269
270static void
271brw_ff_gs_quads(struct brw_ff_gs_compile *c,
272		const struct brw_ff_gs_prog_key *key)
273{
274   brw_ff_gs_alloc_regs(c, 4, false);
275   brw_ff_gs_initialize_header(c);
276   /* Use polygons for correct edgeflag behaviour. Note that vertex 3
277    * is the PV for quads, but vertex 0 for polygons:
278    */
279   if (c->func.devinfo->ver == 5)
280      brw_ff_gs_ff_sync(c, 1);
281   brw_ff_gs_overwrite_header_dw2(
282      c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
283          | URB_WRITE_PRIM_START));
284   if (key->pv_first) {
285      brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
286      brw_ff_gs_overwrite_header_dw2(
287         c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
288      brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0);
289      brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0);
290      brw_ff_gs_overwrite_header_dw2(
291         c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
292             | URB_WRITE_PRIM_END));
293      brw_ff_gs_emit_vue(c, c->reg.vertex[3], 1);
294   }
295   else {
296      brw_ff_gs_emit_vue(c, c->reg.vertex[3], 0);
297      brw_ff_gs_overwrite_header_dw2(
298         c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
299      brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
300      brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0);
301      brw_ff_gs_overwrite_header_dw2(
302         c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
303             | URB_WRITE_PRIM_END));
304      brw_ff_gs_emit_vue(c, c->reg.vertex[2], 1);
305   }
306}
307
308static void
309brw_ff_gs_quad_strip(struct brw_ff_gs_compile *c,
310                     const struct brw_ff_gs_prog_key *key)
311{
312   brw_ff_gs_alloc_regs(c, 4, false);
313   brw_ff_gs_initialize_header(c);
314
315   if (c->func.devinfo->ver == 5)
316      brw_ff_gs_ff_sync(c, 1);
317   brw_ff_gs_overwrite_header_dw2(
318      c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
319          | URB_WRITE_PRIM_START));
320   if (key->pv_first) {
321      brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
322      brw_ff_gs_overwrite_header_dw2(
323         c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
324      brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0);
325      brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0);
326      brw_ff_gs_overwrite_header_dw2(
327         c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
328             | URB_WRITE_PRIM_END));
329      brw_ff_gs_emit_vue(c, c->reg.vertex[3], 1);
330   }
331   else {
332      brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0);
333      brw_ff_gs_overwrite_header_dw2(
334         c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
335      brw_ff_gs_emit_vue(c, c->reg.vertex[3], 0);
336      brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
337      brw_ff_gs_overwrite_header_dw2(
338         c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
339             | URB_WRITE_PRIM_END));
340      brw_ff_gs_emit_vue(c, c->reg.vertex[1], 1);
341   }
342}
343
344static void brw_ff_gs_lines(struct brw_ff_gs_compile *c)
345{
346   brw_ff_gs_alloc_regs(c, 2, false);
347   brw_ff_gs_initialize_header(c);
348
349   if (c->func.devinfo->ver == 5)
350      brw_ff_gs_ff_sync(c, 1);
351   brw_ff_gs_overwrite_header_dw2(
352      c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
353          | URB_WRITE_PRIM_START));
354   brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0);
355   brw_ff_gs_overwrite_header_dw2(
356      c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
357          | URB_WRITE_PRIM_END));
358   brw_ff_gs_emit_vue(c, c->reg.vertex[1], 1);
359}
360
361/**
362 * Generate the geometry shader program used on Gen6 to perform stream output
363 * (transform feedback).
364 */
365static void
366gfx6_sol_program(struct brw_ff_gs_compile *c, const struct brw_ff_gs_prog_key *key,
367                 unsigned num_verts, bool check_edge_flags)
368{
369   struct brw_codegen *p = &c->func;
370   brw_inst *inst;
371   c->prog_data->svbi_postincrement_value = num_verts;
372
373   brw_ff_gs_alloc_regs(c, num_verts, true);
374   brw_ff_gs_initialize_header(c);
375
376   if (key->num_transform_feedback_bindings > 0) {
377      unsigned vertex, binding;
378      struct brw_reg destination_indices_uw =
379         vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW));
380
381      /* Note: since we use the binding table to keep track of buffer offsets
382       * and stride, the GS doesn't need to keep track of a separate pointer
383       * into each buffer; it uses a single pointer which increments by 1 for
384       * each vertex.  So we use SVBI0 for this pointer, regardless of whether
385       * transform feedback is in interleaved or separate attribs mode.
386       *
387       * Make sure that the buffers have enough room for all the vertices.
388       */
389      brw_ADD(p, get_element_ud(c->reg.temp, 0),
390                 get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts));
391      brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE,
392                 get_element_ud(c->reg.temp, 0),
393                 get_element_ud(c->reg.SVBI, 4));
394      brw_IF(p, BRW_EXECUTE_1);
395
396      /* Compute the destination indices to write to.  Usually we use SVBI[0]
397       * + (0, 1, 2).  However, for odd-numbered triangles in tristrips, the
398       * vertices come down the pipeline in reversed winding order, so we need
399       * to flip the order when writing to the transform feedback buffer.  To
400       * ensure that flatshading accuracy is preserved, we need to write them
401       * in order SVBI[0] + (0, 2, 1) if we're using the first provoking
402       * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using
403       * the last provoking vertex convention.
404       *
405       * Note: since brw_imm_v can only be used in instructions in
406       * packed-word execution mode, and SVBI is a double-word, we need to
407       * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1),
408       * or (1, 0, 2)) to the destination_indices register, and then add SVBI
409       * using a separate instruction.  Also, since the immediate constant is
410       * expressed as packed words, and we need to load double-words into
411       * destination_indices, we need to intersperse zeros to fill the upper
412       * halves of each double-word.
413       */
414      brw_MOV(p, destination_indices_uw,
415              brw_imm_v(0x00020100)); /* (0, 1, 2) */
416      if (num_verts == 3) {
417         /* Get primitive type into temp register. */
418         brw_AND(p, get_element_ud(c->reg.temp, 0),
419                 get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f));
420
421         /* Test if primitive type is TRISTRIP_REVERSE.  We need to do this as
422          * an 8-wide comparison so that the conditional MOV that follows
423          * moves all 8 words correctly.
424          */
425         brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ,
426                 get_element_ud(c->reg.temp, 0),
427                 brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
428
429         /* If so, then overwrite destination_indices_uw with the appropriate
430          * reordering.
431          */
432         inst = brw_MOV(p, destination_indices_uw,
433                        brw_imm_v(key->pv_first ? 0x00010200    /* (0, 2, 1) */
434                                                : 0x00020001)); /* (1, 0, 2) */
435         brw_inst_set_pred_control(p->devinfo, inst, BRW_PREDICATE_NORMAL);
436      }
437
438      assert(c->reg.destination_indices.width == BRW_EXECUTE_4);
439      brw_push_insn_state(p);
440      brw_set_default_exec_size(p, BRW_EXECUTE_4);
441      brw_ADD(p, c->reg.destination_indices,
442              c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0));
443      brw_pop_insn_state(p);
444      /* For each vertex, generate code to output each varying using the
445       * appropriate binding table entry.
446       */
447      for (vertex = 0; vertex < num_verts; ++vertex) {
448         /* Set up the correct destination index for this vertex */
449         brw_MOV(p, get_element_ud(c->reg.header, 5),
450                 get_element_ud(c->reg.destination_indices, vertex));
451
452         for (binding = 0; binding < key->num_transform_feedback_bindings;
453              ++binding) {
454            unsigned char varying =
455               key->transform_feedback_bindings[binding];
456            unsigned char slot = c->vue_map.varying_to_slot[varying];
457            /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
458             *
459             *   "Prior to End of Thread with a URB_WRITE, the kernel must
460             *   ensure that all writes are complete by sending the final
461             *   write as a committed write."
462             */
463            bool final_write =
464               binding == key->num_transform_feedback_bindings - 1 &&
465               vertex == num_verts - 1;
466            struct brw_reg vertex_slot = c->reg.vertex[vertex];
467            vertex_slot.nr += slot / 2;
468            vertex_slot.subnr = (slot % 2) * 16;
469            /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
470            vertex_slot.swizzle = varying == VARYING_SLOT_PSIZ
471               ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding];
472            brw_set_default_access_mode(p, BRW_ALIGN_16);
473            brw_push_insn_state(p);
474            brw_set_default_exec_size(p, BRW_EXECUTE_4);
475
476            brw_MOV(p, stride(c->reg.header, 4, 4, 1),
477                    retype(vertex_slot, BRW_REGISTER_TYPE_UD));
478            brw_pop_insn_state(p);
479
480            brw_set_default_access_mode(p, BRW_ALIGN_1);
481            brw_svb_write(p,
482                          final_write ? c->reg.temp : brw_null_reg(), /* dest */
483                          1, /* msg_reg_nr */
484                          c->reg.header, /* src0 */
485                          BRW_GFX6_SOL_BINDING_START + binding, /* binding_table_index */
486                          final_write); /* send_commit_msg */
487         }
488      }
489      brw_ENDIF(p);
490
491      /* Now, reinitialize the header register from R0 to restore the parts of
492       * the register that we overwrote while streaming out transform feedback
493       * data.
494       */
495      brw_ff_gs_initialize_header(c);
496
497      /* Finally, wait for the write commit to occur so that we can proceed to
498       * other things safely.
499       *
500       * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3:
501       *
502       *   The write commit does not modify the destination register, but
503       *   merely clears the dependency associated with the destination
504       *   register. Thus, a simple “mov” instruction using the register as a
505       *   source is sufficient to wait for the write commit to occur.
506       */
507      brw_MOV(p, c->reg.temp, c->reg.temp);
508   }
509
510   brw_ff_gs_ff_sync(c, 1);
511
512   brw_ff_gs_overwrite_header_dw2_from_r0(c);
513   switch (num_verts) {
514   case 1:
515      brw_ff_gs_offset_header_dw2(c,
516                                  URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
517      brw_ff_gs_emit_vue(c, c->reg.vertex[0], true);
518      break;
519   case 2:
520      brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
521      brw_ff_gs_emit_vue(c, c->reg.vertex[0], false);
522      brw_ff_gs_offset_header_dw2(c,
523                                  URB_WRITE_PRIM_END - URB_WRITE_PRIM_START);
524      brw_ff_gs_emit_vue(c, c->reg.vertex[1], true);
525      break;
526   case 3:
527      if (check_edge_flags) {
528         /* Only emit vertices 0 and 1 if this is the first triangle of the
529          * polygon.  Otherwise they are redundant.
530          */
531         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
532                 get_element_ud(c->reg.R0, 2),
533                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_0));
534         brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
535         brw_IF(p, BRW_EXECUTE_1);
536      }
537      brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
538      brw_ff_gs_emit_vue(c, c->reg.vertex[0], false);
539      brw_ff_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START);
540      brw_ff_gs_emit_vue(c, c->reg.vertex[1], false);
541      if (check_edge_flags) {
542         brw_ENDIF(p);
543         /* Only emit vertex 2 in PRIM_END mode if this is the last triangle
544          * of the polygon.  Otherwise leave the primitive incomplete because
545          * there are more polygon vertices coming.
546          */
547         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
548                 get_element_ud(c->reg.R0, 2),
549                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_1));
550         brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
551         brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
552      }
553      brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_END);
554      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
555      brw_ff_gs_emit_vue(c, c->reg.vertex[2], true);
556      break;
557   }
558}
559
560const unsigned *
561brw_compile_ff_gs_prog(struct brw_compiler *compiler,
562		       void *mem_ctx,
563		       const struct brw_ff_gs_prog_key *key,
564		       struct brw_ff_gs_prog_data *prog_data,
565		       struct brw_vue_map *vue_map,
566		       unsigned *final_assembly_size)
567{
568   struct brw_ff_gs_compile c;
569   const GLuint *program;
570
571   memset(&c, 0, sizeof(c));
572
573   c.key = *key;
574   c.vue_map = *vue_map;
575   c.nr_regs = (c.vue_map.num_slots + 1)/2;
576   c.prog_data = prog_data;
577
578   mem_ctx = ralloc_context(NULL);
579
580   /* Begin the compilation:
581    */
582   brw_init_codegen(compiler->devinfo, &c.func, mem_ctx);
583
584   c.func.single_program_flow = 1;
585
586   /* For some reason the thread is spawned with only 4 channels
587    * unmasked.
588    */
589   brw_set_default_mask_control(&c.func, BRW_MASK_DISABLE);
590
591   if (compiler->devinfo->ver >= 6) {
592      unsigned num_verts;
593      bool check_edge_flag;
594      /* On Sandybridge, we use the GS for implementing transform feedback
595       * (called "Stream Out" in the PRM).
596       */
597      switch (key->primitive) {
598      case _3DPRIM_POINTLIST:
599         num_verts = 1;
600         check_edge_flag = false;
601         break;
602      case _3DPRIM_LINELIST:
603      case _3DPRIM_LINESTRIP:
604      case _3DPRIM_LINELOOP:
605         num_verts = 2;
606         check_edge_flag = false;
607         break;
608      case _3DPRIM_TRILIST:
609      case _3DPRIM_TRIFAN:
610      case _3DPRIM_TRISTRIP:
611      case _3DPRIM_RECTLIST:
612         num_verts = 3;
613         check_edge_flag = false;
614         break;
615      case _3DPRIM_QUADLIST:
616      case _3DPRIM_QUADSTRIP:
617      case _3DPRIM_POLYGON:
618         num_verts = 3;
619         check_edge_flag = true;
620         break;
621      default:
622         unreachable("Unexpected primitive type in Gen6 SOL program.");
623      }
624      gfx6_sol_program(&c, key, num_verts, check_edge_flag);
625   } else {
626      /* On Gen4-5, we use the GS to decompose certain types of primitives.
627       * Note that primitives which don't require a GS program have already
628       * been weeded out by now.
629       */
630      switch (key->primitive) {
631      case _3DPRIM_QUADLIST:
632         brw_ff_gs_quads( &c, key );
633         break;
634      case _3DPRIM_QUADSTRIP:
635         brw_ff_gs_quad_strip( &c, key );
636         break;
637      case _3DPRIM_LINELOOP:
638         brw_ff_gs_lines( &c );
639         break;
640      default:
641         return NULL;
642      }
643   }
644
645   brw_compact_instructions(&c.func, 0, NULL);
646
647   /* get the program
648    */
649   program = brw_get_program(&c.func, final_assembly_size);
650
651   if (INTEL_DEBUG(DEBUG_GS)) {
652      fprintf(stderr, "gs:\n");
653      brw_disassemble_with_labels(compiler->devinfo, c.func.store,
654                                  0, *final_assembly_size, stderr);
655      fprintf(stderr, "\n");
656    }
657
658   return program;
659}
660
661