1/*
2 * Copyright © 2006 - 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "brw_compiler.h"
25#include "brw_eu.h"
26
27#include "dev/intel_debug.h"
28
29struct brw_sf_compile {
30   struct brw_codegen func;
31   struct brw_sf_prog_key key;
32   struct brw_sf_prog_data prog_data;
33
34   struct brw_reg pv;
35   struct brw_reg det;
36   struct brw_reg dx0;
37   struct brw_reg dx2;
38   struct brw_reg dy0;
39   struct brw_reg dy2;
40
41   /* z and 1/w passed in seperately:
42    */
43   struct brw_reg z[3];
44   struct brw_reg inv_w[3];
45
46   /* The vertices:
47    */
48   struct brw_reg vert[3];
49
50    /* Temporaries, allocated after last vertex reg.
51    */
52   struct brw_reg inv_det;
53   struct brw_reg a1_sub_a0;
54   struct brw_reg a2_sub_a0;
55   struct brw_reg tmp;
56
57   struct brw_reg m1Cx;
58   struct brw_reg m2Cy;
59   struct brw_reg m3C0;
60
61   GLuint nr_verts;
62   GLuint nr_attr_regs;
63   GLuint nr_setup_regs;
64   int urb_entry_read_offset;
65
66   /** The last known value of the f0.0 flag register. */
67   unsigned flag_value;
68
69   struct brw_vue_map vue_map;
70};
71
72/**
73 * Determine the vue slot corresponding to the given half of the given register.
74 */
75static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint reg,
76                                       int half)
77{
78   return (reg + c->urb_entry_read_offset) * 2 + half;
79}
80
81/**
82 * Determine the varying corresponding to the given half of the given
83 * register.  half=0 means the first half of a register, half=1 means the
84 * second half.
85 */
86static inline int vert_reg_to_varying(struct brw_sf_compile *c, GLuint reg,
87                                      int half)
88{
89   int vue_slot = vert_reg_to_vue_slot(c, reg, half);
90   return c->vue_map.slot_to_varying[vue_slot];
91}
92
93/**
94 * Determine the register corresponding to the given vue slot
95 */
96static struct brw_reg get_vue_slot(struct brw_sf_compile *c,
97                                   struct brw_reg vert,
98                                   int vue_slot)
99{
100   GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
101   GLuint sub = vue_slot % 2;
102
103   return brw_vec4_grf(vert.nr + off, sub * 4);
104}
105
106/**
107 * Determine the register corresponding to the given varying.
108 */
109static struct brw_reg get_varying(struct brw_sf_compile *c,
110                                  struct brw_reg vert,
111                                  GLuint varying)
112{
113   int vue_slot = c->vue_map.varying_to_slot[varying];
114   assert (vue_slot >= c->urb_entry_read_offset);
115   return get_vue_slot(c, vert, vue_slot);
116}
117
118static bool
119have_attr(struct brw_sf_compile *c, GLuint attr)
120{
121   return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0;
122}
123
124/***********************************************************************
125 * Twoside lighting
126 */
127static void copy_bfc( struct brw_sf_compile *c,
128		      struct brw_reg vert )
129{
130   struct brw_codegen *p = &c->func;
131   GLuint i;
132
133   for (i = 0; i < 2; i++) {
134      if (have_attr(c, VARYING_SLOT_COL0+i) &&
135	  have_attr(c, VARYING_SLOT_BFC0+i))
136	 brw_MOV(p,
137		 get_varying(c, vert, VARYING_SLOT_COL0+i),
138		 get_varying(c, vert, VARYING_SLOT_BFC0+i));
139   }
140}
141
142
143static void do_twoside_color( struct brw_sf_compile *c )
144{
145   struct brw_codegen *p = &c->func;
146   GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
147
148   /* Already done in clip program:
149    */
150   if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
151      return;
152
153   /* If the vertex shader provides backface color, do the selection. The VS
154    * promises to set up the front color if the backface color is provided, but
155    * it may contain junk if never written to.
156    */
157   if (!(have_attr(c, VARYING_SLOT_COL0) && have_attr(c, VARYING_SLOT_BFC0)) &&
158       !(have_attr(c, VARYING_SLOT_COL1) && have_attr(c, VARYING_SLOT_BFC1)))
159      return;
160
161   /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
162    * to get all channels active inside the IF.  In the clipping code
163    * we run with NoMask, so it's not an option and we can use
164    * BRW_EXECUTE_1 for all comparisions.
165    */
166   brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
167   brw_IF(p, BRW_EXECUTE_4);
168   {
169      switch (c->nr_verts) {
170      case 3: copy_bfc(c, c->vert[2]); FALLTHROUGH;
171      case 2: copy_bfc(c, c->vert[1]); FALLTHROUGH;
172      case 1: copy_bfc(c, c->vert[0]);
173      }
174   }
175   brw_ENDIF(p);
176}
177
178
179
180/***********************************************************************
181 * Flat shading
182 */
183
184static void copy_flatshaded_attributes(struct brw_sf_compile *c,
185                                       struct brw_reg dst,
186                                       struct brw_reg src)
187{
188   struct brw_codegen *p = &c->func;
189   int i;
190
191   for (i = 0; i < c->vue_map.num_slots; i++) {
192      if (c->key.interp_mode[i] == INTERP_MODE_FLAT) {
193         brw_MOV(p,
194                 get_vue_slot(c, dst, i),
195                 get_vue_slot(c, src, i));
196      }
197   }
198}
199
200static int count_flatshaded_attributes(struct brw_sf_compile *c)
201{
202   int i;
203   int count = 0;
204
205   for (i = 0; i < c->vue_map.num_slots; i++)
206      if (c->key.interp_mode[i] == INTERP_MODE_FLAT)
207         count++;
208
209   return count;
210}
211
212
213
214/* Need to use a computed jump to copy flatshaded attributes as the
215 * vertices are ordered according to y-coordinate before reaching this
216 * point, so the PV could be anywhere.
217 */
218static void do_flatshade_triangle( struct brw_sf_compile *c )
219{
220   struct brw_codegen *p = &c->func;
221   GLuint nr;
222   GLuint jmpi = 1;
223
224   /* Already done in clip program:
225    */
226   if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
227      return;
228
229   if (p->devinfo->ver == 5)
230       jmpi = 2;
231
232   nr = count_flatshaded_attributes(c);
233
234   brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
235   brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
236
237   copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
238   copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);
239   brw_JMPI(p, brw_imm_d(jmpi*(nr*4+1)), BRW_PREDICATE_NONE);
240
241   copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
242   copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);
243   brw_JMPI(p, brw_imm_d(jmpi*nr*2), BRW_PREDICATE_NONE);
244
245   copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);
246   copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);
247}
248
249
250static void do_flatshade_line( struct brw_sf_compile *c )
251{
252   struct brw_codegen *p = &c->func;
253   GLuint nr;
254   GLuint jmpi = 1;
255
256   /* Already done in clip program:
257    */
258   if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
259      return;
260
261   if (p->devinfo->ver == 5)
262       jmpi = 2;
263
264   nr = count_flatshaded_attributes(c);
265
266   brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
267   brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
268   copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
269
270   brw_JMPI(p, brw_imm_ud(jmpi*nr), BRW_PREDICATE_NONE);
271   copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
272}
273
274
275/***********************************************************************
276 * Triangle setup.
277 */
278
279
280static void alloc_regs( struct brw_sf_compile *c )
281{
282   GLuint reg, i;
283
284   /* Values computed by fixed function unit:
285    */
286   c->pv  = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
287   c->det = brw_vec1_grf(1, 2);
288   c->dx0 = brw_vec1_grf(1, 3);
289   c->dx2 = brw_vec1_grf(1, 4);
290   c->dy0 = brw_vec1_grf(1, 5);
291   c->dy2 = brw_vec1_grf(1, 6);
292
293   /* z and 1/w passed in seperately:
294    */
295   c->z[0]     = brw_vec1_grf(2, 0);
296   c->inv_w[0] = brw_vec1_grf(2, 1);
297   c->z[1]     = brw_vec1_grf(2, 2);
298   c->inv_w[1] = brw_vec1_grf(2, 3);
299   c->z[2]     = brw_vec1_grf(2, 4);
300   c->inv_w[2] = brw_vec1_grf(2, 5);
301
302   /* The vertices:
303    */
304   reg = 3;
305   for (i = 0; i < c->nr_verts; i++) {
306      c->vert[i] = brw_vec8_grf(reg, 0);
307      reg += c->nr_attr_regs;
308   }
309
310   /* Temporaries, allocated after last vertex reg.
311    */
312   c->inv_det = brw_vec1_grf(reg, 0);  reg++;
313   c->a1_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
314   c->a2_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
315   c->tmp = brw_vec8_grf(reg, 0);  reg++;
316
317   /* Note grf allocation:
318    */
319   c->prog_data.total_grf = reg;
320
321
322   /* Outputs of this program - interpolation coefficients for
323    * rasterization:
324    */
325   c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
326   c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
327   c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
328}
329
330
331static void copy_z_inv_w( struct brw_sf_compile *c )
332{
333   struct brw_codegen *p = &c->func;
334   GLuint i;
335
336   /* Copy both scalars with a single MOV:
337    */
338   for (i = 0; i < c->nr_verts; i++)
339      brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
340}
341
342
343static void invert_det( struct brw_sf_compile *c)
344{
345   /* Looks like we invert all 8 elements just to get 1/det in
346    * position 2 !?!
347    */
348   gfx4_math(&c->func,
349	     c->inv_det,
350	     BRW_MATH_FUNCTION_INV,
351	     0,
352	     c->det,
353	     BRW_MATH_PRECISION_FULL);
354
355}
356
357
358static bool
359calculate_masks(struct brw_sf_compile *c,
360                GLuint reg,
361                GLushort *pc,
362                GLushort *pc_persp,
363                GLushort *pc_linear)
364{
365   bool is_last_attr = (reg == c->nr_setup_regs - 1);
366   enum glsl_interp_mode interp;
367
368   *pc_persp = 0;
369   *pc_linear = 0;
370   *pc = 0xf;
371
372   interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 0)];
373   if (interp == INTERP_MODE_SMOOTH) {
374      *pc_linear = 0xf;
375      *pc_persp = 0xf;
376   } else if (interp == INTERP_MODE_NOPERSPECTIVE)
377      *pc_linear = 0xf;
378
379   /* Maybe only processs one attribute on the final round:
380    */
381   if (vert_reg_to_varying(c, reg, 1) != BRW_VARYING_SLOT_COUNT) {
382      *pc |= 0xf0;
383
384      interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 1)];
385      if (interp == INTERP_MODE_SMOOTH) {
386         *pc_linear |= 0xf0;
387         *pc_persp |= 0xf0;
388      } else if (interp == INTERP_MODE_NOPERSPECTIVE)
389         *pc_linear |= 0xf0;
390   }
391
392   return is_last_attr;
393}
394
395/* Calculates the predicate control for which channels of a reg
396 * (containing 2 attrs) to do point sprite coordinate replacement on.
397 */
398static uint16_t
399calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
400{
401   int varying1, varying2;
402   uint16_t pc = 0;
403
404   varying1 = vert_reg_to_varying(c, reg, 0);
405   if (varying1 >= VARYING_SLOT_TEX0 && varying1 <= VARYING_SLOT_TEX7) {
406      if (c->key.point_sprite_coord_replace & (1 << (varying1 - VARYING_SLOT_TEX0)))
407	 pc |= 0x0f;
408   }
409   if (varying1 == BRW_VARYING_SLOT_PNTC)
410      pc |= 0x0f;
411
412   varying2 = vert_reg_to_varying(c, reg, 1);
413   if (varying2 >= VARYING_SLOT_TEX0 && varying2 <= VARYING_SLOT_TEX7) {
414      if (c->key.point_sprite_coord_replace & (1 << (varying2 -
415                                                     VARYING_SLOT_TEX0)))
416         pc |= 0xf0;
417   }
418   if (varying2 == BRW_VARYING_SLOT_PNTC)
419      pc |= 0xf0;
420
421   return pc;
422}
423
424static void
425set_predicate_control_flag_value(struct brw_codegen *p,
426                                 struct brw_sf_compile *c,
427                                 unsigned value)
428{
429   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
430
431   if (value != 0xff) {
432      if (value != c->flag_value) {
433         brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value));
434         c->flag_value = value;
435      }
436
437      brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
438   }
439}
440
441static void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
442{
443   struct brw_codegen *p = &c->func;
444   GLuint i;
445
446   c->flag_value = 0xff;
447   c->nr_verts = 3;
448
449   if (allocate)
450      alloc_regs(c);
451
452   invert_det(c);
453   copy_z_inv_w(c);
454
455   if (c->key.do_twoside_color)
456      do_twoside_color(c);
457
458   if (c->key.contains_flat_varying)
459      do_flatshade_triangle(c);
460
461
462   for (i = 0; i < c->nr_setup_regs; i++)
463   {
464      /* Pair of incoming attributes:
465       */
466      struct brw_reg a0 = offset(c->vert[0], i);
467      struct brw_reg a1 = offset(c->vert[1], i);
468      struct brw_reg a2 = offset(c->vert[2], i);
469      GLushort pc, pc_persp, pc_linear;
470      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
471
472      if (pc_persp)
473      {
474	 set_predicate_control_flag_value(p, c, pc_persp);
475	 brw_MUL(p, a0, a0, c->inv_w[0]);
476	 brw_MUL(p, a1, a1, c->inv_w[1]);
477	 brw_MUL(p, a2, a2, c->inv_w[2]);
478      }
479
480
481      /* Calculate coefficients for interpolated values:
482       */
483      if (pc_linear)
484      {
485	 set_predicate_control_flag_value(p, c, pc_linear);
486
487	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
488	 brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
489
490	 /* calculate dA/dx
491	  */
492	 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
493	 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
494	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
495
496	 /* calculate dA/dy
497	  */
498	 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
499	 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
500	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
501      }
502
503      {
504	 set_predicate_control_flag_value(p, c, pc);
505	 /* start point for interpolation
506	  */
507	 brw_MOV(p, c->m3C0, a0);
508
509	 /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
510	  * the send instruction:
511	  */
512	 brw_urb_WRITE(p,
513		       brw_null_reg(),
514		       0,
515		       brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
516                       last ? BRW_URB_WRITE_EOT_COMPLETE
517                       : BRW_URB_WRITE_NO_FLAGS,
518		       4, 	/* msg len */
519		       0,	/* response len */
520		       i*4,	/* offset */
521		       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
522      }
523   }
524
525   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
526}
527
528
529
530static void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
531{
532   struct brw_codegen *p = &c->func;
533   GLuint i;
534
535   c->flag_value = 0xff;
536   c->nr_verts = 2;
537
538   if (allocate)
539      alloc_regs(c);
540
541   invert_det(c);
542   copy_z_inv_w(c);
543
544   if (c->key.contains_flat_varying)
545      do_flatshade_line(c);
546
547   for (i = 0; i < c->nr_setup_regs; i++)
548   {
549      /* Pair of incoming attributes:
550       */
551      struct brw_reg a0 = offset(c->vert[0], i);
552      struct brw_reg a1 = offset(c->vert[1], i);
553      GLushort pc, pc_persp, pc_linear;
554      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
555
556      if (pc_persp)
557      {
558	 set_predicate_control_flag_value(p, c, pc_persp);
559	 brw_MUL(p, a0, a0, c->inv_w[0]);
560	 brw_MUL(p, a1, a1, c->inv_w[1]);
561      }
562
563      /* Calculate coefficients for position, color:
564       */
565      if (pc_linear) {
566	 set_predicate_control_flag_value(p, c, pc_linear);
567
568	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
569
570	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
571	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
572
573	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
574	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
575      }
576
577      {
578	 set_predicate_control_flag_value(p, c, pc);
579
580	 /* start point for interpolation
581	  */
582	 brw_MOV(p, c->m3C0, a0);
583
584	 /* Copy m0..m3 to URB.
585	  */
586	 brw_urb_WRITE(p,
587		       brw_null_reg(),
588		       0,
589		       brw_vec8_grf(0, 0),
590                       last ? BRW_URB_WRITE_EOT_COMPLETE
591                       : BRW_URB_WRITE_NO_FLAGS,
592		       4, 	/* msg len */
593		       0,	/* response len */
594		       i*4,	/* urb destination offset */
595		       BRW_URB_SWIZZLE_TRANSPOSE);
596      }
597   }
598
599   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
600}
601
602static void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
603{
604   struct brw_codegen *p = &c->func;
605   GLuint i;
606
607   c->flag_value = 0xff;
608   c->nr_verts = 1;
609
610   if (allocate)
611      alloc_regs(c);
612
613   copy_z_inv_w(c);
614   for (i = 0; i < c->nr_setup_regs; i++)
615   {
616      struct brw_reg a0 = offset(c->vert[0], i);
617      GLushort pc, pc_persp, pc_linear, pc_coord_replace;
618      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
619
620      pc_coord_replace = calculate_point_sprite_mask(c, i);
621      pc_persp &= ~pc_coord_replace;
622
623      if (pc_persp) {
624	 set_predicate_control_flag_value(p, c, pc_persp);
625	 brw_MUL(p, a0, a0, c->inv_w[0]);
626      }
627
628      /* Point sprite coordinate replacement: A texcoord with this
629       * enabled gets replaced with the value (x, y, 0, 1) where x and
630       * y vary from 0 to 1 across the horizontal and vertical of the
631       * point.
632       */
633      if (pc_coord_replace) {
634	 set_predicate_control_flag_value(p, c, pc_coord_replace);
635	 /* Caculate 1.0/PointWidth */
636	 gfx4_math(&c->func,
637		   c->tmp,
638		   BRW_MATH_FUNCTION_INV,
639		   0,
640		   c->dx0,
641		   BRW_MATH_PRECISION_FULL);
642
643	 brw_set_default_access_mode(p, BRW_ALIGN_16);
644
645	 /* dA/dx, dA/dy */
646	 brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
647	 brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
648	 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
649	 if (c->key.sprite_origin_lower_left) {
650	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
651	 } else {
652	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
653	 }
654
655	 /* attribute constant offset */
656	 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
657	 if (c->key.sprite_origin_lower_left) {
658	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
659	 } else {
660	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
661	 }
662
663	 brw_set_default_access_mode(p, BRW_ALIGN_1);
664      }
665
666      if (pc & ~pc_coord_replace) {
667	 set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
668	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
669	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
670	 brw_MOV(p, c->m3C0, a0); /* constant value */
671      }
672
673
674      set_predicate_control_flag_value(p, c, pc);
675      /* Copy m0..m3 to URB. */
676      brw_urb_WRITE(p,
677		    brw_null_reg(),
678		    0,
679		    brw_vec8_grf(0, 0),
680                    last ? BRW_URB_WRITE_EOT_COMPLETE
681                    : BRW_URB_WRITE_NO_FLAGS,
682		    4, 	/* msg len */
683		    0,	/* response len */
684		    i*4,	/* urb destination offset */
685		    BRW_URB_SWIZZLE_TRANSPOSE);
686   }
687
688   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
689}
690
691/* Points setup - several simplifications as all attributes are
692 * constant across the face of the point (point sprites excluded!)
693 */
694static void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
695{
696   struct brw_codegen *p = &c->func;
697   GLuint i;
698
699   c->flag_value = 0xff;
700   c->nr_verts = 1;
701
702   if (allocate)
703      alloc_regs(c);
704
705   copy_z_inv_w(c);
706
707   brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
708   brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
709
710   for (i = 0; i < c->nr_setup_regs; i++)
711   {
712      struct brw_reg a0 = offset(c->vert[0], i);
713      GLushort pc, pc_persp, pc_linear;
714      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
715
716      if (pc_persp)
717      {
718	 /* This seems odd as the values are all constant, but the
719	  * fragment shader will be expecting it:
720	  */
721	 set_predicate_control_flag_value(p, c, pc_persp);
722	 brw_MUL(p, a0, a0, c->inv_w[0]);
723      }
724
725
726      /* The delta values are always zero, just send the starting
727       * coordinate.  Again, this is to fit in with the interpolation
728       * code in the fragment shader.
729       */
730      {
731	 set_predicate_control_flag_value(p, c, pc);
732
733	 brw_MOV(p, c->m3C0, a0); /* constant value */
734
735	 /* Copy m0..m3 to URB.
736	  */
737	 brw_urb_WRITE(p,
738		       brw_null_reg(),
739		       0,
740		       brw_vec8_grf(0, 0),
741                       last ? BRW_URB_WRITE_EOT_COMPLETE
742                       : BRW_URB_WRITE_NO_FLAGS,
743		       4, 	/* msg len */
744		       0,	/* response len */
745		       i*4,	/* urb destination offset */
746		       BRW_URB_SWIZZLE_TRANSPOSE);
747      }
748   }
749
750   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
751}
752
753static void brw_emit_anyprim_setup( struct brw_sf_compile *c )
754{
755   struct brw_codegen *p = &c->func;
756   struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
757   struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
758   struct brw_reg primmask;
759   int jmp;
760   struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
761
762   c->nr_verts = 3;
763   alloc_regs(c);
764
765   primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
766
767   brw_MOV(p, primmask, brw_imm_ud(1));
768   brw_SHL(p, primmask, primmask, payload_prim);
769
770   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
771					       (1<<_3DPRIM_TRISTRIP) |
772					       (1<<_3DPRIM_TRIFAN) |
773					       (1<<_3DPRIM_TRISTRIP_REVERSE) |
774					       (1<<_3DPRIM_POLYGON) |
775					       (1<<_3DPRIM_RECTLIST) |
776					       (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
777   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
778   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
779   brw_emit_tri_setup(c, false);
780   brw_land_fwd_jump(p, jmp);
781
782   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
783					       (1<<_3DPRIM_LINESTRIP) |
784					       (1<<_3DPRIM_LINELOOP) |
785					       (1<<_3DPRIM_LINESTRIP_CONT) |
786					       (1<<_3DPRIM_LINESTRIP_BF) |
787					       (1<<_3DPRIM_LINESTRIP_CONT_BF)));
788   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
789   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
790   brw_emit_line_setup(c, false);
791   brw_land_fwd_jump(p, jmp);
792
793   brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
794   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
795   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
796   brw_emit_point_sprite_setup(c, false);
797   brw_land_fwd_jump(p, jmp);
798
799   brw_emit_point_setup( c, false );
800}
801
802const unsigned *
803brw_compile_sf(const struct brw_compiler *compiler,
804               void *mem_ctx,
805               const struct brw_sf_prog_key *key,
806               struct brw_sf_prog_data *prog_data,
807               struct brw_vue_map *vue_map,
808               unsigned *final_assembly_size)
809{
810   struct brw_sf_compile c;
811   memset(&c, 0, sizeof(c));
812
813   /* Begin the compilation:
814    */
815   brw_init_codegen(compiler->devinfo, &c.func, mem_ctx);
816
817   c.key = *key;
818   c.vue_map = *vue_map;
819   if (c.key.do_point_coord) {
820      /*
821       * gl_PointCoord is a FS instead of VS builtin variable, thus it's
822       * not included in c.vue_map generated in VS stage. Here we add
823       * it manually to let SF shader generate the needed interpolation
824       * coefficient for FS shader.
825       */
826      c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots;
827      c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC;
828   }
829   c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
830   c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
831   c.nr_setup_regs = c.nr_attr_regs;
832
833   c.prog_data.urb_read_length = c.nr_attr_regs;
834   c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
835
836   /* Which primitive?  Or all three?
837    */
838   switch (key->primitive) {
839   case BRW_SF_PRIM_TRIANGLES:
840      c.nr_verts = 3;
841      brw_emit_tri_setup( &c, true );
842      break;
843   case BRW_SF_PRIM_LINES:
844      c.nr_verts = 2;
845      brw_emit_line_setup( &c, true );
846      break;
847   case BRW_SF_PRIM_POINTS:
848      c.nr_verts = 1;
849      if (key->do_point_sprite)
850	  brw_emit_point_sprite_setup( &c, true );
851      else
852	  brw_emit_point_setup( &c, true );
853      break;
854   case BRW_SF_PRIM_UNFILLED_TRIS:
855      c.nr_verts = 3;
856      brw_emit_anyprim_setup( &c );
857      break;
858   default:
859      unreachable("not reached");
860   }
861
862   /* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register
863    * source). Compacting would be difficult.
864    */
865   /* brw_compact_instructions(&c.func, 0, 0, NULL); */
866
867   *prog_data = c.prog_data;
868
869   const unsigned *program = brw_get_program(&c.func, final_assembly_size);
870
871   if (INTEL_DEBUG(DEBUG_SF)) {
872      fprintf(stderr, "sf:\n");
873      brw_disassemble_with_labels(compiler->devinfo,
874                                  program, 0, *final_assembly_size, stderr);
875      fprintf(stderr, "\n");
876   }
877
878   return program;
879}
880