101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2006 - 2017 Intel Corporation
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
2401e04c3fSmrg#include "brw_compiler.h"
2501e04c3fSmrg#include "brw_eu.h"
2601e04c3fSmrg
277ec681f3Smrg#include "dev/intel_debug.h"
2801e04c3fSmrg
2901e04c3fSmrgstruct brw_sf_compile {
3001e04c3fSmrg   struct brw_codegen func;
3101e04c3fSmrg   struct brw_sf_prog_key key;
3201e04c3fSmrg   struct brw_sf_prog_data prog_data;
3301e04c3fSmrg
3401e04c3fSmrg   struct brw_reg pv;
3501e04c3fSmrg   struct brw_reg det;
3601e04c3fSmrg   struct brw_reg dx0;
3701e04c3fSmrg   struct brw_reg dx2;
3801e04c3fSmrg   struct brw_reg dy0;
3901e04c3fSmrg   struct brw_reg dy2;
4001e04c3fSmrg
4101e04c3fSmrg   /* z and 1/w passed in seperately:
4201e04c3fSmrg    */
4301e04c3fSmrg   struct brw_reg z[3];
4401e04c3fSmrg   struct brw_reg inv_w[3];
4501e04c3fSmrg
4601e04c3fSmrg   /* The vertices:
4701e04c3fSmrg    */
4801e04c3fSmrg   struct brw_reg vert[3];
4901e04c3fSmrg
5001e04c3fSmrg    /* Temporaries, allocated after last vertex reg.
5101e04c3fSmrg    */
5201e04c3fSmrg   struct brw_reg inv_det;
5301e04c3fSmrg   struct brw_reg a1_sub_a0;
5401e04c3fSmrg   struct brw_reg a2_sub_a0;
5501e04c3fSmrg   struct brw_reg tmp;
5601e04c3fSmrg
5701e04c3fSmrg   struct brw_reg m1Cx;
5801e04c3fSmrg   struct brw_reg m2Cy;
5901e04c3fSmrg   struct brw_reg m3C0;
6001e04c3fSmrg
6101e04c3fSmrg   GLuint nr_verts;
6201e04c3fSmrg   GLuint nr_attr_regs;
6301e04c3fSmrg   GLuint nr_setup_regs;
6401e04c3fSmrg   int urb_entry_read_offset;
6501e04c3fSmrg
6601e04c3fSmrg   /** The last known value of the f0.0 flag register. */
6701e04c3fSmrg   unsigned flag_value;
6801e04c3fSmrg
6901e04c3fSmrg   struct brw_vue_map vue_map;
7001e04c3fSmrg};
7101e04c3fSmrg
7201e04c3fSmrg/**
7301e04c3fSmrg * Determine the vue slot corresponding to the given half of the given register.
7401e04c3fSmrg */
7501e04c3fSmrgstatic inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint reg,
7601e04c3fSmrg                                       int half)
7701e04c3fSmrg{
7801e04c3fSmrg   return (reg + c->urb_entry_read_offset) * 2 + half;
7901e04c3fSmrg}
8001e04c3fSmrg
8101e04c3fSmrg/**
8201e04c3fSmrg * Determine the varying corresponding to the given half of the given
8301e04c3fSmrg * register.  half=0 means the first half of a register, half=1 means the
8401e04c3fSmrg * second half.
8501e04c3fSmrg */
8601e04c3fSmrgstatic inline int vert_reg_to_varying(struct brw_sf_compile *c, GLuint reg,
8701e04c3fSmrg                                      int half)
8801e04c3fSmrg{
8901e04c3fSmrg   int vue_slot = vert_reg_to_vue_slot(c, reg, half);
9001e04c3fSmrg   return c->vue_map.slot_to_varying[vue_slot];
9101e04c3fSmrg}
9201e04c3fSmrg
9301e04c3fSmrg/**
9401e04c3fSmrg * Determine the register corresponding to the given vue slot
9501e04c3fSmrg */
9601e04c3fSmrgstatic struct brw_reg get_vue_slot(struct brw_sf_compile *c,
9701e04c3fSmrg                                   struct brw_reg vert,
9801e04c3fSmrg                                   int vue_slot)
9901e04c3fSmrg{
10001e04c3fSmrg   GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
10101e04c3fSmrg   GLuint sub = vue_slot % 2;
10201e04c3fSmrg
10301e04c3fSmrg   return brw_vec4_grf(vert.nr + off, sub * 4);
10401e04c3fSmrg}
10501e04c3fSmrg
10601e04c3fSmrg/**
10701e04c3fSmrg * Determine the register corresponding to the given varying.
10801e04c3fSmrg */
10901e04c3fSmrgstatic struct brw_reg get_varying(struct brw_sf_compile *c,
11001e04c3fSmrg                                  struct brw_reg vert,
11101e04c3fSmrg                                  GLuint varying)
11201e04c3fSmrg{
11301e04c3fSmrg   int vue_slot = c->vue_map.varying_to_slot[varying];
11401e04c3fSmrg   assert (vue_slot >= c->urb_entry_read_offset);
11501e04c3fSmrg   return get_vue_slot(c, vert, vue_slot);
11601e04c3fSmrg}
11701e04c3fSmrg
11801e04c3fSmrgstatic bool
11901e04c3fSmrghave_attr(struct brw_sf_compile *c, GLuint attr)
12001e04c3fSmrg{
12101e04c3fSmrg   return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0;
12201e04c3fSmrg}
12301e04c3fSmrg
12401e04c3fSmrg/***********************************************************************
12501e04c3fSmrg * Twoside lighting
12601e04c3fSmrg */
12701e04c3fSmrgstatic void copy_bfc( struct brw_sf_compile *c,
12801e04c3fSmrg		      struct brw_reg vert )
12901e04c3fSmrg{
13001e04c3fSmrg   struct brw_codegen *p = &c->func;
13101e04c3fSmrg   GLuint i;
13201e04c3fSmrg
13301e04c3fSmrg   for (i = 0; i < 2; i++) {
13401e04c3fSmrg      if (have_attr(c, VARYING_SLOT_COL0+i) &&
13501e04c3fSmrg	  have_attr(c, VARYING_SLOT_BFC0+i))
13601e04c3fSmrg	 brw_MOV(p,
13701e04c3fSmrg		 get_varying(c, vert, VARYING_SLOT_COL0+i),
13801e04c3fSmrg		 get_varying(c, vert, VARYING_SLOT_BFC0+i));
13901e04c3fSmrg   }
14001e04c3fSmrg}
14101e04c3fSmrg
14201e04c3fSmrg
14301e04c3fSmrgstatic void do_twoside_color( struct brw_sf_compile *c )
14401e04c3fSmrg{
14501e04c3fSmrg   struct brw_codegen *p = &c->func;
14601e04c3fSmrg   GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
14701e04c3fSmrg
14801e04c3fSmrg   /* Already done in clip program:
14901e04c3fSmrg    */
15001e04c3fSmrg   if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
15101e04c3fSmrg      return;
15201e04c3fSmrg
15301e04c3fSmrg   /* If the vertex shader provides backface color, do the selection. The VS
15401e04c3fSmrg    * promises to set up the front color if the backface color is provided, but
15501e04c3fSmrg    * it may contain junk if never written to.
15601e04c3fSmrg    */
15701e04c3fSmrg   if (!(have_attr(c, VARYING_SLOT_COL0) && have_attr(c, VARYING_SLOT_BFC0)) &&
15801e04c3fSmrg       !(have_attr(c, VARYING_SLOT_COL1) && have_attr(c, VARYING_SLOT_BFC1)))
15901e04c3fSmrg      return;
16001e04c3fSmrg
16101e04c3fSmrg   /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
16201e04c3fSmrg    * to get all channels active inside the IF.  In the clipping code
16301e04c3fSmrg    * we run with NoMask, so it's not an option and we can use
16401e04c3fSmrg    * BRW_EXECUTE_1 for all comparisions.
16501e04c3fSmrg    */
16601e04c3fSmrg   brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
16701e04c3fSmrg   brw_IF(p, BRW_EXECUTE_4);
16801e04c3fSmrg   {
16901e04c3fSmrg      switch (c->nr_verts) {
1707ec681f3Smrg      case 3: copy_bfc(c, c->vert[2]); FALLTHROUGH;
1717ec681f3Smrg      case 2: copy_bfc(c, c->vert[1]); FALLTHROUGH;
17201e04c3fSmrg      case 1: copy_bfc(c, c->vert[0]);
17301e04c3fSmrg      }
17401e04c3fSmrg   }
17501e04c3fSmrg   brw_ENDIF(p);
17601e04c3fSmrg}
17701e04c3fSmrg
17801e04c3fSmrg
17901e04c3fSmrg
18001e04c3fSmrg/***********************************************************************
18101e04c3fSmrg * Flat shading
18201e04c3fSmrg */
18301e04c3fSmrg
18401e04c3fSmrgstatic void copy_flatshaded_attributes(struct brw_sf_compile *c,
18501e04c3fSmrg                                       struct brw_reg dst,
18601e04c3fSmrg                                       struct brw_reg src)
18701e04c3fSmrg{
18801e04c3fSmrg   struct brw_codegen *p = &c->func;
18901e04c3fSmrg   int i;
19001e04c3fSmrg
19101e04c3fSmrg   for (i = 0; i < c->vue_map.num_slots; i++) {
19201e04c3fSmrg      if (c->key.interp_mode[i] == INTERP_MODE_FLAT) {
19301e04c3fSmrg         brw_MOV(p,
19401e04c3fSmrg                 get_vue_slot(c, dst, i),
19501e04c3fSmrg                 get_vue_slot(c, src, i));
19601e04c3fSmrg      }
19701e04c3fSmrg   }
19801e04c3fSmrg}
19901e04c3fSmrg
20001e04c3fSmrgstatic int count_flatshaded_attributes(struct brw_sf_compile *c)
20101e04c3fSmrg{
20201e04c3fSmrg   int i;
20301e04c3fSmrg   int count = 0;
20401e04c3fSmrg
20501e04c3fSmrg   for (i = 0; i < c->vue_map.num_slots; i++)
20601e04c3fSmrg      if (c->key.interp_mode[i] == INTERP_MODE_FLAT)
20701e04c3fSmrg         count++;
20801e04c3fSmrg
20901e04c3fSmrg   return count;
21001e04c3fSmrg}
21101e04c3fSmrg
21201e04c3fSmrg
21301e04c3fSmrg
21401e04c3fSmrg/* Need to use a computed jump to copy flatshaded attributes as the
21501e04c3fSmrg * vertices are ordered according to y-coordinate before reaching this
21601e04c3fSmrg * point, so the PV could be anywhere.
21701e04c3fSmrg */
21801e04c3fSmrgstatic void do_flatshade_triangle( struct brw_sf_compile *c )
21901e04c3fSmrg{
22001e04c3fSmrg   struct brw_codegen *p = &c->func;
22101e04c3fSmrg   GLuint nr;
22201e04c3fSmrg   GLuint jmpi = 1;
22301e04c3fSmrg
22401e04c3fSmrg   /* Already done in clip program:
22501e04c3fSmrg    */
22601e04c3fSmrg   if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
22701e04c3fSmrg      return;
22801e04c3fSmrg
2297ec681f3Smrg   if (p->devinfo->ver == 5)
23001e04c3fSmrg       jmpi = 2;
23101e04c3fSmrg
23201e04c3fSmrg   nr = count_flatshaded_attributes(c);
23301e04c3fSmrg
23401e04c3fSmrg   brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
23501e04c3fSmrg   brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
23601e04c3fSmrg
23701e04c3fSmrg   copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
23801e04c3fSmrg   copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);
23901e04c3fSmrg   brw_JMPI(p, brw_imm_d(jmpi*(nr*4+1)), BRW_PREDICATE_NONE);
24001e04c3fSmrg
24101e04c3fSmrg   copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
24201e04c3fSmrg   copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);
24301e04c3fSmrg   brw_JMPI(p, brw_imm_d(jmpi*nr*2), BRW_PREDICATE_NONE);
24401e04c3fSmrg
24501e04c3fSmrg   copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);
24601e04c3fSmrg   copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);
24701e04c3fSmrg}
24801e04c3fSmrg
24901e04c3fSmrg
25001e04c3fSmrgstatic void do_flatshade_line( struct brw_sf_compile *c )
25101e04c3fSmrg{
25201e04c3fSmrg   struct brw_codegen *p = &c->func;
25301e04c3fSmrg   GLuint nr;
25401e04c3fSmrg   GLuint jmpi = 1;
25501e04c3fSmrg
25601e04c3fSmrg   /* Already done in clip program:
25701e04c3fSmrg    */
25801e04c3fSmrg   if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
25901e04c3fSmrg      return;
26001e04c3fSmrg
2617ec681f3Smrg   if (p->devinfo->ver == 5)
26201e04c3fSmrg       jmpi = 2;
26301e04c3fSmrg
26401e04c3fSmrg   nr = count_flatshaded_attributes(c);
26501e04c3fSmrg
26601e04c3fSmrg   brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
26701e04c3fSmrg   brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
26801e04c3fSmrg   copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
26901e04c3fSmrg
27001e04c3fSmrg   brw_JMPI(p, brw_imm_ud(jmpi*nr), BRW_PREDICATE_NONE);
27101e04c3fSmrg   copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
27201e04c3fSmrg}
27301e04c3fSmrg
27401e04c3fSmrg
27501e04c3fSmrg/***********************************************************************
27601e04c3fSmrg * Triangle setup.
27701e04c3fSmrg */
27801e04c3fSmrg
27901e04c3fSmrg
28001e04c3fSmrgstatic void alloc_regs( struct brw_sf_compile *c )
28101e04c3fSmrg{
28201e04c3fSmrg   GLuint reg, i;
28301e04c3fSmrg
28401e04c3fSmrg   /* Values computed by fixed function unit:
28501e04c3fSmrg    */
28601e04c3fSmrg   c->pv  = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
28701e04c3fSmrg   c->det = brw_vec1_grf(1, 2);
28801e04c3fSmrg   c->dx0 = brw_vec1_grf(1, 3);
28901e04c3fSmrg   c->dx2 = brw_vec1_grf(1, 4);
29001e04c3fSmrg   c->dy0 = brw_vec1_grf(1, 5);
29101e04c3fSmrg   c->dy2 = brw_vec1_grf(1, 6);
29201e04c3fSmrg
29301e04c3fSmrg   /* z and 1/w passed in seperately:
29401e04c3fSmrg    */
29501e04c3fSmrg   c->z[0]     = brw_vec1_grf(2, 0);
29601e04c3fSmrg   c->inv_w[0] = brw_vec1_grf(2, 1);
29701e04c3fSmrg   c->z[1]     = brw_vec1_grf(2, 2);
29801e04c3fSmrg   c->inv_w[1] = brw_vec1_grf(2, 3);
29901e04c3fSmrg   c->z[2]     = brw_vec1_grf(2, 4);
30001e04c3fSmrg   c->inv_w[2] = brw_vec1_grf(2, 5);
30101e04c3fSmrg
30201e04c3fSmrg   /* The vertices:
30301e04c3fSmrg    */
30401e04c3fSmrg   reg = 3;
30501e04c3fSmrg   for (i = 0; i < c->nr_verts; i++) {
30601e04c3fSmrg      c->vert[i] = brw_vec8_grf(reg, 0);
30701e04c3fSmrg      reg += c->nr_attr_regs;
30801e04c3fSmrg   }
30901e04c3fSmrg
31001e04c3fSmrg   /* Temporaries, allocated after last vertex reg.
31101e04c3fSmrg    */
31201e04c3fSmrg   c->inv_det = brw_vec1_grf(reg, 0);  reg++;
31301e04c3fSmrg   c->a1_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
31401e04c3fSmrg   c->a2_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
31501e04c3fSmrg   c->tmp = brw_vec8_grf(reg, 0);  reg++;
31601e04c3fSmrg
31701e04c3fSmrg   /* Note grf allocation:
31801e04c3fSmrg    */
31901e04c3fSmrg   c->prog_data.total_grf = reg;
32001e04c3fSmrg
32101e04c3fSmrg
32201e04c3fSmrg   /* Outputs of this program - interpolation coefficients for
32301e04c3fSmrg    * rasterization:
32401e04c3fSmrg    */
32501e04c3fSmrg   c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
32601e04c3fSmrg   c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
32701e04c3fSmrg   c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
32801e04c3fSmrg}
32901e04c3fSmrg
33001e04c3fSmrg
33101e04c3fSmrgstatic void copy_z_inv_w( struct brw_sf_compile *c )
33201e04c3fSmrg{
33301e04c3fSmrg   struct brw_codegen *p = &c->func;
33401e04c3fSmrg   GLuint i;
33501e04c3fSmrg
33601e04c3fSmrg   /* Copy both scalars with a single MOV:
33701e04c3fSmrg    */
33801e04c3fSmrg   for (i = 0; i < c->nr_verts; i++)
33901e04c3fSmrg      brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
34001e04c3fSmrg}
34101e04c3fSmrg
34201e04c3fSmrg
34301e04c3fSmrgstatic void invert_det( struct brw_sf_compile *c)
34401e04c3fSmrg{
34501e04c3fSmrg   /* Looks like we invert all 8 elements just to get 1/det in
34601e04c3fSmrg    * position 2 !?!
34701e04c3fSmrg    */
3487ec681f3Smrg   gfx4_math(&c->func,
34901e04c3fSmrg	     c->inv_det,
35001e04c3fSmrg	     BRW_MATH_FUNCTION_INV,
35101e04c3fSmrg	     0,
35201e04c3fSmrg	     c->det,
35301e04c3fSmrg	     BRW_MATH_PRECISION_FULL);
35401e04c3fSmrg
35501e04c3fSmrg}
35601e04c3fSmrg
35701e04c3fSmrg
35801e04c3fSmrgstatic bool
35901e04c3fSmrgcalculate_masks(struct brw_sf_compile *c,
36001e04c3fSmrg                GLuint reg,
36101e04c3fSmrg                GLushort *pc,
36201e04c3fSmrg                GLushort *pc_persp,
36301e04c3fSmrg                GLushort *pc_linear)
36401e04c3fSmrg{
36501e04c3fSmrg   bool is_last_attr = (reg == c->nr_setup_regs - 1);
36601e04c3fSmrg   enum glsl_interp_mode interp;
36701e04c3fSmrg
36801e04c3fSmrg   *pc_persp = 0;
36901e04c3fSmrg   *pc_linear = 0;
37001e04c3fSmrg   *pc = 0xf;
37101e04c3fSmrg
37201e04c3fSmrg   interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 0)];
37301e04c3fSmrg   if (interp == INTERP_MODE_SMOOTH) {
37401e04c3fSmrg      *pc_linear = 0xf;
37501e04c3fSmrg      *pc_persp = 0xf;
37601e04c3fSmrg   } else if (interp == INTERP_MODE_NOPERSPECTIVE)
37701e04c3fSmrg      *pc_linear = 0xf;
37801e04c3fSmrg
37901e04c3fSmrg   /* Maybe only processs one attribute on the final round:
38001e04c3fSmrg    */
38101e04c3fSmrg   if (vert_reg_to_varying(c, reg, 1) != BRW_VARYING_SLOT_COUNT) {
38201e04c3fSmrg      *pc |= 0xf0;
38301e04c3fSmrg
38401e04c3fSmrg      interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 1)];
38501e04c3fSmrg      if (interp == INTERP_MODE_SMOOTH) {
38601e04c3fSmrg         *pc_linear |= 0xf0;
38701e04c3fSmrg         *pc_persp |= 0xf0;
38801e04c3fSmrg      } else if (interp == INTERP_MODE_NOPERSPECTIVE)
38901e04c3fSmrg         *pc_linear |= 0xf0;
39001e04c3fSmrg   }
39101e04c3fSmrg
39201e04c3fSmrg   return is_last_attr;
39301e04c3fSmrg}
39401e04c3fSmrg
39501e04c3fSmrg/* Calculates the predicate control for which channels of a reg
39601e04c3fSmrg * (containing 2 attrs) to do point sprite coordinate replacement on.
39701e04c3fSmrg */
39801e04c3fSmrgstatic uint16_t
39901e04c3fSmrgcalculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
40001e04c3fSmrg{
40101e04c3fSmrg   int varying1, varying2;
40201e04c3fSmrg   uint16_t pc = 0;
40301e04c3fSmrg
40401e04c3fSmrg   varying1 = vert_reg_to_varying(c, reg, 0);
40501e04c3fSmrg   if (varying1 >= VARYING_SLOT_TEX0 && varying1 <= VARYING_SLOT_TEX7) {
40601e04c3fSmrg      if (c->key.point_sprite_coord_replace & (1 << (varying1 - VARYING_SLOT_TEX0)))
40701e04c3fSmrg	 pc |= 0x0f;
40801e04c3fSmrg   }
40901e04c3fSmrg   if (varying1 == BRW_VARYING_SLOT_PNTC)
41001e04c3fSmrg      pc |= 0x0f;
41101e04c3fSmrg
41201e04c3fSmrg   varying2 = vert_reg_to_varying(c, reg, 1);
41301e04c3fSmrg   if (varying2 >= VARYING_SLOT_TEX0 && varying2 <= VARYING_SLOT_TEX7) {
41401e04c3fSmrg      if (c->key.point_sprite_coord_replace & (1 << (varying2 -
41501e04c3fSmrg                                                     VARYING_SLOT_TEX0)))
41601e04c3fSmrg         pc |= 0xf0;
41701e04c3fSmrg   }
41801e04c3fSmrg   if (varying2 == BRW_VARYING_SLOT_PNTC)
41901e04c3fSmrg      pc |= 0xf0;
42001e04c3fSmrg
42101e04c3fSmrg   return pc;
42201e04c3fSmrg}
42301e04c3fSmrg
42401e04c3fSmrgstatic void
42501e04c3fSmrgset_predicate_control_flag_value(struct brw_codegen *p,
42601e04c3fSmrg                                 struct brw_sf_compile *c,
42701e04c3fSmrg                                 unsigned value)
42801e04c3fSmrg{
42901e04c3fSmrg   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
43001e04c3fSmrg
43101e04c3fSmrg   if (value != 0xff) {
43201e04c3fSmrg      if (value != c->flag_value) {
43301e04c3fSmrg         brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value));
43401e04c3fSmrg         c->flag_value = value;
43501e04c3fSmrg      }
43601e04c3fSmrg
43701e04c3fSmrg      brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
43801e04c3fSmrg   }
43901e04c3fSmrg}
44001e04c3fSmrg
44101e04c3fSmrgstatic void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
44201e04c3fSmrg{
44301e04c3fSmrg   struct brw_codegen *p = &c->func;
44401e04c3fSmrg   GLuint i;
44501e04c3fSmrg
44601e04c3fSmrg   c->flag_value = 0xff;
44701e04c3fSmrg   c->nr_verts = 3;
44801e04c3fSmrg
44901e04c3fSmrg   if (allocate)
45001e04c3fSmrg      alloc_regs(c);
45101e04c3fSmrg
45201e04c3fSmrg   invert_det(c);
45301e04c3fSmrg   copy_z_inv_w(c);
45401e04c3fSmrg
45501e04c3fSmrg   if (c->key.do_twoside_color)
45601e04c3fSmrg      do_twoside_color(c);
45701e04c3fSmrg
45801e04c3fSmrg   if (c->key.contains_flat_varying)
45901e04c3fSmrg      do_flatshade_triangle(c);
46001e04c3fSmrg
46101e04c3fSmrg
46201e04c3fSmrg   for (i = 0; i < c->nr_setup_regs; i++)
46301e04c3fSmrg   {
46401e04c3fSmrg      /* Pair of incoming attributes:
46501e04c3fSmrg       */
46601e04c3fSmrg      struct brw_reg a0 = offset(c->vert[0], i);
46701e04c3fSmrg      struct brw_reg a1 = offset(c->vert[1], i);
46801e04c3fSmrg      struct brw_reg a2 = offset(c->vert[2], i);
46901e04c3fSmrg      GLushort pc, pc_persp, pc_linear;
47001e04c3fSmrg      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
47101e04c3fSmrg
47201e04c3fSmrg      if (pc_persp)
47301e04c3fSmrg      {
47401e04c3fSmrg	 set_predicate_control_flag_value(p, c, pc_persp);
47501e04c3fSmrg	 brw_MUL(p, a0, a0, c->inv_w[0]);
47601e04c3fSmrg	 brw_MUL(p, a1, a1, c->inv_w[1]);
47701e04c3fSmrg	 brw_MUL(p, a2, a2, c->inv_w[2]);
47801e04c3fSmrg      }
47901e04c3fSmrg
48001e04c3fSmrg
48101e04c3fSmrg      /* Calculate coefficients for interpolated values:
48201e04c3fSmrg       */
48301e04c3fSmrg      if (pc_linear)
48401e04c3fSmrg      {
48501e04c3fSmrg	 set_predicate_control_flag_value(p, c, pc_linear);
48601e04c3fSmrg
48701e04c3fSmrg	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
48801e04c3fSmrg	 brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
48901e04c3fSmrg
49001e04c3fSmrg	 /* calculate dA/dx
49101e04c3fSmrg	  */
49201e04c3fSmrg	 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
49301e04c3fSmrg	 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
49401e04c3fSmrg	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
49501e04c3fSmrg
49601e04c3fSmrg	 /* calculate dA/dy
49701e04c3fSmrg	  */
49801e04c3fSmrg	 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
49901e04c3fSmrg	 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
50001e04c3fSmrg	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
50101e04c3fSmrg      }
50201e04c3fSmrg
50301e04c3fSmrg      {
50401e04c3fSmrg	 set_predicate_control_flag_value(p, c, pc);
50501e04c3fSmrg	 /* start point for interpolation
50601e04c3fSmrg	  */
50701e04c3fSmrg	 brw_MOV(p, c->m3C0, a0);
50801e04c3fSmrg
50901e04c3fSmrg	 /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
51001e04c3fSmrg	  * the send instruction:
51101e04c3fSmrg	  */
51201e04c3fSmrg	 brw_urb_WRITE(p,
51301e04c3fSmrg		       brw_null_reg(),
51401e04c3fSmrg		       0,
51501e04c3fSmrg		       brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
51601e04c3fSmrg                       last ? BRW_URB_WRITE_EOT_COMPLETE
51701e04c3fSmrg                       : BRW_URB_WRITE_NO_FLAGS,
51801e04c3fSmrg		       4, 	/* msg len */
51901e04c3fSmrg		       0,	/* response len */
52001e04c3fSmrg		       i*4,	/* offset */
52101e04c3fSmrg		       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
52201e04c3fSmrg      }
52301e04c3fSmrg   }
52401e04c3fSmrg
52501e04c3fSmrg   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
52601e04c3fSmrg}
52701e04c3fSmrg
52801e04c3fSmrg
52901e04c3fSmrg
53001e04c3fSmrgstatic void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
53101e04c3fSmrg{
53201e04c3fSmrg   struct brw_codegen *p = &c->func;
53301e04c3fSmrg   GLuint i;
53401e04c3fSmrg
53501e04c3fSmrg   c->flag_value = 0xff;
53601e04c3fSmrg   c->nr_verts = 2;
53701e04c3fSmrg
53801e04c3fSmrg   if (allocate)
53901e04c3fSmrg      alloc_regs(c);
54001e04c3fSmrg
54101e04c3fSmrg   invert_det(c);
54201e04c3fSmrg   copy_z_inv_w(c);
54301e04c3fSmrg
54401e04c3fSmrg   if (c->key.contains_flat_varying)
54501e04c3fSmrg      do_flatshade_line(c);
54601e04c3fSmrg
54701e04c3fSmrg   for (i = 0; i < c->nr_setup_regs; i++)
54801e04c3fSmrg   {
54901e04c3fSmrg      /* Pair of incoming attributes:
55001e04c3fSmrg       */
55101e04c3fSmrg      struct brw_reg a0 = offset(c->vert[0], i);
55201e04c3fSmrg      struct brw_reg a1 = offset(c->vert[1], i);
55301e04c3fSmrg      GLushort pc, pc_persp, pc_linear;
55401e04c3fSmrg      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
55501e04c3fSmrg
55601e04c3fSmrg      if (pc_persp)
55701e04c3fSmrg      {
55801e04c3fSmrg	 set_predicate_control_flag_value(p, c, pc_persp);
55901e04c3fSmrg	 brw_MUL(p, a0, a0, c->inv_w[0]);
56001e04c3fSmrg	 brw_MUL(p, a1, a1, c->inv_w[1]);
56101e04c3fSmrg      }
56201e04c3fSmrg
56301e04c3fSmrg      /* Calculate coefficients for position, color:
56401e04c3fSmrg       */
56501e04c3fSmrg      if (pc_linear) {
56601e04c3fSmrg	 set_predicate_control_flag_value(p, c, pc_linear);
56701e04c3fSmrg
56801e04c3fSmrg	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
56901e04c3fSmrg
57001e04c3fSmrg	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
57101e04c3fSmrg	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
57201e04c3fSmrg
57301e04c3fSmrg	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
57401e04c3fSmrg	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
57501e04c3fSmrg      }
57601e04c3fSmrg
57701e04c3fSmrg      {
57801e04c3fSmrg	 set_predicate_control_flag_value(p, c, pc);
57901e04c3fSmrg
58001e04c3fSmrg	 /* start point for interpolation
58101e04c3fSmrg	  */
58201e04c3fSmrg	 brw_MOV(p, c->m3C0, a0);
58301e04c3fSmrg
58401e04c3fSmrg	 /* Copy m0..m3 to URB.
58501e04c3fSmrg	  */
58601e04c3fSmrg	 brw_urb_WRITE(p,
58701e04c3fSmrg		       brw_null_reg(),
58801e04c3fSmrg		       0,
58901e04c3fSmrg		       brw_vec8_grf(0, 0),
59001e04c3fSmrg                       last ? BRW_URB_WRITE_EOT_COMPLETE
59101e04c3fSmrg                       : BRW_URB_WRITE_NO_FLAGS,
59201e04c3fSmrg		       4, 	/* msg len */
59301e04c3fSmrg		       0,	/* response len */
59401e04c3fSmrg		       i*4,	/* urb destination offset */
59501e04c3fSmrg		       BRW_URB_SWIZZLE_TRANSPOSE);
59601e04c3fSmrg      }
59701e04c3fSmrg   }
59801e04c3fSmrg
59901e04c3fSmrg   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
60001e04c3fSmrg}
60101e04c3fSmrg
60201e04c3fSmrgstatic void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
60301e04c3fSmrg{
60401e04c3fSmrg   struct brw_codegen *p = &c->func;
60501e04c3fSmrg   GLuint i;
60601e04c3fSmrg
60701e04c3fSmrg   c->flag_value = 0xff;
60801e04c3fSmrg   c->nr_verts = 1;
60901e04c3fSmrg
61001e04c3fSmrg   if (allocate)
61101e04c3fSmrg      alloc_regs(c);
61201e04c3fSmrg
61301e04c3fSmrg   copy_z_inv_w(c);
61401e04c3fSmrg   for (i = 0; i < c->nr_setup_regs; i++)
61501e04c3fSmrg   {
61601e04c3fSmrg      struct brw_reg a0 = offset(c->vert[0], i);
61701e04c3fSmrg      GLushort pc, pc_persp, pc_linear, pc_coord_replace;
61801e04c3fSmrg      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
61901e04c3fSmrg
62001e04c3fSmrg      pc_coord_replace = calculate_point_sprite_mask(c, i);
62101e04c3fSmrg      pc_persp &= ~pc_coord_replace;
62201e04c3fSmrg
62301e04c3fSmrg      if (pc_persp) {
62401e04c3fSmrg	 set_predicate_control_flag_value(p, c, pc_persp);
62501e04c3fSmrg	 brw_MUL(p, a0, a0, c->inv_w[0]);
62601e04c3fSmrg      }
62701e04c3fSmrg
62801e04c3fSmrg      /* Point sprite coordinate replacement: A texcoord with this
62901e04c3fSmrg       * enabled gets replaced with the value (x, y, 0, 1) where x and
63001e04c3fSmrg       * y vary from 0 to 1 across the horizontal and vertical of the
63101e04c3fSmrg       * point.
63201e04c3fSmrg       */
63301e04c3fSmrg      if (pc_coord_replace) {
63401e04c3fSmrg	 set_predicate_control_flag_value(p, c, pc_coord_replace);
63501e04c3fSmrg	 /* Caculate 1.0/PointWidth */
6367ec681f3Smrg	 gfx4_math(&c->func,
63701e04c3fSmrg		   c->tmp,
63801e04c3fSmrg		   BRW_MATH_FUNCTION_INV,
63901e04c3fSmrg		   0,
64001e04c3fSmrg		   c->dx0,
64101e04c3fSmrg		   BRW_MATH_PRECISION_FULL);
64201e04c3fSmrg
64301e04c3fSmrg	 brw_set_default_access_mode(p, BRW_ALIGN_16);
64401e04c3fSmrg
64501e04c3fSmrg	 /* dA/dx, dA/dy */
64601e04c3fSmrg	 brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
64701e04c3fSmrg	 brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
64801e04c3fSmrg	 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
64901e04c3fSmrg	 if (c->key.sprite_origin_lower_left) {
65001e04c3fSmrg	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
65101e04c3fSmrg	 } else {
65201e04c3fSmrg	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
65301e04c3fSmrg	 }
65401e04c3fSmrg
65501e04c3fSmrg	 /* attribute constant offset */
65601e04c3fSmrg	 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
65701e04c3fSmrg	 if (c->key.sprite_origin_lower_left) {
65801e04c3fSmrg	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
65901e04c3fSmrg	 } else {
66001e04c3fSmrg	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
66101e04c3fSmrg	 }
66201e04c3fSmrg
66301e04c3fSmrg	 brw_set_default_access_mode(p, BRW_ALIGN_1);
66401e04c3fSmrg      }
66501e04c3fSmrg
66601e04c3fSmrg      if (pc & ~pc_coord_replace) {
66701e04c3fSmrg	 set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
66801e04c3fSmrg	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
66901e04c3fSmrg	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
67001e04c3fSmrg	 brw_MOV(p, c->m3C0, a0); /* constant value */
67101e04c3fSmrg      }
67201e04c3fSmrg
67301e04c3fSmrg
67401e04c3fSmrg      set_predicate_control_flag_value(p, c, pc);
67501e04c3fSmrg      /* Copy m0..m3 to URB. */
67601e04c3fSmrg      brw_urb_WRITE(p,
67701e04c3fSmrg		    brw_null_reg(),
67801e04c3fSmrg		    0,
67901e04c3fSmrg		    brw_vec8_grf(0, 0),
68001e04c3fSmrg                    last ? BRW_URB_WRITE_EOT_COMPLETE
68101e04c3fSmrg                    : BRW_URB_WRITE_NO_FLAGS,
68201e04c3fSmrg		    4, 	/* msg len */
68301e04c3fSmrg		    0,	/* response len */
68401e04c3fSmrg		    i*4,	/* urb destination offset */
68501e04c3fSmrg		    BRW_URB_SWIZZLE_TRANSPOSE);
68601e04c3fSmrg   }
68701e04c3fSmrg
68801e04c3fSmrg   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
68901e04c3fSmrg}
69001e04c3fSmrg
69101e04c3fSmrg/* Points setup - several simplifications as all attributes are
69201e04c3fSmrg * constant across the face of the point (point sprites excluded!)
69301e04c3fSmrg */
69401e04c3fSmrgstatic void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
69501e04c3fSmrg{
69601e04c3fSmrg   struct brw_codegen *p = &c->func;
69701e04c3fSmrg   GLuint i;
69801e04c3fSmrg
69901e04c3fSmrg   c->flag_value = 0xff;
70001e04c3fSmrg   c->nr_verts = 1;
70101e04c3fSmrg
70201e04c3fSmrg   if (allocate)
70301e04c3fSmrg      alloc_regs(c);
70401e04c3fSmrg
70501e04c3fSmrg   copy_z_inv_w(c);
70601e04c3fSmrg
70701e04c3fSmrg   brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
70801e04c3fSmrg   brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
70901e04c3fSmrg
71001e04c3fSmrg   for (i = 0; i < c->nr_setup_regs; i++)
71101e04c3fSmrg   {
71201e04c3fSmrg      struct brw_reg a0 = offset(c->vert[0], i);
71301e04c3fSmrg      GLushort pc, pc_persp, pc_linear;
71401e04c3fSmrg      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
71501e04c3fSmrg
71601e04c3fSmrg      if (pc_persp)
71701e04c3fSmrg      {
71801e04c3fSmrg	 /* This seems odd as the values are all constant, but the
71901e04c3fSmrg	  * fragment shader will be expecting it:
72001e04c3fSmrg	  */
72101e04c3fSmrg	 set_predicate_control_flag_value(p, c, pc_persp);
72201e04c3fSmrg	 brw_MUL(p, a0, a0, c->inv_w[0]);
72301e04c3fSmrg      }
72401e04c3fSmrg
72501e04c3fSmrg
72601e04c3fSmrg      /* The delta values are always zero, just send the starting
72701e04c3fSmrg       * coordinate.  Again, this is to fit in with the interpolation
72801e04c3fSmrg       * code in the fragment shader.
72901e04c3fSmrg       */
73001e04c3fSmrg      {
73101e04c3fSmrg	 set_predicate_control_flag_value(p, c, pc);
73201e04c3fSmrg
73301e04c3fSmrg	 brw_MOV(p, c->m3C0, a0); /* constant value */
73401e04c3fSmrg
73501e04c3fSmrg	 /* Copy m0..m3 to URB.
73601e04c3fSmrg	  */
73701e04c3fSmrg	 brw_urb_WRITE(p,
73801e04c3fSmrg		       brw_null_reg(),
73901e04c3fSmrg		       0,
74001e04c3fSmrg		       brw_vec8_grf(0, 0),
74101e04c3fSmrg                       last ? BRW_URB_WRITE_EOT_COMPLETE
74201e04c3fSmrg                       : BRW_URB_WRITE_NO_FLAGS,
74301e04c3fSmrg		       4, 	/* msg len */
74401e04c3fSmrg		       0,	/* response len */
74501e04c3fSmrg		       i*4,	/* urb destination offset */
74601e04c3fSmrg		       BRW_URB_SWIZZLE_TRANSPOSE);
74701e04c3fSmrg      }
74801e04c3fSmrg   }
74901e04c3fSmrg
75001e04c3fSmrg   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
75101e04c3fSmrg}
75201e04c3fSmrg
75301e04c3fSmrgstatic void brw_emit_anyprim_setup( struct brw_sf_compile *c )
75401e04c3fSmrg{
75501e04c3fSmrg   struct brw_codegen *p = &c->func;
75601e04c3fSmrg   struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
75701e04c3fSmrg   struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
75801e04c3fSmrg   struct brw_reg primmask;
75901e04c3fSmrg   int jmp;
76001e04c3fSmrg   struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
76101e04c3fSmrg
76201e04c3fSmrg   c->nr_verts = 3;
76301e04c3fSmrg   alloc_regs(c);
76401e04c3fSmrg
76501e04c3fSmrg   primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
76601e04c3fSmrg
76701e04c3fSmrg   brw_MOV(p, primmask, brw_imm_ud(1));
76801e04c3fSmrg   brw_SHL(p, primmask, primmask, payload_prim);
76901e04c3fSmrg
77001e04c3fSmrg   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
77101e04c3fSmrg					       (1<<_3DPRIM_TRISTRIP) |
77201e04c3fSmrg					       (1<<_3DPRIM_TRIFAN) |
77301e04c3fSmrg					       (1<<_3DPRIM_TRISTRIP_REVERSE) |
77401e04c3fSmrg					       (1<<_3DPRIM_POLYGON) |
77501e04c3fSmrg					       (1<<_3DPRIM_RECTLIST) |
77601e04c3fSmrg					       (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
77701e04c3fSmrg   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
77801e04c3fSmrg   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
77901e04c3fSmrg   brw_emit_tri_setup(c, false);
78001e04c3fSmrg   brw_land_fwd_jump(p, jmp);
78101e04c3fSmrg
78201e04c3fSmrg   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
78301e04c3fSmrg					       (1<<_3DPRIM_LINESTRIP) |
78401e04c3fSmrg					       (1<<_3DPRIM_LINELOOP) |
78501e04c3fSmrg					       (1<<_3DPRIM_LINESTRIP_CONT) |
78601e04c3fSmrg					       (1<<_3DPRIM_LINESTRIP_BF) |
78701e04c3fSmrg					       (1<<_3DPRIM_LINESTRIP_CONT_BF)));
78801e04c3fSmrg   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
78901e04c3fSmrg   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
79001e04c3fSmrg   brw_emit_line_setup(c, false);
79101e04c3fSmrg   brw_land_fwd_jump(p, jmp);
79201e04c3fSmrg
79301e04c3fSmrg   brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
79401e04c3fSmrg   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
79501e04c3fSmrg   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
79601e04c3fSmrg   brw_emit_point_sprite_setup(c, false);
79701e04c3fSmrg   brw_land_fwd_jump(p, jmp);
79801e04c3fSmrg
79901e04c3fSmrg   brw_emit_point_setup( c, false );
80001e04c3fSmrg}
80101e04c3fSmrg
80201e04c3fSmrgconst unsigned *
80301e04c3fSmrgbrw_compile_sf(const struct brw_compiler *compiler,
80401e04c3fSmrg               void *mem_ctx,
80501e04c3fSmrg               const struct brw_sf_prog_key *key,
80601e04c3fSmrg               struct brw_sf_prog_data *prog_data,
80701e04c3fSmrg               struct brw_vue_map *vue_map,
80801e04c3fSmrg               unsigned *final_assembly_size)
80901e04c3fSmrg{
81001e04c3fSmrg   struct brw_sf_compile c;
81101e04c3fSmrg   memset(&c, 0, sizeof(c));
81201e04c3fSmrg
81301e04c3fSmrg   /* Begin the compilation:
81401e04c3fSmrg    */
81501e04c3fSmrg   brw_init_codegen(compiler->devinfo, &c.func, mem_ctx);
81601e04c3fSmrg
81701e04c3fSmrg   c.key = *key;
81801e04c3fSmrg   c.vue_map = *vue_map;
81901e04c3fSmrg   if (c.key.do_point_coord) {
82001e04c3fSmrg      /*
82101e04c3fSmrg       * gl_PointCoord is a FS instead of VS builtin variable, thus it's
82201e04c3fSmrg       * not included in c.vue_map generated in VS stage. Here we add
82301e04c3fSmrg       * it manually to let SF shader generate the needed interpolation
82401e04c3fSmrg       * coefficient for FS shader.
82501e04c3fSmrg       */
82601e04c3fSmrg      c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots;
82701e04c3fSmrg      c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC;
82801e04c3fSmrg   }
82901e04c3fSmrg   c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
83001e04c3fSmrg   c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
83101e04c3fSmrg   c.nr_setup_regs = c.nr_attr_regs;
83201e04c3fSmrg
83301e04c3fSmrg   c.prog_data.urb_read_length = c.nr_attr_regs;
83401e04c3fSmrg   c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
83501e04c3fSmrg
83601e04c3fSmrg   /* Which primitive?  Or all three?
83701e04c3fSmrg    */
83801e04c3fSmrg   switch (key->primitive) {
83901e04c3fSmrg   case BRW_SF_PRIM_TRIANGLES:
84001e04c3fSmrg      c.nr_verts = 3;
84101e04c3fSmrg      brw_emit_tri_setup( &c, true );
84201e04c3fSmrg      break;
84301e04c3fSmrg   case BRW_SF_PRIM_LINES:
84401e04c3fSmrg      c.nr_verts = 2;
84501e04c3fSmrg      brw_emit_line_setup( &c, true );
84601e04c3fSmrg      break;
84701e04c3fSmrg   case BRW_SF_PRIM_POINTS:
84801e04c3fSmrg      c.nr_verts = 1;
84901e04c3fSmrg      if (key->do_point_sprite)
85001e04c3fSmrg	  brw_emit_point_sprite_setup( &c, true );
85101e04c3fSmrg      else
85201e04c3fSmrg	  brw_emit_point_setup( &c, true );
85301e04c3fSmrg      break;
85401e04c3fSmrg   case BRW_SF_PRIM_UNFILLED_TRIS:
85501e04c3fSmrg      c.nr_verts = 3;
85601e04c3fSmrg      brw_emit_anyprim_setup( &c );
85701e04c3fSmrg      break;
85801e04c3fSmrg   default:
85901e04c3fSmrg      unreachable("not reached");
86001e04c3fSmrg   }
86101e04c3fSmrg
86201e04c3fSmrg   /* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register
86301e04c3fSmrg    * source). Compacting would be difficult.
86401e04c3fSmrg    */
86501e04c3fSmrg   /* brw_compact_instructions(&c.func, 0, 0, NULL); */
86601e04c3fSmrg
86701e04c3fSmrg   *prog_data = c.prog_data;
86801e04c3fSmrg
86901e04c3fSmrg   const unsigned *program = brw_get_program(&c.func, final_assembly_size);
87001e04c3fSmrg
8717ec681f3Smrg   if (INTEL_DEBUG(DEBUG_SF)) {
87201e04c3fSmrg      fprintf(stderr, "sf:\n");
8737ec681f3Smrg      brw_disassemble_with_labels(compiler->devinfo,
8747ec681f3Smrg                                  program, 0, *final_assembly_size, stderr);
87501e04c3fSmrg      fprintf(stderr, "\n");
87601e04c3fSmrg   }
87701e04c3fSmrg
87801e04c3fSmrg   return program;
87901e04c3fSmrg}
880