1/* 2 * Copyright © 2006 - 2017 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "brw_compiler.h" 25#include "brw_eu.h" 26 27#include "dev/gen_debug.h" 28 29struct brw_sf_compile { 30 struct brw_codegen func; 31 struct brw_sf_prog_key key; 32 struct brw_sf_prog_data prog_data; 33 34 struct brw_reg pv; 35 struct brw_reg det; 36 struct brw_reg dx0; 37 struct brw_reg dx2; 38 struct brw_reg dy0; 39 struct brw_reg dy2; 40 41 /* z and 1/w passed in seperately: 42 */ 43 struct brw_reg z[3]; 44 struct brw_reg inv_w[3]; 45 46 /* The vertices: 47 */ 48 struct brw_reg vert[3]; 49 50 /* Temporaries, allocated after last vertex reg. 51 */ 52 struct brw_reg inv_det; 53 struct brw_reg a1_sub_a0; 54 struct brw_reg a2_sub_a0; 55 struct brw_reg tmp; 56 57 struct brw_reg m1Cx; 58 struct brw_reg m2Cy; 59 struct brw_reg m3C0; 60 61 GLuint nr_verts; 62 GLuint nr_attr_regs; 63 GLuint nr_setup_regs; 64 int urb_entry_read_offset; 65 66 /** The last known value of the f0.0 flag register. */ 67 unsigned flag_value; 68 69 struct brw_vue_map vue_map; 70}; 71 72/** 73 * Determine the vue slot corresponding to the given half of the given register. 74 */ 75static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint reg, 76 int half) 77{ 78 return (reg + c->urb_entry_read_offset) * 2 + half; 79} 80 81/** 82 * Determine the varying corresponding to the given half of the given 83 * register. half=0 means the first half of a register, half=1 means the 84 * second half. 85 */ 86static inline int vert_reg_to_varying(struct brw_sf_compile *c, GLuint reg, 87 int half) 88{ 89 int vue_slot = vert_reg_to_vue_slot(c, reg, half); 90 return c->vue_map.slot_to_varying[vue_slot]; 91} 92 93/** 94 * Determine the register corresponding to the given vue slot 95 */ 96static struct brw_reg get_vue_slot(struct brw_sf_compile *c, 97 struct brw_reg vert, 98 int vue_slot) 99{ 100 GLuint off = vue_slot / 2 - c->urb_entry_read_offset; 101 GLuint sub = vue_slot % 2; 102 103 return brw_vec4_grf(vert.nr + off, sub * 4); 104} 105 106/** 107 * Determine the register corresponding to the given varying. 108 */ 109static struct brw_reg get_varying(struct brw_sf_compile *c, 110 struct brw_reg vert, 111 GLuint varying) 112{ 113 int vue_slot = c->vue_map.varying_to_slot[varying]; 114 assert (vue_slot >= c->urb_entry_read_offset); 115 return get_vue_slot(c, vert, vue_slot); 116} 117 118static bool 119have_attr(struct brw_sf_compile *c, GLuint attr) 120{ 121 return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0; 122} 123 124/*********************************************************************** 125 * Twoside lighting 126 */ 127static void copy_bfc( struct brw_sf_compile *c, 128 struct brw_reg vert ) 129{ 130 struct brw_codegen *p = &c->func; 131 GLuint i; 132 133 for (i = 0; i < 2; i++) { 134 if (have_attr(c, VARYING_SLOT_COL0+i) && 135 have_attr(c, VARYING_SLOT_BFC0+i)) 136 brw_MOV(p, 137 get_varying(c, vert, VARYING_SLOT_COL0+i), 138 get_varying(c, vert, VARYING_SLOT_BFC0+i)); 139 } 140} 141 142 143static void do_twoside_color( struct brw_sf_compile *c ) 144{ 145 struct brw_codegen *p = &c->func; 146 GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L; 147 148 /* Already done in clip program: 149 */ 150 if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS) 151 return; 152 153 /* If the vertex shader provides backface color, do the selection. The VS 154 * promises to set up the front color if the backface color is provided, but 155 * it may contain junk if never written to. 156 */ 157 if (!(have_attr(c, VARYING_SLOT_COL0) && have_attr(c, VARYING_SLOT_BFC0)) && 158 !(have_attr(c, VARYING_SLOT_COL1) && have_attr(c, VARYING_SLOT_BFC1))) 159 return; 160 161 /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order 162 * to get all channels active inside the IF. In the clipping code 163 * we run with NoMask, so it's not an option and we can use 164 * BRW_EXECUTE_1 for all comparisions. 165 */ 166 brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0)); 167 brw_IF(p, BRW_EXECUTE_4); 168 { 169 switch (c->nr_verts) { 170 case 3: copy_bfc(c, c->vert[2]); 171 case 2: copy_bfc(c, c->vert[1]); 172 case 1: copy_bfc(c, c->vert[0]); 173 } 174 } 175 brw_ENDIF(p); 176} 177 178 179 180/*********************************************************************** 181 * Flat shading 182 */ 183 184static void copy_flatshaded_attributes(struct brw_sf_compile *c, 185 struct brw_reg dst, 186 struct brw_reg src) 187{ 188 struct brw_codegen *p = &c->func; 189 int i; 190 191 for (i = 0; i < c->vue_map.num_slots; i++) { 192 if (c->key.interp_mode[i] == INTERP_MODE_FLAT) { 193 brw_MOV(p, 194 get_vue_slot(c, dst, i), 195 get_vue_slot(c, src, i)); 196 } 197 } 198} 199 200static int count_flatshaded_attributes(struct brw_sf_compile *c) 201{ 202 int i; 203 int count = 0; 204 205 for (i = 0; i < c->vue_map.num_slots; i++) 206 if (c->key.interp_mode[i] == INTERP_MODE_FLAT) 207 count++; 208 209 return count; 210} 211 212 213 214/* Need to use a computed jump to copy flatshaded attributes as the 215 * vertices are ordered according to y-coordinate before reaching this 216 * point, so the PV could be anywhere. 217 */ 218static void do_flatshade_triangle( struct brw_sf_compile *c ) 219{ 220 struct brw_codegen *p = &c->func; 221 GLuint nr; 222 GLuint jmpi = 1; 223 224 /* Already done in clip program: 225 */ 226 if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS) 227 return; 228 229 if (p->devinfo->gen == 5) 230 jmpi = 2; 231 232 nr = count_flatshaded_attributes(c); 233 234 brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1))); 235 brw_JMPI(p, c->pv, BRW_PREDICATE_NONE); 236 237 copy_flatshaded_attributes(c, c->vert[1], c->vert[0]); 238 copy_flatshaded_attributes(c, c->vert[2], c->vert[0]); 239 brw_JMPI(p, brw_imm_d(jmpi*(nr*4+1)), BRW_PREDICATE_NONE); 240 241 copy_flatshaded_attributes(c, c->vert[0], c->vert[1]); 242 copy_flatshaded_attributes(c, c->vert[2], c->vert[1]); 243 brw_JMPI(p, brw_imm_d(jmpi*nr*2), BRW_PREDICATE_NONE); 244 245 copy_flatshaded_attributes(c, c->vert[0], c->vert[2]); 246 copy_flatshaded_attributes(c, c->vert[1], c->vert[2]); 247} 248 249 250static void do_flatshade_line( struct brw_sf_compile *c ) 251{ 252 struct brw_codegen *p = &c->func; 253 GLuint nr; 254 GLuint jmpi = 1; 255 256 /* Already done in clip program: 257 */ 258 if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS) 259 return; 260 261 if (p->devinfo->gen == 5) 262 jmpi = 2; 263 264 nr = count_flatshaded_attributes(c); 265 266 brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1))); 267 brw_JMPI(p, c->pv, BRW_PREDICATE_NONE); 268 copy_flatshaded_attributes(c, c->vert[1], c->vert[0]); 269 270 brw_JMPI(p, brw_imm_ud(jmpi*nr), BRW_PREDICATE_NONE); 271 copy_flatshaded_attributes(c, c->vert[0], c->vert[1]); 272} 273 274 275/*********************************************************************** 276 * Triangle setup. 277 */ 278 279 280static void alloc_regs( struct brw_sf_compile *c ) 281{ 282 GLuint reg, i; 283 284 /* Values computed by fixed function unit: 285 */ 286 c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D); 287 c->det = brw_vec1_grf(1, 2); 288 c->dx0 = brw_vec1_grf(1, 3); 289 c->dx2 = brw_vec1_grf(1, 4); 290 c->dy0 = brw_vec1_grf(1, 5); 291 c->dy2 = brw_vec1_grf(1, 6); 292 293 /* z and 1/w passed in seperately: 294 */ 295 c->z[0] = brw_vec1_grf(2, 0); 296 c->inv_w[0] = brw_vec1_grf(2, 1); 297 c->z[1] = brw_vec1_grf(2, 2); 298 c->inv_w[1] = brw_vec1_grf(2, 3); 299 c->z[2] = brw_vec1_grf(2, 4); 300 c->inv_w[2] = brw_vec1_grf(2, 5); 301 302 /* The vertices: 303 */ 304 reg = 3; 305 for (i = 0; i < c->nr_verts; i++) { 306 c->vert[i] = brw_vec8_grf(reg, 0); 307 reg += c->nr_attr_regs; 308 } 309 310 /* Temporaries, allocated after last vertex reg. 311 */ 312 c->inv_det = brw_vec1_grf(reg, 0); reg++; 313 c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++; 314 c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++; 315 c->tmp = brw_vec8_grf(reg, 0); reg++; 316 317 /* Note grf allocation: 318 */ 319 c->prog_data.total_grf = reg; 320 321 322 /* Outputs of this program - interpolation coefficients for 323 * rasterization: 324 */ 325 c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0); 326 c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0); 327 c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0); 328} 329 330 331static void copy_z_inv_w( struct brw_sf_compile *c ) 332{ 333 struct brw_codegen *p = &c->func; 334 GLuint i; 335 336 /* Copy both scalars with a single MOV: 337 */ 338 for (i = 0; i < c->nr_verts; i++) 339 brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i])); 340} 341 342 343static void invert_det( struct brw_sf_compile *c) 344{ 345 /* Looks like we invert all 8 elements just to get 1/det in 346 * position 2 !?! 347 */ 348 gen4_math(&c->func, 349 c->inv_det, 350 BRW_MATH_FUNCTION_INV, 351 0, 352 c->det, 353 BRW_MATH_PRECISION_FULL); 354 355} 356 357 358static bool 359calculate_masks(struct brw_sf_compile *c, 360 GLuint reg, 361 GLushort *pc, 362 GLushort *pc_persp, 363 GLushort *pc_linear) 364{ 365 bool is_last_attr = (reg == c->nr_setup_regs - 1); 366 enum glsl_interp_mode interp; 367 368 *pc_persp = 0; 369 *pc_linear = 0; 370 *pc = 0xf; 371 372 interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 0)]; 373 if (interp == INTERP_MODE_SMOOTH) { 374 *pc_linear = 0xf; 375 *pc_persp = 0xf; 376 } else if (interp == INTERP_MODE_NOPERSPECTIVE) 377 *pc_linear = 0xf; 378 379 /* Maybe only processs one attribute on the final round: 380 */ 381 if (vert_reg_to_varying(c, reg, 1) != BRW_VARYING_SLOT_COUNT) { 382 *pc |= 0xf0; 383 384 interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 1)]; 385 if (interp == INTERP_MODE_SMOOTH) { 386 *pc_linear |= 0xf0; 387 *pc_persp |= 0xf0; 388 } else if (interp == INTERP_MODE_NOPERSPECTIVE) 389 *pc_linear |= 0xf0; 390 } 391 392 return is_last_attr; 393} 394 395/* Calculates the predicate control for which channels of a reg 396 * (containing 2 attrs) to do point sprite coordinate replacement on. 397 */ 398static uint16_t 399calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg) 400{ 401 int varying1, varying2; 402 uint16_t pc = 0; 403 404 varying1 = vert_reg_to_varying(c, reg, 0); 405 if (varying1 >= VARYING_SLOT_TEX0 && varying1 <= VARYING_SLOT_TEX7) { 406 if (c->key.point_sprite_coord_replace & (1 << (varying1 - VARYING_SLOT_TEX0))) 407 pc |= 0x0f; 408 } 409 if (varying1 == BRW_VARYING_SLOT_PNTC) 410 pc |= 0x0f; 411 412 varying2 = vert_reg_to_varying(c, reg, 1); 413 if (varying2 >= VARYING_SLOT_TEX0 && varying2 <= VARYING_SLOT_TEX7) { 414 if (c->key.point_sprite_coord_replace & (1 << (varying2 - 415 VARYING_SLOT_TEX0))) 416 pc |= 0xf0; 417 } 418 if (varying2 == BRW_VARYING_SLOT_PNTC) 419 pc |= 0xf0; 420 421 return pc; 422} 423 424static void 425set_predicate_control_flag_value(struct brw_codegen *p, 426 struct brw_sf_compile *c, 427 unsigned value) 428{ 429 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); 430 431 if (value != 0xff) { 432 if (value != c->flag_value) { 433 brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value)); 434 c->flag_value = value; 435 } 436 437 brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL); 438 } 439} 440 441static void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate) 442{ 443 struct brw_codegen *p = &c->func; 444 GLuint i; 445 446 c->flag_value = 0xff; 447 c->nr_verts = 3; 448 449 if (allocate) 450 alloc_regs(c); 451 452 invert_det(c); 453 copy_z_inv_w(c); 454 455 if (c->key.do_twoside_color) 456 do_twoside_color(c); 457 458 if (c->key.contains_flat_varying) 459 do_flatshade_triangle(c); 460 461 462 for (i = 0; i < c->nr_setup_regs; i++) 463 { 464 /* Pair of incoming attributes: 465 */ 466 struct brw_reg a0 = offset(c->vert[0], i); 467 struct brw_reg a1 = offset(c->vert[1], i); 468 struct brw_reg a2 = offset(c->vert[2], i); 469 GLushort pc, pc_persp, pc_linear; 470 bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); 471 472 if (pc_persp) 473 { 474 set_predicate_control_flag_value(p, c, pc_persp); 475 brw_MUL(p, a0, a0, c->inv_w[0]); 476 brw_MUL(p, a1, a1, c->inv_w[1]); 477 brw_MUL(p, a2, a2, c->inv_w[2]); 478 } 479 480 481 /* Calculate coefficients for interpolated values: 482 */ 483 if (pc_linear) 484 { 485 set_predicate_control_flag_value(p, c, pc_linear); 486 487 brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); 488 brw_ADD(p, c->a2_sub_a0, a2, negate(a0)); 489 490 /* calculate dA/dx 491 */ 492 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2); 493 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0)); 494 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); 495 496 /* calculate dA/dy 497 */ 498 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0); 499 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2)); 500 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); 501 } 502 503 { 504 set_predicate_control_flag_value(p, c, pc); 505 /* start point for interpolation 506 */ 507 brw_MOV(p, c->m3C0, a0); 508 509 /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in 510 * the send instruction: 511 */ 512 brw_urb_WRITE(p, 513 brw_null_reg(), 514 0, 515 brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ 516 last ? BRW_URB_WRITE_EOT_COMPLETE 517 : BRW_URB_WRITE_NO_FLAGS, 518 4, /* msg len */ 519 0, /* response len */ 520 i*4, /* offset */ 521 BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ 522 } 523 } 524 525 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); 526} 527 528 529 530static void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate) 531{ 532 struct brw_codegen *p = &c->func; 533 GLuint i; 534 535 c->flag_value = 0xff; 536 c->nr_verts = 2; 537 538 if (allocate) 539 alloc_regs(c); 540 541 invert_det(c); 542 copy_z_inv_w(c); 543 544 if (c->key.contains_flat_varying) 545 do_flatshade_line(c); 546 547 for (i = 0; i < c->nr_setup_regs; i++) 548 { 549 /* Pair of incoming attributes: 550 */ 551 struct brw_reg a0 = offset(c->vert[0], i); 552 struct brw_reg a1 = offset(c->vert[1], i); 553 GLushort pc, pc_persp, pc_linear; 554 bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); 555 556 if (pc_persp) 557 { 558 set_predicate_control_flag_value(p, c, pc_persp); 559 brw_MUL(p, a0, a0, c->inv_w[0]); 560 brw_MUL(p, a1, a1, c->inv_w[1]); 561 } 562 563 /* Calculate coefficients for position, color: 564 */ 565 if (pc_linear) { 566 set_predicate_control_flag_value(p, c, pc_linear); 567 568 brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); 569 570 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0); 571 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); 572 573 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0); 574 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); 575 } 576 577 { 578 set_predicate_control_flag_value(p, c, pc); 579 580 /* start point for interpolation 581 */ 582 brw_MOV(p, c->m3C0, a0); 583 584 /* Copy m0..m3 to URB. 585 */ 586 brw_urb_WRITE(p, 587 brw_null_reg(), 588 0, 589 brw_vec8_grf(0, 0), 590 last ? BRW_URB_WRITE_EOT_COMPLETE 591 : BRW_URB_WRITE_NO_FLAGS, 592 4, /* msg len */ 593 0, /* response len */ 594 i*4, /* urb destination offset */ 595 BRW_URB_SWIZZLE_TRANSPOSE); 596 } 597 } 598 599 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); 600} 601 602static void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate) 603{ 604 struct brw_codegen *p = &c->func; 605 GLuint i; 606 607 c->flag_value = 0xff; 608 c->nr_verts = 1; 609 610 if (allocate) 611 alloc_regs(c); 612 613 copy_z_inv_w(c); 614 for (i = 0; i < c->nr_setup_regs; i++) 615 { 616 struct brw_reg a0 = offset(c->vert[0], i); 617 GLushort pc, pc_persp, pc_linear, pc_coord_replace; 618 bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); 619 620 pc_coord_replace = calculate_point_sprite_mask(c, i); 621 pc_persp &= ~pc_coord_replace; 622 623 if (pc_persp) { 624 set_predicate_control_flag_value(p, c, pc_persp); 625 brw_MUL(p, a0, a0, c->inv_w[0]); 626 } 627 628 /* Point sprite coordinate replacement: A texcoord with this 629 * enabled gets replaced with the value (x, y, 0, 1) where x and 630 * y vary from 0 to 1 across the horizontal and vertical of the 631 * point. 632 */ 633 if (pc_coord_replace) { 634 set_predicate_control_flag_value(p, c, pc_coord_replace); 635 /* Caculate 1.0/PointWidth */ 636 gen4_math(&c->func, 637 c->tmp, 638 BRW_MATH_FUNCTION_INV, 639 0, 640 c->dx0, 641 BRW_MATH_PRECISION_FULL); 642 643 brw_set_default_access_mode(p, BRW_ALIGN_16); 644 645 /* dA/dx, dA/dy */ 646 brw_MOV(p, c->m1Cx, brw_imm_f(0.0)); 647 brw_MOV(p, c->m2Cy, brw_imm_f(0.0)); 648 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp); 649 if (c->key.sprite_origin_lower_left) { 650 brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp)); 651 } else { 652 brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp); 653 } 654 655 /* attribute constant offset */ 656 brw_MOV(p, c->m3C0, brw_imm_f(0.0)); 657 if (c->key.sprite_origin_lower_left) { 658 brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0)); 659 } else { 660 brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0)); 661 } 662 663 brw_set_default_access_mode(p, BRW_ALIGN_1); 664 } 665 666 if (pc & ~pc_coord_replace) { 667 set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace); 668 brw_MOV(p, c->m1Cx, brw_imm_ud(0)); 669 brw_MOV(p, c->m2Cy, brw_imm_ud(0)); 670 brw_MOV(p, c->m3C0, a0); /* constant value */ 671 } 672 673 674 set_predicate_control_flag_value(p, c, pc); 675 /* Copy m0..m3 to URB. */ 676 brw_urb_WRITE(p, 677 brw_null_reg(), 678 0, 679 brw_vec8_grf(0, 0), 680 last ? BRW_URB_WRITE_EOT_COMPLETE 681 : BRW_URB_WRITE_NO_FLAGS, 682 4, /* msg len */ 683 0, /* response len */ 684 i*4, /* urb destination offset */ 685 BRW_URB_SWIZZLE_TRANSPOSE); 686 } 687 688 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); 689} 690 691/* Points setup - several simplifications as all attributes are 692 * constant across the face of the point (point sprites excluded!) 693 */ 694static void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate) 695{ 696 struct brw_codegen *p = &c->func; 697 GLuint i; 698 699 c->flag_value = 0xff; 700 c->nr_verts = 1; 701 702 if (allocate) 703 alloc_regs(c); 704 705 copy_z_inv_w(c); 706 707 brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */ 708 brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */ 709 710 for (i = 0; i < c->nr_setup_regs; i++) 711 { 712 struct brw_reg a0 = offset(c->vert[0], i); 713 GLushort pc, pc_persp, pc_linear; 714 bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); 715 716 if (pc_persp) 717 { 718 /* This seems odd as the values are all constant, but the 719 * fragment shader will be expecting it: 720 */ 721 set_predicate_control_flag_value(p, c, pc_persp); 722 brw_MUL(p, a0, a0, c->inv_w[0]); 723 } 724 725 726 /* The delta values are always zero, just send the starting 727 * coordinate. Again, this is to fit in with the interpolation 728 * code in the fragment shader. 729 */ 730 { 731 set_predicate_control_flag_value(p, c, pc); 732 733 brw_MOV(p, c->m3C0, a0); /* constant value */ 734 735 /* Copy m0..m3 to URB. 736 */ 737 brw_urb_WRITE(p, 738 brw_null_reg(), 739 0, 740 brw_vec8_grf(0, 0), 741 last ? BRW_URB_WRITE_EOT_COMPLETE 742 : BRW_URB_WRITE_NO_FLAGS, 743 4, /* msg len */ 744 0, /* response len */ 745 i*4, /* urb destination offset */ 746 BRW_URB_SWIZZLE_TRANSPOSE); 747 } 748 } 749 750 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); 751} 752 753static void brw_emit_anyprim_setup( struct brw_sf_compile *c ) 754{ 755 struct brw_codegen *p = &c->func; 756 struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); 757 struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); 758 struct brw_reg primmask; 759 int jmp; 760 struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); 761 762 c->nr_verts = 3; 763 alloc_regs(c); 764 765 primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD); 766 767 brw_MOV(p, primmask, brw_imm_ud(1)); 768 brw_SHL(p, primmask, primmask, payload_prim); 769 770 brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) | 771 (1<<_3DPRIM_TRISTRIP) | 772 (1<<_3DPRIM_TRIFAN) | 773 (1<<_3DPRIM_TRISTRIP_REVERSE) | 774 (1<<_3DPRIM_POLYGON) | 775 (1<<_3DPRIM_RECTLIST) | 776 (1<<_3DPRIM_TRIFAN_NOSTIPPLE))); 777 brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z); 778 jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store; 779 brw_emit_tri_setup(c, false); 780 brw_land_fwd_jump(p, jmp); 781 782 brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) | 783 (1<<_3DPRIM_LINESTRIP) | 784 (1<<_3DPRIM_LINELOOP) | 785 (1<<_3DPRIM_LINESTRIP_CONT) | 786 (1<<_3DPRIM_LINESTRIP_BF) | 787 (1<<_3DPRIM_LINESTRIP_CONT_BF))); 788 brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z); 789 jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store; 790 brw_emit_line_setup(c, false); 791 brw_land_fwd_jump(p, jmp); 792 793 brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE)); 794 brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z); 795 jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store; 796 brw_emit_point_sprite_setup(c, false); 797 brw_land_fwd_jump(p, jmp); 798 799 brw_emit_point_setup( c, false ); 800} 801 802const unsigned * 803brw_compile_sf(const struct brw_compiler *compiler, 804 void *mem_ctx, 805 const struct brw_sf_prog_key *key, 806 struct brw_sf_prog_data *prog_data, 807 struct brw_vue_map *vue_map, 808 unsigned *final_assembly_size) 809{ 810 struct brw_sf_compile c; 811 memset(&c, 0, sizeof(c)); 812 813 /* Begin the compilation: 814 */ 815 brw_init_codegen(compiler->devinfo, &c.func, mem_ctx); 816 817 c.key = *key; 818 c.vue_map = *vue_map; 819 if (c.key.do_point_coord) { 820 /* 821 * gl_PointCoord is a FS instead of VS builtin variable, thus it's 822 * not included in c.vue_map generated in VS stage. Here we add 823 * it manually to let SF shader generate the needed interpolation 824 * coefficient for FS shader. 825 */ 826 c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots; 827 c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC; 828 } 829 c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; 830 c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset; 831 c.nr_setup_regs = c.nr_attr_regs; 832 833 c.prog_data.urb_read_length = c.nr_attr_regs; 834 c.prog_data.urb_entry_size = c.nr_setup_regs * 2; 835 836 /* Which primitive? Or all three? 837 */ 838 switch (key->primitive) { 839 case BRW_SF_PRIM_TRIANGLES: 840 c.nr_verts = 3; 841 brw_emit_tri_setup( &c, true ); 842 break; 843 case BRW_SF_PRIM_LINES: 844 c.nr_verts = 2; 845 brw_emit_line_setup( &c, true ); 846 break; 847 case BRW_SF_PRIM_POINTS: 848 c.nr_verts = 1; 849 if (key->do_point_sprite) 850 brw_emit_point_sprite_setup( &c, true ); 851 else 852 brw_emit_point_setup( &c, true ); 853 break; 854 case BRW_SF_PRIM_UNFILLED_TRIS: 855 c.nr_verts = 3; 856 brw_emit_anyprim_setup( &c ); 857 break; 858 default: 859 unreachable("not reached"); 860 } 861 862 /* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register 863 * source). Compacting would be difficult. 864 */ 865 /* brw_compact_instructions(&c.func, 0, 0, NULL); */ 866 867 *prog_data = c.prog_data; 868 869 const unsigned *program = brw_get_program(&c.func, final_assembly_size); 870 871 if (unlikely(INTEL_DEBUG & DEBUG_SF)) { 872 fprintf(stderr, "sf:\n"); 873 brw_disassemble(compiler->devinfo, 874 program, 0, *final_assembly_size, stderr); 875 fprintf(stderr, "\n"); 876 } 877 878 return program; 879} 880