1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keithw@vmware.com> 30 */ 31 32#include "brw_compiler.h" 33#include "brw_eu.h" 34 35#include "dev/intel_debug.h" 36 37#define MAX_GS_VERTS (4) 38 39struct brw_ff_gs_compile { 40 struct brw_codegen func; 41 struct brw_ff_gs_prog_key key; 42 struct brw_ff_gs_prog_data *prog_data; 43 44 struct { 45 struct brw_reg R0; 46 47 /** 48 * Register holding streamed vertex buffer pointers -- see the Sandy 49 * Bridge PRM, volume 2 part 1, section 4.4.2 (GS Thread Payload 50 * [DevSNB]). These pointers are delivered in GRF 1. 51 */ 52 struct brw_reg SVBI; 53 54 struct brw_reg vertex[MAX_GS_VERTS]; 55 struct brw_reg header; 56 struct brw_reg temp; 57 58 /** 59 * Register holding destination indices for streamed buffer writes. 60 * Only used for SOL programs. 61 */ 62 struct brw_reg destination_indices; 63 } reg; 64 65 /* Number of registers used to store vertex data */ 66 GLuint nr_regs; 67 68 struct brw_vue_map vue_map; 69}; 70 71/** 72 * Allocate registers for GS. 73 * 74 * If sol_program is true, then: 75 * 76 * - The thread will be spawned with the "SVBI Payload Enable" bit set, so GRF 77 * 1 needs to be set aside to hold the streamed vertex buffer indices. 78 * 79 * - The thread will need to use the destination_indices register. 80 */ 81static void brw_ff_gs_alloc_regs(struct brw_ff_gs_compile *c, 82 GLuint nr_verts, 83 bool sol_program) 84{ 85 GLuint i = 0,j; 86 87 /* Register usage is static, precompute here: 88 */ 89 c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; 90 91 /* Streamed vertex buffer indices */ 92 if (sol_program) 93 c->reg.SVBI = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); 94 95 /* Payload vertices plus space for more generated vertices: 96 */ 97 for (j = 0; j < nr_verts; j++) { 98 c->reg.vertex[j] = brw_vec4_grf(i, 0); 99 i += c->nr_regs; 100 } 101 102 c->reg.header = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); 103 c->reg.temp = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); 104 105 if (sol_program) { 106 c->reg.destination_indices = 107 retype(brw_vec4_grf(i++, 0), BRW_REGISTER_TYPE_UD); 108 } 109 110 c->prog_data->urb_read_length = c->nr_regs; 111 c->prog_data->total_grf = i; 112} 113 114 115/** 116 * Set up the initial value of c->reg.header register based on c->reg.R0. 117 * 118 * The following information is passed to the GS thread in R0, and needs to be 119 * included in the first URB_WRITE or FF_SYNC message sent by the GS: 120 * 121 * - DWORD 0 [31:0] handle info (Gen4 only) 122 * - DWORD 5 [7:0] FFTID 123 * - DWORD 6 [31:0] Debug info 124 * - DWORD 7 [31:0] Debug info 125 * 126 * This function sets up the above data by copying by copying the contents of 127 * R0 to the header register. 128 */ 129static void brw_ff_gs_initialize_header(struct brw_ff_gs_compile *c) 130{ 131 struct brw_codegen *p = &c->func; 132 brw_MOV(p, c->reg.header, c->reg.R0); 133} 134 135/** 136 * Overwrite DWORD 2 of c->reg.header with the given immediate unsigned value. 137 * 138 * In URB_WRITE messages, DWORD 2 contains the fields PrimType, PrimStart, 139 * PrimEnd, Increment CL_INVOCATIONS, and SONumPrimsWritten, many of which we 140 * need to be able to update on a per-vertex basis. 141 */ 142static void brw_ff_gs_overwrite_header_dw2(struct brw_ff_gs_compile *c, 143 unsigned dw2) 144{ 145 struct brw_codegen *p = &c->func; 146 brw_MOV(p, get_element_ud(c->reg.header, 2), brw_imm_ud(dw2)); 147} 148 149/** 150 * Overwrite DWORD 2 of c->reg.header with the primitive type from c->reg.R0. 151 * 152 * When the thread is spawned, GRF 0 contains the primitive type in bits 4:0 153 * of DWORD 2. URB_WRITE messages need the primitive type in bits 6:2 of 154 * DWORD 2. So this function extracts the primitive type field, bitshifts it 155 * appropriately, and stores it in c->reg.header. 156 */ 157static void brw_ff_gs_overwrite_header_dw2_from_r0(struct brw_ff_gs_compile *c) 158{ 159 struct brw_codegen *p = &c->func; 160 brw_AND(p, get_element_ud(c->reg.header, 2), get_element_ud(c->reg.R0, 2), 161 brw_imm_ud(0x1f)); 162 brw_SHL(p, get_element_ud(c->reg.header, 2), 163 get_element_ud(c->reg.header, 2), brw_imm_ud(2)); 164} 165 166/** 167 * Apply an additive offset to DWORD 2 of c->reg.header. 168 * 169 * This is used to set/unset the "PrimStart" and "PrimEnd" flags appropriately 170 * for each vertex. 171 */ 172static void brw_ff_gs_offset_header_dw2(struct brw_ff_gs_compile *c, 173 int offset) 174{ 175 struct brw_codegen *p = &c->func; 176 brw_ADD(p, get_element_d(c->reg.header, 2), get_element_d(c->reg.header, 2), 177 brw_imm_d(offset)); 178} 179 180 181/** 182 * Emit a vertex using the URB_WRITE message. Use the contents of 183 * c->reg.header for the message header, and the registers starting at \c vert 184 * for the vertex data. 185 * 186 * If \c last is true, then this is the last vertex, so no further URB space 187 * should be allocated, and this message should end the thread. 188 * 189 * If \c last is false, then a new URB entry will be allocated, and its handle 190 * will be stored in DWORD 0 of c->reg.header for use in the next URB_WRITE 191 * message. 192 */ 193static void brw_ff_gs_emit_vue(struct brw_ff_gs_compile *c, 194 struct brw_reg vert, 195 bool last) 196{ 197 struct brw_codegen *p = &c->func; 198 int write_offset = 0; 199 bool complete = false; 200 201 do { 202 /* We can't write more than 14 registers at a time to the URB */ 203 int write_len = MIN2(c->nr_regs - write_offset, 14); 204 if (write_len == c->nr_regs - write_offset) 205 complete = true; 206 207 /* Copy the vertex from vertn into m1..mN+1: 208 */ 209 brw_copy8(p, brw_message_reg(1), offset(vert, write_offset), write_len); 210 211 /* Send the vertex data to the URB. If this is the last write for this 212 * vertex, then we mark it as complete, and either end the thread or 213 * allocate another vertex URB entry (depending whether this is the last 214 * vertex). 215 */ 216 enum brw_urb_write_flags flags; 217 if (!complete) 218 flags = BRW_URB_WRITE_NO_FLAGS; 219 else if (last) 220 flags = BRW_URB_WRITE_EOT_COMPLETE; 221 else 222 flags = BRW_URB_WRITE_ALLOCATE_COMPLETE; 223 brw_urb_WRITE(p, 224 (flags & BRW_URB_WRITE_ALLOCATE) ? c->reg.temp 225 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), 226 0, 227 c->reg.header, 228 flags, 229 write_len + 1, /* msg length */ 230 (flags & BRW_URB_WRITE_ALLOCATE) ? 1 231 : 0, /* response length */ 232 write_offset, /* urb offset */ 233 BRW_URB_SWIZZLE_NONE); 234 write_offset += write_len; 235 } while (!complete); 236 237 if (!last) { 238 brw_MOV(p, get_element_ud(c->reg.header, 0), 239 get_element_ud(c->reg.temp, 0)); 240 } 241} 242 243/** 244 * Send an FF_SYNC message to ensure that all previously spawned GS threads 245 * have finished sending primitives down the pipeline, and to allocate a URB 246 * entry for the first output vertex. Only needed on Ironlake+. 247 * 248 * This function modifies c->reg.header: in DWORD 1, it stores num_prim (which 249 * is needed by the FF_SYNC message), and in DWORD 0, it stores the handle to 250 * the allocated URB entry (which will be needed by the URB_WRITE meesage that 251 * follows). 252 */ 253static void brw_ff_gs_ff_sync(struct brw_ff_gs_compile *c, int num_prim) 254{ 255 struct brw_codegen *p = &c->func; 256 257 brw_MOV(p, get_element_ud(c->reg.header, 1), brw_imm_ud(num_prim)); 258 brw_ff_sync(p, 259 c->reg.temp, 260 0, 261 c->reg.header, 262 1, /* allocate */ 263 1, /* response length */ 264 0 /* eot */); 265 brw_MOV(p, get_element_ud(c->reg.header, 0), 266 get_element_ud(c->reg.temp, 0)); 267} 268 269 270static void 271brw_ff_gs_quads(struct brw_ff_gs_compile *c, 272 const struct brw_ff_gs_prog_key *key) 273{ 274 brw_ff_gs_alloc_regs(c, 4, false); 275 brw_ff_gs_initialize_header(c); 276 /* Use polygons for correct edgeflag behaviour. Note that vertex 3 277 * is the PV for quads, but vertex 0 for polygons: 278 */ 279 if (c->func.devinfo->ver == 5) 280 brw_ff_gs_ff_sync(c, 1); 281 brw_ff_gs_overwrite_header_dw2( 282 c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) 283 | URB_WRITE_PRIM_START)); 284 if (key->pv_first) { 285 brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0); 286 brw_ff_gs_overwrite_header_dw2( 287 c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT); 288 brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0); 289 brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0); 290 brw_ff_gs_overwrite_header_dw2( 291 c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) 292 | URB_WRITE_PRIM_END)); 293 brw_ff_gs_emit_vue(c, c->reg.vertex[3], 1); 294 } 295 else { 296 brw_ff_gs_emit_vue(c, c->reg.vertex[3], 0); 297 brw_ff_gs_overwrite_header_dw2( 298 c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT); 299 brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0); 300 brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0); 301 brw_ff_gs_overwrite_header_dw2( 302 c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) 303 | URB_WRITE_PRIM_END)); 304 brw_ff_gs_emit_vue(c, c->reg.vertex[2], 1); 305 } 306} 307 308static void 309brw_ff_gs_quad_strip(struct brw_ff_gs_compile *c, 310 const struct brw_ff_gs_prog_key *key) 311{ 312 brw_ff_gs_alloc_regs(c, 4, false); 313 brw_ff_gs_initialize_header(c); 314 315 if (c->func.devinfo->ver == 5) 316 brw_ff_gs_ff_sync(c, 1); 317 brw_ff_gs_overwrite_header_dw2( 318 c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) 319 | URB_WRITE_PRIM_START)); 320 if (key->pv_first) { 321 brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0); 322 brw_ff_gs_overwrite_header_dw2( 323 c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT); 324 brw_ff_gs_emit_vue(c, c->reg.vertex[1], 0); 325 brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0); 326 brw_ff_gs_overwrite_header_dw2( 327 c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) 328 | URB_WRITE_PRIM_END)); 329 brw_ff_gs_emit_vue(c, c->reg.vertex[3], 1); 330 } 331 else { 332 brw_ff_gs_emit_vue(c, c->reg.vertex[2], 0); 333 brw_ff_gs_overwrite_header_dw2( 334 c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT); 335 brw_ff_gs_emit_vue(c, c->reg.vertex[3], 0); 336 brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0); 337 brw_ff_gs_overwrite_header_dw2( 338 c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) 339 | URB_WRITE_PRIM_END)); 340 brw_ff_gs_emit_vue(c, c->reg.vertex[1], 1); 341 } 342} 343 344static void brw_ff_gs_lines(struct brw_ff_gs_compile *c) 345{ 346 brw_ff_gs_alloc_regs(c, 2, false); 347 brw_ff_gs_initialize_header(c); 348 349 if (c->func.devinfo->ver == 5) 350 brw_ff_gs_ff_sync(c, 1); 351 brw_ff_gs_overwrite_header_dw2( 352 c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) 353 | URB_WRITE_PRIM_START)); 354 brw_ff_gs_emit_vue(c, c->reg.vertex[0], 0); 355 brw_ff_gs_overwrite_header_dw2( 356 c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) 357 | URB_WRITE_PRIM_END)); 358 brw_ff_gs_emit_vue(c, c->reg.vertex[1], 1); 359} 360 361/** 362 * Generate the geometry shader program used on Gen6 to perform stream output 363 * (transform feedback). 364 */ 365static void 366gfx6_sol_program(struct brw_ff_gs_compile *c, const struct brw_ff_gs_prog_key *key, 367 unsigned num_verts, bool check_edge_flags) 368{ 369 struct brw_codegen *p = &c->func; 370 brw_inst *inst; 371 c->prog_data->svbi_postincrement_value = num_verts; 372 373 brw_ff_gs_alloc_regs(c, num_verts, true); 374 brw_ff_gs_initialize_header(c); 375 376 if (key->num_transform_feedback_bindings > 0) { 377 unsigned vertex, binding; 378 struct brw_reg destination_indices_uw = 379 vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW)); 380 381 /* Note: since we use the binding table to keep track of buffer offsets 382 * and stride, the GS doesn't need to keep track of a separate pointer 383 * into each buffer; it uses a single pointer which increments by 1 for 384 * each vertex. So we use SVBI0 for this pointer, regardless of whether 385 * transform feedback is in interleaved or separate attribs mode. 386 * 387 * Make sure that the buffers have enough room for all the vertices. 388 */ 389 brw_ADD(p, get_element_ud(c->reg.temp, 0), 390 get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts)); 391 brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, 392 get_element_ud(c->reg.temp, 0), 393 get_element_ud(c->reg.SVBI, 4)); 394 brw_IF(p, BRW_EXECUTE_1); 395 396 /* Compute the destination indices to write to. Usually we use SVBI[0] 397 * + (0, 1, 2). However, for odd-numbered triangles in tristrips, the 398 * vertices come down the pipeline in reversed winding order, so we need 399 * to flip the order when writing to the transform feedback buffer. To 400 * ensure that flatshading accuracy is preserved, we need to write them 401 * in order SVBI[0] + (0, 2, 1) if we're using the first provoking 402 * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using 403 * the last provoking vertex convention. 404 * 405 * Note: since brw_imm_v can only be used in instructions in 406 * packed-word execution mode, and SVBI is a double-word, we need to 407 * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1), 408 * or (1, 0, 2)) to the destination_indices register, and then add SVBI 409 * using a separate instruction. Also, since the immediate constant is 410 * expressed as packed words, and we need to load double-words into 411 * destination_indices, we need to intersperse zeros to fill the upper 412 * halves of each double-word. 413 */ 414 brw_MOV(p, destination_indices_uw, 415 brw_imm_v(0x00020100)); /* (0, 1, 2) */ 416 if (num_verts == 3) { 417 /* Get primitive type into temp register. */ 418 brw_AND(p, get_element_ud(c->reg.temp, 0), 419 get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f)); 420 421 /* Test if primitive type is TRISTRIP_REVERSE. We need to do this as 422 * an 8-wide comparison so that the conditional MOV that follows 423 * moves all 8 words correctly. 424 */ 425 brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ, 426 get_element_ud(c->reg.temp, 0), 427 brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); 428 429 /* If so, then overwrite destination_indices_uw with the appropriate 430 * reordering. 431 */ 432 inst = brw_MOV(p, destination_indices_uw, 433 brw_imm_v(key->pv_first ? 0x00010200 /* (0, 2, 1) */ 434 : 0x00020001)); /* (1, 0, 2) */ 435 brw_inst_set_pred_control(p->devinfo, inst, BRW_PREDICATE_NORMAL); 436 } 437 438 assert(c->reg.destination_indices.width == BRW_EXECUTE_4); 439 brw_push_insn_state(p); 440 brw_set_default_exec_size(p, BRW_EXECUTE_4); 441 brw_ADD(p, c->reg.destination_indices, 442 c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0)); 443 brw_pop_insn_state(p); 444 /* For each vertex, generate code to output each varying using the 445 * appropriate binding table entry. 446 */ 447 for (vertex = 0; vertex < num_verts; ++vertex) { 448 /* Set up the correct destination index for this vertex */ 449 brw_MOV(p, get_element_ud(c->reg.header, 5), 450 get_element_ud(c->reg.destination_indices, vertex)); 451 452 for (binding = 0; binding < key->num_transform_feedback_bindings; 453 ++binding) { 454 unsigned char varying = 455 key->transform_feedback_bindings[binding]; 456 unsigned char slot = c->vue_map.varying_to_slot[varying]; 457 /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1: 458 * 459 * "Prior to End of Thread with a URB_WRITE, the kernel must 460 * ensure that all writes are complete by sending the final 461 * write as a committed write." 462 */ 463 bool final_write = 464 binding == key->num_transform_feedback_bindings - 1 && 465 vertex == num_verts - 1; 466 struct brw_reg vertex_slot = c->reg.vertex[vertex]; 467 vertex_slot.nr += slot / 2; 468 vertex_slot.subnr = (slot % 2) * 16; 469 /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */ 470 vertex_slot.swizzle = varying == VARYING_SLOT_PSIZ 471 ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding]; 472 brw_set_default_access_mode(p, BRW_ALIGN_16); 473 brw_push_insn_state(p); 474 brw_set_default_exec_size(p, BRW_EXECUTE_4); 475 476 brw_MOV(p, stride(c->reg.header, 4, 4, 1), 477 retype(vertex_slot, BRW_REGISTER_TYPE_UD)); 478 brw_pop_insn_state(p); 479 480 brw_set_default_access_mode(p, BRW_ALIGN_1); 481 brw_svb_write(p, 482 final_write ? c->reg.temp : brw_null_reg(), /* dest */ 483 1, /* msg_reg_nr */ 484 c->reg.header, /* src0 */ 485 BRW_GFX6_SOL_BINDING_START + binding, /* binding_table_index */ 486 final_write); /* send_commit_msg */ 487 } 488 } 489 brw_ENDIF(p); 490 491 /* Now, reinitialize the header register from R0 to restore the parts of 492 * the register that we overwrote while streaming out transform feedback 493 * data. 494 */ 495 brw_ff_gs_initialize_header(c); 496 497 /* Finally, wait for the write commit to occur so that we can proceed to 498 * other things safely. 499 * 500 * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3: 501 * 502 * The write commit does not modify the destination register, but 503 * merely clears the dependency associated with the destination 504 * register. Thus, a simple “mov” instruction using the register as a 505 * source is sufficient to wait for the write commit to occur. 506 */ 507 brw_MOV(p, c->reg.temp, c->reg.temp); 508 } 509 510 brw_ff_gs_ff_sync(c, 1); 511 512 brw_ff_gs_overwrite_header_dw2_from_r0(c); 513 switch (num_verts) { 514 case 1: 515 brw_ff_gs_offset_header_dw2(c, 516 URB_WRITE_PRIM_START | URB_WRITE_PRIM_END); 517 brw_ff_gs_emit_vue(c, c->reg.vertex[0], true); 518 break; 519 case 2: 520 brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START); 521 brw_ff_gs_emit_vue(c, c->reg.vertex[0], false); 522 brw_ff_gs_offset_header_dw2(c, 523 URB_WRITE_PRIM_END - URB_WRITE_PRIM_START); 524 brw_ff_gs_emit_vue(c, c->reg.vertex[1], true); 525 break; 526 case 3: 527 if (check_edge_flags) { 528 /* Only emit vertices 0 and 1 if this is the first triangle of the 529 * polygon. Otherwise they are redundant. 530 */ 531 brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), 532 get_element_ud(c->reg.R0, 2), 533 brw_imm_ud(BRW_GS_EDGE_INDICATOR_0)); 534 brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); 535 brw_IF(p, BRW_EXECUTE_1); 536 } 537 brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START); 538 brw_ff_gs_emit_vue(c, c->reg.vertex[0], false); 539 brw_ff_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START); 540 brw_ff_gs_emit_vue(c, c->reg.vertex[1], false); 541 if (check_edge_flags) { 542 brw_ENDIF(p); 543 /* Only emit vertex 2 in PRIM_END mode if this is the last triangle 544 * of the polygon. Otherwise leave the primitive incomplete because 545 * there are more polygon vertices coming. 546 */ 547 brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), 548 get_element_ud(c->reg.R0, 2), 549 brw_imm_ud(BRW_GS_EDGE_INDICATOR_1)); 550 brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); 551 brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL); 552 } 553 brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_END); 554 brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); 555 brw_ff_gs_emit_vue(c, c->reg.vertex[2], true); 556 break; 557 } 558} 559 560const unsigned * 561brw_compile_ff_gs_prog(struct brw_compiler *compiler, 562 void *mem_ctx, 563 const struct brw_ff_gs_prog_key *key, 564 struct brw_ff_gs_prog_data *prog_data, 565 struct brw_vue_map *vue_map, 566 unsigned *final_assembly_size) 567{ 568 struct brw_ff_gs_compile c; 569 const GLuint *program; 570 571 memset(&c, 0, sizeof(c)); 572 573 c.key = *key; 574 c.vue_map = *vue_map; 575 c.nr_regs = (c.vue_map.num_slots + 1)/2; 576 c.prog_data = prog_data; 577 578 mem_ctx = ralloc_context(NULL); 579 580 /* Begin the compilation: 581 */ 582 brw_init_codegen(compiler->devinfo, &c.func, mem_ctx); 583 584 c.func.single_program_flow = 1; 585 586 /* For some reason the thread is spawned with only 4 channels 587 * unmasked. 588 */ 589 brw_set_default_mask_control(&c.func, BRW_MASK_DISABLE); 590 591 if (compiler->devinfo->ver >= 6) { 592 unsigned num_verts; 593 bool check_edge_flag; 594 /* On Sandybridge, we use the GS for implementing transform feedback 595 * (called "Stream Out" in the PRM). 596 */ 597 switch (key->primitive) { 598 case _3DPRIM_POINTLIST: 599 num_verts = 1; 600 check_edge_flag = false; 601 break; 602 case _3DPRIM_LINELIST: 603 case _3DPRIM_LINESTRIP: 604 case _3DPRIM_LINELOOP: 605 num_verts = 2; 606 check_edge_flag = false; 607 break; 608 case _3DPRIM_TRILIST: 609 case _3DPRIM_TRIFAN: 610 case _3DPRIM_TRISTRIP: 611 case _3DPRIM_RECTLIST: 612 num_verts = 3; 613 check_edge_flag = false; 614 break; 615 case _3DPRIM_QUADLIST: 616 case _3DPRIM_QUADSTRIP: 617 case _3DPRIM_POLYGON: 618 num_verts = 3; 619 check_edge_flag = true; 620 break; 621 default: 622 unreachable("Unexpected primitive type in Gen6 SOL program."); 623 } 624 gfx6_sol_program(&c, key, num_verts, check_edge_flag); 625 } else { 626 /* On Gen4-5, we use the GS to decompose certain types of primitives. 627 * Note that primitives which don't require a GS program have already 628 * been weeded out by now. 629 */ 630 switch (key->primitive) { 631 case _3DPRIM_QUADLIST: 632 brw_ff_gs_quads( &c, key ); 633 break; 634 case _3DPRIM_QUADSTRIP: 635 brw_ff_gs_quad_strip( &c, key ); 636 break; 637 case _3DPRIM_LINELOOP: 638 brw_ff_gs_lines( &c ); 639 break; 640 default: 641 return NULL; 642 } 643 } 644 645 brw_compact_instructions(&c.func, 0, NULL); 646 647 /* get the program 648 */ 649 program = brw_get_program(&c.func, final_assembly_size); 650 651 if (INTEL_DEBUG(DEBUG_GS)) { 652 fprintf(stderr, "gs:\n"); 653 brw_disassemble_with_labels(compiler->devinfo, c.func.store, 654 0, *final_assembly_size, stderr); 655 fprintf(stderr, "\n"); 656 } 657 658 return program; 659} 660 661