/* draw_gs.c revision 7ec681f3 */
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 * 26 **************************************************************************/ 27 28#include "draw_gs.h" 29 30#include "draw_private.h" 31#include "draw_context.h" 32#ifdef DRAW_LLVM_AVAILABLE 33#include "draw_llvm.h" 34#endif 35 36#include "tgsi/tgsi_parse.h" 37#include "tgsi/tgsi_exec.h" 38#include "nir/nir_to_tgsi_info.h" 39#include "pipe/p_shader_tokens.h" 40 41#include "util/u_math.h" 42#include "util/u_memory.h" 43#include "util/u_prim.h" 44#include "util/ralloc.h" 45/* fixme: move it from here */ 46#define MAX_PRIMITIVES 64 47 48static inline int 49draw_gs_get_input_index(int semantic, int index, 50 const struct tgsi_shader_info *input_info) 51{ 52 int i; 53 const ubyte *input_semantic_names = input_info->output_semantic_name; 54 const ubyte *input_semantic_indices = input_info->output_semantic_index; 55 for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { 56 if (input_semantic_names[i] == semantic && 57 input_semantic_indices[i] == index) 58 return i; 59 } 60 return -1; 61} 62 63/** 64 * We execute geometry shaders in the SOA mode, so ideally we want to 65 * flush when the number of currently fetched primitives is equal to 66 * the number of elements in the SOA vector. This ensures that the 67 * throughput is optimized for the given vector instruction set. 68 */ 69static inline boolean 70draw_gs_should_flush(struct draw_geometry_shader *shader) 71{ 72 return (shader->fetched_prim_count == shader->vector_length || shader->num_invocations > 1); 73} 74 75/*#define DEBUG_OUTPUTS 1*/ 76static void 77tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader, 78 unsigned stream, 79 unsigned num_primitives, 80 float (**p_output)[4]) 81{ 82 struct tgsi_exec_machine *machine = shader->machine; 83 unsigned prim_idx, j, slot; 84 float (*output)[4]; 85 86 output = *p_output; 87 88 /* Unswizzle all output results. 
89 */ 90 91 for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) { 92 unsigned num_verts_per_prim = machine->Primitives[stream][prim_idx]; 93 unsigned prim_offset = machine->PrimitiveOffsets[stream][prim_idx]; 94 shader->stream[stream].primitive_lengths[prim_idx + shader->stream[stream].emitted_primitives] = 95 machine->Primitives[stream][prim_idx]; 96 shader->stream[stream].emitted_vertices += num_verts_per_prim; 97 98 for (j = 0; j < num_verts_per_prim; j++) { 99 int idx = prim_offset + j * shader->info.num_outputs; 100#ifdef DEBUG_OUTPUTS 101 debug_printf("%d/%d) Output vert:\n", stream, idx / shader->info.num_outputs); 102#endif 103 for (slot = 0; slot < shader->info.num_outputs; slot++) { 104 output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[0]; 105 output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[0]; 106 output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[0]; 107 output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[0]; 108#ifdef DEBUG_OUTPUTS 109 debug_printf("\t%d: %f %f %f %f\n", slot, 110 output[slot][0], 111 output[slot][1], 112 output[slot][2], 113 output[slot][3]); 114#endif 115 } 116 output = (float (*)[4])((char *)output + shader->vertex_size); 117 } 118 } 119 *p_output = output; 120 shader->stream[stream].emitted_primitives += num_primitives; 121} 122 123/*#define DEBUG_INPUTS 1*/ 124static void tgsi_fetch_gs_input(struct draw_geometry_shader *shader, 125 unsigned *indices, 126 unsigned num_vertices, 127 unsigned prim_idx) 128{ 129 struct tgsi_exec_machine *machine = shader->machine; 130 unsigned slot, i; 131 int vs_slot; 132 unsigned input_vertex_stride = shader->input_vertex_stride; 133 const float (*input_ptr)[4]; 134 135 int primid_sv = machine->SysSemanticToIndex[TGSI_SEMANTIC_PRIMID]; 136 if (primid_sv != -1) { 137 for (unsigned j = 0; j < TGSI_QUAD_SIZE; j++) 138 machine->SystemValue[primid_sv].xyzw[0].i[j] = shader->in_prim_idx; 139 } 140 141 input_ptr = shader->input; 142 143 for (i = 0; i < num_vertices; ++i) { 
144 const float (*input)[4]; 145#if DEBUG_INPUTS 146 debug_printf("%d) vertex index = %d (prim idx = %d)\n", 147 i, indices[i], prim_idx); 148#endif 149 input = (const float (*)[4])( 150 (const char *)input_ptr + (indices[i] * input_vertex_stride)); 151 for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) { 152 unsigned idx = i * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot; 153 if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) { 154 machine->Inputs[idx].xyzw[0].u[prim_idx] = shader->in_prim_idx; 155 machine->Inputs[idx].xyzw[1].u[prim_idx] = shader->in_prim_idx; 156 machine->Inputs[idx].xyzw[2].u[prim_idx] = shader->in_prim_idx; 157 machine->Inputs[idx].xyzw[3].u[prim_idx] = shader->in_prim_idx; 158 } else { 159 vs_slot = draw_gs_get_input_index( 160 shader->info.input_semantic_name[slot], 161 shader->info.input_semantic_index[slot], 162 shader->input_info); 163 if (vs_slot < 0) { 164 debug_printf("VS/GS signature mismatch!\n"); 165 machine->Inputs[idx].xyzw[0].f[prim_idx] = 0; 166 machine->Inputs[idx].xyzw[1].f[prim_idx] = 0; 167 machine->Inputs[idx].xyzw[2].f[prim_idx] = 0; 168 machine->Inputs[idx].xyzw[3].f[prim_idx] = 0; 169 } else { 170#if DEBUG_INPUTS 171 debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n", 172 slot, vs_slot, idx); 173 assert(!util_is_inf_or_nan(input[vs_slot][0])); 174 assert(!util_is_inf_or_nan(input[vs_slot][1])); 175 assert(!util_is_inf_or_nan(input[vs_slot][2])); 176 assert(!util_is_inf_or_nan(input[vs_slot][3])); 177#endif 178 machine->Inputs[idx].xyzw[0].f[prim_idx] = input[vs_slot][0]; 179 machine->Inputs[idx].xyzw[1].f[prim_idx] = input[vs_slot][1]; 180 machine->Inputs[idx].xyzw[2].f[prim_idx] = input[vs_slot][2]; 181 machine->Inputs[idx].xyzw[3].f[prim_idx] = input[vs_slot][3]; 182#if DEBUG_INPUTS 183 debug_printf("\t\t%f %f %f %f\n", 184 machine->Inputs[idx].xyzw[0].f[prim_idx], 185 machine->Inputs[idx].xyzw[1].f[prim_idx], 186 machine->Inputs[idx].xyzw[2].f[prim_idx], 187 
machine->Inputs[idx].xyzw[3].f[prim_idx]); 188#endif 189 ++vs_slot; 190 } 191 } 192 } 193 } 194} 195 196static void tgsi_gs_prepare(struct draw_geometry_shader *shader, 197 const void *constants[PIPE_MAX_CONSTANT_BUFFERS], 198 const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS]) 199{ 200 struct tgsi_exec_machine *machine = shader->machine; 201 tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS, 202 constants, constants_size); 203} 204 205static void tgsi_gs_run(struct draw_geometry_shader *shader, 206 unsigned input_primitives, 207 unsigned *out_prims) 208{ 209 struct tgsi_exec_machine *machine = shader->machine; 210 int i; 211 212 if (shader->info.uses_invocationid) { 213 unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_INVOCATIONID]; 214 for (int j = 0; j < TGSI_QUAD_SIZE; j++) 215 machine->SystemValue[i].xyzw[0].i[j] = shader->invocation_id; 216 } 217 218 /* run interpreter */ 219 tgsi_exec_machine_run(machine, 0); 220 221 for (i = 0; i < 4; i++) 222 out_prims[i] = machine->OutputPrimCount[i]; 223} 224 225#ifdef DRAW_LLVM_AVAILABLE 226 227static void 228llvm_fetch_gs_input(struct draw_geometry_shader *shader, 229 unsigned *indices, 230 unsigned num_vertices, 231 unsigned prim_idx) 232{ 233 unsigned slot, i; 234 int vs_slot; 235 unsigned input_vertex_stride = shader->input_vertex_stride; 236 const float (*input_ptr)[4]; 237 float (*input_data)[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS] = &shader->gs_input->data; 238 239 shader->llvm_prim_ids[shader->fetched_prim_count] = shader->in_prim_idx; 240 241 input_ptr = shader->input; 242 243 for (i = 0; i < num_vertices; ++i) { 244 const float (*input)[4]; 245#if DEBUG_INPUTS 246 debug_printf("%d) vertex index = %d (prim idx = %d)\n", 247 i, indices[i], prim_idx); 248#endif 249 input = (const float (*)[4])( 250 (const char *)input_ptr + (indices[i] * input_vertex_stride)); 251 for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) { 252 if 
(shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) { 253 /* skip. we handle system values through gallivm */ 254 /* NOTE: If we hit this case here it's an ordinary input not a sv, 255 * even though it probably should be a sv. 256 * Not sure how to set it up as regular input however if that even, 257 * would make sense so hack around this later in gallivm. 258 */ 259 } else { 260 vs_slot = draw_gs_get_input_index( 261 shader->info.input_semantic_name[slot], 262 shader->info.input_semantic_index[slot], 263 shader->input_info); 264 if (vs_slot < 0) { 265 debug_printf("VS/GS signature mismatch!\n"); 266 (*input_data)[i][slot][0][prim_idx] = 0; 267 (*input_data)[i][slot][1][prim_idx] = 0; 268 (*input_data)[i][slot][2][prim_idx] = 0; 269 (*input_data)[i][slot][3][prim_idx] = 0; 270 } else { 271#if DEBUG_INPUTS 272 debug_printf("\tSlot = %d, vs_slot = %d, i = %d:\n", 273 slot, vs_slot, i); 274 assert(!util_is_inf_or_nan(input[vs_slot][0])); 275 assert(!util_is_inf_or_nan(input[vs_slot][1])); 276 assert(!util_is_inf_or_nan(input[vs_slot][2])); 277 assert(!util_is_inf_or_nan(input[vs_slot][3])); 278#endif 279 (*input_data)[i][slot][0][prim_idx] = input[vs_slot][0]; 280 (*input_data)[i][slot][1][prim_idx] = input[vs_slot][1]; 281 (*input_data)[i][slot][2][prim_idx] = input[vs_slot][2]; 282 (*input_data)[i][slot][3][prim_idx] = input[vs_slot][3]; 283#if DEBUG_INPUTS 284 debug_printf("\t\t%f %f %f %f\n", 285 (*input_data)[i][slot][0][prim_idx], 286 (*input_data)[i][slot][1][prim_idx], 287 (*input_data)[i][slot][2][prim_idx], 288 (*input_data)[i][slot][3][prim_idx]); 289#endif 290 ++vs_slot; 291 } 292 } 293 } 294 } 295} 296 297static void 298llvm_fetch_gs_outputs(struct draw_geometry_shader *shader, 299 unsigned stream, 300 unsigned num_primitives, 301 float (**p_output)[4]) 302{ 303 int total_verts = 0; 304 int vertex_count = 0; 305 int total_prims = 0; 306 int max_prims_per_invocation = 0; 307 char *output_ptr = (char*)shader->gs_output[stream]; 308 int i, j, 
prim_idx; 309 unsigned next_prim_boundary = shader->primitive_boundary; 310 311 for (i = 0; i < shader->vector_length; ++i) { 312 int prims = shader->llvm_emitted_primitives[i + (stream * shader->vector_length)]; 313 total_prims += prims; 314 max_prims_per_invocation = MAX2(max_prims_per_invocation, prims); 315 } 316 for (i = 0; i < shader->vector_length; ++i) { 317 total_verts += shader->llvm_emitted_vertices[i + (stream * shader->vector_length)]; 318 } 319 320 output_ptr += shader->stream[stream].emitted_vertices * shader->vertex_size; 321 for (i = 0; i < shader->vector_length - 1; ++i) { 322 int current_verts = shader->llvm_emitted_vertices[i + (stream * shader->vector_length)]; 323 int next_verts = shader->llvm_emitted_vertices[i + 1 + (stream * shader->vector_length)]; 324#if 0 325 int j; 326 for (j = 0; j < current_verts; ++j) { 327 struct vertex_header *vh = (struct vertex_header *) 328 (output_ptr + shader->vertex_size * (i * next_prim_boundary + j)); 329 debug_printf("--- %d) [%f, %f, %f, %f]\n", j + vertex_count, 330 vh->data[0][0], vh->data[0][1], vh->data[0][2], vh->data[0][3]); 331 332 } 333#endif 334 debug_assert(current_verts <= shader->max_output_vertices); 335 debug_assert(next_verts <= shader->max_output_vertices); 336 if (next_verts) { 337 memmove(output_ptr + (vertex_count + current_verts) * shader->vertex_size, 338 output_ptr + ((i + 1) * next_prim_boundary) * shader->vertex_size, 339 shader->vertex_size * next_verts); 340 } 341 vertex_count += current_verts; 342 } 343 344#if 0 345 { 346 int i; 347 for (i = 0; i < total_verts; ++i) { 348 struct vertex_header *vh = (struct vertex_header *)(output_ptr + shader->vertex_size * i); 349 debug_printf("%d) Vertex:\n", i); 350 for (j = 0; j < shader->info.num_outputs; ++j) { 351 unsigned *udata = (unsigned*)vh->data[j]; 352 debug_printf(" %d) [%f, %f, %f, %f] [%d, %d, %d, %d]\n", j, 353 vh->data[j][0], vh->data[j][1], vh->data[j][2], vh->data[j][3], 354 udata[0], udata[1], udata[2], udata[3]); 355 } 356 
357 } 358 } 359#endif 360 361 prim_idx = 0; 362 for (i = 0; i < shader->vector_length; ++i) { 363 int num_prims = shader->llvm_emitted_primitives[i + (stream * shader->vector_length)]; 364 for (j = 0; j < num_prims; ++j) { 365 int prim_length = 366 shader->llvm_prim_lengths[j * shader->num_vertex_streams + stream][i]; 367 shader->stream[stream].primitive_lengths[shader->stream[stream].emitted_primitives + prim_idx] = 368 prim_length; 369 ++prim_idx; 370 } 371 } 372 373 shader->stream[stream].emitted_primitives += total_prims; 374 shader->stream[stream].emitted_vertices += total_verts; 375} 376 377static void 378llvm_gs_prepare(struct draw_geometry_shader *shader, 379 const void *constants[PIPE_MAX_CONSTANT_BUFFERS], 380 const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS]) 381{ 382} 383 384static void 385llvm_gs_run(struct draw_geometry_shader *shader, 386 unsigned input_primitives, unsigned *out_prims) 387{ 388 struct vertex_header *input[PIPE_MAX_VERTEX_STREAMS]; 389 for (unsigned i = 0; i < shader->num_vertex_streams; i++) { 390 char *tmp = (char *)shader->gs_output[i]; 391 tmp += shader->stream[i].emitted_vertices * shader->vertex_size; 392 input[i] = (struct vertex_header *)tmp; 393 } 394 395 shader->current_variant->jit_func( 396 shader->jit_context, shader->gs_input->data, 397 input, 398 input_primitives, 399 shader->draw->instance_id, 400 shader->llvm_prim_ids, 401 shader->invocation_id, 402 shader->draw->pt.user.viewid); 403 404 for (unsigned i = 0; i < shader->num_vertex_streams; i++) { 405 out_prims[i] = shader->jit_context->emitted_prims[i]; 406 } 407} 408 409#endif 410 411static void gs_flush(struct draw_geometry_shader *shader) 412{ 413 unsigned out_prim_count[TGSI_MAX_VERTEX_STREAMS]; 414 unsigned i; 415 unsigned input_primitives = shader->fetched_prim_count; 416 417 if (shader->draw->collect_statistics) { 418 shader->draw->statistics.gs_invocations += input_primitives; 419 } 420 421 debug_assert(input_primitives > 0 && 422 input_primitives <= 
4); 423 424 for (unsigned invocation = 0; invocation < shader->num_invocations; invocation++) { 425 shader->invocation_id = invocation; 426 shader->run(shader, input_primitives, out_prim_count); 427 for (i = 0; i < shader->num_vertex_streams; i++) { 428 shader->fetch_outputs(shader, i, out_prim_count[i], 429 &shader->stream[i].tmp_output); 430 } 431 } 432 433#if 0 434 for (i = 0; i < shader->num_vertex_streams; i++) { 435 debug_printf("stream %d: PRIM emitted prims = %d (verts=%d), cur prim count = %d\n", 436 i, 437 shader->stream[i].emitted_primitives, shader->stream[i].emitted_vertices, 438 out_prim_count[i]); 439 } 440#endif 441 442 shader->fetched_prim_count = 0; 443} 444 445static void gs_point(struct draw_geometry_shader *shader, 446 int idx) 447{ 448 unsigned indices[1]; 449 450 indices[0] = idx; 451 452 shader->fetch_inputs(shader, indices, 1, 453 shader->fetched_prim_count); 454 ++shader->in_prim_idx; 455 ++shader->fetched_prim_count; 456 457 if (draw_gs_should_flush(shader)) 458 gs_flush(shader); 459} 460 461static void gs_line(struct draw_geometry_shader *shader, 462 int i0, int i1) 463{ 464 unsigned indices[2]; 465 466 indices[0] = i0; 467 indices[1] = i1; 468 469 shader->fetch_inputs(shader, indices, 2, 470 shader->fetched_prim_count); 471 ++shader->in_prim_idx; 472 ++shader->fetched_prim_count; 473 474 if (draw_gs_should_flush(shader)) 475 gs_flush(shader); 476} 477 478static void gs_line_adj(struct draw_geometry_shader *shader, 479 int i0, int i1, int i2, int i3) 480{ 481 unsigned indices[4]; 482 483 indices[0] = i0; 484 indices[1] = i1; 485 indices[2] = i2; 486 indices[3] = i3; 487 488 shader->fetch_inputs(shader, indices, 4, 489 shader->fetched_prim_count); 490 ++shader->in_prim_idx; 491 ++shader->fetched_prim_count; 492 493 if (draw_gs_should_flush(shader)) 494 gs_flush(shader); 495} 496 497static void gs_tri(struct draw_geometry_shader *shader, 498 int i0, int i1, int i2) 499{ 500 unsigned indices[3]; 501 502 indices[0] = i0; 503 indices[1] = i1; 
504 indices[2] = i2; 505 506 shader->fetch_inputs(shader, indices, 3, 507 shader->fetched_prim_count); 508 ++shader->in_prim_idx; 509 ++shader->fetched_prim_count; 510 511 if (draw_gs_should_flush(shader)) 512 gs_flush(shader); 513} 514 515static void gs_tri_adj(struct draw_geometry_shader *shader, 516 int i0, int i1, int i2, 517 int i3, int i4, int i5) 518{ 519 unsigned indices[6]; 520 521 indices[0] = i0; 522 indices[1] = i1; 523 indices[2] = i2; 524 indices[3] = i3; 525 indices[4] = i4; 526 indices[5] = i5; 527 528 shader->fetch_inputs(shader, indices, 6, 529 shader->fetched_prim_count); 530 ++shader->in_prim_idx; 531 ++shader->fetched_prim_count; 532 533 if (draw_gs_should_flush(shader)) 534 gs_flush(shader); 535} 536 537#define FUNC gs_run 538#define GET_ELT(idx) (idx) 539#include "draw_gs_tmp.h" 540 541 542#define FUNC gs_run_elts 543#define LOCAL_VARS const ushort *elts = input_prims->elts; 544#define GET_ELT(idx) (elts[idx]) 545#include "draw_gs_tmp.h" 546 547 548/** 549 * Execute geometry shader. 550 */ 551int draw_geometry_shader_run(struct draw_geometry_shader *shader, 552 const void *constants[PIPE_MAX_CONSTANT_BUFFERS], 553 const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], 554 const struct draw_vertex_info *input_verts, 555 const struct draw_prim_info *input_prim, 556 const struct tgsi_shader_info *input_info, 557 struct draw_vertex_info *output_verts, 558 struct draw_prim_info *output_prims ) 559{ 560 const float (*input)[4] = (const float (*)[4])input_verts->verts->data; 561 unsigned input_stride = input_verts->vertex_size; 562 unsigned num_outputs = draw_total_gs_outputs(shader->draw); 563 unsigned vertex_size = sizeof(struct vertex_header) + num_outputs * 4 * sizeof(float); 564 unsigned num_input_verts = input_prim->linear ? 
565 input_verts->count : 566 input_prim->count; 567 unsigned num_in_primitives = 568 align( 569 MAX2(u_decomposed_prims_for_vertices(input_prim->prim, 570 num_input_verts), 571 u_decomposed_prims_for_vertices(shader->input_primitive, 572 num_input_verts)), 573 shader->vector_length); 574 unsigned max_out_prims = 575 u_decomposed_prims_for_vertices(shader->output_primitive, 576 shader->max_output_vertices) 577 * num_in_primitives; 578 /* we allocate exactly one extra vertex per primitive to allow the GS to emit 579 * overflown vertices into some area where they won't harm anyone */ 580 unsigned total_verts_per_buffer = shader->primitive_boundary * 581 num_in_primitives; 582 int i; 583 //Assume at least one primitive 584 max_out_prims = MAX2(max_out_prims, 1); 585 586 for (i = 0; i < shader->num_vertex_streams; i++) { 587 /* write all the vertex data into all the streams */ 588 output_verts[i].vertex_size = vertex_size; 589 output_verts[i].stride = output_verts[i].vertex_size; 590 output_verts[i].verts = 591 (struct vertex_header *)MALLOC(output_verts[i].vertex_size * 592 total_verts_per_buffer * shader->num_invocations + 593 DRAW_EXTRA_VERTICES_PADDING); 594 debug_assert(output_verts[i].verts); 595 } 596 597#if 0 598 debug_printf("%s count = %d (in prims # = %d, invocs = %d, streams = %d)\n", 599 __FUNCTION__, num_input_verts, num_in_primitives, 600 shader->num_invocations, shader->num_vertex_streams); 601 debug_printf("\tlinear = %d, prim_info->count = %d\n", 602 input_prim->linear, input_prim->count); 603 debug_printf("\tprim pipe = %s, shader in = %s, shader out = %s\n", 604 u_prim_name(input_prim->prim), 605 u_prim_name(shader->input_primitive), 606 u_prim_name(shader->output_primitive)); 607 debug_printf("\tmaxv = %d, maxp = %d, primitive_boundary = %d, " 608 "vertex_size = %d, tverts = %d\n", 609 shader->max_output_vertices, max_out_prims, 610 shader->primitive_boundary, output_verts->vertex_size, 611 total_verts_per_buffer); 612#endif 613 614 for (i = 0; i < 
shader->num_vertex_streams; i++) { 615 shader->stream[i].emitted_vertices = 0; 616 shader->stream[i].emitted_primitives = 0; 617 FREE(shader->stream[i].primitive_lengths); 618 shader->stream[i].primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned) * shader->num_invocations); 619 shader->stream[i].tmp_output = (float (*)[4])output_verts[i].verts->data; 620 } 621 shader->vertex_size = vertex_size; 622 shader->fetched_prim_count = 0; 623 shader->input_vertex_stride = input_stride; 624 shader->input = input; 625 shader->input_info = input_info; 626 627#ifdef DRAW_LLVM_AVAILABLE 628 if (shader->draw->llvm) { 629 for (i = 0; i < shader->num_vertex_streams; i++) { 630 shader->gs_output[i] = output_verts[i].verts; 631 } 632 if (max_out_prims > shader->max_out_prims) { 633 unsigned i; 634 if (shader->llvm_prim_lengths) { 635 for (i = 0; i < shader->num_vertex_streams * shader->max_out_prims; ++i) { 636 align_free(shader->llvm_prim_lengths[i]); 637 } 638 FREE(shader->llvm_prim_lengths); 639 } 640 641 shader->llvm_prim_lengths = MALLOC(shader->num_vertex_streams * max_out_prims * sizeof(unsigned*)); 642 for (i = 0; i < shader->num_vertex_streams * max_out_prims; ++i) { 643 int vector_size = shader->vector_length * sizeof(unsigned); 644 shader->llvm_prim_lengths[i] = 645 align_malloc(vector_size, vector_size); 646 } 647 648 shader->max_out_prims = max_out_prims; 649 } 650 shader->jit_context->prim_lengths = shader->llvm_prim_lengths; 651 shader->jit_context->emitted_vertices = shader->llvm_emitted_vertices; 652 shader->jit_context->emitted_prims = shader->llvm_emitted_primitives; 653 } 654#endif 655 656 shader->prepare(shader, constants, constants_size); 657 658 if (input_prim->linear) 659 gs_run(shader, input_prim, input_verts, 660 output_prims, output_verts); 661 else 662 gs_run_elts(shader, input_prim, input_verts, 663 output_prims, output_verts); 664 665 /* Flush the remaining primitives. 
Will happen if 666 * num_input_primitives % 4 != 0 667 */ 668 if (shader->fetched_prim_count > 0) { 669 gs_flush(shader); 670 } 671 debug_assert(shader->fetched_prim_count == 0); 672 673 /* Update prim_info: 674 */ 675 for (i = 0; i < shader->num_vertex_streams; i++) { 676 output_prims[i].linear = TRUE; 677 output_prims[i].elts = NULL; 678 output_prims[i].start = 0; 679 output_prims[i].count = shader->stream[i].emitted_vertices; 680 output_prims[i].prim = shader->output_primitive; 681 output_prims[i].flags = 0x0; 682 output_prims[i].primitive_lengths = shader->stream[i].primitive_lengths; 683 output_prims[i].primitive_count = shader->stream[i].emitted_primitives; 684 output_verts[i].count = shader->stream[i].emitted_vertices; 685 686 if (shader->draw->collect_statistics) { 687 unsigned j; 688 for (j = 0; j < shader->stream[i].emitted_primitives; ++j) { 689 shader->draw->statistics.gs_primitives += 690 u_decomposed_prims_for_vertices(shader->output_primitive, 691 shader->stream[i].primitive_lengths[j]); 692 } 693 } 694 } 695 696#if 0 697 debug_printf("GS finished\n"); 698 for (i = 0; i < 4; i++) 699 debug_printf("stream %d: prims = %d verts = %d\n", i, output_prims[i].primitive_count, output_verts[i].count); 700#endif 701 702 return 0; 703} 704 705void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, 706 struct draw_context *draw) 707{ 708 boolean use_llvm = draw->llvm != NULL; 709 if (!use_llvm && shader && shader->machine->Tokens != shader->state.tokens) { 710 tgsi_exec_machine_bind_shader(shader->machine, 711 shader->state.tokens, 712 draw->gs.tgsi.sampler, 713 draw->gs.tgsi.image, 714 draw->gs.tgsi.buffer); 715 } 716} 717 718 719boolean 720draw_gs_init( struct draw_context *draw ) 721{ 722 if (!draw->llvm) { 723 draw->gs.tgsi.machine = tgsi_exec_machine_create(PIPE_SHADER_GEOMETRY); 724 725 for (unsigned i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) { 726 draw->gs.tgsi.machine->Primitives[i] = align_malloc( 727 MAX_PRIMITIVES * sizeof(struct 
tgsi_exec_vector), 16); 728 draw->gs.tgsi.machine->PrimitiveOffsets[i] = align_malloc( 729 MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16); 730 if (!draw->gs.tgsi.machine->Primitives[i] || !draw->gs.tgsi.machine->PrimitiveOffsets[i]) 731 return FALSE; 732 memset(draw->gs.tgsi.machine->Primitives[i], 0, 733 MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector)); 734 memset(draw->gs.tgsi.machine->PrimitiveOffsets[i], 0, 735 MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector)); 736 } 737 } 738 739 return TRUE; 740} 741 742void draw_gs_destroy( struct draw_context *draw ) 743{ 744 int i; 745 if (draw->gs.tgsi.machine) { 746 for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) { 747 align_free(draw->gs.tgsi.machine->Primitives[i]); 748 align_free(draw->gs.tgsi.machine->PrimitiveOffsets[i]); 749 } 750 tgsi_exec_machine_destroy(draw->gs.tgsi.machine); 751 } 752} 753 754struct draw_geometry_shader * 755draw_create_geometry_shader(struct draw_context *draw, 756 const struct pipe_shader_state *state) 757{ 758#ifdef DRAW_LLVM_AVAILABLE 759 boolean use_llvm = draw->llvm != NULL; 760 struct llvm_geometry_shader *llvm_gs = NULL; 761#endif 762 struct draw_geometry_shader *gs; 763 unsigned i; 764 765#ifdef DRAW_LLVM_AVAILABLE 766 if (use_llvm) { 767 llvm_gs = CALLOC_STRUCT(llvm_geometry_shader); 768 769 if (!llvm_gs) 770 return NULL; 771 772 gs = &llvm_gs->base; 773 774 make_empty_list(&llvm_gs->variants); 775 } else 776#endif 777 { 778 gs = CALLOC_STRUCT(draw_geometry_shader); 779 } 780 781 if (!gs) 782 return NULL; 783 784 gs->draw = draw; 785 gs->state = *state; 786 787 if (state->type == PIPE_SHADER_IR_TGSI) { 788 gs->state.tokens = tgsi_dup_tokens(state->tokens); 789 if (!gs->state.tokens) { 790 FREE(gs); 791 return NULL; 792 } 793 794 tgsi_scan_shader(state->tokens, &gs->info); 795 } else 796 nir_tgsi_scan_shader(state->ir.nir, &gs->info, true); 797 798 /* setup the defaults */ 799 gs->max_out_prims = 0; 800 801#ifdef DRAW_LLVM_AVAILABLE 802 if (use_llvm) { 803 /* TODO: change the 
input array to handle the following 804 vector length, instead of the currently hardcoded 805 TGSI_NUM_CHANNELS 806 gs->vector_length = lp_native_vector_width / 32;*/ 807 gs->vector_length = TGSI_NUM_CHANNELS; 808 } else 809#endif 810 { 811 gs->vector_length = 1; 812 } 813 814 gs->input_primitive = 815 gs->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM]; 816 gs->output_primitive = 817 gs->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]; 818 gs->max_output_vertices = 819 gs->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES]; 820 gs->num_invocations = 821 gs->info.properties[TGSI_PROPERTY_GS_INVOCATIONS]; 822 if (!gs->max_output_vertices) 823 gs->max_output_vertices = 32; 824 825 /* Primitive boundary is bigger than max_output_vertices by one, because 826 * the specification says that the geometry shader should exit if the 827 * number of emitted vertices is bigger or equal to max_output_vertices and 828 * we can't do that because we're running in the SoA mode, which means that 829 * our storing routines will keep getting called on channels that have 830 * overflown. 831 * So we need some scratch area where we can keep writing the overflown 832 * vertices without overwriting anything important or crashing. 
833 */ 834 gs->primitive_boundary = gs->max_output_vertices + 1; 835 836 gs->position_output = -1; 837 bool found_clipvertex = false; 838 for (i = 0; i < gs->info.num_outputs; i++) { 839 if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && 840 gs->info.output_semantic_index[i] == 0) 841 gs->position_output = i; 842 if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX) 843 gs->viewport_index_output = i; 844 if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPVERTEX && 845 gs->info.output_semantic_index[i] == 0) { 846 found_clipvertex = true; 847 gs->clipvertex_output = i; 848 } 849 if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) { 850 debug_assert(gs->info.output_semantic_index[i] < 851 PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT); 852 gs->ccdistance_output[gs->info.output_semantic_index[i]] = i; 853 } 854 } 855 856 if (!found_clipvertex) 857 gs->clipvertex_output = gs->position_output; 858 859 gs->machine = draw->gs.tgsi.machine; 860 861 gs->num_vertex_streams = 1; 862 for (i = 0; i < gs->state.stream_output.num_outputs; i++) { 863 if (gs->state.stream_output.output[i].stream >= gs->num_vertex_streams) 864 gs->num_vertex_streams = gs->state.stream_output.output[i].stream + 1; 865 } 866 867#ifdef DRAW_LLVM_AVAILABLE 868 if (use_llvm) { 869 int vector_size = gs->vector_length * sizeof(float); 870 gs->gs_input = align_malloc(sizeof(struct draw_gs_inputs), 16); 871 memset(gs->gs_input, 0, sizeof(struct draw_gs_inputs)); 872 gs->llvm_prim_lengths = 0; 873 874 gs->llvm_emitted_primitives = align_malloc(vector_size * gs->num_vertex_streams, vector_size); 875 gs->llvm_emitted_vertices = align_malloc(vector_size * gs->num_vertex_streams, vector_size); 876 gs->llvm_prim_ids = align_calloc(vector_size, vector_size); 877 878 gs->fetch_outputs = llvm_fetch_gs_outputs; 879 gs->fetch_inputs = llvm_fetch_gs_input; 880 gs->prepare = llvm_gs_prepare; 881 gs->run = llvm_gs_run; 882 883 gs->jit_context = &draw->llvm->gs_jit_context; 
884 885 886 llvm_gs->variant_key_size = 887 draw_gs_llvm_variant_key_size( 888 MAX2(gs->info.file_max[TGSI_FILE_SAMPLER]+1, 889 gs->info.file_max[TGSI_FILE_SAMPLER_VIEW]+1), 890 gs->info.file_max[TGSI_FILE_IMAGE]+1); 891 } else 892#endif 893 { 894 gs->fetch_outputs = tgsi_fetch_gs_outputs; 895 gs->fetch_inputs = tgsi_fetch_gs_input; 896 gs->prepare = tgsi_gs_prepare; 897 gs->run = tgsi_gs_run; 898 } 899 900 return gs; 901} 902 903void draw_bind_geometry_shader(struct draw_context *draw, 904 struct draw_geometry_shader *dgs) 905{ 906 draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); 907 908 if (dgs) { 909 draw->gs.geometry_shader = dgs; 910 draw->gs.num_gs_outputs = dgs->info.num_outputs; 911 draw->gs.position_output = dgs->position_output; 912 draw->gs.clipvertex_output = dgs->clipvertex_output; 913 draw_geometry_shader_prepare(dgs, draw); 914 } 915 else { 916 draw->gs.geometry_shader = NULL; 917 draw->gs.num_gs_outputs = 0; 918 } 919} 920 921void draw_delete_geometry_shader(struct draw_context *draw, 922 struct draw_geometry_shader *dgs) 923{ 924 int i; 925 if (!dgs) { 926 return; 927 } 928#ifdef DRAW_LLVM_AVAILABLE 929 if (draw->llvm) { 930 struct llvm_geometry_shader *shader = llvm_geometry_shader(dgs); 931 struct draw_gs_llvm_variant_list_item *li; 932 933 li = first_elem(&shader->variants); 934 while(!at_end(&shader->variants, li)) { 935 struct draw_gs_llvm_variant_list_item *next = next_elem(li); 936 draw_gs_llvm_destroy_variant(li->base); 937 li = next; 938 } 939 940 assert(shader->variants_cached == 0); 941 942 if (dgs->llvm_prim_lengths) { 943 unsigned i; 944 for (i = 0; i < dgs->num_vertex_streams * dgs->max_out_prims; ++i) { 945 align_free(dgs->llvm_prim_lengths[i]); 946 } 947 FREE(dgs->llvm_prim_lengths); 948 } 949 align_free(dgs->llvm_emitted_primitives); 950 align_free(dgs->llvm_emitted_vertices); 951 align_free(dgs->llvm_prim_ids); 952 953 align_free(dgs->gs_input); 954 } 955#endif 956 957 if (draw->gs.tgsi.machine && draw->gs.tgsi.machine->Tokens == 
dgs->state.tokens) 958 draw->gs.tgsi.machine->Tokens = NULL; 959 960 for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) 961 FREE(dgs->stream[i].primitive_lengths); 962 963 if (dgs->state.ir.nir) 964 ralloc_free(dgs->state.ir.nir); 965 FREE((void*) dgs->state.tokens); 966 FREE(dgs); 967} 968 969 970#ifdef DRAW_LLVM_AVAILABLE 971void draw_gs_set_current_variant(struct draw_geometry_shader *shader, 972 struct draw_gs_llvm_variant *variant) 973{ 974 shader->current_variant = variant; 975} 976#endif 977 978/* 979 * Called at the very begin of the draw call with a new instance 980 * Used to reset state that should persist between primitive restart. 981 */ 982void 983draw_geometry_shader_new_instance(struct draw_geometry_shader *gs) 984{ 985 if (!gs) 986 return; 987 988 gs->in_prim_idx = 0; 989} 990