1848b8605Smrg/************************************************************************** 2848b8605Smrg * 3848b8605Smrg * Copyright 2009 VMware, Inc. 4848b8605Smrg * All Rights Reserved. 5848b8605Smrg * 6848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a 7848b8605Smrg * copy of this software and associated documentation files (the 8848b8605Smrg * "Software"), to deal in the Software without restriction, including 9848b8605Smrg * without limitation the rights to use, copy, modify, merge, publish, 10848b8605Smrg * distribute, sub license, and/or sell copies of the Software, and to 11848b8605Smrg * permit persons to whom the Software is furnished to do so, subject to 12848b8605Smrg * the following conditions: 13848b8605Smrg * 14848b8605Smrg * The above copyright notice and this permission notice (including the 15848b8605Smrg * next paragraph) shall be included in all copies or substantial portions 16848b8605Smrg * of the Software. 17848b8605Smrg * 18848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19848b8605Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20848b8605Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21848b8605Smrg * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22848b8605Smrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23848b8605Smrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24848b8605Smrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25848b8605Smrg * 26848b8605Smrg **************************************************************************/ 27848b8605Smrg 28848b8605Smrg#include "draw_gs.h" 29848b8605Smrg 30848b8605Smrg#include "draw_private.h" 31848b8605Smrg#include "draw_context.h" 32848b8605Smrg#ifdef HAVE_LLVM 33848b8605Smrg#include "draw_llvm.h" 34848b8605Smrg#endif 35848b8605Smrg 36848b8605Smrg#include "tgsi/tgsi_parse.h" 37848b8605Smrg#include "tgsi/tgsi_exec.h" 38848b8605Smrg 39848b8605Smrg#include "pipe/p_shader_tokens.h" 40848b8605Smrg 41848b8605Smrg#include "util/u_math.h" 42848b8605Smrg#include "util/u_memory.h" 43848b8605Smrg#include "util/u_prim.h" 44848b8605Smrg 45848b8605Smrg/* fixme: move it from here */ 46848b8605Smrg#define MAX_PRIMITIVES 64 47848b8605Smrg 48b8e80941Smrgstatic inline int 49848b8605Smrgdraw_gs_get_input_index(int semantic, int index, 50848b8605Smrg const struct tgsi_shader_info *input_info) 51848b8605Smrg{ 52848b8605Smrg int i; 53848b8605Smrg const ubyte *input_semantic_names = input_info->output_semantic_name; 54848b8605Smrg const ubyte *input_semantic_indices = input_info->output_semantic_index; 55848b8605Smrg for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { 56848b8605Smrg if (input_semantic_names[i] == semantic && 57848b8605Smrg input_semantic_indices[i] == index) 58848b8605Smrg return i; 59848b8605Smrg } 60848b8605Smrg return -1; 61848b8605Smrg} 62848b8605Smrg 63848b8605Smrg/** 64848b8605Smrg * We execute geometry shaders in the SOA mode, so ideally we want to 65848b8605Smrg * flush when the number of currently fetched primitives is equal to 66848b8605Smrg * the number of elements in the SOA vector. This ensures that the 67b8e80941Smrg * throughput is optimized for the given vector instruction set. 68848b8605Smrg */ 69b8e80941Smrgstatic inline boolean 70848b8605Smrgdraw_gs_should_flush(struct draw_geometry_shader *shader) 71848b8605Smrg{ 72848b8605Smrg return (shader->fetched_prim_count == shader->vector_length); 73848b8605Smrg} 74848b8605Smrg 75848b8605Smrg/*#define DEBUG_OUTPUTS 1*/ 76848b8605Smrgstatic void 77848b8605Smrgtgsi_fetch_gs_outputs(struct draw_geometry_shader *shader, 78b8e80941Smrg unsigned stream, 79848b8605Smrg unsigned num_primitives, 80848b8605Smrg float (**p_output)[4]) 81848b8605Smrg{ 82848b8605Smrg struct tgsi_exec_machine *machine = shader->machine; 83848b8605Smrg unsigned prim_idx, j, slot; 84848b8605Smrg float (*output)[4]; 85848b8605Smrg 86848b8605Smrg output = *p_output; 87848b8605Smrg 88848b8605Smrg /* Unswizzle all output results. 89848b8605Smrg */ 90848b8605Smrg 91848b8605Smrg for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) { 92b8e80941Smrg unsigned num_verts_per_prim = machine->Primitives[stream][prim_idx]; 93b8e80941Smrg unsigned prim_offset = machine->PrimitiveOffsets[stream][prim_idx]; 94b8e80941Smrg shader->stream[stream].primitive_lengths[prim_idx + shader->stream[stream].emitted_primitives] = 95b8e80941Smrg machine->Primitives[stream][prim_idx]; 96b8e80941Smrg shader->stream[stream].emitted_vertices += num_verts_per_prim; 97b8e80941Smrg 98b8e80941Smrg for (j = 0; j < num_verts_per_prim; j++) { 99b8e80941Smrg int idx = prim_offset + j * shader->info.num_outputs; 100848b8605Smrg#ifdef DEBUG_OUTPUTS 101b8e80941Smrg debug_printf("%d/%d) Output vert:\n", stream, idx / shader->info.num_outputs); 102848b8605Smrg#endif 103848b8605Smrg for (slot = 0; slot < shader->info.num_outputs; slot++) { 104848b8605Smrg output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[0]; 105848b8605Smrg output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[0]; 106848b8605Smrg output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[0]; 107848b8605Smrg output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[0]; 108848b8605Smrg#ifdef DEBUG_OUTPUTS 109848b8605Smrg debug_printf("\t%d: %f %f %f %f\n", slot, 110848b8605Smrg output[slot][0], 111848b8605Smrg output[slot][1], 112848b8605Smrg output[slot][2], 113848b8605Smrg output[slot][3]); 114848b8605Smrg#endif 115848b8605Smrg } 116848b8605Smrg output = (float (*)[4])((char *)output + shader->vertex_size); 117848b8605Smrg } 118848b8605Smrg } 119848b8605Smrg *p_output = output; 120b8e80941Smrg shader->stream[stream].emitted_primitives += num_primitives; 121848b8605Smrg} 122848b8605Smrg 123848b8605Smrg/*#define DEBUG_INPUTS 1*/ 124848b8605Smrgstatic void tgsi_fetch_gs_input(struct draw_geometry_shader *shader, 125848b8605Smrg unsigned *indices, 126848b8605Smrg unsigned num_vertices, 127848b8605Smrg unsigned prim_idx) 128848b8605Smrg{ 129848b8605Smrg struct tgsi_exec_machine *machine = shader->machine; 130848b8605Smrg unsigned slot, i; 131848b8605Smrg int vs_slot; 132848b8605Smrg unsigned input_vertex_stride = shader->input_vertex_stride; 133848b8605Smrg const float (*input_ptr)[4]; 134848b8605Smrg 135848b8605Smrg input_ptr = shader->input; 136848b8605Smrg 137848b8605Smrg for (i = 0; i < num_vertices; ++i) { 138848b8605Smrg const float (*input)[4]; 139848b8605Smrg#if DEBUG_INPUTS 140848b8605Smrg debug_printf("%d) vertex index = %d (prim idx = %d)\n", 141848b8605Smrg i, indices[i], prim_idx); 142848b8605Smrg#endif 143848b8605Smrg input = (const float (*)[4])( 144848b8605Smrg (const char *)input_ptr + (indices[i] * input_vertex_stride)); 145848b8605Smrg for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) { 146848b8605Smrg unsigned idx = i * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot; 147848b8605Smrg if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) { 148848b8605Smrg machine->Inputs[idx].xyzw[0].u[prim_idx] = shader->in_prim_idx; 149848b8605Smrg machine->Inputs[idx].xyzw[1].u[prim_idx] = shader->in_prim_idx; 150848b8605Smrg machine->Inputs[idx].xyzw[2].u[prim_idx] = shader->in_prim_idx; 151848b8605Smrg machine->Inputs[idx].xyzw[3].u[prim_idx] = shader->in_prim_idx; 152848b8605Smrg } else { 153848b8605Smrg vs_slot = draw_gs_get_input_index( 154848b8605Smrg shader->info.input_semantic_name[slot], 155848b8605Smrg shader->info.input_semantic_index[slot], 156848b8605Smrg shader->input_info); 157848b8605Smrg if (vs_slot < 0) { 158848b8605Smrg debug_printf("VS/GS signature mismatch!\n"); 159848b8605Smrg machine->Inputs[idx].xyzw[0].f[prim_idx] = 0; 160848b8605Smrg machine->Inputs[idx].xyzw[1].f[prim_idx] = 0; 161848b8605Smrg machine->Inputs[idx].xyzw[2].f[prim_idx] = 0; 162848b8605Smrg machine->Inputs[idx].xyzw[3].f[prim_idx] = 0; 163848b8605Smrg } else { 164848b8605Smrg#if DEBUG_INPUTS 165848b8605Smrg debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n", 166848b8605Smrg slot, vs_slot, idx); 167848b8605Smrg assert(!util_is_inf_or_nan(input[vs_slot][0])); 168848b8605Smrg assert(!util_is_inf_or_nan(input[vs_slot][1])); 169848b8605Smrg assert(!util_is_inf_or_nan(input[vs_slot][2])); 170848b8605Smrg assert(!util_is_inf_or_nan(input[vs_slot][3])); 171848b8605Smrg#endif 172848b8605Smrg machine->Inputs[idx].xyzw[0].f[prim_idx] = input[vs_slot][0]; 173848b8605Smrg machine->Inputs[idx].xyzw[1].f[prim_idx] = input[vs_slot][1]; 174848b8605Smrg machine->Inputs[idx].xyzw[2].f[prim_idx] = input[vs_slot][2]; 175848b8605Smrg machine->Inputs[idx].xyzw[3].f[prim_idx] = input[vs_slot][3]; 176848b8605Smrg#if DEBUG_INPUTS 177848b8605Smrg debug_printf("\t\t%f %f %f %f\n", 178848b8605Smrg machine->Inputs[idx].xyzw[0].f[prim_idx], 179848b8605Smrg machine->Inputs[idx].xyzw[1].f[prim_idx], 180848b8605Smrg machine->Inputs[idx].xyzw[2].f[prim_idx], 181848b8605Smrg machine->Inputs[idx].xyzw[3].f[prim_idx]); 182848b8605Smrg#endif 183848b8605Smrg ++vs_slot; 184848b8605Smrg } 185848b8605Smrg } 186848b8605Smrg } 187848b8605Smrg } 188848b8605Smrg} 189848b8605Smrg 190848b8605Smrgstatic void tgsi_gs_prepare(struct draw_geometry_shader *shader, 191848b8605Smrg const void *constants[PIPE_MAX_CONSTANT_BUFFERS], 192848b8605Smrg const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS]) 193848b8605Smrg{ 194848b8605Smrg struct tgsi_exec_machine *machine = shader->machine; 195b8e80941Smrg int j; 196848b8605Smrg tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS, 197848b8605Smrg constants, constants_size); 198b8e80941Smrg 199b8e80941Smrg if (shader->info.uses_invocationid) { 200b8e80941Smrg unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_INVOCATIONID]; 201b8e80941Smrg for (j = 0; j < TGSI_QUAD_SIZE; j++) 202b8e80941Smrg machine->SystemValue[i].xyzw[0].i[j] = shader->invocation_id; 203b8e80941Smrg } 204848b8605Smrg} 205848b8605Smrg 206b8e80941Smrgstatic void tgsi_gs_run(struct draw_geometry_shader *shader, 207b8e80941Smrg unsigned input_primitives, 208b8e80941Smrg unsigned *out_prims) 209848b8605Smrg{ 210848b8605Smrg struct tgsi_exec_machine *machine = shader->machine; 211b8e80941Smrg int i; 212848b8605Smrg 213848b8605Smrg /* run interpreter */ 214b8e80941Smrg tgsi_exec_machine_run(machine, 0); 215b8e80941Smrg 216b8e80941Smrg for (i = 0; i < 4; i++) { 217b8e80941Smrg int prim_i; 218b8e80941Smrg int prim_c; 219b8e80941Smrg switch (i) { 220b8e80941Smrg case 0: 221b8e80941Smrg prim_i = TGSI_EXEC_TEMP_PRIMITIVE_I; 222b8e80941Smrg prim_c = TGSI_EXEC_TEMP_PRIMITIVE_C; 223b8e80941Smrg break; 224b8e80941Smrg case 1: 225b8e80941Smrg prim_i = TGSI_EXEC_TEMP_PRIMITIVE_S1_I; 226b8e80941Smrg prim_c = TGSI_EXEC_TEMP_PRIMITIVE_S1_C; 227b8e80941Smrg break; 228b8e80941Smrg case 2: 229b8e80941Smrg prim_i = TGSI_EXEC_TEMP_PRIMITIVE_S2_I; 230b8e80941Smrg prim_c = TGSI_EXEC_TEMP_PRIMITIVE_S2_C; 231b8e80941Smrg break; 232b8e80941Smrg case 3: 233b8e80941Smrg prim_i = TGSI_EXEC_TEMP_PRIMITIVE_S3_I; 234b8e80941Smrg prim_c = TGSI_EXEC_TEMP_PRIMITIVE_S3_C; 235b8e80941Smrg break; 236b8e80941Smrg }; 237b8e80941Smrg 238b8e80941Smrg out_prims[i] = machine->Temps[prim_i].xyzw[prim_c].u[0]; 239b8e80941Smrg } 240848b8605Smrg} 241848b8605Smrg 242848b8605Smrg#ifdef HAVE_LLVM 243848b8605Smrg 244848b8605Smrgstatic void 245848b8605Smrgllvm_fetch_gs_input(struct draw_geometry_shader *shader, 246848b8605Smrg unsigned *indices, 247848b8605Smrg unsigned num_vertices, 248848b8605Smrg unsigned prim_idx) 249848b8605Smrg{ 250848b8605Smrg unsigned slot, i; 251848b8605Smrg int vs_slot; 252848b8605Smrg unsigned input_vertex_stride = shader->input_vertex_stride; 253848b8605Smrg const float (*input_ptr)[4]; 254848b8605Smrg float (*input_data)[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS] = &shader->gs_input->data; 255848b8605Smrg 256848b8605Smrg shader->llvm_prim_ids[shader->fetched_prim_count] = shader->in_prim_idx; 257848b8605Smrg 258848b8605Smrg input_ptr = shader->input; 259848b8605Smrg 260848b8605Smrg for (i = 0; i < num_vertices; ++i) { 261848b8605Smrg const float (*input)[4]; 262848b8605Smrg#if DEBUG_INPUTS 263848b8605Smrg debug_printf("%d) vertex index = %d (prim idx = %d)\n", 264848b8605Smrg i, indices[i], prim_idx); 265848b8605Smrg#endif 266848b8605Smrg input = (const float (*)[4])( 267848b8605Smrg (const char *)input_ptr + (indices[i] * input_vertex_stride)); 268848b8605Smrg for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) { 269848b8605Smrg if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) { 270848b8605Smrg /* skip. we handle system values through gallivm */ 271848b8605Smrg /* NOTE: If we hit this case here it's an ordinary input not a sv, 272848b8605Smrg * even though it probably should be a sv. 273848b8605Smrg * Not sure how to set it up as regular input however if that even, 274848b8605Smrg * would make sense so hack around this later in gallivm. 275848b8605Smrg */ 276848b8605Smrg } else { 277848b8605Smrg vs_slot = draw_gs_get_input_index( 278848b8605Smrg shader->info.input_semantic_name[slot], 279848b8605Smrg shader->info.input_semantic_index[slot], 280848b8605Smrg shader->input_info); 281848b8605Smrg if (vs_slot < 0) { 282848b8605Smrg debug_printf("VS/GS signature mismatch!\n"); 283848b8605Smrg (*input_data)[i][slot][0][prim_idx] = 0; 284848b8605Smrg (*input_data)[i][slot][1][prim_idx] = 0; 285848b8605Smrg (*input_data)[i][slot][2][prim_idx] = 0; 286848b8605Smrg (*input_data)[i][slot][3][prim_idx] = 0; 287848b8605Smrg } else { 288848b8605Smrg#if DEBUG_INPUTS 289848b8605Smrg debug_printf("\tSlot = %d, vs_slot = %d, i = %d:\n", 290848b8605Smrg slot, vs_slot, i); 291848b8605Smrg assert(!util_is_inf_or_nan(input[vs_slot][0])); 292848b8605Smrg assert(!util_is_inf_or_nan(input[vs_slot][1])); 293848b8605Smrg assert(!util_is_inf_or_nan(input[vs_slot][2])); 294848b8605Smrg assert(!util_is_inf_or_nan(input[vs_slot][3])); 295848b8605Smrg#endif 296848b8605Smrg (*input_data)[i][slot][0][prim_idx] = input[vs_slot][0]; 297848b8605Smrg (*input_data)[i][slot][1][prim_idx] = input[vs_slot][1]; 298848b8605Smrg (*input_data)[i][slot][2][prim_idx] = input[vs_slot][2]; 299848b8605Smrg (*input_data)[i][slot][3][prim_idx] = input[vs_slot][3]; 300848b8605Smrg#if DEBUG_INPUTS 301848b8605Smrg debug_printf("\t\t%f %f %f %f\n", 302848b8605Smrg (*input_data)[i][slot][0][prim_idx], 303848b8605Smrg (*input_data)[i][slot][1][prim_idx], 304848b8605Smrg (*input_data)[i][slot][2][prim_idx], 305848b8605Smrg (*input_data)[i][slot][3][prim_idx]); 306848b8605Smrg#endif 307848b8605Smrg ++vs_slot; 308848b8605Smrg } 309848b8605Smrg } 310848b8605Smrg } 311848b8605Smrg } 312848b8605Smrg} 313848b8605Smrg 314848b8605Smrgstatic void 315848b8605Smrgllvm_fetch_gs_outputs(struct draw_geometry_shader *shader, 316b8e80941Smrg unsigned stream, 317848b8605Smrg unsigned num_primitives, 318848b8605Smrg float (**p_output)[4]) 319848b8605Smrg{ 320848b8605Smrg int total_verts = 0; 321848b8605Smrg int vertex_count = 0; 322848b8605Smrg int total_prims = 0; 323848b8605Smrg int max_prims_per_invocation = 0; 324848b8605Smrg char *output_ptr = (char*)shader->gs_output; 325848b8605Smrg int i, j, prim_idx; 326848b8605Smrg unsigned next_prim_boundary = shader->primitive_boundary; 327848b8605Smrg 328848b8605Smrg for (i = 0; i < shader->vector_length; ++i) { 329848b8605Smrg int prims = shader->llvm_emitted_primitives[i]; 330848b8605Smrg total_prims += prims; 331848b8605Smrg max_prims_per_invocation = MAX2(max_prims_per_invocation, prims); 332848b8605Smrg } 333848b8605Smrg for (i = 0; i < shader->vector_length; ++i) { 334848b8605Smrg total_verts += shader->llvm_emitted_vertices[i]; 335848b8605Smrg } 336848b8605Smrg 337b8e80941Smrg output_ptr += shader->stream[0].emitted_vertices * shader->vertex_size; 338848b8605Smrg for (i = 0; i < shader->vector_length - 1; ++i) { 339848b8605Smrg int current_verts = shader->llvm_emitted_vertices[i]; 340848b8605Smrg int next_verts = shader->llvm_emitted_vertices[i + 1]; 341848b8605Smrg#if 0 342848b8605Smrg int j; 343848b8605Smrg for (j = 0; j < current_verts; ++j) { 344848b8605Smrg struct vertex_header *vh = (struct vertex_header *) 345848b8605Smrg (output_ptr + shader->vertex_size * (i * next_prim_boundary + j)); 346848b8605Smrg debug_printf("--- %d) [%f, %f, %f, %f]\n", j + vertex_count, 347848b8605Smrg vh->data[0][0], vh->data[0][1], vh->data[0][2], vh->data[0][3]); 348848b8605Smrg 349848b8605Smrg } 350848b8605Smrg#endif 351848b8605Smrg debug_assert(current_verts <= shader->max_output_vertices); 352848b8605Smrg debug_assert(next_verts <= shader->max_output_vertices); 353848b8605Smrg if (next_verts) { 354848b8605Smrg memmove(output_ptr + (vertex_count + current_verts) * shader->vertex_size, 355848b8605Smrg output_ptr + ((i + 1) * next_prim_boundary) * shader->vertex_size, 356848b8605Smrg shader->vertex_size * next_verts); 357848b8605Smrg } 358848b8605Smrg vertex_count += current_verts; 359848b8605Smrg } 360848b8605Smrg 361848b8605Smrg#if 0 362848b8605Smrg { 363848b8605Smrg int i; 364848b8605Smrg for (i = 0; i < total_verts; ++i) { 365848b8605Smrg struct vertex_header *vh = (struct vertex_header *)(output_ptr + shader->vertex_size * i); 366848b8605Smrg debug_printf("%d) Vertex:\n", i); 367848b8605Smrg for (j = 0; j < shader->info.num_outputs; ++j) { 368848b8605Smrg unsigned *udata = (unsigned*)vh->data[j]; 369848b8605Smrg debug_printf(" %d) [%f, %f, %f, %f] [%d, %d, %d, %d]\n", j, 370848b8605Smrg vh->data[j][0], vh->data[j][1], vh->data[j][2], vh->data[j][3], 371848b8605Smrg udata[0], udata[1], udata[2], udata[3]); 372848b8605Smrg } 373848b8605Smrg 374848b8605Smrg } 375848b8605Smrg } 376848b8605Smrg#endif 377848b8605Smrg 378848b8605Smrg prim_idx = 0; 379848b8605Smrg for (i = 0; i < shader->vector_length; ++i) { 380848b8605Smrg int num_prims = shader->llvm_emitted_primitives[i]; 381848b8605Smrg for (j = 0; j < num_prims; ++j) { 382848b8605Smrg int prim_length = 383848b8605Smrg shader->llvm_prim_lengths[j][i]; 384b8e80941Smrg shader->stream[0].primitive_lengths[shader->stream[0].emitted_primitives + prim_idx] = 385848b8605Smrg prim_length; 386848b8605Smrg ++prim_idx; 387848b8605Smrg } 388848b8605Smrg } 389848b8605Smrg 390b8e80941Smrg shader->stream[0].emitted_primitives += total_prims; 391b8e80941Smrg shader->stream[0].emitted_vertices += total_verts; 392848b8605Smrg} 393848b8605Smrg 394848b8605Smrgstatic void 395848b8605Smrgllvm_gs_prepare(struct draw_geometry_shader *shader, 396848b8605Smrg const void *constants[PIPE_MAX_CONSTANT_BUFFERS], 397848b8605Smrg const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS]) 398848b8605Smrg{ 399848b8605Smrg} 400848b8605Smrg 401b8e80941Smrgstatic void 402848b8605Smrgllvm_gs_run(struct draw_geometry_shader *shader, 403b8e80941Smrg unsigned input_primitives, unsigned *out_prims) 404848b8605Smrg{ 405848b8605Smrg unsigned ret; 406848b8605Smrg char *input = (char*)shader->gs_output; 407848b8605Smrg 408b8e80941Smrg input += (shader->stream[0].emitted_vertices * shader->vertex_size); 409848b8605Smrg 410848b8605Smrg ret = shader->current_variant->jit_func( 411848b8605Smrg shader->jit_context, shader->gs_input->data, 412848b8605Smrg (struct vertex_header*)input, 413848b8605Smrg input_primitives, 414848b8605Smrg shader->draw->instance_id, 415b8e80941Smrg shader->llvm_prim_ids, 416b8e80941Smrg shader->invocation_id); 417848b8605Smrg 418b8e80941Smrg *out_prims = ret; 419848b8605Smrg} 420848b8605Smrg 421848b8605Smrg#endif 422848b8605Smrg 423848b8605Smrgstatic void gs_flush(struct draw_geometry_shader *shader) 424848b8605Smrg{ 425b8e80941Smrg unsigned out_prim_count[TGSI_MAX_VERTEX_STREAMS]; 426b8e80941Smrg unsigned i; 427848b8605Smrg unsigned input_primitives = shader->fetched_prim_count; 428848b8605Smrg 429848b8605Smrg if (shader->draw->collect_statistics) { 430848b8605Smrg shader->draw->statistics.gs_invocations += input_primitives; 431848b8605Smrg } 432848b8605Smrg 433848b8605Smrg debug_assert(input_primitives > 0 && 434848b8605Smrg input_primitives <= 4); 435848b8605Smrg 436b8e80941Smrg shader->run(shader, input_primitives, out_prim_count); 437b8e80941Smrg for (i = 0; i < shader->num_vertex_streams; i++) { 438b8e80941Smrg shader->fetch_outputs(shader, i, out_prim_count[i], 439b8e80941Smrg &shader->stream[i].tmp_output); 440b8e80941Smrg } 441848b8605Smrg 442848b8605Smrg#if 0 443b8e80941Smrg for (i = 0; i < shader->num_vertex_streams; i++) { 444b8e80941Smrg debug_printf("stream %d: PRIM emitted prims = %d (verts=%d), cur prim count = %d\n", 445b8e80941Smrg i, 446b8e80941Smrg shader->stream[i].emitted_primitives, shader->stream[i].emitted_vertices, 447b8e80941Smrg out_prim_count[i]); 448b8e80941Smrg } 449848b8605Smrg#endif 450848b8605Smrg 451848b8605Smrg shader->fetched_prim_count = 0; 452848b8605Smrg} 453848b8605Smrg 454848b8605Smrgstatic void gs_point(struct draw_geometry_shader *shader, 455848b8605Smrg int idx) 456848b8605Smrg{ 457848b8605Smrg unsigned indices[1]; 458848b8605Smrg 459848b8605Smrg indices[0] = idx; 460848b8605Smrg 461848b8605Smrg shader->fetch_inputs(shader, indices, 1, 462848b8605Smrg shader->fetched_prim_count); 463848b8605Smrg ++shader->in_prim_idx; 464848b8605Smrg ++shader->fetched_prim_count; 465848b8605Smrg 466848b8605Smrg if (draw_gs_should_flush(shader)) 467848b8605Smrg gs_flush(shader); 468848b8605Smrg} 469848b8605Smrg 470848b8605Smrgstatic void gs_line(struct draw_geometry_shader *shader, 471848b8605Smrg int i0, int i1) 472848b8605Smrg{ 473848b8605Smrg unsigned indices[2]; 474848b8605Smrg 475848b8605Smrg indices[0] = i0; 476848b8605Smrg indices[1] = i1; 477848b8605Smrg 478848b8605Smrg shader->fetch_inputs(shader, indices, 2, 479848b8605Smrg shader->fetched_prim_count); 480848b8605Smrg ++shader->in_prim_idx; 481848b8605Smrg ++shader->fetched_prim_count; 482848b8605Smrg 483848b8605Smrg if (draw_gs_should_flush(shader)) 484848b8605Smrg gs_flush(shader); 485848b8605Smrg} 486848b8605Smrg 487848b8605Smrgstatic void gs_line_adj(struct draw_geometry_shader *shader, 488848b8605Smrg int i0, int i1, int i2, int i3) 489848b8605Smrg{ 490848b8605Smrg unsigned indices[4]; 491848b8605Smrg 492848b8605Smrg indices[0] = i0; 493848b8605Smrg indices[1] = i1; 494848b8605Smrg indices[2] = i2; 495848b8605Smrg indices[3] = i3; 496848b8605Smrg 497848b8605Smrg shader->fetch_inputs(shader, indices, 4, 498848b8605Smrg shader->fetched_prim_count); 499848b8605Smrg ++shader->in_prim_idx; 500848b8605Smrg ++shader->fetched_prim_count; 501848b8605Smrg 502848b8605Smrg if (draw_gs_should_flush(shader)) 503848b8605Smrg gs_flush(shader); 504848b8605Smrg} 505848b8605Smrg 506848b8605Smrgstatic void gs_tri(struct draw_geometry_shader *shader, 507848b8605Smrg int i0, int i1, int i2) 508848b8605Smrg{ 509848b8605Smrg unsigned indices[3]; 510848b8605Smrg 511848b8605Smrg indices[0] = i0; 512848b8605Smrg indices[1] = i1; 513848b8605Smrg indices[2] = i2; 514848b8605Smrg 515848b8605Smrg shader->fetch_inputs(shader, indices, 3, 516848b8605Smrg shader->fetched_prim_count); 517848b8605Smrg ++shader->in_prim_idx; 518848b8605Smrg ++shader->fetched_prim_count; 519848b8605Smrg 520848b8605Smrg if (draw_gs_should_flush(shader)) 521848b8605Smrg gs_flush(shader); 522848b8605Smrg} 523848b8605Smrg 524848b8605Smrgstatic void gs_tri_adj(struct draw_geometry_shader *shader, 525848b8605Smrg int i0, int i1, int i2, 526848b8605Smrg int i3, int i4, int i5) 527848b8605Smrg{ 528848b8605Smrg unsigned indices[6]; 529848b8605Smrg 530848b8605Smrg indices[0] = i0; 531848b8605Smrg indices[1] = i1; 532848b8605Smrg indices[2] = i2; 533848b8605Smrg indices[3] = i3; 534848b8605Smrg indices[4] = i4; 535848b8605Smrg indices[5] = i5; 536848b8605Smrg 537848b8605Smrg shader->fetch_inputs(shader, indices, 6, 538848b8605Smrg shader->fetched_prim_count); 539848b8605Smrg ++shader->in_prim_idx; 540848b8605Smrg ++shader->fetched_prim_count; 541848b8605Smrg 542848b8605Smrg if (draw_gs_should_flush(shader)) 543848b8605Smrg gs_flush(shader); 544848b8605Smrg} 545848b8605Smrg 546848b8605Smrg#define FUNC gs_run 547848b8605Smrg#define GET_ELT(idx) (idx) 548848b8605Smrg#include "draw_gs_tmp.h" 549848b8605Smrg 550848b8605Smrg 551848b8605Smrg#define FUNC gs_run_elts 552848b8605Smrg#define LOCAL_VARS const ushort *elts = input_prims->elts; 553848b8605Smrg#define GET_ELT(idx) (elts[idx]) 554848b8605Smrg#include "draw_gs_tmp.h" 555848b8605Smrg 556848b8605Smrg 557848b8605Smrg/** 558848b8605Smrg * Execute geometry shader. 559848b8605Smrg */ 560848b8605Smrgint draw_geometry_shader_run(struct draw_geometry_shader *shader, 561848b8605Smrg const void *constants[PIPE_MAX_CONSTANT_BUFFERS], 562848b8605Smrg const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], 563848b8605Smrg const struct draw_vertex_info *input_verts, 564848b8605Smrg const struct draw_prim_info *input_prim, 565848b8605Smrg const struct tgsi_shader_info *input_info, 566848b8605Smrg struct draw_vertex_info *output_verts, 567848b8605Smrg struct draw_prim_info *output_prims ) 568848b8605Smrg{ 569848b8605Smrg const float (*input)[4] = (const float (*)[4])input_verts->verts->data; 570848b8605Smrg unsigned input_stride = input_verts->vertex_size; 571848b8605Smrg unsigned num_outputs = draw_total_gs_outputs(shader->draw); 572848b8605Smrg unsigned vertex_size = sizeof(struct vertex_header) + num_outputs * 4 * sizeof(float); 573848b8605Smrg unsigned num_input_verts = input_prim->linear ? 574848b8605Smrg input_verts->count : 575848b8605Smrg input_prim->count; 576848b8605Smrg unsigned num_in_primitives = 577848b8605Smrg align( 578848b8605Smrg MAX2(u_decomposed_prims_for_vertices(input_prim->prim, 579848b8605Smrg num_input_verts), 580848b8605Smrg u_decomposed_prims_for_vertices(shader->input_primitive, 581848b8605Smrg num_input_verts)), 582848b8605Smrg shader->vector_length); 583848b8605Smrg unsigned max_out_prims = 584848b8605Smrg u_decomposed_prims_for_vertices(shader->output_primitive, 585848b8605Smrg shader->max_output_vertices) 586848b8605Smrg * num_in_primitives; 587848b8605Smrg /* we allocate exactly one extra vertex per primitive to allow the GS to emit 588848b8605Smrg * overflown vertices into some area where they won't harm anyone */ 589848b8605Smrg unsigned total_verts_per_buffer = shader->primitive_boundary * 590848b8605Smrg num_in_primitives; 591b8e80941Smrg unsigned invocation; 592b8e80941Smrg int i; 593848b8605Smrg //Assume at least one primitive 594848b8605Smrg max_out_prims = MAX2(max_out_prims, 1); 595848b8605Smrg 596b8e80941Smrg for (i = 0; i < shader->num_vertex_streams; i++) { 597b8e80941Smrg /* write all the vertex data into all the streams */ 598b8e80941Smrg output_verts[i].vertex_size = vertex_size; 599b8e80941Smrg output_verts[i].stride = output_verts[i].vertex_size; 600b8e80941Smrg output_verts[i].verts = 601b8e80941Smrg (struct vertex_header *)MALLOC(output_verts[i].vertex_size * 602b8e80941Smrg total_verts_per_buffer * shader->num_invocations); 603b8e80941Smrg debug_assert(output_verts[i].verts); 604b8e80941Smrg } 605848b8605Smrg 606848b8605Smrg#if 0 607848b8605Smrg debug_printf("%s count = %d (in prims # = %d)\n", 608848b8605Smrg __FUNCTION__, num_input_verts, num_in_primitives); 609848b8605Smrg debug_printf("\tlinear = %d, prim_info->count = %d\n", 610848b8605Smrg input_prim->linear, input_prim->count); 611848b8605Smrg debug_printf("\tprim pipe = %s, shader in = %s, shader out = %s\n" 612848b8605Smrg u_prim_name(input_prim->prim), 613848b8605Smrg u_prim_name(shader->input_primitive), 614848b8605Smrg u_prim_name(shader->output_primitive)); 615848b8605Smrg debug_printf("\tmaxv = %d, maxp = %d, primitive_boundary = %d, " 616848b8605Smrg "vertex_size = %d, tverts = %d\n", 617848b8605Smrg shader->max_output_vertices, max_out_prims, 618848b8605Smrg shader->primitive_boundary, output_verts->vertex_size, 619848b8605Smrg total_verts_per_buffer); 620848b8605Smrg#endif 621848b8605Smrg 622b8e80941Smrg for (i = 0; i < shader->num_vertex_streams; i++) { 623b8e80941Smrg shader->stream[i].emitted_vertices = 0; 624b8e80941Smrg shader->stream[i].emitted_primitives = 0; 625b8e80941Smrg FREE(shader->stream[i].primitive_lengths); 626b8e80941Smrg shader->stream[i].primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned) * shader->num_invocations); 627b8e80941Smrg shader->stream[i].tmp_output = (float (*)[4])output_verts[i].verts->data; 628b8e80941Smrg } 629848b8605Smrg shader->vertex_size = vertex_size; 630848b8605Smrg shader->fetched_prim_count = 0; 631848b8605Smrg shader->input_vertex_stride = input_stride; 632848b8605Smrg shader->input = input; 633848b8605Smrg shader->input_info = input_info; 634848b8605Smrg 635848b8605Smrg#ifdef HAVE_LLVM 636848b8605Smrg if (shader->draw->llvm) { 637b8e80941Smrg shader->gs_output = output_verts[0].verts; 638848b8605Smrg if (max_out_prims > shader->max_out_prims) { 639848b8605Smrg unsigned i; 640848b8605Smrg if (shader->llvm_prim_lengths) { 641848b8605Smrg for (i = 0; i < shader->max_out_prims; ++i) { 642848b8605Smrg align_free(shader->llvm_prim_lengths[i]); 643848b8605Smrg } 644848b8605Smrg FREE(shader->llvm_prim_lengths); 645848b8605Smrg } 646848b8605Smrg 647848b8605Smrg shader->llvm_prim_lengths = MALLOC(max_out_prims * sizeof(unsigned*)); 648848b8605Smrg for (i = 0; i < max_out_prims; ++i) { 649848b8605Smrg int vector_size = shader->vector_length * sizeof(unsigned); 650848b8605Smrg shader->llvm_prim_lengths[i] = 651848b8605Smrg align_malloc(vector_size, vector_size); 652848b8605Smrg } 653848b8605Smrg 654848b8605Smrg shader->max_out_prims = max_out_prims; 655848b8605Smrg } 656848b8605Smrg shader->jit_context->prim_lengths = shader->llvm_prim_lengths; 657848b8605Smrg shader->jit_context->emitted_vertices = shader->llvm_emitted_vertices; 658848b8605Smrg shader->jit_context->emitted_prims = shader->llvm_emitted_primitives; 659848b8605Smrg } 660848b8605Smrg#endif 661848b8605Smrg 662b8e80941Smrg for (invocation = 0; invocation < shader->num_invocations; invocation++) { 663b8e80941Smrg shader->invocation_id = invocation; 664848b8605Smrg 665b8e80941Smrg shader->prepare(shader, constants, constants_size); 666848b8605Smrg 667b8e80941Smrg if (input_prim->linear) 668b8e80941Smrg gs_run(shader, input_prim, input_verts, 669b8e80941Smrg output_prims, output_verts); 670b8e80941Smrg else 671b8e80941Smrg gs_run_elts(shader, input_prim, input_verts, 672b8e80941Smrg output_prims, output_verts); 673848b8605Smrg 674b8e80941Smrg /* Flush the remaining primitives. Will happen if 675b8e80941Smrg * num_input_primitives % 4 != 0 676b8e80941Smrg */ 677b8e80941Smrg if (shader->fetched_prim_count > 0) { 678b8e80941Smrg gs_flush(shader); 679b8e80941Smrg } 680b8e80941Smrg debug_assert(shader->fetched_prim_count == 0); 681b8e80941Smrg } 682848b8605Smrg 683848b8605Smrg /* Update prim_info: 684848b8605Smrg */ 685b8e80941Smrg for (i = 0; i < shader->num_vertex_streams; i++) { 686b8e80941Smrg output_prims[i].linear = TRUE; 687b8e80941Smrg output_prims[i].elts = NULL; 688b8e80941Smrg output_prims[i].start = 0; 689b8e80941Smrg output_prims[i].count = shader->stream[i].emitted_vertices; 690b8e80941Smrg output_prims[i].prim = shader->output_primitive; 691b8e80941Smrg output_prims[i].flags = 0x0; 692b8e80941Smrg output_prims[i].primitive_lengths = shader->stream[i].primitive_lengths; 693b8e80941Smrg output_prims[i].primitive_count = shader->stream[i].emitted_primitives; 694b8e80941Smrg output_verts[i].count = shader->stream[i].emitted_vertices; 695b8e80941Smrg 696b8e80941Smrg if (shader->draw->collect_statistics) { 697b8e80941Smrg unsigned j; 698b8e80941Smrg for (j = 0; j < shader->stream[i].emitted_primitives; ++j) { 699b8e80941Smrg shader->draw->statistics.gs_primitives += 700b8e80941Smrg u_decomposed_prims_for_vertices(shader->output_primitive, 701b8e80941Smrg shader->stream[i].primitive_lengths[j]); 702b8e80941Smrg } 703848b8605Smrg } 704848b8605Smrg } 705848b8605Smrg 706848b8605Smrg#if 0 707b8e80941Smrg debug_printf("GS finished\n"); 708b8e80941Smrg for (i = 0; i < 4; i++) 709b8e80941Smrg debug_printf("stream %d: prims = %d verts = %d\n", i, output_prims[i].primitive_count, output_verts[i].count); 710848b8605Smrg#endif 711848b8605Smrg 712b8e80941Smrg return 0; 713848b8605Smrg} 714848b8605Smrg 715848b8605Smrgvoid draw_geometry_shader_prepare(struct draw_geometry_shader *shader, 716848b8605Smrg struct draw_context *draw) 717848b8605Smrg{ 718848b8605Smrg boolean use_llvm = draw->llvm != NULL; 719848b8605Smrg if (!use_llvm && shader && shader->machine->Tokens != shader->state.tokens) { 720848b8605Smrg tgsi_exec_machine_bind_shader(shader->machine, 721848b8605Smrg shader->state.tokens, 722b8e80941Smrg draw->gs.tgsi.sampler, 723b8e80941Smrg draw->gs.tgsi.image, 724b8e80941Smrg draw->gs.tgsi.buffer); 725848b8605Smrg } 726848b8605Smrg} 727848b8605Smrg 728848b8605Smrg 729848b8605Smrgboolean 730848b8605Smrgdraw_gs_init( struct draw_context *draw ) 731848b8605Smrg{ 732848b8605Smrg if (!draw->llvm) { 733b8e80941Smrg draw->gs.tgsi.machine = tgsi_exec_machine_create(PIPE_SHADER_GEOMETRY); 734b8e80941Smrg 735b8e80941Smrg for (unsigned i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) { 736b8e80941Smrg draw->gs.tgsi.machine->Primitives[i] = align_malloc( 737b8e80941Smrg MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16); 738b8e80941Smrg draw->gs.tgsi.machine->PrimitiveOffsets[i] = align_malloc( 739b8e80941Smrg MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16); 740b8e80941Smrg if (!draw->gs.tgsi.machine->Primitives[i] || !draw->gs.tgsi.machine->PrimitiveOffsets[i]) 741b8e80941Smrg return FALSE; 742b8e80941Smrg memset(draw->gs.tgsi.machine->Primitives[i], 0, 743b8e80941Smrg MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector)); 744b8e80941Smrg memset(draw->gs.tgsi.machine->PrimitiveOffsets[i], 0, 745b8e80941Smrg MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector)); 746b8e80941Smrg } 747848b8605Smrg } 748848b8605Smrg 749848b8605Smrg return TRUE; 750848b8605Smrg} 751848b8605Smrg 752848b8605Smrgvoid draw_gs_destroy( struct draw_context *draw ) 753848b8605Smrg{ 754b8e80941Smrg int i; 755848b8605Smrg if (draw->gs.tgsi.machine) { 756b8e80941Smrg for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) { 757b8e80941Smrg align_free(draw->gs.tgsi.machine->Primitives[i]); 758b8e80941Smrg align_free(draw->gs.tgsi.machine->PrimitiveOffsets[i]); 759b8e80941Smrg } 760848b8605Smrg tgsi_exec_machine_destroy(draw->gs.tgsi.machine); 761848b8605Smrg } 762848b8605Smrg} 763848b8605Smrg 764848b8605Smrgstruct draw_geometry_shader * 765848b8605Smrgdraw_create_geometry_shader(struct draw_context *draw, 766848b8605Smrg const struct pipe_shader_state *state) 767848b8605Smrg{ 768848b8605Smrg#ifdef HAVE_LLVM 769848b8605Smrg boolean use_llvm = draw->llvm != NULL; 770b8e80941Smrg struct llvm_geometry_shader *llvm_gs = NULL; 771848b8605Smrg#endif 772848b8605Smrg struct draw_geometry_shader *gs; 773848b8605Smrg unsigned i; 774848b8605Smrg 775848b8605Smrg#ifdef HAVE_LLVM 776848b8605Smrg if (use_llvm) { 777848b8605Smrg llvm_gs = CALLOC_STRUCT(llvm_geometry_shader); 778848b8605Smrg 779b8e80941Smrg if (!llvm_gs) 780848b8605Smrg return NULL; 781848b8605Smrg 782848b8605Smrg gs = &llvm_gs->base; 783848b8605Smrg 784848b8605Smrg make_empty_list(&llvm_gs->variants); 785848b8605Smrg } else 786848b8605Smrg#endif 787848b8605Smrg { 788848b8605Smrg gs = CALLOC_STRUCT(draw_geometry_shader); 789848b8605Smrg } 790848b8605Smrg 791848b8605Smrg if (!gs) 792848b8605Smrg return NULL; 793848b8605Smrg 794848b8605Smrg gs->draw = draw; 795848b8605Smrg gs->state = *state; 796848b8605Smrg gs->state.tokens = tgsi_dup_tokens(state->tokens); 797848b8605Smrg if (!gs->state.tokens) { 798848b8605Smrg FREE(gs); 799848b8605Smrg return NULL; 800848b8605Smrg } 801848b8605Smrg 802848b8605Smrg tgsi_scan_shader(state->tokens, &gs->info); 803848b8605Smrg 804848b8605Smrg /* setup the defaults */ 805848b8605Smrg gs->max_out_prims = 0; 806848b8605Smrg 807848b8605Smrg#ifdef HAVE_LLVM 808848b8605Smrg if (use_llvm) { 809848b8605Smrg /* TODO: change the input array to handle the following 810848b8605Smrg vector length, instead of the currently hardcoded 811848b8605Smrg TGSI_NUM_CHANNELS 812848b8605Smrg gs->vector_length = lp_native_vector_width / 32;*/ 813848b8605Smrg gs->vector_length = TGSI_NUM_CHANNELS; 814848b8605Smrg } else 815848b8605Smrg#endif 816848b8605Smrg { 817848b8605Smrg gs->vector_length = 1; 818848b8605Smrg } 819848b8605Smrg 820b8e80941Smrg gs->input_primitive = 821b8e80941Smrg gs->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM]; 822b8e80941Smrg gs->output_primitive = 823b8e80941Smrg gs->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]; 824b8e80941Smrg gs->max_output_vertices = 825b8e80941Smrg gs->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES]; 826b8e80941Smrg gs->num_invocations = 827b8e80941Smrg gs->info.properties[TGSI_PROPERTY_GS_INVOCATIONS]; 828b8e80941Smrg if (!gs->max_output_vertices) 829b8e80941Smrg gs->max_output_vertices = 32; 830b8e80941Smrg 831848b8605Smrg /* Primitive boundary is bigger than max_output_vertices by one, because 832848b8605Smrg * the specification says that the geometry shader should exit if the 833848b8605Smrg * number of emitted vertices is bigger or equal to max_output_vertices and 834848b8605Smrg * we can't do that because we're running in the SoA mode, which means that 835848b8605Smrg * our storing routines will keep getting called on channels that have 836848b8605Smrg * overflown. 837848b8605Smrg * So we need some scratch area where we can keep writing the overflown 838848b8605Smrg * vertices without overwriting anything important or crashing. 839848b8605Smrg */ 840848b8605Smrg gs->primitive_boundary = gs->max_output_vertices + 1; 841848b8605Smrg 842848b8605Smrg gs->position_output = -1; 843848b8605Smrg for (i = 0; i < gs->info.num_outputs; i++) { 844848b8605Smrg if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && 845848b8605Smrg gs->info.output_semantic_index[i] == 0) 846848b8605Smrg gs->position_output = i; 847848b8605Smrg if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX) 848848b8605Smrg gs->viewport_index_output = i; 849848b8605Smrg if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) { 850848b8605Smrg debug_assert(gs->info.output_semantic_index[i] < 851848b8605Smrg PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT); 852b8e80941Smrg gs->ccdistance_output[gs->info.output_semantic_index[i]] = i; 853848b8605Smrg } 854848b8605Smrg } 855848b8605Smrg 856848b8605Smrg gs->machine = draw->gs.tgsi.machine; 857848b8605Smrg 858b8e80941Smrg gs->num_vertex_streams = 1; 859b8e80941Smrg for (i = 0; i < gs->state.stream_output.num_outputs; i++) { 860b8e80941Smrg if (gs->state.stream_output.output[i].stream >= gs->num_vertex_streams) 861b8e80941Smrg gs->num_vertex_streams = gs->state.stream_output.output[i].stream + 1; 862b8e80941Smrg } 863b8e80941Smrg 864848b8605Smrg#ifdef HAVE_LLVM 865848b8605Smrg if (use_llvm) { 866848b8605Smrg int vector_size = gs->vector_length * sizeof(float); 867848b8605Smrg gs->gs_input = align_malloc(sizeof(struct draw_gs_inputs), 16); 868848b8605Smrg memset(gs->gs_input, 0, sizeof(struct draw_gs_inputs)); 869848b8605Smrg gs->llvm_prim_lengths = 0; 870848b8605Smrg 871848b8605Smrg gs->llvm_emitted_primitives = align_malloc(vector_size, vector_size); 872848b8605Smrg gs->llvm_emitted_vertices = align_malloc(vector_size, vector_size); 873848b8605Smrg gs->llvm_prim_ids = align_malloc(vector_size, vector_size); 874848b8605Smrg 875848b8605Smrg gs->fetch_outputs = llvm_fetch_gs_outputs; 876848b8605Smrg gs->fetch_inputs = llvm_fetch_gs_input; 877848b8605Smrg gs->prepare = llvm_gs_prepare; 878848b8605Smrg gs->run = llvm_gs_run; 879848b8605Smrg 880848b8605Smrg gs->jit_context = &draw->llvm->gs_jit_context; 881848b8605Smrg 882848b8605Smrg 883848b8605Smrg llvm_gs->variant_key_size = 884848b8605Smrg draw_gs_llvm_variant_key_size( 885848b8605Smrg MAX2(gs->info.file_max[TGSI_FILE_SAMPLER]+1, 886848b8605Smrg gs->info.file_max[TGSI_FILE_SAMPLER_VIEW]+1)); 887848b8605Smrg } else 888848b8605Smrg#endif 889848b8605Smrg { 890848b8605Smrg gs->fetch_outputs = tgsi_fetch_gs_outputs; 891848b8605Smrg gs->fetch_inputs = tgsi_fetch_gs_input; 892848b8605Smrg gs->prepare = tgsi_gs_prepare; 893848b8605Smrg gs->run = tgsi_gs_run; 894848b8605Smrg } 895848b8605Smrg 896848b8605Smrg return gs; 897848b8605Smrg} 898848b8605Smrg 899848b8605Smrgvoid draw_bind_geometry_shader(struct draw_context *draw, 900848b8605Smrg struct draw_geometry_shader *dgs) 901848b8605Smrg{ 902848b8605Smrg draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); 903848b8605Smrg 904848b8605Smrg if (dgs) { 905848b8605Smrg draw->gs.geometry_shader = dgs; 906848b8605Smrg draw->gs.num_gs_outputs = dgs->info.num_outputs; 907848b8605Smrg draw->gs.position_output = dgs->position_output; 908848b8605Smrg draw_geometry_shader_prepare(dgs, draw); 909848b8605Smrg } 910848b8605Smrg else { 911848b8605Smrg draw->gs.geometry_shader = NULL; 912848b8605Smrg draw->gs.num_gs_outputs = 0; 913848b8605Smrg } 914848b8605Smrg} 915848b8605Smrg 916848b8605Smrgvoid draw_delete_geometry_shader(struct draw_context *draw, 917848b8605Smrg struct draw_geometry_shader *dgs) 918848b8605Smrg{ 919b8e80941Smrg int i; 920848b8605Smrg if (!dgs) { 921848b8605Smrg return; 922848b8605Smrg } 923848b8605Smrg#ifdef HAVE_LLVM 924848b8605Smrg if (draw->llvm) { 925848b8605Smrg struct llvm_geometry_shader *shader = llvm_geometry_shader(dgs); 926848b8605Smrg struct draw_gs_llvm_variant_list_item *li; 927848b8605Smrg 928848b8605Smrg li = first_elem(&shader->variants); 929848b8605Smrg while(!at_end(&shader->variants, li)) { 930848b8605Smrg struct draw_gs_llvm_variant_list_item *next = next_elem(li); 931848b8605Smrg draw_gs_llvm_destroy_variant(li->base); 932848b8605Smrg li = next; 933848b8605Smrg } 934848b8605Smrg 935848b8605Smrg assert(shader->variants_cached == 0); 936848b8605Smrg 937848b8605Smrg if (dgs->llvm_prim_lengths) { 938848b8605Smrg unsigned i; 939848b8605Smrg for (i = 0; i < dgs->max_out_prims; ++i) { 940848b8605Smrg align_free(dgs->llvm_prim_lengths[i]); 941848b8605Smrg } 942848b8605Smrg FREE(dgs->llvm_prim_lengths); 943848b8605Smrg } 944848b8605Smrg align_free(dgs->llvm_emitted_primitives); 945848b8605Smrg align_free(dgs->llvm_emitted_vertices); 946848b8605Smrg align_free(dgs->llvm_prim_ids); 947848b8605Smrg 948848b8605Smrg align_free(dgs->gs_input); 949848b8605Smrg } 950848b8605Smrg#endif 951848b8605Smrg 952b8e80941Smrg for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) 953b8e80941Smrg FREE(dgs->stream[i].primitive_lengths); 954848b8605Smrg FREE((void*) dgs->state.tokens); 955848b8605Smrg FREE(dgs); 956848b8605Smrg} 957848b8605Smrg 958848b8605Smrg 959848b8605Smrg#ifdef HAVE_LLVM 960848b8605Smrgvoid draw_gs_set_current_variant(struct draw_geometry_shader *shader, 961848b8605Smrg struct draw_gs_llvm_variant *variant) 962848b8605Smrg{ 963848b8605Smrg shader->current_variant = variant; 964848b8605Smrg} 965848b8605Smrg#endif 966848b8605Smrg 967848b8605Smrg/* 968848b8605Smrg * Called at the very begin of the draw call with a new instance 969848b8605Smrg * Used to reset state that should persist between primitive restart. 970848b8605Smrg */ 971848b8605Smrgvoid 972848b8605Smrgdraw_geometry_shader_new_instance(struct draw_geometry_shader *gs) 973848b8605Smrg{ 974848b8605Smrg if (!gs) 975848b8605Smrg return; 976848b8605Smrg 977848b8605Smrg gs->in_prim_idx = 0; 978848b8605Smrg} 979