13464ebd5Sriastradh/**************************************************************************
23464ebd5Sriastradh *
33464ebd5Sriastradh * Copyright 2010 VMware, Inc.
43464ebd5Sriastradh * All Rights Reserved.
53464ebd5Sriastradh *
63464ebd5Sriastradh * Permission is hereby granted, free of charge, to any person obtaining a
73464ebd5Sriastradh * copy of this software and associated documentation files (the
83464ebd5Sriastradh * "Software"), to deal in the Software without restriction, including
93464ebd5Sriastradh * without limitation the rights to use, copy, modify, merge, publish,
103464ebd5Sriastradh * distribute, sub license, and/or sell copies of the Software, and to
113464ebd5Sriastradh * permit persons to whom the Software is furnished to do so, subject to
123464ebd5Sriastradh * the following conditions:
133464ebd5Sriastradh *
143464ebd5Sriastradh * The above copyright notice and this permission notice (including the
153464ebd5Sriastradh * next paragraph) shall be included in all copies or substantial portions
163464ebd5Sriastradh * of the Software.
173464ebd5Sriastradh *
183464ebd5Sriastradh * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
193464ebd5Sriastradh * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
203464ebd5Sriastradh * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
213464ebd5Sriastradh * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
223464ebd5Sriastradh * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
233464ebd5Sriastradh * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
243464ebd5Sriastradh * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
253464ebd5Sriastradh *
263464ebd5Sriastradh **************************************************************************/
273464ebd5Sriastradh
283464ebd5Sriastradh#include "draw_llvm.h"
293464ebd5Sriastradh
303464ebd5Sriastradh#include "draw_context.h"
313464ebd5Sriastradh#include "draw_vs.h"
32af69d88dSmrg#include "draw_gs.h"
333464ebd5Sriastradh
343464ebd5Sriastradh#include "gallivm/lp_bld_arit.h"
35af69d88dSmrg#include "gallivm/lp_bld_arit_overflow.h"
3601e04c3fSmrg#include "gallivm/lp_bld_bitarit.h"
3701e04c3fSmrg#include "gallivm/lp_bld_gather.h"
383464ebd5Sriastradh#include "gallivm/lp_bld_logic.h"
393464ebd5Sriastradh#include "gallivm/lp_bld_const.h"
407ec681f3Smrg#include "gallivm/lp_bld_coro.h"
413464ebd5Sriastradh#include "gallivm/lp_bld_swizzle.h"
423464ebd5Sriastradh#include "gallivm/lp_bld_struct.h"
433464ebd5Sriastradh#include "gallivm/lp_bld_type.h"
443464ebd5Sriastradh#include "gallivm/lp_bld_flow.h"
453464ebd5Sriastradh#include "gallivm/lp_bld_debug.h"
463464ebd5Sriastradh#include "gallivm/lp_bld_tgsi.h"
477ec681f3Smrg#include "gallivm/lp_bld_nir.h"
483464ebd5Sriastradh#include "gallivm/lp_bld_printf.h"
493464ebd5Sriastradh#include "gallivm/lp_bld_intr.h"
503464ebd5Sriastradh#include "gallivm/lp_bld_init.h"
513464ebd5Sriastradh#include "gallivm/lp_bld_type.h"
52af69d88dSmrg#include "gallivm/lp_bld_pack.h"
53af69d88dSmrg#include "gallivm/lp_bld_format.h"
547ec681f3Smrg#include "gallivm/lp_bld_misc.h"
553464ebd5Sriastradh#include "tgsi/tgsi_exec.h"
563464ebd5Sriastradh#include "tgsi/tgsi_dump.h"
573464ebd5Sriastradh
583464ebd5Sriastradh#include "util/u_math.h"
593464ebd5Sriastradh#include "util/u_pointer.h"
603464ebd5Sriastradh#include "util/u_string.h"
6101e04c3fSmrg#include "util/simple_list.h"
627ec681f3Smrg#include "nir_serialize.h"
637ec681f3Smrg#include "util/mesa-sha1.h"
643464ebd5Sriastradh#define DEBUG_STORE 0
653464ebd5Sriastradh
663464ebd5Sriastradh
/*
 * Forward declaration of the file-local generator, so it can be referenced
 * before its definition.
 */
static void
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);
693464ebd5Sriastradh
70af69d88dSmrg
/**
 * Draw-module wrapper around the gallivm geometry-shader interface.
 *
 * The helper draw_gs_llvm_iface() turns a pointer to the base interface into
 * a pointer to this struct with a plain cast, which relies on 'base' being
 * the first member.
 */
struct draw_gs_llvm_iface {
   /* Generic interface; keep this as the first member (see above). */
   struct lp_build_gs_iface base;

   /* NOTE(review): presumably the GS variant being compiled -- confirm. */
   struct draw_gs_llvm_variant *variant;
   /* NOTE(review): presumably the LLVM value holding GS inputs -- confirm. */
   LLVMValueRef input;
};
77af69d88dSmrg
/**
 * Reinterpret a generic geometry-shader interface pointer as the
 * draw-specific wrapper type.  The pointer value itself is unchanged.
 */
static inline const struct draw_gs_llvm_iface *
draw_gs_llvm_iface(const struct lp_build_gs_iface *iface)
{
   const struct draw_gs_llvm_iface *gs_iface =
      (const struct draw_gs_llvm_iface *) iface;

   return gs_iface;
}
833464ebd5Sriastradh
/**
 * Draw-module wrapper around the gallivm tessellation-control-shader
 * interface.
 *
 * The helper draw_tcs_llvm_iface() turns a pointer to the base interface into
 * a pointer to this struct with a plain cast, which relies on 'base' being
 * the first member.
 */
struct draw_tcs_llvm_iface {
   /* Generic interface; keep this as the first member (see above). */
   struct lp_build_tcs_iface base;

   /* NOTE(review): presumably the TCS variant being compiled -- confirm. */
   struct draw_tcs_llvm_variant *variant;
   /* NOTE(review): presumably the LLVM value holding TCS inputs -- confirm. */
   LLVMValueRef input;
   /* NOTE(review): presumably the LLVM value holding TCS outputs -- confirm. */
   LLVMValueRef output;
};
917ec681f3Smrg
/**
 * Reinterpret a generic tessellation-control-shader interface pointer as the
 * draw-specific wrapper type.  The pointer value itself is unchanged.
 */
static inline const struct draw_tcs_llvm_iface *
draw_tcs_llvm_iface(const struct lp_build_tcs_iface *iface)
{
   const struct draw_tcs_llvm_iface *tcs_iface =
      (const struct draw_tcs_llvm_iface *) iface;

   return tcs_iface;
}
977ec681f3Smrg
/**
 * Draw-module wrapper around the gallivm tessellation-evaluation-shader
 * interface.
 *
 * The helper draw_tes_llvm_iface() turns a pointer to the base interface into
 * a pointer to this struct with a plain cast, which relies on 'base' being
 * the first member.
 */
struct draw_tes_llvm_iface {
   /* Generic interface; keep this as the first member (see above). */
   struct lp_build_tes_iface base;

   /* NOTE(review): presumably the TES variant being compiled -- confirm. */
   struct draw_tes_llvm_variant *variant;
   /* NOTE(review): presumably the LLVM value holding TES inputs -- confirm. */
   LLVMValueRef input;
};
1047ec681f3Smrg
/**
 * Reinterpret a generic tessellation-evaluation-shader interface pointer as
 * the draw-specific wrapper type.  The pointer value itself is unchanged.
 */
static inline const struct draw_tes_llvm_iface *
draw_tes_llvm_iface(const struct lp_build_tes_iface *iface)
{
   const struct draw_tes_llvm_iface *tes_iface =
      (const struct draw_tes_llvm_iface *) iface;

   return tes_iface;
}
1107ec681f3Smrg
111af69d88dSmrg/**
112af69d88dSmrg * Create LLVM type for draw_vertex_buffer.
113af69d88dSmrg */
114af69d88dSmrgstatic LLVMTypeRef
115af69d88dSmrgcreate_jit_dvbuffer_type(struct gallivm_state *gallivm,
116af69d88dSmrg                         const char *struct_name)
117af69d88dSmrg{
118af69d88dSmrg   LLVMTargetDataRef target = gallivm->target;
119af69d88dSmrg   LLVMTypeRef dvbuffer_type;
120af69d88dSmrg   LLVMTypeRef elem_types[DRAW_JIT_DVBUFFER_NUM_FIELDS];
121af69d88dSmrg   LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
1223464ebd5Sriastradh
123af69d88dSmrg   elem_types[DRAW_JIT_DVBUFFER_MAP] =
124af69d88dSmrg      LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0);
125af69d88dSmrg   elem_types[DRAW_JIT_DVBUFFER_SIZE] = int32_type;
1263464ebd5Sriastradh
127af69d88dSmrg   dvbuffer_type = LLVMStructTypeInContext(gallivm->context, elem_types,
12801e04c3fSmrg                                           ARRAY_SIZE(elem_types), 0);
1293464ebd5Sriastradh
13001e04c3fSmrg   (void) target; /* silence unused var warning for non-debug build */
131af69d88dSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, map,
132af69d88dSmrg                          target, dvbuffer_type,
133af69d88dSmrg                          DRAW_JIT_DVBUFFER_MAP);
134af69d88dSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, size,
135af69d88dSmrg                          target, dvbuffer_type,
136af69d88dSmrg                          DRAW_JIT_DVBUFFER_SIZE);
137af69d88dSmrg
138af69d88dSmrg   return dvbuffer_type;
139af69d88dSmrg}
1403464ebd5Sriastradh
1413464ebd5Sriastradh/**
1423464ebd5Sriastradh * Create LLVM type for struct draw_jit_texture
1433464ebd5Sriastradh */
1443464ebd5Sriastradhstatic LLVMTypeRef
145af69d88dSmrgcreate_jit_texture_type(struct gallivm_state *gallivm, const char *struct_name)
1463464ebd5Sriastradh{
1473464ebd5Sriastradh   LLVMTargetDataRef target = gallivm->target;
1483464ebd5Sriastradh   LLVMTypeRef texture_type;
1493464ebd5Sriastradh   LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS];
1503464ebd5Sriastradh   LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
1513464ebd5Sriastradh
1523464ebd5Sriastradh   elem_types[DRAW_JIT_TEXTURE_WIDTH]  =
1533464ebd5Sriastradh   elem_types[DRAW_JIT_TEXTURE_HEIGHT] =
1543464ebd5Sriastradh   elem_types[DRAW_JIT_TEXTURE_DEPTH] =
1557ec681f3Smrg   elem_types[DRAW_JIT_TEXTURE_NUM_SAMPLES] =
1567ec681f3Smrg   elem_types[DRAW_JIT_TEXTURE_SAMPLE_STRIDE] =
1573464ebd5Sriastradh   elem_types[DRAW_JIT_TEXTURE_FIRST_LEVEL] =
1583464ebd5Sriastradh   elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = int32_type;
159af69d88dSmrg   elem_types[DRAW_JIT_TEXTURE_BASE] =
160af69d88dSmrg      LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
1613464ebd5Sriastradh   elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
1623464ebd5Sriastradh   elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
163af69d88dSmrg   elem_types[DRAW_JIT_TEXTURE_MIP_OFFSETS] =
1643464ebd5Sriastradh      LLVMArrayType(int32_type, PIPE_MAX_TEXTURE_LEVELS);
1653464ebd5Sriastradh
1663464ebd5Sriastradh   texture_type = LLVMStructTypeInContext(gallivm->context, elem_types,
16701e04c3fSmrg                                          ARRAY_SIZE(elem_types), 0);
1683464ebd5Sriastradh
16901e04c3fSmrg   (void) target; /* silence unused var warning for non-debug build */
1703464ebd5Sriastradh   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
1713464ebd5Sriastradh                          target, texture_type,
1723464ebd5Sriastradh                          DRAW_JIT_TEXTURE_WIDTH);
1733464ebd5Sriastradh   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
1743464ebd5Sriastradh                          target, texture_type,
1753464ebd5Sriastradh                          DRAW_JIT_TEXTURE_HEIGHT);
1763464ebd5Sriastradh   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth,
1773464ebd5Sriastradh                          target, texture_type,
1783464ebd5Sriastradh                          DRAW_JIT_TEXTURE_DEPTH);
179af69d88dSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, base,
180af69d88dSmrg                          target, texture_type,
181af69d88dSmrg                          DRAW_JIT_TEXTURE_BASE);
1823464ebd5Sriastradh   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride,
1833464ebd5Sriastradh                          target, texture_type,
1843464ebd5Sriastradh                          DRAW_JIT_TEXTURE_ROW_STRIDE);
1853464ebd5Sriastradh   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride,
1863464ebd5Sriastradh                          target, texture_type,
1873464ebd5Sriastradh                          DRAW_JIT_TEXTURE_IMG_STRIDE);
1887ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, first_level,
1897ec681f3Smrg                          target, texture_type,
1907ec681f3Smrg                          DRAW_JIT_TEXTURE_FIRST_LEVEL);
1917ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level,
1927ec681f3Smrg                          target, texture_type,
1937ec681f3Smrg                          DRAW_JIT_TEXTURE_LAST_LEVEL);
194af69d88dSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, mip_offsets,
1953464ebd5Sriastradh                          target, texture_type,
196af69d88dSmrg                          DRAW_JIT_TEXTURE_MIP_OFFSETS);
1977ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, num_samples,
1987ec681f3Smrg                          target, texture_type,
1997ec681f3Smrg                          DRAW_JIT_TEXTURE_NUM_SAMPLES);
2007ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, sample_stride,
2017ec681f3Smrg                          target, texture_type,
2027ec681f3Smrg                          DRAW_JIT_TEXTURE_SAMPLE_STRIDE);
2033464ebd5Sriastradh
2043464ebd5Sriastradh   LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, target, texture_type);
2053464ebd5Sriastradh
2063464ebd5Sriastradh   return texture_type;
2073464ebd5Sriastradh}
2083464ebd5Sriastradh
2093464ebd5Sriastradh
2103464ebd5Sriastradh/**
211af69d88dSmrg * Create LLVM type for struct draw_jit_sampler
212af69d88dSmrg */
213af69d88dSmrgstatic LLVMTypeRef
214af69d88dSmrgcreate_jit_sampler_type(struct gallivm_state *gallivm, const char *struct_name)
215af69d88dSmrg{
216af69d88dSmrg   LLVMTargetDataRef target = gallivm->target;
217af69d88dSmrg   LLVMTypeRef sampler_type;
218af69d88dSmrg   LLVMTypeRef elem_types[DRAW_JIT_SAMPLER_NUM_FIELDS];
219af69d88dSmrg
220af69d88dSmrg   elem_types[DRAW_JIT_SAMPLER_MIN_LOD] =
221af69d88dSmrg   elem_types[DRAW_JIT_SAMPLER_MAX_LOD] =
2227ec681f3Smrg   elem_types[DRAW_JIT_SAMPLER_LOD_BIAS] =
2237ec681f3Smrg   elem_types[DRAW_JIT_SAMPLER_MAX_ANISO] = LLVMFloatTypeInContext(gallivm->context);
224af69d88dSmrg   elem_types[DRAW_JIT_SAMPLER_BORDER_COLOR] =
225af69d88dSmrg      LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
226af69d88dSmrg
227af69d88dSmrg   sampler_type = LLVMStructTypeInContext(gallivm->context, elem_types,
22801e04c3fSmrg                                          ARRAY_SIZE(elem_types), 0);
229af69d88dSmrg
23001e04c3fSmrg   (void) target; /* silence unused var warning for non-debug build */
231af69d88dSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, min_lod,
232af69d88dSmrg                          target, sampler_type,
233af69d88dSmrg                          DRAW_JIT_SAMPLER_MIN_LOD);
234af69d88dSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, max_lod,
235af69d88dSmrg                          target, sampler_type,
236af69d88dSmrg                          DRAW_JIT_SAMPLER_MAX_LOD);
237af69d88dSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, lod_bias,
238af69d88dSmrg                          target, sampler_type,
239af69d88dSmrg                          DRAW_JIT_SAMPLER_LOD_BIAS);
240af69d88dSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, border_color,
241af69d88dSmrg                          target, sampler_type,
242af69d88dSmrg                          DRAW_JIT_SAMPLER_BORDER_COLOR);
2437ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, max_aniso,
2447ec681f3Smrg                          target, sampler_type,
2457ec681f3Smrg                          DRAW_JIT_SAMPLER_MAX_ANISO);
246af69d88dSmrg
247af69d88dSmrg   LP_CHECK_STRUCT_SIZE(struct draw_jit_sampler, target, sampler_type);
248af69d88dSmrg
249af69d88dSmrg   return sampler_type;
250af69d88dSmrg}
251af69d88dSmrg
2527ec681f3Smrg/**
2537ec681f3Smrg * Create LLVM type for struct draw_jit_texture
2547ec681f3Smrg */
2557ec681f3Smrgstatic LLVMTypeRef
2567ec681f3Smrgcreate_jit_image_type(struct gallivm_state *gallivm, const char *struct_name)
2577ec681f3Smrg{
2587ec681f3Smrg   LLVMTargetDataRef target = gallivm->target;
2597ec681f3Smrg   LLVMTypeRef image_type;
2607ec681f3Smrg   LLVMTypeRef elem_types[DRAW_JIT_IMAGE_NUM_FIELDS];
2617ec681f3Smrg   LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
2627ec681f3Smrg
2637ec681f3Smrg   elem_types[DRAW_JIT_IMAGE_WIDTH]  =
2647ec681f3Smrg   elem_types[DRAW_JIT_IMAGE_HEIGHT] =
2657ec681f3Smrg   elem_types[DRAW_JIT_IMAGE_DEPTH] =
2667ec681f3Smrg   elem_types[DRAW_JIT_IMAGE_ROW_STRIDE] =
2677ec681f3Smrg   elem_types[DRAW_JIT_IMAGE_IMG_STRIDE] =
2687ec681f3Smrg   elem_types[DRAW_JIT_IMAGE_NUM_SAMPLES] =
2697ec681f3Smrg   elem_types[DRAW_JIT_IMAGE_SAMPLE_STRIDE] = int32_type;
2707ec681f3Smrg   elem_types[DRAW_JIT_IMAGE_BASE] =
2717ec681f3Smrg      LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
2727ec681f3Smrg
2737ec681f3Smrg   image_type = LLVMStructTypeInContext(gallivm->context, elem_types,
2747ec681f3Smrg                                          ARRAY_SIZE(elem_types), 0);
2757ec681f3Smrg
2767ec681f3Smrg   (void) target; /* silence unused var warning for non-debug build */
2777ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, width,
2787ec681f3Smrg                          target, image_type,
2797ec681f3Smrg                          DRAW_JIT_IMAGE_WIDTH);
2807ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, height,
2817ec681f3Smrg                          target, image_type,
2827ec681f3Smrg                          DRAW_JIT_IMAGE_HEIGHT);
2837ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, depth,
2847ec681f3Smrg                          target, image_type,
2857ec681f3Smrg                          DRAW_JIT_IMAGE_DEPTH);
2867ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, base,
2877ec681f3Smrg                          target, image_type,
2887ec681f3Smrg                          DRAW_JIT_IMAGE_BASE);
2897ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, row_stride,
2907ec681f3Smrg                          target, image_type,
2917ec681f3Smrg                          DRAW_JIT_IMAGE_ROW_STRIDE);
2927ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, img_stride,
2937ec681f3Smrg                          target, image_type,
2947ec681f3Smrg                          DRAW_JIT_IMAGE_IMG_STRIDE);
2957ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, num_samples,
2967ec681f3Smrg                          target, image_type,
2977ec681f3Smrg                          DRAW_JIT_IMAGE_NUM_SAMPLES);
2987ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, sample_stride,
2997ec681f3Smrg                          target, image_type,
3007ec681f3Smrg                          DRAW_JIT_IMAGE_SAMPLE_STRIDE);
3017ec681f3Smrg
3027ec681f3Smrg   LP_CHECK_STRUCT_SIZE(struct draw_jit_image, target, image_type);
3037ec681f3Smrg
3047ec681f3Smrg   return image_type;
3057ec681f3Smrg}
306af69d88dSmrg
307af69d88dSmrg/**
308af69d88dSmrg * Create LLVM type for struct draw_jit_context
3093464ebd5Sriastradh */
3103464ebd5Sriastradhstatic LLVMTypeRef
3113464ebd5Sriastradhcreate_jit_context_type(struct gallivm_state *gallivm,
312af69d88dSmrg                        LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
3137ec681f3Smrg                        LLVMTypeRef image_type,
314af69d88dSmrg                        const char *struct_name)
3153464ebd5Sriastradh{
3163464ebd5Sriastradh   LLVMTargetDataRef target = gallivm->target;
3173464ebd5Sriastradh   LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
318af69d88dSmrg   LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
319af69d88dSmrg   LLVMTypeRef elem_types[DRAW_JIT_CTX_NUM_FIELDS];
3203464ebd5Sriastradh   LLVMTypeRef context_type;
3213464ebd5Sriastradh
322af69d88dSmrg   elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* vs_constants */
323af69d88dSmrg                                 LP_MAX_TGSI_CONST_BUFFERS);
324af69d88dSmrg   elem_types[1] = LLVMArrayType(int_type, /* num_vs_constants */
325af69d88dSmrg                                 LP_MAX_TGSI_CONST_BUFFERS);
326af69d88dSmrg   elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
327af69d88dSmrg                                                 DRAW_TOTAL_CLIP_PLANES), 0);
32801e04c3fSmrg   elem_types[3] = LLVMPointerType(float_type, 0); /* viewports */
3293464ebd5Sriastradh   elem_types[4] = LLVMArrayType(texture_type,
330af69d88dSmrg                                 PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
331af69d88dSmrg   elem_types[5] = LLVMArrayType(sampler_type,
332af69d88dSmrg                                 PIPE_MAX_SAMPLERS); /* samplers */
3337ec681f3Smrg   elem_types[6] = LLVMArrayType(image_type,
3347ec681f3Smrg                                 PIPE_MAX_SHADER_IMAGES); /* images */
3357ec681f3Smrg   elem_types[7] = LLVMArrayType(LLVMPointerType(int_type, 0), /* vs_ssbo */
3367ec681f3Smrg                                 LP_MAX_TGSI_SHADER_BUFFERS);
3377ec681f3Smrg   elem_types[8] = LLVMArrayType(int_type, /* num_vs_ssbos */
3387ec681f3Smrg                                 LP_MAX_TGSI_SHADER_BUFFERS);
3397ec681f3Smrg   elem_types[9] = LLVMPointerType(float_type, 0); /* aniso table */
3403464ebd5Sriastradh   context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
34101e04c3fSmrg                                          ARRAY_SIZE(elem_types), 0);
34201e04c3fSmrg
34301e04c3fSmrg   (void) target; /* silence unused var warning for non-debug build */
3443464ebd5Sriastradh   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
345af69d88dSmrg                          target, context_type, DRAW_JIT_CTX_CONSTANTS);
346af69d88dSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, num_vs_constants,
347af69d88dSmrg                          target, context_type, DRAW_JIT_CTX_NUM_CONSTANTS);
3483464ebd5Sriastradh   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes,
349af69d88dSmrg                          target, context_type, DRAW_JIT_CTX_PLANES);
35001e04c3fSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, viewports,
351af69d88dSmrg                          target, context_type, DRAW_JIT_CTX_VIEWPORT);
3523464ebd5Sriastradh   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
3533464ebd5Sriastradh                          target, context_type,
3543464ebd5Sriastradh                          DRAW_JIT_CTX_TEXTURES);
355af69d88dSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, samplers,
356af69d88dSmrg                          target, context_type,
357af69d88dSmrg                          DRAW_JIT_CTX_SAMPLERS);
3587ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, images,
3597ec681f3Smrg                          target, context_type, DRAW_JIT_CTX_IMAGES);
3607ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_ssbos,
3617ec681f3Smrg                          target, context_type, DRAW_JIT_CTX_SSBOS);
3627ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, num_vs_ssbos,
3637ec681f3Smrg                          target, context_type, DRAW_JIT_CTX_NUM_SSBOS);
3647ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, aniso_filter_table,
3657ec681f3Smrg                          target, context_type, DRAW_JIT_CTX_ANISO_FILTER_TABLE);
3663464ebd5Sriastradh   LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
3673464ebd5Sriastradh                        target, context_type);
3683464ebd5Sriastradh
3693464ebd5Sriastradh   return context_type;
3703464ebd5Sriastradh}
3713464ebd5Sriastradh
3723464ebd5Sriastradh
373af69d88dSmrg/**
374af69d88dSmrg * Create LLVM type for struct draw_gs_jit_context
375af69d88dSmrg */
376af69d88dSmrgstatic LLVMTypeRef
377af69d88dSmrgcreate_gs_jit_context_type(struct gallivm_state *gallivm,
378af69d88dSmrg                           unsigned vector_length,
379af69d88dSmrg                           LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
3807ec681f3Smrg                           LLVMTypeRef image_type,
381af69d88dSmrg                           const char *struct_name)
382af69d88dSmrg{
383af69d88dSmrg   LLVMTargetDataRef target = gallivm->target;
384af69d88dSmrg   LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
385af69d88dSmrg   LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
386af69d88dSmrg   LLVMTypeRef elem_types[DRAW_GS_JIT_CTX_NUM_FIELDS];
387af69d88dSmrg   LLVMTypeRef context_type;
388af69d88dSmrg
389af69d88dSmrg   elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* constants */
390af69d88dSmrg                                 LP_MAX_TGSI_CONST_BUFFERS);
391af69d88dSmrg   elem_types[1] = LLVMArrayType(int_type, /* num_constants */
392af69d88dSmrg                                 LP_MAX_TGSI_CONST_BUFFERS);
393af69d88dSmrg   elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
394af69d88dSmrg                                                 DRAW_TOTAL_CLIP_PLANES), 0);
39501e04c3fSmrg   elem_types[3] = LLVMPointerType(float_type, 0); /* viewports */
396af69d88dSmrg
397af69d88dSmrg   elem_types[4] = LLVMArrayType(texture_type,
398af69d88dSmrg                                 PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
399af69d88dSmrg   elem_types[5] = LLVMArrayType(sampler_type,
400af69d88dSmrg                                 PIPE_MAX_SAMPLERS); /* samplers */
4017ec681f3Smrg   elem_types[6] = LLVMArrayType(image_type,
4027ec681f3Smrg                                 PIPE_MAX_SHADER_IMAGES); /* images */
4037ec681f3Smrg   elem_types[7] = LLVMPointerType(LLVMPointerType(int_type, 0), 0);
404af69d88dSmrg   elem_types[8] = LLVMPointerType(LLVMVectorType(int_type,
405af69d88dSmrg                                                  vector_length), 0);
4067ec681f3Smrg   elem_types[9] = LLVMPointerType(LLVMVectorType(int_type,
4077ec681f3Smrg                                                  vector_length), 0);
408af69d88dSmrg
4097ec681f3Smrg   elem_types[10] = LLVMArrayType(LLVMPointerType(int_type, 0), /* ssbos */
4107ec681f3Smrg                                 LP_MAX_TGSI_SHADER_BUFFERS);
4117ec681f3Smrg   elem_types[11] = LLVMArrayType(int_type, /* num_ssbos */
4127ec681f3Smrg                                 LP_MAX_TGSI_SHADER_BUFFERS);
4137ec681f3Smrg   elem_types[12] = LLVMPointerType(float_type, 0); /* aniso table */
414af69d88dSmrg   context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
41501e04c3fSmrg                                          ARRAY_SIZE(elem_types), 0);
416af69d88dSmrg
41701e04c3fSmrg   (void) target; /* silence unused var warning for non-debug build */
418af69d88dSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, constants,
419af69d88dSmrg                          target, context_type, DRAW_GS_JIT_CTX_CONSTANTS);
420af69d88dSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, num_constants,
421af69d88dSmrg                          target, context_type, DRAW_GS_JIT_CTX_NUM_CONSTANTS);
422af69d88dSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, planes,
423af69d88dSmrg                          target, context_type, DRAW_GS_JIT_CTX_PLANES);
42401e04c3fSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, viewports,
425af69d88dSmrg                          target, context_type, DRAW_GS_JIT_CTX_VIEWPORT);
426af69d88dSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, textures,
427af69d88dSmrg                          target, context_type,
428af69d88dSmrg                          DRAW_GS_JIT_CTX_TEXTURES);
429af69d88dSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, samplers,
430af69d88dSmrg                          target, context_type,
431af69d88dSmrg                          DRAW_GS_JIT_CTX_SAMPLERS);
432af69d88dSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths,
433af69d88dSmrg                          target, context_type,
434af69d88dSmrg                          DRAW_GS_JIT_CTX_PRIM_LENGTHS);
435af69d88dSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices,
436af69d88dSmrg                          target, context_type,
437af69d88dSmrg                          DRAW_GS_JIT_CTX_EMITTED_VERTICES);
438af69d88dSmrg   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims,
439af69d88dSmrg                          target, context_type,
440af69d88dSmrg                          DRAW_GS_JIT_CTX_EMITTED_PRIMS);
4417ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, ssbos,
4427ec681f3Smrg                          target, context_type, DRAW_GS_JIT_CTX_SSBOS);
4437ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, num_ssbos,
4447ec681f3Smrg                          target, context_type, DRAW_GS_JIT_CTX_NUM_SSBOS);
4457ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, images,
4467ec681f3Smrg                          target, context_type, DRAW_GS_JIT_CTX_IMAGES);
4477ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, aniso_filter_table,
4487ec681f3Smrg                          target, context_type, DRAW_GS_JIT_CTX_ANISO_FILTER_TABLE);
449af69d88dSmrg   LP_CHECK_STRUCT_SIZE(struct draw_gs_jit_context,
450af69d88dSmrg                        target, context_type);
451af69d88dSmrg
452af69d88dSmrg   return context_type;
453af69d88dSmrg}
454af69d88dSmrg
455af69d88dSmrg
456af69d88dSmrgstatic LLVMTypeRef
457af69d88dSmrgcreate_gs_jit_input_type(struct gallivm_state *gallivm)
458af69d88dSmrg{
459af69d88dSmrg   LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
460af69d88dSmrg   LLVMTypeRef input_array;
461af69d88dSmrg
462af69d88dSmrg   input_array = LLVMVectorType(float_type, TGSI_NUM_CHANNELS); /* num primitives */
463af69d88dSmrg   input_array = LLVMArrayType(input_array, TGSI_NUM_CHANNELS); /* num channels */
464af69d88dSmrg   input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
465af69d88dSmrg   input_array = LLVMPointerType(input_array, 0); /* num vertices per prim */
466af69d88dSmrg
467af69d88dSmrg   return input_array;
468af69d88dSmrg}
469af69d88dSmrg
4703464ebd5Sriastradh/**
4713464ebd5Sriastradh * Create LLVM type for struct pipe_vertex_buffer
4723464ebd5Sriastradh */
4733464ebd5Sriastradhstatic LLVMTypeRef
474af69d88dSmrgcreate_jit_vertex_buffer_type(struct gallivm_state *gallivm,
475af69d88dSmrg                              const char *struct_name)
4763464ebd5Sriastradh{
4773464ebd5Sriastradh   LLVMTargetDataRef target = gallivm->target;
478af69d88dSmrg   LLVMTypeRef elem_types[4];
4793464ebd5Sriastradh   LLVMTypeRef vb_type;
4803464ebd5Sriastradh
48101e04c3fSmrg   elem_types[0] = LLVMInt16TypeInContext(gallivm->context);
48201e04c3fSmrg   elem_types[1] = LLVMInt8TypeInContext(gallivm->context);
48301e04c3fSmrg   elem_types[2] = LLVMInt32TypeInContext(gallivm->context);
484af69d88dSmrg   elem_types[3] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
4853464ebd5Sriastradh
4863464ebd5Sriastradh   vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
48701e04c3fSmrg                                     ARRAY_SIZE(elem_types), 0);
4883464ebd5Sriastradh
48901e04c3fSmrg   (void) target; /* silence unused var warning for non-debug build */
4903464ebd5Sriastradh   LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
4913464ebd5Sriastradh                          target, vb_type, 0);
49201e04c3fSmrg   LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, is_user_buffer,
4933464ebd5Sriastradh                          target, vb_type, 1);
49401e04c3fSmrg   LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
49501e04c3fSmrg                          target, vb_type, 2);
49601e04c3fSmrg   LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer.resource,
49701e04c3fSmrg                          target, vb_type, 3);
4983464ebd5Sriastradh
4993464ebd5Sriastradh   LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type);
5003464ebd5Sriastradh
5013464ebd5Sriastradh   return vb_type;
5023464ebd5Sriastradh}
5033464ebd5Sriastradh
5043464ebd5Sriastradh
5053464ebd5Sriastradh/**
5063464ebd5Sriastradh * Create LLVM type for struct vertex_header;
5073464ebd5Sriastradh */
5083464ebd5Sriastradhstatic LLVMTypeRef
5093464ebd5Sriastradhcreate_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
5103464ebd5Sriastradh{
5113464ebd5Sriastradh   LLVMTargetDataRef target = gallivm->target;
51201e04c3fSmrg   LLVMTypeRef elem_types[3];
5133464ebd5Sriastradh   LLVMTypeRef vertex_header;
5143464ebd5Sriastradh   char struct_name[24];
5153464ebd5Sriastradh
5167ec681f3Smrg   snprintf(struct_name, 23, "vertex_header%d", data_elems);
5173464ebd5Sriastradh
518af69d88dSmrg   elem_types[DRAW_JIT_VERTEX_VERTEX_ID]  = LLVMIntTypeInContext(gallivm->context, 32);
51901e04c3fSmrg   elem_types[DRAW_JIT_VERTEX_CLIP_POS]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
520af69d88dSmrg   elem_types[DRAW_JIT_VERTEX_DATA]  = LLVMArrayType(elem_types[1], data_elems);
5213464ebd5Sriastradh
5223464ebd5Sriastradh   vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types,
52301e04c3fSmrg                                           ARRAY_SIZE(elem_types), 0);
5243464ebd5Sriastradh
5253464ebd5Sriastradh   /* these are bit-fields and we can't take address of them
5263464ebd5Sriastradh      LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
5273464ebd5Sriastradh      target, vertex_header,
5283464ebd5Sriastradh      DRAW_JIT_VERTEX_CLIPMASK);
5293464ebd5Sriastradh      LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
5303464ebd5Sriastradh      target, vertex_header,
5313464ebd5Sriastradh      DRAW_JIT_VERTEX_EDGEFLAG);
5323464ebd5Sriastradh      LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
5333464ebd5Sriastradh      target, vertex_header,
5343464ebd5Sriastradh      DRAW_JIT_VERTEX_PAD);
5353464ebd5Sriastradh      LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
5363464ebd5Sriastradh      target, vertex_header,
5373464ebd5Sriastradh      DRAW_JIT_VERTEX_VERTEX_ID);
5383464ebd5Sriastradh   */
53901e04c3fSmrg   (void) target; /* silence unused var warning for non-debug build */
54001e04c3fSmrg   LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip_pos,
541af69d88dSmrg                          target, vertex_header,
54201e04c3fSmrg                          DRAW_JIT_VERTEX_CLIP_POS);
5433464ebd5Sriastradh   LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
5443464ebd5Sriastradh                          target, vertex_header,
5453464ebd5Sriastradh                          DRAW_JIT_VERTEX_DATA);
5463464ebd5Sriastradh
547af69d88dSmrg   assert(LLVMABISizeOfType(target, vertex_header) ==
548af69d88dSmrg          offsetof(struct vertex_header, data[data_elems]));
5493464ebd5Sriastradh
5503464ebd5Sriastradh   return vertex_header;
5513464ebd5Sriastradh}
5523464ebd5Sriastradh
5537ec681f3Smrg/**
5547ec681f3Smrg * Create LLVM type for struct draw_tcs_jit_context
5557ec681f3Smrg */
5567ec681f3Smrgstatic LLVMTypeRef
5577ec681f3Smrgcreate_tcs_jit_context_type(struct gallivm_state *gallivm,
5587ec681f3Smrg                            unsigned vector_length,
5597ec681f3Smrg                            LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
5607ec681f3Smrg                            LLVMTypeRef image_type,
5617ec681f3Smrg                            const char *struct_name)
5627ec681f3Smrg{
5637ec681f3Smrg   LLVMTargetDataRef target = gallivm->target;
5647ec681f3Smrg   LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
5657ec681f3Smrg   LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
5667ec681f3Smrg   LLVMTypeRef elem_types[DRAW_TCS_JIT_CTX_NUM_FIELDS];
5677ec681f3Smrg   LLVMTypeRef context_type;
5687ec681f3Smrg
5697ec681f3Smrg   elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* constants */
5707ec681f3Smrg                                 LP_MAX_TGSI_CONST_BUFFERS);
5717ec681f3Smrg   elem_types[1] = LLVMArrayType(int_type, /* num_constants */
5727ec681f3Smrg                                 LP_MAX_TGSI_CONST_BUFFERS);
5737ec681f3Smrg   elem_types[2] = LLVMInt32TypeInContext(gallivm->context);
5747ec681f3Smrg   elem_types[3] = LLVMInt32TypeInContext(gallivm->context);
5757ec681f3Smrg
5767ec681f3Smrg   elem_types[4] = LLVMArrayType(texture_type,
5777ec681f3Smrg                                 PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
5787ec681f3Smrg   elem_types[5] = LLVMArrayType(sampler_type,
5797ec681f3Smrg                                 PIPE_MAX_SAMPLERS); /* samplers */
5807ec681f3Smrg   elem_types[6] = LLVMArrayType(image_type,
5817ec681f3Smrg                                 PIPE_MAX_SHADER_IMAGES); /* images */
5827ec681f3Smrg
5837ec681f3Smrg   elem_types[7] = LLVMArrayType(LLVMPointerType(int_type, 0), /* ssbos */
5847ec681f3Smrg                                 LP_MAX_TGSI_SHADER_BUFFERS);
5857ec681f3Smrg   elem_types[8] = LLVMArrayType(int_type, /* num_ssbos */
5867ec681f3Smrg                                 LP_MAX_TGSI_SHADER_BUFFERS);
5877ec681f3Smrg   elem_types[9] = LLVMPointerType(float_type, 0); /* aniso table */
5887ec681f3Smrg   context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
5897ec681f3Smrg                                          ARRAY_SIZE(elem_types), 0);
5907ec681f3Smrg
5917ec681f3Smrg   (void) target; /* silence unused var warning for non-debug build */
5927ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, constants,
5937ec681f3Smrg                          target, context_type, DRAW_TCS_JIT_CTX_CONSTANTS);
5947ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, num_constants,
5957ec681f3Smrg                          target, context_type, DRAW_TCS_JIT_CTX_NUM_CONSTANTS);
5967ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, textures,
5977ec681f3Smrg                          target, context_type,
5987ec681f3Smrg                          DRAW_TCS_JIT_CTX_TEXTURES);
5997ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, samplers,
6007ec681f3Smrg                          target, context_type,
6017ec681f3Smrg                          DRAW_TCS_JIT_CTX_SAMPLERS);
6027ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, ssbos,
6037ec681f3Smrg                          target, context_type, DRAW_TCS_JIT_CTX_SSBOS);
6047ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, num_ssbos,
6057ec681f3Smrg                          target, context_type, DRAW_TCS_JIT_CTX_NUM_SSBOS);
6067ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, images,
6077ec681f3Smrg                          target, context_type, DRAW_TCS_JIT_CTX_IMAGES);
6087ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, aniso_filter_table,
6097ec681f3Smrg                          target, context_type, DRAW_TCS_JIT_CTX_ANISO_FILTER_TABLE);
6107ec681f3Smrg   LP_CHECK_STRUCT_SIZE(struct draw_tcs_jit_context,
6117ec681f3Smrg                        target, context_type);
6127ec681f3Smrg
6137ec681f3Smrg   return context_type;
6147ec681f3Smrg}
6157ec681f3Smrg
6167ec681f3Smrgstatic LLVMTypeRef
6177ec681f3Smrgcreate_tcs_jit_input_type(struct gallivm_state *gallivm)
6187ec681f3Smrg{
6197ec681f3Smrg   LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
6207ec681f3Smrg   LLVMTypeRef input_array;
6217ec681f3Smrg
6227ec681f3Smrg   input_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
6237ec681f3Smrg   input_array = LLVMArrayType(input_array, NUM_TCS_INPUTS); /* num attrs per vertex */
6247ec681f3Smrg   input_array = LLVMPointerType(input_array, 0); /* num vertices per prim */
6257ec681f3Smrg
6267ec681f3Smrg   return input_array;
6277ec681f3Smrg}
6287ec681f3Smrg
6297ec681f3Smrgstatic LLVMTypeRef
6307ec681f3Smrgcreate_tcs_jit_output_type(struct gallivm_state *gallivm)
6317ec681f3Smrg{
6327ec681f3Smrg   LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
6337ec681f3Smrg   LLVMTypeRef output_array;
6347ec681f3Smrg
6357ec681f3Smrg   output_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
6367ec681f3Smrg   output_array = LLVMArrayType(output_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
6377ec681f3Smrg   output_array = LLVMPointerType(output_array, 0); /* num vertices per prim */
6387ec681f3Smrg
6397ec681f3Smrg   return output_array;
6407ec681f3Smrg}
6417ec681f3Smrg
6427ec681f3Smrgstatic LLVMTypeRef
6437ec681f3Smrgcreate_tes_jit_input_type(struct gallivm_state *gallivm)
6447ec681f3Smrg{
6457ec681f3Smrg   LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
6467ec681f3Smrg   LLVMTypeRef input_array;
6477ec681f3Smrg
6487ec681f3Smrg   input_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
6497ec681f3Smrg   input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
6507ec681f3Smrg   input_array = LLVMPointerType(input_array, 0); /* num vertices per prim */
6517ec681f3Smrg
6527ec681f3Smrg   return input_array;
6537ec681f3Smrg}
6547ec681f3Smrg
6557ec681f3Smrg/**
6567ec681f3Smrg * Create LLVM type for struct draw_tes_jit_context
6577ec681f3Smrg */
6587ec681f3Smrgstatic LLVMTypeRef
6597ec681f3Smrgcreate_tes_jit_context_type(struct gallivm_state *gallivm,
6607ec681f3Smrg                            unsigned vector_length,
6617ec681f3Smrg                            LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
6627ec681f3Smrg                            LLVMTypeRef image_type,
6637ec681f3Smrg                            const char *struct_name)
6647ec681f3Smrg{
6657ec681f3Smrg   LLVMTargetDataRef target = gallivm->target;
6667ec681f3Smrg   LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
6677ec681f3Smrg   LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
6687ec681f3Smrg   LLVMTypeRef elem_types[DRAW_TCS_JIT_CTX_NUM_FIELDS];
6697ec681f3Smrg   LLVMTypeRef context_type;
6707ec681f3Smrg
6717ec681f3Smrg   elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* constants */
6727ec681f3Smrg                                 LP_MAX_TGSI_CONST_BUFFERS);
6737ec681f3Smrg   elem_types[1] = LLVMArrayType(int_type, /* num_constants */
6747ec681f3Smrg                                 LP_MAX_TGSI_CONST_BUFFERS);
6757ec681f3Smrg   elem_types[2] = LLVMInt32TypeInContext(gallivm->context);
6767ec681f3Smrg   elem_types[3] = LLVMInt32TypeInContext(gallivm->context);
6777ec681f3Smrg
6787ec681f3Smrg   elem_types[4] = LLVMArrayType(texture_type,
6797ec681f3Smrg                                 PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
6807ec681f3Smrg   elem_types[5] = LLVMArrayType(sampler_type,
6817ec681f3Smrg                                 PIPE_MAX_SAMPLERS); /* samplers */
6827ec681f3Smrg   elem_types[6] = LLVMArrayType(image_type,
6837ec681f3Smrg                                 PIPE_MAX_SHADER_IMAGES); /* images */
6847ec681f3Smrg
6857ec681f3Smrg   elem_types[7] = LLVMArrayType(LLVMPointerType(int_type, 0), /* ssbos */
6867ec681f3Smrg                                 LP_MAX_TGSI_SHADER_BUFFERS);
6877ec681f3Smrg   elem_types[8] = LLVMArrayType(int_type, /* num_ssbos */
6887ec681f3Smrg                                 LP_MAX_TGSI_SHADER_BUFFERS);
6897ec681f3Smrg   elem_types[9] = LLVMPointerType(float_type, 0); /* aniso table */
6907ec681f3Smrg   context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
6917ec681f3Smrg                                          ARRAY_SIZE(elem_types), 0);
6927ec681f3Smrg
6937ec681f3Smrg   (void) target; /* silence unused var warning for non-debug build */
6947ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, constants,
6957ec681f3Smrg                          target, context_type, DRAW_TCS_JIT_CTX_CONSTANTS);
6967ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, num_constants,
6977ec681f3Smrg                          target, context_type, DRAW_TCS_JIT_CTX_NUM_CONSTANTS);
6987ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, textures,
6997ec681f3Smrg                          target, context_type,
7007ec681f3Smrg                          DRAW_TCS_JIT_CTX_TEXTURES);
7017ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, samplers,
7027ec681f3Smrg                          target, context_type,
7037ec681f3Smrg                          DRAW_TCS_JIT_CTX_SAMPLERS);
7047ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, ssbos,
7057ec681f3Smrg                          target, context_type, DRAW_TCS_JIT_CTX_SSBOS);
7067ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, num_ssbos,
7077ec681f3Smrg                          target, context_type, DRAW_TCS_JIT_CTX_NUM_SSBOS);
7087ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, images,
7097ec681f3Smrg                          target, context_type, DRAW_TCS_JIT_CTX_IMAGES);
7107ec681f3Smrg   LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, aniso_filter_table,
7117ec681f3Smrg                          target, context_type, DRAW_TCS_JIT_CTX_ANISO_FILTER_TABLE);
7127ec681f3Smrg   LP_CHECK_STRUCT_SIZE(struct draw_tes_jit_context,
7137ec681f3Smrg                        target, context_type);
7147ec681f3Smrg
7157ec681f3Smrg   return context_type;
7167ec681f3Smrg}
7173464ebd5Sriastradh
7183464ebd5Sriastradh/**
7193464ebd5Sriastradh * Create LLVM types for various structures.
7203464ebd5Sriastradh */
7213464ebd5Sriastradhstatic void
722af69d88dSmrgcreate_jit_types(struct draw_llvm_variant *variant)
7233464ebd5Sriastradh{
724af69d88dSmrg   struct gallivm_state *gallivm = variant->gallivm;
725af69d88dSmrg   LLVMTypeRef texture_type, sampler_type, context_type, buffer_type,
7267ec681f3Smrg      vb_type, image_type;
7273464ebd5Sriastradh
728af69d88dSmrg   texture_type = create_jit_texture_type(gallivm, "texture");
729af69d88dSmrg   sampler_type = create_jit_sampler_type(gallivm, "sampler");
7307ec681f3Smrg   image_type = create_jit_image_type(gallivm, "image");
7313464ebd5Sriastradh
732af69d88dSmrg   context_type = create_jit_context_type(gallivm, texture_type, sampler_type,
7337ec681f3Smrg                                          image_type,
734af69d88dSmrg                                          "draw_jit_context");
735af69d88dSmrg   variant->context_ptr_type = LLVMPointerType(context_type, 0);
7363464ebd5Sriastradh
737af69d88dSmrg   buffer_type = create_jit_dvbuffer_type(gallivm, "draw_vertex_buffer");
738af69d88dSmrg   variant->buffer_ptr_type = LLVMPointerType(buffer_type, 0);
739af69d88dSmrg
740af69d88dSmrg   vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer");
741af69d88dSmrg   variant->vb_ptr_type = LLVMPointerType(vb_type, 0);
7423464ebd5Sriastradh}
7433464ebd5Sriastradh
7443464ebd5Sriastradh
7453464ebd5Sriastradhstatic LLVMTypeRef
746af69d88dSmrgget_context_ptr_type(struct draw_llvm_variant *variant)
7473464ebd5Sriastradh{
748af69d88dSmrg   if (!variant->context_ptr_type)
749af69d88dSmrg      create_jit_types(variant);
750af69d88dSmrg   return variant->context_ptr_type;
7513464ebd5Sriastradh}
7523464ebd5Sriastradh
7533464ebd5Sriastradh
7543464ebd5Sriastradhstatic LLVMTypeRef
755af69d88dSmrgget_buffer_ptr_type(struct draw_llvm_variant *variant)
7563464ebd5Sriastradh{
757af69d88dSmrg   if (!variant->buffer_ptr_type)
758af69d88dSmrg      create_jit_types(variant);
759af69d88dSmrg   return variant->buffer_ptr_type;
7603464ebd5Sriastradh}
7613464ebd5Sriastradh
7623464ebd5Sriastradh
7633464ebd5Sriastradhstatic LLVMTypeRef
764af69d88dSmrgget_vb_ptr_type(struct draw_llvm_variant *variant)
7653464ebd5Sriastradh{
766af69d88dSmrg   if (!variant->vb_ptr_type)
767af69d88dSmrg      create_jit_types(variant);
768af69d88dSmrg   return variant->vb_ptr_type;
7693464ebd5Sriastradh}
7703464ebd5Sriastradh
7713464ebd5Sriastradhstatic LLVMTypeRef
772af69d88dSmrgget_vertex_header_ptr_type(struct draw_llvm_variant *variant)
7733464ebd5Sriastradh{
774af69d88dSmrg   if (!variant->vertex_header_ptr_type)
775af69d88dSmrg      create_jit_types(variant);
776af69d88dSmrg   return variant->vertex_header_ptr_type;
7773464ebd5Sriastradh}
7783464ebd5Sriastradh
7793464ebd5Sriastradh
7803464ebd5Sriastradh/**
7813464ebd5Sriastradh * Create per-context LLVM info.
7823464ebd5Sriastradh */
7833464ebd5Sriastradhstruct draw_llvm *
78401e04c3fSmrgdraw_llvm_create(struct draw_context *draw, LLVMContextRef context)
7853464ebd5Sriastradh{
7863464ebd5Sriastradh   struct draw_llvm *llvm;
7873464ebd5Sriastradh
78801e04c3fSmrg   if (!lp_build_init())
78901e04c3fSmrg      return NULL;
79001e04c3fSmrg
7913464ebd5Sriastradh   llvm = CALLOC_STRUCT( draw_llvm );
7923464ebd5Sriastradh   if (!llvm)
7933464ebd5Sriastradh      return NULL;
7943464ebd5Sriastradh
7953464ebd5Sriastradh   llvm->draw = draw;
7963464ebd5Sriastradh
79701e04c3fSmrg   llvm->context = context;
79801e04c3fSmrg   if (!llvm->context) {
79901e04c3fSmrg      llvm->context = LLVMContextCreate();
80001e04c3fSmrg      llvm->context_owned = true;
80101e04c3fSmrg   }
80201e04c3fSmrg   if (!llvm->context)
80301e04c3fSmrg      goto fail;
80401e04c3fSmrg
8053464ebd5Sriastradh   llvm->nr_variants = 0;
8063464ebd5Sriastradh   make_empty_list(&llvm->vs_variants_list);
8073464ebd5Sriastradh
808af69d88dSmrg   llvm->nr_gs_variants = 0;
809af69d88dSmrg   make_empty_list(&llvm->gs_variants_list);
8103464ebd5Sriastradh
8117ec681f3Smrg   llvm->nr_tcs_variants = 0;
8127ec681f3Smrg   make_empty_list(&llvm->tcs_variants_list);
8137ec681f3Smrg
8147ec681f3Smrg   llvm->nr_tes_variants = 0;
8157ec681f3Smrg   make_empty_list(&llvm->tes_variants_list);
8167ec681f3Smrg
8173464ebd5Sriastradh   return llvm;
81801e04c3fSmrg
81901e04c3fSmrgfail:
82001e04c3fSmrg   draw_llvm_destroy(llvm);
82101e04c3fSmrg   return NULL;
8223464ebd5Sriastradh}
8233464ebd5Sriastradh
8243464ebd5Sriastradh
8253464ebd5Sriastradh/**
8263464ebd5Sriastradh * Free per-context LLVM info.
8273464ebd5Sriastradh */
8283464ebd5Sriastradhvoid
8293464ebd5Sriastradhdraw_llvm_destroy(struct draw_llvm *llvm)
8303464ebd5Sriastradh{
83101e04c3fSmrg   if (llvm->context_owned)
83201e04c3fSmrg      LLVMContextDispose(llvm->context);
83301e04c3fSmrg   llvm->context = NULL;
83401e04c3fSmrg
8353464ebd5Sriastradh   /* XXX free other draw_llvm data? */
8363464ebd5Sriastradh   FREE(llvm);
8373464ebd5Sriastradh}
8383464ebd5Sriastradh
8397ec681f3Smrgstatic void
8407ec681f3Smrgdraw_get_ir_cache_key(struct nir_shader *nir,
8417ec681f3Smrg                      const void *key, size_t key_size,
8427ec681f3Smrg                      uint32_t val_32bit,
8437ec681f3Smrg                      unsigned char ir_sha1_cache_key[20])
8447ec681f3Smrg{
8457ec681f3Smrg   struct blob blob = { 0 };
8467ec681f3Smrg   unsigned ir_size;
8477ec681f3Smrg   void *ir_binary;
8487ec681f3Smrg
8497ec681f3Smrg   blob_init(&blob);
8507ec681f3Smrg   nir_serialize(&blob, nir, true);
8517ec681f3Smrg   ir_binary = blob.data;
8527ec681f3Smrg   ir_size = blob.size;
8537ec681f3Smrg
8547ec681f3Smrg   struct mesa_sha1 ctx;
8557ec681f3Smrg   _mesa_sha1_init(&ctx);
8567ec681f3Smrg   _mesa_sha1_update(&ctx, key, key_size);
8577ec681f3Smrg   _mesa_sha1_update(&ctx, ir_binary, ir_size);
8587ec681f3Smrg   _mesa_sha1_update(&ctx, &val_32bit, 4);
8597ec681f3Smrg   _mesa_sha1_final(&ctx, ir_sha1_cache_key);
8607ec681f3Smrg
8617ec681f3Smrg   blob_finish(&blob);
8627ec681f3Smrg}
8633464ebd5Sriastradh
8643464ebd5Sriastradh/**
8653464ebd5Sriastradh * Create LLVM-generated code for a vertex shader.
8663464ebd5Sriastradh */
8673464ebd5Sriastradhstruct draw_llvm_variant *
8683464ebd5Sriastradhdraw_llvm_create_variant(struct draw_llvm *llvm,
869af69d88dSmrg                         unsigned num_inputs,
870af69d88dSmrg                         const struct draw_llvm_variant_key *key)
8713464ebd5Sriastradh{
8723464ebd5Sriastradh   struct draw_llvm_variant *variant;
8733464ebd5Sriastradh   struct llvm_vertex_shader *shader =
8743464ebd5Sriastradh      llvm_vertex_shader(llvm->draw->vs.vertex_shader);
8753464ebd5Sriastradh   LLVMTypeRef vertex_header;
876af69d88dSmrg   char module_name[64];
8777ec681f3Smrg   unsigned char ir_sha1_cache_key[20];
8787ec681f3Smrg   struct lp_cached_code cached = { 0 };
8797ec681f3Smrg   bool needs_caching = false;
8803464ebd5Sriastradh   variant = MALLOC(sizeof *variant +
881af69d88dSmrg                    shader->variant_key_size -
882af69d88dSmrg                    sizeof variant->key);
88301e04c3fSmrg   if (!variant)
8843464ebd5Sriastradh      return NULL;
8853464ebd5Sriastradh
8863464ebd5Sriastradh   variant->llvm = llvm;
887af69d88dSmrg   variant->shader = shader;
8887ec681f3Smrg   memcpy(&variant->key, key, shader->variant_key_size);
889af69d88dSmrg
8907ec681f3Smrg   snprintf(module_name, sizeof(module_name), "draw_llvm_vs_variant%u",
8917ec681f3Smrg            variant->shader->variants_cached);
8927ec681f3Smrg
8937ec681f3Smrg   if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
8947ec681f3Smrg      draw_get_ir_cache_key(shader->base.state.ir.nir,
8957ec681f3Smrg                            key,
8967ec681f3Smrg                            shader->variant_key_size,
8977ec681f3Smrg                            num_inputs,
8987ec681f3Smrg                            ir_sha1_cache_key);
8997ec681f3Smrg
9007ec681f3Smrg      llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
9017ec681f3Smrg                                         &cached,
9027ec681f3Smrg                                         ir_sha1_cache_key);
9037ec681f3Smrg      if (!cached.data_size)
9047ec681f3Smrg         needs_caching = true;
9057ec681f3Smrg   }
9067ec681f3Smrg   variant->gallivm = gallivm_create(module_name, llvm->context, &cached);
907af69d88dSmrg
908af69d88dSmrg   create_jit_types(variant);
9093464ebd5Sriastradh
91001e04c3fSmrg   if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
9117ec681f3Smrg      if (llvm->draw->vs.vertex_shader->state.type == PIPE_SHADER_IR_TGSI)
9127ec681f3Smrg         tgsi_dump(llvm->draw->vs.vertex_shader->state.tokens, 0);
9137ec681f3Smrg      else
9147ec681f3Smrg         nir_print_shader(llvm->draw->vs.vertex_shader->state.ir.nir, stderr);
91501e04c3fSmrg      draw_llvm_dump_variant_key(&variant->key);
91601e04c3fSmrg   }
91701e04c3fSmrg
918af69d88dSmrg   vertex_header = create_jit_vertex_header(variant->gallivm, num_inputs);
9193464ebd5Sriastradh
920af69d88dSmrg   variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
9213464ebd5Sriastradh
92201e04c3fSmrg   draw_llvm_generate(llvm, variant);
923af69d88dSmrg
924af69d88dSmrg   gallivm_compile_module(variant->gallivm);
925af69d88dSmrg
926af69d88dSmrg   variant->jit_func = (draw_jit_vert_func)
927af69d88dSmrg         gallivm_jit_function(variant->gallivm, variant->function);
928af69d88dSmrg
9297ec681f3Smrg   if (needs_caching)
9307ec681f3Smrg      llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
9317ec681f3Smrg                                           &cached,
9327ec681f3Smrg                                           ir_sha1_cache_key);
933af69d88dSmrg   gallivm_free_ir(variant->gallivm);
9343464ebd5Sriastradh
9353464ebd5Sriastradh   variant->list_item_global.base = variant;
9363464ebd5Sriastradh   variant->list_item_local.base = variant;
9373464ebd5Sriastradh   /*variant->no = */shader->variants_created++;
9383464ebd5Sriastradh   variant->list_item_global.base = variant;
9393464ebd5Sriastradh
9403464ebd5Sriastradh   return variant;
9413464ebd5Sriastradh}
9423464ebd5Sriastradh
9437ec681f3Smrgstatic void
9447ec681f3Smrgdo_clamp_vertex_color(struct gallivm_state *gallivm,
9457ec681f3Smrg                      struct lp_type type,
9467ec681f3Smrg                      const struct tgsi_shader_info *info,
9477ec681f3Smrg                      LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
9487ec681f3Smrg{
9497ec681f3Smrg   LLVMBuilderRef builder = gallivm->builder;
9507ec681f3Smrg   LLVMValueRef out;
9517ec681f3Smrg   unsigned chan, attrib;
9527ec681f3Smrg   struct lp_build_context bld;
9537ec681f3Smrg   lp_build_context_init(&bld, gallivm, type);
9547ec681f3Smrg
9557ec681f3Smrg   for (attrib = 0; attrib < info->num_outputs; ++attrib) {
9567ec681f3Smrg      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
9577ec681f3Smrg         if (outputs[attrib][chan]) {
9587ec681f3Smrg            switch (info->output_semantic_name[attrib]) {
9597ec681f3Smrg            case TGSI_SEMANTIC_COLOR:
9607ec681f3Smrg            case TGSI_SEMANTIC_BCOLOR:
9617ec681f3Smrg               out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
9627ec681f3Smrg               out = lp_build_clamp(&bld, out, bld.zero, bld.one);
9637ec681f3Smrg               LLVMBuildStore(builder, out, outputs[attrib][chan]);
9647ec681f3Smrg               break;
9657ec681f3Smrg            }
9667ec681f3Smrg         }
9677ec681f3Smrg      }
9687ec681f3Smrg   }
9697ec681f3Smrg}
9703464ebd5Sriastradh
9713464ebd5Sriastradhstatic void
972af69d88dSmrggenerate_vs(struct draw_llvm_variant *variant,
9733464ebd5Sriastradh            LLVMBuilderRef builder,
974af69d88dSmrg            struct lp_type vs_type,
975af69d88dSmrg            LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
976af69d88dSmrg            const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
977af69d88dSmrg            const struct lp_bld_tgsi_system_values *system_values,
9783464ebd5Sriastradh            LLVMValueRef context_ptr,
97901e04c3fSmrg            const struct lp_build_sampler_soa *draw_sampler,
9807ec681f3Smrg            const struct lp_build_image_soa *draw_image,
9817ec681f3Smrg            boolean clamp_vertex_color,
9827ec681f3Smrg            struct lp_build_mask_context *bld_mask)
9833464ebd5Sriastradh{
984af69d88dSmrg   struct draw_llvm *llvm = variant->llvm;
9853464ebd5Sriastradh   const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
986af69d88dSmrg   LLVMValueRef consts_ptr =
987af69d88dSmrg      draw_jit_context_vs_constants(variant->gallivm, context_ptr);
988af69d88dSmrg   LLVMValueRef num_consts_ptr =
989af69d88dSmrg      draw_jit_context_num_vs_constants(variant->gallivm, context_ptr);
9907ec681f3Smrg   LLVMValueRef ssbos_ptr =
9917ec681f3Smrg      draw_jit_context_vs_ssbos(variant->gallivm, context_ptr);
9927ec681f3Smrg   LLVMValueRef num_ssbos_ptr =
9937ec681f3Smrg      draw_jit_context_num_vs_ssbos(variant->gallivm, context_ptr);
9947ec681f3Smrg
9957ec681f3Smrg   struct lp_build_tgsi_params params;
9967ec681f3Smrg   memset(&params, 0, sizeof(params));
9977ec681f3Smrg
9987ec681f3Smrg   params.type = vs_type;
9997ec681f3Smrg   params.mask = bld_mask;
10007ec681f3Smrg   params.consts_ptr = consts_ptr;
10017ec681f3Smrg   params.const_sizes_ptr = num_consts_ptr;
10027ec681f3Smrg   params.system_values = system_values;
10037ec681f3Smrg   params.inputs = inputs;
10047ec681f3Smrg   params.context_ptr = context_ptr;
10057ec681f3Smrg   params.sampler = draw_sampler;
10067ec681f3Smrg   params.info = &llvm->draw->vs.vertex_shader->info;
10077ec681f3Smrg   params.ssbo_ptr = ssbos_ptr;
10087ec681f3Smrg   params.ssbo_sizes_ptr = num_ssbos_ptr;
10097ec681f3Smrg   params.image = draw_image;
10107ec681f3Smrg   params.aniso_filter_table = draw_jit_context_aniso_filter_table(variant->gallivm, context_ptr);
10117ec681f3Smrg
10127ec681f3Smrg   if (llvm->draw->vs.vertex_shader->state.ir.nir &&
10137ec681f3Smrg       llvm->draw->vs.vertex_shader->state.type == PIPE_SHADER_IR_NIR)
10147ec681f3Smrg      lp_build_nir_soa(variant->gallivm,
10157ec681f3Smrg                       llvm->draw->vs.vertex_shader->state.ir.nir,
10167ec681f3Smrg                       &params,
10177ec681f3Smrg                       outputs);
10187ec681f3Smrg   else
10197ec681f3Smrg      lp_build_tgsi_soa(variant->gallivm,
10207ec681f3Smrg                        tokens,
10217ec681f3Smrg                        &params,
10227ec681f3Smrg                        outputs);
10237ec681f3Smrg
10247ec681f3Smrg   if (clamp_vertex_color) {
10257ec681f3Smrg      const struct tgsi_shader_info *info = &llvm->draw->vs.vertex_shader->info;
10267ec681f3Smrg      do_clamp_vertex_color(variant->gallivm,
10277ec681f3Smrg                            vs_type, info,
10287ec681f3Smrg                            outputs);
10293464ebd5Sriastradh   }
10303464ebd5Sriastradh}
10313464ebd5Sriastradh
103201e04c3fSmrg
10333464ebd5Sriastradhstatic void
103401e04c3fSmrgfetch_instanced(struct gallivm_state *gallivm,
103501e04c3fSmrg                const struct util_format_description *format_desc,
103601e04c3fSmrg                struct lp_type vs_type,
103701e04c3fSmrg                LLVMValueRef vb_stride,
103801e04c3fSmrg                LLVMValueRef map_ptr,
103901e04c3fSmrg                LLVMValueRef buffer_size_adj,
104001e04c3fSmrg                LLVMValueRef *inputs,
104101e04c3fSmrg                LLVMValueRef index)
10423464ebd5Sriastradh{
104301e04c3fSmrg   LLVMTypeRef i32_t = LLVMInt32TypeInContext(gallivm->context);
104401e04c3fSmrg   LLVMTypeRef aosf_t, aosi_t;
104501e04c3fSmrg   LLVMValueRef zero = LLVMConstNull(i32_t);
10463464ebd5Sriastradh   LLVMBuilderRef builder = gallivm->builder;
104701e04c3fSmrg   LLVMValueRef stride, buffer_overflowed, aos, index_valid;
104801e04c3fSmrg   unsigned i;
10493464ebd5Sriastradh
105001e04c3fSmrg   aosf_t = lp_build_vec_type(gallivm, lp_float32_vec4_type());
105101e04c3fSmrg   aosi_t = lp_build_vec_type(gallivm, lp_int32_vec4_type());
10523464ebd5Sriastradh
105301e04c3fSmrg   /* This mul can overflow. Wraparound is ok. */
105401e04c3fSmrg   stride = LLVMBuildMul(builder, vb_stride, index, "");
105501e04c3fSmrg
105601e04c3fSmrg   buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
105701e04c3fSmrg                                     stride, buffer_size_adj,
1058af69d88dSmrg                                     "buffer_overflowed");
10593464ebd5Sriastradh
106001e04c3fSmrg   if (0) {
106101e04c3fSmrg      lp_build_print_value(gallivm, "   instance index = ", index);
106201e04c3fSmrg      lp_build_print_value(gallivm, "   buffer overflowed = ", buffer_overflowed);
1063af69d88dSmrg   }
10643464ebd5Sriastradh
106501e04c3fSmrg   index_valid = LLVMBuildNot(builder, buffer_overflowed, "");
106601e04c3fSmrg   index_valid = LLVMBuildSExt(builder, index_valid, i32_t, "");
106701e04c3fSmrg   stride = LLVMBuildAnd(builder, stride, index_valid, "");
106801e04c3fSmrg
106901e04c3fSmrg   aos = lp_build_fetch_rgba_aos(gallivm,
107001e04c3fSmrg                                 format_desc,
107101e04c3fSmrg                                 lp_float32_vec4_type(),
107201e04c3fSmrg                                 FALSE,
107301e04c3fSmrg                                 map_ptr,
107401e04c3fSmrg                                 stride, zero, zero,
107501e04c3fSmrg                                 NULL);
107601e04c3fSmrg
107701e04c3fSmrg   index_valid = lp_build_broadcast(gallivm, aosi_t, index_valid);
107801e04c3fSmrg   aos = LLVMBuildBitCast(builder, aos, aosi_t, "");
107901e04c3fSmrg   aos = LLVMBuildAnd(builder, aos, index_valid, "");
108001e04c3fSmrg   aos = LLVMBuildBitCast(builder, aos, aosf_t, "");
108101e04c3fSmrg
108201e04c3fSmrg   for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
108301e04c3fSmrg      LLVMValueRef index = lp_build_const_int32(gallivm, i);
108401e04c3fSmrg      inputs[i] = lp_build_extract_broadcast(gallivm,
108501e04c3fSmrg                                             lp_float32_vec4_type(),
108601e04c3fSmrg                                             vs_type, aos, index);
108701e04c3fSmrg   }
10883464ebd5Sriastradh}
10893464ebd5Sriastradh
109001e04c3fSmrg
10913464ebd5Sriastradhstatic void
109201e04c3fSmrgfetch_vector(struct gallivm_state *gallivm,
109301e04c3fSmrg             const struct util_format_description *format_desc,
109401e04c3fSmrg             struct lp_type vs_type,
109501e04c3fSmrg             LLVMValueRef vb_stride,
109601e04c3fSmrg             LLVMValueRef map_ptr,
109701e04c3fSmrg             LLVMValueRef buffer_size_adj,
109801e04c3fSmrg             LLVMValueRef *inputs,
109901e04c3fSmrg             LLVMValueRef indices)
11003464ebd5Sriastradh{
110101e04c3fSmrg   LLVMBuilderRef builder = gallivm->builder;
110201e04c3fSmrg   struct lp_build_context blduivec;
110301e04c3fSmrg   struct lp_type fetch_type = vs_type;
110401e04c3fSmrg   LLVMValueRef offset, valid_mask;
110501e04c3fSmrg   unsigned i;
11063464ebd5Sriastradh
110701e04c3fSmrg   lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
11083464ebd5Sriastradh
110901e04c3fSmrg   vb_stride = lp_build_broadcast_scalar(&blduivec, vb_stride);
111001e04c3fSmrg   buffer_size_adj = lp_build_broadcast_scalar(&blduivec, buffer_size_adj);
11113464ebd5Sriastradh
111201e04c3fSmrg   /* This mul can overflow. Wraparound is ok. */
111301e04c3fSmrg   offset = lp_build_mul(&blduivec, vb_stride, indices);
11143464ebd5Sriastradh
111501e04c3fSmrg   valid_mask = lp_build_compare(gallivm, blduivec.type,
111601e04c3fSmrg                                 PIPE_FUNC_LESS, offset, buffer_size_adj);
11173464ebd5Sriastradh
111801e04c3fSmrg   /* not valid elements use offset 0 */
111901e04c3fSmrg   offset = LLVMBuildAnd(builder, offset, valid_mask, "");
11203464ebd5Sriastradh
112101e04c3fSmrg   if (0) {
112201e04c3fSmrg      lp_build_print_value(gallivm, "   indices = ", indices);
112301e04c3fSmrg      lp_build_print_value(gallivm, "   offsets = ", offset);
112401e04c3fSmrg      lp_build_print_value(gallivm, "   valid_mask = ", valid_mask);
112501e04c3fSmrg   }
112601e04c3fSmrg
112701e04c3fSmrg   /*
112801e04c3fSmrg    * Unlike fetch_instanced, use SoA fetch instead of multiple AoS fetches.
112901e04c3fSmrg    * This should always produce better code.
113001e04c3fSmrg    */
11313464ebd5Sriastradh
113201e04c3fSmrg   /* The type handling is annoying here... */
113301e04c3fSmrg   if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
113401e04c3fSmrg       format_desc->channel[0].pure_integer) {
113501e04c3fSmrg      if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
113601e04c3fSmrg         fetch_type = lp_type_int_vec(vs_type.width, vs_type.width * vs_type.length);
113701e04c3fSmrg      }
113801e04c3fSmrg      else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
113901e04c3fSmrg         fetch_type = lp_type_uint_vec(vs_type.width, vs_type.width * vs_type.length);
1140af69d88dSmrg      }
114101e04c3fSmrg   }
114201e04c3fSmrg
114301e04c3fSmrg   lp_build_fetch_rgba_soa(gallivm, format_desc,
114401e04c3fSmrg                           fetch_type, FALSE, map_ptr, offset,
114501e04c3fSmrg                           blduivec.zero, blduivec.zero,
114601e04c3fSmrg                           NULL, inputs);
11473464ebd5Sriastradh
114801e04c3fSmrg   for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
114901e04c3fSmrg      inputs[i] = LLVMBuildBitCast(builder, inputs[i],
115001e04c3fSmrg                                   lp_build_vec_type(gallivm, vs_type), "");
115101e04c3fSmrg   }
115201e04c3fSmrg
115301e04c3fSmrg   /* out-of-bound fetches return all zeros */
11547ec681f3Smrg   for (i = 0; i < format_desc->nr_channels; i++) {
115501e04c3fSmrg      inputs[i] = LLVMBuildBitCast(builder, inputs[i], blduivec.vec_type, "");
115601e04c3fSmrg      inputs[i] = LLVMBuildAnd(builder, inputs[i], valid_mask, "");
115701e04c3fSmrg      inputs[i] = LLVMBuildBitCast(builder, inputs[i],
115801e04c3fSmrg                                   lp_build_vec_type(gallivm, vs_type), "");
11593464ebd5Sriastradh   }
11603464ebd5Sriastradh}
11613464ebd5Sriastradh
11623464ebd5Sriastradh
11633464ebd5Sriastradhstatic void
11643464ebd5Sriastradhstore_aos(struct gallivm_state *gallivm,
11653464ebd5Sriastradh          LLVMValueRef io_ptr,
11663464ebd5Sriastradh          LLVMValueRef index,
1167af69d88dSmrg          LLVMValueRef value)
11683464ebd5Sriastradh{
1169af69d88dSmrg   LLVMTypeRef data_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, lp_float32_vec4_type()), 0);
11703464ebd5Sriastradh   LLVMBuilderRef builder = gallivm->builder;
11713464ebd5Sriastradh   LLVMValueRef data_ptr = draw_jit_header_data(gallivm, io_ptr);
11723464ebd5Sriastradh   LLVMValueRef indices[3];
11733464ebd5Sriastradh
11743464ebd5Sriastradh   indices[0] = lp_build_const_int32(gallivm, 0);
11753464ebd5Sriastradh   indices[1] = index;
11763464ebd5Sriastradh   indices[2] = lp_build_const_int32(gallivm, 0);
11773464ebd5Sriastradh
1178af69d88dSmrg   data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");
1179af69d88dSmrg   data_ptr = LLVMBuildPointerCast(builder, data_ptr, data_ptr_type, "");
11803464ebd5Sriastradh
11813464ebd5Sriastradh#if DEBUG_STORE
1182af69d88dSmrg   lp_build_printf(gallivm, "    ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
11833464ebd5Sriastradh#endif
1184af69d88dSmrg
1185af69d88dSmrg   /* Unaligned store due to the vertex header */
118601e04c3fSmrg   LLVMSetAlignment(LLVMBuildStore(builder, value, data_ptr), sizeof(float));
11873464ebd5Sriastradh}
11883464ebd5Sriastradh
1189af69d88dSmrg/**
1190af69d88dSmrg * Adjust the mask to architecture endianess. The mask will the store in struct:
1191af69d88dSmrg *
1192af69d88dSmrg * struct vertex_header {
1193af69d88dSmrg *    unsigned clipmask:DRAW_TOTAL_CLIP_PLANES;
1194af69d88dSmrg *    unsigned edgeflag:1;
119501e04c3fSmrg *    unsigned pad:1;
1196af69d88dSmrg *    unsigned vertex_id:16;
1197af69d88dSmrg *    [...]
1198af69d88dSmrg * }
1199af69d88dSmrg *
1200af69d88dSmrg * On little-endian machine nothing needs to done, however on bit-endian machine
1201af69d88dSmrg * the mask's fields need to be adjusted with the algorithm:
1202af69d88dSmrg *
1203af69d88dSmrg * uint32_t reverse (uint32_t x)
1204af69d88dSmrg * {
1205af69d88dSmrg *   return (x >> 16) |              // vertex_id
1206af69d88dSmrg *          ((x & 0x3fff) << 18) |   // clipmask
12077ec681f3Smrg *          ((x & 0x4000) << 3) |    // edgeflag
12087ec681f3Smrg *          ((x & 0x8000) << 1);     // pad
1209af69d88dSmrg * }
1210af69d88dSmrg */
1211af69d88dSmrgstatic LLVMValueRef
1212af69d88dSmrgadjust_mask(struct gallivm_state *gallivm,
1213af69d88dSmrg            LLVMValueRef mask)
1214af69d88dSmrg{
12157ec681f3Smrg#if UTIL_ARCH_BIG_ENDIAN
1216af69d88dSmrg   LLVMBuilderRef builder = gallivm->builder;
1217af69d88dSmrg   LLVMValueRef vertex_id;
1218af69d88dSmrg   LLVMValueRef clipmask;
121901e04c3fSmrg   LLVMValueRef pad;
1220af69d88dSmrg   LLVMValueRef edgeflag;
1221af69d88dSmrg
1222af69d88dSmrg   vertex_id = LLVMBuildLShr(builder, mask, lp_build_const_int32(gallivm, 16), "");
1223af69d88dSmrg   clipmask  = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x3fff), "");
1224af69d88dSmrg   clipmask  = LLVMBuildShl(builder, clipmask, lp_build_const_int32(gallivm, 18), "");
122501e04c3fSmrg   if (0) {
12267ec681f3Smrg      pad = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x8000), "");
12277ec681f3Smrg      pad = LLVMBuildShl(builder, pad, lp_build_const_int32(gallivm, 1), "");
122801e04c3fSmrg   }
12297ec681f3Smrg   edgeflag = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x4000), "");
12307ec681f3Smrg   edgeflag = LLVMBuildShl(builder, edgeflag, lp_build_const_int32(gallivm, 3), "");
1231af69d88dSmrg
1232af69d88dSmrg   mask = LLVMBuildOr(builder, vertex_id, clipmask, "");
123301e04c3fSmrg   if (0) {
123401e04c3fSmrg      mask = LLVMBuildOr(builder, mask, pad, "");
123501e04c3fSmrg   }
1236af69d88dSmrg   mask = LLVMBuildOr(builder, mask, edgeflag, "");
1237af69d88dSmrg#endif
1238af69d88dSmrg   return mask;
1239af69d88dSmrg}
12403464ebd5Sriastradh
12413464ebd5Sriastradhstatic void
12423464ebd5Sriastradhstore_aos_array(struct gallivm_state *gallivm,
1243af69d88dSmrg                struct lp_type soa_type,
12443464ebd5Sriastradh                LLVMValueRef io_ptr,
1245af69d88dSmrg                LLVMValueRef *indices,
1246af69d88dSmrg                LLVMValueRef* aos,
12473464ebd5Sriastradh                int attrib,
12483464ebd5Sriastradh                int num_outputs,
1249af69d88dSmrg                LLVMValueRef clipmask,
125001e04c3fSmrg                boolean need_edgeflag)
12513464ebd5Sriastradh{
12523464ebd5Sriastradh   LLVMBuilderRef builder = gallivm->builder;
12533464ebd5Sriastradh   LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib);
1254af69d88dSmrg   LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
1255af69d88dSmrg   LLVMValueRef linear_inds[LP_MAX_VECTOR_WIDTH / 32];
1256af69d88dSmrg   LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
1257af69d88dSmrg   int vector_length = soa_type.length;
1258af69d88dSmrg   int i;
12593464ebd5Sriastradh
1260af69d88dSmrg   debug_assert(TGSI_NUM_CHANNELS == 4);
1261af69d88dSmrg
1262af69d88dSmrg   for (i = 0; i < vector_length; i++) {
1263af69d88dSmrg      linear_inds[i] = lp_build_const_int32(gallivm, i);
1264af69d88dSmrg      if (indices) {
1265af69d88dSmrg         inds[i] = indices[i];
1266af69d88dSmrg      } else {
1267af69d88dSmrg         inds[i] = linear_inds[i];
1268af69d88dSmrg      }
1269af69d88dSmrg      io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, "");
1270af69d88dSmrg   }
1271af69d88dSmrg
1272af69d88dSmrg   if (attrib == 0) {
1273af69d88dSmrg      /* store vertex header for each of the n vertices */
1274af69d88dSmrg      LLVMValueRef val, cliptmp;
1275af69d88dSmrg      int vertex_id_pad_edgeflag;
1276af69d88dSmrg
1277af69d88dSmrg      /* If this assertion fails, it means we need to update the bit twidding
1278af69d88dSmrg       * code here.  See struct vertex_header in draw_private.h.
1279af69d88dSmrg       */
1280af69d88dSmrg      assert(DRAW_TOTAL_CLIP_PLANES==14);
128101e04c3fSmrg      /* initialize vertex id:16 = 0xffff, pad:1 = 0, edgeflag:1 = 1 */
128201e04c3fSmrg      if (!need_edgeflag) {
128301e04c3fSmrg         vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);
128401e04c3fSmrg      }
128501e04c3fSmrg      else {
128601e04c3fSmrg         vertex_id_pad_edgeflag = (0xffff << 16);
128701e04c3fSmrg      }
128801e04c3fSmrg      val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type),
128901e04c3fSmrg                                   vertex_id_pad_edgeflag);
1290af69d88dSmrg      /* OR with the clipmask */
1291af69d88dSmrg      cliptmp = LLVMBuildOr(builder, val, clipmask, "");
1292af69d88dSmrg      for (i = 0; i < vector_length; i++) {
1293af69d88dSmrg         LLVMValueRef id_ptr = draw_jit_header_id(gallivm, io_ptrs[i]);
1294af69d88dSmrg         val = LLVMBuildExtractElement(builder, cliptmp, linear_inds[i], "");
1295af69d88dSmrg         val = adjust_mask(gallivm, val);
12963464ebd5Sriastradh#if DEBUG_STORE
1297af69d88dSmrg         lp_build_printf(gallivm, "io = %p, index %d, clipmask = %x\n",
1298af69d88dSmrg                         io_ptrs[i], inds[i], val);
12993464ebd5Sriastradh#endif
1300af69d88dSmrg         LLVMBuildStore(builder, val, id_ptr);
1301af69d88dSmrg      }
1302af69d88dSmrg   }
1303af69d88dSmrg
1304af69d88dSmrg   /* store for each of the n vertices */
1305af69d88dSmrg   for (i = 0; i < vector_length; i++) {
1306af69d88dSmrg      store_aos(gallivm, io_ptrs[i], attr_index, aos[i]);
1307af69d88dSmrg   }
13083464ebd5Sriastradh}
13093464ebd5Sriastradh
13103464ebd5Sriastradh
13113464ebd5Sriastradhstatic void
13123464ebd5Sriastradhconvert_to_aos(struct gallivm_state *gallivm,
13133464ebd5Sriastradh               LLVMValueRef io,
1314af69d88dSmrg               LLVMValueRef *indices,
1315af69d88dSmrg               LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
13163464ebd5Sriastradh               LLVMValueRef clipmask,
13173464ebd5Sriastradh               int num_outputs,
1318af69d88dSmrg               struct lp_type soa_type,
131901e04c3fSmrg               boolean need_edgeflag)
13203464ebd5Sriastradh{
13213464ebd5Sriastradh   LLVMBuilderRef builder = gallivm->builder;
1322af69d88dSmrg   unsigned chan, attrib, i;
13233464ebd5Sriastradh
13243464ebd5Sriastradh#if DEBUG_STORE
1325af69d88dSmrg   lp_build_printf(gallivm, "   # storing begin\n");
13263464ebd5Sriastradh#endif
13273464ebd5Sriastradh   for (attrib = 0; attrib < num_outputs; ++attrib) {
1328af69d88dSmrg      LLVMValueRef soa[TGSI_NUM_CHANNELS];
1329af69d88dSmrg      LLVMValueRef aos[LP_MAX_VECTOR_WIDTH / 32];
1330af69d88dSmrg      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
13313464ebd5Sriastradh         if (outputs[attrib][chan]) {
13323464ebd5Sriastradh            LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
13333464ebd5Sriastradh            lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
1334af69d88dSmrg#if DEBUG_STORE
1335af69d88dSmrg            lp_build_printf(gallivm, "output %d : %d ",
1336af69d88dSmrg                            LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
1337af69d88dSmrg                                         attrib, 0),
1338af69d88dSmrg                            LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
1339af69d88dSmrg                                         chan, 0));
1340af69d88dSmrg            lp_build_print_value(gallivm, "val = ", out);
1341af69d88dSmrg            {
1342af69d88dSmrg               LLVMValueRef iv =
1343af69d88dSmrg                  LLVMBuildBitCast(builder, out, lp_build_int_vec_type(gallivm, soa_type), "");
1344af69d88dSmrg
1345af69d88dSmrg               lp_build_print_value(gallivm, "  ival = ", iv);
1346af69d88dSmrg            }
1347af69d88dSmrg#endif
13483464ebd5Sriastradh            soa[chan] = out;
13493464ebd5Sriastradh         }
13503464ebd5Sriastradh         else {
13513464ebd5Sriastradh            soa[chan] = 0;
13523464ebd5Sriastradh         }
13533464ebd5Sriastradh      }
1354af69d88dSmrg
1355af69d88dSmrg
1356af69d88dSmrg      if (soa_type.length == TGSI_NUM_CHANNELS) {
1357af69d88dSmrg         lp_build_transpose_aos(gallivm, soa_type, soa, aos);
1358af69d88dSmrg      } else {
1359af69d88dSmrg         lp_build_transpose_aos(gallivm, soa_type, soa, soa);
1360af69d88dSmrg
1361af69d88dSmrg         for (i = 0; i < soa_type.length; ++i) {
1362af69d88dSmrg            aos[i] = lp_build_extract_range(gallivm,
1363af69d88dSmrg                                            soa[i % TGSI_NUM_CHANNELS],
1364af69d88dSmrg                                            (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
1365af69d88dSmrg                                            TGSI_NUM_CHANNELS);
1366af69d88dSmrg         }
1367af69d88dSmrg      }
1368af69d88dSmrg
13693464ebd5Sriastradh      store_aos_array(gallivm,
1370af69d88dSmrg                      soa_type,
1371af69d88dSmrg                      io, indices,
13723464ebd5Sriastradh                      aos,
13733464ebd5Sriastradh                      attrib,
13743464ebd5Sriastradh                      num_outputs,
137501e04c3fSmrg                      clipmask,
137601e04c3fSmrg                      need_edgeflag);
13773464ebd5Sriastradh   }
13783464ebd5Sriastradh#if DEBUG_STORE
1379af69d88dSmrg   lp_build_printf(gallivm, "   # storing end\n");
13803464ebd5Sriastradh#endif
13813464ebd5Sriastradh}
13823464ebd5Sriastradh
13833464ebd5Sriastradh
13843464ebd5Sriastradh/**
13853464ebd5Sriastradh * Stores original vertex positions in clip coordinates
13863464ebd5Sriastradh */
13873464ebd5Sriastradhstatic void
13883464ebd5Sriastradhstore_clip(struct gallivm_state *gallivm,
1389af69d88dSmrg           const struct lp_type vs_type,
1390af69d88dSmrg           LLVMValueRef io_ptr,
1391af69d88dSmrg           LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
139201e04c3fSmrg           int idx)
13933464ebd5Sriastradh{
13943464ebd5Sriastradh   LLVMBuilderRef builder = gallivm->builder;
1395af69d88dSmrg   LLVMValueRef soa[4];
1396af69d88dSmrg   LLVMValueRef aos[LP_MAX_VECTOR_LENGTH];
1397af69d88dSmrg   LLVMValueRef indices[2];
1398af69d88dSmrg   LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
1399af69d88dSmrg   LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
1400af69d88dSmrg   LLVMValueRef clip_ptrs[LP_MAX_VECTOR_WIDTH / 32];
1401af69d88dSmrg   LLVMTypeRef clip_ptr_type =
1402af69d88dSmrg      LLVMPointerType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context),
1403af69d88dSmrg                                     4), 0);
1404af69d88dSmrg   int i, j;
14053464ebd5Sriastradh
14063464ebd5Sriastradh   indices[0] =
14073464ebd5Sriastradh   indices[1] = lp_build_const_int32(gallivm, 0);
14083464ebd5Sriastradh
1409af69d88dSmrg   for (i = 0; i < vs_type.length; i++) {
1410af69d88dSmrg      inds[i] = lp_build_const_int32(gallivm, i);
1411af69d88dSmrg      io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, "");
1412af69d88dSmrg   }
14133464ebd5Sriastradh
1414af69d88dSmrg   soa[0] = LLVMBuildLoad(builder, outputs[idx][0], ""); /*x0 x1 .. xn*/
1415af69d88dSmrg   soa[1] = LLVMBuildLoad(builder, outputs[idx][1], ""); /*y0 y1 .. yn*/
1416af69d88dSmrg   soa[2] = LLVMBuildLoad(builder, outputs[idx][2], ""); /*z0 z1 .. zn*/
1417af69d88dSmrg   soa[3] = LLVMBuildLoad(builder, outputs[idx][3], ""); /*w0 w1 .. wn*/
14183464ebd5Sriastradh
141901e04c3fSmrg   for (i = 0; i < vs_type.length; i++) {
142001e04c3fSmrg      clip_ptrs[i] = draw_jit_header_clip_pos(gallivm, io_ptrs[i]);
1421af69d88dSmrg   }
14223464ebd5Sriastradh
1423af69d88dSmrg   lp_build_transpose_aos(gallivm, vs_type, soa, soa);
1424af69d88dSmrg   for (i = 0; i < vs_type.length; ++i) {
1425af69d88dSmrg      aos[i] = lp_build_extract_range(gallivm,
1426af69d88dSmrg                                      soa[i % TGSI_NUM_CHANNELS],
1427af69d88dSmrg                                      (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
1428af69d88dSmrg                                      TGSI_NUM_CHANNELS);
14293464ebd5Sriastradh   }
14303464ebd5Sriastradh
1431af69d88dSmrg   for (j = 0; j < vs_type.length; j++) {
1432af69d88dSmrg      LLVMValueRef clip_ptr;
1433af69d88dSmrg
1434af69d88dSmrg      clip_ptr = LLVMBuildGEP(builder, clip_ptrs[j], indices, 2, "clipo");
1435af69d88dSmrg      clip_ptr = LLVMBuildPointerCast(builder, clip_ptr, clip_ptr_type, "");
1436af69d88dSmrg
1437af69d88dSmrg      /* Unaligned store */
143801e04c3fSmrg      LLVMSetAlignment(LLVMBuildStore(builder, aos[j], clip_ptr), sizeof(float));
1439af69d88dSmrg   }
14403464ebd5Sriastradh}
14413464ebd5Sriastradh
14423464ebd5Sriastradh
14433464ebd5Sriastradh/**
14443464ebd5Sriastradh * Transforms the outputs for viewport mapping
14453464ebd5Sriastradh */
14463464ebd5Sriastradhstatic void
1447af69d88dSmrggenerate_viewport(struct draw_llvm_variant *variant,
14483464ebd5Sriastradh                  LLVMBuilderRef builder,
1449af69d88dSmrg                  struct lp_type vs_type,
1450af69d88dSmrg                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
14513464ebd5Sriastradh                  LLVMValueRef context_ptr)
14523464ebd5Sriastradh{
14533464ebd5Sriastradh   int i;
1454af69d88dSmrg   struct gallivm_state *gallivm = variant->gallivm;
1455af69d88dSmrg   struct lp_type f32_type = vs_type;
1456af69d88dSmrg   const unsigned pos = variant->llvm->draw->vs.position_output;
1457af69d88dSmrg   LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
1458af69d88dSmrg   LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 .. wn*/
1459af69d88dSmrg   LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/
146001e04c3fSmrg   LLVMValueRef vp_ptr = draw_jit_context_viewports(gallivm, context_ptr);
146101e04c3fSmrg
146201e04c3fSmrg   /* We treat pipe_viewport_state as a float array */
146301e04c3fSmrg   const int scale_index_offset = offsetof(struct pipe_viewport_state, scale) / sizeof(float);
146401e04c3fSmrg   const int trans_index_offset = offsetof(struct pipe_viewport_state, translate) / sizeof(float);
14653464ebd5Sriastradh
14663464ebd5Sriastradh   /* for 1/w convention*/
14673464ebd5Sriastradh   out3 = LLVMBuildFDiv(builder, const1, out3, "");
1468af69d88dSmrg   LLVMBuildStore(builder, out3, outputs[pos][3]);
1469af69d88dSmrg
14703464ebd5Sriastradh   /* Viewport Mapping */
14713464ebd5Sriastradh   for (i=0; i<3; i++) {
1472af69d88dSmrg      LLVMValueRef out = LLVMBuildLoad(builder, outputs[pos][i], ""); /*x0 x1 .. xn*/
14733464ebd5Sriastradh      LLVMValueRef scale;
14743464ebd5Sriastradh      LLVMValueRef trans;
14753464ebd5Sriastradh      LLVMValueRef scale_i;
14763464ebd5Sriastradh      LLVMValueRef trans_i;
14773464ebd5Sriastradh      LLVMValueRef index;
1478af69d88dSmrg
147901e04c3fSmrg      index = lp_build_const_int32(gallivm, i + scale_index_offset);
14803464ebd5Sriastradh      scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
14813464ebd5Sriastradh
148201e04c3fSmrg      index = lp_build_const_int32(gallivm, i + trans_index_offset);
14833464ebd5Sriastradh      trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
14843464ebd5Sriastradh
1485af69d88dSmrg      scale = lp_build_broadcast(gallivm, vs_type_llvm,
1486af69d88dSmrg                                 LLVMBuildLoad(builder, scale_i, "scale"));
1487af69d88dSmrg      trans = lp_build_broadcast(gallivm, vs_type_llvm,
1488af69d88dSmrg                                 LLVMBuildLoad(builder, trans_i, "trans"));
14893464ebd5Sriastradh
14903464ebd5Sriastradh      /* divide by w */
14913464ebd5Sriastradh      out = LLVMBuildFMul(builder, out, out3, "");
149201e04c3fSmrg      /* mult by scale, add translation */
149301e04c3fSmrg      out = lp_build_fmuladd(builder, out, scale, trans);
14943464ebd5Sriastradh
14953464ebd5Sriastradh      /* store transformed outputs */
1496af69d88dSmrg      LLVMBuildStore(builder, out, outputs[pos][i]);
14973464ebd5Sriastradh   }
1498af69d88dSmrg
14993464ebd5Sriastradh}
15003464ebd5Sriastradh
15013464ebd5Sriastradh
15023464ebd5Sriastradh/**
1503af69d88dSmrg * Returns clipmask as nxi32 bitmask for the n vertices
15043464ebd5Sriastradh */
1505af69d88dSmrgstatic LLVMValueRef
1506af69d88dSmrggenerate_clipmask(struct draw_llvm *llvm,
1507af69d88dSmrg                  struct gallivm_state *gallivm,
1508af69d88dSmrg                  struct lp_type vs_type,
1509af69d88dSmrg                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
151001e04c3fSmrg                  struct draw_llvm_variant_key *key,
1511af69d88dSmrg                  LLVMValueRef context_ptr,
1512af69d88dSmrg                  boolean *have_clipdist)
15133464ebd5Sriastradh{
15143464ebd5Sriastradh   LLVMBuilderRef builder = gallivm->builder;
1515af69d88dSmrg   LLVMValueRef mask; /* stores the <nxi32> clipmasks */
1516af69d88dSmrg   LLVMValueRef test, temp;
15173464ebd5Sriastradh   LLVMValueRef zero, shift;
15183464ebd5Sriastradh   LLVMValueRef pos_x, pos_y, pos_z, pos_w;
1519af69d88dSmrg   LLVMValueRef cv_x, cv_y, cv_z, cv_w;
15203464ebd5Sriastradh   LLVMValueRef plane1, planes, plane_ptr, sum;
1521af69d88dSmrg   struct lp_type f32_type = vs_type;
1522af69d88dSmrg   struct lp_type i32_type = lp_int_type(vs_type);
1523af69d88dSmrg   const unsigned pos = llvm->draw->vs.position_output;
1524af69d88dSmrg   const unsigned cv = llvm->draw->vs.clipvertex_output;
1525af69d88dSmrg   int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance;
152601e04c3fSmrg   boolean have_cd = false;
152701e04c3fSmrg   boolean clip_user = key->clip_user;
152801e04c3fSmrg   unsigned ucp_enable = key->ucp_enable;
1529af69d88dSmrg   unsigned cd[2];
1530af69d88dSmrg
153101e04c3fSmrg   cd[0] = llvm->draw->vs.ccdistance_output[0];
153201e04c3fSmrg   cd[1] = llvm->draw->vs.ccdistance_output[1];
1533af69d88dSmrg
1534af69d88dSmrg   if (cd[0] != pos || cd[1] != pos)
1535af69d88dSmrg      have_cd = true;
1536af69d88dSmrg
1537af69d88dSmrg   if (num_written_clipdistance && !clip_user) {
1538af69d88dSmrg      clip_user = true;
1539af69d88dSmrg      ucp_enable = (1 << num_written_clipdistance) - 1;
1540af69d88dSmrg   }
15413464ebd5Sriastradh
1542af69d88dSmrg   mask = lp_build_const_int_vec(gallivm, i32_type, 0);
1543af69d88dSmrg   temp = lp_build_const_int_vec(gallivm, i32_type, 0);
1544af69d88dSmrg   zero = lp_build_const_vec(gallivm, f32_type, 0);         /* 0.0f 0.0f 0.0f 0.0f */
1545af69d88dSmrg   shift = lp_build_const_int_vec(gallivm, i32_type, 1);    /* 1 1 1 1 */
15463464ebd5Sriastradh
1547af69d88dSmrg   /*
1548af69d88dSmrg    * load clipvertex and position from correct locations.
1549af69d88dSmrg    * if they are the same just load them once.
1550af69d88dSmrg    */
1551af69d88dSmrg   pos_x = LLVMBuildLoad(builder, outputs[pos][0], ""); /*x0 x1 .. xn */
1552af69d88dSmrg   pos_y = LLVMBuildLoad(builder, outputs[pos][1], ""); /*y0 y1 .. yn */
1553af69d88dSmrg   pos_z = LLVMBuildLoad(builder, outputs[pos][2], ""); /*z0 z1 .. zn */
1554af69d88dSmrg   pos_w = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 .. wn */
1555af69d88dSmrg
1556af69d88dSmrg   if (clip_user && cv != pos) {
1557af69d88dSmrg      cv_x = LLVMBuildLoad(builder, outputs[cv][0], ""); /*x0 x1 .. xn */
1558af69d88dSmrg      cv_y = LLVMBuildLoad(builder, outputs[cv][1], ""); /*y0 y1 .. yn */
1559af69d88dSmrg      cv_z = LLVMBuildLoad(builder, outputs[cv][2], ""); /*z0 z1 .. zn */
1560af69d88dSmrg      cv_w = LLVMBuildLoad(builder, outputs[cv][3], ""); /*w0 w1 .. wn */
1561af69d88dSmrg   } else {
1562af69d88dSmrg      cv_x = pos_x;
1563af69d88dSmrg      cv_y = pos_y;
1564af69d88dSmrg      cv_z = pos_z;
1565af69d88dSmrg      cv_w = pos_w;
1566af69d88dSmrg   }
15673464ebd5Sriastradh
156801e04c3fSmrg   /*
156901e04c3fSmrg    * Be careful with the comparisons and NaNs (using llvm's unordered
157001e04c3fSmrg    * comparisons here).
157101e04c3fSmrg    */
15723464ebd5Sriastradh   /* Cliptest, for hardwired planes */
157301e04c3fSmrg   /*
157401e04c3fSmrg    * XXX should take guardband into account (currently not in key).
157501e04c3fSmrg    * Otherwise might run the draw pipeline stages for nothing.
157601e04c3fSmrg    */
157701e04c3fSmrg   if (key->clip_xy) {
15783464ebd5Sriastradh      /* plane 1 */
15793464ebd5Sriastradh      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
15803464ebd5Sriastradh      temp = shift;
1581af69d88dSmrg      test = LLVMBuildAnd(builder, test, temp, "");
15823464ebd5Sriastradh      mask = test;
1583af69d88dSmrg
15843464ebd5Sriastradh      /* plane 2 */
15853464ebd5Sriastradh      test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
15863464ebd5Sriastradh      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
15873464ebd5Sriastradh      temp = LLVMBuildShl(builder, temp, shift, "");
1588af69d88dSmrg      test = LLVMBuildAnd(builder, test, temp, "");
15893464ebd5Sriastradh      mask = LLVMBuildOr(builder, mask, test, "");
1590af69d88dSmrg
15913464ebd5Sriastradh      /* plane 3 */
15923464ebd5Sriastradh      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
15933464ebd5Sriastradh      temp = LLVMBuildShl(builder, temp, shift, "");
1594af69d88dSmrg      test = LLVMBuildAnd(builder, test, temp, "");
15953464ebd5Sriastradh      mask = LLVMBuildOr(builder, mask, test, "");
15963464ebd5Sriastradh
15973464ebd5Sriastradh      /* plane 4 */
15983464ebd5Sriastradh      test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
15993464ebd5Sriastradh      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
16003464ebd5Sriastradh      temp = LLVMBuildShl(builder, temp, shift, "");
1601af69d88dSmrg      test = LLVMBuildAnd(builder, test, temp, "");
16023464ebd5Sriastradh      mask = LLVMBuildOr(builder, mask, test, "");
16033464ebd5Sriastradh   }
16043464ebd5Sriastradh
160501e04c3fSmrg   if (key->clip_z) {
1606af69d88dSmrg      temp = lp_build_const_int_vec(gallivm, i32_type, 16);
160701e04c3fSmrg      if (key->clip_halfz) {
16083464ebd5Sriastradh         /* plane 5 */
16093464ebd5Sriastradh         test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
1610af69d88dSmrg         test = LLVMBuildAnd(builder, test, temp, "");
16113464ebd5Sriastradh         mask = LLVMBuildOr(builder, mask, test, "");
1612af69d88dSmrg      }
16133464ebd5Sriastradh      else {
16143464ebd5Sriastradh         /* plane 5 */
16153464ebd5Sriastradh         test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
16163464ebd5Sriastradh         test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1617af69d88dSmrg         test = LLVMBuildAnd(builder, test, temp, "");
16183464ebd5Sriastradh         mask = LLVMBuildOr(builder, mask, test, "");
16193464ebd5Sriastradh      }
16203464ebd5Sriastradh      /* plane 6 */
16213464ebd5Sriastradh      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
16223464ebd5Sriastradh      temp = LLVMBuildShl(builder, temp, shift, "");
1623af69d88dSmrg      test = LLVMBuildAnd(builder, test, temp, "");
16243464ebd5Sriastradh      mask = LLVMBuildOr(builder, mask, test, "");
1625af69d88dSmrg   }
16263464ebd5Sriastradh
16273464ebd5Sriastradh   if (clip_user) {
16283464ebd5Sriastradh      LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr);
16293464ebd5Sriastradh      LLVMValueRef indices[3];
1630af69d88dSmrg      LLVMValueRef is_nan_or_inf;
16313464ebd5Sriastradh
16323464ebd5Sriastradh      /* userclip planes */
1633af69d88dSmrg      while (ucp_enable) {
1634af69d88dSmrg         unsigned plane_idx = ffs(ucp_enable)-1;
1635af69d88dSmrg         ucp_enable &= ~(1 << plane_idx);
1636af69d88dSmrg         plane_idx += 6;
1637af69d88dSmrg
1638af69d88dSmrg         if (have_cd && num_written_clipdistance) {
1639af69d88dSmrg            LLVMValueRef clipdist;
1640af69d88dSmrg            int i;
1641af69d88dSmrg            i = plane_idx - 6;
1642af69d88dSmrg
1643af69d88dSmrg            *have_clipdist = TRUE;
1644af69d88dSmrg            if (i < 4) {
1645af69d88dSmrg               clipdist = LLVMBuildLoad(builder, outputs[cd[0]][i], "");
1646af69d88dSmrg            } else {
1647af69d88dSmrg               clipdist = LLVMBuildLoad(builder, outputs[cd[1]][i-4], "");
1648af69d88dSmrg            }
1649af69d88dSmrg            test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, clipdist);
1650af69d88dSmrg            is_nan_or_inf = lp_build_is_inf_or_nan(gallivm, vs_type, clipdist);
1651af69d88dSmrg            test = LLVMBuildOr(builder, test, is_nan_or_inf, "");
165201e04c3fSmrg            temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
1653af69d88dSmrg            test = LLVMBuildAnd(builder, test, temp, "");
1654af69d88dSmrg            mask = LLVMBuildOr(builder, mask, test, "");
1655af69d88dSmrg         } else {
1656af69d88dSmrg            LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
1657af69d88dSmrg            indices[0] = lp_build_const_int32(gallivm, 0);
1658af69d88dSmrg            indices[1] = lp_build_const_int32(gallivm, plane_idx);
1659af69d88dSmrg
1660af69d88dSmrg            indices[2] = lp_build_const_int32(gallivm, 0);
1661af69d88dSmrg            plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1662af69d88dSmrg            plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x");
1663af69d88dSmrg            planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
1664af69d88dSmrg            sum = LLVMBuildFMul(builder, planes, cv_x, "");
1665af69d88dSmrg
1666af69d88dSmrg            indices[2] = lp_build_const_int32(gallivm, 1);
1667af69d88dSmrg            plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1668af69d88dSmrg            plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y");
1669af69d88dSmrg            planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
167001e04c3fSmrg            sum = lp_build_fmuladd(builder, planes, cv_y, sum);
1671af69d88dSmrg
1672af69d88dSmrg            indices[2] = lp_build_const_int32(gallivm, 2);
1673af69d88dSmrg            plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1674af69d88dSmrg            plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z");
1675af69d88dSmrg            planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
167601e04c3fSmrg            sum = lp_build_fmuladd(builder, planes, cv_z, sum);
1677af69d88dSmrg
1678af69d88dSmrg            indices[2] = lp_build_const_int32(gallivm, 3);
1679af69d88dSmrg            plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
1680af69d88dSmrg            plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w");
1681af69d88dSmrg            planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
168201e04c3fSmrg            sum = lp_build_fmuladd(builder, planes, cv_w, sum);
1683af69d88dSmrg
1684af69d88dSmrg            test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum);
168501e04c3fSmrg            temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
1686af69d88dSmrg            test = LLVMBuildAnd(builder, test, temp, "");
1687af69d88dSmrg            mask = LLVMBuildOr(builder, mask, test, "");
1688af69d88dSmrg         }
16893464ebd5Sriastradh      }
16903464ebd5Sriastradh   }
169101e04c3fSmrg   if (key->need_edgeflags) {
169201e04c3fSmrg      /*
169301e04c3fSmrg       * This isn't really part of clipmask but stored the same in vertex
169401e04c3fSmrg       * header later, so do it here.
169501e04c3fSmrg       */
169601e04c3fSmrg      unsigned edge_attr = llvm->draw->vs.edgeflag_output;
169701e04c3fSmrg      LLVMValueRef one = lp_build_const_vec(gallivm, f32_type, 1.0);
169801e04c3fSmrg      LLVMValueRef edgeflag = LLVMBuildLoad(builder, outputs[edge_attr][0], "");
169901e04c3fSmrg      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_EQUAL, one, edgeflag);
170001e04c3fSmrg      temp = lp_build_const_int_vec(gallivm, i32_type,
170101e04c3fSmrg                                    1LL << DRAW_TOTAL_CLIP_PLANES);
170201e04c3fSmrg      test = LLVMBuildAnd(builder, test, temp, "");
170301e04c3fSmrg      mask = LLVMBuildOr(builder, mask, test, "");
170401e04c3fSmrg   }
17053464ebd5Sriastradh   return mask;
17063464ebd5Sriastradh}
17073464ebd5Sriastradh
17083464ebd5Sriastradh
17093464ebd5Sriastradh/**
17103464ebd5Sriastradh * Returns boolean if any clipping has occurred
171101e04c3fSmrg * Used zero/one i8 value to represent boolean
17123464ebd5Sriastradh */
1713af69d88dSmrgstatic LLVMValueRef
171401e04c3fSmrgclipmask_booli8(struct gallivm_state *gallivm,
171501e04c3fSmrg                const struct lp_type vs_type,
171601e04c3fSmrg                LLVMValueRef clipmask_bool_ptr,
171701e04c3fSmrg                boolean edgeflag_in_clipmask)
17183464ebd5Sriastradh{
17193464ebd5Sriastradh   LLVMBuilderRef builder = gallivm->builder;
172001e04c3fSmrg   LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
1721af69d88dSmrg   LLVMValueRef clipmask_bool = LLVMBuildLoad(builder, clipmask_bool_ptr, "");
172201e04c3fSmrg   LLVMValueRef ret;
172301e04c3fSmrg   struct lp_build_context bldivec;
172401e04c3fSmrg
172501e04c3fSmrg   lp_build_context_init(&bldivec, gallivm, lp_int_type(vs_type));
17263464ebd5Sriastradh
1727af69d88dSmrg   /*
172801e04c3fSmrg    * We need to invert the edgeflag bit from the clipmask here
172901e04c3fSmrg    * (because the result is really if we want to run the pipeline or not
173001e04c3fSmrg    * and we (may) need it if edgeflag was 0).
1731af69d88dSmrg    */
173201e04c3fSmrg   if (edgeflag_in_clipmask) {
173301e04c3fSmrg      LLVMValueRef edge = lp_build_const_int_vec(gallivm, bldivec.type,
173401e04c3fSmrg                                                 1LL << DRAW_TOTAL_CLIP_PLANES);
173501e04c3fSmrg      clipmask_bool = LLVMBuildXor(builder, clipmask_bool, edge, "");
17363464ebd5Sriastradh   }
173701e04c3fSmrg
173801e04c3fSmrg   /*
173901e04c3fSmrg    * XXX: probably should mask off bits from the mask which come from
174001e04c3fSmrg    * vertices which were beyond the count (i.e. indices_valid for
174101e04c3fSmrg    * linear fetches, for elts ones we don't have the correct mask
174201e04c3fSmrg    * right now). Otherwise might run the pipeline for nothing,
174301e04c3fSmrg    * though everything should still work.
174401e04c3fSmrg    */
174501e04c3fSmrg   ret = lp_build_any_true_range(&bldivec, vs_type.length, clipmask_bool);
174601e04c3fSmrg   ret = LLVMBuildZExt(builder, ret, int8_type, "");
1747af69d88dSmrg   return ret;
17483464ebd5Sriastradh}
17493464ebd5Sriastradh
1750af69d88dSmrgstatic LLVMValueRef
17517ec681f3Smrgdraw_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
17527ec681f3Smrg                         struct lp_build_context * bld,
1753af69d88dSmrg                         boolean is_vindex_indirect,
1754af69d88dSmrg                         LLVMValueRef vertex_index,
1755af69d88dSmrg                         boolean is_aindex_indirect,
1756af69d88dSmrg                         LLVMValueRef attrib_index,
1757af69d88dSmrg                         LLVMValueRef swizzle_index)
17583464ebd5Sriastradh{
1759af69d88dSmrg   const struct draw_gs_llvm_iface *gs = draw_gs_llvm_iface(gs_iface);
17607ec681f3Smrg   struct gallivm_state *gallivm = bld->gallivm;
1761af69d88dSmrg   LLVMBuilderRef builder = gallivm->builder;
1762af69d88dSmrg   LLVMValueRef indices[3];
1763af69d88dSmrg   LLVMValueRef res;
17647ec681f3Smrg   struct lp_type type = bld->type;
1765af69d88dSmrg
1766af69d88dSmrg   if (is_vindex_indirect || is_aindex_indirect) {
1767af69d88dSmrg      int i;
17687ec681f3Smrg      res = bld->zero;
1769af69d88dSmrg      for (i = 0; i < type.length; ++i) {
1770af69d88dSmrg         LLVMValueRef idx = lp_build_const_int32(gallivm, i);
1771af69d88dSmrg         LLVMValueRef vert_chan_index = vertex_index;
1772af69d88dSmrg         LLVMValueRef attr_chan_index = attrib_index;
1773af69d88dSmrg         LLVMValueRef channel_vec, value;
1774af69d88dSmrg
1775af69d88dSmrg         if (is_vindex_indirect) {
1776af69d88dSmrg            vert_chan_index = LLVMBuildExtractElement(builder,
1777af69d88dSmrg                                                      vertex_index, idx, "");
1778af69d88dSmrg         }
1779af69d88dSmrg         if (is_aindex_indirect) {
1780af69d88dSmrg            attr_chan_index = LLVMBuildExtractElement(builder,
1781af69d88dSmrg                                                      attrib_index, idx, "");
1782af69d88dSmrg         }
17833464ebd5Sriastradh
1784af69d88dSmrg         indices[0] = vert_chan_index;
1785af69d88dSmrg         indices[1] = attr_chan_index;
1786af69d88dSmrg         indices[2] = swizzle_index;
17873464ebd5Sriastradh
1788af69d88dSmrg         channel_vec = LLVMBuildGEP(builder, gs->input, indices, 3, "");
1789af69d88dSmrg         channel_vec = LLVMBuildLoad(builder, channel_vec, "");
1790af69d88dSmrg         value = LLVMBuildExtractElement(builder, channel_vec, idx, "");
17913464ebd5Sriastradh
1792af69d88dSmrg         res = LLVMBuildInsertElement(builder, res, value, idx, "");
1793af69d88dSmrg      }
1794af69d88dSmrg   } else {
1795af69d88dSmrg      indices[0] = vertex_index;
1796af69d88dSmrg      indices[1] = attrib_index;
1797af69d88dSmrg      indices[2] = swizzle_index;
17983464ebd5Sriastradh
1799af69d88dSmrg      res = LLVMBuildGEP(builder, gs->input, indices, 3, "");
1800af69d88dSmrg      res = LLVMBuildLoad(builder, res, "");
18013464ebd5Sriastradh   }
18023464ebd5Sriastradh
1803af69d88dSmrg   return res;
1804af69d88dSmrg}
18053464ebd5Sriastradh
1806af69d88dSmrgstatic void
18077ec681f3Smrgdraw_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
18087ec681f3Smrg                         struct lp_build_context * bld,
1809af69d88dSmrg                         LLVMValueRef (*outputs)[4],
18107ec681f3Smrg                         LLVMValueRef emitted_vertices_vec,
18117ec681f3Smrg                         LLVMValueRef mask_vec, LLVMValueRef stream_id)
1812af69d88dSmrg{
1813af69d88dSmrg   const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1814af69d88dSmrg   struct draw_gs_llvm_variant *variant = gs_iface->variant;
1815af69d88dSmrg   struct gallivm_state *gallivm = variant->gallivm;
1816af69d88dSmrg   LLVMBuilderRef builder = gallivm->builder;
18177ec681f3Smrg   struct lp_type gs_type = bld->type;
1818af69d88dSmrg   LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
1819af69d88dSmrg                                                  lp_int_type(gs_type), 0);
1820af69d88dSmrg   LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
1821af69d88dSmrg   LLVMValueRef next_prim_offset =
1822af69d88dSmrg      lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary);
1823af69d88dSmrg   LLVMValueRef io = variant->io_ptr;
1824af69d88dSmrg   unsigned i;
1825af69d88dSmrg   const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
1826af69d88dSmrg
18277ec681f3Smrg   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
1828af69d88dSmrg   for (i = 0; i < gs_type.length; ++i) {
1829af69d88dSmrg      LLVMValueRef ind = lp_build_const_int32(gallivm, i);
1830af69d88dSmrg      LLVMValueRef currently_emitted =
1831af69d88dSmrg         LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");
1832af69d88dSmrg      indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, "");
1833af69d88dSmrg      indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");
18347ec681f3Smrg      indices[i] = LLVMBuildSelect(builder, LLVMBuildExtractElement(builder, cond, ind, ""), indices[i],
18357ec681f3Smrg                                   lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary - 1), "");
18363464ebd5Sriastradh   }
18373464ebd5Sriastradh
18387ec681f3Smrg   LLVMValueRef stream_idx = LLVMBuildExtractElement(builder, stream_id, lp_build_const_int32(gallivm, 0), "");
18397ec681f3Smrg   LLVMValueRef cnd = LLVMBuildICmp(builder, LLVMIntULT, stream_idx, lp_build_const_int32(gallivm, variant->shader->base.num_vertex_streams), "");
18407ec681f3Smrg   struct lp_build_if_state if_ctx;
18417ec681f3Smrg   lp_build_if(&if_ctx, gallivm, cnd);
18427ec681f3Smrg   io = lp_build_pointer_get(builder, io, LLVMBuildExtractElement(builder, stream_id, lp_build_const_int32(gallivm, 0), ""));
18437ec681f3Smrg
18447ec681f3Smrg   if (variant->key.clamp_vertex_color) {
18457ec681f3Smrg      do_clamp_vertex_color(gallivm, gs_type,
18467ec681f3Smrg                            gs_info, outputs);
18477ec681f3Smrg   }
1848af69d88dSmrg   convert_to_aos(gallivm, io, indices,
1849af69d88dSmrg                  outputs, clipmask,
1850af69d88dSmrg                  gs_info->num_outputs, gs_type,
1851af69d88dSmrg                  FALSE);
18527ec681f3Smrg   lp_build_endif(&if_ctx);
1853af69d88dSmrg}
18543464ebd5Sriastradh
1855af69d88dSmrgstatic void
18567ec681f3Smrgdraw_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
18577ec681f3Smrg                           struct lp_build_context * bld,
18587ec681f3Smrg                           LLVMValueRef total_emitted_vertices_vec_ptr,
1859af69d88dSmrg                           LLVMValueRef verts_per_prim_vec,
18607ec681f3Smrg                           LLVMValueRef emitted_prims_vec,
18617ec681f3Smrg                           LLVMValueRef mask_vec, unsigned stream)
1862af69d88dSmrg{
1863af69d88dSmrg   const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1864af69d88dSmrg   struct draw_gs_llvm_variant *variant = gs_iface->variant;
1865af69d88dSmrg   struct gallivm_state *gallivm = variant->gallivm;
1866af69d88dSmrg   LLVMBuilderRef builder = gallivm->builder;
1867af69d88dSmrg   LLVMValueRef prim_lengts_ptr =
1868af69d88dSmrg      draw_gs_jit_prim_lengths(variant->gallivm, variant->context_ptr);
1869af69d88dSmrg   unsigned i;
18703464ebd5Sriastradh
18717ec681f3Smrg   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
18727ec681f3Smrg   for (i = 0; i < bld->type.length; ++i) {
1873af69d88dSmrg      LLVMValueRef ind = lp_build_const_int32(gallivm, i);
1874af69d88dSmrg      LLVMValueRef prims_emitted =
1875af69d88dSmrg         LLVMBuildExtractElement(builder, emitted_prims_vec, ind, "");
1876af69d88dSmrg      LLVMValueRef store_ptr;
1877af69d88dSmrg      LLVMValueRef num_vertices =
1878af69d88dSmrg         LLVMBuildExtractElement(builder, verts_per_prim_vec, ind, "");
1879af69d88dSmrg
18807ec681f3Smrg      LLVMValueRef this_cond = LLVMBuildExtractElement(gallivm->builder, cond, ind, "");
18817ec681f3Smrg      struct lp_build_if_state ifthen;
18827ec681f3Smrg      lp_build_if(&ifthen, gallivm, this_cond);
18837ec681f3Smrg      prims_emitted = LLVMBuildMul(gallivm->builder, prims_emitted, lp_build_const_int32(gallivm, variant->shader->base.num_vertex_streams), "");
18847ec681f3Smrg      prims_emitted = LLVMBuildAdd(gallivm->builder, prims_emitted, lp_build_const_int32(gallivm, stream), "");
1885af69d88dSmrg      store_ptr = LLVMBuildGEP(builder, prim_lengts_ptr, &prims_emitted, 1, "");
1886af69d88dSmrg      store_ptr = LLVMBuildLoad(builder, store_ptr, "");
1887af69d88dSmrg      store_ptr = LLVMBuildGEP(builder, store_ptr, &ind, 1, "");
1888af69d88dSmrg      LLVMBuildStore(builder, num_vertices, store_ptr);
18897ec681f3Smrg      lp_build_endif(&ifthen);
18903464ebd5Sriastradh   }
18913464ebd5Sriastradh}
18923464ebd5Sriastradh
1893af69d88dSmrgstatic void
18947ec681f3Smrgdraw_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
1895af69d88dSmrg                      LLVMValueRef total_emitted_vertices_vec,
18967ec681f3Smrg                      LLVMValueRef emitted_prims_vec, unsigned stream)
1897af69d88dSmrg{
1898af69d88dSmrg   const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1899af69d88dSmrg   struct draw_gs_llvm_variant *variant = gs_iface->variant;
1900af69d88dSmrg   struct gallivm_state *gallivm = variant->gallivm;
1901af69d88dSmrg   LLVMBuilderRef builder = gallivm->builder;
1902af69d88dSmrg   LLVMValueRef emitted_verts_ptr =
1903af69d88dSmrg      draw_gs_jit_emitted_vertices(gallivm, variant->context_ptr);
1904af69d88dSmrg   LLVMValueRef emitted_prims_ptr =
1905af69d88dSmrg      draw_gs_jit_emitted_prims(gallivm, variant->context_ptr);
19067ec681f3Smrg   LLVMValueRef stream_val = lp_build_const_int32(gallivm, stream);
1907af69d88dSmrg
19087ec681f3Smrg   emitted_verts_ptr = LLVMBuildGEP(builder, emitted_verts_ptr, &stream_val, 1, "");
19097ec681f3Smrg   emitted_prims_ptr = LLVMBuildGEP(builder, emitted_prims_ptr, &stream_val, 1, "");
1910af69d88dSmrg
1911af69d88dSmrg   LLVMBuildStore(builder, total_emitted_vertices_vec, emitted_verts_ptr);
1912af69d88dSmrg   LLVMBuildStore(builder, emitted_prims_vec, emitted_prims_ptr);
1913af69d88dSmrg}
19143464ebd5Sriastradh
19153464ebd5Sriastradhstatic void
191601e04c3fSmrgdraw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
19173464ebd5Sriastradh{
1918af69d88dSmrg   struct gallivm_state *gallivm = variant->gallivm;
19193464ebd5Sriastradh   LLVMContextRef context = gallivm->context;
19203464ebd5Sriastradh   LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
19217ec681f3Smrg   LLVMTypeRef arg_types[13];
192201e04c3fSmrg   unsigned num_arg_types = ARRAY_SIZE(arg_types);
19233464ebd5Sriastradh   LLVMTypeRef func_type;
19243464ebd5Sriastradh   LLVMValueRef context_ptr;
19253464ebd5Sriastradh   LLVMBasicBlockRef block;
19263464ebd5Sriastradh   LLVMBuilderRef builder;
1927af69d88dSmrg   char func_name[64];
1928af69d88dSmrg   struct lp_type vs_type;
192901e04c3fSmrg   LLVMValueRef count, fetch_elts, start_or_maxelt;
19307ec681f3Smrg   LLVMValueRef vertex_id_offset;
1931af69d88dSmrg   LLVMValueRef stride, step, io_itr;
193201e04c3fSmrg   LLVMValueRef ind_vec, start_vec, have_elts, fetch_max, tmp;
19333464ebd5Sriastradh   LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
193401e04c3fSmrg   LLVMValueRef vb_stride[PIPE_MAX_ATTRIBS];
193501e04c3fSmrg   LLVMValueRef map_ptr[PIPE_MAX_ATTRIBS];
193601e04c3fSmrg   LLVMValueRef buffer_size_adj[PIPE_MAX_ATTRIBS];
193701e04c3fSmrg   LLVMValueRef instance_index[PIPE_MAX_ATTRIBS];
193801e04c3fSmrg   LLVMValueRef fake_buf_ptr, fake_buf;
193901e04c3fSmrg
19403464ebd5Sriastradh   struct draw_context *draw = llvm->draw;
19413464ebd5Sriastradh   const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
19423464ebd5Sriastradh   unsigned i, j;
194301e04c3fSmrg   struct lp_build_context bld, blduivec;
19443464ebd5Sriastradh   struct lp_build_loop_state lp_loop;
194501e04c3fSmrg   struct lp_build_if_state if_ctx;
1946af69d88dSmrg   const int vector_length = lp_native_vector_width / 32;
1947af69d88dSmrg   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
19483464ebd5Sriastradh   struct lp_build_sampler_soa *sampler = 0;
19497ec681f3Smrg   struct lp_build_image_soa *image = NULL;
1950af69d88dSmrg   LLVMValueRef ret, clipmask_bool_ptr;
1951af69d88dSmrg   struct draw_llvm_variant_key *key = &variant->key;
1952af69d88dSmrg   /* If geometry shader is present we need to skip both the viewport
1953af69d88dSmrg    * transformation and clipping otherwise the inputs to the geometry
1954af69d88dSmrg    * shader will be incorrect.
195501e04c3fSmrg    * The code can't handle vp transform when vs writes vp index neither
195601e04c3fSmrg    * (though this would be fixable here, but couldn't just broadcast
195701e04c3fSmrg    * the values).
1958af69d88dSmrg    */
19597ec681f3Smrg   const boolean bypass_viewport = key->has_gs_or_tes || key->bypass_viewport ||
196001e04c3fSmrg                                   vs_info->writes_viewport_index;
19617ec681f3Smrg   const boolean enable_cliptest = !key->has_gs_or_tes && (key->clip_xy ||
196201e04c3fSmrg                                                    key->clip_z ||
196301e04c3fSmrg                                                    key->clip_user ||
196401e04c3fSmrg                                                    key->need_edgeflags);
1965af69d88dSmrg   LLVMValueRef variant_func;
196601e04c3fSmrg   const unsigned pos = draw->vs.position_output;
196701e04c3fSmrg   const unsigned cv = draw->vs.clipvertex_output;
1968af69d88dSmrg   boolean have_clipdist = FALSE;
1969af69d88dSmrg   struct lp_bld_tgsi_system_values system_values;
1970af69d88dSmrg
1971af69d88dSmrg   memset(&system_values, 0, sizeof(system_values));
19727ec681f3Smrg   memset(&outputs, 0, sizeof(outputs));
19737ec681f3Smrg   snprintf(func_name, sizeof(func_name), "draw_llvm_vs_variant");
1974af69d88dSmrg
1975af69d88dSmrg   i = 0;
1976af69d88dSmrg   arg_types[i++] = get_context_ptr_type(variant);       /* context */
1977af69d88dSmrg   arg_types[i++] = get_vertex_header_ptr_type(variant); /* vertex_header */
1978af69d88dSmrg   arg_types[i++] = get_buffer_ptr_type(variant);        /* vbuffers */
197901e04c3fSmrg   arg_types[i++] = int32_type;                          /* count */
198001e04c3fSmrg   arg_types[i++] = int32_type;                          /* start/fetch_elt_max */
198101e04c3fSmrg   arg_types[i++] = int32_type;                          /* stride */
198201e04c3fSmrg   arg_types[i++] = get_vb_ptr_type(variant);            /* pipe_vertex_buffer's */
198301e04c3fSmrg   arg_types[i++] = int32_type;                          /* instance_id */
198401e04c3fSmrg   arg_types[i++] = int32_type;                          /* vertex_id_offset */
198501e04c3fSmrg   arg_types[i++] = int32_type;                          /* start_instance */
198601e04c3fSmrg   arg_types[i++] = LLVMPointerType(int32_type, 0);      /* fetch_elts  */
19877ec681f3Smrg   arg_types[i++] = int32_type;                          /* draw_id */
19887ec681f3Smrg   arg_types[i++] = int32_type;                          /* view_id */
198901e04c3fSmrg
199001e04c3fSmrg   func_type = LLVMFunctionType(LLVMInt8TypeInContext(context),
199101e04c3fSmrg                                arg_types, num_arg_types, 0);
1992af69d88dSmrg
1993af69d88dSmrg   variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
199401e04c3fSmrg   variant->function = variant_func;
1995af69d88dSmrg
1996af69d88dSmrg   LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
1997af69d88dSmrg   for (i = 0; i < num_arg_types; ++i)
19983464ebd5Sriastradh      if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
199901e04c3fSmrg         lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
20003464ebd5Sriastradh
20017ec681f3Smrg   if (gallivm->cache && gallivm->cache->data_size)
20027ec681f3Smrg      return;
2003af69d88dSmrg   context_ptr               = LLVMGetParam(variant_func, 0);
2004af69d88dSmrg   io_ptr                    = LLVMGetParam(variant_func, 1);
2005af69d88dSmrg   vbuffers_ptr              = LLVMGetParam(variant_func, 2);
200601e04c3fSmrg   count                     = LLVMGetParam(variant_func, 3);
200701e04c3fSmrg   /*
200801e04c3fSmrg    * XXX: the maxelt part is unused. Not really useful, since we cannot
200901e04c3fSmrg    * get index buffer overflows due to vsplit (which provides its own
201001e04c3fSmrg    * elts buffer, with a different size than what's passed in here).
201101e04c3fSmrg    */
201201e04c3fSmrg   start_or_maxelt           = LLVMGetParam(variant_func, 4);
201301e04c3fSmrg   /*
201401e04c3fSmrg    * XXX: stride is actually unused. The stride we use is strictly calculated
201501e04c3fSmrg    * from the number of outputs (including the draw_extra outputs).
201601e04c3fSmrg    * Should probably fix some day (we need a new vs just because of extra
201701e04c3fSmrg    * outputs which the generated vs won't touch).
201801e04c3fSmrg    */
201901e04c3fSmrg   stride                    = LLVMGetParam(variant_func, 5);
202001e04c3fSmrg   vb_ptr                    = LLVMGetParam(variant_func, 6);
202101e04c3fSmrg   system_values.instance_id = LLVMGetParam(variant_func, 7);
202201e04c3fSmrg   vertex_id_offset          = LLVMGetParam(variant_func, 8);
20237ec681f3Smrg   system_values.base_instance = LLVMGetParam(variant_func, 9);
202401e04c3fSmrg   fetch_elts                = LLVMGetParam(variant_func, 10);
20257ec681f3Smrg   system_values.draw_id     = LLVMGetParam(variant_func, 11);
20267ec681f3Smrg   system_values.view_index  = LLVMGetParam(variant_func, 12);
20273464ebd5Sriastradh
20283464ebd5Sriastradh   lp_build_name(context_ptr, "context");
20293464ebd5Sriastradh   lp_build_name(io_ptr, "io");
20303464ebd5Sriastradh   lp_build_name(vbuffers_ptr, "vbuffers");
203101e04c3fSmrg   lp_build_name(count, "count");
203201e04c3fSmrg   lp_build_name(start_or_maxelt, "start_or_maxelt");
20333464ebd5Sriastradh   lp_build_name(stride, "stride");
20343464ebd5Sriastradh   lp_build_name(vb_ptr, "vb");
2035af69d88dSmrg   lp_build_name(system_values.instance_id, "instance_id");
2036af69d88dSmrg   lp_build_name(vertex_id_offset, "vertex_id_offset");
20377ec681f3Smrg   lp_build_name(system_values.base_instance, "start_instance");
203801e04c3fSmrg   lp_build_name(fetch_elts, "fetch_elts");
20397ec681f3Smrg   lp_build_name(system_values.draw_id, "draw_id");
20403464ebd5Sriastradh
20413464ebd5Sriastradh   /*
20423464ebd5Sriastradh    * Function body
20433464ebd5Sriastradh    */
20443464ebd5Sriastradh
2045af69d88dSmrg   block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
20463464ebd5Sriastradh   builder = gallivm->builder;
20473464ebd5Sriastradh   LLVMPositionBuilderAtEnd(builder, block);
20483464ebd5Sriastradh
2049af69d88dSmrg   memset(&vs_type, 0, sizeof vs_type);
2050af69d88dSmrg   vs_type.floating = TRUE; /* floating point values */
2051af69d88dSmrg   vs_type.sign = TRUE;     /* values are signed */
2052af69d88dSmrg   vs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
2053af69d88dSmrg   vs_type.width = 32;      /* 32-bit float */
2054af69d88dSmrg   vs_type.length = vector_length;
20553464ebd5Sriastradh
205601e04c3fSmrg   lp_build_context_init(&bld, gallivm, lp_type_uint(32));
205701e04c3fSmrg   lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
205801e04c3fSmrg
2059af69d88dSmrg   /* hold temporary "bool" clipmask */
206001e04c3fSmrg   clipmask_bool_ptr = lp_build_alloca(gallivm, blduivec.vec_type, "");
206101e04c3fSmrg
206201e04c3fSmrg   fake_buf = lp_build_alloca_undef(gallivm,
206301e04c3fSmrg                 LLVMVectorType(LLVMInt64TypeInContext(context), 4), "");
206401e04c3fSmrg   fake_buf = LLVMBuildBitCast(builder, fake_buf,
206501e04c3fSmrg                 LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");
206601e04c3fSmrg   fake_buf_ptr = LLVMBuildGEP(builder, fake_buf, &bld.zero, 1, "");
20673464ebd5Sriastradh
20683464ebd5Sriastradh   /* code generated texture sampling */
20697ec681f3Smrg   sampler = draw_llvm_sampler_soa_create(draw_llvm_variant_key_samplers(key), key->nr_samplers);
20707ec681f3Smrg
20717ec681f3Smrg   image = draw_llvm_image_soa_create(draw_llvm_variant_key_images(key),
20727ec681f3Smrg                                      key->nr_images);
2073af69d88dSmrg
2074af69d88dSmrg   step = lp_build_const_int32(gallivm, vector_length);
20753464ebd5Sriastradh
207601e04c3fSmrg   ind_vec = blduivec.undef;
207701e04c3fSmrg   for (i = 0; i < vs_type.length; i++) {
207801e04c3fSmrg      LLVMValueRef index = lp_build_const_int32(gallivm, i);
207901e04c3fSmrg      ind_vec = LLVMBuildInsertElement(builder, ind_vec, index, index, "");
208001e04c3fSmrg   }
20813464ebd5Sriastradh
208201e04c3fSmrg   have_elts = LLVMBuildICmp(builder, LLVMIntNE,
208301e04c3fSmrg                             LLVMConstPointerNull(arg_types[10]), fetch_elts, "");
208401e04c3fSmrg
208501e04c3fSmrg   fetch_max = LLVMBuildSub(builder, count, bld.one, "fetch_max");
208601e04c3fSmrg   fetch_max = lp_build_broadcast_scalar(&blduivec, fetch_max);
208701e04c3fSmrg   /*
208801e04c3fSmrg    * Only needed for non-indexed path.
208901e04c3fSmrg    */
209001e04c3fSmrg   start_vec = lp_build_broadcast_scalar(&blduivec, start_or_maxelt);
209101e04c3fSmrg
209201e04c3fSmrg   /*
209301e04c3fSmrg    * Pre-calculate everything which is constant per shader invocation.
209401e04c3fSmrg    */
209501e04c3fSmrg   for (j = 0; j < key->nr_vertex_elements; ++j) {
209601e04c3fSmrg      LLVMValueRef vb_buffer_offset, buffer_size, temp_ptr;
209701e04c3fSmrg      LLVMValueRef vb_info, vbuffer_ptr, buf_offset, ofbit;
209801e04c3fSmrg      struct pipe_vertex_element *velem = &key->vertex_element[j];
209901e04c3fSmrg      LLVMValueRef vb_index =
210001e04c3fSmrg         lp_build_const_int32(gallivm, velem->vertex_buffer_index);
210101e04c3fSmrg      LLVMValueRef bsize = lp_build_const_int32(gallivm,
210201e04c3fSmrg                                                util_format_get_blocksize(velem->src_format));
210301e04c3fSmrg      LLVMValueRef src_offset = lp_build_const_int32(gallivm,
210401e04c3fSmrg                                                     velem->src_offset);
210501e04c3fSmrg      struct lp_build_if_state if_ctx;
210601e04c3fSmrg
210701e04c3fSmrg      if (velem->src_format != PIPE_FORMAT_NONE) {
210801e04c3fSmrg         vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, &vb_index, 1, "");
210901e04c3fSmrg         vb_info = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, "");
211001e04c3fSmrg         vb_stride[j] = draw_jit_vbuffer_stride(gallivm, vb_info);
211101e04c3fSmrg         vb_stride[j] = LLVMBuildZExt(gallivm->builder, vb_stride[j],
211201e04c3fSmrg                                      LLVMInt32TypeInContext(context), "");
211301e04c3fSmrg         vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vb_info);
211401e04c3fSmrg         map_ptr[j] = draw_jit_dvbuffer_map(gallivm, vbuffer_ptr);
211501e04c3fSmrg         buffer_size = draw_jit_dvbuffer_size(gallivm, vbuffer_ptr);
211601e04c3fSmrg
211701e04c3fSmrg         ofbit = NULL;
211801e04c3fSmrg         /*
211901e04c3fSmrg          * We'll set buffer_size_adj to zero if we have of, so it will
212001e04c3fSmrg          * always overflow later automatically without having to keep ofbit.
212101e04c3fSmrg          * Overflows (with normal wraparound) doing the actual offset
212201e04c3fSmrg          * calculation should be ok, just not for the buffer size calc.
212301e04c3fSmrg          * It would also be possible to detect such overflows and return
212401e04c3fSmrg          * zeros if that happens, but this would be more complex.
212501e04c3fSmrg          */
212601e04c3fSmrg         buf_offset = lp_build_add(&bld, vb_buffer_offset, src_offset);
212701e04c3fSmrg         tmp = lp_build_sub(&bld, bsize, bld.one);
212801e04c3fSmrg         buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size, tmp,
212901e04c3fSmrg                                                     &ofbit);
213001e04c3fSmrg         buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size_adj[j],
213101e04c3fSmrg                                                     buf_offset, &ofbit);
213201e04c3fSmrg
213301e04c3fSmrg         /*
213401e04c3fSmrg          * We can't easily set fake vertex buffers outside the generated code.
213501e04c3fSmrg          * Hence, set fake vertex buffers here instead basically, so fetch
213601e04c3fSmrg          * code can always fetch using offset 0, eliminating all control flow
213701e04c3fSmrg          * inside the main loop.
213801e04c3fSmrg          * (Alternatively, could have control flow per vector skipping fetch
213901e04c3fSmrg          * if ofbit is true.)
214001e04c3fSmrg          */
214101e04c3fSmrg         if (velem->instance_divisor) {
214201e04c3fSmrg            /*
214301e04c3fSmrg             * Index is equal to the start instance plus the number of current
214401e04c3fSmrg             * instance divided by the divisor. In this case we compute it as:
214501e04c3fSmrg             * index = start_instance + (instance_id  / divisor).
214601e04c3fSmrg             * Note we could actually do the fetch here, outside the loop -
214701e04c3fSmrg             * it's all constant, hopefully llvm recognizes this.
214801e04c3fSmrg             */
214901e04c3fSmrg            LLVMValueRef current_instance;
215001e04c3fSmrg            current_instance = LLVMBuildUDiv(builder, system_values.instance_id,
215101e04c3fSmrg                                             lp_build_const_int32(gallivm,
215201e04c3fSmrg                                                                  velem->instance_divisor),
215301e04c3fSmrg                                             "instance_divisor");
21547ec681f3Smrg            instance_index[j] = lp_build_uadd_overflow(gallivm, system_values.base_instance,
215501e04c3fSmrg                                                       current_instance, &ofbit);
215601e04c3fSmrg         }
215701e04c3fSmrg
215801e04c3fSmrg         buffer_size_adj[j] = LLVMBuildSelect(builder, ofbit, bld.zero,
215901e04c3fSmrg                                              buffer_size_adj[j], "");
216001e04c3fSmrg
216101e04c3fSmrg         temp_ptr = lp_build_alloca_undef(gallivm,
216201e04c3fSmrg                       LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");
216301e04c3fSmrg
216401e04c3fSmrg         lp_build_if(&if_ctx, gallivm, ofbit);
216501e04c3fSmrg         {
216601e04c3fSmrg            LLVMBuildStore(builder, fake_buf_ptr, temp_ptr);
216701e04c3fSmrg         }
216801e04c3fSmrg         lp_build_else(&if_ctx);
216901e04c3fSmrg         {
217001e04c3fSmrg            map_ptr[j] = LLVMBuildGEP(builder, map_ptr[j], &buf_offset, 1, "");
217101e04c3fSmrg            LLVMBuildStore(builder, map_ptr[j], temp_ptr);
217201e04c3fSmrg         }
217301e04c3fSmrg         lp_build_endif(&if_ctx);
217401e04c3fSmrg         map_ptr[j] = LLVMBuildLoad(builder, temp_ptr, "map_ptr");
217501e04c3fSmrg
217601e04c3fSmrg         if (0) {
217701e04c3fSmrg            lp_build_printf(gallivm, "velem %d, vbuf index = %u, vb_stride = %u\n",
217801e04c3fSmrg                            lp_build_const_int32(gallivm, j),
217901e04c3fSmrg                            vb_index, vb_stride[j]);
218001e04c3fSmrg            lp_build_printf(gallivm,
218101e04c3fSmrg                            "   vb_buffer_offset = %u, src_offset = %u, buf_offset = %u\n",
218201e04c3fSmrg                            vb_buffer_offset, src_offset, buf_offset);
218301e04c3fSmrg            lp_build_printf(gallivm, "   buffer size = %u, blocksize = %u\n",
218401e04c3fSmrg                            buffer_size, bsize);
218501e04c3fSmrg            lp_build_printf(gallivm, "   instance_id = %u\n", system_values.instance_id);
218601e04c3fSmrg         }
218701e04c3fSmrg      }
218801e04c3fSmrg   }
218901e04c3fSmrg
219001e04c3fSmrg   lp_build_loop_begin(&lp_loop, gallivm, bld.zero);
21913464ebd5Sriastradh   {
2192af69d88dSmrg      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
21933464ebd5Sriastradh      LLVMValueRef io;
21943464ebd5Sriastradh      LLVMValueRef clipmask;   /* holds the clipmask value */
219501e04c3fSmrg      LLVMValueRef true_index_array, index_store;
2196af69d88dSmrg      const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];
21973464ebd5Sriastradh
21983464ebd5Sriastradh      io_itr = lp_loop.counter;
2199af69d88dSmrg
22003464ebd5Sriastradh      io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
22013464ebd5Sriastradh#if DEBUG_STORE
2202af69d88dSmrg      lp_build_printf(gallivm, " --- io %d = %p, loop counter %d\n",
22033464ebd5Sriastradh                      io_itr, io, lp_loop.counter);
22043464ebd5Sriastradh#endif
220501e04c3fSmrg
220601e04c3fSmrg      true_index_array = lp_build_broadcast_scalar(&blduivec, lp_loop.counter);
220701e04c3fSmrg      true_index_array = LLVMBuildAdd(builder, true_index_array, ind_vec, "");
220801e04c3fSmrg
22097ec681f3Smrg      LLVMValueRef exec_mask = lp_build_cmp(&blduivec, PIPE_FUNC_LEQUAL, true_index_array, fetch_max);
221001e04c3fSmrg      /*
221101e04c3fSmrg       * Limit indices to fetch_max, otherwise might try to access indices
221201e04c3fSmrg       * beyond index buffer (or rather vsplit elt buffer) size.
221301e04c3fSmrg       * Could probably safely (?) skip this for non-indexed draws and
221401e04c3fSmrg       * simplify things minimally (by removing it could combine the ind_vec
221501e04c3fSmrg       * and start_vec adds). I think the only effect for non-indexed draws will
221601e04c3fSmrg       * be that for the invalid elements they will be all fetched from the
221701e04c3fSmrg       * same location as the last valid one, but noone should really care.
221801e04c3fSmrg       */
221901e04c3fSmrg      true_index_array = lp_build_min(&blduivec, true_index_array, fetch_max);
222001e04c3fSmrg
222101e04c3fSmrg      index_store = lp_build_alloca_undef(gallivm, blduivec.vec_type, "index_store");
222201e04c3fSmrg
222301e04c3fSmrg      lp_build_if(&if_ctx, gallivm, have_elts);
222401e04c3fSmrg      {
222501e04c3fSmrg         /*
222601e04c3fSmrg          * Note: you'd expect some comparison/clamp against fetch_elt_max
222701e04c3fSmrg          * here.
222801e04c3fSmrg          * There used to be one here but it was incorrect: overflow was
222901e04c3fSmrg          * detected if index > fetch_elt_max - but the correct condition
223001e04c3fSmrg          * would be index >= fetch_elt_max (since this is just size of elts
223101e04c3fSmrg          * buffer / element size).
223201e04c3fSmrg          * Using the correct condition however will cause failures - due to
223301e04c3fSmrg          * vsplit/vcache code which rebases indices. So, as an example, if
223401e04c3fSmrg          * fetch_elt_max is just 1 and fetch_count 2, vsplit cache will
223501e04c3fSmrg          * replace all invalid indices with 0 - which in case of elt_bias
223601e04c3fSmrg          * not being zero will get a different fetch index than the valid
223701e04c3fSmrg          * index 0. So, just rely on vsplit code preventing out-of-bounds
223801e04c3fSmrg          * fetches. This is also why it's safe to do elts fetch even if there
223901e04c3fSmrg          * was no index buffer bound - the real buffer is never seen here, at
224001e04c3fSmrg          * least not if there are index buffer overflows...
224101e04c3fSmrg          */
224201e04c3fSmrg
224301e04c3fSmrg         /*
224401e04c3fSmrg          * XXX should not have to do this, as scale can be handled
224501e04c3fSmrg          * natively by loads (hits asserts though).
224601e04c3fSmrg          */
224701e04c3fSmrg         tmp = lp_build_shl_imm(&blduivec, true_index_array, 2);
224801e04c3fSmrg         fetch_elts = LLVMBuildBitCast(builder, fetch_elts,
224901e04c3fSmrg                                       LLVMPointerType(LLVMInt8TypeInContext(context),
225001e04c3fSmrg                                                       0), "");
225101e04c3fSmrg         tmp = lp_build_gather(gallivm, vs_type.length,
225201e04c3fSmrg                               32, bld.type, TRUE,
225301e04c3fSmrg                               fetch_elts, tmp, FALSE);
225401e04c3fSmrg         LLVMBuildStore(builder, tmp, index_store);
225501e04c3fSmrg      }
225601e04c3fSmrg      lp_build_else(&if_ctx);
225701e04c3fSmrg      {
225801e04c3fSmrg         tmp = LLVMBuildAdd(builder, true_index_array, start_vec, "");
225901e04c3fSmrg         LLVMBuildStore(builder, tmp, index_store);
226001e04c3fSmrg      }
226101e04c3fSmrg      lp_build_endif(&if_ctx);
226201e04c3fSmrg
226301e04c3fSmrg      true_index_array = LLVMBuildLoad(builder, index_store, "");
226401e04c3fSmrg
226501e04c3fSmrg      for (j = 0; j < key->nr_vertex_elements; ++j) {
226601e04c3fSmrg         struct pipe_vertex_element *velem = &key->vertex_element[j];
226701e04c3fSmrg         const struct util_format_description *format_desc =
226801e04c3fSmrg            util_format_description(velem->src_format);
226901e04c3fSmrg
227001e04c3fSmrg         if (format_desc->format == PIPE_FORMAT_NONE) {
227101e04c3fSmrg            for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
227201e04c3fSmrg               inputs[j][i] = lp_build_zero(gallivm, vs_type);
2273af69d88dSmrg            }
2274af69d88dSmrg         }
227501e04c3fSmrg         else if (velem->instance_divisor) {
227601e04c3fSmrg            fetch_instanced(gallivm, format_desc, vs_type,
227701e04c3fSmrg                            vb_stride[j], map_ptr[j],
227801e04c3fSmrg                            buffer_size_adj[j],
227901e04c3fSmrg                            inputs[j], instance_index[j]);
228001e04c3fSmrg         }
228101e04c3fSmrg         else {
228201e04c3fSmrg            fetch_vector(gallivm, format_desc, vs_type,
228301e04c3fSmrg                         vb_stride[j], map_ptr[j],
228401e04c3fSmrg                         buffer_size_adj[j],
228501e04c3fSmrg                         inputs[j], true_index_array);
22863464ebd5Sriastradh         }
22873464ebd5Sriastradh      }
228801e04c3fSmrg
22897ec681f3Smrg      struct lp_build_mask_context mask;
22907ec681f3Smrg
22917ec681f3Smrg      lp_build_mask_begin(&mask, gallivm, vs_type, exec_mask);
229201e04c3fSmrg      /* In the paths with elts vertex id has to be unaffected by the
229301e04c3fSmrg       * index bias and because indices inside our elements array have
229401e04c3fSmrg       * already had index bias applied we need to subtract it here to
229501e04c3fSmrg       * get back to the original index.
229601e04c3fSmrg       * in the linear paths vertex id has to be unaffected by the
229701e04c3fSmrg       * original start index and because we abuse the 'start' variable
229801e04c3fSmrg       * to either represent the actual start index or the index at which
229901e04c3fSmrg       * the primitive was split (we split rendering into chunks of at
230001e04c3fSmrg       * most 4095-vertices) we need to back out the original start
230101e04c3fSmrg       * index out of our vertex id here.
23027ec681f3Smrg       * for ARB_shader_draw_parameters, base_vertex should be 0 for non-indexed draws.
230301e04c3fSmrg       */
23047ec681f3Smrg      LLVMValueRef base_vertex = lp_build_select(&bld, have_elts, vertex_id_offset, lp_build_const_int32(gallivm, 0));
23057ec681f3Smrg      system_values.basevertex = lp_build_broadcast_scalar(&blduivec, base_vertex);
23067ec681f3Smrg      /* first vertex is for Vulkan base vertex support */
23077ec681f3Smrg      LLVMValueRef first_vertex = lp_build_select(&bld, have_elts, vertex_id_offset, start_or_maxelt);
23087ec681f3Smrg      system_values.firstvertex = lp_build_broadcast_scalar(&blduivec, first_vertex);
230901e04c3fSmrg      system_values.vertex_id = true_index_array;
231001e04c3fSmrg      system_values.vertex_id_nobase = LLVMBuildSub(builder, true_index_array,
23117ec681f3Smrg                                                    lp_build_broadcast_scalar(&blduivec, vertex_id_offset), "");
23123464ebd5Sriastradh
2313af69d88dSmrg      ptr_aos = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) inputs;
2314af69d88dSmrg      generate_vs(variant,
23153464ebd5Sriastradh                  builder,
2316af69d88dSmrg                  vs_type,
23173464ebd5Sriastradh                  outputs,
23183464ebd5Sriastradh                  ptr_aos,
2319af69d88dSmrg                  &system_values,
23203464ebd5Sriastradh                  context_ptr,
23213464ebd5Sriastradh                  sampler,
23227ec681f3Smrg                  image,
23237ec681f3Smrg                  key->clamp_vertex_color,
23247ec681f3Smrg                  &mask);
2325af69d88dSmrg
23267ec681f3Smrg      lp_build_mask_end(&mask);
2327af69d88dSmrg      if (pos != -1 && cv != -1) {
2328af69d88dSmrg         /* store original positions in clip before further manipulation */
232901e04c3fSmrg         store_clip(gallivm, vs_type, io, outputs, pos);
2330af69d88dSmrg
2331af69d88dSmrg         /* do cliptest */
2332af69d88dSmrg         if (enable_cliptest) {
2333af69d88dSmrg            LLVMValueRef temp = LLVMBuildLoad(builder, clipmask_bool_ptr, "");
2334af69d88dSmrg            /* allocate clipmask, assign it integer type */
2335af69d88dSmrg            clipmask = generate_clipmask(llvm,
2336af69d88dSmrg                                         gallivm,
2337af69d88dSmrg                                         vs_type,
2338af69d88dSmrg                                         outputs,
233901e04c3fSmrg                                         key,
2340af69d88dSmrg                                         context_ptr, &have_clipdist);
2341af69d88dSmrg            temp = LLVMBuildOr(builder, clipmask, temp, "");
2342af69d88dSmrg            /* store temporary clipping boolean value */
2343af69d88dSmrg            LLVMBuildStore(builder, temp, clipmask_bool_ptr);
2344af69d88dSmrg         }
2345af69d88dSmrg         else {
234601e04c3fSmrg            clipmask = blduivec.zero;
2347af69d88dSmrg         }
2348af69d88dSmrg
2349af69d88dSmrg         /* do viewport mapping */
2350af69d88dSmrg         if (!bypass_viewport) {
2351af69d88dSmrg            generate_viewport(variant, builder, vs_type, outputs, context_ptr);
2352af69d88dSmrg         }
23533464ebd5Sriastradh      }
23543464ebd5Sriastradh      else {
235501e04c3fSmrg         clipmask = blduivec.zero;
23563464ebd5Sriastradh      }
23573464ebd5Sriastradh
2358af69d88dSmrg      /* store clipmask in vertex header,
2359af69d88dSmrg       * original positions in clip
2360af69d88dSmrg       * and transformed positions in data
2361af69d88dSmrg       */
2362af69d88dSmrg      convert_to_aos(gallivm, io, NULL, outputs, clipmask,
2363af69d88dSmrg                     vs_info->num_outputs, vs_type,
236401e04c3fSmrg                     enable_cliptest && key->need_edgeflags);
23653464ebd5Sriastradh   }
2366af69d88dSmrg   lp_build_loop_end_cond(&lp_loop, count, step, LLVMIntUGE);
23673464ebd5Sriastradh
23683464ebd5Sriastradh   sampler->destroy(sampler);
23697ec681f3Smrg   image->destroy(image);
23703464ebd5Sriastradh
2371af69d88dSmrg   /* return clipping boolean value for function */
237201e04c3fSmrg   ret = clipmask_booli8(gallivm, vs_type, clipmask_bool_ptr,
237301e04c3fSmrg                         enable_cliptest && key->need_edgeflags);
23743464ebd5Sriastradh
2375af69d88dSmrg   LLVMBuildRet(builder, ret);
23763464ebd5Sriastradh
2377af69d88dSmrg   gallivm_verify_function(gallivm, variant_func);
23783464ebd5Sriastradh}
23793464ebd5Sriastradh
23803464ebd5Sriastradh
23813464ebd5Sriastradhstruct draw_llvm_variant_key *
23823464ebd5Sriastradhdraw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
23833464ebd5Sriastradh{
23843464ebd5Sriastradh   unsigned i;
23853464ebd5Sriastradh   struct draw_llvm_variant_key *key;
2386af69d88dSmrg   struct draw_sampler_static_state *draw_sampler;
23877ec681f3Smrg   struct draw_image_static_state *draw_image;
23883464ebd5Sriastradh
23893464ebd5Sriastradh   key = (struct draw_llvm_variant_key *)store;
23903464ebd5Sriastradh
239101e04c3fSmrg   memset(key, 0, offsetof(struct draw_llvm_variant_key, vertex_element[0]));
23923464ebd5Sriastradh
23933464ebd5Sriastradh
23943464ebd5Sriastradh   /* will have to rig this up properly later */
23953464ebd5Sriastradh   key->clip_xy = llvm->draw->clip_xy;
23963464ebd5Sriastradh   key->clip_z = llvm->draw->clip_z;
23973464ebd5Sriastradh   key->clip_user = llvm->draw->clip_user;
239801e04c3fSmrg   key->bypass_viewport = llvm->draw->bypass_viewport;
2399af69d88dSmrg   key->clip_halfz = llvm->draw->rasterizer->clip_halfz;
240001e04c3fSmrg   /* XXX assumes edgeflag output not at 0 */
24013464ebd5Sriastradh   key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
2402af69d88dSmrg   key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
24037ec681f3Smrg   key->has_gs_or_tes = llvm->draw->gs.geometry_shader != NULL || llvm->draw->tes.tess_eval_shader != NULL;
2404af69d88dSmrg   key->num_outputs = draw_total_vs_outputs(llvm->draw);
24053464ebd5Sriastradh
24067ec681f3Smrg   key->clamp_vertex_color = !key->has_gs_or_tes &&
24077ec681f3Smrg      llvm->draw->rasterizer->clamp_vertex_color;
24087ec681f3Smrg
24093464ebd5Sriastradh   /* All variants of this shader will have the same value for
24103464ebd5Sriastradh    * nr_samplers.  Not yet trying to compact away holes in the
24113464ebd5Sriastradh    * sampler array.
24123464ebd5Sriastradh    */
24133464ebd5Sriastradh   key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
2414af69d88dSmrg   if (llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
2415af69d88dSmrg      key->nr_sampler_views =
2416af69d88dSmrg         llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
2417af69d88dSmrg   }
2418af69d88dSmrg   else {
2419af69d88dSmrg      key->nr_sampler_views = key->nr_samplers;
2420af69d88dSmrg   }
24213464ebd5Sriastradh
24227ec681f3Smrg   key->nr_images = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
24237ec681f3Smrg
242401e04c3fSmrg   /* Presumably all variants of the shader should have the same
242501e04c3fSmrg    * number of vertex elements - ie the number of shader inputs.
242601e04c3fSmrg    * NOTE: we NEED to store the needed number of needed inputs
242701e04c3fSmrg    * here, not the number of provided elements to match keysize
242801e04c3fSmrg    * (and the offset of sampler state in the key).
242901e04c3fSmrg    * If we have excess number of vertex elements, this is valid,
243001e04c3fSmrg    * but the excess ones don't matter.
243101e04c3fSmrg    * If we don't have enough vertex elements (which looks not really
243201e04c3fSmrg    * valid but we'll handle it gracefully) fill out missing ones with
243301e04c3fSmrg    * zero (we'll recognize these later by PIPE_FORMAT_NONE).
243401e04c3fSmrg    */
243501e04c3fSmrg   key->nr_vertex_elements =
243601e04c3fSmrg      llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_INPUT] + 1;
243701e04c3fSmrg
243801e04c3fSmrg   if (llvm->draw->pt.nr_vertex_elements < key->nr_vertex_elements) {
243901e04c3fSmrg      debug_printf("draw: vs with %d inputs but only have %d vertex elements\n",
244001e04c3fSmrg                   key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements);
244101e04c3fSmrg      memset(key->vertex_element, 0,
244201e04c3fSmrg             sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
244301e04c3fSmrg   }
24443464ebd5Sriastradh   memcpy(key->vertex_element,
24453464ebd5Sriastradh          llvm->draw->pt.vertex_element,
244601e04c3fSmrg          sizeof(struct pipe_vertex_element) *
244701e04c3fSmrg             MIN2(key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements));
2448af69d88dSmrg
244901e04c3fSmrg   draw_sampler = draw_llvm_variant_key_samplers(key);
245001e04c3fSmrg   memset(draw_sampler, 0,
245101e04c3fSmrg          MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
24523464ebd5Sriastradh
24533464ebd5Sriastradh   for (i = 0 ; i < key->nr_samplers; i++) {
2454af69d88dSmrg      lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
2455af69d88dSmrg                                      llvm->draw->samplers[PIPE_SHADER_VERTEX][i]);
2456af69d88dSmrg   }
2457af69d88dSmrg   for (i = 0 ; i < key->nr_sampler_views; i++) {
2458af69d88dSmrg      lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
2459af69d88dSmrg                                      llvm->draw->sampler_views[PIPE_SHADER_VERTEX][i]);
24603464ebd5Sriastradh   }
24613464ebd5Sriastradh
24627ec681f3Smrg   draw_image = draw_llvm_variant_key_images(key);
24637ec681f3Smrg   memset(draw_image, 0,
24647ec681f3Smrg          key->nr_images * sizeof *draw_image);
24657ec681f3Smrg   for (i = 0; i < key->nr_images; i++) {
24667ec681f3Smrg      lp_sampler_static_texture_state_image(&draw_image[i].image_state,
24677ec681f3Smrg                                            llvm->draw->images[PIPE_SHADER_VERTEX][i]);
24687ec681f3Smrg   }
24693464ebd5Sriastradh   return key;
24703464ebd5Sriastradh}
24713464ebd5Sriastradh
24723464ebd5Sriastradh
2473af69d88dSmrgvoid
2474af69d88dSmrgdraw_llvm_dump_variant_key(struct draw_llvm_variant_key *key)
2475af69d88dSmrg{
2476af69d88dSmrg   unsigned i;
2477af69d88dSmrg   struct draw_sampler_static_state *sampler = draw_llvm_variant_key_samplers(key);
24787ec681f3Smrg   struct draw_image_static_state *image = draw_llvm_variant_key_images(key);
2479af69d88dSmrg   debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
2480af69d88dSmrg   debug_printf("clip_xy = %u\n", key->clip_xy);
2481af69d88dSmrg   debug_printf("clip_z = %u\n", key->clip_z);
2482af69d88dSmrg   debug_printf("clip_user = %u\n", key->clip_user);
2483af69d88dSmrg   debug_printf("bypass_viewport = %u\n", key->bypass_viewport);
2484af69d88dSmrg   debug_printf("clip_halfz = %u\n", key->clip_halfz);
2485af69d88dSmrg   debug_printf("need_edgeflags = %u\n", key->need_edgeflags);
24867ec681f3Smrg   debug_printf("has_gs_or_tes = %u\n", key->has_gs_or_tes);
2487af69d88dSmrg   debug_printf("ucp_enable = %u\n", key->ucp_enable);
2488af69d88dSmrg
2489af69d88dSmrg   for (i = 0 ; i < key->nr_vertex_elements; i++) {
2490af69d88dSmrg      debug_printf("vertex_element[%i].src_offset = %u\n", i, key->vertex_element[i].src_offset);
2491af69d88dSmrg      debug_printf("vertex_element[%i].instance_divisor = %u\n", i, key->vertex_element[i].instance_divisor);
2492af69d88dSmrg      debug_printf("vertex_element[%i].vertex_buffer_index = %u\n", i, key->vertex_element[i].vertex_buffer_index);
2493af69d88dSmrg      debug_printf("vertex_element[%i].src_format = %s\n", i, util_format_name(key->vertex_element[i].src_format));
2494af69d88dSmrg   }
2495af69d88dSmrg
2496af69d88dSmrg   for (i = 0 ; i < key->nr_sampler_views; i++) {
2497af69d88dSmrg      debug_printf("sampler[%i].src_format = %s\n", i, util_format_name(sampler[i].texture_state.format));
2498af69d88dSmrg   }
24997ec681f3Smrg
25007ec681f3Smrg   for (i = 0 ; i < key->nr_images; i++)
25017ec681f3Smrg      debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
2502af69d88dSmrg}
2503af69d88dSmrg
2504af69d88dSmrg
25053464ebd5Sriastradhvoid
25063464ebd5Sriastradhdraw_llvm_set_mapped_texture(struct draw_context *draw,
250701e04c3fSmrg                             enum pipe_shader_type shader_stage,
2508af69d88dSmrg                             unsigned sview_idx,
25093464ebd5Sriastradh                             uint32_t width, uint32_t height, uint32_t depth,
25103464ebd5Sriastradh                             uint32_t first_level, uint32_t last_level,
25117ec681f3Smrg                             uint32_t num_samples,
25127ec681f3Smrg                             uint32_t sample_stride,
2513af69d88dSmrg                             const void *base_ptr,
25143464ebd5Sriastradh                             uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
25153464ebd5Sriastradh                             uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
2516af69d88dSmrg                             uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS])
25173464ebd5Sriastradh{
25183464ebd5Sriastradh   unsigned j;
25193464ebd5Sriastradh   struct draw_jit_texture *jit_tex;
25203464ebd5Sriastradh
25217ec681f3Smrg   switch (shader_stage) {
25227ec681f3Smrg   case PIPE_SHADER_VERTEX:
252301e04c3fSmrg      assert(sview_idx < ARRAY_SIZE(draw->llvm->jit_context.textures));
2524af69d88dSmrg      jit_tex = &draw->llvm->jit_context.textures[sview_idx];
25257ec681f3Smrg      break;
25267ec681f3Smrg   case PIPE_SHADER_GEOMETRY:
252701e04c3fSmrg      assert(sview_idx < ARRAY_SIZE(draw->llvm->gs_jit_context.textures));
2528af69d88dSmrg      jit_tex = &draw->llvm->gs_jit_context.textures[sview_idx];
25297ec681f3Smrg      break;
25307ec681f3Smrg   case PIPE_SHADER_TESS_CTRL:
25317ec681f3Smrg      assert(sview_idx < ARRAY_SIZE(draw->llvm->tcs_jit_context.textures));
25327ec681f3Smrg      jit_tex = &draw->llvm->tcs_jit_context.textures[sview_idx];
25337ec681f3Smrg      break;
25347ec681f3Smrg   case PIPE_SHADER_TESS_EVAL:
25357ec681f3Smrg      assert(sview_idx < ARRAY_SIZE(draw->llvm->tes_jit_context.textures));
25367ec681f3Smrg      jit_tex = &draw->llvm->tes_jit_context.textures[sview_idx];
25377ec681f3Smrg      break;
25387ec681f3Smrg   default:
2539af69d88dSmrg      assert(0);
2540af69d88dSmrg      return;
2541af69d88dSmrg   }
25423464ebd5Sriastradh
25433464ebd5Sriastradh   jit_tex->width = width;
25443464ebd5Sriastradh   jit_tex->height = height;
25453464ebd5Sriastradh   jit_tex->depth = depth;
25463464ebd5Sriastradh   jit_tex->first_level = first_level;
25473464ebd5Sriastradh   jit_tex->last_level = last_level;
2548af69d88dSmrg   jit_tex->base = base_ptr;
25497ec681f3Smrg   jit_tex->num_samples = num_samples;
25507ec681f3Smrg   jit_tex->sample_stride = sample_stride;
25513464ebd5Sriastradh
25523464ebd5Sriastradh   for (j = first_level; j <= last_level; j++) {
2553af69d88dSmrg      jit_tex->mip_offsets[j] = mip_offsets[j];
25543464ebd5Sriastradh      jit_tex->row_stride[j] = row_stride[j];
25553464ebd5Sriastradh      jit_tex->img_stride[j] = img_stride[j];
25563464ebd5Sriastradh   }
25573464ebd5Sriastradh}
25583464ebd5Sriastradh
25597ec681f3Smrgvoid
25607ec681f3Smrgdraw_llvm_set_mapped_image(struct draw_context *draw,
25617ec681f3Smrg                           enum pipe_shader_type shader_stage,
25627ec681f3Smrg                           unsigned idx,
25637ec681f3Smrg                           uint32_t width, uint32_t height, uint32_t depth,
25647ec681f3Smrg                           const void *base_ptr,
25657ec681f3Smrg                           uint32_t row_stride,
25667ec681f3Smrg                           uint32_t img_stride,
25677ec681f3Smrg                           uint32_t num_samples,
25687ec681f3Smrg                           uint32_t sample_stride)
25697ec681f3Smrg{
25707ec681f3Smrg   struct draw_jit_image *jit_image;
25717ec681f3Smrg
25727ec681f3Smrg   switch (shader_stage) {
25737ec681f3Smrg   case PIPE_SHADER_VERTEX:
25747ec681f3Smrg      assert(idx < ARRAY_SIZE(draw->llvm->jit_context.images));
25757ec681f3Smrg      jit_image = &draw->llvm->jit_context.images[idx];
25767ec681f3Smrg      break;
25777ec681f3Smrg   case PIPE_SHADER_GEOMETRY:
25787ec681f3Smrg      assert(idx < ARRAY_SIZE(draw->llvm->gs_jit_context.images));
25797ec681f3Smrg      jit_image = &draw->llvm->gs_jit_context.images[idx];
25807ec681f3Smrg      break;
25817ec681f3Smrg   case PIPE_SHADER_TESS_CTRL:
25827ec681f3Smrg      assert(idx < ARRAY_SIZE(draw->llvm->tcs_jit_context.images));
25837ec681f3Smrg      jit_image = &draw->llvm->tcs_jit_context.images[idx];
25847ec681f3Smrg      break;
25857ec681f3Smrg   case PIPE_SHADER_TESS_EVAL:
25867ec681f3Smrg      assert(idx < ARRAY_SIZE(draw->llvm->tes_jit_context.images));
25877ec681f3Smrg      jit_image = &draw->llvm->tes_jit_context.images[idx];
25887ec681f3Smrg      break;
25897ec681f3Smrg   default:
25907ec681f3Smrg      assert(0);
25917ec681f3Smrg      return;
25927ec681f3Smrg   }
25937ec681f3Smrg
25947ec681f3Smrg   jit_image->width = width;
25957ec681f3Smrg   jit_image->height = height;
25967ec681f3Smrg   jit_image->depth = depth;
25977ec681f3Smrg   jit_image->base = base_ptr;
25987ec681f3Smrg
25997ec681f3Smrg   jit_image->row_stride = row_stride;
26007ec681f3Smrg   jit_image->img_stride = img_stride;
26017ec681f3Smrg   jit_image->num_samples = num_samples;
26027ec681f3Smrg   jit_image->sample_stride = sample_stride;
26037ec681f3Smrg}
26047ec681f3Smrg
26053464ebd5Sriastradh
26063464ebd5Sriastradhvoid
2607af69d88dSmrgdraw_llvm_set_sampler_state(struct draw_context *draw,
260801e04c3fSmrg                            enum pipe_shader_type shader_type)
26093464ebd5Sriastradh{
26103464ebd5Sriastradh   unsigned i;
26113464ebd5Sriastradh
26127ec681f3Smrg   switch (shader_type) {
26137ec681f3Smrg   case PIPE_SHADER_VERTEX:
2614af69d88dSmrg      for (i = 0; i < draw->num_samplers[PIPE_SHADER_VERTEX]; i++) {
2615af69d88dSmrg         struct draw_jit_sampler *jit_sam = &draw->llvm->jit_context.samplers[i];
2616af69d88dSmrg
261701e04c3fSmrg         if (draw->samplers[PIPE_SHADER_VERTEX][i]) {
2618af69d88dSmrg            const struct pipe_sampler_state *s
2619af69d88dSmrg               = draw->samplers[PIPE_SHADER_VERTEX][i];
2620af69d88dSmrg            jit_sam->min_lod = s->min_lod;
2621af69d88dSmrg            jit_sam->max_lod = s->max_lod;
2622af69d88dSmrg            jit_sam->lod_bias = s->lod_bias;
26237ec681f3Smrg            jit_sam->max_aniso = s->max_anisotropy;
2624af69d88dSmrg            COPY_4V(jit_sam->border_color, s->border_color.f);
2625af69d88dSmrg         }
2626af69d88dSmrg      }
26277ec681f3Smrg      break;
26287ec681f3Smrg   case PIPE_SHADER_GEOMETRY:
2629af69d88dSmrg      for (i = 0; i < draw->num_samplers[PIPE_SHADER_GEOMETRY]; i++) {
2630af69d88dSmrg         struct draw_jit_sampler *jit_sam = &draw->llvm->gs_jit_context.samplers[i];
2631af69d88dSmrg
263201e04c3fSmrg         if (draw->samplers[PIPE_SHADER_GEOMETRY][i]) {
2633af69d88dSmrg            const struct pipe_sampler_state *s
2634af69d88dSmrg               = draw->samplers[PIPE_SHADER_GEOMETRY][i];
2635af69d88dSmrg            jit_sam->min_lod = s->min_lod;
2636af69d88dSmrg            jit_sam->max_lod = s->max_lod;
2637af69d88dSmrg            jit_sam->lod_bias = s->lod_bias;
26387ec681f3Smrg            jit_sam->max_aniso = s->max_anisotropy;
2639af69d88dSmrg            COPY_4V(jit_sam->border_color, s->border_color.f);
2640af69d88dSmrg         }
26413464ebd5Sriastradh      }
26427ec681f3Smrg      break;
26437ec681f3Smrg   case PIPE_SHADER_TESS_CTRL:
26447ec681f3Smrg      for (i = 0; i < draw->num_samplers[PIPE_SHADER_TESS_CTRL]; i++) {
26457ec681f3Smrg         struct draw_jit_sampler *jit_sam = &draw->llvm->tcs_jit_context.samplers[i];
26467ec681f3Smrg
26477ec681f3Smrg         if (draw->samplers[PIPE_SHADER_TESS_CTRL][i]) {
26487ec681f3Smrg            const struct pipe_sampler_state *s
26497ec681f3Smrg               = draw->samplers[PIPE_SHADER_TESS_CTRL][i];
26507ec681f3Smrg            jit_sam->min_lod = s->min_lod;
26517ec681f3Smrg            jit_sam->max_lod = s->max_lod;
26527ec681f3Smrg            jit_sam->lod_bias = s->lod_bias;
26537ec681f3Smrg            jit_sam->max_aniso = s->max_anisotropy;
26547ec681f3Smrg            COPY_4V(jit_sam->border_color, s->border_color.f);
26557ec681f3Smrg         }
26567ec681f3Smrg      }
26577ec681f3Smrg      break;
26587ec681f3Smrg   case PIPE_SHADER_TESS_EVAL:
26597ec681f3Smrg      for (i = 0; i < draw->num_samplers[PIPE_SHADER_TESS_EVAL]; i++) {
26607ec681f3Smrg         struct draw_jit_sampler *jit_sam = &draw->llvm->tes_jit_context.samplers[i];
26617ec681f3Smrg
26627ec681f3Smrg         if (draw->samplers[PIPE_SHADER_TESS_EVAL][i]) {
26637ec681f3Smrg            const struct pipe_sampler_state *s
26647ec681f3Smrg               = draw->samplers[PIPE_SHADER_TESS_EVAL][i];
26657ec681f3Smrg            jit_sam->min_lod = s->min_lod;
26667ec681f3Smrg            jit_sam->max_lod = s->max_lod;
26677ec681f3Smrg            jit_sam->lod_bias = s->lod_bias;
26687ec681f3Smrg            jit_sam->max_aniso = s->max_anisotropy;
26697ec681f3Smrg            COPY_4V(jit_sam->border_color, s->border_color.f);
26707ec681f3Smrg         }
26717ec681f3Smrg      }
26727ec681f3Smrg      break;
26737ec681f3Smrg   default:
26747ec681f3Smrg      assert(0);
26757ec681f3Smrg      break;
26763464ebd5Sriastradh   }
26773464ebd5Sriastradh}
26783464ebd5Sriastradh
26793464ebd5Sriastradh
26803464ebd5Sriastradhvoid
26813464ebd5Sriastradhdraw_llvm_destroy_variant(struct draw_llvm_variant *variant)
26823464ebd5Sriastradh{
26833464ebd5Sriastradh   struct draw_llvm *llvm = variant->llvm;
26843464ebd5Sriastradh
268501e04c3fSmrg   if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
268601e04c3fSmrg      debug_printf("Deleting VS variant: %u vs variants,\t%u total variants\n",
268701e04c3fSmrg                    variant->shader->variants_cached, llvm->nr_variants);
268801e04c3fSmrg   }
268901e04c3fSmrg
2690af69d88dSmrg   gallivm_destroy(variant->gallivm);
2691af69d88dSmrg
2692af69d88dSmrg   remove_from_list(&variant->list_item_local);
2693af69d88dSmrg   variant->shader->variants_cached--;
2694af69d88dSmrg   remove_from_list(&variant->list_item_global);
2695af69d88dSmrg   llvm->nr_variants--;
2696af69d88dSmrg   FREE(variant);
2697af69d88dSmrg}
2698af69d88dSmrg
2699af69d88dSmrg
2700af69d88dSmrg/**
2701af69d88dSmrg * Create LLVM types for various structures.
2702af69d88dSmrg */
2703af69d88dSmrgstatic void
2704af69d88dSmrgcreate_gs_jit_types(struct draw_gs_llvm_variant *var)
2705af69d88dSmrg{
2706af69d88dSmrg   struct gallivm_state *gallivm = var->gallivm;
27077ec681f3Smrg   LLVMTypeRef texture_type, sampler_type, image_type, context_type;
2708af69d88dSmrg
2709af69d88dSmrg   texture_type = create_jit_texture_type(gallivm, "texture");
2710af69d88dSmrg   sampler_type = create_jit_sampler_type(gallivm, "sampler");
27117ec681f3Smrg   image_type = create_jit_image_type(gallivm, "image");
2712af69d88dSmrg
2713af69d88dSmrg   context_type = create_gs_jit_context_type(gallivm,
2714af69d88dSmrg                                             var->shader->base.vector_length,
2715af69d88dSmrg                                             texture_type, sampler_type,
27167ec681f3Smrg                                             image_type,
2717af69d88dSmrg                                             "draw_gs_jit_context");
2718af69d88dSmrg   var->context_ptr_type = LLVMPointerType(context_type, 0);
2719af69d88dSmrg
2720af69d88dSmrg   var->input_array_type = create_gs_jit_input_type(gallivm);
2721af69d88dSmrg}
2722af69d88dSmrg
2723af69d88dSmrgstatic LLVMTypeRef
2724af69d88dSmrgget_gs_context_ptr_type(struct draw_gs_llvm_variant *variant)
2725af69d88dSmrg{
2726af69d88dSmrg   if (!variant->context_ptr_type)
2727af69d88dSmrg      create_gs_jit_types(variant);
2728af69d88dSmrg   return variant->context_ptr_type;
2729af69d88dSmrg}
2730af69d88dSmrg
2731af69d88dSmrgstatic LLVMValueRef
2732af69d88dSmrggenerate_mask_value(struct draw_gs_llvm_variant *variant,
2733af69d88dSmrg                    struct lp_type gs_type)
2734af69d88dSmrg{
2735af69d88dSmrg   struct gallivm_state *gallivm = variant->gallivm;
2736af69d88dSmrg   LLVMBuilderRef builder = gallivm->builder;
2737af69d88dSmrg   struct lp_type mask_type = lp_int_type(gs_type);
2738af69d88dSmrg   LLVMValueRef num_prims;
2739af69d88dSmrg   LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
2740af69d88dSmrg   unsigned i;
2741af69d88dSmrg
2742af69d88dSmrg   num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type),
2743af69d88dSmrg                                  variant->num_prims);
274401e04c3fSmrg   for (i = 0; i < gs_type.length; i++) {
2745af69d88dSmrg      LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2746af69d88dSmrg      mask_val = LLVMBuildInsertElement(builder, mask_val, idx, idx, "");
2747af69d88dSmrg   }
2748af69d88dSmrg   mask_val = lp_build_compare(gallivm, mask_type,
2749af69d88dSmrg                               PIPE_FUNC_GREATER, num_prims, mask_val);
2750af69d88dSmrg
2751af69d88dSmrg   return mask_val;
2752af69d88dSmrg}
2753af69d88dSmrg
2754af69d88dSmrgstatic void
2755af69d88dSmrgdraw_gs_llvm_generate(struct draw_llvm *llvm,
2756af69d88dSmrg                      struct draw_gs_llvm_variant *variant)
2757af69d88dSmrg{
2758af69d88dSmrg   struct gallivm_state *gallivm = variant->gallivm;
2759af69d88dSmrg   LLVMContextRef context = gallivm->context;
2760af69d88dSmrg   LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
27617ec681f3Smrg   LLVMTypeRef arg_types[8];
2762af69d88dSmrg   LLVMTypeRef func_type;
2763af69d88dSmrg   LLVMValueRef variant_func;
2764af69d88dSmrg   LLVMValueRef context_ptr;
2765af69d88dSmrg   LLVMValueRef prim_id_ptr;
2766af69d88dSmrg   LLVMBasicBlockRef block;
2767af69d88dSmrg   LLVMBuilderRef builder;
2768af69d88dSmrg   LLVMValueRef io_ptr, input_array, num_prims, mask_val;
2769af69d88dSmrg   struct lp_build_sampler_soa *sampler = 0;
27707ec681f3Smrg   struct lp_build_image_soa *image = NULL;
2771af69d88dSmrg   struct lp_build_context bld;
2772af69d88dSmrg   struct lp_bld_tgsi_system_values system_values;
2773af69d88dSmrg   char func_name[64];
2774af69d88dSmrg   struct lp_type gs_type;
2775af69d88dSmrg   unsigned i;
2776af69d88dSmrg   struct draw_gs_llvm_iface gs_iface;
2777af69d88dSmrg   const struct tgsi_token *tokens = variant->shader->base.state.tokens;
2778af69d88dSmrg   LLVMValueRef consts_ptr, num_consts_ptr;
27797ec681f3Smrg   LLVMValueRef ssbos_ptr, num_ssbos_ptr;
2780af69d88dSmrg   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
2781af69d88dSmrg   struct lp_build_mask_context mask;
2782af69d88dSmrg   const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
2783af69d88dSmrg   unsigned vector_length = variant->shader->base.vector_length;
2784af69d88dSmrg
2785af69d88dSmrg   memset(&system_values, 0, sizeof(system_values));
27867ec681f3Smrg   memset(&outputs, 0, sizeof(outputs));
2787af69d88dSmrg
27887ec681f3Smrg   snprintf(func_name, sizeof(func_name), "draw_llvm_gs_variant");
2789af69d88dSmrg
2790af69d88dSmrg   assert(variant->vertex_header_ptr_type);
2791af69d88dSmrg
2792af69d88dSmrg   arg_types[0] = get_gs_context_ptr_type(variant);    /* context */
2793af69d88dSmrg   arg_types[1] = variant->input_array_type;           /* input */
27947ec681f3Smrg   arg_types[2] = LLVMPointerType(variant->vertex_header_ptr_type, 0);     /* vertex_header */
2795af69d88dSmrg   arg_types[3] = int32_type;                          /* num_prims */
2796af69d88dSmrg   arg_types[4] = int32_type;                          /* instance_id */
2797af69d88dSmrg   arg_types[5] = LLVMPointerType(
2798af69d88dSmrg      LLVMVectorType(int32_type, vector_length), 0);   /* prim_id_ptr */
279901e04c3fSmrg   arg_types[6] = int32_type;
28007ec681f3Smrg   arg_types[7] = int32_type;
2801af69d88dSmrg
280201e04c3fSmrg   func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);
2803af69d88dSmrg
2804af69d88dSmrg   variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
2805af69d88dSmrg
2806af69d88dSmrg   variant->function = variant_func;
2807af69d88dSmrg
2808af69d88dSmrg   LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
2809af69d88dSmrg
281001e04c3fSmrg   for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
2811af69d88dSmrg      if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
281201e04c3fSmrg         lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
2813af69d88dSmrg
28147ec681f3Smrg   if (gallivm->cache && gallivm->cache->data_size)
28157ec681f3Smrg      return;
2816af69d88dSmrg   context_ptr               = LLVMGetParam(variant_func, 0);
2817af69d88dSmrg   input_array               = LLVMGetParam(variant_func, 1);
2818af69d88dSmrg   io_ptr                    = LLVMGetParam(variant_func, 2);
2819af69d88dSmrg   num_prims                 = LLVMGetParam(variant_func, 3);
2820af69d88dSmrg   system_values.instance_id = LLVMGetParam(variant_func, 4);
2821af69d88dSmrg   prim_id_ptr               = LLVMGetParam(variant_func, 5);
282201e04c3fSmrg   system_values.invocation_id = LLVMGetParam(variant_func, 6);
28237ec681f3Smrg   system_values.view_index  = LLVMGetParam(variant_func, 7);
2824af69d88dSmrg
2825af69d88dSmrg   lp_build_name(context_ptr, "context");
2826af69d88dSmrg   lp_build_name(input_array, "input");
2827af69d88dSmrg   lp_build_name(io_ptr, "io");
2828af69d88dSmrg   lp_build_name(num_prims, "num_prims");
2829af69d88dSmrg   lp_build_name(system_values.instance_id, "instance_id");
2830af69d88dSmrg   lp_build_name(prim_id_ptr, "prim_id_ptr");
283101e04c3fSmrg   lp_build_name(system_values.invocation_id, "invocation_id");
28327ec681f3Smrg   lp_build_name(system_values.view_index, "view_index");
2833af69d88dSmrg
2834af69d88dSmrg   variant->context_ptr = context_ptr;
2835af69d88dSmrg   variant->io_ptr = io_ptr;
2836af69d88dSmrg   variant->num_prims = num_prims;
2837af69d88dSmrg
2838af69d88dSmrg   gs_iface.base.fetch_input = draw_gs_llvm_fetch_input;
2839af69d88dSmrg   gs_iface.base.emit_vertex = draw_gs_llvm_emit_vertex;
2840af69d88dSmrg   gs_iface.base.end_primitive = draw_gs_llvm_end_primitive;
2841af69d88dSmrg   gs_iface.base.gs_epilogue = draw_gs_llvm_epilogue;
2842af69d88dSmrg   gs_iface.input = input_array;
2843af69d88dSmrg   gs_iface.variant = variant;
2844af69d88dSmrg
2845af69d88dSmrg   /*
2846af69d88dSmrg    * Function body
2847af69d88dSmrg    */
2848af69d88dSmrg
2849af69d88dSmrg   block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
2850af69d88dSmrg   builder = gallivm->builder;
2851af69d88dSmrg   LLVMPositionBuilderAtEnd(builder, block);
2852af69d88dSmrg
2853af69d88dSmrg   lp_build_context_init(&bld, gallivm, lp_type_int(32));
2854af69d88dSmrg
2855af69d88dSmrg   memset(&gs_type, 0, sizeof gs_type);
2856af69d88dSmrg   gs_type.floating = TRUE; /* floating point values */
2857af69d88dSmrg   gs_type.sign = TRUE;     /* values are signed */
2858af69d88dSmrg   gs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
2859af69d88dSmrg   gs_type.width = 32;      /* 32-bit float */
2860af69d88dSmrg   gs_type.length = vector_length;
2861af69d88dSmrg
2862af69d88dSmrg   consts_ptr = draw_gs_jit_context_constants(variant->gallivm, context_ptr);
2863af69d88dSmrg   num_consts_ptr =
2864af69d88dSmrg      draw_gs_jit_context_num_constants(variant->gallivm, context_ptr);
2865af69d88dSmrg
28667ec681f3Smrg   ssbos_ptr = draw_gs_jit_context_ssbos(variant->gallivm, context_ptr);
28677ec681f3Smrg   num_ssbos_ptr =
28687ec681f3Smrg      draw_gs_jit_context_num_ssbos(variant->gallivm, context_ptr);
2869af69d88dSmrg
28707ec681f3Smrg   /* code generated texture sampling */
28717ec681f3Smrg   sampler = draw_llvm_sampler_soa_create(variant->key.samplers, variant->key.nr_samplers);
28727ec681f3Smrg   image = draw_llvm_image_soa_create(draw_gs_llvm_variant_key_images(&variant->key),
28737ec681f3Smrg                                      variant->key.nr_images);
2874af69d88dSmrg   mask_val = generate_mask_value(variant, gs_type);
2875af69d88dSmrg   lp_build_mask_begin(&mask, gallivm, gs_type, mask_val);
2876af69d88dSmrg
2877af69d88dSmrg   if (gs_info->uses_primid) {
2878af69d88dSmrg      system_values.prim_id = LLVMBuildLoad(builder, prim_id_ptr, "prim_id");
28793464ebd5Sriastradh   }
28803464ebd5Sriastradh
2881af69d88dSmrg   if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
28827ec681f3Smrg      if (llvm->draw->gs.geometry_shader->state.type == PIPE_SHADER_IR_TGSI)
28837ec681f3Smrg         tgsi_dump(tokens, 0);
28847ec681f3Smrg      else
28857ec681f3Smrg         nir_print_shader(llvm->draw->gs.geometry_shader->state.ir.nir, stderr);
2886af69d88dSmrg      draw_gs_llvm_dump_variant_key(&variant->key);
28873464ebd5Sriastradh   }
28883464ebd5Sriastradh
28897ec681f3Smrg   struct lp_build_tgsi_params params;
28907ec681f3Smrg   memset(&params, 0, sizeof(params));
28917ec681f3Smrg
28927ec681f3Smrg   params.type = gs_type;
28937ec681f3Smrg   params.mask = &mask;
28947ec681f3Smrg   params.consts_ptr = consts_ptr;
28957ec681f3Smrg   params.const_sizes_ptr = num_consts_ptr;
28967ec681f3Smrg   params.system_values = &system_values;
28977ec681f3Smrg   params.context_ptr = context_ptr;
28987ec681f3Smrg   params.sampler = sampler;
28997ec681f3Smrg   params.info = &llvm->draw->gs.geometry_shader->info;
29007ec681f3Smrg   params.gs_iface = (const struct lp_build_gs_iface *)&gs_iface;
29017ec681f3Smrg   params.ssbo_ptr = ssbos_ptr;
29027ec681f3Smrg   params.ssbo_sizes_ptr = num_ssbos_ptr;
29037ec681f3Smrg   params.image = image;
29047ec681f3Smrg   params.gs_vertex_streams = variant->shader->base.num_vertex_streams;
29057ec681f3Smrg   params.aniso_filter_table = draw_gs_jit_context_aniso_filter_table(gallivm, context_ptr);
29067ec681f3Smrg
29077ec681f3Smrg   if (llvm->draw->gs.geometry_shader->state.type == PIPE_SHADER_IR_TGSI)
29087ec681f3Smrg      lp_build_tgsi_soa(variant->gallivm,
29097ec681f3Smrg                        tokens,
29107ec681f3Smrg                        &params,
29117ec681f3Smrg                        outputs);
29127ec681f3Smrg   else
29137ec681f3Smrg      lp_build_nir_soa(variant->gallivm,
29147ec681f3Smrg                       llvm->draw->gs.geometry_shader->state.ir.nir,
29157ec681f3Smrg                       &params,
29167ec681f3Smrg                       outputs);
2917af69d88dSmrg
2918af69d88dSmrg   sampler->destroy(sampler);
29197ec681f3Smrg   image->destroy(image);
2920af69d88dSmrg
2921af69d88dSmrg   lp_build_mask_end(&mask);
2922af69d88dSmrg
2923af69d88dSmrg   LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
2924af69d88dSmrg
2925af69d88dSmrg   gallivm_verify_function(gallivm, variant_func);
2926af69d88dSmrg}
2927af69d88dSmrg
2928af69d88dSmrgstruct draw_gs_llvm_variant *
2929af69d88dSmrgdraw_gs_llvm_create_variant(struct draw_llvm *llvm,
2930af69d88dSmrg                            unsigned num_outputs,
2931af69d88dSmrg                            const struct draw_gs_llvm_variant_key *key)
2932af69d88dSmrg{
2933af69d88dSmrg   struct draw_gs_llvm_variant *variant;
2934af69d88dSmrg   struct llvm_geometry_shader *shader =
2935af69d88dSmrg      llvm_geometry_shader(llvm->draw->gs.geometry_shader);
2936af69d88dSmrg   LLVMTypeRef vertex_header;
2937af69d88dSmrg   char module_name[64];
29387ec681f3Smrg   unsigned char ir_sha1_cache_key[20];
29397ec681f3Smrg   struct lp_cached_code cached = { 0 };
29407ec681f3Smrg   bool needs_caching = false;
2941af69d88dSmrg
2942af69d88dSmrg   variant = MALLOC(sizeof *variant +
2943af69d88dSmrg                    shader->variant_key_size -
2944af69d88dSmrg                    sizeof variant->key);
294501e04c3fSmrg   if (!variant)
2946af69d88dSmrg      return NULL;
2947af69d88dSmrg
2948af69d88dSmrg   variant->llvm = llvm;
2949af69d88dSmrg   variant->shader = shader;
2950af69d88dSmrg
29517ec681f3Smrg   snprintf(module_name, sizeof(module_name), "draw_llvm_gs_variant%u",
29527ec681f3Smrg            variant->shader->variants_cached);
2953af69d88dSmrg
29547ec681f3Smrg   memcpy(&variant->key, key, shader->variant_key_size);
2955af69d88dSmrg
29567ec681f3Smrg   if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
29577ec681f3Smrg      draw_get_ir_cache_key(shader->base.state.ir.nir,
29587ec681f3Smrg                            key,
29597ec681f3Smrg                            shader->variant_key_size,
29607ec681f3Smrg                            num_outputs,
29617ec681f3Smrg                            ir_sha1_cache_key);
29627ec681f3Smrg
29637ec681f3Smrg      llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
29647ec681f3Smrg                                         &cached,
29657ec681f3Smrg                                         ir_sha1_cache_key);
29667ec681f3Smrg      if (!cached.data_size)
29677ec681f3Smrg         needs_caching = true;
29687ec681f3Smrg   }
29697ec681f3Smrg   variant->gallivm = gallivm_create(module_name, llvm->context, &cached);
2970af69d88dSmrg
29717ec681f3Smrg   create_gs_jit_types(variant);
2972af69d88dSmrg
2973af69d88dSmrg   vertex_header = create_jit_vertex_header(variant->gallivm, num_outputs);
2974af69d88dSmrg
2975af69d88dSmrg   variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
2976af69d88dSmrg
2977af69d88dSmrg   draw_gs_llvm_generate(llvm, variant);
2978af69d88dSmrg
2979af69d88dSmrg   gallivm_compile_module(variant->gallivm);
2980af69d88dSmrg
2981af69d88dSmrg   variant->jit_func = (draw_gs_jit_func)
2982af69d88dSmrg         gallivm_jit_function(variant->gallivm, variant->function);
2983af69d88dSmrg
29847ec681f3Smrg   if (needs_caching)
29857ec681f3Smrg      llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
29867ec681f3Smrg                                           &cached,
29877ec681f3Smrg                                           ir_sha1_cache_key);
2988af69d88dSmrg   gallivm_free_ir(variant->gallivm);
2989af69d88dSmrg
2990af69d88dSmrg   variant->list_item_global.base = variant;
2991af69d88dSmrg   variant->list_item_local.base = variant;
2992af69d88dSmrg   /*variant->no = */shader->variants_created++;
2993af69d88dSmrg   variant->list_item_global.base = variant;
2994af69d88dSmrg
2995af69d88dSmrg   return variant;
2996af69d88dSmrg}
2997af69d88dSmrg
2998af69d88dSmrgvoid
2999af69d88dSmrgdraw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant)
3000af69d88dSmrg{
3001af69d88dSmrg   struct draw_llvm *llvm = variant->llvm;
3002af69d88dSmrg
300301e04c3fSmrg   if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
300401e04c3fSmrg      debug_printf("Deleting GS variant: %u gs variants,\t%u total variants\n",
300501e04c3fSmrg                    variant->shader->variants_cached, llvm->nr_gs_variants);
300601e04c3fSmrg   }
300701e04c3fSmrg
3008af69d88dSmrg   gallivm_destroy(variant->gallivm);
3009af69d88dSmrg
30103464ebd5Sriastradh   remove_from_list(&variant->list_item_local);
30113464ebd5Sriastradh   variant->shader->variants_cached--;
30123464ebd5Sriastradh   remove_from_list(&variant->list_item_global);
3013af69d88dSmrg   llvm->nr_gs_variants--;
30143464ebd5Sriastradh   FREE(variant);
30153464ebd5Sriastradh}
3016af69d88dSmrg
3017af69d88dSmrgstruct draw_gs_llvm_variant_key *
3018af69d88dSmrgdraw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
3019af69d88dSmrg{
3020af69d88dSmrg   unsigned i;
3021af69d88dSmrg   struct draw_gs_llvm_variant_key *key;
3022af69d88dSmrg   struct draw_sampler_static_state *draw_sampler;
30237ec681f3Smrg   struct draw_image_static_state *draw_image;
3024af69d88dSmrg
3025af69d88dSmrg   key = (struct draw_gs_llvm_variant_key *)store;
3026af69d88dSmrg
302701e04c3fSmrg   memset(key, 0, offsetof(struct draw_gs_llvm_variant_key, samplers[0]));
302801e04c3fSmrg
3029af69d88dSmrg   key->num_outputs = draw_total_gs_outputs(llvm->draw);
3030af69d88dSmrg
30317ec681f3Smrg   key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color;
30327ec681f3Smrg
3033af69d88dSmrg   /* All variants of this shader will have the same value for
3034af69d88dSmrg    * nr_samplers.  Not yet trying to compact away holes in the
3035af69d88dSmrg    * sampler array.
3036af69d88dSmrg    */
3037af69d88dSmrg   key->nr_samplers = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
3038af69d88dSmrg   if (llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
3039af69d88dSmrg      key->nr_sampler_views =
3040af69d88dSmrg         llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
3041af69d88dSmrg   }
3042af69d88dSmrg   else {
3043af69d88dSmrg      key->nr_sampler_views = key->nr_samplers;
3044af69d88dSmrg   }
3045af69d88dSmrg
30467ec681f3Smrg   key->nr_images = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
30477ec681f3Smrg
3048af69d88dSmrg   draw_sampler = key->samplers;
3049af69d88dSmrg
3050af69d88dSmrg   memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
3051af69d88dSmrg
3052af69d88dSmrg   for (i = 0 ; i < key->nr_samplers; i++) {
3053af69d88dSmrg      lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
3054af69d88dSmrg                                      llvm->draw->samplers[PIPE_SHADER_GEOMETRY][i]);
3055af69d88dSmrg   }
3056af69d88dSmrg   for (i = 0 ; i < key->nr_sampler_views; i++) {
3057af69d88dSmrg      lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
3058af69d88dSmrg                                      llvm->draw->sampler_views[PIPE_SHADER_GEOMETRY][i]);
3059af69d88dSmrg   }
3060af69d88dSmrg
30617ec681f3Smrg   draw_image = draw_gs_llvm_variant_key_images(key);
30627ec681f3Smrg   memset(draw_image, 0,
30637ec681f3Smrg          key->nr_images * sizeof *draw_image);
30647ec681f3Smrg   for (i = 0; i < key->nr_images; i++) {
30657ec681f3Smrg      lp_sampler_static_texture_state_image(&draw_image[i].image_state,
30667ec681f3Smrg                                            llvm->draw->images[PIPE_SHADER_GEOMETRY][i]);
30677ec681f3Smrg   }
3068af69d88dSmrg   return key;
3069af69d88dSmrg}
3070af69d88dSmrg
3071af69d88dSmrgvoid
3072af69d88dSmrgdraw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key)
3073af69d88dSmrg{
3074af69d88dSmrg   unsigned i;
3075af69d88dSmrg   struct draw_sampler_static_state *sampler = key->samplers;
30767ec681f3Smrg   struct draw_image_static_state *image = draw_gs_llvm_variant_key_images(key);
3077af69d88dSmrg
30787ec681f3Smrg   debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
3079af69d88dSmrg   for (i = 0 ; i < key->nr_sampler_views; i++) {
3080af69d88dSmrg      debug_printf("sampler[%i].src_format = %s\n", i,
3081af69d88dSmrg                   util_format_name(sampler[i].texture_state.format));
3082af69d88dSmrg   }
30837ec681f3Smrg
30847ec681f3Smrg   for (i = 0 ; i < key->nr_images; i++)
30857ec681f3Smrg      debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
30867ec681f3Smrg
30877ec681f3Smrg}
30887ec681f3Smrg
30897ec681f3Smrgstatic void
30907ec681f3Smrgcreate_tcs_jit_types(struct draw_tcs_llvm_variant *var)
30917ec681f3Smrg{
30927ec681f3Smrg   struct gallivm_state *gallivm = var->gallivm;
30937ec681f3Smrg   LLVMTypeRef texture_type, sampler_type, image_type, context_type;
30947ec681f3Smrg
30957ec681f3Smrg   texture_type = create_jit_texture_type(gallivm, "texture");
30967ec681f3Smrg   sampler_type = create_jit_sampler_type(gallivm, "sampler");
30977ec681f3Smrg   image_type = create_jit_image_type(gallivm, "image");
30987ec681f3Smrg
30997ec681f3Smrg   context_type = create_tcs_jit_context_type(gallivm,
31007ec681f3Smrg                                              0,
31017ec681f3Smrg                                              texture_type, sampler_type,
31027ec681f3Smrg                                              image_type,
31037ec681f3Smrg                                              "draw_tcs_jit_context");
31047ec681f3Smrg   var->input_array_type = create_tcs_jit_input_type(gallivm);
31057ec681f3Smrg   var->output_array_type = create_tcs_jit_output_type(gallivm);
31067ec681f3Smrg   var->context_ptr_type = LLVMPointerType(context_type, 0);
31077ec681f3Smrg}
31087ec681f3Smrg
31097ec681f3Smrgstatic LLVMTypeRef
31107ec681f3Smrgget_tcs_context_ptr_type(struct draw_tcs_llvm_variant *variant)
31117ec681f3Smrg{
31127ec681f3Smrg   if (!variant->context_ptr_type)
31137ec681f3Smrg      create_tcs_jit_types(variant);
31147ec681f3Smrg   return variant->context_ptr_type;
31157ec681f3Smrg}
31167ec681f3Smrg
31177ec681f3Smrgstatic LLVMValueRef
31187ec681f3Smrgdraw_tcs_llvm_emit_fetch_input(const struct lp_build_tcs_iface *tes_iface,
31197ec681f3Smrg                               struct lp_build_context *bld,
31207ec681f3Smrg                               boolean is_vindex_indirect,
31217ec681f3Smrg                               LLVMValueRef vertex_index,
31227ec681f3Smrg                               boolean is_aindex_indirect,
31237ec681f3Smrg                               LLVMValueRef attrib_index,
31247ec681f3Smrg                               boolean is_sindex_indirect,
31257ec681f3Smrg                               LLVMValueRef swizzle_index)
31267ec681f3Smrg{
31277ec681f3Smrg   const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
31287ec681f3Smrg   struct gallivm_state *gallivm = bld->gallivm;
31297ec681f3Smrg   LLVMBuilderRef builder = gallivm->builder;
31307ec681f3Smrg   LLVMValueRef indices[3];
31317ec681f3Smrg   LLVMValueRef res;
31327ec681f3Smrg   struct lp_type type = bld->type;
31337ec681f3Smrg
31347ec681f3Smrg   if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
31357ec681f3Smrg      int i;
31367ec681f3Smrg
31377ec681f3Smrg      res = bld->zero;
31387ec681f3Smrg      for (i = 0; i < type.length; ++i) {
31397ec681f3Smrg         LLVMValueRef idx = lp_build_const_int32(gallivm, i);
31407ec681f3Smrg         LLVMValueRef vert_chan_index = vertex_index;
31417ec681f3Smrg         LLVMValueRef attr_chan_index = attrib_index;
31427ec681f3Smrg         LLVMValueRef swiz_chan_index = swizzle_index;
31437ec681f3Smrg         LLVMValueRef channel_vec;
31447ec681f3Smrg
31457ec681f3Smrg         if (is_vindex_indirect) {
31467ec681f3Smrg            vert_chan_index = LLVMBuildExtractElement(builder,
31477ec681f3Smrg                                                      vertex_index, idx, "");
31487ec681f3Smrg         }
31497ec681f3Smrg         if (is_aindex_indirect) {
31507ec681f3Smrg            attr_chan_index = LLVMBuildExtractElement(builder,
31517ec681f3Smrg                                                      attrib_index, idx, "");
31527ec681f3Smrg         }
31537ec681f3Smrg         if (is_sindex_indirect) {
31547ec681f3Smrg            swiz_chan_index = LLVMBuildExtractElement(builder,
31557ec681f3Smrg                                                      swizzle_index, idx, "");
31567ec681f3Smrg         }
31577ec681f3Smrg
31587ec681f3Smrg         indices[0] = vert_chan_index;
31597ec681f3Smrg         indices[1] = attr_chan_index;
31607ec681f3Smrg         indices[2] = swiz_chan_index;
31617ec681f3Smrg
31627ec681f3Smrg         channel_vec = LLVMBuildGEP(builder, tcs->input, indices, 3, "");
31637ec681f3Smrg         channel_vec = LLVMBuildLoad(builder, channel_vec, "");
31647ec681f3Smrg
31657ec681f3Smrg         res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
31667ec681f3Smrg      }
31677ec681f3Smrg   } else {
31687ec681f3Smrg      indices[0] = vertex_index;
31697ec681f3Smrg      indices[1] = attrib_index;
31707ec681f3Smrg      indices[2] = swizzle_index;
31717ec681f3Smrg
31727ec681f3Smrg      res = LLVMBuildGEP(builder, tcs->input, indices, 3, "");
31737ec681f3Smrg      res = LLVMBuildLoad(builder, res, "");
31747ec681f3Smrg      res = lp_build_broadcast_scalar(bld, res);
31757ec681f3Smrg   }
31767ec681f3Smrg   return res;
31777ec681f3Smrg}
31787ec681f3Smrg
31797ec681f3Smrgstatic LLVMValueRef
31807ec681f3Smrgdraw_tcs_llvm_emit_fetch_output(const struct lp_build_tcs_iface *tes_iface,
31817ec681f3Smrg                                struct lp_build_context *bld,
31827ec681f3Smrg                                boolean is_vindex_indirect,
31837ec681f3Smrg                                LLVMValueRef vertex_index,
31847ec681f3Smrg                                boolean is_aindex_indirect,
31857ec681f3Smrg                                LLVMValueRef attrib_index,
31867ec681f3Smrg                                boolean is_sindex_indirect,
31877ec681f3Smrg                                LLVMValueRef swizzle_index,
31887ec681f3Smrg                                uint32_t name)
31897ec681f3Smrg{
31907ec681f3Smrg   const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
31917ec681f3Smrg   struct gallivm_state *gallivm = bld->gallivm;
31927ec681f3Smrg   LLVMBuilderRef builder = gallivm->builder;
31937ec681f3Smrg   LLVMValueRef indices[3];
31947ec681f3Smrg   LLVMValueRef res;
31957ec681f3Smrg   struct lp_type type = bld->type;
31967ec681f3Smrg
31977ec681f3Smrg   if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
31987ec681f3Smrg      int i;
31997ec681f3Smrg
32007ec681f3Smrg      res = bld->zero;
32017ec681f3Smrg      for (i = 0; i < type.length; ++i) {
32027ec681f3Smrg         LLVMValueRef idx = lp_build_const_int32(gallivm, i);
32037ec681f3Smrg         LLVMValueRef vert_chan_index = vertex_index;
32047ec681f3Smrg         LLVMValueRef attr_chan_index = attrib_index;
32057ec681f3Smrg         LLVMValueRef swiz_chan_index = swizzle_index;
32067ec681f3Smrg         LLVMValueRef channel_vec;
32077ec681f3Smrg
32087ec681f3Smrg         if (is_vindex_indirect) {
32097ec681f3Smrg            vert_chan_index = LLVMBuildExtractElement(builder,
32107ec681f3Smrg                                                      vertex_index, idx, "");
32117ec681f3Smrg         }
32127ec681f3Smrg         if (is_aindex_indirect) {
32137ec681f3Smrg            attr_chan_index = LLVMBuildExtractElement(builder,
32147ec681f3Smrg                                                      attrib_index, idx, "");
32157ec681f3Smrg         }
32167ec681f3Smrg         if (is_sindex_indirect) {
32177ec681f3Smrg            swiz_chan_index = LLVMBuildExtractElement(builder,
32187ec681f3Smrg                                                      swizzle_index, idx, "");
32197ec681f3Smrg         }
32207ec681f3Smrg
32217ec681f3Smrg         indices[0] = vert_chan_index;
32227ec681f3Smrg         indices[1] = attr_chan_index;
32237ec681f3Smrg         indices[2] = swiz_chan_index;
32247ec681f3Smrg
32257ec681f3Smrg         channel_vec = LLVMBuildGEP(builder, tcs->output, indices, 3, "");
32267ec681f3Smrg         channel_vec = LLVMBuildLoad(builder, channel_vec, "");
32277ec681f3Smrg
32287ec681f3Smrg         res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
32297ec681f3Smrg      }
32307ec681f3Smrg   } else {
32317ec681f3Smrg      indices[0] = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
32327ec681f3Smrg      indices[1] = attrib_index;
32337ec681f3Smrg      indices[2] = swizzle_index;
32347ec681f3Smrg
32357ec681f3Smrg      res = LLVMBuildGEP(builder, tcs->output, indices, 3, "");
32367ec681f3Smrg      res = LLVMBuildLoad(builder, res, "");
32377ec681f3Smrg      res = lp_build_broadcast_scalar(bld, res);
32387ec681f3Smrg   }
32397ec681f3Smrg   return res;
32407ec681f3Smrg}
32417ec681f3Smrg
32427ec681f3Smrgstatic void
32437ec681f3Smrgdraw_tcs_llvm_emit_store_output(const struct lp_build_tcs_iface *tes_iface,
32447ec681f3Smrg                                struct lp_build_context *bld,
32457ec681f3Smrg                                unsigned name,
32467ec681f3Smrg                                boolean is_vindex_indirect,
32477ec681f3Smrg                                LLVMValueRef vertex_index,
32487ec681f3Smrg                                boolean is_aindex_indirect,
32497ec681f3Smrg                                LLVMValueRef attrib_index,
32507ec681f3Smrg                                boolean is_sindex_indirect,
32517ec681f3Smrg                                LLVMValueRef swizzle_index,
32527ec681f3Smrg                                LLVMValueRef value,
32537ec681f3Smrg                                LLVMValueRef mask_vec)
32547ec681f3Smrg{
32557ec681f3Smrg   const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
32567ec681f3Smrg   struct gallivm_state *gallivm = bld->gallivm;
32577ec681f3Smrg   LLVMBuilderRef builder = gallivm->builder;
32587ec681f3Smrg   LLVMValueRef indices[3];
32597ec681f3Smrg   LLVMValueRef res;
32607ec681f3Smrg   struct lp_type type = bld->type;
32617ec681f3Smrg
32627ec681f3Smrg   if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
32637ec681f3Smrg      int i;
32647ec681f3Smrg
32657ec681f3Smrg      for (i = 0; i < type.length; ++i) {
32667ec681f3Smrg         LLVMValueRef idx = lp_build_const_int32(gallivm, i);
32677ec681f3Smrg         LLVMValueRef vert_chan_index = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
32687ec681f3Smrg         LLVMValueRef attr_chan_index = attrib_index;
32697ec681f3Smrg         LLVMValueRef swiz_chan_index = swizzle_index;
32707ec681f3Smrg         LLVMValueRef channel_vec;
32717ec681f3Smrg
32727ec681f3Smrg         if (is_vindex_indirect) {
32737ec681f3Smrg            vert_chan_index = LLVMBuildExtractElement(builder,
32747ec681f3Smrg                                                      vertex_index, idx, "");
32757ec681f3Smrg         }
32767ec681f3Smrg         if (is_aindex_indirect) {
32777ec681f3Smrg            attr_chan_index = LLVMBuildExtractElement(builder,
32787ec681f3Smrg                                                      attrib_index, idx, "");
32797ec681f3Smrg         }
32807ec681f3Smrg
32817ec681f3Smrg         if (is_sindex_indirect) {
32827ec681f3Smrg            swiz_chan_index = LLVMBuildExtractElement(builder,
32837ec681f3Smrg                                                      swizzle_index, idx, "");
32847ec681f3Smrg         }
32857ec681f3Smrg
32867ec681f3Smrg         indices[0] = vert_chan_index;
32877ec681f3Smrg         indices[1] = attr_chan_index;
32887ec681f3Smrg         indices[2] = swiz_chan_index;
32897ec681f3Smrg
32907ec681f3Smrg         channel_vec = LLVMBuildGEP(builder, tcs->output, indices, 3, "");
32917ec681f3Smrg
32927ec681f3Smrg         res = LLVMBuildExtractElement(builder, value, idx, "");
32937ec681f3Smrg
32947ec681f3Smrg         struct lp_build_if_state ifthen;
32957ec681f3Smrg         LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
32967ec681f3Smrg         cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");
32977ec681f3Smrg         lp_build_if(&ifthen, gallivm, cond);
32987ec681f3Smrg         LLVMBuildStore(builder, res, channel_vec);
32997ec681f3Smrg         lp_build_endif(&ifthen);
33007ec681f3Smrg      }
33017ec681f3Smrg   } else {
33027ec681f3Smrg      indices[0] = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
33037ec681f3Smrg      indices[1] = attrib_index;
33047ec681f3Smrg      indices[2] = swizzle_index;
33057ec681f3Smrg
33067ec681f3Smrg      res = LLVMBuildGEP(builder, tcs->output, indices, 3, "");
33077ec681f3Smrg      for (unsigned i = 0; i < type.length; ++i) {
33087ec681f3Smrg         LLVMValueRef idx = lp_build_const_int32(gallivm, i);
33097ec681f3Smrg         LLVMValueRef val = LLVMBuildExtractElement(builder, value, idx, "");
33107ec681f3Smrg
33117ec681f3Smrg         struct lp_build_if_state ifthen;
33127ec681f3Smrg         LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
33137ec681f3Smrg         cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");
33147ec681f3Smrg         lp_build_if(&ifthen, gallivm, cond);
33157ec681f3Smrg         LLVMBuildStore(builder, val, res);
33167ec681f3Smrg         lp_build_endif(&ifthen);
33177ec681f3Smrg      }
33187ec681f3Smrg   }
33197ec681f3Smrg}
33207ec681f3Smrg
33217ec681f3Smrg
33227ec681f3Smrgstatic LLVMValueRef
33237ec681f3Smrggenerate_tcs_mask_value(struct draw_tcs_llvm_variant *variant,
33247ec681f3Smrg                        struct lp_type tcs_type, LLVMValueRef limit, LLVMValueRef loop_counter)
33257ec681f3Smrg{
33267ec681f3Smrg   struct gallivm_state *gallivm = variant->gallivm;
33277ec681f3Smrg   LLVMBuilderRef builder = gallivm->builder;
33287ec681f3Smrg   struct lp_type mask_type = lp_int_type(tcs_type);
33297ec681f3Smrg   LLVMValueRef num_vecs;
33307ec681f3Smrg   LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
33317ec681f3Smrg   unsigned i;
33327ec681f3Smrg
33337ec681f3Smrg   num_vecs = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type), limit);
33347ec681f3Smrg   for (i = 0; i < tcs_type.length; i++) {
33357ec681f3Smrg      LLVMValueRef idx = lp_build_const_int32(gallivm, i);
33367ec681f3Smrg      mask_val = LLVMBuildInsertElement(builder, mask_val, LLVMBuildAdd(builder, loop_counter, idx, ""), idx, "");
33377ec681f3Smrg   }
33387ec681f3Smrg   mask_val = lp_build_compare(gallivm, mask_type,
33397ec681f3Smrg                               PIPE_FUNC_GREATER, num_vecs, mask_val);
33407ec681f3Smrg
33417ec681f3Smrg   return mask_val;
33427ec681f3Smrg}
33437ec681f3Smrg
33447ec681f3Smrgstatic void
33457ec681f3Smrgdraw_tcs_llvm_generate(struct draw_llvm *llvm,
33467ec681f3Smrg                       struct draw_tcs_llvm_variant *variant)
33477ec681f3Smrg{
33487ec681f3Smrg   struct gallivm_state *gallivm = variant->gallivm;
33497ec681f3Smrg   LLVMContextRef context = gallivm->context;
33507ec681f3Smrg   LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
33517ec681f3Smrg   LLVMTypeRef arg_types[7];
33527ec681f3Smrg   LLVMTypeRef func_type, coro_func_type;
33537ec681f3Smrg   LLVMValueRef variant_func, variant_coro;
33547ec681f3Smrg   LLVMValueRef context_ptr;
33557ec681f3Smrg   LLVMValueRef view_index;
33567ec681f3Smrg   LLVMValueRef input_array, output_array, prim_id, patch_vertices_in;
33577ec681f3Smrg   LLVMValueRef mask_val;
33587ec681f3Smrg   LLVMBasicBlockRef block;
33597ec681f3Smrg   LLVMBuilderRef builder;
33607ec681f3Smrg   struct lp_build_context bld, bldvec;
33617ec681f3Smrg   struct lp_build_sampler_soa *sampler = 0;
33627ec681f3Smrg   struct lp_build_image_soa *image = NULL;
33637ec681f3Smrg   struct lp_bld_tgsi_system_values system_values;
33647ec681f3Smrg   char func_name[64], func_name_coro[64];
33657ec681f3Smrg   unsigned i;
33667ec681f3Smrg   struct draw_tcs_llvm_iface tcs_iface;
33677ec681f3Smrg   struct lp_build_mask_context mask;
33687ec681f3Smrg   LLVMValueRef consts_ptr, num_consts_ptr;
33697ec681f3Smrg   LLVMValueRef ssbos_ptr, num_ssbos_ptr;
33707ec681f3Smrg   struct lp_type tcs_type;
33717ec681f3Smrg   unsigned vector_length = variant->shader->base.vector_length;
33727ec681f3Smrg
33737ec681f3Smrg   memset(&system_values, 0, sizeof(system_values));
33747ec681f3Smrg
33757ec681f3Smrg   snprintf(func_name, sizeof(func_name), "draw_llvm_tcs_variant");
33767ec681f3Smrg
33777ec681f3Smrg   snprintf(func_name_coro, sizeof(func_name_coro), "draw_llvm_tcs_coro_variant");
33787ec681f3Smrg
33797ec681f3Smrg   arg_types[0] = get_tcs_context_ptr_type(variant);    /* context */
33807ec681f3Smrg   arg_types[1] = variant->input_array_type;           /* input */
33817ec681f3Smrg   arg_types[2] = variant->output_array_type;
33827ec681f3Smrg   arg_types[3] = int32_type;
33837ec681f3Smrg   arg_types[4] = int32_type;
33847ec681f3Smrg   arg_types[5] = int32_type;
33857ec681f3Smrg   arg_types[6] = int32_type; /* coroutine only */
33867ec681f3Smrg
33877ec681f3Smrg   func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types) - 1, 0);
33887ec681f3Smrg
33897ec681f3Smrg   coro_func_type = LLVMFunctionType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), arg_types, ARRAY_SIZE(arg_types), 0);
33907ec681f3Smrg
33917ec681f3Smrg   variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
33927ec681f3Smrg
33937ec681f3Smrg   variant_coro = LLVMAddFunction(gallivm->module, func_name_coro, coro_func_type);
33947ec681f3Smrg
33957ec681f3Smrg   variant->function = variant_func;
33967ec681f3Smrg   LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
33977ec681f3Smrg
33987ec681f3Smrg   LLVMSetFunctionCallConv(variant_coro, LLVMCCallConv);
33997ec681f3Smrg
34007ec681f3Smrg   for (i = 0; i < ARRAY_SIZE(arg_types); ++i) {
34017ec681f3Smrg      if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
34027ec681f3Smrg         lp_add_function_attr(variant_coro, i + 1, LP_FUNC_ATTR_NOALIAS);
34037ec681f3Smrg         lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
34047ec681f3Smrg      }
34057ec681f3Smrg   }
34067ec681f3Smrg
34077ec681f3Smrg   if (gallivm->cache && gallivm->cache->data_size)
34087ec681f3Smrg      return;
34097ec681f3Smrg   context_ptr               = LLVMGetParam(variant_func, 0);
34107ec681f3Smrg   input_array               = LLVMGetParam(variant_func, 1);
34117ec681f3Smrg   output_array              = LLVMGetParam(variant_func, 2);
34127ec681f3Smrg   prim_id                   = LLVMGetParam(variant_func, 3);
34137ec681f3Smrg   patch_vertices_in         = LLVMGetParam(variant_func, 4);
34147ec681f3Smrg   view_index                = LLVMGetParam(variant_func, 5);
34157ec681f3Smrg
34167ec681f3Smrg   lp_build_name(context_ptr, "context");
34177ec681f3Smrg   lp_build_name(input_array, "input");
34187ec681f3Smrg   lp_build_name(output_array, "output");
34197ec681f3Smrg   lp_build_name(prim_id, "prim_id");
34207ec681f3Smrg   lp_build_name(patch_vertices_in, "patch_vertices_in");
34217ec681f3Smrg   lp_build_name(view_index, "view_index");
34227ec681f3Smrg
34237ec681f3Smrg   block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
34247ec681f3Smrg   builder = gallivm->builder;
34257ec681f3Smrg   LLVMPositionBuilderAtEnd(builder, block);
34267ec681f3Smrg
34277ec681f3Smrg   lp_build_context_init(&bld, gallivm, lp_type_int(32));
34287ec681f3Smrg
34297ec681f3Smrg   memset(&tcs_type, 0, sizeof tcs_type);
34307ec681f3Smrg   tcs_type.floating = TRUE; /* floating point values */
34317ec681f3Smrg   tcs_type.sign = TRUE;     /* values are signed */
34327ec681f3Smrg   tcs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
34337ec681f3Smrg   tcs_type.width = 32;      /* 32-bit float */
34347ec681f3Smrg   tcs_type.length = vector_length;
34357ec681f3Smrg
34367ec681f3Smrg   lp_build_context_init(&bldvec, variant->gallivm, lp_int_type(tcs_type));
34377ec681f3Smrg
34387ec681f3Smrg   LLVMValueRef count = lp_build_const_int32(gallivm, variant->shader->base.vertices_out);
34397ec681f3Smrg   LLVMValueRef step = lp_build_const_int32(gallivm, vector_length);
34407ec681f3Smrg
34417ec681f3Smrg   struct lp_build_loop_state loop_state[2];
34427ec681f3Smrg   LLVMValueRef num_inner_loop;
34437ec681f3Smrg   unsigned count_align = util_align_npot(variant->shader->base.vertices_out, tcs_type.length);
34447ec681f3Smrg   num_inner_loop = lp_build_const_int32(gallivm, count_align / tcs_type.length);
34457ec681f3Smrg   LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
34467ec681f3Smrg   LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, num_inner_loop, "coro_hdls");
34477ec681f3Smrg   unsigned end_coroutine = INT_MAX;
34487ec681f3Smrg   lp_build_loop_begin(&loop_state[1], gallivm,
34497ec681f3Smrg                       lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */
34507ec681f3Smrg   lp_build_loop_begin(&loop_state[0], gallivm,
34517ec681f3Smrg                       lp_build_const_int32(gallivm, 0)); /* inner loop */
34527ec681f3Smrg   {
34537ec681f3Smrg      LLVMValueRef args[7];
34547ec681f3Smrg      args[0] = context_ptr;
34557ec681f3Smrg      args[1] = input_array;
34567ec681f3Smrg      args[2] = output_array;
34577ec681f3Smrg      args[3] = prim_id;
34587ec681f3Smrg      args[4] = patch_vertices_in;
34597ec681f3Smrg      args[5] = view_index;
34607ec681f3Smrg      args[6] = loop_state[0].counter;
34617ec681f3Smrg      LLVMValueRef coro_entry = LLVMBuildGEP(builder, coro_hdls, &loop_state[0].counter, 1, "");
34627ec681f3Smrg      LLVMValueRef coro_hdl = LLVMBuildLoad(builder, coro_entry, "coro_hdl");
34637ec681f3Smrg
34647ec681f3Smrg      struct lp_build_if_state ifstate;
34657ec681f3Smrg      LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntEQ, loop_state[1].counter,
34667ec681f3Smrg                                       lp_build_const_int32(gallivm, 0), "");
34677ec681f3Smrg      /* first time here - call the coroutine function entry point */
34687ec681f3Smrg      lp_build_if(&ifstate, gallivm, cmp);
34697ec681f3Smrg      LLVMValueRef coro_ret = LLVMBuildCall(builder, variant_coro, args, 7, "");
34707ec681f3Smrg      LLVMBuildStore(builder, coro_ret, coro_entry);
34717ec681f3Smrg      lp_build_else(&ifstate);
34727ec681f3Smrg      /* subsequent calls for this invocation - check if done. */
34737ec681f3Smrg      LLVMValueRef coro_done = lp_build_coro_done(gallivm, coro_hdl);
34747ec681f3Smrg      struct lp_build_if_state ifstate2;
34757ec681f3Smrg      lp_build_if(&ifstate2, gallivm, coro_done);
34767ec681f3Smrg      /* if done destroy and force loop exit */
34777ec681f3Smrg      lp_build_coro_destroy(gallivm, coro_hdl);
34787ec681f3Smrg      lp_build_loop_force_set_counter(&loop_state[1], lp_build_const_int32(gallivm, end_coroutine - 1));
34797ec681f3Smrg      lp_build_else(&ifstate2);
34807ec681f3Smrg      /* otherwise resume the coroutine */
34817ec681f3Smrg      lp_build_coro_resume(gallivm, coro_hdl);
34827ec681f3Smrg      lp_build_endif(&ifstate2);
34837ec681f3Smrg      lp_build_endif(&ifstate);
34847ec681f3Smrg      lp_build_loop_force_reload_counter(&loop_state[1]);
34857ec681f3Smrg   }
34867ec681f3Smrg   lp_build_loop_end_cond(&loop_state[0],
34877ec681f3Smrg                          num_inner_loop,
34887ec681f3Smrg                          NULL,  LLVMIntUGE);
34897ec681f3Smrg   lp_build_loop_end_cond(&loop_state[1],
34907ec681f3Smrg                          lp_build_const_int32(gallivm, end_coroutine),
34917ec681f3Smrg                          NULL, LLVMIntEQ);
34927ec681f3Smrg   LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
34937ec681f3Smrg
34947ec681f3Smrg   block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "entry");
34957ec681f3Smrg   LLVMPositionBuilderAtEnd(builder, block);
34967ec681f3Smrg
34977ec681f3Smrg   context_ptr = LLVMGetParam(variant_coro, 0);
34987ec681f3Smrg   input_array = LLVMGetParam(variant_coro, 1);
34997ec681f3Smrg   output_array = LLVMGetParam(variant_coro, 2);
35007ec681f3Smrg   prim_id = LLVMGetParam(variant_coro, 3);
35017ec681f3Smrg   patch_vertices_in = LLVMGetParam(variant_coro, 4);
35027ec681f3Smrg   view_index = LLVMGetParam(variant_coro, 5);
35037ec681f3Smrg
35047ec681f3Smrg   consts_ptr = draw_tcs_jit_context_constants(variant->gallivm, context_ptr);
35057ec681f3Smrg   num_consts_ptr =
35067ec681f3Smrg      draw_tcs_jit_context_num_constants(variant->gallivm, context_ptr);
35077ec681f3Smrg
35087ec681f3Smrg   ssbos_ptr = draw_tcs_jit_context_ssbos(variant->gallivm, context_ptr);
35097ec681f3Smrg   num_ssbos_ptr =
35107ec681f3Smrg      draw_tcs_jit_context_num_ssbos(variant->gallivm, context_ptr);
35117ec681f3Smrg   sampler = draw_llvm_sampler_soa_create(variant->key.samplers, variant->key.nr_samplers);
35127ec681f3Smrg   image = draw_llvm_image_soa_create(draw_tcs_llvm_variant_key_images(&variant->key),
35137ec681f3Smrg                                      variant->key.nr_images);
35147ec681f3Smrg
35157ec681f3Smrg   LLVMValueRef counter = LLVMGetParam(variant_coro, 6);
35167ec681f3Smrg   LLVMValueRef invocvec = LLVMGetUndef(LLVMVectorType(int32_type, vector_length));
35177ec681f3Smrg   for (i = 0; i < vector_length; i++) {
35187ec681f3Smrg      LLVMValueRef loop_iter = lp_build_const_int32(gallivm, i);
35197ec681f3Smrg      LLVMValueRef idx = LLVMBuildAdd(builder, LLVMBuildMul(builder, counter, step, ""), loop_iter, "");
35207ec681f3Smrg      invocvec = LLVMBuildInsertElement(builder, invocvec, idx, loop_iter, "");
35217ec681f3Smrg   }
35227ec681f3Smrg
35237ec681f3Smrg   system_values.invocation_id = invocvec;
35247ec681f3Smrg   system_values.prim_id = lp_build_broadcast_scalar(&bldvec, prim_id);
35257ec681f3Smrg   system_values.view_index = view_index;
35267ec681f3Smrg   system_values.vertices_in = lp_build_broadcast_scalar(&bldvec, patch_vertices_in);
35277ec681f3Smrg   tcs_iface.input = input_array;
35287ec681f3Smrg   tcs_iface.output = output_array;
35297ec681f3Smrg   tcs_iface.base.emit_fetch_input = draw_tcs_llvm_emit_fetch_input;
35307ec681f3Smrg   tcs_iface.base.emit_fetch_output = draw_tcs_llvm_emit_fetch_output;
35317ec681f3Smrg   tcs_iface.base.emit_store_output = draw_tcs_llvm_emit_store_output;
35327ec681f3Smrg
35337ec681f3Smrg
35347ec681f3Smrg   {
35357ec681f3Smrg      LLVMValueRef coro_id = lp_build_coro_id(gallivm);
35367ec681f3Smrg      LLVMValueRef coro_hdl = lp_build_coro_begin_alloc_mem(gallivm, coro_id);
35377ec681f3Smrg
35387ec681f3Smrg      mask_val = generate_tcs_mask_value(variant, tcs_type, count, LLVMBuildMul(builder, counter, step, ""));
35397ec681f3Smrg      lp_build_mask_begin(&mask, gallivm, tcs_type, mask_val);
35407ec681f3Smrg
35417ec681f3Smrg      struct lp_build_coro_suspend_info coro_info;
35427ec681f3Smrg
35437ec681f3Smrg      LLVMBasicBlockRef sus_block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "suspend");
35447ec681f3Smrg      LLVMBasicBlockRef clean_block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "cleanup");
35457ec681f3Smrg
35467ec681f3Smrg      coro_info.suspend = sus_block;
35477ec681f3Smrg      coro_info.cleanup = clean_block;
35487ec681f3Smrg
35497ec681f3Smrg      struct lp_build_tgsi_params params;
35507ec681f3Smrg      memset(&params, 0, sizeof(params));
35517ec681f3Smrg
35527ec681f3Smrg      params.type = tcs_type;
35537ec681f3Smrg      params.mask = &mask;
35547ec681f3Smrg      params.consts_ptr = consts_ptr;
35557ec681f3Smrg      params.const_sizes_ptr = num_consts_ptr;
35567ec681f3Smrg      params.system_values = &system_values;
35577ec681f3Smrg      params.context_ptr = context_ptr;
35587ec681f3Smrg      params.sampler = sampler;
35597ec681f3Smrg      params.info = &llvm->draw->tcs.tess_ctrl_shader->info;
35607ec681f3Smrg      params.ssbo_ptr = ssbos_ptr;
35617ec681f3Smrg      params.ssbo_sizes_ptr = num_ssbos_ptr;
35627ec681f3Smrg      params.image = image;
35637ec681f3Smrg      params.coro = &coro_info;
35647ec681f3Smrg      params.tcs_iface = &tcs_iface.base;
35657ec681f3Smrg      params.aniso_filter_table = draw_tcs_jit_context_aniso_filter_table(gallivm, context_ptr);
35667ec681f3Smrg
35677ec681f3Smrg      lp_build_nir_soa(variant->gallivm,
35687ec681f3Smrg                       llvm->draw->tcs.tess_ctrl_shader->state.ir.nir,
35697ec681f3Smrg                       &params, NULL);
35707ec681f3Smrg
35717ec681f3Smrg      lp_build_mask_end(&mask);
35727ec681f3Smrg
35737ec681f3Smrg      lp_build_coro_suspend_switch(gallivm, &coro_info, NULL, true);
35747ec681f3Smrg      LLVMPositionBuilderAtEnd(builder, clean_block);
35757ec681f3Smrg
35767ec681f3Smrg      lp_build_coro_free_mem(gallivm, coro_id, coro_hdl);
35777ec681f3Smrg
35787ec681f3Smrg      LLVMBuildBr(builder, sus_block);
35797ec681f3Smrg      LLVMPositionBuilderAtEnd(builder, sus_block);
35807ec681f3Smrg
35817ec681f3Smrg      lp_build_coro_end(gallivm, coro_hdl);
35827ec681f3Smrg      LLVMBuildRet(builder, coro_hdl);
35837ec681f3Smrg   }
35847ec681f3Smrg
35857ec681f3Smrg   sampler->destroy(sampler);
35867ec681f3Smrg   image->destroy(image);
35877ec681f3Smrg   gallivm_verify_function(gallivm, variant_func);
35887ec681f3Smrg   gallivm_verify_function(gallivm, variant_coro);
35897ec681f3Smrg}
35907ec681f3Smrg
35917ec681f3Smrgstruct draw_tcs_llvm_variant *
35927ec681f3Smrgdraw_tcs_llvm_create_variant(struct draw_llvm *llvm,
35937ec681f3Smrg                             unsigned num_outputs,
35947ec681f3Smrg                             const struct draw_tcs_llvm_variant_key *key)
35957ec681f3Smrg{
35967ec681f3Smrg   struct draw_tcs_llvm_variant *variant;
35977ec681f3Smrg   struct llvm_tess_ctrl_shader *shader = llvm_tess_ctrl_shader(llvm->draw->tcs.tess_ctrl_shader);
35987ec681f3Smrg   char module_name[64];
35997ec681f3Smrg   unsigned char ir_sha1_cache_key[20];
36007ec681f3Smrg   struct lp_cached_code cached = { 0 };
36017ec681f3Smrg   bool needs_caching = false;
36027ec681f3Smrg
36037ec681f3Smrg   variant = MALLOC(sizeof *variant +
36047ec681f3Smrg                    shader->variant_key_size - sizeof variant->key);
36057ec681f3Smrg   if (!variant)
36067ec681f3Smrg      return NULL;
36077ec681f3Smrg
36087ec681f3Smrg   variant->llvm = llvm;
36097ec681f3Smrg   variant->shader = shader;
36107ec681f3Smrg
36117ec681f3Smrg   snprintf(module_name, sizeof(module_name), "draw_llvm_tcs_variant%u",
36127ec681f3Smrg            variant->shader->variants_cached);
36137ec681f3Smrg
36147ec681f3Smrg   memcpy(&variant->key, key, shader->variant_key_size);
36157ec681f3Smrg
36167ec681f3Smrg   if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
36177ec681f3Smrg      draw_get_ir_cache_key(shader->base.state.ir.nir,
36187ec681f3Smrg                            key,
36197ec681f3Smrg                            shader->variant_key_size,
36207ec681f3Smrg                            num_outputs,
36217ec681f3Smrg                            ir_sha1_cache_key);
36227ec681f3Smrg
36237ec681f3Smrg      llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
36247ec681f3Smrg                                         &cached,
36257ec681f3Smrg                                         ir_sha1_cache_key);
36267ec681f3Smrg      if (!cached.data_size)
36277ec681f3Smrg         needs_caching = true;
36287ec681f3Smrg   }
36297ec681f3Smrg
36307ec681f3Smrg   variant->gallivm = gallivm_create(module_name, llvm->context, &cached);
36317ec681f3Smrg
36327ec681f3Smrg   create_tcs_jit_types(variant);
36337ec681f3Smrg
36347ec681f3Smrg   if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
36357ec681f3Smrg      nir_print_shader(llvm->draw->tcs.tess_ctrl_shader->state.ir.nir, stderr);
36367ec681f3Smrg      draw_tcs_llvm_dump_variant_key(&variant->key);
36377ec681f3Smrg   }
36387ec681f3Smrg
36397ec681f3Smrg   lp_build_coro_declare_malloc_hooks(variant->gallivm);
36407ec681f3Smrg   draw_tcs_llvm_generate(llvm, variant);
36417ec681f3Smrg
36427ec681f3Smrg   gallivm_compile_module(variant->gallivm);
36437ec681f3Smrg
36447ec681f3Smrg   lp_build_coro_add_malloc_hooks(variant->gallivm);
36457ec681f3Smrg   variant->jit_func = (draw_tcs_jit_func)
36467ec681f3Smrg      gallivm_jit_function(variant->gallivm, variant->function);
36477ec681f3Smrg
36487ec681f3Smrg   if (needs_caching)
36497ec681f3Smrg      llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
36507ec681f3Smrg                                           &cached,
36517ec681f3Smrg                                           ir_sha1_cache_key);
36527ec681f3Smrg   gallivm_free_ir(variant->gallivm);
36537ec681f3Smrg
36547ec681f3Smrg   variant->list_item_global.base = variant;
36557ec681f3Smrg   variant->list_item_local.base = variant;
36567ec681f3Smrg   /*variant->no = */shader->variants_created++;
36577ec681f3Smrg   variant->list_item_global.base = variant;
36587ec681f3Smrg
36597ec681f3Smrg   return variant;
36607ec681f3Smrg}
36617ec681f3Smrg
36627ec681f3Smrgvoid
36637ec681f3Smrgdraw_tcs_llvm_destroy_variant(struct draw_tcs_llvm_variant *variant)
36647ec681f3Smrg{
36657ec681f3Smrg   struct draw_llvm *llvm = variant->llvm;
36667ec681f3Smrg
36677ec681f3Smrg   if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
36687ec681f3Smrg      debug_printf("Deleting TCS variant: %u tcs variants,\t%u total variants\n",
36697ec681f3Smrg                    variant->shader->variants_cached, llvm->nr_tcs_variants);
36707ec681f3Smrg   }
36717ec681f3Smrg
36727ec681f3Smrg   gallivm_destroy(variant->gallivm);
36737ec681f3Smrg
36747ec681f3Smrg   remove_from_list(&variant->list_item_local);
36757ec681f3Smrg   variant->shader->variants_cached--;
36767ec681f3Smrg   remove_from_list(&variant->list_item_global);
36777ec681f3Smrg   llvm->nr_tcs_variants--;
36787ec681f3Smrg   FREE(variant);
36797ec681f3Smrg}
36807ec681f3Smrg
36817ec681f3Smrgstruct draw_tcs_llvm_variant_key *
36827ec681f3Smrgdraw_tcs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
36837ec681f3Smrg{
36847ec681f3Smrg   unsigned i;
36857ec681f3Smrg   struct draw_tcs_llvm_variant_key *key;
36867ec681f3Smrg   struct draw_sampler_static_state *draw_sampler;
36877ec681f3Smrg   struct draw_image_static_state *draw_image;
36887ec681f3Smrg
36897ec681f3Smrg   key = (struct draw_tcs_llvm_variant_key *)store;
36907ec681f3Smrg
36917ec681f3Smrg   memset(key, 0, offsetof(struct draw_tcs_llvm_variant_key, samplers[0]));
36927ec681f3Smrg
36937ec681f3Smrg   /* All variants of this shader will have the same value for
36947ec681f3Smrg    * nr_samplers.  Not yet trying to compact away holes in the
36957ec681f3Smrg    * sampler array.
36967ec681f3Smrg    */
36977ec681f3Smrg   key->nr_samplers = llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
36987ec681f3Smrg   if (llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
36997ec681f3Smrg      key->nr_sampler_views =
37007ec681f3Smrg         llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
37017ec681f3Smrg   }
37027ec681f3Smrg   else {
37037ec681f3Smrg      key->nr_sampler_views = key->nr_samplers;
37047ec681f3Smrg   }
37057ec681f3Smrg
37067ec681f3Smrg   key->nr_images = llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
37077ec681f3Smrg
37087ec681f3Smrg   draw_sampler = key->samplers;
37097ec681f3Smrg
37107ec681f3Smrg   memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
37117ec681f3Smrg
37127ec681f3Smrg   for (i = 0 ; i < key->nr_samplers; i++) {
37137ec681f3Smrg      lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
37147ec681f3Smrg                                      llvm->draw->samplers[PIPE_SHADER_TESS_CTRL][i]);
37157ec681f3Smrg   }
37167ec681f3Smrg   for (i = 0 ; i < key->nr_sampler_views; i++) {
37177ec681f3Smrg      lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
37187ec681f3Smrg                                      llvm->draw->sampler_views[PIPE_SHADER_TESS_CTRL][i]);
37197ec681f3Smrg   }
37207ec681f3Smrg
37217ec681f3Smrg   draw_image = draw_tcs_llvm_variant_key_images(key);
37227ec681f3Smrg   memset(draw_image, 0,
37237ec681f3Smrg          key->nr_images * sizeof *draw_image);
37247ec681f3Smrg   for (i = 0; i < key->nr_images; i++) {
37257ec681f3Smrg      lp_sampler_static_texture_state_image(&draw_image[i].image_state,
37267ec681f3Smrg                                            llvm->draw->images[PIPE_SHADER_TESS_CTRL][i]);
37277ec681f3Smrg   }
37287ec681f3Smrg   return key;
37297ec681f3Smrg}
37307ec681f3Smrg
37317ec681f3Smrgvoid
37327ec681f3Smrgdraw_tcs_llvm_dump_variant_key(struct draw_tcs_llvm_variant_key *key)
37337ec681f3Smrg{
37347ec681f3Smrg   unsigned i;
37357ec681f3Smrg   struct draw_sampler_static_state *sampler = key->samplers;
37367ec681f3Smrg   struct draw_image_static_state *image = draw_tcs_llvm_variant_key_images(key);
37377ec681f3Smrg   for (i = 0 ; i < key->nr_sampler_views; i++) {
37387ec681f3Smrg      debug_printf("sampler[%i].src_format = %s\n", i,
37397ec681f3Smrg                   util_format_name(sampler[i].texture_state.format));
37407ec681f3Smrg   }
37417ec681f3Smrg
37427ec681f3Smrg   for (i = 0 ; i < key->nr_images; i++)
37437ec681f3Smrg      debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
37447ec681f3Smrg
37457ec681f3Smrg}
37467ec681f3Smrg
37477ec681f3Smrgstatic void
37487ec681f3Smrgcreate_tes_jit_types(struct draw_tes_llvm_variant *var)
37497ec681f3Smrg{
37507ec681f3Smrg   struct gallivm_state *gallivm = var->gallivm;
37517ec681f3Smrg   LLVMTypeRef texture_type, sampler_type, image_type, context_type;
37527ec681f3Smrg
37537ec681f3Smrg   texture_type = create_jit_texture_type(gallivm, "texture");
37547ec681f3Smrg   sampler_type = create_jit_sampler_type(gallivm, "sampler");
37557ec681f3Smrg   image_type = create_jit_image_type(gallivm, "image");
37567ec681f3Smrg
37577ec681f3Smrg   context_type = create_tes_jit_context_type(gallivm,
37587ec681f3Smrg                                              0,
37597ec681f3Smrg                                              texture_type, sampler_type,
37607ec681f3Smrg                                              image_type,
37617ec681f3Smrg                                              "draw_tes_jit_context");
37627ec681f3Smrg   var->context_ptr_type = LLVMPointerType(context_type, 0);
37637ec681f3Smrg
37647ec681f3Smrg   var->input_array_type = create_tes_jit_input_type(gallivm);
37657ec681f3Smrg}
37667ec681f3Smrg
37677ec681f3Smrgstatic LLVMTypeRef
37687ec681f3Smrgget_tes_context_ptr_type(struct draw_tes_llvm_variant *variant)
37697ec681f3Smrg{
37707ec681f3Smrg   if (!variant->context_ptr_type)
37717ec681f3Smrg      create_tes_jit_types(variant);
37727ec681f3Smrg   return variant->context_ptr_type;
37737ec681f3Smrg}
37747ec681f3Smrg
37757ec681f3Smrgstatic LLVMValueRef
37767ec681f3Smrggenerate_tes_mask_value(struct draw_tes_llvm_variant *variant,
37777ec681f3Smrg                        struct lp_type tes_type, LLVMValueRef limit, LLVMValueRef loop_counter)
37787ec681f3Smrg{
37797ec681f3Smrg   struct gallivm_state *gallivm = variant->gallivm;
37807ec681f3Smrg   LLVMBuilderRef builder = gallivm->builder;
37817ec681f3Smrg   struct lp_type mask_type = lp_int_type(tes_type);
37827ec681f3Smrg   LLVMValueRef num_prims;
37837ec681f3Smrg   LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
37847ec681f3Smrg   unsigned i;
37857ec681f3Smrg
37867ec681f3Smrg   num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type), limit);
37877ec681f3Smrg   for (i = 0; i < tes_type.length; i++) {
37887ec681f3Smrg      LLVMValueRef idx = lp_build_const_int32(gallivm, i);
37897ec681f3Smrg      mask_val = LLVMBuildInsertElement(builder, mask_val, LLVMBuildAdd(builder, loop_counter, idx, ""), idx, "");
37907ec681f3Smrg   }
37917ec681f3Smrg   mask_val = lp_build_compare(gallivm, mask_type,
37927ec681f3Smrg                               PIPE_FUNC_GREATER, num_prims, mask_val);
37937ec681f3Smrg
37947ec681f3Smrg   return mask_val;
37957ec681f3Smrg}
37967ec681f3Smrg
37977ec681f3Smrgstatic LLVMValueRef
37987ec681f3Smrgdraw_tes_llvm_fetch_vertex_input(const struct lp_build_tes_iface *tes_iface,
37997ec681f3Smrg                                 struct lp_build_context *bld,
38007ec681f3Smrg                                 boolean is_vindex_indirect,
38017ec681f3Smrg                                 LLVMValueRef vertex_index,
38027ec681f3Smrg                                 boolean is_aindex_indirect,
38037ec681f3Smrg                                 LLVMValueRef attrib_index,
38047ec681f3Smrg                                 boolean is_sindex_indirect,
38057ec681f3Smrg                                 LLVMValueRef swizzle_index)
38067ec681f3Smrg{
38077ec681f3Smrg   const struct draw_tes_llvm_iface *tes = draw_tes_llvm_iface(tes_iface);
38087ec681f3Smrg   struct gallivm_state *gallivm = bld->gallivm;
38097ec681f3Smrg   LLVMBuilderRef builder = gallivm->builder;
38107ec681f3Smrg   LLVMValueRef indices[3];
38117ec681f3Smrg   LLVMValueRef res;
38127ec681f3Smrg   struct lp_type type = bld->type;
38137ec681f3Smrg
38147ec681f3Smrg   if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
38157ec681f3Smrg      int i;
38167ec681f3Smrg
38177ec681f3Smrg      res = bld->zero;
38187ec681f3Smrg
38197ec681f3Smrg      for (i = 0; i < type.length; ++i) {
38207ec681f3Smrg         LLVMValueRef idx = lp_build_const_int32(gallivm, i);
38217ec681f3Smrg         LLVMValueRef vert_chan_index = vertex_index;
38227ec681f3Smrg         LLVMValueRef attr_chan_index = attrib_index;
38237ec681f3Smrg         LLVMValueRef swiz_chan_index = swizzle_index;
38247ec681f3Smrg         LLVMValueRef channel_vec;
38257ec681f3Smrg
38267ec681f3Smrg         if (is_vindex_indirect) {
38277ec681f3Smrg            vert_chan_index = LLVMBuildExtractElement(builder,
38287ec681f3Smrg                                                      vertex_index, idx, "");
38297ec681f3Smrg         }
38307ec681f3Smrg         if (is_aindex_indirect) {
38317ec681f3Smrg            attr_chan_index = LLVMBuildExtractElement(builder,
38327ec681f3Smrg                                                      attrib_index, idx, "");
38337ec681f3Smrg         }
38347ec681f3Smrg         if (is_sindex_indirect) {
38357ec681f3Smrg            swiz_chan_index = LLVMBuildExtractElement(builder,
38367ec681f3Smrg                                                      swizzle_index, idx, "");
38377ec681f3Smrg         }
38387ec681f3Smrg
38397ec681f3Smrg         indices[0] = vert_chan_index;
38407ec681f3Smrg         indices[1] = attr_chan_index;
38417ec681f3Smrg         indices[2] = swiz_chan_index;
38427ec681f3Smrg
38437ec681f3Smrg         channel_vec = LLVMBuildGEP(builder, tes->input, indices, 3, "");
38447ec681f3Smrg         channel_vec = LLVMBuildLoad(builder, channel_vec, "");
38457ec681f3Smrg
38467ec681f3Smrg         res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
38477ec681f3Smrg      }
38487ec681f3Smrg   } else {
38497ec681f3Smrg      indices[0] = vertex_index;
38507ec681f3Smrg      indices[1] = attrib_index;
38517ec681f3Smrg      indices[2] = swizzle_index;
38527ec681f3Smrg
38537ec681f3Smrg      res = LLVMBuildGEP(builder, tes->input, indices, 3, "");
38547ec681f3Smrg      res = LLVMBuildLoad(builder, res, "");
38557ec681f3Smrg      res = lp_build_broadcast_scalar(bld, res);
38567ec681f3Smrg   }
38577ec681f3Smrg   return res;
38587ec681f3Smrg}
38597ec681f3Smrg
38607ec681f3Smrgstatic LLVMValueRef
38617ec681f3Smrgdraw_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
38627ec681f3Smrg                                struct lp_build_context *bld,
38637ec681f3Smrg                                boolean is_aindex_indirect,
38647ec681f3Smrg                                LLVMValueRef attrib_index,
38657ec681f3Smrg                                LLVMValueRef swizzle_index)
38667ec681f3Smrg{
38677ec681f3Smrg   const struct draw_tes_llvm_iface *tes = draw_tes_llvm_iface(tes_iface);
38687ec681f3Smrg   struct gallivm_state *gallivm = bld->gallivm;
38697ec681f3Smrg   LLVMBuilderRef builder = gallivm->builder;
38707ec681f3Smrg   LLVMValueRef indices[3];
38717ec681f3Smrg   LLVMValueRef res;
38727ec681f3Smrg   struct lp_type type = bld->type;
38737ec681f3Smrg
38747ec681f3Smrg   if (is_aindex_indirect) {
38757ec681f3Smrg      int i;
38767ec681f3Smrg
38777ec681f3Smrg      res = bld->zero;
38787ec681f3Smrg
38797ec681f3Smrg      for (i = 0; i < type.length; ++i) {
38807ec681f3Smrg         LLVMValueRef idx = lp_build_const_int32(gallivm, i);
38817ec681f3Smrg         LLVMValueRef attr_chan_index = attrib_index;
38827ec681f3Smrg         LLVMValueRef channel_vec;
38837ec681f3Smrg
38847ec681f3Smrg         if (is_aindex_indirect) {
38857ec681f3Smrg            attr_chan_index = LLVMBuildExtractElement(builder,
38867ec681f3Smrg                                                      attrib_index, idx, "");
38877ec681f3Smrg         }
38887ec681f3Smrg
38897ec681f3Smrg         indices[0] = lp_build_const_int32(gallivm, 0);
38907ec681f3Smrg         indices[1] = attr_chan_index;
38917ec681f3Smrg         indices[2] = swizzle_index;
38927ec681f3Smrg
38937ec681f3Smrg         channel_vec = LLVMBuildGEP(builder, tes->input, indices, 3, "");
38947ec681f3Smrg         channel_vec = LLVMBuildLoad(builder, channel_vec, "");
38957ec681f3Smrg
38967ec681f3Smrg         res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
38977ec681f3Smrg      }
38987ec681f3Smrg   } else {
38997ec681f3Smrg      indices[0] = lp_build_const_int32(gallivm, 0);
39007ec681f3Smrg      indices[1] = attrib_index;
39017ec681f3Smrg      indices[2] = swizzle_index;
39027ec681f3Smrg
39037ec681f3Smrg      res = LLVMBuildGEP(builder, tes->input, indices, 3, "");
39047ec681f3Smrg      res = LLVMBuildLoad(builder, res, "");
39057ec681f3Smrg      res = lp_build_broadcast_scalar(bld, res);
39067ec681f3Smrg   }
39077ec681f3Smrg   return res;
39087ec681f3Smrg}
39097ec681f3Smrg
39107ec681f3Smrgstatic void
39117ec681f3Smrgdraw_tes_llvm_generate(struct draw_llvm *llvm,
39127ec681f3Smrg                       struct draw_tes_llvm_variant *variant)
39137ec681f3Smrg{
39147ec681f3Smrg   struct gallivm_state *gallivm = variant->gallivm;
39157ec681f3Smrg   LLVMContextRef context = gallivm->context;
39167ec681f3Smrg   LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
39177ec681f3Smrg   LLVMTypeRef flt_type = LLVMFloatTypeInContext(context);
39187ec681f3Smrg   LLVMTypeRef arg_types[11];
39197ec681f3Smrg   LLVMTypeRef func_type;
39207ec681f3Smrg   LLVMValueRef variant_func;
39217ec681f3Smrg   LLVMValueRef context_ptr;
39227ec681f3Smrg   LLVMValueRef tess_coord[2], io_ptr, input_array, num_tess_coord;
39237ec681f3Smrg   LLVMValueRef view_index;
39247ec681f3Smrg   LLVMValueRef tess_inner, tess_outer, prim_id, patch_vertices_in;
39257ec681f3Smrg   LLVMBasicBlockRef block;
39267ec681f3Smrg   LLVMBuilderRef builder;
39277ec681f3Smrg   LLVMValueRef mask_val;
39287ec681f3Smrg   struct lp_build_context bld, bldvec;
39297ec681f3Smrg   struct lp_build_sampler_soa *sampler = 0;
39307ec681f3Smrg   struct lp_build_image_soa *image = NULL;
39317ec681f3Smrg   struct lp_bld_tgsi_system_values system_values;
39327ec681f3Smrg   char func_name[64];
39337ec681f3Smrg   unsigned i;
39347ec681f3Smrg   struct draw_tes_llvm_iface tes_iface;
39357ec681f3Smrg   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
39367ec681f3Smrg   struct lp_build_mask_context mask;
39377ec681f3Smrg   LLVMValueRef consts_ptr, num_consts_ptr;
39387ec681f3Smrg   LLVMValueRef ssbos_ptr, num_ssbos_ptr;
39397ec681f3Smrg   LLVMValueRef step;
39407ec681f3Smrg   struct lp_type tes_type;
39417ec681f3Smrg   unsigned vector_length = variant->shader->base.vector_length;
39427ec681f3Smrg
39437ec681f3Smrg   memset(&system_values, 0, sizeof(system_values));
39447ec681f3Smrg   memset(&outputs, 0, sizeof(outputs));
39457ec681f3Smrg
39467ec681f3Smrg   snprintf(func_name, sizeof(func_name), "draw_llvm_tes_variant");
39477ec681f3Smrg
39487ec681f3Smrg   arg_types[0] = get_tes_context_ptr_type(variant);    /* context */
39497ec681f3Smrg   arg_types[1] = variant->input_array_type;           /* input */
39507ec681f3Smrg   arg_types[2] = variant->vertex_header_ptr_type;
39517ec681f3Smrg   arg_types[3] = int32_type;
39527ec681f3Smrg   arg_types[4] = int32_type;
39537ec681f3Smrg   arg_types[5] = LLVMPointerType(flt_type, 0);
39547ec681f3Smrg   arg_types[6] = LLVMPointerType(flt_type, 0);
39557ec681f3Smrg   arg_types[7] = LLVMPointerType(LLVMArrayType(flt_type, 4), 0);
39567ec681f3Smrg   arg_types[8] = LLVMPointerType(LLVMArrayType(flt_type, 2), 0);
39577ec681f3Smrg   arg_types[9] = int32_type;
39587ec681f3Smrg   arg_types[10] = int32_type;
39597ec681f3Smrg
39607ec681f3Smrg   func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);
39617ec681f3Smrg   variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
39627ec681f3Smrg
39637ec681f3Smrg   variant->function = variant_func;
39647ec681f3Smrg   LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
39657ec681f3Smrg
39667ec681f3Smrg   for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
39677ec681f3Smrg      if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
39687ec681f3Smrg         lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
39697ec681f3Smrg
39707ec681f3Smrg   if (gallivm->cache && gallivm->cache->data_size)
39717ec681f3Smrg      return;
39727ec681f3Smrg   context_ptr               = LLVMGetParam(variant_func, 0);
39737ec681f3Smrg   input_array               = LLVMGetParam(variant_func, 1);
39747ec681f3Smrg   io_ptr                    = LLVMGetParam(variant_func, 2);
39757ec681f3Smrg   prim_id                   = LLVMGetParam(variant_func, 3);
39767ec681f3Smrg   num_tess_coord            = LLVMGetParam(variant_func, 4);
39777ec681f3Smrg   tess_coord[0]             = LLVMGetParam(variant_func, 5);
39787ec681f3Smrg   tess_coord[1]             = LLVMGetParam(variant_func, 6);
39797ec681f3Smrg   tess_outer                = LLVMGetParam(variant_func, 7);
39807ec681f3Smrg   tess_inner                = LLVMGetParam(variant_func, 8);
39817ec681f3Smrg   patch_vertices_in         = LLVMGetParam(variant_func, 9);
39827ec681f3Smrg   view_index                = LLVMGetParam(variant_func, 10);
39837ec681f3Smrg
39847ec681f3Smrg   lp_build_name(context_ptr, "context");
39857ec681f3Smrg   lp_build_name(input_array, "input");
39867ec681f3Smrg   lp_build_name(io_ptr, "io");
39877ec681f3Smrg   lp_build_name(prim_id, "prim_id");
39887ec681f3Smrg   lp_build_name(num_tess_coord, "num_tess_coord");
39897ec681f3Smrg   lp_build_name(tess_coord[0], "tess_coord[0]");
39907ec681f3Smrg   lp_build_name(tess_coord[1], "tess_coord[1]");
39917ec681f3Smrg   lp_build_name(tess_outer, "tess_outer");
39927ec681f3Smrg   lp_build_name(tess_inner, "tess_inner");
39937ec681f3Smrg   lp_build_name(patch_vertices_in, "patch_vertices_in");
39947ec681f3Smrg   lp_build_name(view_index, "view_index");
39957ec681f3Smrg
39967ec681f3Smrg   tes_iface.base.fetch_vertex_input = draw_tes_llvm_fetch_vertex_input;
39977ec681f3Smrg   tes_iface.base.fetch_patch_input = draw_tes_llvm_fetch_patch_input;
39987ec681f3Smrg   tes_iface.input = input_array;
39997ec681f3Smrg   tes_iface.variant = variant;
40007ec681f3Smrg
40017ec681f3Smrg   block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
40027ec681f3Smrg   builder = gallivm->builder;
40037ec681f3Smrg   LLVMPositionBuilderAtEnd(builder, block);
40047ec681f3Smrg
40057ec681f3Smrg   lp_build_context_init(&bld, gallivm, lp_type_int(32));
40067ec681f3Smrg
40077ec681f3Smrg   memset(&tes_type, 0, sizeof tes_type);
40087ec681f3Smrg   tes_type.floating = TRUE; /* floating point values */
40097ec681f3Smrg   tes_type.sign = TRUE;     /* values are signed */
40107ec681f3Smrg   tes_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
40117ec681f3Smrg   tes_type.width = 32;      /* 32-bit float */
40127ec681f3Smrg   tes_type.length = vector_length;
40137ec681f3Smrg
40147ec681f3Smrg   lp_build_context_init(&bldvec, variant->gallivm, lp_int_type(tes_type));
40157ec681f3Smrg   consts_ptr = draw_tes_jit_context_constants(variant->gallivm, context_ptr);
40167ec681f3Smrg   num_consts_ptr =
40177ec681f3Smrg      draw_tes_jit_context_num_constants(variant->gallivm, context_ptr);
40187ec681f3Smrg
40197ec681f3Smrg   ssbos_ptr = draw_tes_jit_context_ssbos(variant->gallivm, context_ptr);
40207ec681f3Smrg   num_ssbos_ptr =
40217ec681f3Smrg      draw_tes_jit_context_num_ssbos(variant->gallivm, context_ptr);
40227ec681f3Smrg   sampler = draw_llvm_sampler_soa_create(variant->key.samplers, variant->key.nr_samplers);
40237ec681f3Smrg   image = draw_llvm_image_soa_create(draw_tes_llvm_variant_key_images(&variant->key),
40247ec681f3Smrg                                      variant->key.nr_images);
40257ec681f3Smrg   step = lp_build_const_int32(gallivm, vector_length);
40267ec681f3Smrg
40277ec681f3Smrg   system_values.tess_outer = LLVMBuildLoad(builder, tess_outer, "");
40287ec681f3Smrg   system_values.tess_inner = LLVMBuildLoad(builder, tess_inner, "");
40297ec681f3Smrg
40307ec681f3Smrg   system_values.prim_id = lp_build_broadcast_scalar(&bldvec, prim_id);
40317ec681f3Smrg
40327ec681f3Smrg   system_values.view_index = view_index;
40337ec681f3Smrg
40347ec681f3Smrg   system_values.vertices_in = lp_build_broadcast_scalar(&bldvec, patch_vertices_in);
40357ec681f3Smrg
40367ec681f3Smrg   if (variant->key.primid_needed) {
40377ec681f3Smrg      int slot = variant->key.primid_output;
40387ec681f3Smrg      for (unsigned i = 0; i < 4; i++) {
40397ec681f3Smrg         outputs[slot][i] = lp_build_alloca(gallivm, lp_build_int_vec_type(gallivm, tes_type), "primid");
40407ec681f3Smrg         LLVMBuildStore(builder, system_values.prim_id, outputs[slot][i]);
40417ec681f3Smrg      }
40427ec681f3Smrg   }
40437ec681f3Smrg   struct lp_build_loop_state lp_loop;
40447ec681f3Smrg   lp_build_loop_begin(&lp_loop, gallivm, bld.zero);
40457ec681f3Smrg   {
40467ec681f3Smrg      LLVMValueRef io;
40477ec681f3Smrg
40487ec681f3Smrg      io = LLVMBuildGEP(builder, io_ptr, &lp_loop.counter, 1, "");
40497ec681f3Smrg      mask_val = generate_tes_mask_value(variant, tes_type, num_tess_coord, lp_loop.counter);
40507ec681f3Smrg      lp_build_mask_begin(&mask, gallivm, tes_type, mask_val);
40517ec681f3Smrg
40527ec681f3Smrg      system_values.tess_coord = LLVMGetUndef(LLVMArrayType(LLVMVectorType(flt_type, vector_length), 3));
40537ec681f3Smrg      for (i = 0; i < 3; i++) {
40547ec681f3Smrg         LLVMValueRef tess_coord_chan = LLVMGetUndef(LLVMVectorType(flt_type, vector_length));
40557ec681f3Smrg         for (unsigned j = 0; j < vector_length; j++) {
40567ec681f3Smrg            LLVMValueRef idx = LLVMBuildAdd(builder, lp_loop.counter, lp_build_const_int32(gallivm, j), "");
40577ec681f3Smrg            LLVMValueRef tc_val;
40587ec681f3Smrg            if (i == 2) {
40597ec681f3Smrg               if (variant->shader->base.prim_mode == PIPE_PRIM_TRIANGLES) {
40607ec681f3Smrg                  tc_val = lp_build_const_float(gallivm, 1.0);
40617ec681f3Smrg                  tc_val = LLVMBuildFSub(builder, tc_val, lp_build_pointer_get(builder, tess_coord[0], idx), "");
40627ec681f3Smrg                  tc_val = LLVMBuildFSub(builder, tc_val, lp_build_pointer_get(builder, tess_coord[1], idx), "");
40637ec681f3Smrg               } else
40647ec681f3Smrg                  tc_val = lp_build_const_float(gallivm, 0.0);
40657ec681f3Smrg            } else
40667ec681f3Smrg               tc_val = lp_build_pointer_get(builder, tess_coord[i], idx);
40677ec681f3Smrg
40687ec681f3Smrg            tess_coord_chan = LLVMBuildInsertElement(builder, tess_coord_chan, tc_val, lp_build_const_int32(gallivm, j), "");
40697ec681f3Smrg         }
40707ec681f3Smrg         system_values.tess_coord = LLVMBuildInsertValue(builder, system_values.tess_coord, tess_coord_chan, i, "");
40717ec681f3Smrg      }
40727ec681f3Smrg
40737ec681f3Smrg      struct lp_build_tgsi_params params;
40747ec681f3Smrg      memset(&params, 0, sizeof(params));
40757ec681f3Smrg
40767ec681f3Smrg      params.type = tes_type;
40777ec681f3Smrg      params.mask = &mask;
40787ec681f3Smrg      params.consts_ptr = consts_ptr;
40797ec681f3Smrg      params.const_sizes_ptr = num_consts_ptr;
40807ec681f3Smrg      params.system_values = &system_values;
40817ec681f3Smrg      params.context_ptr = context_ptr;
40827ec681f3Smrg      params.sampler = sampler;
40837ec681f3Smrg      params.info = &llvm->draw->tes.tess_eval_shader->info;
40847ec681f3Smrg      params.ssbo_ptr = ssbos_ptr;
40857ec681f3Smrg      params.ssbo_sizes_ptr = num_ssbos_ptr;
40867ec681f3Smrg      params.image = image;
40877ec681f3Smrg      params.tes_iface = &tes_iface.base;
40887ec681f3Smrg      params.aniso_filter_table = draw_tes_jit_context_aniso_filter_table(variant->gallivm, context_ptr);
40897ec681f3Smrg
40907ec681f3Smrg      lp_build_nir_soa(variant->gallivm,
40917ec681f3Smrg                       llvm->draw->tes.tess_eval_shader->state.ir.nir,
40927ec681f3Smrg                       &params,
40937ec681f3Smrg                       outputs);
40947ec681f3Smrg
40957ec681f3Smrg      lp_build_mask_end(&mask);
40967ec681f3Smrg
40977ec681f3Smrg      if (variant->key.clamp_vertex_color) {
40987ec681f3Smrg         const struct tgsi_shader_info *info = &llvm->draw->tes.tess_eval_shader->info;
40997ec681f3Smrg         do_clamp_vertex_color(variant->gallivm,
41007ec681f3Smrg                               tes_type, info,
41017ec681f3Smrg                               outputs);
41027ec681f3Smrg      }
41037ec681f3Smrg      LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
41047ec681f3Smrg                                                     lp_int_type(tes_type), 0);
41057ec681f3Smrg
41067ec681f3Smrg      convert_to_aos(gallivm, io, NULL, outputs, clipmask,
41077ec681f3Smrg                     draw_total_tes_outputs(llvm->draw), tes_type, FALSE);
41087ec681f3Smrg   }
41097ec681f3Smrg   lp_build_loop_end_cond(&lp_loop, num_tess_coord, step, LLVMIntUGE);
41107ec681f3Smrg   sampler->destroy(sampler);
41117ec681f3Smrg   image->destroy(image);
41127ec681f3Smrg
41137ec681f3Smrg   LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
41147ec681f3Smrg   gallivm_verify_function(gallivm, variant_func);
41157ec681f3Smrg}
41167ec681f3Smrg
41177ec681f3Smrgstruct draw_tes_llvm_variant *
41187ec681f3Smrgdraw_tes_llvm_create_variant(struct draw_llvm *llvm,
41197ec681f3Smrg                             unsigned num_outputs,
41207ec681f3Smrg                             const struct draw_tes_llvm_variant_key *key)
41217ec681f3Smrg{
41227ec681f3Smrg   struct draw_tes_llvm_variant *variant;
41237ec681f3Smrg   struct llvm_tess_eval_shader *shader = llvm_tess_eval_shader(llvm->draw->tes.tess_eval_shader);
41247ec681f3Smrg   LLVMTypeRef vertex_header;
41257ec681f3Smrg   char module_name[64];
41267ec681f3Smrg   unsigned char ir_sha1_cache_key[20];
41277ec681f3Smrg   struct lp_cached_code cached = { 0 };
41287ec681f3Smrg   bool needs_caching = false;
41297ec681f3Smrg
41307ec681f3Smrg   variant = MALLOC(sizeof *variant +
41317ec681f3Smrg                    shader->variant_key_size - sizeof variant->key);
41327ec681f3Smrg   if (!variant)
41337ec681f3Smrg      return NULL;
41347ec681f3Smrg
41357ec681f3Smrg   variant->llvm = llvm;
41367ec681f3Smrg   variant->shader = shader;
41377ec681f3Smrg
41387ec681f3Smrg   snprintf(module_name, sizeof(module_name), "draw_llvm_tes_variant%u",
41397ec681f3Smrg            variant->shader->variants_cached);
41407ec681f3Smrg
41417ec681f3Smrg   memcpy(&variant->key, key, shader->variant_key_size);
41427ec681f3Smrg   if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
41437ec681f3Smrg      draw_get_ir_cache_key(shader->base.state.ir.nir,
41447ec681f3Smrg                            key,
41457ec681f3Smrg                            shader->variant_key_size,
41467ec681f3Smrg                            num_outputs,
41477ec681f3Smrg                            ir_sha1_cache_key);
41487ec681f3Smrg
41497ec681f3Smrg      llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
41507ec681f3Smrg                                         &cached,
41517ec681f3Smrg                                         ir_sha1_cache_key);
41527ec681f3Smrg      if (!cached.data_size)
41537ec681f3Smrg         needs_caching = true;
41547ec681f3Smrg   }
41557ec681f3Smrg   variant->gallivm = gallivm_create(module_name, llvm->context, &cached);
41567ec681f3Smrg
41577ec681f3Smrg   create_tes_jit_types(variant);
41587ec681f3Smrg
41597ec681f3Smrg   vertex_header = create_jit_vertex_header(variant->gallivm, num_outputs);
41607ec681f3Smrg
41617ec681f3Smrg   variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
41627ec681f3Smrg
41637ec681f3Smrg   if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
41647ec681f3Smrg      nir_print_shader(llvm->draw->tes.tess_eval_shader->state.ir.nir, stderr);
41657ec681f3Smrg      draw_tes_llvm_dump_variant_key(&variant->key);
41667ec681f3Smrg   }
41677ec681f3Smrg
41687ec681f3Smrg   draw_tes_llvm_generate(llvm, variant);
41697ec681f3Smrg
41707ec681f3Smrg   gallivm_compile_module(variant->gallivm);
41717ec681f3Smrg
41727ec681f3Smrg   variant->jit_func = (draw_tes_jit_func)
41737ec681f3Smrg      gallivm_jit_function(variant->gallivm, variant->function);
41747ec681f3Smrg
41757ec681f3Smrg   if (needs_caching)
41767ec681f3Smrg      llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
41777ec681f3Smrg                                           &cached,
41787ec681f3Smrg                                           ir_sha1_cache_key);
41797ec681f3Smrg   gallivm_free_ir(variant->gallivm);
41807ec681f3Smrg
41817ec681f3Smrg   variant->list_item_global.base = variant;
41827ec681f3Smrg   variant->list_item_local.base = variant;
41837ec681f3Smrg   /*variant->no = */shader->variants_created++;
41847ec681f3Smrg   variant->list_item_global.base = variant;
41857ec681f3Smrg
41867ec681f3Smrg   return variant;
41877ec681f3Smrg}
41887ec681f3Smrg
41897ec681f3Smrgvoid
41907ec681f3Smrgdraw_tes_llvm_destroy_variant(struct draw_tes_llvm_variant *variant)
41917ec681f3Smrg{
41927ec681f3Smrg   struct draw_llvm *llvm = variant->llvm;
41937ec681f3Smrg
41947ec681f3Smrg   if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
41957ec681f3Smrg      debug_printf("Deleting TES variant: %u tes variants,\t%u total variants\n",
41967ec681f3Smrg                    variant->shader->variants_cached, llvm->nr_tes_variants);
41977ec681f3Smrg   }
41987ec681f3Smrg
41997ec681f3Smrg   gallivm_destroy(variant->gallivm);
42007ec681f3Smrg
42017ec681f3Smrg   remove_from_list(&variant->list_item_local);
42027ec681f3Smrg   variant->shader->variants_cached--;
42037ec681f3Smrg   remove_from_list(&variant->list_item_global);
42047ec681f3Smrg   llvm->nr_tes_variants--;
42057ec681f3Smrg   FREE(variant);
42067ec681f3Smrg}
42077ec681f3Smrg
42087ec681f3Smrgstruct draw_tes_llvm_variant_key *
42097ec681f3Smrgdraw_tes_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
42107ec681f3Smrg{
42117ec681f3Smrg   unsigned i;
42127ec681f3Smrg   struct draw_tes_llvm_variant_key *key;
42137ec681f3Smrg   struct draw_sampler_static_state *draw_sampler;
42147ec681f3Smrg   struct draw_image_static_state *draw_image;
42157ec681f3Smrg
42167ec681f3Smrg   key = (struct draw_tes_llvm_variant_key *)store;
42177ec681f3Smrg
42187ec681f3Smrg   memset(key, 0, offsetof(struct draw_tes_llvm_variant_key, samplers[0]));
42197ec681f3Smrg
42207ec681f3Smrg   int primid_output = draw_find_shader_output(llvm->draw, TGSI_SEMANTIC_PRIMID, 0);
42217ec681f3Smrg   if (primid_output >= 0) {
42227ec681f3Smrg      key->primid_output = primid_output;
42237ec681f3Smrg      key->primid_needed = true;
42247ec681f3Smrg   }
42257ec681f3Smrg
42267ec681f3Smrg   key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color &&
42277ec681f3Smrg      llvm->draw->gs.geometry_shader == NULL;
42287ec681f3Smrg
42297ec681f3Smrg   /* All variants of this shader will have the same value for
42307ec681f3Smrg    * nr_samplers.  Not yet trying to compact away holes in the
42317ec681f3Smrg    * sampler array.
42327ec681f3Smrg    */
42337ec681f3Smrg   key->nr_samplers = llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
42347ec681f3Smrg   if (llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
42357ec681f3Smrg      key->nr_sampler_views =
42367ec681f3Smrg         llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
42377ec681f3Smrg   }
42387ec681f3Smrg   else {
42397ec681f3Smrg      key->nr_sampler_views = key->nr_samplers;
42407ec681f3Smrg   }
42417ec681f3Smrg
42427ec681f3Smrg   key->nr_images = llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
42437ec681f3Smrg
42447ec681f3Smrg   draw_sampler = key->samplers;
42457ec681f3Smrg
42467ec681f3Smrg   memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
42477ec681f3Smrg
42487ec681f3Smrg   for (i = 0 ; i < key->nr_samplers; i++) {
42497ec681f3Smrg      lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
42507ec681f3Smrg                                      llvm->draw->samplers[PIPE_SHADER_TESS_EVAL][i]);
42517ec681f3Smrg   }
42527ec681f3Smrg   for (i = 0 ; i < key->nr_sampler_views; i++) {
42537ec681f3Smrg      lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
42547ec681f3Smrg                                      llvm->draw->sampler_views[PIPE_SHADER_TESS_EVAL][i]);
42557ec681f3Smrg   }
42567ec681f3Smrg
42577ec681f3Smrg   draw_image = draw_tes_llvm_variant_key_images(key);
42587ec681f3Smrg   memset(draw_image, 0,
42597ec681f3Smrg          key->nr_images * sizeof *draw_image);
42607ec681f3Smrg   for (i = 0; i < key->nr_images; i++) {
42617ec681f3Smrg      lp_sampler_static_texture_state_image(&draw_image[i].image_state,
42627ec681f3Smrg                                            llvm->draw->images[PIPE_SHADER_TESS_EVAL][i]);
42637ec681f3Smrg   }
42647ec681f3Smrg   return key;
42657ec681f3Smrg}
42667ec681f3Smrg
42677ec681f3Smrgvoid
42687ec681f3Smrgdraw_tes_llvm_dump_variant_key(struct draw_tes_llvm_variant_key *key)
42697ec681f3Smrg{
42707ec681f3Smrg   unsigned i;
42717ec681f3Smrg   struct draw_sampler_static_state *sampler = key->samplers;
42727ec681f3Smrg   struct draw_image_static_state *image = draw_tes_llvm_variant_key_images(key);
42737ec681f3Smrg
42747ec681f3Smrg   if (key->primid_needed)
42757ec681f3Smrg      debug_printf("prim id output %d\n", key->primid_output);
42767ec681f3Smrg   debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
42777ec681f3Smrg   for (i = 0 ; i < key->nr_sampler_views; i++) {
42787ec681f3Smrg      debug_printf("sampler[%i].src_format = %s\n", i,
42797ec681f3Smrg                   util_format_name(sampler[i].texture_state.format));
42807ec681f3Smrg   }
42817ec681f3Smrg
42827ec681f3Smrg   for (i = 0 ; i < key->nr_images; i++)
42837ec681f3Smrg      debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
42847ec681f3Smrg
4285af69d88dSmrg}
4286