1b8e80941Smrg/****************************************************************************
2b8e80941Smrg * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21b8e80941Smrg * IN THE SOFTWARE.
22b8e80941Smrg ***************************************************************************/
23b8e80941Smrg
24b8e80941Smrg// llvm redefines DEBUG
25b8e80941Smrg#pragma push_macro("DEBUG")
26b8e80941Smrg#undef DEBUG
27b8e80941Smrg#include "JitManager.h"
28b8e80941Smrg#include "llvm-c/Core.h"
29b8e80941Smrg#include "llvm/Support/CBindingWrapping.h"
30b8e80941Smrg#include "llvm/IR/LegacyPassManager.h"
31b8e80941Smrg#pragma pop_macro("DEBUG")
32b8e80941Smrg
33b8e80941Smrg#include "state.h"
34b8e80941Smrg#include "gen_state_llvm.h"
35b8e80941Smrg#include "builder.h"
36b8e80941Smrg#include "functionpasses/passes.h"
37b8e80941Smrg
38b8e80941Smrg#include "tgsi/tgsi_strings.h"
39b8e80941Smrg#include "util/u_format.h"
40b8e80941Smrg#include "util/u_prim.h"
41b8e80941Smrg#include "gallivm/lp_bld_init.h"
42b8e80941Smrg#include "gallivm/lp_bld_flow.h"
43b8e80941Smrg#include "gallivm/lp_bld_struct.h"
44b8e80941Smrg#include "gallivm/lp_bld_tgsi.h"
45b8e80941Smrg
46b8e80941Smrg#include "swr_context.h"
47b8e80941Smrg#include "gen_swr_context_llvm.h"
48b8e80941Smrg#include "swr_resource.h"
49b8e80941Smrg#include "swr_state.h"
50b8e80941Smrg#include "swr_screen.h"
51b8e80941Smrg
52b8e80941Smrgusing namespace SwrJit;
53b8e80941Smrgusing namespace llvm;
54b8e80941Smrg
55b8e80941Smrgstatic unsigned
56b8e80941Smrglocate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info);
57b8e80941Smrg
58b8e80941Smrgbool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs)
59b8e80941Smrg{
60b8e80941Smrg   return !memcmp(&lhs, &rhs, sizeof(lhs));
61b8e80941Smrg}
62b8e80941Smrg
63b8e80941Smrgbool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs)
64b8e80941Smrg{
65b8e80941Smrg   return !memcmp(&lhs, &rhs, sizeof(lhs));
66b8e80941Smrg}
67b8e80941Smrg
68b8e80941Smrgbool operator==(const swr_jit_fetch_key &lhs, const swr_jit_fetch_key &rhs)
69b8e80941Smrg{
70b8e80941Smrg   return !memcmp(&lhs, &rhs, sizeof(lhs));
71b8e80941Smrg}
72b8e80941Smrg
73b8e80941Smrgbool operator==(const swr_jit_gs_key &lhs, const swr_jit_gs_key &rhs)
74b8e80941Smrg{
75b8e80941Smrg   return !memcmp(&lhs, &rhs, sizeof(lhs));
76b8e80941Smrg}
77b8e80941Smrg
78b8e80941Smrgstatic void
79b8e80941Smrgswr_generate_sampler_key(const struct lp_tgsi_info &info,
80b8e80941Smrg                         struct swr_context *ctx,
81b8e80941Smrg                         enum pipe_shader_type shader_type,
82b8e80941Smrg                         struct swr_jit_sampler_key &key)
83b8e80941Smrg{
84b8e80941Smrg   key.nr_samplers = info.base.file_max[TGSI_FILE_SAMPLER] + 1;
85b8e80941Smrg
86b8e80941Smrg   for (unsigned i = 0; i < key.nr_samplers; i++) {
87b8e80941Smrg      if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
88b8e80941Smrg         lp_sampler_static_sampler_state(
89b8e80941Smrg            &key.sampler[i].sampler_state,
90b8e80941Smrg            ctx->samplers[shader_type][i]);
91b8e80941Smrg      }
92b8e80941Smrg   }
93b8e80941Smrg
94b8e80941Smrg   /*
95b8e80941Smrg    * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
96b8e80941Smrg    * are dx10-style? Can't really have mixed opcodes, at least not
97b8e80941Smrg    * if we want to skip the holes here (without rescanning tgsi).
98b8e80941Smrg    */
99b8e80941Smrg   if (info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
100b8e80941Smrg      key.nr_sampler_views =
101b8e80941Smrg         info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
102b8e80941Smrg      for (unsigned i = 0; i < key.nr_sampler_views; i++) {
103b8e80941Smrg         if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 31))) {
104b8e80941Smrg            const struct pipe_sampler_view *view =
105b8e80941Smrg               ctx->sampler_views[shader_type][i];
106b8e80941Smrg            lp_sampler_static_texture_state(
107b8e80941Smrg               &key.sampler[i].texture_state, view);
108b8e80941Smrg            if (view) {
109b8e80941Smrg               struct swr_resource *swr_res = swr_resource(view->texture);
110b8e80941Smrg               const struct util_format_description *desc =
111b8e80941Smrg                  util_format_description(view->format);
112b8e80941Smrg               if (swr_res->has_depth && swr_res->has_stencil &&
113b8e80941Smrg                   !util_format_has_depth(desc))
114b8e80941Smrg                  key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
115b8e80941Smrg            }
116b8e80941Smrg         }
117b8e80941Smrg      }
118b8e80941Smrg   } else {
119b8e80941Smrg      key.nr_sampler_views = key.nr_samplers;
120b8e80941Smrg      for (unsigned i = 0; i < key.nr_sampler_views; i++) {
121b8e80941Smrg         if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
122b8e80941Smrg            const struct pipe_sampler_view *view =
123b8e80941Smrg               ctx->sampler_views[shader_type][i];
124b8e80941Smrg            lp_sampler_static_texture_state(
125b8e80941Smrg               &key.sampler[i].texture_state, view);
126b8e80941Smrg            if (view) {
127b8e80941Smrg               struct swr_resource *swr_res = swr_resource(view->texture);
128b8e80941Smrg               const struct util_format_description *desc =
129b8e80941Smrg                  util_format_description(view->format);
130b8e80941Smrg               if (swr_res->has_depth && swr_res->has_stencil &&
131b8e80941Smrg                   !util_format_has_depth(desc))
132b8e80941Smrg                  key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
133b8e80941Smrg            }
134b8e80941Smrg         }
135b8e80941Smrg      }
136b8e80941Smrg   }
137b8e80941Smrg}
138b8e80941Smrg
139b8e80941Smrgvoid
140b8e80941Smrgswr_generate_fs_key(struct swr_jit_fs_key &key,
141b8e80941Smrg                    struct swr_context *ctx,
142b8e80941Smrg                    swr_fragment_shader *swr_fs)
143b8e80941Smrg{
144b8e80941Smrg   memset(&key, 0, sizeof(key));
145b8e80941Smrg
146b8e80941Smrg   key.nr_cbufs = ctx->framebuffer.nr_cbufs;
147b8e80941Smrg   key.light_twoside = ctx->rasterizer->light_twoside;
148b8e80941Smrg   key.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable;
149b8e80941Smrg
150b8e80941Smrg   struct tgsi_shader_info *pPrevShader;
151b8e80941Smrg   if (ctx->gs)
152b8e80941Smrg      pPrevShader = &ctx->gs->info.base;
153b8e80941Smrg   else
154b8e80941Smrg      pPrevShader = &ctx->vs->info.base;
155b8e80941Smrg
156b8e80941Smrg   memcpy(&key.vs_output_semantic_name,
157b8e80941Smrg          &pPrevShader->output_semantic_name,
158b8e80941Smrg          sizeof(key.vs_output_semantic_name));
159b8e80941Smrg   memcpy(&key.vs_output_semantic_idx,
160b8e80941Smrg          &pPrevShader->output_semantic_index,
161b8e80941Smrg          sizeof(key.vs_output_semantic_idx));
162b8e80941Smrg
163b8e80941Smrg   swr_generate_sampler_key(swr_fs->info, ctx, PIPE_SHADER_FRAGMENT, key);
164b8e80941Smrg
165b8e80941Smrg   key.poly_stipple_enable = ctx->rasterizer->poly_stipple_enable &&
166b8e80941Smrg      ctx->poly_stipple.prim_is_poly;
167b8e80941Smrg}
168b8e80941Smrg
169b8e80941Smrgvoid
170b8e80941Smrgswr_generate_vs_key(struct swr_jit_vs_key &key,
171b8e80941Smrg                    struct swr_context *ctx,
172b8e80941Smrg                    swr_vertex_shader *swr_vs)
173b8e80941Smrg{
174b8e80941Smrg   memset(&key, 0, sizeof(key));
175b8e80941Smrg
176b8e80941Smrg   key.clip_plane_mask =
177b8e80941Smrg      swr_vs->info.base.clipdist_writemask ?
178b8e80941Smrg      swr_vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
179b8e80941Smrg      ctx->rasterizer->clip_plane_enable;
180b8e80941Smrg
181b8e80941Smrg   swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key);
182b8e80941Smrg}
183b8e80941Smrg
184b8e80941Smrgvoid
185b8e80941Smrgswr_generate_fetch_key(struct swr_jit_fetch_key &key,
186b8e80941Smrg                       struct swr_vertex_element_state *velems)
187b8e80941Smrg{
188b8e80941Smrg   memset(&key, 0, sizeof(key));
189b8e80941Smrg
190b8e80941Smrg   key.fsState = velems->fsState;
191b8e80941Smrg}
192b8e80941Smrg
193b8e80941Smrgvoid
194b8e80941Smrgswr_generate_gs_key(struct swr_jit_gs_key &key,
195b8e80941Smrg                    struct swr_context *ctx,
196b8e80941Smrg                    swr_geometry_shader *swr_gs)
197b8e80941Smrg{
198b8e80941Smrg   memset(&key, 0, sizeof(key));
199b8e80941Smrg
200b8e80941Smrg   struct tgsi_shader_info *pPrevShader = &ctx->vs->info.base;
201b8e80941Smrg
202b8e80941Smrg   memcpy(&key.vs_output_semantic_name,
203b8e80941Smrg          &pPrevShader->output_semantic_name,
204b8e80941Smrg          sizeof(key.vs_output_semantic_name));
205b8e80941Smrg   memcpy(&key.vs_output_semantic_idx,
206b8e80941Smrg          &pPrevShader->output_semantic_index,
207b8e80941Smrg          sizeof(key.vs_output_semantic_idx));
208b8e80941Smrg
209b8e80941Smrg   swr_generate_sampler_key(swr_gs->info, ctx, PIPE_SHADER_GEOMETRY, key);
210b8e80941Smrg}
211b8e80941Smrg
212b8e80941Smrgstruct BuilderSWR : public Builder {
213b8e80941Smrg   BuilderSWR(JitManager *pJitMgr, const char *pName)
214b8e80941Smrg      : Builder(pJitMgr)
215b8e80941Smrg   {
216b8e80941Smrg      pJitMgr->SetupNewModule();
217b8e80941Smrg      gallivm = gallivm_create(pName, wrap(&JM()->mContext));
218b8e80941Smrg      pJitMgr->mpCurrentModule = unwrap(gallivm->module);
219b8e80941Smrg   }
220b8e80941Smrg
221b8e80941Smrg   ~BuilderSWR() {
222b8e80941Smrg      gallivm_free_ir(gallivm);
223b8e80941Smrg   }
224b8e80941Smrg
225b8e80941Smrg   void WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput,
226b8e80941Smrg                unsigned slot, unsigned channel);
227b8e80941Smrg
228b8e80941Smrg   struct gallivm_state *gallivm;
229b8e80941Smrg   PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key);
230b8e80941Smrg   PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key);
231b8e80941Smrg   PFN_GS_FUNC CompileGS(struct swr_context *ctx, swr_jit_gs_key &key);
232b8e80941Smrg
233b8e80941Smrg   LLVMValueRef
234b8e80941Smrg   swr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface,
235b8e80941Smrg                           struct lp_build_tgsi_context * bld_base,
236b8e80941Smrg                           boolean is_vindex_indirect,
237b8e80941Smrg                           LLVMValueRef vertex_index,
238b8e80941Smrg                           boolean is_aindex_indirect,
239b8e80941Smrg                           LLVMValueRef attrib_index,
240b8e80941Smrg                           LLVMValueRef swizzle_index);
241b8e80941Smrg   void
242b8e80941Smrg   swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base,
243b8e80941Smrg                           struct lp_build_tgsi_context * bld_base,
244b8e80941Smrg                           LLVMValueRef (*outputs)[4],
245b8e80941Smrg                           LLVMValueRef emitted_vertices_vec);
246b8e80941Smrg
247b8e80941Smrg   void
248b8e80941Smrg   swr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base,
249b8e80941Smrg                             struct lp_build_tgsi_context * bld_base,
250b8e80941Smrg                             LLVMValueRef verts_per_prim_vec,
251b8e80941Smrg                             LLVMValueRef emitted_prims_vec);
252b8e80941Smrg
253b8e80941Smrg   void
254b8e80941Smrg   swr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base,
255b8e80941Smrg                        struct lp_build_tgsi_context * bld_base,
256b8e80941Smrg                        LLVMValueRef total_emitted_vertices_vec,
257b8e80941Smrg                        LLVMValueRef emitted_prims_vec);
258b8e80941Smrg
259b8e80941Smrg};
260b8e80941Smrg
261b8e80941Smrgstruct swr_gs_llvm_iface {
262b8e80941Smrg   struct lp_build_tgsi_gs_iface base;
263b8e80941Smrg   struct tgsi_shader_info *info;
264b8e80941Smrg
265b8e80941Smrg   BuilderSWR *pBuilder;
266b8e80941Smrg
267b8e80941Smrg   Value *pGsCtx;
268b8e80941Smrg   SWR_GS_STATE *pGsState;
269b8e80941Smrg   uint32_t num_outputs;
270b8e80941Smrg   uint32_t num_verts_per_prim;
271b8e80941Smrg
272b8e80941Smrg   Value *pVtxAttribMap;
273b8e80941Smrg};
274b8e80941Smrg
275b8e80941Smrg// trampoline functions so we can use the builder llvm construction methods
276b8e80941Smrgstatic LLVMValueRef
277b8e80941Smrgswr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface,
278b8e80941Smrg                           struct lp_build_tgsi_context * bld_base,
279b8e80941Smrg                           boolean is_vindex_indirect,
280b8e80941Smrg                           LLVMValueRef vertex_index,
281b8e80941Smrg                           boolean is_aindex_indirect,
282b8e80941Smrg                           LLVMValueRef attrib_index,
283b8e80941Smrg                           LLVMValueRef swizzle_index)
284b8e80941Smrg{
285b8e80941Smrg    swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface;
286b8e80941Smrg
287b8e80941Smrg    return iface->pBuilder->swr_gs_llvm_fetch_input(gs_iface, bld_base,
288b8e80941Smrg                                                   is_vindex_indirect,
289b8e80941Smrg                                                   vertex_index,
290b8e80941Smrg                                                   is_aindex_indirect,
291b8e80941Smrg                                                   attrib_index,
292b8e80941Smrg                                                   swizzle_index);
293b8e80941Smrg}
294b8e80941Smrg
295b8e80941Smrgstatic void
296b8e80941Smrgswr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base,
297b8e80941Smrg                           struct lp_build_tgsi_context * bld_base,
298b8e80941Smrg                           LLVMValueRef (*outputs)[4],
299b8e80941Smrg                           LLVMValueRef emitted_vertices_vec)
300b8e80941Smrg{
301b8e80941Smrg    swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
302b8e80941Smrg
303b8e80941Smrg    iface->pBuilder->swr_gs_llvm_emit_vertex(gs_base, bld_base,
304b8e80941Smrg                                            outputs,
305b8e80941Smrg                                            emitted_vertices_vec);
306b8e80941Smrg}
307b8e80941Smrg
308b8e80941Smrgstatic void
309b8e80941Smrgswr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base,
310b8e80941Smrg                             struct lp_build_tgsi_context * bld_base,
311b8e80941Smrg                             LLVMValueRef verts_per_prim_vec,
312b8e80941Smrg                             LLVMValueRef emitted_prims_vec)
313b8e80941Smrg{
314b8e80941Smrg    swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
315b8e80941Smrg
316b8e80941Smrg    iface->pBuilder->swr_gs_llvm_end_primitive(gs_base, bld_base,
317b8e80941Smrg                                              verts_per_prim_vec,
318b8e80941Smrg                                              emitted_prims_vec);
319b8e80941Smrg}
320b8e80941Smrg
321b8e80941Smrgstatic void
322b8e80941Smrgswr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base,
323b8e80941Smrg                        struct lp_build_tgsi_context * bld_base,
324b8e80941Smrg                        LLVMValueRef total_emitted_vertices_vec,
325b8e80941Smrg                        LLVMValueRef emitted_prims_vec)
326b8e80941Smrg{
327b8e80941Smrg    swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
328b8e80941Smrg
329b8e80941Smrg    iface->pBuilder->swr_gs_llvm_epilogue(gs_base, bld_base,
330b8e80941Smrg                                         total_emitted_vertices_vec,
331b8e80941Smrg                                         emitted_prims_vec);
332b8e80941Smrg}
333b8e80941Smrg
334b8e80941SmrgLLVMValueRef
335b8e80941SmrgBuilderSWR::swr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface,
336b8e80941Smrg                           struct lp_build_tgsi_context * bld_base,
337b8e80941Smrg                           boolean is_vindex_indirect,
338b8e80941Smrg                           LLVMValueRef vertex_index,
339b8e80941Smrg                           boolean is_aindex_indirect,
340b8e80941Smrg                           LLVMValueRef attrib_index,
341b8e80941Smrg                           LLVMValueRef swizzle_index)
342b8e80941Smrg{
343b8e80941Smrg    swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface;
344b8e80941Smrg    Value *vert_index = unwrap(vertex_index);
345b8e80941Smrg    Value *attr_index = unwrap(attrib_index);
346b8e80941Smrg
347b8e80941Smrg    IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
348b8e80941Smrg
349b8e80941Smrg    if (is_vindex_indirect || is_aindex_indirect) {
350b8e80941Smrg       int i;
351b8e80941Smrg       Value *res = unwrap(bld_base->base.zero);
352b8e80941Smrg       struct lp_type type = bld_base->base.type;
353b8e80941Smrg
354b8e80941Smrg       for (i = 0; i < type.length; i++) {
355b8e80941Smrg          Value *vert_chan_index = vert_index;
356b8e80941Smrg          Value *attr_chan_index = attr_index;
357b8e80941Smrg
358b8e80941Smrg          if (is_vindex_indirect) {
359b8e80941Smrg             vert_chan_index = VEXTRACT(vert_index, C(i));
360b8e80941Smrg          }
361b8e80941Smrg          if (is_aindex_indirect) {
362b8e80941Smrg             attr_chan_index = VEXTRACT(attr_index, C(i));
363b8e80941Smrg          }
364b8e80941Smrg
365b8e80941Smrg          Value *attrib =
366b8e80941Smrg             LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index}));
367b8e80941Smrg
368b8e80941Smrg          Value *pVertex = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pVerts});
369b8e80941Smrg          Value *pInputVertStride = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride});
370b8e80941Smrg
371b8e80941Smrg          Value *pVector = ADD(MUL(vert_chan_index, pInputVertStride), attrib);
372b8e80941Smrg          Value *pInput = LOAD(GEP(pVertex, {pVector, unwrap(swizzle_index)}));
373b8e80941Smrg
374b8e80941Smrg          Value *value = VEXTRACT(pInput, C(i));
375b8e80941Smrg          res = VINSERT(res, value, C(i));
376b8e80941Smrg       }
377b8e80941Smrg
378b8e80941Smrg       return wrap(res);
379b8e80941Smrg    } else {
380b8e80941Smrg       Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index}));
381b8e80941Smrg
382b8e80941Smrg       Value *pVertex = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pVerts});
383b8e80941Smrg       Value *pInputVertStride = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride});
384b8e80941Smrg
385b8e80941Smrg       Value *pVector = ADD(MUL(vert_index, pInputVertStride), attrib);
386b8e80941Smrg
387b8e80941Smrg       Value *pInput = LOAD(GEP(pVertex, {pVector, unwrap(swizzle_index)}));
388b8e80941Smrg
389b8e80941Smrg       return wrap(pInput);
390b8e80941Smrg    }
391b8e80941Smrg}
392b8e80941Smrg
393b8e80941Smrg// GS output stream layout
394b8e80941Smrg#define VERTEX_COUNT_SIZE 32
395b8e80941Smrg#define CONTROL_HEADER_SIZE (8*32)
396b8e80941Smrg
397b8e80941Smrgvoid
398b8e80941SmrgBuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base,
399b8e80941Smrg                           struct lp_build_tgsi_context * bld_base,
400b8e80941Smrg                           LLVMValueRef (*outputs)[4],
401b8e80941Smrg                           LLVMValueRef emitted_vertices_vec)
402b8e80941Smrg{
403b8e80941Smrg    swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
404b8e80941Smrg
405b8e80941Smrg    IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
406b8e80941Smrg
407b8e80941Smrg    const uint32_t headerSize = VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE;
408b8e80941Smrg    const uint32_t attribSize = 4 * sizeof(float);
409b8e80941Smrg    const uint32_t vertSize = attribSize * SWR_VTX_NUM_SLOTS;
410b8e80941Smrg    Value *pVertexOffset = MUL(unwrap(emitted_vertices_vec), VIMMED1(vertSize));
411b8e80941Smrg
412b8e80941Smrg    Value *vMask = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_mask});
413b8e80941Smrg    Value *vMask1 = TRUNC(vMask, VectorType::get(mInt1Ty, mVWidth));
414b8e80941Smrg
415b8e80941Smrg    Value *pStack = STACKSAVE();
416b8e80941Smrg    Value *pTmpPtr = ALLOCA(mFP32Ty, C(4)); // used for dummy write for lane masking
417b8e80941Smrg
418b8e80941Smrg    for (uint32_t attrib = 0; attrib < iface->num_outputs; ++attrib) {
419b8e80941Smrg       uint32_t attribSlot = attrib;
420b8e80941Smrg       uint32_t sgvChannel = 0;
421b8e80941Smrg       if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) {
422b8e80941Smrg          attribSlot = VERTEX_SGV_SLOT;
423b8e80941Smrg          sgvChannel = VERTEX_SGV_POINT_SIZE_COMP;
424b8e80941Smrg       } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_LAYER) {
425b8e80941Smrg          attribSlot = VERTEX_SGV_SLOT;
426b8e80941Smrg          sgvChannel = VERTEX_SGV_RTAI_COMP;
427b8e80941Smrg       } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) {
428b8e80941Smrg          attribSlot = VERTEX_POSITION_SLOT;
429b8e80941Smrg       } else {
430b8e80941Smrg          attribSlot = VERTEX_ATTRIB_START_SLOT + attrib;
431b8e80941Smrg          if (iface->info->writes_position) {
432b8e80941Smrg             attribSlot--;
433b8e80941Smrg          }
434b8e80941Smrg       }
435b8e80941Smrg
436b8e80941Smrg       Value *pOutputOffset = ADD(pVertexOffset, VIMMED1(headerSize + attribSize * attribSlot)); // + sgvChannel ?
437b8e80941Smrg
438b8e80941Smrg       for (uint32_t lane = 0; lane < mVWidth; ++lane) {
439b8e80941Smrg          Value *pLaneOffset = VEXTRACT(pOutputOffset, C(lane));
440b8e80941Smrg          Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
441b8e80941Smrg          Value *pStreamOffset = GEP(pStream, pLaneOffset);
442b8e80941Smrg          pStreamOffset = BITCAST(pStreamOffset, mFP32PtrTy);
443b8e80941Smrg
444b8e80941Smrg          Value *pLaneMask = VEXTRACT(vMask1, C(lane));
445b8e80941Smrg          pStreamOffset = SELECT(pLaneMask, pStreamOffset, pTmpPtr);
446b8e80941Smrg
447b8e80941Smrg          for (uint32_t channel = 0; channel < 4; ++channel) {
448b8e80941Smrg             Value *vData;
449b8e80941Smrg
450b8e80941Smrg             if (attribSlot == VERTEX_SGV_SLOT)
451b8e80941Smrg                vData = LOAD(unwrap(outputs[attrib][0]));
452b8e80941Smrg             else
453b8e80941Smrg                vData = LOAD(unwrap(outputs[attrib][channel]));
454b8e80941Smrg
455b8e80941Smrg             if (attribSlot != VERTEX_SGV_SLOT ||
456b8e80941Smrg                 sgvChannel == channel) {
457b8e80941Smrg                vData = VEXTRACT(vData, C(lane));
458b8e80941Smrg                STORE(vData, pStreamOffset);
459b8e80941Smrg             }
460b8e80941Smrg             pStreamOffset = GEP(pStreamOffset, C(1));
461b8e80941Smrg          }
462b8e80941Smrg       }
463b8e80941Smrg    }
464b8e80941Smrg
465b8e80941Smrg    STACKRESTORE(pStack);
466b8e80941Smrg}
467b8e80941Smrg
468b8e80941Smrgvoid
469b8e80941SmrgBuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base,
470b8e80941Smrg                             struct lp_build_tgsi_context * bld_base,
471b8e80941Smrg                             LLVMValueRef verts_per_prim_vec,
472b8e80941Smrg                             LLVMValueRef emitted_prims_vec)
473b8e80941Smrg{
474b8e80941Smrg    swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
475b8e80941Smrg
476b8e80941Smrg    IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
477b8e80941Smrg
478b8e80941Smrg    Value *vMask = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_mask });
479b8e80941Smrg    Value *vMask1 = TRUNC(vMask, VectorType::get(mInt1Ty, 8));
480b8e80941Smrg
481b8e80941Smrg    uint32_t vertsPerPrim = iface->num_verts_per_prim;
482b8e80941Smrg
483b8e80941Smrg    Value *vCount =
484b8e80941Smrg       ADD(MUL(unwrap(emitted_prims_vec), VIMMED1(vertsPerPrim)),
485b8e80941Smrg           unwrap(verts_per_prim_vec));
486b8e80941Smrg
487b8e80941Smrg    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
488b8e80941Smrg    vCount = LOAD(unwrap(bld->total_emitted_vertices_vec_ptr));
489b8e80941Smrg
490b8e80941Smrg    struct lp_exec_mask *exec_mask = &bld->exec_mask;
491b8e80941Smrg    Value *mask = unwrap(lp_build_mask_value(bld->mask));
492b8e80941Smrg    if (exec_mask->has_mask)
493b8e80941Smrg       mask = AND(mask, unwrap(exec_mask->exec_mask));
494b8e80941Smrg
495b8e80941Smrg    Value *cmpMask = VMASK(ICMP_NE(unwrap(verts_per_prim_vec), VIMMED1(0)));
496b8e80941Smrg    mask = AND(mask, cmpMask);
497b8e80941Smrg    vMask1 = TRUNC(mask, VectorType::get(mInt1Ty, 8));
498b8e80941Smrg
499b8e80941Smrg    vCount = SUB(vCount, VIMMED1(1));
500b8e80941Smrg    Value *vOffset = ADD(UDIV(vCount, VIMMED1(8)), VIMMED1(VERTEX_COUNT_SIZE));
501b8e80941Smrg    Value *vValue = SHL(VIMMED1(1), UREM(vCount, VIMMED1(8)));
502b8e80941Smrg
503b8e80941Smrg    vValue = TRUNC(vValue, VectorType::get(mInt8Ty, 8));
504b8e80941Smrg
505b8e80941Smrg    Value *pStack = STACKSAVE();
506b8e80941Smrg    Value *pTmpPtr = ALLOCA(mInt8Ty, C(4)); // used for dummy read/write for lane masking
507b8e80941Smrg
508b8e80941Smrg    for (uint32_t lane = 0; lane < mVWidth; ++lane) {
509b8e80941Smrg       Value *vLaneOffset = VEXTRACT(vOffset, C(lane));
510b8e80941Smrg       Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
511b8e80941Smrg       Value *pStreamOffset = GEP(pStream, vLaneOffset);
512b8e80941Smrg
513b8e80941Smrg       Value *pLaneMask = VEXTRACT(vMask1, C(lane));
514b8e80941Smrg       pStreamOffset = SELECT(pLaneMask, pStreamOffset, pTmpPtr);
515b8e80941Smrg
516b8e80941Smrg       Value *vVal = LOAD(pStreamOffset);
517b8e80941Smrg       vVal = OR(vVal, VEXTRACT(vValue, C(lane)));
518b8e80941Smrg       STORE(vVal, pStreamOffset);
519b8e80941Smrg    }
520b8e80941Smrg
521b8e80941Smrg    STACKRESTORE(pStack);
522b8e80941Smrg}
523b8e80941Smrg
524b8e80941Smrgvoid
525b8e80941SmrgBuilderSWR::swr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base,
526b8e80941Smrg                        struct lp_build_tgsi_context * bld_base,
527b8e80941Smrg                        LLVMValueRef total_emitted_vertices_vec,
528b8e80941Smrg                        LLVMValueRef emitted_prims_vec)
529b8e80941Smrg{
530b8e80941Smrg   swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
531b8e80941Smrg
532b8e80941Smrg   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
533b8e80941Smrg
534b8e80941Smrg   // Store emit count to each output stream in the first DWORD
535b8e80941Smrg   for (uint32_t lane = 0; lane < mVWidth; ++lane)
536b8e80941Smrg   {
537b8e80941Smrg      Value* pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
538b8e80941Smrg      pStream = BITCAST(pStream, mInt32PtrTy);
539b8e80941Smrg      Value* pLaneCount = VEXTRACT(unwrap(total_emitted_vertices_vec), C(lane));
540b8e80941Smrg      STORE(pLaneCount, pStream);
541b8e80941Smrg   }
542b8e80941Smrg}
543b8e80941Smrg
544b8e80941SmrgPFN_GS_FUNC
545b8e80941SmrgBuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
546b8e80941Smrg{
547b8e80941Smrg   SWR_GS_STATE *pGS = &ctx->gs->gsState;
548b8e80941Smrg   struct tgsi_shader_info *info = &ctx->gs->info.base;
549b8e80941Smrg
550b8e80941Smrg   memset(pGS, 0, sizeof(*pGS));
551b8e80941Smrg
552b8e80941Smrg   pGS->gsEnable = true;
553b8e80941Smrg
554b8e80941Smrg   pGS->numInputAttribs = info->num_inputs;
555b8e80941Smrg   pGS->outputTopology =
556b8e80941Smrg      swr_convert_prim_topology(info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]);
557b8e80941Smrg   pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
558b8e80941Smrg   pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS];
559b8e80941Smrg
560b8e80941Smrg   // XXX: single stream for now...
561b8e80941Smrg   pGS->isSingleStream = true;
562b8e80941Smrg   pGS->singleStreamID = 0;
563b8e80941Smrg
564b8e80941Smrg   pGS->vertexAttribOffset = VERTEX_ATTRIB_START_SLOT; // TODO: optimize
565b8e80941Smrg   pGS->srcVertexAttribOffset = VERTEX_ATTRIB_START_SLOT; // TODO: optimize
566b8e80941Smrg   pGS->inputVertStride = pGS->numInputAttribs + pGS->vertexAttribOffset;
567b8e80941Smrg   pGS->outputVertexSize = SWR_VTX_NUM_SLOTS;
568b8e80941Smrg   pGS->controlDataSize = 8; // GS ouputs max of 8 32B units
569b8e80941Smrg   pGS->controlDataOffset = VERTEX_COUNT_SIZE;
570b8e80941Smrg   pGS->outputVertexOffset = pGS->controlDataOffset + CONTROL_HEADER_SIZE;
571b8e80941Smrg
572b8e80941Smrg   pGS->allocationSize =
573b8e80941Smrg      VERTEX_COUNT_SIZE + // vertex count
574b8e80941Smrg      CONTROL_HEADER_SIZE + // control header
575b8e80941Smrg      (SWR_VTX_NUM_SLOTS * 16) * // sizeof vertex
576b8e80941Smrg      pGS->maxNumVerts; // num verts
577b8e80941Smrg
578b8e80941Smrg   struct swr_geometry_shader *gs = ctx->gs;
579b8e80941Smrg
580b8e80941Smrg   LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
581b8e80941Smrg   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
582b8e80941Smrg
583b8e80941Smrg   memset(outputs, 0, sizeof(outputs));
584b8e80941Smrg
585b8e80941Smrg   AttrBuilder attrBuilder;
586b8e80941Smrg   attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
587b8e80941Smrg
588b8e80941Smrg   std::vector<Type *> gsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
589b8e80941Smrg                              PointerType::get(mInt8Ty, 0),
590b8e80941Smrg                              PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
591b8e80941Smrg   FunctionType *vsFuncType =
592b8e80941Smrg      FunctionType::get(Type::getVoidTy(JM()->mContext), gsArgs, false);
593b8e80941Smrg
594b8e80941Smrg   // create new vertex shader function
595b8e80941Smrg   auto pFunction = Function::Create(vsFuncType,
596b8e80941Smrg                                     GlobalValue::ExternalLinkage,
597b8e80941Smrg                                     "GS",
598b8e80941Smrg                                     JM()->mpCurrentModule);
599b8e80941Smrg#if HAVE_LLVM < 0x0500
600b8e80941Smrg   AttributeSet attrSet = AttributeSet::get(
601b8e80941Smrg      JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
602b8e80941Smrg   pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
603b8e80941Smrg#else
604b8e80941Smrg   pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
605b8e80941Smrg#endif
606b8e80941Smrg
607b8e80941Smrg   BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
608b8e80941Smrg   IRB()->SetInsertPoint(block);
609b8e80941Smrg   LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
610b8e80941Smrg
611b8e80941Smrg   auto argitr = pFunction->arg_begin();
612b8e80941Smrg   Value *hPrivateData = &*argitr++;
613b8e80941Smrg   hPrivateData->setName("hPrivateData");
614b8e80941Smrg   Value *pWorkerData = &*argitr++;
615b8e80941Smrg   pWorkerData->setName("pWorkerData");
616b8e80941Smrg   Value *pGsCtx = &*argitr++;
617b8e80941Smrg   pGsCtx->setName("gsCtx");
618b8e80941Smrg
619b8e80941Smrg   Value *consts_ptr =
620b8e80941Smrg      GEP(hPrivateData, {C(0), C(swr_draw_context_constantGS)});
621b8e80941Smrg   consts_ptr->setName("gs_constants");
622b8e80941Smrg   Value *const_sizes_ptr =
623b8e80941Smrg      GEP(hPrivateData, {0, swr_draw_context_num_constantsGS});
624b8e80941Smrg   const_sizes_ptr->setName("num_gs_constants");
625b8e80941Smrg
626b8e80941Smrg   struct lp_build_sampler_soa *sampler =
627b8e80941Smrg      swr_sampler_soa_create(key.sampler, PIPE_SHADER_GEOMETRY);
628b8e80941Smrg
629b8e80941Smrg   struct lp_bld_tgsi_system_values system_values;
630b8e80941Smrg   memset(&system_values, 0, sizeof(system_values));
631b8e80941Smrg   system_values.prim_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_PrimitiveID}));
632b8e80941Smrg   system_values.instance_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_InstanceID}));
633b8e80941Smrg
634b8e80941Smrg   std::vector<Constant*> mapConstants;
635b8e80941Smrg   Value *vtxAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
636b8e80941Smrg   for (unsigned slot = 0; slot < info->num_inputs; slot++) {
637b8e80941Smrg      ubyte semantic_name = info->input_semantic_name[slot];
638b8e80941Smrg      ubyte semantic_idx = info->input_semantic_index[slot];
639b8e80941Smrg
640b8e80941Smrg      unsigned vs_slot = locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base);
641b8e80941Smrg
642b8e80941Smrg      vs_slot += VERTEX_ATTRIB_START_SLOT;
643b8e80941Smrg
644b8e80941Smrg      if (ctx->vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION)
645b8e80941Smrg         vs_slot--;
646b8e80941Smrg
647b8e80941Smrg      if (semantic_name == TGSI_SEMANTIC_POSITION)
648b8e80941Smrg         vs_slot = VERTEX_POSITION_SLOT;
649b8e80941Smrg
650b8e80941Smrg      STORE(C(vs_slot), vtxAttribMap, {0, slot});
651b8e80941Smrg      mapConstants.push_back(C(vs_slot));
652b8e80941Smrg   }
653b8e80941Smrg
654b8e80941Smrg   struct lp_build_mask_context mask;
655b8e80941Smrg   Value *mask_val = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_mask}, "gsMask");
656b8e80941Smrg   lp_build_mask_begin(&mask, gallivm,
657b8e80941Smrg                       lp_type_float_vec(32, 32 * 8), wrap(mask_val));
658b8e80941Smrg
659b8e80941Smrg   // zero out cut buffer so we can load/modify/store bits
660b8e80941Smrg   for (uint32_t lane = 0; lane < mVWidth; ++lane)
661b8e80941Smrg   {
662b8e80941Smrg      Value* pStream = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
663b8e80941Smrg      MEMSET(pStream, C((char)0), VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE, sizeof(float) * KNOB_SIMD_WIDTH);
664b8e80941Smrg   }
665b8e80941Smrg
666b8e80941Smrg   struct swr_gs_llvm_iface gs_iface;
667b8e80941Smrg   gs_iface.base.fetch_input = ::swr_gs_llvm_fetch_input;
668b8e80941Smrg   gs_iface.base.emit_vertex = ::swr_gs_llvm_emit_vertex;
669b8e80941Smrg   gs_iface.base.end_primitive = ::swr_gs_llvm_end_primitive;
670b8e80941Smrg   gs_iface.base.gs_epilogue = ::swr_gs_llvm_epilogue;
671b8e80941Smrg   gs_iface.pBuilder = this;
672b8e80941Smrg   gs_iface.pGsCtx = pGsCtx;
673b8e80941Smrg   gs_iface.pGsState = pGS;
674b8e80941Smrg   gs_iface.num_outputs = gs->info.base.num_outputs;
675b8e80941Smrg   gs_iface.num_verts_per_prim =
676b8e80941Smrg      u_vertices_per_prim((pipe_prim_type)info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]);
677b8e80941Smrg   gs_iface.info = info;
678b8e80941Smrg   gs_iface.pVtxAttribMap = vtxAttribMap;
679b8e80941Smrg
680b8e80941Smrg   lp_build_tgsi_soa(gallivm,
681b8e80941Smrg                     gs->pipe.tokens,
682b8e80941Smrg                     lp_type_float_vec(32, 32 * 8),
683b8e80941Smrg                     &mask,
684b8e80941Smrg                     wrap(consts_ptr),
685b8e80941Smrg                     wrap(const_sizes_ptr),
686b8e80941Smrg                     &system_values,
687b8e80941Smrg                     inputs,
688b8e80941Smrg                     outputs,
689b8e80941Smrg                     wrap(hPrivateData), // (sampler context)
690b8e80941Smrg                     NULL, // thread data
691b8e80941Smrg                     sampler,
692b8e80941Smrg                     &gs->info.base,
693b8e80941Smrg                     &gs_iface.base);
694b8e80941Smrg
695b8e80941Smrg   lp_build_mask_end(&mask);
696b8e80941Smrg
697b8e80941Smrg   sampler->destroy(sampler);
698b8e80941Smrg
699b8e80941Smrg   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
700b8e80941Smrg
701b8e80941Smrg   RET_VOID();
702b8e80941Smrg
703b8e80941Smrg   gallivm_verify_function(gallivm, wrap(pFunction));
704b8e80941Smrg   gallivm_compile_module(gallivm);
705b8e80941Smrg
706b8e80941Smrg   PFN_GS_FUNC pFunc =
707b8e80941Smrg      (PFN_GS_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
708b8e80941Smrg
709b8e80941Smrg   debug_printf("geom shader  %p\n", pFunc);
710b8e80941Smrg   assert(pFunc && "Error: GeomShader = NULL");
711b8e80941Smrg
712b8e80941Smrg   JM()->mIsModuleFinalized = true;
713b8e80941Smrg
714b8e80941Smrg   return pFunc;
715b8e80941Smrg}
716b8e80941Smrg
717b8e80941SmrgPFN_GS_FUNC
718b8e80941Smrgswr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key)
719b8e80941Smrg{
720b8e80941Smrg   BuilderSWR builder(
721b8e80941Smrg      reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
722b8e80941Smrg      "GS");
723b8e80941Smrg   PFN_GS_FUNC func = builder.CompileGS(ctx, key);
724b8e80941Smrg
725b8e80941Smrg   ctx->gs->map.insert(std::make_pair(key, make_unique<VariantGS>(builder.gallivm, func)));
726b8e80941Smrg   return func;
727b8e80941Smrg}
728b8e80941Smrg
729b8e80941Smrgvoid
730b8e80941SmrgBuilderSWR::WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, unsigned slot, unsigned channel)
731b8e80941Smrg{
732b8e80941Smrg#if USE_SIMD16_FRONTEND && !USE_SIMD16_VS
733b8e80941Smrg   // interleave the simdvertex components into the dest simd16vertex
734b8e80941Smrg   //   slot16offset = slot8offset * 2
735b8e80941Smrg   //   comp16offset = comp8offset * 2 + alternateOffset
736b8e80941Smrg
737b8e80941Smrg   Value *offset = LOAD(pVsContext, { 0, SWR_VS_CONTEXT_AlternateOffset });
738b8e80941Smrg   Value *pOut = GEP(pVtxOutput, { C(0), C(0), C(slot * 2), offset } );
739b8e80941Smrg   STORE(pVal, pOut, {channel * 2});
740b8e80941Smrg#else
741b8e80941Smrg   Value *pOut = GEP(pVtxOutput, {0, 0, slot});
742b8e80941Smrg   STORE(pVal, pOut, {0, channel});
743b8e80941Smrg#endif
744b8e80941Smrg}
745b8e80941Smrg
/*
 * Build and JIT-compile the vertex shader currently bound in ctx->vs.
 *
 * The generated LLVM function is named "VS", returns void and takes three
 * pointer arguments: the swr_draw_context ("hPrivateData"), an i8 pointer
 * ("pWorkerData", only named here, not otherwise used) and the
 * SWR_VS_CONTEXT ("vsCtx").
 *
 * Steps, in emission order:
 *   1. load the shader inputs that the TGSI info marks as used,
 *   2. translate the TGSI tokens with lp_build_tgsi_soa(),
 *   3. copy the shader outputs into the output vertex via WriteVS(),
 *   4. when user clip planes are enabled or cull distances are written,
 *      emit the clip/cull distance values,
 *   5. verify, compile and JIT the function.
 *
 * ctx - context providing the bound vertex shader and rasterizer state
 * key - variant key; only key.sampler is read here (for the sampler
 *       generator)
 *
 * Returns the jitted entry point. The module is flagged as finalized on
 * the JIT manager before returning.
 */
746b8e80941SmrgPFN_VERTEX_FUNC
747b8e80941SmrgBuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
748b8e80941Smrg{
749b8e80941Smrg   struct swr_vertex_shader *swr_vs = ctx->vs;
750b8e80941Smrg
   // NOTE(review): only `outputs` is zeroed; `inputs` entries whose channel is
   // not set in input_usage_mask stay uninitialized - confirm that
   // lp_build_tgsi_soa() never reads them.
751b8e80941Smrg   LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
752b8e80941Smrg   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
753b8e80941Smrg
754b8e80941Smrg   memset(outputs, 0, sizeof(outputs));
755b8e80941Smrg
   // Request a stack alignment of one SIMD vector of floats for the function.
756b8e80941Smrg   AttrBuilder attrBuilder;
757b8e80941Smrg   attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
758b8e80941Smrg
759b8e80941Smrg   std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
760b8e80941Smrg                              PointerType::get(mInt8Ty, 0),
761b8e80941Smrg                              PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
762b8e80941Smrg   FunctionType *vsFuncType =
763b8e80941Smrg      FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false);
764b8e80941Smrg
765b8e80941Smrg   // create new vertex shader function
766b8e80941Smrg   auto pFunction = Function::Create(vsFuncType,
767b8e80941Smrg                                     GlobalValue::ExternalLinkage,
768b8e80941Smrg                                     "VS",
769b8e80941Smrg                                     JM()->mpCurrentModule);
   // The attribute API differs between LLVM versions (AttributeSet vs.
   // AttributeList).
770b8e80941Smrg#if HAVE_LLVM < 0x0500
771b8e80941Smrg   AttributeSet attrSet = AttributeSet::get(
772b8e80941Smrg      JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
773b8e80941Smrg   pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
774b8e80941Smrg#else
775b8e80941Smrg   pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
776b8e80941Smrg#endif
777b8e80941Smrg
   // Two builders are in play: the C++ IRBuilder used by the macros in this
   // file and the gallivm C builder used by the TGSI translator. Both are
   // positioned at the entry block.
778b8e80941Smrg   BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
779b8e80941Smrg   IRB()->SetInsertPoint(block);
780b8e80941Smrg   LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
781b8e80941Smrg
   // Name the three function arguments.
782b8e80941Smrg   auto argitr = pFunction->arg_begin();
783b8e80941Smrg   Value *hPrivateData = &*argitr++;
784b8e80941Smrg   hPrivateData->setName("hPrivateData");
785b8e80941Smrg   Value *pWorkerData = &*argitr++;
786b8e80941Smrg   pWorkerData->setName("pWorkerData");
787b8e80941Smrg   Value *pVsCtx = &*argitr++;
788b8e80941Smrg   pVsCtx->setName("vsCtx");
789b8e80941Smrg
   // Addresses of the VS constant buffer table and its size table inside the
   // draw context.
790b8e80941Smrg   Value *consts_ptr = GEP(hPrivateData, {C(0), C(swr_draw_context_constantVS)});
791b8e80941Smrg
792b8e80941Smrg   consts_ptr->setName("vs_constants");
793b8e80941Smrg   Value *const_sizes_ptr =
794b8e80941Smrg      GEP(hPrivateData, {0, swr_draw_context_num_constantsVS});
795b8e80941Smrg   const_sizes_ptr->setName("num_vs_constants");
796b8e80941Smrg
797b8e80941Smrg   Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin});
798b8e80941Smrg#if USE_SIMD16_VS
799b8e80941Smrg   vtxInput = BITCAST(vtxInput, PointerType::get(Gen_simd16vertex(JM()), 0));
800b8e80941Smrg#endif
801b8e80941Smrg
   // Load only the input channels that the shader info marks as used.
802b8e80941Smrg   for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
803b8e80941Smrg      const unsigned mask = swr_vs->info.base.input_usage_mask[attrib];
804b8e80941Smrg      for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
805b8e80941Smrg         if (mask & (1 << channel)) {
806b8e80941Smrg            inputs[attrib][channel] =
807b8e80941Smrg               wrap(LOAD(vtxInput, {0, 0, attrib, channel}));
808b8e80941Smrg         }
809b8e80941Smrg      }
810b8e80941Smrg   }
811b8e80941Smrg
812b8e80941Smrg   struct lp_build_sampler_soa *sampler =
813b8e80941Smrg      swr_sampler_soa_create(key.sampler, PIPE_SHADER_VERTEX);
814b8e80941Smrg
   // System values handed to the translator: instance id and vertex id, both
   // read from the VS context.
815b8e80941Smrg   struct lp_bld_tgsi_system_values system_values;
816b8e80941Smrg   memset(&system_values, 0, sizeof(system_values));
817b8e80941Smrg   system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID}));
818b8e80941Smrg
819b8e80941Smrg#if USE_SIMD16_VS
820b8e80941Smrg   system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID16}));
821b8e80941Smrg#else
822b8e80941Smrg   system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID}));
823b8e80941Smrg#endif
824b8e80941Smrg
825b8e80941Smrg#if USE_SIMD16_VS
826b8e80941Smrg   uint32_t vectorWidth = mVWidth16;
827b8e80941Smrg#else
828b8e80941Smrg   uint32_t vectorWidth = mVWidth;
829b8e80941Smrg#endif
830b8e80941Smrg
   // Translate the TGSI tokens; the translator fills `outputs` with the
   // values that are loaded from below.
831b8e80941Smrg   lp_build_tgsi_soa(gallivm,
832b8e80941Smrg                     swr_vs->pipe.tokens,
833b8e80941Smrg                     lp_type_float_vec(32, 32 * vectorWidth),
834b8e80941Smrg                     NULL, // mask
835b8e80941Smrg                     wrap(consts_ptr),
836b8e80941Smrg                     wrap(const_sizes_ptr),
837b8e80941Smrg                     &system_values,
838b8e80941Smrg                     inputs,
839b8e80941Smrg                     outputs,
840b8e80941Smrg                     wrap(hPrivateData), // (sampler context)
841b8e80941Smrg                     NULL, // thread data
842b8e80941Smrg                     sampler, // sampler
843b8e80941Smrg                     &swr_vs->info.base,
844b8e80941Smrg                     NULL); // no geometry shader interface
845b8e80941Smrg
846b8e80941Smrg   sampler->destroy(sampler);
847b8e80941Smrg
   // Continue emitting with the C++ builder from the block where the gallivm
   // builder stopped.
848b8e80941Smrg   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
849b8e80941Smrg
850b8e80941Smrg   Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout});
851b8e80941Smrg#if USE_SIMD16_VS
852b8e80941Smrg   vtxOutput = BITCAST(vtxOutput, PointerType::get(Gen_simd16vertex(JM()), 0));
853b8e80941Smrg#endif
854b8e80941Smrg
   // Copy every output channel the translator produced into the output
   // vertex, remapping the TGSI output index to an SWR vertex slot.
855b8e80941Smrg   for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
856b8e80941Smrg      for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) {
857b8e80941Smrg         if (!outputs[attrib][channel])
858b8e80941Smrg            continue;
859b8e80941Smrg
860b8e80941Smrg         Value *val;
861b8e80941Smrg         uint32_t outSlot;
862b8e80941Smrg
863b8e80941Smrg         if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) {
            // Point size: written once, from channel 0 of the output, into
            // the VERTEX_SGV_POINT_SIZE_COMP component of the SGV slot.
864b8e80941Smrg            if (channel != VERTEX_SGV_POINT_SIZE_COMP)
865b8e80941Smrg               continue;
866b8e80941Smrg            val = LOAD(unwrap(outputs[attrib][0]));
867b8e80941Smrg            outSlot = VERTEX_SGV_SLOT;
868b8e80941Smrg         } else if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) {
869b8e80941Smrg            val = LOAD(unwrap(outputs[attrib][channel]));
870b8e80941Smrg            outSlot = VERTEX_POSITION_SLOT;
871b8e80941Smrg         } else {
            // Generic attribute: slots start at VERTEX_ATTRIB_START_SLOT; when
            // output 0 is the position it does not occupy an attribute slot,
            // so the index is shifted down by one.
872b8e80941Smrg            val = LOAD(unwrap(outputs[attrib][channel]));
873b8e80941Smrg            outSlot = VERTEX_ATTRIB_START_SLOT + attrib;
874b8e80941Smrg            if (swr_vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION)
875b8e80941Smrg               outSlot--;
876b8e80941Smrg         }
877b8e80941Smrg
878b8e80941Smrg         WriteVS(val, pVsCtx, vtxOutput, outSlot, channel);
879b8e80941Smrg      }
880b8e80941Smrg   }
881b8e80941Smrg
   // Clip/cull distances are only emitted when user clip planes are enabled
   // or the shader writes cull distances.
882b8e80941Smrg   if (ctx->rasterizer->clip_plane_enable ||
883b8e80941Smrg       swr_vs->info.base.culldist_writemask) {
884b8e80941Smrg      unsigned clip_mask = ctx->rasterizer->clip_plane_enable;
885b8e80941Smrg
      // Pick the output used as the clip vertex: the CLIPVERTEX output when
      // the shader writes one, otherwise the POSITION (index 0) output. If
      // neither is found `cv` stays 0.
      // NOTE(review): the locate_linkage() result is used as an array index
      // without checking for its 0xFFFFFFFF "not found" value - presumably
      // writes_clipvertex guarantees a match; confirm.
886b8e80941Smrg      unsigned cv = 0;
887b8e80941Smrg      if (swr_vs->info.base.writes_clipvertex) {
888b8e80941Smrg         cv = locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0,
889b8e80941Smrg                             &swr_vs->info.base);
890b8e80941Smrg      } else {
891b8e80941Smrg         for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
892b8e80941Smrg            if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
893b8e80941Smrg                swr_vs->info.base.output_semantic_index[i] == 0) {
894b8e80941Smrg               cv = i;
895b8e80941Smrg               break;
896b8e80941Smrg            }
897b8e80941Smrg         }
898b8e80941Smrg      }
      // Load the four clip vertex components (through the gallivm builder).
899b8e80941Smrg      LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");
900b8e80941Smrg      LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], "");
901b8e80941Smrg      LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], "");
902b8e80941Smrg      LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], "");
903b8e80941Smrg
      // One iteration per clip plane; planes 0-3 are written to the LO
      // clip/cull slot and planes 4 and up to the HI slot.
904b8e80941Smrg      for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) {
905b8e80941Smrg         // clip distance overrides user clip planes
906b8e80941Smrg         if ((swr_vs->info.base.clipdist_writemask & clip_mask & (1 << val)) ||
907b8e80941Smrg             ((swr_vs->info.base.culldist_writemask << swr_vs->info.base.num_written_clipdistance) & (1 << val))) {
            // Shader-written distance: copy it straight from the CLIPDIST
            // output (semantic index 0 for planes 0-3, index 1 otherwise).
            // Note this `cv` shadows the clip vertex index declared above.
            // NOTE(review): as above, the locate_linkage() result is not
            // checked before indexing `outputs` - confirm a match is
            // guaranteed by the write masks.
908b8e80941Smrg            unsigned cv = locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1,
909b8e80941Smrg                                         &swr_vs->info.base);
910b8e80941Smrg            if (val < 4) {
911b8e80941Smrg               LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val], "");
912b8e80941Smrg               WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
913b8e80941Smrg            } else {
914b8e80941Smrg               LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val - 4], "");
915b8e80941Smrg               WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4);
916b8e80941Smrg            }
917b8e80941Smrg            continue;
918b8e80941Smrg         }
919b8e80941Smrg
         // Nothing to compute for user clip planes that are not enabled.
920b8e80941Smrg         if (!(clip_mask & (1 << val)))
921b8e80941Smrg            continue;
922b8e80941Smrg
         // User clip plane: load the four plane coefficients from the draw
         // context and broadcast each across the SIMD width.
923b8e80941Smrg         Value *px = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 0}));
924b8e80941Smrg         Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1}));
925b8e80941Smrg         Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2}));
926b8e80941Smrg         Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3}));
927b8e80941Smrg#if USE_SIMD16_VS
928b8e80941Smrg         Value *bpx = VBROADCAST_16(px);
929b8e80941Smrg         Value *bpy = VBROADCAST_16(py);
930b8e80941Smrg         Value *bpz = VBROADCAST_16(pz);
931b8e80941Smrg         Value *bpw = VBROADCAST_16(pw);
932b8e80941Smrg#else
933b8e80941Smrg         Value *bpx = VBROADCAST(px);
934b8e80941Smrg         Value *bpy = VBROADCAST(py);
935b8e80941Smrg         Value *bpz = VBROADCAST(pz);
936b8e80941Smrg         Value *bpw = VBROADCAST(pw);
937b8e80941Smrg#endif
         // distance = cx*px + cy*py + cz*pz + cw*pw
938b8e80941Smrg         Value *dist = FADD(FMUL(unwrap(cx), bpx),
939b8e80941Smrg                            FADD(FMUL(unwrap(cy), bpy),
940b8e80941Smrg                                 FADD(FMUL(unwrap(cz), bpz),
941b8e80941Smrg                                      FMUL(unwrap(cw), bpw))));
942b8e80941Smrg
943b8e80941Smrg         if (val < 4)
944b8e80941Smrg            WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
945b8e80941Smrg         else
946b8e80941Smrg            WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4);
947b8e80941Smrg      }
948b8e80941Smrg   }
949b8e80941Smrg
950b8e80941Smrg   RET_VOID();
951b8e80941Smrg
   // Verify the function, compile the module and fetch the entry point.
952b8e80941Smrg   gallivm_verify_function(gallivm, wrap(pFunction));
953b8e80941Smrg   gallivm_compile_module(gallivm);
954b8e80941Smrg
955b8e80941Smrg   //   lp_debug_dump_value(func);
956b8e80941Smrg
957b8e80941Smrg   PFN_VERTEX_FUNC pFunc =
958b8e80941Smrg      (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
959b8e80941Smrg
960b8e80941Smrg   debug_printf("vert shader  %p\n", pFunc);
961b8e80941Smrg   assert(pFunc && "Error: VertShader = NULL");
962b8e80941Smrg
963b8e80941Smrg   JM()->mIsModuleFinalized = true;
964b8e80941Smrg
965b8e80941Smrg   return pFunc;
966b8e80941Smrg}
967b8e80941Smrg
968b8e80941SmrgPFN_VERTEX_FUNC
969b8e80941Smrgswr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key)
970b8e80941Smrg{
971b8e80941Smrg   if (!ctx->vs->pipe.tokens)
972b8e80941Smrg      return NULL;
973b8e80941Smrg
974b8e80941Smrg   BuilderSWR builder(
975b8e80941Smrg      reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
976b8e80941Smrg      "VS");
977b8e80941Smrg   PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key);
978b8e80941Smrg
979b8e80941Smrg   ctx->vs->map.insert(std::make_pair(key, make_unique<VariantVS>(builder.gallivm, func)));
980b8e80941Smrg   return func;
981b8e80941Smrg}
982b8e80941Smrg
983b8e80941Smrgunsigned
984b8e80941Smrgswr_so_adjust_attrib(unsigned in_attrib,
985b8e80941Smrg                     swr_vertex_shader *swr_vs)
986b8e80941Smrg{
987b8e80941Smrg   ubyte semantic_name;
988b8e80941Smrg   unsigned attrib;
989b8e80941Smrg
990b8e80941Smrg   attrib = in_attrib + VERTEX_ATTRIB_START_SLOT;
991b8e80941Smrg
992b8e80941Smrg   if (swr_vs) {
993b8e80941Smrg      semantic_name = swr_vs->info.base.output_semantic_name[in_attrib];
994b8e80941Smrg      if (semantic_name == TGSI_SEMANTIC_POSITION) {
995b8e80941Smrg         attrib = VERTEX_POSITION_SLOT;
996b8e80941Smrg      } else if (semantic_name == TGSI_SEMANTIC_PSIZE) {
997b8e80941Smrg         attrib = VERTEX_SGV_SLOT;
998b8e80941Smrg      } else if (semantic_name == TGSI_SEMANTIC_LAYER) {
999b8e80941Smrg         attrib = VERTEX_SGV_SLOT;
1000b8e80941Smrg      } else {
1001b8e80941Smrg         if (swr_vs->info.base.writes_position) {
1002b8e80941Smrg               attrib--;
1003b8e80941Smrg         }
1004b8e80941Smrg      }
1005b8e80941Smrg   }
1006b8e80941Smrg
1007b8e80941Smrg   return attrib;
1008b8e80941Smrg}
1009b8e80941Smrg
1010b8e80941Smrgstatic unsigned
1011b8e80941Smrglocate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info)
1012b8e80941Smrg{
1013b8e80941Smrg   for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1014b8e80941Smrg      if ((info->output_semantic_name[i] == name)
1015b8e80941Smrg          && (info->output_semantic_index[i] == index)) {
1016b8e80941Smrg         return i;
1017b8e80941Smrg      }
1018b8e80941Smrg   }
1019b8e80941Smrg
1020b8e80941Smrg   return 0xFFFFFFFF;
1021b8e80941Smrg}
1022b8e80941Smrg
1023b8e80941SmrgPFN_PIXEL_KERNEL
1024b8e80941SmrgBuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key)
1025b8e80941Smrg{
1026b8e80941Smrg   struct swr_fragment_shader *swr_fs = ctx->fs;
1027b8e80941Smrg
1028b8e80941Smrg   struct tgsi_shader_info *pPrevShader;
1029b8e80941Smrg   if (ctx->gs)
1030b8e80941Smrg      pPrevShader = &ctx->gs->info.base;
1031b8e80941Smrg   else
1032b8e80941Smrg      pPrevShader = &ctx->vs->info.base;
1033b8e80941Smrg
1034b8e80941Smrg   LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
1035b8e80941Smrg   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
1036b8e80941Smrg
1037b8e80941Smrg   memset(inputs, 0, sizeof(inputs));
1038b8e80941Smrg   memset(outputs, 0, sizeof(outputs));
1039b8e80941Smrg
1040b8e80941Smrg   struct lp_build_sampler_soa *sampler = NULL;
1041b8e80941Smrg
1042b8e80941Smrg   AttrBuilder attrBuilder;
1043b8e80941Smrg   attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
1044b8e80941Smrg
1045b8e80941Smrg   std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
1046b8e80941Smrg                              PointerType::get(mInt8Ty, 0),
1047b8e80941Smrg                              PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
1048b8e80941Smrg   FunctionType *funcType =
1049b8e80941Smrg      FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false);
1050b8e80941Smrg
1051b8e80941Smrg   auto pFunction = Function::Create(funcType,
1052b8e80941Smrg                                     GlobalValue::ExternalLinkage,
1053b8e80941Smrg                                     "FS",
1054b8e80941Smrg                                     JM()->mpCurrentModule);
1055b8e80941Smrg#if HAVE_LLVM < 0x0500
1056b8e80941Smrg   AttributeSet attrSet = AttributeSet::get(
1057b8e80941Smrg      JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
1058b8e80941Smrg   pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
1059b8e80941Smrg#else
1060b8e80941Smrg   pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
1061b8e80941Smrg#endif
1062b8e80941Smrg
1063b8e80941Smrg   BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
1064b8e80941Smrg   IRB()->SetInsertPoint(block);
1065b8e80941Smrg   LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
1066b8e80941Smrg
1067b8e80941Smrg   auto args = pFunction->arg_begin();
1068b8e80941Smrg   Value *hPrivateData = &*args++;
1069b8e80941Smrg   hPrivateData->setName("hPrivateData");
1070b8e80941Smrg   Value *pWorkerData = &*args++;
1071b8e80941Smrg   pWorkerData->setName("pWorkerData");
1072b8e80941Smrg   Value *pPS = &*args++;
1073b8e80941Smrg   pPS->setName("psCtx");
1074b8e80941Smrg
1075b8e80941Smrg   Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantFS});
1076b8e80941Smrg   consts_ptr->setName("fs_constants");
1077b8e80941Smrg   Value *const_sizes_ptr =
1078b8e80941Smrg      GEP(hPrivateData, {0, swr_draw_context_num_constantsFS});
1079b8e80941Smrg   const_sizes_ptr->setName("num_fs_constants");
1080b8e80941Smrg
1081b8e80941Smrg   // load *pAttribs, *pPerspAttribs
1082b8e80941Smrg   Value *pRawAttribs = LOAD(pPS, {0, SWR_PS_CONTEXT_pAttribs}, "pRawAttribs");
1083b8e80941Smrg   Value *pPerspAttribs =
1084b8e80941Smrg      LOAD(pPS, {0, SWR_PS_CONTEXT_pPerspAttribs}, "pPerspAttribs");
1085b8e80941Smrg
1086b8e80941Smrg   swr_fs->constantMask = 0;
1087b8e80941Smrg   swr_fs->flatConstantMask = 0;
1088b8e80941Smrg   swr_fs->pointSpriteMask = 0;
1089b8e80941Smrg
1090b8e80941Smrg   for (int attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
1091b8e80941Smrg      const unsigned mask = swr_fs->info.base.input_usage_mask[attrib];
1092b8e80941Smrg      const unsigned interpMode = swr_fs->info.base.input_interpolate[attrib];
1093b8e80941Smrg      const unsigned interpLoc = swr_fs->info.base.input_interpolate_loc[attrib];
1094b8e80941Smrg
1095b8e80941Smrg      if (!mask)
1096b8e80941Smrg         continue;
1097b8e80941Smrg
1098b8e80941Smrg      // load i,j
1099b8e80941Smrg      Value *vi = nullptr, *vj = nullptr;
1100b8e80941Smrg      switch (interpLoc) {
1101b8e80941Smrg      case TGSI_INTERPOLATE_LOC_CENTER:
1102b8e80941Smrg         vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_center}, "i");
1103b8e80941Smrg         vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_center}, "j");
1104b8e80941Smrg         break;
1105b8e80941Smrg      case TGSI_INTERPOLATE_LOC_CENTROID:
1106b8e80941Smrg         vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_centroid}, "i");
1107b8e80941Smrg         vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_centroid}, "j");
1108b8e80941Smrg         break;
1109b8e80941Smrg      case TGSI_INTERPOLATE_LOC_SAMPLE:
1110b8e80941Smrg         vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_sample}, "i");
1111b8e80941Smrg         vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_sample}, "j");
1112b8e80941Smrg         break;
1113b8e80941Smrg      }
1114b8e80941Smrg
1115b8e80941Smrg      // load/compute w
1116b8e80941Smrg      Value *vw = nullptr, *pAttribs;
1117b8e80941Smrg      if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE ||
1118b8e80941Smrg          interpMode == TGSI_INTERPOLATE_COLOR) {
1119b8e80941Smrg         pAttribs = pPerspAttribs;
1120b8e80941Smrg         switch (interpLoc) {
1121b8e80941Smrg         case TGSI_INTERPOLATE_LOC_CENTER:
1122b8e80941Smrg            vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}));
1123b8e80941Smrg            break;
1124b8e80941Smrg         case TGSI_INTERPOLATE_LOC_CENTROID:
1125b8e80941Smrg            vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_centroid}));
1126b8e80941Smrg            break;
1127b8e80941Smrg         case TGSI_INTERPOLATE_LOC_SAMPLE:
1128b8e80941Smrg            vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_sample}));
1129b8e80941Smrg            break;
1130b8e80941Smrg         }
1131b8e80941Smrg      } else {
1132b8e80941Smrg         pAttribs = pRawAttribs;
1133b8e80941Smrg         vw = VIMMED1(1.f);
1134b8e80941Smrg      }
1135b8e80941Smrg
1136b8e80941Smrg      vw->setName("w");
1137b8e80941Smrg
1138b8e80941Smrg      ubyte semantic_name = swr_fs->info.base.input_semantic_name[attrib];
1139b8e80941Smrg      ubyte semantic_idx = swr_fs->info.base.input_semantic_index[attrib];
1140b8e80941Smrg
1141b8e80941Smrg      if (semantic_name == TGSI_SEMANTIC_FACE) {
1142b8e80941Smrg         Value *ff =
1143b8e80941Smrg            UI_TO_FP(LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), mFP32Ty);
1144b8e80941Smrg         ff = FSUB(FMUL(ff, C(2.0f)), C(1.0f));
1145b8e80941Smrg         ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vFrontFace");
1146b8e80941Smrg
1147b8e80941Smrg         inputs[attrib][0] = wrap(ff);
1148b8e80941Smrg         inputs[attrib][1] = wrap(VIMMED1(0.0f));
1149b8e80941Smrg         inputs[attrib][2] = wrap(VIMMED1(0.0f));
1150b8e80941Smrg         inputs[attrib][3] = wrap(VIMMED1(1.0f));
1151b8e80941Smrg         continue;
1152b8e80941Smrg      } else if (semantic_name == TGSI_SEMANTIC_POSITION) { // gl_FragCoord
1153b8e80941Smrg         if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] ==
1154b8e80941Smrg             TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER) {
1155b8e80941Smrg            inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_center}, "vX"));
1156b8e80941Smrg            inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_center}, "vY"));
1157b8e80941Smrg         } else {
1158b8e80941Smrg            inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL}, "vX"));
1159b8e80941Smrg            inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL}, "vY"));
1160b8e80941Smrg         }
1161b8e80941Smrg         inputs[attrib][2] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vZ}, "vZ"));
1162b8e80941Smrg         inputs[attrib][3] =
1163b8e80941Smrg            wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}, "vOneOverW"));
1164b8e80941Smrg         continue;
1165b8e80941Smrg      }
1166b8e80941Smrg
1167b8e80941Smrg      unsigned linkedAttrib =
1168b8e80941Smrg         locate_linkage(semantic_name, semantic_idx, pPrevShader) - 1;
1169b8e80941Smrg
1170b8e80941Smrg      uint32_t extraAttribs = 0;
1171b8e80941Smrg      if (semantic_name == TGSI_SEMANTIC_PRIMID && !ctx->gs) {
1172b8e80941Smrg         /* non-gs generated primID - need to grab from swizzleMap override */
1173b8e80941Smrg         linkedAttrib = pPrevShader->num_outputs - 1;
1174b8e80941Smrg         swr_fs->constantMask |= 1 << linkedAttrib;
1175b8e80941Smrg         extraAttribs++;
1176b8e80941Smrg      } else if (semantic_name == TGSI_SEMANTIC_GENERIC &&
1177b8e80941Smrg          key.sprite_coord_enable & (1 << semantic_idx)) {
1178b8e80941Smrg         /* we add an extra attrib to the backendState in swr_update_derived. */
1179b8e80941Smrg         linkedAttrib = pPrevShader->num_outputs + extraAttribs - 1;
1180b8e80941Smrg         swr_fs->pointSpriteMask |= (1 << linkedAttrib);
1181b8e80941Smrg         extraAttribs++;
1182b8e80941Smrg      } else if (linkedAttrib == 0xFFFFFFFF) {
1183b8e80941Smrg         inputs[attrib][0] = wrap(VIMMED1(0.0f));
1184b8e80941Smrg         inputs[attrib][1] = wrap(VIMMED1(0.0f));
1185b8e80941Smrg         inputs[attrib][2] = wrap(VIMMED1(0.0f));
1186b8e80941Smrg         inputs[attrib][3] = wrap(VIMMED1(1.0f));
1187b8e80941Smrg         /* If we're reading in color and 2-sided lighting is enabled, we have
1188b8e80941Smrg          * to keep going.
1189b8e80941Smrg          */
1190b8e80941Smrg         if (semantic_name != TGSI_SEMANTIC_COLOR || !key.light_twoside)
1191b8e80941Smrg            continue;
1192b8e80941Smrg      } else {
1193b8e80941Smrg         if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
1194b8e80941Smrg            swr_fs->constantMask |= 1 << linkedAttrib;
1195b8e80941Smrg         } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
1196b8e80941Smrg            swr_fs->flatConstantMask |= 1 << linkedAttrib;
1197b8e80941Smrg         }
1198b8e80941Smrg      }
1199b8e80941Smrg
1200b8e80941Smrg      unsigned bcolorAttrib = 0xFFFFFFFF;
1201b8e80941Smrg      Value *offset = NULL;
1202b8e80941Smrg      if (semantic_name == TGSI_SEMANTIC_COLOR && key.light_twoside) {
1203b8e80941Smrg         bcolorAttrib = locate_linkage(
1204b8e80941Smrg               TGSI_SEMANTIC_BCOLOR, semantic_idx, pPrevShader) - 1;
1205b8e80941Smrg         /* Neither front nor back colors were available. Nothing to load. */
1206b8e80941Smrg         if (bcolorAttrib == 0xFFFFFFFF && linkedAttrib == 0xFFFFFFFF)
1207b8e80941Smrg            continue;
1208b8e80941Smrg         /* If there is no front color, just always use the back color. */
1209b8e80941Smrg         if (linkedAttrib == 0xFFFFFFFF)
1210b8e80941Smrg            linkedAttrib = bcolorAttrib;
1211b8e80941Smrg
1212b8e80941Smrg         if (bcolorAttrib != 0xFFFFFFFF) {
1213b8e80941Smrg            if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
1214b8e80941Smrg               swr_fs->constantMask |= 1 << bcolorAttrib;
1215b8e80941Smrg            } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
1216b8e80941Smrg               swr_fs->flatConstantMask |= 1 << bcolorAttrib;
1217b8e80941Smrg            }
1218b8e80941Smrg
1219b8e80941Smrg            unsigned diff = 12 * (bcolorAttrib - linkedAttrib);
1220b8e80941Smrg
1221b8e80941Smrg            if (diff) {
1222b8e80941Smrg               Value *back =
1223b8e80941Smrg                  XOR(C(1), LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), "backFace");
1224b8e80941Smrg
1225b8e80941Smrg               offset = MUL(back, C(diff));
1226b8e80941Smrg               offset->setName("offset");
1227b8e80941Smrg            }
1228b8e80941Smrg         }
1229b8e80941Smrg      }
1230b8e80941Smrg
1231b8e80941Smrg      for (int channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
1232b8e80941Smrg         if (mask & (1 << channel)) {
1233b8e80941Smrg            Value *indexA = C(linkedAttrib * 12 + channel);
1234b8e80941Smrg            Value *indexB = C(linkedAttrib * 12 + channel + 4);
1235b8e80941Smrg            Value *indexC = C(linkedAttrib * 12 + channel + 8);
1236b8e80941Smrg
1237b8e80941Smrg            if (offset) {
1238b8e80941Smrg               indexA = ADD(indexA, offset);
1239b8e80941Smrg               indexB = ADD(indexB, offset);
1240b8e80941Smrg               indexC = ADD(indexC, offset);
1241b8e80941Smrg            }
1242b8e80941Smrg
1243b8e80941Smrg            Value *va = VBROADCAST(LOAD(GEP(pAttribs, indexA)));
1244b8e80941Smrg            Value *vb = VBROADCAST(LOAD(GEP(pAttribs, indexB)));
1245b8e80941Smrg            Value *vc = VBROADCAST(LOAD(GEP(pAttribs, indexC)));
1246b8e80941Smrg
1247b8e80941Smrg            if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
1248b8e80941Smrg               inputs[attrib][channel] = wrap(va);
1249b8e80941Smrg            } else {
1250b8e80941Smrg               Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj);
1251b8e80941Smrg
1252b8e80941Smrg               vc = FMUL(vk, vc);
1253b8e80941Smrg
1254b8e80941Smrg               Value *interp = FMUL(va, vi);
1255b8e80941Smrg               Value *interp1 = FMUL(vb, vj);
1256b8e80941Smrg               interp = FADD(interp, interp1);
1257b8e80941Smrg               interp = FADD(interp, vc);
1258b8e80941Smrg               if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE ||
1259b8e80941Smrg                   interpMode == TGSI_INTERPOLATE_COLOR)
1260b8e80941Smrg                  interp = FMUL(interp, vw);
1261b8e80941Smrg               inputs[attrib][channel] = wrap(interp);
1262b8e80941Smrg            }
1263b8e80941Smrg         }
1264b8e80941Smrg      }
1265b8e80941Smrg   }
1266b8e80941Smrg
1267b8e80941Smrg   sampler = swr_sampler_soa_create(key.sampler, PIPE_SHADER_FRAGMENT);
1268b8e80941Smrg
1269b8e80941Smrg   struct lp_bld_tgsi_system_values system_values;
1270b8e80941Smrg   memset(&system_values, 0, sizeof(system_values));
1271b8e80941Smrg
1272b8e80941Smrg   struct lp_build_mask_context mask;
1273b8e80941Smrg   bool uses_mask = false;
1274b8e80941Smrg
1275b8e80941Smrg   if (swr_fs->info.base.uses_kill ||
1276b8e80941Smrg       key.poly_stipple_enable) {
1277b8e80941Smrg      Value *vActiveMask = NULL;
1278b8e80941Smrg      if (swr_fs->info.base.uses_kill) {
1279b8e80941Smrg         vActiveMask = LOAD(pPS, {0, SWR_PS_CONTEXT_activeMask}, "activeMask");
1280b8e80941Smrg      }
1281b8e80941Smrg      if (key.poly_stipple_enable) {
1282b8e80941Smrg         // first get fragment xy coords and clip to stipple bounds
1283b8e80941Smrg         Value *vXf = LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL});
1284b8e80941Smrg         Value *vYf = LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL});
1285b8e80941Smrg         Value *vXu = FP_TO_UI(vXf, mSimdInt32Ty);
1286b8e80941Smrg         Value *vYu = FP_TO_UI(vYf, mSimdInt32Ty);
1287b8e80941Smrg
1288b8e80941Smrg         // stipple pattern is 32x32, which means that one line of stipple
1289b8e80941Smrg         // is stored in one word:
1290b8e80941Smrg         // vXstipple is bit offset inside 32-bit stipple word
1291b8e80941Smrg         // vYstipple is word index in stipple array
1292b8e80941Smrg         Value *vXstipple = AND(vXu, VIMMED1(0x1f)); // & (32-1)
1293b8e80941Smrg         Value *vYstipple = AND(vYu, VIMMED1(0x1f)); // & (32-1)
1294b8e80941Smrg
1295b8e80941Smrg         // grab stipple pattern base address
1296b8e80941Smrg         Value *stipplePtr = GEP(hPrivateData, {0, swr_draw_context_polyStipple, 0});
1297b8e80941Smrg         stipplePtr = BITCAST(stipplePtr, mInt8PtrTy);
1298b8e80941Smrg
1299b8e80941Smrg         // perform a gather to grab stipple words for each lane
1300b8e80941Smrg         Value *vStipple = GATHERDD(VUNDEF_I(), stipplePtr, vYstipple,
1301b8e80941Smrg                                    VIMMED1(0xffffffff), 4);
1302b8e80941Smrg
1303b8e80941Smrg         // create a mask with one bit corresponding to the x stipple
1304b8e80941Smrg         // and AND it with the pattern, to see if we have a bit
1305b8e80941Smrg         Value *vBitMask = LSHR(VIMMED1(0x80000000), vXstipple);
1306b8e80941Smrg         Value *vStippleMask = AND(vStipple, vBitMask);
1307b8e80941Smrg         vStippleMask = ICMP_NE(vStippleMask, VIMMED1(0));
1308b8e80941Smrg         vStippleMask = VMASK(vStippleMask);
1309b8e80941Smrg
1310b8e80941Smrg         if (swr_fs->info.base.uses_kill) {
1311b8e80941Smrg            vActiveMask = AND(vActiveMask, vStippleMask);
1312b8e80941Smrg         } else {
1313b8e80941Smrg            vActiveMask = vStippleMask;
1314b8e80941Smrg         }
1315b8e80941Smrg      }
1316b8e80941Smrg      lp_build_mask_begin(
1317b8e80941Smrg         &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(vActiveMask));
1318b8e80941Smrg      uses_mask = true;
1319b8e80941Smrg   }
1320b8e80941Smrg
1321b8e80941Smrg   lp_build_tgsi_soa(gallivm,
1322b8e80941Smrg                     swr_fs->pipe.tokens,
1323b8e80941Smrg                     lp_type_float_vec(32, 32 * 8),
1324b8e80941Smrg                     uses_mask ? &mask : NULL, // mask
1325b8e80941Smrg                     wrap(consts_ptr),
1326b8e80941Smrg                     wrap(const_sizes_ptr),
1327b8e80941Smrg                     &system_values,
1328b8e80941Smrg                     inputs,
1329b8e80941Smrg                     outputs,
1330b8e80941Smrg                     wrap(hPrivateData),
1331b8e80941Smrg                     NULL, // thread data
1332b8e80941Smrg                     sampler, // sampler
1333b8e80941Smrg                     &swr_fs->info.base,
1334b8e80941Smrg                     NULL); // geometry shader face
1335b8e80941Smrg
1336b8e80941Smrg   sampler->destroy(sampler);
1337b8e80941Smrg
1338b8e80941Smrg   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1339b8e80941Smrg
1340b8e80941Smrg   for (uint32_t attrib = 0; attrib < swr_fs->info.base.num_outputs;
1341b8e80941Smrg        attrib++) {
1342b8e80941Smrg      switch (swr_fs->info.base.output_semantic_name[attrib]) {
1343b8e80941Smrg      case TGSI_SEMANTIC_POSITION: {
1344b8e80941Smrg         // write z
1345b8e80941Smrg         LLVMValueRef outZ =
1346b8e80941Smrg            LLVMBuildLoad(gallivm->builder, outputs[attrib][2], "");
1347b8e80941Smrg         STORE(unwrap(outZ), pPS, {0, SWR_PS_CONTEXT_vZ});
1348b8e80941Smrg         break;
1349b8e80941Smrg      }
1350b8e80941Smrg      case TGSI_SEMANTIC_COLOR: {
1351b8e80941Smrg         for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
1352b8e80941Smrg            if (!outputs[attrib][channel])
1353b8e80941Smrg               continue;
1354b8e80941Smrg
1355b8e80941Smrg            LLVMValueRef out =
1356b8e80941Smrg               LLVMBuildLoad(gallivm->builder, outputs[attrib][channel], "");
1357b8e80941Smrg            if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
1358b8e80941Smrg                swr_fs->info.base.output_semantic_index[attrib] == 0) {
1359b8e80941Smrg               for (uint32_t rt = 0; rt < key.nr_cbufs; rt++) {
1360b8e80941Smrg                  STORE(unwrap(out),
1361b8e80941Smrg                        pPS,
1362b8e80941Smrg                        {0, SWR_PS_CONTEXT_shaded, rt, channel});
1363b8e80941Smrg               }
1364b8e80941Smrg            } else {
1365b8e80941Smrg               STORE(unwrap(out),
1366b8e80941Smrg                     pPS,
1367b8e80941Smrg                     {0,
1368b8e80941Smrg                           SWR_PS_CONTEXT_shaded,
1369b8e80941Smrg                           swr_fs->info.base.output_semantic_index[attrib],
1370b8e80941Smrg                           channel});
1371b8e80941Smrg            }
1372b8e80941Smrg         }
1373b8e80941Smrg         break;
1374b8e80941Smrg      }
1375b8e80941Smrg      default: {
1376b8e80941Smrg         fprintf(stderr,
1377b8e80941Smrg                 "unknown output from FS %s[%d]\n",
1378b8e80941Smrg                 tgsi_semantic_names[swr_fs->info.base
1379b8e80941Smrg                                        .output_semantic_name[attrib]],
1380b8e80941Smrg                 swr_fs->info.base.output_semantic_index[attrib]);
1381b8e80941Smrg         break;
1382b8e80941Smrg      }
1383b8e80941Smrg      }
1384b8e80941Smrg   }
1385b8e80941Smrg
1386b8e80941Smrg   LLVMValueRef mask_result = 0;
1387b8e80941Smrg   if (uses_mask) {
1388b8e80941Smrg      mask_result = lp_build_mask_end(&mask);
1389b8e80941Smrg   }
1390b8e80941Smrg
1391b8e80941Smrg   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1392b8e80941Smrg
1393b8e80941Smrg   if (uses_mask) {
1394b8e80941Smrg      STORE(unwrap(mask_result), pPS, {0, SWR_PS_CONTEXT_activeMask});
1395b8e80941Smrg   }
1396b8e80941Smrg
1397b8e80941Smrg   RET_VOID();
1398b8e80941Smrg
1399b8e80941Smrg   gallivm_verify_function(gallivm, wrap(pFunction));
1400b8e80941Smrg
1401b8e80941Smrg   gallivm_compile_module(gallivm);
1402b8e80941Smrg
1403b8e80941Smrg   // after the gallivm passes, we have to lower the core's intrinsics
1404b8e80941Smrg   llvm::legacy::FunctionPassManager lowerPass(JM()->mpCurrentModule);
1405b8e80941Smrg   lowerPass.add(createLowerX86Pass(this));
1406b8e80941Smrg   lowerPass.run(*pFunction);
1407b8e80941Smrg
1408b8e80941Smrg   PFN_PIXEL_KERNEL kernel =
1409b8e80941Smrg      (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction));
1410b8e80941Smrg   debug_printf("frag shader  %p\n", kernel);
1411b8e80941Smrg   assert(kernel && "Error: FragShader = NULL");
1412b8e80941Smrg
1413b8e80941Smrg   JM()->mIsModuleFinalized = true;
1414b8e80941Smrg
1415b8e80941Smrg   return kernel;
1416b8e80941Smrg}
1417b8e80941Smrg
1418b8e80941SmrgPFN_PIXEL_KERNEL
1419b8e80941Smrgswr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key)
1420b8e80941Smrg{
1421b8e80941Smrg   if (!ctx->fs->pipe.tokens)
1422b8e80941Smrg      return NULL;
1423b8e80941Smrg
1424b8e80941Smrg   BuilderSWR builder(
1425b8e80941Smrg      reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
1426b8e80941Smrg      "FS");
1427b8e80941Smrg   PFN_PIXEL_KERNEL func = builder.CompileFS(ctx, key);
1428b8e80941Smrg
1429b8e80941Smrg   ctx->fs->map.insert(std::make_pair(key, make_unique<VariantFS>(builder.gallivm, func)));
1430b8e80941Smrg   return func;
1431b8e80941Smrg}
1432