1/**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * Texture sampling -- AoS.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36#include "pipe/p_defines.h"
37#include "pipe/p_state.h"
38#include "util/u_debug.h"
39#include "util/u_dump.h"
40#include "util/u_memory.h"
41#include "util/u_math.h"
42#include "util/format/u_format.h"
43#include "util/u_cpu_detect.h"
44#include "lp_bld_debug.h"
45#include "lp_bld_type.h"
46#include "lp_bld_const.h"
47#include "lp_bld_conv.h"
48#include "lp_bld_arit.h"
49#include "lp_bld_bitarit.h"
50#include "lp_bld_logic.h"
51#include "lp_bld_swizzle.h"
52#include "lp_bld_pack.h"
53#include "lp_bld_flow.h"
54#include "lp_bld_gather.h"
55#include "lp_bld_format.h"
56#include "lp_bld_init.h"
57#include "lp_bld_sample.h"
58#include "lp_bld_sample_aos.h"
59#include "lp_bld_quad.h"
60
61
62/**
63 * Build LLVM code for texture coord wrapping, for nearest filtering,
64 * for scaled integer texcoords.
65 * \param block_length  is the length of the pixel block along the
66 *                      coordinate axis
67 * \param coord  the incoming texcoord (s,t or r) scaled to the texture size
68 * \param coord_f  the incoming texcoord (s,t or r) as float vec
69 * \param length  the texture size along one dimension
70 * \param stride  pixel stride along the coordinate axis (in bytes)
71 * \param offset  the texel offset along the coord axis
72 * \param is_pot  if TRUE, length is a power of two
73 * \param wrap_mode  one of PIPE_TEX_WRAP_x
74 * \param out_offset  byte offset for the wrapped coordinate
75 * \param out_i  resulting sub-block pixel coordinate for coord0
76 */
77static void
78lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
79                                 unsigned block_length,
80                                 LLVMValueRef coord,
81                                 LLVMValueRef coord_f,
82                                 LLVMValueRef length,
83                                 LLVMValueRef stride,
84                                 LLVMValueRef offset,
85                                 boolean is_pot,
86                                 unsigned wrap_mode,
87                                 LLVMValueRef *out_offset,
88                                 LLVMValueRef *out_i)
89{
90   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
91   LLVMBuilderRef builder = bld->gallivm->builder;
92   LLVMValueRef length_minus_one;
93
94   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
95
96   switch(wrap_mode) {
97   case PIPE_TEX_WRAP_REPEAT:
98      if(is_pot)
99         coord = LLVMBuildAnd(builder, coord, length_minus_one, "");
100      else {
101         struct lp_build_context *coord_bld = &bld->coord_bld;
102         LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
103         if (offset) {
104            offset = lp_build_int_to_float(coord_bld, offset);
105            offset = lp_build_div(coord_bld, offset, length_f);
106            coord_f = lp_build_add(coord_bld, coord_f, offset);
107         }
108         coord = lp_build_fract_safe(coord_bld, coord_f);
109         coord = lp_build_mul(coord_bld, coord, length_f);
110         coord = lp_build_itrunc(coord_bld, coord);
111      }
112      break;
113
114   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
115      coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
116      coord = lp_build_min(int_coord_bld, coord, length_minus_one);
117      break;
118
119   case PIPE_TEX_WRAP_CLAMP:
120   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
121   case PIPE_TEX_WRAP_MIRROR_REPEAT:
122   case PIPE_TEX_WRAP_MIRROR_CLAMP:
123   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
124   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
125   default:
126      assert(0);
127   }
128
129   lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
130                                  out_offset, out_i);
131}
132
133
134/**
135 * Helper to compute the first coord and the weight for
136 * linear wrap repeat npot textures
137 */
138static void
139lp_build_coord_repeat_npot_linear_int(struct lp_build_sample_context *bld,
140                                      LLVMValueRef coord_f,
141                                      LLVMValueRef length_i,
142                                      LLVMValueRef length_f,
143                                      LLVMValueRef *coord0_i,
144                                      LLVMValueRef *weight_i)
145{
146   struct lp_build_context *coord_bld = &bld->coord_bld;
147   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
148   struct lp_build_context abs_coord_bld;
149   struct lp_type abs_type;
150   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
151                                                int_coord_bld->one);
152   LLVMValueRef mask, i32_c8, i32_c128, i32_c255;
153
154   /* wrap with normalized floats is just fract */
155   coord_f = lp_build_fract(coord_bld, coord_f);
156   /* mul by size */
157   coord_f = lp_build_mul(coord_bld, coord_f, length_f);
158   /* convert to int, compute lerp weight */
159   coord_f = lp_build_mul_imm(&bld->coord_bld, coord_f, 256);
160
161   /* At this point we don't have any negative numbers so use non-signed
162    * build context which might help on some archs.
163    */
164   abs_type = coord_bld->type;
165   abs_type.sign = 0;
166   lp_build_context_init(&abs_coord_bld, bld->gallivm, abs_type);
167   *coord0_i = lp_build_iround(&abs_coord_bld, coord_f);
168
169   /* subtract 0.5 (add -128) */
170   i32_c128 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, -128);
171   *coord0_i = LLVMBuildAdd(bld->gallivm->builder, *coord0_i, i32_c128, "");
172
173   /* compute fractional part (AND with 0xff) */
174   i32_c255 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 255);
175   *weight_i = LLVMBuildAnd(bld->gallivm->builder, *coord0_i, i32_c255, "");
176
177   /* compute floor (shift right 8) */
178   i32_c8 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 8);
179   *coord0_i = LLVMBuildAShr(bld->gallivm->builder, *coord0_i, i32_c8, "");
180   /*
181    * we avoided the 0.5/length division before the repeat wrap,
182    * now need to fix up edge cases with selects
183    */
184   mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
185                           PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
186   *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
187   /*
188    * We should never get values too large - except if coord was nan or inf,
189    * in which case things go terribly wrong...
190    * Alternatively, could use fract_safe above...
191    */
192   *coord0_i = lp_build_min(int_coord_bld, *coord0_i, length_minus_one);
193}
194
195
196/**
197 * Build LLVM code for texture coord wrapping, for linear filtering,
198 * for scaled integer texcoords.
199 * \param block_length  is the length of the pixel block along the
200 *                      coordinate axis
201 * \param coord0  the incoming texcoord (s,t or r) scaled to the texture size
202 * \param coord_f  the incoming texcoord (s,t or r) as float vec
203 * \param length  the texture size along one dimension
204 * \param stride  pixel stride along the coordinate axis (in bytes)
205 * \param offset  the texel offset along the coord axis
206 * \param is_pot  if TRUE, length is a power of two
207 * \param wrap_mode  one of PIPE_TEX_WRAP_x
208 * \param offset0  resulting relative offset for coord0
209 * \param offset1  resulting relative offset for coord0 + 1
210 * \param i0  resulting sub-block pixel coordinate for coord0
211 * \param i1  resulting sub-block pixel coordinate for coord0 + 1
212 */
213static void
214lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
215                                unsigned block_length,
216                                LLVMValueRef coord0,
217                                LLVMValueRef *weight_i,
218                                LLVMValueRef coord_f,
219                                LLVMValueRef length,
220                                LLVMValueRef stride,
221                                LLVMValueRef offset,
222                                boolean is_pot,
223                                unsigned wrap_mode,
224                                LLVMValueRef *offset0,
225                                LLVMValueRef *offset1,
226                                LLVMValueRef *i0,
227                                LLVMValueRef *i1)
228{
229   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
230   LLVMBuilderRef builder = bld->gallivm->builder;
231   LLVMValueRef length_minus_one;
232   LLVMValueRef lmask, umask, mask;
233
234   /*
235    * If the pixel block covers more than one pixel then there is no easy
236    * way to calculate offset1 relative to offset0. Instead, compute them
237    * independently. Otherwise, try to compute offset0 and offset1 with
238    * a single stride multiplication.
239    */
240
241   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
242
243   if (block_length != 1) {
244      LLVMValueRef coord1;
245      switch(wrap_mode) {
246      case PIPE_TEX_WRAP_REPEAT:
247         if (is_pot) {
248            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
249            coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
250            coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
251         }
252         else {
253            LLVMValueRef mask;
254            LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
255            if (offset) {
256               offset = lp_build_int_to_float(&bld->coord_bld, offset);
257               offset = lp_build_div(&bld->coord_bld, offset, length_f);
258               coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
259            }
260            lp_build_coord_repeat_npot_linear_int(bld, coord_f,
261                                                  length, length_f,
262                                                  &coord0, weight_i);
263            mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
264                                    PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
265            coord1 = LLVMBuildAnd(builder,
266                                  lp_build_add(int_coord_bld, coord0,
267                                               int_coord_bld->one),
268                                  mask, "");
269         }
270         break;
271
272      case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
273         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
274         coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero,
275                                length_minus_one);
276         coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero,
277                                length_minus_one);
278         break;
279
280      case PIPE_TEX_WRAP_CLAMP:
281      case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
282      case PIPE_TEX_WRAP_MIRROR_REPEAT:
283      case PIPE_TEX_WRAP_MIRROR_CLAMP:
284      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
285      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
286      default:
287         assert(0);
288         coord0 = int_coord_bld->zero;
289         coord1 = int_coord_bld->zero;
290         break;
291      }
292      lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride,
293                                     offset0, i0);
294      lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride,
295                                     offset1, i1);
296      return;
297   }
298
299   *i0 = int_coord_bld->zero;
300   *i1 = int_coord_bld->zero;
301
302   switch(wrap_mode) {
303   case PIPE_TEX_WRAP_REPEAT:
304      if (is_pot) {
305         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
306      }
307      else {
308         LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
309         if (offset) {
310            offset = lp_build_int_to_float(&bld->coord_bld, offset);
311            offset = lp_build_div(&bld->coord_bld, offset, length_f);
312            coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
313         }
314         lp_build_coord_repeat_npot_linear_int(bld, coord_f,
315                                               length, length_f,
316                                               &coord0, weight_i);
317      }
318
319      mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
320                              PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
321
322      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
323      *offset1 = LLVMBuildAnd(builder,
324                              lp_build_add(int_coord_bld, *offset0, stride),
325                              mask, "");
326      break;
327
328   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
329      /* XXX this might be slower than the separate path
330       * on some newer cpus. With sse41 this is 8 instructions vs. 7
331       * - at least on SNB this is almost certainly slower since
332       * min/max are cheaper than selects, and the muls aren't bad.
333       */
334      lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
335                               PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
336      umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
337                               PIPE_FUNC_LESS, coord0, length_minus_one);
338
339      coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
340      coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
341
342      mask = LLVMBuildAnd(builder, lmask, umask, "");
343
344      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
345      *offset1 = lp_build_add(int_coord_bld,
346                              *offset0,
347                              LLVMBuildAnd(builder, stride, mask, ""));
348      break;
349
350   case PIPE_TEX_WRAP_CLAMP:
351   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
352   case PIPE_TEX_WRAP_MIRROR_REPEAT:
353   case PIPE_TEX_WRAP_MIRROR_CLAMP:
354   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
355   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
356   default:
357      assert(0);
358      *offset0 = int_coord_bld->zero;
359      *offset1 = int_coord_bld->zero;
360      break;
361   }
362}
363
364
365/**
366 * Fetch texels for image with nearest sampling.
367 * Return filtered color as two vectors of 16-bit fixed point values.
368 */
369static void
370lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
371                                    LLVMValueRef data_ptr,
372                                    LLVMValueRef offset,
373                                    LLVMValueRef x_subcoord,
374                                    LLVMValueRef y_subcoord,
375                                    LLVMValueRef *colors)
376{
377   /*
378    * Fetch the pixels as 4 x 32bit (rgba order might differ):
379    *
380    *   rgba0 rgba1 rgba2 rgba3
381    *
382    * bit cast them into 16 x u8
383    *
384    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
385    *
386    * unpack them into two 8 x i16:
387    *
388    *   r0 g0 b0 a0 r1 g1 b1 a1
389    *   r2 g2 b2 a2 r3 g3 b3 a3
390    *
391    * The higher 8 bits of the resulting elements will be zero.
392    */
393   LLVMBuilderRef builder = bld->gallivm->builder;
394   LLVMValueRef rgba8;
395   struct lp_build_context u8n;
396   LLVMTypeRef u8n_vec_type;
397   struct lp_type fetch_type;
398
399   lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
400   u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
401
402   fetch_type = lp_type_uint(bld->texel_type.width);
403   if (util_format_is_rgba8_variant(bld->format_desc)) {
404      /*
405       * Given the format is a rgba8, just read the pixels as is,
406       * without any swizzling. Swizzling will be done later.
407       */
408      rgba8 = lp_build_gather(bld->gallivm,
409                              bld->texel_type.length,
410                              bld->format_desc->block.bits,
411                              fetch_type,
412                              TRUE,
413                              data_ptr, offset, TRUE);
414
415      rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
416   }
417   else {
418      rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
419                                      bld->format_desc,
420                                      u8n.type,
421                                      TRUE,
422                                      data_ptr, offset,
423                                      x_subcoord,
424                                      y_subcoord,
425                                      bld->cache);
426   }
427
428   *colors = rgba8;
429}
430
431
432/**
433 * Sample a single texture image with nearest sampling.
434 * If sampling a cube texture, r = cube face in [0,5].
435 * Return filtered color as two vectors of 16-bit fixed point values.
436 */
437static void
438lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
439                              LLVMValueRef int_size,
440                              LLVMValueRef row_stride_vec,
441                              LLVMValueRef img_stride_vec,
442                              LLVMValueRef data_ptr,
443                              LLVMValueRef mipoffsets,
444                              LLVMValueRef s,
445                              LLVMValueRef t,
446                              LLVMValueRef r,
447                              const LLVMValueRef *offsets,
448                              LLVMValueRef *colors)
449{
450   const unsigned dims = bld->dims;
451   struct lp_build_context i32;
452   LLVMValueRef width_vec, height_vec, depth_vec;
453   LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
454   LLVMValueRef s_float, t_float = NULL, r_float = NULL;
455   LLVMValueRef x_stride;
456   LLVMValueRef x_offset, offset;
457   LLVMValueRef x_subcoord, y_subcoord = NULL, z_subcoord;
458
459   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
460
461   lp_build_extract_image_sizes(bld,
462                                &bld->int_size_bld,
463                                bld->int_coord_type,
464                                int_size,
465                                &width_vec,
466                                &height_vec,
467                                &depth_vec);
468
469   s_float = s; t_float = t; r_float = r;
470
471   if (bld->static_sampler_state->normalized_coords) {
472      LLVMValueRef flt_size;
473
474      flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
475
476      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
477   }
478
479   /* convert float to int */
480   /* For correct rounding, need floor, not truncation here.
481    * Note that in some cases (clamp to edge, no texel offsets) we
482    * could use a non-signed build context which would help archs
483    * greatly which don't have arch rounding.
484    */
485   s_ipart = lp_build_ifloor(&bld->coord_bld, s);
486   if (dims >= 2)
487      t_ipart = lp_build_ifloor(&bld->coord_bld, t);
488   if (dims >= 3)
489      r_ipart = lp_build_ifloor(&bld->coord_bld, r);
490
491   /* add texel offsets */
492   if (offsets[0]) {
493      s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
494      if (dims >= 2) {
495         t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
496         if (dims >= 3) {
497            r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
498         }
499      }
500   }
501
502   /* get pixel, row, image strides */
503   x_stride = lp_build_const_vec(bld->gallivm,
504                                 bld->int_coord_bld.type,
505                                 bld->format_desc->block.bits/8);
506
507   /* Do texcoord wrapping, compute texel offset */
508   lp_build_sample_wrap_nearest_int(bld,
509                                    bld->format_desc->block.width,
510                                    s_ipart, s_float,
511                                    width_vec, x_stride, offsets[0],
512                                    bld->static_texture_state->pot_width,
513                                    bld->static_sampler_state->wrap_s,
514                                    &x_offset, &x_subcoord);
515   offset = x_offset;
516   if (dims >= 2) {
517      LLVMValueRef y_offset;
518      lp_build_sample_wrap_nearest_int(bld,
519                                       bld->format_desc->block.height,
520                                       t_ipart, t_float,
521                                       height_vec, row_stride_vec, offsets[1],
522                                       bld->static_texture_state->pot_height,
523                                       bld->static_sampler_state->wrap_t,
524                                       &y_offset, &y_subcoord);
525      offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
526      if (dims >= 3) {
527         LLVMValueRef z_offset;
528         lp_build_sample_wrap_nearest_int(bld,
529                                          1, /* block length (depth) */
530                                          r_ipart, r_float,
531                                          depth_vec, img_stride_vec, offsets[2],
532                                          bld->static_texture_state->pot_depth,
533                                          bld->static_sampler_state->wrap_r,
534                                          &z_offset, &z_subcoord);
535         offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
536      }
537   }
538   if (has_layer_coord(bld->static_texture_state->target)) {
539      LLVMValueRef z_offset;
540      /* The r coord is the cube face in [0,5] or array layer */
541      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
542      offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
543   }
544   if (mipoffsets) {
545      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
546   }
547
548   lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
549                                       x_subcoord, y_subcoord,
550                                       colors);
551}
552
553
554/**
555 * Fetch texels for image with linear sampling.
556 * Return filtered color as two vectors of 16-bit fixed point values.
557 */
558static void
559lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
560                                   LLVMValueRef data_ptr,
561                                   LLVMValueRef offset[2][2][2],
562                                   LLVMValueRef x_subcoord[2],
563                                   LLVMValueRef y_subcoord[2],
564                                   LLVMValueRef s_fpart,
565                                   LLVMValueRef t_fpart,
566                                   LLVMValueRef r_fpart,
567                                   LLVMValueRef *colors)
568{
569   const unsigned dims = bld->dims;
570   LLVMBuilderRef builder = bld->gallivm->builder;
571   struct lp_build_context u8n;
572   LLVMTypeRef u8n_vec_type;
573   LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
574   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
575   LLVMValueRef shuffle;
576   LLVMValueRef neighbors[2][2][2]; /* [z][y][x] */
577   LLVMValueRef packed;
578   unsigned i, j, k;
579   unsigned numj, numk;
580
581   lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
582   u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
583
584   /*
585    * Transform 4 x i32 in
586    *
587    *   s_fpart = {s0, s1, s2, s3}
588    *
589    * where each value is between 0 and 0xff,
590    *
591    * into one 16 x i20
592    *
593    *   s_fpart = {s0, s0, s0, s0, s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3}
594    *
595    * and likewise for t_fpart. There is no risk of loosing precision here
596    * since the fractional parts only use the lower 8bits.
597    */
598   s_fpart = LLVMBuildBitCast(builder, s_fpart, u8n_vec_type, "");
599   if (dims >= 2)
600      t_fpart = LLVMBuildBitCast(builder, t_fpart, u8n_vec_type, "");
601   if (dims >= 3)
602      r_fpart = LLVMBuildBitCast(builder, r_fpart, u8n_vec_type, "");
603
604   for (j = 0; j < u8n.type.length; j += 4) {
605#if UTIL_ARCH_LITTLE_ENDIAN
606      unsigned subindex = 0;
607#else
608      unsigned subindex = 3;
609#endif
610      LLVMValueRef index;
611
612      index = LLVMConstInt(elem_type, j + subindex, 0);
613      for (i = 0; i < 4; ++i)
614         shuffles[j + i] = index;
615   }
616
617   shuffle = LLVMConstVector(shuffles, u8n.type.length);
618
619   s_fpart = LLVMBuildShuffleVector(builder, s_fpart, u8n.undef,
620                                    shuffle, "");
621   if (dims >= 2) {
622      t_fpart = LLVMBuildShuffleVector(builder, t_fpart, u8n.undef,
623                                       shuffle, "");
624   }
625   if (dims >= 3) {
626      r_fpart = LLVMBuildShuffleVector(builder, r_fpart, u8n.undef,
627                                       shuffle, "");
628   }
629
630   /*
631    * Fetch the pixels as 4 x 32bit (rgba order might differ):
632    *
633    *   rgba0 rgba1 rgba2 rgba3
634    *
635    * bit cast them into 16 x u8
636    *
637    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
638    *
639    * unpack them into two 8 x i16:
640    *
641    *   r0 g0 b0 a0 r1 g1 b1 a1
642    *   r2 g2 b2 a2 r3 g3 b3 a3
643    *
644    * The higher 8 bits of the resulting elements will be zero.
645    */
646   numj = 1 + (dims >= 2);
647   numk = 1 + (dims >= 3);
648
649   for (k = 0; k < numk; k++) {
650      for (j = 0; j < numj; j++) {
651         for (i = 0; i < 2; i++) {
652            LLVMValueRef rgba8;
653
654            if (util_format_is_rgba8_variant(bld->format_desc)) {
655               struct lp_type fetch_type;
656               /*
657                * Given the format is a rgba8, just read the pixels as is,
658                * without any swizzling. Swizzling will be done later.
659                */
660               fetch_type = lp_type_uint(bld->texel_type.width);
661               rgba8 = lp_build_gather(bld->gallivm,
662                                       bld->texel_type.length,
663                                       bld->format_desc->block.bits,
664                                       fetch_type,
665                                       TRUE,
666                                       data_ptr, offset[k][j][i], TRUE);
667
668               rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
669            }
670            else {
671               rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
672                                               bld->format_desc,
673                                               u8n.type,
674                                               TRUE,
675                                               data_ptr, offset[k][j][i],
676                                               x_subcoord[i],
677                                               y_subcoord[j],
678                                               bld->cache);
679            }
680
681            neighbors[k][j][i] = rgba8;
682         }
683      }
684   }
685
686   /*
687    * Linear interpolation with 8.8 fixed point.
688    */
689
690   /* general 1/2/3-D lerping */
691   if (dims == 1) {
692      lp_build_reduce_filter(&u8n,
693                              bld->static_sampler_state->reduction_mode,
694                              LP_BLD_LERP_PRESCALED_WEIGHTS,
695                              1,
696                              s_fpart,
697                              &neighbors[0][0][0],
698                              &neighbors[0][0][1],
699                              &packed);
700   } else if (dims == 2) {
701      /* 2-D lerp */
702      lp_build_reduce_filter_2d(&u8n,
703                                 bld->static_sampler_state->reduction_mode,
704                                 LP_BLD_LERP_PRESCALED_WEIGHTS,
705                                 1,
706                                 s_fpart, t_fpart,
707                                 &neighbors[0][0][0],
708                                 &neighbors[0][0][1],
709                                 &neighbors[0][1][0],
710                                 &neighbors[0][1][1],
711                                 &packed);
712   } else {
713      /* 3-D lerp */
714      assert(dims == 3);
715      lp_build_reduce_filter_3d(&u8n,
716                                 bld->static_sampler_state->reduction_mode,
717                                 LP_BLD_LERP_PRESCALED_WEIGHTS,
718                                 1,
719                                 s_fpart, t_fpart, r_fpart,
720                                 &neighbors[0][0][0],
721                                 &neighbors[0][0][1],
722                                 &neighbors[0][1][0],
723                                 &neighbors[0][1][1],
724                                 &neighbors[1][0][0],
725                                 &neighbors[1][0][1],
726                                 &neighbors[1][1][0],
727                                 &neighbors[1][1][1],
728                                 &packed);
729   }
730
731   *colors = packed;
732}
733
734/**
735 * Sample a single texture image with (bi-)(tri-)linear sampling.
736 * Return filtered color as two vectors of 16-bit fixed point values.
737 */
738static void
739lp_build_sample_image_linear(struct lp_build_sample_context *bld,
740                             LLVMValueRef int_size,
741                             LLVMValueRef row_stride_vec,
742                             LLVMValueRef img_stride_vec,
743                             LLVMValueRef data_ptr,
744                             LLVMValueRef mipoffsets,
745                             LLVMValueRef s,
746                             LLVMValueRef t,
747                             LLVMValueRef r,
748                             const LLVMValueRef *offsets,
749                             LLVMValueRef *colors)
750{
751   const unsigned dims = bld->dims;
752   LLVMBuilderRef builder = bld->gallivm->builder;
753   struct lp_build_context i32;
754   LLVMValueRef i32_c8, i32_c128, i32_c255;
755   LLVMValueRef width_vec, height_vec, depth_vec;
756   LLVMValueRef s_ipart, s_fpart, s_float;
757   LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL;
758   LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL;
759   LLVMValueRef x_stride, y_stride, z_stride;
760   LLVMValueRef x_offset0, x_offset1;
761   LLVMValueRef y_offset0, y_offset1;
762   LLVMValueRef z_offset0, z_offset1;
763   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
764   LLVMValueRef x_subcoord[2], y_subcoord[2] = {NULL, NULL}, z_subcoord[2];
765   unsigned x, y, z;
766
767   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
768
769   lp_build_extract_image_sizes(bld,
770                                &bld->int_size_bld,
771                                bld->int_coord_type,
772                                int_size,
773                                &width_vec,
774                                &height_vec,
775                                &depth_vec);
776
777   s_float = s; t_float = t; r_float = r;
778
779   if (bld->static_sampler_state->normalized_coords) {
780      LLVMValueRef scaled_size;
781      LLVMValueRef flt_size;
782
783      /* scale size by 256 (8 fractional bits) */
784      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
785
786      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
787
788      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
789   }
790   else {
791      /* scale coords by 256 (8 fractional bits) */
792      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
793      if (dims >= 2)
794         t = lp_build_mul_imm(&bld->coord_bld, t, 256);
795      if (dims >= 3)
796         r = lp_build_mul_imm(&bld->coord_bld, r, 256);
797   }
798
799   /* convert float to int */
800   /* For correct rounding, need round to nearest, not truncation here.
801    * Note that in some cases (clamp to edge, no texel offsets) we
802    * could use a non-signed build context which would help archs which
803    * don't have fptosi intrinsic with nearest rounding implemented.
804    */
805   s = lp_build_iround(&bld->coord_bld, s);
806   if (dims >= 2)
807      t = lp_build_iround(&bld->coord_bld, t);
808   if (dims >= 3)
809      r = lp_build_iround(&bld->coord_bld, r);
810
811   /* subtract 0.5 (add -128) */
812   i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
813
814   s = LLVMBuildAdd(builder, s, i32_c128, "");
815   if (dims >= 2) {
816      t = LLVMBuildAdd(builder, t, i32_c128, "");
817   }
818   if (dims >= 3) {
819      r = LLVMBuildAdd(builder, r, i32_c128, "");
820   }
821
822   /* compute floor (shift right 8) */
823   i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
824   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
825   if (dims >= 2)
826      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
827   if (dims >= 3)
828      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
829
830   /* add texel offsets */
831   if (offsets[0]) {
832      s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
833      if (dims >= 2) {
834         t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
835         if (dims >= 3) {
836            r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
837         }
838      }
839   }
840
841   /* compute fractional part (AND with 0xff) */
842   i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
843   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
844   if (dims >= 2)
845      t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
846   if (dims >= 3)
847      r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
848
849   /* get pixel, row and image strides */
850   x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
851                                 bld->format_desc->block.bits/8);
852   y_stride = row_stride_vec;
853   z_stride = img_stride_vec;
854
855   /* do texcoord wrapping and compute texel offsets */
856   lp_build_sample_wrap_linear_int(bld,
857                                   bld->format_desc->block.width,
858                                   s_ipart, &s_fpart, s_float,
859                                   width_vec, x_stride, offsets[0],
860                                   bld->static_texture_state->pot_width,
861                                   bld->static_sampler_state->wrap_s,
862                                   &x_offset0, &x_offset1,
863                                   &x_subcoord[0], &x_subcoord[1]);
864
865   /* add potential cube/array/mip offsets now as they are constant per pixel */
866   if (has_layer_coord(bld->static_texture_state->target)) {
867      LLVMValueRef z_offset;
868      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
869      /* The r coord is the cube face in [0,5] or array layer */
870      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
871      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
872   }
873   if (mipoffsets) {
874      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
875      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
876   }
877
878   for (z = 0; z < 2; z++) {
879      for (y = 0; y < 2; y++) {
880         offset[z][y][0] = x_offset0;
881         offset[z][y][1] = x_offset1;
882      }
883   }
884
885   if (dims >= 2) {
886      lp_build_sample_wrap_linear_int(bld,
887                                      bld->format_desc->block.height,
888                                      t_ipart, &t_fpart, t_float,
889                                      height_vec, y_stride, offsets[1],
890                                      bld->static_texture_state->pot_height,
891                                      bld->static_sampler_state->wrap_t,
892                                      &y_offset0, &y_offset1,
893                                      &y_subcoord[0], &y_subcoord[1]);
894
895      for (z = 0; z < 2; z++) {
896         for (x = 0; x < 2; x++) {
897            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
898                                           offset[z][0][x], y_offset0);
899            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
900                                           offset[z][1][x], y_offset1);
901         }
902      }
903   }
904
905   if (dims >= 3) {
906      lp_build_sample_wrap_linear_int(bld,
907                                      1, /* block length (depth) */
908                                      r_ipart, &r_fpart, r_float,
909                                      depth_vec, z_stride, offsets[2],
910                                      bld->static_texture_state->pot_depth,
911                                      bld->static_sampler_state->wrap_r,
912                                      &z_offset0, &z_offset1,
913                                      &z_subcoord[0], &z_subcoord[1]);
914      for (y = 0; y < 2; y++) {
915         for (x = 0; x < 2; x++) {
916            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
917                                           offset[0][y][x], z_offset0);
918            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
919                                           offset[1][y][x], z_offset1);
920         }
921      }
922   }
923
924   lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
925                                      x_subcoord, y_subcoord,
926                                      s_fpart, t_fpart, r_fpart,
927                                      colors);
928}
929
930
931/**
932 * Sample the texture/mipmap using given image filter and mip filter.
933 * data0_ptr and data1_ptr point to the two mipmap levels to sample
934 * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
935 * If we're using nearest miplevel sampling the '1' values will be null/unused.
936 */
937static void
938lp_build_sample_mipmap(struct lp_build_sample_context *bld,
939                       unsigned img_filter,
940                       unsigned mip_filter,
941                       LLVMValueRef s,
942                       LLVMValueRef t,
943                       LLVMValueRef r,
944                       const LLVMValueRef *offsets,
945                       LLVMValueRef ilevel0,
946                       LLVMValueRef ilevel1,
947                       LLVMValueRef lod_fpart,
948                       LLVMValueRef colors_var)
949{
950   LLVMBuilderRef builder = bld->gallivm->builder;
951   LLVMValueRef size0;
952   LLVMValueRef size1;
953   LLVMValueRef row_stride0_vec = NULL;
954   LLVMValueRef row_stride1_vec = NULL;
955   LLVMValueRef img_stride0_vec = NULL;
956   LLVMValueRef img_stride1_vec = NULL;
957   LLVMValueRef data_ptr0;
958   LLVMValueRef data_ptr1;
959   LLVMValueRef mipoff0 = NULL;
960   LLVMValueRef mipoff1 = NULL;
961   LLVMValueRef colors0;
962   LLVMValueRef colors1;
963
964   /* sample the first mipmap level */
965   lp_build_mipmap_level_sizes(bld, ilevel0,
966                               &size0,
967                               &row_stride0_vec, &img_stride0_vec);
968   if (bld->num_mips == 1) {
969      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
970   }
971   else {
972      /* This path should work for num_lods 1 too but slightly less efficient */
973      data_ptr0 = bld->base_ptr;
974      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
975   }
976
977   if (img_filter == PIPE_TEX_FILTER_NEAREST) {
978      lp_build_sample_image_nearest(bld,
979                                    size0,
980                                    row_stride0_vec, img_stride0_vec,
981                                    data_ptr0, mipoff0, s, t, r, offsets,
982                                    &colors0);
983   }
984   else {
985      assert(img_filter == PIPE_TEX_FILTER_LINEAR);
986      lp_build_sample_image_linear(bld,
987                                   size0,
988                                   row_stride0_vec, img_stride0_vec,
989                                   data_ptr0, mipoff0, s, t, r, offsets,
990                                   &colors0);
991   }
992
993   /* Store the first level's colors in the output variables */
994   LLVMBuildStore(builder, colors0, colors_var);
995
996   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
997      LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
998                                                     bld->lodf_bld.type, 256.0);
999      LLVMTypeRef i32vec_type = bld->lodi_bld.vec_type;
1000      struct lp_build_if_state if_ctx;
1001      LLVMValueRef need_lerp;
1002      unsigned num_quads = bld->coord_bld.type.length / 4;
1003      unsigned i;
1004
1005      lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16vec_scale, "");
1006      lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");
1007
1008      /* need_lerp = lod_fpart > 0 */
1009      if (bld->num_lods == 1) {
1010         need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
1011                                   lod_fpart, bld->lodi_bld.zero,
1012                                   "need_lerp");
1013      }
1014      else {
1015         /*
1016          * We'll do mip filtering if any of the quads need it.
1017          * It might be better to split the vectors here and only fetch/filter
1018          * quads which need it.
1019          */
1020         /*
1021          * We need to clamp lod_fpart here since we can get negative
1022          * values which would screw up filtering if not all
1023          * lod_fpart values have same sign.
1024          * We can however then skip the greater than comparison.
1025          */
1026         lod_fpart = lp_build_max(&bld->lodi_bld, lod_fpart,
1027                                  bld->lodi_bld.zero);
1028         need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_fpart);
1029      }
1030
1031      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1032      {
1033         struct lp_build_context u8n_bld;
1034
1035         lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));
1036
1037         /* sample the second mipmap level */
1038         lp_build_mipmap_level_sizes(bld, ilevel1,
1039                                     &size1,
1040                                     &row_stride1_vec, &img_stride1_vec);
1041         if (bld->num_mips == 1) {
1042            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1043         }
1044         else {
1045            data_ptr1 = bld->base_ptr;
1046            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1047         }
1048
1049         if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1050            lp_build_sample_image_nearest(bld,
1051                                          size1,
1052                                          row_stride1_vec, img_stride1_vec,
1053                                          data_ptr1, mipoff1, s, t, r, offsets,
1054                                          &colors1);
1055         }
1056         else {
1057            lp_build_sample_image_linear(bld,
1058                                         size1,
1059                                         row_stride1_vec, img_stride1_vec,
1060                                         data_ptr1, mipoff1, s, t, r, offsets,
1061                                         &colors1);
1062         }
1063
1064         /* interpolate samples from the two mipmap levels */
1065
1066         if (num_quads == 1 && bld->num_lods == 1) {
1067            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, "");
1068            lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart);
1069         }
1070         else {
1071            unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods;
1072            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->lodi_bld.type.length);
1073            LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];
1074
1075            /* Take the LSB of lod_fpart */
1076            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, "");
1077
1078            /* Broadcast each lod weight into their respective channels */
1079            for (i = 0; i < u8n_bld.type.length; ++i) {
1080               shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_lod);
1081            }
1082            lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, LLVMGetUndef(tmp_vec_type),
1083                                               LLVMConstVector(shuffle, u8n_bld.type.length), "");
1084         }
1085
1086         lp_build_reduce_filter(&u8n_bld,
1087                                bld->static_sampler_state->reduction_mode,
1088                                LP_BLD_LERP_PRESCALED_WEIGHTS,
1089                                1,
1090                                lod_fpart,
1091                                &colors0,
1092                                &colors1,
1093                                &colors0);
1094
1095         LLVMBuildStore(builder, colors0, colors_var);
1096      }
1097      lp_build_endif(&if_ctx);
1098   }
1099}
1100
1101
1102
1103/**
1104 * Texture sampling in AoS format.  Used when sampling common 32-bit/texel
1105 * formats.  1D/2D/3D/cube texture supported.  All mipmap sampling modes
1106 * but only limited texture coord wrap modes.
1107 */
1108void
1109lp_build_sample_aos(struct lp_build_sample_context *bld,
1110                    unsigned sampler_unit,
1111                    LLVMValueRef s,
1112                    LLVMValueRef t,
1113                    LLVMValueRef r,
1114                    const LLVMValueRef *offsets,
1115                    LLVMValueRef lod_positive,
1116                    LLVMValueRef lod_fpart,
1117                    LLVMValueRef ilevel0,
1118                    LLVMValueRef ilevel1,
1119                    LLVMValueRef texel_out[4])
1120{
1121   LLVMBuilderRef builder = bld->gallivm->builder;
1122   const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
1123   const unsigned min_filter = bld->static_sampler_state->min_img_filter;
1124   const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
1125   const unsigned dims = bld->dims;
1126   LLVMValueRef packed_var, packed;
1127   LLVMValueRef unswizzled[4];
1128   struct lp_build_context u8n_bld;
1129
1130   /* we only support the common/simple wrap modes at this time */
1131   assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_s));
1132   if (dims >= 2)
1133      assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_t));
1134   if (dims >= 3)
1135      assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_r));
1136
1137
1138   /* make 8-bit unorm builder context */
1139   lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));
1140
1141   /*
1142    * Get/interpolate texture colors.
1143    */
1144
1145   packed_var = lp_build_alloca(bld->gallivm, u8n_bld.vec_type, "packed_var");
1146
1147   if (min_filter == mag_filter) {
1148      /* no need to distinguish between minification and magnification */
1149      lp_build_sample_mipmap(bld,
1150                             min_filter, mip_filter,
1151                             s, t, r, offsets,
1152                             ilevel0, ilevel1, lod_fpart,
1153                             packed_var);
1154   }
1155   else {
1156      /* Emit conditional to choose min image filter or mag image filter
1157       * depending on the lod being > 0 or <= 0, respectively.
1158       */
1159      struct lp_build_if_state if_ctx;
1160
1161      /*
1162       * FIXME this should take all lods into account, if some are min
1163       * some max probably could hack up the weights in the linear
1164       * path with selects to work for nearest.
1165       */
1166      if (bld->num_lods > 1)
1167         lod_positive = LLVMBuildExtractElement(builder, lod_positive,
1168                                                lp_build_const_int32(bld->gallivm, 0), "");
1169
1170      lod_positive = LLVMBuildTrunc(builder, lod_positive,
1171                                    LLVMInt1TypeInContext(bld->gallivm->context), "");
1172
1173      lp_build_if(&if_ctx, bld->gallivm, lod_positive);
1174      {
1175         /* Use the minification filter */
1176         lp_build_sample_mipmap(bld,
1177                                min_filter, mip_filter,
1178                                s, t, r, offsets,
1179                                ilevel0, ilevel1, lod_fpart,
1180                                packed_var);
1181      }
1182      lp_build_else(&if_ctx);
1183      {
1184         /* Use the magnification filter */
1185         lp_build_sample_mipmap(bld,
1186                                mag_filter, PIPE_TEX_MIPFILTER_NONE,
1187                                s, t, r, offsets,
1188                                ilevel0, NULL, NULL,
1189                                packed_var);
1190      }
1191      lp_build_endif(&if_ctx);
1192   }
1193
1194   packed = LLVMBuildLoad(builder, packed_var, "");
1195
1196   /*
1197    * Convert to SoA and swizzle.
1198    */
1199   lp_build_rgba8_to_fi32_soa(bld->gallivm,
1200                             bld->texel_type,
1201                             packed, unswizzled);
1202
1203   if (util_format_is_rgba8_variant(bld->format_desc)) {
1204      lp_build_format_swizzle_soa(bld->format_desc,
1205                                  &bld->texel_bld,
1206                                  unswizzled, texel_out);
1207   }
1208   else {
1209      texel_out[0] = unswizzled[0];
1210      texel_out[1] = unswizzled[1];
1211      texel_out[2] = unswizzled[2];
1212      texel_out[3] = unswizzled[3];
1213   }
1214}
1215