1/************************************************************************** 2 * 3 * Copyright 2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * Texture sampling -- AoS. 31 * 32 * @author Jose Fonseca <jfonseca@vmware.com> 33 * @author Brian Paul <brianp@vmware.com> 34 */ 35 36#include "pipe/p_defines.h" 37#include "pipe/p_state.h" 38#include "util/u_debug.h" 39#include "util/u_dump.h" 40#include "util/u_memory.h" 41#include "util/u_math.h" 42#include "util/format/u_format.h" 43#include "util/u_cpu_detect.h" 44#include "lp_bld_debug.h" 45#include "lp_bld_type.h" 46#include "lp_bld_const.h" 47#include "lp_bld_conv.h" 48#include "lp_bld_arit.h" 49#include "lp_bld_bitarit.h" 50#include "lp_bld_logic.h" 51#include "lp_bld_swizzle.h" 52#include "lp_bld_pack.h" 53#include "lp_bld_flow.h" 54#include "lp_bld_gather.h" 55#include "lp_bld_format.h" 56#include "lp_bld_init.h" 57#include "lp_bld_sample.h" 58#include "lp_bld_sample_aos.h" 59#include "lp_bld_quad.h" 60 61 62/** 63 * Build LLVM code for texture coord wrapping, for nearest filtering, 64 * for scaled integer texcoords. 65 * \param block_length is the length of the pixel block along the 66 * coordinate axis 67 * \param coord the incoming texcoord (s,t or r) scaled to the texture size 68 * \param coord_f the incoming texcoord (s,t or r) as float vec 69 * \param length the texture size along one dimension 70 * \param stride pixel stride along the coordinate axis (in bytes) 71 * \param offset the texel offset along the coord axis 72 * \param is_pot if TRUE, length is a power of two 73 * \param wrap_mode one of PIPE_TEX_WRAP_x 74 * \param out_offset byte offset for the wrapped coordinate 75 * \param out_i resulting sub-block pixel coordinate for coord0 76 */ 77static void 78lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, 79 unsigned block_length, 80 LLVMValueRef coord, 81 LLVMValueRef coord_f, 82 LLVMValueRef length, 83 LLVMValueRef stride, 84 LLVMValueRef offset, 85 boolean is_pot, 86 unsigned wrap_mode, 87 LLVMValueRef *out_offset, 88 LLVMValueRef *out_i) 89{ 90 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 91 LLVMBuilderRef builder = bld->gallivm->builder; 92 LLVMValueRef length_minus_one; 93 94 length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); 95 96 switch(wrap_mode) { 97 case PIPE_TEX_WRAP_REPEAT: 98 if(is_pot) 99 coord = LLVMBuildAnd(builder, coord, length_minus_one, ""); 100 else { 101 struct lp_build_context *coord_bld = &bld->coord_bld; 102 LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); 103 if (offset) { 104 offset = lp_build_int_to_float(coord_bld, offset); 105 offset = lp_build_div(coord_bld, offset, length_f); 106 coord_f = lp_build_add(coord_bld, coord_f, offset); 107 } 108 coord = lp_build_fract_safe(coord_bld, coord_f); 109 coord = lp_build_mul(coord_bld, coord, length_f); 110 coord = lp_build_itrunc(coord_bld, coord); 111 } 112 break; 113 114 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 115 coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); 116 coord = lp_build_min(int_coord_bld, coord, length_minus_one); 117 break; 118 119 case PIPE_TEX_WRAP_CLAMP: 120 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 121 case PIPE_TEX_WRAP_MIRROR_REPEAT: 122 case PIPE_TEX_WRAP_MIRROR_CLAMP: 123 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 124 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 125 default: 126 assert(0); 127 } 128 129 lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride, 130 out_offset, out_i); 131} 132 133 134/** 135 * Helper to compute the first coord and the weight for 136 * linear wrap repeat npot textures 137 */ 138static void 139lp_build_coord_repeat_npot_linear_int(struct lp_build_sample_context *bld, 140 LLVMValueRef coord_f, 141 LLVMValueRef length_i, 142 LLVMValueRef length_f, 143 LLVMValueRef *coord0_i, 144 LLVMValueRef *weight_i) 145{ 146 struct lp_build_context *coord_bld = &bld->coord_bld; 147 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 148 struct lp_build_context abs_coord_bld; 149 struct lp_type abs_type; 150 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i, 151 int_coord_bld->one); 152 LLVMValueRef mask, i32_c8, i32_c128, i32_c255; 153 154 /* wrap with normalized floats is just fract */ 155 coord_f = lp_build_fract(coord_bld, coord_f); 156 /* mul by size */ 157 coord_f = lp_build_mul(coord_bld, coord_f, length_f); 158 /* convert to int, compute lerp weight */ 159 coord_f = lp_build_mul_imm(&bld->coord_bld, coord_f, 256); 160 161 /* At this point we don't have any negative numbers so use non-signed 162 * build context which might help on some archs. 163 */ 164 abs_type = coord_bld->type; 165 abs_type.sign = 0; 166 lp_build_context_init(&abs_coord_bld, bld->gallivm, abs_type); 167 *coord0_i = lp_build_iround(&abs_coord_bld, coord_f); 168 169 /* subtract 0.5 (add -128) */ 170 i32_c128 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, -128); 171 *coord0_i = LLVMBuildAdd(bld->gallivm->builder, *coord0_i, i32_c128, ""); 172 173 /* compute fractional part (AND with 0xff) */ 174 i32_c255 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 255); 175 *weight_i = LLVMBuildAnd(bld->gallivm->builder, *coord0_i, i32_c255, ""); 176 177 /* compute floor (shift right 8) */ 178 i32_c8 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 8); 179 *coord0_i = LLVMBuildAShr(bld->gallivm->builder, *coord0_i, i32_c8, ""); 180 /* 181 * we avoided the 0.5/length division before the repeat wrap, 182 * now need to fix up edge cases with selects 183 */ 184 mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type, 185 PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero); 186 *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i); 187 /* 188 * We should never get values too large - except if coord was nan or inf, 189 * in which case things go terribly wrong... 190 * Alternatively, could use fract_safe above... 191 */ 192 *coord0_i = lp_build_min(int_coord_bld, *coord0_i, length_minus_one); 193} 194 195 196/** 197 * Build LLVM code for texture coord wrapping, for linear filtering, 198 * for scaled integer texcoords. 199 * \param block_length is the length of the pixel block along the 200 * coordinate axis 201 * \param coord0 the incoming texcoord (s,t or r) scaled to the texture size 202 * \param coord_f the incoming texcoord (s,t or r) as float vec 203 * \param length the texture size along one dimension 204 * \param stride pixel stride along the coordinate axis (in bytes) 205 * \param offset the texel offset along the coord axis 206 * \param is_pot if TRUE, length is a power of two 207 * \param wrap_mode one of PIPE_TEX_WRAP_x 208 * \param offset0 resulting relative offset for coord0 209 * \param offset1 resulting relative offset for coord0 + 1 210 * \param i0 resulting sub-block pixel coordinate for coord0 211 * \param i1 resulting sub-block pixel coordinate for coord0 + 1 212 */ 213static void 214lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, 215 unsigned block_length, 216 LLVMValueRef coord0, 217 LLVMValueRef *weight_i, 218 LLVMValueRef coord_f, 219 LLVMValueRef length, 220 LLVMValueRef stride, 221 LLVMValueRef offset, 222 boolean is_pot, 223 unsigned wrap_mode, 224 LLVMValueRef *offset0, 225 LLVMValueRef *offset1, 226 LLVMValueRef *i0, 227 LLVMValueRef *i1) 228{ 229 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 230 LLVMBuilderRef builder = bld->gallivm->builder; 231 LLVMValueRef length_minus_one; 232 LLVMValueRef lmask, umask, mask; 233 234 /* 235 * If the pixel block covers more than one pixel then there is no easy 236 * way to calculate offset1 relative to offset0. Instead, compute them 237 * independently. Otherwise, try to compute offset0 and offset1 with 238 * a single stride multiplication. 239 */ 240 241 length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); 242 243 if (block_length != 1) { 244 LLVMValueRef coord1; 245 switch(wrap_mode) { 246 case PIPE_TEX_WRAP_REPEAT: 247 if (is_pot) { 248 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 249 coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, ""); 250 coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, ""); 251 } 252 else { 253 LLVMValueRef mask; 254 LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length); 255 if (offset) { 256 offset = lp_build_int_to_float(&bld->coord_bld, offset); 257 offset = lp_build_div(&bld->coord_bld, offset, length_f); 258 coord_f = lp_build_add(&bld->coord_bld, coord_f, offset); 259 } 260 lp_build_coord_repeat_npot_linear_int(bld, coord_f, 261 length, length_f, 262 &coord0, weight_i); 263 mask = lp_build_compare(bld->gallivm, int_coord_bld->type, 264 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); 265 coord1 = LLVMBuildAnd(builder, 266 lp_build_add(int_coord_bld, coord0, 267 int_coord_bld->one), 268 mask, ""); 269 } 270 break; 271 272 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 273 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 274 coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero, 275 length_minus_one); 276 coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero, 277 length_minus_one); 278 break; 279 280 case PIPE_TEX_WRAP_CLAMP: 281 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 282 case PIPE_TEX_WRAP_MIRROR_REPEAT: 283 case PIPE_TEX_WRAP_MIRROR_CLAMP: 284 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 285 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 286 default: 287 assert(0); 288 coord0 = int_coord_bld->zero; 289 coord1 = int_coord_bld->zero; 290 break; 291 } 292 lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride, 293 offset0, i0); 294 lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride, 295 offset1, i1); 296 return; 297 } 298 299 *i0 = int_coord_bld->zero; 300 *i1 = int_coord_bld->zero; 301 302 switch(wrap_mode) { 303 case PIPE_TEX_WRAP_REPEAT: 304 if (is_pot) { 305 coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, ""); 306 } 307 else { 308 LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length); 309 if (offset) { 310 offset = lp_build_int_to_float(&bld->coord_bld, offset); 311 offset = lp_build_div(&bld->coord_bld, offset, length_f); 312 coord_f = lp_build_add(&bld->coord_bld, coord_f, offset); 313 } 314 lp_build_coord_repeat_npot_linear_int(bld, coord_f, 315 length, length_f, 316 &coord0, weight_i); 317 } 318 319 mask = lp_build_compare(bld->gallivm, int_coord_bld->type, 320 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); 321 322 *offset0 = lp_build_mul(int_coord_bld, coord0, stride); 323 *offset1 = LLVMBuildAnd(builder, 324 lp_build_add(int_coord_bld, *offset0, stride), 325 mask, ""); 326 break; 327 328 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 329 /* XXX this might be slower than the separate path 330 * on some newer cpus. With sse41 this is 8 instructions vs. 7 331 * - at least on SNB this is almost certainly slower since 332 * min/max are cheaper than selects, and the muls aren't bad. 333 */ 334 lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type, 335 PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero); 336 umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type, 337 PIPE_FUNC_LESS, coord0, length_minus_one); 338 339 coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero); 340 coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one); 341 342 mask = LLVMBuildAnd(builder, lmask, umask, ""); 343 344 *offset0 = lp_build_mul(int_coord_bld, coord0, stride); 345 *offset1 = lp_build_add(int_coord_bld, 346 *offset0, 347 LLVMBuildAnd(builder, stride, mask, "")); 348 break; 349 350 case PIPE_TEX_WRAP_CLAMP: 351 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 352 case PIPE_TEX_WRAP_MIRROR_REPEAT: 353 case PIPE_TEX_WRAP_MIRROR_CLAMP: 354 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 355 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 356 default: 357 assert(0); 358 *offset0 = int_coord_bld->zero; 359 *offset1 = int_coord_bld->zero; 360 break; 361 } 362} 363 364 365/** 366 * Fetch texels for image with nearest sampling. 367 * Return filtered color as two vectors of 16-bit fixed point values. 368 */ 369static void 370lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld, 371 LLVMValueRef data_ptr, 372 LLVMValueRef offset, 373 LLVMValueRef x_subcoord, 374 LLVMValueRef y_subcoord, 375 LLVMValueRef *colors) 376{ 377 /* 378 * Fetch the pixels as 4 x 32bit (rgba order might differ): 379 * 380 * rgba0 rgba1 rgba2 rgba3 381 * 382 * bit cast them into 16 x u8 383 * 384 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3 385 * 386 * unpack them into two 8 x i16: 387 * 388 * r0 g0 b0 a0 r1 g1 b1 a1 389 * r2 g2 b2 a2 r3 g3 b3 a3 390 * 391 * The higher 8 bits of the resulting elements will be zero. 392 */ 393 LLVMBuilderRef builder = bld->gallivm->builder; 394 LLVMValueRef rgba8; 395 struct lp_build_context u8n; 396 LLVMTypeRef u8n_vec_type; 397 struct lp_type fetch_type; 398 399 lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width)); 400 u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type); 401 402 fetch_type = lp_type_uint(bld->texel_type.width); 403 if (util_format_is_rgba8_variant(bld->format_desc)) { 404 /* 405 * Given the format is a rgba8, just read the pixels as is, 406 * without any swizzling. Swizzling will be done later. 407 */ 408 rgba8 = lp_build_gather(bld->gallivm, 409 bld->texel_type.length, 410 bld->format_desc->block.bits, 411 fetch_type, 412 TRUE, 413 data_ptr, offset, TRUE); 414 415 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, ""); 416 } 417 else { 418 rgba8 = lp_build_fetch_rgba_aos(bld->gallivm, 419 bld->format_desc, 420 u8n.type, 421 TRUE, 422 data_ptr, offset, 423 x_subcoord, 424 y_subcoord, 425 bld->cache); 426 } 427 428 *colors = rgba8; 429} 430 431 432/** 433 * Sample a single texture image with nearest sampling. 434 * If sampling a cube texture, r = cube face in [0,5]. 435 * Return filtered color as two vectors of 16-bit fixed point values. 436 */ 437static void 438lp_build_sample_image_nearest(struct lp_build_sample_context *bld, 439 LLVMValueRef int_size, 440 LLVMValueRef row_stride_vec, 441 LLVMValueRef img_stride_vec, 442 LLVMValueRef data_ptr, 443 LLVMValueRef mipoffsets, 444 LLVMValueRef s, 445 LLVMValueRef t, 446 LLVMValueRef r, 447 const LLVMValueRef *offsets, 448 LLVMValueRef *colors) 449{ 450 const unsigned dims = bld->dims; 451 struct lp_build_context i32; 452 LLVMValueRef width_vec, height_vec, depth_vec; 453 LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL; 454 LLVMValueRef s_float, t_float = NULL, r_float = NULL; 455 LLVMValueRef x_stride; 456 LLVMValueRef x_offset, offset; 457 LLVMValueRef x_subcoord, y_subcoord = NULL, z_subcoord; 458 459 lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width)); 460 461 lp_build_extract_image_sizes(bld, 462 &bld->int_size_bld, 463 bld->int_coord_type, 464 int_size, 465 &width_vec, 466 &height_vec, 467 &depth_vec); 468 469 s_float = s; t_float = t; r_float = r; 470 471 if (bld->static_sampler_state->normalized_coords) { 472 LLVMValueRef flt_size; 473 474 flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size); 475 476 lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r); 477 } 478 479 /* convert float to int */ 480 /* For correct rounding, need floor, not truncation here. 481 * Note that in some cases (clamp to edge, no texel offsets) we 482 * could use a non-signed build context which would help archs 483 * greatly which don't have arch rounding. 484 */ 485 s_ipart = lp_build_ifloor(&bld->coord_bld, s); 486 if (dims >= 2) 487 t_ipart = lp_build_ifloor(&bld->coord_bld, t); 488 if (dims >= 3) 489 r_ipart = lp_build_ifloor(&bld->coord_bld, r); 490 491 /* add texel offsets */ 492 if (offsets[0]) { 493 s_ipart = lp_build_add(&i32, s_ipart, offsets[0]); 494 if (dims >= 2) { 495 t_ipart = lp_build_add(&i32, t_ipart, offsets[1]); 496 if (dims >= 3) { 497 r_ipart = lp_build_add(&i32, r_ipart, offsets[2]); 498 } 499 } 500 } 501 502 /* get pixel, row, image strides */ 503 x_stride = lp_build_const_vec(bld->gallivm, 504 bld->int_coord_bld.type, 505 bld->format_desc->block.bits/8); 506 507 /* Do texcoord wrapping, compute texel offset */ 508 lp_build_sample_wrap_nearest_int(bld, 509 bld->format_desc->block.width, 510 s_ipart, s_float, 511 width_vec, x_stride, offsets[0], 512 bld->static_texture_state->pot_width, 513 bld->static_sampler_state->wrap_s, 514 &x_offset, &x_subcoord); 515 offset = x_offset; 516 if (dims >= 2) { 517 LLVMValueRef y_offset; 518 lp_build_sample_wrap_nearest_int(bld, 519 bld->format_desc->block.height, 520 t_ipart, t_float, 521 height_vec, row_stride_vec, offsets[1], 522 bld->static_texture_state->pot_height, 523 bld->static_sampler_state->wrap_t, 524 &y_offset, &y_subcoord); 525 offset = lp_build_add(&bld->int_coord_bld, offset, y_offset); 526 if (dims >= 3) { 527 LLVMValueRef z_offset; 528 lp_build_sample_wrap_nearest_int(bld, 529 1, /* block length (depth) */ 530 r_ipart, r_float, 531 depth_vec, img_stride_vec, offsets[2], 532 bld->static_texture_state->pot_depth, 533 bld->static_sampler_state->wrap_r, 534 &z_offset, &z_subcoord); 535 offset = lp_build_add(&bld->int_coord_bld, offset, z_offset); 536 } 537 } 538 if (has_layer_coord(bld->static_texture_state->target)) { 539 LLVMValueRef z_offset; 540 /* The r coord is the cube face in [0,5] or array layer */ 541 z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec); 542 offset = lp_build_add(&bld->int_coord_bld, offset, z_offset); 543 } 544 if (mipoffsets) { 545 offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets); 546 } 547 548 lp_build_sample_fetch_image_nearest(bld, data_ptr, offset, 549 x_subcoord, y_subcoord, 550 colors); 551} 552 553 554/** 555 * Fetch texels for image with linear sampling. 556 * Return filtered color as two vectors of 16-bit fixed point values. 557 */ 558static void 559lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld, 560 LLVMValueRef data_ptr, 561 LLVMValueRef offset[2][2][2], 562 LLVMValueRef x_subcoord[2], 563 LLVMValueRef y_subcoord[2], 564 LLVMValueRef s_fpart, 565 LLVMValueRef t_fpart, 566 LLVMValueRef r_fpart, 567 LLVMValueRef *colors) 568{ 569 const unsigned dims = bld->dims; 570 LLVMBuilderRef builder = bld->gallivm->builder; 571 struct lp_build_context u8n; 572 LLVMTypeRef u8n_vec_type; 573 LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); 574 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 575 LLVMValueRef shuffle; 576 LLVMValueRef neighbors[2][2][2]; /* [z][y][x] */ 577 LLVMValueRef packed; 578 unsigned i, j, k; 579 unsigned numj, numk; 580 581 lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width)); 582 u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type); 583 584 /* 585 * Transform 4 x i32 in 586 * 587 * s_fpart = {s0, s1, s2, s3} 588 * 589 * where each value is between 0 and 0xff, 590 * 591 * into one 16 x i20 592 * 593 * s_fpart = {s0, s0, s0, s0, s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3} 594 * 595 * and likewise for t_fpart. There is no risk of loosing precision here 596 * since the fractional parts only use the lower 8bits. 597 */ 598 s_fpart = LLVMBuildBitCast(builder, s_fpart, u8n_vec_type, ""); 599 if (dims >= 2) 600 t_fpart = LLVMBuildBitCast(builder, t_fpart, u8n_vec_type, ""); 601 if (dims >= 3) 602 r_fpart = LLVMBuildBitCast(builder, r_fpart, u8n_vec_type, ""); 603 604 for (j = 0; j < u8n.type.length; j += 4) { 605#if UTIL_ARCH_LITTLE_ENDIAN 606 unsigned subindex = 0; 607#else 608 unsigned subindex = 3; 609#endif 610 LLVMValueRef index; 611 612 index = LLVMConstInt(elem_type, j + subindex, 0); 613 for (i = 0; i < 4; ++i) 614 shuffles[j + i] = index; 615 } 616 617 shuffle = LLVMConstVector(shuffles, u8n.type.length); 618 619 s_fpart = LLVMBuildShuffleVector(builder, s_fpart, u8n.undef, 620 shuffle, ""); 621 if (dims >= 2) { 622 t_fpart = LLVMBuildShuffleVector(builder, t_fpart, u8n.undef, 623 shuffle, ""); 624 } 625 if (dims >= 3) { 626 r_fpart = LLVMBuildShuffleVector(builder, r_fpart, u8n.undef, 627 shuffle, ""); 628 } 629 630 /* 631 * Fetch the pixels as 4 x 32bit (rgba order might differ): 632 * 633 * rgba0 rgba1 rgba2 rgba3 634 * 635 * bit cast them into 16 x u8 636 * 637 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3 638 * 639 * unpack them into two 8 x i16: 640 * 641 * r0 g0 b0 a0 r1 g1 b1 a1 642 * r2 g2 b2 a2 r3 g3 b3 a3 643 * 644 * The higher 8 bits of the resulting elements will be zero. 645 */ 646 numj = 1 + (dims >= 2); 647 numk = 1 + (dims >= 3); 648 649 for (k = 0; k < numk; k++) { 650 for (j = 0; j < numj; j++) { 651 for (i = 0; i < 2; i++) { 652 LLVMValueRef rgba8; 653 654 if (util_format_is_rgba8_variant(bld->format_desc)) { 655 struct lp_type fetch_type; 656 /* 657 * Given the format is a rgba8, just read the pixels as is, 658 * without any swizzling. Swizzling will be done later. 659 */ 660 fetch_type = lp_type_uint(bld->texel_type.width); 661 rgba8 = lp_build_gather(bld->gallivm, 662 bld->texel_type.length, 663 bld->format_desc->block.bits, 664 fetch_type, 665 TRUE, 666 data_ptr, offset[k][j][i], TRUE); 667 668 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, ""); 669 } 670 else { 671 rgba8 = lp_build_fetch_rgba_aos(bld->gallivm, 672 bld->format_desc, 673 u8n.type, 674 TRUE, 675 data_ptr, offset[k][j][i], 676 x_subcoord[i], 677 y_subcoord[j], 678 bld->cache); 679 } 680 681 neighbors[k][j][i] = rgba8; 682 } 683 } 684 } 685 686 /* 687 * Linear interpolation with 8.8 fixed point. 688 */ 689 690 /* general 1/2/3-D lerping */ 691 if (dims == 1) { 692 lp_build_reduce_filter(&u8n, 693 bld->static_sampler_state->reduction_mode, 694 LP_BLD_LERP_PRESCALED_WEIGHTS, 695 1, 696 s_fpart, 697 &neighbors[0][0][0], 698 &neighbors[0][0][1], 699 &packed); 700 } else if (dims == 2) { 701 /* 2-D lerp */ 702 lp_build_reduce_filter_2d(&u8n, 703 bld->static_sampler_state->reduction_mode, 704 LP_BLD_LERP_PRESCALED_WEIGHTS, 705 1, 706 s_fpart, t_fpart, 707 &neighbors[0][0][0], 708 &neighbors[0][0][1], 709 &neighbors[0][1][0], 710 &neighbors[0][1][1], 711 &packed); 712 } else { 713 /* 3-D lerp */ 714 assert(dims == 3); 715 lp_build_reduce_filter_3d(&u8n, 716 bld->static_sampler_state->reduction_mode, 717 LP_BLD_LERP_PRESCALED_WEIGHTS, 718 1, 719 s_fpart, t_fpart, r_fpart, 720 &neighbors[0][0][0], 721 &neighbors[0][0][1], 722 &neighbors[0][1][0], 723 &neighbors[0][1][1], 724 &neighbors[1][0][0], 725 &neighbors[1][0][1], 726 &neighbors[1][1][0], 727 &neighbors[1][1][1], 728 &packed); 729 } 730 731 *colors = packed; 732} 733 734/** 735 * Sample a single texture image with (bi-)(tri-)linear sampling. 736 * Return filtered color as two vectors of 16-bit fixed point values. 737 */ 738static void 739lp_build_sample_image_linear(struct lp_build_sample_context *bld, 740 LLVMValueRef int_size, 741 LLVMValueRef row_stride_vec, 742 LLVMValueRef img_stride_vec, 743 LLVMValueRef data_ptr, 744 LLVMValueRef mipoffsets, 745 LLVMValueRef s, 746 LLVMValueRef t, 747 LLVMValueRef r, 748 const LLVMValueRef *offsets, 749 LLVMValueRef *colors) 750{ 751 const unsigned dims = bld->dims; 752 LLVMBuilderRef builder = bld->gallivm->builder; 753 struct lp_build_context i32; 754 LLVMValueRef i32_c8, i32_c128, i32_c255; 755 LLVMValueRef width_vec, height_vec, depth_vec; 756 LLVMValueRef s_ipart, s_fpart, s_float; 757 LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL; 758 LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL; 759 LLVMValueRef x_stride, y_stride, z_stride; 760 LLVMValueRef x_offset0, x_offset1; 761 LLVMValueRef y_offset0, y_offset1; 762 LLVMValueRef z_offset0, z_offset1; 763 LLVMValueRef offset[2][2][2]; /* [z][y][x] */ 764 LLVMValueRef x_subcoord[2], y_subcoord[2] = {NULL, NULL}, z_subcoord[2]; 765 unsigned x, y, z; 766 767 lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width)); 768 769 lp_build_extract_image_sizes(bld, 770 &bld->int_size_bld, 771 bld->int_coord_type, 772 int_size, 773 &width_vec, 774 &height_vec, 775 &depth_vec); 776 777 s_float = s; t_float = t; r_float = r; 778 779 if (bld->static_sampler_state->normalized_coords) { 780 LLVMValueRef scaled_size; 781 LLVMValueRef flt_size; 782 783 /* scale size by 256 (8 fractional bits) */ 784 scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8); 785 786 flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size); 787 788 lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r); 789 } 790 else { 791 /* scale coords by 256 (8 fractional bits) */ 792 s = lp_build_mul_imm(&bld->coord_bld, s, 256); 793 if (dims >= 2) 794 t = lp_build_mul_imm(&bld->coord_bld, t, 256); 795 if (dims >= 3) 796 r = lp_build_mul_imm(&bld->coord_bld, r, 256); 797 } 798 799 /* convert float to int */ 800 /* For correct rounding, need round to nearest, not truncation here. 801 * Note that in some cases (clamp to edge, no texel offsets) we 802 * could use a non-signed build context which would help archs which 803 * don't have fptosi intrinsic with nearest rounding implemented. 804 */ 805 s = lp_build_iround(&bld->coord_bld, s); 806 if (dims >= 2) 807 t = lp_build_iround(&bld->coord_bld, t); 808 if (dims >= 3) 809 r = lp_build_iround(&bld->coord_bld, r); 810 811 /* subtract 0.5 (add -128) */ 812 i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128); 813 814 s = LLVMBuildAdd(builder, s, i32_c128, ""); 815 if (dims >= 2) { 816 t = LLVMBuildAdd(builder, t, i32_c128, ""); 817 } 818 if (dims >= 3) { 819 r = LLVMBuildAdd(builder, r, i32_c128, ""); 820 } 821 822 /* compute floor (shift right 8) */ 823 i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8); 824 s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); 825 if (dims >= 2) 826 t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); 827 if (dims >= 3) 828 r_ipart = LLVMBuildAShr(builder, r, i32_c8, ""); 829 830 /* add texel offsets */ 831 if (offsets[0]) { 832 s_ipart = lp_build_add(&i32, s_ipart, offsets[0]); 833 if (dims >= 2) { 834 t_ipart = lp_build_add(&i32, t_ipart, offsets[1]); 835 if (dims >= 3) { 836 r_ipart = lp_build_add(&i32, r_ipart, offsets[2]); 837 } 838 } 839 } 840 841 /* compute fractional part (AND with 0xff) */ 842 i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255); 843 s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); 844 if (dims >= 2) 845 t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); 846 if (dims >= 3) 847 r_fpart = LLVMBuildAnd(builder, r, i32_c255, ""); 848 849 /* get pixel, row and image strides */ 850 x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type, 851 bld->format_desc->block.bits/8); 852 y_stride = row_stride_vec; 853 z_stride = img_stride_vec; 854 855 /* do texcoord wrapping and compute texel offsets */ 856 lp_build_sample_wrap_linear_int(bld, 857 bld->format_desc->block.width, 858 s_ipart, &s_fpart, s_float, 859 width_vec, x_stride, offsets[0], 860 bld->static_texture_state->pot_width, 861 bld->static_sampler_state->wrap_s, 862 &x_offset0, &x_offset1, 863 &x_subcoord[0], &x_subcoord[1]); 864 865 /* add potential cube/array/mip offsets now as they are constant per pixel */ 866 if (has_layer_coord(bld->static_texture_state->target)) { 867 LLVMValueRef z_offset; 868 z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec); 869 /* The r coord is the cube face in [0,5] or array layer */ 870 x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset); 871 x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset); 872 } 873 if (mipoffsets) { 874 x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets); 875 x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets); 876 } 877 878 for (z = 0; z < 2; z++) { 879 for (y = 0; y < 2; y++) { 880 offset[z][y][0] = x_offset0; 881 offset[z][y][1] = x_offset1; 882 } 883 } 884 885 if (dims >= 2) { 886 lp_build_sample_wrap_linear_int(bld, 887 bld->format_desc->block.height, 888 t_ipart, &t_fpart, t_float, 889 height_vec, y_stride, offsets[1], 890 bld->static_texture_state->pot_height, 891 bld->static_sampler_state->wrap_t, 892 &y_offset0, &y_offset1, 893 &y_subcoord[0], &y_subcoord[1]); 894 895 for (z = 0; z < 2; z++) { 896 for (x = 0; x < 2; x++) { 897 offset[z][0][x] = lp_build_add(&bld->int_coord_bld, 898 offset[z][0][x], y_offset0); 899 offset[z][1][x] = lp_build_add(&bld->int_coord_bld, 900 offset[z][1][x], y_offset1); 901 } 902 } 903 } 904 905 if (dims >= 3) { 906 lp_build_sample_wrap_linear_int(bld, 907 1, /* block length (depth) */ 908 r_ipart, &r_fpart, r_float, 909 depth_vec, z_stride, offsets[2], 910 bld->static_texture_state->pot_depth, 911 bld->static_sampler_state->wrap_r, 912 &z_offset0, &z_offset1, 913 &z_subcoord[0], &z_subcoord[1]); 914 for (y = 0; y < 2; y++) { 915 for (x = 0; x < 2; x++) { 916 offset[0][y][x] = lp_build_add(&bld->int_coord_bld, 917 offset[0][y][x], z_offset0); 918 offset[1][y][x] = lp_build_add(&bld->int_coord_bld, 919 offset[1][y][x], z_offset1); 920 } 921 } 922 } 923 924 lp_build_sample_fetch_image_linear(bld, data_ptr, offset, 925 x_subcoord, y_subcoord, 926 s_fpart, t_fpart, r_fpart, 927 colors); 928} 929 930 931/** 932 * Sample the texture/mipmap using given image filter and mip filter. 933 * data0_ptr and data1_ptr point to the two mipmap levels to sample 934 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes. 935 * If we're using nearest miplevel sampling the '1' values will be null/unused. 936 */ 937static void 938lp_build_sample_mipmap(struct lp_build_sample_context *bld, 939 unsigned img_filter, 940 unsigned mip_filter, 941 LLVMValueRef s, 942 LLVMValueRef t, 943 LLVMValueRef r, 944 const LLVMValueRef *offsets, 945 LLVMValueRef ilevel0, 946 LLVMValueRef ilevel1, 947 LLVMValueRef lod_fpart, 948 LLVMValueRef colors_var) 949{ 950 LLVMBuilderRef builder = bld->gallivm->builder; 951 LLVMValueRef size0; 952 LLVMValueRef size1; 953 LLVMValueRef row_stride0_vec = NULL; 954 LLVMValueRef row_stride1_vec = NULL; 955 LLVMValueRef img_stride0_vec = NULL; 956 LLVMValueRef img_stride1_vec = NULL; 957 LLVMValueRef data_ptr0; 958 LLVMValueRef data_ptr1; 959 LLVMValueRef mipoff0 = NULL; 960 LLVMValueRef mipoff1 = NULL; 961 LLVMValueRef colors0; 962 LLVMValueRef colors1; 963 964 /* sample the first mipmap level */ 965 lp_build_mipmap_level_sizes(bld, ilevel0, 966 &size0, 967 &row_stride0_vec, &img_stride0_vec); 968 if (bld->num_mips == 1) { 969 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); 970 } 971 else { 972 /* This path should work for num_lods 1 too but slightly less efficient */ 973 data_ptr0 = bld->base_ptr; 974 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0); 975 } 976 977 if (img_filter == PIPE_TEX_FILTER_NEAREST) { 978 lp_build_sample_image_nearest(bld, 979 size0, 980 row_stride0_vec, img_stride0_vec, 981 data_ptr0, mipoff0, s, t, r, offsets, 982 &colors0); 983 } 984 else { 985 assert(img_filter == PIPE_TEX_FILTER_LINEAR); 986 lp_build_sample_image_linear(bld, 987 size0, 988 row_stride0_vec, img_stride0_vec, 989 data_ptr0, mipoff0, s, t, r, offsets, 990 &colors0); 991 } 992 993 /* Store the first level's colors in the output variables */ 994 LLVMBuildStore(builder, colors0, colors_var); 995 996 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { 997 LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm, 998 bld->lodf_bld.type, 256.0); 999 LLVMTypeRef i32vec_type = bld->lodi_bld.vec_type; 1000 struct lp_build_if_state if_ctx; 1001 LLVMValueRef need_lerp; 1002 unsigned num_quads = bld->coord_bld.type.length / 4; 1003 unsigned i; 1004 1005 lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16vec_scale, ""); 1006 lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16"); 1007 1008 /* need_lerp = lod_fpart > 0 */ 1009 if (bld->num_lods == 1) { 1010 need_lerp = LLVMBuildICmp(builder, LLVMIntSGT, 1011 lod_fpart, bld->lodi_bld.zero, 1012 "need_lerp"); 1013 } 1014 else { 1015 /* 1016 * We'll do mip filtering if any of the quads need it. 1017 * It might be better to split the vectors here and only fetch/filter 1018 * quads which need it. 1019 */ 1020 /* 1021 * We need to clamp lod_fpart here since we can get negative 1022 * values which would screw up filtering if not all 1023 * lod_fpart values have same sign. 1024 * We can however then skip the greater than comparison. 1025 */ 1026 lod_fpart = lp_build_max(&bld->lodi_bld, lod_fpart, 1027 bld->lodi_bld.zero); 1028 need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_fpart); 1029 } 1030 1031 lp_build_if(&if_ctx, bld->gallivm, need_lerp); 1032 { 1033 struct lp_build_context u8n_bld; 1034 1035 lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width)); 1036 1037 /* sample the second mipmap level */ 1038 lp_build_mipmap_level_sizes(bld, ilevel1, 1039 &size1, 1040 &row_stride1_vec, &img_stride1_vec); 1041 if (bld->num_mips == 1) { 1042 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); 1043 } 1044 else { 1045 data_ptr1 = bld->base_ptr; 1046 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1); 1047 } 1048 1049 if (img_filter == PIPE_TEX_FILTER_NEAREST) { 1050 lp_build_sample_image_nearest(bld, 1051 size1, 1052 row_stride1_vec, img_stride1_vec, 1053 data_ptr1, mipoff1, s, t, r, offsets, 1054 &colors1); 1055 } 1056 else { 1057 lp_build_sample_image_linear(bld, 1058 size1, 1059 row_stride1_vec, img_stride1_vec, 1060 data_ptr1, mipoff1, s, t, r, offsets, 1061 &colors1); 1062 } 1063 1064 /* interpolate samples from the two mipmap levels */ 1065 1066 if (num_quads == 1 && bld->num_lods == 1) { 1067 lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, ""); 1068 lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart); 1069 } 1070 else { 1071 unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods; 1072 LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->lodi_bld.type.length); 1073 LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH]; 1074 1075 /* Take the LSB of lod_fpart */ 1076 lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, ""); 1077 1078 /* Broadcast each lod weight into their respective channels */ 1079 for (i = 0; i < u8n_bld.type.length; ++i) { 1080 shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_lod); 1081 } 1082 lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, LLVMGetUndef(tmp_vec_type), 1083 LLVMConstVector(shuffle, u8n_bld.type.length), ""); 1084 } 1085 1086 lp_build_reduce_filter(&u8n_bld, 1087 bld->static_sampler_state->reduction_mode, 1088 LP_BLD_LERP_PRESCALED_WEIGHTS, 1089 1, 1090 lod_fpart, 1091 &colors0, 1092 &colors1, 1093 &colors0); 1094 1095 LLVMBuildStore(builder, colors0, colors_var); 1096 } 1097 lp_build_endif(&if_ctx); 1098 } 1099} 1100 1101 1102 1103/** 1104 * Texture sampling in AoS format. Used when sampling common 32-bit/texel 1105 * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes 1106 * but only limited texture coord wrap modes. 1107 */ 1108void 1109lp_build_sample_aos(struct lp_build_sample_context *bld, 1110 unsigned sampler_unit, 1111 LLVMValueRef s, 1112 LLVMValueRef t, 1113 LLVMValueRef r, 1114 const LLVMValueRef *offsets, 1115 LLVMValueRef lod_positive, 1116 LLVMValueRef lod_fpart, 1117 LLVMValueRef ilevel0, 1118 LLVMValueRef ilevel1, 1119 LLVMValueRef texel_out[4]) 1120{ 1121 LLVMBuilderRef builder = bld->gallivm->builder; 1122 const unsigned mip_filter = bld->static_sampler_state->min_mip_filter; 1123 const unsigned min_filter = bld->static_sampler_state->min_img_filter; 1124 const unsigned mag_filter = bld->static_sampler_state->mag_img_filter; 1125 const unsigned dims = bld->dims; 1126 LLVMValueRef packed_var, packed; 1127 LLVMValueRef unswizzled[4]; 1128 struct lp_build_context u8n_bld; 1129 1130 /* we only support the common/simple wrap modes at this time */ 1131 assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_s)); 1132 if (dims >= 2) 1133 assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_t)); 1134 if (dims >= 3) 1135 assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_r)); 1136 1137 1138 /* make 8-bit unorm builder context */ 1139 lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width)); 1140 1141 /* 1142 * Get/interpolate texture colors. 1143 */ 1144 1145 packed_var = lp_build_alloca(bld->gallivm, u8n_bld.vec_type, "packed_var"); 1146 1147 if (min_filter == mag_filter) { 1148 /* no need to distinguish between minification and magnification */ 1149 lp_build_sample_mipmap(bld, 1150 min_filter, mip_filter, 1151 s, t, r, offsets, 1152 ilevel0, ilevel1, lod_fpart, 1153 packed_var); 1154 } 1155 else { 1156 /* Emit conditional to choose min image filter or mag image filter 1157 * depending on the lod being > 0 or <= 0, respectively. 1158 */ 1159 struct lp_build_if_state if_ctx; 1160 1161 /* 1162 * FIXME this should take all lods into account, if some are min 1163 * some max probably could hack up the weights in the linear 1164 * path with selects to work for nearest. 1165 */ 1166 if (bld->num_lods > 1) 1167 lod_positive = LLVMBuildExtractElement(builder, lod_positive, 1168 lp_build_const_int32(bld->gallivm, 0), ""); 1169 1170 lod_positive = LLVMBuildTrunc(builder, lod_positive, 1171 LLVMInt1TypeInContext(bld->gallivm->context), ""); 1172 1173 lp_build_if(&if_ctx, bld->gallivm, lod_positive); 1174 { 1175 /* Use the minification filter */ 1176 lp_build_sample_mipmap(bld, 1177 min_filter, mip_filter, 1178 s, t, r, offsets, 1179 ilevel0, ilevel1, lod_fpart, 1180 packed_var); 1181 } 1182 lp_build_else(&if_ctx); 1183 { 1184 /* Use the magnification filter */ 1185 lp_build_sample_mipmap(bld, 1186 mag_filter, PIPE_TEX_MIPFILTER_NONE, 1187 s, t, r, offsets, 1188 ilevel0, NULL, NULL, 1189 packed_var); 1190 } 1191 lp_build_endif(&if_ctx); 1192 } 1193 1194 packed = LLVMBuildLoad(builder, packed_var, ""); 1195 1196 /* 1197 * Convert to SoA and swizzle. 1198 */ 1199 lp_build_rgba8_to_fi32_soa(bld->gallivm, 1200 bld->texel_type, 1201 packed, unswizzled); 1202 1203 if (util_format_is_rgba8_variant(bld->format_desc)) { 1204 lp_build_format_swizzle_soa(bld->format_desc, 1205 &bld->texel_bld, 1206 unswizzled, texel_out); 1207 } 1208 else { 1209 texel_out[0] = unswizzled[0]; 1210 texel_out[1] = unswizzled[1]; 1211 texel_out[2] = unswizzled[2]; 1212 texel_out[3] = unswizzled[3]; 1213 } 1214} 1215