/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Texture sampling -- SoA.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 * @author Brian Paul <brianp@vmware.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
#include "util/compiler.h"
#include "util/u_debug.h"
#include "util/u_dump.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/format/u_format.h"
#include "util/u_cpu_detect.h"
#include "util/format_rgb9e5.h"
#include "lp_bld_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_logic.h"
#include "lp_bld_printf.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_gather.h"
#include "lp_bld_format.h"
#include "lp_bld_sample.h"
#include "lp_bld_sample_aos.h"
#include "lp_bld_struct.h"
#include "lp_bld_quad.h"
#include "lp_bld_pack.h"
#include "lp_bld_intr.h"
#include "lp_bld_misc.h"


/**
 * Generate code to fetch a texel from a texture at int coords (x, y, z).
 * The computation depends on whether the texture is 1D, 2D or 3D.
 * The result, texel, will be float vectors:
 *   texel[0] = red values
 *   texel[1] = green values
 *   texel[2] = blue values
 *   texel[3] = alpha values
 */
static void
lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
                          LLVMValueRef width,
                          LLVMValueRef height,
                          LLVMValueRef depth,
                          LLVMValueRef x,
                          LLVMValueRef y,
                          LLVMValueRef z,
                          LLVMValueRef y_stride,
                          LLVMValueRef z_stride,
                          LLVMValueRef data_ptr,
                          LLVMValueRef mipoffsets,
                          LLVMValueRef texel_out[4])
{
   const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
   const unsigned dims = bld->dims;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef offset;
   LLVMValueRef i, j;
   LLVMValueRef use_border = NULL;

   /* use_border = x < 0 || x >= width || y < 0 || y >= height */
   if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
      use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
   }

   if (dims >= 2 &&
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
      if (use_border) {
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
      }
      else {
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
      }
   }

   if (dims == 3 &&
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
      if (use_border) {
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
      }
      else {
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
      }
   }

   /* convert x,y,z coords to linear offset from start of texture, in bytes */
   lp_build_sample_offset(&bld->int_coord_bld,
                          bld->format_desc,
                          x, y, z, y_stride, z_stride,
                          &offset, &i, &j);
   if (mipoffsets) {
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   if (use_border) {
      /* If we can sample the border color, it means that texcoords may
       * lie outside the bounds of the texture image. We need to do
       * something to prevent reading out of bounds and causing a segfault.
       *
       * Simply AND the texture coords with !use_border. This will cause
       * coords which are out of bounds to become zero. Zero's guaranteed
       * to be inside the texture image.
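       *
       * (lp_build_andnot computes offset & ~use_border, so lanes whose
       * use_border mask is all-ones get a zero offset, while in-bounds
       * lanes keep their offset unchanged.)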
158 */ 159 offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border); 160 } 161 162 lp_build_fetch_rgba_soa(bld->gallivm, 163 bld->format_desc, 164 bld->texel_type, TRUE, 165 data_ptr, offset, 166 i, j, 167 bld->cache, 168 texel_out); 169 170 /* 171 * Note: if we find an app which frequently samples the texture border 172 * we might want to implement a true conditional here to avoid sampling 173 * the texture whenever possible (since that's quite a bit of code). 174 * Ex: 175 * if (use_border) { 176 * texel = border_color; 177 * } 178 * else { 179 * texel = sample_texture(coord); 180 * } 181 * As it is now, we always sample the texture, then selectively replace 182 * the texel color results with the border color. 183 */ 184 185 if (use_border) { 186 /* select texel color or border color depending on use_border. */ 187 const struct util_format_description *format_desc = bld->format_desc; 188 int chan; 189 struct lp_type border_type = bld->texel_type; 190 border_type.length = 4; 191 /* 192 * Only replace channels which are actually present. The others should 193 * get optimized away eventually by sampler_view swizzle anyway but it's 194 * easier too. 195 */ 196 for (chan = 0; chan < 4; chan++) { 197 unsigned chan_s; 198 /* reverse-map channel... */ 199 if (util_format_has_stencil(format_desc)) { 200 if (chan == 0) 201 chan_s = 0; 202 else 203 break; 204 } 205 else { 206 for (chan_s = 0; chan_s < 4; chan_s++) { 207 if (chan_s == format_desc->swizzle[chan]) { 208 break; 209 } 210 } 211 } 212 if (chan_s <= 3) { 213 /* use the already clamped color */ 214 LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan); 215 LLVMValueRef border_chan; 216 217 border_chan = lp_build_extract_broadcast(bld->gallivm, 218 border_type, 219 bld->texel_type, 220 bld->border_color_clamped, 221 idx); 222 texel_out[chan] = lp_build_select(&bld->texel_bld, use_border, 223 border_chan, texel_out[chan]); 224 } 225 } 226 } 227} 228 229 230/** 231 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR_REPEAT mode. 232 * (Note that with pot sizes could do this much more easily post-scale 233 * with some bit arithmetic.) 234 */ 235static LLVMValueRef 236lp_build_coord_mirror(struct lp_build_sample_context *bld, 237 LLVMValueRef coord, boolean posOnly) 238{ 239 struct lp_build_context *coord_bld = &bld->coord_bld; 240 LLVMValueRef fract; 241 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5); 242 243 /* 244 * We can just use 2*(x - round(0.5*x)) to do all the mirroring, 245 * it all works out. (The result is in range [-1, 1.0], negative if 246 * the coord is in the "odd" section, otherwise positive.) 247 */ 248 249 coord = lp_build_mul(coord_bld, coord, half); 250 fract = lp_build_round(coord_bld, coord); 251 fract = lp_build_sub(coord_bld, coord, fract); 252 coord = lp_build_add(coord_bld, fract, fract); 253 254 if (posOnly) { 255 /* 256 * Theoretically it's not quite 100% accurate because the spec says 257 * that ultimately a scaled coord of -x.0 should map to int coord 258 * -x + 1 with mirroring, not -x (this does not matter for bilinear 259 * filtering). 260 */ 261 coord = lp_build_abs(coord_bld, coord); 262 /* kill off NaNs */ 263 /* XXX: not safe without arch rounding, fract can be anything. 
       */
      coord = lp_build_max_ext(coord_bld, coord, coord_bld->zero,
                               GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
   }

   return coord;
}


/**
 * Helper to compute the first coord and the weight for
 * linear wrap repeat npot textures
 */
void
lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
                                  LLVMValueRef coord_f,
                                  LLVMValueRef length_i,
                                  LLVMValueRef length_f,
                                  LLVMValueRef *coord0_i,
                                  LLVMValueRef *weight_f)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
                                                int_coord_bld->one);
   LLVMValueRef mask;
   /* wrap with normalized floats is just fract */
   coord_f = lp_build_fract(coord_bld, coord_f);
   /* mul by size and subtract 0.5 */
   coord_f = lp_build_mul(coord_bld, coord_f, length_f);
   coord_f = lp_build_sub(coord_bld, coord_f, half);
   /*
    * we avoided the 0.5/length division before the repeat wrap,
    * now need to fix up edge cases with selects
    */
   /*
    * Note we do a float (unordered) compare so we can eliminate NaNs.
    * (Otherwise would need fract_safe above).
    */
   mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
                           PIPE_FUNC_LESS, coord_f, coord_bld->zero);

   /* convert to int, compute lerp weight */
   lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
   *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
}


/**
 * Build LLVM code for texture wrap mode for linear filtering.
 * \param x0_out  returns first integer texcoord
 * \param x1_out  returns second integer texcoord
 * \param weight_out  returns linear interpolation weight
 */
static void
lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
                            boolean is_gather,
                            LLVMValueRef coord,
                            LLVMValueRef length,
                            LLVMValueRef length_f,
                            LLVMValueRef offset,
                            boolean is_pot,
                            unsigned wrap_mode,
                            LLVMValueRef *x0_out,
                            LLVMValueRef *x1_out,
                            LLVMValueRef *weight_out)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
   LLVMValueRef coord0, coord1, weight;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         /* mul by size and subtract 0.5 */
         coord = lp_build_mul(coord_bld, coord, length_f);
         coord = lp_build_sub(coord_bld, coord, half);
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* repeat wrap */
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
         coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
      }
      else {
         LLVMValueRef mask;
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord = lp_build_add(coord_bld, coord, offset);
         }
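         /*
          * (Worked example of the npot path below, with length 5 and
          * coord 0.05: fract gives 0.05, *5 - 0.5 gives -0.25, so
          * lp_build_coord_repeat_npot_linear selects coord0 = 4 (length - 1)
          * with weight 0.75, and the mask computed afterwards wraps
          * coord1 around to 0.)
          */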
         lp_build_coord_repeat_npot_linear(bld, coord,
                                           length, length_f,
                                           &coord0, &weight);
         mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                                 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
         coord1 = LLVMBuildAnd(builder,
                               lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
                               mask, "");
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }

      /*
       * clamp to [0, length]
       *
       * Unlike some other wrap modes, this should be correct for gather
       * too. GL_CLAMP explicitly does this clamp on the coord prior to
       * actual wrapping (which is per sample).
       */
      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);

      coord = lp_build_sub(coord_bld, coord, half);

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      {
         struct lp_build_context abs_coord_bld = bld->coord_bld;
         abs_coord_bld.type.sign = FALSE;

         if (bld->static_sampler_state->normalized_coords) {
            /* mul by tex size */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }

         /* clamp to length max */
         coord = lp_build_min_ext(coord_bld, coord, length_f,
                                  GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
         if (!is_gather) {
            /* subtract 0.5 */
            coord = lp_build_sub(coord_bld, coord, half);
            /* clamp to [0, length - 0.5] */
            coord = lp_build_max(coord_bld, coord, coord_bld->zero);
            /* convert to int, compute lerp weight */
            lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         } else {
            /*
             * The non-gather path will end up with coords 0, 1 if coord was
             * smaller than 0.5 (with corresponding weight 0.0 so it doesn't
             * really matter what the second coord is). But for gather, we
             * really need to end up with coords 0, 0.
             */
            coord = lp_build_max(coord_bld, coord, coord_bld->zero);
            coord0 = lp_build_sub(coord_bld, coord, half);
            coord1 = lp_build_add(coord_bld, coord, half);
            /* Values range ([-0.5, length_f - 0.5], [0.5, length_f + 0.5]) */
            coord0 = lp_build_itrunc(coord_bld, coord0);
            coord1 = lp_build_itrunc(coord_bld, coord1);
            weight = coord_bld->undef;
         }
         /* coord1 = min(coord1, length-1) */
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
         break;
      }

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /*
       * We don't need any clamp.
       * Technically, for very large (pos or neg)
       * (or infinite) values, clamp against [-length, length] would be
       * correct, but we don't need to guarantee any specific
       * result for such coords (the ifloor will be undefined, but for modes
       * requiring border all resulting coords are safe).
       */
      coord = lp_build_sub(coord_bld, coord, half);
      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         offset = lp_build_div(coord_bld, offset, length_f);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      if (!is_gather) {
         /* compute mirror function */
         coord = lp_build_coord_mirror(bld, coord, TRUE);

         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
         coord = lp_build_sub(coord_bld, coord, half);

         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);

         /* coord0 = max(coord0, 0) */
         coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
         /* coord1 = min(coord1, length-1) */
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
      } else {
         /*
          * This is pretty reasonable in the end, all the tests care
          * about is nasty edge cases (scaled coords x.5, so the individual
          * coords are actually integers, which is REALLY tricky to get right
          * due to this working differently both for negative numbers as well
          * as for even/odd cases). But with enough magic it's not too complex
          * after all.
          * Maybe should try a bit arithmetic variant for POT textures though...
          */
         LLVMValueRef isNeg;
         /*
          * Wrapping just once still works, even though it means we can
          * get "wrong" sign due to performing mirror in the middle of the
          * two coords (because this can only happen very near the odd/even
          * edges, so both coords will actually end up as 0 or length - 1
          * in the end).
          * For GL4 gather with per-sample offsets we'd need to do the
          * mirroring per coord too.
          */
         coord = lp_build_coord_mirror(bld, coord, FALSE);
         coord = lp_build_mul(coord_bld, coord, length_f);

         /*
          * NaNs should be safe here, we'll do away with them with
          * the ones' complement plus min.
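          * (In two's complement, ~n == -n - 1, so XORing a negative coord
          * with an all-ones mask maps -1 -> 0, -2 -> 1, etc., which is
          * exactly the mirror(negX) = X - 1 rule used below.)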
514 */ 515 coord0 = lp_build_sub(coord_bld, coord, half); 516 coord0 = lp_build_ifloor(coord_bld, coord0); 517 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 518 /* ones complement for neg numbers (mirror(negX) = X - 1) */ 519 isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, 520 coord0, int_coord_bld->zero); 521 coord0 = lp_build_xor(int_coord_bld, coord0, isNeg); 522 isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, 523 coord1, int_coord_bld->zero); 524 coord1 = lp_build_xor(int_coord_bld, coord1, isNeg); 525 coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one); 526 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); 527 528 weight = coord_bld->undef; 529 } 530 break; 531 532 case PIPE_TEX_WRAP_MIRROR_CLAMP: 533 if (bld->static_sampler_state->normalized_coords) { 534 /* scale coord to length */ 535 coord = lp_build_mul(coord_bld, coord, length_f); 536 } 537 if (offset) { 538 offset = lp_build_int_to_float(coord_bld, offset); 539 coord = lp_build_add(coord_bld, coord, offset); 540 } 541 /* 542 * XXX: probably not correct for gather, albeit I'm not 543 * entirely sure as it's poorly specified. The wrapping looks 544 * correct according to the spec which is against gl 1.2.1, 545 * however negative values will be swapped - gl re-specified 546 * wrapping with newer versions (no more pre-clamp except with 547 * GL_CLAMP). 548 */ 549 coord = lp_build_abs(coord_bld, coord); 550 551 /* clamp to [0, length] */ 552 coord = lp_build_min_ext(coord_bld, coord, length_f, 553 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); 554 555 coord = lp_build_sub(coord_bld, coord, half); 556 557 /* convert to int, compute lerp weight */ 558 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 559 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 560 break; 561 562 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 563 { 564 struct lp_build_context abs_coord_bld = bld->coord_bld; 565 abs_coord_bld.type.sign = FALSE; 566 567 if (bld->static_sampler_state->normalized_coords) { 568 /* scale coord to length */ 569 coord = lp_build_mul(coord_bld, coord, length_f); 570 } 571 if (offset) { 572 offset = lp_build_int_to_float(coord_bld, offset); 573 coord = lp_build_add(coord_bld, coord, offset); 574 } 575 if (!is_gather) { 576 coord = lp_build_abs(coord_bld, coord); 577 578 /* clamp to length max */ 579 coord = lp_build_min_ext(coord_bld, coord, length_f, 580 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); 581 /* subtract 0.5 */ 582 coord = lp_build_sub(coord_bld, coord, half); 583 /* clamp to [0, length - 0.5] */ 584 coord = lp_build_max(coord_bld, coord, coord_bld->zero); 585 586 /* convert to int, compute lerp weight */ 587 lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight); 588 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 589 /* coord1 = min(coord1, length-1) */ 590 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); 591 } else { 592 /* 593 * The non-gather path will swap coord0/1 if coord was negative, 594 * which is ok for filtering since the filter weight matches 595 * accordingly. Also, if coord is close to zero, coord0/1 will 596 * be 0 and 1, instead of 0 and 0 (again ok due to filter 597 * weight being 0.0). Both issues need to be fixed for gather. 
598 */ 599 LLVMValueRef isNeg; 600 601 /* 602 * Actually wanted to cheat here and use: 603 * coord1 = lp_build_iround(coord_bld, coord); 604 * but it's not good enough for some tests (even piglit 605 * textureGather is set up in a way so the coords area always 606 * .5, that is right at the crossover points). 607 * So do ordinary sub/floor, then do ones' complement 608 * for negative numbers. 609 * (Note can't just do sub|add/abs/itrunc per coord neither - 610 * because the spec demands that mirror(3.0) = 3 but 611 * mirror(-3.0) = 2.) 612 */ 613 coord = lp_build_sub(coord_bld, coord, half); 614 coord0 = lp_build_ifloor(coord_bld, coord); 615 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 616 isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord0, 617 int_coord_bld->zero); 618 coord0 = lp_build_xor(int_coord_bld, isNeg, coord0); 619 coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one); 620 621 isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord1, 622 int_coord_bld->zero); 623 coord1 = lp_build_xor(int_coord_bld, isNeg, coord1); 624 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); 625 626 weight = coord_bld->undef; 627 } 628 } 629 break; 630 631 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 632 { 633 if (bld->static_sampler_state->normalized_coords) { 634 /* scale coord to length */ 635 coord = lp_build_mul(coord_bld, coord, length_f); 636 } 637 if (offset) { 638 offset = lp_build_int_to_float(coord_bld, offset); 639 coord = lp_build_add(coord_bld, coord, offset); 640 } 641 /* 642 * XXX: probably not correct for gather due to swapped 643 * order if coord is negative (same rationale as for 644 * MIRROR_CLAMP). 645 */ 646 coord = lp_build_abs(coord_bld, coord); 647 648 /* 649 * We don't need any clamp. Technically, for very large 650 * (or infinite) values, clamp against length would be 651 * correct, but we don't need to guarantee any specific 652 * result for such coords (the ifloor will be undefined, but 653 * for modes requiring border all resulting coords are safe). 654 */ 655 coord = lp_build_sub(coord_bld, coord, half); 656 657 /* convert to int, compute lerp weight */ 658 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 659 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 660 } 661 break; 662 663 default: 664 assert(0); 665 coord0 = NULL; 666 coord1 = NULL; 667 weight = NULL; 668 } 669 670 *x0_out = coord0; 671 *x1_out = coord1; 672 *weight_out = weight; 673} 674 675 676/** 677 * Build LLVM code for texture wrap mode for nearest filtering. 
 * \param coord  the incoming texcoord (nominally in [0,1])
 * \param length  the texture size along one dimension, as int vector
 * \param length_f  the texture size along one dimension, as float vector
 * \param offset  texel offset along one dimension (as int vector)
 * \param is_pot  if TRUE, length is a power of two
 * \param wrap_mode  one of PIPE_TEX_WRAP_x
 */
static LLVMValueRef
lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
                             LLVMValueRef coord,
                             LLVMValueRef length,
                             LLVMValueRef length_f,
                             LLVMValueRef offset,
                             boolean is_pot,
                             unsigned wrap_mode)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
   LLVMValueRef icoord;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         coord = lp_build_mul(coord_bld, coord, length_f);
         icoord = lp_build_ifloor(coord_bld, coord);
         if (offset) {
            icoord = lp_build_add(int_coord_bld, icoord, offset);
         }
         icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
      }
      else {
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* take fraction, unnormalize */
         coord = lp_build_fract_safe(coord_bld, coord);
         coord = lp_build_mul(coord_bld, coord, length_f);
         icoord = lp_build_itrunc(coord_bld, coord);
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }

      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* floor */
      /* use itrunc instead since we clamp to 0 anyway */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1]. */
      icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
                              length_minus_one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      /* no clamp necessary, border masking will handle this */
      icoord = lp_build_ifloor(coord_bld, coord);
      if (offset) {
         icoord = lp_build_add(int_coord_bld, icoord, offset);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         offset = lp_build_div(coord_bld, offset, length_f);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* compute mirror function */
      coord = lp_build_coord_mirror(bld, coord, TRUE);

      /* scale coord to length */
      assert(bld->static_sampler_state->normalized_coords);
      coord = lp_build_mul(coord_bld, coord, length_f);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1] */
      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);
      /*
       * Use unsigned min due to possible undef values (NaNs, overflow)
       */
      {
         struct lp_build_context abs_coord_bld = *int_coord_bld;
         abs_coord_bld.type.sign = FALSE;
         /* clamp to [0, length - 1] */
         icoord = lp_build_min(&abs_coord_bld, icoord, length_minus_one);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);
      break;

   default:
      assert(0);
      icoord = NULL;
   }

   return icoord;
}


/**
 * Do shadow test/comparison.
 * \param p  shadow ref value
 * \param texel  the texel to compare against
 */
static LLVMValueRef
lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
                            LLVMValueRef p,
                            LLVMValueRef texel)
{
   struct lp_build_context *texel_bld = &bld->texel_bld;
   LLVMValueRef res;

   if (0) {
      //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
      lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
   }

   /* result = (p FUNC texel) ? 1 : 0 */
   /*
    * honor d3d10 floating point rules here, which state that comparisons
    * are ordered except NOT_EQUAL which is unordered.
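    * (An ordered comparison yields false when either operand is NaN,
    * an unordered one yields true; so with a NaN operand every compare
    * func except NOT_EQUAL fails.)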
846 */ 847 if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) { 848 res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func, 849 p, texel); 850 } 851 else { 852 res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func, 853 p, texel); 854 } 855 return res; 856} 857 858 859/** 860 * Generate code to sample a mipmap level with nearest filtering. 861 * If sampling a cube texture, r = cube face in [0,5]. 862 */ 863static void 864lp_build_sample_image_nearest(struct lp_build_sample_context *bld, 865 LLVMValueRef size, 866 LLVMValueRef row_stride_vec, 867 LLVMValueRef img_stride_vec, 868 LLVMValueRef data_ptr, 869 LLVMValueRef mipoffsets, 870 const LLVMValueRef *coords, 871 const LLVMValueRef *offsets, 872 LLVMValueRef colors_out[4]) 873{ 874 const unsigned dims = bld->dims; 875 LLVMValueRef width_vec; 876 LLVMValueRef height_vec; 877 LLVMValueRef depth_vec; 878 LLVMValueRef flt_size; 879 LLVMValueRef flt_width_vec; 880 LLVMValueRef flt_height_vec; 881 LLVMValueRef flt_depth_vec; 882 LLVMValueRef x, y = NULL, z = NULL; 883 884 lp_build_extract_image_sizes(bld, 885 &bld->int_size_bld, 886 bld->int_coord_type, 887 size, 888 &width_vec, &height_vec, &depth_vec); 889 890 flt_size = lp_build_int_to_float(&bld->float_size_bld, size); 891 892 lp_build_extract_image_sizes(bld, 893 &bld->float_size_bld, 894 bld->coord_type, 895 flt_size, 896 &flt_width_vec, &flt_height_vec, &flt_depth_vec); 897 898 /* 899 * Compute integer texcoords. 900 */ 901 x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec, 902 flt_width_vec, offsets[0], 903 bld->static_texture_state->pot_width, 904 bld->static_sampler_state->wrap_s); 905 lp_build_name(x, "tex.x.wrapped"); 906 907 if (dims >= 2) { 908 y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec, 909 flt_height_vec, offsets[1], 910 bld->static_texture_state->pot_height, 911 bld->static_sampler_state->wrap_t); 912 lp_build_name(y, "tex.y.wrapped"); 913 914 if (dims == 3) { 915 z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec, 916 flt_depth_vec, offsets[2], 917 bld->static_texture_state->pot_depth, 918 bld->static_sampler_state->wrap_r); 919 lp_build_name(z, "tex.z.wrapped"); 920 } 921 } 922 if (has_layer_coord(bld->static_texture_state->target)) { 923 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { 924 /* add cube layer to face */ 925 z = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]); 926 } 927 else { 928 z = coords[2]; 929 } 930 lp_build_name(z, "tex.z.layer"); 931 } 932 933 /* 934 * Get texture colors. 935 */ 936 lp_build_sample_texel_soa(bld, 937 width_vec, height_vec, depth_vec, 938 x, y, z, 939 row_stride_vec, img_stride_vec, 940 data_ptr, mipoffsets, colors_out); 941 942 if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) { 943 LLVMValueRef cmpval; 944 cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]); 945 /* this is really just a AND 1.0, cmpval but llvm is clever enough */ 946 colors_out[0] = lp_build_select(&bld->texel_bld, cmpval, 947 bld->texel_bld.one, bld->texel_bld.zero); 948 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0]; 949 } 950 951} 952 953 954/** 955 * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly. 
956 */ 957static LLVMValueRef 958lp_build_masklerp(struct lp_build_context *bld, 959 LLVMValueRef weight, 960 LLVMValueRef mask0, 961 LLVMValueRef mask1) 962{ 963 struct gallivm_state *gallivm = bld->gallivm; 964 LLVMBuilderRef builder = gallivm->builder; 965 LLVMValueRef weight2; 966 967 weight2 = lp_build_sub(bld, bld->one, weight); 968 weight = LLVMBuildBitCast(builder, weight, 969 lp_build_int_vec_type(gallivm, bld->type), ""); 970 weight2 = LLVMBuildBitCast(builder, weight2, 971 lp_build_int_vec_type(gallivm, bld->type), ""); 972 weight = LLVMBuildAnd(builder, weight, mask1, ""); 973 weight2 = LLVMBuildAnd(builder, weight2, mask0, ""); 974 weight = LLVMBuildBitCast(builder, weight, bld->vec_type, ""); 975 weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, ""); 976 return lp_build_add(bld, weight, weight2); 977} 978 979/** 980 * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly. 981 */ 982static LLVMValueRef 983lp_build_masklerp2d(struct lp_build_context *bld, 984 LLVMValueRef weight0, 985 LLVMValueRef weight1, 986 LLVMValueRef mask00, 987 LLVMValueRef mask01, 988 LLVMValueRef mask10, 989 LLVMValueRef mask11) 990{ 991 LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01); 992 LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11); 993 return lp_build_lerp(bld, weight1, val0, val1, 0); 994} 995 996/* 997 * this is a bit excessive code for something OpenGL just recommends 998 * but does not require. 999 */ 1000#define ACCURATE_CUBE_CORNERS 1 1001 1002/** 1003 * Generate code to sample a mipmap level with linear filtering. 1004 * If sampling a cube texture, r = cube face in [0,5]. 1005 * If linear_mask is present, only pixels having their mask set 1006 * will receive linear filtering, the rest will use nearest. 
1007 */ 1008static void 1009lp_build_sample_image_linear(struct lp_build_sample_context *bld, 1010 boolean is_gather, 1011 LLVMValueRef size, 1012 LLVMValueRef linear_mask, 1013 LLVMValueRef row_stride_vec, 1014 LLVMValueRef img_stride_vec, 1015 LLVMValueRef data_ptr, 1016 LLVMValueRef mipoffsets, 1017 const LLVMValueRef *coords, 1018 const LLVMValueRef *offsets, 1019 LLVMValueRef colors_out[4]) 1020{ 1021 LLVMBuilderRef builder = bld->gallivm->builder; 1022 struct lp_build_context *ivec_bld = &bld->int_coord_bld; 1023 struct lp_build_context *coord_bld = &bld->coord_bld; 1024 struct lp_build_context *texel_bld = &bld->texel_bld; 1025 const unsigned dims = bld->dims; 1026 LLVMValueRef width_vec; 1027 LLVMValueRef height_vec; 1028 LLVMValueRef depth_vec; 1029 LLVMValueRef flt_size; 1030 LLVMValueRef flt_width_vec; 1031 LLVMValueRef flt_height_vec; 1032 LLVMValueRef flt_depth_vec; 1033 LLVMValueRef fall_off[4] = { 0 }, have_corners = NULL; 1034 LLVMValueRef z1 = NULL; 1035 LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL; 1036 LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL; 1037 LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL; 1038 LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL; 1039 LLVMValueRef xs[4], ys[4], zs[4]; 1040 LLVMValueRef neighbors[2][2][4]; 1041 int chan, texel_index; 1042 boolean seamless_cube_filter, accurate_cube_corners; 1043 unsigned chan_swiz = bld->static_texture_state->swizzle_r; 1044 1045 if (is_gather) { 1046 switch (bld->gather_comp) { 1047 case 0: chan_swiz = bld->static_texture_state->swizzle_r; break; 1048 case 1: chan_swiz = bld->static_texture_state->swizzle_g; break; 1049 case 2: chan_swiz = bld->static_texture_state->swizzle_b; break; 1050 case 3: chan_swiz = bld->static_texture_state->swizzle_a; break; 1051 default: 1052 break; 1053 } 1054 } 1055 1056 seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE || 1057 bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) && 1058 bld->static_sampler_state->seamless_cube_map; 1059 1060 /* 1061 * Disable accurate cube corners for integer textures, which should only 1062 * get here in the gather path. 1063 */ 1064 accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter && 1065 !util_format_is_pure_integer(bld->static_texture_state->format); 1066 1067 lp_build_extract_image_sizes(bld, 1068 &bld->int_size_bld, 1069 bld->int_coord_type, 1070 size, 1071 &width_vec, &height_vec, &depth_vec); 1072 1073 flt_size = lp_build_int_to_float(&bld->float_size_bld, size); 1074 1075 lp_build_extract_image_sizes(bld, 1076 &bld->float_size_bld, 1077 bld->coord_type, 1078 flt_size, 1079 &flt_width_vec, &flt_height_vec, &flt_depth_vec); 1080 1081 /* 1082 * Compute integer texcoords. 
1083 */ 1084 1085 if (!seamless_cube_filter) { 1086 lp_build_sample_wrap_linear(bld, is_gather, coords[0], width_vec, 1087 flt_width_vec, offsets[0], 1088 bld->static_texture_state->pot_width, 1089 bld->static_sampler_state->wrap_s, 1090 &x00, &x01, &s_fpart); 1091 lp_build_name(x00, "tex.x0.wrapped"); 1092 lp_build_name(x01, "tex.x1.wrapped"); 1093 x10 = x00; 1094 x11 = x01; 1095 1096 if (dims >= 2) { 1097 lp_build_sample_wrap_linear(bld, is_gather, coords[1], height_vec, 1098 flt_height_vec, offsets[1], 1099 bld->static_texture_state->pot_height, 1100 bld->static_sampler_state->wrap_t, 1101 &y00, &y10, &t_fpart); 1102 lp_build_name(y00, "tex.y0.wrapped"); 1103 lp_build_name(y10, "tex.y1.wrapped"); 1104 y01 = y00; 1105 y11 = y10; 1106 1107 if (dims == 3) { 1108 lp_build_sample_wrap_linear(bld, is_gather, coords[2], depth_vec, 1109 flt_depth_vec, offsets[2], 1110 bld->static_texture_state->pot_depth, 1111 bld->static_sampler_state->wrap_r, 1112 &z00, &z1, &r_fpart); 1113 z01 = z10 = z11 = z00; 1114 lp_build_name(z00, "tex.z0.wrapped"); 1115 lp_build_name(z1, "tex.z1.wrapped"); 1116 } 1117 } 1118 if (has_layer_coord(bld->static_texture_state->target)) { 1119 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { 1120 /* add cube layer to face */ 1121 z00 = z01 = z10 = z11 = z1 = 1122 lp_build_add(&bld->int_coord_bld, coords[2], coords[3]); 1123 } 1124 else { 1125 z00 = z01 = z10 = z11 = z1 = coords[2]; /* cube face or layer */ 1126 } 1127 lp_build_name(z00, "tex.z0.layer"); 1128 lp_build_name(z1, "tex.z1.layer"); 1129 } 1130 } 1131 else { 1132 struct lp_build_if_state edge_if; 1133 LLVMTypeRef int1t; 1134 LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2]; 1135 LLVMValueRef coord0, coord1, have_edge, have_corner; 1136 LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y; 1137 LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp; 1138 LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped; 1139 LLVMValueRef face = coords[2]; 1140 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f); 1141 LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one); 1142 /* XXX drop height calcs. Could (should) do this without seamless filtering too */ 1143 height_vec = width_vec; 1144 flt_height_vec = flt_width_vec; 1145 1146 /* XXX the overflow logic is actually sort of duplicated with trilinear, 1147 * since an overflow in one mip should also have a corresponding overflow 1148 * in another. 1149 */ 1150 /* should always have normalized coords, and offsets are undefined */ 1151 assert(bld->static_sampler_state->normalized_coords); 1152 /* 1153 * The coords should all be between [0,1] however we can have NaNs, 1154 * which will wreak havoc. In particular the y1_clamped value below 1155 * can be -INT_MAX (on x86) and be propagated right through (probably 1156 * other values might be bogus in the end too). 1157 * So kill off the NaNs here. 
1158 */ 1159 coord0 = lp_build_max_ext(coord_bld, coords[0], coord_bld->zero, 1160 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); 1161 coord0 = lp_build_mul(coord_bld, coord0, flt_width_vec); 1162 /* instead of clamp, build mask if overflowed */ 1163 coord0 = lp_build_sub(coord_bld, coord0, half); 1164 /* convert to int, compute lerp weight */ 1165 /* not ideal with AVX (and no AVX2) */ 1166 lp_build_ifloor_fract(coord_bld, coord0, &x0, &s_fpart); 1167 x1 = lp_build_add(ivec_bld, x0, ivec_bld->one); 1168 coord1 = lp_build_max_ext(coord_bld, coords[1], coord_bld->zero, 1169 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); 1170 coord1 = lp_build_mul(coord_bld, coord1, flt_height_vec); 1171 coord1 = lp_build_sub(coord_bld, coord1, half); 1172 lp_build_ifloor_fract(coord_bld, coord1, &y0, &t_fpart); 1173 y1 = lp_build_add(ivec_bld, y0, ivec_bld->one); 1174 1175 fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero); 1176 fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one); 1177 fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero); 1178 fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one); 1179 1180 fall_off_x = lp_build_or(ivec_bld, fall_off[0], fall_off[1]); 1181 fall_off_y = lp_build_or(ivec_bld, fall_off[2], fall_off[3]); 1182 have_edge = lp_build_or(ivec_bld, fall_off_x, fall_off_y); 1183 have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge); 1184 1185 /* needed for accurate corner filtering branch later, rely on 0 init */ 1186 int1t = LLVMInt1TypeInContext(bld->gallivm->context); 1187 have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner"); 1188 1189 for (texel_index = 0; texel_index < 4; texel_index++) { 1190 xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs"); 1191 ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys"); 1192 zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs"); 1193 } 1194 1195 lp_build_if(&edge_if, bld->gallivm, have_edge); 1196 1197 have_corner = lp_build_and(ivec_bld, fall_off_x, fall_off_y); 1198 have_corner = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_corner); 1199 LLVMBuildStore(builder, have_corner, have_corners); 1200 1201 /* 1202 * Need to feed clamped values here for cheap corner handling, 1203 * but only for y coord (as when falling off both edges we only 1204 * fall off the x one) - this should be sufficient. 1205 */ 1206 y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero); 1207 y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one); 1208 1209 /* 1210 * Get all possible new coords. 
1211 */ 1212 lp_build_cube_new_coords(ivec_bld, face, 1213 x0, x1, y0_clamped, y1_clamped, 1214 length_minus_one, 1215 new_faces, new_xcoords, new_ycoords); 1216 1217 /* handle fall off x-, x+ direction */ 1218 /* determine new coords, face (not both fall_off vars can be true at same time) */ 1219 x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0); 1220 y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped); 1221 x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0); 1222 y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped); 1223 x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1); 1224 y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped); 1225 x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1); 1226 y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped); 1227 1228 z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face); 1229 z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face); 1230 1231 /* handle fall off y-, y+ direction */ 1232 /* 1233 * Cheap corner logic: just hack up things so a texel doesn't fall 1234 * off both sides (which means filter weights will be wrong but we'll only 1235 * use valid texels in the filter). 1236 * This means however (y) coords must additionally be clamped (see above). 1237 * This corner handling should be fully OpenGL (but not d3d10) compliant. 1238 */ 1239 fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]); 1240 fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]); 1241 fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]); 1242 fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]); 1243 1244 x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00); 1245 y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00); 1246 x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01); 1247 y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01); 1248 x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10); 1249 y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10); 1250 x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11); 1251 y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11); 1252 1253 z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00); 1254 z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01); 1255 z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10); 1256 z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11); 1257 1258 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { 1259 /* now can add cube layer to face (per sample) */ 1260 z00 = lp_build_add(ivec_bld, z00, coords[3]); 1261 z01 = lp_build_add(ivec_bld, z01, coords[3]); 1262 z10 = lp_build_add(ivec_bld, z10, coords[3]); 1263 z11 = lp_build_add(ivec_bld, z11, coords[3]); 1264 } 1265 1266 LLVMBuildStore(builder, x00, xs[0]); 1267 LLVMBuildStore(builder, x01, xs[1]); 1268 LLVMBuildStore(builder, x10, xs[2]); 1269 LLVMBuildStore(builder, x11, xs[3]); 1270 LLVMBuildStore(builder, y00, ys[0]); 1271 LLVMBuildStore(builder, y01, ys[1]); 1272 LLVMBuildStore(builder, y10, ys[2]); 1273 LLVMBuildStore(builder, y11, ys[3]); 1274 LLVMBuildStore(builder, z00, zs[0]); 1275 LLVMBuildStore(builder, z01, zs[1]); 1276 
      LLVMBuildStore(builder, z10, zs[2]);
      LLVMBuildStore(builder, z11, zs[3]);

      lp_build_else(&edge_if);

      LLVMBuildStore(builder, x0, xs[0]);
      LLVMBuildStore(builder, x1, xs[1]);
      LLVMBuildStore(builder, x0, xs[2]);
      LLVMBuildStore(builder, x1, xs[3]);
      LLVMBuildStore(builder, y0, ys[0]);
      LLVMBuildStore(builder, y0, ys[1]);
      LLVMBuildStore(builder, y1, ys[2]);
      LLVMBuildStore(builder, y1, ys[3]);
      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
         LLVMValueRef cube_layer = lp_build_add(ivec_bld, face, coords[3]);
         LLVMBuildStore(builder, cube_layer, zs[0]);
         LLVMBuildStore(builder, cube_layer, zs[1]);
         LLVMBuildStore(builder, cube_layer, zs[2]);
         LLVMBuildStore(builder, cube_layer, zs[3]);
      }
      else {
         LLVMBuildStore(builder, face, zs[0]);
         LLVMBuildStore(builder, face, zs[1]);
         LLVMBuildStore(builder, face, zs[2]);
         LLVMBuildStore(builder, face, zs[3]);
      }

      lp_build_endif(&edge_if);

      x00 = LLVMBuildLoad(builder, xs[0], "");
      x01 = LLVMBuildLoad(builder, xs[1], "");
      x10 = LLVMBuildLoad(builder, xs[2], "");
      x11 = LLVMBuildLoad(builder, xs[3], "");
      y00 = LLVMBuildLoad(builder, ys[0], "");
      y01 = LLVMBuildLoad(builder, ys[1], "");
      y10 = LLVMBuildLoad(builder, ys[2], "");
      y11 = LLVMBuildLoad(builder, ys[3], "");
      z00 = LLVMBuildLoad(builder, zs[0], "");
      z01 = LLVMBuildLoad(builder, zs[1], "");
      z10 = LLVMBuildLoad(builder, zs[2], "");
      z11 = LLVMBuildLoad(builder, zs[3], "");
   }

   if (linear_mask) {
      /*
       * Whack filter weights into place. Whatever texel had more weight is
       * the one which should have been selected by nearest filtering, hence
       * just use 100% weight for it.
       */
      struct lp_build_context *c_bld = &bld->coord_bld;
      LLVMValueRef w1_mask, w1_weight;
      LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f);

      w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half);
      /* this select is really just an "and" */
      w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
      s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight);
      if (dims >= 2) {
         w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half);
         w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
         t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight);
         if (dims == 3) {
            w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half);
            w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
            r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight);
         }
      }
   }

   /*
    * Get texture colors.
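    * (neighbors[j][i] holds the SoA texel fetched at x-step i / y-step j,
    * i.e. neighbors[0][0] is the sample at (x0,y0), neighbors[0][1] at
    * (x1,y0), neighbors[1][0] at (x0,y1) and neighbors[1][1] at (x1,y1).)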
1347 */ 1348 /* get x0/x1 texels */ 1349 lp_build_sample_texel_soa(bld, 1350 width_vec, height_vec, depth_vec, 1351 x00, y00, z00, 1352 row_stride_vec, img_stride_vec, 1353 data_ptr, mipoffsets, neighbors[0][0]); 1354 lp_build_sample_texel_soa(bld, 1355 width_vec, height_vec, depth_vec, 1356 x01, y01, z01, 1357 row_stride_vec, img_stride_vec, 1358 data_ptr, mipoffsets, neighbors[0][1]); 1359 1360 if (dims == 1) { 1361 assert(!is_gather); 1362 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) { 1363 lp_build_reduce_filter(texel_bld, 1364 bld->static_sampler_state->reduction_mode, 1365 0, 1366 4, 1367 s_fpart, 1368 neighbors[0][0], 1369 neighbors[0][1], 1370 colors_out); 1371 } 1372 else { 1373 LLVMValueRef cmpval0, cmpval1; 1374 cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); 1375 cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]); 1376 /* simplified lerp, AND mask with weight and add */ 1377 colors_out[0] = lp_build_masklerp(texel_bld, s_fpart, 1378 cmpval0, cmpval1); 1379 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0]; 1380 } 1381 } 1382 else { 1383 /* 2D/3D texture */ 1384 struct lp_build_if_state corner_if; 1385 LLVMValueRef colors0[4], colorss[4] = { 0 }; 1386 1387 /* get x0/x1 texels at y1 */ 1388 lp_build_sample_texel_soa(bld, 1389 width_vec, height_vec, depth_vec, 1390 x10, y10, z10, 1391 row_stride_vec, img_stride_vec, 1392 data_ptr, mipoffsets, neighbors[1][0]); 1393 lp_build_sample_texel_soa(bld, 1394 width_vec, height_vec, depth_vec, 1395 x11, y11, z11, 1396 row_stride_vec, img_stride_vec, 1397 data_ptr, mipoffsets, neighbors[1][1]); 1398 1399 /* 1400 * To avoid having to duplicate linear_mask / fetch code use 1401 * another branch (with corner condition though edge would work 1402 * as well) here. 1403 */ 1404 if (have_corners && accurate_cube_corners && 1405 bld->static_sampler_state->reduction_mode == PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE) { 1406 LLVMValueRef c00, c01, c10, c11, c00f, c01f, c10f, c11f; 1407 LLVMValueRef have_corner, one_third; 1408 1409 colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs0"); 1410 colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs1"); 1411 colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs2"); 1412 colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs3"); 1413 1414 have_corner = LLVMBuildLoad(builder, have_corners, ""); 1415 1416 lp_build_if(&corner_if, bld->gallivm, have_corner); 1417 1418 one_third = lp_build_const_vec(bld->gallivm, coord_bld->type, 1419 1.0f/3.0f); 1420 1421 /* find corner */ 1422 c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]); 1423 c00f = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, ""); 1424 c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]); 1425 c01f = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, ""); 1426 c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]); 1427 c10f = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, ""); 1428 c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]); 1429 c11f = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, ""); 1430 1431 if (!is_gather) { 1432 /* 1433 * we can't use standard 2d lerp as we need per-element weight 1434 * in case of corners, so just calculate bilinear result as 1435 * w00*s00 + w01*s01 + w10*s10 + w11*s11. 1436 * (This is actually less work than using 2d lerp, 7 vs. 
             * 9 instructions, however calculating the weights needs another 6,
             * so actually probably not slower than 2d lerp only for 4 channels
             * as weights only need to be calculated once - of course fixing
             * the weights has additional cost.)
             */
            LLVMValueRef w00, w01, w10, w11, wx0, wy0, c_weight, tmp;
            wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
            wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
            w00 = lp_build_mul(coord_bld, wx0, wy0);
            w01 = lp_build_mul(coord_bld, s_fpart, wy0);
            w10 = lp_build_mul(coord_bld, wx0, t_fpart);
            w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);

            /* find corner weight */
            c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
            c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
            c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
            c_weight = lp_build_select(coord_bld, c11, w11, c_weight);

            /*
             * add 1/3 of the corner weight to the weight of the 3 other
             * samples and null out corner weight.
             */
            c_weight = lp_build_mul(coord_bld, c_weight, one_third);
            w00 = lp_build_add(coord_bld, w00, c_weight);
            w00 = lp_build_andnot(coord_bld, w00, c00f);
            w01 = lp_build_add(coord_bld, w01, c_weight);
            w01 = lp_build_andnot(coord_bld, w01, c01f);
            w10 = lp_build_add(coord_bld, w10, c_weight);
            w10 = lp_build_andnot(coord_bld, w10, c10f);
            w11 = lp_build_add(coord_bld, w11, c_weight);
            w11 = lp_build_andnot(coord_bld, w11, c11f);

            if (bld->static_sampler_state->compare_mode ==
                PIPE_TEX_COMPARE_NONE) {
               for (chan = 0; chan < 4; chan++) {
                  colors0[chan] = lp_build_mul(coord_bld, w00,
                                               neighbors[0][0][chan]);
                  tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
                  colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
                  tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
                  colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
                  tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
                  colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
               }
            }
            else {
               LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
               cmpval00 = lp_build_sample_comparefunc(bld, coords[4],
                                                      neighbors[0][0][0]);
               cmpval01 = lp_build_sample_comparefunc(bld, coords[4],
                                                      neighbors[0][1][0]);
               cmpval10 = lp_build_sample_comparefunc(bld, coords[4],
                                                      neighbors[1][0][0]);
               cmpval11 = lp_build_sample_comparefunc(bld, coords[4],
                                                      neighbors[1][1][0]);
               /*
                * inputs to interpolation are just masks so just add
                * masked weights together
                */
               cmpval00 = LLVMBuildBitCast(builder, cmpval00,
                                           coord_bld->vec_type, "");
               cmpval01 = LLVMBuildBitCast(builder, cmpval01,
                                           coord_bld->vec_type, "");
               cmpval10 = LLVMBuildBitCast(builder, cmpval10,
                                           coord_bld->vec_type, "");
               cmpval11 = LLVMBuildBitCast(builder, cmpval11,
                                           coord_bld->vec_type, "");
               colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
               tmp = lp_build_and(coord_bld, w01, cmpval01);
               colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
               tmp = lp_build_and(coord_bld, w10, cmpval10);
               colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
               tmp = lp_build_and(coord_bld, w11, cmpval11);
               colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
               colors0[1] = colors0[2] = colors0[3] = colors0[0];
            }
         }
         else {
            /*
             * We don't have any weights to adjust, so instead calculate
             * the fourth texel as simply the average of the other 3.
             * (This would work for non-gather too, however we'd have
             * a boatload more of the select stuff due to there being
             * 4 times as many colors as weights.)
             */
            LLVMValueRef col00, col01, col10, col11;
            LLVMValueRef colc, colc0, colc1;
            col10 = lp_build_swizzle_soa_channel(texel_bld,
                                                 neighbors[1][0], chan_swiz);
            col11 = lp_build_swizzle_soa_channel(texel_bld,
                                                 neighbors[1][1], chan_swiz);
            col01 = lp_build_swizzle_soa_channel(texel_bld,
                                                 neighbors[0][1], chan_swiz);
            col00 = lp_build_swizzle_soa_channel(texel_bld,
                                                 neighbors[0][0], chan_swiz);

            /*
             * The spec says for comparison filtering, the comparison
             * must happen before synthesizing the new value.
             * This means all gathered values are always 0 or 1,
             * except for the non-existing texel, which can be 0, 1/3, 2/3 or 1...
             * Seems like we'd be allowed to just return 0 or 1 too, so we
             * could simplify and pass down the compare mask values to the
             * end (using int arithmetic/compare on the mask values to
             * construct the fourth texel) and only there convert to floats,
             * but it's probably not worth it (it might be easier for the cpu
             * but not for the code)...
             */
            if (bld->static_sampler_state->compare_mode !=
                PIPE_TEX_COMPARE_NONE) {
               LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
               cmpval00 = lp_build_sample_comparefunc(bld, coords[4], col00);
               cmpval01 = lp_build_sample_comparefunc(bld, coords[4], col01);
               cmpval10 = lp_build_sample_comparefunc(bld, coords[4], col10);
               cmpval11 = lp_build_sample_comparefunc(bld, coords[4], col11);
               col00 = lp_build_select(texel_bld, cmpval00,
                                       texel_bld->one, texel_bld->zero);
               col01 = lp_build_select(texel_bld, cmpval01,
                                       texel_bld->one, texel_bld->zero);
               col10 = lp_build_select(texel_bld, cmpval10,
                                       texel_bld->one, texel_bld->zero);
               col11 = lp_build_select(texel_bld, cmpval11,
                                       texel_bld->one, texel_bld->zero);
            }

            /*
             * Null out corner color.
             */
            col00 = lp_build_andnot(coord_bld, col00, c00f);
            col01 = lp_build_andnot(coord_bld, col01, c01f);
            col10 = lp_build_andnot(coord_bld, col10, c10f);
            col11 = lp_build_andnot(coord_bld, col11, c11f);

            /*
             * New corner texel color is all colors added / 3.
             */
            colc0 = lp_build_add(coord_bld, col00, col01);
            colc1 = lp_build_add(coord_bld, col10, col11);
            colc = lp_build_add(coord_bld, colc0, colc1);
            colc = lp_build_mul(coord_bld, one_third, colc);

            /*
             * Replace the corner texel color with the new value.
             */
            col00 = lp_build_select(coord_bld, c00, colc, col00);
            col01 = lp_build_select(coord_bld, c01, colc, col01);
            col10 = lp_build_select(coord_bld, c10, colc, col10);
            col11 = lp_build_select(coord_bld, c11, colc, col11);

            colors0[0] = col10;
            colors0[1] = col11;
            colors0[2] = col01;
            colors0[3] = col00;
         }

         LLVMBuildStore(builder, colors0[0], colorss[0]);
         LLVMBuildStore(builder, colors0[1], colorss[1]);
         LLVMBuildStore(builder, colors0[2], colorss[2]);
         LLVMBuildStore(builder, colors0[3], colorss[3]);

         lp_build_else(&corner_if);
      }

      if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
         if (is_gather) {
            /*
             * Just assign the red channel (no component selection yet).
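             * (The colors0[0..3] order below matches the texture gather
             * component ordering, i.e. the texels at (x0,y1), (x1,y1),
             * (x1,y0) and (x0,y0) respectively.)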
1605 * This is a bit hackish: we usually do the swizzle at the
1606 * end of sampling (far fewer values to swizzle), but that
1607 * obviously cannot work when using gather.
1608 */
1609 colors0[0] = lp_build_swizzle_soa_channel(texel_bld,
1610 neighbors[1][0],
1611 chan_swiz);
1612 colors0[1] = lp_build_swizzle_soa_channel(texel_bld,
1613 neighbors[1][1],
1614 chan_swiz);
1615 colors0[2] = lp_build_swizzle_soa_channel(texel_bld,
1616 neighbors[0][1],
1617 chan_swiz);
1618 colors0[3] = lp_build_swizzle_soa_channel(texel_bld,
1619 neighbors[0][0],
1620 chan_swiz);
1621 }
1622 else {
1623 /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1624 lp_build_reduce_filter_2d(texel_bld,
1625 bld->static_sampler_state->reduction_mode,
1626 0,
1627 4,
1628 s_fpart,
1629 t_fpart,
1630 neighbors[0][0],
1631 neighbors[0][1],
1632 neighbors[1][0],
1633 neighbors[1][1],
1634 colors0);
1635 }
1636 }
1637 else {
1638 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1639 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1640 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1641 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1642 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1643
1644 if (is_gather) {
1645 /* more hacks for swizzling, should be X, ONE or ZERO... */
1646 colors0[0] = lp_build_select(texel_bld, cmpval10,
1647 texel_bld->one, texel_bld->zero);
1648 colors0[1] = lp_build_select(texel_bld, cmpval11,
1649 texel_bld->one, texel_bld->zero);
1650 colors0[2] = lp_build_select(texel_bld, cmpval01,
1651 texel_bld->one, texel_bld->zero);
1652 colors0[3] = lp_build_select(texel_bld, cmpval00,
1653 texel_bld->one, texel_bld->zero);
1654 }
1655 else {
1656 colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1657 cmpval00, cmpval01, cmpval10, cmpval11);
1658 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1659 }
1660 }
1661
1662 if (have_corners && accurate_cube_corners &&
1663 bld->static_sampler_state->reduction_mode == PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE) {
1664 LLVMBuildStore(builder, colors0[0], colorss[0]);
1665 LLVMBuildStore(builder, colors0[1], colorss[1]);
1666 LLVMBuildStore(builder, colors0[2], colorss[2]);
1667 LLVMBuildStore(builder, colors0[3], colorss[3]);
1668
1669 lp_build_endif(&corner_if);
1670
1671 colors0[0] = LLVMBuildLoad(builder, colorss[0], "");
1672 colors0[1] = LLVMBuildLoad(builder, colorss[1], "");
1673 colors0[2] = LLVMBuildLoad(builder, colorss[2], "");
1674 colors0[3] = LLVMBuildLoad(builder, colorss[3], "");
1675 }
1676
1677 if (dims == 3) {
1678 LLVMValueRef neighbors1[2][2][4];
1679 LLVMValueRef colors1[4];
1680
1681 assert(!is_gather);
1682
1683 /* get x0/x1/y0/y1 texels at z1 */
1684 lp_build_sample_texel_soa(bld,
1685 width_vec, height_vec, depth_vec,
1686 x00, y00, z1,
1687 row_stride_vec, img_stride_vec,
1688 data_ptr, mipoffsets, neighbors1[0][0]);
1689 lp_build_sample_texel_soa(bld,
1690 width_vec, height_vec, depth_vec,
1691 x01, y01, z1,
1692 row_stride_vec, img_stride_vec,
1693 data_ptr, mipoffsets, neighbors1[0][1]);
1694 lp_build_sample_texel_soa(bld,
1695 width_vec, height_vec, depth_vec,
1696 x10, y10, z1,
1697 row_stride_vec, img_stride_vec,
1698 data_ptr, mipoffsets, neighbors1[1][0]);
1699 lp_build_sample_texel_soa(bld,
1700 width_vec, height_vec, depth_vec,
1701 x11, y11, z1,
1702 row_stride_vec, img_stride_vec,
1703 data_ptr, mipoffsets, neighbors1[1][1]);
1704
1705 if
(bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1706 /* Bilinear interpolate the four samples from the second Z slice */
1707 lp_build_reduce_filter_2d(texel_bld,
1708 bld->static_sampler_state->reduction_mode,
1709 0,
1710 4,
1711 s_fpart,
1712 t_fpart,
1713 neighbors1[0][0],
1714 neighbors1[0][1],
1715 neighbors1[1][0],
1716 neighbors1[1][1],
1717 colors1);
1718
1719 /* Linearly interpolate the two samples from the two 3D slices */
1720 lp_build_reduce_filter(texel_bld,
1721 bld->static_sampler_state->reduction_mode,
1722 0,
1723 4,
1724 r_fpart,
1725 colors0,
1726 colors1,
1727 colors_out);
1728 }
1729 else {
1730 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1731 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors1[0][0][0]);
1732 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors1[0][1][0]);
1733 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors1[1][0][0]);
1734 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors1[1][1][0]);
1735 colors1[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1736 cmpval00, cmpval01, cmpval10, cmpval11);
1737 /* Linearly interpolate the two samples from the two 3D slices */
1738 colors_out[0] = lp_build_lerp(texel_bld,
1739 r_fpart,
1740 colors0[0], colors1[0],
1741 0);
1742 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1743 }
1744 }
1745 else {
1746 /* 2D tex */
1747 for (chan = 0; chan < 4; chan++) {
1748 colors_out[chan] = colors0[chan];
1749 }
1750 }
1751 }
1752 if (is_gather) {
1753 /*
1754 * For gather, we can't do our usual channel swizzling done later,
1755 * so do it here. It only really matters for 0/1 swizzles with
1756 * comparison filtering, where the results would otherwise be wrong;
1757 * without comparison it all works out alright anyway. It can't hurt
1758 * to do it here regardless, since it lets llvm instantly drop all the
1759 * calculations above (though doing gather on a channel which always
1760 * returns 0 or 1 is a rather questionable idea in any case...).
1761 */
1762 if (chan_swiz == PIPE_SWIZZLE_1) {
1763 for (chan = 0; chan < 4; chan++) {
1764 colors_out[chan] = texel_bld->one;
1765 }
1766 } else if (chan_swiz == PIPE_SWIZZLE_0) {
1767 for (chan = 0; chan < 4; chan++) {
1768 colors_out[chan] = texel_bld->zero;
1769 }
1770 }
1771 }
1772 }
1773
1774
1775 /**
1776 * Sample the texture/mipmap using given image filter and mip filter.
1777 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1778 * from (vectors or scalars).
1779 * If we're using nearest miplevel sampling the '1' values will be null/unused.
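 *
 * As a rough scalar sketch of the LINEAR mip path below (illustrative
 * pseudocode, not the generated vector code; img_filter() and lerp()
 * stand in for the nearest/linear image filter and interpolation):
 *
 *    c = img_filter(level0, s, t, r);
 *    if (lod_fpart > 0) {
 *       c = lerp(lod_fpart, c, img_filter(level1, s, t, r));
 *    }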
1780 */ 1781static void 1782lp_build_sample_mipmap(struct lp_build_sample_context *bld, 1783 unsigned img_filter, 1784 unsigned mip_filter, 1785 boolean is_gather, 1786 const LLVMValueRef *coords, 1787 const LLVMValueRef *offsets, 1788 LLVMValueRef ilevel0, 1789 LLVMValueRef ilevel1, 1790 LLVMValueRef lod_fpart, 1791 LLVMValueRef *colors_out) 1792{ 1793 LLVMBuilderRef builder = bld->gallivm->builder; 1794 LLVMValueRef size0 = NULL; 1795 LLVMValueRef size1 = NULL; 1796 LLVMValueRef row_stride0_vec = NULL; 1797 LLVMValueRef row_stride1_vec = NULL; 1798 LLVMValueRef img_stride0_vec = NULL; 1799 LLVMValueRef img_stride1_vec = NULL; 1800 LLVMValueRef data_ptr0 = NULL; 1801 LLVMValueRef data_ptr1 = NULL; 1802 LLVMValueRef mipoff0 = NULL; 1803 LLVMValueRef mipoff1 = NULL; 1804 LLVMValueRef colors0[4], colors1[4]; 1805 unsigned chan; 1806 1807 /* sample the first mipmap level */ 1808 lp_build_mipmap_level_sizes(bld, ilevel0, 1809 &size0, 1810 &row_stride0_vec, &img_stride0_vec); 1811 if (bld->num_mips == 1) { 1812 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); 1813 } 1814 else { 1815 /* This path should work for num_lods 1 too but slightly less efficient */ 1816 data_ptr0 = bld->base_ptr; 1817 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0); 1818 } 1819 if (img_filter == PIPE_TEX_FILTER_NEAREST) { 1820 lp_build_sample_image_nearest(bld, size0, 1821 row_stride0_vec, img_stride0_vec, 1822 data_ptr0, mipoff0, coords, offsets, 1823 colors0); 1824 } 1825 else { 1826 assert(img_filter == PIPE_TEX_FILTER_LINEAR); 1827 lp_build_sample_image_linear(bld, is_gather, size0, NULL, 1828 row_stride0_vec, img_stride0_vec, 1829 data_ptr0, mipoff0, coords, offsets, 1830 colors0); 1831 } 1832 1833 /* Store the first level's colors in the output variables */ 1834 for (chan = 0; chan < 4; chan++) { 1835 LLVMBuildStore(builder, colors0[chan], colors_out[chan]); 1836 } 1837 1838 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { 1839 struct lp_build_if_state if_ctx; 1840 LLVMValueRef need_lerp; 1841 1842 /* need_lerp = lod_fpart > 0 */ 1843 if (bld->num_lods == 1) { 1844 need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT, 1845 lod_fpart, bld->lodf_bld.zero, 1846 "need_lerp"); 1847 } 1848 else { 1849 /* 1850 * We'll do mip filtering if any of the quads (or individual 1851 * pixel in case of per-pixel lod) need it. 1852 * It might be better to split the vectors here and only fetch/filter 1853 * quads which need it (if there's one lod per quad). 1854 */ 1855 need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type, 1856 PIPE_FUNC_GREATER, 1857 lod_fpart, bld->lodf_bld.zero); 1858 need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp); 1859 lp_build_name(need_lerp, "need_lerp"); 1860 } 1861 1862 lp_build_if(&if_ctx, bld->gallivm, need_lerp); 1863 { 1864 /* 1865 * We unfortunately need to clamp lod_fpart here since we can get 1866 * negative values which would screw up filtering if not all 1867 * lod_fpart values have same sign. 
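 * (Illustration with made-up numbers: with per-pixel lods a vector
 * could hold lod_fpart = {0.4, -0.1, ...}; the -0.1 lane would lerp
 * beyond the level 0 colors toward an extrapolated value, hence the
 * max() with zero below.)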
1868 */ 1869 lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart, 1870 bld->lodf_bld.zero); 1871 /* sample the second mipmap level */ 1872 lp_build_mipmap_level_sizes(bld, ilevel1, 1873 &size1, 1874 &row_stride1_vec, &img_stride1_vec); 1875 if (bld->num_mips == 1) { 1876 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); 1877 } 1878 else { 1879 data_ptr1 = bld->base_ptr; 1880 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1); 1881 } 1882 if (img_filter == PIPE_TEX_FILTER_NEAREST) { 1883 lp_build_sample_image_nearest(bld, size1, 1884 row_stride1_vec, img_stride1_vec, 1885 data_ptr1, mipoff1, coords, offsets, 1886 colors1); 1887 } 1888 else { 1889 lp_build_sample_image_linear(bld, FALSE, size1, NULL, 1890 row_stride1_vec, img_stride1_vec, 1891 data_ptr1, mipoff1, coords, offsets, 1892 colors1); 1893 } 1894 1895 /* interpolate samples from the two mipmap levels */ 1896 1897 if (bld->num_lods != bld->coord_type.length) 1898 lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm, 1899 bld->lodf_bld.type, 1900 bld->texel_bld.type, 1901 lod_fpart); 1902 1903 for (chan = 0; chan < 4; chan++) { 1904 colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, 1905 colors0[chan], colors1[chan], 1906 0); 1907 LLVMBuildStore(builder, colors0[chan], colors_out[chan]); 1908 } 1909 } 1910 lp_build_endif(&if_ctx); 1911 } 1912} 1913 1914 1915/** 1916 * Sample the texture/mipmap using given mip filter, and using 1917 * both nearest and linear filtering at the same time depending 1918 * on linear_mask. 1919 * lod can be per quad but linear_mask is always per pixel. 1920 * ilevel0 and ilevel1 indicate the two mipmap levels to sample 1921 * from (vectors or scalars). 1922 * If we're using nearest miplevel sampling the '1' values will be null/unused. 1923 */ 1924static void 1925lp_build_sample_mipmap_both(struct lp_build_sample_context *bld, 1926 LLVMValueRef linear_mask, 1927 unsigned mip_filter, 1928 const LLVMValueRef *coords, 1929 const LLVMValueRef *offsets, 1930 LLVMValueRef ilevel0, 1931 LLVMValueRef ilevel1, 1932 LLVMValueRef lod_fpart, 1933 LLVMValueRef lod_positive, 1934 LLVMValueRef *colors_out) 1935{ 1936 LLVMBuilderRef builder = bld->gallivm->builder; 1937 LLVMValueRef size0 = NULL; 1938 LLVMValueRef size1 = NULL; 1939 LLVMValueRef row_stride0_vec = NULL; 1940 LLVMValueRef row_stride1_vec = NULL; 1941 LLVMValueRef img_stride0_vec = NULL; 1942 LLVMValueRef img_stride1_vec = NULL; 1943 LLVMValueRef data_ptr0 = NULL; 1944 LLVMValueRef data_ptr1 = NULL; 1945 LLVMValueRef mipoff0 = NULL; 1946 LLVMValueRef mipoff1 = NULL; 1947 LLVMValueRef colors0[4], colors1[4]; 1948 unsigned chan; 1949 1950 /* sample the first mipmap level */ 1951 lp_build_mipmap_level_sizes(bld, ilevel0, 1952 &size0, 1953 &row_stride0_vec, &img_stride0_vec); 1954 if (bld->num_mips == 1) { 1955 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); 1956 } 1957 else { 1958 /* This path should work for num_lods 1 too but slightly less efficient */ 1959 data_ptr0 = bld->base_ptr; 1960 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0); 1961 } 1962 1963 lp_build_sample_image_linear(bld, FALSE, size0, linear_mask, 1964 row_stride0_vec, img_stride0_vec, 1965 data_ptr0, mipoff0, coords, offsets, 1966 colors0); 1967 1968 /* Store the first level's colors in the output variables */ 1969 for (chan = 0; chan < 4; chan++) { 1970 LLVMBuildStore(builder, colors0[chan], colors_out[chan]); 1971 } 1972 1973 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { 1974 struct lp_build_if_state if_ctx; 1975 LLVMValueRef need_lerp; 1976 1977 /* 1978 * We'll do mip 
filtering if any of the quads (or individual 1979 * pixel in case of per-pixel lod) need it. 1980 * Note using lod_positive here not lod_fpart since it may be the same 1981 * condition as that used in the outer "if" in the caller hence llvm 1982 * should be able to merge the branches in this case. 1983 */ 1984 need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive); 1985 lp_build_name(need_lerp, "need_lerp"); 1986 1987 lp_build_if(&if_ctx, bld->gallivm, need_lerp); 1988 { 1989 /* 1990 * We unfortunately need to clamp lod_fpart here since we can get 1991 * negative values which would screw up filtering if not all 1992 * lod_fpart values have same sign. 1993 */ 1994 lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart, 1995 bld->lodf_bld.zero); 1996 /* sample the second mipmap level */ 1997 lp_build_mipmap_level_sizes(bld, ilevel1, 1998 &size1, 1999 &row_stride1_vec, &img_stride1_vec); 2000 if (bld->num_mips == 1) { 2001 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); 2002 } 2003 else { 2004 data_ptr1 = bld->base_ptr; 2005 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1); 2006 } 2007 2008 lp_build_sample_image_linear(bld, FALSE, size1, linear_mask, 2009 row_stride1_vec, img_stride1_vec, 2010 data_ptr1, mipoff1, coords, offsets, 2011 colors1); 2012 2013 /* interpolate samples from the two mipmap levels */ 2014 2015 if (bld->num_lods != bld->coord_type.length) 2016 lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm, 2017 bld->lodf_bld.type, 2018 bld->texel_bld.type, 2019 lod_fpart); 2020 2021 for (chan = 0; chan < 4; chan++) { 2022 colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, 2023 colors0[chan], colors1[chan], 2024 0); 2025 LLVMBuildStore(builder, colors0[chan], colors_out[chan]); 2026 } 2027 } 2028 lp_build_endif(&if_ctx); 2029 } 2030} 2031 2032 2033/** 2034 * Build (per-coord) layer value. 2035 * Either clamp layer to valid values or fill in optional out_of_bounds 2036 * value and just return value unclamped. 2037 */ 2038static LLVMValueRef 2039lp_build_layer_coord(struct lp_build_sample_context *bld, 2040 unsigned texture_unit, 2041 boolean is_cube_array, 2042 LLVMValueRef layer, 2043 LLVMValueRef *out_of_bounds) 2044{ 2045 LLVMValueRef num_layers; 2046 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 2047 2048 num_layers = bld->dynamic_state->depth(bld->dynamic_state, bld->gallivm, 2049 bld->context_ptr, texture_unit, NULL); 2050 2051 if (out_of_bounds) { 2052 LLVMValueRef out1, out; 2053 assert(!is_cube_array); 2054 num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers); 2055 out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero); 2056 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers); 2057 *out_of_bounds = lp_build_or(int_coord_bld, out, out1); 2058 return layer; 2059 } 2060 else { 2061 LLVMValueRef maxlayer; 2062 LLVMValueRef s = is_cube_array ? 
lp_build_const_int32(bld->gallivm, 6) :
2063 bld->int_bld.one;
2064 maxlayer = lp_build_sub(&bld->int_bld, num_layers, s);
2065 maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
2066 return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
2067 }
2068 }
2069
2070 #define WEIGHT_LUT_SIZE 1024
2071
2072 static void
2073 lp_build_sample_aniso(struct lp_build_sample_context *bld,
2074 unsigned img_filter,
2075 unsigned mip_filter,
2076 boolean is_gather,
2077 const LLVMValueRef *coords,
2078 const LLVMValueRef *offsets,
2079 LLVMValueRef ilevel0,
2080 LLVMValueRef ilevel1,
2081 LLVMValueRef lod_fpart,
2082 LLVMValueRef *colors_out)
2083 {
2084 struct gallivm_state *gallivm = bld->gallivm;
2085 LLVMBuilderRef builder = gallivm->builder;
2086 struct lp_build_context *coord_bld = &bld->coord_bld;
2087 struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
2088 LLVMValueRef ddx_ddy = lp_build_packed_ddx_ddy_twocoord(&bld->coord_bld, coords[0], coords[1]);
2089 LLVMValueRef float_size;
2090 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
2091 LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
2092 LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
2093 unsigned length = bld->coord_bld.type.length;
2094 unsigned num_quads = length / 4;
2095 unsigned i;
2096 LLVMValueRef filter_table = bld->aniso_filter_table;
2097 LLVMValueRef size0, row_stride0_vec, img_stride0_vec;
2098 LLVMValueRef data_ptr0, mipoff0 = NULL;
2099
2100 lp_build_mipmap_level_sizes(bld, ilevel0,
2101 &size0,
2102 &row_stride0_vec, &img_stride0_vec);
2103 if (bld->num_mips == 1) {
2104 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
2105 }
2106 else {
2107 /* This path should work for num_lods 1 too but slightly less efficient */
2108 data_ptr0 = bld->base_ptr;
2109 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
2110 }
2111
2112 float_size = lp_build_int_to_float(&bld->float_size_in_bld, bld->int_size);
2113
2114 LLVMValueRef float_size_lvl = lp_build_int_to_float(&bld->float_size_bld, size0);
2115 /* extract width and height into vectors for use later */
2116 static const unsigned char swizzle15[] = { /* broadcast height (elems 1 and 5) */
2117 1, 1, 1, 1, 5, 5, 5, 5
2118 };
2119 static const unsigned char swizzle04[] = { /* broadcast width (elems 0 and 4) */
2120 0, 0, 0, 0, 4, 4, 4, 4
2121 };
2122 LLVMValueRef width_dim, height_dim;
2123
2124 width_dim = lp_build_swizzle_aos_n(gallivm, float_size_lvl, swizzle04, bld->float_size_bld.type.length, bld->coord_bld.type.length);
2125 height_dim = lp_build_swizzle_aos_n(gallivm, float_size_lvl, swizzle15, bld->float_size_bld.type.length, bld->coord_bld.type.length);
2126
2127
2128 /* shuffle width/height for ddx/ddy calculations.
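 * A sketch of the layout, assuming a single 4-wide quad: float_size
 * holds (w, h, ...), the shuffle below replicates it to (w, w, h, h),
 * and the packed derivatives are (ds/dx, ds/dy, dt/dx, dt/dy), so one
 * multiply scales the s derivatives by width and the t derivatives by
 * height.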
*/
2129 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
2130
2131 for (i = 0; i < num_quads; i++) {
2132 shuffles[i*4+0] = shuffles[i*4+1] = index0;
2133 shuffles[i*4+2] = shuffles[i*4+3] = index1;
2134 }
2135
2136 LLVMValueRef floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
2137 LLVMConstVector(shuffles, length), "");
2138
2139 ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, floatdim);
2140
2141 LLVMValueRef scaling = lp_build_shl(&bld->leveli_bld, bld->leveli_bld.one, ilevel0);
2142 scaling = lp_build_int_to_float(&bld->levelf_bld, scaling);
2143 scaling = lp_build_rcp(&bld->levelf_bld, scaling);
2144
2145 if (bld->num_lods != length) {
2146 if (bld->levelf_bld.type.length == 1)
2147 scaling = lp_build_broadcast_scalar(coord_bld,
2148 scaling);
2149 else
2150 scaling = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
2151 bld->levelf_bld.type,
2152 coord_bld->type,
2153 scaling);
2154 }
2155
2156 ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, scaling);
2157
2158 static const unsigned char swizzle01[] = { /* pick the s derivatives */
2159 0, 1, 0, 1,
2160 };
2161 static const unsigned char swizzle23[] = { /* pick the t derivatives */
2162 2, 3, 2, 3,
2163 };
2164
2165 LLVMValueRef ddx_ddys, ddx_ddyt;
2166 ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle01);
2167 ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle23);
2168
2169 /* compute ellipse coefficients:
2170 * A*x*x + B*x*y + C*y*y = F */
2171 /* float A = vx*vx+vy*vy+1; */
2172 LLVMValueRef A = lp_build_mul(coord_bld, ddx_ddyt, ddx_ddyt);
2173
2174 LLVMValueRef Ay = lp_build_swizzle_aos(coord_bld, A, swizzle15);
2175 A = lp_build_add(coord_bld, A, Ay);
2176 A = lp_build_add(coord_bld, A, coord_bld->one);
2177 A = lp_build_swizzle_aos(coord_bld, A, swizzle04);
2178
2179 /* float B = -2*(ux*vx+uy*vy); */
2180 LLVMValueRef B = lp_build_mul(coord_bld, ddx_ddys, ddx_ddyt);
2181 LLVMValueRef By = lp_build_swizzle_aos(coord_bld, B, swizzle15);
2182 B = lp_build_add(coord_bld, B, By);
2183 B = lp_build_mul_imm(coord_bld, B, -2);
2184 B = lp_build_swizzle_aos(coord_bld, B, swizzle04);
2185
2186 /* float C = ux*ux+uy*uy+1; */
2187 LLVMValueRef C = lp_build_mul(coord_bld, ddx_ddys, ddx_ddys);
2188 LLVMValueRef Cy = lp_build_swizzle_aos(coord_bld, C, swizzle15);
2189 C = lp_build_add(coord_bld, C, Cy);
2190 C = lp_build_add(coord_bld, C, coord_bld->one);
2191 C = lp_build_swizzle_aos(coord_bld, C, swizzle04);
2192
2193 /* float F = A*C-B*B/4.0f; */
2194 LLVMValueRef F = lp_build_mul(coord_bld, B, B);
2195 F = lp_build_div(coord_bld, F, lp_build_const_vec(gallivm, coord_bld->type, 4.0));
2196 LLVMValueRef F_p2 = lp_build_mul(coord_bld, A, C);
2197 F = lp_build_sub(coord_bld, F_p2, F);
2198
2199 /* compute ellipse bounding box in texture space */
2200 /* const float d = -B*B+4.0f*C*A; */
2201 LLVMValueRef d = lp_build_sub(coord_bld, coord_bld->zero, lp_build_mul(coord_bld, B, B));
2202 LLVMValueRef d_p2 = lp_build_mul(coord_bld, A, C);
2203 d_p2 = lp_build_mul_imm(coord_bld, d_p2, 4);
2204 d = lp_build_add(coord_bld, d, d_p2);
2205
2206 /* const float box_u = 2.0f / d * sqrtf(d*C*F); */
2207 /* box_u -> half of bbox width */
2208 LLVMValueRef temp;
2209 temp = lp_build_mul(coord_bld, d, C);
2210 temp = lp_build_mul(coord_bld, temp, F);
2211 temp = lp_build_sqrt(coord_bld, temp);
2212
2213 LLVMValueRef box_u = lp_build_div(coord_bld, lp_build_const_vec(gallivm, coord_bld->type, 2.0), d);
2214 box_u = lp_build_mul(coord_bld, box_u, temp);
2215
2216 /* const float box_v = 2.0f / d * sqrtf(A*d*F); */
2217 /* box_v -> half of bbox height */
2218 temp
= lp_build_mul(coord_bld, A, d);
2219 temp = lp_build_mul(coord_bld, temp, F);
2220 temp = lp_build_sqrt(coord_bld, temp);
2221
2222 LLVMValueRef box_v = lp_build_div(coord_bld, lp_build_const_vec(gallivm, coord_bld->type, 2.0), d);
2223 box_v = lp_build_mul(coord_bld, box_v, temp);
2224
2225 /* Scale ellipse formula to directly index the Filter Lookup Table.
2226 * i.e. scale so that F = WEIGHT_LUT_SIZE-1
2227 */
2228 LLVMValueRef formScale = lp_build_div(coord_bld, lp_build_const_vec(gallivm, coord_bld->type, WEIGHT_LUT_SIZE - 1), F);
2229
2230 A = lp_build_mul(coord_bld, A, formScale);
2231 B = lp_build_mul(coord_bld, B, formScale);
2232 C = lp_build_mul(coord_bld, C, formScale);
2233 /* F *= formScale; */ /* no need to scale F as we don't use it below here */
2234
2235 LLVMValueRef ddq = lp_build_mul_imm(coord_bld, A, 2);
2236
2237 /* Heckbert MS thesis, p. 59: scan over the bounding box of the ellipse
2238 * and incrementally update the value of q = A*x*x + B*x*y + C*y*y; when
2239 * this value q is less than F, we're inside the ellipse
2240 */
2241
2242 LLVMValueRef float_size0 = lp_build_int_to_float(float_size_bld, bld->int_size);
2243 LLVMValueRef width0 = lp_build_extract_broadcast(gallivm,
2244 float_size_bld->type,
2245 coord_bld->type,
2246 float_size0, index0);
2247 LLVMValueRef height0 = lp_build_extract_broadcast(gallivm,
2248 float_size_bld->type,
2249 coord_bld->type,
2250 float_size0, index1);
2251
2252 /* texture->width0 * scaling */
2253 width0 = lp_build_mul(coord_bld, width0, scaling);
2254 /* texture->height0 * scaling */
2255 height0 = lp_build_mul(coord_bld, height0, scaling);
2256
2257 /* tex_u = -0.5f + s[j] * texture->width0 * scaling */
2258 LLVMValueRef tex_u = lp_build_mul(coord_bld, coords[0], width0);
2259 tex_u = lp_build_add(coord_bld, tex_u, lp_build_const_vec(gallivm, coord_bld->type, -0.5f));
2260
2261 /* tex_v = -0.5f + t[j] * texture->height0 * scaling */
2262 LLVMValueRef tex_v = lp_build_mul(coord_bld, coords[1], height0);
2263 tex_v = lp_build_add(coord_bld, tex_v, lp_build_const_vec(gallivm, coord_bld->type, -0.5f));
2264
2265 /* const int u0 = (int) floorf(tex_u - box_u); */
2266 LLVMValueRef u0 = lp_build_itrunc(coord_bld, lp_build_floor(coord_bld, lp_build_sub(coord_bld, tex_u, box_u)));
2267 /* const int u1 = (int) ceilf(tex_u + box_u); */
2268 LLVMValueRef u1 = lp_build_itrunc(coord_bld, lp_build_ceil(coord_bld, lp_build_add(coord_bld, tex_u, box_u)));
2269
2270 /* const int v0 = (int) floorf(tex_v - box_v); */
2271 LLVMValueRef v0 = lp_build_itrunc(coord_bld, lp_build_floor(coord_bld, lp_build_sub(coord_bld, tex_v, box_v)));
2272 /* const int v1 = (int) ceilf(tex_v + box_v); */
2273 LLVMValueRef v1 = lp_build_itrunc(coord_bld, lp_build_ceil(coord_bld, lp_build_add(coord_bld, tex_v, box_v)));
2274
2275 /* const float U = u0 - tex_u; */
2276 LLVMValueRef U = lp_build_sub(coord_bld, lp_build_int_to_float(coord_bld, u0), tex_u);
2277
2278 /* A * (2 * U + 1) */
2279 LLVMValueRef dq_base = lp_build_mul_imm(coord_bld, U, 2);
2280 dq_base = lp_build_add(coord_bld, dq_base, coord_bld->one);
2281 dq_base = lp_build_mul(coord_bld, dq_base, A);
2282
2283 /* A * U * U */
2284 LLVMValueRef q_base = lp_build_mul(coord_bld, U, U);
2285 q_base = lp_build_mul(coord_bld, q_base, A);
2286
2287 LLVMValueRef colors0[4];
2288 LLVMValueRef den_store = lp_build_alloca(gallivm, bld->texel_bld.vec_type, "den");
2289
2290 unsigned chan;
2291 for (chan = 0; chan < 4; chan++)
2292 colors0[chan] = lp_build_alloca(gallivm, bld->texel_bld.vec_type, "colors");
2293
2294 LLVMValueRef
q_store, dq_store;
2295 q_store = lp_build_alloca(gallivm, bld->coord_bld.vec_type, "q");
2296 dq_store = lp_build_alloca(gallivm, bld->coord_bld.vec_type, "dq");
2297
2298 LLVMValueRef v_limiter = lp_build_alloca(gallivm, bld->int_coord_bld.vec_type, "v_limiter");
2299 LLVMValueRef u_limiter = lp_build_alloca(gallivm, bld->int_coord_bld.vec_type, "u_limiter");
2300
2301 LLVMBuildStore(builder, v0, v_limiter);
2302
2303 /* create an LLVM loop block for the V iterator */
2304 LLVMBasicBlockRef v_loop_block = lp_build_insert_new_block(gallivm, "vloop");
2305
2306 LLVMBuildBr(builder, v_loop_block);
2307 LLVMPositionBuilderAtEnd(builder, v_loop_block);
2308
2309 LLVMValueRef v_val = LLVMBuildLoad(builder, v_limiter, "");
2310 LLVMValueRef v_mask = LLVMBuildICmp(builder,
2311 LLVMIntSLE,
2312 v_val,
2313 v1, "");
2314
2315 /* loop over V values. */
2316 {
2317 /* const float V = v - tex_v; */
2318 LLVMValueRef V = lp_build_sub(coord_bld, lp_build_int_to_float(coord_bld, v_val), tex_v);
2319
2320 /* float dq = dq_base + B * V; */
2321 LLVMValueRef dq = lp_build_mul(coord_bld, V, B);
2322 dq = lp_build_add(coord_bld, dq, dq_base);
2323
2324 /* float q = (C * V + B * U) * V + q_base */
2325 LLVMValueRef q = lp_build_mul(coord_bld, C, V);
2326 q = lp_build_add(coord_bld, q, lp_build_mul(coord_bld, B, U));
2327 q = lp_build_mul(coord_bld, q, V);
2328 q = lp_build_add(coord_bld, q, q_base);
2329
2330 LLVMBuildStore(builder, q, q_store);
2331 LLVMBuildStore(builder, dq, dq_store);
2332
2333 LLVMBuildStore(builder, u0, u_limiter);
2334
2335 /* create an LLVM loop block for the U iterator */
2336 LLVMBasicBlockRef u_loop_block = lp_build_insert_new_block(gallivm, "uloop");
2337
2338 LLVMBuildBr(builder, u_loop_block);
2339 LLVMPositionBuilderAtEnd(builder, u_loop_block);
2340
2341 LLVMValueRef u_val = LLVMBuildLoad(builder, u_limiter, "");
2342 LLVMValueRef u_mask = LLVMBuildICmp(builder,
2343 LLVMIntSLE,
2344 u_val,
2345 u1, "");
2346
2347 /* loop over U values */
2348 {
2349 /* q = (int)q */
2350 q = lp_build_itrunc(coord_bld, LLVMBuildLoad(builder, q_store, ""));
2351
2352 /*
2353 * avoid OOB access to the filter table: generate a mask for lanes
2354 * with q > 1023, then clamp q into the table range.
2355 */
2356 LLVMValueRef q_mask = LLVMBuildICmp(builder,
2357 LLVMIntSLE,
2358 q,
2359 lp_build_const_int_vec(gallivm, bld->int_coord_bld.type, 0x3ff), "");
2360 q_mask = LLVMBuildSExt(builder, q_mask, bld->int_coord_bld.vec_type, "");
2361
2362 q = lp_build_max(&bld->int_coord_bld, q, bld->int_coord_bld.zero);
2363 q = lp_build_and(&bld->int_coord_bld, q, lp_build_const_int_vec(gallivm, bld->int_coord_bld.type, 0x3ff));
2364
2365 /* scale the indices to byte offsets (the table entries are 4-byte floats). */
2366 q = lp_build_mul_imm(&bld->int_coord_bld, q, 4);
2367 filter_table = LLVMBuildBitCast(gallivm->builder, filter_table, LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
2368
2369 /* Lookup weights in filter table */
2370 LLVMValueRef weights = lp_build_gather(gallivm, coord_bld->type.length,
2371 coord_bld->type.width,
2372 lp_elem_type(coord_bld->type),
2373 TRUE, filter_table, q, TRUE);
2374
2375 /*
2376 * Mask off the weights here, which should ensure the iterations are
2377 * a no-op for lanes whose u/v values are not being calculated.
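 * (E.g. if one lane's ellipse bbox ends at u1 = 5 while another lane
 * iterates up to u1 = 8, the finished lane keeps executing the shared
 * loop; zeroing its weights makes those extra iterations contribute
 * nothing. Made-up extents, for illustration.)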
2378 */
2379 weights = LLVMBuildBitCast(builder, weights, bld->int_coord_bld.vec_type, "");
2380 weights = lp_build_and(&bld->int_coord_bld, weights, LLVMBuildSExt(builder, u_mask, bld->int_coord_bld.vec_type, ""));
2381 weights = lp_build_and(&bld->int_coord_bld, weights, LLVMBuildSExt(builder, v_mask, bld->int_coord_bld.vec_type, ""));
2382 weights = lp_build_and(&bld->int_coord_bld, weights, q_mask);
2383 weights = LLVMBuildBitCast(builder, weights, bld->coord_bld.vec_type, "");
2384
2385 /* if the weights are all 0 avoid doing the sampling at all. */
2386 struct lp_build_if_state noloadw0;
2387
2388 LLVMValueRef wnz = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
2389 weights, bld->coord_bld.zero, "");
2390 wnz = LLVMBuildSExt(builder, wnz, bld->int_coord_bld.vec_type, "");
2391 wnz = lp_build_any_true_range(&bld->coord_bld, bld->coord_bld.type.length, wnz);
2392 lp_build_if(&noloadw0, gallivm, wnz);
2393 LLVMValueRef new_coords[3];
2394 new_coords[0] = lp_build_div(coord_bld, lp_build_int_to_float(coord_bld, u_val), width_dim);
2395 new_coords[1] = lp_build_div(coord_bld, lp_build_int_to_float(coord_bld, v_val), height_dim);
2396 new_coords[2] = coords[2];
2397
2398 /* fetch the texel at the current (u, v) position */
2399 LLVMValueRef temp_colors[4];
2400 lp_build_sample_image_nearest(bld, size0,
2401 row_stride0_vec, img_stride0_vec,
2402 data_ptr0, mipoff0, new_coords, offsets,
2403 temp_colors);
2404
2405 for (chan = 0; chan < 4; chan++) {
2406 LLVMValueRef tcolor = LLVMBuildLoad(builder, colors0[chan], "");
2407
2408 tcolor = lp_build_add(&bld->texel_bld, tcolor, lp_build_mul(&bld->texel_bld, temp_colors[chan], weights));
2409 LLVMBuildStore(builder, tcolor, colors0[chan]);
2410 }
2411
2412 /* colors were multiplied by weight and added in above; now accumulate the weight sum. */
2413 /* den += weight; */
2414 LLVMValueRef den = LLVMBuildLoad(builder, den_store, "");
2415 den = lp_build_add(&bld->texel_bld, den, weights);
2416 LLVMBuildStore(builder, den, den_store);
2417
2418 lp_build_endif(&noloadw0);
2419 /* q += dq; */
2420 /* dq += ddq; */
2421 q = LLVMBuildLoad(builder, q_store, "");
2422 dq = LLVMBuildLoad(builder, dq_store, "");
2423 q = lp_build_add(coord_bld, q, dq);
2424 dq = lp_build_add(coord_bld, dq, ddq);
2425 LLVMBuildStore(builder, q, q_store);
2426 LLVMBuildStore(builder, dq, dq_store);
2427 }
2428 /* u += 1 */
2429 u_val = LLVMBuildLoad(builder, u_limiter, "");
2430 u_val = lp_build_add(&bld->int_coord_bld, u_val, bld->int_coord_bld.one);
2431 LLVMBuildStore(builder, u_val, u_limiter);
2432
2433 u_mask = LLVMBuildICmp(builder,
2434 LLVMIntSLE,
2435 u_val,
2436 u1, "");
2437 LLVMValueRef u_end_cond = LLVMBuildSExt(builder, u_mask, bld->int_coord_bld.vec_type, "");
2438 u_end_cond = lp_build_any_true_range(&bld->coord_bld, bld->coord_bld.type.length, u_end_cond);
2439
2440 LLVMBasicBlockRef u_end_loop = lp_build_insert_new_block(gallivm, "u_end_loop");
2441
2442 LLVMBuildCondBr(builder, u_end_cond,
2443 u_loop_block, u_end_loop);
2444
2445 LLVMPositionBuilderAtEnd(builder, u_end_loop);
2446
2447 }
2448
2449 /* v += 1 */
2450 v_val = LLVMBuildLoad(builder, v_limiter, "");
2451 v_val = lp_build_add(&bld->int_coord_bld, v_val, bld->int_coord_bld.one);
2452 LLVMBuildStore(builder, v_val, v_limiter);
2453
2454 v_mask = LLVMBuildICmp(builder,
2455 LLVMIntSLE,
2456 v_val,
2457 v1, "");
2458 LLVMValueRef v_end_cond = LLVMBuildSExt(builder, v_mask, bld->int_coord_bld.vec_type, "");
2459 v_end_cond = lp_build_any_true_range(&bld->coord_bld, bld->coord_bld.type.length, v_end_cond);
2460
2461 LLVMBasicBlockRef v_end_loop =
lp_build_insert_new_block(gallivm, "v_end_loop"); 2462 2463 LLVMBuildCondBr(builder, v_end_cond, 2464 v_loop_block, v_end_loop); 2465 2466 LLVMPositionBuilderAtEnd(builder, v_end_loop); 2467 2468 LLVMValueRef den = LLVMBuildLoad(builder, den_store, ""); 2469 2470 for (chan = 0; chan < 4; chan++) 2471 colors0[chan] = lp_build_div(&bld->texel_bld, LLVMBuildLoad(builder, colors0[chan], ""), den); 2472 LLVMValueRef den0 = lp_build_cmp(&bld->coord_bld, PIPE_FUNC_EQUAL, den, bld->coord_bld.zero); 2473 2474 LLVMValueRef den0_any = lp_build_any_true_range(&bld->coord_bld, bld->coord_bld.type.length, den0); 2475 2476 struct lp_build_if_state den0_fallback; 2477 lp_build_if(&den0_fallback, gallivm, den0_any); 2478 2479 LLVMValueRef colors_den0[4]; 2480 lp_build_sample_image_linear(bld, false, size0, NULL, 2481 row_stride0_vec, img_stride0_vec, 2482 data_ptr0, mipoff0, coords, offsets, 2483 colors_den0); 2484 for (chan = 0; chan < 4; chan++) { 2485 LLVMValueRef chan_val = lp_build_select(&bld->texel_bld, den0, colors_den0[chan], colors0[chan]); 2486 LLVMBuildStore(builder, chan_val, colors_out[chan]); 2487 } 2488 lp_build_else(&den0_fallback); 2489 for (chan = 0; chan < 4; chan++) 2490 LLVMBuildStore(builder, colors0[chan], colors_out[chan]); 2491 lp_build_endif(&den0_fallback); 2492} 2493 2494/** 2495 * Calculate cube face, lod, mip levels. 2496 */ 2497static void 2498lp_build_sample_common(struct lp_build_sample_context *bld, 2499 boolean is_lodq, 2500 unsigned texture_index, 2501 unsigned sampler_index, 2502 LLVMValueRef *coords, 2503 const struct lp_derivatives *derivs, /* optional */ 2504 LLVMValueRef lod_bias, /* optional */ 2505 LLVMValueRef explicit_lod, /* optional */ 2506 LLVMValueRef *lod_pos_or_zero, 2507 LLVMValueRef *lod, 2508 LLVMValueRef *lod_fpart, 2509 LLVMValueRef *ilevel0, 2510 LLVMValueRef *ilevel1) 2511{ 2512 const unsigned mip_filter = bld->static_sampler_state->min_mip_filter; 2513 const unsigned min_filter = bld->static_sampler_state->min_img_filter; 2514 const unsigned mag_filter = bld->static_sampler_state->mag_img_filter; 2515 const unsigned target = bld->static_texture_state->target; 2516 const bool aniso = bld->static_sampler_state->aniso; 2517 LLVMValueRef first_level, cube_rho = NULL; 2518 LLVMValueRef lod_ipart = NULL; 2519 struct lp_derivatives cube_derivs; 2520 2521 /* 2522 printf("%s mip %d min %d mag %d\n", __FUNCTION__, 2523 mip_filter, min_filter, mag_filter); 2524 */ 2525 2526 /* 2527 * Choose cube face, recompute texcoords for the chosen face and 2528 * compute rho here too (as it requires transform of derivatives). 2529 */ 2530 if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) { 2531 boolean need_derivs; 2532 need_derivs = ((min_filter != mag_filter || 2533 mip_filter != PIPE_TEX_MIPFILTER_NONE) && 2534 !bld->static_sampler_state->min_max_lod_equal && 2535 !explicit_lod); 2536 lp_build_cube_lookup(bld, coords, derivs, &cube_rho, &cube_derivs, need_derivs); 2537 derivs = &cube_derivs; 2538 if (target == PIPE_TEXTURE_CUBE_ARRAY && !is_lodq) { 2539 /* calculate cube layer coord now */ 2540 LLVMValueRef layer = lp_build_iround(&bld->coord_bld, coords[3]); 2541 LLVMValueRef six = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 6); 2542 layer = lp_build_mul(&bld->int_coord_bld, layer, six); 2543 coords[3] = lp_build_layer_coord(bld, texture_index, TRUE, layer, NULL); 2544 /* because of seamless filtering can't add it to face (coords[2]) here. 
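 * (For illustration: cube array layer 2 covers faces 12..17; seamless
 * filtering may still move a sample onto a neighboring face, so the
 * face index in coords[2] must stay separate from 6*layer until the
 * final face is known.)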
*/ 2545 } 2546 } 2547 else if ((target == PIPE_TEXTURE_1D_ARRAY || 2548 target == PIPE_TEXTURE_2D_ARRAY) && !is_lodq) { 2549 coords[2] = lp_build_iround(&bld->coord_bld, coords[2]); 2550 coords[2] = lp_build_layer_coord(bld, texture_index, FALSE, coords[2], NULL); 2551 } 2552 2553 if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) { 2554 /* 2555 * Clamp p coords to [0,1] for fixed function depth texture format here. 2556 * Technically this is not entirely correct for unorm depth as the ref value 2557 * should be converted to the depth format (quantization!) and comparison 2558 * then done in texture format. This would actually help performance (since 2559 * only need to do it once and could save the per-sample conversion of texels 2560 * to floats instead), but it would need more messy code (would need to push 2561 * at least some bits down to actual fetch so conversion could be skipped, 2562 * and would have ugly interaction with border color, would need to convert 2563 * border color to that format too or do some other tricks to make it work). 2564 */ 2565 const struct util_format_description *format_desc = bld->format_desc; 2566 unsigned chan_type; 2567 /* not entirely sure we couldn't end up with non-valid swizzle here */ 2568 chan_type = format_desc->swizzle[0] <= PIPE_SWIZZLE_W ? 2569 format_desc->channel[format_desc->swizzle[0]].type : 2570 UTIL_FORMAT_TYPE_FLOAT; 2571 if (chan_type != UTIL_FORMAT_TYPE_FLOAT) { 2572 coords[4] = lp_build_clamp(&bld->coord_bld, coords[4], 2573 bld->coord_bld.zero, bld->coord_bld.one); 2574 } 2575 } 2576 2577 /* 2578 * Compute the level of detail (float). 2579 */ 2580 if (min_filter != mag_filter || 2581 mip_filter != PIPE_TEX_MIPFILTER_NONE || is_lodq) { 2582 LLVMValueRef max_aniso = NULL; 2583 2584 if (aniso) 2585 max_aniso = bld->dynamic_state->max_aniso(bld->dynamic_state, 2586 bld->gallivm, 2587 bld->context_ptr, 2588 sampler_index); 2589 2590 /* Need to compute lod either to choose mipmap levels or to 2591 * distinguish between minification/magnification with one mipmap level. 
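 * In scalar terms this amounts roughly to (a sketch; clamping, rounding
 * mode and per-quad vs. per-pixel details omitted):
 *
 *    lod = log2(rho) + lod_bias;      (or the explicit lod, if given)
 *    lod_positive = lod > 0;          (minification vs. magnification)
 *    lod_ipart = ifloor(lod);
 *    lod_fpart = lod - lod_ipart;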
2592 */ 2593 lp_build_lod_selector(bld, is_lodq, texture_index, sampler_index, 2594 coords[0], coords[1], coords[2], cube_rho, 2595 derivs, lod_bias, explicit_lod, 2596 mip_filter, max_aniso, lod, 2597 &lod_ipart, lod_fpart, lod_pos_or_zero); 2598 if (is_lodq) { 2599 LLVMValueRef last_level; 2600 last_level = bld->dynamic_state->last_level(bld->dynamic_state, 2601 bld->gallivm, 2602 bld->context_ptr, 2603 texture_index, NULL); 2604 first_level = bld->dynamic_state->first_level(bld->dynamic_state, 2605 bld->gallivm, 2606 bld->context_ptr, 2607 texture_index, NULL); 2608 last_level = lp_build_sub(&bld->int_bld, last_level, first_level); 2609 last_level = lp_build_int_to_float(&bld->float_bld, last_level); 2610 last_level = lp_build_broadcast_scalar(&bld->lodf_bld, last_level); 2611 2612 switch (mip_filter) { 2613 case PIPE_TEX_MIPFILTER_NONE: 2614 *lod_fpart = bld->lodf_bld.zero; 2615 break; 2616 case PIPE_TEX_MIPFILTER_NEAREST: 2617 *lod_fpart = lp_build_round(&bld->lodf_bld, *lod_fpart); 2618 FALLTHROUGH; 2619 case PIPE_TEX_MIPFILTER_LINEAR: 2620 *lod_fpart = lp_build_clamp(&bld->lodf_bld, *lod_fpart, 2621 bld->lodf_bld.zero, last_level); 2622 break; 2623 } 2624 return; 2625 } 2626 2627 } else { 2628 lod_ipart = bld->lodi_bld.zero; 2629 *lod_pos_or_zero = bld->lodi_bld.zero; 2630 } 2631 2632 if ((bld->num_lods != bld->num_mips || bld->num_lods == 1) && 2633 bld->lodi_bld.type.length != 1) { 2634 /* only makes sense if there's just a single mip level */ 2635 assert(bld->num_mips == 1); 2636 lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1); 2637 } 2638 2639 /* 2640 * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1 2641 */ 2642 2643 if (aniso) { 2644 lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL); 2645 return; 2646 } 2647 2648 switch (mip_filter) { 2649 default: 2650 debug_assert(0 && "bad mip_filter value in lp_build_sample_soa()"); 2651#if defined(NDEBUG) || defined(DEBUG) 2652 FALLTHROUGH; 2653#endif 2654 case PIPE_TEX_MIPFILTER_NONE: 2655 /* always use mip level 0 */ 2656 first_level = bld->dynamic_state->first_level(bld->dynamic_state, 2657 bld->gallivm, bld->context_ptr, 2658 texture_index, NULL); 2659 first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level); 2660 *ilevel0 = first_level; 2661 break; 2662 case PIPE_TEX_MIPFILTER_NEAREST: 2663 assert(lod_ipart); 2664 lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL); 2665 break; 2666 case PIPE_TEX_MIPFILTER_LINEAR: 2667 assert(lod_ipart); 2668 assert(*lod_fpart); 2669 lp_build_linear_mip_levels(bld, texture_index, 2670 lod_ipart, lod_fpart, 2671 ilevel0, ilevel1); 2672 break; 2673 } 2674} 2675 2676static void 2677lp_build_clamp_border_color(struct lp_build_sample_context *bld, 2678 unsigned sampler_unit) 2679{ 2680 struct gallivm_state *gallivm = bld->gallivm; 2681 LLVMBuilderRef builder = gallivm->builder; 2682 LLVMValueRef border_color_ptr = 2683 bld->dynamic_state->border_color(bld->dynamic_state, gallivm, 2684 bld->context_ptr, sampler_unit); 2685 LLVMValueRef border_color; 2686 const struct util_format_description *format_desc = bld->format_desc; 2687 struct lp_type vec4_type = bld->texel_type; 2688 struct lp_build_context vec4_bld; 2689 LLVMValueRef min_clamp = NULL; 2690 LLVMValueRef max_clamp = NULL; 2691 2692 /* 2693 * For normalized format need to clamp border color (technically 2694 * probably should also quantize the data). 
Really sucks doing this
2695 * here, but it can't be avoided (at least for now) since the border color is
2696 * part of sampler state while the texture format is part of sampler_view state.
2697 * GL expects clamping for uint/sint formats too, so
2698 * do that as well (d3d10 can't end up here with uint/sint since it
2699 * only supports them with ld).
2700 */
2701 vec4_type.length = 4;
2702 lp_build_context_init(&vec4_bld, gallivm, vec4_type);
2703
2704 /*
2705 * Vectorized clamping of border color. Loading is a bit of a hack since
2706 * we just cast the pointer to float array to pointer to vec4
2707 * (int or float).
2708 */
2709 border_color_ptr = lp_build_array_get_ptr(gallivm, border_color_ptr,
2710 lp_build_const_int32(gallivm, 0));
2711 border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
2712 LLVMPointerType(vec4_bld.vec_type, 0), "");
2713 border_color = LLVMBuildLoad(builder, border_color_ptr, "");
2714 /* we don't have aligned type in the dynamic state unfortunately */
2715 LLVMSetAlignment(border_color, 4);
2716
2717 /*
2718 * Instead of having some incredibly complex logic which will try to figure out
2719 * clamping necessary for each channel, simply use the first channel, and treat
2720 * mixed signed/unsigned normalized formats specially.
2721 * (Mixed non-normalized, which wouldn't work at all here, do not exist for a
2722 * good reason.)
2723 */
2724 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
2725 int chan;
2726 /* d/s needs special handling because both present means just sampling depth */
2727 if (util_format_is_depth_and_stencil(format_desc->format)) {
2728 chan = format_desc->swizzle[0];
2729 }
2730 else {
2731 chan = util_format_get_first_non_void_channel(format_desc->format);
2732 }
2733 if (chan >= 0 && chan <= PIPE_SWIZZLE_W) {
2734 unsigned chan_type = format_desc->channel[chan].type;
2735 unsigned chan_norm = format_desc->channel[chan].normalized;
2736 unsigned chan_pure = format_desc->channel[chan].pure_integer;
2737 if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
2738 if (chan_norm) {
2739 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
2740 max_clamp = vec4_bld.one;
2741 }
2742 else if (chan_pure) {
2743 /*
2744 * Border color was stored as int, hence need min/max clamp
2745 * only if chan has less than 32 bits.
2746 */
2747 unsigned chan_size = format_desc->channel[chan].size;
2748 if (chan_size < 32) {
2749 min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
2750 0 - (1 << (chan_size - 1)));
2751 max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
2752 (1 << (chan_size - 1)) - 1);
2753 }
2754 }
2755 /* TODO: no idea about non-pure, non-normalized! */
2756 }
2757 else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
2758 if (chan_norm) {
2759 min_clamp = vec4_bld.zero;
2760 max_clamp = vec4_bld.one;
2761 }
2762 /*
2763 * Need an ugly hack here, because we don't have Z32_FLOAT_X8X24:
2764 * we use Z32_FLOAT_S8X24 to imply sampling depth component
2765 * and ignoring stencil, which will blow up here if we try to
2766 * do a uint clamp in a float texel build...
2767 * And even if we had that format, mesa st also thinks using z24s8
2768 * means depth sampling ignoring stencil.
2769 */
2770 else if (chan_pure) {
2771 /*
2772 * Border color was stored as uint, hence never need min
2773 * clamp, and only need max clamp if chan has less than 32 bits.
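 * (E.g. for an 8-bit pure uint channel, chan_size is 8 and the border
 * color gets clamped to [0, 255]; a 32-bit uint channel already spans
 * the full range, so no clamp is emitted.)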
2774 */ 2775 unsigned chan_size = format_desc->channel[chan].size; 2776 if (chan_size < 32) { 2777 max_clamp = lp_build_const_int_vec(gallivm, vec4_type, 2778 (1 << chan_size) - 1); 2779 } 2780 /* TODO: no idea about non-pure, non-normalized! */ 2781 } 2782 } 2783 else if (chan_type == UTIL_FORMAT_TYPE_FIXED) { 2784 /* TODO: I have no idea what clamp this would need if any! */ 2785 } 2786 } 2787 /* mixed plain formats (or different pure size) */ 2788 switch (format_desc->format) { 2789 case PIPE_FORMAT_B10G10R10A2_UINT: 2790 case PIPE_FORMAT_R10G10B10A2_UINT: 2791 { 2792 unsigned max10 = (1 << 10) - 1; 2793 max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10, 2794 max10, (1 << 2) - 1, NULL); 2795 } 2796 break; 2797 case PIPE_FORMAT_R10SG10SB10SA2U_NORM: 2798 min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F, 2799 -1.0F, 0.0F, NULL); 2800 max_clamp = vec4_bld.one; 2801 break; 2802 case PIPE_FORMAT_R8SG8SB8UX8U_NORM: 2803 case PIPE_FORMAT_R5SG5SB6U_NORM: 2804 min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F, 2805 0.0F, 0.0F, NULL); 2806 max_clamp = vec4_bld.one; 2807 break; 2808 default: 2809 break; 2810 } 2811 } 2812 else { 2813 /* cannot figure this out from format description */ 2814 if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 2815 /* s3tc formats are always unorm */ 2816 min_clamp = vec4_bld.zero; 2817 max_clamp = vec4_bld.one; 2818 } 2819 else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC || 2820 format_desc->layout == UTIL_FORMAT_LAYOUT_ETC || 2821 format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { 2822 switch (format_desc->format) { 2823 case PIPE_FORMAT_RGTC1_UNORM: 2824 case PIPE_FORMAT_RGTC2_UNORM: 2825 case PIPE_FORMAT_LATC1_UNORM: 2826 case PIPE_FORMAT_LATC2_UNORM: 2827 case PIPE_FORMAT_ETC1_RGB8: 2828 case PIPE_FORMAT_BPTC_RGBA_UNORM: 2829 case PIPE_FORMAT_BPTC_SRGBA: 2830 min_clamp = vec4_bld.zero; 2831 max_clamp = vec4_bld.one; 2832 break; 2833 case PIPE_FORMAT_RGTC1_SNORM: 2834 case PIPE_FORMAT_RGTC2_SNORM: 2835 case PIPE_FORMAT_LATC1_SNORM: 2836 case PIPE_FORMAT_LATC2_SNORM: 2837 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F); 2838 max_clamp = vec4_bld.one; 2839 break; 2840 case PIPE_FORMAT_BPTC_RGB_FLOAT: 2841 /* not sure if we should clamp to max half float? */ 2842 break; 2843 case PIPE_FORMAT_BPTC_RGB_UFLOAT: 2844 min_clamp = vec4_bld.zero; 2845 break; 2846 default: 2847 assert(0); 2848 break; 2849 } 2850 } 2851 /* 2852 * all others from subsampled/other group, though we don't care 2853 * about yuv (and should not have any from zs here) 2854 */ 2855 else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){ 2856 switch (format_desc->format) { 2857 case PIPE_FORMAT_R8G8_B8G8_UNORM: 2858 case PIPE_FORMAT_G8R8_G8B8_UNORM: 2859 case PIPE_FORMAT_G8R8_B8R8_UNORM: 2860 case PIPE_FORMAT_R8G8_R8B8_UNORM: 2861 case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */ 2862 min_clamp = vec4_bld.zero; 2863 max_clamp = vec4_bld.one; 2864 break; 2865 case PIPE_FORMAT_R8G8Bx_SNORM: 2866 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F); 2867 max_clamp = vec4_bld.one; 2868 break; 2869 /* 2870 * Note smallfloat formats usually don't need clamping 2871 * (they still have infinite range) however this is not 2872 * true for r11g11b10 and r9g9b9e5, which can't represent 2873 * negative numbers (and additionally r9g9b9e5 can't represent 2874 * very large numbers). 
d3d10 seems happy without clamping in 2875 * this case, but gl spec is pretty clear: "for floating 2876 * point and integer formats, border values are clamped to 2877 * the representable range of the format" so do that here. 2878 */ 2879 case PIPE_FORMAT_R11G11B10_FLOAT: 2880 min_clamp = vec4_bld.zero; 2881 break; 2882 case PIPE_FORMAT_R9G9B9E5_FLOAT: 2883 min_clamp = vec4_bld.zero; 2884 max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5); 2885 break; 2886 default: 2887 assert(0); 2888 break; 2889 } 2890 } 2891 } 2892 2893 if (min_clamp) { 2894 border_color = lp_build_max(&vec4_bld, border_color, min_clamp); 2895 } 2896 if (max_clamp) { 2897 border_color = lp_build_min(&vec4_bld, border_color, max_clamp); 2898 } 2899 2900 bld->border_color_clamped = border_color; 2901} 2902 2903 2904/** 2905 * General texture sampling codegen. 2906 * This function handles texture sampling for all texture targets (1D, 2907 * 2D, 3D, cube) and all filtering modes. 2908 */ 2909static void 2910lp_build_sample_general(struct lp_build_sample_context *bld, 2911 unsigned sampler_unit, 2912 boolean is_gather, 2913 const LLVMValueRef *coords, 2914 const LLVMValueRef *offsets, 2915 LLVMValueRef lod_positive, 2916 LLVMValueRef lod_fpart, 2917 LLVMValueRef ilevel0, 2918 LLVMValueRef ilevel1, 2919 LLVMValueRef *colors_out) 2920{ 2921 LLVMBuilderRef builder = bld->gallivm->builder; 2922 const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state; 2923 const unsigned mip_filter = sampler_state->min_mip_filter; 2924 const unsigned min_filter = sampler_state->min_img_filter; 2925 const unsigned mag_filter = sampler_state->mag_img_filter; 2926 LLVMValueRef texels[4]; 2927 unsigned chan; 2928 2929 /* if we need border color, (potentially) clamp it now */ 2930 if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s, 2931 min_filter, 2932 mag_filter) || 2933 (bld->dims > 1 && 2934 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t, 2935 min_filter, 2936 mag_filter)) || 2937 (bld->dims > 2 && 2938 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r, 2939 min_filter, 2940 mag_filter))) { 2941 lp_build_clamp_border_color(bld, sampler_unit); 2942 } 2943 2944 2945 /* 2946 * Get/interpolate texture colors. 2947 */ 2948 2949 for (chan = 0; chan < 4; ++chan) { 2950 texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, ""); 2951 lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]); 2952 } 2953 2954 if (sampler_state->aniso) { 2955 lp_build_sample_aniso(bld, PIPE_TEX_FILTER_NEAREST, mip_filter, 2956 false, coords, offsets, ilevel0, 2957 ilevel1, lod_fpart, texels); 2958 } else if (min_filter == mag_filter) { 2959 /* no need to distinguish between minification and magnification */ 2960 lp_build_sample_mipmap(bld, min_filter, mip_filter, 2961 is_gather, 2962 coords, offsets, 2963 ilevel0, ilevel1, lod_fpart, 2964 texels); 2965 } 2966 else { 2967 /* 2968 * Could also get rid of the if-logic and always use mipmap_both, both 2969 * for the single lod and multi-lod case if nothing really uses this. 2970 */ 2971 if (bld->num_lods == 1) { 2972 /* Emit conditional to choose min image filter or mag image filter 2973 * depending on the lod being > 0 or <= 0, respectively. 
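 * In scalar terms (a sketch of the branch below):
 *
 *    if (lod > 0)
 *       sample_mipmap(min_filter, mip_filter, ...);
 *    else
 *       sample_mipmap(mag_filter, PIPE_TEX_MIPFILTER_NONE, ...);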
2974 */ 2975 struct lp_build_if_state if_ctx; 2976 2977 lod_positive = LLVMBuildTrunc(builder, lod_positive, 2978 LLVMInt1TypeInContext(bld->gallivm->context), 2979 "lod_pos"); 2980 2981 lp_build_if(&if_ctx, bld->gallivm, lod_positive); 2982 { 2983 /* Use the minification filter */ 2984 lp_build_sample_mipmap(bld, min_filter, mip_filter, FALSE, 2985 coords, offsets, 2986 ilevel0, ilevel1, lod_fpart, 2987 texels); 2988 } 2989 lp_build_else(&if_ctx); 2990 { 2991 /* Use the magnification filter */ 2992 lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE, 2993 FALSE, 2994 coords, offsets, 2995 ilevel0, NULL, NULL, 2996 texels); 2997 } 2998 lp_build_endif(&if_ctx); 2999 } 3000 else { 3001 LLVMValueRef need_linear, linear_mask; 3002 unsigned mip_filter_for_nearest; 3003 struct lp_build_if_state if_ctx; 3004 3005 if (min_filter == PIPE_TEX_FILTER_LINEAR) { 3006 linear_mask = lod_positive; 3007 mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE; 3008 } 3009 else { 3010 linear_mask = lp_build_not(&bld->lodi_bld, lod_positive); 3011 mip_filter_for_nearest = mip_filter; 3012 } 3013 need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, 3014 linear_mask); 3015 lp_build_name(need_linear, "need_linear"); 3016 3017 if (bld->num_lods != bld->coord_type.length) { 3018 linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm, 3019 bld->lodi_type, 3020 bld->int_coord_type, 3021 linear_mask); 3022 } 3023 3024 lp_build_if(&if_ctx, bld->gallivm, need_linear); 3025 { 3026 /* 3027 * Do sampling with both filters simultaneously. This means using 3028 * a linear filter and doing some tricks (with weights) for the pixels 3029 * which need nearest filter. 3030 * Note that it's probably rare some pixels need nearest and some 3031 * linear filter but the fixups required for the nearest pixels 3032 * aren't all that complicated so just always run a combined path 3033 * if at least some pixels require linear. 3034 */ 3035 lp_build_sample_mipmap_both(bld, linear_mask, mip_filter, 3036 coords, offsets, 3037 ilevel0, ilevel1, 3038 lod_fpart, lod_positive, 3039 texels); 3040 } 3041 lp_build_else(&if_ctx); 3042 { 3043 /* 3044 * All pixels require just nearest filtering, which is way 3045 * cheaper than linear, hence do a separate path for that. 3046 */ 3047 lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST, 3048 mip_filter_for_nearest, FALSE, 3049 coords, offsets, 3050 ilevel0, ilevel1, lod_fpart, 3051 texels); 3052 } 3053 lp_build_endif(&if_ctx); 3054 } 3055 } 3056 3057 for (chan = 0; chan < 4; ++chan) { 3058 colors_out[chan] = LLVMBuildLoad(builder, texels[chan], ""); 3059 lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]); 3060 } 3061} 3062 3063 3064/** 3065 * Texel fetch function. 3066 * In contrast to general sampling there is no filtering, no coord minification, 3067 * lod (if any) is always explicit uint, coords are uints (in terms of texel units) 3068 * directly to be applied to the selected mip level (after adding texel offsets). 3069 * This function handles texel fetch for all targets where texel fetch is supported 3070 * (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too). 
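 *
 * Roughly the semantics of GLSL texelFetch() (a sketch):
 *
 *    color = texelFetch(tex, ivec2(x, y) + offset, lod);
 *
 * except that out-of-bounds coords return zero here instead of being
 * undefined.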
3071 */ 3072static void 3073lp_build_fetch_texel(struct lp_build_sample_context *bld, 3074 unsigned texture_unit, 3075 LLVMValueRef ms_index, 3076 const LLVMValueRef *coords, 3077 LLVMValueRef explicit_lod, 3078 const LLVMValueRef *offsets, 3079 LLVMValueRef *colors_out) 3080{ 3081 struct lp_build_context *perquadi_bld = &bld->lodi_bld; 3082 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 3083 unsigned dims = bld->dims, chan; 3084 unsigned target = bld->static_texture_state->target; 3085 boolean out_of_bound_ret_zero = TRUE; 3086 LLVMValueRef size, ilevel; 3087 LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL; 3088 LLVMValueRef x = coords[0], y = coords[1], z = coords[2]; 3089 LLVMValueRef width, height, depth, i, j; 3090 LLVMValueRef offset, out_of_bounds, out1; 3091 3092 out_of_bounds = int_coord_bld->zero; 3093 3094 if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) { 3095 if (bld->num_mips != int_coord_bld->type.length) { 3096 ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type, 3097 perquadi_bld->type, explicit_lod, 0); 3098 } 3099 else { 3100 ilevel = explicit_lod; 3101 } 3102 lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel, 3103 out_of_bound_ret_zero ? &out_of_bounds : NULL); 3104 } 3105 else { 3106 assert(bld->num_mips == 1); 3107 if (bld->static_texture_state->target != PIPE_BUFFER) { 3108 ilevel = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm, 3109 bld->context_ptr, texture_unit, NULL); 3110 } 3111 else { 3112 ilevel = lp_build_const_int32(bld->gallivm, 0); 3113 } 3114 } 3115 lp_build_mipmap_level_sizes(bld, ilevel, 3116 &size, 3117 &row_stride_vec, &img_stride_vec); 3118 lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type, 3119 size, &width, &height, &depth); 3120 3121 if (target == PIPE_TEXTURE_1D_ARRAY || 3122 target == PIPE_TEXTURE_2D_ARRAY) { 3123 if (out_of_bound_ret_zero) { 3124 z = lp_build_layer_coord(bld, texture_unit, FALSE, z, &out1); 3125 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); 3126 } 3127 else { 3128 z = lp_build_layer_coord(bld, texture_unit, FALSE, z, NULL); 3129 } 3130 } 3131 3132 /* This is a lot like border sampling */ 3133 if (offsets[0]) { 3134 /* 3135 * coords are really unsigned, offsets are signed, but I don't think 3136 * exceeding 31 bits is possible 3137 */ 3138 x = lp_build_add(int_coord_bld, x, offsets[0]); 3139 } 3140 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero); 3141 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); 3142 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width); 3143 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); 3144 3145 if (dims >= 2) { 3146 if (offsets[1]) { 3147 y = lp_build_add(int_coord_bld, y, offsets[1]); 3148 } 3149 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero); 3150 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); 3151 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height); 3152 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); 3153 3154 if (dims >= 3) { 3155 if (offsets[2]) { 3156 z = lp_build_add(int_coord_bld, z, offsets[2]); 3157 } 3158 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero); 3159 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); 3160 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth); 3161 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); 3162 } 3163 } 3164 3165 
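/*
 * At this point x/y/z are in-bounds texel indices (or will be masked via
 * out_of_bounds). For a simple linear layout the offset computed below is
 * roughly (a sketch; the real helper also handles compressed block layouts):
 *
 *    offset = z * img_stride + y * row_stride + x * bytes_per_texel;
 */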
lp_build_sample_offset(int_coord_bld, 3166 bld->format_desc, 3167 x, y, z, row_stride_vec, img_stride_vec, 3168 &offset, &i, &j); 3169 3170 if (bld->static_texture_state->target != PIPE_BUFFER) { 3171 offset = lp_build_add(int_coord_bld, offset, 3172 lp_build_get_mip_offsets(bld, ilevel)); 3173 } 3174 3175 if (bld->fetch_ms) { 3176 LLVMValueRef num_samples; 3177 num_samples = bld->dynamic_state->num_samples(bld->dynamic_state, bld->gallivm, 3178 bld->context_ptr, texture_unit, NULL); 3179 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, ms_index, int_coord_bld->zero); 3180 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); 3181 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, ms_index, lp_build_broadcast_scalar(int_coord_bld, num_samples)); 3182 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); 3183 offset = lp_build_add(int_coord_bld, offset, 3184 lp_build_mul(int_coord_bld, bld->sample_stride, ms_index)); 3185 } 3186 3187 offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds); 3188 3189 lp_build_fetch_rgba_soa(bld->gallivm, 3190 bld->format_desc, 3191 bld->texel_type, TRUE, 3192 bld->base_ptr, offset, 3193 i, j, 3194 bld->cache, 3195 colors_out); 3196 3197 if (out_of_bound_ret_zero) { 3198 /* 3199 * Only needed for ARB_robust_buffer_access_behavior and d3d10. 3200 * Could use min/max above instead of out-of-bounds comparisons 3201 * if we don't care about the result returned for out-of-bounds. 3202 */ 3203 for (chan = 0; chan < 4; chan++) { 3204 colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds, 3205 bld->texel_bld.zero, colors_out[chan]); 3206 } 3207 } 3208} 3209 3210 3211/** 3212 * Just set texels to white instead of actually sampling the texture. 3213 * For debugging. 3214 */ 3215void 3216lp_build_sample_nop(struct gallivm_state *gallivm, 3217 struct lp_type type, 3218 const LLVMValueRef *coords, 3219 LLVMValueRef texel_out[4]) 3220{ 3221 LLVMValueRef one = lp_build_one(gallivm, type); 3222 unsigned chan; 3223 3224 for (chan = 0; chan < 4; chan++) { 3225 texel_out[chan] = one; 3226 } 3227} 3228 3229static struct lp_type 3230lp_build_texel_type(struct lp_type texel_type, 3231 const struct util_format_description *format_desc) 3232{ 3233 /* always using the first channel hopefully should be safe, 3234 * if not things WILL break in other places anyway. 3235 */ 3236 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB && 3237 format_desc->channel[0].pure_integer) { 3238 if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { 3239 texel_type = lp_type_int_vec(texel_type.width, texel_type.width * texel_type.length); 3240 } else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) { 3241 texel_type = lp_type_uint_vec(texel_type.width, texel_type.width * texel_type.length); 3242 } 3243 } 3244 else if (util_format_has_stencil(format_desc) && 3245 !util_format_has_depth(format_desc)) { 3246 /* for stencil only formats, sample stencil (uint) */ 3247 texel_type = lp_type_uint_vec(texel_type.width, texel_type.width * texel_type.length); 3248 } 3249 return texel_type; 3250} 3251 3252 3253/** 3254 * Build the actual texture sampling code. 3255 * 'texel' will return a vector of four LLVMValueRefs corresponding to 3256 * R, G, B, A. 3257 * \param type vector float type to use for coords, etc. 
3258 * \param sample_key 3259 * \param derivs partial derivatives of (s,t,r,q) with respect to x and y 3260 */ 3261static void 3262lp_build_sample_soa_code(struct gallivm_state *gallivm, 3263 const struct lp_static_texture_state *static_texture_state, 3264 const struct lp_static_sampler_state *static_sampler_state, 3265 struct lp_sampler_dynamic_state *dynamic_state, 3266 struct lp_type type, 3267 unsigned sample_key, 3268 unsigned texture_index, 3269 unsigned sampler_index, 3270 LLVMValueRef context_ptr, 3271 LLVMValueRef thread_data_ptr, 3272 const LLVMValueRef *coords, 3273 const LLVMValueRef *offsets, 3274 const struct lp_derivatives *derivs, /* optional */ 3275 LLVMValueRef lod, /* optional */ 3276 LLVMValueRef ms_index, /* optional */ 3277 LLVMValueRef aniso_filter_table, 3278 LLVMValueRef texel_out[4]) 3279{ 3280 unsigned target = static_texture_state->target; 3281 unsigned dims = texture_dims(target); 3282 unsigned num_quads = type.length / 4; 3283 unsigned mip_filter, min_img_filter, mag_img_filter, i; 3284 struct lp_build_sample_context bld; 3285 struct lp_static_sampler_state derived_sampler_state = *static_sampler_state; 3286 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 3287 LLVMBuilderRef builder = gallivm->builder; 3288 LLVMValueRef tex_width, newcoords[5]; 3289 enum lp_sampler_lod_property lod_property; 3290 enum lp_sampler_lod_control lod_control; 3291 enum lp_sampler_op_type op_type; 3292 LLVMValueRef lod_bias = NULL; 3293 LLVMValueRef explicit_lod = NULL; 3294 boolean op_is_tex, op_is_lodq, op_is_gather, fetch_ms; 3295 3296 if (0) { 3297 enum pipe_format fmt = static_texture_state->format; 3298 debug_printf("Sample from %s\n", util_format_name(fmt)); 3299 } 3300 3301 lod_property = (sample_key & LP_SAMPLER_LOD_PROPERTY_MASK) >> 3302 LP_SAMPLER_LOD_PROPERTY_SHIFT; 3303 lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >> 3304 LP_SAMPLER_LOD_CONTROL_SHIFT; 3305 op_type = (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> 3306 LP_SAMPLER_OP_TYPE_SHIFT; 3307 fetch_ms = !!(sample_key & LP_SAMPLER_FETCH_MS); 3308 3309 op_is_tex = op_type == LP_SAMPLER_OP_TEXTURE; 3310 op_is_lodq = op_type == LP_SAMPLER_OP_LODQ; 3311 op_is_gather = op_type == LP_SAMPLER_OP_GATHER; 3312 3313 if (lod_control == LP_SAMPLER_LOD_BIAS) { 3314 lod_bias = lod; 3315 assert(lod); 3316 assert(derivs == NULL); 3317 } 3318 else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) { 3319 explicit_lod = lod; 3320 assert(lod); 3321 assert(derivs == NULL); 3322 } 3323 else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { 3324 assert(derivs); 3325 assert(lod == NULL); 3326 } 3327 else { 3328 assert(derivs == NULL); 3329 assert(lod == NULL); 3330 } 3331 3332 if (static_texture_state->format == PIPE_FORMAT_NONE) { 3333 /* 3334 * If there's nothing bound, format is NONE, and we must return 3335 * all zero as mandated by d3d10 in this case. 
3336 */
3337 unsigned chan;
3338 LLVMValueRef zero = lp_build_zero(gallivm, type);
3339 for (chan = 0; chan < 4; chan++) {
3340 texel_out[chan] = zero;
3341 }
3342 return;
3343 }
3344
3345 assert(type.floating);
3346
3347 /* Setup our build context */
3348 memset(&bld, 0, sizeof bld);
3349 bld.gallivm = gallivm;
3350 bld.context_ptr = context_ptr;
3351 bld.aniso_filter_table = aniso_filter_table;
3352 bld.static_sampler_state = &derived_sampler_state;
3353 bld.static_texture_state = static_texture_state;
3354 bld.dynamic_state = dynamic_state;
3355 bld.format_desc = util_format_description(static_texture_state->format);
3356 bld.dims = dims;
3357
3358 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD || op_is_lodq) {
3359 bld.no_quad_lod = TRUE;
3360 }
3361 if (!(gallivm_perf & GALLIVM_PERF_RHO_APPROX) || op_is_lodq) {
3362 bld.no_rho_approx = TRUE;
3363 }
3364 if (!(gallivm_perf & GALLIVM_PERF_BRILINEAR) || op_is_lodq || lod_bias || explicit_lod) {
3365 bld.no_brilinear = TRUE;
3366 }
3367
3368 bld.vector_width = lp_type_width(type);
3369
3370 bld.float_type = lp_type_float(32);
3371 bld.int_type = lp_type_int(32);
3372 bld.coord_type = type;
3373 bld.int_coord_type = lp_int_type(type);
3374 bld.float_size_in_type = lp_type_float(32);
3375 bld.float_size_in_type.length = dims > 1 ? 4 : 1;
3376 bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
3377
3378 bld.texel_type = lp_build_texel_type(type, bld.format_desc);
3379
3380 if (!static_texture_state->level_zero_only ||
3381 !static_sampler_state->max_lod_pos || op_is_lodq) {
3382 derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
3383 } else {
3384 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
3385 }
3386 if (op_is_gather) {
3387 /*
3388 * gather4 is exactly like GL_LINEAR filtering except that the actual
3389 * filtering is skipped at the end. It mostly uses the same paths, so cube face
3390 * selection, coord wrapping etc. all naturally use the same code.
3391 */
3392 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
3393 derived_sampler_state.min_img_filter = PIPE_TEX_FILTER_LINEAR;
3394 derived_sampler_state.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
3395 }
3396 mip_filter = derived_sampler_state.min_mip_filter;
3397
3398 if (0) {
3399 debug_printf(" .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
3400 }
3401
3402 if (static_texture_state->target == PIPE_TEXTURE_CUBE ||
3403 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)
3404 {
3405 /*
3406 * Seamless filtering ignores wrap modes.
3407 * Setting to CLAMP_TO_EDGE is correct for nearest filtering; for
3408 * bilinear it's not correct, but way better than using, for instance, repeat.
3409 * Note we even set this for non-seamless. Technically GL allows any wrap
3410 * mode, which made sense when supporting true borders (can get seamless
3411 * effect with border and CLAMP_TO_BORDER), but gallium doesn't support
3412 * borders, d3d9 requires wrap modes to be ignored, and it's a pain to fix
3413 * up the sampler state (as it makes it texture dependent).
3414 */
3415 derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
3416 derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
3417 }
3418 /*
3419 * We could force CLAMP to CLAMP_TO_EDGE here if min/mag filter is nearest,
3420 * so the AoS path could be used. Not sure it's worth the trouble...
3421 */
3422
3423 min_img_filter = derived_sampler_state.min_img_filter;
3424 mag_img_filter = derived_sampler_state.mag_img_filter;
3425
3426
3427 /*
3428 * This is all a bit complicated; different paths are chosen for performance
3429 * reasons.
3430 * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
3431 * everything (the last two options are equivalent for the 4-wide case).
3432 * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad
3433 * lod is calculated first and the lod value extracted afterwards, making this
3434 * case behave, as far as lod handling in the further sample/filter code is
3435 * concerned, basically the same as the 1-lod-for-everything case.
3436 * Different lod handling mostly shows up when building mipmap sizes
3437 * (lp_build_mipmap_level_sizes() and friends) and also in filtering
3438 * (getting the fractional part of the lod to the right texels).
3439 */
3440
3441 /*
3442 * There are other situations where at least the multiple int lods could be
3443 * avoided, like min and max lod being equal.
3444 */
3445 bld.num_mips = bld.num_lods = 1;
3446
3447 if (bld.no_quad_lod && bld.no_rho_approx &&
3448 ((mip_filter != PIPE_TEX_MIPFILTER_NONE && op_is_tex &&
3449 (static_texture_state->target == PIPE_TEXTURE_CUBE ||
3450 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)) ||
3451 op_is_lodq)) {
3452 /*
3453 * Special case for using per-pixel lod even for implicit lod,
3454 * which is generally never required (ok by APIs) except to please
3455 * some (somewhat broken imho) tests (because per-pixel face selection
3456 * can cause derivatives to be different for pixels outside the primitive
3457 * due to the major axis division even if pre-project derivatives are
3458 * looking normal).
3459 * For lodq, we do it to simply avoid scalar pack / unpack (albeit for
3460 * cube maps we do indeed get per-pixel lod values).
3461 */
3462 bld.num_mips = type.length;
3463 bld.num_lods = type.length;
3464 }
3465 else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT ||
3466 (explicit_lod || lod_bias || derivs)) {
3467 if ((!op_is_tex && target != PIPE_BUFFER) ||
3468 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
3469 bld.num_mips = type.length;
3470 bld.num_lods = type.length;
3471 }
3472 else if (op_is_tex && min_img_filter != mag_img_filter) {
3473 bld.num_mips = 1;
3474 bld.num_lods = type.length;
3475 }
3476 }
3477 /* TODO: for true scalar_lod should only use 1 lod value */
3478 else if ((!op_is_tex && explicit_lod && target != PIPE_BUFFER) ||
3479 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
3480 bld.num_mips = num_quads;
3481 bld.num_lods = num_quads;
3482 }
3483 else if (op_is_tex && min_img_filter != mag_img_filter) {
3484 bld.num_mips = 1;
3485 bld.num_lods = num_quads;
3486 }
3487
3488 bld.fetch_ms = fetch_ms;
3489 if (op_is_gather)
3490 bld.gather_comp = (sample_key & LP_SAMPLER_GATHER_COMP_MASK) >> LP_SAMPLER_GATHER_COMP_SHIFT;
3491 bld.lodf_type = type;
3492 /* we want native vector size to be able to use our intrinsics */
3493 if (bld.num_lods != type.length) {
3494 /* TODO: this currently always has to be per-quad or per-element */
3495 bld.lodf_type.length = type.length > 4 ?
((type.length + 15) / 16) * 4 : 1;
3496 }
3497 bld.lodi_type = lp_int_type(bld.lodf_type);
3498 bld.levelf_type = bld.lodf_type;
3499 if (bld.num_mips == 1) {
3500 bld.levelf_type.length = 1;
3501 }
3502 bld.leveli_type = lp_int_type(bld.levelf_type);
3503 bld.float_size_type = bld.float_size_in_type;
3504 /* Note: size vectors may not be native. They contain minified w/h/d/_ values,
3505 * with per-element lod that is w0/h0/d0/_/w1/h1/d1/_/... so up to 8x4f32 */
3506 if (bld.num_mips > 1) {
3507 bld.float_size_type.length = bld.num_mips == type.length ?
3508 bld.num_mips * bld.float_size_in_type.length :
3509 type.length;
3510 }
3511 bld.int_size_type = lp_int_type(bld.float_size_type);
3512
3513 lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
3514 lp_build_context_init(&bld.float_vec_bld, gallivm, type);
3515 lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
3516 lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
3517 lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
3518 lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
3519 lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
3520 lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
3521 lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
3522 lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
3523 lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
3524 lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
3525 lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
3526 lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
3527
3528 /* Get the dynamic state */
3529 tex_width = dynamic_state->width(dynamic_state, gallivm,
3530 context_ptr, texture_index, NULL);
3531 bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm,
3532 context_ptr, texture_index, NULL);
3533 bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm,
3534 context_ptr, texture_index, NULL);
3535 bld.base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm,
3536 context_ptr, texture_index, NULL);
3537 bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm,
3538 context_ptr, texture_index, NULL);
3539
3540 if (fetch_ms)
3541 bld.sample_stride = lp_build_broadcast_scalar(&bld.int_coord_bld, dynamic_state->sample_stride(dynamic_state, gallivm,
3542 context_ptr, texture_index, NULL));
3543 /* Note that mip_offsets is an array[level] of offsets to texture images */
3544
3545 if (dynamic_state->cache_ptr && thread_data_ptr) {
3546 bld.cache = dynamic_state->cache_ptr(dynamic_state, gallivm,
3547 thread_data_ptr, texture_index);
3548 }
3549
3550 /* width, height, depth as single int vector */
3551 if (dims <= 1) {
3552 bld.int_size = tex_width;
3553 }
3554 else {
3555 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
3556 tex_width,
3557 LLVMConstInt(i32t, 0, 0), "");
3558 if (dims >= 2) {
3559 LLVMValueRef tex_height =
3560 dynamic_state->height(dynamic_state, gallivm,
3561 context_ptr, texture_index, NULL);
3562 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
3563 tex_height,
3564 LLVMConstInt(i32t, 1, 0), "");
3565 if (dims >= 3) {
3566 LLVMValueRef tex_depth =
3567 dynamic_state->depth(dynamic_state, gallivm, context_ptr,
3568 texture_index, NULL);
3569 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
3570 tex_depth,
3571 LLVMConstInt(i32t, 2, 0), "");
3572 }
3573 }
3574 }
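   /*
    * Sketch of the layout built above (illustrative): for a 3d texture,
    * bld.int_size now holds <width, height, depth, undef> as a small int
    * vector, for 2d <width, height, undef, undef>, and for 1d just the
    * scalar width. Per-mip minification of these values happens later,
    * cf. lp_build_mipmap_level_sizes().
    */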
3575
3576 for (i = 0; i < 5; i++) {
3577 newcoords[i] = coords[i];
3578 }
3579
3580 if (util_format_is_pure_integer(static_texture_state->format) &&
3581 !util_format_has_depth(bld.format_desc) && op_is_tex &&
3582 (static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR ||
3583 static_sampler_state->min_img_filter == PIPE_TEX_FILTER_LINEAR ||
3584 static_sampler_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
3585 /*
3586 * Bail if impossible filtering is specified (the awkward additional
3587 * depth check is because it is legal in gallium to have things like S8Z24
3588 * here which would say it's pure int even though such formats should sample
3589 * the depth component).
3590 * In GL such filters make the texture incomplete; this makes it robust
3591 * against gallium frontends which set this up regardless (we'd crash in the
3592 * lerp later otherwise).
3593 * At least in some apis it may be legal to use such filters with lod
3594 * queries and/or gather (at least for gather d3d10 says only the wrap
3595 * bits are really used, hence filter bits are likely simply ignored).
3596 * For fetch, we don't get valid samplers either way here.
3597 */
3598 unsigned chan;
3599 LLVMValueRef zero = lp_build_zero(gallivm, type);
3600 for (chan = 0; chan < 4; chan++) {
3601 texel_out[chan] = zero;
3602 }
3603 return;
3604 }
3605
3606 if (0) {
3607 /* For debug: no-op texture sampling */
3608 lp_build_sample_nop(gallivm,
3609 bld.texel_type,
3610 newcoords,
3611 texel_out);
3612 }
3613
3614 else if (op_type == LP_SAMPLER_OP_FETCH) {
3615 lp_build_fetch_texel(&bld, texture_index, ms_index, newcoords,
3616 lod, offsets,
3617 texel_out);
3618 }
3619
3620 else {
3621 LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
3622 LLVMValueRef ilevel0 = NULL, ilevel1 = NULL, lod = NULL;
3623 boolean use_aos;
3624
3625 use_aos = util_format_fits_8unorm(bld.format_desc) &&
3626 op_is_tex &&
3627 /* not sure this is strictly needed or simply impossible */
3628 derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE &&
3629 derived_sampler_state.aniso == 0 &&
3630 lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
3631
3632 use_aos &= bld.num_lods <= num_quads ||
3633 derived_sampler_state.min_img_filter ==
3634 derived_sampler_state.mag_img_filter;
3635
3636 if (gallivm_perf & GALLIVM_PERF_NO_AOS_SAMPLING) {
3637 use_aos = 0;
3638 }
3639
3640 if (dims > 1) {
3641 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
3642 if (dims > 2) {
3643 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r);
3644 }
3645 }
3646 if ((static_texture_state->target == PIPE_TEXTURE_CUBE ||
3647 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
3648 derived_sampler_state.seamless_cube_map &&
3649 (derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR ||
3650 derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
3651 /* theoretically possible with AoS filtering but not implemented (complex!)
*/
3652 use_aos = 0;
3653 }
3654
3655 if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
3656 !use_aos && util_format_fits_8unorm(bld.format_desc)) {
3657 debug_printf("%s: using floating point linear filtering for %s\n",
3658 __FUNCTION__, bld.format_desc->short_name);
3659 debug_printf(" min_img %d mag_img %d mip %d target %d seamless %d"
3660 " wraps %d wrapt %d wrapr %d\n",
3661 derived_sampler_state.min_img_filter,
3662 derived_sampler_state.mag_img_filter,
3663 derived_sampler_state.min_mip_filter,
3664 static_texture_state->target,
3665 derived_sampler_state.seamless_cube_map,
3666 derived_sampler_state.wrap_s,
3667 derived_sampler_state.wrap_t,
3668 derived_sampler_state.wrap_r);
3669 }
3670
3671 lp_build_sample_common(&bld, op_is_lodq, texture_index, sampler_index,
3672 newcoords,
3673 derivs, lod_bias, explicit_lod,
3674 &lod_positive, &lod, &lod_fpart,
3675 &ilevel0, &ilevel1);
3676
3677 if (op_is_lodq) {
3678 texel_out[0] = lod_fpart;
3679 texel_out[1] = lod;
3680 texel_out[2] = texel_out[3] = bld.coord_bld.zero;
3681 return;
3682 }
3683
3684 if (use_aos && static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
3685 /* The aos path doesn't do seamless filtering, so simply add the cube
3686 * layer to the face now.
3687 */
3688 newcoords[2] = lp_build_add(&bld.int_coord_bld, newcoords[2], newcoords[3]);
3689 }
3690
3691 /*
3692 * we only try 8-wide sampling with soa, or if we have AVX2,
3693 * as it appears to be a loss with just AVX
3694 */
3695 if (num_quads == 1 || !use_aos ||
3696 (util_get_cpu_caps()->has_avx2 &&
3697 (bld.num_lods == 1 ||
3698 derived_sampler_state.min_img_filter == derived_sampler_state.mag_img_filter))) {
3699 if (use_aos) {
3700 /* do sampling/filtering with fixed pt arithmetic */
3701 lp_build_sample_aos(&bld, sampler_index,
3702 newcoords[0], newcoords[1],
3703 newcoords[2],
3704 offsets, lod_positive, lod_fpart,
3705 ilevel0, ilevel1,
3706 texel_out);
3707 }
3708
3709 else {
3710 lp_build_sample_general(&bld, sampler_index,
3711 op_type == LP_SAMPLER_OP_GATHER,
3712 newcoords, offsets,
3713 lod_positive, lod_fpart,
3714 ilevel0, ilevel1,
3715 texel_out);
3716 }
3717 }
3718 else {
3719 unsigned j;
3720 struct lp_build_sample_context bld4;
3721 struct lp_type type4 = type;
3722 unsigned i;
3723 LLVMValueRef texelout4[4];
3724 LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
3725
3726 type4.length = 4;
3727
3728 /* Setup our build context */
3729 memset(&bld4, 0, sizeof bld4);
3730 bld4.no_quad_lod = bld.no_quad_lod;
3731 bld4.no_rho_approx = bld.no_rho_approx;
3732 bld4.no_brilinear = bld.no_brilinear;
3733 bld4.gallivm = bld.gallivm;
3734 bld4.context_ptr = bld.context_ptr;
3735 bld4.aniso_filter_table = aniso_filter_table;
3736 bld4.static_texture_state = bld.static_texture_state;
3737 bld4.static_sampler_state = bld.static_sampler_state;
3738 bld4.dynamic_state = bld.dynamic_state;
3739 bld4.format_desc = bld.format_desc;
3740 bld4.dims = bld.dims;
3741 bld4.row_stride_array = bld.row_stride_array;
3742 bld4.img_stride_array = bld.img_stride_array;
3743 bld4.base_ptr = bld.base_ptr;
3744 bld4.mip_offsets = bld.mip_offsets;
3745 bld4.int_size = bld.int_size;
3746 bld4.cache = bld.cache;
3747
3748 bld4.vector_width = lp_type_width(type4);
3749
3750 bld4.float_type = lp_type_float(32);
3751 bld4.int_type = lp_type_int(32);
3752 bld4.coord_type = type4;
3753 bld4.int_coord_type = lp_int_type(type4);
3754 bld4.float_size_in_type = lp_type_float(32);
3755 bld4.float_size_in_type.length = dims > 1 ?
4 : 1; 3756 bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type); 3757 bld4.texel_type = bld.texel_type; 3758 bld4.texel_type.length = 4; 3759 3760 bld4.num_mips = bld4.num_lods = 1; 3761 if (bld4.no_quad_lod && bld4.no_rho_approx && 3762 (static_texture_state->target == PIPE_TEXTURE_CUBE || 3763 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) && 3764 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { 3765 bld4.num_mips = type4.length; 3766 bld4.num_lods = type4.length; 3767 } 3768 if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT && 3769 (explicit_lod || lod_bias || derivs)) { 3770 if ((!op_is_tex && target != PIPE_BUFFER) || 3771 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { 3772 bld4.num_mips = type4.length; 3773 bld4.num_lods = type4.length; 3774 } 3775 else if (op_is_tex && min_img_filter != mag_img_filter) { 3776 bld4.num_mips = 1; 3777 bld4.num_lods = type4.length; 3778 } 3779 } 3780 3781 /* we want native vector size to be able to use our intrinsics */ 3782 bld4.lodf_type = type4; 3783 if (bld4.num_lods != type4.length) { 3784 bld4.lodf_type.length = 1; 3785 } 3786 bld4.lodi_type = lp_int_type(bld4.lodf_type); 3787 bld4.levelf_type = type4; 3788 if (bld4.num_mips != type4.length) { 3789 bld4.levelf_type.length = 1; 3790 } 3791 bld4.leveli_type = lp_int_type(bld4.levelf_type); 3792 bld4.float_size_type = bld4.float_size_in_type; 3793 if (bld4.num_mips > 1) { 3794 bld4.float_size_type.length = bld4.num_mips == type4.length ? 3795 bld4.num_mips * bld4.float_size_in_type.length : 3796 type4.length; 3797 } 3798 bld4.int_size_type = lp_int_type(bld4.float_size_type); 3799 3800 lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type); 3801 lp_build_context_init(&bld4.float_vec_bld, gallivm, type4); 3802 lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type); 3803 lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type); 3804 lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type); 3805 lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type); 3806 lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type); 3807 lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type); 3808 lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type); 3809 lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type); 3810 lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type); 3811 lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type); 3812 lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type); 3813 lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type); 3814 3815 for (i = 0; i < num_quads; i++) { 3816 LLVMValueRef s4, t4, r4; 3817 LLVMValueRef lod_positive4, lod_fpart4 = NULL; 3818 LLVMValueRef ilevel04, ilevel14 = NULL; 3819 LLVMValueRef offsets4[4] = { NULL }; 3820 unsigned num_lods = bld4.num_lods; 3821 3822 s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4); 3823 t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4); 3824 r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4); 3825 3826 if (offsets[0]) { 3827 offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4); 3828 if (dims > 1) { 3829 offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4); 3830 if (dims > 2) { 3831 offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4); 3832 } 3833 } 3834 } 3835 lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods); 3836 ilevel04 = bld.num_mips == 1 ? 
ilevel0 : 3837 lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods); 3838 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { 3839 ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods); 3840 lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods); 3841 } 3842 3843 if (use_aos) { 3844 /* do sampling/filtering with fixed pt arithmetic */ 3845 lp_build_sample_aos(&bld4, sampler_index, 3846 s4, t4, r4, offsets4, 3847 lod_positive4, lod_fpart4, 3848 ilevel04, ilevel14, 3849 texelout4); 3850 } 3851 3852 else { 3853 /* this path is currently unreachable and hence might break easily... */ 3854 LLVMValueRef newcoords4[5]; 3855 newcoords4[0] = s4; 3856 newcoords4[1] = t4; 3857 newcoords4[2] = r4; 3858 newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4); 3859 newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4); 3860 3861 lp_build_sample_general(&bld4, sampler_index, 3862 op_type == LP_SAMPLER_OP_GATHER, 3863 newcoords4, offsets4, 3864 lod_positive4, lod_fpart4, 3865 ilevel04, ilevel14, 3866 texelout4); 3867 } 3868 for (j = 0; j < 4; j++) { 3869 texelouttmp[j][i] = texelout4[j]; 3870 } 3871 } 3872 3873 for (j = 0; j < 4; j++) { 3874 texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads); 3875 } 3876 } 3877 } 3878 3879 if (target != PIPE_BUFFER && op_type != LP_SAMPLER_OP_GATHER) { 3880 apply_sampler_swizzle(&bld, texel_out); 3881 } 3882 3883 /* 3884 * texel type can be a (32bit) int/uint (for pure int formats only), 3885 * however we are expected to always return floats (storage is untyped). 3886 */ 3887 if (!bld.texel_type.floating) { 3888 unsigned chan; 3889 for (chan = 0; chan < 4; chan++) { 3890 texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan], 3891 lp_build_vec_type(gallivm, type), ""); 3892 } 3893 } 3894} 3895 3896 3897#define USE_TEX_FUNC_CALL 1 3898 3899#define LP_MAX_TEX_FUNC_ARGS 32 3900 3901static inline void 3902get_target_info(enum pipe_texture_target target, 3903 unsigned *num_coords, unsigned *num_derivs, 3904 unsigned *num_offsets, unsigned *layer) 3905{ 3906 unsigned dims = texture_dims(target); 3907 *num_coords = dims; 3908 *num_offsets = dims; 3909 *num_derivs = (target == PIPE_TEXTURE_CUBE || 3910 target == PIPE_TEXTURE_CUBE_ARRAY) ? 3 : dims; 3911 *layer = has_layer_coord(target) ? 2: 0; 3912 if (target == PIPE_TEXTURE_CUBE_ARRAY) { 3913 /* 3914 * dims doesn't include r coord for cubes - this is handled 3915 * by layer instead, but need to fix up for cube arrays... 3916 */ 3917 *layer = 3; 3918 *num_coords = 3; 3919 } 3920} 3921 3922 3923/** 3924 * Generate the function body for a texture sampling function. 
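 *
 * The arguments are expected in the order in which lp_build_sample_soa_func()
 * pushes them: context pointer, then (if present) the aniso filter table and
 * thread data pointer, the coords, and depending on sample_key the layer,
 * shadow coord, ms index, offsets and lod or derivatives.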
3925 */ 3926static void 3927lp_build_sample_gen_func(struct gallivm_state *gallivm, 3928 const struct lp_static_texture_state *static_texture_state, 3929 const struct lp_static_sampler_state *static_sampler_state, 3930 struct lp_sampler_dynamic_state *dynamic_state, 3931 struct lp_type type, 3932 unsigned texture_index, 3933 unsigned sampler_index, 3934 LLVMValueRef function, 3935 unsigned num_args, 3936 unsigned sample_key, 3937 bool has_aniso_filter_table) 3938{ 3939 LLVMBuilderRef old_builder; 3940 LLVMBasicBlockRef block; 3941 LLVMValueRef coords[5]; 3942 LLVMValueRef offsets[3] = { NULL }; 3943 LLVMValueRef lod = NULL; 3944 LLVMValueRef ms_index = NULL; 3945 LLVMValueRef context_ptr; 3946 LLVMValueRef thread_data_ptr = NULL; 3947 LLVMValueRef aniso_filter_table = NULL; 3948 LLVMValueRef texel_out[4]; 3949 struct lp_derivatives derivs; 3950 struct lp_derivatives *deriv_ptr = NULL; 3951 unsigned num_param = 0; 3952 unsigned i, num_coords, num_derivs, num_offsets, layer; 3953 enum lp_sampler_lod_control lod_control; 3954 enum lp_sampler_op_type op_type; 3955 boolean need_cache = FALSE; 3956 3957 lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >> 3958 LP_SAMPLER_LOD_CONTROL_SHIFT; 3959 3960 op_type = (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> 3961 LP_SAMPLER_OP_TYPE_SHIFT; 3962 3963 get_target_info(static_texture_state->target, 3964 &num_coords, &num_derivs, &num_offsets, &layer); 3965 3966 /* lod query doesn't take a layer */ 3967 if (layer && op_type == LP_SAMPLER_OP_LODQ) 3968 layer = 0; 3969 3970 if (dynamic_state->cache_ptr) { 3971 const struct util_format_description *format_desc; 3972 format_desc = util_format_description(static_texture_state->format); 3973 if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 3974 need_cache = TRUE; 3975 } 3976 } 3977 3978 /* "unpack" arguments */ 3979 context_ptr = LLVMGetParam(function, num_param++); 3980 if (has_aniso_filter_table) 3981 aniso_filter_table = LLVMGetParam(function, num_param++); 3982 if (need_cache) { 3983 thread_data_ptr = LLVMGetParam(function, num_param++); 3984 } 3985 for (i = 0; i < num_coords; i++) { 3986 coords[i] = LLVMGetParam(function, num_param++); 3987 } 3988 for (i = num_coords; i < 5; i++) { 3989 /* This is rather unfortunate... 
*/ 3990 coords[i] = lp_build_undef(gallivm, type); 3991 } 3992 if (layer) { 3993 coords[layer] = LLVMGetParam(function, num_param++); 3994 } 3995 if (sample_key & LP_SAMPLER_SHADOW) { 3996 coords[4] = LLVMGetParam(function, num_param++); 3997 } 3998 if (sample_key & LP_SAMPLER_FETCH_MS) { 3999 ms_index = LLVMGetParam(function, num_param++); 4000 } 4001 if (sample_key & LP_SAMPLER_OFFSETS) { 4002 for (i = 0; i < num_offsets; i++) { 4003 offsets[i] = LLVMGetParam(function, num_param++); 4004 } 4005 } 4006 if (lod_control == LP_SAMPLER_LOD_BIAS || 4007 lod_control == LP_SAMPLER_LOD_EXPLICIT) { 4008 lod = LLVMGetParam(function, num_param++); 4009 } 4010 else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { 4011 for (i = 0; i < num_derivs; i++) { 4012 derivs.ddx[i] = LLVMGetParam(function, num_param++); 4013 derivs.ddy[i] = LLVMGetParam(function, num_param++); 4014 } 4015 deriv_ptr = &derivs; 4016 } 4017 4018 assert(num_args == num_param); 4019 4020 /* 4021 * Function body 4022 */ 4023 4024 old_builder = gallivm->builder; 4025 block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry"); 4026 gallivm->builder = LLVMCreateBuilderInContext(gallivm->context); 4027 LLVMPositionBuilderAtEnd(gallivm->builder, block); 4028 4029 lp_build_sample_soa_code(gallivm, 4030 static_texture_state, 4031 static_sampler_state, 4032 dynamic_state, 4033 type, 4034 sample_key, 4035 texture_index, 4036 sampler_index, 4037 context_ptr, 4038 thread_data_ptr, 4039 coords, 4040 offsets, 4041 deriv_ptr, 4042 lod, 4043 ms_index, 4044 aniso_filter_table, 4045 texel_out); 4046 4047 LLVMBuildAggregateRet(gallivm->builder, texel_out, 4); 4048 4049 LLVMDisposeBuilder(gallivm->builder); 4050 gallivm->builder = old_builder; 4051 4052 gallivm_verify_function(gallivm, function); 4053} 4054 4055 4056/** 4057 * Call the matching function for texture sampling. 4058 * If there's no match, generate a new one. 
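 *
 * Matching is done purely by function name ("texfunc_res_%d_sam_%d_%x", i.e.
 * texture unit, sampler unit and sample_key), so call sites with identical
 * static state share one generated function per module.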
4059 */ 4060static void 4061lp_build_sample_soa_func(struct gallivm_state *gallivm, 4062 const struct lp_static_texture_state *static_texture_state, 4063 const struct lp_static_sampler_state *static_sampler_state, 4064 struct lp_sampler_dynamic_state *dynamic_state, 4065 const struct lp_sampler_params *params, 4066 int texture_index, int sampler_index, 4067 LLVMValueRef *tex_ret) 4068{ 4069 LLVMBuilderRef builder = gallivm->builder; 4070 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent( 4071 LLVMGetInsertBlock(builder))); 4072 LLVMValueRef function, inst; 4073 LLVMValueRef args[LP_MAX_TEX_FUNC_ARGS]; 4074 LLVMBasicBlockRef bb; 4075 unsigned num_args = 0; 4076 char func_name[64]; 4077 unsigned i, num_coords, num_derivs, num_offsets, layer; 4078 unsigned sample_key = params->sample_key; 4079 const LLVMValueRef *coords = params->coords; 4080 const LLVMValueRef *offsets = params->offsets; 4081 const struct lp_derivatives *derivs = params->derivs; 4082 enum lp_sampler_lod_control lod_control; 4083 enum lp_sampler_op_type op_type; 4084 boolean need_cache = FALSE; 4085 4086 lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >> 4087 LP_SAMPLER_LOD_CONTROL_SHIFT; 4088 4089 op_type = (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> 4090 LP_SAMPLER_OP_TYPE_SHIFT; 4091 4092 get_target_info(static_texture_state->target, 4093 &num_coords, &num_derivs, &num_offsets, &layer); 4094 4095 /* lod query doesn't take a layer */ 4096 if (layer && op_type == LP_SAMPLER_OP_LODQ) 4097 layer = 0; 4098 4099 if (dynamic_state->cache_ptr) { 4100 const struct util_format_description *format_desc; 4101 format_desc = util_format_description(static_texture_state->format); 4102 if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 4103 need_cache = TRUE; 4104 } 4105 } 4106 /* 4107 * texture function matches are found by name. 4108 * Thus the name has to include both the texture and sampler unit 4109 * (which covers all static state) plus the actual texture function 4110 * (including things like offsets, shadow coord, lod control). 4111 * Additionally lod_property has to be included too. 4112 */ 4113 4114 snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x", 4115 texture_index, sampler_index, sample_key); 4116 4117 function = LLVMGetNamedFunction(module, func_name); 4118 4119 if(!function) { 4120 LLVMTypeRef arg_types[LP_MAX_TEX_FUNC_ARGS]; 4121 LLVMTypeRef ret_type; 4122 LLVMTypeRef function_type; 4123 LLVMTypeRef val_type[4]; 4124 unsigned num_param = 0; 4125 4126 /* 4127 * Generate the function prototype. 
4128 */ 4129 4130 arg_types[num_param++] = LLVMTypeOf(params->context_ptr); 4131 if (params->aniso_filter_table) 4132 arg_types[num_param++] = LLVMTypeOf(params->aniso_filter_table); 4133 if (need_cache) { 4134 arg_types[num_param++] = LLVMTypeOf(params->thread_data_ptr); 4135 } 4136 for (i = 0; i < num_coords; i++) { 4137 arg_types[num_param++] = LLVMTypeOf(coords[0]); 4138 assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i])); 4139 } 4140 if (layer) { 4141 arg_types[num_param++] = LLVMTypeOf(coords[layer]); 4142 assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[layer])); 4143 } 4144 if (sample_key & LP_SAMPLER_SHADOW) { 4145 arg_types[num_param++] = LLVMTypeOf(coords[0]); 4146 } 4147 if (sample_key & LP_SAMPLER_FETCH_MS) { 4148 arg_types[num_param++] = LLVMTypeOf(params->ms_index); 4149 } 4150 if (sample_key & LP_SAMPLER_OFFSETS) { 4151 for (i = 0; i < num_offsets; i++) { 4152 arg_types[num_param++] = LLVMTypeOf(offsets[0]); 4153 assert(LLVMTypeOf(offsets[0]) == LLVMTypeOf(offsets[i])); 4154 } 4155 } 4156 if (lod_control == LP_SAMPLER_LOD_BIAS || 4157 lod_control == LP_SAMPLER_LOD_EXPLICIT) { 4158 arg_types[num_param++] = LLVMTypeOf(params->lod); 4159 } 4160 else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { 4161 for (i = 0; i < num_derivs; i++) { 4162 arg_types[num_param++] = LLVMTypeOf(derivs->ddx[i]); 4163 arg_types[num_param++] = LLVMTypeOf(derivs->ddy[i]); 4164 assert(LLVMTypeOf(derivs->ddx[0]) == LLVMTypeOf(derivs->ddx[i])); 4165 assert(LLVMTypeOf(derivs->ddy[0]) == LLVMTypeOf(derivs->ddy[i])); 4166 } 4167 } 4168 4169 val_type[0] = val_type[1] = val_type[2] = val_type[3] = 4170 lp_build_vec_type(gallivm, params->type); 4171 ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0); 4172 function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0); 4173 function = LLVMAddFunction(module, func_name, function_type); 4174 4175 for (i = 0; i < num_param; ++i) { 4176 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) { 4177 4178 lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS); 4179 } 4180 } 4181 4182 LLVMSetFunctionCallConv(function, LLVMFastCallConv); 4183 LLVMSetLinkage(function, LLVMInternalLinkage); 4184 4185 lp_build_sample_gen_func(gallivm, 4186 static_texture_state, 4187 static_sampler_state, 4188 dynamic_state, 4189 params->type, 4190 texture_index, 4191 sampler_index, 4192 function, 4193 num_param, 4194 sample_key, 4195 params->aniso_filter_table ? 
true : false); 4196 } 4197 4198 num_args = 0; 4199 args[num_args++] = params->context_ptr; 4200 if (params->aniso_filter_table) 4201 args[num_args++] = params->aniso_filter_table; 4202 if (need_cache) { 4203 args[num_args++] = params->thread_data_ptr; 4204 } 4205 for (i = 0; i < num_coords; i++) { 4206 args[num_args++] = coords[i]; 4207 } 4208 if (layer) { 4209 args[num_args++] = coords[layer]; 4210 } 4211 if (sample_key & LP_SAMPLER_SHADOW) { 4212 args[num_args++] = coords[4]; 4213 } 4214 if (sample_key & LP_SAMPLER_FETCH_MS) { 4215 args[num_args++] = params->ms_index; 4216 } 4217 if (sample_key & LP_SAMPLER_OFFSETS) { 4218 for (i = 0; i < num_offsets; i++) { 4219 args[num_args++] = offsets[i]; 4220 } 4221 } 4222 if (lod_control == LP_SAMPLER_LOD_BIAS || 4223 lod_control == LP_SAMPLER_LOD_EXPLICIT) { 4224 args[num_args++] = params->lod; 4225 } 4226 else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { 4227 for (i = 0; i < num_derivs; i++) { 4228 args[num_args++] = derivs->ddx[i]; 4229 args[num_args++] = derivs->ddy[i]; 4230 } 4231 } 4232 4233 assert(num_args <= LP_MAX_TEX_FUNC_ARGS); 4234 4235 *tex_ret = LLVMBuildCall(builder, function, args, num_args, ""); 4236 bb = LLVMGetInsertBlock(builder); 4237 inst = LLVMGetLastInstruction(bb); 4238 LLVMSetInstructionCallConv(inst, LLVMFastCallConv); 4239 4240} 4241 4242 4243/** 4244 * Build texture sampling code. 4245 * Either via a function call or inline it directly. 4246 */ 4247void 4248lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state, 4249 const struct lp_static_sampler_state *static_sampler_state, 4250 struct lp_sampler_dynamic_state *dynamic_state, 4251 struct gallivm_state *gallivm, 4252 const struct lp_sampler_params *params) 4253{ 4254 boolean use_tex_func = FALSE; 4255 4256 /* 4257 * Do not use a function call if the sampling is "simple enough". 4258 * We define this by 4259 * a) format 4260 * b) no mips (either one level only or no mip filter) 4261 * No mips will definitely make the code smaller, though 4262 * the format requirement is a bit iffy - there's some (SoA) formats 4263 * which definitely generate less code. This does happen to catch 4264 * some important cases though which are hurt quite a bit by using 4265 * a call (though not really because of the call overhead but because 4266 * they are reusing the same texture unit with some of the same 4267 * parameters). 4268 * Ideally we'd let llvm recognize this stuff by doing IPO passes. 
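 * As a concrete example of the conditions below: sampling a mip-less
 * RGBA8-variant texture with matching min/mag filters gets inlined, whereas
 * e.g. a trilinearly filtered texture of the same format goes through the
 * generated function.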
4269 */ 4270 4271 if (USE_TEX_FUNC_CALL) { 4272 const struct util_format_description *format_desc; 4273 boolean simple_format; 4274 boolean simple_tex; 4275 enum lp_sampler_op_type op_type; 4276 format_desc = util_format_description(static_texture_state->format); 4277 simple_format = !format_desc || 4278 (util_format_is_rgba8_variant(format_desc) && 4279 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB); 4280 4281 op_type = (params->sample_key & LP_SAMPLER_OP_TYPE_MASK) >> 4282 LP_SAMPLER_OP_TYPE_SHIFT; 4283 simple_tex = 4284 op_type != LP_SAMPLER_OP_TEXTURE || 4285 ((static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE || 4286 static_texture_state->level_zero_only == TRUE) && 4287 static_sampler_state->min_img_filter == static_sampler_state->mag_img_filter); 4288 4289 use_tex_func = format_desc && !(simple_format && simple_tex); 4290 } 4291 4292 if (use_tex_func) { 4293 LLVMValueRef tex_ret; 4294 lp_build_sample_soa_func(gallivm, 4295 static_texture_state, 4296 static_sampler_state, 4297 dynamic_state, 4298 params, params->texture_index, params->sampler_index, &tex_ret); 4299 4300 for (unsigned i = 0; i < 4; i++) { 4301 params->texel[i] = LLVMBuildExtractValue(gallivm->builder, tex_ret, i, ""); 4302 } 4303 } 4304 else { 4305 lp_build_sample_soa_code(gallivm, 4306 static_texture_state, 4307 static_sampler_state, 4308 dynamic_state, 4309 params->type, 4310 params->sample_key, 4311 params->texture_index, 4312 params->sampler_index, 4313 params->context_ptr, 4314 params->thread_data_ptr, 4315 params->coords, 4316 params->offsets, 4317 params->derivs, 4318 params->lod, 4319 params->ms_index, 4320 params->aniso_filter_table, 4321 params->texel); 4322 } 4323} 4324 4325 4326void 4327lp_build_size_query_soa(struct gallivm_state *gallivm, 4328 const struct lp_static_texture_state *static_state, 4329 struct lp_sampler_dynamic_state *dynamic_state, 4330 const struct lp_sampler_size_query_params *params) 4331{ 4332 LLVMValueRef lod, level = 0, size; 4333 LLVMValueRef first_level = NULL; 4334 int dims, i; 4335 boolean has_array; 4336 unsigned num_lods = 1; 4337 struct lp_build_context bld_int_vec4; 4338 LLVMValueRef context_ptr = params->context_ptr; 4339 unsigned texture_unit = params->texture_unit; 4340 unsigned target = params->target; 4341 LLVMValueRef texture_unit_offset = params->texture_unit_offset; 4342 4343 if (static_state->format == PIPE_FORMAT_NONE) { 4344 /* 4345 * If there's nothing bound, format is NONE, and we must return 4346 * all zero as mandated by d3d10 in this case. 4347 */ 4348 unsigned chan; 4349 LLVMValueRef zero = lp_build_const_vec(gallivm, params->int_type, 0.0F); 4350 for (chan = 0; chan < 4; chan++) { 4351 params->sizes_out[chan] = zero; 4352 } 4353 return; 4354 } 4355 4356 /* 4357 * Do some sanity verification about bound texture and shader dcl target. 4358 * Not entirely sure what's possible but assume array/non-array 4359 * always compatible (probably not ok for OpenGL but d3d10 has no 4360 * distinction of arrays at the resource level). 4361 * Everything else looks bogus (though not entirely sure about rect/2d). 4362 * Currently disabled because it causes assertion failures if there's 4363 * nothing bound (or rather a dummy texture, not that this case would 4364 * return the right values). 
4365 */ 4366 if (0 && static_state->target != target) { 4367 if (static_state->target == PIPE_TEXTURE_1D) 4368 assert(target == PIPE_TEXTURE_1D_ARRAY); 4369 else if (static_state->target == PIPE_TEXTURE_1D_ARRAY) 4370 assert(target == PIPE_TEXTURE_1D); 4371 else if (static_state->target == PIPE_TEXTURE_2D) 4372 assert(target == PIPE_TEXTURE_2D_ARRAY); 4373 else if (static_state->target == PIPE_TEXTURE_2D_ARRAY) 4374 assert(target == PIPE_TEXTURE_2D); 4375 else if (static_state->target == PIPE_TEXTURE_CUBE) 4376 assert(target == PIPE_TEXTURE_CUBE_ARRAY); 4377 else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY) 4378 assert(target == PIPE_TEXTURE_CUBE); 4379 else 4380 assert(0); 4381 } 4382 4383 dims = texture_dims(target); 4384 4385 has_array = has_layer_coord(target); 4386 4387 assert(!params->int_type.floating); 4388 4389 lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128)); 4390 4391 if (params->samples_only) { 4392 params->sizes_out[0] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, params->int_type), 4393 dynamic_state->num_samples(dynamic_state, gallivm, 4394 context_ptr, texture_unit, 4395 texture_unit_offset)); 4396 return; 4397 } 4398 if (params->explicit_lod) { 4399 /* FIXME: this needs to honor per-element lod */ 4400 lod = LLVMBuildExtractElement(gallivm->builder, params->explicit_lod, 4401 lp_build_const_int32(gallivm, 0), ""); 4402 first_level = dynamic_state->first_level(dynamic_state, gallivm, 4403 context_ptr, texture_unit, texture_unit_offset); 4404 level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level"); 4405 lod = lp_build_broadcast_scalar(&bld_int_vec4, level); 4406 } else { 4407 lod = bld_int_vec4.zero; 4408 } 4409 4410 size = bld_int_vec4.undef; 4411 4412 size = LLVMBuildInsertElement(gallivm->builder, size, 4413 dynamic_state->width(dynamic_state, gallivm, 4414 context_ptr, texture_unit, texture_unit_offset), 4415 lp_build_const_int32(gallivm, 0), ""); 4416 4417 if (dims >= 2) { 4418 size = LLVMBuildInsertElement(gallivm->builder, size, 4419 dynamic_state->height(dynamic_state, gallivm, 4420 context_ptr, texture_unit, texture_unit_offset), 4421 lp_build_const_int32(gallivm, 1), ""); 4422 } 4423 4424 if (dims >= 3) { 4425 size = LLVMBuildInsertElement(gallivm->builder, size, 4426 dynamic_state->depth(dynamic_state, gallivm, 4427 context_ptr, texture_unit, texture_unit_offset), 4428 lp_build_const_int32(gallivm, 2), ""); 4429 } 4430 4431 size = lp_build_minify(&bld_int_vec4, size, lod, TRUE); 4432 4433 if (has_array) { 4434 LLVMValueRef layers = dynamic_state->depth(dynamic_state, gallivm, 4435 context_ptr, texture_unit, texture_unit_offset); 4436 if (target == PIPE_TEXTURE_CUBE_ARRAY) { 4437 /* 4438 * It looks like GL wants number of cubes, d3d10.1 has it undefined? 4439 * Could avoid this by passing in number of cubes instead of total 4440 * number of layers (might make things easier elsewhere too). 4441 */ 4442 LLVMValueRef six = lp_build_const_int32(gallivm, 6); 4443 layers = LLVMBuildSDiv(gallivm->builder, layers, six, ""); 4444 } 4445 size = LLVMBuildInsertElement(gallivm->builder, size, layers, 4446 lp_build_const_int32(gallivm, dims), ""); 4447 } 4448 4449 /* 4450 * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels) 4451 * if level is out of bounds (note this can't cover unbound texture 4452 * here, which also requires returning zero). 
4453 */
4454 if (params->explicit_lod && params->is_sviewinfo) {
4455 LLVMValueRef last_level, out, out1;
4456 struct lp_build_context leveli_bld;
4457
4458 /* everything is scalar for now */
4459 lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
4460 last_level = dynamic_state->last_level(dynamic_state, gallivm,
4461 context_ptr, texture_unit, texture_unit_offset);
4462
4463 out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
4464 out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
4465 out = lp_build_or(&leveli_bld, out, out1);
4466 if (num_lods == 1) {
4467 out = lp_build_broadcast_scalar(&bld_int_vec4, out);
4468 }
4469 else {
4470 /* TODO */
4471 assert(0);
4472 }
4473 size = lp_build_andnot(&bld_int_vec4, size, out);
4474 }
4475 for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
4476 params->sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec4.type, params->int_type,
4477 size,
4478 lp_build_const_int32(gallivm, i));
4479 }
4480 if (params->is_sviewinfo) {
4481 for (; i < 4; i++) {
4482 params->sizes_out[i] = lp_build_const_vec(gallivm, params->int_type, 0.0);
4483 }
4484 }
4485
4486 /*
4487 * if there's no explicit_lod (buffers, rects), queries requiring the
4488 * number of mips would be illegal.
4489 */
4490 if (params->is_sviewinfo && params->explicit_lod) {
4491 struct lp_build_context bld_int_scalar;
4492 LLVMValueRef num_levels;
4493 lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));
4494
4495 if (static_state->level_zero_only) {
4496 num_levels = bld_int_scalar.one;
4497 }
4498 else {
4499 LLVMValueRef last_level;
4500
4501 last_level = dynamic_state->last_level(dynamic_state, gallivm,
4502 context_ptr, texture_unit, texture_unit_offset);
4503 num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
4504 num_levels = lp_build_add(&bld_int_scalar, num_levels, bld_int_scalar.one);
4505 }
4506 params->sizes_out[3] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, params->int_type),
4507 num_levels);
4508 }
4509 }
4510
4511 static void
4512 lp_build_do_atomic_soa(struct gallivm_state *gallivm,
4513 const struct util_format_description *format_desc,
4514 struct lp_type type,
4515 LLVMValueRef exec_mask,
4516 LLVMValueRef base_ptr,
4517 LLVMValueRef offset,
4518 LLVMValueRef out_of_bounds,
4519 unsigned img_op,
4520 LLVMAtomicRMWBinOp op,
4521 const LLVMValueRef rgba_in[4],
4522 const LLVMValueRef rgba2_in[4],
4523 LLVMValueRef atomic_result[4])
4524 {
4525 enum pipe_format format = format_desc->format;
4526
4527 if (format != PIPE_FORMAT_R32_UINT && format != PIPE_FORMAT_R32_SINT && format != PIPE_FORMAT_R32_FLOAT) {
4528 atomic_result[0] = lp_build_zero(gallivm, type);
4529 return;
4530 }
4531
4532 LLVMValueRef atom_res = lp_build_alloca(gallivm,
4533 LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), type.length), "");
4534
4535 offset = LLVMBuildGEP(gallivm->builder, base_ptr, &offset, 1, "");
4536 struct lp_build_loop_state loop_state;
4537 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
4538 struct lp_build_if_state ifthen;
4539 LLVMValueRef cond;
4540 LLVMValueRef packed = rgba_in[0], packed2 = rgba2_in[0];
4541
4542 LLVMValueRef should_store_mask = LLVMBuildAnd(gallivm->builder, exec_mask, LLVMBuildNot(gallivm->builder, out_of_bounds, ""), "store_mask");
4543 assert(exec_mask);
4544
4545 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, should_store_mask, lp_build_const_int_vec(gallivm, type, 0), "");
4546 cond =
LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); 4547 lp_build_if(&ifthen, gallivm, cond); 4548 4549 LLVMValueRef data = LLVMBuildExtractElement(gallivm->builder, packed, loop_state.counter, ""); 4550 LLVMValueRef cast_base_ptr = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, ""); 4551 cast_base_ptr = LLVMBuildBitCast(gallivm->builder, cast_base_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), ""); 4552 data = LLVMBuildBitCast(gallivm->builder, data, LLVMInt32TypeInContext(gallivm->context), ""); 4553 4554 if (img_op == LP_IMG_ATOMIC_CAS) { 4555 LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, packed2, loop_state.counter, ""); 4556 LLVMValueRef cas_src = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, LLVMInt32TypeInContext(gallivm->context), ""); 4557 data = LLVMBuildAtomicCmpXchg(gallivm->builder, cast_base_ptr, data, 4558 cas_src, 4559 LLVMAtomicOrderingSequentiallyConsistent, 4560 LLVMAtomicOrderingSequentiallyConsistent, 4561 false); 4562 data = LLVMBuildExtractValue(gallivm->builder, data, 0, ""); 4563 } else { 4564 data = LLVMBuildAtomicRMW(gallivm->builder, op, 4565 cast_base_ptr, data, 4566 LLVMAtomicOrderingSequentiallyConsistent, 4567 false); 4568 } 4569 4570 LLVMValueRef temp_res = LLVMBuildLoad(gallivm->builder, atom_res, ""); 4571 temp_res = LLVMBuildInsertElement(gallivm->builder, temp_res, data, loop_state.counter, ""); 4572 LLVMBuildStore(gallivm->builder, temp_res, atom_res); 4573 4574 lp_build_endif(&ifthen); 4575 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, type.length), 4576 NULL, LLVMIntUGE); 4577 atomic_result[0] = LLVMBuildLoad(gallivm->builder, atom_res, ""); 4578} 4579 4580static void 4581lp_build_img_op_no_format(struct gallivm_state *gallivm, 4582 const struct lp_img_params *params, 4583 LLVMValueRef outdata[4]) 4584{ 4585 /* 4586 * If there's nothing bound, format is NONE, and we must return 4587 * all zero as mandated by d3d10 in this case. 4588 */ 4589 if (params->img_op != LP_IMG_STORE) { 4590 LLVMValueRef zero = lp_build_zero(gallivm, params->type); 4591 for (unsigned chan = 0; chan < (params->img_op == LP_IMG_LOAD ? 
4 : 1); chan++) { 4592 outdata[chan] = zero; 4593 } 4594 } 4595} 4596 4597void 4598lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state, 4599 struct lp_sampler_dynamic_state *dynamic_state, 4600 struct gallivm_state *gallivm, 4601 const struct lp_img_params *params, 4602 LLVMValueRef outdata[4]) 4603{ 4604 unsigned target = params->target; 4605 unsigned dims = texture_dims(target); 4606 /** regular scalar int type */ 4607 struct lp_type int_coord_type; 4608 struct lp_build_context int_coord_bld; 4609 const struct util_format_description *format_desc = util_format_description(static_texture_state->format); 4610 LLVMValueRef x = params->coords[0], y = params->coords[1], z = params->coords[2]; 4611 LLVMValueRef ms_index = params->ms_index; 4612 LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL; 4613 int_coord_type = lp_uint_type(params->type); 4614 lp_build_context_init(&int_coord_bld, gallivm, int_coord_type); 4615 4616 if (static_texture_state->format == PIPE_FORMAT_NONE) { 4617 lp_build_img_op_no_format(gallivm, params, outdata); 4618 return; 4619 } 4620 LLVMValueRef offset, i, j; 4621 4622 LLVMValueRef row_stride = dynamic_state->row_stride(dynamic_state, gallivm, 4623 params->context_ptr, params->image_index, NULL); 4624 LLVMValueRef img_stride = dynamic_state->img_stride(dynamic_state, gallivm, 4625 params->context_ptr, params->image_index, NULL); 4626 LLVMValueRef base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm, 4627 params->context_ptr, params->image_index, NULL); 4628 LLVMValueRef width = dynamic_state->width(dynamic_state, gallivm, 4629 params->context_ptr, params->image_index, NULL); 4630 LLVMValueRef height = dynamic_state->height(dynamic_state, gallivm, 4631 params->context_ptr, params->image_index, NULL); 4632 LLVMValueRef depth = dynamic_state->depth(dynamic_state, gallivm, 4633 params->context_ptr, params->image_index, NULL); 4634 LLVMValueRef num_samples = NULL, sample_stride = NULL; 4635 if (ms_index) { 4636 num_samples = dynamic_state->num_samples(dynamic_state, gallivm, 4637 params->context_ptr, params->image_index, NULL); 4638 sample_stride = dynamic_state->sample_stride(dynamic_state, gallivm, 4639 params->context_ptr, params->image_index, NULL); 4640 } 4641 4642 boolean layer_coord = has_layer_coord(target); 4643 4644 width = lp_build_broadcast_scalar(&int_coord_bld, width); 4645 if (dims >= 2) { 4646 height = lp_build_broadcast_scalar(&int_coord_bld, height); 4647 row_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, row_stride); 4648 } 4649 if (dims >= 3 || layer_coord) { 4650 depth = lp_build_broadcast_scalar(&int_coord_bld, depth); 4651 img_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, img_stride); 4652 } 4653 4654 LLVMValueRef out_of_bounds = int_coord_bld.zero; 4655 LLVMValueRef out1; 4656 out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, x, width); 4657 out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1); 4658 4659 if (dims >= 2) { 4660 out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, y, height); 4661 out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1); 4662 } 4663 if (dims >= 3) { 4664 out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, z, depth); 4665 out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1); 4666 } 4667 lp_build_sample_offset(&int_coord_bld, 4668 format_desc, 4669 x, y, z, row_stride_vec, img_stride_vec, 4670 &offset, &i, &j); 4671 4672 if (ms_index) { 4673 out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, ms_index, 
lp_build_broadcast_scalar(&int_coord_bld, num_samples));
4674 out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
4675
4676 offset = lp_build_add(&int_coord_bld, offset,
4677 lp_build_mul(&int_coord_bld, lp_build_broadcast_scalar(&int_coord_bld, sample_stride),
4678 ms_index));
4679 }
4680 if (params->img_op == LP_IMG_LOAD) {
4681 struct lp_type texel_type = lp_build_texel_type(params->type, format_desc);
4682
4683 offset = lp_build_andnot(&int_coord_bld, offset, out_of_bounds);
4684 struct lp_build_context texel_bld;
4685 lp_build_context_init(&texel_bld, gallivm, texel_type);
4686 lp_build_fetch_rgba_soa(gallivm,
4687 format_desc,
4688 texel_type, TRUE,
4689 base_ptr, offset,
4690 i, j,
4691 NULL,
4692 outdata);
4693
4694 for (unsigned chan = 0; chan < 3; chan++) {
4695 outdata[chan] = lp_build_select(&texel_bld, out_of_bounds,
4696 texel_bld.zero, outdata[chan]);
4697 }
4698 if (format_desc->swizzle[3] == PIPE_SWIZZLE_1)
4699 outdata[3] = lp_build_select(&texel_bld, out_of_bounds,
4700 texel_bld.one, outdata[3]);
4701 else
4702 outdata[3] = lp_build_select(&texel_bld, out_of_bounds,
4703 texel_bld.zero, outdata[3]);
4704 } else if (params->img_op == LP_IMG_STORE) {
4705 lp_build_store_rgba_soa(gallivm, format_desc, params->type, params->exec_mask, base_ptr, offset, out_of_bounds,
4706 params->indata);
4707 } else {
4708 lp_build_do_atomic_soa(gallivm, format_desc, params->type, params->exec_mask, base_ptr, offset, out_of_bounds,
4709 params->img_op, params->op, params->indata, params->indata2, outdata);
4710 }
4711 }
4712
4713 /*
4714 * These functions are for indirect texture access support.
4715 *
4716 * Indirect textures are implemented using a switch statement that
4717 * takes the texture index and jumps to the sampler functions for
4718 * that texture unit.
4719 */
4720
4721 /*
4722 * Initialise an indexed sampler switch block.
4723 *
4724 * This sets up the switch_info state and adds the LLVM flow control pieces.
4725 */
4726 void
4727 lp_build_sample_array_init_soa(struct lp_build_sample_array_switch *switch_info,
4728 struct gallivm_state *gallivm,
4729 const struct lp_sampler_params *params,
4730 LLVMValueRef idx,
4731 unsigned base, unsigned range)
4732 {
4733 switch_info->gallivm = gallivm;
4734 switch_info->params = *params;
4735 switch_info->base = base;
4736 switch_info->range = range;
4737
4738 /* for generating the switch functions we don't want the texture index offset */
4739 switch_info->params.texture_index_offset = 0;
4740
4741 LLVMBasicBlockRef initial_block = LLVMGetInsertBlock(gallivm->builder);
4742 switch_info->merge_ref = lp_build_insert_new_block(gallivm, "texmerge");
4743
4744 switch_info->switch_ref = LLVMBuildSwitch(gallivm->builder, idx,
4745 switch_info->merge_ref, range - base);
4746
4747 LLVMTypeRef val_type[4];
4748 val_type[0] = val_type[1] = val_type[2] = val_type[3] =
4749 lp_build_vec_type(gallivm, params->type);
4750 LLVMTypeRef ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
4751
4752 LLVMValueRef undef_val = LLVMGetUndef(ret_type);
4753
4754 LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
4755
4756 switch_info->phi = LLVMBuildPhi(gallivm->builder, ret_type, "");
4757 LLVMAddIncoming(switch_info->phi, &undef_val, &initial_block, 1);
4758 }
4759
4760 /*
4761 * Add an individual entry to the indirect texture switch.
4762 *
4763 * This builds the sample function and links a case for it into the switch statement.

/*
 * Add an individual entry to the indirect texture switch.
 *
 * This builds the sample function and links a case for it into the
 * switch statement.
 */
void
lp_build_sample_array_case_soa(struct lp_build_sample_array_switch *switch_info,
                               int idx,
                               const struct lp_static_texture_state *static_texture_state,
                               const struct lp_static_sampler_state *static_sampler_state,
                               struct lp_sampler_dynamic_state *dynamic_texture_state)
{
   struct gallivm_state *gallivm = switch_info->gallivm;
   LLVMBasicBlockRef this_block = lp_build_insert_new_block(gallivm, "texblock");
   LLVMValueRef tex_ret;

   LLVMAddCase(switch_info->switch_ref,
               LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), idx, 0),
               this_block);
   LLVMPositionBuilderAtEnd(gallivm->builder, this_block);

   lp_build_sample_soa_func(gallivm, static_texture_state,
                            static_sampler_state, dynamic_texture_state,
                            &switch_info->params, idx, idx,
                            &tex_ret);

   LLVMAddIncoming(switch_info->phi, &tex_ret, &this_block, 1);
   LLVMBuildBr(gallivm->builder, switch_info->merge_ref);
}

/*
 * Finish a switch statement.
 *
 * This handles extracting the results from the switch.
 */
void
lp_build_sample_array_fini_soa(struct lp_build_sample_array_switch *switch_info)
{
   struct gallivm_state *gallivm = switch_info->gallivm;

   LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
   for (unsigned i = 0; i < 4; i++)
      switch_info->params.texel[i] =
         LLVMBuildExtractValue(gallivm->builder, switch_info->phi, i, "");
}

/*
 * Initialise an indexed image-op switch block, the image analogue of
 * lp_build_sample_array_init_soa.  Stores produce no result, so merge
 * phis are only created for loads (four channels) and atomics (one value).
 */
void
lp_build_image_op_switch_soa(struct lp_build_img_op_array_switch *switch_info,
                             struct gallivm_state *gallivm,
                             const struct lp_img_params *params,
                             LLVMValueRef idx,
                             unsigned base, unsigned range)
{
   switch_info->gallivm = gallivm;
   switch_info->params = *params;
   switch_info->base = base;
   switch_info->range = range;

   /* for generating the switch functions we don't want the image index offset */
   switch_info->params.image_index_offset = 0;

   LLVMBasicBlockRef initial_block = LLVMGetInsertBlock(gallivm->builder);
   switch_info->merge_ref = lp_build_insert_new_block(gallivm, "imgmerge");

   switch_info->switch_ref = LLVMBuildSwitch(gallivm->builder, idx,
                                             switch_info->merge_ref, range - base);

   if (params->img_op != LP_IMG_STORE) {
      LLVMTypeRef ret_type = lp_build_vec_type(gallivm, params->type);
      LLVMValueRef undef_val = LLVMGetUndef(ret_type);

      LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);

      for (unsigned i = 0; i < ((params->img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
         switch_info->phi[i] = LLVMBuildPhi(gallivm->builder, ret_type, "");
         LLVMAddIncoming(switch_info->phi[i], &undef_val, &initial_block, 1);
      }
   }
}
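
/*
 * Add one image unit's case to the indirect image-op switch: the image
 * operation for unit `idx` is emitted in its own block and, for loads
 * and atomics, its result(s) are routed into the merge-block phis.
 */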
void
lp_build_image_op_array_case(struct lp_build_img_op_array_switch *switch_info,
                             int idx,
                             const struct lp_static_texture_state *static_texture_state,
                             struct lp_sampler_dynamic_state *dynamic_state)
{
   struct gallivm_state *gallivm = switch_info->gallivm;
   LLVMBasicBlockRef this_block = lp_build_insert_new_block(gallivm, "img");
   LLVMValueRef tex_ret[4];

   LLVMAddCase(switch_info->switch_ref, lp_build_const_int32(gallivm, idx), this_block);
   LLVMPositionBuilderAtEnd(gallivm->builder, this_block);

   switch_info->params.image_index = idx;

   lp_build_img_op_soa(static_texture_state, dynamic_state,
                       switch_info->gallivm, &switch_info->params, tex_ret);
   if (switch_info->params.img_op != LP_IMG_STORE) {
      for (unsigned i = 0; i < ((switch_info->params.img_op == LP_IMG_LOAD) ? 4 : 1); i++)
         tex_ret[i] = LLVMBuildBitCast(gallivm->builder, tex_ret[i],
                                       lp_build_vec_type(gallivm, switch_info->params.type), "");

      /* lp_build_img_op_soa may have emitted control flow of its own, so
       * fetch the block that actually ends up feeding the phis */
      this_block = LLVMGetInsertBlock(gallivm->builder);
      for (unsigned i = 0; i < ((switch_info->params.img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
         LLVMAddIncoming(switch_info->phi[i], &tex_ret[i], &this_block, 1);
      }
   }
   LLVMBuildBr(gallivm->builder, switch_info->merge_ref);
}

/*
 * Finish the image-op switch and publish the phi results.
 */
void
lp_build_image_op_array_fini_soa(struct lp_build_img_op_array_switch *switch_info)
{
   struct gallivm_state *gallivm = switch_info->gallivm;

   LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);

   if (switch_info->params.img_op != LP_IMG_STORE) {
      for (unsigned i = 0; i < ((switch_info->params.img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
         switch_info->params.outdata[i] = switch_info->phi[i];
      }
   }
}
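
/*
 * Usage sketch (illustrative, not from the original source): lowering an
 * indirectly indexed image load over n image units.  The variable names
 * below are hypothetical; callers supply their own parameters and state.
 *
 *    struct lp_build_img_op_array_switch sw;
 *    lp_build_image_op_switch_soa(&sw, gallivm, &img_params, idx_val, 0, n);
 *    for (unsigned u = 0; u < n; u++)
 *       lp_build_image_op_array_case(&sw, u, &tex_state[u], dyn_state);
 *    lp_build_image_op_array_fini_soa(&sw);
 *    // for LP_IMG_LOAD, sw.params.outdata[0..3] hold the loaded RGBA vectors
 */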