nir_lower_tex.c revision 01e04c3f
/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * This lowering pass supports (as configured via nir_lower_tex_options)
 * various texture related conversions:
 * + texture projector lowering: converts the coordinate division for
 *   texture projection to be done in ALU instructions instead of
 *   asking the texture operation to do so.
 * + lowering RECT: converts the un-normalized RECT texture coordinates
 *   to normalized coordinates with txs plus ALU instructions
 * + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
 *   inserts instructions to clamp specified coordinates to [0.0, 1.0].
 *   Note that this automatically triggers texture projector lowering if
 *   needed, since clamping must happen after projector lowering.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_format_convert.h"
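/*
 * Illustrative usage sketch (not part of this file): a driver typically
 * fills out a nir_lower_tex_options struct with the lowerings it needs and
 * runs the pass over its shader.  The option values below are hypothetical
 * examples, not recommendations:
 *
 *    nir_lower_tex_options opts = {
 *       .lower_txp = ~0,       (lower projectors for every sampler dim)
 *       .lower_rect = true,    (normalize RECT coordinates using txs)
 *    };
 *    nir_lower_tex(shader, &opts);
 */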
static void
project_src(nir_builder *b, nir_tex_instr *tex)
{
   /* Find the projector in the srcs list, if present. */
   int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
   if (proj_index < 0)
      return;

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *inv_proj =
      nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));

   /* Walk through the sources projecting the arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
      case nir_tex_src_comparator:
         break;
      default:
         continue;
      }
      nir_ssa_def *unprojected =
         nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
      nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);

      /* Array indices don't get projected, so make a new vector with the
       * coordinate's array index untouched.
       */
      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
         switch (tex->coord_components) {
         case 4:
            projected = nir_vec4(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, projected, 2),
                                 nir_channel(b, unprojected, 3));
            break;
         case 3:
            projected = nir_vec3(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, unprojected, 2));
            break;
         case 2:
            projected = nir_vec2(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, unprojected, 1));
            break;
         default:
            unreachable("bad texture coord count for array");
            break;
         }
      }

      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[i].src,
                            nir_src_for_ssa(projected));
   }

   nir_tex_instr_remove_src(tex, proj_index);
}

static nir_ssa_def *
get_texture_size(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   nir_tex_instr *txs;

   unsigned num_srcs = 1; /* One for the LOD */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_texture_deref ||
          tex->src[i].src_type == nir_tex_src_sampler_deref ||
          tex->src[i].src_type == nir_tex_src_texture_offset ||
          tex->src[i].src_type == nir_tex_src_sampler_offset)
         num_srcs++;
   }

   txs = nir_tex_instr_create(b->shader, num_srcs);
   txs->op = nir_texop_txs;
   txs->sampler_dim = tex->sampler_dim;
   txs->is_array = tex->is_array;
   txs->is_shadow = tex->is_shadow;
   txs->is_new_style_shadow = tex->is_new_style_shadow;
   txs->texture_index = tex->texture_index;
   txs->sampler_index = tex->sampler_index;
   txs->dest_type = nir_type_int;

   unsigned idx = 0;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_texture_deref ||
          tex->src[i].src_type == nir_tex_src_sampler_deref ||
          tex->src[i].src_type == nir_tex_src_texture_offset ||
          tex->src[i].src_type == nir_tex_src_sampler_offset) {
         nir_src_copy(&txs->src[idx].src, &tex->src[i].src, txs);
         txs->src[idx].src_type = tex->src[i].src_type;
         idx++;
      }
   }
   /* Add in an LOD because some back-ends require it */
   txs->src[idx].src = nir_src_for_ssa(nir_imm_int(b, 0));
   txs->src[idx].src_type = nir_tex_src_lod;

   nir_ssa_dest_init(&txs->instr, &txs->dest,
                     nir_tex_instr_dest_size(txs), 32, NULL);
   nir_builder_instr_insert(b, &txs->instr);

   return nir_i2f32(b, &txs->dest.ssa);
}
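/*
 * Sketch of the offset lowering below (restating the code in math form):
 * the texel offset is folded into the coordinate itself,
 *
 *    coord' = coord + offset / textureSize(tex, 0)   (normalized float coords)
 *    coord' = coord + offset                         (RECT / integer coords)
 *
 * after which the offset source can be removed from the instruction.
 */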
static bool
lower_offset(nir_builder *b, nir_tex_instr *tex)
{
   int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
   if (offset_index < 0)
      return false;

   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_index >= 0);

   assert(tex->src[offset_index].src.is_ssa);
   assert(tex->src[coord_index].src.is_ssa);
   nir_ssa_def *offset = tex->src[offset_index].src.ssa;
   nir_ssa_def *coord = tex->src[coord_index].src.ssa;

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *offset_coord;
   if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
      if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
         offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
      } else {
         nir_ssa_def *txs = get_texture_size(b, tex);
         nir_ssa_def *scale = nir_frcp(b, txs);

         offset_coord = nir_fadd(b, coord,
                                 nir_fmul(b,
                                          nir_i2f32(b, offset),
                                          scale));
      }
   } else {
      offset_coord = nir_iadd(b, coord, offset);
   }

   if (tex->is_array) {
      /* The offset is not applied to the array index */
      if (tex->coord_components == 2) {
         offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
                                 nir_channel(b, coord, 1));
      } else if (tex->coord_components == 3) {
         offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
                                 nir_channel(b, offset_coord, 1),
                                 nir_channel(b, coord, 2));
      } else {
         unreachable("Invalid number of components");
      }
   }

   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
                         nir_src_for_ssa(offset_coord));

   nir_tex_instr_remove_src(tex, offset_index);

   return true;
}
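/*
 * RECT lowering sketch: rectangle textures use un-normalized coordinates,
 * so the pass queries the level-0 size with txs and rescales,
 *
 *    coord' = coord / textureSize(tex, 0)
 *
 * after which the instruction can be treated as an ordinary 2D sample.
 */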
static void
lower_rect(nir_builder *b, nir_tex_instr *tex)
{
   nir_ssa_def *txs = get_texture_size(b, tex);
   nir_ssa_def *scale = nir_frcp(b, txs);

   /* Walk through the sources normalizing the requested arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type != nir_tex_src_coord)
         continue;

      nir_ssa_def *coords =
         nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[i].src,
                            nir_src_for_ssa(nir_fmul(b, coords, scale)));
   }

   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
}

static nir_ssa_def *
sample_plane(nir_builder *b, nir_tex_instr *tex, int plane)
{
   assert(tex->dest.is_ssa);
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
   assert(tex->op == nir_texop_tex);
   assert(tex->coord_components == 2);

   nir_tex_instr *plane_tex =
      nir_tex_instr_create(b->shader, tex->num_srcs + 1);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      nir_src_copy(&plane_tex->src[i].src, &tex->src[i].src, plane_tex);
      plane_tex->src[i].src_type = tex->src[i].src_type;
   }
   plane_tex->src[tex->num_srcs].src = nir_src_for_ssa(nir_imm_int(b, plane));
   plane_tex->src[tex->num_srcs].src_type = nir_tex_src_plane;
   plane_tex->op = nir_texop_tex;
   plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   plane_tex->dest_type = nir_type_float;
   plane_tex->coord_components = 2;

   plane_tex->texture_index = tex->texture_index;
   plane_tex->sampler_index = tex->sampler_index;

   nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4, 32, NULL);

   nir_builder_instr_insert(b, &plane_tex->instr);

   return &plane_tex->dest.ssa;
}
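/*
 * The constants below implement a BT.601-style limited-range ("video
 * range") YCbCr-to-RGB conversion; roughly, for 8-bit data normalized to
 * [0, 1]:
 *
 *    R = 1.164 * (Y - 16/255) + 1.596 * (Cr - 128/255)
 *    G = 1.164 * (Y - 16/255) - 0.392 * (Cb - 128/255) - 0.813 * (Cr - 128/255)
 *    B = 1.164 * (Y - 16/255) + 2.017 * (Cb - 128/255)
 */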
static void
convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
                   nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v)
{
   nir_const_value m[3] = {
      { .f32 = { 1.0f, 0.0f, 1.59602678f, 0.0f } },
      { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } },
      { .f32 = { 1.0f, 2.01723214f, 0.0f, 0.0f } }
   };

   nir_ssa_def *yuv =
      nir_vec4(b,
               nir_fmul(b, nir_imm_float(b, 1.16438356f),
                        nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))),
               nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0),
               nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0),
               nir_imm_float(b, 0.0));

   nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0]));
   nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1]));
   nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2]));

   nir_ssa_def *result = nir_vec4(b, red, green, blue, nir_imm_float(b, 1.0f));

   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result));
}

static void
lower_y_uv_external(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *y = sample_plane(b, tex, 0);
   nir_ssa_def *uv = sample_plane(b, tex, 1);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, uv, 0),
                      nir_channel(b, uv, 1));
}

static void
lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *y = sample_plane(b, tex, 0);
   nir_ssa_def *u = sample_plane(b, tex, 1);
   nir_ssa_def *v = sample_plane(b, tex, 2);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, u, 0),
                      nir_channel(b, v, 0));
}

static void
lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *y = sample_plane(b, tex, 0);
   nir_ssa_def *xuxv = sample_plane(b, tex, 1);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, xuxv, 1),
                      nir_channel(b, xuxv, 3));
}

static void
lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *y = sample_plane(b, tex, 0);
   nir_ssa_def *uxvx = sample_plane(b, tex, 1);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 1),
                      nir_channel(b, uxvx, 0),
                      nir_channel(b, uxvx, 2));
}

/*
 * Emits a textureLod operation used to replace an existing
 * textureGrad instruction.
 */
static void
replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex)
{
   /* We are going to emit a textureLod() with the same parameters except that
    * we replace ddx/ddy with lod.
    */
   int num_srcs = tex->num_srcs - 1;
   nir_tex_instr *txl = nir_tex_instr_create(b->shader, num_srcs);

   txl->op = nir_texop_txl;
   txl->sampler_dim = tex->sampler_dim;
   txl->texture_index = tex->texture_index;
   txl->dest_type = tex->dest_type;
   txl->is_array = tex->is_array;
   txl->is_shadow = tex->is_shadow;
   txl->is_new_style_shadow = tex->is_new_style_shadow;
   txl->sampler_index = tex->sampler_index;
   txl->coord_components = tex->coord_components;

   nir_ssa_dest_init(&txl->instr, &txl->dest, 4, 32, NULL);

   int src_num = 0;
   for (int i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_ddx ||
          tex->src[i].src_type == nir_tex_src_ddy)
         continue;
      nir_src_copy(&txl->src[src_num].src, &tex->src[i].src, txl);
      txl->src[src_num].src_type = tex->src[i].src_type;
      src_num++;
   }

   txl->src[src_num].src = nir_src_for_ssa(lod);
   txl->src[src_num].src_type = nir_tex_src_lod;
   src_num++;

   assert(src_num == num_srcs);

   nir_ssa_dest_init(&txl->instr, &txl->dest,
                     tex->dest.ssa.num_components, 32, NULL);
   nir_builder_instr_insert(b, &txl->instr);

   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(&txl->dest.ssa));

   nir_instr_remove(&tex->instr);
}

static void
lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0 */
   nir_ssa_def *size = get_texture_size(b, tex);

   /* Cubemap texture lookups first generate a texture coordinate normalized
    * to [-1, 1] on the appropriate face.  The appropriate face is determined
    * by which component has the largest magnitude and its sign.  The texture
    * coordinate is the quotient of the remaining texture coordinates against
    * the absolute value of the component of largest magnitude.  Because of
    * this division, computing the derivative of the texel coordinate requires
    * the quotient rule.  The high-level GLSL code is as follows:
    *
    * Step 1: selection
    *
    * vec3 abs_p, Q, dQdx, dQdy;
    * abs_p = abs(ir->coordinate);
    * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
    *    Q = ir->coordinate.yzx;
    *    dQdx = ir->lod_info.grad.dPdx.yzx;
    *    dQdy = ir->lod_info.grad.dPdy.yzx;
    * }
    * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
    *    Q = ir->coordinate.xzy;
    *    dQdx = ir->lod_info.grad.dPdx.xzy;
    *    dQdy = ir->lod_info.grad.dPdy.xzy;
    * }
    * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
    *    Q = ir->coordinate;
    *    dQdx = ir->lod_info.grad.dPdx;
    *    dQdy = ir->lod_info.grad.dPdy;
    * }
    *
    * Step 2: use the quotient rule to compute the derivative.  The texel
    * coordinate normalized to [-1, 1] is given by Q.xy / (sign(Q.z) * Q.z).
    * We are only concerned with the magnitudes of the derivatives, whose
    * values are not affected by the sign, so we drop the sign from the
    * computation.
    *
    * vec2 dx, dy;
    * float recip;
    *
    * recip = 1.0 / Q.z;
    * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
    * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
    *
    * Step 3: compute LOD.  At this point we have the derivatives of the
    * texture coordinates normalized to [-1,1].  We take the LOD to be
    *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
    *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
    *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy, dy))) * L)
    *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy, dy))))
    *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy, dy)))
    * where L is the dimension of the cubemap.  The code is:
    *
    * float M, result;
    * M = max(dot(dx, dx), dot(dy, dy));
    * L = textureSize(sampler, 0).x;
    * result = -1.0 + 0.5 * log2(L * L * M);
    */

   /* coordinate */
   nir_ssa_def *p =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;

   /* unmodified dPdx, dPdy values */
   nir_ssa_def *dPdx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *dPdy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *abs_p = nir_fabs(b, p);
   nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
   nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
   nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);

   /* 1. compute selector */
   nir_ssa_def *Q, *dQdx, *dQdy;

   nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
   nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));

   unsigned yzx[3] = { 1, 2, 0 };
   unsigned xzy[3] = { 0, 2, 1 };

   Q = nir_bcsel(b, cond_z,
                 p,
                 nir_bcsel(b, cond_y,
                           nir_swizzle(b, p, xzy, 3, false),
                           nir_swizzle(b, p, yzx, 3, false)));

   dQdx = nir_bcsel(b, cond_z,
                    dPdx,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdx, xzy, 3, false),
                              nir_swizzle(b, dPdx, yzx, 3, false)));

   dQdy = nir_bcsel(b, cond_z,
                    dPdy,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdy, xzy, 3, false),
                              nir_swizzle(b, dPdy, yzx, 3, false)));

   /* 2. quotient rule */

   /* tmp = Q.xy * recip;
    * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
    * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
    */
   nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));

   nir_ssa_def *Q_xy = nir_channels(b, Q, 0x3);
   nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);

   nir_ssa_def *dQdx_xy = nir_channels(b, dQdx, 0x3);
   nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
   nir_ssa_def *dx =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));

   nir_ssa_def *dQdy_xy = nir_channels(b, dQdy, 0x3);
   nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
   nir_ssa_def *dy =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));

   /* M = max(dot(dx, dx), dot(dy, dy)); */
   nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));

   /* size has textureSize() of LOD 0 */
   nir_ssa_def *L = nir_channel(b, size, 0);

   /* lod = -1.0 + 0.5 * log2(L * L * M); */
   nir_ssa_def *lod =
      nir_fadd(b,
               nir_imm_float(b, -1.0f),
               nir_fmul(b,
                        nir_imm_float(b, 0.5f),
                        nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));

   /* 3. Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}
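/*
 * Sketch of the non-cube gradient lowering below: the implicit LOD that
 * textureGrad() would have selected is recomputed explicitly from the
 * gradients, roughly
 *
 *    rho = max(length(dPdx * size), length(dPdy * size))
 *    lod = log2(rho)
 *
 * and the instruction is then rewritten as an explicit-LOD textureLod().
 */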
static void
lower_gradient(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0 */
   unsigned component_mask;
   switch (tex->sampler_dim) {
   case GLSL_SAMPLER_DIM_3D:
      component_mask = 7;
      break;
   case GLSL_SAMPLER_DIM_1D:
      component_mask = 1;
      break;
   default:
      component_mask = 3;
      break;
   }

   nir_ssa_def *size =
      nir_channels(b, get_texture_size(b, tex), component_mask);

   /* Scale the gradients by width and height.  Effectively, the incoming
    * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
    * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
    */
   nir_ssa_def *ddx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *ddy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *dPdx = nir_fmul(b, ddx, size);
   nir_ssa_def *dPdy = nir_fmul(b, ddy, size);

   nir_ssa_def *rho;
   if (dPdx->num_components == 1) {
      rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
   } else {
      rho = nir_fmax(b,
                     nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
                     nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
   }

   /* lod = log2(rho).  We're ignoring GL state biases for now. */
   nir_ssa_def *lod = nir_flog2(b, rho);

   /* Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}
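/*
 * Coordinate saturation sketch: each coordinate component selected by
 * sat_mask is clamped, either to [0.0, 1.0] for normalized samplers or to
 * [0, textureSize(tex, 0)] for RECT samplers, e.g.
 *
 *    s' = clamp(s, 0.0, 1.0)              (normalized)
 *    s' = clamp(s, 0.0, float(width))     (RECT)
 *
 * The array index, if any, is left untouched.
 */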
static void
saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
{
   b->cursor = nir_before_instr(&tex->instr);

   /* Walk through the sources saturating the requested arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type != nir_tex_src_coord)
         continue;

      nir_ssa_def *src =
         nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);

      /* split src into components: */
      nir_ssa_def *comp[4];

      assume(tex->coord_components >= 1);

      for (unsigned j = 0; j < tex->coord_components; j++)
         comp[j] = nir_channel(b, src, j);

      /* clamp requested components, array index does not get clamped: */
      unsigned ncomp = tex->coord_components;
      if (tex->is_array)
         ncomp--;

      for (unsigned j = 0; j < ncomp; j++) {
         if ((1 << j) & sat_mask) {
            if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
               /* non-normalized texture coords, so clamp to texture
                * size rather than [0.0, 1.0]
                */
               nir_ssa_def *txs = get_texture_size(b, tex);
               comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
               comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
            } else {
               comp[j] = nir_fsat(b, comp[j]);
            }
         }
      }

      /* and move the result back into a single vecN: */
      src = nir_vec(b, comp, tex->coord_components);

      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[i].src,
                            nir_src_for_ssa(src));
   }
}
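/*
 * Result-swizzle note (derived from the code below): swizzle values 0-3
 * select the x/y/z/w channel of the texture result, while the special
 * values 4 and 5 select the constants 0 and 1 respectively, so e.g. a
 * swizzle of { 2, 1, 0, 5 } would produce
 * vec4(result.z, result.y, result.x, 1.0).
 */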
static nir_ssa_def *
get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
{
   nir_const_value v;

   memset(&v, 0, sizeof(v));

   if (swizzle_val == 4) {
      v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 0;
   } else {
      assert(swizzle_val == 5);
      if (type == nir_type_float)
         v.f32[0] = v.f32[1] = v.f32[2] = v.f32[3] = 1.0;
      else
         v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 1;
   }

   return nir_build_imm(b, 4, 32, v);
}

static void
swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
{
   assert(tex->dest.is_ssa);

   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *swizzled;
   if (tex->op == nir_texop_tg4) {
      if (swizzle[tex->component] < 4) {
         /* This one's easy */
         tex->component = swizzle[tex->component];
         return;
      } else {
         swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
      }
   } else {
      assert(nir_tex_instr_dest_size(tex) == 4);
      if (swizzle[0] < 4 && swizzle[1] < 4 &&
          swizzle[2] < 4 && swizzle[3] < 4) {
         unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
         /* We have no 0s or 1s, just emit a swizzling MOV */
         swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false);
      } else {
         nir_ssa_def *srcs[4];
         for (unsigned i = 0; i < 4; i++) {
            if (swizzle[i] < 4) {
               srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
            } else {
               srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
            }
         }
         swizzled = nir_vec(b, srcs, 4);
      }
   }

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
                                  swizzled->parent_instr);
}
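/*
 * sRGB lowering note: nir_format_srgb_to_linear() applies the standard
 * sRGB decode to the RGB channels, roughly
 *
 *    linear = c / 12.92                         if c <= 0.04045
 *    linear = pow((c + 0.055) / 1.055, 2.4)     otherwise
 *
 * while alpha is passed through unchanged.
 */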
static void
linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->dest.is_ssa);
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);

   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *rgb =
      nir_format_srgb_to_linear(b, nir_channels(b, &tex->dest.ssa, 0x7));

   /* alpha is untouched: */
   nir_ssa_def *result = nir_vec4(b,
                                  nir_channel(b, rgb, 0),
                                  nir_channel(b, rgb, 1),
                                  nir_channel(b, rgb, 2),
                                  nir_channel(b, &tex->dest.ssa, 3));

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(result),
                                  result->parent_instr);
}

static bool
nir_lower_tex_block(nir_block *block, nir_builder *b,
                    const nir_lower_tex_options *options)
{
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_tex)
         continue;

      nir_tex_instr *tex = nir_instr_as_tex(instr);
      bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));

      /* mask of src coords to saturate (clamp): */
      unsigned sat_mask = 0;

      if ((1 << tex->sampler_index) & options->saturate_r)
         sat_mask |= (1 << 2);    /* .z */
      if ((1 << tex->sampler_index) & options->saturate_t)
         sat_mask |= (1 << 1);    /* .y */
      if ((1 << tex->sampler_index) & options->saturate_s)
         sat_mask |= (1 << 0);    /* .x */

      /* If we are clamping any coords, we must lower projector first
       * as clamping happens *after* projection:
       */
      if (lower_txp || sat_mask) {
         project_src(b, tex);
         progress = true;
      }

      if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
          (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
          (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
           options->lower_rect_offset)) {
         progress = lower_offset(b, tex) || progress;
      }

      if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect) {
         lower_rect(b, tex);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_y_uv_external) {
         lower_y_uv_external(b, tex);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_y_u_v_external) {
         lower_y_u_v_external(b, tex);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_yx_xuxv_external) {
         lower_yx_xuxv_external(b, tex);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_xy_uxvx_external) {
         lower_xy_uxvx_external(b, tex);
         progress = true;
      }

      if (sat_mask) {
         saturate_src(b, tex, sat_mask);
         progress = true;
      }

      if (((1 << tex->texture_index) & options->swizzle_result) &&
          !nir_tex_instr_is_query(tex) &&
          !(tex->is_shadow && tex->is_new_style_shadow)) {
         swizzle_result(b, tex, options->swizzles[tex->texture_index]);
         progress = true;
      }

      /* should be after swizzle so we know which channels are rgb: */
      if (((1 << tex->texture_index) & options->lower_srgb) &&
          !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
         linearize_srgb_result(b, tex);
         progress = true;
      }

      if (tex->op == nir_texop_txd &&
          tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
          (options->lower_txd ||
           options->lower_txd_cube_map ||
           (tex->is_shadow && options->lower_txd_shadow))) {
         lower_gradient_cube_map(b, tex);
         progress = true;
         continue;
      }

      if (tex->op == nir_texop_txd &&
          (options->lower_txd ||
           (options->lower_txd_shadow &&
            tex->is_shadow && tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE))) {
         lower_gradient(b, tex);
         progress = true;
         continue;
      }

      /* TXF, TXS and TXL require a LOD but not everything we implement using
       * those three opcodes provides one.  Provide a default LOD of 0.
       */
      if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
          (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
           tex->op == nir_texop_txl || tex->op == nir_texop_query_levels ||
           (tex->op == nir_texop_tex &&
            b->shader->info.stage != MESA_SHADER_FRAGMENT))) {
         b->cursor = nir_before_instr(&tex->instr);
         nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(nir_imm_int(b, 0)));
         progress = true;
         continue;
      }
   }

   return progress;
}

static bool
nir_lower_tex_impl(nir_function_impl *impl,
                   const nir_lower_tex_options *options)
{
   bool progress = false;
   nir_builder builder;
   nir_builder_init(&builder, impl);

   nir_foreach_block(block, impl) {
      progress |= nir_lower_tex_block(block, &builder, options);
   }

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
   return progress;
}

bool
nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl)
         progress |= nir_lower_tex_impl(function->impl, options);
   }

   return progress;
}