1/*
2 * Copyright © 2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/*
25 * This lowering pass supports (as configured via nir_lower_tex_options)
26 * various texture related conversions:
27 *   + texture projector lowering: converts the coordinate division for
28 *     texture projection to be done in ALU instructions instead of
29 *     asking the texture operation to do so.
30 *   + lowering RECT: converts the un-normalized RECT texture coordinates
31 *     to normalized coordinates with txs plus ALU instructions
32 *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
33 *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
34 *     Note that this automatically triggers texture projector lowering if
35 *     needed, since clamping must happen after projector lowering.
36 */
37
38#include "nir.h"
39#include "nir_builder.h"
40#include "nir_format_convert.h"
41
42static void
43project_src(nir_builder *b, nir_tex_instr *tex)
44{
45   /* Find the projector in the srcs list, if present. */
46   int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
47   if (proj_index < 0)
48      return;
49
50   b->cursor = nir_before_instr(&tex->instr);
51
52   nir_ssa_def *inv_proj =
53      nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
54
55   /* Walk through the sources projecting the arguments. */
56   for (unsigned i = 0; i < tex->num_srcs; i++) {
57      switch (tex->src[i].src_type) {
58      case nir_tex_src_coord:
59      case nir_tex_src_comparator:
60         break;
61      default:
62         continue;
63      }
64      nir_ssa_def *unprojected =
65         nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
66      nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
67
68      /* Array indices don't get projected, so make an new vector with the
69       * coordinate's array index untouched.
70       */
71      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
72         switch (tex->coord_components) {
73         case 4:
74            projected = nir_vec4(b,
75                                 nir_channel(b, projected, 0),
76                                 nir_channel(b, projected, 1),
77                                 nir_channel(b, projected, 2),
78                                 nir_channel(b, unprojected, 3));
79            break;
80         case 3:
81            projected = nir_vec3(b,
82                                 nir_channel(b, projected, 0),
83                                 nir_channel(b, projected, 1),
84                                 nir_channel(b, unprojected, 2));
85            break;
86         case 2:
87            projected = nir_vec2(b,
88                                 nir_channel(b, projected, 0),
89                                 nir_channel(b, unprojected, 1));
90            break;
91         default:
92            unreachable("bad texture coord count for array");
93            break;
94         }
95      }
96
97      nir_instr_rewrite_src(&tex->instr,
98                            &tex->src[i].src,
99                            nir_src_for_ssa(projected));
100   }
101
102   nir_tex_instr_remove_src(tex, proj_index);
103}
104
105static nir_ssa_def *
106get_texture_size(nir_builder *b, nir_tex_instr *tex)
107{
108   b->cursor = nir_before_instr(&tex->instr);
109
110   nir_tex_instr *txs;
111
112   unsigned num_srcs = 1; /* One for the LOD */
113   for (unsigned i = 0; i < tex->num_srcs; i++) {
114      if (tex->src[i].src_type == nir_tex_src_texture_deref ||
115          tex->src[i].src_type == nir_tex_src_sampler_deref ||
116          tex->src[i].src_type == nir_tex_src_texture_offset ||
117          tex->src[i].src_type == nir_tex_src_sampler_offset ||
118          tex->src[i].src_type == nir_tex_src_texture_handle ||
119          tex->src[i].src_type == nir_tex_src_sampler_handle)
120         num_srcs++;
121   }
122
123   txs = nir_tex_instr_create(b->shader, num_srcs);
124   txs->op = nir_texop_txs;
125   txs->sampler_dim = tex->sampler_dim;
126   txs->is_array = tex->is_array;
127   txs->is_shadow = tex->is_shadow;
128   txs->is_new_style_shadow = tex->is_new_style_shadow;
129   txs->texture_index = tex->texture_index;
130   txs->sampler_index = tex->sampler_index;
131   txs->dest_type = nir_type_int;
132
133   unsigned idx = 0;
134   for (unsigned i = 0; i < tex->num_srcs; i++) {
135      if (tex->src[i].src_type == nir_tex_src_texture_deref ||
136          tex->src[i].src_type == nir_tex_src_sampler_deref ||
137          tex->src[i].src_type == nir_tex_src_texture_offset ||
138          tex->src[i].src_type == nir_tex_src_sampler_offset ||
139          tex->src[i].src_type == nir_tex_src_texture_handle ||
140          tex->src[i].src_type == nir_tex_src_sampler_handle) {
141         nir_src_copy(&txs->src[idx].src, &tex->src[i].src, txs);
142         txs->src[idx].src_type = tex->src[i].src_type;
143         idx++;
144      }
145   }
146   /* Add in an LOD because some back-ends require it */
147   txs->src[idx].src = nir_src_for_ssa(nir_imm_int(b, 0));
148   txs->src[idx].src_type = nir_tex_src_lod;
149
150   nir_ssa_dest_init(&txs->instr, &txs->dest,
151                     nir_tex_instr_dest_size(txs), 32, NULL);
152   nir_builder_instr_insert(b, &txs->instr);
153
154   return nir_i2f32(b, &txs->dest.ssa);
155}
156
157static nir_ssa_def *
158get_texture_lod(nir_builder *b, nir_tex_instr *tex)
159{
160   b->cursor = nir_before_instr(&tex->instr);
161
162   nir_tex_instr *tql;
163
164   unsigned num_srcs = 0;
165   for (unsigned i = 0; i < tex->num_srcs; i++) {
166      if (tex->src[i].src_type == nir_tex_src_coord ||
167          tex->src[i].src_type == nir_tex_src_texture_deref ||
168          tex->src[i].src_type == nir_tex_src_sampler_deref ||
169          tex->src[i].src_type == nir_tex_src_texture_offset ||
170          tex->src[i].src_type == nir_tex_src_sampler_offset ||
171          tex->src[i].src_type == nir_tex_src_texture_handle ||
172          tex->src[i].src_type == nir_tex_src_sampler_handle)
173         num_srcs++;
174   }
175
176   tql = nir_tex_instr_create(b->shader, num_srcs);
177   tql->op = nir_texop_lod;
178   tql->coord_components = tex->coord_components;
179   tql->sampler_dim = tex->sampler_dim;
180   tql->is_array = tex->is_array;
181   tql->is_shadow = tex->is_shadow;
182   tql->is_new_style_shadow = tex->is_new_style_shadow;
183   tql->texture_index = tex->texture_index;
184   tql->sampler_index = tex->sampler_index;
185   tql->dest_type = nir_type_float;
186
187   unsigned idx = 0;
188   for (unsigned i = 0; i < tex->num_srcs; i++) {
189      if (tex->src[i].src_type == nir_tex_src_coord ||
190          tex->src[i].src_type == nir_tex_src_texture_deref ||
191          tex->src[i].src_type == nir_tex_src_sampler_deref ||
192          tex->src[i].src_type == nir_tex_src_texture_offset ||
193          tex->src[i].src_type == nir_tex_src_sampler_offset ||
194          tex->src[i].src_type == nir_tex_src_texture_handle ||
195          tex->src[i].src_type == nir_tex_src_sampler_handle) {
196         nir_src_copy(&tql->src[idx].src, &tex->src[i].src, tql);
197         tql->src[idx].src_type = tex->src[i].src_type;
198         idx++;
199      }
200   }
201
202   nir_ssa_dest_init(&tql->instr, &tql->dest, 2, 32, NULL);
203   nir_builder_instr_insert(b, &tql->instr);
204
205   /* The LOD is the y component of the result */
206   return nir_channel(b, &tql->dest.ssa, 1);
207}
208
209static bool
210lower_offset(nir_builder *b, nir_tex_instr *tex)
211{
212   int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
213   if (offset_index < 0)
214      return false;
215
216   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
217   assert(coord_index >= 0);
218
219   assert(tex->src[offset_index].src.is_ssa);
220   assert(tex->src[coord_index].src.is_ssa);
221   nir_ssa_def *offset = tex->src[offset_index].src.ssa;
222   nir_ssa_def *coord = tex->src[coord_index].src.ssa;
223
224   b->cursor = nir_before_instr(&tex->instr);
225
226   nir_ssa_def *offset_coord;
227   if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
228      if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
229         offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
230      } else {
231         nir_ssa_def *txs = get_texture_size(b, tex);
232         nir_ssa_def *scale = nir_frcp(b, txs);
233
234         offset_coord = nir_fadd(b, coord,
235                                 nir_fmul(b,
236                                          nir_i2f32(b, offset),
237                                          scale));
238      }
239   } else {
240      offset_coord = nir_iadd(b, coord, offset);
241   }
242
243   if (tex->is_array) {
244      /* The offset is not applied to the array index */
245      if (tex->coord_components == 2) {
246         offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
247                                    nir_channel(b, coord, 1));
248      } else if (tex->coord_components == 3) {
249         offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
250                                    nir_channel(b, offset_coord, 1),
251                                    nir_channel(b, coord, 2));
252      } else {
253         unreachable("Invalid number of components");
254      }
255   }
256
257   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
258                         nir_src_for_ssa(offset_coord));
259
260   nir_tex_instr_remove_src(tex, offset_index);
261
262   return true;
263}
264
265static void
266lower_rect(nir_builder *b, nir_tex_instr *tex)
267{
268   nir_ssa_def *txs = get_texture_size(b, tex);
269   nir_ssa_def *scale = nir_frcp(b, txs);
270
271   /* Walk through the sources normalizing the requested arguments. */
272   for (unsigned i = 0; i < tex->num_srcs; i++) {
273      if (tex->src[i].src_type != nir_tex_src_coord)
274         continue;
275
276      nir_ssa_def *coords =
277         nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
278      nir_instr_rewrite_src(&tex->instr,
279                            &tex->src[i].src,
280                            nir_src_for_ssa(nir_fmul(b, coords, scale)));
281   }
282
283   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
284}
285
286static void
287lower_implicit_lod(nir_builder *b, nir_tex_instr *tex)
288{
289   assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb);
290   assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0);
291   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
292   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);
293
294   b->cursor = nir_before_instr(&tex->instr);
295
296   nir_ssa_def *lod = get_texture_lod(b, tex);
297
298   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
299   if (bias_idx >= 0) {
300      /* If we have a bias, add it in */
301      lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
302      nir_tex_instr_remove_src(tex, bias_idx);
303   }
304
305   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
306   if (min_lod_idx >= 0) {
307      /* If we have a minimum LOD, clamp LOD accordingly */
308      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
309      nir_tex_instr_remove_src(tex, min_lod_idx);
310   }
311
312   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
313   tex->op = nir_texop_txl;
314}
315
316static nir_ssa_def *
317sample_plane(nir_builder *b, nir_tex_instr *tex, int plane,
318             const nir_lower_tex_options *options)
319{
320   assert(tex->dest.is_ssa);
321   assert(nir_tex_instr_dest_size(tex) == 4);
322   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
323   assert(tex->op == nir_texop_tex);
324   assert(tex->coord_components == 2);
325
326   nir_tex_instr *plane_tex =
327      nir_tex_instr_create(b->shader, tex->num_srcs + 1);
328   for (unsigned i = 0; i < tex->num_srcs; i++) {
329      nir_src_copy(&plane_tex->src[i].src, &tex->src[i].src, plane_tex);
330      plane_tex->src[i].src_type = tex->src[i].src_type;
331   }
332   plane_tex->src[tex->num_srcs].src = nir_src_for_ssa(nir_imm_int(b, plane));
333   plane_tex->src[tex->num_srcs].src_type = nir_tex_src_plane;
334   plane_tex->op = nir_texop_tex;
335   plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
336   plane_tex->dest_type = nir_type_float;
337   plane_tex->coord_components = 2;
338
339   plane_tex->texture_index = tex->texture_index;
340   plane_tex->sampler_index = tex->sampler_index;
341
342   nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4, 32, NULL);
343
344   nir_builder_instr_insert(b, &plane_tex->instr);
345
346   /* If scaling_factor is set, return a scaled value. */
347   if (options->scale_factors[tex->texture_index])
348      return nir_fmul_imm(b, &plane_tex->dest.ssa,
349                          options->scale_factors[tex->texture_index]);
350
351   return &plane_tex->dest.ssa;
352}
353
354static void
355convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
356                   nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,
357                   nir_ssa_def *a)
358{
359   nir_const_value m[3][4] = {
360      { { .f32 = 1.16438356f }, { .f32 =  1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 0.0f } },
361      { { .f32 = 0.0f        }, { .f32 = -0.39176229f }, { .f32 = 2.01723214f }, { .f32 = 0.0f } },
362      { { .f32 = 1.59602678f }, { .f32 = -0.81296764f }, { .f32 = 0.0f        }, { .f32 = 0.0f } },
363   };
364
365   nir_ssa_def *offset =
366      nir_vec4(b,
367               nir_imm_float(b, -0.874202214f),
368               nir_imm_float(b,  0.531667820f),
369               nir_imm_float(b, -1.085630787f),
370               a);
371
372   nir_ssa_def *result =
373      nir_ffma(b, y, nir_build_imm(b, 4, 32, m[0]),
374               nir_ffma(b, u, nir_build_imm(b, 4, 32, m[1]),
375                        nir_ffma(b, v, nir_build_imm(b, 4, 32, m[2]),
376                                 offset)));
377
378   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result));
379}
380
381static void
382lower_y_uv_external(nir_builder *b, nir_tex_instr *tex,
383                    const nir_lower_tex_options *options)
384{
385   b->cursor = nir_after_instr(&tex->instr);
386
387   nir_ssa_def *y = sample_plane(b, tex, 0, options);
388   nir_ssa_def *uv = sample_plane(b, tex, 1, options);
389
390   convert_yuv_to_rgb(b, tex,
391                      nir_channel(b, y, 0),
392                      nir_channel(b, uv, 0),
393                      nir_channel(b, uv, 1),
394                      nir_imm_float(b, 1.0f));
395}
396
397static void
398lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex,
399                     const nir_lower_tex_options *options)
400{
401   b->cursor = nir_after_instr(&tex->instr);
402
403   nir_ssa_def *y = sample_plane(b, tex, 0, options);
404   nir_ssa_def *u = sample_plane(b, tex, 1, options);
405   nir_ssa_def *v = sample_plane(b, tex, 2, options);
406
407   convert_yuv_to_rgb(b, tex,
408                      nir_channel(b, y, 0),
409                      nir_channel(b, u, 0),
410                      nir_channel(b, v, 0),
411                      nir_imm_float(b, 1.0f));
412}
413
414static void
415lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex,
416                       const nir_lower_tex_options *options)
417{
418   b->cursor = nir_after_instr(&tex->instr);
419
420   nir_ssa_def *y = sample_plane(b, tex, 0, options);
421   nir_ssa_def *xuxv = sample_plane(b, tex, 1, options);
422
423   convert_yuv_to_rgb(b, tex,
424                      nir_channel(b, y, 0),
425                      nir_channel(b, xuxv, 1),
426                      nir_channel(b, xuxv, 3),
427                      nir_imm_float(b, 1.0f));
428}
429
430static void
431lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex,
432                       const nir_lower_tex_options *options)
433{
434  b->cursor = nir_after_instr(&tex->instr);
435
436  nir_ssa_def *y = sample_plane(b, tex, 0, options);
437  nir_ssa_def *uxvx = sample_plane(b, tex, 1, options);
438
439  convert_yuv_to_rgb(b, tex,
440                     nir_channel(b, y, 1),
441                     nir_channel(b, uxvx, 0),
442                     nir_channel(b, uxvx, 2),
443                     nir_imm_float(b, 1.0f));
444}
445
446static void
447lower_ayuv_external(nir_builder *b, nir_tex_instr *tex,
448                    const nir_lower_tex_options *options)
449{
450  b->cursor = nir_after_instr(&tex->instr);
451
452  nir_ssa_def *ayuv = sample_plane(b, tex, 0, options);
453
454  convert_yuv_to_rgb(b, tex,
455                     nir_channel(b, ayuv, 2),
456                     nir_channel(b, ayuv, 1),
457                     nir_channel(b, ayuv, 0),
458                     nir_channel(b, ayuv, 3));
459}
460
461static void
462lower_xyuv_external(nir_builder *b, nir_tex_instr *tex,
463                    const nir_lower_tex_options *options)
464{
465  b->cursor = nir_after_instr(&tex->instr);
466
467  nir_ssa_def *xyuv = sample_plane(b, tex, 0, options);
468
469  convert_yuv_to_rgb(b, tex,
470                     nir_channel(b, xyuv, 2),
471                     nir_channel(b, xyuv, 1),
472                     nir_channel(b, xyuv, 0),
473                     nir_imm_float(b, 1.0f));
474}
475
476/*
477 * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod
478 * computed from the gradients.
479 */
480static void
481replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex)
482{
483   assert(tex->op == nir_texop_txd);
484
485   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
486   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));
487
488   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
489   if (min_lod_idx >= 0) {
490      /* If we have a minimum LOD, clamp LOD accordingly */
491      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
492      nir_tex_instr_remove_src(tex, min_lod_idx);
493   }
494
495   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
496   tex->op = nir_texop_txl;
497}
498
/*
 * Lowers a cube-map nir_texop_txd to nir_texop_txl by computing the LOD
 * from the explicit gradients in ALU code.
 *
 * The ALU instructions are emitted right before tex: get_texture_size()
 * places the builder cursor there and nothing below moves it.  The final
 * rewrite of the instruction is done by replace_gradient_with_lod().
 */
static void
lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0 */
   nir_ssa_def *size = get_texture_size(b, tex);

   /* Cubemap texture lookups first generate a texture coordinate normalized
    * to [-1, 1] on the appropriate face. The appropriate face is determined
    * by which component has largest magnitude and its sign. The texture
    * coordinate is the quotient of the remaining texture coordinates against
    * that absolute value of the component of largest magnitude. This
    * division requires that the computing of the derivative of the texel
    * coordinate must use the quotient rule. The high level GLSL code is as
    * follows:
    *
    * Step 1: selection
    *
    * vec3 abs_p, Q, dQdx, dQdy;
    * abs_p = abs(ir->coordinate);
    * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
    *    Q = ir->coordinate.yzx;
    *    dQdx = ir->lod_info.grad.dPdx.yzx;
    *    dQdy = ir->lod_info.grad.dPdy.yzx;
    * }
    * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
    *    Q = ir->coordinate.xzy;
    *    dQdx = ir->lod_info.grad.dPdx.xzy;
    *    dQdy = ir->lod_info.grad.dPdy.xzy;
    * }
    * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
    *    Q = ir->coordinate;
    *    dQdx = ir->lod_info.grad.dPdx;
    *    dQdy = ir->lod_info.grad.dPdy;
    * }
    *
    * Step 2: use quotient rule to compute derivative. The normalized to
    * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
    * only concerned with the magnitudes of the derivatives whose values are
    * not affected by the sign. We drop the sign from the computation.
    *
    * vec2 dx, dy;
    * float recip;
    *
    * recip = 1.0 / Q.z;
    * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
    * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
    *
    * Step 3: compute LOD. At this point we have the derivatives of the
    * texture coordinates normalized to [-1,1]. We take the LOD to be
    *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
    *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
    *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
    *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
    *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
    * where L is the dimension of the cubemap. The code is:
    *
    * float M, result;
    * M = max(dot(dx, dx), dot(dy, dy));
    * L = textureSize(sampler, 0).x;
    * result = -1.0 + 0.5 * log2(L * L * M);
    */

   /* coordinate */
   nir_ssa_def *p =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;

   /* unmodified dPdx, dPdy values */
   nir_ssa_def *dPdx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *dPdy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *abs_p = nir_fabs(b, p);
   nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
   nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
   nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);

   /* 1. compute selector */
   nir_ssa_def *Q, *dQdx, *dQdy;

   /* Only the z and y conditions are materialized; x is the fallback of the
    * nested selects below.  This gives z priority over y and y over x, just
    * like the sequence of ifs in the pseudo-code where the last match wins.
    */
   nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
   nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));

   unsigned yzx[3] = { 1, 2, 0 };
   unsigned xzy[3] = { 0, 2, 1 };

   Q = nir_bcsel(b, cond_z,
                 p,
                 nir_bcsel(b, cond_y,
                           nir_swizzle(b, p, xzy, 3, false),
                           nir_swizzle(b, p, yzx, 3, false)));

   dQdx = nir_bcsel(b, cond_z,
                    dPdx,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdx, xzy, 3, false),
                              nir_swizzle(b, dPdx, yzx, 3, false)));

   dQdy = nir_bcsel(b, cond_z,
                    dPdy,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdy, xzy, 3, false),
                              nir_swizzle(b, dPdy, yzx, 3, false)));

   /* 2. quotient rule */

   /* tmp = Q.xy * recip;
    * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
    * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
    */
   nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));

   nir_ssa_def *Q_xy = nir_channels(b, Q, 0x3);
   nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);

   nir_ssa_def *dQdx_xy = nir_channels(b, dQdx, 0x3);
   nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
   nir_ssa_def *dx =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));

   nir_ssa_def *dQdy_xy = nir_channels(b, dQdy, 0x3);
   nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
   nir_ssa_def *dy =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));

   /* M = max(dot(dx, dx), dot(dy, dy)); */
   nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));

   /* size has textureSize() of LOD 0 */
   nir_ssa_def *L = nir_channel(b, size, 0);

   /* lod = -1.0 + 0.5 * log2(L * L * M); */
   nir_ssa_def *lod =
      nir_fadd(b,
               nir_imm_float(b, -1.0f),
               nir_fmul(b,
                        nir_imm_float(b, 0.5f),
                        nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));

   /* 3. Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}
645
646static void
647lower_gradient(nir_builder *b, nir_tex_instr *tex)
648{
649   /* Cubes are more complicated and have their own function */
650   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
651      lower_gradient_cube_map(b, tex);
652      return;
653   }
654
655   assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
656   assert(tex->op == nir_texop_txd);
657   assert(tex->dest.is_ssa);
658
659   /* Use textureSize() to get the width and height of LOD 0 */
660   unsigned component_mask;
661   switch (tex->sampler_dim) {
662   case GLSL_SAMPLER_DIM_3D:
663      component_mask = 7;
664      break;
665   case GLSL_SAMPLER_DIM_1D:
666      component_mask = 1;
667      break;
668   default:
669      component_mask = 3;
670      break;
671   }
672
673   nir_ssa_def *size =
674      nir_channels(b, get_texture_size(b, tex), component_mask);
675
676   /* Scale the gradients by width and height.  Effectively, the incoming
677    * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
678    * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
679    */
680   nir_ssa_def *ddx =
681      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
682   nir_ssa_def *ddy =
683      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;
684
685   nir_ssa_def *dPdx = nir_fmul(b, ddx, size);
686   nir_ssa_def *dPdy = nir_fmul(b, ddy, size);
687
688   nir_ssa_def *rho;
689   if (dPdx->num_components == 1) {
690      rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
691   } else {
692      rho = nir_fmax(b,
693                     nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
694                     nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
695   }
696
697   /* lod = log2(rho).  We're ignoring GL state biases for now. */
698   nir_ssa_def *lod = nir_flog2(b, rho);
699
700   /* Replace the gradient instruction with an equivalent lod instruction */
701   replace_gradient_with_lod(b, lod, tex);
702}
703
704static void
705saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
706{
707   b->cursor = nir_before_instr(&tex->instr);
708
709   /* Walk through the sources saturating the requested arguments. */
710   for (unsigned i = 0; i < tex->num_srcs; i++) {
711      if (tex->src[i].src_type != nir_tex_src_coord)
712         continue;
713
714      nir_ssa_def *src =
715         nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
716
717      /* split src into components: */
718      nir_ssa_def *comp[4];
719
720      assume(tex->coord_components >= 1);
721
722      for (unsigned j = 0; j < tex->coord_components; j++)
723         comp[j] = nir_channel(b, src, j);
724
725      /* clamp requested components, array index does not get clamped: */
726      unsigned ncomp = tex->coord_components;
727      if (tex->is_array)
728         ncomp--;
729
730      for (unsigned j = 0; j < ncomp; j++) {
731         if ((1 << j) & sat_mask) {
732            if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
733               /* non-normalized texture coords, so clamp to texture
734                * size rather than [0.0, 1.0]
735                */
736               nir_ssa_def *txs = get_texture_size(b, tex);
737               comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
738               comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
739            } else {
740               comp[j] = nir_fsat(b, comp[j]);
741            }
742         }
743      }
744
745      /* and move the result back into a single vecN: */
746      src = nir_vec(b, comp, tex->coord_components);
747
748      nir_instr_rewrite_src(&tex->instr,
749                            &tex->src[i].src,
750                            nir_src_for_ssa(src));
751   }
752}
753
754static nir_ssa_def *
755get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
756{
757   nir_const_value v[4];
758
759   memset(&v, 0, sizeof(v));
760
761   if (swizzle_val == 4) {
762      v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0;
763   } else {
764      assert(swizzle_val == 5);
765      if (type == nir_type_float)
766         v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0;
767      else
768         v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1;
769   }
770
771   return nir_build_imm(b, 4, 32, v);
772}
773
774static void
775swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex)
776{
777   assert(tex->dest.is_ssa);
778
779   b->cursor = nir_after_instr(&tex->instr);
780
781   assert(nir_tex_instr_dest_size(tex) == 4);
782   unsigned swiz[4] = { 2, 3, 1, 0 };
783   nir_ssa_def *swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false);
784
785   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
786                                  swizzled->parent_instr);
787}
788
789static void
790swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
791{
792   assert(tex->dest.is_ssa);
793
794   b->cursor = nir_after_instr(&tex->instr);
795
796   nir_ssa_def *swizzled;
797   if (tex->op == nir_texop_tg4) {
798      if (swizzle[tex->component] < 4) {
799         /* This one's easy */
800         tex->component = swizzle[tex->component];
801         return;
802      } else {
803         swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
804      }
805   } else {
806      assert(nir_tex_instr_dest_size(tex) == 4);
807      if (swizzle[0] < 4 && swizzle[1] < 4 &&
808          swizzle[2] < 4 && swizzle[3] < 4) {
809         unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
810         /* We have no 0s or 1s, just emit a swizzling MOV */
811         swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false);
812      } else {
813         nir_ssa_def *srcs[4];
814         for (unsigned i = 0; i < 4; i++) {
815            if (swizzle[i] < 4) {
816               srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
817            } else {
818               srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
819            }
820         }
821         swizzled = nir_vec(b, srcs, 4);
822      }
823   }
824
825   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
826                                  swizzled->parent_instr);
827}
828
829static void
830linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
831{
832   assert(tex->dest.is_ssa);
833   assert(nir_tex_instr_dest_size(tex) == 4);
834   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
835
836   b->cursor = nir_after_instr(&tex->instr);
837
838   nir_ssa_def *rgb =
839      nir_format_srgb_to_linear(b, nir_channels(b, &tex->dest.ssa, 0x7));
840
841   /* alpha is untouched: */
842   nir_ssa_def *result = nir_vec4(b,
843                                  nir_channel(b, rgb, 0),
844                                  nir_channel(b, rgb, 1),
845                                  nir_channel(b, rgb, 2),
846                                  nir_channel(b, &tex->dest.ssa, 3));
847
848   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(result),
849                                  result->parent_instr);
850}
851
852/**
853 * Lowers texture instructions from giving a vec4 result to a vec2 of f16,
854 * i16, or u16, or a single unorm4x8 value.
855 *
856 * Note that we don't change the destination num_components, because
857 * nir_tex_instr_dest_size() will still return 4.  The driver is just expected
858 * to not store the other channels, given that nothing at the NIR level will
859 * read them.
860 */
861static void
862lower_tex_packing(nir_builder *b, nir_tex_instr *tex,
863                  const nir_lower_tex_options *options)
864{
865   nir_ssa_def *color = &tex->dest.ssa;
866
867   b->cursor = nir_after_instr(&tex->instr);
868
869   switch (options->lower_tex_packing[tex->sampler_index]) {
870   case nir_lower_tex_packing_none:
871      return;
872
873   case nir_lower_tex_packing_16: {
874      static const unsigned bits[4] = {16, 16, 16, 16};
875
876      switch (nir_alu_type_get_base_type(tex->dest_type)) {
877      case nir_type_float:
878         if (tex->is_shadow && tex->is_new_style_shadow) {
879            color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0));
880         } else {
881            nir_ssa_def *rg = nir_channel(b, color, 0);
882            nir_ssa_def *ba = nir_channel(b, color, 1);
883            color = nir_vec4(b,
884                             nir_unpack_half_2x16_split_x(b, rg),
885                             nir_unpack_half_2x16_split_y(b, rg),
886                             nir_unpack_half_2x16_split_x(b, ba),
887                             nir_unpack_half_2x16_split_y(b, ba));
888         }
889         break;
890
891      case nir_type_int:
892         color = nir_format_unpack_sint(b, color, bits, 4);
893         break;
894
895      case nir_type_uint:
896         color = nir_format_unpack_uint(b, color, bits, 4);
897         break;
898
899      default:
900         unreachable("unknown base type");
901      }
902      break;
903   }
904
905   case nir_lower_tex_packing_8:
906      assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
907      color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0));
908      break;
909   }
910
911   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(color),
912                                  color->parent_instr);
913}
914
915static bool
916sampler_index_lt(nir_tex_instr *tex, unsigned max)
917{
918   assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1);
919
920   unsigned sampler_index = tex->sampler_index;
921
922   int sampler_offset_idx =
923      nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset);
924   if (sampler_offset_idx >= 0) {
925      if (!nir_src_is_const(tex->src[sampler_offset_idx].src))
926         return false;
927
928      sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src);
929   }
930
931   return sampler_index < max;
932}
933
934static bool
935lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
936{
937   assert(tex->op == nir_texop_tg4);
938   assert(nir_tex_instr_has_explicit_tg4_offsets(tex));
939   assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1);
940
941   b->cursor = nir_after_instr(&tex->instr);
942
943   nir_ssa_def *dest[4];
944   for (unsigned i = 0; i < 4; ++i) {
945      nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1);
946      tex_copy->op = tex->op;
947      tex_copy->coord_components = tex->coord_components;
948      tex_copy->sampler_dim = tex->sampler_dim;
949      tex_copy->is_array = tex->is_array;
950      tex_copy->is_shadow = tex->is_shadow;
951      tex_copy->is_new_style_shadow = tex->is_new_style_shadow;
952      tex_copy->component = tex->component;
953      tex_copy->dest_type = tex->dest_type;
954
955      for (unsigned j = 0; j < tex->num_srcs; ++j) {
956         nir_src_copy(&tex_copy->src[j].src, &tex->src[j].src, tex_copy);
957         tex_copy->src[j].src_type = tex->src[j].src_type;
958      }
959
960      nir_tex_src src;
961      src.src = nir_src_for_ssa(nir_imm_ivec2(b, tex->tg4_offsets[i][0],
962                                                 tex->tg4_offsets[i][1]));
963      src.src_type = nir_tex_src_offset;
964      tex_copy->src[tex_copy->num_srcs - 1] = src;
965
966      nir_ssa_dest_init(&tex_copy->instr, &tex_copy->dest,
967                        nir_tex_instr_dest_size(tex), 32, NULL);
968
969      nir_builder_instr_insert(b, &tex_copy->instr);
970
971      dest[i] = nir_channel(b, &tex_copy->dest.ssa, 3);
972   }
973
974   nir_ssa_def *res = nir_vec4(b, dest[0], dest[1], dest[2], dest[3]);
975   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(res));
976   nir_instr_remove(&tex->instr);
977
978   return true;
979}
980
981static bool
982nir_lower_tex_block(nir_block *block, nir_builder *b,
983                    const nir_lower_tex_options *options)
984{
985   bool progress = false;
986
987   nir_foreach_instr_safe(instr, block) {
988      if (instr->type != nir_instr_type_tex)
989         continue;
990
991      nir_tex_instr *tex = nir_instr_as_tex(instr);
992      bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));
993
994      /* mask of src coords to saturate (clamp): */
995      unsigned sat_mask = 0;
996
997      if ((1 << tex->sampler_index) & options->saturate_r)
998         sat_mask |= (1 << 2);    /* .z */
999      if ((1 << tex->sampler_index) & options->saturate_t)
1000         sat_mask |= (1 << 1);    /* .y */
1001      if ((1 << tex->sampler_index) & options->saturate_s)
1002         sat_mask |= (1 << 0);    /* .x */
1003
1004      /* If we are clamping any coords, we must lower projector first
1005       * as clamping happens *after* projection:
1006       */
1007      if (lower_txp || sat_mask) {
1008         project_src(b, tex);
1009         progress = true;
1010      }
1011
1012      if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
1013          (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
1014          (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
1015           options->lower_rect_offset)) {
1016         progress = lower_offset(b, tex) || progress;
1017      }
1018
1019      if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect) {
1020         lower_rect(b, tex);
1021         progress = true;
1022      }
1023
1024      if ((1 << tex->texture_index) & options->lower_y_uv_external) {
1025         lower_y_uv_external(b, tex, options);
1026         progress = true;
1027      }
1028
1029      if ((1 << tex->texture_index) & options->lower_y_u_v_external) {
1030         lower_y_u_v_external(b, tex, options);
1031         progress = true;
1032      }
1033
1034      if ((1 << tex->texture_index) & options->lower_yx_xuxv_external) {
1035         lower_yx_xuxv_external(b, tex, options);
1036         progress = true;
1037      }
1038
1039      if ((1 << tex->texture_index) & options->lower_xy_uxvx_external) {
1040         lower_xy_uxvx_external(b, tex, options);
1041         progress = true;
1042      }
1043
1044      if ((1 << tex->texture_index) & options->lower_ayuv_external) {
1045         lower_ayuv_external(b, tex, options);
1046         progress = true;
1047      }
1048
1049      if ((1 << tex->texture_index) & options->lower_xyuv_external) {
1050         lower_xyuv_external(b, tex, options);
1051         progress = true;
1052      }
1053
1054      if (sat_mask) {
1055         saturate_src(b, tex, sat_mask);
1056         progress = true;
1057      }
1058
1059      if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) {
1060         swizzle_tg4_broadcom(b, tex);
1061         progress = true;
1062      }
1063
1064      if (((1 << tex->texture_index) & options->swizzle_result) &&
1065          !nir_tex_instr_is_query(tex) &&
1066          !(tex->is_shadow && tex->is_new_style_shadow)) {
1067         swizzle_result(b, tex, options->swizzles[tex->texture_index]);
1068         progress = true;
1069      }
1070
1071      /* should be after swizzle so we know which channels are rgb: */
1072      if (((1 << tex->texture_index) & options->lower_srgb) &&
1073          !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
1074         linearize_srgb_result(b, tex);
1075         progress = true;
1076      }
1077
1078      const bool has_min_lod =
1079         nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0;
1080      const bool has_offset =
1081         nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;
1082
1083      if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod &&
1084          options->lower_txb_shadow_clamp) {
1085         lower_implicit_lod(b, tex);
1086         progress = true;
1087      }
1088
1089      if (options->lower_tex_packing[tex->sampler_index] !=
1090          nir_lower_tex_packing_none &&
1091          tex->op != nir_texop_txs &&
1092          tex->op != nir_texop_query_levels) {
1093         lower_tex_packing(b, tex, options);
1094         progress = true;
1095      }
1096
1097      if (tex->op == nir_texop_txd &&
1098          (options->lower_txd ||
1099           (options->lower_txd_shadow && tex->is_shadow) ||
1100           (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) ||
1101           (options->lower_txd_offset_clamp && has_offset && has_min_lod) ||
1102           (options->lower_txd_clamp_bindless_sampler && has_min_lod &&
1103            nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) ||
1104           (options->lower_txd_clamp_if_sampler_index_not_lt_16 &&
1105            has_min_lod && !sampler_index_lt(tex, 16)) ||
1106           (options->lower_txd_cube_map &&
1107            tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) ||
1108           (options->lower_txd_3d &&
1109            tex->sampler_dim == GLSL_SAMPLER_DIM_3D))) {
1110         lower_gradient(b, tex);
1111         progress = true;
1112         continue;
1113      }
1114
1115      bool shader_supports_implicit_lod =
1116         b->shader->info.stage == MESA_SHADER_FRAGMENT ||
1117         (b->shader->info.stage == MESA_SHADER_COMPUTE &&
1118          b->shader->info.cs.derivative_group != DERIVATIVE_GROUP_NONE);
1119
1120      /* TXF, TXS and TXL require a LOD but not everything we implement using those
1121       * three opcodes provides one.  Provide a default LOD of 0.
1122       */
1123      if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
1124          (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
1125           tex->op == nir_texop_txl || tex->op == nir_texop_query_levels ||
1126           (tex->op == nir_texop_tex && !shader_supports_implicit_lod))) {
1127         b->cursor = nir_before_instr(&tex->instr);
1128         nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(nir_imm_int(b, 0)));
1129         if (tex->op == nir_texop_tex && options->lower_tex_without_implicit_lod)
1130            tex->op = nir_texop_txl;
1131         progress = true;
1132         continue;
1133      }
1134
1135      /* has to happen after all the other lowerings as the original tg4 gets
1136       * replaced by 4 tg4 instructions.
1137       */
1138      if (tex->op == nir_texop_tg4 &&
1139          nir_tex_instr_has_explicit_tg4_offsets(tex) &&
1140          options->lower_tg4_offsets) {
1141         progress |= lower_tg4_offsets(b, tex);
1142         continue;
1143      }
1144   }
1145
1146   return progress;
1147}
1148
1149static bool
1150nir_lower_tex_impl(nir_function_impl *impl,
1151                   const nir_lower_tex_options *options)
1152{
1153   bool progress = false;
1154   nir_builder builder;
1155   nir_builder_init(&builder, impl);
1156
1157   nir_foreach_block(block, impl) {
1158      progress |= nir_lower_tex_block(block, &builder, options);
1159   }
1160
1161   nir_metadata_preserve(impl, nir_metadata_block_index |
1162                               nir_metadata_dominance);
1163   return progress;
1164}
1165
1166bool
1167nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
1168{
1169   bool progress = false;
1170
1171   nir_foreach_function(function, shader) {
1172      if (function->impl)
1173         progress |= nir_lower_tex_impl(function->impl, options);
1174   }
1175
1176   return progress;
1177}
1178