1/*
2 * Copyright © 2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/*
25 * This lowering pass supports (as configured via nir_lower_tex_options)
26 * various texture related conversions:
27 *   + texture projector lowering: converts the coordinate division for
28 *     texture projection to be done in ALU instructions instead of
29 *     asking the texture operation to do so.
30 *   + lowering RECT: converts the un-normalized RECT texture coordinates
31 *     to normalized coordinates with txs plus ALU instructions
32 *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
33 *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
34 *     Note that this automatically triggers texture projector lowering if
35 *     needed, since clamping must happen after projector lowering.
36 */
37
38#include "nir.h"
39#include "nir_builder.h"
40#include "nir_builtin_builder.h"
41#include "nir_format_convert.h"
42
/* A 3x4 block of immediates: three rows, each usable as a 4-component
 * nir_build_imm() payload.  Used for the YCbCr->RGB conversion matrices
 * below (each row is broadcast as one vec4 coefficient vector).
 */
typedef struct nir_const_value_3_4 {
   nir_const_value v[3][4];
} nir_const_value_3_4;
46
/* YCbCr -> RGB conversion matrices for the BT.601, BT.709, and BT.2020
 * colorspaces.  Each table holds the three coefficient column vectors
 * multiplied against Y, Cb (u), and Cr (v) respectively in
 * convert_yuv_to_rgb().  The 1.164/2.0x/1.x magnitudes match the usual
 * limited-range (video-range) matrices -- presumably the inputs are
 * MPEG/limited range; confirm against the producers of these textures.
 */
static const nir_const_value_3_4 bt601_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 =  1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f        }, { .f32 = -0.39176229f }, { .f32 = 2.01723214f } },
   { { .f32 = 1.59602678f }, { .f32 = -0.81296764f }, { .f32 = 0.0f        } },
} };
static const nir_const_value_3_4 bt709_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 =  1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f        }, { .f32 = -0.21324861f }, { .f32 = 2.11240179f } },
   { { .f32 = 1.79274107f }, { .f32 = -0.53290933f }, { .f32 = 0.0f        } },
} };
static const nir_const_value_3_4 bt2020_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 =  1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f        }, { .f32 = -0.18732610f }, { .f32 = 2.14177232f } },
   { { .f32 = 1.67867411f }, { .f32 = -0.65042432f }, { .f32 = 0.0f        } },
} };

/* Constant offsets added after the matrix multiply (the .xyz of the offset
 * vec4 built in convert_yuv_to_rgb(); .w carries alpha through unchanged).
 */
static const float bt601_csc_offsets[3] = {
   -0.874202218f, 0.531667823f, -1.085630789f
};
static const float bt709_csc_offsets[3] = {
   -0.972945075f, 0.301482665f, -1.133402218f
};
static const float bt2020_csc_offsets[3] = {
   -0.915687932f, 0.347458499f, -1.148145075f
};
72
73static bool
74project_src(nir_builder *b, nir_tex_instr *tex)
75{
76   /* Find the projector in the srcs list, if present. */
77   int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
78   if (proj_index < 0)
79      return false;
80
81   b->cursor = nir_before_instr(&tex->instr);
82
83   nir_ssa_def *inv_proj =
84      nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
85
86   /* Walk through the sources projecting the arguments. */
87   for (unsigned i = 0; i < tex->num_srcs; i++) {
88      switch (tex->src[i].src_type) {
89      case nir_tex_src_coord:
90      case nir_tex_src_comparator:
91         break;
92      default:
93         continue;
94      }
95      nir_ssa_def *unprojected =
96         nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
97      nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
98
99      /* Array indices don't get projected, so make an new vector with the
100       * coordinate's array index untouched.
101       */
102      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
103         switch (tex->coord_components) {
104         case 4:
105            projected = nir_vec4(b,
106                                 nir_channel(b, projected, 0),
107                                 nir_channel(b, projected, 1),
108                                 nir_channel(b, projected, 2),
109                                 nir_channel(b, unprojected, 3));
110            break;
111         case 3:
112            projected = nir_vec3(b,
113                                 nir_channel(b, projected, 0),
114                                 nir_channel(b, projected, 1),
115                                 nir_channel(b, unprojected, 2));
116            break;
117         case 2:
118            projected = nir_vec2(b,
119                                 nir_channel(b, projected, 0),
120                                 nir_channel(b, unprojected, 1));
121            break;
122         default:
123            unreachable("bad texture coord count for array");
124            break;
125         }
126      }
127
128      nir_instr_rewrite_src(&tex->instr,
129                            &tex->src[i].src,
130                            nir_src_for_ssa(projected));
131   }
132
133   nir_tex_instr_remove_src(tex, proj_index);
134   return true;
135}
136
137static bool
138lower_offset(nir_builder *b, nir_tex_instr *tex)
139{
140   int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
141   if (offset_index < 0)
142      return false;
143
144   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
145   assert(coord_index >= 0);
146
147   assert(tex->src[offset_index].src.is_ssa);
148   assert(tex->src[coord_index].src.is_ssa);
149   nir_ssa_def *offset = tex->src[offset_index].src.ssa;
150   nir_ssa_def *coord = tex->src[coord_index].src.ssa;
151
152   b->cursor = nir_before_instr(&tex->instr);
153
154   nir_ssa_def *offset_coord;
155   if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
156      if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
157         offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
158      } else {
159         nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
160         nir_ssa_def *scale = nir_frcp(b, txs);
161
162         offset_coord = nir_fadd(b, coord,
163                                 nir_fmul(b,
164                                          nir_i2f32(b, offset),
165                                          scale));
166      }
167   } else {
168      offset_coord = nir_iadd(b, coord, offset);
169   }
170
171   if (tex->is_array) {
172      /* The offset is not applied to the array index */
173      if (tex->coord_components == 2) {
174         offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
175                                    nir_channel(b, coord, 1));
176      } else if (tex->coord_components == 3) {
177         offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
178                                    nir_channel(b, offset_coord, 1),
179                                    nir_channel(b, coord, 2));
180      } else {
181         unreachable("Invalid number of components");
182      }
183   }
184
185   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
186                         nir_src_for_ssa(offset_coord));
187
188   nir_tex_instr_remove_src(tex, offset_index);
189
190   return true;
191}
192
193static void
194lower_rect(nir_builder *b, nir_tex_instr *tex)
195{
196   /* Set the sampler_dim to 2D here so that get_texture_size picks up the
197    * right dimensionality.
198    */
199   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
200
201   nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
202   nir_ssa_def *scale = nir_frcp(b, txs);
203   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
204
205   if (coord_index != -1) {
206      nir_ssa_def *coords =
207         nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components);
208      nir_instr_rewrite_src(&tex->instr,
209                            &tex->src[coord_index].src,
210                            nir_src_for_ssa(nir_fmul(b, coords, scale)));
211   }
212}
213
214static void
215lower_rect_tex_scale(nir_builder *b, nir_tex_instr *tex)
216{
217   b->cursor = nir_before_instr(&tex->instr);
218
219   nir_ssa_def *idx = nir_imm_int(b, tex->texture_index);
220   nir_ssa_def *scale = nir_build_load_texture_rect_scaling(b, 32, idx);
221   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
222
223   if (coord_index != -1) {
224      nir_ssa_def *coords =
225         nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components);
226      nir_instr_rewrite_src(&tex->instr,
227                            &tex->src[coord_index].src,
228                            nir_src_for_ssa(nir_fmul(b, coords, scale)));
229   }
230}
231
232static void
233lower_lod(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *lod)
234{
235   assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb);
236   assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0);
237   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
238   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);
239
240   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
241   if (bias_idx >= 0) {
242      /* If we have a bias, add it in */
243      lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
244      nir_tex_instr_remove_src(tex, bias_idx);
245   }
246
247   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
248   if (min_lod_idx >= 0) {
249      /* If we have a minimum LOD, clamp LOD accordingly */
250      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
251      nir_tex_instr_remove_src(tex, min_lod_idx);
252   }
253
254   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
255   tex->op = nir_texop_txl;
256}
257
258static void
259lower_implicit_lod(nir_builder *b, nir_tex_instr *tex)
260{
261   b->cursor = nir_before_instr(&tex->instr);
262   lower_lod(b, tex, nir_get_texture_lod(b, tex));
263}
264
265static void
266lower_zero_lod(nir_builder *b, nir_tex_instr *tex)
267{
268   b->cursor = nir_before_instr(&tex->instr);
269
270   if (tex->op == nir_texop_lod) {
271      nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_imm_int(b, 0));
272      nir_instr_remove(&tex->instr);
273      return;
274   }
275
276   lower_lod(b, tex, nir_imm_int(b, 0));
277}
278
279static nir_ssa_def *
280sample_plane(nir_builder *b, nir_tex_instr *tex, int plane,
281             const nir_lower_tex_options *options)
282{
283   assert(tex->dest.is_ssa);
284   assert(nir_tex_instr_dest_size(tex) == 4);
285   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
286   assert(tex->op == nir_texop_tex);
287   assert(tex->coord_components == 2);
288
289   nir_tex_instr *plane_tex =
290      nir_tex_instr_create(b->shader, tex->num_srcs + 1);
291   for (unsigned i = 0; i < tex->num_srcs; i++) {
292      nir_src_copy(&plane_tex->src[i].src, &tex->src[i].src);
293      plane_tex->src[i].src_type = tex->src[i].src_type;
294   }
295   plane_tex->src[tex->num_srcs].src = nir_src_for_ssa(nir_imm_int(b, plane));
296   plane_tex->src[tex->num_srcs].src_type = nir_tex_src_plane;
297   plane_tex->op = nir_texop_tex;
298   plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
299   plane_tex->dest_type = nir_type_float | nir_dest_bit_size(tex->dest);
300   plane_tex->coord_components = 2;
301
302   plane_tex->texture_index = tex->texture_index;
303   plane_tex->sampler_index = tex->sampler_index;
304
305   nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4,
306         nir_dest_bit_size(tex->dest), NULL);
307
308   nir_builder_instr_insert(b, &plane_tex->instr);
309
310   /* If scaling_factor is set, return a scaled value. */
311   if (options->scale_factors[tex->texture_index])
312      return nir_fmul_imm(b, &plane_tex->dest.ssa,
313                          options->scale_factors[tex->texture_index]);
314
315   return &plane_tex->dest.ssa;
316}
317
318static void
319convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
320                   nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,
321                   nir_ssa_def *a,
322                   const nir_lower_tex_options *options,
323                   unsigned texture_index)
324{
325
326   const float *offset_vals;
327   const nir_const_value_3_4 *m;
328   assert((options->bt709_external & options->bt2020_external) == 0);
329   if (options->bt709_external & (1u << texture_index)) {
330      m = &bt709_csc_coeffs;
331      offset_vals = bt709_csc_offsets;
332   } else if (options->bt2020_external & (1u << texture_index)) {
333      m = &bt2020_csc_coeffs;
334      offset_vals = bt2020_csc_offsets;
335   } else {
336      m = &bt601_csc_coeffs;
337      offset_vals = bt601_csc_offsets;
338   }
339
340   unsigned bit_size = nir_dest_bit_size(tex->dest);
341
342   nir_ssa_def *offset =
343      nir_vec4(b,
344               nir_imm_floatN_t(b, offset_vals[0], a->bit_size),
345               nir_imm_floatN_t(b, offset_vals[1], a->bit_size),
346               nir_imm_floatN_t(b, offset_vals[2], a->bit_size),
347               a);
348
349   offset = nir_f2fN(b, offset, bit_size);
350
351   nir_ssa_def *m0 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[0]), bit_size);
352   nir_ssa_def *m1 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[1]), bit_size);
353   nir_ssa_def *m2 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[2]), bit_size);
354
355   nir_ssa_def *result =
356      nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset)));
357
358   nir_ssa_def_rewrite_uses(&tex->dest.ssa, result);
359}
360
361static void
362lower_y_uv_external(nir_builder *b, nir_tex_instr *tex,
363                    const nir_lower_tex_options *options,
364                    unsigned texture_index)
365{
366   b->cursor = nir_after_instr(&tex->instr);
367
368   nir_ssa_def *y = sample_plane(b, tex, 0, options);
369   nir_ssa_def *uv = sample_plane(b, tex, 1, options);
370
371   convert_yuv_to_rgb(b, tex,
372                      nir_channel(b, y, 0),
373                      nir_channel(b, uv, 0),
374                      nir_channel(b, uv, 1),
375                      nir_imm_float(b, 1.0f),
376                      options,
377                      texture_index);
378}
379
380static void
381lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex,
382                     const nir_lower_tex_options *options,
383                     unsigned texture_index)
384{
385   b->cursor = nir_after_instr(&tex->instr);
386
387   nir_ssa_def *y = sample_plane(b, tex, 0, options);
388   nir_ssa_def *u = sample_plane(b, tex, 1, options);
389   nir_ssa_def *v = sample_plane(b, tex, 2, options);
390
391   convert_yuv_to_rgb(b, tex,
392                      nir_channel(b, y, 0),
393                      nir_channel(b, u, 0),
394                      nir_channel(b, v, 0),
395                      nir_imm_float(b, 1.0f),
396                      options,
397                      texture_index);
398}
399
400static void
401lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex,
402                       const nir_lower_tex_options *options,
403                       unsigned texture_index)
404{
405   b->cursor = nir_after_instr(&tex->instr);
406
407   nir_ssa_def *y = sample_plane(b, tex, 0, options);
408   nir_ssa_def *xuxv = sample_plane(b, tex, 1, options);
409
410   convert_yuv_to_rgb(b, tex,
411                      nir_channel(b, y, 0),
412                      nir_channel(b, xuxv, 1),
413                      nir_channel(b, xuxv, 3),
414                      nir_imm_float(b, 1.0f),
415                      options,
416                      texture_index);
417}
418
419static void
420lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex,
421                       const nir_lower_tex_options *options,
422                       unsigned texture_index)
423{
424  b->cursor = nir_after_instr(&tex->instr);
425
426  nir_ssa_def *y = sample_plane(b, tex, 0, options);
427  nir_ssa_def *uxvx = sample_plane(b, tex, 1, options);
428
429  convert_yuv_to_rgb(b, tex,
430                     nir_channel(b, y, 1),
431                     nir_channel(b, uxvx, 0),
432                     nir_channel(b, uxvx, 2),
433                     nir_imm_float(b, 1.0f),
434                     options,
435                     texture_index);
436}
437
438static void
439lower_ayuv_external(nir_builder *b, nir_tex_instr *tex,
440                    const nir_lower_tex_options *options,
441                    unsigned texture_index)
442{
443  b->cursor = nir_after_instr(&tex->instr);
444
445  nir_ssa_def *ayuv = sample_plane(b, tex, 0, options);
446
447  convert_yuv_to_rgb(b, tex,
448                     nir_channel(b, ayuv, 2),
449                     nir_channel(b, ayuv, 1),
450                     nir_channel(b, ayuv, 0),
451                     nir_channel(b, ayuv, 3),
452                     options,
453                     texture_index);
454}
455
456static void
457lower_y41x_external(nir_builder *b, nir_tex_instr *tex,
458                    const nir_lower_tex_options *options,
459                    unsigned texture_index)
460{
461  b->cursor = nir_after_instr(&tex->instr);
462
463  nir_ssa_def *y41x = sample_plane(b, tex, 0, options);
464
465  convert_yuv_to_rgb(b, tex,
466                     nir_channel(b, y41x, 1),
467                     nir_channel(b, y41x, 0),
468                     nir_channel(b, y41x, 2),
469                     nir_channel(b, y41x, 3),
470                     options,
471                     texture_index);
472}
473
474static void
475lower_xyuv_external(nir_builder *b, nir_tex_instr *tex,
476                    const nir_lower_tex_options *options,
477                    unsigned texture_index)
478{
479  b->cursor = nir_after_instr(&tex->instr);
480
481  nir_ssa_def *xyuv = sample_plane(b, tex, 0, options);
482
483  convert_yuv_to_rgb(b, tex,
484                     nir_channel(b, xyuv, 2),
485                     nir_channel(b, xyuv, 1),
486                     nir_channel(b, xyuv, 0),
487                     nir_imm_float(b, 1.0f),
488                     options,
489                     texture_index);
490}
491
492static void
493lower_yuv_external(nir_builder *b, nir_tex_instr *tex,
494                   const nir_lower_tex_options *options,
495                   unsigned texture_index)
496{
497  b->cursor = nir_after_instr(&tex->instr);
498
499  nir_ssa_def *yuv = sample_plane(b, tex, 0, options);
500
501  convert_yuv_to_rgb(b, tex,
502                     nir_channel(b, yuv, 0),
503                     nir_channel(b, yuv, 1),
504                     nir_channel(b, yuv, 2),
505                     nir_imm_float(b, 1.0f),
506                     options,
507                     texture_index);
508}
509
510static void
511lower_yu_yv_external(nir_builder *b, nir_tex_instr *tex,
512                     const nir_lower_tex_options *options,
513                     unsigned texture_index)
514{
515  b->cursor = nir_after_instr(&tex->instr);
516
517  nir_ssa_def *yuv = sample_plane(b, tex, 0, options);
518
519  convert_yuv_to_rgb(b, tex,
520                     nir_channel(b, yuv, 1),
521                     nir_channel(b, yuv, 2),
522                     nir_channel(b, yuv, 0),
523                     nir_imm_float(b, 1.0f),
524                     options,
525                     texture_index);
526}
527
528/*
529 * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod
530 * computed from the gradients.
531 */
532static void
533replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex)
534{
535   assert(tex->op == nir_texop_txd);
536
537   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
538   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));
539
540   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
541   if (min_lod_idx >= 0) {
542      /* If we have a minimum LOD, clamp LOD accordingly */
543      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
544      nir_tex_instr_remove_src(tex, min_lod_idx);
545   }
546
547   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
548   tex->op = nir_texop_txl;
549}
550
/* Lower txd on a cube map to txl, deriving the LOD from the gradients via
 * the quotient rule (see the derivation in the long comment below).
 */
static void
lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0 */
   nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));

   /* Cubemap texture lookups first generate a texture coordinate normalized
    * to [-1, 1] on the appropriate face. The appropriate face is determined
    * by which component has largest magnitude and its sign. The texture
    * coordinate is the quotient of the remaining texture coordinates against
    * the absolute value of the component of largest magnitude. This
    * division requires that the computing of the derivative of the texel
    * coordinate must use the quotient rule. The high level GLSL code is as
    * follows:
    *
    * Step 1: selection
    *
    * vec3 abs_p, Q, dQdx, dQdy;
    * abs_p = abs(ir->coordinate);
    * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
    *    Q = ir->coordinate.yzx;
    *    dQdx = ir->lod_info.grad.dPdx.yzx;
    *    dQdy = ir->lod_info.grad.dPdy.yzx;
    * }
    * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
    *    Q = ir->coordinate.xzy;
    *    dQdx = ir->lod_info.grad.dPdx.xzy;
    *    dQdy = ir->lod_info.grad.dPdy.xzy;
    * }
    * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
    *    Q = ir->coordinate;
    *    dQdx = ir->lod_info.grad.dPdx;
    *    dQdy = ir->lod_info.grad.dPdy;
    * }
    *
    * Step 2: use quotient rule to compute derivative. The normalized to
    * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
    * only concerned with the magnitudes of the derivatives whose values are
    * not affected by the sign. We drop the sign from the computation.
    *
    * vec2 dx, dy;
    * float recip;
    *
    * recip = 1.0 / Q.z;
    * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
    * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
    *
    * Step 3: compute LOD. At this point we have the derivatives of the
    * texture coordinates normalized to [-1,1]. We take the LOD to be
    *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
    *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
    *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
    *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
    *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
    * where L is the dimension of the cubemap. The code is:
    *
    * float M, result;
    * M = max(dot(dx, dx), dot(dy, dy));
    * L = textureSize(sampler, 0).x;
    * result = -1.0 + 0.5 * log2(L * L * M);
    */

   /* coordinate */
   nir_ssa_def *p =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;

   /* unmodified dPdx, dPdy values */
   nir_ssa_def *dPdx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *dPdy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *abs_p = nir_fabs(b, p);
   nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
   nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
   nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);

   /* 1. compute selector */
   nir_ssa_def *Q, *dQdx, *dQdy;

   nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
   nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));

   unsigned yzx[3] = { 1, 2, 0 };
   unsigned xzy[3] = { 0, 2, 1 };

   /* Select per-component via bcsel instead of branching: z wins over y,
    * which wins over x (ties resolved toward the later tests).
    */
   Q = nir_bcsel(b, cond_z,
                 p,
                 nir_bcsel(b, cond_y,
                           nir_swizzle(b, p, xzy, 3),
                           nir_swizzle(b, p, yzx, 3)));

   dQdx = nir_bcsel(b, cond_z,
                    dPdx,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdx, xzy, 3),
                              nir_swizzle(b, dPdx, yzx, 3)));

   dQdy = nir_bcsel(b, cond_z,
                    dPdy,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdy, xzy, 3),
                              nir_swizzle(b, dPdy, yzx, 3)));

   /* 2. quotient rule */

   /* tmp = Q.xy * recip;
    * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
    * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
    */
   nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));

   nir_ssa_def *Q_xy = nir_channels(b, Q, 0x3);
   nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);

   nir_ssa_def *dQdx_xy = nir_channels(b, dQdx, 0x3);
   nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
   nir_ssa_def *dx =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));

   nir_ssa_def *dQdy_xy = nir_channels(b, dQdy, 0x3);
   nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
   nir_ssa_def *dy =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));

   /* M = max(dot(dx, dx), dot(dy, dy)); */
   nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));

   /* size has textureSize() of LOD 0 */
   nir_ssa_def *L = nir_channel(b, size, 0);

   /* lod = -1.0 + 0.5 * log2(L * L * M); */
   nir_ssa_def *lod =
      nir_fadd(b,
               nir_imm_float(b, -1.0f),
               nir_fmul(b,
                        nir_imm_float(b, 0.5f),
                        nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));

   /* 3. Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}
697
698static void
699lower_gradient(nir_builder *b, nir_tex_instr *tex)
700{
701   /* Cubes are more complicated and have their own function */
702   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
703      lower_gradient_cube_map(b, tex);
704      return;
705   }
706
707   assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
708   assert(tex->op == nir_texop_txd);
709   assert(tex->dest.is_ssa);
710
711   /* Use textureSize() to get the width and height of LOD 0 */
712   unsigned component_mask;
713   switch (tex->sampler_dim) {
714   case GLSL_SAMPLER_DIM_3D:
715      component_mask = 7;
716      break;
717   case GLSL_SAMPLER_DIM_1D:
718      component_mask = 1;
719      break;
720   default:
721      component_mask = 3;
722      break;
723   }
724
725   nir_ssa_def *size =
726      nir_channels(b, nir_i2f32(b, nir_get_texture_size(b, tex)),
727                      component_mask);
728
729   /* Scale the gradients by width and height.  Effectively, the incoming
730    * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
731    * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
732    */
733   nir_ssa_def *ddx =
734      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
735   nir_ssa_def *ddy =
736      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;
737
738   nir_ssa_def *dPdx = nir_fmul(b, ddx, size);
739   nir_ssa_def *dPdy = nir_fmul(b, ddy, size);
740
741   nir_ssa_def *rho;
742   if (dPdx->num_components == 1) {
743      rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
744   } else {
745      rho = nir_fmax(b,
746                     nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
747                     nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
748   }
749
750   /* lod = log2(rho).  We're ignoring GL state biases for now. */
751   nir_ssa_def *lod = nir_flog2(b, rho);
752
753   /* Replace the gradient instruction with an equivalent lod instruction */
754   replace_gradient_with_lod(b, lod, tex);
755}
756
757/* tex(s, coord) = txd(s, coord, dfdx(coord), dfdy(coord)) */
758static nir_tex_instr *
759lower_tex_to_txd(nir_builder *b, nir_tex_instr *tex)
760{
761   b->cursor = nir_after_instr(&tex->instr);
762   nir_tex_instr *txd = nir_tex_instr_create(b->shader, tex->num_srcs + 2);
763
764   txd->op = nir_texop_txd;
765   txd->sampler_dim = tex->sampler_dim;
766   txd->dest_type = tex->dest_type;
767   txd->coord_components = tex->coord_components;
768   txd->texture_index = tex->texture_index;
769   txd->sampler_index = tex->sampler_index;
770
771   /* reuse existing srcs */
772   for (unsigned i = 0; i < tex->num_srcs; i++) {
773      nir_src_copy(&txd->src[i].src, &tex->src[i].src);
774      txd->src[i].src_type = tex->src[i].src_type;
775   }
776   int coord = nir_tex_instr_src_index(tex, nir_tex_src_coord);
777   assert(coord >= 0);
778   nir_ssa_def *dfdx = nir_fddx(b, tex->src[coord].src.ssa);
779   nir_ssa_def *dfdy = nir_fddy(b, tex->src[coord].src.ssa);
780   txd->src[tex->num_srcs].src = nir_src_for_ssa(dfdx);
781   txd->src[tex->num_srcs].src_type = nir_tex_src_ddx;
782   txd->src[tex->num_srcs + 1].src = nir_src_for_ssa(dfdy);
783   txd->src[tex->num_srcs + 1].src_type = nir_tex_src_ddy;
784
785   nir_ssa_dest_init(&txd->instr, &txd->dest, nir_dest_num_components(tex->dest),
786                     nir_dest_bit_size(tex->dest), NULL);
787   nir_builder_instr_insert(b, &txd->instr);
788   nir_ssa_def_rewrite_uses(&tex->dest.ssa, &txd->dest.ssa);
789   nir_instr_remove(&tex->instr);
790   return txd;
791}
792
793/* txb(s, coord, bias) = txl(s, coord, lod(s, coord).y + bias) */
794static nir_tex_instr *
795lower_txb_to_txl(nir_builder *b, nir_tex_instr *tex)
796{
797   b->cursor = nir_after_instr(&tex->instr);
798   nir_tex_instr *txl = nir_tex_instr_create(b->shader, tex->num_srcs);
799
800   txl->op = nir_texop_txl;
801   txl->sampler_dim = tex->sampler_dim;
802   txl->dest_type = tex->dest_type;
803   txl->coord_components = tex->coord_components;
804   txl->texture_index = tex->texture_index;
805   txl->sampler_index = tex->sampler_index;
806
807   /* reuse all but bias src */
808   for (int i = 0; i < 2; i++) {
809      if (tex->src[i].src_type != nir_tex_src_bias) {
810         nir_src_copy(&txl->src[i].src, &tex->src[i].src);
811         txl->src[i].src_type = tex->src[i].src_type;
812      }
813   }
814   nir_ssa_def *lod = nir_get_texture_lod(b, txl);
815
816   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
817   assert(bias_idx >= 0);
818   lod = nir_fadd(b, nir_channel(b, lod, 1), nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
819   txl->src[tex->num_srcs - 1].src = nir_src_for_ssa(lod);
820   txl->src[tex->num_srcs - 1].src_type = nir_tex_src_lod;
821
822   nir_ssa_dest_init(&txl->instr, &txl->dest, nir_dest_num_components(tex->dest),
823                     nir_dest_bit_size(tex->dest), NULL);
824   nir_builder_instr_insert(b, &txl->instr);
825   nir_ssa_def_rewrite_uses(&tex->dest.ssa, &txl->dest.ssa);
826   nir_instr_remove(&tex->instr);
827   return txl;
828}
829
/* Clamp the coordinate components selected by sat_mask to [0.0, 1.0]
 * (or to the texture size for unnormalized RECT samplers).  Implicit-LOD
 * ops are first converted to their explicit-derivative/LOD forms --
 * presumably so the clamp cannot perturb the implicit derivative/LOD
 * computation (TODO confirm).  Returns the instruction, which may have
 * been replaced by the conversion.
 */
static nir_tex_instr *
saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
{
   if (tex->op == nir_texop_tex)
      tex = lower_tex_to_txd(b, tex);
   else if (tex->op == nir_texop_txb)
      tex = lower_txb_to_txl(b, tex);

   b->cursor = nir_before_instr(&tex->instr);
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);

   if (coord_index != -1) {
      nir_ssa_def *src =
         nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components);

      /* split src into components: */
      nir_ssa_def *comp[4];

      assume(tex->coord_components >= 1);

      for (unsigned j = 0; j < tex->coord_components; j++)
         comp[j] = nir_channel(b, src, j);

      /* clamp requested components, array index does not get clamped: */
      unsigned ncomp = tex->coord_components;
      if (tex->is_array)
         ncomp--;

      for (unsigned j = 0; j < ncomp; j++) {
         if ((1 << j) & sat_mask) {
            if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
               /* non-normalized texture coords, so clamp to texture
                * size rather than [0.0, 1.0]
                */
               nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
               comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
               comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
            } else {
               comp[j] = nir_fsat(b, comp[j]);
            }
         }
      }

      /* and move the result back into a single vecN: */
      src = nir_vec(b, comp, tex->coord_components);

      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[coord_index].src,
                            nir_src_for_ssa(src));
   }
   return tex;
}
882
883static nir_ssa_def *
884get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
885{
886   nir_const_value v[4];
887
888   memset(&v, 0, sizeof(v));
889
890   if (swizzle_val == 4) {
891      v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0;
892   } else {
893      assert(swizzle_val == 5);
894      if (type == nir_type_float32)
895         v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0;
896      else
897         v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1;
898   }
899
900   return nir_build_imm(b, 4, 32, v);
901}
902
903static void
904swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex)
905{
906   assert(tex->dest.is_ssa);
907
908   b->cursor = nir_after_instr(&tex->instr);
909
910   assert(nir_tex_instr_dest_size(tex) == 4);
911   unsigned swiz[4] = { 2, 3, 1, 0 };
912   nir_ssa_def *swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
913
914   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, swizzled,
915                                  swizzled->parent_instr);
916}
917
918static void
919swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
920{
921   assert(tex->dest.is_ssa);
922
923   b->cursor = nir_after_instr(&tex->instr);
924
925   nir_ssa_def *swizzled;
926   if (tex->op == nir_texop_tg4) {
927      if (swizzle[tex->component] < 4) {
928         /* This one's easy */
929         tex->component = swizzle[tex->component];
930         return;
931      } else {
932         swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
933      }
934   } else {
935      assert(nir_tex_instr_dest_size(tex) == 4);
936      if (swizzle[0] < 4 && swizzle[1] < 4 &&
937          swizzle[2] < 4 && swizzle[3] < 4) {
938         unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
939         /* We have no 0s or 1s, just emit a swizzling MOV */
940         swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
941      } else {
942         nir_ssa_def *srcs[4];
943         for (unsigned i = 0; i < 4; i++) {
944            if (swizzle[i] < 4) {
945               srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
946            } else {
947               srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
948            }
949         }
950         swizzled = nir_vec(b, srcs, 4);
951      }
952   }
953
954   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, swizzled,
955                                  swizzled->parent_instr);
956}
957
958static void
959linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
960{
961   assert(tex->dest.is_ssa);
962   assert(nir_tex_instr_dest_size(tex) == 4);
963   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
964
965   b->cursor = nir_after_instr(&tex->instr);
966
967   nir_ssa_def *rgb =
968      nir_format_srgb_to_linear(b, nir_channels(b, &tex->dest.ssa, 0x7));
969
970   /* alpha is untouched: */
971   nir_ssa_def *result = nir_vec4(b,
972                                  nir_channel(b, rgb, 0),
973                                  nir_channel(b, rgb, 1),
974                                  nir_channel(b, rgb, 2),
975                                  nir_channel(b, &tex->dest.ssa, 3));
976
977   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, result,
978                                  result->parent_instr);
979}
980
981/**
982 * Lowers texture instructions from giving a vec4 result to a vec2 of f16,
983 * i16, or u16, or a single unorm4x8 value.
984 *
985 * Note that we don't change the destination num_components, because
986 * nir_tex_instr_dest_size() will still return 4.  The driver is just expected
987 * to not store the other channels, given that nothing at the NIR level will
988 * read them.
989 */
static void
lower_tex_packing(nir_builder *b, nir_tex_instr *tex,
                  const nir_lower_tex_options *options)
{
   /* Start from the raw (packed) result; each case below replaces `color`
    * with the unpacked value, and the rewrite at the end redirects readers.
    */
   nir_ssa_def *color = &tex->dest.ssa;

   b->cursor = nir_after_instr(&tex->instr);

   /* Packing mode is configured per sampler index. */
   switch (options->lower_tex_packing[tex->sampler_index]) {
   case nir_lower_tex_packing_none:
      return;

   case nir_lower_tex_packing_16: {
      static const unsigned bits[4] = {16, 16, 16, 16};

      switch (nir_alu_type_get_base_type(tex->dest_type)) {
      case nir_type_float:
         switch (nir_tex_instr_dest_size(tex)) {
         case 1:
            /* Single shadow-comparison result: one f16 in the low half of
             * channel 0.
             */
            assert(tex->is_shadow && tex->is_new_style_shadow);
            color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0));
            break;
         case 2: {
            /* Two f16 values packed in channel 0: x in the low half, y in
             * the high half.
             */
            nir_ssa_def *rg = nir_channel(b, color, 0);
            color = nir_vec2(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg));
            break;
         }
         case 4: {
            /* Four f16 values: rg packed in channel 0, ba in channel 1. */
            nir_ssa_def *rg = nir_channel(b, color, 0);
            nir_ssa_def *ba = nir_channel(b, color, 1);
            color = nir_vec4(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg),
                             nir_unpack_half_2x16_split_x(b, ba),
                             nir_unpack_half_2x16_split_y(b, ba));
            break;
         }
         default:
            unreachable("wrong dest_size");
         }
         break;

      case nir_type_int:
         /* Unpack four 16-bit signed ints (sign-extending). */
         color = nir_format_unpack_sint(b, color, bits, 4);
         break;

      case nir_type_uint:
         /* Unpack four 16-bit unsigned ints (zero-extending). */
         color = nir_format_unpack_uint(b, color, bits, 4);
         break;

      default:
         unreachable("unknown base type");
      }
      break;
   }

   case nir_lower_tex_packing_8:
      /* Four unorm8 values packed into the 32-bit channel 0. */
      assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
      color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0));
      break;
   }

   /* Point all readers of the tex result at the unpacked value. */
   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, color,
                                  color->parent_instr);
}
1057
1058static bool
1059sampler_index_lt(nir_tex_instr *tex, unsigned max)
1060{
1061   assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1);
1062
1063   unsigned sampler_index = tex->sampler_index;
1064
1065   int sampler_offset_idx =
1066      nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset);
1067   if (sampler_offset_idx >= 0) {
1068      if (!nir_src_is_const(tex->src[sampler_offset_idx].src))
1069         return false;
1070
1071      sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src);
1072   }
1073
1074   return sampler_index < max;
1075}
1076
/* Replace a tg4 carrying four explicit per-texel offsets with four separate
 * tg4 instructions, each taking one of the offsets as a regular
 * nir_tex_src_offset src, then recombine the results into one vector.
 */
static bool
lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_tg4);
   assert(nir_tex_instr_has_explicit_tg4_offsets(tex));
   assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1);

   b->cursor = nir_after_instr(&tex->instr);

   /* dest[0..3] hold one gathered texel each; dest[4] accumulates the
    * sparse residency code when the original instruction is sparse.
    */
   nir_ssa_def *dest[5] = {NULL};
   for (unsigned i = 0; i < 4; ++i) {
      /* Clone the instruction field by field, with room for one extra
       * (offset) src.
       */
      nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1);
      tex_copy->op = tex->op;
      tex_copy->coord_components = tex->coord_components;
      tex_copy->sampler_dim = tex->sampler_dim;
      tex_copy->is_array = tex->is_array;
      tex_copy->is_shadow = tex->is_shadow;
      tex_copy->is_new_style_shadow = tex->is_new_style_shadow;
      tex_copy->is_sparse = tex->is_sparse;
      tex_copy->component = tex->component;
      tex_copy->dest_type = tex->dest_type;

      for (unsigned j = 0; j < tex->num_srcs; ++j) {
         nir_src_copy(&tex_copy->src[j].src, &tex->src[j].src);
         tex_copy->src[j].src_type = tex->src[j].src_type;
      }

      /* Turn tg4_offsets[i] into an ordinary ivec2 offset src. */
      nir_tex_src src;
      src.src = nir_src_for_ssa(nir_imm_ivec2(b, tex->tg4_offsets[i][0],
                                                 tex->tg4_offsets[i][1]));
      src.src_type = nir_tex_src_offset;
      tex_copy->src[tex_copy->num_srcs - 1] = src;

      nir_ssa_dest_init(&tex_copy->instr, &tex_copy->dest,
                        nir_tex_instr_dest_size(tex), 32, NULL);

      nir_builder_instr_insert(b, &tex_copy->instr);

      /* Take channel 3 of each per-offset gather — presumably the texel
       * that the applied offset selects (TODO confirm against the tg4
       * gather component layout).
       */
      dest[i] = nir_channel(b, &tex_copy->dest.ssa, 3);
      if (tex->is_sparse) {
         /* AND the residency codes so the combined result is resident only
          * if all four fetches were.
          */
         nir_ssa_def *code = nir_channel(b, &tex_copy->dest.ssa, 4);
         dest[4] = dest[4] ? nir_sparse_residency_code_and(b, dest[4], code) : code;
      }
   }

   nir_ssa_def *res = nir_vec(b, dest, tex->dest.ssa.num_components);
   nir_ssa_def_rewrite_uses(&tex->dest.ssa, res);
   nir_instr_remove(&tex->instr);

   return true;
}
1128
/* Lower a txs with a non-zero LOD into a 0-LOD txs followed by ALU
 * minification of the result.  Returns false (no change) when there is no
 * LOD src or it is a constant zero.
 */
static bool
nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex)
{
   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   if (lod_idx < 0 ||
       (nir_src_is_const(tex->src[lod_idx].src) &&
        nir_src_as_int(tex->src[lod_idx].src) == 0))
      return false;

   unsigned dest_size = nir_tex_instr_dest_size(tex);

   /* Capture the original LOD before the tex, since the src is about to be
    * replaced below.
    */
   b->cursor = nir_before_instr(&tex->instr);
   nir_ssa_def *lod = nir_ssa_for_src(b, tex->src[lod_idx].src, 1);

   /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */
   nir_instr_rewrite_src(&tex->instr, &tex->src[lod_idx].src,
                         nir_src_for_ssa(nir_imm_int(b, 0)));

   /* TXS(LOD) = max(TXS(0) >> LOD, 1)
    * But we do min(TXS(0), TXS(LOD)) to catch the case of a null surface,
    * which should return 0, not 1.
    */
   b->cursor = nir_after_instr(&tex->instr);
   nir_ssa_def *minified = nir_imin(b, &tex->dest.ssa,
                                    nir_imax(b, nir_ushr(b, &tex->dest.ssa, lod),
                                             nir_imm_int(b, 1)));

   /* Make sure the component encoding the array size (if any) is not
    * minified.
    */
   if (tex->is_array) {
      nir_ssa_def *comp[3];

      assert(dest_size <= ARRAY_SIZE(comp));
      for (unsigned i = 0; i < dest_size - 1; i++)
         comp[i] = nir_channel(b, minified, i);

      /* Last component: take the layer count straight from the 0-LOD txs. */
      comp[dest_size - 1] = nir_channel(b, &tex->dest.ssa, dest_size - 1);
      minified = nir_vec(b, comp, dest_size);
   }

   /* Rewrite after `minified` so the min/max chain keeps reading the raw
    * txs result rather than itself.
    */
   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, minified,
                                  minified->parent_instr);
   return true;
}
1174
1175static void
1176nir_lower_txs_cube_array(nir_builder *b, nir_tex_instr *tex)
1177{
1178   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array);
1179   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
1180
1181   b->cursor = nir_after_instr(&tex->instr);
1182
1183   assert(tex->dest.is_ssa);
1184   assert(tex->dest.ssa.num_components == 3);
1185   nir_ssa_def *size = &tex->dest.ssa;
1186   size = nir_vec3(b, nir_channel(b, size, 0),
1187                      nir_channel(b, size, 1),
1188                      nir_idiv(b, nir_channel(b, size, 2),
1189                                  nir_imm_int(b, 6)));
1190
1191   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, size, size->parent_instr);
1192}
1193
/* Lower a multisample txf into an AMD FMASK fetch plus a fragment fetch:
 * the FMASK word maps each logical sample to the physical fragment slot,
 * 4 bits per sample.
 */
static void
nir_lower_ms_txf_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
{
   lower_offset(b, tex);

   b->cursor = nir_before_instr(&tex->instr);

   /* Create FMASK fetch. */
   assert(tex->texture_index == 0);
   /* One src fewer: the ms_index src is dropped from the FMASK fetch. */
   nir_tex_instr *fmask_fetch = nir_tex_instr_create(b->shader, tex->num_srcs - 1);
   fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
   fmask_fetch->coord_components = tex->coord_components;
   fmask_fetch->sampler_dim = tex->sampler_dim;
   fmask_fetch->is_array = tex->is_array;
   fmask_fetch->texture_non_uniform = tex->texture_non_uniform;
   fmask_fetch->dest_type = nir_type_uint32;
   nir_ssa_dest_init(&fmask_fetch->instr, &fmask_fetch->dest, 1, 32, NULL);

   /* Copy every src except nir_tex_src_ms_index, compacting as we go. */
   fmask_fetch->num_srcs = 0;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_ms_index)
         continue;
      nir_tex_src *src = &fmask_fetch->src[fmask_fetch->num_srcs++];
      src->src = nir_src_for_ssa(tex->src[i].src.ssa);
      src->src_type = tex->src[i].src_type;
   }

   nir_builder_instr_insert(b, &fmask_fetch->instr);

   /* Obtain new sample index. */
   int ms_index = nir_tex_instr_src_index(tex, nir_tex_src_ms_index);
   assert(ms_index >= 0);
   nir_src sample = tex->src[ms_index].src;
   nir_ssa_def *new_sample = NULL;
   if (nir_src_is_const(sample) && (nir_src_as_uint(sample) == 0 || nir_src_as_uint(sample) == 7)) {
      /* Constant samples 0 and 7 sit at fixed bit positions in the FMASK
       * word ([3:0] and [31:28]), so a mask or shift suffices.
       */
      if (nir_src_as_uint(sample) == 7)
         new_sample = nir_ushr(b, &fmask_fetch->dest.ssa, nir_imm_int(b, 28));
      else
         new_sample = nir_iand_imm(b, &fmask_fetch->dest.ssa, 0xf);
   } else {
      /* General case: extract the 4-bit field at bit offset sample*4. */
      new_sample = nir_ubitfield_extract(b, &fmask_fetch->dest.ssa,
                                         nir_imul_imm(b, sample.ssa, 4), nir_imm_int(b, 4));
   }

   /* Update instruction. */
   tex->op = nir_texop_fragment_fetch_amd;
   nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[ms_index].src, new_sample);
}
1242
1243static void
1244nir_lower_samples_identical_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
1245{
1246   b->cursor = nir_after_instr(&tex->instr);
1247
1248   nir_tex_instr *fmask_fetch = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr));
1249   fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
1250   fmask_fetch->dest_type = nir_type_uint32;
1251   nir_ssa_dest_init(&fmask_fetch->instr, &fmask_fetch->dest, 1, 32, NULL);
1252   nir_builder_instr_insert(b, &fmask_fetch->instr);
1253
1254   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_ieq_imm(b, &fmask_fetch->dest.ssa, 0));
1255   nir_instr_remove_v(&tex->instr);
1256}
1257
1258static bool
1259nir_lower_tex_block(nir_block *block, nir_builder *b,
1260                    const nir_lower_tex_options *options,
1261                    const struct nir_shader_compiler_options *compiler_options)
1262{
1263   bool progress = false;
1264
1265   nir_foreach_instr_safe(instr, block) {
1266      if (instr->type != nir_instr_type_tex)
1267         continue;
1268
1269      nir_tex_instr *tex = nir_instr_as_tex(instr);
1270      bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));
1271
1272      /* mask of src coords to saturate (clamp): */
1273      unsigned sat_mask = 0;
1274
1275      if ((1 << tex->sampler_index) & options->saturate_r)
1276         sat_mask |= (1 << 2);    /* .z */
1277      if ((1 << tex->sampler_index) & options->saturate_t)
1278         sat_mask |= (1 << 1);    /* .y */
1279      if ((1 << tex->sampler_index) & options->saturate_s)
1280         sat_mask |= (1 << 0);    /* .x */
1281
1282      /* If we are clamping any coords, we must lower projector first
1283       * as clamping happens *after* projection:
1284       */
1285      if (lower_txp || sat_mask) {
1286         progress |= project_src(b, tex);
1287      }
1288
1289      if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
1290          (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
1291          (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
1292           options->lower_rect_offset)) {
1293         progress = lower_offset(b, tex) || progress;
1294      }
1295
1296      if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect &&
1297          tex->op != nir_texop_txf && !nir_tex_instr_is_query(tex)) {
1298
1299         if (compiler_options->has_txs)
1300            lower_rect(b, tex);
1301         else
1302            lower_rect_tex_scale(b, tex);
1303
1304         progress = true;
1305      }
1306
1307      unsigned texture_index = tex->texture_index;
1308      uint32_t texture_mask = 1u << texture_index;
1309      int tex_index = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
1310      if (tex_index >= 0) {
1311         nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_index].src);
1312         nir_variable *var = nir_deref_instr_get_variable(deref);
1313         texture_index = var ? var->data.binding : 0;
1314         texture_mask = var ? (1u << texture_index) : 0u;
1315      }
1316
1317      if (texture_mask & options->lower_y_uv_external) {
1318         lower_y_uv_external(b, tex, options, texture_index);
1319         progress = true;
1320      }
1321
1322      if (texture_mask & options->lower_y_u_v_external) {
1323         lower_y_u_v_external(b, tex, options, texture_index);
1324         progress = true;
1325      }
1326
1327      if (texture_mask & options->lower_yx_xuxv_external) {
1328         lower_yx_xuxv_external(b, tex, options, texture_index);
1329         progress = true;
1330      }
1331
1332      if (texture_mask & options->lower_xy_uxvx_external) {
1333         lower_xy_uxvx_external(b, tex, options, texture_index);
1334         progress = true;
1335      }
1336
1337      if (texture_mask & options->lower_ayuv_external) {
1338         lower_ayuv_external(b, tex, options, texture_index);
1339         progress = true;
1340      }
1341
1342      if (texture_mask & options->lower_xyuv_external) {
1343         lower_xyuv_external(b, tex, options, texture_index);
1344         progress = true;
1345      }
1346
1347      if (texture_mask & options->lower_yuv_external) {
1348         lower_yuv_external(b, tex, options, texture_index);
1349         progress = true;
1350      }
1351
1352      if ((1 << tex->texture_index) & options->lower_yu_yv_external) {
1353         lower_yu_yv_external(b, tex, options, texture_index);
1354         progress = true;
1355      }
1356
1357      if ((1 << tex->texture_index) & options->lower_y41x_external) {
1358         lower_y41x_external(b, tex, options, texture_index);
1359         progress = true;
1360      }
1361
1362      if (sat_mask) {
1363         tex = saturate_src(b, tex, sat_mask);
1364         progress = true;
1365      }
1366
1367      if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) {
1368         swizzle_tg4_broadcom(b, tex);
1369         progress = true;
1370      }
1371
1372      if ((texture_mask & options->swizzle_result) &&
1373          !nir_tex_instr_is_query(tex) &&
1374          !(tex->is_shadow && tex->is_new_style_shadow)) {
1375         swizzle_result(b, tex, options->swizzles[tex->texture_index]);
1376         progress = true;
1377      }
1378
1379      /* should be after swizzle so we know which channels are rgb: */
1380      if ((texture_mask & options->lower_srgb) &&
1381          !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
1382         linearize_srgb_result(b, tex);
1383         progress = true;
1384      }
1385
1386      const bool has_min_lod =
1387         nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0;
1388      const bool has_offset =
1389         nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;
1390
1391      if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod &&
1392          options->lower_txb_shadow_clamp) {
1393         lower_implicit_lod(b, tex);
1394         progress = true;
1395      }
1396
1397      if (options->lower_tex_packing[tex->sampler_index] !=
1398          nir_lower_tex_packing_none &&
1399          tex->op != nir_texop_txs &&
1400          tex->op != nir_texop_query_levels &&
1401          tex->op != nir_texop_texture_samples) {
1402         lower_tex_packing(b, tex, options);
1403         progress = true;
1404      }
1405
1406      if (tex->op == nir_texop_txd &&
1407          (options->lower_txd ||
1408           (options->lower_txd_shadow && tex->is_shadow) ||
1409           (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) ||
1410           (options->lower_txd_offset_clamp && has_offset && has_min_lod) ||
1411           (options->lower_txd_clamp_bindless_sampler && has_min_lod &&
1412            nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) ||
1413           (options->lower_txd_clamp_if_sampler_index_not_lt_16 &&
1414            has_min_lod && !sampler_index_lt(tex, 16)) ||
1415           (options->lower_txd_cube_map &&
1416            tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) ||
1417           (options->lower_txd_3d &&
1418            tex->sampler_dim == GLSL_SAMPLER_DIM_3D))) {
1419         lower_gradient(b, tex);
1420         progress = true;
1421         continue;
1422      }
1423
1424      /* TXF, TXS and TXL require a LOD but not everything we implement using those
1425       * three opcodes provides one.  Provide a default LOD of 0.
1426       */
1427      if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
1428          (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
1429           tex->op == nir_texop_txl || tex->op == nir_texop_query_levels)) {
1430         b->cursor = nir_before_instr(&tex->instr);
1431         nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(nir_imm_int(b, 0)));
1432         progress = true;
1433         continue;
1434      }
1435
1436      /* Only fragment and compute (in some cases) support implicit
1437       * derivatives.  Lower those opcodes which use implicit derivatives to
1438       * use an explicit LOD of 0.
1439       */
1440      if (nir_tex_instr_has_implicit_derivative(tex) &&
1441          !nir_shader_supports_implicit_lod(b->shader)) {
1442         lower_zero_lod(b, tex);
1443         progress = true;
1444      }
1445
1446      if (options->lower_txs_lod && tex->op == nir_texop_txs) {
1447         progress |= nir_lower_txs_lod(b, tex);
1448         continue;
1449      }
1450
1451      if (options->lower_txs_cube_array && tex->op == nir_texop_txs &&
1452          tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array) {
1453         nir_lower_txs_cube_array(b, tex);
1454         progress = true;
1455         continue;
1456      }
1457
1458      /* has to happen after all the other lowerings as the original tg4 gets
1459       * replaced by 4 tg4 instructions.
1460       */
1461      if (tex->op == nir_texop_tg4 &&
1462          nir_tex_instr_has_explicit_tg4_offsets(tex) &&
1463          options->lower_tg4_offsets) {
1464         progress |= lower_tg4_offsets(b, tex);
1465         continue;
1466      }
1467
1468      if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_txf_ms) {
1469         nir_lower_ms_txf_to_fragment_fetch(b, tex);
1470         progress = true;
1471         continue;
1472      }
1473
1474      if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_samples_identical) {
1475         nir_lower_samples_identical_to_fragment_fetch(b, tex);
1476         progress = true;
1477         continue;
1478      }
1479   }
1480
1481   return progress;
1482}
1483
1484static bool
1485nir_lower_tex_impl(nir_function_impl *impl,
1486                   const nir_lower_tex_options *options,
1487                   const struct nir_shader_compiler_options *compiler_options)
1488{
1489   bool progress = false;
1490   nir_builder builder;
1491   nir_builder_init(&builder, impl);
1492
1493   nir_foreach_block(block, impl) {
1494      progress |= nir_lower_tex_block(block, &builder, options, compiler_options);
1495   }
1496
1497   nir_metadata_preserve(impl, nir_metadata_block_index |
1498                               nir_metadata_dominance);
1499   return progress;
1500}
1501
1502bool
1503nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
1504{
1505   bool progress = false;
1506
1507   nir_foreach_function(function, shader) {
1508      if (function->impl)
1509         progress |= nir_lower_tex_impl(function->impl, options, shader->options);
1510   }
1511
1512   return progress;
1513}
1514