1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "nir_builder.h"
25
26#include "util/format_rgb9e5.h"
27
28static inline nir_ssa_def *
29nir_shift(nir_builder *b, nir_ssa_def *value, int left_shift)
30{
31   if (left_shift > 0)
32      return nir_ishl(b, value, nir_imm_int(b, left_shift));
33   else if (left_shift < 0)
34      return nir_ushr(b, value, nir_imm_int(b, -left_shift));
35   else
36      return value;
37}
38
39static inline nir_ssa_def *
40nir_mask_shift(struct nir_builder *b, nir_ssa_def *src,
41               uint32_t mask, int left_shift)
42{
43   return nir_shift(b, nir_iand(b, src, nir_imm_int(b, mask)), left_shift);
44}
45
46static inline nir_ssa_def *
47nir_mask_shift_or(struct nir_builder *b, nir_ssa_def *dst, nir_ssa_def *src,
48                  uint32_t src_mask, int src_left_shift)
49{
50   return nir_ior(b, nir_mask_shift(b, src, src_mask, src_left_shift), dst);
51}
52
53static inline nir_ssa_def *
54nir_format_mask_uvec(nir_builder *b, nir_ssa_def *src, const unsigned *bits)
55{
56   nir_const_value mask[NIR_MAX_VEC_COMPONENTS];
57   memset(mask, 0, sizeof(mask));
58   for (unsigned i = 0; i < src->num_components; i++) {
59      assert(bits[i] < 32);
60      mask[i].u32 = (1u << bits[i]) - 1;
61   }
62   return nir_iand(b, src, nir_build_imm(b, src->num_components, 32, mask));
63}
64
65static inline nir_ssa_def *
66nir_format_sign_extend_ivec(nir_builder *b, nir_ssa_def *src,
67                            const unsigned *bits)
68{
69   assert(src->num_components <= 4);
70   nir_ssa_def *comps[4];
71   for (unsigned i = 0; i < src->num_components; i++) {
72      nir_ssa_def *shift = nir_imm_int(b, src->bit_size - bits[i]);
73      comps[i] = nir_ishr(b, nir_ishl(b, nir_channel(b, src, i), shift), shift);
74   }
75   return nir_vec(b, comps, src->num_components);
76}
77
78
/* Unpacks @num_components tightly packed, LSB-first bit fields from @packed.
 *
 * bits[i] is the width of channel i.  Fields are consumed from the low end
 * of each bit_size-wide word of @packed; once the running offset reaches
 * bit_size, unpacking continues from the next component of @packed.  Each
 * result channel is zero-extended (sign_extend == false) or sign-extended
 * to the full bit size.
 *
 * NOTE(review): the asserts require each field to fit entirely within one
 * word — fields may not straddle a word boundary.
 */
static inline nir_ssa_def *
nir_format_unpack_int(nir_builder *b, nir_ssa_def *packed,
                      const unsigned *bits, unsigned num_components,
                      bool sign_extend)
{
   assert(num_components >= 1 && num_components <= 4);
   const unsigned bit_size = packed->bit_size;
   nir_ssa_def *comps[4];

   if (bits[0] >= bit_size) {
      /* Single full-width channel: nothing to extract. */
      assert(bits[0] == bit_size);
      assert(num_components == 1);
      return packed;
   }

   unsigned next_chan = 0;
   unsigned offset = 0;
   for (unsigned i = 0; i < num_components; i++) {
      assert(bits[i] < bit_size);
      assert(offset + bits[i] <= bit_size);
      nir_ssa_def *chan = nir_channel(b, packed, next_chan);
      /* Shift the field up against the top of the word, then shift back
       * down so the right shift performs the extension.
       */
      nir_ssa_def *lshift = nir_imm_int(b, bit_size - (offset + bits[i]));
      nir_ssa_def *rshift = nir_imm_int(b, bit_size - bits[i]);
      if (sign_extend)
         comps[i] = nir_ishr(b, nir_ishl(b, chan, lshift), rshift);
      else
         comps[i] = nir_ushr(b, nir_ishl(b, chan, lshift), rshift);
      offset += bits[i];
      if (offset >= bit_size) {
         /* This word is exhausted; continue in the next source component. */
         next_chan++;
         offset -= bit_size;
      }
   }

   return nir_vec(b, comps, num_components);
}
115
116static inline nir_ssa_def *
117nir_format_unpack_uint(nir_builder *b, nir_ssa_def *packed,
118                       const unsigned *bits, unsigned num_components)
119{
120   return nir_format_unpack_int(b, packed, bits, num_components, false);
121}
122
123static inline nir_ssa_def *
124nir_format_unpack_sint(nir_builder *b, nir_ssa_def *packed,
125                       const unsigned *bits, unsigned num_components)
126{
127   return nir_format_unpack_int(b, packed, bits, num_components, true);
128}
129
130static inline nir_ssa_def *
131nir_format_pack_uint_unmasked(nir_builder *b, nir_ssa_def *color,
132                              const unsigned *bits, unsigned num_components)
133{
134   assert(num_components >= 1 && num_components <= 4);
135   nir_ssa_def *packed = nir_imm_int(b, 0);
136   unsigned offset = 0;
137   for (unsigned i = 0; i < num_components; i++) {
138      packed = nir_ior(b, packed, nir_shift(b, nir_channel(b, color, i),
139                                               offset));
140      offset += bits[i];
141   }
142   assert(offset <= packed->bit_size);
143
144   return packed;
145}
146
147static inline nir_ssa_def *
148nir_format_pack_uint(nir_builder *b, nir_ssa_def *color,
149                     const unsigned *bits, unsigned num_components)
150{
151   return nir_format_pack_uint_unmasked(b, nir_format_mask_uvec(b, color, bits),
152                                        bits, num_components);
153}
154
/* Reinterprets a uvec of src_bits-wide channels as a uvec of dst_bits-wide
 * channels, preserving the overall little-endian bit pattern.  Both widths
 * must be 8, 16 or 32.
 *
 * NOTE(review): when widening, source channels are OR'd together without
 * masking — "unmasked" presumably means the caller guarantees each channel
 * already fits in src_bits; confirm at call sites.
 */
static inline nir_ssa_def *
nir_format_bitcast_uvec_unmasked(nir_builder *b, nir_ssa_def *src,
                                 unsigned src_bits, unsigned dst_bits)
{
   assert(src->bit_size >= src_bits && src->bit_size >= dst_bits);
   assert(src_bits == 8 || src_bits == 16 || src_bits == 32);
   assert(dst_bits == 8 || dst_bits == 16 || dst_bits == 32);

   if (src_bits == dst_bits)
      return src;

   const unsigned dst_components =
      DIV_ROUND_UP(src->num_components * src_bits, dst_bits);
   assert(dst_components <= 4);

   nir_ssa_def *dst_chan[4] = {0};
   if (dst_bits > src_bits) {
      /* Widening: OR src_bits-sized pieces into each destination channel,
       * starting at the low end.
       */
      unsigned shift = 0;
      unsigned dst_idx = 0;
      for (unsigned i = 0; i < src->num_components; i++) {
         nir_ssa_def *shifted = nir_ishl(b, nir_channel(b, src, i),
                                            nir_imm_int(b, shift));
         if (shift == 0) {
            /* First piece of this destination channel. */
            dst_chan[dst_idx] = shifted;
         } else {
            dst_chan[dst_idx] = nir_ior(b, dst_chan[dst_idx], shifted);
         }

         shift += src_bits;
         if (shift >= dst_bits) {
            /* Destination channel is full; start the next one. */
            dst_idx++;
            shift = 0;
         }
      }
   } else {
      /* Narrowing: pull dst_bits-sized fields out of each source channel,
       * low bits first.
       */
      nir_ssa_def *mask = nir_imm_int(b, ~0u >> (32 - dst_bits));

      unsigned src_idx = 0;
      unsigned shift = 0;
      for (unsigned i = 0; i < dst_components; i++) {
         dst_chan[i] = nir_iand(b, nir_ushr(b, nir_channel(b, src, src_idx),
                                               nir_imm_int(b, shift)),
                                   mask);
         shift += dst_bits;
         if (shift >= src_bits) {
            /* Source channel exhausted; move to the next one. */
            src_idx++;
            shift = 0;
         }
      }
   }

   return nir_vec(b, dst_chan, dst_components);
}
208
209static inline nir_ssa_def *
210_nir_format_norm_factor(nir_builder *b, const unsigned *bits,
211                        unsigned num_components,
212                        bool is_signed)
213{
214   nir_const_value factor[NIR_MAX_VEC_COMPONENTS];
215   memset(factor, 0, sizeof(factor));
216   for (unsigned i = 0; i < num_components; i++) {
217      assert(bits[i] < 32);
218      factor[i].f32 = (1ul << (bits[i] - is_signed)) - 1;
219   }
220   return nir_build_imm(b, num_components, 32, factor);
221}
222
223static inline nir_ssa_def *
224nir_format_unorm_to_float(nir_builder *b, nir_ssa_def *u, const unsigned *bits)
225{
226   nir_ssa_def *factor =
227      _nir_format_norm_factor(b, bits, u->num_components, false);
228
229   return nir_fdiv(b, nir_u2f32(b, u), factor);
230}
231
232static inline nir_ssa_def *
233nir_format_snorm_to_float(nir_builder *b, nir_ssa_def *s, const unsigned *bits)
234{
235   nir_ssa_def *factor =
236      _nir_format_norm_factor(b, bits, s->num_components, true);
237
238   return nir_fmax(b, nir_fdiv(b, nir_i2f32(b, s), factor),
239                      nir_imm_float(b, -1.0f));
240}
241
242static inline nir_ssa_def *
243nir_format_float_to_unorm(nir_builder *b, nir_ssa_def *f, const unsigned *bits)
244{
245   nir_ssa_def *factor =
246      _nir_format_norm_factor(b, bits, f->num_components, false);
247
248   /* Clamp to the range [0, 1] */
249   f = nir_fsat(b, f);
250
251   return nir_f2u32(b, nir_fround_even(b, nir_fmul(b, f, factor)));
252}
253
254static inline nir_ssa_def *
255nir_format_float_to_snorm(nir_builder *b, nir_ssa_def *f, const unsigned *bits)
256{
257   nir_ssa_def *factor =
258      _nir_format_norm_factor(b, bits, f->num_components, true);
259
260   /* Clamp to the range [-1, 1] */
261   f = nir_fmin(b, nir_fmax(b, f, nir_imm_float(b, -1)), nir_imm_float(b, 1));
262
263   return nir_f2i32(b, nir_fround_even(b, nir_fmul(b, f, factor)));
264}
265
266/* Converts a vector of floats to a vector of half-floats packed in the low 16
267 * bits.
268 */
269static inline nir_ssa_def *
270nir_format_float_to_half(nir_builder *b, nir_ssa_def *f)
271{
272   nir_ssa_def *zero = nir_imm_float(b, 0);
273   nir_ssa_def *f16comps[4];
274   for (unsigned i = 0; i < f->num_components; i++)
275      f16comps[i] = nir_pack_half_2x16_split(b, nir_channel(b, f, i), zero);
276   return nir_vec(b, f16comps, f->num_components);
277}
278
279static inline nir_ssa_def *
280nir_format_linear_to_srgb(nir_builder *b, nir_ssa_def *c)
281{
282   nir_ssa_def *linear = nir_fmul(b, c, nir_imm_float(b, 12.92f));
283   nir_ssa_def *curved =
284      nir_fsub(b, nir_fmul(b, nir_imm_float(b, 1.055f),
285                              nir_fpow(b, c, nir_imm_float(b, 1.0 / 2.4))),
286                  nir_imm_float(b, 0.055f));
287
288   return nir_fsat(b, nir_bcsel(b, nir_flt(b, c, nir_imm_float(b, 0.0031308f)),
289                                   linear, curved));
290}
291
292static inline nir_ssa_def *
293nir_format_srgb_to_linear(nir_builder *b, nir_ssa_def *c)
294{
295   nir_ssa_def *linear = nir_fdiv(b, c, nir_imm_float(b, 12.92f));
296   nir_ssa_def *curved =
297      nir_fpow(b, nir_fdiv(b, nir_fadd(b, c, nir_imm_float(b, 0.055f)),
298                              nir_imm_float(b, 1.055f)),
299                  nir_imm_float(b, 2.4f));
300
301   return nir_fsat(b, nir_bcsel(b, nir_fge(b, nir_imm_float(b, 0.04045f), c),
302                                   linear, curved));
303}
304
305/* Clamps a vector of uints so they don't extend beyond the given number of
306 * bits per channel.
307 */
308static inline nir_ssa_def *
309nir_format_clamp_uint(nir_builder *b, nir_ssa_def *f, const unsigned *bits)
310{
311   if (bits[0] == 32)
312      return f;
313
314   nir_const_value max[NIR_MAX_VEC_COMPONENTS];
315   memset(max, 0, sizeof(max));
316   for (unsigned i = 0; i < f->num_components; i++) {
317      assert(bits[i] < 32);
318      max[i].u32 = (1 << bits[i]) - 1;
319   }
320   return nir_umin(b, f, nir_build_imm(b, f->num_components, 32, max));
321}
322
323/* Clamps a vector of sints so they don't extend beyond the given number of
324 * bits per channel.
325 */
326static inline nir_ssa_def *
327nir_format_clamp_sint(nir_builder *b, nir_ssa_def *f, const unsigned *bits)
328{
329   if (bits[0] == 32)
330      return f;
331
332   nir_const_value min[NIR_MAX_VEC_COMPONENTS], max[NIR_MAX_VEC_COMPONENTS];
333   memset(min, 0, sizeof(min));
334   memset(max, 0, sizeof(max));
335   for (unsigned i = 0; i < f->num_components; i++) {
336      assert(bits[i] < 32);
337      max[i].i32 = (1 << (bits[i] - 1)) - 1;
338      min[i].i32 = -(1 << (bits[i] - 1));
339   }
340   f = nir_imin(b, f, nir_build_imm(b, f->num_components, 32, max));
341   f = nir_imax(b, f, nir_build_imm(b, f->num_components, 32, min));
342
343   return f;
344}
345
346static inline nir_ssa_def *
347nir_format_unpack_11f11f10f(nir_builder *b, nir_ssa_def *packed)
348{
349   nir_ssa_def *chans[3];
350   chans[0] = nir_mask_shift(b, packed, 0x000007ff, 4);
351   chans[1] = nir_mask_shift(b, packed, 0x003ff800, -7);
352   chans[2] = nir_mask_shift(b, packed, 0xffc00000, -17);
353
354   for (unsigned i = 0; i < 3; i++)
355      chans[i] = nir_unpack_half_2x16_split_x(b, chans[i]);
356
357   return nir_vec(b, chans, 3);
358}
359
/* Packs a 3-component float vector into R11F_G11F_B10F format. */
static inline nir_ssa_def *
nir_format_pack_11f11f10f(nir_builder *b, nir_ssa_def *color)
{
   /* 10 and 11-bit floats are unsigned.  Clamp to non-negative */
   nir_ssa_def *clamped = nir_fmax(b, color, nir_imm_float(b, 0));

   /* p1 holds R as a half in bits 0-15 and G in bits 16-31;
    * p2 holds B as a half in bits 0-15.
    */
   nir_ssa_def *undef = nir_ssa_undef(b, 1, color->bit_size);
   nir_ssa_def *p1 = nir_pack_half_2x16_split(b, nir_channel(b, clamped, 0),
                                                 nir_channel(b, clamped, 1));
   nir_ssa_def *p2 = nir_pack_half_2x16_split(b, nir_channel(b, clamped, 2),
                                                 undef);

   /* A 10 or 11-bit float has the same exponent as a 16-bit float but with
    * fewer mantissa bits and no sign bit.  All we have to do is throw away
    * the sign bit and the bottom mantissa bits and shift it into place.
    */
   /* R: half bits 4-14 (exp + top 6 mantissa bits) -> packed bits 0-10.
    * G: half bits 20-30 of p1's high half        -> packed bits 11-21.
    * B: half bits 5-14 (exp + top 5 mantissa bits) -> packed bits 22-31.
    */
   nir_ssa_def *packed = nir_imm_int(b, 0);
   packed = nir_mask_shift_or(b, packed, p1, 0x00007ff0, -4);
   packed = nir_mask_shift_or(b, packed, p1, 0x7ff00000, -9);
   packed = nir_mask_shift_or(b, packed, p2, 0x00007fe0, 17);

   return packed;
}
383
/* Packs a 3-component float vector into the shared-exponent RGB9E5 format.
 * This is a NIR translation of float3_to_rgb9e5() from
 * util/format_rgb9e5.h; each step below quotes the corresponding C line.
 */
static inline nir_ssa_def *
nir_format_pack_r9g9b9e5(nir_builder *b, nir_ssa_def *color)
{
   /* See also float3_to_rgb9e5 */

   /* First, we need to clamp it to range. */
   nir_ssa_def *clamped = nir_fmin(b, color, nir_imm_float(b, MAX_RGB9E5));

   /* Get rid of negatives and NaN */
   /* Viewed as uint bits, anything above 0x7f800000 (+inf) is either NaN
    * or has the sign bit set (negative); replace those channels with 0.
    */
   clamped = nir_bcsel(b, nir_ult(b, nir_imm_int(b, 0x7f800000), color),
                          nir_imm_float(b, 0), clamped);

   /* maxrgb.u = MAX3(rc.u, gc.u, bc.u); */
   nir_ssa_def *maxu = nir_umax(b, nir_channel(b, clamped, 0),
                       nir_umax(b, nir_channel(b, clamped, 1),
                                   nir_channel(b, clamped, 2)));

   /* maxrgb.u += maxrgb.u & (1 << (23-9)); */
   /* Bit 14 is the highest float32 mantissa bit dropped when reducing to a
    * 9-bit mantissa; adding it rounds the max component up.
    */
   maxu = nir_iadd(b, maxu, nir_iand(b, maxu, nir_imm_int(b, 1 << 14)));

   /* exp_shared = MAX2((maxrgb.u >> 23), -RGB9E5_EXP_BIAS - 1 + 127) +
    *              1 + RGB9E5_EXP_BIAS - 127;
    */
   nir_ssa_def *exp_shared =
      nir_iadd(b, nir_umax(b, nir_ushr(b, maxu, nir_imm_int(b, 23)),
                              nir_imm_int(b, -RGB9E5_EXP_BIAS - 1 + 127)),
                  nir_imm_int(b, 1 + RGB9E5_EXP_BIAS - 127));

   /* revdenom_biasedexp = 127 - (exp_shared - RGB9E5_EXP_BIAS -
    *                             RGB9E5_MANTISSA_BITS) + 1;
    */
   nir_ssa_def *revdenom_biasedexp =
      nir_isub(b, nir_imm_int(b, 127 + RGB9E5_EXP_BIAS +
                                 RGB9E5_MANTISSA_BITS + 1),
                  exp_shared);

   /* revdenom.u = revdenom_biasedexp << 23; */
   /* Builds a power-of-two float (1/denom) directly in its exponent bits. */
   nir_ssa_def *revdenom =
      nir_ishl(b, revdenom_biasedexp, nir_imm_int(b, 23));

   /* rm = (int) (rc.f * revdenom.f);
    * gm = (int) (gc.f * revdenom.f);
    * bm = (int) (bc.f * revdenom.f);
    */
   nir_ssa_def *mantissa =
      nir_f2i32(b, nir_fmul(b, clamped, revdenom));

   /* rm = (rm & 1) + (rm >> 1);
    * gm = (gm & 1) + (gm >> 1);
    * bm = (bm & 1) + (bm >> 1);
    */
   /* Round-half-up: add back the bit shifted out. */
   mantissa = nir_iadd(b, nir_iand(b, mantissa, nir_imm_int(b, 1)),
                          nir_ushr(b, mantissa, nir_imm_int(b, 1)));

   /* Final layout: R in bits 0-8, G in 9-17, B in 18-26, shared exponent
    * in bits 27-31.
    */
   nir_ssa_def *packed = nir_channel(b, mantissa, 0);
   packed = nir_mask_shift_or(b, packed, nir_channel(b, mantissa, 1), ~0, 9);
   packed = nir_mask_shift_or(b, packed, nir_channel(b, mantissa, 2), ~0, 18);
   packed = nir_mask_shift_or(b, packed, exp_shared, ~0, 27);

   return packed;
}
445