17ec681f3Smrg/**************************************************************************
27ec681f3Smrg *
37ec681f3Smrg * Copyright 2010-2021 VMware, Inc.
47ec681f3Smrg * All Rights Reserved.
57ec681f3Smrg *
67ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
77ec681f3Smrg * copy of this software and associated documentation files (the
87ec681f3Smrg * "Software"), to deal in the Software without restriction, including
97ec681f3Smrg * without limitation the rights to use, copy, modify, merge, publish,
107ec681f3Smrg * distribute, sub license, and/or sell copies of the Software, and to
117ec681f3Smrg * permit persons to whom the Software is furnished to do so, subject to
127ec681f3Smrg * the following conditions:
137ec681f3Smrg *
147ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
157ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
167ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
177ec681f3Smrg * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
187ec681f3Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
197ec681f3Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
207ec681f3Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE.
217ec681f3Smrg *
227ec681f3Smrg * The above copyright notice and this permission notice (including the
237ec681f3Smrg * next paragraph) shall be included in all copies or substantial portions
247ec681f3Smrg * of the Software.
257ec681f3Smrg *
267ec681f3Smrg **************************************************************************/
277ec681f3Smrg
287ec681f3Smrg
297ec681f3Smrg#include "pipe/p_config.h"
307ec681f3Smrg
317ec681f3Smrg#include "util/u_math.h"
327ec681f3Smrg#include "util/u_cpu_detect.h"
337ec681f3Smrg#include "util/u_pack_color.h"
347ec681f3Smrg#include "util/u_rect.h"
357ec681f3Smrg#include "util/u_sse.h"
367ec681f3Smrg
377ec681f3Smrg#include "lp_jit.h"
387ec681f3Smrg#include "lp_rast.h"
397ec681f3Smrg#include "lp_debug.h"
407ec681f3Smrg#include "lp_state_fs.h"
417ec681f3Smrg#include "lp_linear_priv.h"
427ec681f3Smrg
437ec681f3Smrg
447ec681f3Smrg#if defined(PIPE_ARCH_SSE)
457ec681f3Smrg
467ec681f3Smrg
477ec681f3Smrg/* For debugging (LP_DEBUG=linear), shade areas of run-time fallback
487ec681f3Smrg * purple.  Keep blending active so we can see more of what's going
497ec681f3Smrg * on.
507ec681f3Smrg */
517ec681f3Smrgstatic boolean
527ec681f3Smrglinear_fallback(const struct lp_rast_state *state,
537ec681f3Smrg                unsigned x, unsigned y,
547ec681f3Smrg                unsigned width, unsigned height,
557ec681f3Smrg                uint8_t *color,
567ec681f3Smrg                unsigned stride)
577ec681f3Smrg{
587ec681f3Smrg   unsigned col = 0x808000ff;
597ec681f3Smrg   int i;
607ec681f3Smrg
617ec681f3Smrg   for (y = 0; y < height; y++) {
627ec681f3Smrg      for (i = 0; i < 64; i++) {
637ec681f3Smrg         *((uint32_t *)(color + y*stride) + x + i) = col;
647ec681f3Smrg      }
657ec681f3Smrg   }
667ec681f3Smrg
677ec681f3Smrg   return TRUE;
687ec681f3Smrg}
697ec681f3Smrg
707ec681f3Smrg
717ec681f3Smrg/* Run our configurable linear shader pipeline:
727ec681f3Smrg */
737ec681f3Smrgstatic boolean
747ec681f3Smrglp_fs_linear_run(const struct lp_rast_state *state,
757ec681f3Smrg                 unsigned x, unsigned y,
767ec681f3Smrg                 unsigned width, unsigned height,
777ec681f3Smrg                 const float (*a0)[4],
787ec681f3Smrg                 const float (*dadx)[4],
797ec681f3Smrg                 const float (*dady)[4],
807ec681f3Smrg                 uint8_t *color,
817ec681f3Smrg                 unsigned stride)
827ec681f3Smrg{
837ec681f3Smrg   const struct lp_fragment_shader_variant *variant = state->variant;
847ec681f3Smrg   const struct lp_tgsi_info *info = &variant->shader->info;
857ec681f3Smrg   struct lp_jit_linear_context jit;
867ec681f3Smrg   lp_jit_linear_llvm_func jit_func = variant->jit_linear_llvm;
877ec681f3Smrg
887ec681f3Smrg   struct lp_linear_sampler samp[LP_MAX_LINEAR_TEXTURES];
897ec681f3Smrg   struct lp_linear_interp interp[LP_MAX_LINEAR_INPUTS];
907ec681f3Smrg   uint8_t constants[LP_MAX_LINEAR_CONSTANTS][4];
917ec681f3Smrg
927ec681f3Smrg   const float w0 = a0[0][3];
937ec681f3Smrg   float oow = 1.0f/w0;
947ec681f3Smrg
957ec681f3Smrg   unsigned input_mask = variant->linear_input_mask;
967ec681f3Smrg   int nr_consts = info->base.file_max[TGSI_FILE_CONSTANT]+1;
977ec681f3Smrg   int nr_tex = info->num_texs;
987ec681f3Smrg   int i, j;
997ec681f3Smrg
1007ec681f3Smrg   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
1017ec681f3Smrg
1027ec681f3Smrg   /* Require constant w in these rectangles:
1037ec681f3Smrg    */
1047ec681f3Smrg   if (dadx[0][3] != 0.0f ||
1057ec681f3Smrg       dady[0][3] != 0.0f) {
1067ec681f3Smrg      if (LP_DEBUG & DEBUG_LINEAR2)
1077ec681f3Smrg         debug_printf("  -- w not constant\n");
1087ec681f3Smrg      goto fail;
1097ec681f3Smrg   }
1107ec681f3Smrg
1117ec681f3Smrg   /* XXX: Per statechange:
1127ec681f3Smrg    */
1137ec681f3Smrg   for (i = 0; i < nr_consts; i++) {
1147ec681f3Smrg      for (j = 0; j < 4; j++) {
1157ec681f3Smrg         float val = state->jit_context.constants[0][i*4+j];
1167ec681f3Smrg         if (val < 0.0f || val > 1.0f) {
1177ec681f3Smrg            if (LP_DEBUG & DEBUG_LINEAR2)
1187ec681f3Smrg               debug_printf("  -- const[%d] out of range\n", i);
1197ec681f3Smrg            goto fail;
1207ec681f3Smrg         }
1217ec681f3Smrg         constants[i][j] = (uint8_t)(val * 255.0f);
1227ec681f3Smrg      }
1237ec681f3Smrg   }
1247ec681f3Smrg   jit.constants = (const uint8_t (*)[4])constants;
1257ec681f3Smrg
1267ec681f3Smrg   /* We assume BGRA ordering */
1277ec681f3Smrg   assert(variant->key.cbuf_format[0] == PIPE_FORMAT_B8G8R8X8_UNORM ||
1287ec681f3Smrg          variant->key.cbuf_format[0] == PIPE_FORMAT_B8G8R8A8_UNORM);
1297ec681f3Smrg
1307ec681f3Smrg   jit.blend_color =
1317ec681f3Smrg         state->jit_context.u8_blend_color[32] +
1327ec681f3Smrg         (state->jit_context.u8_blend_color[16] << 8) +
1337ec681f3Smrg         (state->jit_context.u8_blend_color[0] << 16) +
1347ec681f3Smrg         (state->jit_context.u8_blend_color[48] << 24);
1357ec681f3Smrg
1367ec681f3Smrg   jit.alpha_ref_value = float_to_ubyte(state->jit_context.alpha_ref_value);
1377ec681f3Smrg
1387ec681f3Smrg   /* XXX: Per primitive:
1397ec681f3Smrg    */
1407ec681f3Smrg   while (input_mask) {
1417ec681f3Smrg      int i = u_bit_scan(&input_mask);
1427ec681f3Smrg      unsigned usage_mask = info->base.input_usage_mask[i];
1437ec681f3Smrg      boolean perspective =
1447ec681f3Smrg            info->base.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE ||
1457ec681f3Smrg            (info->base.input_interpolate[i] == TGSI_INTERPOLATE_COLOR &&
1467ec681f3Smrg             !variant->key.flatshade);
1477ec681f3Smrg
1487ec681f3Smrg      if (!lp_linear_init_interp(&interp[i],
1497ec681f3Smrg                                 x, y, width, height,
1507ec681f3Smrg                                 usage_mask,
1517ec681f3Smrg                                 perspective,
1527ec681f3Smrg                                 oow,
1537ec681f3Smrg                                 a0[i+1],
1547ec681f3Smrg                                 dadx[i+1],
1557ec681f3Smrg                                 dady[i+1])) {
1567ec681f3Smrg         if (LP_DEBUG & DEBUG_LINEAR2)
1577ec681f3Smrg            debug_printf("  -- init_interp(%d) failed\n", i);
1587ec681f3Smrg         goto fail;
1597ec681f3Smrg      }
1607ec681f3Smrg
1617ec681f3Smrg      jit.inputs[i] = &interp[i].base;
1627ec681f3Smrg   }
1637ec681f3Smrg
1647ec681f3Smrg
1657ec681f3Smrg   /* XXX: Per primitive: Initialize linear or nearest samplers:
1667ec681f3Smrg    */
1677ec681f3Smrg   for (i = 0; i < nr_tex; i++) {
1687ec681f3Smrg      const struct lp_tgsi_texture_info *tex_info = &info->tex[i];
1697ec681f3Smrg      unsigned unit = tex_info->sampler_unit;
1707ec681f3Smrg
1717ec681f3Smrg      /* XXX: some texture coordinates are linear!
1727ec681f3Smrg       */
1737ec681f3Smrg      //boolean perspective = (info->base.input_interpolate[i] ==
1747ec681f3Smrg      //                       TGSI_INTERPOLATE_PERSPECTIVE);
1757ec681f3Smrg
1767ec681f3Smrg      if (!lp_linear_init_sampler(&samp[i],
1777ec681f3Smrg                                  tex_info,
1787ec681f3Smrg                                  lp_fs_variant_key_sampler_idx(&variant->key, unit),
1797ec681f3Smrg                                  &state->jit_context.textures[unit],
1807ec681f3Smrg                                  x, y, width, height,
1817ec681f3Smrg                                  a0, dadx, dady)) {
1827ec681f3Smrg         if (LP_DEBUG & DEBUG_LINEAR2)
1837ec681f3Smrg            debug_printf("  -- init_sampler(%d) failed\n", i);
1847ec681f3Smrg         goto fail;
1857ec681f3Smrg      }
1867ec681f3Smrg
1877ec681f3Smrg      jit.tex[i] = &samp[i].base;
1887ec681f3Smrg   }
1897ec681f3Smrg
1907ec681f3Smrg   /* JIT function already does blending */
1917ec681f3Smrg   jit.color0 = color + x * 4 + y * stride;
1927ec681f3Smrg   for (y = 0; y < height; y++) {
1937ec681f3Smrg      jit_func(&jit, 0, 0, width);
1947ec681f3Smrg      jit.color0 += stride;
1957ec681f3Smrg   }
1967ec681f3Smrg
1977ec681f3Smrg   return TRUE;
1987ec681f3Smrg
1997ec681f3Smrgfail:
2007ec681f3Smrg   /* Visually distinguish this from other fallbacks:
2017ec681f3Smrg    */
2027ec681f3Smrg   if (LP_DEBUG & DEBUG_LINEAR) {
2037ec681f3Smrg      return linear_fallback(state, x, y, width, height, color, stride);
2047ec681f3Smrg   }
2057ec681f3Smrg
2067ec681f3Smrg   return FALSE;
2077ec681f3Smrg}
2087ec681f3Smrg
2097ec681f3Smrg
2107ec681f3Smrgstatic void
2117ec681f3Smrgcheck_linear_interp_mask_a(struct lp_fragment_shader_variant *variant)
2127ec681f3Smrg{
2137ec681f3Smrg   const struct lp_tgsi_info *info = &variant->shader->info;
2147ec681f3Smrg   struct lp_jit_linear_context jit;
2157ec681f3Smrg
2167ec681f3Smrg   struct lp_linear_sampler samp[LP_MAX_LINEAR_TEXTURES];
2177ec681f3Smrg   struct lp_linear_interp interp[LP_MAX_LINEAR_INPUTS];
2187ec681f3Smrg   uint8_t constants[LP_MAX_LINEAR_CONSTANTS][4];
2197ec681f3Smrg   PIPE_ALIGN_VAR(16) uint8_t color0[TILE_SIZE*4];
2207ec681f3Smrg
2217ec681f3Smrg   int nr_inputs = info->base.file_max[TGSI_FILE_INPUT]+1;
2227ec681f3Smrg   int nr_tex = info->num_texs;
2237ec681f3Smrg   int i;
2247ec681f3Smrg
2257ec681f3Smrg   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
2267ec681f3Smrg
2277ec681f3Smrg   jit.constants = (const uint8_t (*)[4])constants;
2287ec681f3Smrg
2297ec681f3Smrg   for (i = 0; i < nr_tex; i++) {
2307ec681f3Smrg      lp_linear_init_noop_sampler(&samp[i]);
2317ec681f3Smrg      jit.tex[i] = &samp[i].base;
2327ec681f3Smrg   }
2337ec681f3Smrg
2347ec681f3Smrg   for (i = 0; i < nr_inputs; i++) {
2357ec681f3Smrg      lp_linear_init_noop_interp(&interp[i]);
2367ec681f3Smrg      jit.inputs[i] = &interp[i].base;
2377ec681f3Smrg   }
2387ec681f3Smrg
2397ec681f3Smrg   jit.color0 = color0;
2407ec681f3Smrg
2417ec681f3Smrg   (void)variant->jit_linear_llvm(&jit, 0, 0, 0);
2427ec681f3Smrg
2437ec681f3Smrg   /* Find out which interpolators were called, and store this as a
2447ec681f3Smrg    * mask:
2457ec681f3Smrg    */
2467ec681f3Smrg   for (i = 0; i < nr_inputs; i++)
2477ec681f3Smrg      variant->linear_input_mask |= (interp[i].row[0] << i);
2487ec681f3Smrg}
2497ec681f3Smrg
2507ec681f3Smrg
2517ec681f3Smrg/* Until the above is working, look at texture information and guess
2527ec681f3Smrg * that any input used as a texture coordinate is not used for
2537ec681f3Smrg * anything else.
2547ec681f3Smrg */
2557ec681f3Smrgstatic void
2567ec681f3Smrgcheck_linear_interp_mask_b(struct lp_fragment_shader_variant *variant)
2577ec681f3Smrg{
2587ec681f3Smrg   const struct lp_tgsi_info *info = &variant->shader->info;
2597ec681f3Smrg   int nr_inputs = info->base.file_max[TGSI_FILE_INPUT]+1;
2607ec681f3Smrg   int nr_tex = info->num_texs;
2617ec681f3Smrg   unsigned tex_mask = 0;
2627ec681f3Smrg   int i;
2637ec681f3Smrg
2647ec681f3Smrg   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
2657ec681f3Smrg
2667ec681f3Smrg   for (i = 0; i < nr_tex; i++) {
2677ec681f3Smrg      const struct lp_tgsi_texture_info *tex_info = &info->tex[i];
2687ec681f3Smrg      const struct lp_tgsi_channel_info *schan = &tex_info->coord[0];
2697ec681f3Smrg      const struct lp_tgsi_channel_info *tchan = &tex_info->coord[1];
2707ec681f3Smrg      tex_mask |= 1 << schan->u.index;
2717ec681f3Smrg      tex_mask |= 1 << tchan->u.index;
2727ec681f3Smrg   }
2737ec681f3Smrg
2747ec681f3Smrg   variant->linear_input_mask = ((1 << nr_inputs) - 1) & ~tex_mask;
2757ec681f3Smrg}
2767ec681f3Smrg
2777ec681f3Smrg
2787ec681f3Smrgvoid
2797ec681f3Smrglp_linear_check_variant(struct lp_fragment_shader_variant *variant)
2807ec681f3Smrg{
2817ec681f3Smrg   const struct lp_fragment_shader_variant_key *key = &variant->key;
2827ec681f3Smrg   const struct lp_fragment_shader *shader = variant->shader;
2837ec681f3Smrg   const struct lp_tgsi_info *info = &shader->info;
2847ec681f3Smrg   int i;
2857ec681f3Smrg
2867ec681f3Smrg   if (info->base.file_max[TGSI_FILE_CONSTANT] >= LP_MAX_LINEAR_CONSTANTS ||
2877ec681f3Smrg       info->base.file_max[TGSI_FILE_INPUT] >= LP_MAX_LINEAR_INPUTS) {
2887ec681f3Smrg      if (LP_DEBUG & DEBUG_LINEAR)
2897ec681f3Smrg         debug_printf("  -- too many inputs/constants\n");
2907ec681f3Smrg      goto fail;
2917ec681f3Smrg   }
2927ec681f3Smrg
2937ec681f3Smrg   /* If we have a fastpath which implements the entire varient, use
2947ec681f3Smrg    * that.
2957ec681f3Smrg    */
2967ec681f3Smrg   if (lp_linear_check_fastpath(variant)) {
2977ec681f3Smrg      return;
2987ec681f3Smrg   }
2997ec681f3Smrg
3007ec681f3Smrg   /* Otherwise, can we build up a spanline-based linear path for this
3017ec681f3Smrg    * variant?
3027ec681f3Smrg    */
3037ec681f3Smrg
3047ec681f3Smrg   /* Check static sampler state.
3057ec681f3Smrg    */
3067ec681f3Smrg   for (i = 0; i < info->num_texs; i++) {
3077ec681f3Smrg      const struct lp_tgsi_texture_info *tex_info = &info->tex[i];
3087ec681f3Smrg      unsigned unit = tex_info->sampler_unit;
3097ec681f3Smrg
3107ec681f3Smrg      /* XXX: Relax this once setup premultiplies by oow:
3117ec681f3Smrg       */
3127ec681f3Smrg      if (info->base.input_interpolate[unit] != TGSI_INTERPOLATE_PERSPECTIVE) {
3137ec681f3Smrg         if (LP_DEBUG & DEBUG_LINEAR)
3147ec681f3Smrg            debug_printf(" -- samp[%d]: texcoord not perspective\n", i);
3157ec681f3Smrg         goto fail;
3167ec681f3Smrg      }
3177ec681f3Smrg
3187ec681f3Smrg      struct lp_sampler_static_state *samp = lp_fs_variant_key_sampler_idx(key, unit);
3197ec681f3Smrg      if (!lp_linear_check_sampler(samp, tex_info)) {
3207ec681f3Smrg         if (LP_DEBUG & DEBUG_LINEAR)
3217ec681f3Smrg            debug_printf(" -- samp[%d]: check_sampler failed\n", i);
3227ec681f3Smrg         goto fail;
3237ec681f3Smrg      }
3247ec681f3Smrg   }
3257ec681f3Smrg
3267ec681f3Smrg   /* Check shader.  May not have been jitted.
3277ec681f3Smrg    */
3287ec681f3Smrg   if (variant->linear_function == NULL) {
3297ec681f3Smrg      if (LP_DEBUG & DEBUG_LINEAR)
3307ec681f3Smrg         debug_printf("  -- no linear shader\n");
3317ec681f3Smrg      goto fail;
3327ec681f3Smrg   }
3337ec681f3Smrg
3347ec681f3Smrg   /* Hook in the catchall shader runner:
3357ec681f3Smrg    */
3367ec681f3Smrg   variant->jit_linear = lp_fs_linear_run;
3377ec681f3Smrg
3387ec681f3Smrg   /* Figure out which inputs we don't need to interpolate (because
3397ec681f3Smrg    * they are only used as texture coordinates).  This is important
3407ec681f3Smrg    * as we can cope with texture coordinates which exceed 1.0, but
3417ec681f3Smrg    * cannot do so for regular inputs.
3427ec681f3Smrg    */
3437ec681f3Smrg   if (1)
3447ec681f3Smrg      check_linear_interp_mask_a(variant);
3457ec681f3Smrg   else
3467ec681f3Smrg      check_linear_interp_mask_b(variant);
3477ec681f3Smrg
3487ec681f3Smrg
3497ec681f3Smrg   if (0) {
3507ec681f3Smrg      lp_debug_fs_variant(variant);
3517ec681f3Smrg      debug_printf("linear input mask: 0x%x\n", variant->linear_input_mask);
3527ec681f3Smrg   }
3537ec681f3Smrg
3547ec681f3Smrg   return;
3557ec681f3Smrg
3567ec681f3Smrgfail:
3577ec681f3Smrg   if (LP_DEBUG & DEBUG_LINEAR) {
3587ec681f3Smrg      lp_debug_fs_variant(variant);
3597ec681f3Smrg      debug_printf("    ----> no linear path for this variant\n");
3607ec681f3Smrg   }
3617ec681f3Smrg}
3627ec681f3Smrg
3637ec681f3Smrg
3647ec681f3Smrg#else
/**
 * Build without PIPE_ARCH_SSE: there is no linear path to set up, so
 * the variant is left exactly as it was passed in.
 *
 * \param variant  shader variant; not inspected or modified
 */
void
lp_linear_check_variant(struct lp_fragment_shader_variant *variant)
{
   (void)variant;
}
3697ec681f3Smrg#endif
370