1/**************************************************************************
2 *
3 * Copyright 2010-2021 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 *
26 **************************************************************************/
27
28
29#include "pipe/p_config.h"
30
31#include "util/u_math.h"
32#include "util/u_cpu_detect.h"
33#include "util/u_pack_color.h"
34#include "util/u_surface.h"
35#include "util/u_sse.h"
36
37#include "lp_jit.h"
38#include "lp_rast.h"
39#include "lp_debug.h"
40#include "lp_state_fs.h"
41#include "lp_linear_priv.h"
42
43
44#if defined(PIPE_ARCH_SSE)
45
46
/* This file contains various special-case fastpaths which implement
 * the entire linear pipeline in a single function.
49 *
50 * These include simple blits and some debug code.
51 *
52 * These functions fully implement the linear path and do not need to
53 * be combined with blending, interpolation or sampling routines.
54 */
55
56/* Linear shader which implements the BLIT_RGBA shader with the
57 * additional constraints imposed by lp_setup_is_blit().
58 */
59static boolean
60lp_linear_blit_rgba_blit(const struct lp_rast_state *state,
61               unsigned x, unsigned y,
62               unsigned width, unsigned height,
63               const float (*a0)[4],
64               const float (*dadx)[4],
65               const float (*dady)[4],
66               uint8_t *color,
67               unsigned stride)
68{
69   const struct lp_jit_context *context = &state->jit_context;
70   const struct lp_jit_texture *texture = &context->textures[0];
71   const uint8_t *src;
72   unsigned src_stride;
73   int src_x, src_y;
74
75   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
76
77   /* Require w==1.0:
78    */
79   if (a0[0][3] != 1.0 ||
80       dadx[0][3] != 0.0 ||
81       dady[0][3] != 0.0)
82      return FALSE;
83
84   src_x = x + util_iround(a0[1][0]*texture->width - 0.5f);
85   src_y = y + util_iround(a0[1][1]*texture->height - 0.5f);
86
87   src = texture->base;
88   src_stride = texture->row_stride[0];
89
90   /* Fall back to blit_rgba() if clamping required:
91    */
92   if (src_x < 0 ||
93       src_y < 0 ||
94       src_x + width > texture->width ||
95       src_y + height > texture->height)
96      return FALSE;
97
98   util_copy_rect(color, PIPE_FORMAT_B8G8R8A8_UNORM, stride,
99                  x, y,
100                  width, height,
101                  src, src_stride,
102                  src_x, src_y);
103
104   return TRUE;
105}
106
107
108/* Linear shader which implements the BLIT_RGB1 shader, with the
109 * additional constraints imposed by lp_setup_is_blit().
110 */
111static boolean
112lp_linear_blit_rgb1_blit(const struct lp_rast_state *state,
113               unsigned x, unsigned y,
114               unsigned width, unsigned height,
115               const float (*a0)[4],
116               const float (*dadx)[4],
117               const float (*dady)[4],
118               uint8_t *color,
119               unsigned stride)
120{
121   const struct lp_jit_context *context = &state->jit_context;
122   const struct lp_jit_texture *texture = &context->textures[0];
123   const uint8_t *src;
124   unsigned src_stride;
125   int src_x, src_y;
126
127   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
128
129   /* Require w==1.0:
130    */
131   if (a0[0][3] != 1.0 ||
132       dadx[0][3] != 0.0 ||
133       dady[0][3] != 0.0)
134      return FALSE;
135
136   color += x * 4 + y * stride;
137
138   src_x = x + util_iround(a0[1][0]*texture->width - 0.5f);
139   src_y = y + util_iround(a0[1][1]*texture->height - 0.5f);
140
141   src = texture->base;
142   src_stride = texture->row_stride[0];
143   src += src_x * 4;
144   src += src_y * src_stride;
145
146   if (src_x < 0 ||
147       src_y < 0 ||
148       src_x + width > texture->width ||
149       src_y + height > texture->height)
150      return FALSE;
151
152   for (y = 0; y < height; y++) {
153      const uint32_t *src_row = (const uint32_t *)src;
154      uint32_t *dst_row = (uint32_t *)color;
155
156      for (x = 0; x < width; x++) {
157         *dst_row++ = *src_row++ | 0xff000000;
158      }
159
160      color += stride;
161      src += src_stride;
162   }
163
164   return TRUE;
165}
166
167/* Linear shader which always emits purple.  Used for debugging.
168 */
169static boolean
170lp_linear_purple(const struct lp_rast_state *state,
171              unsigned x, unsigned y,
172              unsigned width, unsigned height,
173              const float (*a0)[4],
174              const float (*dadx)[4],
175              const float (*dady)[4],
176              uint8_t *color,
177              unsigned stride)
178{
179   union util_color uc;
180
181   util_pack_color_ub(0xff, 0, 0xff, 0xff,
182                      PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
183
184   util_fill_rect(color,
185                  PIPE_FORMAT_B8G8R8A8_UNORM,
186                  stride,
187                  x,
188                  y,
189                  width,
190                  height,
191                  &uc);
192
193   return TRUE;
194}
195
196/* Examine the fragment shader varient and determine whether we can
197 * substitute a fastpath linear shader implementation.
198 */
199boolean
200lp_linear_check_fastpath(struct lp_fragment_shader_variant *variant)
201{
202   struct lp_sampler_static_state *samp0 = lp_fs_variant_key_sampler_idx(&variant->key, 0);
203
204   if (!samp0)
205      return false;
206
207   enum pipe_format tex_format = samp0->texture_state.format;
208   if (variant->shader->kind == LP_FS_KIND_BLIT_RGBA &&
209       tex_format == PIPE_FORMAT_B8G8R8A8_UNORM &&
210       is_nearest_clamp_sampler(samp0) &&
211       variant->opaque) {
212      variant->jit_linear_blit             = lp_linear_blit_rgba_blit;
213   }
214
215   if (variant->shader->kind == LP_FS_KIND_BLIT_RGB1 &&
216       variant->opaque &&
217       (tex_format == PIPE_FORMAT_B8G8R8A8_UNORM ||
218        tex_format == PIPE_FORMAT_B8G8R8X8_UNORM) &&
219       is_nearest_clamp_sampler(samp0)) {
220      variant->jit_linear_blit             = lp_linear_blit_rgb1_blit;
221   }
222
223   if (0) {
224      variant->jit_linear                   = lp_linear_purple;
225   }
226
227
228   /* Stop now if jit_linear has been initialized.  Otherwise keep
229    * searching - even if jit_linear_blit has been instantiated.
230    */
231   return variant->jit_linear != NULL;
232}
233#else
234boolean
235lp_linear_check_fastpath(struct lp_fragment_shader_variant *variant)
236{
237   return FALSE;
238}
239#endif
240
241