1/************************************************************************** 2 * 3 * Copyright 2010-2021 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20 * USE OR OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * The above copyright notice and this permission notice (including the 23 * next paragraph) shall be included in all copies or substantial portions 24 * of the Software. 25 * 26 **************************************************************************/ 27 28 29#include "pipe/p_config.h" 30 31#include "util/u_math.h" 32#include "util/u_cpu_detect.h" 33#include "util/u_pack_color.h" 34#include "util/u_rect.h" 35#include "util/u_sse.h" 36 37#include "lp_jit.h" 38#include "lp_rast.h" 39#include "lp_debug.h" 40#include "lp_state_fs.h" 41#include "lp_linear_priv.h" 42 43 44#if defined(PIPE_ARCH_SSE) 45 46 47/* For debugging (LP_DEBUG=linear), shade areas of run-time fallback 48 * purple. Keep blending active so we can see more of what's going 49 * on. 50 */ 51static boolean 52linear_fallback(const struct lp_rast_state *state, 53 unsigned x, unsigned y, 54 unsigned width, unsigned height, 55 uint8_t *color, 56 unsigned stride) 57{ 58 unsigned col = 0x808000ff; 59 int i; 60 61 for (y = 0; y < height; y++) { 62 for (i = 0; i < 64; i++) { 63 *((uint32_t *)(color + y*stride) + x + i) = col; 64 } 65 } 66 67 return TRUE; 68} 69 70 71/* Run our configurable linear shader pipeline: 72 */ 73static boolean 74lp_fs_linear_run(const struct lp_rast_state *state, 75 unsigned x, unsigned y, 76 unsigned width, unsigned height, 77 const float (*a0)[4], 78 const float (*dadx)[4], 79 const float (*dady)[4], 80 uint8_t *color, 81 unsigned stride) 82{ 83 const struct lp_fragment_shader_variant *variant = state->variant; 84 const struct lp_tgsi_info *info = &variant->shader->info; 85 struct lp_jit_linear_context jit; 86 lp_jit_linear_llvm_func jit_func = variant->jit_linear_llvm; 87 88 struct lp_linear_sampler samp[LP_MAX_LINEAR_TEXTURES]; 89 struct lp_linear_interp interp[LP_MAX_LINEAR_INPUTS]; 90 uint8_t constants[LP_MAX_LINEAR_CONSTANTS][4]; 91 92 const float w0 = a0[0][3]; 93 float oow = 1.0f/w0; 94 95 unsigned input_mask = variant->linear_input_mask; 96 int nr_consts = info->base.file_max[TGSI_FILE_CONSTANT]+1; 97 int nr_tex = info->num_texs; 98 int i, j; 99 100 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 101 102 /* Require constant w in these rectangles: 103 */ 104 if (dadx[0][3] != 0.0f || 105 dady[0][3] != 0.0f) { 106 if (LP_DEBUG & DEBUG_LINEAR2) 107 debug_printf(" -- w not constant\n"); 108 goto fail; 109 } 110 111 /* XXX: Per statechange: 112 */ 113 for (i = 0; i < nr_consts; i++) { 114 for (j = 0; j < 4; j++) { 115 float val = state->jit_context.constants[0][i*4+j]; 116 if (val < 0.0f || val > 1.0f) { 117 if (LP_DEBUG & DEBUG_LINEAR2) 118 debug_printf(" -- const[%d] out of range\n", i); 119 goto fail; 120 } 121 constants[i][j] = (uint8_t)(val * 255.0f); 122 } 123 } 124 jit.constants = (const uint8_t (*)[4])constants; 125 126 /* We assume BGRA ordering */ 127 assert(variant->key.cbuf_format[0] == PIPE_FORMAT_B8G8R8X8_UNORM || 128 variant->key.cbuf_format[0] == PIPE_FORMAT_B8G8R8A8_UNORM); 129 130 jit.blend_color = 131 state->jit_context.u8_blend_color[32] + 132 (state->jit_context.u8_blend_color[16] << 8) + 133 (state->jit_context.u8_blend_color[0] << 16) + 134 (state->jit_context.u8_blend_color[48] << 24); 135 136 jit.alpha_ref_value = float_to_ubyte(state->jit_context.alpha_ref_value); 137 138 /* XXX: Per primitive: 139 */ 140 while (input_mask) { 141 int i = u_bit_scan(&input_mask); 142 unsigned usage_mask = info->base.input_usage_mask[i]; 143 boolean perspective = 144 info->base.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE || 145 (info->base.input_interpolate[i] == TGSI_INTERPOLATE_COLOR && 146 !variant->key.flatshade); 147 148 if (!lp_linear_init_interp(&interp[i], 149 x, y, width, height, 150 usage_mask, 151 perspective, 152 oow, 153 a0[i+1], 154 dadx[i+1], 155 dady[i+1])) { 156 if (LP_DEBUG & DEBUG_LINEAR2) 157 debug_printf(" -- init_interp(%d) failed\n", i); 158 goto fail; 159 } 160 161 jit.inputs[i] = &interp[i].base; 162 } 163 164 165 /* XXX: Per primitive: Initialize linear or nearest samplers: 166 */ 167 for (i = 0; i < nr_tex; i++) { 168 const struct lp_tgsi_texture_info *tex_info = &info->tex[i]; 169 unsigned unit = tex_info->sampler_unit; 170 171 /* XXX: some texture coordinates are linear! 172 */ 173 //boolean perspective = (info->base.input_interpolate[i] == 174 // TGSI_INTERPOLATE_PERSPECTIVE); 175 176 if (!lp_linear_init_sampler(&samp[i], 177 tex_info, 178 lp_fs_variant_key_sampler_idx(&variant->key, unit), 179 &state->jit_context.textures[unit], 180 x, y, width, height, 181 a0, dadx, dady)) { 182 if (LP_DEBUG & DEBUG_LINEAR2) 183 debug_printf(" -- init_sampler(%d) failed\n", i); 184 goto fail; 185 } 186 187 jit.tex[i] = &samp[i].base; 188 } 189 190 /* JIT function already does blending */ 191 jit.color0 = color + x * 4 + y * stride; 192 for (y = 0; y < height; y++) { 193 jit_func(&jit, 0, 0, width); 194 jit.color0 += stride; 195 } 196 197 return TRUE; 198 199fail: 200 /* Visually distinguish this from other fallbacks: 201 */ 202 if (LP_DEBUG & DEBUG_LINEAR) { 203 return linear_fallback(state, x, y, width, height, color, stride); 204 } 205 206 return FALSE; 207} 208 209 210static void 211check_linear_interp_mask_a(struct lp_fragment_shader_variant *variant) 212{ 213 const struct lp_tgsi_info *info = &variant->shader->info; 214 struct lp_jit_linear_context jit; 215 216 struct lp_linear_sampler samp[LP_MAX_LINEAR_TEXTURES]; 217 struct lp_linear_interp interp[LP_MAX_LINEAR_INPUTS]; 218 uint8_t constants[LP_MAX_LINEAR_CONSTANTS][4]; 219 PIPE_ALIGN_VAR(16) uint8_t color0[TILE_SIZE*4]; 220 221 int nr_inputs = info->base.file_max[TGSI_FILE_INPUT]+1; 222 int nr_tex = info->num_texs; 223 int i; 224 225 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 226 227 jit.constants = (const uint8_t (*)[4])constants; 228 229 for (i = 0; i < nr_tex; i++) { 230 lp_linear_init_noop_sampler(&samp[i]); 231 jit.tex[i] = &samp[i].base; 232 } 233 234 for (i = 0; i < nr_inputs; i++) { 235 lp_linear_init_noop_interp(&interp[i]); 236 jit.inputs[i] = &interp[i].base; 237 } 238 239 jit.color0 = color0; 240 241 (void)variant->jit_linear_llvm(&jit, 0, 0, 0); 242 243 /* Find out which interpolators were called, and store this as a 244 * mask: 245 */ 246 for (i = 0; i < nr_inputs; i++) 247 variant->linear_input_mask |= (interp[i].row[0] << i); 248} 249 250 251/* Until the above is working, look at texture information and guess 252 * that any input used as a texture coordinate is not used for 253 * anything else. 254 */ 255static void 256check_linear_interp_mask_b(struct lp_fragment_shader_variant *variant) 257{ 258 const struct lp_tgsi_info *info = &variant->shader->info; 259 int nr_inputs = info->base.file_max[TGSI_FILE_INPUT]+1; 260 int nr_tex = info->num_texs; 261 unsigned tex_mask = 0; 262 int i; 263 264 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 265 266 for (i = 0; i < nr_tex; i++) { 267 const struct lp_tgsi_texture_info *tex_info = &info->tex[i]; 268 const struct lp_tgsi_channel_info *schan = &tex_info->coord[0]; 269 const struct lp_tgsi_channel_info *tchan = &tex_info->coord[1]; 270 tex_mask |= 1 << schan->u.index; 271 tex_mask |= 1 << tchan->u.index; 272 } 273 274 variant->linear_input_mask = ((1 << nr_inputs) - 1) & ~tex_mask; 275} 276 277 278void 279lp_linear_check_variant(struct lp_fragment_shader_variant *variant) 280{ 281 const struct lp_fragment_shader_variant_key *key = &variant->key; 282 const struct lp_fragment_shader *shader = variant->shader; 283 const struct lp_tgsi_info *info = &shader->info; 284 int i; 285 286 if (info->base.file_max[TGSI_FILE_CONSTANT] >= LP_MAX_LINEAR_CONSTANTS || 287 info->base.file_max[TGSI_FILE_INPUT] >= LP_MAX_LINEAR_INPUTS) { 288 if (LP_DEBUG & DEBUG_LINEAR) 289 debug_printf(" -- too many inputs/constants\n"); 290 goto fail; 291 } 292 293 /* If we have a fastpath which implements the entire varient, use 294 * that. 295 */ 296 if (lp_linear_check_fastpath(variant)) { 297 return; 298 } 299 300 /* Otherwise, can we build up a spanline-based linear path for this 301 * variant? 302 */ 303 304 /* Check static sampler state. 305 */ 306 for (i = 0; i < info->num_texs; i++) { 307 const struct lp_tgsi_texture_info *tex_info = &info->tex[i]; 308 unsigned unit = tex_info->sampler_unit; 309 310 /* XXX: Relax this once setup premultiplies by oow: 311 */ 312 if (info->base.input_interpolate[unit] != TGSI_INTERPOLATE_PERSPECTIVE) { 313 if (LP_DEBUG & DEBUG_LINEAR) 314 debug_printf(" -- samp[%d]: texcoord not perspective\n", i); 315 goto fail; 316 } 317 318 struct lp_sampler_static_state *samp = lp_fs_variant_key_sampler_idx(key, unit); 319 if (!lp_linear_check_sampler(samp, tex_info)) { 320 if (LP_DEBUG & DEBUG_LINEAR) 321 debug_printf(" -- samp[%d]: check_sampler failed\n", i); 322 goto fail; 323 } 324 } 325 326 /* Check shader. May not have been jitted. 327 */ 328 if (variant->linear_function == NULL) { 329 if (LP_DEBUG & DEBUG_LINEAR) 330 debug_printf(" -- no linear shader\n"); 331 goto fail; 332 } 333 334 /* Hook in the catchall shader runner: 335 */ 336 variant->jit_linear = lp_fs_linear_run; 337 338 /* Figure out which inputs we don't need to interpolate (because 339 * they are only used as texture coordinates). This is important 340 * as we can cope with texture coordinates which exceed 1.0, but 341 * cannot do so for regular inputs. 342 */ 343 if (1) 344 check_linear_interp_mask_a(variant); 345 else 346 check_linear_interp_mask_b(variant); 347 348 349 if (0) { 350 lp_debug_fs_variant(variant); 351 debug_printf("linear input mask: 0x%x\n", variant->linear_input_mask); 352 } 353 354 return; 355 356fail: 357 if (LP_DEBUG & DEBUG_LINEAR) { 358 lp_debug_fs_variant(variant); 359 debug_printf(" ----> no linear path for this variant\n"); 360 } 361} 362 363 364#else 365void 366lp_linear_check_variant(struct lp_fragment_shader_variant *variant) 367{ 368} 369#endif 370