1/************************************************************************** 2 * 3 * Copyright 2010-2021 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20 * USE OR OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * The above copyright notice and this permission notice (including the 23 * next paragraph) shall be included in all copies or substantial portions 24 * of the Software. 25 * 26 **************************************************************************/ 27 28 29#include "pipe/p_config.h" 30 31#include "util/u_math.h" 32#include "util/u_cpu_detect.h" 33#include "util/u_sse.h" 34 35#include "lp_jit.h" 36#include "lp_state_fs.h" 37#include "lp_debug.h" 38 39 40#if defined(PIPE_ARCH_SSE) 41 42#include <emmintrin.h> 43 44 45static void 46no_op(const struct lp_jit_context *context, 47 uint32_t x, 48 uint32_t y, 49 uint32_t facing, 50 const void *a0, 51 const void *dadx, 52 const void *dady, 53 uint8_t **cbufs, 54 uint8_t *depth, 55 uint64_t mask, 56 struct lp_jit_thread_data *thread_data, 57 unsigned *strides, 58 unsigned depth_stride, 59 unsigned *color_sample_stride, 60 unsigned depth_sample_stride) 61{ 62} 63 64 65/* 66 * m ? a : b 67 */ 68static inline __m128i 69mm_select_si128(__m128i m, __m128i a, __m128i b) 70{ 71 __m128i res; 72 73 /* 74 * TODO: use PBLENVB when available. 75 */ 76 77 res = _mm_or_si128(_mm_and_si128(m, a), 78 _mm_andnot_si128(m, b)); 79 80 return res; 81} 82 83 84/* 85 * *p = m ? a : *p; 86 */ 87static inline void 88mm_store_mask_si128(__m128i *p, __m128i m, __m128i a) 89{ 90 _mm_store_si128(p, mm_select_si128(m, a, _mm_load_si128(p))); 91} 92 93 94/** 95 * Expand the mask from a 16 bit integer to a 4 x 4 x 32 bit vector mask, ie. 96 * 1 bit -> 32bits. 97 */ 98static inline void 99expand_mask(uint32_t int_mask, 100 __m128i *vec_mask) 101{ 102 __m128i inv_mask = _mm_set1_epi32(~int_mask & 0xffff); 103 __m128i zero = _mm_setzero_si128(); 104 105 vec_mask[0] = _mm_and_si128(inv_mask, _mm_setr_epi32(0x0001, 0x0002, 0x0004, 0x0008)); 106 vec_mask[1] = _mm_and_si128(inv_mask, _mm_setr_epi32(0x0010, 0x0020, 0x0040, 0x0080)); 107 inv_mask = _mm_srli_epi32(inv_mask, 8); 108 vec_mask[2] = _mm_and_si128(inv_mask, _mm_setr_epi32(0x0001, 0x0002, 0x0004, 0x0008)); 109 vec_mask[3] = _mm_and_si128(inv_mask, _mm_setr_epi32(0x0010, 0x0020, 0x0040, 0x0080)); 110 111 vec_mask[0] = _mm_cmpeq_epi32(vec_mask[0], zero); 112 vec_mask[1] = _mm_cmpeq_epi32(vec_mask[1], zero); 113 vec_mask[2] = _mm_cmpeq_epi32(vec_mask[2], zero); 114 vec_mask[3] = _mm_cmpeq_epi32(vec_mask[3], zero); 115} 116 117 118/** 119 * Draw opaque color (for debugging). 120 */ 121static void 122opaque_color(uint8_t **cbufs, unsigned *strides, 123 uint32_t int_mask, 124 uint32_t color) 125{ 126 __m128i *cbuf = (__m128i *)cbufs[0]; 127 unsigned stride = strides[0] / sizeof *cbuf; 128 __m128i vec_mask[4]; 129 __m128i vec_color = _mm_set1_epi32(color); 130 131 expand_mask(int_mask, vec_mask); 132 133 mm_store_mask_si128(cbuf, vec_mask[0], vec_color); cbuf += stride; 134 mm_store_mask_si128(cbuf, vec_mask[1], vec_color); cbuf += stride; 135 mm_store_mask_si128(cbuf, vec_mask[2], vec_color); cbuf += stride; 136 mm_store_mask_si128(cbuf, vec_mask[3], vec_color); 137} 138 139 140/** 141 * Draw opaque red (for debugging). 142 */ 143static void 144red(const struct lp_jit_context *context, 145 uint32_t x, 146 uint32_t y, 147 uint32_t facing, 148 const void *a0, 149 const void *dadx, 150 const void *dady, 151 uint8_t **cbufs, 152 uint8_t *depth, 153 uint64_t int_mask, 154 struct lp_jit_thread_data *thread_data, 155 unsigned *strides, 156 unsigned depth_stride, 157 unsigned *sample_stride, 158 unsigned depth_sample_stride) 159{ 160 opaque_color(cbufs, strides, int_mask, 0xffff0000); 161 (void)facing; 162 (void)depth; 163 (void)thread_data; 164} 165 166 167/** 168 * Draw opaque green (for debugging). 169 */ 170static void 171green(const struct lp_jit_context *context, 172 uint32_t x, 173 uint32_t y, 174 uint32_t facing, 175 const void *a0, 176 const void *dadx, 177 const void *dady, 178 uint8_t **cbufs, 179 uint8_t *depth, 180 uint64_t int_mask, 181 struct lp_jit_thread_data *thread_data, 182 unsigned *strides, 183 unsigned depth_stride, 184 unsigned *sample_stride, 185 unsigned depth_sample_stride) 186{ 187 opaque_color(cbufs, strides, int_mask, 0xff00ff00); 188 (void)facing; 189 (void)depth; 190 (void)thread_data; 191} 192 193 194void 195llvmpipe_fs_variant_fastpath(struct lp_fragment_shader_variant *variant) 196{ 197 variant->jit_function[RAST_WHOLE] = NULL; 198 variant->jit_function[RAST_EDGE_TEST] = NULL; 199 200 if (LP_DEBUG & DEBUG_NO_FASTPATH) 201 return; 202 203 if (variant->key.cbuf_format[0] != PIPE_FORMAT_B8G8R8A8_UNORM && 204 variant->key.cbuf_format[0] != PIPE_FORMAT_B8G8R8X8_UNORM) { 205 return; 206 } 207 208 if (0) { 209 variant->jit_function[RAST_WHOLE] = red; 210 variant->jit_function[RAST_EDGE_TEST] = red; 211 } 212 213 if (0) { 214 variant->jit_function[RAST_WHOLE] = green; 215 variant->jit_function[RAST_EDGE_TEST] = green; 216 } 217 218 if (0) { 219 variant->jit_function[RAST_WHOLE] = no_op; 220 variant->jit_function[RAST_EDGE_TEST] = no_op; 221 } 222 223 /* Make it easier to see triangles: 224 */ 225 if ((LP_DEBUG & DEBUG_LINEAR) || (LP_PERF & PERF_NO_SHADE)) { 226 variant->jit_function[RAST_EDGE_TEST] = red; 227 variant->jit_function[RAST_WHOLE] = green; 228 } 229} 230 231#else 232 233void 234llvmpipe_fs_variant_fastpath(struct lp_fragment_shader_variant *variant) 235{ 236} 237 238#endif 239