1/**************************************************************************
2 *
3 * Copyright 2010-2021 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 *
26 **************************************************************************/
27
28
29#include "pipe/p_config.h"
30
31#include "util/u_math.h"
32#include "util/u_cpu_detect.h"
33#include "util/u_sse.h"
34
35#include "lp_jit.h"
36#include "lp_state_fs.h"
37#include "lp_debug.h"
38
39
40#if defined(PIPE_ARCH_SSE)
41
42#include <emmintrin.h>
43
44
/**
 * Fragment shader stand-in that draws nothing (for debugging).
 *
 * Accepts the same argument types as the red() and green() debug shaders in
 * this file and ignores all of them.  It is only referenced from a disabled
 * "if (0)" branch of llvmpipe_fs_variant_fastpath().
 */
static void
no_op(const struct lp_jit_context *context,
      uint32_t x,
      uint32_t y,
      uint32_t facing,
      const void *a0,
      const void *dadx,
      const void *dady,
      uint8_t **cbufs,
      uint8_t *depth,
      uint64_t mask,
      struct lp_jit_thread_data *thread_data,
      unsigned *strides,
      unsigned depth_stride,
      unsigned *color_sample_stride,
      unsigned depth_sample_stride)
{
   /* Nothing is read or written; the casts only mark the arguments as
    * intentionally unused.
    */
   (void)context;
   (void)x;
   (void)y;
   (void)facing;
   (void)a0;
   (void)dadx;
   (void)dady;
   (void)cbufs;
   (void)depth;
   (void)mask;
   (void)thread_data;
   (void)strides;
   (void)depth_stride;
   (void)color_sample_stride;
   (void)depth_sample_stride;
}
63
64
/*
 * Bitwise select: m ? a : b
 *
 * Every result bit is taken from a where the matching bit of m is set and
 * from b where it is clear.
 */
static inline __m128i
mm_select_si128(__m128i m, __m128i a, __m128i b)
{
   /*
    * TODO: use PBLENDVB when available.
    */
   const __m128i from_a = _mm_and_si128(m, a);
   const __m128i from_b = _mm_andnot_si128(m, b);   /* (~m) & b */

   return _mm_or_si128(from_a, from_b);
}
82
83
84/*
85 * *p = m ? a : *p;
86 */
87static inline void
88mm_store_mask_si128(__m128i *p, __m128i m, __m128i a)
89{
90   _mm_store_si128(p, mm_select_si128(m, a, _mm_load_si128(p)));
91}
92
93
/**
 * Expand the mask from a 16 bit integer to a 4 x 4 x 32 bit vector mask, ie.
 * 1 bit -> 32 bits.
 *
 * Bit (4 * i + j) of int_mask controls 32 bit lane j of vec_mask[i]: the
 * lane becomes all ones when the bit is set and zero when it is clear.
 * Bits of int_mask above the low 16 are ignored.
 *
 * \param int_mask  integer mask, one bit per 32 bit lane
 * \param vec_mask  output array of four vectors
 */
static inline void
expand_mask(uint32_t int_mask,
            __m128i *vec_mask)
{
   const __m128i zero = _mm_setzero_si128();
   const __m128i low_nibble = _mm_setr_epi32(0x0001, 0x0002, 0x0004, 0x0008);
   const __m128i high_nibble = _mm_setr_epi32(0x0010, 0x0020, 0x0040, 0x0080);
   /* Work on the inverted mask: a lane whose tested bit comes out as zero
    * then corresponds to a set bit in int_mask, and the compare against zero
    * turns it into all ones.
    */
   __m128i inv_bits = _mm_set1_epi32(~int_mask & 0xffff);
   unsigned half;

   /* Each pass consumes one byte of the mask and fills two output vectors. */
   for (half = 0; half < 2; half++) {
      __m128i *pair = vec_mask + 2 * half;

      pair[0] = _mm_cmpeq_epi32(_mm_and_si128(inv_bits, low_nibble), zero);
      pair[1] = _mm_cmpeq_epi32(_mm_and_si128(inv_bits, high_nibble), zero);

      /* Bring the next byte of the mask down into bits 0..7. */
      inv_bits = _mm_srli_epi32(inv_bits, 8);
   }
}
116
117
/**
 * Draw opaque color (for debugging).
 *
 * Writes the 32 bit value color to four rows of four 32 bit pixels in
 * cbufs[0].  A pixel is written only when its bit in the low 16 bits of
 * int_mask is set (bit 4 * row + column); other pixels keep their contents.
 *
 * The row step is strides[0] divided by the 16 byte vector size, and rows
 * are accessed through mm_store_mask_si128(), which uses aligned 128 bit
 * loads and stores.
 */
static void
opaque_color(uint8_t **cbufs, unsigned *strides,
             uint32_t int_mask,
             uint32_t color)
{
   __m128i *first_row = (__m128i *)cbufs[0];
   const unsigned row_step = strides[0] / sizeof *first_row;
   const __m128i fill = _mm_set1_epi32(color);
   __m128i row_mask[4];
   unsigned row;

   expand_mask(int_mask, row_mask);

   for (row = 0; row < 4; row++) {
      mm_store_mask_si128(first_row + row * row_step, row_mask[row], fill);
   }
}
138
139
/**
 * Draw opaque red (for debugging).
 *
 * Forwards cbufs, strides and int_mask to opaque_color() with the packed
 * pixel value 0xffff0000; int_mask is narrowed to 32 bits on the way.  All
 * other arguments are ignored.
 */
static void
red(const struct lp_jit_context *context,
    uint32_t x,
    uint32_t y,
    uint32_t facing,
    const void *a0,
    const void *dadx,
    const void *dady,
    uint8_t **cbufs,
    uint8_t *depth,
    uint64_t int_mask,
    struct lp_jit_thread_data *thread_data,
    unsigned *strides,
    unsigned depth_stride,
    unsigned *sample_stride,
    unsigned depth_sample_stride)
{
   (void)context;
   (void)x;
   (void)y;
   (void)facing;
   (void)a0;
   (void)dadx;
   (void)dady;
   (void)depth;
   (void)thread_data;
   (void)depth_stride;
   (void)sample_stride;
   (void)depth_sample_stride;

   opaque_color(cbufs, strides, (uint32_t)int_mask, 0xffff0000);
}
165
166
/**
 * Draw opaque green (for debugging).
 *
 * Forwards cbufs, strides and int_mask to opaque_color() with the packed
 * pixel value 0xff00ff00; int_mask is narrowed to 32 bits on the way.  All
 * other arguments are ignored.
 */
static void
green(const struct lp_jit_context *context,
      uint32_t x,
      uint32_t y,
      uint32_t facing,
      const void *a0,
      const void *dadx,
      const void *dady,
      uint8_t **cbufs,
      uint8_t *depth,
      uint64_t int_mask,
      struct lp_jit_thread_data *thread_data,
      unsigned *strides,
      unsigned depth_stride,
      unsigned *sample_stride,
      unsigned depth_sample_stride)
{
   (void)context;
   (void)x;
   (void)y;
   (void)facing;
   (void)a0;
   (void)dadx;
   (void)dady;
   (void)depth;
   (void)thread_data;
   (void)depth_stride;
   (void)sample_stride;
   (void)depth_sample_stride;

   opaque_color(cbufs, strides, (uint32_t)int_mask, 0xff00ff00);
}
192
193
194void
195llvmpipe_fs_variant_fastpath(struct lp_fragment_shader_variant *variant)
196{
197   variant->jit_function[RAST_WHOLE]     = NULL;
198   variant->jit_function[RAST_EDGE_TEST] = NULL;
199
200   if (LP_DEBUG & DEBUG_NO_FASTPATH)
201      return;
202
203   if (variant->key.cbuf_format[0] != PIPE_FORMAT_B8G8R8A8_UNORM &&
204       variant->key.cbuf_format[0] != PIPE_FORMAT_B8G8R8X8_UNORM) {
205      return;
206   }
207
208   if (0) {
209      variant->jit_function[RAST_WHOLE]     = red;
210      variant->jit_function[RAST_EDGE_TEST] = red;
211   }
212
213   if (0) {
214      variant->jit_function[RAST_WHOLE]     = green;
215      variant->jit_function[RAST_EDGE_TEST] = green;
216   }
217
218   if (0) {
219      variant->jit_function[RAST_WHOLE]     = no_op;
220      variant->jit_function[RAST_EDGE_TEST] = no_op;
221   }
222
223   /* Make it easier to see triangles:
224    */
225   if ((LP_DEBUG & DEBUG_LINEAR) || (LP_PERF & PERF_NO_SHADE)) {
226      variant->jit_function[RAST_EDGE_TEST] = red;
227      variant->jit_function[RAST_WHOLE] = green;
228   }
229}
230
231#else
232
/**
 * Build without PIPE_ARCH_SSE: no debug shaders are available, so the
 * variant is left untouched.
 *
 * NOTE(review): unlike the PIPE_ARCH_SSE implementation, this does not reset
 * variant->jit_function[RAST_WHOLE] / [RAST_EDGE_TEST] to NULL -- confirm
 * that callers pass in a variant whose entries are already NULL.
 */
void
llvmpipe_fs_variant_fastpath(struct lp_fragment_shader_variant *variant)
{
   (void)variant;
}
237
238#endif
239