1/**********************************************************
2 * Copyright 2009-2011 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 *********************************************************
25 * Authors:
26 * Zack Rusin <zackr-at-vmware-dot-com>
27 */
28#include "xa_priv.h"
29
30#include "pipe/p_format.h"
31#include "pipe/p_context.h"
32#include "pipe/p_state.h"
33#include "pipe/p_shader_tokens.h"
34
35#include "util/u_memory.h"
36
37#include "tgsi/tgsi_ureg.h"
38
39#include "cso_cache/cso_context.h"
40#include "cso_cache/cso_hash.h"
41
42/* Vertex shader:
43 * IN[0]    = vertex pos
44 * IN[1]    = src tex coord | solid fill color
45 * IN[2]    = mask tex coord
46 * IN[3]    = dst tex coord
47 * CONST[0] = (2/dst_width, 2/dst_height, 1, 1)
48 * CONST[1] = (-1, -1, 0, 0)
49 *
50 * OUT[0]   = vertex pos
51 * OUT[1]   = src tex coord
52 * OUT[2]   = mask tex coord
53 * OUT[3]   = dst tex coord
54 */
55
56/* Fragment shader. Samplers are allocated when needed.
57 * SAMP[0]  = sampler for first texture (src or mask if src is solid)
58 * SAMP[1]  = sampler for second texture (mask or none)
59 * IN[0]    = first texture coordinates if present
60 * IN[1]    = second texture coordinates if present
61 * CONST[0] = Solid color (src if src solid or mask if mask solid
62 *            or src in mask if both solid).
63 *
64 * OUT[0] = color
65 */
66
/*
 * Debug helper: print the names of all FS_* trait bits set in fs_traits.
 *
 * The name table is indexed by bit position (entry i names bit 1 << i).
 * Only bits that have a name in the table are examined, so a stray higher
 * bit can never index past the end of the table.
 */
static void
print_fs_traits(int fs_traits)
{
    static const char *const strings[] = {
        "FS_COMPOSITE",         /* = 1 << 0, */
        "FS_MASK",              /* = 1 << 1, */
        "FS_SRC_SRC",           /* = 1 << 2, */
        "FS_MASK_SRC",          /* = 1 << 3, */
        "FS_YUV",               /* = 1 << 4, */
        "FS_SRC_REPEAT_NONE",   /* = 1 << 5, */
        "FS_MASK_REPEAT_NONE",  /* = 1 << 6, */
        "FS_SRC_SWIZZLE_RGB",   /* = 1 << 7, */
        "FS_MASK_SWIZZLE_RGB",  /* = 1 << 8, */
        "FS_SRC_SET_ALPHA",     /* = 1 << 9, */
        "FS_MASK_SET_ALPHA",    /* = 1 << 10, */
        "FS_SRC_LUMINANCE",     /* = 1 << 11, */
        "FS_MASK_LUMINANCE",    /* = 1 << 12, */
        "FS_DST_LUMINANCE",     /* = 1 << 13, */
        "FS_CA",                /* = 1 << 14, */
    };
    const unsigned num_strings = sizeof(strings) / sizeof(strings[0]);
    unsigned i;

    debug_printf("%s: ", __func__);

    /* Bound the walk by the table size, not by a hard-coded bit count. */
    for (i = 0; i < num_strings; i++) {
        if (fs_traits & (1 << i))
            debug_printf("%s, ", strings[i]);
    }

    debug_printf("\n");
}
98
/*
 * Per-context shader cache: one hash for vertex shaders and one for
 * fragment shaders, each keyed by the corresponding traits bitmask.
 */
struct xa_shaders {
    /* Owning context; its pipe and cso members are used to create and
     * delete the cached shaders. */
    struct xa_context *r;

    /* vs_traits -> vertex shader handle */
    struct cso_hash *vs_hash;
    /* fs_traits -> fragment shader handle */
    struct cso_hash *fs_hash;
};
105
106static inline void
107src_in_mask(struct ureg_program *ureg,
108	    struct ureg_dst dst,
109	    struct ureg_src src,
110	    struct ureg_src mask,
111	    unsigned mask_luminance, boolean component_alpha)
112{
113    if (mask_luminance)
114        if (component_alpha) {
115            ureg_MOV(ureg, dst, src);
116            ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
117                     src, ureg_scalar(mask, TGSI_SWIZZLE_X));
118        } else {
119            ureg_MUL(ureg, dst, src, ureg_scalar(mask, TGSI_SWIZZLE_X));
120        }
121    else if (!component_alpha)
122        ureg_MUL(ureg, dst, src, ureg_scalar(mask, TGSI_SWIZZLE_W));
123    else
124        ureg_MUL(ureg, dst, src, mask);
125}
126
127static struct ureg_src
128vs_normalize_coords(struct ureg_program *ureg,
129		    struct ureg_src coords,
130		    struct ureg_src const0, struct ureg_src const1)
131{
132    struct ureg_dst tmp = ureg_DECL_temporary(ureg);
133    struct ureg_src ret;
134
135    ureg_MAD(ureg, tmp, coords, const0, const1);
136    ret = ureg_src(tmp);
137    ureg_release_temporary(ureg, tmp);
138    return ret;
139}
140
141static void *
142create_vs(struct pipe_context *pipe, unsigned vs_traits)
143{
144    struct ureg_program *ureg;
145    struct ureg_src src;
146    struct ureg_dst dst;
147    struct ureg_src const0, const1;
148    boolean is_composite = (vs_traits & VS_COMPOSITE) != 0;
149    boolean has_mask = (vs_traits & VS_MASK) != 0;
150    boolean is_yuv = (vs_traits & VS_YUV) != 0;
151    boolean is_src_src = (vs_traits & VS_SRC_SRC) != 0;
152    boolean is_mask_src = (vs_traits & VS_MASK_SRC) != 0;
153    unsigned input_slot = 0;
154
155    ureg = ureg_create(PIPE_SHADER_VERTEX);
156    if (ureg == NULL)
157	return 0;
158
159    const0 = ureg_DECL_constant(ureg, 0);
160    const1 = ureg_DECL_constant(ureg, 1);
161
162    /* it has to be either a fill or a composite op */
163    src = ureg_DECL_vs_input(ureg, input_slot++);
164    dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
165    src = vs_normalize_coords(ureg, src, const0, const1);
166    ureg_MOV(ureg, dst, src);
167
168    if (is_yuv) {
169	src = ureg_DECL_vs_input(ureg, input_slot++);
170	dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0);
171	ureg_MOV(ureg, dst, src);
172    }
173
174    if (is_composite) {
175        if (!is_src_src || (has_mask && !is_mask_src)) {
176            src = ureg_DECL_vs_input(ureg, input_slot++);
177            dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0);
178            ureg_MOV(ureg, dst, src);
179        }
180
181        if (!is_src_src && (has_mask && !is_mask_src)) {
182            src = ureg_DECL_vs_input(ureg, input_slot++);
183            dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 1);
184            ureg_MOV(ureg, dst, src);
185        }
186    }
187
188    ureg_END(ureg);
189
190    return ureg_create_shader_and_destroy(ureg, pipe);
191}
192
193static void *
194create_yuv_shader(struct pipe_context *pipe, struct ureg_program *ureg)
195{
196    struct ureg_src y_sampler, u_sampler, v_sampler;
197    struct ureg_src pos;
198    struct ureg_src matrow0, matrow1, matrow2, matrow3;
199    struct ureg_dst y, u, v, rgb;
200    struct ureg_dst out = ureg_DECL_output(ureg,
201					   TGSI_SEMANTIC_COLOR,
202					   0);
203
204    pos = ureg_DECL_fs_input(ureg,
205			     TGSI_SEMANTIC_GENERIC, 0,
206			     TGSI_INTERPOLATE_PERSPECTIVE);
207
208    rgb = ureg_DECL_temporary(ureg);
209    y = ureg_DECL_temporary(ureg);
210    u = ureg_DECL_temporary(ureg);
211    v = ureg_DECL_temporary(ureg);
212
213    y_sampler = ureg_DECL_sampler(ureg, 0);
214    u_sampler = ureg_DECL_sampler(ureg, 1);
215    v_sampler = ureg_DECL_sampler(ureg, 2);
216
217    ureg_DECL_sampler_view(ureg, 0, TGSI_TEXTURE_2D,
218                           TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT,
219                           TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT);
220    ureg_DECL_sampler_view(ureg, 1, TGSI_TEXTURE_2D,
221                           TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT,
222                           TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT);
223    ureg_DECL_sampler_view(ureg, 2, TGSI_TEXTURE_2D,
224                           TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT,
225                           TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT);
226
227    matrow0 = ureg_DECL_constant(ureg, 0);
228    matrow1 = ureg_DECL_constant(ureg, 1);
229    matrow2 = ureg_DECL_constant(ureg, 2);
230    matrow3 = ureg_DECL_constant(ureg, 3);
231
232    ureg_TEX(ureg, y, TGSI_TEXTURE_2D, pos, y_sampler);
233    ureg_TEX(ureg, u, TGSI_TEXTURE_2D, pos, u_sampler);
234    ureg_TEX(ureg, v, TGSI_TEXTURE_2D, pos, v_sampler);
235
236    ureg_MOV(ureg, rgb, matrow3);
237    ureg_MAD(ureg, rgb,
238	     ureg_scalar(ureg_src(y), TGSI_SWIZZLE_X), matrow0, ureg_src(rgb));
239    ureg_MAD(ureg, rgb,
240	     ureg_scalar(ureg_src(u), TGSI_SWIZZLE_X), matrow1, ureg_src(rgb));
241    ureg_MAD(ureg, rgb,
242	     ureg_scalar(ureg_src(v), TGSI_SWIZZLE_X), matrow2, ureg_src(rgb));
243
244    ureg_MOV(ureg, out, ureg_src(rgb));
245
246    ureg_release_temporary(ureg, rgb);
247    ureg_release_temporary(ureg, y);
248    ureg_release_temporary(ureg, u);
249    ureg_release_temporary(ureg, v);
250
251    ureg_END(ureg);
252
253    return ureg_create_shader_and_destroy(ureg, pipe);
254}
255
256static inline void
257xrender_tex(struct ureg_program *ureg,
258	    struct ureg_dst dst,
259	    struct ureg_src coords,
260	    struct ureg_src sampler,
261	    const struct ureg_src *imm0,
262	    boolean repeat_none, boolean swizzle, boolean set_alpha)
263{
264    if (repeat_none) {
265	struct ureg_dst tmp0 = ureg_DECL_temporary(ureg);
266	struct ureg_dst tmp1 = ureg_DECL_temporary(ureg);
267
268	ureg_SGT(ureg, tmp1, ureg_swizzle(coords,
269					  TGSI_SWIZZLE_X,
270					  TGSI_SWIZZLE_Y,
271					  TGSI_SWIZZLE_X,
272					  TGSI_SWIZZLE_Y), ureg_scalar(*imm0,
273								       TGSI_SWIZZLE_X));
274	ureg_SLT(ureg, tmp0,
275		 ureg_swizzle(coords, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
276			      TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y), ureg_scalar(*imm0,
277									   TGSI_SWIZZLE_W));
278	ureg_MIN(ureg, tmp0, ureg_src(tmp0), ureg_src(tmp1));
279	ureg_MIN(ureg, tmp0, ureg_scalar(ureg_src(tmp0), TGSI_SWIZZLE_X),
280		 ureg_scalar(ureg_src(tmp0), TGSI_SWIZZLE_Y));
281	ureg_TEX(ureg, tmp1, TGSI_TEXTURE_2D, coords, sampler);
282	if (swizzle)
283	    ureg_MOV(ureg, tmp1, ureg_swizzle(ureg_src(tmp1),
284					      TGSI_SWIZZLE_Z,
285					      TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X,
286					      TGSI_SWIZZLE_W));
287	if (set_alpha)
288	    ureg_MOV(ureg,
289		     ureg_writemask(tmp1, TGSI_WRITEMASK_W),
290		     ureg_scalar(*imm0, TGSI_SWIZZLE_W));
291	ureg_MUL(ureg, dst, ureg_src(tmp1), ureg_src(tmp0));
292	ureg_release_temporary(ureg, tmp0);
293	ureg_release_temporary(ureg, tmp1);
294    } else {
295	if (swizzle) {
296	    struct ureg_dst tmp = ureg_DECL_temporary(ureg);
297
298	    ureg_TEX(ureg, tmp, TGSI_TEXTURE_2D, coords, sampler);
299	    ureg_MOV(ureg, dst, ureg_swizzle(ureg_src(tmp),
300					     TGSI_SWIZZLE_Z,
301					     TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X,
302					     TGSI_SWIZZLE_W));
303	    ureg_release_temporary(ureg, tmp);
304	} else {
305	    ureg_TEX(ureg, dst, TGSI_TEXTURE_2D, coords, sampler);
306	}
307	if (set_alpha)
308	    ureg_MOV(ureg,
309		     ureg_writemask(dst, TGSI_WRITEMASK_W),
310		     ureg_scalar(*imm0, TGSI_SWIZZLE_W));
311    }
312}
313
314static void
315read_input(struct ureg_program *ureg,
316           struct ureg_dst dst,
317           const struct ureg_src *imm0,
318           boolean repeat_none, boolean swizzle, boolean set_alpha,
319           boolean is_src, unsigned *cur_constant, unsigned *cur_sampler)
320{
321    struct ureg_src input, sampler;
322
323    if (is_src) {
324        input = ureg_DECL_constant(ureg, (*cur_constant)++);
325        ureg_MOV(ureg, dst, input);
326    } else {
327        sampler = ureg_DECL_sampler(ureg, *cur_sampler);
328        ureg_DECL_sampler_view(ureg, *cur_sampler, TGSI_TEXTURE_2D,
329                               TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT,
330                               TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT);
331        input = ureg_DECL_fs_input(ureg,
332                                   TGSI_SEMANTIC_GENERIC, (*cur_sampler)++,
333                                   TGSI_INTERPOLATE_PERSPECTIVE);
334        xrender_tex(ureg, dst, input, sampler, imm0,
335                    repeat_none, swizzle, set_alpha);
336    }
337}
338
339static void *
340create_fs(struct pipe_context *pipe, unsigned fs_traits)
341{
342    struct ureg_program *ureg;
343    struct ureg_dst src, mask;
344    struct ureg_dst out;
345    struct ureg_src imm0 = { 0 };
346    unsigned has_mask = (fs_traits & FS_MASK) != 0;
347    unsigned is_yuv = (fs_traits & FS_YUV) != 0;
348    unsigned src_repeat_none = (fs_traits & FS_SRC_REPEAT_NONE) != 0;
349    unsigned mask_repeat_none = (fs_traits & FS_MASK_REPEAT_NONE) != 0;
350    unsigned src_swizzle = (fs_traits & FS_SRC_SWIZZLE_RGB) != 0;
351    unsigned mask_swizzle = (fs_traits & FS_MASK_SWIZZLE_RGB) != 0;
352    unsigned src_set_alpha = (fs_traits & FS_SRC_SET_ALPHA) != 0;
353    unsigned mask_set_alpha = (fs_traits & FS_MASK_SET_ALPHA) != 0;
354    unsigned src_luminance = (fs_traits & FS_SRC_LUMINANCE) != 0;
355    unsigned mask_luminance = (fs_traits & FS_MASK_LUMINANCE) != 0;
356    unsigned dst_luminance = (fs_traits & FS_DST_LUMINANCE) != 0;
357    unsigned is_src_src = (fs_traits & FS_SRC_SRC) != 0;
358    unsigned is_mask_src = (fs_traits & FS_MASK_SRC) != 0;
359    boolean component_alpha = (fs_traits & FS_CA) != 0;
360    unsigned cur_sampler = 0;
361    unsigned cur_constant = 0;
362
363#if 0
364    print_fs_traits(fs_traits);
365#else
366    (void)print_fs_traits;
367#endif
368
369    ureg = ureg_create(PIPE_SHADER_FRAGMENT);
370    if (ureg == NULL)
371	return 0;
372
373    if (is_yuv)
374       return create_yuv_shader(pipe, ureg);
375
376    out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
377
378    if (src_repeat_none || mask_repeat_none ||
379	src_set_alpha || mask_set_alpha || src_luminance) {
380	imm0 = ureg_imm4f(ureg, 0, 0, 0, 1);
381    }
382
383    src = (has_mask || src_luminance || dst_luminance) ?
384        ureg_DECL_temporary(ureg) : out;
385
386    read_input(ureg, src, &imm0, src_repeat_none, src_swizzle,
387               src_set_alpha, is_src_src, &cur_constant, &cur_sampler);
388
389    if (src_luminance) {
390	ureg_MOV(ureg, src, ureg_scalar(ureg_src(src), TGSI_SWIZZLE_X));
391	ureg_MOV(ureg, ureg_writemask(src, TGSI_WRITEMASK_XYZ),
392		 ureg_scalar(imm0, TGSI_SWIZZLE_X));
393	if (!has_mask && !dst_luminance)
394	    ureg_MOV(ureg, out, ureg_src(src));
395    }
396
397    if (has_mask) {
398	mask = ureg_DECL_temporary(ureg);
399        read_input(ureg, mask, &imm0, mask_repeat_none,
400                   mask_swizzle, mask_set_alpha, is_mask_src, &cur_constant,
401                   &cur_sampler);
402
403	src_in_mask(ureg, (dst_luminance) ? src : out, ureg_src(src),
404		    ureg_src(mask), mask_luminance, component_alpha);
405
406	ureg_release_temporary(ureg, mask);
407    }
408
409    if (dst_luminance) {
410	/*
411	 * Make sure the alpha channel goes into the output L8 surface.
412	 */
413	ureg_MOV(ureg, out, ureg_scalar(ureg_src(src), TGSI_SWIZZLE_W));
414    }
415
416    ureg_END(ureg);
417
418    return ureg_create_shader_and_destroy(ureg, pipe);
419}
420
421struct xa_shaders *
422xa_shaders_create(struct xa_context *r)
423{
424    struct xa_shaders *sc = CALLOC_STRUCT(xa_shaders);
425
426    sc->r = r;
427    sc->vs_hash = cso_hash_create();
428    sc->fs_hash = cso_hash_create();
429
430    return sc;
431}
432
433static void
434cache_destroy(struct cso_context *cso,
435	      struct cso_hash *hash, unsigned processor)
436{
437    struct cso_hash_iter iter = cso_hash_first_node(hash);
438
439    while (!cso_hash_iter_is_null(iter)) {
440	void *shader = (void *)cso_hash_iter_data(iter);
441
442	if (processor == PIPE_SHADER_FRAGMENT) {
443	    cso_delete_fragment_shader(cso, shader);
444	} else if (processor == PIPE_SHADER_VERTEX) {
445	    cso_delete_vertex_shader(cso, shader);
446	}
447	iter = cso_hash_erase(hash, iter);
448    }
449    cso_hash_delete(hash);
450}
451
452void
453xa_shaders_destroy(struct xa_shaders *sc)
454{
455    cache_destroy(sc->r->cso, sc->vs_hash, PIPE_SHADER_VERTEX);
456    cache_destroy(sc->r->cso, sc->fs_hash, PIPE_SHADER_FRAGMENT);
457
458    FREE(sc);
459}
460
461static inline void *
462shader_from_cache(struct pipe_context *pipe,
463		  unsigned type, struct cso_hash *hash, unsigned key)
464{
465    void *shader = 0;
466
467    struct cso_hash_iter iter = cso_hash_find(hash, key);
468
469    if (cso_hash_iter_is_null(iter)) {
470	if (type == PIPE_SHADER_VERTEX)
471	    shader = create_vs(pipe, key);
472	else
473	    shader = create_fs(pipe, key);
474	cso_hash_insert(hash, key, shader);
475    } else
476	shader = (void *)cso_hash_iter_data(iter);
477
478    return shader;
479}
480
481struct xa_shader
482xa_shaders_get(struct xa_shaders *sc, unsigned vs_traits, unsigned fs_traits)
483{
484    struct xa_shader shader = { NULL, NULL };
485    void *vs, *fs;
486
487    vs = shader_from_cache(sc->r->pipe, PIPE_SHADER_VERTEX,
488			   sc->vs_hash, vs_traits);
489    fs = shader_from_cache(sc->r->pipe, PIPE_SHADER_FRAGMENT,
490			   sc->fs_hash, fs_traits);
491
492    debug_assert(vs && fs);
493    if (!vs || !fs)
494	return shader;
495
496    shader.vs = vs;
497    shader.fs = fs;
498
499    return shader;
500}
501