1428d7b3dSmrg/*
2428d7b3dSmrg * Copyright © 2006 Intel Corporation
3428d7b3dSmrg *
4428d7b3dSmrg * Permission is hereby granted, free of charge, to any person obtaining a
5428d7b3dSmrg * copy of this software and associated documentation files (the "Software"),
6428d7b3dSmrg * to deal in the Software without restriction, including without limitation
7428d7b3dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8428d7b3dSmrg * and/or sell copies of the Software, and to permit persons to whom the
9428d7b3dSmrg * Software is furnished to do so, subject to the following conditions:
10428d7b3dSmrg *
11428d7b3dSmrg * The above copyright notice and this permission notice (including the next
12428d7b3dSmrg * paragraph) shall be included in all copies or substantial portions of the
13428d7b3dSmrg * Software.
14428d7b3dSmrg *
15428d7b3dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16428d7b3dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17428d7b3dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18428d7b3dSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19428d7b3dSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20428d7b3dSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21428d7b3dSmrg * SOFTWARE.
22428d7b3dSmrg *
23428d7b3dSmrg * Authors:
24428d7b3dSmrg *    Wang Zhenyu <zhenyu.z.wang@intel.com>
25428d7b3dSmrg *    Eric Anholt <eric@anholt.net>
26428d7b3dSmrg *
27428d7b3dSmrg */
28428d7b3dSmrg
29428d7b3dSmrg#ifdef HAVE_CONFIG_H
30428d7b3dSmrg#include "config.h"
31428d7b3dSmrg#endif
32428d7b3dSmrg
33428d7b3dSmrg#include "xorg-server.h"
34428d7b3dSmrg#include "xf86.h"
35428d7b3dSmrg#include "intel.h"
36428d7b3dSmrg#include "intel_uxa.h"
37428d7b3dSmrg#include "i915_reg.h"
38428d7b3dSmrg#include "i915_3d.h"
39428d7b3dSmrg
/*
 * One row of the texture format table: pairs a Render picture format
 * with the value programmed into the hardware for it.
 */
struct formatinfo {
	int fmt;		/* PICT_* picture format */
	uint32_t card_fmt;	/* MAPSURF_* | MT_* surface format bits */
};
44428d7b3dSmrg
45428d7b3dSmrgstruct blendinfo {
46428d7b3dSmrg	Bool dst_alpha;
47428d7b3dSmrg	Bool src_alpha;
48428d7b3dSmrg	uint32_t src_blend;
49428d7b3dSmrg	uint32_t dst_blend;
50428d7b3dSmrg};
51428d7b3dSmrg
52428d7b3dSmrgstatic struct blendinfo i915_blend_op[] = {
53428d7b3dSmrg	/* Clear */
54428d7b3dSmrg	{0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO},
55428d7b3dSmrg	/* Src */
56428d7b3dSmrg	{0, 0, BLENDFACT_ONE, BLENDFACT_ZERO},
57428d7b3dSmrg	/* Dst */
58428d7b3dSmrg	{0, 0, BLENDFACT_ZERO, BLENDFACT_ONE},
59428d7b3dSmrg	/* Over */
60428d7b3dSmrg	{0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA},
61428d7b3dSmrg	/* OverReverse */
62428d7b3dSmrg	{1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE},
63428d7b3dSmrg	/* In */
64428d7b3dSmrg	{1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO},
65428d7b3dSmrg	/* InReverse */
66428d7b3dSmrg	{0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA},
67428d7b3dSmrg	/* Out */
68428d7b3dSmrg	{1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO},
69428d7b3dSmrg	/* OutReverse */
70428d7b3dSmrg	{0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA},
71428d7b3dSmrg	/* Atop */
72428d7b3dSmrg	{1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
73428d7b3dSmrg	/* AtopReverse */
74428d7b3dSmrg	{1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA},
75428d7b3dSmrg	/* Xor */
76428d7b3dSmrg	{1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
77428d7b3dSmrg	/* Add */
78428d7b3dSmrg	{0, 0, BLENDFACT_ONE, BLENDFACT_ONE},
79428d7b3dSmrg};
80428d7b3dSmrg
81428d7b3dSmrgstatic struct formatinfo i915_tex_formats[] = {
82428d7b3dSmrg	{PICT_a8, MAPSURF_8BIT | MT_8BIT_A8},
83428d7b3dSmrg	{PICT_a8r8g8b8, MAPSURF_32BIT | MT_32BIT_ARGB8888},
84428d7b3dSmrg	{PICT_x8r8g8b8, MAPSURF_32BIT | MT_32BIT_XRGB8888},
85428d7b3dSmrg	{PICT_a8b8g8r8, MAPSURF_32BIT | MT_32BIT_ABGR8888},
86428d7b3dSmrg	{PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888},
87428d7b3dSmrg#if XORG_VERSION_CURRENT >= 10699900
88428d7b3dSmrg	{PICT_a2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010},
89428d7b3dSmrg	{PICT_a2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010},
90428d7b3dSmrg#endif
91428d7b3dSmrg	{PICT_r5g6b5, MAPSURF_16BIT | MT_16BIT_RGB565},
92428d7b3dSmrg	{PICT_a1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555},
93428d7b3dSmrg	{PICT_a4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444},
94428d7b3dSmrg};
95428d7b3dSmrg
96428d7b3dSmrgstatic uint32_t i915_get_blend_cntl(int op, PicturePtr mask,
97428d7b3dSmrg				    uint32_t dst_format)
98428d7b3dSmrg{
99428d7b3dSmrg	uint32_t sblend, dblend;
100428d7b3dSmrg
101428d7b3dSmrg	sblend = i915_blend_op[op].src_blend;
102428d7b3dSmrg	dblend = i915_blend_op[op].dst_blend;
103428d7b3dSmrg
104428d7b3dSmrg	/* If there's no dst alpha channel, adjust the blend op so that we'll
105428d7b3dSmrg	 * treat it as always 1.
106428d7b3dSmrg	 */
107428d7b3dSmrg	if (PICT_FORMAT_A(dst_format) == 0 && i915_blend_op[op].dst_alpha) {
108428d7b3dSmrg		if (sblend == BLENDFACT_DST_ALPHA)
109428d7b3dSmrg			sblend = BLENDFACT_ONE;
110428d7b3dSmrg		else if (sblend == BLENDFACT_INV_DST_ALPHA)
111428d7b3dSmrg			sblend = BLENDFACT_ZERO;
112428d7b3dSmrg	}
113428d7b3dSmrg
114428d7b3dSmrg	/* i915 engine reads 8bit color buffer into green channel in cases
115428d7b3dSmrg	   like color buffer blending .etc, and also writes back green channel.
116428d7b3dSmrg	   So with dst_alpha blend we should use color factor. See spec on
117428d7b3dSmrg	   "8-bit rendering" */
118428d7b3dSmrg	if ((dst_format == PICT_a8) && i915_blend_op[op].dst_alpha) {
119428d7b3dSmrg		if (sblend == BLENDFACT_DST_ALPHA)
120428d7b3dSmrg			sblend = BLENDFACT_DST_COLR;
121428d7b3dSmrg		else if (sblend == BLENDFACT_INV_DST_ALPHA)
122428d7b3dSmrg			sblend = BLENDFACT_INV_DST_COLR;
123428d7b3dSmrg	}
124428d7b3dSmrg
125428d7b3dSmrg	/* If the source alpha is being used, then we should only be in a case
126428d7b3dSmrg	 * where the source blend factor is 0, and the source blend value is the
127428d7b3dSmrg	 * mask channels multiplied by the source picture's alpha.
128428d7b3dSmrg	 */
129428d7b3dSmrg	if (mask && mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
130428d7b3dSmrg	    i915_blend_op[op].src_alpha) {
131428d7b3dSmrg		if (dblend == BLENDFACT_SRC_ALPHA) {
132428d7b3dSmrg			dblend = BLENDFACT_SRC_COLR;
133428d7b3dSmrg		} else if (dblend == BLENDFACT_INV_SRC_ALPHA) {
134428d7b3dSmrg			dblend = BLENDFACT_INV_SRC_COLR;
135428d7b3dSmrg		}
136428d7b3dSmrg	}
137428d7b3dSmrg
138428d7b3dSmrg	return S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
139428d7b3dSmrg		(BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) |
140428d7b3dSmrg		(sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
141428d7b3dSmrg		(dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
142428d7b3dSmrg}
143428d7b3dSmrg
144428d7b3dSmrg#define DSTORG_HORT_BIAS(x)             ((x)<<20)
145428d7b3dSmrg#define DSTORG_VERT_BIAS(x)             ((x)<<16)
146428d7b3dSmrg
147428d7b3dSmrgstatic Bool i915_get_dest_format(PicturePtr dest_picture, uint32_t * dst_format)
148428d7b3dSmrg{
149428d7b3dSmrg	ScrnInfoPtr scrn;
150428d7b3dSmrg
151428d7b3dSmrg	switch (dest_picture->format) {
152428d7b3dSmrg	case PICT_a8r8g8b8:
153428d7b3dSmrg	case PICT_x8r8g8b8:
154428d7b3dSmrg		*dst_format = COLR_BUF_ARGB8888;
155428d7b3dSmrg		break;
156428d7b3dSmrg	case PICT_r5g6b5:
157428d7b3dSmrg		*dst_format = COLR_BUF_RGB565;
158428d7b3dSmrg		break;
159428d7b3dSmrg	case PICT_a1r5g5b5:
160428d7b3dSmrg	case PICT_x1r5g5b5:
161428d7b3dSmrg		*dst_format = COLR_BUF_ARGB1555;
162428d7b3dSmrg		break;
163428d7b3dSmrg#if XORG_VERSION_CURRENT >= 10699900
164428d7b3dSmrg	case PICT_a2r10g10b10:
165428d7b3dSmrg	case PICT_x2r10g10b10:
166428d7b3dSmrg		*dst_format = COLR_BUF_ARGB2AAA;
167428d7b3dSmrg		break;
168428d7b3dSmrg#endif
169428d7b3dSmrg	case PICT_a8:
170428d7b3dSmrg		*dst_format = COLR_BUF_8BIT;
171428d7b3dSmrg		break;
172428d7b3dSmrg	case PICT_a4r4g4b4:
173428d7b3dSmrg	case PICT_x4r4g4b4:
174428d7b3dSmrg		*dst_format = COLR_BUF_ARGB4444;
175428d7b3dSmrg		break;
176428d7b3dSmrg	default:
177428d7b3dSmrg		scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen);
178428d7b3dSmrg		intel_uxa_debug_fallback(scrn,
179428d7b3dSmrg				     "Unsupported dest format 0x%x\n",
180428d7b3dSmrg				     (int)dest_picture->format);
181428d7b3dSmrg		return FALSE;
182428d7b3dSmrg	}
183428d7b3dSmrg	*dst_format |= DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8);
184428d7b3dSmrg	return TRUE;
185428d7b3dSmrg}
186428d7b3dSmrg
187428d7b3dSmrgBool
188428d7b3dSmrgi915_check_composite(int op,
189428d7b3dSmrg		     PicturePtr source_picture,
190428d7b3dSmrg		     PicturePtr mask_picture,
191428d7b3dSmrg		     PicturePtr dest_picture,
192428d7b3dSmrg		     int width, int height)
193428d7b3dSmrg{
194428d7b3dSmrg	ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen);
195428d7b3dSmrg	uint32_t tmp1;
196428d7b3dSmrg
197428d7b3dSmrg	/* Check for unsupported compositing operations. */
198428d7b3dSmrg	if (op >= sizeof(i915_blend_op) / sizeof(i915_blend_op[0])) {
199428d7b3dSmrg		intel_uxa_debug_fallback(scrn, "Unsupported Composite op 0x%x\n",
200428d7b3dSmrg				     op);
201428d7b3dSmrg		return FALSE;
202428d7b3dSmrg	}
203428d7b3dSmrg	if (mask_picture != NULL && mask_picture->componentAlpha &&
204428d7b3dSmrg	    PICT_FORMAT_RGB(mask_picture->format)) {
205428d7b3dSmrg		/* Check if it's component alpha that relies on a source alpha
206428d7b3dSmrg		 * and on the source value.  We can only get one of those
207428d7b3dSmrg		 * into the single source value that we get to blend with.
208428d7b3dSmrg		 */
209428d7b3dSmrg		if (i915_blend_op[op].src_alpha &&
210428d7b3dSmrg		    (i915_blend_op[op].src_blend != BLENDFACT_ZERO)) {
211428d7b3dSmrg			if (op != PictOpOver) {
212428d7b3dSmrg				intel_uxa_debug_fallback(scrn,
213428d7b3dSmrg						     "Component alpha not supported "
214428d7b3dSmrg						     "with source alpha and source "
215428d7b3dSmrg						     "value blending.\n");
216428d7b3dSmrg				return FALSE;
217428d7b3dSmrg			}
218428d7b3dSmrg		}
219428d7b3dSmrg	}
220428d7b3dSmrg
221428d7b3dSmrg	if (!i915_get_dest_format(dest_picture, &tmp1)) {
222428d7b3dSmrg		intel_uxa_debug_fallback(scrn, "Get Color buffer format\n");
223428d7b3dSmrg		return FALSE;
224428d7b3dSmrg	}
225428d7b3dSmrg
226428d7b3dSmrg	if (width > 2048 || height > 2048)
227428d7b3dSmrg		return FALSE;
228428d7b3dSmrg
229428d7b3dSmrg	return TRUE;
230428d7b3dSmrg}
231428d7b3dSmrg
232428d7b3dSmrgBool
233428d7b3dSmrgi915_check_composite_target(PixmapPtr pixmap)
234428d7b3dSmrg{
235428d7b3dSmrg	if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048)
236428d7b3dSmrg		return FALSE;
237428d7b3dSmrg
238428d7b3dSmrg	if(!intel_uxa_check_pitch_3d(pixmap))
239428d7b3dSmrg		return FALSE;
240428d7b3dSmrg
241428d7b3dSmrg	return TRUE;
242428d7b3dSmrg}
243428d7b3dSmrg
244428d7b3dSmrgBool
245428d7b3dSmrgi915_check_composite_texture(ScreenPtr screen, PicturePtr picture)
246428d7b3dSmrg{
247428d7b3dSmrg	if (picture->repeatType > RepeatReflect) {
248428d7b3dSmrg		ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
249428d7b3dSmrg		intel_uxa_debug_fallback(scrn, "Unsupported picture repeat %d\n",
250428d7b3dSmrg			     picture->repeatType);
251428d7b3dSmrg		return FALSE;
252428d7b3dSmrg	}
253428d7b3dSmrg
254428d7b3dSmrg	if (picture->filter != PictFilterNearest &&
255428d7b3dSmrg	    picture->filter != PictFilterBilinear) {
256428d7b3dSmrg		ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
257428d7b3dSmrg		intel_uxa_debug_fallback(scrn, "Unsupported filter 0x%x\n",
258428d7b3dSmrg				     picture->filter);
259428d7b3dSmrg		return FALSE;
260428d7b3dSmrg	}
261428d7b3dSmrg
262428d7b3dSmrg	if (picture->pSourcePict)
263428d7b3dSmrg		return FALSE;
264428d7b3dSmrg
265428d7b3dSmrg	if (picture->pDrawable) {
266428d7b3dSmrg		int w, h, i;
267428d7b3dSmrg
268428d7b3dSmrg		w = picture->pDrawable->width;
269428d7b3dSmrg		h = picture->pDrawable->height;
270428d7b3dSmrg		if ((w > 2048) || (h > 2048)) {
271428d7b3dSmrg			ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
272428d7b3dSmrg			intel_uxa_debug_fallback(scrn,
273428d7b3dSmrg					     "Picture w/h too large (%dx%d)\n",
274428d7b3dSmrg					     w, h);
275428d7b3dSmrg			return FALSE;
276428d7b3dSmrg		}
277428d7b3dSmrg
278428d7b3dSmrg		for (i = 0;
279428d7b3dSmrg		     i < sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0]);
280428d7b3dSmrg		     i++) {
281428d7b3dSmrg			if (i915_tex_formats[i].fmt == picture->format)
282428d7b3dSmrg				break;
283428d7b3dSmrg		}
284428d7b3dSmrg		if (i == sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0]))
285428d7b3dSmrg		{
286428d7b3dSmrg			ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
287428d7b3dSmrg			intel_uxa_debug_fallback(scrn, "Unsupported picture format "
288428d7b3dSmrg					     "0x%x\n",
289428d7b3dSmrg					     (int)picture->format);
290428d7b3dSmrg			return FALSE;
291428d7b3dSmrg		}
292428d7b3dSmrg
293428d7b3dSmrg		return TRUE;
294428d7b3dSmrg	}
295428d7b3dSmrg
296428d7b3dSmrg	return FALSE;
297428d7b3dSmrg}
298428d7b3dSmrg
299428d7b3dSmrgstatic Bool i915_texture_setup(PicturePtr picture, PixmapPtr pixmap, int unit)
300428d7b3dSmrg{
301428d7b3dSmrg	ScrnInfoPtr scrn = xf86ScreenToScrn(picture->pDrawable->pScreen);
302428d7b3dSmrg	intel_screen_private *intel = intel_get_screen_private(scrn);
303428d7b3dSmrg	uint32_t format, pitch, filter;
304428d7b3dSmrg	uint32_t wrap_mode, tiling_bits;
305428d7b3dSmrg	int i;
306428d7b3dSmrg
307428d7b3dSmrg	pitch = intel_pixmap_pitch(pixmap);
308428d7b3dSmrg	intel->scale_units[unit][0] = 1. / pixmap->drawable.width;
309428d7b3dSmrg	intel->scale_units[unit][1] = 1. / pixmap->drawable.height;
310428d7b3dSmrg
311428d7b3dSmrg	for (i = 0; i < sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0]);
312428d7b3dSmrg	     i++) {
313428d7b3dSmrg		if (i915_tex_formats[i].fmt == picture->format)
314428d7b3dSmrg			break;
315428d7b3dSmrg	}
316428d7b3dSmrg	if (i == sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0])) {
317428d7b3dSmrg		intel_uxa_debug_fallback(scrn, "unknown texture format\n");
318428d7b3dSmrg		return FALSE;
319428d7b3dSmrg	}
320428d7b3dSmrg	format = i915_tex_formats[i].card_fmt;
321428d7b3dSmrg
322428d7b3dSmrg	switch (picture->repeatType) {
323428d7b3dSmrg	case RepeatNone:
324428d7b3dSmrg		wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
325428d7b3dSmrg		break;
326428d7b3dSmrg	case RepeatNormal:
327428d7b3dSmrg		wrap_mode = TEXCOORDMODE_WRAP;
328428d7b3dSmrg		break;
329428d7b3dSmrg	case RepeatPad:
330428d7b3dSmrg		wrap_mode = TEXCOORDMODE_CLAMP_EDGE;
331428d7b3dSmrg		break;
332428d7b3dSmrg	case RepeatReflect:
333428d7b3dSmrg		wrap_mode = TEXCOORDMODE_MIRROR;
334428d7b3dSmrg		break;
335428d7b3dSmrg	default:
336428d7b3dSmrg		FatalError("Unknown repeat type %d\n", picture->repeatType);
337428d7b3dSmrg	}
338428d7b3dSmrg
339428d7b3dSmrg	switch (picture->filter) {
340428d7b3dSmrg	case PictFilterNearest:
341428d7b3dSmrg		filter = (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT) |
342428d7b3dSmrg		    (FILTER_NEAREST << SS2_MIN_FILTER_SHIFT);
343428d7b3dSmrg		break;
344428d7b3dSmrg	case PictFilterBilinear:
345428d7b3dSmrg		filter = (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
346428d7b3dSmrg		    (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT);
347428d7b3dSmrg		break;
348428d7b3dSmrg	default:
349428d7b3dSmrg		intel_uxa_debug_fallback(scrn, "Bad filter 0x%x\n",
350428d7b3dSmrg				     picture->filter);
351428d7b3dSmrg		return FALSE;
352428d7b3dSmrg	}
353428d7b3dSmrg
354428d7b3dSmrg	/* offset filled in at emit time */
355428d7b3dSmrg	if (intel_uxa_pixmap_tiled(pixmap)) {
356428d7b3dSmrg		tiling_bits = MS3_TILED_SURFACE;
357428d7b3dSmrg		if (intel_uxa_get_pixmap_private(pixmap)->tiling
358428d7b3dSmrg				== I915_TILING_Y)
359428d7b3dSmrg			tiling_bits |= MS3_TILE_WALK;
360428d7b3dSmrg	} else
361428d7b3dSmrg		tiling_bits = 0;
362428d7b3dSmrg
363428d7b3dSmrg	intel->texture[unit] = pixmap;
364428d7b3dSmrg	intel->mapstate[unit * 3 + 0] = 0;
365428d7b3dSmrg	intel->mapstate[unit * 3 + 1] = format |
366428d7b3dSmrg	    tiling_bits |
367428d7b3dSmrg	    ((pixmap->drawable.height - 1) << MS3_HEIGHT_SHIFT) |
368428d7b3dSmrg	    ((pixmap->drawable.width - 1) << MS3_WIDTH_SHIFT);
369428d7b3dSmrg	intel->mapstate[unit * 3 + 2] = ((pitch / 4) - 1) << MS4_PITCH_SHIFT;
370428d7b3dSmrg
371428d7b3dSmrg	intel->samplerstate[unit * 3 + 0] = (MIPFILTER_NONE <<
372428d7b3dSmrg					     SS2_MIP_FILTER_SHIFT);
373428d7b3dSmrg	intel->samplerstate[unit * 3 + 0] |= filter;
374428d7b3dSmrg	intel->samplerstate[unit * 3 + 1] = SS3_NORMALIZED_COORDS;
375428d7b3dSmrg	intel->samplerstate[unit * 3 + 1] |=
376428d7b3dSmrg	    wrap_mode << SS3_TCX_ADDR_MODE_SHIFT;
377428d7b3dSmrg	intel->samplerstate[unit * 3 + 1] |=
378428d7b3dSmrg	    wrap_mode << SS3_TCY_ADDR_MODE_SHIFT;
379428d7b3dSmrg	intel->samplerstate[unit * 3 + 1] |= unit << SS3_TEXTUREMAP_INDEX_SHIFT;
380428d7b3dSmrg	intel->samplerstate[unit * 3 + 2] = 0x00000000;	/* border color */
381428d7b3dSmrg
382428d7b3dSmrg	intel->transform[unit] = picture->transform;
383428d7b3dSmrg
384428d7b3dSmrg	return TRUE;
385428d7b3dSmrg}
386428d7b3dSmrg
387428d7b3dSmrgstatic void
388428d7b3dSmrgi915_emit_composite_primitive_identity_source(intel_screen_private *intel,
389428d7b3dSmrg					      int srcX, int srcY,
390428d7b3dSmrg					      int maskX, int maskY,
391428d7b3dSmrg					      int dstX, int dstY,
392428d7b3dSmrg					      int w, int h)
393428d7b3dSmrg{
394428d7b3dSmrg	OUT_VERTEX(dstX + w);
395428d7b3dSmrg	OUT_VERTEX(dstY + h);
396428d7b3dSmrg	OUT_VERTEX((srcX + w) * intel->scale_units[0][0]);
397428d7b3dSmrg	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
398428d7b3dSmrg
399428d7b3dSmrg	OUT_VERTEX(dstX);
400428d7b3dSmrg	OUT_VERTEX(dstY + h);
401428d7b3dSmrg	OUT_VERTEX(srcX * intel->scale_units[0][0]);
402428d7b3dSmrg	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
403428d7b3dSmrg
404428d7b3dSmrg	OUT_VERTEX(dstX);
405428d7b3dSmrg	OUT_VERTEX(dstY);
406428d7b3dSmrg	OUT_VERTEX(srcX * intel->scale_units[0][0]);
407428d7b3dSmrg	OUT_VERTEX(srcY * intel->scale_units[0][1]);
408428d7b3dSmrg}
409428d7b3dSmrg
410428d7b3dSmrgstatic void
411428d7b3dSmrgi915_emit_composite_primitive_affine_source(intel_screen_private *intel,
412428d7b3dSmrg					    int srcX, int srcY,
413428d7b3dSmrg					    int maskX, int maskY,
414428d7b3dSmrg					    int dstX, int dstY,
415428d7b3dSmrg					    int w, int h)
416428d7b3dSmrg{
417428d7b3dSmrg	float src_x[3], src_y[3];
418428d7b3dSmrg
419428d7b3dSmrg	if (!intel_uxa_get_transformed_coordinates(srcX, srcY,
420428d7b3dSmrg					      intel->transform[0],
421428d7b3dSmrg					      &src_x[0],
422428d7b3dSmrg					      &src_y[0]))
423428d7b3dSmrg		return;
424428d7b3dSmrg
425428d7b3dSmrg	if (!intel_uxa_get_transformed_coordinates(srcX, srcY + h,
426428d7b3dSmrg					      intel->transform[0],
427428d7b3dSmrg					      &src_x[1],
428428d7b3dSmrg					      &src_y[1]))
429428d7b3dSmrg		return;
430428d7b3dSmrg
431428d7b3dSmrg	if (!intel_uxa_get_transformed_coordinates(srcX + w, srcY + h,
432428d7b3dSmrg					      intel->transform[0],
433428d7b3dSmrg					      &src_x[2],
434428d7b3dSmrg					      &src_y[2]))
435428d7b3dSmrg		return;
436428d7b3dSmrg
437428d7b3dSmrg	OUT_VERTEX(dstX + w);
438428d7b3dSmrg	OUT_VERTEX(dstY + h);
439428d7b3dSmrg	OUT_VERTEX(src_x[2] * intel->scale_units[0][0]);
440428d7b3dSmrg	OUT_VERTEX(src_y[2] * intel->scale_units[0][1]);
441428d7b3dSmrg
442428d7b3dSmrg	OUT_VERTEX(dstX);
443428d7b3dSmrg	OUT_VERTEX(dstY + h);
444428d7b3dSmrg	OUT_VERTEX(src_x[1] * intel->scale_units[0][0]);
445428d7b3dSmrg	OUT_VERTEX(src_y[1] * intel->scale_units[0][1]);
446428d7b3dSmrg
447428d7b3dSmrg	OUT_VERTEX(dstX);
448428d7b3dSmrg	OUT_VERTEX(dstY);
449428d7b3dSmrg	OUT_VERTEX(src_x[0] * intel->scale_units[0][0]);
450428d7b3dSmrg	OUT_VERTEX(src_y[0] * intel->scale_units[0][1]);
451428d7b3dSmrg}
452428d7b3dSmrg
453428d7b3dSmrgstatic void
454428d7b3dSmrgi915_emit_composite_primitive_identity_source_mask(intel_screen_private *intel,
455428d7b3dSmrg						   int srcX, int srcY,
456428d7b3dSmrg						   int maskX, int maskY,
457428d7b3dSmrg						   int dstX, int dstY,
458428d7b3dSmrg						   int w, int h)
459428d7b3dSmrg{
460428d7b3dSmrg	OUT_VERTEX(dstX + w);
461428d7b3dSmrg	OUT_VERTEX(dstY + h);
462428d7b3dSmrg	OUT_VERTEX((srcX + w) * intel->scale_units[0][0]);
463428d7b3dSmrg	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
464428d7b3dSmrg	OUT_VERTEX((maskX + w) * intel->scale_units[1][0]);
465428d7b3dSmrg	OUT_VERTEX((maskY + h) * intel->scale_units[1][1]);
466428d7b3dSmrg
467428d7b3dSmrg	OUT_VERTEX(dstX);
468428d7b3dSmrg	OUT_VERTEX(dstY + h);
469428d7b3dSmrg	OUT_VERTEX(srcX * intel->scale_units[0][0]);
470428d7b3dSmrg	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
471428d7b3dSmrg	OUT_VERTEX(maskX * intel->scale_units[1][0]);
472428d7b3dSmrg	OUT_VERTEX((maskY + h) * intel->scale_units[1][1]);
473428d7b3dSmrg
474428d7b3dSmrg	OUT_VERTEX(dstX);
475428d7b3dSmrg	OUT_VERTEX(dstY);
476428d7b3dSmrg	OUT_VERTEX(srcX * intel->scale_units[0][0]);
477428d7b3dSmrg	OUT_VERTEX(srcY * intel->scale_units[0][1]);
478428d7b3dSmrg	OUT_VERTEX(maskX * intel->scale_units[1][0]);
479428d7b3dSmrg	OUT_VERTEX(maskY * intel->scale_units[1][1]);
480428d7b3dSmrg}
481428d7b3dSmrg
482428d7b3dSmrgstatic void
483428d7b3dSmrgi915_emit_composite_primitive(intel_screen_private *intel,
484428d7b3dSmrg			      int srcX, int srcY,
485428d7b3dSmrg			      int maskX, int maskY,
486428d7b3dSmrg			      int dstX, int dstY,
487428d7b3dSmrg			      int w, int h)
488428d7b3dSmrg{
489428d7b3dSmrg	Bool is_affine_src = TRUE, is_affine_mask = TRUE;
490428d7b3dSmrg	int tex_unit = 0;
491428d7b3dSmrg	int src_unit = -1, mask_unit = -1;
492428d7b3dSmrg	float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
493428d7b3dSmrg
494428d7b3dSmrg	src_unit = tex_unit++;
495428d7b3dSmrg
496428d7b3dSmrg	is_affine_src = intel_uxa_transform_is_affine(intel->transform[src_unit]);
497428d7b3dSmrg	if (is_affine_src) {
498428d7b3dSmrg		if (!intel_uxa_get_transformed_coordinates(srcX, srcY,
499428d7b3dSmrg						      intel->
500428d7b3dSmrg						      transform[src_unit],
501428d7b3dSmrg						      &src_x[0],
502428d7b3dSmrg						      &src_y[0]))
503428d7b3dSmrg			return;
504428d7b3dSmrg
505428d7b3dSmrg		if (!intel_uxa_get_transformed_coordinates(srcX, srcY + h,
506428d7b3dSmrg						      intel->
507428d7b3dSmrg						      transform[src_unit],
508428d7b3dSmrg						      &src_x[1],
509428d7b3dSmrg						      &src_y[1]))
510428d7b3dSmrg			return;
511428d7b3dSmrg
512428d7b3dSmrg		if (!intel_uxa_get_transformed_coordinates(srcX + w, srcY + h,
513428d7b3dSmrg						      intel->
514428d7b3dSmrg						      transform[src_unit],
515428d7b3dSmrg						      &src_x[2],
516428d7b3dSmrg						      &src_y[2]))
517428d7b3dSmrg			return;
518428d7b3dSmrg	} else {
519428d7b3dSmrg		if (!intel_uxa_get_transformed_coordinates_3d(srcX, srcY,
520428d7b3dSmrg							 intel->
521428d7b3dSmrg							 transform[src_unit],
522428d7b3dSmrg							 &src_x[0],
523428d7b3dSmrg							 &src_y[0],
524428d7b3dSmrg							 &src_w[0]))
525428d7b3dSmrg			return;
526428d7b3dSmrg
527428d7b3dSmrg		if (!intel_uxa_get_transformed_coordinates_3d(srcX, srcY + h,
528428d7b3dSmrg							 intel->
529428d7b3dSmrg							 transform[src_unit],
530428d7b3dSmrg							 &src_x[1],
531428d7b3dSmrg							 &src_y[1],
532428d7b3dSmrg							 &src_w[1]))
533428d7b3dSmrg			return;
534428d7b3dSmrg
535428d7b3dSmrg		if (!intel_uxa_get_transformed_coordinates_3d(srcX + w, srcY + h,
536428d7b3dSmrg							 intel->
537428d7b3dSmrg							 transform[src_unit],
538428d7b3dSmrg							 &src_x[2],
539428d7b3dSmrg							 &src_y[2],
540428d7b3dSmrg							 &src_w[2]))
541428d7b3dSmrg			return;
542428d7b3dSmrg	}
543428d7b3dSmrg
544428d7b3dSmrg	if (intel->render_mask) {
545428d7b3dSmrg		mask_unit = tex_unit++;
546428d7b3dSmrg
547428d7b3dSmrg		is_affine_mask = intel_uxa_transform_is_affine(intel->transform[mask_unit]);
548428d7b3dSmrg		if (is_affine_mask) {
549428d7b3dSmrg			if (!intel_uxa_get_transformed_coordinates(maskX, maskY,
550428d7b3dSmrg							      intel->
551428d7b3dSmrg							      transform[mask_unit],
552428d7b3dSmrg							      &mask_x[0],
553428d7b3dSmrg							      &mask_y[0]))
554428d7b3dSmrg				return;
555428d7b3dSmrg
556428d7b3dSmrg			if (!intel_uxa_get_transformed_coordinates(maskX, maskY + h,
557428d7b3dSmrg							      intel->
558428d7b3dSmrg							      transform[mask_unit],
559428d7b3dSmrg							      &mask_x[1],
560428d7b3dSmrg							      &mask_y[1]))
561428d7b3dSmrg				return;
562428d7b3dSmrg
563428d7b3dSmrg			if (!intel_uxa_get_transformed_coordinates(maskX + w, maskY + h,
564428d7b3dSmrg							      intel->
565428d7b3dSmrg							      transform[mask_unit],
566428d7b3dSmrg							      &mask_x[2],
567428d7b3dSmrg							      &mask_y[2]))
568428d7b3dSmrg				return;
569428d7b3dSmrg		} else {
570428d7b3dSmrg			if (!intel_uxa_get_transformed_coordinates_3d(maskX, maskY,
571428d7b3dSmrg								 intel->
572428d7b3dSmrg								 transform[mask_unit],
573428d7b3dSmrg								 &mask_x[0],
574428d7b3dSmrg								 &mask_y[0],
575428d7b3dSmrg								 &mask_w[0]))
576428d7b3dSmrg				return;
577428d7b3dSmrg
578428d7b3dSmrg			if (!intel_uxa_get_transformed_coordinates_3d(maskX, maskY + h,
579428d7b3dSmrg								 intel->
580428d7b3dSmrg								 transform[mask_unit],
581428d7b3dSmrg								 &mask_x[1],
582428d7b3dSmrg								 &mask_y[1],
583428d7b3dSmrg								 &mask_w[1]))
584428d7b3dSmrg				return;
585428d7b3dSmrg
586428d7b3dSmrg			if (!intel_uxa_get_transformed_coordinates_3d(maskX + w, maskY + h,
587428d7b3dSmrg								 intel->
588428d7b3dSmrg								 transform[mask_unit],
589428d7b3dSmrg								 &mask_x[2],
590428d7b3dSmrg								 &mask_y[2],
591428d7b3dSmrg								 &mask_w[2]))
592428d7b3dSmrg				return;
593428d7b3dSmrg		}
594428d7b3dSmrg	}
595428d7b3dSmrg
596428d7b3dSmrg	OUT_VERTEX(dstX + w);
597428d7b3dSmrg	OUT_VERTEX(dstY + h);
598428d7b3dSmrg	OUT_VERTEX(src_x[2] * intel->scale_units[src_unit][0]);
599428d7b3dSmrg	OUT_VERTEX(src_y[2] * intel->scale_units[src_unit][1]);
600428d7b3dSmrg	if (!is_affine_src) {
601428d7b3dSmrg		OUT_VERTEX(0.0);
602428d7b3dSmrg		OUT_VERTEX(src_w[2]);
603428d7b3dSmrg	}
604428d7b3dSmrg	if (intel->render_mask) {
605428d7b3dSmrg		OUT_VERTEX(mask_x[2] * intel->scale_units[mask_unit][0]);
606428d7b3dSmrg		OUT_VERTEX(mask_y[2] * intel->scale_units[mask_unit][1]);
607428d7b3dSmrg		if (!is_affine_mask) {
608428d7b3dSmrg			OUT_VERTEX(0.0);
609428d7b3dSmrg			OUT_VERTEX(mask_w[2]);
610428d7b3dSmrg		}
611428d7b3dSmrg	}
612428d7b3dSmrg
613428d7b3dSmrg	OUT_VERTEX(dstX);
614428d7b3dSmrg	OUT_VERTEX(dstY + h);
615428d7b3dSmrg	OUT_VERTEX(src_x[1] * intel->scale_units[src_unit][0]);
616428d7b3dSmrg	OUT_VERTEX(src_y[1] * intel->scale_units[src_unit][1]);
617428d7b3dSmrg	if (!is_affine_src) {
618428d7b3dSmrg		OUT_VERTEX(0.0);
619428d7b3dSmrg		OUT_VERTEX(src_w[1]);
620428d7b3dSmrg	}
621428d7b3dSmrg	if (intel->render_mask) {
622428d7b3dSmrg		OUT_VERTEX(mask_x[1] * intel->scale_units[mask_unit][0]);
623428d7b3dSmrg		OUT_VERTEX(mask_y[1] * intel->scale_units[mask_unit][1]);
624428d7b3dSmrg		if (!is_affine_mask) {
625428d7b3dSmrg			OUT_VERTEX(0.0);
626428d7b3dSmrg			OUT_VERTEX(mask_w[1]);
627428d7b3dSmrg		}
628428d7b3dSmrg	}
629428d7b3dSmrg
630428d7b3dSmrg	OUT_VERTEX(dstX);
631428d7b3dSmrg	OUT_VERTEX(dstY);
632428d7b3dSmrg	OUT_VERTEX(src_x[0] * intel->scale_units[src_unit][0]);
633428d7b3dSmrg	OUT_VERTEX(src_y[0] * intel->scale_units[src_unit][1]);
634428d7b3dSmrg	if (!is_affine_src) {
635428d7b3dSmrg		OUT_VERTEX(0.0);
636428d7b3dSmrg		OUT_VERTEX(src_w[0]);
637428d7b3dSmrg	}
638428d7b3dSmrg	if (intel->render_mask) {
639428d7b3dSmrg		OUT_VERTEX(mask_x[0] * intel->scale_units[mask_unit][0]);
640428d7b3dSmrg		OUT_VERTEX(mask_y[0] * intel->scale_units[mask_unit][1]);
641428d7b3dSmrg		if (!is_affine_mask) {
642428d7b3dSmrg			OUT_VERTEX(0.0);
643428d7b3dSmrg			OUT_VERTEX(mask_w[0]);
644428d7b3dSmrg		}
645428d7b3dSmrg	}
646428d7b3dSmrg}
647428d7b3dSmrg
648428d7b3dSmrgBool
649428d7b3dSmrgi915_prepare_composite(int op, PicturePtr source_picture,
650428d7b3dSmrg		       PicturePtr mask_picture, PicturePtr dest_picture,
651428d7b3dSmrg		       PixmapPtr source, PixmapPtr mask, PixmapPtr dest)
652428d7b3dSmrg{
653428d7b3dSmrg	ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen);
654428d7b3dSmrg	intel_screen_private *intel = intel_get_screen_private(scrn);
655428d7b3dSmrg	drm_intel_bo *bo_table[] = {
656428d7b3dSmrg		NULL,		/* batch_bo */
657428d7b3dSmrg		intel_uxa_get_pixmap_bo(dest),
658428d7b3dSmrg		intel_uxa_get_pixmap_bo(source),
659428d7b3dSmrg		mask ? intel_uxa_get_pixmap_bo(mask) : NULL,
660428d7b3dSmrg	};
661428d7b3dSmrg	int tex_unit = 0;
662428d7b3dSmrg	int floats_per_vertex;
663428d7b3dSmrg
664428d7b3dSmrg	intel->render_source_picture = source_picture;
665428d7b3dSmrg	intel->render_source = source;
666428d7b3dSmrg	intel->render_mask_picture = mask_picture;
667428d7b3dSmrg	intel->render_mask = mask;
668428d7b3dSmrg	intel->render_dest_picture = dest_picture;
669428d7b3dSmrg	intel->render_dest = dest;
670428d7b3dSmrg
671428d7b3dSmrg	if (!intel_uxa_check_pitch_3d(source))
672428d7b3dSmrg		return FALSE;
673428d7b3dSmrg
674428d7b3dSmrg	if (mask && !intel_uxa_check_pitch_3d(mask))
675428d7b3dSmrg		return FALSE;
676428d7b3dSmrg
677428d7b3dSmrg	if (!intel_uxa_check_pitch_3d(dest))
678428d7b3dSmrg		return FALSE;
679428d7b3dSmrg
680428d7b3dSmrg	if (!i915_get_dest_format(dest_picture,
681428d7b3dSmrg				  &intel->i915_render_state.dst_format))
682428d7b3dSmrg		return FALSE;
683428d7b3dSmrg
684428d7b3dSmrg	if (!intel_uxa_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table)))
685428d7b3dSmrg		return FALSE;
686428d7b3dSmrg
687428d7b3dSmrg	if (mask_picture != NULL && mask_picture->componentAlpha &&
688428d7b3dSmrg	    PICT_FORMAT_RGB(mask_picture->format)) {
689428d7b3dSmrg		/* Check if it's component alpha that relies on a source alpha
690428d7b3dSmrg		 * and on the source value.  We can only get one of those
691428d7b3dSmrg		 * into the single source value that we get to blend with.
692428d7b3dSmrg		 */
693428d7b3dSmrg		if (i915_blend_op[op].src_alpha &&
694428d7b3dSmrg		    (i915_blend_op[op].src_blend != BLENDFACT_ZERO))
695428d7b3dSmrg			return FALSE;
696428d7b3dSmrg	}
697428d7b3dSmrg
698428d7b3dSmrg	intel->transform[0] = NULL;
699428d7b3dSmrg	intel->scale_units[0][0] = -1;
700428d7b3dSmrg	intel->scale_units[0][1] = -1;
701428d7b3dSmrg	intel->transform[1] = NULL;
702428d7b3dSmrg	intel->scale_units[1][0] = -1;
703428d7b3dSmrg	intel->scale_units[1][1] = -1;
704428d7b3dSmrg
705428d7b3dSmrg	floats_per_vertex = 2;		/* dest x/y */
706428d7b3dSmrg	if (!i915_texture_setup(source_picture, source, tex_unit++)) {
707428d7b3dSmrg		intel_uxa_debug_fallback(scrn, "fail to setup src texture\n");
708428d7b3dSmrg		return FALSE;
709428d7b3dSmrg	}
710428d7b3dSmrg
711428d7b3dSmrg	if (intel_uxa_transform_is_affine(source_picture->transform))
712428d7b3dSmrg		floats_per_vertex += 2;	/* src x/y */
713428d7b3dSmrg	else
714428d7b3dSmrg		floats_per_vertex += 4;	/* src x/y/z/w */
715428d7b3dSmrg
716428d7b3dSmrg	if (mask_picture != NULL) {
717428d7b3dSmrg		assert(mask != NULL);
718428d7b3dSmrg		if (!i915_texture_setup(mask_picture, mask, tex_unit++)) {
719428d7b3dSmrg			intel_uxa_debug_fallback(scrn,
720428d7b3dSmrg					     "fail to setup mask texture\n");
721428d7b3dSmrg			return FALSE;
722428d7b3dSmrg		}
723428d7b3dSmrg
724428d7b3dSmrg		if (intel_uxa_transform_is_affine(mask_picture->transform))
725428d7b3dSmrg			floats_per_vertex += 2;	/* mask x/y */
726428d7b3dSmrg		else
727428d7b3dSmrg			floats_per_vertex += 4;	/* mask x/y/z/w */
728428d7b3dSmrg	}
729428d7b3dSmrg
730428d7b3dSmrg	intel->i915_render_state.op = op;
731428d7b3dSmrg
732428d7b3dSmrg	if (intel_uxa_pixmap_is_dirty(source) || intel_uxa_pixmap_is_dirty(mask))
733428d7b3dSmrg		intel_batch_emit_flush(scrn);
734428d7b3dSmrg
735428d7b3dSmrg	intel->needs_render_state_emit = TRUE;
736428d7b3dSmrg
737428d7b3dSmrg	intel->prim_emit = i915_emit_composite_primitive;
738428d7b3dSmrg	if (!mask) {
739428d7b3dSmrg		if (intel->transform[0] == NULL)
740428d7b3dSmrg			intel->prim_emit = i915_emit_composite_primitive_identity_source;
741428d7b3dSmrg		else if (intel_uxa_transform_is_affine(intel->transform[0]))
742428d7b3dSmrg			intel->prim_emit = i915_emit_composite_primitive_affine_source;
743428d7b3dSmrg	} else {
744428d7b3dSmrg		if (intel->transform[0] == NULL) {
745428d7b3dSmrg			if (intel->transform[1] == NULL)
746428d7b3dSmrg				intel->prim_emit = i915_emit_composite_primitive_identity_source_mask;
747428d7b3dSmrg		}
748428d7b3dSmrg	}
749428d7b3dSmrg
750428d7b3dSmrg	if (floats_per_vertex != intel->floats_per_vertex) {
751428d7b3dSmrg		intel->floats_per_vertex = floats_per_vertex;
752428d7b3dSmrg		intel->needs_render_vertex_emit = TRUE;
753428d7b3dSmrg	}
754428d7b3dSmrg
755428d7b3dSmrg	return TRUE;
756428d7b3dSmrg}
757428d7b3dSmrg
758428d7b3dSmrgstatic void
759428d7b3dSmrgi915_composite_emit_shader(intel_screen_private *intel, CARD8 op)
760428d7b3dSmrg{
761428d7b3dSmrg	PicturePtr mask_picture = intel->render_mask_picture;
762428d7b3dSmrg	PixmapPtr mask = intel->render_mask;
763428d7b3dSmrg	int src_reg, mask_reg;
764428d7b3dSmrg	Bool dest_is_alpha = PIXMAN_FORMAT_RGB(intel->render_dest_picture->format) == 0;
765428d7b3dSmrg	FS_LOCALS();
766428d7b3dSmrg
767428d7b3dSmrg	FS_BEGIN();
768428d7b3dSmrg
769428d7b3dSmrg	/* Declare the registers necessary for our program.  */
770428d7b3dSmrg	i915_fs_dcl(FS_T0);
771428d7b3dSmrg	i915_fs_dcl(FS_S0);
772428d7b3dSmrg	if (!mask) {
773428d7b3dSmrg		/* No mask, so load directly to output color */
774428d7b3dSmrg		if (dest_is_alpha)
775428d7b3dSmrg			src_reg = FS_R0;
776428d7b3dSmrg		else
777428d7b3dSmrg			src_reg = FS_OC;
778428d7b3dSmrg
779428d7b3dSmrg		if (intel_uxa_transform_is_affine(intel->transform[0]))
780428d7b3dSmrg			i915_fs_texld(src_reg, FS_S0, FS_T0);
781428d7b3dSmrg		else
782428d7b3dSmrg			i915_fs_texldp(src_reg, FS_S0, FS_T0);
783428d7b3dSmrg
784428d7b3dSmrg		if (src_reg != FS_OC)
785428d7b3dSmrg			i915_fs_mov(FS_OC, i915_fs_operand(src_reg, W, W, W, W));
786428d7b3dSmrg	} else {
787428d7b3dSmrg		i915_fs_dcl(FS_T1);
788428d7b3dSmrg		i915_fs_dcl(FS_S1);
789428d7b3dSmrg
790428d7b3dSmrg		/* Load the source_picture texel */
791428d7b3dSmrg		if (intel_uxa_transform_is_affine(intel->transform[0]))
792428d7b3dSmrg			i915_fs_texld(FS_R0, FS_S0, FS_T0);
793428d7b3dSmrg		else
794428d7b3dSmrg			i915_fs_texldp(FS_R0, FS_S0, FS_T0);
795428d7b3dSmrg
796428d7b3dSmrg		src_reg = FS_R0;
797428d7b3dSmrg
798428d7b3dSmrg		/* Load the mask_picture texel */
799428d7b3dSmrg		if (intel_uxa_transform_is_affine(intel->transform[1]))
800428d7b3dSmrg			i915_fs_texld(FS_R1, FS_S1, FS_T1);
801428d7b3dSmrg		else
802428d7b3dSmrg			i915_fs_texldp(FS_R1, FS_S1, FS_T1);
803428d7b3dSmrg
804428d7b3dSmrg		mask_reg = FS_R1;
805428d7b3dSmrg
806428d7b3dSmrg		if (dest_is_alpha) {
807428d7b3dSmrg			i915_fs_mul(FS_OC,
808428d7b3dSmrg				    i915_fs_operand(src_reg, W, W, W, W),
809428d7b3dSmrg				    i915_fs_operand(mask_reg, W, W, W, W));
810428d7b3dSmrg		} else {
811428d7b3dSmrg			/* If component alpha is active in the mask and the blend
812428d7b3dSmrg			 * operation uses the source alpha, then we know we don't
813428d7b3dSmrg			 * need the source value (otherwise we would have hit a
814428d7b3dSmrg			 * fallback earlier), so we provide the source alpha (src.A *
815428d7b3dSmrg			 * mask.X) as output color.
816428d7b3dSmrg			 * Conversely, if CA is set and we don't need the source alpha,
817428d7b3dSmrg			 * then we produce the source value (src.X * mask.X) and the
818428d7b3dSmrg			 * source alpha is unused.  Otherwise, we provide the non-CA
819428d7b3dSmrg			 * source value (src.X * mask.A).
820428d7b3dSmrg			 */
821428d7b3dSmrg			if (mask_picture->componentAlpha &&
822428d7b3dSmrg			    PICT_FORMAT_RGB(mask_picture->format)) {
823428d7b3dSmrg				if (i915_blend_op[op].src_alpha) {
824428d7b3dSmrg					i915_fs_mul(FS_OC,
825428d7b3dSmrg						    i915_fs_operand(src_reg, W, W, W, W),
826428d7b3dSmrg						    i915_fs_operand_reg(mask_reg));
827428d7b3dSmrg				} else {
828428d7b3dSmrg					i915_fs_mul(FS_OC,
829428d7b3dSmrg						    i915_fs_operand_reg(src_reg),
830428d7b3dSmrg						    i915_fs_operand_reg(mask_reg));
831428d7b3dSmrg				}
832428d7b3dSmrg			} else {
833428d7b3dSmrg				i915_fs_mul(FS_OC,
834428d7b3dSmrg					    i915_fs_operand_reg(src_reg),
835428d7b3dSmrg					    i915_fs_operand(mask_reg, W, W, W, W));
836428d7b3dSmrg			}
837428d7b3dSmrg		}
838428d7b3dSmrg	}
839428d7b3dSmrg
840428d7b3dSmrg	FS_END();
841428d7b3dSmrg}
842428d7b3dSmrg
843428d7b3dSmrgstatic void i915_emit_composite_setup(ScrnInfoPtr scrn)
844428d7b3dSmrg{
845428d7b3dSmrg	intel_screen_private *intel = intel_get_screen_private(scrn);
846428d7b3dSmrg	int op = intel->i915_render_state.op;
847428d7b3dSmrg	PicturePtr mask_picture = intel->render_mask_picture;
848428d7b3dSmrg	PicturePtr dest_picture = intel->render_dest_picture;
849428d7b3dSmrg	PixmapPtr mask = intel->render_mask;
850428d7b3dSmrg	PixmapPtr dest = intel->render_dest;
851428d7b3dSmrg	int tex_count, t;
852428d7b3dSmrg
853428d7b3dSmrg	intel->needs_render_state_emit = FALSE;
854428d7b3dSmrg
855428d7b3dSmrg	IntelEmitInvarientState(scrn);
856428d7b3dSmrg	intel->last_3d = LAST_3D_RENDER;
857428d7b3dSmrg
858428d7b3dSmrg	tex_count = 1 + (mask != NULL);
859428d7b3dSmrg
860428d7b3dSmrg	assert(intel->in_batch_atomic);
861428d7b3dSmrg
862428d7b3dSmrg	if (tex_count != 0) {
863428d7b3dSmrg	    OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
864428d7b3dSmrg	    OUT_BATCH((1 << tex_count) - 1);
865428d7b3dSmrg	    for (t = 0; t < tex_count; t++) {
866428d7b3dSmrg		OUT_RELOC_PIXMAP(intel->texture[t], I915_GEM_DOMAIN_SAMPLER, 0, 0);
867428d7b3dSmrg		OUT_BATCH(intel->mapstate[3*t + 1]);
868428d7b3dSmrg		OUT_BATCH(intel->mapstate[3*t + 2]);
869428d7b3dSmrg	    }
870428d7b3dSmrg
871428d7b3dSmrg	    OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count));
872428d7b3dSmrg	    OUT_BATCH((1 << tex_count) - 1);
873428d7b3dSmrg	    for (t = 0; t < tex_count; t++) {
874428d7b3dSmrg		OUT_BATCH(intel->samplerstate[3*t + 0]);
875428d7b3dSmrg		OUT_BATCH(intel->samplerstate[3*t + 1]);
876428d7b3dSmrg		OUT_BATCH(intel->samplerstate[3*t + 2]);
877428d7b3dSmrg	    }
878428d7b3dSmrg	}
879428d7b3dSmrg
880428d7b3dSmrg	/* BUF_INFO is an implicit flush, so avoid if the target has not changed.
881428d7b3dSmrg	 * XXX However for reasons unfathomed, correct rendering in KDE requires
882428d7b3dSmrg	 * at least a MI_FLUSH | INHIBIT_RENDER_CACHE_FLUSH here.
883428d7b3dSmrg	 */
884428d7b3dSmrg	if (1) {
885428d7b3dSmrg		uint32_t tiling_bits;
886428d7b3dSmrg
887428d7b3dSmrg		if (intel_uxa_pixmap_tiled(dest)) {
888428d7b3dSmrg			tiling_bits = BUF_3D_TILED_SURFACE;
889428d7b3dSmrg			if (intel_uxa_get_pixmap_private(dest)->tiling
890428d7b3dSmrg			    == I915_TILING_Y)
891428d7b3dSmrg				tiling_bits |= BUF_3D_TILE_WALK_Y;
892428d7b3dSmrg		} else
893428d7b3dSmrg			tiling_bits = 0;
894428d7b3dSmrg
895428d7b3dSmrg		OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
896428d7b3dSmrg		OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling_bits |
897428d7b3dSmrg			  BUF_3D_PITCH(intel_pixmap_pitch(dest)));
898428d7b3dSmrg		OUT_RELOC_PIXMAP(dest, I915_GEM_DOMAIN_RENDER,
899428d7b3dSmrg				 I915_GEM_DOMAIN_RENDER, 0);
900428d7b3dSmrg
901428d7b3dSmrg		OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
902428d7b3dSmrg		OUT_BATCH(intel->i915_render_state.dst_format);
903428d7b3dSmrg
904428d7b3dSmrg		/* draw rect is unconditional */
905428d7b3dSmrg		OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
906428d7b3dSmrg		OUT_BATCH(0x00000000);
907428d7b3dSmrg		OUT_BATCH(0x00000000);	/* ymin, xmin */
908428d7b3dSmrg		OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) |
909428d7b3dSmrg			  DRAW_XMAX(dest->drawable.width - 1));
910428d7b3dSmrg		/* yorig, xorig (relate to color buffer?) */
911428d7b3dSmrg		OUT_BATCH(0x00000000);
912428d7b3dSmrg	}
913428d7b3dSmrg
914428d7b3dSmrg	{
915428d7b3dSmrg		uint32_t ss2;
916428d7b3dSmrg
917428d7b3dSmrg		ss2 = ~0;
918428d7b3dSmrg		ss2 &= ~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT);
919428d7b3dSmrg		ss2 |= S2_TEXCOORD_FMT(0,
920428d7b3dSmrg				       intel_uxa_transform_is_affine(intel->transform[0]) ?
921428d7b3dSmrg				       TEXCOORDFMT_2D : TEXCOORDFMT_4D);
922428d7b3dSmrg		if (mask) {
923428d7b3dSmrg		    ss2 &= ~S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT);
924428d7b3dSmrg		    ss2 |= S2_TEXCOORD_FMT(1,
925428d7b3dSmrg					   intel_uxa_transform_is_affine(intel->transform[1]) ?
926428d7b3dSmrg					   TEXCOORDFMT_2D : TEXCOORDFMT_4D);
927428d7b3dSmrg		}
928428d7b3dSmrg
929428d7b3dSmrg		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
930428d7b3dSmrg		OUT_BATCH(ss2);
931428d7b3dSmrg		OUT_BATCH(i915_get_blend_cntl(op, mask_picture, dest_picture->format));
932428d7b3dSmrg	}
933428d7b3dSmrg
934428d7b3dSmrg	i915_composite_emit_shader(intel, op);
935428d7b3dSmrg}
936428d7b3dSmrg
937428d7b3dSmrgvoid
938428d7b3dSmrgi915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
939428d7b3dSmrg	       int dstX, int dstY, int w, int h)
940428d7b3dSmrg{
941428d7b3dSmrg	ScrnInfoPtr scrn = xf86ScreenToScrn(dest->drawable.pScreen);
942428d7b3dSmrg	intel_screen_private *intel = intel_get_screen_private(scrn);
943428d7b3dSmrg
944428d7b3dSmrg	/* 28 + 16 + 10 + 20 + 32 + 16 */
945428d7b3dSmrg	intel_batch_start_atomic(scrn, 150);
946428d7b3dSmrg
947428d7b3dSmrg	if (intel->needs_render_state_emit)
948428d7b3dSmrg		i915_emit_composite_setup(scrn);
949428d7b3dSmrg
950428d7b3dSmrg	if (intel->needs_render_vertex_emit ||
951428d7b3dSmrg	    intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) {
952428d7b3dSmrg		i915_vertex_flush(intel);
953428d7b3dSmrg
954428d7b3dSmrg		if (intel_vertex_space(intel) < 256) {
955428d7b3dSmrg			intel_next_vertex(intel);
956428d7b3dSmrg
957428d7b3dSmrg			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
958428d7b3dSmrg				  I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
959428d7b3dSmrg			OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
960428d7b3dSmrg			OUT_BATCH((intel->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) |
961428d7b3dSmrg				  (intel->floats_per_vertex << S1_VERTEX_PITCH_SHIFT));
962428d7b3dSmrg			intel->vertex_index = 0;
963428d7b3dSmrg		} else if (intel->floats_per_vertex != intel->last_floats_per_vertex){
964428d7b3dSmrg			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
965428d7b3dSmrg				  I1_LOAD_S(1) | 0);
966428d7b3dSmrg			OUT_BATCH((intel->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) |
967428d7b3dSmrg				  (intel->floats_per_vertex << S1_VERTEX_PITCH_SHIFT));
968428d7b3dSmrg
969428d7b3dSmrg			intel->vertex_index =
970428d7b3dSmrg				(intel->vertex_used + intel->floats_per_vertex - 1) /  intel->floats_per_vertex;
971428d7b3dSmrg			intel->vertex_used = intel->vertex_index * intel->floats_per_vertex;
972428d7b3dSmrg		}
973428d7b3dSmrg
974428d7b3dSmrg		intel->last_floats_per_vertex = intel->floats_per_vertex;
975428d7b3dSmrg		intel->needs_render_vertex_emit = FALSE;
976428d7b3dSmrg	}
977428d7b3dSmrg
978428d7b3dSmrg	if (intel->prim_offset == 0) {
979428d7b3dSmrg		intel->prim_offset = intel->batch_used;
980428d7b3dSmrg		OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL);
981428d7b3dSmrg		OUT_BATCH(intel->vertex_index);
982428d7b3dSmrg	}
983428d7b3dSmrg	intel->vertex_count += 3;
984428d7b3dSmrg
985428d7b3dSmrg	intel->prim_emit(intel,
986428d7b3dSmrg			 srcX, srcY,
987428d7b3dSmrg			 maskX, maskY,
988428d7b3dSmrg			 dstX, dstY,
989428d7b3dSmrg			 w, h);
990428d7b3dSmrg
991428d7b3dSmrg	intel_batch_end_atomic(scrn);
992428d7b3dSmrg}
993428d7b3dSmrg
994428d7b3dSmrgvoid
995428d7b3dSmrgi915_vertex_flush(intel_screen_private *intel)
996428d7b3dSmrg{
997428d7b3dSmrg	if (intel->prim_offset == 0)
998428d7b3dSmrg		return;
999428d7b3dSmrg
1000428d7b3dSmrg	intel->batch_ptr[intel->prim_offset] |= intel->vertex_count;
1001428d7b3dSmrg	intel->prim_offset = 0;
1002428d7b3dSmrg
1003428d7b3dSmrg	intel->vertex_index += intel->vertex_count;
1004428d7b3dSmrg	intel->vertex_count = 0;
1005428d7b3dSmrg}
1006428d7b3dSmrg
1007428d7b3dSmrgvoid
1008428d7b3dSmrgi915_batch_commit_notify(intel_screen_private *intel)
1009428d7b3dSmrg{
1010428d7b3dSmrg	intel->needs_render_state_emit = TRUE;
1011428d7b3dSmrg	intel->last_floats_per_vertex = 0;
1012428d7b3dSmrg}
1013