1/*
2 * Copyright © 2006 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Wang Zhenyu <zhenyu.z.wang@intel.com>
25 *    Eric Anholt <eric@anholt.net>
26 *
27 */
28
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
33#include "xorg-server.h"
34#include "xf86.h"
35#include "intel.h"
36#include "intel_uxa.h"
37#include "i915_reg.h"
38#include "i915_3d.h"
39
/*
 * Associates a Render picture format with the hardware surface format
 * bits used when a picture of that format is bound as a texture.
 */
struct formatinfo {
	int fmt;		/* picture format (PICT_*) */
	uint32_t card_fmt;	/* MAPSURF_* | MT_* bits for the map state */
};
44
45struct blendinfo {
46	Bool dst_alpha;
47	Bool src_alpha;
48	uint32_t src_blend;
49	uint32_t dst_blend;
50};
51
52static struct blendinfo i915_blend_op[] = {
53	/* Clear */
54	{0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO},
55	/* Src */
56	{0, 0, BLENDFACT_ONE, BLENDFACT_ZERO},
57	/* Dst */
58	{0, 0, BLENDFACT_ZERO, BLENDFACT_ONE},
59	/* Over */
60	{0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA},
61	/* OverReverse */
62	{1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE},
63	/* In */
64	{1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO},
65	/* InReverse */
66	{0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA},
67	/* Out */
68	{1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO},
69	/* OutReverse */
70	{0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA},
71	/* Atop */
72	{1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
73	/* AtopReverse */
74	{1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA},
75	/* Xor */
76	{1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
77	/* Add */
78	{0, 0, BLENDFACT_ONE, BLENDFACT_ONE},
79};
80
81static struct formatinfo i915_tex_formats[] = {
82	{PICT_a8, MAPSURF_8BIT | MT_8BIT_A8},
83	{PICT_a8r8g8b8, MAPSURF_32BIT | MT_32BIT_ARGB8888},
84	{PICT_x8r8g8b8, MAPSURF_32BIT | MT_32BIT_XRGB8888},
85	{PICT_a8b8g8r8, MAPSURF_32BIT | MT_32BIT_ABGR8888},
86	{PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888},
87#if XORG_VERSION_CURRENT >= 10699900
88	{PICT_a2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010},
89	{PICT_a2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010},
90#endif
91	{PICT_r5g6b5, MAPSURF_16BIT | MT_16BIT_RGB565},
92	{PICT_a1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555},
93	{PICT_a4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444},
94};
95
96static uint32_t i915_get_blend_cntl(int op, PicturePtr mask,
97				    uint32_t dst_format)
98{
99	uint32_t sblend, dblend;
100
101	sblend = i915_blend_op[op].src_blend;
102	dblend = i915_blend_op[op].dst_blend;
103
104	/* If there's no dst alpha channel, adjust the blend op so that we'll
105	 * treat it as always 1.
106	 */
107	if (PICT_FORMAT_A(dst_format) == 0 && i915_blend_op[op].dst_alpha) {
108		if (sblend == BLENDFACT_DST_ALPHA)
109			sblend = BLENDFACT_ONE;
110		else if (sblend == BLENDFACT_INV_DST_ALPHA)
111			sblend = BLENDFACT_ZERO;
112	}
113
114	/* i915 engine reads 8bit color buffer into green channel in cases
115	   like color buffer blending .etc, and also writes back green channel.
116	   So with dst_alpha blend we should use color factor. See spec on
117	   "8-bit rendering" */
118	if ((dst_format == PICT_a8) && i915_blend_op[op].dst_alpha) {
119		if (sblend == BLENDFACT_DST_ALPHA)
120			sblend = BLENDFACT_DST_COLR;
121		else if (sblend == BLENDFACT_INV_DST_ALPHA)
122			sblend = BLENDFACT_INV_DST_COLR;
123	}
124
125	/* If the source alpha is being used, then we should only be in a case
126	 * where the source blend factor is 0, and the source blend value is the
127	 * mask channels multiplied by the source picture's alpha.
128	 */
129	if (mask && mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
130	    i915_blend_op[op].src_alpha) {
131		if (dblend == BLENDFACT_SRC_ALPHA) {
132			dblend = BLENDFACT_SRC_COLR;
133		} else if (dblend == BLENDFACT_INV_SRC_ALPHA) {
134			dblend = BLENDFACT_INV_SRC_COLR;
135		}
136	}
137
138	return S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
139		(BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) |
140		(sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
141		(dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
142}
143
/*
 * Horizontal / vertical bias fields that i915_get_dest_format() ORs into
 * the destination buffer format word: the value is placed at bit 20 and
 * bit 16 respectively.
 */
#define DSTORG_HORT_BIAS(x)	((x) << 20)
#define DSTORG_VERT_BIAS(x)	((x) << 16)
146
147static Bool i915_get_dest_format(PicturePtr dest_picture, uint32_t * dst_format)
148{
149	ScrnInfoPtr scrn;
150
151	switch (dest_picture->format) {
152	case PICT_a8r8g8b8:
153	case PICT_x8r8g8b8:
154		*dst_format = COLR_BUF_ARGB8888;
155		break;
156	case PICT_r5g6b5:
157		*dst_format = COLR_BUF_RGB565;
158		break;
159	case PICT_a1r5g5b5:
160	case PICT_x1r5g5b5:
161		*dst_format = COLR_BUF_ARGB1555;
162		break;
163#if XORG_VERSION_CURRENT >= 10699900
164	case PICT_a2r10g10b10:
165	case PICT_x2r10g10b10:
166		*dst_format = COLR_BUF_ARGB2AAA;
167		break;
168#endif
169	case PICT_a8:
170		*dst_format = COLR_BUF_8BIT;
171		break;
172	case PICT_a4r4g4b4:
173	case PICT_x4r4g4b4:
174		*dst_format = COLR_BUF_ARGB4444;
175		break;
176	default:
177		scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen);
178		intel_uxa_debug_fallback(scrn,
179				     "Unsupported dest format 0x%x\n",
180				     (int)dest_picture->format);
181		return FALSE;
182	}
183	*dst_format |= DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8);
184	return TRUE;
185}
186
187Bool
188i915_check_composite(int op,
189		     PicturePtr source_picture,
190		     PicturePtr mask_picture,
191		     PicturePtr dest_picture,
192		     int width, int height)
193{
194	ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen);
195	uint32_t tmp1;
196
197	/* Check for unsupported compositing operations. */
198	if (op >= sizeof(i915_blend_op) / sizeof(i915_blend_op[0])) {
199		intel_uxa_debug_fallback(scrn, "Unsupported Composite op 0x%x\n",
200				     op);
201		return FALSE;
202	}
203	if (mask_picture != NULL && mask_picture->componentAlpha &&
204	    PICT_FORMAT_RGB(mask_picture->format)) {
205		/* Check if it's component alpha that relies on a source alpha
206		 * and on the source value.  We can only get one of those
207		 * into the single source value that we get to blend with.
208		 */
209		if (i915_blend_op[op].src_alpha &&
210		    (i915_blend_op[op].src_blend != BLENDFACT_ZERO)) {
211			if (op != PictOpOver) {
212				intel_uxa_debug_fallback(scrn,
213						     "Component alpha not supported "
214						     "with source alpha and source "
215						     "value blending.\n");
216				return FALSE;
217			}
218		}
219	}
220
221	if (!i915_get_dest_format(dest_picture, &tmp1)) {
222		intel_uxa_debug_fallback(scrn, "Get Color buffer format\n");
223		return FALSE;
224	}
225
226	if (width > 2048 || height > 2048)
227		return FALSE;
228
229	return TRUE;
230}
231
232Bool
233i915_check_composite_target(PixmapPtr pixmap)
234{
235	if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048)
236		return FALSE;
237
238	if(!intel_uxa_check_pitch_3d(pixmap))
239		return FALSE;
240
241	return TRUE;
242}
243
244Bool
245i915_check_composite_texture(ScreenPtr screen, PicturePtr picture)
246{
247	if (picture->repeatType > RepeatReflect) {
248		ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
249		intel_uxa_debug_fallback(scrn, "Unsupported picture repeat %d\n",
250			     picture->repeatType);
251		return FALSE;
252	}
253
254	if (picture->filter != PictFilterNearest &&
255	    picture->filter != PictFilterBilinear) {
256		ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
257		intel_uxa_debug_fallback(scrn, "Unsupported filter 0x%x\n",
258				     picture->filter);
259		return FALSE;
260	}
261
262	if (picture->pSourcePict)
263		return FALSE;
264
265	if (picture->pDrawable) {
266		int w, h, i;
267
268		w = picture->pDrawable->width;
269		h = picture->pDrawable->height;
270		if ((w > 2048) || (h > 2048)) {
271			ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
272			intel_uxa_debug_fallback(scrn,
273					     "Picture w/h too large (%dx%d)\n",
274					     w, h);
275			return FALSE;
276		}
277
278		for (i = 0;
279		     i < sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0]);
280		     i++) {
281			if (i915_tex_formats[i].fmt == picture->format)
282				break;
283		}
284		if (i == sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0]))
285		{
286			ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
287			intel_uxa_debug_fallback(scrn, "Unsupported picture format "
288					     "0x%x\n",
289					     (int)picture->format);
290			return FALSE;
291		}
292
293		return TRUE;
294	}
295
296	return FALSE;
297}
298
299static Bool i915_texture_setup(PicturePtr picture, PixmapPtr pixmap, int unit)
300{
301	ScrnInfoPtr scrn = xf86ScreenToScrn(picture->pDrawable->pScreen);
302	intel_screen_private *intel = intel_get_screen_private(scrn);
303	uint32_t format, pitch, filter;
304	uint32_t wrap_mode, tiling_bits;
305	int i;
306
307	pitch = intel_pixmap_pitch(pixmap);
308	intel->scale_units[unit][0] = 1. / pixmap->drawable.width;
309	intel->scale_units[unit][1] = 1. / pixmap->drawable.height;
310
311	for (i = 0; i < sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0]);
312	     i++) {
313		if (i915_tex_formats[i].fmt == picture->format)
314			break;
315	}
316	if (i == sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0])) {
317		intel_uxa_debug_fallback(scrn, "unknown texture format\n");
318		return FALSE;
319	}
320	format = i915_tex_formats[i].card_fmt;
321
322	switch (picture->repeatType) {
323	case RepeatNone:
324		wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
325		break;
326	case RepeatNormal:
327		wrap_mode = TEXCOORDMODE_WRAP;
328		break;
329	case RepeatPad:
330		wrap_mode = TEXCOORDMODE_CLAMP_EDGE;
331		break;
332	case RepeatReflect:
333		wrap_mode = TEXCOORDMODE_MIRROR;
334		break;
335	default:
336		FatalError("Unknown repeat type %d\n", picture->repeatType);
337	}
338
339	switch (picture->filter) {
340	case PictFilterNearest:
341		filter = (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT) |
342		    (FILTER_NEAREST << SS2_MIN_FILTER_SHIFT);
343		break;
344	case PictFilterBilinear:
345		filter = (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
346		    (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT);
347		break;
348	default:
349		intel_uxa_debug_fallback(scrn, "Bad filter 0x%x\n",
350				     picture->filter);
351		return FALSE;
352	}
353
354	/* offset filled in at emit time */
355	if (intel_uxa_pixmap_tiled(pixmap)) {
356		tiling_bits = MS3_TILED_SURFACE;
357		if (intel_uxa_get_pixmap_private(pixmap)->tiling
358				== I915_TILING_Y)
359			tiling_bits |= MS3_TILE_WALK;
360	} else
361		tiling_bits = 0;
362
363	intel->texture[unit] = pixmap;
364	intel->mapstate[unit * 3 + 0] = 0;
365	intel->mapstate[unit * 3 + 1] = format |
366	    tiling_bits |
367	    ((pixmap->drawable.height - 1) << MS3_HEIGHT_SHIFT) |
368	    ((pixmap->drawable.width - 1) << MS3_WIDTH_SHIFT);
369	intel->mapstate[unit * 3 + 2] = ((pitch / 4) - 1) << MS4_PITCH_SHIFT;
370
371	intel->samplerstate[unit * 3 + 0] = (MIPFILTER_NONE <<
372					     SS2_MIP_FILTER_SHIFT);
373	intel->samplerstate[unit * 3 + 0] |= filter;
374	intel->samplerstate[unit * 3 + 1] = SS3_NORMALIZED_COORDS;
375	intel->samplerstate[unit * 3 + 1] |=
376	    wrap_mode << SS3_TCX_ADDR_MODE_SHIFT;
377	intel->samplerstate[unit * 3 + 1] |=
378	    wrap_mode << SS3_TCY_ADDR_MODE_SHIFT;
379	intel->samplerstate[unit * 3 + 1] |= unit << SS3_TEXTUREMAP_INDEX_SHIFT;
380	intel->samplerstate[unit * 3 + 2] = 0x00000000;	/* border color */
381
382	intel->transform[unit] = picture->transform;
383
384	return TRUE;
385}
386
387static void
388i915_emit_composite_primitive_identity_source(intel_screen_private *intel,
389					      int srcX, int srcY,
390					      int maskX, int maskY,
391					      int dstX, int dstY,
392					      int w, int h)
393{
394	OUT_VERTEX(dstX + w);
395	OUT_VERTEX(dstY + h);
396	OUT_VERTEX((srcX + w) * intel->scale_units[0][0]);
397	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
398
399	OUT_VERTEX(dstX);
400	OUT_VERTEX(dstY + h);
401	OUT_VERTEX(srcX * intel->scale_units[0][0]);
402	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
403
404	OUT_VERTEX(dstX);
405	OUT_VERTEX(dstY);
406	OUT_VERTEX(srcX * intel->scale_units[0][0]);
407	OUT_VERTEX(srcY * intel->scale_units[0][1]);
408}
409
410static void
411i915_emit_composite_primitive_affine_source(intel_screen_private *intel,
412					    int srcX, int srcY,
413					    int maskX, int maskY,
414					    int dstX, int dstY,
415					    int w, int h)
416{
417	float src_x[3], src_y[3];
418
419	if (!intel_uxa_get_transformed_coordinates(srcX, srcY,
420					      intel->transform[0],
421					      &src_x[0],
422					      &src_y[0]))
423		return;
424
425	if (!intel_uxa_get_transformed_coordinates(srcX, srcY + h,
426					      intel->transform[0],
427					      &src_x[1],
428					      &src_y[1]))
429		return;
430
431	if (!intel_uxa_get_transformed_coordinates(srcX + w, srcY + h,
432					      intel->transform[0],
433					      &src_x[2],
434					      &src_y[2]))
435		return;
436
437	OUT_VERTEX(dstX + w);
438	OUT_VERTEX(dstY + h);
439	OUT_VERTEX(src_x[2] * intel->scale_units[0][0]);
440	OUT_VERTEX(src_y[2] * intel->scale_units[0][1]);
441
442	OUT_VERTEX(dstX);
443	OUT_VERTEX(dstY + h);
444	OUT_VERTEX(src_x[1] * intel->scale_units[0][0]);
445	OUT_VERTEX(src_y[1] * intel->scale_units[0][1]);
446
447	OUT_VERTEX(dstX);
448	OUT_VERTEX(dstY);
449	OUT_VERTEX(src_x[0] * intel->scale_units[0][0]);
450	OUT_VERTEX(src_y[0] * intel->scale_units[0][1]);
451}
452
453static void
454i915_emit_composite_primitive_identity_source_mask(intel_screen_private *intel,
455						   int srcX, int srcY,
456						   int maskX, int maskY,
457						   int dstX, int dstY,
458						   int w, int h)
459{
460	OUT_VERTEX(dstX + w);
461	OUT_VERTEX(dstY + h);
462	OUT_VERTEX((srcX + w) * intel->scale_units[0][0]);
463	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
464	OUT_VERTEX((maskX + w) * intel->scale_units[1][0]);
465	OUT_VERTEX((maskY + h) * intel->scale_units[1][1]);
466
467	OUT_VERTEX(dstX);
468	OUT_VERTEX(dstY + h);
469	OUT_VERTEX(srcX * intel->scale_units[0][0]);
470	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
471	OUT_VERTEX(maskX * intel->scale_units[1][0]);
472	OUT_VERTEX((maskY + h) * intel->scale_units[1][1]);
473
474	OUT_VERTEX(dstX);
475	OUT_VERTEX(dstY);
476	OUT_VERTEX(srcX * intel->scale_units[0][0]);
477	OUT_VERTEX(srcY * intel->scale_units[0][1]);
478	OUT_VERTEX(maskX * intel->scale_units[1][0]);
479	OUT_VERTEX(maskY * intel->scale_units[1][1]);
480}
481
482static void
483i915_emit_composite_primitive(intel_screen_private *intel,
484			      int srcX, int srcY,
485			      int maskX, int maskY,
486			      int dstX, int dstY,
487			      int w, int h)
488{
489	Bool is_affine_src = TRUE, is_affine_mask = TRUE;
490	int tex_unit = 0;
491	int src_unit = -1, mask_unit = -1;
492	float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
493
494	src_unit = tex_unit++;
495
496	is_affine_src = intel_uxa_transform_is_affine(intel->transform[src_unit]);
497	if (is_affine_src) {
498		if (!intel_uxa_get_transformed_coordinates(srcX, srcY,
499						      intel->
500						      transform[src_unit],
501						      &src_x[0],
502						      &src_y[0]))
503			return;
504
505		if (!intel_uxa_get_transformed_coordinates(srcX, srcY + h,
506						      intel->
507						      transform[src_unit],
508						      &src_x[1],
509						      &src_y[1]))
510			return;
511
512		if (!intel_uxa_get_transformed_coordinates(srcX + w, srcY + h,
513						      intel->
514						      transform[src_unit],
515						      &src_x[2],
516						      &src_y[2]))
517			return;
518	} else {
519		if (!intel_uxa_get_transformed_coordinates_3d(srcX, srcY,
520							 intel->
521							 transform[src_unit],
522							 &src_x[0],
523							 &src_y[0],
524							 &src_w[0]))
525			return;
526
527		if (!intel_uxa_get_transformed_coordinates_3d(srcX, srcY + h,
528							 intel->
529							 transform[src_unit],
530							 &src_x[1],
531							 &src_y[1],
532							 &src_w[1]))
533			return;
534
535		if (!intel_uxa_get_transformed_coordinates_3d(srcX + w, srcY + h,
536							 intel->
537							 transform[src_unit],
538							 &src_x[2],
539							 &src_y[2],
540							 &src_w[2]))
541			return;
542	}
543
544	if (intel->render_mask) {
545		mask_unit = tex_unit++;
546
547		is_affine_mask = intel_uxa_transform_is_affine(intel->transform[mask_unit]);
548		if (is_affine_mask) {
549			if (!intel_uxa_get_transformed_coordinates(maskX, maskY,
550							      intel->
551							      transform[mask_unit],
552							      &mask_x[0],
553							      &mask_y[0]))
554				return;
555
556			if (!intel_uxa_get_transformed_coordinates(maskX, maskY + h,
557							      intel->
558							      transform[mask_unit],
559							      &mask_x[1],
560							      &mask_y[1]))
561				return;
562
563			if (!intel_uxa_get_transformed_coordinates(maskX + w, maskY + h,
564							      intel->
565							      transform[mask_unit],
566							      &mask_x[2],
567							      &mask_y[2]))
568				return;
569		} else {
570			if (!intel_uxa_get_transformed_coordinates_3d(maskX, maskY,
571								 intel->
572								 transform[mask_unit],
573								 &mask_x[0],
574								 &mask_y[0],
575								 &mask_w[0]))
576				return;
577
578			if (!intel_uxa_get_transformed_coordinates_3d(maskX, maskY + h,
579								 intel->
580								 transform[mask_unit],
581								 &mask_x[1],
582								 &mask_y[1],
583								 &mask_w[1]))
584				return;
585
586			if (!intel_uxa_get_transformed_coordinates_3d(maskX + w, maskY + h,
587								 intel->
588								 transform[mask_unit],
589								 &mask_x[2],
590								 &mask_y[2],
591								 &mask_w[2]))
592				return;
593		}
594	}
595
596	OUT_VERTEX(dstX + w);
597	OUT_VERTEX(dstY + h);
598	OUT_VERTEX(src_x[2] * intel->scale_units[src_unit][0]);
599	OUT_VERTEX(src_y[2] * intel->scale_units[src_unit][1]);
600	if (!is_affine_src) {
601		OUT_VERTEX(0.0);
602		OUT_VERTEX(src_w[2]);
603	}
604	if (intel->render_mask) {
605		OUT_VERTEX(mask_x[2] * intel->scale_units[mask_unit][0]);
606		OUT_VERTEX(mask_y[2] * intel->scale_units[mask_unit][1]);
607		if (!is_affine_mask) {
608			OUT_VERTEX(0.0);
609			OUT_VERTEX(mask_w[2]);
610		}
611	}
612
613	OUT_VERTEX(dstX);
614	OUT_VERTEX(dstY + h);
615	OUT_VERTEX(src_x[1] * intel->scale_units[src_unit][0]);
616	OUT_VERTEX(src_y[1] * intel->scale_units[src_unit][1]);
617	if (!is_affine_src) {
618		OUT_VERTEX(0.0);
619		OUT_VERTEX(src_w[1]);
620	}
621	if (intel->render_mask) {
622		OUT_VERTEX(mask_x[1] * intel->scale_units[mask_unit][0]);
623		OUT_VERTEX(mask_y[1] * intel->scale_units[mask_unit][1]);
624		if (!is_affine_mask) {
625			OUT_VERTEX(0.0);
626			OUT_VERTEX(mask_w[1]);
627		}
628	}
629
630	OUT_VERTEX(dstX);
631	OUT_VERTEX(dstY);
632	OUT_VERTEX(src_x[0] * intel->scale_units[src_unit][0]);
633	OUT_VERTEX(src_y[0] * intel->scale_units[src_unit][1]);
634	if (!is_affine_src) {
635		OUT_VERTEX(0.0);
636		OUT_VERTEX(src_w[0]);
637	}
638	if (intel->render_mask) {
639		OUT_VERTEX(mask_x[0] * intel->scale_units[mask_unit][0]);
640		OUT_VERTEX(mask_y[0] * intel->scale_units[mask_unit][1]);
641		if (!is_affine_mask) {
642			OUT_VERTEX(0.0);
643			OUT_VERTEX(mask_w[0]);
644		}
645	}
646}
647
648Bool
649i915_prepare_composite(int op, PicturePtr source_picture,
650		       PicturePtr mask_picture, PicturePtr dest_picture,
651		       PixmapPtr source, PixmapPtr mask, PixmapPtr dest)
652{
653	ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen);
654	intel_screen_private *intel = intel_get_screen_private(scrn);
655	drm_intel_bo *bo_table[] = {
656		NULL,		/* batch_bo */
657		intel_uxa_get_pixmap_bo(dest),
658		intel_uxa_get_pixmap_bo(source),
659		mask ? intel_uxa_get_pixmap_bo(mask) : NULL,
660	};
661	int tex_unit = 0;
662	int floats_per_vertex;
663
664	intel->render_source_picture = source_picture;
665	intel->render_source = source;
666	intel->render_mask_picture = mask_picture;
667	intel->render_mask = mask;
668	intel->render_dest_picture = dest_picture;
669	intel->render_dest = dest;
670
671	if (!intel_uxa_check_pitch_3d(source))
672		return FALSE;
673
674	if (mask && !intel_uxa_check_pitch_3d(mask))
675		return FALSE;
676
677	if (!intel_uxa_check_pitch_3d(dest))
678		return FALSE;
679
680	if (!i915_get_dest_format(dest_picture,
681				  &intel->i915_render_state.dst_format))
682		return FALSE;
683
684	if (!intel_uxa_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table)))
685		return FALSE;
686
687	if (mask_picture != NULL && mask_picture->componentAlpha &&
688	    PICT_FORMAT_RGB(mask_picture->format)) {
689		/* Check if it's component alpha that relies on a source alpha
690		 * and on the source value.  We can only get one of those
691		 * into the single source value that we get to blend with.
692		 */
693		if (i915_blend_op[op].src_alpha &&
694		    (i915_blend_op[op].src_blend != BLENDFACT_ZERO))
695			return FALSE;
696	}
697
698	intel->transform[0] = NULL;
699	intel->scale_units[0][0] = -1;
700	intel->scale_units[0][1] = -1;
701	intel->transform[1] = NULL;
702	intel->scale_units[1][0] = -1;
703	intel->scale_units[1][1] = -1;
704
705	floats_per_vertex = 2;		/* dest x/y */
706	if (!i915_texture_setup(source_picture, source, tex_unit++)) {
707		intel_uxa_debug_fallback(scrn, "fail to setup src texture\n");
708		return FALSE;
709	}
710
711	if (intel_uxa_transform_is_affine(source_picture->transform))
712		floats_per_vertex += 2;	/* src x/y */
713	else
714		floats_per_vertex += 4;	/* src x/y/z/w */
715
716	if (mask_picture != NULL) {
717		assert(mask != NULL);
718		if (!i915_texture_setup(mask_picture, mask, tex_unit++)) {
719			intel_uxa_debug_fallback(scrn,
720					     "fail to setup mask texture\n");
721			return FALSE;
722		}
723
724		if (intel_uxa_transform_is_affine(mask_picture->transform))
725			floats_per_vertex += 2;	/* mask x/y */
726		else
727			floats_per_vertex += 4;	/* mask x/y/z/w */
728	}
729
730	intel->i915_render_state.op = op;
731
732	if (intel_uxa_pixmap_is_dirty(source) || intel_uxa_pixmap_is_dirty(mask))
733		intel_batch_emit_flush(scrn);
734
735	intel->needs_render_state_emit = TRUE;
736
737	intel->prim_emit = i915_emit_composite_primitive;
738	if (!mask) {
739		if (intel->transform[0] == NULL)
740			intel->prim_emit = i915_emit_composite_primitive_identity_source;
741		else if (intel_uxa_transform_is_affine(intel->transform[0]))
742			intel->prim_emit = i915_emit_composite_primitive_affine_source;
743	} else {
744		if (intel->transform[0] == NULL) {
745			if (intel->transform[1] == NULL)
746				intel->prim_emit = i915_emit_composite_primitive_identity_source_mask;
747		}
748	}
749
750	if (floats_per_vertex != intel->floats_per_vertex) {
751		intel->floats_per_vertex = floats_per_vertex;
752		intel->needs_render_vertex_emit = TRUE;
753	}
754
755	return TRUE;
756}
757
758static void
759i915_composite_emit_shader(intel_screen_private *intel, CARD8 op)
760{
761	PicturePtr mask_picture = intel->render_mask_picture;
762	PixmapPtr mask = intel->render_mask;
763	int src_reg, mask_reg;
764	Bool dest_is_alpha = PIXMAN_FORMAT_RGB(intel->render_dest_picture->format) == 0;
765	FS_LOCALS();
766
767	FS_BEGIN();
768
769	/* Declare the registers necessary for our program.  */
770	i915_fs_dcl(FS_T0);
771	i915_fs_dcl(FS_S0);
772	if (!mask) {
773		/* No mask, so load directly to output color */
774		if (dest_is_alpha)
775			src_reg = FS_R0;
776		else
777			src_reg = FS_OC;
778
779		if (intel_uxa_transform_is_affine(intel->transform[0]))
780			i915_fs_texld(src_reg, FS_S0, FS_T0);
781		else
782			i915_fs_texldp(src_reg, FS_S0, FS_T0);
783
784		if (src_reg != FS_OC)
785			i915_fs_mov(FS_OC, i915_fs_operand(src_reg, W, W, W, W));
786	} else {
787		i915_fs_dcl(FS_T1);
788		i915_fs_dcl(FS_S1);
789
790		/* Load the source_picture texel */
791		if (intel_uxa_transform_is_affine(intel->transform[0]))
792			i915_fs_texld(FS_R0, FS_S0, FS_T0);
793		else
794			i915_fs_texldp(FS_R0, FS_S0, FS_T0);
795
796		src_reg = FS_R0;
797
798		/* Load the mask_picture texel */
799		if (intel_uxa_transform_is_affine(intel->transform[1]))
800			i915_fs_texld(FS_R1, FS_S1, FS_T1);
801		else
802			i915_fs_texldp(FS_R1, FS_S1, FS_T1);
803
804		mask_reg = FS_R1;
805
806		if (dest_is_alpha) {
807			i915_fs_mul(FS_OC,
808				    i915_fs_operand(src_reg, W, W, W, W),
809				    i915_fs_operand(mask_reg, W, W, W, W));
810		} else {
811			/* If component alpha is active in the mask and the blend
812			 * operation uses the source alpha, then we know we don't
813			 * need the source value (otherwise we would have hit a
814			 * fallback earlier), so we provide the source alpha (src.A *
815			 * mask.X) as output color.
816			 * Conversely, if CA is set and we don't need the source alpha,
817			 * then we produce the source value (src.X * mask.X) and the
818			 * source alpha is unused.  Otherwise, we provide the non-CA
819			 * source value (src.X * mask.A).
820			 */
821			if (mask_picture->componentAlpha &&
822			    PICT_FORMAT_RGB(mask_picture->format)) {
823				if (i915_blend_op[op].src_alpha) {
824					i915_fs_mul(FS_OC,
825						    i915_fs_operand(src_reg, W, W, W, W),
826						    i915_fs_operand_reg(mask_reg));
827				} else {
828					i915_fs_mul(FS_OC,
829						    i915_fs_operand_reg(src_reg),
830						    i915_fs_operand_reg(mask_reg));
831				}
832			} else {
833				i915_fs_mul(FS_OC,
834					    i915_fs_operand_reg(src_reg),
835					    i915_fs_operand(mask_reg, W, W, W, W));
836			}
837		}
838	}
839
840	FS_END();
841}
842
843static void i915_emit_composite_setup(ScrnInfoPtr scrn)
844{
845	intel_screen_private *intel = intel_get_screen_private(scrn);
846	int op = intel->i915_render_state.op;
847	PicturePtr mask_picture = intel->render_mask_picture;
848	PicturePtr dest_picture = intel->render_dest_picture;
849	PixmapPtr mask = intel->render_mask;
850	PixmapPtr dest = intel->render_dest;
851	int tex_count, t;
852
853	intel->needs_render_state_emit = FALSE;
854
855	IntelEmitInvarientState(scrn);
856	intel->last_3d = LAST_3D_RENDER;
857
858	tex_count = 1 + (mask != NULL);
859
860	assert(intel->in_batch_atomic);
861
862	if (tex_count != 0) {
863	    OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
864	    OUT_BATCH((1 << tex_count) - 1);
865	    for (t = 0; t < tex_count; t++) {
866		OUT_RELOC_PIXMAP(intel->texture[t], I915_GEM_DOMAIN_SAMPLER, 0, 0);
867		OUT_BATCH(intel->mapstate[3*t + 1]);
868		OUT_BATCH(intel->mapstate[3*t + 2]);
869	    }
870
871	    OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count));
872	    OUT_BATCH((1 << tex_count) - 1);
873	    for (t = 0; t < tex_count; t++) {
874		OUT_BATCH(intel->samplerstate[3*t + 0]);
875		OUT_BATCH(intel->samplerstate[3*t + 1]);
876		OUT_BATCH(intel->samplerstate[3*t + 2]);
877	    }
878	}
879
880	/* BUF_INFO is an implicit flush, so avoid if the target has not changed.
881	 * XXX However for reasons unfathomed, correct rendering in KDE requires
882	 * at least a MI_FLUSH | INHIBIT_RENDER_CACHE_FLUSH here.
883	 */
884	if (1) {
885		uint32_t tiling_bits;
886
887		if (intel_uxa_pixmap_tiled(dest)) {
888			tiling_bits = BUF_3D_TILED_SURFACE;
889			if (intel_uxa_get_pixmap_private(dest)->tiling
890			    == I915_TILING_Y)
891				tiling_bits |= BUF_3D_TILE_WALK_Y;
892		} else
893			tiling_bits = 0;
894
895		OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
896		OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling_bits |
897			  BUF_3D_PITCH(intel_pixmap_pitch(dest)));
898		OUT_RELOC_PIXMAP(dest, I915_GEM_DOMAIN_RENDER,
899				 I915_GEM_DOMAIN_RENDER, 0);
900
901		OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
902		OUT_BATCH(intel->i915_render_state.dst_format);
903
904		/* draw rect is unconditional */
905		OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
906		OUT_BATCH(0x00000000);
907		OUT_BATCH(0x00000000);	/* ymin, xmin */
908		OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) |
909			  DRAW_XMAX(dest->drawable.width - 1));
910		/* yorig, xorig (relate to color buffer?) */
911		OUT_BATCH(0x00000000);
912	}
913
914	{
915		uint32_t ss2;
916
917		ss2 = ~0;
918		ss2 &= ~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT);
919		ss2 |= S2_TEXCOORD_FMT(0,
920				       intel_uxa_transform_is_affine(intel->transform[0]) ?
921				       TEXCOORDFMT_2D : TEXCOORDFMT_4D);
922		if (mask) {
923		    ss2 &= ~S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT);
924		    ss2 |= S2_TEXCOORD_FMT(1,
925					   intel_uxa_transform_is_affine(intel->transform[1]) ?
926					   TEXCOORDFMT_2D : TEXCOORDFMT_4D);
927		}
928
929		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
930		OUT_BATCH(ss2);
931		OUT_BATCH(i915_get_blend_cntl(op, mask_picture, dest_picture->format));
932	}
933
934	i915_composite_emit_shader(intel, op);
935}
936
937void
938i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
939	       int dstX, int dstY, int w, int h)
940{
941	ScrnInfoPtr scrn = xf86ScreenToScrn(dest->drawable.pScreen);
942	intel_screen_private *intel = intel_get_screen_private(scrn);
943
944	/* 28 + 16 + 10 + 20 + 32 + 16 */
945	intel_batch_start_atomic(scrn, 150);
946
947	if (intel->needs_render_state_emit)
948		i915_emit_composite_setup(scrn);
949
950	if (intel->needs_render_vertex_emit ||
951	    intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) {
952		i915_vertex_flush(intel);
953
954		if (intel_vertex_space(intel) < 256) {
955			intel_next_vertex(intel);
956
957			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
958				  I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
959			OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
960			OUT_BATCH((intel->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) |
961				  (intel->floats_per_vertex << S1_VERTEX_PITCH_SHIFT));
962			intel->vertex_index = 0;
963		} else if (intel->floats_per_vertex != intel->last_floats_per_vertex){
964			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
965				  I1_LOAD_S(1) | 0);
966			OUT_BATCH((intel->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) |
967				  (intel->floats_per_vertex << S1_VERTEX_PITCH_SHIFT));
968
969			intel->vertex_index =
970				(intel->vertex_used + intel->floats_per_vertex - 1) /  intel->floats_per_vertex;
971			intel->vertex_used = intel->vertex_index * intel->floats_per_vertex;
972		}
973
974		intel->last_floats_per_vertex = intel->floats_per_vertex;
975		intel->needs_render_vertex_emit = FALSE;
976	}
977
978	if (intel->prim_offset == 0) {
979		intel->prim_offset = intel->batch_used;
980		OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL);
981		OUT_BATCH(intel->vertex_index);
982	}
983	intel->vertex_count += 3;
984
985	intel->prim_emit(intel,
986			 srcX, srcY,
987			 maskX, maskY,
988			 dstX, dstY,
989			 w, h);
990
991	intel_batch_end_atomic(scrn);
992}
993
994void
995i915_vertex_flush(intel_screen_private *intel)
996{
997	if (intel->prim_offset == 0)
998		return;
999
1000	intel->batch_ptr[intel->prim_offset] |= intel->vertex_count;
1001	intel->prim_offset = 0;
1002
1003	intel->vertex_index += intel->vertex_count;
1004	intel->vertex_count = 0;
1005}
1006
1007void
1008i915_batch_commit_notify(intel_screen_private *intel)
1009{
1010	intel->needs_render_state_emit = TRUE;
1011	intel->last_floats_per_vertex = 0;
1012}
1013