i915_render.c revision 42542f5f
1/*
2 * Copyright © 2006 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Wang Zhenyu <zhenyu.z.wang@intel.com>
25 *    Eric Anholt <eric@anholt.net>
26 *
27 */
28
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
33#include "xorg-server.h"
34#include "xf86.h"
35#include "intel.h"
36#include "i915_reg.h"
37#include "i915_3d.h"
38
/* Pairs a Render picture format with the hardware format bits used for it. */
struct formatinfo {
	int fmt;		/* Render picture format (PICT_*) */
	uint32_t card_fmt;	/* hardware format bits (MAPSURF_* | MT_*) */
};
43
/* Blend description for one Render composite operator. */
struct blendinfo {
	Bool dst_alpha;		/* set when src_blend reads the destination alpha */
	Bool src_alpha;		/* set when dst_blend reads the source alpha */
	uint32_t src_blend;	/* BLENDFACT_* source blend factor */
	uint32_t dst_blend;	/* BLENDFACT_* destination blend factor */
};
50
51static struct blendinfo i915_blend_op[] = {
52	/* Clear */
53	{0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO},
54	/* Src */
55	{0, 0, BLENDFACT_ONE, BLENDFACT_ZERO},
56	/* Dst */
57	{0, 0, BLENDFACT_ZERO, BLENDFACT_ONE},
58	/* Over */
59	{0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA},
60	/* OverReverse */
61	{1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE},
62	/* In */
63	{1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO},
64	/* InReverse */
65	{0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA},
66	/* Out */
67	{1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO},
68	/* OutReverse */
69	{0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA},
70	/* Atop */
71	{1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
72	/* AtopReverse */
73	{1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA},
74	/* Xor */
75	{1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
76	/* Add */
77	{0, 0, BLENDFACT_ONE, BLENDFACT_ONE},
78};
79
80static struct formatinfo i915_tex_formats[] = {
81	{PICT_a8, MAPSURF_8BIT | MT_8BIT_A8},
82	{PICT_a8r8g8b8, MAPSURF_32BIT | MT_32BIT_ARGB8888},
83	{PICT_x8r8g8b8, MAPSURF_32BIT | MT_32BIT_XRGB8888},
84	{PICT_a8b8g8r8, MAPSURF_32BIT | MT_32BIT_ABGR8888},
85	{PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888},
86#if XORG_VERSION_CURRENT >= 10699900
87	{PICT_a2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010},
88	{PICT_a2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010},
89#endif
90	{PICT_r5g6b5, MAPSURF_16BIT | MT_16BIT_RGB565},
91	{PICT_a1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555},
92	{PICT_a4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444},
93};
94
95static uint32_t i915_get_blend_cntl(int op, PicturePtr mask,
96				    uint32_t dst_format)
97{
98	uint32_t sblend, dblend;
99
100	sblend = i915_blend_op[op].src_blend;
101	dblend = i915_blend_op[op].dst_blend;
102
103	/* If there's no dst alpha channel, adjust the blend op so that we'll
104	 * treat it as always 1.
105	 */
106	if (PICT_FORMAT_A(dst_format) == 0 && i915_blend_op[op].dst_alpha) {
107		if (sblend == BLENDFACT_DST_ALPHA)
108			sblend = BLENDFACT_ONE;
109		else if (sblend == BLENDFACT_INV_DST_ALPHA)
110			sblend = BLENDFACT_ZERO;
111	}
112
113	/* i915 engine reads 8bit color buffer into green channel in cases
114	   like color buffer blending .etc, and also writes back green channel.
115	   So with dst_alpha blend we should use color factor. See spec on
116	   "8-bit rendering" */
117	if ((dst_format == PICT_a8) && i915_blend_op[op].dst_alpha) {
118		if (sblend == BLENDFACT_DST_ALPHA)
119			sblend = BLENDFACT_DST_COLR;
120		else if (sblend == BLENDFACT_INV_DST_ALPHA)
121			sblend = BLENDFACT_INV_DST_COLR;
122	}
123
124	/* If the source alpha is being used, then we should only be in a case
125	 * where the source blend factor is 0, and the source blend value is the
126	 * mask channels multiplied by the source picture's alpha.
127	 */
128	if (mask && mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
129	    i915_blend_op[op].src_alpha) {
130		if (dblend == BLENDFACT_SRC_ALPHA) {
131			dblend = BLENDFACT_SRC_COLR;
132		} else if (dblend == BLENDFACT_INV_SRC_ALPHA) {
133			dblend = BLENDFACT_INV_SRC_COLR;
134		}
135	}
136
137	return S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
138		(BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) |
139		(sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
140		(dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
141}
142
/* Bias fields OR-ed into the destination buffer format by
 * i915_get_dest_format(): horizontal bias starts at bit 20, vertical at bit 16.
 */
#define DSTORG_HORT_BIAS(x)             ((x)<<20)
#define DSTORG_VERT_BIAS(x)             ((x)<<16)
145
146static Bool i915_get_dest_format(PicturePtr dest_picture, uint32_t * dst_format)
147{
148	ScrnInfoPtr scrn;
149
150	switch (dest_picture->format) {
151	case PICT_a8r8g8b8:
152	case PICT_x8r8g8b8:
153		*dst_format = COLR_BUF_ARGB8888;
154		break;
155	case PICT_r5g6b5:
156		*dst_format = COLR_BUF_RGB565;
157		break;
158	case PICT_a1r5g5b5:
159	case PICT_x1r5g5b5:
160		*dst_format = COLR_BUF_ARGB1555;
161		break;
162#if XORG_VERSION_CURRENT >= 10699900
163	case PICT_a2r10g10b10:
164	case PICT_x2r10g10b10:
165		*dst_format = COLR_BUF_ARGB2AAA;
166		break;
167#endif
168	case PICT_a8:
169		*dst_format = COLR_BUF_8BIT;
170		break;
171	case PICT_a4r4g4b4:
172	case PICT_x4r4g4b4:
173		*dst_format = COLR_BUF_ARGB4444;
174		break;
175	default:
176		scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen);
177		intel_debug_fallback(scrn,
178				     "Unsupported dest format 0x%x\n",
179				     (int)dest_picture->format);
180		return FALSE;
181	}
182	*dst_format |= DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8);
183	return TRUE;
184}
185
186Bool
187i915_check_composite(int op,
188		     PicturePtr source_picture,
189		     PicturePtr mask_picture,
190		     PicturePtr dest_picture,
191		     int width, int height)
192{
193	ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen);
194	uint32_t tmp1;
195
196	/* Check for unsupported compositing operations. */
197	if (op >= sizeof(i915_blend_op) / sizeof(i915_blend_op[0])) {
198		intel_debug_fallback(scrn, "Unsupported Composite op 0x%x\n",
199				     op);
200		return FALSE;
201	}
202	if (mask_picture != NULL && mask_picture->componentAlpha &&
203	    PICT_FORMAT_RGB(mask_picture->format)) {
204		/* Check if it's component alpha that relies on a source alpha
205		 * and on the source value.  We can only get one of those
206		 * into the single source value that we get to blend with.
207		 */
208		if (i915_blend_op[op].src_alpha &&
209		    (i915_blend_op[op].src_blend != BLENDFACT_ZERO)) {
210			if (op != PictOpOver) {
211				intel_debug_fallback(scrn,
212						     "Component alpha not supported "
213						     "with source alpha and source "
214						     "value blending.\n");
215				return FALSE;
216			}
217		}
218	}
219
220	if (!i915_get_dest_format(dest_picture, &tmp1)) {
221		intel_debug_fallback(scrn, "Get Color buffer format\n");
222		return FALSE;
223	}
224
225	if (width > 2048 || height > 2048)
226		return FALSE;
227
228	return TRUE;
229}
230
231Bool
232i915_check_composite_target(PixmapPtr pixmap)
233{
234	if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048)
235		return FALSE;
236
237	if(!intel_check_pitch_3d(pixmap))
238		return FALSE;
239
240	return TRUE;
241}
242
243Bool
244i915_check_composite_texture(ScreenPtr screen, PicturePtr picture)
245{
246	if (picture->repeatType > RepeatReflect) {
247		ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
248		intel_debug_fallback(scrn, "Unsupported picture repeat %d\n",
249			     picture->repeatType);
250		return FALSE;
251	}
252
253	if (picture->filter != PictFilterNearest &&
254	    picture->filter != PictFilterBilinear) {
255		ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
256		intel_debug_fallback(scrn, "Unsupported filter 0x%x\n",
257				     picture->filter);
258		return FALSE;
259	}
260
261	if (picture->pSourcePict)
262		return FALSE;
263
264	if (picture->pDrawable) {
265		int w, h, i;
266
267		w = picture->pDrawable->width;
268		h = picture->pDrawable->height;
269		if ((w > 2048) || (h > 2048)) {
270			ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
271			intel_debug_fallback(scrn,
272					     "Picture w/h too large (%dx%d)\n",
273					     w, h);
274			return FALSE;
275		}
276
277		for (i = 0;
278		     i < sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0]);
279		     i++) {
280			if (i915_tex_formats[i].fmt == picture->format)
281				break;
282		}
283		if (i == sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0]))
284		{
285			ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
286			intel_debug_fallback(scrn, "Unsupported picture format "
287					     "0x%x\n",
288					     (int)picture->format);
289			return FALSE;
290		}
291
292		return TRUE;
293	}
294
295	return FALSE;
296}
297
298static Bool i915_texture_setup(PicturePtr picture, PixmapPtr pixmap, int unit)
299{
300	ScrnInfoPtr scrn = xf86ScreenToScrn(picture->pDrawable->pScreen);
301	intel_screen_private *intel = intel_get_screen_private(scrn);
302	uint32_t format, pitch, filter;
303	uint32_t wrap_mode, tiling_bits;
304	int i;
305
306	pitch = intel_pixmap_pitch(pixmap);
307	intel->scale_units[unit][0] = 1. / pixmap->drawable.width;
308	intel->scale_units[unit][1] = 1. / pixmap->drawable.height;
309
310	for (i = 0; i < sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0]);
311	     i++) {
312		if (i915_tex_formats[i].fmt == picture->format)
313			break;
314	}
315	if (i == sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0])) {
316		intel_debug_fallback(scrn, "unknown texture format\n");
317		return FALSE;
318	}
319	format = i915_tex_formats[i].card_fmt;
320
321	switch (picture->repeatType) {
322	case RepeatNone:
323		wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
324		break;
325	case RepeatNormal:
326		wrap_mode = TEXCOORDMODE_WRAP;
327		break;
328	case RepeatPad:
329		wrap_mode = TEXCOORDMODE_CLAMP_EDGE;
330		break;
331	case RepeatReflect:
332		wrap_mode = TEXCOORDMODE_MIRROR;
333		break;
334	default:
335		FatalError("Unknown repeat type %d\n", picture->repeatType);
336	}
337
338	switch (picture->filter) {
339	case PictFilterNearest:
340		filter = (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT) |
341		    (FILTER_NEAREST << SS2_MIN_FILTER_SHIFT);
342		break;
343	case PictFilterBilinear:
344		filter = (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
345		    (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT);
346		break;
347	default:
348		intel_debug_fallback(scrn, "Bad filter 0x%x\n",
349				     picture->filter);
350		return FALSE;
351	}
352
353	/* offset filled in at emit time */
354	if (intel_pixmap_tiled(pixmap)) {
355		tiling_bits = MS3_TILED_SURFACE;
356		if (intel_get_pixmap_private(pixmap)->tiling
357				== I915_TILING_Y)
358			tiling_bits |= MS3_TILE_WALK;
359	} else
360		tiling_bits = 0;
361
362	intel->texture[unit] = pixmap;
363	intel->mapstate[unit * 3 + 0] = 0;
364	intel->mapstate[unit * 3 + 1] = format |
365	    tiling_bits |
366	    ((pixmap->drawable.height - 1) << MS3_HEIGHT_SHIFT) |
367	    ((pixmap->drawable.width - 1) << MS3_WIDTH_SHIFT);
368	intel->mapstate[unit * 3 + 2] = ((pitch / 4) - 1) << MS4_PITCH_SHIFT;
369
370	intel->samplerstate[unit * 3 + 0] = (MIPFILTER_NONE <<
371					     SS2_MIP_FILTER_SHIFT);
372	intel->samplerstate[unit * 3 + 0] |= filter;
373	intel->samplerstate[unit * 3 + 1] = SS3_NORMALIZED_COORDS;
374	intel->samplerstate[unit * 3 + 1] |=
375	    wrap_mode << SS3_TCX_ADDR_MODE_SHIFT;
376	intel->samplerstate[unit * 3 + 1] |=
377	    wrap_mode << SS3_TCY_ADDR_MODE_SHIFT;
378	intel->samplerstate[unit * 3 + 1] |= unit << SS3_TEXTUREMAP_INDEX_SHIFT;
379	intel->samplerstate[unit * 3 + 2] = 0x00000000;	/* border color */
380
381	intel->transform[unit] = picture->transform;
382
383	return TRUE;
384}
385
386static void
387i915_emit_composite_primitive_identity_source(intel_screen_private *intel,
388					      int srcX, int srcY,
389					      int maskX, int maskY,
390					      int dstX, int dstY,
391					      int w, int h)
392{
393	OUT_VERTEX(dstX + w);
394	OUT_VERTEX(dstY + h);
395	OUT_VERTEX((srcX + w) * intel->scale_units[0][0]);
396	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
397
398	OUT_VERTEX(dstX);
399	OUT_VERTEX(dstY + h);
400	OUT_VERTEX(srcX * intel->scale_units[0][0]);
401	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
402
403	OUT_VERTEX(dstX);
404	OUT_VERTEX(dstY);
405	OUT_VERTEX(srcX * intel->scale_units[0][0]);
406	OUT_VERTEX(srcY * intel->scale_units[0][1]);
407}
408
409static void
410i915_emit_composite_primitive_affine_source(intel_screen_private *intel,
411					    int srcX, int srcY,
412					    int maskX, int maskY,
413					    int dstX, int dstY,
414					    int w, int h)
415{
416	float src_x[3], src_y[3];
417
418	if (!intel_get_transformed_coordinates(srcX, srcY,
419					      intel->transform[0],
420					      &src_x[0],
421					      &src_y[0]))
422		return;
423
424	if (!intel_get_transformed_coordinates(srcX, srcY + h,
425					      intel->transform[0],
426					      &src_x[1],
427					      &src_y[1]))
428		return;
429
430	if (!intel_get_transformed_coordinates(srcX + w, srcY + h,
431					      intel->transform[0],
432					      &src_x[2],
433					      &src_y[2]))
434		return;
435
436	OUT_VERTEX(dstX + w);
437	OUT_VERTEX(dstY + h);
438	OUT_VERTEX(src_x[2] * intel->scale_units[0][0]);
439	OUT_VERTEX(src_y[2] * intel->scale_units[0][1]);
440
441	OUT_VERTEX(dstX);
442	OUT_VERTEX(dstY + h);
443	OUT_VERTEX(src_x[1] * intel->scale_units[0][0]);
444	OUT_VERTEX(src_y[1] * intel->scale_units[0][1]);
445
446	OUT_VERTEX(dstX);
447	OUT_VERTEX(dstY);
448	OUT_VERTEX(src_x[0] * intel->scale_units[0][0]);
449	OUT_VERTEX(src_y[0] * intel->scale_units[0][1]);
450}
451
452static void
453i915_emit_composite_primitive_identity_source_mask(intel_screen_private *intel,
454						   int srcX, int srcY,
455						   int maskX, int maskY,
456						   int dstX, int dstY,
457						   int w, int h)
458{
459	OUT_VERTEX(dstX + w);
460	OUT_VERTEX(dstY + h);
461	OUT_VERTEX((srcX + w) * intel->scale_units[0][0]);
462	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
463	OUT_VERTEX((maskX + w) * intel->scale_units[1][0]);
464	OUT_VERTEX((maskY + h) * intel->scale_units[1][1]);
465
466	OUT_VERTEX(dstX);
467	OUT_VERTEX(dstY + h);
468	OUT_VERTEX(srcX * intel->scale_units[0][0]);
469	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
470	OUT_VERTEX(maskX * intel->scale_units[1][0]);
471	OUT_VERTEX((maskY + h) * intel->scale_units[1][1]);
472
473	OUT_VERTEX(dstX);
474	OUT_VERTEX(dstY);
475	OUT_VERTEX(srcX * intel->scale_units[0][0]);
476	OUT_VERTEX(srcY * intel->scale_units[0][1]);
477	OUT_VERTEX(maskX * intel->scale_units[1][0]);
478	OUT_VERTEX(maskY * intel->scale_units[1][1]);
479}
480
481static void
482i915_emit_composite_primitive(intel_screen_private *intel,
483			      int srcX, int srcY,
484			      int maskX, int maskY,
485			      int dstX, int dstY,
486			      int w, int h)
487{
488	Bool is_affine_src = TRUE, is_affine_mask = TRUE;
489	int tex_unit = 0;
490	int src_unit = -1, mask_unit = -1;
491	float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
492
493	src_unit = tex_unit++;
494
495	is_affine_src = intel_transform_is_affine(intel->transform[src_unit]);
496	if (is_affine_src) {
497		if (!intel_get_transformed_coordinates(srcX, srcY,
498						      intel->
499						      transform[src_unit],
500						      &src_x[0],
501						      &src_y[0]))
502			return;
503
504		if (!intel_get_transformed_coordinates(srcX, srcY + h,
505						      intel->
506						      transform[src_unit],
507						      &src_x[1],
508						      &src_y[1]))
509			return;
510
511		if (!intel_get_transformed_coordinates(srcX + w, srcY + h,
512						      intel->
513						      transform[src_unit],
514						      &src_x[2],
515						      &src_y[2]))
516			return;
517	} else {
518		if (!intel_get_transformed_coordinates_3d(srcX, srcY,
519							 intel->
520							 transform[src_unit],
521							 &src_x[0],
522							 &src_y[0],
523							 &src_w[0]))
524			return;
525
526		if (!intel_get_transformed_coordinates_3d(srcX, srcY + h,
527							 intel->
528							 transform[src_unit],
529							 &src_x[1],
530							 &src_y[1],
531							 &src_w[1]))
532			return;
533
534		if (!intel_get_transformed_coordinates_3d(srcX + w, srcY + h,
535							 intel->
536							 transform[src_unit],
537							 &src_x[2],
538							 &src_y[2],
539							 &src_w[2]))
540			return;
541	}
542
543	if (intel->render_mask) {
544		mask_unit = tex_unit++;
545
546		is_affine_mask = intel_transform_is_affine(intel->transform[mask_unit]);
547		if (is_affine_mask) {
548			if (!intel_get_transformed_coordinates(maskX, maskY,
549							      intel->
550							      transform[mask_unit],
551							      &mask_x[0],
552							      &mask_y[0]))
553				return;
554
555			if (!intel_get_transformed_coordinates(maskX, maskY + h,
556							      intel->
557							      transform[mask_unit],
558							      &mask_x[1],
559							      &mask_y[1]))
560				return;
561
562			if (!intel_get_transformed_coordinates(maskX + w, maskY + h,
563							      intel->
564							      transform[mask_unit],
565							      &mask_x[2],
566							      &mask_y[2]))
567				return;
568		} else {
569			if (!intel_get_transformed_coordinates_3d(maskX, maskY,
570								 intel->
571								 transform[mask_unit],
572								 &mask_x[0],
573								 &mask_y[0],
574								 &mask_w[0]))
575				return;
576
577			if (!intel_get_transformed_coordinates_3d(maskX, maskY + h,
578								 intel->
579								 transform[mask_unit],
580								 &mask_x[1],
581								 &mask_y[1],
582								 &mask_w[1]))
583				return;
584
585			if (!intel_get_transformed_coordinates_3d(maskX + w, maskY + h,
586								 intel->
587								 transform[mask_unit],
588								 &mask_x[2],
589								 &mask_y[2],
590								 &mask_w[2]))
591				return;
592		}
593	}
594
595	OUT_VERTEX(dstX + w);
596	OUT_VERTEX(dstY + h);
597	OUT_VERTEX(src_x[2] * intel->scale_units[src_unit][0]);
598	OUT_VERTEX(src_y[2] * intel->scale_units[src_unit][1]);
599	if (!is_affine_src) {
600		OUT_VERTEX(0.0);
601		OUT_VERTEX(src_w[2]);
602	}
603	if (intel->render_mask) {
604		OUT_VERTEX(mask_x[2] * intel->scale_units[mask_unit][0]);
605		OUT_VERTEX(mask_y[2] * intel->scale_units[mask_unit][1]);
606		if (!is_affine_mask) {
607			OUT_VERTEX(0.0);
608			OUT_VERTEX(mask_w[2]);
609		}
610	}
611
612	OUT_VERTEX(dstX);
613	OUT_VERTEX(dstY + h);
614	OUT_VERTEX(src_x[1] * intel->scale_units[src_unit][0]);
615	OUT_VERTEX(src_y[1] * intel->scale_units[src_unit][1]);
616	if (!is_affine_src) {
617		OUT_VERTEX(0.0);
618		OUT_VERTEX(src_w[1]);
619	}
620	if (intel->render_mask) {
621		OUT_VERTEX(mask_x[1] * intel->scale_units[mask_unit][0]);
622		OUT_VERTEX(mask_y[1] * intel->scale_units[mask_unit][1]);
623		if (!is_affine_mask) {
624			OUT_VERTEX(0.0);
625			OUT_VERTEX(mask_w[1]);
626		}
627	}
628
629	OUT_VERTEX(dstX);
630	OUT_VERTEX(dstY);
631	OUT_VERTEX(src_x[0] * intel->scale_units[src_unit][0]);
632	OUT_VERTEX(src_y[0] * intel->scale_units[src_unit][1]);
633	if (!is_affine_src) {
634		OUT_VERTEX(0.0);
635		OUT_VERTEX(src_w[0]);
636	}
637	if (intel->render_mask) {
638		OUT_VERTEX(mask_x[0] * intel->scale_units[mask_unit][0]);
639		OUT_VERTEX(mask_y[0] * intel->scale_units[mask_unit][1]);
640		if (!is_affine_mask) {
641			OUT_VERTEX(0.0);
642			OUT_VERTEX(mask_w[0]);
643		}
644	}
645}
646
647Bool
648i915_prepare_composite(int op, PicturePtr source_picture,
649		       PicturePtr mask_picture, PicturePtr dest_picture,
650		       PixmapPtr source, PixmapPtr mask, PixmapPtr dest)
651{
652	ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen);
653	intel_screen_private *intel = intel_get_screen_private(scrn);
654	drm_intel_bo *bo_table[] = {
655		NULL,		/* batch_bo */
656		intel_get_pixmap_bo(dest),
657		intel_get_pixmap_bo(source),
658		mask ? intel_get_pixmap_bo(mask) : NULL,
659	};
660	int tex_unit = 0;
661	int floats_per_vertex;
662
663	intel->render_source_picture = source_picture;
664	intel->render_source = source;
665	intel->render_mask_picture = mask_picture;
666	intel->render_mask = mask;
667	intel->render_dest_picture = dest_picture;
668	intel->render_dest = dest;
669
670	if (!intel_check_pitch_3d(source))
671		return FALSE;
672
673	if (mask && !intel_check_pitch_3d(mask))
674		return FALSE;
675
676	if (!intel_check_pitch_3d(dest))
677		return FALSE;
678
679	if (!i915_get_dest_format(dest_picture,
680				  &intel->i915_render_state.dst_format))
681		return FALSE;
682
683	if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table)))
684		return FALSE;
685
686	if (mask_picture != NULL && mask_picture->componentAlpha &&
687	    PICT_FORMAT_RGB(mask_picture->format)) {
688		/* Check if it's component alpha that relies on a source alpha
689		 * and on the source value.  We can only get one of those
690		 * into the single source value that we get to blend with.
691		 */
692		if (i915_blend_op[op].src_alpha &&
693		    (i915_blend_op[op].src_blend != BLENDFACT_ZERO))
694			return FALSE;
695	}
696
697	intel->transform[0] = NULL;
698	intel->scale_units[0][0] = -1;
699	intel->scale_units[0][1] = -1;
700	intel->transform[1] = NULL;
701	intel->scale_units[1][0] = -1;
702	intel->scale_units[1][1] = -1;
703
704	floats_per_vertex = 2;		/* dest x/y */
705	if (!i915_texture_setup(source_picture, source, tex_unit++)) {
706		intel_debug_fallback(scrn, "fail to setup src texture\n");
707		return FALSE;
708	}
709
710	if (intel_transform_is_affine(source_picture->transform))
711		floats_per_vertex += 2;	/* src x/y */
712	else
713		floats_per_vertex += 4;	/* src x/y/z/w */
714
715	if (mask_picture != NULL) {
716		assert(mask != NULL);
717		if (!i915_texture_setup(mask_picture, mask, tex_unit++)) {
718			intel_debug_fallback(scrn,
719					     "fail to setup mask texture\n");
720			return FALSE;
721		}
722
723		if (intel_transform_is_affine(mask_picture->transform))
724			floats_per_vertex += 2;	/* mask x/y */
725		else
726			floats_per_vertex += 4;	/* mask x/y/z/w */
727	}
728
729	intel->i915_render_state.op = op;
730
731	if (intel_pixmap_is_dirty(source) || intel_pixmap_is_dirty(mask))
732		intel_batch_emit_flush(scrn);
733
734	intel->needs_render_state_emit = TRUE;
735
736	intel->prim_emit = i915_emit_composite_primitive;
737	if (!mask) {
738		if (intel->transform[0] == NULL)
739			intel->prim_emit = i915_emit_composite_primitive_identity_source;
740		else if (intel_transform_is_affine(intel->transform[0]))
741			intel->prim_emit = i915_emit_composite_primitive_affine_source;
742	} else {
743		if (intel->transform[0] == NULL) {
744			if (intel->transform[1] == NULL)
745				intel->prim_emit = i915_emit_composite_primitive_identity_source_mask;
746		}
747	}
748
749	if (floats_per_vertex != intel->floats_per_vertex) {
750		intel->floats_per_vertex = floats_per_vertex;
751		intel->needs_render_vertex_emit = TRUE;
752	}
753
754	return TRUE;
755}
756
757static void
758i915_composite_emit_shader(intel_screen_private *intel, CARD8 op)
759{
760	PicturePtr mask_picture = intel->render_mask_picture;
761	PixmapPtr mask = intel->render_mask;
762	int src_reg, mask_reg;
763	Bool dest_is_alpha = PIXMAN_FORMAT_RGB(intel->render_dest_picture->format) == 0;
764	FS_LOCALS();
765
766	FS_BEGIN();
767
768	/* Declare the registers necessary for our program.  */
769	i915_fs_dcl(FS_T0);
770	i915_fs_dcl(FS_S0);
771	if (!mask) {
772		/* No mask, so load directly to output color */
773		if (dest_is_alpha)
774			src_reg = FS_R0;
775		else
776			src_reg = FS_OC;
777
778		if (intel_transform_is_affine(intel->transform[0]))
779			i915_fs_texld(src_reg, FS_S0, FS_T0);
780		else
781			i915_fs_texldp(src_reg, FS_S0, FS_T0);
782
783		if (src_reg != FS_OC)
784			i915_fs_mov(FS_OC, i915_fs_operand(src_reg, W, W, W, W));
785	} else {
786		i915_fs_dcl(FS_T1);
787		i915_fs_dcl(FS_S1);
788
789		/* Load the source_picture texel */
790		if (intel_transform_is_affine(intel->transform[0]))
791			i915_fs_texld(FS_R0, FS_S0, FS_T0);
792		else
793			i915_fs_texldp(FS_R0, FS_S0, FS_T0);
794
795		src_reg = FS_R0;
796
797		/* Load the mask_picture texel */
798		if (intel_transform_is_affine(intel->transform[1]))
799			i915_fs_texld(FS_R1, FS_S1, FS_T1);
800		else
801			i915_fs_texldp(FS_R1, FS_S1, FS_T1);
802
803		mask_reg = FS_R1;
804
805		if (dest_is_alpha) {
806			i915_fs_mul(FS_OC,
807				    i915_fs_operand(src_reg, W, W, W, W),
808				    i915_fs_operand(mask_reg, W, W, W, W));
809		} else {
810			/* If component alpha is active in the mask and the blend
811			 * operation uses the source alpha, then we know we don't
812			 * need the source value (otherwise we would have hit a
813			 * fallback earlier), so we provide the source alpha (src.A *
814			 * mask.X) as output color.
815			 * Conversely, if CA is set and we don't need the source alpha,
816			 * then we produce the source value (src.X * mask.X) and the
817			 * source alpha is unused.  Otherwise, we provide the non-CA
818			 * source value (src.X * mask.A).
819			 */
820			if (mask_picture->componentAlpha &&
821			    PICT_FORMAT_RGB(mask_picture->format)) {
822				if (i915_blend_op[op].src_alpha) {
823					i915_fs_mul(FS_OC,
824						    i915_fs_operand(src_reg, W, W, W, W),
825						    i915_fs_operand_reg(mask_reg));
826				} else {
827					i915_fs_mul(FS_OC,
828						    i915_fs_operand_reg(src_reg),
829						    i915_fs_operand_reg(mask_reg));
830				}
831			} else {
832				i915_fs_mul(FS_OC,
833					    i915_fs_operand_reg(src_reg),
834					    i915_fs_operand(mask_reg, W, W, W, W));
835			}
836		}
837	}
838
839	FS_END();
840}
841
842static void i915_emit_composite_setup(ScrnInfoPtr scrn)
843{
844	intel_screen_private *intel = intel_get_screen_private(scrn);
845	int op = intel->i915_render_state.op;
846	PicturePtr mask_picture = intel->render_mask_picture;
847	PicturePtr dest_picture = intel->render_dest_picture;
848	PixmapPtr mask = intel->render_mask;
849	PixmapPtr dest = intel->render_dest;
850	int tex_count, t;
851
852	intel->needs_render_state_emit = FALSE;
853
854	IntelEmitInvarientState(scrn);
855	intel->last_3d = LAST_3D_RENDER;
856
857	tex_count = 1 + (mask != NULL);
858
859	assert(intel->in_batch_atomic);
860
861	if (tex_count != 0) {
862	    OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
863	    OUT_BATCH((1 << tex_count) - 1);
864	    for (t = 0; t < tex_count; t++) {
865		OUT_RELOC_PIXMAP(intel->texture[t], I915_GEM_DOMAIN_SAMPLER, 0, 0);
866		OUT_BATCH(intel->mapstate[3*t + 1]);
867		OUT_BATCH(intel->mapstate[3*t + 2]);
868	    }
869
870	    OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count));
871	    OUT_BATCH((1 << tex_count) - 1);
872	    for (t = 0; t < tex_count; t++) {
873		OUT_BATCH(intel->samplerstate[3*t + 0]);
874		OUT_BATCH(intel->samplerstate[3*t + 1]);
875		OUT_BATCH(intel->samplerstate[3*t + 2]);
876	    }
877	}
878
879	/* BUF_INFO is an implicit flush, so avoid if the target has not changed.
880	 * XXX However for reasons unfathomed, correct rendering in KDE requires
881	 * at least a MI_FLUSH | INHIBIT_RENDER_CACHE_FLUSH here.
882	 */
883	if (1) {
884		uint32_t tiling_bits;
885
886		if (intel_pixmap_tiled(dest)) {
887			tiling_bits = BUF_3D_TILED_SURFACE;
888			if (intel_get_pixmap_private(dest)->tiling
889			    == I915_TILING_Y)
890				tiling_bits |= BUF_3D_TILE_WALK_Y;
891		} else
892			tiling_bits = 0;
893
894		OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
895		OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling_bits |
896			  BUF_3D_PITCH(intel_pixmap_pitch(dest)));
897		OUT_RELOC_PIXMAP(dest, I915_GEM_DOMAIN_RENDER,
898				 I915_GEM_DOMAIN_RENDER, 0);
899
900		OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
901		OUT_BATCH(intel->i915_render_state.dst_format);
902
903		/* draw rect is unconditional */
904		OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
905		OUT_BATCH(0x00000000);
906		OUT_BATCH(0x00000000);	/* ymin, xmin */
907		OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) |
908			  DRAW_XMAX(dest->drawable.width - 1));
909		/* yorig, xorig (relate to color buffer?) */
910		OUT_BATCH(0x00000000);
911	}
912
913	{
914		uint32_t ss2;
915
916		ss2 = ~0;
917		ss2 &= ~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT);
918		ss2 |= S2_TEXCOORD_FMT(0,
919				       intel_transform_is_affine(intel->transform[0]) ?
920				       TEXCOORDFMT_2D : TEXCOORDFMT_4D);
921		if (mask) {
922		    ss2 &= ~S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT);
923		    ss2 |= S2_TEXCOORD_FMT(1,
924					   intel_transform_is_affine(intel->transform[1]) ?
925					   TEXCOORDFMT_2D : TEXCOORDFMT_4D);
926		}
927
928		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
929		OUT_BATCH(ss2);
930		OUT_BATCH(i915_get_blend_cntl(op, mask_picture, dest_picture->format));
931	}
932
933	i915_composite_emit_shader(intel, op);
934}
935
936void
937i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
938	       int dstX, int dstY, int w, int h)
939{
940	ScrnInfoPtr scrn = xf86ScreenToScrn(dest->drawable.pScreen);
941	intel_screen_private *intel = intel_get_screen_private(scrn);
942
943	/* 28 + 16 + 10 + 20 + 32 + 16 */
944	intel_batch_start_atomic(scrn, 150);
945
946	if (intel->needs_render_state_emit)
947		i915_emit_composite_setup(scrn);
948
949	if (intel->needs_render_vertex_emit ||
950	    intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) {
951		i915_vertex_flush(intel);
952
953		if (intel_vertex_space(intel) < 256) {
954			intel_next_vertex(intel);
955
956			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
957				  I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
958			OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
959			OUT_BATCH((intel->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) |
960				  (intel->floats_per_vertex << S1_VERTEX_PITCH_SHIFT));
961			intel->vertex_index = 0;
962		} else if (intel->floats_per_vertex != intel->last_floats_per_vertex){
963			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
964				  I1_LOAD_S(1) | 0);
965			OUT_BATCH((intel->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) |
966				  (intel->floats_per_vertex << S1_VERTEX_PITCH_SHIFT));
967
968			intel->vertex_index =
969				(intel->vertex_used + intel->floats_per_vertex - 1) /  intel->floats_per_vertex;
970			intel->vertex_used = intel->vertex_index * intel->floats_per_vertex;
971		}
972
973		intel->last_floats_per_vertex = intel->floats_per_vertex;
974		intel->needs_render_vertex_emit = FALSE;
975	}
976
977	if (intel->prim_offset == 0) {
978		intel->prim_offset = intel->batch_used;
979		OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL);
980		OUT_BATCH(intel->vertex_index);
981	}
982	intel->vertex_count += 3;
983
984	intel->prim_emit(intel,
985			 srcX, srcY,
986			 maskX, maskY,
987			 dstX, dstY,
988			 w, h);
989
990	intel_batch_end_atomic(scrn);
991}
992
993void
994i915_vertex_flush(intel_screen_private *intel)
995{
996	if (intel->prim_offset == 0)
997		return;
998
999	intel->batch_ptr[intel->prim_offset] |= intel->vertex_count;
1000	intel->prim_offset = 0;
1001
1002	intel->vertex_index += intel->vertex_count;
1003	intel->vertex_count = 0;
1004}
1005
1006void
1007i915_batch_commit_notify(intel_screen_private *intel)
1008{
1009	intel->needs_render_state_emit = TRUE;
1010	intel->last_floats_per_vertex = 0;
1011}
1012