i915_render.c revision 03b705cf
1/*
2 * Copyright © 2006 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Wang Zhenyu <zhenyu.z.wang@intel.com>
25 *    Eric Anholt <eric@anholt.net>
26 *
27 */
28
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
33#include "xf86.h"
34#include "intel.h"
35#include "i915_reg.h"
36#include "i915_3d.h"
37
/*
 * One row of the texture-format lookup table: pairs a Render picture
 * format with the bits the sampler needs to read that format.
 */
struct formatinfo {
	/* Render format code; compared against the picture's format field. */
	int fmt;
	/* MAPSURF_* | MT_* bits that get OR'ed into the map state dword. */
	uint32_t card_fmt;
};
42
43struct blendinfo {
44	Bool dst_alpha;
45	Bool src_alpha;
46	uint32_t src_blend;
47	uint32_t dst_blend;
48};
49
50static struct blendinfo i915_blend_op[] = {
51	/* Clear */
52	{0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO},
53	/* Src */
54	{0, 0, BLENDFACT_ONE, BLENDFACT_ZERO},
55	/* Dst */
56	{0, 0, BLENDFACT_ZERO, BLENDFACT_ONE},
57	/* Over */
58	{0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA},
59	/* OverReverse */
60	{1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE},
61	/* In */
62	{1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO},
63	/* InReverse */
64	{0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA},
65	/* Out */
66	{1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO},
67	/* OutReverse */
68	{0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA},
69	/* Atop */
70	{1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
71	/* AtopReverse */
72	{1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA},
73	/* Xor */
74	{1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
75	/* Add */
76	{0, 0, BLENDFACT_ONE, BLENDFACT_ONE},
77};
78
79static struct formatinfo i915_tex_formats[] = {
80	{PICT_a8, MAPSURF_8BIT | MT_8BIT_A8},
81	{PICT_a8r8g8b8, MAPSURF_32BIT | MT_32BIT_ARGB8888},
82	{PICT_x8r8g8b8, MAPSURF_32BIT | MT_32BIT_XRGB8888},
83	{PICT_a8b8g8r8, MAPSURF_32BIT | MT_32BIT_ABGR8888},
84	{PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888},
85#if XORG_VERSION_CURRENT >= 10699900
86	{PICT_a2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010},
87	{PICT_a2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010},
88#endif
89	{PICT_r5g6b5, MAPSURF_16BIT | MT_16BIT_RGB565},
90	{PICT_a1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555},
91	{PICT_a4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444},
92};
93
94static uint32_t i915_get_blend_cntl(int op, PicturePtr mask,
95				    uint32_t dst_format)
96{
97	uint32_t sblend, dblend;
98
99	sblend = i915_blend_op[op].src_blend;
100	dblend = i915_blend_op[op].dst_blend;
101
102	/* If there's no dst alpha channel, adjust the blend op so that we'll
103	 * treat it as always 1.
104	 */
105	if (PICT_FORMAT_A(dst_format) == 0 && i915_blend_op[op].dst_alpha) {
106		if (sblend == BLENDFACT_DST_ALPHA)
107			sblend = BLENDFACT_ONE;
108		else if (sblend == BLENDFACT_INV_DST_ALPHA)
109			sblend = BLENDFACT_ZERO;
110	}
111
112	/* i915 engine reads 8bit color buffer into green channel in cases
113	   like color buffer blending .etc, and also writes back green channel.
114	   So with dst_alpha blend we should use color factor. See spec on
115	   "8-bit rendering" */
116	if ((dst_format == PICT_a8) && i915_blend_op[op].dst_alpha) {
117		if (sblend == BLENDFACT_DST_ALPHA)
118			sblend = BLENDFACT_DST_COLR;
119		else if (sblend == BLENDFACT_INV_DST_ALPHA)
120			sblend = BLENDFACT_INV_DST_COLR;
121	}
122
123	/* If the source alpha is being used, then we should only be in a case
124	 * where the source blend factor is 0, and the source blend value is the
125	 * mask channels multiplied by the source picture's alpha.
126	 */
127	if (mask && mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
128	    i915_blend_op[op].src_alpha) {
129		if (dblend == BLENDFACT_SRC_ALPHA) {
130			dblend = BLENDFACT_SRC_COLR;
131		} else if (dblend == BLENDFACT_INV_SRC_ALPHA) {
132			dblend = BLENDFACT_INV_SRC_COLR;
133		}
134	}
135
136	return S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
137		(BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) |
138		(sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
139		(dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
140}
141
/* Bit-field helpers for the bias values OR'ed into the destination format
 * word: the horizontal bias starts at bit 20, the vertical bias at bit 16.
 */
#define DSTORG_HORT_BIAS(x)	((x) << 20)
#define DSTORG_VERT_BIAS(x)	((x) << 16)
144
145static Bool i915_get_dest_format(PicturePtr dest_picture, uint32_t * dst_format)
146{
147	ScrnInfoPtr scrn;
148
149	switch (dest_picture->format) {
150	case PICT_a8r8g8b8:
151	case PICT_x8r8g8b8:
152		*dst_format = COLR_BUF_ARGB8888;
153		break;
154	case PICT_r5g6b5:
155		*dst_format = COLR_BUF_RGB565;
156		break;
157	case PICT_a1r5g5b5:
158	case PICT_x1r5g5b5:
159		*dst_format = COLR_BUF_ARGB1555;
160		break;
161#if XORG_VERSION_CURRENT >= 10699900
162	case PICT_a2r10g10b10:
163	case PICT_x2r10g10b10:
164		*dst_format = COLR_BUF_ARGB2AAA;
165		break;
166#endif
167	case PICT_a8:
168		*dst_format = COLR_BUF_8BIT;
169		break;
170	case PICT_a4r4g4b4:
171	case PICT_x4r4g4b4:
172		*dst_format = COLR_BUF_ARGB4444;
173		break;
174	default:
175		scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen);
176		intel_debug_fallback(scrn,
177				     "Unsupported dest format 0x%x\n",
178				     (int)dest_picture->format);
179		return FALSE;
180	}
181	*dst_format |= DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8);
182	return TRUE;
183}
184
185Bool
186i915_check_composite(int op,
187		     PicturePtr source_picture,
188		     PicturePtr mask_picture,
189		     PicturePtr dest_picture,
190		     int width, int height)
191{
192	ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen);
193	uint32_t tmp1;
194
195	/* Check for unsupported compositing operations. */
196	if (op >= sizeof(i915_blend_op) / sizeof(i915_blend_op[0])) {
197		intel_debug_fallback(scrn, "Unsupported Composite op 0x%x\n",
198				     op);
199		return FALSE;
200	}
201	if (mask_picture != NULL && mask_picture->componentAlpha &&
202	    PICT_FORMAT_RGB(mask_picture->format)) {
203		/* Check if it's component alpha that relies on a source alpha
204		 * and on the source value.  We can only get one of those
205		 * into the single source value that we get to blend with.
206		 */
207		if (i915_blend_op[op].src_alpha &&
208		    (i915_blend_op[op].src_blend != BLENDFACT_ZERO)) {
209			if (op != PictOpOver) {
210				intel_debug_fallback(scrn,
211						     "Component alpha not supported "
212						     "with source alpha and source "
213						     "value blending.\n");
214				return FALSE;
215			}
216		}
217	}
218
219	if (!i915_get_dest_format(dest_picture, &tmp1)) {
220		intel_debug_fallback(scrn, "Get Color buffer format\n");
221		return FALSE;
222	}
223
224	if (width > 2048 || height > 2048)
225		return FALSE;
226
227	return TRUE;
228}
229
230Bool
231i915_check_composite_target(PixmapPtr pixmap)
232{
233	if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048)
234		return FALSE;
235
236	if(!intel_check_pitch_3d(pixmap))
237		return FALSE;
238
239	return TRUE;
240}
241
242Bool
243i915_check_composite_texture(ScreenPtr screen, PicturePtr picture)
244{
245	if (picture->repeatType > RepeatReflect) {
246		ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
247		intel_debug_fallback(scrn, "Unsupported picture repeat %d\n",
248			     picture->repeatType);
249		return FALSE;
250	}
251
252	if (picture->filter != PictFilterNearest &&
253	    picture->filter != PictFilterBilinear) {
254		ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
255		intel_debug_fallback(scrn, "Unsupported filter 0x%x\n",
256				     picture->filter);
257		return FALSE;
258	}
259
260	if (picture->pSourcePict)
261		return FALSE;
262
263	if (picture->pDrawable) {
264		int w, h, i;
265
266		w = picture->pDrawable->width;
267		h = picture->pDrawable->height;
268		if ((w > 2048) || (h > 2048)) {
269			ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
270			intel_debug_fallback(scrn,
271					     "Picture w/h too large (%dx%d)\n",
272					     w, h);
273			return FALSE;
274		}
275
276		for (i = 0;
277		     i < sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0]);
278		     i++) {
279			if (i915_tex_formats[i].fmt == picture->format)
280				break;
281		}
282		if (i == sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0]))
283		{
284			ScrnInfoPtr scrn = xf86ScreenToScrn(screen);
285			intel_debug_fallback(scrn, "Unsupported picture format "
286					     "0x%x\n",
287					     (int)picture->format);
288			return FALSE;
289		}
290
291		return TRUE;
292	}
293
294	return FALSE;
295}
296
297static Bool i915_texture_setup(PicturePtr picture, PixmapPtr pixmap, int unit)
298{
299	ScrnInfoPtr scrn = xf86ScreenToScrn(picture->pDrawable->pScreen);
300	intel_screen_private *intel = intel_get_screen_private(scrn);
301	uint32_t format, pitch, filter;
302	uint32_t wrap_mode, tiling_bits;
303	int i;
304
305	pitch = intel_pixmap_pitch(pixmap);
306	intel->scale_units[unit][0] = 1. / pixmap->drawable.width;
307	intel->scale_units[unit][1] = 1. / pixmap->drawable.height;
308
309	for (i = 0; i < sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0]);
310	     i++) {
311		if (i915_tex_formats[i].fmt == picture->format)
312			break;
313	}
314	if (i == sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0])) {
315		intel_debug_fallback(scrn, "unknown texture format\n");
316		return FALSE;
317	}
318	format = i915_tex_formats[i].card_fmt;
319
320	switch (picture->repeatType) {
321	case RepeatNone:
322		wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
323		break;
324	case RepeatNormal:
325		wrap_mode = TEXCOORDMODE_WRAP;
326		break;
327	case RepeatPad:
328		wrap_mode = TEXCOORDMODE_CLAMP_EDGE;
329		break;
330	case RepeatReflect:
331		wrap_mode = TEXCOORDMODE_MIRROR;
332		break;
333	default:
334		FatalError("Unknown repeat type %d\n", picture->repeatType);
335	}
336
337	switch (picture->filter) {
338	case PictFilterNearest:
339		filter = (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT) |
340		    (FILTER_NEAREST << SS2_MIN_FILTER_SHIFT);
341		break;
342	case PictFilterBilinear:
343		filter = (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
344		    (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT);
345		break;
346	default:
347		intel_debug_fallback(scrn, "Bad filter 0x%x\n",
348				     picture->filter);
349		return FALSE;
350	}
351
352	/* offset filled in at emit time */
353	if (intel_pixmap_tiled(pixmap)) {
354		tiling_bits = MS3_TILED_SURFACE;
355		if (intel_get_pixmap_private(pixmap)->tiling
356				== I915_TILING_Y)
357			tiling_bits |= MS3_TILE_WALK;
358	} else
359		tiling_bits = 0;
360
361	intel->texture[unit] = pixmap;
362	intel->mapstate[unit * 3 + 0] = 0;
363	intel->mapstate[unit * 3 + 1] = format |
364	    tiling_bits |
365	    ((pixmap->drawable.height - 1) << MS3_HEIGHT_SHIFT) |
366	    ((pixmap->drawable.width - 1) << MS3_WIDTH_SHIFT);
367	intel->mapstate[unit * 3 + 2] = ((pitch / 4) - 1) << MS4_PITCH_SHIFT;
368
369	intel->samplerstate[unit * 3 + 0] = (MIPFILTER_NONE <<
370					     SS2_MIP_FILTER_SHIFT);
371	intel->samplerstate[unit * 3 + 0] |= filter;
372	intel->samplerstate[unit * 3 + 1] = SS3_NORMALIZED_COORDS;
373	intel->samplerstate[unit * 3 + 1] |=
374	    wrap_mode << SS3_TCX_ADDR_MODE_SHIFT;
375	intel->samplerstate[unit * 3 + 1] |=
376	    wrap_mode << SS3_TCY_ADDR_MODE_SHIFT;
377	intel->samplerstate[unit * 3 + 1] |= unit << SS3_TEXTUREMAP_INDEX_SHIFT;
378	intel->samplerstate[unit * 3 + 2] = 0x00000000;	/* border color */
379
380	intel->transform[unit] = picture->transform;
381
382	return TRUE;
383}
384
385static void
386i915_emit_composite_primitive_identity_source(intel_screen_private *intel,
387					      int srcX, int srcY,
388					      int maskX, int maskY,
389					      int dstX, int dstY,
390					      int w, int h)
391{
392	OUT_VERTEX(dstX + w);
393	OUT_VERTEX(dstY + h);
394	OUT_VERTEX((srcX + w) * intel->scale_units[0][0]);
395	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
396
397	OUT_VERTEX(dstX);
398	OUT_VERTEX(dstY + h);
399	OUT_VERTEX(srcX * intel->scale_units[0][0]);
400	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
401
402	OUT_VERTEX(dstX);
403	OUT_VERTEX(dstY);
404	OUT_VERTEX(srcX * intel->scale_units[0][0]);
405	OUT_VERTEX(srcY * intel->scale_units[0][1]);
406}
407
408static void
409i915_emit_composite_primitive_affine_source(intel_screen_private *intel,
410					    int srcX, int srcY,
411					    int maskX, int maskY,
412					    int dstX, int dstY,
413					    int w, int h)
414{
415	float src_x[3], src_y[3];
416
417	if (!intel_get_transformed_coordinates(srcX, srcY,
418					      intel->transform[0],
419					      &src_x[0],
420					      &src_y[0]))
421		return;
422
423	if (!intel_get_transformed_coordinates(srcX, srcY + h,
424					      intel->transform[0],
425					      &src_x[1],
426					      &src_y[1]))
427		return;
428
429	if (!intel_get_transformed_coordinates(srcX + w, srcY + h,
430					      intel->transform[0],
431					      &src_x[2],
432					      &src_y[2]))
433		return;
434
435	OUT_VERTEX(dstX + w);
436	OUT_VERTEX(dstY + h);
437	OUT_VERTEX(src_x[2] * intel->scale_units[0][0]);
438	OUT_VERTEX(src_y[2] * intel->scale_units[0][1]);
439
440	OUT_VERTEX(dstX);
441	OUT_VERTEX(dstY + h);
442	OUT_VERTEX(src_x[1] * intel->scale_units[0][0]);
443	OUT_VERTEX(src_y[1] * intel->scale_units[0][1]);
444
445	OUT_VERTEX(dstX);
446	OUT_VERTEX(dstY);
447	OUT_VERTEX(src_x[0] * intel->scale_units[0][0]);
448	OUT_VERTEX(src_y[0] * intel->scale_units[0][1]);
449}
450
451static void
452i915_emit_composite_primitive_identity_source_mask(intel_screen_private *intel,
453						   int srcX, int srcY,
454						   int maskX, int maskY,
455						   int dstX, int dstY,
456						   int w, int h)
457{
458	OUT_VERTEX(dstX + w);
459	OUT_VERTEX(dstY + h);
460	OUT_VERTEX((srcX + w) * intel->scale_units[0][0]);
461	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
462	OUT_VERTEX((maskX + w) * intel->scale_units[1][0]);
463	OUT_VERTEX((maskY + h) * intel->scale_units[1][1]);
464
465	OUT_VERTEX(dstX);
466	OUT_VERTEX(dstY + h);
467	OUT_VERTEX(srcX * intel->scale_units[0][0]);
468	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
469	OUT_VERTEX(maskX * intel->scale_units[1][0]);
470	OUT_VERTEX((maskY + h) * intel->scale_units[1][1]);
471
472	OUT_VERTEX(dstX);
473	OUT_VERTEX(dstY);
474	OUT_VERTEX(srcX * intel->scale_units[0][0]);
475	OUT_VERTEX(srcY * intel->scale_units[0][1]);
476	OUT_VERTEX(maskX * intel->scale_units[1][0]);
477	OUT_VERTEX(maskY * intel->scale_units[1][1]);
478}
479
480static void
481i915_emit_composite_primitive(intel_screen_private *intel,
482			      int srcX, int srcY,
483			      int maskX, int maskY,
484			      int dstX, int dstY,
485			      int w, int h)
486{
487	Bool is_affine_src = TRUE, is_affine_mask = TRUE;
488	int tex_unit = 0;
489	int src_unit = -1, mask_unit = -1;
490	float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
491
492	src_unit = tex_unit++;
493
494	is_affine_src = intel_transform_is_affine(intel->transform[src_unit]);
495	if (is_affine_src) {
496		if (!intel_get_transformed_coordinates(srcX, srcY,
497						      intel->
498						      transform[src_unit],
499						      &src_x[0],
500						      &src_y[0]))
501			return;
502
503		if (!intel_get_transformed_coordinates(srcX, srcY + h,
504						      intel->
505						      transform[src_unit],
506						      &src_x[1],
507						      &src_y[1]))
508			return;
509
510		if (!intel_get_transformed_coordinates(srcX + w, srcY + h,
511						      intel->
512						      transform[src_unit],
513						      &src_x[2],
514						      &src_y[2]))
515			return;
516	} else {
517		if (!intel_get_transformed_coordinates_3d(srcX, srcY,
518							 intel->
519							 transform[src_unit],
520							 &src_x[0],
521							 &src_y[0],
522							 &src_w[0]))
523			return;
524
525		if (!intel_get_transformed_coordinates_3d(srcX, srcY + h,
526							 intel->
527							 transform[src_unit],
528							 &src_x[1],
529							 &src_y[1],
530							 &src_w[1]))
531			return;
532
533		if (!intel_get_transformed_coordinates_3d(srcX + w, srcY + h,
534							 intel->
535							 transform[src_unit],
536							 &src_x[2],
537							 &src_y[2],
538							 &src_w[2]))
539			return;
540	}
541
542	if (intel->render_mask) {
543		mask_unit = tex_unit++;
544
545		is_affine_mask = intel_transform_is_affine(intel->transform[mask_unit]);
546		if (is_affine_mask) {
547			if (!intel_get_transformed_coordinates(maskX, maskY,
548							      intel->
549							      transform[mask_unit],
550							      &mask_x[0],
551							      &mask_y[0]))
552				return;
553
554			if (!intel_get_transformed_coordinates(maskX, maskY + h,
555							      intel->
556							      transform[mask_unit],
557							      &mask_x[1],
558							      &mask_y[1]))
559				return;
560
561			if (!intel_get_transformed_coordinates(maskX + w, maskY + h,
562							      intel->
563							      transform[mask_unit],
564							      &mask_x[2],
565							      &mask_y[2]))
566				return;
567		} else {
568			if (!intel_get_transformed_coordinates_3d(maskX, maskY,
569								 intel->
570								 transform[mask_unit],
571								 &mask_x[0],
572								 &mask_y[0],
573								 &mask_w[0]))
574				return;
575
576			if (!intel_get_transformed_coordinates_3d(maskX, maskY + h,
577								 intel->
578								 transform[mask_unit],
579								 &mask_x[1],
580								 &mask_y[1],
581								 &mask_w[1]))
582				return;
583
584			if (!intel_get_transformed_coordinates_3d(maskX + w, maskY + h,
585								 intel->
586								 transform[mask_unit],
587								 &mask_x[2],
588								 &mask_y[2],
589								 &mask_w[2]))
590				return;
591		}
592	}
593
594	OUT_VERTEX(dstX + w);
595	OUT_VERTEX(dstY + h);
596	OUT_VERTEX(src_x[2] * intel->scale_units[src_unit][0]);
597	OUT_VERTEX(src_y[2] * intel->scale_units[src_unit][1]);
598	if (!is_affine_src) {
599		OUT_VERTEX(0.0);
600		OUT_VERTEX(src_w[2]);
601	}
602	if (intel->render_mask) {
603		OUT_VERTEX(mask_x[2] * intel->scale_units[mask_unit][0]);
604		OUT_VERTEX(mask_y[2] * intel->scale_units[mask_unit][1]);
605		if (!is_affine_mask) {
606			OUT_VERTEX(0.0);
607			OUT_VERTEX(mask_w[2]);
608		}
609	}
610
611	OUT_VERTEX(dstX);
612	OUT_VERTEX(dstY + h);
613	OUT_VERTEX(src_x[1] * intel->scale_units[src_unit][0]);
614	OUT_VERTEX(src_y[1] * intel->scale_units[src_unit][1]);
615	if (!is_affine_src) {
616		OUT_VERTEX(0.0);
617		OUT_VERTEX(src_w[1]);
618	}
619	if (intel->render_mask) {
620		OUT_VERTEX(mask_x[1] * intel->scale_units[mask_unit][0]);
621		OUT_VERTEX(mask_y[1] * intel->scale_units[mask_unit][1]);
622		if (!is_affine_mask) {
623			OUT_VERTEX(0.0);
624			OUT_VERTEX(mask_w[1]);
625		}
626	}
627
628	OUT_VERTEX(dstX);
629	OUT_VERTEX(dstY);
630	OUT_VERTEX(src_x[0] * intel->scale_units[src_unit][0]);
631	OUT_VERTEX(src_y[0] * intel->scale_units[src_unit][1]);
632	if (!is_affine_src) {
633		OUT_VERTEX(0.0);
634		OUT_VERTEX(src_w[0]);
635	}
636	if (intel->render_mask) {
637		OUT_VERTEX(mask_x[0] * intel->scale_units[mask_unit][0]);
638		OUT_VERTEX(mask_y[0] * intel->scale_units[mask_unit][1]);
639		if (!is_affine_mask) {
640			OUT_VERTEX(0.0);
641			OUT_VERTEX(mask_w[0]);
642		}
643	}
644}
645
646Bool
647i915_prepare_composite(int op, PicturePtr source_picture,
648		       PicturePtr mask_picture, PicturePtr dest_picture,
649		       PixmapPtr source, PixmapPtr mask, PixmapPtr dest)
650{
651	ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen);
652	intel_screen_private *intel = intel_get_screen_private(scrn);
653	drm_intel_bo *bo_table[] = {
654		NULL,		/* batch_bo */
655		intel_get_pixmap_bo(dest),
656		intel_get_pixmap_bo(source),
657		mask ? intel_get_pixmap_bo(mask) : NULL,
658	};
659	int tex_unit = 0;
660	int floats_per_vertex;
661
662	intel->render_source_picture = source_picture;
663	intel->render_source = source;
664	intel->render_mask_picture = mask_picture;
665	intel->render_mask = mask;
666	intel->render_dest_picture = dest_picture;
667	intel->render_dest = dest;
668
669	if (!intel_check_pitch_3d(source))
670		return FALSE;
671
672	if (mask && !intel_check_pitch_3d(mask))
673		return FALSE;
674
675	if (!intel_check_pitch_3d(dest))
676		return FALSE;
677
678	if (!i915_get_dest_format(dest_picture,
679				  &intel->i915_render_state.dst_format))
680		return FALSE;
681
682	if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table)))
683		return FALSE;
684
685	if (mask_picture != NULL && mask_picture->componentAlpha &&
686	    PICT_FORMAT_RGB(mask_picture->format)) {
687		/* Check if it's component alpha that relies on a source alpha
688		 * and on the source value.  We can only get one of those
689		 * into the single source value that we get to blend with.
690		 */
691		if (i915_blend_op[op].src_alpha &&
692		    (i915_blend_op[op].src_blend != BLENDFACT_ZERO))
693			return FALSE;
694	}
695
696	intel->transform[0] = NULL;
697	intel->scale_units[0][0] = -1;
698	intel->scale_units[0][1] = -1;
699	intel->transform[1] = NULL;
700	intel->scale_units[1][0] = -1;
701	intel->scale_units[1][1] = -1;
702
703	floats_per_vertex = 2;		/* dest x/y */
704	if (!i915_texture_setup(source_picture, source, tex_unit++)) {
705		intel_debug_fallback(scrn, "fail to setup src texture\n");
706		return FALSE;
707	}
708
709	if (intel_transform_is_affine(source_picture->transform))
710		floats_per_vertex += 2;	/* src x/y */
711	else
712		floats_per_vertex += 4;	/* src x/y/z/w */
713
714	if (mask_picture != NULL) {
715		assert(mask != NULL);
716		if (!i915_texture_setup(mask_picture, mask, tex_unit++)) {
717			intel_debug_fallback(scrn,
718					     "fail to setup mask texture\n");
719			return FALSE;
720		}
721
722		if (intel_transform_is_affine(mask_picture->transform))
723			floats_per_vertex += 2;	/* mask x/y */
724		else
725			floats_per_vertex += 4;	/* mask x/y/z/w */
726	}
727
728	intel->i915_render_state.op = op;
729
730	if (intel_pixmap_is_dirty(source) || intel_pixmap_is_dirty(mask))
731		intel_batch_emit_flush(scrn);
732
733	intel->needs_render_state_emit = TRUE;
734
735	intel->prim_emit = i915_emit_composite_primitive;
736	if (!mask) {
737		if (intel->transform[0] == NULL)
738			intel->prim_emit = i915_emit_composite_primitive_identity_source;
739		else if (intel_transform_is_affine(intel->transform[0]))
740			intel->prim_emit = i915_emit_composite_primitive_affine_source;
741	} else {
742		if (intel->transform[0] == NULL) {
743			if (intel->transform[1] == NULL)
744				intel->prim_emit = i915_emit_composite_primitive_identity_source_mask;
745		}
746	}
747
748	if (floats_per_vertex != intel->floats_per_vertex) {
749		intel->floats_per_vertex = floats_per_vertex;
750		intel->needs_render_vertex_emit = TRUE;
751	}
752
753	return TRUE;
754}
755
756static void
757i915_composite_emit_shader(intel_screen_private *intel, CARD8 op)
758{
759	PicturePtr mask_picture = intel->render_mask_picture;
760	PixmapPtr mask = intel->render_mask;
761	int src_reg, mask_reg;
762	Bool dest_is_alpha = PIXMAN_FORMAT_RGB(intel->render_dest_picture->format) == 0;
763	FS_LOCALS();
764
765	FS_BEGIN();
766
767	/* Declare the registers necessary for our program.  */
768	i915_fs_dcl(FS_T0);
769	i915_fs_dcl(FS_S0);
770	if (!mask) {
771		/* No mask, so load directly to output color */
772		if (dest_is_alpha)
773			src_reg = FS_R0;
774		else
775			src_reg = FS_OC;
776
777		if (intel_transform_is_affine(intel->transform[0]))
778			i915_fs_texld(src_reg, FS_S0, FS_T0);
779		else
780			i915_fs_texldp(src_reg, FS_S0, FS_T0);
781
782		if (src_reg != FS_OC)
783			i915_fs_mov(FS_OC, i915_fs_operand(src_reg, W, W, W, W));
784	} else {
785		i915_fs_dcl(FS_T1);
786		i915_fs_dcl(FS_S1);
787
788		/* Load the source_picture texel */
789		if (intel_transform_is_affine(intel->transform[0]))
790			i915_fs_texld(FS_R0, FS_S0, FS_T0);
791		else
792			i915_fs_texldp(FS_R0, FS_S0, FS_T0);
793
794		src_reg = FS_R0;
795
796		/* Load the mask_picture texel */
797		if (intel_transform_is_affine(intel->transform[1]))
798			i915_fs_texld(FS_R1, FS_S1, FS_T1);
799		else
800			i915_fs_texldp(FS_R1, FS_S1, FS_T1);
801
802		mask_reg = FS_R1;
803
804		if (dest_is_alpha) {
805			i915_fs_mul(FS_OC,
806				    i915_fs_operand(src_reg, W, W, W, W),
807				    i915_fs_operand(mask_reg, W, W, W, W));
808		} else {
809			/* If component alpha is active in the mask and the blend
810			 * operation uses the source alpha, then we know we don't
811			 * need the source value (otherwise we would have hit a
812			 * fallback earlier), so we provide the source alpha (src.A *
813			 * mask.X) as output color.
814			 * Conversely, if CA is set and we don't need the source alpha,
815			 * then we produce the source value (src.X * mask.X) and the
816			 * source alpha is unused.  Otherwise, we provide the non-CA
817			 * source value (src.X * mask.A).
818			 */
819			if (mask_picture->componentAlpha &&
820			    PICT_FORMAT_RGB(mask_picture->format)) {
821				if (i915_blend_op[op].src_alpha) {
822					i915_fs_mul(FS_OC,
823						    i915_fs_operand(src_reg, W, W, W, W),
824						    i915_fs_operand_reg(mask_reg));
825				} else {
826					i915_fs_mul(FS_OC,
827						    i915_fs_operand_reg(src_reg),
828						    i915_fs_operand_reg(mask_reg));
829				}
830			} else {
831				i915_fs_mul(FS_OC,
832					    i915_fs_operand_reg(src_reg),
833					    i915_fs_operand(mask_reg, W, W, W, W));
834			}
835		}
836	}
837
838	FS_END();
839}
840
841static void i915_emit_composite_setup(ScrnInfoPtr scrn)
842{
843	intel_screen_private *intel = intel_get_screen_private(scrn);
844	int op = intel->i915_render_state.op;
845	PicturePtr mask_picture = intel->render_mask_picture;
846	PicturePtr dest_picture = intel->render_dest_picture;
847	PixmapPtr mask = intel->render_mask;
848	PixmapPtr dest = intel->render_dest;
849	int tex_count, t;
850
851	intel->needs_render_state_emit = FALSE;
852
853	IntelEmitInvarientState(scrn);
854	intel->last_3d = LAST_3D_RENDER;
855
856	tex_count = 1 + (mask != NULL);
857
858	assert(intel->in_batch_atomic);
859
860	if (tex_count != 0) {
861	    OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
862	    OUT_BATCH((1 << tex_count) - 1);
863	    for (t = 0; t < tex_count; t++) {
864		OUT_RELOC_PIXMAP(intel->texture[t], I915_GEM_DOMAIN_SAMPLER, 0, 0);
865		OUT_BATCH(intel->mapstate[3*t + 1]);
866		OUT_BATCH(intel->mapstate[3*t + 2]);
867	    }
868
869	    OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count));
870	    OUT_BATCH((1 << tex_count) - 1);
871	    for (t = 0; t < tex_count; t++) {
872		OUT_BATCH(intel->samplerstate[3*t + 0]);
873		OUT_BATCH(intel->samplerstate[3*t + 1]);
874		OUT_BATCH(intel->samplerstate[3*t + 2]);
875	    }
876	}
877
878	/* BUF_INFO is an implicit flush, so avoid if the target has not changed.
879	 * XXX However for reasons unfathomed, correct rendering in KDE requires
880	 * at least a MI_FLUSH | INHIBIT_RENDER_CACHE_FLUSH here.
881	 */
882	if (1) {
883		uint32_t tiling_bits;
884
885		if (intel_pixmap_tiled(dest)) {
886			tiling_bits = BUF_3D_TILED_SURFACE;
887			if (intel_get_pixmap_private(dest)->tiling
888			    == I915_TILING_Y)
889				tiling_bits |= BUF_3D_TILE_WALK_Y;
890		} else
891			tiling_bits = 0;
892
893		OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
894		OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling_bits |
895			  BUF_3D_PITCH(intel_pixmap_pitch(dest)));
896		OUT_RELOC_PIXMAP(dest, I915_GEM_DOMAIN_RENDER,
897				 I915_GEM_DOMAIN_RENDER, 0);
898
899		OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
900		OUT_BATCH(intel->i915_render_state.dst_format);
901
902		/* draw rect is unconditional */
903		OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
904		OUT_BATCH(0x00000000);
905		OUT_BATCH(0x00000000);	/* ymin, xmin */
906		OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) |
907			  DRAW_XMAX(dest->drawable.width - 1));
908		/* yorig, xorig (relate to color buffer?) */
909		OUT_BATCH(0x00000000);
910	}
911
912	{
913		uint32_t ss2;
914
915		ss2 = ~0;
916		ss2 &= ~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT);
917		ss2 |= S2_TEXCOORD_FMT(0,
918				       intel_transform_is_affine(intel->transform[0]) ?
919				       TEXCOORDFMT_2D : TEXCOORDFMT_4D);
920		if (mask) {
921		    ss2 &= ~S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT);
922		    ss2 |= S2_TEXCOORD_FMT(1,
923					   intel_transform_is_affine(intel->transform[1]) ?
924					   TEXCOORDFMT_2D : TEXCOORDFMT_4D);
925		}
926
927		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
928		OUT_BATCH(ss2);
929		OUT_BATCH(i915_get_blend_cntl(op, mask_picture, dest_picture->format));
930	}
931
932	i915_composite_emit_shader(intel, op);
933}
934
935void
936i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
937	       int dstX, int dstY, int w, int h)
938{
939	ScrnInfoPtr scrn = xf86ScreenToScrn(dest->drawable.pScreen);
940	intel_screen_private *intel = intel_get_screen_private(scrn);
941
942	/* 28 + 16 + 10 + 20 + 32 + 16 */
943	intel_batch_start_atomic(scrn, 150);
944
945	if (intel->needs_render_state_emit)
946		i915_emit_composite_setup(scrn);
947
948	if (intel->needs_render_vertex_emit ||
949	    intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) {
950		i915_vertex_flush(intel);
951
952		if (intel_vertex_space(intel) < 256) {
953			intel_next_vertex(intel);
954
955			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
956				  I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
957			OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
958			OUT_BATCH((intel->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) |
959				  (intel->floats_per_vertex << S1_VERTEX_PITCH_SHIFT));
960			intel->vertex_index = 0;
961		} else if (intel->floats_per_vertex != intel->last_floats_per_vertex){
962			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
963				  I1_LOAD_S(1) | 0);
964			OUT_BATCH((intel->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) |
965				  (intel->floats_per_vertex << S1_VERTEX_PITCH_SHIFT));
966
967			intel->vertex_index =
968				(intel->vertex_used + intel->floats_per_vertex - 1) /  intel->floats_per_vertex;
969			intel->vertex_used = intel->vertex_index * intel->floats_per_vertex;
970		}
971
972		intel->last_floats_per_vertex = intel->floats_per_vertex;
973		intel->needs_render_vertex_emit = FALSE;
974	}
975
976	if (intel->prim_offset == 0) {
977		intel->prim_offset = intel->batch_used;
978		OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL);
979		OUT_BATCH(intel->vertex_index);
980	}
981	intel->vertex_count += 3;
982
983	intel->prim_emit(intel,
984			 srcX, srcY,
985			 maskX, maskY,
986			 dstX, dstY,
987			 w, h);
988
989	intel_batch_end_atomic(scrn);
990}
991
992void
993i915_vertex_flush(intel_screen_private *intel)
994{
995	if (intel->prim_offset == 0)
996		return;
997
998	intel->batch_ptr[intel->prim_offset] |= intel->vertex_count;
999	intel->prim_offset = 0;
1000
1001	intel->vertex_index += intel->vertex_count;
1002	intel->vertex_count = 0;
1003}
1004
1005void
1006i915_batch_commit_notify(intel_screen_private *intel)
1007{
1008	intel->needs_render_state_emit = TRUE;
1009	intel->last_floats_per_vertex = 0;
1010}
1011