1/*
2 * Copyright (c) 2007  David Turner
3 * Copyright (c) 2008  M Joonas Pihlaja
4 * Copyright (c) 2011 Intel Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 *    Chris Wilson <chris@chris-wilson.co.uk>
27 *
28 */
29
30#ifdef HAVE_CONFIG_H
31#include "config.h"
32#endif
33
34#include "sna.h"
35#include "sna_render.h"
36#include "sna_render_inline.h"
37#include "sna_trapezoids.h"
38#include "fb/fbpict.h"
39
40#include <mipict.h>
41
42#undef SAMPLES_X
43#undef SAMPLES_Y
44
45/* TODO: Emit unantialiased and MSAA triangles. */
46
47#ifndef MAX
48#define MAX(x,y) ((x) >= (y) ? (x) : (y))
49#endif
50
51#ifndef MIN
52#define MIN(x,y) ((x) <= (y) ? (x) : (y))
53#endif
54
55typedef void (*span_func_t)(struct sna *sna,
56			    struct sna_composite_spans_op *op,
57			    pixman_region16_t *clip,
58			    const BoxRec *box,
59			    int coverage);
60
61#if HAS_DEBUG_FULL
62static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char *function)
63{
64	if (box->x1 < 0 || box->y1 < 0 ||
65	    box->x2 > pixmap->drawable.width ||
66	    box->y2 > pixmap->drawable.height)
67	{
68		FatalError("%s: damage box is beyond the pixmap: box=(%d, %d), (%d, %d), pixmap=(%d, %d)\n",
69			   function,
70			   box->x1, box->y1, box->x2, box->y2,
71			   pixmap->drawable.width,
72			   pixmap->drawable.height);
73	}
74}
75#define assert_pixmap_contains_box(p, b) _assert_pixmap_contains_box(p, b, __FUNCTION__)
76#else
77#define assert_pixmap_contains_box(p, b)
78#endif
79
80static void apply_damage(struct sna_composite_op *op, RegionPtr region)
81{
82	DBG(("%s: damage=%p, region=%dx[(%d, %d), (%d, %d)]\n",
83	     __FUNCTION__, op->damage,
84	     region_num_rects(region),
85	     region->extents.x1, region->extents.y1,
86	     region->extents.x2, region->extents.y2));
87
88	if (op->damage == NULL)
89		return;
90
91	RegionTranslate(region, op->dst.x, op->dst.y);
92
93	assert_pixmap_contains_box(op->dst.pixmap, RegionExtents(region));
94	sna_damage_add(op->damage, region);
95}
96
97static void _apply_damage_box(struct sna_composite_op *op, const BoxRec *box)
98{
99	BoxRec r;
100
101	r.x1 = box->x1 + op->dst.x;
102	r.x2 = box->x2 + op->dst.x;
103	r.y1 = box->y1 + op->dst.y;
104	r.y2 = box->y2 + op->dst.y;
105
106	assert_pixmap_contains_box(op->dst.pixmap, &r);
107	sna_damage_add_box(op->damage, &r);
108}
109
110inline static void apply_damage_box(struct sna_composite_op *op, const BoxRec *box)
111{
112	if (op->damage)
113		_apply_damage_box(op, box);
114}
115
116#define FAST_SAMPLES_X_TO_INT_FRAC(x, i, f) \
117	_GRID_TO_INT_FRAC_shift(x, i, f, FAST_SAMPLES_shift)
118
119#define FAST_SAMPLES_INT(x) ((x) >> (FAST_SAMPLES_shift))
120#define FAST_SAMPLES_FRAC(x) ((x) & (FAST_SAMPLES_mask))
121
122#define _GRID_TO_INT_FRAC_shift(t, i, f, b) do {	\
123    (f) = FAST_SAMPLES_FRAC(t);				\
124    (i) = FAST_SAMPLES_INT(t);				\
125} while (0)
126
127#define FAST_SAMPLES_XY (FAST_SAMPLES_X*FAST_SAMPLES_Y) /* Unit area on the grid. */
128#define AREA_TO_ALPHA(c)  ((c) / (float)FAST_SAMPLES_XY)
129
130struct quorem {
131	int32_t quo;
132	int64_t rem;
133};
134
135struct edge {
136	struct edge *next, *prev;
137
138	int dir;
139	int cell;
140	int height_left;
141
142	struct quorem x;
143
144	/* Advance of the current x when moving down a subsample line. */
145	struct quorem dxdy;
146	int64_t dy;
147
148	/* The clipped y of the top of the edge. */
149	int ytop;
150
151	/* y2-y1 after orienting the edge downwards.  */
152};
153
154/* Number of subsample rows per y-bucket. Must be SAMPLES_Y. */
155#define EDGE_Y_BUCKET_HEIGHT FAST_SAMPLES_Y
156#define EDGE_Y_BUCKET_INDEX(y, ymin) (((y) - (ymin))/EDGE_Y_BUCKET_HEIGHT)
157
158/* A collection of sorted and vertically clipped edges of the polygon.
159 * Edges are moved from the polygon to an active list while scan
160 * converting. */
161struct polygon {
162	/* The vertical clip extents. */
163	int ymin, ymax;
164
165	/* Array of edges all starting in the same bucket.	An edge is put
166	 * into bucket EDGE_BUCKET_INDEX(edge->ytop, polygon->ymin) when
167	 * it is added to the polygon. */
168	struct edge **y_buckets;
169	struct edge *y_buckets_embedded[64];
170
171	struct edge edges_embedded[32];
172	struct edge *edges;
173	int num_edges;
174};
175
176/* A cell records the effect on pixel coverage of polygon edges
177 * passing through a pixel.  It contains two accumulators of pixel
178 * coverage.
179 *
180 * Consider the effects of a polygon edge on the coverage of a pixel
181 * it intersects and that of the following one.  The coverage of the
182 * following pixel is the height of the edge multiplied by the width
183 * of the pixel, and the coverage of the pixel itself is the area of
184 * the trapezoid formed by the edge and the right side of the pixel.
185 *
186 * +-----------------------+-----------------------+
187 * |                       |                       |
188 * |                       |                       |
189 * |_______________________|_______________________|
190 * |   \...................|.......................|\
191 * |    \..................|.......................| |
192 * |     \.................|.......................| |
193 * |      \....covered.....|.......................| |
194 * |       \....area.......|.......................| } covered height
195 * |        \..............|.......................| |
196 * |uncovered\.............|.......................| |
197 * |  area    \............|.......................| |
198 * |___________\...........|.......................|/
199 * |                       |                       |
200 * |                       |                       |
201 * |                       |                       |
202 * +-----------------------+-----------------------+
203 *
204 * Since the coverage of the following pixel will always be a multiple
205 * of the width of the pixel, we can store the height of the covered
206 * area instead.  The coverage of the pixel itself is the total
207 * coverage minus the area of the uncovered area to the left of the
208 * edge.  As it's faster to compute the uncovered area we only store
209 * that and subtract it from the total coverage later when forming
210 * spans to blit.
211 *
212 * The heights and areas are signed, with left edges of the polygon
213 * having positive sign and right edges having negative sign.  When
214 * two edges intersect they swap their left/rightness so their
215 * contribution above and below the intersection point must be
216 * computed separately. */
217struct cell {
218	struct cell *next;
219	int x;
220	int16_t uncovered_area;
221	int16_t covered_height;
222};
223
224/* A cell list represents the scan line sparsely as cells ordered by
225 * ascending x.  It is geared towards scanning the cells in order
226 * using an internal cursor. */
227struct cell_list {
228	struct cell *cursor;
229
230	/* Points to the left-most cell in the scan line. */
231	struct cell head, tail;
232
233	int16_t x1, x2;
234	int16_t count, size;
235	struct cell *cells;
236	struct cell embedded[256];
237};
238
239/* The active list contains edges in the current scan line ordered by
240 * the x-coordinate of the intercept of the edge and the scan line. */
241struct active_list {
242	/* Leftmost edge on the current scan line. */
243	struct edge head, tail;
244};
245
246struct tor {
247    struct polygon	polygon[1];
248    struct active_list	active[1];
249    struct cell_list	coverages[1];
250
251    BoxRec extents;
252};
253
254/* Compute the floored division a/b. Assumes / and % perform symmetric
255 * division. */
256/* Rewinds the cell list's cursor to the beginning.  After rewinding
257 * we're good to cell_list_find() the cell any x coordinate. */
258inline static void
259cell_list_rewind(struct cell_list *cells)
260{
261	cells->cursor = &cells->head;
262}
263
264static bool
265cell_list_init(struct cell_list *cells, int x1, int x2)
266{
267	cells->tail.next = NULL;
268	cells->tail.x = INT_MAX;
269	cells->head.x = INT_MIN;
270	cells->head.next = &cells->tail;
271	cells->head.covered_height = 0;
272	cell_list_rewind(cells);
273	cells->count = 0;
274	cells->x1 = x1;
275	cells->x2 = x2;
276	cells->size = x2 - x1 + 1;
277	cells->cells = cells->embedded;
278	if (cells->size > ARRAY_SIZE(cells->embedded))
279		cells->cells = malloc(cells->size * sizeof(struct cell));
280	return cells->cells != NULL;
281}
282
283static void
284cell_list_fini(struct cell_list *cells)
285{
286	if (cells->cells != cells->embedded)
287		free(cells->cells);
288}
289
290inline static void
291cell_list_reset(struct cell_list *cells)
292{
293	cell_list_rewind(cells);
294	cells->head.next = &cells->tail;
295	cells->head.covered_height = 0;
296	cells->count = 0;
297}
298
299inline static struct cell *
300cell_list_alloc(struct cell_list *cells,
301		struct cell *tail,
302		int x)
303{
304	struct cell *cell;
305
306	assert(cells->count < cells->size);
307	cell = cells->cells + cells->count++;
308	cell->next = tail->next;
309	tail->next = cell;
310
311	cell->x = x;
312	cell->covered_height = 0;
313	cell->uncovered_area = 0;
314	return cell;
315}
316
317/* Find a cell at the given x-coordinate.  Returns %NULL if a new cell
318 * needed to be allocated but couldn't be.  Cells must be found with
319 * non-decreasing x-coordinate until the cell list is rewound using
320 * cell_list_rewind(). Ownership of the returned cell is retained by
321 * the cell list. */
322inline static struct cell *
323cell_list_find(struct cell_list *cells, int x)
324{
325	struct cell *tail;
326
327	if (x >= cells->x2)
328		return &cells->tail;
329
330	if (x < cells->x1)
331		return &cells->head;
332
333	tail = cells->cursor;
334	if (tail->x == x)
335		return tail;
336
337	do {
338		if (tail->next->x > x)
339			break;
340
341		tail = tail->next;
342		if (tail->next->x > x)
343			break;
344
345		tail = tail->next;
346		if (tail->next->x > x)
347			break;
348
349		tail = tail->next;
350	} while (1);
351
352	if (tail->x != x)
353		tail = cell_list_alloc(cells, tail, x);
354
355	return cells->cursor = tail;
356}
357
358/* Add a subpixel span covering [x1, x2) to the coverage cells. */
359inline static void
360cell_list_add_subspan(struct cell_list *cells, int x1, int x2)
361{
362	struct cell *cell;
363	int ix1, fx1;
364	int ix2, fx2;
365
366	if (x1 == x2)
367		return;
368
369	FAST_SAMPLES_X_TO_INT_FRAC(x1, ix1, fx1);
370	FAST_SAMPLES_X_TO_INT_FRAC(x2, ix2, fx2);
371
372	__DBG(("%s: x1=%d (%d+%d), x2=%d (%d+%d)\n", __FUNCTION__,
373	       x1, ix1, fx1, x2, ix2, fx2));
374
375	cell = cell_list_find(cells, ix1);
376	if (ix1 != ix2) {
377		cell->uncovered_area += fx1;
378		++cell->covered_height;
379
380		cell = cell_list_find(cells, ix2);
381		cell->uncovered_area -= fx2;
382		--cell->covered_height;
383	} else
384		cell->uncovered_area += (fx1-fx2);
385}
386
387inline static void
388cell_list_add_span(struct cell_list *cells, int x1, int x2)
389{
390	struct cell *cell;
391	int ix1, fx1;
392	int ix2, fx2;
393
394	FAST_SAMPLES_X_TO_INT_FRAC(x1, ix1, fx1);
395	FAST_SAMPLES_X_TO_INT_FRAC(x2, ix2, fx2);
396
397	__DBG(("%s: x1=%d (%d+%d), x2=%d (%d+%d)\n", __FUNCTION__,
398	       x1, ix1, fx1, x2, ix2, fx2));
399
400	cell = cell_list_find(cells, ix1);
401	if (ix1 != ix2) {
402		cell->uncovered_area += fx1*FAST_SAMPLES_Y;
403		cell->covered_height += FAST_SAMPLES_Y;
404
405		cell = cell_list_find(cells, ix2);
406		cell->uncovered_area -= fx2*FAST_SAMPLES_Y;
407		cell->covered_height -= FAST_SAMPLES_Y;
408	} else
409		cell->uncovered_area += (fx1-fx2)*FAST_SAMPLES_Y;
410}
411
412static void
413polygon_fini(struct polygon *polygon)
414{
415	if (polygon->y_buckets != polygon->y_buckets_embedded)
416		free(polygon->y_buckets);
417
418	if (polygon->edges != polygon->edges_embedded)
419		free(polygon->edges);
420}
421
422static bool
423polygon_init(struct polygon *polygon, int num_edges, int ymin, int ymax)
424{
425	unsigned num_buckets = EDGE_Y_BUCKET_INDEX(ymax-1, ymin) + 1;
426
427	if (unlikely(ymax - ymin > 0x7FFFFFFFU - EDGE_Y_BUCKET_HEIGHT))
428		return false;
429
430	polygon->edges = polygon->edges_embedded;
431	polygon->y_buckets = polygon->y_buckets_embedded;
432
433	polygon->num_edges = 0;
434	if (num_edges > (int)ARRAY_SIZE(polygon->edges_embedded)) {
435		polygon->edges = malloc(sizeof(struct edge)*num_edges);
436		if (unlikely(NULL == polygon->edges))
437			goto bail_no_mem;
438	}
439
440	if (num_buckets >= ARRAY_SIZE(polygon->y_buckets_embedded)) {
441		polygon->y_buckets = malloc((1+num_buckets)*sizeof(struct edge *));
442		if (unlikely(NULL == polygon->y_buckets))
443			goto bail_no_mem;
444	}
445	memset(polygon->y_buckets, 0, num_buckets * sizeof(struct edge *));
446	polygon->y_buckets[num_buckets] = (void *)-1;
447
448	polygon->ymin = ymin;
449	polygon->ymax = ymax;
450	return true;
451
452bail_no_mem:
453	polygon_fini(polygon);
454	return false;
455}
456
457static void
458_polygon_insert_edge_into_its_y_bucket(struct polygon *polygon, struct edge *e)
459{
460	unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop, polygon->ymin);
461	struct edge **ptail = &polygon->y_buckets[ix];
462	assert(e->ytop < polygon->ymax);
463	e->next = *ptail;
464	*ptail = e;
465}
466
467inline static void
468polygon_add_edge(struct polygon *polygon,
469		 const xTrapezoid *t,
470		 const xLineFixed *edge,
471		 int dir, int dx, int dy)
472{
473	struct edge *e = &polygon->edges[polygon->num_edges];
474	int ytop, ybot;
475
476	assert(t->bottom > t->top);
477	assert(edge->p2.y > edge->p1.y);
478
479	e->dir = dir;
480
481	ytop = pixman_fixed_to_fast(t->top) + dy;
482	if (ytop < polygon->ymin)
483		ytop = polygon->ymin;
484
485	ybot = pixman_fixed_to_fast(t->bottom) + dy;
486	if (ybot > polygon->ymax)
487		ybot = polygon->ymax;
488
489	e->ytop = ytop;
490	e->height_left = ybot - ytop;
491	if (e->height_left <= 0)
492		return;
493
494	if (pixman_fixed_to_fast(edge->p1.x) == pixman_fixed_to_fast(edge->p2.x)) {
495		e->cell = e->x.quo = pixman_fixed_to_fast(edge->p1.x) + dx;
496		e->x.rem = 0;
497		e->dxdy.quo = 0;
498		e->dxdy.rem = 0;
499		e->dy = 0;
500	} else {
501		int64_t Ex, Ey, tmp;
502
503		Ex = ((int64_t)edge->p2.x - edge->p1.x) * FAST_SAMPLES_X;
504		Ey = ((int64_t)edge->p2.y - edge->p1.y) * FAST_SAMPLES_Y * (2 << 16);
505		assert(Ey > 0);
506
507		e->dxdy.quo = Ex * (2 << 16) / Ey;
508		e->dxdy.rem = Ex * (2 << 16) % Ey;
509
510		tmp = (int64_t)(2 * (ytop - dy) + 1) << 16;
511		tmp -= (int64_t)edge->p1.y * FAST_SAMPLES_Y * 2;
512		tmp *= Ex;
513		e->x.quo = tmp / Ey;
514		e->x.rem = tmp % Ey;
515
516		tmp = (int64_t)edge->p1.x * FAST_SAMPLES_X;
517		e->x.quo += tmp / (1 << 16) + dx;
518		tmp &= (1 << 16) - 1;
519		if (tmp) {
520			if (Ey < INT64_MAX >> 16)
521				tmp = (tmp * Ey) / (1 << 16);
522			else /* Handle overflow by losing precision */
523				tmp = tmp * (Ey / (1 << 16));
524			e->x.rem += tmp;
525		}
526
527		if (e->x.rem < 0) {
528			--e->x.quo;
529			e->x.rem += Ey;
530		} else if (e->x.rem >= Ey) {
531			++e->x.quo;
532			e->x.rem -= Ey;
533		}
534		assert(e->x.rem >= 0 && e->x.rem < Ey);
535
536		e->cell = e->x.quo + (e->x.rem >= Ey/2);
537		e->dy = Ey;
538	}
539
540	_polygon_insert_edge_into_its_y_bucket(polygon, e);
541	polygon->num_edges++;
542}
543
544inline static void
545polygon_add_line(struct polygon *polygon,
546		 const xPointFixed *p1,
547		 const xPointFixed *p2,
548		 int dx, int dy)
549{
550	struct edge *e = &polygon->edges[polygon->num_edges];
551	int top, bot;
552
553	if (p1->y == p2->y)
554		return;
555
556	__DBG(("%s: line=(%d, %d), (%d, %d)\n",
557	       __FUNCTION__, (int)p1->x, (int)p1->y, (int)p2->x, (int)p2->y));
558
559	e->dir = 1;
560	if (p2->y < p1->y) {
561		const xPointFixed *t;
562
563		e->dir = -1;
564
565		t = p1;
566		p1 = p2;
567		p2 = t;
568	}
569
570	top = pixman_fixed_to_fast(p1->y) + dy;
571	if (top < polygon->ymin)
572		top = polygon->ymin;
573
574	bot = pixman_fixed_to_fast(p2->y) + dy;
575	if (bot > polygon->ymax)
576		bot = polygon->ymax;
577
578	if (bot <= top)
579		return;
580
581	e->ytop = top;
582	e->height_left = bot - top;
583	if (e->height_left <= 0)
584		return;
585
586	__DBG(("%s: edge height=%d\n", __FUNCTION__, e->dir * e->height_left));
587
588	if (pixman_fixed_to_fast(p1->x) == pixman_fixed_to_fast(p2->x)) {
589		e->cell = e->x.quo = pixman_fixed_to_fast(p1->x) + dx;
590		e->x.rem = 0;
591		e->dxdy.quo = 0;
592		e->dxdy.rem = 0;
593		e->dy = 0;
594	} else {
595		int64_t Ex, Ey, tmp;
596
597		Ex = ((int64_t)p2->x - p1->x) * FAST_SAMPLES_X;
598		Ey = ((int64_t)p2->y - p1->y) * FAST_SAMPLES_Y * (2 << 16);
599
600		e->dxdy.quo = Ex * (2 << 16) / Ey;
601		e->dxdy.rem = Ex * (2 << 16) % Ey;
602
603		tmp = (int64_t)(2 * (top - dy) + 1) << 16;
604		tmp -= (int64_t)p1->y * FAST_SAMPLES_Y * 2;
605		tmp *= Ex;
606		e->x.quo = tmp / Ey;
607		e->x.rem = tmp % Ey;
608
609		tmp = (int64_t)p1->x * FAST_SAMPLES_X;
610		e->x.quo += tmp / (1 << 16) + dx;
611		e->x.rem += ((tmp & ((1 << 16) - 1)) * Ey) / (1 << 16);
612
613		if (e->x.rem < 0) {
614			--e->x.quo;
615			e->x.rem += Ey;
616		} else if (e->x.rem >= Ey) {
617			++e->x.quo;
618			e->x.rem -= Ey;
619		}
620
621		e->cell = e->x.quo + (e->x.rem >= Ey/2);
622		e->dy = Ey;
623	}
624
625	if (polygon->num_edges > 0) {
626		struct edge *prev = &polygon->edges[polygon->num_edges-1];
627		/* detect degenerate triangles inserted into tristrips */
628		if (e->dir == -prev->dir &&
629		    e->ytop == prev->ytop &&
630		    e->height_left == prev->height_left &&
631		    e->x.quo == prev->x.quo &&
632		    e->x.rem == prev->x.rem &&
633		    e->dxdy.quo == prev->dxdy.quo &&
634		    e->dxdy.rem == prev->dxdy.rem) {
635			unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop,
636							  polygon->ymin);
637			polygon->y_buckets[ix] = prev->next;
638			polygon->num_edges--;
639			return;
640		}
641	}
642
643	_polygon_insert_edge_into_its_y_bucket(polygon, e);
644	polygon->num_edges++;
645}
646
647static void
648active_list_reset(struct active_list *active)
649{
650	active->head.height_left = INT_MAX;
651	active->head.cell = INT_MIN;
652	active->head.dy = 0;
653	active->head.prev = NULL;
654	active->head.next = &active->tail;
655	active->tail.prev = &active->head;
656	active->tail.next = NULL;
657	active->tail.cell = INT_MAX;
658	active->tail.height_left = INT_MAX;
659	active->tail.dy = 0;
660}
661
662static struct edge *
663merge_sorted_edges(struct edge *head_a, struct edge *head_b)
664{
665	struct edge *head, **next, *prev;
666	int32_t x;
667
668	if (head_b == NULL)
669		return head_a;
670
671	prev = head_a->prev;
672	next = &head;
673	if (head_a->cell <= head_b->cell) {
674		head = head_a;
675	} else {
676		head = head_b;
677		head_b->prev = prev;
678		goto start_with_b;
679	}
680
681	do {
682		x = head_b->cell;
683		while (head_a != NULL && head_a->cell <= x) {
684			prev = head_a;
685			next = &head_a->next;
686			head_a = head_a->next;
687		}
688
689		head_b->prev = prev;
690		*next = head_b;
691		if (head_a == NULL)
692			return head;
693
694start_with_b:
695		x = head_a->cell;
696		while (head_b != NULL && head_b->cell <= x) {
697			prev = head_b;
698			next = &head_b->next;
699			head_b = head_b->next;
700		}
701
702		head_a->prev = prev;
703		*next = head_a;
704		if (head_b == NULL)
705			return head;
706	} while (1);
707}
708
709static struct edge *
710sort_edges(struct edge  *list,
711	   unsigned int  level,
712	   struct edge **head_out)
713{
714	struct edge *head_other, *remaining;
715	unsigned int i;
716
717	head_other = list->next;
718	if (head_other == NULL) {
719		*head_out = list;
720		return NULL;
721	}
722
723	remaining = head_other->next;
724	if (list->cell <= head_other->cell) {
725		*head_out = list;
726		head_other->next = NULL;
727	} else {
728		*head_out = head_other;
729		head_other->prev = list->prev;
730		head_other->next = list;
731		list->prev = head_other;
732		list->next = NULL;
733	}
734
735	for (i = 0; i < level && remaining; i++) {
736		remaining = sort_edges(remaining, i, &head_other);
737		*head_out = merge_sorted_edges(*head_out, head_other);
738	}
739
740	return remaining;
741}
742
743static struct edge *filter(struct edge *edges)
744{
745	struct edge *e;
746
747	e = edges;
748	while (e->next) {
749		struct edge *n = e->next;
750		if (e->dir == -n->dir &&
751		    e->height_left == n->height_left &&
752		    &e->x.quo == &n->x.quo &&
753		    &e->x.rem == &n->x.rem &&
754		    &e->dxdy.quo == &n->dxdy.quo &&
755		    &e->dxdy.rem == &n->dxdy.rem) {
756			if (e->prev)
757				e->prev->next = n->next;
758			else
759				edges = n->next;
760			if (n->next)
761				n->next->prev = e->prev;
762			else
763				break;
764			e = n->next;
765		} else
766			e = n;
767	}
768
769	return edges;
770}
771
772static struct edge *
773merge_unsorted_edges(struct edge *head, struct edge *unsorted)
774{
775	sort_edges(unsorted, UINT_MAX, &unsorted);
776	return merge_sorted_edges(head, filter(unsorted));
777}
778
779/* Test if the edges on the active list can be safely advanced by a
780 * full row without intersections or any edges ending. */
781inline static int
782can_full_step(struct active_list *active)
783{
784	const struct edge *e;
785	int min_height = INT_MAX;
786
787	assert(active->head.next != &active->tail);
788	for (e = active->head.next; &active->tail != e; e = e->next) {
789		assert(e->height_left > 0);
790
791		if (e->dy != 0)
792			return 0;
793
794		if (e->height_left < min_height) {
795			min_height = e->height_left;
796			if (min_height < FAST_SAMPLES_Y)
797				return 0;
798		}
799	}
800
801	return min_height;
802}
803
804inline static void
805merge_edges(struct active_list *active, struct edge *edges)
806{
807	active->head.next = merge_unsorted_edges(active->head.next, edges);
808}
809
810inline static int
811fill_buckets(struct active_list *active,
812	     struct edge *edge,
813	     struct edge **buckets)
814{
815	int ymax = 0;
816
817	while (edge) {
818		int y = edge->ytop & (FAST_SAMPLES_Y-1);
819		struct edge *next = edge->next;
820		struct edge **b = &buckets[y];
821		__DBG(("%s: ytop %d -> bucket %d\n", __FUNCTION__,
822		       edge->ytop, y));
823		if (*b)
824			(*b)->prev = edge;
825		edge->next = *b;
826		edge->prev = NULL;
827		*b = edge;
828		edge = next;
829		if (y > ymax)
830			ymax = y;
831	}
832
833	return ymax;
834}
835
836inline static void
837nonzero_subrow(struct active_list *active, struct cell_list *coverages)
838{
839	struct edge *edge = active->head.next;
840	int prev_x = INT_MIN;
841	int winding = 0, xstart = edge->cell;
842
843	cell_list_rewind(coverages);
844
845	while (&active->tail != edge) {
846		struct edge *next = edge->next;
847
848		winding += edge->dir;
849		if (0 == winding && edge->next->cell != edge->cell) {
850			cell_list_add_subspan(coverages, xstart, edge->cell);
851			xstart = edge->next->cell;
852		}
853
854		assert(edge->height_left > 0);
855		if (--edge->height_left) {
856			if (edge->dy) {
857				edge->x.quo += edge->dxdy.quo;
858				edge->x.rem += edge->dxdy.rem;
859				if (edge->x.rem < 0) {
860					--edge->x.quo;
861					edge->x.rem += edge->dy;
862				} else if (edge->x.rem >= edge->dy) {
863					++edge->x.quo;
864					edge->x.rem -= edge->dy;
865				}
866				edge->cell = edge->x.quo + (edge->x.rem >= edge->dy/2);
867			}
868
869			if (edge->cell < prev_x) {
870				struct edge *pos = edge->prev;
871				pos->next = next;
872				next->prev = pos;
873				do
874					pos = pos->prev;
875				while (edge->cell < pos->cell);
876				pos->next->prev = edge;
877				edge->next = pos->next;
878				edge->prev = pos;
879				pos->next = edge;
880			} else
881				prev_x = edge->cell;
882		} else {
883			edge->prev->next = next;
884			next->prev = edge->prev;
885		}
886
887		edge = next;
888	}
889}
890
891static void
892nonzero_row(struct active_list *active, struct cell_list *coverages)
893{
894	struct edge *left = active->head.next;
895
896	while (&active->tail != left) {
897		struct edge *right;
898		int winding = left->dir;
899
900		left->height_left -= FAST_SAMPLES_Y;
901		assert(left->height_left >= 0);
902		if (!left->height_left) {
903			left->prev->next = left->next;
904			left->next->prev = left->prev;
905		}
906
907		right = left->next;
908		do {
909			right->height_left -= FAST_SAMPLES_Y;
910			assert(right->height_left >= 0);
911			if (!right->height_left) {
912				right->prev->next = right->next;
913				right->next->prev = right->prev;
914			}
915
916			winding += right->dir;
917			if (0 == winding)
918				break;
919
920			right = right->next;
921		} while (1);
922
923		cell_list_add_span(coverages, left->cell, right->cell);
924		left = right->next;
925	}
926}
927
928static void
929tor_fini(struct tor *converter)
930{
931	polygon_fini(converter->polygon);
932	cell_list_fini(converter->coverages);
933}
934
935static bool
936tor_init(struct tor *converter, const BoxRec *box, int num_edges)
937{
938	__DBG(("%s: (%d, %d),(%d, %d) x (%d, %d), num_edges=%d\n",
939	       __FUNCTION__,
940	       box->x1, box->y1, box->x2, box->y2,
941	       FAST_SAMPLES_X, FAST_SAMPLES_Y,
942	       num_edges));
943
944	converter->extents = *box;
945
946	if (!cell_list_init(converter->coverages, box->x1, box->x2))
947		return false;
948
949	active_list_reset(converter->active);
950	if (!polygon_init(converter->polygon, num_edges,
951			  (int)box->y1 * FAST_SAMPLES_Y,
952			  (int)box->y2 * FAST_SAMPLES_Y)) {
953		cell_list_fini(converter->coverages);
954		return false;
955	}
956
957	return true;
958}
959
960static void
961tor_add_trapezoid(struct tor *tor,
962		  const xTrapezoid *t,
963		  int dx, int dy)
964{
965	if (!xTrapezoidValid(t)) {
966		__DBG(("%s: skipping invalid trapezoid: top=%d, bottom=%d, left=(%d, %d), (%d, %d), right=(%d, %d), (%d, %d)\n",
967		       __FUNCTION__,
968		       t->top, t->bottom,
969		       t->left.p1.x, t->left.p1.y,
970		       t->left.p2.x, t->left.p2.y,
971		       t->right.p1.x, t->right.p1.y,
972		       t->right.p2.x, t->right.p2.y));
973		return;
974	}
975	polygon_add_edge(tor->polygon, t, &t->left, 1, dx, dy);
976	polygon_add_edge(tor->polygon, t, &t->right, -1, dx, dy);
977}
978
979static void
980step_edges(struct active_list *active, int count)
981{
982	struct edge *edge;
983
984	count *= FAST_SAMPLES_Y;
985	for (edge = active->head.next; edge != &active->tail; edge = edge->next) {
986		edge->height_left -= count;
987		assert(edge->height_left >= 0);
988		if (!edge->height_left) {
989			edge->prev->next = edge->next;
990			edge->next->prev = edge->prev;
991		}
992	}
993}
994
995static void
996tor_blt_span(struct sna *sna,
997	     struct sna_composite_spans_op *op,
998	     pixman_region16_t *clip,
999	     const BoxRec *box,
1000	     int coverage)
1001{
1002	__DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
1003
1004	op->box(sna, op, box, AREA_TO_ALPHA(coverage));
1005	apply_damage_box(&op->base, box);
1006}
1007
1008static void
1009tor_blt_span__no_damage(struct sna *sna,
1010			struct sna_composite_spans_op *op,
1011			pixman_region16_t *clip,
1012			const BoxRec *box,
1013			int coverage)
1014{
1015	__DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
1016
1017	op->box(sna, op, box, AREA_TO_ALPHA(coverage));
1018}
1019
1020static void
1021tor_blt_span_clipped(struct sna *sna,
1022		     struct sna_composite_spans_op *op,
1023		     pixman_region16_t *clip,
1024		     const BoxRec *box,
1025		     int coverage)
1026{
1027	pixman_region16_t region;
1028	float opacity;
1029
1030	opacity = AREA_TO_ALPHA(coverage);
1031	__DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2, opacity));
1032
1033	pixman_region_init_rects(&region, box, 1);
1034	RegionIntersect(&region, &region, clip);
1035	if (region_num_rects(&region)) {
1036		op->boxes(sna, op,
1037			  region_rects(&region),
1038			  region_num_rects(&region),
1039			  opacity);
1040		apply_damage(&op->base, &region);
1041	}
1042	pixman_region_fini(&region);
1043}
1044
1045static void
1046tor_blt_span_mono(struct sna *sna,
1047		  struct sna_composite_spans_op *op,
1048		  pixman_region16_t *clip,
1049		  const BoxRec *box,
1050		  int coverage)
1051{
1052	if (coverage < FAST_SAMPLES_XY/2)
1053		return;
1054
1055	tor_blt_span(sna, op, clip, box, FAST_SAMPLES_XY);
1056}
1057
1058static void
1059tor_blt_span_mono_clipped(struct sna *sna,
1060			  struct sna_composite_spans_op *op,
1061			  pixman_region16_t *clip,
1062			  const BoxRec *box,
1063			  int coverage)
1064{
1065	if (coverage < FAST_SAMPLES_XY/2)
1066		return;
1067
1068	tor_blt_span_clipped(sna, op, clip, box, FAST_SAMPLES_XY);
1069}
1070
1071static void
1072tor_blt_span_mono_unbounded(struct sna *sna,
1073			    struct sna_composite_spans_op *op,
1074			    pixman_region16_t *clip,
1075			    const BoxRec *box,
1076			    int coverage)
1077{
1078	tor_blt_span(sna, op, clip, box,
1079		     coverage < FAST_SAMPLES_XY/2 ? 0 : FAST_SAMPLES_XY);
1080}
1081
1082static void
1083tor_blt_span_mono_unbounded_clipped(struct sna *sna,
1084				    struct sna_composite_spans_op *op,
1085				    pixman_region16_t *clip,
1086				    const BoxRec *box,
1087				    int coverage)
1088{
1089	tor_blt_span_clipped(sna, op, clip, box,
1090			     coverage < FAST_SAMPLES_XY/2 ? 0 : FAST_SAMPLES_XY);
1091}
1092
1093static void
1094tor_blt(struct sna *sna,
1095	struct tor *converter,
1096	struct sna_composite_spans_op *op,
1097	pixman_region16_t *clip,
1098	void (*span)(struct sna *sna,
1099		     struct sna_composite_spans_op *op,
1100		     pixman_region16_t *clip,
1101		     const BoxRec *box,
1102		     int coverage),
1103	int y, int height,
1104	int unbounded)
1105{
1106	struct cell_list *cells = converter->coverages;
1107	struct cell *cell;
1108	BoxRec box;
1109	int cover;
1110
1111	box.y1 = y + converter->extents.y1;
1112	box.y2 = box.y1 + height;
1113	assert(box.y2 <= converter->extents.y2);
1114	box.x1 = converter->extents.x1;
1115
1116	/* Form the spans from the coverages and areas. */
1117	cover = cells->head.covered_height*FAST_SAMPLES_X;
1118	assert(cover >= 0);
1119	for (cell = cells->head.next; cell != &cells->tail; cell = cell->next) {
1120		int x = cell->x;
1121
1122		assert(x >= converter->extents.x1);
1123		assert(x < converter->extents.x2);
1124		__DBG(("%s: cell=(%d, %d, %d), cover=%d\n", __FUNCTION__,
1125		       cell->x, cell->covered_height, cell->uncovered_area,
1126		       cover));
1127
1128		if (cell->covered_height || cell->uncovered_area) {
1129			box.x2 = x;
1130			if (box.x2 > box.x1 && (unbounded || cover)) {
1131				__DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
1132				       box.x1, box.y1,
1133				       box.x2 - box.x1,
1134				       box.y2 - box.y1,
1135				       cover));
1136				span(sna, op, clip, &box, cover);
1137			}
1138			box.x1 = box.x2;
1139			cover += cell->covered_height*FAST_SAMPLES_X;
1140		}
1141
1142		if (cell->uncovered_area) {
1143			int area = cover - cell->uncovered_area;
1144			box.x2 = x + 1;
1145			if (unbounded || area) {
1146				__DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
1147				       box.x1, box.y1,
1148				       box.x2 - box.x1,
1149				       box.y2 - box.y1,
1150				       area));
1151				span(sna, op, clip, &box, area);
1152			}
1153			box.x1 = box.x2;
1154		}
1155	}
1156
1157	box.x2 = converter->extents.x2;
1158	if (box.x2 > box.x1 && (unbounded || cover)) {
1159		__DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
1160		       box.x1, box.y1,
1161		       box.x2 - box.x1,
1162		       box.y2 - box.y1,
1163		       cover));
1164		span(sna, op, clip, &box, cover);
1165	}
1166}
1167
1168flatten static void
1169tor_render(struct sna *sna,
1170	   struct tor *converter,
1171	   struct sna_composite_spans_op *op,
1172	   pixman_region16_t *clip,
1173	   void (*span)(struct sna *sna,
1174			struct sna_composite_spans_op *op,
1175			pixman_region16_t *clip,
1176			const BoxRec *box,
1177			int coverage),
1178	   int unbounded)
1179{
1180	struct polygon *polygon = converter->polygon;
1181	struct cell_list *coverages = converter->coverages;
1182	struct active_list *active = converter->active;
1183	struct edge *buckets[FAST_SAMPLES_Y] = { 0 };
1184	int16_t i, j, h = converter->extents.y2 - converter->extents.y1;
1185
1186	__DBG(("%s: unbounded=%d\n", __FUNCTION__, unbounded));
1187
1188	/* Render each pixel row. */
1189	for (i = 0; i < h; i = j) {
1190		int do_full_step = 0;
1191
1192		j = i + 1;
1193
1194		/* Determine if we can ignore this row or use the full pixel
1195		 * stepper. */
1196		if (fill_buckets(active, polygon->y_buckets[i], buckets) == 0) {
1197			if (buckets[0]) {
1198				merge_edges(active, buckets[0]);
1199				buckets[0] = NULL;
1200			}
1201			if (active->head.next == &active->tail) {
1202				for (; polygon->y_buckets[j] == NULL; j++)
1203					;
1204				__DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
1205				       __FUNCTION__, i, j));
1206
1207				assert(j <= h);
1208				if (unbounded) {
1209					BoxRec box;
1210
1211					box = converter->extents;
1212					box.y1 += i;
1213					box.y2 = converter->extents.y1 + j;
1214
1215					span(sna, op, clip, &box, 0);
1216				}
1217				continue;
1218			}
1219
1220			do_full_step = can_full_step(active);
1221		}
1222
1223		__DBG(("%s: y=%d-%d, do_full_step=%d, new edges=%d\n",
1224		       __FUNCTION__,
1225		       i, j, do_full_step,
1226		       polygon->y_buckets[i] != NULL));
1227		if (do_full_step) {
1228			nonzero_row(active, coverages);
1229
1230			while (polygon->y_buckets[j] == NULL &&
1231			       do_full_step >= 2*FAST_SAMPLES_Y) {
1232				do_full_step -= FAST_SAMPLES_Y;
1233				j++;
1234			}
1235			assert(j >= i + 1 && j <= h);
1236			if (j != i + 1)
1237				step_edges(active, j - (i + 1));
1238
1239			__DBG(("%s: vertical edges, full step (%d, %d)\n",
1240			       __FUNCTION__,  i, j));
1241		} else {
1242			int suby;
1243
1244			/* Subsample this row. */
1245			for (suby = 0; suby < FAST_SAMPLES_Y; suby++) {
1246				if (buckets[suby]) {
1247					merge_edges(active, buckets[suby]);
1248					buckets[suby] = NULL;
1249				}
1250
1251				nonzero_subrow(active, coverages);
1252			}
1253		}
1254
1255		assert(j > i);
1256		tor_blt(sna, converter, op, clip, span, i, j-i, unbounded);
1257		cell_list_reset(coverages);
1258	}
1259}
1260
1261static void
1262inplace_row(struct active_list *active, uint8_t *row, int width)
1263{
1264	struct edge *left = active->head.next;
1265
1266	while (&active->tail != left) {
1267		struct edge *right;
1268		int winding = left->dir;
1269		int lfx, rfx;
1270		int lix, rix;
1271
1272		left->height_left -= FAST_SAMPLES_Y;
1273		assert(left->height_left >= 0);
1274		if (!left->height_left) {
1275			left->prev->next = left->next;
1276			left->next->prev = left->prev;
1277		}
1278
1279		right = left->next;
1280		do {
1281			right->height_left -= FAST_SAMPLES_Y;
1282			assert(right->height_left >= 0);
1283			if (!right->height_left) {
1284				right->prev->next = right->next;
1285				right->next->prev = right->prev;
1286			}
1287
1288			winding += right->dir;
1289			if (0 == winding && right->cell != right->next->cell)
1290				break;
1291
1292			right = right->next;
1293		} while (1);
1294
1295		if (left->cell < 0) {
1296			lix = lfx = 0;
1297		} else if (left->cell >= width * FAST_SAMPLES_X) {
1298			lix = width;
1299			lfx = 0;
1300		} else
1301			FAST_SAMPLES_X_TO_INT_FRAC(left->cell, lix, lfx);
1302
1303		if (right->cell < 0) {
1304			rix = rfx = 0;
1305		} else if (right->cell >= width * FAST_SAMPLES_X) {
1306			rix = width;
1307			rfx = 0;
1308		} else
1309			FAST_SAMPLES_X_TO_INT_FRAC(right->cell, rix, rfx);
1310		if (lix == rix) {
1311			if (rfx != lfx) {
1312				assert(lix < width);
1313				row[lix] += (rfx-lfx) * 256 / FAST_SAMPLES_X;
1314			}
1315		} else {
1316			assert(lix < width);
1317			if (lfx == 0)
1318				row[lix] = 0xff;
1319			else
1320				row[lix] += 256 - lfx * 256 / FAST_SAMPLES_X;
1321
1322			assert(rix <= width);
1323			if (rfx) {
1324				assert(rix < width);
1325				row[rix] += rfx * 256 / FAST_SAMPLES_X;
1326			}
1327
1328			if (rix > ++lix) {
1329				uint8_t *r = row + lix;
1330				rix -= lix;
1331#if 0
1332				if (rix == 1)
1333					*row = 0xff;
1334				else
1335					memset(row, 0xff, rix);
1336#else
1337				if ((uintptr_t)r & 1 && rix) {
1338					*r++ = 0xff;
1339					rix--;
1340				}
1341				if ((uintptr_t)r & 2 && rix >= 2) {
1342					*(uint16_t *)r = 0xffff;
1343					r += 2;
1344					rix -= 2;
1345				}
1346				if ((uintptr_t)r & 4 && rix >= 4) {
1347					*(uint32_t *)r = 0xffffffff;
1348					r += 4;
1349					rix -= 4;
1350				}
1351				while (rix >= 8) {
1352					*(uint64_t *)r = 0xffffffffffffffff;
1353					r += 8;
1354					rix -= 8;
1355				}
1356				if (rix & 4) {
1357					*(uint32_t *)r = 0xffffffff;
1358					r += 4;
1359				}
1360				if (rix & 2) {
1361					*(uint16_t *)r = 0xffff;
1362					r += 2;
1363				}
1364				if (rix & 1)
1365					*r = 0xff;
1366#endif
1367			}
1368		}
1369
1370		left = right->next;
1371	}
1372}
1373
1374inline static void
1375inplace_subrow(struct active_list *active, int8_t *row,
1376	       int width, int *min, int *max)
1377{
1378	struct edge *edge = active->head.next;
1379	int prev_x = INT_MIN;
1380	int winding = 0, xstart = INT_MIN;
1381
1382	while (&active->tail != edge) {
1383		struct edge *next = edge->next;
1384
1385		winding += edge->dir;
1386		if (0 == winding) {
1387			if (edge->next->cell != edge->cell) {
1388				if (edge->cell <= xstart) {
1389					xstart = INT_MIN;
1390				} else  {
1391					int fx;
1392					int ix;
1393
1394					if (xstart < FAST_SAMPLES_X * width) {
1395						FAST_SAMPLES_X_TO_INT_FRAC(xstart, ix, fx);
1396						if (ix < *min)
1397							*min = ix;
1398
1399						row[ix++] += FAST_SAMPLES_X - fx;
1400						if (fx && ix < width)
1401							row[ix] += fx;
1402					}
1403
1404					xstart = edge->cell;
1405					if (xstart < FAST_SAMPLES_X * width) {
1406						FAST_SAMPLES_X_TO_INT_FRAC(xstart, ix, fx);
1407						row[ix] -= FAST_SAMPLES_X - fx;
1408						if (fx && ix + 1 < width)
1409							row[++ix] -= fx;
1410
1411						if (ix >= *max)
1412							*max = ix + 1;
1413
1414						xstart = INT_MIN;
1415					} else
1416						*max = width;
1417				}
1418			}
1419		} else if (xstart < 0) {
1420			xstart = MAX(edge->cell, 0);
1421		}
1422
1423		assert(edge->height_left > 0);
1424		if (--edge->height_left) {
1425			if (edge->dy) {
1426				edge->x.quo += edge->dxdy.quo;
1427				edge->x.rem += edge->dxdy.rem;
1428				if (edge->x.rem < 0) {
1429					--edge->x.quo;
1430					edge->x.rem += edge->dy;
1431				} else if (edge->x.rem >= 0) {
1432					++edge->x.quo;
1433					edge->x.rem -= edge->dy;
1434				}
1435				edge->cell = edge->x.quo + (edge->x.rem >= edge->dy/2);
1436			}
1437
1438			if (edge->cell < prev_x) {
1439				struct edge *pos = edge->prev;
1440				pos->next = next;
1441				next->prev = pos;
1442				do
1443					pos = pos->prev;
1444				while (edge->cell < pos->cell);
1445				pos->next->prev = edge;
1446				edge->next = pos->next;
1447				edge->prev = pos;
1448				pos->next = edge;
1449			} else
1450				prev_x = edge->cell;
1451		} else {
1452			edge->prev->next = next;
1453			next->prev = edge->prev;
1454		}
1455
1456		edge = next;
1457	}
1458}
1459
1460inline static void
1461inplace_end_subrows(struct active_list *active, uint8_t *row,
1462		    int8_t *buf, int width)
1463{
1464	int cover = 0;
1465
1466	while (width >= 4) {
1467		uint32_t dw;
1468		int v;
1469
1470		dw = *(uint32_t *)buf;
1471		buf += 4;
1472
1473		if (dw == 0) {
1474			v = cover * 256 / FAST_SAMPLES_XY;
1475			v -= v >> 8;
1476			v |= v << 8;
1477			dw = v | v << 16;
1478		} else {
1479			cover += (int8_t)(dw & 0xff);
1480			if (cover) {
1481				assert(cover > 0);
1482				v = cover * 256 / FAST_SAMPLES_XY;
1483				v -= v >> 8;
1484				dw >>= 8;
1485				dw |= v << 24;
1486			} else
1487				dw >>= 8;
1488
1489			cover += (int8_t)(dw & 0xff);
1490			if (cover) {
1491				assert(cover > 0);
1492				v = cover * 256 / FAST_SAMPLES_XY;
1493				v -= v >> 8;
1494				dw >>= 8;
1495				dw |= v << 24;
1496			} else
1497				dw >>= 8;
1498
1499			cover += (int8_t)(dw & 0xff);
1500			if (cover) {
1501				assert(cover > 0);
1502				v = cover * 256 / FAST_SAMPLES_XY;
1503				v -= v >> 8;
1504				dw >>= 8;
1505				dw |= v << 24;
1506			} else
1507				dw >>= 8;
1508
1509			cover += (int8_t)(dw & 0xff);
1510			if (cover) {
1511				assert(cover > 0);
1512				v = cover * 256 / FAST_SAMPLES_XY;
1513				v -= v >> 8;
1514				dw >>= 8;
1515				dw |= v << 24;
1516			} else
1517				dw >>= 8;
1518		}
1519
1520		*(uint32_t *)row = dw;
1521		row += 4;
1522		width -= 4;
1523	}
1524
1525	while (width--) {
1526		int v;
1527
1528		cover += *buf++;
1529		assert(cover >= 0);
1530
1531		v = cover * 256 / FAST_SAMPLES_XY;
1532		v -= v >> 8;
1533		*row++ = v;
1534	}
1535}
1536
1537static void
1538convert_mono(uint8_t *ptr, int w)
1539{
1540	while (w--) {
1541		*ptr = 0xff * (*ptr >= 0xf0);
1542		ptr++;
1543	}
1544}
1545
1546static void
1547tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
1548{
1549	int i, j, h = converter->extents.y2;
1550	struct polygon *polygon = converter->polygon;
1551	struct active_list *active = converter->active;
1552	struct edge *buckets[FAST_SAMPLES_Y] = { 0 };
1553	uint8_t *row = scratch->devPrivate.ptr;
1554	int stride = scratch->devKind;
1555	int width = scratch->drawable.width;
1556
1557	__DBG(("%s: mono=%d, buf?=%d\n", __FUNCTION__, mono, buf != NULL));
1558	assert(converter->extents.y1 == 0);
1559	assert(converter->extents.x1 == 0);
1560	assert(scratch->drawable.depth == 8);
1561
1562	/* Render each pixel row. */
1563	for (i = 0; i < h; i = j) {
1564		int do_full_step = 0;
1565		void *ptr = buf ?: row;
1566
1567		j = i + 1;
1568
1569		/* Determine if we can ignore this row or use the full pixel
1570		 * stepper. */
1571		if (fill_buckets(active, polygon->y_buckets[i], buckets) == 0) {
1572			if (buckets[0]) {
1573				merge_edges(active, buckets[0]);
1574				buckets[0] = NULL;
1575			}
1576			if (active->head.next == &active->tail) {
1577				for (; !polygon->y_buckets[j]; j++)
1578					;
1579				__DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
1580				       __FUNCTION__, i, j));
1581
1582				memset(row, 0, stride*(j-i));
1583				row += stride*(j-i);
1584				continue;
1585			}
1586
1587			do_full_step = can_full_step(active);
1588		}
1589
1590		__DBG(("%s: y=%d, do_full_step=%d, new edges=%d, min_height=%d, vertical=%d\n",
1591		       __FUNCTION__,
1592		       i, do_full_step,
1593		       polygon->y_buckets[i] != NULL));
1594		if (do_full_step) {
1595			memset(ptr, 0, width);
1596			inplace_row(active, ptr, width);
1597			if (mono)
1598				convert_mono(ptr, width);
1599			if (row != ptr)
1600				memcpy(row, ptr, width);
1601
1602			while (polygon->y_buckets[j] == NULL &&
1603			       do_full_step >= 2*FAST_SAMPLES_Y) {
1604				do_full_step -= FAST_SAMPLES_Y;
1605				row += stride;
1606				memcpy(row, ptr, width);
1607				j++;
1608			}
1609			if (j != i + 1)
1610				step_edges(active, j - (i + 1));
1611
1612			__DBG(("%s: vertical edges, full step (%d, %d)\n",
1613			       __FUNCTION__,  i, j));
1614		} else {
1615			int min = width, max = 0, suby;
1616
1617			/* Subsample this row. */
1618			memset(ptr, 0, width);
1619			for (suby = 0; suby < FAST_SAMPLES_Y; suby++) {
1620				if (buckets[suby]) {
1621					merge_edges(active, buckets[suby]);
1622					buckets[suby] = NULL;
1623				}
1624
1625				inplace_subrow(active, ptr, width, &min, &max);
1626			}
1627			assert(min >= 0 && max <= width);
1628			memset(row, 0, min);
1629			if (max > min) {
1630				inplace_end_subrows(active, row+min, (int8_t*)ptr+min, max-min);
1631				if (mono)
1632					convert_mono(row+min, max-min);
1633			}
1634			if (max < width)
1635				memset(row+max, 0, width-max);
1636		}
1637
1638		row += stride;
1639	}
1640}
1641
1642static int operator_is_bounded(uint8_t op)
1643{
1644	switch (op) {
1645	case PictOpOver:
1646	case PictOpOutReverse:
1647	case PictOpAdd:
1648		return true;
1649	default:
1650		return false;
1651	}
1652}
1653
1654static span_func_t
1655choose_span(struct sna_composite_spans_op *tmp,
1656	    PicturePtr dst,
1657	    PictFormatPtr maskFormat,
1658	    RegionPtr clip)
1659{
1660	span_func_t span;
1661
1662	if (is_mono(dst, maskFormat)) {
1663		/* XXX An imprecise approximation */
1664		if (maskFormat && !operator_is_bounded(tmp->base.op)) {
1665			span = tor_blt_span_mono_unbounded;
1666			if (clip->data)
1667				span = tor_blt_span_mono_unbounded_clipped;
1668		} else {
1669			span = tor_blt_span_mono;
1670			if (clip->data)
1671				span = tor_blt_span_mono_clipped;
1672		}
1673	} else {
1674		if (clip->data)
1675			span = tor_blt_span_clipped;
1676		else if (tmp->base.damage == NULL)
1677			span = tor_blt_span__no_damage;
1678		else
1679			span = tor_blt_span;
1680	}
1681
1682	return span;
1683}
1684
1685struct span_thread {
1686	struct sna *sna;
1687	const struct sna_composite_spans_op *op;
1688	const xTrapezoid *traps;
1689	RegionPtr clip;
1690	span_func_t span;
1691	BoxRec extents;
1692	int dx, dy, draw_y;
1693	int ntrap;
1694	bool unbounded;
1695};
1696
1697#define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box))
1698struct span_thread_boxes {
1699	const struct sna_composite_spans_op *op;
1700	const BoxRec *clip_start, *clip_end;
1701	int num_boxes;
1702	struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES];
1703};
1704
1705static void span_thread_add_box(struct sna *sna, void *data,
1706				const BoxRec *box, float alpha)
1707{
1708	struct span_thread_boxes *b = data;
1709
1710	__DBG(("%s: adding box with alpha=%f\n", __FUNCTION__, alpha));
1711
1712	if (unlikely(b->num_boxes == SPAN_THREAD_MAX_BOXES)) {
1713		DBG(("%s: flushing %d boxes\n", __FUNCTION__, b->num_boxes));
1714		b->op->thread_boxes(sna, b->op, b->boxes, b->num_boxes);
1715		b->num_boxes = 0;
1716	}
1717
1718	b->boxes[b->num_boxes].box = *box++;
1719	b->boxes[b->num_boxes].alpha = alpha;
1720	b->num_boxes++;
1721	assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES);
1722}
1723
1724static void
1725span_thread_box(struct sna *sna,
1726		struct sna_composite_spans_op *op,
1727		pixman_region16_t *clip,
1728		const BoxRec *box,
1729		int coverage)
1730{
1731	struct span_thread_boxes *b = (struct span_thread_boxes *)op;
1732
1733	__DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
1734	if (b->num_boxes) {
1735		struct sna_opacity_box *bb = &b->boxes[b->num_boxes-1];
1736		if (bb->box.x1 == box->x1 &&
1737		    bb->box.x2 == box->x2 &&
1738		    bb->box.y2 == box->y1 &&
1739		    bb->alpha == AREA_TO_ALPHA(coverage)) {
1740			bb->box.y2 = box->y2;
1741			__DBG(("%s: contracted double row: %d -> %d\n", __func__, bb->box.y1, bb->box.y2));
1742			return;
1743		}
1744	}
1745
1746	span_thread_add_box(sna, op, box, AREA_TO_ALPHA(coverage));
1747}
1748
1749static void
1750span_thread_clipped_box(struct sna *sna,
1751			struct sna_composite_spans_op *op,
1752			pixman_region16_t *clip,
1753			const BoxRec *box,
1754			int coverage)
1755{
1756	struct span_thread_boxes *b = (struct span_thread_boxes *)op;
1757	const BoxRec *c;
1758
1759	__DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2,
1760	       AREA_TO_ALPHA(coverage)));
1761
1762	b->clip_start =
1763		find_clip_box_for_y(b->clip_start, b->clip_end, box->y1);
1764
1765	c = b->clip_start;
1766	while (c != b->clip_end) {
1767		BoxRec clipped;
1768
1769		if (box->y2 <= c->y1)
1770			break;
1771
1772		clipped = *box;
1773		if (!box_intersect(&clipped, c++))
1774			continue;
1775
1776		span_thread_add_box(sna, op, &clipped, AREA_TO_ALPHA(coverage));
1777	}
1778}
1779
1780static span_func_t
1781thread_choose_span(struct sna_composite_spans_op *tmp,
1782		   PicturePtr dst,
1783		   PictFormatPtr maskFormat,
1784		   RegionPtr clip)
1785{
1786	span_func_t span;
1787
1788	if (tmp->base.damage) {
1789		DBG(("%s: damaged -> no thread support\n", __FUNCTION__));
1790		return NULL;
1791	}
1792
1793	if (is_mono(dst, maskFormat)) {
1794		DBG(("%s: mono rendering -> no thread support\n", __FUNCTION__));
1795		return NULL;
1796	} else {
1797		assert(tmp->thread_boxes);
1798		DBG(("%s: clipped? %d\n", __FUNCTION__, clip->data != NULL));
1799		if (clip->data)
1800			span = span_thread_clipped_box;
1801		else
1802			span = span_thread_box;
1803	}
1804
1805	return span;
1806}
1807
1808inline static void
1809span_thread_boxes_init(struct span_thread_boxes *boxes,
1810		       const struct sna_composite_spans_op *op,
1811		       const RegionRec *clip)
1812{
1813	boxes->op = op;
1814	region_get_boxes(clip, &boxes->clip_start, &boxes->clip_end);
1815	boxes->num_boxes = 0;
1816}
1817
1818static void
1819span_thread(void *arg)
1820{
1821	struct span_thread *thread = arg;
1822	struct span_thread_boxes boxes;
1823	struct tor tor;
1824	const xTrapezoid *t;
1825	int n, y1, y2;
1826
1827	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
1828		return;
1829
1830	span_thread_boxes_init(&boxes, thread->op, thread->clip);
1831
1832	y1 = thread->extents.y1 - thread->draw_y;
1833	y2 = thread->extents.y2 - thread->draw_y;
1834	for (n = thread->ntrap, t = thread->traps; n--; t++) {
1835		if (pixman_fixed_integer_floor(t->top) >= y2 ||
1836		    pixman_fixed_integer_ceil(t->bottom) <= y1)
1837			continue;
1838
1839		tor_add_trapezoid(&tor, t, thread->dx, thread->dy);
1840	}
1841
1842	tor_render(thread->sna, &tor,
1843		   (struct sna_composite_spans_op *)&boxes, thread->clip,
1844		   thread->span, thread->unbounded);
1845
1846	tor_fini(&tor);
1847
1848	if (boxes.num_boxes) {
1849		DBG(("%s: flushing %d boxes\n", __FUNCTION__, boxes.num_boxes));
1850		assert(boxes.num_boxes <= SPAN_THREAD_MAX_BOXES);
1851		thread->op->thread_boxes(thread->sna, thread->op,
1852					 boxes.boxes, boxes.num_boxes);
1853	}
1854}
1855
1856bool
1857imprecise_trapezoid_span_converter(struct sna *sna,
1858				   CARD8 op, PicturePtr src, PicturePtr dst,
1859				   PictFormatPtr maskFormat, unsigned int flags,
1860				   INT16 src_x, INT16 src_y,
1861				   int ntrap, xTrapezoid *traps)
1862{
1863	struct sna_composite_spans_op tmp;
1864	pixman_region16_t clip;
1865	int16_t dst_x, dst_y;
1866	bool was_clear;
1867	int dx, dy, n;
1868	int num_threads;
1869
1870	if (NO_IMPRECISE)
1871		return false;
1872
1873	if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, flags)) {
1874		DBG(("%s: fallback -- composite spans not supported\n",
1875		     __FUNCTION__));
1876		return false;
1877	}
1878
1879	if (!trapezoids_bounds(ntrap, traps, &clip.extents))
1880		return true;
1881
1882#if 0
1883	if (clip.extents.y2 - clip.extents.y1 < 64 &&
1884	    clip.extents.x2 - clip.extents.x1 < 64) {
1885		DBG(("%s: fallback -- traps extents too small %dx%d\n",
1886		     __FUNCTION__, extents.y2 - extents.y1, extents.x2 - extents.x1));
1887		return false;
1888	}
1889#endif
1890
1891	DBG(("%s: extents (%d, %d), (%d, %d)\n",
1892	     __FUNCTION__,
1893	     clip.extents.x1, clip.extents.y1,
1894	     clip.extents.x2, clip.extents.y2));
1895
1896	trapezoid_origin(&traps[0].left, &dst_x, &dst_y);
1897
1898	if (!sna_compute_composite_region(&clip,
1899					  src, NULL, dst,
1900					  src_x + clip.extents.x1 - dst_x,
1901					  src_y + clip.extents.y1 - dst_y,
1902					  0, 0,
1903					  clip.extents.x1, clip.extents.y1,
1904					  clip.extents.x2 - clip.extents.x1,
1905					  clip.extents.y2 - clip.extents.y1)) {
1906		DBG(("%s: trapezoids do not intersect drawable clips\n",
1907		     __FUNCTION__)) ;
1908		return true;
1909	}
1910
1911	if (!sna->render.check_composite_spans(sna, op, src, dst,
1912					       clip.extents.x2 - clip.extents.x1,
1913					       clip.extents.y2 - clip.extents.y1,
1914					       flags)) {
1915		DBG(("%s: fallback -- composite spans not supported\n",
1916		     __FUNCTION__));
1917		return false;
1918	}
1919
1920	dx = dst->pDrawable->x;
1921	dy = dst->pDrawable->y;
1922
1923	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
1924	     __FUNCTION__,
1925	     clip.extents.x1, clip.extents.y1,
1926	     clip.extents.x2, clip.extents.y2,
1927	     dx, dy,
1928	     src_x + clip.extents.x1 - dst_x - dx,
1929	     src_y + clip.extents.y1 - dst_y - dy));
1930
1931	was_clear = sna_drawable_is_clear(dst->pDrawable);
1932	switch (op) {
1933	case PictOpAdd:
1934	case PictOpOver:
1935		if (was_clear)
1936			op = PictOpSrc;
1937		break;
1938	case PictOpIn:
1939		if (was_clear)
1940			return true;
1941		break;
1942	}
1943
1944	if (!sna->render.composite_spans(sna, op, src, dst,
1945					 src_x + clip.extents.x1 - dst_x - dx,
1946					 src_y + clip.extents.y1 - dst_y - dy,
1947					 clip.extents.x1,  clip.extents.y1,
1948					 clip.extents.x2 - clip.extents.x1,
1949					 clip.extents.y2 - clip.extents.y1,
1950					 flags, memset(&tmp, 0, sizeof(tmp)))) {
1951		DBG(("%s: fallback -- composite spans render op not supported\n",
1952		     __FUNCTION__));
1953		return false;
1954	}
1955
1956	dx *= FAST_SAMPLES_X;
1957	dy *= FAST_SAMPLES_Y;
1958
1959	num_threads = 1;
1960	if (!NO_GPU_THREADS &&
1961	    (flags & COMPOSITE_SPANS_RECTILINEAR) == 0 &&
1962	    tmp.thread_boxes &&
1963	    thread_choose_span(&tmp, dst, maskFormat, &clip))
1964		num_threads = sna_use_threads(clip.extents.x2-clip.extents.x1,
1965					      clip.extents.y2-clip.extents.y1,
1966					      16);
1967	DBG(("%s: using %d threads\n", __FUNCTION__, num_threads));
1968	if (num_threads == 1) {
1969		struct tor tor;
1970
1971		if (!tor_init(&tor, &clip.extents, 2*ntrap))
1972			goto skip;
1973
1974		for (n = 0; n < ntrap; n++) {
1975			if (pixman_fixed_integer_floor(traps[n].top) + dst->pDrawable->y >= clip.extents.y2 ||
1976			    pixman_fixed_integer_ceil(traps[n].bottom) + dst->pDrawable->y <= clip.extents.y1)
1977				continue;
1978
1979			tor_add_trapezoid(&tor, &traps[n], dx, dy);
1980		}
1981
1982		tor_render(sna, &tor, &tmp, &clip,
1983			   choose_span(&tmp, dst, maskFormat, &clip),
1984			   !was_clear && maskFormat && !operator_is_bounded(op));
1985
1986		tor_fini(&tor);
1987	} else {
1988		struct span_thread threads[num_threads];
1989		int y, h;
1990
1991		DBG(("%s: using %d threads for span compositing %dx%d\n",
1992		     __FUNCTION__, num_threads,
1993		     clip.extents.x2 - clip.extents.x1,
1994		     clip.extents.y2 - clip.extents.y1));
1995
1996		threads[0].sna = sna;
1997		threads[0].op = &tmp;
1998		threads[0].traps = traps;
1999		threads[0].ntrap = ntrap;
2000		threads[0].extents = clip.extents;
2001		threads[0].clip = &clip;
2002		threads[0].dx = dx;
2003		threads[0].dy = dy;
2004		threads[0].draw_y = dst->pDrawable->y;
2005		threads[0].unbounded = !was_clear && maskFormat && !operator_is_bounded(op);
2006		threads[0].span = thread_choose_span(&tmp, dst, maskFormat, &clip);
2007
2008		y = clip.extents.y1;
2009		h = clip.extents.y2 - clip.extents.y1;
2010		h = (h + num_threads - 1) / num_threads;
2011		num_threads -= (num_threads-1) * h >= clip.extents.y2 - clip.extents.y1;
2012
2013		for (n = 1; n < num_threads; n++) {
2014			threads[n] = threads[0];
2015			threads[n].extents.y1 = y;
2016			threads[n].extents.y2 = y += h;
2017
2018			sna_threads_run(n, span_thread, &threads[n]);
2019		}
2020
2021		assert(y < threads[0].extents.y2);
2022		threads[0].extents.y1 = y;
2023		span_thread(&threads[0]);
2024
2025		sna_threads_wait();
2026	}
2027skip:
2028	tmp.done(sna, &tmp);
2029
2030	REGION_UNINIT(NULL, &clip);
2031	return true;
2032}
2033
2034static void
2035tor_blt_mask(struct sna *sna,
2036	     struct sna_composite_spans_op *op,
2037	     pixman_region16_t *clip,
2038	     const BoxRec *box,
2039	     int coverage)
2040{
2041	uint8_t *ptr = (uint8_t *)op;
2042	int stride = (intptr_t)clip;
2043	int h, w;
2044
2045	coverage = 256 * coverage / FAST_SAMPLES_XY;
2046	coverage -= coverage >> 8;
2047
2048	ptr += box->y1 * stride + box->x1;
2049
2050	h = box->y2 - box->y1;
2051	w = box->x2 - box->x1;
2052	if ((w | h) == 1) {
2053		*ptr = coverage;
2054	} else if (w == 1) {
2055		do {
2056			*ptr = coverage;
2057			ptr += stride;
2058		} while (--h);
2059	} else do {
2060		memset(ptr, coverage, w);
2061		ptr += stride;
2062	} while (--h);
2063}
2064
2065static void
2066tor_blt_mask_mono(struct sna *sna,
2067		  struct sna_composite_spans_op *op,
2068		  pixman_region16_t *clip,
2069		  const BoxRec *box,
2070		  int coverage)
2071{
2072	tor_blt_mask(sna, op, clip, box,
2073		     coverage < FAST_SAMPLES_XY/2 ? 0 : FAST_SAMPLES_XY);
2074}
2075
2076bool
2077imprecise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
2078				   PictFormatPtr maskFormat, unsigned flags,
2079				   INT16 src_x, INT16 src_y,
2080				   int ntrap, xTrapezoid *traps)
2081{
2082	struct tor tor;
2083	ScreenPtr screen = dst->pDrawable->pScreen;
2084	PixmapPtr scratch;
2085	PicturePtr mask;
2086	BoxRec extents;
2087	int16_t dst_x, dst_y;
2088	int dx, dy;
2089	int error, n;
2090
2091	if (NO_IMPRECISE)
2092		return false;
2093
2094	if (maskFormat == NULL && ntrap > 1) {
2095		DBG(("%s: individual rasterisation requested\n",
2096		     __FUNCTION__));
2097		do {
2098			/* XXX unwind errors? */
2099			if (!imprecise_trapezoid_mask_converter(op, src, dst, NULL, flags,
2100								src_x, src_y, 1, traps++))
2101				return false;
2102		} while (--ntrap);
2103		return true;
2104	}
2105
2106	if (!trapezoids_bounds(ntrap, traps, &extents))
2107		return true;
2108
2109	DBG(("%s: ntraps=%d, extents (%d, %d), (%d, %d)\n",
2110	     __FUNCTION__, ntrap, extents.x1, extents.y1, extents.x2, extents.y2));
2111
2112	if (!sna_compute_composite_extents(&extents,
2113					   src, NULL, dst,
2114					   src_x, src_y,
2115					   0, 0,
2116					   extents.x1, extents.y1,
2117					   extents.x2 - extents.x1,
2118					   extents.y2 - extents.y1))
2119		return true;
2120
2121	DBG(("%s: extents (%d, %d), (%d, %d)\n",
2122	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
2123
2124	extents.y2 -= extents.y1;
2125	extents.x2 -= extents.x1;
2126	extents.x1 -= dst->pDrawable->x;
2127	extents.y1 -= dst->pDrawable->y;
2128	dst_x = extents.x1;
2129	dst_y = extents.y1;
2130	dx = -extents.x1 * FAST_SAMPLES_X;
2131	dy = -extents.y1 * FAST_SAMPLES_Y;
2132	extents.x1 = extents.y1 = 0;
2133
2134	DBG(("%s: mask (%dx%d), dx=(%d, %d)\n",
2135	     __FUNCTION__, extents.x2, extents.y2, dx, dy));
2136	scratch = sna_pixmap_create_upload(screen,
2137					   extents.x2, extents.y2, 8,
2138					   KGEM_BUFFER_WRITE_INPLACE);
2139	if (!scratch)
2140		return true;
2141
2142	DBG(("%s: created buffer %p, stride %d\n",
2143	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
2144
2145	if (!tor_init(&tor, &extents, 2*ntrap)) {
2146		sna_pixmap_destroy(scratch);
2147		return true;
2148	}
2149
2150	for (n = 0; n < ntrap; n++) {
2151		if (pixman_fixed_to_int(traps[n].top) - dst_y >= extents.y2 ||
2152		    pixman_fixed_to_int(traps[n].bottom) - dst_y < 0)
2153			continue;
2154
2155		tor_add_trapezoid(&tor, &traps[n], dx, dy);
2156	}
2157
2158	if (extents.x2 <= TOR_INPLACE_SIZE) {
2159		uint8_t buf[TOR_INPLACE_SIZE];
2160		tor_inplace(&tor, scratch, is_mono(dst, maskFormat),
2161			    scratch->usage_hint ? NULL : buf);
2162	} else {
2163		tor_render(NULL, &tor,
2164			   scratch->devPrivate.ptr,
2165			   (void *)(intptr_t)scratch->devKind,
2166			   is_mono(dst, maskFormat) ? tor_blt_mask_mono : tor_blt_mask,
2167			   true);
2168	}
2169	tor_fini(&tor);
2170
2171	mask = CreatePicture(0, &scratch->drawable,
2172			     PictureMatchFormat(screen, 8, PICT_a8),
2173			     0, 0, serverClient, &error);
2174	if (mask) {
2175		int16_t x0, y0;
2176
2177		trapezoid_origin(&traps[0].left, &x0, &y0);
2178
2179		CompositePicture(op, src, mask, dst,
2180				 src_x + dst_x - x0,
2181				 src_y + dst_y - y0,
2182				 0, 0,
2183				 dst_x, dst_y,
2184				 extents.x2, extents.y2);
2185		FreePicture(mask, 0);
2186	}
2187	sna_pixmap_destroy(scratch);
2188
2189	return true;
2190}
2191
2192struct inplace {
2193	uint8_t *ptr;
2194	uint32_t stride;
2195	union {
2196		uint8_t opacity;
2197		uint32_t color;
2198	};
2199};
2200
2201static force_inline uint8_t coverage_opacity(int coverage, uint8_t opacity)
2202{
2203	coverage = coverage * 256 / FAST_SAMPLES_XY;
2204	coverage -= coverage >> 8;
2205	return opacity == 255 ? coverage : mul_8_8(coverage, opacity);
2206}
2207
2208static void _tor_blt_src(struct inplace *in, const BoxRec *box, uint8_t v)
2209{
2210	uint8_t *ptr = in->ptr;
2211	int h, w;
2212
2213	ptr += box->y1 * in->stride + box->x1;
2214
2215	h = box->y2 - box->y1;
2216	w = box->x2 - box->x1;
2217	if ((w | h) == 1) {
2218		*ptr = v;
2219	} else if (w == 1) {
2220		do {
2221			*ptr = v;
2222			ptr += in->stride;
2223		} while (--h);
2224	} else do {
2225		memset(ptr, v, w);
2226		ptr += in->stride;
2227	} while (--h);
2228}
2229
2230struct clipped_span {
2231	span_func_t span;
2232	const BoxRec *clip_start, *clip_end;
2233};
2234
2235static void
2236tor_blt_clipped(struct sna *sna,
2237		struct sna_composite_spans_op *op,
2238		pixman_region16_t *clip,
2239		const BoxRec *box,
2240		int coverage)
2241{
2242	struct clipped_span *cs = (struct clipped_span *)clip;
2243	const BoxRec *c;
2244
2245	cs->clip_start =
2246		find_clip_box_for_y(cs->clip_start, cs->clip_end, box->y1);
2247
2248	c = cs->clip_start;
2249	while (c != cs->clip_end) {
2250		BoxRec clipped;
2251
2252		if (box->y2 <= c->y1)
2253			break;
2254
2255		clipped = *box;
2256		if (!box_intersect(&clipped, c++))
2257			continue;
2258
2259		cs->span(sna, op, NULL, &clipped, coverage);
2260	}
2261}
2262
2263inline static span_func_t
2264clipped_span(struct clipped_span *cs,
2265	     span_func_t span,
2266	     const RegionRec *clip)
2267{
2268	if (clip->data) {
2269		cs->span = span;
2270		region_get_boxes(clip, &cs->clip_start, &cs->clip_end);
2271		span = tor_blt_clipped;
2272	}
2273	return span;
2274}
2275
2276static void
2277tor_blt_src(struct sna *sna,
2278	    struct sna_composite_spans_op *op,
2279	    pixman_region16_t *clip,
2280	    const BoxRec *box,
2281	    int coverage)
2282{
2283	struct inplace *in = (struct inplace *)op;
2284
2285	_tor_blt_src(in, box, coverage_opacity(coverage, in->opacity));
2286}
2287
2288static void
2289tor_blt_in(struct sna *sna,
2290	   struct sna_composite_spans_op *op,
2291	   pixman_region16_t *clip,
2292	   const BoxRec *box,
2293	   int coverage)
2294{
2295	struct inplace *in = (struct inplace *)op;
2296	uint8_t *ptr = in->ptr;
2297	int h, w, i;
2298
2299	if (coverage == 0) {
2300		_tor_blt_src(in, box, 0);
2301		return;
2302	}
2303
2304	coverage = coverage_opacity(coverage, in->opacity);
2305	if (coverage == 0xff)
2306		return;
2307
2308	ptr += box->y1 * in->stride + box->x1;
2309
2310	h = box->y2 - box->y1;
2311	w = box->x2 - box->x1;
2312	do {
2313		for (i = 0; i < w; i++)
2314			ptr[i] = mul_8_8(ptr[i], coverage);
2315		ptr += in->stride;
2316	} while (--h);
2317}
2318
2319static void
2320tor_blt_add(struct sna *sna,
2321	    struct sna_composite_spans_op *op,
2322	    pixman_region16_t *clip,
2323	    const BoxRec *box,
2324	    int coverage)
2325{
2326	struct inplace *in = (struct inplace *)op;
2327	uint8_t *ptr = in->ptr;
2328	int h, w, v, i;
2329
2330	if (coverage == 0)
2331		return;
2332
2333	coverage = coverage_opacity(coverage, in->opacity);
2334	if (coverage == 0xff) {
2335		_tor_blt_src(in, box, 0xff);
2336		return;
2337	}
2338
2339	ptr += box->y1 * in->stride + box->x1;
2340
2341	h = box->y2 - box->y1;
2342	w = box->x2 - box->x1;
2343	if ((w | h) == 1) {
2344		v = coverage + *ptr;
2345		*ptr = v >= 255 ? 255 : v;
2346	} else {
2347		do {
2348			for (i = 0; i < w; i++) {
2349				v = coverage + ptr[i];
2350				ptr[i] = v >= 255 ? 255 : v;
2351			}
2352			ptr += in->stride;
2353		} while (--h);
2354	}
2355}
2356
2357static void
2358tor_blt_lerp32(struct sna *sna,
2359	       struct sna_composite_spans_op *op,
2360	       pixman_region16_t *clip,
2361	       const BoxRec *box,
2362	       int coverage)
2363{
2364	struct inplace *in = (struct inplace *)op;
2365	uint32_t *ptr = (uint32_t *)in->ptr;
2366	int stride = in->stride / sizeof(uint32_t);
2367	int h, w, i;
2368
2369	if (coverage == 0)
2370		return;
2371
2372	sigtrap_assert_active();
2373	ptr += box->y1 * stride + box->x1;
2374
2375	h = box->y2 - box->y1;
2376	w = box->x2 - box->x1;
2377	if (coverage == FAST_SAMPLES_XY) {
2378		if ((w | h) == 1) {
2379			*ptr = in->color;
2380		} else {
2381			if (w < 16) {
2382				do {
2383					for (i = 0; i < w; i++)
2384						ptr[i] = in->color;
2385					ptr += stride;
2386				} while (--h);
2387			} else {
2388				pixman_fill(ptr, stride, 32,
2389					    0, 0, w, h, in->color);
2390			}
2391		}
2392	} else {
2393		coverage = coverage * 256 / FAST_SAMPLES_XY;
2394		coverage -= coverage >> 8;
2395
2396		if ((w | h) == 1) {
2397			*ptr = lerp8x4(in->color, coverage, *ptr);
2398		} else if (w == 1) {
2399			do {
2400				*ptr = lerp8x4(in->color, coverage, *ptr);
2401				ptr += stride;
2402			} while (--h);
2403		} else{
2404			do {
2405				for (i = 0; i < w; i++)
2406					ptr[i] = lerp8x4(in->color, coverage, ptr[i]);
2407				ptr += stride;
2408			} while (--h);
2409		}
2410	}
2411}
2412
2413struct pixman_inplace {
2414	pixman_image_t *image, *source, *mask;
2415	uint32_t color;
2416	uint32_t *bits;
2417	int dx, dy;
2418	int sx, sy;
2419	uint8_t op;
2420};
2421
2422static void
2423pixmask_span_solid(struct sna *sna,
2424		   struct sna_composite_spans_op *op,
2425		   pixman_region16_t *clip,
2426		   const BoxRec *box,
2427		   int coverage)
2428{
2429	struct pixman_inplace *pi = (struct pixman_inplace *)op;
2430	if (coverage != FAST_SAMPLES_XY) {
2431		coverage = coverage * 256 / FAST_SAMPLES_XY;
2432		coverage -= coverage >> 8;
2433		*pi->bits = mul_4x8_8(pi->color, coverage);
2434	} else
2435		*pi->bits = pi->color;
2436	pixman_image_composite(pi->op, pi->source, NULL, pi->image,
2437			       box->x1, box->y1,
2438			       0, 0,
2439			       pi->dx + box->x1, pi->dy + box->y1,
2440			       box->x2 - box->x1, box->y2 - box->y1);
2441}
2442
2443static void
2444pixmask_span(struct sna *sna,
2445	     struct sna_composite_spans_op *op,
2446	     pixman_region16_t *clip,
2447	     const BoxRec *box,
2448	     int coverage)
2449{
2450	struct pixman_inplace *pi = (struct pixman_inplace *)op;
2451	pixman_image_t *mask = NULL;
2452	if (coverage != FAST_SAMPLES_XY) {
2453		coverage = coverage * 256 / FAST_SAMPLES_XY;
2454		coverage -= coverage >> 8;
2455		*pi->bits = coverage;
2456		mask = pi->mask;
2457	}
2458	pixman_image_composite(pi->op, pi->source, mask, pi->image,
2459			       pi->sx + box->x1, pi->sy + box->y1,
2460			       0, 0,
2461			       pi->dx + box->x1, pi->dy + box->y1,
2462			       box->x2 - box->x1, box->y2 - box->y1);
2463}
2464
2465struct inplace_x8r8g8b8_thread {
2466	xTrapezoid *traps;
2467	PicturePtr dst, src;
2468	BoxRec extents;
2469	int dx, dy;
2470	int ntrap;
2471	bool lerp, is_solid;
2472	uint32_t color;
2473	int16_t src_x, src_y;
2474	uint8_t op;
2475};
2476
2477static void inplace_x8r8g8b8_thread(void *arg)
2478{
2479	struct inplace_x8r8g8b8_thread *thread = arg;
2480	struct tor tor;
2481	span_func_t span;
2482	struct clipped_span clipped;
2483	RegionPtr clip;
2484	int y1, y2, n;
2485
2486	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
2487		return;
2488
2489	y1 = thread->extents.y1 - thread->dst->pDrawable->y;
2490	y2 = thread->extents.y2 - thread->dst->pDrawable->y;
2491	for (n = 0; n < thread->ntrap; n++) {
2492		if (pixman_fixed_to_int(thread->traps[n].top) >= y2 ||
2493		    pixman_fixed_to_int(thread->traps[n].bottom) < y1)
2494			continue;
2495
2496		tor_add_trapezoid(&tor, &thread->traps[n], thread->dx, thread->dy);
2497	}
2498
2499	clip = thread->dst->pCompositeClip;
2500	if (thread->lerp) {
2501		struct inplace inplace;
2502		int16_t dst_x, dst_y;
2503		PixmapPtr pixmap;
2504
2505		pixmap = get_drawable_pixmap(thread->dst->pDrawable);
2506
2507		inplace.ptr = pixmap->devPrivate.ptr;
2508		if (get_drawable_deltas(thread->dst->pDrawable, pixmap, &dst_x, &dst_y))
2509			inplace.ptr += dst_y * pixmap->devKind + dst_x * 4;
2510		inplace.stride = pixmap->devKind;
2511		inplace.color = thread->color;
2512
2513		span = clipped_span(&clipped, tor_blt_lerp32, clip);
2514
2515		tor_render(NULL, &tor,
2516			   (void*)&inplace, (void*)&clipped,
2517			   span, false);
2518	} else if (thread->is_solid) {
2519		struct pixman_inplace pi;
2520
2521		pi.image = image_from_pict(thread->dst, false, &pi.dx, &pi.dy);
2522		pi.op = thread->op;
2523		pi.color = thread->color;
2524
2525		pi.bits = (uint32_t *)&pi.sx;
2526		pi.source = pixman_image_create_bits(PIXMAN_a8r8g8b8,
2527						     1, 1, pi.bits, 0);
2528		pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);
2529
2530		span = clipped_span(&clipped, pixmask_span_solid, clip);
2531
2532		tor_render(NULL, &tor,
2533			   (void*)&pi, (void *)&clipped,
2534			   span, false);
2535
2536		pixman_image_unref(pi.source);
2537		pixman_image_unref(pi.image);
2538	} else {
2539		struct pixman_inplace pi;
2540		int16_t x0, y0;
2541
2542		trapezoid_origin(&thread->traps[0].left, &x0, &y0);
2543
2544		pi.image = image_from_pict(thread->dst, false, &pi.dx, &pi.dy);
2545		pi.source = image_from_pict(thread->src, false, &pi.sx, &pi.sy);
2546		pi.sx += thread->src_x - x0;
2547		pi.sy += thread->src_y - y0;
2548		pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, NULL, 0);
2549		pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL);
2550		pi.bits = pixman_image_get_data(pi.mask);
2551		pi.op = thread->op;
2552
2553		span = clipped_span(&clipped, pixmask_span, clip);
2554
2555		tor_render(NULL, &tor,
2556			   (void*)&pi, (void *)&clipped,
2557			   span, false);
2558
2559		pixman_image_unref(pi.mask);
2560		pixman_image_unref(pi.source);
2561		pixman_image_unref(pi.image);
2562	}
2563
2564	tor_fini(&tor);
2565}
2566
2567static bool
2568trapezoid_span_inplace__x8r8g8b8(CARD8 op,
2569				 PicturePtr dst,
2570				 PicturePtr src, int16_t src_x, int16_t src_y,
2571				 PictFormatPtr maskFormat,
2572				 int ntrap, xTrapezoid *traps)
2573{
2574	uint32_t color;
2575	bool lerp, is_solid;
2576	RegionRec region;
2577	int dx, dy;
2578	int num_threads, n;
2579
2580	lerp = false;
2581	is_solid = sna_picture_is_solid(src, &color);
2582	if (is_solid) {
2583		if (op == PictOpOver && (color >> 24) == 0xff)
2584			op = PictOpSrc;
2585		if (op == PictOpOver && sna_drawable_is_clear(dst->pDrawable))
2586			op = PictOpSrc;
2587		lerp = op == PictOpSrc;
2588	}
2589	if (!lerp) {
2590		switch (op) {
2591		case PictOpOver:
2592		case PictOpAdd:
2593		case PictOpOutReverse:
2594			break;
2595		case PictOpSrc:
2596			if (!sna_drawable_is_clear(dst->pDrawable))
2597				return false;
2598			break;
2599		default:
2600			return false;
2601		}
2602	}
2603
2604	if (maskFormat == NULL && ntrap > 1) {
2605		DBG(("%s: individual rasterisation requested\n",
2606		     __FUNCTION__));
2607		do {
2608			/* XXX unwind errors? */
2609			if (!trapezoid_span_inplace__x8r8g8b8(op, dst,
2610							      src, src_x, src_y,
2611							      NULL, 1, traps++))
2612				return false;
2613		} while (--ntrap);
2614		return true;
2615	}
2616
2617	if (!trapezoids_bounds(ntrap, traps, &region.extents))
2618		return true;
2619
2620	DBG(("%s: extents (%d, %d), (%d, %d)\n",
2621	     __FUNCTION__,
2622	     region.extents.x1, region.extents.y1,
2623	     region.extents.x2, region.extents.y2));
2624
2625	if (!sna_compute_composite_extents(&region.extents,
2626					   src, NULL, dst,
2627					   src_x, src_y,
2628					   0, 0,
2629					   region.extents.x1, region.extents.y1,
2630					   region.extents.x2 - region.extents.x1,
2631					   region.extents.y2 - region.extents.y1))
2632		return true;
2633
2634	DBG(("%s: clipped extents (%d, %d), (%d, %d)\n",
2635	     __FUNCTION__,
2636	     region.extents.x1, region.extents.y1,
2637	     region.extents.x2, region.extents.y2));
2638
2639	region.data = NULL;
2640	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
2641					    MOVE_WRITE | MOVE_READ))
2642		return true;
2643
2644	if (!is_solid && src->pDrawable) {
2645		if (!sna_drawable_move_to_cpu(src->pDrawable,
2646					      MOVE_READ))
2647			return true;
2648
2649		if (src->alphaMap &&
2650		    !sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
2651					      MOVE_READ))
2652			return true;
2653	}
2654
2655	dx = dst->pDrawable->x * FAST_SAMPLES_X;
2656	dy = dst->pDrawable->y * FAST_SAMPLES_Y;
2657
2658	num_threads = sna_use_threads(4*(region.extents.x2 - region.extents.x1),
2659				      region.extents.y2 - region.extents.y1,
2660				      16);
2661
2662	DBG(("%s: %dx%d, format=%x, op=%d, lerp?=%d, num_threads=%d\n",
2663	     __FUNCTION__,
2664	     region.extents.x2 - region.extents.x1,
2665	     region.extents.y2 - region.extents.y1,
2666	     dst->format, op, lerp, num_threads));
2667
2668	if (num_threads == 1) {
2669		struct tor tor;
2670		span_func_t span;
2671		struct clipped_span clipped;
2672
2673		if (!tor_init(&tor, &region.extents, 2*ntrap))
2674			return true;
2675
2676		for (n = 0; n < ntrap; n++) {
2677			if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y ||
2678			    pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y)
2679				continue;
2680
2681			tor_add_trapezoid(&tor, &traps[n], dx, dy);
2682		}
2683
2684		if (lerp) {
2685			struct inplace inplace;
2686			PixmapPtr pixmap;
2687			int16_t dst_x, dst_y;
2688
2689			pixmap = get_drawable_pixmap(dst->pDrawable);
2690
2691			inplace.ptr = pixmap->devPrivate.ptr;
2692			if (get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y))
2693				inplace.ptr += dst_y * pixmap->devKind + dst_x * 4;
2694			inplace.stride = pixmap->devKind;
2695			inplace.color = color;
2696
2697			span = clipped_span(&clipped, tor_blt_lerp32, dst->pCompositeClip);
2698
2699			DBG(("%s: render inplace op=%d, color=%08x\n",
2700			     __FUNCTION__, op, color));
2701
2702			if (sigtrap_get() == 0) {
2703				tor_render(NULL, &tor,
2704					   (void*)&inplace, (void*)&clipped,
2705					   span, false);
2706				sigtrap_put();
2707			}
2708		} else if (is_solid) {
2709			struct pixman_inplace pi;
2710
2711			pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy);
2712			pi.op = op;
2713			pi.color = color;
2714
2715			pi.bits = (uint32_t *)&pi.sx;
2716			pi.source = pixman_image_create_bits(PIXMAN_a8r8g8b8,
2717							     1, 1, pi.bits, 0);
2718			pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);
2719
2720			span = clipped_span(&clipped, pixmask_span_solid, dst->pCompositeClip);
2721
2722			if (sigtrap_get() == 0) {
2723				tor_render(NULL, &tor,
2724					   (void*)&pi, (void*)&clipped,
2725					   span, false);
2726				sigtrap_put();
2727			}
2728
2729			pixman_image_unref(pi.source);
2730			pixman_image_unref(pi.image);
2731		} else {
2732			struct pixman_inplace pi;
2733			int16_t x0, y0;
2734
2735			trapezoid_origin(&traps[0].left, &x0, &y0);
2736
2737			pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy);
2738			pi.source = image_from_pict(src, false, &pi.sx, &pi.sy);
2739			pi.sx += src_x - x0;
2740			pi.sy += src_y - y0;
2741			pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, NULL, 0);
2742			pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL);
2743			pi.bits = pixman_image_get_data(pi.mask);
2744			pi.op = op;
2745
2746			span = clipped_span(&clipped, pixmask_span, dst->pCompositeClip);
2747
2748			if (sigtrap_get() == 0) {
2749				tor_render(NULL, &tor,
2750					   (void*)&pi, (void*)&clipped,
2751					   span, false);
2752				sigtrap_put();
2753			}
2754
2755			pixman_image_unref(pi.mask);
2756			pixman_image_unref(pi.source);
2757			pixman_image_unref(pi.image);
2758		}
2759
2760		tor_fini(&tor);
2761	} else {
2762		struct inplace_x8r8g8b8_thread threads[num_threads];
2763		int y, h;
2764
2765		DBG(("%s: using %d threads for inplace compositing %dx%d\n",
2766		     __FUNCTION__, num_threads,
2767		     region.extents.x2 - region.extents.x1,
2768		     region.extents.y2 - region.extents.y1));
2769
2770		threads[0].traps = traps;
2771		threads[0].ntrap = ntrap;
2772		threads[0].extents = region.extents;
2773		threads[0].lerp = lerp;
2774		threads[0].is_solid = is_solid;
2775		threads[0].color = color;
2776		threads[0].dx = dx;
2777		threads[0].dy = dy;
2778		threads[0].dst = dst;
2779		threads[0].src = src;
2780		threads[0].op = op;
2781		threads[0].src_x = src_x;
2782		threads[0].src_y = src_y;
2783
2784		y = region.extents.y1;
2785		h = region.extents.y2 - region.extents.y1;
2786		h = (h + num_threads - 1) / num_threads;
2787		num_threads -= (num_threads-1) * h >= region.extents.y2 - region.extents.y1;
2788
2789		if (sigtrap_get() == 0) {
2790			for (n = 1; n < num_threads; n++) {
2791				threads[n] = threads[0];
2792				threads[n].extents.y1 = y;
2793				threads[n].extents.y2 = y += h;
2794
2795				sna_threads_run(n, inplace_x8r8g8b8_thread, &threads[n]);
2796			}
2797
2798			assert(y < threads[0].extents.y2);
2799			threads[0].extents.y1 = y;
2800			inplace_x8r8g8b8_thread(&threads[0]);
2801
2802			sna_threads_wait();
2803			sigtrap_put();
2804		} else
2805			sna_threads_kill(); /* leaks thread allocations */
2806	}
2807
2808	return true;
2809}
2810
2811struct inplace_thread {
2812	xTrapezoid *traps;
2813	span_func_t span;
2814	struct inplace inplace;
2815	struct clipped_span clipped;
2816	BoxRec extents;
2817	int dx, dy;
2818	int draw_x, draw_y;
2819	bool unbounded;
2820	int ntrap;
2821};
2822
2823static void inplace_thread(void *arg)
2824{
2825	struct inplace_thread *thread = arg;
2826	struct tor tor;
2827	int n;
2828
2829	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
2830		return;
2831
2832	for (n = 0; n < thread->ntrap; n++) {
2833		if (pixman_fixed_to_int(thread->traps[n].top) >= thread->extents.y2 - thread->draw_y ||
2834		    pixman_fixed_to_int(thread->traps[n].bottom) < thread->extents.y1 - thread->draw_y)
2835			continue;
2836
2837		tor_add_trapezoid(&tor, &thread->traps[n], thread->dx, thread->dy);
2838	}
2839
2840	tor_render(NULL, &tor,
2841		   (void*)&thread->inplace, (void*)&thread->clipped,
2842		   thread->span, thread->unbounded);
2843
2844	tor_fini(&tor);
2845}
2846
2847bool
2848imprecise_trapezoid_span_inplace(struct sna *sna,
2849				 CARD8 op, PicturePtr src, PicturePtr dst,
2850				 PictFormatPtr maskFormat, unsigned flags,
2851				 INT16 src_x, INT16 src_y,
2852				 int ntrap, xTrapezoid *traps,
2853				 bool fallback)
2854{
2855	struct inplace inplace;
2856	struct clipped_span clipped;
2857	span_func_t span;
2858	PixmapPtr pixmap;
2859	struct sna_pixmap *priv;
2860	RegionRec region;
2861	uint32_t color;
2862	bool unbounded;
2863	int16_t dst_x, dst_y;
2864	int dx, dy;
2865	int num_threads, n;
2866
2867	if (NO_IMPRECISE)
2868		return false;
2869
2870	if (dst->format == PICT_a8r8g8b8 || dst->format == PICT_x8r8g8b8)
2871		return trapezoid_span_inplace__x8r8g8b8(op, dst,
2872							src, src_x, src_y,
2873							maskFormat,
2874							ntrap, traps);
2875
2876	if (!sna_picture_is_solid(src, &color)) {
2877		DBG(("%s: fallback -- can not perform operation in place, requires solid source\n",
2878		     __FUNCTION__));
2879		return false;
2880	}
2881
2882	if (dst->format != PICT_a8) {
2883		DBG(("%s: fallback -- can not perform operation in place, format=%x\n",
2884		     __FUNCTION__, dst->format));
2885		return false;
2886	}
2887
2888	pixmap = get_drawable_pixmap(dst->pDrawable);
2889
2890	unbounded = false;
2891	priv = sna_pixmap(pixmap);
2892	if (priv) {
2893		switch (op) {
2894		case PictOpAdd:
2895			if (priv->clear && priv->clear_color == 0) {
2896				unbounded = true;
2897				op = PictOpSrc;
2898			}
2899			if ((color >> 24) == 0)
2900				return true;
2901			break;
2902		case PictOpIn:
2903			if (priv->clear && priv->clear_color == 0)
2904				return true;
2905			if (priv->clear && priv->clear_color == 0xff)
2906				op = PictOpSrc;
2907			unbounded = true;
2908			break;
2909		case PictOpSrc:
2910			unbounded = true;
2911			break;
2912		default:
2913			DBG(("%s: fallback -- can not perform op [%d] in place\n",
2914			     __FUNCTION__, op));
2915			return false;
2916		}
2917	} else {
2918		switch (op) {
2919		case PictOpAdd:
2920			if ((color >> 24) == 0)
2921				return true;
2922			break;
2923		case PictOpIn:
2924		case PictOpSrc:
2925			unbounded = true;
2926			break;
2927		default:
2928			DBG(("%s: fallback -- can not perform op [%d] in place\n",
2929			     __FUNCTION__, op));
2930			return false;
2931		}
2932	}
2933
2934	DBG(("%s: format=%x, op=%d, color=%x\n",
2935	     __FUNCTION__, dst->format, op, color));
2936
2937	if (maskFormat == NULL && ntrap > 1) {
2938		DBG(("%s: individual rasterisation requested\n",
2939		     __FUNCTION__));
2940		do {
2941			/* XXX unwind errors? */
2942			if (!imprecise_trapezoid_span_inplace(sna, op, src, dst, NULL, flags,
2943							      src_x, src_y, 1, traps++,
2944							      fallback))
2945				return false;
2946		} while (--ntrap);
2947		return true;
2948	}
2949
2950	if (!trapezoids_bounds(ntrap, traps, &region.extents))
2951		return true;
2952
2953	DBG(("%s: extents (%d, %d), (%d, %d)\n",
2954	     __FUNCTION__,
2955	     region.extents.x1, region.extents.y1,
2956	     region.extents.x2, region.extents.y2));
2957
2958	if (!sna_compute_composite_extents(&region.extents,
2959					   NULL, NULL, dst,
2960					   0, 0,
2961					   0, 0,
2962					   region.extents.x1, region.extents.y1,
2963					   region.extents.x2 - region.extents.x1,
2964					   region.extents.y2 - region.extents.y1))
2965		return true;
2966
2967	DBG(("%s: clipped extents (%d, %d), (%d, %d)\n",
2968	     __FUNCTION__,
2969	     region.extents.x1, region.extents.y1,
2970	     region.extents.x2, region.extents.y2));
2971
2972	if (op == PictOpSrc) {
2973		span = tor_blt_src;
2974	} else if (op == PictOpIn) {
2975		span = tor_blt_in;
2976	} else {
2977		assert(op == PictOpAdd);
2978		span = tor_blt_add;
2979	}
2980
2981	DBG(("%s: move-to-cpu\n", __FUNCTION__));
2982	region.data = NULL;
2983	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
2984					     op == PictOpSrc ? MOVE_WRITE | MOVE_INPLACE_HINT : MOVE_WRITE | MOVE_READ))
2985		return true;
2986
2987	dx = dst->pDrawable->x * FAST_SAMPLES_X;
2988	dy = dst->pDrawable->y * FAST_SAMPLES_Y;
2989
2990	inplace.ptr = pixmap->devPrivate.ptr;
2991	if (get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y))
2992		inplace.ptr += dst_y * pixmap->devKind + dst_x;
2993	inplace.stride = pixmap->devKind;
2994	inplace.opacity = color >> 24;
2995
2996	span = clipped_span(&clipped, span, dst->pCompositeClip);
2997
2998	num_threads = 1;
2999	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
3000		num_threads = sna_use_threads(region.extents.x2 - region.extents.x1,
3001					      region.extents.y2 - region.extents.y1,
3002					      16);
3003	if (num_threads == 1) {
3004		struct tor tor;
3005
3006		if (!tor_init(&tor, &region.extents, 2*ntrap))
3007			return true;
3008
3009		for (n = 0; n < ntrap; n++) {
3010			if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y ||
3011			    pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y)
3012				continue;
3013
3014			tor_add_trapezoid(&tor, &traps[n], dx, dy);
3015		}
3016
3017		if (sigtrap_get() == 0) {
3018			tor_render(NULL, &tor,
3019				   (void*)&inplace, (void *)&clipped,
3020				   span, unbounded);
3021			sigtrap_put();
3022		}
3023
3024		tor_fini(&tor);
3025	} else {
3026		struct inplace_thread threads[num_threads];
3027		int y, h;
3028
3029		DBG(("%s: using %d threads for inplace compositing %dx%d\n",
3030		     __FUNCTION__, num_threads,
3031		     region.extents.x2 - region.extents.x1,
3032		     region.extents.y2 - region.extents.y1));
3033
3034		threads[0].traps = traps;
3035		threads[0].ntrap = ntrap;
3036		threads[0].inplace = inplace;
3037		threads[0].clipped = clipped;
3038		threads[0].extents = region.extents;
3039		threads[0].span = span;
3040		threads[0].unbounded = unbounded;
3041		threads[0].dx = dx;
3042		threads[0].dy = dy;
3043		threads[0].draw_x = dst->pDrawable->x;
3044		threads[0].draw_y = dst->pDrawable->y;
3045
3046		y = region.extents.y1;
3047		h = region.extents.y2 - region.extents.y1;
3048		h = (h + num_threads - 1) / num_threads;
3049		num_threads -= (num_threads-1) * h >= region.extents.y2 - region.extents.y1;
3050
3051		if (sigtrap_get() == 0) {
3052			for (n = 1; n < num_threads; n++) {
3053				threads[n] = threads[0];
3054				threads[n].extents.y1 = y;
3055				threads[n].extents.y2 = y += h;
3056
3057				sna_threads_run(n, inplace_thread, &threads[n]);
3058			}
3059
3060			assert(y < threads[0].extents.y2);
3061			threads[0].extents.y1 = y;
3062			inplace_thread(&threads[0]);
3063
3064			sna_threads_wait();
3065			sigtrap_put();
3066		} else
3067			sna_threads_kill(); /* leaks thread allocations */
3068	}
3069
3070	return true;
3071}
3072
3073bool
3074imprecise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
3075				  PictFormatPtr maskFormat, unsigned flags,
3076				  INT16 src_x, INT16 src_y,
3077				  int ntrap, xTrapezoid *traps)
3078{
3079	struct tor tor;
3080	ScreenPtr screen = dst->pDrawable->pScreen;
3081	PixmapPtr scratch;
3082	PicturePtr mask;
3083	BoxRec extents;
3084	int16_t dst_x, dst_y;
3085	int dx, dy;
3086	int error, n;
3087
3088	if (NO_IMPRECISE)
3089		return false;
3090
3091	if (maskFormat == NULL && ntrap > 1) {
3092		DBG(("%s: individual rasterisation requested\n",
3093		     __FUNCTION__));
3094		do {
3095			/* XXX unwind errors? */
3096			if (!imprecise_trapezoid_span_fallback(op, src, dst, NULL, flags,
3097							       src_x, src_y, 1, traps++))
3098				return false;
3099		} while (--ntrap);
3100		return true;
3101	}
3102
3103	if (!trapezoids_bounds(ntrap, traps, &extents))
3104		return true;
3105
3106	DBG(("%s: ntraps=%d, extents (%d, %d), (%d, %d)\n",
3107	     __FUNCTION__, ntrap, extents.x1, extents.y1, extents.x2, extents.y2));
3108
3109	if (!sna_compute_composite_extents(&extents,
3110					   src, NULL, dst,
3111					   src_x, src_y,
3112					   0, 0,
3113					   extents.x1, extents.y1,
3114					   extents.x2 - extents.x1,
3115					   extents.y2 - extents.y1))
3116		return true;
3117
3118	DBG(("%s: extents (%d, %d), (%d, %d)\n",
3119	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3120
3121	extents.y2 -= extents.y1;
3122	extents.x2 -= extents.x1;
3123	extents.x1 -= dst->pDrawable->x;
3124	extents.y1 -= dst->pDrawable->y;
3125	dst_x = extents.x1;
3126	dst_y = extents.y1;
3127	dx = -extents.x1 * FAST_SAMPLES_X;
3128	dy = -extents.y1 * FAST_SAMPLES_Y;
3129	extents.x1 = extents.y1 = 0;
3130
3131	DBG(("%s: mask (%dx%d), dx=(%d, %d)\n",
3132	     __FUNCTION__, extents.x2, extents.y2, dx, dy));
3133	scratch = sna_pixmap_create_unattached(screen,
3134					       extents.x2, extents.y2, 8);
3135	if (!scratch)
3136		return true;
3137
3138	DBG(("%s: created buffer %p, stride %d\n",
3139	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
3140
3141	if (!tor_init(&tor, &extents, 2*ntrap)) {
3142		sna_pixmap_destroy(scratch);
3143		return true;
3144	}
3145
3146	for (n = 0; n < ntrap; n++) {
3147		if (pixman_fixed_to_int(traps[n].top) - dst_y >= extents.y2 ||
3148		    pixman_fixed_to_int(traps[n].bottom) - dst_y < 0)
3149			continue;
3150
3151		tor_add_trapezoid(&tor, &traps[n], dx, dy);
3152	}
3153
3154	if (extents.x2 <= TOR_INPLACE_SIZE) {
3155		tor_inplace(&tor, scratch, is_mono(dst, maskFormat), NULL);
3156	} else {
3157		tor_render(NULL, &tor,
3158			   scratch->devPrivate.ptr,
3159			   (void *)(intptr_t)scratch->devKind,
3160			   is_mono(dst, maskFormat) ? tor_blt_mask_mono : tor_blt_mask,
3161			   true);
3162	}
3163	tor_fini(&tor);
3164
3165	mask = CreatePicture(0, &scratch->drawable,
3166			     PictureMatchFormat(screen, 8, PICT_a8),
3167			     0, 0, serverClient, &error);
3168	if (mask) {
3169		RegionRec region;
3170		int16_t x0, y0;
3171
3172		region.extents.x1 = dst_x + dst->pDrawable->x;
3173		region.extents.y1 = dst_y + dst->pDrawable->y;
3174		region.extents.x2 = region.extents.x1 + extents.x2;
3175		region.extents.y2 = region.extents.y1 + extents.y2;
3176		region.data = NULL;
3177
3178		trapezoid_origin(&traps[0].left, &x0, &y0);
3179
3180		DBG(("%s: fbComposite()\n", __FUNCTION__));
3181		sna_composite_fb(op, src, mask, dst, &region,
3182				 src_x + dst_x - x0, src_y + dst_y - y0,
3183				 0, 0,
3184				 dst_x, dst_y,
3185				 extents.x2, extents.y2);
3186
3187		FreePicture(mask, 0);
3188	}
3189	sna_pixmap_destroy(scratch);
3190
3191	return true;
3192}
3193
3194bool
3195imprecise_trap_span_converter(struct sna *sna,
3196			      PicturePtr dst,
3197			      INT16 src_x, INT16 src_y,
3198			      int ntrap, xTrap *trap)
3199{
3200	struct sna_composite_spans_op tmp;
3201	struct tor tor;
3202	BoxRec extents;
3203	pixman_region16_t *clip;
3204	int dx, dy, n;
3205
3206	if (dst->pDrawable->depth < 8)
3207		return false;
3208
3209	if (!sna->render.check_composite_spans(sna, PictOpAdd, sna->render.white_picture, dst,
3210					       dst->pCompositeClip->extents.x2 - dst->pCompositeClip->extents.x1,
3211					       dst->pCompositeClip->extents.y2 - dst->pCompositeClip->extents.y1,
3212					       0)) {
3213		DBG(("%s: fallback -- composite spans not supported\n",
3214		     __FUNCTION__));
3215		return false;
3216	}
3217
3218	clip = dst->pCompositeClip;
3219	extents = *RegionExtents(clip);
3220	dx = dst->pDrawable->x;
3221	dy = dst->pDrawable->y;
3222
3223	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d)\n",
3224	     __FUNCTION__,
3225	     extents.x1, extents.y1,
3226	     extents.x2, extents.y2,
3227	     dx, dy));
3228
3229	memset(&tmp, 0, sizeof(tmp));
3230	if (!sna->render.composite_spans(sna, PictOpAdd, sna->render.white_picture, dst,
3231					 0, 0,
3232					 extents.x1,  extents.y1,
3233					 extents.x2 - extents.x1,
3234					 extents.y2 - extents.y1,
3235					 0,
3236					 &tmp)) {
3237		DBG(("%s: fallback -- composite spans render op not supported\n",
3238		     __FUNCTION__));
3239		return false;
3240	}
3241
3242	dx *= FAST_SAMPLES_X;
3243	dy *= FAST_SAMPLES_Y;
3244	if (!tor_init(&tor, &extents, 2*ntrap))
3245		goto skip;
3246
3247	for (n = 0; n < ntrap; n++) {
3248		xPointFixed p1, p2;
3249
3250		if (pixman_fixed_to_int(trap[n].top.y) + dst->pDrawable->y >= extents.y2 ||
3251		    pixman_fixed_to_int(trap[n].bot.y) + dst->pDrawable->y < extents.y1)
3252			continue;
3253
3254		p1.y = trap[n].top.y;
3255		p2.y = trap[n].bot.y;
3256		p1.x = trap[n].top.l;
3257		p2.x = trap[n].bot.l;
3258		polygon_add_line(tor.polygon, &p1, &p2, dx, dy);
3259
3260		p1.y = trap[n].bot.y;
3261		p2.y = trap[n].top.y;
3262		p1.x = trap[n].top.r;
3263		p2.x = trap[n].bot.r;
3264		polygon_add_line(tor.polygon, &p1, &p2, dx, dy);
3265	}
3266
3267	tor_render(sna, &tor, &tmp, clip,
3268		   choose_span(&tmp, dst, NULL, clip), false);
3269
3270	tor_fini(&tor);
3271skip:
3272	tmp.done(sna, &tmp);
3273	return true;
3274}
3275
3276static void mark_damaged(PixmapPtr pixmap, struct sna_pixmap *priv,
3277			 BoxPtr box, int16_t x, int16_t y)
3278{
3279	box->x1 += x; box->x2 += x;
3280	box->y1 += y; box->y2 += y;
3281	if (box->x1 <= 0 && box->y1 <= 0 &&
3282	    box->x2 >= pixmap->drawable.width &&
3283	    box->y2 >= pixmap->drawable.height) {
3284		sna_damage_destroy(&priv->cpu_damage);
3285		sna_damage_all(&priv->gpu_damage, pixmap);
3286		list_del(&priv->flush_list);
3287	} else {
3288		sna_damage_add_box(&priv->gpu_damage, box);
3289		sna_damage_subtract_box(&priv->cpu_damage, box);
3290	}
3291}
3292
3293bool
3294trap_mask_converter(struct sna *sna,
3295		    PicturePtr picture,
3296		    INT16 x, INT16 y,
3297		    int ntrap, xTrap *trap)
3298{
3299	struct tor tor;
3300	ScreenPtr screen = picture->pDrawable->pScreen;
3301	PixmapPtr scratch, pixmap;
3302	struct sna_pixmap *priv;
3303	BoxRec extents;
3304	span_func_t span;
3305	int dx, dy, n;
3306
3307	if (NO_SCAN_CONVERTER)
3308		return false;
3309
3310	pixmap = get_drawable_pixmap(picture->pDrawable);
3311	priv = sna_pixmap_move_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
3312	if (priv == NULL)
3313		return false;
3314
3315	/* XXX strict adherence to the Render specification */
3316	if (picture->polyMode == PolyModePrecise &&
3317	    picture->polyEdge != PolyEdgeSharp) {
3318		DBG(("%s: fallback -- precise rasterisation requested\n",
3319		     __FUNCTION__));
3320		return false;
3321	}
3322
3323	extents = *RegionExtents(picture->pCompositeClip);
3324	for (n = 0; n < ntrap; n++) {
3325		int v;
3326
3327		v = x + pixman_fixed_integer_floor (MIN(trap[n].top.l, trap[n].bot.l));
3328		if (v < extents.x1)
3329			extents.x1 = v;
3330
3331		v = x + pixman_fixed_integer_ceil (MAX(trap[n].top.r, trap[n].bot.r));
3332		if (v > extents.x2)
3333			extents.x2 = v;
3334
3335		v = y + pixman_fixed_integer_floor (trap[n].top.y);
3336		if (v < extents.y1)
3337			extents.y1 = v;
3338
3339		v = y + pixman_fixed_integer_ceil (trap[n].bot.y);
3340		if (v > extents.y2)
3341			extents.y2 = v;
3342	}
3343
3344	DBG(("%s: extents (%d, %d), (%d, %d)\n",
3345	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3346
3347	scratch = sna_pixmap_create_upload(screen,
3348					   extents.x2-extents.x1,
3349					   extents.y2-extents.y1,
3350					   8, KGEM_BUFFER_WRITE_INPLACE);
3351	if (!scratch)
3352		return true;
3353
3354	dx = picture->pDrawable->x;
3355	dy = picture->pDrawable->y;
3356	dx *= FAST_SAMPLES_X;
3357	dy *= FAST_SAMPLES_Y;
3358	if (!tor_init(&tor, &extents, 2*ntrap)) {
3359		sna_pixmap_destroy(scratch);
3360		return true;
3361	}
3362
3363	for (n = 0; n < ntrap; n++) {
3364		xPointFixed p1, p2;
3365
3366		if (pixman_fixed_to_int(trap[n].top.y) + picture->pDrawable->y >= extents.y2 ||
3367		    pixman_fixed_to_int(trap[n].bot.y) + picture->pDrawable->y < extents.y1)
3368			continue;
3369
3370		p1.y = trap[n].top.y;
3371		p2.y = trap[n].bot.y;
3372		p1.x = trap[n].top.l;
3373		p2.x = trap[n].bot.l;
3374		polygon_add_line(tor.polygon, &p1, &p2, dx, dy);
3375
3376		p1.y = trap[n].bot.y;
3377		p2.y = trap[n].top.y;
3378		p1.x = trap[n].top.r;
3379		p2.x = trap[n].bot.r;
3380		polygon_add_line(tor.polygon, &p1, &p2, dx, dy);
3381	}
3382
3383	if (picture->polyEdge == PolyEdgeSharp)
3384		span = tor_blt_mask_mono;
3385	else
3386		span = tor_blt_mask;
3387
3388	tor_render(NULL, &tor,
3389		   scratch->devPrivate.ptr,
3390		   (void *)(intptr_t)scratch->devKind,
3391		   span, true);
3392
3393	tor_fini(&tor);
3394
3395	/* XXX clip boxes */
3396	get_drawable_deltas(picture->pDrawable, pixmap, &x, &y);
3397	sna = to_sna_from_screen(screen);
3398	sna->render.copy_boxes(sna, GXcopy,
3399			       &scratch->drawable, __sna_pixmap_get_bo(scratch), -extents.x1, -extents.x1,
3400			       &pixmap->drawable, priv->gpu_bo, x, y,
3401			       &extents, 1, 0);
3402	mark_damaged(pixmap, priv, &extents ,x, y);
3403	sna_pixmap_destroy(scratch);
3404	return true;
3405}
3406
3407bool
3408triangles_span_converter(struct sna *sna,
3409			 CARD8 op, PicturePtr src, PicturePtr dst,
3410			 PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
3411			 int count, xTriangle *tri)
3412{
3413	struct sna_composite_spans_op tmp;
3414	struct tor tor;
3415	BoxRec extents;
3416	pixman_region16_t clip;
3417	int16_t dst_x, dst_y;
3418	int dx, dy, n;
3419	bool was_clear;
3420
3421	if (NO_SCAN_CONVERTER)
3422		return false;
3423
3424	if (is_mono(dst, maskFormat))
3425		return mono_triangles_span_converter(sna, op, src, dst,
3426						     src_x, src_y,
3427						     count, tri);
3428
3429	/* XXX strict adherence to the Render specification */
3430	if (dst->polyMode == PolyModePrecise) {
3431		DBG(("%s: fallback -- precise rasterisation requested\n",
3432		     __FUNCTION__));
3433		return false;
3434	}
3435
3436	if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, 0)) {
3437		DBG(("%s: fallback -- composite spans not supported\n",
3438		     __FUNCTION__));
3439		return false;
3440	}
3441
3442	dst_x = pixman_fixed_to_int(tri[0].p1.x);
3443	dst_y = pixman_fixed_to_int(tri[0].p1.y);
3444
3445	miTriangleBounds(count, tri, &extents);
3446	DBG(("%s: extents (%d, %d), (%d, %d)\n",
3447	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3448
3449	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
3450		return true;
3451
3452#if 0
3453	if (extents.y2 - extents.y1 < 64 && extents.x2 - extents.x1 < 64) {
3454		DBG(("%s: fallback -- traps extents too small %dx%d\n",
3455		     __FUNCTION__, extents.y2 - extents.y1, extents.x2 - extents.x1));
3456		return false;
3457	}
3458#endif
3459
3460	if (!sna_compute_composite_region(&clip,
3461					  src, NULL, dst,
3462					  src_x + extents.x1 - dst_x,
3463					  src_y + extents.y1 - dst_y,
3464					  0, 0,
3465					  extents.x1, extents.y1,
3466					  extents.x2 - extents.x1,
3467					  extents.y2 - extents.y1)) {
3468		DBG(("%s: triangles do not intersect drawable clips\n",
3469		     __FUNCTION__)) ;
3470		return true;
3471	}
3472
3473	if (!sna->render.check_composite_spans(sna, op, src, dst,
3474					       clip.extents.x2 - clip.extents.x1,
3475					       clip.extents.y2 - clip.extents.y1,
3476					       0)) {
3477		DBG(("%s: fallback -- composite spans not supported\n",
3478		     __FUNCTION__));
3479		return false;
3480	}
3481
3482	extents = *RegionExtents(&clip);
3483	dx = dst->pDrawable->x;
3484	dy = dst->pDrawable->y;
3485
3486	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
3487	     __FUNCTION__,
3488	     extents.x1, extents.y1,
3489	     extents.x2, extents.y2,
3490	     dx, dy,
3491	     src_x + extents.x1 - dst_x - dx,
3492	     src_y + extents.y1 - dst_y - dy));
3493
3494	was_clear = sna_drawable_is_clear(dst->pDrawable);
3495
3496	memset(&tmp, 0, sizeof(tmp));
3497	if (!sna->render.composite_spans(sna, op, src, dst,
3498					 src_x + extents.x1 - dst_x - dx,
3499					 src_y + extents.y1 - dst_y - dy,
3500					 extents.x1,  extents.y1,
3501					 extents.x2 - extents.x1,
3502					 extents.y2 - extents.y1,
3503					 0,
3504					 &tmp)) {
3505		DBG(("%s: fallback -- composite spans render op not supported\n",
3506		     __FUNCTION__));
3507		return false;
3508	}
3509
3510	dx *= FAST_SAMPLES_X;
3511	dy *= FAST_SAMPLES_Y;
3512	if (!tor_init(&tor, &extents, 3*count))
3513		goto skip;
3514
3515	for (n = 0; n < count; n++) {
3516		polygon_add_line(tor.polygon, &tri[n].p1, &tri[n].p2, dx, dy);
3517		polygon_add_line(tor.polygon, &tri[n].p2, &tri[n].p3, dx, dy);
3518		polygon_add_line(tor.polygon, &tri[n].p3, &tri[n].p1, dx, dy);
3519	}
3520
3521	tor_render(sna, &tor, &tmp, &clip,
3522		   choose_span(&tmp, dst, maskFormat, &clip),
3523		   !was_clear && maskFormat && !operator_is_bounded(op));
3524
3525	tor_fini(&tor);
3526skip:
3527	tmp.done(sna, &tmp);
3528
3529	REGION_UNINIT(NULL, &clip);
3530	return true;
3531}
3532
3533bool
3534triangles_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
3535			 PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
3536			 int count, xTriangle *tri)
3537{
3538	struct tor tor;
3539	void (*span)(struct sna *sna,
3540		     struct sna_composite_spans_op *op,
3541		     pixman_region16_t *clip,
3542		     const BoxRec *box,
3543		     int coverage);
3544	ScreenPtr screen = dst->pDrawable->pScreen;
3545	PixmapPtr scratch;
3546	PicturePtr mask;
3547	BoxRec extents;
3548	int16_t dst_x, dst_y;
3549	int dx, dy;
3550	int error, n;
3551
3552	if (NO_SCAN_CONVERTER)
3553		return false;
3554
3555	if (is_precise(dst, maskFormat)) {
3556		DBG(("%s: fallback -- precise rasterisation requested\n",
3557		     __FUNCTION__));
3558		return false;
3559	}
3560
3561	if (maskFormat == NULL && count > 1) {
3562		DBG(("%s: fallback -- individual rasterisation requested\n",
3563		     __FUNCTION__));
3564		return false;
3565	}
3566
3567	miTriangleBounds(count, tri, &extents);
3568	DBG(("%s: extents (%d, %d), (%d, %d)\n",
3569	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3570
3571	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
3572		return true;
3573
3574	if (!sna_compute_composite_extents(&extents,
3575					   src, NULL, dst,
3576					   src_x, src_y,
3577					   0, 0,
3578					   extents.x1, extents.y1,
3579					   extents.x2 - extents.x1,
3580					   extents.y2 - extents.y1))
3581		return true;
3582
3583	DBG(("%s: extents (%d, %d), (%d, %d)\n",
3584	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3585
3586	extents.y2 -= extents.y1;
3587	extents.x2 -= extents.x1;
3588	extents.x1 -= dst->pDrawable->x;
3589	extents.y1 -= dst->pDrawable->y;
3590	dst_x = extents.x1;
3591	dst_y = extents.y1;
3592	dx = -extents.x1 * FAST_SAMPLES_X;
3593	dy = -extents.y1 * FAST_SAMPLES_Y;
3594	extents.x1 = extents.y1 = 0;
3595
3596	DBG(("%s: mask (%dx%d)\n",
3597	     __FUNCTION__, extents.x2, extents.y2));
3598	scratch = sna_pixmap_create_upload(screen,
3599					   extents.x2, extents.y2, 8,
3600					   KGEM_BUFFER_WRITE_INPLACE);
3601	if (!scratch)
3602		return true;
3603
3604	DBG(("%s: created buffer %p, stride %d\n",
3605	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
3606
3607	if (!tor_init(&tor, &extents, 3*count)) {
3608		sna_pixmap_destroy(scratch);
3609		return true;
3610	}
3611
3612	for (n = 0; n < count; n++) {
3613		polygon_add_line(tor.polygon, &tri[n].p1, &tri[n].p2, dx, dy);
3614		polygon_add_line(tor.polygon, &tri[n].p2, &tri[n].p3, dx, dy);
3615		polygon_add_line(tor.polygon, &tri[n].p3, &tri[n].p1, dx, dy);
3616	}
3617
3618	if (maskFormat ? maskFormat->depth < 8 : dst->polyEdge == PolyEdgeSharp)
3619		span = tor_blt_mask_mono;
3620	else
3621		span = tor_blt_mask;
3622
3623	tor_render(NULL, &tor,
3624		   scratch->devPrivate.ptr,
3625		   (void *)(intptr_t)scratch->devKind,
3626		   span, true);
3627
3628	mask = CreatePicture(0, &scratch->drawable,
3629			     PictureMatchFormat(screen, 8, PICT_a8),
3630			     0, 0, serverClient, &error);
3631	if (mask) {
3632		CompositePicture(op, src, mask, dst,
3633				 src_x + dst_x - pixman_fixed_to_int(tri[0].p1.x),
3634				 src_y + dst_y - pixman_fixed_to_int(tri[0].p1.y),
3635				 0, 0,
3636				 dst_x, dst_y,
3637				 extents.x2, extents.y2);
3638		FreePicture(mask, 0);
3639	}
3640	tor_fini(&tor);
3641	sna_pixmap_destroy(scratch);
3642
3643	return true;
3644}
3645
3646struct tristrip_thread {
3647	struct sna *sna;
3648	const struct sna_composite_spans_op *op;
3649	const xPointFixed *points;
3650	RegionPtr clip;
3651	span_func_t span;
3652	BoxRec extents;
3653	int dx, dy, draw_y;
3654	int count;
3655	bool unbounded;
3656};
3657
3658static void
3659tristrip_thread(void *arg)
3660{
3661	struct tristrip_thread *thread = arg;
3662	struct span_thread_boxes boxes;
3663	struct tor tor;
3664	int n, cw, ccw;
3665
3666	if (!tor_init(&tor, &thread->extents, 2*thread->count))
3667		return;
3668
3669	span_thread_boxes_init(&boxes, thread->op, thread->clip);
3670
3671	cw = 0; ccw = 1;
3672	polygon_add_line(tor.polygon,
3673			 &thread->points[ccw], &thread->points[cw],
3674			 thread->dx, thread->dy);
3675	n = 2;
3676	do {
3677		polygon_add_line(tor.polygon,
3678				 &thread->points[cw], &thread->points[n],
3679				 thread->dx, thread->dy);
3680		cw = n;
3681		if (++n == thread->count)
3682			break;
3683
3684		polygon_add_line(tor.polygon,
3685				 &thread->points[n], &thread->points[ccw],
3686				 thread->dx, thread->dy);
3687		ccw = n;
3688		if (++n == thread->count)
3689			break;
3690	} while (1);
3691	polygon_add_line(tor.polygon,
3692			 &thread->points[cw], &thread->points[ccw],
3693			 thread->dx, thread->dy);
3694	assert(tor.polygon->num_edges <= 2*thread->count);
3695
3696	tor_render(thread->sna, &tor,
3697		   (struct sna_composite_spans_op *)&boxes, thread->clip,
3698		   thread->span, thread->unbounded);
3699
3700	tor_fini(&tor);
3701
3702	if (boxes.num_boxes) {
3703		DBG(("%s: flushing %d boxes\n", __FUNCTION__, boxes.num_boxes));
3704		assert(boxes.num_boxes <= SPAN_THREAD_MAX_BOXES);
3705		thread->op->thread_boxes(thread->sna, thread->op,
3706					 boxes.boxes, boxes.num_boxes);
3707	}
3708}
3709
3710bool
3711imprecise_tristrip_span_converter(struct sna *sna,
3712				  CARD8 op, PicturePtr src, PicturePtr dst,
3713				  PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
3714				  int count, xPointFixed *points)
3715{
3716	struct sna_composite_spans_op tmp;
3717	BoxRec extents;
3718	pixman_region16_t clip;
3719	int16_t dst_x, dst_y;
3720	int dx, dy, num_threads;
3721	bool was_clear;
3722
3723	if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, 0)) {
3724		DBG(("%s: fallback -- composite spans not supported\n",
3725		     __FUNCTION__));
3726		return false;
3727	}
3728
3729	dst_x = pixman_fixed_to_int(points[0].x);
3730	dst_y = pixman_fixed_to_int(points[0].y);
3731
3732	miPointFixedBounds(count, points, &extents);
3733	DBG(("%s: extents (%d, %d), (%d, %d)\n",
3734	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3735
3736	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
3737		return true;
3738
3739#if 0
3740	if (extents.y2 - extents.y1 < 64 && extents.x2 - extents.x1 < 64) {
3741		DBG(("%s: fallback -- traps extents too small %dx%d\n",
3742		     __FUNCTION__, extents.y2 - extents.y1, extents.x2 - extents.x1));
3743		return false;
3744	}
3745#endif
3746
3747	if (!sna_compute_composite_region(&clip,
3748					  src, NULL, dst,
3749					  src_x + extents.x1 - dst_x,
3750					  src_y + extents.y1 - dst_y,
3751					  0, 0,
3752					  extents.x1, extents.y1,
3753					  extents.x2 - extents.x1,
3754					  extents.y2 - extents.y1)) {
3755		DBG(("%s: triangles do not intersect drawable clips\n",
3756		     __FUNCTION__)) ;
3757		return true;
3758	}
3759
3760	if (!sna->render.check_composite_spans(sna, op, src, dst,
3761					       clip.extents.x2 - clip.extents.x1,
3762					       clip.extents.y2 - clip.extents.y1,
3763					       0)) {
3764		DBG(("%s: fallback -- composite spans not supported\n",
3765		     __FUNCTION__));
3766		return false;
3767	}
3768
3769	extents = *RegionExtents(&clip);
3770	dx = dst->pDrawable->x;
3771	dy = dst->pDrawable->y;
3772
3773	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
3774	     __FUNCTION__,
3775	     extents.x1, extents.y1,
3776	     extents.x2, extents.y2,
3777	     dx, dy,
3778	     src_x + extents.x1 - dst_x - dx,
3779	     src_y + extents.y1 - dst_y - dy));
3780
3781	was_clear = sna_drawable_is_clear(dst->pDrawable);
3782
3783	memset(&tmp, 0, sizeof(tmp));
3784	if (!sna->render.composite_spans(sna, op, src, dst,
3785					 src_x + extents.x1 - dst_x - dx,
3786					 src_y + extents.y1 - dst_y - dy,
3787					 extents.x1,  extents.y1,
3788					 extents.x2 - extents.x1,
3789					 extents.y2 - extents.y1,
3790					 0,
3791					 &tmp)) {
3792		DBG(("%s: fallback -- composite spans render op not supported\n",
3793		     __FUNCTION__));
3794		return false;
3795	}
3796
3797	dx *= FAST_SAMPLES_X;
3798	dy *= FAST_SAMPLES_Y;
3799
3800	num_threads = 1;
3801	if (!NO_GPU_THREADS &&
3802	    tmp.thread_boxes &&
3803	    thread_choose_span(&tmp, dst, maskFormat, &clip))
3804		num_threads = sna_use_threads(extents.x2 - extents.x1,
3805					      extents.y2 - extents.y1,
3806					      16);
3807	if (num_threads == 1) {
3808		struct tor tor;
3809		int cw, ccw, n;
3810
3811		if (!tor_init(&tor, &extents, 2*count))
3812			goto skip;
3813
3814		cw = 0; ccw = 1;
3815		polygon_add_line(tor.polygon,
3816				 &points[ccw], &points[cw],
3817				 dx, dy);
3818		n = 2;
3819		do {
3820			polygon_add_line(tor.polygon,
3821					 &points[cw], &points[n],
3822					 dx, dy);
3823			cw = n;
3824			if (++n == count)
3825				break;
3826
3827			polygon_add_line(tor.polygon,
3828					 &points[n], &points[ccw],
3829					 dx, dy);
3830			ccw = n;
3831			if (++n == count)
3832				break;
3833		} while (1);
3834		polygon_add_line(tor.polygon,
3835				 &points[cw], &points[ccw],
3836				 dx, dy);
3837		assert(tor.polygon->num_edges <= 2*count);
3838
3839		tor_render(sna, &tor, &tmp, &clip,
3840			   choose_span(&tmp, dst, maskFormat, &clip),
3841			   !was_clear && maskFormat && !operator_is_bounded(op));
3842
3843		tor_fini(&tor);
3844	} else {
3845		struct tristrip_thread threads[num_threads];
3846		int y, h, n;
3847
3848		DBG(("%s: using %d threads for tristrip compositing %dx%d\n",
3849		     __FUNCTION__, num_threads,
3850		     clip.extents.x2 - clip.extents.x1,
3851		     clip.extents.y2 - clip.extents.y1));
3852
3853		threads[0].sna = sna;
3854		threads[0].op = &tmp;
3855		threads[0].points = points;
3856		threads[0].count = count;
3857		threads[0].extents = clip.extents;
3858		threads[0].clip = &clip;
3859		threads[0].dx = dx;
3860		threads[0].dy = dy;
3861		threads[0].draw_y = dst->pDrawable->y;
3862		threads[0].unbounded = !was_clear && maskFormat && !operator_is_bounded(op);
3863		threads[0].span = thread_choose_span(&tmp, dst, maskFormat, &clip);
3864
3865		y = clip.extents.y1;
3866		h = clip.extents.y2 - clip.extents.y1;
3867		h = (h + num_threads - 1) / num_threads;
3868		num_threads -= (num_threads-1) * h >= clip.extents.y2 - clip.extents.y1;
3869
3870		for (n = 1; n < num_threads; n++) {
3871			threads[n] = threads[0];
3872			threads[n].extents.y1 = y;
3873			threads[n].extents.y2 = y += h;
3874
3875			sna_threads_run(n, tristrip_thread, &threads[n]);
3876		}
3877
3878		assert(y < threads[0].extents.y2);
3879		threads[0].extents.y1 = y;
3880		tristrip_thread(&threads[0]);
3881
3882		sna_threads_wait();
3883	}
3884skip:
3885	tmp.done(sna, &tmp);
3886
3887	REGION_UNINIT(NULL, &clip);
3888	return true;
3889}
3890