1/*
2 * Copyright (c) 2007  David Turner
3 * Copyright (c) 2008  M Joonas Pihlaja
4 * Copyright (c) 2011 Intel Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 *    Chris Wilson <chris@chris-wilson.co.uk>
27 *
28 */
29
30#ifdef HAVE_CONFIG_H
31#include "config.h"
32#endif
33
34#include "sna.h"
35#include "sna_render.h"
36#include "sna_render_inline.h"
37#include "sna_trapezoids.h"
38#include "fb/fbpict.h"
39
40#include <mipict.h>
41
42#undef FAST_SAMPLES_X
43#undef FAST_SAMPLES_Y
44
45/* TODO: Emit unantialiased and MSAA triangles. */
46
47#ifndef MAX
48#define MAX(x,y) ((x) >= (y) ? (x) : (y))
49#endif
50
51#ifndef MIN
52#define MIN(x,y) ((x) <= (y) ? (x) : (y))
53#endif
54
55#define _GRID_TO_INT_FRAC(t, i, f, m) do {      \
56	(i) = (t) / (m);                   \
57	(f) = (t) % (m);                   \
58	if ((f) < 0) {                     \
59		--(i);                     \
60		(f) += (m);                \
61	}                                  \
62} while (0)
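/* E.g. _GRID_TO_INT_FRAC(-5, i, f, 4) yields i = -2, f = 3, so that
 * t == i*m + f with 0 <= f < m: a true floor division rather than C's
 * truncation towards zero. */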
63
64#define GRID_AREA (2*SAMPLES_X*SAMPLES_Y)
65
66static inline int pixman_fixed_to_grid_x(pixman_fixed_t v)
67{
68	return ((int64_t)v * SAMPLES_X + (1<<15)) >> 16;
69}
70
71static inline int pixman_fixed_to_grid_y(pixman_fixed_t v)
72{
73	return ((int64_t)v * SAMPLES_Y + (1<<15)) >> 16;
74}
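
/* Purely for illustration, taking SAMPLES_X = 16: the 16.16 fixed-point
 * value 1.5 (0x18000) maps to grid column 24 = 1.5 * 16; the +(1<<15)
 * bias rounds to the nearest grid position instead of truncating. */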
75
76typedef void (*span_func_t)(struct sna *sna,
77			    struct sna_composite_spans_op *op,
78			    pixman_region16_t *clip,
79			    const BoxRec *box,
80			    int coverage);
81
82#if HAS_DEBUG_FULL
83static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char *function)
84{
85	if (box->x1 < 0 || box->y1 < 0 ||
86	    box->x2 > pixmap->drawable.width ||
87	    box->y2 > pixmap->drawable.height)
88	{
89		FatalError("%s: damage box is beyond the pixmap: box=(%d, %d), (%d, %d), pixmap=(%d, %d)\n",
90			   function,
91			   box->x1, box->y1, box->x2, box->y2,
92			   pixmap->drawable.width,
93			   pixmap->drawable.height);
94	}
95}
96#define assert_pixmap_contains_box(p, b) _assert_pixmap_contains_box(p, b, __FUNCTION__)
97#else
98#define assert_pixmap_contains_box(p, b)
99#endif
100
101static void apply_damage(struct sna_composite_op *op, RegionPtr region)
102{
103	DBG(("%s: damage=%p, region=%dx[(%d, %d), (%d, %d)]\n",
104	     __FUNCTION__, op->damage,
105	     region_num_rects(region),
106	     region->extents.x1, region->extents.y1,
107	     region->extents.x2, region->extents.y2));
108
109	if (op->damage == NULL)
110		return;
111
112	RegionTranslate(region, op->dst.x, op->dst.y);
113
114	assert_pixmap_contains_box(op->dst.pixmap, RegionExtents(region));
115	sna_damage_add(op->damage, region);
116}
117
118static void _apply_damage_box(struct sna_composite_op *op, const BoxRec *box)
119{
120	BoxRec r;
121
122	r.x1 = box->x1 + op->dst.x;
123	r.x2 = box->x2 + op->dst.x;
124	r.y1 = box->y1 + op->dst.y;
125	r.y2 = box->y2 + op->dst.y;
126
127	assert_pixmap_contains_box(op->dst.pixmap, &r);
128	sna_damage_add_box(op->damage, &r);
129}
130
131inline static void apply_damage_box(struct sna_composite_op *op, const BoxRec *box)
132{
133	if (op->damage)
134		_apply_damage_box(op, box);
135}
136
137#define SAMPLES_X_TO_INT_FRAC(x, i, f) \
138	_GRID_TO_INT_FRAC(x, i, f, SAMPLES_X)
139
140#define AREA_TO_FLOAT(c)  ((c) / (float)GRID_AREA)
141#define TO_ALPHA(c) (((c)+1) >> 1)
142
143struct quorem {
144	int64_t quo;
145	int64_t rem;
146};
147
148struct edge {
149	struct edge *next, *prev;
150
151	int dir;
152
	/* Remaining height of the edge (y2-y1 after orienting the edge
	 * downwards and clipping), in subsample rows. */
	int height_left;
154
155	int cell;
156	struct quorem x;
157
158	/* Advance of the current x when moving down a subsample line. */
159	struct quorem dxdy;
160	int64_t dy;
161
162	/* The clipped y of the top of the edge. */
163	int ytop;
166};
167
168/* Number of subsample rows per y-bucket. Must be SAMPLES_Y. */
169#define EDGE_Y_BUCKET_HEIGHT SAMPLES_Y
170#define EDGE_Y_BUCKET_INDEX(y, ymin) (((y) - (ymin))/EDGE_Y_BUCKET_HEIGHT)
171
172/* A collection of sorted and vertically clipped edges of the polygon.
173 * Edges are moved from the polygon to an active list while scan
174 * converting. */
175struct polygon {
176	/* The vertical clip extents. */
177	int ymin, ymax;
178
	/* Array of per-bucket edge lists.  An edge is put into bucket
	 * EDGE_Y_BUCKET_INDEX(edge->ytop, polygon->ymin) when it is added
	 * to the polygon. */
182	struct edge **y_buckets;
183	struct edge *y_buckets_embedded[64];
184
185	struct edge edges_embedded[32];
186	struct edge *edges;
187	int num_edges;
188};
189
190/* A cell records the effect on pixel coverage of polygon edges
191 * passing through a pixel.  It contains two accumulators of pixel
192 * coverage.
193 *
194 * Consider the effects of a polygon edge on the coverage of a pixel
195 * it intersects and that of the following one.  The coverage of the
196 * following pixel is the height of the edge multiplied by the width
197 * of the pixel, and the coverage of the pixel itself is the area of
198 * the trapezoid formed by the edge and the right side of the pixel.
199 *
200 * +-----------------------+-----------------------+
201 * |                       |                       |
202 * |                       |                       |
203 * |_______________________|_______________________|
204 * |   \...................|.......................|\
205 * |    \..................|.......................| |
206 * |     \.................|.......................| |
207 * |      \....covered.....|.......................| |
208 * |       \....area.......|.......................| } covered height
209 * |        \..............|.......................| |
210 * |uncovered\.............|.......................| |
211 * |  area    \............|.......................| |
212 * |___________\...........|.......................|/
213 * |                       |                       |
214 * |                       |                       |
215 * |                       |                       |
216 * +-----------------------+-----------------------+
217 *
218 * Since the coverage of the following pixel will always be a multiple
219 * of the width of the pixel, we can store the height of the covered
220 * area instead.  The coverage of the pixel itself is the total
221 * coverage minus the area of the uncovered area to the left of the
222 * edge.  As it's faster to compute the uncovered area we only store
223 * that and subtract it from the total coverage later when forming
224 * spans to blit.
225 *
226 * The heights and areas are signed, with left edges of the polygon
227 * having positive sign and right edges having negative sign.  When
228 * two edges intersect they swap their left/rightness so their
229 * contribution above and below the intersection point must be
230 * computed separately. */
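
/* Worked example (taking SAMPLES_X = SAMPLES_Y = 16 purely for
 * illustration, so GRID_AREA = 512): a single subsample row entering a
 * pixel at fractional x = 4 and running to its right edge records
 * covered_height += 1 and uncovered_area += 2*4 = 8 in that pixel's
 * cell.  When the spans are formed the pixel receives
 * 1*2*SAMPLES_X - 8 = 24 of the 512 area units, i.e. 12 of that row's
 * 16 samples; the matching -1 and -2*fx entries recorded at the far
 * end of the span cancel the running cover once the span ends. */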
231struct cell {
232	struct cell *next;
233	int x;
234	int16_t uncovered_area;
235	int16_t covered_height;
236};
237
238/* A cell list represents the scan line sparsely as cells ordered by
239 * ascending x.  It is geared towards scanning the cells in order
240 * using an internal cursor. */
241struct cell_list {
242	struct cell *cursor;
243
	/* Sentinel cells bracketing the scan line: head.x is INT_MIN and
	 * tail.x is INT_MAX, so every real cell sorts between them. */
245	struct cell head, tail;
246
247	int16_t x1, x2;
248	int16_t count, size;
249	struct cell *cells;
250	struct cell embedded[256];
251};
252
253/* The active list contains edges in the current scan line ordered by
254 * the x-coordinate of the intercept of the edge and the scan line. */
255struct active_list {
256	/* Leftmost edge on the current scan line. */
257	struct edge head, tail;
258};
259
260struct tor {
261    struct polygon	polygon[1];
262    struct active_list	active[1];
263    struct cell_list	coverages[1];
264
265    BoxRec extents;
266};
267
/* Rewinds the cell list's cursor to the beginning.  After rewinding
 * we're good to cell_list_find() the cell at any x coordinate. */
270inline static void
271cell_list_rewind(struct cell_list *cells)
272{
273	cells->cursor = &cells->head;
274}
275
276static bool
277cell_list_init(struct cell_list *cells, int x1, int x2)
278{
279	cells->tail.next = NULL;
280	cells->tail.x = INT_MAX;
281	cells->head.x = INT_MIN;
282	cells->head.next = &cells->tail;
283	cells->head.covered_height = 0;
284	cell_list_rewind(cells);
285	cells->count = 0;
286	cells->x1 = x1;
287	cells->x2 = x2;
288	cells->size = x2 - x1 + 1;
289	cells->cells = cells->embedded;
290	if (cells->size > ARRAY_SIZE(cells->embedded))
291		cells->cells = malloc(cells->size * sizeof(struct cell));
292	return cells->cells != NULL;
293}
294
295static void
296cell_list_fini(struct cell_list *cells)
297{
298	if (cells->cells != cells->embedded)
299		free(cells->cells);
300}
301
302inline static void
303cell_list_reset(struct cell_list *cells)
304{
305	cell_list_rewind(cells);
306	cells->head.next = &cells->tail;
307	cells->head.covered_height = 0;
308	cells->count = 0;
309}
310
311inline static struct cell *
312cell_list_alloc(struct cell_list *cells,
313		struct cell *tail,
314		int x)
315{
316	struct cell *cell;
317
318	assert(cells->count < cells->size);
319	cell = cells->cells + cells->count++;
320	cell->next = tail->next;
321	tail->next = cell;
322
323	cell->x = x;
324	cell->covered_height = 0;
325	cell->uncovered_area = 0;
326	return cell;
327}
328
/* Find a cell at the given x-coordinate, allocating it from the
 * preallocated pool if it does not yet exist.  Cells must be found with
 * non-decreasing x-coordinate until the cell list is rewound using
 * cell_list_rewind().  Ownership of the returned cell is retained by
 * the cell list. */
334inline static struct cell *
335cell_list_find(struct cell_list *cells, int x)
336{
337	struct cell *tail;
338
339	if (x >= cells->x2)
340		return &cells->tail;
341
342	if (x < cells->x1)
343		return &cells->head;
344
345	tail = cells->cursor;
346	if (tail->x == x)
347		return tail;
348
349	do {
350		if (tail->next->x > x)
351			break;
352
353		tail = tail->next;
354		if (tail->next->x > x)
355			break;
356
357		tail = tail->next;
358		if (tail->next->x > x)
359			break;
360
361		tail = tail->next;
362	} while (1);
363
364	if (tail->x != x)
365		tail = cell_list_alloc(cells, tail, x);
366
367	return cells->cursor = tail;
368}
369
370/* Add a subpixel span covering [x1, x2) to the coverage cells. */
371inline static void
372cell_list_add_subspan(struct cell_list *cells, int x1, int x2)
373{
374	struct cell *cell;
375	int ix1, fx1;
376	int ix2, fx2;
377
378	if (x1 == x2)
379		return;
380
381	SAMPLES_X_TO_INT_FRAC(x1, ix1, fx1);
382	SAMPLES_X_TO_INT_FRAC(x2, ix2, fx2);
383
384	__DBG(("%s: x1=%d (%d+%d), x2=%d (%d+%d)\n", __FUNCTION__,
385	       x1, ix1, fx1, x2, ix2, fx2));
386
387	cell = cell_list_find(cells, ix1);
388	if (ix1 != ix2) {
389		cell->uncovered_area += 2*fx1;
390		++cell->covered_height;
391
392		cell = cell_list_find(cells, ix2);
393		cell->uncovered_area -= 2*fx2;
394		--cell->covered_height;
395	} else
396		cell->uncovered_area += 2*(fx1-fx2);
397}
398
399inline static void
400cell_list_add_span(struct cell_list *cells, int x1, int x2)
401{
402	struct cell *cell;
403	int ix1, fx1;
404	int ix2, fx2;
405
406	SAMPLES_X_TO_INT_FRAC(x1, ix1, fx1);
407	SAMPLES_X_TO_INT_FRAC(x2, ix2, fx2);
408
409	__DBG(("%s: x1=%d (%d+%d), x2=%d (%d+%d)\n", __FUNCTION__,
410	       x1, ix1, fx1, x2, ix2, fx2));
411
412	cell = cell_list_find(cells, ix1);
413	if (ix1 != ix2) {
414		cell->uncovered_area += 2*fx1*SAMPLES_Y;
415		cell->covered_height += SAMPLES_Y;
416
417		cell = cell_list_find(cells, ix2);
418		cell->uncovered_area -= 2*fx2*SAMPLES_Y;
419		cell->covered_height -= SAMPLES_Y;
420	} else
421		cell->uncovered_area += 2*(fx1-fx2)*SAMPLES_Y;
422}
423
424static void
425polygon_fini(struct polygon *polygon)
426{
427	if (polygon->y_buckets != polygon->y_buckets_embedded)
428		free(polygon->y_buckets);
429
430	if (polygon->edges != polygon->edges_embedded)
431		free(polygon->edges);
432}
433
434static bool
435polygon_init(struct polygon *polygon, int num_edges, int ymin, int ymax)
436{
437	unsigned num_buckets = EDGE_Y_BUCKET_INDEX(ymax-1, ymin) + 1;
438
439	if (unlikely(ymax - ymin > 0x7FFFFFFFU - EDGE_Y_BUCKET_HEIGHT))
440		return false;
441
442	polygon->edges = polygon->edges_embedded;
443	polygon->y_buckets = polygon->y_buckets_embedded;
444
445	polygon->num_edges = 0;
446	if (num_edges > (int)ARRAY_SIZE(polygon->edges_embedded)) {
447		polygon->edges = malloc(sizeof(struct edge)*num_edges);
448		if (unlikely(NULL == polygon->edges))
449			goto bail_no_mem;
450	}
451
452	if (num_buckets >= ARRAY_SIZE(polygon->y_buckets_embedded)) {
453		polygon->y_buckets = malloc((1+num_buckets)*sizeof(struct edge *));
454		if (unlikely(NULL == polygon->y_buckets))
455			goto bail_no_mem;
456	}
457	memset(polygon->y_buckets, 0, num_buckets * sizeof(struct edge *));
458	polygon->y_buckets[num_buckets] = (void *)-1;
459
460	polygon->ymin = ymin;
461	polygon->ymax = ymax;
462	return true;
463
464bail_no_mem:
465	polygon_fini(polygon);
466	return false;
467}
468
469static void
470_polygon_insert_edge_into_its_y_bucket(struct polygon *polygon, struct edge *e)
471{
472	unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop, polygon->ymin);
473	struct edge **ptail = &polygon->y_buckets[ix];
474	assert(e->ytop < polygon->ymax);
475	e->next = *ptail;
476	*ptail = e;
477}
478
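/* Round the edge's current fixed-point x (quotient plus remainder/dy)
 * to the nearest grid column. */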
479static inline int edge_to_cell(struct edge *e)
480{
481	int x = e->x.quo;
482	if (e->x.rem > e->dy/2)
483		x++;
484	__DBG(("%s: %lld.%lld -> %d\n",
485	       __FUNCTION__, e->x.quo, e->x.rem, x));
486	return x;
487}
488
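/* One DDA step down to the next subsample row: add dxdy to x and
 * renormalise the remainder back into [0, dy). */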
489static inline int edge_advance(struct edge *e)
490{
491	__DBG(("%s: %lld.%lld + %lld.%lld\n",
492	       __FUNCTION__, e->x.quo, e->x.rem, e->dxdy.quo, e->dxdy.rem));
493
494	e->x.quo += e->dxdy.quo;
495	e->x.rem += e->dxdy.rem;
496	if (e->x.rem < 0) {
497		e->x.quo--;
498		e->x.rem += e->dy;
499	} else if (e->x.rem >= e->dy) {
500		e->x.quo++;
501		e->x.rem -= e->dy;
502	}
503	assert(e->x.rem >= 0 && e->x.rem < e->dy);
504	return edge_to_cell(e);
505}
506
507inline static void
508polygon_add_edge(struct polygon *polygon,
509		 const xTrapezoid *t,
510		 const xLineFixed *edge,
511		 int dir, int dx, int dy)
512{
513	struct edge *e = &polygon->edges[polygon->num_edges];
514	const int ymin = polygon->ymin;
515	const int ymax = polygon->ymax;
516	int ytop, ybot;
517
518	assert(t->bottom > t->top);
519	assert(edge->p2.y > edge->p1.y);
520
521	ytop = pixman_fixed_to_grid_y(t->top) + dy;
522	if (ytop < ymin)
523		ytop = ymin;
524
525	ybot = pixman_fixed_to_grid_y(t->bottom) + dy;
526	if (ybot > ymax)
527		ybot = ymax;
528
529	__DBG(("%s: dx=(%d, %d), y=[%d, %d] +%d, -%d\n",
530	       __FUNCTION__, dx, dy, ytop, ybot,
531	       ((int64_t)(ytop - dy)<<16) / SAMPLES_Y - edge->p1.y,
532	       ((int64_t)(ybot - dy)<<16) / SAMPLES_Y - edge->p2.y));
533
534	e->ytop = ytop;
535	e->height_left = ybot - ytop;
536	if (e->height_left <= 0)
537		return;
538
539	if (pixman_fixed_to_grid_x(edge->p1.x) ==
540	    pixman_fixed_to_grid_x(edge->p2.x)) {
541		e->cell = pixman_fixed_to_grid_x(edge->p1.x) + dx;
542		e->x.quo = e->x.rem = 0;
543		e->dxdy.quo = e->dxdy.rem = 0;
544		e->dy = 0;
545	} else {
546		int64_t Ey, Ex, tmp;
547
		__DBG(("%s: add diagonal edge (%d, %d) -> (%d, %d) [(%d, %d)]\n",
		       __FUNCTION__,
551		       edge->p1.x, edge->p1.y,
552		       edge->p2.x, edge->p2.y,
553		       edge->p2.x - edge->p1.x,
554		       edge->p2.y - edge->p1.y));
555
556		Ex = ((int64_t)edge->p2.x - edge->p1.x) * SAMPLES_X;
557		Ey = ((int64_t)edge->p2.y - edge->p1.y) * SAMPLES_Y * (2 << 16);
558		assert(Ey > 0);
559		e->dxdy.quo = Ex * (2 << 16) / Ey;
560		e->dxdy.rem = Ex * (2 << 16) % Ey;
561
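		/* Evaluate x at the vertical midpoint of the first covered
		 * subsample row: 2*(ytop - dy) + 1 is that midpoint in
		 * half-row units, matching the 2<<16 scaling of Ey. */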
562		tmp = (int64_t)(2*(ytop - dy) + 1) << 16;
563		tmp -= (int64_t)edge->p1.y * SAMPLES_Y*2;
564		tmp *= Ex;
565		e->x.quo = tmp / Ey;
566		e->x.rem = tmp % Ey;
567
568		tmp = (int64_t)edge->p1.x * SAMPLES_X;
569		e->x.quo += (tmp >> 16) + dx;
570		tmp &= (1 << 16) - 1;
571		if (tmp) {
572			if (Ey < INT64_MAX >> 16)
573				tmp = (tmp * Ey) / (1 << 16);
574			else /* Handle overflow by losing precision */
575				tmp = tmp * (Ey / (1 << 16));
576			e->x.rem += tmp;
577		}
578
579		if (e->x.rem < 0) {
580			e->x.quo--;
581			e->x.rem += Ey;
582		} else if (e->x.rem >= Ey) {
583			e->x.quo++;
584			e->x.rem -= Ey;
585		}
586		assert(e->x.rem >= 0 && e->x.rem < Ey);
587
588		e->dy = Ey;
589		e->cell = edge_to_cell(e);
590
591		__DBG(("%s: x=%lld.%lld + %lld.%lld %lld -> cell=%d\n",
592		       __FUNCTION__,
593		       (long long)e->x.quo,
594		       (long long)e->x.rem,
595		       (long long)e->dxdy.quo,
596		       (long long)e->dxdy.rem,
597		       (long long)Ey, e->cell));
598	}
599
600	e->dir = dir;
601
602	_polygon_insert_edge_into_its_y_bucket(polygon, e);
603	polygon->num_edges++;
604}
605
606inline static void
607polygon_add_line(struct polygon *polygon,
608		 const xPointFixed *p1,
609		 const xPointFixed *p2,
610		 int dx, int dy)
611{
612	struct edge *e = &polygon->edges[polygon->num_edges];
613	int ytop, ybot;
614
615	if (p1->y == p2->y)
616		return;
617
618	__DBG(("%s: line=(%d, %d), (%d, %d)\n",
619	       __FUNCTION__, (int)p1->x, (int)p1->y, (int)p2->x, (int)p2->y));
620
621	e->dir = 1;
622	if (p2->y < p1->y) {
623		const xPointFixed *t;
624
625		e->dir = -1;
626
627		t = p1;
628		p1 = p2;
629		p2 = t;
630	}
631
632	ytop = pixman_fixed_to_grid_y(p1->y) + dy;
633	if (ytop < polygon->ymin)
634		ytop = polygon->ymin;
635
636	ybot = pixman_fixed_to_grid_y(p2->y) + dy;
637	if (ybot > polygon->ymax)
638		ybot = polygon->ymax;
639
640	if (ybot <= ytop)
641		return;
642
643	e->ytop = ytop;
644	e->height_left = ybot - ytop;
645	if (e->height_left <= 0)
646		return;
647
648	__DBG(("%s: edge height=%d\n", __FUNCTION__, e->dir * e->height_left));
649
650	if (pixman_fixed_to_grid_x(p1->x) == pixman_fixed_to_grid_x(p2->x)) {
651		e->cell = pixman_fixed_to_grid_x(p1->x);
652		e->x.quo = e->x.rem = 0;
653		e->dxdy.quo = e->dxdy.rem = 0;
654		e->dy = 0;
655	} else {
656		int64_t Ey, Ex, tmp;
657
		__DBG(("%s: add diagonal line (%d, %d) -> (%d, %d) [(%d, %d)]\n",
		       __FUNCTION__,
661		       p1->x, p1->y,
662		       p2->x, p2->y,
663		       p2->x - p1->x,
664		       p2->y - p1->y));
665
666		Ex = ((int64_t)p2->x - p1->x) * SAMPLES_X;
667		Ey = ((int64_t)p2->y - p1->y) * SAMPLES_Y * (2 << 16);
668		e->dxdy.quo = Ex * (2 << 16) / Ey;
669		e->dxdy.rem = Ex * (2 << 16) % Ey;
670
671		tmp = (int64_t)(2*(ytop - dy) + 1) << 16;
672		tmp -= (int64_t)p1->y * SAMPLES_Y*2;
673		tmp *= Ex;
674		e->x.quo = tmp / Ey;
675		e->x.rem = tmp % Ey;
676
677		tmp = (int64_t)p1->x * SAMPLES_X;
678		e->x.quo += (tmp >> 16) + dx;
679		e->x.rem += ((tmp & ((1 << 16) - 1)) * Ey) / (1 << 16);
680
681		if (e->x.rem < 0) {
682			e->x.quo--;
683			e->x.rem += Ey;
684		} else if (e->x.rem >= Ey) {
685			e->x.quo++;
686			e->x.rem -= Ey;
687		}
688		assert(e->x.rem >= 0 && e->x.rem < Ey);
689
690		e->dy = Ey;
691		e->cell = edge_to_cell(e);
692
693		__DBG(("%s: x=%lld.%lld + %lld.%lld %lld -> cell=%d\n",
694		       __FUNCTION__,
695		       (long long)e->x.quo,
696		       (long long)e->x.rem,
697		       (long long)e->dxdy.quo,
698		       (long long)e->dxdy.rem,
699		       (long long)Ey, e->cell));
700	}
701
702	if (polygon->num_edges > 0) {
703		struct edge *prev = &polygon->edges[polygon->num_edges-1];
704		/* detect degenerate triangles inserted into tristrips */
705		if (e->dir == -prev->dir &&
706		    e->ytop == prev->ytop &&
707		    e->height_left == prev->height_left &&
708		    e->cell == prev->cell &&
709		    e->x.quo == prev->x.quo &&
710		    e->x.rem == prev->x.rem &&
711		    e->dxdy.quo == prev->dxdy.quo &&
712		    e->dxdy.rem == prev->dxdy.rem) {
713			unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop,
714							  polygon->ymin);
715			polygon->y_buckets[ix] = prev->next;
716			polygon->num_edges--;
717			return;
718		}
719	}
720
721	_polygon_insert_edge_into_its_y_bucket(polygon, e);
722	polygon->num_edges++;
723}
724
725static void
726active_list_reset(struct active_list *active)
727{
728	active->head.height_left = INT_MAX;
729	active->head.x.quo = INT_MIN;
730	active->head.cell = INT_MIN;
731	active->head.dy = 0;
732	active->head.prev = NULL;
733	active->head.next = &active->tail;
734	active->tail.prev = &active->head;
735	active->tail.next = NULL;
736	active->tail.x.quo = INT_MAX;
737	active->tail.cell = INT_MAX;
738	active->tail.height_left = INT_MAX;
739	active->tail.dy = 0;
740}
741
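/* Merge two lists of edges, each already sorted by cell, into a single
 * sorted list, keeping the prev links consistent as it goes. */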
742static struct edge *
743merge_sorted_edges(struct edge *head_a, struct edge *head_b)
744{
745	struct edge *head, **next, *prev;
746	int32_t x;
747
748	if (head_b == NULL)
749		return head_a;
750
751	prev = head_a->prev;
752	next = &head;
753	if (head_a->cell <= head_b->cell) {
754		head = head_a;
755	} else {
756		head = head_b;
757		head_b->prev = prev;
758		goto start_with_b;
759	}
760
761	do {
762		x = head_b->cell;
763		while (head_a != NULL && head_a->cell <= x) {
764			prev = head_a;
765			next = &head_a->next;
766			head_a = head_a->next;
767		}
768
769		head_b->prev = prev;
770		*next = head_b;
771		if (head_a == NULL)
772			return head;
773
774start_with_b:
775		x = head_a->cell;
776		while (head_b != NULL && head_b->cell <= x) {
777			prev = head_b;
778			next = &head_b->next;
779			head_b = head_b->next;
780		}
781
782		head_a->prev = prev;
783		*next = head_a;
784		if (head_b == NULL)
785			return head;
786	} while (1);
787}
788
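/* Merge sort of the edge list keyed on cell: sorts a run of (roughly
 * 2^(level+1)) edges starting at 'list', stores the sorted run in
 * *head_out and returns the unsorted remainder of the list;
 * merge_unsorted_edges() passes UINT_MAX as the level to sort the
 * whole list in one call. */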
789static struct edge *
790sort_edges(struct edge  *list,
791	   unsigned int  level,
792	   struct edge **head_out)
793{
794	struct edge *head_other, *remaining;
795	unsigned int i;
796
797	head_other = list->next;
798	if (head_other == NULL) {
799		*head_out = list;
800		return NULL;
801	}
802
803	remaining = head_other->next;
804	if (list->cell <= head_other->cell) {
805		*head_out = list;
806		head_other->next = NULL;
807	} else {
808		*head_out = head_other;
809		head_other->prev = list->prev;
810		head_other->next = list;
811		list->prev = head_other;
812		list->next = NULL;
813	}
814
815	for (i = 0; i < level && remaining; i++) {
816		remaining = sort_edges(remaining, i, &head_other);
817		*head_out = merge_sorted_edges(*head_out, head_other);
818	}
819
820	return remaining;
821}
822
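/* Drop pairs of coincident edges with opposite directions that cancel
 * exactly (e.g. the left/right edges of a degenerate trapezoid) so they
 * never reach the active list. */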
823static struct edge *filter(struct edge *edges)
824{
825	struct edge *e;
826
827	e = edges;
828	while (e->next) {
829		struct edge *n = e->next;
830		if (e->dir == -n->dir &&
831		    e->height_left == n->height_left &&
832		    e->cell == n->cell &&
833		    e->x.quo == n->x.quo &&
834		    e->x.rem == n->x.rem &&
835		    e->dxdy.quo == n->dxdy.quo &&
836		    e->dxdy.rem == n->dxdy.rem) {
837			if (e->prev)
838				e->prev->next = n->next;
839			else
840				edges = n->next;
841			if (n->next)
842				n->next->prev = e->prev;
843			else
844				break;
845
846			e = n->next;
847		} else
848			e = n;
849	}
850
851	return edges;
852}
853
854static struct edge *
855merge_unsorted_edges(struct edge *head, struct edge *unsorted)
856{
857	sort_edges(unsorted, UINT_MAX, &unsorted);
858	return merge_sorted_edges(head, filter(unsorted));
859}
860
/* Test if the edges on the active list can be safely advanced by a
 * full row, i.e. every active edge is vertical and none ends within
 * the row.  Returns 0 if not, otherwise the minimum height_left (in
 * subsample rows) over the active edges. */
863inline static int
864can_full_step(struct active_list *active)
865{
866	const struct edge *e;
867	int min_height = INT_MAX;
868
869	assert(active->head.next != &active->tail);
870	for (e = active->head.next; &active->tail != e; e = e->next) {
871		assert(e->height_left > 0);
872
873		if (e->dy != 0)
874			return 0;
875
876		if (e->height_left < min_height) {
877			min_height = e->height_left;
878			if (min_height < SAMPLES_Y)
879				return 0;
880		}
881	}
882
883	return min_height;
884}
885
886inline static void
887merge_edges(struct active_list *active, struct edge *edges)
888{
889	active->head.next = merge_unsorted_edges(active->head.next, edges);
890}
891
892inline static void
893fill_buckets(struct active_list *active,
894	     struct edge *edge,
895	     int ymin,
896	     struct edge **buckets)
897{
898	while (edge) {
899		struct edge *next = edge->next;
900		struct edge **b = &buckets[edge->ytop - ymin];
901		if (*b)
902			(*b)->prev = edge;
903		edge->next = *b;
904		edge->prev = NULL;
905		*b = edge;
906		edge = next;
907	}
908}
909
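/* Rasterise one subsample row using the nonzero winding rule:
 * accumulate a subspan into the cell list for every run in which the
 * winding number is non-zero, stepping each edge down to the next
 * subsample row (re-sorting or retiring it) as we go. */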
910inline static void
911nonzero_subrow(struct active_list *active, struct cell_list *coverages)
912{
913	struct edge *edge = active->head.next;
914	int prev_x = INT_MIN;
915	int winding = 0, xstart = edge->cell;
916
917	cell_list_rewind(coverages);
918
919	while (&active->tail != edge) {
920		struct edge *next = edge->next;
921
922		winding += edge->dir;
923		if (0 == winding && edge->next->cell != edge->cell) {
924			cell_list_add_subspan(coverages, xstart, edge->cell);
925			xstart = edge->next->cell;
926		}
927
928		assert(edge->height_left > 0);
929		if (--edge->height_left) {
930			if (edge->dy)
931				edge->cell = edge_advance(edge);
932
933			if (edge->cell < prev_x) {
934				struct edge *pos = edge->prev;
935				pos->next = next;
936				next->prev = pos;
937				do {
938					pos = pos->prev;
939				} while (edge->cell < pos->cell);
940				pos->next->prev = edge;
941				edge->next = pos->next;
942				edge->prev = pos;
943				pos->next = edge;
944			} else
945				prev_x = edge->cell;
946		} else {
947			edge->prev->next = next;
948			next->prev = edge->prev;
949		}
950
951		edge = next;
952	}
953}
954
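/* Full pixel-row fast path, used when every active edge is vertical:
 * each nonzero-winding span covers all SAMPLES_Y subsample rows, so
 * whole-row coverage is added to the cells in a single pass. */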
955static void
956nonzero_row(struct active_list *active, struct cell_list *coverages)
957{
958	struct edge *left = active->head.next;
959
960	while (&active->tail != left) {
961		struct edge *right;
962		int winding = left->dir;
963
964		left->height_left -= SAMPLES_Y;
965		assert(left->height_left >= 0);
966		if (!left->height_left) {
967			left->prev->next = left->next;
968			left->next->prev = left->prev;
969		}
970
971		right = left->next;
972		do {
973			right->height_left -= SAMPLES_Y;
974			assert(right->height_left >= 0);
975			if (!right->height_left) {
976				right->prev->next = right->next;
977				right->next->prev = right->prev;
978			}
979
980			winding += right->dir;
981			if (0 == winding)
982				break;
983
984			right = right->next;
985		} while (1);
986
987		cell_list_add_span(coverages, left->cell, right->cell);
988		left = right->next;
989	}
990}
991
992static void
993tor_fini(struct tor *converter)
994{
995	polygon_fini(converter->polygon);
996	cell_list_fini(converter->coverages);
997}
998
999static bool
1000tor_init(struct tor *converter, const BoxRec *box, int num_edges)
1001{
1002	__DBG(("%s: (%d, %d),(%d, %d) x (%d, %d), num_edges=%d\n",
1003	       __FUNCTION__,
1004	       box->x1, box->y1, box->x2, box->y2,
1005	       SAMPLES_X, SAMPLES_Y,
1006	       num_edges));
1007
1008	converter->extents = *box;
1009
1010	if (!cell_list_init(converter->coverages, box->x1, box->x2))
1011		return false;
1012
1013	active_list_reset(converter->active);
1014	if (!polygon_init(converter->polygon, num_edges,
1015			  (int)box->y1 * SAMPLES_Y, (int)box->y2 * SAMPLES_Y)) {
1016		cell_list_fini(converter->coverages);
1017		return false;
1018	}
1019
1020	return true;
1021}
1022
1023static void
1024tor_add_trapezoid(struct tor *tor, const xTrapezoid *t, int dx, int dy)
1025{
1026	polygon_add_edge(tor->polygon, t, &t->left, 1, dx, dy);
1027	polygon_add_edge(tor->polygon, t, &t->right, -1, dx, dy);
1028}
1029
1030static void
1031step_edges(struct active_list *active, int count)
1032{
1033	struct edge *edge;
1034
1035	count *= SAMPLES_Y;
1036	for (edge = active->head.next; edge != &active->tail; edge = edge->next) {
1037		edge->height_left -= count;
1038		assert(edge->height_left >= 0);
1039		if (!edge->height_left) {
1040			edge->prev->next = edge->next;
1041			edge->next->prev = edge->prev;
1042		}
1043	}
1044}
1045
1046static void
1047tor_blt_span(struct sna *sna,
1048	     struct sna_composite_spans_op *op,
1049	     pixman_region16_t *clip,
1050	     const BoxRec *box,
1051	     int coverage)
1052{
1053	__DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
1054
1055	op->box(sna, op, box, AREA_TO_FLOAT(coverage));
1056	apply_damage_box(&op->base, box);
1057}
1058
1059static void
1060tor_blt_span__no_damage(struct sna *sna,
1061			struct sna_composite_spans_op *op,
1062			pixman_region16_t *clip,
1063			const BoxRec *box,
1064			int coverage)
1065{
1066	__DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
1067
1068	op->box(sna, op, box, AREA_TO_FLOAT(coverage));
1069}
1070
1071static void
1072tor_blt_span_clipped(struct sna *sna,
1073		     struct sna_composite_spans_op *op,
1074		     pixman_region16_t *clip,
1075		     const BoxRec *box,
1076		     int coverage)
1077{
1078	pixman_region16_t region;
1079	float opacity;
1080
1081	opacity = AREA_TO_FLOAT(coverage);
1082	__DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2, opacity));
1083
1084	pixman_region_init_rects(&region, box, 1);
1085	RegionIntersect(&region, &region, clip);
1086	if (region_num_rects(&region)) {
1087		op->boxes(sna, op,
1088			  region_rects(&region),
1089			  region_num_rects(&region),
1090			  opacity);
1091		apply_damage(&op->base, &region);
1092	}
1093	pixman_region_fini(&region);
1094}
1095
1096static void
1097tor_blt(struct sna *sna,
1098	struct tor *converter,
1099	struct sna_composite_spans_op *op,
1100	pixman_region16_t *clip,
1101	void (*span)(struct sna *sna,
1102		     struct sna_composite_spans_op *op,
1103		     pixman_region16_t *clip,
1104		     const BoxRec *box,
1105		     int coverage),
1106	int y, int height,
1107	int unbounded)
1108{
1109	struct cell_list *cells = converter->coverages;
1110	struct cell *cell;
1111	BoxRec box;
1112	int cover;
1113
1114	box.y1 = y + converter->extents.y1;
1115	box.y2 = box.y1 + height;
1116	assert(box.y2 <= converter->extents.y2);
1117	box.x1 = converter->extents.x1;
1118
1119	/* Form the spans from the coverages and areas. */
1120	cover = cells->head.covered_height*SAMPLES_X*2;
1121	assert(cover >= 0);
1122	for (cell = cells->head.next; cell != &cells->tail; cell = cell->next) {
1123		int x = cell->x;
1124
1125		assert(x >= converter->extents.x1);
1126		assert(x < converter->extents.x2);
1127		__DBG(("%s: cell=(%d, %d, %d), cover=%d\n", __FUNCTION__,
1128		       cell->x, cell->covered_height, cell->uncovered_area,
1129		       cover));
1130
1131		if (cell->covered_height || cell->uncovered_area) {
1132			box.x2 = x;
1133			if (box.x2 > box.x1 && (unbounded || cover)) {
1134				__DBG(("%s: end span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
1135				       box.x1, box.y1,
1136				       box.x2 - box.x1,
1137				       box.y2 - box.y1,
1138				       cover));
1139				span(sna, op, clip, &box, cover);
1140			}
1141			box.x1 = box.x2;
1142			cover += cell->covered_height*SAMPLES_X*2;
1143		}
1144
1145		if (cell->uncovered_area) {
1146			int area = cover - cell->uncovered_area;
1147			box.x2 = x + 1;
1148			if (unbounded || area) {
1149				__DBG(("%s: new span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
1150				       box.x1, box.y1,
1151				       box.x2 - box.x1,
1152				       box.y2 - box.y1,
1153				       area));
1154				span(sna, op, clip, &box, area);
1155			}
1156			box.x1 = box.x2;
1157		}
1158	}
1159
1160	box.x2 = converter->extents.x2;
1161	if (box.x2 > box.x1 && (unbounded || cover)) {
1162		__DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
1163		       box.x1, box.y1,
1164		       box.x2 - box.x1,
1165		       box.y2 - box.y1,
1166		       cover));
1167		span(sna, op, clip, &box, cover);
1168	}
1169}
1170
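/* Scan convert the polygon one pixel row at a time: rows with neither
 * new nor active edges are skipped (or emitted as zero coverage when
 * unbounded), rows containing only vertical edges take the full-row
 * fast path, and all other rows are subsampled SAMPLES_Y times before
 * the accumulated cells are blitted out through tor_blt(). */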
1171flatten static void
1172tor_render(struct sna *sna,
1173	   struct tor *converter,
1174	   struct sna_composite_spans_op *op,
1175	   pixman_region16_t *clip,
1176	   void (*span)(struct sna *sna,
1177			struct sna_composite_spans_op *op,
1178			pixman_region16_t *clip,
1179			const BoxRec *box,
1180			int coverage),
1181	   int unbounded)
1182{
1183	struct polygon *polygon = converter->polygon;
1184	struct cell_list *coverages = converter->coverages;
1185	struct active_list *active = converter->active;
1186	struct edge *buckets[SAMPLES_Y] = { 0 };
1187	int16_t i, j, h = converter->extents.y2 - converter->extents.y1;
1188
1189	__DBG(("%s: unbounded=%d\n", __FUNCTION__, unbounded));
1190
1191	/* Render each pixel row. */
1192	for (i = 0; i < h; i = j) {
1193		int do_full_step = 0;
1194
1195		j = i + 1;
1196
1197		/* Determine if we can ignore this row or use the full pixel
1198		 * stepper. */
1199		if (polygon->y_buckets[i] == NULL) {
1200			if (active->head.next == &active->tail) {
1201				for (; polygon->y_buckets[j] == NULL; j++)
1202					;
				__DBG(("%s: no new edges and no existing edges, skipping, %d -> %d\n",
				       __FUNCTION__, i, j));
1205
1206				assert(j <= h);
1207				if (unbounded) {
1208					BoxRec box;
1209
1210					box = converter->extents;
1211					box.y1 += i;
1212					box.y2 = converter->extents.y1 + j;
1213
1214					span(sna, op, clip, &box, 0);
1215				}
1216				continue;
1217			}
1218
1219			do_full_step = can_full_step(active);
1220		}
1221
1222		__DBG(("%s: y=%d, do_full_step=%d, new edges=%d\n",
1223		       __FUNCTION__, i, do_full_step,
1224		       polygon->y_buckets[i] != NULL));
1225		if (do_full_step) {
1226			nonzero_row(active, coverages);
1227
1228			while (polygon->y_buckets[j] == NULL &&
1229			       do_full_step >= 2*SAMPLES_Y) {
1230				do_full_step -= SAMPLES_Y;
1231				j++;
1232			}
1233			assert(j >= i + 1 && j <= h);
1234			if (j != i + 1)
1235				step_edges(active, j - (i + 1));
1236
1237			__DBG(("%s: vertical edges, full step (%d, %d)\n",
1238			       __FUNCTION__,  i, j));
1239		} else {
1240			int suby;
1241
1242			fill_buckets(active, polygon->y_buckets[i], (i+converter->extents.y1)*SAMPLES_Y, buckets);
1243
1244			/* Subsample this row. */
1245			for (suby = 0; suby < SAMPLES_Y; suby++) {
1246				if (buckets[suby]) {
1247					merge_edges(active, buckets[suby]);
1248					buckets[suby] = NULL;
1249				}
1250
1251				nonzero_subrow(active, coverages);
1252			}
1253		}
1254
1255		assert(j > i);
1256		tor_blt(sna, converter, op, clip, span, i, j-i, unbounded);
1257		cell_list_reset(coverages);
1258	}
1259}
1260
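/* Full pixel-row variant that writes final 8-bit coverage straight into
 * the destination row; only used when every active edge is vertical. */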
1261static void
1262inplace_row(struct active_list *active, uint8_t *row, int width)
1263{
1264	struct edge *left = active->head.next;
1265
1266	while (&active->tail != left) {
1267		struct edge *right;
1268		int winding = left->dir;
1269		int lfx, rfx;
1270		int lix, rix;
1271
1272		left->height_left -= SAMPLES_Y;
1273		assert(left->height_left >= 0);
1274		if (!left->height_left) {
1275			left->prev->next = left->next;
1276			left->next->prev = left->prev;
1277		}
1278
1279		right = left->next;
1280		do {
1281			right->height_left -= SAMPLES_Y;
1282			assert(right->height_left >= 0);
1283			if (!right->height_left) {
1284				right->prev->next = right->next;
1285				right->next->prev = right->prev;
1286			}
1287
1288			winding += right->dir;
1289			if (0 == winding && right->cell != right->next->cell)
1290				break;
1291
1292			right = right->next;
1293		} while (1);
1294
1295		if (left->cell < 0) {
1296			lix = lfx = 0;
1297		} else if (left->cell >= width * SAMPLES_X) {
1298			lix = width;
1299			lfx = 0;
1300		} else
1301			SAMPLES_X_TO_INT_FRAC(left->cell, lix, lfx);
1302
1303		if (right->cell < 0) {
1304			rix = rfx = 0;
1305		} else if (right->cell >= width * SAMPLES_X) {
1306			rix = width;
1307			rfx = 0;
1308		} else
1309			SAMPLES_X_TO_INT_FRAC(right->cell, rix, rfx);
1310		if (lix == rix) {
1311			if (rfx != lfx) {
1312				assert(lix < width);
1313				row[lix] += (rfx-lfx) * SAMPLES_Y;
1314			}
1315		} else {
1316			assert(lix < width);
1317			if (lfx == 0)
1318				row[lix] = 0xff;
1319			else
1320				row[lix] += 255 - lfx * SAMPLES_Y;
1321
1322			assert(rix <= width);
1323			if (rfx) {
1324				assert(rix < width);
1325				row[rix] += rfx * SAMPLES_Y;
1326			}
1327
1328			if (rix > ++lix) {
1329				uint8_t *r = row + lix;
1330				rix -= lix;
1331#if 0
1332				if (rix == 1)
1333					*row = 0xff;
1334				else
1335					memset(row, 0xff, rix);
1336#else
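				/* Fill the interior of the span with 0xff
				 * using progressively wider aligned stores. */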
1337				if ((uintptr_t)r & 1 && rix) {
1338					*r++ = 0xff;
1339					rix--;
1340				}
1341				if ((uintptr_t)r & 2 && rix >= 2) {
1342					*(uint16_t *)r = 0xffff;
1343					r += 2;
1344					rix -= 2;
1345				}
1346				if ((uintptr_t)r & 4 && rix >= 4) {
1347					*(uint32_t *)r = 0xffffffff;
1348					r += 4;
1349					rix -= 4;
1350				}
1351				while (rix >= 8) {
1352					*(uint64_t *)r = 0xffffffffffffffff;
1353					r += 8;
1354					rix -= 8;
1355				}
1356				if (rix & 4) {
1357					*(uint32_t *)r = 0xffffffff;
1358					r += 4;
1359				}
1360				if (rix & 2) {
1361					*(uint16_t *)r = 0xffff;
1362					r += 2;
1363				}
1364				if (rix & 1)
1365					*r = 0xff;
1366#endif
1367			}
1368		}
1369
1370		left = right->next;
1371	}
1372}
1373
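/* Accumulate one subsample row's coverage directly into the row buffer;
 * tor_inplace() calls this SAMPLES_Y times so the per-pixel totals
 * build up over the whole pixel row. */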
1374inline static void
1375inplace_subrow(struct active_list *active, int8_t *row, int width)
1376{
1377	struct edge *edge = active->head.next;
1378	int prev_x = INT_MIN;
1379
1380	while (&active->tail != edge) {
1381		struct edge *next = edge->next;
1382		int winding = edge->dir;
1383		int lfx, rfx;
1384		int lix, rix;
1385
1386		if (edge->cell < 0) {
1387			lix = lfx = 0;
1388		} else if (edge->cell >= width * SAMPLES_X) {
1389			lix = width;
1390			lfx = 0;
1391		} else
1392			SAMPLES_X_TO_INT_FRAC(edge->cell, lix, lfx);
1393
1394		assert(edge->height_left > 0);
1395		if (--edge->height_left) {
1396			if (edge->dy)
1397				edge->cell = edge_advance(edge);
1398
1399			if (edge->cell < prev_x) {
1400				struct edge *pos = edge->prev;
1401				pos->next = next;
1402				next->prev = pos;
1403				do {
1404					pos = pos->prev;
1405				} while (edge->cell < pos->cell);
1406				pos->next->prev = edge;
1407				edge->next = pos->next;
1408				edge->prev = pos;
1409				pos->next = edge;
1410			} else
1411				prev_x = edge->cell;
1412		} else {
1413			edge->prev->next = next;
1414			next->prev = edge->prev;
1415		}
1416
1417		edge = next;
1418		do {
1419			next = edge->next;
1420			winding += edge->dir;
1421			if (0 == winding && edge->cell != next->cell)
1422				break;
1423
1424			assert(edge->height_left > 0);
1425			if (--edge->height_left) {
1426				if (edge->dy)
1427					edge->cell = edge_advance(edge);
1428
1429				if (edge->cell < prev_x) {
1430					struct edge *pos = edge->prev;
1431					pos->next = next;
1432					next->prev = pos;
1433					do {
1434						pos = pos->prev;
1435					} while (edge->cell < pos->cell);
1436					pos->next->prev = edge;
1437					edge->next = pos->next;
1438					edge->prev = pos;
1439					pos->next = edge;
1440				} else
1441					prev_x = edge->cell;
1442			} else {
1443				edge->prev->next = next;
1444				next->prev = edge->prev;
1445			}
1446
1447			edge = next;
1448		} while (1);
1449
1450		if (edge->cell < 0) {
1451			rix = rfx = 0;
1452		} else if (edge->cell >= width * SAMPLES_X) {
1453			rix = width;
1454			rfx = 0;
1455		} else
1456			SAMPLES_X_TO_INT_FRAC(edge->cell, rix, rfx);
1457
1458		assert(edge->height_left > 0);
1459		if (--edge->height_left) {
1460			if (edge->dy)
1461				edge->cell = edge_advance(edge);
1462
1463			if (edge->cell < prev_x) {
1464				struct edge *pos = edge->prev;
1465				pos->next = next;
1466				next->prev = pos;
1467				do {
1468					pos = pos->prev;
1469				} while (edge->cell < pos->cell);
1470				pos->next->prev = edge;
1471				edge->next = pos->next;
1472				edge->prev = pos;
1473				pos->next = edge;
1474			} else
1475				prev_x = edge->cell;
1476		} else {
1477			edge->prev->next = next;
1478			next->prev = edge->prev;
1479		}
1480
1481		edge = next;
1482
1483		__DBG(("%s: left=%d.%d, right=%d.%d\n", __FUNCTION__,
1484		       lix, lfx, rix, rfx));
1485		if (lix == rix) {
1486			if (rfx != lfx) {
1487				assert(lix < width);
1488				row[lix] += (rfx-lfx);
1489			}
1490		} else {
1491			assert(lix < width);
1492			row[lix] += SAMPLES_X - lfx;
1493
1494			assert(rix <= width);
1495			if (rfx) {
1496				assert(rix < width);
1497				row[rix] += rfx;
1498			}
1499
1500			while (++lix < rix)
1501				row[lix] += SAMPLES_X;
1502		}
1503	}
1504}
1505
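/* Like tor_render(), but writes 8-bit coverage directly into an a8
 * scratch pixmap row by row; when the scratch's usage hint requests it,
 * each row is accumulated in a small on-stack buffer first and copied
 * out in one go (presumably to avoid read-modify-write cycles on
 * uncached upload memory). */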
1506flatten static void
1507tor_inplace(struct tor *converter, PixmapPtr scratch)
1508{
1509	uint8_t buf[TOR_INPLACE_SIZE];
1510	int i, j, h = converter->extents.y2 - converter->extents.y1;
1511	struct polygon *polygon = converter->polygon;
1512	struct active_list *active = converter->active;
1513	struct edge *buckets[SAMPLES_Y] = { 0 };
1514	uint8_t *row = scratch->devPrivate.ptr;
1515	int stride = scratch->devKind;
1516	int width = scratch->drawable.width;
1517
1518	__DBG(("%s: buf?=%d\n", __FUNCTION__, buf != NULL));
1519	assert(converter->extents.x1 == 0);
1520	assert(scratch->drawable.depth == 8);
1521
1522	row += converter->extents.y1 * stride;
1523
1524	/* Render each pixel row. */
1525	for (i = 0; i < h; i = j) {
1526		int do_full_step = 0;
1527		void *ptr = scratch->usage_hint ? buf : row;
1528
1529		j = i + 1;
1530
1531		/* Determine if we can ignore this row or use the full pixel
1532		 * stepper. */
1533		if (!polygon->y_buckets[i]) {
1534			if (active->head.next == &active->tail) {
1535				for (; !polygon->y_buckets[j]; j++)
1536					;
				__DBG(("%s: no new edges and no existing edges, skipping, %d -> %d\n",
				       __FUNCTION__, i, j));
1539
1540				memset(row, 0, stride*(j-i));
1541				row += stride*(j-i);
1542				continue;
1543			}
1544
1545			do_full_step = can_full_step(active);
1546		}
1547
1548		__DBG(("%s: y=%d, do_full_step=%d, new edges=%d\n",
1549		       __FUNCTION__, i, do_full_step,
1550		       polygon->y_buckets[i] != NULL));
1551		if (do_full_step) {
1552			memset(ptr, 0, width);
1553			inplace_row(active, ptr, width);
1554			if (row != ptr)
1555				memcpy(row, ptr, width);
1556
1557			while (polygon->y_buckets[j] == NULL &&
1558			       do_full_step >= 2*SAMPLES_Y) {
1559				do_full_step -= SAMPLES_Y;
1560				row += stride;
1561				memcpy(row, ptr, width);
1562				j++;
1563			}
1564			if (j != i + 1)
1565				step_edges(active, j - (i + 1));
1566
1567			__DBG(("%s: vertical edges, full step (%d, %d)\n",
1568			       __FUNCTION__,  i, j));
1569		} else {
1570			int suby;
1571
1572			fill_buckets(active, polygon->y_buckets[i], (i+converter->extents.y1)*SAMPLES_Y, buckets);
1573
1574			/* Subsample this row. */
1575			memset(ptr, 0, width);
1576			for (suby = 0; suby < SAMPLES_Y; suby++) {
1577				if (buckets[suby]) {
1578					merge_edges(active, buckets[suby]);
1579					buckets[suby] = NULL;
1580				}
1581
1582				inplace_subrow(active, ptr, width);
1583			}
1584			if (row != ptr)
1585				memcpy(row, ptr, width);
1586		}
1587
1588		row += stride;
1589	}
1590}
1591
1592static int operator_is_bounded(uint8_t op)
1593{
1594	switch (op) {
1595	case PictOpOver:
1596	case PictOpOutReverse:
1597	case PictOpAdd:
1598		return true;
1599	default:
1600		return false;
1601	}
1602}
1603
1604static span_func_t
1605choose_span(struct sna_composite_spans_op *tmp,
1606	    PicturePtr dst,
1607	    PictFormatPtr maskFormat,
1608	    RegionPtr clip)
1609{
1610	span_func_t span;
1611
1612	assert(!is_mono(dst, maskFormat));
1613	if (clip->data)
1614		span = tor_blt_span_clipped;
1615	else if (tmp->base.damage == NULL)
1616		span = tor_blt_span__no_damage;
1617	else
1618		span = tor_blt_span;
1619
1620	return span;
1621}
1622
1623struct span_thread {
1624	struct sna *sna;
1625	const struct sna_composite_spans_op *op;
1626	const xTrapezoid *traps;
1627	RegionPtr clip;
1628	span_func_t span;
1629	BoxRec extents;
1630	int dx, dy, draw_y;
1631	int ntrap;
1632	bool unbounded;
1633};
1634
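/* Each span thread batches its output into a fixed-size array of
 * opacity boxes and flushes it through op->thread_boxes() whenever the
 * array fills up, so no allocation happens on the rendering path. */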
1635#define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box))
1636struct span_thread_boxes {
1637	const struct sna_composite_spans_op *op;
1638	int num_boxes;
1639	struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES];
1640};
1641
1642static void span_thread_add_boxes(struct sna *sna, void *data,
1643				  const BoxRec *box, int count, float alpha)
1644{
1645	struct span_thread_boxes *b = data;
1646
1647	__DBG(("%s: adding %d boxes with alpha=%f\n",
1648	       __FUNCTION__, count, alpha));
1649
1650	assert(count > 0 && count <= SPAN_THREAD_MAX_BOXES);
1651	if (unlikely(b->num_boxes + count > SPAN_THREAD_MAX_BOXES)) {
1652		DBG(("%s: flushing %d boxes, adding %d\n", __FUNCTION__, b->num_boxes, count));
1653		assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES);
1654		b->op->thread_boxes(sna, b->op, b->boxes, b->num_boxes);
1655		b->num_boxes = 0;
1656	}
1657
1658	do {
1659		b->boxes[b->num_boxes].box = *box++;
1660		b->boxes[b->num_boxes].alpha = alpha;
1661		b->num_boxes++;
1662	} while (--count);
1663	assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES);
1664}
1665
1666static void
1667span_thread_box(struct sna *sna,
1668		struct sna_composite_spans_op *op,
1669		pixman_region16_t *clip,
1670		const BoxRec *box,
1671		int coverage)
1672{
1673	__DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
1674	span_thread_add_boxes(sna, op, box, 1, AREA_TO_FLOAT(coverage));
1675}
1676
1677static void
1678span_thread_clipped_box(struct sna *sna,
1679			struct sna_composite_spans_op *op,
1680			pixman_region16_t *clip,
1681			const BoxRec *box,
1682			int coverage)
1683{
1684	pixman_region16_t region;
1685
1686	__DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2,
1687	       AREA_TO_FLOAT(coverage)));
1688
1689	pixman_region_init_rects(&region, box, 1);
1690	RegionIntersect(&region, &region, clip);
1691	if (region_num_rects(&region)) {
1692		span_thread_add_boxes(sna, op,
1693				      region_rects(&region),
1694				      region_num_rects(&region),
1695				      AREA_TO_FLOAT(coverage));
1696	}
1697	pixman_region_fini(&region);
1698}
1699
1700static span_func_t
1701thread_choose_span(struct sna_composite_spans_op *tmp,
1702		   PicturePtr dst,
1703		   PictFormatPtr maskFormat,
1704		   RegionPtr clip)
1705{
1706	span_func_t span;
1707
1708	if (tmp->base.damage) {
1709		DBG(("%s: damaged -> no thread support\n", __FUNCTION__));
1710		return NULL;
1711	}
1712
1713	assert(!is_mono(dst, maskFormat));
1714	assert(tmp->thread_boxes);
1715	DBG(("%s: clipped? %d\n", __FUNCTION__, clip->data != NULL));
1716	if (clip->data)
1717		span = span_thread_clipped_box;
1718	else
1719		span = span_thread_box;
1720
1721	return span;
1722}
1723
1724static void
1725span_thread(void *arg)
1726{
1727	struct span_thread *thread = arg;
1728	struct span_thread_boxes boxes;
1729	struct tor tor;
1730	const xTrapezoid *t;
1731	int n, y1, y2;
1732
1733	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
1734		return;
1735
1736	boxes.op = thread->op;
1737	boxes.num_boxes = 0;
1738
1739	y1 = thread->extents.y1 - thread->draw_y;
1740	y2 = thread->extents.y2 - thread->draw_y;
1741	for (n = thread->ntrap, t = thread->traps; n--; t++) {
1742		if (pixman_fixed_integer_floor(t->top) >= y2 ||
1743		    pixman_fixed_integer_ceil(t->bottom) <= y1)
1744			continue;
1745
1746		tor_add_trapezoid(&tor, t, thread->dx, thread->dy);
1747	}
1748
1749	tor_render(thread->sna, &tor,
1750		   (struct sna_composite_spans_op *)&boxes, thread->clip,
1751		   thread->span, thread->unbounded);
1752
1753	tor_fini(&tor);
1754
1755	if (boxes.num_boxes) {
1756		DBG(("%s: flushing %d boxes\n", __FUNCTION__, boxes.num_boxes));
1757		assert(boxes.num_boxes <= SPAN_THREAD_MAX_BOXES);
1758		thread->op->thread_boxes(thread->sna, thread->op,
1759					 boxes.boxes, boxes.num_boxes);
1760	}
1761}
1762
1763bool
1764precise_trapezoid_span_converter(struct sna *sna,
1765				 CARD8 op, PicturePtr src, PicturePtr dst,
1766				 PictFormatPtr maskFormat, unsigned int flags,
1767				 INT16 src_x, INT16 src_y,
1768				 int ntrap, xTrapezoid *traps)
1769{
1770	struct sna_composite_spans_op tmp;
1771	pixman_region16_t clip;
1772	int16_t dst_x, dst_y;
1773	bool was_clear;
1774	int dx, dy, n;
1775	int num_threads;
1776
1777	if (NO_PRECISE)
1778		return false;
1779
1780	if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, flags)) {
1781		DBG(("%s: fallback -- composite spans not supported\n",
1782		     __FUNCTION__));
1783		return false;
1784	}
1785
1786	if (!trapezoids_bounds(ntrap, traps, &clip.extents))
1787		return true;
1788
1789#if 1
1790	if (((clip.extents.y2 - clip.extents.y1) | (clip.extents.x2 - clip.extents.x1)) < 32) {
1791		DBG(("%s: fallback -- traps extents too small %dx%d\n", __FUNCTION__,
1792		     clip.extents.y2 - clip.extents.y1,
1793		     clip.extents.x2 - clip.extents.x1));
1794		return false;
1795	}
1796#endif
1797
1798	DBG(("%s: extents (%d, %d), (%d, %d)\n",
1799	     __FUNCTION__,
1800	     clip.extents.x1, clip.extents.y1,
1801	     clip.extents.x2, clip.extents.y2));
1802
1803	trapezoid_origin(&traps[0].left, &dst_x, &dst_y);
1804
1805	if (!sna_compute_composite_region(&clip,
1806					  src, NULL, dst,
1807					  src_x + clip.extents.x1 - dst_x,
1808					  src_y + clip.extents.y1 - dst_y,
1809					  0, 0,
1810					  clip.extents.x1, clip.extents.y1,
1811					  clip.extents.x2 - clip.extents.x1,
1812					  clip.extents.y2 - clip.extents.y1)) {
		DBG(("%s: trapezoids do not intersect drawable clips\n",
		     __FUNCTION__));
1815		return true;
1816	}
1817
1818	if (!sna->render.check_composite_spans(sna, op, src, dst,
1819					       clip.extents.x2 - clip.extents.x1,
1820					       clip.extents.y2 - clip.extents.y1,
1821					       flags)) {
1822		DBG(("%s: fallback -- composite spans not supported\n",
1823		     __FUNCTION__));
1824		return false;
1825	}
1826
1827	dx = dst->pDrawable->x;
1828	dy = dst->pDrawable->y;
1829
1830	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
1831	     __FUNCTION__,
1832	     clip.extents.x1, clip.extents.y1,
1833	     clip.extents.x2, clip.extents.y2,
1834	     dx, dy,
1835	     src_x + clip.extents.x1 - dst_x - dx,
1836	     src_y + clip.extents.y1 - dst_y - dy));
1837
1838	was_clear = sna_drawable_is_clear(dst->pDrawable);
1839	switch (op) {
1840	case PictOpAdd:
1841	case PictOpOver:
1842		if (was_clear)
1843			op = PictOpSrc;
1844		break;
1845	case PictOpIn:
1846		if (was_clear)
1847			return true;
1848		break;
1849	}
1850
1851	if (!sna->render.composite_spans(sna, op, src, dst,
1852					 src_x + clip.extents.x1 - dst_x - dx,
1853					 src_y + clip.extents.y1 - dst_y - dy,
1854					 clip.extents.x1,  clip.extents.y1,
1855					 clip.extents.x2 - clip.extents.x1,
1856					 clip.extents.y2 - clip.extents.y1,
1857					 flags, memset(&tmp, 0, sizeof(tmp)))) {
1858		DBG(("%s: fallback -- composite spans render op not supported\n",
1859		     __FUNCTION__));
1860		return false;
1861	}
1862
1863	dx *= SAMPLES_X;
1864	dy *= SAMPLES_Y;
1865
1866	num_threads = 1;
1867	if (!NO_GPU_THREADS &&
1868	    (flags & COMPOSITE_SPANS_RECTILINEAR) == 0 &&
1869	    tmp.thread_boxes &&
1870	    thread_choose_span(&tmp, dst, maskFormat, &clip))
1871		num_threads = sna_use_threads(clip.extents.x2-clip.extents.x1,
1872					      clip.extents.y2-clip.extents.y1,
1873					      8);
1874	DBG(("%s: using %d threads\n", __FUNCTION__, num_threads));
1875	if (num_threads == 1) {
1876		struct tor tor;
1877
1878		if (!tor_init(&tor, &clip.extents, 2*ntrap))
1879			goto skip;
1880
1881		for (n = 0; n < ntrap; n++) {
1882			if (pixman_fixed_integer_floor(traps[n].top) + dst->pDrawable->y >= clip.extents.y2 ||
1883			    pixman_fixed_integer_ceil(traps[n].bottom) + dst->pDrawable->y <= clip.extents.y1)
1884				continue;
1885
1886			tor_add_trapezoid(&tor, &traps[n], dx, dy);
1887		}
1888
1889		tor_render(sna, &tor, &tmp, &clip,
1890			   choose_span(&tmp, dst, maskFormat, &clip),
1891			   !was_clear && maskFormat && !operator_is_bounded(op));
1892
1893		tor_fini(&tor);
1894	} else {
1895		struct span_thread threads[num_threads];
1896		int y, h;
1897
1898		DBG(("%s: using %d threads for span compositing %dx%d\n",
1899		     __FUNCTION__, num_threads,
1900		     clip.extents.x2 - clip.extents.x1,
1901		     clip.extents.y2 - clip.extents.y1));
1902
1903		threads[0].sna = sna;
1904		threads[0].op = &tmp;
1905		threads[0].traps = traps;
1906		threads[0].ntrap = ntrap;
1907		threads[0].extents = clip.extents;
1908		threads[0].clip = &clip;
1909		threads[0].dx = dx;
1910		threads[0].dy = dy;
1911		threads[0].draw_y = dst->pDrawable->y;
1912		threads[0].unbounded = !was_clear && maskFormat && !operator_is_bounded(op);
1913		threads[0].span = thread_choose_span(&tmp, dst, maskFormat, &clip);
1914
1915		y = clip.extents.y1;
1916		h = clip.extents.y2 - clip.extents.y1;
1917		h = (h + num_threads - 1) / num_threads;
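		/* Drop the spare thread if the rounded-up slice height
		 * already covers the whole extent with one thread fewer. */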
1918		num_threads -= (num_threads-1) * h >= clip.extents.y2 - clip.extents.y1;
1919
1920		for (n = 1; n < num_threads; n++) {
1921			threads[n] = threads[0];
1922			threads[n].extents.y1 = y;
1923			threads[n].extents.y2 = y += h;
1924
1925			sna_threads_run(n, span_thread, &threads[n]);
1926		}
1927
1928		assert(y < threads[0].extents.y2);
1929		threads[0].extents.y1 = y;
1930		span_thread(&threads[0]);
1931
1932		sna_threads_wait();
1933	}
1934skip:
1935	tmp.done(sna, &tmp);
1936
1937	REGION_UNINIT(NULL, &clip);
1938	return true;
1939}
1940
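/* Span callback used when rendering into an a8 mask: the destination
 * pointer is smuggled in through the 'op' argument and the byte stride
 * through the 'clip' argument (see the casts at the tor_render() call
 * sites in mask_thread() and precise_trapezoid_mask_converter()). */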
1941static void
1942tor_blt_mask(struct sna *sna,
1943	     struct sna_composite_spans_op *op,
1944	     pixman_region16_t *clip,
1945	     const BoxRec *box,
1946	     int coverage)
1947{
1948	uint8_t *ptr = (uint8_t *)op;
1949	int stride = (intptr_t)clip;
1950	int h, w;
1951
1952	coverage = TO_ALPHA(coverage);
1953	ptr += box->y1 * stride + box->x1;
1954
1955	h = box->y2 - box->y1;
1956	w = box->x2 - box->x1;
1957	if ((w | h) == 1) {
1958		*ptr = coverage;
1959	} else if (w == 1) {
1960		do {
1961			*ptr = coverage;
1962			ptr += stride;
1963		} while (--h);
1964	} else do {
1965		memset(ptr, coverage, w);
1966		ptr += stride;
1967	} while (--h);
1968}
1969
1970struct mask_thread {
1971	PixmapPtr scratch;
1972	const xTrapezoid *traps;
1973	BoxRec extents;
1974	int dx, dy, dst_y;
1975	int ntrap;
1976};
1977
1978static void
1979mask_thread(void *arg)
1980{
1981	struct mask_thread *thread = arg;
1982	struct tor tor;
1983	const xTrapezoid *t;
1984	int n, y1, y2;
1985
1986	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
1987		return;
1988
1989	y1 = thread->extents.y1 + thread->dst_y;
1990	y2 = thread->extents.y2 + thread->dst_y;
1991	for (n = thread->ntrap, t = thread->traps; n--; t++) {
1992		if (pixman_fixed_integer_floor(t->top) >= y2 ||
1993		    pixman_fixed_integer_ceil(t->bottom) <= y1)
1994			continue;
1995
1996		tor_add_trapezoid(&tor, t, thread->dx, thread->dy);
1997	}
1998
1999	if (thread->extents.x2 <= TOR_INPLACE_SIZE) {
2000		tor_inplace(&tor, thread->scratch);
2001	} else {
2002		tor_render(NULL, &tor,
2003			   thread->scratch->devPrivate.ptr,
2004			   (void *)(intptr_t)thread->scratch->devKind,
2005			   tor_blt_mask,
2006			   true);
2007	}
2008
2009	tor_fini(&tor);
2010}
2011
2012bool
2013precise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
2014				 PictFormatPtr maskFormat, unsigned flags,
2015				 INT16 src_x, INT16 src_y,
2016				 int ntrap, xTrapezoid *traps)
2017{
2018	ScreenPtr screen = dst->pDrawable->pScreen;
2019	PixmapPtr scratch;
2020	PicturePtr mask;
2021	BoxRec extents;
2022	int num_threads;
2023	int16_t dst_x, dst_y;
2024	int dx, dy;
2025	int error, n;
2026
2027	if (NO_PRECISE)
2028		return false;
2029
2030	if (maskFormat == NULL && ntrap > 1) {
2031		DBG(("%s: individual rasterisation requested\n",
2032		     __FUNCTION__));
2033		do {
2034			/* XXX unwind errors? */
2035			if (!precise_trapezoid_mask_converter(op, src, dst, NULL, flags,
2036							      src_x, src_y, 1, traps++))
2037				return false;
2038		} while (--ntrap);
2039		return true;
2040	}
2041
2042	if (!trapezoids_bounds(ntrap, traps, &extents))
2043		return true;
2044
2045	DBG(("%s: ntraps=%d, extents (%d, %d), (%d, %d)\n",
2046	     __FUNCTION__, ntrap, extents.x1, extents.y1, extents.x2, extents.y2));
2047
2048	if (!sna_compute_composite_extents(&extents,
2049					   src, NULL, dst,
2050					   src_x, src_y,
2051					   0, 0,
2052					   extents.x1, extents.y1,
2053					   extents.x2 - extents.x1,
2054					   extents.y2 - extents.y1))
2055		return true;
2056
2057	DBG(("%s: extents (%d, %d), (%d, %d)\n",
2058	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
2059
2060	extents.y2 -= extents.y1;
2061	extents.x2 -= extents.x1;
2062	extents.x1 -= dst->pDrawable->x;
2063	extents.y1 -= dst->pDrawable->y;
2064	dst_x = extents.x1;
2065	dst_y = extents.y1;
2066	dx = -extents.x1 * SAMPLES_X;
2067	dy = -extents.y1 * SAMPLES_Y;
2068	extents.x1 = extents.y1 = 0;
2069
2070	DBG(("%s: mask (%dx%d), dx=(%d, %d)\n",
2071	     __FUNCTION__, extents.x2, extents.y2, dx, dy));
2072	scratch = sna_pixmap_create_upload(screen,
2073					   extents.x2, extents.y2, 8,
2074					   KGEM_BUFFER_WRITE_INPLACE);
2075	if (!scratch)
2076		return true;
2077
2078	DBG(("%s: created buffer %p, stride %d\n",
2079	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
2080
2081	num_threads = 1;
2082	if (!NO_GPU_THREADS &&
2083	    (flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
2084		num_threads = sna_use_threads(extents.x2 - extents.x1,
2085					      extents.y2 - extents.y1,
2086					      4);
2087	if (num_threads == 1) {
2088		struct tor tor;
2089
2090		if (!tor_init(&tor, &extents, 2*ntrap)) {
2091			sna_pixmap_destroy(scratch);
2092			return true;
2093		}
2094
2095		for (n = 0; n < ntrap; n++) {
2096			if (pixman_fixed_to_int(traps[n].top) - dst_y >= extents.y2 ||
2097			    pixman_fixed_to_int(traps[n].bottom) - dst_y < 0)
2098				continue;
2099
2100			tor_add_trapezoid(&tor, &traps[n], dx, dy);
2101		}
2102
2103		if (extents.x2 <= TOR_INPLACE_SIZE) {
2104			tor_inplace(&tor, scratch);
2105		} else {
2106			tor_render(NULL, &tor,
2107				   scratch->devPrivate.ptr,
2108				   (void *)(intptr_t)scratch->devKind,
2109				   tor_blt_mask,
2110				   true);
2111		}
2112		tor_fini(&tor);
2113	} else {
2114		struct mask_thread threads[num_threads];
2115		int y, h;
2116
2117		DBG(("%s: using %d threads for mask compositing %dx%d\n",
2118		     __FUNCTION__, num_threads,
2119		     extents.x2 - extents.x1,
2120		     extents.y2 - extents.y1));
2121
2122		threads[0].scratch = scratch;
2123		threads[0].traps = traps;
2124		threads[0].ntrap = ntrap;
2125		threads[0].extents = extents;
2126		threads[0].dx = dx;
2127		threads[0].dy = dy;
2128		threads[0].dst_y = dst_y;
2129
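		/* Split the mask into bands of equal height, one per thread,
		 * trimming the thread count if the final band would be empty.
		 */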
2130		y = extents.y1;
2131		h = extents.y2 - extents.y1;
2132		h = (h + num_threads - 1) / num_threads;
2133		num_threads -= (num_threads-1) * h >= extents.y2 - extents.y1;
2134
2135		for (n = 1; n < num_threads; n++) {
2136			threads[n] = threads[0];
2137			threads[n].extents.y1 = y;
2138			threads[n].extents.y2 = y += h;
2139
2140			sna_threads_run(n, mask_thread, &threads[n]);
2141		}
2142
2143		assert(y < threads[0].extents.y2);
2144		threads[0].extents.y1 = y;
2145		mask_thread(&threads[0]);
2146
2147		sna_threads_wait();
2148	}
2149
2150	mask = CreatePicture(0, &scratch->drawable,
2151			     PictureMatchFormat(screen, 8, PICT_a8),
2152			     0, 0, serverClient, &error);
2153	if (mask) {
2154		int16_t x0, y0;
2155
2156		trapezoid_origin(&traps[0].left, &x0, &y0);
2157
2158		CompositePicture(op, src, mask, dst,
2159				 src_x + dst_x - x0,
2160				 src_y + dst_y - y0,
2161				 0, 0,
2162				 dst_x, dst_y,
2163				 extents.x2, extents.y2);
2164		FreePicture(mask, 0);
2165	}
2166	sna_pixmap_destroy(scratch);
2167
2168	return true;
2169}
2170
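/* Context for rendering spans directly into a CPU-mapped destination: a
 * pointer and stride into the pixmap plus either the solid opacity (for a8
 * destinations) or the solid colour (for x8r8g8b8 lerping).
 */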
2171struct inplace {
2172	uint8_t *ptr;
2173	uint32_t stride;
2174	union {
2175		uint8_t opacity;
2176		uint32_t color;
2177	};
2178};
2179
2180static force_inline uint8_t coverage_opacity(int coverage, uint8_t opacity)
2181{
2182	coverage = TO_ALPHA(coverage);
2183	return opacity == 255 ? coverage : mul_8_8(coverage, opacity);
2184}
2185
2186static void _tor_blt_src(struct inplace *in, const BoxRec *box, uint8_t v)
2187{
2188	uint8_t *ptr = in->ptr;
2189	int h, w;
2190
2191	ptr += box->y1 * in->stride + box->x1;
2192
2193	h = box->y2 - box->y1;
2194	w = box->x2 - box->x1;
2195	if ((w | h) == 1) {
2196		*ptr = v;
2197	} else if (w == 1) {
2198		do {
2199			*ptr = v;
2200			ptr += in->stride;
2201		} while (--h);
2202	} else do {
2203		memset(ptr, v, w);
2204		ptr += in->stride;
2205	} while (--h);
2206}
2207
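/* Unclipped span callbacks for in-place a8 rendering (Src, In and Add); the
 * *_clipped variants below intersect each span with the composite clip first
 * and then replay the resulting boxes through the unclipped version.
 */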
2208static void
2209tor_blt_src(struct sna *sna,
2210	    struct sna_composite_spans_op *op,
2211	    pixman_region16_t *clip,
2212	    const BoxRec *box,
2213	    int coverage)
2214{
2215	struct inplace *in = (struct inplace *)op;
2216
2217	_tor_blt_src(in, box, coverage_opacity(coverage, in->opacity));
2218}
2219
2220static void
2221tor_blt_src_clipped(struct sna *sna,
2222		    struct sna_composite_spans_op *op,
2223		    pixman_region16_t *clip,
2224		    const BoxRec *box,
2225		    int coverage)
2226{
2227	pixman_region16_t region;
2228	int n;
2229
2230	pixman_region_init_rects(&region, box, 1);
2231	RegionIntersect(&region, &region, clip);
2232	n = region_num_rects(&region);
2233	box = region_rects(&region);
2234	while (n--)
2235		tor_blt_src(sna, op, NULL, box++, coverage);
2236	pixman_region_fini(&region);
2237}
2238
2239static void
2240tor_blt_in(struct sna *sna,
2241	   struct sna_composite_spans_op *op,
2242	   pixman_region16_t *clip,
2243	   const BoxRec *box,
2244	   int coverage)
2245{
2246	struct inplace *in = (struct inplace *)op;
2247	uint8_t *ptr = in->ptr;
2248	int h, w, i;
2249
2250	if (coverage == 0 || in->opacity == 0) {
2251		_tor_blt_src(in, box, 0);
2252		return;
2253	}
2254
2255	coverage = coverage_opacity(coverage, in->opacity);
2256	if (coverage == 0xff)
2257		return;
2258
2259	ptr += box->y1 * in->stride + box->x1;
2260
2261	h = box->y2 - box->y1;
2262	w = box->x2 - box->x1;
2263	do {
2264		for (i = 0; i < w; i++)
2265			ptr[i] = mul_8_8(ptr[i], coverage);
2266		ptr += in->stride;
2267	} while (--h);
2268}
2269
2270static void
2271tor_blt_in_clipped(struct sna *sna,
2272		   struct sna_composite_spans_op *op,
2273		   pixman_region16_t *clip,
2274		   const BoxRec *box,
2275		   int coverage)
2276{
2277	pixman_region16_t region;
2278	int n;
2279
2280	pixman_region_init_rects(&region, box, 1);
2281	RegionIntersect(&region, &region, clip);
2282	n = region_num_rects(&region);
2283	box = region_rects(&region);
2284	while (n--)
2285		tor_blt_in(sna, op, NULL, box++, coverage);
2286	pixman_region_fini(&region);
2287}
2288
2289static void
2290tor_blt_add(struct sna *sna,
2291	    struct sna_composite_spans_op *op,
2292	    pixman_region16_t *clip,
2293	    const BoxRec *box,
2294	    int coverage)
2295{
2296	struct inplace *in = (struct inplace *)op;
2297	uint8_t *ptr = in->ptr;
2298	int h, w, v, i;
2299
2300	if (coverage == 0)
2301		return;
2302
2303	coverage = coverage_opacity(coverage, in->opacity);
2304	if (coverage == 0xff) {
2305		_tor_blt_src(in, box, 0xff);
2306		return;
2307	}
2308
2309	ptr += box->y1 * in->stride + box->x1;
2310
2311	h = box->y2 - box->y1;
2312	w = box->x2 - box->x1;
2313	if ((w | h) == 1) {
2314		v = coverage + *ptr;
2315		*ptr = v >= 255 ? 255 : v;
2316	} else {
2317		do {
2318			for (i = 0; i < w; i++) {
2319				v = coverage + ptr[i];
2320				ptr[i] = v >= 255 ? 255 : v;
2321			}
2322			ptr += in->stride;
2323		} while (--h);
2324	}
2325}
2326
2327static void
2328tor_blt_add_clipped(struct sna *sna,
2329		    struct sna_composite_spans_op *op,
2330		    pixman_region16_t *clip,
2331		    const BoxRec *box,
2332		    int coverage)
2333{
2334	pixman_region16_t region;
2335	int n;
2336
2337	pixman_region_init_rects(&region, box, 1);
2338	RegionIntersect(&region, &region, clip);
2339	n = region_num_rects(&region);
2340	box = region_rects(&region);
2341	while (n--)
2342		tor_blt_add(sna, op, NULL, box++, coverage);
2343	pixman_region_fini(&region);
2344}
2345
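/* In-place span callback for x8r8g8b8/a8r8g8b8 destinations with a solid
 * source and PictOpSrc: fully covered spans are filled with the colour
 * directly, partially covered spans are lerped between the colour and the
 * existing destination pixels.
 */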
2346static void
2347tor_blt_lerp32(struct sna *sna,
2348	       struct sna_composite_spans_op *op,
2349	       pixman_region16_t *clip,
2350	       const BoxRec *box,
2351	       int coverage)
2352{
2353	struct inplace *in = (struct inplace *)op;
2354	uint32_t *ptr = (uint32_t *)in->ptr;
2355	int stride = in->stride / sizeof(uint32_t);
2356	int h, w, i;
2357
2358	if (coverage == 0)
2359		return;
2360
2361	ptr += box->y1 * stride + box->x1;
2362
2363	h = box->y2 - box->y1;
2364	w = box->x2 - box->x1;
2365	if (coverage == GRID_AREA) {
2366		if ((w | h) == 1) {
2367			*ptr = in->color;
2368		} else {
2369			if (w < 16) {
2370				do {
2371					for (i = 0; i < w; i++)
2372						ptr[i] = in->color;
2373					ptr += stride;
2374				} while (--h);
2375			} else {
2376				pixman_fill(ptr, stride, 32,
2377					    0, 0, w, h, in->color);
2378			}
2379		}
2380	} else {
2381		coverage = TO_ALPHA(coverage);
2382		if ((w | h) == 1) {
2383			*ptr = lerp8x4(in->color, coverage, *ptr);
2384		} else if (w == 1) {
2385			do {
2386				*ptr = lerp8x4(in->color, coverage, *ptr);
2387				ptr += stride;
2388			} while (--h);
2389		} else {
2390			do {
2391				for (i = 0; i < w; i++)
2392					ptr[i] = lerp8x4(in->color, coverage, ptr[i]);
2393				ptr += stride;
2394			} while (--h);
2395		}
2396	}
2397}
2398
2399static void
2400tor_blt_lerp32_clipped(struct sna *sna,
2401		       struct sna_composite_spans_op *op,
2402		       pixman_region16_t *clip,
2403		       const BoxRec *box,
2404		       int coverage)
2405{
2406	pixman_region16_t region;
2407	int n;
2408
2409	pixman_region_init_rects(&region, box, 1);
2410	RegionIntersect(&region, &region, clip);
2411	n = region_num_rects(&region);
2412	box = region_rects(&region);
2413	while (n--)
2414		tor_blt_lerp32(sna, op, NULL, box++, coverage);
2415	pixman_region_fini(&region);
2416}
2417
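/* Fallback context for spans that are fed through pixman: a 1x1 repeating
 * source or mask image is updated per span with the coverage and then
 * composited onto the destination image.
 */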
2418struct pixman_inplace {
2419	pixman_image_t *image, *source, *mask;
2420	uint32_t color;
2421	uint32_t *bits;
2422	int dx, dy;
2423	int sx, sy;
2424	uint8_t op;
2425};
2426
2427static void
2428pixmask_span_solid(struct sna *sna,
2429		   struct sna_composite_spans_op *op,
2430		   pixman_region16_t *clip,
2431		   const BoxRec *box,
2432		   int coverage)
2433{
2434	struct pixman_inplace *pi = (struct pixman_inplace *)op;
2435	if (coverage != GRID_AREA)
2436		*pi->bits = mul_4x8_8(pi->color, TO_ALPHA(coverage));
2437	else
2438		*pi->bits = pi->color;
2439	pixman_image_composite(pi->op, pi->source, NULL, pi->image,
2440			       box->x1, box->y1,
2441			       0, 0,
2442			       pi->dx + box->x1, pi->dy + box->y1,
2443			       box->x2 - box->x1, box->y2 - box->y1);
2444}
2445static void
2446pixmask_span_solid__clipped(struct sna *sna,
2447			    struct sna_composite_spans_op *op,
2448			    pixman_region16_t *clip,
2449			    const BoxRec *box,
2450			    int coverage)
2451{
2452	pixman_region16_t region;
2453	int n;
2454
2455	pixman_region_init_rects(&region, box, 1);
2456	RegionIntersect(&region, &region, clip);
2457	n = region_num_rects(&region);
2458	box = region_rects(&region);
2459	while (n--)
2460		pixmask_span_solid(sna, op, NULL, box++, coverage);
2461	pixman_region_fini(&region);
2462}
2463
2464static void
2465pixmask_span(struct sna *sna,
2466	     struct sna_composite_spans_op *op,
2467	     pixman_region16_t *clip,
2468	     const BoxRec *box,
2469	     int coverage)
2470{
2471	struct pixman_inplace *pi = (struct pixman_inplace *)op;
2472	pixman_image_t *mask = NULL;
2473	if (coverage != GRID_AREA) {
2474		*pi->bits = TO_ALPHA(coverage);
2475		mask = pi->mask;
2476	}
2477	pixman_image_composite(pi->op, pi->source, mask, pi->image,
2478			       pi->sx + box->x1, pi->sy + box->y1,
2479			       0, 0,
2480			       pi->dx + box->x1, pi->dy + box->y1,
2481			       box->x2 - box->x1, box->y2 - box->y1);
2482}
2483static void
2484pixmask_span__clipped(struct sna *sna,
2485		      struct sna_composite_spans_op *op,
2486		      pixman_region16_t *clip,
2487		      const BoxRec *box,
2488		      int coverage)
2489{
2490	pixman_region16_t region;
2491	int n;
2492
2493	pixman_region_init_rects(&region, box, 1);
2494	RegionIntersect(&region, &region, clip);
2495	n = region_num_rects(&region);
2496	box = region_rects(&region);
2497	while (n--)
2498		pixmask_span(sna, op, NULL, box++, coverage);
2499	pixman_region_fini(&region);
2500}
2501
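/* Per-thread state for in-place x8r8g8b8 compositing of one y-band. */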
2502struct inplace_x8r8g8b8_thread {
2503	xTrapezoid *traps;
2504	PicturePtr dst, src;
2505	BoxRec extents;
2506	int dx, dy;
2507	int ntrap;
2508	bool lerp, is_solid;
2509	uint32_t color;
2510	int16_t src_x, src_y;
2511	uint8_t op;
2512};
2513
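/* Worker for threaded x8r8g8b8 compositing: rasterise the trapezoids that may
 * cross this band and emit the spans either by lerping the solid colour
 * directly into the pixmap or by compositing through pixman.
 */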
2514static void inplace_x8r8g8b8_thread(void *arg)
2515{
2516	struct inplace_x8r8g8b8_thread *thread = arg;
2517	struct tor tor;
2518	span_func_t span;
2519	RegionPtr clip;
2520	int y1, y2, n;
2521
2522	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
2523		return;
2524
2525	y1 = thread->extents.y1 - thread->dst->pDrawable->y;
2526	y2 = thread->extents.y2 - thread->dst->pDrawable->y;
2527	for (n = 0; n < thread->ntrap; n++) {
2528		if (pixman_fixed_to_int(thread->traps[n].top) >= y2 ||
2529		    pixman_fixed_to_int(thread->traps[n].bottom) < y1)
2530			continue;
2531
2532		tor_add_trapezoid(&tor, &thread->traps[n], thread->dx, thread->dy);
2533	}
2534
2535	clip = thread->dst->pCompositeClip;
2536	if (thread->lerp) {
2537		struct inplace inplace;
2538		int16_t dst_x, dst_y;
2539		PixmapPtr pixmap;
2540
2541		pixmap = get_drawable_pixmap(thread->dst->pDrawable);
2542
2543		inplace.ptr = pixmap->devPrivate.ptr;
2544		if (get_drawable_deltas(thread->dst->pDrawable, pixmap, &dst_x, &dst_y))
2545			inplace.ptr += dst_y * pixmap->devKind + dst_x * 4;
2546		inplace.stride = pixmap->devKind;
2547		inplace.color = thread->color;
2548
2549		if (clip->data)
2550			span = tor_blt_lerp32_clipped;
2551		else
2552			span = tor_blt_lerp32;
2553
2554		tor_render(NULL, &tor, (void*)&inplace, clip, span, false);
2555	} else if (thread->is_solid) {
2556		struct pixman_inplace pi;
2557
2558		pi.image = image_from_pict(thread->dst, false, &pi.dx, &pi.dy);
2559		pi.op = thread->op;
2560		pi.color = thread->color;
2561
2562		pi.bits = (uint32_t *)&pi.sx;
2563		pi.source = pixman_image_create_bits(PIXMAN_a8r8g8b8,
2564						     1, 1, pi.bits, 0);
2565		pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);
2566
2567		if (clip->data)
2568			span = pixmask_span_solid__clipped;
2569		else
2570			span = pixmask_span_solid;
2571
2572		tor_render(NULL, &tor, (void*)&pi, clip, span, false);
2573
2574		pixman_image_unref(pi.source);
2575		pixman_image_unref(pi.image);
2576	} else {
2577		struct pixman_inplace pi;
2578		int16_t x0, y0;
2579
2580		trapezoid_origin(&thread->traps[0].left, &x0, &y0);
2581
2582		pi.image = image_from_pict(thread->dst, false, &pi.dx, &pi.dy);
2583		pi.source = image_from_pict(thread->src, false, &pi.sx, &pi.sy);
2584		pi.sx += thread->src_x - x0;
2585		pi.sy += thread->src_y - y0;
2586		pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, NULL, 0);
2587		pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL);
2588		pi.bits = pixman_image_get_data(pi.mask);
2589		pi.op = thread->op;
2590
2591		if (clip->data)
2592			span = pixmask_span__clipped;
2593		else
2594			span = pixmask_span;
2595
2596		tor_render(NULL, &tor, (void*)&pi, clip, span, false);
2597
2598		pixman_image_unref(pi.mask);
2599		pixman_image_unref(pi.source);
2600		pixman_image_unref(pi.image);
2601	}
2602
2603	tor_fini(&tor);
2604}
2605
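/* Composite trapezoids directly onto an a8r8g8b8/x8r8g8b8 destination held on
 * the CPU. A solid PictOpSrc (or an opaque/clear-destination PictOpOver) is
 * lerped in place; the other supported operators go through pixman with a
 * per-span 1x1 source or mask. Large operations are split across threads.
 */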
2606static bool
2607trapezoid_span_inplace__x8r8g8b8(CARD8 op,
2608				 PicturePtr dst,
2609				 PicturePtr src, int16_t src_x, int16_t src_y,
2610				 PictFormatPtr maskFormat, unsigned flags,
2611				 int ntrap, xTrapezoid *traps)
2612{
2613	uint32_t color;
2614	bool lerp, is_solid;
2615	RegionRec region;
2616	int dx, dy;
2617	int num_threads, n;
2618
2619	lerp = false;
2620	is_solid = sna_picture_is_solid(src, &color);
2621	if (is_solid) {
2622		if (op == PictOpOver && (color >> 24) == 0xff)
2623			op = PictOpSrc;
2624		if (op == PictOpOver && sna_drawable_is_clear(dst->pDrawable))
2625			op = PictOpSrc;
2626		lerp = op == PictOpSrc;
2627	}
2628	if (!lerp) {
2629		switch (op) {
2630		case PictOpOver:
2631		case PictOpAdd:
2632		case PictOpOutReverse:
2633			break;
2634		case PictOpSrc:
2635			if (!sna_drawable_is_clear(dst->pDrawable))
2636				return false;
2637			break;
2638		default:
2639			return false;
2640		}
2641	}
2642
2643	if (maskFormat == NULL && ntrap > 1) {
2644		DBG(("%s: individual rasterisation requested\n",
2645		     __FUNCTION__));
2646		do {
2647			/* XXX unwind errors? */
2648			if (!trapezoid_span_inplace__x8r8g8b8(op, dst,
2649							      src, src_x, src_y,
2650							      NULL, flags,
2651							      1, traps++))
2652				return false;
2653		} while (--ntrap);
2654		return true;
2655	}
2656
2657	if (!trapezoids_bounds(ntrap, traps, &region.extents))
2658		return true;
2659
2660	DBG(("%s: extents (%d, %d), (%d, %d)\n",
2661	     __FUNCTION__,
2662	     region.extents.x1, region.extents.y1,
2663	     region.extents.x2, region.extents.y2));
2664
2665	if (!sna_compute_composite_extents(&region.extents,
2666					   src, NULL, dst,
2667					   src_x, src_y,
2668					   0, 0,
2669					   region.extents.x1, region.extents.y1,
2670					   region.extents.x2 - region.extents.x1,
2671					   region.extents.y2 - region.extents.y1))
2672		return true;
2673
2674	DBG(("%s: clipped extents (%d, %d), (%d, %d)\n",
2675	     __FUNCTION__,
2676	     region.extents.x1, region.extents.y1,
2677	     region.extents.x2, region.extents.y2));
2678
2679	region.data = NULL;
2680	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
2681					    MOVE_WRITE | MOVE_READ))
2682		return true;
2683
2684	if (!is_solid && src->pDrawable) {
2685		if (!sna_drawable_move_to_cpu(src->pDrawable,
2686					      MOVE_READ))
2687			return true;
2688
2689		if (src->alphaMap &&
2690		    !sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
2691					      MOVE_READ))
2692			return true;
2693	}
2694
2695	dx = dst->pDrawable->x * SAMPLES_X;
2696	dy = dst->pDrawable->y * SAMPLES_Y;
2697
2698	num_threads = 1;
2699	if (!NO_GPU_THREADS &&
2700	    (flags & COMPOSITE_SPANS_RECTILINEAR) == 0 &&
2701	    (lerp || is_solid))
2702		num_threads = sna_use_threads(4*(region.extents.x2 - region.extents.x1),
2703					      region.extents.y2 - region.extents.y1,
2704					      4);
2705
2706	DBG(("%s: %dx%d, format=%x, op=%d, lerp?=%d, num_threads=%d\n",
2707	     __FUNCTION__,
2708	     region.extents.x2 - region.extents.x1,
2709	     region.extents.y2 - region.extents.y1,
2710	     dst->format, op, lerp, num_threads));
2711
2712	if (num_threads == 1) {
2713		struct tor tor;
2714		span_func_t span;
2715
2716		if (!tor_init(&tor, &region.extents, 2*ntrap))
2717			return true;
2718
2719		for (n = 0; n < ntrap; n++) {
2720			if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y ||
2721			    pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y)
2722				continue;
2723
2724			tor_add_trapezoid(&tor, &traps[n], dx, dy);
2725		}
2726
2727		if (lerp) {
2728			struct inplace inplace;
2729			PixmapPtr pixmap;
2730			int16_t dst_x, dst_y;
2731
2732			pixmap = get_drawable_pixmap(dst->pDrawable);
2733
2734			inplace.ptr = pixmap->devPrivate.ptr;
2735			if (get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y))
2736				inplace.ptr += dst_y * pixmap->devKind + dst_x * 4;
2737			inplace.stride = pixmap->devKind;
2738			inplace.color = color;
2739
2740			if (dst->pCompositeClip->data)
2741				span = tor_blt_lerp32_clipped;
2742			else
2743				span = tor_blt_lerp32;
2744
2745			DBG(("%s: render inplace op=%d, color=%08x\n",
2746			     __FUNCTION__, op, color));
2747
2748			if (sigtrap_get() == 0) {
2749				tor_render(NULL, &tor, (void*)&inplace,
2750					   dst->pCompositeClip, span, false);
2751				sigtrap_put();
2752			}
2753		} else if (is_solid) {
2754			struct pixman_inplace pi;
2755
2756			pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy);
2757			pi.op = op;
2758			pi.color = color;
2759
2760			pi.bits = (uint32_t *)&pi.sx;
2761			pi.source = pixman_image_create_bits(PIXMAN_a8r8g8b8,
2762							     1, 1, pi.bits, 0);
2763			pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);
2764
2765			if (dst->pCompositeClip->data)
2766				span = pixmask_span_solid__clipped;
2767			else
2768				span = pixmask_span_solid;
2769
2770			if (sigtrap_get() == 0) {
2771				tor_render(NULL, &tor, (void*)&pi,
2772					   dst->pCompositeClip, span,
2773					   false);
2774				sigtrap_put();
2775			}
2776
2777			pixman_image_unref(pi.source);
2778			pixman_image_unref(pi.image);
2779		} else {
2780			struct pixman_inplace pi;
2781			int16_t x0, y0;
2782
2783			trapezoid_origin(&traps[0].left, &x0, &y0);
2784
2785			pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy);
2786			pi.source = image_from_pict(src, false, &pi.sx, &pi.sy);
2787			pi.sx += src_x - x0;
2788			pi.sy += src_y - y0;
2789			pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, NULL, 0);
2790			pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL);
2791			pi.bits = pixman_image_get_data(pi.mask);
2792			pi.op = op;
2793
2794			if (dst->pCompositeClip->data)
2795				span = pixmask_span__clipped;
2796			else
2797				span = pixmask_span;
2798
2799			if (sigtrap_get() == 0) {
2800				tor_render(NULL, &tor, (void*)&pi,
2801					   dst->pCompositeClip, span,
2802					   false);
2803				sigtrap_put();
2804			}
2805
2806			pixman_image_unref(pi.mask);
2807			pixman_image_unref(pi.source);
2808			pixman_image_unref(pi.image);
2809		}
2810
2811		tor_fini(&tor);
2812	} else {
2813		struct inplace_x8r8g8b8_thread threads[num_threads];
2814		int y, h;
2815
2816		DBG(("%s: using %d threads for inplace compositing %dx%d\n",
2817		     __FUNCTION__, num_threads,
2818		     region.extents.x2 - region.extents.x1,
2819		     region.extents.y2 - region.extents.y1));
2820
2821		threads[0].traps = traps;
2822		threads[0].ntrap = ntrap;
2823		threads[0].extents = region.extents;
2824		threads[0].lerp = lerp;
2825		threads[0].is_solid = is_solid;
2826		threads[0].color = color;
2827		threads[0].dx = dx;
2828		threads[0].dy = dy;
2829		threads[0].dst = dst;
2830		threads[0].src = src;
2831		threads[0].op = op;
2832		threads[0].src_x = src_x;
2833		threads[0].src_y = src_y;
2834
2835		y = region.extents.y1;
2836		h = region.extents.y2 - region.extents.y1;
2837		h = (h + num_threads - 1) / num_threads;
2838		num_threads -= (num_threads-1) * h >= region.extents.y2 - region.extents.y1;
2839
2840		if (sigtrap_get() == 0) {
2841			for (n = 1; n < num_threads; n++) {
2842				threads[n] = threads[0];
2843				threads[n].extents.y1 = y;
2844				threads[n].extents.y2 = y += h;
2845
2846				sna_threads_run(n, inplace_x8r8g8b8_thread, &threads[n]);
2847			}
2848
2849			assert(y < threads[0].extents.y2);
2850			threads[0].extents.y1 = y;
2851			inplace_x8r8g8b8_thread(&threads[0]);
2852
2853			sna_threads_wait();
2854			sigtrap_put();
2855		} else
2856			sna_threads_kill(); /* leaks thread allocations */
2857	}
2858
2859	return true;
2860}
2861
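/* Per-thread state for in-place a8 span rendering of one y-band. */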
2862struct inplace_thread {
2863	xTrapezoid *traps;
2864	RegionPtr clip;
2865	span_func_t span;
2866	struct inplace inplace;
2867	BoxRec extents;
2868	int dx, dy;
2869	int draw_x, draw_y;
2870	bool unbounded;
2871	int ntrap;
2872};
2873
2874static void inplace_thread(void *arg)
2875{
2876	struct inplace_thread *thread = arg;
2877	struct tor tor;
2878	int n;
2879
2880	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
2881		return;
2882
2883	for (n = 0; n < thread->ntrap; n++) {
2884		if (pixman_fixed_to_int(thread->traps[n].top) >= thread->extents.y2 - thread->draw_y ||
2885		    pixman_fixed_to_int(thread->traps[n].bottom) < thread->extents.y1 - thread->draw_y)
2886			continue;
2887
2888		tor_add_trapezoid(&tor, &thread->traps[n], thread->dx, thread->dy);
2889	}
2890
2891	tor_render(NULL, &tor, (void*)&thread->inplace,
2892		   thread->clip, thread->span, thread->unbounded);
2893
2894	tor_fini(&tor);
2895}
2896
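/* Render trapezoids with a solid source directly into an a8 destination on
 * the CPU (a8r8g8b8/x8r8g8b8 is redirected to the helper above). Only
 * operators that can be evaluated span by span in place (Src, In, Add, and
 * reductions of them against a known clear pixmap) are handled; anything else
 * falls back to the caller.
 */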
2897bool
2898precise_trapezoid_span_inplace(struct sna *sna,
2899			       CARD8 op, PicturePtr src, PicturePtr dst,
2900			       PictFormatPtr maskFormat, unsigned flags,
2901			       INT16 src_x, INT16 src_y,
2902			       int ntrap, xTrapezoid *traps,
2903			       bool fallback)
2904{
2905	struct inplace inplace;
2906	span_func_t span;
2907	PixmapPtr pixmap;
2908	struct sna_pixmap *priv;
2909	RegionRec region;
2910	uint32_t color;
2911	bool unbounded;
2912	int16_t dst_x, dst_y;
2913	int dx, dy;
2914	int num_threads, n;
2915
2916	if (NO_PRECISE)
2917		return false;
2918
2919	if (dst->format == PICT_a8r8g8b8 || dst->format == PICT_x8r8g8b8)
2920		return trapezoid_span_inplace__x8r8g8b8(op, dst,
2921							src, src_x, src_y,
2922							maskFormat, flags,
2923							ntrap, traps);
2924
2925	if (!sna_picture_is_solid(src, &color)) {
2926		DBG(("%s: fallback -- can not perform operation in place, requires solid source\n",
2927		     __FUNCTION__));
2928		return false;
2929	}
2930
2931	if (dst->format != PICT_a8) {
2932		DBG(("%s: fallback -- can not perform operation in place, format=%x\n",
2933		     __FUNCTION__, dst->format));
2934		return false;
2935	}
2936
2937	pixmap = get_drawable_pixmap(dst->pDrawable);
2938
2939	unbounded = false;
2940	priv = sna_pixmap(pixmap);
2941	if (priv) {
2942		switch (op) {
2943		case PictOpAdd:
2944			if (priv->clear && priv->clear_color == 0) {
2945				unbounded = true;
2946				op = PictOpSrc;
2947			}
2948			if ((color >> 24) == 0)
2949				return true;
2950			break;
2951		case PictOpIn:
2952			if (priv->clear && priv->clear_color == 0)
2953				return true;
2954			if (priv->clear && priv->clear_color == 0xff)
2955				op = PictOpSrc;
2956			unbounded = true;
2957			break;
2958		case PictOpSrc:
2959			unbounded = true;
2960			break;
2961		default:
2962			DBG(("%s: fallback -- can not perform op [%d] in place\n",
2963			     __FUNCTION__, op));
2964			return false;
2965		}
2966	} else {
2967		switch (op) {
2968		case PictOpAdd:
2969			if ((color >> 24) == 0)
2970				return true;
2971			break;
2972		case PictOpIn:
2973		case PictOpSrc:
2974			unbounded = true;
2975			break;
2976		default:
2977			DBG(("%s: fallback -- can not perform op [%d] in place\n",
2978			     __FUNCTION__, op));
2979			return false;
2980		}
2981	}
2982
2983	DBG(("%s: format=%x, op=%d, color=%x\n",
2984	     __FUNCTION__, dst->format, op, color));
2985
2986	if (maskFormat == NULL && ntrap > 1) {
2987		DBG(("%s: individual rasterisation requested\n",
2988		     __FUNCTION__));
2989		do {
2990			/* XXX unwind errors? */
2991			if (!precise_trapezoid_span_inplace(sna, op, src, dst, NULL, flags,
2992							    src_x, src_y, 1, traps++,
2993							    fallback))
2994				return false;
2995		} while (--ntrap);
2996		return true;
2997	}
2998
2999	if (!trapezoids_bounds(ntrap, traps, &region.extents))
3000		return true;
3001
3002	DBG(("%s: extents (%d, %d), (%d, %d)\n",
3003	     __FUNCTION__,
3004	     region.extents.x1, region.extents.y1,
3005	     region.extents.x2, region.extents.y2));
3006
3007	if (!sna_compute_composite_extents(&region.extents,
3008					   NULL, NULL, dst,
3009					   0, 0,
3010					   0, 0,
3011					   region.extents.x1, region.extents.y1,
3012					   region.extents.x2 - region.extents.x1,
3013					   region.extents.y2 - region.extents.y1))
3014		return true;
3015
3016	DBG(("%s: clipped extents (%d, %d), (%d, %d) [complex clip? %d]\n",
3017	     __FUNCTION__,
3018	     region.extents.x1, region.extents.y1,
3019	     region.extents.x2, region.extents.y2,
3020	     dst->pCompositeClip->data != NULL));
3021
3022	if (op == PictOpSrc) {
3023		if (dst->pCompositeClip->data)
3024			span = tor_blt_src_clipped;
3025		else
3026			span = tor_blt_src;
3027	} else if (op == PictOpIn) {
3028		if (dst->pCompositeClip->data)
3029			span = tor_blt_in_clipped;
3030		else
3031			span = tor_blt_in;
3032	} else {
3033		assert(op == PictOpAdd);
3034		if (dst->pCompositeClip->data)
3035			span = tor_blt_add_clipped;
3036		else
3037			span = tor_blt_add;
3038	}
3039
3040	DBG(("%s: move-to-cpu(dst)\n", __FUNCTION__));
3041	region.data = NULL;
3042	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
3043					     op == PictOpSrc ? MOVE_WRITE | MOVE_INPLACE_HINT : MOVE_WRITE | MOVE_READ))
3044		return true;
3045
3046	dx = dst->pDrawable->x * SAMPLES_X;
3047	dy = dst->pDrawable->y * SAMPLES_Y;
3048
3049	inplace.ptr = pixmap->devPrivate.ptr;
3050	if (get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y))
3051		inplace.ptr += dst_y * pixmap->devKind + dst_x;
3052	inplace.stride = pixmap->devKind;
3053	inplace.opacity = color >> 24;
3054
3055	num_threads = 1;
3056	if (!NO_GPU_THREADS &&
3057	    (flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
3058		num_threads = sna_use_threads(region.extents.x2 - region.extents.x1,
3059					      region.extents.y2 - region.extents.y1,
3060					      4);
3061	if (num_threads == 1) {
3062		struct tor tor;
3063
3064		if (!tor_init(&tor, &region.extents, 2*ntrap))
3065			return true;
3066
3067		for (n = 0; n < ntrap; n++) {
3069			if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y ||
3070			    pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y)
3071				continue;
3072
3073			tor_add_trapezoid(&tor, &traps[n], dx, dy);
3074		}
3075
3076		if (sigtrap_get() == 0) {
3077			tor_render(NULL, &tor, (void*)&inplace,
3078				   dst->pCompositeClip, span, unbounded);
3079			sigtrap_put();
3080		}
3081
3082		tor_fini(&tor);
3083	} else {
3084		struct inplace_thread threads[num_threads];
3085		int y, h;
3086
3087		DBG(("%s: using %d threads for inplace compositing %dx%d\n",
3088		     __FUNCTION__, num_threads,
3089		     region.extents.x2 - region.extents.x1,
3090		     region.extents.y2 - region.extents.y1));
3091
3092		threads[0].traps = traps;
3093		threads[0].ntrap = ntrap;
3094		threads[0].inplace = inplace;
3095		threads[0].extents = region.extents;
3096		threads[0].clip = dst->pCompositeClip;
3097		threads[0].span = span;
3098		threads[0].unbounded = unbounded;
3099		threads[0].dx = dx;
3100		threads[0].dy = dy;
3101		threads[0].draw_x = dst->pDrawable->x;
3102		threads[0].draw_y = dst->pDrawable->y;
3103
3104		y = region.extents.y1;
3105		h = region.extents.y2 - region.extents.y1;
3106		h = (h + num_threads - 1) / num_threads;
3107		num_threads -= (num_threads-1) * h >= region.extents.y2 - region.extents.y1;
3108
3109		if (sigtrap_get() == 0) {
3110			for (n = 1; n < num_threads; n++) {
3111				threads[n] = threads[0];
3112				threads[n].extents.y1 = y;
3113				threads[n].extents.y2 = y += h;
3114
3115				sna_threads_run(n, inplace_thread, &threads[n]);
3116			}
3117
3118			assert(y < threads[0].extents.y2);
3119			threads[0].extents.y1 = y;
3120			inplace_thread(&threads[0]);
3121
3122			sna_threads_wait();
3123			sigtrap_put();
3124		} else
3125			sna_threads_kill(); /* leaks thread allocations */
3126	}
3127
3128	return true;
3129}
3130
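/* CPU fallback: rasterise the trapezoids into an unattached a8 scratch pixmap
 * (threaded by y-band when worthwhile) and composite the result with
 * sna_composite_fb() rather than the GPU.
 */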
3131bool
3132precise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
3133				PictFormatPtr maskFormat, unsigned flags,
3134				INT16 src_x, INT16 src_y,
3135				int ntrap, xTrapezoid *traps)
3136{
3137	ScreenPtr screen = dst->pDrawable->pScreen;
3138	PixmapPtr scratch;
3139	PicturePtr mask;
3140	BoxRec extents;
3141	int16_t dst_x, dst_y;
3142	int dx, dy, num_threads;
3143	int error, n;
3144
3145	if (NO_PRECISE)
3146		return false;
3147
3148	if (maskFormat == NULL && ntrap > 1) {
3149		DBG(("%s: individual rasterisation requested\n",
3150		     __FUNCTION__));
3151		do {
3152			/* XXX unwind errors? */
3153			if (!precise_trapezoid_span_fallback(op, src, dst, NULL, flags,
3154							     src_x, src_y, 1, traps++))
3155				return false;
3156		} while (--ntrap);
3157		return true;
3158	}
3159
3160	if (!trapezoids_bounds(ntrap, traps, &extents))
3161		return true;
3162
3163	DBG(("%s: ntraps=%d, extents (%d, %d), (%d, %d)\n",
3164	     __FUNCTION__, ntrap, extents.x1, extents.y1, extents.x2, extents.y2));
3165
3166	if (!sna_compute_composite_extents(&extents,
3167					   src, NULL, dst,
3168					   src_x, src_y,
3169					   0, 0,
3170					   extents.x1, extents.y1,
3171					   extents.x2 - extents.x1,
3172					   extents.y2 - extents.y1))
3173		return true;
3174
3175	DBG(("%s: extents (%d, %d), (%d, %d)\n",
3176	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3177
3178	extents.y2 -= extents.y1;
3179	extents.x2 -= extents.x1;
3180	extents.x1 -= dst->pDrawable->x;
3181	extents.y1 -= dst->pDrawable->y;
3182	dst_x = extents.x1;
3183	dst_y = extents.y1;
3184	dx = -extents.x1 * SAMPLES_X;
3185	dy = -extents.y1 * SAMPLES_Y;
3186	extents.x1 = extents.y1 = 0;
3187
3188	DBG(("%s: mask (%dx%d), dx=(%d, %d)\n",
3189	     __FUNCTION__, extents.x2, extents.y2, dx, dy));
3190	scratch = sna_pixmap_create_unattached(screen,
3191					       extents.x2, extents.y2, 8);
3192	if (!scratch)
3193		return true;
3194
3195	DBG(("%s: created buffer %p, stride %d\n",
3196	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
3197
3198	num_threads = 1;
3199	if (!NO_GPU_THREADS &&
3200	    (flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
3201		num_threads = sna_use_threads(extents.x2 - extents.x1,
3202					      extents.y2 - extents.y1,
3203					      4);
3204	if (num_threads == 1) {
3205		struct tor tor;
3206
3207		if (!tor_init(&tor, &extents, 2*ntrap)) {
3208			sna_pixmap_destroy(scratch);
3209			return true;
3210		}
3211
3212		for (n = 0; n < ntrap; n++) {
3213			if (pixman_fixed_to_int(traps[n].top) - dst_y >= extents.y2 ||
3214			    pixman_fixed_to_int(traps[n].bottom) - dst_y < 0)
3215				continue;
3216
3217			tor_add_trapezoid(&tor, &traps[n], dx, dy);
3218		}
3219
3220		if (extents.x2 <= TOR_INPLACE_SIZE) {
3221			tor_inplace(&tor, scratch);
3222		} else {
3223			tor_render(NULL, &tor,
3224				   scratch->devPrivate.ptr,
3225				   (void *)(intptr_t)scratch->devKind,
3226				   tor_blt_mask,
3227				   true);
3228		}
3229		tor_fini(&tor);
3230	} else {
3231		struct mask_thread threads[num_threads];
3232		int y, h;
3233
3234		DBG(("%s: using %d threads for mask compositing %dx%d\n",
3235		     __FUNCTION__, num_threads,
3236		     extents.x2 - extents.x1,
3237		     extents.y2 - extents.y1));
3238
3239		threads[0].scratch = scratch;
3240		threads[0].traps = traps;
3241		threads[0].ntrap = ntrap;
3242		threads[0].extents = extents;
3243		threads[0].dx = dx;
3244		threads[0].dy = dy;
3245		threads[0].dst_y = dst_y;
3246
3247		y = extents.y1;
3248		h = extents.y2 - extents.y1;
3249		h = (h + num_threads - 1) / num_threads;
3250		num_threads -= (num_threads-1) * h >= extents.y2 - extents.y1;
3251
3252		for (n = 1; n < num_threads; n++) {
3253			threads[n] = threads[0];
3254			threads[n].extents.y1 = y;
3255			threads[n].extents.y2 = y += h;
3256
3257			sna_threads_run(n, mask_thread, &threads[n]);
3258		}
3259
3260		assert(y < threads[0].extents.y2);
3261		threads[0].extents.y1 = y;
3262		mask_thread(&threads[0]);
3263
3264		sna_threads_wait();
3265	}
3266
3267	mask = CreatePicture(0, &scratch->drawable,
3268			     PictureMatchFormat(screen, 8, PICT_a8),
3269			     0, 0, serverClient, &error);
3270	if (mask) {
3271		RegionRec region;
3272		int16_t x0, y0;
3273
3274		region.extents.x1 = dst_x + dst->pDrawable->x;
3275		region.extents.y1 = dst_y + dst->pDrawable->y;
3276		region.extents.x2 = region.extents.x1 + extents.x2;
3277		region.extents.y2 = region.extents.y1 + extents.y2;
3278		region.data = NULL;
3279
3280		trapezoid_origin(&traps[0].left, &x0, &y0);
3281
3282		DBG(("%s: fbComposite()\n", __FUNCTION__));
3283		sna_composite_fb(op, src, mask, dst, &region,
3284				 src_x + dst_x - x0, src_y + dst_y - y0,
3285				 0, 0,
3286				 dst_x, dst_y,
3287				 extents.x2, extents.y2);
3288
3289		FreePicture(mask, 0);
3290	}
3291	sna_pixmap_destroy(scratch);
3292
3293	return true;
3294}
3295
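/* Per-thread state for composite-spans rendering of one band of a tristrip. */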
3296struct tristrip_thread {
3297	struct sna *sna;
3298	const struct sna_composite_spans_op *op;
3299	const xPointFixed *points;
3300	RegionPtr clip;
3301	span_func_t span;
3302	BoxRec extents;
3303	int dx, dy, draw_y;
3304	int count;
3305	bool unbounded;
3306};
3307
3308static void
3309tristrip_thread(void *arg)
3310{
3311	struct tristrip_thread *thread = arg;
3312	struct span_thread_boxes boxes;
3313	struct tor tor;
3314	int n, cw, ccw;
3315
3316	if (!tor_init(&tor, &thread->extents, 2*thread->count))
3317		return;
3318
3319	boxes.op = thread->op;
3320	boxes.num_boxes = 0;
3321
3322	cw = 0; ccw = 1;
3323	polygon_add_line(tor.polygon,
3324			 &thread->points[ccw], &thread->points[cw],
3325			 thread->dx, thread->dy);
3326	n = 2;
3327	do {
3328		polygon_add_line(tor.polygon,
3329				 &thread->points[cw], &thread->points[n],
3330				 thread->dx, thread->dy);
3331		cw = n;
3332		if (++n == thread->count)
3333			break;
3334
3335		polygon_add_line(tor.polygon,
3336				 &thread->points[n], &thread->points[ccw],
3337				 thread->dx, thread->dy);
3338		ccw = n;
3339		if (++n == thread->count)
3340			break;
3341	} while (1);
3342	polygon_add_line(tor.polygon,
3343			 &thread->points[cw], &thread->points[ccw],
3344			 thread->dx, thread->dy);
3345	assert(tor.polygon->num_edges <= 2*thread->count);
3346
3347	tor_render(thread->sna, &tor,
3348		   (struct sna_composite_spans_op *)&boxes, thread->clip,
3349		   thread->span, thread->unbounded);
3350
3351	tor_fini(&tor);
3352
3353	if (boxes.num_boxes) {
3354		DBG(("%s: flushing %d boxes\n", __FUNCTION__, boxes.num_boxes));
3355		assert(boxes.num_boxes <= SPAN_THREAD_MAX_BOXES);
3356		thread->op->thread_boxes(thread->sna, thread->op,
3357					 boxes.boxes, boxes.num_boxes);
3358	}
3359}
3360
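/* Render a triangle strip through the composite-spans interface: the two
 * sides of the strip are traced through its alternating vertices and closed
 * into a single polygon, which is rasterised with the tor converter and
 * emitted by the backend (threaded by y-band when supported).
 */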
3361bool
3362precise_tristrip_span_converter(struct sna *sna,
3363				CARD8 op, PicturePtr src, PicturePtr dst,
3364				PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
3365				int count, xPointFixed *points)
3366{
3367	struct sna_composite_spans_op tmp;
3368	BoxRec extents;
3369	pixman_region16_t clip;
3370	int16_t dst_x, dst_y;
3371	int dx, dy, num_threads;
3372	bool was_clear;
3373
3374	if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, 0)) {
3375		DBG(("%s: fallback -- composite spans not supported\n",
3376		     __FUNCTION__));
3377		return false;
3378	}
3379
3380	dst_x = pixman_fixed_to_int(points[0].x);
3381	dst_y = pixman_fixed_to_int(points[0].y);
3382
3383	miPointFixedBounds(count, points, &extents);
3384	DBG(("%s: extents (%d, %d), (%d, %d)\n",
3385	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3386
3387	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
3388		return true;
3389
3390#if 0
3391	if (extents.y2 - extents.y1 < 64 && extents.x2 - extents.x1 < 64) {
3392		DBG(("%s: fallback -- traps extents too small %dx%d\n",
3393		     __FUNCTION__, extents.y2 - extents.y1, extents.x2 - extents.x1));
3394		return false;
3395	}
3396#endif
3397
3398	if (!sna_compute_composite_region(&clip,
3399					  src, NULL, dst,
3400					  src_x + extents.x1 - dst_x,
3401					  src_y + extents.y1 - dst_y,
3402					  0, 0,
3403					  extents.x1, extents.y1,
3404					  extents.x2 - extents.x1,
3405					  extents.y2 - extents.y1)) {
3406		DBG(("%s: triangles do not intersect drawable clips\n",
3407		     __FUNCTION__));
3408		return true;
3409	}
3410
3411	if (!sna->render.check_composite_spans(sna, op, src, dst,
3412					       clip.extents.x2 - clip.extents.x1,
3413					       clip.extents.y2 - clip.extents.y1,
3414					       0)) {
3415		DBG(("%s: fallback -- composite spans not supported\n",
3416		     __FUNCTION__));
3417		return false;
3418	}
3419
3420	extents = *RegionExtents(&clip);
3421	dx = dst->pDrawable->x;
3422	dy = dst->pDrawable->y;
3423
3424	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
3425	     __FUNCTION__,
3426	     extents.x1, extents.y1,
3427	     extents.x2, extents.y2,
3428	     dx, dy,
3429	     src_x + extents.x1 - dst_x - dx,
3430	     src_y + extents.y1 - dst_y - dy));
3431
3432	was_clear = sna_drawable_is_clear(dst->pDrawable);
3433
3434	memset(&tmp, 0, sizeof(tmp));
3435	if (!sna->render.composite_spans(sna, op, src, dst,
3436					 src_x + extents.x1 - dst_x - dx,
3437					 src_y + extents.y1 - dst_y - dy,
3438					 extents.x1,  extents.y1,
3439					 extents.x2 - extents.x1,
3440					 extents.y2 - extents.y1,
3441					 0,
3442					 &tmp)) {
3443		DBG(("%s: fallback -- composite spans render op not supported\n",
3444		     __FUNCTION__));
3445		return false;
3446	}
3447
3448	dx *= SAMPLES_X;
3449	dy *= SAMPLES_Y;
3450
3451	num_threads = 1;
3452	if (!NO_GPU_THREADS &&
3453	    tmp.thread_boxes &&
3454	    thread_choose_span(&tmp, dst, maskFormat, &clip))
3455		num_threads = sna_use_threads(extents.x2 - extents.x1,
3456					      extents.y2 - extents.y1,
3457					      16);
3458	if (num_threads == 1) {
3459		struct tor tor;
3460		int cw, ccw, n;
3461
3462		if (!tor_init(&tor, &extents, 2*count))
3463			goto skip;
3464
3465		cw = 0; ccw = 1;
3466		polygon_add_line(tor.polygon,
3467				 &points[ccw], &points[cw],
3468				 dx, dy);
3469		n = 2;
3470		do {
3471			polygon_add_line(tor.polygon,
3472					 &points[cw], &points[n],
3473					 dx, dy);
3474			cw = n;
3475			if (++n == count)
3476				break;
3477
3478			polygon_add_line(tor.polygon,
3479					 &points[n], &points[ccw],
3480					 dx, dy);
3481			ccw = n;
3482			if (++n == count)
3483				break;
3484		} while (1);
3485		polygon_add_line(tor.polygon,
3486				 &points[cw], &points[ccw],
3487				 dx, dy);
3488		assert(tor.polygon->num_edges <= 2*count);
3489
3490		tor_render(sna, &tor, &tmp, &clip,
3491			   choose_span(&tmp, dst, maskFormat, &clip),
3492			   !was_clear && maskFormat && !operator_is_bounded(op));
3493
3494		tor_fini(&tor);
3495	} else {
3496		struct tristrip_thread threads[num_threads];
3497		int y, h, n;
3498
3499		DBG(("%s: using %d threads for tristrip compositing %dx%d\n",
3500		     __FUNCTION__, num_threads,
3501		     clip.extents.x2 - clip.extents.x1,
3502		     clip.extents.y2 - clip.extents.y1));
3503
3504		threads[0].sna = sna;
3505		threads[0].op = &tmp;
3506		threads[0].points = points;
3507		threads[0].count = count;
3508		threads[0].extents = clip.extents;
3509		threads[0].clip = &clip;
3510		threads[0].dx = dx;
3511		threads[0].dy = dy;
3512		threads[0].draw_y = dst->pDrawable->y;
3513		threads[0].unbounded = !was_clear && maskFormat && !operator_is_bounded(op);
3514		threads[0].span = thread_choose_span(&tmp, dst, maskFormat, &clip);
3515
3516		y = clip.extents.y1;
3517		h = clip.extents.y2 - clip.extents.y1;
3518		h = (h + num_threads - 1) / num_threads;
3519		num_threads -= (num_threads-1) * h >= clip.extents.y2 - clip.extents.y1;
3520
3521		for (n = 1; n < num_threads; n++) {
3522			threads[n] = threads[0];
3523			threads[n].extents.y1 = y;
3524			threads[n].extents.y2 = y += h;
3525
3526			sna_threads_run(n, tristrip_thread, &threads[n]);
3527		}
3528
3529		assert(y < threads[0].extents.y2);
3530		threads[0].extents.y1 = y;
3531		tristrip_thread(&threads[0]);
3532
3533		sna_threads_wait();
3534	}
3535skip:
3536	tmp.done(sna, &tmp);
3537
3538	REGION_UNINIT(NULL, &clip);
3539	return true;
3540}
3541
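/* Add a set of xTraps to the destination with PictOpAdd and a white source
 * via the composite-spans interface; each trap contributes its left and right
 * edges to the polygon before rasterisation.
 */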
3542bool
3543precise_trap_span_converter(struct sna *sna,
3544			    PicturePtr dst,
3545			    INT16 src_x, INT16 src_y,
3546			    int ntrap, xTrap *trap)
3547{
3548	struct sna_composite_spans_op tmp;
3549	struct tor tor;
3550	BoxRec extents;
3551	pixman_region16_t *clip;
3552	int dx, dy, n;
3553
3554	if (dst->pDrawable->depth < 8)
3555		return false;
3556
3557	if (!sna->render.check_composite_spans(sna, PictOpAdd, sna->render.white_picture, dst,
3558					       dst->pCompositeClip->extents.x2 - dst->pCompositeClip->extents.x1,
3559					       dst->pCompositeClip->extents.y2 - dst->pCompositeClip->extents.y1,
3560					       0)) {
3561		DBG(("%s: fallback -- composite spans not supported\n",
3562		     __FUNCTION__));
3563		return false;
3564	}
3565
3566	clip = dst->pCompositeClip;
3567	extents = *RegionExtents(clip);
3568	dx = dst->pDrawable->x;
3569	dy = dst->pDrawable->y;
3570
3571	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d)\n",
3572	     __FUNCTION__,
3573	     extents.x1, extents.y1,
3574	     extents.x2, extents.y2,
3575	     dx, dy));
3576
3577	memset(&tmp, 0, sizeof(tmp));
3578	if (!sna->render.composite_spans(sna, PictOpAdd, sna->render.white_picture, dst,
3579					 0, 0,
3580					 extents.x1,  extents.y1,
3581					 extents.x2 - extents.x1,
3582					 extents.y2 - extents.y1,
3583					 0,
3584					 &tmp)) {
3585		DBG(("%s: fallback -- composite spans render op not supported\n",
3586		     __FUNCTION__));
3587		return false;
3588	}
3589
3590	dx *= SAMPLES_X;
3591	dy *= SAMPLES_Y;
3592	if (!tor_init(&tor, &extents, 2*ntrap))
3593		goto skip;
3594
3595	for (n = 0; n < ntrap; n++) {
3596		xPointFixed p1, p2;
3597
3598		if (pixman_fixed_to_int(trap[n].top.y) + dst->pDrawable->y >= extents.y2 ||
3599		    pixman_fixed_to_int(trap[n].bot.y) + dst->pDrawable->y < extents.y1)
3600			continue;
3601
3602		p1.y = trap[n].top.y;
3603		p2.y = trap[n].bot.y;
3604		p1.x = trap[n].top.l;
3605		p2.x = trap[n].bot.l;
3606		polygon_add_line(tor.polygon, &p1, &p2, dx, dy);
3607
3608		p1.y = trap[n].bot.y;
3609		p2.y = trap[n].top.y;
3610		p1.x = trap[n].top.r;
3611		p2.x = trap[n].bot.r;
3612		polygon_add_line(tor.polygon, &p1, &p2, dx, dy);
3613	}
3614
3615	tor_render(sna, &tor, &tmp, clip,
3616		   choose_span(&tmp, dst, NULL, clip), false);
3617
3618	tor_fini(&tor);
3619skip:
3620	tmp.done(sna, &tmp);
3621	return true;
3622}
3623