sna_trapezoids.c revision 03b705cf
1/*
2 * Copyright (c) 2007  David Turner
3 * Copyright (c) 2008  M Joonas Pihlaja
4 * Copyright (c) 2011 Intel Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 *    Chris Wilson <chris@chris-wilson.co.uk>
27 *
28 */
29
30#ifdef HAVE_CONFIG_H
31#include "config.h"
32#endif
33
34#include "sna.h"
35#include "sna_render.h"
36#include "sna_render_inline.h"
37#include "fb/fbpict.h"
38
39#include <mipict.h>
40
/* Verbose tracing for the scan converter.  Flip the condition to 1 to
 * forward __DBG((fmt, ...)) to ErrorF; otherwise it expands to nothing. */
#if 0
#define __DBG(args) ErrorF args
#else
#define __DBG(args)
#endif

/* Compile-time debugging knobs; all are left disabled (0) here. */
#define NO_ACCEL		0
#define FORCE_FALLBACK		0
#define NO_ALIGNED_BOXES	0
#define NO_UNALIGNED_BOXES	0
#define NO_SCAN_CONVERTER	0
#define NO_GPU_THREADS		0
53
54/* TODO: Emit unantialiased and MSAA triangles. */
55
/* Fallback two-argument max/min.  Arguments may be evaluated twice, so
 * do not pass expressions with side effects. */
#ifndef MAX
#define MAX(a,b) ((a) >= (b) ? (a) : (b))
#endif

#ifndef MIN
#define MIN(a,b) ((a) <= (b) ? (a) : (b))
#endif
63
#define SAMPLES_X 17
#define SAMPLES_Y 15

/* Subsample grid used by the fast converter: a power-of-two number of
 * samples per pixel in each direction, so that splitting a coordinate
 * into pixel and subsample parts is a shift and a mask. */
#define FAST_SAMPLES_shift 2
#define FAST_SAMPLES_X (1 << FAST_SAMPLES_shift)
#define FAST_SAMPLES_Y (1 << FAST_SAMPLES_shift)
#define FAST_SAMPLES_mask ((1 << FAST_SAMPLES_shift) - 1)
71
/* Number of boxes in a region, and a pointer to the first of them.  A
 * region without a data block is treated as the single box 'extents'. */
#define region_count(rgn) ((rgn)->data ? (rgn)->data->numRects : 1)
#define region_boxes(rgn) ((rgn)->data ? (BoxPtr)((rgn)->data + 1) : &(rgn)->extents)
74
75typedef void (*span_func_t)(struct sna *sna,
76			    struct sna_composite_spans_op *op,
77			    pixman_region16_t *clip,
78			    const BoxRec *box,
79			    int coverage);
80
#if HAS_DEBUG_FULL
/* Debug-only sanity check: complain and abort when @box extends outside
 * @pixmap.  @function is the name of the calling function (supplied by
 * the assert_pixmap_contains_box() wrapper) and prefixes the message so
 * the report identifies the call site rather than this helper. */
static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char *function)
{
	if (box->x1 < 0 || box->y1 < 0 ||
	    box->x2 > pixmap->drawable.width ||
	    box->y2 > pixmap->drawable.height)
	{
		ErrorF("%s: damage box is beyond the pixmap: box=(%d, %d), (%d, %d), pixmap=(%d, %d)\n",
		       function,
		       box->x1, box->y1, box->x2, box->y2,
		       pixmap->drawable.width,
		       pixmap->drawable.height);
		assert(0);
	}
}
#define assert_pixmap_contains_box(p, b) _assert_pixmap_contains_box(p, b, __FUNCTION__)
#else
#define assert_pixmap_contains_box(p, b)
#endif
100
101static void apply_damage(struct sna_composite_op *op, RegionPtr region)
102{
103	DBG(("%s: damage=%p, region=%ldx[(%d, %d), (%d, %d)]\n",
104	     __FUNCTION__, op->damage,
105	     (long)REGION_NUM_RECTS(region),
106	     region->extents.x1, region->extents.y1,
107	     region->extents.x2, region->extents.y2));
108
109	if (op->damage == NULL)
110		return;
111
112	RegionTranslate(region, op->dst.x, op->dst.y);
113
114	assert_pixmap_contains_box(op->dst.pixmap, RegionExtents(region));
115	sna_damage_add(op->damage, region);
116}
117
118static void _apply_damage_box(struct sna_composite_op *op, const BoxRec *box)
119{
120	BoxRec r;
121
122	r.x1 = box->x1 + op->dst.x;
123	r.x2 = box->x2 + op->dst.x;
124	r.y1 = box->y1 + op->dst.y;
125	r.y2 = box->y2 + op->dst.y;
126
127	assert_pixmap_contains_box(op->dst.pixmap, &r);
128	sna_damage_add_box(op->damage, &r);
129}
130
131inline static void apply_damage_box(struct sna_composite_op *op, const BoxRec *box)
132{
133	if (op->damage)
134		_apply_damage_box(op, box);
135}
136
/* Coordinates measured on the subsample grid rather than in pixels. */
typedef int grid_scaled_x_t;
typedef int grid_scaled_y_t;

/* Split the grid-scaled x coordinate @x into its pixel column @i and
 * the subsample offset @f within that pixel. */
#define FAST_SAMPLES_X_TO_INT_FRAC(x, i, f) \
	_GRID_TO_INT_FRAC_shift(x, i, f, FAST_SAMPLES_shift)

#define FAST_SAMPLES_INT(x) ((x) >> (FAST_SAMPLES_shift))
#define FAST_SAMPLES_FRAC(x) ((x) & (FAST_SAMPLES_mask))

/* Split @t into integer part @i and fractional part @f for a grid with
 * (1 << @b) samples per unit.  The shift really is taken from @b; it was
 * previously ignored in favour of FAST_SAMPLES_shift.  @t is evaluated
 * twice. */
#define _GRID_TO_INT_FRAC_shift(t, i, f, b) do {	\
    (f) = (t) & ((1 << (b)) - 1);			\
    (i) = (t) >> (b);					\
} while (0)
150
/* A grid area is a real in [0,1] scaled by 2*FAST_SAMPLES_X*FAST_SAMPLES_Y.
 * The factor of two lets the area 0.5*dx*dy of a triangle with base dx
 * and height dy, both given in grid scaled numbers, be stored exactly
 * as an integer. */
typedef int grid_area_t;

/* Unit area on the grid, i.e. one fully covered pixel. */
#define FAST_SAMPLES_XY (2 * FAST_SAMPLES_X * FAST_SAMPLES_Y)

/* Convert a grid area into a floating point fraction of a pixel. */
#define AREA_TO_ALPHA(c)  ((c) / (float)FAST_SAMPLES_XY)
160
/* Quotient and remainder of a floored division; see floored_divrem(). */
struct quorem {
	int32_t quo;	/* quotient */
	int32_t rem;	/* remainder */
};
165
166struct edge {
167	struct edge *next, *prev;
168
169	int dir;
170
171	grid_scaled_y_t height_left;
172
173	/* Current x coordinate while the edge is on the active
174	 * list. Initialised to the x coordinate of the top of the
175	 * edge. The quotient is in grid_scaled_x_t units and the
176	 * remainder is mod dy in grid_scaled_y_t units.*/
177	struct quorem x;
178
179	/* Advance of the current x when moving down a subsample line. */
180	struct quorem dxdy;
181	grid_scaled_y_t dy;
182
183	/* The clipped y of the top of the edge. */
184	grid_scaled_y_t ytop;
185
186	/* y2-y1 after orienting the edge downwards.  */
187};
188
/* Number of subsample rows per y-bucket: FAST_SAMPLES_Y. */
#define EDGE_Y_BUCKET_HEIGHT FAST_SAMPLES_Y

/* Bucket holding subsample row @y of a polygon whose first row is @ymin. */
#define EDGE_Y_BUCKET_INDEX(y, ymin) (((y) - (ymin)) / EDGE_Y_BUCKET_HEIGHT)
192
193/* A collection of sorted and vertically clipped edges of the polygon.
194 * Edges are moved from the polygon to an active list while scan
195 * converting. */
196struct polygon {
197	/* The vertical clip extents. */
198	grid_scaled_y_t ymin, ymax;
199
200	/* Array of edges all starting in the same bucket.	An edge is put
201	 * into bucket EDGE_BUCKET_INDEX(edge->ytop, polygon->ymin) when
202	 * it is added to the polygon. */
203	struct edge **y_buckets;
204	struct edge *y_buckets_embedded[64];
205
206	struct edge edges_embedded[32];
207	struct edge *edges;
208	int num_edges;
209};
210
211/* A cell records the effect on pixel coverage of polygon edges
212 * passing through a pixel.  It contains two accumulators of pixel
213 * coverage.
214 *
215 * Consider the effects of a polygon edge on the coverage of a pixel
216 * it intersects and that of the following one.  The coverage of the
217 * following pixel is the height of the edge multiplied by the width
218 * of the pixel, and the coverage of the pixel itself is the area of
219 * the trapezoid formed by the edge and the right side of the pixel.
220 *
221 * +-----------------------+-----------------------+
222 * |                       |                       |
223 * |                       |                       |
224 * |_______________________|_______________________|
225 * |   \...................|.......................|\
226 * |    \..................|.......................| |
227 * |     \.................|.......................| |
228 * |      \....covered.....|.......................| |
229 * |       \....area.......|.......................| } covered height
230 * |        \..............|.......................| |
231 * |uncovered\.............|.......................| |
232 * |  area    \............|.......................| |
233 * |___________\...........|.......................|/
234 * |                       |                       |
235 * |                       |                       |
236 * |                       |                       |
237 * +-----------------------+-----------------------+
238 *
239 * Since the coverage of the following pixel will always be a multiple
240 * of the width of the pixel, we can store the height of the covered
241 * area instead.  The coverage of the pixel itself is the total
242 * coverage minus the area of the uncovered area to the left of the
243 * edge.  As it's faster to compute the uncovered area we only store
244 * that and subtract it from the total coverage later when forming
245 * spans to blit.
246 *
247 * The heights and areas are signed, with left edges of the polygon
248 * having positive sign and right edges having negative sign.  When
249 * two edges intersect they swap their left/rightness so their
250 * contribution above and below the intersection point must be
251 * computed separately. */
252struct cell {
253	struct cell *next;
254	int x;
255	grid_area_t uncovered_area;
256	grid_scaled_y_t covered_height;
257};
258
259/* A cell list represents the scan line sparsely as cells ordered by
260 * ascending x.  It is geared towards scanning the cells in order
261 * using an internal cursor. */
262struct cell_list {
263	struct cell *cursor;
264
265	/* Points to the left-most cell in the scan line. */
266	struct cell head, tail;
267
268	int16_t x1, x2;
269	int16_t count, size;
270	struct cell *cells;
271	struct cell embedded[256];
272};
273
274/* The active list contains edges in the current scan line ordered by
275 * the x-coordinate of the intercept of the edge and the scan line. */
276struct active_list {
277	/* Leftmost edge on the current scan line. */
278	struct edge head, tail;
279
280	/* A lower bound on the height of the active edges is used to
281	 * estimate how soon some active edge ends.	 We can't advance the
282	 * scan conversion by a full pixel row if an edge ends somewhere
283	 * within it. */
284	grid_scaled_y_t min_height;
285	int is_vertical;
286};
287
288struct tor {
289    struct polygon	polygon[1];
290    struct active_list	active[1];
291    struct cell_list	coverages[1];
292
293    /* Clip box. */
294    grid_scaled_x_t xmin, xmax;
295    grid_scaled_y_t ymin, ymax;
296};
297
298/* Compute the floored division a/b. Assumes / and % perform symmetric
299 * division. */
300inline static struct quorem
301floored_divrem(int a, int b)
302{
303	struct quorem qr;
304	qr.quo = a/b;
305	qr.rem = a%b;
306	if (qr.rem && (a^b)<0) {
307		qr.quo -= 1;
308		qr.rem += b;
309	}
310	return qr;
311}
312
313/* Compute the floored division (x*a)/b. Assumes / and % perform symmetric
314 * division. */
315static struct quorem
316floored_muldivrem(int32_t x, int32_t a, int32_t b)
317{
318	struct quorem qr;
319	int64_t xa = (int64_t)x*a;
320	qr.quo = xa/b;
321	qr.rem = xa%b;
322	if (qr.rem && (xa>=0) != (b>=0)) {
323		qr.quo -= 1;
324		qr.rem += b;
325	}
326	return qr;
327}
328
329/* Rewinds the cell list's cursor to the beginning.  After rewinding
330 * we're good to cell_list_find() the cell any x coordinate. */
331inline static void
332cell_list_rewind(struct cell_list *cells)
333{
334	cells->cursor = &cells->head;
335}
336
337static bool
338cell_list_init(struct cell_list *cells, int x1, int x2)
339{
340	cells->tail.next = NULL;
341	cells->tail.x = INT_MAX;
342	cells->head.x = INT_MIN;
343	cells->head.next = &cells->tail;
344	cell_list_rewind(cells);
345	cells->count = 0;
346	cells->x1 = x1;
347	cells->x2 = x2;
348	cells->size = x2 - x1 + 1;
349	cells->cells = cells->embedded;
350	if (cells->size > ARRAY_SIZE(cells->embedded))
351		cells->cells = malloc(cells->size * sizeof(struct cell));
352	return cells->cells != NULL;
353}
354
355static void
356cell_list_fini(struct cell_list *cells)
357{
358	if (cells->cells != cells->embedded)
359		free(cells->cells);
360}
361
362inline static void
363cell_list_reset(struct cell_list *cells)
364{
365	cell_list_rewind(cells);
366	cells->head.next = &cells->tail;
367	cells->count = 0;
368}
369
370inline static struct cell *
371cell_list_alloc(struct cell_list *cells,
372		struct cell *tail,
373		int x)
374{
375	struct cell *cell;
376
377	assert(cells->count < cells->size);
378	cell = cells->cells + cells->count++;
379	cell->next = tail->next;
380	tail->next = cell;
381
382	cell->x = x;
383	cell->uncovered_area = 0;
384	cell->covered_height = 0;
385	return cell;
386}
387
388/* Find a cell at the given x-coordinate.  Returns %NULL if a new cell
389 * needed to be allocated but couldn't be.  Cells must be found with
390 * non-decreasing x-coordinate until the cell list is rewound using
391 * cell_list_rewind(). Ownership of the returned cell is retained by
392 * the cell list. */
393inline static struct cell *
394cell_list_find(struct cell_list *cells, int x)
395{
396	struct cell *tail = cells->cursor;
397
398	if (tail->x == x)
399		return tail;
400
401	if (x >= cells->x2)
402		return &cells->tail;
403
404	if (x < cells->x1)
405		x = cells->x1;
406
407	if (tail->x == x)
408		return tail;
409
410	do {
411		if (tail->next->x > x)
412			break;
413
414		tail = tail->next;
415		if (tail->next->x > x)
416			break;
417
418		tail = tail->next;
419		if (tail->next->x > x)
420			break;
421
422		tail = tail->next;
423	} while (1);
424
425	if (tail->x != x)
426		tail = cell_list_alloc (cells, tail, x);
427
428	return cells->cursor = tail;
429}
430
431/* Add a subpixel span covering [x1, x2) to the coverage cells. */
432inline static void
433cell_list_add_subspan(struct cell_list *cells,
434		      grid_scaled_x_t x1,
435		      grid_scaled_x_t x2)
436{
437	struct cell *cell;
438	int ix1, fx1;
439	int ix2, fx2;
440
441	if (x1 == x2)
442		return;
443
444	FAST_SAMPLES_X_TO_INT_FRAC(x1, ix1, fx1);
445	FAST_SAMPLES_X_TO_INT_FRAC(x2, ix2, fx2);
446
447	__DBG(("%s: x1=%d (%d+%d), x2=%d (%d+%d)\n", __FUNCTION__,
448	       x1, ix1, fx1, x2, ix2, fx2));
449
450	cell = cell_list_find(cells, ix1);
451	if (ix1 != ix2) {
452		cell->uncovered_area += 2*fx1;
453		++cell->covered_height;
454
455		cell = cell_list_find(cells, ix2);
456		cell->uncovered_area -= 2*fx2;
457		--cell->covered_height;
458	} else
459		cell->uncovered_area += 2*(fx1-fx2);
460}
461
462inline static void
463cell_list_add_span(struct cell_list *cells,
464		   grid_scaled_x_t x1,
465		   grid_scaled_x_t x2)
466{
467	struct cell *cell;
468	int ix1, fx1;
469	int ix2, fx2;
470
471	FAST_SAMPLES_X_TO_INT_FRAC(x1, ix1, fx1);
472	FAST_SAMPLES_X_TO_INT_FRAC(x2, ix2, fx2);
473
474	__DBG(("%s: x1=%d (%d+%d), x2=%d (%d+%d)\n", __FUNCTION__,
475	       x1, ix1, fx1, x2, ix2, fx2));
476
477	cell = cell_list_find(cells, ix1);
478	if (ix1 != ix2) {
479		cell->uncovered_area += 2*fx1*FAST_SAMPLES_Y;
480		cell->covered_height += FAST_SAMPLES_Y;
481
482		cell = cell_list_find(cells, ix2);
483		cell->uncovered_area -= 2*fx2*FAST_SAMPLES_Y;
484		cell->covered_height -= FAST_SAMPLES_Y;
485	} else
486		cell->uncovered_area += 2*(fx1-fx2)*FAST_SAMPLES_Y;
487}
488
489static void
490polygon_fini(struct polygon *polygon)
491{
492	if (polygon->y_buckets != polygon->y_buckets_embedded)
493		free(polygon->y_buckets);
494
495	if (polygon->edges != polygon->edges_embedded)
496		free(polygon->edges);
497}
498
499static bool
500polygon_init(struct polygon *polygon,
501	     int num_edges,
502	     grid_scaled_y_t ymin,
503	     grid_scaled_y_t ymax)
504{
505	unsigned h = ymax - ymin;
506	unsigned num_buckets =
507		EDGE_Y_BUCKET_INDEX(ymax+EDGE_Y_BUCKET_HEIGHT-1, ymin);
508
509	if (unlikely(h > 0x7FFFFFFFU - EDGE_Y_BUCKET_HEIGHT))
510		goto bail_no_mem; /* even if you could, you wouldn't want to. */
511
512	polygon->edges = polygon->edges_embedded;
513	polygon->y_buckets = polygon->y_buckets_embedded;
514
515	polygon->num_edges = 0;
516	if (num_edges > (int)ARRAY_SIZE(polygon->edges_embedded)) {
517		polygon->edges = malloc(sizeof(struct edge)*num_edges);
518		if (unlikely(NULL == polygon->edges))
519			goto bail_no_mem;
520	}
521
522	if (num_buckets >= ARRAY_SIZE(polygon->y_buckets_embedded)) {
523		polygon->y_buckets = malloc((1+num_buckets)*sizeof(struct edge *));
524		if (unlikely(NULL == polygon->y_buckets))
525			goto bail_no_mem;
526	}
527	memset(polygon->y_buckets, 0, num_buckets * sizeof(struct edge *));
528	polygon->y_buckets[num_buckets] = (void *)-1;
529
530	polygon->ymin = ymin;
531	polygon->ymax = ymax;
532	return true;
533
534bail_no_mem:
535	polygon_fini(polygon);
536	return false;
537}
538
539static void
540_polygon_insert_edge_into_its_y_bucket(struct polygon *polygon, struct edge *e)
541{
542	unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop, polygon->ymin);
543	struct edge **ptail = &polygon->y_buckets[ix];
544	e->next = *ptail;
545	*ptail = e;
546}
547
548inline static void
549polygon_add_edge(struct polygon *polygon,
550		 grid_scaled_x_t x1,
551		 grid_scaled_x_t x2,
552		 grid_scaled_y_t y1,
553		 grid_scaled_y_t y2,
554		 grid_scaled_y_t top,
555		 grid_scaled_y_t bottom,
556		 int dir)
557{
558	struct edge *e = &polygon->edges[polygon->num_edges++];
559	grid_scaled_x_t dx = x2 - x1;
560	grid_scaled_y_t dy = y2 - y1;
561	grid_scaled_y_t ytop, ybot;
562	grid_scaled_y_t ymin = polygon->ymin;
563	grid_scaled_y_t ymax = polygon->ymax;
564
565	__DBG(("%s: edge=(%d [%d.%d], %d [%d.%d]), (%d [%d.%d], %d [%d.%d]), top=%d [%d.%d], bottom=%d [%d.%d], dir=%d\n",
566	       __FUNCTION__,
567	       x1, FAST_SAMPLES_INT(x1), FAST_SAMPLES_FRAC(x1),
568	       y1, FAST_SAMPLES_INT(y1), FAST_SAMPLES_FRAC(y1),
569	       x2, FAST_SAMPLES_INT(x2), FAST_SAMPLES_FRAC(x2),
570	       y2, FAST_SAMPLES_INT(y2), FAST_SAMPLES_FRAC(y2),
571	       top, FAST_SAMPLES_INT(top), FAST_SAMPLES_FRAC(top),
572	       bottom, FAST_SAMPLES_INT(bottom), FAST_SAMPLES_FRAC(bottom),
573	       dir));
574	assert (dy > 0);
575
576	e->dy = dy;
577	e->dir = dir;
578
579	ytop = top >= ymin ? top : ymin;
580	ybot = bottom <= ymax ? bottom : ymax;
581	e->ytop = ytop;
582	e->height_left = ybot - ytop;
583	if (e->height_left <= 0)
584		return;
585
586	if (dx == 0) {
587		e->x.quo = x1;
588		e->x.rem = 0;
589		e->dy = 0;
590		e->dxdy.quo = 0;
591		e->dxdy.rem = 0;
592	} else {
593		e->dxdy = floored_divrem(dx, dy);
594		if (ytop == y1) {
595			e->x.quo = x1;
596			e->x.rem = 0;
597		} else {
598			e->x = floored_muldivrem(ytop - y1, dx, dy);
599			e->x.quo += x1;
600		}
601	}
602
603	_polygon_insert_edge_into_its_y_bucket(polygon, e);
604
605	e->x.rem -= dy; /* Bias the remainder for faster edge advancement. */
606}
607
608inline static void
609polygon_add_line(struct polygon *polygon,
610		 const xPointFixed *p1,
611		 const xPointFixed *p2)
612{
613	struct edge *e = &polygon->edges[polygon->num_edges];
614	grid_scaled_x_t dx = p2->x - p1->x;
615	grid_scaled_y_t dy = p2->y - p1->y;
616	grid_scaled_y_t top, bot;
617
618	if (dy == 0)
619		return;
620
621	__DBG(("%s: line=(%d, %d), (%d, %d)\n",
622	       __FUNCTION__, (int)p1->x, (int)p1->y, (int)p2->x, (int)p2->y));
623
624	e->dir = 1;
625	if (dy < 0) {
626		const xPointFixed *t;
627
628		dx = -dx;
629		dy = -dy;
630
631		e->dir = -1;
632
633		t = p1;
634		p1 = p2;
635		p2 = t;
636	}
637	assert (dy > 0);
638	e->dy = dy;
639
640	top = MAX(p1->y, polygon->ymin);
641	bot = MIN(p2->y, polygon->ymax);
642	if (bot <= top)
643		return;
644
645	e->ytop = top;
646	e->height_left = bot - top;
647	if (e->height_left <= 0)
648		return;
649
650	if (dx == 0) {
651		e->x.quo = p1->x;
652		e->x.rem = -dy;
653		e->dxdy.quo = 0;
654		e->dxdy.rem = 0;
655		e->dy = 0;
656	} else {
657		e->dxdy = floored_divrem(dx, dy);
658		if (top == p1->y) {
659			e->x.quo = p1->x;
660			e->x.rem = -dy;
661		} else {
662			e->x = floored_muldivrem(top - p1->y, dx, dy);
663			e->x.quo += p1->x;
664			e->x.rem -= dy;
665		}
666	}
667
668	if (polygon->num_edges > 0) {
669		struct edge *prev = &polygon->edges[polygon->num_edges-1];
670		/* detect degenerate triangles inserted into tristrips */
671		if (e->dir == -prev->dir &&
672		    e->ytop == prev->ytop &&
673		    e->height_left == prev->height_left &&
674		    e->x.quo == prev->x.quo &&
675		    e->x.rem == prev->x.rem &&
676		    e->dxdy.quo == prev->dxdy.quo &&
677		    e->dxdy.rem == prev->dxdy.rem) {
678			unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop,
679							  polygon->ymin);
680			polygon->y_buckets[ix] = prev->next;
681			polygon->num_edges--;
682			return;
683		}
684	}
685
686	_polygon_insert_edge_into_its_y_bucket(polygon, e);
687	polygon->num_edges++;
688}
689
690static void
691active_list_reset(struct active_list *active)
692{
693	active->head.height_left = INT_MAX;
694	active->head.x.quo = INT_MIN;
695	active->head.dy = 0;
696	active->head.prev = NULL;
697	active->head.next = &active->tail;
698	active->tail.prev = &active->head;
699	active->tail.next = NULL;
700	active->tail.x.quo = INT_MAX;
701	active->tail.height_left = INT_MAX;
702	active->tail.dy = 0;
703	active->min_height = INT_MAX;
704	active->is_vertical = 1;
705}
706
707static struct edge *
708merge_sorted_edges(struct edge *head_a, struct edge *head_b)
709{
710	struct edge *head, **next, *prev;
711	int32_t x;
712
713	if (head_b == NULL)
714		return head_a;
715
716	prev = head_a->prev;
717	next = &head;
718	if (head_a->x.quo <= head_b->x.quo) {
719		head = head_a;
720	} else {
721		head = head_b;
722		head_b->prev = prev;
723		goto start_with_b;
724	}
725
726	do {
727		x = head_b->x.quo;
728		while (head_a != NULL && head_a->x.quo <= x) {
729			prev = head_a;
730			next = &head_a->next;
731			head_a = head_a->next;
732		}
733
734		head_b->prev = prev;
735		*next = head_b;
736		if (head_a == NULL)
737			return head;
738
739start_with_b:
740		x = head_a->x.quo;
741		while (head_b != NULL && head_b->x.quo <= x) {
742			prev = head_b;
743			next = &head_b->next;
744			head_b = head_b->next;
745		}
746
747		head_a->prev = prev;
748		*next = head_a;
749		if (head_b == NULL)
750			return head;
751	} while (1);
752}
753
754static struct edge *
755sort_edges(struct edge  *list,
756	   unsigned int  level,
757	   struct edge **head_out)
758{
759	struct edge *head_other, *remaining;
760	unsigned int i;
761
762	head_other = list->next;
763	if (head_other == NULL) {
764		*head_out = list;
765		return NULL;
766	}
767
768	remaining = head_other->next;
769	if (list->x.quo <= head_other->x.quo) {
770		*head_out = list;
771		head_other->next = NULL;
772	} else {
773		*head_out = head_other;
774		head_other->prev = list->prev;
775		head_other->next = list;
776		list->prev = head_other;
777		list->next = NULL;
778	}
779
780	for (i = 0; i < level && remaining; i++) {
781		remaining = sort_edges(remaining, i, &head_other);
782		*head_out = merge_sorted_edges(*head_out, head_other);
783	}
784
785	return remaining;
786}
787
788static struct edge *filter(struct edge *edges)
789{
790	struct edge *e;
791
792	e = edges;
793	do {
794		struct edge *n = e->next;
795		if (e->dir == -n->dir &&
796		    e->height_left == n->height_left &&
797		    *(uint64_t *)&e->x == *(uint64_t *)&n->x &&
798		    *(uint64_t *)&e->dxdy == *(uint64_t *)&n->dxdy) {
799			if (e->prev)
800				e->prev->next = n->next;
801			else
802				edges = n->next;
803			if (n->next)
804				n->next->prev = e->prev;
805			else
806				break;
807
808			e = n->next;
809		} else
810			e = e->next;
811	} while (e->next);
812
813	return edges;
814}
815
/* Sort the 'unsorted' list, drop edge pairs that cancel out, and merge
 * what is left into the already sorted list 'head'. */
static struct edge *
merge_unsorted_edges (struct edge *head, struct edge *unsorted)
{
	struct edge *sorted;

	sort_edges (unsorted, UINT_MAX, &sorted);
	sorted = filter(sorted);

	return merge_sorted_edges (head, sorted);
}
822
823/* Test if the edges on the active list can be safely advanced by a
824 * full row without intersections or any edges ending. */
825inline static bool
826can_full_step(struct active_list *active)
827{
828	/* Recomputes the minimum height of all edges on the active
829	 * list if we have been dropping edges. */
830	if (active->min_height <= 0) {
831		const struct edge *e;
832		int min_height = INT_MAX;
833		int is_vertical = 1;
834
835		for (e = active->head.next; &active->tail != e; e = e->next) {
836			if (e->height_left < min_height)
837				min_height = e->height_left;
838			if (is_vertical)
839				is_vertical = e->dy == 0;
840		}
841
842		active->is_vertical = is_vertical;
843		active->min_height = min_height;
844	}
845
846	if (active->min_height < FAST_SAMPLES_Y)
847		return false;
848
849	return active->is_vertical;
850}
851
852inline static void
853merge_edges(struct active_list *active, struct edge *edges)
854{
855	active->head.next = merge_unsorted_edges (active->head.next, edges);
856}
857
858inline static void
859fill_buckets(struct active_list *active,
860	     struct edge *edge,
861	     struct edge **buckets)
862{
863	int min_height = active->min_height;
864	int is_vertical = active->is_vertical;
865
866	while (edge) {
867		struct edge *next = edge->next;
868		struct edge **b = &buckets[edge->ytop & (FAST_SAMPLES_Y-1)];
869		if (*b)
870			(*b)->prev = edge;
871		edge->next = *b;
872		edge->prev = NULL;
873		*b = edge;
874		if (edge->height_left < min_height)
875			min_height = edge->height_left;
876		if (is_vertical)
877			is_vertical = edge->dy == 0;
878		edge = next;
879	}
880
881	active->is_vertical = is_vertical;
882	active->min_height = min_height;
883}
884
885inline static void
886nonzero_subrow(struct active_list *active, struct cell_list *coverages)
887{
888	struct edge *edge = active->head.next;
889	grid_scaled_x_t prev_x = INT_MIN;
890	int winding = 0, xstart = INT_MIN;
891
892	cell_list_rewind (coverages);
893
894	while (&active->tail != edge) {
895		struct edge *next = edge->next;
896
897		winding += edge->dir;
898		if (0 == winding) {
899			if (edge->next->x.quo != edge->x.quo) {
900				cell_list_add_subspan(coverages,
901						      xstart, edge->x.quo);
902				xstart = INT_MIN;
903			}
904		} else if (xstart < 0)
905			xstart = edge->x.quo;
906
907		if (--edge->height_left) {
908			if (edge->dy) {
909				edge->x.quo += edge->dxdy.quo;
910				edge->x.rem += edge->dxdy.rem;
911				if (edge->x.rem >= 0) {
912					++edge->x.quo;
913					edge->x.rem -= edge->dy;
914				}
915			}
916
917			if (edge->x.quo < prev_x) {
918				struct edge *pos = edge->prev;
919				pos->next = next;
920				next->prev = pos;
921				do {
922					pos = pos->prev;
923				} while (edge->x.quo < pos->x.quo);
924				pos->next->prev = edge;
925				edge->next = pos->next;
926				edge->prev = pos;
927				pos->next = edge;
928			} else
929				prev_x = edge->x.quo;
930		} else {
931			edge->prev->next = next;
932			next->prev = edge->prev;
933			active->min_height = -1;
934		}
935
936		edge = next;
937	}
938}
939
940static void
941nonzero_row(struct active_list *active, struct cell_list *coverages)
942{
943	struct edge *left = active->head.next;
944
945	assert(active->is_vertical);
946
947	while (&active->tail != left) {
948		struct edge *right;
949		int winding = left->dir;
950
951		left->height_left -= FAST_SAMPLES_Y;
952		if (! left->height_left) {
953			left->prev->next = left->next;
954			left->next->prev = left->prev;
955		}
956
957		right = left->next;
958		do {
959			right->height_left -= FAST_SAMPLES_Y;
960			if (!right->height_left) {
961				right->prev->next = right->next;
962				right->next->prev = right->prev;
963			}
964
965			winding += right->dir;
966			if (0 == winding)
967				break;
968
969			right = right->next;
970		} while (1);
971
972		cell_list_add_span(coverages, left->x.quo, right->x.quo);
973		left = right->next;
974	}
975}
976
977static void
978tor_fini(struct tor *converter)
979{
980	polygon_fini(converter->polygon);
981	cell_list_fini(converter->coverages);
982}
983
984static bool
985tor_init(struct tor *converter, const BoxRec *box, int num_edges)
986{
987	__DBG(("%s: (%d, %d),(%d, %d) x (%d, %d), num_edges=%d\n",
988	       __FUNCTION__,
989	       box->x1, box->y1, box->x2, box->y2,
990	       FAST_SAMPLES_X, FAST_SAMPLES_Y,
991	       num_edges));
992
993	converter->xmin = box->x1;
994	converter->ymin = box->y1;
995	converter->xmax = box->x2;
996	converter->ymax = box->y2;
997
998	if (!cell_list_init(converter->coverages, box->x1, box->x2))
999		return false;
1000
1001	active_list_reset(converter->active);
1002	if (!polygon_init(converter->polygon,
1003			    num_edges,
1004			    box->y1 * FAST_SAMPLES_Y,
1005			    box->y2 * FAST_SAMPLES_Y)) {
1006		cell_list_fini(converter->coverages);
1007		return false;
1008	}
1009
1010	return true;
1011}
1012
1013static void
1014tor_add_edge(struct tor *converter,
1015	     const xTrapezoid *t,
1016	     const xLineFixed *edge,
1017	     int dir)
1018{
1019	polygon_add_edge(converter->polygon,
1020			 edge->p1.x, edge->p2.x,
1021			 edge->p1.y, edge->p2.y,
1022			 t->top, t->bottom,
1023			 dir);
1024}
1025
1026static void
1027step_edges(struct active_list *active, int count)
1028{
1029	struct edge *edge;
1030
1031	count *= FAST_SAMPLES_Y;
1032	for (edge = active->head.next; edge != &active->tail; edge = edge->next) {
1033		edge->height_left -= count;
1034		if (! edge->height_left) {
1035			edge->prev->next = edge->next;
1036			edge->next->prev = edge->prev;
1037		}
1038	}
1039}
1040
1041static void
1042tor_blt_span(struct sna *sna,
1043	     struct sna_composite_spans_op *op,
1044	     pixman_region16_t *clip,
1045	     const BoxRec *box,
1046	     int coverage)
1047{
1048	__DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
1049
1050	op->box(sna, op, box, AREA_TO_ALPHA(coverage));
1051	apply_damage_box(&op->base, box);
1052}
1053
1054static void
1055tor_blt_span__no_damage(struct sna *sna,
1056			struct sna_composite_spans_op *op,
1057			pixman_region16_t *clip,
1058			const BoxRec *box,
1059			int coverage)
1060{
1061	__DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
1062
1063	op->box(sna, op, box, AREA_TO_ALPHA(coverage));
1064}
1065
1066static void
1067tor_blt_span_clipped(struct sna *sna,
1068		     struct sna_composite_spans_op *op,
1069		     pixman_region16_t *clip,
1070		     const BoxRec *box,
1071		     int coverage)
1072{
1073	pixman_region16_t region;
1074	float opacity;
1075
1076	opacity = AREA_TO_ALPHA(coverage);
1077	__DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2, opacity));
1078
1079	pixman_region_init_rects(&region, box, 1);
1080	RegionIntersect(&region, &region, clip);
1081	if (REGION_NUM_RECTS(&region)) {
1082		op->boxes(sna, op,
1083			  REGION_RECTS(&region),
1084			  REGION_NUM_RECTS(&region),
1085			  opacity);
1086		apply_damage(&op->base, &region);
1087	}
1088	pixman_region_fini(&region);
1089}
1090
1091static void
1092tor_blt_span_mono(struct sna *sna,
1093		  struct sna_composite_spans_op *op,
1094		  pixman_region16_t *clip,
1095		  const BoxRec *box,
1096		  int coverage)
1097{
1098	if (coverage < FAST_SAMPLES_XY/2)
1099		return;
1100
1101	tor_blt_span(sna, op, clip, box, FAST_SAMPLES_XY);
1102}
1103
1104static void
1105tor_blt_span_mono_clipped(struct sna *sna,
1106			  struct sna_composite_spans_op *op,
1107			  pixman_region16_t *clip,
1108			  const BoxRec *box,
1109			  int coverage)
1110{
1111	if (coverage < FAST_SAMPLES_XY/2)
1112		return;
1113
1114	tor_blt_span_clipped(sna, op, clip, box, FAST_SAMPLES_XY);
1115}
1116
1117static void
1118tor_blt_span_mono_unbounded(struct sna *sna,
1119			    struct sna_composite_spans_op *op,
1120			    pixman_region16_t *clip,
1121			    const BoxRec *box,
1122			    int coverage)
1123{
1124	tor_blt_span(sna, op, clip, box,
1125		     coverage < FAST_SAMPLES_XY/2 ? 0 : FAST_SAMPLES_XY);
1126}
1127
1128static void
1129tor_blt_span_mono_unbounded_clipped(struct sna *sna,
1130				    struct sna_composite_spans_op *op,
1131				    pixman_region16_t *clip,
1132				    const BoxRec *box,
1133				    int coverage)
1134{
1135	tor_blt_span_clipped(sna, op, clip, box,
1136			     coverage < FAST_SAMPLES_XY/2 ? 0 : FAST_SAMPLES_XY);
1137}
1138
1139static void
1140tor_blt(struct sna *sna,
1141	struct sna_composite_spans_op *op,
1142	pixman_region16_t *clip,
1143	void (*span)(struct sna *sna,
1144		     struct sna_composite_spans_op *op,
1145		     pixman_region16_t *clip,
1146		     const BoxRec *box,
1147		     int coverage),
1148	struct cell_list *cells,
1149	int y, int height,
1150	int xmin, int xmax,
1151	int unbounded)
1152{
1153	struct cell *cell;
1154	BoxRec box;
1155	int cover;
1156
1157	box.y1 = y;
1158	box.y2 = y + height;
1159	box.x1 = xmin;
1160
1161	/* Form the spans from the coverages and areas. */
1162	cover = 0;
1163	for (cell = cells->head.next; cell != &cells->tail; cell = cell->next) {
1164		int x = cell->x;
1165
1166		assert(x >= xmin);
1167		assert(x < xmax);
1168		__DBG(("%s: cell=(%d, %d, %d), cover=%d, max=%d\n", __FUNCTION__,
1169		       cell->x, cell->covered_height, cell->uncovered_area,
1170		       cover, xmax));
1171
1172		if (cell->covered_height || cell->uncovered_area) {
1173			box.x2 = x;
1174			if (box.x2 > box.x1 && (unbounded || cover)) {
1175				__DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
1176				       box.x1, box.y1,
1177				       box.x2 - box.x1,
1178				       box.y2 - box.y1,
1179				       cover));
1180				span(sna, op, clip, &box, cover);
1181			}
1182			box.x1 = box.x2;
1183			cover += cell->covered_height*FAST_SAMPLES_X*2;
1184		}
1185
1186		if (cell->uncovered_area) {
1187			int area = cover - cell->uncovered_area;
1188			box.x2 = x + 1;
1189			if (unbounded || area) {
1190				__DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
1191				       box.x1, box.y1,
1192				       box.x2 - box.x1,
1193				       box.y2 - box.y1,
1194				       area));
1195				span(sna, op, clip, &box, area);
1196			}
1197			box.x1 = box.x2;
1198		}
1199	}
1200
1201	box.x2 = xmax;
1202	if (box.x2 > box.x1 && (unbounded || cover)) {
1203		__DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
1204		       box.x1, box.y1,
1205		       box.x2 - box.x1,
1206		       box.y2 - box.y1,
1207		       cover));
1208		span(sna, op, clip, &box, cover);
1209	}
1210}
1211
1212static void
1213tor_blt_empty(struct sna *sna,
1214	      struct sna_composite_spans_op *op,
1215	      pixman_region16_t *clip,
1216	      void (*span)(struct sna *sna,
1217			   struct sna_composite_spans_op *op,
1218			   pixman_region16_t *clip,
1219			   const BoxRec *box,
1220			   int coverage),
1221	      int y, int height,
1222	      int xmin, int xmax)
1223{
1224	BoxRec box;
1225
1226	box.x1 = xmin;
1227	box.x2 = xmax;
1228	box.y1 = y;
1229	box.y2 = y + height;
1230
1231	span(sna, op, clip, &box, 0);
1232}
1233
/*
 * Scan-convert the polygon held by @converter one pixel row at a time and
 * hand the resulting coverage spans to @span.
 *
 * For each row (or run of rows) the active edge list is advanced, either
 * with one full-pixel step when can_full_step() allows it, or with
 * FAST_SAMPLES_Y sub-row steps.  The accumulated cells are then emitted
 * through tor_blt() and the cell list is reset.  When @unbounded is set,
 * rows that produced no cells are still reported through tor_blt_empty()
 * so that zero-coverage spans over [xmin, xmax) reach the caller.
 */
flatten static void
tor_render(struct sna *sna,
	   struct tor *converter,
	   struct sna_composite_spans_op *op,
	   pixman_region16_t *clip,
	   void (*span)(struct sna *sna,
			struct sna_composite_spans_op *op,
			pixman_region16_t *clip,
			const BoxRec *box,
			int coverage),
	   int unbounded)
{
	int ymin = converter->ymin;
	int xmin = converter->xmin;
	int xmax = converter->xmax;
	int i, j, h = converter->ymax - ymin;
	struct polygon *polygon = converter->polygon;
	struct cell_list *coverages = converter->coverages;
	struct active_list *active = converter->active;
	struct edge *buckets[FAST_SAMPLES_Y] = { 0 };

	__DBG(("%s: unbounded=%d\n", __FUNCTION__, unbounded));

	/* Render each pixel row. */
	for (i = 0; i < h; i = j) {
		int do_full_step = 0;

		j = i + 1;

		/* Determine if we can ignore this row or use the full pixel
		 * stepper. */
		if (!polygon->y_buckets[i]) {
			if (active->head.next == &active->tail) {
				/* Nothing active and nothing starting here:
				 * skip ahead to the next row with edges. */
				active->min_height = INT_MAX;
				active->is_vertical = 1;
				/* NOTE(review): this scan is not bounded by h;
				 * it presumably relies on a non-NULL sentinel
				 * bucket after the last row -- confirm. */
				for (; !polygon->y_buckets[j]; j++)
					;
				__DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
				       __FUNCTION__, i, j));

				if (unbounded)
					tor_blt_empty(sna, op, clip, span, i+ymin, j-i, xmin, xmax);
				continue;
			}

			do_full_step = can_full_step(active);
		}

		__DBG(("%s: y=%d [%d], do_full_step=%d, new edges=%d, min_height=%d, vertical=%d\n",
		       __FUNCTION__,
		       i, i+ymin, do_full_step,
		       polygon->y_buckets[i] != NULL,
		       active->min_height,
		       active->is_vertical));
		if (do_full_step) {
			assert(active->is_vertical);
			nonzero_row(active, coverages);

			/* Extend the run over following rows for as long as
			 * no new edge starts and every active edge still has
			 * at least two full rows left. */
			while (polygon->y_buckets[j] == NULL &&
			       active->min_height >= 2*FAST_SAMPLES_Y)
			{
				active->min_height -= FAST_SAMPLES_Y;
				j++;
			}
			if (j != i + 1)
				step_edges(active, j - (i + 1));

			__DBG(("%s: vertical edges, full step (%d, %d)\n",
			       __FUNCTION__,  i, j));
		} else {
			grid_scaled_y_t suby;

			fill_buckets(active, polygon->y_buckets[i], buckets);

			/* Subsample this row. */
			for (suby = 0; suby < FAST_SAMPLES_Y; suby++) {
				if (buckets[suby]) {
					merge_edges(active, buckets[suby]);
					/* Leave the scratch array cleared
					 * for the next pixel row. */
					buckets[suby] = NULL;
				}

				nonzero_subrow(active, coverages);
			}
		}

		if (coverages->head.next != &coverages->tail) {
			tor_blt(sna, op, clip, span, coverages,
				i+ymin, j-i, xmin, xmax,
				unbounded);
			cell_list_reset(coverages);
		} else if (unbounded)
			tor_blt_empty(sna, op, clip, span, i+ymin, j-i, xmin, xmax);

		active->min_height -= FAST_SAMPLES_Y;
	}
}
1330
1331static void
1332inplace_row(struct active_list *active, uint8_t *row, int width)
1333{
1334	struct edge *left = active->head.next;
1335
1336	assert(active->is_vertical);
1337
1338	while (&active->tail != left) {
1339		struct edge *right;
1340		int winding = left->dir;
1341		grid_scaled_x_t lfx, rfx;
1342		int lix, rix;
1343
1344		left->height_left -= FAST_SAMPLES_Y;
1345		if (!left->height_left) {
1346			left->prev->next = left->next;
1347			left->next->prev = left->prev;
1348		}
1349
1350		right = left->next;
1351		do {
1352			right->height_left -= FAST_SAMPLES_Y;
1353			if (!right->height_left) {
1354				right->prev->next = right->next;
1355				right->next->prev = right->prev;
1356			}
1357
1358			winding += right->dir;
1359			if (0 == winding && right->x.quo != right->next->x.quo)
1360				break;
1361
1362			right = right->next;
1363		} while (1);
1364
1365		if (left->x.quo < 0) {
1366			lix = lfx = 0;
1367		} else if (left->x.quo >= width * FAST_SAMPLES_X) {
1368			lix = width;
1369			lfx = 0;
1370		} else
1371			FAST_SAMPLES_X_TO_INT_FRAC(left->x.quo, lix, lfx);
1372
1373		if (right->x.quo < 0) {
1374			rix = rfx = 0;
1375		} else if (right->x.quo >= width * FAST_SAMPLES_X) {
1376			rix = width;
1377			rfx = 0;
1378		} else
1379			FAST_SAMPLES_X_TO_INT_FRAC(right->x.quo, rix, rfx);
1380		if (lix == rix) {
1381			if (rfx != lfx) {
1382				assert(lix < width);
1383				row[lix] += (rfx-lfx) * 256 / FAST_SAMPLES_X;
1384			}
1385		} else {
1386			assert(lix < width);
1387			if (lfx == 0)
1388				row[lix] = 0xff;
1389			else
1390				row[lix] += 256 - lfx * 256 / FAST_SAMPLES_X;
1391
1392			assert(rix <= width);
1393			if (rfx) {
1394				assert(rix < width);
1395				row[rix] += rfx * 256 / FAST_SAMPLES_X;
1396			}
1397
1398			if (rix > ++lix) {
1399				uint8_t *r = row + lix;
1400				rix -= lix;
1401#if 0
1402				if (rix == 1)
1403					*row = 0xff;
1404				else
1405					memset(row, 0xff, rix);
1406#else
1407				if ((uintptr_t)r & 1 && rix) {
1408					*r++ = 0xff;
1409					rix--;
1410				}
1411				if ((uintptr_t)r & 2 && rix >= 2) {
1412					*(uint16_t *)r = 0xffff;
1413					r += 2;
1414					rix -= 2;
1415				}
1416				if ((uintptr_t)r & 4 && rix >= 4) {
1417					*(uint32_t *)r = 0xffffffff;
1418					r += 4;
1419					rix -= 4;
1420				}
1421				while (rix >= 8) {
1422					*(uint64_t *)r = 0xffffffffffffffff;
1423					r += 8;
1424					rix -= 8;
1425				}
1426				if (rix & 4) {
1427					*(uint32_t *)r = 0xffffffff;
1428					r += 4;
1429				}
1430				if (rix & 2) {
1431					*(uint16_t *)r = 0xffff;
1432					r += 2;
1433				}
1434				if (rix & 1)
1435					*r = 0xff;
1436#endif
1437			}
1438		}
1439
1440		left = right->next;
1441	}
1442}
1443
/*
 * Accumulate one sub-sample row of the active edges into the signed
 * per-pixel delta buffer @row (@width entries) and advance every edge by
 * one sub-row.
 *
 * While walking the edges the winding number is summed; when it returns to
 * zero at an edge whose successor lies at a different x, the covered span
 * [xstart, edge x) is recorded as a positive delta at its start and a
 * negative delta at its end, each split across two neighbouring pixels
 * according to the fractional x position.  *@min and *@max are widened to
 * the range of pixels touched.
 *
 * Each edge then has height_left decremented: exhausted edges are unlinked
 * (and active->min_height is set to -1), the others are stepped in x and,
 * if they moved left of the previous edge, re-inserted further back to
 * keep the list sorted.
 */
inline static void
inplace_subrow(struct active_list *active, int8_t *row,
	       int width, int *min, int *max)
{
	struct edge *edge = active->head.next;
	grid_scaled_x_t prev_x = INT_MIN;
	int winding = 0, xstart = INT_MIN;

	while (&active->tail != edge) {
		struct edge *next = edge->next;

		winding += edge->dir;
		if (0 == winding) {
			if (edge->next->x.quo != edge->x.quo) {
				if (edge->x.quo <= xstart) {
					/* Span has no extent: discard it. */
					xstart = INT_MIN;
				} else  {
					grid_scaled_x_t fx;
					int ix;

					/* Left end: add coverage from xstart
					 * onwards. */
					if (xstart < FAST_SAMPLES_X * width) {
						FAST_SAMPLES_X_TO_INT_FRAC(xstart, ix, fx);
						if (ix < *min)
							*min = ix;

						row[ix++] += FAST_SAMPLES_X - fx;
						if (fx && ix < width)
							row[ix] += fx;
					}

					/* Right end: remove the coverage again
					 * from the edge position onwards. */
					xstart = edge->x.quo;
					if (xstart < FAST_SAMPLES_X * width) {
						FAST_SAMPLES_X_TO_INT_FRAC(xstart, ix, fx);
						row[ix] -= FAST_SAMPLES_X - fx;
						if (fx && ix + 1 < width)
							row[++ix] -= fx;

						if (ix >= *max)
							*max = ix + 1;

						xstart = INT_MIN;
					} else
						*max = width;
				}
			}
		} else if (xstart < 0) {
			/* Start of a new span, clamped to the left border. */
			xstart = MAX(edge->x.quo, 0);
		}

		if (--edge->height_left) {
			/* dy == 0 marks an edge that does not move in x. */
			if (edge->dy) {
				edge->x.quo += edge->dxdy.quo;
				edge->x.rem += edge->dxdy.rem;
				if (edge->x.rem >= 0) {
					++edge->x.quo;
					edge->x.rem -= edge->dy;
				}
			}

			if (edge->x.quo < prev_x) {
				/* The edge crossed its left neighbour: unlink
				 * it and walk backwards to its sorted place. */
				struct edge *pos = edge->prev;
				pos->next = next;
				next->prev = pos;
				do {
					pos = pos->prev;
				} while (edge->x.quo < pos->x.quo);
				pos->next->prev = edge;
				edge->next = pos->next;
				edge->prev = pos;
				pos->next = edge;
			} else
				prev_x = edge->x.quo;
		} else {
			/* Edge finished: drop it from the active list. */
			edge->prev->next = next;
			next->prev = edge->prev;
			active->min_height = -1;
		}

		edge = next;
	}
}
1525
1526inline static void
1527inplace_end_subrows(struct active_list *active, uint8_t *row,
1528		    int8_t *buf, int width)
1529{
1530	int cover = 0;
1531
1532	while (width >= 4) {
1533		uint32_t dw;
1534		int v;
1535
1536		dw = *(uint32_t *)buf;
1537		buf += 4;
1538
1539		if (dw == 0) {
1540			v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
1541			v -= v >> 8;
1542			v |= v << 8;
1543			dw = v | v << 16;
1544		} else {
1545			cover += (int8_t)(dw & 0xff);
1546			if (cover) {
1547				assert(cover > 0);
1548				v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
1549				v -= v >> 8;
1550				dw >>= 8;
1551				dw |= v << 24;
1552			} else
1553				dw >>= 8;
1554
1555			cover += (int8_t)(dw & 0xff);
1556			if (cover) {
1557				assert(cover > 0);
1558				v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
1559				v -= v >> 8;
1560				dw >>= 8;
1561				dw |= v << 24;
1562			} else
1563				dw >>= 8;
1564
1565			cover += (int8_t)(dw & 0xff);
1566			if (cover) {
1567				assert(cover > 0);
1568				v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
1569				v -= v >> 8;
1570				dw >>= 8;
1571				dw |= v << 24;
1572			} else
1573				dw >>= 8;
1574
1575			cover += (int8_t)(dw & 0xff);
1576			if (cover) {
1577				assert(cover > 0);
1578				v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
1579				v -= v >> 8;
1580				dw >>= 8;
1581				dw |= v << 24;
1582			} else
1583				dw >>= 8;
1584		}
1585
1586		*(uint32_t *)row = dw;
1587		row += 4;
1588		width -= 4;
1589	}
1590
1591	while (width--) {
1592		int v;
1593
1594		cover += *buf++;
1595		assert(cover >= 0);
1596
1597		v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
1598		v -= v >> 8;
1599		*row++ = v;
1600	}
1601}
1602
/* NOTE(review): presumably the size of the on-stack scratch row that
 * callers pass to tor_inplace() as @buf -- confirm against the callers. */
#define TOR_INPLACE_SIZE 128
/*
 * Scan-convert the polygon in @converter straight into the 8-bit pixmap
 * @scratch, one pixel row at a time.
 *
 * @mono must be zero (asserted).  @buf, when non-NULL, is used as the
 * working row in which coverage is accumulated before the result is
 * written to the pixmap row; when it is NULL the pixmap row itself is
 * used as the working row.
 *
 * Rows without any active or starting edges are cleared with memset().
 * Rows that can_full_step() accepts are produced by inplace_row() and
 * replicated over following rows for as long as no new edge starts;
 * all other rows are sub-sampled with inplace_subrow() and converted to
 * alpha by inplace_end_subrows().
 */
static void
tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
{
	int i, j, h = converter->ymax;
	struct polygon *polygon = converter->polygon;
	struct active_list *active = converter->active;
	struct edge *buckets[FAST_SAMPLES_Y] = { 0 };
	uint8_t *row = scratch->devPrivate.ptr;
	int stride = scratch->devKind;
	int width = scratch->drawable.width;

	__DBG(("%s: mono=%d, buf?=%d\n", __FUNCTION__, mono, buf != NULL));
	assert(!mono);
	assert(converter->ymin == 0);
	assert(converter->xmin == 0);
	assert(scratch->drawable.depth == 8);

	/* Render each pixel row. */
	for (i = 0; i < h; i = j) {
		int do_full_step = 0;
		/* Working row: the scratch buffer if one was supplied,
		 * otherwise the destination row itself. */
		void *ptr = buf ?: row;

		j = i + 1;

		/* Determine if we can ignore this row or use the full pixel
		 * stepper. */
		if (!polygon->y_buckets[i]) {
			if (active->head.next == &active->tail) {
				active->min_height = INT_MAX;
				active->is_vertical = 1;
				/* NOTE(review): this scan is not bounded by h;
				 * it presumably relies on a non-NULL sentinel
				 * bucket after the last row -- confirm. */
				for (; !polygon->y_buckets[j]; j++)
					;
				__DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
				       __FUNCTION__, i, j));

				/* Clear all skipped rows in one go. */
				memset(row, 0, stride*(j-i));
				row += stride*(j-i);
				continue;
			}

			do_full_step = can_full_step(active);
		}

		__DBG(("%s: y=%d, do_full_step=%d, new edges=%d, min_height=%d, vertical=%d\n",
		       __FUNCTION__,
		       i, do_full_step,
		       polygon->y_buckets[i] != NULL,
		       active->min_height,
		       active->is_vertical));
		if (do_full_step) {
			assert(active->is_vertical);

			memset(ptr, 0, width);
			inplace_row(active, ptr, width);
			if (row != ptr)
				memcpy(row, ptr, width);

			/* Replicate the row over following rows for as long
			 * as no new edge starts and every active edge still
			 * has at least two full rows left. */
			while (polygon->y_buckets[j] == NULL &&
			       active->min_height >= 2*FAST_SAMPLES_Y)
			{
				active->min_height -= FAST_SAMPLES_Y;
				row += stride;
				memcpy(row, ptr, width);
				j++;
			}
			if (j != i + 1)
				step_edges(active, j - (i + 1));

			__DBG(("%s: vertical edges, full step (%d, %d)\n",
			       __FUNCTION__,  i, j));
		} else {
			grid_scaled_y_t suby;
			/* Range of pixels touched by the sub-rows. */
			int min = width, max = 0;

			fill_buckets(active, polygon->y_buckets[i], buckets);

			/* Subsample this row. */
			memset(ptr, 0, width);
			for (suby = 0; suby < FAST_SAMPLES_Y; suby++) {
				if (buckets[suby]) {
					merge_edges(active, buckets[suby]);
					buckets[suby] = NULL;
				}

				inplace_subrow(active, ptr, width, &min, &max);
			}
			assert(min >= 0 && max <= width);
			/* Only [min, max) holds deltas; the pixels on either
			 * side are simply cleared. */
			memset(row, 0, min);
			if (max > min)
				inplace_end_subrows(active, row+min, (int8_t*)ptr+min, max-min);
			if (max < width)
				memset(row+max, 0, width-max);
		}

		active->min_height -= FAST_SAMPLES_Y;
		row += stride;
	}
}
1702
/*
 * One polygon edge of the mono (non-antialiased) rasteriser, linked into
 * either a per-row start bucket or the active edge list.
 */
struct mono_edge {
	struct mono_edge *next, *prev;

	/* Number of pixel rows this edge still spans; the edge is unlinked
	 * from the active list when it reaches zero. */
	int32_t height_left;
	/* Contribution to the winding number when the edge is crossed. */
	int32_t dir;

	/* y extent of the generating line (p2->y - p1->y), or 0 for an edge
	 * with no x movement. */
	int32_t dy;
	/* Current x position (quotient, in xFixed units) and remainder. */
	struct quorem x;
	/* x step per pixel row, as quotient and remainder over dy. */
	struct quorem dxdy;
};
1713
/*
 * Edge storage for the mono rasteriser.  Small polygons use the embedded
 * arrays; mono_polygon_init() switches to heap allocations when more
 * rows or edges are needed.
 */
struct mono_polygon {
	/* Number of entries of edges[] in use. */
	int num_edges;
	/* Edge storage: edges_embedded or a malloc'ed array. */
	struct mono_edge *edges;
	/* Per-row lists of the edges that start on that row:
	 * y_buckets_embedded or a malloc'ed array. */
	struct mono_edge **y_buckets;

	struct mono_edge *y_buckets_embedded[64];
	struct mono_edge edges_embedded[32];
};
1722
/* State of one mono (non-antialiased) rasterisation pass. */
struct mono {
	/* Leftmost edge on the current scan line. */
	/* head and tail are the sentinels bracketing the active edge list. */
	struct mono_edge head, tail;
	/* Non-zero while every edge merged so far has dy == 0. */
	int is_vertical;

	struct sna *sna;
	/* Composite operation the spans are emitted through. */
	struct sna_composite_op op;
	/* Clip region; its extents also define the rows being rendered. */
	pixman_region16_t clip;

	/* Span emitter, called with the x range [x1, x2) and a box whose
	 * y1/y2 have already been filled in. */
	fastcall void (*span)(struct mono *, int, int, BoxPtr);

	struct mono_polygon polygon;
};
1736
/* Convert a fixed-point coordinate to an integer pixel coordinate after
 * adding half of pixman_fixed_1_minus_e, i.e. round to the nearest pixel
 * (NOTE(review): exact halves presumably round down -- confirm). */
#define I(x) pixman_fixed_to_int ((x) + pixman_fixed_1_minus_e/2)
1738
1739static bool
1740mono_polygon_init(struct mono_polygon *polygon, BoxPtr box, int num_edges)
1741{
1742	unsigned h = box->y2 - box->y1;
1743
1744	polygon->y_buckets = polygon->y_buckets_embedded;
1745	if (h > ARRAY_SIZE (polygon->y_buckets_embedded)) {
1746		polygon->y_buckets = malloc (h * sizeof (struct mono_edge *));
1747		if (unlikely (NULL == polygon->y_buckets))
1748			return false;
1749	}
1750
1751	polygon->num_edges = 0;
1752	polygon->edges = polygon->edges_embedded;
1753	if (num_edges > (int)ARRAY_SIZE (polygon->edges_embedded)) {
1754		polygon->edges = malloc (num_edges * sizeof (struct mono_edge));
1755		if (unlikely (polygon->edges == NULL)) {
1756			if (polygon->y_buckets != polygon->y_buckets_embedded)
1757				free(polygon->y_buckets);
1758			return false;
1759		}
1760	}
1761
1762	memset(polygon->y_buckets, 0, h * sizeof (struct edge *));
1763	return true;
1764}
1765
1766static void
1767mono_polygon_fini(struct mono_polygon *polygon)
1768{
1769	if (polygon->y_buckets != polygon->y_buckets_embedded)
1770		free(polygon->y_buckets);
1771
1772	if (polygon->edges != polygon->edges_embedded)
1773		free(polygon->edges);
1774}
1775
1776static void
1777mono_add_line(struct mono *mono,
1778	      int dst_x, int dst_y,
1779	      xFixed top, xFixed bottom,
1780	      const xPointFixed *p1, const xPointFixed *p2,
1781	      int dir)
1782{
1783	struct mono_polygon *polygon = &mono->polygon;
1784	struct mono_edge *e;
1785	pixman_fixed_t dx;
1786	pixman_fixed_t dy;
1787	int y, ytop, ybot;
1788
1789	__DBG(("%s: top=%d, bottom=%d, line=(%d, %d), (%d, %d) delta=%dx%d, dir=%d\n",
1790	       __FUNCTION__,
1791	       (int)top, (int)bottom,
1792	       (int)p1->x, (int)p1->y, (int)p2->x, (int)p2->y,
1793	       dst_x, dst_y,
1794	       dir));
1795
1796	if (top > bottom) {
1797		const xPointFixed *t;
1798
1799		y = top;
1800		top = bottom;
1801		bottom = y;
1802
1803		t = p1;
1804		p1 = p2;
1805		p2 = t;
1806
1807		dir = -dir;
1808	}
1809
1810	y = I(top) + dst_y;
1811	ytop = MAX(y, mono->clip.extents.y1);
1812
1813	y = I(bottom) + dst_y;
1814	ybot = MIN(y, mono->clip.extents.y2);
1815
1816	if (ybot <= ytop) {
1817		__DBG(("discard clipped line\n"));
1818		return;
1819	}
1820
1821	e = polygon->edges + polygon->num_edges++;
1822	e->height_left = ybot - ytop;
1823	e->dir = dir;
1824
1825	dx = p2->x - p1->x;
1826	dy = p2->y - p1->y;
1827
1828	if (dx == 0) {
1829		e->x.quo = p1->x;
1830		e->x.rem = 0;
1831		e->dxdy.quo = 0;
1832		e->dxdy.rem = 0;
1833		e->dy = 0;
1834	} else {
1835		e->dxdy = floored_muldivrem (dx, pixman_fixed_1, dy);
1836		e->dy = dy;
1837
1838		e->x = floored_muldivrem ((ytop-dst_y) * pixman_fixed_1 + pixman_fixed_1_minus_e/2 - p1->y,
1839					  dx, dy);
1840		e->x.quo += p1->x;
1841		e->x.rem -= dy;
1842	}
1843	e->x.quo += dst_x*pixman_fixed_1;
1844
1845	{
1846		struct mono_edge **ptail = &polygon->y_buckets[ytop - mono->clip.extents.y1];
1847		if (*ptail)
1848			(*ptail)->prev = e;
1849		e->next = *ptail;
1850		e->prev = NULL;
1851		*ptail = e;
1852	}
1853}
1854
/*
 * Merge two lists of edges, each already sorted by ascending x.quo, into
 * one sorted doubly linked list and return its head.
 *
 * @head_a must not be NULL; @head_b may be NULL, in which case @head_a is
 * returned unchanged.  The prev pointer of the merged head is taken from
 * @head_a's original prev.  On equal x an edge from the list currently
 * being consumed stays in front.
 */
static struct mono_edge *
mono_merge_sorted_edges(struct mono_edge *head_a, struct mono_edge *head_b)
{
	struct mono_edge *head, **next, *prev;
	int32_t x;

	if (head_b == NULL)
		return head_a;

	prev = head_a->prev;
	next = &head;
	if (head_a->x.quo <= head_b->x.quo) {
		head = head_a;
	} else {
		head = head_b;
		head_b->prev = prev;
		/* The merged list starts with b: enter the loop at the
		 * half that consumes edges from b. */
		goto start_with_b;
	}

	do {
		/* Take edges from a while they do not exceed b's head... */
		x = head_b->x.quo;
		while (head_a != NULL && head_a->x.quo <= x) {
			prev = head_a;
			next = &head_a->next;
			head_a = head_a->next;
		}

		/* ...then splice b in behind the last edge taken. */
		head_b->prev = prev;
		*next = head_b;
		if (head_a == NULL)
			return head;

start_with_b:
		/* And the mirror image: consume from b, then splice in a. */
		x = head_a->x.quo;
		while (head_b != NULL && head_b->x.quo <= x) {
			prev = head_b;
			next = &head_b->next;
			head_b = head_b->next;
		}

		head_a->prev = prev;
		*next = head_a;
		if (head_b == NULL)
			return head;
	} while (1);
}
1901
/*
 * Sort a prefix of the singly walked list @list by ascending x.quo using
 * a recursive merge sort.
 *
 * The first two edges are ordered by hand; then, for i = 0 .. @level - 1,
 * a further run is sorted recursively (with level i) and merged in.  The
 * sorted run is returned through @head_out and the return value is the
 * first edge that was not consumed, or NULL when the whole list has been
 * sorted.
 */
static struct mono_edge *
mono_sort_edges(struct mono_edge *list,
		unsigned int level,
		struct mono_edge **head_out)
{
	struct mono_edge *head_other, *remaining;
	unsigned int i;

	head_other = list->next;

	/* A single edge is already sorted. */
	if (head_other == NULL) {
		*head_out = list;
		return NULL;
	}

	/* Order the first pair and detach it from the rest. */
	remaining = head_other->next;
	if (list->x.quo <= head_other->x.quo) {
		*head_out = list;
		head_other->next = NULL;
	} else {
		*head_out = head_other;
		head_other->prev = list->prev;
		head_other->next = list;
		list->prev = head_other;
		list->next = NULL;
	}

	for (i = 0; i < level && remaining; i++) {
		remaining = mono_sort_edges(remaining, i, &head_other);
		*head_out = mono_merge_sorted_edges(*head_out, head_other);
	}

	return remaining;
}
1936
1937static struct mono_edge *mono_filter(struct mono_edge *edges)
1938{
1939	struct mono_edge *e;
1940
1941	e = edges;
1942	do {
1943		struct mono_edge *n = e->next;
1944		if (e->dir == -n->dir &&
1945		    e->height_left == n->height_left &&
1946		    *(uint64_t *)&e->x == *(uint64_t *)&n->x &&
1947		    *(uint64_t *)&e->dxdy == *(uint64_t *)&n->dxdy) {
1948			if (e->prev)
1949				e->prev->next = n->next;
1950			else
1951				edges = n->next;
1952			if (n->next)
1953				n->next->prev = e->prev;
1954			else
1955				break;
1956
1957			e = n->next;
1958		} else
1959			e = e->next;
1960	} while (e->next);
1961
1962	return edges;
1963}
1964
/*
 * Sort the new edges in @unsorted, drop mutually cancelling pairs with
 * mono_filter(), and merge what is left into the sorted list @head.
 * Returns the head of the merged list.
 */
static struct mono_edge *
mono_merge_unsorted_edges(struct mono_edge *head, struct mono_edge *unsorted)
{
	struct mono_edge *sorted;

	mono_sort_edges(unsorted, UINT_MAX, &sorted);
	sorted = mono_filter(sorted);

	return mono_merge_sorted_edges(head, sorted);
}
1971
/*
 * Debugging aids for the mono edge lists.  Change the "#if 0" to "#if 1"
 * to compile them in; otherwise both macros expand to nothing.
 */
#if 0
/* Print every edge of the list whose x is below INT16_MAX << 16. */
static inline void
__dbg_mono_edges(const char *function, struct mono_edge *edges)
{
	ErrorF("%s: ", function);
	while (edges) {
		if (edges->x.quo < INT16_MAX << 16) {
			ErrorF("(%d.%06d)+(%d.%06d)x%d, ",
			       edges->x.quo, edges->x.rem,
			       edges->dxdy.quo, edges->dxdy.rem,
			       edges->dy*edges->dir);
		}
		edges = edges->next;
	}
	ErrorF("\n");
}
#define DBG_MONO_EDGES(x) __dbg_mono_edges(__FUNCTION__, x)
/* Assert that the list is sorted by non-decreasing x. */
static inline void
VALIDATE_MONO_EDGES(struct mono_edge *edges)
{
	int prev_x = edges->x.quo;
	while ((edges = edges->next)) {
		assert(edges->x.quo >= prev_x);
		prev_x = edges->x.quo;
	}
}

#else
#define DBG_MONO_EDGES(x)
#define VALIDATE_MONO_EDGES(x)
#endif
2003
2004inline static void
2005mono_merge_edges(struct mono *c, struct mono_edge *edges)
2006{
2007	struct mono_edge *e;
2008
2009	DBG_MONO_EDGES(edges);
2010
2011	for (e = edges; c->is_vertical && e; e = e->next)
2012		c->is_vertical = e->dy == 0;
2013
2014	c->head.next = mono_merge_unsorted_edges(c->head.next, edges);
2015}
2016
2017fastcall static void
2018mono_span(struct mono *c, int x1, int x2, BoxPtr box)
2019{
2020	__DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2));
2021
2022	box->x1 = x1;
2023	box->x2 = x2;
2024
2025	if (c->clip.data) {
2026		pixman_region16_t region;
2027
2028		pixman_region_init_rects(&region, box, 1);
2029		RegionIntersect(&region, &region, &c->clip);
2030		if (REGION_NUM_RECTS(&region)) {
2031			c->op.boxes(c->sna, &c->op,
2032				    REGION_RECTS(&region),
2033				    REGION_NUM_RECTS(&region));
2034			apply_damage(&c->op, &region);
2035		}
2036		pixman_region_fini(&region);
2037	} else {
2038		c->op.box(c->sna, &c->op, box);
2039		apply_damage_box(&c->op, box);
2040	}
2041}
2042
/*
 * Span emitter without clipping or damage tracking: stores [x1, x2) in
 * @box and passes it straight to the composite op's box() hook.
 */
fastcall static void
mono_span__fast(struct mono *c, int x1, int x2, BoxPtr box)
{
	__DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2));

	box->x1 = x1;
	box->x2 = x2;

	c->op.box(c->sna, &c->op, box);
}
2053
/*
 * Batch of boxes collected by the threaded span emitters; it is reached
 * through the priv pointer of the mono's composite op.
 */
struct mono_span_thread_boxes {
	/* Operation whose thread_boxes() hook receives each full batch. */
	const struct sna_composite_op *op;
/* Capacity of the batch: as many boxes as fit into 8192 bytes. */
#define MONO_SPAN_MAX_BOXES (8192/sizeof(BoxRec))
	BoxRec boxes[MONO_SPAN_MAX_BOXES];
	/* Number of entries of boxes[] currently in use. */
	int num_boxes;
};
2060
2061inline static void
2062thread_mono_span_add_boxes(struct mono *c, const BoxRec *box, int count)
2063{
2064	struct mono_span_thread_boxes *b = c->op.priv;
2065
2066	assert(count > 0 && count <= MONO_SPAN_MAX_BOXES);
2067	if (unlikely(b->num_boxes + count > MONO_SPAN_MAX_BOXES)) {
2068		b->op->thread_boxes(c->sna, b->op, b->boxes, b->num_boxes);
2069		b->num_boxes = 0;
2070	}
2071
2072	memcpy(b->boxes + b->num_boxes, box, count*sizeof(BoxRec));
2073	b->num_boxes += count;
2074	assert(b->num_boxes <= MONO_SPAN_MAX_BOXES);
2075}
2076
2077fastcall static void
2078thread_mono_span_clipped(struct mono *c, int x1, int x2, BoxPtr box)
2079{
2080	pixman_region16_t region;
2081
2082	__DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2));
2083
2084	box->x1 = x1;
2085	box->x2 = x2;
2086
2087	assert(c->clip.data);
2088
2089	pixman_region_init_rects(&region, box, 1);
2090	RegionIntersect(&region, &region, &c->clip);
2091	if (REGION_NUM_RECTS(&region))
2092		thread_mono_span_add_boxes(c,
2093					   REGION_RECTS(&region),
2094					   REGION_NUM_RECTS(&region));
2095	pixman_region_fini(&region);
2096}
2097
/*
 * Threaded span emitter without clipping: stores [x1, x2) in @box and
 * queues that single box on the per-thread batch.
 */
fastcall static void
thread_mono_span(struct mono *c, int x1, int x2, BoxPtr box)
{
	__DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2));

	box->x1 = x1;
	box->x2 = x2;
	thread_mono_span_add_boxes(c, box, 1);
}
2107
/*
 * Emit the spans of one run of @h identical rows starting at row @y
 * (relative to the top of the clip extents) and advance the active edges
 * by one row.
 *
 * Edges are walked left to right.  Each edge's pixel x is sampled before
 * the edge is stepped; the edge then has height_left decremented and is
 * either unlinked (when exhausted) or moved in x and, if it crossed its
 * left neighbour, re-inserted at its sorted position.  The winding number
 * decides where spans start and end; a span is closed only if the next
 * edge is more than one pixel further right, and it is clamped to the
 * clip extents before being passed to c->span().
 */
inline static void
mono_row(struct mono *c, int16_t y, int16_t h)
{
	struct mono_edge *edge = c->head.next;
	int prev_x = INT_MIN;
	/* INT16_MIN means "no span open". */
	int16_t xstart = INT16_MIN;
	int winding = 0;
	BoxRec box;

	DBG_MONO_EDGES(edge);
	VALIDATE_MONO_EDGES(&c->head);

	box.y1 = c->clip.extents.y1 + y;
	box.y2 = box.y1 + h;

	while (&c->tail != edge) {
		struct mono_edge *next = edge->next;
		/* Pixel position of the edge on this row, taken before the
		 * edge is stepped to the next row. */
		int16_t xend = I(edge->x.quo);

		if (--edge->height_left) {
			/* dy == 0 marks an edge that does not move in x. */
			if (edge->dy) {
				edge->x.quo += edge->dxdy.quo;
				edge->x.rem += edge->dxdy.rem;
				if (edge->x.rem >= 0) {
					++edge->x.quo;
					edge->x.rem -= edge->dy;
				}
			}

			if (edge->x.quo < prev_x) {
				/* Crossed the left neighbour: unlink and walk
				 * backwards to the sorted position. */
				struct mono_edge *pos = edge->prev;
				pos->next = next;
				next->prev = pos;
				do {
					pos = pos->prev;
				} while (edge->x.quo < pos->x.quo);
				pos->next->prev = edge;
				edge->next = pos->next;
				edge->prev = pos;
				pos->next = edge;
			} else
				prev_x = edge->x.quo;
		} else {
			/* Edge finished: drop it from the active list. */
			edge->prev->next = next;
			next->prev = edge->prev;
		}

		winding += edge->dir;
		if (winding == 0) {
			assert(I(next->x.quo) >= xend);
			/* Keep the span open if the next edge follows within
			 * one pixel, merging the neighbouring spans. */
			if (I(next->x.quo) > xend + 1) {
				if (xstart < c->clip.extents.x1)
					xstart = c->clip.extents.x1;
				if (xend > c->clip.extents.x2)
					xend = c->clip.extents.x2;
				if (xend > xstart)
					c->span(c, xstart, xend, &box);
				xstart = INT16_MIN;
			}
		} else if (xstart == INT16_MIN)
			xstart = xend;

		edge = next;
	}

	DBG_MONO_EDGES(c->head.next);
	VALIDATE_MONO_EDGES(&c->head);
}
2176
2177static bool
2178mono_init(struct mono *c, int num_edges)
2179{
2180	if (!mono_polygon_init(&c->polygon, &c->clip.extents, num_edges))
2181		return false;
2182
2183	c->head.dy = 0;
2184	c->head.height_left = INT_MAX;
2185	c->head.x.quo = INT16_MIN << 16;
2186	c->head.prev = NULL;
2187	c->head.next = &c->tail;
2188	c->tail.prev = &c->head;
2189	c->tail.next = NULL;
2190	c->tail.x.quo = INT16_MAX << 16;
2191	c->tail.height_left = INT_MAX;
2192	c->tail.dy = 0;
2193
2194	c->is_vertical = 1;
2195
2196	return true;
2197}
2198
/* Release the polygon storage owned by @mono. */
static void
mono_fini(struct mono *mono)
{
	mono_polygon_fini(&mono->polygon);
}
2204
2205static void
2206mono_step_edges(struct mono *c, int count)
2207{
2208	struct mono_edge *edge;
2209
2210	for (edge = c->head.next; edge != &c->tail; edge = edge->next) {
2211		edge->height_left -= count;
2212		if (! edge->height_left) {
2213			edge->prev->next = edge->next;
2214			edge->next->prev = edge->prev;
2215		}
2216	}
2217}
2218
2219flatten static void
2220mono_render(struct mono *mono)
2221{
2222	struct mono_polygon *polygon = &mono->polygon;
2223	int i, j, h = mono->clip.extents.y2 - mono->clip.extents.y1;
2224
2225	assert(mono->span);
2226
2227	for (i = 0; i < h; i = j) {
2228		j = i + 1;
2229
2230		if (polygon->y_buckets[i])
2231			mono_merge_edges(mono, polygon->y_buckets[i]);
2232
2233		if (mono->is_vertical) {
2234			struct mono_edge *e = mono->head.next;
2235			int min_height = h - i;
2236
2237			while (e != &mono->tail) {
2238				if (e->height_left < min_height)
2239					min_height = e->height_left;
2240				e = e->next;
2241			}
2242
2243			while (--min_height >= 1 && polygon->y_buckets[j] == NULL)
2244				j++;
2245			if (j != i + 1)
2246				mono_step_edges(mono, j - (i + 1));
2247		}
2248
2249		mono_row(mono, i, j-i);
2250
2251		/* XXX recompute after dropping edges? */
2252		if (mono->head.next == &mono->tail)
2253			mono->is_vertical = 1;
2254	}
2255}
2256
2257static int operator_is_bounded(uint8_t op)
2258{
2259	switch (op) {
2260	case PictOpOver:
2261	case PictOpOutReverse:
2262	case PictOpAdd:
2263		return true;
2264	default:
2265		return false;
2266	}
2267}
2268
2269inline static xFixed
2270line_x_for_y(const xLineFixed *l, xFixed y, bool ceil)
2271{
2272	xFixed_32_32 ex = (xFixed_32_32)(y - l->p1.y) * (l->p2.x - l->p1.x);
2273	xFixed d = l->p2.y - l->p1.y;
2274
2275	if (ceil)
2276		ex += (d - 1);
2277
2278	return l->p1.x + (xFixed) (ex / d);
2279}
2280
/* Integer part of a fixed-point value, rounding down ... */
#define pixman_fixed_integer_floor(V) pixman_fixed_to_int(V)
/* ... and rounding up (the value is passed through pixman_fixed_ceil first). */
#define pixman_fixed_integer_ceil(V) pixman_fixed_to_int(pixman_fixed_ceil(V))
2283
/*
 * Compute the bounding box, in whole pixels, of the @n trapezoids at @t
 * and store it in @box.  Trapezoids rejected by xTrapezoidValid() are
 * ignored.
 *
 * The vertical extent comes from the top/bottom values.  For the
 * horizontal extent the left and right lines are only evaluated, at the
 * trapezoid's top and bottom, when one of their end points lies outside
 * the bounds gathered so far.
 *
 * @n must be at least 1: the loop body runs before @n is tested.
 */
static void
trapezoids_bounds(int n, const xTrapezoid *t, BoxPtr box)
{
	xFixed x1, y1, x2, y2;

	/* XXX need 33 bits... */
	/* Start from an inverted box; halving the limits leaves headroom
	 * for the subtractions below. */
	x1 = y1 = INT_MAX / 2;
	x2 = y2 = INT_MIN / 2;

	do {
		xFixed fx1, fx2, v;

		/* In a do/while, continue jumps to the loop condition, so
		 * t and n are still advanced. */
		if (!xTrapezoidValid(t))
			continue;

		if (t->top < y1)
			y1 = t->top;
		if (t->bottom > y2)
			y2 = t->bottom;

		/* The OR of the two differences is negative if either end
		 * point of the left line lies left of x1. */
		if (((t->left.p1.x - x1) | (t->left.p2.x - x1)) < 0) {
			if (pixman_fixed_floor(t->left.p1.x) == pixman_fixed_floor(t->left.p2.x)) {
				/* Both ends in the same pixel column. */
				x1 = pixman_fixed_floor(t->left.p1.x);
			} else {
				if (t->left.p1.y == t->top)
					fx1 = t->left.p1.x;
				else
					fx1 = line_x_for_y(&t->left, t->top, false);

				if (t->left.p2.y == t->bottom)
					fx2 = t->left.p2.x;
				else
					fx2 = line_x_for_y(&t->left, t->bottom, false);

				v = min(fx1, fx2);
				if (v < x1)
					x1 = pixman_fixed_floor(v);
			}
		}

		/* Likewise: negative if either end point of the right line
		 * lies right of x2. */
		if (((x2 - t->right.p1.x) | (x2 - t->right.p2.x)) < 0) {
			if (pixman_fixed_floor(t->right.p1.x) == pixman_fixed_floor(t->right.p2.x)) {
				x2 = pixman_fixed_ceil(t->right.p1.x);
			} else {
				if (t->right.p1.y == t->top)
					fx1 = t->right.p1.x;
				else
					fx1 = line_x_for_y(&t->right, t->top, true);

				if (t->right.p2.y == t->bottom)
					fx2 = t->right.p2.x;
				else
					fx2 = line_x_for_y(&t->right, t->bottom, true);

				v = max(fx1, fx2);
				if (v > x2)
					x2 = pixman_fixed_ceil(v);
			}
		}
	} while (t++, --n);

	/* x1/x2 were already rounded to whole pixels above. */
	box->x1 = pixman_fixed_to_int(x1);
	box->x2 = pixman_fixed_to_int(x2);
	box->y1 = pixman_fixed_integer_floor(y1);
	box->y2 = pixman_fixed_integer_ceil(y2);
}
2350
2351static bool
2352is_mono(PicturePtr dst, PictFormatPtr mask)
2353{
2354	return mask ? mask->depth < 8 : dst->polyEdge==PolyEdgeSharp;
2355}
2356
/*
 * Try to rasterise the trapezoids directly into the destination on the
 * CPU with pixman.
 *
 * This path is only taken for PictOpAdd with an opaque solid source, a
 * destination of format a1 (mono) or a8 (otherwise), a composite clip
 * that contains the checked box, and a destination that is_gpu() does not
 * report as being on the GPU.
 *
 * Returns false if the request does not qualify, so that the caller can
 * try something else.  Returns true once the path has been chosen, even
 * if moving the drawable to the CPU or creating the image failed, in
 * which case nothing is drawn.
 */
static bool
trapezoids_inplace_fallback(struct sna *sna,
			    CARD8 op,
			    PicturePtr src, PicturePtr dst, PictFormatPtr mask,
			    int ntrap, xTrapezoid *traps)
{
	pixman_image_t *image;
	BoxRec box;
	uint32_t color;
	int dx, dy;

	if (op != PictOpAdd)
		return false;

	if (is_mono(dst, mask)) {
		if (dst->format != PICT_a1)
			return false;
	} else {
		if (dst->format != PICT_a8)
			return false;
	}

	if (!sna_picture_is_solid(src, &color) || (color >> 24) != 0xff) {
		DBG(("%s: not an opaque solid source\n", __FUNCTION__));
		return false;
	}

	/* The image is created without a clip, so the clip region must
	 * cover the drawable completely.
	 * NOTE(review): x2/y2 are set to the width/height, not to
	 * x + width / y + height; this only describes the whole drawable
	 * when its origin is (0, 0) -- confirm this is intended. */
	box.x1 = dst->pDrawable->x;
	box.y1 = dst->pDrawable->y;
	box.x2 = dst->pDrawable->width;
	box.y2 = dst->pDrawable->height;
	if (pixman_region_contains_rectangle(dst->pCompositeClip,
					     &box) != PIXMAN_REGION_IN) {
		DBG(("%s: requires clipping, drawable (%d,%d), (%d, %d), clip (%d, %d), (%d, %d)\n", __FUNCTION__,
		     box.x1, box.y1, box.x2, box.y2,
		     dst->pCompositeClip->extents.x1,
		     dst->pCompositeClip->extents.y1,
		     dst->pCompositeClip->extents.x2,
		     dst->pCompositeClip->extents.y2));
		return false;
	}

	if (is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
		DBG(("%s: not performing inplace as dst is already on the GPU\n",
		     __FUNCTION__));
		return false;
	}

	DBG(("%s\n", __FUNCTION__));

	image = NULL;
	if (sna_drawable_move_to_cpu(dst->pDrawable, MOVE_READ | MOVE_WRITE))
		image = image_from_pict(dst, false, &dx, &dy);
	if (image) {
		/* Trapezoid coordinates are relative to the drawable. */
		dx += dst->pDrawable->x;
		dy += dst->pDrawable->y;

		for (; ntrap; ntrap--, traps++)
			pixman_rasterize_trapezoid(image,
						   (pixman_trapezoid_t *)traps,
						   dx, dy);

		pixman_image_unref(image);
	}

	return true;
}
2424
/* Work item handed to rasterize_traps_thread(): one band of the mask. */
struct rasterize_traps_thread {
	/* Trapezoids to rasterize; ntrap entries are read. */
	xTrapezoid *traps;
	/* Start of this band in the output buffer; the band is zeroed first. */
	char *ptr;
	/* Size of one output row in bytes. */
	int stride;
	/* Band to rasterize; -x1/-y1 are used as the rasterization offset. */
	BoxRec bounds;
	/* Format to rasterize in; depths below 8 are converted to a8 afterwards. */
	pixman_format_code_t format;
	/* Number of entries in traps. */
	int ntrap;
};
2433
2434static void rasterize_traps_thread(void *arg)
2435{
2436	struct rasterize_traps_thread *thread = arg;
2437	pixman_image_t *image;
2438	int width, height, n;
2439
2440	width = thread->bounds.x2 - thread->bounds.x1;
2441	height = thread->bounds.y2 - thread->bounds.y1;
2442
2443	memset(thread->ptr, 0, thread->stride*height);
2444	if (PIXMAN_FORMAT_DEPTH(thread->format) < 8)
2445		image = pixman_image_create_bits(thread->format,
2446						 width, height,
2447						 NULL, 0);
2448	else
2449		image = pixman_image_create_bits(thread->format,
2450						 width, height,
2451						 (uint32_t *)thread->ptr,
2452						 thread->stride);
2453	if (image == NULL)
2454		return;
2455
2456	for (n = 0; n < thread->ntrap; n++)
2457		pixman_rasterize_trapezoid(image,
2458					   (pixman_trapezoid_t *)&thread->traps[n],
2459					   -thread->bounds.x1, -thread->bounds.y1);
2460
2461	if (PIXMAN_FORMAT_DEPTH(thread->format) < 8) {
2462		pixman_image_t *a8;
2463
2464		a8 = pixman_image_create_bits(PIXMAN_a8,
2465					      width, height,
2466					      (uint32_t *)thread->ptr,
2467					      thread->stride);
2468		if (a8) {
2469			pixman_image_composite(PIXMAN_OP_SRC,
2470					       image, NULL, a8,
2471					       0, 0,
2472					       0, 0,
2473					       0, 0,
2474					       width, height);
2475			pixman_image_unref(a8);
2476		}
2477	}
2478
2479	pixman_image_unref(image);
2480}
2481
2482inline static void trapezoid_origin(const xLineFixed *l, int16_t *x, int16_t *y)
2483{
2484	if (l->p1.y < l->p2.y) {
2485		*x = pixman_fixed_to_int(l->p1.x);
2486		*y = pixman_fixed_to_int(l->p1.y);
2487	} else {
2488		*x = pixman_fixed_to_int(l->p2.x);
2489		*y = pixman_fixed_to_int(l->p2.y);
2490	}
2491}
2492
/*
 * Generic trapezoid path: rasterize with pixman into a scratch mask and
 * composite src through that mask onto dst with CompositePicture().
 *
 * With a maskFormat, all trapezoids are accumulated into a single mask that
 * covers their clipped bounds.  Without one, a mask format is chosen from
 * dst->polyEdge (a1 for sharp edges, a8 otherwise) and every trapezoid is
 * rendered on its own through a recursive call.
 */
static void
trapezoids_fallback(struct sna *sna,
		    CARD8 op, PicturePtr src, PicturePtr dst,
		    PictFormatPtr maskFormat, INT16 xSrc, INT16 ySrc,
		    int ntrap, xTrapezoid * traps)
{
	ScreenPtr screen = dst->pDrawable->pScreen;

	if (maskFormat) {
		PixmapPtr scratch;
		PicturePtr mask;
		INT16 dst_x, dst_y;
		BoxRec bounds;
		int width, height, depth;
		pixman_image_t *image;
		pixman_format_code_t format;
		int error;

		/* The source offset is taken relative to the first trapezoid. */
		trapezoid_origin(&traps[0].left, &dst_x, &dst_y);

		trapezoids_bounds(ntrap, traps, &bounds);
		if (bounds.y1 >= bounds.y2 || bounds.x1 >= bounds.x2)
			return;

		DBG(("%s: bounds (%d, %d), (%d, %d)\n", __FUNCTION__,
		     bounds.x1, bounds.y1, bounds.x2, bounds.y2));

		if (!sna_compute_composite_extents(&bounds,
						   src, NULL, dst,
						   xSrc, ySrc,
						   0, 0,
						   bounds.x1, bounds.y1,
						   bounds.x2 - bounds.x1,
						   bounds.y2 - bounds.y1))
			return;

		DBG(("%s: extents (%d, %d), (%d, %d)\n", __FUNCTION__,
		     bounds.x1, bounds.y1, bounds.x2, bounds.y2));

		width  = bounds.x2 - bounds.x1;
		height = bounds.y2 - bounds.y1;
		/* Remove the drawable origin from the computed extents. */
		bounds.x1 -= dst->pDrawable->x;
		bounds.y1 -= dst->pDrawable->y;
		bounds.x2 -= dst->pDrawable->x;
		bounds.y2 -= dst->pDrawable->y;
		/* Map the requested mask depth onto a1, a4 or a8. */
		depth = maskFormat->depth;
		if (depth == 1) {
			format = PIXMAN_a1;
		} else if (depth <= 4) {
			format = PIXMAN_a4;
			depth = 4;
		} else
			format = PIXMAN_a8;

		DBG(("%s: mask (%dx%d) depth=%d, format=%08x\n",
		     __FUNCTION__, width, height, depth, format));
		if (is_gpu(sna, dst->pDrawable, PREFER_GPU_RENDER) ||
		    picture_is_gpu(sna, src)) {
			int num_threads;

			/* The upload buffer is always created 8 bits deep. */
			scratch = sna_pixmap_create_upload(screen,
							   width, height, 8,
							   KGEM_BUFFER_WRITE);
			if (!scratch)
				return;

			num_threads = sna_use_threads(width, height, 8);
			if (num_threads == 1) {
				if (depth < 8) {
					/* Rasterize into pixman-owned bits, expanded to a8 below. */
					image = pixman_image_create_bits(format, width, height,
									 NULL, 0);
				} else {
					memset(scratch->devPrivate.ptr, 0, scratch->devKind*height);

					image = pixman_image_create_bits(format, width, height,
									 scratch->devPrivate.ptr,
									 scratch->devKind);
				}
				if (image) {
					for (; ntrap; ntrap--, traps++)
						pixman_rasterize_trapezoid(image,
									   (pixman_trapezoid_t *)traps,
									   -bounds.x1, -bounds.y1);
					if (depth < 8) {
						pixman_image_t *a8;

						a8 = pixman_image_create_bits(PIXMAN_a8, width, height,
									      scratch->devPrivate.ptr,
									      scratch->devKind);
						if (a8) {
							pixman_image_composite(PIXMAN_OP_SRC,
									       image, NULL, a8,
									       0, 0,
									       0, 0,
									       0, 0,
									       width, height);
							format = PIXMAN_a8;
							depth = 8;
							pixman_image_unref(a8);
						}
					}

					pixman_image_unref(image);
				}
				/* Still not a8: the low-depth mask was never expanded. */
				if (format != PIXMAN_a8) {
					sna_pixmap_destroy(scratch);
					return;
				}
			} else {
				struct rasterize_traps_thread threads[num_threads];
				int y, dy, n;

				/* Template work item; each thread gets a copy with its own band. */
				threads[0].ptr = scratch->devPrivate.ptr;
				threads[0].stride = scratch->devKind;
				threads[0].traps = traps;
				threads[0].ntrap = ntrap;
				threads[0].bounds = bounds;
				threads[0].format = format;

				y = bounds.y1;
				/* Rows per band, rounded up. */
				dy = (height + num_threads - 1) / num_threads;

				for (n = 1; n < num_threads; n++) {
					threads[n] = threads[0];
					threads[n].ptr += (y - bounds.y1) * threads[n].stride;
					threads[n].bounds.y1 = y;
					threads[n].bounds.y2 = y += dy;

					sna_threads_run(rasterize_traps_thread, &threads[n]);
				}

				/* The calling thread rasterizes the remaining rows itself. */
				threads[0].ptr += (y - bounds.y1) * threads[0].stride;
				threads[0].bounds.y1 = y;
				threads[0].bounds.y2 = bounds.y2;
				rasterize_traps_thread(&threads[0]);

				sna_threads_wait();

				format = PIXMAN_a8;
				depth = 8;
			}
		} else {
			/* CPU path: rasterize straight into a scratch pixmap of the mask depth. */
			scratch = sna_pixmap_create_unattached(screen,
							       width, height,
							       depth);
			if (!scratch)
				return;

			memset(scratch->devPrivate.ptr, 0, scratch->devKind*height);
			image = pixman_image_create_bits(format, width, height,
							 scratch->devPrivate.ptr,
							 scratch->devKind);
			if (image) {
				for (; ntrap; ntrap--, traps++)
					pixman_rasterize_trapezoid(image,
								   (pixman_trapezoid_t *)traps,
								   -bounds.x1, -bounds.y1);
				pixman_image_unref(image);
			}
		}

		/* Wrap the scratch pixmap in a picture and composite through it. */
		mask = CreatePicture(0, &scratch->drawable,
				     PictureMatchFormat(screen, depth, format),
				     0, 0, serverClient, &error);
		if (mask) {
			CompositePicture(op, src, mask, dst,
					 xSrc + bounds.x1 - dst_x,
					 ySrc + bounds.y1 - dst_y,
					 0, 0,
					 bounds.x1, bounds.y1,
					 width, height);
			FreePicture(mask, 0);
		}
		sna_pixmap_destroy(scratch);
	} else {
		if (dst->polyEdge == PolyEdgeSharp)
			maskFormat = PictureMatchFormat(screen, 1, PICT_a1);
		else
			maskFormat = PictureMatchFormat(screen, 8, PICT_a8);

		/* No mask format: composite the trapezoids one at a time. */
		for (; ntrap; ntrap--, traps++)
			trapezoids_fallback(sna, op,
					    src, dst, maskFormat,
					    xSrc, ySrc, 1, traps);
	}
}
2679
2680static bool
2681composite_aligned_boxes(struct sna *sna,
2682			CARD8 op,
2683			PicturePtr src,
2684			PicturePtr dst,
2685			PictFormatPtr maskFormat,
2686			INT16 src_x, INT16 src_y,
2687			int ntrap, const xTrapezoid *traps,
2688			bool force_fallback)
2689{
2690	BoxRec stack_boxes[64], *boxes;
2691	pixman_region16_t region, clip;
2692	struct sna_composite_op tmp;
2693	bool ret = true;
2694	int dx, dy, n, num_boxes;
2695
2696	if (NO_ALIGNED_BOXES)
2697		return false;
2698
2699	DBG(("%s\n", __FUNCTION__));
2700
2701	boxes = stack_boxes;
2702	if (ntrap > (int)ARRAY_SIZE(stack_boxes)) {
2703		boxes = malloc(sizeof(BoxRec)*ntrap);
2704		if (boxes == NULL)
2705			return false;
2706	}
2707
2708	dx = dst->pDrawable->x;
2709	dy = dst->pDrawable->y;
2710
2711	region.extents.x1 = region.extents.y1 = 32767;
2712	region.extents.x2 = region.extents.y2 = -32767;
2713	num_boxes = 0;
2714	for (n = 0; n < ntrap; n++) {
2715		boxes[num_boxes].x1 = dx + pixman_fixed_to_int(traps[n].left.p1.x + pixman_fixed_1_minus_e/2);
2716		boxes[num_boxes].y1 = dy + pixman_fixed_to_int(traps[n].top + pixman_fixed_1_minus_e/2);
2717		boxes[num_boxes].x2 = dx + pixman_fixed_to_int(traps[n].right.p2.x + pixman_fixed_1_minus_e/2);
2718		boxes[num_boxes].y2 = dy + pixman_fixed_to_int(traps[n].bottom + pixman_fixed_1_minus_e/2);
2719
2720		if (boxes[num_boxes].x1 >= boxes[num_boxes].x2)
2721			continue;
2722		if (boxes[num_boxes].y1 >= boxes[num_boxes].y2)
2723			continue;
2724
2725		if (boxes[num_boxes].x1 < region.extents.x1)
2726			region.extents.x1 = boxes[num_boxes].x1;
2727		if (boxes[num_boxes].x2 > region.extents.x2)
2728			region.extents.x2 = boxes[num_boxes].x2;
2729
2730		if (boxes[num_boxes].y1 < region.extents.y1)
2731			region.extents.y1 = boxes[num_boxes].y1;
2732		if (boxes[num_boxes].y2 > region.extents.y2)
2733			region.extents.y2 = boxes[num_boxes].y2;
2734
2735		num_boxes++;
2736	}
2737
2738	if (num_boxes == 0)
2739		goto free_boxes;
2740
2741	DBG(("%s: extents (%d, %d), (%d, %d) offset of (%d, %d)\n",
2742	     __FUNCTION__,
2743	     region.extents.x1, region.extents.y1,
2744	     region.extents.x2, region.extents.y2,
2745	     region.extents.x1 - boxes[0].x1,
2746	     region.extents.y1 - boxes[0].y1));
2747
2748	src_x += region.extents.x1 - boxes[0].x1;
2749	src_y += region.extents.y1 - boxes[0].y1;
2750
2751	if (!sna_compute_composite_region(&clip,
2752					  src, NULL, dst,
2753					  src_x,  src_y,
2754					  0, 0,
2755					  region.extents.x1 - dx, region.extents.y1 - dy,
2756					  region.extents.x2 - region.extents.x1,
2757					  region.extents.y2 - region.extents.y1)) {
2758		DBG(("%s: trapezoids do not intersect drawable clips\n",
2759		     __FUNCTION__)) ;
2760		goto done;
2761	}
2762
2763	if (force_fallback ||
2764	    !sna->render.composite(sna, op, src, NULL, dst,
2765				   src_x,  src_y,
2766				   0, 0,
2767				   clip.extents.x1,  clip.extents.y1,
2768				   clip.extents.x2 - clip.extents.x1,
2769				   clip.extents.y2 - clip.extents.y1,
2770				   memset(&tmp, 0, sizeof(tmp)))) {
2771		unsigned int flags;
2772		pixman_box16_t *b;
2773		int i, count;
2774
2775		DBG(("%s: composite render op not supported\n",
2776		     __FUNCTION__));
2777
2778		flags = MOVE_READ | MOVE_WRITE;
2779		if (n == 1 && op <= PictOpSrc)
2780			flags = MOVE_WRITE | MOVE_INPLACE_HINT;
2781
2782		if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &clip, flags))
2783			goto done;
2784		if (dst->alphaMap  &&
2785		    !sna_drawable_move_to_cpu(dst->alphaMap->pDrawable,
2786					      MOVE_READ | MOVE_WRITE))
2787			goto done;
2788		if (src->pDrawable) {
2789			if (!sna_drawable_move_to_cpu(src->pDrawable,
2790						      MOVE_READ))
2791				goto done;
2792			if (src->alphaMap &&
2793			    !sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
2794						      MOVE_READ))
2795				goto done;
2796		}
2797
2798		DBG(("%s: fbComposite()\n", __FUNCTION__));
2799		if (maskFormat) {
2800			pixman_region_init_rects(&region, boxes, num_boxes);
2801			RegionIntersect(&region, &region, &clip);
2802
2803			b = REGION_RECTS(&region);
2804			count = REGION_NUM_RECTS(&region);
2805			for (i = 0; i < count; i++) {
2806				fbComposite(op, src, NULL, dst,
2807					    src_x + b[i].x1 - boxes[0].x1,
2808					    src_y + b[i].y1 - boxes[0].y1,
2809					    0, 0,
2810					    b[i].x1, b[i].y1,
2811					    b[i].x2 - b[i].x1, b[i].y2 - b[i].y1);
2812			}
2813			pixman_region_fini(&region);
2814		} else {
2815			for (n = 0; n < num_boxes; n++) {
2816				pixman_region_init_rects(&region, &boxes[n], 1);
2817				RegionIntersect(&region, &region, &clip);
2818				b = REGION_RECTS(&region);
2819				count = REGION_NUM_RECTS(&region);
2820				for (i = 0; i < count; i++) {
2821					fbComposite(op, src, NULL, dst,
2822						    src_x + b[i].x1 - boxes[0].x1,
2823						    src_y + b[i].y1 - boxes[0].y1,
2824						    0, 0,
2825						    b[i].x1, b[i].y1,
2826						    b[i].x2 - b[i].x1, b[i].y2 - b[i].y1);
2827				}
2828				pixman_region_fini(&region);
2829				pixman_region_fini(&region);
2830			}
2831		}
2832		ret = true;
2833		goto done;
2834	}
2835
2836	if (maskFormat ||
2837	    (op == PictOpSrc || op == PictOpClear) ||
2838	    num_boxes == 1) {
2839		pixman_region_init_rects(&region, boxes, num_boxes);
2840		RegionIntersect(&region, &region, &clip);
2841		if (REGION_NUM_RECTS(&region)) {
2842			tmp.boxes(sna, &tmp,
2843				  REGION_RECTS(&region),
2844				  REGION_NUM_RECTS(&region));
2845			apply_damage(&tmp, &region);
2846		}
2847		pixman_region_fini(&region);
2848	} else {
2849		for (n = 0; n < num_boxes; n++) {
2850			pixman_region_init_rects(&region, &boxes[n], 1);
2851			RegionIntersect(&region, &region, &clip);
2852			if (REGION_NUM_RECTS(&region)) {
2853				tmp.boxes(sna, &tmp,
2854					  REGION_RECTS(&region),
2855					  REGION_NUM_RECTS(&region));
2856				apply_damage(&tmp, &region);
2857			}
2858			pixman_region_fini(&region);
2859		}
2860	}
2861	tmp.done(sna, &tmp);
2862
2863done:
2864	REGION_UNINIT(NULL, &clip);
2865free_boxes:
2866	if (boxes != stack_boxes)
2867		free(boxes);
2868
2869	return ret;
2870}
2871
2872static inline int grid_coverage(int samples, pixman_fixed_t f)
2873{
2874	return (samples * pixman_fixed_frac(f) + pixman_fixed_1/2) / pixman_fixed_1;
2875}
2876
2877inline static void
2878composite_unaligned_box(struct sna *sna,
2879			struct sna_composite_spans_op *tmp,
2880			const BoxRec *box,
2881			float opacity,
2882			pixman_region16_t *clip)
2883{
2884	assert(opacity != 0.);
2885
2886	if (clip) {
2887		pixman_region16_t region;
2888
2889		pixman_region_init_rects(&region, box, 1);
2890		RegionIntersect(&region, &region, clip);
2891		if (REGION_NUM_RECTS(&region))
2892			tmp->boxes(sna, tmp,
2893				   REGION_RECTS(&region),
2894				   REGION_NUM_RECTS(&region),
2895				   opacity);
2896		pixman_region_fini(&region);
2897	} else
2898		tmp->box(sna, tmp, box, opacity);
2899}
2900
2901inline static void
2902composite_unaligned_trap_row(struct sna *sna,
2903			     struct sna_composite_spans_op *tmp,
2904			     const xTrapezoid *trap, int dx,
2905			     int y1, int y2, int covered,
2906			     pixman_region16_t *clip)
2907{
2908	BoxRec box;
2909	int opacity;
2910	int x1, x2;
2911#define u8_to_float(x) ((x) * (1.f/255))
2912
2913	if (covered == 0)
2914		return;
2915
2916	x1 = dx + pixman_fixed_to_int(trap->left.p1.x);
2917	x2 = dx + pixman_fixed_to_int(trap->right.p1.x);
2918	if (clip) {
2919		if (y2 > clip->extents.y2)
2920			y2 = clip->extents.y2;
2921		if (y1 < clip->extents.y1)
2922			y1 = clip->extents.y1;
2923		if (y1 >= y2)
2924			return;
2925
2926		if (x2 < clip->extents.x1 || x1 > clip->extents.x2)
2927			return;
2928	}
2929
2930	box.y1 = y1;
2931	box.y2 = y2;
2932
2933	if (x1 == x2) {
2934		box.x1 = x1;
2935		box.x2 = x2 + 1;
2936
2937		opacity = covered;
2938		opacity *= grid_coverage(SAMPLES_X, trap->right.p1.x) - grid_coverage(SAMPLES_X, trap->left.p1.x);
2939
2940		if (opacity)
2941			composite_unaligned_box(sna, tmp, &box,
2942						u8_to_float(opacity), clip);
2943	} else {
2944		if (pixman_fixed_frac(trap->left.p1.x)) {
2945			box.x1 = x1;
2946			box.x2 = ++x1;
2947
2948			opacity = covered;
2949			opacity *= SAMPLES_X - grid_coverage(SAMPLES_X, trap->left.p1.x);
2950
2951			if (opacity)
2952				composite_unaligned_box(sna, tmp, &box,
2953							u8_to_float(opacity), clip);
2954		}
2955
2956		if (x2 > x1) {
2957			box.x1 = x1;
2958			box.x2 = x2;
2959
2960			composite_unaligned_box(sna, tmp, &box,
2961						covered == SAMPLES_Y ? 1. : u8_to_float(covered*SAMPLES_X),
2962						clip);
2963		}
2964
2965		if (pixman_fixed_frac(trap->right.p1.x)) {
2966			box.x1 = x2;
2967			box.x2 = x2 + 1;
2968
2969			opacity = covered;
2970			opacity *= grid_coverage(SAMPLES_X, trap->right.p1.x);
2971
2972			if (opacity)
2973				composite_unaligned_box(sna, tmp, &box,
2974							u8_to_float(opacity), clip);
2975		}
2976	}
2977}
2978
2979flatten static void
2980composite_unaligned_trap(struct sna *sna,
2981			struct sna_composite_spans_op *tmp,
2982			const xTrapezoid *trap,
2983			int dx, int dy,
2984			pixman_region16_t *clip)
2985{
2986	int y1, y2;
2987
2988	y1 = dy + pixman_fixed_to_int(trap->top);
2989	y2 = dy + pixman_fixed_to_int(trap->bottom);
2990
2991	DBG(("%s: y1=%d, y2=%d\n", __FUNCTION__, y1, y2));
2992
2993	if (y1 == y2) {
2994		composite_unaligned_trap_row(sna, tmp, trap, dx,
2995					     y1, y1 + 1,
2996					     grid_coverage(SAMPLES_Y, trap->bottom) - grid_coverage(SAMPLES_Y, trap->top),
2997					     clip);
2998	} else {
2999		if (pixman_fixed_frac(trap->top)) {
3000			composite_unaligned_trap_row(sna, tmp, trap, dx,
3001						     y1, y1 + 1,
3002						     SAMPLES_Y - grid_coverage(SAMPLES_Y, trap->top),
3003						     clip);
3004			y1++;
3005		}
3006
3007		if (y2 > y1)
3008			composite_unaligned_trap_row(sna, tmp, trap, dx,
3009						     y1, y2,
3010						     SAMPLES_Y,
3011						     clip);
3012
3013		if (pixman_fixed_frac(trap->bottom))
3014			composite_unaligned_trap_row(sna, tmp, trap, dx,
3015						     y2, y2 + 1,
3016						     grid_coverage(SAMPLES_Y, trap->bottom),
3017						     clip);
3018	}
3019
3020	if (tmp->base.damage) {
3021		BoxRec box;
3022
3023		box.x1 = dx + pixman_fixed_to_int(trap->left.p1.x);
3024		box.x2 = dx + pixman_fixed_to_int(trap->right.p1.x + pixman_fixed_1_minus_e);
3025		box.y1 = dy + pixman_fixed_to_int(trap->top);
3026		box.y2 = dy + pixman_fixed_to_int(trap->bottom + pixman_fixed_1_minus_e);
3027
3028		if (clip) {
3029			pixman_region16_t region;
3030
3031			pixman_region_init_rects(&region, &box, 1);
3032			RegionIntersect(&region, &region, clip);
3033			if (REGION_NUM_RECTS(&region))
3034				apply_damage(&tmp->base, &region);
3035			RegionUninit(&region);
3036		} else
3037			apply_damage_box(&tmp->base, &box);
3038	}
3039}
3040
3041inline static void
3042blt_opacity(PixmapPtr scratch,
3043	    int x1, int x2,
3044	    int y, int h,
3045	    uint8_t opacity)
3046{
3047	uint8_t *ptr;
3048
3049	if (opacity == 0xff)
3050		return;
3051
3052	if (x1 < 0)
3053		x1 = 0;
3054	if (x2 > scratch->drawable.width)
3055		x2 = scratch->drawable.width;
3056	if (x1 >= x2)
3057		return;
3058
3059	x2 -= x1;
3060
3061	ptr = scratch->devPrivate.ptr;
3062	ptr += scratch->devKind * y;
3063	ptr += x1;
3064	do {
3065		if (x2 == 1)
3066			*ptr = opacity;
3067		else
3068			memset(ptr, opacity, x2);
3069		ptr += scratch->devKind;
3070	} while (--h);
3071}
3072
3073static void
3074blt_unaligned_box_row(PixmapPtr scratch,
3075		      BoxPtr extents,
3076		      const xTrapezoid *trap,
3077		      int y1, int y2,
3078		      int covered)
3079{
3080	int x1, x2;
3081
3082	if (y2 > scratch->drawable.height)
3083		y2 = scratch->drawable.height;
3084	if (y1 < 0)
3085		y1 = 0;
3086	if (y1 >= y2)
3087		return;
3088
3089	y2 -= y1;
3090
3091	x1 = pixman_fixed_to_int(trap->left.p1.x);
3092	x2 = pixman_fixed_to_int(trap->right.p1.x);
3093
3094	x1 -= extents->x1;
3095	x2 -= extents->x1;
3096
3097	if (x1 == x2) {
3098		blt_opacity(scratch,
3099			    x1, x1+1,
3100			    y1, y2,
3101			    covered * (grid_coverage(SAMPLES_X, trap->right.p1.x) - grid_coverage(SAMPLES_X, trap->left.p1.x)));
3102	} else {
3103		if (pixman_fixed_frac(trap->left.p1.x)) {
3104			blt_opacity(scratch,
3105				    x1, x1 + 1,
3106				    y1, y2,
3107				    covered * (SAMPLES_X - grid_coverage(SAMPLES_X, trap->left.p1.x)));
3108			x1++;
3109		}
3110
3111		if (x2 > x1) {
3112			blt_opacity(scratch,
3113				    x1, x2,
3114				    y1, y2,
3115				    covered*SAMPLES_X);
3116		}
3117
3118		if (pixman_fixed_frac(trap->right.p1.x))
3119			blt_opacity(scratch,
3120				    x2, x2 + 1,
3121				    y1, y2,
3122				    covered * grid_coverage(SAMPLES_X, trap->right.p1.x));
3123	}
3124}
3125
/*
 * Constants for arithmetic on two 8-bit channels packed into bits 0-7 and
 * 16-23 of a 32-bit word.
 */
/* Rounding bias for a single 8-bit channel. */
#define ONE_HALF 0x7f
/* Selects the two packed channels. */
#define RB_MASK 0x00ff00ff
/* ONE_HALF replicated into both packed channels. */
#define RB_ONE_HALF 0x007f007f
/* 0x100 in both packed channels, used for saturation. */
#define RB_MASK_PLUS_ONE 0x01000100
/* Width in bits of one channel. */
#define G_SHIFT 8
3131
3132static force_inline uint32_t
3133mul8x2_8 (uint32_t a, uint8_t b)
3134{
3135	uint32_t t = (a & RB_MASK) * b + RB_ONE_HALF;
3136	return ((t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT) & RB_MASK;
3137}
3138
3139static force_inline uint32_t
3140add8x2_8x2(uint32_t a, uint32_t b)
3141{
3142	uint32_t t = a + b;
3143	t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);
3144	return t & RB_MASK;
3145}
3146
3147static force_inline uint32_t
3148lerp8x4(uint32_t src, uint8_t a, uint32_t dst)
3149{
3150	return (add8x2_8x2(mul8x2_8(src, a),
3151			   mul8x2_8(dst, ~a)) |
3152		add8x2_8x2(mul8x2_8(src >> G_SHIFT, a),
3153			   mul8x2_8(dst >> G_SHIFT, ~a)) << G_SHIFT);
3154}
3155
3156inline static void
3157lerp32_opacity(PixmapPtr scratch,
3158	       uint32_t color,
3159	       int16_t x, int16_t w,
3160	       int16_t y, int16_t h,
3161	       uint8_t opacity)
3162{
3163	uint32_t *ptr;
3164	int stride, i;
3165
3166	ptr = (uint32_t*)((uint8_t *)scratch->devPrivate.ptr + scratch->devKind * y);
3167	ptr += x;
3168	stride = scratch->devKind / 4;
3169
3170	if (opacity == 0xff) {
3171		if ((w | h) == 1) {
3172			*ptr = color;
3173		} else {
3174			if (w < 16) {
3175				do {
3176					for (i = 0; i < w; i++)
3177						ptr[i] = color;
3178					ptr += stride;
3179				} while (--h);
3180			} else {
3181				pixman_fill(ptr, stride, 32,
3182					    0, 0, w, h, color);
3183			}
3184		}
3185	} else {
3186		if ((w | h) == 1) {
3187			*ptr = lerp8x4(color, opacity, *ptr);
3188		} else if (w == 1) {
3189			do {
3190				*ptr = lerp8x4(color, opacity, *ptr);
3191				ptr += stride;
3192			} while (--h);
3193		} else{
3194			do {
3195				for (i = 0; i < w; i++)
3196					ptr[i] = lerp8x4(color, opacity, ptr[i]);
3197				ptr += stride;
3198			} while (--h);
3199		}
3200	}
3201}
3202
3203static void
3204lerp32_unaligned_box_row(PixmapPtr scratch, uint32_t color,
3205			 const BoxRec *extents,
3206			 const xTrapezoid *trap, int16_t dx,
3207			 int16_t y, int16_t h,
3208			 uint8_t covered)
3209{
3210	int16_t x1 = pixman_fixed_to_int(trap->left.p1.x) + dx;
3211	uint16_t fx1 = grid_coverage(SAMPLES_X, trap->left.p1.x);
3212	int16_t x2 = pixman_fixed_to_int(trap->right.p2.x) + dx;
3213	uint16_t fx2 = grid_coverage(SAMPLES_X, trap->right.p2.x);
3214
3215	if (x1 < extents->x1)
3216		x1 = extents->x1, fx1 = 0;
3217	if (x2 >= extents->x2)
3218		x2 = extents->x2, fx2 = 0;
3219
3220	DBG(("%s: x=(%d.%d, %d.%d), y=%dx%d, covered=%d\n", __FUNCTION__,
3221	     x1, fx1, x2, fx2, y, h, covered));
3222
3223	if (x1 < x2) {
3224		if (fx1) {
3225			lerp32_opacity(scratch, color,
3226				       x1, 1,
3227				       y, h,
3228				       covered * (SAMPLES_X - fx1));
3229			x1++;
3230		}
3231
3232		if (x2 > x1) {
3233			lerp32_opacity(scratch, color,
3234				       x1, x2-x1,
3235				       y, h,
3236				       covered*SAMPLES_X);
3237		}
3238
3239		if (fx2) {
3240			lerp32_opacity(scratch, color,
3241				       x2, 1,
3242				       y, h,
3243				       covered * fx2);
3244		}
3245	} else if (x1 == x2 && fx2 > fx1) {
3246		lerp32_opacity(scratch, color,
3247			       x1, 1,
3248			       y, h,
3249			       covered * (fx2 - fx1));
3250	}
3251}
3252
3253struct pixman_inplace {
3254	pixman_image_t *image, *source, *mask;
3255	uint32_t color;
3256	uint32_t *bits;
3257	int dx, dy;
3258	int sx, sy;
3259	uint8_t op;
3260};
3261
3262static force_inline uint8_t
3263mul_8_8(uint8_t a, uint8_t b)
3264{
3265    uint16_t t = a * (uint16_t)b + 0x7f;
3266    return ((t >> 8) + t) >> 8;
3267}
3268
/*
 * Scale the 8-bit channel of `s` located at bit offset `shift` by `a`/255
 * and return it at the same bit position.
 */
static inline uint32_t multa(uint32_t s, uint8_t a, int shift)
{
	/*
	 * Widen to uint32_t before shifting: the uint8_t result would
	 * otherwise be promoted to int, and shifting a value of 0x80 or more
	 * left by 24 overflows a signed int.
	 */
	return (uint32_t)mul_8_8((s >> shift) & 0xff, a) << shift;
}
3273
/* Scale each of the four 8-bit channels of `color` by `alpha`/255. */
static inline uint32_t mul_4x8_8(uint32_t color, uint8_t alpha)
{
	uint32_t scaled = 0;
	int shift;

	for (shift = 24; shift >= 0; shift -= 8)
		scaled |= multa(color, alpha, shift);

	return scaled;
}
3286
3287inline static void
3288pixsolid_opacity(struct pixman_inplace *pi,
3289		 int16_t x, int16_t w,
3290		 int16_t y, int16_t h,
3291		 uint8_t opacity)
3292{
3293	if (opacity == 0xff)
3294		*pi->bits = pi->color;
3295	else
3296		*pi->bits = mul_4x8_8(pi->color, opacity);
3297	pixman_image_composite(pi->op, pi->source, NULL, pi->image,
3298			       0, 0, 0, 0, pi->dx + x, pi->dy + y, w, h);
3299}
3300
3301static void
3302pixsolid_unaligned_box_row(struct pixman_inplace *pi,
3303			   const BoxRec *extents,
3304			   const xTrapezoid *trap,
3305			   int16_t y, int16_t h,
3306			   uint8_t covered)
3307{
3308	int16_t x1 = pixman_fixed_to_int(trap->left.p1.x);
3309	uint16_t fx1 = grid_coverage(SAMPLES_X, trap->left.p1.x);
3310	int16_t x2 = pixman_fixed_to_int(trap->right.p1.x);
3311	uint16_t fx2 = grid_coverage(SAMPLES_X, trap->right.p1.x);
3312
3313	if (x1 < extents->x1)
3314		x1 = extents->x1, fx1 = 0;
3315	if (x2 >= extents->x2)
3316		x2 = extents->x2, fx2 = 0;
3317
3318	if (x1 < x2) {
3319		if (fx1) {
3320			pixsolid_opacity(pi, x1, 1, y, h,
3321					 covered * (SAMPLES_X - fx1));
3322			x1++;
3323		}
3324
3325		if (x2 > x1)
3326			pixsolid_opacity(pi, x1, x2-x1, y, h, covered*SAMPLES_X);
3327
3328		if (fx2)
3329			pixsolid_opacity(pi, x2, 1, y, h, covered * fx2);
3330	} else if (x1 == x2 && fx2 > fx1) {
3331		pixsolid_opacity(pi, x1, 1, y, h, covered * (fx2 - fx1));
3332	}
3333}
3334
3335static bool
3336composite_unaligned_boxes_inplace__solid(struct sna *sna,
3337					 CARD8 op, uint32_t color,
3338					 PicturePtr dst,
3339					 int n, const xTrapezoid *t,
3340					 bool force_fallback)
3341{
3342	PixmapPtr pixmap;
3343	int16_t dx, dy;
3344
3345	DBG(("%s: force=%d, is_gpu=%d, op=%d, color=%x\n", __FUNCTION__,
3346	     force_fallback, is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS), op, color));
3347
3348	if (!force_fallback && is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
3349		DBG(("%s: fallback -- can not perform operation in place, destination busy\n",
3350		     __FUNCTION__));
3351
3352		return false;
3353	}
3354
3355	/* XXX a8 boxes */
3356	if (!(dst->format == PICT_a8r8g8b8 || dst->format == PICT_x8r8g8b8)) {
3357		DBG(("%s: fallback -- can not perform operation in place, unhanbled format %08lx\n",
3358		     __FUNCTION__, (long)dst->format));
3359
3360		goto pixman;
3361	}
3362
3363	pixmap = get_drawable_pixmap(dst->pDrawable);
3364	get_drawable_deltas(dst->pDrawable, pixmap, &dx, &dy);
3365
3366	if (op == PictOpOver && (color >> 24) == 0xff)
3367		op = PictOpSrc;
3368	if (op == PictOpOver || op == PictOpAdd) {
3369		struct sna_pixmap *priv = sna_pixmap(pixmap);
3370		if (priv && priv->clear && priv->clear_color == 0)
3371			op = PictOpSrc;
3372	}
3373
3374	switch (op) {
3375	case PictOpSrc:
3376		break;
3377	default:
3378		DBG(("%s: fallback -- can not perform op [%d] in place\n",
3379		     __FUNCTION__, op));
3380		goto pixman;
3381	}
3382
3383	DBG(("%s: inplace operation on argb32 destination x %d\n",
3384	     __FUNCTION__, n));
3385	do {
3386		RegionRec clip;
3387		BoxPtr extents;
3388		int count;
3389
3390		clip.extents.x1 = pixman_fixed_to_int(t->left.p1.x);
3391		clip.extents.x2 = pixman_fixed_to_int(t->right.p1.x + pixman_fixed_1_minus_e);
3392		clip.extents.y1 = pixman_fixed_to_int(t->top);
3393		clip.extents.y2 = pixman_fixed_to_int(t->bottom + pixman_fixed_1_minus_e);
3394		clip.data = NULL;
3395
3396		if (!sna_compute_composite_region(&clip,
3397						   NULL, NULL, dst,
3398						   0, 0,
3399						   0, 0,
3400						   clip.extents.x1, clip.extents.y1,
3401						   clip.extents.x2 - clip.extents.x1,
3402						   clip.extents.y2 - clip.extents.y1))
3403			continue;
3404
3405		if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &clip,
3406						     MOVE_WRITE | MOVE_READ)) {
3407			RegionUninit(&clip);
3408			continue;
3409		}
3410
3411		RegionTranslate(&clip, dx, dy);
3412		count = REGION_NUM_RECTS(&clip);
3413		extents = REGION_RECTS(&clip);
3414		while (count--) {
3415			int16_t y1 = dy + pixman_fixed_to_int(t->top);
3416			uint16_t fy1 = pixman_fixed_frac(t->top);
3417			int16_t y2 = dy + pixman_fixed_to_int(t->bottom);
3418			uint16_t fy2 = pixman_fixed_frac(t->bottom);
3419
3420			DBG(("%s: t=(%d, %d), (%d, %d), extents (%d, %d), (%d, %d)\n",
3421			     __FUNCTION__,
3422			     pixman_fixed_to_int(t->left.p1.x),
3423			     pixman_fixed_to_int(t->top),
3424			     pixman_fixed_to_int(t->right.p2.x),
3425			     pixman_fixed_to_int(t->bottom),
3426			     extents->x1, extents->y1,
3427			     extents->x2, extents->y2));
3428
3429			if (y1 < extents->y1)
3430				y1 = extents->y1, fy1 = 0;
3431			if (y2 >= extents->y2)
3432				y2 = extents->y2, fy2 = 0;
3433
3434			if (y1 < y2) {
3435				if (fy1) {
3436					lerp32_unaligned_box_row(pixmap, color, extents,
3437								 t, dx, y1, 1,
3438								 SAMPLES_Y - grid_coverage(SAMPLES_Y, fy1));
3439					y1++;
3440				}
3441
3442				if (y2 > y1)
3443					lerp32_unaligned_box_row(pixmap, color, extents,
3444								 t, dx, y1, y2 - y1,
3445								 SAMPLES_Y);
3446
3447				if (fy2)
3448					lerp32_unaligned_box_row(pixmap, color,  extents,
3449								 t, dx, y2, 1,
3450								 grid_coverage(SAMPLES_Y, fy2));
3451			} else if (y1 == y2 && fy2 > fy1) {
3452				lerp32_unaligned_box_row(pixmap, color, extents,
3453							 t, dx, y1, 1,
3454							 grid_coverage(SAMPLES_Y, fy2) - grid_coverage(SAMPLES_Y, fy1));
3455			}
3456			extents++;
3457		}
3458
3459		RegionUninit(&clip);
3460	} while (--n && t++);
3461
3462	return true;
3463
3464pixman:
3465	do {
3466		struct pixman_inplace pi;
3467		RegionRec clip;
3468		BoxPtr extents;
3469		int count;
3470
3471		clip.extents.x1 = pixman_fixed_to_int(t->left.p1.x);
3472		clip.extents.x2 = pixman_fixed_to_int(t->right.p1.x + pixman_fixed_1_minus_e);
3473		clip.extents.y1 = pixman_fixed_to_int(t->top);
3474		clip.extents.y2 = pixman_fixed_to_int(t->bottom + pixman_fixed_1_minus_e);
3475		clip.data = NULL;
3476
3477		if (!sna_compute_composite_region(&clip,
3478						   NULL, NULL, dst,
3479						   0, 0,
3480						   0, 0,
3481						   clip.extents.x1, clip.extents.y1,
3482						   clip.extents.x2 - clip.extents.x1,
3483						   clip.extents.y2 - clip.extents.y1))
3484			continue;
3485
3486		if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &clip,
3487						     MOVE_WRITE | MOVE_READ)) {
3488			RegionUninit(&clip);
3489			continue;
3490		}
3491
3492		pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy);
3493		pi.source = pixman_image_create_bits(PIXMAN_a8r8g8b8, 1, 1, NULL, 0);
3494		pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);
3495		pi.bits = pixman_image_get_data(pi.source);
3496		pi.color = color;
3497		pi.op = op;
3498
3499		count = REGION_NUM_RECTS(&clip);
3500		extents = REGION_RECTS(&clip);
3501		while (count--) {
3502			int16_t y1 = pixman_fixed_to_int(t->top);
3503			uint16_t fy1 = pixman_fixed_frac(t->top);
3504			int16_t y2 = pixman_fixed_to_int(t->bottom);
3505			uint16_t fy2 = pixman_fixed_frac(t->bottom);
3506
3507			if (y1 < extents->y1)
3508				y1 = extents->y1, fy1 = 0;
3509			if (y2 >= extents->y2)
3510				y2 = extents->y2, fy2 = 0;
3511			if (y1 < y2) {
3512				if (fy1) {
3513					pixsolid_unaligned_box_row(&pi, extents, t, y1, 1,
3514								   SAMPLES_Y - grid_coverage(SAMPLES_Y, fy1));
3515					y1++;
3516				}
3517
3518				if (y2 > y1)
3519					pixsolid_unaligned_box_row(&pi, extents, t, y1, y2 - y1,
3520								   SAMPLES_Y);
3521
3522				if (fy2)
3523					pixsolid_unaligned_box_row(&pi, extents, t, y2, 1,
3524								   grid_coverage(SAMPLES_Y, fy2));
3525			} else if (y1 == y2 && fy2 > fy1) {
3526				pixsolid_unaligned_box_row(&pi, extents, t, y1, 1,
3527							   grid_coverage(SAMPLES_Y, fy2) - grid_coverage(SAMPLES_Y, fy1));
3528			}
3529			extents++;
3530		}
3531
3532		RegionUninit(&clip);
3533		pixman_image_unref(pi.image);
3534		pixman_image_unref(pi.source);
3535	} while (--n && t++);
3536	return true;
3537}
3538
3539inline static void
3540pixmask_opacity(struct pixman_inplace *pi,
3541		int16_t x, int16_t w,
3542		int16_t y, int16_t h,
3543		uint8_t opacity)
3544{
3545	if (opacity == 0xff) {
3546		pixman_image_composite(pi->op, pi->source, NULL, pi->image,
3547				       pi->sx + x, pi->sy + y,
3548				       0, 0,
3549				       pi->dx + x, pi->dy + y,
3550				       w, h);
3551	} else {
3552		*pi->bits = opacity;
3553		pixman_image_composite(pi->op, pi->source, pi->mask, pi->image,
3554				       pi->sx + x, pi->sy + y,
3555				       0, 0,
3556				       pi->dx + x, pi->dy + y,
3557				       w, h);
3558	}
3559}
3560
3561static void
3562pixmask_unaligned_box_row(struct pixman_inplace *pi,
3563			  const BoxRec *extents,
3564			  const xTrapezoid *trap,
3565			  int16_t y, int16_t h,
3566			  uint8_t covered)
3567{
3568	int16_t x1 = pixman_fixed_to_int(trap->left.p1.x);
3569	uint16_t fx1 = grid_coverage(SAMPLES_X, trap->left.p1.x);
3570	int16_t x2 = pixman_fixed_to_int(trap->right.p1.x);
3571	uint16_t fx2 = grid_coverage(SAMPLES_X, trap->right.p1.x);
3572
3573	if (x1 < extents->x1)
3574		x1 = extents->x1, fx1 = 0;
3575	if (x2 >= extents->x2)
3576		x2 = extents->x2, fx2 = 0;
3577
3578	if (x1 < x2) {
3579		if (fx1) {
3580			pixmask_opacity(pi, x1, 1, y, h,
3581					 covered * (SAMPLES_X - fx1));
3582			x1++;
3583		}
3584
3585		if (x2 > x1)
3586			pixmask_opacity(pi, x1, x2-x1, y, h, covered*SAMPLES_X);
3587
3588		if (fx2)
3589			pixmask_opacity(pi, x2, 1, y, h, covered * fx2);
3590	} else if (x1 == x2 && fx2 > fx1) {
3591		pixmask_opacity(pi, x1, 1, y, h, covered * (fx2 - fx1));
3592	}
3593}
3594
3595struct rectilinear_inplace_thread {
3596	pixman_image_t *dst, *src;
3597	const RegionRec *clip;
3598	const xTrapezoid *trap;
3599	int dx, dy, sx, sy;
3600	int y1, y2;
3601	CARD8 op;
3602};
3603
3604static void rectilinear_inplace_thread(void *arg)
3605{
3606	struct rectilinear_inplace_thread *thread = arg;
3607	const xTrapezoid *t = thread->trap;
3608	struct pixman_inplace pi;
3609	const BoxRec *extents;
3610	int count;
3611
3612	pi.image = thread->dst;
3613	pi.dx = thread->dx;
3614	pi.dy = thread->dy;
3615
3616	pi.source = thread->src;
3617	pi.sx = thread->sx;
3618	pi.sy = thread->sy;
3619
3620	pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, &pi.color, 4);
3621	pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL);
3622	pi.bits = pixman_image_get_data(pi.mask);
3623	pi.op = thread->op;
3624
3625	count = region_count(thread->clip);
3626	extents = region_boxes(thread->clip);
3627	while (count--) {
3628		int16_t y1 = pixman_fixed_to_int(t->top);
3629		uint16_t fy1 = pixman_fixed_frac(t->top);
3630		int16_t y2 = pixman_fixed_to_int(t->bottom);
3631		uint16_t fy2 = pixman_fixed_frac(t->bottom);
3632
3633		if (y1 < MAX(thread->y1, extents->y1))
3634			y1 = MAX(thread->y1, extents->y1), fy1 = 0;
3635		if (y2 > MIN(thread->y2, extents->y2))
3636			y2 = MIN(thread->y2, extents->y2), fy2 = 0;
3637		if (y1 < y2) {
3638			if (fy1) {
3639				pixmask_unaligned_box_row(&pi, extents, t, y1, 1,
3640							  SAMPLES_Y - grid_coverage(SAMPLES_Y, fy1));
3641				y1++;
3642			}
3643
3644			if (y2 > y1)
3645				pixmask_unaligned_box_row(&pi, extents, t, y1, y2 - y1,
3646							  SAMPLES_Y);
3647
3648			if (fy2)
3649				pixmask_unaligned_box_row(&pi, extents, t, y2, 1,
3650							  grid_coverage(SAMPLES_Y, fy2));
3651		} else if (y1 == y2 && fy2 > fy1) {
3652			pixmask_unaligned_box_row(&pi, extents, t, y1, 1,
3653						  grid_coverage(SAMPLES_Y, fy2) - grid_coverage(SAMPLES_Y, fy1));
3654		}
3655		extents++;
3656	}
3657
3658	pixman_image_unref(pi.mask);
3659}
3660
3661static bool
3662composite_unaligned_boxes_inplace(struct sna *sna,
3663				  CARD8 op,
3664				  PicturePtr src, int16_t src_x, int16_t src_y,
3665				  PicturePtr dst, int n, const xTrapezoid *t,
3666				  bool force_fallback)
3667{
3668	if (!force_fallback &&
3669	    (is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS) ||
3670	     picture_is_gpu(sna, src))) {
3671		DBG(("%s: fallback -- not forcing\n", __FUNCTION__));
3672		return false;
3673	}
3674
3675	DBG(("%s\n", __FUNCTION__));
3676
3677	src_x -= pixman_fixed_to_int(t[0].left.p1.x);
3678	src_y -= pixman_fixed_to_int(t[0].left.p1.y);
3679	do {
3680		RegionRec clip;
3681		BoxPtr extents;
3682		int count;
3683		int num_threads;
3684
3685		clip.extents.x1 = pixman_fixed_to_int(t->left.p1.x);
3686		clip.extents.x2 = pixman_fixed_to_int(t->right.p1.x + pixman_fixed_1_minus_e);
3687		clip.extents.y1 = pixman_fixed_to_int(t->top);
3688		clip.extents.y2 = pixman_fixed_to_int(t->bottom + pixman_fixed_1_minus_e);
3689		clip.data = NULL;
3690
3691		if (!sna_compute_composite_region(&clip,
3692						   src, NULL, dst,
3693						   clip.extents.x1 + src_x,
3694						   clip.extents.y1 + src_y,
3695						   0, 0,
3696						   clip.extents.x1, clip.extents.y1,
3697						   clip.extents.x2 - clip.extents.x1,
3698						   clip.extents.y2 - clip.extents.y1))
3699			continue;
3700
3701		if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &clip,
3702						     MOVE_WRITE | MOVE_READ)) {
3703			RegionUninit(&clip);
3704			continue;
3705		}
3706
3707		if (src->pDrawable) {
3708			if (!sna_drawable_move_to_cpu(src->pDrawable,
3709						      MOVE_READ)) {
3710				RegionUninit(&clip);
3711				continue;
3712			}
3713			if (src->alphaMap) {
3714				if (!sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
3715							      MOVE_READ)) {
3716					RegionUninit(&clip);
3717					continue;
3718				}
3719			}
3720		}
3721
3722		num_threads = sna_use_threads(clip.extents.x2 - clip.extents.x1,
3723					      clip.extents.y2 - clip.extents.y1,
3724					      32);
3725		if (num_threads == 1) {
3726			struct pixman_inplace pi;
3727
3728			pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy);
3729			pi.source = image_from_pict(src, false, &pi.sx, &pi.sy);
3730			pi.sx += src_x;
3731			pi.sy += src_y;
3732			pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, &pi.color, 4);
3733			pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL);
3734			pi.bits = pixman_image_get_data(pi.mask);
3735			pi.op = op;
3736
3737			count = REGION_NUM_RECTS(&clip);
3738			extents = REGION_RECTS(&clip);
3739			while (count--) {
3740				int16_t y1 = pixman_fixed_to_int(t->top);
3741				uint16_t fy1 = pixman_fixed_frac(t->top);
3742				int16_t y2 = pixman_fixed_to_int(t->bottom);
3743				uint16_t fy2 = pixman_fixed_frac(t->bottom);
3744
3745				if (y1 < extents->y1)
3746					y1 = extents->y1, fy1 = 0;
3747				if (y2 > extents->y2)
3748					y2 = extents->y2, fy2 = 0;
3749				if (y1 < y2) {
3750					if (fy1) {
3751						pixmask_unaligned_box_row(&pi, extents, t, y1, 1,
3752									  SAMPLES_Y - grid_coverage(SAMPLES_Y, fy1));
3753						y1++;
3754					}
3755
3756					if (y2 > y1)
3757						pixmask_unaligned_box_row(&pi, extents, t, y1, y2 - y1,
3758									  SAMPLES_Y);
3759
3760					if (fy2)
3761						pixmask_unaligned_box_row(&pi, extents, t, y2, 1,
3762									  grid_coverage(SAMPLES_Y, fy2));
3763				} else if (y1 == y2 && fy2 > fy1) {
3764					pixmask_unaligned_box_row(&pi, extents, t, y1, 1,
3765								  grid_coverage(SAMPLES_Y, fy2) - grid_coverage(SAMPLES_Y, fy1));
3766				}
3767				extents++;
3768			}
3769
3770			pixman_image_unref(pi.image);
3771			pixman_image_unref(pi.source);
3772			pixman_image_unref(pi.mask);
3773		} else {
3774			struct rectilinear_inplace_thread thread[num_threads];
3775			int i, y, dy;
3776
3777
3778			thread[0].trap = t;
3779			thread[0].dst = image_from_pict(dst, false, &thread[0].dx, &thread[0].dy);
3780			thread[0].src = image_from_pict(src, false, &thread[0].sx, &thread[0].sy);
3781			thread[0].sx += src_x;
3782			thread[0].sy += src_y;
3783
3784			thread[0].clip = &clip;
3785			thread[0].op = op;
3786
3787			y = clip.extents.y1;
3788			dy = (clip.extents.y2 - clip.extents.y1 + num_threads - 1) / num_threads;
3789
3790			for (i = 1; i < num_threads; i++) {
3791				thread[i] = thread[0];
3792				thread[i].y1 = y;
3793				thread[i].y2 = y += dy;
3794				sna_threads_run(rectilinear_inplace_thread, &thread[i]);
3795			}
3796
3797			thread[0].y1 = y;
3798			thread[0].y2 = clip.extents.y2;
3799			rectilinear_inplace_thread(&thread[0]);
3800
3801			sna_threads_wait();
3802
3803			pixman_image_unref(thread[0].dst);
3804			pixman_image_unref(thread[0].src);
3805		}
3806
3807		RegionUninit(&clip);
3808	} while (--n && t++);
3809
3810	return true;
3811}
3812
3813static bool
3814composite_unaligned_boxes_fallback(struct sna *sna,
3815				   CARD8 op,
3816				   PicturePtr src,
3817				   PicturePtr dst,
3818				   INT16 src_x, INT16 src_y,
3819				   int ntrap, const xTrapezoid *traps,
3820				   bool force_fallback)
3821{
3822	ScreenPtr screen = dst->pDrawable->pScreen;
3823	uint32_t color;
3824	int16_t dst_x, dst_y;
3825	int16_t dx, dy;
3826	int n;
3827
3828	if (sna_picture_is_solid(src, &color) &&
3829	    composite_unaligned_boxes_inplace__solid(sna, op, color, dst,
3830						     ntrap, traps,
3831						     force_fallback))
3832		return true;
3833
3834	if (composite_unaligned_boxes_inplace(sna, op, src, src_x, src_y,
3835					      dst, ntrap, traps,
3836					      force_fallback))
3837		return true;
3838
3839	trapezoid_origin(&traps[0].left, &dst_x, &dst_y);
3840	dx = dst->pDrawable->x;
3841	dy = dst->pDrawable->y;
3842	for (n = 0; n < ntrap; n++) {
3843		const xTrapezoid *t = &traps[n];
3844		PixmapPtr scratch;
3845		PicturePtr mask;
3846		BoxRec extents;
3847		int error;
3848		int y1, y2;
3849
3850		extents.x1 = pixman_fixed_to_int(t->left.p1.x);
3851		extents.x2 = pixman_fixed_to_int(t->right.p1.x + pixman_fixed_1_minus_e);
3852		extents.y1 = pixman_fixed_to_int(t->top);
3853		extents.y2 = pixman_fixed_to_int(t->bottom + pixman_fixed_1_minus_e);
3854
3855		if (!sna_compute_composite_extents(&extents,
3856						   src, NULL, dst,
3857						   src_x, src_y,
3858						   0, 0,
3859						   extents.x1, extents.y1,
3860						   extents.x2 - extents.x1,
3861						   extents.y2 - extents.y1))
3862			continue;
3863
3864		if (force_fallback)
3865			scratch = sna_pixmap_create_unattached(screen,
3866							       extents.x2 - extents.x1,
3867							       extents.y2 - extents.y1,
3868							       8);
3869		else
3870			scratch = sna_pixmap_create_upload(screen,
3871							   extents.x2 - extents.x1,
3872							   extents.y2 - extents.y1,
3873							   8, KGEM_BUFFER_WRITE_INPLACE);
3874		if (!scratch)
3875			continue;
3876
3877		memset(scratch->devPrivate.ptr, 0xff,
3878		       scratch->devKind * (extents.y2 - extents.y1));
3879
3880		extents.x1 -= dx;
3881		extents.x2 -= dx;
3882		extents.y1 -= dy;
3883		extents.y2 -= dy;
3884
3885		y1 = pixman_fixed_to_int(t->top) - extents.y1;
3886		y2 = pixman_fixed_to_int(t->bottom) - extents.y1;
3887
3888		if (y1 == y2) {
3889			blt_unaligned_box_row(scratch, &extents, t, y1, y1 + 1,
3890					      grid_coverage(SAMPLES_Y, t->bottom) - grid_coverage(SAMPLES_Y, t->top));
3891		} else {
3892			if (pixman_fixed_frac(t->top)) {
3893				blt_unaligned_box_row(scratch, &extents, t, y1, y1 + 1,
3894						      SAMPLES_Y - grid_coverage(SAMPLES_Y, t->top));
3895				y1++;
3896			}
3897
3898			if (y2 > y1)
3899				blt_unaligned_box_row(scratch, &extents, t, y1, y2,
3900						      SAMPLES_Y);
3901
3902			if (pixman_fixed_frac(t->bottom))
3903				blt_unaligned_box_row(scratch, &extents, t, y2, y2+1,
3904						      grid_coverage(SAMPLES_Y, t->bottom));
3905		}
3906
3907		mask = CreatePicture(0, &scratch->drawable,
3908				     PictureMatchFormat(screen, 8, PICT_a8),
3909				     0, 0, serverClient, &error);
3910		if (mask) {
3911			CompositePicture(op, src, mask, dst,
3912					 src_x + extents.x1 - dst_x,
3913					 src_y + extents.y1 - dst_y,
3914					 0, 0,
3915					 extents.x1, extents.y1,
3916					 extents.x2 - extents.x1,
3917					 extents.y2 - extents.y1);
3918			FreePicture(mask, 0);
3919		}
3920		sna_pixmap_destroy(scratch);
3921	}
3922
3923	return true;
3924}
3925
3926static bool
3927composite_unaligned_boxes(struct sna *sna,
3928			  CARD8 op,
3929			  PicturePtr src,
3930			  PicturePtr dst,
3931			  PictFormatPtr maskFormat,
3932			  INT16 src_x, INT16 src_y,
3933			  int ntrap, const xTrapezoid *traps,
3934			  bool force_fallback)
3935{
3936	BoxRec extents;
3937	struct sna_composite_spans_op tmp;
3938	struct sna_pixmap *priv;
3939	pixman_region16_t clip, *c;
3940	int16_t dst_x, dst_y;
3941	int dx, dy, n;
3942
3943	if (NO_UNALIGNED_BOXES)
3944		return false;
3945
3946	DBG(("%s: force_fallback=%d, mask=%x, n=%d, op=%d\n",
3947	     __FUNCTION__, force_fallback, maskFormat ? (int)maskFormat->format : 0, ntrap, op));
3948
3949	/* need a span converter to handle overlapping traps */
3950	if (ntrap > 1 && maskFormat)
3951		return false;
3952
3953	if (force_fallback ||
3954	    !sna->render.check_composite_spans(sna, op, src, dst, 0, 0,
3955					       COMPOSITE_SPANS_RECTILINEAR)) {
3956fallback:
3957		return composite_unaligned_boxes_fallback(sna, op, src, dst,
3958							  src_x, src_y,
3959							  ntrap, traps,
3960							  force_fallback);
3961	}
3962
3963	trapezoid_origin(&traps[0].left, &dst_x, &dst_y);
3964
3965	extents.x1 = pixman_fixed_to_int(traps[0].left.p1.x);
3966	extents.x2 = pixman_fixed_to_int(traps[0].right.p1.x + pixman_fixed_1_minus_e);
3967	extents.y1 = pixman_fixed_to_int(traps[0].top);
3968	extents.y2 = pixman_fixed_to_int(traps[0].bottom + pixman_fixed_1_minus_e);
3969
3970	DBG(("%s: src=(%d, %d), dst=(%d, %d)\n",
3971	     __FUNCTION__, src_x, src_y, dst_x, dst_y));
3972
3973	for (n = 1; n < ntrap; n++) {
3974		int x1 = pixman_fixed_to_int(traps[n].left.p1.x);
3975		int x2 = pixman_fixed_to_int(traps[n].right.p1.x + pixman_fixed_1_minus_e);
3976		int y1 = pixman_fixed_to_int(traps[n].top);
3977		int y2 = pixman_fixed_to_int(traps[n].bottom + pixman_fixed_1_minus_e);
3978
3979		if (x1 < extents.x1)
3980			extents.x1 = x1;
3981		if (x2 > extents.x2)
3982			extents.x2 = x2;
3983		if (y1 < extents.y1)
3984			extents.y1 = y1;
3985		if (y2 > extents.y2)
3986			extents.y2 = y2;
3987	}
3988
3989	DBG(("%s: extents (%d, %d), (%d, %d)\n", __FUNCTION__,
3990	     extents.x1, extents.y1, extents.x2, extents.y2));
3991
3992	if (!sna_compute_composite_region(&clip,
3993					  src, NULL, dst,
3994					  src_x + extents.x1 - dst_x,
3995					  src_y + extents.y1 - dst_y,
3996					  0, 0,
3997					  extents.x1, extents.y1,
3998					  extents.x2 - extents.x1,
3999					  extents.y2 - extents.y1)) {
4000		DBG(("%s: trapezoids do not intersect drawable clips\n",
4001		     __FUNCTION__)) ;
4002		return true;
4003	}
4004
4005	if (!sna->render.check_composite_spans(sna, op, src, dst,
4006					       clip.extents.x2 - clip.extents.x1,
4007					       clip.extents.y2 - clip.extents.y1,
4008					       COMPOSITE_SPANS_RECTILINEAR)) {
4009		DBG(("%s: fallback -- composite spans not supported\n",
4010		     __FUNCTION__));
4011		goto fallback;
4012	}
4013
4014	c = NULL;
4015	if (extents.x2 - extents.x1 > clip.extents.x2 - clip.extents.x1 ||
4016	    extents.y2 - extents.y1 > clip.extents.y2 - clip.extents.y1) {
4017		DBG(("%s: forcing clip\n", __FUNCTION__));
4018		c = &clip;
4019	}
4020
4021	extents = *RegionExtents(&clip);
4022	dx = dst->pDrawable->x;
4023	dy = dst->pDrawable->y;
4024
4025	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
4026	     __FUNCTION__,
4027	     extents.x1, extents.y1,
4028	     extents.x2, extents.y2,
4029	     dx, dy,
4030	     src_x + extents.x1 - dst_x - dx,
4031	     src_y + extents.y1 - dst_y - dy));
4032
4033	switch (op) {
4034	case PictOpAdd:
4035	case PictOpOver:
4036		priv = sna_pixmap(get_drawable_pixmap(dst->pDrawable));
4037		assert(priv != NULL);
4038		if (priv->clear && priv->clear_color == 0) {
4039			DBG(("%s: converting %d to PictOpSrc\n",
4040			     __FUNCTION__, op));
4041			op = PictOpSrc;
4042		}
4043		break;
4044	case PictOpIn:
4045		priv = sna_pixmap(get_drawable_pixmap(dst->pDrawable));
4046		assert(priv != NULL);
4047		if (priv->clear && priv->clear_color == 0) {
4048			DBG(("%s: clear destination using In, skipping\n",
4049			     __FUNCTION__));
4050			return true;
4051		}
4052		break;
4053	}
4054
4055	if (!sna->render.composite_spans(sna, op, src, dst,
4056					 src_x + extents.x1 - dst_x - dx,
4057					 src_y + extents.y1 - dst_y - dy,
4058					 extents.x1,  extents.y1,
4059					 extents.x2 - extents.x1,
4060					 extents.y2 - extents.y1,
4061					 COMPOSITE_SPANS_RECTILINEAR,
4062					 memset(&tmp, 0, sizeof(tmp)))) {
4063		DBG(("%s: composite spans render op not supported\n",
4064		     __FUNCTION__));
4065		REGION_UNINIT(NULL, &clip);
4066		goto fallback;
4067	}
4068
4069	for (n = 0; n < ntrap; n++)
4070		composite_unaligned_trap(sna, &tmp, &traps[n], dx, dy, c);
4071	tmp.done(sna, &tmp);
4072	REGION_UNINIT(NULL, &clip);
4073	return true;
4074}
4075
4076static inline int pixman_fixed_to_grid (pixman_fixed_t v)
4077{
4078	return (v + ((1<<(16-FAST_SAMPLES_shift))-1)/2) >> (16 - FAST_SAMPLES_shift);
4079}
4080
4081static inline bool
4082project_trapezoid_onto_grid(const xTrapezoid *in,
4083			    int dx, int dy,
4084			    xTrapezoid *out)
4085{
4086	__DBG(("%s: in: L:(%d, %d), (%d, %d); R:(%d, %d), (%d, %d), [%d, %d]\n",
4087	       __FUNCTION__,
4088	       in->left.p1.x, in->left.p1.y, in->left.p2.x, in->left.p2.y,
4089	       in->right.p1.x, in->right.p1.y, in->right.p2.x, in->right.p2.y,
4090	       in->top, in->bottom));
4091
4092	out->left.p1.x = dx + pixman_fixed_to_grid(in->left.p1.x);
4093	out->left.p1.y = dy + pixman_fixed_to_grid(in->left.p1.y);
4094	out->left.p2.x = dx + pixman_fixed_to_grid(in->left.p2.x);
4095	out->left.p2.y = dy + pixman_fixed_to_grid(in->left.p2.y);
4096
4097	out->right.p1.x = dx + pixman_fixed_to_grid(in->right.p1.x);
4098	out->right.p1.y = dy + pixman_fixed_to_grid(in->right.p1.y);
4099	out->right.p2.x = dx + pixman_fixed_to_grid(in->right.p2.x);
4100	out->right.p2.y = dy + pixman_fixed_to_grid(in->right.p2.y);
4101
4102	out->top = dy + pixman_fixed_to_grid(in->top);
4103	out->bottom = dy + pixman_fixed_to_grid(in->bottom);
4104
4105	__DBG(("%s: out: L:(%d, %d), (%d, %d); R:(%d, %d), (%d, %d), [%d, %d]\n",
4106	       __FUNCTION__,
4107	       out->left.p1.x, out->left.p1.y, out->left.p2.x, out->left.p2.y,
4108	       out->right.p1.x, out->right.p1.y, out->right.p2.x, out->right.p2.y,
4109	       out->top, out->bottom));
4110
4111	return xTrapezoidValid(out);
4112}
4113
4114static span_func_t
4115choose_span(struct sna_composite_spans_op *tmp,
4116	    PicturePtr dst,
4117	    PictFormatPtr maskFormat,
4118	    RegionPtr clip)
4119{
4120	span_func_t span;
4121
4122	if (is_mono(dst, maskFormat)) {
4123		/* XXX An imprecise approximation */
4124		if (maskFormat && !operator_is_bounded(tmp->base.op)) {
4125			span = tor_blt_span_mono_unbounded;
4126			if (clip->data)
4127				span = tor_blt_span_mono_unbounded_clipped;
4128		} else {
4129			span = tor_blt_span_mono;
4130			if (clip->data)
4131				span = tor_blt_span_mono_clipped;
4132		}
4133	} else {
4134		if (clip->data)
4135			span = tor_blt_span_clipped;
4136		else if (tmp->base.damage == NULL)
4137			span = tor_blt_span__no_damage;
4138		else
4139			span = tor_blt_span;
4140	}
4141
4142	return span;
4143}
4144
4145struct mono_span_thread {
4146	struct sna *sna;
4147	const xTrapezoid *traps;
4148	const struct sna_composite_op *op;
4149	RegionPtr clip;
4150	int ntrap;
4151	BoxRec extents;
4152	int dx, dy;
4153};
4154
4155static void
4156mono_span_thread(void *arg)
4157{
4158	struct mono_span_thread *thread = arg;
4159	struct mono mono;
4160	struct mono_span_thread_boxes boxes;
4161	const xTrapezoid *t;
4162	int n;
4163
4164	mono.sna = thread->sna;
4165
4166	mono.clip.extents = thread->extents;
4167	mono.clip.data = NULL;
4168	if (thread->clip->data) {
4169		RegionIntersect(&mono.clip, &mono.clip, thread->clip);
4170		if (RegionNil(&mono.clip))
4171			return;
4172	}
4173
4174	boxes.op = thread->op;
4175	boxes.num_boxes = 0;
4176	mono.op.priv = &boxes;
4177
4178	if (!mono_init(&mono, 2*thread->ntrap)) {
4179		RegionUninit(&mono.clip);
4180		return;
4181	}
4182
4183	for (n = thread->ntrap, t = thread->traps; n--; t++) {
4184		if (!xTrapezoidValid(t))
4185			continue;
4186
4187		if (pixman_fixed_to_int(t->top) + thread->dy >= thread->extents.y2 ||
4188		    pixman_fixed_to_int(t->bottom) + thread->dy <= thread->extents.y1)
4189			continue;
4190
4191		mono_add_line(&mono, thread->dx, thread->dy,
4192			      t->top, t->bottom,
4193			      &t->left.p1, &t->left.p2, 1);
4194		mono_add_line(&mono, thread->dx, thread->dy,
4195			      t->top, t->bottom,
4196			      &t->right.p1, &t->right.p2, -1);
4197	}
4198
4199	if (mono.clip.data == NULL)
4200		mono.span = thread_mono_span;
4201	else
4202		mono.span = thread_mono_span_clipped;
4203
4204	mono_render(&mono);
4205	mono_fini(&mono);
4206
4207	if (boxes.num_boxes)
4208		thread->op->thread_boxes(thread->sna, thread->op,
4209					 boxes.boxes, boxes.num_boxes);
4210	RegionUninit(&mono.clip);
4211}
4212
4213static bool
4214mono_trapezoids_span_converter(struct sna *sna,
4215			       CARD8 op, PicturePtr src, PicturePtr dst,
4216			       INT16 src_x, INT16 src_y,
4217			       int ntrap, xTrapezoid *traps)
4218{
4219	struct mono mono;
4220	BoxRec extents;
4221	int16_t dst_x, dst_y;
4222	int16_t dx, dy;
4223	bool unbounded;
4224	int num_threads, n;
4225
4226	if (NO_SCAN_CONVERTER)
4227		return false;
4228
4229	trapezoid_origin(&traps[0].left, &dst_x, &dst_y);
4230
4231	trapezoids_bounds(ntrap, traps, &extents);
4232	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
4233		return true;
4234
4235	DBG(("%s: extents (%d, %d), (%d, %d)\n",
4236	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
4237
4238	if (!sna_compute_composite_region(&mono.clip,
4239					  src, NULL, dst,
4240					  src_x + extents.x1 - dst_x,
4241					  src_y + extents.y1 - dst_y,
4242					  0, 0,
4243					  extents.x1, extents.y1,
4244					  extents.x2 - extents.x1,
4245					  extents.y2 - extents.y1)) {
4246		DBG(("%s: trapezoids do not intersect drawable clips\n",
4247		     __FUNCTION__)) ;
4248		return true;
4249	}
4250
4251	dx = dst->pDrawable->x;
4252	dy = dst->pDrawable->y;
4253
4254	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
4255	     __FUNCTION__,
4256	     mono.clip.extents.x1, mono.clip.extents.y1,
4257	     mono.clip.extents.x2, mono.clip.extents.y2,
4258	     dx, dy,
4259	     src_x + mono.clip.extents.x1 - dst_x - dx,
4260	     src_y + mono.clip.extents.y1 - dst_y - dy));
4261
4262	unbounded = (!sna_drawable_is_clear(dst->pDrawable) &&
4263		     !operator_is_bounded(op));
4264
4265	mono.sna = sna;
4266	if (!mono.sna->render.composite(mono.sna, op, src, NULL, dst,
4267				       src_x + mono.clip.extents.x1 - dst_x - dx,
4268				       src_y + mono.clip.extents.y1 - dst_y - dy,
4269				       0, 0,
4270				       mono.clip.extents.x1,  mono.clip.extents.y1,
4271				       mono.clip.extents.x2 - mono.clip.extents.x1,
4272				       mono.clip.extents.y2 - mono.clip.extents.y1,
4273				       memset(&mono.op, 0, sizeof(mono.op))))
4274		return false;
4275
4276	num_threads = 1;
4277	if (!NO_GPU_THREADS &&
4278	    mono.op.thread_boxes &&
4279	    mono.op.damage == NULL &&
4280	    !unbounded)
4281		num_threads = sna_use_threads(mono.clip.extents.x2 - mono.clip.extents.x1,
4282					      mono.clip.extents.y2 - mono.clip.extents.y1,
4283					      32);
4284	if (num_threads > 1) {
4285		struct mono_span_thread threads[num_threads];
4286		int y, h;
4287
4288		DBG(("%s: using %d threads for mono span compositing %dx%d\n",
4289		     __FUNCTION__, num_threads,
4290		     mono.clip.extents.x2 - mono.clip.extents.x1,
4291		     mono.clip.extents.y2 - mono.clip.extents.y1));
4292
4293		threads[0].sna = mono.sna;
4294		threads[0].op = &mono.op;
4295		threads[0].traps = traps;
4296		threads[0].ntrap = ntrap;
4297		threads[0].extents = mono.clip.extents;
4298		threads[0].clip = &mono.clip;
4299		threads[0].dx = dx;
4300		threads[0].dy = dy;
4301
4302		y = extents.y1;
4303		h = extents.y2 - extents.y1;
4304		h = (h + num_threads - 1) / num_threads;
4305
4306		for (n = 1; n < num_threads; n++) {
4307			threads[n] = threads[0];
4308			threads[n].extents.y1 = y;
4309			threads[n].extents.y2 = y += h;
4310
4311			sna_threads_run(mono_span_thread, &threads[n]);
4312		}
4313
4314		threads[0].extents.y1 = y;
4315		threads[0].extents.y2 = extents.y2;
4316		mono_span_thread(&threads[0]);
4317
4318		sna_threads_wait();
4319		mono.op.done(mono.sna, &mono.op);
4320		return true;
4321	}
4322
4323	if (!mono_init(&mono, 2*ntrap))
4324		return false;
4325
4326	for (n = 0; n < ntrap; n++) {
4327		if (!xTrapezoidValid(&traps[n]))
4328			continue;
4329
4330		if (pixman_fixed_to_int(traps[n].top) + dy >= mono.clip.extents.y2 ||
4331		    pixman_fixed_to_int(traps[n].bottom) + dy < mono.clip.extents.y1)
4332			continue;
4333
4334		mono_add_line(&mono, dx, dy,
4335			      traps[n].top, traps[n].bottom,
4336			      &traps[n].left.p1, &traps[n].left.p2, 1);
4337		mono_add_line(&mono, dx, dy,
4338			      traps[n].top, traps[n].bottom,
4339			      &traps[n].right.p1, &traps[n].right.p2, -1);
4340	}
4341
4342	if (mono.clip.data == NULL && mono.op.damage == NULL)
4343		mono.span = mono_span__fast;
4344	else
4345		mono.span = mono_span;
4346
4347	mono_render(&mono);
4348	mono.op.done(mono.sna, &mono.op);
4349	mono_fini(&mono);
4350
4351	if (unbounded) {
4352		xPointFixed p1, p2;
4353
4354		if (!mono_init(&mono, 2+2*ntrap))
4355			return false;
4356
4357		p1.y = mono.clip.extents.y1 * pixman_fixed_1;
4358		p2.y = mono.clip.extents.y2 * pixman_fixed_1;
4359
4360		p1.x = mono.clip.extents.x1 * pixman_fixed_1;
4361		p2.x = mono.clip.extents.x1 * pixman_fixed_1;
4362		mono_add_line(&mono, 0, 0, p1.y, p2.y, &p1, &p2, -1);
4363
4364		p1.x = mono.clip.extents.x2 * pixman_fixed_1;
4365		p2.x = mono.clip.extents.x2 * pixman_fixed_1;
4366		mono_add_line(&mono, 0, 0, p1.y, p2.y, &p1, &p2, 1);
4367
4368		for (n = 0; n < ntrap; n++) {
4369			if (!xTrapezoidValid(&traps[n]))
4370				continue;
4371
4372			if (pixman_fixed_to_int(traps[n].top) + dy >= mono.clip.extents.y2 ||
4373			    pixman_fixed_to_int(traps[n].bottom) + dy < mono.clip.extents.y1)
4374				continue;
4375
4376			mono_add_line(&mono, dx, dy,
4377				      traps[n].top, traps[n].bottom,
4378				      &traps[n].left.p1, &traps[n].left.p2, 1);
4379			mono_add_line(&mono, dx, dy,
4380				      traps[n].top, traps[n].bottom,
4381				      &traps[n].right.p1, &traps[n].right.p2, -1);
4382		}
4383		memset(&mono.op, 0, sizeof(mono.op));
4384		if (mono.sna->render.composite(mono.sna,
4385					       PictOpClear,
4386					       mono.sna->clear, NULL, dst,
4387					       0, 0,
4388					       0, 0,
4389					       mono.clip.extents.x1,  mono.clip.extents.y1,
4390					       mono.clip.extents.x2 - mono.clip.extents.x1,
4391					       mono.clip.extents.y2 - mono.clip.extents.y1,
4392					       &mono.op)) {
4393			mono_render(&mono);
4394			mono.op.done(mono.sna, &mono.op);
4395		}
4396		mono_fini(&mono);
4397	}
4398
4399	REGION_UNINIT(NULL, &mono.clip);
4400	return true;
4401}
4402
4403struct span_thread {
4404	struct sna *sna;
4405	const struct sna_composite_spans_op *op;
4406	const xTrapezoid *traps;
4407	RegionPtr clip;
4408	span_func_t span;
4409	BoxRec extents;
4410	int dx, dy, draw_y;
4411	int ntrap;
4412	bool unbounded;
4413};
4414
4415#define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box))
4416struct span_thread_boxes {
4417	const struct sna_composite_spans_op *op;
4418	struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES];
4419	int num_boxes;
4420};
4421
4422static void span_thread_add_boxes(struct sna *sna, void *data,
4423				  const BoxRec *box, int count, float alpha)
4424{
4425	struct span_thread_boxes *b = data;
4426
4427	__DBG(("%s: adding %d boxes with alpha=%f\n",
4428	       __FUNCTION__, count, alpha));
4429
4430	assert(count > 0 && count <= SPAN_THREAD_MAX_BOXES);
4431	if (unlikely(b->num_boxes + count > SPAN_THREAD_MAX_BOXES)) {
4432		DBG(("%s: flushing %d boxes, adding %d\n", __FUNCTION__, b->num_boxes, count));
4433		assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES);
4434		b->op->thread_boxes(sna, b->op, b->boxes, b->num_boxes);
4435		b->num_boxes = 0;
4436	}
4437
4438	do {
4439		b->boxes[b->num_boxes].box = *box++;
4440		b->boxes[b->num_boxes].alpha = alpha;
4441		b->num_boxes++;
4442	} while (--count);
4443	assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES);
4444}
4445
4446static void
4447span_thread_box(struct sna *sna,
4448		struct sna_composite_spans_op *op,
4449		pixman_region16_t *clip,
4450		const BoxRec *box,
4451		int coverage)
4452{
4453	__DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
4454	span_thread_add_boxes(sna, op, box, 1, AREA_TO_ALPHA(coverage));
4455}
4456
4457static void
4458span_thread_clipped_box(struct sna *sna,
4459			struct sna_composite_spans_op *op,
4460			pixman_region16_t *clip,
4461			const BoxRec *box,
4462			int coverage)
4463{
4464	pixman_region16_t region;
4465
4466	__DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2,
4467	       AREA_TO_ALPHA(coverage)));
4468
4469	pixman_region_init_rects(&region, box, 1);
4470	RegionIntersect(&region, &region, clip);
4471	if (REGION_NUM_RECTS(&region)) {
4472		span_thread_add_boxes(sna, op,
4473				      REGION_RECTS(&region),
4474				      REGION_NUM_RECTS(&region),
4475				      AREA_TO_ALPHA(coverage));
4476	}
4477	pixman_region_fini(&region);
4478}
4479
4480static span_func_t
4481thread_choose_span(struct sna_composite_spans_op *tmp,
4482		   PicturePtr dst,
4483		   PictFormatPtr maskFormat,
4484		   RegionPtr clip)
4485{
4486	span_func_t span;
4487
4488	if (tmp->base.damage) {
4489		DBG(("%s: damaged -> no thread support\n", __FUNCTION__));
4490		return NULL;
4491	}
4492
4493	if (is_mono(dst, maskFormat)) {
4494		DBG(("%s: mono rendering -> no thread support\n", __FUNCTION__));
4495		return NULL;
4496	} else {
4497		assert(tmp->thread_boxes);
4498		DBG(("%s: clipped? %d\n", __FUNCTION__, clip->data != NULL));
4499		if (clip->data)
4500			span = span_thread_clipped_box;
4501		else
4502			span = span_thread_box;
4503	}
4504
4505	return span;
4506}
4507
4508static void
4509span_thread(void *arg)
4510{
4511	struct span_thread *thread = arg;
4512	struct span_thread_boxes boxes;
4513	struct tor tor;
4514	const xTrapezoid *t;
4515	int n, y1, y2;
4516
4517	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
4518		return;
4519
4520	boxes.op = thread->op;
4521	boxes.num_boxes = 0;
4522
4523	y1 = thread->extents.y1 - thread->draw_y;
4524	y2 = thread->extents.y2 - thread->draw_y;
4525	for (n = thread->ntrap, t = thread->traps; n--; t++) {
4526		xTrapezoid tt;
4527
4528		if (pixman_fixed_to_int(t->top) >= y2 ||
4529		    pixman_fixed_to_int(t->bottom) < y1)
4530			continue;
4531
4532		if (!project_trapezoid_onto_grid(t, thread->dx, thread->dy, &tt))
4533			continue;
4534
4535		tor_add_edge(&tor, &tt, &tt.left, 1);
4536		tor_add_edge(&tor, &tt, &tt.right, -1);
4537	}
4538
4539	tor_render(thread->sna, &tor,
4540		   (struct sna_composite_spans_op *)&boxes, thread->clip,
4541		   thread->span, thread->unbounded);
4542
4543	tor_fini(&tor);
4544
4545	if (boxes.num_boxes) {
4546		DBG(("%s: flushing %d boxes\n", __FUNCTION__, boxes.num_boxes));
4547		assert(boxes.num_boxes <= SPAN_THREAD_MAX_BOXES);
4548		thread->op->thread_boxes(thread->sna, thread->op,
4549					 boxes.boxes, boxes.num_boxes);
4550	}
4551}
4552
4553static bool
4554trapezoid_span_converter(struct sna *sna,
4555			 CARD8 op, PicturePtr src, PicturePtr dst,
4556			 PictFormatPtr maskFormat, unsigned int flags,
4557			 INT16 src_x, INT16 src_y,
4558			 int ntrap, xTrapezoid *traps)
4559{
4560	struct sna_composite_spans_op tmp;
4561	BoxRec extents;
4562	pixman_region16_t clip;
4563	int16_t dst_x, dst_y;
4564	bool was_clear;
4565	int dx, dy, n;
4566	int num_threads;
4567
4568	if (NO_SCAN_CONVERTER)
4569		return false;
4570
4571	if (is_mono(dst, maskFormat))
4572		return mono_trapezoids_span_converter(sna, op, src, dst,
4573						      src_x, src_y,
4574						      ntrap, traps);
4575
4576	/* XXX strict adherence to the Render specification */
4577	if (dst->polyMode == PolyModePrecise) {
4578		DBG(("%s: fallback -- precise rasterisation requested\n",
4579		     __FUNCTION__));
4580		return false;
4581	}
4582
4583	if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, flags)) {
4584		DBG(("%s: fallback -- composite spans not supported\n",
4585		     __FUNCTION__));
4586		return false;
4587	}
4588
4589	trapezoid_origin(&traps[0].left, &dst_x, &dst_y);
4590
4591	trapezoids_bounds(ntrap, traps, &extents);
4592	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
4593		return true;
4594
4595#if 0
4596	if (extents.y2 - extents.y1 < 64 && extents.x2 - extents.x1 < 64) {
4597		DBG(("%s: fallback -- traps extents too small %dx%d\n",
4598		     __FUNCTION__, extents.y2 - extents.y1, extents.x2 - extents.x1));
4599		return false;
4600	}
4601#endif
4602
4603	DBG(("%s: extents (%d, %d), (%d, %d)\n",
4604	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
4605
4606	if (!sna_compute_composite_region(&clip,
4607					  src, NULL, dst,
4608					  src_x + extents.x1 - dst_x,
4609					  src_y + extents.y1 - dst_y,
4610					  0, 0,
4611					  extents.x1, extents.y1,
4612					  extents.x2 - extents.x1,
4613					  extents.y2 - extents.y1)) {
4614		DBG(("%s: trapezoids do not intersect drawable clips\n",
4615		     __FUNCTION__)) ;
4616		return true;
4617	}
4618
4619	if (!sna->render.check_composite_spans(sna, op, src, dst,
4620					       clip.extents.x2 - clip.extents.x1,
4621					       clip.extents.y2 - clip.extents.y1,
4622					       flags)) {
4623		DBG(("%s: fallback -- composite spans not supported\n",
4624		     __FUNCTION__));
4625		return false;
4626	}
4627
4628	extents = *RegionExtents(&clip);
4629	dx = dst->pDrawable->x;
4630	dy = dst->pDrawable->y;
4631
4632	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
4633	     __FUNCTION__,
4634	     extents.x1, extents.y1,
4635	     extents.x2, extents.y2,
4636	     dx, dy,
4637	     src_x + extents.x1 - dst_x - dx,
4638	     src_y + extents.y1 - dst_y - dy));
4639
4640	was_clear = sna_drawable_is_clear(dst->pDrawable);
4641	switch (op) {
4642	case PictOpAdd:
4643	case PictOpOver:
4644		if (was_clear)
4645			op = PictOpSrc;
4646		break;
4647	case PictOpIn:
4648		if (was_clear)
4649			return true;
4650		break;
4651	}
4652
4653	memset(&tmp, 0, sizeof(tmp));
4654	if (!sna->render.composite_spans(sna, op, src, dst,
4655					 src_x + extents.x1 - dst_x - dx,
4656					 src_y + extents.y1 - dst_y - dy,
4657					 extents.x1,  extents.y1,
4658					 extents.x2 - extents.x1,
4659					 extents.y2 - extents.y1,
4660					 flags, &tmp)) {
4661		DBG(("%s: fallback -- composite spans render op not supported\n",
4662		     __FUNCTION__));
4663		return false;
4664	}
4665
4666	dx *= FAST_SAMPLES_X;
4667	dy *= FAST_SAMPLES_Y;
4668
4669	num_threads = 1;
4670	if (!NO_GPU_THREADS && tmp.thread_boxes &&
4671	    thread_choose_span(&tmp, dst, maskFormat, &clip))
4672		num_threads = sna_use_threads(extents.x2-extents.x1,
4673					      extents.y2-extents.y1,
4674					      16);
4675	DBG(("%s: using %d threads\n", __FUNCTION__, num_threads));
4676	if (num_threads == 1) {
4677		struct tor tor;
4678
4679		if (!tor_init(&tor, &extents, 2*ntrap))
4680			goto skip;
4681
4682		for (n = 0; n < ntrap; n++) {
4683			xTrapezoid t;
4684
4685			if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t))
4686				continue;
4687
4688			if (pixman_fixed_to_int(traps[n].top) + dst->pDrawable->y >= extents.y2 ||
4689			    pixman_fixed_to_int(traps[n].bottom) + dst->pDrawable->y < extents.y1)
4690				continue;
4691
4692			tor_add_edge(&tor, &t, &t.left, 1);
4693			tor_add_edge(&tor, &t, &t.right, -1);
4694		}
4695
4696		tor_render(sna, &tor, &tmp, &clip,
4697			   choose_span(&tmp, dst, maskFormat, &clip),
4698			   !was_clear && maskFormat && !operator_is_bounded(op));
4699
4700		tor_fini(&tor);
4701	} else {
4702		struct span_thread threads[num_threads];
4703		int y, h;
4704
4705		DBG(("%s: using %d threads for span compositing %dx%d\n",
4706		     __FUNCTION__, num_threads,
4707		     extents.x2 - extents.x1,
4708		     extents.y2 - extents.y1));
4709
4710		threads[0].sna = sna;
4711		threads[0].op = &tmp;
4712		threads[0].traps = traps;
4713		threads[0].ntrap = ntrap;
4714		threads[0].extents = extents;
4715		threads[0].clip = &clip;
4716		threads[0].dx = dx;
4717		threads[0].dy = dy;
4718		threads[0].draw_y = dst->pDrawable->y;
4719		threads[0].unbounded = !was_clear && maskFormat && !operator_is_bounded(op);
4720		threads[0].span = thread_choose_span(&tmp, dst, maskFormat, &clip);
4721
4722		y = extents.y1;
4723		h = extents.y2 - extents.y1;
4724		h = (h + num_threads - 1) / num_threads;
4725
4726		for (n = 1; n < num_threads; n++) {
4727			threads[n] = threads[0];
4728			threads[n].extents.y1 = y;
4729			threads[n].extents.y2 = y += h;
4730
4731			sna_threads_run(span_thread, &threads[n]);
4732		}
4733
4734		threads[0].extents.y1 = y;
4735		threads[0].extents.y2 = extents.y2;
4736		span_thread(&threads[0]);
4737
4738		sna_threads_wait();
4739	}
4740skip:
4741	tmp.done(sna, &tmp);
4742
4743	REGION_UNINIT(NULL, &clip);
4744	return true;
4745}
4746
4747static void
4748tor_blt_mask(struct sna *sna,
4749	     struct sna_composite_spans_op *op,
4750	     pixman_region16_t *clip,
4751	     const BoxRec *box,
4752	     int coverage)
4753{
4754	uint8_t *ptr = (uint8_t *)op;
4755	int stride = (intptr_t)clip;
4756	int h, w;
4757
4758	coverage = 256 * coverage / FAST_SAMPLES_XY;
4759	coverage -= coverage >> 8;
4760
4761	ptr += box->y1 * stride + box->x1;
4762
4763	h = box->y2 - box->y1;
4764	w = box->x2 - box->x1;
4765	if ((w | h) == 1) {
4766		*ptr = coverage;
4767	} else if (w == 1) {
4768		do {
4769			*ptr = coverage;
4770			ptr += stride;
4771		} while (--h);
4772	} else do {
4773		memset(ptr, coverage, w);
4774		ptr += stride;
4775	} while (--h);
4776}
4777
4778static void
4779tor_blt_mask_mono(struct sna *sna,
4780		  struct sna_composite_spans_op *op,
4781		  pixman_region16_t *clip,
4782		  const BoxRec *box,
4783		  int coverage)
4784{
4785	tor_blt_mask(sna, op, clip, box,
4786		     coverage < FAST_SAMPLES_XY/2 ? 0 : FAST_SAMPLES_XY);
4787}
4788
4789static bool
4790trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
4791			 PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
4792			 int ntrap, xTrapezoid *traps)
4793{
4794	struct tor tor;
4795	ScreenPtr screen = dst->pDrawable->pScreen;
4796	PixmapPtr scratch;
4797	PicturePtr mask;
4798	BoxRec extents;
4799	int16_t dst_x, dst_y;
4800	int dx, dy;
4801	int error, n;
4802
4803	if (NO_SCAN_CONVERTER)
4804		return false;
4805
4806	if (dst->polyMode == PolyModePrecise && !is_mono(dst, maskFormat)) {
4807		DBG(("%s: fallback -- precise rasterisation requested\n",
4808		     __FUNCTION__));
4809		return false;
4810	}
4811
4812	if (maskFormat == NULL && ntrap > 1) {
4813		DBG(("%s: individual rasterisation requested\n",
4814		     __FUNCTION__));
4815		do {
4816			/* XXX unwind errors? */
4817			if (!trapezoid_mask_converter(op, src, dst, NULL,
4818						 src_x, src_y, 1, traps++))
4819				return false;
4820		} while (--ntrap);
4821		return true;
4822	}
4823
4824	trapezoids_bounds(ntrap, traps, &extents);
4825	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
4826		return true;
4827
4828	DBG(("%s: ntraps=%d, extents (%d, %d), (%d, %d)\n",
4829	     __FUNCTION__, ntrap, extents.x1, extents.y1, extents.x2, extents.y2));
4830
4831	if (!sna_compute_composite_extents(&extents,
4832					   src, NULL, dst,
4833					   src_x, src_y,
4834					   0, 0,
4835					   extents.x1, extents.y1,
4836					   extents.x2 - extents.x1,
4837					   extents.y2 - extents.y1))
4838		return true;
4839
4840	DBG(("%s: extents (%d, %d), (%d, %d)\n",
4841	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
4842
4843	extents.y2 -= extents.y1;
4844	extents.x2 -= extents.x1;
4845	extents.x1 -= dst->pDrawable->x;
4846	extents.y1 -= dst->pDrawable->y;
4847	dst_x = extents.x1;
4848	dst_y = extents.y1;
4849	dx = -extents.x1 * FAST_SAMPLES_X;
4850	dy = -extents.y1 * FAST_SAMPLES_Y;
4851	extents.x1 = extents.y1 = 0;
4852
4853	DBG(("%s: mask (%dx%d), dx=(%d, %d)\n",
4854	     __FUNCTION__, extents.x2, extents.y2, dx, dy));
4855	scratch = sna_pixmap_create_upload(screen,
4856					   extents.x2, extents.y2, 8,
4857					   KGEM_BUFFER_WRITE_INPLACE);
4858	if (!scratch)
4859		return true;
4860
4861	DBG(("%s: created buffer %p, stride %d\n",
4862	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
4863
4864	if (!tor_init(&tor, &extents, 2*ntrap)) {
4865		sna_pixmap_destroy(scratch);
4866		return true;
4867	}
4868
4869	for (n = 0; n < ntrap; n++) {
4870		xTrapezoid t;
4871
4872		if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t))
4873			continue;
4874
4875		if (pixman_fixed_to_int(traps[n].top) - dst_y >= extents.y2 ||
4876		    pixman_fixed_to_int(traps[n].bottom) - dst_y < 0)
4877			continue;
4878
4879		tor_add_edge(&tor, &t, &t.left, 1);
4880		tor_add_edge(&tor, &t, &t.right, -1);
4881	}
4882
4883	if (extents.x2 <= TOR_INPLACE_SIZE) {
4884		uint8_t buf[TOR_INPLACE_SIZE];
4885		tor_inplace(&tor, scratch, is_mono(dst, maskFormat),
4886			    scratch->usage_hint ? NULL : buf);
4887	} else {
4888		tor_render(NULL, &tor,
4889			   scratch->devPrivate.ptr,
4890			   (void *)(intptr_t)scratch->devKind,
4891			   is_mono(dst, maskFormat) ? tor_blt_mask_mono : tor_blt_mask,
4892			   true);
4893	}
4894	tor_fini(&tor);
4895
4896	mask = CreatePicture(0, &scratch->drawable,
4897			     PictureMatchFormat(screen, 8, PICT_a8),
4898			     0, 0, serverClient, &error);
4899	if (mask) {
4900		CompositePicture(op, src, mask, dst,
4901				 src_x + dst_x - pixman_fixed_to_int(traps[0].left.p1.x),
4902				 src_y + dst_y - pixman_fixed_to_int(traps[0].left.p1.y),
4903				 0, 0,
4904				 dst_x, dst_y,
4905				 extents.x2, extents.y2);
4906		FreePicture(mask, 0);
4907	}
4908	sna_pixmap_destroy(scratch);
4909
4910	return true;
4911}
4912
/*
 * Target description handed to the tor_blt_* span writers in place of a
 * composite op.
 */
struct inplace {
	/* Row pitch in bytes. */
	uint32_t stride;
	/* Address of the first pixel of the target. */
	uint8_t *ptr;
	/* Source value: an 8-bit opacity for the a8 writers or a packed
	 * 32-bit colour for the lerp32 writer. */
	union {
		uint8_t opacity;
		uint32_t color;
	};
};
4921
4922static force_inline uint8_t coverage_opacity(int coverage, uint8_t opacity)
4923{
4924	coverage = coverage * 256 / FAST_SAMPLES_XY;
4925	coverage -= coverage >> 8;
4926	return opacity == 255 ? coverage : mul_8_8(coverage, opacity);
4927}
4928
4929static void
4930tor_blt_src(struct sna *sna,
4931	    struct sna_composite_spans_op *op,
4932	    pixman_region16_t *clip,
4933	    const BoxRec *box,
4934	    int coverage)
4935{
4936	struct inplace *in = (struct inplace *)op;
4937	uint8_t *ptr = in->ptr;
4938	int h, w;
4939
4940	coverage = coverage_opacity(coverage, in->opacity);
4941
4942	ptr += box->y1 * in->stride + box->x1;
4943
4944	h = box->y2 - box->y1;
4945	w = box->x2 - box->x1;
4946	if ((w | h) == 1) {
4947		*ptr = coverage;
4948	} else if (w == 1) {
4949		do {
4950			*ptr = coverage;
4951			ptr += in->stride;
4952		} while (--h);
4953	} else do {
4954		memset(ptr, coverage, w);
4955		ptr += in->stride;
4956	} while (--h);
4957}
4958
4959static void
4960tor_blt_src_clipped(struct sna *sna,
4961		    struct sna_composite_spans_op *op,
4962		    pixman_region16_t *clip,
4963		    const BoxRec *box,
4964		    int coverage)
4965{
4966	pixman_region16_t region;
4967	int n;
4968
4969	pixman_region_init_rects(&region, box, 1);
4970	RegionIntersect(&region, &region, clip);
4971	n = REGION_NUM_RECTS(&region);
4972	box = REGION_RECTS(&region);
4973	while (n--)
4974		tor_blt_src(sna, op, NULL, box++, coverage);
4975	pixman_region_fini(&region);
4976}
4977
4978static void
4979tor_blt_in(struct sna *sna,
4980	   struct sna_composite_spans_op *op,
4981	   pixman_region16_t *clip,
4982	   const BoxRec *box,
4983	   int coverage)
4984{
4985	struct inplace *in = (struct inplace *)op;
4986	uint8_t *ptr = in->ptr;
4987	int h, w, i;
4988
4989	if (coverage == 0) {
4990		tor_blt_src(sna, op, clip, box, 0);
4991		return;
4992	}
4993
4994	coverage = coverage_opacity(coverage, in->opacity);
4995	if (coverage == 0xff)
4996		return;
4997
4998	ptr += box->y1 * in->stride + box->x1;
4999
5000	h = box->y2 - box->y1;
5001	w = box->x2 - box->x1;
5002	do {
5003		for (i = 0; i < w; i++)
5004			ptr[i] = mul_8_8(ptr[i], coverage);
5005		ptr += in->stride;
5006	} while (--h);
5007}
5008
5009static void
5010tor_blt_in_clipped(struct sna *sna,
5011		   struct sna_composite_spans_op *op,
5012		   pixman_region16_t *clip,
5013		   const BoxRec *box,
5014		   int coverage)
5015{
5016	pixman_region16_t region;
5017	int n;
5018
5019	pixman_region_init_rects(&region, box, 1);
5020	RegionIntersect(&region, &region, clip);
5021	n = REGION_NUM_RECTS(&region);
5022	box = REGION_RECTS(&region);
5023	while (n--)
5024		tor_blt_in(sna, op, NULL, box++, coverage);
5025	pixman_region_fini(&region);
5026}
5027
5028static void
5029tor_blt_add(struct sna *sna,
5030	    struct sna_composite_spans_op *op,
5031	    pixman_region16_t *clip,
5032	    const BoxRec *box,
5033	    int coverage)
5034{
5035	struct inplace *in = (struct inplace *)op;
5036	uint8_t *ptr = in->ptr;
5037	int h, w, v, i;
5038
5039	if (coverage == 0)
5040		return;
5041
5042	coverage = coverage_opacity(coverage, in->opacity);
5043	if (coverage == 0xff) {
5044		tor_blt_src(sna, op, clip, box, 0xff);
5045		return;
5046	}
5047
5048	ptr += box->y1 * in->stride + box->x1;
5049
5050	h = box->y2 - box->y1;
5051	w = box->x2 - box->x1;
5052	if ((w | h) == 1) {
5053		v = coverage + *ptr;
5054		*ptr = v >= 255 ? 255 : v;
5055	} else {
5056		do {
5057			for (i = 0; i < w; i++) {
5058				v = coverage + ptr[i];
5059				ptr[i] = v >= 255 ? 255 : v;
5060			}
5061			ptr += in->stride;
5062		} while (--h);
5063	}
5064}
5065
5066static void
5067tor_blt_add_clipped(struct sna *sna,
5068		    struct sna_composite_spans_op *op,
5069		    pixman_region16_t *clip,
5070		    const BoxRec *box,
5071		    int coverage)
5072{
5073	pixman_region16_t region;
5074	int n;
5075
5076	pixman_region_init_rects(&region, box, 1);
5077	RegionIntersect(&region, &region, clip);
5078	n = REGION_NUM_RECTS(&region);
5079	box = REGION_RECTS(&region);
5080	while (n--)
5081		tor_blt_add(sna, op, NULL, box++, coverage);
5082	pixman_region_fini(&region);
5083}
5084
5085static void
5086tor_blt_lerp32(struct sna *sna,
5087	       struct sna_composite_spans_op *op,
5088	       pixman_region16_t *clip,
5089	       const BoxRec *box,
5090	       int coverage)
5091{
5092	struct inplace *in = (struct inplace *)op;
5093	uint32_t *ptr = (uint32_t *)in->ptr;
5094	int stride = in->stride / sizeof(uint32_t);
5095	int h, w, i;
5096
5097	if (coverage == 0)
5098		return;
5099
5100	ptr += box->y1 * stride + box->x1;
5101
5102	h = box->y2 - box->y1;
5103	w = box->x2 - box->x1;
5104	if (coverage == FAST_SAMPLES_XY) {
5105		if ((w | h) == 1) {
5106			*ptr = in->color;
5107		} else {
5108			if (w < 16) {
5109				do {
5110					for (i = 0; i < w; i++)
5111						ptr[i] = in->color;
5112					ptr += stride;
5113				} while (--h);
5114			} else {
5115				pixman_fill(ptr, stride, 32,
5116					    0, 0, w, h, in->color);
5117			}
5118		}
5119	} else {
5120		coverage = coverage * 256 / FAST_SAMPLES_XY;
5121		coverage -= coverage >> 8;
5122
5123		if ((w | h) == 1) {
5124			*ptr = lerp8x4(in->color, coverage, *ptr);
5125		} else if (w == 1) {
5126			do {
5127				*ptr = lerp8x4(in->color, coverage, *ptr);
5128				ptr += stride;
5129			} while (--h);
5130		} else{
5131			do {
5132				for (i = 0; i < w; i++)
5133					ptr[i] = lerp8x4(in->color, coverage, ptr[i]);
5134				ptr += stride;
5135			} while (--h);
5136		}
5137	}
5138}
5139
5140static void
5141tor_blt_lerp32_clipped(struct sna *sna,
5142		       struct sna_composite_spans_op *op,
5143		       pixman_region16_t *clip,
5144		       const BoxRec *box,
5145		       int coverage)
5146{
5147	pixman_region16_t region;
5148	int n;
5149
5150	pixman_region_init_rects(&region, box, 1);
5151	RegionIntersect(&region, &region, clip);
5152	n = REGION_NUM_RECTS(&region);
5153	box = REGION_RECTS(&region);
5154	while (n--)
5155		tor_blt_lerp32(sna, op, NULL, box++, coverage);
5156	pixman_region_fini(&region);
5157}
5158
5159struct mono_inplace_composite {
5160	pixman_image_t *src, *dst;
5161	int dx, dy;
5162	int sx, sy;
5163	int op;
5164};
/* Per-operation state of the mono in-place path that fills with a solid
 * colour through pixman_fill() (see mono_inplace_fill_box()). */
struct mono_inplace_fill {
	/* Base of the destination pixels. */
	uint32_t *data;
	/* Destination pitch, in the units pixman_fill() takes (uint32_t). */
	uint32_t stride;
	/* Fill value. */
	uint32_t color;
	/* Destination bits per pixel. */
	int bpp;
};
5170
5171fastcall static void
5172mono_inplace_fill_box(struct sna *sna,
5173		      const struct sna_composite_op *op,
5174		      const BoxRec *box)
5175{
5176	struct mono_inplace_fill *fill = op->priv;
5177
5178	DBG(("(%s: (%d, %d)x(%d, %d):%08x\n",
5179	     __FUNCTION__,
5180	     box->x1, box->y1,
5181	     box->x2 - box->x1,
5182	     box->y2 - box->y1,
5183	     fill->color));
5184	pixman_fill(fill->data, fill->stride, fill->bpp,
5185		    box->x1, box->y1,
5186		    box->x2 - box->x1,
5187		    box->y2 - box->y1,
5188		    fill->color);
5189}
5190
5191static void
5192mono_inplace_fill_boxes(struct sna *sna,
5193			const struct sna_composite_op *op,
5194			const BoxRec *box, int nbox)
5195{
5196	struct mono_inplace_fill *fill = op->priv;
5197
5198	do {
5199		DBG(("(%s: (%d, %d)x(%d, %d):%08x\n",
5200		     __FUNCTION__,
5201		     box->x1, box->y1,
5202		     box->x2 - box->x1,
5203		     box->y2 - box->y1,
5204		     fill->color));
5205		pixman_fill(fill->data, fill->stride, fill->bpp,
5206			    box->x1, box->y1,
5207			    box->x2 - box->x1,
5208			    box->y2 - box->y1,
5209			    fill->color);
5210		box++;
5211	} while (--nbox);
5212}
5213
5214fastcall static void
5215mono_inplace_composite_box(struct sna *sna,
5216			   const struct sna_composite_op *op,
5217			   const BoxRec *box)
5218{
5219	struct mono_inplace_composite *c = op->priv;
5220
5221	pixman_image_composite(c->op, c->src, NULL, c->dst,
5222			       box->x1 + c->sx, box->y1 + c->sy,
5223			       0, 0,
5224			       box->x1 + c->dx, box->y1 + c->dy,
5225			       box->x2 - box->x1,
5226			       box->y2 - box->y1);
5227}
5228
5229static void
5230mono_inplace_composite_boxes(struct sna *sna,
5231			     const struct sna_composite_op *op,
5232			     const BoxRec *box, int nbox)
5233{
5234	struct mono_inplace_composite *c = op->priv;
5235
5236	do {
5237		pixman_image_composite(c->op, c->src, NULL, c->dst,
5238				       box->x1 + c->sx, box->y1 + c->sy,
5239				       0, 0,
5240				       box->x1 + c->dx, box->y1 + c->dy,
5241				       box->x2 - box->x1,
5242				       box->y2 - box->y1);
5243		box++;
5244	} while (--nbox);
5245}
5246
5247static bool
5248trapezoid_spans_maybe_inplace(struct sna *sna,
5249			      CARD8 op, PicturePtr src, PicturePtr dst,
5250			      PictFormatPtr maskFormat)
5251{
5252	struct sna_pixmap *priv;
5253
5254	if (NO_SCAN_CONVERTER)
5255		return false;
5256
5257	if (dst->polyMode == PolyModePrecise && !is_mono(dst, maskFormat))
5258		return false;
5259	if (dst->alphaMap)
5260		return false;
5261
5262	if (is_mono(dst, maskFormat))
5263		goto out;
5264
5265	switch ((int)dst->format) {
5266	case PICT_a8:
5267		if (!sna_picture_is_solid(src, NULL))
5268			return false;
5269
5270		switch (op) {
5271		case PictOpIn:
5272		case PictOpAdd:
5273		case PictOpSrc:
5274			break;
5275		default:
5276			return false;
5277		}
5278		break;
5279
5280	case PICT_x8r8g8b8:
5281	case PICT_a8r8g8b8:
5282		if (picture_is_gpu(sna, src))
5283			return false;
5284
5285		switch (op) {
5286		case PictOpOver:
5287		case PictOpAdd:
5288		case PictOpOutReverse:
5289			break;
5290		case PictOpSrc:
5291			if (sna_picture_is_solid(src, NULL))
5292				break;
5293
5294			if (!sna_drawable_is_clear(dst->pDrawable))
5295				return false;
5296			break;
5297		default:
5298			return false;
5299		}
5300		break;
5301	default:
5302		return false;
5303	}
5304
5305out:
5306	priv = sna_pixmap_from_drawable(dst->pDrawable);
5307	if (priv == NULL)
5308		return true;
5309
5310	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
5311		return false;
5312
5313	if (DAMAGE_IS_ALL(priv->cpu_damage) || priv->gpu_damage == NULL)
5314		return true;
5315
5316	if (priv->clear)
5317		return dst->pDrawable->width <= TOR_INPLACE_SIZE;
5318
5319	if (kgem_bo_is_busy(priv->gpu_bo))
5320		return false;
5321
5322	if (priv->cpu_damage)
5323		return true;
5324
5325	return dst->pDrawable->width <= TOR_INPLACE_SIZE;
5326}
5327
5328static bool
5329trapezoid_span_mono_inplace(struct sna *sna,
5330			    CARD8 op,
5331			    PicturePtr src,
5332			    PicturePtr dst,
5333			    INT16 src_x, INT16 src_y,
5334			    int ntrap, xTrapezoid *traps)
5335{
5336	struct mono mono;
5337	union {
5338		struct mono_inplace_fill fill;
5339		struct mono_inplace_composite composite;
5340	} inplace;
5341	int was_clear;
5342	int x, y, n;
5343
5344	trapezoids_bounds(ntrap, traps, &mono.clip.extents);
5345	if (mono.clip.extents.y1 >= mono.clip.extents.y2 ||
5346	    mono.clip.extents.x1 >= mono.clip.extents.x2)
5347		return true;
5348
5349	DBG(("%s: extents (%d, %d), (%d, %d)\n",
5350	     __FUNCTION__,
5351	     mono.clip.extents.x1, mono.clip.extents.y1,
5352	     mono.clip.extents.x2, mono.clip.extents.y2));
5353
5354	if (!sna_compute_composite_region(&mono.clip,
5355					  src, NULL, dst,
5356					  src_x, src_y,
5357					  0, 0,
5358					  mono.clip.extents.x1, mono.clip.extents.y1,
5359					  mono.clip.extents.x2 - mono.clip.extents.x1,
5360					  mono.clip.extents.y2 - mono.clip.extents.y1)) {
5361		DBG(("%s: trapezoids do not intersect drawable clips\n",
5362		     __FUNCTION__)) ;
5363		return true;
5364	}
5365
5366	DBG(("%s: clipped extents (%d, %d), (%d, %d)\n",
5367	     __FUNCTION__,
5368	     mono.clip.extents.x1, mono.clip.extents.y1,
5369	     mono.clip.extents.x2, mono.clip.extents.y2));
5370
5371	was_clear = sna_drawable_is_clear(dst->pDrawable);
5372	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &mono.clip,
5373					     MOVE_WRITE | MOVE_READ))
5374		return true;
5375
5376	mono.sna = sna;
5377	if (!mono_init(&mono, 2*ntrap))
5378		return false;
5379
5380	mono.op.damage = NULL;
5381
5382	x = dst->pDrawable->x;
5383	y = dst->pDrawable->y;
5384
5385	for (n = 0; n < ntrap; n++) {
5386		if (!xTrapezoidValid(&traps[n]))
5387			continue;
5388
5389		if (pixman_fixed_to_int(traps[n].top) + y >= mono.clip.extents.y2 ||
5390		    pixman_fixed_to_int(traps[n].bottom) + y < mono.clip.extents.y1)
5391			continue;
5392
5393		mono_add_line(&mono, x, y,
5394			      traps[n].top, traps[n].bottom,
5395			      &traps[n].left.p1, &traps[n].left.p2, 1);
5396		mono_add_line(&mono, x, y,
5397			      traps[n].top, traps[n].bottom,
5398			      &traps[n].right.p1, &traps[n].right.p2, -1);
5399	}
5400
5401	if (sna_picture_is_solid(src, &inplace.fill.color) &&
5402	    (op == PictOpSrc || op == PictOpClear ||
5403	     (was_clear && (op == PictOpOver || op == PictOpAdd)) ||
5404	     (op == PictOpOver && inplace.fill.color >> 24 == 0xff))) {
5405		PixmapPtr pixmap;
5406		int16_t dx, dy;
5407		uint8_t *ptr;
5408
5409unbounded_pass:
5410		pixmap = get_drawable_pixmap(dst->pDrawable);
5411
5412		ptr = pixmap->devPrivate.ptr;
5413		if (get_drawable_deltas(dst->pDrawable, pixmap, &dx, &dy))
5414			ptr += dy * pixmap->devKind + dx * pixmap->drawable.bitsPerPixel / 8;
5415		inplace.fill.data = (uint32_t *)ptr;
5416		inplace.fill.stride = pixmap->devKind / sizeof(uint32_t);
5417		inplace.fill.bpp = pixmap->drawable.bitsPerPixel;
5418
5419		if (op == PictOpClear)
5420			inplace.fill.color = 0;
5421		else if (dst->format != PICT_a8r8g8b8)
5422			inplace.fill.color = sna_rgba_to_color(inplace.fill.color, dst->format);
5423
5424		DBG(("%s: fill %x\n", __FUNCTION__, inplace.fill.color));
5425
5426		mono.op.priv = &inplace.fill;
5427		mono.op.box = mono_inplace_fill_box;
5428		mono.op.boxes = mono_inplace_fill_boxes;
5429
5430		op = 0;
5431	} else {
5432		if (src->pDrawable) {
5433			if (!sna_drawable_move_to_cpu(src->pDrawable,
5434						      MOVE_READ)) {
5435				mono_fini(&mono);
5436				return false;
5437			}
5438			if (src->alphaMap &&
5439			    !sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
5440						      MOVE_READ)) {
5441				mono_fini(&mono);
5442				return false;
5443			}
5444		}
5445
5446		inplace.composite.dst = image_from_pict(dst, false,
5447							&inplace.composite.dx,
5448							&inplace.composite.dy);
5449		inplace.composite.src = image_from_pict(src, false,
5450							&inplace.composite.sx,
5451							&inplace.composite.sy);
5452		inplace.composite.sx +=
5453			src_x - pixman_fixed_to_int(traps[0].left.p1.x),
5454		inplace.composite.sy +=
5455			src_y - pixman_fixed_to_int(traps[0].left.p1.y),
5456		inplace.composite.op = op;
5457
5458		mono.op.priv = &inplace.composite;
5459		mono.op.box = mono_inplace_composite_box;
5460		mono.op.boxes = mono_inplace_composite_boxes;
5461	}
5462
5463	if (mono.clip.data == NULL && mono.op.damage == NULL)
5464		mono.span = mono_span__fast;
5465	else
5466		mono.span = mono_span;
5467	mono_render(&mono);
5468	mono_fini(&mono);
5469
5470	if (op) {
5471		free_pixman_pict(src, inplace.composite.src);
5472		free_pixman_pict(dst, inplace.composite.dst);
5473
5474		if (!was_clear && !operator_is_bounded(op)) {
5475			xPointFixed p1, p2;
5476
5477			DBG(("%s: unbounded fixup\n", __FUNCTION__));
5478
5479			if (!mono_init(&mono, 2+2*ntrap))
5480				return false;
5481
5482			p1.y = mono.clip.extents.y1 * pixman_fixed_1;
5483			p2.y = mono.clip.extents.y2 * pixman_fixed_1;
5484
5485			p1.x = mono.clip.extents.x1 * pixman_fixed_1;
5486			p2.x = mono.clip.extents.x1 * pixman_fixed_1;
5487			mono_add_line(&mono, 0, 0, p1.y, p2.y, &p1, &p2, -1);
5488
5489			p1.x = mono.clip.extents.x2 * pixman_fixed_1;
5490			p2.x = mono.clip.extents.x2 * pixman_fixed_1;
5491			mono_add_line(&mono, 0, 0, p1.y, p2.y, &p1, &p2, 1);
5492
5493			for (n = 0; n < ntrap; n++) {
5494				if (!xTrapezoidValid(&traps[n]))
5495					continue;
5496
5497				if (pixman_fixed_to_int(traps[n].top) + x >= mono.clip.extents.y2 ||
5498				    pixman_fixed_to_int(traps[n].bottom) + y < mono.clip.extents.y1)
5499					continue;
5500
5501				mono_add_line(&mono, x, y,
5502					      traps[n].top, traps[n].bottom,
5503					      &traps[n].left.p1, &traps[n].left.p2, 1);
5504				mono_add_line(&mono, x, y,
5505					      traps[n].top, traps[n].bottom,
5506					      &traps[n].right.p1, &traps[n].right.p2, -1);
5507			}
5508
5509			op = PictOpClear;
5510			goto unbounded_pass;
5511		}
5512	}
5513
5514	return true;
5515}
5516
5517static void
5518pixmask_span_solid(struct sna *sna,
5519		   struct sna_composite_spans_op *op,
5520		   pixman_region16_t *clip,
5521		   const BoxRec *box,
5522		   int coverage)
5523{
5524	struct pixman_inplace *pi = (struct pixman_inplace *)op;
5525	if (coverage != FAST_SAMPLES_XY) {
5526		coverage = coverage * 256 / FAST_SAMPLES_XY;
5527		coverage -= coverage >> 8;
5528		*pi->bits = mul_4x8_8(pi->color, coverage);
5529	} else
5530		*pi->bits = pi->color;
5531	pixman_image_composite(pi->op, pi->source, NULL, pi->image,
5532			       box->x1, box->y1,
5533			       0, 0,
5534			       pi->dx + box->x1, pi->dy + box->y1,
5535			       box->x2 - box->x1, box->y2 - box->y1);
5536}
5537static void
5538pixmask_span_solid__clipped(struct sna *sna,
5539			    struct sna_composite_spans_op *op,
5540			    pixman_region16_t *clip,
5541			    const BoxRec *box,
5542			    int coverage)
5543{
5544	pixman_region16_t region;
5545	int n;
5546
5547	pixman_region_init_rects(&region, box, 1);
5548	RegionIntersect(&region, &region, clip);
5549	n = REGION_NUM_RECTS(&region);
5550	box = REGION_RECTS(&region);
5551	while (n--)
5552		pixmask_span_solid(sna, op, NULL, box++, coverage);
5553	pixman_region_fini(&region);
5554}
5555
5556static void
5557pixmask_span(struct sna *sna,
5558	     struct sna_composite_spans_op *op,
5559	     pixman_region16_t *clip,
5560	     const BoxRec *box,
5561	     int coverage)
5562{
5563	struct pixman_inplace *pi = (struct pixman_inplace *)op;
5564	pixman_image_t *mask = NULL;
5565	if (coverage != FAST_SAMPLES_XY) {
5566		coverage = coverage * 256 / FAST_SAMPLES_XY;
5567		coverage -= coverage >> 8;
5568		*pi->bits = coverage;
5569		mask = pi->mask;
5570	}
5571	pixman_image_composite(pi->op, pi->source, mask, pi->image,
5572			       pi->sx + box->x1, pi->sy + box->y1,
5573			       0, 0,
5574			       pi->dx + box->x1, pi->dy + box->y1,
5575			       box->x2 - box->x1, box->y2 - box->y1);
5576}
5577static void
5578pixmask_span__clipped(struct sna *sna,
5579		      struct sna_composite_spans_op *op,
5580		      pixman_region16_t *clip,
5581		      const BoxRec *box,
5582		      int coverage)
5583{
5584	pixman_region16_t region;
5585	int n;
5586
5587	pixman_region_init_rects(&region, box, 1);
5588	RegionIntersect(&region, &region, clip);
5589	n = REGION_NUM_RECTS(&region);
5590	box = REGION_RECTS(&region);
5591	while (n--)
5592		pixmask_span(sna, op, NULL, box++, coverage);
5593	pixman_region_fini(&region);
5594}
5595
5596struct inplace_x8r8g8b8_thread {
5597	xTrapezoid *traps;
5598	PicturePtr dst, src;
5599	BoxRec extents;
5600	int dx, dy;
5601	int ntrap;
5602	bool lerp, is_solid;
5603	uint32_t color;
5604	int16_t src_x, src_y;
5605	uint8_t op;
5606};
5607
5608static void inplace_x8r8g8b8_thread(void *arg)
5609{
5610	struct inplace_x8r8g8b8_thread *thread = arg;
5611	struct tor tor;
5612	span_func_t span;
5613	RegionPtr clip;
5614	int y1, y2, n;
5615
5616	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
5617		return;
5618
5619	y1 = thread->extents.y1 - thread->dst->pDrawable->y;
5620	y2 = thread->extents.y2 - thread->dst->pDrawable->y;
5621	for (n = 0; n < thread->ntrap; n++) {
5622		xTrapezoid t;
5623
5624		if (!project_trapezoid_onto_grid(&thread->traps[n], thread->dx, thread->dy, &t))
5625			continue;
5626
5627		if (pixman_fixed_to_int(thread->traps[n].top) >= y2 ||
5628		    pixman_fixed_to_int(thread->traps[n].bottom) < y1)
5629			continue;
5630
5631		tor_add_edge(&tor, &t, &t.left, 1);
5632		tor_add_edge(&tor, &t, &t.right, -1);
5633	}
5634
5635	clip = thread->dst->pCompositeClip;
5636	if (thread->lerp) {
5637		struct inplace inplace;
5638		int16_t dst_x, dst_y;
5639		PixmapPtr pixmap;
5640
5641		pixmap = get_drawable_pixmap(thread->dst->pDrawable);
5642
5643		inplace.ptr = pixmap->devPrivate.ptr;
5644		if (get_drawable_deltas(thread->dst->pDrawable, pixmap, &dst_x, &dst_y))
5645			inplace.ptr += dst_y * pixmap->devKind + dst_x * 4;
5646		inplace.stride = pixmap->devKind;
5647		inplace.color = thread->color;
5648
5649		if (clip->data)
5650			span = tor_blt_lerp32_clipped;
5651		else
5652			span = tor_blt_lerp32;
5653
5654		tor_render(NULL, &tor, (void*)&inplace, clip, span, false);
5655	} else if (thread->is_solid) {
5656		struct pixman_inplace pi;
5657
5658		pi.image = image_from_pict(thread->dst, false, &pi.dx, &pi.dy);
5659		pi.op = thread->op;
5660		pi.color = thread->color;
5661
5662		pi.bits = (uint32_t *)&pi.sx;
5663		pi.source = pixman_image_create_bits(PIXMAN_a8r8g8b8,
5664						     1, 1, pi.bits, 0);
5665		pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);
5666
5667		if (clip->data)
5668			span = pixmask_span_solid__clipped;
5669		else
5670			span = pixmask_span_solid;
5671
5672		tor_render(NULL, &tor, (void*)&pi, clip, span, false);
5673
5674		pixman_image_unref(pi.source);
5675		pixman_image_unref(pi.image);
5676	} else {
5677		struct pixman_inplace pi;
5678
5679		pi.image = image_from_pict(thread->dst, false, &pi.dx, &pi.dy);
5680		pi.source = image_from_pict(thread->src, false, &pi.sx, &pi.sy);
5681		pi.sx += thread->src_x - pixman_fixed_to_int(thread->traps[0].left.p1.x);
5682		pi.sy += thread->src_y - pixman_fixed_to_int(thread->traps[0].left.p1.y);
5683		pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, NULL, 0);
5684		pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL);
5685		pi.bits = pixman_image_get_data(pi.mask);
5686		pi.op = thread->op;
5687
5688		if (clip->data)
5689			span = pixmask_span__clipped;
5690		else
5691			span = pixmask_span;
5692
5693		tor_render(NULL, &tor, (void*)&pi, clip, span, false);
5694
5695		pixman_image_unref(pi.mask);
5696		pixman_image_unref(pi.source);
5697		pixman_image_unref(pi.image);
5698	}
5699
5700	tor_fini(&tor);
5701}
5702
5703static bool
5704trapezoid_span_inplace__x8r8g8b8(CARD8 op,
5705				 PicturePtr dst,
5706				 PicturePtr src, int16_t src_x, int16_t src_y,
5707				 PictFormatPtr maskFormat,
5708				 int ntrap, xTrapezoid *traps)
5709{
5710	uint32_t color;
5711	bool lerp, is_solid;
5712	RegionRec region;
5713	int dx, dy;
5714	int num_threads, n;
5715
5716	lerp = false;
5717	is_solid = sna_picture_is_solid(src, &color);
5718	if (is_solid) {
5719		if (op == PictOpOver && (color >> 24) == 0xff)
5720			op = PictOpSrc;
5721		if (op == PictOpOver && sna_drawable_is_clear(dst->pDrawable))
5722			op = PictOpSrc;
5723		lerp = op == PictOpSrc;
5724	}
5725	if (!lerp) {
5726		switch (op) {
5727		case PictOpOver:
5728		case PictOpAdd:
5729		case PictOpOutReverse:
5730			break;
5731		case PictOpSrc:
5732			if (!sna_drawable_is_clear(dst->pDrawable))
5733				return false;
5734			break;
5735		default:
5736			return false;
5737		}
5738	}
5739
5740	if (maskFormat == NULL && ntrap > 1) {
5741		DBG(("%s: individual rasterisation requested\n",
5742		     __FUNCTION__));
5743		do {
5744			/* XXX unwind errors? */
5745			if (!trapezoid_span_inplace__x8r8g8b8(op, dst,
5746							      src, src_x, src_y,
5747							      NULL, 1, traps++))
5748				return false;
5749		} while (--ntrap);
5750		return true;
5751	}
5752
5753	trapezoids_bounds(ntrap, traps, &region.extents);
5754	if (region.extents.y1 >= region.extents.y2 ||
5755	    region.extents.x1 >= region.extents.x2)
5756		return true;
5757
5758	DBG(("%s: extents (%d, %d), (%d, %d)\n",
5759	     __FUNCTION__,
5760	     region.extents.x1, region.extents.y1,
5761	     region.extents.x2, region.extents.y2));
5762
5763	if (!sna_compute_composite_extents(&region.extents,
5764					   src, NULL, dst,
5765					   src_x, src_y,
5766					   0, 0,
5767					   region.extents.x1, region.extents.y1,
5768					   region.extents.x2 - region.extents.x1,
5769					   region.extents.y2 - region.extents.y1))
5770		return true;
5771
5772	DBG(("%s: clipped extents (%d, %d), (%d, %d)\n",
5773	     __FUNCTION__,
5774	     region.extents.x1, region.extents.y1,
5775	     region.extents.x2, region.extents.y2));
5776
5777	region.data = NULL;
5778	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
5779					    MOVE_WRITE | MOVE_READ))
5780		return true;
5781
5782	if (!is_solid && src->pDrawable) {
5783		if (!sna_drawable_move_to_cpu(src->pDrawable,
5784					      MOVE_READ))
5785			return true;
5786
5787		if (src->alphaMap &&
5788		    !sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
5789					      MOVE_READ))
5790			return true;
5791	}
5792
5793	dx = dst->pDrawable->x * FAST_SAMPLES_X;
5794	dy = dst->pDrawable->y * FAST_SAMPLES_Y;
5795
5796	num_threads = sna_use_threads(4*(region.extents.x2 - region.extents.x1),
5797				      region.extents.y2 - region.extents.y1,
5798				      16);
5799
5800	DBG(("%s: %dx%d, format=%x, op=%d, lerp?=%d, num_threads=%d\n",
5801	     __FUNCTION__,
5802	     region.extents.x2 - region.extents.x1,
5803	     region.extents.y2 - region.extents.y1,
5804	     dst->format, op, lerp, num_threads));
5805
5806	if (num_threads == 1) {
5807		struct tor tor;
5808		span_func_t span;
5809
5810		if (!tor_init(&tor, &region.extents, 2*ntrap))
5811			return true;
5812
5813		for (n = 0; n < ntrap; n++) {
5814			xTrapezoid t;
5815
5816			if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t))
5817				continue;
5818
5819			if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y ||
5820			    pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y)
5821				continue;
5822
5823			tor_add_edge(&tor, &t, &t.left, 1);
5824			tor_add_edge(&tor, &t, &t.right, -1);
5825		}
5826
5827		if (lerp) {
5828			struct inplace inplace;
5829			PixmapPtr pixmap;
5830			int16_t dst_x, dst_y;
5831
5832			pixmap = get_drawable_pixmap(dst->pDrawable);
5833
5834			inplace.ptr = pixmap->devPrivate.ptr;
5835			if (get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y))
5836				inplace.ptr += dst_y * pixmap->devKind + dst_x * 4;
5837			inplace.stride = pixmap->devKind;
5838			inplace.color = color;
5839
5840			if (dst->pCompositeClip->data)
5841				span = tor_blt_lerp32_clipped;
5842			else
5843				span = tor_blt_lerp32;
5844
5845			DBG(("%s: render inplace op=%d, color=%08x\n",
5846			     __FUNCTION__, op, color));
5847
5848			tor_render(NULL, &tor, (void*)&inplace,
5849				   dst->pCompositeClip, span, false);
5850		} else if (is_solid) {
5851			struct pixman_inplace pi;
5852
5853			pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy);
5854			pi.op = op;
5855			pi.color = color;
5856
5857			pi.bits = (uint32_t *)&pi.sx;
5858			pi.source = pixman_image_create_bits(PIXMAN_a8r8g8b8,
5859							     1, 1, pi.bits, 0);
5860			pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);
5861
5862			if (dst->pCompositeClip->data)
5863				span = pixmask_span_solid__clipped;
5864			else
5865				span = pixmask_span_solid;
5866
5867			tor_render(NULL, &tor, (void*)&pi,
5868				   dst->pCompositeClip, span,
5869				   false);
5870
5871			pixman_image_unref(pi.source);
5872			pixman_image_unref(pi.image);
5873		} else {
5874			struct pixman_inplace pi;
5875
5876			pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy);
5877			pi.source = image_from_pict(src, false, &pi.sx, &pi.sy);
5878			pi.sx += src_x - pixman_fixed_to_int(traps[0].left.p1.x);
5879			pi.sy += src_y - pixman_fixed_to_int(traps[0].left.p1.y);
5880			pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, NULL, 0);
5881			pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL);
5882			pi.bits = pixman_image_get_data(pi.mask);
5883			pi.op = op;
5884
5885			if (dst->pCompositeClip->data)
5886				span = pixmask_span__clipped;
5887			else
5888				span = pixmask_span;
5889
5890			tor_render(NULL, &tor, (void*)&pi,
5891				   dst->pCompositeClip, span,
5892				   false);
5893
5894			pixman_image_unref(pi.mask);
5895			pixman_image_unref(pi.source);
5896			pixman_image_unref(pi.image);
5897		}
5898
5899		tor_fini(&tor);
5900	} else {
5901		struct inplace_x8r8g8b8_thread threads[num_threads];
5902		int y, h;
5903
5904		DBG(("%s: using %d threads for inplace compositing %dx%d\n",
5905		     __FUNCTION__, num_threads,
5906		     region.extents.x2 - region.extents.x1,
5907		     region.extents.y2 - region.extents.y1));
5908
5909		threads[0].traps = traps;
5910		threads[0].ntrap = ntrap;
5911		threads[0].extents = region.extents;
5912		threads[0].lerp = lerp;
5913		threads[0].is_solid = is_solid;
5914		threads[0].color = color;
5915		threads[0].dx = dx;
5916		threads[0].dy = dy;
5917		threads[0].dst = dst;
5918		threads[0].src = src;
5919		threads[0].op = op;
5920		threads[0].src_x = src_x;
5921		threads[0].src_y = src_y;
5922
5923		y = region.extents.y1;
5924		h = region.extents.y2 - region.extents.y1;
5925		h = (h + num_threads - 1) / num_threads;
5926
5927		for (n = 1; n < num_threads; n++) {
5928			threads[n] = threads[0];
5929			threads[n].extents.y1 = y;
5930			threads[n].extents.y2 = y += h;
5931
5932			sna_threads_run(inplace_x8r8g8b8_thread, &threads[n]);
5933		}
5934
5935		threads[0].extents.y1 = y;
5936		threads[0].extents.y2 = region.extents.y2;
5937		inplace_x8r8g8b8_thread(&threads[0]);
5938
5939		sna_threads_wait();
5940	}
5941
5942	return true;
5943}
5944
5945struct inplace_thread {
5946	xTrapezoid *traps;
5947	RegionPtr clip;
5948	span_func_t span;
5949	struct inplace inplace;
5950	BoxRec extents;
5951	int dx, dy;
5952	int draw_x, draw_y;
5953	bool unbounded;
5954	int ntrap;
5955};
5956
5957static void inplace_thread(void *arg)
5958{
5959	struct inplace_thread *thread = arg;
5960	struct tor tor;
5961	int n;
5962
5963	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
5964		return;
5965
5966	for (n = 0; n < thread->ntrap; n++) {
5967		xTrapezoid t;
5968
5969		if (!project_trapezoid_onto_grid(&thread->traps[n], thread->dx, thread->dy, &t))
5970			continue;
5971
5972		if (pixman_fixed_to_int(thread->traps[n].top) >= thread->extents.y2 - thread->draw_y ||
5973		    pixman_fixed_to_int(thread->traps[n].bottom) < thread->extents.y1 - thread->draw_y)
5974			continue;
5975
5976		tor_add_edge(&tor, &t, &t.left, 1);
5977		tor_add_edge(&tor, &t, &t.right, -1);
5978	}
5979
5980	tor_render(NULL, &tor, (void*)&thread->inplace,
5981		   thread->clip, thread->span, thread->unbounded);
5982
5983	tor_fini(&tor);
5984}
5985
5986static bool
5987trapezoid_span_inplace(struct sna *sna,
5988		       CARD8 op, PicturePtr src, PicturePtr dst,
5989		       PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
5990		       int ntrap, xTrapezoid *traps,
5991		       bool fallback)
5992{
5993	struct inplace inplace;
5994	span_func_t span;
5995	PixmapPtr pixmap;
5996	struct sna_pixmap *priv;
5997	RegionRec region;
5998	uint32_t color;
5999	bool unbounded;
6000	int16_t dst_x, dst_y;
6001	int dx, dy;
6002	int num_threads, n;
6003
6004	if (NO_SCAN_CONVERTER)
6005		return false;
6006
6007	if (dst->polyMode == PolyModePrecise && !is_mono(dst, maskFormat)) {
6008		DBG(("%s: fallback -- precise rasterisation requested\n",
6009		     __FUNCTION__));
6010		return false;
6011	}
6012	if (dst->alphaMap) {
6013		DBG(("%s: fallback -- dst alphamap\n",
6014		     __FUNCTION__));
6015		return false;
6016	}
6017
6018	if (!fallback && is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
6019		DBG(("%s: fallback -- can not perform operation in place, destination busy\n",
6020		     __FUNCTION__));
6021
6022		return false;
6023	}
6024
6025	if (is_mono(dst, maskFormat))
6026		return trapezoid_span_mono_inplace(sna, op, src, dst,
6027						   src_x, src_y, ntrap, traps);
6028
6029	if (dst->format == PICT_a8r8g8b8 || dst->format == PICT_x8r8g8b8)
6030		return trapezoid_span_inplace__x8r8g8b8(op, dst,
6031							src, src_x, src_y,
6032							maskFormat,
6033							ntrap, traps);
6034
6035	if (!sna_picture_is_solid(src, &color)) {
6036		DBG(("%s: fallback -- can not perform operation in place, requires solid source\n",
6037		     __FUNCTION__));
6038		return false;
6039	}
6040
6041	if (dst->format != PICT_a8) {
6042		DBG(("%s: fallback -- can not perform operation in place, format=%x\n",
6043		     __FUNCTION__, dst->format));
6044		return false;
6045	}
6046
6047	pixmap = get_drawable_pixmap(dst->pDrawable);
6048
6049	unbounded = false;
6050	priv = sna_pixmap(pixmap);
6051	if (priv) {
6052		switch (op) {
6053		case PictOpAdd:
6054			if (priv->clear && priv->clear_color == 0) {
6055				unbounded = true;
6056				op = PictOpSrc;
6057			}
6058			if ((color >> 24) == 0)
6059				return true;
6060			break;
6061		case PictOpIn:
6062			if (priv->clear && priv->clear_color == 0)
6063				return true;
6064			if (priv->clear && priv->clear_color == 0xff)
6065				op = PictOpSrc;
6066			unbounded = true;
6067			break;
6068		case PictOpSrc:
6069			unbounded = true;
6070			break;
6071		default:
6072			DBG(("%s: fallback -- can not perform op [%d] in place\n",
6073			     __FUNCTION__, op));
6074			return false;
6075		}
6076	} else {
6077		switch (op) {
6078		case PictOpAdd:
6079			if ((color >> 24) == 0)
6080				return true;
6081			break;
6082		case PictOpIn:
6083		case PictOpSrc:
6084			unbounded = true;
6085			break;
6086		default:
6087			DBG(("%s: fallback -- can not perform op [%d] in place\n",
6088			     __FUNCTION__, op));
6089			return false;
6090		}
6091	}
6092
6093	DBG(("%s: format=%x, op=%d, color=%x\n",
6094	     __FUNCTION__, dst->format, op, color));
6095
6096	if (maskFormat == NULL && ntrap > 1) {
6097		DBG(("%s: individual rasterisation requested\n",
6098		     __FUNCTION__));
6099		do {
6100			/* XXX unwind errors? */
6101			if (!trapezoid_span_inplace(sna, op, src, dst, NULL,
6102						    src_x, src_y, 1, traps++,
6103						    fallback))
6104				return false;
6105		} while (--ntrap);
6106		return true;
6107	}
6108
6109	trapezoids_bounds(ntrap, traps, &region.extents);
6110	if (region.extents.y1 >= region.extents.y2 ||
6111	    region.extents.x1 >= region.extents.x2)
6112		return true;
6113
6114	DBG(("%s: extents (%d, %d), (%d, %d)\n",
6115	     __FUNCTION__,
6116	     region.extents.x1, region.extents.y1,
6117	     region.extents.x2, region.extents.y2));
6118
6119	if (!sna_compute_composite_extents(&region.extents,
6120					   NULL, NULL, dst,
6121					   0, 0,
6122					   0, 0,
6123					   region.extents.x1, region.extents.y1,
6124					   region.extents.x2 - region.extents.x1,
6125					   region.extents.y2 - region.extents.y1))
6126		return true;
6127
6128	DBG(("%s: clipped extents (%d, %d), (%d, %d)\n",
6129	     __FUNCTION__,
6130	     region.extents.x1, region.extents.y1,
6131	     region.extents.x2, region.extents.y2));
6132
6133	if (op == PictOpSrc) {
6134		if (dst->pCompositeClip->data)
6135			span = tor_blt_src_clipped;
6136		else
6137			span = tor_blt_src;
6138	} else if (op == PictOpIn) {
6139		if (dst->pCompositeClip->data)
6140			span = tor_blt_in_clipped;
6141		else
6142			span = tor_blt_in;
6143	} else {
6144		assert(op == PictOpAdd);
6145		if (dst->pCompositeClip->data)
6146			span = tor_blt_add_clipped;
6147		else
6148			span = tor_blt_add;
6149	}
6150
6151	DBG(("%s: move-to-cpu\n", __FUNCTION__));
6152	region.data = NULL;
6153	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
6154					     op == PictOpSrc ? MOVE_WRITE | MOVE_INPLACE_HINT : MOVE_WRITE | MOVE_READ))
6155		return true;
6156
6157	dx = dst->pDrawable->x * FAST_SAMPLES_X;
6158	dy = dst->pDrawable->y * FAST_SAMPLES_Y;
6159
6160
6161	inplace.ptr = pixmap->devPrivate.ptr;
6162	if (get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y))
6163		inplace.ptr += dst_y * pixmap->devKind + dst_x;
6164	inplace.stride = pixmap->devKind;
6165	inplace.opacity = color >> 24;
6166
6167	num_threads = sna_use_threads(region.extents.x2 - region.extents.x1,
6168				      region.extents.y2 - region.extents.y1,
6169				      16);
6170	if (num_threads == 1) {
6171		struct tor tor;
6172
6173		if (!tor_init(&tor, &region.extents, 2*ntrap))
6174			return true;
6175
6176		for (n = 0; n < ntrap; n++) {
6177			xTrapezoid t;
6178
6179			if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t))
6180				continue;
6181
6182			if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y ||
6183			    pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y)
6184				continue;
6185
6186			tor_add_edge(&tor, &t, &t.left, 1);
6187			tor_add_edge(&tor, &t, &t.right, -1);
6188		}
6189
6190		tor_render(NULL, &tor, (void*)&inplace,
6191			   dst->pCompositeClip, span, unbounded);
6192
6193		tor_fini(&tor);
6194	} else {
6195		struct inplace_thread threads[num_threads];
6196		int y, h;
6197
6198		DBG(("%s: using %d threads for inplace compositing %dx%d\n",
6199		     __FUNCTION__, num_threads,
6200		     region.extents.x2 - region.extents.x1,
6201		     region.extents.y2 - region.extents.y1));
6202
6203		threads[0].traps = traps;
6204		threads[0].ntrap = ntrap;
6205		threads[0].inplace = inplace;
6206		threads[0].extents = region.extents;
6207		threads[0].clip = dst->pCompositeClip;
6208		threads[0].span = span;
6209		threads[0].unbounded = unbounded;
6210		threads[0].dx = dx;
6211		threads[0].dy = dy;
6212		threads[0].draw_x = dst->pDrawable->x;
6213		threads[0].draw_y = dst->pDrawable->y;
6214
6215		y = region.extents.y1;
6216		h = region.extents.y2 - region.extents.y1;
6217		h = (h + num_threads - 1) / num_threads;
6218
6219		for (n = 1; n < num_threads; n++) {
6220			threads[n] = threads[0];
6221			threads[n].extents.y1 = y;
6222			threads[n].extents.y2 = y += h;
6223
6224			sna_threads_run(inplace_thread, &threads[n]);
6225		}
6226
6227		threads[0].extents.y1 = y;
6228		threads[0].extents.y2 = region.extents.y2;
6229		inplace_thread(&threads[0]);
6230
6231		sna_threads_wait();
6232	}
6233
6234	return true;
6235}
6236
/*
 * CPU fallback: rasterise the trapezoids into a temporary 8bpp scratch
 * pixmap, wrap it in an a8 picture and composite src through that mask
 * onto dst with sna_composite_fb().
 *
 * With maskFormat == NULL and more than one trapezoid, each trapezoid is
 * processed individually by recursing with ntrap == 1.
 *
 * Returns false only when the scan converter is disabled or precise
 * (non-mono) rasterisation is requested.  Otherwise returns true, which
 * includes empty extents and failure to create the scratch pixmap, the
 * scan converter or the mask picture.
 */
static bool
trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
			PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
			int ntrap, xTrapezoid *traps)
{
	struct tor tor;
	ScreenPtr screen = dst->pDrawable->pScreen;
	PixmapPtr scratch;
	PicturePtr mask;
	BoxRec extents;
	int16_t dst_x, dst_y;
	int dx, dy;
	int error, n;

	if (NO_SCAN_CONVERTER)
		return false;

	if (dst->polyMode == PolyModePrecise && !is_mono(dst, maskFormat)) {
		DBG(("%s: fallback -- precise rasterisation requested\n",
		     __FUNCTION__));
		return false;
	}

	if (maskFormat == NULL && ntrap > 1) {
		DBG(("%s: individual rasterisation requested\n",
		     __FUNCTION__));
		do {
			/* XXX unwind errors? */
			if (!trapezoid_span_fallback(op, src, dst, NULL,
						     src_x, src_y, 1, traps++))
				return false;
		} while (--ntrap);
		return true;
	}

	trapezoids_bounds(ntrap, traps, &extents);
	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
		return true;

	DBG(("%s: ntraps=%d, extents (%d, %d), (%d, %d)\n",
	     __FUNCTION__, ntrap, extents.x1, extents.y1, extents.x2, extents.y2));

	if (!sna_compute_composite_extents(&extents,
					   src, NULL, dst,
					   src_x, src_y,
					   0, 0,
					   extents.x1, extents.y1,
					   extents.x2 - extents.x1,
					   extents.y2 - extents.y1))
		return true;

	DBG(("%s: extents (%d, %d), (%d, %d)\n",
	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));

	/*
	 * Re-express the extents for the scratch mask: (x2, y2) become its
	 * width and height, (dst_x, dst_y) keep the origin relative to the
	 * drawable, and (dx, dy) shift the trapezoids so that this origin
	 * maps to (0, 0) on the sample grid.
	 */
	extents.y2 -= extents.y1;
	extents.x2 -= extents.x1;
	extents.x1 -= dst->pDrawable->x;
	extents.y1 -= dst->pDrawable->y;
	dst_x = extents.x1;
	dst_y = extents.y1;
	dx = -extents.x1 * FAST_SAMPLES_X;
	dy = -extents.y1 * FAST_SAMPLES_Y;
	extents.x1 = extents.y1 = 0;

	DBG(("%s: mask (%dx%d), dx=(%d, %d)\n",
	     __FUNCTION__, extents.x2, extents.y2, dx, dy));
	scratch = sna_pixmap_create_unattached(screen,
					       extents.x2, extents.y2, 8);
	if (!scratch)
		return true;

	DBG(("%s: created buffer %p, stride %d\n",
	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));

	if (!tor_init(&tor, &extents, 2*ntrap)) {
		sna_pixmap_destroy(scratch);
		return true;
	}

	for (n = 0; n < ntrap; n++) {
		xTrapezoid t;

		if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t))
			continue;

		/* Skip trapezoids entirely above or below the mask. */
		if (pixman_fixed_to_int(traps[n].top) - dst_y >= extents.y2 ||
		    pixman_fixed_to_int(traps[n].bottom) - dst_y < 0)
			continue;

		tor_add_edge(&tor, &t, &t.left, 1);
		tor_add_edge(&tor, &t, &t.right, -1);
	}

	/* Masks no wider than TOR_INPLACE_SIZE take the tor_inplace() path. */
	if (extents.x2 <= TOR_INPLACE_SIZE) {
		tor_inplace(&tor, scratch, is_mono(dst, maskFormat), NULL);
	} else {
		/* The scratch stride travels in the clip argument. */
		tor_render(NULL, &tor,
			   scratch->devPrivate.ptr,
			   (void *)(intptr_t)scratch->devKind,
			   is_mono(dst, maskFormat) ? tor_blt_mask_mono : tor_blt_mask,
			   true);
	}
	tor_fini(&tor);

	mask = CreatePicture(0, &scratch->drawable,
			     PictureMatchFormat(screen, 8, PICT_a8),
			     0, 0, serverClient, &error);
	if (mask) {
		RegionRec region;

		/* Back to absolute coordinates for the composite region. */
		region.extents.x1 = dst_x + dst->pDrawable->x;
		region.extents.y1 = dst_y + dst->pDrawable->y;
		region.extents.x2 = region.extents.x1 + extents.x2;
		region.extents.y2 = region.extents.y1 + extents.y2;
		region.data = NULL;

		DBG(("%s: fbComposite()\n", __FUNCTION__));
		/* The source origin is relative to the first trapezoid. */
		sna_composite_fb(op, src, mask, dst, &region,
				 src_x + dst_x - pixman_fixed_to_int(traps[0].left.p1.x),
				 src_y + dst_y - pixman_fixed_to_int(traps[0].left.p1.y),
				 0, 0,
				 dst_x, dst_y,
				 extents.x2, extents.y2);

		FreePicture(mask, 0);
	}
	sna_pixmap_destroy(scratch);

	return true;
}
6367
/*
 * Entry point for compositing trapezoids: classify the request and try
 * each rasterisation path in turn until one reports that it handled it.
 *
 * Rectilinear trapezoids are first offered to the aligned/unaligned box
 * paths.  Otherwise the order is: mono span converter, in-place spans
 * (when trapezoid_spans_maybe_inplace() suggests it), the span
 * converter, in-place spans again, and the mask converter.  The
 * "fallback" label is reached directly when acceleration is disabled,
 * the device is wedged, dst has an alpha map, the pixmap is unattached
 * or fallbacks are forced, and otherwise when every path above declined;
 * it ends in trapezoids_fallback().
 */
void
sna_composite_trapezoids(CARD8 op,
			 PicturePtr src,
			 PicturePtr dst,
			 PictFormatPtr maskFormat,
			 INT16 xSrc, INT16 ySrc,
			 int ntrap, xTrapezoid *traps)
{
	PixmapPtr pixmap = get_drawable_pixmap(dst->pDrawable);
	struct sna *sna = to_sna_from_pixmap(pixmap);
	struct sna_pixmap *priv;
	bool rectilinear, pixel_aligned, force_fallback;
	unsigned flags;
	int n;

	DBG(("%s(op=%d, src=(%d, %d), mask=%08x, ntrap=%d)\n", __FUNCTION__,
	     op, xSrc, ySrc,
	     maskFormat ? (int)maskFormat->format : 0,
	     ntrap));

	if (ntrap == 0)
		return;

	if (NO_ACCEL)
		goto fallback;

	if (wedged(sna)) {
		DBG(("%s: fallback -- wedged\n", __FUNCTION__));
		goto fallback;
	}

	if (dst->alphaMap) {
		DBG(("%s: fallback -- dst alpha map\n", __FUNCTION__));
		goto fallback;
	}

	priv = sna_pixmap(pixmap);
	if (priv == NULL) {
		DBG(("%s: fallback -- dst is unattached\n", __FUNCTION__));
		goto fallback;
	}

	/*
	 * FORCE_FALLBACK > 0 always forces the fallback paths and
	 * FORCE_FALLBACK < 0 never does; otherwise they are forced when
	 * the destination is too small or entirely CPU damaged and the
	 * source is an untransformed picture that is not on the GPU.
	 */
	force_fallback = FORCE_FALLBACK > 0;
	if ((too_small(priv) || DAMAGE_IS_ALL(priv->cpu_damage)) &&
	    !picture_is_gpu(sna, src) && untransformed(src)) {
		DBG(("%s: force fallbacks --too small, %dx%d? %d, all-cpu? %d, src-is-cpu? %d\n",
		     __FUNCTION__,
		     dst->pDrawable->width,
		     dst->pDrawable->height,
		     too_small(priv),
		     (int)DAMAGE_IS_ALL(priv->cpu_damage),
		     !picture_is_gpu(sna, src)));
		force_fallback = true;
	}
	if (FORCE_FALLBACK < 0)
		force_fallback = false;

	/* scan through for fast rectangles */
	/*
	 * Each loop stops at the first non-rectilinear trapezoid, so
	 * pixel_aligned is only meaningful while rectilinear remains true.
	 */
	rectilinear = pixel_aligned = true;
	if (is_mono(dst, maskFormat)) {
		/* Mono: compare the edge x coordinates rounded to whole pixels. */
		for (n = 0; n < ntrap && rectilinear; n++) {
			int lx1 = pixman_fixed_to_int(traps[n].left.p1.x + pixman_fixed_1_minus_e/2);
			int lx2 = pixman_fixed_to_int(traps[n].left.p2.x + pixman_fixed_1_minus_e/2);
			int rx1 = pixman_fixed_to_int(traps[n].right.p1.x + pixman_fixed_1_minus_e/2);
			int rx2 = pixman_fixed_to_int(traps[n].right.p2.x + pixman_fixed_1_minus_e/2);
			rectilinear &= lx1 == lx2 && rx1 == rx2;
		}
	} else if (dst->polyMode != PolyModePrecise) {
		/* Imprecise: compare the coordinates on the sample grid. */
		for (n = 0; n < ntrap && rectilinear; n++) {
			int lx1 = pixman_fixed_to_grid(traps[n].left.p1.x);
			int lx2 = pixman_fixed_to_grid(traps[n].left.p2.x);
			int rx1 = pixman_fixed_to_grid(traps[n].right.p1.x);
			int rx2 = pixman_fixed_to_grid(traps[n].right.p2.x);
			int top = pixman_fixed_to_grid(traps[n].top);
			int bot = pixman_fixed_to_grid(traps[n].bottom);

			rectilinear &= lx1 == lx2 && rx1 == rx2;
			pixel_aligned &= ((top | bot | lx1 | lx2 | rx1 | rx2) & FAST_SAMPLES_mask) == 0;
		}
	} else {
		/* Precise: compare the raw fixed-point coordinates. */
		for (n = 0; n < ntrap && rectilinear; n++) {
			rectilinear &=
				traps[n].left.p1.x == traps[n].left.p2.x &&
				traps[n].right.p1.x == traps[n].right.p2.x;
			pixel_aligned &=
				((traps[n].top | traps[n].bottom |
				  traps[n].left.p1.x | traps[n].left.p2.x |
				  traps[n].right.p1.x | traps[n].right.p2.x)
				 & pixman_fixed_1_minus_e) == 0;
		}
	}

	DBG(("%s: rectilinear? %d, pixel-aligned? %d\n",
	     __FUNCTION__, rectilinear, pixel_aligned));
	flags = 0;
	if (rectilinear) {
		if (pixel_aligned) {
			if (composite_aligned_boxes(sna, op, src, dst,
						    maskFormat,
						    xSrc, ySrc,
						    ntrap, traps,
						    force_fallback))
			    return;
		} else {
			if (composite_unaligned_boxes(sna, op, src, dst,
						      maskFormat,
						      xSrc, ySrc,
						      ntrap, traps,
						      force_fallback))
				return;
		}
		/* The box paths declined; pass the hint to the span converter. */
		flags |= COMPOSITE_SPANS_RECTILINEAR;
	}

	if (force_fallback)
		goto fallback;

	if (is_mono(dst, maskFormat) &&
	    mono_trapezoids_span_converter(sna, op, src, dst,
					   xSrc, ySrc,
					   ntrap, traps))
		return;

	if (trapezoid_spans_maybe_inplace(sna, op, src, dst, maskFormat)) {
		flags |= COMPOSITE_SPANS_INPLACE_HINT;
		if (trapezoid_span_inplace(sna, op, src, dst, maskFormat,
					   xSrc, ySrc, ntrap, traps,
					   false))
			return;
	}

	if (trapezoid_span_converter(sna, op, src, dst, maskFormat, flags,
				     xSrc, ySrc, ntrap, traps))
		return;

	/* Second in-place attempt, made regardless of the hint above. */
	if (trapezoid_span_inplace(sna, op, src, dst, maskFormat,
				   xSrc, ySrc, ntrap, traps,
				   false))
		return;

	if (trapezoid_mask_converter(op, src, dst, maskFormat,
				     xSrc, ySrc, ntrap, traps))
		return;

fallback:
	/* fallback == true: skip the is_gpu() refusal inside. */
	if (trapezoid_span_inplace(sna, op, src, dst, maskFormat,
				   xSrc, ySrc, ntrap, traps,
				   true))
		return;

	if (trapezoid_span_fallback(op, src, dst, maskFormat,
				    xSrc, ySrc, ntrap, traps))
		return;

	if (trapezoids_inplace_fallback(sna, op, src, dst, maskFormat,
					ntrap, traps))
		return;

	DBG(("%s: fallback mask=%08x, ntrap=%d\n", __FUNCTION__,
	     maskFormat ? (unsigned)maskFormat->format : 0, ntrap));
	trapezoids_fallback(sna, op, src, dst, maskFormat,
			    xSrc, ySrc,
			    ntrap, traps);
}
6532
6533static inline bool
6534project_trap_onto_grid(const xTrap *in,
6535		       int dx, int dy,
6536		       xTrap *out)
6537{
6538	out->top.l = dx + pixman_fixed_to_grid(in->top.l);
6539	out->top.r = dx + pixman_fixed_to_grid(in->top.r);
6540	out->top.y = dy + pixman_fixed_to_grid(in->top.y);
6541
6542	out->bot.l = dx + pixman_fixed_to_grid(in->bot.l);
6543	out->bot.r = dx + pixman_fixed_to_grid(in->bot.r);
6544	out->bot.y = dy + pixman_fixed_to_grid(in->bot.y);
6545
6546	return out->bot.y > out->top.y;
6547}
6548
6549static bool
6550mono_trap_span_converter(struct sna *sna,
6551			 PicturePtr dst,
6552			 INT16 x, INT16 y,
6553			 int ntrap, xTrap *traps)
6554{
6555	struct mono mono;
6556	xRenderColor white;
6557	PicturePtr src;
6558	int error;
6559	int n;
6560
6561	white.red = white.green = white.blue = white.alpha = 0xffff;
6562	src = CreateSolidPicture(0, &white, &error);
6563	if (src == NULL)
6564		return true;
6565
6566	mono.clip = *dst->pCompositeClip;
6567	x += dst->pDrawable->x;
6568	y += dst->pDrawable->y;
6569
6570	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d)\n",
6571	     __FUNCTION__,
6572	     mono.clip.extents.x1, mono.clip.extents.y1,
6573	     mono.clip.extents.x2, mono.clip.extents.y2,
6574	     x, y));
6575
6576	mono.sna = sna;
6577	if (!mono_init(&mono, 2*ntrap))
6578		return false;
6579
6580	for (n = 0; n < ntrap; n++) {
6581		xPointFixed p1, p2;
6582
6583		if (pixman_fixed_to_int(traps[n].top.y) + y >= mono.clip.extents.y2 ||
6584		    pixman_fixed_to_int(traps[n].bot.y) + y < mono.clip.extents.y1)
6585			continue;
6586
6587		p1.y = traps[n].top.y;
6588		p2.y = traps[n].bot.y;
6589
6590		p1.x = traps[n].top.l;
6591		p2.x = traps[n].bot.l;
6592		mono_add_line(&mono, x, y,
6593			      traps[n].top.y, traps[n].bot.y,
6594			      &p1, &p2, 1);
6595
6596		p1.x = traps[n].top.r;
6597		p2.x = traps[n].bot.r;
6598		mono_add_line(&mono, x, y,
6599			      traps[n].top.y, traps[n].bot.y,
6600			      &p1, &p2, -1);
6601	}
6602
6603	memset(&mono.op, 0, sizeof(mono.op));
6604	if (mono.sna->render.composite(mono.sna, PictOpAdd, src, NULL, dst,
6605					0, 0,
6606					0, 0,
6607					mono.clip.extents.x1,  mono.clip.extents.y1,
6608					mono.clip.extents.x2 - mono.clip.extents.x1,
6609					mono.clip.extents.y2 - mono.clip.extents.y1,
6610					&mono.op)) {
6611		mono_render(&mono);
6612		mono.op.done(mono.sna, &mono.op);
6613	}
6614
6615	mono_fini(&mono);
6616	FreePicture(src, 0);
6617	return true;
6618}
6619
6620static bool
6621trap_span_converter(struct sna *sna,
6622		    PicturePtr dst,
6623		    INT16 src_x, INT16 src_y,
6624		    int ntrap, xTrap *trap)
6625{
6626	struct sna_composite_spans_op tmp;
6627	struct tor tor;
6628	BoxRec extents;
6629	pixman_region16_t *clip;
6630	int dx, dy, n;
6631
6632	if (NO_SCAN_CONVERTER)
6633		return false;
6634
6635	if (dst->pDrawable->depth < 8)
6636		return false;
6637
6638	if (dst->polyEdge == PolyEdgeSharp)
6639		return mono_trap_span_converter(sna, dst, src_x, src_y, ntrap, trap);
6640
6641	if (!sna->render.check_composite_spans(sna, PictOpAdd, sna->render.white_picture, dst,
6642					       dst->pCompositeClip->extents.x2 - dst->pCompositeClip->extents.x1,
6643					       dst->pCompositeClip->extents.y2 - dst->pCompositeClip->extents.y1,
6644					       0)) {
6645		DBG(("%s: fallback -- composite spans not supported\n",
6646		     __FUNCTION__));
6647		return false;
6648	}
6649
6650	clip = dst->pCompositeClip;
6651	extents = *RegionExtents(clip);
6652	dx = dst->pDrawable->x;
6653	dy = dst->pDrawable->y;
6654
6655	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d)\n",
6656	     __FUNCTION__,
6657	     extents.x1, extents.y1,
6658	     extents.x2, extents.y2,
6659	     dx, dy));
6660
6661	memset(&tmp, 0, sizeof(tmp));
6662	if (!sna->render.composite_spans(sna, PictOpAdd, sna->render.white_picture, dst,
6663					 0, 0,
6664					 extents.x1,  extents.y1,
6665					 extents.x2 - extents.x1,
6666					 extents.y2 - extents.y1,
6667					 0,
6668					 &tmp)) {
6669		DBG(("%s: fallback -- composite spans render op not supported\n",
6670		     __FUNCTION__));
6671		return false;
6672	}
6673
6674	dx *= FAST_SAMPLES_X;
6675	dy *= FAST_SAMPLES_Y;
6676	if (!tor_init(&tor, &extents, 2*ntrap))
6677		goto skip;
6678
6679	for (n = 0; n < ntrap; n++) {
6680		xTrap t;
6681		xPointFixed p1, p2;
6682
6683		if (!project_trap_onto_grid(&trap[n], dx, dy, &t))
6684			continue;
6685
6686		if (pixman_fixed_to_int(trap[n].top.y) + dst->pDrawable->y >= extents.y2 ||
6687		    pixman_fixed_to_int(trap[n].bot.y) + dst->pDrawable->y < extents.y1)
6688			continue;
6689
6690		p1.y = t.top.y;
6691		p2.y = t.bot.y;
6692		p1.x = t.top.l;
6693		p2.x = t.bot.l;
6694		polygon_add_line(tor.polygon, &p1, &p2);
6695
6696		p1.y = t.bot.y;
6697		p2.y = t.top.y;
6698		p1.x = t.top.r;
6699		p2.x = t.bot.r;
6700		polygon_add_line(tor.polygon, &p1, &p2);
6701	}
6702
6703	tor_render(sna, &tor, &tmp, clip,
6704		   choose_span(&tmp, dst, NULL, clip), false);
6705
6706	tor_fini(&tor);
6707skip:
6708	tmp.done(sna, &tmp);
6709	return true;
6710}
6711
6712static void mark_damaged(PixmapPtr pixmap, struct sna_pixmap *priv,
6713			 BoxPtr box, int16_t x, int16_t y)
6714{
6715	box->x1 += x; box->x2 += x;
6716	box->y1 += y; box->y2 += y;
6717	if (box->x1 <= 0 && box->y1 <= 0 &&
6718	    box->x2 >= pixmap->drawable.width &&
6719	    box->y2 >= pixmap->drawable.height) {
6720		sna_damage_destroy(&priv->cpu_damage);
6721		sna_damage_all(&priv->gpu_damage,
6722			       pixmap->drawable.width,
6723			       pixmap->drawable.height);
6724		list_del(&priv->flush_list);
6725	} else {
6726		sna_damage_add_box(&priv->gpu_damage, box);
6727		sna_damage_subtract_box(&priv->cpu_damage, box);
6728	}
6729}
6730
6731static bool
6732trap_mask_converter(struct sna *sna,
6733		    PicturePtr picture,
6734		    INT16 x, INT16 y,
6735		    int ntrap, xTrap *trap)
6736{
6737	struct tor tor;
6738	ScreenPtr screen = picture->pDrawable->pScreen;
6739	PixmapPtr scratch, pixmap;
6740	struct sna_pixmap *priv;
6741	BoxRec extents;
6742	span_func_t span;
6743	int dx, dy, n;
6744
6745	if (NO_SCAN_CONVERTER)
6746		return false;
6747
6748	pixmap = get_drawable_pixmap(picture->pDrawable);
6749	priv = sna_pixmap_move_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
6750	if (priv == NULL)
6751		return false;
6752
6753	/* XXX strict adherence to the Render specification */
6754	if (picture->polyMode == PolyModePrecise &&
6755	    picture->polyEdge != PolyEdgeSharp) {
6756		DBG(("%s: fallback -- precise rasterisation requested\n",
6757		     __FUNCTION__));
6758		return false;
6759	}
6760
6761	extents = *RegionExtents(picture->pCompositeClip);
6762	for (n = 0; n < ntrap; n++) {
6763		int v;
6764
6765		v = x + pixman_fixed_integer_floor (MIN(trap[n].top.l, trap[n].bot.l));
6766		if (v < extents.x1)
6767			extents.x1 = v;
6768
6769		v = x + pixman_fixed_integer_ceil (MAX(trap[n].top.r, trap[n].bot.r));
6770		if (v > extents.x2)
6771			extents.x2 = v;
6772
6773		v = y + pixman_fixed_integer_floor (trap[n].top.y);
6774		if (v < extents.y1)
6775			extents.y1 = v;
6776
6777		v = y + pixman_fixed_integer_ceil (trap[n].bot.y);
6778		if (v > extents.y2)
6779			extents.y2 = v;
6780	}
6781
6782	DBG(("%s: extents (%d, %d), (%d, %d)\n",
6783	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
6784
6785	scratch = sna_pixmap_create_upload(screen,
6786					   extents.x2-extents.x1,
6787					   extents.y2-extents.y1,
6788					   8, KGEM_BUFFER_WRITE_INPLACE);
6789	if (!scratch)
6790		return true;
6791
6792	dx = picture->pDrawable->x;
6793	dy = picture->pDrawable->y;
6794	dx *= FAST_SAMPLES_X;
6795	dy *= FAST_SAMPLES_Y;
6796	if (!tor_init(&tor, &extents, 2*ntrap)) {
6797		sna_pixmap_destroy(scratch);
6798		return true;
6799	}
6800
6801	for (n = 0; n < ntrap; n++) {
6802		xTrap t;
6803		xPointFixed p1, p2;
6804
6805		if (!project_trap_onto_grid(&trap[n], dx, dy, &t))
6806			continue;
6807
6808		if (pixman_fixed_to_int(trap[n].top.y) + picture->pDrawable->y >= extents.y2 ||
6809		    pixman_fixed_to_int(trap[n].bot.y) + picture->pDrawable->y < extents.y1)
6810			continue;
6811
6812		p1.y = t.top.y;
6813		p2.y = t.bot.y;
6814		p1.x = t.top.l;
6815		p2.x = t.bot.l;
6816		polygon_add_line(tor.polygon, &p1, &p2);
6817
6818		p1.y = t.bot.y;
6819		p2.y = t.top.y;
6820		p1.x = t.top.r;
6821		p2.x = t.bot.r;
6822		polygon_add_line(tor.polygon, &p1, &p2);
6823	}
6824
6825	if (picture->polyEdge == PolyEdgeSharp)
6826		span = tor_blt_mask_mono;
6827	else
6828		span = tor_blt_mask;
6829
6830	tor_render(NULL, &tor,
6831		   scratch->devPrivate.ptr,
6832		   (void *)(intptr_t)scratch->devKind,
6833		   span, true);
6834
6835	tor_fini(&tor);
6836
6837	/* XXX clip boxes */
6838	get_drawable_deltas(picture->pDrawable, pixmap, &x, &y);
6839	sna = to_sna_from_screen(screen);
6840	sna->render.copy_boxes(sna, GXcopy,
6841			       scratch, __sna_pixmap_get_bo(scratch), -extents.x1, -extents.x1,
6842			       pixmap, priv->gpu_bo, x, y,
6843			       &extents, 1, 0);
6844	mark_damaged(pixmap, priv, &extents ,x, y);
6845	sna_pixmap_destroy(scratch);
6846	return true;
6847}
6848
6849static bool
6850trap_upload(PicturePtr picture,
6851	    INT16 x, INT16 y,
6852	    int ntrap, xTrap *trap)
6853{
6854	ScreenPtr screen = picture->pDrawable->pScreen;
6855	struct sna *sna = to_sna_from_screen(screen);
6856	PixmapPtr pixmap = get_drawable_pixmap(picture->pDrawable);
6857	PixmapPtr scratch;
6858	struct sna_pixmap *priv;
6859	BoxRec extents;
6860	pixman_image_t *image;
6861	int width, height, depth;
6862	int n;
6863
6864	priv = sna_pixmap_move_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
6865	if (priv == NULL)
6866		return false;
6867
6868	extents = *RegionExtents(picture->pCompositeClip);
6869	for (n = 0; n < ntrap; n++) {
6870		int v;
6871
6872		v = x + pixman_fixed_integer_floor (MIN(trap[n].top.l, trap[n].bot.l));
6873		if (v < extents.x1)
6874			extents.x1 = v;
6875
6876		v = x + pixman_fixed_integer_ceil (MAX(trap[n].top.r, trap[n].bot.r));
6877		if (v > extents.x2)
6878			extents.x2 = v;
6879
6880		v = y + pixman_fixed_integer_floor (trap[n].top.y);
6881		if (v < extents.y1)
6882			extents.y1 = v;
6883
6884		v = y + pixman_fixed_integer_ceil (trap[n].bot.y);
6885		if (v > extents.y2)
6886			extents.y2 = v;
6887	}
6888
6889	DBG(("%s: extents (%d, %d), (%d, %d)\n",
6890	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
6891
6892	width  = extents.x2 - extents.x1;
6893	height = extents.y2 - extents.y1;
6894	depth = picture->pDrawable->depth;
6895
6896	DBG(("%s: tmp (%dx%d) depth=%d\n",
6897	     __FUNCTION__, width, height, depth));
6898	scratch = sna_pixmap_create_upload(screen,
6899					   width, height, depth,
6900					   KGEM_BUFFER_WRITE);
6901	if (!scratch)
6902		return true;
6903
6904	memset(scratch->devPrivate.ptr, 0, scratch->devKind*height);
6905	image = pixman_image_create_bits(picture->format, width, height,
6906					 scratch->devPrivate.ptr,
6907					 scratch->devKind);
6908	if (image) {
6909		pixman_add_traps (image, -extents.x1, -extents.y1,
6910				  ntrap, (pixman_trap_t *)trap);
6911
6912		pixman_image_unref(image);
6913	}
6914
6915	/* XXX clip boxes */
6916	get_drawable_deltas(picture->pDrawable, pixmap, &x, &y);
6917	sna->render.copy_boxes(sna, GXcopy,
6918			       scratch, __sna_pixmap_get_bo(scratch), -extents.x1, -extents.x1,
6919			       pixmap, priv->gpu_bo, x, y,
6920			       &extents, 1, 0);
6921	mark_damaged(pixmap, priv, &extents, x, y);
6922
6923	sna_pixmap_destroy(scratch);
6924	return true;
6925}
6926
6927void
6928sna_add_traps(PicturePtr picture, INT16 x, INT16 y, int n, xTrap *t)
6929{
6930	struct sna *sna;
6931
6932	DBG(("%s (%d, %d) x %d\n", __FUNCTION__, x, y, n));
6933
6934	sna = to_sna_from_drawable(picture->pDrawable);
6935	if (is_gpu(sna, picture->pDrawable, PREFER_GPU_SPANS)) {
6936		if (trap_span_converter(sna, picture, x, y, n, t))
6937			return;
6938	}
6939
6940	if (is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER)) {
6941		if (trap_mask_converter(sna, picture, x, y, n, t))
6942			return;
6943
6944		if (trap_upload(picture, x, y, n, t))
6945			return;
6946	}
6947
6948	DBG(("%s -- fallback\n", __FUNCTION__));
6949	if (sna_drawable_move_to_cpu(picture->pDrawable,
6950				     MOVE_READ | MOVE_WRITE)) {
6951		pixman_image_t *image;
6952		int dx, dy;
6953
6954		if (!(image = image_from_pict(picture, false, &dx, &dy)))
6955			return;
6956
6957		pixman_add_traps(image, x + dx, y + dy, n, (pixman_trap_t *)t);
6958
6959		free_pixman_pict(picture, image);
6960	}
6961}
6962
6963static inline void
6964project_point_onto_grid(const xPointFixed *in,
6965			int dx, int dy,
6966			xPointFixed *out)
6967{
6968	out->x = dx + pixman_fixed_to_grid(in->x);
6969	out->y = dy + pixman_fixed_to_grid(in->y);
6970}
6971
6972#if HAS_PIXMAN_TRIANGLES
6973static inline bool
6974xTriangleValid(const xTriangle *t)
6975{
6976	xPointFixed v1, v2;
6977
6978	v1.x = t->p2.x - t->p1.x;
6979	v1.y = t->p2.y - t->p1.y;
6980
6981	v2.x = t->p3.x - t->p1.x;
6982	v2.y = t->p3.y - t->p1.y;
6983
6984	/* if the length of any edge is zero, the area must be zero */
6985	if (v1.x == 0 && v1.y == 0)
6986		return false;
6987	if (v2.x == 0 && v2.y == 0)
6988		return false;
6989
6990	/* if the cross-product is zero, so it the size */
6991	return v2.y * v1.x != v1.y * v2.x;
6992}
6993
6994static inline bool
6995project_triangle_onto_grid(const xTriangle *in,
6996			   int dx, int dy,
6997			   xTriangle *out)
6998{
6999	project_point_onto_grid(&in->p1, dx, dy, &out->p1);
7000	project_point_onto_grid(&in->p2, dx, dy, &out->p2);
7001	project_point_onto_grid(&in->p3, dx, dy, &out->p3);
7002
7003	return xTriangleValid(out);
7004}
7005
7006static bool
7007mono_triangles_span_converter(struct sna *sna,
7008			      CARD8 op, PicturePtr src, PicturePtr dst,
7009			      INT16 src_x, INT16 src_y,
7010			      int count, xTriangle *tri)
7011{
7012	struct mono mono;
7013	BoxRec extents;
7014	int16_t dst_x, dst_y;
7015	int16_t dx, dy;
7016	bool was_clear;
7017	int n;
7018
7019	mono.sna = sna;
7020
7021	dst_x = pixman_fixed_to_int(tri[0].p1.x);
7022	dst_y = pixman_fixed_to_int(tri[0].p1.y);
7023
7024	miTriangleBounds(count, tri, &extents);
7025	DBG(("%s: extents (%d, %d), (%d, %d)\n",
7026	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
7027
7028	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
7029		return true;
7030
7031	if (!sna_compute_composite_region(&mono.clip,
7032					  src, NULL, dst,
7033					  src_x + extents.x1 - dst_x,
7034					  src_y + extents.y1 - dst_y,
7035					  0, 0,
7036					  extents.x1, extents.y1,
7037					  extents.x2 - extents.x1,
7038					  extents.y2 - extents.y1)) {
7039		DBG(("%s: triangles do not intersect drawable clips\n",
7040		     __FUNCTION__)) ;
7041		return true;
7042	}
7043
7044	dx = dst->pDrawable->x;
7045	dy = dst->pDrawable->y;
7046
7047	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
7048	     __FUNCTION__,
7049	     mono.clip.extents.x1, mono.clip.extents.y1,
7050	     mono.clip.extents.x2, mono.clip.extents.y2,
7051	     dx, dy,
7052	     src_x + mono.clip.extents.x1 - dst_x - dx,
7053	     src_y + mono.clip.extents.y1 - dst_y - dy));
7054
7055	was_clear = sna_drawable_is_clear(dst->pDrawable);
7056
7057	if (mono_init(&mono, 3*count))
7058		return false;
7059
7060	for (n = 0; n < count; n++) {
7061		mono_add_line(&mono, dx, dy,
7062			      tri[n].p1.y, tri[n].p2.y,
7063			      &tri[n].p1, &tri[n].p2, 1);
7064		mono_add_line(&mono, dx, dy,
7065			      tri[n].p2.y, tri[n].p3.y,
7066			      &tri[n].p2, &tri[n].p3, 1);
7067		mono_add_line(&mono, dx, dy,
7068			      tri[n].p3.y, tri[n].p1.y,
7069			      &tri[n].p3, &tri[n].p1, 1);
7070	}
7071
7072	memset(&mono.op, 0, sizeof(mono.op));
7073	if (mono.sna->render.composite(mono.sna, op, src, NULL, dst,
7074				       src_x + mono.clip.extents.x1 - dst_x - dx,
7075				       src_y + mono.clip.extents.y1 - dst_y - dy,
7076				       0, 0,
7077				       mono.clip.extents.x1,  mono.clip.extents.y1,
7078				       mono.clip.extents.x2 - mono.clip.extents.x1,
7079				       mono.clip.extents.y2 - mono.clip.extents.y1,
7080				       &mono.op)) {
7081		if (mono.clip.data == NULL && mono.op.damage == NULL)
7082			mono.span = mono_span__fast;
7083		else
7084			mono.span = mono_span;
7085		mono_render(&mono);
7086		mono.op.done(mono.sna, &mono.op);
7087	}
7088
7089	if (!was_clear && !operator_is_bounded(op)) {
7090		xPointFixed p1, p2;
7091
7092		if (!mono_init(&mono, 2+3*count))
7093			return false;
7094
7095		p1.y = mono.clip.extents.y1 * pixman_fixed_1;
7096		p2.y = mono.clip.extents.y2 * pixman_fixed_1;
7097
7098		p1.x = mono.clip.extents.x1 * pixman_fixed_1;
7099		p2.x = mono.clip.extents.x1 * pixman_fixed_1;
7100		mono_add_line(&mono, 0, 0, p1.y, p2.y, &p1, &p2, -1);
7101
7102		p1.x = mono.clip.extents.x2 * pixman_fixed_1;
7103		p2.x = mono.clip.extents.x2 * pixman_fixed_1;
7104		mono_add_line(&mono, 0, 0, p1.y, p2.y, &p1, &p2, 1);
7105
7106		for (n = 0; n < count; n++) {
7107			mono_add_line(&mono, dx, dy,
7108				      tri[n].p1.y, tri[n].p2.y,
7109				      &tri[n].p1, &tri[n].p2, 1);
7110			mono_add_line(&mono, dx, dy,
7111				      tri[n].p2.y, tri[n].p3.y,
7112				      &tri[n].p2, &tri[n].p3, 1);
7113			mono_add_line(&mono, dx, dy,
7114				      tri[n].p3.y, tri[n].p1.y,
7115				      &tri[n].p3, &tri[n].p1, 1);
7116		}
7117
7118		memset(&mono.op, 0, sizeof(mono.op));
7119		if (mono.sna->render.composite(mono.sna,
7120					       PictOpClear,
7121					       mono.sna->clear, NULL, dst,
7122					       0, 0,
7123					       0, 0,
7124					       mono.clip.extents.x1,  mono.clip.extents.y1,
7125					       mono.clip.extents.x2 - mono.clip.extents.x1,
7126					       mono.clip.extents.y2 - mono.clip.extents.y1,
7127					       &mono.op)) {
7128			if (mono.clip.data == NULL && mono.op.damage == NULL)
7129				mono.span = mono_span__fast;
7130			else
7131				mono.span = mono_span;
7132			mono_render(&mono);
7133			mono.op.done(mono.sna, &mono.op);
7134		}
7135		mono_fini(&mono);
7136	}
7137
7138	mono_fini(&mono);
7139	REGION_UNINIT(NULL, &mono.clip);
7140	return true;
7141}
7142
7143static bool
7144triangles_span_converter(struct sna *sna,
7145			 CARD8 op, PicturePtr src, PicturePtr dst,
7146			 PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
7147			 int count, xTriangle *tri)
7148{
7149	struct sna_composite_spans_op tmp;
7150	struct tor tor;
7151	BoxRec extents;
7152	pixman_region16_t clip;
7153	int16_t dst_x, dst_y;
7154	int dx, dy, n;
7155	bool was_clear;
7156
7157	if (NO_SCAN_CONVERTER)
7158		return false;
7159
7160	if (is_mono(dst, maskFormat))
7161		return mono_triangles_span_converter(sna, op, src, dst,
7162						     src_x, src_y,
7163						     count, tri);
7164
7165	/* XXX strict adherence to the Render specification */
7166	if (dst->polyMode == PolyModePrecise) {
7167		DBG(("%s: fallback -- precise rasterisation requested\n",
7168		     __FUNCTION__));
7169		return false;
7170	}
7171
7172	if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, 0)) {
7173		DBG(("%s: fallback -- composite spans not supported\n",
7174		     __FUNCTION__));
7175		return false;
7176	}
7177
7178	dst_x = pixman_fixed_to_int(tri[0].p1.x);
7179	dst_y = pixman_fixed_to_int(tri[0].p1.y);
7180
7181	miTriangleBounds(count, tri, &extents);
7182	DBG(("%s: extents (%d, %d), (%d, %d)\n",
7183	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
7184
7185	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
7186		return true;
7187
7188#if 0
7189	if (extents.y2 - extents.y1 < 64 && extents.x2 - extents.x1 < 64) {
7190		DBG(("%s: fallback -- traps extents too small %dx%d\n",
7191		     __FUNCTION__, extents.y2 - extents.y1, extents.x2 - extents.x1));
7192		return false;
7193	}
7194#endif
7195
7196	if (!sna_compute_composite_region(&clip,
7197					  src, NULL, dst,
7198					  src_x + extents.x1 - dst_x,
7199					  src_y + extents.y1 - dst_y,
7200					  0, 0,
7201					  extents.x1, extents.y1,
7202					  extents.x2 - extents.x1,
7203					  extents.y2 - extents.y1)) {
7204		DBG(("%s: triangles do not intersect drawable clips\n",
7205		     __FUNCTION__)) ;
7206		return true;
7207	}
7208
7209	if (!sna->render.check_composite_spans(sna, op, src, dst,
7210					       clip.extents.x2 - clip.extents.x1,
7211					       clip.extents.y2 - clip.extents.y1,
7212					       0)) {
7213		DBG(("%s: fallback -- composite spans not supported\n",
7214		     __FUNCTION__));
7215		return false;
7216	}
7217
7218	extents = *RegionExtents(&clip);
7219	dx = dst->pDrawable->x;
7220	dy = dst->pDrawable->y;
7221
7222	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
7223	     __FUNCTION__,
7224	     extents.x1, extents.y1,
7225	     extents.x2, extents.y2,
7226	     dx, dy,
7227	     src_x + extents.x1 - dst_x - dx,
7228	     src_y + extents.y1 - dst_y - dy));
7229
7230	was_clear = sna_drawable_is_clear(dst->pDrawable);
7231
7232	memset(&tmp, 0, sizeof(tmp));
7233	if (!sna->render.composite_spans(sna, op, src, dst,
7234					 src_x + extents.x1 - dst_x - dx,
7235					 src_y + extents.y1 - dst_y - dy,
7236					 extents.x1,  extents.y1,
7237					 extents.x2 - extents.x1,
7238					 extents.y2 - extents.y1,
7239					 0,
7240					 &tmp)) {
7241		DBG(("%s: fallback -- composite spans render op not supported\n",
7242		     __FUNCTION__));
7243		return false;
7244	}
7245
7246	dx *= FAST_SAMPLES_X;
7247	dy *= FAST_SAMPLES_Y;
7248	if (!tor_init(&tor, &extents, 3*count))
7249		goto skip;
7250
7251	for (n = 0; n < count; n++) {
7252		xTriangle t;
7253
7254		if (!project_triangle_onto_grid(&tri[n], dx, dy, &t))
7255			continue;
7256
7257		polygon_add_line(tor.polygon, &t.p1, &t.p2);
7258		polygon_add_line(tor.polygon, &t.p2, &t.p3);
7259		polygon_add_line(tor.polygon, &t.p3, &t.p1);
7260	}
7261
7262	tor_render(sna, &tor, &tmp, &clip,
7263		   choose_span(&tmp, dst, maskFormat, &clip),
7264		   !was_clear && maskFormat && !operator_is_bounded(op));
7265
7266	tor_fini(&tor);
7267skip:
7268	tmp.done(sna, &tmp);
7269
7270	REGION_UNINIT(NULL, &clip);
7271	return true;
7272}
7273
7274static bool
7275triangles_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
7276			 PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
7277			 int count, xTriangle *tri)
7278{
7279	struct tor tor;
7280	void (*span)(struct sna *sna,
7281		     struct sna_composite_spans_op *op,
7282		     pixman_region16_t *clip,
7283		     const BoxRec *box,
7284		     int coverage);
7285	ScreenPtr screen = dst->pDrawable->pScreen;
7286	PixmapPtr scratch;
7287	PicturePtr mask;
7288	BoxRec extents;
7289	int16_t dst_x, dst_y;
7290	int dx, dy;
7291	int error, n;
7292
7293	if (NO_SCAN_CONVERTER)
7294		return false;
7295
7296	if (dst->polyMode == PolyModePrecise && !is_mono(dst, maskFormat)) {
7297		DBG(("%s: fallback -- precise rasterisation requested\n",
7298		     __FUNCTION__));
7299		return false;
7300	}
7301
7302	if (maskFormat == NULL && count > 1) {
7303		DBG(("%s: fallback -- individual rasterisation requested\n",
7304		     __FUNCTION__));
7305		return false;
7306	}
7307
7308	miTriangleBounds(count, tri, &extents);
7309	DBG(("%s: extents (%d, %d), (%d, %d)\n",
7310	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
7311
7312	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
7313		return true;
7314
7315	if (!sna_compute_composite_extents(&extents,
7316					   src, NULL, dst,
7317					   src_x, src_y,
7318					   0, 0,
7319					   extents.x1, extents.y1,
7320					   extents.x2 - extents.x1,
7321					   extents.y2 - extents.y1))
7322		return true;
7323
7324	DBG(("%s: extents (%d, %d), (%d, %d)\n",
7325	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
7326
7327	extents.y2 -= extents.y1;
7328	extents.x2 -= extents.x1;
7329	extents.x1 -= dst->pDrawable->x;
7330	extents.y1 -= dst->pDrawable->y;
7331	dst_x = extents.x1;
7332	dst_y = extents.y1;
7333	dx = -extents.x1 * FAST_SAMPLES_X;
7334	dy = -extents.y1 * FAST_SAMPLES_Y;
7335	extents.x1 = extents.y1 = 0;
7336
7337	DBG(("%s: mask (%dx%d)\n",
7338	     __FUNCTION__, extents.x2, extents.y2));
7339	scratch = sna_pixmap_create_upload(screen,
7340					   extents.x2, extents.y2, 8,
7341					   KGEM_BUFFER_WRITE_INPLACE);
7342	if (!scratch)
7343		return true;
7344
7345	DBG(("%s: created buffer %p, stride %d\n",
7346	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
7347
7348	if (!tor_init(&tor, &extents, 3*count)) {
7349		sna_pixmap_destroy(scratch);
7350		return true;
7351	}
7352
7353	for (n = 0; n < count; n++) {
7354		xTriangle t;
7355
7356		if (!project_triangle_onto_grid(&tri[n], dx, dy, &t))
7357			continue;
7358
7359		polygon_add_line(tor.polygon, &t.p1, &t.p2);
7360		polygon_add_line(tor.polygon, &t.p2, &t.p3);
7361		polygon_add_line(tor.polygon, &t.p3, &t.p1);
7362	}
7363
7364	if (maskFormat ? maskFormat->depth < 8 : dst->polyEdge == PolyEdgeSharp)
7365		span = tor_blt_mask_mono;
7366	else
7367		span = tor_blt_mask;
7368
7369	tor_render(NULL, &tor,
7370		   scratch->devPrivate.ptr,
7371		   (void *)(intptr_t)scratch->devKind,
7372		   span, true);
7373
7374	mask = CreatePicture(0, &scratch->drawable,
7375			     PictureMatchFormat(screen, 8, PICT_a8),
7376			     0, 0, serverClient, &error);
7377	if (mask) {
7378		CompositePicture(op, src, mask, dst,
7379				 src_x + dst_x - pixman_fixed_to_int(tri[0].p1.x),
7380				 src_y + dst_y - pixman_fixed_to_int(tri[0].p1.y),
7381				 0, 0,
7382				 dst_x, dst_y,
7383				 extents.x2, extents.y2);
7384		FreePicture(mask, 0);
7385	}
7386	tor_fini(&tor);
7387	sna_pixmap_destroy(scratch);
7388
7389	return true;
7390}
7391
7392static void
7393triangles_fallback(CARD8 op,
7394		   PicturePtr src,
7395		   PicturePtr dst,
7396		   PictFormatPtr maskFormat,
7397		   INT16 xSrc, INT16 ySrc,
7398		   int n, xTriangle *tri)
7399{
7400	ScreenPtr screen = dst->pDrawable->pScreen;
7401
7402	DBG(("%s op=%d, count=%d\n", __FUNCTION__, op, n));
7403
7404	if (maskFormat) {
7405		PixmapPtr scratch;
7406		PicturePtr mask;
7407		INT16 dst_x, dst_y;
7408		BoxRec bounds;
7409		int width, height, depth;
7410		pixman_image_t *image;
7411		pixman_format_code_t format;
7412		int error;
7413
7414		dst_x = pixman_fixed_to_int(tri[0].p1.x);
7415		dst_y = pixman_fixed_to_int(tri[0].p1.y);
7416
7417		miTriangleBounds(n, tri, &bounds);
7418		DBG(("%s: bounds (%d, %d), (%d, %d)\n",
7419		     __FUNCTION__, bounds.x1, bounds.y1, bounds.x2, bounds.y2));
7420
7421		if (bounds.y1 >= bounds.y2 || bounds.x1 >= bounds.x2)
7422			return;
7423
7424		if (!sna_compute_composite_extents(&bounds,
7425						   src, NULL, dst,
7426						   xSrc, ySrc,
7427						   0, 0,
7428						   bounds.x1, bounds.y1,
7429						   bounds.x2 - bounds.x1,
7430						   bounds.y2 - bounds.y1))
7431			return;
7432
7433		DBG(("%s: extents (%d, %d), (%d, %d)\n",
7434		     __FUNCTION__, bounds.x1, bounds.y1, bounds.x2, bounds.y2));
7435
7436		width  = bounds.x2 - bounds.x1;
7437		height = bounds.y2 - bounds.y1;
7438		bounds.x1 -= dst->pDrawable->x;
7439		bounds.y1 -= dst->pDrawable->y;
7440		depth = maskFormat->depth;
7441		format = maskFormat->format | (BitsPerPixel(depth) << 24);
7442
7443		DBG(("%s: mask (%dx%d) depth=%d, format=%08x\n",
7444		     __FUNCTION__, width, height, depth, format));
7445		scratch = sna_pixmap_create_upload(screen,
7446						   width, height, depth,
7447						   KGEM_BUFFER_WRITE);
7448		if (!scratch)
7449			return;
7450
7451		memset(scratch->devPrivate.ptr, 0, scratch->devKind*height);
7452		image = pixman_image_create_bits(format, width, height,
7453						 scratch->devPrivate.ptr,
7454						 scratch->devKind);
7455		if (image) {
7456			pixman_add_triangles(image,
7457					     -bounds.x1, -bounds.y1,
7458					     n, (pixman_triangle_t *)tri);
7459			pixman_image_unref(image);
7460		}
7461
7462		mask = CreatePicture(0, &scratch->drawable,
7463				     PictureMatchFormat(screen, depth, format),
7464				     0, 0, serverClient, &error);
7465		if (mask) {
7466			CompositePicture(op, src, mask, dst,
7467					 xSrc + bounds.x1 - dst_x,
7468					 ySrc + bounds.y1 - dst_y,
7469					 0, 0,
7470					 bounds.x1, bounds.y1,
7471					 width, height);
7472			FreePicture(mask, 0);
7473		}
7474		sna_pixmap_destroy(scratch);
7475	} else {
7476		if (dst->polyEdge == PolyEdgeSharp)
7477			maskFormat = PictureMatchFormat(screen, 1, PICT_a1);
7478		else
7479			maskFormat = PictureMatchFormat(screen, 8, PICT_a8);
7480
7481		for (; n--; tri++)
7482			triangles_fallback(op,
7483					   src, dst, maskFormat,
7484					   xSrc, ySrc, 1, tri);
7485	}
7486}
7487
7488void
7489sna_composite_triangles(CARD8 op,
7490			 PicturePtr src,
7491			 PicturePtr dst,
7492			 PictFormatPtr maskFormat,
7493			 INT16 xSrc, INT16 ySrc,
7494			 int n, xTriangle *tri)
7495{
7496	struct sna *sna = to_sna_from_drawable(dst->pDrawable);
7497
7498	if (triangles_span_converter(sna, op, src, dst, maskFormat,
7499				     xSrc, ySrc,
7500				     n, tri))
7501		return;
7502
7503	if (triangles_mask_converter(op, src, dst, maskFormat,
7504				     xSrc, ySrc,
7505				     n, tri))
7506		return;
7507
7508	triangles_fallback(op, src, dst, maskFormat, xSrc, ySrc, n, tri);
7509}
7510
7511static bool
7512tristrip_span_converter(struct sna *sna,
7513			CARD8 op, PicturePtr src, PicturePtr dst,
7514			PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
7515			int count, xPointFixed *points)
7516{
7517	struct sna_composite_spans_op tmp;
7518	struct tor tor;
7519	BoxRec extents;
7520	pixman_region16_t clip;
7521	xPointFixed p[4];
7522	int16_t dst_x, dst_y;
7523	int dx, dy;
7524	int cw, ccw, n;
7525	bool was_clear;
7526
7527	if (NO_SCAN_CONVERTER)
7528		return false;
7529
7530	/* XXX strict adherence to the Render specification */
7531	if (dst->polyMode == PolyModePrecise && !is_mono(dst, maskFormat)) {
7532		DBG(("%s: fallback -- precise rasterisation requested\n",
7533		     __FUNCTION__));
7534		return false;
7535	}
7536
7537	if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, 0)) {
7538		DBG(("%s: fallback -- composite spans not supported\n",
7539		     __FUNCTION__));
7540		return false;
7541	}
7542
7543	dst_x = pixman_fixed_to_int(points[0].x);
7544	dst_y = pixman_fixed_to_int(points[0].y);
7545
7546	miPointFixedBounds(count, points, &extents);
7547	DBG(("%s: extents (%d, %d), (%d, %d)\n",
7548	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
7549
7550	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
7551		return true;
7552
7553#if 0
7554	if (extents.y2 - extents.y1 < 64 && extents.x2 - extents.x1 < 64) {
7555		DBG(("%s: fallback -- traps extents too small %dx%d\n",
7556		     __FUNCTION__, extents.y2 - extents.y1, extents.x2 - extents.x1));
7557		return false;
7558	}
7559#endif
7560
7561	if (!sna_compute_composite_region(&clip,
7562					  src, NULL, dst,
7563					  src_x + extents.x1 - dst_x,
7564					  src_y + extents.y1 - dst_y,
7565					  0, 0,
7566					  extents.x1, extents.y1,
7567					  extents.x2 - extents.x1,
7568					  extents.y2 - extents.y1)) {
7569		DBG(("%s: triangles do not intersect drawable clips\n",
7570		     __FUNCTION__)) ;
7571		return true;
7572	}
7573
7574	if (!sna->render.check_composite_spans(sna, op, src, dst,
7575					       clip.extents.x2 - clip.extents.x1,
7576					       clip.extents.y2 - clip.extents.y1,
7577					       0)) {
7578		DBG(("%s: fallback -- composite spans not supported\n",
7579		     __FUNCTION__));
7580		return false;
7581	}
7582
7583	extents = *RegionExtents(&clip);
7584	dx = dst->pDrawable->x;
7585	dy = dst->pDrawable->y;
7586
7587	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
7588	     __FUNCTION__,
7589	     extents.x1, extents.y1,
7590	     extents.x2, extents.y2,
7591	     dx, dy,
7592	     src_x + extents.x1 - dst_x - dx,
7593	     src_y + extents.y1 - dst_y - dy));
7594
7595	was_clear = sna_drawable_is_clear(dst->pDrawable);
7596
7597	memset(&tmp, 0, sizeof(tmp));
7598	if (!sna->render.composite_spans(sna, op, src, dst,
7599					 src_x + extents.x1 - dst_x - dx,
7600					 src_y + extents.y1 - dst_y - dy,
7601					 extents.x1,  extents.y1,
7602					 extents.x2 - extents.x1,
7603					 extents.y2 - extents.y1,
7604					 0,
7605					 &tmp)) {
7606		DBG(("%s: fallback -- composite spans render op not supported\n",
7607		     __FUNCTION__));
7608		return false;
7609	}
7610
7611	dx *= FAST_SAMPLES_X;
7612	dy *= FAST_SAMPLES_Y;
7613	if (!tor_init(&tor, &extents, 2*count))
7614		goto skip;
7615
7616	cw = ccw = 0;
7617	project_point_onto_grid(&points[0], dx, dy, &p[cw]);
7618	project_point_onto_grid(&points[1], dx, dy, &p[2+ccw]);
7619	polygon_add_line(tor.polygon, &p[cw], &p[2+ccw]);
7620	n = 2;
7621	do {
7622		cw = !cw;
7623		project_point_onto_grid(&points[n], dx, dy, &p[cw]);
7624		polygon_add_line(tor.polygon, &p[!cw], &p[cw]);
7625		if (++n == count)
7626			break;
7627
7628		ccw = !ccw;
7629		project_point_onto_grid(&points[n], dx, dy, &p[2+ccw]);
7630		polygon_add_line(tor.polygon, &p[2+ccw], &p[2+!ccw]);
7631		if (++n == count)
7632			break;
7633	} while (1);
7634	polygon_add_line(tor.polygon, &p[2+ccw], &p[cw]);
7635	assert(tor.polygon->num_edges <= 2*count);
7636
7637	tor_render(sna, &tor, &tmp, &clip,
7638		   choose_span(&tmp, dst, maskFormat, &clip),
7639		   !was_clear && maskFormat && !operator_is_bounded(op));
7640
7641	tor_fini(&tor);
7642skip:
7643	tmp.done(sna, &tmp);
7644
7645	REGION_UNINIT(NULL, &clip);
7646	return true;
7647}
7648
7649static void
7650tristrip_fallback(CARD8 op,
7651		  PicturePtr src,
7652		  PicturePtr dst,
7653		  PictFormatPtr maskFormat,
7654		  INT16 xSrc, INT16 ySrc,
7655		  int n, xPointFixed *points)
7656{
7657	ScreenPtr screen = dst->pDrawable->pScreen;
7658
7659	if (maskFormat) {
7660		PixmapPtr scratch;
7661		PicturePtr mask;
7662		INT16 dst_x, dst_y;
7663		BoxRec bounds;
7664		int width, height, depth;
7665		pixman_image_t *image;
7666		pixman_format_code_t format;
7667		int error;
7668
7669		dst_x = pixman_fixed_to_int(points->x);
7670		dst_y = pixman_fixed_to_int(points->y);
7671
7672		miPointFixedBounds(n, points, &bounds);
7673		DBG(("%s: bounds (%d, %d), (%d, %d)\n",
7674		     __FUNCTION__, bounds.x1, bounds.y1, bounds.x2, bounds.y2));
7675
7676		if (bounds.y1 >= bounds.y2 || bounds.x1 >= bounds.x2)
7677			return;
7678
7679		if (!sna_compute_composite_extents(&bounds,
7680						   src, NULL, dst,
7681						   xSrc, ySrc,
7682						   0, 0,
7683						   bounds.x1, bounds.y1,
7684						   bounds.x2 - bounds.x1,
7685						   bounds.y2 - bounds.y1))
7686			return;
7687
7688		DBG(("%s: extents (%d, %d), (%d, %d)\n",
7689		     __FUNCTION__, bounds.x1, bounds.y1, bounds.x2, bounds.y2));
7690
7691		width  = bounds.x2 - bounds.x1;
7692		height = bounds.y2 - bounds.y1;
7693		bounds.x1 -= dst->pDrawable->x;
7694		bounds.y1 -= dst->pDrawable->y;
7695		depth = maskFormat->depth;
7696		format = maskFormat->format | (BitsPerPixel(depth) << 24);
7697
7698		DBG(("%s: mask (%dx%d) depth=%d, format=%08x\n",
7699		     __FUNCTION__, width, height, depth, format));
7700		scratch = sna_pixmap_create_upload(screen,
7701						   width, height, depth,
7702						   KGEM_BUFFER_WRITE);
7703		if (!scratch)
7704			return;
7705
7706		memset(scratch->devPrivate.ptr, 0, scratch->devKind*height);
7707		image = pixman_image_create_bits(format, width, height,
7708						 scratch->devPrivate.ptr,
7709						 scratch->devKind);
7710		if (image) {
7711			xTriangle tri;
7712			xPointFixed *p[3] = { &tri.p1, &tri.p2, &tri.p3 };
7713			int i;
7714
7715			*p[0] = points[0];
7716			*p[1] = points[1];
7717			*p[2] = points[2];
7718			pixman_add_triangles(image,
7719					     -bounds.x1, -bounds.y1,
7720					     1, (pixman_triangle_t *)&tri);
7721			for (i = 3; i < n; i++) {
7722				*p[i%3] = points[i];
7723				pixman_add_triangles(image,
7724						     -bounds.x1, -bounds.y1,
7725						     1, (pixman_triangle_t *)&tri);
7726			}
7727			pixman_image_unref(image);
7728		}
7729
7730		mask = CreatePicture(0, &scratch->drawable,
7731				     PictureMatchFormat(screen, depth, format),
7732				     0, 0, serverClient, &error);
7733		if (mask) {
7734			CompositePicture(op, src, mask, dst,
7735					 xSrc + bounds.x1 - dst_x,
7736					 ySrc + bounds.y1 - dst_y,
7737					 0, 0,
7738					 bounds.x1, bounds.y1,
7739					 width, height);
7740			FreePicture(mask, 0);
7741		}
7742		sna_pixmap_destroy(scratch);
7743	} else {
7744		xTriangle tri;
7745		xPointFixed *p[3] = { &tri.p1, &tri.p2, &tri.p3 };
7746		int i;
7747
7748		if (dst->polyEdge == PolyEdgeSharp)
7749			maskFormat = PictureMatchFormat(screen, 1, PICT_a1);
7750		else
7751			maskFormat = PictureMatchFormat(screen, 8, PICT_a8);
7752
7753		*p[0] = points[0];
7754		*p[1] = points[1];
7755		*p[2] = points[2];
7756		triangles_fallback(op,
7757				   src, dst, maskFormat,
7758				   xSrc, ySrc, 1, &tri);
7759		for (i = 3; i < n; i++) {
7760			*p[i%3] = points[i];
7761			/* Should xSrc,ySrc be updated? */
7762			triangles_fallback(op,
7763					   src, dst, maskFormat,
7764					   xSrc, ySrc, 1, &tri);
7765		}
7766	}
7767}
7768
7769void
7770sna_composite_tristrip(CARD8 op,
7771		       PicturePtr src,
7772		       PicturePtr dst,
7773		       PictFormatPtr maskFormat,
7774		       INT16 xSrc, INT16 ySrc,
7775		       int n, xPointFixed *points)
7776{
7777	struct sna *sna = to_sna_from_drawable(dst->pDrawable);
7778
7779	if (tristrip_span_converter(sna, op, src, dst, maskFormat, xSrc, ySrc, n, points))
7780		return;
7781
7782	tristrip_fallback(op, src, dst, maskFormat, xSrc, ySrc, n, points);
7783}
7784
7785static void
7786trifan_fallback(CARD8 op,
7787		PicturePtr src,
7788		PicturePtr dst,
7789		PictFormatPtr maskFormat,
7790		INT16 xSrc, INT16 ySrc,
7791		int n, xPointFixed *points)
7792{
7793	ScreenPtr screen = dst->pDrawable->pScreen;
7794
7795	if (maskFormat) {
7796		PixmapPtr scratch;
7797		PicturePtr mask;
7798		INT16 dst_x, dst_y;
7799		BoxRec bounds;
7800		int width, height, depth;
7801		pixman_image_t *image;
7802		pixman_format_code_t format;
7803		int error;
7804
7805		dst_x = pixman_fixed_to_int(points->x);
7806		dst_y = pixman_fixed_to_int(points->y);
7807
7808		miPointFixedBounds(n, points, &bounds);
7809		DBG(("%s: bounds (%d, %d), (%d, %d)\n",
7810		     __FUNCTION__, bounds.x1, bounds.y1, bounds.x2, bounds.y2));
7811
7812		if (bounds.y1 >= bounds.y2 || bounds.x1 >= bounds.x2)
7813			return;
7814
7815		if (!sna_compute_composite_extents(&bounds,
7816						   src, NULL, dst,
7817						   xSrc, ySrc,
7818						   0, 0,
7819						   bounds.x1, bounds.y1,
7820						   bounds.x2 - bounds.x1,
7821						   bounds.y2 - bounds.y1))
7822			return;
7823
7824		DBG(("%s: extents (%d, %d), (%d, %d)\n",
7825		     __FUNCTION__, bounds.x1, bounds.y1, bounds.x2, bounds.y2));
7826
7827		width  = bounds.x2 - bounds.x1;
7828		height = bounds.y2 - bounds.y1;
7829		bounds.x1 -= dst->pDrawable->x;
7830		bounds.y1 -= dst->pDrawable->y;
7831		depth = maskFormat->depth;
7832		format = maskFormat->format | (BitsPerPixel(depth) << 24);
7833
7834		DBG(("%s: mask (%dx%d) depth=%d, format=%08x\n",
7835		     __FUNCTION__, width, height, depth, format));
7836		scratch = sna_pixmap_create_upload(screen,
7837						   width, height, depth,
7838						   KGEM_BUFFER_WRITE);
7839		if (!scratch)
7840			return;
7841
7842		memset(scratch->devPrivate.ptr, 0, scratch->devKind*height);
7843		image = pixman_image_create_bits(format, width, height,
7844						 scratch->devPrivate.ptr,
7845						 scratch->devKind);
7846		if (image) {
7847			xTriangle tri;
7848			xPointFixed *p[3] = { &tri.p1, &tri.p2, &tri.p3 };
7849			int i;
7850
7851			*p[0] = points[0];
7852			*p[1] = points[1];
7853			*p[2] = points[2];
7854			pixman_add_triangles(image,
7855					     -bounds.x1, -bounds.y1,
7856					     1, (pixman_triangle_t *)&tri);
7857			for (i = 3; i < n; i++) {
7858				*p[2 - (i&1)] = points[i];
7859				pixman_add_triangles(image,
7860						     -bounds.x1, -bounds.y1,
7861						     1, (pixman_triangle_t *)&tri);
7862			}
7863			pixman_image_unref(image);
7864		}
7865
7866		mask = CreatePicture(0, &scratch->drawable,
7867				     PictureMatchFormat(screen, depth, format),
7868				     0, 0, serverClient, &error);
7869		if (mask) {
7870			CompositePicture(op, src, mask, dst,
7871					 xSrc + bounds.x1 - dst_x,
7872					 ySrc + bounds.y1 - dst_y,
7873					 0, 0,
7874					 bounds.x1, bounds.y1,
7875					 width, height);
7876			FreePicture(mask, 0);
7877		}
7878		sna_pixmap_destroy(scratch);
7879	} else {
7880		xTriangle tri;
7881		xPointFixed *p[3] = { &tri.p1, &tri.p2, &tri.p3 };
7882		int i;
7883
7884		if (dst->polyEdge == PolyEdgeSharp)
7885			maskFormat = PictureMatchFormat(screen, 1, PICT_a1);
7886		else
7887			maskFormat = PictureMatchFormat(screen, 8, PICT_a8);
7888
7889		*p[0] = points[0];
7890		*p[1] = points[1];
7891		*p[2] = points[2];
7892		triangles_fallback(op,
7893				   src, dst, maskFormat,
7894				   xSrc, ySrc, 1, &tri);
7895		for (i = 3; i < n; i++) {
7896			*p[2 - (i&1)] = points[i];
7897			/* Should xSrc,ySrc be updated? */
7898			triangles_fallback(op,
7899					   src, dst, maskFormat,
7900					   xSrc, ySrc, 1, &tri);
7901		}
7902	}
7903}
7904
7905void
7906sna_composite_trifan(CARD8 op,
7907		     PicturePtr src,
7908		     PicturePtr dst,
7909		     PictFormatPtr maskFormat,
7910		     INT16 xSrc, INT16 ySrc,
7911		     int n, xPointFixed *points)
7912{
7913	trifan_fallback(op, src, dst, maskFormat, xSrc, ySrc, n, points);
7914}
7915#endif
7916