1/*
2 * Copyright (c) 2007-2008 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 *
22 * Neither the name of the Advanced Micro Devices, Inc. nor the names of its
23 * contributors may be used to endorse or promote products derived from this
24 * software without specific prior written permission.
25 */
26
27/* TODO:
28   Support a8 as a source or destination?
29   convert !a8 or !a4 masks?
30   support multiple pass operations?
31*/
32
/* To support PictOpAdd with a mask */
34
35#ifdef HAVE_CONFIG_H
36#include "config.h"
37#endif
38
39#include "xorg-server.h"
40
41#include "xf86.h"
42#include "exa.h"
43
44#include "geode.h"
45#include "cim_defs.h"
46#include "cim_regs.h"
47
48#include "geode_blend.h"
49
/* Shorthands for the X server's fixed-point conversion macros:
 * F() wraps IntToxFixed() and I() wraps xFixedToInt(). */
#define F(x)    IntToxFixed(x)
#define I(x)    xFixedToInt(x)
52
/* Set to 1 to make every GEODE_FALLBACK() report its reason via ErrorF() */
#define GEODE_TRACE_FALL 0

/* GEODE_FALLBACK(x) makes the *calling* function return FALSE.
 *
 * x is a parenthesized ErrorF() argument list, which is why call sites use
 * double parentheses: GEODE_FALLBACK(("bad op %d\n", op));
 * When GEODE_TRACE_FALL is non-zero the message is printed, prefixed with
 * the name of the calling function; otherwise x is dropped from the
 * expansion and never evaluated.
 */
#if GEODE_TRACE_FALL
#define GEODE_FALLBACK(x)               \
do {                                    \
	ErrorF("%s: ", __FUNCTION__);   \
	ErrorF x;                       \
	return FALSE;                   \
} while (0)
#else
#define GEODE_FALLBACK(x) return FALSE
#endif
65
66static const struct exa_format_t {
67    int exa;
68    int bpp;
69    int fmt;
70    int alphabits;
71} lx_exa_formats[] = {
72    {
73    PICT_a8r8g8b8, 32, CIMGP_SOURCE_FMT_8_8_8_8, 8}, {
74    PICT_x8r8g8b8, 32, CIMGP_SOURCE_FMT_8_8_8_8, 0}, {
75    PICT_x8b8g8r8, 32, CIMGP_SOURCE_FMT_32BPP_BGR, 0}, {
76    PICT_a4r4g4b4, 16, CIMGP_SOURCE_FMT_4_4_4_4, 4}, {
77    PICT_a1r5g5b5, 16, CIMGP_SOURCE_FMT_1_5_5_5, 1}, {
78    PICT_r5g6b5, 16, CIMGP_SOURCE_FMT_0_5_6_5, 0}, {
79    PICT_b5g6r5, 16, CIMGP_SOURCE_FMT_16BPP_BGR, 0}, {
80    PICT_x1r5g5b5, 16, CIMGP_SOURCE_FMT_1_5_5_5, 0}, {
81    PICT_x1b5g5r5, 16, CIMGP_SOURCE_FMT_15BPP_BGR, 0}, {
82    PICT_r3g3b2, 8, CIMGP_SOURCE_FMT_3_3_2, 0}, {
83    PICT_a8, 32, CIMGP_SOURCE_FMT_8_8_8_8, 8}
84};
85
/* This is a chunk of memory we use for scratch space: the prepare hooks
 * (lx_prepare_solid, lx_prepare_copy, lx_prepare_composite) record state
 * here for the functions that later issue the actual blts. */

/* Values for exaScratch.type, selected in lx_prepare_composite() */
#define COMP_TYPE_MASK 0
#define COMP_TYPE_ONEPASS 1
#define COMP_TYPE_TWOPASS 3
#define COMP_TYPE_ROTATE  5

static struct {
    int type;                   /* one of the COMP_TYPE_* values above */

    /* Source pixmap description; for COMP_TYPE_MASK these describe the
     * mask pixmap instead (see lx_prepare_composite) */
    unsigned int srcOffset;     /* value of exaGetPixmapOffset() */
    unsigned int srcPitch;      /* value of exaGetPixmapPitch() */
    unsigned int srcBpp;        /* BYTES per pixel: (bitsPerPixel + 7) / 8 */
    unsigned int srcWidth, srcHeight;   /* size of the picture's drawable */

    unsigned int srcColor;      /* source colour used when a mask is applied */
    int op;                     /* hardware ROP for solid/copy, PictOp* for composite */
    int repeat;                 /* pSrc->repeat */
    int maskrepeat;             /* pMsk->repeat */
    unsigned int fourBpp;       /* 1 if the mask pixmap is 4 bits per pixel, else 0 */
    unsigned int bufferOffset;  /* copy of pGeode->exaBfrOffset */
    struct exa_format_t *srcFormat;     /* lx_exa_formats entry of the source */
    struct exa_format_t *dstFormat;     /* lx_exa_formats entry of the destination */

    int rotate;                 /* RR_Rotate_* found by lx_process_transform() */
    PictTransform *transform;   /* accepted source transform, or NULL */

} exaScratch;
114
/* Source/destination raster operation for each of the 16 X11 GC functions,
 * indexed by the alu value handed to the prepare hooks.  Used when every
 * plane is written (planemask == ~0).  In this encoding 0xCC stands for
 * "source" and 0xAA for "destination".
 */
static const int SDfn[16] = {
    0x00,                       /* 0x0 GXclear        0 */
    0x88,                       /* 0x1 GXand          S & D */
    0x44,                       /* 0x2 GXandReverse   S & ~D */
    0xCC,                       /* 0x3 GXcopy         S */
    0x22,                       /* 0x4 GXandInverted  ~S & D */
    0xAA,                       /* 0x5 GXnoop         D */
    0x66,                       /* 0x6 GXxor          S ^ D */
    0xEE,                       /* 0x7 GXor           S | D */
    0x11,                       /* 0x8 GXnor          ~(S | D) */
    0x99,                       /* 0x9 GXequiv        ~(S ^ D) */
    0x55,                       /* 0xa GXinvert       ~D */
    0xDD,                       /* 0xb GXorReverse    S | ~D */
    0x33,                       /* 0xc GXcopyInverted ~S */
    0xBB,                       /* 0xd GXorInverted   ~S | D */
    0x77,                       /* 0xe GXnand         ~(S & D) */
    0xFF                        /* 0xf GXset          1 */
};
119
/* Plane-masked variant of SDfn, used when planemask != ~0 and the plane
 * mask has been loaded as the solid pattern.  The high nibble repeats the
 * SDfn function for the same alu; the low nibble is always 0xA (the
 * "destination" pattern in SDfn's encoding).
 */
static const int SDfn_PM[16] = {
    0x0A,                       /* 0x0 GXclear */
    0x8A,                       /* 0x1 GXand */
    0x4A,                       /* 0x2 GXandReverse */
    0xCA,                       /* 0x3 GXcopy */
    0x2A,                       /* 0x4 GXandInverted */
    0xAA,                       /* 0x5 GXnoop */
    0x6A,                       /* 0x6 GXxor */
    0xEA,                       /* 0x7 GXor */
    0x1A,                       /* 0x8 GXnor */
    0x9A,                       /* 0x9 GXequiv */
    0x5A,                       /* 0xa GXinvert */
    0xDA,                       /* 0xb GXorReverse */
    0x3A,                       /* 0xc GXcopyInverted */
    0xBA,                       /* 0xd GXorInverted */
    0x7A,                       /* 0xe GXnand */
    0xFA                        /* 0xf GXset */
};
124
/* These functions check to see if we can safely prefetch the memory
 * for the blt, or if we have to wait for the previous blt to complete.
 * One function is for the fill, and the other is for the copy, because
 * they have different requirements based on the ROP.
 */
130
/* Destination rectangle of the most recently declared blt, as recorded by
 * lx_fill_flags() / lx_copy_flags(): (lx0, ly0) is the top-left corner and
 * (lx1, ly1) the exclusive bottom-right corner.  All -1 until the first blt.
 */
static int lx0 = -1;
static int ly0 = -1;
static int lx1 = -1;
static int ly1 = -1;
132
133static int
134lx_fill_flags(int x0, int y0, int w, int h, int rop)
135{
136    int x1 = x0 + w, y1 = y0 + h;
137    int n = ((rop ^ (rop >> 1)) & 0x55) == 0 || /* no dst */
138        x0 >= lx1 || y0 >= ly1 ||       /* rght/below */
139        x1 <= lx0 || y1 <= ly0 ?        /* left/above */
140        0 : CIMGP_BLTFLAGS_HAZARD;
141
142    lx0 = x0;
143    ly0 = y0;
144    lx1 = x1;
145    ly1 = y1;
146
147    return n;
148}
149
150static int
151lx_copy_flags(int x0, int y0, int x1, int y1, int w, int h, int rop)
152{
153    int x2 = x1 + w, y2 = y1 + h;
154
155    /* dst not hazzard and src not hazzard */
156    int n = (((rop ^ (rop >> 1)) & 0x55) == 0 ||
157             x1 >= lx1 || y1 >= ly1 ||
158             x2 <= lx0 || y2 <= ly0) &&
159        (((rop ^ (rop >> 2)) & 0x33) == 0 ||
160         x0 >= lx1 || y0 >= ly1 ||
161         x0 + w <= lx0 || y0 + h <= ly0) ? 0 : CIMGP_BLTFLAGS_HAZARD;
162
163    lx0 = x1;
164    ly0 = y1;
165    lx1 = x2;
166    ly1 = y2;
167
168    return n;
169}
170
171/* These are borrowed from the exa engine - they should be made global
172   and available to drivers, but until then....
173*/
174
175/* exaGetPixelFromRGBA (exa_render.c) */
176
177static Bool
178_GetPixelFromRGBA(CARD32 *pixel,
179                  CARD16 red, CARD16 green, CARD16 blue, CARD16 alpha,
180                  CARD32 format)
181{
182    int rbits, bbits, gbits, abits;
183    int rshift, bshift, gshift, ashift;
184
185    *pixel = 0;
186
187    if (!PICT_FORMAT_COLOR(format))
188        return FALSE;
189
190    rbits = PICT_FORMAT_R(format);
191    gbits = PICT_FORMAT_G(format);
192    bbits = PICT_FORMAT_B(format);
193    abits = PICT_FORMAT_A(format);
194
195    if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
196        bshift = 0;
197        gshift = bbits;
198        rshift = gshift + gbits;
199        ashift = rshift + rbits;
200    }
201    else {                      /* PICT_TYPE_ABGR */
202        rshift = 0;
203        gshift = rbits;
204        bshift = gshift + gbits;
205        ashift = bshift + bbits;
206    }
207
208    *pixel |= (blue >> (16 - bbits)) << bshift;
209    *pixel |= (red >> (16 - rbits)) << rshift;
210    *pixel |= (green >> (16 - gbits)) << gshift;
211    *pixel |= (alpha >> (16 - abits)) << ashift;
212
213    return TRUE;
214}
215
216/* exaGetRGBAFromPixel (exa_render.c) */
217
218static Bool
219_GetRGBAFromPixel(CARD32 pixel,
220                  CARD16 *red,
221                  CARD16 *green, CARD16 *blue, CARD16 *alpha, CARD32 format)
222{
223    int rbits, bbits, gbits, abits;
224    int rshift, bshift, gshift, ashift;
225
226    if (!PICT_FORMAT_COLOR(format))
227        return FALSE;
228
229    rbits = PICT_FORMAT_R(format);
230    gbits = PICT_FORMAT_G(format);
231    bbits = PICT_FORMAT_B(format);
232    abits = PICT_FORMAT_A(format);
233
234    if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
235        bshift = 0;
236        gshift = bbits;
237        rshift = gshift + gbits;
238        ashift = rshift + rbits;
239    }
240    else {                      /* PICT_TYPE_ABGR */
241        rshift = 0;
242        gshift = rbits;
243        bshift = gshift + gbits;
244        ashift = bshift + bbits;
245    }
246
247    *red = ((pixel >> rshift) & ((1 << rbits) - 1)) << (16 - rbits);
248    while (rbits < 16) {
249        *red |= *red >> rbits;
250        rbits <<= 1;
251    }
252
253    *green = ((pixel >> gshift) & ((1 << gbits) - 1)) << (16 - gbits);
254    while (gbits < 16) {
255        *green |= *green >> gbits;
256        gbits <<= 1;
257    }
258
259    *blue = ((pixel >> bshift) & ((1 << bbits) - 1)) << (16 - bbits);
260    while (bbits < 16) {
261        *blue |= *blue >> bbits;
262        bbits <<= 1;
263    }
264
265    if (abits) {
266        *alpha = ((pixel >> ashift) & ((1 << abits) - 1)) << (16 - abits);
267        while (abits < 16) {
268            *alpha |= *alpha >> abits;
269            abits <<= 1;
270        }
271    }
272    else
273        *alpha = 0xffff;
274
275    return TRUE;
276}
277
278static unsigned int
279lx_get_source_color(PixmapPtr pSrc, int srcFormat, int dstFormat)
280{
281    CARD32 in, out;
282    CARD16 red = 0, green = 0, blue = 0, alpha = 0;
283
284    /* Stall to avoid a race with the upload function */
285    /* for 1.4 and newer, the problem will be resolved within
286     * exaGetPixmapFirstPixel, so this should be adjusted so
287     * the stall isn't run needlessly
288     */
289    /* FIXME: xserver-1.4 with a supposed fix for this is really old, so kill the stall? */
290
291    gp_wait_until_idle();
292    in = exaGetPixmapFirstPixel(pSrc);
293
294    _GetRGBAFromPixel(in, &red, &blue, &green, &alpha, srcFormat);
295    _GetPixelFromRGBA(&out, red, blue, green, alpha, dstFormat);
296
297    return out;
298}
299
300static Bool
301lx_prepare_solid(PixmapPtr pxMap, int alu, Pixel planemask, Pixel fg)
302{
303    int pitch = exaGetPixmapPitch(pxMap);
304    int op = (planemask == ~0U) ? SDfn[alu] : SDfn_PM[alu];
305
306    gp_declare_blt(0);
307    gp_set_bpp(pxMap->drawable.bitsPerPixel);
308
309    gp_set_raster_operation(op);
310
311    if (planemask != ~0U)
312        gp_set_solid_pattern(planemask);
313
314    exaScratch.op = op;
315
316    gp_set_solid_source(fg);
317
318    gp_set_strides(pitch, pitch);
319    gp_write_parameters();
320    return TRUE;
321}
322
323static void
324lx_do_solid(PixmapPtr pxMap, int x1, int y1, int x2, int y2)
325{
326    int bpp = (pxMap->drawable.bitsPerPixel + 7) / 8;
327    int pitch = exaGetPixmapPitch(pxMap);
328    unsigned int offset = exaGetPixmapOffset(pxMap) + (pitch * y1) + (bpp * x1);
329
330    gp_declare_blt(lx_fill_flags(x1, y1, x2 - x1, y2 - y1, exaScratch.op));
331    gp_pattern_fill(offset, x2 - x1, y2 - y1);
332}
333
334static Bool
335lx_prepare_copy(PixmapPtr pxSrc, PixmapPtr pxDst, int dx, int dy,
336                int alu, Pixel planemask)
337{
338    int dpitch = exaGetPixmapPitch(pxDst);
339    int op = (planemask == ~0U) ? SDfn[alu] : SDfn_PM[alu];
340
341    gp_declare_blt(0);
342    gp_set_bpp(pxDst->drawable.bitsPerPixel);
343
344    gp_set_raster_operation(op);
345
346    if (planemask != ~0U)
347        gp_set_solid_pattern(planemask);
348
349    exaScratch.srcOffset = exaGetPixmapOffset(pxSrc);
350    exaScratch.srcPitch = exaGetPixmapPitch(pxSrc);
351    exaScratch.srcBpp = (pxSrc->drawable.bitsPerPixel + 7) / 8;
352
353    exaScratch.op = op;
354
355    gp_set_strides(dpitch, exaScratch.srcPitch);
356    gp_write_parameters();
357    return TRUE;
358}
359
360static void
361lx_do_copy(PixmapPtr pxDst, int srcX, int srcY,
362           int dstX, int dstY, int w, int h)
363{
364    int dstBpp = (pxDst->drawable.bitsPerPixel + 7) / 8;
365    int dstPitch = exaGetPixmapPitch(pxDst);
366    unsigned int srcOffset, dstOffset;
367    int flags = 0;
368
369    gp_declare_blt(lx_copy_flags(srcX, srcY, dstX, dstY, w, h, exaScratch.op));
370
371    srcOffset = exaScratch.srcOffset + (exaScratch.srcPitch * srcY) +
372        (exaScratch.srcBpp) * srcX;
373
374    dstOffset = exaGetPixmapOffset(pxDst) + (dstPitch * dstY) + (dstBpp * dstX);
375
376    if (dstX > srcX)
377        flags |= CIMGP_NEGXDIR;
378
379    if (dstY > srcY)
380        flags |= CIMGP_NEGYDIR;
381
382    gp_screen_to_screen_blt(dstOffset, srcOffset, w, h, flags);
383}
384
385/* Composite operations
386
These are the simplest - one pass operations - if there is no format
conversion or mask, then we can make these happen pretty fast
389
390                       Operation  Type  Channel   Alpha
391PictOpClear            0          2     0         3
392PictOpSrc              0          3     0         3
393PictOpDst              0          3     1         3
394PictOpOver             2          0     0         3
395PictOpOverReverse      2          0     1         3
396PictOpIn               0          1     0         3
397PictOpInReverse        0          1     1         3
398PictOpOut              1          0     0         3
399PictOpOutReverse       1          0     1         3
400PictOpAdd              2          2     0         3
401
402The following require multiple passes
403PictOpAtop
404PictOpXor
405*/
406
407struct blend_ops_t {
408    int operation;
409    int type;
410    int channel;
411} lx_alpha_ops[] = {
412    /* PictOpClear */
413    {
414    CIMGP_ALPHA_TIMES_A, CIMGP_CONSTANT_ALPHA, CIMGP_CHANNEL_A_SOURCE}, {
415    },
416        /* PictOpSrc */
417    {
418    CIMGP_ALPHA_TIMES_A, CIMGP_ALPHA_EQUALS_ONE, CIMGP_CHANNEL_A_SOURCE}, {
419    },
420        /* PictOpDst */
421    {
422    CIMGP_ALPHA_TIMES_A, CIMGP_ALPHA_EQUALS_ONE, CIMGP_CHANNEL_A_DEST}, {
423    },
424        /* PictOpOver */
425    {
426    CIMGP_A_PLUS_BETA_B, CIMGP_CHANNEL_A_ALPHA, CIMGP_CHANNEL_A_SOURCE}, {
427    CIMGP_ALPHA_TIMES_A, CIMGP_CONVERTED_ALPHA, CIMGP_CHANNEL_A_SOURCE},
428        /* PictOpOverReverse */
429    {
430    CIMGP_A_PLUS_BETA_B, CIMGP_CHANNEL_A_ALPHA, CIMGP_CHANNEL_A_DEST}, {
431    CIMGP_ALPHA_TIMES_A, CIMGP_CONVERTED_ALPHA, CIMGP_CHANNEL_A_SOURCE},
432        /* PictOpIn */
433    {
434    CIMGP_ALPHA_TIMES_A, CIMGP_CHANNEL_B_ALPHA, CIMGP_CHANNEL_A_SOURCE}, {
435    CIMGP_ALPHA_TIMES_A, CIMGP_CONVERTED_ALPHA, CIMGP_CHANNEL_A_SOURCE},
436        /* PictOpInReverse */
437    {
438    CIMGP_ALPHA_TIMES_A, CIMGP_CHANNEL_B_ALPHA, CIMGP_CHANNEL_A_DEST}, {
439    CIMGP_ALPHA_TIMES_A, CIMGP_CONVERTED_ALPHA, CIMGP_CHANNEL_A_SOURCE},
440        /* PictOpOut */
441    {
442    CIMGP_BETA_TIMES_B, CIMGP_CHANNEL_A_ALPHA, CIMGP_CHANNEL_A_DEST}, {
443    CIMGP_ALPHA_TIMES_A, CIMGP_CONVERTED_ALPHA, CIMGP_CHANNEL_A_SOURCE},
444        /* PictOpOutReverse */
445    {
446    CIMGP_BETA_TIMES_B, CIMGP_CHANNEL_A_ALPHA, CIMGP_CHANNEL_A_SOURCE}, {
447    CIMGP_ALPHA_TIMES_A, CIMGP_CONVERTED_ALPHA, CIMGP_CHANNEL_A_SOURCE},
448        /* SrcAtop */
449    {
450    CIMGP_ALPHA_TIMES_A, CIMGP_CHANNEL_B_ALPHA, CIMGP_CHANNEL_A_DEST}, {
451    CIMGP_BETA_TIMES_B, CIMGP_CHANNEL_A_ALPHA, CIMGP_CHANNEL_A_SOURCE},
452        /* SrcAtopReverse */
453    {
454    CIMGP_ALPHA_TIMES_A, CIMGP_CHANNEL_B_ALPHA, CIMGP_CHANNEL_A_SOURCE}, {
455    CIMGP_BETA_TIMES_B, CIMGP_CHANNEL_A_ALPHA, CIMGP_CHANNEL_A_DEST},
456        /* Xor */
457    {
458    CIMGP_BETA_TIMES_B, CIMGP_CHANNEL_A_ALPHA, CIMGP_CHANNEL_A_SOURCE}, {
459    CIMGP_BETA_TIMES_B, CIMGP_CHANNEL_A_ALPHA, CIMGP_CHANNEL_A_SOURCE},
460        /* PictOpAdd */
461    {
462    CIMGP_A_PLUS_BETA_B, CIMGP_CONSTANT_ALPHA, CIMGP_CHANNEL_A_SOURCE}, {
463    }
464};
465
/* Number of elements in array a.  Only meaningful for true arrays, not for
 * pointers.  Defined here only if no earlier header already provides it. */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) (sizeof((a)) / (sizeof(*(a))))
#endif
469
470static const struct exa_format_t *
471lx_get_format(PicturePtr p)
472{
473    int i;
474    unsigned int format = p->format;
475
476    for (i = 0; i < ARRAY_SIZE(lx_exa_formats); i++)
477        if (lx_exa_formats[i].exa == format)
478            return (&lx_exa_formats[i]);
479
480    return NULL;
481}
482
483static Bool
484lx_process_transform(PicturePtr pSrc)
485{
486    PictTransformPtr t = pSrc->transform;
487    xFixed c0 = t->matrix[0][0];
488    xFixed s0 = t->matrix[0][1];
489    xFixed s1 = t->matrix[1][0];
490    xFixed c1 = t->matrix[1][1];
491
492    /* If the transform doesn't have any rotation
493     * or scaling components, then just grab the
494     * translate coordinates */
495
496    if (t->matrix[0][0] == 0 &&
497        t->matrix[0][1] == 0 && t->matrix[1][0] == 0 && t->matrix[1][1] == 0) {
498        exaScratch.transform = pSrc->transform;
499        return TRUE;
500    }
501
502    /* Otherwise, see if this is a simple
503     * rotate transform - if it isn't, then
504     * we have to punt back to software */
505
506    if (t->matrix[2][2] != F(1))
507        return FALSE;
508
509    /* The rotate matrix looks like this:
510     * [ cos X   -sin x
511     * sin X   cos X ]
512     *
513     * Where X is the angle.  We do a simple
514     * check first - if [0,0] != [1,1], then
515     * scaling was specified too, and we can
516     * bail, and if [0,1] != -[1,1] then this
517     * isn't scaling that we can handle.
518     */
519
520    if ((c0 != c1) || (s0 != -s1))
521        return FALSE;
522
523    /* Now, figure out what angle we want - we
524     * can only accelerate right angle rotations,
525     * so this turns into an easy set of if statements */
526
527    if (c0 == F(1) && s1 == F(0))
528        exaScratch.rotate = RR_Rotate_0;
529    else if (c0 == F(0) && s1 == F(1))
530        exaScratch.rotate = RR_Rotate_90;
531    else if (c0 == F(-1) && s1 == F(0))
532        exaScratch.rotate = RR_Rotate_180;
533    else if (c0 == F(0) && s1 == F(-1))
534        exaScratch.rotate = RR_Rotate_270;
535    else
536        return FALSE;
537
538    exaScratch.transform = pSrc->transform;
539
540    return TRUE;
541}
542
543static Bool
544lx_check_composite(int op, PicturePtr pSrc, PicturePtr pMsk, PicturePtr pDst)
545{
546    GeodeRec *pGeode = GEODEPTR_FROM_PICTURE(pDst);
547    const struct exa_format_t *srcFmt, *dstFmt;
548
549    if (op > PictOpAdd)
550        GEODE_FALLBACK(("Operation %d is not supported\n", op));
551
552    /* XXX - don't know if we can do any hwaccel on solid fills or gradient types in generic cases */
553    if (pMsk && pMsk->pSourcePict)
554        GEODE_FALLBACK(("%s are not supported as a mask\n",
555                        pMsk->pSourcePict->type ==
556                        SourcePictTypeSolidFill ? "Solid pictures" :
557                        "Gradients"));
558
559    if (pSrc->pSourcePict && pSrc->pSourcePict->type != SourcePictTypeSolidFill)
560        GEODE_FALLBACK(("Gradients are not supported as the source\n"));
561
562    if (pMsk && op == PictOpAdd)
563        GEODE_FALLBACK(("PictOpAdd with mask is not supported\n"));
564
565    /* FIXME: Meet this conditions from the debug for PictOpAdd.
566     * Any Other possibilities? Add a judge for the future supplement */
567    if (op == PictOpAdd && pSrc->format == PICT_a8r8g8b8 &&
568        pDst->format == PICT_a8)
569        return TRUE;
570
571    if (op == PictOpAdd && pSrc->format == PICT_x8r8g8b8 &&
572        pDst->format == PICT_a8)
573        return TRUE;
574
575    if (op == PictOpAdd && pSrc->format == PICT_r5g6b5 &&
576        pDst->format == PICT_a8)
577        return TRUE;
578
579    if (usesPasses(op)) {
580        /* FIXME: Slightly misleading fallback msg when !pMsk */
581        if (pGeode->exaBfrOffset == 0 || !pMsk)
582            GEODE_FALLBACK(("Multipass operation requires off-screen buffer\n"));
583    }
584
585    /* Check that the filter matches what we support */
586
587    switch (pSrc->filter) {
588    case PictFilterNearest:
589    case PictFilterFast:
590    case PictFilterGood:
591    case PictFilterBest:
592        break;
593
594    default:
595        GEODE_FALLBACK(("Bilinear or convolution filters are not supported\n"));
596    }
597
598    if (pMsk && pMsk->transform)
599        GEODE_FALLBACK(("Mask transforms are not supported\n"));
600
601    /* Keep an eye out for source rotation transforms - those we can
602     * do something about */
603
604    exaScratch.rotate = RR_Rotate_0;
605    exaScratch.transform = NULL;
606
607    if (pSrc->transform && !lx_process_transform(pSrc))
608        GEODE_FALLBACK(("Transform operation is non-trivial\n"));
609
610    /* XXX - I don't understand PICT_a8 enough - so I'm punting */
611    if ((op != PictOpAdd) && (pSrc->format == PICT_a8 ||
612                              pDst->format == PICT_a8))
613        GEODE_FALLBACK(("PICT_a8 as src or dst format is unsupported\n"));
614
615    if (pMsk && op != PictOpClear) {
616        struct blend_ops_t *opPtr = &lx_alpha_ops[op * 2];
617        int direction = (opPtr->channel == CIMGP_CHANNEL_A_SOURCE) ? 0 : 1;
618
619        /* Direction 0 indicates src->dst, 1 indicates dst->src */
620        if (((direction == 0) &&
621             (pSrc->pDrawable && pSrc->pDrawable->bitsPerPixel < 16)) ||
622            ((direction == 1) && (pDst->pDrawable->bitsPerPixel < 16))) {
623            ErrorF("Mask blending unsupported with <16bpp\n");
624            return FALSE;
625        }
626        if (pMsk->format != PICT_a8 && pMsk->format != PICT_a4)
627            GEODE_FALLBACK(("Masks can be only done with a 8bpp or 4bpp depth\n"));
628
629        /* The pSrc should be 1x1 pixel if the pMsk is not zero */
630        if (pSrc->pDrawable &&
631            (pSrc->pDrawable->width != 1 || pSrc->pDrawable->height != 1))
632            GEODE_FALLBACK(("pSrc should be 1x1 pixel if pMsk is not zero\n"));
633        /* FIXME: In lx_prepare_composite, there are no variables to record the
634         * one pixel source's width and height when the mask is not zero.
635         * That will lead to bigger region to render instead of one pixel in lx
636         * _do_composite, so we should fallback currently to avoid this */
637        /* Not an issue for solid pictures, because we'll treat it as 1x1R too */
638        if (!pSrc->repeat &&
639            !(pSrc->pSourcePict &&
640              pSrc->pSourcePict->type == SourcePictTypeSolidFill)) {
641            GEODE_FALLBACK(("FIXME: unzero mask might lead to bigger rendering region than 1x1 pixels\n"));
642        }
643    }
644    else {
645        if (pSrc->pSourcePict)
646            GEODE_FALLBACK(("Solid source pictures without a mask are not supported\n"));
647    }
648
649    /* Get the formats for the source and destination */
650
651    if ((srcFmt = lx_get_format(pSrc)) == NULL)
652        GEODE_FALLBACK(("Unsupported source format %x\n", pSrc->format));
653
654    if ((dstFmt = lx_get_format(pDst)) == NULL)
655        GEODE_FALLBACK(("Unsupported destination format %x\n", pDst->format));
656
657    /* Make sure operations that need alpha bits have them */
658    /* If a mask is enabled, the alpha will come from there */
659
660    if (!pMsk && (!srcFmt->alphabits && usesSrcAlpha(op)))
661        GEODE_FALLBACK(("Operation requires src alpha, but alphabits is unset\n"));
662
663    if (!pMsk && (!dstFmt->alphabits && usesDstAlpha(op)))
664        GEODE_FALLBACK(("Operation requires dst alpha, but alphabits is unset\n"));
665
666    /* FIXME: See a way around this! */
667    if (srcFmt->alphabits == 0 && dstFmt->alphabits != 0)
668        GEODE_FALLBACK(("src_alphabits=0, dst_alphabits!=0\n"));
669
670    /* If this is a rotate operation, then make sure the src and dst
671     * formats are the same */
672    if (exaScratch.rotate != RR_Rotate_0 && srcFmt != dstFmt) {
673        ErrorF("EXA: Unable to rotate and convert formats at the same time\n");
674        return FALSE;
675    }
676    return TRUE;
677}
678
679static Bool
680lx_prepare_composite(int op, PicturePtr pSrc, PicturePtr pMsk,
681                     PicturePtr pDst, PixmapPtr pxSrc, PixmapPtr pxMsk,
682                     PixmapPtr pxDst)
683{
684    GeodeRec *pGeode = GEODEPTR_FROM_PIXMAP(pxDst);
685    const struct exa_format_t *srcFmt, *dstFmt;
686
687    /* Get the formats for the source and destination */
688
689    srcFmt = lx_get_format(pSrc);
690    dstFmt = lx_get_format(pDst);
691
692    /* Set up the scratch buffer with the information we need */
693
694    exaScratch.srcFormat = (struct exa_format_t *) srcFmt;
695    exaScratch.dstFormat = (struct exa_format_t *) dstFmt;
696    exaScratch.op = op;
697    exaScratch.repeat = pSrc->repeat;
698    exaScratch.bufferOffset = pGeode->exaBfrOffset;
699
700    if (pMsk && op != PictOpClear) {
701        /* Get the source color */
702        if (pSrc->pSourcePict) {
703            exaScratch.srcColor = pSrc->pSourcePict->solidFill.color;
704        }
705        else {
706            /* If the op is PictOpOver(or PictOpOutReverse, PictOpInReverse,
707             * PictOpIn, PictOpOut, PictOpOverReverse), we should get the
708             * ARGB32 source format */
709
710            if ((op == PictOpOver || op == PictOpOutReverse || op ==
711                 PictOpInReverse || op == PictOpIn || op == PictOpOut ||
712                 op == PictOpOverReverse) && (srcFmt->alphabits != 0))
713                exaScratch.srcColor = exaGetPixmapFirstPixel(pxSrc);
714            else if ((op == PictOpOver || op == PictOpOutReverse || op ==
715                      PictOpInReverse || op == PictOpIn || op == PictOpOut ||
716                      op == PictOpOverReverse) && (srcFmt->alphabits == 0))
717                exaScratch.srcColor = lx_get_source_color(pxSrc, pSrc->format,
718                                                          PICT_a8r8g8b8);
719            else
720                exaScratch.srcColor = lx_get_source_color(pxSrc, pSrc->format,
721                                                          pDst->format);
722        }
723
724        /* Save off the info we need (reuse the source values to save space) */
725        exaScratch.type = COMP_TYPE_MASK;
726        exaScratch.maskrepeat = pMsk->repeat;
727
728        exaScratch.srcOffset = exaGetPixmapOffset(pxMsk);
729        exaScratch.srcPitch = exaGetPixmapPitch(pxMsk);
730        exaScratch.srcBpp = (pxMsk->drawable.bitsPerPixel + 7) / 8;
731
732        exaScratch.srcWidth = pMsk->pDrawable->width;
733        exaScratch.srcHeight = pMsk->pDrawable->height;
734
735        /* Flag to indicate if this a 8BPP or a 4BPP mask */
736        exaScratch.fourBpp = (pxMsk->drawable.bitsPerPixel == 4) ? 1 : 0;
737    }
738    else {
739        if (usesPasses(op))
740            exaScratch.type = COMP_TYPE_TWOPASS;
741        else if (exaScratch.rotate != RR_Rotate_0)
742            exaScratch.type = COMP_TYPE_ROTATE;
743        else
744            exaScratch.type = COMP_TYPE_ONEPASS;
745
746        exaScratch.srcOffset = exaGetPixmapOffset(pxSrc);
747        exaScratch.srcPitch = exaGetPixmapPitch(pxSrc);
748        exaScratch.srcBpp = (pxSrc->drawable.bitsPerPixel + 7) / 8;
749
750        exaScratch.srcWidth = pSrc->pDrawable->width;
751        exaScratch.srcHeight = pSrc->pDrawable->height;
752    }
753
754    return TRUE;
755}
756
757static int
758lx_get_bpp_from_format(int format)
759{
760
761    switch (format) {
762    case CIMGP_SOURCE_FMT_8_8_8_8:
763    case CIMGP_SOURCE_FMT_32BPP_BGR:
764        return 32;
765
766    case CIMGP_SOURCE_FMT_4_4_4_4:
767        return 12;
768
769    case CIMGP_SOURCE_FMT_0_5_6_5:
770    case CIMGP_SOURCE_FMT_16BPP_BGR:
771        return 16;
772
773    case CIMGP_SOURCE_FMT_1_5_5_5:
774    case CIMGP_SOURCE_FMT_15BPP_BGR:
775        return 15;
776
777    case CIMGP_SOURCE_FMT_3_3_2:
778        return 8;
779    }
780
781    return 0;
782}
783
784/* BGR needs to be set in the source for it to take - so adjust the source
785 * to enable BGR if the two formats are different, and disable it if they
786 * are the same
787 */
788
/* Program the source format, adjusting bit 0x10 (the bit in which the BGR
 * format codes differ from their RGB counterparts here) relative to the
 * destination: when the destination has the bit set, the bit is flipped in
 * the source format; otherwise the source format is used unchanged.  The
 * programmed bit therefore ends up set exactly when source and destination
 * differ in it.
 */
static void
lx_set_source_format(int srcFormat, int dstFormat)
{
    int programmed = srcFormat;

    if (dstFormat & 0x10)
        programmed ^= 0x10;

    gp_set_source_format(programmed);
}
799
800/* If we are converting colors and we need the channel A alpha,
801 * then use a special alpha type that preserves the alpha before
802 * converting the format
803 */
804
805static inline int
806get_op_type(struct exa_format_t *src, struct exa_format_t *dst, int type)
807{
808    return (type == CIMGP_CHANNEL_A_ALPHA &&
809            src->alphabits != dst->alphabits) ? CIMGP_CONVERTED_ALPHA : type;
810}
811
/* Note - this is the preferred onepass method.  The other will remain
 * ifdefed out until such time that we are sure it is not needed
 */
815
/* Offset of pixel (x, y) inside pixmap px: the EXA pixmap offset plus y
 * pitches plus x pixels, with the pixel size being bitsPerPixel rounded up
 * to whole bytes.  Note that px is evaluated three times. */
#define GetPixmapOffset(px, x, y) ( exaGetPixmapOffset((px)) + \
  (exaGetPixmapPitch((px)) * (y)) + \
  ((((px)->drawable.bitsPerPixel + 7) / 8) * (x)) )

/* Offset of pixel (_x, _y) inside the surface currently described by
 * exaScratch.srcOffset, exaScratch.srcPitch and exaScratch.srcBpp */
#define GetSrcOffset(_x, _y) (exaScratch.srcOffset + ((_y) * exaScratch.srcPitch) + \
			      ((_x) * exaScratch.srcBpp))
822
823static void
824lx_composite_onepass_add_a8(PixmapPtr pxDst, unsigned long dstOffset,
825                            unsigned long srcOffset, int width, int height,
826                            int opX, int opY, int srcX, int srcY)
827{
828    struct blend_ops_t *opPtr;
829    int apply, type;
830    int optempX, optempY;
831    int i, j;
832    unsigned long pixmapOffset, pixmapPitch, calBitsPixel;
833
834    pixmapOffset = exaGetPixmapOffset(pxDst);
835    pixmapPitch = exaGetPixmapPitch(pxDst);
836    calBitsPixel = (pxDst->drawable.bitsPerPixel + 7) / 8;
837
838    /* Keep this GP idle judge here. Otherwise the SW method has chance to
839     * conflict with the HW rendering method */
840    gp_wait_until_idle();
841
842    if (opX % 4 == 0 && srcX % 4 == 0) {
843        /* HW acceleration */
844        opPtr = &lx_alpha_ops[exaScratch.op * 2];
845        apply = CIMGP_APPLY_BLEND_TO_ALL;
846        gp_declare_blt(0);
847        gp_set_bpp(32);
848        gp_set_strides(exaGetPixmapPitch(pxDst), exaScratch.srcPitch);
849        gp_set_source_format(8);
850        type = opPtr->type;
851        gp_set_alpha_operation(opPtr->operation, type, opPtr->channel, apply,
852                               0);
853        gp_screen_to_screen_convert(dstOffset, srcOffset, width / 4, height, 0);
854        /* Calculate the pixels in the tail of each line */
855        for (j = srcY; j < srcY + height; j++)
856            for (i = srcX + (width / 4) * 4; i < srcX + width; i++) {
857                srcOffset = GetSrcOffset(i, j);
858                optempX = opX + i - srcX;
859                optempY = opY + j - srcY;
860                dstOffset = pixmapOffset + pixmapPitch * optempY +
861                    calBitsPixel * optempX;
862                *(cim_fb_ptr + dstOffset) = (*(cim_fb_ptr + srcOffset)
863                                             + *(cim_fb_ptr + dstOffset) <=
864                                             0xff) ? *(cim_fb_ptr + srcOffset) +
865                    *(cim_fb_ptr + dstOffset) : 0xff;
866            }
867    }
868    else {
869        for (j = srcY; j < srcY + height; j++)
870            for (i = srcX; i < srcX + width; i++) {
871                srcOffset = GetSrcOffset(i, j);
872                optempX = opX + i - srcX;
873                optempY = opY + j - srcY;
874                dstOffset = pixmapOffset + pixmapPitch * optempY +
875                    calBitsPixel * optempX;
876                *(cim_fb_ptr + dstOffset) = (*(cim_fb_ptr + srcOffset) +
877                                             *(cim_fb_ptr + dstOffset) <=
878                                             0xff) ? *(cim_fb_ptr + srcOffset) +
879                    *(cim_fb_ptr + dstOffset) : 0xff;
880            }
881    }
882}
883
884static void
885lx_composite_onepass(PixmapPtr pxDst, unsigned long dstOffset,
886                     unsigned long srcOffset, int width, int height)
887{
888    struct blend_ops_t *opPtr;
889    int apply, type;
890
891    opPtr = &lx_alpha_ops[exaScratch.op * 2];
892
893    apply = (exaScratch.dstFormat->alphabits != 0 &&
894             exaScratch.srcFormat->alphabits != 0) ?
895        CIMGP_APPLY_BLEND_TO_ALL : CIMGP_APPLY_BLEND_TO_RGB;
896
897    gp_declare_blt(0);
898    gp_set_bpp(lx_get_bpp_from_format(exaScratch.dstFormat->fmt));
899    gp_set_strides(exaGetPixmapPitch(pxDst), exaScratch.srcPitch);
900
901    lx_set_source_format(exaScratch.srcFormat->fmt, exaScratch.dstFormat->fmt);
902
903    type = get_op_type(exaScratch.srcFormat, exaScratch.dstFormat, opPtr->type);
904
905    gp_set_alpha_operation(opPtr->operation, type, opPtr->channel, apply, 0);
906
907    gp_screen_to_screen_convert(dstOffset, srcOffset, width, height, 0);
908}
909
910static void
911lx_composite_all_black(unsigned long srcOffset, int width, int height)
912{
913    struct blend_ops_t *opPtr;
914    int apply, type;
915
916    opPtr = &lx_alpha_ops[0];
917    apply = (exaScratch.srcFormat->alphabits != 0) ?
918        CIMGP_APPLY_BLEND_TO_ALL : CIMGP_APPLY_BLEND_TO_RGB;
919    gp_declare_blt(0);
920    gp_set_bpp(lx_get_bpp_from_format(exaScratch.srcFormat->fmt));
921    gp_set_strides(exaScratch.srcPitch, exaScratch.srcPitch);
922    lx_set_source_format(exaScratch.srcFormat->fmt, exaScratch.srcFormat->fmt);
923    type = get_op_type(exaScratch.srcFormat, exaScratch.srcFormat, opPtr->type);
924    gp_set_alpha_operation(opPtr->operation, type, opPtr->channel, apply, 0);
925    gp_screen_to_screen_convert(srcOffset, srcOffset, width, height, 0);
926
927}
928
/*
 * Tile the source over the destination rectangle, one blt per tile.
 *
 * pxDst          destination pixmap
 * width, height  size of the destination rectangle to cover
 * opX, opY       top-left corner of the destination rectangle
 * srcX, srcY     source start point; wrapped into the source bounds below
 *
 * The destination is walked left to right, top to bottom.  The first tile
 * starts at (srcX, srcY) in the source; the remaining tiles of the first row
 * start at (0, srcY); the first tile of every later row starts at (srcX, 0);
 * all other tiles start at (0, 0).  Each tile is clipped to what is left of
 * the destination rectangle.
 */
static void
lx_composite_onepass_special(PixmapPtr pxDst, int width, int height, int opX,
                             int opY, int srcX, int srcY)
{
    struct blend_ops_t *opPtr;
    int apply, type;
    int opWidth, opHeight;
    int optempX, optempY;
    unsigned int dstOffset, srcOffset = 0;

    /* (optempX, optempY) is the destination position of the current tile */
    optempX = opX;
    optempY = opY;

    /* Make sure srcX and srcY are in source region: the double modulo maps
     * negative values into [0, srcWidth) / [0, srcHeight) as well */
    srcX = ((srcX % (int) exaScratch.srcWidth) + (int) exaScratch.srcWidth)
        % (int) exaScratch.srcWidth;
    srcY = ((srcY % (int) exaScratch.srcHeight) + (int) exaScratch.srcHeight)
        % (int) exaScratch.srcHeight;

    /* The first tile spans from (srcX, srcY) to the source's right/bottom
     * edge ... */
    opWidth = exaScratch.srcWidth - srcX;
    opHeight = exaScratch.srcHeight - srcY;

    srcOffset = GetSrcOffset(srcX, srcY);

    /* ... unless the destination rectangle is smaller than that */
    if (width < opWidth)
        opWidth = width;
    if (height < opHeight)
        opHeight = height;

    while (1) {
        /* Program and submit the blend blt for the current tile */
        gp_wait_until_idle();
        dstOffset = GetPixmapOffset(pxDst, optempX, optempY);
        opPtr = &lx_alpha_ops[exaScratch.op * 2];
        apply = (exaScratch.dstFormat->alphabits != 0 &&
                 exaScratch.srcFormat->alphabits != 0) ?
            CIMGP_APPLY_BLEND_TO_ALL : CIMGP_APPLY_BLEND_TO_RGB;
        gp_declare_blt(0);
        gp_set_bpp(lx_get_bpp_from_format(exaScratch.dstFormat->fmt));
        gp_set_strides(exaGetPixmapPitch(pxDst), exaScratch.srcPitch);
        lx_set_source_format(exaScratch.srcFormat->fmt,
                             exaScratch.dstFormat->fmt);
        type = get_op_type(exaScratch.srcFormat, exaScratch.dstFormat,
                           opPtr->type);
        gp_set_alpha_operation(opPtr->operation, type, opPtr->channel,
                               apply, 0);
        gp_screen_to_screen_convert(dstOffset, srcOffset, opWidth, opHeight, 0);

        /* Advance to the next tile; wrap to the next row at the right edge
         * and stop once the bottom edge has been reached */
        optempX += opWidth;
        if (optempX >= opX + width) {
            optempX = opX;
            optempY += opHeight;
            if (optempY >= opY + height)
                break;
        }
        if (optempX == opX) {
            /* First tile of a new row: keep the horizontal source start,
             * restart from the top of the source */
            srcOffset = GetSrcOffset(srcX, 0);
            opWidth = ((opX + width) - optempX) > (exaScratch.srcWidth - srcX)
                ? (exaScratch.srcWidth - srcX) : ((opX + width) - optempX);
            opHeight = ((opY + height) - optempY) > exaScratch.srcHeight
                ? exaScratch.srcHeight : ((opY + height) - optempY);
        }
        else if (optempY == opY) {
            /* Later tile of the first row: restart from the left of the
             * source, keep the vertical source start */
            srcOffset = GetSrcOffset(0, srcY);
            opWidth = ((opX + width) - optempX) > exaScratch.srcWidth
                ? exaScratch.srcWidth : ((opX + width) - optempX);
            opHeight = ((opY + height) - optempY) > (exaScratch.srcHeight -
                                                     srcY)
                ? (exaScratch.srcHeight - srcY) : ((opY + height)
                                                   - optempY);
        }
        else {
            /* Any other tile starts at the source origin */
            srcOffset = GetSrcOffset(0, 0);
            opWidth = ((opX + width) - optempX) > exaScratch.srcWidth
                ? exaScratch.srcWidth : ((opX + width) - optempX);
            opHeight = ((opY + height) - optempY) > exaScratch.srcHeight
                ? exaScratch.srcHeight : ((opY + height) - optempY);
        }
    }
}
1008
1009/* This function handles the multipass blend functions */
1010
1011static void
1012lx_composite_multipass(PixmapPtr pxDst, unsigned long dstOffset,
1013                       unsigned long srcOffset, int width, int height)
1014{
1015    struct blend_ops_t *opPtr;
1016    int sbpp = lx_get_bpp_from_format(exaScratch.srcFormat->fmt);
1017    int apply, type;
1018
1019    /* Wait until the GP is idle - this will ensure that the scratch buffer
1020     * isn't occupied */
1021
1022    gp_wait_until_idle();
1023
1024    /* Copy the destination to the scratch buffer, and convert it to the
1025     * source format */
1026
1027    gp_declare_blt(0);
1028
1029    gp_set_bpp(sbpp);
1030    gp_set_source_format(exaScratch.dstFormat->fmt);
1031    gp_set_raster_operation(0xCC);
1032    gp_set_strides(exaScratch.srcPitch, exaGetPixmapPitch(pxDst));
1033    gp_screen_to_screen_convert(exaScratch.bufferOffset, dstOffset,
1034                                width, height, 0);
1035
1036    /* Do the first blend from the source to the scratch buffer */
1037
1038    gp_declare_blt(CIMGP_BLTFLAGS_HAZARD);
1039    gp_set_bpp(sbpp);
1040    gp_set_source_format(exaScratch.srcFormat->fmt);
1041    gp_set_strides(exaScratch.srcPitch, exaScratch.srcPitch);
1042
1043    opPtr = &lx_alpha_ops[exaScratch.op * 2];
1044
1045    apply = (exaScratch.srcFormat->alphabits == 0) ?
1046        CIMGP_APPLY_BLEND_TO_RGB : CIMGP_APPLY_BLEND_TO_ALL;
1047
1048    /* If we're destroying the source alpha bits, then make sure we
1049     * use the alpha before the color conversion
1050     */
1051
1052    gp_screen_to_screen_blt(exaScratch.bufferOffset, srcOffset, width, height,
1053                            0);
1054
1055    /* Finally, do the second blend back to the destination */
1056
1057    opPtr = &lx_alpha_ops[(exaScratch.op * 2) + 1];
1058
1059    apply = (exaScratch.dstFormat->alphabits == 0) ?
1060        CIMGP_APPLY_BLEND_TO_RGB : CIMGP_APPLY_BLEND_TO_ALL;
1061
1062    gp_declare_blt(CIMGP_BLTFLAGS_HAZARD);
1063    gp_set_bpp(lx_get_bpp_from_format(exaScratch.dstFormat->fmt));
1064
1065    lx_set_source_format(exaScratch.srcFormat->fmt, exaScratch.dstFormat->fmt);
1066
1067    type = get_op_type(exaScratch.srcFormat, exaScratch.dstFormat, opPtr->type);
1068
1069    gp_set_alpha_operation(opPtr->operation, type, opPtr->channel, apply, 0);
1070
1071    gp_screen_to_screen_convert(dstOffset, exaScratch.bufferOffset,
1072                                width, height, 0);
1073}
1074
1075static void
1076lx_composite_rotate(PixmapPtr pxDst, unsigned long dstOffset,
1077                    unsigned int srcOffset, int width, int height)
1078{
1079    int degrees = 0;
1080
1081    gp_declare_blt(0);
1082    gp_set_bpp(lx_get_bpp_from_format(exaScratch.dstFormat->fmt));
1083    gp_set_strides(exaGetPixmapPitch(pxDst), exaScratch.srcPitch);
1084
1085    lx_set_source_format(exaScratch.srcFormat->fmt, exaScratch.dstFormat->fmt);
1086
1087    gp_set_raster_operation(0xCC);
1088
1089    /* RandR rotation is counter-clockwise, our rotation
1090     * is clockwise, so adjust the numbers accordingly */
1091
1092    switch (exaScratch.rotate) {
1093    case RR_Rotate_90:
1094        degrees = 270;
1095        break;
1096    case RR_Rotate_180:
1097        degrees = 180;
1098        break;
1099    case RR_Rotate_270:
1100        degrees = 90;
1101        break;
1102    }
1103
1104    gp_rotate_blt(dstOffset, srcOffset, width, height, degrees);
1105}
1106
1107static void
1108lx_do_composite_mask(PixmapPtr pxDst, unsigned long dstOffset,
1109                     unsigned int maskOffset, int width, int height)
1110{
1111    struct blend_ops_t *opPtr = &lx_alpha_ops[exaScratch.op * 2];
1112
1113    gp_declare_blt(0);
1114
1115    gp_set_source_format(exaScratch.srcFormat->fmt);
1116    gp_set_strides(exaGetPixmapPitch(pxDst), exaScratch.srcPitch);
1117    gp_set_bpp(lx_get_bpp_from_format(exaScratch.dstFormat->fmt));
1118    gp_set_solid_source(exaScratch.srcColor);
1119
1120    gp_blend_mask_blt(dstOffset, 0, width, height, maskOffset,
1121                      exaScratch.srcPitch, opPtr->operation,
1122                      exaScratch.fourBpp);
1123}
1124
/*
 * Masked composite done in two hardware passes through the scratch buffer.
 *
 * pxDst          destination pixmap
 * dstOffset      offset of the destination rectangle
 * maskOffset     offset of the mask data
 * width, height  size of the rectangle to render
 * opX, opY       destination coordinates of the rectangle
 * srcPoint       mask start point in fixed-point; passed by value and
 *                advanced locally as chunks are rendered
 *
 * Pass 1 blends the solid colour exaScratch.srcColor through the mask into
 * the scratch buffer (as 8:8:8:8 pixels, using lx_alpha_ops[op * 2 + 1]).
 * Pass 2 blends the scratch buffer into the destination (using
 * lx_alpha_ops[op * 2]).  Rectangles that do not fit the scratch buffer are
 * rendered in full-width horizontal bands.
 */
static void
lx_do_composite_mask_two_pass(PixmapPtr pxDst, unsigned long dstOffset,
                              unsigned int maskOffset, int width, int height,
                              int opX, int opY, xPointFixed srcPoint)
{
    int apply, type;
    struct blend_ops_t *opPtr;
    int opWidth, opHeight;
    int opoverX, opoverY;

    /* (opoverX, opoverY) is the destination position of the current band */
    opoverX = opX;
    opoverY = opY;

    /* The rendering region should not be bigger than the off-screen memory
     * size, which equals DEFAULT_EXA_SCRATCH_BFRSZ. If that happens, we split
     * the PictOpOver rendering region into several 256KB chunks. Because of
     * the pitch (stride) parameter, each chunk uses the full width of the
     * mask picture, that is to say it is a scanline rendering process.
     * NOTE(review): if width * 4 alone exceeds DEFAULT_EXA_SCRATCH_BFRSZ the
     * division below yields opHeight == 0 and the loop never makes
     * progress - confirm callers cannot pass such widths. */
    if (width * height * 4 > DEFAULT_EXA_SCRATCH_BFRSZ) {
        opWidth = width;
        opHeight = DEFAULT_EXA_SCRATCH_BFRSZ / (width * 4);
    }
    else {
        opWidth = width;
        opHeight = height;
    }

    while (1) {

        /* Wait until the GP is idle - this will ensure that the scratch buffer
         * isn't occupied */

        gp_wait_until_idle();

        /* Copy the source to the scratch buffer, and do a src * mask raster
         * operation */

        gp_declare_blt(0);
        opPtr = &lx_alpha_ops[(exaScratch.op * 2) + 1];
        gp_set_source_format(CIMGP_SOURCE_FMT_8_8_8_8);
        /* The scratch buffer is tightly packed: 4 bytes per pixel */
        gp_set_strides(opWidth * 4, exaScratch.srcPitch);
        gp_set_bpp(lx_get_bpp_from_format(CIMGP_SOURCE_FMT_8_8_8_8));
        gp_set_solid_source(exaScratch.srcColor);
        gp_blend_mask_blt(exaScratch.bufferOffset, 0, opWidth, opHeight,
                          maskOffset, exaScratch.srcPitch, opPtr->operation,
                          exaScratch.fourBpp);

        /* Do a relative operation (refer to rendercheck ops.c), and copy the
         * operation result to the destination */

        gp_declare_blt(CIMGP_BLTFLAGS_HAZARD);
        opPtr = &lx_alpha_ops[exaScratch.op * 2];
        apply = (exaScratch.dstFormat->alphabits == 0) ?
            CIMGP_APPLY_BLEND_TO_RGB : CIMGP_APPLY_BLEND_TO_ALL;
        gp_set_source_format(CIMGP_SOURCE_FMT_8_8_8_8);
        gp_set_strides(exaGetPixmapPitch(pxDst), opWidth * 4);
        gp_set_bpp(lx_get_bpp_from_format(exaScratch.dstFormat->fmt));
        type = CIMGP_CONVERTED_ALPHA;
        gp_set_alpha_operation(opPtr->operation, type, opPtr->channel,
                               apply, 0);
        gp_screen_to_screen_convert(dstOffset, exaScratch.bufferOffset,
                                    opWidth, opHeight, 0);

        if (width * height * 4 > DEFAULT_EXA_SCRATCH_BFRSZ) {
            /* Finish the rendering once the band just drawn ends exactly at
             * the bottom of the rectangle */
            if (opoverY + opHeight == opY + height)
                break;
            /* Recalculate the Dest and Mask rendering start point */
            srcPoint.y = srcPoint.y + F(opHeight);
            opoverY = opoverY + opHeight;
            /* Shrink the last band so it does not run past the bottom */
            if (opoverY + opHeight > opY + height)
                opHeight = opY + height - opoverY;
            dstOffset = GetPixmapOffset(pxDst, opoverX, opoverY);
            maskOffset = GetSrcOffset(I(srcPoint.x), I(srcPoint.y));
        }
        else
            break;
    }
}
1204
1205static void
1206transformPoint(PictTransform * t, xPointFixed * point)
1207{
1208    PictVector v;
1209
1210    v.vector[0] = point->x;
1211    v.vector[1] = point->y;
1212    v.vector[2] = xFixed1;
1213
1214    if (t != NULL)
1215        PictureTransformPoint(t, &v);
1216
1217    point->x = v.vector[0];
1218    point->y = v.vector[1];
1219}
1220
1221static void
1222lx_do_composite(PixmapPtr pxDst, int srcX, int srcY, int maskX,
1223                int maskY, int dstX, int dstY, int width, int height)
1224{
1225    unsigned int dstOffset, srcOffset = 0;
1226
1227    xPointFixed srcPoint;
1228
1229    int opX = dstX;
1230    int opY = dstY;
1231    int opWidth = width;
1232    int opHeight = height;
1233
1234    /* Transform the source coordinates */
1235
1236    if (exaScratch.type == COMP_TYPE_MASK) {
1237        srcPoint.x = F(maskX);
1238        srcPoint.y = F(maskY);
1239    }
1240    else {
1241        srcPoint.x = F(srcX);
1242        srcPoint.y = F(srcY);
1243    }
1244
1245    /* srcX, srcY point to the upper right side of the bounding box
1246     * in the unrotated coordinate space.  Depending on the orientation,
1247     * we have to translate the coordinates to point to the origin of
1248     * the rectangle in the source pixmap */
1249
1250    switch (exaScratch.rotate) {
1251    case RR_Rotate_270:
1252        srcPoint.x += F(width);
1253
1254        opWidth = height;
1255        opHeight = width;
1256        break;
1257
1258    case RR_Rotate_180:
1259        srcPoint.x += F(width);
1260        srcPoint.y += F(height);
1261
1262        srcX += width;
1263        srcY += height;
1264        break;
1265
1266    case RR_Rotate_90:
1267        srcPoint.y += F(height);
1268
1269        opWidth = height;
1270        opHeight = width;
1271        break;
1272    }
1273
1274    transformPoint(exaScratch.transform, &srcPoint);
1275
1276    /* Adjust the point to fit into the pixmap */
1277
1278    if (I(srcPoint.x) < 0) {
1279        opWidth += I(srcPoint.x);
1280        srcPoint.x = F(0);
1281    }
1282
1283    if (I(srcPoint.y) < 0) {
1284        opHeight += I(srcPoint.y);
1285        srcPoint.y = F(0);
1286    }
1287
1288    /* Get the source point offset position */
1289
1290    srcOffset = GetSrcOffset(I(srcPoint.x), I(srcPoint.y));
1291
1292    /* When mask exists, exaScratch.srcWidth and exaScratch.srcHeight are
1293     * the source width and source height; Otherwise, they are mask width
1294     * and mask height */
1295    /* exaScratch.repeat is the source repeat attribute
1296     * exaScratch.maskrepeat is the mask repeat attribute */
1297    /* If type is COMP_TYPE_MASK, maskX and maskY are not zero, we should
1298     * subtract them to do the operation in the correct region */
1299
1300    /* FIXME:  Please add the code to handle the condition when the maskX
1301     * and maskY coordinate are negative or greater than
1302     * exaScratch.srcWidth and exaScratch.srcHeight */
1303
1304    if (exaScratch.type == COMP_TYPE_MASK) {
1305        if ((exaScratch.srcWidth - maskX) < opWidth)
1306            opWidth = exaScratch.srcWidth - maskX;
1307        if ((exaScratch.srcHeight - maskY) < opHeight)
1308            opHeight = exaScratch.srcHeight - maskY;
1309    }
1310    else {
1311        if (exaScratch.type == COMP_TYPE_ONEPASS) {
1312            /* This is the condition srcX or/and srcY is/are out of source
1313             * region */
1314            if (((srcY >= 0 && srcY >= exaScratch.srcHeight)
1315                 || (srcX >= 0 && srcX >= exaScratch.srcWidth)) &&
1316                (exaScratch.op == PictOpOver || exaScratch.op == PictOpSrc)) {
1317                if (exaScratch.repeat == 1) {
1318                    opWidth = width;
1319                    opHeight = height;
1320                }
1321                else {
1322                    if (exaScratch.op == PictOpOver)
1323                        return;
1324                    else {
1325                        exaScratch.op = PictOpClear;
1326                        opWidth = width;
1327                        opHeight = height;
1328                    }
1329                }
1330                /* This is the condition srcX or/and srcY is/are in the source
1331                 * region */
1332            }
1333            else if (srcX >= 0 && srcY >= 0 &&
1334                     (exaScratch.op == PictOpOver ||
1335                      exaScratch.op == PictOpSrc)) {
1336                if (exaScratch.repeat == 1) {
1337                    opWidth = width;
1338                    opHeight = height;
1339                }
1340                else {
1341                    if ((exaScratch.srcWidth - srcX) < opWidth)
1342                        opWidth = exaScratch.srcWidth - srcX;
1343                    if ((exaScratch.srcHeight - srcY) < opHeight)
1344                        opHeight = exaScratch.srcHeight - srcY;
1345                }
1346                /* This is the condition srcX or/and srcY is/are negative */
1347            }
1348            else if ((srcX < 0 || srcY < 0) &&
1349                     (exaScratch.op == PictOpOver ||
1350                      exaScratch.op == PictOpSrc)) {
1351                if (exaScratch.repeat == 1) {
1352                    opWidth = width;
1353                    opHeight = height;
1354                }
1355                else {
1356                    /* FIXME: We can't support negative srcX/Y for all corner cases in
1357                     * a sane way without a bit bigger refactoring. So as to avoid
1358                     * gross misrenderings (e.g missing tray icons) in current real-world
1359                     * applications, just shift destination appropriately for now and
1360                     * ignore out of bounds source pixmap zero-vector handling. This is
1361                     * actually correct for PictOpOver, but PictOpSrc out of bounds regions
1362                     * should be blacked out, but aren't - without this workaround however
1363                     * it'd be simply all black instead, which is probably worse till a full
1364                     * clean solution solves it for all cases. */
1365                    if (srcX < 0) {
1366                        opX -= srcX;
1367                        srcX = 0;
1368                    }
1369
1370                    if (srcY < 0) {
1371                        opY -= srcY;
1372                        srcY = 0;
1373                    }
1374
1375                    /* EXA has taken care of adjusting srcWidth if it gets cut on the right */
1376                    width = opWidth = exaScratch.srcWidth;
1377                    /* EXA has taken care of adjusting srcHeight if it gets cut on the bottom */
1378                    height = opHeight = exaScratch.srcHeight;
1379                }
1380            }
1381            else {
1382                if (exaScratch.srcWidth < opWidth)
1383                    opWidth = exaScratch.srcWidth;
1384                if (exaScratch.srcHeight < opHeight)
1385                    opHeight = exaScratch.srcHeight;
1386            }
1387        }
1388        else {
1389            if (exaScratch.rotate == RR_Rotate_180) {
1390            }
1391            else {
1392                if ((exaScratch.srcWidth - srcY) < opWidth)
1393                    opWidth = exaScratch.srcWidth - srcY;
1394                if ((exaScratch.srcHeight - srcX) < opHeight)
1395                    opHeight = exaScratch.srcHeight - srcX;
1396            }
1397        }
1398    }
1399
1400    while (1) {
1401
1402        dstOffset = GetPixmapOffset(pxDst, opX, opY);
1403
1404        switch (exaScratch.type) {
1405
1406        case COMP_TYPE_MASK:{
1407            if (exaScratch.op == PictOpOver || exaScratch.op ==
1408                PictOpOutReverse || exaScratch.op == PictOpInReverse ||
1409                exaScratch.op == PictOpIn || exaScratch.op == PictOpOut ||
1410                exaScratch.op == PictOpOverReverse)
1411                lx_do_composite_mask_two_pass(pxDst, dstOffset,
1412                                              srcOffset, opWidth, opHeight, opX,
1413                                              opY, srcPoint);
1414            else
1415                lx_do_composite_mask(pxDst, dstOffset, srcOffset,
1416                                     opWidth, opHeight);
1417        }
1418            break;
1419
1420        case COMP_TYPE_ONEPASS:
1421            if ((exaScratch.op == PictOpOver || exaScratch.op == PictOpSrc)
1422                && (exaScratch.repeat == 1)) {
1423                lx_composite_onepass_special(pxDst, opWidth, opHeight, opX, opY,
1424                                             srcX, srcY);
1425                return;
1426            }
1427            else if ((exaScratch.op == PictOpAdd) && (exaScratch.srcFormat->exa
1428                                                      == PICT_a8) &&
1429                     (exaScratch.dstFormat->exa == PICT_a8))
1430                lx_composite_onepass_add_a8(pxDst, dstOffset, srcOffset,
1431                                            opWidth, opHeight, opX, opY, srcX,
1432                                            srcY);
1433            else
1434                lx_composite_onepass(pxDst, dstOffset, srcOffset, opWidth,
1435                                     opHeight);
1436            break;
1437
1438        case COMP_TYPE_TWOPASS:
1439            lx_composite_multipass(pxDst, dstOffset, srcOffset, opWidth,
1440                                   opHeight);
1441
1442        case COMP_TYPE_ROTATE:
1443            lx_composite_rotate(pxDst, dstOffset, srcOffset, opWidth, opHeight);
1444            break;
1445        }
1446
1447        opX += opWidth;
1448
1449        if (opX >= dstX + width) {
1450            opX = dstX;
1451            opY += opHeight;
1452
1453            if (opY >= dstY + height)
1454                break;
1455        }
1456
1457        /* FIXME:  Please add the code to handle the condition when the maskX
1458         * and maskY coordinate are negative or greater than
1459         * exaScratch.srcWidth and exaScratch.srcHeight */
1460
1461        if (exaScratch.type == COMP_TYPE_MASK) {
1462            opWidth = ((dstX + width) - opX) > (exaScratch.srcWidth - maskX)
1463                ? (exaScratch.srcWidth - maskX) : (dstX + width) - opX;
1464            opHeight = ((dstY + height) - opY) > (exaScratch.srcHeight - maskY)
1465                ? (exaScratch.srcHeight - maskY) : (dstY + height) - opY;
1466            /* All black out of the mask */
1467            if (!exaScratch.maskrepeat)
1468                exaScratch.srcColor = 0x0;
1469        }
1470        else {
1471            if (exaScratch.type == COMP_TYPE_ONEPASS) {
1472                if (srcX >= 0 && srcY >= 0 && (exaScratch.op == PictOpOver ||
1473                                               exaScratch.op == PictOpSrc ||
1474                                               exaScratch.op == PictOpClear)) {
1475                    opWidth =
1476                        ((dstX + width) - opX) >
1477                        (exaScratch.srcWidth - srcX) ? (exaScratch.srcWidth -
1478                                                        srcX) : (dstX + width)
1479                        - opX;
1480                    opHeight = ((dstY + height) - opY) >
1481                        (exaScratch.srcHeight - srcY) ?
1482                        (exaScratch.srcHeight - srcY) : (dstY + height) - opY;
1483                }
1484                else {
1485                    opWidth = ((dstX + width) - opX) > exaScratch.srcWidth ?
1486                        exaScratch.srcWidth : (dstX + width) - opX;
1487                    opHeight = ((dstY + height) - opY) > exaScratch.srcHeight ?
1488                        exaScratch.srcHeight : (dstY + height) - opY;
1489                }
1490            }
1491            else {
1492                opWidth = ((dstX + width) - opX) > (exaScratch.srcWidth - srcY)
1493                    ? (exaScratch.srcWidth - srcY) : (dstX + width) - opX;
1494                opHeight =
1495                    ((dstY + height) - opY) >
1496                    (exaScratch.srcHeight - srcX) ? (exaScratch.srcHeight -
1497                                                     srcX) : (dstY + height) -
1498                    opY;
1499            }
1500            /* All black out of the source */
1501            if (!exaScratch.repeat && (exaScratch.type == COMP_TYPE_ONEPASS)) {
1502                /* FIXME: We black out the source here, so that any further regions
1503                 * in the loop get handled as a source that's a zero-vector (as
1504                 * defined for out-of-bounds from source pixmap for RepeatModeNone),
1505                 * but this will likely interfere with cases where srcX and/or srcY
1506                 * is negative - as opposed to e.g width being larger than srcWidth,
1507                 * which is exercised in rendercheck (always rectangle in top-left
1508                 * corner).
1509                 * Additionally it forces the drawing into tiles of srcWidth/srcHeight
1510                 * for non-repeat modes too, where we don't really need to tile it like
1511                 * this and could draw the out of bound regions all at once (or at most
1512                 * in 4 operations without the big loop). */
1513                lx_composite_all_black(srcOffset, exaScratch.srcWidth,
1514                                       exaScratch.srcHeight);
1515            }
1516            if (!exaScratch.repeat && (exaScratch.type == COMP_TYPE_ROTATE))
1517                break;
1518        }
1519    }
1520}
1521
1522static void
1523lx_wait_marker(ScreenPtr PScreen, int marker)
1524{
1525    gp_wait_until_idle();
1526}
1527
1528static void
1529lx_done(PixmapPtr ptr)
1530{
1531}
1532
#if 0
/*
 * Disabled (compiled out): CPU copy of a w x h rectangle from a system
 * memory buffer into the destination pixmap, one scanline per memcpy.
 *
 * pxDst      destination pixmap
 * x, y       top-left corner of the rectangle in the pixmap
 * w, h       rectangle size in pixels
 * src        first byte of the source data
 * src_pitch  byte distance between source scanlines
 */
static void
lx_upload_to_screen(PixmapPtr pxDst, int x, int y, int w, int h,
                    char *src, int src_pitch)
{
    GeodeRec *pGeode = GEODEPTR_FROM_PIXMAP(pxDst);
    int dst_pitch = exaGetPixmapPitch(pxDst);
    /* Bytes per pixel, rounded up */
    int cpp = (pxDst->drawable.bitsPerPixel + 7) / 8;

    char *dst;
    int offset = exaGetPixmapOffset(pxDst);

    /* Address of the rectangle's first pixel inside the framebuffer */
    dst = (char *) (pGeode->FBBase + offset + (y * dst_pitch) + (x * cpp));
    int i;

    for (i = 0; i < h; i++) {
        memcpy(dst, src, w * cpp);
        dst += dst_pitch;
        src += src_pitch;
    }
}
#endif
1555
1556#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 2)
1557
1558static Bool
1559lx_exa_pixmap_is_offscreen(PixmapPtr pPixmap)
1560{
1561    ScrnInfoPtr pScrni = xf86ScreenToScrn(pPixmap->drawable.pScreen);
1562    GeodeRec *pGeode = GEODEPTR(pScrni);
1563    void *start = (void *) (pGeode->FBBase);
1564    void *end =
1565        (void *) (pGeode->FBBase + pGeode->offscreenStart +
1566                  pGeode->offscreenSize);
1567
1568    if ((void *) pPixmap->devPrivate.ptr >= start &&
1569        (void *) pPixmap->devPrivate.ptr < end)
1570        return TRUE;
1571
1572    return FALSE;
1573}
1574
1575#endif
1576
1577Bool
1578LXExaInit(ScreenPtr pScreen)
1579{
1580    ScrnInfoPtr pScrni = xf86ScreenToScrn(pScreen);
1581    GeodeRec *pGeode = GEODEPTR(pScrni);
1582    ExaDriverPtr pExa = pGeode->pExa;
1583
1584    pExa->exa_major = EXA_VERSION_MAJOR;
1585    pExa->exa_minor = EXA_VERSION_MINOR;
1586
1587    pExa->WaitMarker = lx_wait_marker;
1588
1589    pExa->PrepareSolid = lx_prepare_solid;
1590    pExa->Solid = lx_do_solid;
1591    pExa->DoneSolid = lx_done;
1592
1593    pExa->PrepareCopy = lx_prepare_copy;
1594    pExa->Copy = lx_do_copy;
1595    pExa->DoneCopy = lx_done;
1596
1597    /* Composite */
1598    pExa->CheckComposite = lx_check_composite;
1599    pExa->PrepareComposite = lx_prepare_composite;
1600    pExa->Composite = lx_do_composite;
1601    pExa->DoneComposite = lx_done;
1602    //pExa->UploadToScreen =  lx_upload_to_screen;
1603
1604#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 2)
1605    pExa->PixmapIsOffscreen = lx_exa_pixmap_is_offscreen;
1606#endif
1607
1608    //pExa->flags = EXA_OFFSCREEN_PIXMAPS;
1609
1610    return exaDriverInit(pScreen, pGeode->pExa);
1611}
1612