lx_exa.c revision 170d5fdc
11.211Stron/*
21.161Slukem * Copyright (c) 2007-2008 Advanced Micro Devices, Inc.
31.161Slukem *
41.161Slukem * Permission is hereby granted, free of charge, to any person obtaining a
51.161Slukem * copy of this software and associated documentation files (the "Software"),
61.161Slukem * to deal in the Software without restriction, including without limitation
71.161Slukem * the rights to use, copy, modify, merge, publish, distribute, sublicense,
81.161Slukem * and/or sell copies of the Software, and to permit persons to whom the
91.161Slukem * Software is furnished to do so, subject to the following conditions:
101.161Slukem *
111.135Slukem * The above copyright notice and this permission notice shall be included in
121.135Slukem * all copies or substantial portions of the Software.
131.135Slukem *
141.135Slukem * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
151.189Sapb * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
161.196Sjmmv * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
171.196Sjmmv * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
181.207Sjoerg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
191.135Slukem * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
201.135Slukem * DEALINGS IN THE SOFTWARE.
211.135Slukem *
221.135Slukem * Neither the name of the Advanced Micro Devices, Inc. nor the names of its
231.135Slukem * contributors may be used to endorse or promote products derived from this
241.169Sgdamore * software without specific prior written permission.
251.172Stron */
261.198Splunky
271.176Splunky/* TODO:
281.198Splunky   Support a8 as a source or destination?
291.169Sgdamore   convert !a8 or !a4 masks?
301.169Sgdamore   support multiple pass operations?
311.135Slukem*/
321.135Slukem
/* To support PictOpAdd with a mask */
341.135Slukem
351.135Slukem#ifdef HAVE_CONFIG_H
361.135Slukem#include "config.h"
371.135Slukem#endif
381.135Slukem
391.135Slukem#include "xf86.h"
401.135Slukem#include "exa.h"
411.163Sjwise
421.135Slukem#include "geode.h"
431.135Slukem#include "cim_defs.h"
441.135Slukem#include "cim_regs.h"
451.135Slukem
461.199Sjoerg#include "geode_blend.h"
471.135Slukem
481.135Slukem#define F(x)    IntToxFixed(x)
491.135Slukem#define I(x)    xFixedToInt(x)
501.191Sxtraeme
511.135Slukemstatic const struct exa_format_t
521.135Slukem{
531.135Slukem    int exa;
541.135Slukem    int bpp;
551.135Slukem    int fmt;
561.135Slukem    int alphabits;
571.135Slukem} lx_exa_formats[] = {
581.135Slukem    {
591.135Slukem    PICT_a8r8g8b8, 32, CIMGP_SOURCE_FMT_8_8_8_8, 8}, {
601.135Slukem    PICT_x8r8g8b8, 32, CIMGP_SOURCE_FMT_8_8_8_8, 0}, {
611.135Slukem    PICT_x8b8g8r8, 32, CIMGP_SOURCE_FMT_32BPP_BGR, 0}, {
621.135Slukem    PICT_a4r4g4b4, 16, CIMGP_SOURCE_FMT_4_4_4_4, 4}, {
631.135Slukem    PICT_a1r5g5b5, 16, CIMGP_SOURCE_FMT_1_5_5_5, 1}, {
641.135Slukem    PICT_r5g6b5, 16, CIMGP_SOURCE_FMT_0_5_6_5, 0}, {
651.135Slukem    PICT_b5g6r5, 16, CIMGP_SOURCE_FMT_16BPP_BGR, 0}, {
661.135Slukem    PICT_x1r5g5b5, 16, CIMGP_SOURCE_FMT_1_5_5_5, 0}, {
671.135Slukem    PICT_x1b5g5r5, 16, CIMGP_SOURCE_FMT_15BPP_BGR, 0}, {
681.135Slukem    PICT_r3g3b2, 8, CIMGP_SOURCE_FMT_3_3_2, 0}, {
691.135Slukem    PICT_a8, 32, CIMGP_SOURCE_FMT_8_8_8_8, 8}
701.135Slukem};
711.135Slukem
721.170Smrg/* This is a chunk of memory we use for scratch space */
731.170Smrg
/* Values stored in exaScratch.type by lx_prepare_composite to record
 * which composite path was selected */
#define COMP_TYPE_MASK 0
#define COMP_TYPE_ONEPASS 1
#define COMP_TYPE_TWOPASS 3
#define COMP_TYPE_ROTATE  5
781.135Slukem
/* Driver-wide scratch state: the prepare hooks fill it in and the
 * routines that issue the blts read it back */
static struct
{
    int type;			/* COMP_TYPE_* set by lx_prepare_composite */

    /* Source surface description.  lx_prepare_composite stores the
     * mask pixmap's values here instead when a mask is in use */
    unsigned int srcOffset;
    unsigned int srcPitch;
    unsigned int srcBpp;	/* bytes per pixel, rounded up */
    unsigned int srcWidth, srcHeight;
    PixmapPtr srcPixmap;

    unsigned int srcColor;	/* source pixel fetched for masked composites */
    int op;			/* ROP for solid/copy, Render op for composite */
    int repeat;			/* copy of pSrc->repeat */
    int maskrepeat;		/* copy of pMsk->repeat */
    unsigned int fourBpp;	/* 1 when the mask pixmap is 4 bits per pixel */
    unsigned int bufferOffset;	/* copy of pGeode->exaBfrOffset */
    struct exa_format_t *srcFormat;
    struct exa_format_t *dstFormat;

    int rotate;			/* RR_Rotate_* found by lx_process_transform */
    PictTransform *transform;	/* accepted source transform, or NULL */

} exaScratch;
1021.135Slukem
/* Raster operation codes, indexed by the X 'alu' value; used by the
 * solid and copy hooks when the planemask is all ones */
static const int SDfn[16] = {
    0x00, 0x88, 0x44, 0xCC, 0x22, 0xAA, 0x66, 0xEE,
    0x11, 0x99, 0x55, 0xDD, 0x33, 0xBB, 0x77, 0xFF
};
1071.135Slukem
/* Raster operation codes, indexed by the X 'alu' value; used by the
 * solid and copy hooks when a planemask is in effect (the planemask is
 * then loaded as the solid pattern) */
static const int SDfn_PM[16] = {
    0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA,
    0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA
};
1121.147Slukem
/* These functions check to see if we can safely prefetch the memory
 * for the blt, or if we have to wait for the previous blt to complete.
 * One function is for the fill, and the other is for the copy because
 * they have different requirements based on ROP
 */
1181.151Schristos
/* Destination rectangle (lx0,ly0)-(lx1,ly1) of the blt most recently
 * passed through lx_fill_flags/lx_copy_flags; -1 until one is recorded */
static int lx0 = -1, ly0 = -1, lx1 = -1, ly1 = -1;
1201.147Slukem
1211.147Slukemstatic int
1221.147Slukemlx_fill_flags(int x0, int y0, int w, int h, int rop)
1231.147Slukem{
1241.147Slukem    int x1 = x0 + w, y1 = y0 + h;
1251.147Slukem    int n = ((rop ^ (rop >> 1)) & 0x55) == 0 ||	/* no dst */
1261.152Schristos	x0 >= lx1 || y0 >= ly1 ||      /* rght/below */
1271.135Slukem	x1 <= lx0 || y1 <= ly0 ?       /* left/above */
1281.135Slukem	0 : CIMGP_BLTFLAGS_HAZARD;
1291.163Sjwise
1301.163Sjwise    lx0 = x0;
1311.184Schristos    ly0 = y0;
1321.145Smatt    lx1 = x1;
1331.145Smatt    ly1 = y1;
1341.145Smatt
1351.211Stron    return n;
1361.211Stron}
1371.211Stron
1381.155Swizstatic int
1391.179Sjmcneilllx_copy_flags(int x0, int y0, int x1, int y1, int w, int h, int rop)
1401.197Sjmcneill{
1411.179Sjmcneill    int x2 = x1 + w, y2 = y1 + h;
1421.135Slukem
1431.135Slukem    /* dst not hazzard and src not hazzard */
1441.191Sxtraeme    int n = (((rop ^ (rop >> 1)) & 0x55) == 0 ||
1451.191Sxtraeme	x1 >= lx1 || y1 >= ly1 ||
1461.191Sxtraeme	x2 <= lx0 || y2 <= ly0) &&
1471.193Sxtraeme	(((rop ^ (rop >> 2)) & 0x33) == 0 ||
1481.191Sxtraeme	x0 >= lx1 || y0 >= ly1 ||
1491.191Sxtraeme	x0 + w <= lx0 || y0 + h <= ly0) ? 0 : CIMGP_BLTFLAGS_HAZARD;
1501.191Sxtraeme
1511.191Sxtraeme    lx0 = x1;
1521.179Sjmcneill    ly0 = y1;
1531.135Slukem    lx1 = x2;
1541.135Slukem    ly1 = y2;
1551.135Slukem
1561.135Slukem    return n;
1571.135Slukem}
1581.135Slukem
1591.135Slukem/* These are borrowed from the exa engine - they should be made global
1601.135Slukem   and available to drivers, but until then....
1611.135Slukem*/
1621.208Sjoerg
1631.135Slukem/* exaGetPixelFromRGBA (exa_render.c) */
1641.135Slukem
1651.135Slukemstatic Bool
1661.135Slukem_GetPixelFromRGBA(CARD32 * pixel,
1671.135Slukem    CARD16 red, CARD16 green, CARD16 blue, CARD16 alpha, CARD32 format)
1681.135Slukem{
1691.135Slukem    int rbits, bbits, gbits, abits;
1701.135Slukem    int rshift, bshift, gshift, ashift;
1711.135Slukem
1721.135Slukem    *pixel = 0;
1731.198Splunky
1741.169Sgdamore    if (!PICT_FORMAT_COLOR(format))
1751.174Splunky	return FALSE;
1761.174Splunky
1771.169Sgdamore    rbits = PICT_FORMAT_R(format);
1781.198Splunky    gbits = PICT_FORMAT_G(format);
1791.135Slukem    bbits = PICT_FORMAT_B(format);
1801.135Slukem    abits = PICT_FORMAT_A(format);
1811.135Slukem
1821.135Slukem    if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
1831.135Slukem	bshift = 0;
1841.209Sroy	gshift = bbits;
1851.135Slukem	rshift = gshift + gbits;
1861.135Slukem	ashift = rshift + rbits;
1871.135Slukem    } else {			       /* PICT_TYPE_ABGR */
1881.135Slukem	rshift = 0;
1891.192Sxtraeme	gshift = rbits;
1901.195Sad	bshift = gshift + gbits;
1911.135Slukem	ashift = bshift + bbits;
1921.208Sjoerg    }
1931.135Slukem
1941.200Syamt    *pixel |= (blue >> (16 - bbits)) << bshift;
1951.157Speter    *pixel |= (red >> (16 - rbits)) << rshift;
1961.164Srpaulo    *pixel |= (green >> (16 - gbits)) << gshift;
1971.203Smishka    *pixel |= (alpha >> (16 - abits)) << ashift;
1981.135Slukem
1991.142Sitojun    return TRUE;
2001.135Slukem}
2011.135Slukem
2021.171Speter/* exaGetRGBAFromPixel (exa_render.c) */
2031.171Speter
2041.171Speterstatic Bool
2051.171Speter_GetRGBAFromPixel(CARD32 pixel,
2061.135Slukem    CARD16 * red,
2071.165Srpaulo    CARD16 * green, CARD16 * blue, CARD16 * alpha, CARD32 format)
2081.162Sagc{
2091.135Slukem    int rbits, bbits, gbits, abits;
2101.135Slukem    int rshift, bshift, gshift, ashift;
2111.135Slukem
2121.135Slukem    if (!PICT_FORMAT_COLOR(format))
2131.201Sad	return FALSE;
2141.201Sad
2151.201Sad    rbits = PICT_FORMAT_R(format);
2161.135Slukem    gbits = PICT_FORMAT_G(format);
2171.135Slukem    bbits = PICT_FORMAT_B(format);
2181.206Shaad    abits = PICT_FORMAT_A(format);
2191.135Slukem
2201.135Slukem    if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
2211.135Slukem	bshift = 0;
2221.135Slukem	gshift = bbits;
2231.135Slukem	rshift = gshift + gbits;
2241.135Slukem	ashift = rshift + rbits;
2251.135Slukem    } else {			       /* PICT_TYPE_ABGR */
2261.135Slukem	rshift = 0;
2271.135Slukem	gshift = rbits;
2281.135Slukem	bshift = gshift + gbits;
2291.135Slukem	ashift = bshift + bbits;
2301.135Slukem    }
2311.135Slukem
2321.135Slukem    *red = ((pixel >> rshift) & ((1 << rbits) - 1)) << (16 - rbits);
2331.135Slukem    while (rbits < 16) {
2341.135Slukem	*red |= *red >> rbits;
2351.135Slukem	rbits <<= 1;
2361.185Selad    }
2371.143Sitojun
2381.158Speter    *green = ((pixel >> gshift) & ((1 << gbits) - 1)) << (16 - gbits);
2391.143Sitojun    while (gbits < 16) {
2401.204Sisaki	*green |= *green >> gbits;
2411.135Slukem	gbits <<= 1;
2421.135Slukem    }
2431.135Slukem
2441.135Slukem    *blue = ((pixel >> bshift) & ((1 << bbits) - 1)) << (16 - bbits);
2451.135Slukem    while (bbits < 16) {
2461.135Slukem	*blue |= *blue >> bbits;
2471.135Slukem	bbits <<= 1;
2481.135Slukem    }
2491.135Slukem
2501.135Slukem    if (abits) {
2511.205She	*alpha = ((pixel >> ashift) & ((1 << abits) - 1)) << (16 - abits);
2521.135Slukem	while (abits < 16) {
2531.135Slukem	    *alpha |= *alpha >> abits;
2541.135Slukem	    abits <<= 1;
2551.135Slukem	}
2561.135Slukem    } else
2571.135Slukem	*alpha = 0xffff;
2581.135Slukem
2591.135Slukem    return TRUE;
2601.135Slukem}
2611.135Slukem
2621.169Sgdamorestatic unsigned int
2631.135Slukemlx_get_source_color(PixmapPtr pSrc, int srcFormat, int dstFormat)
2641.135Slukem{
2651.135Slukem    CARD32 in, out;
2661.135Slukem    CARD16 red = 0, green = 0, blue = 0, alpha = 0;
2671.135Slukem
2681.135Slukem    /* Stall to avoid a race with the upload function */
2691.135Slukem    /* for 1.4 and newer, the problem will be resolved within
2701.135Slukem     * exaGetPixmapFirstPixel, so this should be adjusted so
2711.135Slukem     * the stall isn't run needlessly
2721.135Slukem     */
2731.135Slukem
2741.135Slukem    gp_wait_until_idle();
2751.137Sblymn    in = exaGetPixmapFirstPixel(pSrc);
2761.135Slukem
2771.135Slukem    _GetRGBAFromPixel(in, &red, &blue, &green, &alpha, srcFormat);
2781.182Srpaulo    _GetPixelFromRGBA(&out, red, blue, green, alpha, dstFormat);
2791.135Slukem
2801.135Slukem    return out;
2811.135Slukem}
2821.135Slukem
2831.135Slukemstatic Bool
2841.135Slukemlx_prepare_solid(PixmapPtr pxMap, int alu, Pixel planemask, Pixel fg)
2851.135Slukem{
2861.201Sad    int pitch = exaGetPixmapPitch(pxMap);
2871.135Slukem    int op = (planemask == ~0U) ? SDfn[alu] : SDfn_PM[alu];
2881.135Slukem
2891.135Slukem    gp_declare_blt(0);
2901.135Slukem    gp_set_bpp(pxMap->drawable.bitsPerPixel);
2911.144Swiz
2921.183Schristos    gp_set_raster_operation(op);
2931.135Slukem
2941.135Slukem    if (planemask != ~0U)
2951.135Slukem	gp_set_solid_pattern(planemask);
2961.135Slukem
2971.135Slukem    exaScratch.op = op;
2981.135Slukem
2991.177Spavel    gp_set_solid_source(fg);
3001.135Slukem
3011.135Slukem    gp_set_strides(pitch, pitch);
3021.135Slukem    gp_write_parameters();
3031.135Slukem    return TRUE;
3041.175Shubertf}
3051.135Slukem
3061.156Speterstatic void
3071.135Slukemlx_do_solid(PixmapPtr pxMap, int x1, int y1, int x2, int y2)
3081.135Slukem{
3091.142Sitojun    int bpp = (pxMap->drawable.bitsPerPixel + 7) / 8;
3101.135Slukem    int pitch = exaGetPixmapPitch(pxMap);
3111.135Slukem    unsigned int offset =
3121.135Slukem	exaGetPixmapOffset(pxMap) + (pitch * y1) + (bpp * x1);
3131.135Slukem
3141.135Slukem    gp_declare_blt(lx_fill_flags(x1, y1, x2 - x1, y2 - y1, exaScratch.op));
3151.135Slukem    gp_pattern_fill(offset, x2 - x1, y2 - y1);
3161.135Slukem}
3171.135Slukem
3181.135Slukemstatic Bool
3191.135Slukemlx_prepare_copy(PixmapPtr pxSrc, PixmapPtr pxDst, int dx, int dy,
3201.135Slukem    int alu, Pixel planemask)
3211.135Slukem{
3221.135Slukem    int dpitch = exaGetPixmapPitch(pxDst);
3231.135Slukem    int op = (planemask == ~0U) ? SDfn[alu] : SDfn_PM[alu];
3241.135Slukem
3251.135Slukem    gp_declare_blt(0);
3261.135Slukem    gp_set_bpp(pxDst->drawable.bitsPerPixel);
3271.135Slukem
3281.135Slukem    gp_set_raster_operation(op);
3291.135Slukem
3301.135Slukem    if (planemask != ~0U)
3311.135Slukem	gp_set_solid_pattern(planemask);
3321.135Slukem
3331.135Slukem    exaScratch.srcOffset = exaGetPixmapOffset(pxSrc);
3341.135Slukem    exaScratch.srcPitch = exaGetPixmapPitch(pxSrc);
3351.135Slukem    exaScratch.srcBpp = (pxSrc->drawable.bitsPerPixel + 7) / 8;
3361.135Slukem
3371.135Slukem    exaScratch.op = op;
3381.135Slukem
3391.135Slukem    gp_set_strides(dpitch, exaScratch.srcPitch);
3401.135Slukem    gp_write_parameters();
3411.135Slukem    return TRUE;
3421.135Slukem}
3431.135Slukem
3441.135Slukemstatic void
3451.153Skleinklx_do_copy(PixmapPtr pxDst, int srcX, int srcY,
3461.135Slukem    int dstX, int dstY, int w, int h)
3471.135Slukem{
3481.135Slukem    int dstBpp = (pxDst->drawable.bitsPerPixel + 7) / 8;
3491.135Slukem    int dstPitch = exaGetPixmapPitch(pxDst);
3501.135Slukem    unsigned int srcOffset, dstOffset;
3511.135Slukem    int flags = 0;
3521.135Slukem
3531.135Slukem    gp_declare_blt(lx_copy_flags(srcX, srcY, dstX, dstY, w, h,
3541.135Slukem	    exaScratch.op));
3551.135Slukem
3561.135Slukem    srcOffset = exaScratch.srcOffset + (exaScratch.srcPitch * srcY) +
3571.135Slukem	(exaScratch.srcBpp) * srcX;
3581.210Schristos
359    dstOffset = exaGetPixmapOffset(pxDst) +
360	(dstPitch * dstY) + (dstBpp * dstX);
361
362    if (dstX > srcX)
363	flags |= CIMGP_NEGXDIR;
364
365    if (dstY > srcY)
366	flags |= CIMGP_NEGYDIR;
367
368    gp_screen_to_screen_blt(dstOffset, srcOffset, w, h, flags);
369}
370
371/* Composite operations
372
These are the simplest - one pass operations - if there is no format or
mask, then we can make these happen pretty fast
375
376                       Operation  Type  Channel   Alpha
377PictOpClear            0          2     0         3
378PictOpSrc              0          3     0         3
379PictOpDst              0          3     1         3
380PictOpOver             2          0     0         3
381PictOpOverReverse      2          0     1         3
382PictOpIn               0          1     0         3
383PictOpInReverse        0          1     1         3
384PictOpOut              1          0     0         3
385PictOpOutReverse       1          0     1         3
386PictOpAdd              2          2     0         3
387
388The following require multiple passes
389PictOpAtop
390PictOpXor
391*/
392
393struct blend_ops_t
394{
395    int operation;
396    int type;
397    int channel;
398} lx_alpha_ops[] = {
399    /* PictOpClear */
400    {
401    CIMGP_ALPHA_TIMES_A, CIMGP_CONSTANT_ALPHA, CIMGP_CHANNEL_A_SOURCE}, {
402    },
403	/* PictOpSrc */
404    {
405    CIMGP_ALPHA_TIMES_A, CIMGP_ALPHA_EQUALS_ONE, CIMGP_CHANNEL_A_SOURCE}, {
406    },
407	/* PictOpDst */
408    {
409    CIMGP_ALPHA_TIMES_A, CIMGP_ALPHA_EQUALS_ONE, CIMGP_CHANNEL_A_DEST}, {
410    },
411	/* PictOpOver */
412    {
413    CIMGP_A_PLUS_BETA_B, CIMGP_CHANNEL_A_ALPHA, CIMGP_CHANNEL_A_SOURCE}, {
414    CIMGP_ALPHA_TIMES_A, CIMGP_CONVERTED_ALPHA, CIMGP_CHANNEL_A_SOURCE},
415	/* PictOpOverReverse */
416    {
417    CIMGP_A_PLUS_BETA_B, CIMGP_CHANNEL_A_ALPHA, CIMGP_CHANNEL_A_DEST}, {
418    CIMGP_ALPHA_TIMES_A, CIMGP_CONVERTED_ALPHA, CIMGP_CHANNEL_A_SOURCE},
419	/* PictOpIn */
420    {
421    CIMGP_ALPHA_TIMES_A, CIMGP_CHANNEL_B_ALPHA, CIMGP_CHANNEL_A_SOURCE}, {
422    CIMGP_ALPHA_TIMES_A, CIMGP_CONVERTED_ALPHA, CIMGP_CHANNEL_A_SOURCE},
423	/* PictOpInReverse */
424    {
425    CIMGP_ALPHA_TIMES_A, CIMGP_CHANNEL_B_ALPHA, CIMGP_CHANNEL_A_DEST}, {
426    CIMGP_ALPHA_TIMES_A, CIMGP_CONVERTED_ALPHA, CIMGP_CHANNEL_A_SOURCE},
427	/* PictOpOut */
428    {
429    CIMGP_BETA_TIMES_B, CIMGP_CHANNEL_A_ALPHA, CIMGP_CHANNEL_A_DEST}, {
430    CIMGP_ALPHA_TIMES_A, CIMGP_CONVERTED_ALPHA, CIMGP_CHANNEL_A_SOURCE},
431	/* PictOpOutReverse */
432    {
433    CIMGP_BETA_TIMES_B, CIMGP_CHANNEL_A_ALPHA, CIMGP_CHANNEL_A_SOURCE}, {
434    CIMGP_ALPHA_TIMES_A, CIMGP_CONVERTED_ALPHA, CIMGP_CHANNEL_A_SOURCE},
435	/* SrcAtop */
436    {
437    CIMGP_ALPHA_TIMES_A, CIMGP_CHANNEL_B_ALPHA, CIMGP_CHANNEL_A_DEST}, {
438    CIMGP_BETA_TIMES_B, CIMGP_CHANNEL_A_ALPHA, CIMGP_CHANNEL_A_SOURCE},
439	/* SrcAtopReverse */
440    {
441    CIMGP_ALPHA_TIMES_A, CIMGP_CHANNEL_B_ALPHA, CIMGP_CHANNEL_A_SOURCE}, {
442    CIMGP_BETA_TIMES_B, CIMGP_CHANNEL_A_ALPHA, CIMGP_CHANNEL_A_DEST},
443	/* Xor */
444    {
445    CIMGP_BETA_TIMES_B, CIMGP_CHANNEL_A_ALPHA, CIMGP_CHANNEL_A_SOURCE}, {
446    CIMGP_BETA_TIMES_B, CIMGP_CHANNEL_A_ALPHA, CIMGP_CHANNEL_A_SOURCE},
447	/* PictOpAdd */
448    {
449    CIMGP_A_PLUS_BETA_B, CIMGP_CONSTANT_ALPHA, CIMGP_CHANNEL_A_SOURCE}, {
450    }
451};
452
/* Number of elements in the array 'a'; only valid for true arrays,
 * not for pointers */
#define ARRAY_SIZE(a) (sizeof((a)) / (sizeof(*(a))))
454
455static const struct exa_format_t *
456lx_get_format(PicturePtr p)
457{
458    int i;
459    unsigned int format = p->format;
460
461    for (i = 0; i < ARRAY_SIZE(lx_exa_formats); i++)
462	if (lx_exa_formats[i].exa == format)
463	    return (&lx_exa_formats[i]);
464
465    return NULL;
466}
467
468static Bool
469lx_process_transform(PicturePtr pSrc)
470{
471    PictTransformPtr t = pSrc->transform;
472    xFixed c0 = t->matrix[0][0];
473    xFixed s0 = t->matrix[0][1];
474    xFixed s1 = t->matrix[1][0];
475    xFixed c1 = t->matrix[1][1];
476
477    /* If the transform doesn't have any rotation
478     * or scaling components, then just grab the
479     * translate coordinates */
480
481    if (t->matrix[0][0] == 0 &&
482	t->matrix[0][1] == 0 &&
483	t->matrix[1][0] == 0 && t->matrix[1][1] == 0) {
484	exaScratch.transform = pSrc->transform;
485	return TRUE;
486    }
487
488    /* Otherwise, see if this is a simple
489     * rotate transform - if it isn't, then
490     * we have to punt back to software */
491
492    if (t->matrix[2][2] != F(1))
493	return FALSE;
494
495    /* The rotate matrix looks like this:
496     * [ cos X   -sin x
497     * sin X   cos X ]
498     *
499     * Where X is the angle.  We do a simple
500     * check first - if [0,0] != [1,1], then
501     * scaling was specified too, and we can
502     * bail, and if [0,1] != -[1,1] then this
503     * isn't scaling that we can handle.
504     */
505
506    if ((c0 != c1) || (s0 != -s1))
507	return FALSE;
508
509    /* Now, figure out what angle we want - we
510     * can only accelerate right angle rotations,
511     * so this turns into an easy set of if statements */
512
513    if (c0 == F(1) && s1 == F(0))
514	exaScratch.rotate = RR_Rotate_0;
515    else if (c0 == F(0) && s1 == F(1))
516	exaScratch.rotate = RR_Rotate_90;
517    else if (c0 == F(-1) && s1 == F(0))
518	exaScratch.rotate = RR_Rotate_180;
519    else if (c0 == F(0) && s1 == F(-1))
520	exaScratch.rotate = RR_Rotate_270;
521    else
522	return FALSE;
523
524    exaScratch.transform = pSrc->transform;
525
526    return TRUE;
527}
528
529static Bool
530lx_check_composite(int op, PicturePtr pSrc, PicturePtr pMsk, PicturePtr pDst)
531{
532    GeodeRec *pGeode = GEODEPTR_FROM_PICTURE(pDst);
533    const struct exa_format_t *srcFmt, *dstFmt;
534
535    /* Check that the operation is supported */
536
537    if (op > PictOpAdd)
538	return FALSE;
539
540    /* FIXME: Meet this conditions from the debug for PictOpAdd.
541     * Any Other possibilities? Add a judge for the future supplement */
542    if (op == PictOpAdd && pSrc->format == PICT_a8r8g8b8 &&
543	pDst->format == PICT_a8 && !pMsk)
544	return TRUE;
545
546    if (op == PictOpAdd && pSrc->format == PICT_x8r8g8b8 &&
547	pDst->format == PICT_a8 && !pMsk)
548	return TRUE;
549
550    if (op == PictOpAdd && pSrc->format == PICT_r5g6b5 &&
551	pDst->format == PICT_a8 && !pMsk)
552	return TRUE;
553
554    /* We need the off-screen buffer to do the multipass work */
555
556    if (usesPasses(op)) {
557	if (pGeode->exaBfrOffset == 0 || !pMsk)
558	    return FALSE;
559    }
560
561    if (pMsk && op == PictOpAdd)
562	return FALSE;
563
564    /* Check that the filter matches what we support */
565
566    switch (pSrc->filter) {
567    case PictFilterNearest:
568    case PictFilterFast:
569    case PictFilterGood:
570    case PictFilterBest:
571	break;
572
573    default:
574	/* WE don't support bilinear or convolution filters */
575	return FALSE;
576    }
577
578    /* We don't support any mask transforms */
579    if (pMsk && pMsk->transform)
580	return FALSE;
581
582    /* XXX - don't know if we can do any hwaccel on solid fills or gradient types */
583    if (pSrc->pSourcePict || (pMsk && pMsk->pSourcePict))
584	return FALSE;
585
586    /* Keep an eye out for source rotation transforms - those we can
587     * do something about */
588
589    exaScratch.rotate = RR_Rotate_0;
590    exaScratch.transform = NULL;
591
592    if (pSrc->transform && !lx_process_transform(pSrc))
593	return FALSE;
594
595    /* XXX - I don't understand PICT_a8 enough - so I'm punting */
596
597    if ((op != PictOpAdd) && (pSrc->format == PICT_a8 ||
598	pDst->format == PICT_a8))
599	return FALSE;
600
601    if (pMsk && op != PictOpClear) {
602	struct blend_ops_t *opPtr = &lx_alpha_ops[op * 2];
603	int direction = (opPtr->channel == CIMGP_CHANNEL_A_SOURCE) ? 0 : 1;
604
605	/* Direction 0 indicates src->dst, 1 indiates dst->src */
606	if (((direction == 0) && (pSrc->pDrawable->bitsPerPixel < 16)) ||
607	    ((direction == 1) && (pDst->pDrawable->bitsPerPixel < 16))) {
608	    ErrorF("Can't do mask blending with less then 16bpp\n");
609	    return FALSE;
610	}
611	/* We can only do masks with a 8bpp or a 4bpp mask */
612	if (pMsk->format != PICT_a8 && pMsk->format != PICT_a4)
613	    return FALSE;
614	/* The pSrc should be 1x1 pixel if the pMsk is not zero */
615	if (pSrc->pDrawable->width != 1 || pSrc->pDrawable->height != 1)
616	    return FALSE;
617	/* FIXME: In lx_prepare_composite, there are no variables to record the
618	 * one pixel source's width and height when the mask is not zero.
619	 * That will lead to bigger region to render instead of one pixel in lx
620	 * _do_composite, so we should fallback currently to avoid this */
621	if (!pSrc->repeat)
622	    return FALSE;
623    }
624
625    /* Get the formats for the source and destination */
626
627    if ((srcFmt = lx_get_format(pSrc)) == NULL) {
628	ErrorF("EXA: Invalid source format %x\n", pSrc->format);
629	return FALSE;
630    }
631
632    if ((dstFmt = lx_get_format(pDst)) == NULL) {
633	ErrorF("EXA:  Invalid destination format %x\n", pDst->format);
634	return FALSE;
635    }
636
637    /* Make sure operations that need alpha bits have them */
638    /* If a mask is enabled, the alpha will come from there */
639
640    if (!pMsk && (!srcFmt->alphabits && usesSrcAlpha(op)))
641	return FALSE;
642
643    if (!pMsk && (!dstFmt->alphabits && usesDstAlpha(op)))
644	return FALSE;
645
646    /* FIXME:  See a way around this! */
647
648    if (srcFmt->alphabits == 0 && dstFmt->alphabits != 0)
649	return FALSE;
650
651    /* If this is a rotate operation, then make sure the src and dst
652     * formats are the same */
653
654    if (exaScratch.rotate != RR_Rotate_0 && srcFmt != dstFmt) {
655	ErrorF("EXA: Can't rotate and convert formats at the same time\n");
656	return FALSE;
657    }
658    return TRUE;
659}
660
661static Bool
662lx_prepare_composite(int op, PicturePtr pSrc, PicturePtr pMsk,
663    PicturePtr pDst, PixmapPtr pxSrc, PixmapPtr pxMsk, PixmapPtr pxDst)
664{
665    GeodeRec *pGeode = GEODEPTR_FROM_PIXMAP(pxDst);
666    const struct exa_format_t *srcFmt, *dstFmt;
667
668    /* Get the formats for the source and destination */
669
670    srcFmt = lx_get_format(pSrc);
671    dstFmt = lx_get_format(pDst);
672
673    /* Set up the scratch buffer with the information we need */
674
675    exaScratch.srcFormat = (struct exa_format_t *)srcFmt;
676    exaScratch.dstFormat = (struct exa_format_t *)dstFmt;
677    exaScratch.op = op;
678    exaScratch.repeat = pSrc->repeat;
679    exaScratch.bufferOffset = pGeode->exaBfrOffset;
680
681    if (pMsk && op != PictOpClear) {
682	/* Get the source color */
683	/* If the op is PictOpOver(or PictOpOutReverse, PictOpInReverse,
684	 * PictOpIn, PictOpOut, PictOpOverReverse), we should get the
685	 * ARGB32 source format */
686
687	if ((op == PictOpOver || op == PictOpOutReverse || op ==
688	    PictOpInReverse || op == PictOpIn || op == PictOpOut ||
689	    op == PictOpOverReverse) && (srcFmt->alphabits != 0))
690	    exaScratch.srcColor = exaGetPixmapFirstPixel(pxSrc);
691	else if ((op == PictOpOver || op == PictOpOutReverse || op ==
692	    PictOpInReverse || op == PictOpIn || op == PictOpOut ||
693	    op == PictOpOverReverse) &&
694	    (srcFmt->alphabits == 0))
695	    exaScratch.srcColor = lx_get_source_color(pxSrc, pSrc->format,
696		PICT_a8r8g8b8);
697	else
698	    exaScratch.srcColor = lx_get_source_color(pxSrc, pSrc->format,
699		pDst->format);
700
701	/* Save off the info we need (reuse the source values to save space) */
702
703	exaScratch.type = COMP_TYPE_MASK;
704	exaScratch.maskrepeat = pMsk->repeat;
705
706	exaScratch.srcOffset = exaGetPixmapOffset(pxMsk);
707	exaScratch.srcPitch = exaGetPixmapPitch(pxMsk);
708	exaScratch.srcBpp = (pxMsk->drawable.bitsPerPixel + 7) / 8;
709
710	exaScratch.srcWidth = pMsk->pDrawable->width;
711	exaScratch.srcHeight = pMsk->pDrawable->height;
712
713	/* Flag to indicate if this a 8BPP or a 4BPP mask */
714	exaScratch.fourBpp = (pxMsk->drawable.bitsPerPixel == 4) ? 1 : 0;
715    } else {
716	if (usesPasses(op))
717	    exaScratch.type = COMP_TYPE_TWOPASS;
718	else if (exaScratch.rotate != RR_Rotate_0)
719	    exaScratch.type = COMP_TYPE_ROTATE;
720	else
721	    exaScratch.type = COMP_TYPE_ONEPASS;
722
723	exaScratch.srcOffset = exaGetPixmapOffset(pxSrc);
724	exaScratch.srcPitch = exaGetPixmapPitch(pxSrc);
725	exaScratch.srcBpp = (pxSrc->drawable.bitsPerPixel + 7) / 8;
726
727	exaScratch.srcWidth = pSrc->pDrawable->width;
728	exaScratch.srcHeight = pSrc->pDrawable->height;
729    }
730
731    return TRUE;
732}
733
734static int
735lx_get_bpp_from_format(int format)
736{
737
738    switch (format) {
739    case CIMGP_SOURCE_FMT_8_8_8_8:
740    case CIMGP_SOURCE_FMT_32BPP_BGR:
741	return 32;
742
743    case CIMGP_SOURCE_FMT_4_4_4_4:
744	return 12;
745
746    case CIMGP_SOURCE_FMT_0_5_6_5:
747    case CIMGP_SOURCE_FMT_16BPP_BGR:
748	return 16;
749
750    case CIMGP_SOURCE_FMT_1_5_5_5:
751    case CIMGP_SOURCE_FMT_15BPP_BGR:
752	return 15;
753
754    case CIMGP_SOURCE_FMT_3_3_2:
755	return 8;
756    }
757
758    return 0;
759}
760
761/* BGR needs to be set in the source for it to take - so adjust the source
762 * to enable BGR if the two formats are different, and disable it if they
763 * are the same
764 */
765
/*
 * Select the hardware source format for a conversion from srcFormat to
 * dstFormat.  Bit 0x10 of a format is treated as the BGR flag: when the
 * destination has it set, the flag on the source is inverted; when the
 * destination has it clear, the source format is used as given.
 */
static void
lx_set_source_format(int srcFormat, int dstFormat)
{
    int hwFormat = srcFormat;

    if (dstFormat & 0x10) {
	if (srcFormat & 0x10)
	    hwFormat = srcFormat & ~0x10;
	else
	    hwFormat = srcFormat | 0x10;
    }

    gp_set_source_format(hwFormat);
}
776
777/* If we are converting colors and we need the channel A alpha,
778 * then use a special alpha type that preserves the alpha before
779 * converting the format
780 */
781
782static inline int
783get_op_type(struct exa_format_t *src, struct exa_format_t *dst, int type)
784{
785    return (type == CIMGP_CHANNEL_A_ALPHA &&
786	src->alphabits != dst->alphabits) ? CIMGP_CONVERTED_ALPHA : type;
787}
788
789/* Note - this is the preferred onepass method.  The other will remain
790 * ifdefed out until such time that we are sure its not needed
791 */
792
/* Byte offset of pixel (x, y) within pixmap px, using the pixmap's own
 * offset, pitch and bytes per pixel (rounded up) */
#define GetPixmapOffset(px, x, y) ( exaGetPixmapOffset((px)) + \
  (exaGetPixmapPitch((px)) * (y)) + \
  ((((px)->drawable.bitsPerPixel + 7) / 8) * (x)) )

/* Byte offset of pixel (_x, _y) within the source surface currently
 * recorded in exaScratch */
#define GetSrcOffset(_x, _y) (exaScratch.srcOffset + ((_y) * exaScratch.srcPitch) + \
			      ((_x) * exaScratch.srcBpp))
799
800static void
801lx_composite_onepass_add_a8(PixmapPtr pxDst, unsigned long dstOffset,
802    unsigned long srcOffset, int width, int height, int opX, int opY,
803    int srcX, int srcY)
804{
805    struct blend_ops_t *opPtr;
806    int apply, type;
807    int optempX, optempY;
808    int i, j;
809    unsigned long pixmapOffset, pixmapPitch, calBitsPixel;
810
811    pixmapOffset = exaGetPixmapOffset(pxDst);
812    pixmapPitch = exaGetPixmapPitch(pxDst);
813    calBitsPixel = (pxDst->drawable.bitsPerPixel + 7) / 8;
814
815    /* Keep this GP idle judge here. Otherwise the SW method has chance to
816     * conflict with the HW rendering method */
817    gp_wait_until_idle();
818
819    if (opX % 4 == 0 && srcX % 4 == 0) {
820	/* HW acceleration */
821	opPtr = &lx_alpha_ops[exaScratch.op * 2];
822	apply = CIMGP_APPLY_BLEND_TO_ALL;
823	gp_declare_blt(0);
824	gp_set_bpp(32);
825	gp_set_strides(exaGetPixmapPitch(pxDst), exaScratch.srcPitch);
826	gp_set_source_format(8);
827	type = opPtr->type;
828	gp_set_alpha_operation(opPtr->operation, type, opPtr->channel, apply, 0);
829	gp_screen_to_screen_convert(dstOffset, srcOffset, width / 4, height, 0);
830	/* Calculate the pixels in the tail of each line */
831	for (j = srcY; j < srcY + height; j++)
832	    for (i = srcX + (width / 4) * 4; i < srcX + width; i++) {
833		srcOffset = GetSrcOffset(i, j);
834		optempX = opX + i - srcX;
835		optempY = opY + j - srcY;
836		dstOffset = pixmapOffset + pixmapPitch * optempY +
837		    calBitsPixel * optempX;
838		*(cim_fb_ptr + dstOffset) = (*(cim_fb_ptr + srcOffset)
839		    + *(cim_fb_ptr + dstOffset) <= 0xff) ?
840		    *(cim_fb_ptr + srcOffset) + *(cim_fb_ptr + dstOffset) : 0xff;
841	}
842    } else {
843	for (j = srcY; j < srcY + height; j++)
844	    for (i = srcX; i < srcX + width; i++) {
845		srcOffset = GetSrcOffset(i, j);
846		optempX = opX + i - srcX;
847		optempY = opY + j - srcY;
848		dstOffset = pixmapOffset + pixmapPitch * optempY +
849		    calBitsPixel * optempX;
850		*(cim_fb_ptr + dstOffset) = (*(cim_fb_ptr + srcOffset) +
851		    *(cim_fb_ptr + dstOffset) <= 0xff) ?
852		    *(cim_fb_ptr + srcOffset) + *(cim_fb_ptr + dstOffset) : 0xff;
853	}
854    }
855}
856
857static void
858lx_composite_onepass(PixmapPtr pxDst, unsigned long dstOffset,
859    unsigned long srcOffset, int width, int height)
860{
861    struct blend_ops_t *opPtr;
862    int apply, type;
863
864    opPtr = &lx_alpha_ops[exaScratch.op * 2];
865
866    apply = (exaScratch.dstFormat->alphabits != 0 &&
867	exaScratch.srcFormat->alphabits != 0) ?
868	CIMGP_APPLY_BLEND_TO_ALL : CIMGP_APPLY_BLEND_TO_RGB;
869
870    gp_declare_blt(0);
871    gp_set_bpp(lx_get_bpp_from_format(exaScratch.dstFormat->fmt));
872    gp_set_strides(exaGetPixmapPitch(pxDst), exaScratch.srcPitch);
873
874    lx_set_source_format(exaScratch.srcFormat->fmt,
875	exaScratch.dstFormat->fmt);
876
877    type =
878	get_op_type(exaScratch.srcFormat, exaScratch.dstFormat, opPtr->type);
879
880    gp_set_alpha_operation(opPtr->operation, type, opPtr->channel, apply, 0);
881
882    gp_screen_to_screen_convert(dstOffset, srcOffset, width, height, 0);
883}
884
885static void
886lx_composite_all_black(unsigned long srcOffset, int width, int height)
887{
888    struct blend_ops_t *opPtr;
889    int apply, type;
890
891    opPtr = &lx_alpha_ops[0];
892    apply = (exaScratch.srcFormat->alphabits != 0) ?
893	CIMGP_APPLY_BLEND_TO_ALL : CIMGP_APPLY_BLEND_TO_RGB;
894    gp_declare_blt(0);
895    gp_set_bpp(lx_get_bpp_from_format(exaScratch.srcFormat->fmt));
896    gp_set_strides(exaScratch.srcPitch, exaScratch.srcPitch);
897    lx_set_source_format(exaScratch.srcFormat->fmt,
898	exaScratch.srcFormat->fmt);
899    type =
900	get_op_type(exaScratch.srcFormat, exaScratch.srcFormat, opPtr->type);
901    gp_set_alpha_operation(opPtr->operation, type, opPtr->channel, apply, 0);
902    gp_screen_to_screen_convert(srcOffset, srcOffset, width, height, 0);
903
904}
905
906static void
907lx_composite_onepass_special(PixmapPtr pxDst, int width, int height, int opX,
908    int opY, int srcX, int srcY)
909{
910    struct blend_ops_t *opPtr;
911    int apply, type;
912    int opWidth, opHeight;
913    int optempX, optempY;
914    unsigned int dstOffset, srcOffset = 0;
915
916    optempX = opX;
917    optempY = opY;
918
919    /* Make sure srcX and srcY are in source region */
920    srcX = ((srcX % (int)exaScratch.srcWidth) + (int)exaScratch.srcWidth)
921	% (int)exaScratch.srcWidth;
922    srcY = ((srcY % (int)exaScratch.srcHeight) + (int)exaScratch.srcHeight)
923	% (int)exaScratch.srcHeight;
924
925    opWidth = exaScratch.srcWidth - srcX;
926    opHeight = exaScratch.srcHeight -  srcY;
927
928    srcOffset = GetSrcOffset(srcX, srcY);
929
930    if (width < opWidth)
931	opWidth = width;
932    if (height < opHeight)
933	opHeight = height;
934
935    while (1) {
936	gp_wait_until_idle();
937	dstOffset = GetPixmapOffset(pxDst, optempX, optempY);
938	opPtr = &lx_alpha_ops[exaScratch.op * 2];
939	apply = (exaScratch.dstFormat->alphabits != 0 &&
940	    exaScratch.srcFormat->alphabits != 0) ?
941	    CIMGP_APPLY_BLEND_TO_ALL : CIMGP_APPLY_BLEND_TO_RGB;
942	gp_declare_blt(0);
943	gp_set_bpp(lx_get_bpp_from_format(exaScratch.dstFormat->fmt));
944	gp_set_strides(exaGetPixmapPitch(pxDst), exaScratch.srcPitch);
945	lx_set_source_format(exaScratch.srcFormat->fmt,
946	    exaScratch.dstFormat->fmt);
947	type = get_op_type(exaScratch.srcFormat, exaScratch.dstFormat,
948	    opPtr->type);
949	gp_set_alpha_operation(opPtr->operation, type, opPtr->channel,
950	    apply, 0);
951	gp_screen_to_screen_convert(dstOffset, srcOffset, opWidth, opHeight, 0);
952
953	optempX += opWidth;
954	if (optempX >= opX + width) {
955	    optempX = opX;
956	    optempY += opHeight;
957	    if (optempY >= opY + height)
958		break;
959	}
960	if (optempX == opX) {
961	    srcOffset = GetSrcOffset(srcX, 0);
962	    opWidth = ((opX + width) - optempX) > (exaScratch.srcWidth - srcX)
963		? (exaScratch.srcWidth - srcX) : ((opX + width) - optempX);
964	    opHeight = ((opY + height) - optempY) > exaScratch.srcHeight
965		? exaScratch.srcHeight : ((opY + height) - optempY);
966	} else if (optempY == opY) {
967	    srcOffset = GetSrcOffset(0, srcY);
968	    opWidth = ((opX + width) - optempX) > exaScratch.srcWidth
969		? exaScratch.srcWidth : ((opX + width) - optempX);
970	    opHeight = ((opY + height) - optempY) > (exaScratch.srcHeight -
971		srcY) ? (exaScratch.srcHeight - srcY) : ((opY + height) - optempY);
972	} else {
973	    srcOffset = GetSrcOffset(0, 0);
974	    opWidth = ((opX + width) - optempX) > exaScratch.srcWidth
975		? exaScratch.srcWidth : ((opX + width) - optempX);
976	    opHeight = ((opY + height) - optempY) > exaScratch.srcHeight
977		? exaScratch.srcHeight : ((opY + height) - optempY);
978	}
979    }
980}
981
982/* This function handles the multipass blend functions */
983
984static void
985lx_composite_multipass(PixmapPtr pxDst, unsigned long dstOffset,
986    unsigned long srcOffset, int width, int height)
987{
988    struct blend_ops_t *opPtr;
989    int sbpp = lx_get_bpp_from_format(exaScratch.srcFormat->fmt);
990    int apply, type;
991
992    /* Wait until the GP is idle - this will ensure that the scratch buffer
993     * isn't occupied */
994
995    gp_wait_until_idle();
996
997    /* Copy the destination to the scratch buffer, and convert it to the
998     * source format */
999
1000    gp_declare_blt(0);
1001
1002    gp_set_bpp(sbpp);
1003    gp_set_source_format(exaScratch.dstFormat->fmt);
1004    gp_set_raster_operation(0xCC);
1005    gp_set_strides(exaScratch.srcPitch, exaGetPixmapPitch(pxDst));
1006    gp_screen_to_screen_convert(exaScratch.bufferOffset, dstOffset,
1007	width, height, 0);
1008
1009    /* Do the first blend from the source to the scratch buffer */
1010
1011    gp_declare_blt(CIMGP_BLTFLAGS_HAZARD);
1012    gp_set_bpp(sbpp);
1013    gp_set_source_format(exaScratch.srcFormat->fmt);
1014    gp_set_strides(exaScratch.srcPitch, exaScratch.srcPitch);
1015
1016    opPtr = &lx_alpha_ops[exaScratch.op * 2];
1017
1018    apply = (exaScratch.srcFormat->alphabits == 0) ?
1019	CIMGP_APPLY_BLEND_TO_RGB : CIMGP_APPLY_BLEND_TO_ALL;
1020
1021    /* If we're destroying the source alpha bits, then make sure we
1022     * use the alpha before the color conversion
1023     */
1024
1025    gp_screen_to_screen_blt(exaScratch.bufferOffset, srcOffset, width, height,
1026	0);
1027
1028    /* Finally, do the second blend back to the destination */
1029
1030    opPtr = &lx_alpha_ops[(exaScratch.op * 2) + 1];
1031
1032    apply = (exaScratch.dstFormat->alphabits == 0) ?
1033	CIMGP_APPLY_BLEND_TO_RGB : CIMGP_APPLY_BLEND_TO_ALL;
1034
1035    gp_declare_blt(CIMGP_BLTFLAGS_HAZARD);
1036    gp_set_bpp(lx_get_bpp_from_format(exaScratch.dstFormat->fmt));
1037
1038    lx_set_source_format(exaScratch.srcFormat->fmt,
1039	exaScratch.dstFormat->fmt);
1040
1041    type =
1042	get_op_type(exaScratch.srcFormat, exaScratch.dstFormat, opPtr->type);
1043
1044    gp_set_alpha_operation(opPtr->operation, type, opPtr->channel, apply, 0);
1045
1046    gp_screen_to_screen_convert(dstOffset, exaScratch.bufferOffset,
1047	width, height, 0);
1048}
1049
1050static void
1051lx_composite_rotate(PixmapPtr pxDst, unsigned long dstOffset,
1052    unsigned int srcOffset, int width, int height)
1053{
1054    int degrees = 0;
1055
1056    gp_declare_blt(0);
1057    gp_set_bpp(lx_get_bpp_from_format(exaScratch.dstFormat->fmt));
1058    gp_set_strides(exaGetPixmapPitch(pxDst), exaScratch.srcPitch);
1059
1060    lx_set_source_format(exaScratch.srcFormat->fmt,
1061	exaScratch.dstFormat->fmt);
1062
1063    gp_set_raster_operation(0xCC);
1064
1065    /* RandR rotation is counter-clockwise, our rotation
1066     * is clockwise, so adjust the numbers accordingly */
1067
1068    switch (exaScratch.rotate) {
1069    case RR_Rotate_90:
1070	degrees = 270;
1071	break;
1072    case RR_Rotate_180:
1073	degrees = 180;
1074	break;
1075    case RR_Rotate_270:
1076	degrees = 90;
1077	break;
1078    }
1079
1080    gp_rotate_blt(dstOffset, srcOffset, width, height, degrees);
1081}
1082
1083static void
1084lx_do_composite_mask(PixmapPtr pxDst, unsigned long dstOffset,
1085    unsigned int maskOffset, int width, int height)
1086{
1087    struct blend_ops_t *opPtr = &lx_alpha_ops[exaScratch.op * 2];
1088
1089    gp_declare_blt(0);
1090
1091    gp_set_source_format(exaScratch.srcFormat->fmt);
1092    gp_set_strides(exaGetPixmapPitch(pxDst), exaScratch.srcPitch);
1093    gp_set_bpp(lx_get_bpp_from_format(exaScratch.dstFormat->fmt));
1094    gp_set_solid_source(exaScratch.srcColor);
1095
1096    gp_blend_mask_blt(dstOffset, 0, width, height, maskOffset,
1097	exaScratch.srcPitch, opPtr->operation, exaScratch.fourBpp);
1098}
1099
1100static void
1101lx_do_composite_mask_two_pass(PixmapPtr pxDst, unsigned long dstOffset,
1102    unsigned int maskOffset, int width, int height, int opX, int opY,
1103    xPointFixed srcPoint)
1104{
1105    int apply, type;
1106    struct blend_ops_t *opPtr;
1107    int opWidth, opHeight;
1108    int opoverX, opoverY;
1109
1110    opoverX = opX;
1111    opoverY = opY;
1112
1113    /* The rendering region should not be bigger than off-screen memory size
1114     * which equals to DEFAULT_EXA_SCRATCH_BFRSZ. If that happens, we split
1115     * the PictOpOver rendering region into several 256KB chunks. And because
1116     * of the Pitch(stride) parameter, so we use maximun width of mask picture.
1117     * that is to say it is a scanline rendering process */
1118    if (width * height * 4 > DEFAULT_EXA_SCRATCH_BFRSZ) {
1119	opWidth = width;
1120	opHeight = DEFAULT_EXA_SCRATCH_BFRSZ / (width * 4);
1121    } else {
1122	opWidth = width;
1123	opHeight = height;
1124    }
1125
1126    while (1) {
1127
1128	/* Wait until the GP is idle - this will ensure that the scratch buffer
1129	 * isn't occupied */
1130
1131	gp_wait_until_idle();
1132
1133	/* Copy the source to the scratch buffer, and do a src * mask raster
1134	 * operation */
1135
1136	gp_declare_blt(0);
1137	opPtr = &lx_alpha_ops[(exaScratch.op * 2) + 1];
1138	gp_set_source_format(CIMGP_SOURCE_FMT_8_8_8_8);
1139	gp_set_strides(opWidth * 4, exaScratch.srcPitch);
1140	gp_set_bpp(lx_get_bpp_from_format(CIMGP_SOURCE_FMT_8_8_8_8));
1141	gp_set_solid_source(exaScratch.srcColor);
1142	gp_blend_mask_blt(exaScratch.bufferOffset, 0, opWidth, opHeight,
1143	    maskOffset, exaScratch.srcPitch, opPtr->operation,
1144	    exaScratch.fourBpp);
1145
1146	/* Do a relative operation(refer rendercheck ops.c), and copy the
1147	 * operation result to destination */
1148
1149	gp_declare_blt(CIMGP_BLTFLAGS_HAZARD);
1150	opPtr = &lx_alpha_ops[exaScratch.op * 2];
1151	apply = (exaScratch.dstFormat->alphabits == 0) ?
1152	    CIMGP_APPLY_BLEND_TO_RGB : CIMGP_APPLY_BLEND_TO_ALL;
1153	gp_set_source_format(CIMGP_SOURCE_FMT_8_8_8_8);
1154	gp_set_strides(exaGetPixmapPitch(pxDst), opWidth * 4);
1155	gp_set_bpp(lx_get_bpp_from_format(exaScratch.dstFormat->fmt));
1156	type = CIMGP_CONVERTED_ALPHA;
1157	gp_set_alpha_operation(opPtr->operation, type, opPtr->channel,
1158	    apply, 0);
1159	gp_screen_to_screen_convert(dstOffset, exaScratch.bufferOffset,
1160	    opWidth, opHeight, 0);
1161
1162	if (width * height * 4 > DEFAULT_EXA_SCRATCH_BFRSZ) {
1163	    /* Finish the rendering */
1164	    if (opoverY + opHeight == opY + height)
1165		break;
1166	    /* Recalculate the Dest and Mask rendering start point */
1167	    srcPoint.y = srcPoint.y + F(opHeight);
1168	    opoverY = opoverY + opHeight;
1169	    if (opoverY + opHeight > opY + height)
1170		opHeight = opY + height - opoverY;
1171	    dstOffset = GetPixmapOffset(pxDst, opoverX, opoverY);
1172	    maskOffset = GetSrcOffset(I(srcPoint.x), I(srcPoint.y));
1173	} else
1174	    break;
1175    }
1176}
1177
1178static void
1179transformPoint(PictTransform * t, xPointFixed * point)
1180{
1181    PictVector v;
1182
1183    v.vector[0] = point->x;
1184    v.vector[1] = point->y;
1185    v.vector[2] = xFixed1;
1186
1187    if (t != NULL)
1188	PictureTransformPoint(t, &v);
1189
1190    point->x = v.vector[0];
1191    point->y = v.vector[1];
1192}
1193
1194static void
1195lx_do_composite(PixmapPtr pxDst, int srcX, int srcY, int maskX,
1196    int maskY, int dstX, int dstY, int width, int height)
1197{
1198    unsigned int dstOffset, srcOffset = 0;
1199
1200    xPointFixed srcPoint;
1201
1202    int opX = dstX;
1203    int opY = dstY;
1204    int opWidth = width;
1205    int opHeight = height;
1206
1207    /* Transform the source coordinates */
1208
1209    if (exaScratch.type == COMP_TYPE_MASK) {
1210	srcPoint.x = F(maskX);
1211	srcPoint.y = F(maskY);
1212    } else {
1213	srcPoint.x = F(srcX);
1214	srcPoint.y = F(srcY);
1215    }
1216
1217    /* srcX, srcY point to the upper right side of the bounding box
1218     * in the unrotated coordinate space.  Depending on the orientation,
1219     * we have to translate the coordinates to point to the origin of
1220     * the rectangle in the source pixmap */
1221
1222    switch (exaScratch.rotate) {
1223    case RR_Rotate_270:
1224	srcPoint.x += F(width);
1225
1226	opWidth = height;
1227	opHeight = width;
1228	break;
1229
1230    case RR_Rotate_180:
1231	srcPoint.x += F(width);
1232	srcPoint.y += F(height);
1233
1234	srcX += width;
1235	srcY += height;
1236	break;
1237
1238    case RR_Rotate_90:
1239	srcPoint.y += F(height);
1240
1241	opWidth = height;
1242	opHeight = width;
1243	break;
1244    }
1245
1246    transformPoint(exaScratch.transform, &srcPoint);
1247
1248    /* Adjust the point to fit into the pixmap */
1249
1250    if (I(srcPoint.x) < 0) {
1251	opWidth += I(srcPoint.x);
1252	srcPoint.x = F(0);
1253    }
1254
1255    if (I(srcPoint.y) < 0) {
1256	opHeight += I(srcPoint.y);
1257	srcPoint.y = F(0);
1258    }
1259
1260    /* Get the source point offset position */
1261
1262    srcOffset = GetSrcOffset(I(srcPoint.x), I(srcPoint.y));
1263
1264    /* When mask exists, exaScratch.srcWidth and exaScratch.srcHeight are
1265     * the source width and source height; Otherwise, they are mask width
1266     * and mask height */
1267    /* exaScratch.repeat is the source repeat attribute
1268     * exaScratch.maskrepeat is the mask repeat attribute */
1269    /* If type is COMP_TYPE_MASK, maskX and maskY are not zero, we should
1270     * subtract them to do the operation in the correct region */
1271
1272    /* FIXME:  Please add the code to handle the condition when the maskX
1273     * and maskY coordinate are negative or greater than
1274     * exaScratch.srcWidth and exaScratch.srcHeight */
1275
1276    if (exaScratch.type == COMP_TYPE_MASK) {
1277	if ((exaScratch.srcWidth - maskX) < opWidth)
1278	    opWidth = exaScratch.srcWidth - maskX;
1279	if ((exaScratch.srcHeight - maskY) < opHeight)
1280	    opHeight = exaScratch.srcHeight - maskY;
1281    } else {
1282	if (exaScratch.type == COMP_TYPE_ONEPASS) {
1283	    /* This is the condition srcX or/and srcY is/are out of source
1284	     * region */
1285	    if (((srcX >= 0 && srcY >= exaScratch.srcHeight)
1286		|| (srcX >= exaScratch.srcWidth  && srcY >= 0)) &&
1287		(exaScratch.op == PictOpOver || exaScratch.op == PictOpSrc)) {
1288		if (exaScratch.repeat == 1) {
1289		    opWidth = width;
1290		    opHeight = height;
1291		} else {
1292		    if (exaScratch.op == PictOpOver)
1293			return ;
1294		    else {
1295			exaScratch.op = PictOpClear;
1296			opWidth = width;
1297			opHeight = height;
1298		    }
1299		}
1300	    /* This is the condition srcX or/and srcY is/are in the source
1301	     * region */
1302	    } else if (srcX >= 0 && srcY >= 0 &&
1303		(exaScratch.op == PictOpOver || exaScratch.op == PictOpSrc)) {
1304		if (exaScratch.repeat == 1) {
1305		    opWidth = width;
1306		    opHeight = height;
1307		} else {
1308		    if ((exaScratch.srcWidth - srcX) < opWidth)
1309			opWidth = exaScratch.srcWidth - srcX;
1310		    if ((exaScratch.srcHeight - srcY) < opHeight)
1311			opHeight = exaScratch.srcHeight - srcY;
1312		}
1313	    /* This is the condition srcX or/and srcY is/are negative */
1314	    } else if ((srcX < 0 || srcY < 0) &&
1315		(exaScratch.op == PictOpOver || exaScratch.op == PictOpSrc)) {
1316		if (exaScratch.repeat == 1) {
1317		    opWidth = width;
1318		    opHeight = height;
1319		} else {
1320		/* Have not met this condition till now */
1321		    return ;
1322		}
1323	    } else {
1324		if (exaScratch.srcWidth < opWidth)
1325		    opWidth = exaScratch.srcWidth;
1326		if (exaScratch.srcHeight < opHeight)
1327		    opHeight = exaScratch.srcHeight;
1328	    }
1329	} else {
1330	    if (exaScratch.rotate == RR_Rotate_180) {
1331	    } else {
1332		if ((exaScratch.srcWidth - srcY) < opWidth)
1333		    opWidth = exaScratch.srcWidth - srcY;
1334		if ((exaScratch.srcHeight - srcX) < opHeight)
1335		    opHeight = exaScratch.srcHeight - srcX;
1336	    }
1337	}
1338    }
1339
1340    while (1) {
1341
1342	dstOffset = GetPixmapOffset(pxDst, opX, opY);
1343
1344	switch (exaScratch.type) {
1345
1346	case COMP_TYPE_MASK:{
1347	    if (exaScratch.op == PictOpOver || exaScratch.op ==
1348		PictOpOutReverse || exaScratch.op == PictOpInReverse ||
1349		exaScratch.op == PictOpIn || exaScratch.op == PictOpOut ||
1350		exaScratch.op == PictOpOverReverse)
1351		lx_do_composite_mask_two_pass(pxDst, dstOffset,
1352		    srcOffset, opWidth, opHeight, opX, opY, srcPoint);
1353	    else
1354		lx_do_composite_mask(pxDst, dstOffset, srcOffset,
1355		    opWidth, opHeight);
1356	    }
1357	    break;
1358
1359	case COMP_TYPE_ONEPASS:
1360	    if ((exaScratch.op == PictOpOver || exaScratch.op == PictOpSrc)
1361		&& (exaScratch.repeat == 1)) {
1362		lx_composite_onepass_special(pxDst, opWidth, opHeight, opX, opY,
1363		    srcX, srcY);
1364		return ;
1365	    } else if ((exaScratch.op == PictOpAdd) && (exaScratch.srcFormat->exa
1366		== PICT_a8) && (exaScratch.dstFormat->exa == PICT_a8))
1367		lx_composite_onepass_add_a8(pxDst, dstOffset, srcOffset,
1368		    opWidth, opHeight, opX, opY, srcX, srcY);
1369	    else
1370		lx_composite_onepass(pxDst, dstOffset, srcOffset, opWidth,
1371		    opHeight);
1372	    break;
1373
1374	case COMP_TYPE_TWOPASS:
1375	    lx_composite_multipass(pxDst, dstOffset, srcOffset, opWidth,
1376		opHeight);
1377
1378	case COMP_TYPE_ROTATE:
1379	    lx_composite_rotate(pxDst, dstOffset, srcOffset, opWidth,
1380		opHeight);
1381	    break;
1382	}
1383
1384	opX += opWidth;
1385
1386	if (opX >= dstX + width) {
1387	    opX = dstX;
1388	    opY += opHeight;
1389
1390	    if (opY >= dstY + height)
1391		break;
1392	}
1393
1394	/* FIXME:  Please add the code to handle the condition when the maskX
1395	 * and maskY coordinate are negative or greater than
1396	 * exaScratch.srcWidth and exaScratch.srcHeight */
1397
1398	if (exaScratch.type == COMP_TYPE_MASK) {
1399	    opWidth = ((dstX + width) - opX) > (exaScratch.srcWidth - maskX)
1400		? (exaScratch.srcWidth - maskX) : (dstX + width) - opX;
1401	    opHeight = ((dstY + height) - opY) > (exaScratch.srcHeight - maskY)
1402		? (exaScratch.srcHeight - maskY) : (dstY + height) - opY;
1403	    /* All black out of the mask */
1404	    if (!exaScratch.maskrepeat)
1405		exaScratch.srcColor = 0x0;
1406	} else {
1407	    if (exaScratch.type == COMP_TYPE_ONEPASS) {
1408		if (srcX >= 0 && srcY >= 0 && (exaScratch.op == PictOpOver ||
1409		    exaScratch.op == PictOpSrc || exaScratch.op ==
1410		    PictOpClear)) {
1411		    opWidth = ((dstX + width) - opX) > (exaScratch.srcWidth -
1412			srcX) ? (exaScratch.srcWidth - srcX) : (dstX + width)
1413			- opX;
1414		    opHeight = ((dstY + height) - opY) >
1415		    (exaScratch.srcHeight - srcY) ?
1416		    (exaScratch.srcHeight - srcY) : (dstY + height) - opY;
1417		} else {
1418		opWidth = ((dstX + width) - opX) > exaScratch.srcWidth ?
1419		    exaScratch.srcWidth : (dstX + width) - opX;
1420		opHeight = ((dstY + height) - opY) > exaScratch.srcHeight ?
1421		    exaScratch.srcHeight : (dstY + height) - opY;
1422		}
1423	    } else {
1424		opWidth = ((dstX + width) - opX) > (exaScratch.srcWidth - srcY)
1425		    ? (exaScratch.srcWidth - srcY) : (dstX + width) - opX;
1426		opHeight = ((dstY + height) - opY) > (exaScratch.srcHeight - srcX
1427		    ) ? (exaScratch.srcHeight - srcX) : (dstY + height) - opY;
1428	    }
1429	    /* All black out of the source */
1430	    if (!exaScratch.repeat && (exaScratch.type == COMP_TYPE_ONEPASS)) {
1431		    lx_composite_all_black(srcOffset, exaScratch.srcWidth,
1432			exaScratch.srcHeight);
1433	    }
1434	    if (!exaScratch.repeat && (exaScratch.type == COMP_TYPE_ROTATE))
1435		    break;
1436	}
1437    }
1438}
1439
1440static void
1441lx_wait_marker(ScreenPtr PScreen, int marker)
1442{
1443    gp_wait_until_idle();
1444}
1445
1446static void
1447lx_done(PixmapPtr ptr)
1448{
1449}
1450
#if 0
/*
 * Disabled: CPU upload of a w x h rectangle from system memory (src,
 * src_pitch bytes per row) into the pixmap at (x, y), copying one row
 * at a time into the frame buffer.
 */
static void
lx_upload_to_screen(PixmapPtr pxDst, int x, int y, int w, int h,
    char *src, int src_pitch)
{
    GeodeRec *pGeode = GEODEPTR_FROM_PIXMAP(pxDst);
    int dst_pitch = exaGetPixmapPitch(pxDst);
    int cpp = (pxDst->drawable.bitsPerPixel + 7) / 8;
    int offset = exaGetPixmapOffset(pxDst);
    char *dst;
    int row;

    /* Address of the first destination pixel inside the frame buffer */
    dst = (char *)(pGeode->FBBase + offset + (y * dst_pitch) + (x * cpp));

    for (row = 0; row < h; row++, dst += dst_pitch, src += src_pitch)
        memcpy(dst, src, w * cpp);
}
#endif
1473
1474#if EXA_VERSION_MINOR >= 2
1475
1476static Bool
1477lx_exa_pixmap_is_offscreen(PixmapPtr pPixmap)
1478{
1479    ScrnInfoPtr pScrni = xf86Screens[pPixmap->drawable.pScreen->myNum];
1480    GeodeRec *pGeode = GEODEPTR(pScrni);
1481    void *start = (void *)(pGeode->FBBase);
1482    void *end =
1483	(void *)(pGeode->FBBase + pGeode->offscreenStart +
1484	pGeode->offscreenSize);
1485
1486    if ((void *)pPixmap->devPrivate.ptr >= start &&
1487	(void *)pPixmap->devPrivate.ptr < end)
1488	return TRUE;
1489
1490    return FALSE;
1491}
1492
1493#endif
1494
1495Bool
1496LXExaInit(ScreenPtr pScreen)
1497{
1498    ScrnInfoPtr pScrni = xf86Screens[pScreen->myNum];
1499    GeodeRec *pGeode = GEODEPTR(pScrni);
1500    ExaDriverPtr pExa = pGeode->pExa;
1501
1502    pExa->exa_major = EXA_VERSION_MAJOR;
1503    pExa->exa_minor = EXA_VERSION_MINOR;
1504
1505    pExa->WaitMarker = lx_wait_marker;
1506
1507    pExa->PrepareSolid = lx_prepare_solid;
1508    pExa->Solid = lx_do_solid;
1509    pExa->DoneSolid = lx_done;
1510
1511    pExa->PrepareCopy = lx_prepare_copy;
1512    pExa->Copy = lx_do_copy;
1513    pExa->DoneCopy = lx_done;
1514
1515    /* Composite */
1516    pExa->CheckComposite = lx_check_composite;
1517    pExa->PrepareComposite = lx_prepare_composite;
1518    pExa->Composite = lx_do_composite;
1519    pExa->DoneComposite = lx_done;
1520    //pExa->UploadToScreen =  lx_upload_to_screen;
1521
1522#if EXA_VERSION_MINOR >= 2
1523    pExa->PixmapIsOffscreen = lx_exa_pixmap_is_offscreen;
1524#endif
1525
1526    //pExa->flags = EXA_OFFSCREEN_PIXMAPS;
1527
1528    return exaDriverInit(pScreen, pGeode->pExa);
1529}
1530