132b578d3Smrg/*
232b578d3Smrg * Copyright 2006 George Sapountzis
332b578d3Smrg * All Rights Reserved.
432b578d3Smrg *
532b578d3Smrg * Based on the mach64 DRI and DRM drivers:
632b578d3Smrg * Copyright 2000 Gareth Hughes
732b578d3Smrg * Copyright 2002-2003 Leif Delgass
832b578d3Smrg * All Rights Reserved.
932b578d3Smrg *
1032b578d3Smrg * Based on the ati hw/kdrive driver:
1132b578d3Smrg * Copyright 2003 Eric Anholt, Anders Carlsson
1232b578d3Smrg *
1332b578d3Smrg * Based on the via hw/xfree86 driver:
1432b578d3Smrg * Copyright 2006 Thomas Hellstrom. All Rights Reserved.
1532b578d3Smrg *
1632b578d3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
1732b578d3Smrg * copy of this software and associated documentation files (the "Software"),
1832b578d3Smrg * to deal in the Software without restriction, including without limitation
1932b578d3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
2032b578d3Smrg * and/or sell copies of the Software, and to permit persons to whom the
2132b578d3Smrg * Software is furnished to do so, subject to the following conditions:
2232b578d3Smrg *
2332b578d3Smrg * The above copyright notice and this permission notice (including the next
2432b578d3Smrg * paragraph) shall be included in all copies or substantial portions of the
2532b578d3Smrg * Software.
2632b578d3Smrg *
2732b578d3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
2832b578d3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
2932b578d3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
3032b578d3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
3132b578d3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
3232b578d3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3332b578d3Smrg * SOFTWARE.
3432b578d3Smrg *
3532b578d3Smrg * Authors:
3632b578d3Smrg *    George Sapountzis <gsap7@yahoo.gr>
3732b578d3Smrg */
3832b578d3Smrg
3932b578d3Smrg/*
4032b578d3Smrg * Interesting cases for RENDER acceleration:
4132b578d3Smrg *
4232b578d3Smrg * cursor      : ARGB8888 (24x24)   Over
4332b578d3Smrg *               RGB565
4432b578d3Smrg *
4532b578d3Smrg * glyph       : A8       (9x10)    Add
4632b578d3Smrg *               A8       (420x13)
4732b578d3Smrg * glyph set   : ARGB8888 (1x1 R)   In
4832b578d3Smrg *               A8       (420x13)  Over
4932b578d3Smrg *               RGB565
5032b578d3Smrg *
5132b578d3Smrg * shadow      : ARGB8888 (1x1 R)   In
5232b578d3Smrg *               A8       (670x362) Over
5332b578d3Smrg *               RGB565
5432b578d3Smrg * translucent : RGB565   (652x344) In
5532b578d3Smrg *               A8       (1x1 R)   Over
5632b578d3Smrg *               RGB565
5732b578d3Smrg *
5832b578d3Smrg * In all interesting cases one of src/mask is "1x1 R".
5932b578d3Smrg */
6032b578d3Smrg
6132b578d3Smrg/*
6232b578d3Smrg * Assumptions and limitations of mach64 RENDER acceleration:
6332b578d3Smrg *
6432b578d3Smrg * RENDER acceleration is supported for GTPRO and later chips using the 3D
6532b578d3Smrg * triangle setup, i.e. the VERTEX_? registers (see the dri driver). According
6632b578d3Smrg * to atiregs.h, SCALE_3D_CNTL and TEX_?_OFF appear in GT, thus chips as old
6732b578d3Smrg * as GT should be capable of RENDER acceleration, using the S_?_INC, T_?_INC
6832b578d3Smrg * registers for texture mapping (see the directfb driver).
6932b578d3Smrg *
7032b578d3Smrg * GTPRO added a triangle setup engine and multitexturing. However, it seems
7132b578d3Smrg * that none of the 8bpp mach64 formats expands the 8bit value to the alpha
7232b578d3Smrg * channel in texture mapping, RGB8 appears to expand to (I,I,I,0). This makes
7332b578d3Smrg * GTPRO multitexturing unsuitable for emulating the IN operation. Moreover,
 * it seems that GT/GTPRO has a multiplexer instead of a blender for computing
7532b578d3Smrg * the final alpha channel which forbids destinations with an alpha channel and
7632b578d3Smrg * generic two-pass compositing.
7732b578d3Smrg *
7832b578d3Smrg * A texture unit combines the fragment color (VERTEX_?_ARGB) coming in from
7932b578d3Smrg * triangle rasterization with the texel from the texture according to the
80d2b10af6Smrg * texture environment (TEX_LIGHT_FCN_). "1x1 R" textures may come in as
81d2b10af6Smrg * fragment colors, eliminating the need for multitexturing in all interesting
8232b578d3Smrg * cases (via also uses this optimization).
8332b578d3Smrg *
8432b578d3Smrg * Texture registers are saved/restored and cached (see atimach64.c). TEX_CNTL
8532b578d3Smrg * cannot be cached because it flushes the texture cache. TEX_?_OFF are also
8632b578d3Smrg * not cached because I am not sure whether writing at some offset register
8732b578d3Smrg * affects the value at another offset.
8832b578d3Smrg *
8932b578d3Smrg * Vertex registers are not saved/restored. This shouldn't be a problem though
9032b578d3Smrg * either for DRI or VT switch because vertex registers are set and used within
91d2b10af6Smrg * a single acceleration hook. Synchronization between the DDX and DRI is based
9232b578d3Smrg * on calling ATIDRISync() at the beginning of each DDX acceleration hook,
9332b578d3Smrg * which suggests the assumption that individual acceleration hooks are not
9432b578d3Smrg * interrupted.
9532b578d3Smrg */
9632b578d3Smrg
9732b578d3Smrg#include <string.h>
9832b578d3Smrg#include <stdio.h>
9932b578d3Smrg
10032b578d3Smrg/*
10132b578d3Smrg * Helper functions copied from exa and via.
10232b578d3Smrg */
10332b578d3Smrg
10432b578d3Smrg#if 0
10532b578d3Smrgstatic void
10632b578d3SmrgMach64ExaCompositePictDesc(PicturePtr pict, char *string, int n)
10732b578d3Smrg{
10832b578d3Smrg    char format[20];
10932b578d3Smrg    char size[20];
11032b578d3Smrg
11132b578d3Smrg    if (!pict) {
11232b578d3Smrg        snprintf(string, n, "None");
11332b578d3Smrg        return;
11432b578d3Smrg    }
11532b578d3Smrg
11632b578d3Smrg    switch (pict->format) {
11732b578d3Smrg    case PICT_x8r8g8b8:
11832b578d3Smrg        snprintf(format, 20, "RGB8888 ");
11932b578d3Smrg        break;
12032b578d3Smrg    case PICT_x8b8g8r8:
12132b578d3Smrg        snprintf(format, 20, "BGR8888 ");
12232b578d3Smrg        break;
12332b578d3Smrg    case PICT_a8r8g8b8:
12432b578d3Smrg        snprintf(format, 20, "ARGB8888");
12532b578d3Smrg        break;
12632b578d3Smrg    case PICT_a8b8g8r8:
12732b578d3Smrg        snprintf(format, 20, "ABGR8888");
12832b578d3Smrg        break;
12932b578d3Smrg    case PICT_r5g6b5:
13032b578d3Smrg        snprintf(format, 20, "RGB565  ");
13132b578d3Smrg        break;
13232b578d3Smrg    case PICT_x1r5g5b5:
13332b578d3Smrg        snprintf(format, 20, "RGB555  ");
13432b578d3Smrg        break;
13532b578d3Smrg    case PICT_a8:
13632b578d3Smrg        snprintf(format, 20, "A8      ");
13732b578d3Smrg        break;
13832b578d3Smrg    case PICT_a1:
13932b578d3Smrg        snprintf(format, 20, "A1      ");
14032b578d3Smrg        break;
14132b578d3Smrg    default:
14232b578d3Smrg        snprintf(format, 20, "0x%x", (int)pict->format);
14332b578d3Smrg        break;
14432b578d3Smrg    }
14532b578d3Smrg
14632b578d3Smrg    snprintf(size, 20, "%dx%d%s%s",
14732b578d3Smrg        pict->pDrawable->width,
14832b578d3Smrg        pict->pDrawable->height,
14932b578d3Smrg        pict->repeat ? " R" : "",
15032b578d3Smrg        pict->componentAlpha ? " C" : ""
15132b578d3Smrg    );
15232b578d3Smrg
15332b578d3Smrg    snprintf(string, n, "%-10p: fmt %s (%s)", (void *)pict->pDrawable, format, size);
15432b578d3Smrg}
15532b578d3Smrg
15632b578d3Smrgstatic void
15732b578d3SmrgMach64ExaPrintComposite(CARD8 op,
15832b578d3Smrg    PicturePtr pSrc, PicturePtr pMask, PicturePtr pDst, char *string)
15932b578d3Smrg{
16032b578d3Smrg    char sop[20];
16132b578d3Smrg    char srcdesc[40], maskdesc[40], dstdesc[40];
16232b578d3Smrg
16332b578d3Smrg    switch (op) {
16432b578d3Smrg    case PictOpSrc:
16532b578d3Smrg        sprintf(sop, "Src");
16632b578d3Smrg        break;
16732b578d3Smrg    case PictOpOver:
16832b578d3Smrg        sprintf(sop, "Over");
16932b578d3Smrg        break;
17032b578d3Smrg    case PictOpInReverse:
17132b578d3Smrg        sprintf(sop, "InR");
17232b578d3Smrg        break;
17332b578d3Smrg    case PictOpOutReverse:
17432b578d3Smrg        sprintf(sop, "OutR");
17532b578d3Smrg        break;
17632b578d3Smrg    case PictOpAdd:
17732b578d3Smrg        sprintf(sop, "Add");
17832b578d3Smrg        break;
17932b578d3Smrg    default:
18032b578d3Smrg        sprintf(sop, "0x%x", (int)op);
18132b578d3Smrg        break;
18232b578d3Smrg    }
18332b578d3Smrg
18432b578d3Smrg    Mach64ExaCompositePictDesc(pSrc, srcdesc, 40);
18532b578d3Smrg    Mach64ExaCompositePictDesc(pMask, maskdesc, 40);
18632b578d3Smrg    Mach64ExaCompositePictDesc(pDst, dstdesc, 40);
18732b578d3Smrg
18832b578d3Smrg    sprintf(string, "op %s, \n"
18932b578d3Smrg        "                src  %s\n"
19032b578d3Smrg        "                mask %s\n"
19132b578d3Smrg        "                dst  %s\n", sop, srcdesc, maskdesc, dstdesc);
19232b578d3Smrg}
19332b578d3Smrg#endif
19432b578d3Smrg
19532b578d3Smrgstatic __inline__ CARD32
19632b578d3SmrgviaBitExpandHelper(CARD32 component, CARD32 bits)
19732b578d3Smrg{
19832b578d3Smrg    CARD32 tmp, mask;
19932b578d3Smrg
20032b578d3Smrg    mask = (1 << (8 - bits)) - 1;
20132b578d3Smrg    tmp = component << (8 - bits);
20232b578d3Smrg    return ((component & 1) ? tmp | mask : tmp);
20332b578d3Smrg}
20432b578d3Smrg
20532b578d3Smrgstatic __inline__ void
20632b578d3SmrgMach64PixelARGB(PixmapPtr pPixmap, CARD32 format, CARD32 *argb)
20732b578d3Smrg{
20832b578d3Smrg    CARD32 pixel;
20932b578d3Smrg    CARD8  comp;
21032b578d3Smrg    int    bits, shift;
21132b578d3Smrg
212e35d4d8eSmrg    pixel = exaGetPixmapFirstPixel(pPixmap);
21332b578d3Smrg
21432b578d3Smrg    /* exaGetRGBAFromPixel()/viaPixelARGB8888() */
21532b578d3Smrg
21632b578d3Smrg    switch (PICT_FORMAT_TYPE(format)) {
21732b578d3Smrg    case PICT_TYPE_A:
21832b578d3Smrg        shift = 0;
21932b578d3Smrg        bits = PICT_FORMAT_A(format);
22032b578d3Smrg        comp = (pixel >> shift) & ((1 << bits) - 1);
22132b578d3Smrg        comp = viaBitExpandHelper(comp, bits);
22232b578d3Smrg        *argb = comp << 24;
22332b578d3Smrg        break;
22432b578d3Smrg    case PICT_TYPE_ARGB:
22532b578d3Smrg        shift = 0;
22632b578d3Smrg        bits = PICT_FORMAT_B(format);
22732b578d3Smrg        comp = (pixel >> shift) & ((1 << bits) - 1);
22832b578d3Smrg        comp = viaBitExpandHelper(comp, bits);
22932b578d3Smrg        *argb = comp;
23032b578d3Smrg
23132b578d3Smrg        shift += bits;
23232b578d3Smrg        bits = PICT_FORMAT_G(format);
23332b578d3Smrg        comp = (pixel >> shift) & ((1 << bits) - 1);
23432b578d3Smrg        comp = viaBitExpandHelper(comp, bits);
23532b578d3Smrg        *argb |= comp << 8;
23632b578d3Smrg
23732b578d3Smrg        shift += bits;
23832b578d3Smrg        bits = PICT_FORMAT_R(format);
23932b578d3Smrg        comp = (pixel >> shift) & ((1 << bits) - 1);
24032b578d3Smrg        comp = viaBitExpandHelper(comp, bits);
24132b578d3Smrg        *argb |= comp << 16;
24232b578d3Smrg
24332b578d3Smrg        shift += bits;
24432b578d3Smrg        bits = PICT_FORMAT_A(format);
24532b578d3Smrg        if (bits) {
24632b578d3Smrg            comp = (pixel >> shift) & ((1 << bits) - 1);
24732b578d3Smrg            comp = viaBitExpandHelper(comp, bits);
24832b578d3Smrg        } else {
24932b578d3Smrg            comp = 0xff;
25032b578d3Smrg        }
25132b578d3Smrg        *argb |= comp << 24;
25232b578d3Smrg        break;
25332b578d3Smrg    case PICT_TYPE_ABGR:
25432b578d3Smrg        break;
25532b578d3Smrg    default:
25632b578d3Smrg        break;
25732b578d3Smrg    }
25832b578d3Smrg}
25932b578d3Smrg
26032b578d3Smrg/*
26132b578d3Smrg * RENDER acceleration for mach64
26232b578d3Smrg */
26332b578d3Smrg
26432b578d3Smrgtypedef struct {
26532b578d3Smrg    Bool supported;
26632b578d3Smrg    CARD32 scale_3d_cntl;
26732b578d3Smrg} Mach64BlendOp;
26832b578d3Smrg
26932b578d3Smrgstatic Mach64BlendOp Mach64BlendOps[] = {
27032b578d3Smrg    /* Clear */
27132b578d3Smrg    {1, MACH64_ALPHA_BLEND_SRC_ZERO        | MACH64_ALPHA_BLEND_DST_ZERO},
27232b578d3Smrg    /* Src */
27332b578d3Smrg    {1, MACH64_ALPHA_BLEND_SRC_ONE         | MACH64_ALPHA_BLEND_DST_ZERO},
27432b578d3Smrg    /* Dst */
27532b578d3Smrg    {1, MACH64_ALPHA_BLEND_SRC_ZERO        | MACH64_ALPHA_BLEND_DST_ONE},
27632b578d3Smrg    /* Over */
27732b578d3Smrg    {1, MACH64_ALPHA_BLEND_SRC_ONE         | MACH64_ALPHA_BLEND_DST_INVSRCALPHA},
27832b578d3Smrg    /* OverReverse */
27932b578d3Smrg    {1, MACH64_ALPHA_BLEND_SRC_INVDSTALPHA | MACH64_ALPHA_BLEND_DST_ONE},
28032b578d3Smrg    /* In */
28132b578d3Smrg    {1, MACH64_ALPHA_BLEND_SRC_DSTALPHA    | MACH64_ALPHA_BLEND_DST_ZERO},
28232b578d3Smrg    /* InReverse */
28332b578d3Smrg    {1, MACH64_ALPHA_BLEND_SRC_ZERO        | MACH64_ALPHA_BLEND_DST_SRCALPHA},
28432b578d3Smrg    /* Out */
28532b578d3Smrg    {1, MACH64_ALPHA_BLEND_SRC_INVDSTALPHA | MACH64_ALPHA_BLEND_DST_ZERO},
28632b578d3Smrg    /* OutReverse */
28732b578d3Smrg    {1, MACH64_ALPHA_BLEND_SRC_ZERO        | MACH64_ALPHA_BLEND_DST_INVSRCALPHA},
28832b578d3Smrg    /* Atop */
28932b578d3Smrg    {0, MACH64_ALPHA_BLEND_SRC_DSTALPHA    | MACH64_ALPHA_BLEND_DST_INVSRCALPHA},
29032b578d3Smrg    /* AtopReverse */
29132b578d3Smrg    {0, MACH64_ALPHA_BLEND_SRC_INVDSTALPHA | MACH64_ALPHA_BLEND_DST_SRCALPHA},
29232b578d3Smrg    /* Xor */
29332b578d3Smrg    {1, MACH64_ALPHA_BLEND_SRC_INVDSTALPHA | MACH64_ALPHA_BLEND_DST_INVSRCALPHA},
29432b578d3Smrg    /* Add */
29532b578d3Smrg    {1, MACH64_ALPHA_BLEND_SRC_ONE         | MACH64_ALPHA_BLEND_DST_ONE}
29632b578d3Smrg};
29732b578d3Smrg
29832b578d3Smrg#define MACH64_NR_BLEND_OPS \
29932b578d3Smrg    (sizeof(Mach64BlendOps) / sizeof(Mach64BlendOps[0]))
30032b578d3Smrg
30132b578d3Smrgtypedef struct {
30232b578d3Smrg    CARD32 pictFormat;
30332b578d3Smrg    CARD32 dstFormat;
30432b578d3Smrg    CARD32 texFormat;
30532b578d3Smrg} Mach64TexFormat;
30632b578d3Smrg
30732b578d3Smrgstatic Mach64TexFormat Mach64TexFormats[] = {
30832b578d3Smrg    {PICT_a8r8g8b8, -1,                       MACH64_DATATYPE_ARGB8888},
30932b578d3Smrg    {PICT_x8r8g8b8, MACH64_DATATYPE_ARGB8888, MACH64_DATATYPE_ARGB8888},
31032b578d3Smrg    {PICT_a1r5g5b5, -1,                       MACH64_DATATYPE_ARGB1555},
31132b578d3Smrg    {PICT_x1r5g5b5, MACH64_DATATYPE_ARGB1555, MACH64_DATATYPE_ARGB1555},
31232b578d3Smrg    {PICT_r5g6b5,   MACH64_DATATYPE_RGB565,   MACH64_DATATYPE_RGB565  },
31332b578d3Smrg    {PICT_a8,       MACH64_DATATYPE_RGB8,     MACH64_DATATYPE_RGB8    }
31432b578d3Smrg};
31532b578d3Smrg
31632b578d3Smrg#define MACH64_NR_TEX_FORMATS \
31732b578d3Smrg    (sizeof(Mach64TexFormats) / sizeof(Mach64TexFormats[0]))
31832b578d3Smrg
/*
 * Non-zero when _pPict is a repeating 1x1 picture, i.e. a solid colour.
 * A NULL picture, or a picture without a drawable, is never solid.
 * The argument is evaluated several times; pass a side-effect-free expression.
 */
#define MACH64_PICT_IS_1x1R(_pPict)      \
    ((_pPict) &&                         \
     (_pPict)->pDrawable &&              \
     (_pPict)->pDrawable->width == 1 &&  \
     (_pPict)->pDrawable->height == 1 && \
     (_pPict)->repeat)
32432b578d3Smrg
32532b578d3Smrg/*
32632b578d3Smrg * CheckComposite hook helper functions.
32732b578d3Smrg */
32832b578d3Smrgstatic __inline__ Bool
32932b578d3SmrgMach64GetOrder(int val, int *shift)
33032b578d3Smrg{
33132b578d3Smrg    *shift = 0;
33232b578d3Smrg
33332b578d3Smrg    while (val > (1 << *shift))
33432b578d3Smrg        (*shift)++;
33532b578d3Smrg
33632b578d3Smrg    return (val == (1 << *shift));
33732b578d3Smrg}
33832b578d3Smrg
33932b578d3Smrgstatic Bool
34032b578d3SmrgMach64CheckTexture(PicturePtr pPict)
34132b578d3Smrg{
342621ff18cSmrg    int h,w;
34332b578d3Smrg    int l2w, l2h, level, i;
34432b578d3Smrg
345621ff18cSmrg    if (pPict->pDrawable == NULL)
346621ff18cSmrg	    return FALSE;
34711b80daeSmacallan    w = pPict->pDrawable->width;
34811b80daeSmacallan    h = pPict->pDrawable->height;
34932b578d3Smrg    for (i = 0; i < MACH64_NR_TEX_FORMATS; i++) {
35032b578d3Smrg        if (Mach64TexFormats[i].pictFormat == pPict->format)
35132b578d3Smrg            break;
35232b578d3Smrg    }
35332b578d3Smrg
35432b578d3Smrg    if (i == MACH64_NR_TEX_FORMATS)
35532b578d3Smrg        MACH64_FALLBACK(("Unsupported picture format 0x%x\n",
35632b578d3Smrg                        (int)pPict->format));
35732b578d3Smrg
35832b578d3Smrg    /* l2w equals l2p (pitch) for all interesting cases (w >= 64) */
35932b578d3Smrg    Mach64GetOrder(w, &l2w);
36032b578d3Smrg    Mach64GetOrder(h, &l2h);
36132b578d3Smrg
36232b578d3Smrg    level = (l2w > l2h) ? l2w : l2h;
36332b578d3Smrg
36432b578d3Smrg    if (level > 10)
36532b578d3Smrg        MACH64_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
36632b578d3Smrg
36732b578d3Smrg    return TRUE;
36832b578d3Smrg}
36932b578d3Smrg
37032b578d3Smrg/*
37132b578d3Smrg * CheckComposite acceleration hook.
37232b578d3Smrg */
37332b578d3SmrgBool
37432b578d3SmrgMach64CheckComposite
37532b578d3Smrg(
37632b578d3Smrg    int        op,
37732b578d3Smrg    PicturePtr pSrcPicture,
37832b578d3Smrg    PicturePtr pMaskPicture,
37932b578d3Smrg    PicturePtr pDstPicture
38032b578d3Smrg)
38132b578d3Smrg{
38232b578d3Smrg    Bool src_solid, mask_solid, mask_comp, op_comp;
38332b578d3Smrg    int i;
38432b578d3Smrg
38532b578d3Smrg    if (op >= MACH64_NR_BLEND_OPS || !Mach64BlendOps[op].supported)
38632b578d3Smrg        return FALSE;
38732b578d3Smrg
38832b578d3Smrg    if (!Mach64CheckTexture(pSrcPicture))
38932b578d3Smrg        return FALSE;
39032b578d3Smrg
39132b578d3Smrg    if (pMaskPicture && !Mach64CheckTexture(pMaskPicture))
39232b578d3Smrg        return FALSE;
39332b578d3Smrg
39432b578d3Smrg    /* Check destination format */
39532b578d3Smrg
39632b578d3Smrg    for (i = 0; i < MACH64_NR_TEX_FORMATS; i++) {
39732b578d3Smrg        if (Mach64TexFormats[i].pictFormat == pDstPicture->format)
39832b578d3Smrg            break;
39932b578d3Smrg    }
40032b578d3Smrg
40132b578d3Smrg    if (i == MACH64_NR_TEX_FORMATS || Mach64TexFormats[i].dstFormat == -1)
40232b578d3Smrg        MACH64_FALLBACK(("Unsupported dst format 0x%x\n",
40332b578d3Smrg                        (int)pDstPicture->format));
40432b578d3Smrg
40532b578d3Smrg    /* Check that A8 src/dst appears only as "A8 ADD A8" */
40632b578d3Smrg
40732b578d3Smrg    if (pDstPicture->format == PICT_a8) {
40832b578d3Smrg        if (pMaskPicture || pSrcPicture->format != PICT_a8 || op != PictOpAdd)
40932b578d3Smrg            MACH64_FALLBACK(("A8 dst with mask or non-A8 src.\n"));
41032b578d3Smrg    }
41132b578d3Smrg
41232b578d3Smrg    if (pDstPicture->format != PICT_a8) {
41332b578d3Smrg        if (pSrcPicture->format == PICT_a8)
41432b578d3Smrg            MACH64_FALLBACK(("A8 src with non-A8 dst.\n"));
41532b578d3Smrg    }
41632b578d3Smrg
41732b578d3Smrg    /* Check that one of src/mask can come in as the fragment color. */
41832b578d3Smrg
41932b578d3Smrg    src_solid = MACH64_PICT_IS_1x1R(pSrcPicture);
42032b578d3Smrg
42132b578d3Smrg    mask_solid = MACH64_PICT_IS_1x1R(pMaskPicture);
42232b578d3Smrg
42332b578d3Smrg    mask_comp = pMaskPicture && pMaskPicture->componentAlpha;
42432b578d3Smrg
42532b578d3Smrg    op_comp = op == PictOpAdd ||
42632b578d3Smrg              op == PictOpInReverse ||
42732b578d3Smrg              op == PictOpOutReverse;
42832b578d3Smrg
42932b578d3Smrg    if (mask_solid && src_solid)
43032b578d3Smrg        MACH64_FALLBACK(("Bad one-pixel IN composite operation.\n"));
43132b578d3Smrg
43232b578d3Smrg    if (pMaskPicture) {
43332b578d3Smrg        if (!mask_solid && !src_solid)
43432b578d3Smrg            MACH64_FALLBACK(("Multitexturing required.\n"));
43532b578d3Smrg
43632b578d3Smrg        if (!mask_solid && !op_comp)
43732b578d3Smrg            MACH64_FALLBACK(("Non-solid mask.\n"));
43832b578d3Smrg
43932b578d3Smrg        if (mask_comp && !src_solid)
44032b578d3Smrg            MACH64_FALLBACK(("Component-alpha mask.\n"));
44132b578d3Smrg
44232b578d3Smrg        if (!mask_comp && pMaskPicture->format != PICT_a8)
44332b578d3Smrg            MACH64_FALLBACK(("Non-A8 mask.\n"));
44432b578d3Smrg
44532b578d3Smrg        if (mask_comp && pMaskPicture->format != PICT_a8r8g8b8)
44632b578d3Smrg            MACH64_FALLBACK(("Non-ARGB mask.\n"));
44732b578d3Smrg    }
44832b578d3Smrg
44932b578d3Smrg    return TRUE;
45032b578d3Smrg}
45132b578d3Smrg
45232b578d3Smrg/*
45332b578d3Smrg * This function setups the fragment color from a solid pixmap in the presence
45432b578d3Smrg * of a mask.
45532b578d3Smrg */
45632b578d3Smrgstatic __inline__ Bool
45732b578d3SmrgMach64PrepareMask
45832b578d3Smrg(
45932b578d3Smrg    Mach64ContextRegs3D *m3d,
46032b578d3Smrg    int        op,
46132b578d3Smrg    PicturePtr pSrcPicture,
46232b578d3Smrg    PicturePtr pMaskPicture,
46332b578d3Smrg    PixmapPtr  pSrc,
46432b578d3Smrg    PixmapPtr  pMask
46532b578d3Smrg)
46632b578d3Smrg{
46732b578d3Smrg    Bool mask_solid, src_solid;
46832b578d3Smrg    CARD32 argb = 0;
46932b578d3Smrg
47032b578d3Smrg    mask_solid = MACH64_PICT_IS_1x1R(pMaskPicture);
47132b578d3Smrg
47232b578d3Smrg    src_solid = MACH64_PICT_IS_1x1R(pSrcPicture);
47332b578d3Smrg
47432b578d3Smrg    if (mask_solid) {
47532b578d3Smrg        Mach64PixelARGB(pMask, pMaskPicture->format, &argb);
47632b578d3Smrg        argb >>= 24;
47732b578d3Smrg        argb &= 0xff;
47832b578d3Smrg
47932b578d3Smrg        m3d->frag_mask = TRUE;
48032b578d3Smrg        m3d->frag_color = (argb << 24) | (argb << 16) | (argb << 8) | argb;
48132b578d3Smrg        return TRUE;
48232b578d3Smrg    }
48332b578d3Smrg
48432b578d3Smrg    if (src_solid) {
48532b578d3Smrg        /* We can only handle cases where either the src color (e.g. ADD) or
48632b578d3Smrg         * the src alpha (e.g. IN_REV, OUT_REV) is used but not both.
48732b578d3Smrg         *
48832b578d3Smrg         * (ARGB8888 IN A8) OVER RGB565 is implemented as:
48932b578d3Smrg         * (ARGB8888 IN A8) ADD ((ARGB8888 IN A8) OUT_REV RGB565).
49032b578d3Smrg         */
49132b578d3Smrg        if (op == PictOpInReverse || op == PictOpOutReverse) {
49232b578d3Smrg            Mach64PixelARGB(pSrc, pSrcPicture->format, &argb);
49332b578d3Smrg            argb >>= 24;
49432b578d3Smrg            argb &= 0xff;
49532b578d3Smrg
49632b578d3Smrg            m3d->frag_src = TRUE;
49732b578d3Smrg            m3d->frag_color = (argb << 24) | (argb << 16) | (argb << 8) | argb;
49832b578d3Smrg            m3d->color_alpha = TRUE;
49932b578d3Smrg            return TRUE;
50032b578d3Smrg        }
50132b578d3Smrg
50232b578d3Smrg        if (op == PictOpAdd) {
50332b578d3Smrg            Mach64PixelARGB(pSrc, pSrcPicture->format, &argb);
50432b578d3Smrg
50532b578d3Smrg            m3d->frag_src = TRUE;
50632b578d3Smrg            m3d->frag_color = argb;
50732b578d3Smrg            return TRUE;
50832b578d3Smrg        }
50932b578d3Smrg    }
51032b578d3Smrg
51132b578d3Smrg    return FALSE;
51232b578d3Smrg}
51332b578d3Smrg
51432b578d3Smrg/*
51532b578d3Smrg * This function setups the texturing and blending environments. It also
51632b578d3Smrg * manipulates blend control for non-solid masks.
51732b578d3Smrg */
51832b578d3Smrgstatic void __inline__
51932b578d3SmrgMach64BlendCntl(Mach64ContextRegs3D *m3d, int op)
52032b578d3Smrg{
52132b578d3Smrg    m3d->scale_3d_cntl |= MACH64_SCALE_PIX_EXPAND_DYNAMIC_RANGE |
52232b578d3Smrg                          MACH64_SCALE_DITHER_2D_TABLE |
52332b578d3Smrg                          MACH64_DITHER_INIT_RESET;
52432b578d3Smrg
52532b578d3Smrg    m3d->scale_3d_cntl |= Mach64BlendOps[op].scale_3d_cntl;
52632b578d3Smrg
52732b578d3Smrg    if (m3d->color_alpha) {
52832b578d3Smrg        /* A8 uses RGB8 which expands to (I,I,I,0). Thus, we use the color
52932b578d3Smrg         * channels instead of the alpha channel as the alpha factor. We also
53032b578d3Smrg         * use the color channels for ARGB8888 masks with component-alpha.
53132b578d3Smrg         */
53232b578d3Smrg        CARD32 Ad = m3d->scale_3d_cntl & MACH64_ALPHA_BLEND_DST_MASK;
53332b578d3Smrg
53432b578d3Smrg        /* InReverse */
53532b578d3Smrg        if (Ad == MACH64_ALPHA_BLEND_DST_SRCALPHA) {
53632b578d3Smrg            m3d->scale_3d_cntl &= ~MACH64_ALPHA_BLEND_DST_MASK;
53732b578d3Smrg            m3d->scale_3d_cntl |=  MACH64_ALPHA_BLEND_DST_SRCCOLOR;
53832b578d3Smrg        }
53932b578d3Smrg
54032b578d3Smrg        /* OutReverse */
54132b578d3Smrg        if (Ad == MACH64_ALPHA_BLEND_DST_INVSRCALPHA) {
54232b578d3Smrg            m3d->scale_3d_cntl &= ~MACH64_ALPHA_BLEND_DST_MASK;
54332b578d3Smrg            m3d->scale_3d_cntl |=  MACH64_ALPHA_BLEND_DST_INVSRCCOLOR;
54432b578d3Smrg        }
54532b578d3Smrg    }
54632b578d3Smrg
54732b578d3Smrg    /* Can't color mask and blend at the same time */
54832b578d3Smrg    m3d->dp_write_mask = 0xffffffff;
54932b578d3Smrg
55032b578d3Smrg    /* Can't fog and blend at the same time */
55132b578d3Smrg    m3d->scale_3d_cntl |= MACH64_ALPHA_FOG_EN_ALPHA;
55232b578d3Smrg
55332b578d3Smrg    /* Enable texture mapping mode */
55432b578d3Smrg    m3d->scale_3d_cntl |= MACH64_SCALE_3D_FCN_TEXTURE;
55532b578d3Smrg    m3d->scale_3d_cntl |= MACH64_MIP_MAP_DISABLE;
55632b578d3Smrg
55732b578d3Smrg    /* Setup the texture environment */
55832b578d3Smrg    m3d->scale_3d_cntl |= MACH64_TEX_LIGHT_FCN_MODULATE;
55932b578d3Smrg
56032b578d3Smrg    /* Initialize texture unit */
56132b578d3Smrg    m3d->tex_cntl |= MACH64_TEX_ST_DIRECT |
56232b578d3Smrg                     MACH64_TEX_SRC_LOCAL |
56332b578d3Smrg                     MACH64_TEX_UNCOMPRESSED |
56432b578d3Smrg                     MACH64_TEX_CACHE_FLUSH |
56532b578d3Smrg                     MACH64_TEX_CACHE_SIZE_4K;
56632b578d3Smrg}
56732b578d3Smrg
56832b578d3Smrg/*
56932b578d3Smrg * This function setups the texture unit.
57032b578d3Smrg */
57132b578d3Smrgstatic Bool
57232b578d3SmrgMach64PrepareTexture(PicturePtr pPict, PixmapPtr pPix)
57332b578d3Smrg{
574e35d4d8eSmrg    ScrnInfoPtr pScreenInfo = xf86ScreenToScrn(pPix->drawable.pScreen);
57532b578d3Smrg    ATIPtr pATI = ATIPTR(pScreenInfo);
57632b578d3Smrg    Mach64ContextRegs3D *m3d = &pATI->m3d;
57732b578d3Smrg
57832b578d3Smrg    CARD32 texFormat;
57932b578d3Smrg
58032b578d3Smrg    int w = pPict->pDrawable->width;
58132b578d3Smrg    int h = pPict->pDrawable->height;
58232b578d3Smrg    int l2w, l2h, l2p, level, pitch, cpp, i;
58332b578d3Smrg
58432b578d3Smrg    /* Prepare picture format */
58532b578d3Smrg    for (i = 0; i < MACH64_NR_TEX_FORMATS; i++) {
58632b578d3Smrg        if (Mach64TexFormats[i].pictFormat == pPict->format)
58732b578d3Smrg            break;
58832b578d3Smrg    }
5892a51b5beSmrg    if (i == MACH64_NR_TEX_FORMATS)
5902a51b5beSmrg        MACH64_FALLBACK(("Unsupported picture format 0x%x\n",
5912a51b5beSmrg                        (int)pPict->format));
59232b578d3Smrg    texFormat = Mach64TexFormats[i].texFormat;
59332b578d3Smrg
59432b578d3Smrg    /* Prepare picture size */
59532b578d3Smrg    cpp = PICT_FORMAT_BPP(pPict->format) / 8;
59632b578d3Smrg    pitch = exaGetPixmapPitch(pPix) / cpp;
59732b578d3Smrg
59832b578d3Smrg    Mach64GetOrder(w, &l2w);
59932b578d3Smrg    Mach64GetOrder(h, &l2h);
60032b578d3Smrg    Mach64GetOrder(pitch, &l2p);
60132b578d3Smrg
60232b578d3Smrg    if (pPict->repeat && w == 1 && h == 1)
60332b578d3Smrg        l2p = 0;
60432b578d3Smrg    else if (pPict->repeat)
60532b578d3Smrg        MACH64_FALLBACK(("Repeat not supported for w,h != 1,1\n"));
60632b578d3Smrg
60732b578d3Smrg    l2w = l2p;
60832b578d3Smrg
60932b578d3Smrg    level = (l2w > l2h) ? l2w : l2h;
61032b578d3Smrg
61132b578d3Smrg    m3d->tex_width = (1 << l2w);
61232b578d3Smrg    m3d->tex_height = (1 << l2h);
61332b578d3Smrg
61432b578d3Smrg    /* Update hw state */
61532b578d3Smrg    m3d->dp_pix_width |= SetBits(texFormat, DP_SCALE_PIX_WIDTH);
61632b578d3Smrg
61732b578d3Smrg    m3d->tex_size_pitch = (l2w   << 0) |
61832b578d3Smrg                          (level << 4) |
61932b578d3Smrg                          (l2h   << 8);
62032b578d3Smrg
62132b578d3Smrg    m3d->tex_offset = exaGetPixmapOffset(pPix);
62232b578d3Smrg
62332b578d3Smrg    if (PICT_FORMAT_A(pPict->format))
62432b578d3Smrg        m3d->scale_3d_cntl |= MACH64_TEX_MAP_AEN;
62532b578d3Smrg
62632b578d3Smrg    switch (pPict->filter) {
62732b578d3Smrg    case PictFilterNearest:
62832b578d3Smrg        m3d->scale_3d_cntl |= MACH64_TEX_BLEND_FCN_NEAREST;
62932b578d3Smrg        break;
63032b578d3Smrg    case PictFilterBilinear:
63132b578d3Smrg        /* FIXME */
63232b578d3Smrg#if 0
63332b578d3Smrg        m3d->scale_3d_cntl |= MACH64_TEX_BLEND_FCN_LINEAR;
63432b578d3Smrg        m3d->scale_3d_cntl |= MACH64_BILINEAR_TEX_EN;
63532b578d3Smrg#endif
63632b578d3Smrg        MACH64_FALLBACK(("Bilinear filter 0x%x\n", pPict->filter));
63732b578d3Smrg        break;
63832b578d3Smrg    default:
63932b578d3Smrg        MACH64_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
64032b578d3Smrg    }
64132b578d3Smrg
64232b578d3Smrg    m3d->transform = pPict->transform;
64332b578d3Smrg
64432b578d3Smrg    return TRUE;
64532b578d3Smrg}
64632b578d3Smrg
64732b578d3Smrg/*
64832b578d3Smrg * PrepareComposite acceleration hook.
64932b578d3Smrg */
65032b578d3SmrgBool
65132b578d3SmrgMach64PrepareComposite
65232b578d3Smrg(
65332b578d3Smrg    int        op,
65432b578d3Smrg    PicturePtr pSrcPicture,
65532b578d3Smrg    PicturePtr pMaskPicture,
65632b578d3Smrg    PicturePtr pDstPicture,
65732b578d3Smrg    PixmapPtr  pSrc,
65832b578d3Smrg    PixmapPtr  pMask,
65932b578d3Smrg    PixmapPtr  pDst
66032b578d3Smrg)
66132b578d3Smrg{
662e35d4d8eSmrg    ScrnInfoPtr pScreenInfo = xf86ScreenToScrn(pDst->drawable.pScreen);
66332b578d3Smrg    ATIPtr pATI = ATIPTR(pScreenInfo);
66432b578d3Smrg    Mach64ContextRegs3D *m3d = &pATI->m3d;
66532b578d3Smrg
66632b578d3Smrg    CARD32 dstFormat;
66732b578d3Smrg    int offset, i;
66832b578d3Smrg
66932b578d3Smrg    ATIDRISync(pScreenInfo);
67032b578d3Smrg
67132b578d3Smrg    /* Initialize state */
67232b578d3Smrg    m3d->dp_mix = SetBits(MIX_SRC, DP_BKGD_MIX) |
67332b578d3Smrg                  SetBits(MIX_SRC, DP_FRGD_MIX);
67432b578d3Smrg
67532b578d3Smrg    m3d->dp_src = SetBits(SRC_SCALER_3D, DP_BKGD_SRC) |
67632b578d3Smrg                  SetBits(SRC_SCALER_3D, DP_FRGD_SRC) |
67732b578d3Smrg                  DP_MONO_SRC_ALLONES;
67832b578d3Smrg
67932b578d3Smrg    Mach64GetPixmapOffsetPitch(pDst, &m3d->dst_pitch_offset);
68032b578d3Smrg
68132b578d3Smrg    m3d->scale_3d_cntl = 0;
68232b578d3Smrg    m3d->tex_cntl = 0;
68332b578d3Smrg
68432b578d3Smrg    m3d->frag_src = FALSE;
68532b578d3Smrg    m3d->frag_mask = FALSE;
68632b578d3Smrg    m3d->frag_color = 0xffffffff;
68732b578d3Smrg
68832b578d3Smrg    m3d->color_alpha = FALSE;
68932b578d3Smrg
69032b578d3Smrg    m3d->transform = NULL;
69132b578d3Smrg
69232b578d3Smrg    /* Compute state */
69332b578d3Smrg    if (pMaskPicture && !Mach64PrepareMask(m3d, op, pSrcPicture, pMaskPicture,
69432b578d3Smrg                                           pSrc, pMask))
69532b578d3Smrg        return FALSE;
69632b578d3Smrg
69732b578d3Smrg    Mach64BlendCntl(m3d, op);
69832b578d3Smrg
69932b578d3Smrg    for (i = 0; i < MACH64_NR_TEX_FORMATS; i++) {
70032b578d3Smrg        if (Mach64TexFormats[i].pictFormat == pDstPicture->format)
70132b578d3Smrg            break;
70232b578d3Smrg    }
7032a51b5beSmrg    if (i == MACH64_NR_TEX_FORMATS)
704d2b10af6Smrg        MACH64_FALLBACK(("Unsupported dst format 0x%x\n",
705d2b10af6Smrg                        (int)pDstPicture->format));
70632b578d3Smrg    dstFormat = Mach64TexFormats[i].dstFormat;
70732b578d3Smrg
70832b578d3Smrg    m3d->dp_pix_width = SetBits(dstFormat, DP_DST_PIX_WIDTH) |
70932b578d3Smrg                        SetBits(dstFormat, DP_SRC_PIX_WIDTH) |
71032b578d3Smrg                        SetBits(dstFormat, DP_HOST_PIX_WIDTH);
71132b578d3Smrg
71232b578d3Smrg    if (!m3d->frag_src) {
71332b578d3Smrg        if (!Mach64PrepareTexture(pSrcPicture, pSrc))
71432b578d3Smrg            return FALSE;
71532b578d3Smrg    }
71632b578d3Smrg
71732b578d3Smrg    if (pMaskPicture && !m3d->frag_mask) {
71832b578d3Smrg        if (!Mach64PrepareTexture(pMaskPicture, pMask))
71932b578d3Smrg            return FALSE;
72032b578d3Smrg    }
72132b578d3Smrg
72232b578d3Smrg    offset = TEX_LEVEL(m3d->tex_size_pitch);
72332b578d3Smrg
72432b578d3Smrg    /* Emit state */
72532b578d3Smrg    ATIMach64WaitForFIFO(pATI, 12);
72632b578d3Smrg    outf(DP_SRC, m3d->dp_src);
72732b578d3Smrg    outf(DP_MIX, m3d->dp_mix);
72832b578d3Smrg
72932b578d3Smrg    outf(CLR_CMP_CNTL, CLR_CMP_FN_FALSE);
73032b578d3Smrg    outf(DST_CNTL, DST_X_DIR | DST_Y_DIR);
73132b578d3Smrg    outf(DST_OFF_PITCH, m3d->dst_pitch_offset);
73232b578d3Smrg
73332b578d3Smrg    outf(SCALE_3D_CNTL, m3d->scale_3d_cntl);
73432b578d3Smrg    outf(DP_WRITE_MASK, m3d->dp_write_mask);
73532b578d3Smrg    outf(DP_PIX_WIDTH, m3d->dp_pix_width);
73632b578d3Smrg
73732b578d3Smrg    outf(SETUP_CNTL, 0);
73832b578d3Smrg
73932b578d3Smrg    outf(TEX_SIZE_PITCH, m3d->tex_size_pitch);
74032b578d3Smrg    outf(TEX_CNTL, m3d->tex_cntl);
74132b578d3Smrg    outf(TEX_0_OFF + offset, m3d->tex_offset);
74232b578d3Smrg
74332b578d3Smrg    return TRUE;
74432b578d3Smrg}
74532b578d3Smrg
74632b578d3Smrg/*
74732b578d3Smrg * Vertex format, setup and emission.
74832b578d3Smrg */
74932b578d3Smrgtypedef struct {
75032b578d3Smrg    float s0;    /* normalized texture coords */
75132b578d3Smrg    float t0;
75232b578d3Smrg    float x;     /* quarter-pixels */
75332b578d3Smrg    float y;
75432b578d3Smrg    CARD32 argb; /* fragment color */
75532b578d3Smrg} Mach64Vertex;
75632b578d3Smrg
/*
 * Fill in vertex _v.
 *
 *   _v            vertex lvalue to write (any expression yielding one)
 *   _col          value stored in the argb field
 *   _dstX, _dstY  destination position in pixels; stored multiplied by 4
 *   _srcX, _srcY  texel position
 *   _dx, _dy      offsets added to _srcX / _srcY before normalization
 *
 * The texel position is divided by m3d->tex_width / m3d->tex_height, so a
 * pointer named `m3d' providing those two fields must be in scope at the
 * point of use.
 *
 * Every parameter is parenthesized in the expansion so that arguments
 * containing low-precedence operators (e.g. `cond ? 0.5 : 0.0') or
 * dereferences (e.g. `*pVertex') expand as intended.
 */
#define VTX_SET(_v, _col, _dstX, _dstY, _srcX, _dx, _srcY, _dy)   \
do {                                                              \
    (_v).s0 = ((float)(_srcX) + (_dx)) / m3d->tex_width;          \
    (_v).t0 = ((float)(_srcY) + (_dy)) / m3d->tex_height;         \
    (_v).x  = (float)(_dstX) * 4.0f;                              \
    (_v).y  = (float)(_dstY) * 4.0f;                              \
    (_v).argb = (_col);                                           \
} while (0)
76532b578d3Smrg
76632b578d3Smrgstatic __inline__ CARD32
76732b578d3SmrgFVAL(float f)
76832b578d3Smrg{
76932b578d3Smrg    union { float f; CARD32 c; } fc;
77032b578d3Smrg
77132b578d3Smrg    fc.f = f;
77232b578d3Smrg    return fc.c;
77332b578d3Smrg}
77432b578d3Smrg
/*
 * Write vertex _v to the VERTEX_<n>_* register set.
 *
 *   _v  vertex lvalue with fields s0, t0, x, y, argb
 *   n   literal slot number, pasted into the register names
 *
 * Six registers are written after waiting for six FIFO entries:
 * S, T, W (always 1.0), Z (constant 0xffff << 15), ARGB and X_Y.
 * X_Y packs x into the high 16 bits and y into the low 16 bits, each
 * truncated to CARD16.
 *
 * `pATI' must be in scope at the point of use.
 *
 * The x half is widened to CARD32 before shifting: a CARD16 promotes to
 * signed int, and shifting a value >= 0x8000 left by 16 would overflow it.
 * Macro-local names carry a trailing underscore so they cannot shadow
 * caller identifiers such as `w'.
 */
#define VTX_OUT(_v, n)                                          \
do {                                                            \
    const float  vtx_w_  = 1.0f;                                \
    const CARD32 vtx_z_  = (CARD32)0xffff << 15;                \
    const CARD32 vtx_xy_ = ((CARD32)(CARD16)(_v).x << 16) |     \
                           (CARD32)(CARD16)(_v).y;              \
                                                                \
    ATIMach64WaitForFIFO(pATI, 6);                              \
    outf(VERTEX_##n##_S, FVAL((_v).s0));                        \
    outf(VERTEX_##n##_T, FVAL((_v).t0));                        \
    outf(VERTEX_##n##_W, FVAL(vtx_w_));                         \
                                                                \
    outf(VERTEX_##n##_Z, vtx_z_);                               \
    outf(VERTEX_##n##_ARGB, (_v).argb);                         \
    outf(VERTEX_##n##_X_Y, vtx_xy_);                            \
} while (0)
79132b578d3Smrg
79232b578d3Smrg/*
79332b578d3Smrg * Composite acceleration hook.
79432b578d3Smrg */
79532b578d3Smrgvoid
79632b578d3SmrgMach64Composite
79732b578d3Smrg(
79832b578d3Smrg    PixmapPtr pDst,
79932b578d3Smrg    int       srcX,
80032b578d3Smrg    int       srcY,
80132b578d3Smrg    int       maskX,
80232b578d3Smrg    int       maskY,
80332b578d3Smrg    int       dstX,
80432b578d3Smrg    int       dstY,
80532b578d3Smrg    int       w,
80632b578d3Smrg    int       h
80732b578d3Smrg)
80832b578d3Smrg{
809e35d4d8eSmrg    ScrnInfoPtr pScreenInfo = xf86ScreenToScrn(pDst->drawable.pScreen);
81032b578d3Smrg    ATIPtr pATI = ATIPTR(pScreenInfo);
81132b578d3Smrg    Mach64ContextRegs3D *m3d = &pATI->m3d;
81232b578d3Smrg
81332b578d3Smrg    Mach64Vertex v0, v1, v2, v3;
81432b578d3Smrg    float ooa;
81532b578d3Smrg    CARD32 col;
81632b578d3Smrg    PictVector v;
817d2b10af6Smrg    struct vertcoords {
818d2b10af6Smrg        int x;
819d2b10af6Smrg        int y;
820d2b10af6Smrg    } srcvert[4];
82132b578d3Smrg    float dxy = 0.0, dwh = 0.0;
822d2b10af6Smrg    int i;
82332b578d3Smrg
82432b578d3Smrg    ATIDRISync(pScreenInfo);
82532b578d3Smrg
82632b578d3Smrg    /* Disable clipping if it gets in the way */
82732b578d3Smrg    ATIMach64ValidateClip(pATI, dstX, dstX + w - 1, dstY, dstY + h - 1);
82832b578d3Smrg
82932b578d3Smrg    /* Handle solid textures which come in as fragment color */
83032b578d3Smrg    col = m3d->frag_color;
83132b578d3Smrg    if (m3d->frag_src) {
83232b578d3Smrg        srcX = maskX;
83332b578d3Smrg        srcY = maskY;
83432b578d3Smrg    }
83532b578d3Smrg
83632b578d3Smrg    /* Handle transform */
837d2b10af6Smrg    srcvert[0].x = srcX;
838d2b10af6Smrg    srcvert[0].y = srcY;
839d2b10af6Smrg    srcvert[1].x = srcX + w;
840d2b10af6Smrg    srcvert[1].y = srcY;
841d2b10af6Smrg    srcvert[2].x = srcX + w;
842d2b10af6Smrg    srcvert[2].y = srcY + h;
843d2b10af6Smrg    srcvert[3].x = srcX;
844d2b10af6Smrg    srcvert[3].y = srcY + h;
84532b578d3Smrg    if (m3d->transform) {
846d2b10af6Smrg        for (i = 0; i < 4; i++) {
847d2b10af6Smrg            v.vector[0] = IntToxFixed(srcvert[i].x);
848d2b10af6Smrg            v.vector[1] = IntToxFixed(srcvert[i].y);
849d2b10af6Smrg            v.vector[2] = xFixed1;
850d2b10af6Smrg            PictureTransformPoint(m3d->transform, &v);
851d2b10af6Smrg            srcvert[i].x = xFixedToInt(v.vector[0]);
852d2b10af6Smrg            srcvert[i].y = xFixedToInt(v.vector[1]);
853d2b10af6Smrg        }
85432b578d3Smrg
85532b578d3Smrg#if 0
85632b578d3Smrg        /* Bilinear needs manipulation of texture coordinates */
85732b578d3Smrg        if (m3d->scale_3d_cntl & MACH64_BILINEAR_TEX_EN) {
85832b578d3Smrg            dxy =  0.5;
85932b578d3Smrg            dwh = -1.0;
86032b578d3Smrg        }
86132b578d3Smrg#endif
86232b578d3Smrg    }
86332b578d3Smrg
86432b578d3Smrg    /* Create vertices in clock-wise order */
865d2b10af6Smrg    VTX_SET(v0, col, dstX,     dstY,     srcvert[0].x, dxy, srcvert[0].y, dxy);
866d2b10af6Smrg    VTX_SET(v1, col, dstX + w, dstY,     srcvert[1].x, dwh, srcvert[1].y, dxy);
867d2b10af6Smrg    VTX_SET(v2, col, dstX + w, dstY + h, srcvert[2].x, dwh, srcvert[2].y, dwh);
868d2b10af6Smrg    VTX_SET(v3, col, dstX,     dstY + h, srcvert[3].x, dxy, srcvert[3].y, dwh);
86932b578d3Smrg
87032b578d3Smrg    /* Setup upper triangle (v0, v1, v3) */
87132b578d3Smrg    VTX_OUT(v0, 1);
87232b578d3Smrg    VTX_OUT(v1, 2);
87332b578d3Smrg    VTX_OUT(v3, 3);
87432b578d3Smrg
87532b578d3Smrg    ooa = 1.0 / (w * h);
87632b578d3Smrg    outf(ONE_OVER_AREA, FVAL(ooa));
87732b578d3Smrg
87832b578d3Smrg    /* Setup lower triangle (v2, v1, v3) */
87932b578d3Smrg    VTX_OUT(v2, 1);
88032b578d3Smrg
88132b578d3Smrg    ooa = -ooa;
88232b578d3Smrg    outf(ONE_OVER_AREA, FVAL(ooa));
88332b578d3Smrg}
88432b578d3Smrg
88532b578d3Smrg/*
88632b578d3Smrg * DoneComposite acceleration hook.
88732b578d3Smrg */
88832b578d3Smrgvoid
88932b578d3SmrgMach64DoneComposite(PixmapPtr pDst)
89032b578d3Smrg{
891e35d4d8eSmrg    ScrnInfoPtr pScreenInfo = xf86ScreenToScrn(pDst->drawable.pScreen);
89232b578d3Smrg    ATIPtr pATI = ATIPTR(pScreenInfo);
89332b578d3Smrg
89432b578d3Smrg    ATIDRISync(pScreenInfo);
89532b578d3Smrg
89632b578d3Smrg    outf(SCALE_3D_CNTL, 0);
89732b578d3Smrg}
898