atimach64render.c revision 2a51b5be
1/*
2 * Copyright 2006 George Sapountzis
3 * All Rights Reserved.
4 *
5 * Based on the mach64 DRI and DRM drivers:
6 * Copyright 2000 Gareth Hughes
7 * Copyright 2002-2003 Leif Delgass
8 * All Rights Reserved.
9 *
10 * Based on the ati hw/kdrive driver:
11 * Copyright 2003 Eric Anholt, Anders Carlsson
12 *
13 * Based on the via hw/xfree86 driver:
14 * Copyright 2006 Thomas Hellstrom. All Rights Reserved.
15 *
16 * Permission is hereby granted, free of charge, to any person obtaining a
17 * copy of this software and associated documentation files (the "Software"),
18 * to deal in the Software without restriction, including without limitation
19 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
20 * and/or sell copies of the Software, and to permit persons to whom the
21 * Software is furnished to do so, subject to the following conditions:
22 *
23 * The above copyright notice and this permission notice (including the next
24 * paragraph) shall be included in all copies or substantial portions of the
25 * Software.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
30 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
31 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
32 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 *
35 * Authors:
36 *    George Sapountzis <gsap7@yahoo.gr>
37 */
38
39/*
40 * Interesting cases for RENDER acceleration:
41 *
42 * cursor      : ARGB8888 (24x24)   Over
43 *               RGB565
44 *
45 * glyph       : A8       (9x10)    Add
46 *               A8       (420x13)
47 * glyph set   : ARGB8888 (1x1 R)   In
48 *               A8       (420x13)  Over
49 *               RGB565
50 *
51 * shadow      : ARGB8888 (1x1 R)   In
52 *               A8       (670x362) Over
53 *               RGB565
54 * translucent : RGB565   (652x344) In
55 *               A8       (1x1 R)   Over
56 *               RGB565
57 *
58 * In all interesting cases one of src/mask is "1x1 R".
59 */
60
61/*
62 * Assumptions and limitations of mach64 RENDER acceleration:
63 *
64 * RENDER acceleration is supported for GTPRO and later chips using the 3D
65 * triangle setup, i.e. the VERTEX_? registers (see the dri driver). According
66 * to atiregs.h, SCALE_3D_CNTL and TEX_?_OFF appear in GT, thus chips as old
67 * as GT should be capable of RENDER acceleration, using the S_?_INC, T_?_INC
68 * registers for texture mapping (see the directfb driver).
69 *
70 * GTPRO added a triangle setup engine and multitexturing. However, it seems
71 * that none of the 8bpp mach64 formats expands the 8bit value to the alpha
72 * channel in texture mapping, RGB8 appears to expand to (I,I,I,0). This makes
73 * GTPRO multitexturing unsuitable for emulating the IN operation. Moreover,
74 * it seems that GT/GTPRO has a multiplexer instead of a blender for computing
75 * the final alpha channel which forbids destinations with an alpha channel and
76 * generic two-pass compositing.
77 *
78 * A texture unit combines the fragment color (VERTEX_?_ARGB) coming in from
79 * triangle rasterization with the texel from the texture according to the
80 * texture environment (TEX_LIGHT_FCN_). "1x1 R" textures may come in as frag-
81 * ment colors, eliminating the need for multitexturing in all interesting
82 * cases (via also uses this optimization).
83 *
84 * Texture registers are saved/restored and cached (see atimach64.c). TEX_CNTL
85 * cannot be cached because it flushes the texture cache. TEX_?_OFF are also
86 * not cached because I am not sure whether writing at some offset register
87 * affects the value at another offset.
88 *
89 * Vertex registers are not saved/restored. This shouldn't be a problem though
90 * either for DRI or VT switch because vertex registers are set and used within
91 * a single acceleration hook. Synchronization between the DDX and DRI is based
92 * on calling ATIDRISync() at the beginning of each DDX acceleration hook,
93 * which suggests the assumption that individual acceleration hooks are not
94 * interrupted.
95 */
96
97#include <string.h>
98#include <stdio.h>
99
100/*
101 * Helper functions copied from exa and via.
102 */
103
104#if 0
105static void
106Mach64ExaCompositePictDesc(PicturePtr pict, char *string, int n)
107{
108    char format[20];
109    char size[20];
110
111    if (!pict) {
112        snprintf(string, n, "None");
113        return;
114    }
115
116    switch (pict->format) {
117    case PICT_x8r8g8b8:
118        snprintf(format, 20, "RGB8888 ");
119        break;
120    case PICT_x8b8g8r8:
121        snprintf(format, 20, "BGR8888 ");
122        break;
123    case PICT_a8r8g8b8:
124        snprintf(format, 20, "ARGB8888");
125        break;
126    case PICT_a8b8g8r8:
127        snprintf(format, 20, "ABGR8888");
128        break;
129    case PICT_r5g6b5:
130        snprintf(format, 20, "RGB565  ");
131        break;
132    case PICT_x1r5g5b5:
133        snprintf(format, 20, "RGB555  ");
134        break;
135    case PICT_a8:
136        snprintf(format, 20, "A8      ");
137        break;
138    case PICT_a1:
139        snprintf(format, 20, "A1      ");
140        break;
141    default:
142        snprintf(format, 20, "0x%x", (int)pict->format);
143        break;
144    }
145
146    snprintf(size, 20, "%dx%d%s%s",
147        pict->pDrawable->width,
148        pict->pDrawable->height,
149        pict->repeat ? " R" : "",
150        pict->componentAlpha ? " C" : ""
151    );
152
153    snprintf(string, n, "%-10p: fmt %s (%s)", (void *)pict->pDrawable, format, size);
154}
155
156static void
157Mach64ExaPrintComposite(CARD8 op,
158    PicturePtr pSrc, PicturePtr pMask, PicturePtr pDst, char *string)
159{
160    char sop[20];
161    char srcdesc[40], maskdesc[40], dstdesc[40];
162
163    switch (op) {
164    case PictOpSrc:
165        sprintf(sop, "Src");
166        break;
167    case PictOpOver:
168        sprintf(sop, "Over");
169        break;
170    case PictOpInReverse:
171        sprintf(sop, "InR");
172        break;
173    case PictOpOutReverse:
174        sprintf(sop, "OutR");
175        break;
176    case PictOpAdd:
177        sprintf(sop, "Add");
178        break;
179    default:
180        sprintf(sop, "0x%x", (int)op);
181        break;
182    }
183
184    Mach64ExaCompositePictDesc(pSrc, srcdesc, 40);
185    Mach64ExaCompositePictDesc(pMask, maskdesc, 40);
186    Mach64ExaCompositePictDesc(pDst, dstdesc, 40);
187
188    sprintf(string, "op %s, \n"
189        "                src  %s\n"
190        "                mask %s\n"
191        "                dst  %s\n", sop, srcdesc, maskdesc, dstdesc);
192}
193#endif
194
/*
 * Widen a color component that is `bits` wide to 8 bits.
 *
 * The component is moved to the top of the byte; the freed low bits are
 * filled with ones when the component's least-significant bit is set and
 * left at zero otherwise. Expects bits <= 8.
 */
static __inline__ CARD32
viaBitExpandHelper(CARD32 component, CARD32 bits)
{
    const CARD32 pad = 8 - bits;
    CARD32 widened = component << pad;

    if (component & 1)
        widened |= (1 << pad) - 1;

    return widened;
}
204
205static __inline__ void
206Mach64PixelARGB(PixmapPtr pPixmap, CARD32 format, CARD32 *argb)
207{
208    CARD32 pixel;
209    CARD8  comp;
210    int    bits, shift;
211
212    /* Ensure that texture drawing has completed. */
213    exaWaitSync(pPixmap->drawable.pScreen);
214
215    /* exaGetPixmapFirstPixel() */
216
217    switch (pPixmap->drawable.bitsPerPixel) {
218    case 32:
219        pixel = *(CARD32 *)(pPixmap->devPrivate.ptr);
220        break;
221    case 16:
222        pixel = *(CARD16 *)(pPixmap->devPrivate.ptr);
223        break;
224    default:
225        pixel = *(CARD8 *)(pPixmap->devPrivate.ptr);
226        break;
227    }
228
229    /* exaGetRGBAFromPixel()/viaPixelARGB8888() */
230
231    switch (PICT_FORMAT_TYPE(format)) {
232    case PICT_TYPE_A:
233        shift = 0;
234        bits = PICT_FORMAT_A(format);
235        comp = (pixel >> shift) & ((1 << bits) - 1);
236        comp = viaBitExpandHelper(comp, bits);
237        *argb = comp << 24;
238        break;
239    case PICT_TYPE_ARGB:
240        shift = 0;
241        bits = PICT_FORMAT_B(format);
242        comp = (pixel >> shift) & ((1 << bits) - 1);
243        comp = viaBitExpandHelper(comp, bits);
244        *argb = comp;
245
246        shift += bits;
247        bits = PICT_FORMAT_G(format);
248        comp = (pixel >> shift) & ((1 << bits) - 1);
249        comp = viaBitExpandHelper(comp, bits);
250        *argb |= comp << 8;
251
252        shift += bits;
253        bits = PICT_FORMAT_R(format);
254        comp = (pixel >> shift) & ((1 << bits) - 1);
255        comp = viaBitExpandHelper(comp, bits);
256        *argb |= comp << 16;
257
258        shift += bits;
259        bits = PICT_FORMAT_A(format);
260        if (bits) {
261            comp = (pixel >> shift) & ((1 << bits) - 1);
262            comp = viaBitExpandHelper(comp, bits);
263        } else {
264            comp = 0xff;
265        }
266        *argb |= comp << 24;
267        break;
268    case PICT_TYPE_ABGR:
269        break;
270    default:
271        break;
272    }
273}
274
275/*
276 * RENDER acceleration for mach64
277 */
278
279typedef struct {
280    Bool supported;
281    CARD32 scale_3d_cntl;
282} Mach64BlendOp;
283
284static Mach64BlendOp Mach64BlendOps[] = {
285    /* Clear */
286    {1, MACH64_ALPHA_BLEND_SRC_ZERO        | MACH64_ALPHA_BLEND_DST_ZERO},
287    /* Src */
288    {1, MACH64_ALPHA_BLEND_SRC_ONE         | MACH64_ALPHA_BLEND_DST_ZERO},
289    /* Dst */
290    {1, MACH64_ALPHA_BLEND_SRC_ZERO        | MACH64_ALPHA_BLEND_DST_ONE},
291    /* Over */
292    {1, MACH64_ALPHA_BLEND_SRC_ONE         | MACH64_ALPHA_BLEND_DST_INVSRCALPHA},
293    /* OverReverse */
294    {1, MACH64_ALPHA_BLEND_SRC_INVDSTALPHA | MACH64_ALPHA_BLEND_DST_ONE},
295    /* In */
296    {1, MACH64_ALPHA_BLEND_SRC_DSTALPHA    | MACH64_ALPHA_BLEND_DST_ZERO},
297    /* InReverse */
298    {1, MACH64_ALPHA_BLEND_SRC_ZERO        | MACH64_ALPHA_BLEND_DST_SRCALPHA},
299    /* Out */
300    {1, MACH64_ALPHA_BLEND_SRC_INVDSTALPHA | MACH64_ALPHA_BLEND_DST_ZERO},
301    /* OutReverse */
302    {1, MACH64_ALPHA_BLEND_SRC_ZERO        | MACH64_ALPHA_BLEND_DST_INVSRCALPHA},
303    /* Atop */
304    {0, MACH64_ALPHA_BLEND_SRC_DSTALPHA    | MACH64_ALPHA_BLEND_DST_INVSRCALPHA},
305    /* AtopReverse */
306    {0, MACH64_ALPHA_BLEND_SRC_INVDSTALPHA | MACH64_ALPHA_BLEND_DST_SRCALPHA},
307    /* Xor */
308    {1, MACH64_ALPHA_BLEND_SRC_INVDSTALPHA | MACH64_ALPHA_BLEND_DST_INVSRCALPHA},
309    /* Add */
310    {1, MACH64_ALPHA_BLEND_SRC_ONE         | MACH64_ALPHA_BLEND_DST_ONE}
311};
312
313#define MACH64_NR_BLEND_OPS \
314    (sizeof(Mach64BlendOps) / sizeof(Mach64BlendOps[0]))
315
316typedef struct {
317    CARD32 pictFormat;
318    CARD32 dstFormat;
319    CARD32 texFormat;
320} Mach64TexFormat;
321
322static Mach64TexFormat Mach64TexFormats[] = {
323    {PICT_a8r8g8b8, -1,                       MACH64_DATATYPE_ARGB8888},
324    {PICT_x8r8g8b8, MACH64_DATATYPE_ARGB8888, MACH64_DATATYPE_ARGB8888},
325    {PICT_a1r5g5b5, -1,                       MACH64_DATATYPE_ARGB1555},
326    {PICT_x1r5g5b5, MACH64_DATATYPE_ARGB1555, MACH64_DATATYPE_ARGB1555},
327    {PICT_r5g6b5,   MACH64_DATATYPE_RGB565,   MACH64_DATATYPE_RGB565  },
328    {PICT_a8,       MACH64_DATATYPE_RGB8,     MACH64_DATATYPE_RGB8    }
329};
330
331#define MACH64_NR_TEX_FORMATS \
332    (sizeof(Mach64TexFormats) / sizeof(Mach64TexFormats[0]))
333
/*
 * True when _pPict is a solid ("1x1 R") picture: non-NULL, backed by a
 * 1x1 drawable and repeating. NULL pictures and pictures without a
 * drawable evaluate to false. The argument is evaluated several times, so
 * it must not have side effects.
 */
#define MACH64_PICT_IS_1x1R(_pPict)      \
    ((_pPict) &&                         \
     (_pPict)->pDrawable &&              \
     (_pPict)->pDrawable->width == 1 &&  \
     (_pPict)->pDrawable->height == 1 && \
     (_pPict)->repeat)
339
340/*
341 * CheckComposite hook helper functions.
342 */
/*
 * Compute the base-2 order of val: the smallest shift for which
 * (1 << shift) >= val. The order is stored through shift.
 *
 * Returns non-zero when val is an exact power of two and zero otherwise.
 * Values <= 0 yield shift 0 and a zero return. The search uses unsigned
 * arithmetic and is bounded by the width of unsigned int, so very large
 * values cannot overflow the shift or loop forever.
 */
static __inline__ Bool
Mach64GetOrder(int val, int *shift)
{
    const int maxOrder = (int)(sizeof(unsigned int) * 8) - 1;
    unsigned int uval = 0;
    int order = 0;

    if (val > 0) {
        uval = (unsigned int)val;
        while (order < maxOrder && uval > (1u << order))
            order++;
    }

    *shift = order;

    return (val > 0 && uval == (1u << order));
}
353
354static Bool
355Mach64CheckTexture(PicturePtr pPict)
356{
357    int w = pPict->pDrawable->width;
358    int h = pPict->pDrawable->height;
359    int l2w, l2h, level, i;
360
361    for (i = 0; i < MACH64_NR_TEX_FORMATS; i++) {
362        if (Mach64TexFormats[i].pictFormat == pPict->format)
363            break;
364    }
365
366    if (i == MACH64_NR_TEX_FORMATS)
367        MACH64_FALLBACK(("Unsupported picture format 0x%x\n",
368                        (int)pPict->format));
369
370    /* l2w equals l2p (pitch) for all interesting cases (w >= 64) */
371    Mach64GetOrder(w, &l2w);
372    Mach64GetOrder(h, &l2h);
373
374    level = (l2w > l2h) ? l2w : l2h;
375
376    if (level > 10)
377        MACH64_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
378
379    return TRUE;
380}
381
382/*
383 * CheckComposite acceleration hook.
384 */
385Bool
386Mach64CheckComposite
387(
388    int        op,
389    PicturePtr pSrcPicture,
390    PicturePtr pMaskPicture,
391    PicturePtr pDstPicture
392)
393{
394    Bool src_solid, mask_solid, mask_comp, op_comp;
395    int i;
396
397    if (op >= MACH64_NR_BLEND_OPS || !Mach64BlendOps[op].supported)
398        return FALSE;
399
400    if (!Mach64CheckTexture(pSrcPicture))
401        return FALSE;
402
403    if (pMaskPicture && !Mach64CheckTexture(pMaskPicture))
404        return FALSE;
405
406    /* Check destination format */
407
408    for (i = 0; i < MACH64_NR_TEX_FORMATS; i++) {
409        if (Mach64TexFormats[i].pictFormat == pDstPicture->format)
410            break;
411    }
412
413    if (i == MACH64_NR_TEX_FORMATS || Mach64TexFormats[i].dstFormat == -1)
414        MACH64_FALLBACK(("Unsupported dst format 0x%x\n",
415                        (int)pDstPicture->format));
416
417    /* Check that A8 src/dst appears only as "A8 ADD A8" */
418
419    if (pDstPicture->format == PICT_a8) {
420        if (pMaskPicture || pSrcPicture->format != PICT_a8 || op != PictOpAdd)
421            MACH64_FALLBACK(("A8 dst with mask or non-A8 src.\n"));
422    }
423
424    if (pDstPicture->format != PICT_a8) {
425        if (pSrcPicture->format == PICT_a8)
426            MACH64_FALLBACK(("A8 src with non-A8 dst.\n"));
427    }
428
429    /* Check that one of src/mask can come in as the fragment color. */
430
431    src_solid = MACH64_PICT_IS_1x1R(pSrcPicture);
432
433    mask_solid = MACH64_PICT_IS_1x1R(pMaskPicture);
434
435    mask_comp = pMaskPicture && pMaskPicture->componentAlpha;
436
437    op_comp = op == PictOpAdd ||
438              op == PictOpInReverse ||
439              op == PictOpOutReverse;
440
441    if (mask_solid && src_solid)
442        MACH64_FALLBACK(("Bad one-pixel IN composite operation.\n"));
443
444    if (pMaskPicture) {
445        if (!mask_solid && !src_solid)
446            MACH64_FALLBACK(("Multitexturing required.\n"));
447
448        if (!mask_solid && !op_comp)
449            MACH64_FALLBACK(("Non-solid mask.\n"));
450
451        if (mask_comp && !src_solid)
452            MACH64_FALLBACK(("Component-alpha mask.\n"));
453
454        if (!mask_comp && pMaskPicture->format != PICT_a8)
455            MACH64_FALLBACK(("Non-A8 mask.\n"));
456
457        if (mask_comp && pMaskPicture->format != PICT_a8r8g8b8)
458            MACH64_FALLBACK(("Non-ARGB mask.\n"));
459    }
460
461    return TRUE;
462}
463
464/*
465 * This function setups the fragment color from a solid pixmap in the presence
466 * of a mask.
467 */
468static __inline__ Bool
469Mach64PrepareMask
470(
471    Mach64ContextRegs3D *m3d,
472    int        op,
473    PicturePtr pSrcPicture,
474    PicturePtr pMaskPicture,
475    PixmapPtr  pSrc,
476    PixmapPtr  pMask
477)
478{
479    Bool mask_solid, src_solid;
480    CARD32 argb = 0;
481
482    mask_solid = MACH64_PICT_IS_1x1R(pMaskPicture);
483
484    src_solid = MACH64_PICT_IS_1x1R(pSrcPicture);
485
486    if (mask_solid) {
487        Mach64PixelARGB(pMask, pMaskPicture->format, &argb);
488        argb >>= 24;
489        argb &= 0xff;
490
491        m3d->frag_mask = TRUE;
492        m3d->frag_color = (argb << 24) | (argb << 16) | (argb << 8) | argb;
493        return TRUE;
494    }
495
496    if (src_solid) {
497        /* We can only handle cases where either the src color (e.g. ADD) or
498         * the src alpha (e.g. IN_REV, OUT_REV) is used but not both.
499         *
500         * (ARGB8888 IN A8) OVER RGB565 is implemented as:
501         * (ARGB8888 IN A8) ADD ((ARGB8888 IN A8) OUT_REV RGB565).
502         */
503        if (op == PictOpInReverse || op == PictOpOutReverse) {
504            Mach64PixelARGB(pSrc, pSrcPicture->format, &argb);
505            argb >>= 24;
506            argb &= 0xff;
507
508            m3d->frag_src = TRUE;
509            m3d->frag_color = (argb << 24) | (argb << 16) | (argb << 8) | argb;
510            m3d->color_alpha = TRUE;
511            return TRUE;
512        }
513
514        if (op == PictOpAdd) {
515            Mach64PixelARGB(pSrc, pSrcPicture->format, &argb);
516
517            m3d->frag_src = TRUE;
518            m3d->frag_color = argb;
519            return TRUE;
520        }
521    }
522
523    return FALSE;
524}
525
526/*
527 * This function setups the texturing and blending environments. It also
528 * manipulates blend control for non-solid masks.
529 */
530static void __inline__
531Mach64BlendCntl(Mach64ContextRegs3D *m3d, int op)
532{
533    m3d->scale_3d_cntl |= MACH64_SCALE_PIX_EXPAND_DYNAMIC_RANGE |
534                          MACH64_SCALE_DITHER_2D_TABLE |
535                          MACH64_DITHER_INIT_RESET;
536
537    m3d->scale_3d_cntl |= Mach64BlendOps[op].scale_3d_cntl;
538
539    if (m3d->color_alpha) {
540        /* A8 uses RGB8 which expands to (I,I,I,0). Thus, we use the color
541         * channels instead of the alpha channel as the alpha factor. We also
542         * use the color channels for ARGB8888 masks with component-alpha.
543         */
544        CARD32 Ad = m3d->scale_3d_cntl & MACH64_ALPHA_BLEND_DST_MASK;
545
546        /* InReverse */
547        if (Ad == MACH64_ALPHA_BLEND_DST_SRCALPHA) {
548            m3d->scale_3d_cntl &= ~MACH64_ALPHA_BLEND_DST_MASK;
549            m3d->scale_3d_cntl |=  MACH64_ALPHA_BLEND_DST_SRCCOLOR;
550        }
551
552        /* OutReverse */
553        if (Ad == MACH64_ALPHA_BLEND_DST_INVSRCALPHA) {
554            m3d->scale_3d_cntl &= ~MACH64_ALPHA_BLEND_DST_MASK;
555            m3d->scale_3d_cntl |=  MACH64_ALPHA_BLEND_DST_INVSRCCOLOR;
556        }
557    }
558
559    /* Can't color mask and blend at the same time */
560    m3d->dp_write_mask = 0xffffffff;
561
562    /* Can't fog and blend at the same time */
563    m3d->scale_3d_cntl |= MACH64_ALPHA_FOG_EN_ALPHA;
564
565    /* Enable texture mapping mode */
566    m3d->scale_3d_cntl |= MACH64_SCALE_3D_FCN_TEXTURE;
567    m3d->scale_3d_cntl |= MACH64_MIP_MAP_DISABLE;
568
569    /* Setup the texture environment */
570    m3d->scale_3d_cntl |= MACH64_TEX_LIGHT_FCN_MODULATE;
571
572    /* Initialize texture unit */
573    m3d->tex_cntl |= MACH64_TEX_ST_DIRECT |
574                     MACH64_TEX_SRC_LOCAL |
575                     MACH64_TEX_UNCOMPRESSED |
576                     MACH64_TEX_CACHE_FLUSH |
577                     MACH64_TEX_CACHE_SIZE_4K;
578}
579
580/*
581 * This function setups the texture unit.
582 */
583static Bool
584Mach64PrepareTexture(PicturePtr pPict, PixmapPtr pPix)
585{
586    ScrnInfoPtr pScreenInfo = xf86Screens[pPix->drawable.pScreen->myNum];
587    ATIPtr pATI = ATIPTR(pScreenInfo);
588    Mach64ContextRegs3D *m3d = &pATI->m3d;
589
590    CARD32 texFormat;
591
592    int w = pPict->pDrawable->width;
593    int h = pPict->pDrawable->height;
594    int l2w, l2h, l2p, level, pitch, cpp, i;
595
596    /* Prepare picture format */
597    for (i = 0; i < MACH64_NR_TEX_FORMATS; i++) {
598        if (Mach64TexFormats[i].pictFormat == pPict->format)
599            break;
600    }
601    if (i == MACH64_NR_TEX_FORMATS)
602        MACH64_FALLBACK(("Unsupported picture format 0x%x\n",
603                        (int)pPict->format));
604    texFormat = Mach64TexFormats[i].texFormat;
605
606    /* Prepare picture size */
607    cpp = PICT_FORMAT_BPP(pPict->format) / 8;
608    pitch = exaGetPixmapPitch(pPix) / cpp;
609
610    Mach64GetOrder(w, &l2w);
611    Mach64GetOrder(h, &l2h);
612    Mach64GetOrder(pitch, &l2p);
613
614    if (pPict->repeat && w == 1 && h == 1)
615        l2p = 0;
616    else if (pPict->repeat)
617        MACH64_FALLBACK(("Repeat not supported for w,h != 1,1\n"));
618
619    l2w = l2p;
620
621    level = (l2w > l2h) ? l2w : l2h;
622
623    m3d->tex_width = (1 << l2w);
624    m3d->tex_height = (1 << l2h);
625
626    /* Update hw state */
627    m3d->dp_pix_width |= SetBits(texFormat, DP_SCALE_PIX_WIDTH);
628
629    m3d->tex_size_pitch = (l2w   << 0) |
630                          (level << 4) |
631                          (l2h   << 8);
632
633    m3d->tex_offset = exaGetPixmapOffset(pPix);
634
635    if (PICT_FORMAT_A(pPict->format))
636        m3d->scale_3d_cntl |= MACH64_TEX_MAP_AEN;
637
638    switch (pPict->filter) {
639    case PictFilterNearest:
640        m3d->scale_3d_cntl |= MACH64_TEX_BLEND_FCN_NEAREST;
641        break;
642    case PictFilterBilinear:
643        /* FIXME */
644#if 0
645        m3d->scale_3d_cntl |= MACH64_TEX_BLEND_FCN_LINEAR;
646        m3d->scale_3d_cntl |= MACH64_BILINEAR_TEX_EN;
647#endif
648        MACH64_FALLBACK(("Bilinear filter 0x%x\n", pPict->filter));
649        break;
650    default:
651        MACH64_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
652    }
653
654    m3d->transform = pPict->transform;
655
656    return TRUE;
657}
658
659/*
660 * PrepareComposite acceleration hook.
661 */
662Bool
663Mach64PrepareComposite
664(
665    int        op,
666    PicturePtr pSrcPicture,
667    PicturePtr pMaskPicture,
668    PicturePtr pDstPicture,
669    PixmapPtr  pSrc,
670    PixmapPtr  pMask,
671    PixmapPtr  pDst
672)
673{
674    ScrnInfoPtr pScreenInfo = xf86Screens[pDst->drawable.pScreen->myNum];
675    ATIPtr pATI = ATIPTR(pScreenInfo);
676    Mach64ContextRegs3D *m3d = &pATI->m3d;
677
678    CARD32 dstFormat;
679    int offset, i;
680
681    ATIDRISync(pScreenInfo);
682
683    /* Initialize state */
684    m3d->dp_mix = SetBits(MIX_SRC, DP_BKGD_MIX) |
685                  SetBits(MIX_SRC, DP_FRGD_MIX);
686
687    m3d->dp_src = SetBits(SRC_SCALER_3D, DP_BKGD_SRC) |
688                  SetBits(SRC_SCALER_3D, DP_FRGD_SRC) |
689                  DP_MONO_SRC_ALLONES;
690
691    Mach64GetPixmapOffsetPitch(pDst, &m3d->dst_pitch_offset);
692
693    m3d->scale_3d_cntl = 0;
694    m3d->tex_cntl = 0;
695
696    m3d->frag_src = FALSE;
697    m3d->frag_mask = FALSE;
698    m3d->frag_color = 0xffffffff;
699
700    m3d->color_alpha = FALSE;
701
702    m3d->transform = NULL;
703
704    /* Compute state */
705    if (pMaskPicture && !Mach64PrepareMask(m3d, op, pSrcPicture, pMaskPicture,
706                                           pSrc, pMask))
707        return FALSE;
708
709    Mach64BlendCntl(m3d, op);
710
711    for (i = 0; i < MACH64_NR_TEX_FORMATS; i++) {
712        if (Mach64TexFormats[i].pictFormat == pDstPicture->format)
713            break;
714    }
715    if (i == MACH64_NR_TEX_FORMATS)
716        MACH64_FALLBACK(("Unsupported picture format 0x%x\n",
717                        (int)pPict->format));
718    dstFormat = Mach64TexFormats[i].dstFormat;
719
720    m3d->dp_pix_width = SetBits(dstFormat, DP_DST_PIX_WIDTH) |
721                        SetBits(dstFormat, DP_SRC_PIX_WIDTH) |
722                        SetBits(dstFormat, DP_HOST_PIX_WIDTH);
723
724    if (!m3d->frag_src) {
725        if (!Mach64PrepareTexture(pSrcPicture, pSrc))
726            return FALSE;
727    }
728
729    if (pMaskPicture && !m3d->frag_mask) {
730        if (!Mach64PrepareTexture(pMaskPicture, pMask))
731            return FALSE;
732    }
733
734    offset = TEX_LEVEL(m3d->tex_size_pitch);
735
736    /* Emit state */
737    ATIMach64WaitForFIFO(pATI, 12);
738    outf(DP_SRC, m3d->dp_src);
739    outf(DP_MIX, m3d->dp_mix);
740
741    outf(CLR_CMP_CNTL, CLR_CMP_FN_FALSE);
742    outf(DST_CNTL, DST_X_DIR | DST_Y_DIR);
743    outf(DST_OFF_PITCH, m3d->dst_pitch_offset);
744
745    outf(SCALE_3D_CNTL, m3d->scale_3d_cntl);
746    outf(DP_WRITE_MASK, m3d->dp_write_mask);
747    outf(DP_PIX_WIDTH, m3d->dp_pix_width);
748
749    outf(SETUP_CNTL, 0);
750
751    outf(TEX_SIZE_PITCH, m3d->tex_size_pitch);
752    outf(TEX_CNTL, m3d->tex_cntl);
753    outf(TEX_0_OFF + offset, m3d->tex_offset);
754
755    return TRUE;
756}
757
/*
 * Vertex format, setup and emission.
 */
typedef struct {
    float s0;    /* normalized texture coords */
    float t0;
    float x;     /* quarter-pixels */
    float y;
    CARD32 argb; /* fragment color */
} Mach64Vertex;

/*
 * Fill vertex _v: the texture coordinates are the source position plus an
 * offset, normalized by the texture size; the destination position is
 * scaled by 4 (quarter-pixels); _col is the fragment color.
 *
 * Expects a pointer named m3d with tex_width/tex_height members to be in
 * scope at the point of use. Every argument is parenthesized so that
 * arbitrary expressions can be passed.
 */
#define VTX_SET(_v, _col, _dstX, _dstY, _srcX, _dx, _srcY, _dy)   \
do {                                                              \
    (_v).s0 = ((float)(_srcX) + (_dx)) / m3d->tex_width;          \
    (_v).t0 = ((float)(_srcY) + (_dy)) / m3d->tex_height;         \
    (_v).x  = ((float)(_dstX) * 4.0);                             \
    (_v).y  = ((float)(_dstY) * 4.0);                             \
    (_v).argb = (_col);                                           \
} while (0)
777
/*
 * Return the raw bit pattern of float f as a 32-bit word, so that it can be
 * written to a register unchanged. Assumes float is 32 bits wide.
 */
static __inline__ CARD32
FVAL(float f)
{
    CARD32 raw = 0;

    memcpy(&raw, &f, sizeof(raw));

    return raw;
}
786
/*
 * Write vertex _v to the VERTEX_<n>_* registers: S, T, W (fixed at 1.0),
 * Z (fixed), ARGB and the packed X_Y word with x in the upper and y in the
 * lower 16 bits. Waits for 6 FIFO entries and performs 6 register writes.
 *
 * Expects pATI to be in scope at the point of use. The temporaries carry a
 * vtx_ prefix so that they cannot shadow caller variables such as w, and
 * the x coordinate is widened to CARD32 before it is shifted.
 */
#define VTX_OUT(_v, n)                                      \
do {                                                        \
    float vtx_w_ = 1.0;                                     \
    CARD32 vtx_z_ = 0xffff << 15;                           \
    CARD32 vtx_x_y_ = ((CARD32)(CARD16)(_v).x << 16) |      \
                      ((CARD16)(_v).y & 0xffff);            \
                                                            \
    ATIMach64WaitForFIFO(pATI, 6);                          \
    outf(VERTEX_##n##_S, FVAL((_v).s0));                    \
    outf(VERTEX_##n##_T, FVAL((_v).t0));                    \
    outf(VERTEX_##n##_W, FVAL(vtx_w_));                     \
                                                            \
    outf(VERTEX_##n##_Z, vtx_z_);                           \
    outf(VERTEX_##n##_ARGB, (_v).argb);                     \
    outf(VERTEX_##n##_X_Y, vtx_x_y_);                       \
} while (0)
803
804/*
805 * Composite acceleration hook.
806 */
807void
808Mach64Composite
809(
810    PixmapPtr pDst,
811    int       srcX,
812    int       srcY,
813    int       maskX,
814    int       maskY,
815    int       dstX,
816    int       dstY,
817    int       w,
818    int       h
819)
820{
821    ScrnInfoPtr pScreenInfo = xf86Screens[pDst->drawable.pScreen->myNum];
822    ATIPtr pATI = ATIPTR(pScreenInfo);
823    Mach64ContextRegs3D *m3d = &pATI->m3d;
824
825    Mach64Vertex v0, v1, v2, v3;
826    float ooa;
827    CARD32 col;
828    PictVector v;
829    int srcXend, srcYend;
830    float dxy = 0.0, dwh = 0.0;
831
832    ATIDRISync(pScreenInfo);
833
834    /* Disable clipping if it gets in the way */
835    ATIMach64ValidateClip(pATI, dstX, dstX + w - 1, dstY, dstY + h - 1);
836
837    /* Handle solid textures which come in as fragment color */
838    col = m3d->frag_color;
839    if (m3d->frag_src) {
840        srcX = maskX;
841        srcY = maskY;
842    }
843
844    /* Handle transform */
845    srcXend = srcX + w;
846    srcYend = srcY + h;
847    if (m3d->transform) {
848        v.vector[0] = IntToxFixed(srcX);
849        v.vector[1] = IntToxFixed(srcY);
850        v.vector[2] = xFixed1;
851        PictureTransformPoint(m3d->transform, &v);
852        srcX = xFixedToInt(v.vector[0]);
853        srcY = xFixedToInt(v.vector[1]);
854
855        v.vector[0] = IntToxFixed(srcXend);
856        v.vector[1] = IntToxFixed(srcYend);
857        v.vector[2] = xFixed1;
858        PictureTransformPoint(m3d->transform, &v);
859        srcXend = xFixedToInt(v.vector[0]);
860        srcYend = xFixedToInt(v.vector[1]);
861
862#if 0
863        /* Bilinear needs manipulation of texture coordinates */
864        if (m3d->scale_3d_cntl & MACH64_BILINEAR_TEX_EN) {
865            dxy =  0.5;
866            dwh = -1.0;
867        }
868#endif
869    }
870
871    /* Create vertices in clock-wise order */
872    VTX_SET(v0, col, dstX,     dstY,     srcX, dxy,    srcY, dxy);
873    VTX_SET(v1, col, dstX + w, dstY,     srcXend, dwh, srcY, dxy);
874    VTX_SET(v2, col, dstX + w, dstY + h, srcXend, dwh, srcYend, dwh);
875    VTX_SET(v3, col, dstX,     dstY + h, srcX, dxy,    srcYend, dwh);
876
877    /* Setup upper triangle (v0, v1, v3) */
878    VTX_OUT(v0, 1);
879    VTX_OUT(v1, 2);
880    VTX_OUT(v3, 3);
881
882    ooa = 1.0 / (w * h);
883    outf(ONE_OVER_AREA, FVAL(ooa));
884
885    /* Setup lower triangle (v2, v1, v3) */
886    VTX_OUT(v2, 1);
887
888    ooa = -ooa;
889    outf(ONE_OVER_AREA, FVAL(ooa));
890}
891
892/*
893 * DoneComposite acceleration hook.
894 */
895void
896Mach64DoneComposite(PixmapPtr pDst)
897{
898    ScrnInfoPtr pScreenInfo = xf86Screens[pDst->drawable.pScreen->myNum];
899    ATIPtr pATI = ATIPTR(pScreenInfo);
900
901    ATIDRISync(pScreenInfo);
902
903    outf(SCALE_3D_CNTL, 0);
904}
905