atimach64exa.c revision 32b578d3
132b578d3Smrg/*
232b578d3Smrg * Copyright 2003 through 2004 by Marc Aurele La France (TSI @ UQV), tsi@xfree86.org
332b578d3Smrg *
432b578d3Smrg * Permission to use, copy, modify, distribute, and sell this software and its
532b578d3Smrg * documentation for any purpose is hereby granted without fee, provided that
632b578d3Smrg * the above copyright notice appear in all copies and that both that copyright
732b578d3Smrg * notice and this permission notice appear in supporting documentation, and
832b578d3Smrg * that the name of Marc Aurele La France not be used in advertising or
932b578d3Smrg * publicity pertaining to distribution of the software without specific,
1032b578d3Smrg * written prior permission.  Marc Aurele La France makes no representations
1132b578d3Smrg * about the suitability of this software for any purpose.  It is provided
1232b578d3Smrg * "as-is" without express or implied warranty.
1332b578d3Smrg *
1432b578d3Smrg * MARC AURELE LA FRANCE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
1532b578d3Smrg * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.  IN NO
1632b578d3Smrg * EVENT SHALL MARC AURELE LA FRANCE BE LIABLE FOR ANY SPECIAL, INDIRECT OR
1732b578d3Smrg * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
1832b578d3Smrg * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
1932b578d3Smrg * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
2032b578d3Smrg * PERFORMANCE OF THIS SOFTWARE.
2132b578d3Smrg */
2232b578d3Smrg/*
2332b578d3Smrg * Copyright 1999-2000 Precision Insight, Inc., Cedar Park, Texas.
2432b578d3Smrg * All Rights Reserved.
2532b578d3Smrg *
2632b578d3Smrg * Permission is hereby granted, free of charge, to any person obtaining a copy
2732b578d3Smrg * of this software and associated documentation files (the "Software"), to
2832b578d3Smrg * deal in the Software without restriction, including without limitation the
2932b578d3Smrg * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
3032b578d3Smrg * sell copies of the Software, and to permit persons to whom the Software is
3132b578d3Smrg * furnished to do so, subject to the following conditions:
3232b578d3Smrg *
3332b578d3Smrg * The above copyright notice and this permission notice (including the next
3432b578d3Smrg * paragraph) shall be included in all copies or substantial portions of the
3532b578d3Smrg * Software.
3632b578d3Smrg *
3732b578d3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
3832b578d3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
3932b578d3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
4032b578d3Smrg * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
4132b578d3Smrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
4232b578d3Smrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
4332b578d3Smrg * DEALINGS IN THE SOFTWARE.
4432b578d3Smrg */
4532b578d3Smrg/*
4632b578d3Smrg * DRI support by:
4732b578d3Smrg *    Manuel Teira
4832b578d3Smrg *    Leif Delgass <ldelgass@retinalburn.net>
4932b578d3Smrg *
5032b578d3Smrg * EXA support by:
5132b578d3Smrg *    Jakub Stachowski <qbast@go2.pl>
5232b578d3Smrg *    George Sapountzis <gsap7@yahoo.gr>
5332b578d3Smrg */
5432b578d3Smrg
5532b578d3Smrg#ifdef HAVE_CONFIG_H
5632b578d3Smrg#include "config.h"
5732b578d3Smrg#endif
5832b578d3Smrg
5932b578d3Smrg#include <string.h>
6032b578d3Smrg
6132b578d3Smrg#include "ati.h"
6232b578d3Smrg#include "atichip.h"
6332b578d3Smrg#include "atidri.h"
6432b578d3Smrg#include "atimach64accel.h"
6532b578d3Smrg#include "atimach64io.h"
6632b578d3Smrg#include "atipriv.h"
6732b578d3Smrg#include "atiregs.h"
6832b578d3Smrg
6932b578d3Smrg#ifdef XF86DRI_DEVEL
7032b578d3Smrg#include "mach64_dri.h"
7132b578d3Smrg#include "mach64_sarea.h"
7232b578d3Smrg#endif
7332b578d3Smrg
7432b578d3Smrg#ifdef USE_EXA
7532b578d3Smrgextern CARD8 ATIMach64ALU[];
7632b578d3Smrg
7732b578d3Smrgextern void
7832b578d3SmrgATIMach64ValidateClip
7932b578d3Smrg(
8032b578d3Smrg    ATIPtr pATI,
8132b578d3Smrg    int sc_left,
8232b578d3Smrg    int sc_right,
8332b578d3Smrg    int sc_top,
8432b578d3Smrg    int sc_bottom
8532b578d3Smrg);
8632b578d3Smrg
/*
 * MACH64_TRACE((fmt, ...)): debug tracing.  Compiled out by default; change
 * the "#if 0" to "#if 1" to print the message through ErrorF, prefixed with
 * the name of the calling function.  Note the doubled parentheses at the
 * call site: the argument is a complete ErrorF argument list.
 */
#if 0
#define MACH64_TRACE(x)                             \
    do {                                            \
        ErrorF("Mach64(%s): ", __FUNCTION__);       \
        ErrorF x;                                   \
    } while (0)
#else
#define MACH64_TRACE(x) do { } while (0)
#endif

/*
 * MACH64_FALLBACK((fmt, ...)): leave the calling function with FALSE.
 * The disabled variant additionally logs the reason through ErrorF.
 * Only usable inside functions returning Bool.
 */
#if 0
#define MACH64_FALLBACK(x)                          \
    do {                                            \
        ErrorF("Fallback(%s): ", __FUNCTION__);     \
        ErrorF x;                                   \
        return FALSE;                               \
    } while (0)
#else
#define MACH64_FALLBACK(x) return FALSE
#endif
10732b578d3Smrg
10832b578d3Smrgstatic void
10932b578d3SmrgMach64WaitMarker(ScreenPtr pScreenInfo, int Marker)
11032b578d3Smrg{
11132b578d3Smrg    ATIMach64Sync(xf86Screens[pScreenInfo->myNum]);
11232b578d3Smrg}
11332b578d3Smrg
11432b578d3Smrgstatic Bool
11532b578d3SmrgMach64GetDatatypeBpp(PixmapPtr pPix, CARD32 *pix_width)
11632b578d3Smrg{
11732b578d3Smrg	int bpp = pPix->drawable.bitsPerPixel;
11832b578d3Smrg
11932b578d3Smrg	switch (bpp) {
12032b578d3Smrg	case 8:
12132b578d3Smrg		*pix_width =
12232b578d3Smrg			SetBits(PIX_WIDTH_8BPP, DP_DST_PIX_WIDTH) |
12332b578d3Smrg			SetBits(PIX_WIDTH_8BPP, DP_SRC_PIX_WIDTH) |
12432b578d3Smrg			SetBits(PIX_WIDTH_1BPP, DP_HOST_PIX_WIDTH);
12532b578d3Smrg		break;
12632b578d3Smrg	case 16:
12732b578d3Smrg		*pix_width =
12832b578d3Smrg			SetBits(PIX_WIDTH_16BPP, DP_DST_PIX_WIDTH) |
12932b578d3Smrg			SetBits(PIX_WIDTH_16BPP, DP_SRC_PIX_WIDTH) |
13032b578d3Smrg			SetBits(PIX_WIDTH_1BPP, DP_HOST_PIX_WIDTH);
13132b578d3Smrg		break;
13232b578d3Smrg	case 24:
13332b578d3Smrg		*pix_width =
13432b578d3Smrg			SetBits(PIX_WIDTH_8BPP, DP_DST_PIX_WIDTH) |
13532b578d3Smrg			SetBits(PIX_WIDTH_8BPP, DP_SRC_PIX_WIDTH) |
13632b578d3Smrg			SetBits(PIX_WIDTH_1BPP, DP_HOST_PIX_WIDTH);
13732b578d3Smrg		break;
13832b578d3Smrg	case 32:
13932b578d3Smrg		*pix_width =
14032b578d3Smrg			SetBits(PIX_WIDTH_32BPP, DP_DST_PIX_WIDTH) |
14132b578d3Smrg			SetBits(PIX_WIDTH_32BPP, DP_SRC_PIX_WIDTH) |
14232b578d3Smrg			SetBits(PIX_WIDTH_1BPP, DP_HOST_PIX_WIDTH);
14332b578d3Smrg		break;
14432b578d3Smrg	default:
14532b578d3Smrg		MACH64_FALLBACK(("Unsupported bpp: %d\n", bpp));
14632b578d3Smrg	}
14732b578d3Smrg
14832b578d3Smrg#if X_BYTE_ORDER == X_LITTLE_ENDIAN
14932b578d3Smrg
15032b578d3Smrg        *pix_width |= DP_BYTE_PIX_ORDER;
15132b578d3Smrg
15232b578d3Smrg#endif /* X_BYTE_ORDER */
15332b578d3Smrg
15432b578d3Smrg	return TRUE;
15532b578d3Smrg}
15632b578d3Smrg
15732b578d3Smrgstatic Bool
15832b578d3SmrgMach64GetOffsetPitch(PixmapPtr pPix, int bpp, CARD32 *pitch_offset,
15932b578d3Smrg		     unsigned int offset, unsigned int pitch)
16032b578d3Smrg{
16132b578d3Smrg#if 0
16232b578d3Smrg    ScrnInfoPtr pScreenInfo = xf86Screens[pPix->drawable.pScreen->myNum];
16332b578d3Smrg    ATIPtr pATI = ATIPTR(pScreenInfo);
16432b578d3Smrg
16532b578d3Smrg    if (pitch % pATI->pExa->pixmapPitchAlign != 0)
16632b578d3Smrg        MACH64_FALLBACK(("Bad pitch 0x%08x\n", pitch));
16732b578d3Smrg
16832b578d3Smrg    if (offset % pATI->pExa->pixmapOffsetAlign != 0)
16932b578d3Smrg        MACH64_FALLBACK(("Bad offset 0x%08x\n", offset));
17032b578d3Smrg#endif
17132b578d3Smrg
17232b578d3Smrg    /* pixels / 8 = ((bytes * 8) / bpp) / 8 = bytes / bpp */
17332b578d3Smrg    pitch = pitch / bpp;
17432b578d3Smrg
17532b578d3Smrg    /* bytes / 8 */
17632b578d3Smrg    offset = offset >> 3;
17732b578d3Smrg
17832b578d3Smrg    *pitch_offset = ((pitch  << 22) | (offset <<  0));
17932b578d3Smrg
18032b578d3Smrg    return TRUE;
18132b578d3Smrg}
18232b578d3Smrg
18332b578d3Smrgstatic Bool
18432b578d3SmrgMach64GetPixmapOffsetPitch(PixmapPtr pPix, CARD32 *pitch_offset)
18532b578d3Smrg{
18632b578d3Smrg    CARD32 pitch, offset;
18732b578d3Smrg    int bpp;
18832b578d3Smrg
18932b578d3Smrg    bpp = pPix->drawable.bitsPerPixel;
19032b578d3Smrg    if (bpp == 24)
19132b578d3Smrg        bpp = 8;
19232b578d3Smrg
19332b578d3Smrg    pitch = exaGetPixmapPitch(pPix);
19432b578d3Smrg    offset = exaGetPixmapOffset(pPix);
19532b578d3Smrg
19632b578d3Smrg    return Mach64GetOffsetPitch(pPix, bpp, pitch_offset, offset, pitch);
19732b578d3Smrg}
19832b578d3Smrg
19932b578d3Smrgstatic Bool
20032b578d3SmrgMach64PrepareCopy
20132b578d3Smrg(
20232b578d3Smrg    PixmapPtr pSrcPixmap,
20332b578d3Smrg    PixmapPtr pDstPixmap,
20432b578d3Smrg    int       xdir,
20532b578d3Smrg    int       ydir,
20632b578d3Smrg    int       alu,
20732b578d3Smrg    Pixel     planemask
20832b578d3Smrg)
20932b578d3Smrg{
21032b578d3Smrg    ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
21132b578d3Smrg    ATIPtr pATI = ATIPTR(pScreenInfo);
21232b578d3Smrg    CARD32 src_pitch_offset, dst_pitch_offset, dp_pix_width;
21332b578d3Smrg
21432b578d3Smrg    ATIDRISync(pScreenInfo);
21532b578d3Smrg
21632b578d3Smrg    if (!Mach64GetDatatypeBpp(pDstPixmap, &dp_pix_width))
21732b578d3Smrg        return FALSE;
21832b578d3Smrg    if (!Mach64GetPixmapOffsetPitch(pSrcPixmap, &src_pitch_offset))
21932b578d3Smrg        return FALSE;
22032b578d3Smrg    if (!Mach64GetPixmapOffsetPitch(pDstPixmap, &dst_pitch_offset))
22132b578d3Smrg        return FALSE;
22232b578d3Smrg
22332b578d3Smrg    ATIMach64WaitForFIFO(pATI, 7);
22432b578d3Smrg    outf(DP_WRITE_MASK, planemask);
22532b578d3Smrg    outf(DP_PIX_WIDTH, dp_pix_width);
22632b578d3Smrg    outf(SRC_OFF_PITCH, src_pitch_offset);
22732b578d3Smrg    outf(DST_OFF_PITCH, dst_pitch_offset);
22832b578d3Smrg
22932b578d3Smrg    outf(DP_SRC, DP_MONO_SRC_ALLONES |
23032b578d3Smrg        SetBits(SRC_BLIT, DP_FRGD_SRC) | SetBits(SRC_BKGD, DP_BKGD_SRC));
23132b578d3Smrg    outf(DP_MIX, SetBits(ATIMach64ALU[alu], DP_FRGD_MIX));
23232b578d3Smrg
23332b578d3Smrg    outf(CLR_CMP_CNTL, CLR_CMP_FN_FALSE);
23432b578d3Smrg
23532b578d3Smrg    pATI->dst_cntl = 0;
23632b578d3Smrg
23732b578d3Smrg    if (ydir > 0)
23832b578d3Smrg        pATI->dst_cntl |= DST_Y_DIR;
23932b578d3Smrg    if (xdir > 0)
24032b578d3Smrg        pATI->dst_cntl |= DST_X_DIR;
24132b578d3Smrg
24232b578d3Smrg    if (pATI->XModifier == 1)
24332b578d3Smrg        outf(DST_CNTL, pATI->dst_cntl);
24432b578d3Smrg    else
24532b578d3Smrg        pATI->dst_cntl |= DST_24_ROT_EN;
24632b578d3Smrg
24732b578d3Smrg    return TRUE;
24832b578d3Smrg}
24932b578d3Smrg
25032b578d3Smrgstatic void
25132b578d3SmrgMach64Copy
25232b578d3Smrg(
25332b578d3Smrg    PixmapPtr pDstPixmap,
25432b578d3Smrg    int       srcX,
25532b578d3Smrg    int       srcY,
25632b578d3Smrg    int       dstX,
25732b578d3Smrg    int       dstY,
25832b578d3Smrg    int       w,
25932b578d3Smrg    int       h
26032b578d3Smrg)
26132b578d3Smrg{
26232b578d3Smrg    ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
26332b578d3Smrg    ATIPtr pATI = ATIPTR(pScreenInfo);
26432b578d3Smrg
26532b578d3Smrg    srcX *= pATI->XModifier;
26632b578d3Smrg    dstY *= pATI->XModifier;
26732b578d3Smrg    w    *= pATI->XModifier;
26832b578d3Smrg
26932b578d3Smrg    ATIDRISync(pScreenInfo);
27032b578d3Smrg
27132b578d3Smrg    /* Disable clipping if it gets in the way */
27232b578d3Smrg    ATIMach64ValidateClip(pATI, dstX, dstX + w - 1, dstY, dstY + h - 1);
27332b578d3Smrg
27432b578d3Smrg    if (!(pATI->dst_cntl & DST_X_DIR))
27532b578d3Smrg    {
27632b578d3Smrg        srcX += w - 1;
27732b578d3Smrg        dstX += w - 1;
27832b578d3Smrg    }
27932b578d3Smrg
28032b578d3Smrg    if (!(pATI->dst_cntl & DST_Y_DIR))
28132b578d3Smrg    {
28232b578d3Smrg        srcY += h - 1;
28332b578d3Smrg        dstY += h - 1;
28432b578d3Smrg    }
28532b578d3Smrg
28632b578d3Smrg    if (pATI->XModifier != 1)
28732b578d3Smrg        outf(DST_CNTL, pATI->dst_cntl | SetBits((dstX / 4) % 6, DST_24_ROT));
28832b578d3Smrg
28932b578d3Smrg    ATIMach64WaitForFIFO(pATI, 4);
29032b578d3Smrg    outf(SRC_Y_X, SetWord(srcX, 1) | SetWord(srcY, 0));
29132b578d3Smrg    outf(SRC_WIDTH1, w);
29232b578d3Smrg    outf(DST_Y_X, SetWord(dstX, 1) | SetWord(dstY, 0));
29332b578d3Smrg    outf(DST_HEIGHT_WIDTH, SetWord(w, 1) | SetWord(h, 0));
29432b578d3Smrg
29532b578d3Smrg    /*
29632b578d3Smrg     * On VTB's and later, the engine will randomly not wait for a copy
29732b578d3Smrg     * operation to commit its results to video memory before starting the next
29832b578d3Smrg     * one.  The probability of such occurrences increases with GUI_WB_FLUSH
29932b578d3Smrg     * (or GUI_WB_FLUSH_P) setting, bitsPerPixel and/or CRTC clock.  This
30032b578d3Smrg     * would point to some kind of video memory bandwidth problem were it noti
30132b578d3Smrg     * for the fact that the problem occurs less often (but still occurs) when
30232b578d3Smrg     * copying larger rectangles.
30332b578d3Smrg     */
30432b578d3Smrg    if ((pATI->Chip >= ATI_CHIP_264VTB) && !pATI->OptionDevel)
30532b578d3Smrg    {
30632b578d3Smrg        exaMarkSync(pScreenInfo->pScreen); /* Force sync. */
30732b578d3Smrg        exaWaitSync(pScreenInfo->pScreen); /* Sync and notify EXA. */
30832b578d3Smrg    }
30932b578d3Smrg}
31032b578d3Smrg
31132b578d3Smrgstatic void Mach64DoneCopy(PixmapPtr pDstPixmap) { }
31232b578d3Smrg
31332b578d3Smrgstatic Bool
31432b578d3SmrgMach64PrepareSolid
31532b578d3Smrg(
31632b578d3Smrg    PixmapPtr pPixmap,
31732b578d3Smrg    int       alu,
31832b578d3Smrg    Pixel     planemask,
31932b578d3Smrg    Pixel     fg
32032b578d3Smrg)
32132b578d3Smrg{
32232b578d3Smrg    ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum];
32332b578d3Smrg    ATIPtr pATI = ATIPTR(pScreenInfo);
32432b578d3Smrg    CARD32 dst_pitch_offset, dp_pix_width;
32532b578d3Smrg
32632b578d3Smrg    ATIDRISync(pScreenInfo);
32732b578d3Smrg
32832b578d3Smrg    if (!Mach64GetDatatypeBpp(pPixmap, &dp_pix_width))
32932b578d3Smrg        return FALSE;
33032b578d3Smrg    if (!Mach64GetPixmapOffsetPitch(pPixmap, &dst_pitch_offset))
33132b578d3Smrg        return FALSE;
33232b578d3Smrg
33332b578d3Smrg    ATIMach64WaitForFIFO(pATI, 7);
33432b578d3Smrg    outf(DP_WRITE_MASK, planemask);
33532b578d3Smrg    outf(DP_PIX_WIDTH, dp_pix_width);
33632b578d3Smrg    outf(DST_OFF_PITCH, dst_pitch_offset);
33732b578d3Smrg
33832b578d3Smrg    outf(DP_SRC, DP_MONO_SRC_ALLONES |
33932b578d3Smrg        SetBits(SRC_FRGD, DP_FRGD_SRC) | SetBits(SRC_BKGD, DP_BKGD_SRC));
34032b578d3Smrg    outf(DP_FRGD_CLR, fg);
34132b578d3Smrg    outf(DP_MIX, SetBits(ATIMach64ALU[alu], DP_FRGD_MIX));
34232b578d3Smrg
34332b578d3Smrg    outf(CLR_CMP_CNTL, CLR_CMP_FN_FALSE);
34432b578d3Smrg
34532b578d3Smrg    if (pATI->XModifier == 1)
34632b578d3Smrg        outf(DST_CNTL, DST_X_DIR | DST_Y_DIR);
34732b578d3Smrg
34832b578d3Smrg    return TRUE;
34932b578d3Smrg}
35032b578d3Smrg
35132b578d3Smrgstatic void
35232b578d3SmrgMach64Solid
35332b578d3Smrg(
35432b578d3Smrg    PixmapPtr pPixmap,
35532b578d3Smrg    int       x1,
35632b578d3Smrg    int       y1,
35732b578d3Smrg    int       x2,
35832b578d3Smrg    int       y2
35932b578d3Smrg)
36032b578d3Smrg{
36132b578d3Smrg    ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum];
36232b578d3Smrg    ATIPtr pATI = ATIPTR(pScreenInfo);
36332b578d3Smrg
36432b578d3Smrg    int x = x1;
36532b578d3Smrg    int y = y1;
36632b578d3Smrg    int w = x2-x1;
36732b578d3Smrg    int h = y2-y1;
36832b578d3Smrg
36932b578d3Smrg    ATIDRISync(pScreenInfo);
37032b578d3Smrg
37132b578d3Smrg    if (pATI->XModifier != 1)
37232b578d3Smrg    {
37332b578d3Smrg        x *= pATI->XModifier;
37432b578d3Smrg        w *= pATI->XModifier;
37532b578d3Smrg
37632b578d3Smrg        outf(DST_CNTL, SetBits((x / 4) % 6, DST_24_ROT) |
37732b578d3Smrg            (DST_X_DIR | DST_Y_DIR | DST_24_ROT_EN));
37832b578d3Smrg    }
37932b578d3Smrg
38032b578d3Smrg    /* Disable clipping if it gets in the way */
38132b578d3Smrg    ATIMach64ValidateClip(pATI, x, x + w - 1, y, y + h - 1);
38232b578d3Smrg
38332b578d3Smrg    ATIMach64WaitForFIFO(pATI, 2);
38432b578d3Smrg    outf(DST_Y_X, SetWord(x, 1) | SetWord(y, 0));
38532b578d3Smrg    outf(DST_HEIGHT_WIDTH, SetWord(w, 1) | SetWord(h, 0));
38632b578d3Smrg}
38732b578d3Smrg
38832b578d3Smrgstatic void Mach64DoneSolid(PixmapPtr pPixmap) { }
38932b578d3Smrg
39032b578d3Smrg/*
39132b578d3Smrg * Memcpy-based UTS.
39232b578d3Smrg */
39332b578d3Smrgstatic Bool
39432b578d3SmrgMach64UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
39532b578d3Smrg    char *src, int src_pitch)
39632b578d3Smrg{
39732b578d3Smrg    char  *dst        = pDst->devPrivate.ptr;
39832b578d3Smrg    int    dst_pitch  = exaGetPixmapPitch(pDst);
39932b578d3Smrg
40032b578d3Smrg    int bpp    = pDst->drawable.bitsPerPixel;
40132b578d3Smrg    int cpp    = (bpp + 7) / 8;
40232b578d3Smrg    int wBytes = w * cpp;
40332b578d3Smrg
40432b578d3Smrg    exaWaitSync(pDst->drawable.pScreen);
40532b578d3Smrg
40632b578d3Smrg    dst += (x * cpp) + (y * dst_pitch);
40732b578d3Smrg
40832b578d3Smrg    while (h--) {
40932b578d3Smrg        memcpy(dst, src, wBytes);
41032b578d3Smrg        src += src_pitch;
41132b578d3Smrg        dst += dst_pitch;
41232b578d3Smrg    }
41332b578d3Smrg
41432b578d3Smrg    return TRUE;
41532b578d3Smrg}
41632b578d3Smrg
41732b578d3Smrg/*
41832b578d3Smrg * Memcpy-based DFS.
41932b578d3Smrg */
42032b578d3Smrgstatic Bool
42132b578d3SmrgMach64DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
42232b578d3Smrg    char *dst, int dst_pitch)
42332b578d3Smrg{
42432b578d3Smrg    char  *src        = pSrc->devPrivate.ptr;
42532b578d3Smrg    int    src_pitch  = exaGetPixmapPitch(pSrc);
42632b578d3Smrg
42732b578d3Smrg    int bpp    = pSrc->drawable.bitsPerPixel;
42832b578d3Smrg    int cpp    = (bpp + 7) / 8;
42932b578d3Smrg    int wBytes = w * cpp;
43032b578d3Smrg
43132b578d3Smrg    exaWaitSync(pSrc->drawable.pScreen);
43232b578d3Smrg
43332b578d3Smrg    src += (x * cpp) + (y * src_pitch);
43432b578d3Smrg
43532b578d3Smrg    while (h--) {
43632b578d3Smrg        memcpy(dst, src, wBytes);
43732b578d3Smrg        src += src_pitch;
43832b578d3Smrg        dst += dst_pitch;
43932b578d3Smrg    }
44032b578d3Smrg
44132b578d3Smrg    return TRUE;
44232b578d3Smrg}
44332b578d3Smrg
44432b578d3Smrg#include "atimach64render.c"
44532b578d3Smrg
/*
 * Compute the integer (floor) log base 2 of val.
 *
 * Returns the index of the highest set bit for val > 0, and -1 for
 * val <= 0.  Negative input is rejected explicitly: right-shifting a
 * negative int need not ever reach zero, so looping until zero would not
 * terminate.
 */
static __inline__ int Mach64Log2(int val)
{
    int bits = -1;

    while (val > 0) {
        val >>= 1;
        ++bits;
    }
    return bits;
}
45532b578d3Smrg
45632b578d3Smrg/*
45732b578d3Smrg * Memory layour for EXA with DRI (no local_textures):
45832b578d3Smrg * | front  | back   | depth  | textures | pixmaps, xv | c |
45932b578d3Smrg *
46032b578d3Smrg * 1024x768@16bpp with 8 MB:
46132b578d3Smrg * | 1.5 MB | 1.5 MB | 1.5 MB | 0        | ~3.5 MB     | c |
46232b578d3Smrg *
46332b578d3Smrg * 1024x768@32bpp with 8 MB:
46432b578d3Smrg * | 3.0 MB | 3.0 MB | 1.5 MB | 0        | ~0.5 MB     | c |
46532b578d3Smrg *
46632b578d3Smrg * "c" is the hw cursor which occupies 1KB
46732b578d3Smrg */
46832b578d3Smrgstatic void
46932b578d3SmrgMach64SetupMemEXA(ScreenPtr pScreen)
47032b578d3Smrg{
47132b578d3Smrg    ScrnInfoPtr pScreenInfo = xf86Screens[pScreen->myNum];
47232b578d3Smrg    ATIPtr pATI = ATIPTR(pScreenInfo);
47332b578d3Smrg
47432b578d3Smrg    int cpp = (pScreenInfo->bitsPerPixel + 7) / 8;
47532b578d3Smrg    /* front and back buffer */
47632b578d3Smrg    int bufferSize = pScreenInfo->virtualY * pScreenInfo->displayWidth * cpp;
47732b578d3Smrg    /* always 16-bit z-buffer */
47832b578d3Smrg    int depthSize  = pScreenInfo->virtualY * pScreenInfo->displayWidth * 2;
47932b578d3Smrg
48032b578d3Smrg    ExaDriverPtr pExa = pATI->pExa;
48132b578d3Smrg
48232b578d3Smrg    pExa->memoryBase = pATI->pMemory;
48332b578d3Smrg    pExa->memorySize = pScreenInfo->videoRam * 1024;
48432b578d3Smrg    pExa->offScreenBase = bufferSize;
48532b578d3Smrg
48632b578d3Smrg#ifdef XF86DRI_DEVEL
48732b578d3Smrg    if (pATI->directRenderingEnabled)
48832b578d3Smrg    {
48932b578d3Smrg	ATIDRIServerInfoPtr pATIDRIServer = pATI->pDRIServerInfo;
49032b578d3Smrg	Bool is_pci = pATIDRIServer->IsPCI;
49132b578d3Smrg
49232b578d3Smrg	int textureSize = 0;
49332b578d3Smrg	int pixmapCache = 0;
49432b578d3Smrg	int next = 0;
49532b578d3Smrg
49632b578d3Smrg	/* front buffer */
49732b578d3Smrg	pATIDRIServer->frontOffset = 0;
49832b578d3Smrg	pATIDRIServer->frontPitch = pScreenInfo->displayWidth;
49932b578d3Smrg	next += bufferSize;
50032b578d3Smrg
50132b578d3Smrg	/* back buffer */
50232b578d3Smrg	pATIDRIServer->backOffset = next;
50332b578d3Smrg	pATIDRIServer->backPitch = pScreenInfo->displayWidth;
50432b578d3Smrg	next += bufferSize;
50532b578d3Smrg
50632b578d3Smrg	/* depth buffer */
50732b578d3Smrg	pATIDRIServer->depthOffset = next;
50832b578d3Smrg	pATIDRIServer->depthPitch = pScreenInfo->displayWidth;
50932b578d3Smrg	next += depthSize;
51032b578d3Smrg
51132b578d3Smrg	/* ATIScreenInit does check for the this condition. */
51232b578d3Smrg	if (next > pExa->memorySize)
51332b578d3Smrg	{
51432b578d3Smrg	    xf86DrvMsg(pScreen->myNum, X_WARNING,
51532b578d3Smrg		"DRI static buffer allocation failed, disabling DRI --"
51632b578d3Smrg		"need at least %d kB video memory\n", next / 1024 );
51732b578d3Smrg	    ATIDRICloseScreen(pScreen);
51832b578d3Smrg	    pATI->directRenderingEnabled = FALSE;
51932b578d3Smrg	}
52032b578d3Smrg
52132b578d3Smrg	/* local textures */
52232b578d3Smrg
52332b578d3Smrg	/* Reserve approx. half of offscreen memory for local textures */
52432b578d3Smrg	textureSize = (pExa->memorySize - next) / 2;
52532b578d3Smrg
52632b578d3Smrg	/* In case DRI requires more offscreen memory than available,
52732b578d3Smrg	 * should not happen as ATIScreenInit would have not enabled DRI */
52832b578d3Smrg	if (textureSize < 0)
52932b578d3Smrg	    textureSize = 0;
53032b578d3Smrg
53132b578d3Smrg	/* Try for enough pixmap cache for a full viewport */
53232b578d3Smrg	pixmapCache = (pExa->memorySize - next) - textureSize;
53332b578d3Smrg	if (pixmapCache < bufferSize)
53432b578d3Smrg	    textureSize = 0;
53532b578d3Smrg
53632b578d3Smrg	/* Don't allocate a local texture heap for AGP unless requested */
53732b578d3Smrg	if ( !is_pci && !pATI->OptionLocalTextures )
53832b578d3Smrg	    textureSize = 0;
53932b578d3Smrg
54032b578d3Smrg	if (textureSize > 0)
54132b578d3Smrg	{
54232b578d3Smrg	    int l = Mach64Log2(textureSize / MACH64_NR_TEX_REGIONS);
54332b578d3Smrg	    if (l < MACH64_LOG_TEX_GRANULARITY)
54432b578d3Smrg		l = MACH64_LOG_TEX_GRANULARITY;
54532b578d3Smrg	    pATIDRIServer->logTextureGranularity = l;
54632b578d3Smrg
54732b578d3Smrg	    /* Round the texture size down to the nearest whole number of
54832b578d3Smrg	     * texture regions.
54932b578d3Smrg	     */
55032b578d3Smrg	    textureSize = (textureSize >> l) << l;
55132b578d3Smrg	}
55232b578d3Smrg
55332b578d3Smrg	/* Set a minimum usable local texture heap size.  This will fit
55432b578d3Smrg	 * two 256x256 textures.  We check this after any rounding of
55532b578d3Smrg	 * the texture area.
55632b578d3Smrg	 */
55732b578d3Smrg	if (textureSize < 256*256 * cpp * 2)
55832b578d3Smrg	    textureSize = 0;
55932b578d3Smrg
56032b578d3Smrg	/* Disable DRI for PCI if cannot allocate a local texture heap */
56132b578d3Smrg	if ( is_pci && textureSize == 0 )
56232b578d3Smrg	{
56332b578d3Smrg	    xf86DrvMsg(pScreen->myNum, X_WARNING,
56432b578d3Smrg		"Not enough memory for local textures, disabling DRI\n");
56532b578d3Smrg	    ATIDRICloseScreen(pScreen);
56632b578d3Smrg	    pATI->directRenderingEnabled = FALSE;
56732b578d3Smrg	}
56832b578d3Smrg
56932b578d3Smrg	pATIDRIServer->textureOffset = next;
57032b578d3Smrg	pATIDRIServer->textureSize = textureSize;
57132b578d3Smrg	next += textureSize;
57232b578d3Smrg
57332b578d3Smrg	/* pExa->offScreenBase is moved to `next' when DRI gets activated */
57432b578d3Smrg    }
57532b578d3Smrg#endif /* XF86DRI_DEVEL */
57632b578d3Smrg
57732b578d3Smrg    xf86DrvMsg(pScreen->myNum, X_INFO,
57832b578d3Smrg        "EXA memory management initialized\n"
57932b578d3Smrg        "\t base     :  %10p\n"
58032b578d3Smrg        "\t offscreen: +%10lx\n"
58132b578d3Smrg        "\t size     : +%10lx\n"
58232b578d3Smrg        "\t cursor   :  %10p\n",
58332b578d3Smrg        pExa->memoryBase,
58432b578d3Smrg        pExa->offScreenBase,
58532b578d3Smrg        pExa->memorySize,
58632b578d3Smrg        pATI->pCursorImage);
58732b578d3Smrg
58832b578d3Smrg    if (TRUE || xf86GetVerbosity() > 1)
58932b578d3Smrg    {
59032b578d3Smrg        int offscreen = pExa->memorySize - pExa->offScreenBase;
59132b578d3Smrg        int viewport = bufferSize;
59232b578d3Smrg        int dvdframe = 720*480*cpp; /* enough for single-buffered DVD */
59332b578d3Smrg
59432b578d3Smrg        xf86DrvMsg(pScreen->myNum, X_INFO,
59532b578d3Smrg            "Will use %d kB of offscreen memory for EXA\n"
59632b578d3Smrg            "\t\t or %5.2f viewports (composite)\n"
59732b578d3Smrg            "\t\t or %5.2f dvdframes (xvideo)\n",
59832b578d3Smrg            offscreen / 1024,
59932b578d3Smrg            1.0 * offscreen / viewport,
60032b578d3Smrg            1.0 * offscreen / dvdframe);
60132b578d3Smrg    }
60232b578d3Smrg
60332b578d3Smrg#ifdef XF86DRI_DEVEL
60432b578d3Smrg    if (pATI->directRenderingEnabled)
60532b578d3Smrg    {
60632b578d3Smrg        ATIDRIServerInfoPtr pATIDRIServer = pATI->pDRIServerInfo;
60732b578d3Smrg
60832b578d3Smrg        xf86DrvMsg(pScreen->myNum, X_INFO,
60932b578d3Smrg                   "Will use back  buffer at offset 0x%x\n",
61032b578d3Smrg                   pATIDRIServer->backOffset);
61132b578d3Smrg
61232b578d3Smrg        xf86DrvMsg(pScreen->myNum, X_INFO,
61332b578d3Smrg                   "Will use depth buffer at offset 0x%x\n",
61432b578d3Smrg                   pATIDRIServer->depthOffset);
61532b578d3Smrg
61632b578d3Smrg        if (pATIDRIServer->textureSize > 0)
61732b578d3Smrg        {
61832b578d3Smrg            xf86DrvMsg(pScreen->myNum, X_INFO,
61932b578d3Smrg                   "Will use %d kB for local textures at offset 0x%x\n",
62032b578d3Smrg                   pATIDRIServer->textureSize/1024,
62132b578d3Smrg                   pATIDRIServer->textureOffset);
62232b578d3Smrg        }
62332b578d3Smrg    }
62432b578d3Smrg#endif /* XF86DRI_DEVEL */
62532b578d3Smrg
62632b578d3Smrg    pExa->pixmapOffsetAlign = 64;
62732b578d3Smrg    pExa->pixmapPitchAlign = 64;
62832b578d3Smrg
62932b578d3Smrg    pExa->flags = EXA_OFFSCREEN_PIXMAPS;
63032b578d3Smrg
63132b578d3Smrg    pExa->maxX = ATIMach64MaxX;
63232b578d3Smrg    pExa->maxY = ATIMach64MaxY;
63332b578d3Smrg}
63432b578d3Smrg
63532b578d3SmrgBool ATIMach64ExaInit(ScreenPtr pScreen)
63632b578d3Smrg{
63732b578d3Smrg    ScrnInfoPtr pScreenInfo = xf86Screens[pScreen->myNum];
63832b578d3Smrg    ATIPtr pATI = ATIPTR(pScreenInfo);
63932b578d3Smrg    ExaDriverPtr pExa;
64032b578d3Smrg
64132b578d3Smrg    pExa = exaDriverAlloc();
64232b578d3Smrg    if (!pExa)
64332b578d3Smrg        return FALSE;
64432b578d3Smrg
64532b578d3Smrg    pATI->pExa = pExa;
64632b578d3Smrg
64732b578d3Smrg    pExa->exa_major = 2;
64832b578d3Smrg    pExa->exa_minor = 0;
64932b578d3Smrg
65032b578d3Smrg    Mach64SetupMemEXA(pScreen);
65132b578d3Smrg
65232b578d3Smrg    pExa->WaitMarker = Mach64WaitMarker;
65332b578d3Smrg
65432b578d3Smrg    pExa->PrepareSolid = Mach64PrepareSolid;
65532b578d3Smrg    pExa->Solid = Mach64Solid;
65632b578d3Smrg    pExa->DoneSolid = Mach64DoneSolid;
65732b578d3Smrg
65832b578d3Smrg    pExa->PrepareCopy = Mach64PrepareCopy;
65932b578d3Smrg    pExa->Copy = Mach64Copy;
66032b578d3Smrg    pExa->DoneCopy = Mach64DoneCopy;
66132b578d3Smrg
66232b578d3Smrg    /* EXA hits more optimized paths when it does not have to fallback because
66332b578d3Smrg     * of missing UTS/DFS, hook memcpy-based UTS/DFS.
66432b578d3Smrg     */
66532b578d3Smrg    pExa->UploadToScreen = Mach64UploadToScreen;
66632b578d3Smrg    pExa->DownloadFromScreen = Mach64DownloadFromScreen;
66732b578d3Smrg
66832b578d3Smrg    if (pATI->RenderAccelEnabled) {
66932b578d3Smrg	if (pATI->Chip >= ATI_CHIP_264GTPRO) {
67032b578d3Smrg	    /* 3D Rage Pro does not support NPOT textures. */
67132b578d3Smrg	    pExa->flags |= EXA_OFFSCREEN_ALIGN_POT;
67232b578d3Smrg
67332b578d3Smrg	    pExa->CheckComposite = Mach64CheckComposite;
67432b578d3Smrg	    pExa->PrepareComposite = Mach64PrepareComposite;
67532b578d3Smrg	    pExa->Composite = Mach64Composite;
67632b578d3Smrg	    pExa->DoneComposite = Mach64DoneComposite;
67732b578d3Smrg	} else {
67832b578d3Smrg	    xf86DrvMsg(pScreen->myNum, X_INFO,
67932b578d3Smrg		       "Render acceleration is not supported for ATI chips "
68032b578d3Smrg		       "earlier than the ATI 3D Rage Pro.\n");
68132b578d3Smrg	    pATI->RenderAccelEnabled = FALSE;
68232b578d3Smrg	}
68332b578d3Smrg    }
68432b578d3Smrg
68532b578d3Smrg    xf86DrvMsg(pScreen->myNum, X_INFO, "Render acceleration %s\n",
68632b578d3Smrg	       pATI->RenderAccelEnabled ? "enabled" : "disabled");
68732b578d3Smrg
68832b578d3Smrg    if (!exaDriverInit(pScreen, pATI->pExa)) {
68932b578d3Smrg	xfree(pATI->pExa);
69032b578d3Smrg	pATI->pExa = NULL;
69132b578d3Smrg	return FALSE;
69232b578d3Smrg    }
69332b578d3Smrg
69432b578d3Smrg    return TRUE;
69532b578d3Smrg}
69632b578d3Smrg#endif
697