atimach64accel.c revision e35d4d8e
1/*
2 * Copyright 2003 through 2004 by Marc Aurele La France (TSI @ UQV), tsi@xfree86.org
3 *
4 * Permission to use, copy, modify, distribute, and sell this software and its
5 * documentation for any purpose is hereby granted without fee, provided that
6 * the above copyright notice appear in all copies and that both that copyright
7 * notice and this permission notice appear in supporting documentation, and
8 * that the name of Marc Aurele La France not be used in advertising or
9 * publicity pertaining to distribution of the software without specific,
10 * written prior permission.  Marc Aurele La France makes no representations
11 * about the suitability of this software for any purpose.  It is provided
12 * "as-is" without express or implied warranty.
13 *
14 * MARC AURELE LA FRANCE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
15 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.  IN NO
16 * EVENT SHALL MARC AURELE LA FRANCE BE LIABLE FOR ANY SPECIAL, INDIRECT OR
17 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
18 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
19 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
20 * PERFORMANCE OF THIS SOFTWARE.
21 */
22/*
23 * Copyright 1999-2000 Precision Insight, Inc., Cedar Park, Texas.
24 * All Rights Reserved.
25 *
26 * Permission is hereby granted, free of charge, to any person obtaining a copy
27 * of this software and associated documentation files (the "Software"), to
28 * deal in the Software without restriction, including without limitation the
29 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
30 * sell copies of the Software, and to permit persons to whom the Software is
31 * furnished to do so, subject to the following conditions:
32 *
33 * The above copyright notice and this permission notice (including the next
34 * paragraph) shall be included in all copies or substantial portions of the
35 * Software.
36 *
37 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
38 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
39 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
40 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
41 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
42 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
43 * DEALINGS IN THE SOFTWARE.
44 */
45/*
46 * DRI support by:
47 *    Manuel Teira
48 *    Leif Delgass <ldelgass@retinalburn.net>
49 */
50
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdlib.h>

#include "ati.h"
#include "atichip.h"
#include "atimach64accel.h"
#include "atimach64io.h"
#include "atipriv.h"
#include "atiregs.h"

#ifdef XF86DRI_DEVEL
#include "mach64_common.h"
#endif

#include "miline.h"
67
/*
 * Used to test MMIO cache integrity in ATIMach64Sync().
 *
 * If _Register is currently marked as cached and its cache slot differs from
 * the value read back through inm(), caching of that register is turned off
 * and a warning is logged.  The expansion refers to a variable called
 * pScreenInfo, which must therefore be in scope at the point of use (the
 * helper macros it calls are defined elsewhere and may have similar
 * requirements).
 *
 * The body is wrapped in do { ... } while (0) so that the macro behaves as a
 * single statement: "if (x) TestRegisterCaching(R); else ..." now parses as
 * intended instead of failing on a stray ';' before the else.
 */
#define TestRegisterCaching(_Register)                       \
    do                                                       \
    {                                                        \
        if (RegisterIsCached(_Register) &&                   \
            (CacheSlot(_Register) != inm(_Register)))        \
        {                                                    \
            UncacheRegister(_Register);                      \
            xf86DrvMsg(pScreenInfo->scrnIndex, X_WARNING,    \
                #_Register " MMIO write cache disabled!\n"); \
        }                                                    \
    } while (0)
77
/*
 * X-to-Mach64 mix translation table.
 *
 * The setup functions in this file index this table with the raster
 * operation ("rop") they are handed and place the result in the DP_MIX
 * register fields.  Entries appear in the order named by the per-entry
 * comments (GXclear first, GXset last).  No range check is made on the index
 * by the users in this file, so rop is expected to be one of these sixteen
 * codes.
 */
CARD8 ATIMach64ALU[16] =
{
    MIX_0,                       /* GXclear */
    MIX_AND,                     /* GXand */
    MIX_SRC_AND_NOT_DST,         /* GXandReverse */
    MIX_SRC,                     /* GXcopy */
    MIX_NOT_SRC_AND_DST,         /* GXandInverted */
    MIX_DST,                     /* GXnoop */
    MIX_XOR,                     /* GXxor */
    MIX_OR,                      /* GXor */
    MIX_NOR,                     /* GXnor */
    MIX_XNOR,                    /* GXequiv */
    MIX_NOT_DST,                 /* GXinvert */
    MIX_SRC_OR_NOT_DST,          /* GXorReverse */
    MIX_NOT_SRC,                 /* GXcopyInverted */
    MIX_NOT_SRC_OR_DST,          /* GXorInverted */
    MIX_NAND,                    /* GXnand */
    MIX_1                        /* GXset */
};
100
101/*
102 * ATIMach64ValidateClip --
103 *
104 * This function ensures the current scissor settings do not interfere with
105 * the current draw request.
106 */
107void
108ATIMach64ValidateClip
109(
110    ATIPtr pATI,
111    int    sc_left,
112    int    sc_right,
113    int    sc_top,
114    int    sc_bottom
115)
116{
117    if ((sc_left < (int)pATI->sc_left) || (sc_right > (int)pATI->sc_right))
118    {
119        outf(SC_LEFT_RIGHT, pATI->sc_left_right);
120        pATI->sc_left = pATI->NewHW.sc_left;
121        pATI->sc_right = pATI->NewHW.sc_right;
122    }
123
124    if ((sc_top < (int)pATI->sc_top) || (sc_bottom > (int)pATI->sc_bottom))
125    {
126        outf(SC_TOP_BOTTOM, pATI->sc_top_bottom);
127        pATI->sc_top = pATI->NewHW.sc_top;
128        pATI->sc_bottom = pATI->NewHW.sc_bottom;
129    }
130}
131
/*
 * Forward declarations of the MMIO write-cache checkers.  ATIMach64Sync()
 * calls both of them, but their definitions follow it in this file.
 */
static __inline__ void TestRegisterCachingDP(ScrnInfoPtr pScreenInfo);
static __inline__ void TestRegisterCachingXV(ScrnInfoPtr pScreenInfo);
134
135/*
136 * ATIMach64Sync --
137 *
138 * This is called to wait for the draw engine to become idle.
139 */
140void
141ATIMach64Sync
142(
143    ScrnInfoPtr pScreenInfo
144)
145{
146    ATIPtr pATI = ATIPTR(pScreenInfo);
147
148#ifdef XF86DRI_DEVEL
149
150    if ( pATI->directRenderingEnabled && pATI->NeedDRISync )
151    {
152	ATIHWPtr pATIHW = &pATI->NewHW;
153	CARD32 offset;
154
155	if (pATI->OptionMMIOCache) {
156	    /* "Invalidate" the MMIO cache so the cache slots get updated */
157	    UncacheRegister(SRC_CNTL);
158	    UncacheRegister(SCALE_3D_CNTL);
159	    UncacheRegister(HOST_CNTL);
160	    UncacheRegister(PAT_CNTL);
161	    UncacheRegister(SC_LEFT_RIGHT);
162	    UncacheRegister(SC_TOP_BOTTOM);
163	    UncacheRegister(DP_BKGD_CLR);
164	    UncacheRegister(DP_FRGD_CLR);
165	    UncacheRegister(DP_PIX_WIDTH);
166	    UncacheRegister(DP_MIX);
167	    UncacheRegister(CLR_CMP_CNTL);
168	    UncacheRegister(TEX_SIZE_PITCH);
169	}
170
171	ATIDRIWaitForIdle(pATI);
172
173	outr( BUS_CNTL, pATIHW->bus_cntl );
174
175	/* DRI uses GUI_TRAJ_CNTL, which is a composite of
176	 * src_cntl, dst_cntl, pat_cntl, and host_cntl
177	 */
178	outf( SRC_CNTL, pATIHW->src_cntl );
179	outf( DST_CNTL, pATIHW->dst_cntl );
180	outf( PAT_CNTL, pATIHW->pat_cntl );
181	outf( HOST_CNTL, pATIHW->host_cntl );
182
183	outf( DST_OFF_PITCH, pATIHW->dst_off_pitch );
184	outf( SRC_OFF_PITCH, pATIHW->src_off_pitch );
185	outf( DP_SRC, pATIHW->dp_src );
186	outf( DP_MIX, pATIHW->dp_mix );
187	outf( DP_FRGD_CLR,  pATIHW->dp_frgd_clr );
188	outf( DP_WRITE_MASK, pATIHW->dp_write_mask );
189	outf( DP_PIX_WIDTH, pATIHW->dp_pix_width );
190
191	outf( CLR_CMP_CNTL, pATIHW->clr_cmp_cntl );
192
193	offset = TEX_LEVEL(pATIHW->tex_size_pitch);
194
195	ATIMach64WaitForFIFO(pATI, 6);
196	outf( ALPHA_TST_CNTL, 0 );
197	outf( Z_CNTL, 0 );
198	outf( SCALE_3D_CNTL, pATIHW->scale_3d_cntl );
199	outf( TEX_0_OFF + offset, pATIHW->tex_offset );
200	outf( TEX_SIZE_PITCH, pATIHW->tex_size_pitch );
201	outf( TEX_CNTL, pATIHW->tex_cntl );
202
203	ATIMach64WaitForFIFO(pATI, 2);
204	outf( SC_LEFT_RIGHT,
205	      SetWord(pATIHW->sc_right, 1) | SetWord(pATIHW->sc_left, 0) );
206	outf( SC_TOP_BOTTOM,
207	      SetWord(pATIHW->sc_bottom, 1) | SetWord(pATIHW->sc_top, 0) );
208
209	if (pATI->OptionMMIOCache) {
210	    /* Now that the cache slots reflect the register state, re-enable MMIO cache */
211	    CacheRegister(SRC_CNTL);
212	    CacheRegister(SCALE_3D_CNTL);
213	    CacheRegister(HOST_CNTL);
214	    CacheRegister(PAT_CNTL);
215	    CacheRegister(SC_LEFT_RIGHT);
216	    CacheRegister(SC_TOP_BOTTOM);
217	    CacheRegister(DP_BKGD_CLR);
218	    CacheRegister(DP_FRGD_CLR);
219	    CacheRegister(DP_PIX_WIDTH);
220	    CacheRegister(DP_MIX);
221	    CacheRegister(CLR_CMP_CNTL);
222	    CacheRegister(TEX_SIZE_PITCH);
223	}
224
225	ATIMach64WaitForIdle(pATI);
226
227	if (pATI->OptionMMIOCache && pATI->OptionTestMMIOCache) {
228
229	    /* Only check registers we didn't restore */
230	    TestRegisterCaching(PAT_REG0);
231            TestRegisterCaching(PAT_REG1);
232
233            TestRegisterCaching(CLR_CMP_CLR);
234            TestRegisterCaching(CLR_CMP_MSK);
235
236	    TestRegisterCachingXV(pScreenInfo);
237         }
238	pATI->NeedDRISync = FALSE;
239
240    }
241    else
242
243#endif /* XF86DRI_DEVEL */
244    {
245      ATIMach64WaitForIdle(pATI);
246
247      if (pATI->OptionMMIOCache && pATI->OptionTestMMIOCache)
248      {
249        /*
250         * For debugging purposes, attempt to verify that each cached register
251         * should actually be cached.
252         */
253        TestRegisterCachingDP(pScreenInfo);
254
255        TestRegisterCachingXV(pScreenInfo);
256      }
257    }
258
259#ifdef USE_EXA
260    /* EXA sets pEXA->needsSync to FALSE on its own */
261#endif
262
263#ifdef USE_XAA
264    if (pATI->pXAAInfo)
265        pATI->pXAAInfo->NeedToSync = FALSE;
266#endif
267
268    if (pATI->Chip >= ATI_CHIP_264VTB)
269    {
270        /*
271         * Flush the read-back cache (by turning on INVALIDATE_RB_CACHE),
272         * otherwise the host might get stale data when reading through the
273         * aperture.
274         */
275        outr(MEM_BUF_CNTL, pATI->NewHW.mem_buf_cntl);
276    }
277
278    /*
279     * Note:
280     * Before actually invalidating the read-back cache, the mach64 driver
281     * was using the trick below which is buggy. The code is left here for
282     * reference, DRI uses this trick and needs updating.
283     *
284     * For VTB's and later, the first CPU read of the framebuffer will return
285     * zeroes, so do it here.  This appears to be due to some kind of engine
286     * caching of framebuffer data I haven't found any way of disabling, or
287     * otherwise circumventing.  Thanks to Mark Vojkovich for the suggestion.
288     *
289     * pATI = *(volatile ATIPtr *)pATI->pMemory;
290     */
291}
292
293static __inline__ void
294TestRegisterCachingDP(ScrnInfoPtr pScreenInfo)
295{
296    ATIPtr pATI = ATIPTR(pScreenInfo);
297
298    TestRegisterCaching(SRC_CNTL);
299
300    if (pATI->Chip >= ATI_CHIP_264GTPRO)
301    {
302        TestRegisterCaching(SCALE_3D_CNTL);
303    }
304
305    TestRegisterCaching(HOST_CNTL);
306
307    TestRegisterCaching(PAT_REG0);
308    TestRegisterCaching(PAT_REG1);
309    TestRegisterCaching(PAT_CNTL);
310
311    if (RegisterIsCached(SC_LEFT_RIGHT) &&      /* Special case */
312        (CacheSlot(SC_LEFT_RIGHT) !=
313         (SetWord(inm(SC_RIGHT), 1) | SetWord(inm(SC_LEFT), 0))))
314    {
315        UncacheRegister(SC_LEFT_RIGHT);
316        xf86DrvMsg(pScreenInfo->scrnIndex, X_WARNING,
317            "SC_LEFT_RIGHT write cache disabled!\n");
318    }
319
320    if (RegisterIsCached(SC_TOP_BOTTOM) &&      /* Special case */
321        (CacheSlot(SC_TOP_BOTTOM) !=
322         (SetWord(inm(SC_BOTTOM), 1) | SetWord(inm(SC_TOP), 0))))
323    {
324        UncacheRegister(SC_TOP_BOTTOM);
325        xf86DrvMsg(pScreenInfo->scrnIndex, X_WARNING,
326            "SC_TOP_BOTTOM write cache disabled!\n");
327    }
328
329    TestRegisterCaching(DP_BKGD_CLR);
330    TestRegisterCaching(DP_FRGD_CLR);
331    TestRegisterCaching(DP_PIX_WIDTH);
332    TestRegisterCaching(DP_MIX);
333
334    TestRegisterCaching(CLR_CMP_CLR);
335    TestRegisterCaching(CLR_CMP_MSK);
336    TestRegisterCaching(CLR_CMP_CNTL);
337
338    if (pATI->Chip >= ATI_CHIP_264GTPRO)
339    {
340        TestRegisterCaching(TEX_SIZE_PITCH);
341    }
342}
343
344static __inline__ void
345TestRegisterCachingXV(ScrnInfoPtr pScreenInfo)
346{
347    ATIPtr pATI = ATIPTR(pScreenInfo);
348
349    if (!pATI->Block1Base)
350        return;
351
352    TestRegisterCaching(OVERLAY_Y_X_START);
353    TestRegisterCaching(OVERLAY_Y_X_END);
354
355    TestRegisterCaching(OVERLAY_GRAPHICS_KEY_CLR);
356    TestRegisterCaching(OVERLAY_GRAPHICS_KEY_MSK);
357
358    TestRegisterCaching(OVERLAY_KEY_CNTL);
359
360    TestRegisterCaching(OVERLAY_SCALE_INC);
361    TestRegisterCaching(OVERLAY_SCALE_CNTL);
362
363    TestRegisterCaching(SCALER_HEIGHT_WIDTH);
364
365    TestRegisterCaching(SCALER_TEST);
366
367    TestRegisterCaching(VIDEO_FORMAT);
368
369    if (pATI->Chip < ATI_CHIP_264VTB)
370    {
371        TestRegisterCaching(BUF0_OFFSET);
372        TestRegisterCaching(BUF0_PITCH);
373        TestRegisterCaching(BUF1_OFFSET);
374        TestRegisterCaching(BUF1_PITCH);
375
376        return;
377    }
378
379    TestRegisterCaching(SCALER_BUF0_OFFSET);
380    TestRegisterCaching(SCALER_BUF1_OFFSET);
381    TestRegisterCaching(SCALER_BUF_PITCH);
382
383    TestRegisterCaching(OVERLAY_EXCLUSIVE_HORZ);
384    TestRegisterCaching(OVERLAY_EXCLUSIVE_VERT);
385
386    if (pATI->Chip < ATI_CHIP_264GTPRO)
387        return;
388
389    TestRegisterCaching(SCALER_COLOUR_CNTL);
390
391    TestRegisterCaching(SCALER_H_COEFF0);
392    TestRegisterCaching(SCALER_H_COEFF1);
393    TestRegisterCaching(SCALER_H_COEFF2);
394    TestRegisterCaching(SCALER_H_COEFF3);
395    TestRegisterCaching(SCALER_H_COEFF4);
396
397    TestRegisterCaching(SCALER_BUF0_OFFSET_U);
398    TestRegisterCaching(SCALER_BUF0_OFFSET_V);
399    TestRegisterCaching(SCALER_BUF1_OFFSET_U);
400    TestRegisterCaching(SCALER_BUF1_OFFSET_V);
401}
402
403#ifdef USE_XAA
404/*
405 * ATIMach64SetupForScreenToScreenCopy --
406 *
407 * This function sets up the draw engine for a series of screen-to-screen copy
408 * operations.
409 */
410static void
411ATIMach64SetupForScreenToScreenCopy
412(
413    ScrnInfoPtr  pScreenInfo,
414    int          xdir,
415    int          ydir,
416    int          rop,
417    unsigned int planemask,
418    int          TransparencyColour
419)
420{
421    ATIPtr pATI = ATIPTR(pScreenInfo);
422
423    ATIDRISync(pScreenInfo);
424
425    ATIMach64WaitForFIFO(pATI, 3);
426    outf(DP_WRITE_MASK, planemask);
427    outf(DP_SRC, DP_MONO_SRC_ALLONES |
428        SetBits(SRC_BLIT, DP_FRGD_SRC) | SetBits(SRC_BKGD, DP_BKGD_SRC));
429    outf(DP_MIX, SetBits(ATIMach64ALU[rop], DP_FRGD_MIX));
430
431#ifdef AVOID_DGA
432
433    if (TransparencyColour == -1)
434
435#else /* AVOID_DGA */
436
437    if (!pATI->XAAForceTransBlit && (TransparencyColour == -1))
438
439#endif /* AVOID_DGA */
440
441    {
442        outf(CLR_CMP_CNTL, CLR_CMP_FN_FALSE);
443    }
444    else
445    {
446        ATIMach64WaitForFIFO(pATI, 2);
447        outf(CLR_CMP_CLR, TransparencyColour);
448        outf(CLR_CMP_CNTL, CLR_CMP_FN_EQUAL | CLR_CMP_SRC_2D);
449    }
450
451    pATI->dst_cntl = 0;
452
453    if (ydir > 0)
454        pATI->dst_cntl |= DST_Y_DIR;
455    if (xdir > 0)
456        pATI->dst_cntl |= DST_X_DIR;
457
458    if (pATI->XModifier == 1)
459        outf(DST_CNTL, pATI->dst_cntl);
460    else
461        pATI->dst_cntl |= DST_24_ROT_EN;
462}
463
464/*
465 * ATIMach64SubsequentScreenToScreenCopy --
466 *
467 * This function performs a screen-to-screen copy operation.
468 */
469static void
470ATIMach64SubsequentScreenToScreenCopy
471(
472    ScrnInfoPtr pScreenInfo,
473    int         xSrc,
474    int         ySrc,
475    int         xDst,
476    int         yDst,
477    int         w,
478    int         h
479)
480{
481    ATIPtr pATI = ATIPTR(pScreenInfo);
482
483    xSrc *= pATI->XModifier;
484    xDst *= pATI->XModifier;
485    w    *= pATI->XModifier;
486
487    ATIDRISync(pScreenInfo);
488
489    /* Disable clipping if it gets in the way */
490    ATIMach64ValidateClip(pATI, xDst, xDst + w - 1, yDst, yDst + h - 1);
491
492    if (!(pATI->dst_cntl & DST_X_DIR))
493    {
494        xSrc += w - 1;
495        xDst += w - 1;
496    }
497
498    if (!(pATI->dst_cntl & DST_Y_DIR))
499    {
500        ySrc += h - 1;
501        yDst += h - 1;
502    }
503
504    if (pATI->XModifier != 1)
505        outf(DST_CNTL, pATI->dst_cntl | SetBits((xDst / 4) % 6, DST_24_ROT));
506
507    ATIMach64WaitForFIFO(pATI, 4);
508    outf(SRC_Y_X, SetWord(xSrc, 1) | SetWord(ySrc, 0));
509    outf(SRC_WIDTH1, w);
510    outf(DST_Y_X, SetWord(xDst, 1) | SetWord(yDst, 0));
511    outf(DST_HEIGHT_WIDTH, SetWord(w, 1) | SetWord(h, 0));
512
513    /*
514     * On VTB's and later, the engine will randomly not wait for a copy
515     * operation to commit its results to video memory before starting the next
516     * one.  The probability of such occurrences increases with GUI_WB_FLUSH
517     * (or GUI_WB_FLUSH_P) setting, bitsPerPixel and/or CRTC clock.  This
518     * would point to some kind of video memory bandwidth problem were it noti
519     * for the fact that the problem occurs less often (but still occurs) when
520     * copying larger rectangles.
521     */
522    if ((pATI->Chip >= ATI_CHIP_264VTB) && !pATI->OptionDevel)
523        ATIMach64Sync(pScreenInfo);
524}
525
526/*
527 * ATIMach64SetupForSolidFill --
528 *
529 * This function sets up the draw engine for a series of solid fills.
530 */
531static void
532ATIMach64SetupForSolidFill
533(
534    ScrnInfoPtr  pScreenInfo,
535    int          colour,
536    int          rop,
537    unsigned int planemask
538)
539{
540    ATIPtr pATI = ATIPTR(pScreenInfo);
541
542    ATIDRISync(pScreenInfo);
543
544    ATIMach64WaitForFIFO(pATI, 5);
545    outf(DP_WRITE_MASK, planemask);
546    outf(DP_SRC, DP_MONO_SRC_ALLONES |
547        SetBits(SRC_FRGD, DP_FRGD_SRC) | SetBits(SRC_BKGD, DP_BKGD_SRC));
548    outf(DP_FRGD_CLR, colour);
549    outf(DP_MIX, SetBits(ATIMach64ALU[rop], DP_FRGD_MIX));
550
551    outf(CLR_CMP_CNTL, CLR_CMP_FN_FALSE);
552
553    if (pATI->XModifier == 1)
554        outf(DST_CNTL, DST_X_DIR | DST_Y_DIR);
555}
556
557/*
558 * ATIMach64SubsequentSolidFillRect --
559 *
560 * This function performs a solid rectangle fill.
561 */
562static void
563ATIMach64SubsequentSolidFillRect
564(
565    ScrnInfoPtr pScreenInfo,
566    int         x,
567    int         y,
568    int         w,
569    int         h
570)
571{
572    ATIPtr pATI = ATIPTR(pScreenInfo);
573
574    ATIDRISync(pScreenInfo);
575
576    if (pATI->XModifier != 1)
577    {
578        x *= pATI->XModifier;
579        w *= pATI->XModifier;
580
581        outf(DST_CNTL, SetBits((x / 4) % 6, DST_24_ROT) |
582            (DST_X_DIR | DST_Y_DIR | DST_24_ROT_EN));
583    }
584
585    /* Disable clipping if it gets in the way */
586    ATIMach64ValidateClip(pATI, x, x + w - 1, y, y + h - 1);
587
588    ATIMach64WaitForFIFO(pATI, 2);
589    outf(DST_Y_X, SetWord(x, 1) | SetWord(y, 0));
590    outf(DST_HEIGHT_WIDTH, SetWord(w, 1) | SetWord(h, 0));
591}
592
593/*
594 * ATIMach64SetupForSolidLine --
595 *
596 * This function sets up the draw engine for a series of solid lines.  It is
597 * not used for 24bpp because the engine doesn't support it.
598 */
599static void
600ATIMach64SetupForSolidLine
601(
602    ScrnInfoPtr  pScreenInfo,
603    int          colour,
604    int          rop,
605    unsigned int planemask
606)
607{
608    ATIPtr pATI = ATIPTR(pScreenInfo);
609
610    ATIDRISync(pScreenInfo);
611
612    ATIMach64WaitForFIFO(pATI, 5);
613    outf(DP_WRITE_MASK, planemask);
614    outf(DP_SRC, DP_MONO_SRC_ALLONES |
615        SetBits(SRC_FRGD, DP_FRGD_SRC) | SetBits(SRC_BKGD, DP_BKGD_SRC));
616    outf(DP_FRGD_CLR, colour);
617    outf(DP_MIX, SetBits(ATIMach64ALU[rop], DP_FRGD_MIX));
618
619    outf(CLR_CMP_CNTL, CLR_CMP_FN_FALSE);
620
621    ATIMach64ValidateClip(pATI, pATI->NewHW.sc_left, pATI->NewHW.sc_right,
622        pATI->NewHW.sc_top, pATI->NewHW.sc_bottom);
623}
624
625/*
626 * ATIMach64SubsequentSolidHorVertLine --
627 *
628 * This is called to draw a solid horizontal or vertical line.  This does a
629 * one-pixel wide solid fill.
630 */
631static void
632ATIMach64SubsequentSolidHorVertLine
633(
634    ScrnInfoPtr pScreenInfo,
635    int         x,
636    int         y,
637    int         len,
638    int         dir
639)
640{
641    ATIPtr pATI = ATIPTR(pScreenInfo);
642
643    ATIDRISync(pScreenInfo);
644
645    ATIMach64WaitForFIFO(pATI, 3);
646    outf(DST_CNTL, DST_X_DIR | DST_Y_DIR);
647    outf(DST_Y_X, SetWord(x, 1) | SetWord(y, 0));
648
649    if (dir == DEGREES_0)
650        outf(DST_HEIGHT_WIDTH, SetWord(len, 1) | SetWord(1, 0));
651    else /* if (dir == DEGREES_270) */
652        outf(DST_HEIGHT_WIDTH, SetWord(1, 1) | SetWord(len, 0));
653}
654
655/*
656 * ATIMach64SubsequentSolidBresenhamLine --
657 *
658 * This function draws a line using the Bresenham line engine.
659 */
660static void
661ATIMach64SubsequentSolidBresenhamLine
662(
663    ScrnInfoPtr pScreenInfo,
664    int         x,
665    int         y,
666    int         major,
667    int         minor,
668    int         err,
669    int         len,
670    int         octant
671)
672{
673    ATIPtr pATI = ATIPTR(pScreenInfo);
674    CARD32 dst_cntl = DST_LAST_PEL;
675
676    if (octant & YMAJOR)
677        dst_cntl |= DST_Y_MAJOR;
678
679    if (!(octant & XDECREASING))
680        dst_cntl |= DST_X_DIR;
681
682    if (!(octant & YDECREASING))
683        dst_cntl |= DST_Y_DIR;
684
685    ATIDRISync(pScreenInfo);
686
687    ATIMach64WaitForFIFO(pATI, 6);
688    outf(DST_CNTL, dst_cntl);
689    outf(DST_Y_X, SetWord(x, 1) | SetWord(y, 0));
690    outf(DST_BRES_ERR, minor + err);
691    outf(DST_BRES_INC, minor);
692    outf(DST_BRES_DEC, minor - major);
693    outf(DST_BRES_LNTH, len);
694}
695
696/*
697 * ATIMach64SetupForMono8x8PatternFill --
698 *
699 * This function sets up the draw engine for a series of 8x8 1bpp pattern
700 * fills.
701 */
702static void
703ATIMach64SetupForMono8x8PatternFill
704(
705    ScrnInfoPtr  pScreenInfo,
706    int          patx,
707    int          paty,
708    int          fg,
709    int          bg,
710    int          rop,
711    unsigned int planemask
712)
713{
714    ATIPtr pATI = ATIPTR(pScreenInfo);
715
716    ATIDRISync(pScreenInfo);
717
718    ATIMach64WaitForFIFO(pATI, 3);
719    outf(DP_WRITE_MASK, planemask);
720    outf(DP_SRC, DP_MONO_SRC_PATTERN |
721        SetBits(SRC_FRGD, DP_FRGD_SRC) | SetBits(SRC_BKGD, DP_BKGD_SRC));
722    outf(DP_FRGD_CLR, fg);
723
724    if (bg == -1)
725    {
726        outf(DP_MIX, SetBits(ATIMach64ALU[rop], DP_FRGD_MIX) |
727            SetBits(MIX_DST, DP_BKGD_MIX));
728    }
729    else
730    {
731        ATIMach64WaitForFIFO(pATI, 2);
732        outf(DP_BKGD_CLR, bg);
733        outf(DP_MIX, SetBits(ATIMach64ALU[rop], DP_FRGD_MIX) |
734            SetBits(ATIMach64ALU[rop], DP_BKGD_MIX));
735    }
736
737    ATIMach64WaitForFIFO(pATI, 4);
738    outf(PAT_REG0, patx);
739    outf(PAT_REG1, paty);
740    outf(PAT_CNTL, PAT_MONO_EN);
741
742    outf(CLR_CMP_CNTL, CLR_CMP_FN_FALSE);
743
744    if (pATI->XModifier == 1)
745        outf(DST_CNTL, DST_X_DIR | DST_Y_DIR);
746}
747
748/*
749 * ATIMach64SubsequentMono8x8PatternFillRect --
750 *
751 * This function performs an 8x8 1bpp pattern fill.
752 */
753static void
754ATIMach64SubsequentMono8x8PatternFillRect
755(
756    ScrnInfoPtr pScreenInfo,
757    int         patx,
758    int         paty,
759    int         x,
760    int         y,
761    int         w,
762    int         h
763)
764{
765    ATIPtr pATI = ATIPTR(pScreenInfo);
766
767    ATIDRISync(pScreenInfo);
768
769    if (pATI->XModifier != 1)
770    {
771        x *= pATI->XModifier;
772        w *= pATI->XModifier;
773
774        outf(DST_CNTL, SetBits((x / 4) % 6, DST_24_ROT) |
775            (DST_X_DIR | DST_Y_DIR | DST_24_ROT_EN));
776    }
777
778    /* Disable clipping if it gets in the way */
779    ATIMach64ValidateClip(pATI, x, x + w - 1, y, y + h - 1);
780
781    ATIMach64WaitForFIFO(pATI, 2);
782    outf(DST_Y_X, SetWord(x, 1) | SetWord(y, 0));
783    outf(DST_HEIGHT_WIDTH, SetWord(w, 1) | SetWord(h, 0));
784}
785
786/*
787 * ATIMach64SetupForScanlineCPUToScreenColorExpandFill --
788 *
789 * This function sets up the engine for a series of colour expansion fills.
790 */
791static void
792ATIMach64SetupForScanlineCPUToScreenColorExpandFill
793(
794    ScrnInfoPtr  pScreenInfo,
795    int          fg,
796    int          bg,
797    int          rop,
798    unsigned int planemask
799)
800{
801    ATIPtr pATI = ATIPTR(pScreenInfo);
802
803    ATIDRISync(pScreenInfo);
804
805    ATIMach64WaitForFIFO(pATI, 3);
806    outf(DP_WRITE_MASK, planemask);
807    outf(DP_SRC, DP_MONO_SRC_HOST |
808        SetBits(SRC_FRGD, DP_FRGD_SRC) | SetBits(SRC_BKGD, DP_BKGD_SRC));
809    outf(DP_FRGD_CLR, fg);
810
811    if (bg == -1)
812    {
813        outf(DP_MIX, SetBits(ATIMach64ALU[rop], DP_FRGD_MIX) |
814            SetBits(MIX_DST, DP_BKGD_MIX));
815    }
816    else
817    {
818        ATIMach64WaitForFIFO(pATI, 2);
819        outf(DP_BKGD_CLR, bg);
820        outf(DP_MIX, SetBits(ATIMach64ALU[rop], DP_FRGD_MIX) |
821            SetBits(ATIMach64ALU[rop], DP_BKGD_MIX));
822    }
823
824    outf(CLR_CMP_CNTL, CLR_CMP_FN_FALSE);
825
826    if (pATI->XModifier == 1)
827        outf(DST_CNTL, DST_X_DIR | DST_Y_DIR);
828}
829
830/*
831 * ATIMach64SubsequentScanlineCPUToScreenColorExpandFill --
832 *
833 * This function sets up the engine for a single colour expansion fill.
834 */
835static void
836ATIMach64SubsequentScanlineCPUToScreenColorExpandFill
837(
838    ScrnInfoPtr pScreenInfo,
839    int         x,
840    int         y,
841    int         w,
842    int         h,
843    int         skipleft
844)
845{
846    ATIPtr pATI = ATIPTR(pScreenInfo);
847
848    ATIDRISync(pScreenInfo);
849
850    if (pATI->XModifier != 1)
851    {
852        x *= pATI->XModifier;
853        w *= pATI->XModifier;
854        skipleft *= pATI->XModifier;
855
856        outf(DST_CNTL, SetBits((x / 4) % 6, DST_24_ROT) |
857            (DST_X_DIR | DST_Y_DIR | DST_24_ROT_EN));
858    }
859
860    pATI->ExpansionBitmapWidth = (w + 31) / 32;
861
862    ATIMach64WaitForFIFO(pATI, 3);
863    pATI->sc_left = x + skipleft;
864    pATI->sc_right = x + w - 1;
865    outf(SC_LEFT_RIGHT,
866        SetWord(pATI->sc_right, 1) | SetWord(pATI->sc_left, 0));
867    outf(DST_Y_X, SetWord(x, 1) | SetWord(y, 0));
868    outf(DST_HEIGHT_WIDTH,
869        SetWord(pATI->ExpansionBitmapWidth * 32, 1) | SetWord(h, 0));
870}
871
872/*
873 * ATIMach64SubsequentColorExpandScanline --
874 *
875 * This function feeds a bitmap scanline to the engine for a colour expansion
876 * fill.  This is written to do burst transfers for those platforms that can do
877 * them, and to improve CPU/engine concurrency.
878 */
879static void
880ATIMach64SubsequentColorExpandScanline
881(
882    ScrnInfoPtr pScreenInfo,
883    int         iBuffer
884)
885{
886    ATIPtr          pATI         = ATIPTR(pScreenInfo);
887    CARD32          *pBitmapData = pATI->ExpansionBitmapScanlinePtr[iBuffer];
888    int             w            = pATI->ExpansionBitmapWidth;
889    int             nDWord;
890
891    ATIDRISync(pScreenInfo);
892
893    while (w > 0)
894    {
895        /*
896         * Transfers are done in chunks of up to 64 bytes in length (32 on
897         * earlier controllers).
898         */
899        nDWord = w;
900        if (nDWord > pATI->nHostFIFOEntries)
901            nDWord = pATI->nHostFIFOEntries;
902
903        /* Make enough FIFO slots available */
904        ATIMach64WaitForFIFO(pATI, nDWord);
905
906        /*
907         * Always start transfers on a chuck-sized boundary.  Note that
908         * HOST_DATA_0 is actually on a 512-byte boundary, but *pBitmapData can
909         * only be guaranteed to be on a chunk-sized boundary.
910         *
911         * Transfer current chunk.  With any luck, the compiler won't mangle
912         * this too badly...
913         */
914
915#       if defined(ATIMove32)
916
917        {
918            ATIMove32(pATI->pHOST_DATA, pBitmapData, nDWord);
919        }
920
921#       else
922
923        {
924            volatile CARD32 *pDst;
925            CARD32          *pSrc;
926            unsigned int    iDWord;
927
928            iDWord = 16 - nDWord;
929            pDst = (volatile CARD32 *)pATI->pHOST_DATA - iDWord;
930            pSrc = pBitmapData - iDWord;
931
932            switch (iDWord)
933            {
934                case  0:  MMIO_MOVE32(pDst +  0, 0, *(pSrc +  0));
935                case  1:  MMIO_MOVE32(pDst +  1, 0, *(pSrc +  1));
936                case  2:  MMIO_MOVE32(pDst +  2, 0, *(pSrc +  2));
937                case  3:  MMIO_MOVE32(pDst +  3, 0, *(pSrc +  3));
938                case  4:  MMIO_MOVE32(pDst +  4, 0, *(pSrc +  4));
939                case  5:  MMIO_MOVE32(pDst +  5, 0, *(pSrc +  5));
940                case  6:  MMIO_MOVE32(pDst +  6, 0, *(pSrc +  6));
941                case  7:  MMIO_MOVE32(pDst +  7, 0, *(pSrc +  7));
942                case  8:  MMIO_MOVE32(pDst +  8, 0, *(pSrc +  8));
943                case  9:  MMIO_MOVE32(pDst +  9, 0, *(pSrc +  9));
944                case 10:  MMIO_MOVE32(pDst + 10, 0, *(pSrc + 10));
945                case 11:  MMIO_MOVE32(pDst + 11, 0, *(pSrc + 11));
946                case 12:  MMIO_MOVE32(pDst + 12, 0, *(pSrc + 12));
947                case 13:  MMIO_MOVE32(pDst + 13, 0, *(pSrc + 13));
948                case 14:  MMIO_MOVE32(pDst + 14, 0, *(pSrc + 14));
949                case 15:  MMIO_MOVE32(pDst + 15, 0, *(pSrc + 15));
950
951                default:    /* Muffle compiler */
952                    break;
953            }
954        }
955
956#       endif
957
958        /* Step to next chunk */
959        pBitmapData += nDWord;
960        w -= nDWord;
961        pATI->nAvailableFIFOEntries -= nDWord;
962    }
963
964    pATI->EngineIsBusy = TRUE;
965}
966
967/*
968 * ATIMach64AccelInit --
969 *
970 * This function fills in structure fields needed for acceleration on Mach64
971 * variants.
972 */
973Bool
974ATIMach64AccelInit
975(
976    ScreenPtr pScreen
977)
978{
979    ScrnInfoPtr   pScreenInfo = xf86ScreenToScrn(pScreen);
980    ATIPtr        pATI        = ATIPTR(pScreenInfo);
981    XAAInfoRecPtr pXAAInfo;
982
983    if (!(pATI->pXAAInfo = XAACreateInfoRec()))
984        return FALSE;
985
986    pXAAInfo = pATI->pXAAInfo;
987
988    /* This doesn't seem quite right... */
989    if (pATI->XModifier == 1)
990    {
991        pXAAInfo->Flags = PIXMAP_CACHE | OFFSCREEN_PIXMAPS;
992        pXAAInfo->Flags |= LINEAR_FRAMEBUFFER;
993    }
994
995    /* Sync */
996    pXAAInfo->Sync = ATIMach64Sync;
997
998    /* Screen-to-screen copy */
999    pXAAInfo->SetupForScreenToScreenCopy = ATIMach64SetupForScreenToScreenCopy;
1000    pXAAInfo->SubsequentScreenToScreenCopy =
1001        ATIMach64SubsequentScreenToScreenCopy;
1002
1003    /* Solid fills */
1004    pXAAInfo->SetupForSolidFill = ATIMach64SetupForSolidFill;
1005    pXAAInfo->SubsequentSolidFillRect = ATIMach64SubsequentSolidFillRect;
1006
1007    /* 8x8 mono pattern fills */
1008    pXAAInfo->Mono8x8PatternFillFlags =
1009
1010#if X_BYTE_ORDER != X_LITTLE_ENDIAN
1011
1012        BIT_ORDER_IN_BYTE_MSBFIRST |
1013
1014#endif /* X_BYTE_ORDER */
1015
1016        HARDWARE_PATTERN_PROGRAMMED_BITS | HARDWARE_PATTERN_SCREEN_ORIGIN;
1017    pXAAInfo->SetupForMono8x8PatternFill = ATIMach64SetupForMono8x8PatternFill;
1018    pXAAInfo->SubsequentMono8x8PatternFillRect =
1019        ATIMach64SubsequentMono8x8PatternFillRect;
1020
1021    /*
1022     * Use scanline version of colour expansion, not only for the non-ix86
1023     * case, but also to avoid PCI retries.
1024     */
1025    pXAAInfo->ScanlineCPUToScreenColorExpandFillFlags =
1026        LEFT_EDGE_CLIPPING | LEFT_EDGE_CLIPPING_NEGATIVE_X |
1027        CPU_TRANSFER_PAD_DWORD | SCANLINE_PAD_DWORD;
1028    if (pATI->XModifier != 1)
1029        pXAAInfo->ScanlineCPUToScreenColorExpandFillFlags |= TRIPLE_BITS_24BPP;
1030    pXAAInfo->NumScanlineColorExpandBuffers = 1;
1031
1032    /* Align bitmap data on a 64-byte boundary */
1033    pATI->ExpansionBitmapWidth =        /* DWord size in bits */
1034        ((pATI->displayWidth * pATI->XModifier) + 31) & ~31U;
1035    pATI->ExpansionBitmapScanlinePtr[1] =
1036        (CARD32 *)xnfalloc((pATI->ExpansionBitmapWidth >> 3) + 63);
1037    pATI->ExpansionBitmapScanlinePtr[0] =
1038        (pointer)(((unsigned long)pATI->ExpansionBitmapScanlinePtr[1] + 63) &
1039                  ~63UL);
1040    pXAAInfo->ScanlineColorExpandBuffers =
1041        (CARD8 **)pATI->ExpansionBitmapScanlinePtr;
1042    pXAAInfo->SetupForScanlineCPUToScreenColorExpandFill =
1043        ATIMach64SetupForScanlineCPUToScreenColorExpandFill;
1044    pXAAInfo->SubsequentScanlineCPUToScreenColorExpandFill =
1045        ATIMach64SubsequentScanlineCPUToScreenColorExpandFill;
1046    pXAAInfo->SubsequentColorExpandScanline =
1047        ATIMach64SubsequentColorExpandScanline;
1048
1049    /* The engine does not support the following primitives for 24bpp */
1050    if (pATI->XModifier != 1)
1051        goto XAAInit;
1052
1053    /* Solid lines */
1054    pXAAInfo->SetupForSolidLine = ATIMach64SetupForSolidLine;
1055    pXAAInfo->SubsequentSolidHorVertLine = ATIMach64SubsequentSolidHorVertLine;
1056    pXAAInfo->SubsequentSolidBresenhamLine =
1057        ATIMach64SubsequentSolidBresenhamLine;
1058
1059XAAInit:
1060    if (!XAAInit(pScreen, pATI->pXAAInfo)) {
1061        XAADestroyInfoRec(pATI->pXAAInfo);
1062        pATI->pXAAInfo = NULL;
1063        return FALSE;
1064    }
1065
1066    return TRUE;
1067}
1068#endif /* USE_XAA */
1069