1/*
2 * Copyright 1999, 2000 ATI Technologies Inc., Markham, Ontario,
3 *                      Precision Insight, Inc., Cedar Park, Texas, and
4 *                      VA Linux Systems Inc., Fremont, California.
5 *
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining
9 * a copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation on the rights to use, copy, modify, merge,
12 * publish, distribute, sublicense, and/or sell copies of the Software,
13 * and to permit persons to whom the Software is furnished to do so,
14 * subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice (including the
17 * next paragraph) shall be included in all copies or substantial
18 * portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, PRECISION INSIGHT, VA LINUX
24 * SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
25 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
26 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27 * OTHER DEALINGS IN THE SOFTWARE.
28 */
29
30#ifdef HAVE_CONFIG_H
31#include "config.h"
32#endif
33
34/*
35 * Authors:
36 *   Rickard E. Faith <faith@valinux.com>
37 *   Kevin E. Martin <martin@valinux.com>
38 *   Alan Hourihane <alanh@fairlite.demon.co.uk>
39 *
40 * Credits:
41 *
 *   Thanks to Alan Hourihane <alanh@fairlite.demon.co.uk> and SuSE for
43 *   providing source code to their 3.3.x Rage 128 driver.  Portions of
44 *   this file are based on the acceleration code for that driver.
45 *
46 * References:
47 *
48 *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
49 *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
50 *   1999.
51 *
52 *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
53 *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
54 *
55 * Notes on unimplemented XAA optimizations:
56 *
57 *   SetClipping:   The Rage128 doesn't support the full 16bit registers needed
58 *                  for XAA clip rect support.
59 *   SolidFillTrap: This will probably work if we can compute the correct
60 *                  Bresenham error values.
61 *   TwoPointLine:  The Rage 128 supports Bresenham lines instead.
62 *   DashedLine with non-power-of-two pattern length: Apparently, there is
63 *                  no way to set the length of the pattern -- it is always
64 *                  assumed to be 8 or 32 (or 1024?).
65 *   ScreenToScreenColorExpandFill: See p. 4-17 of the Technical Reference
66 *                  Manual where it states that monochrome expansion of frame
67 *                  buffer data is not supported.
68 *   CPUToScreenColorExpandFill, direct: The implementation here uses a hybrid
69 *                  direct/indirect method.  If we had more data registers,
70 *                  then we could do better.  If XAA supported a trigger write
71 *                  address, the code would be simpler.
72 * (Alan Hourihane) Update. We now use purely indirect and clip the full
73 *                  rectangle. Seems as the direct method has some problems
74 *                  with this, although this indirect method is much faster
75 *                  than the old method of setting up the engine per scanline.
76 *                  This code was the basis of the Radeon work we did.
77 *   Color8x8PatternFill: Apparently, an 8x8 color brush cannot take an 8x8
78 *                  pattern from frame buffer memory.
79 *   ImageWrites:   See CPUToScreenColorExpandFill.
80 *
81 */
82
83#define R128_TRAPEZOIDS 0       /* Trapezoids don't work               */
84
85				/* Driver data structures */
86#include <errno.h>
87
88#include "r128.h"
89#include "r128_reg.h"
90#include "r128_probe.h"
91#ifdef R128DRI
92#include "r128_sarea.h"
93#define _XF86DRI_SERVER_
94#include "r128_dri.h"
95#include "r128_common.h"
96#endif
97
98				/* Line support */
99#include "miline.h"
100
101				/* X and server generic header files */
102#include "xf86.h"
103
104#ifdef HAVE_XAA_H
105#include "r128_rop.h"
106#endif
107
108extern int getR128EntityIndex(void);
109
110/* Flush all dirty data in the Pixel Cache to memory. */
111void R128EngineFlush(ScrnInfoPtr pScrn)
112{
113    R128InfoPtr   info      = R128PTR(pScrn);
114    unsigned char *R128MMIO = info->MMIO;
115    int           i;
116
117    OUTREGP(R128_PC_NGUI_CTLSTAT, R128_PC_FLUSH_ALL, ~R128_PC_FLUSH_ALL);
118    for (i = 0; i < R128_TIMEOUT; i++) {
119	if (!(INREG(R128_PC_NGUI_CTLSTAT) & R128_PC_BUSY)) break;
120    }
121}
122
123/* Reset graphics card to known state. */
124void R128EngineReset(ScrnInfoPtr pScrn)
125{
126    R128InfoPtr   info      = R128PTR(pScrn);
127    unsigned char *R128MMIO = info->MMIO;
128    uint32_t      clock_cntl_index;
129    uint32_t      mclk_cntl;
130    uint32_t      gen_reset_cntl;
131
132    R128EngineFlush(pScrn);
133
134    clock_cntl_index = INREG(R128_CLOCK_CNTL_INDEX);
135    mclk_cntl        = INPLL(pScrn, R128_MCLK_CNTL);
136
137    OUTPLL(R128_MCLK_CNTL, mclk_cntl | R128_FORCE_GCP | R128_FORCE_PIPE3D_CP);
138
139    gen_reset_cntl   = INREG(R128_GEN_RESET_CNTL);
140
141    OUTREG(R128_GEN_RESET_CNTL, gen_reset_cntl | R128_SOFT_RESET_GUI);
142    INREG(R128_GEN_RESET_CNTL);
143    OUTREG(R128_GEN_RESET_CNTL,
144	gen_reset_cntl & (uint32_t)(~R128_SOFT_RESET_GUI));
145    INREG(R128_GEN_RESET_CNTL);
146
147    OUTPLL(R128_MCLK_CNTL,        mclk_cntl);
148    OUTREG(R128_CLOCK_CNTL_INDEX, clock_cntl_index);
149    OUTREG(R128_GEN_RESET_CNTL,   gen_reset_cntl);
150}
151
152/* The FIFO has 64 slots.  This routines waits until at least `entries' of
153   these slots are empty. */
154void R128WaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
155{
156    R128InfoPtr   info      = R128PTR(pScrn);
157    unsigned char *R128MMIO = info->MMIO;
158    int           i;
159
160    for (;;) {
161	for (i = 0; i < R128_TIMEOUT; i++) {
162	    info->fifo_slots = INREG(R128_GUI_STAT) & R128_GUI_FIFOCNT_MASK;
163	    if (info->fifo_slots >= entries) return;
164	}
165
166    DEBUG(xf86DrvMsg(pScrn->scrnIndex, X_INFO,
167                    "FIFO timed out: %lu entries, "
168                    "stat = 0x%08lx, probe = 0x%08lx\n",
169                    INREG(R128_GUI_STAT) & R128_GUI_FIFOCNT_MASK,
170                    INREG(R128_GUI_STAT),
171                    INREG(R128_GUI_PROBE)));
172	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
173		   "FIFO timed out, resetting engine...\n");
174	R128EngineReset(pScrn);
175#ifdef R128DRI
176	R128CCE_RESET(pScrn, info);
177	if (info->directRenderingEnabled) {
178	    R128CCE_START(pScrn, info);
179	}
180#endif
181    }
182}
183
184/* Wait for the graphics engine to be completely idle: the FIFO has
185   drained, the Pixel Cache is flushed, and the engine is idle.  This is a
186   standard "sync" function that will make the hardware "quiescent". */
187void R128WaitForIdle(ScrnInfoPtr pScrn)
188{
189    R128InfoPtr   info      = R128PTR(pScrn);
190    unsigned char *R128MMIO = info->MMIO;
191    int           i;
192
193    R128WaitForFifoFunction(pScrn, 64);
194
195    for (;;) {
196	for (i = 0; i < R128_TIMEOUT; i++) {
197	    if (!(INREG(R128_GUI_STAT) & R128_GUI_ACTIVE)) {
198		R128EngineFlush(pScrn);
199		return;
200	    }
201	}
202
203    DEBUG(xf86DrvMsg(pScrn->scrnIndex, X_INFO,
204                        "Idle timed out: %lu entries, "
205                        "stat = 0x%08lx, probe = 0x%08lx\n",
206                        INREG(R128_GUI_STAT) & R128_GUI_FIFOCNT_MASK,
207                        INREG(R128_GUI_STAT),
208                        INREG(R128_GUI_PROBE)));
209	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
210		   "Idle timed out, resetting engine...\n");
211#ifdef R128DRI
212        R128CCE_STOP(pScrn, info);
213#endif
214	R128EngineReset(pScrn);
215#ifdef R128DRI
216	R128CCE_RESET(pScrn, info);
217	if (info->directRenderingEnabled) {
218	    R128CCE_START(pScrn, info);
219	}
220#endif
221    }
222}
223
224#ifdef R128DRI
225/* Wait until the CCE is completely idle: the FIFO has drained and the
226 * CCE is idle.
227 */
228void R128CCEWaitForIdle(ScrnInfoPtr pScrn)
229{
230    R128InfoPtr info = R128PTR(pScrn);
231    int         ret, i;
232
233    FLUSH_RING();
234
235    for (;;) {
236        i = 0;
237        do {
238            ret = drmCommandNone(info->drmFD, DRM_R128_CCE_IDLE);
239        } while ( ret && errno == EBUSY && i++ < (R128_IDLE_RETRY * R128_IDLE_RETRY) );
240
241	if (ret && ret != -EBUSY) {
242	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
243		       "%s: CCE idle %d\n", __FUNCTION__, ret);
244	}
245
246	if (i > R128_IDLE_RETRY) {
247	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
248		       "%s: (DEBUG) CCE idle took i = %d\n", __FUNCTION__, i);
249	}
250
251	if (ret == 0) return;
252
253	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
254		   "Idle timed out, resetting engine...\n");
255	R128CCE_STOP(pScrn, info);
256	R128EngineReset(pScrn);
257
258	/* Always restart the engine when doing CCE 2D acceleration */
259	R128CCE_RESET(pScrn, info);
260	R128CCE_START(pScrn, info);
261    }
262}
263
264int R128CCEStop(ScrnInfoPtr pScrn)
265{
266    R128InfoPtr    info = R128PTR(pScrn);
267    drmR128CCEStop stop;
268    int            ret, i;
269
270    stop.flush = 1;
271    stop.idle  = 1;
272
273    ret = drmCommandWrite( info->drmFD, DRM_R128_CCE_STOP,
274                           &stop, sizeof(drmR128CCEStop) );
275
276    if ( ret == 0 ) {
277        return 0;
278    } else if ( errno != EBUSY ) {
279        return -errno;
280    }
281
282    stop.flush = 0;
283
284    i = 0;
285    do {
286        ret = drmCommandWrite( info->drmFD, DRM_R128_CCE_STOP,
287                               &stop, sizeof(drmR128CCEStop) );
288    } while ( ret && errno == EBUSY && i++ < R128_IDLE_RETRY );
289
290    if ( ret == 0 ) {
291        return 0;
292    } else if ( errno != EBUSY ) {
293        return -errno;
294    }
295
296    stop.idle = 0;
297
298    if ( drmCommandWrite( info->drmFD, DRM_R128_CCE_STOP,
299                          &stop, sizeof(drmR128CCEStop) )) {
300        return -errno;
301    } else {
302        return 0;
303    }
304}
305
306#endif
307
308#ifdef HAVE_XAA_H
309/* Setup for XAA SolidFill. */
310static void R128SetupForSolidFill(ScrnInfoPtr pScrn,
311				  int color, int rop, unsigned int planemask)
312{
313    R128InfoPtr   info      = R128PTR(pScrn);
314    unsigned char *R128MMIO = info->MMIO;
315
316    R128WaitForFifo(pScrn, 4);
317    OUTREG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
318				     | R128_GMC_BRUSH_SOLID_COLOR
319				     | R128_GMC_SRC_DATATYPE_COLOR
320				     | R128_ROP[rop].pattern));
321    OUTREG(R128_DP_BRUSH_FRGD_CLR,  color);
322    OUTREG(R128_DP_WRITE_MASK,      planemask);
323    OUTREG(R128_DP_CNTL,            (R128_DST_X_LEFT_TO_RIGHT
324				     | R128_DST_Y_TOP_TO_BOTTOM));
325}
326
327/* Subsequent XAA SolidFillRect.
328
329   Tests: xtest CH06/fllrctngl, xterm
330*/
331static void  R128SubsequentSolidFillRect(ScrnInfoPtr pScrn,
332					 int x, int y, int w, int h)
333{
334    R128InfoPtr   info      = R128PTR(pScrn);
335    unsigned char *R128MMIO = info->MMIO;
336
337    R128WaitForFifo(pScrn, 2);
338    OUTREG(R128_DST_Y_X,          (y << 16) | x);
339    OUTREG(R128_DST_WIDTH_HEIGHT, (w << 16) | h);
340}
341
342/* Setup for XAA solid lines. */
343static void R128SetupForSolidLine(ScrnInfoPtr pScrn,
344				  int color, int rop, unsigned int planemask)
345{
346    R128InfoPtr   info      = R128PTR(pScrn);
347    unsigned char *R128MMIO = info->MMIO;
348
349    R128WaitForFifo(pScrn, 3);
350    OUTREG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
351				     | R128_GMC_BRUSH_SOLID_COLOR
352				     | R128_GMC_SRC_DATATYPE_COLOR
353				     | R128_ROP[rop].pattern));
354    OUTREG(R128_DP_BRUSH_FRGD_CLR,  color);
355    OUTREG(R128_DP_WRITE_MASK,      planemask);
356}
357
358
359/* Subsequent XAA solid Bresenham line.
360
361   Tests: xtest CH06/drwln, ico, Mark Vojkovich's linetest program
362
363   [See http://www.xfree86.org/devel/archives/devel/1999-Jun/0102.shtml for
364   Mark Vojkovich's linetest program, posted 2Jun99 to devel@xfree86.org.]
365
366   x11perf -line500
367                               1024x768@76Hz   1024x768@76Hz
368                                        8bpp           32bpp
369   not used:                     39700.0/sec     34100.0/sec
370   used:                         47600.0/sec     36800.0/sec
371*/
372static void R128SubsequentSolidBresenhamLine(ScrnInfoPtr pScrn,
373					     int x, int y,
374					     int major, int minor,
375					     int err, int len, int octant)
376{
377    R128InfoPtr   info      = R128PTR(pScrn);
378    unsigned char *R128MMIO = info->MMIO;
379    int           flags     = 0;
380
381    if (octant & YMAJOR)         flags |= R128_DST_Y_MAJOR;
382    if (!(octant & XDECREASING)) flags |= R128_DST_X_DIR_LEFT_TO_RIGHT;
383    if (!(octant & YDECREASING)) flags |= R128_DST_Y_DIR_TOP_TO_BOTTOM;
384
385    R128WaitForFifo(pScrn, 6);
386    OUTREG(R128_DP_CNTL_XDIR_YDIR_YMAJOR, flags);
387    OUTREG(R128_DST_Y_X,                  (y << 16) | x);
388    OUTREG(R128_DST_BRES_ERR,             err);
389    OUTREG(R128_DST_BRES_INC,             minor);
390    OUTREG(R128_DST_BRES_DEC,             -major);
391    OUTREG(R128_DST_BRES_LNTH,            len);
392}
393
394/* Subsequent XAA solid horizontal and vertical lines
395
396   1024x768@76Hz 8bpp
397                             Without             With
398   x11perf -hseg500      87600.0/sec     798000.0/sec
399   x11perf -vseg500      38100.0/sec      38000.0/sec
400*/
401static void R128SubsequentSolidHorVertLine(ScrnInfoPtr pScrn,
402					   int x, int y, int len, int dir )
403{
404    R128InfoPtr   info      = R128PTR(pScrn);
405    unsigned char *R128MMIO = info->MMIO;
406
407    R128WaitForFifo(pScrn, 1);
408    OUTREG(R128_DP_CNTL, (R128_DST_X_LEFT_TO_RIGHT
409			  | R128_DST_Y_TOP_TO_BOTTOM));
410
411    if (dir == DEGREES_0) {
412	R128SubsequentSolidFillRect(pScrn, x, y, len, 1);
413    } else {
414	R128SubsequentSolidFillRect(pScrn, x, y, 1, len);
415    }
416}
417
418/* Setup for XAA dashed lines.
419
420   Tests: xtest CH05/stdshs, XFree86/drwln
421
422   NOTE: Since we can only accelerate lines with power-of-2 patterns of
423   length <= 32, these x11perf numbers are not representative of the
424   speed-up on appropriately-sized patterns.
425
426   1024x768@76Hz 8bpp
427                             Without             With
428   x11perf -dseg100     218000.0/sec     222000.0/sec
429   x11perf -dline100    215000.0/sec     221000.0/sec
430   x11perf -ddline100   178000.0/sec     180000.0/sec
431*/
432static void R128SetupForDashedLine(ScrnInfoPtr pScrn,
433				   int fg, int bg,
434				   int rop, unsigned int planemask,
435				   int length, unsigned char *pattern)
436{
437    R128InfoPtr   info      = R128PTR(pScrn);
438    unsigned char *R128MMIO = info->MMIO;
439    uint32_t      pat       = *(uint32_t *)(pointer)pattern;
440
441#if X_BYTE_ORDER == X_LITTLE_ENDIAN
442# define PAT_SHIFT(pat,n) pat << n
443#else
444# define PAT_SHIFT(pat,n) pat >> n
445#endif
446
447    switch (length) {
448    case  2: pat |= PAT_SHIFT(pat,2); /* fall through */
449    case  4: pat |= PAT_SHIFT(pat,4); /* fall through */
450    case  8: pat |= PAT_SHIFT(pat,8); /* fall through */
451    case 16: pat |= PAT_SHIFT(pat,16);
452    }
453
454    R128WaitForFifo(pScrn, 5);
455    OUTREG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
456				     | (bg == -1
457					? R128_GMC_BRUSH_32x1_MONO_FG_LA
458					: R128_GMC_BRUSH_32x1_MONO_FG_BG)
459				     | R128_ROP[rop].pattern
460				     | R128_GMC_BYTE_LSB_TO_MSB));
461    OUTREG(R128_DP_WRITE_MASK,      planemask);
462    OUTREG(R128_DP_BRUSH_FRGD_CLR,  fg);
463    OUTREG(R128_DP_BRUSH_BKGD_CLR,  bg);
464    OUTREG(R128_BRUSH_DATA0,        pat);
465}
466
467/* Subsequent XAA dashed line. */
468static void R128SubsequentDashedBresenhamLine(ScrnInfoPtr pScrn,
469					      int x, int y,
470					      int major, int minor,
471					      int err, int len, int octant,
472					      int phase)
473{
474    R128InfoPtr   info      = R128PTR(pScrn);
475    unsigned char *R128MMIO = info->MMIO;
476    int           flags     = 0;
477
478    if (octant & YMAJOR)         flags |= R128_DST_Y_MAJOR;
479    if (!(octant & XDECREASING)) flags |= R128_DST_X_DIR_LEFT_TO_RIGHT;
480    if (!(octant & YDECREASING)) flags |= R128_DST_Y_DIR_TOP_TO_BOTTOM;
481
482    R128WaitForFifo(pScrn, 7);
483    OUTREG(R128_DP_CNTL_XDIR_YDIR_YMAJOR, flags);
484    OUTREG(R128_DST_Y_X,                  (y << 16) | x);
485    OUTREG(R128_BRUSH_Y_X,                (phase << 16) | phase);
486    OUTREG(R128_DST_BRES_ERR,             err);
487    OUTREG(R128_DST_BRES_INC,             minor);
488    OUTREG(R128_DST_BRES_DEC,             -major);
489    OUTREG(R128_DST_BRES_LNTH,            len);
490}
491
492#if R128_TRAPEZOIDS
493				/* This doesn't work.  Except in the
494				   lower-left quadrant, all of the pixel
495				   errors appear to be because eL and eR
496				   are not correct.  Drawing from right to
497				   left doesn't help.  Be aware that the
498				   non-_SUB registers set the sub-pixel
499				   values to 0.5 (0x08), which isn't what
500				   XAA wants. */
/* Subsequent XAA SolidFillTrap.  XAA always passes data that assumes we
   fill from top to bottom, so dyL and dyR are always non-negative.

   This function is only compiled when R128_TRAPEZOIDS is non-zero; it
   is currently disabled because the output is known to be wrong.

   y, h:                   top scanline and height of the trapezoid.
   left, dxL, dyL, eL:     start x and Bresenham terms of the left edge.
   right, dxR, dyR, eR:    start x and Bresenham terms of the right edge.

   NOTE(review): Lymajor, Rymajor, origdxL and origdxR are initialised
   but never read. */
static void R128SubsequentSolidFillTrap(ScrnInfoPtr pScrn, int y, int h,
					int left, int dxL, int dyL, int eL,
					int right, int dxR, int dyR, int eR)
{
    R128InfoPtr   info      = R128PTR(pScrn);
    unsigned char *R128MMIO = info->MMIO;
    int           flags     = 0;
    int           Lymajor   = 0;
    int           Rymajor   = 0;
    int           origdxL   = dxL;
    int           origdxR   = dxR;

    DEBUG(xf86DrvMsg(pScrn->scrnIndex, X_INFO,
                        "Trap %d %d; "
                        "L %d %d %d %d; "
                        "R %d %d %d %d\n",
                        y, h,
                        left, dxL, dyL, eL,
                        right, dxR, dyR, eR));

    /* Use the magnitude of each edge's dx; a non-negative dx sets a
       per-edge bit in `flags' instead (bit 0 for the left edge, bit 6 for
       the right edge -- presumably direction bits of DP_CNTL; confirm
       against the register manual). */
    if (dxL < 0)    dxL = -dxL; else flags |= (1 << 0) /* | (1 << 8) */;
    if (dxR < 0)    dxR = -dxR; else flags |= (1 << 6);

    /* Eleven register writes follow in either variant below. */
    R128WaitForFifo(pScrn, 11);

#if 1
    /* Active variant: left edge is the leading edge, right edge is the
       trailing edge; coordinates are shifted left by 4 with a zero
       low nibble, and each error term is biased by -dx. */
    OUTREG(R128_DP_CNTL,            flags | (1 << 1) | (1 << 7));
    OUTREG(R128_DST_Y_SUB,          ((y) << 4) | 0x0 );
    OUTREG(R128_DST_X_SUB,          ((left) << 4)|0x0);
    OUTREG(R128_TRAIL_BRES_ERR,     eR-dxR);
    OUTREG(R128_TRAIL_BRES_INC,     dxR);
    OUTREG(R128_TRAIL_BRES_DEC,     -dyR);
    OUTREG(R128_TRAIL_X_SUB,        ((right) << 4) | 0x0);
    OUTREG(R128_LEAD_BRES_ERR,      eL-dxL);
    OUTREG(R128_LEAD_BRES_INC,      dxL);
    OUTREG(R128_LEAD_BRES_DEC,      -dyL);
    OUTREG(R128_LEAD_BRES_LNTH_SUB, ((h) << 4) | 0x00);
#else
    /* Alternative variant (not compiled): edges swapped, starting from
       the right, with unbiased error terms. */
    OUTREG(R128_DP_CNTL,            flags | (1 << 1) );
    OUTREG(R128_DST_Y_SUB,          (y << 4));
    OUTREG(R128_DST_X_SUB,          (right << 4));
    OUTREG(R128_TRAIL_BRES_ERR,     eL);
    OUTREG(R128_TRAIL_BRES_INC,     dxL);
    OUTREG(R128_TRAIL_BRES_DEC,     -dyL);
    OUTREG(R128_TRAIL_X_SUB,        (left << 4) | 0);
    OUTREG(R128_LEAD_BRES_ERR,      eR);
    OUTREG(R128_LEAD_BRES_INC,      dxR);
    OUTREG(R128_LEAD_BRES_DEC,      -dyR);
    OUTREG(R128_LEAD_BRES_LNTH_SUB, h << 4);
#endif
}
554#endif
555
556/* Setup for XAA screen-to-screen copy.
557
558   Tests: xtest CH06/fllrctngl (also tests transparency).
559*/
560static void R128SetupForScreenToScreenCopy(ScrnInfoPtr pScrn,
561					   int xdir, int ydir, int rop,
562					   unsigned int planemask,
563					   int trans_color)
564{
565    R128InfoPtr   info      = R128PTR(pScrn);
566    unsigned char *R128MMIO = info->MMIO;
567
568    info->xdir = xdir;
569    info->ydir = ydir;
570    R128WaitForFifo(pScrn, 3);
571    OUTREG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
572				     | R128_GMC_BRUSH_SOLID_COLOR
573				     | R128_GMC_SRC_DATATYPE_COLOR
574				     | R128_ROP[rop].rop
575				     | R128_DP_SRC_SOURCE_MEMORY));
576    OUTREG(R128_DP_WRITE_MASK,      planemask);
577    OUTREG(R128_DP_CNTL,            ((xdir >= 0 ? R128_DST_X_LEFT_TO_RIGHT : 0)
578				     | (ydir >= 0
579					? R128_DST_Y_TOP_TO_BOTTOM
580					: 0)));
581
582    if (trans_color != -1) {
583				/* Set up for transparency */
584	R128WaitForFifo(pScrn, 3);
585	OUTREG(R128_CLR_CMP_CLR_SRC, trans_color);
586	OUTREG(R128_CLR_CMP_MASK,    R128_CLR_CMP_MSK);
587	OUTREG(R128_CLR_CMP_CNTL,    (R128_SRC_CMP_NEQ_COLOR
588				      | R128_CLR_CMP_SRC_SOURCE));
589    }
590}
591
592/* Subsequent XAA screen-to-screen copy. */
593static void R128SubsequentScreenToScreenCopy(ScrnInfoPtr pScrn,
594					     int xa, int ya,
595					     int xb, int yb,
596					     int w, int h)
597{
598    R128InfoPtr   info      = R128PTR(pScrn);
599    unsigned char *R128MMIO = info->MMIO;
600
601    if (info->xdir < 0) xa += w - 1, xb += w - 1;
602    if (info->ydir < 0) ya += h - 1, yb += h - 1;
603
604    R128WaitForFifo(pScrn, 3);
605    OUTREG(R128_SRC_Y_X,          (ya << 16) | xa);
606    OUTREG(R128_DST_Y_X,          (yb << 16) | xb);
607    OUTREG(R128_DST_HEIGHT_WIDTH, (h << 16) | w);
608}
609
610/* Setup for XAA mono 8x8 pattern color expansion.  Patterns with
611   transparency use `bg == -1'.  This routine is only used if the XAA
612   pixmap cache is turned on.
613
614   Tests: xtest XFree86/fllrctngl (no other test will test this routine with
615                                   both transparency and non-transparency)
616
617   1024x768@76Hz 8bpp
618                             Without             With
619   x11perf -srect100     38600.0/sec      85700.0/sec
620   x11perf -osrect100    38600.0/sec      85700.0/sec
621*/
622static void R128SetupForMono8x8PatternFill(ScrnInfoPtr pScrn,
623					   int patternx, int patterny,
624					   int fg, int bg, int rop,
625					   unsigned int planemask)
626{
627    R128InfoPtr   info      = R128PTR(pScrn);
628    unsigned char *R128MMIO = info->MMIO;
629
630    R128WaitForFifo(pScrn, 6);
631    OUTREG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
632				     | (bg == -1
633					? R128_GMC_BRUSH_8X8_MONO_FG_LA
634					: R128_GMC_BRUSH_8X8_MONO_FG_BG)
635				     | R128_ROP[rop].pattern
636				     | R128_GMC_BYTE_LSB_TO_MSB));
637    OUTREG(R128_DP_WRITE_MASK,      planemask);
638    OUTREG(R128_DP_BRUSH_FRGD_CLR,  fg);
639    OUTREG(R128_DP_BRUSH_BKGD_CLR,  bg);
640    OUTREG(R128_BRUSH_DATA0,        patternx);
641    OUTREG(R128_BRUSH_DATA1,        patterny);
642}
643
644/* Subsequent XAA 8x8 pattern color expansion.  Because they are used in
645   the setup function, `patternx' and `patterny' are not used here. */
646static void R128SubsequentMono8x8PatternFillRect(ScrnInfoPtr pScrn,
647						 int patternx, int patterny,
648						 int x, int y, int w, int h)
649{
650    R128InfoPtr   info      = R128PTR(pScrn);
651    unsigned char *R128MMIO = info->MMIO;
652
653    R128WaitForFifo(pScrn, 3);
654    OUTREG(R128_BRUSH_Y_X,        (patterny << 8) | patternx);
655    OUTREG(R128_DST_Y_X,          (y << 16) | x);
656    OUTREG(R128_DST_HEIGHT_WIDTH, (h << 16) | w);
657}
658
659#if 0
660/* Setup for XAA color 8x8 pattern fill.
661
662   Tests: xtest XFree86/fllrctngl (with Mono8x8PatternFill off)
663*/
664static void R128SetupForColor8x8PatternFill(ScrnInfoPtr pScrn,
665					    int patx, int paty,
666					    int rop, unsigned int planemask,
667					    int trans_color)
668{
669    R128InfoPtr   info      = R128PTR(pScrn);
670    unsigned char *R128MMIO = info->MMIO;
671
672    DEBUG(xf86DrvMsg(pScrn->scrnIndex, X_INFO,
673                        "Color8x8 %d %d %d\n",
674                        trans_color, patx, paty));
675
676    R128WaitForFifo(pScrn, 2);
677    OUTREG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
678				     | R128_GMC_BRUSH_8x8_COLOR
679				     | R128_GMC_SRC_DATATYPE_COLOR
680				     | R128_ROP[rop].rop
681				     | R128_DP_SRC_SOURCE_MEMORY));
682    OUTREG(R128_DP_WRITE_MASK,      planemask);
683
684    if (trans_color != -1) {
685				/* Set up for transparency */
686	R128WaitForFifo(pScrn, 3);
687	OUTREG(R128_CLR_CMP_CLR_SRC, trans_color);
688	OUTREG(R128_CLR_CMP_MASK,    R128_CLR_CMP_MSK);
689	OUTREG(R128_CLR_CMP_CNTL,    (R128_SRC_CMP_NEQ_COLOR
690				      | R128_CLR_CMP_SRC_SOURCE));
691    }
692}
693
694/* Subsequent XAA 8x8 pattern color expansion. */
695static void R128SubsequentColor8x8PatternFillRect( ScrnInfoPtr pScrn,
696						   int patx, int paty,
697						   int x, int y, int w, int h)
698{
699    R128InfoPtr   info      = R128PTR(pScrn);
700    unsigned char *R128MMIO = info->MMIO;
701
702    DEBUG(xf86DrvMsg(pScrn->scrnIndex, X_INFO,
703                        "Color8x8 %d,%d %d,%d %d %d\n",
704                        patx, paty, x, y, w, h));
705    R128WaitForFifo(pScrn, 3);
706    OUTREG(R128_SRC_Y_X, (paty << 16) | patx);
707    OUTREG(R128_DST_Y_X, (y << 16) | x);
708    OUTREG(R128_DST_HEIGHT_WIDTH, (h << 16) | w);
709}
710#endif
711
712/* Setup for XAA indirect CPU-to-screen color expansion (indirect).
713   Because of how the scratch buffer is initialized, this is really a
714   mainstore-to-screen color expansion.  Transparency is supported when `bg
715   == -1'.
716
717   x11perf -ftext (pure indirect):
718                               1024x768@76Hz   1024x768@76Hz
719                                        8bpp           32bpp
720   not used:                    685000.0/sec    794000.0/sec
721   used:                       1070000.0/sec   1080000.0/sec
722
723   We could improve this indirect routine by about 10% if the hardware
724   could accept DWORD padded scanlines, or if XAA could provide bit-packed
725   data.  We might also be able to move to a direct routine if there were
726   more HOST_DATA registers.
727
728   Implementing the hybrid indirect/direct scheme improved performance in a
729   few areas:
730
731   1024x768@76 8bpp
732                                   Indirect          Hybrid
733   x11perf -oddsrect10          50100.0/sec     71700.0/sec
734   x11perf -oddsrect100          4240.0/sec      6660.0/sec
735   x11perf -bigsrect10          50300.0/sec     71100.0/sec
736   x11perf -bigsrect100          4190.0/sec      6800.0/sec
737   x11perf -polytext           584000.0/sec    714000.0/sec
738   x11perf -polytext16         154000.0/sec    172000.0/sec
739   x11perf -seg1              1780000.0/sec   1880000.0/sec
740   x11perf -copyplane10         42900.0/sec     58300.0/sec
741   x11perf -copyplane100         4400.0/sec      6710.0/sec
742   x11perf -putimagexy10         5090.0/sec      6670.0/sec
743   x11perf -putimagexy100         424.0/sec       575.0/sec
744
745   1024x768@76 -depth 24 -fbbpp 32
746                                   Indirect          Hybrid
747   x11perf -oddsrect100          4240.0/sec      6670.0/sec
748   x11perf -bigsrect100          4190.0/sec      6800.0/sec
749   x11perf -polytext           585000.0/sec    719000.0/sec
750   x11perf -seg1              2960000.0/sec   2990000.0/sec
751   x11perf -copyplane100         4400.0/sec      6700.0/sec
752   x11perf -putimagexy100         138.0/sec       191.0/sec
753
754*/
755static void R128SetupForScanlineCPUToScreenColorExpandFill(ScrnInfoPtr pScrn,
756							   int fg, int bg,
757							   int rop,
758							   unsigned int
759							   planemask)
760{
761    R128InfoPtr   info      = R128PTR(pScrn);
762    unsigned char *R128MMIO = info->MMIO;
763
764    R128WaitForFifo(pScrn, 4);
765#if X_BYTE_ORDER == X_LITTLE_ENDIAN
766    OUTREG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
767				     | R128_GMC_DST_CLIPPING
768				     | R128_GMC_BRUSH_NONE
769				     | (bg == -1
770					? R128_GMC_SRC_DATATYPE_MONO_FG_LA
771					: R128_GMC_SRC_DATATYPE_MONO_FG_BG)
772				     | R128_ROP[rop].rop
773				     | R128_GMC_BYTE_LSB_TO_MSB
774				     | R128_DP_SRC_SOURCE_HOST_DATA));
775#else	/* X_BYTE_ORDER == X_BIG_ENDIAN */
776    OUTREG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
777				     | R128_GMC_DST_CLIPPING
778				     | R128_GMC_BRUSH_NONE
779				     | (bg == -1
780					? R128_GMC_SRC_DATATYPE_MONO_FG_LA
781					: R128_GMC_SRC_DATATYPE_MONO_FG_BG)
782				     | R128_ROP[rop].rop
783				     | R128_DP_SRC_SOURCE_HOST_DATA));
784#endif
785    OUTREG(R128_DP_WRITE_MASK,      planemask);
786    OUTREG(R128_DP_SRC_FRGD_CLR,    fg);
787    OUTREG(R128_DP_SRC_BKGD_CLR,    bg);
788}
789
790/* Subsequent XAA indirect CPU-to-screen color expansion.  This is only
791   called once for each rectangle. */
792static void R128SubsequentScanlineCPUToScreenColorExpandFill(ScrnInfoPtr pScrn,
793							     int x, int y,
794							     int w, int h,
795							     int skipleft)
796{
797    R128InfoPtr   info      = R128PTR(pScrn);
798    unsigned char *R128MMIO = info->MMIO;
799    int x1clip = x+skipleft;
800    int x2clip = x+w;
801
802    info->scanline_h      = h;
803    info->scanline_words  = (w + 31) >> 5;
804
805#if 0
806    /* Seems as though the Rage128's doesn't like blitting directly
807     * as we must be overwriting something too quickly, therefore we
808     * render to the buffer first and then blit */
809    if ((info->scanline_words * h) <= 9) {
810	/* Turn on direct for less than 9 dword colour expansion */
811	info->scratch_buffer[0]
812	    = (unsigned char *)(ADDRREG(R128_HOST_DATA_LAST)
813				- (info->scanline_words - 1));
814	info->scanline_direct = 1;
815    } else
816#endif
817    {
818	/* Use indirect for anything else */
819	info->scratch_buffer[0] = info->scratch_save;
820	info->scanline_direct   = 0;
821    }
822
823    if (pScrn->bitsPerPixel == 24) {
824	x1clip *= 3;
825	x2clip *= 3;
826    }
827
828    R128WaitForFifo(pScrn, 4 + (info->scanline_direct ?
829					(info->scanline_words * h) : 0) );
830    OUTREG(R128_SC_TOP_LEFT,     (y << 16)       | (x1clip & 0xffff));
831    OUTREG(R128_SC_BOTTOM_RIGHT, ((y+h-1) << 16) | ((x2clip-1) & 0xffff));
832    OUTREG(R128_DST_Y_X,         (y << 16)       | (x & 0xffff));
833    /* Have to pad the width here and use clipping engine */
834    OUTREG(R128_DST_HEIGHT_WIDTH, (h << 16)      | ((w + 31) & ~31));
835}
836
/* Subsequent XAA indirect CPU-to-screen color expansion.  This is called
   once for each scanline.

   Copies info->scanline_words 32-bit words from scratch buffer `bufno'
   to the HOST_DATA registers, at most eight words per FIFO wait.  Does
   nothing when info->scanline_direct is set.  Each call decrements
   info->scanline_h, the count of scanlines still to be sent for the
   current rectangle. */
static void R128SubsequentColorExpandScanline(ScrnInfoPtr pScrn, int bufno)
{
    R128InfoPtr     info      = R128PTR(pScrn);
    unsigned char   *R128MMIO = info->MMIO;
    uint32_t        *p        = (pointer)info->scratch_buffer[bufno];
    int             i;
    int             left      = info->scanline_words;
    volatile uint32_t *d;

    if (info->scanline_direct) return;
    --info->scanline_h;
    while (left) {
        write_mem_barrier();
	if (left <= 8) {
	  /* Final chunk of this scanline (eight words or fewer). */
	  /* Last scanline - finish write to DATA_LAST */
	  if (info->scanline_h == 0) {
	    R128WaitForFifo(pScrn, left);
				/* Start `left - 1' elements below
				   HOST_DATA_LAST so that the final word is
				   stored to HOST_DATA_LAST itself. */
				/* Unrolling doesn't improve performance */
	    for (d = ADDRREG(R128_HOST_DATA_LAST) - (left - 1); left; --left)
		*d++ = *p++;
	    return;
	  } else {
	    R128WaitForFifo(pScrn, left);
				/* Not the last scanline: end the chunk on
				   HOST_DATA7 instead. */
				/* Unrolling doesn't improve performance */
	    for (d = ADDRREG(R128_HOST_DATA7) - (left - 1); left; --left)
		*d++ = *p++;
	  }
	} else {
	    /* More than eight words remain: send a full chunk of eight
	       starting at HOST_DATA0. */
	    R128WaitForFifo(pScrn, 8);
				/* Unrolling doesn't improve performance */
	    for (d = ADDRREG(R128_HOST_DATA0), i = 0; i < 8; i++)
		*d++ = *p++;
	    left -= 8;
	}
    }
}
875
876/* Setup for XAA indirect image write.
877
878   1024x768@76Hz 8bpp
879                             Without             With
880   x11perf -putimage10   37500.0/sec      39300.0/sec
881   x11perf -putimage100   2150.0/sec       1170.0/sec
882   x11perf -putimage500    108.0/sec         49.8/sec
883 */
884static void R128SetupForScanlineImageWrite(ScrnInfoPtr pScrn,
885					   int rop,
886					   unsigned int planemask,
887					   int trans_color,
888					   int bpp,
889					   int depth)
890{
891    R128InfoPtr   info      = R128PTR(pScrn);
892    unsigned char *R128MMIO = info->MMIO;
893
894    info->scanline_bpp = bpp;
895
896    R128WaitForFifo(pScrn, 2);
897    OUTREG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
898				     | R128_GMC_DST_CLIPPING
899				     | R128_GMC_BRUSH_1X8_COLOR
900				     | R128_GMC_SRC_DATATYPE_COLOR
901				     | R128_ROP[rop].rop
902				     | R128_GMC_BYTE_LSB_TO_MSB
903				     | R128_DP_SRC_SOURCE_HOST_DATA));
904    OUTREG(R128_DP_WRITE_MASK,      planemask);
905
906    if (trans_color != -1) {
907				/* Set up for transparency */
908	R128WaitForFifo(pScrn, 3);
909	OUTREG(R128_CLR_CMP_CLR_SRC, trans_color);
910	OUTREG(R128_CLR_CMP_MASK,    R128_CLR_CMP_MSK);
911	OUTREG(R128_CLR_CMP_CNTL,    (R128_SRC_CMP_NEQ_COLOR
912				      | R128_CLR_CMP_SRC_SOURCE));
913    }
914}
915
916/* Subsequent XAA indirect image write. This is only called once for each
917   rectangle. */
918static void R128SubsequentScanlineImageWriteRect(ScrnInfoPtr pScrn,
919						 int x, int y,
920						 int w, int h,
921						 int skipleft)
922{
923    R128InfoPtr   info      = R128PTR(pScrn);
924    unsigned char *R128MMIO = info->MMIO;
925    int x1clip = x+skipleft;
926    int x2clip = x+w;
927
928    int shift = 0; /* 32bpp */
929
930    if (pScrn->bitsPerPixel == 8) shift = 3;
931    else if (pScrn->bitsPerPixel == 16) shift = 1;
932
933    info->scanline_h      = h;
934    info->scanline_words  = (w * info->scanline_bpp + 31) >> 5;
935
936#if 0
937    /* Seeing as the CPUToScreen doesn't like this, I've done this
938     * here too, as it uses pretty much the same path. */
939    if ((info->scanline_words * h) <= 9) {
940	/* Turn on direct for less than 9 dword colour expansion */
941	info->scratch_buffer[0]
942	    = (unsigned char *)(ADDRREG(R128_HOST_DATA_LAST)
943				- (info->scanline_words - 1));
944	info->scanline_direct = 1;
945    } else
946#endif
947    {
948	/* Use indirect for anything else */
949	info->scratch_buffer[0] = info->scratch_save;
950	info->scanline_direct   = 0;
951    }
952
953    if (pScrn->bitsPerPixel == 24) {
954	x1clip *= 3;
955	x2clip *= 3;
956    }
957
958    R128WaitForFifo(pScrn, 4 + (info->scanline_direct ?
959					(info->scanline_words * h) : 0) );
960    OUTREG(R128_SC_TOP_LEFT,      (y << 16)       | (x1clip & 0xffff));
961    OUTREG(R128_SC_BOTTOM_RIGHT,  ((y+h-1) << 16) | ((x2clip-1) & 0xffff));
962    OUTREG(R128_DST_Y_X,          (y << 16)       | (x & 0xffff));
963    /* Have to pad the width here and use clipping engine */
964    OUTREG(R128_DST_HEIGHT_WIDTH, (h << 16)       | ((w + shift) & ~shift));
965}
966
967/* Subsequent XAA indirect image write.  This is called once for each
968   scanline. */
969static void R128SubsequentImageWriteScanline(ScrnInfoPtr pScrn, int bufno)
970{
971    R128InfoPtr     info      = R128PTR(pScrn);
972    unsigned char   *R128MMIO = info->MMIO;
973    uint32_t        *p        = (pointer)info->scratch_buffer[bufno];
974    int             i;
975    int             left      = info->scanline_words;
976    volatile uint32_t *d;
977
978    if (info->scanline_direct) return;
979    --info->scanline_h;
980    while (left) {
981        write_mem_barrier();
982	if (left <= 8) {
983	  /* Last scanline - finish write to DATA_LAST */
984	  if (info->scanline_h == 0) {
985	    R128WaitForFifo(pScrn, left);
986				/* Unrolling doesn't improve performance */
987	    for (d = ADDRREG(R128_HOST_DATA_LAST) - (left - 1); left; --left)
988		*d++ = *p++;
989	    return;
990	  } else {
991	    R128WaitForFifo(pScrn, left);
992				/* Unrolling doesn't improve performance */
993	    for (d = ADDRREG(R128_HOST_DATA7) - (left - 1); left; --left)
994		*d++ = *p++;
995	  }
996	} else {
997	    R128WaitForFifo(pScrn, 8);
998				/* Unrolling doesn't improve performance */
999	    for (d = ADDRREG(R128_HOST_DATA0), i = 0; i < 8; i++)
1000		*d++ = *p++;
1001	    left -= 8;
1002	}
1003    }
1004}
1005#endif
1006
/* Initialize the acceleration hardware.
 *
 * Clears SCALE_3D_CNTL, resets the engine, derives info->datatype and
 * info->pitch from the current layout, and programs the default offset,
 * pitch, scissors, GUI master control, Bresenham and color registers.
 * Also caches the base DP_GUI_MASTER_CNTL value in
 * info->dp_gui_master_cntl and waits for the engine to go idle before
 * returning. */
void R128EngineInit(ScrnInfoPtr pScrn)
{
    R128InfoPtr   info      = R128PTR(pScrn);
    unsigned char *R128MMIO = info->MMIO;

    DEBUG(xf86DrvMsg(pScrn->scrnIndex, X_INFO,
                        "EngineInit (%d/%d)\n",
                        info->CurrentLayout.pixel_code,
                        info->CurrentLayout.bitsPerPixel));

    OUTREG(R128_SCALE_3D_CNTL, 0);
    R128EngineReset(pScrn);

    /* Map the layout's pixel code to the value later shifted into the
       destination datatype field of DP_GUI_MASTER_CNTL.
       NOTE(review): an unknown pixel code is only reported under DEBUG and
       leaves info->datatype at whatever value it already had. */
    switch (info->CurrentLayout.pixel_code) {
    case 8:  info->datatype = 2; break;
    case 15: info->datatype = 3; break;
    case 16: info->datatype = 4; break;
    case 24: info->datatype = 5; break;
    case 32: info->datatype = 6; break;
    default:
    DEBUG(xf86DrvMsg(pScrn->scrnIndex, X_INFO,
                        "Unknown depth/bpp = %d/%d (code = %d)\n",
                        info->CurrentLayout.depth,
                        info->CurrentLayout.bitsPerPixel,
                        info->CurrentLayout.pixel_code));
    }
    /* Pitch is displayWidth / 8, tripled when pixels are 3 bytes wide */
    info->pitch = (info->CurrentLayout.displayWidth / 8) * (info->CurrentLayout.pixel_bytes == 3 ? 3 : 1);

    DEBUG(xf86DrvMsg(pScrn->scrnIndex, X_INFO,
                        "Pitch for acceleration = %d\n", info->pitch));

    R128WaitForFifo(pScrn, 2);
    OUTREG(R128_DEFAULT_OFFSET, pScrn->fbOffset);
    OUTREG(R128_DEFAULT_PITCH,  info->pitch);

    /* Disable the auxiliary scissor and open both scissors to the maximum */
    R128WaitForFifo(pScrn, 4);
    OUTREG(R128_AUX_SC_CNTL,             0);
    OUTREG(R128_DEFAULT_SC_BOTTOM_RIGHT, (R128_DEFAULT_SC_RIGHT_MAX
					  | R128_DEFAULT_SC_BOTTOM_MAX));
    OUTREG(R128_SC_TOP_LEFT,             0);
    OUTREG(R128_SC_BOTTOM_RIGHT,         (R128_DEFAULT_SC_RIGHT_MAX
					  | R128_DEFAULT_SC_BOTTOM_MAX));

    /* Base GUI master control value that the other acceleration hooks in
       this file OR their own brush/source/rop bits into */
    info->dp_gui_master_cntl = ((info->datatype << R128_GMC_DST_DATATYPE_SHIFT)
				| R128_GMC_CLR_CMP_CNTL_DIS
				| R128_GMC_AUX_CLIP_DIS);
    R128WaitForFifo(pScrn, 1);
    OUTREG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
				     | R128_GMC_BRUSH_SOLID_COLOR
				     | R128_GMC_SRC_DATATYPE_COLOR));

    /* Clear the Bresenham state and set default colors and write mask */
    R128WaitForFifo(pScrn, 8);
    OUTREG(R128_DST_BRES_ERR,      0);
    OUTREG(R128_DST_BRES_INC,      0);
    OUTREG(R128_DST_BRES_DEC,      0);
    OUTREG(R128_DP_BRUSH_FRGD_CLR, 0xffffffff);
    OUTREG(R128_DP_BRUSH_BKGD_CLR, 0x00000000);
    OUTREG(R128_DP_SRC_FRGD_CLR,   0xffffffff);
    OUTREG(R128_DP_SRC_BKGD_CLR,   0x00000000);
    OUTREG(R128_DP_WRITE_MASK,     0xffffffff);

    R128WaitForFifo(pScrn, 1);

    /* Host big-endian enable bit: set only on big-endian builds when direct
       rendering is not enabled, cleared in every other case */
#if X_BYTE_ORDER == X_BIG_ENDIAN
    /* FIXME: this is a kludge for texture uploads in the 3D driver. Look at
     * how the radeon driver handles HOST_DATA_SWAP if you want to implement
     * CCE ImageWrite acceleration or anything needing this bit */
#ifdef R128DRI
    if (info->directRenderingEnabled)
	OUTREGP(R128_DP_DATATYPE, 0, ~R128_HOST_BIG_ENDIAN_EN);
    else
#endif
	OUTREGP(R128_DP_DATATYPE,
		R128_HOST_BIG_ENDIAN_EN, ~R128_HOST_BIG_ENDIAN_EN);
#else /* X_LITTLE_ENDIAN */
    OUTREGP(R128_DP_DATATYPE, 0, ~R128_HOST_BIG_ENDIAN_EN);
#endif

#ifdef R128DRI
    /* Cached scissor / render-engine values; only stored here, not written
       to the hardware by this function */
    info->sc_left         = 0x00000000;
    info->sc_right        = R128_DEFAULT_SC_RIGHT_MAX;
    info->sc_top          = 0x00000000;
    info->sc_bottom       = R128_DEFAULT_SC_BOTTOM_MAX;

    info->re_top_left     = 0x00000000;
    info->re_width_height = ((0x7ff << R128_RE_WIDTH_SHIFT) |
			     (0x7ff << R128_RE_HEIGHT_SHIFT));

    info->aux_sc_cntl     = 0x00000000;
#endif

    R128WaitForIdle(pScrn);
}
1101
1102#ifdef R128DRI
1103
1104#ifdef HAVE_XAA_H
1105
1106/* Setup for XAA SolidFill. */
1107static void R128CCESetupForSolidFill(ScrnInfoPtr pScrn,
1108				     int color, int rop,
1109				     unsigned int planemask)
1110{
1111    R128InfoPtr   info = R128PTR(pScrn);
1112    RING_LOCALS;
1113
1114    R128CCE_REFRESH( pScrn, info );
1115
1116    BEGIN_RING( 8 );
1117
1118    OUT_RING_REG( R128_DP_GUI_MASTER_CNTL,
1119		  (info->dp_gui_master_cntl
1120		   | R128_GMC_BRUSH_SOLID_COLOR
1121		   | R128_GMC_SRC_DATATYPE_COLOR
1122		   | R128_ROP[rop].pattern) );
1123
1124    OUT_RING_REG( R128_DP_BRUSH_FRGD_CLR,  color );
1125    OUT_RING_REG( R128_DP_WRITE_MASK,	   planemask );
1126    OUT_RING_REG( R128_DP_CNTL,		   (R128_DST_X_LEFT_TO_RIGHT |
1127					    R128_DST_Y_TOP_TO_BOTTOM));
1128    ADVANCE_RING();
1129}
1130
1131/* Subsequent XAA SolidFillRect.
1132
1133   Tests: xtest CH06/fllrctngl, xterm
1134*/
1135static void R128CCESubsequentSolidFillRect(ScrnInfoPtr pScrn,
1136					   int x, int y, int w, int h)
1137{
1138    R128InfoPtr   info = R128PTR(pScrn);
1139    RING_LOCALS;
1140
1141    R128CCE_REFRESH( pScrn, info );
1142
1143    BEGIN_RING( 4 );
1144
1145    OUT_RING_REG( R128_DST_Y_X,          (y << 16) | x );
1146    OUT_RING_REG( R128_DST_WIDTH_HEIGHT, (w << 16) | h );
1147
1148    ADVANCE_RING();
1149}
1150
1151/* Setup for XAA screen-to-screen copy.
1152
1153   Tests: xtest CH06/fllrctngl (also tests transparency).
1154*/
1155static void R128CCESetupForScreenToScreenCopy(ScrnInfoPtr pScrn,
1156					       int xdir, int ydir, int rop,
1157					       unsigned int planemask,
1158					       int trans_color)
1159{
1160    R128InfoPtr   info = R128PTR(pScrn);
1161    RING_LOCALS;
1162
1163    R128CCE_REFRESH( pScrn, info );
1164
1165    info->xdir = xdir;
1166    info->ydir = ydir;
1167
1168    BEGIN_RING( 6 );
1169
1170    OUT_RING_REG( R128_DP_GUI_MASTER_CNTL,
1171		  (info->dp_gui_master_cntl
1172		   | R128_GMC_BRUSH_NONE
1173		   | R128_GMC_SRC_DATATYPE_COLOR
1174		   | R128_ROP[rop].rop
1175		   | R128_DP_SRC_SOURCE_MEMORY) );
1176
1177    OUT_RING_REG( R128_DP_WRITE_MASK, planemask );
1178    OUT_RING_REG( R128_DP_CNTL,
1179		  ((xdir >= 0 ? R128_DST_X_LEFT_TO_RIGHT : 0) |
1180		   (ydir >= 0 ? R128_DST_Y_TOP_TO_BOTTOM : 0)) );
1181
1182    ADVANCE_RING();
1183
1184    if (trans_color != -1) {
1185	BEGIN_RING( 6 );
1186
1187	OUT_RING_REG( R128_CLR_CMP_CLR_SRC, trans_color );
1188	OUT_RING_REG( R128_CLR_CMP_MASK,    R128_CLR_CMP_MSK );
1189	OUT_RING_REG( R128_CLR_CMP_CNTL,    (R128_SRC_CMP_NEQ_COLOR |
1190					     R128_CLR_CMP_SRC_SOURCE) );
1191
1192	ADVANCE_RING();
1193    }
1194}
1195
1196/* Subsequent XAA screen-to-screen copy. */
1197static void R128CCESubsequentScreenToScreenCopy(ScrnInfoPtr pScrn,
1198						 int xa, int ya,
1199						 int xb, int yb,
1200						 int w, int h)
1201{
1202    R128InfoPtr   info = R128PTR(pScrn);
1203    RING_LOCALS;
1204
1205    R128CCE_REFRESH( pScrn, info );
1206
1207    if (info->xdir < 0) xa += w - 1, xb += w - 1;
1208    if (info->ydir < 0) ya += h - 1, yb += h - 1;
1209
1210    BEGIN_RING( 6 );
1211
1212    OUT_RING_REG( R128_SRC_Y_X,          (ya << 16) | xa );
1213    OUT_RING_REG( R128_DST_Y_X,          (yb << 16) | xb );
1214    OUT_RING_REG( R128_DST_HEIGHT_WIDTH, (h << 16) | w );
1215
1216    ADVANCE_RING();
1217}
1218
1219
1220/*
1221 * XAA scanline color expansion
1222 *
1223 * We use HOSTDATA_BLT CCE packets, dividing the image in chunks that fit into
1224 * the indirect buffer if necessary.
1225 */
1226static void R128CCESetupForScanlineCPUToScreenColorExpandFill(ScrnInfoPtr pScrn,
1227							      int fg, int bg,
1228							      int rop,
1229							      unsigned int
1230							      planemask)
1231{
1232    R128InfoPtr   info      = R128PTR(pScrn);
1233    RING_LOCALS;
1234
1235    R128CCE_REFRESH( pScrn, info );
1236
1237    BEGIN_RING( 2 );
1238    OUT_RING_REG(R128_DP_WRITE_MASK,      planemask);
1239    ADVANCE_RING();
1240
1241    info->scanline_rop = rop;
1242    info->scanline_fg  = fg;
1243    info->scanline_bg  = bg;
1244}
1245
1246/* Helper function to write out a HOSTDATA_BLT packet into the indirect buffer
1247   and set the XAA scratch buffer address appropriately */
1248static void R128CCEScanlineCPUToScreenColorExpandFillPacket(ScrnInfoPtr pScrn,
1249							    int bufno)
1250{
1251    R128InfoPtr	info = R128PTR(pScrn);
1252    int chunk_words = info->scanline_hpass * info->scanline_words;
1253    RING_LOCALS;
1254
1255    R128CCE_REFRESH( pScrn, info );
1256
1257    BEGIN_RING( chunk_words+9 );
1258
1259    OUT_RING( CCE_PACKET3( R128_CCE_PACKET3_CNTL_HOSTDATA_BLT, chunk_words+9-2 ) );
1260#if X_BYTE_ORDER == X_LITTLE_ENDIAN
1261    OUT_RING( (info->dp_gui_master_cntl
1262	       | R128_GMC_DST_CLIPPING
1263	       | R128_GMC_BRUSH_NONE
1264	       | (info->scanline_bg == -1
1265		  ? R128_GMC_SRC_DATATYPE_MONO_FG_LA
1266		  : R128_GMC_SRC_DATATYPE_MONO_FG_BG)
1267	       | R128_ROP[info->scanline_rop].rop
1268	       | R128_GMC_BYTE_LSB_TO_MSB
1269	       | R128_DP_SRC_SOURCE_HOST_DATA));
1270#else	/* X_BYTE_ORDER == X_BIG_ENDIAN */
1271    OUT_RING( (info->dp_gui_master_cntl
1272	       | R128_GMC_DST_CLIPPING
1273	       | R128_GMC_BRUSH_NONE
1274	       | (info->scanline_bg == -1
1275		  ? R128_GMC_SRC_DATATYPE_MONO_FG_LA
1276		  : R128_GMC_SRC_DATATYPE_MONO_FG_BG)
1277	       | R128_ROP[info->scanline_rop].rop
1278	       | R128_DP_SRC_SOURCE_HOST_DATA));
1279#endif
1280    OUT_RING( (info->scanline_y << 16) | (info->scanline_x1clip & 0xffff) );
1281    OUT_RING( ((info->scanline_y+info->scanline_hpass-1) << 16) | ((info->scanline_x2clip-1) & 0xffff) );
1282    OUT_RING( info->scanline_fg );
1283    OUT_RING( info->scanline_bg );
1284    OUT_RING( (info->scanline_y << 16) | (info->scanline_x & 0xffff));
1285
1286    /* Have to pad the width here and use clipping engine */
1287    OUT_RING( (info->scanline_hpass << 16)      | ((info->scanline_w + 31) & ~31));
1288
1289    OUT_RING( chunk_words );
1290
1291    info->scratch_buffer[bufno] = (unsigned char *) &__head[__count];
1292    __count += chunk_words;
1293
1294    ADVANCE_RING();
1295
1296    info->scanline_y += info->scanline_hpass;
1297    info->scanline_h -= info->scanline_hpass;
1298
1299    if ( R128_VERBOSE )
1300          xf86DrvMsg( pScrn->scrnIndex, X_INFO,
1301		      "%s: hpass=%d, words=%d => chunk_words=%d, y=%d, h=%d\n",
1302		      __FUNCTION__, info->scanline_hpass, info->scanline_words,
1303		      chunk_words, info->scanline_y, info->scanline_h );
1304}
1305
1306/* Subsequent XAA indirect CPU-to-screen color expansion.  This is only
1307   called once for each rectangle. */
1308static void R128CCESubsequentScanlineCPUToScreenColorExpandFill(ScrnInfoPtr pScrn,
1309								int x, int y,
1310								int w, int h,
1311								int skipleft)
1312{
1313    R128InfoPtr   info      = R128PTR(pScrn);
1314
1315#define BUFSIZE ( R128_BUFFER_SIZE/4-9 )
1316
1317    info->scanline_x      = x;
1318    info->scanline_y      = y;
1319    info->scanline_w      = w;
1320    info->scanline_h      = h;
1321
1322    info->scanline_x1clip = x+skipleft;
1323    info->scanline_x2clip = x+w;
1324
1325    info->scanline_words  = (w + 31) >> 5;
1326    info->scanline_hpass  = min(h,(BUFSIZE/info->scanline_words));
1327
1328    if ( R128_VERBOSE )
1329        xf86DrvMsg( pScrn->scrnIndex, X_INFO,
1330		    "%s: x=%d, y=%d, w=%d, h=%d, skipleft=%d => x1clip=%d, x2clip=%d, hpass=%d, words=%d\n",
1331		    __FUNCTION__, x, y, w, h, skipleft, info->scanline_x1clip, info->scanline_x2clip,
1332		    info->scanline_hpass, info->scanline_words );
1333
1334    R128CCEScanlineCPUToScreenColorExpandFillPacket(pScrn, 0);
1335}
1336
1337/* Subsequent XAA indirect CPU-to-screen color expansion.  This is called
1338   once for each scanline. */
1339static void R128CCESubsequentColorExpandScanline(ScrnInfoPtr pScrn,
1340						 int bufno)
1341{
1342    R128InfoPtr     info      = R128PTR(pScrn);
1343
1344    if ( R128_VERBOSE )
1345        xf86DrvMsg( pScrn->scrnIndex, X_INFO,
1346		    "%s enter: scanline_hpass=%d, scanline_h=%d\n",
1347		    __FUNCTION__, info->scanline_hpass, info->scanline_h );
1348
1349    if (--info->scanline_hpass) {
1350        info->scratch_buffer[bufno] += 4 * info->scanline_words;
1351    }
1352    else if(info->scanline_h) {
1353        info->scanline_hpass = min(info->scanline_h,(BUFSIZE/info->scanline_words));
1354        R128CCEScanlineCPUToScreenColorExpandFillPacket(pScrn, bufno);
1355    }
1356
1357    if ( R128_VERBOSE )
1358        xf86DrvMsg( pScrn->scrnIndex, X_INFO,
1359		    "%s exit: scanline_hpass=%d, scanline_h=%d\n",
1360		    __FUNCTION__, info->scanline_hpass, info->scanline_h );
1361}
1362
1363/* Solid lines */
1364static void R128CCESetupForSolidLine(ScrnInfoPtr pScrn,
1365				  int color, int rop, unsigned int planemask)
1366{
1367    R128InfoPtr   info      = R128PTR(pScrn);
1368    RING_LOCALS;
1369
1370    R128CCE_REFRESH( pScrn, info );
1371
1372    BEGIN_RING( 6 );
1373
1374    OUT_RING_REG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
1375				     | R128_GMC_BRUSH_SOLID_COLOR
1376				     | R128_GMC_SRC_DATATYPE_COLOR
1377				     | R128_ROP[rop].pattern));
1378    OUT_RING_REG(R128_DP_BRUSH_FRGD_CLR,  color);
1379    OUT_RING_REG(R128_DP_WRITE_MASK,      planemask);
1380
1381    ADVANCE_RING();
1382}
1383
1384static void R128CCESubsequentSolidBresenhamLine(ScrnInfoPtr pScrn,
1385					     int x, int y,
1386					     int major, int minor,
1387					     int err, int len, int octant)
1388{
1389    R128InfoPtr   info      = R128PTR(pScrn);
1390    int           flags     = 0;
1391    RING_LOCALS;
1392
1393    R128CCE_REFRESH( pScrn, info );
1394
1395    if (octant & YMAJOR)         flags |= R128_DST_Y_MAJOR;
1396    if (!(octant & XDECREASING)) flags |= R128_DST_X_DIR_LEFT_TO_RIGHT;
1397    if (!(octant & YDECREASING)) flags |= R128_DST_Y_DIR_TOP_TO_BOTTOM;
1398
1399    BEGIN_RING( 12 );
1400
1401    OUT_RING_REG(R128_DP_CNTL_XDIR_YDIR_YMAJOR, flags);
1402    OUT_RING_REG(R128_DST_Y_X,                  (y << 16) | x);
1403    OUT_RING_REG(R128_DST_BRES_ERR,             err);
1404    OUT_RING_REG(R128_DST_BRES_INC,             minor);
1405    OUT_RING_REG(R128_DST_BRES_DEC,             -major);
1406    OUT_RING_REG(R128_DST_BRES_LNTH,            len);
1407
1408    ADVANCE_RING();
1409}
1410
1411static void R128CCESubsequentSolidHorVertLine(ScrnInfoPtr pScrn,
1412					   int x, int y, int len, int dir )
1413{
1414    R128InfoPtr   info      = R128PTR(pScrn);
1415    RING_LOCALS;
1416
1417    R128CCE_REFRESH( pScrn, info );
1418
1419    BEGIN_RING( 2 );
1420
1421    OUT_RING_REG(R128_DP_CNTL, (R128_DST_X_LEFT_TO_RIGHT
1422			  | R128_DST_Y_TOP_TO_BOTTOM));
1423
1424    ADVANCE_RING();
1425
1426    if (dir == DEGREES_0) {
1427	R128CCESubsequentSolidFillRect(pScrn, x, y, len, 1);
1428    } else {
1429	R128CCESubsequentSolidFillRect(pScrn, x, y, 1, len);
1430    }
1431}
1432
1433/* Dashed lines */
1434static void R128CCESetupForDashedLine(ScrnInfoPtr pScrn,
1435				   int fg, int bg,
1436				   int rop, unsigned int planemask,
1437				   int length, unsigned char *pattern)
1438{
1439    R128InfoPtr   info      = R128PTR(pScrn);
1440    uint32_t      pat       = *(uint32_t *)(pointer)pattern;
1441    RING_LOCALS;
1442
1443    R128CCE_REFRESH( pScrn, info );
1444
1445#if X_BYTE_ORDER == X_LITTLE_ENDIAN
1446# define PAT_SHIFT(pat,n) pat << n
1447#else
1448# define PAT_SHIFT(pat,n) pat >> n
1449#endif
1450
1451    switch (length) {
1452    case  2: pat |= PAT_SHIFT(pat,2); /* fall through */
1453    case  4: pat |= PAT_SHIFT(pat,4); /* fall through */
1454    case  8: pat |= PAT_SHIFT(pat,8); /* fall through */
1455    case 16: pat |= PAT_SHIFT(pat,16);
1456    }
1457
1458    BEGIN_RING( 10 );
1459
1460    OUT_RING_REG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
1461				     | (bg == -1
1462					? R128_GMC_BRUSH_32x1_MONO_FG_LA
1463					: R128_GMC_BRUSH_32x1_MONO_FG_BG)
1464				     | R128_ROP[rop].pattern
1465				     | R128_GMC_BYTE_LSB_TO_MSB));
1466    OUT_RING_REG(R128_DP_WRITE_MASK,      planemask);
1467    OUT_RING_REG(R128_DP_BRUSH_FRGD_CLR,  fg);
1468    OUT_RING_REG(R128_DP_BRUSH_BKGD_CLR,  bg);
1469    OUT_RING_REG(R128_BRUSH_DATA0,        pat);
1470
1471    ADVANCE_RING();
1472}
1473
1474static void R128CCESubsequentDashedBresenhamLine(ScrnInfoPtr pScrn,
1475					      int x, int y,
1476					      int major, int minor,
1477					      int err, int len, int octant,
1478					      int phase)
1479{
1480    R128InfoPtr   info      = R128PTR(pScrn);
1481    int           flags     = 0;
1482    RING_LOCALS;
1483
1484    R128CCE_REFRESH( pScrn, info );
1485
1486    if (octant & YMAJOR)         flags |= R128_DST_Y_MAJOR;
1487    if (!(octant & XDECREASING)) flags |= R128_DST_X_DIR_LEFT_TO_RIGHT;
1488    if (!(octant & YDECREASING)) flags |= R128_DST_Y_DIR_TOP_TO_BOTTOM;
1489
1490    BEGIN_RING( 14 );
1491
1492    OUT_RING_REG(R128_DP_CNTL_XDIR_YDIR_YMAJOR, flags);
1493    OUT_RING_REG(R128_DST_Y_X,                  (y << 16) | x);
1494    OUT_RING_REG(R128_BRUSH_Y_X,                (phase << 16) | phase);
1495    OUT_RING_REG(R128_DST_BRES_ERR,             err);
1496    OUT_RING_REG(R128_DST_BRES_INC,             minor);
1497    OUT_RING_REG(R128_DST_BRES_DEC,             -major);
1498    OUT_RING_REG(R128_DST_BRES_LNTH,            len);
1499
1500    ADVANCE_RING();
1501}
1502
1503/* Mono 8x8 pattern color expansion */
1504static void R128CCESetupForMono8x8PatternFill(ScrnInfoPtr pScrn,
1505					   int patternx, int patterny,
1506					   int fg, int bg, int rop,
1507					   unsigned int planemask)
1508{
1509    R128InfoPtr   info      = R128PTR(pScrn);
1510    RING_LOCALS;
1511
1512    R128CCE_REFRESH( pScrn, info );
1513
1514    BEGIN_RING( 12 );
1515
1516    OUT_RING_REG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
1517				     | (bg == -1
1518					? R128_GMC_BRUSH_8X8_MONO_FG_LA
1519					: R128_GMC_BRUSH_8X8_MONO_FG_BG)
1520				     | R128_ROP[rop].pattern
1521				     | R128_GMC_BYTE_LSB_TO_MSB));
1522    OUT_RING_REG(R128_DP_WRITE_MASK,      planemask);
1523    OUT_RING_REG(R128_DP_BRUSH_FRGD_CLR,  fg);
1524    OUT_RING_REG(R128_DP_BRUSH_BKGD_CLR,  bg);
1525    OUT_RING_REG(R128_BRUSH_DATA0,        patternx);
1526    OUT_RING_REG(R128_BRUSH_DATA1,        patterny);
1527
1528    ADVANCE_RING();
1529}
1530
1531static void R128CCESubsequentMono8x8PatternFillRect(ScrnInfoPtr pScrn,
1532						 int patternx, int patterny,
1533						 int x, int y, int w, int h)
1534{
1535    R128InfoPtr   info      = R128PTR(pScrn);
1536    RING_LOCALS;
1537
1538    R128CCE_REFRESH( pScrn, info );
1539
1540    BEGIN_RING( 6 );
1541
1542    OUT_RING_REG(R128_BRUSH_Y_X,        (patterny << 8) | patternx);
1543    OUT_RING_REG(R128_DST_Y_X,          (y << 16) | x);
1544    OUT_RING_REG(R128_DST_HEIGHT_WIDTH, (h << 16) | w);
1545
1546    ADVANCE_RING();
1547}
1548#endif
1549
1550/* Get an indirect buffer for the CCE 2D acceleration commands.
1551 */
1552drmBufPtr R128CCEGetBuffer( ScrnInfoPtr pScrn )
1553{
1554    R128InfoPtr   info = R128PTR(pScrn);
1555    drmDMAReq dma;
1556    drmBufPtr buf = NULL;
1557    int indx = 0;
1558    int size = 0;
1559    int ret, i = 0;
1560
1561#if 0
1562    /* FIXME: pScrn->pScreen has not been initialized when this is first
1563       called from RADEONSelectBuffer via RADEONDRICPInit.  We could use
1564       the screen index from pScrn, which is initialized, and then get
1565       the screen from screenInfo.screens[index], but that is a hack. */
1566    dma.context = DRIGetContext(pScrn->pScreen);
1567#else
1568    dma.context = 0x00000001; /* This is the X server's context */
1569#endif
1570    dma.send_count = 0;
1571    dma.send_list = NULL;
1572    dma.send_sizes = NULL;
1573    dma.flags = 0;
1574    dma.request_count = 1;
1575    dma.request_size = R128_BUFFER_SIZE;
1576    dma.request_list = &indx;
1577    dma.request_sizes = &size;
1578    dma.granted_count = 0;
1579
1580    while ( 1 ) {
1581	do {
1582	    ret = drmDMA( info->drmFD, &dma );
1583	    if ( ret && ret != -EAGAIN ) {
1584		xf86DrvMsg( pScrn->scrnIndex, X_ERROR,
1585			    "%s: CCE GetBuffer %d\n", __FUNCTION__, ret );
1586	    }
1587	} while ( ( ret == -EAGAIN ) && ( i++ < R128_TIMEOUT ) );
1588
1589	if ( ret == 0 ) {
1590	    buf = &info->buffers->list[indx];
1591	    buf->used = 0;
1592	    if ( R128_VERBOSE ) {
1593		xf86DrvMsg( pScrn->scrnIndex, X_INFO,
1594			    "   GetBuffer returning %d\n", buf->idx );
1595	    }
1596	    return buf;
1597	}
1598
1599	xf86DrvMsg( pScrn->scrnIndex, X_ERROR,
1600		    "GetBuffer timed out, resetting engine...\n");
1601	R128EngineReset( pScrn );
1602	/* R128EngineRestore( pScrn ); FIXME ??? */
1603
1604	/* Always restart the engine when doing CCE 2D acceleration */
1605	R128CCE_RESET( pScrn, info );
1606	R128CCE_START( pScrn, info );
1607    }
1608}
1609
1610/* Flush the indirect buffer to the kernel for submission to the card.
1611 */
1612void R128CCEFlushIndirect( ScrnInfoPtr pScrn, int discard )
1613{
1614    R128InfoPtr   info = R128PTR(pScrn);
1615    drmBufPtr buffer = info->indirectBuffer;
1616    int start = info->indirectStart;
1617    drmR128Indirect indirect;
1618
1619    if ( !buffer )
1620	return;
1621
1622    if ( (start == buffer->used) && !discard )
1623        return;
1624
1625    indirect.idx = buffer->idx;
1626    indirect.start = start;
1627    indirect.end = buffer->used;
1628    indirect.discard = discard;
1629
1630    drmCommandWriteRead( info->drmFD, DRM_R128_INDIRECT,
1631                         &indirect, sizeof(drmR128Indirect));
1632
1633    if ( discard )
1634        buffer = info->indirectBuffer = R128CCEGetBuffer( pScrn );
1635
1636    /* pad to an even number of dwords */
1637    if (buffer->used & 7)
1638        buffer->used = ( buffer->used+7 ) & ~7;
1639
1640    info->indirectStart = buffer->used;
1641}
1642
1643/* Flush and release the indirect buffer.
1644 */
1645void R128CCEReleaseIndirect( ScrnInfoPtr pScrn )
1646{
1647    R128InfoPtr   info = R128PTR(pScrn);
1648    drmBufPtr buffer = info->indirectBuffer;
1649    int start = info->indirectStart;
1650    drmR128Indirect indirect;
1651
1652    info->indirectBuffer = NULL;
1653    info->indirectStart = 0;
1654
1655    if ( !buffer )
1656	return;
1657
1658    indirect.idx = buffer->idx;
1659    indirect.start = start;
1660    indirect.end = buffer->used;
1661    indirect.discard = 1;
1662
1663    drmCommandWriteRead( info->drmFD, DRM_R128_INDIRECT,
1664                         &indirect, sizeof(drmR128Indirect));
1665}
1666
1667#ifdef HAVE_XAA_H
/* This callback is required for multihead cards using XAA.
 *
 * CCE variant: currently only waits for one FIFO slot and then for the
 * engine to go idle; the register restore it was meant to perform is
 * commented out below. */
static
void R128RestoreCCEAccelState(ScrnInfoPtr pScrn)
{
    /* NOTE(review): info is not referenced directly in this body;
       presumably the R128WaitForFifo macro needs it -- confirm before
       removing. */
    R128InfoPtr info        = R128PTR(pScrn);
/*    unsigned char *R128MMIO = info->MMIO;  needed for OUTREG below */
    /*xf86DrvMsg(pScrn->scrnIndex, X_INFO, "===>RestoreCP\n");*/

    /* The FIFO slot requested here matches the disabled OUTREG below */
    R128WaitForFifo(pScrn, 1);
/* is this needed on r128
    OUTREG( R128_DEFAULT_OFFSET, info->frontPitchOffset);
*/
    R128WaitForIdle(pScrn);

    /* FIXME: May need to restore other things,
       like BKGD_CLK FG_CLK...*/

}
1686
1687static void R128CCEAccelInit(ScrnInfoPtr pScrn, XAAInfoRecPtr a)
1688{
1689    R128InfoPtr info = R128PTR(pScrn);
1690
1691    a->Flags                            = (PIXMAP_CACHE
1692					   | OFFSCREEN_PIXMAPS
1693					   | LINEAR_FRAMEBUFFER);
1694
1695				/* Sync */
1696    a->Sync                             = R128CCEWaitForIdle;
1697
1698    /* Solid Filled Rectangle */
1699    a->PolyFillRectSolidFlags           = 0;
1700    a->SetupForSolidFill                = R128CCESetupForSolidFill;
1701    a->SubsequentSolidFillRect          = R128CCESubsequentSolidFillRect;
1702
1703				/* Screen-to-screen Copy */
1704				/* Transparency uses the wrong colors for
1705				   24 bpp mode -- the transparent part is
1706				   correct, but the opaque color is wrong.
1707				   This can be seen with netscape's I-bar
1708				   cursor when editing in the URL location
1709				   box. */
1710    a->ScreenToScreenCopyFlags          = ((pScrn->bitsPerPixel == 24)
1711					   ? NO_TRANSPARENCY
1712					   : 0);
1713    a->SetupForScreenToScreenCopy       = R128CCESetupForScreenToScreenCopy;
1714    a->SubsequentScreenToScreenCopy     = R128CCESubsequentScreenToScreenCopy;
1715
1716				/* Indirect CPU-To-Screen Color Expand */
1717    a->ScanlineCPUToScreenColorExpandFillFlags = LEFT_EDGE_CLIPPING
1718					       | LEFT_EDGE_CLIPPING_NEGATIVE_X;
1719    a->NumScanlineColorExpandBuffers   = 1;
1720    a->ScanlineColorExpandBuffers      = info->scratch_buffer;
1721    info->scratch_buffer[0]            = NULL;
1722    a->SetupForScanlineCPUToScreenColorExpandFill
1723	= R128CCESetupForScanlineCPUToScreenColorExpandFill;
1724    a->SubsequentScanlineCPUToScreenColorExpandFill
1725	= R128CCESubsequentScanlineCPUToScreenColorExpandFill;
1726    a->SubsequentColorExpandScanline   = R128CCESubsequentColorExpandScanline;
1727
1728				/* Bresenham Solid Lines */
1729    a->SetupForSolidLine               = R128CCESetupForSolidLine;
1730    a->SubsequentSolidBresenhamLine    = R128CCESubsequentSolidBresenhamLine;
1731    a->SubsequentSolidHorVertLine      = R128CCESubsequentSolidHorVertLine;
1732
1733				/* Bresenham Dashed Lines*/
1734    a->SetupForDashedLine              = R128CCESetupForDashedLine;
1735    a->SubsequentDashedBresenhamLine   = R128CCESubsequentDashedBresenhamLine;
1736    a->DashPatternMaxLength            = 32;
1737    a->DashedLineFlags                 = (LINE_PATTERN_LSBFIRST_LSBJUSTIFIED
1738					  | LINE_PATTERN_POWER_OF_2_ONLY);
1739
1740				/* Mono 8x8 Pattern Fill (Color Expand) */
1741    a->SetupForMono8x8PatternFill       = R128CCESetupForMono8x8PatternFill;
1742    a->SubsequentMono8x8PatternFillRect = R128CCESubsequentMono8x8PatternFillRect;
1743    a->Mono8x8PatternFillFlags          = (HARDWARE_PATTERN_PROGRAMMED_BITS
1744					   | HARDWARE_PATTERN_PROGRAMMED_ORIGIN
1745					   | HARDWARE_PATTERN_SCREEN_ORIGIN
1746					   | BIT_ORDER_IN_BYTE_LSBFIRST);
1747
1748    if (xf86IsEntityShared(info->pEnt->index))
1749        a->RestoreAccelState           = R128RestoreCCEAccelState;
1750
1751}
1752#endif
1753#endif
1754
1755#ifdef HAVE_XAA_H
/* This callback is required for multihead cards using XAA.
 *
 * MMIO variant: rewrites the default offset (pScrn->fbOffset) and default
 * pitch (info->pitch) registers, then waits for the engine to go idle. */
static
void R128RestoreAccelState(ScrnInfoPtr pScrn)
{
    R128InfoPtr info        = R128PTR(pScrn);
    unsigned char *R128MMIO = info->MMIO;

    /* Two FIFO slots for the two register writes that follow */
    R128WaitForFifo(pScrn, 2);
    OUTREG(R128_DEFAULT_OFFSET, pScrn->fbOffset);
    OUTREG(R128_DEFAULT_PITCH,  info->pitch);

    /* FIXME: May need to restore other things,
       like BKGD_CLK FG_CLK...*/

    R128WaitForIdle(pScrn);

}
1773
1774static void R128MMIOAccelInit(ScrnInfoPtr pScrn, XAAInfoRecPtr a)
1775{
1776    R128InfoPtr info = R128PTR(pScrn);
1777
1778    a->Flags                            = (PIXMAP_CACHE
1779					   | OFFSCREEN_PIXMAPS
1780					   | LINEAR_FRAMEBUFFER);
1781
1782				/* Sync */
1783    a->Sync                             = R128WaitForIdle;
1784
1785				/* Solid Filled Rectangle */
1786    a->PolyFillRectSolidFlags           = 0;
1787    a->SetupForSolidFill                = R128SetupForSolidFill;
1788    a->SubsequentSolidFillRect          = R128SubsequentSolidFillRect;
1789
1790				/* Screen-to-screen Copy */
1791				/* Transparency uses the wrong colors for
1792				   24 bpp mode -- the transparent part is
1793				   correct, but the opaque color is wrong.
1794				   This can be seen with netscape's I-bar
1795				   cursor when editing in the URL location
1796				   box. */
1797    a->ScreenToScreenCopyFlags          = ((pScrn->bitsPerPixel == 24)
1798					   ? NO_TRANSPARENCY
1799					   : 0);
1800    a->SetupForScreenToScreenCopy       = R128SetupForScreenToScreenCopy;
1801    a->SubsequentScreenToScreenCopy     = R128SubsequentScreenToScreenCopy;
1802
1803				/* Mono 8x8 Pattern Fill (Color Expand) */
1804    a->SetupForMono8x8PatternFill       = R128SetupForMono8x8PatternFill;
1805    a->SubsequentMono8x8PatternFillRect = R128SubsequentMono8x8PatternFillRect;
1806    a->Mono8x8PatternFillFlags          = (HARDWARE_PATTERN_PROGRAMMED_BITS
1807					   | HARDWARE_PATTERN_PROGRAMMED_ORIGIN
1808					   | HARDWARE_PATTERN_SCREEN_ORIGIN
1809					   | BIT_ORDER_IN_BYTE_LSBFIRST);
1810
1811				/* Indirect CPU-To-Screen Color Expand */
1812    a->ScanlineCPUToScreenColorExpandFillFlags = LEFT_EDGE_CLIPPING
1813					       | LEFT_EDGE_CLIPPING_NEGATIVE_X;
1814    a->NumScanlineColorExpandBuffers   = 1;
1815    a->ScanlineColorExpandBuffers      = info->scratch_buffer;
1816    info->scratch_save                 = malloc(((pScrn->virtualX+31)/32*4)
1817					    + (pScrn->virtualX
1818					    * info->CurrentLayout.pixel_bytes));
1819    info->scratch_buffer[0]            = info->scratch_save;
1820    a->SetupForScanlineCPUToScreenColorExpandFill
1821	= R128SetupForScanlineCPUToScreenColorExpandFill;
1822    a->SubsequentScanlineCPUToScreenColorExpandFill
1823	= R128SubsequentScanlineCPUToScreenColorExpandFill;
1824    a->SubsequentColorExpandScanline   = R128SubsequentColorExpandScanline;
1825
1826				/* Bresenham Solid Lines */
1827    a->SetupForSolidLine               = R128SetupForSolidLine;
1828    a->SubsequentSolidBresenhamLine    = R128SubsequentSolidBresenhamLine;
1829    a->SubsequentSolidHorVertLine      = R128SubsequentSolidHorVertLine;
1830
1831				/* Bresenham Dashed Lines*/
1832    a->SetupForDashedLine              = R128SetupForDashedLine;
1833    a->SubsequentDashedBresenhamLine   = R128SubsequentDashedBresenhamLine;
1834    a->DashPatternMaxLength            = 32;
1835    a->DashedLineFlags                 = (LINE_PATTERN_LSBFIRST_LSBJUSTIFIED
1836					  | LINE_PATTERN_POWER_OF_2_ONLY);
1837
1838				/* ImageWrite */
1839    a->NumScanlineImageWriteBuffers    = 1;
1840    a->ScanlineImageWriteBuffers       = info->scratch_buffer;
1841    info->scratch_buffer[0]            = info->scratch_save;
1842    a->SetupForScanlineImageWrite      = R128SetupForScanlineImageWrite;
1843    a->SubsequentScanlineImageWriteRect= R128SubsequentScanlineImageWriteRect;
1844    a->SubsequentImageWriteScanline    = R128SubsequentImageWriteScanline;
1845    a->ScanlineImageWriteFlags         = CPU_TRANSFER_PAD_DWORD
1846		/* Performance tests show that we shouldn't use GXcopy for
1847		 * uploads as a memcpy is faster */
1848					  | NO_GXCOPY
1849					  | LEFT_EDGE_CLIPPING
1850					  | LEFT_EDGE_CLIPPING_NEGATIVE_X
1851					  | SCANLINE_PAD_DWORD;
1852
1853    if (xf86IsEntityShared(info->pEnt->index)) {
1854        /* If there are more than one devices sharing this entity, we
1855         * have to assign this call back, otherwise the XAA will be
1856         * disabled.
1857	 */
1858        if (xf86GetNumEntityInstances(info->pEnt->index) > 1)
1859            a->RestoreAccelState           = R128RestoreAccelState;
1860    }
1861
1862}
1863#endif
1864
1865void R128CopySwap(uint8_t *dst, uint8_t *src, unsigned int size, int swap)
1866{
1867    switch(swap) {
1868    case APER_0_BIG_ENDIAN_32BPP_SWAP:
1869	{
1870	    unsigned int *d = (unsigned int *)dst;
1871	    unsigned int *s = (unsigned int *)src;
1872	    unsigned int nwords = size >> 2;
1873
1874	    for (; nwords > 0; --nwords, ++d, ++s)
1875#ifdef __powerpc__
1876		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d));
1877#else
1878		*d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
1879			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
1880#endif
1881	    return;
1882	}
1883    case APER_0_BIG_ENDIAN_16BPP_SWAP:
1884	{
1885	    unsigned short *d = (unsigned short *)dst;
1886	    unsigned short *s = (unsigned short *)src;
1887	    unsigned int nwords = size >> 1;
1888
1889	    for (; nwords > 0; --nwords, ++d, ++s)
1890#ifdef __powerpc__
1891		asm volatile("sthbrx %0,0,%1" : : "r" (*s), "r" (d));
1892#else
1893	        *d = (*s >> 8) | (*s << 8);
1894#endif
1895	    return;
1896	}
1897    }
1898    if (src != dst)
1899	memcpy(dst, src, size);
1900}
1901
1902/* Initialize XAA for supported acceleration and also initialize the
1903   graphics hardware for acceleration. */
1904#ifdef HAVE_XAA_H
1905Bool
1906R128XAAAccelInit(ScreenPtr pScreen)
1907{
1908    ScrnInfoPtr   pScrn = xf86ScreenToScrn(pScreen);
1909    R128InfoPtr   info  = R128PTR(pScrn);
1910    XAAInfoRecPtr a;
1911
1912    if (!(a = info->accel = XAACreateInfoRec())) return FALSE;
1913
1914#ifdef R128DRI
1915    if (info->directRenderingEnabled)
1916        R128CCEAccelInit(pScrn, a);
1917    else
1918#endif
1919    R128MMIOAccelInit(pScrn, a);
1920
1921    R128EngineInit(pScrn);
1922    return XAAInit(pScreen, a);
1923}
1924#endif
1925