1/*
2 * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
3 *                VA Linux Systems Inc., Fremont, California.
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation on the rights to use, copy, modify, merge,
11 * publish, distribute, sublicense, and/or sell copies of the Software,
12 * and to permit persons to whom the Software is furnished to do so,
13 * subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
23 * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 * DEALINGS IN THE SOFTWARE.
27 */
28
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
33/*
34 * Authors:
35 *   Kevin E. Martin <martin@xfree86.org>
36 *   Rickard E. Faith <faith@valinux.com>
37 *   Alan Hourihane <alanh@fairlite.demon.co.uk>
38 *   Michel Dänzer <michel@daenzer.net>
39 *
40 * Credits:
41 *
42 *   Thanks to Ani Joshi <ajoshi@shell.unixbox.com> for providing source
43 *   code to his Radeon driver.  Portions of this file are based on the
44 *   initialization code for that driver.
45 *
46 * References:
47 *
48 * !!!! FIXME !!!!
49 *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
50 *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
51 *   1999.
52 *
53 *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
54 *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
55 *
56 * Notes on unimplemented XAA optimizations:
57 *
58 *   SetClipping:   This has been removed as XAA expects 16bit registers
59 *                  for full clipping.
60 *   TwoPointLine:  The Radeon supports this. Not Bresenham.
61 *   DashedLine with non-power-of-two pattern length: Apparently, there is
62 *                  no way to set the length of the pattern -- it is always
63 *                  assumed to be 8 or 32 (or 1024?).
64 *   ScreenToScreenColorExpandFill: See p. 4-17 of the Technical Reference
65 *                  Manual where it states that monochrome expansion of frame
66 *                  buffer data is not supported.
67 *   CPUToScreenColorExpandFill, direct: The implementation here uses a hybrid
68 *                  direct/indirect method.  If we had more data registers,
69 *                  then we could do better.  If XAA supported a trigger write
70 *                  address, the code would be simpler.
71 *   Color8x8PatternFill: Apparently, an 8x8 color brush cannot take an 8x8
72 *                  pattern from frame buffer memory.
73 *   ImageWrites:   Same as CPUToScreenColorExpandFill
74 *
75 */
76
/* Selecting both acceleration back ends at once is a build error. */
#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
#error Cannot define both MMIO and CP acceleration!
#endif

/* FUNC_NAME_CAT(prefix,suffix) glues two identifiers into a single one.
 * A pre-ANSI preprocessor (UNIXCPP defined without ANSICPP) has to use
 * the empty-comment trick; every other preprocessor uses the paste
 * operator.
 */
#if defined(UNIXCPP) && !defined(ANSICPP)
#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
#else
#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
#endif

/* FUNC_NAME(prefix) appends the tag of the selected back end, so that
 * FUNC_NAME(Foo) names FooMMIO or FooCP.  Selecting no back end at all
 * is a build error.
 */
#if defined(ACCEL_MMIO)
#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
#else
#if defined(ACCEL_CP)
#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
#else
#error No accel type defined!
#endif
#endif
96
97#ifdef USE_XAA
98
99/* This callback is required for multiheader cards using XAA */
100static void
101FUNC_NAME(RADEONRestoreAccelState)(ScrnInfoPtr pScrn)
102{
103    /*RADEONInfoPtr  info       = RADEONPTR(pScrn);
104    unsigned char *RADEONMMIO = info->MMIO;*/
105
106#ifdef ACCEL_MMIO
107
108/*    OUTREG(RADEON_DEFAULT_OFFSET, info->dst_pitch_offset);*/
109    /* FIXME: May need to restore other things, like BKGD_CLK FG_CLK... */
110
111    RADEONWaitForIdleMMIO(pScrn);
112
113#else /* ACCEL_CP */
114
115/*    RADEONWaitForFifo(pScrn, 1);
116    OUTREG(RADEON_DEFAULT_OFFSET, info->frontPitchOffset);*/
117
118    RADEONWaitForIdleMMIO(pScrn);
119
120#if 0
121    /* Not working yet */
122    RADEONMMIO_TO_CP(pScrn, info);
123#endif
124
125    /* FIXME: May need to restore other things, like BKGD_CLK FG_CLK... */
126#endif
127}
128
129/* Setup for XAA SolidFill */
130static void
131FUNC_NAME(RADEONSetupForSolidFill)(ScrnInfoPtr pScrn,
132				   int color,
133				   int rop,
134				   unsigned int planemask)
135{
136    RADEONInfoPtr  info = RADEONPTR(pScrn);
137    ACCEL_PREAMBLE();
138
139    /* Save for later clipping */
140    info->accel_state->dp_gui_master_cntl_clip = (info->accel_state->dp_gui_master_cntl
141						  | RADEON_GMC_BRUSH_SOLID_COLOR
142						  | RADEON_GMC_SRC_DATATYPE_COLOR
143						  | RADEON_ROP[rop].pattern);
144
145    BEGIN_ACCEL(4);
146
147    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, info->accel_state->dp_gui_master_cntl_clip);
148    OUT_ACCEL_REG(RADEON_DP_BRUSH_FRGD_CLR,  color);
149    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK,      planemask);
150    OUT_ACCEL_REG(RADEON_DP_CNTL,            (RADEON_DST_X_LEFT_TO_RIGHT
151					      | RADEON_DST_Y_TOP_TO_BOTTOM));
152
153    FINISH_ACCEL();
154    BEGIN_ACCEL(2);
155    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
156    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
157                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
158    FINISH_ACCEL();
159}
160
161/* Subsequent XAA SolidFillRect
162 *
163 * Tests: xtest CH06/fllrctngl, xterm
164 */
165static void
166FUNC_NAME(RADEONSubsequentSolidFillRect)(ScrnInfoPtr pScrn,
167					 int x, int y,
168					 int w, int h)
169{
170    RADEONInfoPtr  info = RADEONPTR(pScrn);
171    ACCEL_PREAMBLE();
172
173    BEGIN_ACCEL(3);
174
175    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset |
176    	((info->tilingEnabled && (y <= pScrn->virtualY)) ? RADEON_DST_TILE_MACRO : 0));
177    OUT_ACCEL_REG(RADEON_DST_Y_X,          (y << 16) | x);
178    OUT_ACCEL_REG(RADEON_DST_WIDTH_HEIGHT, (w << 16) | h);
179
180    FINISH_ACCEL();
181}
182
183/* Setup for XAA solid lines */
184static void
185FUNC_NAME(RADEONSetupForSolidLine)(ScrnInfoPtr pScrn,
186				   int color,
187				   int rop,
188				   unsigned int planemask)
189{
190    RADEONInfoPtr  info = RADEONPTR(pScrn);
191    ACCEL_PREAMBLE();
192
193    /* Save for later clipping */
194    info->accel_state->dp_gui_master_cntl_clip = (info->accel_state->dp_gui_master_cntl
195						  | RADEON_GMC_BRUSH_SOLID_COLOR
196						  | RADEON_GMC_SRC_DATATYPE_COLOR
197						  | RADEON_ROP[rop].pattern);
198
199    if (info->ChipFamily >= CHIP_FAMILY_RV200) {
200	BEGIN_ACCEL(1);
201	OUT_ACCEL_REG(RADEON_DST_LINE_PATCOUNT,
202		      0x55 << RADEON_BRES_CNTL_SHIFT);
203	FINISH_ACCEL();
204    }
205
206    BEGIN_ACCEL(3);
207
208    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, info->accel_state->dp_gui_master_cntl_clip);
209    OUT_ACCEL_REG(RADEON_DP_BRUSH_FRGD_CLR,  color);
210    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK,      planemask);
211
212    FINISH_ACCEL();
213    BEGIN_ACCEL(2);
214    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
215    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
216                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
217    FINISH_ACCEL();
218}
219
220/* Subsequent XAA solid horizontal and vertical lines */
221static void
222FUNC_NAME(RADEONSubsequentSolidHorVertLine)(ScrnInfoPtr pScrn,
223					    int x, int y,
224					    int len,
225					    int dir)
226{
227    RADEONInfoPtr  info = RADEONPTR(pScrn);
228    int            w    = 1;
229    int            h    = 1;
230    ACCEL_PREAMBLE();
231
232    if (dir == DEGREES_0) w = len;
233    else                  h = len;
234
235    BEGIN_ACCEL(4);
236
237    OUT_ACCEL_REG(RADEON_DP_CNTL,          (RADEON_DST_X_LEFT_TO_RIGHT
238					    | RADEON_DST_Y_TOP_TO_BOTTOM));
239    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset |
240    	((info->tilingEnabled && (y <= pScrn->virtualY)) ? RADEON_DST_TILE_MACRO : 0));
241    OUT_ACCEL_REG(RADEON_DST_Y_X,          (y << 16) | x);
242    OUT_ACCEL_REG(RADEON_DST_WIDTH_HEIGHT, (w << 16) | h);
243
244    FINISH_ACCEL();
245}
246
247/* Subsequent XAA solid TwoPointLine line
248 *
249 * Tests: xtest CH06/drwln, ico, Mark Vojkovich's linetest program
250 *
251 * [See http://www.xfree86.org/devel/archives/devel/1999-Jun/0102.shtml for
252 * Mark Vojkovich's linetest program, posted 2Jun99 to devel@xfree86.org.]
253 */
254static void
255FUNC_NAME(RADEONSubsequentSolidTwoPointLine)(ScrnInfoPtr pScrn,
256					     int xa, int ya,
257					     int xb, int yb,
258					     int flags)
259{
260    RADEONInfoPtr  info = RADEONPTR(pScrn);
261    ACCEL_PREAMBLE();
262
263    /* TODO: Check bounds -- RADEON only has 14 bits */
264
265    if (!(flags & OMIT_LAST))
266	FUNC_NAME(RADEONSubsequentSolidHorVertLine)(pScrn,
267						    xb, yb, 1,
268						    DEGREES_0);
269
270    BEGIN_ACCEL(3);
271
272    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset |
273    	((info->tilingEnabled && (ya <= pScrn->virtualY)) ? RADEON_DST_TILE_MACRO : 0));
274    OUT_ACCEL_REG(RADEON_DST_LINE_START, (ya << 16) | xa);
275    OUT_ACCEL_REG(RADEON_DST_LINE_END,   (yb << 16) | xb);
276
277    FINISH_ACCEL();
278}
279
280/* Setup for XAA dashed lines
281 *
282 * Tests: xtest CH05/stdshs, XFree86/drwln
283 *
284 * NOTE: Since we can only accelerate lines with power-of-2 patterns of
285 * length <= 32
286 */
287static void
288FUNC_NAME(RADEONSetupForDashedLine)(ScrnInfoPtr pScrn,
289				    int fg,
290				    int bg,
291				    int rop,
292				    unsigned int planemask,
293				    int length,
294				    unsigned char *pattern)
295{
296    RADEONInfoPtr  info = RADEONPTR(pScrn);
297    uint32_t pat  = *(uint32_t *)(pointer)pattern;
298    ACCEL_PREAMBLE();
299
300    /* Save for determining whether or not to draw last pixel */
301    info->accel_state->dashLen = length;
302    info->accel_state->dashPattern = pat;
303
304#if X_BYTE_ORDER == X_BIG_ENDIAN
305# define PAT_SHIFT(pat, shift) (pat >> shift)
306#else
307# define PAT_SHIFT(pat, shift) (pat << shift)
308#endif
309
310    switch (length) {
311    case  2: pat |= PAT_SHIFT(pat,  2);  /* fall through */
312    case  4: pat |= PAT_SHIFT(pat,  4);  /* fall through */
313    case  8: pat |= PAT_SHIFT(pat,  8);  /* fall through */
314    case 16: pat |= PAT_SHIFT(pat, 16);
315    }
316
317    /* Save for later clipping */
318    info->accel_state->dp_gui_master_cntl_clip = (info->accel_state->dp_gui_master_cntl
319						  | (bg == -1
320						     ? RADEON_GMC_BRUSH_32x1_MONO_FG_LA
321						     : RADEON_GMC_BRUSH_32x1_MONO_FG_BG)
322						  | RADEON_ROP[rop].pattern
323						  | RADEON_GMC_BYTE_LSB_TO_MSB);
324    info->accel_state->dash_fg = fg;
325    info->accel_state->dash_bg = bg;
326
327    BEGIN_ACCEL((bg == -1) ? 4 : 5);
328
329    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, info->accel_state->dp_gui_master_cntl_clip);
330    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK,      planemask);
331    OUT_ACCEL_REG(RADEON_DP_BRUSH_FRGD_CLR,  fg);
332    if (bg != -1)
333	OUT_ACCEL_REG(RADEON_DP_BRUSH_BKGD_CLR, bg);
334    OUT_ACCEL_REG(RADEON_BRUSH_DATA0,        pat);
335
336    FINISH_ACCEL();
337    BEGIN_ACCEL(2);
338    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
339    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
340                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
341    FINISH_ACCEL();
342}
343
344/* Helper function to draw last point for dashed lines */
345static void
346FUNC_NAME(RADEONDashedLastPel)(ScrnInfoPtr pScrn,
347			       int x, int y,
348			       int fg)
349{
350    RADEONInfoPtr  info = RADEONPTR(pScrn);
351    uint32_t dp_gui_master_cntl = info->accel_state->dp_gui_master_cntl_clip;
352    ACCEL_PREAMBLE();
353
354    dp_gui_master_cntl &= ~RADEON_GMC_BRUSH_DATATYPE_MASK;
355    dp_gui_master_cntl |=  RADEON_GMC_BRUSH_SOLID_COLOR;
356
357    dp_gui_master_cntl &= ~RADEON_GMC_SRC_DATATYPE_MASK;
358    dp_gui_master_cntl |=  RADEON_GMC_SRC_DATATYPE_COLOR;
359
360    BEGIN_ACCEL(8);
361
362    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, dp_gui_master_cntl);
363    OUT_ACCEL_REG(RADEON_DP_CNTL,            (RADEON_DST_X_LEFT_TO_RIGHT
364					      | RADEON_DST_Y_TOP_TO_BOTTOM));
365    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset |
366    	((info->tilingEnabled && (y <= pScrn->virtualY)) ? RADEON_DST_TILE_MACRO : 0));
367    OUT_ACCEL_REG(RADEON_DP_BRUSH_FRGD_CLR,  fg);
368    OUT_ACCEL_REG(RADEON_DST_Y_X,            (y << 16) | x);
369    OUT_ACCEL_REG(RADEON_DST_WIDTH_HEIGHT,   (1 << 16) | 1);
370
371    /* Restore old values */
372    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, info->accel_state->dp_gui_master_cntl_clip);
373    OUT_ACCEL_REG(RADEON_DP_BRUSH_FRGD_CLR,  info->accel_state->dash_fg);
374
375    FINISH_ACCEL();
376    BEGIN_ACCEL(2);
377    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
378    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
379                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
380    FINISH_ACCEL();
381}
382
383/* Subsequent XAA dashed line */
384static void
385FUNC_NAME(RADEONSubsequentDashedTwoPointLine)(ScrnInfoPtr pScrn,
386					      int xa, int ya,
387					      int xb, int yb,
388					      int flags,
389					      int phase)
390{
391    RADEONInfoPtr  info = RADEONPTR(pScrn);
392    ACCEL_PREAMBLE();
393
394    /* TODO: Check bounds -- RADEON only has 14 bits */
395
396    if (!(flags & OMIT_LAST)) {
397	int deltax = abs(xa - xb);
398	int deltay = abs(ya - yb);
399	int shift;
400
401	if (deltax > deltay) shift = deltax;
402	else                 shift = deltay;
403
404	shift += phase;
405	shift %= info->accel_state->dashLen;
406
407	if ((info->accel_state->dashPattern >> shift) & 1)
408	    FUNC_NAME(RADEONDashedLastPel)(pScrn, xb, yb, info->accel_state->dash_fg);
409	else if (info->accel_state->dash_bg != -1)
410	    FUNC_NAME(RADEONDashedLastPel)(pScrn, xb, yb, info->accel_state->dash_bg);
411    }
412
413    BEGIN_ACCEL(4);
414
415    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset |
416    	((info->tilingEnabled && (ya <= pScrn->virtualY)) ? RADEON_DST_TILE_MACRO : 0));
417    OUT_ACCEL_REG(RADEON_DST_LINE_START,   (ya << 16) | xa);
418    OUT_ACCEL_REG(RADEON_DST_LINE_PATCOUNT, phase);
419    OUT_ACCEL_REG(RADEON_DST_LINE_END,     (yb << 16) | xb);
420
421    FINISH_ACCEL();
422}
423
424/* Set up for transparency
425 *
426 * Mmmm, Seems as though the transparency compare is opposite to r128.
427 * It should only draw when source != trans_color, this is the opposite
428 * of that.
429 */
430static void
431FUNC_NAME(RADEONSetTransparency)(ScrnInfoPtr pScrn,
432				 int trans_color)
433{
434    RADEONInfoPtr  info = RADEONPTR(pScrn);
435
436    if ((trans_color != -1) || (info->accel_state->XAAForceTransBlit == TRUE)) {
437	ACCEL_PREAMBLE();
438
439	BEGIN_ACCEL(3);
440
441	OUT_ACCEL_REG(RADEON_CLR_CMP_CLR_SRC, trans_color);
442	OUT_ACCEL_REG(RADEON_CLR_CMP_MASK,    RADEON_CLR_CMP_MSK);
443	OUT_ACCEL_REG(RADEON_CLR_CMP_CNTL,    (RADEON_SRC_CMP_EQ_COLOR
444					       | RADEON_CLR_CMP_SRC_SOURCE));
445
446	FINISH_ACCEL();
447    }
448}
449
450/* Setup for XAA screen-to-screen copy
451 *
452 * Tests: xtest CH06/fllrctngl (also tests transparency)
453 */
454static void
455FUNC_NAME(RADEONSetupForScreenToScreenCopy)(ScrnInfoPtr pScrn,
456					    int xdir, int ydir,
457					    int rop,
458					    unsigned int planemask,
459					    int trans_color)
460{
461    RADEONInfoPtr  info = RADEONPTR(pScrn);
462    ACCEL_PREAMBLE();
463
464    info->accel_state->xdir = xdir;
465    info->accel_state->ydir = ydir;
466
467    /* Save for later clipping */
468    info->accel_state->dp_gui_master_cntl_clip = (info->accel_state->dp_gui_master_cntl
469						  | RADEON_GMC_BRUSH_NONE
470						  | RADEON_GMC_SRC_DATATYPE_COLOR
471						  | RADEON_ROP[rop].rop
472						  | RADEON_DP_SRC_SOURCE_MEMORY
473						  | RADEON_GMC_SRC_PITCH_OFFSET_CNTL);
474
475    BEGIN_ACCEL(3);
476
477    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, info->accel_state->dp_gui_master_cntl_clip);
478    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK,      planemask);
479    OUT_ACCEL_REG(RADEON_DP_CNTL,
480		  ((xdir >= 0 ? RADEON_DST_X_LEFT_TO_RIGHT : 0) |
481		   (ydir >= 0 ? RADEON_DST_Y_TOP_TO_BOTTOM : 0)));
482
483    FINISH_ACCEL();
484    BEGIN_ACCEL(2);
485    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
486    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
487                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
488    FINISH_ACCEL();
489
490    info->accel_state->trans_color = trans_color;
491    FUNC_NAME(RADEONSetTransparency)(pScrn, trans_color);
492}
493
494/* Subsequent XAA screen-to-screen copy */
495static void
496FUNC_NAME(RADEONSubsequentScreenToScreenCopy)(ScrnInfoPtr pScrn,
497					      int xa, int ya,
498					      int xb, int yb,
499					      int w, int h)
500{
501    RADEONInfoPtr  info = RADEONPTR(pScrn);
502    ACCEL_PREAMBLE();
503
504    if (info->accel_state->xdir < 0) xa += w - 1, xb += w - 1;
505    if (info->accel_state->ydir < 0) ya += h - 1, yb += h - 1;
506
507    BEGIN_ACCEL(5);
508
509    OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, info->accel_state->dst_pitch_offset |
510    	((info->tilingEnabled && (ya <= pScrn->virtualY)) ? RADEON_DST_TILE_MACRO : 0));
511    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset |
512    	((info->tilingEnabled && (yb <= pScrn->virtualY)) ? RADEON_DST_TILE_MACRO : 0));
513    OUT_ACCEL_REG(RADEON_SRC_Y_X,          (ya << 16) | xa);
514    OUT_ACCEL_REG(RADEON_DST_Y_X,          (yb << 16) | xb);
515    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h  << 16) | w);
516
517    FINISH_ACCEL();
518}
519
520/* Setup for XAA mono 8x8 pattern color expansion.  Patterns with
521 * transparency use `bg == -1'.  This routine is only used if the XAA
522 * pixmap cache is turned on.
523 *
524 * Tests: xtest XFree86/fllrctngl (no other test will test this routine with
525 *                                 both transparency and non-transparency)
526 */
527static void
528FUNC_NAME(RADEONSetupForMono8x8PatternFill)(ScrnInfoPtr pScrn,
529					    int patternx,
530					    int patterny,
531					    int fg,
532					    int bg,
533					    int rop,
534					    unsigned int planemask)
535{
536    RADEONInfoPtr  info = RADEONPTR(pScrn);
537#if X_BYTE_ORDER == X_BIG_ENDIAN
538    unsigned char  pattern[8];
539#endif
540    ACCEL_PREAMBLE();
541
542#if X_BYTE_ORDER == X_BIG_ENDIAN
543    /* Take care of endianness */
544    pattern[0] = (patternx & 0x000000ff);
545    pattern[1] = (patternx & 0x0000ff00) >> 8;
546    pattern[2] = (patternx & 0x00ff0000) >> 16;
547    pattern[3] = (patternx & 0xff000000) >> 24;
548    pattern[4] = (patterny & 0x000000ff);
549    pattern[5] = (patterny & 0x0000ff00) >> 8;
550    pattern[6] = (patterny & 0x00ff0000) >> 16;
551    pattern[7] = (patterny & 0xff000000) >> 24;
552#endif
553
554    /* Save for later clipping */
555    info->accel_state->dp_gui_master_cntl_clip = (info->accel_state->dp_gui_master_cntl
556						  | (bg == -1
557						     ? RADEON_GMC_BRUSH_8X8_MONO_FG_LA
558						     : RADEON_GMC_BRUSH_8X8_MONO_FG_BG)
559						  | RADEON_ROP[rop].pattern
560#if X_BYTE_ORDER == X_LITTLE_ENDIAN
561						  | RADEON_GMC_BYTE_MSB_TO_LSB
562#endif
563						  );
564
565    BEGIN_ACCEL((bg == -1) ? 5 : 6);
566
567    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, info->accel_state->dp_gui_master_cntl_clip);
568    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK,      planemask);
569    OUT_ACCEL_REG(RADEON_DP_BRUSH_FRGD_CLR,  fg);
570    if (bg != -1)
571	OUT_ACCEL_REG(RADEON_DP_BRUSH_BKGD_CLR, bg);
572#if X_BYTE_ORDER == X_LITTLE_ENDIAN
573    OUT_ACCEL_REG(RADEON_BRUSH_DATA0,        patternx);
574    OUT_ACCEL_REG(RADEON_BRUSH_DATA1,        patterny);
575#else
576    OUT_ACCEL_REG(RADEON_BRUSH_DATA0,        *(uint32_t *)(pointer)&pattern[0]);
577    OUT_ACCEL_REG(RADEON_BRUSH_DATA1,        *(uint32_t *)(pointer)&pattern[4]);
578#endif
579
580    FINISH_ACCEL();
581    BEGIN_ACCEL(2);
582    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
583    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
584                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
585    FINISH_ACCEL();
586}
587
588/* Subsequent XAA 8x8 pattern color expansion.  Because they are used in
589 * the setup function, `patternx' and `patterny' are not used here.
590 */
591static void
592FUNC_NAME(RADEONSubsequentMono8x8PatternFillRect)(ScrnInfoPtr pScrn,
593						  int patternx,
594						  int patterny,
595						  int x, int y,
596						  int w, int h)
597{
598    RADEONInfoPtr  info = RADEONPTR(pScrn);
599    ACCEL_PREAMBLE();
600
601    BEGIN_ACCEL(4);
602
603    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset |
604    	((info->tilingEnabled && (y <= pScrn->virtualY)) ? RADEON_DST_TILE_MACRO : 0));
605    OUT_ACCEL_REG(RADEON_BRUSH_Y_X,        (patterny << 8) | patternx);
606    OUT_ACCEL_REG(RADEON_DST_Y_X,          (y << 16) | x);
607    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h << 16) | w);
608
609    FINISH_ACCEL();
610}
611
612#if 0
613/* Setup for XAA color 8x8 pattern fill
614 *
615 * Tests: xtest XFree86/fllrctngl (with Mono8x8PatternFill off)
616 */
617static void
618FUNC_NAME(RADEONSetupForColor8x8PatternFill)(ScrnInfoPtr pScrn,
619					     int patx, int paty,
620					     int rop,
621					     unsigned int planemask,
622					     int trans_color)
623{
624    RADEONInfoPtr  info = RADEONPTR(pScrn);
625    ACCEL_PREAMBLE();
626
627    /* Save for later clipping */
628    info->accel_state->dp_gui_master_cntl_clip = (info->accel_state->dp_gui_master_cntl
629						  | RADEON_GMC_BRUSH_8x8_COLOR
630						  | RADEON_GMC_SRC_DATATYPE_COLOR
631						  | RADEON_ROP[rop].pattern
632						  | RADEON_DP_SRC_SOURCE_MEMORY);
633
634    BEGIN_ACCEL(3);
635
636    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, info->accel_state->dp_gui_master_cntl_clip);
637    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK,      planemask);
638    OUT_ACCEL_REG(RADEON_SRC_Y_X,            (paty << 16) | patx);
639
640    FINISH_ACCEL();
641
642    info->accel_state->trans_color = trans_color;
643    FUNC_NAME(RADEONSetTransparency)(pScrn, trans_color);
644}
645
646/* Subsequent XAA 8x8 pattern color expansion */
647static void
648FUNC_NAME(RADEONSubsequentColor8x8PatternFillRect)(ScrnInfoPtr pScrn,
649						   int patx, int paty,
650						   int x, int y,
651						   int w, int h)
652{
653    RADEONInfoPtr  info = RADEONPTR(pScrn);
654    ACCEL_PREAMBLE();
655
656    BEGIN_ACCEL(4);
657
658    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset |
659    	((info->tilingEnabled && (y <= pScrn->virtualY)) ? RADEON_DST_TILE_MACRO : 0));
660    OUT_ACCEL_REG(RADEON_BRUSH_Y_X,        (paty << 16) | patx);
661    OUT_ACCEL_REG(RADEON_DST_Y_X,          (y << 16) | x);
662    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h << 16) | w);
663
664    FINISH_ACCEL();
665}
666#endif
667
#ifdef ACCEL_CP
/* Number of data dwords available in one indirect buffer: its total
 * size in dwords minus the 10 dwords written ahead of the scanline data
 * by the packet helper.
 */
#define CP_BUFSIZE (info->cp->indirectBuffer->total/4-10)

/* Helper function to write out a HOSTDATA_BLT packet into the indirect
 * buffer and set the XAA scratch buffer address appropriately.
 *
 * The packet covers the next scanline_hpass scanlines.  Ten dwords of
 * header and parameters are written, scratch_buffer[bufno] is pointed
 * at the first data dword, and the data area (hpass * words dwords) is
 * reserved by advancing the ring write count past it.  Finally the
 * current scanline position and the remaining height are moved on by
 * scanline_hpass.
 */
static void
RADEONCPScanlinePacket(ScrnInfoPtr pScrn, int bufno)
{
    RADEONInfoPtr info        = RADEONPTR(pScrn);
    int           chunk_words = (info->accel_state->scanline_hpass
                                 * info->accel_state->scanline_words);
    ACCEL_PREAMBLE();

    if (RADEON_VERBOSE) {
        xf86DrvMsg(pScrn->scrnIndex, X_INFO,
                   "CPScanline Packet h=%d hpass=%d chunkwords=%d\n",
                   info->accel_state->scanline_h,
                   info->accel_state->scanline_hpass,
                   chunk_words);
    }

    BEGIN_RING(chunk_words+10);

    /* dword 0: packet header */
    OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT,chunk_words+10-2));
    /* dword 1: master control word */
    OUT_RING(info->accel_state->dp_gui_master_cntl_clip);
    /* dword 2: destination pitch/offset, with the macro-tile bit if needed */
    OUT_RING(info->accel_state->dst_pitch_offset |
             ((info->tilingEnabled
               && (info->accel_state->scanline_y <= pScrn->virtualY))
              ? RADEON_DST_TILE_MACRO : 0));
    /* dwords 3-4: clip rectangle, top-left then bottom-right */
    OUT_RING((info->accel_state->scanline_y << 16) |
             (info->accel_state->scanline_x1clip & 0xffff));
    OUT_RING(((info->accel_state->scanline_y+info->accel_state->scanline_hpass) << 16) |
             (info->accel_state->scanline_x2clip & 0xffff));
    /* dwords 5-6: foreground and background colours */
    OUT_RING(info->accel_state->scanline_fg);
    OUT_RING(info->accel_state->scanline_bg);
    /* dwords 7-8: destination position and size */
    OUT_RING((info->accel_state->scanline_y << 16) |
             (info->accel_state->scanline_x & 0xffff));
    OUT_RING((info->accel_state->scanline_hpass << 16) |
             (info->accel_state->scanline_w & 0xffff));
    /* dword 9: number of data dwords that follow */
    OUT_RING(chunk_words);

    /* XAA fills the data area in place, directly behind the header */
    info->accel_state->scratch_buffer[bufno] = (unsigned char *)&__head[__count];
    __count += chunk_words;

    /* The ring can only be advanced after the __head and __count have
       been adjusted above */
    FINISH_ACCEL();

    info->accel_state->scanline_y += info->accel_state->scanline_hpass;
    info->accel_state->scanline_h -= info->accel_state->scanline_hpass;
}
#endif
715
716/* Setup for XAA indirect CPU-to-screen color expansion (indirect).
717 * Because of how the scratch buffer is initialized, this is really a
718 * mainstore-to-screen color expansion.  Transparency is supported when
719 * `bg == -1'.
720 */
721static void
722FUNC_NAME(RADEONSetupForScanlineCPUToScreenColorExpandFill)(ScrnInfoPtr pScrn,
723							    int fg,
724							    int bg,
725							    int rop,
726							    unsigned int
727							    planemask)
728{
729    RADEONInfoPtr  info = RADEONPTR(pScrn);
730    ACCEL_PREAMBLE();
731
732    info->accel_state->scanline_bpp = 0;
733
734    /* Save for later clipping */
735    info->accel_state->dp_gui_master_cntl_clip = (info->accel_state->dp_gui_master_cntl
736						  | RADEON_GMC_DST_CLIPPING
737						  | RADEON_GMC_BRUSH_NONE
738						  | (bg == -1
739						     ? RADEON_GMC_SRC_DATATYPE_MONO_FG_LA
740						     : RADEON_GMC_SRC_DATATYPE_MONO_FG_BG)
741						  | RADEON_ROP[rop].rop
742#if X_BYTE_ORDER == X_LITTLE_ENDIAN
743						  | RADEON_GMC_BYTE_LSB_TO_MSB
744#else
745						  | RADEON_GMC_BYTE_MSB_TO_LSB
746#endif
747						  | RADEON_DP_SRC_SOURCE_HOST_DATA);
748
749#ifdef ACCEL_MMIO
750
751#if X_BYTE_ORDER == X_LITTLE_ENDIAN
752    BEGIN_ACCEL(4);
753#else
754    BEGIN_ACCEL(5);
755
756    OUT_ACCEL_REG(RADEON_RBBM_GUICNTL,       RADEON_HOST_DATA_SWAP_NONE);
757#endif
758    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, info->accel_state->dp_gui_master_cntl_clip);
759    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK,      planemask);
760    OUT_ACCEL_REG(RADEON_DP_SRC_FRGD_CLR,    fg);
761    OUT_ACCEL_REG(RADEON_DP_SRC_BKGD_CLR,    bg);
762
763#else /* ACCEL_CP */
764
765    info->accel_state->scanline_fg = fg;
766    info->accel_state->scanline_bg = bg;
767
768#if X_BYTE_ORDER == X_LITTLE_ENDIAN
769    BEGIN_ACCEL(1);
770#else
771    if (info->ChipFamily < CHIP_FAMILY_R300) {
772	BEGIN_ACCEL(2);
773
774	OUT_ACCEL_REG(RADEON_RBBM_GUICNTL,   RADEON_HOST_DATA_SWAP_32BIT);
775    } else
776	BEGIN_ACCEL(1);
777#endif
778    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK,      planemask);
779
780#endif
781
782    FINISH_ACCEL();
783}
784
785/* Subsequent XAA indirect CPU-to-screen color expansion.  This is only
786 * called once for each rectangle.
787 */
788static void
789FUNC_NAME(RADEONSubsequentScanlineCPUToScreenColorExpandFill)(ScrnInfoPtr
790							      pScrn,
791							      int x, int y,
792							      int w, int h,
793							      int skipleft)
794{
795    RADEONInfoPtr  info = RADEONPTR(pScrn);
796#ifdef ACCEL_MMIO
797    ACCEL_PREAMBLE();
798
799    info->accel_state->scanline_h      = h;
800    info->accel_state->scanline_words  = (w + 31) >> 5;
801
802#ifdef __alpha__
803    /* Always use indirect for Alpha */
804    if (0)
805#else
806    if ((info->accel_state->scanline_words * h) <= 9)
807#endif
808    {
809	/* Turn on direct for less than 9 dword colour expansion */
810	info->accel_state->scratch_buffer[0] =
811	    (unsigned char *)(ADDRREG(RADEON_HOST_DATA_LAST)
812			      - (info->accel_state->scanline_words - 1));
813	info->accel_state->scanline_direct   = 1;
814    } else {
815	/* Use indirect for anything else */
816	info->accel_state->scratch_buffer[0] = info->accel_state->scratch_save;
817	info->accel_state->scanline_direct   = 0;
818    }
819
820    BEGIN_ACCEL(5 + (info->accel_state->scanline_direct ?
821		     (info->accel_state->scanline_words * h) : 0));
822
823    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset |
824    	((info->tilingEnabled && (y <= pScrn->virtualY)) ? RADEON_DST_TILE_MACRO : 0));
825    OUT_ACCEL_REG(RADEON_SC_TOP_LEFT,      (y << 16)     | ((x+skipleft)
826							    & 0xffff));
827    OUT_ACCEL_REG(RADEON_SC_BOTTOM_RIGHT,  ((y+h) << 16) | ((x+w) & 0xffff));
828    OUT_ACCEL_REG(RADEON_DST_Y_X,          (y << 16)     | (x & 0xffff));
829    /* Have to pad the width here and use clipping engine */
830    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h << 16)     | RADEON_ALIGN(w, 32));
831
832    FINISH_ACCEL();
833
834#else /* ACCEL_CP */
835
836    info->accel_state->scanline_x      = x;
837    info->accel_state->scanline_y      = y;
838    /* Have to pad the width here and use clipping engine */
839    info->accel_state->scanline_w      = RADEON_ALIGN(w, 32);
840    info->accel_state->scanline_h      = h;
841
842    info->accel_state->scanline_x1clip = x + skipleft;
843    info->accel_state->scanline_x2clip = x + w;
844
845    info->accel_state->scanline_words  = info->accel_state->scanline_w / 32;
846    info->accel_state->scanline_hpass  = min(h,(CP_BUFSIZE/info->accel_state->scanline_words));
847
848    RADEONCPScanlinePacket(pScrn, 0);
849
850#endif
851}
852
853/* Subsequent XAA indirect CPU-to-screen color expansion and indirect
854 * image write.  This is called once for each scanline.
855 */
856static void
857FUNC_NAME(RADEONSubsequentScanline)(ScrnInfoPtr pScrn,
858				    int bufno)
859{
860    RADEONInfoPtr    info = RADEONPTR(pScrn);
861#ifdef ACCEL_MMIO
862    uint32_t        *p    = (pointer)info->accel_state->scratch_buffer[bufno];
863    int              i;
864    int              left = info->accel_state->scanline_words;
865    volatile uint32_t *d;
866    ACCEL_PREAMBLE();
867
868    if (info->accel_state->scanline_direct) return;
869
870    --info->accel_state->scanline_h;
871
872    while (left) {
873	write_mem_barrier();
874	if (left <= 8) {
875	  /* Last scanline - finish write to DATA_LAST */
876	  if (info->accel_state->scanline_h == 0) {
877	    BEGIN_ACCEL(left);
878				/* Unrolling doesn't improve performance */
879	    for (d = ADDRREG(RADEON_HOST_DATA_LAST) - (left - 1); left; --left)
880		*d++ = *p++;
881	    return;
882	  } else {
883	    BEGIN_ACCEL(left);
884				/* Unrolling doesn't improve performance */
885	    for (d = ADDRREG(RADEON_HOST_DATA7) - (left - 1); left; --left)
886		*d++ = *p++;
887	  }
888	} else {
889	    BEGIN_ACCEL(8);
890				/* Unrolling doesn't improve performance */
891	    for (d = ADDRREG(RADEON_HOST_DATA0), i = 0; i < 8; i++)
892		*d++ = *p++;
893	    left -= 8;
894	}
895    }
896
897    FINISH_ACCEL();
898
899#else /* ACCEL_CP */
900
901#if X_BYTE_ORDER == X_BIG_ENDIAN
902    if (info->ChipFamily >= CHIP_FAMILY_R300) {
903	if (info->accel_state->scanline_bpp == 16) {
904	    RADEONCopySwap(info->accel_state->scratch_buffer[bufno],
905			   info->accel_state->scratch_buffer[bufno],
906			   info->accel_state->scanline_words << 2,
907			   RADEON_HOST_DATA_SWAP_HDW);
908	} else if (info->accel_state->scanline_bpp < 15) {
909	    RADEONCopySwap(info->accel_state->scratch_buffer[bufno],
910			   info->accel_state->scratch_buffer[bufno],
911			   info->accel_state->scanline_words << 2,
912			   RADEON_HOST_DATA_SWAP_32BIT);
913	}
914    }
915#endif
916
917    if (--info->accel_state->scanline_hpass) {
918	info->accel_state->scratch_buffer[bufno] += 4 * info->accel_state->scanline_words;
919    } else if (info->accel_state->scanline_h) {
920	info->accel_state->scanline_hpass =
921	    min(info->accel_state->scanline_h,(CP_BUFSIZE/info->accel_state->scanline_words));
922	RADEONCPScanlinePacket(pScrn, bufno);
923    }
924
925#endif
926}
927
928/* Setup for XAA indirect image write */
929static void
930FUNC_NAME(RADEONSetupForScanlineImageWrite)(ScrnInfoPtr pScrn,
931					    int rop,
932					    unsigned int planemask,
933					    int trans_color,
934					    int bpp,
935					    int depth)
936{
937    RADEONInfoPtr  info = RADEONPTR(pScrn);
938    ACCEL_PREAMBLE();
939
940    info->accel_state->scanline_bpp = bpp;
941
942    /* Save for later clipping */
943    info->accel_state->dp_gui_master_cntl_clip = (info->accel_state->dp_gui_master_cntl
944						  | RADEON_GMC_DST_CLIPPING
945						  | RADEON_GMC_BRUSH_NONE
946						  | RADEON_GMC_SRC_DATATYPE_COLOR
947						  | RADEON_ROP[rop].rop
948						  | RADEON_GMC_BYTE_MSB_TO_LSB
949						  | RADEON_DP_SRC_SOURCE_HOST_DATA);
950
951#ifdef ACCEL_MMIO
952
953#if X_BYTE_ORDER == X_LITTLE_ENDIAN
954    BEGIN_ACCEL(2);
955#else
956    BEGIN_ACCEL(3);
957
958    if (bpp == 16)
959	OUT_ACCEL_REG(RADEON_RBBM_GUICNTL,   RADEON_HOST_DATA_SWAP_16BIT);
960    else if (bpp == 32)
961	OUT_ACCEL_REG(RADEON_RBBM_GUICNTL,   RADEON_HOST_DATA_SWAP_32BIT);
962    else
963	OUT_ACCEL_REG(RADEON_RBBM_GUICNTL,   RADEON_HOST_DATA_SWAP_NONE);
964#endif
965    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, info->accel_state->dp_gui_master_cntl_clip);
966
967#else /* ACCEL_CP */
968
969#if X_BYTE_ORDER == X_LITTLE_ENDIAN
970    BEGIN_ACCEL(1);
971#else
972    if (info->ChipFamily < CHIP_FAMILY_R300) {
973        BEGIN_ACCEL(2);
974
975	if (bpp == 16)
976	    OUT_ACCEL_REG(RADEON_RBBM_GUICNTL,   RADEON_HOST_DATA_SWAP_HDW);
977	else
978	    OUT_ACCEL_REG(RADEON_RBBM_GUICNTL,   RADEON_HOST_DATA_SWAP_NONE);
979    } else
980	BEGIN_ACCEL(1);
981#endif
982#endif
983    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK,      planemask);
984
985    FINISH_ACCEL();
986
987    info->accel_state->trans_color = trans_color;
988    FUNC_NAME(RADEONSetTransparency)(pScrn, trans_color);
989}
990
991/* Subsequent XAA indirect image write. This is only called once for
992 * each rectangle.
993 */
994static void
995FUNC_NAME(RADEONSubsequentScanlineImageWriteRect)(ScrnInfoPtr pScrn,
996						  int x, int y,
997						  int w, int h,
998						  int skipleft)
999{
1000    RADEONInfoPtr  info = RADEONPTR(pScrn);
1001
1002#ifdef ACCEL_MMIO
1003
1004    int            shift = 0; /* 32bpp */
1005    ACCEL_PREAMBLE();
1006
1007    if (pScrn->bitsPerPixel == 8) shift = 3;
1008    else if (pScrn->bitsPerPixel == 16) shift = 1;
1009
1010    info->accel_state->scanline_h      = h;
1011    info->accel_state->scanline_words  = (w * info->accel_state->scanline_bpp + 31) >> 5;
1012
1013#ifdef __alpha__
1014    /* Always use indirect for Alpha */
1015    if (0)
1016#else
1017    if ((info->accel_state->scanline_words * h) <= 9)
1018#endif
1019    {
1020	/* Turn on direct for less than 9 dword colour expansion */
1021	info->accel_state->scratch_buffer[0]
1022	    = (unsigned char *)(ADDRREG(RADEON_HOST_DATA_LAST)
1023				- (info->accel_state->scanline_words - 1));
1024	info->accel_state->scanline_direct = 1;
1025    } else {
1026	/* Use indirect for anything else */
1027	info->accel_state->scratch_buffer[0] = info->accel_state->scratch_save;
1028	info->accel_state->scanline_direct = 0;
1029    }
1030
1031    BEGIN_ACCEL(5 + (info->accel_state->scanline_direct ?
1032		     (info->accel_state->scanline_words * h) : 0));
1033
1034    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, info->accel_state->dst_pitch_offset |
1035    	((info->tilingEnabled && (y <= pScrn->virtualY)) ? RADEON_DST_TILE_MACRO : 0));
1036    OUT_ACCEL_REG(RADEON_SC_TOP_LEFT,      (y << 16)     | ((x+skipleft)
1037							    & 0xffff));
1038    OUT_ACCEL_REG(RADEON_SC_BOTTOM_RIGHT,  ((y+h) << 16) | ((x+w) & 0xffff));
1039    OUT_ACCEL_REG(RADEON_DST_Y_X,          (y << 16)     | (x & 0xffff));
1040    /* Have to pad the width here and use clipping engine */
1041    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h << 16)     | ((w + shift) &
1042							    ~shift));
1043
1044    FINISH_ACCEL();
1045
1046#else /* ACCEL_CP */
1047
1048    int  pad = 0; /* 32bpp */
1049
1050    if (pScrn->bitsPerPixel == 8)       pad = 3;
1051    else if (pScrn->bitsPerPixel == 16) pad = 1;
1052
1053    info->accel_state->scanline_x      = x;
1054    info->accel_state->scanline_y      = y;
1055    /* Have to pad the width here and use clipping engine */
1056    info->accel_state->scanline_w      = (w + pad) & ~pad;
1057    info->accel_state->scanline_h      = h;
1058
1059    info->accel_state->scanline_x1clip = x + skipleft;
1060    info->accel_state->scanline_x2clip = x + w;
1061
1062    info->accel_state->scanline_words  = (w * info->accel_state->scanline_bpp + 31) / 32;
1063    info->accel_state->scanline_hpass  = min(h,(CP_BUFSIZE/info->accel_state->scanline_words));
1064
1065    RADEONCPScanlinePacket(pScrn, 0);
1066
1067#endif
1068}
1069
1070/* Set up the clipping rectangle */
1071static void
1072FUNC_NAME(RADEONSetClippingRectangle)(ScrnInfoPtr pScrn,
1073				      int xa, int ya,
1074				      int xb, int yb)
1075{
1076    RADEONInfoPtr  info = RADEONPTR(pScrn);
1077    unsigned long  tmp1 = 0;
1078    unsigned long  tmp2 = 0;
1079    ACCEL_PREAMBLE();
1080
1081    if (xa < 0) {
1082	tmp1 = (-xa) & 0x3fff;
1083	tmp1 |= RADEON_SC_SIGN_MASK_LO;
1084    } else {
1085	tmp1 = xa;
1086    }
1087
1088    if (ya < 0) {
1089	tmp1 |= (((-ya) & 0x3fff) << 16);
1090	tmp1 |= RADEON_SC_SIGN_MASK_HI;
1091    } else {
1092	tmp1 |= (ya << 16);
1093    }
1094
1095    xb++; yb++;
1096
1097    if (xb < 0) {
1098	tmp2 = (-xb) & 0x3fff;
1099	tmp2 |= RADEON_SC_SIGN_MASK_LO;
1100    } else {
1101	tmp2 = xb;
1102    }
1103
1104    if (yb < 0) {
1105	tmp2 |= (((-yb) & 0x3fff) << 16);
1106	tmp2 |= RADEON_SC_SIGN_MASK_HI;
1107    } else {
1108	tmp2 |= (yb << 16);
1109    }
1110
1111    BEGIN_ACCEL(3);
1112
1113    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, (info->accel_state->dp_gui_master_cntl_clip
1114					      | RADEON_GMC_DST_CLIPPING));
1115    OUT_ACCEL_REG(RADEON_SC_TOP_LEFT,        tmp1);
1116    OUT_ACCEL_REG(RADEON_SC_BOTTOM_RIGHT,    tmp2);
1117
1118    FINISH_ACCEL();
1119    BEGIN_ACCEL(2);
1120    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
1121    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
1122                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
1123    FINISH_ACCEL();
1124
1125    FUNC_NAME(RADEONSetTransparency)(pScrn, info->accel_state->trans_color);
1126}
1127
1128/* Disable the clipping rectangle */
1129static void
1130FUNC_NAME(RADEONDisableClipping)(ScrnInfoPtr pScrn)
1131{
1132    RADEONInfoPtr info  = RADEONPTR(pScrn);
1133    ACCEL_PREAMBLE();
1134
1135    BEGIN_ACCEL(3);
1136
1137    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, info->accel_state->dp_gui_master_cntl_clip);
1138    OUT_ACCEL_REG(RADEON_SC_TOP_LEFT,        0);
1139    OUT_ACCEL_REG(RADEON_SC_BOTTOM_RIGHT,    (RADEON_DEFAULT_SC_RIGHT_MAX |
1140					      RADEON_DEFAULT_SC_BOTTOM_MAX));
1141
1142    FINISH_ACCEL();
1143    BEGIN_ACCEL(2);
1144    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
1145    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
1146                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
1147    FINISH_ACCEL();
1148
1149    FUNC_NAME(RADEONSetTransparency)(pScrn, info->accel_state->trans_color);
1150}
1151
1152void
1153FUNC_NAME(RADEONAccelInit)(ScreenPtr pScreen, XAAInfoRecPtr a)
1154{
1155    ScrnInfoPtr    pScrn = xf86ScreenToScrn(pScreen);
1156    RADEONInfoPtr  info  = RADEONPTR(pScrn);
1157
1158    a->Flags                            = (PIXMAP_CACHE
1159					   | OFFSCREEN_PIXMAPS
1160					   | LINEAR_FRAMEBUFFER);
1161
1162				/* Sync */
1163    a->Sync                             = FUNC_NAME(RADEONWaitForIdle);
1164
1165				/* Solid Filled Rectangle */
1166    a->PolyFillRectSolidFlags           = 0;
1167    a->SetupForSolidFill
1168	= FUNC_NAME(RADEONSetupForSolidFill);
1169    a->SubsequentSolidFillRect
1170	= FUNC_NAME(RADEONSubsequentSolidFillRect);
1171
1172				/* Screen-to-screen Copy */
1173    a->ScreenToScreenCopyFlags          = 0;
1174    a->SetupForScreenToScreenCopy
1175	= FUNC_NAME(RADEONSetupForScreenToScreenCopy);
1176    a->SubsequentScreenToScreenCopy
1177	= FUNC_NAME(RADEONSubsequentScreenToScreenCopy);
1178
1179				/* Mono 8x8 Pattern Fill (Color Expand) */
1180    a->SetupForMono8x8PatternFill
1181	= FUNC_NAME(RADEONSetupForMono8x8PatternFill);
1182    a->SubsequentMono8x8PatternFillRect
1183	= FUNC_NAME(RADEONSubsequentMono8x8PatternFillRect);
1184    a->Mono8x8PatternFillFlags          = (HARDWARE_PATTERN_PROGRAMMED_BITS
1185					   | HARDWARE_PATTERN_PROGRAMMED_ORIGIN
1186					   | HARDWARE_PATTERN_SCREEN_ORIGIN);
1187
1188#if X_BYTE_ORDER == X_LITTLE_ENDIAN
1189    if (info->ChipFamily >= CHIP_FAMILY_RV200)
1190	a->Mono8x8PatternFillFlags |= BIT_ORDER_IN_BYTE_MSBFIRST;
1191    else
1192	a->Mono8x8PatternFillFlags |= BIT_ORDER_IN_BYTE_LSBFIRST;
1193#else
1194    a->Mono8x8PatternFillFlags |= BIT_ORDER_IN_BYTE_LSBFIRST;
1195#endif
1196
1197				/* Indirect CPU-To-Screen Color Expand */
1198
1199    /* RADEON gets upset, when using HOST provided data without a source
1200       rop.  To show run 'xtest's drwarc. */
1201    a->ScanlineCPUToScreenColorExpandFillFlags
1202	= (LEFT_EDGE_CLIPPING
1203	   | ROP_NEEDS_SOURCE
1204	   | LEFT_EDGE_CLIPPING_NEGATIVE_X);
1205    a->NumScanlineColorExpandBuffers    = 1;
1206    a->ScanlineColorExpandBuffers       = info->accel_state->scratch_buffer;
1207    if (!info->accel_state->scratch_save)
1208	info->accel_state->scratch_save
1209	    = malloc(((pScrn->virtualX+31)/32*4)
1210		     + (pScrn->virtualX * info->CurrentLayout.pixel_bytes));
1211    info->accel_state->scratch_buffer[0]             = info->accel_state->scratch_save;
1212    a->SetupForScanlineCPUToScreenColorExpandFill
1213	= FUNC_NAME(RADEONSetupForScanlineCPUToScreenColorExpandFill);
1214    a->SubsequentScanlineCPUToScreenColorExpandFill
1215	= FUNC_NAME(RADEONSubsequentScanlineCPUToScreenColorExpandFill);
1216    a->SubsequentColorExpandScanline
1217        = FUNC_NAME(RADEONSubsequentScanline);
1218
1219				/* Solid Lines */
1220    a->SetupForSolidLine
1221	= FUNC_NAME(RADEONSetupForSolidLine);
1222    a->SubsequentSolidHorVertLine
1223	= FUNC_NAME(RADEONSubsequentSolidHorVertLine);
1224
1225    if (info->xaaReq.minorversion >= 1) {
1226
1227    /* RADEON only supports 14 bits for lines and clipping and only
1228     * draws lines that are completely on-screen correctly.  This will
1229     * cause display corruption problem in the cases when out-of-range
1230     * commands are issued, like when dimming screen during GNOME logout
1231     * in dual-head setup.  Solid and dashed lines are therefore limited
1232     * to the virtual screen.
1233     */
1234
1235    a->SolidLineFlags = LINE_LIMIT_COORDS;
1236    a->SolidLineLimits.x1 = 0;
1237    a->SolidLineLimits.y1 = 0;
1238    a->SolidLineLimits.x2 = pScrn->virtualX-1;
1239    a->SolidLineLimits.y2 = pScrn->virtualY-1;
1240
1241    /* Call miSetZeroLineBias() to have mi/mfb/fb routines match
1242       hardware accel two point lines */
1243    miSetZeroLineBias(pScreen, (OCTANT5 | OCTANT6 | OCTANT7 | OCTANT8));
1244
1245#ifdef ACCEL_CP
1246    /* RV280s lock up with this using the CP for reasons to be determined.
1247     * See https://bugs.freedesktop.org/show_bug.cgi?id=5986 .
1248     */
1249    if (info->ChipFamily != CHIP_FAMILY_RV280)
1250#endif
1251	a->SubsequentSolidTwoPointLine
1252	    = FUNC_NAME(RADEONSubsequentSolidTwoPointLine);
1253
1254    /* Disabled on RV200 and newer because it does not pass XTest */
1255    if (info->ChipFamily < CHIP_FAMILY_RV200) {
1256	a->SetupForDashedLine
1257	    = FUNC_NAME(RADEONSetupForDashedLine);
1258	a->SubsequentDashedTwoPointLine
1259	    = FUNC_NAME(RADEONSubsequentDashedTwoPointLine);
1260	a->DashPatternMaxLength         = 32;
1261	/* ROP3 doesn't seem to work properly for dashedline with GXinvert */
1262	a->DashedLineFlags              = (LINE_PATTERN_LSBFIRST_LSBJUSTIFIED
1263					   | LINE_PATTERN_POWER_OF_2_ONLY
1264					   | LINE_LIMIT_COORDS
1265					   | ROP_NEEDS_SOURCE);
1266	a->DashedLineLimits.x1 = 0;
1267	a->DashedLineLimits.y1 = 0;
1268	a->DashedLineLimits.x2 = pScrn->virtualX-1;
1269	a->DashedLineLimits.y2 = pScrn->virtualY-1;
1270    }
1271
1272    } else {
1273	xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
1274		   "libxaa too old, can't accelerate TwoPoint lines\n");
1275    }
1276
1277    /* Clipping, note that without this, all line accelerations will
1278     * not be called
1279     */
1280    a->SetClippingRectangle
1281	= FUNC_NAME(RADEONSetClippingRectangle);
1282    a->DisableClipping
1283	= FUNC_NAME(RADEONDisableClipping);
1284    a->ClippingFlags
1285	= (HARDWARE_CLIP_SOLID_LINE
1286	   | HARDWARE_CLIP_DASHED_LINE
1287	/* | HARDWARE_CLIP_SOLID_FILL -- seems very slow with this on */
1288	   | HARDWARE_CLIP_MONO_8x8_FILL
1289	   | HARDWARE_CLIP_SCREEN_TO_SCREEN_COPY);
1290
1291    if (xf86IsEntityShared(info->pEnt->index)) {
1292	/* If there are more than one devices sharing this entity, we
1293	 * have to assign this call back, otherwise the XAA will be
1294	 * disabled
1295	 */
1296	if (xf86GetNumEntityInstances(info->pEnt->index) > 1)
1297	    a->RestoreAccelState        = FUNC_NAME(RADEONRestoreAccelState);
1298    }
1299
1300				/* ImageWrite */
1301    a->NumScanlineImageWriteBuffers     = 1;
1302    a->ScanlineImageWriteBuffers        = info->accel_state->scratch_buffer;
1303    a->SetupForScanlineImageWrite
1304	= FUNC_NAME(RADEONSetupForScanlineImageWrite);
1305    a->SubsequentScanlineImageWriteRect
1306	= FUNC_NAME(RADEONSubsequentScanlineImageWriteRect);
1307    a->SubsequentImageWriteScanline     = FUNC_NAME(RADEONSubsequentScanline);
1308    a->ScanlineImageWriteFlags          = (CPU_TRANSFER_PAD_DWORD
1309#ifdef ACCEL_MMIO
1310		/* Performance tests show that we shouldn't use GXcopy
1311		 * for uploads as a memcpy is faster
1312		 */
1313					  | NO_GXCOPY
1314#endif
1315		/* RADEON gets upset, when using HOST provided data
1316		 * without a source rop. To show run 'xtest's ptimg
1317		 */
1318					  | ROP_NEEDS_SOURCE
1319					  | SCANLINE_PAD_DWORD
1320					  | LEFT_EDGE_CLIPPING
1321					  | LEFT_EDGE_CLIPPING_NEGATIVE_X);
1322
1323#if 0
1324				/* Color 8x8 Pattern Fill */
1325    a->SetupForColor8x8PatternFill
1326	= FUNC_NAME(RADEONSetupForColor8x8PatternFill);
1327    a->SubsequentColor8x8PatternFillRect
1328	= FUNC_NAME(RADEONSubsequentColor8x8PatternFillRect);
1329    a->Color8x8PatternFillFlags         = (HARDWARE_PATTERN_PROGRAMMED_ORIGIN
1330					   | HARDWARE_PATTERN_SCREEN_ORIGIN
1331					   | BIT_ORDER_IN_BYTE_LSBFIRST);
1332#endif
1333
1334#ifdef RENDER
1335    if (info->RenderAccel && info->xaaReq.minorversion >= 2) {
1336
1337	a->CPUToScreenAlphaTextureFlags = XAA_RENDER_POWER_OF_2_TILE_ONLY;
1338	a->CPUToScreenAlphaTextureFormats = RADEONTextureFormats;
1339	a->CPUToScreenAlphaTextureDstFormats = RADEONDstFormats;
1340	a->CPUToScreenTextureFlags = XAA_RENDER_POWER_OF_2_TILE_ONLY;
1341	a->CPUToScreenTextureFormats = RADEONTextureFormats;
1342	a->CPUToScreenTextureDstFormats = RADEONDstFormats;
1343
1344	if (IS_R300_VARIANT || IS_AVIVO_VARIANT) {
1345	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "XAA Render acceleration "
1346		       "unsupported on Radeon 9500/9700 and newer. "
1347		       "Please use EXA instead.\n");
1348	} else if (IS_R200_3D) {
1349	    a->SetupForCPUToScreenAlphaTexture2 =
1350		FUNC_NAME(R200SetupForCPUToScreenAlphaTexture);
1351	    a->SubsequentCPUToScreenAlphaTexture =
1352		FUNC_NAME(R200SubsequentCPUToScreenTexture);
1353
1354	    a->SetupForCPUToScreenTexture2 =
1355		FUNC_NAME(R200SetupForCPUToScreenTexture);
1356	    a->SubsequentCPUToScreenTexture =
1357		FUNC_NAME(R200SubsequentCPUToScreenTexture);
1358	} else {
1359	    a->SetupForCPUToScreenAlphaTexture2 =
1360		FUNC_NAME(R100SetupForCPUToScreenAlphaTexture);
1361	    a->SubsequentCPUToScreenAlphaTexture =
1362		FUNC_NAME(R100SubsequentCPUToScreenTexture);
1363
1364	    a->SetupForCPUToScreenTexture2 =
1365		FUNC_NAME(R100SetupForCPUToScreenTexture);
1366	    a->SubsequentCPUToScreenTexture =
1367		FUNC_NAME(R100SubsequentCPUToScreenTexture);
1368	}
1369    } else if (info->RenderAccel) {
1370	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration currently "
1371		   "requires XAA v1.2 or newer.\n");
1372    }
1373
1374    if (!a->SetupForCPUToScreenAlphaTexture2 && !a->SetupForCPUToScreenTexture2)
1375	info->RenderAccel = FALSE;
1376
1377    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration %s\n",
1378	       info->RenderAccel ? "enabled" : "disabled");
1379#endif /* RENDER */
1380}
1381
1382#endif /* USE_XAA */
1383
1384#undef FUNC_NAME
1385