1
2#ifdef HAVE_CONFIG_H
3#include "config.h"
4#endif
5
/*
 * If NO_OPTIMIZE is set, some optimizations are disabled.
 *
 * What these optimizations basically try to do is minimize the number of
 * writes to accelerator registers, since these are the ones that slow down
 * small operations a lot.
 */
13/* #define NO_OPTIMIZE */
14
15/*
16 * if ET6K_TRANSPARENCY is set, ScreentoScreenCopy operations (and pattern
17 * fills) will support transparency. But then the planemask support has to
18 * be dropped. The default here is to support planemasks, because all Tseng
19 * chips can do this. Only the ET6000 supports a transparency compare. The
20 * code could be easily changed to support transparency on the ET6000 and
21 * planemasks on the others, but that's only useful when transparency is
22 * more important than planemasks.
23 */
24#undef ET6K_TRANSPARENCY
25
26#include "tseng.h"
27#include "tseng_accel.h"
28
29#ifdef HAVE_XAA_H
30#include "miline.h"
31
/*
 * Conversion from X ROPs to Microsoft ROPs.
 *
 * Entries are listed in X raster-op order, Xclear first and Xset last;
 * each one operates on source and destination.
 */

static int W32OpTable[] = {
    /* Xclear        : 0                   */ 0x00,
    /* Xand          : src AND dst         */ 0x88,
    /* XandReverse   : src AND NOT dst     */ 0x44,
    /* Xcopy         : src                 */ 0xcc,
    /* XandInverted  : NOT src AND dst     */ 0x22,
    /* Xnoop         : dst                 */ 0xaa,
    /* Xxor          : src XOR dst         */ 0x66,
    /* Xor           : src OR dst          */ 0xee,
    /* Xnor          : NOT src AND NOT dst */ 0x11,
    /* Xequiv        : NOT src XOR dst     */ 0x99,
    /* Xinvert       : NOT dst             */ 0x55,
    /* XorReverse    : src OR NOT dst      */ 0xdd,
    /* XcopyInverted : NOT src             */ 0x33,
    /* XorInverted   : NOT src OR dst      */ 0xbb,
    /* Xnand         : NOT src OR NOT dst  */ 0x77,
    /* Xset          : 1                   */ 0xff
};
55
/*
 * Planemask variant of the source/destination ROP table: same X raster-op
 * order, but every entry except Xnoop has its low nibble set to 0xa.
 */
static int W32OpTable_planemask[] = {
    /* Xclear        : 0                   */ 0x0a,
    /* Xand          : src AND dst         */ 0x8a,
    /* XandReverse   : src AND NOT dst     */ 0x4a,
    /* Xcopy         : src                 */ 0xca,
    /* XandInverted  : NOT src AND dst     */ 0x2a,
    /* Xnoop         : dst                 */ 0xaa,
    /* Xxor          : src XOR dst         */ 0x6a,
    /* Xor           : src OR dst          */ 0xea,
    /* Xnor          : NOT src AND NOT dst */ 0x1a,
    /* Xequiv        : NOT src XOR dst     */ 0x9a,
    /* Xinvert       : NOT dst             */ 0x5a,
    /* XorReverse    : src OR NOT dst      */ 0xda,
    /* XcopyInverted : NOT src             */ 0x3a,
    /* XorInverted   : NOT src OR dst      */ 0xba,
    /* Xnand         : NOT src OR NOT dst  */ 0x7a,
    /* Xset          : 1                   */ 0xfa
};
75
/*
 * Pattern/destination ROP codes, in the same X raster-op order as the
 * source/destination tables.
 */
static int W32PatternOpTable[] = {
    /* Xclear        : 0                   */ 0x00,
    /* Xand          : pat AND dst         */ 0xa0,
    /* XandReverse   : pat AND NOT dst     */ 0x50,
    /* Xcopy         : pat                 */ 0xf0,
    /* XandInverted  : NOT pat AND dst     */ 0x0a,
    /* Xnoop         : dst                 */ 0xaa,
    /* Xxor          : pat XOR dst         */ 0x5a,
    /* Xor           : pat OR dst          */ 0xfa,
    /* Xnor          : NOT pat AND NOT dst */ 0x05,
    /* Xequiv        : NOT pat XOR dst     */ 0xa5,
    /* Xinvert       : NOT dst             */ 0x55,
    /* XorReverse    : pat OR NOT dst      */ 0xf5,
    /* XcopyInverted : NOT pat             */ 0x0f,
    /* XorInverted   : NOT pat OR dst      */ 0xaf,
    /* Xnand         : NOT pat OR NOT dst  */ 0x5f,
    /* Xset          : 1                   */ 0xff
};
95
96
97
98/**********************************************************************/
99
100static void
101tseng_terminate_acl(TsengPtr pTseng)
102{
103    /* only terminate when needed */
104/*  if (*(volatile unsigned char *)ACL_ACCELERATOR_STATUS & 0x06) */
105    {
106	ACL_SUSPEND_TERMINATE(0x00);
107	/* suspend any running operation */
108	ACL_SUSPEND_TERMINATE(0x01);
109	WAIT_ACL;
110	ACL_SUSPEND_TERMINATE(0x00);
111	/* ... and now terminate it */
112	ACL_SUSPEND_TERMINATE(0x10);
113	WAIT_ACL;
114	ACL_SUSPEND_TERMINATE(0x00);
115    }
116}
117
118void
119tseng_recover_timeout(TsengPtr pTseng)
120{
121    if (pTseng->ChipType == ET4000) {
122	ErrorF("trying to unlock......................................\n");
123	MMIO_OUT32(pTseng->tsengCPU2ACLBase,0,0L); /* try unlocking the bus when CPU-to-accel gets stuck */
124
125        /* flush the accelerator pipeline */
126	ACL_SUSPEND_TERMINATE(0x00);
127	ACL_SUSPEND_TERMINATE(0x02);
128	ACL_SUSPEND_TERMINATE(0x00);
129    }
130}
131
132void
133tseng_init_acl(ScrnInfoPtr pScrn)
134{
135    TsengPtr pTseng = TsengPTR(pScrn);
136
137    PDEBUG("	tseng_init_acl\n");
138    /*
139     * prepare some shortcuts for faster access to memory mapped registers
140     */
141
142    pTseng->scratchMemBase = pTseng->FbBase + pTseng->AccelColorBufferOffset;
143    /*
144     * we won't be using tsengCPU2ACLBase in linear memory mode anyway, since
145     * using the MMU apertures restricts the amount of useable video memory
146     * to only 2MB, supposing we ONLY redirect MMU aperture 2 to the CPU.
147     * (see data book W32p, page 207)
148     */
149    pTseng->tsengCPU2ACLBase = pTseng->FbBase + 0x200000;	/* MMU aperture 2 */
150
151#ifdef DEBUG
152    ErrorF("MMioBase = 0x%x, scratchMemBase = 0x%x\n", pTseng->MMioBase, pTseng->scratchMemBase);
153#endif
154
155    /*
156     * prepare the accelerator for some real work
157     */
158
159    tseng_terminate_acl(pTseng);
160
161    ACL_INTERRUPT_STATUS(0xe);       /* clear interrupts */
162    ACL_INTERRUPT_MASK(0x04);	       /* disable interrupts, but enable deadlock exit */
163    ACL_INTERRUPT_STATUS(0x0);
164    ACL_ACCELERATOR_STATUS_SET(0x0);
165
166    if (pTseng->ChipType == ET6000) {
167	ACL_STEPPING_INHIBIT(0x0);   /* Undefined at power-on, let all maps (Src, Dst, Mix, Pat) step */
168	ACL_6K_CONFIG(0x00);	       /* maximum performance -- what did you think? */
169	ACL_POWER_CONTROL(0x01);     /* conserve power when ACL is idle */
170	ACL_MIX_CONTROL(0x33);
171	ACL_TRANSFER_DISABLE(0x00);  /* Undefined at power-on, enable all transfers */
172    } else {			       /* W32i/W32p */
173  	ACL_RELOAD_CONTROL(0x0);
174	ACL_SYNC_ENABLE(0x1);	       /* | 0x2 = 0WS ACL read. Yields up to 10% faster operation for small blits */
175	ACL_ROUTING_CONTROL(0x00);
176    }
177
178    /* Enable the W32p startup bit and set use an eight-bit pixel depth */
179    ACL_NQ_X_POSITION(0);
180    ACL_NQ_Y_POSITION(0);
181    ACL_PIXEL_DEPTH((pScrn->bitsPerPixel - 8) << 1);
182    /* writing destination address will start ACL */
183    ACL_OPERATION_STATE(0x10);
184
185    ACL_DESTINATION_Y_OFFSET(pScrn->displayWidth * pTseng->Bytesperpixel - 1);
186    ACL_XY_DIRECTION(0);
187
188    MMU_CONTROL(0x74);
189
190    if (pTseng->ChipType == ET4000) {
191	/*
192	 * Since the w32p revs C and D don't have any memory mapped when the
193	 * accelerator registers are used it is necessary to use the MMUs to
194	 * provide a semblance of linear memory. Fortunately on these chips
195	 * the MMU appertures are 1 megabyte each. So as long as we are
196	 * willing to only use 3 megs of video memory we can have some
197	 * acceleration. If we ever get the CPU-to-screen-color-expansion
198	 * stuff working then we will NOT need to sacrifice the extra 1MB
199	 * provided by MBP2, because we could do dynamic switching of the APT
200	 * bit in the MMU control register.
201	 *
202	 * On W32p rev c and d MBP2 is hardwired to 0x200000 when linear
203	 * memory mode is enabled. (On rev a it is programmable).
204	 *
205	 * W32p rev a and b have their first 2M mapped in the normal (non-MMU)
206	 * way, and MMU0 and MMU1, each 512 kb wide, can be used to access
207	 * another 1MB of memory. This totals to 3MB of mem. available in
208	 * linear memory when the accelerator is enabled.
209	 */
210	if ((pTseng->ChipRev == REV_A) || (pTseng->ChipRev == REV_B)) {
211	    MMIO_OUT32(pTseng->MMioBase, 0x00<<0, 0x200000L);
212	    MMIO_OUT32(pTseng->MMioBase, 0x04<<0, 0x280000L);
213	} else {		       /* rev C & D */
214	    MMIO_OUT32(pTseng->MMioBase, 0x00<<0, 0x0L);
215	    MMIO_OUT32 (pTseng->MMioBase, 0x04<<0, 0x100000L);
216	}
217    }
218}
219
220/*
221 * ET4/6K acceleration interface -- color expansion primitives.
222 *
223 * Uses Harm Hanemaayer's generic acceleration interface (XAA).
224 *
225 * Author: Koen Gadeyne
226 *
227 * Much of the acceleration code is based on the XF86_W32 server code from
228 * Glenn Lai.
229 *
230 *
231 *     Color expansion capabilities of the Tseng chip families:
232 *
233 *     Chip     screen-to-screen   CPU-to-screen   Supported depths
234 *
235 *   ET4000W32/W32i   No               Yes             8bpp only
236 *   ET4000W32p       Yes              Yes             8bpp only
237 *   ET6000           Yes              No              8/16/24/32 bpp
238 */
/*
 * Select the color-expansion data path: ACL_MIX_CONTROL(0x32) on the
 * ET6000, ACL_ROUTING_CONTROL(0x08) on everything else.
 *
 * Expects a variable named pTseng in the caller's scope.  Both macros are
 * wrapped in do { } while (0) so they act as one statement and cannot
 * capture or break a surrounding if/else; invoke them as "MACRO;".
 */
#define SET_FUNCTION_COLOREXPAND \
    do { \
        if (pTseng->ChipType == ET6000) { \
            ACL_MIX_CONTROL(0x32); \
        } else { \
            ACL_ROUTING_CONTROL(0x08); \
        } \
    } while (0)

/* Select the CPU-to-screen color-expansion path: ACL_ROUTING_CONTROL(0x02). */
#define SET_FUNCTION_COLOREXPAND_CPU \
    do { \
        ACL_ROUTING_CONTROL(0x02); \
    } while (0)
247
248
249static void
250TsengSubsequentScanlineCPUToScreenColorExpandFill(ScrnInfoPtr pScrn,
251    int x, int y, int w, int h, int skipleft)
252{
253    TsengPtr pTseng = TsengPTR(pScrn);
254
255    if (pTseng->ChipType == ET4000) {
256	/* the accelerator needs DWORD padding, and "w" is in PIXELS... */
257	pTseng->acl_colexp_width_dwords = (MULBPP(pTseng, w) + 31) >> 5;
258	pTseng->acl_colexp_width_bytes = (MULBPP(pTseng, w) + 7) >> 3;
259    }
260
261    pTseng->acl_ColorExpandDst = FBADDR(pTseng, x, y);
262    pTseng->acl_skipleft = skipleft;
263
264    wait_acl_queue(pTseng);
265
266#if 0
267    ACL_MIX_Y_OFFSET(w - 1);
268
269    ErrorF(" W=%d", w);
270#endif
271    SET_XY(pTseng, w, 1);
272}
273
274static void
275TsengSubsequentColorExpandScanline(ScrnInfoPtr pScrn,
276    int bufno)
277{
278    TsengPtr pTseng = TsengPTR(pScrn);
279
280    wait_acl_queue(pTseng);
281
282    ACL_MIX_ADDRESS((pTseng->AccelColorExpandBufferOffsets[bufno] << 3) + pTseng->acl_skipleft);
283    START_ACL(pTseng, pTseng->acl_ColorExpandDst);
284
285    /* move to next scanline */
286    pTseng->acl_ColorExpandDst += pTseng->line_width;
287
288    /*
289     * If not using triple-buffering, we need to wait for the queued
290     * register set to be transferred to the working register set here,
291     * because otherwise an e.g. double-buffering mechanism could overwrite
292     * the buffer that's currently being worked with with new data too soon.
293     *
294     * WAIT_QUEUE; // not needed with triple-buffering
295     */
296}
297
298
299
300/*
301 * We use this intermediate CPU-to-Screen color expansion because the one
302 * provided by XAA seems to lock up the accelerator engine.
303 *
304 * One of the main differences between the XAA approach and this one is that
305 * transfers are done per byte. I'm not sure if that is needed though.
306 */
307static void
308TsengSubsequentColorExpandScanline_8bpp(ScrnInfoPtr pScrn, int bufno)
309{
310    TsengPtr pTseng = TsengPTR(pScrn);
311    pointer dest = pTseng->tsengCPU2ACLBase;
312    int i,j;
313    CARD8 *bufptr;
314
315    i = pTseng->acl_colexp_width_bytes;
316    bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]);
317
318    wait_acl_queue(pTseng);
319    START_ACL (pTseng, pTseng->acl_ColorExpandDst);
320
321/*  *((LongP) (MMioBase + 0x08)) = (CARD32) pTseng->acl_ColorExpandDst;*/
322/*  MMIO_OUT32(tsengCPU2ACLBase,0, (CARD32)pTseng->acl_ColorExpandDst); */
323    j = 0;
324    /* Copy scanline data to accelerator MMU aperture byte by byte */
325    while (i--) {		       /* FIXME: we need to take care of PCI bursting and MMU overflow here! */
326	MMIO_OUT8(dest,j++, *bufptr++);
327    }
328
329    /* move to next scanline */
330    pTseng->acl_ColorExpandDst += pTseng->line_width;
331}
332
333/*
334 * This function does direct memory-to-CPU bit doubling for color-expansion
335 * at 16bpp on W32 chips. They can only do 8bpp color expansion, so we have
336 * to expand the incoming data to 2bpp first.
337 */
338static void
339TsengSubsequentColorExpandScanline_16bpp(ScrnInfoPtr pScrn, int bufno)
340{
341    TsengPtr pTseng = TsengPTR(pScrn);
342    pointer dest = pTseng->tsengCPU2ACLBase;
343    int i,j;
344    CARD8 *bufptr;
345    register CARD32 bits16;
346
347    i = pTseng->acl_colexp_width_dwords * 2;
348    bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]);
349
350    wait_acl_queue(pTseng);
351    START_ACL(pTseng, pTseng->acl_ColorExpandDst);
352
353    j = 0;
354    while (i--) {
355	bits16 = pTseng->ColExpLUT[*bufptr++];
356	MMIO_OUT8(dest,j++,bits16 & 0xFF);
357	MMIO_OUT8(dest,j++,(bits16 >> 8) & 0xFF);
358    }
359
360    /* move to next scanline */
361    pTseng->acl_ColorExpandDst += pTseng->line_width;
362}
363
364/*
365 * This function does direct memory-to-CPU bit doubling for color-expansion
366 * at 24bpp on W32 chips. They can only do 8bpp color expansion, so we have
367 * to expand the incoming data to 3bpp first.
368 */
369static void
370TsengSubsequentColorExpandScanline_24bpp(ScrnInfoPtr pScrn, int bufno)
371{
372    TsengPtr pTseng = TsengPTR(pScrn);
373    pointer dest = pTseng->tsengCPU2ACLBase;
374    int i, k, j = -1;
375    CARD8 *bufptr;
376    register CARD32 bits24;
377
378    i = pTseng->acl_colexp_width_dwords * 4;
379    bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]);
380
381    wait_acl_queue(pTseng);
382    START_ACL(pTseng, pTseng->acl_ColorExpandDst);
383
384    /* take 8 input bits, expand to 3 output bytes */
385    bits24 = pTseng->ColExpLUT[*bufptr++];
386    k = 0;
387    while (i--) {
388	if ((j++) == 2) {	       /* "i % 3" operation is much to expensive */
389	    j = 0;
390	    bits24 = pTseng->ColExpLUT[*bufptr++];
391	}
392	MMIO_OUT8(dest,k++,bits24 & 0xFF);
393	bits24 >>= 8;
394    }
395
396    /* move to next scanline */
397    pTseng->acl_ColorExpandDst += pTseng->line_width;
398}
399
400/*
401 * This function does direct memory-to-CPU bit doubling for color-expansion
402 * at 32bpp on W32 chips. They can only do 8bpp color expansion, so we have
403 * to expand the incoming data to 4bpp first.
404 */
405static void
406TsengSubsequentColorExpandScanline_32bpp(ScrnInfoPtr pScrn, int bufno)
407{
408    TsengPtr pTseng = TsengPTR(pScrn);
409    pointer dest = pTseng->tsengCPU2ACLBase;
410    int i,j;
411    CARD8 *bufptr;
412    register CARD32 bits32;
413
414    i = pTseng->acl_colexp_width_dwords;
415   /* amount of blocks of 8 bits to expand to 32 bits (=1 DWORD) */
416    bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]);
417
418    wait_acl_queue(pTseng);
419    START_ACL(pTseng, pTseng->acl_ColorExpandDst);
420
421    j = 0;
422    while (i--) {
423	bits32 = pTseng->ColExpLUT[*bufptr++];
424	MMIO_OUT8(dest,j++,bits32 & 0xFF);
425	MMIO_OUT8(dest,j++,(bits32 >> 8) & 0xFF);
426	MMIO_OUT8(dest,j++,(bits32 >> 16) & 0xFF);
427	MMIO_OUT8(dest,j++,(bits32 >> 24) & 0xFF);
428    }
429
430    /* move to next scanline */
431    pTseng->acl_ColorExpandDst += pTseng->line_width;
432}
433
434/*
435 * CPU-to-Screen color expansion.
436 *   This is for ET4000 only (The ET6000 cannot do this)
437 */
438static void
439TsengSetupForCPUToScreenColorExpandFill(ScrnInfoPtr pScrn,
440    int fg, int bg, int rop, unsigned int planemask)
441{
442    TsengPtr pTseng = TsengPTR(pScrn);
443
444/*  ErrorF("X"); */
445
446    PINGPONG(pTseng);
447
448    wait_acl_queue(pTseng);
449
450    SET_FG_ROP(rop);
451    SET_BG_ROP_TR(rop, bg);
452
453    SET_XYDIR(0);
454
455    SET_FG_BG_COLOR(pTseng, fg, bg);
456
457    SET_FUNCTION_COLOREXPAND_CPU;
458
459    /* assure correct alignment of MIX address (ACL needs same alignment here as in MMU aperture) */
460    ACL_MIX_ADDRESS(0);
461}
462
463#ifdef TSENG_CPU_TO_SCREEN_COLOREXPAND
464/*
465 * TsengSubsequentCPUToScreenColorExpand() is potentially dangerous:
466 *   Not writing enough data to the MMU aperture for CPU-to-screen color
467 *   expansion will eventually cause a system deadlock!
468 *
469 * Note that CPUToScreenColorExpand operations _always_ require a
470 * WAIT_INTERFACE before starting a new operation (this is empyrical,
471 * though)
472 */
473static void
474TsengSubsequentCPUToScreenColorExpandFill(ScrnInfoPtr pScrn,
475    int x, int y, int w, int h, int skipleft)
476{
477    TsengPtr pTseng = TsengPTR(pScrn);
478    int destaddr = FBADDR(pTseng, x, y);
479
480    /* ErrorF(" %dx%d|%d ",w,h,skipleft); */
481    if (skipleft)
482	ErrorF("Can't do: Skipleft = %d\n", skipleft);
483
484/*  wait_acl_queue(); */
485    ErrorF("=========WAIT     FIXME!\n");
486    WAIT_INTERFACE;
487
488    ACL_MIX_Y_OFFSET(w - 1);
489    SET_XY(pTseng, w, h);
490    START_ACL(pTseng, destaddr);
491}
492#endif
493
494static void
495TsengSetupForScreenToScreenColorExpandFill(ScrnInfoPtr pScrn,
496    int fg, int bg, int rop, unsigned int planemask)
497{
498    TsengPtr pTseng = TsengPTR(pScrn);
499
500/*  ErrorF("SSC "); */
501
502    PINGPONG(pTseng);
503
504    wait_acl_queue(pTseng);
505
506    SET_FG_ROP(rop);
507    SET_BG_ROP_TR(rop, bg);
508
509    SET_FG_BG_COLOR(pTseng, fg, bg);
510
511    SET_FUNCTION_COLOREXPAND;
512
513    SET_XYDIR(0);
514}
515
516static void
517TsengSubsequentScreenToScreenColorExpandFill(ScrnInfoPtr pScrn,
518    int x, int y, int w, int h, int srcx, int srcy, int skipleft)
519{
520    TsengPtr pTseng = TsengPTR(pScrn);
521    int destaddr = FBADDR(pTseng, x, y);
522
523/*    int srcaddr = FBADDR(pTseng, srcx, srcy); */
524
525    wait_acl_queue(pTseng);
526
527    SET_XY(pTseng, w, h);
528    ACL_MIX_ADDRESS(		       /* MIX address is in BITS */
529	(((srcy * pScrn->displayWidth) + srcx) * pScrn->bitsPerPixel) + skipleft);
530
531    ACL_MIX_Y_OFFSET(pTseng->line_width << 3);
532
533    START_ACL(pTseng, destaddr);
534}
535
536/*
537 *
538 */
539static Bool
540TsengXAAInit_Colexp(ScrnInfoPtr pScrn)
541{
542    int i, j, r;
543    TsengPtr pTseng = TsengPTR(pScrn);
544    XAAInfoRecPtr pXAAInfo = pTseng->AccelInfoRec;
545
546    PDEBUG("	TsengXAAInit_Colexp\n");
547
548#ifdef TODO
549    if (OFLG_ISSET(OPTION_XAA_NO_COL_EXP, &vga256InfoRec.options))
550	return;
551#endif
552
553    /* FIXME! disable accelerated color expansion for W32/W32i until it's fixed */
554/*  if (Is_W32 || Is_W32i) return; */
555
556    /*
557     * Screen-to-screen color expansion.
558     *
559     * Scanline-screen-to-screen color expansion is slower than
560     * CPU-to-screen color expansion.
561     */
562
563    pXAAInfo->ScreenToScreenColorExpandFillFlags =
564	BIT_ORDER_IN_BYTE_LSBFIRST |
565	SCANLINE_PAD_DWORD |
566	LEFT_EDGE_CLIPPING |
567	NO_PLANEMASK;
568
569#if 1
570    if ((pTseng->ChipType == ET6000) || (pScrn->bitsPerPixel == 8)) {
571	pXAAInfo->SetupForScreenToScreenColorExpandFill =
572	    TsengSetupForScreenToScreenColorExpandFill;
573	pXAAInfo->SubsequentScreenToScreenColorExpandFill =
574	    TsengSubsequentScreenToScreenColorExpandFill;
575    }
576#endif
577
578    /*
579     * Scanline CPU to screen color expansion for all W32 engines.
580     *
581     * real CPU-to-screen color expansion is extremely tricky, and only
582     * works for 8bpp anyway.
583     *
584     * This also allows us to do 16, 24 and 32 bpp color expansion by first
585     * doubling the bitmap pattern before color-expanding it, because W32s
586     * can only do 8bpp color expansion.
587     */
588
589    pXAAInfo->ScanlineCPUToScreenColorExpandFillFlags =
590	BIT_ORDER_IN_BYTE_LSBFIRST |
591	SCANLINE_PAD_DWORD |
592	NO_PLANEMASK;
593
594    if (pTseng->ChipType == ET4000) {
595	pTseng->XAAScanlineColorExpandBuffers[0] =
596	    xnfalloc(((pScrn->virtualX + 31)/32) * 4 * pTseng->Bytesperpixel);
597	if (pTseng->XAAScanlineColorExpandBuffers[0] == NULL) {
598	    xf86Msg(X_ERROR, "Could not malloc color expansion scanline buffer.\n");
599	    return FALSE;
600	}
601	pXAAInfo->NumScanlineColorExpandBuffers = 1;
602	pXAAInfo->ScanlineColorExpandBuffers = pTseng->XAAScanlineColorExpandBuffers;
603
604	pXAAInfo->SetupForScanlineCPUToScreenColorExpandFill =
605	    TsengSetupForCPUToScreenColorExpandFill;
606
607	pXAAInfo->SubsequentScanlineCPUToScreenColorExpandFill =
608	    TsengSubsequentScanlineCPUToScreenColorExpandFill;
609
610	switch (pScrn->bitsPerPixel) {
611	case 8:
612	    pXAAInfo->SubsequentColorExpandScanline =
613		TsengSubsequentColorExpandScanline_8bpp;
614	    break;
615	case 15:
616	case 16:
617	    pXAAInfo->SubsequentColorExpandScanline =
618		TsengSubsequentColorExpandScanline_16bpp;
619	    break;
620	case 24:
621	    pXAAInfo->SubsequentColorExpandScanline =
622		TsengSubsequentColorExpandScanline_24bpp;
623	    break;
624	case 32:
625	    pXAAInfo->SubsequentColorExpandScanline =
626		TsengSubsequentColorExpandScanline_32bpp;
627	    break;
628	}
629	/* create color expansion LUT (used for >8bpp only) */
630	pTseng->ColExpLUT = xnfalloc(sizeof(CARD32)*256);
631	if (pTseng->ColExpLUT == NULL) {
632	    xf86Msg(X_ERROR, "Could not malloc color expansion tables.\n");
633	    return FALSE;
634	}
635	for (i = 0; i < 256; i++) {
636	    r = 0;
637	    for (j = 7; j >= 0; j--) {
638		r <<= pTseng->Bytesperpixel;
639		if ((i >> j) & 1)
640		    r |= (1 << pTseng->Bytesperpixel) - 1;
641	    }
642	    pTseng->ColExpLUT[i] = r;
643	    /* ErrorF("0x%08X, ",r ); if ((i%8)==7) ErrorF("\n"); */
644	}
645    } else {
646	/*
647	 * Triple-buffering is needed to account for double-buffering of Tseng
648	 * acceleration registers.
649	 */
650	pXAAInfo->NumScanlineColorExpandBuffers = 3;
651	pXAAInfo->ScanlineColorExpandBuffers =
652	    pTseng->XAAColorExpandBuffers;
653	pXAAInfo->SetupForScanlineCPUToScreenColorExpandFill =
654	    TsengSetupForScreenToScreenColorExpandFill;
655	pXAAInfo->SubsequentScanlineCPUToScreenColorExpandFill =
656	    TsengSubsequentScanlineCPUToScreenColorExpandFill;
657	pXAAInfo->SubsequentColorExpandScanline =
658	    TsengSubsequentColorExpandScanline;
659
660	/* calculate memory addresses from video memory offsets */
661	for (i = 0; i < pXAAInfo->NumScanlineColorExpandBuffers; i++) {
662	    pTseng->XAAColorExpandBuffers[i] =
663		pTseng->FbBase + pTseng->AccelColorExpandBufferOffsets[i];
664	}
665
666	pXAAInfo->ScanlineColorExpandBuffers = pTseng->XAAColorExpandBuffers;
667    }
668
669#ifdef TSENG_CPU_TO_SCREEN_COLOREXPAND
670    /*
671     * CPU-to-screen color expansion doesn't seem to be reliable yet. The
672     * W32 needs the correct amount of data sent to it in this mode, or it
673     * hangs the machine until is does (?). Currently, the init code in this
674     * file or the XAA code that uses this does something wrong, so that
675     * occasionally we get accelerator timeouts, and after a few, complete
676     * system hangs.
677     *
678     * The W32 engine requires SCANLINE_NO_PAD, but that doesn't seem to
679     * work very well (accelerator hangs).
680     *
681     * What works is this: tell XAA that we have SCANLINE_PAD_DWORD, and then
682     * add the following code in TsengSubsequentCPUToScreenColorExpand():
683     *     w = (w + 31) & ~31; this code rounds the width up to the nearest
684     * multiple of 32, and together with SCANLINE_PAD_DWORD, this makes
685     * CPU-to-screen color expansion work. Of course, the display isn't
686     * correct (4 chars are "blanked out" when only one is written, for
687     * example). But this shows that the principle works. But the code
688     * doesn't...
689     *
690     * The same thing goes for PAD_BYTE: this also works (with the same
691     * problems as SCANLINE_PAD_DWORD, although less prominent)
692     */
693
694    pXAAInfo->CPUToScreenColorExpandFillFlags =
695	BIT_ORDER_IN_BYTE_LSBFIRST |
696	SCANLINE_PAD_DWORD |   /* no other choice */
697	CPU_TRANSFER_PAD_DWORD |
698	NO_PLANEMASK;
699
700    if (Is_W32_any && (pScrn->bitsPerPixel == 8)) {
701	pXAAInfo->SetupForCPUToScreenColorExpandFill =
702	    TsengSetupForCPUToScreenColorExpandFill;
703	pXAAInfo->SubsequentCPUToScreenColorExpandFill =
704	    TsengSubsequentCPUToScreenColorExpandFill;
705
706	/* we'll be using MMU aperture 2 */
707	pXAAInfo->ColorExpandBase = (CARD8 *)pTseng->tsengCPU2ACLBase;
708	/* ErrorF("tsengCPU2ACLBase = 0x%x\n", pTseng->tsengCPU2ACLBase); */
709	/* aperture size is 8kb in banked mode. Larger in linear mode, but 8kb is enough */
710	pXAAInfo->ColorExpandRange = 8192;
711    }
712#endif
713    return TRUE;
714}
715
716/*
717 * ET4/6K acceleration interface.
718 *
719 * Uses Harm Hanemaayer's generic acceleration interface (XAA).
720 *
721 * Author: Koen Gadeyne
722 *
723 * Much of the acceleration code is based on the XF86_W32 server code from
724 * Glenn Lai.
725 *
726 */
727
/*
 * This is the implementation of the Sync() function.
 *
 * It simply waits, via WAIT_ACL, and returns nothing.
 *
 * To avoid pipeline/cache/buffer flushing in the PCI subsystem and the VGA
 * controller, we might replace this read-intensive code with a dummy
 * accelerator operation that causes a hardware-blocking (wait-states) until
 * the running operation is done.
 */
static void
TsengSync(ScrnInfoPtr pScrn)
{
    /* not named again below; presumably picked up by the WAIT_ACL macro
     * from the enclosing scope -- TODO confirm against tseng_accel.h */
    TsengPtr pTseng = TsengPTR(pScrn);

    WAIT_ACL;
}
743
744/*
745 * This is the implementation of the SetupForSolidFill function
746 * that sets up the coprocessor for a subsequent batch for solid
747 * rectangle fills.
748 */
749static void
750TsengSetupForSolidFill(ScrnInfoPtr pScrn,
751    int color, int rop, unsigned int planemask)
752{
753    TsengPtr pTseng = TsengPTR(pScrn);
754
755    /*
756     * all registers are queued in the Tseng chips, except of course for the
757     * stuff we want to store in off-screen memory. So we have to use a
758     * ping-pong method for those if we want to avoid having to wait for the
759     * accelerator when we want to write to these.
760     */
761
762/*    ErrorF("S"); */
763
764    PINGPONG(pTseng);
765
766    wait_acl_queue(pTseng);
767
768    /*
769     * planemask emulation uses a modified "standard" FG ROP (see ET6000
770     * data book p 66 or W32p databook p 37: "Bit masking"). We only enable
771     * the planemask emulation when the planemask is not a no-op, because
772     * blitting speed would suffer.
773     */
774
775    if ((planemask & pTseng->planemask_mask) != pTseng->planemask_mask) {
776	SET_FG_ROP_PLANEMASK(rop);
777	SET_BG_COLOR(pTseng, planemask);
778    } else {
779	SET_FG_ROP(rop);
780    }
781    SET_FG_COLOR(pTseng, color);
782
783    SET_FUNCTION_BLT;
784}
785
786/*
787 * This is the implementation of the SubsequentForSolidFillRect function
788 * that sends commands to the coprocessor to fill a solid rectangle of
789 * the specified location and size, with the parameters from the SetUp
790 * call.
791 *
792 * Splitting it up between ET4000 and ET6000 avoids lots of chipset type
793 * comparisons.
794 */
795static void
796TsengW32pSubsequentSolidFillRect(ScrnInfoPtr pScrn,
797    int x, int y, int w, int h)
798{
799    TsengPtr pTseng = TsengPTR(pScrn);
800    int destaddr = FBADDR(pTseng, x, y);
801
802    wait_acl_queue(pTseng);
803
804    /*
805     * Restoring the ACL_SOURCE_ADDRESS here is needed as long as Bresenham
806     * lines are enabled for >8bpp. Or until XAA allows us to render
807     * horizontal lines using the same Bresenham code instead of re-routing
808     * them to FillRectSolid. For XDECREASING lines, the SubsequentBresenham
809     * code adjusts the ACL_SOURCE_ADDRESS to make sure XDECREASING lines
810     * are drawn with the correct colors. But if a batch of subsequent
811     * operations also holds a few horizontal lines, they will be routed to
812     * here without calling the SetupFor... code again, and the
813     * ACL_SOURCE_ADDRESS will be wrong.
814     */
815    ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset + pTseng->tsengFg);
816
817    SET_XYDIR(0);   /* FIXME: not needed with separate setupforsolidline */
818
819    SET_XY_4(pTseng, w, h);
820    START_ACL(pTseng, destaddr);
821}
822
823static void
824Tseng6KSubsequentSolidFillRect(ScrnInfoPtr pScrn,
825    int x, int y, int w, int h)
826{
827    TsengPtr pTseng = TsengPTR(pScrn);
828    int destaddr = FBADDR(pTseng, x, y);
829
830    wait_acl_queue(pTseng);
831
832    /* see comment in TsengW32pSubsequentFillRectSolid */
833    ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset + pTseng->tsengFg);
834
835    /* if XYDIR is not reset here, drawing a hardware line in between
836     * blitting, with the same ROP, color, etc will not cause a call to
837     * SetupFor... (because linedrawing uses SetupForSolidFill() as its
838     * Setup() function), and thus the direction register will have been
839     * changed by the last LineDraw operation.
840     */
841    SET_XYDIR(0);
842
843    SET_XY_6(pTseng, w, h);
844    START_ACL_6(destaddr);
845}
846
847/*
848 * This is the implementation of the SetupForScreenToScreenCopy function
849 * that sets up the coprocessor for a subsequent batch of
850 * screen-to-screen copies.
851 */
852
853static __inline__ void
854Tseng_setup_screencopy(TsengPtr pTseng,
855    int rop, unsigned int planemask,
856    int trans_color, int blit_dir)
857{
858    wait_acl_queue(pTseng);
859
860#ifdef ET6K_TRANSPARENCY
861    if ((pTseng->ChipType == ET6000) && (trans_color != -1)) {
862	SET_BG_COLOR(trans_color);
863	SET_FUNCTION_BLT_TR;
864    } else
865	SET_FUNCTION_BLT;
866
867    SET_FG_ROP(rop);
868#else
869    if ((planemask & pTseng->planemask_mask) != pTseng->planemask_mask) {
870	SET_FG_ROP_PLANEMASK(rop);
871	SET_BG_COLOR(pTseng, planemask);
872    } else {
873	SET_FG_ROP(rop);
874    }
875    SET_FUNCTION_BLT;
876#endif
877    SET_XYDIR(blit_dir);
878}
879
880static void
881TsengSetupForScreenToScreenCopy(ScrnInfoPtr pScrn,
882    int xdir, int ydir, int rop,
883    unsigned int planemask, int trans_color)
884{
885    /*
886     * xdir can be either 1 (left-to-right) or -1 (right-to-left).
887     * ydir can be either 1 (top-to-bottom) or -1 (bottom-to-top).
888     */
889
890    TsengPtr pTseng = TsengPTR(pScrn);
891    int blit_dir = 0;
892
893/*    ErrorF("C%d ", trans_color); */
894
895    pTseng->acl_blitxdir = xdir;
896    pTseng->acl_blitydir = ydir;
897
898    if (xdir == -1)
899	blit_dir |= 0x1;
900    if (ydir == -1)
901	blit_dir |= 0x2;
902
903    Tseng_setup_screencopy(pTseng, rop, planemask, trans_color, blit_dir);
904
905    ACL_SOURCE_WRAP(0x77);	       /* no wrap */
906    ACL_SOURCE_Y_OFFSET(pTseng->line_width - 1);
907}
908
909/*
910 * This is the implementation of the SubsequentForScreenToScreenCopy
911 * that sends commands to the coprocessor to perform a screen-to-screen
912 * copy of the specified areas, with the parameters from the SetUp call.
913 * In this sample implementation, the direction must be taken into
914 * account when calculating the addresses (with coordinates, it might be
915 * a little easier).
916 *
917 * Splitting up the SubsequentScreenToScreenCopy between ET4000 and ET6000
918 * doesn't seem to improve speed for small blits (as it did with
919 * SolidFillRect).
920 */
921static void
922TsengSubsequentScreenToScreenCopy(ScrnInfoPtr pScrn,
923    int x1, int y1, int x2, int y2,
924    int w, int h)
925{
926    TsengPtr pTseng = TsengPTR(pScrn);
927    int srcaddr, destaddr;
928
929    /*
930     * Optimizing note: the pre-calc code below (i.e. until the first
931     * register write) doesn't significantly affect performance. Removing it
932     * all boosts small blits from 24.22 to 25.47 MB/sec. Don't waste time
933     * on that. One less PCI bus write would boost us to 30.00 MB/sec, up
934     * from 24.22. Waste time on _that_...
935     */
936
937    /* tseng chips want x-sizes in bytes, not pixels */
938    x1 = MULBPP(pTseng, x1);
939    x2 = MULBPP(pTseng, x2);
940
941    /*
942     * If the direction is "decreasing", the chip wants the addresses
943     * to be at the other end, so we must be aware of that in our
944     * calculations.
945     */
946    if (pTseng->acl_blitydir == -1) {
947	srcaddr = (y1 + h - 1) * pTseng->line_width;
948	destaddr = (y2 + h - 1) * pTseng->line_width;
949    } else {
950	srcaddr = y1 * pTseng->line_width;
951	destaddr = y2 * pTseng->line_width;
952    }
953    if (pTseng->acl_blitxdir == -1) {
954	/* Accelerator start address must point to first byte to be processed.
955	 * Depending on the direction, this is the first or the last byte
956	 * in the multi-byte pixel.
957	 */
958	int eol = MULBPP(pTseng, w);
959
960	srcaddr += x1 + eol - 1;
961	destaddr += x2 + eol - 1;
962    } else {
963	srcaddr += x1;
964	destaddr += x2;
965    }
966
967    wait_acl_queue(pTseng);
968
969    SET_XY(pTseng, w, h);
970    ACL_SOURCE_ADDRESS(srcaddr);
971    START_ACL(pTseng, destaddr);
972}
973
974#if 0
975static int pat_src_addr;
976
977static void
978TsengSetupForColor8x8PatternFill(ScrnInfoPtr pScrn,
979    int patx, int paty, int rop, unsigned int planemask, int trans_color)
980{
981    TsengPtr pTseng = TsengPTR(pScrn);
982
983    pat_src_addr = FBADDR(pTseng, patx, paty);
984
985    ErrorF("P");
986
987    Tseng_setup_screencopy(pTseng, rop, planemask, trans_color, 0);
988
989    switch (pTseng->Bytesperpixel) {
990    case 1:
991	ACL_SOURCE_WRAP(0x33);       /* 8x8 wrap */
992	ACL_SOURCE_Y_OFFSET(8 - 1);
993	break;
994    case 2:
995	ACL_SOURCE_WRAP(0x34);       /* 16x8 wrap */
996	ACL_SOURCE_Y_OFFSET(16 - 1);
997	break;
998    case 3:
999	ACL_SOURCE_WRAP(0x3D);       /* 24x8 wrap --- only for ET6000 !!! */
1000	ACL_SOURCE_Y_OFFSET(32 - 1); /* this is no error -- see databook */
1001	break;
1002    case 4:
1003	ACL_SOURCE_WRAP(0x35);       /* 32x8 wrap */
1004	ACL_SOURCE_Y_OFFSET(32 - 1);
1005    }
1006}
1007
1008static void
1009TsengSubsequentColor8x8PatternFillRect(ScrnInfoPtr pScrn,
1010    int patx, int paty, int x, int y, int w, int h)
1011{
1012    TsengPtr pTseng = TsengPTR(pScrn);
1013    int destaddr = FBADDR(pTseng, x, y);
1014    int srcaddr = pat_src_addr + MULBPP(pTseng, paty * 8 + patx);
1015
1016    wait_acl_queue(pTseng);
1017
1018    ACL_SOURCE_ADDRESS(srcaddr);
1019
1020    SET_XY(pTseng, w, h);
1021    START_ACL(pTseng, destaddr);
1022}
1023#endif
1024
1025#if 0
1026/*
1027 * ImageWrite is nothing more than a per-scanline screencopy.
1028 */
1029
/*
 * Setup hook for scanline image writes.
 *
 * Performs the same screen-copy setup as ScreenToScreenCopy, with a
 * direction value of 0, then programs the source wrap register with 0x77
 * (no wrap) and the source Y offset with line_width - 1.
 * The bpp and depth arguments are not used.
 */
static void
TsengSetupForScanlineImageWrite(ScrnInfoPtr pScrn,
    int rop, unsigned int planemask, int trans_color, int bpp, int depth)
{
    TsengPtr pTseng = TsengPTR(pScrn);

/*    ErrorF("IW"); */

    Tseng_setup_screencopy(pTseng, rop, planemask, trans_color, 0);

    ACL_SOURCE_WRAP(0x77);	       /* no wrap */
    ACL_SOURCE_Y_OFFSET(pTseng->line_width - 1);
}
1043
/*
 * Starts an image write into the rectangle at (x, y), w pixels wide.
 *
 * Nothing is drawn here: the byte address of the first destination
 * scanline and the left-edge skip (converted to bytes) are stored in the
 * driver record for the per-scanline hook, and the accelerator is set up
 * for operations that are w pixels wide and one line high.
 * The h argument is not used.
 */
static void
TsengSubsequentScanlineImageWriteRect(ScrnInfoPtr pScrn,
    int x, int y, int w, int h, int skipleft)
{
    TsengPtr pTseng = TsengPTR(pScrn);

/*    ErrorF("r%d",h); */

    /* destination of the first scanline, as a byte offset */
    pTseng->acl_iw_dest = y * pTseng->line_width + MULBPP(pTseng, x);
    /* bytes to skip at the start of each source scanline buffer */
    pTseng->acl_skipleft = MULBPP(pTseng, skipleft);

    wait_acl_queue(pTseng);
    SET_XY(pTseng, w, 1);
}
1058
/*
 * Transfers one scanline of an image write.
 *
 * The source is scanline buffer number bufno (offset taken from
 * AccelImageWriteBufferOffsets[]) plus the left-edge skip; the
 * destination is the address prepared by the ImageWriteRect hook, which
 * is then advanced by one scanline for the next call.
 */
static void
TsengSubsequentImageWriteScanline(ScrnInfoPtr pScrn,
    int bufno)
{
    TsengPtr pTseng = TsengPTR(pScrn);

/*    ErrorF("%d", bufno); */

    wait_acl_queue(pTseng);

    ACL_SOURCE_ADDRESS(pTseng->AccelImageWriteBufferOffsets[bufno]
		       + pTseng->acl_skipleft);
    START_ACL(pTseng, pTseng->acl_iw_dest);
    /* step to the next destination scanline */
    pTseng->acl_iw_dest += pTseng->line_width;
}
1074#endif
1075
1076#if 0
1077/*
1078 * W32p/ET6000 hardware linedraw code.
1079 *
1080 * TsengSetupForSolidFill() is used as a setup function.
1081 *
1082 * Three major problems that needed to be solved here:
1083 *
1084 * 1. The "bias" value must be translated into the "line draw algorithm"
1085 *    parameter in the Tseng accelerators. This parameter, although not
1086 *    documented as such, needs to be set to the _inverse_ of the
1087 *    appropriate bias bit (i.e. for the appropriate octant).
1088 *
1089 * 2. In >8bpp modes, the accelerator will render BYTES in the same order as
1090 *    it is drawing the line. This means it will render the colors in the
1091 *    same order as well, reversing the byte-order in pixels that are drawn
1092 *    right-to-left. This causes wrong colors to be rendered.
1093 *
1094 * 3. The Tseng data book says that the ACL Y count register needs to be
1095 *    programmed with "dy-1". A similar thing is said about ACL X count. But
 *    this assumes (x2,y2) is NOT drawn (although that is not mentioned in
1097 *    the data book). X assumes the endpoint _is_ drawn. If "dy-1" is used,
1098 *    this sometimes results in a negative value (if dx==dy==0),
1099 *    causing a complete accelerator hang.
1100 */
1101
1102static void
1103TsengSubsequentSolidBresenhamLine(ScrnInfoPtr pScrn,
1104    int x, int y, int major, int minor, int err, int len, int octant)
1105{
1106    TsengPtr pTseng = TsengPTR(pScrn);
1107    int destaddr = FBADDR(pTseng, x, y);
1108    int xydir = pTseng->BresenhamTable[octant];
1109
1110    /* Tseng wants the real dx/dy in major/minor. Bresenham uses 2*dx and 2*dy */
1111    minor >>= 1;
1112    major >>= 1;
1113
1114    wait_acl_queue(pTseng);
1115
1116    if (!(octant & YMAJOR)) {
1117	SET_X_YRAW(pTseng, len, 0xFFF);
1118    } else {
1119	SET_XY_RAW(pTseng,0xFFF, len - 1);
1120    }
1121
1122    SET_DELTA(minor, major);
1123    ACL_ERROR_TERM(-err);  /* error term from XAA is NEGATIVE */
1124
1125    /* make sure colors are rendered correctly if >8bpp */
1126    if (octant & XDECREASING) {
1127	destaddr += pTseng->Bytesperpixel - 1;
1128	ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset
1129			   + pTseng->tsengFg + pTseng->neg_x_pixel_offset);
1130    } else
1131	ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset + pTseng->tsengFg);
1132
1133    SET_XYDIR(xydir);
1134
1135    START_ACL(pTseng, destaddr);
1136}
1137#endif
1138
1139#ifdef TODO
1140/*
1141 * Trapezoid filling code.
1142 *
1143 * TsengSetupForSolidFill() is used as a setup function
1144 */
1145
1146#undef DEBUG_TRAP
1147
1148#ifdef TSENG_TRAPEZOIDS
1149static void
1150TsengSubsequentFillTrapezoidSolid(ytop, height, left, dxL, dyL, eL, right, dxR, dyR, eR)
1151    int ytop;
1152    int height;
1153    int left;
1154    int dxL, dyL;
1155    int eL;
1156    int right;
1157    int dxR, dyR;
1158    int eR;
1159{
1160    unsigned int tseng_bias_compensate = 0xd8;
1161    int destaddr, algrthm;
1162    int xcount = right - left + 1;     /* both edges included */
1163    int dir_reg = 0x60;		       /* trapezoid drawing; use error term for primary edge */
1164    int sec_dir_reg = 0x20;	       /* use error term for secondary edge */
1165    int octant = 0;
1166
1167    /*    ErrorF("#"); */
1168
1169    int destaddr, algrthm;
1170    int xcount = right - left + 1;
1171
1172#ifdef USE_ERROR_TERM
1173    int dir_reg = 0x60;
1174    int sec_dir_reg = 0x20;
1175
1176#else
1177    int dir_reg = 0x40;
1178    int sec_dir_reg = 0x00;
1179
1180#endif
1181    int octant = 0;
1182    int bias = 0x00;		       /* FIXME !!! */
1183
1184/*    ErrorF("#"); */
1185
1186#ifdef DEBUG_TRAP
1187    ErrorF("ytop=%d, height=%d, left=%d, dxL=%d, dyL=%d, eL=%d, right=%d, dxR=%d, dyR=%d, eR=%d ",
1188	ytop, height, left, dxL, dyL, eL, right, dxR, dyR, eR);
1189#endif
1190
1191    if ((dyL < 0) || (dyR < 0))
1192	ErrorF("Tseng Trapezoids: Wrong assumption: dyL/R < 0\n");
1193
1194    destaddr = FBADDR(pTseng, left, ytop);
1195
1196    /* left edge */
1197    if (dxL < 0) {
1198	dir_reg |= 1;
1199	octant |= XDECREASING;
1200	dxL = -dxL;
1201    }
1202    /* Y direction is always positive (top-to-bottom drawing) */
1203
1204    wait_acl_queue(pTseng);
1205
1206    /* left edge */
1207    /* compute axial direction and load registers */
1208    if (dxL >= dyL) {		       /* X is major axis */
1209	dir_reg |= 4;
1210	SET_DELTA(dyL, dxL);
1211	if (dir_reg & 1) {	       /* edge coherency: draw left edge */
1212	    destaddr += pTseng->Bytesperpixel;
1213	    sec_dir_reg |= 0x80;
1214	    xcount--;
1215	}
1216    } else {			       /* Y is major axis */
1217	SetYMajorOctant(octant);
1218	SET_DELTA(dxL, dyL);
1219    }
1220    ACL_ERROR_TERM(eL);
1221
1222    /* select "linedraw algorithm" (=bias) and load direction register */
1223    /* ErrorF(" o=%d ", octant); */
1224    algrthm = ((tseng_bias_compensate >> octant) & 1) ^ 1;
1225    dir_reg |= algrthm << 4;
1226    SET_XYDIR(dir_reg);
1227
1228    /* right edge */
1229    if (dxR < 0) {
1230	sec_dir_reg |= 1;
1231	dxR = -dxR;
1232    }
1233    /* compute axial direction and load registers */
1234    if (dxR >= dyR) {		       /* X is major axis */
1235	sec_dir_reg |= 4;
1236	SET_SECONDARY_DELTA(dyR, dxR);
1237	if (dir_reg & 1) {	       /* edge coherency: do not draw right edge */
1238	    sec_dir_reg |= 0x40;
1239	    xcount++;
1240	}
1241    } else {			       /* Y is major axis */
1242	SET_SECONDARY_DELTA(dxR, dyR);
1243    }
1244    ACL_SECONDARY_ERROR_TERM(eR);
1245
1246    /* ErrorF("%02x", sec_dir_reg); */
1247    SET_SECONDARY_XYDIR(sec_dir_reg);
1248
1249    SET_XY_6(pTseng, xcount, height);
1250
1251#ifdef DEBUG_TRAP
1252    ErrorF("-> %d,%d\n", xcount, height);
1253#endif
1254
1255    START_ACL_6(destaddr);
1256}
1257#endif
1258
1259#endif
1260
1261#endif
1262
1263/*
1264 * The following function sets up the supported acceleration. Call it from
1265 * the FbInit() function in the SVGA driver. Do NOT initialize any hardware
1266 * in here. That belongs in tseng_init_acl().
1267 */
1268Bool
1269TsengXAAInit(ScreenPtr pScreen)
1270{
1271#ifdef HAVE_XAA_H
1272    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1273    TsengPtr pTseng = TsengPTR(pScrn);
1274    XAAInfoRecPtr pXAAinfo;
1275    BoxRec AvailFBArea;
1276
1277    PDEBUG("	TsengXAAInit\n");
1278    pTseng->AccelInfoRec = pXAAinfo = XAACreateInfoRec();
1279    if (!pXAAinfo)
1280	return FALSE;
1281
1282    /*
1283     * Set up the main acceleration flags.
1284     */
1285    pXAAinfo->Flags = PIXMAP_CACHE;
1286
1287    /*
1288     * The following line installs a "Sync" function, that waits for
1289     * all coprocessor operations to complete.
1290     */
1291    pXAAinfo->Sync = TsengSync;
1292
1293    /* W32 and W32i must wait for ACL before changing registers */
1294    if (pTseng->ChipType == ET4000)
1295        pTseng->need_wait_acl = TRUE;
1296    else
1297        pTseng->need_wait_acl = FALSE;
1298
1299    pTseng->line_width = pScrn->displayWidth * pTseng->Bytesperpixel;
1300
1301#if 1
1302    /*
1303     * SolidFillRect.
1304     *
1305     * The W32 and W32i chips don't have a register to set the amount of
1306     * bytes per pixel, and hence they don't skip 1 byte in each 4-byte word
1307     * at 24bpp. Therefor, the FG or BG colors would have to be concatenated
1308     * in video memory (R-G-B-R-G-B-... instead of R-G-B-X-R-G-B-X-..., with
1309     * X = dont' care), plus a wrap value that is a multiple of 3 would have
1310     * to be set. There is no such wrap combination available.
1311     */
1312#ifdef OBSOLETE
1313    pXAAinfo->SolidFillFlags |= NO_PLANEMASK;
1314#endif
1315
1316    pXAAinfo->SetupForSolidFill = TsengSetupForSolidFill;
1317    if (pTseng->ChipType == ET6000)
1318        pXAAinfo->SubsequentSolidFillRect = Tseng6KSubsequentSolidFillRect;
1319    else
1320        pXAAinfo->SubsequentSolidFillRect = TsengW32pSubsequentSolidFillRect;
1321
1322#ifdef TSENG_TRAPEZOIDS
1323    if (pTseng->ChipType == ET6000)
1324	/* disabled for now: not fully compliant yet */
1325	pXAAinfo->SubsequentFillTrapezoidSolid = TsengSubsequentFillTrapezoidSolid;
1326#endif
1327#endif
1328
1329#if 1
1330    /*
1331     * SceenToScreenCopy (BitBLT).
1332     *
1333     * Restrictions: On ET6000, we support EITHER a planemask OR
1334     * TRANSPARENCY, but not both (they use the same Pattern map).
1335     * All other chips can't do TRANSPARENCY at all.
1336     */
1337#ifdef ET6K_TRANSPARENCY
1338    pXAAinfo->CopyAreaFlags = NO_PLANEMASK;
1339    if (pTseng->ChipType == ET4000)
1340	pXAAinfo->CopyAreaFlags |= NO_TRANSPARENCY;
1341
1342#else
1343    pXAAinfo->CopyAreaFlags = NO_TRANSPARENCY;
1344#endif
1345
1346    pXAAinfo->SetupForScreenToScreenCopy =
1347	TsengSetupForScreenToScreenCopy;
1348    pXAAinfo->SubsequentScreenToScreenCopy =
1349	TsengSubsequentScreenToScreenCopy;
1350#endif
1351
1352#if 0
1353    /*
1354     * ImageWrite.
1355     *
1356     * SInce this uses off-screen scanline buffers, it is only of use when
1357     * complex ROPs are used. But since the current XAA pixmap cache code
1358     * only works when an ImageWrite is provided, the NO_GXCOPY flag is
1359     * temporarily disabled.
1360     */
1361
1362    if (pTseng->AccelImageWriteBufferOffsets[0]) {
1363	pXAAinfo->ScanlineImageWriteFlags =
1364	    pXAAinfo->CopyAreaFlags | LEFT_EDGE_CLIPPING /* | NO_GXCOPY */ ;
1365	pXAAinfo->NumScanlineImageWriteBuffers = 2;
1366	pXAAinfo->SetupForScanlineImageWrite =
1367	    TsengSetupForScanlineImageWrite;
1368	pXAAinfo->SubsequentScanlineImageWriteRect =
1369	    TsengSubsequentScanlineImageWriteRect;
1370	pXAAinfo->SubsequentImageWriteScanline =
1371	    TsengSubsequentImageWriteScanline;
1372
1373	/* calculate memory addresses from video memory offsets */
1374	for (i = 0; i < pXAAinfo->NumScanlineImageWriteBuffers; i++) {
1375	    pTseng->XAAScanlineImageWriteBuffers[i] =
1376		pTseng->FbBase + pTseng->AccelImageWriteBufferOffsets[i];
1377	}
1378
1379	pXAAinfo->ScanlineImageWriteBuffers = pTseng->XAAScanlineImageWriteBuffers;
1380    }
1381#endif
1382    /*
1383     * 8x8 pattern tiling not possible on W32/i/p chips in 24bpp mode.
1384     * Currently, 24bpp pattern tiling doesn't work at all on those.
1385     *
1386     * FIXME: On W32 cards, pattern tiling doesn't work as expected.
1387     */
1388    pXAAinfo->Color8x8PatternFillFlags = HARDWARE_PATTERN_PROGRAMMED_ORIGIN;
1389
1390    pXAAinfo->CachePixelGranularity = 8 * 8;
1391
1392#ifdef ET6K_TRANSPARENCY
1393    pXAAinfo->PatternFlags |= HARDWARE_PATTERN_NO_PLANEMASK;
1394    if (pTseng->ChipType == ET6000)
1395	pXAAinfo->PatternFlags |= HARDWARE_PATTERN_TRANSPARENCY;
1396#endif
1397
1398#if 0
1399    /* FIXME! This needs to be fixed for W32 and W32i (it "should work") */
1400    if (pScrn->bitsPerPixel != 24) {
1401	pXAAinfo->SetupForColor8x8PatternFill =
1402	    TsengSetupForColor8x8PatternFill;
1403	pXAAinfo->SubsequentColor8x8PatternFillRect =
1404	    TsengSubsequentColor8x8PatternFillRect;
1405    }
1406#endif
1407
1408#if 0 /*1*/
1409    /*
1410     * SolidLine.
1411     *
1412     * We use Bresenham by preference, because it supports hardware clipping
1413     * (using the error term). TwoPointLines() is implemented, but not used,
1414     * because clipped lines are not accelerated (hardware clipping support
1415     * is lacking)...
1416     */
1417
1418    /*
1419     * Fill in the hardware linedraw ACL_XY_DIRECTION table
1420     *
1421     * W32BresTable[] converts XAA interface Bresenham octants to direct
1422     * ACL direction register contents. This includes the correct bias
1423     * setting etc.
1424     *
1425     * According to miline.h (but with base 0 instead of base 1 as in
1426     * miline.h), the octants are numbered as follows:
1427     *
1428     *   \    |    /
1429     *    \ 2 | 1 /
1430     *     \  |  /
1431     *    3 \ | / 0
1432     *       \|/
1433     *   -----------
1434     *       /|                                 \
1435     *    4 / | \ 7
1436     *     /  |       \
1437     *    / 5 | 6      \
1438     *   /    |        \
1439     *
1440     * In ACL_XY_DIRECTION, bits 2:0 are defined as follows:
1441     *	0: '1' if XDECREASING
1442     *	1: '1' if YDECREASING
1443     *	2: '1' if XMAJOR (== not YMAJOR)
1444     *
1445     * Bit 4 defines the bias.  It should be set to '1' for all octants
1446     * NOT passed to miSetZeroLineBias(). i.e. the inverse of the X bias.
1447     *
1448     * (For MS compatible bias, the data book says to set to the same as
1449     * YDIR, i.e. bit 1 of the same register, = '1' if YDECREASING. MS
1450     * bias is towards octants 0..3 (i.e. Y decreasing), hence this
1451     * definition of bit 4)
1452     *
1453     */
1454    pTseng->BresenhamTable = xnfalloc(8);
1455    if (pTseng->BresenhamTable == NULL) {
1456        xf86Msg(X_ERROR, "Could not malloc Bresenham Table.\n");
1457        return FALSE;
1458    }
1459    for (i=0; i<8; i++) {
1460        unsigned char zerolinebias = miGetZeroLineBias(pScreen);
1461        pTseng->BresenhamTable[i] = 0xA0; /* command=linedraw, use error term */
1462        if (i & XDECREASING) pTseng->BresenhamTable[i] |= 0x01;
1463        if (i & YDECREASING) pTseng->BresenhamTable[i] |= 0x02;
1464        if (!(i & YMAJOR))   pTseng->BresenhamTable[i] |= 0x04;
1465        if ((1 << i) & zerolinebias) pTseng->BresenhamTable[i] |= 0x10;
1466        /* ErrorF("BresenhamTable[%d]=0x%x\n", i, pTseng->BresenhamTable[i]); */
1467    }
1468
1469    pXAAinfo->SolidLineFlags = 0;
1470    pXAAinfo->SetupForSolidLine = TsengSetupForSolidFill;
1471    pXAAinfo->SubsequentSolidBresenhamLine =
1472        TsengSubsequentSolidBresenhamLine;
1473    /*
1474     * ErrorTermBits is used to limit minor, major and error term, so it
1475     * must be min(errorterm_size, delta_major_size, delta_minor_size)
1476     * But the calculation for major and minor is done on the DOUBLED
1477     * values (as per the Bresenham algorithm), so they can also have 13
1478     * bits (inside XAA). They are divided by 2 in this driver, so they
1479     * are then again limited to 12 bits.
1480     */
1481    pXAAinfo->SolidBresenhamLineErrorTermBits = 13;
1482
1483#endif
1484
1485#if 1
1486    /* set up color expansion acceleration */
1487    if (!TsengXAAInit_Colexp(pScrn))
1488	return FALSE;
1489#endif
1490
1491
1492    /*
1493     * For Tseng, we set up some often-used values
1494     */
1495
1496    switch (pTseng->Bytesperpixel) {   /* for MULBPP optimization */
1497    case 1:
1498	pTseng->powerPerPixel = 0;
1499	pTseng->planemask_mask = 0x000000FF;
1500	pTseng->neg_x_pixel_offset = 0;
1501	break;
1502    case 2:
1503	pTseng->powerPerPixel = 1;
1504	pTseng->planemask_mask = 0x0000FFFF;
1505	pTseng->neg_x_pixel_offset = 1;
1506	break;
1507    case 3:
1508	pTseng->powerPerPixel = 1;
1509	pTseng->planemask_mask = 0x00FFFFFF;
1510	pTseng->neg_x_pixel_offset = 2;		/* is this correct ??? */
1511	break;
1512    case 4:
1513	pTseng->powerPerPixel = 2;
1514	pTseng->planemask_mask = 0xFFFFFFFF;
1515	pTseng->neg_x_pixel_offset = 3;
1516	break;
1517    }
1518
1519    /*
1520     * Init ping-pong registers.
1521     * This might be obsoleted by the BACKGROUND_OPERATIONS flag.
1522     */
1523    pTseng->tsengFg = 0;
1524    pTseng->tsengBg = 16;
1525    pTseng->tsengPat = 32;
1526
1527    /* for register write optimisation */
1528    pTseng->tseng_old_dir = -1;
1529    pTseng->old_x = 0;
1530    pTseng->old_y = 0;
1531
1532    /*
1533     * Finally, we set up the video memory space available to the pixmap
1534     * cache. In this case, all memory from the end of the virtual screen to
1535     * the end of video memory minus 1K (which we already reserved), can be
1536     * used.
1537     */
1538
1539    AvailFBArea.x1 = 0;
1540    AvailFBArea.y1 = 0;
1541    AvailFBArea.x2 = pScrn->displayWidth;
1542    AvailFBArea.y2 = (pScrn->videoRam * 1024) /
1543	(pScrn->displayWidth * pTseng->Bytesperpixel);
1544
1545    xf86InitFBManager(pScreen, &AvailFBArea);
1546
1547    return (XAAInit(pScreen, pXAAinfo));
1548#else
1549    return FALSE;
1550#endif
1551}
1552