via_exa.c revision 963d66ac
1/*
2 * Copyright 1998-2008 VIA Technologies, Inc. All Rights Reserved.
3 * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
4 * Copyright 2006 Thomas Hellström. All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sub license,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial portions
15 * of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 */
25
26/*
27 * 2D acceleration functions for the VIA/S3G UniChrome IGPs.
28 *
29 * Mostly rewritten, and modified for EXA support, by Thomas Hellström.
30 */
31
32#ifdef HAVE_CONFIG_H
33#include "config.h"
34#endif
35
36#include <X11/Xarch.h>
37#include "miline.h"
38
39#include <GL/gl.h>
40#include <sys/mman.h>
41
42#include "via_driver.h"
43#include "via_regs.h"
44#include "via_dmabuffer.h"
45#include "via_rop.h"
46
47/*
48 * Use PCI MMIO to flush the command buffer when AGP DMA is not available.
49 */
50static void
51viaDumpDMA(ViaCommandBuffer *cb)
52{
53    register CARD32 *bp = cb->buf;
54    CARD32 *endp = bp + cb->pos;
55
56    while (bp != endp) {
57        if (((bp - cb->buf) & 3) == 0) {
58            ErrorF("\n %04lx: ", (unsigned long)(bp - cb->buf));
59        }
60        ErrorF("0x%08x ", (unsigned)*bp++);
61    }
62    ErrorF("\n");
63}
64
65void
66viaFlushPCI(ViaCommandBuffer *cb)
67{
68    register CARD32 *bp = cb->buf;
69    CARD32 transSetting;
70    CARD32 *endp = bp + cb->pos;
71    unsigned loop = 0;
72    register CARD32 offset = 0;
73    register CARD32 value;
74    VIAPtr pVia = VIAPTR(cb->pScrn);
75
76    while (bp < endp) {
77        if (*bp == HALCYON_HEADER2) {
78            if (++bp == endp)
79                return;
80            VIASETREG(VIA_REG_TRANSET, transSetting = *bp++);
81            while (bp < endp) {
82                if ((transSetting != HC_ParaType_CmdVdata)
83                    && ((*bp == HALCYON_HEADER2)
84                        || (*bp & HALCYON_HEADER1MASK) == HALCYON_HEADER1))
85                    break;
86                VIASETREG(VIA_REG_TRANSPACE, *bp++);
87            }
88        } else if ((*bp & HALCYON_HEADER1MASK) == HALCYON_HEADER1) {
89
90            while (bp < endp) {
91                if (*bp == HALCYON_HEADER2)
92                    break;
93                if (offset == 0) {
94                    /*
95                     * Not doing this wait will probably stall the processor
96                     * for an unacceptable amount of time in VIASETREG while
97                     * other high priority interrupts may be pending.
98                     */
99                    switch (pVia->Chipset) {
100                    case VIA_VX800:
101                    case VIA_VX855:
102                    case VIA_VX900:
103                        while ((VIAGETREG(VIA_REG_STATUS) &
104                                (VIA_CMD_RGTR_BUSY_H5 | VIA_2D_ENG_BUSY_H5)) &&
105                                (loop++ < MAXLOOP)) ;
106                        break;
107
108                    case VIA_P4M890:
109                    case VIA_K8M890:
110                    case VIA_P4M900:
111                        while ((VIAGETREG(VIA_REG_STATUS) &
112                                (VIA_CMD_RGTR_BUSY | VIA_2D_ENG_BUSY)) &&
113                                (loop++ < MAXLOOP)) ;
114                        break;
115
116                    default:
117                        while (!(VIAGETREG(VIA_REG_STATUS) & VIA_VR_QUEUE_EMPTY) &&
118                                (loop++ < MAXLOOP)) ;
119                        while ((VIAGETREG(VIA_REG_STATUS) &
120                                (VIA_CMD_RGTR_BUSY | VIA_2D_ENG_BUSY)) &&
121                                (loop++ < MAXLOOP)) ;
122                    }
123                }
124                offset = (*bp++ & 0x0FFFFFFF) << 2;
125                value = *bp++;
126                VIASETREG(offset, value);
127            }
128        } else {
129            ErrorF("Command stream parser error.\n");
130        }
131    }
132    cb->pos = 0;
133    cb->mode = 0;
134    cb->has3dState = FALSE;
135}
136
137#ifdef HAVE_DRI
138/*
139 * Flush the command buffer using DRM. If in PCI mode, we can bypass DRM,
140 * but not for command buffers that contain 3D engine state, since then
141 * the DRM command verifier will lose track of the 3D engine state.
142 */
143static void
144viaFlushDRIEnabled(ViaCommandBuffer *cb)
145{
146    ScrnInfoPtr pScrn = cb->pScrn;
147    VIAPtr pVia = VIAPTR(pScrn);
148    char *tmp = (char *)cb->buf;
149    int tmpSize;
150    drm_via_cmdbuffer_t b;
151
152    /* Align end of command buffer for AGP DMA. */
153    OUT_RING_H1(0x2f8, 0x67676767);
154    if (pVia->agpDMA && cb->mode == 2 && cb->rindex != HC_ParaType_CmdVdata
155        && (cb->pos & 1)) {
156        OUT_RING(HC_DUMMY);
157    }
158
159    tmpSize = cb->pos * sizeof(CARD32);
160    if (pVia->agpDMA || (pVia->directRenderingType && cb->has3dState)) {
161        cb->mode = 0;
162        cb->has3dState = FALSE;
163        while (tmpSize > 0) {
164            b.size = (tmpSize > VIA_DMASIZE) ? VIA_DMASIZE : tmpSize;
165            tmpSize -= b.size;
166            b.buf = tmp;
167            tmp += b.size;
168            if (drmCommandWrite(pVia->drmmode.fd, ((pVia->agpDMA)
169                                              ? DRM_VIA_CMDBUFFER :
170                                              DRM_VIA_PCICMD), &b, sizeof(b))) {
171                ErrorF("DRM command buffer submission failed.\n");
172                viaDumpDMA(cb);
173                return;
174            }
175        }
176        cb->pos = 0;
177    } else {
178        viaFlushPCI(cb);
179    }
180}
181#endif
182
183/*
184 * Initialize a command buffer. Some fields are currently not used since they
185 * are intended for Unichrome Pro group A video commands.
186 */
187static int
188viaSetupCBuffer(ScrnInfoPtr pScrn, ViaCommandBuffer *cb, unsigned size)
189{
190#ifdef HAVE_DRI
191    VIAPtr pVia = VIAPTR(pScrn);
192#endif
193
194    cb->pScrn = pScrn;
195    cb->bufSize = ((size == 0) ? VIA_DMASIZE : size) >> 2;
196    cb->buf = (CARD32 *) calloc(cb->bufSize, sizeof(CARD32));
197    if (!cb->buf)
198        return BadAlloc;
199    cb->waitFlags = 0;
200    cb->pos = 0;
201    cb->mode = 0;
202    cb->header_start = 0;
203    cb->rindex = 0;
204    cb->has3dState = FALSE;
205    cb->flushFunc = viaFlushPCI;
206#ifdef HAVE_DRI
207    if (pVia->directRenderingType == DRI_1) {
208        cb->flushFunc = viaFlushDRIEnabled;
209    }
210#endif
211    return Success;
212}
213
214/*
215 * Free resources associated with a command buffer.
216 */
217static void
218viaTearDownCBuffer(ViaCommandBuffer *cb)
219{
220    if (cb && cb->buf) {
221        free(cb->buf);
222        cb->buf = NULL;
223    }
224}
225
226/*
227 * Update our 2D state (TwoDContext) with a new mode.
228 */
229Bool
230viaAccelSetMode(int bpp, ViaTwodContext * tdc)
231{
232    switch (bpp) {
233    case 16:
234        tdc->mode = VIA_GEM_16bpp;
235        tdc->bytesPPShift = 1;
236        return TRUE;
237    case 32:
238        tdc->mode = VIA_GEM_32bpp;
239        tdc->bytesPPShift = 2;
240        return TRUE;
241    case 8:
242        tdc->mode = VIA_GEM_8bpp;
243        tdc->bytesPPShift = 0;
244        return TRUE;
245    default:
246        tdc->bytesPPShift = 0;
247        return FALSE;
248    }
249}
250
251/*
252 * Switch 2D state clipping on.
253 */
254void
255viaSetClippingRectangle(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
256{
257    VIAPtr pVia = VIAPTR(pScrn);
258    ViaTwodContext *tdc = &pVia->td;
259
260    tdc->clipping = TRUE;
261    tdc->clipX1 = (x1 & 0xFFFF);
262    tdc->clipY1 = y1;
263    tdc->clipX2 = (x2 & 0xFFFF);
264    tdc->clipY2 = y2;
265}
266
267/*
268 * Check if we need to force upload of the whole 3D state (when other
269 * clients or subsystems have touched the 3D engine). Also tell DRI
270 * clients and subsystems that we have touched the 3D engine.
271 */
272Bool
273viaCheckUpload(ScrnInfoPtr pScrn, Via3DState * v3d)
274{
275    VIAPtr pVia = VIAPTR(pScrn);
276    Bool forceUpload;
277
278    forceUpload = (pVia->lastToUpload != v3d);
279    pVia->lastToUpload = v3d;
280
281#ifdef HAVE_DRI
282    if (pVia->directRenderingType == DRI_1) {
283        volatile drm_via_sarea_t *saPriv = (drm_via_sarea_t *)
284                DRIGetSAREAPrivate(pScrn->pScreen);
285        int myContext = DRIGetContext(pScrn->pScreen);
286
287        forceUpload = forceUpload || (saPriv->ctxOwner != myContext);
288        saPriv->ctxOwner = myContext;
289    }
290#endif
291    return forceUpload;
292}
293
294Bool
295viaOrder(CARD32 val, CARD32 * shift)
296{
297    *shift = 0;
298
299    while (val > (1 << *shift))
300        (*shift)++;
301    return (val == (1 << *shift));
302}
303
304/*
305 * Helper for bitdepth expansion.
306 */
307CARD32
308viaBitExpandHelper(CARD32 pixel, CARD32 bits)
309{
310    CARD32 component, mask, tmp;
311
312    component = pixel & ((1 << bits) - 1);
313    mask = (1 << (8 - bits)) - 1;
314    tmp = component << (8 - bits);
315    return ((component & 1) ? (tmp | mask) : tmp);
316}
317
318/*
319 * Extract the components from a pixel of the given format to an argb8888 pixel. * This is used to extract data from one-pixel repeat pixmaps.
320 * Assumes little endian.
321 */
322void
323viaPixelARGB8888(unsigned format, void *pixelP, CARD32 * argb8888)
324{
325    CARD32 bits, shift, pixel, bpp;
326
327    bpp = PICT_FORMAT_BPP(format);
328
329    if (bpp <= 8) {
330        pixel = *((CARD8 *) pixelP);
331    } else if (bpp <= 16) {
332        pixel = *((CARD16 *) pixelP);
333    } else {
334        pixel = *((CARD32 *) pixelP);
335    }
336
337    switch (PICT_FORMAT_TYPE(format)) {
338        case PICT_TYPE_A:
339            bits = PICT_FORMAT_A(format);
340            *argb8888 = viaBitExpandHelper(pixel, bits) << 24;
341            return;
342        case PICT_TYPE_ARGB:
343            shift = 0;
344            bits = PICT_FORMAT_B(format);
345            *argb8888 = viaBitExpandHelper(pixel, bits);
346            shift += bits;
347            bits = PICT_FORMAT_G(format);
348            *argb8888 |= viaBitExpandHelper(pixel >> shift, bits) << 8;
349            shift += bits;
350            bits = PICT_FORMAT_R(format);
351            *argb8888 |= viaBitExpandHelper(pixel >> shift, bits) << 16;
352            shift += bits;
353            bits = PICT_FORMAT_A(format);
354            *argb8888 |= ((bits) ? viaBitExpandHelper(pixel >> shift,
355                                                      bits) : 0xFF) << 24;
356            return;
357        case PICT_TYPE_ABGR:
358            shift = 0;
359            bits = PICT_FORMAT_B(format);
360            *argb8888 = viaBitExpandHelper(pixel, bits) << 16;
361            shift += bits;
362            bits = PICT_FORMAT_G(format);
363            *argb8888 |= viaBitExpandHelper(pixel >> shift, bits) << 8;
364            shift += bits;
365            bits = PICT_FORMAT_R(format);
366            *argb8888 |= viaBitExpandHelper(pixel >> shift, bits);
367            shift += bits;
368            bits = PICT_FORMAT_A(format);
369            *argb8888 |= ((bits) ? viaBitExpandHelper(pixel >> shift,
370                                                      bits) : 0xFF) << 24;
371            return;
372        default:
373            break;
374    }
375    return;
376}
377
378Bool
379viaExpandablePixel(int format)
380{
381    int formatType = PICT_FORMAT_TYPE(format);
382
383    return (formatType == PICT_TYPE_A ||
384            formatType == PICT_TYPE_ABGR || formatType == PICT_TYPE_ARGB);
385}
386
387#ifdef VIA_DEBUG_COMPOSITE
388void
389viaExaCompositePictDesc(PicturePtr pict, char *string, int n)
390{
391    char format[20];
392    char size[20];
393
394    if (!pict) {
395        snprintf(string, n, "None");
396        return;
397    }
398
399    switch (pict->format) {
400        case PICT_x8r8g8b8:
401            snprintf(format, 20, "RGB8888");
402            break;
403        case PICT_a8r8g8b8:
404            snprintf(format, 20, "ARGB8888");
405            break;
406        case PICT_r5g6b5:
407            snprintf(format, 20, "RGB565  ");
408            break;
409        case PICT_x1r5g5b5:
410            snprintf(format, 20, "RGB555  ");
411            break;
412        case PICT_a8:
413            snprintf(format, 20, "A8      ");
414            break;
415        case PICT_a1:
416            snprintf(format, 20, "A1      ");
417            break;
418        default:
419            snprintf(format, 20, "0x%x", (int)pict->format);
420            break;
421    }
422
423    if (pict->pDrawable) {
424       snprintf(size, 20, "%dx%d%s", pict->pDrawable->width,
425                pict->pDrawable->height, pict->repeat ? " R" : "");
426
427       snprintf(string, n, "0x%lx: fmt %s (%s)", (long)pict->pDrawable, format,
428                size);
429    }
430}
431
432void
433viaExaPrintCompositeInfo(char *info, CARD8 op, PicturePtr pSrc, PicturePtr pMask,
434                        PicturePtr pDst)
435{
436    char sop[20];
437    char srcdesc[40], maskdesc[40], dstdesc[40];
438
439    switch (op) {
440	case PictOpClear:
441		sprintf(sop, "PictOpClear ");
442		break;
443	case PictOpSrc:
444		sprintf(sop, "PictOpSrc ");
445		break;
446	case PictOpDst:
447		sprintf(sop, "PictOpDst ");
448		break;
449	case PictOpOver:
450		sprintf(sop, "PictOpOver ");
451		break;
452	case PictOpOutReverse:
453		sprintf(sop, "PictOpOutReverse ");
454		break;
455	case PictOpAdd:
456		sprintf(sop, "PictOpAdd ");
457		break;
458	default:
459		sprintf(sop, "PictOp%d ", op);
460    }
461
462    viaExaCompositePictDesc(pSrc, srcdesc, 40);
463    viaExaCompositePictDesc(pMask, maskdesc, 40);
464    viaExaCompositePictDesc(pDst, dstdesc, 40);
465
466    ErrorF("Composite fallback: %s, \n"
467           "                    op %s, \n"
468           "                    src  %s, \n"
469           "                    mask %s, \n"
470           "                    dst  %s, \n", info, sop, srcdesc, maskdesc, dstdesc);
471}
472#endif /* VIA_DEBUG_COMPOSITE */
473
474/*
475 * Wait for acceleration engines idle. An expensive way to sync.
476 */
477void
478viaAccelSync(ScrnInfoPtr pScrn)
479{
480    VIAPtr pVia = VIAPTR(pScrn);
481    int loop = 0;
482
483    mem_barrier();
484
485    switch (pVia->Chipset) {
486        case VIA_VX800:
487        case VIA_VX855:
488        case VIA_VX900:
489            while ((VIAGETREG(VIA_REG_STATUS) &
490                    (VIA_CMD_RGTR_BUSY_H5 | VIA_2D_ENG_BUSY_H5 | VIA_3D_ENG_BUSY_H5))
491                   && (loop++ < MAXLOOP)) ;
492            break;
493        case VIA_P4M890:
494        case VIA_K8M890:
495        case VIA_P4M900:
496            while ((VIAGETREG(VIA_REG_STATUS) &
497                    (VIA_CMD_RGTR_BUSY | VIA_2D_ENG_BUSY | VIA_3D_ENG_BUSY))
498                   && (loop++ < MAXLOOP)) ;
499            break;
500        default:
501            while (!(VIAGETREG(VIA_REG_STATUS) & VIA_VR_QUEUE_EMPTY)
502                   && (loop++ < MAXLOOP)) ;
503
504            while ((VIAGETREG(VIA_REG_STATUS) &
505                    (VIA_CMD_RGTR_BUSY | VIA_2D_ENG_BUSY | VIA_3D_ENG_BUSY))
506                   && (loop++ < MAXLOOP)) ;
507            break;
508    }
509}
510
511/*
512 * Wait for the value to get blitted, or in the PCI case for engine idle.
513 */
514static void
515viaAccelWaitMarker(ScreenPtr pScreen, int marker)
516{
517    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
518    VIAPtr pVia = VIAPTR(pScrn);
519    CARD32 uMarker = marker;
520
521    if (pVia->agpDMA) {
522        while ((pVia->lastMarkerRead - uMarker) > (1 << 24))
523            pVia->lastMarkerRead = *(CARD32 *) pVia->markerBuf;
524    } else {
525        viaAccelSync(pScrn);
526    }
527}
528
529#ifdef HAVE_DRI
530static int
531viaAccelDMADownload(ScrnInfoPtr pScrn, unsigned long fbOffset,
532                    unsigned srcPitch, unsigned char *dst,
533                    unsigned dstPitch, unsigned w, unsigned h)
534{
535    VIAPtr pVia = VIAPTR(pScrn);
536    drm_via_dmablit_t blit[2], *curBlit;
537    unsigned char *sysAligned = NULL;
538    Bool doSync[2], useBounceBuffer;
539    unsigned pitch, numLines[2];
540    int curBuf, err, i, ret, blitHeight;
541
542    ret = 0;
543
544    useBounceBuffer = (((unsigned long)dst & 15) || (dstPitch & 15));
545    doSync[0] = FALSE;
546    doSync[1] = FALSE;
547    curBuf = 1;
548    blitHeight = h;
549    pitch = dstPitch;
550    if (useBounceBuffer) {
551        pitch = ALIGN_TO(dstPitch, 16);
552        blitHeight = VIA_DMA_DL_SIZE / pitch;
553    }
554
555    while (doSync[0] || doSync[1] || h != 0) {
556        curBuf = 1 - curBuf;
557        curBlit = &blit[curBuf];
558        if (doSync[curBuf]) {
559
560            do {
561                err = drmCommandWrite(pVia->drmmode.fd, DRM_VIA_BLIT_SYNC,
562                                      &curBlit->sync, sizeof(curBlit->sync));
563            } while (err == -EAGAIN);
564
565            if (err)
566                return err;
567
568            doSync[curBuf] = FALSE;
569            if (useBounceBuffer) {
570                for (i = 0; i < numLines[curBuf]; ++i) {
571                    memcpy(dst, curBlit->mem_addr, w);
572                    dst += dstPitch;
573                    curBlit->mem_addr += pitch;
574                }
575            }
576        }
577
578        if (h == 0)
579            continue;
580
581        curBlit->num_lines = (h > blitHeight) ? blitHeight : h;
582        h -= curBlit->num_lines;
583        numLines[curBuf] = curBlit->num_lines;
584
585        sysAligned =
586                (unsigned char *)pVia->dBounce + (curBuf * VIA_DMA_DL_SIZE);
587        sysAligned = (unsigned char *)
588                ALIGN_TO((unsigned long)sysAligned, 16);
589
590        curBlit->mem_addr = (useBounceBuffer) ? sysAligned : dst;
591        curBlit->line_length = w;
592        curBlit->mem_stride = pitch;
593        curBlit->fb_addr = fbOffset;
594        curBlit->fb_stride = srcPitch;
595        curBlit->to_fb = 0;
596        fbOffset += curBlit->num_lines * srcPitch;
597
598        do {
599            err = drmCommandWriteRead(pVia->drmmode.fd, DRM_VIA_DMA_BLIT, curBlit,
600                                      sizeof(*curBlit));
601        } while (err == -EAGAIN);
602
603        if (err) {
604            ret = err;
605            h = 0;
606            continue;
607        }
608
609        doSync[curBuf] = TRUE;
610    }
611
612    return ret;
613}
614
615/*
616 * Use PCI DMA if we can. If the system alignments don't match, we're using
617 * an aligned bounce buffer for pipelined PCI DMA and memcpy.
618 * Throughput for large transfers is around 65 MB/s.
619 */
620static Bool
621viaExaDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
622                         char *dst, int dst_pitch)
623{
624    ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen);
625    unsigned wBytes = (pSrc->drawable.bitsPerPixel * w + 7) >> 3;
626    unsigned srcPitch = exaGetPixmapPitch(pSrc), srcOffset;
627    char *bounceAligned = NULL;
628    VIAPtr pVia = VIAPTR(pScrn);
629    unsigned totSize;
630
631    if (!w || !h)
632        return TRUE;
633
634    srcOffset = x * pSrc->drawable.bitsPerPixel;
635    if (srcOffset & 3)
636        return FALSE;
637    srcOffset = exaGetPixmapOffset(pSrc) + y * srcPitch + (srcOffset >> 3);
638
639    totSize = wBytes * h;
640
641    exaWaitSync(pScrn->pScreen);
642    if (totSize < VIA_MIN_DOWNLOAD) {
643        bounceAligned = (char *) drm_bo_map(pScrn, pVia->drmmode.front_bo) + srcOffset;
644
645        while (h--) {
646            memcpy(dst, bounceAligned, wBytes);
647            dst += dst_pitch;
648            bounceAligned += srcPitch;
649        }
650        return TRUE;
651    }
652
653    if (!pVia->directRenderingType)
654        return FALSE;
655
656    if ((srcPitch & 3) || (srcOffset & 3)) {
657        ErrorF("VIA EXA download src_pitch misaligned\n");
658        return FALSE;
659    }
660
661    if (viaAccelDMADownload(pScrn, srcOffset, srcPitch, (unsigned char *)dst,
662                            dst_pitch, wBytes, h))
663        return FALSE;
664
665    return TRUE;
666}
667
668/*
669 * Upload to framebuffer memory using memcpy to AGP pipelined with a
670 * 3D engine texture operation from AGP to framebuffer. The AGP buffers (2)
671 * should be kept rather small for optimal pipelining.
672 */
673static Bool
674viaExaTexUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, char *src,
675                        int src_pitch)
676{
677    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
678    unsigned dstPitch = exaGetPixmapPitch(pDst), dstOffset;
679    unsigned wBytes = (w * pDst->drawable.bitsPerPixel + 7) >> 3;
680    int i, sync[2], yOffs, bufH, bufOffs, height, format;
681    CARD32 texWidth, texHeight, texPitch;
682    VIAPtr pVia = VIAPTR(pScrn);
683    Via3DState *v3d = &pVia->v3d;
684    char *dst, *texAddr;
685    Bool buf;
686
687    if (!w || !h)
688        return TRUE;
689
690    if (wBytes * h < VIA_MIN_TEX_UPLOAD) {
691        dstOffset = x * pDst->drawable.bitsPerPixel;
692        if (dstOffset & 3)
693            return FALSE;
694
695        dst = (char *) drm_bo_map(pScrn, pVia->drmmode.front_bo) +
696                        (exaGetPixmapOffset(pDst) + y * dstPitch +
697                        (dstOffset >> 3));
698        exaWaitSync(pScrn->pScreen);
699
700        while (h--) {
701            memcpy(dst, src, wBytes);
702            dst += dstPitch;
703            src += src_pitch;
704        }
705        return TRUE;
706    }
707
708    if (!pVia->texAGPBuffer->ptr)
709        return FALSE;
710
711    switch (pDst->drawable.bitsPerPixel) {
712        case 32:
713            format = PICT_a8r8g8b8;
714            break;
715        case 16:
716            format = PICT_r5g6b5;
717            break;
718        default:
719            return FALSE;
720    }
721
722    dstOffset = exaGetPixmapOffset(pDst);
723
724    if (pVia->nPOT[0]) {
725        texPitch = ALIGN_TO(wBytes, 32);
726        height = VIA_AGP_UPL_SIZE / texPitch;
727    } else {
728        viaOrder(wBytes, &texPitch);
729        if (texPitch < 3)
730            texPitch = 3;
731        height = VIA_AGP_UPL_SIZE >> texPitch;
732        texPitch = 1 << texPitch;
733    }
734
735    if (height > 1024)
736        height = 1024;
737    viaOrder(w, &texWidth);
738    texWidth = 1 << texWidth;
739
740    texHeight = height << 1;
741    bufOffs = texPitch * height;
742    texAddr = (char *) drm_bo_map(pScrn, pVia->texAGPBuffer);
743
744    v3d->setDestination(v3d, dstOffset, dstPitch, format);
745    v3d->setDrawing(v3d, 0x0c, 0xFFFFFFFF, 0x000000FF, 0x00);
746    v3d->setFlags(v3d, 1, TRUE, TRUE, FALSE);
747    if (!v3d->setTexture(v3d, 0, (unsigned long) texAddr, texPitch,
748                         pVia->nPOT[0], texWidth, texHeight, format,
749                         via_single, via_single, via_src, TRUE))
750        return FALSE;
751
752    v3d->emitState(v3d, &pVia->cb, viaCheckUpload(pScrn, v3d));
753    v3d->emitClipRect(v3d, &pVia->cb, 0, 0, pDst->drawable.width,
754                      pDst->drawable.height);
755
756    buf = 1;
757    yOffs = 0;
758    sync[0] = -1;
759    sync[1] = -1;
760
761    while (h) {
762        buf = (buf) ? 0 : 1;
763        bufH = (h > height) ? height : h;
764        dst = texAddr + ((buf) ? bufOffs : 0);
765
766        if (sync[buf] >= 0)
767            pVia->exaDriverPtr->WaitMarker(pScrn->pScreen, sync[buf]);
768
769        for (i = 0; i < bufH; ++i) {
770            memcpy(dst, src, wBytes);
771            dst += texPitch;
772            src += src_pitch;
773        }
774
775        v3d->emitQuad(v3d, &pVia->cb, x, y + yOffs, 0, (buf) ? height : 0, 0,
776                      0, w, bufH);
777
778        sync[buf] = pVia->exaDriverPtr->MarkSync(pScrn->pScreen);
779
780        h -= bufH;
781        yOffs += bufH;
782    }
783
784    if (sync[buf] >= 0)
785        pVia->exaDriverPtr->WaitMarker(pScrn->pScreen, sync[buf]);
786
787    return TRUE;
788}
789
790#endif /* HAVE_DRI */
791
792#define EXAOPT_MIGRATION_HEURISTIC  0
793
794Bool
795viaInitExa(ScreenPtr pScreen)
796{
797    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
798    ExaDriverPtr pExa = exaDriverAlloc();
799    Bool nPOTSupported = TRUE;
800    VIAPtr pVia = VIAPTR(pScrn);
801
802    /*
803     * nPOT textures. DRM versions below 2.11.0 don't allow them.
804     * Also some CLE266 hardware may not allow nPOT textures for
805     * texture engine 1. We need to figure that out.
806     */
807#ifdef HAVE_DRI
808    nPOTSupported = ((!pVia->directRenderingType) ||
809                     (pVia->drmVerMajor > 2) ||
810                     ((pVia->drmVerMajor == 2) && (pVia->drmVerMinor >= 11)));
811#endif
812    pVia->nPOT[0] = nPOTSupported;
813    pVia->nPOT[1] = nPOTSupported;
814
815    if (Success != viaSetupCBuffer(pScrn, &pVia->cb, 0)) {
816        pVia->NoAccel = TRUE;
817        return FALSE;
818    }
819
820    if (!pExa)
821        return FALSE;
822
823    memset(pExa, 0, sizeof(*pExa));
824
825    pExa->exa_major = EXA_VERSION_MAJOR;
826    pExa->exa_minor = EXA_VERSION_MINOR;
827    pExa->memoryBase = pVia->FBBase;
828    pExa->memorySize = pVia->FBFreeEnd;
829    pExa->offScreenBase = pScrn->virtualY * pVia->Bpl;
830    pExa->pixmapOffsetAlign = 32;
831    pExa->pixmapPitchAlign = 16;
832    pExa->flags = EXA_OFFSCREEN_PIXMAPS |
833            (pVia->nPOT[1] ? 0 : EXA_OFFSCREEN_ALIGN_POT);
834
835
836    /*  HW Limitation are described here:
837     *
838     *  1. H2/H5/H6 2D source and destination:
839     *     Pitch: (1 << 14) - 1 = 16383
840     *     Dimension: (1 << 12) = 4096
841     *     X, Y position: (1 << 12) - 1 = 4095.
842     *
843     *  2. H2 3D engine Render target:
844     *     Pitch: (1 << 14) - 1 = 16383
845     *     Clip Rectangle: 0 - 2047
846     *
847     *  3. H5/H6 3D engine Render target:
848     *     Pitch: ((1 << 10) - 1)*32 = 32736
849     *     Clip Rectangle: Color Window, 12bits. As Spec saied: 0 - 2048
850     *                     Scissor is the same as color window.
851     */
852    pExa->maxX = 2047;
853    pExa->maxY = 2047;
854    pExa->WaitMarker = viaAccelWaitMarker;
855
856    switch (pVia->Chipset) {
857    case VIA_VX800:
858    case VIA_VX855:
859    case VIA_VX900:
860        pExa->MarkSync = viaAccelMarkSync_H6;
861        pExa->PrepareSolid = viaExaPrepareSolid_H6;
862        pExa->Solid = viaExaSolid_H6;
863        pExa->DoneSolid = viaExaDoneSolidCopy_H6;
864        pExa->PrepareCopy = viaExaPrepareCopy_H6;
865        pExa->Copy = viaExaCopy_H6;
866        pExa->DoneCopy = viaExaDoneSolidCopy_H6;
867        break;
868    default:
869        pExa->MarkSync = viaAccelMarkSync_H2;
870        pExa->PrepareSolid = viaExaPrepareSolid_H2;
871        pExa->Solid = viaExaSolid_H2;
872        pExa->DoneSolid = viaExaDoneSolidCopy_H2;
873        pExa->PrepareCopy = viaExaPrepareCopy_H2;
874        pExa->Copy = viaExaCopy_H2;
875        pExa->DoneCopy = viaExaDoneSolidCopy_H2;
876        break;
877    }
878
879#ifdef HAVE_DRI
880    if (pVia->directRenderingType == DRI_1) {
881#ifdef linux
882        pExa->DownloadFromScreen = viaExaDownloadFromScreen;
883#endif /* linux */
884        switch (pVia->Chipset) {
885        case VIA_K8M800:
886        case VIA_KM400:
887            pExa->UploadToScreen = NULL; //viaExaTexUploadToScreen;
888            break;
889        default:
890            pExa->UploadToScreen = NULL; //viaExaUploadToScreen;
891            break;
892        }
893    }
894#endif /* HAVE_DRI */
895
896    if (!pVia->noComposite) {
897        switch (pVia->Chipset) {
898        case VIA_VX800:
899        case VIA_VX855:
900        case VIA_VX900:
901            pExa->CheckComposite = viaExaCheckComposite_H6;
902            pExa->PrepareComposite = viaExaPrepareComposite_H6;
903            pExa->Composite = viaExaComposite_H6;
904            pExa->DoneComposite = viaExaDoneSolidCopy_H6;
905            break;
906        default:
907            pExa->CheckComposite = viaExaCheckComposite_H2;
908            pExa->PrepareComposite = viaExaPrepareComposite_H2;
909            pExa->Composite = viaExaComposite_H2;
910            pExa->DoneComposite = viaExaDoneSolidCopy_H2;
911            break;
912        }
913    } else {
914        xf86DrvMsg(pScrn->scrnIndex, X_INFO,
915                   "[EXA] Disabling EXA accelerated composite.\n");
916    }
917
918    if (!exaDriverInit(pScreen, pExa)) {
919        free(pExa);
920        return FALSE;
921    }
922
923    pVia->exaDriverPtr = pExa;
924    viaInit3DState(&pVia->v3d);
925    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
926                "[EXA] Enabled EXA acceleration.\n");
927    return TRUE;
928}
929
930/*
931 * Allocate a command buffer and  buffers for accelerated upload, download,
932 * and EXA scratch area. The scratch area resides primarily in AGP memory,
933 * but reverts to FB if AGP is not available.
934 */
935void
936viaFinishInitAccel(ScreenPtr pScreen)
937{
938    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
939    VIAPtr pVia = VIAPTR(pScrn);
940    int size;
941
942#ifdef HAVE_DRI
943    if (pVia->directRenderingType && pVia->useEXA) {
944
945        pVia->dBounce = calloc(VIA_DMA_DL_SIZE * 2, 1);
946
947        if (!pVia->IsPCI) {
948
949            /* Allocate upload and scratch space. */
950            if (pVia->exaDriverPtr->UploadToScreen == viaExaTexUploadToScreen) {
951                size = VIA_AGP_UPL_SIZE * 2;
952
953                pVia->texAGPBuffer = drm_bo_alloc(pScrn, size, 32, TTM_PL_FLAG_TT);
954                if (pVia->texAGPBuffer) {
955                    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
956                               "Allocated %u kiB of AGP memory for "
957                               "system-to-framebuffer transfer.\n",
958                               size / 1024);
959                    pVia->texAGPBuffer->offset = (pVia->texAGPBuffer->offset + 31) & ~31;
960                }
961            }
962
963            size = pVia->exaScratchSize * 1024;
964            pVia->scratchBuffer = drm_bo_alloc(pScrn, size, 32, TTM_PL_FLAG_TT);
965            if (pVia->scratchBuffer) {
966                xf86DrvMsg(pScrn->scrnIndex, X_INFO,
967                           "Allocated %u kiB of AGP memory for "
968                           "EXA scratch area.\n", size / 1024);
969                pVia->scratchOffset =
970                        (pVia->scratchBuffer->offset + 31) & ~31;
971                pVia->scratchAddr = drm_bo_map(pScrn, pVia->scratchBuffer);
972            }
973        }
974    }
975#endif /* HAVE_DRI */
976    if (!pVia->scratchAddr && pVia->useEXA) {
977        size = pVia->exaScratchSize * 1024 + 32;
978        pVia->scratchBuffer = drm_bo_alloc(pScrn, size, 32, TTM_PL_FLAG_SYSTEM);
979
980        if (pVia->scratchBuffer) {
981            xf86DrvMsg(pScrn->scrnIndex, X_INFO,
982                       "Allocated %u kiB of framebuffer memory for "
983                       "EXA scratch area.\n", pVia->exaScratchSize);
984            pVia->scratchOffset = pVia->scratchBuffer->offset;
985            pVia->scratchAddr = drm_bo_map(pScrn, pVia->scratchBuffer);
986        }
987    }
988    memset(pVia->markerBuf, 0, pVia->exa_sync_bo->size);
989}
990
991/*
992 * Free the used acceleration resources.
993 */
994void
995viaExitAccel(ScreenPtr pScreen)
996{
997    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
998    VIAPtr pVia = VIAPTR(pScrn);
999
1000    viaAccelSync(pScrn);
1001    viaTearDownCBuffer(&pVia->cb);
1002
1003    if (pVia->useEXA) {
1004#ifdef HAVE_DRI
1005        if (pVia->directRenderingType == DRI_1) {
1006            if (pVia->texAGPBuffer) {
1007                drm_bo_free(pScrn, pVia->texAGPBuffer);
1008                pVia->texAGPBuffer = NULL;
1009            }
1010
1011            if (pVia->scratchBuffer) {
1012                drm_bo_free(pScrn, pVia->scratchBuffer);
1013                pVia->scratchBuffer = NULL;
1014            }
1015        }
1016        if (pVia->dBounce)
1017            free(pVia->dBounce);
1018#endif /* HAVE_DRI */
1019        if (pVia->scratchBuffer) {
1020            drm_bo_free(pScrn, pVia->scratchBuffer);
1021            pVia->scratchBuffer = NULL;
1022        }
1023        if (pVia->vq_bo) {
1024            drm_bo_unmap(pScrn, pVia->vq_bo);
1025            drm_bo_free(pScrn, pVia->vq_bo);
1026        }
1027        if (pVia->exa_sync_bo) {
1028            drm_bo_unmap(pScrn, pVia->exa_sync_bo);
1029            drm_bo_free(pScrn, pVia->exa_sync_bo);
1030        }
1031        if (pVia->exaDriverPtr) {
1032            exaDriverFini(pScreen);
1033        }
1034        free(pVia->exaDriverPtr);
1035        pVia->exaDriverPtr = NULL;
1036        return;
1037    }
1038}
1039